diff options
Diffstat (limited to 'fs')
549 files changed, 17086 insertions, 23080 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 9e670d527646..ef5905f7c8a3 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -1789,9 +1789,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, | |||
1789 | kfree(st); | 1789 | kfree(st); |
1790 | } else { | 1790 | } else { |
1791 | /* Caching disabled. No need to get upto date stat info. | 1791 | /* Caching disabled. No need to get upto date stat info. |
1792 | * This dentry will be released immediately. So, just i_count++ | 1792 | * This dentry will be released immediately. So, just hold the |
1793 | * inode | ||
1793 | */ | 1794 | */ |
1794 | atomic_inc(&old_dentry->d_inode->i_count); | 1795 | ihold(old_dentry->d_inode); |
1795 | } | 1796 | } |
1796 | 1797 | ||
1797 | dentry->d_op = old_dentry->d_op; | 1798 | dentry->d_op = old_dentry->d_op; |
diff --git a/fs/Kconfig b/fs/Kconfig index 3d185308ec88..97673c955484 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -47,6 +47,9 @@ source "fs/nilfs2/Kconfig" | |||
47 | 47 | ||
48 | endif # BLOCK | 48 | endif # BLOCK |
49 | 49 | ||
50 | config EXPORTFS | ||
51 | tristate | ||
52 | |||
50 | config FILE_LOCKING | 53 | config FILE_LOCKING |
51 | bool "Enable POSIX file locking API" if EMBEDDED | 54 | bool "Enable POSIX file locking API" if EMBEDDED |
52 | default y | 55 | default y |
@@ -221,9 +224,6 @@ config LOCKD_V4 | |||
221 | depends on FILE_LOCKING | 224 | depends on FILE_LOCKING |
222 | default y | 225 | default y |
223 | 226 | ||
224 | config EXPORTFS | ||
225 | tristate | ||
226 | |||
227 | config NFS_ACL_SUPPORT | 227 | config NFS_ACL_SUPPORT |
228 | tristate | 228 | tristate |
229 | select FS_POSIX_ACL | 229 | select FS_POSIX_ACL |
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index bb4cc5b8abc8..79e2ca7973b7 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
@@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC | |||
42 | 42 | ||
43 | config CORE_DUMP_DEFAULT_ELF_HEADERS | 43 | config CORE_DUMP_DEFAULT_ELF_HEADERS |
44 | bool "Write ELF core dumps with partial segments" | 44 | bool "Write ELF core dumps with partial segments" |
45 | default n | 45 | default y |
46 | depends on BINFMT_ELF && ELF_CORE | 46 | depends on BINFMT_ELF && ELF_CORE |
47 | help | 47 | help |
48 | ELF core dump files describe each memory mapping of the crashed | 48 | ELF core dump files describe each memory mapping of the crashed |
@@ -60,7 +60,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS | |||
60 | inherited. See Documentation/filesystems/proc.txt for details. | 60 | inherited. See Documentation/filesystems/proc.txt for details. |
61 | 61 | ||
62 | This config option changes the default setting of coredump_filter | 62 | This config option changes the default setting of coredump_filter |
63 | seen at boot time. If unsure, say N. | 63 | seen at boot time. If unsure, say Y. |
64 | 64 | ||
65 | config BINFMT_FLAT | 65 | config BINFMT_FLAT |
66 | bool "Kernel support for flat binaries" | 66 | bool "Kernel support for flat binaries" |
diff --git a/fs/Makefile b/fs/Makefile index e6ec1d309b1d..26956fcec917 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -29,10 +29,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o | |||
29 | obj-$(CONFIG_AIO) += aio.o | 29 | obj-$(CONFIG_AIO) += aio.o |
30 | obj-$(CONFIG_FILE_LOCKING) += locks.o | 30 | obj-$(CONFIG_FILE_LOCKING) += locks.o |
31 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 31 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
32 | 32 | obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o | |
33 | nfsd-$(CONFIG_NFSD) := nfsctl.o | ||
34 | obj-y += $(nfsd-y) $(nfsd-m) | ||
35 | |||
36 | obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o | 33 | obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o |
37 | obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o | 34 | obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o |
38 | obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o | 35 | obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o |
diff --git a/fs/adfs/Kconfig b/fs/adfs/Kconfig index e55182a74605..1dd5f34b3cf2 100644 --- a/fs/adfs/Kconfig +++ b/fs/adfs/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config ADFS_FS | 1 | config ADFS_FS |
2 | tristate "ADFS file system support (EXPERIMENTAL)" | 2 | tristate "ADFS file system support (EXPERIMENTAL)" |
3 | depends on BLOCK && EXPERIMENTAL | 3 | depends on BLOCK && EXPERIMENTAL |
4 | depends on BKL # need to fix | ||
4 | help | 5 | help |
5 | The Acorn Disc Filing System is the standard file system of the | 6 | The Acorn Disc Filing System is the standard file system of the |
6 | RiscOS operating system which runs on Acorn's ARM-based Risc PC | 7 | RiscOS operating system which runs on Acorn's ARM-based Risc PC |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 4a3af7075c1d..d9803f73236f 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -352,11 +352,15 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) | |||
352 | struct adfs_sb_info *asb; | 352 | struct adfs_sb_info *asb; |
353 | struct inode *root; | 353 | struct inode *root; |
354 | 354 | ||
355 | lock_kernel(); | ||
356 | |||
355 | sb->s_flags |= MS_NODIRATIME; | 357 | sb->s_flags |= MS_NODIRATIME; |
356 | 358 | ||
357 | asb = kzalloc(sizeof(*asb), GFP_KERNEL); | 359 | asb = kzalloc(sizeof(*asb), GFP_KERNEL); |
358 | if (!asb) | 360 | if (!asb) { |
361 | unlock_kernel(); | ||
359 | return -ENOMEM; | 362 | return -ENOMEM; |
363 | } | ||
360 | sb->s_fs_info = asb; | 364 | sb->s_fs_info = asb; |
361 | 365 | ||
362 | /* set default options */ | 366 | /* set default options */ |
@@ -474,6 +478,7 @@ static int adfs_fill_super(struct super_block *sb, void *data, int silent) | |||
474 | goto error; | 478 | goto error; |
475 | } else | 479 | } else |
476 | sb->s_root->d_op = &adfs_dentry_operations; | 480 | sb->s_root->d_op = &adfs_dentry_operations; |
481 | unlock_kernel(); | ||
477 | return 0; | 482 | return 0; |
478 | 483 | ||
479 | error_free_bh: | 484 | error_free_bh: |
@@ -481,6 +486,7 @@ error_free_bh: | |||
481 | error: | 486 | error: |
482 | sb->s_fs_info = NULL; | 487 | sb->s_fs_info = NULL; |
483 | kfree(asb); | 488 | kfree(asb); |
489 | unlock_kernel(); | ||
484 | return -EINVAL; | 490 | return -EINVAL; |
485 | } | 491 | } |
486 | 492 | ||
diff --git a/fs/affs/file.c b/fs/affs/file.c index c4a9875bd1a6..0a90dcd46de2 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c | |||
@@ -894,9 +894,9 @@ affs_truncate(struct inode *inode) | |||
894 | if (AFFS_SB(sb)->s_flags & SF_OFS) { | 894 | if (AFFS_SB(sb)->s_flags & SF_OFS) { |
895 | struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0); | 895 | struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0); |
896 | u32 tmp; | 896 | u32 tmp; |
897 | if (IS_ERR(ext_bh)) { | 897 | if (IS_ERR(bh)) { |
898 | affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)", | 898 | affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)", |
899 | ext, PTR_ERR(ext_bh)); | 899 | ext, PTR_ERR(bh)); |
900 | return; | 900 | return; |
901 | } | 901 | } |
902 | tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next); | 902 | tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next); |
diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 3a0fdec175ba..5d828903ac69 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c | |||
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 | |||
388 | affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); | 388 | affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); |
389 | mark_buffer_dirty_inode(inode_bh, inode); | 389 | mark_buffer_dirty_inode(inode_bh, inode); |
390 | inode->i_nlink = 2; | 390 | inode->i_nlink = 2; |
391 | atomic_inc(&inode->i_count); | 391 | ihold(inode); |
392 | } | 392 | } |
393 | affs_fix_checksum(sb, bh); | 393 | affs_fix_checksum(sb, bh); |
394 | mark_buffer_dirty_inode(bh, inode); | 394 | mark_buffer_dirty_inode(bh, inode); |
diff --git a/fs/affs/super.c b/fs/affs/super.c index 33c4e7eef470..fa4fbe1e238a 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/parser.h> | 16 | #include <linux/parser.h> |
17 | #include <linux/magic.h> | 17 | #include <linux/magic.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
21 | #include "affs.h" | 20 | #include "affs.h" |
22 | 21 | ||
@@ -46,8 +45,6 @@ affs_put_super(struct super_block *sb) | |||
46 | struct affs_sb_info *sbi = AFFS_SB(sb); | 45 | struct affs_sb_info *sbi = AFFS_SB(sb); |
47 | pr_debug("AFFS: put_super()\n"); | 46 | pr_debug("AFFS: put_super()\n"); |
48 | 47 | ||
49 | lock_kernel(); | ||
50 | |||
51 | if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) | 48 | if (!(sb->s_flags & MS_RDONLY) && sb->s_dirt) |
52 | affs_commit_super(sb, 1, 1); | 49 | affs_commit_super(sb, 1, 1); |
53 | 50 | ||
@@ -56,8 +53,6 @@ affs_put_super(struct super_block *sb) | |||
56 | affs_brelse(sbi->s_root_bh); | 53 | affs_brelse(sbi->s_root_bh); |
57 | kfree(sbi); | 54 | kfree(sbi); |
58 | sb->s_fs_info = NULL; | 55 | sb->s_fs_info = NULL; |
59 | |||
60 | unlock_kernel(); | ||
61 | } | 56 | } |
62 | 57 | ||
63 | static void | 58 | static void |
@@ -109,8 +104,8 @@ static void init_once(void *foo) | |||
109 | { | 104 | { |
110 | struct affs_inode_info *ei = (struct affs_inode_info *) foo; | 105 | struct affs_inode_info *ei = (struct affs_inode_info *) foo; |
111 | 106 | ||
112 | init_MUTEX(&ei->i_link_lock); | 107 | sema_init(&ei->i_link_lock, 1); |
113 | init_MUTEX(&ei->i_ext_lock); | 108 | sema_init(&ei->i_ext_lock, 1); |
114 | inode_init_once(&ei->vfs_inode); | 109 | inode_init_once(&ei->vfs_inode); |
115 | } | 110 | } |
116 | 111 | ||
@@ -302,6 +297,7 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent) | |||
302 | sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL); | 297 | sbi = kzalloc(sizeof(struct affs_sb_info), GFP_KERNEL); |
303 | if (!sbi) | 298 | if (!sbi) |
304 | return -ENOMEM; | 299 | return -ENOMEM; |
300 | |||
305 | sb->s_fs_info = sbi; | 301 | sb->s_fs_info = sbi; |
306 | mutex_init(&sbi->s_bmlock); | 302 | mutex_init(&sbi->s_bmlock); |
307 | spin_lock_init(&sbi->symlink_lock); | 303 | spin_lock_init(&sbi->symlink_lock); |
@@ -527,7 +523,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) | |||
527 | kfree(new_opts); | 523 | kfree(new_opts); |
528 | return -EINVAL; | 524 | return -EINVAL; |
529 | } | 525 | } |
530 | lock_kernel(); | 526 | |
531 | replace_mount_options(sb, new_opts); | 527 | replace_mount_options(sb, new_opts); |
532 | 528 | ||
533 | sbi->s_flags = mount_flags; | 529 | sbi->s_flags = mount_flags; |
@@ -543,17 +539,15 @@ affs_remount(struct super_block *sb, int *flags, char *data) | |||
543 | memcpy(sbi->s_volume, volume, 32); | 539 | memcpy(sbi->s_volume, volume, 32); |
544 | spin_unlock(&sbi->symlink_lock); | 540 | spin_unlock(&sbi->symlink_lock); |
545 | 541 | ||
546 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) { | 542 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
547 | unlock_kernel(); | ||
548 | return 0; | 543 | return 0; |
549 | } | 544 | |
550 | if (*flags & MS_RDONLY) { | 545 | if (*flags & MS_RDONLY) { |
551 | affs_write_super(sb); | 546 | affs_write_super(sb); |
552 | affs_free_bitmap(sb); | 547 | affs_free_bitmap(sb); |
553 | } else | 548 | } else |
554 | res = affs_init_bitmap(sb, flags); | 549 | res = affs_init_bitmap(sb, flags); |
555 | 550 | ||
556 | unlock_kernel(); | ||
557 | return res; | 551 | return res; |
558 | } | 552 | } |
559 | 553 | ||
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0d38c09bd55e..5439e1bc9a86 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir, | |||
1045 | if (ret < 0) | 1045 | if (ret < 0) |
1046 | goto link_error; | 1046 | goto link_error; |
1047 | 1047 | ||
1048 | atomic_inc(&vnode->vfs_inode.i_count); | 1048 | ihold(&vnode->vfs_inode); |
1049 | d_instantiate(dentry, &vnode->vfs_inode); | 1049 | d_instantiate(dentry, &vnode->vfs_inode); |
1050 | key_put(key); | 1050 | key_put(key); |
1051 | _leave(" = 0"); | 1051 | _leave(" = 0"); |
diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 0931bc1325eb..757d664575dd 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c | |||
@@ -9,7 +9,6 @@ | |||
9 | * 2 of the License, or (at your option) any later version. | 9 | * 2 of the License, or (at your option) any later version. |
10 | */ | 10 | */ |
11 | 11 | ||
12 | #include <linux/smp_lock.h> | ||
13 | #include "internal.h" | 12 | #include "internal.h" |
14 | 13 | ||
15 | #define AFS_LOCK_GRANTED 0 | 14 | #define AFS_LOCK_GRANTED 0 |
@@ -274,7 +273,7 @@ static int afs_do_setlk(struct file *file, struct file_lock *fl) | |||
274 | 273 | ||
275 | type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; | 274 | type = (fl->fl_type == F_RDLCK) ? AFS_LOCK_READ : AFS_LOCK_WRITE; |
276 | 275 | ||
277 | lock_kernel(); | 276 | lock_flocks(); |
278 | 277 | ||
279 | /* make sure we've got a callback on this file and that our view of the | 278 | /* make sure we've got a callback on this file and that our view of the |
280 | * data version is up to date */ | 279 | * data version is up to date */ |
@@ -421,7 +420,7 @@ given_lock: | |||
421 | afs_vnode_fetch_status(vnode, NULL, key); | 420 | afs_vnode_fetch_status(vnode, NULL, key); |
422 | 421 | ||
423 | error: | 422 | error: |
424 | unlock_kernel(); | 423 | unlock_flocks(); |
425 | _leave(" = %d", ret); | 424 | _leave(" = %d", ret); |
426 | return ret; | 425 | return ret; |
427 | 426 | ||
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 6d552686c498..6153417caf57 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c | |||
@@ -29,6 +29,7 @@ static void afs_mntpt_expiry_timed_out(struct work_struct *work); | |||
29 | 29 | ||
30 | const struct file_operations afs_mntpt_file_operations = { | 30 | const struct file_operations afs_mntpt_file_operations = { |
31 | .open = afs_mntpt_open, | 31 | .open = afs_mntpt_open, |
32 | .llseek = noop_llseek, | ||
32 | }; | 33 | }; |
33 | 34 | ||
34 | const struct inode_operations afs_mntpt_inode_operations = { | 35 | const struct inode_operations afs_mntpt_inode_operations = { |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 77e1e5a61154..eacf76d98ae0 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
@@ -19,7 +19,6 @@ | |||
19 | #include <linux/mount.h> | 19 | #include <linux/mount.h> |
20 | #include <linux/init.h> | 20 | #include <linux/init.h> |
21 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
22 | #include <linux/smp_lock.h> | ||
23 | #include <linux/fs.h> | 22 | #include <linux/fs.h> |
24 | #include <linux/pagemap.h> | 23 | #include <linux/pagemap.h> |
25 | #include <linux/parser.h> | 24 | #include <linux/parser.h> |
@@ -453,12 +452,8 @@ static void afs_put_super(struct super_block *sb) | |||
453 | 452 | ||
454 | _enter(""); | 453 | _enter(""); |
455 | 454 | ||
456 | lock_kernel(); | ||
457 | |||
458 | afs_put_volume(as->volume); | 455 | afs_put_volume(as->volume); |
459 | 456 | ||
460 | unlock_kernel(); | ||
461 | |||
462 | _leave(""); | 457 | _leave(""); |
463 | } | 458 | } |
464 | 459 | ||
diff --git a/fs/afs/write.c b/fs/afs/write.c index 722743b152d8..15690bb1d3b5 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
@@ -438,7 +438,6 @@ no_more: | |||
438 | */ | 438 | */ |
439 | int afs_writepage(struct page *page, struct writeback_control *wbc) | 439 | int afs_writepage(struct page *page, struct writeback_control *wbc) |
440 | { | 440 | { |
441 | struct backing_dev_info *bdi = page->mapping->backing_dev_info; | ||
442 | struct afs_writeback *wb; | 441 | struct afs_writeback *wb; |
443 | int ret; | 442 | int ret; |
444 | 443 | ||
@@ -455,8 +454,6 @@ int afs_writepage(struct page *page, struct writeback_control *wbc) | |||
455 | } | 454 | } |
456 | 455 | ||
457 | wbc->nr_to_write -= ret; | 456 | wbc->nr_to_write -= ret; |
458 | if (wbc->nonblocking && bdi_write_congested(bdi)) | ||
459 | wbc->encountered_congestion = 1; | ||
460 | 457 | ||
461 | _leave(" = 0"); | 458 | _leave(" = 0"); |
462 | return 0; | 459 | return 0; |
@@ -469,7 +466,6 @@ static int afs_writepages_region(struct address_space *mapping, | |||
469 | struct writeback_control *wbc, | 466 | struct writeback_control *wbc, |
470 | pgoff_t index, pgoff_t end, pgoff_t *_next) | 467 | pgoff_t index, pgoff_t end, pgoff_t *_next) |
471 | { | 468 | { |
472 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
473 | struct afs_writeback *wb; | 469 | struct afs_writeback *wb; |
474 | struct page *page; | 470 | struct page *page; |
475 | int ret, n; | 471 | int ret, n; |
@@ -529,11 +525,6 @@ static int afs_writepages_region(struct address_space *mapping, | |||
529 | 525 | ||
530 | wbc->nr_to_write -= ret; | 526 | wbc->nr_to_write -= ret; |
531 | 527 | ||
532 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
533 | wbc->encountered_congestion = 1; | ||
534 | break; | ||
535 | } | ||
536 | |||
537 | cond_resched(); | 528 | cond_resched(); |
538 | } while (index < end && wbc->nr_to_write > 0); | 529 | } while (index < end && wbc->nr_to_write > 0); |
539 | 530 | ||
@@ -548,24 +539,16 @@ static int afs_writepages_region(struct address_space *mapping, | |||
548 | int afs_writepages(struct address_space *mapping, | 539 | int afs_writepages(struct address_space *mapping, |
549 | struct writeback_control *wbc) | 540 | struct writeback_control *wbc) |
550 | { | 541 | { |
551 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
552 | pgoff_t start, end, next; | 542 | pgoff_t start, end, next; |
553 | int ret; | 543 | int ret; |
554 | 544 | ||
555 | _enter(""); | 545 | _enter(""); |
556 | 546 | ||
557 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
558 | wbc->encountered_congestion = 1; | ||
559 | _leave(" = 0 [congest]"); | ||
560 | return 0; | ||
561 | } | ||
562 | |||
563 | if (wbc->range_cyclic) { | 547 | if (wbc->range_cyclic) { |
564 | start = mapping->writeback_index; | 548 | start = mapping->writeback_index; |
565 | end = -1; | 549 | end = -1; |
566 | ret = afs_writepages_region(mapping, wbc, start, end, &next); | 550 | ret = afs_writepages_region(mapping, wbc, start, end, &next); |
567 | if (start > 0 && wbc->nr_to_write > 0 && ret == 0 && | 551 | if (start > 0 && wbc->nr_to_write > 0 && ret == 0) |
568 | !(wbc->nonblocking && wbc->encountered_congestion)) | ||
569 | ret = afs_writepages_region(mapping, wbc, 0, start, | 552 | ret = afs_writepages_region(mapping, wbc, 0, start, |
570 | &next); | 553 | &next); |
571 | mapping->writeback_index = next; | 554 | mapping->writeback_index = next; |
@@ -712,8 +712,16 @@ static ssize_t aio_run_iocb(struct kiocb *iocb) | |||
712 | */ | 712 | */ |
713 | ret = retry(iocb); | 713 | ret = retry(iocb); |
714 | 714 | ||
715 | if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) | 715 | if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) { |
716 | /* | ||
717 | * There's no easy way to restart the syscall since other AIO's | ||
718 | * may be already running. Just fail this IO with EINTR. | ||
719 | */ | ||
720 | if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || | ||
721 | ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK)) | ||
722 | ret = -EINTR; | ||
716 | aio_complete(iocb, ret, 0); | 723 | aio_complete(iocb, ret, 0); |
724 | } | ||
717 | out: | 725 | out: |
718 | spin_lock_irq(&ctx->ctx_lock); | 726 | spin_lock_irq(&ctx->ctx_lock); |
719 | 727 | ||
@@ -1535,7 +1543,19 @@ static void aio_batch_add(struct address_space *mapping, | |||
1535 | } | 1543 | } |
1536 | 1544 | ||
1537 | abe = mempool_alloc(abe_pool, GFP_KERNEL); | 1545 | abe = mempool_alloc(abe_pool, GFP_KERNEL); |
1538 | BUG_ON(!igrab(mapping->host)); | 1546 | |
1547 | /* | ||
1548 | * we should be using igrab here, but | ||
1549 | * we don't want to hammer on the global | ||
1550 | * inode spinlock just to take an extra | ||
1551 | * reference on a file that we must already | ||
1552 | * have a reference to. | ||
1553 | * | ||
1554 | * When we're called, we always have a reference | ||
1555 | * on the file, so we must always have a reference | ||
1556 | * on the inode, so ihold() is safe here. | ||
1557 | */ | ||
1558 | ihold(mapping->host); | ||
1539 | abe->mapping = mapping; | 1559 | abe->mapping = mapping; |
1540 | hlist_add_head(&abe->list, &batch_hash[bucket]); | 1560 | hlist_add_head(&abe->list, &batch_hash[bucket]); |
1541 | return; | 1561 | return; |
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e4b75d6eda83..5365527ca43f 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c | |||
@@ -111,10 +111,9 @@ struct file *anon_inode_getfile(const char *name, | |||
111 | path.mnt = mntget(anon_inode_mnt); | 111 | path.mnt = mntget(anon_inode_mnt); |
112 | /* | 112 | /* |
113 | * We know the anon_inode inode count is always greater than zero, | 113 | * We know the anon_inode inode count is always greater than zero, |
114 | * so we can avoid doing an igrab() and we can use an open-coded | 114 | * so ihold() is safe. |
115 | * atomic_inc(). | ||
116 | */ | 115 | */ |
117 | atomic_inc(&anon_inode_inode->i_count); | 116 | ihold(anon_inode_inode); |
118 | 117 | ||
119 | path.dentry->d_op = &anon_inodefs_dentry_operations; | 118 | path.dentry->d_op = &anon_inodefs_dentry_operations; |
120 | d_instantiate(path.dentry, anon_inode_inode); | 119 | d_instantiate(path.dentry, anon_inode_inode); |
@@ -194,6 +193,7 @@ static struct inode *anon_inode_mkinode(void) | |||
194 | if (!inode) | 193 | if (!inode) |
195 | return ERR_PTR(-ENOMEM); | 194 | return ERR_PTR(-ENOMEM); |
196 | 195 | ||
196 | inode->i_ino = get_next_ino(); | ||
197 | inode->i_fop = &anon_inode_fops; | 197 | inode->i_fop = &anon_inode_fops; |
198 | 198 | ||
199 | inode->i_mapping->a_ops = &anon_aops; | 199 | inode->i_mapping->a_ops = &anon_aops; |
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig index 5f3bea90911e..480e210c83ab 100644 --- a/fs/autofs/Kconfig +++ b/fs/autofs/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config AUTOFS_FS | 1 | config AUTOFS_FS |
2 | tristate "Kernel automounter support" | 2 | tristate "Kernel automounter support" |
3 | depends on BKL # unfixable, just use autofs4 | ||
3 | help | 4 | help |
4 | The automounter is a tool to automatically mount remote file systems | 5 | The automounter is a tool to automatically mount remote file systems |
5 | on demand. This implementation is partially kernel-based to reduce | 6 | on demand. This implementation is partially kernel-based to reduce |
diff --git a/fs/autofs/root.c b/fs/autofs/root.c index 11b1ea786d00..0c4ca81aeaeb 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c | |||
@@ -27,7 +27,9 @@ static int autofs_root_unlink(struct inode *,struct dentry *); | |||
27 | static int autofs_root_rmdir(struct inode *,struct dentry *); | 27 | static int autofs_root_rmdir(struct inode *,struct dentry *); |
28 | static int autofs_root_mkdir(struct inode *,struct dentry *,int); | 28 | static int autofs_root_mkdir(struct inode *,struct dentry *,int); |
29 | static long autofs_root_ioctl(struct file *,unsigned int,unsigned long); | 29 | static long autofs_root_ioctl(struct file *,unsigned int,unsigned long); |
30 | #ifdef CONFIG_COMPAT | ||
30 | static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long); | 31 | static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long); |
32 | #endif | ||
31 | 33 | ||
32 | const struct file_operations autofs_root_operations = { | 34 | const struct file_operations autofs_root_operations = { |
33 | .llseek = generic_file_llseek, | 35 | .llseek = generic_file_llseek, |
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index ba4a38b9c22f..eff9a419469a 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c | |||
@@ -724,6 +724,7 @@ static const struct file_operations _dev_ioctl_fops = { | |||
724 | .unlocked_ioctl = autofs_dev_ioctl, | 724 | .unlocked_ioctl = autofs_dev_ioctl, |
725 | .compat_ioctl = autofs_dev_ioctl_compat, | 725 | .compat_ioctl = autofs_dev_ioctl_compat, |
726 | .owner = THIS_MODULE, | 726 | .owner = THIS_MODULE, |
727 | .llseek = noop_llseek, | ||
727 | }; | 728 | }; |
728 | 729 | ||
729 | static struct miscdevice _autofs_dev_ioctl_misc = { | 730 | static struct miscdevice _autofs_dev_ioctl_misc = { |
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 821b2b955dac..ac87e49fa706 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
@@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, | |||
398 | inode->i_gid = sb->s_root->d_inode->i_gid; | 398 | inode->i_gid = sb->s_root->d_inode->i_gid; |
399 | } | 399 | } |
400 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 400 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
401 | inode->i_ino = get_next_ino(); | ||
401 | 402 | ||
402 | if (S_ISDIR(inf->mode)) { | 403 | if (S_ISDIR(inf->mode)) { |
403 | inode->i_nlink = 2; | 404 | inode->i_nlink = 2; |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index cb1bd38dc08c..d5c1401f0031 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -19,7 +19,7 @@ | |||
19 | #include <linux/param.h> | 19 | #include <linux/param.h> |
20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
21 | #include <linux/compat.h> | 21 | #include <linux/compat.h> |
22 | #include <linux/smp_lock.h> | 22 | #include <linux/mutex.h> |
23 | 23 | ||
24 | #include "autofs_i.h" | 24 | #include "autofs_i.h" |
25 | 25 | ||
@@ -28,7 +28,9 @@ static int autofs4_dir_unlink(struct inode *,struct dentry *); | |||
28 | static int autofs4_dir_rmdir(struct inode *,struct dentry *); | 28 | static int autofs4_dir_rmdir(struct inode *,struct dentry *); |
29 | static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); | 29 | static int autofs4_dir_mkdir(struct inode *,struct dentry *,int); |
30 | static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); | 30 | static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long); |
31 | #ifdef CONFIG_COMPAT | ||
31 | static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); | 32 | static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long); |
33 | #endif | ||
32 | static int autofs4_dir_open(struct inode *inode, struct file *file); | 34 | static int autofs4_dir_open(struct inode *inode, struct file *file); |
33 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); | 35 | static struct dentry *autofs4_lookup(struct inode *,struct dentry *, struct nameidata *); |
34 | static void *autofs4_follow_link(struct dentry *, struct nameidata *); | 36 | static void *autofs4_follow_link(struct dentry *, struct nameidata *); |
@@ -978,15 +980,17 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp, | |||
978 | } | 980 | } |
979 | } | 981 | } |
980 | 982 | ||
983 | static DEFINE_MUTEX(autofs4_ioctl_mutex); | ||
984 | |||
981 | static long autofs4_root_ioctl(struct file *filp, | 985 | static long autofs4_root_ioctl(struct file *filp, |
982 | unsigned int cmd, unsigned long arg) | 986 | unsigned int cmd, unsigned long arg) |
983 | { | 987 | { |
984 | long ret; | 988 | long ret; |
985 | struct inode *inode = filp->f_dentry->d_inode; | 989 | struct inode *inode = filp->f_dentry->d_inode; |
986 | 990 | ||
987 | lock_kernel(); | 991 | mutex_lock(&autofs4_ioctl_mutex); |
988 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 992 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
989 | unlock_kernel(); | 993 | mutex_unlock(&autofs4_ioctl_mutex); |
990 | 994 | ||
991 | return ret; | 995 | return ret; |
992 | } | 996 | } |
@@ -998,13 +1002,13 @@ static long autofs4_root_compat_ioctl(struct file *filp, | |||
998 | struct inode *inode = filp->f_path.dentry->d_inode; | 1002 | struct inode *inode = filp->f_path.dentry->d_inode; |
999 | int ret; | 1003 | int ret; |
1000 | 1004 | ||
1001 | lock_kernel(); | 1005 | mutex_lock(&autofs4_ioctl_mutex); |
1002 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) | 1006 | if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL) |
1003 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); | 1007 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg); |
1004 | else | 1008 | else |
1005 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, | 1009 | ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, |
1006 | (unsigned long)compat_ptr(arg)); | 1010 | (unsigned long)compat_ptr(arg)); |
1007 | unlock_kernel(); | 1011 | mutex_unlock(&autofs4_ioctl_mutex); |
1008 | 1012 | ||
1009 | return ret; | 1013 | return ret; |
1010 | } | 1014 | } |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index d967e052b779..685ecff3ab31 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir, | |||
176 | inc_nlink(inode); | 176 | inc_nlink(inode); |
177 | inode->i_ctime = CURRENT_TIME_SEC; | 177 | inode->i_ctime = CURRENT_TIME_SEC; |
178 | mark_inode_dirty(inode); | 178 | mark_inode_dirty(inode); |
179 | atomic_inc(&inode->i_count); | 179 | ihold(inode); |
180 | d_instantiate(new, inode); | 180 | d_instantiate(new, inode); |
181 | mutex_unlock(&info->bfs_lock); | 181 | mutex_unlock(&info->bfs_lock); |
182 | return 0; | 182 | return 0; |
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index c4daf0f5fc02..883e77acd5a8 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
13 | #include <linux/init.h> | 13 | #include <linux/init.h> |
14 | #include <linux/fs.h> | 14 | #include <linux/fs.h> |
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
17 | #include <linux/vfs.h> | 16 | #include <linux/vfs.h> |
18 | #include <linux/writeback.h> | 17 | #include <linux/writeback.h> |
@@ -215,14 +214,10 @@ static void bfs_put_super(struct super_block *s) | |||
215 | if (!info) | 214 | if (!info) |
216 | return; | 215 | return; |
217 | 216 | ||
218 | lock_kernel(); | ||
219 | |||
220 | mutex_destroy(&info->bfs_lock); | 217 | mutex_destroy(&info->bfs_lock); |
221 | kfree(info->si_imap); | 218 | kfree(info->si_imap); |
222 | kfree(info); | 219 | kfree(info); |
223 | s->s_fs_info = NULL; | 220 | s->s_fs_info = NULL; |
224 | |||
225 | unlock_kernel(); | ||
226 | } | 221 | } |
227 | 222 | ||
228 | static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 223 | static int bfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index f96eff04e11a..a6395bdb26ae 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c | |||
@@ -134,10 +134,6 @@ static int aout_core_dump(struct coredump_params *cprm) | |||
134 | if (!dump_write(file, dump_start, dump_size)) | 134 | if (!dump_write(file, dump_start, dump_size)) |
135 | goto end_coredump; | 135 | goto end_coredump; |
136 | } | 136 | } |
137 | /* Finally dump the task struct. Not be used by gdb, but could be useful */ | ||
138 | set_fs(KERNEL_DS); | ||
139 | if (!dump_write(file, current, sizeof(*current))) | ||
140 | goto end_coredump; | ||
141 | end_coredump: | 137 | end_coredump: |
142 | set_fs(fs); | 138 | set_fs(fs); |
143 | return has_dumped; | 139 | return has_dumped; |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 535e763ab1a6..6884e198e0c7 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -800,7 +800,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
800 | * default mmap base, as well as whatever program they | 800 | * default mmap base, as well as whatever program they |
801 | * might try to exec. This is because the brk will | 801 | * might try to exec. This is because the brk will |
802 | * follow the loader, and is not movable. */ | 802 | * follow the loader, and is not movable. */ |
803 | #ifdef CONFIG_X86 | 803 | #if defined(CONFIG_X86) || defined(CONFIG_ARM) |
804 | load_bias = 0; | 804 | load_bias = 0; |
805 | #else | 805 | #else |
806 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); | 806 | load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr); |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index fd0cc0bf9a40..29990f0eee0c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) | |||
495 | struct inode * inode = new_inode(sb); | 495 | struct inode * inode = new_inode(sb); |
496 | 496 | ||
497 | if (inode) { | 497 | if (inode) { |
498 | inode->i_ino = get_next_ino(); | ||
498 | inode->i_mode = mode; | 499 | inode->i_mode = mode; |
499 | inode->i_atime = inode->i_mtime = inode->i_ctime = | 500 | inode->i_atime = inode->i_mtime = inode->i_ctime = |
500 | current_fs_time(inode->i_sb); | 501 | current_fs_time(inode->i_sb); |
@@ -576,6 +577,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer, | |||
576 | static const struct file_operations bm_entry_operations = { | 577 | static const struct file_operations bm_entry_operations = { |
577 | .read = bm_entry_read, | 578 | .read = bm_entry_read, |
578 | .write = bm_entry_write, | 579 | .write = bm_entry_write, |
580 | .llseek = default_llseek, | ||
579 | }; | 581 | }; |
580 | 582 | ||
581 | /* /register */ | 583 | /* /register */ |
@@ -643,6 +645,7 @@ out: | |||
643 | 645 | ||
644 | static const struct file_operations bm_register_operations = { | 646 | static const struct file_operations bm_register_operations = { |
645 | .write = bm_register_write, | 647 | .write = bm_register_write, |
648 | .llseek = noop_llseek, | ||
646 | }; | 649 | }; |
647 | 650 | ||
648 | /* /status */ | 651 | /* /status */ |
@@ -680,6 +683,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer, | |||
680 | static const struct file_operations bm_status_operations = { | 683 | static const struct file_operations bm_status_operations = { |
681 | .read = bm_status_read, | 684 | .read = bm_status_read, |
682 | .write = bm_status_write, | 685 | .write = bm_status_write, |
686 | .llseek = default_llseek, | ||
683 | }; | 687 | }; |
684 | 688 | ||
685 | /* Superblock handling */ | 689 | /* Superblock handling */ |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 50e8c8582faa..dea3b628a6ce 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -48,6 +48,21 @@ inline struct block_device *I_BDEV(struct inode *inode) | |||
48 | 48 | ||
49 | EXPORT_SYMBOL(I_BDEV); | 49 | EXPORT_SYMBOL(I_BDEV); |
50 | 50 | ||
51 | /* | ||
52 | * move the inode from it's current bdi to the a new bdi. if the inode is dirty | ||
53 | * we need to move it onto the dirty list of @dst so that the inode is always | ||
54 | * on the right list. | ||
55 | */ | ||
56 | static void bdev_inode_switch_bdi(struct inode *inode, | ||
57 | struct backing_dev_info *dst) | ||
58 | { | ||
59 | spin_lock(&inode_lock); | ||
60 | inode->i_data.backing_dev_info = dst; | ||
61 | if (inode->i_state & I_DIRTY) | ||
62 | list_move(&inode->i_wb_list, &dst->wb.b_dirty); | ||
63 | spin_unlock(&inode_lock); | ||
64 | } | ||
65 | |||
51 | static sector_t max_block(struct block_device *bdev) | 66 | static sector_t max_block(struct block_device *bdev) |
52 | { | 67 | { |
53 | sector_t retval = ~((sector_t)0); | 68 | sector_t retval = ~((sector_t)0); |
@@ -370,7 +385,7 @@ int blkdev_fsync(struct file *filp, int datasync) | |||
370 | */ | 385 | */ |
371 | mutex_unlock(&bd_inode->i_mutex); | 386 | mutex_unlock(&bd_inode->i_mutex); |
372 | 387 | ||
373 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL, BLKDEV_IFL_WAIT); | 388 | error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); |
374 | if (error == -EOPNOTSUPP) | 389 | if (error == -EOPNOTSUPP) |
375 | error = 0; | 390 | error = 0; |
376 | 391 | ||
@@ -550,7 +565,7 @@ EXPORT_SYMBOL(bdget); | |||
550 | */ | 565 | */ |
551 | struct block_device *bdgrab(struct block_device *bdev) | 566 | struct block_device *bdgrab(struct block_device *bdev) |
552 | { | 567 | { |
553 | atomic_inc(&bdev->bd_inode->i_count); | 568 | ihold(bdev->bd_inode); |
554 | return bdev; | 569 | return bdev; |
555 | } | 570 | } |
556 | 571 | ||
@@ -580,7 +595,7 @@ static struct block_device *bd_acquire(struct inode *inode) | |||
580 | spin_lock(&bdev_lock); | 595 | spin_lock(&bdev_lock); |
581 | bdev = inode->i_bdev; | 596 | bdev = inode->i_bdev; |
582 | if (bdev) { | 597 | if (bdev) { |
583 | atomic_inc(&bdev->bd_inode->i_count); | 598 | ihold(bdev->bd_inode); |
584 | spin_unlock(&bdev_lock); | 599 | spin_unlock(&bdev_lock); |
585 | return bdev; | 600 | return bdev; |
586 | } | 601 | } |
@@ -591,12 +606,12 @@ static struct block_device *bd_acquire(struct inode *inode) | |||
591 | spin_lock(&bdev_lock); | 606 | spin_lock(&bdev_lock); |
592 | if (!inode->i_bdev) { | 607 | if (!inode->i_bdev) { |
593 | /* | 608 | /* |
594 | * We take an additional bd_inode->i_count for inode, | 609 | * We take an additional reference to bd_inode, |
595 | * and it's released in clear_inode() of inode. | 610 | * and it's released in clear_inode() of inode. |
596 | * So, we can access it via ->i_mapping always | 611 | * So, we can access it via ->i_mapping always |
597 | * without igrab(). | 612 | * without igrab(). |
598 | */ | 613 | */ |
599 | atomic_inc(&bdev->bd_inode->i_count); | 614 | ihold(bdev->bd_inode); |
600 | inode->i_bdev = bdev; | 615 | inode->i_bdev = bdev; |
601 | inode->i_mapping = bdev->bd_inode->i_mapping; | 616 | inode->i_mapping = bdev->bd_inode->i_mapping; |
602 | list_add(&inode->i_devices, &bdev->bd_inodes); | 617 | list_add(&inode->i_devices, &bdev->bd_inodes); |
@@ -1390,7 +1405,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1390 | bdi = blk_get_backing_dev_info(bdev); | 1405 | bdi = blk_get_backing_dev_info(bdev); |
1391 | if (bdi == NULL) | 1406 | if (bdi == NULL) |
1392 | bdi = &default_backing_dev_info; | 1407 | bdi = &default_backing_dev_info; |
1393 | bdev->bd_inode->i_data.backing_dev_info = bdi; | 1408 | bdev_inode_switch_bdi(bdev->bd_inode, bdi); |
1394 | } | 1409 | } |
1395 | if (bdev->bd_invalidated) | 1410 | if (bdev->bd_invalidated) |
1396 | rescan_partitions(disk, bdev); | 1411 | rescan_partitions(disk, bdev); |
@@ -1405,8 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1405 | if (ret) | 1420 | if (ret) |
1406 | goto out_clear; | 1421 | goto out_clear; |
1407 | bdev->bd_contains = whole; | 1422 | bdev->bd_contains = whole; |
1408 | bdev->bd_inode->i_data.backing_dev_info = | 1423 | bdev_inode_switch_bdi(bdev->bd_inode, |
1409 | whole->bd_inode->i_data.backing_dev_info; | 1424 | whole->bd_inode->i_data.backing_dev_info); |
1410 | bdev->bd_part = disk_get_part(disk, partno); | 1425 | bdev->bd_part = disk_get_part(disk, partno); |
1411 | if (!(disk->flags & GENHD_FL_UP) || | 1426 | if (!(disk->flags & GENHD_FL_UP) || |
1412 | !bdev->bd_part || !bdev->bd_part->nr_sects) { | 1427 | !bdev->bd_part || !bdev->bd_part->nr_sects) { |
@@ -1439,7 +1454,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) | |||
1439 | disk_put_part(bdev->bd_part); | 1454 | disk_put_part(bdev->bd_part); |
1440 | bdev->bd_disk = NULL; | 1455 | bdev->bd_disk = NULL; |
1441 | bdev->bd_part = NULL; | 1456 | bdev->bd_part = NULL; |
1442 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1457 | bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info); |
1443 | if (bdev != bdev->bd_contains) | 1458 | if (bdev != bdev->bd_contains) |
1444 | __blkdev_put(bdev->bd_contains, mode, 1); | 1459 | __blkdev_put(bdev->bd_contains, mode, 1); |
1445 | bdev->bd_contains = NULL; | 1460 | bdev->bd_contains = NULL; |
@@ -1533,7 +1548,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) | |||
1533 | disk_put_part(bdev->bd_part); | 1548 | disk_put_part(bdev->bd_part); |
1534 | bdev->bd_part = NULL; | 1549 | bdev->bd_part = NULL; |
1535 | bdev->bd_disk = NULL; | 1550 | bdev->bd_disk = NULL; |
1536 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1551 | bdev_inode_switch_bdi(bdev->bd_inode, |
1552 | &default_backing_dev_info); | ||
1537 | if (bdev != bdev->bd_contains) | 1553 | if (bdev != bdev->bd_contains) |
1538 | victim = bdev->bd_contains; | 1554 | victim = bdev->bd_contains; |
1539 | bdev->bd_contains = NULL; | 1555 | bdev->bd_contains = NULL; |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 64f10082f048..5e789f4a3ed0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -2063,7 +2063,7 @@ static void btrfs_end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
2063 | if (uptodate) { | 2063 | if (uptodate) { |
2064 | set_buffer_uptodate(bh); | 2064 | set_buffer_uptodate(bh); |
2065 | } else { | 2065 | } else { |
2066 | if (!buffer_eopnotsupp(bh) && printk_ratelimit()) { | 2066 | if (printk_ratelimit()) { |
2067 | printk(KERN_WARNING "lost page write due to " | 2067 | printk(KERN_WARNING "lost page write due to " |
2068 | "I/O error on %s\n", | 2068 | "I/O error on %s\n", |
2069 | bdevname(bh->b_bdev, b)); | 2069 | bdevname(bh->b_bdev, b)); |
@@ -2200,21 +2200,10 @@ static int write_dev_supers(struct btrfs_device *device, | |||
2200 | bh->b_end_io = btrfs_end_buffer_write_sync; | 2200 | bh->b_end_io = btrfs_end_buffer_write_sync; |
2201 | } | 2201 | } |
2202 | 2202 | ||
2203 | if (i == last_barrier && do_barriers && device->barriers) { | 2203 | if (i == last_barrier && do_barriers) |
2204 | ret = submit_bh(WRITE_BARRIER, bh); | 2204 | ret = submit_bh(WRITE_FLUSH_FUA, bh); |
2205 | if (ret == -EOPNOTSUPP) { | 2205 | else |
2206 | printk("btrfs: disabling barriers on dev %s\n", | ||
2207 | device->name); | ||
2208 | set_buffer_uptodate(bh); | ||
2209 | device->barriers = 0; | ||
2210 | /* one reference for submit_bh */ | ||
2211 | get_bh(bh); | ||
2212 | lock_buffer(bh); | ||
2213 | ret = submit_bh(WRITE_SYNC, bh); | ||
2214 | } | ||
2215 | } else { | ||
2216 | ret = submit_bh(WRITE_SYNC, bh); | 2206 | ret = submit_bh(WRITE_SYNC, bh); |
2217 | } | ||
2218 | 2207 | ||
2219 | if (ret) | 2208 | if (ret) |
2220 | errors++; | 2209 | errors++; |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32d094002a57..0b81ecdb101c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1695,8 +1695,7 @@ static int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
1695 | static void btrfs_issue_discard(struct block_device *bdev, | 1695 | static void btrfs_issue_discard(struct block_device *bdev, |
1696 | u64 start, u64 len) | 1696 | u64 start, u64 len) |
1697 | { | 1697 | { |
1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, | 1698 | blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 0); |
1699 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
1700 | } | 1699 | } |
1701 | 1700 | ||
1702 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | 1701 | static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c03864406af3..64f99cf69ce0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -3849,7 +3849,7 @@ again: | |||
3849 | p = &root->inode_tree.rb_node; | 3849 | p = &root->inode_tree.rb_node; |
3850 | parent = NULL; | 3850 | parent = NULL; |
3851 | 3851 | ||
3852 | if (hlist_unhashed(&inode->i_hash)) | 3852 | if (inode_unhashed(inode)) |
3853 | return; | 3853 | return; |
3854 | 3854 | ||
3855 | spin_lock(&root->inode_lock); | 3855 | spin_lock(&root->inode_lock); |
@@ -4758,7 +4758,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4758 | } | 4758 | } |
4759 | 4759 | ||
4760 | btrfs_set_trans_block_group(trans, dir); | 4760 | btrfs_set_trans_block_group(trans, dir); |
4761 | atomic_inc(&inode->i_count); | 4761 | ihold(inode); |
4762 | 4762 | ||
4763 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); | 4763 | err = btrfs_add_nondir(trans, dentry, inode, 1, index); |
4764 | 4764 | ||
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1776dbd8dc98..144f8a5730f5 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -815,6 +815,7 @@ static const struct file_operations btrfs_ctl_fops = { | |||
815 | .unlocked_ioctl = btrfs_control_ioctl, | 815 | .unlocked_ioctl = btrfs_control_ioctl, |
816 | .compat_ioctl = btrfs_control_ioctl, | 816 | .compat_ioctl = btrfs_control_ioctl, |
817 | .owner = THIS_MODULE, | 817 | .owner = THIS_MODULE, |
818 | .llseek = noop_llseek, | ||
818 | }; | 819 | }; |
819 | 820 | ||
820 | static struct miscdevice btrfs_misc = { | 821 | static struct miscdevice btrfs_misc = { |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index dd318ff280b2..e25e46a8b4e2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -398,7 +398,6 @@ static noinline int device_list_add(const char *path, | |||
398 | device->work.func = pending_bios_fn; | 398 | device->work.func = pending_bios_fn; |
399 | memcpy(device->uuid, disk_super->dev_item.uuid, | 399 | memcpy(device->uuid, disk_super->dev_item.uuid, |
400 | BTRFS_UUID_SIZE); | 400 | BTRFS_UUID_SIZE); |
401 | device->barriers = 1; | ||
402 | spin_lock_init(&device->io_lock); | 401 | spin_lock_init(&device->io_lock); |
403 | device->name = kstrdup(path, GFP_NOFS); | 402 | device->name = kstrdup(path, GFP_NOFS); |
404 | if (!device->name) { | 403 | if (!device->name) { |
@@ -462,7 +461,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) | |||
462 | device->devid = orig_dev->devid; | 461 | device->devid = orig_dev->devid; |
463 | device->work.func = pending_bios_fn; | 462 | device->work.func = pending_bios_fn; |
464 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); | 463 | memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); |
465 | device->barriers = 1; | ||
466 | spin_lock_init(&device->io_lock); | 464 | spin_lock_init(&device->io_lock); |
467 | INIT_LIST_HEAD(&device->dev_list); | 465 | INIT_LIST_HEAD(&device->dev_list); |
468 | INIT_LIST_HEAD(&device->dev_alloc_list); | 466 | INIT_LIST_HEAD(&device->dev_alloc_list); |
@@ -1489,7 +1487,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1489 | trans = btrfs_start_transaction(root, 0); | 1487 | trans = btrfs_start_transaction(root, 0); |
1490 | lock_chunks(root); | 1488 | lock_chunks(root); |
1491 | 1489 | ||
1492 | device->barriers = 1; | ||
1493 | device->writeable = 1; | 1490 | device->writeable = 1; |
1494 | device->work.func = pending_bios_fn; | 1491 | device->work.func = pending_bios_fn; |
1495 | generate_random_uuid(device->uuid); | 1492 | generate_random_uuid(device->uuid); |
@@ -3084,7 +3081,6 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, | |||
3084 | return NULL; | 3081 | return NULL; |
3085 | list_add(&device->dev_list, | 3082 | list_add(&device->dev_list, |
3086 | &fs_devices->devices); | 3083 | &fs_devices->devices); |
3087 | device->barriers = 1; | ||
3088 | device->dev_root = root->fs_info->dev_root; | 3084 | device->dev_root = root->fs_info->dev_root; |
3089 | device->devid = devid; | 3085 | device->devid = devid; |
3090 | device->work.func = pending_bios_fn; | 3086 | device->work.func = pending_bios_fn; |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 31b0fabdd2ea..2b638b6e4eea 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -42,7 +42,6 @@ struct btrfs_device { | |||
42 | int running_pending; | 42 | int running_pending; |
43 | u64 generation; | 43 | u64 generation; |
44 | 44 | ||
45 | int barriers; | ||
46 | int writeable; | 45 | int writeable; |
47 | int in_fs_metadata; | 46 | int in_fs_metadata; |
48 | 47 | ||
diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca279d1c..5930e382959b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) | |||
156 | if (uptodate) { | 156 | if (uptodate) { |
157 | set_buffer_uptodate(bh); | 157 | set_buffer_uptodate(bh); |
158 | } else { | 158 | } else { |
159 | if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { | 159 | if (!quiet_error(bh)) { |
160 | buffer_io_error(bh); | 160 | buffer_io_error(bh); |
161 | printk(KERN_WARNING "lost page write due to " | 161 | printk(KERN_WARNING "lost page write due to " |
162 | "I/O error on %s\n", | 162 | "I/O error on %s\n", |
@@ -905,7 +905,6 @@ try_again: | |||
905 | 905 | ||
906 | bh->b_state = 0; | 906 | bh->b_state = 0; |
907 | atomic_set(&bh->b_count, 0); | 907 | atomic_set(&bh->b_count, 0); |
908 | bh->b_private = NULL; | ||
909 | bh->b_size = size; | 908 | bh->b_size = size; |
910 | 909 | ||
911 | /* Link the buffer to its page */ | 910 | /* Link the buffer to its page */ |
@@ -1706,7 +1705,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page, | |||
1706 | * and kswapd activity, but those code paths have their own | 1705 | * and kswapd activity, but those code paths have their own |
1707 | * higher-level throttling. | 1706 | * higher-level throttling. |
1708 | */ | 1707 | */ |
1709 | if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { | 1708 | if (wbc->sync_mode != WB_SYNC_NONE) { |
1710 | lock_buffer(bh); | 1709 | lock_buffer(bh); |
1711 | } else if (!trylock_buffer(bh)) { | 1710 | } else if (!trylock_buffer(bh)) { |
1712 | redirty_page_for_writepage(wbc, page); | 1711 | redirty_page_for_writepage(wbc, page); |
@@ -1834,9 +1833,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) | |||
1834 | } | 1833 | } |
1835 | EXPORT_SYMBOL(page_zero_new_buffers); | 1834 | EXPORT_SYMBOL(page_zero_new_buffers); |
1836 | 1835 | ||
1837 | int block_prepare_write(struct page *page, unsigned from, unsigned to, | 1836 | int __block_write_begin(struct page *page, loff_t pos, unsigned len, |
1838 | get_block_t *get_block) | 1837 | get_block_t *get_block) |
1839 | { | 1838 | { |
1839 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | ||
1840 | unsigned to = from + len; | ||
1840 | struct inode *inode = page->mapping->host; | 1841 | struct inode *inode = page->mapping->host; |
1841 | unsigned block_start, block_end; | 1842 | unsigned block_start, block_end; |
1842 | sector_t block; | 1843 | sector_t block; |
@@ -1916,7 +1917,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to, | |||
1916 | } | 1917 | } |
1917 | return err; | 1918 | return err; |
1918 | } | 1919 | } |
1919 | EXPORT_SYMBOL(block_prepare_write); | 1920 | EXPORT_SYMBOL(__block_write_begin); |
1920 | 1921 | ||
1921 | static int __block_commit_write(struct inode *inode, struct page *page, | 1922 | static int __block_commit_write(struct inode *inode, struct page *page, |
1922 | unsigned from, unsigned to) | 1923 | unsigned from, unsigned to) |
@@ -1953,15 +1954,6 @@ static int __block_commit_write(struct inode *inode, struct page *page, | |||
1953 | return 0; | 1954 | return 0; |
1954 | } | 1955 | } |
1955 | 1956 | ||
1956 | int __block_write_begin(struct page *page, loff_t pos, unsigned len, | ||
1957 | get_block_t *get_block) | ||
1958 | { | ||
1959 | unsigned start = pos & (PAGE_CACHE_SIZE - 1); | ||
1960 | |||
1961 | return block_prepare_write(page, start, start + len, get_block); | ||
1962 | } | ||
1963 | EXPORT_SYMBOL(__block_write_begin); | ||
1964 | |||
1965 | /* | 1957 | /* |
1966 | * block_write_begin takes care of the basic task of block allocation and | 1958 | * block_write_begin takes care of the basic task of block allocation and |
1967 | * bringing partial write blocks uptodate first. | 1959 | * bringing partial write blocks uptodate first. |
@@ -2379,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, | |||
2379 | else | 2371 | else |
2380 | end = PAGE_CACHE_SIZE; | 2372 | end = PAGE_CACHE_SIZE; |
2381 | 2373 | ||
2382 | ret = block_prepare_write(page, 0, end, get_block); | 2374 | ret = __block_write_begin(page, 0, end, get_block); |
2383 | if (!ret) | 2375 | if (!ret) |
2384 | ret = block_commit_write(page, 0, end); | 2376 | ret = block_commit_write(page, 0, end); |
2385 | 2377 | ||
@@ -2466,11 +2458,10 @@ int nobh_write_begin(struct address_space *mapping, | |||
2466 | *fsdata = NULL; | 2458 | *fsdata = NULL; |
2467 | 2459 | ||
2468 | if (page_has_buffers(page)) { | 2460 | if (page_has_buffers(page)) { |
2469 | unlock_page(page); | 2461 | ret = __block_write_begin(page, pos, len, get_block); |
2470 | page_cache_release(page); | 2462 | if (unlikely(ret)) |
2471 | *pagep = NULL; | 2463 | goto out_release; |
2472 | return block_write_begin(mapping, pos, len, flags, pagep, | 2464 | return ret; |
2473 | get_block); | ||
2474 | } | 2465 | } |
2475 | 2466 | ||
2476 | if (PageMappedToDisk(page)) | 2467 | if (PageMappedToDisk(page)) |
@@ -2891,7 +2882,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) | |||
2891 | 2882 | ||
2892 | if (err == -EOPNOTSUPP) { | 2883 | if (err == -EOPNOTSUPP) { |
2893 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); | 2884 | set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); |
2894 | set_bit(BH_Eopnotsupp, &bh->b_state); | ||
2895 | } | 2885 | } |
2896 | 2886 | ||
2897 | if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) | 2887 | if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) |
@@ -3031,10 +3021,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) | |||
3031 | bh->b_end_io = end_buffer_write_sync; | 3021 | bh->b_end_io = end_buffer_write_sync; |
3032 | ret = submit_bh(rw, bh); | 3022 | ret = submit_bh(rw, bh); |
3033 | wait_on_buffer(bh); | 3023 | wait_on_buffer(bh); |
3034 | if (buffer_eopnotsupp(bh)) { | ||
3035 | clear_buffer_eopnotsupp(bh); | ||
3036 | ret = -EOPNOTSUPP; | ||
3037 | } | ||
3038 | if (!ret && !buffer_uptodate(bh)) | 3024 | if (!ret && !buffer_uptodate(bh)) |
3039 | ret = -EIO; | 3025 | ret = -EIO; |
3040 | } else { | 3026 | } else { |
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 727caedcdd92..0a1467b15516 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c | |||
@@ -55,6 +55,7 @@ const struct file_operations cachefiles_daemon_fops = { | |||
55 | .read = cachefiles_daemon_read, | 55 | .read = cachefiles_daemon_read, |
56 | .write = cachefiles_daemon_write, | 56 | .write = cachefiles_daemon_write, |
57 | .poll = cachefiles_daemon_poll, | 57 | .poll = cachefiles_daemon_poll, |
58 | .llseek = noop_llseek, | ||
58 | }; | 59 | }; |
59 | 60 | ||
60 | struct cachefiles_daemon_cmd { | 61 | struct cachefiles_daemon_cmd { |
diff --git a/fs/ceph/Kconfig b/fs/ceph/Kconfig index bc87b9c1d27e..9eb134ea6eb2 100644 --- a/fs/ceph/Kconfig +++ b/fs/ceph/Kconfig | |||
@@ -1,8 +1,11 @@ | |||
1 | config CEPH_FS | 1 | config CEPH_FS |
2 | tristate "Ceph distributed file system (EXPERIMENTAL)" | 2 | tristate "Ceph distributed file system (EXPERIMENTAL)" |
3 | depends on INET && EXPERIMENTAL | 3 | depends on INET && EXPERIMENTAL |
4 | select CEPH_LIB | ||
4 | select LIBCRC32C | 5 | select LIBCRC32C |
5 | select CRYPTO_AES | 6 | select CRYPTO_AES |
7 | select CRYPTO | ||
8 | default n | ||
6 | help | 9 | help |
7 | Choose Y or M here to include support for mounting the | 10 | Choose Y or M here to include support for mounting the |
8 | experimental Ceph distributed file system. Ceph is an extremely | 11 | experimental Ceph distributed file system. Ceph is an extremely |
@@ -13,15 +16,3 @@ config CEPH_FS | |||
13 | 16 | ||
14 | If unsure, say N. | 17 | If unsure, say N. |
15 | 18 | ||
16 | config CEPH_FS_PRETTYDEBUG | ||
17 | bool "Include file:line in ceph debug output" | ||
18 | depends on CEPH_FS | ||
19 | default n | ||
20 | help | ||
21 | If you say Y here, debug output will include a filename and | ||
22 | line to aid debugging. This icnreases kernel size and slows | ||
23 | execution slightly when debug call sites are enabled (e.g., | ||
24 | via CONFIG_DYNAMIC_DEBUG). | ||
25 | |||
26 | If unsure, say N. | ||
27 | |||
diff --git a/fs/ceph/Makefile b/fs/ceph/Makefile index 278e1172600d..9e6c4f2e8ff1 100644 --- a/fs/ceph/Makefile +++ b/fs/ceph/Makefile | |||
@@ -8,15 +8,8 @@ obj-$(CONFIG_CEPH_FS) += ceph.o | |||
8 | 8 | ||
9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ | 9 | ceph-objs := super.o inode.o dir.o file.o locks.o addr.o ioctl.o \ |
10 | export.o caps.o snap.o xattr.o \ | 10 | export.o caps.o snap.o xattr.o \ |
11 | messenger.o msgpool.o buffer.o pagelist.o \ | 11 | mds_client.o mdsmap.o strings.o ceph_frag.o \ |
12 | mds_client.o mdsmap.o \ | 12 | debugfs.o |
13 | mon_client.o \ | ||
14 | osd_client.o osdmap.o crush/crush.o crush/mapper.o crush/hash.o \ | ||
15 | debugfs.o \ | ||
16 | auth.o auth_none.o \ | ||
17 | crypto.o armor.o \ | ||
18 | auth_x.o \ | ||
19 | ceph_fs.o ceph_strings.o ceph_hash.o ceph_frag.o | ||
20 | 13 | ||
21 | else | 14 | else |
22 | #Otherwise we were called directly from the command | 15 | #Otherwise we were called directly from the command |
diff --git a/fs/ceph/README b/fs/ceph/README deleted file mode 100644 index 18352fab37c0..000000000000 --- a/fs/ceph/README +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | # | ||
2 | # The following files are shared by (and manually synchronized | ||
3 | # between) the Ceph userland and kernel client. | ||
4 | # | ||
5 | # userland kernel | ||
6 | src/include/ceph_fs.h fs/ceph/ceph_fs.h | ||
7 | src/include/ceph_fs.cc fs/ceph/ceph_fs.c | ||
8 | src/include/msgr.h fs/ceph/msgr.h | ||
9 | src/include/rados.h fs/ceph/rados.h | ||
10 | src/include/ceph_strings.cc fs/ceph/ceph_strings.c | ||
11 | src/include/ceph_frag.h fs/ceph/ceph_frag.h | ||
12 | src/include/ceph_frag.cc fs/ceph/ceph_frag.c | ||
13 | src/include/ceph_hash.h fs/ceph/ceph_hash.h | ||
14 | src/include/ceph_hash.cc fs/ceph/ceph_hash.c | ||
15 | src/crush/crush.c fs/ceph/crush/crush.c | ||
16 | src/crush/crush.h fs/ceph/crush/crush.h | ||
17 | src/crush/mapper.c fs/ceph/crush/mapper.c | ||
18 | src/crush/mapper.h fs/ceph/crush/mapper.h | ||
19 | src/crush/hash.h fs/ceph/crush/hash.h | ||
20 | src/crush/hash.c fs/ceph/crush/hash.c | ||
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 4cfce1ee31fa..e9c874abc9e1 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/backing-dev.h> | 3 | #include <linux/backing-dev.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
@@ -10,7 +10,8 @@ | |||
10 | #include <linux/task_io_accounting_ops.h> | 10 | #include <linux/task_io_accounting_ops.h> |
11 | 11 | ||
12 | #include "super.h" | 12 | #include "super.h" |
13 | #include "osd_client.h" | 13 | #include "mds_client.h" |
14 | #include <linux/ceph/osd_client.h> | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * Ceph address space ops. | 17 | * Ceph address space ops. |
@@ -193,7 +194,8 @@ static int readpage_nounlock(struct file *filp, struct page *page) | |||
193 | { | 194 | { |
194 | struct inode *inode = filp->f_dentry->d_inode; | 195 | struct inode *inode = filp->f_dentry->d_inode; |
195 | struct ceph_inode_info *ci = ceph_inode(inode); | 196 | struct ceph_inode_info *ci = ceph_inode(inode); |
196 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 197 | struct ceph_osd_client *osdc = |
198 | &ceph_inode_to_client(inode)->client->osdc; | ||
197 | int err = 0; | 199 | int err = 0; |
198 | u64 len = PAGE_CACHE_SIZE; | 200 | u64 len = PAGE_CACHE_SIZE; |
199 | 201 | ||
@@ -265,7 +267,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping, | |||
265 | { | 267 | { |
266 | struct inode *inode = file->f_dentry->d_inode; | 268 | struct inode *inode = file->f_dentry->d_inode; |
267 | struct ceph_inode_info *ci = ceph_inode(inode); | 269 | struct ceph_inode_info *ci = ceph_inode(inode); |
268 | struct ceph_osd_client *osdc = &ceph_inode_to_client(inode)->osdc; | 270 | struct ceph_osd_client *osdc = |
271 | &ceph_inode_to_client(inode)->client->osdc; | ||
269 | int rc = 0; | 272 | int rc = 0; |
270 | struct page **pages; | 273 | struct page **pages; |
271 | loff_t offset; | 274 | loff_t offset; |
@@ -365,7 +368,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
365 | { | 368 | { |
366 | struct inode *inode; | 369 | struct inode *inode; |
367 | struct ceph_inode_info *ci; | 370 | struct ceph_inode_info *ci; |
368 | struct ceph_client *client; | 371 | struct ceph_fs_client *fsc; |
369 | struct ceph_osd_client *osdc; | 372 | struct ceph_osd_client *osdc; |
370 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; | 373 | loff_t page_off = page->index << PAGE_CACHE_SHIFT; |
371 | int len = PAGE_CACHE_SIZE; | 374 | int len = PAGE_CACHE_SIZE; |
@@ -383,8 +386,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
383 | } | 386 | } |
384 | inode = page->mapping->host; | 387 | inode = page->mapping->host; |
385 | ci = ceph_inode(inode); | 388 | ci = ceph_inode(inode); |
386 | client = ceph_inode_to_client(inode); | 389 | fsc = ceph_inode_to_client(inode); |
387 | osdc = &client->osdc; | 390 | osdc = &fsc->client->osdc; |
388 | 391 | ||
389 | /* verify this is a writeable snap context */ | 392 | /* verify this is a writeable snap context */ |
390 | snapc = (void *)page->private; | 393 | snapc = (void *)page->private; |
@@ -411,13 +414,13 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) | |||
411 | if (i_size < page_off + len) | 414 | if (i_size < page_off + len) |
412 | len = i_size - page_off; | 415 | len = i_size - page_off; |
413 | 416 | ||
414 | dout("writepage %p page %p index %lu on %llu~%u\n", | 417 | dout("writepage %p page %p index %lu on %llu~%u snapc %p\n", |
415 | inode, page, page->index, page_off, len); | 418 | inode, page, page->index, page_off, len, snapc); |
416 | 419 | ||
417 | writeback_stat = atomic_long_inc_return(&client->writeback_count); | 420 | writeback_stat = atomic_long_inc_return(&fsc->writeback_count); |
418 | if (writeback_stat > | 421 | if (writeback_stat > |
419 | CONGESTION_ON_THRESH(client->mount_args->congestion_kb)) | 422 | CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb)) |
420 | set_bdi_congested(&client->backing_dev_info, BLK_RW_ASYNC); | 423 | set_bdi_congested(&fsc->backing_dev_info, BLK_RW_ASYNC); |
421 | 424 | ||
422 | set_page_writeback(page); | 425 | set_page_writeback(page); |
423 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), | 426 | err = ceph_osdc_writepages(osdc, ceph_vino(inode), |
@@ -496,7 +499,7 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
496 | struct address_space *mapping = inode->i_mapping; | 499 | struct address_space *mapping = inode->i_mapping; |
497 | __s32 rc = -EIO; | 500 | __s32 rc = -EIO; |
498 | u64 bytes = 0; | 501 | u64 bytes = 0; |
499 | struct ceph_client *client = ceph_inode_to_client(inode); | 502 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
500 | long writeback_stat; | 503 | long writeback_stat; |
501 | unsigned issued = ceph_caps_issued(ci); | 504 | unsigned issued = ceph_caps_issued(ci); |
502 | 505 | ||
@@ -529,10 +532,10 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
529 | WARN_ON(!PageUptodate(page)); | 532 | WARN_ON(!PageUptodate(page)); |
530 | 533 | ||
531 | writeback_stat = | 534 | writeback_stat = |
532 | atomic_long_dec_return(&client->writeback_count); | 535 | atomic_long_dec_return(&fsc->writeback_count); |
533 | if (writeback_stat < | 536 | if (writeback_stat < |
534 | CONGESTION_OFF_THRESH(client->mount_args->congestion_kb)) | 537 | CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb)) |
535 | clear_bdi_congested(&client->backing_dev_info, | 538 | clear_bdi_congested(&fsc->backing_dev_info, |
536 | BLK_RW_ASYNC); | 539 | BLK_RW_ASYNC); |
537 | 540 | ||
538 | ceph_put_snap_context((void *)page->private); | 541 | ceph_put_snap_context((void *)page->private); |
@@ -569,13 +572,13 @@ static void writepages_finish(struct ceph_osd_request *req, | |||
569 | * mempool. we avoid the mempool if we can because req->r_num_pages | 572 | * mempool. we avoid the mempool if we can because req->r_num_pages |
570 | * may be less than the maximum write size. | 573 | * may be less than the maximum write size. |
571 | */ | 574 | */ |
572 | static void alloc_page_vec(struct ceph_client *client, | 575 | static void alloc_page_vec(struct ceph_fs_client *fsc, |
573 | struct ceph_osd_request *req) | 576 | struct ceph_osd_request *req) |
574 | { | 577 | { |
575 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, | 578 | req->r_pages = kmalloc(sizeof(struct page *) * req->r_num_pages, |
576 | GFP_NOFS); | 579 | GFP_NOFS); |
577 | if (!req->r_pages) { | 580 | if (!req->r_pages) { |
578 | req->r_pages = mempool_alloc(client->wb_pagevec_pool, GFP_NOFS); | 581 | req->r_pages = mempool_alloc(fsc->wb_pagevec_pool, GFP_NOFS); |
579 | req->r_pages_from_pool = 1; | 582 | req->r_pages_from_pool = 1; |
580 | WARN_ON(!req->r_pages); | 583 | WARN_ON(!req->r_pages); |
581 | } | 584 | } |
@@ -588,9 +591,8 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
588 | struct writeback_control *wbc) | 591 | struct writeback_control *wbc) |
589 | { | 592 | { |
590 | struct inode *inode = mapping->host; | 593 | struct inode *inode = mapping->host; |
591 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
592 | struct ceph_inode_info *ci = ceph_inode(inode); | 594 | struct ceph_inode_info *ci = ceph_inode(inode); |
593 | struct ceph_client *client; | 595 | struct ceph_fs_client *fsc; |
594 | pgoff_t index, start, end; | 596 | pgoff_t index, start, end; |
595 | int range_whole = 0; | 597 | int range_whole = 0; |
596 | int should_loop = 1; | 598 | int should_loop = 1; |
@@ -617,26 +619,19 @@ static int ceph_writepages_start(struct address_space *mapping, | |||
617 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : | 619 | wbc->sync_mode == WB_SYNC_NONE ? "NONE" : |
618 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); | 620 | (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD")); |
619 | 621 | ||
620 | client = ceph_inode_to_client(inode); | 622 | fsc = ceph_inode_to_client(inode); |
621 | if (client->mount_state == CEPH_MOUNT_SHUTDOWN) { | 623 | if (fsc->mount_state == CEPH_MOUNT_SHUTDOWN) { |
622 | pr_warning("writepage_start %p on forced umount\n", inode); | 624 | pr_warning("writepage_start %p on forced umount\n", inode); |
623 | return -EIO; /* we're in a forced umount, don't write! */ | 625 | return -EIO; /* we're in a forced umount, don't write! */ |
624 | } | 626 | } |
625 | if (client->mount_args->wsize && client->mount_args->wsize < wsize) | 627 | if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize) |
626 | wsize = client->mount_args->wsize; | 628 | wsize = fsc->mount_options->wsize; |
627 | if (wsize < PAGE_CACHE_SIZE) | 629 | if (wsize < PAGE_CACHE_SIZE) |
628 | wsize = PAGE_CACHE_SIZE; | 630 | wsize = PAGE_CACHE_SIZE; |
629 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; | 631 | max_pages_ever = wsize >> PAGE_CACHE_SHIFT; |
630 | 632 | ||
631 | pagevec_init(&pvec, 0); | 633 | pagevec_init(&pvec, 0); |
632 | 634 | ||
633 | /* ?? */ | ||
634 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | ||
635 | dout(" writepages congested\n"); | ||
636 | wbc->encountered_congestion = 1; | ||
637 | goto out_final; | ||
638 | } | ||
639 | |||
640 | /* where to start/end? */ | 635 | /* where to start/end? */ |
641 | if (wbc->range_cyclic) { | 636 | if (wbc->range_cyclic) { |
642 | start = mapping->writeback_index; /* Start from prev offset */ | 637 | start = mapping->writeback_index; /* Start from prev offset */ |
@@ -766,9 +761,10 @@ get_more_pages: | |||
766 | /* ok */ | 761 | /* ok */ |
767 | if (locked_pages == 0) { | 762 | if (locked_pages == 0) { |
768 | /* prepare async write request */ | 763 | /* prepare async write request */ |
769 | offset = page->index << PAGE_CACHE_SHIFT; | 764 | offset = (unsigned long long)page->index |
765 | << PAGE_CACHE_SHIFT; | ||
770 | len = wsize; | 766 | len = wsize; |
771 | req = ceph_osdc_new_request(&client->osdc, | 767 | req = ceph_osdc_new_request(&fsc->client->osdc, |
772 | &ci->i_layout, | 768 | &ci->i_layout, |
773 | ceph_vino(inode), | 769 | ceph_vino(inode), |
774 | offset, &len, | 770 | offset, &len, |
@@ -781,7 +777,7 @@ get_more_pages: | |||
781 | &inode->i_mtime, true, 1); | 777 | &inode->i_mtime, true, 1); |
782 | max_pages = req->r_num_pages; | 778 | max_pages = req->r_num_pages; |
783 | 779 | ||
784 | alloc_page_vec(client, req); | 780 | alloc_page_vec(fsc, req); |
785 | req->r_callback = writepages_finish; | 781 | req->r_callback = writepages_finish; |
786 | req->r_inode = inode; | 782 | req->r_inode = inode; |
787 | } | 783 | } |
@@ -793,10 +789,10 @@ get_more_pages: | |||
793 | inode, page, page->index); | 789 | inode, page, page->index); |
794 | 790 | ||
795 | writeback_stat = | 791 | writeback_stat = |
796 | atomic_long_inc_return(&client->writeback_count); | 792 | atomic_long_inc_return(&fsc->writeback_count); |
797 | if (writeback_stat > CONGESTION_ON_THRESH( | 793 | if (writeback_stat > CONGESTION_ON_THRESH( |
798 | client->mount_args->congestion_kb)) { | 794 | fsc->mount_options->congestion_kb)) { |
799 | set_bdi_congested(&client->backing_dev_info, | 795 | set_bdi_congested(&fsc->backing_dev_info, |
800 | BLK_RW_ASYNC); | 796 | BLK_RW_ASYNC); |
801 | } | 797 | } |
802 | 798 | ||
@@ -845,7 +841,7 @@ get_more_pages: | |||
845 | op->payload_len = cpu_to_le32(len); | 841 | op->payload_len = cpu_to_le32(len); |
846 | req->r_request->hdr.data_len = cpu_to_le32(len); | 842 | req->r_request->hdr.data_len = cpu_to_le32(len); |
847 | 843 | ||
848 | ceph_osdc_start_request(&client->osdc, req, true); | 844 | ceph_osdc_start_request(&fsc->client->osdc, req, true); |
849 | req = NULL; | 845 | req = NULL; |
850 | 846 | ||
851 | /* continue? */ | 847 | /* continue? */ |
@@ -881,7 +877,6 @@ out: | |||
881 | rc = 0; /* vfs expects us to return 0 */ | 877 | rc = 0; /* vfs expects us to return 0 */ |
882 | ceph_put_snap_context(snapc); | 878 | ceph_put_snap_context(snapc); |
883 | dout("writepages done, rc = %d\n", rc); | 879 | dout("writepages done, rc = %d\n", rc); |
884 | out_final: | ||
885 | return rc; | 880 | return rc; |
886 | } | 881 | } |
887 | 882 | ||
@@ -914,7 +909,7 @@ static int ceph_update_writeable_page(struct file *file, | |||
914 | { | 909 | { |
915 | struct inode *inode = file->f_dentry->d_inode; | 910 | struct inode *inode = file->f_dentry->d_inode; |
916 | struct ceph_inode_info *ci = ceph_inode(inode); | 911 | struct ceph_inode_info *ci = ceph_inode(inode); |
917 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 912 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
918 | loff_t page_off = pos & PAGE_CACHE_MASK; | 913 | loff_t page_off = pos & PAGE_CACHE_MASK; |
919 | int pos_in_page = pos & ~PAGE_CACHE_MASK; | 914 | int pos_in_page = pos & ~PAGE_CACHE_MASK; |
920 | int end_in_page = pos_in_page + len; | 915 | int end_in_page = pos_in_page + len; |
@@ -1052,8 +1047,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, | |||
1052 | struct page *page, void *fsdata) | 1047 | struct page *page, void *fsdata) |
1053 | { | 1048 | { |
1054 | struct inode *inode = file->f_dentry->d_inode; | 1049 | struct inode *inode = file->f_dentry->d_inode; |
1055 | struct ceph_client *client = ceph_inode_to_client(inode); | 1050 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
1056 | struct ceph_mds_client *mdsc = &client->mdsc; | 1051 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1057 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 1052 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
1058 | int check_cap = 0; | 1053 | int check_cap = 0; |
1059 | 1054 | ||
@@ -1122,7 +1117,7 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1122 | { | 1117 | { |
1123 | struct inode *inode = vma->vm_file->f_dentry->d_inode; | 1118 | struct inode *inode = vma->vm_file->f_dentry->d_inode; |
1124 | struct page *page = vmf->page; | 1119 | struct page *page = vmf->page; |
1125 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1120 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1126 | loff_t off = page->index << PAGE_CACHE_SHIFT; | 1121 | loff_t off = page->index << PAGE_CACHE_SHIFT; |
1127 | loff_t size, len; | 1122 | loff_t size, len; |
1128 | int ret; | 1123 | int ret; |
diff --git a/fs/ceph/armor.c b/fs/ceph/armor.c deleted file mode 100644 index eb2a666b0be7..000000000000 --- a/fs/ceph/armor.c +++ /dev/null | |||
@@ -1,103 +0,0 @@ | |||
1 | |||
2 | #include <linux/errno.h> | ||
3 | |||
4 | int ceph_armor(char *dst, const char *src, const char *end); | ||
5 | int ceph_unarmor(char *dst, const char *src, const char *end); | ||
6 | |||
7 | /* | ||
8 | * base64 encode/decode. | ||
9 | */ | ||
10 | |||
/* The standard base64 alphabet (RFC 4648), indexed by 6-bit value. */
static const char *pem_key =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Map a 6-bit value (0..63) to its base64 character. */
static int encode_bits(int c)
{
	return pem_key[c];
}

/*
 * Map a base64 character back to its 6-bit value.  Padding ('=') maps
 * to 0 so callers can decode a padded quad and discard the tail bytes;
 * any other character is -EINVAL.
 */
static int decode_bits(char c)
{
	if ('A' <= c && c <= 'Z')
		return c - 'A';
	if ('a' <= c && c <= 'z')
		return 26 + (c - 'a');
	if ('0' <= c && c <= '9')
		return 52 + (c - '0');
	switch (c) {
	case '+':
		return 62;
	case '/':
		return 63;
	case '=':
		return 0;	/* just non-negative, please */
	default:
		return -EINVAL;
	}
}

/*
 * base64-encode [src, end) into dst, inserting a '\n' after every 64
 * output characters (PEM style).  Returns the number of bytes written.
 * dst must be large enough for the expansion (4/3 plus newlines).
 */
int ceph_armor(char *dst, const char *src, const char *end)
{
	int olen = 0;
	int line = 0;

	while (src < end) {
		size_t left = end - src;
		unsigned char a = (unsigned char)src[0];
		unsigned char b = left > 1 ? (unsigned char)src[1] : 0;
		unsigned char c = left > 2 ? (unsigned char)src[2] : 0;

		*dst++ = encode_bits(a >> 2);
		*dst++ = encode_bits(((a & 3) << 4) | (b >> 4));
		*dst++ = left > 1 ? encode_bits(((b & 15) << 2) | (c >> 6))
				  : '=';
		*dst++ = left > 2 ? encode_bits(c & 63) : '=';

		src += left < 3 ? left : 3;
		olen += 4;
		line += 4;
		if (line == 64) {	/* wrap at 64 columns, PEM style */
			line = 0;
			*dst++ = '\n';
			olen++;
		}
	}
	return olen;
}

/*
 * base64-decode [src, end) into dst, tolerating one '\n' before each
 * 4-character group.  Returns the number of decoded bytes, or -EINVAL
 * on a short or malformed group.
 */
int ceph_unarmor(char *dst, const char *src, const char *end)
{
	int olen = 0;

	while (src < end) {
		int q0, q1, q2, q3;

		if (*src == '\n')
			src++;
		if (src + 4 > end)
			return -EINVAL;
		q0 = decode_bits(src[0]);
		q1 = decode_bits(src[1]);
		q2 = decode_bits(src[2]);
		q3 = decode_bits(src[3]);
		if (q0 < 0 || q1 < 0 || q2 < 0 || q3 < 0)
			return -EINVAL;

		*dst++ = (q0 << 2) | (q1 >> 4);
		if (src[2] == '=')	/* one payload byte in this group */
			return olen + 1;
		*dst++ = ((q1 & 15) << 4) | (q2 >> 2);
		if (src[3] == '=')	/* two payload bytes in this group */
			return olen + 2;
		*dst++ = ((q2 & 3) << 6) | q3;
		olen += 3;
		src += 4;
	}
	return olen;
}
diff --git a/fs/ceph/auth.c b/fs/ceph/auth.c deleted file mode 100644 index 6d2e30600627..000000000000 --- a/fs/ceph/auth.c +++ /dev/null | |||
@@ -1,259 +0,0 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/module.h> | ||
4 | #include <linux/err.h> | ||
5 | #include <linux/slab.h> | ||
6 | |||
7 | #include "types.h" | ||
8 | #include "auth_none.h" | ||
9 | #include "auth_x.h" | ||
10 | #include "decode.h" | ||
11 | #include "super.h" | ||
12 | |||
13 | #include "messenger.h" | ||
14 | |||
15 | /* | ||
16 | * get protocol handler | ||
17 | */ | ||
18 | static u32 supported_protocols[] = { | ||
19 | CEPH_AUTH_NONE, | ||
20 | CEPH_AUTH_CEPHX | ||
21 | }; | ||
22 | |||
23 | static int ceph_auth_init_protocol(struct ceph_auth_client *ac, int protocol) | ||
24 | { | ||
25 | switch (protocol) { | ||
26 | case CEPH_AUTH_NONE: | ||
27 | return ceph_auth_none_init(ac); | ||
28 | case CEPH_AUTH_CEPHX: | ||
29 | return ceph_x_init(ac); | ||
30 | default: | ||
31 | return -ENOENT; | ||
32 | } | ||
33 | } | ||
34 | |||
35 | /* | ||
36 | * setup, teardown. | ||
37 | */ | ||
38 | struct ceph_auth_client *ceph_auth_init(const char *name, const char *secret) | ||
39 | { | ||
40 | struct ceph_auth_client *ac; | ||
41 | int ret; | ||
42 | |||
43 | dout("auth_init name '%s' secret '%s'\n", name, secret); | ||
44 | |||
45 | ret = -ENOMEM; | ||
46 | ac = kzalloc(sizeof(*ac), GFP_NOFS); | ||
47 | if (!ac) | ||
48 | goto out; | ||
49 | |||
50 | ac->negotiating = true; | ||
51 | if (name) | ||
52 | ac->name = name; | ||
53 | else | ||
54 | ac->name = CEPH_AUTH_NAME_DEFAULT; | ||
55 | dout("auth_init name %s secret %s\n", ac->name, secret); | ||
56 | ac->secret = secret; | ||
57 | return ac; | ||
58 | |||
59 | out: | ||
60 | return ERR_PTR(ret); | ||
61 | } | ||
62 | |||
63 | void ceph_auth_destroy(struct ceph_auth_client *ac) | ||
64 | { | ||
65 | dout("auth_destroy %p\n", ac); | ||
66 | if (ac->ops) | ||
67 | ac->ops->destroy(ac); | ||
68 | kfree(ac); | ||
69 | } | ||
70 | |||
71 | /* | ||
72 | * Reset occurs when reconnecting to the monitor. | ||
73 | */ | ||
74 | void ceph_auth_reset(struct ceph_auth_client *ac) | ||
75 | { | ||
76 | dout("auth_reset %p\n", ac); | ||
77 | if (ac->ops && !ac->negotiating) | ||
78 | ac->ops->reset(ac); | ||
79 | ac->negotiating = true; | ||
80 | } | ||
81 | |||
/*
 * Encode our entity name at *p as (type=CLIENT, strlen, bytes), the
 * ceph wire format for an entity_name.  Advances *p past the encoding.
 * Returns -ERANGE if the [*p, end) buffer cannot hold it.
 */
int ceph_entity_name_encode(const char *name, void **p, void *end)
{
	int len = strlen(name);

	/* two u32s (type + length) plus the unterminated name bytes */
	if (*p + 2*sizeof(u32) + len > end)
		return -ERANGE;
	ceph_encode_32(p, CEPH_ENTITY_TYPE_CLIENT);
	ceph_encode_32(p, len);
	ceph_encode_copy(p, name, len);
	return 0;
}
93 | |||
/*
 * Initiate protocol negotiation with monitor.  Include entity name
 * and list supported protocols.
 *
 * Wire layout: ceph_mon_request_header, protocol=0, then a
 * length-prefixed payload of (struct_v, protocol list, entity name,
 * global_id).  Returns bytes written into buf, -ERANGE if it does not
 * fit, or the error from ceph_entity_name_encode().
 */
int ceph_auth_build_hello(struct ceph_auth_client *ac, void *buf, size_t len)
{
	struct ceph_mon_request_header *monhdr = buf;
	void *p = monhdr + 1, *end = buf + len, *lenp;
	int i, num;
	int ret;

	dout("auth_build_hello\n");
	monhdr->have_version = 0;
	monhdr->session_mon = cpu_to_le16(-1);
	monhdr->session_mon_tid = 0;

	ceph_encode_32(&p, 0);  /* no protocol, yet */

	/* remember where the payload length goes; back-patched below */
	lenp = p;
	p += sizeof(u32);

	/* NOTE(review): ceph_decode_need is (re)used here purely as a
	 * bounds check before encoding; it jumps to bad on overflow. */
	ceph_decode_need(&p, end, 1 + sizeof(u32), bad);
	ceph_encode_8(&p, 1);
	num = ARRAY_SIZE(supported_protocols);
	ceph_encode_32(&p, num);
	ceph_decode_need(&p, end, num * sizeof(u32), bad);
	for (i = 0; i < num; i++)
		ceph_encode_32(&p, supported_protocols[i]);

	ret = ceph_entity_name_encode(ac->name, &p, end);
	if (ret < 0)
		return ret;
	ceph_decode_need(&p, end, sizeof(u64), bad);
	ceph_encode_64(&p, ac->global_id);

	/* back-patch the payload length now that p is final */
	ceph_encode_32(&lenp, p - lenp - sizeof(u32));
	return p - buf;

bad:
	return -ERANGE;
}
135 | |||
/*
 * Build a protocol-specific auth request: mon request header, the
 * negotiated protocol id, then a length-prefixed payload produced by
 * the protocol handler's build_request op.  Returns total bytes
 * written into msg_buf, or a negative error from the handler.
 */
static int ceph_build_auth_request(struct ceph_auth_client *ac,
				   void *msg_buf, size_t msg_len)
{
	struct ceph_mon_request_header *monhdr = msg_buf;
	void *p = monhdr + 1;
	void *end = msg_buf + msg_len;
	int ret;

	monhdr->have_version = 0;
	monhdr->session_mon = cpu_to_le16(-1);
	monhdr->session_mon_tid = 0;

	ceph_encode_32(&p, ac->protocol);

	/* handler writes its payload just past the u32 length slot */
	ret = ac->ops->build_request(ac, p + sizeof(u32), end);
	if (ret < 0) {
		pr_err("error %d building auth method %s request\n", ret,
		       ac->ops->name);
		return ret;
	}
	dout(" built request %d bytes\n", ret);
	/* fill in the payload length; p then points at the payload */
	ceph_encode_32(&p, ret);
	return p + ret - msg_buf;
}
160 | |||
/*
 * Handle auth message from monitor.
 *
 * Decodes (protocol, result, global_id, payload, result message).
 * While negotiating, selects and initializes the protocol handler the
 * monitor chose, then passes the payload to its handle_reply op.  If
 * the handler wants another round (-EAGAIN), the next request is built
 * into reply_buf and its length returned.  Returns 0 when done, >0
 * bytes of a follow-up request, or a negative error.
 */
int ceph_handle_auth_reply(struct ceph_auth_client *ac,
			   void *buf, size_t len,
			   void *reply_buf, size_t reply_len)
{
	void *p = buf;
	void *end = buf + len;
	int protocol;
	s32 result;
	u64 global_id;
	void *payload, *payload_end;
	int payload_len;
	char *result_msg;
	int result_msg_len;
	int ret = -EINVAL;

	dout("handle_auth_reply %p %p\n", p, end);
	ceph_decode_need(&p, end, sizeof(u32) * 3 + sizeof(u64), bad);
	protocol = ceph_decode_32(&p);
	result = ceph_decode_32(&p);
	global_id = ceph_decode_64(&p);
	payload_len = ceph_decode_32(&p);
	payload = p;
	/* NOTE(review): payload_len comes off the wire; the skip is only
	 * validated by the decode_need that follows it — assumes the
	 * advance cannot wrap p past end.  TODO confirm decode_need
	 * semantics cover p > end. */
	p += payload_len;
	ceph_decode_need(&p, end, sizeof(u32), bad);
	result_msg_len = ceph_decode_32(&p);
	result_msg = p;
	p += result_msg_len;
	if (p != end)	/* reply must be consumed exactly */
		goto bad;

	dout(" result %d '%.*s' gid %llu len %d\n", result, result_msg_len,
	     result_msg, global_id, payload_len);

	payload_end = payload + payload_len;

	/* adopt the monitor-assigned global id */
	if (global_id && ac->global_id != global_id) {
		dout(" set global_id %lld -> %lld\n", ac->global_id, global_id);
		ac->global_id = global_id;
	}

	if (ac->negotiating) {
		/* server does not support our protocols? */
		if (!protocol && result < 0) {
			ret = result;
			goto out;
		}
		/* set up (new) protocol handler? */
		if (ac->protocol && ac->protocol != protocol) {
			ac->ops->destroy(ac);
			ac->protocol = 0;
			ac->ops = NULL;
		}
		if (ac->protocol != protocol) {
			ret = ceph_auth_init_protocol(ac, protocol);
			if (ret) {
				pr_err("error %d on auth protocol %d init\n",
				       ret, protocol);
				goto out;
			}
		}

		ac->negotiating = false;
	}

	ret = ac->ops->handle_reply(ac, result, payload, payload_end);
	if (ret == -EAGAIN) {
		/* handler needs another round trip with the monitor */
		return ceph_build_auth_request(ac, reply_buf, reply_len);
	} else if (ret) {
		pr_err("auth method '%s' error %d\n", ac->ops->name, ret);
		return ret;
	}
	return 0;

bad:
	pr_err("failed to decode auth msg\n");
out:
	return ret;
}
242 | |||
243 | int ceph_build_auth(struct ceph_auth_client *ac, | ||
244 | void *msg_buf, size_t msg_len) | ||
245 | { | ||
246 | if (!ac->protocol) | ||
247 | return ceph_auth_build_hello(ac, msg_buf, msg_len); | ||
248 | BUG_ON(!ac->ops); | ||
249 | if (ac->ops->should_authenticate(ac)) | ||
250 | return ceph_build_auth_request(ac, msg_buf, msg_len); | ||
251 | return 0; | ||
252 | } | ||
253 | |||
254 | int ceph_auth_is_authenticated(struct ceph_auth_client *ac) | ||
255 | { | ||
256 | if (!ac->ops) | ||
257 | return 0; | ||
258 | return ac->ops->is_authenticated(ac); | ||
259 | } | ||
diff --git a/fs/ceph/auth.h b/fs/ceph/auth.h deleted file mode 100644 index d38a2fb4a137..000000000000 --- a/fs/ceph/auth.h +++ /dev/null | |||
@@ -1,92 +0,0 @@ | |||
#ifndef _FS_CEPH_AUTH_H
#define _FS_CEPH_AUTH_H

#include "types.h"
#include "buffer.h"

/*
 * Abstract interface for communicating with the authenticate module.
 * There is some handshake that takes place between us and the monitor
 * to acquire the necessary keys.  These are used to generate an
 * 'authorizer' that we use when connecting to a service (mds, osd).
 */

struct ceph_auth_client;
struct ceph_authorizer;

/* Per-protocol operations; implemented by auth_none and auth_x. */
struct ceph_auth_client_ops {
	const char *name;	/* protocol name, used in log messages */

	/*
	 * true if we are authenticated and can connect to
	 * services.
	 */
	int (*is_authenticated)(struct ceph_auth_client *ac);

	/*
	 * true if we should (re)authenticate, e.g., when our tickets
	 * are getting old and crusty.
	 */
	int (*should_authenticate)(struct ceph_auth_client *ac);

	/*
	 * build requests and process replies during monitor
	 * handshake.  if handle_reply returns -EAGAIN, we build
	 * another request.
	 */
	int (*build_request)(struct ceph_auth_client *ac, void *buf, void *end);
	int (*handle_reply)(struct ceph_auth_client *ac, int result,
			    void *buf, void *end);

	/*
	 * Create authorizer for connecting to a service, and verify
	 * the response to authenticate the service.
	 */
	int (*create_authorizer)(struct ceph_auth_client *ac, int peer_type,
				 struct ceph_authorizer **a,
				 void **buf, size_t *len,
				 void **reply_buf, size_t *reply_len);
	int (*verify_authorizer_reply)(struct ceph_auth_client *ac,
				       struct ceph_authorizer *a, size_t len);
	void (*destroy_authorizer)(struct ceph_auth_client *ac,
				   struct ceph_authorizer *a);
	void (*invalidate_authorizer)(struct ceph_auth_client *ac,
				      int peer_type);

	/* reset when we (re)connect to a monitor */
	void (*reset)(struct ceph_auth_client *ac);

	void (*destroy)(struct ceph_auth_client *ac);
};

struct ceph_auth_client {
	u32 protocol;           /* CEPH_AUTH_* */
	void *private;          /* for use by protocol implementation */
	const struct ceph_auth_client_ops *ops;  /* null iff protocol==0 */

	bool negotiating;       /* true if negotiating protocol */
	const char *name;       /* entity name; borrowed, not owned */
	u64 global_id;          /* our unique id in system */
	const char *secret;     /* our secret key; borrowed, not owned */
	unsigned want_keys;     /* which services we want */
};

extern struct ceph_auth_client *ceph_auth_init(const char *name,
					       const char *secret);
extern void ceph_auth_destroy(struct ceph_auth_client *ac);

extern void ceph_auth_reset(struct ceph_auth_client *ac);

extern int ceph_auth_build_hello(struct ceph_auth_client *ac,
				 void *buf, size_t len);
extern int ceph_handle_auth_reply(struct ceph_auth_client *ac,
				  void *buf, size_t len,
				  void *reply_buf, size_t reply_len);
extern int ceph_entity_name_encode(const char *name, void **p, void *end);

extern int ceph_build_auth(struct ceph_auth_client *ac,
		    void *msg_buf, size_t msg_len);

extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac);

#endif
diff --git a/fs/ceph/auth_none.c b/fs/ceph/auth_none.c deleted file mode 100644 index ad1dc21286c7..000000000000 --- a/fs/ceph/auth_none.c +++ /dev/null | |||
@@ -1,131 +0,0 @@ | |||
1 | |||
2 | #include "ceph_debug.h" | ||
3 | |||
4 | #include <linux/err.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/random.h> | ||
7 | #include <linux/slab.h> | ||
8 | |||
9 | #include "auth_none.h" | ||
10 | #include "auth.h" | ||
11 | #include "decode.h" | ||
12 | |||
13 | static void reset(struct ceph_auth_client *ac) | ||
14 | { | ||
15 | struct ceph_auth_none_info *xi = ac->private; | ||
16 | |||
17 | xi->starting = true; | ||
18 | xi->built_authorizer = false; | ||
19 | } | ||
20 | |||
21 | static void destroy(struct ceph_auth_client *ac) | ||
22 | { | ||
23 | kfree(ac->private); | ||
24 | ac->private = NULL; | ||
25 | } | ||
26 | |||
27 | static int is_authenticated(struct ceph_auth_client *ac) | ||
28 | { | ||
29 | struct ceph_auth_none_info *xi = ac->private; | ||
30 | |||
31 | return !xi->starting; | ||
32 | } | ||
33 | |||
34 | static int should_authenticate(struct ceph_auth_client *ac) | ||
35 | { | ||
36 | struct ceph_auth_none_info *xi = ac->private; | ||
37 | |||
38 | return xi->starting; | ||
39 | } | ||
40 | |||
41 | /* | ||
42 | * the generic auth code decode the global_id, and we carry no actual | ||
43 | * authenticate state, so nothing happens here. | ||
44 | */ | ||
45 | static int handle_reply(struct ceph_auth_client *ac, int result, | ||
46 | void *buf, void *end) | ||
47 | { | ||
48 | struct ceph_auth_none_info *xi = ac->private; | ||
49 | |||
50 | xi->starting = false; | ||
51 | return result; | ||
52 | } | ||
53 | |||
/*
 * build an 'authorizer' with our entity_name and global_id.  we can
 * reuse a single static copy since it is identical for all services
 * we connect to.  Hands out pointers into ai->au; nothing here is
 * allocated, so destroy_authorizer has nothing to free.
 */
static int ceph_auth_none_create_authorizer(
	struct ceph_auth_client *ac, int peer_type,
	struct ceph_authorizer **a,
	void **buf, size_t *len,
	void **reply_buf, size_t *reply_len)
{
	struct ceph_auth_none_info *ai = ac->private;
	struct ceph_none_authorizer *au = &ai->au;
	void *p, *end;
	int ret;

	/* lazily encode once, then reuse for every service connection */
	if (!ai->built_authorizer) {
		p = au->buf;
		end = p + sizeof(au->buf);
		ceph_encode_8(&p, 1);	/* presumably a struct version; confirm against protocol */
		/* end - 8 reserves room for the trailing global_id u64 */
		ret = ceph_entity_name_encode(ac->name, &p, end - 8);
		if (ret < 0)
			goto bad;
		/* decode_need doubles as a bounds check before encoding */
		ceph_decode_need(&p, end, sizeof(u64), bad2);
		ceph_encode_64(&p, ac->global_id);
		au->buf_len = p - (void *)au->buf;
		ai->built_authorizer = true;
		dout("built authorizer len %d\n", au->buf_len);
	}

	*a = (struct ceph_authorizer *)au;
	*buf = au->buf;
	*len = au->buf_len;
	*reply_buf = au->reply_buf;
	*reply_len = sizeof(au->reply_buf);	/* zero: no reply expected */
	return 0;

bad2:
	ret = -ERANGE;
bad:
	return ret;
}
96 | |||
97 | static void ceph_auth_none_destroy_authorizer(struct ceph_auth_client *ac, | ||
98 | struct ceph_authorizer *a) | ||
99 | { | ||
100 | /* nothing to do */ | ||
101 | } | ||
102 | |||
103 | static const struct ceph_auth_client_ops ceph_auth_none_ops = { | ||
104 | .name = "none", | ||
105 | .reset = reset, | ||
106 | .destroy = destroy, | ||
107 | .is_authenticated = is_authenticated, | ||
108 | .should_authenticate = should_authenticate, | ||
109 | .handle_reply = handle_reply, | ||
110 | .create_authorizer = ceph_auth_none_create_authorizer, | ||
111 | .destroy_authorizer = ceph_auth_none_destroy_authorizer, | ||
112 | }; | ||
113 | |||
114 | int ceph_auth_none_init(struct ceph_auth_client *ac) | ||
115 | { | ||
116 | struct ceph_auth_none_info *xi; | ||
117 | |||
118 | dout("ceph_auth_none_init %p\n", ac); | ||
119 | xi = kzalloc(sizeof(*xi), GFP_NOFS); | ||
120 | if (!xi) | ||
121 | return -ENOMEM; | ||
122 | |||
123 | xi->starting = true; | ||
124 | xi->built_authorizer = false; | ||
125 | |||
126 | ac->protocol = CEPH_AUTH_NONE; | ||
127 | ac->private = xi; | ||
128 | ac->ops = &ceph_auth_none_ops; | ||
129 | return 0; | ||
130 | } | ||
131 | |||
diff --git a/fs/ceph/auth_none.h b/fs/ceph/auth_none.h deleted file mode 100644 index 8164df1a08be..000000000000 --- a/fs/ceph/auth_none.h +++ /dev/null | |||
@@ -1,30 +0,0 @@ | |||
#ifndef _FS_CEPH_AUTH_NONE_H
#define _FS_CEPH_AUTH_NONE_H

#include <linux/slab.h>

#include "auth.h"

/*
 * null security mode.
 *
 * we use a single static authorizer that simply encodes our entity name
 * and global id.
 */

struct ceph_none_authorizer {
	char buf[128];		/* encoded entity name + global_id */
	int buf_len;		/* valid bytes in buf */
	char reply_buf[0];	/* zero length — "none" expects no reply payload */
};

struct ceph_auth_none_info {
	bool starting;		/* cleared once the first auth reply arrives */
	bool built_authorizer;	/* au has been encoded and can be reused */
	struct ceph_none_authorizer au;   /* we only need one; it's static */
};

extern int ceph_auth_none_init(struct ceph_auth_client *ac);

#endif
30 | |||
diff --git a/fs/ceph/auth_x.c b/fs/ceph/auth_x.c deleted file mode 100644 index a2d002cbdec2..000000000000 --- a/fs/ceph/auth_x.c +++ /dev/null | |||
@@ -1,687 +0,0 @@ | |||
1 | |||
2 | #include "ceph_debug.h" | ||
3 | |||
4 | #include <linux/err.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/random.h> | ||
7 | #include <linux/slab.h> | ||
8 | |||
9 | #include "auth_x.h" | ||
10 | #include "auth_x_protocol.h" | ||
11 | #include "crypto.h" | ||
12 | #include "auth.h" | ||
13 | #include "decode.h" | ||
14 | |||
15 | #define TEMP_TICKET_BUF_LEN 256 | ||
16 | |||
17 | static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed); | ||
18 | |||
19 | static int ceph_x_is_authenticated(struct ceph_auth_client *ac) | ||
20 | { | ||
21 | struct ceph_x_info *xi = ac->private; | ||
22 | int need; | ||
23 | |||
24 | ceph_x_validate_tickets(ac, &need); | ||
25 | dout("ceph_x_is_authenticated want=%d need=%d have=%d\n", | ||
26 | ac->want_keys, need, xi->have_keys); | ||
27 | return (ac->want_keys & xi->have_keys) == ac->want_keys; | ||
28 | } | ||
29 | |||
30 | static int ceph_x_should_authenticate(struct ceph_auth_client *ac) | ||
31 | { | ||
32 | struct ceph_x_info *xi = ac->private; | ||
33 | int need; | ||
34 | |||
35 | ceph_x_validate_tickets(ac, &need); | ||
36 | dout("ceph_x_should_authenticate want=%d need=%d have=%d\n", | ||
37 | ac->want_keys, need, xi->have_keys); | ||
38 | return need != 0; | ||
39 | } | ||
40 | |||
41 | static int ceph_x_encrypt_buflen(int ilen) | ||
42 | { | ||
43 | return sizeof(struct ceph_x_encrypt_header) + ilen + 16 + | ||
44 | sizeof(u32); | ||
45 | } | ||
46 | |||
47 | static int ceph_x_encrypt(struct ceph_crypto_key *secret, | ||
48 | void *ibuf, int ilen, void *obuf, size_t olen) | ||
49 | { | ||
50 | struct ceph_x_encrypt_header head = { | ||
51 | .struct_v = 1, | ||
52 | .magic = cpu_to_le64(CEPHX_ENC_MAGIC) | ||
53 | }; | ||
54 | size_t len = olen - sizeof(u32); | ||
55 | int ret; | ||
56 | |||
57 | ret = ceph_encrypt2(secret, obuf + sizeof(u32), &len, | ||
58 | &head, sizeof(head), ibuf, ilen); | ||
59 | if (ret) | ||
60 | return ret; | ||
61 | ceph_encode_32(&obuf, len); | ||
62 | return len + sizeof(u32); | ||
63 | } | ||
64 | |||
65 | static int ceph_x_decrypt(struct ceph_crypto_key *secret, | ||
66 | void **p, void *end, void *obuf, size_t olen) | ||
67 | { | ||
68 | struct ceph_x_encrypt_header head; | ||
69 | size_t head_len = sizeof(head); | ||
70 | int len, ret; | ||
71 | |||
72 | len = ceph_decode_32(p); | ||
73 | if (*p + len > end) | ||
74 | return -EINVAL; | ||
75 | |||
76 | dout("ceph_x_decrypt len %d\n", len); | ||
77 | ret = ceph_decrypt2(secret, &head, &head_len, obuf, &olen, | ||
78 | *p, len); | ||
79 | if (ret) | ||
80 | return ret; | ||
81 | if (head.struct_v != 1 || le64_to_cpu(head.magic) != CEPHX_ENC_MAGIC) | ||
82 | return -EPERM; | ||
83 | *p += len; | ||
84 | return olen; | ||
85 | } | ||
86 | |||
87 | /* | ||
88 | * get existing (or insert new) ticket handler | ||
89 | */ | ||
90 | static struct ceph_x_ticket_handler * | ||
91 | get_ticket_handler(struct ceph_auth_client *ac, int service) | ||
92 | { | ||
93 | struct ceph_x_ticket_handler *th; | ||
94 | struct ceph_x_info *xi = ac->private; | ||
95 | struct rb_node *parent = NULL, **p = &xi->ticket_handlers.rb_node; | ||
96 | |||
97 | while (*p) { | ||
98 | parent = *p; | ||
99 | th = rb_entry(parent, struct ceph_x_ticket_handler, node); | ||
100 | if (service < th->service) | ||
101 | p = &(*p)->rb_left; | ||
102 | else if (service > th->service) | ||
103 | p = &(*p)->rb_right; | ||
104 | else | ||
105 | return th; | ||
106 | } | ||
107 | |||
108 | /* add it */ | ||
109 | th = kzalloc(sizeof(*th), GFP_NOFS); | ||
110 | if (!th) | ||
111 | return ERR_PTR(-ENOMEM); | ||
112 | th->service = service; | ||
113 | rb_link_node(&th->node, parent, p); | ||
114 | rb_insert_color(&th->node, &xi->ticket_handlers); | ||
115 | return th; | ||
116 | } | ||
117 | |||
118 | static void remove_ticket_handler(struct ceph_auth_client *ac, | ||
119 | struct ceph_x_ticket_handler *th) | ||
120 | { | ||
121 | struct ceph_x_info *xi = ac->private; | ||
122 | |||
123 | dout("remove_ticket_handler %p %d\n", th, th->service); | ||
124 | rb_erase(&th->node, &xi->ticket_handlers); | ||
125 | ceph_crypto_key_destroy(&th->session_key); | ||
126 | if (th->ticket_blob) | ||
127 | ceph_buffer_put(th->ticket_blob); | ||
128 | kfree(th); | ||
129 | } | ||
130 | |||
131 | static int ceph_x_proc_ticket_reply(struct ceph_auth_client *ac, | ||
132 | struct ceph_crypto_key *secret, | ||
133 | void *buf, void *end) | ||
134 | { | ||
135 | struct ceph_x_info *xi = ac->private; | ||
136 | int num; | ||
137 | void *p = buf; | ||
138 | int ret; | ||
139 | char *dbuf; | ||
140 | char *ticket_buf; | ||
141 | u8 reply_struct_v; | ||
142 | |||
143 | dbuf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); | ||
144 | if (!dbuf) | ||
145 | return -ENOMEM; | ||
146 | |||
147 | ret = -ENOMEM; | ||
148 | ticket_buf = kmalloc(TEMP_TICKET_BUF_LEN, GFP_NOFS); | ||
149 | if (!ticket_buf) | ||
150 | goto out_dbuf; | ||
151 | |||
152 | ceph_decode_need(&p, end, 1 + sizeof(u32), bad); | ||
153 | reply_struct_v = ceph_decode_8(&p); | ||
154 | if (reply_struct_v != 1) | ||
155 | goto bad; | ||
156 | num = ceph_decode_32(&p); | ||
157 | dout("%d tickets\n", num); | ||
158 | while (num--) { | ||
159 | int type; | ||
160 | u8 tkt_struct_v, blob_struct_v; | ||
161 | struct ceph_x_ticket_handler *th; | ||
162 | void *dp, *dend; | ||
163 | int dlen; | ||
164 | char is_enc; | ||
165 | struct timespec validity; | ||
166 | struct ceph_crypto_key old_key; | ||
167 | void *tp, *tpend; | ||
168 | struct ceph_timespec new_validity; | ||
169 | struct ceph_crypto_key new_session_key; | ||
170 | struct ceph_buffer *new_ticket_blob; | ||
171 | unsigned long new_expires, new_renew_after; | ||
172 | u64 new_secret_id; | ||
173 | |||
174 | ceph_decode_need(&p, end, sizeof(u32) + 1, bad); | ||
175 | |||
176 | type = ceph_decode_32(&p); | ||
177 | dout(" ticket type %d %s\n", type, ceph_entity_type_name(type)); | ||
178 | |||
179 | tkt_struct_v = ceph_decode_8(&p); | ||
180 | if (tkt_struct_v != 1) | ||
181 | goto bad; | ||
182 | |||
183 | th = get_ticket_handler(ac, type); | ||
184 | if (IS_ERR(th)) { | ||
185 | ret = PTR_ERR(th); | ||
186 | goto out; | ||
187 | } | ||
188 | |||
189 | /* blob for me */ | ||
190 | dlen = ceph_x_decrypt(secret, &p, end, dbuf, | ||
191 | TEMP_TICKET_BUF_LEN); | ||
192 | if (dlen <= 0) { | ||
193 | ret = dlen; | ||
194 | goto out; | ||
195 | } | ||
196 | dout(" decrypted %d bytes\n", dlen); | ||
197 | dend = dbuf + dlen; | ||
198 | dp = dbuf; | ||
199 | |||
200 | tkt_struct_v = ceph_decode_8(&dp); | ||
201 | if (tkt_struct_v != 1) | ||
202 | goto bad; | ||
203 | |||
204 | memcpy(&old_key, &th->session_key, sizeof(old_key)); | ||
205 | ret = ceph_crypto_key_decode(&new_session_key, &dp, dend); | ||
206 | if (ret) | ||
207 | goto out; | ||
208 | |||
209 | ceph_decode_copy(&dp, &new_validity, sizeof(new_validity)); | ||
210 | ceph_decode_timespec(&validity, &new_validity); | ||
211 | new_expires = get_seconds() + validity.tv_sec; | ||
212 | new_renew_after = new_expires - (validity.tv_sec / 4); | ||
213 | dout(" expires=%lu renew_after=%lu\n", new_expires, | ||
214 | new_renew_after); | ||
215 | |||
216 | /* ticket blob for service */ | ||
217 | ceph_decode_8_safe(&p, end, is_enc, bad); | ||
218 | tp = ticket_buf; | ||
219 | if (is_enc) { | ||
220 | /* encrypted */ | ||
221 | dout(" encrypted ticket\n"); | ||
222 | dlen = ceph_x_decrypt(&old_key, &p, end, ticket_buf, | ||
223 | TEMP_TICKET_BUF_LEN); | ||
224 | if (dlen < 0) { | ||
225 | ret = dlen; | ||
226 | goto out; | ||
227 | } | ||
228 | dlen = ceph_decode_32(&tp); | ||
229 | } else { | ||
230 | /* unencrypted */ | ||
231 | ceph_decode_32_safe(&p, end, dlen, bad); | ||
232 | ceph_decode_need(&p, end, dlen, bad); | ||
233 | ceph_decode_copy(&p, ticket_buf, dlen); | ||
234 | } | ||
235 | tpend = tp + dlen; | ||
236 | dout(" ticket blob is %d bytes\n", dlen); | ||
237 | ceph_decode_need(&tp, tpend, 1 + sizeof(u64), bad); | ||
238 | blob_struct_v = ceph_decode_8(&tp); | ||
239 | new_secret_id = ceph_decode_64(&tp); | ||
240 | ret = ceph_decode_buffer(&new_ticket_blob, &tp, tpend); | ||
241 | if (ret) | ||
242 | goto out; | ||
243 | |||
244 | /* all is well, update our ticket */ | ||
245 | ceph_crypto_key_destroy(&th->session_key); | ||
246 | if (th->ticket_blob) | ||
247 | ceph_buffer_put(th->ticket_blob); | ||
248 | th->session_key = new_session_key; | ||
249 | th->ticket_blob = new_ticket_blob; | ||
250 | th->validity = new_validity; | ||
251 | th->secret_id = new_secret_id; | ||
252 | th->expires = new_expires; | ||
253 | th->renew_after = new_renew_after; | ||
254 | dout(" got ticket service %d (%s) secret_id %lld len %d\n", | ||
255 | type, ceph_entity_type_name(type), th->secret_id, | ||
256 | (int)th->ticket_blob->vec.iov_len); | ||
257 | xi->have_keys |= th->service; | ||
258 | } | ||
259 | |||
260 | ret = 0; | ||
261 | out: | ||
262 | kfree(ticket_buf); | ||
263 | out_dbuf: | ||
264 | kfree(dbuf); | ||
265 | return ret; | ||
266 | |||
267 | bad: | ||
268 | ret = -EINVAL; | ||
269 | goto out; | ||
270 | } | ||
271 | |||
272 | static int ceph_x_build_authorizer(struct ceph_auth_client *ac, | ||
273 | struct ceph_x_ticket_handler *th, | ||
274 | struct ceph_x_authorizer *au) | ||
275 | { | ||
276 | int maxlen; | ||
277 | struct ceph_x_authorize_a *msg_a; | ||
278 | struct ceph_x_authorize_b msg_b; | ||
279 | void *p, *end; | ||
280 | int ret; | ||
281 | int ticket_blob_len = | ||
282 | (th->ticket_blob ? th->ticket_blob->vec.iov_len : 0); | ||
283 | |||
284 | dout("build_authorizer for %s %p\n", | ||
285 | ceph_entity_type_name(th->service), au); | ||
286 | |||
287 | maxlen = sizeof(*msg_a) + sizeof(msg_b) + | ||
288 | ceph_x_encrypt_buflen(ticket_blob_len); | ||
289 | dout(" need len %d\n", maxlen); | ||
290 | if (au->buf && au->buf->alloc_len < maxlen) { | ||
291 | ceph_buffer_put(au->buf); | ||
292 | au->buf = NULL; | ||
293 | } | ||
294 | if (!au->buf) { | ||
295 | au->buf = ceph_buffer_new(maxlen, GFP_NOFS); | ||
296 | if (!au->buf) | ||
297 | return -ENOMEM; | ||
298 | } | ||
299 | au->service = th->service; | ||
300 | |||
301 | msg_a = au->buf->vec.iov_base; | ||
302 | msg_a->struct_v = 1; | ||
303 | msg_a->global_id = cpu_to_le64(ac->global_id); | ||
304 | msg_a->service_id = cpu_to_le32(th->service); | ||
305 | msg_a->ticket_blob.struct_v = 1; | ||
306 | msg_a->ticket_blob.secret_id = cpu_to_le64(th->secret_id); | ||
307 | msg_a->ticket_blob.blob_len = cpu_to_le32(ticket_blob_len); | ||
308 | if (ticket_blob_len) { | ||
309 | memcpy(msg_a->ticket_blob.blob, th->ticket_blob->vec.iov_base, | ||
310 | th->ticket_blob->vec.iov_len); | ||
311 | } | ||
312 | dout(" th %p secret_id %lld %lld\n", th, th->secret_id, | ||
313 | le64_to_cpu(msg_a->ticket_blob.secret_id)); | ||
314 | |||
315 | p = msg_a + 1; | ||
316 | p += ticket_blob_len; | ||
317 | end = au->buf->vec.iov_base + au->buf->vec.iov_len; | ||
318 | |||
319 | get_random_bytes(&au->nonce, sizeof(au->nonce)); | ||
320 | msg_b.struct_v = 1; | ||
321 | msg_b.nonce = cpu_to_le64(au->nonce); | ||
322 | ret = ceph_x_encrypt(&th->session_key, &msg_b, sizeof(msg_b), | ||
323 | p, end - p); | ||
324 | if (ret < 0) | ||
325 | goto out_buf; | ||
326 | p += ret; | ||
327 | au->buf->vec.iov_len = p - au->buf->vec.iov_base; | ||
328 | dout(" built authorizer nonce %llx len %d\n", au->nonce, | ||
329 | (int)au->buf->vec.iov_len); | ||
330 | BUG_ON(au->buf->vec.iov_len > maxlen); | ||
331 | return 0; | ||
332 | |||
333 | out_buf: | ||
334 | ceph_buffer_put(au->buf); | ||
335 | au->buf = NULL; | ||
336 | return ret; | ||
337 | } | ||
338 | |||
339 | static int ceph_x_encode_ticket(struct ceph_x_ticket_handler *th, | ||
340 | void **p, void *end) | ||
341 | { | ||
342 | ceph_decode_need(p, end, 1 + sizeof(u64), bad); | ||
343 | ceph_encode_8(p, 1); | ||
344 | ceph_encode_64(p, th->secret_id); | ||
345 | if (th->ticket_blob) { | ||
346 | const char *buf = th->ticket_blob->vec.iov_base; | ||
347 | u32 len = th->ticket_blob->vec.iov_len; | ||
348 | |||
349 | ceph_encode_32_safe(p, end, len, bad); | ||
350 | ceph_encode_copy_safe(p, end, buf, len, bad); | ||
351 | } else { | ||
352 | ceph_encode_32_safe(p, end, 0, bad); | ||
353 | } | ||
354 | |||
355 | return 0; | ||
356 | bad: | ||
357 | return -ERANGE; | ||
358 | } | ||
359 | |||
360 | static void ceph_x_validate_tickets(struct ceph_auth_client *ac, int *pneed) | ||
361 | { | ||
362 | int want = ac->want_keys; | ||
363 | struct ceph_x_info *xi = ac->private; | ||
364 | int service; | ||
365 | |||
366 | *pneed = ac->want_keys & ~(xi->have_keys); | ||
367 | |||
368 | for (service = 1; service <= want; service <<= 1) { | ||
369 | struct ceph_x_ticket_handler *th; | ||
370 | |||
371 | if (!(ac->want_keys & service)) | ||
372 | continue; | ||
373 | |||
374 | if (*pneed & service) | ||
375 | continue; | ||
376 | |||
377 | th = get_ticket_handler(ac, service); | ||
378 | |||
379 | if (IS_ERR(th)) { | ||
380 | *pneed |= service; | ||
381 | continue; | ||
382 | } | ||
383 | |||
384 | if (get_seconds() >= th->renew_after) | ||
385 | *pneed |= service; | ||
386 | if (get_seconds() >= th->expires) | ||
387 | xi->have_keys &= ~service; | ||
388 | } | ||
389 | } | ||
390 | |||
391 | |||
392 | static int ceph_x_build_request(struct ceph_auth_client *ac, | ||
393 | void *buf, void *end) | ||
394 | { | ||
395 | struct ceph_x_info *xi = ac->private; | ||
396 | int need; | ||
397 | struct ceph_x_request_header *head = buf; | ||
398 | int ret; | ||
399 | struct ceph_x_ticket_handler *th = | ||
400 | get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | ||
401 | |||
402 | if (IS_ERR(th)) | ||
403 | return PTR_ERR(th); | ||
404 | |||
405 | ceph_x_validate_tickets(ac, &need); | ||
406 | |||
407 | dout("build_request want %x have %x need %x\n", | ||
408 | ac->want_keys, xi->have_keys, need); | ||
409 | |||
410 | if (need & CEPH_ENTITY_TYPE_AUTH) { | ||
411 | struct ceph_x_authenticate *auth = (void *)(head + 1); | ||
412 | void *p = auth + 1; | ||
413 | struct ceph_x_challenge_blob tmp; | ||
414 | char tmp_enc[40]; | ||
415 | u64 *u; | ||
416 | |||
417 | if (p > end) | ||
418 | return -ERANGE; | ||
419 | |||
420 | dout(" get_auth_session_key\n"); | ||
421 | head->op = cpu_to_le16(CEPHX_GET_AUTH_SESSION_KEY); | ||
422 | |||
423 | /* encrypt and hash */ | ||
424 | get_random_bytes(&auth->client_challenge, sizeof(u64)); | ||
425 | tmp.client_challenge = auth->client_challenge; | ||
426 | tmp.server_challenge = cpu_to_le64(xi->server_challenge); | ||
427 | ret = ceph_x_encrypt(&xi->secret, &tmp, sizeof(tmp), | ||
428 | tmp_enc, sizeof(tmp_enc)); | ||
429 | if (ret < 0) | ||
430 | return ret; | ||
431 | |||
432 | auth->struct_v = 1; | ||
433 | auth->key = 0; | ||
434 | for (u = (u64 *)tmp_enc; u + 1 <= (u64 *)(tmp_enc + ret); u++) | ||
435 | auth->key ^= *(__le64 *)u; | ||
436 | dout(" server_challenge %llx client_challenge %llx key %llx\n", | ||
437 | xi->server_challenge, le64_to_cpu(auth->client_challenge), | ||
438 | le64_to_cpu(auth->key)); | ||
439 | |||
440 | /* now encode the old ticket if exists */ | ||
441 | ret = ceph_x_encode_ticket(th, &p, end); | ||
442 | if (ret < 0) | ||
443 | return ret; | ||
444 | |||
445 | return p - buf; | ||
446 | } | ||
447 | |||
448 | if (need) { | ||
449 | void *p = head + 1; | ||
450 | struct ceph_x_service_ticket_request *req; | ||
451 | |||
452 | if (p > end) | ||
453 | return -ERANGE; | ||
454 | head->op = cpu_to_le16(CEPHX_GET_PRINCIPAL_SESSION_KEY); | ||
455 | |||
456 | ret = ceph_x_build_authorizer(ac, th, &xi->auth_authorizer); | ||
457 | if (ret) | ||
458 | return ret; | ||
459 | ceph_encode_copy(&p, xi->auth_authorizer.buf->vec.iov_base, | ||
460 | xi->auth_authorizer.buf->vec.iov_len); | ||
461 | |||
462 | req = p; | ||
463 | req->keys = cpu_to_le32(need); | ||
464 | p += sizeof(*req); | ||
465 | return p - buf; | ||
466 | } | ||
467 | |||
468 | return 0; | ||
469 | } | ||
470 | |||
471 | static int ceph_x_handle_reply(struct ceph_auth_client *ac, int result, | ||
472 | void *buf, void *end) | ||
473 | { | ||
474 | struct ceph_x_info *xi = ac->private; | ||
475 | struct ceph_x_reply_header *head = buf; | ||
476 | struct ceph_x_ticket_handler *th; | ||
477 | int len = end - buf; | ||
478 | int op; | ||
479 | int ret; | ||
480 | |||
481 | if (result) | ||
482 | return result; /* XXX hmm? */ | ||
483 | |||
484 | if (xi->starting) { | ||
485 | /* it's a hello */ | ||
486 | struct ceph_x_server_challenge *sc = buf; | ||
487 | |||
488 | if (len != sizeof(*sc)) | ||
489 | return -EINVAL; | ||
490 | xi->server_challenge = le64_to_cpu(sc->server_challenge); | ||
491 | dout("handle_reply got server challenge %llx\n", | ||
492 | xi->server_challenge); | ||
493 | xi->starting = false; | ||
494 | xi->have_keys &= ~CEPH_ENTITY_TYPE_AUTH; | ||
495 | return -EAGAIN; | ||
496 | } | ||
497 | |||
498 | op = le16_to_cpu(head->op); | ||
499 | result = le32_to_cpu(head->result); | ||
500 | dout("handle_reply op %d result %d\n", op, result); | ||
501 | switch (op) { | ||
502 | case CEPHX_GET_AUTH_SESSION_KEY: | ||
503 | /* verify auth key */ | ||
504 | ret = ceph_x_proc_ticket_reply(ac, &xi->secret, | ||
505 | buf + sizeof(*head), end); | ||
506 | break; | ||
507 | |||
508 | case CEPHX_GET_PRINCIPAL_SESSION_KEY: | ||
509 | th = get_ticket_handler(ac, CEPH_ENTITY_TYPE_AUTH); | ||
510 | if (IS_ERR(th)) | ||
511 | return PTR_ERR(th); | ||
512 | ret = ceph_x_proc_ticket_reply(ac, &th->session_key, | ||
513 | buf + sizeof(*head), end); | ||
514 | break; | ||
515 | |||
516 | default: | ||
517 | return -EINVAL; | ||
518 | } | ||
519 | if (ret) | ||
520 | return ret; | ||
521 | if (ac->want_keys == xi->have_keys) | ||
522 | return 0; | ||
523 | return -EAGAIN; | ||
524 | } | ||
525 | |||
526 | static int ceph_x_create_authorizer( | ||
527 | struct ceph_auth_client *ac, int peer_type, | ||
528 | struct ceph_authorizer **a, | ||
529 | void **buf, size_t *len, | ||
530 | void **reply_buf, size_t *reply_len) | ||
531 | { | ||
532 | struct ceph_x_authorizer *au; | ||
533 | struct ceph_x_ticket_handler *th; | ||
534 | int ret; | ||
535 | |||
536 | th = get_ticket_handler(ac, peer_type); | ||
537 | if (IS_ERR(th)) | ||
538 | return PTR_ERR(th); | ||
539 | |||
540 | au = kzalloc(sizeof(*au), GFP_NOFS); | ||
541 | if (!au) | ||
542 | return -ENOMEM; | ||
543 | |||
544 | ret = ceph_x_build_authorizer(ac, th, au); | ||
545 | if (ret) { | ||
546 | kfree(au); | ||
547 | return ret; | ||
548 | } | ||
549 | |||
550 | *a = (struct ceph_authorizer *)au; | ||
551 | *buf = au->buf->vec.iov_base; | ||
552 | *len = au->buf->vec.iov_len; | ||
553 | *reply_buf = au->reply_buf; | ||
554 | *reply_len = sizeof(au->reply_buf); | ||
555 | return 0; | ||
556 | } | ||
557 | |||
558 | static int ceph_x_verify_authorizer_reply(struct ceph_auth_client *ac, | ||
559 | struct ceph_authorizer *a, size_t len) | ||
560 | { | ||
561 | struct ceph_x_authorizer *au = (void *)a; | ||
562 | struct ceph_x_ticket_handler *th; | ||
563 | int ret = 0; | ||
564 | struct ceph_x_authorize_reply reply; | ||
565 | void *p = au->reply_buf; | ||
566 | void *end = p + sizeof(au->reply_buf); | ||
567 | |||
568 | th = get_ticket_handler(ac, au->service); | ||
569 | if (IS_ERR(th)) | ||
570 | return PTR_ERR(th); | ||
571 | ret = ceph_x_decrypt(&th->session_key, &p, end, &reply, sizeof(reply)); | ||
572 | if (ret < 0) | ||
573 | return ret; | ||
574 | if (ret != sizeof(reply)) | ||
575 | return -EPERM; | ||
576 | |||
577 | if (au->nonce + 1 != le64_to_cpu(reply.nonce_plus_one)) | ||
578 | ret = -EPERM; | ||
579 | else | ||
580 | ret = 0; | ||
581 | dout("verify_authorizer_reply nonce %llx got %llx ret %d\n", | ||
582 | au->nonce, le64_to_cpu(reply.nonce_plus_one), ret); | ||
583 | return ret; | ||
584 | } | ||
585 | |||
586 | static void ceph_x_destroy_authorizer(struct ceph_auth_client *ac, | ||
587 | struct ceph_authorizer *a) | ||
588 | { | ||
589 | struct ceph_x_authorizer *au = (void *)a; | ||
590 | |||
591 | ceph_buffer_put(au->buf); | ||
592 | kfree(au); | ||
593 | } | ||
594 | |||
595 | |||
596 | static void ceph_x_reset(struct ceph_auth_client *ac) | ||
597 | { | ||
598 | struct ceph_x_info *xi = ac->private; | ||
599 | |||
600 | dout("reset\n"); | ||
601 | xi->starting = true; | ||
602 | xi->server_challenge = 0; | ||
603 | } | ||
604 | |||
605 | static void ceph_x_destroy(struct ceph_auth_client *ac) | ||
606 | { | ||
607 | struct ceph_x_info *xi = ac->private; | ||
608 | struct rb_node *p; | ||
609 | |||
610 | dout("ceph_x_destroy %p\n", ac); | ||
611 | ceph_crypto_key_destroy(&xi->secret); | ||
612 | |||
613 | while ((p = rb_first(&xi->ticket_handlers)) != NULL) { | ||
614 | struct ceph_x_ticket_handler *th = | ||
615 | rb_entry(p, struct ceph_x_ticket_handler, node); | ||
616 | remove_ticket_handler(ac, th); | ||
617 | } | ||
618 | |||
619 | if (xi->auth_authorizer.buf) | ||
620 | ceph_buffer_put(xi->auth_authorizer.buf); | ||
621 | |||
622 | kfree(ac->private); | ||
623 | ac->private = NULL; | ||
624 | } | ||
625 | |||
626 | static void ceph_x_invalidate_authorizer(struct ceph_auth_client *ac, | ||
627 | int peer_type) | ||
628 | { | ||
629 | struct ceph_x_ticket_handler *th; | ||
630 | |||
631 | th = get_ticket_handler(ac, peer_type); | ||
632 | if (!IS_ERR(th)) | ||
633 | remove_ticket_handler(ac, th); | ||
634 | } | ||
635 | |||
636 | |||
637 | static const struct ceph_auth_client_ops ceph_x_ops = { | ||
638 | .name = "x", | ||
639 | .is_authenticated = ceph_x_is_authenticated, | ||
640 | .should_authenticate = ceph_x_should_authenticate, | ||
641 | .build_request = ceph_x_build_request, | ||
642 | .handle_reply = ceph_x_handle_reply, | ||
643 | .create_authorizer = ceph_x_create_authorizer, | ||
644 | .verify_authorizer_reply = ceph_x_verify_authorizer_reply, | ||
645 | .destroy_authorizer = ceph_x_destroy_authorizer, | ||
646 | .invalidate_authorizer = ceph_x_invalidate_authorizer, | ||
647 | .reset = ceph_x_reset, | ||
648 | .destroy = ceph_x_destroy, | ||
649 | }; | ||
650 | |||
651 | |||
652 | int ceph_x_init(struct ceph_auth_client *ac) | ||
653 | { | ||
654 | struct ceph_x_info *xi; | ||
655 | int ret; | ||
656 | |||
657 | dout("ceph_x_init %p\n", ac); | ||
658 | ret = -ENOMEM; | ||
659 | xi = kzalloc(sizeof(*xi), GFP_NOFS); | ||
660 | if (!xi) | ||
661 | goto out; | ||
662 | |||
663 | ret = -EINVAL; | ||
664 | if (!ac->secret) { | ||
665 | pr_err("no secret set (for auth_x protocol)\n"); | ||
666 | goto out_nomem; | ||
667 | } | ||
668 | |||
669 | ret = ceph_crypto_key_unarmor(&xi->secret, ac->secret); | ||
670 | if (ret) | ||
671 | goto out_nomem; | ||
672 | |||
673 | xi->starting = true; | ||
674 | xi->ticket_handlers = RB_ROOT; | ||
675 | |||
676 | ac->protocol = CEPH_AUTH_CEPHX; | ||
677 | ac->private = xi; | ||
678 | ac->ops = &ceph_x_ops; | ||
679 | return 0; | ||
680 | |||
681 | out_nomem: | ||
682 | kfree(xi); | ||
683 | out: | ||
684 | return ret; | ||
685 | } | ||
686 | |||
687 | |||
diff --git a/fs/ceph/auth_x.h b/fs/ceph/auth_x.h deleted file mode 100644 index ff6f8180e681..000000000000 --- a/fs/ceph/auth_x.h +++ /dev/null | |||
@@ -1,49 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_AUTH_X_H | ||
2 | #define _FS_CEPH_AUTH_X_H | ||
3 | |||
4 | #include <linux/rbtree.h> | ||
5 | |||
6 | #include "crypto.h" | ||
7 | #include "auth.h" | ||
8 | #include "auth_x_protocol.h" | ||
9 | |||
10 | /* | ||
11 | * Handle ticket for a single service. | ||
12 | */ | ||
13 | struct ceph_x_ticket_handler { | ||
14 | struct rb_node node; | ||
15 | unsigned service; | ||
16 | |||
17 | struct ceph_crypto_key session_key; | ||
18 | struct ceph_timespec validity; | ||
19 | |||
20 | u64 secret_id; | ||
21 | struct ceph_buffer *ticket_blob; | ||
22 | |||
23 | unsigned long renew_after, expires; | ||
24 | }; | ||
25 | |||
26 | |||
27 | struct ceph_x_authorizer { | ||
28 | struct ceph_buffer *buf; | ||
29 | unsigned service; | ||
30 | u64 nonce; | ||
31 | char reply_buf[128]; /* big enough for encrypted blob */ | ||
32 | }; | ||
33 | |||
34 | struct ceph_x_info { | ||
35 | struct ceph_crypto_key secret; | ||
36 | |||
37 | bool starting; | ||
38 | u64 server_challenge; | ||
39 | |||
40 | unsigned have_keys; | ||
41 | struct rb_root ticket_handlers; | ||
42 | |||
43 | struct ceph_x_authorizer auth_authorizer; | ||
44 | }; | ||
45 | |||
46 | extern int ceph_x_init(struct ceph_auth_client *ac); | ||
47 | |||
48 | #endif | ||
49 | |||
diff --git a/fs/ceph/auth_x_protocol.h b/fs/ceph/auth_x_protocol.h deleted file mode 100644 index 671d30576c4f..000000000000 --- a/fs/ceph/auth_x_protocol.h +++ /dev/null | |||
@@ -1,90 +0,0 @@ | |||
1 | #ifndef __FS_CEPH_AUTH_X_PROTOCOL | ||
2 | #define __FS_CEPH_AUTH_X_PROTOCOL | ||
3 | |||
4 | #define CEPHX_GET_AUTH_SESSION_KEY 0x0100 | ||
5 | #define CEPHX_GET_PRINCIPAL_SESSION_KEY 0x0200 | ||
6 | #define CEPHX_GET_ROTATING_KEY 0x0400 | ||
7 | |||
8 | /* common bits */ | ||
9 | struct ceph_x_ticket_blob { | ||
10 | __u8 struct_v; | ||
11 | __le64 secret_id; | ||
12 | __le32 blob_len; | ||
13 | char blob[]; | ||
14 | } __attribute__ ((packed)); | ||
15 | |||
16 | |||
17 | /* common request/reply headers */ | ||
18 | struct ceph_x_request_header { | ||
19 | __le16 op; | ||
20 | } __attribute__ ((packed)); | ||
21 | |||
22 | struct ceph_x_reply_header { | ||
23 | __le16 op; | ||
24 | __le32 result; | ||
25 | } __attribute__ ((packed)); | ||
26 | |||
27 | |||
28 | /* authenticate handshake */ | ||
29 | |||
30 | /* initial hello (no reply header) */ | ||
31 | struct ceph_x_server_challenge { | ||
32 | __u8 struct_v; | ||
33 | __le64 server_challenge; | ||
34 | } __attribute__ ((packed)); | ||
35 | |||
36 | struct ceph_x_authenticate { | ||
37 | __u8 struct_v; | ||
38 | __le64 client_challenge; | ||
39 | __le64 key; | ||
40 | /* ticket blob */ | ||
41 | } __attribute__ ((packed)); | ||
42 | |||
43 | struct ceph_x_service_ticket_request { | ||
44 | __u8 struct_v; | ||
45 | __le32 keys; | ||
46 | } __attribute__ ((packed)); | ||
47 | |||
48 | struct ceph_x_challenge_blob { | ||
49 | __le64 server_challenge; | ||
50 | __le64 client_challenge; | ||
51 | } __attribute__ ((packed)); | ||
52 | |||
53 | |||
54 | |||
55 | /* authorize handshake */ | ||
56 | |||
57 | /* | ||
58 | * The authorizer consists of two pieces: | ||
59 | * a - service id, ticket blob | ||
60 | * b - encrypted with session key | ||
61 | */ | ||
62 | struct ceph_x_authorize_a { | ||
63 | __u8 struct_v; | ||
64 | __le64 global_id; | ||
65 | __le32 service_id; | ||
66 | struct ceph_x_ticket_blob ticket_blob; | ||
67 | } __attribute__ ((packed)); | ||
68 | |||
69 | struct ceph_x_authorize_b { | ||
70 | __u8 struct_v; | ||
71 | __le64 nonce; | ||
72 | } __attribute__ ((packed)); | ||
73 | |||
74 | struct ceph_x_authorize_reply { | ||
75 | __u8 struct_v; | ||
76 | __le64 nonce_plus_one; | ||
77 | } __attribute__ ((packed)); | ||
78 | |||
79 | |||
80 | /* | ||
81 | * encyption bundle | ||
82 | */ | ||
83 | #define CEPHX_ENC_MAGIC 0xff009cad8826aa55ull | ||
84 | |||
85 | struct ceph_x_encrypt_header { | ||
86 | __u8 struct_v; | ||
87 | __le64 magic; | ||
88 | } __attribute__ ((packed)); | ||
89 | |||
90 | #endif | ||
diff --git a/fs/ceph/buffer.c b/fs/ceph/buffer.c deleted file mode 100644 index cd39f17021de..000000000000 --- a/fs/ceph/buffer.c +++ /dev/null | |||
@@ -1,65 +0,0 @@ | |||
1 | |||
2 | #include "ceph_debug.h" | ||
3 | |||
4 | #include <linux/slab.h> | ||
5 | |||
6 | #include "buffer.h" | ||
7 | #include "decode.h" | ||
8 | |||
9 | struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp) | ||
10 | { | ||
11 | struct ceph_buffer *b; | ||
12 | |||
13 | b = kmalloc(sizeof(*b), gfp); | ||
14 | if (!b) | ||
15 | return NULL; | ||
16 | |||
17 | b->vec.iov_base = kmalloc(len, gfp | __GFP_NOWARN); | ||
18 | if (b->vec.iov_base) { | ||
19 | b->is_vmalloc = false; | ||
20 | } else { | ||
21 | b->vec.iov_base = __vmalloc(len, gfp, PAGE_KERNEL); | ||
22 | if (!b->vec.iov_base) { | ||
23 | kfree(b); | ||
24 | return NULL; | ||
25 | } | ||
26 | b->is_vmalloc = true; | ||
27 | } | ||
28 | |||
29 | kref_init(&b->kref); | ||
30 | b->alloc_len = len; | ||
31 | b->vec.iov_len = len; | ||
32 | dout("buffer_new %p\n", b); | ||
33 | return b; | ||
34 | } | ||
35 | |||
36 | void ceph_buffer_release(struct kref *kref) | ||
37 | { | ||
38 | struct ceph_buffer *b = container_of(kref, struct ceph_buffer, kref); | ||
39 | |||
40 | dout("buffer_release %p\n", b); | ||
41 | if (b->vec.iov_base) { | ||
42 | if (b->is_vmalloc) | ||
43 | vfree(b->vec.iov_base); | ||
44 | else | ||
45 | kfree(b->vec.iov_base); | ||
46 | } | ||
47 | kfree(b); | ||
48 | } | ||
49 | |||
50 | int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end) | ||
51 | { | ||
52 | size_t len; | ||
53 | |||
54 | ceph_decode_need(p, end, sizeof(u32), bad); | ||
55 | len = ceph_decode_32(p); | ||
56 | dout("decode_buffer len %d\n", (int)len); | ||
57 | ceph_decode_need(p, end, len, bad); | ||
58 | *b = ceph_buffer_new(len, GFP_NOFS); | ||
59 | if (!*b) | ||
60 | return -ENOMEM; | ||
61 | ceph_decode_copy(p, (*b)->vec.iov_base, len); | ||
62 | return 0; | ||
63 | bad: | ||
64 | return -EINVAL; | ||
65 | } | ||
diff --git a/fs/ceph/buffer.h b/fs/ceph/buffer.h deleted file mode 100644 index 58d19014068f..000000000000 --- a/fs/ceph/buffer.h +++ /dev/null | |||
@@ -1,39 +0,0 @@ | |||
1 | #ifndef __FS_CEPH_BUFFER_H | ||
2 | #define __FS_CEPH_BUFFER_H | ||
3 | |||
4 | #include <linux/kref.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/vmalloc.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/uio.h> | ||
9 | |||
10 | /* | ||
11 | * a simple reference counted buffer. | ||
12 | * | ||
13 | * use kmalloc for small sizes (<= one page), vmalloc for larger | ||
14 | * sizes. | ||
15 | */ | ||
16 | struct ceph_buffer { | ||
17 | struct kref kref; | ||
18 | struct kvec vec; | ||
19 | size_t alloc_len; | ||
20 | bool is_vmalloc; | ||
21 | }; | ||
22 | |||
23 | extern struct ceph_buffer *ceph_buffer_new(size_t len, gfp_t gfp); | ||
24 | extern void ceph_buffer_release(struct kref *kref); | ||
25 | |||
26 | static inline struct ceph_buffer *ceph_buffer_get(struct ceph_buffer *b) | ||
27 | { | ||
28 | kref_get(&b->kref); | ||
29 | return b; | ||
30 | } | ||
31 | |||
32 | static inline void ceph_buffer_put(struct ceph_buffer *b) | ||
33 | { | ||
34 | kref_put(&b->kref, ceph_buffer_release); | ||
35 | } | ||
36 | |||
37 | extern int ceph_decode_buffer(struct ceph_buffer **b, void **p, void *end); | ||
38 | |||
39 | #endif | ||
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index a2069b6680ae..98ab13e2b71d 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
@@ -9,8 +9,9 @@ | |||
9 | #include <linux/writeback.h> | 9 | #include <linux/writeback.h> |
10 | 10 | ||
11 | #include "super.h" | 11 | #include "super.h" |
12 | #include "decode.h" | 12 | #include "mds_client.h" |
13 | #include "messenger.h" | 13 | #include <linux/ceph/decode.h> |
14 | #include <linux/ceph/messenger.h> | ||
14 | 15 | ||
15 | /* | 16 | /* |
16 | * Capability management | 17 | * Capability management |
@@ -287,11 +288,11 @@ void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap) | |||
287 | spin_unlock(&mdsc->caps_list_lock); | 288 | spin_unlock(&mdsc->caps_list_lock); |
288 | } | 289 | } |
289 | 290 | ||
290 | void ceph_reservation_status(struct ceph_client *client, | 291 | void ceph_reservation_status(struct ceph_fs_client *fsc, |
291 | int *total, int *avail, int *used, int *reserved, | 292 | int *total, int *avail, int *used, int *reserved, |
292 | int *min) | 293 | int *min) |
293 | { | 294 | { |
294 | struct ceph_mds_client *mdsc = &client->mdsc; | 295 | struct ceph_mds_client *mdsc = fsc->mdsc; |
295 | 296 | ||
296 | if (total) | 297 | if (total) |
297 | *total = mdsc->caps_total_count; | 298 | *total = mdsc->caps_total_count; |
@@ -399,7 +400,7 @@ static void __insert_cap_node(struct ceph_inode_info *ci, | |||
399 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, | 400 | static void __cap_set_timeouts(struct ceph_mds_client *mdsc, |
400 | struct ceph_inode_info *ci) | 401 | struct ceph_inode_info *ci) |
401 | { | 402 | { |
402 | struct ceph_mount_args *ma = mdsc->client->mount_args; | 403 | struct ceph_mount_options *ma = mdsc->fsc->mount_options; |
403 | 404 | ||
404 | ci->i_hold_caps_min = round_jiffies(jiffies + | 405 | ci->i_hold_caps_min = round_jiffies(jiffies + |
405 | ma->caps_wanted_delay_min * HZ); | 406 | ma->caps_wanted_delay_min * HZ); |
@@ -515,7 +516,7 @@ int ceph_add_cap(struct inode *inode, | |||
515 | unsigned seq, unsigned mseq, u64 realmino, int flags, | 516 | unsigned seq, unsigned mseq, u64 realmino, int flags, |
516 | struct ceph_cap_reservation *caps_reservation) | 517 | struct ceph_cap_reservation *caps_reservation) |
517 | { | 518 | { |
518 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 519 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
519 | struct ceph_inode_info *ci = ceph_inode(inode); | 520 | struct ceph_inode_info *ci = ceph_inode(inode); |
520 | struct ceph_cap *new_cap = NULL; | 521 | struct ceph_cap *new_cap = NULL; |
521 | struct ceph_cap *cap; | 522 | struct ceph_cap *cap; |
@@ -814,7 +815,7 @@ int __ceph_caps_used(struct ceph_inode_info *ci) | |||
814 | used |= CEPH_CAP_PIN; | 815 | used |= CEPH_CAP_PIN; |
815 | if (ci->i_rd_ref) | 816 | if (ci->i_rd_ref) |
816 | used |= CEPH_CAP_FILE_RD; | 817 | used |= CEPH_CAP_FILE_RD; |
817 | if (ci->i_rdcache_ref || ci->i_rdcache_gen) | 818 | if (ci->i_rdcache_ref || ci->vfs_inode.i_data.nrpages) |
818 | used |= CEPH_CAP_FILE_CACHE; | 819 | used |= CEPH_CAP_FILE_CACHE; |
819 | if (ci->i_wr_ref) | 820 | if (ci->i_wr_ref) |
820 | used |= CEPH_CAP_FILE_WR; | 821 | used |= CEPH_CAP_FILE_WR; |
@@ -873,7 +874,7 @@ void __ceph_remove_cap(struct ceph_cap *cap) | |||
873 | struct ceph_mds_session *session = cap->session; | 874 | struct ceph_mds_session *session = cap->session; |
874 | struct ceph_inode_info *ci = cap->ci; | 875 | struct ceph_inode_info *ci = cap->ci; |
875 | struct ceph_mds_client *mdsc = | 876 | struct ceph_mds_client *mdsc = |
876 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 877 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
877 | int removed = 0; | 878 | int removed = 0; |
878 | 879 | ||
879 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); | 880 | dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode); |
@@ -1195,10 +1196,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap, | |||
1195 | * asynchronously back to the MDS once sync writes complete and dirty | 1196 | * asynchronously back to the MDS once sync writes complete and dirty |
1196 | * data is written out. | 1197 | * data is written out. |
1197 | * | 1198 | * |
1199 | * Unless @again is true, skip cap_snaps that were already sent to | ||
1200 | * the MDS (i.e., during this session). | ||
1201 | * | ||
1198 | * Called under i_lock. Takes s_mutex as needed. | 1202 | * Called under i_lock. Takes s_mutex as needed. |
1199 | */ | 1203 | */ |
1200 | void __ceph_flush_snaps(struct ceph_inode_info *ci, | 1204 | void __ceph_flush_snaps(struct ceph_inode_info *ci, |
1201 | struct ceph_mds_session **psession) | 1205 | struct ceph_mds_session **psession, |
1206 | int again) | ||
1202 | __releases(ci->vfs_inode->i_lock) | 1207 | __releases(ci->vfs_inode->i_lock) |
1203 | __acquires(ci->vfs_inode->i_lock) | 1208 | __acquires(ci->vfs_inode->i_lock) |
1204 | { | 1209 | { |
@@ -1206,7 +1211,7 @@ void __ceph_flush_snaps(struct ceph_inode_info *ci, | |||
1206 | int mds; | 1211 | int mds; |
1207 | struct ceph_cap_snap *capsnap; | 1212 | struct ceph_cap_snap *capsnap; |
1208 | u32 mseq; | 1213 | u32 mseq; |
1209 | struct ceph_mds_client *mdsc = &ceph_inode_to_client(inode)->mdsc; | 1214 | struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; |
1210 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold | 1215 | struct ceph_mds_session *session = NULL; /* if session != NULL, we hold |
1211 | session->s_mutex */ | 1216 | session->s_mutex */ |
1212 | u64 next_follows = 0; /* keep track of how far we've gotten through the | 1217 | u64 next_follows = 0; /* keep track of how far we've gotten through the |
@@ -1227,7 +1232,7 @@ retry: | |||
1227 | * pages to be written out. | 1232 | * pages to be written out. |
1228 | */ | 1233 | */ |
1229 | if (capsnap->dirty_pages || capsnap->writing) | 1234 | if (capsnap->dirty_pages || capsnap->writing) |
1230 | continue; | 1235 | break; |
1231 | 1236 | ||
1232 | /* | 1237 | /* |
1233 | * if cap writeback already occurred, we should have dropped | 1238 | * if cap writeback already occurred, we should have dropped |
@@ -1240,6 +1245,13 @@ retry: | |||
1240 | dout("no auth cap (migrating?), doing nothing\n"); | 1245 | dout("no auth cap (migrating?), doing nothing\n"); |
1241 | goto out; | 1246 | goto out; |
1242 | } | 1247 | } |
1248 | |||
1249 | /* only flush each capsnap once */ | ||
1250 | if (!again && !list_empty(&capsnap->flushing_item)) { | ||
1251 | dout("already flushed %p, skipping\n", capsnap); | ||
1252 | continue; | ||
1253 | } | ||
1254 | |||
1243 | mds = ci->i_auth_cap->session->s_mds; | 1255 | mds = ci->i_auth_cap->session->s_mds; |
1244 | mseq = ci->i_auth_cap->mseq; | 1256 | mseq = ci->i_auth_cap->mseq; |
1245 | 1257 | ||
@@ -1276,8 +1288,8 @@ retry: | |||
1276 | &session->s_cap_snaps_flushing); | 1288 | &session->s_cap_snaps_flushing); |
1277 | spin_unlock(&inode->i_lock); | 1289 | spin_unlock(&inode->i_lock); |
1278 | 1290 | ||
1279 | dout("flush_snaps %p cap_snap %p follows %lld size %llu\n", | 1291 | dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n", |
1280 | inode, capsnap, next_follows, capsnap->size); | 1292 | inode, capsnap, capsnap->follows, capsnap->flush_tid); |
1281 | send_cap_msg(session, ceph_vino(inode).ino, 0, | 1293 | send_cap_msg(session, ceph_vino(inode).ino, 0, |
1282 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, | 1294 | CEPH_CAP_OP_FLUSHSNAP, capsnap->issued, 0, |
1283 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, | 1295 | capsnap->dirty, 0, capsnap->flush_tid, 0, mseq, |
@@ -1314,7 +1326,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1314 | struct inode *inode = &ci->vfs_inode; | 1326 | struct inode *inode = &ci->vfs_inode; |
1315 | 1327 | ||
1316 | spin_lock(&inode->i_lock); | 1328 | spin_lock(&inode->i_lock); |
1317 | __ceph_flush_snaps(ci, NULL); | 1329 | __ceph_flush_snaps(ci, NULL, 0); |
1318 | spin_unlock(&inode->i_lock); | 1330 | spin_unlock(&inode->i_lock); |
1319 | } | 1331 | } |
1320 | 1332 | ||
@@ -1325,7 +1337,7 @@ static void ceph_flush_snaps(struct ceph_inode_info *ci) | |||
1325 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | 1337 | void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) |
1326 | { | 1338 | { |
1327 | struct ceph_mds_client *mdsc = | 1339 | struct ceph_mds_client *mdsc = |
1328 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 1340 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
1329 | struct inode *inode = &ci->vfs_inode; | 1341 | struct inode *inode = &ci->vfs_inode; |
1330 | int was = ci->i_dirty_caps; | 1342 | int was = ci->i_dirty_caps; |
1331 | int dirty = 0; | 1343 | int dirty = 0; |
@@ -1367,7 +1379,7 @@ void __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask) | |||
1367 | static int __mark_caps_flushing(struct inode *inode, | 1379 | static int __mark_caps_flushing(struct inode *inode, |
1368 | struct ceph_mds_session *session) | 1380 | struct ceph_mds_session *session) |
1369 | { | 1381 | { |
1370 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1382 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1371 | struct ceph_inode_info *ci = ceph_inode(inode); | 1383 | struct ceph_inode_info *ci = ceph_inode(inode); |
1372 | int flushing; | 1384 | int flushing; |
1373 | 1385 | ||
@@ -1405,17 +1417,6 @@ static int __mark_caps_flushing(struct inode *inode, | |||
1405 | /* | 1417 | /* |
1406 | * try to invalidate mapping pages without blocking. | 1418 | * try to invalidate mapping pages without blocking. |
1407 | */ | 1419 | */ |
1408 | static int mapping_is_empty(struct address_space *mapping) | ||
1409 | { | ||
1410 | struct page *page = find_get_page(mapping, 0); | ||
1411 | |||
1412 | if (!page) | ||
1413 | return 1; | ||
1414 | |||
1415 | put_page(page); | ||
1416 | return 0; | ||
1417 | } | ||
1418 | |||
1419 | static int try_nonblocking_invalidate(struct inode *inode) | 1420 | static int try_nonblocking_invalidate(struct inode *inode) |
1420 | { | 1421 | { |
1421 | struct ceph_inode_info *ci = ceph_inode(inode); | 1422 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -1425,7 +1426,7 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1425 | invalidate_mapping_pages(&inode->i_data, 0, -1); | 1426 | invalidate_mapping_pages(&inode->i_data, 0, -1); |
1426 | spin_lock(&inode->i_lock); | 1427 | spin_lock(&inode->i_lock); |
1427 | 1428 | ||
1428 | if (mapping_is_empty(&inode->i_data) && | 1429 | if (inode->i_data.nrpages == 0 && |
1429 | invalidating_gen == ci->i_rdcache_gen) { | 1430 | invalidating_gen == ci->i_rdcache_gen) { |
1430 | /* success. */ | 1431 | /* success. */ |
1431 | dout("try_nonblocking_invalidate %p success\n", inode); | 1432 | dout("try_nonblocking_invalidate %p success\n", inode); |
@@ -1451,8 +1452,8 @@ static int try_nonblocking_invalidate(struct inode *inode) | |||
1451 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 1452 | void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
1452 | struct ceph_mds_session *session) | 1453 | struct ceph_mds_session *session) |
1453 | { | 1454 | { |
1454 | struct ceph_client *client = ceph_inode_to_client(&ci->vfs_inode); | 1455 | struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode); |
1455 | struct ceph_mds_client *mdsc = &client->mdsc; | 1456 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1456 | struct inode *inode = &ci->vfs_inode; | 1457 | struct inode *inode = &ci->vfs_inode; |
1457 | struct ceph_cap *cap; | 1458 | struct ceph_cap *cap; |
1458 | int file_wanted, used; | 1459 | int file_wanted, used; |
@@ -1477,7 +1478,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags, | |||
1477 | 1478 | ||
1478 | /* flush snaps first time around only */ | 1479 | /* flush snaps first time around only */ |
1479 | if (!list_empty(&ci->i_cap_snaps)) | 1480 | if (!list_empty(&ci->i_cap_snaps)) |
1480 | __ceph_flush_snaps(ci, &session); | 1481 | __ceph_flush_snaps(ci, &session, 0); |
1481 | goto retry_locked; | 1482 | goto retry_locked; |
1482 | retry: | 1483 | retry: |
1483 | spin_lock(&inode->i_lock); | 1484 | spin_lock(&inode->i_lock); |
@@ -1522,7 +1523,7 @@ retry_locked: | |||
1522 | */ | 1523 | */ |
1523 | if ((!is_delayed || mdsc->stopping) && | 1524 | if ((!is_delayed || mdsc->stopping) && |
1524 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ | 1525 | ci->i_wrbuffer_ref == 0 && /* no dirty pages... */ |
1525 | ci->i_rdcache_gen && /* may have cached pages */ | 1526 | inode->i_data.nrpages && /* have cached pages */ |
1526 | (file_wanted == 0 || /* no open files */ | 1527 | (file_wanted == 0 || /* no open files */ |
1527 | (revoking & (CEPH_CAP_FILE_CACHE| | 1528 | (revoking & (CEPH_CAP_FILE_CACHE| |
1528 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ | 1529 | CEPH_CAP_FILE_LAZYIO))) && /* or revoking cache */ |
@@ -1695,7 +1696,7 @@ ack: | |||
1695 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, | 1696 | static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, |
1696 | unsigned *flush_tid) | 1697 | unsigned *flush_tid) |
1697 | { | 1698 | { |
1698 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 1699 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
1699 | struct ceph_inode_info *ci = ceph_inode(inode); | 1700 | struct ceph_inode_info *ci = ceph_inode(inode); |
1700 | int unlock_session = session ? 0 : 1; | 1701 | int unlock_session = session ? 0 : 1; |
1701 | int flushing = 0; | 1702 | int flushing = 0; |
@@ -1861,7 +1862,7 @@ int ceph_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1861 | caps_are_flushed(inode, flush_tid)); | 1862 | caps_are_flushed(inode, flush_tid)); |
1862 | } else { | 1863 | } else { |
1863 | struct ceph_mds_client *mdsc = | 1864 | struct ceph_mds_client *mdsc = |
1864 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 1865 | ceph_sb_to_client(inode->i_sb)->mdsc; |
1865 | 1866 | ||
1866 | spin_lock(&inode->i_lock); | 1867 | spin_lock(&inode->i_lock); |
1867 | if (__ceph_caps_dirty(ci)) | 1868 | if (__ceph_caps_dirty(ci)) |
@@ -1894,7 +1895,7 @@ static void kick_flushing_capsnaps(struct ceph_mds_client *mdsc, | |||
1894 | if (cap && cap->session == session) { | 1895 | if (cap && cap->session == session) { |
1895 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, | 1896 | dout("kick_flushing_caps %p cap %p capsnap %p\n", inode, |
1896 | cap, capsnap); | 1897 | cap, capsnap); |
1897 | __ceph_flush_snaps(ci, &session); | 1898 | __ceph_flush_snaps(ci, &session, 1); |
1898 | } else { | 1899 | } else { |
1899 | pr_err("%p auth cap %p not mds%d ???\n", inode, | 1900 | pr_err("%p auth cap %p not mds%d ???\n", inode, |
1900 | cap, session->s_mds); | 1901 | cap, session->s_mds); |
@@ -2272,7 +2273,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2272 | { | 2273 | { |
2273 | struct ceph_inode_info *ci = ceph_inode(inode); | 2274 | struct ceph_inode_info *ci = ceph_inode(inode); |
2274 | int mds = session->s_mds; | 2275 | int mds = session->s_mds; |
2275 | int seq = le32_to_cpu(grant->seq); | 2276 | unsigned seq = le32_to_cpu(grant->seq); |
2277 | unsigned issue_seq = le32_to_cpu(grant->issue_seq); | ||
2276 | int newcaps = le32_to_cpu(grant->caps); | 2278 | int newcaps = le32_to_cpu(grant->caps); |
2277 | int issued, implemented, used, wanted, dirty; | 2279 | int issued, implemented, used, wanted, dirty; |
2278 | u64 size = le64_to_cpu(grant->size); | 2280 | u64 size = le64_to_cpu(grant->size); |
@@ -2284,8 +2286,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2284 | int revoked_rdcache = 0; | 2286 | int revoked_rdcache = 0; |
2285 | int queue_invalidate = 0; | 2287 | int queue_invalidate = 0; |
2286 | 2288 | ||
2287 | dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", | 2289 | dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n", |
2288 | inode, cap, mds, seq, ceph_cap_string(newcaps)); | 2290 | inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps)); |
2289 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, | 2291 | dout(" size %llu max_size %llu, i_size %llu\n", size, max_size, |
2290 | inode->i_size); | 2292 | inode->i_size); |
2291 | 2293 | ||
@@ -2381,6 +2383,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, | |||
2381 | } | 2383 | } |
2382 | 2384 | ||
2383 | cap->seq = seq; | 2385 | cap->seq = seq; |
2386 | cap->issue_seq = issue_seq; | ||
2384 | 2387 | ||
2385 | /* file layout may have changed */ | 2388 | /* file layout may have changed */ |
2386 | ci->i_layout = grant->layout; | 2389 | ci->i_layout = grant->layout; |
@@ -2452,7 +2455,7 @@ static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid, | |||
2452 | __releases(inode->i_lock) | 2455 | __releases(inode->i_lock) |
2453 | { | 2456 | { |
2454 | struct ceph_inode_info *ci = ceph_inode(inode); | 2457 | struct ceph_inode_info *ci = ceph_inode(inode); |
2455 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 2458 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
2456 | unsigned seq = le32_to_cpu(m->seq); | 2459 | unsigned seq = le32_to_cpu(m->seq); |
2457 | int dirty = le32_to_cpu(m->dirty); | 2460 | int dirty = le32_to_cpu(m->dirty); |
2458 | int cleaned = 0; | 2461 | int cleaned = 0; |
@@ -2700,7 +2703,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2700 | struct ceph_msg *msg) | 2703 | struct ceph_msg *msg) |
2701 | { | 2704 | { |
2702 | struct ceph_mds_client *mdsc = session->s_mdsc; | 2705 | struct ceph_mds_client *mdsc = session->s_mdsc; |
2703 | struct super_block *sb = mdsc->client->sb; | 2706 | struct super_block *sb = mdsc->fsc->sb; |
2704 | struct inode *inode; | 2707 | struct inode *inode; |
2705 | struct ceph_cap *cap; | 2708 | struct ceph_cap *cap; |
2706 | struct ceph_mds_caps *h; | 2709 | struct ceph_mds_caps *h; |
@@ -2763,15 +2766,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2763 | if (op == CEPH_CAP_OP_IMPORT) | 2766 | if (op == CEPH_CAP_OP_IMPORT) |
2764 | __queue_cap_release(session, vino.ino, cap_id, | 2767 | __queue_cap_release(session, vino.ino, cap_id, |
2765 | mseq, seq); | 2768 | mseq, seq); |
2766 | 2769 | goto flush_cap_releases; | |
2767 | /* | ||
2768 | * send any full release message to try to move things | ||
2769 | * along for the mds (who clearly thinks we still have this | ||
2770 | * cap). | ||
2771 | */ | ||
2772 | ceph_add_cap_releases(mdsc, session); | ||
2773 | ceph_send_cap_releases(mdsc, session); | ||
2774 | goto done; | ||
2775 | } | 2770 | } |
2776 | 2771 | ||
2777 | /* these will work even if we don't have a cap yet */ | 2772 | /* these will work even if we don't have a cap yet */ |
@@ -2799,7 +2794,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2799 | dout(" no cap on %p ino %llx.%llx from mds%d\n", | 2794 | dout(" no cap on %p ino %llx.%llx from mds%d\n", |
2800 | inode, ceph_ino(inode), ceph_snap(inode), mds); | 2795 | inode, ceph_ino(inode), ceph_snap(inode), mds); |
2801 | spin_unlock(&inode->i_lock); | 2796 | spin_unlock(&inode->i_lock); |
2802 | goto done; | 2797 | goto flush_cap_releases; |
2803 | } | 2798 | } |
2804 | 2799 | ||
2805 | /* note that each of these drops i_lock for us */ | 2800 | /* note that each of these drops i_lock for us */ |
@@ -2823,6 +2818,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, | |||
2823 | ceph_cap_op_name(op)); | 2818 | ceph_cap_op_name(op)); |
2824 | } | 2819 | } |
2825 | 2820 | ||
2821 | goto done; | ||
2822 | |||
2823 | flush_cap_releases: | ||
2824 | /* | ||
2825 | * send any full release message to try to move things | ||
2826 | * along for the mds (who clearly thinks we still have this | ||
2827 | * cap). | ||
2828 | */ | ||
2829 | ceph_add_cap_releases(mdsc, session); | ||
2830 | ceph_send_cap_releases(mdsc, session); | ||
2831 | |||
2826 | done: | 2832 | done: |
2827 | mutex_unlock(&session->s_mutex); | 2833 | mutex_unlock(&session->s_mutex); |
2828 | done_unlocked: | 2834 | done_unlocked: |
diff --git a/fs/ceph/ceph_debug.h b/fs/ceph/ceph_debug.h deleted file mode 100644 index 1818c2305610..000000000000 --- a/fs/ceph/ceph_debug.h +++ /dev/null | |||
@@ -1,37 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_DEBUG_H | ||
2 | #define _FS_CEPH_DEBUG_H | ||
3 | |||
4 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
5 | |||
6 | #ifdef CONFIG_CEPH_FS_PRETTYDEBUG | ||
7 | |||
8 | /* | ||
9 | * wrap pr_debug to include a filename:lineno prefix on each line. | ||
10 | * this incurs some overhead (kernel size and execution time) due to | ||
11 | * the extra function call at each call site. | ||
12 | */ | ||
13 | |||
14 | # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) | ||
15 | extern const char *ceph_file_part(const char *s, int len); | ||
16 | # define dout(fmt, ...) \ | ||
17 | pr_debug(" %12.12s:%-4d : " fmt, \ | ||
18 | ceph_file_part(__FILE__, sizeof(__FILE__)), \ | ||
19 | __LINE__, ##__VA_ARGS__) | ||
20 | # else | ||
21 | /* faux printk call just to see any compiler warnings. */ | ||
22 | # define dout(fmt, ...) do { \ | ||
23 | if (0) \ | ||
24 | printk(KERN_DEBUG fmt, ##__VA_ARGS__); \ | ||
25 | } while (0) | ||
26 | # endif | ||
27 | |||
28 | #else | ||
29 | |||
30 | /* | ||
31 | * or, just wrap pr_debug | ||
32 | */ | ||
33 | # define dout(fmt, ...) pr_debug(" " fmt, ##__VA_ARGS__) | ||
34 | |||
35 | #endif | ||
36 | |||
37 | #endif | ||
diff --git a/fs/ceph/ceph_frag.c b/fs/ceph/ceph_frag.c index ab6cf35c4091..bdce8b1fbd06 100644 --- a/fs/ceph/ceph_frag.c +++ b/fs/ceph/ceph_frag.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * Ceph 'frag' type | 2 | * Ceph 'frag' type |
3 | */ | 3 | */ |
4 | #include "types.h" | 4 | #include <linux/module.h> |
5 | #include <linux/ceph/types.h> | ||
5 | 6 | ||
6 | int ceph_frag_compare(__u32 a, __u32 b) | 7 | int ceph_frag_compare(__u32 a, __u32 b) |
7 | { | 8 | { |
diff --git a/fs/ceph/ceph_frag.h b/fs/ceph/ceph_frag.h deleted file mode 100644 index 5babb8e95352..000000000000 --- a/fs/ceph/ceph_frag.h +++ /dev/null | |||
@@ -1,109 +0,0 @@ | |||
1 | #ifndef FS_CEPH_FRAG_H | ||
2 | #define FS_CEPH_FRAG_H | ||
3 | |||
4 | /* | ||
5 | * "Frags" are a way to describe a subset of a 32-bit number space, | ||
6 | * using a mask and a value to match against that mask. Any given frag | ||
7 | * (subset of the number space) can be partitioned into 2^n sub-frags. | ||
8 | * | ||
9 | * Frags are encoded into a 32-bit word: | ||
10 | * 8 upper bits = "bits" | ||
11 | * 24 lower bits = "value" | ||
12 | * (We could go to 5+27 bits, but who cares.) | ||
13 | * | ||
14 | * We use the _most_ significant bits of the 24 bit value. This makes | ||
15 | * values logically sort. | ||
16 | * | ||
17 | * Unfortunately, because the "bits" field is still in the high bits, we | ||
18 | * can't sort encoded frags numerically. However, it does allow you | ||
19 | * to feed encoded frags as values into frag_contains_value. | ||
20 | */ | ||
21 | static inline __u32 ceph_frag_make(__u32 b, __u32 v) | ||
22 | { | ||
23 | return (b << 24) | | ||
24 | (v & (0xffffffu << (24-b)) & 0xffffffu); | ||
25 | } | ||
26 | static inline __u32 ceph_frag_bits(__u32 f) | ||
27 | { | ||
28 | return f >> 24; | ||
29 | } | ||
30 | static inline __u32 ceph_frag_value(__u32 f) | ||
31 | { | ||
32 | return f & 0xffffffu; | ||
33 | } | ||
34 | static inline __u32 ceph_frag_mask(__u32 f) | ||
35 | { | ||
36 | return (0xffffffu << (24-ceph_frag_bits(f))) & 0xffffffu; | ||
37 | } | ||
38 | static inline __u32 ceph_frag_mask_shift(__u32 f) | ||
39 | { | ||
40 | return 24 - ceph_frag_bits(f); | ||
41 | } | ||
42 | |||
43 | static inline int ceph_frag_contains_value(__u32 f, __u32 v) | ||
44 | { | ||
45 | return (v & ceph_frag_mask(f)) == ceph_frag_value(f); | ||
46 | } | ||
47 | static inline int ceph_frag_contains_frag(__u32 f, __u32 sub) | ||
48 | { | ||
49 | /* is sub as specific as us, and contained by us? */ | ||
50 | return ceph_frag_bits(sub) >= ceph_frag_bits(f) && | ||
51 | (ceph_frag_value(sub) & ceph_frag_mask(f)) == ceph_frag_value(f); | ||
52 | } | ||
53 | |||
54 | static inline __u32 ceph_frag_parent(__u32 f) | ||
55 | { | ||
56 | return ceph_frag_make(ceph_frag_bits(f) - 1, | ||
57 | ceph_frag_value(f) & (ceph_frag_mask(f) << 1)); | ||
58 | } | ||
59 | static inline int ceph_frag_is_left_child(__u32 f) | ||
60 | { | ||
61 | return ceph_frag_bits(f) > 0 && | ||
62 | (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 0; | ||
63 | } | ||
64 | static inline int ceph_frag_is_right_child(__u32 f) | ||
65 | { | ||
66 | return ceph_frag_bits(f) > 0 && | ||
67 | (ceph_frag_value(f) & (0x1000000 >> ceph_frag_bits(f))) == 1; | ||
68 | } | ||
69 | static inline __u32 ceph_frag_sibling(__u32 f) | ||
70 | { | ||
71 | return ceph_frag_make(ceph_frag_bits(f), | ||
72 | ceph_frag_value(f) ^ (0x1000000 >> ceph_frag_bits(f))); | ||
73 | } | ||
74 | static inline __u32 ceph_frag_left_child(__u32 f) | ||
75 | { | ||
76 | return ceph_frag_make(ceph_frag_bits(f)+1, ceph_frag_value(f)); | ||
77 | } | ||
78 | static inline __u32 ceph_frag_right_child(__u32 f) | ||
79 | { | ||
80 | return ceph_frag_make(ceph_frag_bits(f)+1, | ||
81 | ceph_frag_value(f) | (0x1000000 >> (1+ceph_frag_bits(f)))); | ||
82 | } | ||
83 | static inline __u32 ceph_frag_make_child(__u32 f, int by, int i) | ||
84 | { | ||
85 | int newbits = ceph_frag_bits(f) + by; | ||
86 | return ceph_frag_make(newbits, | ||
87 | ceph_frag_value(f) | (i << (24 - newbits))); | ||
88 | } | ||
89 | static inline int ceph_frag_is_leftmost(__u32 f) | ||
90 | { | ||
91 | return ceph_frag_value(f) == 0; | ||
92 | } | ||
93 | static inline int ceph_frag_is_rightmost(__u32 f) | ||
94 | { | ||
95 | return ceph_frag_value(f) == ceph_frag_mask(f); | ||
96 | } | ||
97 | static inline __u32 ceph_frag_next(__u32 f) | ||
98 | { | ||
99 | return ceph_frag_make(ceph_frag_bits(f), | ||
100 | ceph_frag_value(f) + (0x1000000 >> ceph_frag_bits(f))); | ||
101 | } | ||
102 | |||
103 | /* | ||
104 | * comparator to sort frags logically, as when traversing the | ||
105 | * number space in ascending order... | ||
106 | */ | ||
107 | int ceph_frag_compare(__u32 a, __u32 b); | ||
108 | |||
109 | #endif | ||
diff --git a/fs/ceph/ceph_fs.c b/fs/ceph/ceph_fs.c deleted file mode 100644 index 3ac6cc7c1156..000000000000 --- a/fs/ceph/ceph_fs.c +++ /dev/null | |||
@@ -1,72 +0,0 @@ | |||
1 | /* | ||
2 | * Some non-inline ceph helpers | ||
3 | */ | ||
4 | #include "types.h" | ||
5 | |||
6 | /* | ||
7 | * return true if @layout appears to be valid | ||
8 | */ | ||
9 | int ceph_file_layout_is_valid(const struct ceph_file_layout *layout) | ||
10 | { | ||
11 | __u32 su = le32_to_cpu(layout->fl_stripe_unit); | ||
12 | __u32 sc = le32_to_cpu(layout->fl_stripe_count); | ||
13 | __u32 os = le32_to_cpu(layout->fl_object_size); | ||
14 | |||
15 | /* stripe unit, object size must be non-zero, 64k increment */ | ||
16 | if (!su || (su & (CEPH_MIN_STRIPE_UNIT-1))) | ||
17 | return 0; | ||
18 | if (!os || (os & (CEPH_MIN_STRIPE_UNIT-1))) | ||
19 | return 0; | ||
20 | /* object size must be a multiple of stripe unit */ | ||
21 | if (os < su || os % su) | ||
22 | return 0; | ||
23 | /* stripe count must be non-zero */ | ||
24 | if (!sc) | ||
25 | return 0; | ||
26 | return 1; | ||
27 | } | ||
28 | |||
29 | |||
30 | int ceph_flags_to_mode(int flags) | ||
31 | { | ||
32 | int mode; | ||
33 | |||
34 | #ifdef O_DIRECTORY /* fixme */ | ||
35 | if ((flags & O_DIRECTORY) == O_DIRECTORY) | ||
36 | return CEPH_FILE_MODE_PIN; | ||
37 | #endif | ||
38 | if ((flags & O_APPEND) == O_APPEND) | ||
39 | flags |= O_WRONLY; | ||
40 | |||
41 | if ((flags & O_ACCMODE) == O_RDWR) | ||
42 | mode = CEPH_FILE_MODE_RDWR; | ||
43 | else if ((flags & O_ACCMODE) == O_WRONLY) | ||
44 | mode = CEPH_FILE_MODE_WR; | ||
45 | else | ||
46 | mode = CEPH_FILE_MODE_RD; | ||
47 | |||
48 | #ifdef O_LAZY | ||
49 | if (flags & O_LAZY) | ||
50 | mode |= CEPH_FILE_MODE_LAZY; | ||
51 | #endif | ||
52 | |||
53 | return mode; | ||
54 | } | ||
55 | |||
56 | int ceph_caps_for_mode(int mode) | ||
57 | { | ||
58 | int caps = CEPH_CAP_PIN; | ||
59 | |||
60 | if (mode & CEPH_FILE_MODE_RD) | ||
61 | caps |= CEPH_CAP_FILE_SHARED | | ||
62 | CEPH_CAP_FILE_RD | CEPH_CAP_FILE_CACHE; | ||
63 | if (mode & CEPH_FILE_MODE_WR) | ||
64 | caps |= CEPH_CAP_FILE_EXCL | | ||
65 | CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | | ||
66 | CEPH_CAP_AUTH_SHARED | CEPH_CAP_AUTH_EXCL | | ||
67 | CEPH_CAP_XATTR_SHARED | CEPH_CAP_XATTR_EXCL; | ||
68 | if (mode & CEPH_FILE_MODE_LAZY) | ||
69 | caps |= CEPH_CAP_FILE_LAZYIO; | ||
70 | |||
71 | return caps; | ||
72 | } | ||
diff --git a/fs/ceph/ceph_fs.h b/fs/ceph/ceph_fs.h deleted file mode 100644 index d5619ac86711..000000000000 --- a/fs/ceph/ceph_fs.h +++ /dev/null | |||
@@ -1,728 +0,0 @@ | |||
1 | /* | ||
2 | * ceph_fs.h - Ceph constants and data types to share between kernel and | ||
3 | * user space. | ||
4 | * | ||
5 | * Most types in this file are defined as little-endian, and are | ||
6 | * primarily intended to describe data structures that pass over the | ||
7 | * wire or that are stored on disk. | ||
8 | * | ||
9 | * LGPL2 | ||
10 | */ | ||
11 | |||
12 | #ifndef CEPH_FS_H | ||
13 | #define CEPH_FS_H | ||
14 | |||
15 | #include "msgr.h" | ||
16 | #include "rados.h" | ||
17 | |||
18 | /* | ||
19 | * subprotocol versions. when specific messages types or high-level | ||
20 | * protocols change, bump the affected components. we keep rev | ||
21 | * internal cluster protocols separately from the public, | ||
22 | * client-facing protocol. | ||
23 | */ | ||
24 | #define CEPH_OSD_PROTOCOL 8 /* cluster internal */ | ||
25 | #define CEPH_MDS_PROTOCOL 12 /* cluster internal */ | ||
26 | #define CEPH_MON_PROTOCOL 5 /* cluster internal */ | ||
27 | #define CEPH_OSDC_PROTOCOL 24 /* server/client */ | ||
28 | #define CEPH_MDSC_PROTOCOL 32 /* server/client */ | ||
29 | #define CEPH_MONC_PROTOCOL 15 /* server/client */ | ||
30 | |||
31 | |||
32 | #define CEPH_INO_ROOT 1 | ||
33 | #define CEPH_INO_CEPH 2 /* hidden .ceph dir */ | ||
34 | |||
35 | /* arbitrary limit on max # of monitors (cluster of 3 is typical) */ | ||
36 | #define CEPH_MAX_MON 31 | ||
37 | |||
38 | |||
39 | /* | ||
40 | * feature bits | ||
41 | */ | ||
42 | #define CEPH_FEATURE_UID (1<<0) | ||
43 | #define CEPH_FEATURE_NOSRCADDR (1<<1) | ||
44 | #define CEPH_FEATURE_MONCLOCKCHECK (1<<2) | ||
45 | #define CEPH_FEATURE_FLOCK (1<<3) | ||
46 | |||
47 | |||
48 | /* | ||
49 | * ceph_file_layout - describe data layout for a file/inode | ||
50 | */ | ||
51 | struct ceph_file_layout { | ||
52 | /* file -> object mapping */ | ||
53 | __le32 fl_stripe_unit; /* stripe unit, in bytes. must be multiple | ||
54 | of page size. */ | ||
55 | __le32 fl_stripe_count; /* over this many objects */ | ||
56 | __le32 fl_object_size; /* until objects are this big, then move to | ||
57 | new objects */ | ||
58 | __le32 fl_cas_hash; /* 0 = none; 1 = sha256 */ | ||
59 | |||
60 | /* pg -> disk layout */ | ||
61 | __le32 fl_object_stripe_unit; /* for per-object parity, if any */ | ||
62 | |||
63 | /* object -> pg layout */ | ||
64 | __le32 fl_pg_preferred; /* preferred primary for pg (-1 for none) */ | ||
65 | __le32 fl_pg_pool; /* namespace, crush ruleset, rep level */ | ||
66 | } __attribute__ ((packed)); | ||
67 | |||
68 | #define CEPH_MIN_STRIPE_UNIT 65536 | ||
69 | |||
70 | int ceph_file_layout_is_valid(const struct ceph_file_layout *layout); | ||
71 | |||
72 | |||
73 | /* crypto algorithms */ | ||
74 | #define CEPH_CRYPTO_NONE 0x0 | ||
75 | #define CEPH_CRYPTO_AES 0x1 | ||
76 | |||
77 | #define CEPH_AES_IV "cephsageyudagreg" | ||
78 | |||
79 | /* security/authentication protocols */ | ||
80 | #define CEPH_AUTH_UNKNOWN 0x0 | ||
81 | #define CEPH_AUTH_NONE 0x1 | ||
82 | #define CEPH_AUTH_CEPHX 0x2 | ||
83 | |||
84 | #define CEPH_AUTH_UID_DEFAULT ((__u64) -1) | ||
85 | |||
86 | |||
87 | /********************************************* | ||
88 | * message layer | ||
89 | */ | ||
90 | |||
91 | /* | ||
92 | * message types | ||
93 | */ | ||
94 | |||
95 | /* misc */ | ||
96 | #define CEPH_MSG_SHUTDOWN 1 | ||
97 | #define CEPH_MSG_PING 2 | ||
98 | |||
99 | /* client <-> monitor */ | ||
100 | #define CEPH_MSG_MON_MAP 4 | ||
101 | #define CEPH_MSG_MON_GET_MAP 5 | ||
102 | #define CEPH_MSG_STATFS 13 | ||
103 | #define CEPH_MSG_STATFS_REPLY 14 | ||
104 | #define CEPH_MSG_MON_SUBSCRIBE 15 | ||
105 | #define CEPH_MSG_MON_SUBSCRIBE_ACK 16 | ||
106 | #define CEPH_MSG_AUTH 17 | ||
107 | #define CEPH_MSG_AUTH_REPLY 18 | ||
108 | |||
109 | /* client <-> mds */ | ||
110 | #define CEPH_MSG_MDS_MAP 21 | ||
111 | |||
112 | #define CEPH_MSG_CLIENT_SESSION 22 | ||
113 | #define CEPH_MSG_CLIENT_RECONNECT 23 | ||
114 | |||
115 | #define CEPH_MSG_CLIENT_REQUEST 24 | ||
116 | #define CEPH_MSG_CLIENT_REQUEST_FORWARD 25 | ||
117 | #define CEPH_MSG_CLIENT_REPLY 26 | ||
118 | #define CEPH_MSG_CLIENT_CAPS 0x310 | ||
119 | #define CEPH_MSG_CLIENT_LEASE 0x311 | ||
120 | #define CEPH_MSG_CLIENT_SNAP 0x312 | ||
121 | #define CEPH_MSG_CLIENT_CAPRELEASE 0x313 | ||
122 | |||
123 | /* pool ops */ | ||
124 | #define CEPH_MSG_POOLOP_REPLY 48 | ||
125 | #define CEPH_MSG_POOLOP 49 | ||
126 | |||
127 | |||
128 | /* osd */ | ||
129 | #define CEPH_MSG_OSD_MAP 41 | ||
130 | #define CEPH_MSG_OSD_OP 42 | ||
131 | #define CEPH_MSG_OSD_OPREPLY 43 | ||
132 | |||
133 | /* pool operations */ | ||
134 | enum { | ||
135 | POOL_OP_CREATE = 0x01, | ||
136 | POOL_OP_DELETE = 0x02, | ||
137 | POOL_OP_AUID_CHANGE = 0x03, | ||
138 | POOL_OP_CREATE_SNAP = 0x11, | ||
139 | POOL_OP_DELETE_SNAP = 0x12, | ||
140 | POOL_OP_CREATE_UNMANAGED_SNAP = 0x21, | ||
141 | POOL_OP_DELETE_UNMANAGED_SNAP = 0x22, | ||
142 | }; | ||
143 | |||
144 | struct ceph_mon_request_header { | ||
145 | __le64 have_version; | ||
146 | __le16 session_mon; | ||
147 | __le64 session_mon_tid; | ||
148 | } __attribute__ ((packed)); | ||
149 | |||
150 | struct ceph_mon_statfs { | ||
151 | struct ceph_mon_request_header monhdr; | ||
152 | struct ceph_fsid fsid; | ||
153 | } __attribute__ ((packed)); | ||
154 | |||
155 | struct ceph_statfs { | ||
156 | __le64 kb, kb_used, kb_avail; | ||
157 | __le64 num_objects; | ||
158 | } __attribute__ ((packed)); | ||
159 | |||
160 | struct ceph_mon_statfs_reply { | ||
161 | struct ceph_fsid fsid; | ||
162 | __le64 version; | ||
163 | struct ceph_statfs st; | ||
164 | } __attribute__ ((packed)); | ||
165 | |||
166 | const char *ceph_pool_op_name(int op); | ||
167 | |||
168 | struct ceph_mon_poolop { | ||
169 | struct ceph_mon_request_header monhdr; | ||
170 | struct ceph_fsid fsid; | ||
171 | __le32 pool; | ||
172 | __le32 op; | ||
173 | __le64 auid; | ||
174 | __le64 snapid; | ||
175 | __le32 name_len; | ||
176 | } __attribute__ ((packed)); | ||
177 | |||
178 | struct ceph_mon_poolop_reply { | ||
179 | struct ceph_mon_request_header monhdr; | ||
180 | struct ceph_fsid fsid; | ||
181 | __le32 reply_code; | ||
182 | __le32 epoch; | ||
183 | char has_data; | ||
184 | char data[0]; | ||
185 | } __attribute__ ((packed)); | ||
186 | |||
187 | struct ceph_mon_unmanaged_snap { | ||
188 | __le64 snapid; | ||
189 | } __attribute__ ((packed)); | ||
190 | |||
191 | struct ceph_osd_getmap { | ||
192 | struct ceph_mon_request_header monhdr; | ||
193 | struct ceph_fsid fsid; | ||
194 | __le32 start; | ||
195 | } __attribute__ ((packed)); | ||
196 | |||
197 | struct ceph_mds_getmap { | ||
198 | struct ceph_mon_request_header monhdr; | ||
199 | struct ceph_fsid fsid; | ||
200 | } __attribute__ ((packed)); | ||
201 | |||
202 | struct ceph_client_mount { | ||
203 | struct ceph_mon_request_header monhdr; | ||
204 | } __attribute__ ((packed)); | ||
205 | |||
206 | struct ceph_mon_subscribe_item { | ||
207 | __le64 have_version; __le64 have; | ||
208 | __u8 onetime; | ||
209 | } __attribute__ ((packed)); | ||
210 | |||
211 | struct ceph_mon_subscribe_ack { | ||
212 | __le32 duration; /* seconds */ | ||
213 | struct ceph_fsid fsid; | ||
214 | } __attribute__ ((packed)); | ||
215 | |||
216 | /* | ||
217 | * mds states | ||
218 | * > 0 -> in | ||
219 | * <= 0 -> out | ||
220 | */ | ||
221 | #define CEPH_MDS_STATE_DNE 0 /* down, does not exist. */ | ||
222 | #define CEPH_MDS_STATE_STOPPED -1 /* down, once existed, but no subtrees. | ||
223 | empty log. */ | ||
224 | #define CEPH_MDS_STATE_BOOT -4 /* up, boot announcement. */ | ||
225 | #define CEPH_MDS_STATE_STANDBY -5 /* up, idle. waiting for assignment. */ | ||
226 | #define CEPH_MDS_STATE_CREATING -6 /* up, creating MDS instance. */ | ||
227 | #define CEPH_MDS_STATE_STARTING -7 /* up, starting previously stopped mds */ | ||
228 | #define CEPH_MDS_STATE_STANDBY_REPLAY -8 /* up, tailing active node's journal */ | ||
229 | |||
230 | #define CEPH_MDS_STATE_REPLAY 8 /* up, replaying journal. */ | ||
231 | #define CEPH_MDS_STATE_RESOLVE 9 /* up, disambiguating distributed | ||
232 | operations (import, rename, etc.) */ | ||
233 | #define CEPH_MDS_STATE_RECONNECT 10 /* up, reconnect to clients */ | ||
234 | #define CEPH_MDS_STATE_REJOIN 11 /* up, rejoining distributed cache */ | ||
235 | #define CEPH_MDS_STATE_CLIENTREPLAY 12 /* up, replaying client operations */ | ||
236 | #define CEPH_MDS_STATE_ACTIVE 13 /* up, active */ | ||
237 | #define CEPH_MDS_STATE_STOPPING 14 /* up, but exporting metadata */ | ||
238 | |||
239 | extern const char *ceph_mds_state_name(int s); | ||
240 | |||
241 | |||
242 | /* | ||
243 | * metadata lock types. | ||
244 | * - these are bitmasks.. we can compose them | ||
245 | * - they also define the lock ordering by the MDS | ||
246 | * - a few of these are internal to the mds | ||
247 | */ | ||
248 | #define CEPH_LOCK_DVERSION 1 | ||
249 | #define CEPH_LOCK_DN 2 | ||
250 | #define CEPH_LOCK_ISNAP 16 | ||
251 | #define CEPH_LOCK_IVERSION 32 /* mds internal */ | ||
252 | #define CEPH_LOCK_IFILE 64 | ||
253 | #define CEPH_LOCK_IAUTH 128 | ||
254 | #define CEPH_LOCK_ILINK 256 | ||
255 | #define CEPH_LOCK_IDFT 512 /* dir frag tree */ | ||
256 | #define CEPH_LOCK_INEST 1024 /* mds internal */ | ||
257 | #define CEPH_LOCK_IXATTR 2048 | ||
258 | #define CEPH_LOCK_IFLOCK 4096 /* advisory file locks */ | ||
259 | #define CEPH_LOCK_INO 8192 /* immutable inode bits; not a lock */ | ||
260 | |||
261 | /* client_session ops */ | ||
262 | enum { | ||
263 | CEPH_SESSION_REQUEST_OPEN, | ||
264 | CEPH_SESSION_OPEN, | ||
265 | CEPH_SESSION_REQUEST_CLOSE, | ||
266 | CEPH_SESSION_CLOSE, | ||
267 | CEPH_SESSION_REQUEST_RENEWCAPS, | ||
268 | CEPH_SESSION_RENEWCAPS, | ||
269 | CEPH_SESSION_STALE, | ||
270 | CEPH_SESSION_RECALL_STATE, | ||
271 | }; | ||
272 | |||
273 | extern const char *ceph_session_op_name(int op); | ||
274 | |||
275 | struct ceph_mds_session_head { | ||
276 | __le32 op; | ||
277 | __le64 seq; | ||
278 | struct ceph_timespec stamp; | ||
279 | __le32 max_caps, max_leases; | ||
280 | } __attribute__ ((packed)); | ||
281 | |||
282 | /* client_request */ | ||
283 | /* | ||
284 | * metadata ops. | ||
285 | * & 0x001000 -> write op | ||
286 | * & 0x010000 -> follow symlink (e.g. stat(), not lstat()). | ||
287 | & & 0x100000 -> use weird ino/path trace | ||
288 | */ | ||
289 | #define CEPH_MDS_OP_WRITE 0x001000 | ||
290 | enum { | ||
291 | CEPH_MDS_OP_LOOKUP = 0x00100, | ||
292 | CEPH_MDS_OP_GETATTR = 0x00101, | ||
293 | CEPH_MDS_OP_LOOKUPHASH = 0x00102, | ||
294 | CEPH_MDS_OP_LOOKUPPARENT = 0x00103, | ||
295 | |||
296 | CEPH_MDS_OP_SETXATTR = 0x01105, | ||
297 | CEPH_MDS_OP_RMXATTR = 0x01106, | ||
298 | CEPH_MDS_OP_SETLAYOUT = 0x01107, | ||
299 | CEPH_MDS_OP_SETATTR = 0x01108, | ||
300 | CEPH_MDS_OP_SETFILELOCK= 0x01109, | ||
301 | CEPH_MDS_OP_GETFILELOCK= 0x00110, | ||
302 | |||
303 | CEPH_MDS_OP_MKNOD = 0x01201, | ||
304 | CEPH_MDS_OP_LINK = 0x01202, | ||
305 | CEPH_MDS_OP_UNLINK = 0x01203, | ||
306 | CEPH_MDS_OP_RENAME = 0x01204, | ||
307 | CEPH_MDS_OP_MKDIR = 0x01220, | ||
308 | CEPH_MDS_OP_RMDIR = 0x01221, | ||
309 | CEPH_MDS_OP_SYMLINK = 0x01222, | ||
310 | |||
311 | CEPH_MDS_OP_CREATE = 0x01301, | ||
312 | CEPH_MDS_OP_OPEN = 0x00302, | ||
313 | CEPH_MDS_OP_READDIR = 0x00305, | ||
314 | |||
315 | CEPH_MDS_OP_LOOKUPSNAP = 0x00400, | ||
316 | CEPH_MDS_OP_MKSNAP = 0x01400, | ||
317 | CEPH_MDS_OP_RMSNAP = 0x01401, | ||
318 | CEPH_MDS_OP_LSSNAP = 0x00402, | ||
319 | }; | ||
320 | |||
321 | extern const char *ceph_mds_op_name(int op); | ||
322 | |||
323 | |||
324 | #define CEPH_SETATTR_MODE 1 | ||
325 | #define CEPH_SETATTR_UID 2 | ||
326 | #define CEPH_SETATTR_GID 4 | ||
327 | #define CEPH_SETATTR_MTIME 8 | ||
328 | #define CEPH_SETATTR_ATIME 16 | ||
329 | #define CEPH_SETATTR_SIZE 32 | ||
330 | #define CEPH_SETATTR_CTIME 64 | ||
331 | |||
332 | union ceph_mds_request_args { | ||
333 | struct { | ||
334 | __le32 mask; /* CEPH_CAP_* */ | ||
335 | } __attribute__ ((packed)) getattr; | ||
336 | struct { | ||
337 | __le32 mode; | ||
338 | __le32 uid; | ||
339 | __le32 gid; | ||
340 | struct ceph_timespec mtime; | ||
341 | struct ceph_timespec atime; | ||
342 | __le64 size, old_size; /* old_size needed by truncate */ | ||
343 | __le32 mask; /* CEPH_SETATTR_* */ | ||
344 | } __attribute__ ((packed)) setattr; | ||
345 | struct { | ||
346 | __le32 frag; /* which dir fragment */ | ||
347 | __le32 max_entries; /* how many dentries to grab */ | ||
348 | __le32 max_bytes; | ||
349 | } __attribute__ ((packed)) readdir; | ||
350 | struct { | ||
351 | __le32 mode; | ||
352 | __le32 rdev; | ||
353 | } __attribute__ ((packed)) mknod; | ||
354 | struct { | ||
355 | __le32 mode; | ||
356 | } __attribute__ ((packed)) mkdir; | ||
357 | struct { | ||
358 | __le32 flags; | ||
359 | __le32 mode; | ||
360 | __le32 stripe_unit; /* layout for newly created file */ | ||
361 | __le32 stripe_count; /* ... */ | ||
362 | __le32 object_size; | ||
363 | __le32 file_replication; | ||
364 | __le32 preferred; | ||
365 | } __attribute__ ((packed)) open; | ||
366 | struct { | ||
367 | __le32 flags; | ||
368 | } __attribute__ ((packed)) setxattr; | ||
369 | struct { | ||
370 | struct ceph_file_layout layout; | ||
371 | } __attribute__ ((packed)) setlayout; | ||
372 | struct { | ||
373 | __u8 rule; /* currently fcntl or flock */ | ||
374 | __u8 type; /* shared, exclusive, remove*/ | ||
375 | __le64 pid; /* process id requesting the lock */ | ||
376 | __le64 pid_namespace; | ||
377 | __le64 start; /* initial location to lock */ | ||
378 | __le64 length; /* num bytes to lock from start */ | ||
379 | __u8 wait; /* will caller wait for lock to become available? */ | ||
380 | } __attribute__ ((packed)) filelock_change; | ||
381 | } __attribute__ ((packed)); | ||
382 | |||
383 | #define CEPH_MDS_FLAG_REPLAY 1 /* this is a replayed op */ | ||
384 | #define CEPH_MDS_FLAG_WANT_DENTRY 2 /* want dentry in reply */ | ||
385 | |||
386 | struct ceph_mds_request_head { | ||
387 | __le64 oldest_client_tid; | ||
388 | __le32 mdsmap_epoch; /* on client */ | ||
389 | __le32 flags; /* CEPH_MDS_FLAG_* */ | ||
390 | __u8 num_retry, num_fwd; /* count retry, fwd attempts */ | ||
391 | __le16 num_releases; /* # include cap/lease release records */ | ||
392 | __le32 op; /* mds op code */ | ||
393 | __le32 caller_uid, caller_gid; | ||
394 | __le64 ino; /* use this ino for openc, mkdir, mknod, | ||
395 | etc. (if replaying) */ | ||
396 | union ceph_mds_request_args args; | ||
397 | } __attribute__ ((packed)); | ||
398 | |||
399 | /* cap/lease release record */ | ||
400 | struct ceph_mds_request_release { | ||
401 | __le64 ino, cap_id; /* ino and unique cap id */ | ||
402 | __le32 caps, wanted; /* new issued, wanted */ | ||
403 | __le32 seq, issue_seq, mseq; | ||
404 | __le32 dname_seq; /* if releasing a dentry lease, a */ | ||
405 | __le32 dname_len; /* string follows. */ | ||
406 | } __attribute__ ((packed)); | ||
407 | |||
408 | /* client reply */ | ||
409 | struct ceph_mds_reply_head { | ||
410 | __le32 op; | ||
411 | __le32 result; | ||
412 | __le32 mdsmap_epoch; | ||
413 | __u8 safe; /* true if committed to disk */ | ||
414 | __u8 is_dentry, is_target; /* true if dentry, target inode records | ||
415 | are included with reply */ | ||
416 | } __attribute__ ((packed)); | ||
417 | |||
418 | /* one for each node split */ | ||
419 | struct ceph_frag_tree_split { | ||
420 | __le32 frag; /* this frag splits... */ | ||
421 | __le32 by; /* ...by this many bits */ | ||
422 | } __attribute__ ((packed)); | ||
423 | |||
424 | struct ceph_frag_tree_head { | ||
425 | __le32 nsplits; /* num ceph_frag_tree_split records */ | ||
426 | struct ceph_frag_tree_split splits[]; | ||
427 | } __attribute__ ((packed)); | ||
428 | |||
429 | /* capability issue, for bundling with mds reply */ | ||
430 | struct ceph_mds_reply_cap { | ||
431 | __le32 caps, wanted; /* caps issued, wanted */ | ||
432 | __le64 cap_id; | ||
433 | __le32 seq, mseq; | ||
434 | __le64 realm; /* snap realm */ | ||
435 | __u8 flags; /* CEPH_CAP_FLAG_* */ | ||
436 | } __attribute__ ((packed)); | ||
437 | |||
438 | #define CEPH_CAP_FLAG_AUTH 1 /* cap is issued by auth mds */ | ||
439 | |||
440 | /* inode record, for bundling with mds reply */ | ||
441 | struct ceph_mds_reply_inode { | ||
442 | __le64 ino; | ||
443 | __le64 snapid; | ||
444 | __le32 rdev; | ||
445 | __le64 version; /* inode version */ | ||
446 | __le64 xattr_version; /* version for xattr blob */ | ||
447 | struct ceph_mds_reply_cap cap; /* caps issued for this inode */ | ||
448 | struct ceph_file_layout layout; | ||
449 | struct ceph_timespec ctime, mtime, atime; | ||
450 | __le32 time_warp_seq; | ||
451 | __le64 size, max_size, truncate_size; | ||
452 | __le32 truncate_seq; | ||
453 | __le32 mode, uid, gid; | ||
454 | __le32 nlink; | ||
455 | __le64 files, subdirs, rbytes, rfiles, rsubdirs; /* dir stats */ | ||
456 | struct ceph_timespec rctime; | ||
457 | struct ceph_frag_tree_head fragtree; /* (must be at end of struct) */ | ||
458 | } __attribute__ ((packed)); | ||
459 | /* followed by frag array, then symlink string, then xattr blob */ | ||
460 | |||
461 | /* reply_lease follows dname, and reply_inode */ | ||
462 | struct ceph_mds_reply_lease { | ||
463 | __le16 mask; /* lease type(s) */ | ||
464 | __le32 duration_ms; /* lease duration */ | ||
465 | __le32 seq; | ||
466 | } __attribute__ ((packed)); | ||
467 | |||
468 | struct ceph_mds_reply_dirfrag { | ||
469 | __le32 frag; /* fragment */ | ||
470 | __le32 auth; /* auth mds, if this is a delegation point */ | ||
471 | __le32 ndist; /* number of mds' this is replicated on */ | ||
472 | __le32 dist[]; | ||
473 | } __attribute__ ((packed)); | ||
474 | |||
475 | #define CEPH_LOCK_FCNTL 1 | ||
476 | #define CEPH_LOCK_FLOCK 2 | ||
477 | |||
478 | #define CEPH_LOCK_SHARED 1 | ||
479 | #define CEPH_LOCK_EXCL 2 | ||
480 | #define CEPH_LOCK_UNLOCK 4 | ||
481 | |||
482 | struct ceph_filelock { | ||
483 | __le64 start;/* file offset to start lock at */ | ||
484 | __le64 length; /* num bytes to lock; 0 for all following start */ | ||
485 | __le64 client; /* which client holds the lock */ | ||
486 | __le64 pid; /* process id holding the lock on the client */ | ||
487 | __le64 pid_namespace; | ||
488 | __u8 type; /* shared lock, exclusive lock, or unlock */ | ||
489 | } __attribute__ ((packed)); | ||
490 | |||
491 | |||
492 | /* file access modes */ | ||
493 | #define CEPH_FILE_MODE_PIN 0 | ||
494 | #define CEPH_FILE_MODE_RD 1 | ||
495 | #define CEPH_FILE_MODE_WR 2 | ||
496 | #define CEPH_FILE_MODE_RDWR 3 /* RD | WR */ | ||
497 | #define CEPH_FILE_MODE_LAZY 4 /* lazy io */ | ||
498 | #define CEPH_FILE_MODE_NUM 8 /* bc these are bit fields.. mostly */ | ||
499 | |||
500 | int ceph_flags_to_mode(int flags); | ||
501 | |||
502 | |||
503 | /* capability bits */ | ||
504 | #define CEPH_CAP_PIN 1 /* no specific capabilities beyond the pin */ | ||
505 | |||
506 | /* generic cap bits */ | ||
507 | #define CEPH_CAP_GSHARED 1 /* client can reads */ | ||
508 | #define CEPH_CAP_GEXCL 2 /* client can read and update */ | ||
509 | #define CEPH_CAP_GCACHE 4 /* (file) client can cache reads */ | ||
510 | #define CEPH_CAP_GRD 8 /* (file) client can read */ | ||
511 | #define CEPH_CAP_GWR 16 /* (file) client can write */ | ||
512 | #define CEPH_CAP_GBUFFER 32 /* (file) client can buffer writes */ | ||
513 | #define CEPH_CAP_GWREXTEND 64 /* (file) client can extend EOF */ | ||
514 | #define CEPH_CAP_GLAZYIO 128 /* (file) client can perform lazy io */ | ||
515 | |||
516 | /* per-lock shift */ | ||
517 | #define CEPH_CAP_SAUTH 2 | ||
518 | #define CEPH_CAP_SLINK 4 | ||
519 | #define CEPH_CAP_SXATTR 6 | ||
520 | #define CEPH_CAP_SFILE 8 | ||
521 | #define CEPH_CAP_SFLOCK 20 | ||
522 | |||
523 | #define CEPH_CAP_BITS 22 | ||
524 | |||
525 | /* composed values */ | ||
526 | #define CEPH_CAP_AUTH_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SAUTH) | ||
527 | #define CEPH_CAP_AUTH_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SAUTH) | ||
528 | #define CEPH_CAP_LINK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SLINK) | ||
529 | #define CEPH_CAP_LINK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SLINK) | ||
530 | #define CEPH_CAP_XATTR_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SXATTR) | ||
531 | #define CEPH_CAP_XATTR_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SXATTR) | ||
532 | #define CEPH_CAP_FILE(x) (x << CEPH_CAP_SFILE) | ||
533 | #define CEPH_CAP_FILE_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFILE) | ||
534 | #define CEPH_CAP_FILE_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFILE) | ||
535 | #define CEPH_CAP_FILE_CACHE (CEPH_CAP_GCACHE << CEPH_CAP_SFILE) | ||
536 | #define CEPH_CAP_FILE_RD (CEPH_CAP_GRD << CEPH_CAP_SFILE) | ||
537 | #define CEPH_CAP_FILE_WR (CEPH_CAP_GWR << CEPH_CAP_SFILE) | ||
538 | #define CEPH_CAP_FILE_BUFFER (CEPH_CAP_GBUFFER << CEPH_CAP_SFILE) | ||
539 | #define CEPH_CAP_FILE_WREXTEND (CEPH_CAP_GWREXTEND << CEPH_CAP_SFILE) | ||
540 | #define CEPH_CAP_FILE_LAZYIO (CEPH_CAP_GLAZYIO << CEPH_CAP_SFILE) | ||
541 | #define CEPH_CAP_FLOCK_SHARED (CEPH_CAP_GSHARED << CEPH_CAP_SFLOCK) | ||
542 | #define CEPH_CAP_FLOCK_EXCL (CEPH_CAP_GEXCL << CEPH_CAP_SFLOCK) | ||
543 | |||
544 | |||
545 | /* cap masks (for getattr) */ | ||
546 | #define CEPH_STAT_CAP_INODE CEPH_CAP_PIN | ||
547 | #define CEPH_STAT_CAP_TYPE CEPH_CAP_PIN /* mode >> 12 */ | ||
548 | #define CEPH_STAT_CAP_SYMLINK CEPH_CAP_PIN | ||
549 | #define CEPH_STAT_CAP_UID CEPH_CAP_AUTH_SHARED | ||
550 | #define CEPH_STAT_CAP_GID CEPH_CAP_AUTH_SHARED | ||
551 | #define CEPH_STAT_CAP_MODE CEPH_CAP_AUTH_SHARED | ||
552 | #define CEPH_STAT_CAP_NLINK CEPH_CAP_LINK_SHARED | ||
553 | #define CEPH_STAT_CAP_LAYOUT CEPH_CAP_FILE_SHARED | ||
554 | #define CEPH_STAT_CAP_MTIME CEPH_CAP_FILE_SHARED | ||
555 | #define CEPH_STAT_CAP_SIZE CEPH_CAP_FILE_SHARED | ||
556 | #define CEPH_STAT_CAP_ATIME CEPH_CAP_FILE_SHARED /* fixme */ | ||
557 | #define CEPH_STAT_CAP_XATTR CEPH_CAP_XATTR_SHARED | ||
558 | #define CEPH_STAT_CAP_INODE_ALL (CEPH_CAP_PIN | \ | ||
559 | CEPH_CAP_AUTH_SHARED | \ | ||
560 | CEPH_CAP_LINK_SHARED | \ | ||
561 | CEPH_CAP_FILE_SHARED | \ | ||
562 | CEPH_CAP_XATTR_SHARED) | ||
563 | |||
564 | #define CEPH_CAP_ANY_SHARED (CEPH_CAP_AUTH_SHARED | \ | ||
565 | CEPH_CAP_LINK_SHARED | \ | ||
566 | CEPH_CAP_XATTR_SHARED | \ | ||
567 | CEPH_CAP_FILE_SHARED) | ||
568 | #define CEPH_CAP_ANY_RD (CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_RD | \ | ||
569 | CEPH_CAP_FILE_CACHE) | ||
570 | |||
571 | #define CEPH_CAP_ANY_EXCL (CEPH_CAP_AUTH_EXCL | \ | ||
572 | CEPH_CAP_LINK_EXCL | \ | ||
573 | CEPH_CAP_XATTR_EXCL | \ | ||
574 | CEPH_CAP_FILE_EXCL) | ||
575 | #define CEPH_CAP_ANY_FILE_WR (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_BUFFER | \ | ||
576 | CEPH_CAP_FILE_EXCL) | ||
577 | #define CEPH_CAP_ANY_WR (CEPH_CAP_ANY_EXCL | CEPH_CAP_ANY_FILE_WR) | ||
578 | #define CEPH_CAP_ANY (CEPH_CAP_ANY_RD | CEPH_CAP_ANY_EXCL | \ | ||
579 | CEPH_CAP_ANY_FILE_WR | CEPH_CAP_FILE_LAZYIO | \ | ||
580 | CEPH_CAP_PIN) | ||
581 | |||
582 | #define CEPH_CAP_LOCKS (CEPH_LOCK_IFILE | CEPH_LOCK_IAUTH | CEPH_LOCK_ILINK | \ | ||
583 | CEPH_LOCK_IXATTR) | ||
584 | |||
585 | int ceph_caps_for_mode(int mode); | ||
586 | |||
587 | enum { | ||
588 | CEPH_CAP_OP_GRANT, /* mds->client grant */ | ||
589 | CEPH_CAP_OP_REVOKE, /* mds->client revoke */ | ||
590 | CEPH_CAP_OP_TRUNC, /* mds->client trunc notify */ | ||
591 | CEPH_CAP_OP_EXPORT, /* mds has exported the cap */ | ||
592 | CEPH_CAP_OP_IMPORT, /* mds has imported the cap */ | ||
593 | CEPH_CAP_OP_UPDATE, /* client->mds update */ | ||
594 | CEPH_CAP_OP_DROP, /* client->mds drop cap bits */ | ||
595 | CEPH_CAP_OP_FLUSH, /* client->mds cap writeback */ | ||
596 | CEPH_CAP_OP_FLUSH_ACK, /* mds->client flushed */ | ||
597 | CEPH_CAP_OP_FLUSHSNAP, /* client->mds flush snapped metadata */ | ||
598 | CEPH_CAP_OP_FLUSHSNAP_ACK, /* mds->client flushed snapped metadata */ | ||
599 | CEPH_CAP_OP_RELEASE, /* client->mds release (clean) cap */ | ||
600 | CEPH_CAP_OP_RENEW, /* client->mds renewal request */ | ||
601 | }; | ||
602 | |||
603 | extern const char *ceph_cap_op_name(int op); | ||
604 | |||
605 | /* | ||
606 | * caps message, used for capability callbacks, acks, requests, etc. | ||
607 | */ | ||
608 | struct ceph_mds_caps { | ||
609 | __le32 op; /* CEPH_CAP_OP_* */ | ||
610 | __le64 ino, realm; | ||
611 | __le64 cap_id; | ||
612 | __le32 seq, issue_seq; | ||
613 | __le32 caps, wanted, dirty; /* latest issued/wanted/dirty */ | ||
614 | __le32 migrate_seq; | ||
615 | __le64 snap_follows; | ||
616 | __le32 snap_trace_len; | ||
617 | |||
618 | /* authlock */ | ||
619 | __le32 uid, gid, mode; | ||
620 | |||
621 | /* linklock */ | ||
622 | __le32 nlink; | ||
623 | |||
624 | /* xattrlock */ | ||
625 | __le32 xattr_len; | ||
626 | __le64 xattr_version; | ||
627 | |||
628 | /* filelock */ | ||
629 | __le64 size, max_size, truncate_size; | ||
630 | __le32 truncate_seq; | ||
631 | struct ceph_timespec mtime, atime, ctime; | ||
632 | struct ceph_file_layout layout; | ||
633 | __le32 time_warp_seq; | ||
634 | } __attribute__ ((packed)); | ||
635 | |||
636 | /* cap release msg head */ | ||
637 | struct ceph_mds_cap_release { | ||
638 | __le32 num; /* number of cap_items that follow */ | ||
639 | } __attribute__ ((packed)); | ||
640 | |||
641 | struct ceph_mds_cap_item { | ||
642 | __le64 ino; | ||
643 | __le64 cap_id; | ||
644 | __le32 migrate_seq, seq; | ||
645 | } __attribute__ ((packed)); | ||
646 | |||
647 | #define CEPH_MDS_LEASE_REVOKE 1 /* mds -> client */ | ||
648 | #define CEPH_MDS_LEASE_RELEASE 2 /* client -> mds */ | ||
649 | #define CEPH_MDS_LEASE_RENEW 3 /* client <-> mds */ | ||
650 | #define CEPH_MDS_LEASE_REVOKE_ACK 4 /* client -> mds */ | ||
651 | |||
652 | extern const char *ceph_lease_op_name(int o); | ||
653 | |||
654 | /* lease msg header */ | ||
655 | struct ceph_mds_lease { | ||
656 | __u8 action; /* CEPH_MDS_LEASE_* */ | ||
657 | __le16 mask; /* which lease */ | ||
658 | __le64 ino; | ||
659 | __le64 first, last; /* snap range */ | ||
660 | __le32 seq; | ||
661 | __le32 duration_ms; /* duration of renewal */ | ||
662 | } __attribute__ ((packed)); | ||
663 | /* followed by a __le32+string for dname */ | ||
664 | |||
665 | /* client reconnect */ | ||
666 | struct ceph_mds_cap_reconnect { | ||
667 | __le64 cap_id; | ||
668 | __le32 wanted; | ||
669 | __le32 issued; | ||
670 | __le64 snaprealm; | ||
671 | __le64 pathbase; /* base ino for our path to this ino */ | ||
672 | __le32 flock_len; /* size of flock state blob, if any */ | ||
673 | } __attribute__ ((packed)); | ||
674 | /* followed by flock blob */ | ||
675 | |||
676 | struct ceph_mds_cap_reconnect_v1 { | ||
677 | __le64 cap_id; | ||
678 | __le32 wanted; | ||
679 | __le32 issued; | ||
680 | __le64 size; | ||
681 | struct ceph_timespec mtime, atime; | ||
682 | __le64 snaprealm; | ||
683 | __le64 pathbase; /* base ino for our path to this ino */ | ||
684 | } __attribute__ ((packed)); | ||
685 | |||
686 | struct ceph_mds_snaprealm_reconnect { | ||
687 | __le64 ino; /* snap realm base */ | ||
688 | __le64 seq; /* snap seq for this snap realm */ | ||
689 | __le64 parent; /* parent realm */ | ||
690 | } __attribute__ ((packed)); | ||
691 | |||
692 | /* | ||
693 | * snaps | ||
694 | */ | ||
695 | enum { | ||
696 | CEPH_SNAP_OP_UPDATE, /* CREATE or DESTROY */ | ||
697 | CEPH_SNAP_OP_CREATE, | ||
698 | CEPH_SNAP_OP_DESTROY, | ||
699 | CEPH_SNAP_OP_SPLIT, | ||
700 | }; | ||
701 | |||
702 | extern const char *ceph_snap_op_name(int o); | ||
703 | |||
704 | /* snap msg header */ | ||
705 | struct ceph_mds_snap_head { | ||
706 | __le32 op; /* CEPH_SNAP_OP_* */ | ||
707 | __le64 split; /* ino to split off, if any */ | ||
708 | __le32 num_split_inos; /* # inos belonging to new child realm */ | ||
709 | __le32 num_split_realms; /* # child realms udner new child realm */ | ||
710 | __le32 trace_len; /* size of snap trace blob */ | ||
711 | } __attribute__ ((packed)); | ||
712 | /* followed by split ino list, then split realms, then the trace blob */ | ||
713 | |||
714 | /* | ||
715 | * encode info about a snaprealm, as viewed by a client | ||
716 | */ | ||
717 | struct ceph_mds_snap_realm { | ||
718 | __le64 ino; /* ino */ | ||
719 | __le64 created; /* snap: when created */ | ||
720 | __le64 parent; /* ino: parent realm */ | ||
721 | __le64 parent_since; /* snap: same parent since */ | ||
722 | __le64 seq; /* snap: version */ | ||
723 | __le32 num_snaps; | ||
724 | __le32 num_prior_parent_snaps; | ||
725 | } __attribute__ ((packed)); | ||
726 | /* followed by my snap list, then prior parent snap list */ | ||
727 | |||
728 | #endif | ||
diff --git a/fs/ceph/ceph_hash.c b/fs/ceph/ceph_hash.c deleted file mode 100644 index bd570015d147..000000000000 --- a/fs/ceph/ceph_hash.c +++ /dev/null | |||
@@ -1,118 +0,0 @@ | |||
1 | |||
2 | #include "types.h" | ||
3 | |||
4 | /* | ||
5 | * Robert Jenkin's hash function. | ||
6 | * http://burtleburtle.net/bob/hash/evahash.html | ||
7 | * This is in the public domain. | ||
8 | */ | ||
9 | #define mix(a, b, c) \ | ||
10 | do { \ | ||
11 | a = a - b; a = a - c; a = a ^ (c >> 13); \ | ||
12 | b = b - c; b = b - a; b = b ^ (a << 8); \ | ||
13 | c = c - a; c = c - b; c = c ^ (b >> 13); \ | ||
14 | a = a - b; a = a - c; a = a ^ (c >> 12); \ | ||
15 | b = b - c; b = b - a; b = b ^ (a << 16); \ | ||
16 | c = c - a; c = c - b; c = c ^ (b >> 5); \ | ||
17 | a = a - b; a = a - c; a = a ^ (c >> 3); \ | ||
18 | b = b - c; b = b - a; b = b ^ (a << 10); \ | ||
19 | c = c - a; c = c - b; c = c ^ (b >> 15); \ | ||
20 | } while (0) | ||
21 | |||
22 | unsigned ceph_str_hash_rjenkins(const char *str, unsigned length) | ||
23 | { | ||
24 | const unsigned char *k = (const unsigned char *)str; | ||
25 | __u32 a, b, c; /* the internal state */ | ||
26 | __u32 len; /* how many key bytes still need mixing */ | ||
27 | |||
28 | /* Set up the internal state */ | ||
29 | len = length; | ||
30 | a = 0x9e3779b9; /* the golden ratio; an arbitrary value */ | ||
31 | b = a; | ||
32 | c = 0; /* variable initialization of internal state */ | ||
33 | |||
34 | /* handle most of the key */ | ||
35 | while (len >= 12) { | ||
36 | a = a + (k[0] + ((__u32)k[1] << 8) + ((__u32)k[2] << 16) + | ||
37 | ((__u32)k[3] << 24)); | ||
38 | b = b + (k[4] + ((__u32)k[5] << 8) + ((__u32)k[6] << 16) + | ||
39 | ((__u32)k[7] << 24)); | ||
40 | c = c + (k[8] + ((__u32)k[9] << 8) + ((__u32)k[10] << 16) + | ||
41 | ((__u32)k[11] << 24)); | ||
42 | mix(a, b, c); | ||
43 | k = k + 12; | ||
44 | len = len - 12; | ||
45 | } | ||
46 | |||
47 | /* handle the last 11 bytes */ | ||
48 | c = c + length; | ||
49 | switch (len) { /* all the case statements fall through */ | ||
50 | case 11: | ||
51 | c = c + ((__u32)k[10] << 24); | ||
52 | case 10: | ||
53 | c = c + ((__u32)k[9] << 16); | ||
54 | case 9: | ||
55 | c = c + ((__u32)k[8] << 8); | ||
56 | /* the first byte of c is reserved for the length */ | ||
57 | case 8: | ||
58 | b = b + ((__u32)k[7] << 24); | ||
59 | case 7: | ||
60 | b = b + ((__u32)k[6] << 16); | ||
61 | case 6: | ||
62 | b = b + ((__u32)k[5] << 8); | ||
63 | case 5: | ||
64 | b = b + k[4]; | ||
65 | case 4: | ||
66 | a = a + ((__u32)k[3] << 24); | ||
67 | case 3: | ||
68 | a = a + ((__u32)k[2] << 16); | ||
69 | case 2: | ||
70 | a = a + ((__u32)k[1] << 8); | ||
71 | case 1: | ||
72 | a = a + k[0]; | ||
73 | /* case 0: nothing left to add */ | ||
74 | } | ||
75 | mix(a, b, c); | ||
76 | |||
77 | return c; | ||
78 | } | ||
79 | |||
80 | /* | ||
81 | * linux dcache hash | ||
82 | */ | ||
83 | unsigned ceph_str_hash_linux(const char *str, unsigned length) | ||
84 | { | ||
85 | unsigned long hash = 0; | ||
86 | unsigned char c; | ||
87 | |||
88 | while (length--) { | ||
89 | c = *str++; | ||
90 | hash = (hash + (c << 4) + (c >> 4)) * 11; | ||
91 | } | ||
92 | return hash; | ||
93 | } | ||
94 | |||
95 | |||
96 | unsigned ceph_str_hash(int type, const char *s, unsigned len) | ||
97 | { | ||
98 | switch (type) { | ||
99 | case CEPH_STR_HASH_LINUX: | ||
100 | return ceph_str_hash_linux(s, len); | ||
101 | case CEPH_STR_HASH_RJENKINS: | ||
102 | return ceph_str_hash_rjenkins(s, len); | ||
103 | default: | ||
104 | return -1; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | const char *ceph_str_hash_name(int type) | ||
109 | { | ||
110 | switch (type) { | ||
111 | case CEPH_STR_HASH_LINUX: | ||
112 | return "linux"; | ||
113 | case CEPH_STR_HASH_RJENKINS: | ||
114 | return "rjenkins"; | ||
115 | default: | ||
116 | return "unknown"; | ||
117 | } | ||
118 | } | ||
diff --git a/fs/ceph/ceph_hash.h b/fs/ceph/ceph_hash.h deleted file mode 100644 index d099c3f90236..000000000000 --- a/fs/ceph/ceph_hash.h +++ /dev/null | |||
@@ -1,13 +0,0 @@ | |||
1 | #ifndef FS_CEPH_HASH_H | ||
2 | #define FS_CEPH_HASH_H | ||
3 | |||
4 | #define CEPH_STR_HASH_LINUX 0x1 /* linux dcache hash */ | ||
5 | #define CEPH_STR_HASH_RJENKINS 0x2 /* robert jenkins' */ | ||
6 | |||
7 | extern unsigned ceph_str_hash_linux(const char *s, unsigned len); | ||
8 | extern unsigned ceph_str_hash_rjenkins(const char *s, unsigned len); | ||
9 | |||
10 | extern unsigned ceph_str_hash(int type, const char *s, unsigned len); | ||
11 | extern const char *ceph_str_hash_name(int type); | ||
12 | |||
13 | #endif | ||
diff --git a/fs/ceph/crush/crush.c b/fs/ceph/crush/crush.c deleted file mode 100644 index fabd302e5779..000000000000 --- a/fs/ceph/crush/crush.c +++ /dev/null | |||
@@ -1,151 +0,0 @@ | |||
1 | |||
2 | #ifdef __KERNEL__ | ||
3 | # include <linux/slab.h> | ||
4 | #else | ||
5 | # include <stdlib.h> | ||
6 | # include <assert.h> | ||
7 | # define kfree(x) do { if (x) free(x); } while (0) | ||
8 | # define BUG_ON(x) assert(!(x)) | ||
9 | #endif | ||
10 | |||
11 | #include "crush.h" | ||
12 | |||
13 | const char *crush_bucket_alg_name(int alg) | ||
14 | { | ||
15 | switch (alg) { | ||
16 | case CRUSH_BUCKET_UNIFORM: return "uniform"; | ||
17 | case CRUSH_BUCKET_LIST: return "list"; | ||
18 | case CRUSH_BUCKET_TREE: return "tree"; | ||
19 | case CRUSH_BUCKET_STRAW: return "straw"; | ||
20 | default: return "unknown"; | ||
21 | } | ||
22 | } | ||
23 | |||
24 | /** | ||
25 | * crush_get_bucket_item_weight - Get weight of an item in given bucket | ||
26 | * @b: bucket pointer | ||
27 | * @p: item index in bucket | ||
28 | */ | ||
29 | int crush_get_bucket_item_weight(struct crush_bucket *b, int p) | ||
30 | { | ||
31 | if (p >= b->size) | ||
32 | return 0; | ||
33 | |||
34 | switch (b->alg) { | ||
35 | case CRUSH_BUCKET_UNIFORM: | ||
36 | return ((struct crush_bucket_uniform *)b)->item_weight; | ||
37 | case CRUSH_BUCKET_LIST: | ||
38 | return ((struct crush_bucket_list *)b)->item_weights[p]; | ||
39 | case CRUSH_BUCKET_TREE: | ||
40 | if (p & 1) | ||
41 | return ((struct crush_bucket_tree *)b)->node_weights[p]; | ||
42 | return 0; | ||
43 | case CRUSH_BUCKET_STRAW: | ||
44 | return ((struct crush_bucket_straw *)b)->item_weights[p]; | ||
45 | } | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | /** | ||
50 | * crush_calc_parents - Calculate parent vectors for the given crush map. | ||
51 | * @map: crush_map pointer | ||
52 | */ | ||
53 | void crush_calc_parents(struct crush_map *map) | ||
54 | { | ||
55 | int i, b, c; | ||
56 | |||
57 | for (b = 0; b < map->max_buckets; b++) { | ||
58 | if (map->buckets[b] == NULL) | ||
59 | continue; | ||
60 | for (i = 0; i < map->buckets[b]->size; i++) { | ||
61 | c = map->buckets[b]->items[i]; | ||
62 | BUG_ON(c >= map->max_devices || | ||
63 | c < -map->max_buckets); | ||
64 | if (c >= 0) | ||
65 | map->device_parents[c] = map->buckets[b]->id; | ||
66 | else | ||
67 | map->bucket_parents[-1-c] = map->buckets[b]->id; | ||
68 | } | ||
69 | } | ||
70 | } | ||
71 | |||
72 | void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b) | ||
73 | { | ||
74 | kfree(b->h.perm); | ||
75 | kfree(b->h.items); | ||
76 | kfree(b); | ||
77 | } | ||
78 | |||
79 | void crush_destroy_bucket_list(struct crush_bucket_list *b) | ||
80 | { | ||
81 | kfree(b->item_weights); | ||
82 | kfree(b->sum_weights); | ||
83 | kfree(b->h.perm); | ||
84 | kfree(b->h.items); | ||
85 | kfree(b); | ||
86 | } | ||
87 | |||
88 | void crush_destroy_bucket_tree(struct crush_bucket_tree *b) | ||
89 | { | ||
90 | kfree(b->node_weights); | ||
91 | kfree(b); | ||
92 | } | ||
93 | |||
94 | void crush_destroy_bucket_straw(struct crush_bucket_straw *b) | ||
95 | { | ||
96 | kfree(b->straws); | ||
97 | kfree(b->item_weights); | ||
98 | kfree(b->h.perm); | ||
99 | kfree(b->h.items); | ||
100 | kfree(b); | ||
101 | } | ||
102 | |||
103 | void crush_destroy_bucket(struct crush_bucket *b) | ||
104 | { | ||
105 | switch (b->alg) { | ||
106 | case CRUSH_BUCKET_UNIFORM: | ||
107 | crush_destroy_bucket_uniform((struct crush_bucket_uniform *)b); | ||
108 | break; | ||
109 | case CRUSH_BUCKET_LIST: | ||
110 | crush_destroy_bucket_list((struct crush_bucket_list *)b); | ||
111 | break; | ||
112 | case CRUSH_BUCKET_TREE: | ||
113 | crush_destroy_bucket_tree((struct crush_bucket_tree *)b); | ||
114 | break; | ||
115 | case CRUSH_BUCKET_STRAW: | ||
116 | crush_destroy_bucket_straw((struct crush_bucket_straw *)b); | ||
117 | break; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | /** | ||
122 | * crush_destroy - Destroy a crush_map | ||
123 | * @map: crush_map pointer | ||
124 | */ | ||
125 | void crush_destroy(struct crush_map *map) | ||
126 | { | ||
127 | int b; | ||
128 | |||
129 | /* buckets */ | ||
130 | if (map->buckets) { | ||
131 | for (b = 0; b < map->max_buckets; b++) { | ||
132 | if (map->buckets[b] == NULL) | ||
133 | continue; | ||
134 | crush_destroy_bucket(map->buckets[b]); | ||
135 | } | ||
136 | kfree(map->buckets); | ||
137 | } | ||
138 | |||
139 | /* rules */ | ||
140 | if (map->rules) { | ||
141 | for (b = 0; b < map->max_rules; b++) | ||
142 | kfree(map->rules[b]); | ||
143 | kfree(map->rules); | ||
144 | } | ||
145 | |||
146 | kfree(map->bucket_parents); | ||
147 | kfree(map->device_parents); | ||
148 | kfree(map); | ||
149 | } | ||
150 | |||
151 | |||
diff --git a/fs/ceph/crush/crush.h b/fs/ceph/crush/crush.h deleted file mode 100644 index 97e435b191f4..000000000000 --- a/fs/ceph/crush/crush.h +++ /dev/null | |||
@@ -1,180 +0,0 @@ | |||
1 | #ifndef CEPH_CRUSH_CRUSH_H | ||
2 | #define CEPH_CRUSH_CRUSH_H | ||
3 | |||
4 | #include <linux/types.h> | ||
5 | |||
6 | /* | ||
7 | * CRUSH is a pseudo-random data distribution algorithm that | ||
8 | * efficiently distributes input values (typically, data objects) | ||
9 | * across a heterogeneous, structured storage cluster. | ||
10 | * | ||
11 | * The algorithm was originally described in detail in this paper | ||
12 | * (although the algorithm has evolved somewhat since then): | ||
13 | * | ||
14 | * http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf | ||
15 | * | ||
16 | * LGPL2 | ||
17 | */ | ||
18 | |||
19 | |||
20 | #define CRUSH_MAGIC 0x00010000ul /* for detecting algorithm revisions */ | ||
21 | |||
22 | |||
23 | #define CRUSH_MAX_DEPTH 10 /* max crush hierarchy depth */ | ||
24 | #define CRUSH_MAX_SET 10 /* max size of a mapping result */ | ||
25 | |||
26 | |||
27 | /* | ||
28 | * CRUSH uses user-defined "rules" to describe how inputs should be | ||
29 | * mapped to devices. A rule consists of sequence of steps to perform | ||
30 | * to generate the set of output devices. | ||
31 | */ | ||
32 | struct crush_rule_step { | ||
33 | __u32 op; | ||
34 | __s32 arg1; | ||
35 | __s32 arg2; | ||
36 | }; | ||
37 | |||
38 | /* step op codes */ | ||
39 | enum { | ||
40 | CRUSH_RULE_NOOP = 0, | ||
41 | CRUSH_RULE_TAKE = 1, /* arg1 = value to start with */ | ||
42 | CRUSH_RULE_CHOOSE_FIRSTN = 2, /* arg1 = num items to pick */ | ||
43 | /* arg2 = type */ | ||
44 | CRUSH_RULE_CHOOSE_INDEP = 3, /* same */ | ||
45 | CRUSH_RULE_EMIT = 4, /* no args */ | ||
46 | CRUSH_RULE_CHOOSE_LEAF_FIRSTN = 6, | ||
47 | CRUSH_RULE_CHOOSE_LEAF_INDEP = 7, | ||
48 | }; | ||
49 | |||
50 | /* | ||
51 | * for specifying choose num (arg1) relative to the max parameter | ||
52 | * passed to do_rule | ||
53 | */ | ||
54 | #define CRUSH_CHOOSE_N 0 | ||
55 | #define CRUSH_CHOOSE_N_MINUS(x) (-(x)) | ||
56 | |||
57 | /* | ||
58 | * The rule mask is used to describe what the rule is intended for. | ||
59 | * Given a ruleset and size of output set, we search through the | ||
60 | * rule list for a matching rule_mask. | ||
61 | */ | ||
62 | struct crush_rule_mask { | ||
63 | __u8 ruleset; | ||
64 | __u8 type; | ||
65 | __u8 min_size; | ||
66 | __u8 max_size; | ||
67 | }; | ||
68 | |||
69 | struct crush_rule { | ||
70 | __u32 len; | ||
71 | struct crush_rule_mask mask; | ||
72 | struct crush_rule_step steps[0]; | ||
73 | }; | ||
74 | |||
75 | #define crush_rule_size(len) (sizeof(struct crush_rule) + \ | ||
76 | (len)*sizeof(struct crush_rule_step)) | ||
77 | |||
78 | |||
79 | |||
80 | /* | ||
81 | * A bucket is a named container of other items (either devices or | ||
82 | * other buckets). Items within a bucket are chosen using one of a | ||
83 | * few different algorithms. The table summarizes how the speed of | ||
84 | * each option measures up against mapping stability when items are | ||
85 | * added or removed. | ||
86 | * | ||
87 | * Bucket Alg Speed Additions Removals | ||
88 | * ------------------------------------------------ | ||
89 | * uniform O(1) poor poor | ||
90 | * list O(n) optimal poor | ||
91 | * tree O(log n) good good | ||
92 | * straw O(n) optimal optimal | ||
93 | */ | ||
94 | enum { | ||
95 | CRUSH_BUCKET_UNIFORM = 1, | ||
96 | CRUSH_BUCKET_LIST = 2, | ||
97 | CRUSH_BUCKET_TREE = 3, | ||
98 | CRUSH_BUCKET_STRAW = 4 | ||
99 | }; | ||
100 | extern const char *crush_bucket_alg_name(int alg); | ||
101 | |||
102 | struct crush_bucket { | ||
103 | __s32 id; /* this'll be negative */ | ||
104 | __u16 type; /* non-zero; type=0 is reserved for devices */ | ||
105 | __u8 alg; /* one of CRUSH_BUCKET_* */ | ||
106 | __u8 hash; /* which hash function to use, CRUSH_HASH_* */ | ||
107 | __u32 weight; /* 16-bit fixed point */ | ||
108 | __u32 size; /* num items */ | ||
109 | __s32 *items; | ||
110 | |||
111 | /* | ||
112 | * cached random permutation: used for uniform bucket and for | ||
113 | * the linear search fallback for the other bucket types. | ||
114 | */ | ||
115 | __u32 perm_x; /* @x for which *perm is defined */ | ||
116 | __u32 perm_n; /* num elements of *perm that are permuted/defined */ | ||
117 | __u32 *perm; | ||
118 | }; | ||
119 | |||
120 | struct crush_bucket_uniform { | ||
121 | struct crush_bucket h; | ||
122 | __u32 item_weight; /* 16-bit fixed point; all items equally weighted */ | ||
123 | }; | ||
124 | |||
125 | struct crush_bucket_list { | ||
126 | struct crush_bucket h; | ||
127 | __u32 *item_weights; /* 16-bit fixed point */ | ||
128 | __u32 *sum_weights; /* 16-bit fixed point. element i is sum | ||
129 | of weights 0..i, inclusive */ | ||
130 | }; | ||
131 | |||
132 | struct crush_bucket_tree { | ||
133 | struct crush_bucket h; /* note: h.size is _tree_ size, not number of | ||
134 | actual items */ | ||
135 | __u8 num_nodes; | ||
136 | __u32 *node_weights; | ||
137 | }; | ||
138 | |||
139 | struct crush_bucket_straw { | ||
140 | struct crush_bucket h; | ||
141 | __u32 *item_weights; /* 16-bit fixed point */ | ||
142 | __u32 *straws; /* 16-bit fixed point */ | ||
143 | }; | ||
144 | |||
145 | |||
146 | |||
147 | /* | ||
148 | * CRUSH map includes all buckets, rules, etc. | ||
149 | */ | ||
150 | struct crush_map { | ||
151 | struct crush_bucket **buckets; | ||
152 | struct crush_rule **rules; | ||
153 | |||
154 | /* | ||
155 | * Parent pointers to identify the parent bucket a device or | ||
156 | * bucket in the hierarchy. If an item appears more than | ||
157 | * once, this is the _last_ time it appeared (where buckets | ||
158 | * are processed in bucket id order, from -1 on down to | ||
159 | * -max_buckets. | ||
160 | */ | ||
161 | __u32 *bucket_parents; | ||
162 | __u32 *device_parents; | ||
163 | |||
164 | __s32 max_buckets; | ||
165 | __u32 max_rules; | ||
166 | __s32 max_devices; | ||
167 | }; | ||
168 | |||
169 | |||
170 | /* crush.c */ | ||
171 | extern int crush_get_bucket_item_weight(struct crush_bucket *b, int pos); | ||
172 | extern void crush_calc_parents(struct crush_map *map); | ||
173 | extern void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b); | ||
174 | extern void crush_destroy_bucket_list(struct crush_bucket_list *b); | ||
175 | extern void crush_destroy_bucket_tree(struct crush_bucket_tree *b); | ||
176 | extern void crush_destroy_bucket_straw(struct crush_bucket_straw *b); | ||
177 | extern void crush_destroy_bucket(struct crush_bucket *b); | ||
178 | extern void crush_destroy(struct crush_map *map); | ||
179 | |||
180 | #endif | ||
diff --git a/fs/ceph/crush/hash.c b/fs/ceph/crush/hash.c deleted file mode 100644 index 5873aed694bf..000000000000 --- a/fs/ceph/crush/hash.c +++ /dev/null | |||
@@ -1,149 +0,0 @@ | |||
1 | |||
2 | #include <linux/types.h> | ||
3 | #include "hash.h" | ||
4 | |||
5 | /* | ||
6 | * Robert Jenkins' function for mixing 32-bit values | ||
7 | * http://burtleburtle.net/bob/hash/evahash.html | ||
8 | * a, b = random bits, c = input and output | ||
9 | */ | ||
10 | #define crush_hashmix(a, b, c) do { \ | ||
11 | a = a-b; a = a-c; a = a^(c>>13); \ | ||
12 | b = b-c; b = b-a; b = b^(a<<8); \ | ||
13 | c = c-a; c = c-b; c = c^(b>>13); \ | ||
14 | a = a-b; a = a-c; a = a^(c>>12); \ | ||
15 | b = b-c; b = b-a; b = b^(a<<16); \ | ||
16 | c = c-a; c = c-b; c = c^(b>>5); \ | ||
17 | a = a-b; a = a-c; a = a^(c>>3); \ | ||
18 | b = b-c; b = b-a; b = b^(a<<10); \ | ||
19 | c = c-a; c = c-b; c = c^(b>>15); \ | ||
20 | } while (0) | ||
21 | |||
22 | #define crush_hash_seed 1315423911 | ||
23 | |||
24 | static __u32 crush_hash32_rjenkins1(__u32 a) | ||
25 | { | ||
26 | __u32 hash = crush_hash_seed ^ a; | ||
27 | __u32 b = a; | ||
28 | __u32 x = 231232; | ||
29 | __u32 y = 1232; | ||
30 | crush_hashmix(b, x, hash); | ||
31 | crush_hashmix(y, a, hash); | ||
32 | return hash; | ||
33 | } | ||
34 | |||
35 | static __u32 crush_hash32_rjenkins1_2(__u32 a, __u32 b) | ||
36 | { | ||
37 | __u32 hash = crush_hash_seed ^ a ^ b; | ||
38 | __u32 x = 231232; | ||
39 | __u32 y = 1232; | ||
40 | crush_hashmix(a, b, hash); | ||
41 | crush_hashmix(x, a, hash); | ||
42 | crush_hashmix(b, y, hash); | ||
43 | return hash; | ||
44 | } | ||
45 | |||
46 | static __u32 crush_hash32_rjenkins1_3(__u32 a, __u32 b, __u32 c) | ||
47 | { | ||
48 | __u32 hash = crush_hash_seed ^ a ^ b ^ c; | ||
49 | __u32 x = 231232; | ||
50 | __u32 y = 1232; | ||
51 | crush_hashmix(a, b, hash); | ||
52 | crush_hashmix(c, x, hash); | ||
53 | crush_hashmix(y, a, hash); | ||
54 | crush_hashmix(b, x, hash); | ||
55 | crush_hashmix(y, c, hash); | ||
56 | return hash; | ||
57 | } | ||
58 | |||
59 | static __u32 crush_hash32_rjenkins1_4(__u32 a, __u32 b, __u32 c, __u32 d) | ||
60 | { | ||
61 | __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d; | ||
62 | __u32 x = 231232; | ||
63 | __u32 y = 1232; | ||
64 | crush_hashmix(a, b, hash); | ||
65 | crush_hashmix(c, d, hash); | ||
66 | crush_hashmix(a, x, hash); | ||
67 | crush_hashmix(y, b, hash); | ||
68 | crush_hashmix(c, x, hash); | ||
69 | crush_hashmix(y, d, hash); | ||
70 | return hash; | ||
71 | } | ||
72 | |||
73 | static __u32 crush_hash32_rjenkins1_5(__u32 a, __u32 b, __u32 c, __u32 d, | ||
74 | __u32 e) | ||
75 | { | ||
76 | __u32 hash = crush_hash_seed ^ a ^ b ^ c ^ d ^ e; | ||
77 | __u32 x = 231232; | ||
78 | __u32 y = 1232; | ||
79 | crush_hashmix(a, b, hash); | ||
80 | crush_hashmix(c, d, hash); | ||
81 | crush_hashmix(e, x, hash); | ||
82 | crush_hashmix(y, a, hash); | ||
83 | crush_hashmix(b, x, hash); | ||
84 | crush_hashmix(y, c, hash); | ||
85 | crush_hashmix(d, x, hash); | ||
86 | crush_hashmix(y, e, hash); | ||
87 | return hash; | ||
88 | } | ||
89 | |||
90 | |||
91 | __u32 crush_hash32(int type, __u32 a) | ||
92 | { | ||
93 | switch (type) { | ||
94 | case CRUSH_HASH_RJENKINS1: | ||
95 | return crush_hash32_rjenkins1(a); | ||
96 | default: | ||
97 | return 0; | ||
98 | } | ||
99 | } | ||
100 | |||
101 | __u32 crush_hash32_2(int type, __u32 a, __u32 b) | ||
102 | { | ||
103 | switch (type) { | ||
104 | case CRUSH_HASH_RJENKINS1: | ||
105 | return crush_hash32_rjenkins1_2(a, b); | ||
106 | default: | ||
107 | return 0; | ||
108 | } | ||
109 | } | ||
110 | |||
111 | __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c) | ||
112 | { | ||
113 | switch (type) { | ||
114 | case CRUSH_HASH_RJENKINS1: | ||
115 | return crush_hash32_rjenkins1_3(a, b, c); | ||
116 | default: | ||
117 | return 0; | ||
118 | } | ||
119 | } | ||
120 | |||
121 | __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d) | ||
122 | { | ||
123 | switch (type) { | ||
124 | case CRUSH_HASH_RJENKINS1: | ||
125 | return crush_hash32_rjenkins1_4(a, b, c, d); | ||
126 | default: | ||
127 | return 0; | ||
128 | } | ||
129 | } | ||
130 | |||
131 | __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, __u32 e) | ||
132 | { | ||
133 | switch (type) { | ||
134 | case CRUSH_HASH_RJENKINS1: | ||
135 | return crush_hash32_rjenkins1_5(a, b, c, d, e); | ||
136 | default: | ||
137 | return 0; | ||
138 | } | ||
139 | } | ||
140 | |||
141 | const char *crush_hash_name(int type) | ||
142 | { | ||
143 | switch (type) { | ||
144 | case CRUSH_HASH_RJENKINS1: | ||
145 | return "rjenkins1"; | ||
146 | default: | ||
147 | return "unknown"; | ||
148 | } | ||
149 | } | ||
diff --git a/fs/ceph/crush/hash.h b/fs/ceph/crush/hash.h deleted file mode 100644 index 91e884230d5d..000000000000 --- a/fs/ceph/crush/hash.h +++ /dev/null | |||
@@ -1,17 +0,0 @@ | |||
1 | #ifndef CEPH_CRUSH_HASH_H | ||
2 | #define CEPH_CRUSH_HASH_H | ||
3 | |||
4 | #define CRUSH_HASH_RJENKINS1 0 | ||
5 | |||
6 | #define CRUSH_HASH_DEFAULT CRUSH_HASH_RJENKINS1 | ||
7 | |||
8 | extern const char *crush_hash_name(int type); | ||
9 | |||
10 | extern __u32 crush_hash32(int type, __u32 a); | ||
11 | extern __u32 crush_hash32_2(int type, __u32 a, __u32 b); | ||
12 | extern __u32 crush_hash32_3(int type, __u32 a, __u32 b, __u32 c); | ||
13 | extern __u32 crush_hash32_4(int type, __u32 a, __u32 b, __u32 c, __u32 d); | ||
14 | extern __u32 crush_hash32_5(int type, __u32 a, __u32 b, __u32 c, __u32 d, | ||
15 | __u32 e); | ||
16 | |||
17 | #endif | ||
diff --git a/fs/ceph/crush/mapper.c b/fs/ceph/crush/mapper.c deleted file mode 100644 index a4eec133258e..000000000000 --- a/fs/ceph/crush/mapper.c +++ /dev/null | |||
@@ -1,609 +0,0 @@ | |||
1 | |||
2 | #ifdef __KERNEL__ | ||
3 | # include <linux/string.h> | ||
4 | # include <linux/slab.h> | ||
5 | # include <linux/bug.h> | ||
6 | # include <linux/kernel.h> | ||
7 | # ifndef dprintk | ||
8 | # define dprintk(args...) | ||
9 | # endif | ||
10 | #else | ||
11 | # include <string.h> | ||
12 | # include <stdio.h> | ||
13 | # include <stdlib.h> | ||
14 | # include <assert.h> | ||
15 | # define BUG_ON(x) assert(!(x)) | ||
16 | # define dprintk(args...) /* printf(args) */ | ||
17 | # define kmalloc(x, f) malloc(x) | ||
18 | # define kfree(x) free(x) | ||
19 | #endif | ||
20 | |||
21 | #include "crush.h" | ||
22 | #include "hash.h" | ||
23 | |||
24 | /* | ||
25 | * Implement the core CRUSH mapping algorithm. | ||
26 | */ | ||
27 | |||
28 | /** | ||
29 | * crush_find_rule - find a crush_rule id for a given ruleset, type, and size. | ||
30 | * @map: the crush_map | ||
31 | * @ruleset: the storage ruleset id (user defined) | ||
32 | * @type: storage ruleset type (user defined) | ||
33 | * @size: output set size | ||
34 | */ | ||
35 | int crush_find_rule(struct crush_map *map, int ruleset, int type, int size) | ||
36 | { | ||
37 | int i; | ||
38 | |||
39 | for (i = 0; i < map->max_rules; i++) { | ||
40 | if (map->rules[i] && | ||
41 | map->rules[i]->mask.ruleset == ruleset && | ||
42 | map->rules[i]->mask.type == type && | ||
43 | map->rules[i]->mask.min_size <= size && | ||
44 | map->rules[i]->mask.max_size >= size) | ||
45 | return i; | ||
46 | } | ||
47 | return -1; | ||
48 | } | ||
49 | |||
50 | |||
51 | /* | ||
52 | * bucket choose methods | ||
53 | * | ||
54 | * For each bucket algorithm, we have a "choose" method that, given a | ||
55 | * crush input @x and replica position (usually, position in output set) @r, | ||
56 | * will produce an item in the bucket. | ||
57 | */ | ||
58 | |||
59 | /* | ||
60 | * Choose based on a random permutation of the bucket. | ||
61 | * | ||
62 | * We used to use some prime number arithmetic to do this, but it | ||
63 | * wasn't very random, and had some other bad behaviors. Instead, we | ||
64 | * calculate an actual random permutation of the bucket members. | ||
65 | * Since this is expensive, we optimize for the r=0 case, which | ||
66 | * captures the vast majority of calls. | ||
67 | */ | ||
68 | static int bucket_perm_choose(struct crush_bucket *bucket, | ||
69 | int x, int r) | ||
70 | { | ||
71 | unsigned pr = r % bucket->size; | ||
72 | unsigned i, s; | ||
73 | |||
74 | /* start a new permutation if @x has changed */ | ||
75 | if (bucket->perm_x != x || bucket->perm_n == 0) { | ||
76 | dprintk("bucket %d new x=%d\n", bucket->id, x); | ||
77 | bucket->perm_x = x; | ||
78 | |||
79 | /* optimize common r=0 case */ | ||
80 | if (pr == 0) { | ||
81 | s = crush_hash32_3(bucket->hash, x, bucket->id, 0) % | ||
82 | bucket->size; | ||
83 | bucket->perm[0] = s; | ||
84 | bucket->perm_n = 0xffff; /* magic value, see below */ | ||
85 | goto out; | ||
86 | } | ||
87 | |||
88 | for (i = 0; i < bucket->size; i++) | ||
89 | bucket->perm[i] = i; | ||
90 | bucket->perm_n = 0; | ||
91 | } else if (bucket->perm_n == 0xffff) { | ||
92 | /* clean up after the r=0 case above */ | ||
93 | for (i = 1; i < bucket->size; i++) | ||
94 | bucket->perm[i] = i; | ||
95 | bucket->perm[bucket->perm[0]] = 0; | ||
96 | bucket->perm_n = 1; | ||
97 | } | ||
98 | |||
99 | /* calculate permutation up to pr */ | ||
100 | for (i = 0; i < bucket->perm_n; i++) | ||
101 | dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]); | ||
102 | while (bucket->perm_n <= pr) { | ||
103 | unsigned p = bucket->perm_n; | ||
104 | /* no point in swapping the final entry */ | ||
105 | if (p < bucket->size - 1) { | ||
106 | i = crush_hash32_3(bucket->hash, x, bucket->id, p) % | ||
107 | (bucket->size - p); | ||
108 | if (i) { | ||
109 | unsigned t = bucket->perm[p + i]; | ||
110 | bucket->perm[p + i] = bucket->perm[p]; | ||
111 | bucket->perm[p] = t; | ||
112 | } | ||
113 | dprintk(" perm_choose swap %d with %d\n", p, p+i); | ||
114 | } | ||
115 | bucket->perm_n++; | ||
116 | } | ||
117 | for (i = 0; i < bucket->size; i++) | ||
118 | dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]); | ||
119 | |||
120 | s = bucket->perm[pr]; | ||
121 | out: | ||
122 | dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id, | ||
123 | bucket->size, x, r, pr, s); | ||
124 | return bucket->items[s]; | ||
125 | } | ||
126 | |||
127 | /* uniform */ | ||
128 | static int bucket_uniform_choose(struct crush_bucket_uniform *bucket, | ||
129 | int x, int r) | ||
130 | { | ||
131 | return bucket_perm_choose(&bucket->h, x, r); | ||
132 | } | ||
133 | |||
134 | /* list */ | ||
135 | static int bucket_list_choose(struct crush_bucket_list *bucket, | ||
136 | int x, int r) | ||
137 | { | ||
138 | int i; | ||
139 | |||
140 | for (i = bucket->h.size-1; i >= 0; i--) { | ||
141 | __u64 w = crush_hash32_4(bucket->h.hash,x, bucket->h.items[i], | ||
142 | r, bucket->h.id); | ||
143 | w &= 0xffff; | ||
144 | dprintk("list_choose i=%d x=%d r=%d item %d weight %x " | ||
145 | "sw %x rand %llx", | ||
146 | i, x, r, bucket->h.items[i], bucket->item_weights[i], | ||
147 | bucket->sum_weights[i], w); | ||
148 | w *= bucket->sum_weights[i]; | ||
149 | w = w >> 16; | ||
150 | /*dprintk(" scaled %llx\n", w);*/ | ||
151 | if (w < bucket->item_weights[i]) | ||
152 | return bucket->h.items[i]; | ||
153 | } | ||
154 | |||
155 | BUG_ON(1); | ||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | |||
160 | /* (binary) tree */ | ||
161 | static int height(int n) | ||
162 | { | ||
163 | int h = 0; | ||
164 | while ((n & 1) == 0) { | ||
165 | h++; | ||
166 | n = n >> 1; | ||
167 | } | ||
168 | return h; | ||
169 | } | ||
170 | |||
171 | static int left(int x) | ||
172 | { | ||
173 | int h = height(x); | ||
174 | return x - (1 << (h-1)); | ||
175 | } | ||
176 | |||
177 | static int right(int x) | ||
178 | { | ||
179 | int h = height(x); | ||
180 | return x + (1 << (h-1)); | ||
181 | } | ||
182 | |||
183 | static int terminal(int x) | ||
184 | { | ||
185 | return x & 1; | ||
186 | } | ||
187 | |||
188 | static int bucket_tree_choose(struct crush_bucket_tree *bucket, | ||
189 | int x, int r) | ||
190 | { | ||
191 | int n, l; | ||
192 | __u32 w; | ||
193 | __u64 t; | ||
194 | |||
195 | /* start at root */ | ||
196 | n = bucket->num_nodes >> 1; | ||
197 | |||
198 | while (!terminal(n)) { | ||
199 | /* pick point in [0, w) */ | ||
200 | w = bucket->node_weights[n]; | ||
201 | t = (__u64)crush_hash32_4(bucket->h.hash, x, n, r, | ||
202 | bucket->h.id) * (__u64)w; | ||
203 | t = t >> 32; | ||
204 | |||
205 | /* descend to the left or right? */ | ||
206 | l = left(n); | ||
207 | if (t < bucket->node_weights[l]) | ||
208 | n = l; | ||
209 | else | ||
210 | n = right(n); | ||
211 | } | ||
212 | |||
213 | return bucket->h.items[n >> 1]; | ||
214 | } | ||
215 | |||
216 | |||
217 | /* straw */ | ||
218 | |||
219 | static int bucket_straw_choose(struct crush_bucket_straw *bucket, | ||
220 | int x, int r) | ||
221 | { | ||
222 | int i; | ||
223 | int high = 0; | ||
224 | __u64 high_draw = 0; | ||
225 | __u64 draw; | ||
226 | |||
227 | for (i = 0; i < bucket->h.size; i++) { | ||
228 | draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r); | ||
229 | draw &= 0xffff; | ||
230 | draw *= bucket->straws[i]; | ||
231 | if (i == 0 || draw > high_draw) { | ||
232 | high = i; | ||
233 | high_draw = draw; | ||
234 | } | ||
235 | } | ||
236 | return bucket->h.items[high]; | ||
237 | } | ||
238 | |||
239 | static int crush_bucket_choose(struct crush_bucket *in, int x, int r) | ||
240 | { | ||
241 | dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r); | ||
242 | switch (in->alg) { | ||
243 | case CRUSH_BUCKET_UNIFORM: | ||
244 | return bucket_uniform_choose((struct crush_bucket_uniform *)in, | ||
245 | x, r); | ||
246 | case CRUSH_BUCKET_LIST: | ||
247 | return bucket_list_choose((struct crush_bucket_list *)in, | ||
248 | x, r); | ||
249 | case CRUSH_BUCKET_TREE: | ||
250 | return bucket_tree_choose((struct crush_bucket_tree *)in, | ||
251 | x, r); | ||
252 | case CRUSH_BUCKET_STRAW: | ||
253 | return bucket_straw_choose((struct crush_bucket_straw *)in, | ||
254 | x, r); | ||
255 | default: | ||
256 | BUG_ON(1); | ||
257 | return in->items[0]; | ||
258 | } | ||
259 | } | ||
260 | |||
261 | /* | ||
262 | * true if device is marked "out" (failed, fully offloaded) | ||
263 | * of the cluster | ||
264 | */ | ||
265 | static int is_out(struct crush_map *map, __u32 *weight, int item, int x) | ||
266 | { | ||
267 | if (weight[item] >= 0x10000) | ||
268 | return 0; | ||
269 | if (weight[item] == 0) | ||
270 | return 1; | ||
271 | if ((crush_hash32_2(CRUSH_HASH_RJENKINS1, x, item) & 0xffff) | ||
272 | < weight[item]) | ||
273 | return 0; | ||
274 | return 1; | ||
275 | } | ||
276 | |||
277 | /** | ||
278 | * crush_choose - choose numrep distinct items of given type | ||
279 | * @map: the crush_map | ||
280 | * @bucket: the bucket we are choose an item from | ||
281 | * @x: crush input value | ||
282 | * @numrep: the number of items to choose | ||
283 | * @type: the type of item to choose | ||
284 | * @out: pointer to output vector | ||
285 | * @outpos: our position in that vector | ||
286 | * @firstn: true if choosing "first n" items, false if choosing "indep" | ||
287 | * @recurse_to_leaf: true if we want one device under each item of given type | ||
288 | * @out2: second output vector for leaf items (if @recurse_to_leaf) | ||
289 | */ | ||
290 | static int crush_choose(struct crush_map *map, | ||
291 | struct crush_bucket *bucket, | ||
292 | __u32 *weight, | ||
293 | int x, int numrep, int type, | ||
294 | int *out, int outpos, | ||
295 | int firstn, int recurse_to_leaf, | ||
296 | int *out2) | ||
297 | { | ||
298 | int rep; | ||
299 | int ftotal, flocal; | ||
300 | int retry_descent, retry_bucket, skip_rep; | ||
301 | struct crush_bucket *in = bucket; | ||
302 | int r; | ||
303 | int i; | ||
304 | int item = 0; | ||
305 | int itemtype; | ||
306 | int collide, reject; | ||
307 | const int orig_tries = 5; /* attempts before we fall back to search */ | ||
308 | |||
309 | dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", | ||
310 | bucket->id, x, outpos, numrep); | ||
311 | |||
312 | for (rep = outpos; rep < numrep; rep++) { | ||
313 | /* keep trying until we get a non-out, non-colliding item */ | ||
314 | ftotal = 0; | ||
315 | skip_rep = 0; | ||
316 | do { | ||
317 | retry_descent = 0; | ||
318 | in = bucket; /* initial bucket */ | ||
319 | |||
320 | /* choose through intervening buckets */ | ||
321 | flocal = 0; | ||
322 | do { | ||
323 | collide = 0; | ||
324 | retry_bucket = 0; | ||
325 | r = rep; | ||
326 | if (in->alg == CRUSH_BUCKET_UNIFORM) { | ||
327 | /* be careful */ | ||
328 | if (firstn || numrep >= in->size) | ||
329 | /* r' = r + f_total */ | ||
330 | r += ftotal; | ||
331 | else if (in->size % numrep == 0) | ||
332 | /* r'=r+(n+1)*f_local */ | ||
333 | r += (numrep+1) * | ||
334 | (flocal+ftotal); | ||
335 | else | ||
336 | /* r' = r + n*f_local */ | ||
337 | r += numrep * (flocal+ftotal); | ||
338 | } else { | ||
339 | if (firstn) | ||
340 | /* r' = r + f_total */ | ||
341 | r += ftotal; | ||
342 | else | ||
343 | /* r' = r + n*f_local */ | ||
344 | r += numrep * (flocal+ftotal); | ||
345 | } | ||
346 | |||
347 | /* bucket choose */ | ||
348 | if (in->size == 0) { | ||
349 | reject = 1; | ||
350 | goto reject; | ||
351 | } | ||
352 | if (flocal >= (in->size>>1) && | ||
353 | flocal > orig_tries) | ||
354 | item = bucket_perm_choose(in, x, r); | ||
355 | else | ||
356 | item = crush_bucket_choose(in, x, r); | ||
357 | BUG_ON(item >= map->max_devices); | ||
358 | |||
359 | /* desired type? */ | ||
360 | if (item < 0) | ||
361 | itemtype = map->buckets[-1-item]->type; | ||
362 | else | ||
363 | itemtype = 0; | ||
364 | dprintk(" item %d type %d\n", item, itemtype); | ||
365 | |||
366 | /* keep going? */ | ||
367 | if (itemtype != type) { | ||
368 | BUG_ON(item >= 0 || | ||
369 | (-1-item) >= map->max_buckets); | ||
370 | in = map->buckets[-1-item]; | ||
371 | retry_bucket = 1; | ||
372 | continue; | ||
373 | } | ||
374 | |||
375 | /* collision? */ | ||
376 | for (i = 0; i < outpos; i++) { | ||
377 | if (out[i] == item) { | ||
378 | collide = 1; | ||
379 | break; | ||
380 | } | ||
381 | } | ||
382 | |||
383 | reject = 0; | ||
384 | if (recurse_to_leaf) { | ||
385 | if (item < 0) { | ||
386 | if (crush_choose(map, | ||
387 | map->buckets[-1-item], | ||
388 | weight, | ||
389 | x, outpos+1, 0, | ||
390 | out2, outpos, | ||
391 | firstn, 0, | ||
392 | NULL) <= outpos) | ||
393 | /* didn't get leaf */ | ||
394 | reject = 1; | ||
395 | } else { | ||
396 | /* we already have a leaf! */ | ||
397 | out2[outpos] = item; | ||
398 | } | ||
399 | } | ||
400 | |||
401 | if (!reject) { | ||
402 | /* out? */ | ||
403 | if (itemtype == 0) | ||
404 | reject = is_out(map, weight, | ||
405 | item, x); | ||
406 | else | ||
407 | reject = 0; | ||
408 | } | ||
409 | |||
410 | reject: | ||
411 | if (reject || collide) { | ||
412 | ftotal++; | ||
413 | flocal++; | ||
414 | |||
415 | if (collide && flocal < 3) | ||
416 | /* retry locally a few times */ | ||
417 | retry_bucket = 1; | ||
418 | else if (flocal < in->size + orig_tries) | ||
419 | /* exhaustive bucket search */ | ||
420 | retry_bucket = 1; | ||
421 | else if (ftotal < 20) | ||
422 | /* then retry descent */ | ||
423 | retry_descent = 1; | ||
424 | else | ||
425 | /* else give up */ | ||
426 | skip_rep = 1; | ||
427 | dprintk(" reject %d collide %d " | ||
428 | "ftotal %d flocal %d\n", | ||
429 | reject, collide, ftotal, | ||
430 | flocal); | ||
431 | } | ||
432 | } while (retry_bucket); | ||
433 | } while (retry_descent); | ||
434 | |||
435 | if (skip_rep) { | ||
436 | dprintk("skip rep\n"); | ||
437 | continue; | ||
438 | } | ||
439 | |||
440 | dprintk("CHOOSE got %d\n", item); | ||
441 | out[outpos] = item; | ||
442 | outpos++; | ||
443 | } | ||
444 | |||
445 | dprintk("CHOOSE returns %d\n", outpos); | ||
446 | return outpos; | ||
447 | } | ||
448 | |||
449 | |||
450 | /** | ||
451 | * crush_do_rule - calculate a mapping with the given input and rule | ||
452 | * @map: the crush_map | ||
453 | * @ruleno: the rule id | ||
454 | * @x: hash input | ||
455 | * @result: pointer to result vector | ||
456 | * @result_max: maximum result size | ||
457 | * @force: force initial replica choice; -1 for none | ||
458 | */ | ||
459 | int crush_do_rule(struct crush_map *map, | ||
460 | int ruleno, int x, int *result, int result_max, | ||
461 | int force, __u32 *weight) | ||
462 | { | ||
463 | int result_len; | ||
464 | int force_context[CRUSH_MAX_DEPTH]; | ||
465 | int force_pos = -1; | ||
466 | int a[CRUSH_MAX_SET]; | ||
467 | int b[CRUSH_MAX_SET]; | ||
468 | int c[CRUSH_MAX_SET]; | ||
469 | int recurse_to_leaf; | ||
470 | int *w; | ||
471 | int wsize = 0; | ||
472 | int *o; | ||
473 | int osize; | ||
474 | int *tmp; | ||
475 | struct crush_rule *rule; | ||
476 | int step; | ||
477 | int i, j; | ||
478 | int numrep; | ||
479 | int firstn; | ||
480 | int rc = -1; | ||
481 | |||
482 | BUG_ON(ruleno >= map->max_rules); | ||
483 | |||
484 | rule = map->rules[ruleno]; | ||
485 | result_len = 0; | ||
486 | w = a; | ||
487 | o = b; | ||
488 | |||
489 | /* | ||
490 | * determine hierarchical context of force, if any. note | ||
491 | * that this may or may not correspond to the specific types | ||
492 | * referenced by the crush rule. | ||
493 | */ | ||
494 | if (force >= 0) { | ||
495 | if (force >= map->max_devices || | ||
496 | map->device_parents[force] == 0) { | ||
497 | /*dprintk("CRUSH: forcefed device dne\n");*/ | ||
498 | rc = -1; /* force fed device dne */ | ||
499 | goto out; | ||
500 | } | ||
501 | if (!is_out(map, weight, force, x)) { | ||
502 | while (1) { | ||
503 | force_context[++force_pos] = force; | ||
504 | if (force >= 0) | ||
505 | force = map->device_parents[force]; | ||
506 | else | ||
507 | force = map->bucket_parents[-1-force]; | ||
508 | if (force == 0) | ||
509 | break; | ||
510 | } | ||
511 | } | ||
512 | } | ||
513 | |||
514 | for (step = 0; step < rule->len; step++) { | ||
515 | firstn = 0; | ||
516 | switch (rule->steps[step].op) { | ||
517 | case CRUSH_RULE_TAKE: | ||
518 | w[0] = rule->steps[step].arg1; | ||
519 | if (force_pos >= 0) { | ||
520 | BUG_ON(force_context[force_pos] != w[0]); | ||
521 | force_pos--; | ||
522 | } | ||
523 | wsize = 1; | ||
524 | break; | ||
525 | |||
526 | case CRUSH_RULE_CHOOSE_LEAF_FIRSTN: | ||
527 | case CRUSH_RULE_CHOOSE_FIRSTN: | ||
528 | firstn = 1; | ||
529 | case CRUSH_RULE_CHOOSE_LEAF_INDEP: | ||
530 | case CRUSH_RULE_CHOOSE_INDEP: | ||
531 | BUG_ON(wsize == 0); | ||
532 | |||
533 | recurse_to_leaf = | ||
534 | rule->steps[step].op == | ||
535 | CRUSH_RULE_CHOOSE_LEAF_FIRSTN || | ||
536 | rule->steps[step].op == | ||
537 | CRUSH_RULE_CHOOSE_LEAF_INDEP; | ||
538 | |||
539 | /* reset output */ | ||
540 | osize = 0; | ||
541 | |||
542 | for (i = 0; i < wsize; i++) { | ||
543 | /* | ||
544 | * see CRUSH_N, CRUSH_N_MINUS macros. | ||
545 | * basically, numrep <= 0 means relative to | ||
546 | * the provided result_max | ||
547 | */ | ||
548 | numrep = rule->steps[step].arg1; | ||
549 | if (numrep <= 0) { | ||
550 | numrep += result_max; | ||
551 | if (numrep <= 0) | ||
552 | continue; | ||
553 | } | ||
554 | j = 0; | ||
555 | if (osize == 0 && force_pos >= 0) { | ||
556 | /* skip any intermediate types */ | ||
557 | while (force_pos && | ||
558 | force_context[force_pos] < 0 && | ||
559 | rule->steps[step].arg2 != | ||
560 | map->buckets[-1 - | ||
561 | force_context[force_pos]]->type) | ||
562 | force_pos--; | ||
563 | o[osize] = force_context[force_pos]; | ||
564 | if (recurse_to_leaf) | ||
565 | c[osize] = force_context[0]; | ||
566 | j++; | ||
567 | force_pos--; | ||
568 | } | ||
569 | osize += crush_choose(map, | ||
570 | map->buckets[-1-w[i]], | ||
571 | weight, | ||
572 | x, numrep, | ||
573 | rule->steps[step].arg2, | ||
574 | o+osize, j, | ||
575 | firstn, | ||
576 | recurse_to_leaf, c+osize); | ||
577 | } | ||
578 | |||
579 | if (recurse_to_leaf) | ||
580 | /* copy final _leaf_ values to output set */ | ||
581 | memcpy(o, c, osize*sizeof(*o)); | ||
582 | |||
583 | /* swap t and w arrays */ | ||
584 | tmp = o; | ||
585 | o = w; | ||
586 | w = tmp; | ||
587 | wsize = osize; | ||
588 | break; | ||
589 | |||
590 | |||
591 | case CRUSH_RULE_EMIT: | ||
592 | for (i = 0; i < wsize && result_len < result_max; i++) { | ||
593 | result[result_len] = w[i]; | ||
594 | result_len++; | ||
595 | } | ||
596 | wsize = 0; | ||
597 | break; | ||
598 | |||
599 | default: | ||
600 | BUG_ON(1); | ||
601 | } | ||
602 | } | ||
603 | rc = result_len; | ||
604 | |||
605 | out: | ||
606 | return rc; | ||
607 | } | ||
608 | |||
609 | |||
diff --git a/fs/ceph/crush/mapper.h b/fs/ceph/crush/mapper.h deleted file mode 100644 index c46b99c18bb0..000000000000 --- a/fs/ceph/crush/mapper.h +++ /dev/null | |||
@@ -1,20 +0,0 @@ | |||
1 | #ifndef CEPH_CRUSH_MAPPER_H | ||
2 | #define CEPH_CRUSH_MAPPER_H | ||
3 | |||
4 | /* | ||
5 | * CRUSH functions for find rules and then mapping an input to an | ||
6 | * output set. | ||
7 | * | ||
8 | * LGPL2 | ||
9 | */ | ||
10 | |||
11 | #include "crush.h" | ||
12 | |||
13 | extern int crush_find_rule(struct crush_map *map, int pool, int type, int size); | ||
14 | extern int crush_do_rule(struct crush_map *map, | ||
15 | int ruleno, | ||
16 | int x, int *result, int result_max, | ||
17 | int forcefeed, /* -1 for none */ | ||
18 | __u32 *weights); | ||
19 | |||
20 | #endif | ||
diff --git a/fs/ceph/crypto.c b/fs/ceph/crypto.c deleted file mode 100644 index a3e627f63293..000000000000 --- a/fs/ceph/crypto.c +++ /dev/null | |||
@@ -1,412 +0,0 @@ | |||
1 | |||
2 | #include "ceph_debug.h" | ||
3 | |||
4 | #include <linux/err.h> | ||
5 | #include <linux/scatterlist.h> | ||
6 | #include <linux/slab.h> | ||
7 | #include <crypto/hash.h> | ||
8 | |||
9 | #include "crypto.h" | ||
10 | #include "decode.h" | ||
11 | |||
12 | int ceph_crypto_key_encode(struct ceph_crypto_key *key, void **p, void *end) | ||
13 | { | ||
14 | if (*p + sizeof(u16) + sizeof(key->created) + | ||
15 | sizeof(u16) + key->len > end) | ||
16 | return -ERANGE; | ||
17 | ceph_encode_16(p, key->type); | ||
18 | ceph_encode_copy(p, &key->created, sizeof(key->created)); | ||
19 | ceph_encode_16(p, key->len); | ||
20 | ceph_encode_copy(p, key->key, key->len); | ||
21 | return 0; | ||
22 | } | ||
23 | |||
24 | int ceph_crypto_key_decode(struct ceph_crypto_key *key, void **p, void *end) | ||
25 | { | ||
26 | ceph_decode_need(p, end, 2*sizeof(u16) + sizeof(key->created), bad); | ||
27 | key->type = ceph_decode_16(p); | ||
28 | ceph_decode_copy(p, &key->created, sizeof(key->created)); | ||
29 | key->len = ceph_decode_16(p); | ||
30 | ceph_decode_need(p, end, key->len, bad); | ||
31 | key->key = kmalloc(key->len, GFP_NOFS); | ||
32 | if (!key->key) | ||
33 | return -ENOMEM; | ||
34 | ceph_decode_copy(p, key->key, key->len); | ||
35 | return 0; | ||
36 | |||
37 | bad: | ||
38 | dout("failed to decode crypto key\n"); | ||
39 | return -EINVAL; | ||
40 | } | ||
41 | |||
42 | int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *inkey) | ||
43 | { | ||
44 | int inlen = strlen(inkey); | ||
45 | int blen = inlen * 3 / 4; | ||
46 | void *buf, *p; | ||
47 | int ret; | ||
48 | |||
49 | dout("crypto_key_unarmor %s\n", inkey); | ||
50 | buf = kmalloc(blen, GFP_NOFS); | ||
51 | if (!buf) | ||
52 | return -ENOMEM; | ||
53 | blen = ceph_unarmor(buf, inkey, inkey+inlen); | ||
54 | if (blen < 0) { | ||
55 | kfree(buf); | ||
56 | return blen; | ||
57 | } | ||
58 | |||
59 | p = buf; | ||
60 | ret = ceph_crypto_key_decode(key, &p, p + blen); | ||
61 | kfree(buf); | ||
62 | if (ret) | ||
63 | return ret; | ||
64 | dout("crypto_key_unarmor key %p type %d len %d\n", key, | ||
65 | key->type, key->len); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | |||
70 | |||
71 | #define AES_KEY_SIZE 16 | ||
72 | |||
73 | static struct crypto_blkcipher *ceph_crypto_alloc_cipher(void) | ||
74 | { | ||
75 | return crypto_alloc_blkcipher("cbc(aes)", 0, CRYPTO_ALG_ASYNC); | ||
76 | } | ||
77 | |||
78 | static const u8 *aes_iv = (u8 *)CEPH_AES_IV; | ||
79 | |||
80 | static int ceph_aes_encrypt(const void *key, int key_len, | ||
81 | void *dst, size_t *dst_len, | ||
82 | const void *src, size_t src_len) | ||
83 | { | ||
84 | struct scatterlist sg_in[2], sg_out[1]; | ||
85 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | ||
86 | struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; | ||
87 | int ret; | ||
88 | void *iv; | ||
89 | int ivsize; | ||
90 | size_t zero_padding = (0x10 - (src_len & 0x0f)); | ||
91 | char pad[16]; | ||
92 | |||
93 | if (IS_ERR(tfm)) | ||
94 | return PTR_ERR(tfm); | ||
95 | |||
96 | memset(pad, zero_padding, zero_padding); | ||
97 | |||
98 | *dst_len = src_len + zero_padding; | ||
99 | |||
100 | crypto_blkcipher_setkey((void *)tfm, key, key_len); | ||
101 | sg_init_table(sg_in, 2); | ||
102 | sg_set_buf(&sg_in[0], src, src_len); | ||
103 | sg_set_buf(&sg_in[1], pad, zero_padding); | ||
104 | sg_init_table(sg_out, 1); | ||
105 | sg_set_buf(sg_out, dst, *dst_len); | ||
106 | iv = crypto_blkcipher_crt(tfm)->iv; | ||
107 | ivsize = crypto_blkcipher_ivsize(tfm); | ||
108 | |||
109 | memcpy(iv, aes_iv, ivsize); | ||
110 | /* | ||
111 | print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, | ||
112 | key, key_len, 1); | ||
113 | print_hex_dump(KERN_ERR, "enc src: ", DUMP_PREFIX_NONE, 16, 1, | ||
114 | src, src_len, 1); | ||
115 | print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, | ||
116 | pad, zero_padding, 1); | ||
117 | */ | ||
118 | ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, | ||
119 | src_len + zero_padding); | ||
120 | crypto_free_blkcipher(tfm); | ||
121 | if (ret < 0) | ||
122 | pr_err("ceph_aes_crypt failed %d\n", ret); | ||
123 | /* | ||
124 | print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, | ||
125 | dst, *dst_len, 1); | ||
126 | */ | ||
127 | return 0; | ||
128 | } | ||
129 | |||
130 | static int ceph_aes_encrypt2(const void *key, int key_len, void *dst, | ||
131 | size_t *dst_len, | ||
132 | const void *src1, size_t src1_len, | ||
133 | const void *src2, size_t src2_len) | ||
134 | { | ||
135 | struct scatterlist sg_in[3], sg_out[1]; | ||
136 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | ||
137 | struct blkcipher_desc desc = { .tfm = tfm, .flags = 0 }; | ||
138 | int ret; | ||
139 | void *iv; | ||
140 | int ivsize; | ||
141 | size_t zero_padding = (0x10 - ((src1_len + src2_len) & 0x0f)); | ||
142 | char pad[16]; | ||
143 | |||
144 | if (IS_ERR(tfm)) | ||
145 | return PTR_ERR(tfm); | ||
146 | |||
147 | memset(pad, zero_padding, zero_padding); | ||
148 | |||
149 | *dst_len = src1_len + src2_len + zero_padding; | ||
150 | |||
151 | crypto_blkcipher_setkey((void *)tfm, key, key_len); | ||
152 | sg_init_table(sg_in, 3); | ||
153 | sg_set_buf(&sg_in[0], src1, src1_len); | ||
154 | sg_set_buf(&sg_in[1], src2, src2_len); | ||
155 | sg_set_buf(&sg_in[2], pad, zero_padding); | ||
156 | sg_init_table(sg_out, 1); | ||
157 | sg_set_buf(sg_out, dst, *dst_len); | ||
158 | iv = crypto_blkcipher_crt(tfm)->iv; | ||
159 | ivsize = crypto_blkcipher_ivsize(tfm); | ||
160 | |||
161 | memcpy(iv, aes_iv, ivsize); | ||
162 | /* | ||
163 | print_hex_dump(KERN_ERR, "enc key: ", DUMP_PREFIX_NONE, 16, 1, | ||
164 | key, key_len, 1); | ||
165 | print_hex_dump(KERN_ERR, "enc src1: ", DUMP_PREFIX_NONE, 16, 1, | ||
166 | src1, src1_len, 1); | ||
167 | print_hex_dump(KERN_ERR, "enc src2: ", DUMP_PREFIX_NONE, 16, 1, | ||
168 | src2, src2_len, 1); | ||
169 | print_hex_dump(KERN_ERR, "enc pad: ", DUMP_PREFIX_NONE, 16, 1, | ||
170 | pad, zero_padding, 1); | ||
171 | */ | ||
172 | ret = crypto_blkcipher_encrypt(&desc, sg_out, sg_in, | ||
173 | src1_len + src2_len + zero_padding); | ||
174 | crypto_free_blkcipher(tfm); | ||
175 | if (ret < 0) | ||
176 | pr_err("ceph_aes_crypt2 failed %d\n", ret); | ||
177 | /* | ||
178 | print_hex_dump(KERN_ERR, "enc out: ", DUMP_PREFIX_NONE, 16, 1, | ||
179 | dst, *dst_len, 1); | ||
180 | */ | ||
181 | return 0; | ||
182 | } | ||
183 | |||
184 | static int ceph_aes_decrypt(const void *key, int key_len, | ||
185 | void *dst, size_t *dst_len, | ||
186 | const void *src, size_t src_len) | ||
187 | { | ||
188 | struct scatterlist sg_in[1], sg_out[2]; | ||
189 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | ||
190 | struct blkcipher_desc desc = { .tfm = tfm }; | ||
191 | char pad[16]; | ||
192 | void *iv; | ||
193 | int ivsize; | ||
194 | int ret; | ||
195 | int last_byte; | ||
196 | |||
197 | if (IS_ERR(tfm)) | ||
198 | return PTR_ERR(tfm); | ||
199 | |||
200 | crypto_blkcipher_setkey((void *)tfm, key, key_len); | ||
201 | sg_init_table(sg_in, 1); | ||
202 | sg_init_table(sg_out, 2); | ||
203 | sg_set_buf(sg_in, src, src_len); | ||
204 | sg_set_buf(&sg_out[0], dst, *dst_len); | ||
205 | sg_set_buf(&sg_out[1], pad, sizeof(pad)); | ||
206 | |||
207 | iv = crypto_blkcipher_crt(tfm)->iv; | ||
208 | ivsize = crypto_blkcipher_ivsize(tfm); | ||
209 | |||
210 | memcpy(iv, aes_iv, ivsize); | ||
211 | |||
212 | /* | ||
213 | print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, | ||
214 | key, key_len, 1); | ||
215 | print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, | ||
216 | src, src_len, 1); | ||
217 | */ | ||
218 | |||
219 | ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); | ||
220 | crypto_free_blkcipher(tfm); | ||
221 | if (ret < 0) { | ||
222 | pr_err("ceph_aes_decrypt failed %d\n", ret); | ||
223 | return ret; | ||
224 | } | ||
225 | |||
226 | if (src_len <= *dst_len) | ||
227 | last_byte = ((char *)dst)[src_len - 1]; | ||
228 | else | ||
229 | last_byte = pad[src_len - *dst_len - 1]; | ||
230 | if (last_byte <= 16 && src_len >= last_byte) { | ||
231 | *dst_len = src_len - last_byte; | ||
232 | } else { | ||
233 | pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", | ||
234 | last_byte, (int)src_len); | ||
235 | return -EPERM; /* bad padding */ | ||
236 | } | ||
237 | /* | ||
238 | print_hex_dump(KERN_ERR, "dec out: ", DUMP_PREFIX_NONE, 16, 1, | ||
239 | dst, *dst_len, 1); | ||
240 | */ | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static int ceph_aes_decrypt2(const void *key, int key_len, | ||
245 | void *dst1, size_t *dst1_len, | ||
246 | void *dst2, size_t *dst2_len, | ||
247 | const void *src, size_t src_len) | ||
248 | { | ||
249 | struct scatterlist sg_in[1], sg_out[3]; | ||
250 | struct crypto_blkcipher *tfm = ceph_crypto_alloc_cipher(); | ||
251 | struct blkcipher_desc desc = { .tfm = tfm }; | ||
252 | char pad[16]; | ||
253 | void *iv; | ||
254 | int ivsize; | ||
255 | int ret; | ||
256 | int last_byte; | ||
257 | |||
258 | if (IS_ERR(tfm)) | ||
259 | return PTR_ERR(tfm); | ||
260 | |||
261 | sg_init_table(sg_in, 1); | ||
262 | sg_set_buf(sg_in, src, src_len); | ||
263 | sg_init_table(sg_out, 3); | ||
264 | sg_set_buf(&sg_out[0], dst1, *dst1_len); | ||
265 | sg_set_buf(&sg_out[1], dst2, *dst2_len); | ||
266 | sg_set_buf(&sg_out[2], pad, sizeof(pad)); | ||
267 | |||
268 | crypto_blkcipher_setkey((void *)tfm, key, key_len); | ||
269 | iv = crypto_blkcipher_crt(tfm)->iv; | ||
270 | ivsize = crypto_blkcipher_ivsize(tfm); | ||
271 | |||
272 | memcpy(iv, aes_iv, ivsize); | ||
273 | |||
274 | /* | ||
275 | print_hex_dump(KERN_ERR, "dec key: ", DUMP_PREFIX_NONE, 16, 1, | ||
276 | key, key_len, 1); | ||
277 | print_hex_dump(KERN_ERR, "dec in: ", DUMP_PREFIX_NONE, 16, 1, | ||
278 | src, src_len, 1); | ||
279 | */ | ||
280 | |||
281 | ret = crypto_blkcipher_decrypt(&desc, sg_out, sg_in, src_len); | ||
282 | crypto_free_blkcipher(tfm); | ||
283 | if (ret < 0) { | ||
284 | pr_err("ceph_aes_decrypt failed %d\n", ret); | ||
285 | return ret; | ||
286 | } | ||
287 | |||
288 | if (src_len <= *dst1_len) | ||
289 | last_byte = ((char *)dst1)[src_len - 1]; | ||
290 | else if (src_len <= *dst1_len + *dst2_len) | ||
291 | last_byte = ((char *)dst2)[src_len - *dst1_len - 1]; | ||
292 | else | ||
293 | last_byte = pad[src_len - *dst1_len - *dst2_len - 1]; | ||
294 | if (last_byte <= 16 && src_len >= last_byte) { | ||
295 | src_len -= last_byte; | ||
296 | } else { | ||
297 | pr_err("ceph_aes_decrypt got bad padding %d on src len %d\n", | ||
298 | last_byte, (int)src_len); | ||
299 | return -EPERM; /* bad padding */ | ||
300 | } | ||
301 | |||
302 | if (src_len < *dst1_len) { | ||
303 | *dst1_len = src_len; | ||
304 | *dst2_len = 0; | ||
305 | } else { | ||
306 | *dst2_len = src_len - *dst1_len; | ||
307 | } | ||
308 | /* | ||
309 | print_hex_dump(KERN_ERR, "dec out1: ", DUMP_PREFIX_NONE, 16, 1, | ||
310 | dst1, *dst1_len, 1); | ||
311 | print_hex_dump(KERN_ERR, "dec out2: ", DUMP_PREFIX_NONE, 16, 1, | ||
312 | dst2, *dst2_len, 1); | ||
313 | */ | ||
314 | |||
315 | return 0; | ||
316 | } | ||
317 | |||
318 | |||
319 | int ceph_decrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, | ||
320 | const void *src, size_t src_len) | ||
321 | { | ||
322 | switch (secret->type) { | ||
323 | case CEPH_CRYPTO_NONE: | ||
324 | if (*dst_len < src_len) | ||
325 | return -ERANGE; | ||
326 | memcpy(dst, src, src_len); | ||
327 | *dst_len = src_len; | ||
328 | return 0; | ||
329 | |||
330 | case CEPH_CRYPTO_AES: | ||
331 | return ceph_aes_decrypt(secret->key, secret->len, dst, | ||
332 | dst_len, src, src_len); | ||
333 | |||
334 | default: | ||
335 | return -EINVAL; | ||
336 | } | ||
337 | } | ||
338 | |||
339 | int ceph_decrypt2(struct ceph_crypto_key *secret, | ||
340 | void *dst1, size_t *dst1_len, | ||
341 | void *dst2, size_t *dst2_len, | ||
342 | const void *src, size_t src_len) | ||
343 | { | ||
344 | size_t t; | ||
345 | |||
346 | switch (secret->type) { | ||
347 | case CEPH_CRYPTO_NONE: | ||
348 | if (*dst1_len + *dst2_len < src_len) | ||
349 | return -ERANGE; | ||
350 | t = min(*dst1_len, src_len); | ||
351 | memcpy(dst1, src, t); | ||
352 | *dst1_len = t; | ||
353 | src += t; | ||
354 | src_len -= t; | ||
355 | if (src_len) { | ||
356 | t = min(*dst2_len, src_len); | ||
357 | memcpy(dst2, src, t); | ||
358 | *dst2_len = t; | ||
359 | } | ||
360 | return 0; | ||
361 | |||
362 | case CEPH_CRYPTO_AES: | ||
363 | return ceph_aes_decrypt2(secret->key, secret->len, | ||
364 | dst1, dst1_len, dst2, dst2_len, | ||
365 | src, src_len); | ||
366 | |||
367 | default: | ||
368 | return -EINVAL; | ||
369 | } | ||
370 | } | ||
371 | |||
372 | int ceph_encrypt(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, | ||
373 | const void *src, size_t src_len) | ||
374 | { | ||
375 | switch (secret->type) { | ||
376 | case CEPH_CRYPTO_NONE: | ||
377 | if (*dst_len < src_len) | ||
378 | return -ERANGE; | ||
379 | memcpy(dst, src, src_len); | ||
380 | *dst_len = src_len; | ||
381 | return 0; | ||
382 | |||
383 | case CEPH_CRYPTO_AES: | ||
384 | return ceph_aes_encrypt(secret->key, secret->len, dst, | ||
385 | dst_len, src, src_len); | ||
386 | |||
387 | default: | ||
388 | return -EINVAL; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | int ceph_encrypt2(struct ceph_crypto_key *secret, void *dst, size_t *dst_len, | ||
393 | const void *src1, size_t src1_len, | ||
394 | const void *src2, size_t src2_len) | ||
395 | { | ||
396 | switch (secret->type) { | ||
397 | case CEPH_CRYPTO_NONE: | ||
398 | if (*dst_len < src1_len + src2_len) | ||
399 | return -ERANGE; | ||
400 | memcpy(dst, src1, src1_len); | ||
401 | memcpy(dst + src1_len, src2, src2_len); | ||
402 | *dst_len = src1_len + src2_len; | ||
403 | return 0; | ||
404 | |||
405 | case CEPH_CRYPTO_AES: | ||
406 | return ceph_aes_encrypt2(secret->key, secret->len, dst, dst_len, | ||
407 | src1, src1_len, src2, src2_len); | ||
408 | |||
409 | default: | ||
410 | return -EINVAL; | ||
411 | } | ||
412 | } | ||
diff --git a/fs/ceph/crypto.h b/fs/ceph/crypto.h deleted file mode 100644 index bdf38607323c..000000000000 --- a/fs/ceph/crypto.h +++ /dev/null | |||
@@ -1,48 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_CRYPTO_H | ||
2 | #define _FS_CEPH_CRYPTO_H | ||
3 | |||
4 | #include "types.h" | ||
5 | #include "buffer.h" | ||
6 | |||
7 | /* | ||
8 | * cryptographic secret | ||
9 | */ | ||
10 | struct ceph_crypto_key { | ||
11 | int type; | ||
12 | struct ceph_timespec created; | ||
13 | int len; | ||
14 | void *key; | ||
15 | }; | ||
16 | |||
17 | static inline void ceph_crypto_key_destroy(struct ceph_crypto_key *key) | ||
18 | { | ||
19 | kfree(key->key); | ||
20 | } | ||
21 | |||
22 | extern int ceph_crypto_key_encode(struct ceph_crypto_key *key, | ||
23 | void **p, void *end); | ||
24 | extern int ceph_crypto_key_decode(struct ceph_crypto_key *key, | ||
25 | void **p, void *end); | ||
26 | extern int ceph_crypto_key_unarmor(struct ceph_crypto_key *key, const char *in); | ||
27 | |||
28 | /* crypto.c */ | ||
29 | extern int ceph_decrypt(struct ceph_crypto_key *secret, | ||
30 | void *dst, size_t *dst_len, | ||
31 | const void *src, size_t src_len); | ||
32 | extern int ceph_encrypt(struct ceph_crypto_key *secret, | ||
33 | void *dst, size_t *dst_len, | ||
34 | const void *src, size_t src_len); | ||
35 | extern int ceph_decrypt2(struct ceph_crypto_key *secret, | ||
36 | void *dst1, size_t *dst1_len, | ||
37 | void *dst2, size_t *dst2_len, | ||
38 | const void *src, size_t src_len); | ||
39 | extern int ceph_encrypt2(struct ceph_crypto_key *secret, | ||
40 | void *dst, size_t *dst_len, | ||
41 | const void *src1, size_t src1_len, | ||
42 | const void *src2, size_t src2_len); | ||
43 | |||
44 | /* armor.c */ | ||
45 | extern int ceph_armor(char *dst, const char *src, const char *end); | ||
46 | extern int ceph_unarmor(char *dst, const char *src, const char *end); | ||
47 | |||
48 | #endif | ||
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 6fd8b20a8611..7ae1b3d55b58 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/device.h> | 3 | #include <linux/device.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
@@ -7,143 +7,49 @@ | |||
7 | #include <linux/debugfs.h> | 7 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 8 | #include <linux/seq_file.h> |
9 | 9 | ||
10 | #include <linux/ceph/libceph.h> | ||
11 | #include <linux/ceph/mon_client.h> | ||
12 | #include <linux/ceph/auth.h> | ||
13 | #include <linux/ceph/debugfs.h> | ||
14 | |||
10 | #include "super.h" | 15 | #include "super.h" |
11 | #include "mds_client.h" | ||
12 | #include "mon_client.h" | ||
13 | #include "auth.h" | ||
14 | 16 | ||
15 | #ifdef CONFIG_DEBUG_FS | 17 | #ifdef CONFIG_DEBUG_FS |
16 | 18 | ||
17 | /* | 19 | #include "mds_client.h" |
18 | * Implement /sys/kernel/debug/ceph fun | ||
19 | * | ||
20 | * /sys/kernel/debug/ceph/client* - an instance of the ceph client | ||
21 | * .../osdmap - current osdmap | ||
22 | * .../mdsmap - current mdsmap | ||
23 | * .../monmap - current monmap | ||
24 | * .../osdc - active osd requests | ||
25 | * .../mdsc - active mds requests | ||
26 | * .../monc - mon client state | ||
27 | * .../dentry_lru - dump contents of dentry lru | ||
28 | * .../caps - expose cap (reservation) stats | ||
29 | * .../bdi - symlink to ../../bdi/something | ||
30 | */ | ||
31 | |||
32 | static struct dentry *ceph_debugfs_dir; | ||
33 | |||
34 | static int monmap_show(struct seq_file *s, void *p) | ||
35 | { | ||
36 | int i; | ||
37 | struct ceph_client *client = s->private; | ||
38 | |||
39 | if (client->monc.monmap == NULL) | ||
40 | return 0; | ||
41 | |||
42 | seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); | ||
43 | for (i = 0; i < client->monc.monmap->num_mon; i++) { | ||
44 | struct ceph_entity_inst *inst = | ||
45 | &client->monc.monmap->mon_inst[i]; | ||
46 | |||
47 | seq_printf(s, "\t%s%lld\t%s\n", | ||
48 | ENTITY_NAME(inst->name), | ||
49 | pr_addr(&inst->addr.in_addr)); | ||
50 | } | ||
51 | return 0; | ||
52 | } | ||
53 | 20 | ||
54 | static int mdsmap_show(struct seq_file *s, void *p) | 21 | static int mdsmap_show(struct seq_file *s, void *p) |
55 | { | 22 | { |
56 | int i; | 23 | int i; |
57 | struct ceph_client *client = s->private; | 24 | struct ceph_fs_client *fsc = s->private; |
58 | 25 | ||
59 | if (client->mdsc.mdsmap == NULL) | 26 | if (fsc->mdsc == NULL || fsc->mdsc->mdsmap == NULL) |
60 | return 0; | 27 | return 0; |
61 | seq_printf(s, "epoch %d\n", client->mdsc.mdsmap->m_epoch); | 28 | seq_printf(s, "epoch %d\n", fsc->mdsc->mdsmap->m_epoch); |
62 | seq_printf(s, "root %d\n", client->mdsc.mdsmap->m_root); | 29 | seq_printf(s, "root %d\n", fsc->mdsc->mdsmap->m_root); |
63 | seq_printf(s, "session_timeout %d\n", | 30 | seq_printf(s, "session_timeout %d\n", |
64 | client->mdsc.mdsmap->m_session_timeout); | 31 | fsc->mdsc->mdsmap->m_session_timeout); |
65 | seq_printf(s, "session_autoclose %d\n", | 32 | seq_printf(s, "session_autoclose %d\n", |
66 | client->mdsc.mdsmap->m_session_autoclose); | 33 | fsc->mdsc->mdsmap->m_session_autoclose); |
67 | for (i = 0; i < client->mdsc.mdsmap->m_max_mds; i++) { | 34 | for (i = 0; i < fsc->mdsc->mdsmap->m_max_mds; i++) { |
68 | struct ceph_entity_addr *addr = | 35 | struct ceph_entity_addr *addr = |
69 | &client->mdsc.mdsmap->m_info[i].addr; | 36 | &fsc->mdsc->mdsmap->m_info[i].addr; |
70 | int state = client->mdsc.mdsmap->m_info[i].state; | 37 | int state = fsc->mdsc->mdsmap->m_info[i].state; |
71 | 38 | ||
72 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, pr_addr(&addr->in_addr), | 39 | seq_printf(s, "\tmds%d\t%s\t(%s)\n", i, |
40 | ceph_pr_addr(&addr->in_addr), | ||
73 | ceph_mds_state_name(state)); | 41 | ceph_mds_state_name(state)); |
74 | } | 42 | } |
75 | return 0; | 43 | return 0; |
76 | } | 44 | } |
77 | 45 | ||
78 | static int osdmap_show(struct seq_file *s, void *p) | 46 | /* |
79 | { | 47 | * mdsc debugfs |
80 | int i; | 48 | */ |
81 | struct ceph_client *client = s->private; | ||
82 | struct rb_node *n; | ||
83 | |||
84 | if (client->osdc.osdmap == NULL) | ||
85 | return 0; | ||
86 | seq_printf(s, "epoch %d\n", client->osdc.osdmap->epoch); | ||
87 | seq_printf(s, "flags%s%s\n", | ||
88 | (client->osdc.osdmap->flags & CEPH_OSDMAP_NEARFULL) ? | ||
89 | " NEARFULL" : "", | ||
90 | (client->osdc.osdmap->flags & CEPH_OSDMAP_FULL) ? | ||
91 | " FULL" : ""); | ||
92 | for (n = rb_first(&client->osdc.osdmap->pg_pools); n; n = rb_next(n)) { | ||
93 | struct ceph_pg_pool_info *pool = | ||
94 | rb_entry(n, struct ceph_pg_pool_info, node); | ||
95 | seq_printf(s, "pg_pool %d pg_num %d / %d, lpg_num %d / %d\n", | ||
96 | pool->id, pool->v.pg_num, pool->pg_num_mask, | ||
97 | pool->v.lpg_num, pool->lpg_num_mask); | ||
98 | } | ||
99 | for (i = 0; i < client->osdc.osdmap->max_osd; i++) { | ||
100 | struct ceph_entity_addr *addr = | ||
101 | &client->osdc.osdmap->osd_addr[i]; | ||
102 | int state = client->osdc.osdmap->osd_state[i]; | ||
103 | char sb[64]; | ||
104 | |||
105 | seq_printf(s, "\tosd%d\t%s\t%3d%%\t(%s)\n", | ||
106 | i, pr_addr(&addr->in_addr), | ||
107 | ((client->osdc.osdmap->osd_weight[i]*100) >> 16), | ||
108 | ceph_osdmap_state_str(sb, sizeof(sb), state)); | ||
109 | } | ||
110 | return 0; | ||
111 | } | ||
112 | |||
113 | static int monc_show(struct seq_file *s, void *p) | ||
114 | { | ||
115 | struct ceph_client *client = s->private; | ||
116 | struct ceph_mon_generic_request *req; | ||
117 | struct ceph_mon_client *monc = &client->monc; | ||
118 | struct rb_node *rp; | ||
119 | |||
120 | mutex_lock(&monc->mutex); | ||
121 | |||
122 | if (monc->have_mdsmap) | ||
123 | seq_printf(s, "have mdsmap %u\n", (unsigned)monc->have_mdsmap); | ||
124 | if (monc->have_osdmap) | ||
125 | seq_printf(s, "have osdmap %u\n", (unsigned)monc->have_osdmap); | ||
126 | if (monc->want_next_osdmap) | ||
127 | seq_printf(s, "want next osdmap\n"); | ||
128 | |||
129 | for (rp = rb_first(&monc->generic_request_tree); rp; rp = rb_next(rp)) { | ||
130 | __u16 op; | ||
131 | req = rb_entry(rp, struct ceph_mon_generic_request, node); | ||
132 | op = le16_to_cpu(req->request->hdr.type); | ||
133 | if (op == CEPH_MSG_STATFS) | ||
134 | seq_printf(s, "%lld statfs\n", req->tid); | ||
135 | else | ||
136 | seq_printf(s, "%lld unknown\n", req->tid); | ||
137 | } | ||
138 | |||
139 | mutex_unlock(&monc->mutex); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int mdsc_show(struct seq_file *s, void *p) | 49 | static int mdsc_show(struct seq_file *s, void *p) |
144 | { | 50 | { |
145 | struct ceph_client *client = s->private; | 51 | struct ceph_fs_client *fsc = s->private; |
146 | struct ceph_mds_client *mdsc = &client->mdsc; | 52 | struct ceph_mds_client *mdsc = fsc->mdsc; |
147 | struct ceph_mds_request *req; | 53 | struct ceph_mds_request *req; |
148 | struct rb_node *rp; | 54 | struct rb_node *rp; |
149 | int pathlen; | 55 | int pathlen; |
@@ -214,61 +120,12 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
214 | return 0; | 120 | return 0; |
215 | } | 121 | } |
216 | 122 | ||
217 | static int osdc_show(struct seq_file *s, void *pp) | ||
218 | { | ||
219 | struct ceph_client *client = s->private; | ||
220 | struct ceph_osd_client *osdc = &client->osdc; | ||
221 | struct rb_node *p; | ||
222 | |||
223 | mutex_lock(&osdc->request_mutex); | ||
224 | for (p = rb_first(&osdc->requests); p; p = rb_next(p)) { | ||
225 | struct ceph_osd_request *req; | ||
226 | struct ceph_osd_request_head *head; | ||
227 | struct ceph_osd_op *op; | ||
228 | int num_ops; | ||
229 | int opcode, olen; | ||
230 | int i; | ||
231 | |||
232 | req = rb_entry(p, struct ceph_osd_request, r_node); | ||
233 | |||
234 | seq_printf(s, "%lld\tosd%d\t%d.%x\t", req->r_tid, | ||
235 | req->r_osd ? req->r_osd->o_osd : -1, | ||
236 | le32_to_cpu(req->r_pgid.pool), | ||
237 | le16_to_cpu(req->r_pgid.ps)); | ||
238 | |||
239 | head = req->r_request->front.iov_base; | ||
240 | op = (void *)(head + 1); | ||
241 | |||
242 | num_ops = le16_to_cpu(head->num_ops); | ||
243 | olen = le32_to_cpu(head->object_len); | ||
244 | seq_printf(s, "%.*s", olen, | ||
245 | (const char *)(head->ops + num_ops)); | ||
246 | |||
247 | if (req->r_reassert_version.epoch) | ||
248 | seq_printf(s, "\t%u'%llu", | ||
249 | (unsigned)le32_to_cpu(req->r_reassert_version.epoch), | ||
250 | le64_to_cpu(req->r_reassert_version.version)); | ||
251 | else | ||
252 | seq_printf(s, "\t"); | ||
253 | |||
254 | for (i = 0; i < num_ops; i++) { | ||
255 | opcode = le16_to_cpu(op->op); | ||
256 | seq_printf(s, "\t%s", ceph_osd_op_name(opcode)); | ||
257 | op++; | ||
258 | } | ||
259 | |||
260 | seq_printf(s, "\n"); | ||
261 | } | ||
262 | mutex_unlock(&osdc->request_mutex); | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static int caps_show(struct seq_file *s, void *p) | 123 | static int caps_show(struct seq_file *s, void *p) |
267 | { | 124 | { |
268 | struct ceph_client *client = s->private; | 125 | struct ceph_fs_client *fsc = s->private; |
269 | int total, avail, used, reserved, min; | 126 | int total, avail, used, reserved, min; |
270 | 127 | ||
271 | ceph_reservation_status(client, &total, &avail, &used, &reserved, &min); | 128 | ceph_reservation_status(fsc, &total, &avail, &used, &reserved, &min); |
272 | seq_printf(s, "total\t\t%d\n" | 129 | seq_printf(s, "total\t\t%d\n" |
273 | "avail\t\t%d\n" | 130 | "avail\t\t%d\n" |
274 | "used\t\t%d\n" | 131 | "used\t\t%d\n" |
@@ -280,8 +137,8 @@ static int caps_show(struct seq_file *s, void *p) | |||
280 | 137 | ||
281 | static int dentry_lru_show(struct seq_file *s, void *ptr) | 138 | static int dentry_lru_show(struct seq_file *s, void *ptr) |
282 | { | 139 | { |
283 | struct ceph_client *client = s->private; | 140 | struct ceph_fs_client *fsc = s->private; |
284 | struct ceph_mds_client *mdsc = &client->mdsc; | 141 | struct ceph_mds_client *mdsc = fsc->mdsc; |
285 | struct ceph_dentry_info *di; | 142 | struct ceph_dentry_info *di; |
286 | 143 | ||
287 | spin_lock(&mdsc->dentry_lru_lock); | 144 | spin_lock(&mdsc->dentry_lru_lock); |
@@ -295,199 +152,124 @@ static int dentry_lru_show(struct seq_file *s, void *ptr) | |||
295 | return 0; | 152 | return 0; |
296 | } | 153 | } |
297 | 154 | ||
298 | #define DEFINE_SHOW_FUNC(name) \ | 155 | CEPH_DEFINE_SHOW_FUNC(mdsmap_show) |
299 | static int name##_open(struct inode *inode, struct file *file) \ | 156 | CEPH_DEFINE_SHOW_FUNC(mdsc_show) |
300 | { \ | 157 | CEPH_DEFINE_SHOW_FUNC(caps_show) |
301 | struct seq_file *sf; \ | 158 | CEPH_DEFINE_SHOW_FUNC(dentry_lru_show) |
302 | int ret; \ | 159 | |
303 | \ | ||
304 | ret = single_open(file, name, NULL); \ | ||
305 | sf = file->private_data; \ | ||
306 | sf->private = inode->i_private; \ | ||
307 | return ret; \ | ||
308 | } \ | ||
309 | \ | ||
310 | static const struct file_operations name##_fops = { \ | ||
311 | .open = name##_open, \ | ||
312 | .read = seq_read, \ | ||
313 | .llseek = seq_lseek, \ | ||
314 | .release = single_release, \ | ||
315 | }; | ||
316 | |||
317 | DEFINE_SHOW_FUNC(monmap_show) | ||
318 | DEFINE_SHOW_FUNC(mdsmap_show) | ||
319 | DEFINE_SHOW_FUNC(osdmap_show) | ||
320 | DEFINE_SHOW_FUNC(monc_show) | ||
321 | DEFINE_SHOW_FUNC(mdsc_show) | ||
322 | DEFINE_SHOW_FUNC(osdc_show) | ||
323 | DEFINE_SHOW_FUNC(dentry_lru_show) | ||
324 | DEFINE_SHOW_FUNC(caps_show) | ||
325 | 160 | ||
161 | /* | ||
162 | * debugfs | ||
163 | */ | ||
326 | static int congestion_kb_set(void *data, u64 val) | 164 | static int congestion_kb_set(void *data, u64 val) |
327 | { | 165 | { |
328 | struct ceph_client *client = (struct ceph_client *)data; | 166 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
329 | |||
330 | if (client) | ||
331 | client->mount_args->congestion_kb = (int)val; | ||
332 | 167 | ||
168 | fsc->mount_options->congestion_kb = (int)val; | ||
333 | return 0; | 169 | return 0; |
334 | } | 170 | } |
335 | 171 | ||
336 | static int congestion_kb_get(void *data, u64 *val) | 172 | static int congestion_kb_get(void *data, u64 *val) |
337 | { | 173 | { |
338 | struct ceph_client *client = (struct ceph_client *)data; | 174 | struct ceph_fs_client *fsc = (struct ceph_fs_client *)data; |
339 | |||
340 | if (client) | ||
341 | *val = (u64)client->mount_args->congestion_kb; | ||
342 | 175 | ||
176 | *val = (u64)fsc->mount_options->congestion_kb; | ||
343 | return 0; | 177 | return 0; |
344 | } | 178 | } |
345 | 179 | ||
346 | |||
347 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, | 180 | DEFINE_SIMPLE_ATTRIBUTE(congestion_kb_fops, congestion_kb_get, |
348 | congestion_kb_set, "%llu\n"); | 181 | congestion_kb_set, "%llu\n"); |
349 | 182 | ||
350 | int __init ceph_debugfs_init(void) | ||
351 | { | ||
352 | ceph_debugfs_dir = debugfs_create_dir("ceph", NULL); | ||
353 | if (!ceph_debugfs_dir) | ||
354 | return -ENOMEM; | ||
355 | return 0; | ||
356 | } | ||
357 | 183 | ||
358 | void ceph_debugfs_cleanup(void) | 184 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
359 | { | 185 | { |
360 | debugfs_remove(ceph_debugfs_dir); | 186 | dout("ceph_fs_debugfs_cleanup\n"); |
187 | debugfs_remove(fsc->debugfs_bdi); | ||
188 | debugfs_remove(fsc->debugfs_congestion_kb); | ||
189 | debugfs_remove(fsc->debugfs_mdsmap); | ||
190 | debugfs_remove(fsc->debugfs_caps); | ||
191 | debugfs_remove(fsc->debugfs_mdsc); | ||
192 | debugfs_remove(fsc->debugfs_dentry_lru); | ||
361 | } | 193 | } |
362 | 194 | ||
363 | int ceph_debugfs_client_init(struct ceph_client *client) | 195 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
364 | { | 196 | { |
365 | int ret = 0; | 197 | char name[100]; |
366 | char name[80]; | 198 | int err = -ENOMEM; |
367 | |||
368 | snprintf(name, sizeof(name), "%pU.client%lld", &client->fsid, | ||
369 | client->monc.auth->global_id); | ||
370 | 199 | ||
371 | client->debugfs_dir = debugfs_create_dir(name, ceph_debugfs_dir); | 200 | dout("ceph_fs_debugfs_init\n"); |
372 | if (!client->debugfs_dir) | 201 | fsc->debugfs_congestion_kb = |
373 | goto out; | 202 | debugfs_create_file("writeback_congestion_kb", |
374 | 203 | 0600, | |
375 | client->monc.debugfs_file = debugfs_create_file("monc", | 204 | fsc->client->debugfs_dir, |
376 | 0600, | 205 | fsc, |
377 | client->debugfs_dir, | 206 | &congestion_kb_fops); |
378 | client, | 207 | if (!fsc->debugfs_congestion_kb) |
379 | &monc_show_fops); | ||
380 | if (!client->monc.debugfs_file) | ||
381 | goto out; | 208 | goto out; |
382 | 209 | ||
383 | client->mdsc.debugfs_file = debugfs_create_file("mdsc", | 210 | dout("a\n"); |
384 | 0600, | ||
385 | client->debugfs_dir, | ||
386 | client, | ||
387 | &mdsc_show_fops); | ||
388 | if (!client->mdsc.debugfs_file) | ||
389 | goto out; | ||
390 | 211 | ||
391 | client->osdc.debugfs_file = debugfs_create_file("osdc", | 212 | snprintf(name, sizeof(name), "../../bdi/%s", |
392 | 0600, | 213 | dev_name(fsc->backing_dev_info.dev)); |
393 | client->debugfs_dir, | 214 | fsc->debugfs_bdi = |
394 | client, | 215 | debugfs_create_symlink("bdi", |
395 | &osdc_show_fops); | 216 | fsc->client->debugfs_dir, |
396 | if (!client->osdc.debugfs_file) | 217 | name); |
218 | if (!fsc->debugfs_bdi) | ||
397 | goto out; | 219 | goto out; |
398 | 220 | ||
399 | client->debugfs_monmap = debugfs_create_file("monmap", | 221 | dout("b\n"); |
222 | fsc->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
400 | 0600, | 223 | 0600, |
401 | client->debugfs_dir, | 224 | fsc->client->debugfs_dir, |
402 | client, | 225 | fsc, |
403 | &monmap_show_fops); | ||
404 | if (!client->debugfs_monmap) | ||
405 | goto out; | ||
406 | |||
407 | client->debugfs_mdsmap = debugfs_create_file("mdsmap", | ||
408 | 0600, | ||
409 | client->debugfs_dir, | ||
410 | client, | ||
411 | &mdsmap_show_fops); | 226 | &mdsmap_show_fops); |
412 | if (!client->debugfs_mdsmap) | 227 | if (!fsc->debugfs_mdsmap) |
413 | goto out; | ||
414 | |||
415 | client->debugfs_osdmap = debugfs_create_file("osdmap", | ||
416 | 0600, | ||
417 | client->debugfs_dir, | ||
418 | client, | ||
419 | &osdmap_show_fops); | ||
420 | if (!client->debugfs_osdmap) | ||
421 | goto out; | 228 | goto out; |
422 | 229 | ||
423 | client->debugfs_dentry_lru = debugfs_create_file("dentry_lru", | 230 | dout("ca\n"); |
424 | 0600, | 231 | fsc->debugfs_mdsc = debugfs_create_file("mdsc", |
425 | client->debugfs_dir, | 232 | 0600, |
426 | client, | 233 | fsc->client->debugfs_dir, |
427 | &dentry_lru_show_fops); | 234 | fsc, |
428 | if (!client->debugfs_dentry_lru) | 235 | &mdsc_show_fops); |
236 | if (!fsc->debugfs_mdsc) | ||
429 | goto out; | 237 | goto out; |
430 | 238 | ||
431 | client->debugfs_caps = debugfs_create_file("caps", | 239 | dout("da\n"); |
240 | fsc->debugfs_caps = debugfs_create_file("caps", | ||
432 | 0400, | 241 | 0400, |
433 | client->debugfs_dir, | 242 | fsc->client->debugfs_dir, |
434 | client, | 243 | fsc, |
435 | &caps_show_fops); | 244 | &caps_show_fops); |
436 | if (!client->debugfs_caps) | 245 | if (!fsc->debugfs_caps) |
437 | goto out; | 246 | goto out; |
438 | 247 | ||
439 | client->debugfs_congestion_kb = | 248 | dout("ea\n"); |
440 | debugfs_create_file("writeback_congestion_kb", | 249 | fsc->debugfs_dentry_lru = debugfs_create_file("dentry_lru", |
441 | 0600, | 250 | 0600, |
442 | client->debugfs_dir, | 251 | fsc->client->debugfs_dir, |
443 | client, | 252 | fsc, |
444 | &congestion_kb_fops); | 253 | &dentry_lru_show_fops); |
445 | if (!client->debugfs_congestion_kb) | 254 | if (!fsc->debugfs_dentry_lru) |
446 | goto out; | 255 | goto out; |
447 | 256 | ||
448 | sprintf(name, "../../bdi/%s", dev_name(client->sb->s_bdi->dev)); | ||
449 | client->debugfs_bdi = debugfs_create_symlink("bdi", client->debugfs_dir, | ||
450 | name); | ||
451 | |||
452 | return 0; | 257 | return 0; |
453 | 258 | ||
454 | out: | 259 | out: |
455 | ceph_debugfs_client_cleanup(client); | 260 | ceph_fs_debugfs_cleanup(fsc); |
456 | return ret; | 261 | return err; |
457 | } | 262 | } |
458 | 263 | ||
459 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | ||
460 | { | ||
461 | debugfs_remove(client->debugfs_bdi); | ||
462 | debugfs_remove(client->debugfs_caps); | ||
463 | debugfs_remove(client->debugfs_dentry_lru); | ||
464 | debugfs_remove(client->debugfs_osdmap); | ||
465 | debugfs_remove(client->debugfs_mdsmap); | ||
466 | debugfs_remove(client->debugfs_monmap); | ||
467 | debugfs_remove(client->osdc.debugfs_file); | ||
468 | debugfs_remove(client->mdsc.debugfs_file); | ||
469 | debugfs_remove(client->monc.debugfs_file); | ||
470 | debugfs_remove(client->debugfs_congestion_kb); | ||
471 | debugfs_remove(client->debugfs_dir); | ||
472 | } | ||
473 | 264 | ||
474 | #else /* CONFIG_DEBUG_FS */ | 265 | #else /* CONFIG_DEBUG_FS */ |
475 | 266 | ||
476 | int __init ceph_debugfs_init(void) | 267 | int ceph_fs_debugfs_init(struct ceph_fs_client *fsc) |
477 | { | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | void ceph_debugfs_cleanup(void) | ||
482 | { | ||
483 | } | ||
484 | |||
485 | int ceph_debugfs_client_init(struct ceph_client *client) | ||
486 | { | 268 | { |
487 | return 0; | 269 | return 0; |
488 | } | 270 | } |
489 | 271 | ||
490 | void ceph_debugfs_client_cleanup(struct ceph_client *client) | 272 | void ceph_fs_debugfs_cleanup(struct ceph_fs_client *fsc) |
491 | { | 273 | { |
492 | } | 274 | } |
493 | 275 | ||
diff --git a/fs/ceph/decode.h b/fs/ceph/decode.h deleted file mode 100644 index 3d25415afe63..000000000000 --- a/fs/ceph/decode.h +++ /dev/null | |||
@@ -1,196 +0,0 @@ | |||
1 | #ifndef __CEPH_DECODE_H | ||
2 | #define __CEPH_DECODE_H | ||
3 | |||
4 | #include <asm/unaligned.h> | ||
5 | #include <linux/time.h> | ||
6 | |||
7 | #include "types.h" | ||
8 | |||
9 | /* | ||
10 | * in all cases, | ||
11 | * void **p pointer to position pointer | ||
12 | * void *end pointer to end of buffer (last byte + 1) | ||
13 | */ | ||
14 | |||
15 | static inline u64 ceph_decode_64(void **p) | ||
16 | { | ||
17 | u64 v = get_unaligned_le64(*p); | ||
18 | *p += sizeof(u64); | ||
19 | return v; | ||
20 | } | ||
21 | static inline u32 ceph_decode_32(void **p) | ||
22 | { | ||
23 | u32 v = get_unaligned_le32(*p); | ||
24 | *p += sizeof(u32); | ||
25 | return v; | ||
26 | } | ||
27 | static inline u16 ceph_decode_16(void **p) | ||
28 | { | ||
29 | u16 v = get_unaligned_le16(*p); | ||
30 | *p += sizeof(u16); | ||
31 | return v; | ||
32 | } | ||
33 | static inline u8 ceph_decode_8(void **p) | ||
34 | { | ||
35 | u8 v = *(u8 *)*p; | ||
36 | (*p)++; | ||
37 | return v; | ||
38 | } | ||
39 | static inline void ceph_decode_copy(void **p, void *pv, size_t n) | ||
40 | { | ||
41 | memcpy(pv, *p, n); | ||
42 | *p += n; | ||
43 | } | ||
44 | |||
45 | /* | ||
46 | * bounds check input. | ||
47 | */ | ||
48 | #define ceph_decode_need(p, end, n, bad) \ | ||
49 | do { \ | ||
50 | if (unlikely(*(p) + (n) > (end))) \ | ||
51 | goto bad; \ | ||
52 | } while (0) | ||
53 | |||
54 | #define ceph_decode_64_safe(p, end, v, bad) \ | ||
55 | do { \ | ||
56 | ceph_decode_need(p, end, sizeof(u64), bad); \ | ||
57 | v = ceph_decode_64(p); \ | ||
58 | } while (0) | ||
59 | #define ceph_decode_32_safe(p, end, v, bad) \ | ||
60 | do { \ | ||
61 | ceph_decode_need(p, end, sizeof(u32), bad); \ | ||
62 | v = ceph_decode_32(p); \ | ||
63 | } while (0) | ||
64 | #define ceph_decode_16_safe(p, end, v, bad) \ | ||
65 | do { \ | ||
66 | ceph_decode_need(p, end, sizeof(u16), bad); \ | ||
67 | v = ceph_decode_16(p); \ | ||
68 | } while (0) | ||
69 | #define ceph_decode_8_safe(p, end, v, bad) \ | ||
70 | do { \ | ||
71 | ceph_decode_need(p, end, sizeof(u8), bad); \ | ||
72 | v = ceph_decode_8(p); \ | ||
73 | } while (0) | ||
74 | |||
75 | #define ceph_decode_copy_safe(p, end, pv, n, bad) \ | ||
76 | do { \ | ||
77 | ceph_decode_need(p, end, n, bad); \ | ||
78 | ceph_decode_copy(p, pv, n); \ | ||
79 | } while (0) | ||
80 | |||
81 | /* | ||
82 | * struct ceph_timespec <-> struct timespec | ||
83 | */ | ||
84 | static inline void ceph_decode_timespec(struct timespec *ts, | ||
85 | const struct ceph_timespec *tv) | ||
86 | { | ||
87 | ts->tv_sec = le32_to_cpu(tv->tv_sec); | ||
88 | ts->tv_nsec = le32_to_cpu(tv->tv_nsec); | ||
89 | } | ||
90 | static inline void ceph_encode_timespec(struct ceph_timespec *tv, | ||
91 | const struct timespec *ts) | ||
92 | { | ||
93 | tv->tv_sec = cpu_to_le32(ts->tv_sec); | ||
94 | tv->tv_nsec = cpu_to_le32(ts->tv_nsec); | ||
95 | } | ||
96 | |||
97 | /* | ||
98 | * sockaddr_storage <-> ceph_sockaddr | ||
99 | */ | ||
100 | static inline void ceph_encode_addr(struct ceph_entity_addr *a) | ||
101 | { | ||
102 | __be16 ss_family = htons(a->in_addr.ss_family); | ||
103 | a->in_addr.ss_family = *(__u16 *)&ss_family; | ||
104 | } | ||
105 | static inline void ceph_decode_addr(struct ceph_entity_addr *a) | ||
106 | { | ||
107 | __be16 ss_family = *(__be16 *)&a->in_addr.ss_family; | ||
108 | a->in_addr.ss_family = ntohs(ss_family); | ||
109 | WARN_ON(a->in_addr.ss_family == 512); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * encoders | ||
114 | */ | ||
115 | static inline void ceph_encode_64(void **p, u64 v) | ||
116 | { | ||
117 | put_unaligned_le64(v, (__le64 *)*p); | ||
118 | *p += sizeof(u64); | ||
119 | } | ||
120 | static inline void ceph_encode_32(void **p, u32 v) | ||
121 | { | ||
122 | put_unaligned_le32(v, (__le32 *)*p); | ||
123 | *p += sizeof(u32); | ||
124 | } | ||
125 | static inline void ceph_encode_16(void **p, u16 v) | ||
126 | { | ||
127 | put_unaligned_le16(v, (__le16 *)*p); | ||
128 | *p += sizeof(u16); | ||
129 | } | ||
130 | static inline void ceph_encode_8(void **p, u8 v) | ||
131 | { | ||
132 | *(u8 *)*p = v; | ||
133 | (*p)++; | ||
134 | } | ||
135 | static inline void ceph_encode_copy(void **p, const void *s, int len) | ||
136 | { | ||
137 | memcpy(*p, s, len); | ||
138 | *p += len; | ||
139 | } | ||
140 | |||
141 | /* | ||
142 | * filepath, string encoders | ||
143 | */ | ||
144 | static inline void ceph_encode_filepath(void **p, void *end, | ||
145 | u64 ino, const char *path) | ||
146 | { | ||
147 | u32 len = path ? strlen(path) : 0; | ||
148 | BUG_ON(*p + sizeof(ino) + sizeof(len) + len > end); | ||
149 | ceph_encode_8(p, 1); | ||
150 | ceph_encode_64(p, ino); | ||
151 | ceph_encode_32(p, len); | ||
152 | if (len) | ||
153 | memcpy(*p, path, len); | ||
154 | *p += len; | ||
155 | } | ||
156 | |||
157 | static inline void ceph_encode_string(void **p, void *end, | ||
158 | const char *s, u32 len) | ||
159 | { | ||
160 | BUG_ON(*p + sizeof(len) + len > end); | ||
161 | ceph_encode_32(p, len); | ||
162 | if (len) | ||
163 | memcpy(*p, s, len); | ||
164 | *p += len; | ||
165 | } | ||
166 | |||
167 | #define ceph_encode_need(p, end, n, bad) \ | ||
168 | do { \ | ||
169 | if (unlikely(*(p) + (n) > (end))) \ | ||
170 | goto bad; \ | ||
171 | } while (0) | ||
172 | |||
173 | #define ceph_encode_64_safe(p, end, v, bad) \ | ||
174 | do { \ | ||
175 | ceph_encode_need(p, end, sizeof(u64), bad); \ | ||
176 | ceph_encode_64(p, v); \ | ||
177 | } while (0) | ||
178 | #define ceph_encode_32_safe(p, end, v, bad) \ | ||
179 | do { \ | ||
180 | ceph_encode_need(p, end, sizeof(u32), bad); \ | ||
181 | ceph_encode_32(p, v); \ | ||
182 | } while (0) | ||
183 | #define ceph_encode_16_safe(p, end, v, bad) \ | ||
184 | do { \ | ||
185 | ceph_encode_need(p, end, sizeof(u16), bad); \ | ||
186 | ceph_encode_16(p, v); \ | ||
187 | } while (0) | ||
188 | |||
189 | #define ceph_encode_copy_safe(p, end, pv, n, bad) \ | ||
190 | do { \ | ||
191 | ceph_encode_need(p, end, n, bad); \ | ||
192 | ceph_encode_copy(p, pv, n); \ | ||
193 | } while (0) | ||
194 | |||
195 | |||
196 | #endif | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 6e4f43ff23ec..e0a2dc6fcafc 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/spinlock.h> | 3 | #include <linux/spinlock.h> |
4 | #include <linux/fs_struct.h> | 4 | #include <linux/fs_struct.h> |
@@ -7,6 +7,7 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | 8 | ||
9 | #include "super.h" | 9 | #include "super.h" |
10 | #include "mds_client.h" | ||
10 | 11 | ||
11 | /* | 12 | /* |
12 | * Directory operations: readdir, lookup, create, link, unlink, | 13 | * Directory operations: readdir, lookup, create, link, unlink, |
@@ -94,10 +95,7 @@ static unsigned fpos_off(loff_t p) | |||
94 | */ | 95 | */ |
95 | static int __dcache_readdir(struct file *filp, | 96 | static int __dcache_readdir(struct file *filp, |
96 | void *dirent, filldir_t filldir) | 97 | void *dirent, filldir_t filldir) |
97 | __releases(inode->i_lock) | ||
98 | __acquires(inode->i_lock) | ||
99 | { | 98 | { |
100 | struct inode *inode = filp->f_dentry->d_inode; | ||
101 | struct ceph_file_info *fi = filp->private_data; | 99 | struct ceph_file_info *fi = filp->private_data; |
102 | struct dentry *parent = filp->f_dentry; | 100 | struct dentry *parent = filp->f_dentry; |
103 | struct inode *dir = parent->d_inode; | 101 | struct inode *dir = parent->d_inode; |
@@ -153,7 +151,6 @@ more: | |||
153 | 151 | ||
154 | atomic_inc(&dentry->d_count); | 152 | atomic_inc(&dentry->d_count); |
155 | spin_unlock(&dcache_lock); | 153 | spin_unlock(&dcache_lock); |
156 | spin_unlock(&inode->i_lock); | ||
157 | 154 | ||
158 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 155 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, |
159 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 156 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
@@ -171,35 +168,30 @@ more: | |||
171 | } else { | 168 | } else { |
172 | dput(last); | 169 | dput(last); |
173 | } | 170 | } |
174 | last = NULL; | ||
175 | } | 171 | } |
176 | |||
177 | spin_lock(&inode->i_lock); | ||
178 | spin_lock(&dcache_lock); | ||
179 | |||
180 | last = dentry; | 172 | last = dentry; |
181 | 173 | ||
182 | if (err < 0) | 174 | if (err < 0) |
183 | goto out_unlock; | 175 | goto out; |
184 | 176 | ||
185 | p = p->prev; | ||
186 | filp->f_pos++; | 177 | filp->f_pos++; |
187 | 178 | ||
188 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ | 179 | /* make sure a dentry wasn't dropped while we didn't have dcache_lock */ |
189 | if ((ceph_inode(dir)->i_ceph_flags & CEPH_I_COMPLETE)) | 180 | if (!ceph_i_test(dir, CEPH_I_COMPLETE)) { |
190 | goto more; | 181 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); |
191 | dout(" lost I_COMPLETE on %p; falling back to mds\n", dir); | 182 | err = -EAGAIN; |
192 | err = -EAGAIN; | 183 | goto out; |
184 | } | ||
185 | |||
186 | spin_lock(&dcache_lock); | ||
187 | p = p->prev; /* advance to next dentry */ | ||
188 | goto more; | ||
193 | 189 | ||
194 | out_unlock: | 190 | out_unlock: |
195 | spin_unlock(&dcache_lock); | 191 | spin_unlock(&dcache_lock); |
196 | 192 | out: | |
197 | if (last) { | 193 | if (last) |
198 | spin_unlock(&inode->i_lock); | ||
199 | dput(last); | 194 | dput(last); |
200 | spin_lock(&inode->i_lock); | ||
201 | } | ||
202 | |||
203 | return err; | 195 | return err; |
204 | } | 196 | } |
205 | 197 | ||
@@ -227,15 +219,15 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
227 | struct ceph_file_info *fi = filp->private_data; | 219 | struct ceph_file_info *fi = filp->private_data; |
228 | struct inode *inode = filp->f_dentry->d_inode; | 220 | struct inode *inode = filp->f_dentry->d_inode; |
229 | struct ceph_inode_info *ci = ceph_inode(inode); | 221 | struct ceph_inode_info *ci = ceph_inode(inode); |
230 | struct ceph_client *client = ceph_inode_to_client(inode); | 222 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
231 | struct ceph_mds_client *mdsc = &client->mdsc; | 223 | struct ceph_mds_client *mdsc = fsc->mdsc; |
232 | unsigned frag = fpos_frag(filp->f_pos); | 224 | unsigned frag = fpos_frag(filp->f_pos); |
233 | int off = fpos_off(filp->f_pos); | 225 | int off = fpos_off(filp->f_pos); |
234 | int err; | 226 | int err; |
235 | u32 ftype; | 227 | u32 ftype; |
236 | struct ceph_mds_reply_info_parsed *rinfo; | 228 | struct ceph_mds_reply_info_parsed *rinfo; |
237 | const int max_entries = client->mount_args->max_readdir; | 229 | const int max_entries = fsc->mount_options->max_readdir; |
238 | const int max_bytes = client->mount_args->max_readdir_bytes; | 230 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
239 | 231 | ||
240 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 232 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); |
241 | if (fi->at_end) | 233 | if (fi->at_end) |
@@ -267,17 +259,17 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
267 | /* can we use the dcache? */ | 259 | /* can we use the dcache? */ |
268 | spin_lock(&inode->i_lock); | 260 | spin_lock(&inode->i_lock); |
269 | if ((filp->f_pos == 2 || fi->dentry) && | 261 | if ((filp->f_pos == 2 || fi->dentry) && |
270 | !ceph_test_opt(client, NOASYNCREADDIR) && | 262 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
271 | ceph_snap(inode) != CEPH_SNAPDIR && | 263 | ceph_snap(inode) != CEPH_SNAPDIR && |
272 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 264 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
273 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 265 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
266 | spin_unlock(&inode->i_lock); | ||
274 | err = __dcache_readdir(filp, dirent, filldir); | 267 | err = __dcache_readdir(filp, dirent, filldir); |
275 | if (err != -EAGAIN) { | 268 | if (err != -EAGAIN) |
276 | spin_unlock(&inode->i_lock); | ||
277 | return err; | 269 | return err; |
278 | } | 270 | } else { |
271 | spin_unlock(&inode->i_lock); | ||
279 | } | 272 | } |
280 | spin_unlock(&inode->i_lock); | ||
281 | if (fi->dentry) { | 273 | if (fi->dentry) { |
282 | err = note_last_dentry(fi, fi->dentry->d_name.name, | 274 | err = note_last_dentry(fi, fi->dentry->d_name.name, |
283 | fi->dentry->d_name.len); | 275 | fi->dentry->d_name.len); |
@@ -487,14 +479,13 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) | |||
487 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, | 479 | struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, |
488 | struct dentry *dentry, int err) | 480 | struct dentry *dentry, int err) |
489 | { | 481 | { |
490 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 482 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
491 | struct inode *parent = dentry->d_parent->d_inode; | 483 | struct inode *parent = dentry->d_parent->d_inode; |
492 | 484 | ||
493 | /* .snap dir? */ | 485 | /* .snap dir? */ |
494 | if (err == -ENOENT && | 486 | if (err == -ENOENT && |
495 | ceph_vino(parent).ino != CEPH_INO_ROOT && /* no .snap in root dir */ | ||
496 | strcmp(dentry->d_name.name, | 487 | strcmp(dentry->d_name.name, |
497 | client->mount_args->snapdir_name) == 0) { | 488 | fsc->mount_options->snapdir_name) == 0) { |
498 | struct inode *inode = ceph_get_snapdir(parent); | 489 | struct inode *inode = ceph_get_snapdir(parent); |
499 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", | 490 | dout("ENOENT on snapdir %p '%.*s', linking to snapdir %p\n", |
500 | dentry, dentry->d_name.len, dentry->d_name.name, inode); | 491 | dentry, dentry->d_name.len, dentry->d_name.name, inode); |
@@ -539,8 +530,8 @@ static int is_root_ceph_dentry(struct inode *inode, struct dentry *dentry) | |||
539 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | 530 | static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, |
540 | struct nameidata *nd) | 531 | struct nameidata *nd) |
541 | { | 532 | { |
542 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 533 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
543 | struct ceph_mds_client *mdsc = &client->mdsc; | 534 | struct ceph_mds_client *mdsc = fsc->mdsc; |
544 | struct ceph_mds_request *req; | 535 | struct ceph_mds_request *req; |
545 | int op; | 536 | int op; |
546 | int err; | 537 | int err; |
@@ -572,7 +563,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, | |||
572 | spin_lock(&dir->i_lock); | 563 | spin_lock(&dir->i_lock); |
573 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); | 564 | dout(" dir %p flags are %d\n", dir, ci->i_ceph_flags); |
574 | if (strncmp(dentry->d_name.name, | 565 | if (strncmp(dentry->d_name.name, |
575 | client->mount_args->snapdir_name, | 566 | fsc->mount_options->snapdir_name, |
576 | dentry->d_name.len) && | 567 | dentry->d_name.len) && |
577 | !is_root_ceph_dentry(dir, dentry) && | 568 | !is_root_ceph_dentry(dir, dentry) && |
578 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && | 569 | (ci->i_ceph_flags & CEPH_I_COMPLETE) && |
@@ -629,8 +620,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry) | |||
629 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, | 620 | static int ceph_mknod(struct inode *dir, struct dentry *dentry, |
630 | int mode, dev_t rdev) | 621 | int mode, dev_t rdev) |
631 | { | 622 | { |
632 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 623 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
633 | struct ceph_mds_client *mdsc = &client->mdsc; | 624 | struct ceph_mds_client *mdsc = fsc->mdsc; |
634 | struct ceph_mds_request *req; | 625 | struct ceph_mds_request *req; |
635 | int err; | 626 | int err; |
636 | 627 | ||
@@ -685,8 +676,8 @@ static int ceph_create(struct inode *dir, struct dentry *dentry, int mode, | |||
685 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, | 676 | static int ceph_symlink(struct inode *dir, struct dentry *dentry, |
686 | const char *dest) | 677 | const char *dest) |
687 | { | 678 | { |
688 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 679 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
689 | struct ceph_mds_client *mdsc = &client->mdsc; | 680 | struct ceph_mds_client *mdsc = fsc->mdsc; |
690 | struct ceph_mds_request *req; | 681 | struct ceph_mds_request *req; |
691 | int err; | 682 | int err; |
692 | 683 | ||
@@ -716,8 +707,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry, | |||
716 | 707 | ||
717 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) | 708 | static int ceph_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
718 | { | 709 | { |
719 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 710 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
720 | struct ceph_mds_client *mdsc = &client->mdsc; | 711 | struct ceph_mds_client *mdsc = fsc->mdsc; |
721 | struct ceph_mds_request *req; | 712 | struct ceph_mds_request *req; |
722 | int err = -EROFS; | 713 | int err = -EROFS; |
723 | int op; | 714 | int op; |
@@ -758,8 +749,8 @@ out: | |||
758 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, | 749 | static int ceph_link(struct dentry *old_dentry, struct inode *dir, |
759 | struct dentry *dentry) | 750 | struct dentry *dentry) |
760 | { | 751 | { |
761 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 752 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
762 | struct ceph_mds_client *mdsc = &client->mdsc; | 753 | struct ceph_mds_client *mdsc = fsc->mdsc; |
763 | struct ceph_mds_request *req; | 754 | struct ceph_mds_request *req; |
764 | int err; | 755 | int err; |
765 | 756 | ||
@@ -813,8 +804,8 @@ static int drop_caps_for_unlink(struct inode *inode) | |||
813 | */ | 804 | */ |
814 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) | 805 | static int ceph_unlink(struct inode *dir, struct dentry *dentry) |
815 | { | 806 | { |
816 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 807 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
817 | struct ceph_mds_client *mdsc = &client->mdsc; | 808 | struct ceph_mds_client *mdsc = fsc->mdsc; |
818 | struct inode *inode = dentry->d_inode; | 809 | struct inode *inode = dentry->d_inode; |
819 | struct ceph_mds_request *req; | 810 | struct ceph_mds_request *req; |
820 | int err = -EROFS; | 811 | int err = -EROFS; |
@@ -854,8 +845,8 @@ out: | |||
854 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | 845 | static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, |
855 | struct inode *new_dir, struct dentry *new_dentry) | 846 | struct inode *new_dir, struct dentry *new_dentry) |
856 | { | 847 | { |
857 | struct ceph_client *client = ceph_sb_to_client(old_dir->i_sb); | 848 | struct ceph_fs_client *fsc = ceph_sb_to_client(old_dir->i_sb); |
858 | struct ceph_mds_client *mdsc = &client->mdsc; | 849 | struct ceph_mds_client *mdsc = fsc->mdsc; |
859 | struct ceph_mds_request *req; | 850 | struct ceph_mds_request *req; |
860 | int err; | 851 | int err; |
861 | 852 | ||
@@ -1021,11 +1012,15 @@ out_touch: | |||
1021 | static void ceph_dentry_release(struct dentry *dentry) | 1012 | static void ceph_dentry_release(struct dentry *dentry) |
1022 | { | 1013 | { |
1023 | struct ceph_dentry_info *di = ceph_dentry(dentry); | 1014 | struct ceph_dentry_info *di = ceph_dentry(dentry); |
1024 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1015 | struct inode *parent_inode = NULL; |
1025 | u64 snapid = ceph_snap(parent_inode); | 1016 | u64 snapid = CEPH_NOSNAP; |
1026 | 1017 | ||
1018 | if (!IS_ROOT(dentry)) { | ||
1019 | parent_inode = dentry->d_parent->d_inode; | ||
1020 | if (parent_inode) | ||
1021 | snapid = ceph_snap(parent_inode); | ||
1022 | } | ||
1027 | dout("dentry_release %p parent %p\n", dentry, parent_inode); | 1023 | dout("dentry_release %p parent %p\n", dentry, parent_inode); |
1028 | |||
1029 | if (parent_inode && snapid != CEPH_SNAPDIR) { | 1024 | if (parent_inode && snapid != CEPH_SNAPDIR) { |
1030 | struct ceph_inode_info *ci = ceph_inode(parent_inode); | 1025 | struct ceph_inode_info *ci = ceph_inode(parent_inode); |
1031 | 1026 | ||
@@ -1072,7 +1067,7 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size, | |||
1072 | struct ceph_inode_info *ci = ceph_inode(inode); | 1067 | struct ceph_inode_info *ci = ceph_inode(inode); |
1073 | int left; | 1068 | int left; |
1074 | 1069 | ||
1075 | if (!ceph_test_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) | 1070 | if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT)) |
1076 | return -EISDIR; | 1071 | return -EISDIR; |
1077 | 1072 | ||
1078 | if (!cf->dir_info) { | 1073 | if (!cf->dir_info) { |
@@ -1173,7 +1168,7 @@ void ceph_dentry_lru_add(struct dentry *dn) | |||
1173 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, | 1168 | dout("dentry_lru_add %p %p '%.*s'\n", di, dn, |
1174 | dn->d_name.len, dn->d_name.name); | 1169 | dn->d_name.len, dn->d_name.name); |
1175 | if (di) { | 1170 | if (di) { |
1176 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1171 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1177 | spin_lock(&mdsc->dentry_lru_lock); | 1172 | spin_lock(&mdsc->dentry_lru_lock); |
1178 | list_add_tail(&di->lru, &mdsc->dentry_lru); | 1173 | list_add_tail(&di->lru, &mdsc->dentry_lru); |
1179 | mdsc->num_dentry++; | 1174 | mdsc->num_dentry++; |
@@ -1189,7 +1184,7 @@ void ceph_dentry_lru_touch(struct dentry *dn) | |||
1189 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, | 1184 | dout("dentry_lru_touch %p %p '%.*s' (offset %lld)\n", di, dn, |
1190 | dn->d_name.len, dn->d_name.name, di->offset); | 1185 | dn->d_name.len, dn->d_name.name, di->offset); |
1191 | if (di) { | 1186 | if (di) { |
1192 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1187 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1193 | spin_lock(&mdsc->dentry_lru_lock); | 1188 | spin_lock(&mdsc->dentry_lru_lock); |
1194 | list_move_tail(&di->lru, &mdsc->dentry_lru); | 1189 | list_move_tail(&di->lru, &mdsc->dentry_lru); |
1195 | spin_unlock(&mdsc->dentry_lru_lock); | 1190 | spin_unlock(&mdsc->dentry_lru_lock); |
@@ -1204,7 +1199,7 @@ void ceph_dentry_lru_del(struct dentry *dn) | |||
1204 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, | 1199 | dout("dentry_lru_del %p %p '%.*s'\n", di, dn, |
1205 | dn->d_name.len, dn->d_name.name); | 1200 | dn->d_name.len, dn->d_name.name); |
1206 | if (di) { | 1201 | if (di) { |
1207 | mdsc = &ceph_sb_to_client(dn->d_sb)->mdsc; | 1202 | mdsc = ceph_sb_to_client(dn->d_sb)->mdsc; |
1208 | spin_lock(&mdsc->dentry_lru_lock); | 1203 | spin_lock(&mdsc->dentry_lru_lock); |
1209 | list_del_init(&di->lru); | 1204 | list_del_init(&di->lru); |
1210 | mdsc->num_dentry--; | 1205 | mdsc->num_dentry--; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 4480cb1c63e7..2297d9426992 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -1,10 +1,11 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/exportfs.h> | 3 | #include <linux/exportfs.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | #include <asm/unaligned.h> | 5 | #include <asm/unaligned.h> |
6 | 6 | ||
7 | #include "super.h" | 7 | #include "super.h" |
8 | #include "mds_client.h" | ||
8 | 9 | ||
9 | /* | 10 | /* |
10 | * NFS export support | 11 | * NFS export support |
@@ -42,32 +43,37 @@ struct ceph_nfs_confh { | |||
42 | static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, | 43 | static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, |
43 | int connectable) | 44 | int connectable) |
44 | { | 45 | { |
46 | int type; | ||
45 | struct ceph_nfs_fh *fh = (void *)rawfh; | 47 | struct ceph_nfs_fh *fh = (void *)rawfh; |
46 | struct ceph_nfs_confh *cfh = (void *)rawfh; | 48 | struct ceph_nfs_confh *cfh = (void *)rawfh; |
47 | struct dentry *parent = dentry->d_parent; | 49 | struct dentry *parent = dentry->d_parent; |
48 | struct inode *inode = dentry->d_inode; | 50 | struct inode *inode = dentry->d_inode; |
49 | int type; | 51 | int connected_handle_length = sizeof(*cfh)/4; |
52 | int handle_length = sizeof(*fh)/4; | ||
50 | 53 | ||
51 | /* don't re-export snaps */ | 54 | /* don't re-export snaps */ |
52 | if (ceph_snap(inode) != CEPH_NOSNAP) | 55 | if (ceph_snap(inode) != CEPH_NOSNAP) |
53 | return -EINVAL; | 56 | return -EINVAL; |
54 | 57 | ||
55 | if (*max_len >= sizeof(*cfh)) { | 58 | if (*max_len >= connected_handle_length) { |
56 | dout("encode_fh %p connectable\n", dentry); | 59 | dout("encode_fh %p connectable\n", dentry); |
57 | cfh->ino = ceph_ino(dentry->d_inode); | 60 | cfh->ino = ceph_ino(dentry->d_inode); |
58 | cfh->parent_ino = ceph_ino(parent->d_inode); | 61 | cfh->parent_ino = ceph_ino(parent->d_inode); |
59 | cfh->parent_name_hash = parent->d_name.hash; | 62 | cfh->parent_name_hash = parent->d_name.hash; |
60 | *max_len = sizeof(*cfh); | 63 | *max_len = connected_handle_length; |
61 | type = 2; | 64 | type = 2; |
62 | } else if (*max_len > sizeof(*fh)) { | 65 | } else if (*max_len >= handle_length) { |
63 | if (connectable) | 66 | if (connectable) { |
64 | return -ENOSPC; | 67 | *max_len = connected_handle_length; |
68 | return 255; | ||
69 | } | ||
65 | dout("encode_fh %p\n", dentry); | 70 | dout("encode_fh %p\n", dentry); |
66 | fh->ino = ceph_ino(dentry->d_inode); | 71 | fh->ino = ceph_ino(dentry->d_inode); |
67 | *max_len = sizeof(*fh); | 72 | *max_len = handle_length; |
68 | type = 1; | 73 | type = 1; |
69 | } else { | 74 | } else { |
70 | return -ENOSPC; | 75 | *max_len = handle_length; |
76 | return 255; | ||
71 | } | 77 | } |
72 | return type; | 78 | return type; |
73 | } | 79 | } |
@@ -115,7 +121,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
115 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 121 | static struct dentry *__cfh_to_dentry(struct super_block *sb, |
116 | struct ceph_nfs_confh *cfh) | 122 | struct ceph_nfs_confh *cfh) |
117 | { | 123 | { |
118 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(sb)->mdsc; | 124 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
119 | struct inode *inode; | 125 | struct inode *inode; |
120 | struct dentry *dentry; | 126 | struct dentry *dentry; |
121 | struct ceph_vino vino; | 127 | struct ceph_vino vino; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 8c044a4f0457..e77c28cf3690 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -1,5 +1,6 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | ||
3 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/file.h> | 6 | #include <linux/file.h> |
@@ -38,8 +39,8 @@ | |||
38 | static struct ceph_mds_request * | 39 | static struct ceph_mds_request * |
39 | prepare_open_request(struct super_block *sb, int flags, int create_mode) | 40 | prepare_open_request(struct super_block *sb, int flags, int create_mode) |
40 | { | 41 | { |
41 | struct ceph_client *client = ceph_sb_to_client(sb); | 42 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
42 | struct ceph_mds_client *mdsc = &client->mdsc; | 43 | struct ceph_mds_client *mdsc = fsc->mdsc; |
43 | struct ceph_mds_request *req; | 44 | struct ceph_mds_request *req; |
44 | int want_auth = USE_ANY_MDS; | 45 | int want_auth = USE_ANY_MDS; |
45 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; | 46 | int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN; |
@@ -117,8 +118,8 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode) | |||
117 | int ceph_open(struct inode *inode, struct file *file) | 118 | int ceph_open(struct inode *inode, struct file *file) |
118 | { | 119 | { |
119 | struct ceph_inode_info *ci = ceph_inode(inode); | 120 | struct ceph_inode_info *ci = ceph_inode(inode); |
120 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 121 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
121 | struct ceph_mds_client *mdsc = &client->mdsc; | 122 | struct ceph_mds_client *mdsc = fsc->mdsc; |
122 | struct ceph_mds_request *req; | 123 | struct ceph_mds_request *req; |
123 | struct ceph_file_info *cf = file->private_data; | 124 | struct ceph_file_info *cf = file->private_data; |
124 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 125 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
@@ -216,8 +217,8 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | |||
216 | struct nameidata *nd, int mode, | 217 | struct nameidata *nd, int mode, |
217 | int locked_dir) | 218 | int locked_dir) |
218 | { | 219 | { |
219 | struct ceph_client *client = ceph_sb_to_client(dir->i_sb); | 220 | struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); |
220 | struct ceph_mds_client *mdsc = &client->mdsc; | 221 | struct ceph_mds_client *mdsc = fsc->mdsc; |
221 | struct file *file = nd->intent.open.file; | 222 | struct file *file = nd->intent.open.file; |
222 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); | 223 | struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); |
223 | struct ceph_mds_request *req; | 224 | struct ceph_mds_request *req; |
@@ -270,163 +271,6 @@ int ceph_release(struct inode *inode, struct file *file) | |||
270 | } | 271 | } |
271 | 272 | ||
272 | /* | 273 | /* |
273 | * build a vector of user pages | ||
274 | */ | ||
275 | static struct page **get_direct_page_vector(const char __user *data, | ||
276 | int num_pages, | ||
277 | loff_t off, size_t len) | ||
278 | { | ||
279 | struct page **pages; | ||
280 | int rc; | ||
281 | |||
282 | pages = kmalloc(sizeof(*pages) * num_pages, GFP_NOFS); | ||
283 | if (!pages) | ||
284 | return ERR_PTR(-ENOMEM); | ||
285 | |||
286 | down_read(¤t->mm->mmap_sem); | ||
287 | rc = get_user_pages(current, current->mm, (unsigned long)data, | ||
288 | num_pages, 0, 0, pages, NULL); | ||
289 | up_read(¤t->mm->mmap_sem); | ||
290 | if (rc < 0) | ||
291 | goto fail; | ||
292 | return pages; | ||
293 | |||
294 | fail: | ||
295 | kfree(pages); | ||
296 | return ERR_PTR(rc); | ||
297 | } | ||
298 | |||
299 | static void put_page_vector(struct page **pages, int num_pages) | ||
300 | { | ||
301 | int i; | ||
302 | |||
303 | for (i = 0; i < num_pages; i++) | ||
304 | put_page(pages[i]); | ||
305 | kfree(pages); | ||
306 | } | ||
307 | |||
308 | void ceph_release_page_vector(struct page **pages, int num_pages) | ||
309 | { | ||
310 | int i; | ||
311 | |||
312 | for (i = 0; i < num_pages; i++) | ||
313 | __free_pages(pages[i], 0); | ||
314 | kfree(pages); | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * allocate a vector new pages | ||
319 | */ | ||
320 | static struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags) | ||
321 | { | ||
322 | struct page **pages; | ||
323 | int i; | ||
324 | |||
325 | pages = kmalloc(sizeof(*pages) * num_pages, flags); | ||
326 | if (!pages) | ||
327 | return ERR_PTR(-ENOMEM); | ||
328 | for (i = 0; i < num_pages; i++) { | ||
329 | pages[i] = __page_cache_alloc(flags); | ||
330 | if (pages[i] == NULL) { | ||
331 | ceph_release_page_vector(pages, i); | ||
332 | return ERR_PTR(-ENOMEM); | ||
333 | } | ||
334 | } | ||
335 | return pages; | ||
336 | } | ||
337 | |||
338 | /* | ||
339 | * copy user data into a page vector | ||
340 | */ | ||
341 | static int copy_user_to_page_vector(struct page **pages, | ||
342 | const char __user *data, | ||
343 | loff_t off, size_t len) | ||
344 | { | ||
345 | int i = 0; | ||
346 | int po = off & ~PAGE_CACHE_MASK; | ||
347 | int left = len; | ||
348 | int l, bad; | ||
349 | |||
350 | while (left > 0) { | ||
351 | l = min_t(int, PAGE_CACHE_SIZE-po, left); | ||
352 | bad = copy_from_user(page_address(pages[i]) + po, data, l); | ||
353 | if (bad == l) | ||
354 | return -EFAULT; | ||
355 | data += l - bad; | ||
356 | left -= l - bad; | ||
357 | po += l - bad; | ||
358 | if (po == PAGE_CACHE_SIZE) { | ||
359 | po = 0; | ||
360 | i++; | ||
361 | } | ||
362 | } | ||
363 | return len; | ||
364 | } | ||
365 | |||
366 | /* | ||
367 | * copy user data from a page vector into a user pointer | ||
368 | */ | ||
369 | static int copy_page_vector_to_user(struct page **pages, char __user *data, | ||
370 | loff_t off, size_t len) | ||
371 | { | ||
372 | int i = 0; | ||
373 | int po = off & ~PAGE_CACHE_MASK; | ||
374 | int left = len; | ||
375 | int l, bad; | ||
376 | |||
377 | while (left > 0) { | ||
378 | l = min_t(int, left, PAGE_CACHE_SIZE-po); | ||
379 | bad = copy_to_user(data, page_address(pages[i]) + po, l); | ||
380 | if (bad == l) | ||
381 | return -EFAULT; | ||
382 | data += l - bad; | ||
383 | left -= l - bad; | ||
384 | if (po) { | ||
385 | po += l - bad; | ||
386 | if (po == PAGE_CACHE_SIZE) | ||
387 | po = 0; | ||
388 | } | ||
389 | i++; | ||
390 | } | ||
391 | return len; | ||
392 | } | ||
393 | |||
394 | /* | ||
395 | * Zero an extent within a page vector. Offset is relative to the | ||
396 | * start of the first page. | ||
397 | */ | ||
398 | static void zero_page_vector_range(int off, int len, struct page **pages) | ||
399 | { | ||
400 | int i = off >> PAGE_CACHE_SHIFT; | ||
401 | |||
402 | off &= ~PAGE_CACHE_MASK; | ||
403 | |||
404 | dout("zero_page_vector_page %u~%u\n", off, len); | ||
405 | |||
406 | /* leading partial page? */ | ||
407 | if (off) { | ||
408 | int end = min((int)PAGE_CACHE_SIZE, off + len); | ||
409 | dout("zeroing %d %p head from %d\n", i, pages[i], | ||
410 | (int)off); | ||
411 | zero_user_segment(pages[i], off, end); | ||
412 | len -= (end - off); | ||
413 | i++; | ||
414 | } | ||
415 | while (len >= PAGE_CACHE_SIZE) { | ||
416 | dout("zeroing %d %p len=%d\n", i, pages[i], len); | ||
417 | zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); | ||
418 | len -= PAGE_CACHE_SIZE; | ||
419 | i++; | ||
420 | } | ||
421 | /* trailing partial page? */ | ||
422 | if (len) { | ||
423 | dout("zeroing %d %p tail to %d\n", i, pages[i], (int)len); | ||
424 | zero_user_segment(pages[i], 0, len); | ||
425 | } | ||
426 | } | ||
427 | |||
428 | |||
429 | /* | ||
430 | * Read a range of bytes striped over one or more objects. Iterate over | 274 | * Read a range of bytes striped over one or more objects. Iterate over |
431 | * objects we stripe over. (That's not atomic, but good enough for now.) | 275 | * objects we stripe over. (That's not atomic, but good enough for now.) |
432 | * | 276 | * |
@@ -438,7 +282,7 @@ static int striped_read(struct inode *inode, | |||
438 | struct page **pages, int num_pages, | 282 | struct page **pages, int num_pages, |
439 | int *checkeof) | 283 | int *checkeof) |
440 | { | 284 | { |
441 | struct ceph_client *client = ceph_inode_to_client(inode); | 285 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
442 | struct ceph_inode_info *ci = ceph_inode(inode); | 286 | struct ceph_inode_info *ci = ceph_inode(inode); |
443 | u64 pos, this_len; | 287 | u64 pos, this_len; |
444 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ | 288 | int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */ |
@@ -459,7 +303,7 @@ static int striped_read(struct inode *inode, | |||
459 | 303 | ||
460 | more: | 304 | more: |
461 | this_len = left; | 305 | this_len = left; |
462 | ret = ceph_osdc_readpages(&client->osdc, ceph_vino(inode), | 306 | ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode), |
463 | &ci->i_layout, pos, &this_len, | 307 | &ci->i_layout, pos, &this_len, |
464 | ci->i_truncate_seq, | 308 | ci->i_truncate_seq, |
465 | ci->i_truncate_size, | 309 | ci->i_truncate_size, |
@@ -477,8 +321,8 @@ more: | |||
477 | 321 | ||
478 | if (read < pos - off) { | 322 | if (read < pos - off) { |
479 | dout(" zero gap %llu to %llu\n", off + read, pos); | 323 | dout(" zero gap %llu to %llu\n", off + read, pos); |
480 | zero_page_vector_range(page_off + read, | 324 | ceph_zero_page_vector_range(page_off + read, |
481 | pos - off - read, pages); | 325 | pos - off - read, pages); |
482 | } | 326 | } |
483 | pos += ret; | 327 | pos += ret; |
484 | read = pos - off; | 328 | read = pos - off; |
@@ -495,8 +339,8 @@ more: | |||
495 | /* was original extent fully inside i_size? */ | 339 | /* was original extent fully inside i_size? */ |
496 | if (pos + left <= inode->i_size) { | 340 | if (pos + left <= inode->i_size) { |
497 | dout("zero tail\n"); | 341 | dout("zero tail\n"); |
498 | zero_page_vector_range(page_off + read, len - read, | 342 | ceph_zero_page_vector_range(page_off + read, len - read, |
499 | pages); | 343 | pages); |
500 | read = len; | 344 | read = len; |
501 | goto out; | 345 | goto out; |
502 | } | 346 | } |
@@ -531,7 +375,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
531 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); | 375 | (file->f_flags & O_DIRECT) ? "O_DIRECT" : ""); |
532 | 376 | ||
533 | if (file->f_flags & O_DIRECT) { | 377 | if (file->f_flags & O_DIRECT) { |
534 | pages = get_direct_page_vector(data, num_pages, off, len); | 378 | pages = ceph_get_direct_page_vector(data, num_pages, off, len); |
535 | 379 | ||
536 | /* | 380 | /* |
537 | * flush any page cache pages in this range. this | 381 | * flush any page cache pages in this range. this |
@@ -552,13 +396,13 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data, | |||
552 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); | 396 | ret = striped_read(inode, off, len, pages, num_pages, checkeof); |
553 | 397 | ||
554 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) | 398 | if (ret >= 0 && (file->f_flags & O_DIRECT) == 0) |
555 | ret = copy_page_vector_to_user(pages, data, off, ret); | 399 | ret = ceph_copy_page_vector_to_user(pages, data, off, ret); |
556 | if (ret >= 0) | 400 | if (ret >= 0) |
557 | *poff = off + ret; | 401 | *poff = off + ret; |
558 | 402 | ||
559 | done: | 403 | done: |
560 | if (file->f_flags & O_DIRECT) | 404 | if (file->f_flags & O_DIRECT) |
561 | put_page_vector(pages, num_pages); | 405 | ceph_put_page_vector(pages, num_pages); |
562 | else | 406 | else |
563 | ceph_release_page_vector(pages, num_pages); | 407 | ceph_release_page_vector(pages, num_pages); |
564 | dout("sync_read result %d\n", ret); | 408 | dout("sync_read result %d\n", ret); |
@@ -594,7 +438,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
594 | { | 438 | { |
595 | struct inode *inode = file->f_dentry->d_inode; | 439 | struct inode *inode = file->f_dentry->d_inode; |
596 | struct ceph_inode_info *ci = ceph_inode(inode); | 440 | struct ceph_inode_info *ci = ceph_inode(inode); |
597 | struct ceph_client *client = ceph_inode_to_client(inode); | 441 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
598 | struct ceph_osd_request *req; | 442 | struct ceph_osd_request *req; |
599 | struct page **pages; | 443 | struct page **pages; |
600 | int num_pages; | 444 | int num_pages; |
@@ -642,7 +486,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data, | |||
642 | */ | 486 | */ |
643 | more: | 487 | more: |
644 | len = left; | 488 | len = left; |
645 | req = ceph_osdc_new_request(&client->osdc, &ci->i_layout, | 489 | req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, |
646 | ceph_vino(inode), pos, &len, | 490 | ceph_vino(inode), pos, &len, |
647 | CEPH_OSD_OP_WRITE, flags, | 491 | CEPH_OSD_OP_WRITE, flags, |
648 | ci->i_snap_realm->cached_context, | 492 | ci->i_snap_realm->cached_context, |
@@ -655,7 +499,7 @@ more: | |||
655 | num_pages = calc_pages_for(pos, len); | 499 | num_pages = calc_pages_for(pos, len); |
656 | 500 | ||
657 | if (file->f_flags & O_DIRECT) { | 501 | if (file->f_flags & O_DIRECT) { |
658 | pages = get_direct_page_vector(data, num_pages, pos, len); | 502 | pages = ceph_get_direct_page_vector(data, num_pages, pos, len); |
659 | if (IS_ERR(pages)) { | 503 | if (IS_ERR(pages)) { |
660 | ret = PTR_ERR(pages); | 504 | ret = PTR_ERR(pages); |
661 | goto out; | 505 | goto out; |
@@ -673,7 +517,7 @@ more: | |||
673 | ret = PTR_ERR(pages); | 517 | ret = PTR_ERR(pages); |
674 | goto out; | 518 | goto out; |
675 | } | 519 | } |
676 | ret = copy_user_to_page_vector(pages, data, pos, len); | 520 | ret = ceph_copy_user_to_page_vector(pages, data, pos, len); |
677 | if (ret < 0) { | 521 | if (ret < 0) { |
678 | ceph_release_page_vector(pages, num_pages); | 522 | ceph_release_page_vector(pages, num_pages); |
679 | goto out; | 523 | goto out; |
@@ -689,7 +533,7 @@ more: | |||
689 | req->r_num_pages = num_pages; | 533 | req->r_num_pages = num_pages; |
690 | req->r_inode = inode; | 534 | req->r_inode = inode; |
691 | 535 | ||
692 | ret = ceph_osdc_start_request(&client->osdc, req, false); | 536 | ret = ceph_osdc_start_request(&fsc->client->osdc, req, false); |
693 | if (!ret) { | 537 | if (!ret) { |
694 | if (req->r_safe_callback) { | 538 | if (req->r_safe_callback) { |
695 | /* | 539 | /* |
@@ -697,15 +541,15 @@ more: | |||
697 | * start_request so that a tid has been assigned. | 541 | * start_request so that a tid has been assigned. |
698 | */ | 542 | */ |
699 | spin_lock(&ci->i_unsafe_lock); | 543 | spin_lock(&ci->i_unsafe_lock); |
700 | list_add(&ci->i_unsafe_writes, &req->r_unsafe_item); | 544 | list_add(&req->r_unsafe_item, &ci->i_unsafe_writes); |
701 | spin_unlock(&ci->i_unsafe_lock); | 545 | spin_unlock(&ci->i_unsafe_lock); |
702 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); | 546 | ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR); |
703 | } | 547 | } |
704 | ret = ceph_osdc_wait_request(&client->osdc, req); | 548 | ret = ceph_osdc_wait_request(&fsc->client->osdc, req); |
705 | } | 549 | } |
706 | 550 | ||
707 | if (file->f_flags & O_DIRECT) | 551 | if (file->f_flags & O_DIRECT) |
708 | put_page_vector(pages, num_pages); | 552 | ceph_put_page_vector(pages, num_pages); |
709 | else if (file->f_flags & O_SYNC) | 553 | else if (file->f_flags & O_SYNC) |
710 | ceph_release_page_vector(pages, num_pages); | 554 | ceph_release_page_vector(pages, num_pages); |
711 | 555 | ||
@@ -814,7 +658,8 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
814 | struct ceph_file_info *fi = file->private_data; | 658 | struct ceph_file_info *fi = file->private_data; |
815 | struct inode *inode = file->f_dentry->d_inode; | 659 | struct inode *inode = file->f_dentry->d_inode; |
816 | struct ceph_inode_info *ci = ceph_inode(inode); | 660 | struct ceph_inode_info *ci = ceph_inode(inode); |
817 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 661 | struct ceph_osd_client *osdc = |
662 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
818 | loff_t endoff = pos + iov->iov_len; | 663 | loff_t endoff = pos + iov->iov_len; |
819 | int want, got = 0; | 664 | int want, got = 0; |
820 | int ret, err; | 665 | int ret, err; |
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e7cca414da03..1d6a45b5a04c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/module.h> | 3 | #include <linux/module.h> |
4 | #include <linux/fs.h> | 4 | #include <linux/fs.h> |
@@ -13,7 +13,8 @@ | |||
13 | #include <linux/pagevec.h> | 13 | #include <linux/pagevec.h> |
14 | 14 | ||
15 | #include "super.h" | 15 | #include "super.h" |
16 | #include "decode.h" | 16 | #include "mds_client.h" |
17 | #include <linux/ceph/decode.h> | ||
17 | 18 | ||
18 | /* | 19 | /* |
19 | * Ceph inode operations | 20 | * Ceph inode operations |
@@ -384,7 +385,7 @@ void ceph_destroy_inode(struct inode *inode) | |||
384 | */ | 385 | */ |
385 | if (ci->i_snap_realm) { | 386 | if (ci->i_snap_realm) { |
386 | struct ceph_mds_client *mdsc = | 387 | struct ceph_mds_client *mdsc = |
387 | &ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; | 388 | ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc; |
388 | struct ceph_snap_realm *realm = ci->i_snap_realm; | 389 | struct ceph_snap_realm *realm = ci->i_snap_realm; |
389 | 390 | ||
390 | dout(" dropping residual ref to snap realm %p\n", realm); | 391 | dout(" dropping residual ref to snap realm %p\n", realm); |
@@ -685,7 +686,7 @@ static int fill_inode(struct inode *inode, | |||
685 | } | 686 | } |
686 | 687 | ||
687 | /* it may be better to set st_size in getattr instead? */ | 688 | /* it may be better to set st_size in getattr instead? */ |
688 | if (ceph_test_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) | 689 | if (ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), RBYTES)) |
689 | inode->i_size = ci->i_rbytes; | 690 | inode->i_size = ci->i_rbytes; |
690 | break; | 691 | break; |
691 | default: | 692 | default: |
@@ -845,7 +846,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) | |||
845 | * the caller) if we fail. | 846 | * the caller) if we fail. |
846 | */ | 847 | */ |
847 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | 848 | static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, |
848 | bool *prehash) | 849 | bool *prehash, bool set_offset) |
849 | { | 850 | { |
850 | struct dentry *realdn; | 851 | struct dentry *realdn; |
851 | 852 | ||
@@ -877,7 +878,8 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in, | |||
877 | } | 878 | } |
878 | if ((!prehash || *prehash) && d_unhashed(dn)) | 879 | if ((!prehash || *prehash) && d_unhashed(dn)) |
879 | d_rehash(dn); | 880 | d_rehash(dn); |
880 | ceph_set_dentry_offset(dn); | 881 | if (set_offset) |
882 | ceph_set_dentry_offset(dn); | ||
881 | out: | 883 | out: |
882 | return dn; | 884 | return dn; |
883 | } | 885 | } |
@@ -900,7 +902,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
900 | struct inode *in = NULL; | 902 | struct inode *in = NULL; |
901 | struct ceph_mds_reply_inode *ininfo; | 903 | struct ceph_mds_reply_inode *ininfo; |
902 | struct ceph_vino vino; | 904 | struct ceph_vino vino; |
903 | struct ceph_client *client = ceph_sb_to_client(sb); | 905 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
904 | int i = 0; | 906 | int i = 0; |
905 | int err = 0; | 907 | int err = 0; |
906 | 908 | ||
@@ -964,7 +966,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
964 | */ | 966 | */ |
965 | if (rinfo->head->is_dentry && !req->r_aborted && | 967 | if (rinfo->head->is_dentry && !req->r_aborted && |
966 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, | 968 | (rinfo->head->is_target || strncmp(req->r_dentry->d_name.name, |
967 | client->mount_args->snapdir_name, | 969 | fsc->mount_options->snapdir_name, |
968 | req->r_dentry->d_name.len))) { | 970 | req->r_dentry->d_name.len))) { |
969 | /* | 971 | /* |
970 | * lookup link rename : null -> possibly existing inode | 972 | * lookup link rename : null -> possibly existing inode |
@@ -1062,7 +1064,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1062 | d_delete(dn); | 1064 | d_delete(dn); |
1063 | goto done; | 1065 | goto done; |
1064 | } | 1066 | } |
1065 | dn = splice_dentry(dn, in, &have_lease); | 1067 | dn = splice_dentry(dn, in, &have_lease, true); |
1066 | if (IS_ERR(dn)) { | 1068 | if (IS_ERR(dn)) { |
1067 | err = PTR_ERR(dn); | 1069 | err = PTR_ERR(dn); |
1068 | goto done; | 1070 | goto done; |
@@ -1105,7 +1107,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1105 | goto done; | 1107 | goto done; |
1106 | } | 1108 | } |
1107 | dout(" linking snapped dir %p to dn %p\n", in, dn); | 1109 | dout(" linking snapped dir %p to dn %p\n", in, dn); |
1108 | dn = splice_dentry(dn, in, NULL); | 1110 | dn = splice_dentry(dn, in, NULL, true); |
1109 | if (IS_ERR(dn)) { | 1111 | if (IS_ERR(dn)) { |
1110 | err = PTR_ERR(dn); | 1112 | err = PTR_ERR(dn); |
1111 | goto done; | 1113 | goto done; |
@@ -1237,7 +1239,7 @@ retry_lookup: | |||
1237 | err = PTR_ERR(in); | 1239 | err = PTR_ERR(in); |
1238 | goto out; | 1240 | goto out; |
1239 | } | 1241 | } |
1240 | dn = splice_dentry(dn, in, NULL); | 1242 | dn = splice_dentry(dn, in, NULL, false); |
1241 | if (IS_ERR(dn)) | 1243 | if (IS_ERR(dn)) |
1242 | dn = NULL; | 1244 | dn = NULL; |
1243 | } | 1245 | } |
@@ -1532,7 +1534,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1532 | struct inode *parent_inode = dentry->d_parent->d_inode; | 1534 | struct inode *parent_inode = dentry->d_parent->d_inode; |
1533 | const unsigned int ia_valid = attr->ia_valid; | 1535 | const unsigned int ia_valid = attr->ia_valid; |
1534 | struct ceph_mds_request *req; | 1536 | struct ceph_mds_request *req; |
1535 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(dentry->d_sb)->mdsc; | 1537 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
1536 | int issued; | 1538 | int issued; |
1537 | int release = 0, dirtied = 0; | 1539 | int release = 0, dirtied = 0; |
1538 | int mask = 0; | 1540 | int mask = 0; |
@@ -1727,8 +1729,8 @@ out: | |||
1727 | */ | 1729 | */ |
1728 | int ceph_do_getattr(struct inode *inode, int mask) | 1730 | int ceph_do_getattr(struct inode *inode, int mask) |
1729 | { | 1731 | { |
1730 | struct ceph_client *client = ceph_sb_to_client(inode->i_sb); | 1732 | struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb); |
1731 | struct ceph_mds_client *mdsc = &client->mdsc; | 1733 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1732 | struct ceph_mds_request *req; | 1734 | struct ceph_mds_request *req; |
1733 | int err; | 1735 | int err; |
1734 | 1736 | ||
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index 76e307d2aba1..8888c9ba68db 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -1,8 +1,10 @@ | |||
1 | #include <linux/in.h> | 1 | #include <linux/in.h> |
2 | 2 | ||
3 | #include "ioctl.h" | ||
4 | #include "super.h" | 3 | #include "super.h" |
5 | #include "ceph_debug.h" | 4 | #include "mds_client.h" |
5 | #include <linux/ceph/ceph_debug.h> | ||
6 | |||
7 | #include "ioctl.h" | ||
6 | 8 | ||
7 | 9 | ||
8 | /* | 10 | /* |
@@ -37,7 +39,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
37 | { | 39 | { |
38 | struct inode *inode = file->f_dentry->d_inode; | 40 | struct inode *inode = file->f_dentry->d_inode; |
39 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; | 41 | struct inode *parent_inode = file->f_dentry->d_parent->d_inode; |
40 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 42 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
41 | struct ceph_mds_request *req; | 43 | struct ceph_mds_request *req; |
42 | struct ceph_ioctl_layout l; | 44 | struct ceph_ioctl_layout l; |
43 | int err, i; | 45 | int err, i; |
@@ -90,6 +92,68 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
90 | } | 92 | } |
91 | 93 | ||
92 | /* | 94 | /* |
95 | * Set a layout policy on a directory inode. All items in the tree | ||
96 | * rooted at this inode will inherit this layout on creation, | ||
97 | * (It doesn't apply retroactively ) | ||
98 | * unless a subdirectory has its own layout policy. | ||
99 | */ | ||
100 | static long ceph_ioctl_set_layout_policy (struct file *file, void __user *arg) | ||
101 | { | ||
102 | struct inode *inode = file->f_dentry->d_inode; | ||
103 | struct ceph_mds_request *req; | ||
104 | struct ceph_ioctl_layout l; | ||
105 | int err, i; | ||
106 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | ||
107 | |||
108 | /* copy and validate */ | ||
109 | if (copy_from_user(&l, arg, sizeof(l))) | ||
110 | return -EFAULT; | ||
111 | |||
112 | if ((l.object_size & ~PAGE_MASK) || | ||
113 | (l.stripe_unit & ~PAGE_MASK) || | ||
114 | !l.stripe_unit || | ||
115 | (l.object_size && | ||
116 | (unsigned)l.object_size % (unsigned)l.stripe_unit)) | ||
117 | return -EINVAL; | ||
118 | |||
119 | /* make sure it's a valid data pool */ | ||
120 | if (l.data_pool > 0) { | ||
121 | mutex_lock(&mdsc->mutex); | ||
122 | err = -EINVAL; | ||
123 | for (i = 0; i < mdsc->mdsmap->m_num_data_pg_pools; i++) | ||
124 | if (mdsc->mdsmap->m_data_pg_pools[i] == l.data_pool) { | ||
125 | err = 0; | ||
126 | break; | ||
127 | } | ||
128 | mutex_unlock(&mdsc->mutex); | ||
129 | if (err) | ||
130 | return err; | ||
131 | } | ||
132 | |||
133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETDIRLAYOUT, | ||
134 | USE_AUTH_MDS); | ||
135 | |||
136 | if (IS_ERR(req)) | ||
137 | return PTR_ERR(req); | ||
138 | req->r_inode = igrab(inode); | ||
139 | |||
140 | req->r_args.setlayout.layout.fl_stripe_unit = | ||
141 | cpu_to_le32(l.stripe_unit); | ||
142 | req->r_args.setlayout.layout.fl_stripe_count = | ||
143 | cpu_to_le32(l.stripe_count); | ||
144 | req->r_args.setlayout.layout.fl_object_size = | ||
145 | cpu_to_le32(l.object_size); | ||
146 | req->r_args.setlayout.layout.fl_pg_pool = | ||
147 | cpu_to_le32(l.data_pool); | ||
148 | req->r_args.setlayout.layout.fl_pg_preferred = | ||
149 | cpu_to_le32(l.preferred_osd); | ||
150 | |||
151 | err = ceph_mdsc_do_request(mdsc, inode, req); | ||
152 | ceph_mdsc_put_request(req); | ||
153 | return err; | ||
154 | } | ||
155 | |||
156 | /* | ||
93 | * Return object name, size/offset information, and location (OSD | 157 | * Return object name, size/offset information, and location (OSD |
94 | * number, network address) for a given file offset. | 158 | * number, network address) for a given file offset. |
95 | */ | 159 | */ |
@@ -98,7 +162,8 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) | |||
98 | struct ceph_ioctl_dataloc dl; | 162 | struct ceph_ioctl_dataloc dl; |
99 | struct inode *inode = file->f_dentry->d_inode; | 163 | struct inode *inode = file->f_dentry->d_inode; |
100 | struct ceph_inode_info *ci = ceph_inode(inode); | 164 | struct ceph_inode_info *ci = ceph_inode(inode); |
101 | struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->osdc; | 165 | struct ceph_osd_client *osdc = |
166 | &ceph_sb_to_client(inode->i_sb)->client->osdc; | ||
102 | u64 len = 1, olen; | 167 | u64 len = 1, olen; |
103 | u64 tmp; | 168 | u64 tmp; |
104 | struct ceph_object_layout ol; | 169 | struct ceph_object_layout ol; |
@@ -174,11 +239,15 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
174 | case CEPH_IOC_SET_LAYOUT: | 239 | case CEPH_IOC_SET_LAYOUT: |
175 | return ceph_ioctl_set_layout(file, (void __user *)arg); | 240 | return ceph_ioctl_set_layout(file, (void __user *)arg); |
176 | 241 | ||
242 | case CEPH_IOC_SET_LAYOUT_POLICY: | ||
243 | return ceph_ioctl_set_layout_policy(file, (void __user *)arg); | ||
244 | |||
177 | case CEPH_IOC_GET_DATALOC: | 245 | case CEPH_IOC_GET_DATALOC: |
178 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); | 246 | return ceph_ioctl_get_dataloc(file, (void __user *)arg); |
179 | 247 | ||
180 | case CEPH_IOC_LAZYIO: | 248 | case CEPH_IOC_LAZYIO: |
181 | return ceph_ioctl_lazyio(file); | 249 | return ceph_ioctl_lazyio(file); |
182 | } | 250 | } |
251 | |||
183 | return -ENOTTY; | 252 | return -ENOTTY; |
184 | } | 253 | } |
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 88451a3b6857..a6ce54e94eb5 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h | |||
@@ -4,7 +4,7 @@ | |||
4 | #include <linux/ioctl.h> | 4 | #include <linux/ioctl.h> |
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
6 | 6 | ||
7 | #define CEPH_IOCTL_MAGIC 0x97 | 7 | #define CEPH_IOCTL_MAGIC 0x98 |
8 | 8 | ||
9 | /* just use u64 to align sanely on all archs */ | 9 | /* just use u64 to align sanely on all archs */ |
10 | struct ceph_ioctl_layout { | 10 | struct ceph_ioctl_layout { |
@@ -17,6 +17,8 @@ struct ceph_ioctl_layout { | |||
17 | struct ceph_ioctl_layout) | 17 | struct ceph_ioctl_layout) |
18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ | 18 | #define CEPH_IOC_SET_LAYOUT _IOW(CEPH_IOCTL_MAGIC, 2, \ |
19 | struct ceph_ioctl_layout) | 19 | struct ceph_ioctl_layout) |
20 | #define CEPH_IOC_SET_LAYOUT_POLICY _IOW(CEPH_IOCTL_MAGIC, 5, \ | ||
21 | struct ceph_ioctl_layout) | ||
20 | 22 | ||
21 | /* | 23 | /* |
22 | * Extract identity, address of the OSD and object storing a given | 24 | * Extract identity, address of the OSD and object storing a given |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ff4e753aae92..40abde93c345 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -1,11 +1,11 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
4 | #include <linux/namei.h> | 4 | #include <linux/namei.h> |
5 | 5 | ||
6 | #include "super.h" | 6 | #include "super.h" |
7 | #include "mds_client.h" | 7 | #include "mds_client.h" |
8 | #include "pagelist.h" | 8 | #include <linux/ceph/pagelist.h> |
9 | 9 | ||
10 | /** | 10 | /** |
11 | * Implement fcntl and flock locking functions. | 11 | * Implement fcntl and flock locking functions. |
@@ -16,7 +16,7 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
16 | { | 16 | { |
17 | struct inode *inode = file->f_dentry->d_inode; | 17 | struct inode *inode = file->f_dentry->d_inode; |
18 | struct ceph_mds_client *mdsc = | 18 | struct ceph_mds_client *mdsc = |
19 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 19 | ceph_sb_to_client(inode->i_sb)->mdsc; |
20 | struct ceph_mds_request *req; | 20 | struct ceph_mds_request *req; |
21 | int err; | 21 | int err; |
22 | 22 | ||
@@ -181,8 +181,9 @@ void ceph_count_locks(struct inode *inode, int *fcntl_count, int *flock_count) | |||
181 | * Encode the flock and fcntl locks for the given inode into the pagelist. | 181 | * Encode the flock and fcntl locks for the given inode into the pagelist. |
182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, | 182 | * Format is: #fcntl locks, sequential fcntl locks, #flock locks, |
183 | * sequential flock locks. | 183 | * sequential flock locks. |
184 | * Must be called with BLK already held, and the lock numbers should have | 184 | * Must be called with lock_flocks() already held. |
185 | * been gathered under the same lock holding window. | 185 | * If we encounter more of a specific lock type than expected, |
186 | * we return the value 1. | ||
186 | */ | 187 | */ |
187 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | 188 | int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, |
188 | int num_fcntl_locks, int num_flock_locks) | 189 | int num_fcntl_locks, int num_flock_locks) |
@@ -190,6 +191,8 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
190 | struct file_lock *lock; | 191 | struct file_lock *lock; |
191 | struct ceph_filelock cephlock; | 192 | struct ceph_filelock cephlock; |
192 | int err = 0; | 193 | int err = 0; |
194 | int seen_fcntl = 0; | ||
195 | int seen_flock = 0; | ||
193 | 196 | ||
194 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, | 197 | dout("encoding %d flock and %d fcntl locks", num_flock_locks, |
195 | num_fcntl_locks); | 198 | num_fcntl_locks); |
@@ -198,6 +201,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
198 | goto fail; | 201 | goto fail; |
199 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 202 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
200 | if (lock->fl_flags & FL_POSIX) { | 203 | if (lock->fl_flags & FL_POSIX) { |
204 | ++seen_fcntl; | ||
205 | if (seen_fcntl > num_fcntl_locks) { | ||
206 | err = -ENOSPC; | ||
207 | goto fail; | ||
208 | } | ||
201 | err = lock_to_ceph_filelock(lock, &cephlock); | 209 | err = lock_to_ceph_filelock(lock, &cephlock); |
202 | if (err) | 210 | if (err) |
203 | goto fail; | 211 | goto fail; |
@@ -213,6 +221,11 @@ int ceph_encode_locks(struct inode *inode, struct ceph_pagelist *pagelist, | |||
213 | goto fail; | 221 | goto fail; |
214 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { | 222 | for (lock = inode->i_flock; lock != NULL; lock = lock->fl_next) { |
215 | if (lock->fl_flags & FL_FLOCK) { | 223 | if (lock->fl_flags & FL_FLOCK) { |
224 | ++seen_flock; | ||
225 | if (seen_flock > num_flock_locks) { | ||
226 | err = -ENOSPC; | ||
227 | goto fail; | ||
228 | } | ||
216 | err = lock_to_ceph_filelock(lock, &cephlock); | 229 | err = lock_to_ceph_filelock(lock, &cephlock); |
217 | if (err) | 230 | if (err) |
218 | goto fail; | 231 | goto fail; |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f091b1351786..3142b15940c2 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -1,17 +1,21 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/fs.h> | ||
3 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
4 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
5 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | ||
8 | #include <linux/seq_file.h> | ||
6 | #include <linux/smp_lock.h> | 9 | #include <linux/smp_lock.h> |
7 | 10 | ||
8 | #include "mds_client.h" | ||
9 | #include "mon_client.h" | ||
10 | #include "super.h" | 11 | #include "super.h" |
11 | #include "messenger.h" | 12 | #include "mds_client.h" |
12 | #include "decode.h" | 13 | |
13 | #include "auth.h" | 14 | #include <linux/ceph/messenger.h> |
14 | #include "pagelist.h" | 15 | #include <linux/ceph/decode.h> |
16 | #include <linux/ceph/pagelist.h> | ||
17 | #include <linux/ceph/auth.h> | ||
18 | #include <linux/ceph/debugfs.h> | ||
15 | 19 | ||
16 | /* | 20 | /* |
17 | * A cluster of MDS (metadata server) daemons is responsible for | 21 | * A cluster of MDS (metadata server) daemons is responsible for |
@@ -286,8 +290,9 @@ void ceph_put_mds_session(struct ceph_mds_session *s) | |||
286 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); | 290 | atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1); |
287 | if (atomic_dec_and_test(&s->s_ref)) { | 291 | if (atomic_dec_and_test(&s->s_ref)) { |
288 | if (s->s_authorizer) | 292 | if (s->s_authorizer) |
289 | s->s_mdsc->client->monc.auth->ops->destroy_authorizer( | 293 | s->s_mdsc->fsc->client->monc.auth->ops->destroy_authorizer( |
290 | s->s_mdsc->client->monc.auth, s->s_authorizer); | 294 | s->s_mdsc->fsc->client->monc.auth, |
295 | s->s_authorizer); | ||
291 | kfree(s); | 296 | kfree(s); |
292 | } | 297 | } |
293 | } | 298 | } |
@@ -344,7 +349,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, | |||
344 | s->s_seq = 0; | 349 | s->s_seq = 0; |
345 | mutex_init(&s->s_mutex); | 350 | mutex_init(&s->s_mutex); |
346 | 351 | ||
347 | ceph_con_init(mdsc->client->msgr, &s->s_con); | 352 | ceph_con_init(mdsc->fsc->client->msgr, &s->s_con); |
348 | s->s_con.private = s; | 353 | s->s_con.private = s; |
349 | s->s_con.ops = &mds_con_ops; | 354 | s->s_con.ops = &mds_con_ops; |
350 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; | 355 | s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS; |
@@ -599,7 +604,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, | |||
599 | } else if (req->r_dentry) { | 604 | } else if (req->r_dentry) { |
600 | struct inode *dir = req->r_dentry->d_parent->d_inode; | 605 | struct inode *dir = req->r_dentry->d_parent->d_inode; |
601 | 606 | ||
602 | if (dir->i_sb != mdsc->client->sb) { | 607 | if (dir->i_sb != mdsc->fsc->sb) { |
603 | /* not this fs! */ | 608 | /* not this fs! */ |
604 | inode = req->r_dentry->d_inode; | 609 | inode = req->r_dentry->d_inode; |
605 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { | 610 | } else if (ceph_snap(dir) != CEPH_NOSNAP) { |
@@ -884,7 +889,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
884 | __ceph_remove_cap(cap); | 889 | __ceph_remove_cap(cap); |
885 | if (!__ceph_is_any_real_caps(ci)) { | 890 | if (!__ceph_is_any_real_caps(ci)) { |
886 | struct ceph_mds_client *mdsc = | 891 | struct ceph_mds_client *mdsc = |
887 | &ceph_sb_to_client(inode->i_sb)->mdsc; | 892 | ceph_sb_to_client(inode->i_sb)->mdsc; |
888 | 893 | ||
889 | spin_lock(&mdsc->cap_dirty_lock); | 894 | spin_lock(&mdsc->cap_dirty_lock); |
890 | if (!list_empty(&ci->i_dirty_item)) { | 895 | if (!list_empty(&ci->i_dirty_item)) { |
@@ -1146,7 +1151,7 @@ int ceph_add_cap_releases(struct ceph_mds_client *mdsc, | |||
1146 | struct ceph_msg *msg, *partial = NULL; | 1151 | struct ceph_msg *msg, *partial = NULL; |
1147 | struct ceph_mds_cap_release *head; | 1152 | struct ceph_mds_cap_release *head; |
1148 | int err = -ENOMEM; | 1153 | int err = -ENOMEM; |
1149 | int extra = mdsc->client->mount_args->cap_release_safety; | 1154 | int extra = mdsc->fsc->mount_options->cap_release_safety; |
1150 | int num; | 1155 | int num; |
1151 | 1156 | ||
1152 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, | 1157 | dout("add_cap_releases %p mds%d extra %d\n", session, session->s_mds, |
@@ -2085,7 +2090,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) | |||
2085 | 2090 | ||
2086 | /* insert trace into our cache */ | 2091 | /* insert trace into our cache */ |
2087 | mutex_lock(&req->r_fill_mutex); | 2092 | mutex_lock(&req->r_fill_mutex); |
2088 | err = ceph_fill_trace(mdsc->client->sb, req, req->r_session); | 2093 | err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session); |
2089 | if (err == 0) { | 2094 | if (err == 0) { |
2090 | if (result == 0 && rinfo->dir_nr) | 2095 | if (result == 0 && rinfo->dir_nr) |
2091 | ceph_readdir_prepopulate(req, req->r_session); | 2096 | ceph_readdir_prepopulate(req, req->r_session); |
@@ -2361,19 +2366,37 @@ static int encode_caps_cb(struct inode *inode, struct ceph_cap *cap, | |||
2361 | 2366 | ||
2362 | if (recon_state->flock) { | 2367 | if (recon_state->flock) { |
2363 | int num_fcntl_locks, num_flock_locks; | 2368 | int num_fcntl_locks, num_flock_locks; |
2364 | 2369 | struct ceph_pagelist_cursor trunc_point; | |
2365 | lock_kernel(); | 2370 | |
2366 | ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks); | 2371 | ceph_pagelist_set_cursor(pagelist, &trunc_point); |
2367 | rec.v2.flock_len = (2*sizeof(u32) + | 2372 | do { |
2368 | (num_fcntl_locks+num_flock_locks) * | 2373 | lock_flocks(); |
2369 | sizeof(struct ceph_filelock)); | 2374 | ceph_count_locks(inode, &num_fcntl_locks, |
2370 | 2375 | &num_flock_locks); | |
2376 | rec.v2.flock_len = (2*sizeof(u32) + | ||
2377 | (num_fcntl_locks+num_flock_locks) * | ||
2378 | sizeof(struct ceph_filelock)); | ||
2379 | unlock_flocks(); | ||
2380 | |||
2381 | /* pre-alloc pagelist */ | ||
2382 | ceph_pagelist_truncate(pagelist, &trunc_point); | ||
2383 | err = ceph_pagelist_append(pagelist, &rec, reclen); | ||
2384 | if (!err) | ||
2385 | err = ceph_pagelist_reserve(pagelist, | ||
2386 | rec.v2.flock_len); | ||
2387 | |||
2388 | /* encode locks */ | ||
2389 | if (!err) { | ||
2390 | lock_flocks(); | ||
2391 | err = ceph_encode_locks(inode, | ||
2392 | pagelist, | ||
2393 | num_fcntl_locks, | ||
2394 | num_flock_locks); | ||
2395 | unlock_flocks(); | ||
2396 | } | ||
2397 | } while (err == -ENOSPC); | ||
2398 | } else { | ||
2371 | err = ceph_pagelist_append(pagelist, &rec, reclen); | 2399 | err = ceph_pagelist_append(pagelist, &rec, reclen); |
2372 | if (!err) | ||
2373 | err = ceph_encode_locks(inode, pagelist, | ||
2374 | num_fcntl_locks, | ||
2375 | num_flock_locks); | ||
2376 | unlock_kernel(); | ||
2377 | } | 2400 | } |
2378 | 2401 | ||
2379 | out_free: | 2402 | out_free: |
@@ -2611,7 +2634,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, | |||
2611 | struct ceph_mds_session *session, | 2634 | struct ceph_mds_session *session, |
2612 | struct ceph_msg *msg) | 2635 | struct ceph_msg *msg) |
2613 | { | 2636 | { |
2614 | struct super_block *sb = mdsc->client->sb; | 2637 | struct super_block *sb = mdsc->fsc->sb; |
2615 | struct inode *inode; | 2638 | struct inode *inode; |
2616 | struct ceph_inode_info *ci; | 2639 | struct ceph_inode_info *ci; |
2617 | struct dentry *parent, *dentry; | 2640 | struct dentry *parent, *dentry; |
@@ -2889,10 +2912,16 @@ static void delayed_work(struct work_struct *work) | |||
2889 | schedule_delayed(mdsc); | 2912 | schedule_delayed(mdsc); |
2890 | } | 2913 | } |
2891 | 2914 | ||
2915 | int ceph_mdsc_init(struct ceph_fs_client *fsc) | ||
2892 | 2916 | ||
2893 | int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | ||
2894 | { | 2917 | { |
2895 | mdsc->client = client; | 2918 | struct ceph_mds_client *mdsc; |
2919 | |||
2920 | mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS); | ||
2921 | if (!mdsc) | ||
2922 | return -ENOMEM; | ||
2923 | mdsc->fsc = fsc; | ||
2924 | fsc->mdsc = mdsc; | ||
2896 | mutex_init(&mdsc->mutex); | 2925 | mutex_init(&mdsc->mutex); |
2897 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); | 2926 | mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); |
2898 | if (mdsc->mdsmap == NULL) | 2927 | if (mdsc->mdsmap == NULL) |
@@ -2925,7 +2954,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2925 | INIT_LIST_HEAD(&mdsc->dentry_lru); | 2954 | INIT_LIST_HEAD(&mdsc->dentry_lru); |
2926 | 2955 | ||
2927 | ceph_caps_init(mdsc); | 2956 | ceph_caps_init(mdsc); |
2928 | ceph_adjust_min_caps(mdsc, client->min_caps); | 2957 | ceph_adjust_min_caps(mdsc, fsc->min_caps); |
2929 | 2958 | ||
2930 | return 0; | 2959 | return 0; |
2931 | } | 2960 | } |
@@ -2937,7 +2966,7 @@ int ceph_mdsc_init(struct ceph_mds_client *mdsc, struct ceph_client *client) | |||
2937 | static void wait_requests(struct ceph_mds_client *mdsc) | 2966 | static void wait_requests(struct ceph_mds_client *mdsc) |
2938 | { | 2967 | { |
2939 | struct ceph_mds_request *req; | 2968 | struct ceph_mds_request *req; |
2940 | struct ceph_client *client = mdsc->client; | 2969 | struct ceph_fs_client *fsc = mdsc->fsc; |
2941 | 2970 | ||
2942 | mutex_lock(&mdsc->mutex); | 2971 | mutex_lock(&mdsc->mutex); |
2943 | if (__get_oldest_req(mdsc)) { | 2972 | if (__get_oldest_req(mdsc)) { |
@@ -2945,7 +2974,7 @@ static void wait_requests(struct ceph_mds_client *mdsc) | |||
2945 | 2974 | ||
2946 | dout("wait_requests waiting for requests\n"); | 2975 | dout("wait_requests waiting for requests\n"); |
2947 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, | 2976 | wait_for_completion_timeout(&mdsc->safe_umount_waiters, |
2948 | client->mount_args->mount_timeout * HZ); | 2977 | fsc->client->options->mount_timeout * HZ); |
2949 | 2978 | ||
2950 | /* tear down remaining requests */ | 2979 | /* tear down remaining requests */ |
2951 | mutex_lock(&mdsc->mutex); | 2980 | mutex_lock(&mdsc->mutex); |
@@ -3028,7 +3057,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc) | |||
3028 | { | 3057 | { |
3029 | u64 want_tid, want_flush; | 3058 | u64 want_tid, want_flush; |
3030 | 3059 | ||
3031 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3060 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
3032 | return; | 3061 | return; |
3033 | 3062 | ||
3034 | dout("sync\n"); | 3063 | dout("sync\n"); |
@@ -3051,7 +3080,7 @@ bool done_closing_sessions(struct ceph_mds_client *mdsc) | |||
3051 | { | 3080 | { |
3052 | int i, n = 0; | 3081 | int i, n = 0; |
3053 | 3082 | ||
3054 | if (mdsc->client->mount_state == CEPH_MOUNT_SHUTDOWN) | 3083 | if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN) |
3055 | return true; | 3084 | return true; |
3056 | 3085 | ||
3057 | mutex_lock(&mdsc->mutex); | 3086 | mutex_lock(&mdsc->mutex); |
@@ -3069,8 +3098,8 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3069 | { | 3098 | { |
3070 | struct ceph_mds_session *session; | 3099 | struct ceph_mds_session *session; |
3071 | int i; | 3100 | int i; |
3072 | struct ceph_client *client = mdsc->client; | 3101 | struct ceph_fs_client *fsc = mdsc->fsc; |
3073 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | 3102 | unsigned long timeout = fsc->client->options->mount_timeout * HZ; |
3074 | 3103 | ||
3075 | dout("close_sessions\n"); | 3104 | dout("close_sessions\n"); |
3076 | 3105 | ||
@@ -3117,7 +3146,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc) | |||
3117 | dout("stopped\n"); | 3146 | dout("stopped\n"); |
3118 | } | 3147 | } |
3119 | 3148 | ||
3120 | void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | 3149 | static void ceph_mdsc_stop(struct ceph_mds_client *mdsc) |
3121 | { | 3150 | { |
3122 | dout("stop\n"); | 3151 | dout("stop\n"); |
3123 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ | 3152 | cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */ |
@@ -3127,6 +3156,15 @@ void ceph_mdsc_stop(struct ceph_mds_client *mdsc) | |||
3127 | ceph_caps_finalize(mdsc); | 3156 | ceph_caps_finalize(mdsc); |
3128 | } | 3157 | } |
3129 | 3158 | ||
3159 | void ceph_mdsc_destroy(struct ceph_fs_client *fsc) | ||
3160 | { | ||
3161 | struct ceph_mds_client *mdsc = fsc->mdsc; | ||
3162 | |||
3163 | ceph_mdsc_stop(mdsc); | ||
3164 | fsc->mdsc = NULL; | ||
3165 | kfree(mdsc); | ||
3166 | } | ||
3167 | |||
3130 | 3168 | ||
3131 | /* | 3169 | /* |
3132 | * handle mds map update. | 3170 | * handle mds map update. |
@@ -3143,14 +3181,14 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
3143 | 3181 | ||
3144 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); | 3182 | ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad); |
3145 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); | 3183 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); |
3146 | if (ceph_check_fsid(mdsc->client, &fsid) < 0) | 3184 | if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0) |
3147 | return; | 3185 | return; |
3148 | epoch = ceph_decode_32(&p); | 3186 | epoch = ceph_decode_32(&p); |
3149 | maplen = ceph_decode_32(&p); | 3187 | maplen = ceph_decode_32(&p); |
3150 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); | 3188 | dout("handle_map epoch %u len %d\n", epoch, (int)maplen); |
3151 | 3189 | ||
3152 | /* do we need it? */ | 3190 | /* do we need it? */ |
3153 | ceph_monc_got_mdsmap(&mdsc->client->monc, epoch); | 3191 | ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch); |
3154 | mutex_lock(&mdsc->mutex); | 3192 | mutex_lock(&mdsc->mutex); |
3155 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { | 3193 | if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) { |
3156 | dout("handle_map epoch %u <= our %u\n", | 3194 | dout("handle_map epoch %u <= our %u\n", |
@@ -3174,7 +3212,7 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg) | |||
3174 | } else { | 3212 | } else { |
3175 | mdsc->mdsmap = newmap; /* first mds map */ | 3213 | mdsc->mdsmap = newmap; /* first mds map */ |
3176 | } | 3214 | } |
3177 | mdsc->client->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; | 3215 | mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size; |
3178 | 3216 | ||
3179 | __wake_requests(mdsc, &mdsc->waiting_for_map); | 3217 | __wake_requests(mdsc, &mdsc->waiting_for_map); |
3180 | 3218 | ||
@@ -3275,7 +3313,7 @@ static int get_authorizer(struct ceph_connection *con, | |||
3275 | { | 3313 | { |
3276 | struct ceph_mds_session *s = con->private; | 3314 | struct ceph_mds_session *s = con->private; |
3277 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3315 | struct ceph_mds_client *mdsc = s->s_mdsc; |
3278 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3316 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
3279 | int ret = 0; | 3317 | int ret = 0; |
3280 | 3318 | ||
3281 | if (force_new && s->s_authorizer) { | 3319 | if (force_new && s->s_authorizer) { |
@@ -3309,7 +3347,7 @@ static int verify_authorizer_reply(struct ceph_connection *con, int len) | |||
3309 | { | 3347 | { |
3310 | struct ceph_mds_session *s = con->private; | 3348 | struct ceph_mds_session *s = con->private; |
3311 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3349 | struct ceph_mds_client *mdsc = s->s_mdsc; |
3312 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3350 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
3313 | 3351 | ||
3314 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); | 3352 | return ac->ops->verify_authorizer_reply(ac, s->s_authorizer, len); |
3315 | } | 3353 | } |
@@ -3318,12 +3356,12 @@ static int invalidate_authorizer(struct ceph_connection *con) | |||
3318 | { | 3356 | { |
3319 | struct ceph_mds_session *s = con->private; | 3357 | struct ceph_mds_session *s = con->private; |
3320 | struct ceph_mds_client *mdsc = s->s_mdsc; | 3358 | struct ceph_mds_client *mdsc = s->s_mdsc; |
3321 | struct ceph_auth_client *ac = mdsc->client->monc.auth; | 3359 | struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth; |
3322 | 3360 | ||
3323 | if (ac->ops->invalidate_authorizer) | 3361 | if (ac->ops->invalidate_authorizer) |
3324 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); | 3362 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS); |
3325 | 3363 | ||
3326 | return ceph_monc_validate_auth(&mdsc->client->monc); | 3364 | return ceph_monc_validate_auth(&mdsc->fsc->client->monc); |
3327 | } | 3365 | } |
3328 | 3366 | ||
3329 | static const struct ceph_connection_operations mds_con_ops = { | 3367 | static const struct ceph_connection_operations mds_con_ops = { |
@@ -3336,7 +3374,4 @@ static const struct ceph_connection_operations mds_con_ops = { | |||
3336 | .peer_reset = peer_reset, | 3374 | .peer_reset = peer_reset, |
3337 | }; | 3375 | }; |
3338 | 3376 | ||
3339 | |||
3340 | |||
3341 | |||
3342 | /* eof */ | 3377 | /* eof */ |
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index c98267ce6d2a..d66d63c72355 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -8,9 +8,9 @@ | |||
8 | #include <linux/rbtree.h> | 8 | #include <linux/rbtree.h> |
9 | #include <linux/spinlock.h> | 9 | #include <linux/spinlock.h> |
10 | 10 | ||
11 | #include "types.h" | 11 | #include <linux/ceph/types.h> |
12 | #include "messenger.h" | 12 | #include <linux/ceph/messenger.h> |
13 | #include "mdsmap.h" | 13 | #include <linux/ceph/mdsmap.h> |
14 | 14 | ||
15 | /* | 15 | /* |
16 | * Some lock dependencies: | 16 | * Some lock dependencies: |
@@ -26,7 +26,7 @@ | |||
26 | * | 26 | * |
27 | */ | 27 | */ |
28 | 28 | ||
29 | struct ceph_client; | 29 | struct ceph_fs_client; |
30 | struct ceph_cap; | 30 | struct ceph_cap; |
31 | 31 | ||
32 | /* | 32 | /* |
@@ -230,7 +230,7 @@ struct ceph_mds_request { | |||
230 | * mds client state | 230 | * mds client state |
231 | */ | 231 | */ |
232 | struct ceph_mds_client { | 232 | struct ceph_mds_client { |
233 | struct ceph_client *client; | 233 | struct ceph_fs_client *fsc; |
234 | struct mutex mutex; /* all nested structures */ | 234 | struct mutex mutex; /* all nested structures */ |
235 | 235 | ||
236 | struct ceph_mdsmap *mdsmap; | 236 | struct ceph_mdsmap *mdsmap; |
@@ -289,11 +289,6 @@ struct ceph_mds_client { | |||
289 | int caps_avail_count; /* unused, unreserved */ | 289 | int caps_avail_count; /* unused, unreserved */ |
290 | int caps_min_count; /* keep at least this many | 290 | int caps_min_count; /* keep at least this many |
291 | (unreserved) */ | 291 | (unreserved) */ |
292 | |||
293 | #ifdef CONFIG_DEBUG_FS | ||
294 | struct dentry *debugfs_file; | ||
295 | #endif | ||
296 | |||
297 | spinlock_t dentry_lru_lock; | 292 | spinlock_t dentry_lru_lock; |
298 | struct list_head dentry_lru; | 293 | struct list_head dentry_lru; |
299 | int num_dentry; | 294 | int num_dentry; |
@@ -316,10 +311,9 @@ extern void ceph_put_mds_session(struct ceph_mds_session *s); | |||
316 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, | 311 | extern int ceph_send_msg_mds(struct ceph_mds_client *mdsc, |
317 | struct ceph_msg *msg, int mds); | 312 | struct ceph_msg *msg, int mds); |
318 | 313 | ||
319 | extern int ceph_mdsc_init(struct ceph_mds_client *mdsc, | 314 | extern int ceph_mdsc_init(struct ceph_fs_client *fsc); |
320 | struct ceph_client *client); | ||
321 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); | 315 | extern void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc); |
322 | extern void ceph_mdsc_stop(struct ceph_mds_client *mdsc); | 316 | extern void ceph_mdsc_destroy(struct ceph_fs_client *fsc); |
323 | 317 | ||
324 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); | 318 | extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); |
325 | 319 | ||
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 040be6d1150b..73b7d44e8a35 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c | |||
@@ -1,4 +1,4 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/bug.h> | 3 | #include <linux/bug.h> |
4 | #include <linux/err.h> | 4 | #include <linux/err.h> |
@@ -6,9 +6,9 @@ | |||
6 | #include <linux/slab.h> | 6 | #include <linux/slab.h> |
7 | #include <linux/types.h> | 7 | #include <linux/types.h> |
8 | 8 | ||
9 | #include "mdsmap.h" | 9 | #include <linux/ceph/mdsmap.h> |
10 | #include "messenger.h" | 10 | #include <linux/ceph/messenger.h> |
11 | #include "decode.h" | 11 | #include <linux/ceph/decode.h> |
12 | 12 | ||
13 | #include "super.h" | 13 | #include "super.h" |
14 | 14 | ||
@@ -117,7 +117,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) | |||
117 | } | 117 | } |
118 | 118 | ||
119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", | 119 | dout("mdsmap_decode %d/%d %lld mds%d.%d %s %s\n", |
120 | i+1, n, global_id, mds, inc, pr_addr(&addr.in_addr), | 120 | i+1, n, global_id, mds, inc, |
121 | ceph_pr_addr(&addr.in_addr), | ||
121 | ceph_mds_state_name(state)); | 122 | ceph_mds_state_name(state)); |
122 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { | 123 | if (mds >= 0 && mds < m->m_max_mds && state > 0) { |
123 | m->m_info[mds].global_id = global_id; | 124 | m->m_info[mds].global_id = global_id; |
diff --git a/fs/ceph/mdsmap.h b/fs/ceph/mdsmap.h deleted file mode 100644 index 4c5cb0880bba..000000000000 --- a/fs/ceph/mdsmap.h +++ /dev/null | |||
@@ -1,62 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_MDSMAP_H | ||
2 | #define _FS_CEPH_MDSMAP_H | ||
3 | |||
4 | #include "types.h" | ||
5 | |||
6 | /* | ||
7 | * mds map - describe servers in the mds cluster. | ||
8 | * | ||
9 | * we limit fields to those the client actually xcares about | ||
10 | */ | ||
11 | struct ceph_mds_info { | ||
12 | u64 global_id; | ||
13 | struct ceph_entity_addr addr; | ||
14 | s32 state; | ||
15 | int num_export_targets; | ||
16 | bool laggy; | ||
17 | u32 *export_targets; | ||
18 | }; | ||
19 | |||
20 | struct ceph_mdsmap { | ||
21 | u32 m_epoch, m_client_epoch, m_last_failure; | ||
22 | u32 m_root; | ||
23 | u32 m_session_timeout; /* seconds */ | ||
24 | u32 m_session_autoclose; /* seconds */ | ||
25 | u64 m_max_file_size; | ||
26 | u32 m_max_mds; /* size of m_addr, m_state arrays */ | ||
27 | struct ceph_mds_info *m_info; | ||
28 | |||
29 | /* which object pools file data can be stored in */ | ||
30 | int m_num_data_pg_pools; | ||
31 | u32 *m_data_pg_pools; | ||
32 | u32 m_cas_pg_pool; | ||
33 | }; | ||
34 | |||
35 | static inline struct ceph_entity_addr * | ||
36 | ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) | ||
37 | { | ||
38 | if (w >= m->m_max_mds) | ||
39 | return NULL; | ||
40 | return &m->m_info[w].addr; | ||
41 | } | ||
42 | |||
43 | static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) | ||
44 | { | ||
45 | BUG_ON(w < 0); | ||
46 | if (w >= m->m_max_mds) | ||
47 | return CEPH_MDS_STATE_DNE; | ||
48 | return m->m_info[w].state; | ||
49 | } | ||
50 | |||
51 | static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) | ||
52 | { | ||
53 | if (w >= 0 && w < m->m_max_mds) | ||
54 | return m->m_info[w].laggy; | ||
55 | return false; | ||
56 | } | ||
57 | |||
58 | extern int ceph_mdsmap_get_random_mds(struct ceph_mdsmap *m); | ||
59 | extern struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end); | ||
60 | extern void ceph_mdsmap_destroy(struct ceph_mdsmap *m); | ||
61 | |||
62 | #endif | ||
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c deleted file mode 100644 index 2502d76fcec1..000000000000 --- a/fs/ceph/messenger.c +++ /dev/null | |||
@@ -1,2277 +0,0 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/crc32c.h> | ||
4 | #include <linux/ctype.h> | ||
5 | #include <linux/highmem.h> | ||
6 | #include <linux/inet.h> | ||
7 | #include <linux/kthread.h> | ||
8 | #include <linux/net.h> | ||
9 | #include <linux/slab.h> | ||
10 | #include <linux/socket.h> | ||
11 | #include <linux/string.h> | ||
12 | #include <net/tcp.h> | ||
13 | |||
14 | #include "super.h" | ||
15 | #include "messenger.h" | ||
16 | #include "decode.h" | ||
17 | #include "pagelist.h" | ||
18 | |||
19 | /* | ||
20 | * Ceph uses the messenger to exchange ceph_msg messages with other | ||
21 | * hosts in the system. The messenger provides ordered and reliable | ||
22 | * delivery. We tolerate TCP disconnects by reconnecting (with | ||
23 | * exponential backoff) in the case of a fault (disconnection, bad | ||
24 | * crc, protocol error). Acks allow sent messages to be discarded by | ||
25 | * the sender. | ||
26 | */ | ||
27 | |||
28 | /* static tag bytes (protocol control messages) */ | ||
29 | static char tag_msg = CEPH_MSGR_TAG_MSG; | ||
30 | static char tag_ack = CEPH_MSGR_TAG_ACK; | ||
31 | static char tag_keepalive = CEPH_MSGR_TAG_KEEPALIVE; | ||
32 | |||
33 | #ifdef CONFIG_LOCKDEP | ||
34 | static struct lock_class_key socket_class; | ||
35 | #endif | ||
36 | |||
37 | |||
38 | static void queue_con(struct ceph_connection *con); | ||
39 | static void con_work(struct work_struct *); | ||
40 | static void ceph_fault(struct ceph_connection *con); | ||
41 | |||
42 | /* | ||
43 | * nicely render a sockaddr as a string. | ||
44 | */ | ||
45 | #define MAX_ADDR_STR 20 | ||
46 | #define MAX_ADDR_STR_LEN 60 | ||
47 | static char addr_str[MAX_ADDR_STR][MAX_ADDR_STR_LEN]; | ||
48 | static DEFINE_SPINLOCK(addr_str_lock); | ||
49 | static int last_addr_str; | ||
50 | |||
51 | const char *pr_addr(const struct sockaddr_storage *ss) | ||
52 | { | ||
53 | int i; | ||
54 | char *s; | ||
55 | struct sockaddr_in *in4 = (void *)ss; | ||
56 | struct sockaddr_in6 *in6 = (void *)ss; | ||
57 | |||
58 | spin_lock(&addr_str_lock); | ||
59 | i = last_addr_str++; | ||
60 | if (last_addr_str == MAX_ADDR_STR) | ||
61 | last_addr_str = 0; | ||
62 | spin_unlock(&addr_str_lock); | ||
63 | s = addr_str[i]; | ||
64 | |||
65 | switch (ss->ss_family) { | ||
66 | case AF_INET: | ||
67 | snprintf(s, MAX_ADDR_STR_LEN, "%pI4:%u", &in4->sin_addr, | ||
68 | (unsigned int)ntohs(in4->sin_port)); | ||
69 | break; | ||
70 | |||
71 | case AF_INET6: | ||
72 | snprintf(s, MAX_ADDR_STR_LEN, "[%pI6c]:%u", &in6->sin6_addr, | ||
73 | (unsigned int)ntohs(in6->sin6_port)); | ||
74 | break; | ||
75 | |||
76 | default: | ||
77 | sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family); | ||
78 | } | ||
79 | |||
80 | return s; | ||
81 | } | ||
82 | |||
83 | static void encode_my_addr(struct ceph_messenger *msgr) | ||
84 | { | ||
85 | memcpy(&msgr->my_enc_addr, &msgr->inst.addr, sizeof(msgr->my_enc_addr)); | ||
86 | ceph_encode_addr(&msgr->my_enc_addr); | ||
87 | } | ||
88 | |||
89 | /* | ||
90 | * work queue for all reading and writing to/from the socket. | ||
91 | */ | ||
92 | struct workqueue_struct *ceph_msgr_wq; | ||
93 | |||
94 | int __init ceph_msgr_init(void) | ||
95 | { | ||
96 | ceph_msgr_wq = create_workqueue("ceph-msgr"); | ||
97 | if (IS_ERR(ceph_msgr_wq)) { | ||
98 | int ret = PTR_ERR(ceph_msgr_wq); | ||
99 | pr_err("msgr_init failed to create workqueue: %d\n", ret); | ||
100 | ceph_msgr_wq = NULL; | ||
101 | return ret; | ||
102 | } | ||
103 | return 0; | ||
104 | } | ||
105 | |||
106 | void ceph_msgr_exit(void) | ||
107 | { | ||
108 | destroy_workqueue(ceph_msgr_wq); | ||
109 | } | ||
110 | |||
111 | void ceph_msgr_flush(void) | ||
112 | { | ||
113 | flush_workqueue(ceph_msgr_wq); | ||
114 | } | ||
115 | |||
116 | |||
117 | /* | ||
118 | * socket callback functions | ||
119 | */ | ||
120 | |||
121 | /* data available on socket, or listen socket received a connect */ | ||
122 | static void ceph_data_ready(struct sock *sk, int count_unused) | ||
123 | { | ||
124 | struct ceph_connection *con = | ||
125 | (struct ceph_connection *)sk->sk_user_data; | ||
126 | if (sk->sk_state != TCP_CLOSE_WAIT) { | ||
127 | dout("ceph_data_ready on %p state = %lu, queueing work\n", | ||
128 | con, con->state); | ||
129 | queue_con(con); | ||
130 | } | ||
131 | } | ||
132 | |||
133 | /* socket has buffer space for writing */ | ||
134 | static void ceph_write_space(struct sock *sk) | ||
135 | { | ||
136 | struct ceph_connection *con = | ||
137 | (struct ceph_connection *)sk->sk_user_data; | ||
138 | |||
139 | /* only queue to workqueue if there is data we want to write. */ | ||
140 | if (test_bit(WRITE_PENDING, &con->state)) { | ||
141 | dout("ceph_write_space %p queueing write work\n", con); | ||
142 | queue_con(con); | ||
143 | } else { | ||
144 | dout("ceph_write_space %p nothing to write\n", con); | ||
145 | } | ||
146 | |||
147 | /* since we have our own write_space, clear the SOCK_NOSPACE flag */ | ||
148 | clear_bit(SOCK_NOSPACE, &sk->sk_socket->flags); | ||
149 | } | ||
150 | |||
151 | /* socket's state has changed */ | ||
152 | static void ceph_state_change(struct sock *sk) | ||
153 | { | ||
154 | struct ceph_connection *con = | ||
155 | (struct ceph_connection *)sk->sk_user_data; | ||
156 | |||
157 | dout("ceph_state_change %p state = %lu sk_state = %u\n", | ||
158 | con, con->state, sk->sk_state); | ||
159 | |||
160 | if (test_bit(CLOSED, &con->state)) | ||
161 | return; | ||
162 | |||
163 | switch (sk->sk_state) { | ||
164 | case TCP_CLOSE: | ||
165 | dout("ceph_state_change TCP_CLOSE\n"); | ||
166 | case TCP_CLOSE_WAIT: | ||
167 | dout("ceph_state_change TCP_CLOSE_WAIT\n"); | ||
168 | if (test_and_set_bit(SOCK_CLOSED, &con->state) == 0) { | ||
169 | if (test_bit(CONNECTING, &con->state)) | ||
170 | con->error_msg = "connection failed"; | ||
171 | else | ||
172 | con->error_msg = "socket closed"; | ||
173 | queue_con(con); | ||
174 | } | ||
175 | break; | ||
176 | case TCP_ESTABLISHED: | ||
177 | dout("ceph_state_change TCP_ESTABLISHED\n"); | ||
178 | queue_con(con); | ||
179 | break; | ||
180 | } | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * set up socket callbacks | ||
185 | */ | ||
186 | static void set_sock_callbacks(struct socket *sock, | ||
187 | struct ceph_connection *con) | ||
188 | { | ||
189 | struct sock *sk = sock->sk; | ||
190 | sk->sk_user_data = (void *)con; | ||
191 | sk->sk_data_ready = ceph_data_ready; | ||
192 | sk->sk_write_space = ceph_write_space; | ||
193 | sk->sk_state_change = ceph_state_change; | ||
194 | } | ||
195 | |||
196 | |||
197 | /* | ||
198 | * socket helpers | ||
199 | */ | ||
200 | |||
201 | /* | ||
202 | * initiate connection to a remote socket. | ||
203 | */ | ||
204 | static struct socket *ceph_tcp_connect(struct ceph_connection *con) | ||
205 | { | ||
206 | struct sockaddr_storage *paddr = &con->peer_addr.in_addr; | ||
207 | struct socket *sock; | ||
208 | int ret; | ||
209 | |||
210 | BUG_ON(con->sock); | ||
211 | ret = sock_create_kern(con->peer_addr.in_addr.ss_family, SOCK_STREAM, | ||
212 | IPPROTO_TCP, &sock); | ||
213 | if (ret) | ||
214 | return ERR_PTR(ret); | ||
215 | con->sock = sock; | ||
216 | sock->sk->sk_allocation = GFP_NOFS; | ||
217 | |||
218 | #ifdef CONFIG_LOCKDEP | ||
219 | lockdep_set_class(&sock->sk->sk_lock, &socket_class); | ||
220 | #endif | ||
221 | |||
222 | set_sock_callbacks(sock, con); | ||
223 | |||
224 | dout("connect %s\n", pr_addr(&con->peer_addr.in_addr)); | ||
225 | |||
226 | ret = sock->ops->connect(sock, (struct sockaddr *)paddr, sizeof(*paddr), | ||
227 | O_NONBLOCK); | ||
228 | if (ret == -EINPROGRESS) { | ||
229 | dout("connect %s EINPROGRESS sk_state = %u\n", | ||
230 | pr_addr(&con->peer_addr.in_addr), | ||
231 | sock->sk->sk_state); | ||
232 | ret = 0; | ||
233 | } | ||
234 | if (ret < 0) { | ||
235 | pr_err("connect %s error %d\n", | ||
236 | pr_addr(&con->peer_addr.in_addr), ret); | ||
237 | sock_release(sock); | ||
238 | con->sock = NULL; | ||
239 | con->error_msg = "connect error"; | ||
240 | } | ||
241 | |||
242 | if (ret < 0) | ||
243 | return ERR_PTR(ret); | ||
244 | return sock; | ||
245 | } | ||
246 | |||
247 | static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len) | ||
248 | { | ||
249 | struct kvec iov = {buf, len}; | ||
250 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; | ||
251 | |||
252 | return kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags); | ||
253 | } | ||
254 | |||
255 | /* | ||
256 | * write something. @more is true if caller will be sending more data | ||
257 | * shortly. | ||
258 | */ | ||
259 | static int ceph_tcp_sendmsg(struct socket *sock, struct kvec *iov, | ||
260 | size_t kvlen, size_t len, int more) | ||
261 | { | ||
262 | struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL }; | ||
263 | |||
264 | if (more) | ||
265 | msg.msg_flags |= MSG_MORE; | ||
266 | else | ||
267 | msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */ | ||
268 | |||
269 | return kernel_sendmsg(sock, &msg, iov, kvlen, len); | ||
270 | } | ||
271 | |||
272 | |||
273 | /* | ||
274 | * Shutdown/close the socket for the given connection. | ||
275 | */ | ||
276 | static int con_close_socket(struct ceph_connection *con) | ||
277 | { | ||
278 | int rc; | ||
279 | |||
280 | dout("con_close_socket on %p sock %p\n", con, con->sock); | ||
281 | if (!con->sock) | ||
282 | return 0; | ||
283 | set_bit(SOCK_CLOSED, &con->state); | ||
284 | rc = con->sock->ops->shutdown(con->sock, SHUT_RDWR); | ||
285 | sock_release(con->sock); | ||
286 | con->sock = NULL; | ||
287 | clear_bit(SOCK_CLOSED, &con->state); | ||
288 | return rc; | ||
289 | } | ||
290 | |||
291 | /* | ||
292 | * Reset a connection. Discard all incoming and outgoing messages | ||
293 | * and clear *_seq state. | ||
294 | */ | ||
295 | static void ceph_msg_remove(struct ceph_msg *msg) | ||
296 | { | ||
297 | list_del_init(&msg->list_head); | ||
298 | ceph_msg_put(msg); | ||
299 | } | ||
300 | static void ceph_msg_remove_list(struct list_head *head) | ||
301 | { | ||
302 | while (!list_empty(head)) { | ||
303 | struct ceph_msg *msg = list_first_entry(head, struct ceph_msg, | ||
304 | list_head); | ||
305 | ceph_msg_remove(msg); | ||
306 | } | ||
307 | } | ||
308 | |||
309 | static void reset_connection(struct ceph_connection *con) | ||
310 | { | ||
311 | /* reset connection, out_queue, msg_ and connect_seq */ | ||
312 | /* discard existing out_queue and msg_seq */ | ||
313 | ceph_msg_remove_list(&con->out_queue); | ||
314 | ceph_msg_remove_list(&con->out_sent); | ||
315 | |||
316 | if (con->in_msg) { | ||
317 | ceph_msg_put(con->in_msg); | ||
318 | con->in_msg = NULL; | ||
319 | } | ||
320 | |||
321 | con->connect_seq = 0; | ||
322 | con->out_seq = 0; | ||
323 | if (con->out_msg) { | ||
324 | ceph_msg_put(con->out_msg); | ||
325 | con->out_msg = NULL; | ||
326 | } | ||
327 | con->out_keepalive_pending = false; | ||
328 | con->in_seq = 0; | ||
329 | con->in_seq_acked = 0; | ||
330 | } | ||
331 | |||
332 | /* | ||
333 | * mark a peer down. drop any open connections. | ||
334 | */ | ||
335 | void ceph_con_close(struct ceph_connection *con) | ||
336 | { | ||
337 | dout("con_close %p peer %s\n", con, pr_addr(&con->peer_addr.in_addr)); | ||
338 | set_bit(CLOSED, &con->state); /* in case there's queued work */ | ||
339 | clear_bit(STANDBY, &con->state); /* avoid connect_seq bump */ | ||
340 | clear_bit(LOSSYTX, &con->state); /* so we retry next connect */ | ||
341 | clear_bit(KEEPALIVE_PENDING, &con->state); | ||
342 | clear_bit(WRITE_PENDING, &con->state); | ||
343 | mutex_lock(&con->mutex); | ||
344 | reset_connection(con); | ||
345 | con->peer_global_seq = 0; | ||
346 | cancel_delayed_work(&con->work); | ||
347 | mutex_unlock(&con->mutex); | ||
348 | queue_con(con); | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * Reopen a closed connection, with a new peer address. | ||
353 | */ | ||
354 | void ceph_con_open(struct ceph_connection *con, struct ceph_entity_addr *addr) | ||
355 | { | ||
356 | dout("con_open %p %s\n", con, pr_addr(&addr->in_addr)); | ||
357 | set_bit(OPENING, &con->state); | ||
358 | clear_bit(CLOSED, &con->state); | ||
359 | memcpy(&con->peer_addr, addr, sizeof(*addr)); | ||
360 | con->delay = 0; /* reset backoff memory */ | ||
361 | queue_con(con); | ||
362 | } | ||
363 | |||
364 | /* | ||
365 | * return true if this connection ever successfully opened | ||
366 | */ | ||
367 | bool ceph_con_opened(struct ceph_connection *con) | ||
368 | { | ||
369 | return con->connect_seq > 0; | ||
370 | } | ||
371 | |||
372 | /* | ||
373 | * generic get/put | ||
374 | */ | ||
375 | struct ceph_connection *ceph_con_get(struct ceph_connection *con) | ||
376 | { | ||
377 | dout("con_get %p nref = %d -> %d\n", con, | ||
378 | atomic_read(&con->nref), atomic_read(&con->nref) + 1); | ||
379 | if (atomic_inc_not_zero(&con->nref)) | ||
380 | return con; | ||
381 | return NULL; | ||
382 | } | ||
383 | |||
384 | void ceph_con_put(struct ceph_connection *con) | ||
385 | { | ||
386 | dout("con_put %p nref = %d -> %d\n", con, | ||
387 | atomic_read(&con->nref), atomic_read(&con->nref) - 1); | ||
388 | BUG_ON(atomic_read(&con->nref) == 0); | ||
389 | if (atomic_dec_and_test(&con->nref)) { | ||
390 | BUG_ON(con->sock); | ||
391 | kfree(con); | ||
392 | } | ||
393 | } | ||
394 | |||
395 | /* | ||
396 | * initialize a new connection. | ||
397 | */ | ||
398 | void ceph_con_init(struct ceph_messenger *msgr, struct ceph_connection *con) | ||
399 | { | ||
400 | dout("con_init %p\n", con); | ||
401 | memset(con, 0, sizeof(*con)); | ||
402 | atomic_set(&con->nref, 1); | ||
403 | con->msgr = msgr; | ||
404 | mutex_init(&con->mutex); | ||
405 | INIT_LIST_HEAD(&con->out_queue); | ||
406 | INIT_LIST_HEAD(&con->out_sent); | ||
407 | INIT_DELAYED_WORK(&con->work, con_work); | ||
408 | } | ||
409 | |||
410 | |||
411 | /* | ||
412 | * We maintain a global counter to order connection attempts. Get | ||
413 | * a unique seq greater than @gt. | ||
414 | */ | ||
415 | static u32 get_global_seq(struct ceph_messenger *msgr, u32 gt) | ||
416 | { | ||
417 | u32 ret; | ||
418 | |||
419 | spin_lock(&msgr->global_seq_lock); | ||
420 | if (msgr->global_seq < gt) | ||
421 | msgr->global_seq = gt; | ||
422 | ret = ++msgr->global_seq; | ||
423 | spin_unlock(&msgr->global_seq_lock); | ||
424 | return ret; | ||
425 | } | ||
426 | |||
427 | |||
428 | /* | ||
429 | * Prepare footer for currently outgoing message, and finish things | ||
430 | * off. Assumes out_kvec* are already valid.. we just add on to the end. | ||
431 | */ | ||
432 | static void prepare_write_message_footer(struct ceph_connection *con, int v) | ||
433 | { | ||
434 | struct ceph_msg *m = con->out_msg; | ||
435 | |||
436 | dout("prepare_write_message_footer %p\n", con); | ||
437 | con->out_kvec_is_msg = true; | ||
438 | con->out_kvec[v].iov_base = &m->footer; | ||
439 | con->out_kvec[v].iov_len = sizeof(m->footer); | ||
440 | con->out_kvec_bytes += sizeof(m->footer); | ||
441 | con->out_kvec_left++; | ||
442 | con->out_more = m->more_to_follow; | ||
443 | con->out_msg_done = true; | ||
444 | } | ||
445 | |||
446 | /* | ||
447 | * Prepare headers for the next outgoing message. | ||
448 | */ | ||
449 | static void prepare_write_message(struct ceph_connection *con) | ||
450 | { | ||
451 | struct ceph_msg *m; | ||
452 | int v = 0; | ||
453 | |||
454 | con->out_kvec_bytes = 0; | ||
455 | con->out_kvec_is_msg = true; | ||
456 | con->out_msg_done = false; | ||
457 | |||
458 | /* Sneak an ack in there first? If we can get it into the same | ||
459 | * TCP packet that's a good thing. */ | ||
460 | if (con->in_seq > con->in_seq_acked) { | ||
461 | con->in_seq_acked = con->in_seq; | ||
462 | con->out_kvec[v].iov_base = &tag_ack; | ||
463 | con->out_kvec[v++].iov_len = 1; | ||
464 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | ||
465 | con->out_kvec[v].iov_base = &con->out_temp_ack; | ||
466 | con->out_kvec[v++].iov_len = sizeof(con->out_temp_ack); | ||
467 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | ||
468 | } | ||
469 | |||
470 | m = list_first_entry(&con->out_queue, | ||
471 | struct ceph_msg, list_head); | ||
472 | con->out_msg = m; | ||
473 | if (test_bit(LOSSYTX, &con->state)) { | ||
474 | list_del_init(&m->list_head); | ||
475 | } else { | ||
476 | /* put message on sent list */ | ||
477 | ceph_msg_get(m); | ||
478 | list_move_tail(&m->list_head, &con->out_sent); | ||
479 | } | ||
480 | |||
481 | /* | ||
482 | * only assign outgoing seq # if we haven't sent this message | ||
483 | * yet. if it is requeued, resend with it's original seq. | ||
484 | */ | ||
485 | if (m->needs_out_seq) { | ||
486 | m->hdr.seq = cpu_to_le64(++con->out_seq); | ||
487 | m->needs_out_seq = false; | ||
488 | } | ||
489 | |||
490 | dout("prepare_write_message %p seq %lld type %d len %d+%d+%d %d pgs\n", | ||
491 | m, con->out_seq, le16_to_cpu(m->hdr.type), | ||
492 | le32_to_cpu(m->hdr.front_len), le32_to_cpu(m->hdr.middle_len), | ||
493 | le32_to_cpu(m->hdr.data_len), | ||
494 | m->nr_pages); | ||
495 | BUG_ON(le32_to_cpu(m->hdr.front_len) != m->front.iov_len); | ||
496 | |||
497 | /* tag + hdr + front + middle */ | ||
498 | con->out_kvec[v].iov_base = &tag_msg; | ||
499 | con->out_kvec[v++].iov_len = 1; | ||
500 | con->out_kvec[v].iov_base = &m->hdr; | ||
501 | con->out_kvec[v++].iov_len = sizeof(m->hdr); | ||
502 | con->out_kvec[v++] = m->front; | ||
503 | if (m->middle) | ||
504 | con->out_kvec[v++] = m->middle->vec; | ||
505 | con->out_kvec_left = v; | ||
506 | con->out_kvec_bytes += 1 + sizeof(m->hdr) + m->front.iov_len + | ||
507 | (m->middle ? m->middle->vec.iov_len : 0); | ||
508 | con->out_kvec_cur = con->out_kvec; | ||
509 | |||
510 | /* fill in crc (except data pages), footer */ | ||
511 | con->out_msg->hdr.crc = | ||
512 | cpu_to_le32(crc32c(0, (void *)&m->hdr, | ||
513 | sizeof(m->hdr) - sizeof(m->hdr.crc))); | ||
514 | con->out_msg->footer.flags = CEPH_MSG_FOOTER_COMPLETE; | ||
515 | con->out_msg->footer.front_crc = | ||
516 | cpu_to_le32(crc32c(0, m->front.iov_base, m->front.iov_len)); | ||
517 | if (m->middle) | ||
518 | con->out_msg->footer.middle_crc = | ||
519 | cpu_to_le32(crc32c(0, m->middle->vec.iov_base, | ||
520 | m->middle->vec.iov_len)); | ||
521 | else | ||
522 | con->out_msg->footer.middle_crc = 0; | ||
523 | con->out_msg->footer.data_crc = 0; | ||
524 | dout("prepare_write_message front_crc %u data_crc %u\n", | ||
525 | le32_to_cpu(con->out_msg->footer.front_crc), | ||
526 | le32_to_cpu(con->out_msg->footer.middle_crc)); | ||
527 | |||
528 | /* is there a data payload? */ | ||
529 | if (le32_to_cpu(m->hdr.data_len) > 0) { | ||
530 | /* initialize page iterator */ | ||
531 | con->out_msg_pos.page = 0; | ||
532 | con->out_msg_pos.page_pos = | ||
533 | le16_to_cpu(m->hdr.data_off) & ~PAGE_MASK; | ||
534 | con->out_msg_pos.data_pos = 0; | ||
535 | con->out_msg_pos.did_page_crc = 0; | ||
536 | con->out_more = 1; /* data + footer will follow */ | ||
537 | } else { | ||
538 | /* no, queue up footer too and be done */ | ||
539 | prepare_write_message_footer(con, v); | ||
540 | } | ||
541 | |||
542 | set_bit(WRITE_PENDING, &con->state); | ||
543 | } | ||
544 | |||
545 | /* | ||
546 | * Prepare an ack. | ||
547 | */ | ||
548 | static void prepare_write_ack(struct ceph_connection *con) | ||
549 | { | ||
550 | dout("prepare_write_ack %p %llu -> %llu\n", con, | ||
551 | con->in_seq_acked, con->in_seq); | ||
552 | con->in_seq_acked = con->in_seq; | ||
553 | |||
554 | con->out_kvec[0].iov_base = &tag_ack; | ||
555 | con->out_kvec[0].iov_len = 1; | ||
556 | con->out_temp_ack = cpu_to_le64(con->in_seq_acked); | ||
557 | con->out_kvec[1].iov_base = &con->out_temp_ack; | ||
558 | con->out_kvec[1].iov_len = sizeof(con->out_temp_ack); | ||
559 | con->out_kvec_left = 2; | ||
560 | con->out_kvec_bytes = 1 + sizeof(con->out_temp_ack); | ||
561 | con->out_kvec_cur = con->out_kvec; | ||
562 | con->out_more = 1; /* more will follow.. eventually.. */ | ||
563 | set_bit(WRITE_PENDING, &con->state); | ||
564 | } | ||
565 | |||
566 | /* | ||
567 | * Prepare to write keepalive byte. | ||
568 | */ | ||
569 | static void prepare_write_keepalive(struct ceph_connection *con) | ||
570 | { | ||
571 | dout("prepare_write_keepalive %p\n", con); | ||
572 | con->out_kvec[0].iov_base = &tag_keepalive; | ||
573 | con->out_kvec[0].iov_len = 1; | ||
574 | con->out_kvec_left = 1; | ||
575 | con->out_kvec_bytes = 1; | ||
576 | con->out_kvec_cur = con->out_kvec; | ||
577 | set_bit(WRITE_PENDING, &con->state); | ||
578 | } | ||
579 | |||
580 | /* | ||
581 | * Connection negotiation. | ||
582 | */ | ||
583 | |||
584 | static void prepare_connect_authorizer(struct ceph_connection *con) | ||
585 | { | ||
586 | void *auth_buf; | ||
587 | int auth_len = 0; | ||
588 | int auth_protocol = 0; | ||
589 | |||
590 | mutex_unlock(&con->mutex); | ||
591 | if (con->ops->get_authorizer) | ||
592 | con->ops->get_authorizer(con, &auth_buf, &auth_len, | ||
593 | &auth_protocol, &con->auth_reply_buf, | ||
594 | &con->auth_reply_buf_len, | ||
595 | con->auth_retry); | ||
596 | mutex_lock(&con->mutex); | ||
597 | |||
598 | con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol); | ||
599 | con->out_connect.authorizer_len = cpu_to_le32(auth_len); | ||
600 | |||
601 | con->out_kvec[con->out_kvec_left].iov_base = auth_buf; | ||
602 | con->out_kvec[con->out_kvec_left].iov_len = auth_len; | ||
603 | con->out_kvec_left++; | ||
604 | con->out_kvec_bytes += auth_len; | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * We connected to a peer and are saying hello. | ||
609 | */ | ||
610 | static void prepare_write_banner(struct ceph_messenger *msgr, | ||
611 | struct ceph_connection *con) | ||
612 | { | ||
613 | int len = strlen(CEPH_BANNER); | ||
614 | |||
615 | con->out_kvec[0].iov_base = CEPH_BANNER; | ||
616 | con->out_kvec[0].iov_len = len; | ||
617 | con->out_kvec[1].iov_base = &msgr->my_enc_addr; | ||
618 | con->out_kvec[1].iov_len = sizeof(msgr->my_enc_addr); | ||
619 | con->out_kvec_left = 2; | ||
620 | con->out_kvec_bytes = len + sizeof(msgr->my_enc_addr); | ||
621 | con->out_kvec_cur = con->out_kvec; | ||
622 | con->out_more = 0; | ||
623 | set_bit(WRITE_PENDING, &con->state); | ||
624 | } | ||
625 | |||
626 | static void prepare_write_connect(struct ceph_messenger *msgr, | ||
627 | struct ceph_connection *con, | ||
628 | int after_banner) | ||
629 | { | ||
630 | unsigned global_seq = get_global_seq(con->msgr, 0); | ||
631 | int proto; | ||
632 | |||
633 | switch (con->peer_name.type) { | ||
634 | case CEPH_ENTITY_TYPE_MON: | ||
635 | proto = CEPH_MONC_PROTOCOL; | ||
636 | break; | ||
637 | case CEPH_ENTITY_TYPE_OSD: | ||
638 | proto = CEPH_OSDC_PROTOCOL; | ||
639 | break; | ||
640 | case CEPH_ENTITY_TYPE_MDS: | ||
641 | proto = CEPH_MDSC_PROTOCOL; | ||
642 | break; | ||
643 | default: | ||
644 | BUG(); | ||
645 | } | ||
646 | |||
647 | dout("prepare_write_connect %p cseq=%d gseq=%d proto=%d\n", con, | ||
648 | con->connect_seq, global_seq, proto); | ||
649 | |||
650 | con->out_connect.features = cpu_to_le64(CEPH_FEATURE_SUPPORTED); | ||
651 | con->out_connect.host_type = cpu_to_le32(CEPH_ENTITY_TYPE_CLIENT); | ||
652 | con->out_connect.connect_seq = cpu_to_le32(con->connect_seq); | ||
653 | con->out_connect.global_seq = cpu_to_le32(global_seq); | ||
654 | con->out_connect.protocol_version = cpu_to_le32(proto); | ||
655 | con->out_connect.flags = 0; | ||
656 | |||
657 | if (!after_banner) { | ||
658 | con->out_kvec_left = 0; | ||
659 | con->out_kvec_bytes = 0; | ||
660 | } | ||
661 | con->out_kvec[con->out_kvec_left].iov_base = &con->out_connect; | ||
662 | con->out_kvec[con->out_kvec_left].iov_len = sizeof(con->out_connect); | ||
663 | con->out_kvec_left++; | ||
664 | con->out_kvec_bytes += sizeof(con->out_connect); | ||
665 | con->out_kvec_cur = con->out_kvec; | ||
666 | con->out_more = 0; | ||
667 | set_bit(WRITE_PENDING, &con->state); | ||
668 | |||
669 | prepare_connect_authorizer(con); | ||
670 | } | ||
671 | |||
672 | |||
673 | /* | ||
674 | * write as much of pending kvecs to the socket as we can. | ||
675 | * 1 -> done | ||
676 | * 0 -> socket full, but more to do | ||
677 | * <0 -> error | ||
678 | */ | ||
679 | static int write_partial_kvec(struct ceph_connection *con) | ||
680 | { | ||
681 | int ret; | ||
682 | |||
683 | dout("write_partial_kvec %p %d left\n", con, con->out_kvec_bytes); | ||
684 | while (con->out_kvec_bytes > 0) { | ||
685 | ret = ceph_tcp_sendmsg(con->sock, con->out_kvec_cur, | ||
686 | con->out_kvec_left, con->out_kvec_bytes, | ||
687 | con->out_more); | ||
688 | if (ret <= 0) | ||
689 | goto out; | ||
690 | con->out_kvec_bytes -= ret; | ||
691 | if (con->out_kvec_bytes == 0) | ||
692 | break; /* done */ | ||
693 | while (ret > 0) { | ||
694 | if (ret >= con->out_kvec_cur->iov_len) { | ||
695 | ret -= con->out_kvec_cur->iov_len; | ||
696 | con->out_kvec_cur++; | ||
697 | con->out_kvec_left--; | ||
698 | } else { | ||
699 | con->out_kvec_cur->iov_len -= ret; | ||
700 | con->out_kvec_cur->iov_base += ret; | ||
701 | ret = 0; | ||
702 | break; | ||
703 | } | ||
704 | } | ||
705 | } | ||
706 | con->out_kvec_left = 0; | ||
707 | con->out_kvec_is_msg = false; | ||
708 | ret = 1; | ||
709 | out: | ||
710 | dout("write_partial_kvec %p %d left in %d kvecs ret = %d\n", con, | ||
711 | con->out_kvec_bytes, con->out_kvec_left, ret); | ||
712 | return ret; /* done! */ | ||
713 | } | ||
714 | |||
715 | /* | ||
716 | * Write as much message data payload as we can. If we finish, queue | ||
717 | * up the footer. | ||
718 | * 1 -> done, footer is now queued in out_kvec[]. | ||
719 | * 0 -> socket full, but more to do | ||
720 | * <0 -> error | ||
721 | */ | ||
722 | static int write_partial_msg_pages(struct ceph_connection *con) | ||
723 | { | ||
724 | struct ceph_msg *msg = con->out_msg; | ||
725 | unsigned data_len = le32_to_cpu(msg->hdr.data_len); | ||
726 | size_t len; | ||
727 | int crc = con->msgr->nocrc; | ||
728 | int ret; | ||
729 | |||
730 | dout("write_partial_msg_pages %p msg %p page %d/%d offset %d\n", | ||
731 | con, con->out_msg, con->out_msg_pos.page, con->out_msg->nr_pages, | ||
732 | con->out_msg_pos.page_pos); | ||
733 | |||
734 | while (con->out_msg_pos.page < con->out_msg->nr_pages) { | ||
735 | struct page *page = NULL; | ||
736 | void *kaddr = NULL; | ||
737 | |||
738 | /* | ||
739 | * if we are calculating the data crc (the default), we need | ||
740 | * to map the page. if our pages[] has been revoked, use the | ||
741 | * zero page. | ||
742 | */ | ||
743 | if (msg->pages) { | ||
744 | page = msg->pages[con->out_msg_pos.page]; | ||
745 | if (crc) | ||
746 | kaddr = kmap(page); | ||
747 | } else if (msg->pagelist) { | ||
748 | page = list_first_entry(&msg->pagelist->head, | ||
749 | struct page, lru); | ||
750 | if (crc) | ||
751 | kaddr = kmap(page); | ||
752 | } else { | ||
753 | page = con->msgr->zero_page; | ||
754 | if (crc) | ||
755 | kaddr = page_address(con->msgr->zero_page); | ||
756 | } | ||
757 | len = min((int)(PAGE_SIZE - con->out_msg_pos.page_pos), | ||
758 | (int)(data_len - con->out_msg_pos.data_pos)); | ||
759 | if (crc && !con->out_msg_pos.did_page_crc) { | ||
760 | void *base = kaddr + con->out_msg_pos.page_pos; | ||
761 | u32 tmpcrc = le32_to_cpu(con->out_msg->footer.data_crc); | ||
762 | |||
763 | BUG_ON(kaddr == NULL); | ||
764 | con->out_msg->footer.data_crc = | ||
765 | cpu_to_le32(crc32c(tmpcrc, base, len)); | ||
766 | con->out_msg_pos.did_page_crc = 1; | ||
767 | } | ||
768 | |||
769 | ret = kernel_sendpage(con->sock, page, | ||
770 | con->out_msg_pos.page_pos, len, | ||
771 | MSG_DONTWAIT | MSG_NOSIGNAL | | ||
772 | MSG_MORE); | ||
773 | |||
774 | if (crc && (msg->pages || msg->pagelist)) | ||
775 | kunmap(page); | ||
776 | |||
777 | if (ret <= 0) | ||
778 | goto out; | ||
779 | |||
780 | con->out_msg_pos.data_pos += ret; | ||
781 | con->out_msg_pos.page_pos += ret; | ||
782 | if (ret == len) { | ||
783 | con->out_msg_pos.page_pos = 0; | ||
784 | con->out_msg_pos.page++; | ||
785 | con->out_msg_pos.did_page_crc = 0; | ||
786 | if (msg->pagelist) | ||
787 | list_move_tail(&page->lru, | ||
788 | &msg->pagelist->head); | ||
789 | } | ||
790 | } | ||
791 | |||
792 | dout("write_partial_msg_pages %p msg %p done\n", con, msg); | ||
793 | |||
794 | /* prepare and queue up footer, too */ | ||
795 | if (!crc) | ||
796 | con->out_msg->footer.flags |= CEPH_MSG_FOOTER_NOCRC; | ||
797 | con->out_kvec_bytes = 0; | ||
798 | con->out_kvec_left = 0; | ||
799 | con->out_kvec_cur = con->out_kvec; | ||
800 | prepare_write_message_footer(con, 0); | ||
801 | ret = 1; | ||
802 | out: | ||
803 | return ret; | ||
804 | } | ||
805 | |||
806 | /* | ||
807 | * write some zeros | ||
808 | */ | ||
809 | static int write_partial_skip(struct ceph_connection *con) | ||
810 | { | ||
811 | int ret; | ||
812 | |||
813 | while (con->out_skip > 0) { | ||
814 | struct kvec iov = { | ||
815 | .iov_base = page_address(con->msgr->zero_page), | ||
816 | .iov_len = min(con->out_skip, (int)PAGE_CACHE_SIZE) | ||
817 | }; | ||
818 | |||
819 | ret = ceph_tcp_sendmsg(con->sock, &iov, 1, iov.iov_len, 1); | ||
820 | if (ret <= 0) | ||
821 | goto out; | ||
822 | con->out_skip -= ret; | ||
823 | } | ||
824 | ret = 1; | ||
825 | out: | ||
826 | return ret; | ||
827 | } | ||
828 | |||
829 | /* | ||
830 | * Prepare to read connection handshake, or an ack. | ||
831 | */ | ||
832 | static void prepare_read_banner(struct ceph_connection *con) | ||
833 | { | ||
834 | dout("prepare_read_banner %p\n", con); | ||
835 | con->in_base_pos = 0; | ||
836 | } | ||
837 | |||
838 | static void prepare_read_connect(struct ceph_connection *con) | ||
839 | { | ||
840 | dout("prepare_read_connect %p\n", con); | ||
841 | con->in_base_pos = 0; | ||
842 | } | ||
843 | |||
844 | static void prepare_read_ack(struct ceph_connection *con) | ||
845 | { | ||
846 | dout("prepare_read_ack %p\n", con); | ||
847 | con->in_base_pos = 0; | ||
848 | } | ||
849 | |||
850 | static void prepare_read_tag(struct ceph_connection *con) | ||
851 | { | ||
852 | dout("prepare_read_tag %p\n", con); | ||
853 | con->in_base_pos = 0; | ||
854 | con->in_tag = CEPH_MSGR_TAG_READY; | ||
855 | } | ||
856 | |||
857 | /* | ||
858 | * Prepare to read a message. | ||
859 | */ | ||
860 | static int prepare_read_message(struct ceph_connection *con) | ||
861 | { | ||
862 | dout("prepare_read_message %p\n", con); | ||
863 | BUG_ON(con->in_msg != NULL); | ||
864 | con->in_base_pos = 0; | ||
865 | con->in_front_crc = con->in_middle_crc = con->in_data_crc = 0; | ||
866 | return 0; | ||
867 | } | ||
868 | |||
869 | |||
870 | static int read_partial(struct ceph_connection *con, | ||
871 | int *to, int size, void *object) | ||
872 | { | ||
873 | *to += size; | ||
874 | while (con->in_base_pos < *to) { | ||
875 | int left = *to - con->in_base_pos; | ||
876 | int have = size - left; | ||
877 | int ret = ceph_tcp_recvmsg(con->sock, object + have, left); | ||
878 | if (ret <= 0) | ||
879 | return ret; | ||
880 | con->in_base_pos += ret; | ||
881 | } | ||
882 | return 1; | ||
883 | } | ||
884 | |||
885 | |||
886 | /* | ||
887 | * Read all or part of the connect-side handshake on a new connection | ||
888 | */ | ||
889 | static int read_partial_banner(struct ceph_connection *con) | ||
890 | { | ||
891 | int ret, to = 0; | ||
892 | |||
893 | dout("read_partial_banner %p at %d\n", con, con->in_base_pos); | ||
894 | |||
895 | /* peer's banner */ | ||
896 | ret = read_partial(con, &to, strlen(CEPH_BANNER), con->in_banner); | ||
897 | if (ret <= 0) | ||
898 | goto out; | ||
899 | ret = read_partial(con, &to, sizeof(con->actual_peer_addr), | ||
900 | &con->actual_peer_addr); | ||
901 | if (ret <= 0) | ||
902 | goto out; | ||
903 | ret = read_partial(con, &to, sizeof(con->peer_addr_for_me), | ||
904 | &con->peer_addr_for_me); | ||
905 | if (ret <= 0) | ||
906 | goto out; | ||
907 | out: | ||
908 | return ret; | ||
909 | } | ||
910 | |||
911 | static int read_partial_connect(struct ceph_connection *con) | ||
912 | { | ||
913 | int ret, to = 0; | ||
914 | |||
915 | dout("read_partial_connect %p at %d\n", con, con->in_base_pos); | ||
916 | |||
917 | ret = read_partial(con, &to, sizeof(con->in_reply), &con->in_reply); | ||
918 | if (ret <= 0) | ||
919 | goto out; | ||
920 | ret = read_partial(con, &to, le32_to_cpu(con->in_reply.authorizer_len), | ||
921 | con->auth_reply_buf); | ||
922 | if (ret <= 0) | ||
923 | goto out; | ||
924 | |||
925 | dout("read_partial_connect %p tag %d, con_seq = %u, g_seq = %u\n", | ||
926 | con, (int)con->in_reply.tag, | ||
927 | le32_to_cpu(con->in_reply.connect_seq), | ||
928 | le32_to_cpu(con->in_reply.global_seq)); | ||
929 | out: | ||
930 | return ret; | ||
931 | |||
932 | } | ||
933 | |||
934 | /* | ||
935 | * Verify the hello banner looks okay. | ||
936 | */ | ||
937 | static int verify_hello(struct ceph_connection *con) | ||
938 | { | ||
939 | if (memcmp(con->in_banner, CEPH_BANNER, strlen(CEPH_BANNER))) { | ||
940 | pr_err("connect to %s got bad banner\n", | ||
941 | pr_addr(&con->peer_addr.in_addr)); | ||
942 | con->error_msg = "protocol error, bad banner"; | ||
943 | return -1; | ||
944 | } | ||
945 | return 0; | ||
946 | } | ||
947 | |||
948 | static bool addr_is_blank(struct sockaddr_storage *ss) | ||
949 | { | ||
950 | switch (ss->ss_family) { | ||
951 | case AF_INET: | ||
952 | return ((struct sockaddr_in *)ss)->sin_addr.s_addr == 0; | ||
953 | case AF_INET6: | ||
954 | return | ||
955 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[0] == 0 && | ||
956 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[1] == 0 && | ||
957 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[2] == 0 && | ||
958 | ((struct sockaddr_in6 *)ss)->sin6_addr.s6_addr32[3] == 0; | ||
959 | } | ||
960 | return false; | ||
961 | } | ||
962 | |||
/*
 * Extract the port, in host byte order, from a socket address.
 * Returns 0 for address families we do not understand.
 */
static int addr_port(struct sockaddr_storage *ss)
{
	if (ss->ss_family == AF_INET)
		return ntohs(((struct sockaddr_in *)ss)->sin_port);
	if (ss->ss_family == AF_INET6)
		return ntohs(((struct sockaddr_in6 *)ss)->sin6_port);
	return 0;
}
973 | |||
/*
 * Store port @p (host byte order) into a socket address, converting to
 * network byte order.  Unknown address families are left untouched.
 *
 * Fix: the AF_INET case was missing a break and fell through into the
 * AF_INET6 case, stamping sin6_port as well.  Each case now only
 * writes its own family's port field.
 */
static void addr_set_port(struct sockaddr_storage *ss, int p)
{
	switch (ss->ss_family) {
	case AF_INET:
		((struct sockaddr_in *)ss)->sin_port = htons(p);
		break;
	case AF_INET6:
		((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
		break;
	}
}
983 | |||
984 | /* | ||
985 | * Parse an ip[:port] list into an addr array. Use the default | ||
986 | * monitor port if a port isn't specified. | ||
987 | */ | ||
988 | int ceph_parse_ips(const char *c, const char *end, | ||
989 | struct ceph_entity_addr *addr, | ||
990 | int max_count, int *count) | ||
991 | { | ||
992 | int i; | ||
993 | const char *p = c; | ||
994 | |||
995 | dout("parse_ips on '%.*s'\n", (int)(end-c), c); | ||
996 | for (i = 0; i < max_count; i++) { | ||
997 | const char *ipend; | ||
998 | struct sockaddr_storage *ss = &addr[i].in_addr; | ||
999 | struct sockaddr_in *in4 = (void *)ss; | ||
1000 | struct sockaddr_in6 *in6 = (void *)ss; | ||
1001 | int port; | ||
1002 | char delim = ','; | ||
1003 | |||
1004 | if (*p == '[') { | ||
1005 | delim = ']'; | ||
1006 | p++; | ||
1007 | } | ||
1008 | |||
1009 | memset(ss, 0, sizeof(*ss)); | ||
1010 | if (in4_pton(p, end - p, (u8 *)&in4->sin_addr.s_addr, | ||
1011 | delim, &ipend)) | ||
1012 | ss->ss_family = AF_INET; | ||
1013 | else if (in6_pton(p, end - p, (u8 *)&in6->sin6_addr.s6_addr, | ||
1014 | delim, &ipend)) | ||
1015 | ss->ss_family = AF_INET6; | ||
1016 | else | ||
1017 | goto bad; | ||
1018 | p = ipend; | ||
1019 | |||
1020 | if (delim == ']') { | ||
1021 | if (*p != ']') { | ||
1022 | dout("missing matching ']'\n"); | ||
1023 | goto bad; | ||
1024 | } | ||
1025 | p++; | ||
1026 | } | ||
1027 | |||
1028 | /* port? */ | ||
1029 | if (p < end && *p == ':') { | ||
1030 | port = 0; | ||
1031 | p++; | ||
1032 | while (p < end && *p >= '0' && *p <= '9') { | ||
1033 | port = (port * 10) + (*p - '0'); | ||
1034 | p++; | ||
1035 | } | ||
1036 | if (port > 65535 || port == 0) | ||
1037 | goto bad; | ||
1038 | } else { | ||
1039 | port = CEPH_MON_PORT; | ||
1040 | } | ||
1041 | |||
1042 | addr_set_port(ss, port); | ||
1043 | |||
1044 | dout("parse_ips got %s\n", pr_addr(ss)); | ||
1045 | |||
1046 | if (p == end) | ||
1047 | break; | ||
1048 | if (*p != ',') | ||
1049 | goto bad; | ||
1050 | p++; | ||
1051 | } | ||
1052 | |||
1053 | if (p != end) | ||
1054 | goto bad; | ||
1055 | |||
1056 | if (count) | ||
1057 | *count = i + 1; | ||
1058 | return 0; | ||
1059 | |||
1060 | bad: | ||
1061 | pr_err("parse_ips bad ip '%.*s'\n", (int)(end - c), c); | ||
1062 | return -EINVAL; | ||
1063 | } | ||
1064 | |||
/*
 * Process the banner exchange just read off a new connection: validate
 * the peer's banner, check we reached the peer we intended to, and
 * opportunistically learn our own externally-visible address from what
 * the peer reported back to us.
 *
 * Returns 0 and advances to NEGOTIATING on success, -1 on protocol
 * error (con->error_msg is set).
 */
static int process_banner(struct ceph_connection *con)
{
	dout("process_banner on %p\n", con);

	if (verify_hello(con) < 0)
		return -1;

	ceph_decode_addr(&con->actual_peer_addr);
	ceph_decode_addr(&con->peer_addr_for_me);

	/*
	 * Make sure the other end is who we wanted.  note that the other
	 * end may not yet know their ip address, so if it's 0.0.0.0, give
	 * them the benefit of the doubt.
	 */
	if (memcmp(&con->peer_addr, &con->actual_peer_addr,
		   sizeof(con->peer_addr)) != 0 &&
	    !(addr_is_blank(&con->actual_peer_addr.in_addr) &&
	      con->actual_peer_addr.nonce == con->peer_addr.nonce)) {
		pr_warning("wrong peer, want %s/%d, got %s/%d\n",
			   pr_addr(&con->peer_addr.in_addr),
			   (int)le32_to_cpu(con->peer_addr.nonce),
			   pr_addr(&con->actual_peer_addr.in_addr),
			   (int)le32_to_cpu(con->actual_peer_addr.nonce));
		con->error_msg = "wrong peer at address";
		return -1;
	}

	/*
	 * did we learn our address?
	 */
	if (addr_is_blank(&con->msgr->inst.addr.in_addr)) {
		int port = addr_port(&con->msgr->inst.addr.in_addr);

		/* take the peer-reported address but keep our own port */
		memcpy(&con->msgr->inst.addr.in_addr,
		       &con->peer_addr_for_me.in_addr,
		       sizeof(con->peer_addr_for_me.in_addr));
		addr_set_port(&con->msgr->inst.addr.in_addr, port);
		encode_my_addr(con->msgr);
		dout("process_banner learned my addr is %s\n",
		     pr_addr(&con->msgr->inst.addr.in_addr));
	}

	set_bit(NEGOTIATING, &con->state);
	prepare_read_connect(con);
	return 0;
}
1112 | |||
/*
 * Tear down the connection after a fatal protocol mismatch and give
 * the upper layer a chance to react via the bad_proto callback.
 * Called with con->mutex held; the mutex is dropped around the
 * callback and re-taken before returning.
 */
static void fail_protocol(struct ceph_connection *con)
{
	reset_connection(con);
	set_bit(CLOSED, &con->state);  /* in case there's queued work */

	mutex_unlock(&con->mutex);
	if (con->ops->bad_proto)
		con->ops->bad_proto(con);
	mutex_lock(&con->mutex);
}
1123 | |||
/*
 * Handle the server's reply to our connect attempt, dispatching on the
 * reply tag: fatal feature/protocol mismatches, authorization retry,
 * session reset, sequence-number renegotiation, or READY.
 *
 * Returns 0 to keep the handshake going (or on READY), -1 on a fatal
 * error (con->error_msg is set).
 */
static int process_connect(struct ceph_connection *con)
{
	u64 sup_feat = CEPH_FEATURE_SUPPORTED;
	u64 req_feat = CEPH_FEATURE_REQUIRED;
	u64 server_feat = le64_to_cpu(con->in_reply.features);

	dout("process_connect on %p tag %d\n", con, (int)con->in_tag);

	switch (con->in_reply.tag) {
	case CEPH_MSGR_TAG_FEATURES:
		/* server requires features we do not support: fatal */
		pr_err("%s%lld %s feature set mismatch,"
		       " my %llx < server's %llx, missing %llx\n",
		       ENTITY_NAME(con->peer_name),
		       pr_addr(&con->peer_addr.in_addr),
		       sup_feat, server_feat, server_feat & ~sup_feat);
		con->error_msg = "missing required protocol features";
		fail_protocol(con);
		return -1;

	case CEPH_MSGR_TAG_BADPROTOVER:
		pr_err("%s%lld %s protocol version mismatch,"
		       " my %d != server's %d\n",
		       ENTITY_NAME(con->peer_name),
		       pr_addr(&con->peer_addr.in_addr),
		       le32_to_cpu(con->out_connect.protocol_version),
		       le32_to_cpu(con->in_reply.protocol_version));
		con->error_msg = "protocol version mismatch";
		fail_protocol(con);
		return -1;

	case CEPH_MSGR_TAG_BADAUTHORIZER:
		/* retry the handshake once with a fresh authorizer */
		con->auth_retry++;
		dout("process_connect %p got BADAUTHORIZER attempt %d\n", con,
		     con->auth_retry);
		if (con->auth_retry == 2) {
			con->error_msg = "connect authorization failure";
			reset_connection(con);
			set_bit(CLOSED, &con->state);
			return -1;
		}
		con->auth_retry = 1;
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_RESETSESSION:
		/*
		 * If we connected with a large connect_seq but the peer
		 * has no record of a session with us (no connection, or
		 * connect_seq == 0), they will send RESETSESION to indicate
		 * that they must have reset their session, and may have
		 * dropped messages.
		 */
		dout("process_connect got RESET peer seq %u\n",
		     le32_to_cpu(con->in_connect.connect_seq));
		pr_err("%s%lld %s connection reset\n",
		       ENTITY_NAME(con->peer_name),
		       pr_addr(&con->peer_addr.in_addr));
		reset_connection(con);
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);

		/* Tell ceph about it. */
		mutex_unlock(&con->mutex);
		pr_info("reset on %s%lld\n", ENTITY_NAME(con->peer_name));
		if (con->ops->peer_reset)
			con->ops->peer_reset(con);
		mutex_lock(&con->mutex);
		break;

	case CEPH_MSGR_TAG_RETRY_SESSION:
		/*
		 * If we sent a smaller connect_seq than the peer has, try
		 * again with a larger value.
		 */
		dout("process_connect got RETRY my seq = %u, peer_seq = %u\n",
		     le32_to_cpu(con->out_connect.connect_seq),
		     le32_to_cpu(con->in_connect.connect_seq));
		con->connect_seq = le32_to_cpu(con->in_connect.connect_seq);
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_RETRY_GLOBAL:
		/*
		 * If we sent a smaller global_seq than the peer has, try
		 * again with a larger value.
		 */
		dout("process_connect got RETRY_GLOBAL my %u peer_gseq %u\n",
		     con->peer_global_seq,
		     le32_to_cpu(con->in_connect.global_seq));
		get_global_seq(con->msgr,
			       le32_to_cpu(con->in_connect.global_seq));
		prepare_write_connect(con->msgr, con, 0);
		prepare_read_connect(con);
		break;

	case CEPH_MSGR_TAG_READY:
		if (req_feat & ~server_feat) {
			pr_err("%s%lld %s protocol feature mismatch,"
			       " my required %llx > server's %llx, need %llx\n",
			       ENTITY_NAME(con->peer_name),
			       pr_addr(&con->peer_addr.in_addr),
			       req_feat, server_feat, req_feat & ~server_feat);
			con->error_msg = "missing required protocol features";
			fail_protocol(con);
			return -1;
		}
		/* handshake done: record negotiated state, start reading */
		clear_bit(CONNECTING, &con->state);
		con->peer_global_seq = le32_to_cpu(con->in_reply.global_seq);
		con->connect_seq++;
		con->peer_features = server_feat;
		dout("process_connect got READY gseq %d cseq %d (%d)\n",
		     con->peer_global_seq,
		     le32_to_cpu(con->in_reply.connect_seq),
		     con->connect_seq);
		WARN_ON(con->connect_seq !=
			le32_to_cpu(con->in_reply.connect_seq));

		if (con->in_reply.flags & CEPH_MSG_CONNECT_LOSSY)
			set_bit(LOSSYTX, &con->state);

		prepare_read_tag(con);
		break;

	case CEPH_MSGR_TAG_WAIT:
		/*
		 * If there is a connection race (we are opening
		 * connections to each other), one of us may just have
		 * to WAIT.  This shouldn't happen if we are the
		 * client.
		 */
		pr_err("process_connect peer connecting WAIT\n");
		/* fallthrough: WAIT is treated as a fatal error here */

	default:
		pr_err("connect protocol error, will retry\n");
		con->error_msg = "protocol error, garbage tag during connect";
		return -1;
	}
	return 0;
}
1265 | |||
1266 | |||
1267 | /* | ||
1268 | * read (part of) an ack | ||
1269 | */ | ||
1270 | static int read_partial_ack(struct ceph_connection *con) | ||
1271 | { | ||
1272 | int to = 0; | ||
1273 | |||
1274 | return read_partial(con, &to, sizeof(con->in_temp_ack), | ||
1275 | &con->in_temp_ack); | ||
1276 | } | ||
1277 | |||
1278 | |||
1279 | /* | ||
1280 | * We can finally discard anything that's been acked. | ||
1281 | */ | ||
1282 | static void process_ack(struct ceph_connection *con) | ||
1283 | { | ||
1284 | struct ceph_msg *m; | ||
1285 | u64 ack = le64_to_cpu(con->in_temp_ack); | ||
1286 | u64 seq; | ||
1287 | |||
1288 | while (!list_empty(&con->out_sent)) { | ||
1289 | m = list_first_entry(&con->out_sent, struct ceph_msg, | ||
1290 | list_head); | ||
1291 | seq = le64_to_cpu(m->hdr.seq); | ||
1292 | if (seq > ack) | ||
1293 | break; | ||
1294 | dout("got ack for seq %llu type %d at %p\n", seq, | ||
1295 | le16_to_cpu(m->hdr.type), m); | ||
1296 | ceph_msg_remove(m); | ||
1297 | } | ||
1298 | prepare_read_tag(con); | ||
1299 | } | ||
1300 | |||
1301 | |||
1302 | |||
1303 | |||
1304 | static int read_partial_message_section(struct ceph_connection *con, | ||
1305 | struct kvec *section, | ||
1306 | unsigned int sec_len, u32 *crc) | ||
1307 | { | ||
1308 | int left; | ||
1309 | int ret; | ||
1310 | |||
1311 | BUG_ON(!section); | ||
1312 | |||
1313 | while (section->iov_len < sec_len) { | ||
1314 | BUG_ON(section->iov_base == NULL); | ||
1315 | left = sec_len - section->iov_len; | ||
1316 | ret = ceph_tcp_recvmsg(con->sock, (char *)section->iov_base + | ||
1317 | section->iov_len, left); | ||
1318 | if (ret <= 0) | ||
1319 | return ret; | ||
1320 | section->iov_len += ret; | ||
1321 | if (section->iov_len == sec_len) | ||
1322 | *crc = crc32c(0, section->iov_base, | ||
1323 | section->iov_len); | ||
1324 | } | ||
1325 | |||
1326 | return 1; | ||
1327 | } | ||
1328 | |||
1329 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | ||
1330 | struct ceph_msg_header *hdr, | ||
1331 | int *skip); | ||
1332 | /* | ||
1333 | * read (part of) a message. | ||
1334 | */ | ||
1335 | static int read_partial_message(struct ceph_connection *con) | ||
1336 | { | ||
1337 | struct ceph_msg *m = con->in_msg; | ||
1338 | void *p; | ||
1339 | int ret; | ||
1340 | int to, left; | ||
1341 | unsigned front_len, middle_len, data_len, data_off; | ||
1342 | int datacrc = con->msgr->nocrc; | ||
1343 | int skip; | ||
1344 | u64 seq; | ||
1345 | |||
1346 | dout("read_partial_message con %p msg %p\n", con, m); | ||
1347 | |||
1348 | /* header */ | ||
1349 | while (con->in_base_pos < sizeof(con->in_hdr)) { | ||
1350 | left = sizeof(con->in_hdr) - con->in_base_pos; | ||
1351 | ret = ceph_tcp_recvmsg(con->sock, | ||
1352 | (char *)&con->in_hdr + con->in_base_pos, | ||
1353 | left); | ||
1354 | if (ret <= 0) | ||
1355 | return ret; | ||
1356 | con->in_base_pos += ret; | ||
1357 | if (con->in_base_pos == sizeof(con->in_hdr)) { | ||
1358 | u32 crc = crc32c(0, (void *)&con->in_hdr, | ||
1359 | sizeof(con->in_hdr) - sizeof(con->in_hdr.crc)); | ||
1360 | if (crc != le32_to_cpu(con->in_hdr.crc)) { | ||
1361 | pr_err("read_partial_message bad hdr " | ||
1362 | " crc %u != expected %u\n", | ||
1363 | crc, con->in_hdr.crc); | ||
1364 | return -EBADMSG; | ||
1365 | } | ||
1366 | } | ||
1367 | } | ||
1368 | front_len = le32_to_cpu(con->in_hdr.front_len); | ||
1369 | if (front_len > CEPH_MSG_MAX_FRONT_LEN) | ||
1370 | return -EIO; | ||
1371 | middle_len = le32_to_cpu(con->in_hdr.middle_len); | ||
1372 | if (middle_len > CEPH_MSG_MAX_DATA_LEN) | ||
1373 | return -EIO; | ||
1374 | data_len = le32_to_cpu(con->in_hdr.data_len); | ||
1375 | if (data_len > CEPH_MSG_MAX_DATA_LEN) | ||
1376 | return -EIO; | ||
1377 | data_off = le16_to_cpu(con->in_hdr.data_off); | ||
1378 | |||
1379 | /* verify seq# */ | ||
1380 | seq = le64_to_cpu(con->in_hdr.seq); | ||
1381 | if ((s64)seq - (s64)con->in_seq < 1) { | ||
1382 | pr_info("skipping %s%lld %s seq %lld, expected %lld\n", | ||
1383 | ENTITY_NAME(con->peer_name), | ||
1384 | pr_addr(&con->peer_addr.in_addr), | ||
1385 | seq, con->in_seq + 1); | ||
1386 | con->in_base_pos = -front_len - middle_len - data_len - | ||
1387 | sizeof(m->footer); | ||
1388 | con->in_tag = CEPH_MSGR_TAG_READY; | ||
1389 | con->in_seq++; | ||
1390 | return 0; | ||
1391 | } else if ((s64)seq - (s64)con->in_seq > 1) { | ||
1392 | pr_err("read_partial_message bad seq %lld expected %lld\n", | ||
1393 | seq, con->in_seq + 1); | ||
1394 | con->error_msg = "bad message sequence # for incoming message"; | ||
1395 | return -EBADMSG; | ||
1396 | } | ||
1397 | |||
1398 | /* allocate message? */ | ||
1399 | if (!con->in_msg) { | ||
1400 | dout("got hdr type %d front %d data %d\n", con->in_hdr.type, | ||
1401 | con->in_hdr.front_len, con->in_hdr.data_len); | ||
1402 | skip = 0; | ||
1403 | con->in_msg = ceph_alloc_msg(con, &con->in_hdr, &skip); | ||
1404 | if (skip) { | ||
1405 | /* skip this message */ | ||
1406 | dout("alloc_msg said skip message\n"); | ||
1407 | BUG_ON(con->in_msg); | ||
1408 | con->in_base_pos = -front_len - middle_len - data_len - | ||
1409 | sizeof(m->footer); | ||
1410 | con->in_tag = CEPH_MSGR_TAG_READY; | ||
1411 | con->in_seq++; | ||
1412 | return 0; | ||
1413 | } | ||
1414 | if (!con->in_msg) { | ||
1415 | con->error_msg = | ||
1416 | "error allocating memory for incoming message"; | ||
1417 | return -ENOMEM; | ||
1418 | } | ||
1419 | m = con->in_msg; | ||
1420 | m->front.iov_len = 0; /* haven't read it yet */ | ||
1421 | if (m->middle) | ||
1422 | m->middle->vec.iov_len = 0; | ||
1423 | |||
1424 | con->in_msg_pos.page = 0; | ||
1425 | con->in_msg_pos.page_pos = data_off & ~PAGE_MASK; | ||
1426 | con->in_msg_pos.data_pos = 0; | ||
1427 | } | ||
1428 | |||
1429 | /* front */ | ||
1430 | ret = read_partial_message_section(con, &m->front, front_len, | ||
1431 | &con->in_front_crc); | ||
1432 | if (ret <= 0) | ||
1433 | return ret; | ||
1434 | |||
1435 | /* middle */ | ||
1436 | if (m->middle) { | ||
1437 | ret = read_partial_message_section(con, &m->middle->vec, | ||
1438 | middle_len, | ||
1439 | &con->in_middle_crc); | ||
1440 | if (ret <= 0) | ||
1441 | return ret; | ||
1442 | } | ||
1443 | |||
1444 | /* (page) data */ | ||
1445 | while (con->in_msg_pos.data_pos < data_len) { | ||
1446 | left = min((int)(data_len - con->in_msg_pos.data_pos), | ||
1447 | (int)(PAGE_SIZE - con->in_msg_pos.page_pos)); | ||
1448 | BUG_ON(m->pages == NULL); | ||
1449 | p = kmap(m->pages[con->in_msg_pos.page]); | ||
1450 | ret = ceph_tcp_recvmsg(con->sock, p + con->in_msg_pos.page_pos, | ||
1451 | left); | ||
1452 | if (ret > 0 && datacrc) | ||
1453 | con->in_data_crc = | ||
1454 | crc32c(con->in_data_crc, | ||
1455 | p + con->in_msg_pos.page_pos, ret); | ||
1456 | kunmap(m->pages[con->in_msg_pos.page]); | ||
1457 | if (ret <= 0) | ||
1458 | return ret; | ||
1459 | con->in_msg_pos.data_pos += ret; | ||
1460 | con->in_msg_pos.page_pos += ret; | ||
1461 | if (con->in_msg_pos.page_pos == PAGE_SIZE) { | ||
1462 | con->in_msg_pos.page_pos = 0; | ||
1463 | con->in_msg_pos.page++; | ||
1464 | } | ||
1465 | } | ||
1466 | |||
1467 | /* footer */ | ||
1468 | to = sizeof(m->hdr) + sizeof(m->footer); | ||
1469 | while (con->in_base_pos < to) { | ||
1470 | left = to - con->in_base_pos; | ||
1471 | ret = ceph_tcp_recvmsg(con->sock, (char *)&m->footer + | ||
1472 | (con->in_base_pos - sizeof(m->hdr)), | ||
1473 | left); | ||
1474 | if (ret <= 0) | ||
1475 | return ret; | ||
1476 | con->in_base_pos += ret; | ||
1477 | } | ||
1478 | dout("read_partial_message got msg %p %d (%u) + %d (%u) + %d (%u)\n", | ||
1479 | m, front_len, m->footer.front_crc, middle_len, | ||
1480 | m->footer.middle_crc, data_len, m->footer.data_crc); | ||
1481 | |||
1482 | /* crc ok? */ | ||
1483 | if (con->in_front_crc != le32_to_cpu(m->footer.front_crc)) { | ||
1484 | pr_err("read_partial_message %p front crc %u != exp. %u\n", | ||
1485 | m, con->in_front_crc, m->footer.front_crc); | ||
1486 | return -EBADMSG; | ||
1487 | } | ||
1488 | if (con->in_middle_crc != le32_to_cpu(m->footer.middle_crc)) { | ||
1489 | pr_err("read_partial_message %p middle crc %u != exp %u\n", | ||
1490 | m, con->in_middle_crc, m->footer.middle_crc); | ||
1491 | return -EBADMSG; | ||
1492 | } | ||
1493 | if (datacrc && | ||
1494 | (m->footer.flags & CEPH_MSG_FOOTER_NOCRC) == 0 && | ||
1495 | con->in_data_crc != le32_to_cpu(m->footer.data_crc)) { | ||
1496 | pr_err("read_partial_message %p data crc %u != exp. %u\n", m, | ||
1497 | con->in_data_crc, le32_to_cpu(m->footer.data_crc)); | ||
1498 | return -EBADMSG; | ||
1499 | } | ||
1500 | |||
1501 | return 1; /* done! */ | ||
1502 | } | ||
1503 | |||
1504 | /* | ||
1505 | * Process message. This happens in the worker thread. The callback should | ||
1506 | * be careful not to do anything that waits on other incoming messages or it | ||
1507 | * may deadlock. | ||
1508 | */ | ||
static void process_message(struct ceph_connection *con)
{
	struct ceph_msg *msg;

	/* take ownership of the incoming message */
	msg = con->in_msg;
	con->in_msg = NULL;

	/* if first message, set peer_name */
	if (con->peer_name.type == 0)
		con->peer_name = msg->hdr.src;

	con->in_seq++;
	/* dispatch runs without con->mutex held */
	mutex_unlock(&con->mutex);

	dout("===== %p %llu from %s%lld %d=%s len %d+%d (%u %u %u) =====\n",
	     msg, le64_to_cpu(msg->hdr.seq),
	     ENTITY_NAME(msg->hdr.src),
	     le16_to_cpu(msg->hdr.type),
	     ceph_msg_type_name(le16_to_cpu(msg->hdr.type)),
	     le32_to_cpu(msg->hdr.front_len),
	     le32_to_cpu(msg->hdr.data_len),
	     con->in_front_crc, con->in_middle_crc, con->in_data_crc);
	con->ops->dispatch(con, msg);

	mutex_lock(&con->mutex);
	prepare_read_tag(con);
}
1536 | |||
1537 | |||
1538 | /* | ||
1539 | * Write something to the socket. Called in a worker thread when the | ||
1540 | * socket appears to be writeable and we have something ready to send. | ||
1541 | */ | ||
1542 | static int try_write(struct ceph_connection *con) | ||
1543 | { | ||
1544 | struct ceph_messenger *msgr = con->msgr; | ||
1545 | int ret = 1; | ||
1546 | |||
1547 | dout("try_write start %p state %lu nref %d\n", con, con->state, | ||
1548 | atomic_read(&con->nref)); | ||
1549 | |||
1550 | more: | ||
1551 | dout("try_write out_kvec_bytes %d\n", con->out_kvec_bytes); | ||
1552 | |||
1553 | /* open the socket first? */ | ||
1554 | if (con->sock == NULL) { | ||
1555 | /* | ||
1556 | * if we were STANDBY and are reconnecting _this_ | ||
1557 | * connection, bump connect_seq now. Always bump | ||
1558 | * global_seq. | ||
1559 | */ | ||
1560 | if (test_and_clear_bit(STANDBY, &con->state)) | ||
1561 | con->connect_seq++; | ||
1562 | |||
1563 | prepare_write_banner(msgr, con); | ||
1564 | prepare_write_connect(msgr, con, 1); | ||
1565 | prepare_read_banner(con); | ||
1566 | set_bit(CONNECTING, &con->state); | ||
1567 | clear_bit(NEGOTIATING, &con->state); | ||
1568 | |||
1569 | BUG_ON(con->in_msg); | ||
1570 | con->in_tag = CEPH_MSGR_TAG_READY; | ||
1571 | dout("try_write initiating connect on %p new state %lu\n", | ||
1572 | con, con->state); | ||
1573 | con->sock = ceph_tcp_connect(con); | ||
1574 | if (IS_ERR(con->sock)) { | ||
1575 | con->sock = NULL; | ||
1576 | con->error_msg = "connect error"; | ||
1577 | ret = -1; | ||
1578 | goto out; | ||
1579 | } | ||
1580 | } | ||
1581 | |||
1582 | more_kvec: | ||
1583 | /* kvec data queued? */ | ||
1584 | if (con->out_skip) { | ||
1585 | ret = write_partial_skip(con); | ||
1586 | if (ret <= 0) | ||
1587 | goto done; | ||
1588 | if (ret < 0) { | ||
1589 | dout("try_write write_partial_skip err %d\n", ret); | ||
1590 | goto done; | ||
1591 | } | ||
1592 | } | ||
1593 | if (con->out_kvec_left) { | ||
1594 | ret = write_partial_kvec(con); | ||
1595 | if (ret <= 0) | ||
1596 | goto done; | ||
1597 | } | ||
1598 | |||
1599 | /* msg pages? */ | ||
1600 | if (con->out_msg) { | ||
1601 | if (con->out_msg_done) { | ||
1602 | ceph_msg_put(con->out_msg); | ||
1603 | con->out_msg = NULL; /* we're done with this one */ | ||
1604 | goto do_next; | ||
1605 | } | ||
1606 | |||
1607 | ret = write_partial_msg_pages(con); | ||
1608 | if (ret == 1) | ||
1609 | goto more_kvec; /* we need to send the footer, too! */ | ||
1610 | if (ret == 0) | ||
1611 | goto done; | ||
1612 | if (ret < 0) { | ||
1613 | dout("try_write write_partial_msg_pages err %d\n", | ||
1614 | ret); | ||
1615 | goto done; | ||
1616 | } | ||
1617 | } | ||
1618 | |||
1619 | do_next: | ||
1620 | if (!test_bit(CONNECTING, &con->state)) { | ||
1621 | /* is anything else pending? */ | ||
1622 | if (!list_empty(&con->out_queue)) { | ||
1623 | prepare_write_message(con); | ||
1624 | goto more; | ||
1625 | } | ||
1626 | if (con->in_seq > con->in_seq_acked) { | ||
1627 | prepare_write_ack(con); | ||
1628 | goto more; | ||
1629 | } | ||
1630 | if (test_and_clear_bit(KEEPALIVE_PENDING, &con->state)) { | ||
1631 | prepare_write_keepalive(con); | ||
1632 | goto more; | ||
1633 | } | ||
1634 | } | ||
1635 | |||
1636 | /* Nothing to do! */ | ||
1637 | clear_bit(WRITE_PENDING, &con->state); | ||
1638 | dout("try_write nothing else to write.\n"); | ||
1639 | done: | ||
1640 | ret = 0; | ||
1641 | out: | ||
1642 | dout("try_write done on %p\n", con); | ||
1643 | return ret; | ||
1644 | } | ||
1645 | |||
1646 | |||
1647 | |||
1648 | /* | ||
1649 | * Read what we can from the socket. | ||
1650 | */ | ||
1651 | static int try_read(struct ceph_connection *con) | ||
1652 | { | ||
1653 | int ret = -1; | ||
1654 | |||
1655 | if (!con->sock) | ||
1656 | return 0; | ||
1657 | |||
1658 | if (test_bit(STANDBY, &con->state)) | ||
1659 | return 0; | ||
1660 | |||
1661 | dout("try_read start on %p\n", con); | ||
1662 | |||
1663 | more: | ||
1664 | dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag, | ||
1665 | con->in_base_pos); | ||
1666 | if (test_bit(CONNECTING, &con->state)) { | ||
1667 | if (!test_bit(NEGOTIATING, &con->state)) { | ||
1668 | dout("try_read connecting\n"); | ||
1669 | ret = read_partial_banner(con); | ||
1670 | if (ret <= 0) | ||
1671 | goto done; | ||
1672 | if (process_banner(con) < 0) { | ||
1673 | ret = -1; | ||
1674 | goto out; | ||
1675 | } | ||
1676 | } | ||
1677 | ret = read_partial_connect(con); | ||
1678 | if (ret <= 0) | ||
1679 | goto done; | ||
1680 | if (process_connect(con) < 0) { | ||
1681 | ret = -1; | ||
1682 | goto out; | ||
1683 | } | ||
1684 | goto more; | ||
1685 | } | ||
1686 | |||
1687 | if (con->in_base_pos < 0) { | ||
1688 | /* | ||
1689 | * skipping + discarding content. | ||
1690 | * | ||
1691 | * FIXME: there must be a better way to do this! | ||
1692 | */ | ||
1693 | static char buf[1024]; | ||
1694 | int skip = min(1024, -con->in_base_pos); | ||
1695 | dout("skipping %d / %d bytes\n", skip, -con->in_base_pos); | ||
1696 | ret = ceph_tcp_recvmsg(con->sock, buf, skip); | ||
1697 | if (ret <= 0) | ||
1698 | goto done; | ||
1699 | con->in_base_pos += ret; | ||
1700 | if (con->in_base_pos) | ||
1701 | goto more; | ||
1702 | } | ||
1703 | if (con->in_tag == CEPH_MSGR_TAG_READY) { | ||
1704 | /* | ||
1705 | * what's next? | ||
1706 | */ | ||
1707 | ret = ceph_tcp_recvmsg(con->sock, &con->in_tag, 1); | ||
1708 | if (ret <= 0) | ||
1709 | goto done; | ||
1710 | dout("try_read got tag %d\n", (int)con->in_tag); | ||
1711 | switch (con->in_tag) { | ||
1712 | case CEPH_MSGR_TAG_MSG: | ||
1713 | prepare_read_message(con); | ||
1714 | break; | ||
1715 | case CEPH_MSGR_TAG_ACK: | ||
1716 | prepare_read_ack(con); | ||
1717 | break; | ||
1718 | case CEPH_MSGR_TAG_CLOSE: | ||
1719 | set_bit(CLOSED, &con->state); /* fixme */ | ||
1720 | goto done; | ||
1721 | default: | ||
1722 | goto bad_tag; | ||
1723 | } | ||
1724 | } | ||
1725 | if (con->in_tag == CEPH_MSGR_TAG_MSG) { | ||
1726 | ret = read_partial_message(con); | ||
1727 | if (ret <= 0) { | ||
1728 | switch (ret) { | ||
1729 | case -EBADMSG: | ||
1730 | con->error_msg = "bad crc"; | ||
1731 | ret = -EIO; | ||
1732 | goto out; | ||
1733 | case -EIO: | ||
1734 | con->error_msg = "io error"; | ||
1735 | goto out; | ||
1736 | default: | ||
1737 | goto done; | ||
1738 | } | ||
1739 | } | ||
1740 | if (con->in_tag == CEPH_MSGR_TAG_READY) | ||
1741 | goto more; | ||
1742 | process_message(con); | ||
1743 | goto more; | ||
1744 | } | ||
1745 | if (con->in_tag == CEPH_MSGR_TAG_ACK) { | ||
1746 | ret = read_partial_ack(con); | ||
1747 | if (ret <= 0) | ||
1748 | goto done; | ||
1749 | process_ack(con); | ||
1750 | goto more; | ||
1751 | } | ||
1752 | |||
1753 | done: | ||
1754 | ret = 0; | ||
1755 | out: | ||
1756 | dout("try_read done on %p\n", con); | ||
1757 | return ret; | ||
1758 | |||
1759 | bad_tag: | ||
1760 | pr_err("try_read bad con->in_tag = %d\n", (int)con->in_tag); | ||
1761 | con->error_msg = "protocol error, garbage tag"; | ||
1762 | ret = -1; | ||
1763 | goto out; | ||
1764 | } | ||
1765 | |||
1766 | |||
1767 | /* | ||
1768 | * Atomically queue work on a connection. Bump @con reference to | ||
1769 | * avoid races with connection teardown. | ||
1770 | * | ||
1771 | * There is some trickery going on with QUEUED and BUSY because we | ||
1772 | * only want a _single_ thread operating on each connection at any | ||
1773 | * point in time, but we want to use all available CPUs. | ||
1774 | * | ||
1775 | * The worker thread only proceeds if it can atomically set BUSY. It | ||
 * clears QUEUED and does its thing. When it thinks it's done, it
1777 | * clears BUSY, then rechecks QUEUED.. if it's set again, it loops | ||
1778 | * (tries again to set BUSY). | ||
1779 | * | ||
1780 | * To queue work, we first set QUEUED, _then_ if BUSY isn't set, we | ||
1781 | * try to queue work. If that fails (work is already queued, or BUSY) | ||
1782 | * we give up (work also already being done or is queued) but leave QUEUED | ||
1783 | * set so that the worker thread will loop if necessary. | ||
1784 | */ | ||
static void queue_con(struct ceph_connection *con)
{
	if (test_bit(DEAD, &con->state)) {
		dout("queue_con %p ignoring: DEAD\n",
		     con);
		return;
	}

	/* take a ref for the workqueue; dropped by con_work() */
	if (!con->ops->get(con)) {
		dout("queue_con %p ref count 0\n", con);
		return;
	}

	/* set QUEUED before testing BUSY -- see the comment above */
	set_bit(QUEUED, &con->state);
	if (test_bit(BUSY, &con->state)) {
		dout("queue_con %p - already BUSY\n", con);
		con->ops->put(con);
	} else if (!queue_work(ceph_msgr_wq, &con->work.work)) {
		dout("queue_con %p - already queued\n", con);
		con->ops->put(con);
	} else {
		dout("queue_con %p\n", con);
	}
}
1809 | |||
1810 | /* | ||
1811 | * Do some work on a connection. Drop a connection ref when we're done. | ||
1812 | */ | ||
static void con_work(struct work_struct *work)
{
	struct ceph_connection *con = container_of(work, struct ceph_connection,
						   work.work);
	int backoff = 0;

more:
	/* only one thread may operate on a connection at a time */
	if (test_and_set_bit(BUSY, &con->state) != 0) {
		dout("con_work %p BUSY already set\n", con);
		goto out;
	}
	dout("con_work %p start, clearing QUEUED\n", con);
	clear_bit(QUEUED, &con->state);

	mutex_lock(&con->mutex);

	if (test_bit(CLOSED, &con->state)) { /* e.g. if we are replaced */
		dout("con_work CLOSED\n");
		con_close_socket(con);
		goto done;
	}
	if (test_and_clear_bit(OPENING, &con->state)) {
		/* reopen w/ new peer */
		dout("con_work OPENING\n");
		con_close_socket(con);
	}

	if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
	    try_read(con) < 0 ||
	    try_write(con) < 0) {
		/* fault handling runs without con->mutex */
		mutex_unlock(&con->mutex);
		backoff = 1;
		ceph_fault(con);     /* error/fault path */
		goto done_unlocked;
	}

done:
	mutex_unlock(&con->mutex);

done_unlocked:
	clear_bit(BUSY, &con->state);
	dout("con->state=%lu\n", con->state);
	/* QUEUED may have been set again while we were BUSY; loop if so */
	if (test_bit(QUEUED, &con->state)) {
		if (!backoff || test_bit(OPENING, &con->state)) {
			dout("con_work %p QUEUED reset, looping\n", con);
			goto more;
		}
		/* we just faulted; don't loop immediately */
		dout("con_work %p QUEUED reset, but just faulted\n", con);
		clear_bit(QUEUED, &con->state);
	}
	dout("con_work %p done\n", con);

out:
	con->ops->put(con);
}
1868 | |||
1869 | |||
1870 | /* | ||
1871 | * Generic error/fault handler. A retry mechanism is used with | ||
1872 | * exponential backoff | ||
1873 | */ | ||
1874 | static void ceph_fault(struct ceph_connection *con) | ||
1875 | { | ||
1876 | pr_err("%s%lld %s %s\n", ENTITY_NAME(con->peer_name), | ||
1877 | pr_addr(&con->peer_addr.in_addr), con->error_msg); | ||
1878 | dout("fault %p state %lu to peer %s\n", | ||
1879 | con, con->state, pr_addr(&con->peer_addr.in_addr)); | ||
1880 | |||
1881 | if (test_bit(LOSSYTX, &con->state)) { | ||
1882 | dout("fault on LOSSYTX channel\n"); | ||
1883 | goto out; | ||
1884 | } | ||
1885 | |||
1886 | mutex_lock(&con->mutex); | ||
1887 | if (test_bit(CLOSED, &con->state)) | ||
1888 | goto out_unlock; | ||
1889 | |||
1890 | con_close_socket(con); | ||
1891 | |||
1892 | if (con->in_msg) { | ||
1893 | ceph_msg_put(con->in_msg); | ||
1894 | con->in_msg = NULL; | ||
1895 | } | ||
1896 | |||
1897 | /* Requeue anything that hasn't been acked */ | ||
1898 | list_splice_init(&con->out_sent, &con->out_queue); | ||
1899 | |||
1900 | /* If there are no messages in the queue, place the connection | ||
1901 | * in a STANDBY state (i.e., don't try to reconnect just yet). */ | ||
1902 | if (list_empty(&con->out_queue) && !con->out_keepalive_pending) { | ||
1903 | dout("fault setting STANDBY\n"); | ||
1904 | set_bit(STANDBY, &con->state); | ||
1905 | } else { | ||
1906 | /* retry after a delay. */ | ||
1907 | if (con->delay == 0) | ||
1908 | con->delay = BASE_DELAY_INTERVAL; | ||
1909 | else if (con->delay < MAX_DELAY_INTERVAL) | ||
1910 | con->delay *= 2; | ||
1911 | dout("fault queueing %p delay %lu\n", con, con->delay); | ||
1912 | con->ops->get(con); | ||
1913 | if (queue_delayed_work(ceph_msgr_wq, &con->work, | ||
1914 | round_jiffies_relative(con->delay)) == 0) | ||
1915 | con->ops->put(con); | ||
1916 | } | ||
1917 | |||
1918 | out_unlock: | ||
1919 | mutex_unlock(&con->mutex); | ||
1920 | out: | ||
1921 | /* | ||
1922 | * in case we faulted due to authentication, invalidate our | ||
1923 | * current tickets so that we can get new ones. | ||
1924 | */ | ||
1925 | if (con->auth_retry && con->ops->invalidate_authorizer) { | ||
1926 | dout("calling invalidate_authorizer()\n"); | ||
1927 | con->ops->invalidate_authorizer(con); | ||
1928 | } | ||
1929 | |||
1930 | if (con->ops->fault) | ||
1931 | con->ops->fault(con); | ||
1932 | } | ||
1933 | |||
1934 | |||
1935 | |||
1936 | /* | ||
1937 | * create a new messenger instance | ||
1938 | */ | ||
1939 | struct ceph_messenger *ceph_messenger_create(struct ceph_entity_addr *myaddr) | ||
1940 | { | ||
1941 | struct ceph_messenger *msgr; | ||
1942 | |||
1943 | msgr = kzalloc(sizeof(*msgr), GFP_KERNEL); | ||
1944 | if (msgr == NULL) | ||
1945 | return ERR_PTR(-ENOMEM); | ||
1946 | |||
1947 | spin_lock_init(&msgr->global_seq_lock); | ||
1948 | |||
1949 | /* the zero page is needed if a request is "canceled" while the message | ||
1950 | * is being written over the socket */ | ||
1951 | msgr->zero_page = __page_cache_alloc(GFP_KERNEL | __GFP_ZERO); | ||
1952 | if (!msgr->zero_page) { | ||
1953 | kfree(msgr); | ||
1954 | return ERR_PTR(-ENOMEM); | ||
1955 | } | ||
1956 | kmap(msgr->zero_page); | ||
1957 | |||
1958 | if (myaddr) | ||
1959 | msgr->inst.addr = *myaddr; | ||
1960 | |||
1961 | /* select a random nonce */ | ||
1962 | msgr->inst.addr.type = 0; | ||
1963 | get_random_bytes(&msgr->inst.addr.nonce, sizeof(msgr->inst.addr.nonce)); | ||
1964 | encode_my_addr(msgr); | ||
1965 | |||
1966 | dout("messenger_create %p\n", msgr); | ||
1967 | return msgr; | ||
1968 | } | ||
1969 | |||
1970 | void ceph_messenger_destroy(struct ceph_messenger *msgr) | ||
1971 | { | ||
1972 | dout("destroy %p\n", msgr); | ||
1973 | kunmap(msgr->zero_page); | ||
1974 | __free_page(msgr->zero_page); | ||
1975 | kfree(msgr); | ||
1976 | dout("destroyed messenger %p\n", msgr); | ||
1977 | } | ||
1978 | |||
1979 | /* | ||
1980 | * Queue up an outgoing message on the given connection. | ||
1981 | */ | ||
1982 | void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg) | ||
1983 | { | ||
1984 | if (test_bit(CLOSED, &con->state)) { | ||
1985 | dout("con_send %p closed, dropping %p\n", con, msg); | ||
1986 | ceph_msg_put(msg); | ||
1987 | return; | ||
1988 | } | ||
1989 | |||
1990 | /* set src+dst */ | ||
1991 | msg->hdr.src = con->msgr->inst.name; | ||
1992 | |||
1993 | BUG_ON(msg->front.iov_len != le32_to_cpu(msg->hdr.front_len)); | ||
1994 | |||
1995 | msg->needs_out_seq = true; | ||
1996 | |||
1997 | /* queue */ | ||
1998 | mutex_lock(&con->mutex); | ||
1999 | BUG_ON(!list_empty(&msg->list_head)); | ||
2000 | list_add_tail(&msg->list_head, &con->out_queue); | ||
2001 | dout("----- %p to %s%lld %d=%s len %d+%d+%d -----\n", msg, | ||
2002 | ENTITY_NAME(con->peer_name), le16_to_cpu(msg->hdr.type), | ||
2003 | ceph_msg_type_name(le16_to_cpu(msg->hdr.type)), | ||
2004 | le32_to_cpu(msg->hdr.front_len), | ||
2005 | le32_to_cpu(msg->hdr.middle_len), | ||
2006 | le32_to_cpu(msg->hdr.data_len)); | ||
2007 | mutex_unlock(&con->mutex); | ||
2008 | |||
2009 | /* if there wasn't anything waiting to send before, queue | ||
2010 | * new work */ | ||
2011 | if (test_and_set_bit(WRITE_PENDING, &con->state) == 0) | ||
2012 | queue_con(con); | ||
2013 | } | ||
2014 | |||
2015 | /* | ||
2016 | * Revoke a message that was previously queued for send | ||
2017 | */ | ||
2018 | void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg) | ||
2019 | { | ||
2020 | mutex_lock(&con->mutex); | ||
2021 | if (!list_empty(&msg->list_head)) { | ||
2022 | dout("con_revoke %p msg %p - was on queue\n", con, msg); | ||
2023 | list_del_init(&msg->list_head); | ||
2024 | ceph_msg_put(msg); | ||
2025 | msg->hdr.seq = 0; | ||
2026 | } | ||
2027 | if (con->out_msg == msg) { | ||
2028 | dout("con_revoke %p msg %p - was sending\n", con, msg); | ||
2029 | con->out_msg = NULL; | ||
2030 | if (con->out_kvec_is_msg) { | ||
2031 | con->out_skip = con->out_kvec_bytes; | ||
2032 | con->out_kvec_is_msg = false; | ||
2033 | } | ||
2034 | ceph_msg_put(msg); | ||
2035 | msg->hdr.seq = 0; | ||
2036 | } | ||
2037 | mutex_unlock(&con->mutex); | ||
2038 | } | ||
2039 | |||
2040 | /* | ||
2041 | * Revoke a message that we may be reading data into | ||
2042 | */ | ||
2043 | void ceph_con_revoke_message(struct ceph_connection *con, struct ceph_msg *msg) | ||
2044 | { | ||
2045 | mutex_lock(&con->mutex); | ||
2046 | if (con->in_msg && con->in_msg == msg) { | ||
2047 | unsigned front_len = le32_to_cpu(con->in_hdr.front_len); | ||
2048 | unsigned middle_len = le32_to_cpu(con->in_hdr.middle_len); | ||
2049 | unsigned data_len = le32_to_cpu(con->in_hdr.data_len); | ||
2050 | |||
2051 | /* skip rest of message */ | ||
2052 | dout("con_revoke_pages %p msg %p revoked\n", con, msg); | ||
2053 | con->in_base_pos = con->in_base_pos - | ||
2054 | sizeof(struct ceph_msg_header) - | ||
2055 | front_len - | ||
2056 | middle_len - | ||
2057 | data_len - | ||
2058 | sizeof(struct ceph_msg_footer); | ||
2059 | ceph_msg_put(con->in_msg); | ||
2060 | con->in_msg = NULL; | ||
2061 | con->in_tag = CEPH_MSGR_TAG_READY; | ||
2062 | con->in_seq++; | ||
2063 | } else { | ||
2064 | dout("con_revoke_pages %p msg %p pages %p no-op\n", | ||
2065 | con, con->in_msg, msg); | ||
2066 | } | ||
2067 | mutex_unlock(&con->mutex); | ||
2068 | } | ||
2069 | |||
2070 | /* | ||
2071 | * Queue a keepalive byte to ensure the tcp connection is alive. | ||
2072 | */ | ||
2073 | void ceph_con_keepalive(struct ceph_connection *con) | ||
2074 | { | ||
2075 | if (test_and_set_bit(KEEPALIVE_PENDING, &con->state) == 0 && | ||
2076 | test_and_set_bit(WRITE_PENDING, &con->state) == 0) | ||
2077 | queue_con(con); | ||
2078 | } | ||
2079 | |||
2080 | |||
2081 | /* | ||
2082 | * construct a new message with given type, size | ||
2083 | * the new msg has a ref count of 1. | ||
2084 | */ | ||
2085 | struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags) | ||
2086 | { | ||
2087 | struct ceph_msg *m; | ||
2088 | |||
2089 | m = kmalloc(sizeof(*m), flags); | ||
2090 | if (m == NULL) | ||
2091 | goto out; | ||
2092 | kref_init(&m->kref); | ||
2093 | INIT_LIST_HEAD(&m->list_head); | ||
2094 | |||
2095 | m->hdr.tid = 0; | ||
2096 | m->hdr.type = cpu_to_le16(type); | ||
2097 | m->hdr.priority = cpu_to_le16(CEPH_MSG_PRIO_DEFAULT); | ||
2098 | m->hdr.version = 0; | ||
2099 | m->hdr.front_len = cpu_to_le32(front_len); | ||
2100 | m->hdr.middle_len = 0; | ||
2101 | m->hdr.data_len = 0; | ||
2102 | m->hdr.data_off = 0; | ||
2103 | m->hdr.reserved = 0; | ||
2104 | m->footer.front_crc = 0; | ||
2105 | m->footer.middle_crc = 0; | ||
2106 | m->footer.data_crc = 0; | ||
2107 | m->footer.flags = 0; | ||
2108 | m->front_max = front_len; | ||
2109 | m->front_is_vmalloc = false; | ||
2110 | m->more_to_follow = false; | ||
2111 | m->pool = NULL; | ||
2112 | |||
2113 | /* front */ | ||
2114 | if (front_len) { | ||
2115 | if (front_len > PAGE_CACHE_SIZE) { | ||
2116 | m->front.iov_base = __vmalloc(front_len, flags, | ||
2117 | PAGE_KERNEL); | ||
2118 | m->front_is_vmalloc = true; | ||
2119 | } else { | ||
2120 | m->front.iov_base = kmalloc(front_len, flags); | ||
2121 | } | ||
2122 | if (m->front.iov_base == NULL) { | ||
2123 | pr_err("msg_new can't allocate %d bytes\n", | ||
2124 | front_len); | ||
2125 | goto out2; | ||
2126 | } | ||
2127 | } else { | ||
2128 | m->front.iov_base = NULL; | ||
2129 | } | ||
2130 | m->front.iov_len = front_len; | ||
2131 | |||
2132 | /* middle */ | ||
2133 | m->middle = NULL; | ||
2134 | |||
2135 | /* data */ | ||
2136 | m->nr_pages = 0; | ||
2137 | m->pages = NULL; | ||
2138 | m->pagelist = NULL; | ||
2139 | |||
2140 | dout("ceph_msg_new %p front %d\n", m, front_len); | ||
2141 | return m; | ||
2142 | |||
2143 | out2: | ||
2144 | ceph_msg_put(m); | ||
2145 | out: | ||
2146 | pr_err("msg_new can't create type %d front %d\n", type, front_len); | ||
2147 | return NULL; | ||
2148 | } | ||
2149 | |||
2150 | /* | ||
2151 | * Allocate "middle" portion of a message, if it is needed and wasn't | ||
2152 | * allocated by alloc_msg. This allows us to read a small fixed-size | ||
2153 | * per-type header in the front and then gracefully fail (i.e., | ||
2154 | * propagate the error to the caller based on info in the front) when | ||
2155 | * the middle is too large. | ||
2156 | */ | ||
2157 | static int ceph_alloc_middle(struct ceph_connection *con, struct ceph_msg *msg) | ||
2158 | { | ||
2159 | int type = le16_to_cpu(msg->hdr.type); | ||
2160 | int middle_len = le32_to_cpu(msg->hdr.middle_len); | ||
2161 | |||
2162 | dout("alloc_middle %p type %d %s middle_len %d\n", msg, type, | ||
2163 | ceph_msg_type_name(type), middle_len); | ||
2164 | BUG_ON(!middle_len); | ||
2165 | BUG_ON(msg->middle); | ||
2166 | |||
2167 | msg->middle = ceph_buffer_new(middle_len, GFP_NOFS); | ||
2168 | if (!msg->middle) | ||
2169 | return -ENOMEM; | ||
2170 | return 0; | ||
2171 | } | ||
2172 | |||
2173 | /* | ||
2174 | * Generic message allocator, for incoming messages. | ||
2175 | */ | ||
2176 | static struct ceph_msg *ceph_alloc_msg(struct ceph_connection *con, | ||
2177 | struct ceph_msg_header *hdr, | ||
2178 | int *skip) | ||
2179 | { | ||
2180 | int type = le16_to_cpu(hdr->type); | ||
2181 | int front_len = le32_to_cpu(hdr->front_len); | ||
2182 | int middle_len = le32_to_cpu(hdr->middle_len); | ||
2183 | struct ceph_msg *msg = NULL; | ||
2184 | int ret; | ||
2185 | |||
2186 | if (con->ops->alloc_msg) { | ||
2187 | mutex_unlock(&con->mutex); | ||
2188 | msg = con->ops->alloc_msg(con, hdr, skip); | ||
2189 | mutex_lock(&con->mutex); | ||
2190 | if (!msg || *skip) | ||
2191 | return NULL; | ||
2192 | } | ||
2193 | if (!msg) { | ||
2194 | *skip = 0; | ||
2195 | msg = ceph_msg_new(type, front_len, GFP_NOFS); | ||
2196 | if (!msg) { | ||
2197 | pr_err("unable to allocate msg type %d len %d\n", | ||
2198 | type, front_len); | ||
2199 | return NULL; | ||
2200 | } | ||
2201 | } | ||
2202 | memcpy(&msg->hdr, &con->in_hdr, sizeof(con->in_hdr)); | ||
2203 | |||
2204 | if (middle_len && !msg->middle) { | ||
2205 | ret = ceph_alloc_middle(con, msg); | ||
2206 | if (ret < 0) { | ||
2207 | ceph_msg_put(msg); | ||
2208 | return NULL; | ||
2209 | } | ||
2210 | } | ||
2211 | |||
2212 | return msg; | ||
2213 | } | ||
2214 | |||
2215 | |||
2216 | /* | ||
2217 | * Free a generically kmalloc'd message. | ||
2218 | */ | ||
2219 | void ceph_msg_kfree(struct ceph_msg *m) | ||
2220 | { | ||
2221 | dout("msg_kfree %p\n", m); | ||
2222 | if (m->front_is_vmalloc) | ||
2223 | vfree(m->front.iov_base); | ||
2224 | else | ||
2225 | kfree(m->front.iov_base); | ||
2226 | kfree(m); | ||
2227 | } | ||
2228 | |||
2229 | /* | ||
2230 | * Drop a msg ref. Destroy as needed. | ||
2231 | */ | ||
2232 | void ceph_msg_last_put(struct kref *kref) | ||
2233 | { | ||
2234 | struct ceph_msg *m = container_of(kref, struct ceph_msg, kref); | ||
2235 | |||
2236 | dout("ceph_msg_put last one on %p\n", m); | ||
2237 | WARN_ON(!list_empty(&m->list_head)); | ||
2238 | |||
2239 | /* drop middle, data, if any */ | ||
2240 | if (m->middle) { | ||
2241 | ceph_buffer_put(m->middle); | ||
2242 | m->middle = NULL; | ||
2243 | } | ||
2244 | m->nr_pages = 0; | ||
2245 | m->pages = NULL; | ||
2246 | |||
2247 | if (m->pagelist) { | ||
2248 | ceph_pagelist_release(m->pagelist); | ||
2249 | kfree(m->pagelist); | ||
2250 | m->pagelist = NULL; | ||
2251 | } | ||
2252 | |||
2253 | if (m->pool) | ||
2254 | ceph_msgpool_put(m->pool, m); | ||
2255 | else | ||
2256 | ceph_msg_kfree(m); | ||
2257 | } | ||
2258 | |||
2259 | void ceph_msg_dump(struct ceph_msg *msg) | ||
2260 | { | ||
2261 | pr_debug("msg_dump %p (front_max %d nr_pages %d)\n", msg, | ||
2262 | msg->front_max, msg->nr_pages); | ||
2263 | print_hex_dump(KERN_DEBUG, "header: ", | ||
2264 | DUMP_PREFIX_OFFSET, 16, 1, | ||
2265 | &msg->hdr, sizeof(msg->hdr), true); | ||
2266 | print_hex_dump(KERN_DEBUG, " front: ", | ||
2267 | DUMP_PREFIX_OFFSET, 16, 1, | ||
2268 | msg->front.iov_base, msg->front.iov_len, true); | ||
2269 | if (msg->middle) | ||
2270 | print_hex_dump(KERN_DEBUG, "middle: ", | ||
2271 | DUMP_PREFIX_OFFSET, 16, 1, | ||
2272 | msg->middle->vec.iov_base, | ||
2273 | msg->middle->vec.iov_len, true); | ||
2274 | print_hex_dump(KERN_DEBUG, "footer: ", | ||
2275 | DUMP_PREFIX_OFFSET, 16, 1, | ||
2276 | &msg->footer, sizeof(msg->footer), true); | ||
2277 | } | ||
diff --git a/fs/ceph/messenger.h b/fs/ceph/messenger.h deleted file mode 100644 index 76fbc957bc13..000000000000 --- a/fs/ceph/messenger.h +++ /dev/null | |||
@@ -1,253 +0,0 @@ | |||
1 | #ifndef __FS_CEPH_MESSENGER_H | ||
2 | #define __FS_CEPH_MESSENGER_H | ||
3 | |||
4 | #include <linux/kref.h> | ||
5 | #include <linux/mutex.h> | ||
6 | #include <linux/net.h> | ||
7 | #include <linux/radix-tree.h> | ||
8 | #include <linux/uio.h> | ||
9 | #include <linux/version.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | |||
12 | #include "types.h" | ||
13 | #include "buffer.h" | ||
14 | |||
15 | struct ceph_msg; | ||
16 | struct ceph_connection; | ||
17 | |||
18 | extern struct workqueue_struct *ceph_msgr_wq; /* receive work queue */ | ||
19 | |||
20 | /* | ||
21 | * Ceph defines these callbacks for handling connection events. | ||
22 | */ | ||
23 | struct ceph_connection_operations { | ||
24 | struct ceph_connection *(*get)(struct ceph_connection *); | ||
25 | void (*put)(struct ceph_connection *); | ||
26 | |||
27 | /* handle an incoming message. */ | ||
28 | void (*dispatch) (struct ceph_connection *con, struct ceph_msg *m); | ||
29 | |||
30 | /* authorize an outgoing connection */ | ||
31 | int (*get_authorizer) (struct ceph_connection *con, | ||
32 | void **buf, int *len, int *proto, | ||
33 | void **reply_buf, int *reply_len, int force_new); | ||
34 | int (*verify_authorizer_reply) (struct ceph_connection *con, int len); | ||
35 | int (*invalidate_authorizer)(struct ceph_connection *con); | ||
36 | |||
37 | /* protocol version mismatch */ | ||
38 | void (*bad_proto) (struct ceph_connection *con); | ||
39 | |||
40 | /* there was some error on the socket (disconnect, whatever) */ | ||
41 | void (*fault) (struct ceph_connection *con); | ||
42 | |||
43 | /* a remote host as terminated a message exchange session, and messages | ||
44 | * we sent (or they tried to send us) may be lost. */ | ||
45 | void (*peer_reset) (struct ceph_connection *con); | ||
46 | |||
47 | struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, | ||
48 | struct ceph_msg_header *hdr, | ||
49 | int *skip); | ||
50 | }; | ||
51 | |||
52 | /* use format string %s%d */ | ||
53 | #define ENTITY_NAME(n) ceph_entity_type_name((n).type), le64_to_cpu((n).num) | ||
54 | |||
55 | struct ceph_messenger { | ||
56 | struct ceph_entity_inst inst; /* my name+address */ | ||
57 | struct ceph_entity_addr my_enc_addr; | ||
58 | struct page *zero_page; /* used in certain error cases */ | ||
59 | |||
60 | bool nocrc; | ||
61 | |||
62 | /* | ||
63 | * the global_seq counts connections i (attempt to) initiate | ||
64 | * in order to disambiguate certain connect race conditions. | ||
65 | */ | ||
66 | u32 global_seq; | ||
67 | spinlock_t global_seq_lock; | ||
68 | }; | ||
69 | |||
70 | /* | ||
71 | * a single message. it contains a header (src, dest, message type, etc.), | ||
72 | * footer (crc values, mainly), a "front" message body, and possibly a | ||
73 | * data payload (stored in some number of pages). | ||
74 | */ | ||
75 | struct ceph_msg { | ||
76 | struct ceph_msg_header hdr; /* header */ | ||
77 | struct ceph_msg_footer footer; /* footer */ | ||
78 | struct kvec front; /* unaligned blobs of message */ | ||
79 | struct ceph_buffer *middle; | ||
80 | struct page **pages; /* data payload. NOT OWNER. */ | ||
81 | unsigned nr_pages; /* size of page array */ | ||
82 | struct ceph_pagelist *pagelist; /* instead of pages */ | ||
83 | struct list_head list_head; | ||
84 | struct kref kref; | ||
85 | bool front_is_vmalloc; | ||
86 | bool more_to_follow; | ||
87 | bool needs_out_seq; | ||
88 | int front_max; | ||
89 | |||
90 | struct ceph_msgpool *pool; | ||
91 | }; | ||
92 | |||
93 | struct ceph_msg_pos { | ||
94 | int page, page_pos; /* which page; offset in page */ | ||
95 | int data_pos; /* offset in data payload */ | ||
96 | int did_page_crc; /* true if we've calculated crc for current page */ | ||
97 | }; | ||
98 | |||
99 | /* ceph connection fault delay defaults, for exponential backoff */ | ||
100 | #define BASE_DELAY_INTERVAL (HZ/2) | ||
101 | #define MAX_DELAY_INTERVAL (5 * 60 * HZ) | ||
102 | |||
103 | /* | ||
104 | * ceph_connection state bit flags | ||
105 | * | ||
106 | * QUEUED and BUSY are used together to ensure that only a single | ||
107 | * thread is currently opening, reading or writing data to the socket. | ||
108 | */ | ||
109 | #define LOSSYTX 0 /* we can close channel or drop messages on errors */ | ||
110 | #define CONNECTING 1 | ||
111 | #define NEGOTIATING 2 | ||
112 | #define KEEPALIVE_PENDING 3 | ||
113 | #define WRITE_PENDING 4 /* we have data ready to send */ | ||
114 | #define QUEUED 5 /* there is work queued on this connection */ | ||
115 | #define BUSY 6 /* work is being done */ | ||
116 | #define STANDBY 8 /* no outgoing messages, socket closed. we keep | ||
117 | * the ceph_connection around to maintain shared | ||
118 | * state with the peer. */ | ||
119 | #define CLOSED 10 /* we've closed the connection */ | ||
120 | #define SOCK_CLOSED 11 /* socket state changed to closed */ | ||
121 | #define OPENING 13 /* open connection w/ (possibly new) peer */ | ||
122 | #define DEAD 14 /* dead, about to kfree */ | ||
123 | |||
124 | /* | ||
125 | * A single connection with another host. | ||
126 | * | ||
127 | * We maintain a queue of outgoing messages, and some session state to | ||
128 | * ensure that we can preserve the lossless, ordered delivery of | ||
129 | * messages in the case of a TCP disconnect. | ||
130 | */ | ||
131 | struct ceph_connection { | ||
132 | void *private; | ||
133 | atomic_t nref; | ||
134 | |||
135 | const struct ceph_connection_operations *ops; | ||
136 | |||
137 | struct ceph_messenger *msgr; | ||
138 | struct socket *sock; | ||
139 | unsigned long state; /* connection state (see flags above) */ | ||
140 | const char *error_msg; /* error message, if any */ | ||
141 | |||
142 | struct ceph_entity_addr peer_addr; /* peer address */ | ||
143 | struct ceph_entity_name peer_name; /* peer name */ | ||
144 | struct ceph_entity_addr peer_addr_for_me; | ||
145 | unsigned peer_features; | ||
146 | u32 connect_seq; /* identify the most recent connection | ||
147 | attempt for this connection, client */ | ||
148 | u32 peer_global_seq; /* peer's global seq for this connection */ | ||
149 | |||
150 | int auth_retry; /* true if we need a newer authorizer */ | ||
151 | void *auth_reply_buf; /* where to put the authorizer reply */ | ||
152 | int auth_reply_buf_len; | ||
153 | |||
154 | struct mutex mutex; | ||
155 | |||
156 | /* out queue */ | ||
157 | struct list_head out_queue; | ||
158 | struct list_head out_sent; /* sending or sent but unacked */ | ||
159 | u64 out_seq; /* last message queued for send */ | ||
160 | bool out_keepalive_pending; | ||
161 | |||
162 | u64 in_seq, in_seq_acked; /* last message received, acked */ | ||
163 | |||
164 | /* connection negotiation temps */ | ||
165 | char in_banner[CEPH_BANNER_MAX_LEN]; | ||
166 | union { | ||
167 | struct { /* outgoing connection */ | ||
168 | struct ceph_msg_connect out_connect; | ||
169 | struct ceph_msg_connect_reply in_reply; | ||
170 | }; | ||
171 | struct { /* incoming */ | ||
172 | struct ceph_msg_connect in_connect; | ||
173 | struct ceph_msg_connect_reply out_reply; | ||
174 | }; | ||
175 | }; | ||
176 | struct ceph_entity_addr actual_peer_addr; | ||
177 | |||
178 | /* message out temps */ | ||
179 | struct ceph_msg *out_msg; /* sending message (== tail of | ||
180 | out_sent) */ | ||
181 | bool out_msg_done; | ||
182 | struct ceph_msg_pos out_msg_pos; | ||
183 | |||
184 | struct kvec out_kvec[8], /* sending header/footer data */ | ||
185 | *out_kvec_cur; | ||
186 | int out_kvec_left; /* kvec's left in out_kvec */ | ||
187 | int out_skip; /* skip this many bytes */ | ||
188 | int out_kvec_bytes; /* total bytes left */ | ||
189 | bool out_kvec_is_msg; /* kvec refers to out_msg */ | ||
190 | int out_more; /* there is more data after the kvecs */ | ||
191 | __le64 out_temp_ack; /* for writing an ack */ | ||
192 | |||
193 | /* message in temps */ | ||
194 | struct ceph_msg_header in_hdr; | ||
195 | struct ceph_msg *in_msg; | ||
196 | struct ceph_msg_pos in_msg_pos; | ||
197 | u32 in_front_crc, in_middle_crc, in_data_crc; /* calculated crc */ | ||
198 | |||
199 | char in_tag; /* protocol control byte */ | ||
200 | int in_base_pos; /* bytes read */ | ||
201 | __le64 in_temp_ack; /* for reading an ack */ | ||
202 | |||
203 | struct delayed_work work; /* send|recv work */ | ||
204 | unsigned long delay; /* current delay interval */ | ||
205 | }; | ||
206 | |||
207 | |||
208 | extern const char *pr_addr(const struct sockaddr_storage *ss); | ||
209 | extern int ceph_parse_ips(const char *c, const char *end, | ||
210 | struct ceph_entity_addr *addr, | ||
211 | int max_count, int *count); | ||
212 | |||
213 | |||
214 | extern int ceph_msgr_init(void); | ||
215 | extern void ceph_msgr_exit(void); | ||
216 | extern void ceph_msgr_flush(void); | ||
217 | |||
218 | extern struct ceph_messenger *ceph_messenger_create( | ||
219 | struct ceph_entity_addr *myaddr); | ||
220 | extern void ceph_messenger_destroy(struct ceph_messenger *); | ||
221 | |||
222 | extern void ceph_con_init(struct ceph_messenger *msgr, | ||
223 | struct ceph_connection *con); | ||
224 | extern void ceph_con_open(struct ceph_connection *con, | ||
225 | struct ceph_entity_addr *addr); | ||
226 | extern bool ceph_con_opened(struct ceph_connection *con); | ||
227 | extern void ceph_con_close(struct ceph_connection *con); | ||
228 | extern void ceph_con_send(struct ceph_connection *con, struct ceph_msg *msg); | ||
229 | extern void ceph_con_revoke(struct ceph_connection *con, struct ceph_msg *msg); | ||
230 | extern void ceph_con_revoke_message(struct ceph_connection *con, | ||
231 | struct ceph_msg *msg); | ||
232 | extern void ceph_con_keepalive(struct ceph_connection *con); | ||
233 | extern struct ceph_connection *ceph_con_get(struct ceph_connection *con); | ||
234 | extern void ceph_con_put(struct ceph_connection *con); | ||
235 | |||
236 | extern struct ceph_msg *ceph_msg_new(int type, int front_len, gfp_t flags); | ||
237 | extern void ceph_msg_kfree(struct ceph_msg *m); | ||
238 | |||
239 | |||
240 | static inline struct ceph_msg *ceph_msg_get(struct ceph_msg *msg) | ||
241 | { | ||
242 | kref_get(&msg->kref); | ||
243 | return msg; | ||
244 | } | ||
245 | extern void ceph_msg_last_put(struct kref *kref); | ||
246 | static inline void ceph_msg_put(struct ceph_msg *msg) | ||
247 | { | ||
248 | kref_put(&msg->kref, ceph_msg_last_put); | ||
249 | } | ||
250 | |||
251 | extern void ceph_msg_dump(struct ceph_msg *msg); | ||
252 | |||
253 | #endif | ||
diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c deleted file mode 100644 index b2a5a3e4a671..000000000000 --- a/fs/ceph/mon_client.c +++ /dev/null | |||
@@ -1,1018 +0,0 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/types.h> | ||
4 | #include <linux/slab.h> | ||
5 | #include <linux/random.h> | ||
6 | #include <linux/sched.h> | ||
7 | |||
8 | #include "mon_client.h" | ||
9 | #include "super.h" | ||
10 | #include "auth.h" | ||
11 | #include "decode.h" | ||
12 | |||
13 | /* | ||
14 | * Interact with Ceph monitor cluster. Handle requests for new map | ||
15 | * versions, and periodically resend as needed. Also implement | ||
16 | * statfs() and umount(). | ||
17 | * | ||
18 | * A small cluster of Ceph "monitors" are responsible for managing critical | ||
19 | * cluster configuration and state information. An odd number (e.g., 3, 5) | ||
20 | * of cmon daemons use a modified version of the Paxos part-time parliament | ||
21 | * algorithm to manage the MDS map (mds cluster membership), OSD map, and | ||
22 | * list of clients who have mounted the file system. | ||
23 | * | ||
24 | * We maintain an open, active session with a monitor at all times in order to | ||
25 | * receive timely MDSMap updates. We periodically send a keepalive byte on the | ||
26 | * TCP socket to ensure we detect a failure. If the connection does break, we | ||
27 | * randomly hunt for a new monitor. Once the connection is reestablished, we | ||
28 | * resend any outstanding requests. | ||
29 | */ | ||
30 | |||
31 | static const struct ceph_connection_operations mon_con_ops; | ||
32 | |||
33 | static int __validate_auth(struct ceph_mon_client *monc); | ||
34 | |||
35 | /* | ||
36 | * Decode a monmap blob (e.g., during mount). | ||
37 | */ | ||
38 | struct ceph_monmap *ceph_monmap_decode(void *p, void *end) | ||
39 | { | ||
40 | struct ceph_monmap *m = NULL; | ||
41 | int i, err = -EINVAL; | ||
42 | struct ceph_fsid fsid; | ||
43 | u32 epoch, num_mon; | ||
44 | u16 version; | ||
45 | u32 len; | ||
46 | |||
47 | ceph_decode_32_safe(&p, end, len, bad); | ||
48 | ceph_decode_need(&p, end, len, bad); | ||
49 | |||
50 | dout("monmap_decode %p %p len %d\n", p, end, (int)(end-p)); | ||
51 | |||
52 | ceph_decode_16_safe(&p, end, version, bad); | ||
53 | |||
54 | ceph_decode_need(&p, end, sizeof(fsid) + 2*sizeof(u32), bad); | ||
55 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); | ||
56 | epoch = ceph_decode_32(&p); | ||
57 | |||
58 | num_mon = ceph_decode_32(&p); | ||
59 | ceph_decode_need(&p, end, num_mon*sizeof(m->mon_inst[0]), bad); | ||
60 | |||
61 | if (num_mon >= CEPH_MAX_MON) | ||
62 | goto bad; | ||
63 | m = kmalloc(sizeof(*m) + sizeof(m->mon_inst[0])*num_mon, GFP_NOFS); | ||
64 | if (m == NULL) | ||
65 | return ERR_PTR(-ENOMEM); | ||
66 | m->fsid = fsid; | ||
67 | m->epoch = epoch; | ||
68 | m->num_mon = num_mon; | ||
69 | ceph_decode_copy(&p, m->mon_inst, num_mon*sizeof(m->mon_inst[0])); | ||
70 | for (i = 0; i < num_mon; i++) | ||
71 | ceph_decode_addr(&m->mon_inst[i].addr); | ||
72 | |||
73 | dout("monmap_decode epoch %d, num_mon %d\n", m->epoch, | ||
74 | m->num_mon); | ||
75 | for (i = 0; i < m->num_mon; i++) | ||
76 | dout("monmap_decode mon%d is %s\n", i, | ||
77 | pr_addr(&m->mon_inst[i].addr.in_addr)); | ||
78 | return m; | ||
79 | |||
80 | bad: | ||
81 | dout("monmap_decode failed with %d\n", err); | ||
82 | kfree(m); | ||
83 | return ERR_PTR(err); | ||
84 | } | ||
85 | |||
86 | /* | ||
87 | * return true if *addr is included in the monmap. | ||
88 | */ | ||
89 | int ceph_monmap_contains(struct ceph_monmap *m, struct ceph_entity_addr *addr) | ||
90 | { | ||
91 | int i; | ||
92 | |||
93 | for (i = 0; i < m->num_mon; i++) | ||
94 | if (memcmp(addr, &m->mon_inst[i].addr, sizeof(*addr)) == 0) | ||
95 | return 1; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | /* | ||
100 | * Send an auth request. | ||
101 | */ | ||
102 | static void __send_prepared_auth_request(struct ceph_mon_client *monc, int len) | ||
103 | { | ||
104 | monc->pending_auth = 1; | ||
105 | monc->m_auth->front.iov_len = len; | ||
106 | monc->m_auth->hdr.front_len = cpu_to_le32(len); | ||
107 | ceph_con_revoke(monc->con, monc->m_auth); | ||
108 | ceph_msg_get(monc->m_auth); /* keep our ref */ | ||
109 | ceph_con_send(monc->con, monc->m_auth); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Close monitor session, if any. | ||
114 | */ | ||
115 | static void __close_session(struct ceph_mon_client *monc) | ||
116 | { | ||
117 | if (monc->con) { | ||
118 | dout("__close_session closing mon%d\n", monc->cur_mon); | ||
119 | ceph_con_revoke(monc->con, monc->m_auth); | ||
120 | ceph_con_close(monc->con); | ||
121 | monc->cur_mon = -1; | ||
122 | monc->pending_auth = 0; | ||
123 | ceph_auth_reset(monc->auth); | ||
124 | } | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Open a session with a (new) monitor. | ||
129 | */ | ||
130 | static int __open_session(struct ceph_mon_client *monc) | ||
131 | { | ||
132 | char r; | ||
133 | int ret; | ||
134 | |||
135 | if (monc->cur_mon < 0) { | ||
136 | get_random_bytes(&r, 1); | ||
137 | monc->cur_mon = r % monc->monmap->num_mon; | ||
138 | dout("open_session num=%d r=%d -> mon%d\n", | ||
139 | monc->monmap->num_mon, r, monc->cur_mon); | ||
140 | monc->sub_sent = 0; | ||
141 | monc->sub_renew_after = jiffies; /* i.e., expired */ | ||
142 | monc->want_next_osdmap = !!monc->want_next_osdmap; | ||
143 | |||
144 | dout("open_session mon%d opening\n", monc->cur_mon); | ||
145 | monc->con->peer_name.type = CEPH_ENTITY_TYPE_MON; | ||
146 | monc->con->peer_name.num = cpu_to_le64(monc->cur_mon); | ||
147 | ceph_con_open(monc->con, | ||
148 | &monc->monmap->mon_inst[monc->cur_mon].addr); | ||
149 | |||
150 | /* initiatiate authentication handshake */ | ||
151 | ret = ceph_auth_build_hello(monc->auth, | ||
152 | monc->m_auth->front.iov_base, | ||
153 | monc->m_auth->front_max); | ||
154 | __send_prepared_auth_request(monc, ret); | ||
155 | } else { | ||
156 | dout("open_session mon%d already open\n", monc->cur_mon); | ||
157 | } | ||
158 | return 0; | ||
159 | } | ||
160 | |||
/* return true once the subscription renewal deadline has passed */
static bool __sub_expired(struct ceph_mon_client *monc)
{
	return time_after_eq(jiffies, monc->sub_renew_after);
}
165 | |||
166 | /* | ||
167 | * Reschedule delayed work timer. | ||
168 | */ | ||
169 | static void __schedule_delayed(struct ceph_mon_client *monc) | ||
170 | { | ||
171 | unsigned delay; | ||
172 | |||
173 | if (monc->cur_mon < 0 || __sub_expired(monc)) | ||
174 | delay = 10 * HZ; | ||
175 | else | ||
176 | delay = 20 * HZ; | ||
177 | dout("__schedule_delayed after %u\n", delay); | ||
178 | schedule_delayed_work(&monc->delayed_work, delay); | ||
179 | } | ||
180 | |||
/*
 * Send subscribe request for mdsmap and/or osdmap.
 */
static void __send_subscribe(struct ceph_mon_client *monc)
{
	dout("__send_subscribe sub_sent=%u exp=%u want_osd=%d\n",
	     (unsigned)monc->sub_sent, __sub_expired(monc),
	     monc->want_next_osdmap);
	/* (re)send only if the sub expired with none in flight, or an
	 * osdmap is wanted but not yet asked for (want == 1) */
	if ((__sub_expired(monc) && !monc->sub_sent) ||
	    monc->want_next_osdmap == 1) {
		struct ceph_msg *msg = monc->m_subscribe;
		struct ceph_mon_subscribe_item *i;
		void *p, *end;

		p = msg->front.iov_base;
		end = p + msg->front_max;
		/* NOTE(review): the string encodes are bounded by 'end',
		 * but the item writes through 'i' are unchecked -- relies
		 * on m_subscribe being allocated large enough */

		dout("__send_subscribe to 'mdsmap' %u+\n",
		     (unsigned)monc->have_mdsmap);
		if (monc->want_next_osdmap) {
			dout("__send_subscribe to 'osdmap' %u\n",
			     (unsigned)monc->have_osdmap);
			ceph_encode_32(&p, 3);	/* three subscriptions */
			ceph_encode_string(&p, end, "osdmap", 6);
			i = p;
			i->have = cpu_to_le64(monc->have_osdmap);
			i->onetime = 1;		/* just the next osdmap */
			p += sizeof(*i);
			monc->want_next_osdmap = 2; /* requested */
		} else {
			ceph_encode_32(&p, 2);	/* mdsmap + monmap only */
		}
		ceph_encode_string(&p, end, "mdsmap", 6);
		i = p;
		i->have = cpu_to_le64(monc->have_mdsmap);
		i->onetime = 0;			/* continuous updates */
		p += sizeof(*i);
		ceph_encode_string(&p, end, "monmap", 6);
		i = p;
		i->have = 0;
		i->onetime = 0;
		p += sizeof(*i);

		msg->front.iov_len = p - msg->front.iov_base;
		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
		ceph_con_revoke(monc->con, msg);
		ceph_con_send(monc->con, ceph_msg_get(msg));

		monc->sub_sent = jiffies | 1; /* never 0 */
	}
}
232 | |||
/* process the monitor's subscribe ack: note the granted duration and
 * schedule the next renewal at half of it */
static void handle_subscribe_ack(struct ceph_mon_client *monc,
				 struct ceph_msg *msg)
{
	unsigned seconds;
	struct ceph_mon_subscribe_ack *h = msg->front.iov_base;

	if (msg->front.iov_len < sizeof(*h))
		goto bad;
	seconds = le32_to_cpu(h->duration);

	mutex_lock(&monc->mutex);
	if (monc->hunting) {
		/* first ack on a new session ends the hunt */
		pr_info("mon%d %s session established\n",
			monc->cur_mon, pr_addr(&monc->con->peer_addr.in_addr));
		monc->hunting = false;
	}
	dout("handle_subscribe_ack after %d seconds\n", seconds);
	/* renew at half the granted duration so we never lapse */
	monc->sub_renew_after = monc->sub_sent + (seconds >> 1)*HZ - 1;
	monc->sub_sent = 0;
	mutex_unlock(&monc->mutex);
	return;
bad:
	pr_err("got corrupt subscribe-ack msg\n");
	ceph_msg_dump(msg);
}
258 | |||
/*
 * Keep track of which maps we have
 */
int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 got)
{
	/* record the mdsmap epoch we now hold; used in later subscribes */
	mutex_lock(&monc->mutex);
	monc->have_mdsmap = got;
	mutex_unlock(&monc->mutex);
	return 0;
}
269 | |||
/* record the osdmap epoch we now hold and clear any pending request */
int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 got)
{
	mutex_lock(&monc->mutex);
	monc->have_osdmap = got;
	monc->want_next_osdmap = 0;	/* got what we were waiting for */
	mutex_unlock(&monc->mutex);
	return 0;
}
278 | |||
/*
 * Register interest in the next osdmap
 */
void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc)
{
	/* NOTE(review): have_osdmap read outside the mutex -- debug
	 * output only, so at worst the printed value is stale */
	dout("request_next_osdmap have %u\n", monc->have_osdmap);
	mutex_lock(&monc->mutex);
	if (!monc->want_next_osdmap)
		monc->want_next_osdmap = 1;
	/* only subscribe if not already asked (2 == want+asked) */
	if (monc->want_next_osdmap < 2)
		__send_subscribe(monc);
	mutex_unlock(&monc->mutex);
}
292 | |||
/*
 * Lazily allocate the monitor connection, open a session, and arm the
 * keepalive/retry timer.
 */
int ceph_monc_open_session(struct ceph_mon_client *monc)
{
	if (!monc->con) {
		monc->con = kmalloc(sizeof(*monc->con), GFP_KERNEL);
		if (!monc->con)
			return -ENOMEM;
		ceph_con_init(monc->client->msgr, monc->con);
		monc->con->private = monc;
		monc->con->ops = &mon_con_ops;
	}

	mutex_lock(&monc->mutex);
	__open_session(monc);
	__schedule_delayed(monc);
	mutex_unlock(&monc->mutex);
	return 0;
}
313 | |||
314 | /* | ||
315 | * The monitor responds with mount ack indicate mount success. The | ||
316 | * included client ticket allows the client to talk to MDSs and OSDs. | ||
317 | */ | ||
318 | static void ceph_monc_handle_map(struct ceph_mon_client *monc, | ||
319 | struct ceph_msg *msg) | ||
320 | { | ||
321 | struct ceph_client *client = monc->client; | ||
322 | struct ceph_monmap *monmap = NULL, *old = monc->monmap; | ||
323 | void *p, *end; | ||
324 | |||
325 | mutex_lock(&monc->mutex); | ||
326 | |||
327 | dout("handle_monmap\n"); | ||
328 | p = msg->front.iov_base; | ||
329 | end = p + msg->front.iov_len; | ||
330 | |||
331 | monmap = ceph_monmap_decode(p, end); | ||
332 | if (IS_ERR(monmap)) { | ||
333 | pr_err("problem decoding monmap, %d\n", | ||
334 | (int)PTR_ERR(monmap)); | ||
335 | goto out; | ||
336 | } | ||
337 | |||
338 | if (ceph_check_fsid(monc->client, &monmap->fsid) < 0) { | ||
339 | kfree(monmap); | ||
340 | goto out; | ||
341 | } | ||
342 | |||
343 | client->monc.monmap = monmap; | ||
344 | kfree(old); | ||
345 | |||
346 | out: | ||
347 | mutex_unlock(&monc->mutex); | ||
348 | wake_up_all(&client->auth_wq); | ||
349 | } | ||
350 | |||
351 | /* | ||
352 | * generic requests (e.g., statfs, poolop) | ||
353 | */ | ||
354 | static struct ceph_mon_generic_request *__lookup_generic_req( | ||
355 | struct ceph_mon_client *monc, u64 tid) | ||
356 | { | ||
357 | struct ceph_mon_generic_request *req; | ||
358 | struct rb_node *n = monc->generic_request_tree.rb_node; | ||
359 | |||
360 | while (n) { | ||
361 | req = rb_entry(n, struct ceph_mon_generic_request, node); | ||
362 | if (tid < req->tid) | ||
363 | n = n->rb_left; | ||
364 | else if (tid > req->tid) | ||
365 | n = n->rb_right; | ||
366 | else | ||
367 | return req; | ||
368 | } | ||
369 | return NULL; | ||
370 | } | ||
371 | |||
372 | static void __insert_generic_request(struct ceph_mon_client *monc, | ||
373 | struct ceph_mon_generic_request *new) | ||
374 | { | ||
375 | struct rb_node **p = &monc->generic_request_tree.rb_node; | ||
376 | struct rb_node *parent = NULL; | ||
377 | struct ceph_mon_generic_request *req = NULL; | ||
378 | |||
379 | while (*p) { | ||
380 | parent = *p; | ||
381 | req = rb_entry(parent, struct ceph_mon_generic_request, node); | ||
382 | if (new->tid < req->tid) | ||
383 | p = &(*p)->rb_left; | ||
384 | else if (new->tid > req->tid) | ||
385 | p = &(*p)->rb_right; | ||
386 | else | ||
387 | BUG(); | ||
388 | } | ||
389 | |||
390 | rb_link_node(&new->node, parent, p); | ||
391 | rb_insert_color(&new->node, &monc->generic_request_tree); | ||
392 | } | ||
393 | |||
/* free a generic request once its last kref is dropped */
static void release_generic_request(struct kref *kref)
{
	struct ceph_mon_generic_request *req =
		container_of(kref, struct ceph_mon_generic_request, kref);

	/* drop our message refs, then the request itself */
	if (req->reply)
		ceph_msg_put(req->reply);
	if (req->request)
		ceph_msg_put(req->request);

	kfree(req);
}

static void put_generic_request(struct ceph_mon_generic_request *req)
{
	kref_put(&req->kref, release_generic_request);
}

static void get_generic_request(struct ceph_mon_generic_request *req)
{
	kref_get(&req->kref);
}
416 | |||
/* message-allocation callback for generic replies: hand back the
 * preallocated reply buffer for the matching tid, or skip the message
 * if no such request is registered */
static struct ceph_msg *get_generic_reply(struct ceph_connection *con,
					  struct ceph_msg_header *hdr,
					  int *skip)
{
	struct ceph_mon_client *monc = con->private;
	struct ceph_mon_generic_request *req;
	u64 tid = le64_to_cpu(hdr->tid);
	struct ceph_msg *m;

	mutex_lock(&monc->mutex);
	req = __lookup_generic_req(monc, tid);
	if (!req) {
		dout("get_generic_reply %lld dne\n", tid);
		*skip = 1;
		m = NULL;
	} else {
		dout("get_generic_reply %lld got %p\n", tid, req->reply);
		m = ceph_msg_get(req->reply);
		/*
		 * we don't need to track the connection reading into
		 * this reply because we only have one open connection
		 * at a time, ever.
		 */
	}
	mutex_unlock(&monc->mutex);
	return m;
}
444 | |||
/* register a generic request, send it, and wait (interruptibly) for
 * the reply; returns req->result, or the error from the wait */
static int do_generic_request(struct ceph_mon_client *monc,
			      struct ceph_mon_generic_request *req)
{
	int err;

	/* register request */
	mutex_lock(&monc->mutex);
	req->tid = ++monc->last_tid;
	req->request->hdr.tid = cpu_to_le64(req->tid);
	__insert_generic_request(monc, req);
	monc->num_generic_requests++;
	ceph_con_send(monc->con, ceph_msg_get(req->request));
	mutex_unlock(&monc->mutex);

	err = wait_for_completion_interruptible(&req->completion);

	/* unregister whether we completed or were interrupted */
	mutex_lock(&monc->mutex);
	rb_erase(&req->node, &monc->generic_request_tree);
	monc->num_generic_requests--;
	mutex_unlock(&monc->mutex);

	if (!err)
		err = req->result;
	return err;
}
470 | |||
/*
 * statfs
 */
static void handle_statfs_reply(struct ceph_mon_client *monc,
				struct ceph_msg *msg)
{
	struct ceph_mon_generic_request *req;
	struct ceph_mon_statfs_reply *reply = msg->front.iov_base;
	u64 tid = le64_to_cpu(msg->hdr.tid);

	if (msg->front.iov_len != sizeof(*reply))
		goto bad;
	dout("handle_statfs_reply %p tid %llu\n", msg, tid);

	mutex_lock(&monc->mutex);
	req = __lookup_generic_req(monc, tid);
	if (req) {
		/* copy the stats into the waiter's buffer */
		*(struct ceph_statfs *)req->buf = reply->st;
		req->result = 0;
		/* hold a ref so req survives dropping the mutex */
		get_generic_request(req);
	}
	mutex_unlock(&monc->mutex);
	if (req) {
		complete_all(&req->completion);
		put_generic_request(req);
	}
	return;

bad:
	pr_err("corrupt generic reply, tid %llu\n", tid);
	ceph_msg_dump(msg);
}
503 | |||
/*
 * Do a synchronous statfs().
 */
int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf)
{
	struct ceph_mon_generic_request *req;
	struct ceph_mon_statfs *h;
	int err;

	req = kzalloc(sizeof(*req), GFP_NOFS);
	if (!req)
		return -ENOMEM;

	kref_init(&req->kref);
	req->buf = buf;			/* filled by handle_statfs_reply */
	req->buf_len = sizeof(*buf);
	init_completion(&req->completion);

	err = -ENOMEM;
	req->request = ceph_msg_new(CEPH_MSG_STATFS, sizeof(*h), GFP_NOFS);
	if (!req->request)
		goto out;
	req->reply = ceph_msg_new(CEPH_MSG_STATFS_REPLY, 1024, GFP_NOFS);
	if (!req->reply)
		goto out;

	/* fill out request */
	h = req->request->front.iov_base;
	h->monhdr.have_version = 0;
	h->monhdr.session_mon = cpu_to_le16(-1);	/* any monitor */
	h->monhdr.session_mon_tid = 0;
	h->fsid = monc->monmap->fsid;

	err = do_generic_request(monc, req);

out:
	/* drop our ref; req is freed once any reply-side ref is gone */
	kref_put(&req->kref, release_generic_request);
	return err;
}
543 | |||
/*
 * pool ops
 */
static int get_poolop_reply_buf(const char *src, size_t src_len,
				char *dst, size_t dst_len)
{
	u32 buf_len;

	/* payload must be exactly a u32 length prefix plus dst_len bytes */
	if (src_len != sizeof(u32) + dst_len)
		return -EINVAL;

	/* the encoded length must agree with the caller's buffer size */
	buf_len = le32_to_cpu(*(u32 *)src);
	if (buf_len != dst_len)
		return -EINVAL;

	memcpy(dst, src + sizeof(u32), dst_len);
	return 0;
}
562 | |||
563 | static void handle_poolop_reply(struct ceph_mon_client *monc, | ||
564 | struct ceph_msg *msg) | ||
565 | { | ||
566 | struct ceph_mon_generic_request *req; | ||
567 | struct ceph_mon_poolop_reply *reply = msg->front.iov_base; | ||
568 | u64 tid = le64_to_cpu(msg->hdr.tid); | ||
569 | |||
570 | if (msg->front.iov_len < sizeof(*reply)) | ||
571 | goto bad; | ||
572 | dout("handle_poolop_reply %p tid %llu\n", msg, tid); | ||
573 | |||
574 | mutex_lock(&monc->mutex); | ||
575 | req = __lookup_generic_req(monc, tid); | ||
576 | if (req) { | ||
577 | if (req->buf_len && | ||
578 | get_poolop_reply_buf(msg->front.iov_base + sizeof(*reply), | ||
579 | msg->front.iov_len - sizeof(*reply), | ||
580 | req->buf, req->buf_len) < 0) { | ||
581 | mutex_unlock(&monc->mutex); | ||
582 | goto bad; | ||
583 | } | ||
584 | req->result = le32_to_cpu(reply->reply_code); | ||
585 | get_generic_request(req); | ||
586 | } | ||
587 | mutex_unlock(&monc->mutex); | ||
588 | if (req) { | ||
589 | complete(&req->completion); | ||
590 | put_generic_request(req); | ||
591 | } | ||
592 | return; | ||
593 | |||
594 | bad: | ||
595 | pr_err("corrupt generic reply, tid %llu\n", tid); | ||
596 | ceph_msg_dump(msg); | ||
597 | } | ||
598 | |||
/*
 * Do a synchronous pool op.
 */
int ceph_monc_do_poolop(struct ceph_mon_client *monc, u32 op,
			u32 pool, u64 snapid,
			char *buf, int len)
{
	struct ceph_mon_generic_request *req;
	struct ceph_mon_poolop *h;
	int err;

	req = kzalloc(sizeof(*req), GFP_NOFS);
	if (!req)
		return -ENOMEM;

	kref_init(&req->kref);
	req->buf = buf;		/* optional reply-payload destination */
	req->buf_len = len;
	init_completion(&req->completion);

	err = -ENOMEM;
	req->request = ceph_msg_new(CEPH_MSG_POOLOP, sizeof(*h), GFP_NOFS);
	if (!req->request)
		goto out;
	req->reply = ceph_msg_new(CEPH_MSG_POOLOP_REPLY, 1024, GFP_NOFS);
	if (!req->reply)
		goto out;

	/* fill out request */
	req->request->hdr.version = cpu_to_le16(2);
	h = req->request->front.iov_base;
	h->monhdr.have_version = 0;
	h->monhdr.session_mon = cpu_to_le16(-1);	/* any monitor */
	h->monhdr.session_mon_tid = 0;
	h->fsid = monc->monmap->fsid;
	h->pool = cpu_to_le32(pool);
	h->op = cpu_to_le32(op);
	h->auid = 0;
	h->snapid = cpu_to_le64(snapid);
	h->name_len = 0;

	err = do_generic_request(monc, req);

out:
	/* drop our ref; req is freed once any reply-side ref is gone */
	kref_put(&req->kref, release_generic_request);
	return err;
}
646 | |||
/* allocate a new unmanaged snapshot id in @pool; the new id is
 * returned through *snapid */
int ceph_monc_create_snapid(struct ceph_mon_client *monc,
			    u32 pool, u64 *snapid)
{
	return ceph_monc_do_poolop(monc,  POOL_OP_CREATE_UNMANAGED_SNAP,
				   pool, 0, (char *)snapid, sizeof(*snapid));

}
654 | |||
655 | int ceph_monc_delete_snapid(struct ceph_mon_client *monc, | ||
656 | u32 pool, u64 snapid) | ||
657 | { | ||
658 | return ceph_monc_do_poolop(monc, POOL_OP_CREATE_UNMANAGED_SNAP, | ||
659 | pool, snapid, 0, 0); | ||
660 | |||
661 | } | ||
662 | |||
663 | /* | ||
664 | * Resend pending generic requests. | ||
665 | */ | ||
666 | static void __resend_generic_request(struct ceph_mon_client *monc) | ||
667 | { | ||
668 | struct ceph_mon_generic_request *req; | ||
669 | struct rb_node *p; | ||
670 | |||
671 | for (p = rb_first(&monc->generic_request_tree); p; p = rb_next(p)) { | ||
672 | req = rb_entry(p, struct ceph_mon_generic_request, node); | ||
673 | ceph_con_revoke(monc->con, req->request); | ||
674 | ceph_con_send(monc->con, ceph_msg_get(req->request)); | ||
675 | } | ||
676 | } | ||
677 | |||
/*
 * Delayed work.  If we haven't mounted yet, retry.  Otherwise,
 * renew/retry subscription as needed (in case it is timing out, or we
 * got an ENOMEM).  And keep the monitor connection alive.
 */
static void delayed_work(struct work_struct *work)
{
	struct ceph_mon_client *monc =
		container_of(work, struct ceph_mon_client, delayed_work.work);

	dout("monc delayed_work\n");
	mutex_lock(&monc->mutex);
	if (monc->hunting) {
		/* no session yet: drop this attempt, try another mon */
		__close_session(monc);
		__open_session(monc);  /* continue hunting */
	} else {
		ceph_con_keepalive(monc->con);

		__validate_auth(monc);

		/* only subscribe once authentication has completed */
		if (monc->auth->ops->is_authenticated(monc->auth))
			__send_subscribe(monc);
	}
	__schedule_delayed(monc);
	mutex_unlock(&monc->mutex);
}
704 | |||
/*
 * On startup, we build a temporary monmap populated with the IPs
 * provided by mount(2).
 */
static int build_initial_monmap(struct ceph_mon_client *monc)
{
	struct ceph_mount_args *args = monc->client->mount_args;
	struct ceph_entity_addr *mon_addr = args->mon_addr;
	int num_mon = args->num_mon;
	int i;

	/* build initial monmap */
	/* NOTE(review): assumes num_mon is small (bounded by mount option
	 * parsing) so the size multiplication cannot overflow -- confirm */
	monc->monmap = kzalloc(sizeof(*monc->monmap) +
			       num_mon*sizeof(monc->monmap->mon_inst[0]),
			       GFP_KERNEL);
	if (!monc->monmap)
		return -ENOMEM;
	for (i = 0; i < num_mon; i++) {
		monc->monmap->mon_inst[i].addr = mon_addr[i];
		monc->monmap->mon_inst[i].addr.nonce = 0;
		monc->monmap->mon_inst[i].name.type =
			CEPH_ENTITY_TYPE_MON;
		monc->monmap->mon_inst[i].name.num = cpu_to_le64(i);
	}
	monc->monmap->num_mon = num_mon;
	monc->have_fsid = false;

	/* release addr memory */
	kfree(args->mon_addr);
	args->mon_addr = NULL;
	args->num_mon = 0;
	return 0;
}
738 | |||
739 | int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl) | ||
740 | { | ||
741 | int err = 0; | ||
742 | |||
743 | dout("init\n"); | ||
744 | memset(monc, 0, sizeof(*monc)); | ||
745 | monc->client = cl; | ||
746 | monc->monmap = NULL; | ||
747 | mutex_init(&monc->mutex); | ||
748 | |||
749 | err = build_initial_monmap(monc); | ||
750 | if (err) | ||
751 | goto out; | ||
752 | |||
753 | monc->con = NULL; | ||
754 | |||
755 | /* authentication */ | ||
756 | monc->auth = ceph_auth_init(cl->mount_args->name, | ||
757 | cl->mount_args->secret); | ||
758 | if (IS_ERR(monc->auth)) | ||
759 | return PTR_ERR(monc->auth); | ||
760 | monc->auth->want_keys = | ||
761 | CEPH_ENTITY_TYPE_AUTH | CEPH_ENTITY_TYPE_MON | | ||
762 | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MDS; | ||
763 | |||
764 | /* msgs */ | ||
765 | err = -ENOMEM; | ||
766 | monc->m_subscribe_ack = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE_ACK, | ||
767 | sizeof(struct ceph_mon_subscribe_ack), | ||
768 | GFP_NOFS); | ||
769 | if (!monc->m_subscribe_ack) | ||
770 | goto out_monmap; | ||
771 | |||
772 | monc->m_subscribe = ceph_msg_new(CEPH_MSG_MON_SUBSCRIBE, 96, GFP_NOFS); | ||
773 | if (!monc->m_subscribe) | ||
774 | goto out_subscribe_ack; | ||
775 | |||
776 | monc->m_auth_reply = ceph_msg_new(CEPH_MSG_AUTH_REPLY, 4096, GFP_NOFS); | ||
777 | if (!monc->m_auth_reply) | ||
778 | goto out_subscribe; | ||
779 | |||
780 | monc->m_auth = ceph_msg_new(CEPH_MSG_AUTH, 4096, GFP_NOFS); | ||
781 | monc->pending_auth = 0; | ||
782 | if (!monc->m_auth) | ||
783 | goto out_auth_reply; | ||
784 | |||
785 | monc->cur_mon = -1; | ||
786 | monc->hunting = true; | ||
787 | monc->sub_renew_after = jiffies; | ||
788 | monc->sub_sent = 0; | ||
789 | |||
790 | INIT_DELAYED_WORK(&monc->delayed_work, delayed_work); | ||
791 | monc->generic_request_tree = RB_ROOT; | ||
792 | monc->num_generic_requests = 0; | ||
793 | monc->last_tid = 0; | ||
794 | |||
795 | monc->have_mdsmap = 0; | ||
796 | monc->have_osdmap = 0; | ||
797 | monc->want_next_osdmap = 1; | ||
798 | return 0; | ||
799 | |||
800 | out_auth_reply: | ||
801 | ceph_msg_put(monc->m_auth_reply); | ||
802 | out_subscribe: | ||
803 | ceph_msg_put(monc->m_subscribe); | ||
804 | out_subscribe_ack: | ||
805 | ceph_msg_put(monc->m_subscribe_ack); | ||
806 | out_monmap: | ||
807 | kfree(monc->monmap); | ||
808 | out: | ||
809 | return err; | ||
810 | } | ||
811 | |||
/* tear down the mon client: stop the timer, close the session, and
 * release every resource acquired in ceph_monc_init() */
void ceph_monc_stop(struct ceph_mon_client *monc)
{
	dout("stop\n");
	cancel_delayed_work_sync(&monc->delayed_work);

	mutex_lock(&monc->mutex);
	__close_session(monc);
	if (monc->con) {
		/* detach ourselves before dropping the connection ref */
		monc->con->private = NULL;
		monc->con->ops->put(monc->con);
		monc->con = NULL;
	}
	mutex_unlock(&monc->mutex);

	ceph_auth_destroy(monc->auth);

	/* drop our refs on the preallocated session messages */
	ceph_msg_put(monc->m_auth);
	ceph_msg_put(monc->m_auth_reply);
	ceph_msg_put(monc->m_subscribe);
	ceph_msg_put(monc->m_subscribe_ack);

	kfree(monc->monmap);
}
835 | |||
/* feed an auth reply into the auth state machine: record a fatal auth
 * error, continue the handshake, or -- on first successful
 * authentication -- start the session proper */
static void handle_auth_reply(struct ceph_mon_client *monc,
			      struct ceph_msg *msg)
{
	int ret;
	int was_auth = 0;

	mutex_lock(&monc->mutex);
	if (monc->auth->ops)
		was_auth = monc->auth->ops->is_authenticated(monc->auth);
	monc->pending_auth = 0;
	ret = ceph_handle_auth_reply(monc->auth, msg->front.iov_base,
				     msg->front.iov_len,
				     monc->m_auth->front.iov_base,
				     monc->m_auth->front_max);
	if (ret < 0) {
		/* fatal: wake anyone waiting on auth */
		monc->client->auth_err = ret;
		wake_up_all(&monc->client->auth_wq);
	} else if (ret > 0) {
		/* handshake continues; ret is the next request's length */
		__send_prepared_auth_request(monc, ret);
	} else if (!was_auth && monc->auth->ops->is_authenticated(monc->auth)) {
		dout("authenticated, starting session\n");

		/* we now know our global id; name ourselves accordingly */
		monc->client->msgr->inst.name.type = CEPH_ENTITY_TYPE_CLIENT;
		monc->client->msgr->inst.name.num =
					cpu_to_le64(monc->auth->global_id);

		__send_subscribe(monc);
		__resend_generic_request(monc);
	}
	mutex_unlock(&monc->mutex);
}
867 | |||
/* rebuild and send an auth request if the auth layer says one is
 * needed; caller holds monc->mutex */
static int __validate_auth(struct ceph_mon_client *monc)
{
	int ret;

	/* don't pile up requests while one is already in flight */
	if (monc->pending_auth)
		return 0;

	ret = ceph_build_auth(monc->auth, monc->m_auth->front.iov_base,
			      monc->m_auth->front_max);
	if (ret <= 0)
		return ret; /* either an error, or no need to authenticate */
	__send_prepared_auth_request(monc, ret);
	return 0;
}
882 | |||
/* locked wrapper around __validate_auth() */
int ceph_monc_validate_auth(struct ceph_mon_client *monc)
{
	int ret;

	mutex_lock(&monc->mutex);
	ret = __validate_auth(monc);
	mutex_unlock(&monc->mutex);
	return ret;
}
892 | |||
/*
 * handle incoming message
 */
static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
{
	struct ceph_mon_client *monc = con->private;
	int type = le16_to_cpu(msg->hdr.type);

	/* connection may already be detached by ceph_monc_stop();
	 * NOTE(review): this early return does not put the msg ref --
	 * verify against the messenger's dispatch contract */
	if (!monc)
		return;

	switch (type) {
	case CEPH_MSG_AUTH_REPLY:
		handle_auth_reply(monc, msg);
		break;

	case CEPH_MSG_MON_SUBSCRIBE_ACK:
		handle_subscribe_ack(monc, msg);
		break;

	case CEPH_MSG_STATFS_REPLY:
		handle_statfs_reply(monc, msg);
		break;

	case CEPH_MSG_POOLOP_REPLY:
		handle_poolop_reply(monc, msg);
		break;

	case CEPH_MSG_MON_MAP:
		ceph_monc_handle_map(monc, msg);
		break;

	case CEPH_MSG_MDS_MAP:
		ceph_mdsc_handle_map(&monc->client->mdsc, msg);
		break;

	case CEPH_MSG_OSD_MAP:
		ceph_osdc_handle_map(&monc->client->osdc, msg);
		break;

	default:
		pr_err("received unknown message type %d %s\n", type,
		       ceph_msg_type_name(type));
	}
	/* drop the ref we were handed with the message */
	ceph_msg_put(msg);
}
939 | |||
/*
 * Allocate memory for incoming message
 */
static struct ceph_msg *mon_alloc_msg(struct ceph_connection *con,
				      struct ceph_msg_header *hdr,
				      int *skip)
{
	struct ceph_mon_client *monc = con->private;
	int type = le16_to_cpu(hdr->type);
	int front_len = le32_to_cpu(hdr->front_len);
	struct ceph_msg *m = NULL;

	*skip = 0;

	switch (type) {
	case CEPH_MSG_MON_SUBSCRIBE_ACK:
		/* reuse the preallocated ack buffer */
		m = ceph_msg_get(monc->m_subscribe_ack);
		break;
	case CEPH_MSG_POOLOP_REPLY:
	case CEPH_MSG_STATFS_REPLY:
		/* hand back the reply buffer preallocated by the request */
		return get_generic_reply(con, hdr, skip);
	case CEPH_MSG_AUTH_REPLY:
		m = ceph_msg_get(monc->m_auth_reply);
		break;
	case CEPH_MSG_MON_MAP:
	case CEPH_MSG_MDS_MAP:
	case CEPH_MSG_OSD_MAP:
		/* maps vary in size; allocate fresh */
		m = ceph_msg_new(type, front_len, GFP_NOFS);
		break;
	}

	if (!m) {
		pr_info("alloc_msg unknown type %d\n", type);
		*skip = 1;
	}
	return m;
}
977 | |||
/*
 * If the monitor connection resets, pick a new monitor and resubmit
 * any pending requests.
 */
static void mon_fault(struct ceph_connection *con)
{
	struct ceph_mon_client *monc = con->private;

	if (!monc)
		return;

	dout("mon_fault\n");
	mutex_lock(&monc->mutex);
	/* re-check under the mutex: stop() may have detached us */
	if (!con->private)
		goto out;

	if (monc->con && !monc->hunting)
		pr_info("mon%d %s session lost, "
			"hunting for new mon\n", monc->cur_mon,
			pr_addr(&monc->con->peer_addr.in_addr));

	__close_session(monc);
	if (!monc->hunting) {
		/* start hunting */
		monc->hunting = true;
		__open_session(monc);
	} else {
		/* already hunting, let's wait a bit */
		__schedule_delayed(monc);
	}
out:
	mutex_unlock(&monc->mutex);
}
1011 | |||
/* monitor connection callbacks: refcounting, dispatch, fault recovery,
 * and incoming-message buffer allocation */
static const struct ceph_connection_operations mon_con_ops = {
	.get = ceph_con_get,
	.put = ceph_con_put,
	.dispatch = dispatch,
	.fault = mon_fault,
	.alloc_msg = mon_alloc_msg,
};
diff --git a/fs/ceph/mon_client.h b/fs/ceph/mon_client.h deleted file mode 100644 index 8e396f2c0963..000000000000 --- a/fs/ceph/mon_client.h +++ /dev/null | |||
@@ -1,121 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_MON_CLIENT_H | ||
2 | #define _FS_CEPH_MON_CLIENT_H | ||
3 | |||
4 | #include <linux/completion.h> | ||
5 | #include <linux/kref.h> | ||
6 | #include <linux/rbtree.h> | ||
7 | |||
8 | #include "messenger.h" | ||
9 | |||
10 | struct ceph_client; | ||
11 | struct ceph_mount_args; | ||
12 | struct ceph_auth_client; | ||
13 | |||
/*
 * The monitor map enumerates the set of all monitors.
 */
struct ceph_monmap {
	struct ceph_fsid fsid;		/* cluster identity */
	u32 epoch;			/* monmap version */
	u32 num_mon;			/* entries in mon_inst[] */
	struct ceph_entity_inst mon_inst[0];	/* variable-length tail */
};
23 | |||
24 | struct ceph_mon_client; | ||
25 | struct ceph_mon_generic_request; | ||
26 | |||
27 | |||
/*
 * Generic mechanism for resending monitor requests.
 */
typedef void (*ceph_monc_request_func_t)(struct ceph_mon_client *monc,
					 int newmon);

/* a pending monitor request */
struct ceph_mon_request {
	struct ceph_mon_client *monc;
	struct delayed_work delayed_work;
	unsigned long delay;		/* presumably the resend interval --
					 * confirm against users */
	ceph_monc_request_func_t do_request;	/* (re)issues the request */
};
41 | |||
/*
 * ceph_mon_generic_request is being used for the statfs and poolop requests
 * which are being done a bit differently because we need to get data back
 * to the caller
 */
struct ceph_mon_generic_request {
	struct kref kref;
	u64 tid;			/* transaction id */
	struct rb_node node;		/* in monc->generic_request_tree */
	int result;			/* result code from the reply */
	void *buf;			/* caller's reply-data buffer */
	int buf_len;
	struct completion completion;	/* signalled when the reply lands */
	struct ceph_msg *request;  /* original request */
	struct ceph_msg *reply;    /* and reply */
};
58 | |||
struct ceph_mon_client {
	struct ceph_client *client;
	struct ceph_monmap *monmap;

	struct mutex mutex;		/* protects the state below */
	struct delayed_work delayed_work;	/* keepalive/retry timer */

	struct ceph_auth_client *auth;
	/* preallocated messages for the monitor session */
	struct ceph_msg *m_auth, *m_auth_reply, *m_subscribe, *m_subscribe_ack;
	int pending_auth;		/* auth request in flight */

	bool hunting;			/* still looking for a usable mon */
	int cur_mon;                       /* last monitor i contacted */
	unsigned long sub_sent, sub_renew_after;
	struct ceph_connection *con;
	bool have_fsid;

	/* pending generic requests */
	struct rb_root generic_request_tree;
	int num_generic_requests;
	u64 last_tid;

	/* mds/osd map */
	int want_next_osdmap; /* 1 = want, 2 = want+asked */
	u32 have_osdmap, have_mdsmap;	/* map epochs we already hold */

#ifdef CONFIG_DEBUG_FS
	struct dentry *debugfs_file;
#endif
};
89 | |||
extern struct ceph_monmap *ceph_monmap_decode(void *p, void *end);
extern int ceph_monmap_contains(struct ceph_monmap *m,
				struct ceph_entity_addr *addr);

extern int ceph_monc_init(struct ceph_mon_client *monc, struct ceph_client *cl);
extern void ceph_monc_stop(struct ceph_mon_client *monc);

/*
 * The model here is to indicate that we need a new map of at least
 * epoch @want, and also call in when we receive a map.  We will
 * periodically rerequest the map from the monitor cluster until we
 * get what we want.
 */
extern int ceph_monc_got_mdsmap(struct ceph_mon_client *monc, u32 have);
extern int ceph_monc_got_osdmap(struct ceph_mon_client *monc, u32 have);

extern void ceph_monc_request_next_osdmap(struct ceph_mon_client *monc);

/* synchronous statfs via the monitor cluster */
extern int ceph_monc_do_statfs(struct ceph_mon_client *monc,
			       struct ceph_statfs *buf);

extern int ceph_monc_open_session(struct ceph_mon_client *monc);

extern int ceph_monc_validate_auth(struct ceph_mon_client *monc);

/* unmanaged snapshot create/delete (pool ops) */
extern int ceph_monc_create_snapid(struct ceph_mon_client *monc,
				   u32 pool, u64 *snapid);

extern int ceph_monc_delete_snapid(struct ceph_mon_client *monc,
				   u32 pool, u64 snapid);

#endif
diff --git a/fs/ceph/msgpool.c b/fs/ceph/msgpool.c deleted file mode 100644 index dd65a6438131..000000000000 --- a/fs/ceph/msgpool.c +++ /dev/null | |||
@@ -1,64 +0,0 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/err.h> | ||
4 | #include <linux/sched.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/vmalloc.h> | ||
7 | |||
8 | #include "msgpool.h" | ||
9 | |||
10 | static void *alloc_fn(gfp_t gfp_mask, void *arg) | ||
11 | { | ||
12 | struct ceph_msgpool *pool = arg; | ||
13 | void *p; | ||
14 | |||
15 | p = ceph_msg_new(0, pool->front_len, gfp_mask); | ||
16 | if (!p) | ||
17 | pr_err("msgpool %s alloc failed\n", pool->name); | ||
18 | return p; | ||
19 | } | ||
20 | |||
/*
 * mempool free callback: drop our reference on the pooled message.
 */
static void free_fn(void *element, void *arg)
{
	struct ceph_msg *msg = element;

	ceph_msg_put(msg);
}
25 | |||
26 | int ceph_msgpool_init(struct ceph_msgpool *pool, | ||
27 | int front_len, int size, bool blocking, const char *name) | ||
28 | { | ||
29 | pool->front_len = front_len; | ||
30 | pool->pool = mempool_create(size, alloc_fn, free_fn, pool); | ||
31 | if (!pool->pool) | ||
32 | return -ENOMEM; | ||
33 | pool->name = name; | ||
34 | return 0; | ||
35 | } | ||
36 | |||
/*
 * Tear down a message pool; the preallocated elements are released
 * through free_fn.
 */
void ceph_msgpool_destroy(struct ceph_msgpool *pool)
{
	mempool_destroy(pool->pool);
}
41 | |||
42 | struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *pool, | ||
43 | int front_len) | ||
44 | { | ||
45 | if (front_len > pool->front_len) { | ||
46 | pr_err("msgpool_get pool %s need front %d, pool size is %d\n", | ||
47 | pool->name, front_len, pool->front_len); | ||
48 | WARN_ON(1); | ||
49 | |||
50 | /* try to alloc a fresh message */ | ||
51 | return ceph_msg_new(0, front_len, GFP_NOFS); | ||
52 | } | ||
53 | |||
54 | return mempool_alloc(pool->pool, GFP_NOFS); | ||
55 | } | ||
56 | |||
/*
 * Return a message to pooled state: restore the advertised front
 * length and reset the refcount to a single reference.
 *
 * NOTE(review): no mempool_free() here — presumably the message is
 * handed back to the mempool elsewhere via the msg release path;
 * confirm against the messenger code.
 */
void ceph_msgpool_put(struct ceph_msgpool *pool, struct ceph_msg *msg)
{
	/* reset msg front_len; user may have changed it */
	msg->front.iov_len = pool->front_len;
	msg->hdr.front_len = cpu_to_le32(pool->front_len);

	kref_init(&msg->kref);  /* retake single ref */
}
diff --git a/fs/ceph/msgpool.h b/fs/ceph/msgpool.h deleted file mode 100644 index a362605f9368..000000000000 --- a/fs/ceph/msgpool.h +++ /dev/null | |||
@@ -1,25 +0,0 @@ | |||
#ifndef _FS_CEPH_MSGPOOL
#define _FS_CEPH_MSGPOOL

#include <linux/mempool.h>
#include "messenger.h"

/*
 * we use memory pools for preallocating messages we may receive, to
 * avoid unexpected OOM conditions.
 */
struct ceph_msgpool {
	const char *name;	/* label used in error/debug messages */
	mempool_t *pool;	/* underlying mempool of struct ceph_msg */
	int front_len;          /* preallocated payload size */
};

/* NOTE(review): @blocking appears unused by the implementation in
 * msgpool.c — confirm before relying on it */
extern int ceph_msgpool_init(struct ceph_msgpool *pool,
			     int front_len, int size, bool blocking,
			     const char *name);
extern void ceph_msgpool_destroy(struct ceph_msgpool *pool);
/* get a message; falls back to a fresh allocation (with a WARN) if
 * @front_len exceeds the pool's preallocated front size */
extern struct ceph_msg *ceph_msgpool_get(struct ceph_msgpool *,
					 int front_len);
extern void ceph_msgpool_put(struct ceph_msgpool *, struct ceph_msg *);

#endif
diff --git a/fs/ceph/msgr.h b/fs/ceph/msgr.h deleted file mode 100644 index 680d3d648cac..000000000000 --- a/fs/ceph/msgr.h +++ /dev/null | |||
@@ -1,175 +0,0 @@ | |||
#ifndef CEPH_MSGR_H
#define CEPH_MSGR_H

/*
 * Data types for message passing layer used by Ceph.
 *
 * These structures go over the wire: all multi-byte integers are
 * little-endian (__le*) and every struct is packed.  Do not change
 * layouts without bumping the protocol version in CEPH_BANNER.
 */

#define CEPH_MON_PORT 6789 /* default monitor port */

/*
 * client-side processes will try to bind to ports in this
 * range, simply for the benefit of tools like nmap or wireshark
 * that would like to identify the protocol.
 */
#define CEPH_PORT_FIRST 6789
#define CEPH_PORT_START 6800 /* non-monitors start here */
#define CEPH_PORT_LAST 6900

/*
 * tcp connection banner. include a protocol version. and adjust
 * whenever the wire protocol changes. try to keep this string length
 * constant.
 */
#define CEPH_BANNER "ceph v027"
#define CEPH_BANNER_MAX_LEN 30


/*
 * Rollover-safe type and comparator for 32-bit sequence numbers.
 * Comparator returns -1, 0, or 1.
 */
typedef __u32 ceph_seq_t;

/* signed subtraction gives correct ordering even across wraparound */
static inline __s32 ceph_seq_cmp(__u32 a, __u32 b)
{
	return (__s32)a - (__s32)b;
}


/*
 * entity_name -- logical name for a process participating in the
 * network, e.g. 'mds0' or 'osd3'.
 */
struct ceph_entity_name {
	__u8 type;      /* CEPH_ENTITY_TYPE_* */
	__le64 num;
} __attribute__ ((packed));

/* entity types are bit flags so they can be combined into masks */
#define CEPH_ENTITY_TYPE_MON    0x01
#define CEPH_ENTITY_TYPE_MDS    0x02
#define CEPH_ENTITY_TYPE_OSD    0x04
#define CEPH_ENTITY_TYPE_CLIENT 0x08
#define CEPH_ENTITY_TYPE_AUTH   0x20

#define CEPH_ENTITY_TYPE_ANY    0xFF

extern const char *ceph_entity_type_name(int type);

/*
 * entity_addr -- network address
 */
struct ceph_entity_addr {
	__le32 type;
	__le32 nonce;  /* unique id for process (e.g. pid) */
	struct sockaddr_storage in_addr;
} __attribute__ ((packed));

/* a named entity at a particular address */
struct ceph_entity_inst {
	struct ceph_entity_name name;
	struct ceph_entity_addr addr;
} __attribute__ ((packed));


/* used by message exchange protocol */
#define CEPH_MSGR_TAG_READY         1  /* server->client: ready for messages */
#define CEPH_MSGR_TAG_RESETSESSION  2  /* server->client: reset, try again */
#define CEPH_MSGR_TAG_WAIT          3  /* server->client: wait for racing
					  incoming connection */
#define CEPH_MSGR_TAG_RETRY_SESSION 4  /* server->client + cseq: try again
					  with higher cseq */
#define CEPH_MSGR_TAG_RETRY_GLOBAL  5  /* server->client + gseq: try again
					  with higher gseq */
#define CEPH_MSGR_TAG_CLOSE         6  /* closing pipe */
#define CEPH_MSGR_TAG_MSG           7  /* message */
#define CEPH_MSGR_TAG_ACK           8  /* message ack */
#define CEPH_MSGR_TAG_KEEPALIVE     9  /* just a keepalive byte! */
#define CEPH_MSGR_TAG_BADPROTOVER  10  /* bad protocol version */
#define CEPH_MSGR_TAG_BADAUTHORIZER 11 /* bad authorizer */
#define CEPH_MSGR_TAG_FEATURES      12 /* insufficient features */


/*
 * connection negotiation
 */
struct ceph_msg_connect {
	__le64 features;     /* supported feature bits */
	__le32 host_type;    /* CEPH_ENTITY_TYPE_* */
	__le32 global_seq;   /* count connections initiated by this host */
	__le32 connect_seq;  /* count connections initiated in this session */
	__le32 protocol_version;
	__le32 authorizer_protocol;
	__le32 authorizer_len;
	__u8  flags;         /* CEPH_MSG_CONNECT_* */
} __attribute__ ((packed));

struct ceph_msg_connect_reply {
	__u8 tag;            /* CEPH_MSGR_TAG_* */
	__le64 features;     /* feature bits for this session */
	__le32 global_seq;
	__le32 connect_seq;
	__le32 protocol_version;
	__le32 authorizer_len;
	__u8 flags;
} __attribute__ ((packed));

#define CEPH_MSG_CONNECT_LOSSY  1  /* messages i send may be safely dropped */


/*
 * message header
 */
/* NOTE(review): legacy header layout (full entity_inst src) retained
 * alongside the current one below — confirm where it is still decoded */
struct ceph_msg_header_old {
	__le64 seq;       /* message seq# for this session */
	__le64 tid;       /* transaction id */
	__le16 type;      /* message type */
	__le16 priority;  /* priority.  higher value == higher priority */
	__le16 version;   /* version of message encoding */

	__le32 front_len; /* bytes in main payload */
	__le32 middle_len;/* bytes in middle payload */
	__le32 data_len;  /* bytes of data payload */
	__le16 data_off;  /* sender: include full offset;
			     receiver: mask against ~PAGE_MASK */

	struct ceph_entity_inst src, orig_src;
	__le32 reserved;
	__le32 crc;       /* header crc32c */
} __attribute__ ((packed));

struct ceph_msg_header {
	__le64 seq;       /* message seq# for this session */
	__le64 tid;       /* transaction id */
	__le16 type;      /* message type */
	__le16 priority;  /* priority.  higher value == higher priority */
	__le16 version;   /* version of message encoding */

	__le32 front_len; /* bytes in main payload */
	__le32 middle_len;/* bytes in middle payload */
	__le32 data_len;  /* bytes of data payload */
	__le16 data_off;  /* sender: include full offset;
			     receiver: mask against ~PAGE_MASK */

	struct ceph_entity_name src;
	__le32 reserved;
	__le32 crc;       /* header crc32c */
} __attribute__ ((packed));

#define CEPH_MSG_PRIO_LOW     64
#define CEPH_MSG_PRIO_DEFAULT 127
#define CEPH_MSG_PRIO_HIGH    196
#define CEPH_MSG_PRIO_HIGHEST 255

/*
 * follows data payload
 */
struct ceph_msg_footer {
	__le32 front_crc, middle_crc, data_crc;
	__u8 flags;
} __attribute__ ((packed));

#define CEPH_MSG_FOOTER_COMPLETE  (1<<0)   /* msg wasn't aborted */
#define CEPH_MSG_FOOTER_NOCRC     (1<<1)   /* no data crc */


#endif
diff --git a/fs/ceph/osd_client.c b/fs/ceph/osd_client.c deleted file mode 100644 index dfced1dacbcd..000000000000 --- a/fs/ceph/osd_client.c +++ /dev/null | |||
@@ -1,1539 +0,0 @@ | |||
1 | #include "ceph_debug.h" | ||
2 | |||
3 | #include <linux/err.h> | ||
4 | #include <linux/highmem.h> | ||
5 | #include <linux/mm.h> | ||
6 | #include <linux/pagemap.h> | ||
7 | #include <linux/slab.h> | ||
8 | #include <linux/uaccess.h> | ||
9 | |||
10 | #include "super.h" | ||
11 | #include "osd_client.h" | ||
12 | #include "messenger.h" | ||
13 | #include "decode.h" | ||
14 | #include "auth.h" | ||
15 | |||
16 | #define OSD_OP_FRONT_LEN 4096 | ||
17 | #define OSD_OPREPLY_FRONT_LEN 512 | ||
18 | |||
19 | static const struct ceph_connection_operations osd_con_ops; | ||
20 | static int __kick_requests(struct ceph_osd_client *osdc, | ||
21 | struct ceph_osd *kickosd); | ||
22 | |||
23 | static void kick_requests(struct ceph_osd_client *osdc, struct ceph_osd *osd); | ||
24 | |||
25 | /* | ||
26 | * Implement client access to distributed object storage cluster. | ||
27 | * | ||
28 | * All data objects are stored within a cluster/cloud of OSDs, or | ||
29 | * "object storage devices." (Note that Ceph OSDs have _nothing_ to | ||
30 | * do with the T10 OSD extensions to SCSI.) Ceph OSDs are simply | ||
31 | * remote daemons serving up and coordinating consistent and safe | ||
32 | * access to storage. | ||
33 | * | ||
34 | * Cluster membership and the mapping of data objects onto storage devices | ||
35 | * are described by the osd map. | ||
36 | * | ||
37 | * We keep track of pending OSD requests (read, write), resubmit | ||
38 | * requests to different OSDs when the cluster topology/data layout | ||
39 | * change, or retry the affected requests when the communications | ||
40 | * channel with an OSD is reset. | ||
41 | */ | ||
42 | |||
43 | /* | ||
44 | * calculate the mapping of a file extent onto an object, and fill out the | ||
45 | * request accordingly. shorten extent as necessary if it crosses an | ||
46 | * object boundary. | ||
47 | * | ||
48 | * fill osd op in request message. | ||
49 | */ | ||
static void calc_layout(struct ceph_osd_client *osdc,
			struct ceph_vino vino, struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			struct ceph_osd_request *req)
{
	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
	/* the op array immediately follows the head in the message front */
	struct ceph_osd_op *op = (void *)(reqhead + 1);
	u64 orig_len = *plen;
	u64 objoff, objlen; /* extent in object */
	u64 bno;            /* object (block) number within the file */

	reqhead->snapid = cpu_to_le64(vino.snap);

	/* object extent?  may shorten *plen at an object boundary */
	ceph_calc_file_object_mapping(layout, off, plen, &bno,
				      &objoff, &objlen);
	if (*plen < orig_len)
		dout(" skipping last %llu, final file extent %llu~%llu\n",
		     orig_len - *plen, off, *plen);

	/* object name is "<inode hex>.<block number hex>" */
	sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno);
	req->r_oid_len = strlen(req->r_oid);

	op->extent.offset = cpu_to_le64(objoff);
	op->extent.length = cpu_to_le64(objlen);
	req->r_num_pages = calc_pages_for(off, *plen);

	dout("calc_layout %s (%d) %llu~%llu (%d pages)\n",
	     req->r_oid, req->r_oid_len, objoff, objlen, req->r_num_pages);
}
80 | |||
81 | /* | ||
82 | * requests | ||
83 | */ | ||
/* kref release callback: tear down an osd request once the last
 * reference is dropped */
void ceph_osdc_release_request(struct kref *kref)
{
	struct ceph_osd_request *req = container_of(kref,
						    struct ceph_osd_request,
						    r_kref);

	if (req->r_request)
		ceph_msg_put(req->r_request);
	if (req->r_reply)
		ceph_msg_put(req->r_reply);
	if (req->r_con_filling_msg) {
		/* a connection may still be filling our reply message;
		 * revoke it before dropping the connection reference */
		dout("release_request revoking pages %p from con %p\n",
		     req->r_pages, req->r_con_filling_msg);
		ceph_con_revoke_message(req->r_con_filling_msg,
					req->r_reply);
		ceph_con_put(req->r_con_filling_msg);
	}
	if (req->r_own_pages)
		ceph_release_page_vector(req->r_pages,
					 req->r_num_pages);
	ceph_put_snap_context(req->r_snapc);
	/* free the same way we allocated */
	if (req->r_mempool)
		mempool_free(req, req->r_osdc->req_mempool);
	else
		kfree(req);
}
110 | |||
111 | /* | ||
112 | * build new request AND message, calculate layout, and adjust file | ||
113 | * extent as needed. | ||
114 | * | ||
115 | * if the file was recently truncated, we include information about its | ||
116 | * old and new size so that the object can be updated appropriately. (we | ||
117 | * avoid synchronously deleting truncated objects because it's slow.) | ||
118 | * | ||
119 | * if @do_sync, include a 'startsync' command so that the osd will flush | ||
120 | * data quickly. | ||
121 | */ | ||
122 | struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, | ||
123 | struct ceph_file_layout *layout, | ||
124 | struct ceph_vino vino, | ||
125 | u64 off, u64 *plen, | ||
126 | int opcode, int flags, | ||
127 | struct ceph_snap_context *snapc, | ||
128 | int do_sync, | ||
129 | u32 truncate_seq, | ||
130 | u64 truncate_size, | ||
131 | struct timespec *mtime, | ||
132 | bool use_mempool, int num_reply) | ||
133 | { | ||
134 | struct ceph_osd_request *req; | ||
135 | struct ceph_msg *msg; | ||
136 | struct ceph_osd_request_head *head; | ||
137 | struct ceph_osd_op *op; | ||
138 | void *p; | ||
139 | int num_op = 1 + do_sync; | ||
140 | size_t msg_size = sizeof(*head) + num_op*sizeof(*op); | ||
141 | int i; | ||
142 | |||
143 | if (use_mempool) { | ||
144 | req = mempool_alloc(osdc->req_mempool, GFP_NOFS); | ||
145 | memset(req, 0, sizeof(*req)); | ||
146 | } else { | ||
147 | req = kzalloc(sizeof(*req), GFP_NOFS); | ||
148 | } | ||
149 | if (req == NULL) | ||
150 | return NULL; | ||
151 | |||
152 | req->r_osdc = osdc; | ||
153 | req->r_mempool = use_mempool; | ||
154 | kref_init(&req->r_kref); | ||
155 | init_completion(&req->r_completion); | ||
156 | init_completion(&req->r_safe_completion); | ||
157 | INIT_LIST_HEAD(&req->r_unsafe_item); | ||
158 | req->r_flags = flags; | ||
159 | |||
160 | WARN_ON((flags & (CEPH_OSD_FLAG_READ|CEPH_OSD_FLAG_WRITE)) == 0); | ||
161 | |||
162 | /* create reply message */ | ||
163 | if (use_mempool) | ||
164 | msg = ceph_msgpool_get(&osdc->msgpool_op_reply, 0); | ||
165 | else | ||
166 | msg = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, | ||
167 | OSD_OPREPLY_FRONT_LEN, GFP_NOFS); | ||
168 | if (!msg) { | ||
169 | ceph_osdc_put_request(req); | ||
170 | return NULL; | ||
171 | } | ||
172 | req->r_reply = msg; | ||
173 | |||
174 | /* create request message; allow space for oid */ | ||
175 | msg_size += 40; | ||
176 | if (snapc) | ||
177 | msg_size += sizeof(u64) * snapc->num_snaps; | ||
178 | if (use_mempool) | ||
179 | msg = ceph_msgpool_get(&osdc->msgpool_op, 0); | ||
180 | else | ||
181 | msg = ceph_msg_new(CEPH_MSG_OSD_OP, msg_size, GFP_NOFS); | ||
182 | if (!msg) { | ||
183 | ceph_osdc_put_request(req); | ||
184 | return NULL; | ||
185 | } | ||
186 | msg->hdr.type = cpu_to_le16(CEPH_MSG_OSD_OP); | ||
187 | memset(msg->front.iov_base, 0, msg->front.iov_len); | ||
188 | head = msg->front.iov_base; | ||
189 | op = (void *)(head + 1); | ||
190 | p = (void *)(op + num_op); | ||
191 | |||
192 | req->r_request = msg; | ||
193 | req->r_snapc = ceph_get_snap_context(snapc); | ||
194 | |||
195 | head->client_inc = cpu_to_le32(1); /* always, for now. */ | ||
196 | head->flags = cpu_to_le32(flags); | ||
197 | if (flags & CEPH_OSD_FLAG_WRITE) | ||
198 | ceph_encode_timespec(&head->mtime, mtime); | ||
199 | head->num_ops = cpu_to_le16(num_op); | ||
200 | op->op = cpu_to_le16(opcode); | ||
201 | |||
202 | /* calculate max write size */ | ||
203 | calc_layout(osdc, vino, layout, off, plen, req); | ||
204 | req->r_file_layout = *layout; /* keep a copy */ | ||
205 | |||
206 | if (flags & CEPH_OSD_FLAG_WRITE) { | ||
207 | req->r_request->hdr.data_off = cpu_to_le16(off); | ||
208 | req->r_request->hdr.data_len = cpu_to_le32(*plen); | ||
209 | op->payload_len = cpu_to_le32(*plen); | ||
210 | } | ||
211 | op->extent.truncate_size = cpu_to_le64(truncate_size); | ||
212 | op->extent.truncate_seq = cpu_to_le32(truncate_seq); | ||
213 | |||
214 | /* fill in oid */ | ||
215 | head->object_len = cpu_to_le32(req->r_oid_len); | ||
216 | memcpy(p, req->r_oid, req->r_oid_len); | ||
217 | p += req->r_oid_len; | ||
218 | |||
219 | if (do_sync) { | ||
220 | op++; | ||
221 | op->op = cpu_to_le16(CEPH_OSD_OP_STARTSYNC); | ||
222 | } | ||
223 | if (snapc) { | ||
224 | head->snap_seq = cpu_to_le64(snapc->seq); | ||
225 | head->num_snaps = cpu_to_le32(snapc->num_snaps); | ||
226 | for (i = 0; i < snapc->num_snaps; i++) { | ||
227 | put_unaligned_le64(snapc->snaps[i], p); | ||
228 | p += sizeof(u64); | ||
229 | } | ||
230 | } | ||
231 | |||
232 | BUG_ON(p > msg->front.iov_base + msg->front.iov_len); | ||
233 | msg_size = p - msg->front.iov_base; | ||
234 | msg->front.iov_len = msg_size; | ||
235 | msg->hdr.front_len = cpu_to_le32(msg_size); | ||
236 | return req; | ||
237 | } | ||
238 | |||
239 | /* | ||
240 | * We keep osd requests in an rbtree, sorted by ->r_tid. | ||
241 | */ | ||
242 | static void __insert_request(struct ceph_osd_client *osdc, | ||
243 | struct ceph_osd_request *new) | ||
244 | { | ||
245 | struct rb_node **p = &osdc->requests.rb_node; | ||
246 | struct rb_node *parent = NULL; | ||
247 | struct ceph_osd_request *req = NULL; | ||
248 | |||
249 | while (*p) { | ||
250 | parent = *p; | ||
251 | req = rb_entry(parent, struct ceph_osd_request, r_node); | ||
252 | if (new->r_tid < req->r_tid) | ||
253 | p = &(*p)->rb_left; | ||
254 | else if (new->r_tid > req->r_tid) | ||
255 | p = &(*p)->rb_right; | ||
256 | else | ||
257 | BUG(); | ||
258 | } | ||
259 | |||
260 | rb_link_node(&new->r_node, parent, p); | ||
261 | rb_insert_color(&new->r_node, &osdc->requests); | ||
262 | } | ||
263 | |||
264 | static struct ceph_osd_request *__lookup_request(struct ceph_osd_client *osdc, | ||
265 | u64 tid) | ||
266 | { | ||
267 | struct ceph_osd_request *req; | ||
268 | struct rb_node *n = osdc->requests.rb_node; | ||
269 | |||
270 | while (n) { | ||
271 | req = rb_entry(n, struct ceph_osd_request, r_node); | ||
272 | if (tid < req->r_tid) | ||
273 | n = n->rb_left; | ||
274 | else if (tid > req->r_tid) | ||
275 | n = n->rb_right; | ||
276 | else | ||
277 | return req; | ||
278 | } | ||
279 | return NULL; | ||
280 | } | ||
281 | |||
282 | static struct ceph_osd_request * | ||
283 | __lookup_request_ge(struct ceph_osd_client *osdc, | ||
284 | u64 tid) | ||
285 | { | ||
286 | struct ceph_osd_request *req; | ||
287 | struct rb_node *n = osdc->requests.rb_node; | ||
288 | |||
289 | while (n) { | ||
290 | req = rb_entry(n, struct ceph_osd_request, r_node); | ||
291 | if (tid < req->r_tid) { | ||
292 | if (!n->rb_left) | ||
293 | return req; | ||
294 | n = n->rb_left; | ||
295 | } else if (tid > req->r_tid) { | ||
296 | n = n->rb_right; | ||
297 | } else { | ||
298 | return req; | ||
299 | } | ||
300 | } | ||
301 | return NULL; | ||
302 | } | ||
303 | |||
304 | |||
305 | /* | ||
306 | * If the osd connection drops, we need to resubmit all requests. | ||
307 | */ | ||
static void osd_reset(struct ceph_connection *con)
{
	struct ceph_osd *osd = con->private;
	struct ceph_osd_client *osdc;

	if (!osd)
		return;
	dout("osd_reset osd%d\n", osd->o_osd);
	osdc = osd->o_osdc;
	/* resubmit everything that was in flight to this osd */
	down_read(&osdc->map_sem);
	kick_requests(osdc, osd);
	up_read(&osdc->map_sem);
}
321 | |||
322 | /* | ||
323 | * Track open sessions with osds. | ||
324 | */ | ||
static struct ceph_osd *create_osd(struct ceph_osd_client *osdc)
{
	struct ceph_osd *osd;

	osd = kzalloc(sizeof(*osd), GFP_NOFS);
	if (!osd)
		return NULL;

	atomic_set(&osd->o_ref, 1);	/* initial ref belongs to the caller */
	osd->o_osdc = osdc;
	INIT_LIST_HEAD(&osd->o_requests);
	INIT_LIST_HEAD(&osd->o_osd_lru);
	osd->o_incarnation = 1;

	/* set up (but do not open) the messenger connection */
	ceph_con_init(osdc->client->msgr, &osd->o_con);
	osd->o_con.private = osd;
	osd->o_con.ops = &osd_con_ops;
	osd->o_con.peer_name.type = CEPH_ENTITY_TYPE_OSD;

	INIT_LIST_HEAD(&osd->o_keepalive_item);
	return osd;
}
347 | |||
/* take a reference on @osd; fails (returns NULL) if the refcount has
 * already dropped to zero, i.e. the osd is being destroyed */
static struct ceph_osd *get_osd(struct ceph_osd *osd)
{
	if (atomic_inc_not_zero(&osd->o_ref)) {
		dout("get_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref)-1,
		     atomic_read(&osd->o_ref));
		return osd;
	} else {
		dout("get_osd %p FAIL\n", osd);
		return NULL;
	}
}
359 | |||
/* drop a reference on @osd, freeing it on the last put */
static void put_osd(struct ceph_osd *osd)
{
	dout("put_osd %p %d -> %d\n", osd, atomic_read(&osd->o_ref),
	     atomic_read(&osd->o_ref) - 1);
	if (atomic_dec_and_test(&osd->o_ref)) {
		struct ceph_auth_client *ac = osd->o_osdc->client->monc.auth;

		/* release the per-osd authorizer before freeing */
		if (osd->o_authorizer)
			ac->ops->destroy_authorizer(ac, osd->o_authorizer);
		kfree(osd);
	}
}
372 | |||
373 | /* | ||
374 | * remove an osd from our map | ||
375 | */ | ||
static void __remove_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd)
{
	dout("__remove_osd %p\n", osd);
	BUG_ON(!list_empty(&osd->o_requests));	/* must be idle */
	rb_erase(&osd->o_node, &osdc->osds);
	list_del_init(&osd->o_osd_lru);
	ceph_con_close(&osd->o_con);
	put_osd(osd);	/* drop the map's reference */
}
385 | |||
/* park an idle osd on the LRU with an expiry time, so that
 * remove_old_osds() can reap it later */
static void __move_osd_to_lru(struct ceph_osd_client *osdc,
			      struct ceph_osd *osd)
{
	dout("__move_osd_to_lru %p\n", osd);
	BUG_ON(!list_empty(&osd->o_osd_lru));
	list_add_tail(&osd->o_osd_lru, &osdc->osd_lru);
	osd->lru_ttl = jiffies + osdc->client->mount_args->osd_idle_ttl * HZ;
}
394 | |||
395 | static void __remove_osd_from_lru(struct ceph_osd *osd) | ||
396 | { | ||
397 | dout("__remove_osd_from_lru %p\n", osd); | ||
398 | if (!list_empty(&osd->o_osd_lru)) | ||
399 | list_del_init(&osd->o_osd_lru); | ||
400 | } | ||
401 | |||
402 | static void remove_old_osds(struct ceph_osd_client *osdc, int remove_all) | ||
403 | { | ||
404 | struct ceph_osd *osd, *nosd; | ||
405 | |||
406 | dout("__remove_old_osds %p\n", osdc); | ||
407 | mutex_lock(&osdc->request_mutex); | ||
408 | list_for_each_entry_safe(osd, nosd, &osdc->osd_lru, o_osd_lru) { | ||
409 | if (!remove_all && time_before(jiffies, osd->lru_ttl)) | ||
410 | break; | ||
411 | __remove_osd(osdc, osd); | ||
412 | } | ||
413 | mutex_unlock(&osdc->request_mutex); | ||
414 | } | ||
415 | |||
416 | /* | ||
417 | * reset osd connect | ||
418 | */ | ||
419 | static int __reset_osd(struct ceph_osd_client *osdc, struct ceph_osd *osd) | ||
420 | { | ||
421 | struct ceph_osd_request *req; | ||
422 | int ret = 0; | ||
423 | |||
424 | dout("__reset_osd %p osd%d\n", osd, osd->o_osd); | ||
425 | if (list_empty(&osd->o_requests)) { | ||
426 | __remove_osd(osdc, osd); | ||
427 | } else if (memcmp(&osdc->osdmap->osd_addr[osd->o_osd], | ||
428 | &osd->o_con.peer_addr, | ||
429 | sizeof(osd->o_con.peer_addr)) == 0 && | ||
430 | !ceph_con_opened(&osd->o_con)) { | ||
431 | dout(" osd addr hasn't changed and connection never opened," | ||
432 | " letting msgr retry"); | ||
433 | /* touch each r_stamp for handle_timeout()'s benfit */ | ||
434 | list_for_each_entry(req, &osd->o_requests, r_osd_item) | ||
435 | req->r_stamp = jiffies; | ||
436 | ret = -EAGAIN; | ||
437 | } else { | ||
438 | ceph_con_close(&osd->o_con); | ||
439 | ceph_con_open(&osd->o_con, &osdc->osdmap->osd_addr[osd->o_osd]); | ||
440 | osd->o_incarnation++; | ||
441 | } | ||
442 | return ret; | ||
443 | } | ||
444 | |||
445 | static void __insert_osd(struct ceph_osd_client *osdc, struct ceph_osd *new) | ||
446 | { | ||
447 | struct rb_node **p = &osdc->osds.rb_node; | ||
448 | struct rb_node *parent = NULL; | ||
449 | struct ceph_osd *osd = NULL; | ||
450 | |||
451 | while (*p) { | ||
452 | parent = *p; | ||
453 | osd = rb_entry(parent, struct ceph_osd, o_node); | ||
454 | if (new->o_osd < osd->o_osd) | ||
455 | p = &(*p)->rb_left; | ||
456 | else if (new->o_osd > osd->o_osd) | ||
457 | p = &(*p)->rb_right; | ||
458 | else | ||
459 | BUG(); | ||
460 | } | ||
461 | |||
462 | rb_link_node(&new->o_node, parent, p); | ||
463 | rb_insert_color(&new->o_node, &osdc->osds); | ||
464 | } | ||
465 | |||
466 | static struct ceph_osd *__lookup_osd(struct ceph_osd_client *osdc, int o) | ||
467 | { | ||
468 | struct ceph_osd *osd; | ||
469 | struct rb_node *n = osdc->osds.rb_node; | ||
470 | |||
471 | while (n) { | ||
472 | osd = rb_entry(n, struct ceph_osd, o_node); | ||
473 | if (o < osd->o_osd) | ||
474 | n = n->rb_left; | ||
475 | else if (o > osd->o_osd) | ||
476 | n = n->rb_right; | ||
477 | else | ||
478 | return osd; | ||
479 | } | ||
480 | return NULL; | ||
481 | } | ||
482 | |||
/* arm the request-timeout worker one keepalive interval from now */
static void __schedule_osd_timeout(struct ceph_osd_client *osdc)
{
	schedule_delayed_work(&osdc->timeout_work,
			osdc->client->mount_args->osd_keepalive_timeout * HZ);
}
488 | |||
/* disarm the request-timeout worker (no pending requests remain) */
static void __cancel_osd_timeout(struct ceph_osd_client *osdc)
{
	cancel_delayed_work(&osdc->timeout_work);
}
493 | |||
494 | /* | ||
495 | * Register request, assign tid. If this is the first request, set up | ||
496 | * the timeout event. | ||
497 | */ | ||
static void register_request(struct ceph_osd_client *osdc,
			     struct ceph_osd_request *req)
{
	mutex_lock(&osdc->request_mutex);
	req->r_tid = ++osdc->last_tid;	/* tids are monotonically increasing */
	req->r_request->hdr.tid = cpu_to_le64(req->r_tid);
	INIT_LIST_HEAD(&req->r_req_lru_item);

	dout("register_request %p tid %lld\n", req, req->r_tid);
	__insert_request(osdc, req);
	ceph_osdc_get_request(req);	/* the tree holds a reference */
	osdc->num_requests++;

	if (osdc->num_requests == 1) {
		dout(" first request, scheduling timeout\n");
		__schedule_osd_timeout(osdc);
	}
	mutex_unlock(&osdc->request_mutex);
}
517 | |||
518 | /* | ||
519 | * called under osdc->request_mutex | ||
520 | */ | ||
static void __unregister_request(struct ceph_osd_client *osdc,
				 struct ceph_osd_request *req)
{
	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
	rb_erase(&req->r_node, &osdc->requests);
	osdc->num_requests--;

	if (req->r_osd) {
		/* make sure the original request isn't in flight. */
		ceph_con_revoke(&req->r_osd->o_con, req->r_request);

		list_del_init(&req->r_osd_item);
		/* osd now idle?  park it on the LRU for later reaping */
		if (list_empty(&req->r_osd->o_requests))
			__move_osd_to_lru(osdc, req->r_osd);
		req->r_osd = NULL;
	}

	ceph_osdc_put_request(req);	/* drop the tree's reference */

	list_del_init(&req->r_req_lru_item);
	if (osdc->num_requests == 0) {
		dout(" no requests, canceling timeout\n");
		__cancel_osd_timeout(osdc);
	}
}
546 | |||
547 | /* | ||
548 | * Cancel a previously queued request message | ||
549 | */ | ||
static void __cancel_request(struct ceph_osd_request *req)
{
	if (req->r_sent) {
		/* pull the message back off the connection's queue */
		ceph_con_revoke(&req->r_osd->o_con, req->r_request);
		req->r_sent = 0;
	}
	list_del_init(&req->r_req_lru_item);
}
558 | |||
559 | /* | ||
560 | * Pick an osd (the first 'up' osd in the pg), allocate the osd struct | ||
561 | * (as needed), and set the request r_osd appropriately. If there is | ||
562 | * no up osd, set r_osd to NULL. | ||
563 | * | ||
564 | * Return 0 if unchanged, 1 if changed, or negative on error. | ||
565 | * | ||
566 | * Caller should hold map_sem for read and request_mutex. | ||
567 | */ | ||
static int __map_osds(struct ceph_osd_client *osdc,
		      struct ceph_osd_request *req)
{
	struct ceph_osd_request_head *reqhead = req->r_request->front.iov_base;
	struct ceph_pg pgid;
	int acting[CEPH_PG_MAX_SIZE];
	int o = -1, num = 0;
	int err;

	dout("map_osds %p tid %lld\n", req, req->r_tid);
	/* object name -> placement group */
	err = ceph_calc_object_layout(&reqhead->layout, req->r_oid,
				      &req->r_file_layout, osdc->osdmap);
	if (err)
		return err;
	pgid = reqhead->layout.ol_pgid;
	req->r_pgid = pgid;

	/* placement group -> acting osd set; primary is acting[0] */
	err = ceph_calc_pg_acting(osdc->osdmap, pgid, acting);
	if (err > 0) {
		o = acting[0];
		num = err;
	}

	/* same primary, same acting set, already sent to this
	 * incarnation (or still no osd at all)?  nothing to do */
	if ((req->r_osd && req->r_osd->o_osd == o &&
	     req->r_sent >= req->r_osd->o_incarnation &&
	     req->r_num_pg_osds == num &&
	     memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
	    (req->r_osd == NULL && o == -1))
		return 0;  /* no change */

	dout("map_osds tid %llu pgid %d.%x osd%d (was osd%d)\n",
	     req->r_tid, le32_to_cpu(pgid.pool), le16_to_cpu(pgid.ps), o,
	     req->r_osd ? req->r_osd->o_osd : -1);

	/* record full pg acting set */
	memcpy(req->r_pg_osds, acting, sizeof(acting[0]) * num);
	req->r_num_pg_osds = num;

	/* detach from the old osd, if any */
	if (req->r_osd) {
		__cancel_request(req);
		list_del_init(&req->r_osd_item);
		req->r_osd = NULL;
	}

	req->r_osd = __lookup_osd(osdc, o);
	if (!req->r_osd && o >= 0) {
		/* first contact with this osd: create a session */
		err = -ENOMEM;
		req->r_osd = create_osd(osdc);
		if (!req->r_osd)
			goto out;

		dout("map_osds osd %p is osd%d\n", req->r_osd, o);
		req->r_osd->o_osd = o;
		req->r_osd->o_con.peer_name.num = cpu_to_le64(o);
		__insert_osd(osdc, req->r_osd);

		ceph_con_open(&req->r_osd->o_con, &osdc->osdmap->osd_addr[o]);
	}

	if (req->r_osd) {
		__remove_osd_from_lru(req->r_osd);
		list_add(&req->r_osd_item, &req->r_osd->o_requests);
	}
	err = 1; /* osd or pg changed */

out:
	return err;
}
636 | |||
637 | /* | ||
638 | * caller should hold map_sem (for read) and request_mutex | ||
639 | */ | ||
static int __send_request(struct ceph_osd_client *osdc,
			  struct ceph_osd_request *req)
{
	struct ceph_osd_request_head *reqhead;
	int err;

	/* (re)map the request; this may move it to a different osd */
	err = __map_osds(osdc, req);
	if (err < 0)
		return err;
	if (req->r_osd == NULL) {
		/* no usable osd for this pg; ask the monitors for a newer map */
		dout("send_request %p no up osds in pg\n", req);
		ceph_monc_request_next_osdmap(&osdc->client->monc);
		return 0;
	}

	dout("send_request %p tid %llu to osd%d flags %d\n",
	     req, req->r_tid, req->r_osd->o_osd, req->r_flags);

	/* stamp the outgoing message with the current epoch and flags */
	reqhead = req->r_request->front.iov_base;
	reqhead->osdmap_epoch = cpu_to_le32(osdc->osdmap->epoch);
	reqhead->flags |= cpu_to_le32(req->r_flags);  /* e.g., RETRY */
	reqhead->reassert_version = req->r_reassert_version;

	/* move to the tail of the timeout lru: most recently (re)sent */
	req->r_stamp = jiffies;
	list_move_tail(&req->r_req_lru_item, &osdc->req_lru);

	ceph_msg_get(req->r_request); /* send consumes a ref */
	ceph_con_send(&req->r_osd->o_con, req->r_request);
	/* remember which connection incarnation this was sent on */
	req->r_sent = req->r_osd->o_incarnation;
	return 0;
}
671 | |||
672 | /* | ||
673 | * Timeout callback, called every N seconds when 1 or more osd | ||
674 | * requests has been active for more than N seconds. When this | ||
675 | * happens, we ping all OSDs with requests who have timed out to | ||
676 | * ensure any communications channel reset is detected. Reset the | ||
677 | * request timeouts another N seconds in the future as we go. | ||
678 | * Reschedule the timeout event another N seconds in future (unless | ||
679 | * there are no open requests). | ||
680 | */ | ||
static void handle_timeout(struct work_struct *work)
{
	struct ceph_osd_client *osdc =
		container_of(work, struct ceph_osd_client, timeout_work.work);
	struct ceph_osd_request *req, *last_req = NULL;
	struct ceph_osd *osd;
	unsigned long timeout = osdc->client->mount_args->osd_timeout * HZ;
	unsigned long keepalive =
		osdc->client->mount_args->osd_keepalive_timeout * HZ;
	unsigned long last_stamp = 0;
	struct rb_node *p;
	struct list_head slow_osds;

	dout("timeout\n");
	down_read(&osdc->map_sem);

	ceph_monc_request_next_osdmap(&osdc->client->monc);

	mutex_lock(&osdc->request_mutex);
	/* retry any requests whose earlier send attempt failed */
	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
		req = rb_entry(p, struct ceph_osd_request, r_node);

		if (req->r_resend) {
			int err;

			dout("osdc resending prev failed %lld\n", req->r_tid);
			err = __send_request(osdc, req);
			if (err)
				dout("osdc failed again on %lld\n", req->r_tid);
			else
				req->r_resend = false;
			continue;
		}
	}

	/*
	 * reset osds that appear to be _really_ unresponsive.  this
	 * is a failsafe measure.. we really shouldn't be getting to
	 * this point if the system is working properly.  the monitors
	 * should mark the osd as failed and we should find out about
	 * it from an updated osd map.
	 */
	while (timeout && !list_empty(&osdc->req_lru)) {
		/* req_lru is ordered by r_stamp, oldest first */
		req = list_entry(osdc->req_lru.next, struct ceph_osd_request,
				 r_req_lru_item);

		if (time_before(jiffies, req->r_stamp + timeout))
			break;

		/* kicking must have re-stamped the request; else we'd spin */
		BUG_ON(req == last_req && req->r_stamp == last_stamp);
		last_req = req;
		last_stamp = req->r_stamp;

		osd = req->r_osd;
		BUG_ON(!osd);
		pr_warning(" tid %llu timed out on osd%d, will reset osd\n",
			   req->r_tid, osd->o_osd);
		__kick_requests(osdc, osd);
	}

	/*
	 * ping osds that are a bit slow.  this ensures that if there
	 * is a break in the TCP connection we will notice, and reopen
	 * a connection with that osd (from the fault callback).
	 */
	INIT_LIST_HEAD(&slow_osds);
	list_for_each_entry(req, &osdc->req_lru, r_req_lru_item) {
		if (time_before(jiffies, req->r_stamp + keepalive))
			break;

		osd = req->r_osd;
		BUG_ON(!osd);
		dout(" tid %llu is slow, will send keepalive on osd%d\n",
		     req->r_tid, osd->o_osd);
		/* collect first; keepalives are sent after the scan */
		list_move_tail(&osd->o_keepalive_item, &slow_osds);
	}
	while (!list_empty(&slow_osds)) {
		osd = list_entry(slow_osds.next, struct ceph_osd,
				 o_keepalive_item);
		list_del_init(&osd->o_keepalive_item);
		ceph_con_keepalive(&osd->o_con);
	}

	/* re-arm ourselves */
	__schedule_osd_timeout(osdc);
	mutex_unlock(&osdc->request_mutex);

	up_read(&osdc->map_sem);
}
769 | |||
770 | static void handle_osds_timeout(struct work_struct *work) | ||
771 | { | ||
772 | struct ceph_osd_client *osdc = | ||
773 | container_of(work, struct ceph_osd_client, | ||
774 | osds_timeout_work.work); | ||
775 | unsigned long delay = | ||
776 | osdc->client->mount_args->osd_idle_ttl * HZ >> 2; | ||
777 | |||
778 | dout("osds timeout\n"); | ||
779 | down_read(&osdc->map_sem); | ||
780 | remove_old_osds(osdc, 0); | ||
781 | up_read(&osdc->map_sem); | ||
782 | |||
783 | schedule_delayed_work(&osdc->osds_timeout_work, | ||
784 | round_jiffies_relative(delay)); | ||
785 | } | ||
786 | |||
787 | /* | ||
788 | * handle osd op reply. either call the callback if it is specified, | ||
789 | * or do the completion to wake up the waiting thread. | ||
790 | */ | ||
static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
			 struct ceph_connection *con)
{
	struct ceph_osd_reply_head *rhead = msg->front.iov_base;
	struct ceph_osd_request *req;
	u64 tid;
	int numops, object_len, flags;
	s32 result;

	tid = le64_to_cpu(msg->hdr.tid);
	if (msg->front.iov_len < sizeof(*rhead))
		goto bad;
	numops = le32_to_cpu(rhead->num_ops);
	object_len = le32_to_cpu(rhead->object_len);
	result = le32_to_cpu(rhead->result);
	/* sanity-check the advertised payload layout against the msg size */
	if (msg->front.iov_len != sizeof(*rhead) + object_len +
	    numops * sizeof(struct ceph_osd_op))
		goto bad;
	dout("handle_reply %p tid %llu result %d\n", msg, tid, (int)result);

	/* lookup */
	mutex_lock(&osdc->request_mutex);
	req = __lookup_request(osdc, tid);
	if (req == NULL) {
		/* request already completed/canceled; drop the reply */
		dout("handle_reply tid %llu dne\n", tid);
		mutex_unlock(&osdc->request_mutex);
		return;
	}
	/* hold a ref so the callbacks below can run after we unlock */
	ceph_osdc_get_request(req);
	flags = le32_to_cpu(rhead->flags);

	/*
	 * if this connection filled our message, drop our reference now, to
	 * avoid a (safe but slower) revoke later.
	 */
	if (req->r_con_filling_msg == con && req->r_reply == msg) {
		dout(" dropping con_filling_msg ref %p\n", con);
		req->r_con_filling_msg = NULL;
		ceph_con_put(con);
	}

	if (!req->r_got_reply) {
		/* first reply for this request: record the result */
		unsigned bytes;

		req->r_result = le32_to_cpu(rhead->result);
		bytes = le32_to_cpu(msg->hdr.data_len);
		dout("handle_reply result %d bytes %d\n", req->r_result,
		     bytes);
		/* a successful read reports the byte count as its result */
		if (req->r_result == 0)
			req->r_result = bytes;

		/* in case this is a write and we need to replay, */
		req->r_reassert_version = rhead->reassert_version;

		req->r_got_reply = 1;
	} else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
		/* duplicate ack (not the "safe"/on-disk reply); ignore */
		dout("handle_reply tid %llu dup ack\n", tid);
		mutex_unlock(&osdc->request_mutex);
		goto done;
	}

	dout("handle_reply tid %llu flags %d\n", tid, flags);

	/* either this is a read, or we got the safe response */
	if (result < 0 ||
	    (flags & CEPH_OSD_FLAG_ONDISK) ||
	    ((flags & CEPH_OSD_FLAG_WRITE) == 0))
		__unregister_request(osdc, req);

	mutex_unlock(&osdc->request_mutex);

	/* completion/callbacks run without request_mutex held */
	if (req->r_callback)
		req->r_callback(req, msg);
	else
		complete_all(&req->r_completion);

	if (flags & CEPH_OSD_FLAG_ONDISK) {
		if (req->r_safe_callback)
			req->r_safe_callback(req, msg);
		complete_all(&req->r_safe_completion);  /* fsync waiter */
	}

done:
	ceph_osdc_put_request(req);
	return;

bad:
	pr_err("corrupt osd_op_reply got %d %d expected %d\n",
	       (int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len),
	       (int)sizeof(*rhead));
	ceph_msg_dump(msg);
}
883 | |||
884 | |||
/*
 * Remap and resend requests after an osd reset or map change.  If
 * @kickosd is non-NULL only that osd is reset and its requests kicked;
 * otherwise all osd sessions are checked against the current map.
 * Returns the number of requests that currently map to no usable osd
 * (i.e. how badly we need a newer osdmap).
 */
static int __kick_requests(struct ceph_osd_client *osdc,
			  struct ceph_osd *kickosd)
{
	struct ceph_osd_request *req;
	struct rb_node *p, *n;
	int needmap = 0;
	int err;

	dout("kick_requests osd%d\n", kickosd ? kickosd->o_osd : -1);
	if (kickosd) {
		err = __reset_osd(osdc, kickosd);
		if (err == -EAGAIN)
			return 1;
	} else {
		/* reset any osd that is down or whose address changed */
		for (p = rb_first(&osdc->osds); p; p = n) {
			struct ceph_osd *osd =
				rb_entry(p, struct ceph_osd, o_node);

			/* grab next first: __reset_osd may remove this node */
			n = rb_next(p);
			if (!ceph_osd_is_up(osdc->osdmap, osd->o_osd) ||
			    memcmp(&osd->o_con.peer_addr,
				   ceph_osd_addr(osdc->osdmap,
						 osd->o_osd),
				   sizeof(struct ceph_entity_addr)) != 0)
				__reset_osd(osdc, osd);
		}
	}

	for (p = rb_first(&osdc->requests); p; p = rb_next(p)) {
		req = rb_entry(p, struct ceph_osd_request, r_node);

		if (req->r_resend) {
			dout(" r_resend set on tid %llu\n", req->r_tid);
			__cancel_request(req);
			goto kick;
		}
		if (req->r_osd && kickosd == req->r_osd) {
			__cancel_request(req);
			goto kick;
		}

		err = __map_osds(osdc, req);
		if (err == 0)
			continue;  /* no change */
		if (err < 0) {
			/*
			 * FIXME: really, we should set the request
			 * error and fail if this isn't a 'nofail'
			 * request, but that's a fair bit more
			 * complicated to do.  So retry!
			 */
			dout(" setting r_resend on %llu\n", req->r_tid);
			req->r_resend = true;
			continue;
		}
		if (req->r_osd == NULL) {
			dout("tid %llu maps to no valid osd\n", req->r_tid);
			needmap++;  /* request a newer map */
			continue;
		}

kick:
		dout("kicking %p tid %llu osd%d\n", req, req->r_tid,
		     req->r_osd ? req->r_osd->o_osd : -1);
		req->r_flags |= CEPH_OSD_FLAG_RETRY;
		err = __send_request(osdc, req);
		if (err) {
			/* try again later from handle_timeout */
			dout(" setting r_resend on %llu\n", req->r_tid);
			req->r_resend = true;
		}
	}

	return needmap;
}
959 | |||
960 | /* | ||
961 | * Resubmit osd requests whose osd or osd address has changed. Request | ||
962 | * a new osd map if osds are down, or we are otherwise unable to determine | ||
963 | * how to direct a request. | ||
964 | * | ||
965 | * Close connections to down osds. | ||
966 | * | ||
967 | * If @who is specified, resubmit requests for that specific osd. | ||
968 | * | ||
969 | * Caller should hold map_sem for read and request_mutex. | ||
970 | */ | ||
971 | static void kick_requests(struct ceph_osd_client *osdc, | ||
972 | struct ceph_osd *kickosd) | ||
973 | { | ||
974 | int needmap; | ||
975 | |||
976 | mutex_lock(&osdc->request_mutex); | ||
977 | needmap = __kick_requests(osdc, kickosd); | ||
978 | mutex_unlock(&osdc->request_mutex); | ||
979 | |||
980 | if (needmap) { | ||
981 | dout("%d requests for down osds, need new map\n", needmap); | ||
982 | ceph_monc_request_next_osdmap(&osdc->client->monc); | ||
983 | } | ||
984 | |||
985 | } | ||
986 | /* | ||
987 | * Process updated osd map. | ||
988 | * | ||
989 | * The message contains any number of incremental and full maps, normally | ||
990 | * indicating some sort of topology change in the cluster. Kick requests | ||
991 | * off to different OSDs as needed. | ||
992 | */ | ||
993 | void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) | ||
994 | { | ||
995 | void *p, *end, *next; | ||
996 | u32 nr_maps, maplen; | ||
997 | u32 epoch; | ||
998 | struct ceph_osdmap *newmap = NULL, *oldmap; | ||
999 | int err; | ||
1000 | struct ceph_fsid fsid; | ||
1001 | |||
1002 | dout("handle_map have %u\n", osdc->osdmap ? osdc->osdmap->epoch : 0); | ||
1003 | p = msg->front.iov_base; | ||
1004 | end = p + msg->front.iov_len; | ||
1005 | |||
1006 | /* verify fsid */ | ||
1007 | ceph_decode_need(&p, end, sizeof(fsid), bad); | ||
1008 | ceph_decode_copy(&p, &fsid, sizeof(fsid)); | ||
1009 | if (ceph_check_fsid(osdc->client, &fsid) < 0) | ||
1010 | return; | ||
1011 | |||
1012 | down_write(&osdc->map_sem); | ||
1013 | |||
1014 | /* incremental maps */ | ||
1015 | ceph_decode_32_safe(&p, end, nr_maps, bad); | ||
1016 | dout(" %d inc maps\n", nr_maps); | ||
1017 | while (nr_maps > 0) { | ||
1018 | ceph_decode_need(&p, end, 2*sizeof(u32), bad); | ||
1019 | epoch = ceph_decode_32(&p); | ||
1020 | maplen = ceph_decode_32(&p); | ||
1021 | ceph_decode_need(&p, end, maplen, bad); | ||
1022 | next = p + maplen; | ||
1023 | if (osdc->osdmap && osdc->osdmap->epoch+1 == epoch) { | ||
1024 | dout("applying incremental map %u len %d\n", | ||
1025 | epoch, maplen); | ||
1026 | newmap = osdmap_apply_incremental(&p, next, | ||
1027 | osdc->osdmap, | ||
1028 | osdc->client->msgr); | ||
1029 | if (IS_ERR(newmap)) { | ||
1030 | err = PTR_ERR(newmap); | ||
1031 | goto bad; | ||
1032 | } | ||
1033 | BUG_ON(!newmap); | ||
1034 | if (newmap != osdc->osdmap) { | ||
1035 | ceph_osdmap_destroy(osdc->osdmap); | ||
1036 | osdc->osdmap = newmap; | ||
1037 | } | ||
1038 | } else { | ||
1039 | dout("ignoring incremental map %u len %d\n", | ||
1040 | epoch, maplen); | ||
1041 | } | ||
1042 | p = next; | ||
1043 | nr_maps--; | ||
1044 | } | ||
1045 | if (newmap) | ||
1046 | goto done; | ||
1047 | |||
1048 | /* full maps */ | ||
1049 | ceph_decode_32_safe(&p, end, nr_maps, bad); | ||
1050 | dout(" %d full maps\n", nr_maps); | ||
1051 | while (nr_maps) { | ||
1052 | ceph_decode_need(&p, end, 2*sizeof(u32), bad); | ||
1053 | epoch = ceph_decode_32(&p); | ||
1054 | maplen = ceph_decode_32(&p); | ||
1055 | ceph_decode_need(&p, end, maplen, bad); | ||
1056 | if (nr_maps > 1) { | ||
1057 | dout("skipping non-latest full map %u len %d\n", | ||
1058 | epoch, maplen); | ||
1059 | } else if (osdc->osdmap && osdc->osdmap->epoch >= epoch) { | ||
1060 | dout("skipping full map %u len %d, " | ||
1061 | "older than our %u\n", epoch, maplen, | ||
1062 | osdc->osdmap->epoch); | ||
1063 | } else { | ||
1064 | dout("taking full map %u len %d\n", epoch, maplen); | ||
1065 | newmap = osdmap_decode(&p, p+maplen); | ||
1066 | if (IS_ERR(newmap)) { | ||
1067 | err = PTR_ERR(newmap); | ||
1068 | goto bad; | ||
1069 | } | ||
1070 | BUG_ON(!newmap); | ||
1071 | oldmap = osdc->osdmap; | ||
1072 | osdc->osdmap = newmap; | ||
1073 | if (oldmap) | ||
1074 | ceph_osdmap_destroy(oldmap); | ||
1075 | } | ||
1076 | p += maplen; | ||
1077 | nr_maps--; | ||
1078 | } | ||
1079 | |||
1080 | done: | ||
1081 | downgrade_write(&osdc->map_sem); | ||
1082 | ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); | ||
1083 | if (newmap) | ||
1084 | kick_requests(osdc, NULL); | ||
1085 | up_read(&osdc->map_sem); | ||
1086 | wake_up_all(&osdc->client->auth_wq); | ||
1087 | return; | ||
1088 | |||
1089 | bad: | ||
1090 | pr_err("osdc handle_map corrupt msg\n"); | ||
1091 | ceph_msg_dump(msg); | ||
1092 | up_write(&osdc->map_sem); | ||
1093 | return; | ||
1094 | } | ||
1095 | |||
1096 | /* | ||
1097 | * Register request, send initial attempt. | ||
1098 | */ | ||
/*
 * Register @req with the osd client and send the initial attempt.
 * If @nofail, a failed send is converted into a deferred resend
 * (handled by handle_timeout) rather than an error return.
 */
int ceph_osdc_start_request(struct ceph_osd_client *osdc,
			    struct ceph_osd_request *req,
			    bool nofail)
{
	int rc = 0;

	req->r_request->pages = req->r_pages;
	req->r_request->nr_pages = req->r_num_pages;

	register_request(osdc, req);

	down_read(&osdc->map_sem);
	mutex_lock(&osdc->request_mutex);
	/*
	 * a racing kick_requests() may have sent the message for us
	 * while we dropped request_mutex above, so only send now if
	 * the request still hasn't been touched yet.
	 */
	if (req->r_sent == 0) {
		rc = __send_request(osdc, req);
		if (rc) {
			if (nofail) {
				dout("osdc_start_request failed send, "
				     " marking %lld\n", req->r_tid);
				req->r_resend = true;
				rc = 0;
			} else {
				__unregister_request(osdc, req);
			}
		}
	}
	mutex_unlock(&osdc->request_mutex);
	up_read(&osdc->map_sem);
	return rc;
}
1134 | |||
1135 | /* | ||
1136 | * wait for a request to complete | ||
1137 | */ | ||
1138 | int ceph_osdc_wait_request(struct ceph_osd_client *osdc, | ||
1139 | struct ceph_osd_request *req) | ||
1140 | { | ||
1141 | int rc; | ||
1142 | |||
1143 | rc = wait_for_completion_interruptible(&req->r_completion); | ||
1144 | if (rc < 0) { | ||
1145 | mutex_lock(&osdc->request_mutex); | ||
1146 | __cancel_request(req); | ||
1147 | __unregister_request(osdc, req); | ||
1148 | mutex_unlock(&osdc->request_mutex); | ||
1149 | dout("wait_request tid %llu canceled/timed out\n", req->r_tid); | ||
1150 | return rc; | ||
1151 | } | ||
1152 | |||
1153 | dout("wait_request tid %llu result %d\n", req->r_tid, req->r_result); | ||
1154 | return req->r_result; | ||
1155 | } | ||
1156 | |||
1157 | /* | ||
1158 | * sync - wait for all in-flight requests to flush. avoid starvation. | ||
1159 | */ | ||
void ceph_osdc_sync(struct ceph_osd_client *osdc)
{
	struct ceph_osd_request *req;
	u64 last_tid, next_tid = 0;

	mutex_lock(&osdc->request_mutex);
	/* only wait for requests submitted before we started */
	last_tid = osdc->last_tid;
	while (1) {
		/* walk by tid so we make progress even as requests complete */
		req = __lookup_request_ge(osdc, next_tid);
		if (!req)
			break;
		if (req->r_tid > last_tid)
			break;

		next_tid = req->r_tid + 1;
		/* only writes have on-disk (safe) completions to wait for */
		if ((req->r_flags & CEPH_OSD_FLAG_WRITE) == 0)
			continue;

		/* hold a ref across the unlocked wait */
		ceph_osdc_get_request(req);
		mutex_unlock(&osdc->request_mutex);
		dout("sync waiting on tid %llu (last is %llu)\n",
		     req->r_tid, last_tid);
		wait_for_completion(&req->r_safe_completion);
		mutex_lock(&osdc->request_mutex);
		ceph_osdc_put_request(req);
	}
	mutex_unlock(&osdc->request_mutex);
	dout("sync done (thru tid %llu)\n", last_tid);
}
1189 | |||
1190 | /* | ||
1191 | * init, shutdown | ||
1192 | */ | ||
1193 | int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) | ||
1194 | { | ||
1195 | int err; | ||
1196 | |||
1197 | dout("init\n"); | ||
1198 | osdc->client = client; | ||
1199 | osdc->osdmap = NULL; | ||
1200 | init_rwsem(&osdc->map_sem); | ||
1201 | init_completion(&osdc->map_waiters); | ||
1202 | osdc->last_requested_map = 0; | ||
1203 | mutex_init(&osdc->request_mutex); | ||
1204 | osdc->last_tid = 0; | ||
1205 | osdc->osds = RB_ROOT; | ||
1206 | INIT_LIST_HEAD(&osdc->osd_lru); | ||
1207 | osdc->requests = RB_ROOT; | ||
1208 | INIT_LIST_HEAD(&osdc->req_lru); | ||
1209 | osdc->num_requests = 0; | ||
1210 | INIT_DELAYED_WORK(&osdc->timeout_work, handle_timeout); | ||
1211 | INIT_DELAYED_WORK(&osdc->osds_timeout_work, handle_osds_timeout); | ||
1212 | |||
1213 | schedule_delayed_work(&osdc->osds_timeout_work, | ||
1214 | round_jiffies_relative(osdc->client->mount_args->osd_idle_ttl * HZ)); | ||
1215 | |||
1216 | err = -ENOMEM; | ||
1217 | osdc->req_mempool = mempool_create_kmalloc_pool(10, | ||
1218 | sizeof(struct ceph_osd_request)); | ||
1219 | if (!osdc->req_mempool) | ||
1220 | goto out; | ||
1221 | |||
1222 | err = ceph_msgpool_init(&osdc->msgpool_op, OSD_OP_FRONT_LEN, 10, true, | ||
1223 | "osd_op"); | ||
1224 | if (err < 0) | ||
1225 | goto out_mempool; | ||
1226 | err = ceph_msgpool_init(&osdc->msgpool_op_reply, | ||
1227 | OSD_OPREPLY_FRONT_LEN, 10, true, | ||
1228 | "osd_op_reply"); | ||
1229 | if (err < 0) | ||
1230 | goto out_msgpool; | ||
1231 | return 0; | ||
1232 | |||
1233 | out_msgpool: | ||
1234 | ceph_msgpool_destroy(&osdc->msgpool_op); | ||
1235 | out_mempool: | ||
1236 | mempool_destroy(osdc->req_mempool); | ||
1237 | out: | ||
1238 | return err; | ||
1239 | } | ||
1240 | |||
1241 | void ceph_osdc_stop(struct ceph_osd_client *osdc) | ||
1242 | { | ||
1243 | cancel_delayed_work_sync(&osdc->timeout_work); | ||
1244 | cancel_delayed_work_sync(&osdc->osds_timeout_work); | ||
1245 | if (osdc->osdmap) { | ||
1246 | ceph_osdmap_destroy(osdc->osdmap); | ||
1247 | osdc->osdmap = NULL; | ||
1248 | } | ||
1249 | remove_old_osds(osdc, 1); | ||
1250 | mempool_destroy(osdc->req_mempool); | ||
1251 | ceph_msgpool_destroy(&osdc->msgpool_op); | ||
1252 | ceph_msgpool_destroy(&osdc->msgpool_op_reply); | ||
1253 | } | ||
1254 | |||
1255 | /* | ||
1256 | * Read some contiguous pages. If we cross a stripe boundary, shorten | ||
1257 | * *plen. Return number of bytes read, or error. | ||
1258 | */ | ||
int ceph_osdc_readpages(struct ceph_osd_client *osdc,
			struct ceph_vino vino, struct ceph_file_layout *layout,
			u64 off, u64 *plen,
			u32 truncate_seq, u64 truncate_size,
			struct page **pages, int num_pages)
{
	struct ceph_osd_request *req;
	int rc = 0;

	dout("readpages on ino %llx.%llx on %llu~%llu\n", vino.ino,
	     vino.snap, off, *plen);
	/* new_request may shorten *plen to the object/stripe boundary */
	req = ceph_osdc_new_request(osdc, layout, vino, off, plen,
				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
				    NULL, 0, truncate_seq, truncate_size, NULL,
				    false, 1);
	if (!req)
		return -ENOMEM;

	/* it may be a short read due to an object boundary */
	req->r_pages = pages;

	dout("readpages final extent is %llu~%llu (%d pages)\n",
	     off, *plen, req->r_num_pages);

	/* submit synchronously and wait for the result */
	rc = ceph_osdc_start_request(osdc, req, false);
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);
	dout("readpages result %d\n", rc);
	return rc;
}
1291 | |||
1292 | /* | ||
1293 | * do a synchronous write on N pages | ||
1294 | */ | ||
int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
			 struct ceph_file_layout *layout,
			 struct ceph_snap_context *snapc,
			 u64 off, u64 len,
			 u32 truncate_seq, u64 truncate_size,
			 struct timespec *mtime,
			 struct page **pages, int num_pages,
			 int flags, int do_sync, bool nofail)
{
	struct ceph_osd_request *req;
	int rc = 0;

	/* writes are only issued against the head (non-snapshot) object */
	BUG_ON(vino.snap != CEPH_NOSNAP);
	/* new_request may shorten len to the object/stripe boundary */
	req = ceph_osdc_new_request(osdc, layout, vino, off, &len,
				    CEPH_OSD_OP_WRITE,
				    flags | CEPH_OSD_FLAG_ONDISK |
					    CEPH_OSD_FLAG_WRITE,
				    snapc, do_sync,
				    truncate_seq, truncate_size, mtime,
				    nofail, 1);
	if (!req)
		return -ENOMEM;

	/* it may be a short write due to an object boundary */
	req->r_pages = pages;
	dout("writepages %llu~%llu (%d pages)\n", off, len,
	     req->r_num_pages);

	rc = ceph_osdc_start_request(osdc, req, nofail);
	if (!rc)
		rc = ceph_osdc_wait_request(osdc, req);

	ceph_osdc_put_request(req);
	/* success: report the (possibly shortened) length written */
	if (rc == 0)
		rc = len;
	dout("writepages result %d\n", rc);
	return rc;
}
1333 | |||
1334 | /* | ||
1335 | * handle incoming message | ||
1336 | */ | ||
1337 | static void dispatch(struct ceph_connection *con, struct ceph_msg *msg) | ||
1338 | { | ||
1339 | struct ceph_osd *osd = con->private; | ||
1340 | struct ceph_osd_client *osdc; | ||
1341 | int type = le16_to_cpu(msg->hdr.type); | ||
1342 | |||
1343 | if (!osd) | ||
1344 | goto out; | ||
1345 | osdc = osd->o_osdc; | ||
1346 | |||
1347 | switch (type) { | ||
1348 | case CEPH_MSG_OSD_MAP: | ||
1349 | ceph_osdc_handle_map(osdc, msg); | ||
1350 | break; | ||
1351 | case CEPH_MSG_OSD_OPREPLY: | ||
1352 | handle_reply(osdc, msg, con); | ||
1353 | break; | ||
1354 | |||
1355 | default: | ||
1356 | pr_err("received unknown message type %d %s\n", type, | ||
1357 | ceph_msg_type_name(type)); | ||
1358 | } | ||
1359 | out: | ||
1360 | ceph_msg_put(msg); | ||
1361 | } | ||
1362 | |||
1363 | /* | ||
1364 | * lookup and return message for incoming reply. set up reply message | ||
1365 | * pages. | ||
1366 | */ | ||
1367 | static struct ceph_msg *get_reply(struct ceph_connection *con, | ||
1368 | struct ceph_msg_header *hdr, | ||
1369 | int *skip) | ||
1370 | { | ||
1371 | struct ceph_osd *osd = con->private; | ||
1372 | struct ceph_osd_client *osdc = osd->o_osdc; | ||
1373 | struct ceph_msg *m; | ||
1374 | struct ceph_osd_request *req; | ||
1375 | int front = le32_to_cpu(hdr->front_len); | ||
1376 | int data_len = le32_to_cpu(hdr->data_len); | ||
1377 | u64 tid; | ||
1378 | |||
1379 | tid = le64_to_cpu(hdr->tid); | ||
1380 | mutex_lock(&osdc->request_mutex); | ||
1381 | req = __lookup_request(osdc, tid); | ||
1382 | if (!req) { | ||
1383 | *skip = 1; | ||
1384 | m = NULL; | ||
1385 | pr_info("get_reply unknown tid %llu from osd%d\n", tid, | ||
1386 | osd->o_osd); | ||
1387 | goto out; | ||
1388 | } | ||
1389 | |||
1390 | if (req->r_con_filling_msg) { | ||
1391 | dout("get_reply revoking msg %p from old con %p\n", | ||
1392 | req->r_reply, req->r_con_filling_msg); | ||
1393 | ceph_con_revoke_message(req->r_con_filling_msg, req->r_reply); | ||
1394 | ceph_con_put(req->r_con_filling_msg); | ||
1395 | req->r_con_filling_msg = NULL; | ||
1396 | } | ||
1397 | |||
1398 | if (front > req->r_reply->front.iov_len) { | ||
1399 | pr_warning("get_reply front %d > preallocated %d\n", | ||
1400 | front, (int)req->r_reply->front.iov_len); | ||
1401 | m = ceph_msg_new(CEPH_MSG_OSD_OPREPLY, front, GFP_NOFS); | ||
1402 | if (!m) | ||
1403 | goto out; | ||
1404 | ceph_msg_put(req->r_reply); | ||
1405 | req->r_reply = m; | ||
1406 | } | ||
1407 | m = ceph_msg_get(req->r_reply); | ||
1408 | |||
1409 | if (data_len > 0) { | ||
1410 | unsigned data_off = le16_to_cpu(hdr->data_off); | ||
1411 | int want = calc_pages_for(data_off & ~PAGE_MASK, data_len); | ||
1412 | |||
1413 | if (unlikely(req->r_num_pages < want)) { | ||
1414 | pr_warning("tid %lld reply %d > expected %d pages\n", | ||
1415 | tid, want, m->nr_pages); | ||
1416 | *skip = 1; | ||
1417 | ceph_msg_put(m); | ||
1418 | m = NULL; | ||
1419 | goto out; | ||
1420 | } | ||
1421 | m->pages = req->r_pages; | ||
1422 | m->nr_pages = req->r_num_pages; | ||
1423 | } | ||
1424 | *skip = 0; | ||
1425 | req->r_con_filling_msg = ceph_con_get(con); | ||
1426 | dout("get_reply tid %lld %p\n", tid, m); | ||
1427 | |||
1428 | out: | ||
1429 | mutex_unlock(&osdc->request_mutex); | ||
1430 | return m; | ||
1431 | |||
1432 | } | ||
1433 | |||
1434 | static struct ceph_msg *alloc_msg(struct ceph_connection *con, | ||
1435 | struct ceph_msg_header *hdr, | ||
1436 | int *skip) | ||
1437 | { | ||
1438 | struct ceph_osd *osd = con->private; | ||
1439 | int type = le16_to_cpu(hdr->type); | ||
1440 | int front = le32_to_cpu(hdr->front_len); | ||
1441 | |||
1442 | switch (type) { | ||
1443 | case CEPH_MSG_OSD_MAP: | ||
1444 | return ceph_msg_new(type, front, GFP_NOFS); | ||
1445 | case CEPH_MSG_OSD_OPREPLY: | ||
1446 | return get_reply(con, hdr, skip); | ||
1447 | default: | ||
1448 | pr_info("alloc_msg unexpected msg type %d from osd%d\n", type, | ||
1449 | osd->o_osd); | ||
1450 | *skip = 1; | ||
1451 | return NULL; | ||
1452 | } | ||
1453 | } | ||
1454 | |||
1455 | /* | ||
1456 | * Wrappers to refcount containing ceph_osd struct | ||
1457 | */ | ||
1458 | static struct ceph_connection *get_osd_con(struct ceph_connection *con) | ||
1459 | { | ||
1460 | struct ceph_osd *osd = con->private; | ||
1461 | if (get_osd(osd)) | ||
1462 | return con; | ||
1463 | return NULL; | ||
1464 | } | ||
1465 | |||
1466 | static void put_osd_con(struct ceph_connection *con) | ||
1467 | { | ||
1468 | struct ceph_osd *osd = con->private; | ||
1469 | put_osd(osd); | ||
1470 | } | ||
1471 | |||
1472 | /* | ||
1473 | * authentication | ||
1474 | */ | ||
/*
 * Build (or reuse) the authorizer for this osd connection and hand the
 * buffers back to the messenger.  If @force_new, any cached authorizer
 * is destroyed and regenerated (e.g. after the old one was rejected).
 */
static int get_authorizer(struct ceph_connection *con,
			  void **buf, int *len, int *proto,
			  void **reply_buf, int *reply_len, int force_new)
{
	struct ceph_osd *o = con->private;
	struct ceph_osd_client *osdc = o->o_osdc;
	struct ceph_auth_client *ac = osdc->client->monc.auth;
	int ret = 0;

	if (force_new && o->o_authorizer) {
		ac->ops->destroy_authorizer(ac, o->o_authorizer);
		o->o_authorizer = NULL;
	}
	if (o->o_authorizer == NULL) {
		/* buffers are owned by the authorizer and cached on the osd */
		ret = ac->ops->create_authorizer(
			ac, CEPH_ENTITY_TYPE_OSD,
			&o->o_authorizer,
			&o->o_authorizer_buf,
			&o->o_authorizer_buf_len,
			&o->o_authorizer_reply_buf,
			&o->o_authorizer_reply_buf_len);
		if (ret)
			return ret;
	}

	*proto = ac->protocol;
	*buf = o->o_authorizer_buf;
	*len = o->o_authorizer_buf_len;
	*reply_buf = o->o_authorizer_reply_buf;
	*reply_len = o->o_authorizer_reply_buf_len;
	return 0;
}
1507 | |||
1508 | |||
1509 | static int verify_authorizer_reply(struct ceph_connection *con, int len) | ||
1510 | { | ||
1511 | struct ceph_osd *o = con->private; | ||
1512 | struct ceph_osd_client *osdc = o->o_osdc; | ||
1513 | struct ceph_auth_client *ac = osdc->client->monc.auth; | ||
1514 | |||
1515 | return ac->ops->verify_authorizer_reply(ac, o->o_authorizer, len); | ||
1516 | } | ||
1517 | |||
1518 | static int invalidate_authorizer(struct ceph_connection *con) | ||
1519 | { | ||
1520 | struct ceph_osd *o = con->private; | ||
1521 | struct ceph_osd_client *osdc = o->o_osdc; | ||
1522 | struct ceph_auth_client *ac = osdc->client->monc.auth; | ||
1523 | |||
1524 | if (ac->ops->invalidate_authorizer) | ||
1525 | ac->ops->invalidate_authorizer(ac, CEPH_ENTITY_TYPE_OSD); | ||
1526 | |||
1527 | return ceph_monc_validate_auth(&osdc->client->monc); | ||
1528 | } | ||
1529 | |||
/* messenger callbacks for osd connections */
static const struct ceph_connection_operations osd_con_ops = {
	.get = get_osd_con,
	.put = put_osd_con,
	.dispatch = dispatch,
	.get_authorizer = get_authorizer,
	.verify_authorizer_reply = verify_authorizer_reply,
	.invalidate_authorizer = invalidate_authorizer,
	.alloc_msg = alloc_msg,
	.fault = osd_reset,	/* connection error: reset osd session */
};
diff --git a/fs/ceph/osd_client.h b/fs/ceph/osd_client.h deleted file mode 100644 index ce776989ef6a..000000000000 --- a/fs/ceph/osd_client.h +++ /dev/null | |||
@@ -1,167 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_OSD_CLIENT_H | ||
2 | #define _FS_CEPH_OSD_CLIENT_H | ||
3 | |||
4 | #include <linux/completion.h> | ||
5 | #include <linux/kref.h> | ||
6 | #include <linux/mempool.h> | ||
7 | #include <linux/rbtree.h> | ||
8 | |||
9 | #include "types.h" | ||
10 | #include "osdmap.h" | ||
11 | #include "messenger.h" | ||
12 | |||
13 | struct ceph_msg; | ||
14 | struct ceph_snap_context; | ||
15 | struct ceph_osd_request; | ||
16 | struct ceph_osd_client; | ||
17 | struct ceph_authorizer; | ||
18 | |||
19 | /* | ||
20 | * completion callback for async writepages | ||
21 | */ | ||
22 | typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *, | ||
23 | struct ceph_msg *); | ||
24 | |||
25 | /* a given osd we're communicating with */ | ||
26 | struct ceph_osd { | ||
27 | atomic_t o_ref; | ||
28 | struct ceph_osd_client *o_osdc; | ||
29 | int o_osd; | ||
30 | int o_incarnation; | ||
31 | struct rb_node o_node; | ||
32 | struct ceph_connection o_con; | ||
33 | struct list_head o_requests; | ||
34 | struct list_head o_osd_lru; | ||
35 | struct ceph_authorizer *o_authorizer; | ||
36 | void *o_authorizer_buf, *o_authorizer_reply_buf; | ||
37 | size_t o_authorizer_buf_len, o_authorizer_reply_buf_len; | ||
38 | unsigned long lru_ttl; | ||
39 | int o_marked_for_keepalive; | ||
40 | struct list_head o_keepalive_item; | ||
41 | }; | ||
42 | |||
43 | /* an in-flight request */ | ||
44 | struct ceph_osd_request { | ||
45 | u64 r_tid; /* unique for this client */ | ||
46 | struct rb_node r_node; | ||
47 | struct list_head r_req_lru_item; | ||
48 | struct list_head r_osd_item; | ||
49 | struct ceph_osd *r_osd; | ||
50 | struct ceph_pg r_pgid; | ||
51 | int r_pg_osds[CEPH_PG_MAX_SIZE]; | ||
52 | int r_num_pg_osds; | ||
53 | |||
54 | struct ceph_connection *r_con_filling_msg; | ||
55 | |||
56 | struct ceph_msg *r_request, *r_reply; | ||
57 | int r_result; | ||
58 | int r_flags; /* any additional flags for the osd */ | ||
59 | u32 r_sent; /* >0 if r_request is sending/sent */ | ||
60 | int r_got_reply; | ||
61 | |||
62 | struct ceph_osd_client *r_osdc; | ||
63 | struct kref r_kref; | ||
64 | bool r_mempool; | ||
65 | struct completion r_completion, r_safe_completion; | ||
66 | ceph_osdc_callback_t r_callback, r_safe_callback; | ||
67 | struct ceph_eversion r_reassert_version; | ||
68 | struct list_head r_unsafe_item; | ||
69 | |||
70 | struct inode *r_inode; /* for use by callbacks */ | ||
71 | |||
72 | char r_oid[40]; /* object name */ | ||
73 | int r_oid_len; | ||
74 | unsigned long r_stamp; /* send OR check time */ | ||
75 | bool r_resend; /* msg send failed, needs retry */ | ||
76 | |||
77 | struct ceph_file_layout r_file_layout; | ||
78 | struct ceph_snap_context *r_snapc; /* snap context for writes */ | ||
79 | unsigned r_num_pages; /* size of page array (follows) */ | ||
80 | struct page **r_pages; /* pages for data payload */ | ||
81 | int r_pages_from_pool; | ||
82 | int r_own_pages; /* if true, i own page list */ | ||
83 | }; | ||
84 | |||
85 | struct ceph_osd_client { | ||
86 | struct ceph_client *client; | ||
87 | |||
88 | struct ceph_osdmap *osdmap; /* current map */ | ||
89 | struct rw_semaphore map_sem; | ||
90 | struct completion map_waiters; | ||
91 | u64 last_requested_map; | ||
92 | |||
93 | struct mutex request_mutex; | ||
94 | struct rb_root osds; /* osds */ | ||
95 | struct list_head osd_lru; /* idle osds */ | ||
96 | u64 timeout_tid; /* tid of timeout triggering rq */ | ||
97 | u64 last_tid; /* tid of last request */ | ||
98 | struct rb_root requests; /* pending requests */ | ||
99 | struct list_head req_lru; /* pending requests lru */ | ||
100 | int num_requests; | ||
101 | struct delayed_work timeout_work; | ||
102 | struct delayed_work osds_timeout_work; | ||
103 | #ifdef CONFIG_DEBUG_FS | ||
104 | struct dentry *debugfs_file; | ||
105 | #endif | ||
106 | |||
107 | mempool_t *req_mempool; | ||
108 | |||
109 | struct ceph_msgpool msgpool_op; | ||
110 | struct ceph_msgpool msgpool_op_reply; | ||
111 | }; | ||
112 | |||
113 | extern int ceph_osdc_init(struct ceph_osd_client *osdc, | ||
114 | struct ceph_client *client); | ||
115 | extern void ceph_osdc_stop(struct ceph_osd_client *osdc); | ||
116 | |||
117 | extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, | ||
118 | struct ceph_msg *msg); | ||
119 | extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, | ||
120 | struct ceph_msg *msg); | ||
121 | |||
122 | extern struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *, | ||
123 | struct ceph_file_layout *layout, | ||
124 | struct ceph_vino vino, | ||
125 | u64 offset, u64 *len, int op, int flags, | ||
126 | struct ceph_snap_context *snapc, | ||
127 | int do_sync, u32 truncate_seq, | ||
128 | u64 truncate_size, | ||
129 | struct timespec *mtime, | ||
130 | bool use_mempool, int num_reply); | ||
131 | |||
132 | static inline void ceph_osdc_get_request(struct ceph_osd_request *req) | ||
133 | { | ||
134 | kref_get(&req->r_kref); | ||
135 | } | ||
136 | extern void ceph_osdc_release_request(struct kref *kref); | ||
137 | static inline void ceph_osdc_put_request(struct ceph_osd_request *req) | ||
138 | { | ||
139 | kref_put(&req->r_kref, ceph_osdc_release_request); | ||
140 | } | ||
141 | |||
142 | extern int ceph_osdc_start_request(struct ceph_osd_client *osdc, | ||
143 | struct ceph_osd_request *req, | ||
144 | bool nofail); | ||
145 | extern int ceph_osdc_wait_request(struct ceph_osd_client *osdc, | ||
146 | struct ceph_osd_request *req); | ||
147 | extern void ceph_osdc_sync(struct ceph_osd_client *osdc); | ||
148 | |||
149 | extern int ceph_osdc_readpages(struct ceph_osd_client *osdc, | ||
150 | struct ceph_vino vino, | ||
151 | struct ceph_file_layout *layout, | ||
152 | u64 off, u64 *plen, | ||
153 | u32 truncate_seq, u64 truncate_size, | ||
154 | struct page **pages, int nr_pages); | ||
155 | |||
156 | extern int ceph_osdc_writepages(struct ceph_osd_client *osdc, | ||
157 | struct ceph_vino vino, | ||
158 | struct ceph_file_layout *layout, | ||
159 | struct ceph_snap_context *sc, | ||
160 | u64 off, u64 len, | ||
161 | u32 truncate_seq, u64 truncate_size, | ||
162 | struct timespec *mtime, | ||
163 | struct page **pages, int nr_pages, | ||
164 | int flags, int do_sync, bool nofail); | ||
165 | |||
166 | #endif | ||
167 | |||
diff --git a/fs/ceph/osdmap.c b/fs/ceph/osdmap.c deleted file mode 100644 index e31f118f1392..000000000000 --- a/fs/ceph/osdmap.c +++ /dev/null | |||
@@ -1,1110 +0,0 @@ | |||
1 | |||
2 | #include "ceph_debug.h" | ||
3 | |||
4 | #include <linux/slab.h> | ||
5 | #include <asm/div64.h> | ||
6 | |||
7 | #include "super.h" | ||
8 | #include "osdmap.h" | ||
9 | #include "crush/hash.h" | ||
10 | #include "crush/mapper.h" | ||
11 | #include "decode.h" | ||
12 | |||
13 | char *ceph_osdmap_state_str(char *str, int len, int state) | ||
14 | { | ||
15 | int flag = 0; | ||
16 | |||
17 | if (!len) | ||
18 | goto done; | ||
19 | |||
20 | *str = '\0'; | ||
21 | if (state) { | ||
22 | if (state & CEPH_OSD_EXISTS) { | ||
23 | snprintf(str, len, "exists"); | ||
24 | flag = 1; | ||
25 | } | ||
26 | if (state & CEPH_OSD_UP) { | ||
27 | snprintf(str, len, "%s%s%s", str, (flag ? ", " : ""), | ||
28 | "up"); | ||
29 | flag = 1; | ||
30 | } | ||
31 | } else { | ||
32 | snprintf(str, len, "doesn't exist"); | ||
33 | } | ||
34 | done: | ||
35 | return str; | ||
36 | } | ||
37 | |||
38 | /* maps */ | ||
39 | |||
40 | static int calc_bits_of(unsigned t) | ||
41 | { | ||
42 | int b = 0; | ||
43 | while (t) { | ||
44 | t = t >> 1; | ||
45 | b++; | ||
46 | } | ||
47 | return b; | ||
48 | } | ||
49 | |||
50 | /* | ||
51 | * the foo_mask is the smallest value 2^n-1 that is >= foo. | ||
52 | */ | ||
53 | static void calc_pg_masks(struct ceph_pg_pool_info *pi) | ||
54 | { | ||
55 | pi->pg_num_mask = (1 << calc_bits_of(le32_to_cpu(pi->v.pg_num)-1)) - 1; | ||
56 | pi->pgp_num_mask = | ||
57 | (1 << calc_bits_of(le32_to_cpu(pi->v.pgp_num)-1)) - 1; | ||
58 | pi->lpg_num_mask = | ||
59 | (1 << calc_bits_of(le32_to_cpu(pi->v.lpg_num)-1)) - 1; | ||
60 | pi->lpgp_num_mask = | ||
61 | (1 << calc_bits_of(le32_to_cpu(pi->v.lpgp_num)-1)) - 1; | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * decode crush map | ||
66 | */ | ||
67 | static int crush_decode_uniform_bucket(void **p, void *end, | ||
68 | struct crush_bucket_uniform *b) | ||
69 | { | ||
70 | dout("crush_decode_uniform_bucket %p to %p\n", *p, end); | ||
71 | ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad); | ||
72 | b->item_weight = ceph_decode_32(p); | ||
73 | return 0; | ||
74 | bad: | ||
75 | return -EINVAL; | ||
76 | } | ||
77 | |||
78 | static int crush_decode_list_bucket(void **p, void *end, | ||
79 | struct crush_bucket_list *b) | ||
80 | { | ||
81 | int j; | ||
82 | dout("crush_decode_list_bucket %p to %p\n", *p, end); | ||
83 | b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | ||
84 | if (b->item_weights == NULL) | ||
85 | return -ENOMEM; | ||
86 | b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | ||
87 | if (b->sum_weights == NULL) | ||
88 | return -ENOMEM; | ||
89 | ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad); | ||
90 | for (j = 0; j < b->h.size; j++) { | ||
91 | b->item_weights[j] = ceph_decode_32(p); | ||
92 | b->sum_weights[j] = ceph_decode_32(p); | ||
93 | } | ||
94 | return 0; | ||
95 | bad: | ||
96 | return -EINVAL; | ||
97 | } | ||
98 | |||
99 | static int crush_decode_tree_bucket(void **p, void *end, | ||
100 | struct crush_bucket_tree *b) | ||
101 | { | ||
102 | int j; | ||
103 | dout("crush_decode_tree_bucket %p to %p\n", *p, end); | ||
104 | ceph_decode_32_safe(p, end, b->num_nodes, bad); | ||
105 | b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS); | ||
106 | if (b->node_weights == NULL) | ||
107 | return -ENOMEM; | ||
108 | ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad); | ||
109 | for (j = 0; j < b->num_nodes; j++) | ||
110 | b->node_weights[j] = ceph_decode_32(p); | ||
111 | return 0; | ||
112 | bad: | ||
113 | return -EINVAL; | ||
114 | } | ||
115 | |||
116 | static int crush_decode_straw_bucket(void **p, void *end, | ||
117 | struct crush_bucket_straw *b) | ||
118 | { | ||
119 | int j; | ||
120 | dout("crush_decode_straw_bucket %p to %p\n", *p, end); | ||
121 | b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | ||
122 | if (b->item_weights == NULL) | ||
123 | return -ENOMEM; | ||
124 | b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS); | ||
125 | if (b->straws == NULL) | ||
126 | return -ENOMEM; | ||
127 | ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad); | ||
128 | for (j = 0; j < b->h.size; j++) { | ||
129 | b->item_weights[j] = ceph_decode_32(p); | ||
130 | b->straws[j] = ceph_decode_32(p); | ||
131 | } | ||
132 | return 0; | ||
133 | bad: | ||
134 | return -EINVAL; | ||
135 | } | ||
136 | |||
137 | static struct crush_map *crush_decode(void *pbyval, void *end) | ||
138 | { | ||
139 | struct crush_map *c; | ||
140 | int err = -EINVAL; | ||
141 | int i, j; | ||
142 | void **p = &pbyval; | ||
143 | void *start = pbyval; | ||
144 | u32 magic; | ||
145 | |||
146 | dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p)); | ||
147 | |||
148 | c = kzalloc(sizeof(*c), GFP_NOFS); | ||
149 | if (c == NULL) | ||
150 | return ERR_PTR(-ENOMEM); | ||
151 | |||
152 | ceph_decode_need(p, end, 4*sizeof(u32), bad); | ||
153 | magic = ceph_decode_32(p); | ||
154 | if (magic != CRUSH_MAGIC) { | ||
155 | pr_err("crush_decode magic %x != current %x\n", | ||
156 | (unsigned)magic, (unsigned)CRUSH_MAGIC); | ||
157 | goto bad; | ||
158 | } | ||
159 | c->max_buckets = ceph_decode_32(p); | ||
160 | c->max_rules = ceph_decode_32(p); | ||
161 | c->max_devices = ceph_decode_32(p); | ||
162 | |||
163 | c->device_parents = kcalloc(c->max_devices, sizeof(u32), GFP_NOFS); | ||
164 | if (c->device_parents == NULL) | ||
165 | goto badmem; | ||
166 | c->bucket_parents = kcalloc(c->max_buckets, sizeof(u32), GFP_NOFS); | ||
167 | if (c->bucket_parents == NULL) | ||
168 | goto badmem; | ||
169 | |||
170 | c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS); | ||
171 | if (c->buckets == NULL) | ||
172 | goto badmem; | ||
173 | c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS); | ||
174 | if (c->rules == NULL) | ||
175 | goto badmem; | ||
176 | |||
177 | /* buckets */ | ||
178 | for (i = 0; i < c->max_buckets; i++) { | ||
179 | int size = 0; | ||
180 | u32 alg; | ||
181 | struct crush_bucket *b; | ||
182 | |||
183 | ceph_decode_32_safe(p, end, alg, bad); | ||
184 | if (alg == 0) { | ||
185 | c->buckets[i] = NULL; | ||
186 | continue; | ||
187 | } | ||
188 | dout("crush_decode bucket %d off %x %p to %p\n", | ||
189 | i, (int)(*p-start), *p, end); | ||
190 | |||
191 | switch (alg) { | ||
192 | case CRUSH_BUCKET_UNIFORM: | ||
193 | size = sizeof(struct crush_bucket_uniform); | ||
194 | break; | ||
195 | case CRUSH_BUCKET_LIST: | ||
196 | size = sizeof(struct crush_bucket_list); | ||
197 | break; | ||
198 | case CRUSH_BUCKET_TREE: | ||
199 | size = sizeof(struct crush_bucket_tree); | ||
200 | break; | ||
201 | case CRUSH_BUCKET_STRAW: | ||
202 | size = sizeof(struct crush_bucket_straw); | ||
203 | break; | ||
204 | default: | ||
205 | err = -EINVAL; | ||
206 | goto bad; | ||
207 | } | ||
208 | BUG_ON(size == 0); | ||
209 | b = c->buckets[i] = kzalloc(size, GFP_NOFS); | ||
210 | if (b == NULL) | ||
211 | goto badmem; | ||
212 | |||
213 | ceph_decode_need(p, end, 4*sizeof(u32), bad); | ||
214 | b->id = ceph_decode_32(p); | ||
215 | b->type = ceph_decode_16(p); | ||
216 | b->alg = ceph_decode_8(p); | ||
217 | b->hash = ceph_decode_8(p); | ||
218 | b->weight = ceph_decode_32(p); | ||
219 | b->size = ceph_decode_32(p); | ||
220 | |||
221 | dout("crush_decode bucket size %d off %x %p to %p\n", | ||
222 | b->size, (int)(*p-start), *p, end); | ||
223 | |||
224 | b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS); | ||
225 | if (b->items == NULL) | ||
226 | goto badmem; | ||
227 | b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS); | ||
228 | if (b->perm == NULL) | ||
229 | goto badmem; | ||
230 | b->perm_n = 0; | ||
231 | |||
232 | ceph_decode_need(p, end, b->size*sizeof(u32), bad); | ||
233 | for (j = 0; j < b->size; j++) | ||
234 | b->items[j] = ceph_decode_32(p); | ||
235 | |||
236 | switch (b->alg) { | ||
237 | case CRUSH_BUCKET_UNIFORM: | ||
238 | err = crush_decode_uniform_bucket(p, end, | ||
239 | (struct crush_bucket_uniform *)b); | ||
240 | if (err < 0) | ||
241 | goto bad; | ||
242 | break; | ||
243 | case CRUSH_BUCKET_LIST: | ||
244 | err = crush_decode_list_bucket(p, end, | ||
245 | (struct crush_bucket_list *)b); | ||
246 | if (err < 0) | ||
247 | goto bad; | ||
248 | break; | ||
249 | case CRUSH_BUCKET_TREE: | ||
250 | err = crush_decode_tree_bucket(p, end, | ||
251 | (struct crush_bucket_tree *)b); | ||
252 | if (err < 0) | ||
253 | goto bad; | ||
254 | break; | ||
255 | case CRUSH_BUCKET_STRAW: | ||
256 | err = crush_decode_straw_bucket(p, end, | ||
257 | (struct crush_bucket_straw *)b); | ||
258 | if (err < 0) | ||
259 | goto bad; | ||
260 | break; | ||
261 | } | ||
262 | } | ||
263 | |||
264 | /* rules */ | ||
265 | dout("rule vec is %p\n", c->rules); | ||
266 | for (i = 0; i < c->max_rules; i++) { | ||
267 | u32 yes; | ||
268 | struct crush_rule *r; | ||
269 | |||
270 | ceph_decode_32_safe(p, end, yes, bad); | ||
271 | if (!yes) { | ||
272 | dout("crush_decode NO rule %d off %x %p to %p\n", | ||
273 | i, (int)(*p-start), *p, end); | ||
274 | c->rules[i] = NULL; | ||
275 | continue; | ||
276 | } | ||
277 | |||
278 | dout("crush_decode rule %d off %x %p to %p\n", | ||
279 | i, (int)(*p-start), *p, end); | ||
280 | |||
281 | /* len */ | ||
282 | ceph_decode_32_safe(p, end, yes, bad); | ||
283 | #if BITS_PER_LONG == 32 | ||
284 | err = -EINVAL; | ||
285 | if (yes > ULONG_MAX / sizeof(struct crush_rule_step)) | ||
286 | goto bad; | ||
287 | #endif | ||
288 | r = c->rules[i] = kmalloc(sizeof(*r) + | ||
289 | yes*sizeof(struct crush_rule_step), | ||
290 | GFP_NOFS); | ||
291 | if (r == NULL) | ||
292 | goto badmem; | ||
293 | dout(" rule %d is at %p\n", i, r); | ||
294 | r->len = yes; | ||
295 | ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */ | ||
296 | ceph_decode_need(p, end, r->len*3*sizeof(u32), bad); | ||
297 | for (j = 0; j < r->len; j++) { | ||
298 | r->steps[j].op = ceph_decode_32(p); | ||
299 | r->steps[j].arg1 = ceph_decode_32(p); | ||
300 | r->steps[j].arg2 = ceph_decode_32(p); | ||
301 | } | ||
302 | } | ||
303 | |||
304 | /* ignore trailing name maps. */ | ||
305 | |||
306 | dout("crush_decode success\n"); | ||
307 | return c; | ||
308 | |||
309 | badmem: | ||
310 | err = -ENOMEM; | ||
311 | bad: | ||
312 | dout("crush_decode fail %d\n", err); | ||
313 | crush_destroy(c); | ||
314 | return ERR_PTR(err); | ||
315 | } | ||
316 | |||
317 | /* | ||
318 | * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid | ||
319 | * to a set of osds) | ||
320 | */ | ||
321 | static int pgid_cmp(struct ceph_pg l, struct ceph_pg r) | ||
322 | { | ||
323 | u64 a = *(u64 *)&l; | ||
324 | u64 b = *(u64 *)&r; | ||
325 | |||
326 | if (a < b) | ||
327 | return -1; | ||
328 | if (a > b) | ||
329 | return 1; | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | static int __insert_pg_mapping(struct ceph_pg_mapping *new, | ||
334 | struct rb_root *root) | ||
335 | { | ||
336 | struct rb_node **p = &root->rb_node; | ||
337 | struct rb_node *parent = NULL; | ||
338 | struct ceph_pg_mapping *pg = NULL; | ||
339 | int c; | ||
340 | |||
341 | while (*p) { | ||
342 | parent = *p; | ||
343 | pg = rb_entry(parent, struct ceph_pg_mapping, node); | ||
344 | c = pgid_cmp(new->pgid, pg->pgid); | ||
345 | if (c < 0) | ||
346 | p = &(*p)->rb_left; | ||
347 | else if (c > 0) | ||
348 | p = &(*p)->rb_right; | ||
349 | else | ||
350 | return -EEXIST; | ||
351 | } | ||
352 | |||
353 | rb_link_node(&new->node, parent, p); | ||
354 | rb_insert_color(&new->node, root); | ||
355 | return 0; | ||
356 | } | ||
357 | |||
358 | static struct ceph_pg_mapping *__lookup_pg_mapping(struct rb_root *root, | ||
359 | struct ceph_pg pgid) | ||
360 | { | ||
361 | struct rb_node *n = root->rb_node; | ||
362 | struct ceph_pg_mapping *pg; | ||
363 | int c; | ||
364 | |||
365 | while (n) { | ||
366 | pg = rb_entry(n, struct ceph_pg_mapping, node); | ||
367 | c = pgid_cmp(pgid, pg->pgid); | ||
368 | if (c < 0) | ||
369 | n = n->rb_left; | ||
370 | else if (c > 0) | ||
371 | n = n->rb_right; | ||
372 | else | ||
373 | return pg; | ||
374 | } | ||
375 | return NULL; | ||
376 | } | ||
377 | |||
378 | /* | ||
379 | * rbtree of pg pool info | ||
380 | */ | ||
381 | static int __insert_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *new) | ||
382 | { | ||
383 | struct rb_node **p = &root->rb_node; | ||
384 | struct rb_node *parent = NULL; | ||
385 | struct ceph_pg_pool_info *pi = NULL; | ||
386 | |||
387 | while (*p) { | ||
388 | parent = *p; | ||
389 | pi = rb_entry(parent, struct ceph_pg_pool_info, node); | ||
390 | if (new->id < pi->id) | ||
391 | p = &(*p)->rb_left; | ||
392 | else if (new->id > pi->id) | ||
393 | p = &(*p)->rb_right; | ||
394 | else | ||
395 | return -EEXIST; | ||
396 | } | ||
397 | |||
398 | rb_link_node(&new->node, parent, p); | ||
399 | rb_insert_color(&new->node, root); | ||
400 | return 0; | ||
401 | } | ||
402 | |||
403 | static struct ceph_pg_pool_info *__lookup_pg_pool(struct rb_root *root, int id) | ||
404 | { | ||
405 | struct ceph_pg_pool_info *pi; | ||
406 | struct rb_node *n = root->rb_node; | ||
407 | |||
408 | while (n) { | ||
409 | pi = rb_entry(n, struct ceph_pg_pool_info, node); | ||
410 | if (id < pi->id) | ||
411 | n = n->rb_left; | ||
412 | else if (id > pi->id) | ||
413 | n = n->rb_right; | ||
414 | else | ||
415 | return pi; | ||
416 | } | ||
417 | return NULL; | ||
418 | } | ||
419 | |||
420 | static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi) | ||
421 | { | ||
422 | rb_erase(&pi->node, root); | ||
423 | kfree(pi->name); | ||
424 | kfree(pi); | ||
425 | } | ||
426 | |||
427 | static int __decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi) | ||
428 | { | ||
429 | unsigned n, m; | ||
430 | |||
431 | ceph_decode_copy(p, &pi->v, sizeof(pi->v)); | ||
432 | calc_pg_masks(pi); | ||
433 | |||
434 | /* num_snaps * snap_info_t */ | ||
435 | n = le32_to_cpu(pi->v.num_snaps); | ||
436 | while (n--) { | ||
437 | ceph_decode_need(p, end, sizeof(u64) + 1 + sizeof(u64) + | ||
438 | sizeof(struct ceph_timespec), bad); | ||
439 | *p += sizeof(u64) + /* key */ | ||
440 | 1 + sizeof(u64) + /* u8, snapid */ | ||
441 | sizeof(struct ceph_timespec); | ||
442 | m = ceph_decode_32(p); /* snap name */ | ||
443 | *p += m; | ||
444 | } | ||
445 | |||
446 | *p += le32_to_cpu(pi->v.num_removed_snap_intervals) * sizeof(u64) * 2; | ||
447 | return 0; | ||
448 | |||
449 | bad: | ||
450 | return -EINVAL; | ||
451 | } | ||
452 | |||
453 | static int __decode_pool_names(void **p, void *end, struct ceph_osdmap *map) | ||
454 | { | ||
455 | struct ceph_pg_pool_info *pi; | ||
456 | u32 num, len, pool; | ||
457 | |||
458 | ceph_decode_32_safe(p, end, num, bad); | ||
459 | dout(" %d pool names\n", num); | ||
460 | while (num--) { | ||
461 | ceph_decode_32_safe(p, end, pool, bad); | ||
462 | ceph_decode_32_safe(p, end, len, bad); | ||
463 | dout(" pool %d len %d\n", pool, len); | ||
464 | pi = __lookup_pg_pool(&map->pg_pools, pool); | ||
465 | if (pi) { | ||
466 | kfree(pi->name); | ||
467 | pi->name = kmalloc(len + 1, GFP_NOFS); | ||
468 | if (pi->name) { | ||
469 | memcpy(pi->name, *p, len); | ||
470 | pi->name[len] = '\0'; | ||
471 | dout(" name is %s\n", pi->name); | ||
472 | } | ||
473 | } | ||
474 | *p += len; | ||
475 | } | ||
476 | return 0; | ||
477 | |||
478 | bad: | ||
479 | return -EINVAL; | ||
480 | } | ||
481 | |||
482 | /* | ||
483 | * osd map | ||
484 | */ | ||
485 | void ceph_osdmap_destroy(struct ceph_osdmap *map) | ||
486 | { | ||
487 | dout("osdmap_destroy %p\n", map); | ||
488 | if (map->crush) | ||
489 | crush_destroy(map->crush); | ||
490 | while (!RB_EMPTY_ROOT(&map->pg_temp)) { | ||
491 | struct ceph_pg_mapping *pg = | ||
492 | rb_entry(rb_first(&map->pg_temp), | ||
493 | struct ceph_pg_mapping, node); | ||
494 | rb_erase(&pg->node, &map->pg_temp); | ||
495 | kfree(pg); | ||
496 | } | ||
497 | while (!RB_EMPTY_ROOT(&map->pg_pools)) { | ||
498 | struct ceph_pg_pool_info *pi = | ||
499 | rb_entry(rb_first(&map->pg_pools), | ||
500 | struct ceph_pg_pool_info, node); | ||
501 | __remove_pg_pool(&map->pg_pools, pi); | ||
502 | } | ||
503 | kfree(map->osd_state); | ||
504 | kfree(map->osd_weight); | ||
505 | kfree(map->osd_addr); | ||
506 | kfree(map); | ||
507 | } | ||
508 | |||
509 | /* | ||
510 | * adjust max osd value. reallocate arrays. | ||
511 | */ | ||
512 | static int osdmap_set_max_osd(struct ceph_osdmap *map, int max) | ||
513 | { | ||
514 | u8 *state; | ||
515 | struct ceph_entity_addr *addr; | ||
516 | u32 *weight; | ||
517 | |||
518 | state = kcalloc(max, sizeof(*state), GFP_NOFS); | ||
519 | addr = kcalloc(max, sizeof(*addr), GFP_NOFS); | ||
520 | weight = kcalloc(max, sizeof(*weight), GFP_NOFS); | ||
521 | if (state == NULL || addr == NULL || weight == NULL) { | ||
522 | kfree(state); | ||
523 | kfree(addr); | ||
524 | kfree(weight); | ||
525 | return -ENOMEM; | ||
526 | } | ||
527 | |||
528 | /* copy old? */ | ||
529 | if (map->osd_state) { | ||
530 | memcpy(state, map->osd_state, map->max_osd*sizeof(*state)); | ||
531 | memcpy(addr, map->osd_addr, map->max_osd*sizeof(*addr)); | ||
532 | memcpy(weight, map->osd_weight, map->max_osd*sizeof(*weight)); | ||
533 | kfree(map->osd_state); | ||
534 | kfree(map->osd_addr); | ||
535 | kfree(map->osd_weight); | ||
536 | } | ||
537 | |||
538 | map->osd_state = state; | ||
539 | map->osd_weight = weight; | ||
540 | map->osd_addr = addr; | ||
541 | map->max_osd = max; | ||
542 | return 0; | ||
543 | } | ||
544 | |||
545 | /* | ||
546 | * decode a full map. | ||
547 | */ | ||
548 | struct ceph_osdmap *osdmap_decode(void **p, void *end) | ||
549 | { | ||
550 | struct ceph_osdmap *map; | ||
551 | u16 version; | ||
552 | u32 len, max, i; | ||
553 | u8 ev; | ||
554 | int err = -EINVAL; | ||
555 | void *start = *p; | ||
556 | struct ceph_pg_pool_info *pi; | ||
557 | |||
558 | dout("osdmap_decode %p to %p len %d\n", *p, end, (int)(end - *p)); | ||
559 | |||
560 | map = kzalloc(sizeof(*map), GFP_NOFS); | ||
561 | if (map == NULL) | ||
562 | return ERR_PTR(-ENOMEM); | ||
563 | map->pg_temp = RB_ROOT; | ||
564 | |||
565 | ceph_decode_16_safe(p, end, version, bad); | ||
566 | if (version > CEPH_OSDMAP_VERSION) { | ||
567 | pr_warning("got unknown v %d > %d of osdmap\n", version, | ||
568 | CEPH_OSDMAP_VERSION); | ||
569 | goto bad; | ||
570 | } | ||
571 | |||
572 | ceph_decode_need(p, end, 2*sizeof(u64)+6*sizeof(u32), bad); | ||
573 | ceph_decode_copy(p, &map->fsid, sizeof(map->fsid)); | ||
574 | map->epoch = ceph_decode_32(p); | ||
575 | ceph_decode_copy(p, &map->created, sizeof(map->created)); | ||
576 | ceph_decode_copy(p, &map->modified, sizeof(map->modified)); | ||
577 | |||
578 | ceph_decode_32_safe(p, end, max, bad); | ||
579 | while (max--) { | ||
580 | ceph_decode_need(p, end, 4 + 1 + sizeof(pi->v), bad); | ||
581 | pi = kzalloc(sizeof(*pi), GFP_NOFS); | ||
582 | if (!pi) | ||
583 | goto bad; | ||
584 | pi->id = ceph_decode_32(p); | ||
585 | ev = ceph_decode_8(p); /* encoding version */ | ||
586 | if (ev > CEPH_PG_POOL_VERSION) { | ||
587 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | ||
588 | ev, CEPH_PG_POOL_VERSION); | ||
589 | kfree(pi); | ||
590 | goto bad; | ||
591 | } | ||
592 | err = __decode_pool(p, end, pi); | ||
593 | if (err < 0) | ||
594 | goto bad; | ||
595 | __insert_pg_pool(&map->pg_pools, pi); | ||
596 | } | ||
597 | |||
598 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | ||
599 | goto bad; | ||
600 | |||
601 | ceph_decode_32_safe(p, end, map->pool_max, bad); | ||
602 | |||
603 | ceph_decode_32_safe(p, end, map->flags, bad); | ||
604 | |||
605 | max = ceph_decode_32(p); | ||
606 | |||
607 | /* (re)alloc osd arrays */ | ||
608 | err = osdmap_set_max_osd(map, max); | ||
609 | if (err < 0) | ||
610 | goto bad; | ||
611 | dout("osdmap_decode max_osd = %d\n", map->max_osd); | ||
612 | |||
613 | /* osds */ | ||
614 | err = -EINVAL; | ||
615 | ceph_decode_need(p, end, 3*sizeof(u32) + | ||
616 | map->max_osd*(1 + sizeof(*map->osd_weight) + | ||
617 | sizeof(*map->osd_addr)), bad); | ||
618 | *p += 4; /* skip length field (should match max) */ | ||
619 | ceph_decode_copy(p, map->osd_state, map->max_osd); | ||
620 | |||
621 | *p += 4; /* skip length field (should match max) */ | ||
622 | for (i = 0; i < map->max_osd; i++) | ||
623 | map->osd_weight[i] = ceph_decode_32(p); | ||
624 | |||
625 | *p += 4; /* skip length field (should match max) */ | ||
626 | ceph_decode_copy(p, map->osd_addr, map->max_osd*sizeof(*map->osd_addr)); | ||
627 | for (i = 0; i < map->max_osd; i++) | ||
628 | ceph_decode_addr(&map->osd_addr[i]); | ||
629 | |||
630 | /* pg_temp */ | ||
631 | ceph_decode_32_safe(p, end, len, bad); | ||
632 | for (i = 0; i < len; i++) { | ||
633 | int n, j; | ||
634 | struct ceph_pg pgid; | ||
635 | struct ceph_pg_mapping *pg; | ||
636 | |||
637 | ceph_decode_need(p, end, sizeof(u32) + sizeof(u64), bad); | ||
638 | ceph_decode_copy(p, &pgid, sizeof(pgid)); | ||
639 | n = ceph_decode_32(p); | ||
640 | ceph_decode_need(p, end, n * sizeof(u32), bad); | ||
641 | err = -ENOMEM; | ||
642 | pg = kmalloc(sizeof(*pg) + n*sizeof(u32), GFP_NOFS); | ||
643 | if (!pg) | ||
644 | goto bad; | ||
645 | pg->pgid = pgid; | ||
646 | pg->len = n; | ||
647 | for (j = 0; j < n; j++) | ||
648 | pg->osds[j] = ceph_decode_32(p); | ||
649 | |||
650 | err = __insert_pg_mapping(pg, &map->pg_temp); | ||
651 | if (err) | ||
652 | goto bad; | ||
653 | dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, len); | ||
654 | } | ||
655 | |||
656 | /* crush */ | ||
657 | ceph_decode_32_safe(p, end, len, bad); | ||
658 | dout("osdmap_decode crush len %d from off 0x%x\n", len, | ||
659 | (int)(*p - start)); | ||
660 | ceph_decode_need(p, end, len, bad); | ||
661 | map->crush = crush_decode(*p, end); | ||
662 | *p += len; | ||
663 | if (IS_ERR(map->crush)) { | ||
664 | err = PTR_ERR(map->crush); | ||
665 | map->crush = NULL; | ||
666 | goto bad; | ||
667 | } | ||
668 | |||
669 | /* ignore the rest of the map */ | ||
670 | *p = end; | ||
671 | |||
672 | dout("osdmap_decode done %p %p\n", *p, end); | ||
673 | return map; | ||
674 | |||
675 | bad: | ||
676 | dout("osdmap_decode fail\n"); | ||
677 | ceph_osdmap_destroy(map); | ||
678 | return ERR_PTR(err); | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * decode and apply an incremental map update. | ||
683 | */ | ||
684 | struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | ||
685 | struct ceph_osdmap *map, | ||
686 | struct ceph_messenger *msgr) | ||
687 | { | ||
688 | struct crush_map *newcrush = NULL; | ||
689 | struct ceph_fsid fsid; | ||
690 | u32 epoch = 0; | ||
691 | struct ceph_timespec modified; | ||
692 | u32 len, pool; | ||
693 | __s32 new_pool_max, new_flags, max; | ||
694 | void *start = *p; | ||
695 | int err = -EINVAL; | ||
696 | u16 version; | ||
697 | struct rb_node *rbp; | ||
698 | |||
699 | ceph_decode_16_safe(p, end, version, bad); | ||
700 | if (version > CEPH_OSDMAP_INC_VERSION) { | ||
701 | pr_warning("got unknown v %d > %d of inc osdmap\n", version, | ||
702 | CEPH_OSDMAP_INC_VERSION); | ||
703 | goto bad; | ||
704 | } | ||
705 | |||
706 | ceph_decode_need(p, end, sizeof(fsid)+sizeof(modified)+2*sizeof(u32), | ||
707 | bad); | ||
708 | ceph_decode_copy(p, &fsid, sizeof(fsid)); | ||
709 | epoch = ceph_decode_32(p); | ||
710 | BUG_ON(epoch != map->epoch+1); | ||
711 | ceph_decode_copy(p, &modified, sizeof(modified)); | ||
712 | new_pool_max = ceph_decode_32(p); | ||
713 | new_flags = ceph_decode_32(p); | ||
714 | |||
715 | /* full map? */ | ||
716 | ceph_decode_32_safe(p, end, len, bad); | ||
717 | if (len > 0) { | ||
718 | dout("apply_incremental full map len %d, %p to %p\n", | ||
719 | len, *p, end); | ||
720 | return osdmap_decode(p, min(*p+len, end)); | ||
721 | } | ||
722 | |||
723 | /* new crush? */ | ||
724 | ceph_decode_32_safe(p, end, len, bad); | ||
725 | if (len > 0) { | ||
726 | dout("apply_incremental new crush map len %d, %p to %p\n", | ||
727 | len, *p, end); | ||
728 | newcrush = crush_decode(*p, min(*p+len, end)); | ||
729 | if (IS_ERR(newcrush)) | ||
730 | return ERR_CAST(newcrush); | ||
731 | *p += len; | ||
732 | } | ||
733 | |||
734 | /* new flags? */ | ||
735 | if (new_flags >= 0) | ||
736 | map->flags = new_flags; | ||
737 | if (new_pool_max >= 0) | ||
738 | map->pool_max = new_pool_max; | ||
739 | |||
740 | ceph_decode_need(p, end, 5*sizeof(u32), bad); | ||
741 | |||
742 | /* new max? */ | ||
743 | max = ceph_decode_32(p); | ||
744 | if (max >= 0) { | ||
745 | err = osdmap_set_max_osd(map, max); | ||
746 | if (err < 0) | ||
747 | goto bad; | ||
748 | } | ||
749 | |||
750 | map->epoch++; | ||
751 | map->modified = map->modified; | ||
752 | if (newcrush) { | ||
753 | if (map->crush) | ||
754 | crush_destroy(map->crush); | ||
755 | map->crush = newcrush; | ||
756 | newcrush = NULL; | ||
757 | } | ||
758 | |||
759 | /* new_pool */ | ||
760 | ceph_decode_32_safe(p, end, len, bad); | ||
761 | while (len--) { | ||
762 | __u8 ev; | ||
763 | struct ceph_pg_pool_info *pi; | ||
764 | |||
765 | ceph_decode_32_safe(p, end, pool, bad); | ||
766 | ceph_decode_need(p, end, 1 + sizeof(pi->v), bad); | ||
767 | ev = ceph_decode_8(p); /* encoding version */ | ||
768 | if (ev > CEPH_PG_POOL_VERSION) { | ||
769 | pr_warning("got unknown v %d > %d of ceph_pg_pool\n", | ||
770 | ev, CEPH_PG_POOL_VERSION); | ||
771 | goto bad; | ||
772 | } | ||
773 | pi = __lookup_pg_pool(&map->pg_pools, pool); | ||
774 | if (!pi) { | ||
775 | pi = kzalloc(sizeof(*pi), GFP_NOFS); | ||
776 | if (!pi) { | ||
777 | err = -ENOMEM; | ||
778 | goto bad; | ||
779 | } | ||
780 | pi->id = pool; | ||
781 | __insert_pg_pool(&map->pg_pools, pi); | ||
782 | } | ||
783 | err = __decode_pool(p, end, pi); | ||
784 | if (err < 0) | ||
785 | goto bad; | ||
786 | } | ||
787 | if (version >= 5 && __decode_pool_names(p, end, map) < 0) | ||
788 | goto bad; | ||
789 | |||
790 | /* old_pool */ | ||
791 | ceph_decode_32_safe(p, end, len, bad); | ||
792 | while (len--) { | ||
793 | struct ceph_pg_pool_info *pi; | ||
794 | |||
795 | ceph_decode_32_safe(p, end, pool, bad); | ||
796 | pi = __lookup_pg_pool(&map->pg_pools, pool); | ||
797 | if (pi) | ||
798 | __remove_pg_pool(&map->pg_pools, pi); | ||
799 | } | ||
800 | |||
801 | /* new_up */ | ||
802 | err = -EINVAL; | ||
803 | ceph_decode_32_safe(p, end, len, bad); | ||
804 | while (len--) { | ||
805 | u32 osd; | ||
806 | struct ceph_entity_addr addr; | ||
807 | ceph_decode_32_safe(p, end, osd, bad); | ||
808 | ceph_decode_copy_safe(p, end, &addr, sizeof(addr), bad); | ||
809 | ceph_decode_addr(&addr); | ||
810 | pr_info("osd%d up\n", osd); | ||
811 | BUG_ON(osd >= map->max_osd); | ||
812 | map->osd_state[osd] |= CEPH_OSD_UP; | ||
813 | map->osd_addr[osd] = addr; | ||
814 | } | ||
815 | |||
816 | /* new_down */ | ||
817 | ceph_decode_32_safe(p, end, len, bad); | ||
818 | while (len--) { | ||
819 | u32 osd; | ||
820 | ceph_decode_32_safe(p, end, osd, bad); | ||
821 | (*p)++; /* clean flag */ | ||
822 | pr_info("osd%d down\n", osd); | ||
823 | if (osd < map->max_osd) | ||
824 | map->osd_state[osd] &= ~CEPH_OSD_UP; | ||
825 | } | ||
826 | |||
827 | /* new_weight */ | ||
828 | ceph_decode_32_safe(p, end, len, bad); | ||
829 | while (len--) { | ||
830 | u32 osd, off; | ||
831 | ceph_decode_need(p, end, sizeof(u32)*2, bad); | ||
832 | osd = ceph_decode_32(p); | ||
833 | off = ceph_decode_32(p); | ||
834 | pr_info("osd%d weight 0x%x %s\n", osd, off, | ||
835 | off == CEPH_OSD_IN ? "(in)" : | ||
836 | (off == CEPH_OSD_OUT ? "(out)" : "")); | ||
837 | if (osd < map->max_osd) | ||
838 | map->osd_weight[osd] = off; | ||
839 | } | ||
840 | |||
841 | /* new_pg_temp */ | ||
842 | rbp = rb_first(&map->pg_temp); | ||
843 | ceph_decode_32_safe(p, end, len, bad); | ||
844 | while (len--) { | ||
845 | struct ceph_pg_mapping *pg; | ||
846 | int j; | ||
847 | struct ceph_pg pgid; | ||
848 | u32 pglen; | ||
849 | ceph_decode_need(p, end, sizeof(u64) + sizeof(u32), bad); | ||
850 | ceph_decode_copy(p, &pgid, sizeof(pgid)); | ||
851 | pglen = ceph_decode_32(p); | ||
852 | |||
853 | /* remove any? */ | ||
854 | while (rbp && pgid_cmp(rb_entry(rbp, struct ceph_pg_mapping, | ||
855 | node)->pgid, pgid) <= 0) { | ||
856 | struct ceph_pg_mapping *cur = | ||
857 | rb_entry(rbp, struct ceph_pg_mapping, node); | ||
858 | |||
859 | rbp = rb_next(rbp); | ||
860 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); | ||
861 | rb_erase(&cur->node, &map->pg_temp); | ||
862 | kfree(cur); | ||
863 | } | ||
864 | |||
865 | if (pglen) { | ||
866 | /* insert */ | ||
867 | ceph_decode_need(p, end, pglen*sizeof(u32), bad); | ||
868 | pg = kmalloc(sizeof(*pg) + sizeof(u32)*pglen, GFP_NOFS); | ||
869 | if (!pg) { | ||
870 | err = -ENOMEM; | ||
871 | goto bad; | ||
872 | } | ||
873 | pg->pgid = pgid; | ||
874 | pg->len = pglen; | ||
875 | for (j = 0; j < pglen; j++) | ||
876 | pg->osds[j] = ceph_decode_32(p); | ||
877 | err = __insert_pg_mapping(pg, &map->pg_temp); | ||
878 | if (err) { | ||
879 | kfree(pg); | ||
880 | goto bad; | ||
881 | } | ||
882 | dout(" added pg_temp %llx len %d\n", *(u64 *)&pgid, | ||
883 | pglen); | ||
884 | } | ||
885 | } | ||
886 | while (rbp) { | ||
887 | struct ceph_pg_mapping *cur = | ||
888 | rb_entry(rbp, struct ceph_pg_mapping, node); | ||
889 | |||
890 | rbp = rb_next(rbp); | ||
891 | dout(" removed pg_temp %llx\n", *(u64 *)&cur->pgid); | ||
892 | rb_erase(&cur->node, &map->pg_temp); | ||
893 | kfree(cur); | ||
894 | } | ||
895 | |||
896 | /* ignore the rest */ | ||
897 | *p = end; | ||
898 | return map; | ||
899 | |||
900 | bad: | ||
901 | pr_err("corrupt inc osdmap epoch %d off %d (%p of %p-%p)\n", | ||
902 | epoch, (int)(*p - start), *p, start, end); | ||
903 | print_hex_dump(KERN_DEBUG, "osdmap: ", | ||
904 | DUMP_PREFIX_OFFSET, 16, 1, | ||
905 | start, end - start, true); | ||
906 | if (newcrush) | ||
907 | crush_destroy(newcrush); | ||
908 | return ERR_PTR(err); | ||
909 | } | ||
910 | |||
911 | |||
912 | |||
913 | |||
914 | /* | ||
915 | * calculate file layout from given offset, length. | ||
916 | * fill in correct oid, logical length, and object extent | ||
917 | * offset, length. | ||
918 | * | ||
919 | * for now, we write only a single su, until we can | ||
920 | * pass a stride back to the caller. | ||
921 | */ | ||
922 | void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | ||
923 | u64 off, u64 *plen, | ||
924 | u64 *ono, | ||
925 | u64 *oxoff, u64 *oxlen) | ||
926 | { | ||
927 | u32 osize = le32_to_cpu(layout->fl_object_size); | ||
928 | u32 su = le32_to_cpu(layout->fl_stripe_unit); | ||
929 | u32 sc = le32_to_cpu(layout->fl_stripe_count); | ||
930 | u32 bl, stripeno, stripepos, objsetno; | ||
931 | u32 su_per_object; | ||
932 | u64 t, su_offset; | ||
933 | |||
934 | dout("mapping %llu~%llu osize %u fl_su %u\n", off, *plen, | ||
935 | osize, su); | ||
936 | su_per_object = osize / su; | ||
937 | dout("osize %u / su %u = su_per_object %u\n", osize, su, | ||
938 | su_per_object); | ||
939 | |||
940 | BUG_ON((su & ~PAGE_MASK) != 0); | ||
941 | /* bl = *off / su; */ | ||
942 | t = off; | ||
943 | do_div(t, su); | ||
944 | bl = t; | ||
945 | dout("off %llu / su %u = bl %u\n", off, su, bl); | ||
946 | |||
947 | stripeno = bl / sc; | ||
948 | stripepos = bl % sc; | ||
949 | objsetno = stripeno / su_per_object; | ||
950 | |||
951 | *ono = objsetno * sc + stripepos; | ||
952 | dout("objset %u * sc %u = ono %u\n", objsetno, sc, (unsigned)*ono); | ||
953 | |||
954 | /* *oxoff = *off % layout->fl_stripe_unit; # offset in su */ | ||
955 | t = off; | ||
956 | su_offset = do_div(t, su); | ||
957 | *oxoff = su_offset + (stripeno % su_per_object) * su; | ||
958 | |||
959 | /* | ||
960 | * Calculate the length of the extent being written to the selected | ||
961 | * object. This is the minimum of the full length requested (plen) or | ||
962 | * the remainder of the current stripe being written to. | ||
963 | */ | ||
964 | *oxlen = min_t(u64, *plen, su - su_offset); | ||
965 | *plen = *oxlen; | ||
966 | |||
967 | dout(" obj extent %llu~%llu\n", *oxoff, *oxlen); | ||
968 | } | ||
969 | |||
970 | /* | ||
971 | * calculate an object layout (i.e. pgid) from an oid, | ||
972 | * file_layout, and osdmap | ||
973 | */ | ||
974 | int ceph_calc_object_layout(struct ceph_object_layout *ol, | ||
975 | const char *oid, | ||
976 | struct ceph_file_layout *fl, | ||
977 | struct ceph_osdmap *osdmap) | ||
978 | { | ||
979 | unsigned num, num_mask; | ||
980 | struct ceph_pg pgid; | ||
981 | s32 preferred = (s32)le32_to_cpu(fl->fl_pg_preferred); | ||
982 | int poolid = le32_to_cpu(fl->fl_pg_pool); | ||
983 | struct ceph_pg_pool_info *pool; | ||
984 | unsigned ps; | ||
985 | |||
986 | BUG_ON(!osdmap); | ||
987 | |||
988 | pool = __lookup_pg_pool(&osdmap->pg_pools, poolid); | ||
989 | if (!pool) | ||
990 | return -EIO; | ||
991 | ps = ceph_str_hash(pool->v.object_hash, oid, strlen(oid)); | ||
992 | if (preferred >= 0) { | ||
993 | ps += preferred; | ||
994 | num = le32_to_cpu(pool->v.lpg_num); | ||
995 | num_mask = pool->lpg_num_mask; | ||
996 | } else { | ||
997 | num = le32_to_cpu(pool->v.pg_num); | ||
998 | num_mask = pool->pg_num_mask; | ||
999 | } | ||
1000 | |||
1001 | pgid.ps = cpu_to_le16(ps); | ||
1002 | pgid.preferred = cpu_to_le16(preferred); | ||
1003 | pgid.pool = fl->fl_pg_pool; | ||
1004 | if (preferred >= 0) | ||
1005 | dout("calc_object_layout '%s' pgid %d.%xp%d\n", oid, poolid, ps, | ||
1006 | (int)preferred); | ||
1007 | else | ||
1008 | dout("calc_object_layout '%s' pgid %d.%x\n", oid, poolid, ps); | ||
1009 | |||
1010 | ol->ol_pgid = pgid; | ||
1011 | ol->ol_stripe_unit = fl->fl_object_stripe_unit; | ||
1012 | return 0; | ||
1013 | } | ||
1014 | |||
/*
 * Calculate raw osd vector for the given pgid.  Return pointer to osd
 * array, or NULL on failure.
 *
 * The result is either an explicit pg_temp override (if one exists for
 * this pgid) or the placement computed by CRUSH.  On success *num is
 * updated to the number of valid entries in the returned array; on
 * entry it bounds how many osds may be written to osds[].
 */
static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
			int *osds, int *num)
{
	struct ceph_pg_mapping *pg;
	struct ceph_pg_pool_info *pool;
	int ruleno;
	unsigned poolid, ps, pps;
	int preferred;

	/* pg_temp?  an explicit mapping bypasses CRUSH entirely */
	pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
	if (pg) {
		*num = pg->len;
		return pg->osds;
	}

	/* crush */
	poolid = le32_to_cpu(pgid.pool);
	ps = le16_to_cpu(pgid.ps);
	preferred = (s16)le16_to_cpu(pgid.preferred);	/* < 0 == none */

	/* don't forcefeed bad device ids to crush */
	if (preferred >= osdmap->max_osd ||
	    preferred >= osdmap->crush->max_devices)
		preferred = -1;

	pool = __lookup_pg_pool(&osdmap->pg_pools, poolid);
	if (!pool)
		return NULL;
	/* pick the crush rule matching this pool's ruleset, type and size */
	ruleno = crush_find_rule(osdmap->crush, pool->v.crush_ruleset,
				 pool->v.type, pool->v.size);
	if (ruleno < 0) {
		pr_err("no crush rule pool %d ruleset %d type %d size %d\n",
		       poolid, pool->v.crush_ruleset, pool->v.type,
		       pool->v.size);
		return NULL;
	}

	/*
	 * Fold the placement seed down to the pool's effective pg count
	 * (the localized lpgp_num when a preferred osd is set), then add
	 * the pool id so distinct pools hash to distinct placements.
	 */
	if (preferred >= 0)
		pps = ceph_stable_mod(ps,
				      le32_to_cpu(pool->v.lpgp_num),
				      pool->lpgp_num_mask);
	else
		pps = ceph_stable_mod(ps,
				      le32_to_cpu(pool->v.pgp_num),
				      pool->pgp_num_mask);
	pps += poolid;
	*num = crush_do_rule(osdmap->crush, ruleno, pps, osds,
			     min_t(int, pool->v.size, *num),
			     preferred, osdmap->osd_weight);
	return osds;
}
1071 | |||
1072 | /* | ||
1073 | * Return acting set for given pgid. | ||
1074 | */ | ||
1075 | int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
1076 | int *acting) | ||
1077 | { | ||
1078 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | ||
1079 | int i, o, num = CEPH_PG_MAX_SIZE; | ||
1080 | |||
1081 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | ||
1082 | if (!osds) | ||
1083 | return -1; | ||
1084 | |||
1085 | /* primary is first up osd */ | ||
1086 | o = 0; | ||
1087 | for (i = 0; i < num; i++) | ||
1088 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
1089 | acting[o++] = osds[i]; | ||
1090 | return o; | ||
1091 | } | ||
1092 | |||
1093 | /* | ||
1094 | * Return primary osd for given pgid, or -1 if none. | ||
1095 | */ | ||
1096 | int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid) | ||
1097 | { | ||
1098 | int rawosds[CEPH_PG_MAX_SIZE], *osds; | ||
1099 | int i, num = CEPH_PG_MAX_SIZE; | ||
1100 | |||
1101 | osds = calc_pg_raw(osdmap, pgid, rawosds, &num); | ||
1102 | if (!osds) | ||
1103 | return -1; | ||
1104 | |||
1105 | /* primary is first up osd */ | ||
1106 | for (i = 0; i < num; i++) | ||
1107 | if (ceph_osd_is_up(osdmap, osds[i])) | ||
1108 | return osds[i]; | ||
1109 | return -1; | ||
1110 | } | ||
diff --git a/fs/ceph/osdmap.h b/fs/ceph/osdmap.h deleted file mode 100644 index 970b547e510d..000000000000 --- a/fs/ceph/osdmap.h +++ /dev/null | |||
@@ -1,128 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_OSDMAP_H | ||
2 | #define _FS_CEPH_OSDMAP_H | ||
3 | |||
4 | #include <linux/rbtree.h> | ||
5 | #include "types.h" | ||
6 | #include "ceph_fs.h" | ||
7 | #include "crush/crush.h" | ||
8 | |||
9 | /* | ||
10 | * The osd map describes the current membership of the osd cluster and | ||
11 | * specifies the mapping of objects to placement groups and placement | ||
12 | * groups to (sets of) osds. That is, it completely specifies the | ||
13 | * (desired) distribution of all data objects in the system at some | ||
14 | * point in time. | ||
15 | * | ||
16 | * Each map version is identified by an epoch, which increases monotonically. | ||
17 | * | ||
18 | * The map can be updated either via an incremental map (diff) describing | ||
19 | * the change between two successive epochs, or as a fully encoded map. | ||
20 | */ | ||
/*
 * Per-pool state, kept in the osdmap's pg_pools rbtree keyed by id.
 */
struct ceph_pg_pool_info {
	struct rb_node node;	/* link in ceph_osdmap->pg_pools */
	int id;			/* pool id (rbtree key) */
	struct ceph_pg_pool v;	/* on-wire pool description */
	int pg_num_mask, pgp_num_mask, lpg_num_mask, lpgp_num_mask;
				/* (2^n)-1 masks for ceph_stable_mod() */
	char *name;		/* pool name; NULL until names are decoded
				 * (map encoding version >= 5) — TODO confirm */
};
28 | |||
/*
 * An explicit (temporary) mapping of a pg to a set of osds, overriding
 * the CRUSH-computed placement.  Kept in the osdmap's pg_temp rbtree.
 */
struct ceph_pg_mapping {
	struct rb_node node;	/* link in ceph_osdmap->pg_temp */
	struct ceph_pg pgid;	/* placement group being remapped */
	int len;		/* number of valid entries in osds[] */
	int osds[];		/* flexible array of osd ids */
};
35 | |||
struct ceph_osdmap {
	struct ceph_fsid fsid;		/* cluster fs id */
	u32 epoch;			/* map version; increases monotonically */
	u32 mkfs_epoch;
	struct ceph_timespec created, modified;

	u32 flags;         /* CEPH_OSDMAP_* */

	u32 max_osd;       /* size of osd_state, _offload, _addr arrays */
	u8 *osd_state;     /* CEPH_OSD_* */
	u32 *osd_weight;   /* 0 = failed, 0x10000 = 100% normal */
	struct ceph_entity_addr *osd_addr;

	struct rb_root pg_temp;		/* explicit pg -> osd overrides */
	struct rb_root pg_pools;	/* pool id -> ceph_pg_pool_info */
	u32 pool_max;			/* highest pool id */

	/* the CRUSH map specifies the mapping of placement groups to
	 * the list of osds that store+replicate them. */
	struct crush_map *crush;
};
57 | |||
58 | /* | ||
59 | * file layout helpers | ||
60 | */ | ||
61 | #define ceph_file_layout_su(l) ((__s32)le32_to_cpu((l).fl_stripe_unit)) | ||
62 | #define ceph_file_layout_stripe_count(l) \ | ||
63 | ((__s32)le32_to_cpu((l).fl_stripe_count)) | ||
64 | #define ceph_file_layout_object_size(l) ((__s32)le32_to_cpu((l).fl_object_size)) | ||
65 | #define ceph_file_layout_cas_hash(l) ((__s32)le32_to_cpu((l).fl_cas_hash)) | ||
66 | #define ceph_file_layout_object_su(l) \ | ||
67 | ((__s32)le32_to_cpu((l).fl_object_stripe_unit)) | ||
68 | #define ceph_file_layout_pg_preferred(l) \ | ||
69 | ((__s32)le32_to_cpu((l).fl_pg_preferred)) | ||
70 | #define ceph_file_layout_pg_pool(l) \ | ||
71 | ((__s32)le32_to_cpu((l).fl_pg_pool)) | ||
72 | |||
73 | static inline unsigned ceph_file_layout_stripe_width(struct ceph_file_layout *l) | ||
74 | { | ||
75 | return le32_to_cpu(l->fl_stripe_unit) * | ||
76 | le32_to_cpu(l->fl_stripe_count); | ||
77 | } | ||
78 | |||
79 | /* "period" == bytes before i start on a new set of objects */ | ||
80 | static inline unsigned ceph_file_layout_period(struct ceph_file_layout *l) | ||
81 | { | ||
82 | return le32_to_cpu(l->fl_object_size) * | ||
83 | le32_to_cpu(l->fl_stripe_count); | ||
84 | } | ||
85 | |||
86 | |||
87 | static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) | ||
88 | { | ||
89 | return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); | ||
90 | } | ||
91 | |||
92 | static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) | ||
93 | { | ||
94 | return map && (map->flags & flag); | ||
95 | } | ||
96 | |||
97 | extern char *ceph_osdmap_state_str(char *str, int len, int state); | ||
98 | |||
99 | static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, | ||
100 | int osd) | ||
101 | { | ||
102 | if (osd >= map->max_osd) | ||
103 | return NULL; | ||
104 | return &map->osd_addr[osd]; | ||
105 | } | ||
106 | |||
107 | extern struct ceph_osdmap *osdmap_decode(void **p, void *end); | ||
108 | extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, | ||
109 | struct ceph_osdmap *map, | ||
110 | struct ceph_messenger *msgr); | ||
111 | extern void ceph_osdmap_destroy(struct ceph_osdmap *map); | ||
112 | |||
113 | /* calculate mapping of a file extent to an object */ | ||
114 | extern void ceph_calc_file_object_mapping(struct ceph_file_layout *layout, | ||
115 | u64 off, u64 *plen, | ||
116 | u64 *bno, u64 *oxoff, u64 *oxlen); | ||
117 | |||
118 | /* calculate mapping of object to a placement group */ | ||
119 | extern int ceph_calc_object_layout(struct ceph_object_layout *ol, | ||
120 | const char *oid, | ||
121 | struct ceph_file_layout *fl, | ||
122 | struct ceph_osdmap *osdmap); | ||
123 | extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, | ||
124 | int *acting); | ||
125 | extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, | ||
126 | struct ceph_pg pgid); | ||
127 | |||
128 | #endif | ||
diff --git a/fs/ceph/pagelist.c b/fs/ceph/pagelist.c deleted file mode 100644 index b6859f47d364..000000000000 --- a/fs/ceph/pagelist.c +++ /dev/null | |||
@@ -1,55 +0,0 @@ | |||
1 | |||
2 | #include <linux/gfp.h> | ||
3 | #include <linux/pagemap.h> | ||
4 | #include <linux/highmem.h> | ||
5 | |||
6 | #include "pagelist.h" | ||
7 | |||
8 | int ceph_pagelist_release(struct ceph_pagelist *pl) | ||
9 | { | ||
10 | if (pl->mapped_tail) | ||
11 | kunmap(pl->mapped_tail); | ||
12 | while (!list_empty(&pl->head)) { | ||
13 | struct page *page = list_first_entry(&pl->head, struct page, | ||
14 | lru); | ||
15 | list_del(&page->lru); | ||
16 | __free_page(page); | ||
17 | } | ||
18 | return 0; | ||
19 | } | ||
20 | |||
21 | static int ceph_pagelist_addpage(struct ceph_pagelist *pl) | ||
22 | { | ||
23 | struct page *page = __page_cache_alloc(GFP_NOFS); | ||
24 | if (!page) | ||
25 | return -ENOMEM; | ||
26 | pl->room += PAGE_SIZE; | ||
27 | list_add_tail(&page->lru, &pl->head); | ||
28 | if (pl->mapped_tail) | ||
29 | kunmap(pl->mapped_tail); | ||
30 | pl->mapped_tail = kmap(page); | ||
31 | return 0; | ||
32 | } | ||
33 | |||
/*
 * Append len bytes from buf to the end of the pagelist.
 *
 * Fills whatever room remains in the mapped tail page, adding (and
 * mapping) new pages as needed.  Returns 0 on success or -ENOMEM if a
 * page allocation fails; on failure the pagelist may already contain a
 * partial copy of buf.
 *
 * NOTE(review): on the very first call room == 0 and mapped_tail is
 * NULL, so the loop's memcpy runs with a NULL destination and length 0
 * before the first page is added — harmless in practice, but verify.
 */
int ceph_pagelist_append(struct ceph_pagelist *pl, void *buf, size_t len)
{
	while (pl->room < len) {
		size_t bit = pl->room;	/* bytes that still fit in the tail */
		int ret;

		/* pl->length & ~PAGE_CACHE_MASK == offset into tail page */
		memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK),
		       buf, bit);
		pl->length += bit;
		pl->room -= bit;
		buf += bit;
		len -= bit;
		ret = ceph_pagelist_addpage(pl);	/* maps a fresh tail */
		if (ret)
			return ret;
	}

	/* remainder fits entirely in the current tail page */
	memcpy(pl->mapped_tail + (pl->length & ~PAGE_CACHE_MASK), buf, len);
	pl->length += len;
	pl->room -= len;
	return 0;
}
diff --git a/fs/ceph/pagelist.h b/fs/ceph/pagelist.h deleted file mode 100644 index e8a4187e1087..000000000000 --- a/fs/ceph/pagelist.h +++ /dev/null | |||
@@ -1,54 +0,0 @@ | |||
1 | #ifndef __FS_CEPH_PAGELIST_H | ||
2 | #define __FS_CEPH_PAGELIST_H | ||
3 | |||
4 | #include <linux/list.h> | ||
5 | |||
6 | struct ceph_pagelist { | ||
7 | struct list_head head; | ||
8 | void *mapped_tail; | ||
9 | size_t length; | ||
10 | size_t room; | ||
11 | }; | ||
12 | |||
13 | static inline void ceph_pagelist_init(struct ceph_pagelist *pl) | ||
14 | { | ||
15 | INIT_LIST_HEAD(&pl->head); | ||
16 | pl->mapped_tail = NULL; | ||
17 | pl->length = 0; | ||
18 | pl->room = 0; | ||
19 | } | ||
20 | extern int ceph_pagelist_release(struct ceph_pagelist *pl); | ||
21 | |||
22 | extern int ceph_pagelist_append(struct ceph_pagelist *pl, void *d, size_t l); | ||
23 | |||
/* Append a 64-bit value in little-endian wire order. */
static inline int ceph_pagelist_encode_64(struct ceph_pagelist *pl, u64 v)
{
	__le64 ev = cpu_to_le64(v);
	return ceph_pagelist_append(pl, &ev, sizeof(ev));
}
/* Append a 32-bit value in little-endian wire order. */
static inline int ceph_pagelist_encode_32(struct ceph_pagelist *pl, u32 v)
{
	__le32 ev = cpu_to_le32(v);
	return ceph_pagelist_append(pl, &ev, sizeof(ev));
}
/* Append a 16-bit value in little-endian wire order. */
static inline int ceph_pagelist_encode_16(struct ceph_pagelist *pl, u16 v)
{
	__le16 ev = cpu_to_le16(v);
	return ceph_pagelist_append(pl, &ev, sizeof(ev));
}
/* Append a single byte. */
static inline int ceph_pagelist_encode_8(struct ceph_pagelist *pl, u8 v)
{
	return ceph_pagelist_append(pl, &v, 1);
}
/*
 * Append a 32-bit (le) length prefix followed by the string bytes.
 * s is not dereferenced when len == 0, so it may be NULL in that case.
 */
static inline int ceph_pagelist_encode_string(struct ceph_pagelist *pl,
					      char *s, size_t len)
{
	int ret = ceph_pagelist_encode_32(pl, len);
	if (ret)
		return ret;
	if (len)
		return ceph_pagelist_append(pl, s, len);
	return 0;
}
53 | |||
54 | #endif | ||
diff --git a/fs/ceph/rados.h b/fs/ceph/rados.h deleted file mode 100644 index 6d5247f2e81b..000000000000 --- a/fs/ceph/rados.h +++ /dev/null | |||
@@ -1,405 +0,0 @@ | |||
1 | #ifndef CEPH_RADOS_H | ||
2 | #define CEPH_RADOS_H | ||
3 | |||
4 | /* | ||
5 | * Data types for the Ceph distributed object storage layer RADOS | ||
6 | * (Reliable Autonomic Distributed Object Store). | ||
7 | */ | ||
8 | |||
9 | #include "msgr.h" | ||
10 | |||
11 | /* | ||
12 | * osdmap encoding versions | ||
13 | */ | ||
14 | #define CEPH_OSDMAP_INC_VERSION 5 | ||
15 | #define CEPH_OSDMAP_INC_VERSION_EXT 5 | ||
16 | #define CEPH_OSDMAP_VERSION 5 | ||
17 | #define CEPH_OSDMAP_VERSION_EXT 5 | ||
18 | |||
19 | /* | ||
20 | * fs id | ||
21 | */ | ||
22 | struct ceph_fsid { | ||
23 | unsigned char fsid[16]; | ||
24 | }; | ||
25 | |||
/* Total ordering on fsids via memcmp(); returns 0 when equal. */
static inline int ceph_fsid_compare(const struct ceph_fsid *a,
				    const struct ceph_fsid *b)
{
	return memcmp(a, b, sizeof(*a));
}
31 | |||
32 | /* | ||
33 | * ino, object, etc. | ||
34 | */ | ||
35 | typedef __le64 ceph_snapid_t; | ||
36 | #define CEPH_SNAPDIR ((__u64)(-1)) /* reserved for hidden .snap dir */ | ||
37 | #define CEPH_NOSNAP ((__u64)(-2)) /* "head", "live" revision */ | ||
38 | #define CEPH_MAXSNAP ((__u64)(-3)) /* largest valid snapid */ | ||
39 | |||
40 | struct ceph_timespec { | ||
41 | __le32 tv_sec; | ||
42 | __le32 tv_nsec; | ||
43 | } __attribute__ ((packed)); | ||
44 | |||
45 | |||
46 | /* | ||
47 | * object layout - how objects are mapped into PGs | ||
48 | */ | ||
49 | #define CEPH_OBJECT_LAYOUT_HASH 1 | ||
50 | #define CEPH_OBJECT_LAYOUT_LINEAR 2 | ||
51 | #define CEPH_OBJECT_LAYOUT_HASHINO 3 | ||
52 | |||
53 | /* | ||
54 | * pg layout -- how PGs are mapped onto (sets of) OSDs | ||
55 | */ | ||
56 | #define CEPH_PG_LAYOUT_CRUSH 0 | ||
57 | #define CEPH_PG_LAYOUT_HASH 1 | ||
58 | #define CEPH_PG_LAYOUT_LINEAR 2 | ||
59 | #define CEPH_PG_LAYOUT_HYBRID 3 | ||
60 | |||
61 | #define CEPH_PG_MAX_SIZE 16 /* max # osds in a single pg */ | ||
62 | |||
63 | /* | ||
64 | * placement group. | ||
65 | * we encode this into one __le64. | ||
66 | */ | ||
67 | struct ceph_pg { | ||
68 | __le16 preferred; /* preferred primary osd */ | ||
69 | __le16 ps; /* placement seed */ | ||
70 | __le32 pool; /* object pool */ | ||
71 | } __attribute__ ((packed)); | ||
72 | |||
73 | /* | ||
74 | * pg_pool is a set of pgs storing a pool of objects | ||
75 | * | ||
76 | * pg_num -- base number of pseudorandomly placed pgs | ||
77 | * | ||
78 | * pgp_num -- effective number when calculating pg placement. this | ||
79 | * is used for pg_num increases. new pgs result in data being "split" | ||
81 | * into new pgs. for this to proceed smoothly, new pgs are initially | ||
81 | * colocated with their parents; that is, pgp_num doesn't increase | ||
82 | * until the new pgs have successfully split. only _then_ are the new | ||
83 | * pgs placed independently. | ||
84 | * | ||
85 | * lpg_num -- localized pg count (per device). replicas are randomly | ||
86 | * selected. | ||
87 | * | ||
88 | * lpgp_num -- as above. | ||
89 | */ | ||
90 | #define CEPH_PG_TYPE_REP 1 | ||
91 | #define CEPH_PG_TYPE_RAID4 2 | ||
92 | #define CEPH_PG_POOL_VERSION 2 | ||
93 | struct ceph_pg_pool { | ||
94 | __u8 type; /* CEPH_PG_TYPE_* */ | ||
95 | __u8 size; /* number of osds in each pg */ | ||
96 | __u8 crush_ruleset; /* crush placement rule */ | ||
97 | __u8 object_hash; /* hash mapping object name to ps */ | ||
98 | __le32 pg_num, pgp_num; /* number of pg's */ | ||
99 | __le32 lpg_num, lpgp_num; /* number of localized pg's */ | ||
100 | __le32 last_change; /* most recent epoch changed */ | ||
101 | __le64 snap_seq; /* seq for per-pool snapshot */ | ||
102 | __le32 snap_epoch; /* epoch of last snap */ | ||
103 | __le32 num_snaps; | ||
104 | __le32 num_removed_snap_intervals; /* if non-empty, NO per-pool snaps */ | ||
105 | __le64 auid; /* who owns the pg */ | ||
106 | } __attribute__ ((packed)); | ||
107 | |||
/*
 * stable_mod func is used to control number of placement groups.
 * similar to straight-up modulo, but produces a stable mapping as b
 * increases over time.  b is the number of bins, and bmask is the
 * containing power of 2 minus 1.
 *
 * b <= bmask and bmask=(2**n)-1
 * e.g., b=12 -> bmask=15, b=123 -> bmask=127
 */
static inline int ceph_stable_mod(int x, int b, int bmask)
{
	int r = x & bmask;

	/* values that land at or past b fold into the lower half range */
	return r < b ? r : x & (bmask >> 1);
}
124 | |||
125 | /* | ||
126 | * object layout - how a given object should be stored. | ||
127 | */ | ||
128 | struct ceph_object_layout { | ||
129 | struct ceph_pg ol_pgid; /* raw pg, with _full_ ps precision. */ | ||
130 | __le32 ol_stripe_unit; /* for per-object parity, if any */ | ||
131 | } __attribute__ ((packed)); | ||
132 | |||
133 | /* | ||
134 | * compound epoch+version, used by storage layer to serialize mutations | ||
135 | */ | ||
136 | struct ceph_eversion { | ||
137 | __le32 epoch; | ||
138 | __le64 version; | ||
139 | } __attribute__ ((packed)); | ||
140 | |||
141 | /* | ||
142 | * osd map bits | ||
143 | */ | ||
144 | |||
145 | /* status bits */ | ||
146 | #define CEPH_OSD_EXISTS 1 | ||
147 | #define CEPH_OSD_UP 2 | ||
148 | |||
149 | /* osd weights. fixed point value: 0x10000 == 1.0 ("in"), 0 == "out" */ | ||
150 | #define CEPH_OSD_IN 0x10000 | ||
151 | #define CEPH_OSD_OUT 0 | ||
152 | |||
153 | |||
154 | /* | ||
155 | * osd map flag bits | ||
156 | */ | ||
157 | #define CEPH_OSDMAP_NEARFULL (1<<0) /* sync writes (near ENOSPC) */ | ||
158 | #define CEPH_OSDMAP_FULL (1<<1) /* no data writes (ENOSPC) */ | ||
159 | #define CEPH_OSDMAP_PAUSERD (1<<2) /* pause all reads */ | ||
160 | #define CEPH_OSDMAP_PAUSEWR (1<<3) /* pause all writes */ | ||
161 | #define CEPH_OSDMAP_PAUSEREC (1<<4) /* pause recovery */ | ||
162 | |||
163 | /* | ||
164 | * osd ops | ||
165 | */ | ||
166 | #define CEPH_OSD_OP_MODE 0xf000 | ||
167 | #define CEPH_OSD_OP_MODE_RD 0x1000 | ||
168 | #define CEPH_OSD_OP_MODE_WR 0x2000 | ||
169 | #define CEPH_OSD_OP_MODE_RMW 0x3000 | ||
170 | #define CEPH_OSD_OP_MODE_SUB 0x4000 | ||
171 | |||
172 | #define CEPH_OSD_OP_TYPE 0x0f00 | ||
173 | #define CEPH_OSD_OP_TYPE_LOCK 0x0100 | ||
174 | #define CEPH_OSD_OP_TYPE_DATA 0x0200 | ||
175 | #define CEPH_OSD_OP_TYPE_ATTR 0x0300 | ||
176 | #define CEPH_OSD_OP_TYPE_EXEC 0x0400 | ||
177 | #define CEPH_OSD_OP_TYPE_PG 0x0500 | ||
178 | |||
179 | enum { | ||
180 | /** data **/ | ||
181 | /* read */ | ||
182 | CEPH_OSD_OP_READ = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 1, | ||
183 | CEPH_OSD_OP_STAT = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 2, | ||
184 | |||
185 | /* fancy read */ | ||
186 | CEPH_OSD_OP_MASKTRUNC = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 4, | ||
187 | |||
188 | /* write */ | ||
189 | CEPH_OSD_OP_WRITE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 1, | ||
190 | CEPH_OSD_OP_WRITEFULL = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 2, | ||
191 | CEPH_OSD_OP_TRUNCATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 3, | ||
192 | CEPH_OSD_OP_ZERO = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 4, | ||
193 | CEPH_OSD_OP_DELETE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 5, | ||
194 | |||
195 | /* fancy write */ | ||
196 | CEPH_OSD_OP_APPEND = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 6, | ||
197 | CEPH_OSD_OP_STARTSYNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 7, | ||
198 | CEPH_OSD_OP_SETTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 8, | ||
199 | CEPH_OSD_OP_TRIMTRUNC = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 9, | ||
200 | |||
201 | CEPH_OSD_OP_TMAPUP = CEPH_OSD_OP_MODE_RMW | CEPH_OSD_OP_TYPE_DATA | 10, | ||
202 | CEPH_OSD_OP_TMAPPUT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 11, | ||
203 | CEPH_OSD_OP_TMAPGET = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 12, | ||
204 | |||
205 | CEPH_OSD_OP_CREATE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 13, | ||
206 | CEPH_OSD_OP_ROLLBACK= CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 14, | ||
207 | |||
208 | /** attrs **/ | ||
209 | /* read */ | ||
210 | CEPH_OSD_OP_GETXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 1, | ||
211 | CEPH_OSD_OP_GETXATTRS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 2, | ||
212 | CEPH_OSD_OP_CMPXATTR = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_ATTR | 3, | ||
213 | |||
214 | /* write */ | ||
215 | CEPH_OSD_OP_SETXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 1, | ||
216 | CEPH_OSD_OP_SETXATTRS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 2, | ||
217 | CEPH_OSD_OP_RESETXATTRS = CEPH_OSD_OP_MODE_WR|CEPH_OSD_OP_TYPE_ATTR | 3, | ||
218 | CEPH_OSD_OP_RMXATTR = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_ATTR | 4, | ||
219 | |||
220 | /** subop **/ | ||
221 | CEPH_OSD_OP_PULL = CEPH_OSD_OP_MODE_SUB | 1, | ||
222 | CEPH_OSD_OP_PUSH = CEPH_OSD_OP_MODE_SUB | 2, | ||
223 | CEPH_OSD_OP_BALANCEREADS = CEPH_OSD_OP_MODE_SUB | 3, | ||
224 | CEPH_OSD_OP_UNBALANCEREADS = CEPH_OSD_OP_MODE_SUB | 4, | ||
225 | CEPH_OSD_OP_SCRUB = CEPH_OSD_OP_MODE_SUB | 5, | ||
226 | |||
227 | /** lock **/ | ||
228 | CEPH_OSD_OP_WRLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 1, | ||
229 | CEPH_OSD_OP_WRUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 2, | ||
230 | CEPH_OSD_OP_RDLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 3, | ||
231 | CEPH_OSD_OP_RDUNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 4, | ||
232 | CEPH_OSD_OP_UPLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 5, | ||
233 | CEPH_OSD_OP_DNLOCK = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_LOCK | 6, | ||
234 | |||
235 | /** exec **/ | ||
236 | CEPH_OSD_OP_CALL = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_EXEC | 1, | ||
237 | |||
238 | /** pg **/ | ||
239 | CEPH_OSD_OP_PGLS = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_PG | 1, | ||
240 | }; | ||
241 | |||
/* Classify an osd op code by its CEPH_OSD_OP_TYPE field (mask 0x0f00). */
static inline int ceph_osd_op_type_lock(int op)
{
	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_LOCK;
}
static inline int ceph_osd_op_type_data(int op)
{
	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_DATA;
}
static inline int ceph_osd_op_type_attr(int op)
{
	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_ATTR;
}
static inline int ceph_osd_op_type_exec(int op)
{
	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_EXEC;
}
static inline int ceph_osd_op_type_pg(int op)
{
	return (op & CEPH_OSD_OP_TYPE) == CEPH_OSD_OP_TYPE_PG;
}

/* Classify an osd op code by its CEPH_OSD_OP_MODE field (mask 0xf000). */
static inline int ceph_osd_op_mode_subop(int op)
{
	return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_SUB;
}
static inline int ceph_osd_op_mode_read(int op)
{
	return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_RD;
}
static inline int ceph_osd_op_mode_modify(int op)
{
	return (op & CEPH_OSD_OP_MODE) == CEPH_OSD_OP_MODE_WR;
}
275 | |||
276 | /* | ||
277 | * note that the following tmap stuff is also defined in the ceph librados.h | ||
278 | * any modification here needs to be updated there | ||
279 | */ | ||
280 | #define CEPH_OSD_TMAP_HDR 'h' | ||
281 | #define CEPH_OSD_TMAP_SET 's' | ||
282 | #define CEPH_OSD_TMAP_RM 'r' | ||
283 | |||
284 | extern const char *ceph_osd_op_name(int op); | ||
285 | |||
286 | |||
287 | /* | ||
288 | * osd op flags | ||
289 | * | ||
290 | * An op may be READ, WRITE, or READ|WRITE. | ||
291 | */ | ||
292 | enum { | ||
293 | CEPH_OSD_FLAG_ACK = 1, /* want (or is) "ack" ack */ | ||
294 | CEPH_OSD_FLAG_ONNVRAM = 2, /* want (or is) "onnvram" ack */ | ||
295 | CEPH_OSD_FLAG_ONDISK = 4, /* want (or is) "ondisk" ack */ | ||
296 | CEPH_OSD_FLAG_RETRY = 8, /* resend attempt */ | ||
297 | CEPH_OSD_FLAG_READ = 16, /* op may read */ | ||
298 | CEPH_OSD_FLAG_WRITE = 32, /* op may write */ | ||
299 | CEPH_OSD_FLAG_ORDERSNAP = 64, /* EOLDSNAP if snapc is out of order */ | ||
300 | CEPH_OSD_FLAG_PEERSTAT = 128, /* msg includes osd_peer_stat */ | ||
301 | CEPH_OSD_FLAG_BALANCE_READS = 256, | ||
302 | CEPH_OSD_FLAG_PARALLELEXEC = 512, /* execute op in parallel */ | ||
303 | CEPH_OSD_FLAG_PGOP = 1024, /* pg op, no object */ | ||
304 | CEPH_OSD_FLAG_EXEC = 2048, /* op may exec */ | ||
305 | CEPH_OSD_FLAG_EXEC_PUBLIC = 4096, /* op may exec (public) */ | ||
306 | }; | ||
307 | |||
308 | enum { | ||
309 | CEPH_OSD_OP_FLAG_EXCL = 1, /* EXCL object create */ | ||
310 | }; | ||
311 | |||
312 | #define EOLDSNAPC ERESTART /* ORDERSNAP flag set; writer has old snapc*/ | ||
313 | #define EBLACKLISTED ESHUTDOWN /* blacklisted */ | ||
314 | |||
315 | /* xattr comparison */ | ||
316 | enum { | ||
317 | CEPH_OSD_CMPXATTR_OP_NOP = 0, | ||
318 | CEPH_OSD_CMPXATTR_OP_EQ = 1, | ||
319 | CEPH_OSD_CMPXATTR_OP_NE = 2, | ||
320 | CEPH_OSD_CMPXATTR_OP_GT = 3, | ||
321 | CEPH_OSD_CMPXATTR_OP_GTE = 4, | ||
322 | CEPH_OSD_CMPXATTR_OP_LT = 5, | ||
323 | CEPH_OSD_CMPXATTR_OP_LTE = 6 | ||
324 | }; | ||
325 | |||
326 | enum { | ||
327 | CEPH_OSD_CMPXATTR_MODE_STRING = 1, | ||
328 | CEPH_OSD_CMPXATTR_MODE_U64 = 2 | ||
329 | }; | ||
330 | |||
331 | /* | ||
332 | * an individual object operation. each may be accompanied by some data | ||
333 | * payload | ||
334 | */ | ||
335 | struct ceph_osd_op { | ||
336 | __le16 op; /* CEPH_OSD_OP_* */ | ||
337 | __le32 flags; /* CEPH_OSD_FLAG_* */ | ||
338 | union { | ||
339 | struct { | ||
340 | __le64 offset, length; | ||
341 | __le64 truncate_size; | ||
342 | __le32 truncate_seq; | ||
343 | } __attribute__ ((packed)) extent; | ||
344 | struct { | ||
345 | __le32 name_len; | ||
346 | __le32 value_len; | ||
347 | __u8 cmp_op; /* CEPH_OSD_CMPXATTR_OP_* */ | ||
348 | __u8 cmp_mode; /* CEPH_OSD_CMPXATTR_MODE_* */ | ||
349 | } __attribute__ ((packed)) xattr; | ||
350 | struct { | ||
351 | __u8 class_len; | ||
352 | __u8 method_len; | ||
353 | __u8 argc; | ||
354 | __le32 indata_len; | ||
355 | } __attribute__ ((packed)) cls; | ||
356 | struct { | ||
357 | __le64 cookie, count; | ||
358 | } __attribute__ ((packed)) pgls; | ||
359 | struct { | ||
360 | __le64 snapid; | ||
361 | } __attribute__ ((packed)) snap; | ||
362 | }; | ||
363 | __le32 payload_len; | ||
364 | } __attribute__ ((packed)); | ||
365 | |||
366 | /* | ||
367 | * osd request message header. each request may include multiple | ||
368 | * ceph_osd_op object operations. | ||
369 | */ | ||
370 | struct ceph_osd_request_head { | ||
371 | __le32 client_inc; /* client incarnation */ | ||
372 | struct ceph_object_layout layout; /* pgid */ | ||
373 | __le32 osdmap_epoch; /* client's osdmap epoch */ | ||
374 | |||
375 | __le32 flags; | ||
376 | |||
377 | struct ceph_timespec mtime; /* for mutations only */ | ||
378 | struct ceph_eversion reassert_version; /* if we are replaying op */ | ||
379 | |||
380 | __le32 object_len; /* length of object name */ | ||
381 | |||
382 | __le64 snapid; /* snapid to read */ | ||
383 | __le64 snap_seq; /* writer's snap context */ | ||
384 | __le32 num_snaps; | ||
385 | |||
386 | __le16 num_ops; | ||
387 | struct ceph_osd_op ops[]; /* followed by ops[], obj, ticket, snaps */ | ||
388 | } __attribute__ ((packed)); | ||
389 | |||
390 | struct ceph_osd_reply_head { | ||
391 | __le32 client_inc; /* client incarnation */ | ||
392 | __le32 flags; | ||
393 | struct ceph_object_layout layout; | ||
394 | __le32 osdmap_epoch; | ||
395 | struct ceph_eversion reassert_version; /* for replaying uncommitted */ | ||
396 | |||
397 | __le32 result; /* result code */ | ||
398 | |||
399 | __le32 object_len; /* length of object name */ | ||
400 | __le32 num_ops; | ||
401 | struct ceph_osd_op ops[0]; /* ops[], object */ | ||
402 | } __attribute__ ((packed)); | ||
403 | |||
404 | |||
405 | #endif | ||
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 4868b9dcac5a..39c243acd062 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c | |||
@@ -1,10 +1,12 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | 2 | ||
3 | #include <linux/sort.h> | 3 | #include <linux/sort.h> |
4 | #include <linux/slab.h> | 4 | #include <linux/slab.h> |
5 | 5 | ||
6 | #include "super.h" | 6 | #include "super.h" |
7 | #include "decode.h" | 7 | #include "mds_client.h" |
8 | |||
9 | #include <linux/ceph/decode.h> | ||
8 | 10 | ||
9 | /* | 11 | /* |
10 | * Snapshots in ceph are driven in large part by cooperation from the | 12 | * Snapshots in ceph are driven in large part by cooperation from the |
@@ -119,6 +121,7 @@ static struct ceph_snap_realm *ceph_create_snap_realm( | |||
119 | INIT_LIST_HEAD(&realm->children); | 121 | INIT_LIST_HEAD(&realm->children); |
120 | INIT_LIST_HEAD(&realm->child_item); | 122 | INIT_LIST_HEAD(&realm->child_item); |
121 | INIT_LIST_HEAD(&realm->empty_item); | 123 | INIT_LIST_HEAD(&realm->empty_item); |
124 | INIT_LIST_HEAD(&realm->dirty_item); | ||
122 | INIT_LIST_HEAD(&realm->inodes_with_caps); | 125 | INIT_LIST_HEAD(&realm->inodes_with_caps); |
123 | spin_lock_init(&realm->inodes_with_caps_lock); | 126 | spin_lock_init(&realm->inodes_with_caps_lock); |
124 | __insert_snap_realm(&mdsc->snap_realms, realm); | 127 | __insert_snap_realm(&mdsc->snap_realms, realm); |
@@ -467,7 +470,7 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) | |||
467 | INIT_LIST_HEAD(&capsnap->ci_item); | 470 | INIT_LIST_HEAD(&capsnap->ci_item); |
468 | INIT_LIST_HEAD(&capsnap->flushing_item); | 471 | INIT_LIST_HEAD(&capsnap->flushing_item); |
469 | 472 | ||
470 | capsnap->follows = snapc->seq - 1; | 473 | capsnap->follows = snapc->seq; |
471 | capsnap->issued = __ceph_caps_issued(ci, NULL); | 474 | capsnap->issued = __ceph_caps_issued(ci, NULL); |
472 | capsnap->dirty = dirty; | 475 | capsnap->dirty = dirty; |
473 | 476 | ||
@@ -525,7 +528,7 @@ int __ceph_finish_cap_snap(struct ceph_inode_info *ci, | |||
525 | struct ceph_cap_snap *capsnap) | 528 | struct ceph_cap_snap *capsnap) |
526 | { | 529 | { |
527 | struct inode *inode = &ci->vfs_inode; | 530 | struct inode *inode = &ci->vfs_inode; |
528 | struct ceph_mds_client *mdsc = &ceph_sb_to_client(inode->i_sb)->mdsc; | 531 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
529 | 532 | ||
530 | BUG_ON(capsnap->writing); | 533 | BUG_ON(capsnap->writing); |
531 | capsnap->size = inode->i_size; | 534 | capsnap->size = inode->i_size; |
@@ -604,6 +607,7 @@ int ceph_update_snap_trace(struct ceph_mds_client *mdsc, | |||
604 | struct ceph_snap_realm *realm; | 607 | struct ceph_snap_realm *realm; |
605 | int invalidate = 0; | 608 | int invalidate = 0; |
606 | int err = -ENOMEM; | 609 | int err = -ENOMEM; |
610 | LIST_HEAD(dirty_realms); | ||
607 | 611 | ||
608 | dout("update_snap_trace deletion=%d\n", deletion); | 612 | dout("update_snap_trace deletion=%d\n", deletion); |
609 | more: | 613 | more: |
@@ -626,24 +630,6 @@ more: | |||
626 | } | 630 | } |
627 | } | 631 | } |
628 | 632 | ||
629 | if (le64_to_cpu(ri->seq) > realm->seq) { | ||
630 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
631 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
632 | /* | ||
633 | * if the realm seq has changed, queue a cap_snap for every | ||
634 | * inode with open caps. we do this _before_ we update | ||
635 | * the realm info so that we prepare for writeback under the | ||
636 | * _previous_ snap context. | ||
637 | * | ||
638 | * ...unless it's a snap deletion! | ||
639 | */ | ||
640 | if (!deletion) | ||
641 | queue_realm_cap_snaps(realm); | ||
642 | } else { | ||
643 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
644 | realm->ino, realm, realm->seq); | ||
645 | } | ||
646 | |||
647 | /* ensure the parent is correct */ | 633 | /* ensure the parent is correct */ |
648 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); | 634 | err = adjust_snap_realm_parent(mdsc, realm, le64_to_cpu(ri->parent)); |
649 | if (err < 0) | 635 | if (err < 0) |
@@ -651,6 +637,8 @@ more: | |||
651 | invalidate += err; | 637 | invalidate += err; |
652 | 638 | ||
653 | if (le64_to_cpu(ri->seq) > realm->seq) { | 639 | if (le64_to_cpu(ri->seq) > realm->seq) { |
640 | dout("update_snap_trace updating %llx %p %lld -> %lld\n", | ||
641 | realm->ino, realm, realm->seq, le64_to_cpu(ri->seq)); | ||
654 | /* update realm parameters, snap lists */ | 642 | /* update realm parameters, snap lists */ |
655 | realm->seq = le64_to_cpu(ri->seq); | 643 | realm->seq = le64_to_cpu(ri->seq); |
656 | realm->created = le64_to_cpu(ri->created); | 644 | realm->created = le64_to_cpu(ri->created); |
@@ -668,9 +656,17 @@ more: | |||
668 | if (err < 0) | 656 | if (err < 0) |
669 | goto fail; | 657 | goto fail; |
670 | 658 | ||
659 | /* queue realm for cap_snap creation */ | ||
660 | list_add(&realm->dirty_item, &dirty_realms); | ||
661 | |||
671 | invalidate = 1; | 662 | invalidate = 1; |
672 | } else if (!realm->cached_context) { | 663 | } else if (!realm->cached_context) { |
664 | dout("update_snap_trace %llx %p seq %lld new\n", | ||
665 | realm->ino, realm, realm->seq); | ||
673 | invalidate = 1; | 666 | invalidate = 1; |
667 | } else { | ||
668 | dout("update_snap_trace %llx %p seq %lld unchanged\n", | ||
669 | realm->ino, realm, realm->seq); | ||
674 | } | 670 | } |
675 | 671 | ||
676 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, | 672 | dout("done with %llx %p, invalidated=%d, %p %p\n", realm->ino, |
@@ -683,6 +679,14 @@ more: | |||
683 | if (invalidate) | 679 | if (invalidate) |
684 | rebuild_snap_realms(realm); | 680 | rebuild_snap_realms(realm); |
685 | 681 | ||
682 | /* | ||
683 | * queue cap snaps _after_ we've built the new snap contexts, | ||
684 | * so that i_head_snapc can be set appropriately. | ||
685 | */ | ||
686 | list_for_each_entry(realm, &dirty_realms, dirty_item) { | ||
687 | queue_realm_cap_snaps(realm); | ||
688 | } | ||
689 | |||
686 | __cleanup_empty_realms(mdsc); | 690 | __cleanup_empty_realms(mdsc); |
687 | return 0; | 691 | return 0; |
688 | 692 | ||
@@ -715,7 +719,7 @@ static void flush_snaps(struct ceph_mds_client *mdsc) | |||
715 | igrab(inode); | 719 | igrab(inode); |
716 | spin_unlock(&mdsc->snap_flush_lock); | 720 | spin_unlock(&mdsc->snap_flush_lock); |
717 | spin_lock(&inode->i_lock); | 721 | spin_lock(&inode->i_lock); |
718 | __ceph_flush_snaps(ci, &session); | 722 | __ceph_flush_snaps(ci, &session, 0); |
719 | spin_unlock(&inode->i_lock); | 723 | spin_unlock(&inode->i_lock); |
720 | iput(inode); | 724 | iput(inode); |
721 | spin_lock(&mdsc->snap_flush_lock); | 725 | spin_lock(&mdsc->snap_flush_lock); |
@@ -745,7 +749,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
745 | struct ceph_mds_session *session, | 749 | struct ceph_mds_session *session, |
746 | struct ceph_msg *msg) | 750 | struct ceph_msg *msg) |
747 | { | 751 | { |
748 | struct super_block *sb = mdsc->client->sb; | 752 | struct super_block *sb = mdsc->fsc->sb; |
749 | int mds = session->s_mds; | 753 | int mds = session->s_mds; |
750 | u64 split; | 754 | u64 split; |
751 | int op; | 755 | int op; |
@@ -816,6 +820,7 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
816 | }; | 820 | }; |
817 | struct inode *inode = ceph_find_inode(sb, vino); | 821 | struct inode *inode = ceph_find_inode(sb, vino); |
818 | struct ceph_inode_info *ci; | 822 | struct ceph_inode_info *ci; |
823 | struct ceph_snap_realm *oldrealm; | ||
819 | 824 | ||
820 | if (!inode) | 825 | if (!inode) |
821 | continue; | 826 | continue; |
@@ -841,18 +846,19 @@ void ceph_handle_snap(struct ceph_mds_client *mdsc, | |||
841 | dout(" will move %p to split realm %llx %p\n", | 846 | dout(" will move %p to split realm %llx %p\n", |
842 | inode, realm->ino, realm); | 847 | inode, realm->ino, realm); |
843 | /* | 848 | /* |
844 | * Remove the inode from the realm's inode | 849 | * Move the inode to the new realm |
845 | * list, but don't add it to the new realm | ||
846 | * yet. We don't want the cap_snap to be | ||
847 | * queued (again) by ceph_update_snap_trace() | ||
848 | * below. Queue it _now_, under the old context. | ||
849 | */ | 850 | */ |
850 | spin_lock(&realm->inodes_with_caps_lock); | 851 | spin_lock(&realm->inodes_with_caps_lock); |
851 | list_del_init(&ci->i_snap_realm_item); | 852 | list_del_init(&ci->i_snap_realm_item); |
853 | list_add(&ci->i_snap_realm_item, | ||
854 | &realm->inodes_with_caps); | ||
855 | oldrealm = ci->i_snap_realm; | ||
856 | ci->i_snap_realm = realm; | ||
852 | spin_unlock(&realm->inodes_with_caps_lock); | 857 | spin_unlock(&realm->inodes_with_caps_lock); |
853 | spin_unlock(&inode->i_lock); | 858 | spin_unlock(&inode->i_lock); |
854 | 859 | ||
855 | ceph_queue_cap_snap(ci); | 860 | ceph_get_snap_realm(mdsc, realm); |
861 | ceph_put_snap_realm(mdsc, oldrealm); | ||
856 | 862 | ||
857 | iput(inode); | 863 | iput(inode); |
858 | continue; | 864 | continue; |
@@ -880,43 +886,9 @@ skip_inode: | |||
880 | ceph_update_snap_trace(mdsc, p, e, | 886 | ceph_update_snap_trace(mdsc, p, e, |
881 | op == CEPH_SNAP_OP_DESTROY); | 887 | op == CEPH_SNAP_OP_DESTROY); |
882 | 888 | ||
883 | if (op == CEPH_SNAP_OP_SPLIT) { | 889 | if (op == CEPH_SNAP_OP_SPLIT) |
884 | /* | ||
885 | * ok, _now_ add the inodes into the new realm. | ||
886 | */ | ||
887 | for (i = 0; i < num_split_inos; i++) { | ||
888 | struct ceph_vino vino = { | ||
889 | .ino = le64_to_cpu(split_inos[i]), | ||
890 | .snap = CEPH_NOSNAP, | ||
891 | }; | ||
892 | struct inode *inode = ceph_find_inode(sb, vino); | ||
893 | struct ceph_inode_info *ci; | ||
894 | |||
895 | if (!inode) | ||
896 | continue; | ||
897 | ci = ceph_inode(inode); | ||
898 | spin_lock(&inode->i_lock); | ||
899 | if (list_empty(&ci->i_snap_realm_item)) { | ||
900 | struct ceph_snap_realm *oldrealm = | ||
901 | ci->i_snap_realm; | ||
902 | |||
903 | dout(" moving %p to split realm %llx %p\n", | ||
904 | inode, realm->ino, realm); | ||
905 | spin_lock(&realm->inodes_with_caps_lock); | ||
906 | list_add(&ci->i_snap_realm_item, | ||
907 | &realm->inodes_with_caps); | ||
908 | ci->i_snap_realm = realm; | ||
909 | spin_unlock(&realm->inodes_with_caps_lock); | ||
910 | ceph_get_snap_realm(mdsc, realm); | ||
911 | ceph_put_snap_realm(mdsc, oldrealm); | ||
912 | } | ||
913 | spin_unlock(&inode->i_lock); | ||
914 | iput(inode); | ||
915 | } | ||
916 | |||
917 | /* we took a reference when we created the realm, above */ | 890 | /* we took a reference when we created the realm, above */ |
918 | ceph_put_snap_realm(mdsc, realm); | 891 | ceph_put_snap_realm(mdsc, realm); |
919 | } | ||
920 | 892 | ||
921 | __cleanup_empty_realms(mdsc); | 893 | __cleanup_empty_realms(mdsc); |
922 | 894 | ||
diff --git a/fs/ceph/ceph_strings.c b/fs/ceph/strings.c index c6179d3a26a2..cd5097d7c804 100644 --- a/fs/ceph/ceph_strings.c +++ b/fs/ceph/strings.c | |||
@@ -1,71 +1,9 @@ | |||
1 | /* | 1 | /* |
2 | * Ceph string constants | 2 | * Ceph fs string constants |
3 | */ | 3 | */ |
4 | #include "types.h" | 4 | #include <linux/module.h> |
5 | #include <linux/ceph/types.h> | ||
5 | 6 | ||
6 | const char *ceph_entity_type_name(int type) | ||
7 | { | ||
8 | switch (type) { | ||
9 | case CEPH_ENTITY_TYPE_MDS: return "mds"; | ||
10 | case CEPH_ENTITY_TYPE_OSD: return "osd"; | ||
11 | case CEPH_ENTITY_TYPE_MON: return "mon"; | ||
12 | case CEPH_ENTITY_TYPE_CLIENT: return "client"; | ||
13 | case CEPH_ENTITY_TYPE_AUTH: return "auth"; | ||
14 | default: return "unknown"; | ||
15 | } | ||
16 | } | ||
17 | |||
18 | const char *ceph_osd_op_name(int op) | ||
19 | { | ||
20 | switch (op) { | ||
21 | case CEPH_OSD_OP_READ: return "read"; | ||
22 | case CEPH_OSD_OP_STAT: return "stat"; | ||
23 | |||
24 | case CEPH_OSD_OP_MASKTRUNC: return "masktrunc"; | ||
25 | |||
26 | case CEPH_OSD_OP_WRITE: return "write"; | ||
27 | case CEPH_OSD_OP_DELETE: return "delete"; | ||
28 | case CEPH_OSD_OP_TRUNCATE: return "truncate"; | ||
29 | case CEPH_OSD_OP_ZERO: return "zero"; | ||
30 | case CEPH_OSD_OP_WRITEFULL: return "writefull"; | ||
31 | case CEPH_OSD_OP_ROLLBACK: return "rollback"; | ||
32 | |||
33 | case CEPH_OSD_OP_APPEND: return "append"; | ||
34 | case CEPH_OSD_OP_STARTSYNC: return "startsync"; | ||
35 | case CEPH_OSD_OP_SETTRUNC: return "settrunc"; | ||
36 | case CEPH_OSD_OP_TRIMTRUNC: return "trimtrunc"; | ||
37 | |||
38 | case CEPH_OSD_OP_TMAPUP: return "tmapup"; | ||
39 | case CEPH_OSD_OP_TMAPGET: return "tmapget"; | ||
40 | case CEPH_OSD_OP_TMAPPUT: return "tmapput"; | ||
41 | |||
42 | case CEPH_OSD_OP_GETXATTR: return "getxattr"; | ||
43 | case CEPH_OSD_OP_GETXATTRS: return "getxattrs"; | ||
44 | case CEPH_OSD_OP_SETXATTR: return "setxattr"; | ||
45 | case CEPH_OSD_OP_SETXATTRS: return "setxattrs"; | ||
46 | case CEPH_OSD_OP_RESETXATTRS: return "resetxattrs"; | ||
47 | case CEPH_OSD_OP_RMXATTR: return "rmxattr"; | ||
48 | case CEPH_OSD_OP_CMPXATTR: return "cmpxattr"; | ||
49 | |||
50 | case CEPH_OSD_OP_PULL: return "pull"; | ||
51 | case CEPH_OSD_OP_PUSH: return "push"; | ||
52 | case CEPH_OSD_OP_BALANCEREADS: return "balance-reads"; | ||
53 | case CEPH_OSD_OP_UNBALANCEREADS: return "unbalance-reads"; | ||
54 | case CEPH_OSD_OP_SCRUB: return "scrub"; | ||
55 | |||
56 | case CEPH_OSD_OP_WRLOCK: return "wrlock"; | ||
57 | case CEPH_OSD_OP_WRUNLOCK: return "wrunlock"; | ||
58 | case CEPH_OSD_OP_RDLOCK: return "rdlock"; | ||
59 | case CEPH_OSD_OP_RDUNLOCK: return "rdunlock"; | ||
60 | case CEPH_OSD_OP_UPLOCK: return "uplock"; | ||
61 | case CEPH_OSD_OP_DNLOCK: return "dnlock"; | ||
62 | |||
63 | case CEPH_OSD_OP_CALL: return "call"; | ||
64 | |||
65 | case CEPH_OSD_OP_PGLS: return "pgls"; | ||
66 | } | ||
67 | return "???"; | ||
68 | } | ||
69 | 7 | ||
70 | const char *ceph_mds_state_name(int s) | 8 | const char *ceph_mds_state_name(int s) |
71 | { | 9 | { |
@@ -177,17 +115,3 @@ const char *ceph_snap_op_name(int o) | |||
177 | } | 115 | } |
178 | return "???"; | 116 | return "???"; |
179 | } | 117 | } |
180 | |||
181 | const char *ceph_pool_op_name(int op) | ||
182 | { | ||
183 | switch (op) { | ||
184 | case POOL_OP_CREATE: return "create"; | ||
185 | case POOL_OP_DELETE: return "delete"; | ||
186 | case POOL_OP_AUID_CHANGE: return "auid change"; | ||
187 | case POOL_OP_CREATE_SNAP: return "create snap"; | ||
188 | case POOL_OP_DELETE_SNAP: return "delete snap"; | ||
189 | case POOL_OP_CREATE_UNMANAGED_SNAP: return "create unmanaged snap"; | ||
190 | case POOL_OP_DELETE_UNMANAGED_SNAP: return "delete unmanaged snap"; | ||
191 | } | ||
192 | return "???"; | ||
193 | } | ||
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 9922628532b2..d6e0e0421891 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -1,5 +1,5 @@ | |||
1 | 1 | ||
2 | #include "ceph_debug.h" | 2 | #include <linux/ceph/ceph_debug.h> |
3 | 3 | ||
4 | #include <linux/backing-dev.h> | 4 | #include <linux/backing-dev.h> |
5 | #include <linux/ctype.h> | 5 | #include <linux/ctype.h> |
@@ -15,10 +15,13 @@ | |||
15 | #include <linux/statfs.h> | 15 | #include <linux/statfs.h> |
16 | #include <linux/string.h> | 16 | #include <linux/string.h> |
17 | 17 | ||
18 | #include "decode.h" | ||
19 | #include "super.h" | 18 | #include "super.h" |
20 | #include "mon_client.h" | 19 | #include "mds_client.h" |
21 | #include "auth.h" | 20 | |
21 | #include <linux/ceph/decode.h> | ||
22 | #include <linux/ceph/mon_client.h> | ||
23 | #include <linux/ceph/auth.h> | ||
24 | #include <linux/ceph/debugfs.h> | ||
22 | 25 | ||
23 | /* | 26 | /* |
24 | * Ceph superblock operations | 27 | * Ceph superblock operations |
@@ -26,36 +29,22 @@ | |||
26 | * Handle the basics of mounting, unmounting. | 29 | * Handle the basics of mounting, unmounting. |
27 | */ | 30 | */ |
28 | 31 | ||
29 | |||
30 | /* | ||
31 | * find filename portion of a path (/foo/bar/baz -> baz) | ||
32 | */ | ||
33 | const char *ceph_file_part(const char *s, int len) | ||
34 | { | ||
35 | const char *e = s + len; | ||
36 | |||
37 | while (e != s && *(e-1) != '/') | ||
38 | e--; | ||
39 | return e; | ||
40 | } | ||
41 | |||
42 | |||
43 | /* | 32 | /* |
44 | * super ops | 33 | * super ops |
45 | */ | 34 | */ |
46 | static void ceph_put_super(struct super_block *s) | 35 | static void ceph_put_super(struct super_block *s) |
47 | { | 36 | { |
48 | struct ceph_client *client = ceph_sb_to_client(s); | 37 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
49 | 38 | ||
50 | dout("put_super\n"); | 39 | dout("put_super\n"); |
51 | ceph_mdsc_close_sessions(&client->mdsc); | 40 | ceph_mdsc_close_sessions(fsc->mdsc); |
52 | 41 | ||
53 | /* | 42 | /* |
54 | * ensure we release the bdi before put_anon_super releases | 43 | * ensure we release the bdi before put_anon_super releases |
55 | * the device name. | 44 | * the device name. |
56 | */ | 45 | */ |
57 | if (s->s_bdi == &client->backing_dev_info) { | 46 | if (s->s_bdi == &fsc->backing_dev_info) { |
58 | bdi_unregister(&client->backing_dev_info); | 47 | bdi_unregister(&fsc->backing_dev_info); |
59 | s->s_bdi = NULL; | 48 | s->s_bdi = NULL; |
60 | } | 49 | } |
61 | 50 | ||
@@ -64,14 +53,14 @@ static void ceph_put_super(struct super_block *s) | |||
64 | 53 | ||
65 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | 54 | static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) |
66 | { | 55 | { |
67 | struct ceph_client *client = ceph_inode_to_client(dentry->d_inode); | 56 | struct ceph_fs_client *fsc = ceph_inode_to_client(dentry->d_inode); |
68 | struct ceph_monmap *monmap = client->monc.monmap; | 57 | struct ceph_monmap *monmap = fsc->client->monc.monmap; |
69 | struct ceph_statfs st; | 58 | struct ceph_statfs st; |
70 | u64 fsid; | 59 | u64 fsid; |
71 | int err; | 60 | int err; |
72 | 61 | ||
73 | dout("statfs\n"); | 62 | dout("statfs\n"); |
74 | err = ceph_monc_do_statfs(&client->monc, &st); | 63 | err = ceph_monc_do_statfs(&fsc->client->monc, &st); |
75 | if (err < 0) | 64 | if (err < 0) |
76 | return err; | 65 | return err; |
77 | 66 | ||
@@ -104,238 +93,28 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
104 | 93 | ||
105 | static int ceph_sync_fs(struct super_block *sb, int wait) | 94 | static int ceph_sync_fs(struct super_block *sb, int wait) |
106 | { | 95 | { |
107 | struct ceph_client *client = ceph_sb_to_client(sb); | 96 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
108 | 97 | ||
109 | if (!wait) { | 98 | if (!wait) { |
110 | dout("sync_fs (non-blocking)\n"); | 99 | dout("sync_fs (non-blocking)\n"); |
111 | ceph_flush_dirty_caps(&client->mdsc); | 100 | ceph_flush_dirty_caps(fsc->mdsc); |
112 | dout("sync_fs (non-blocking) done\n"); | 101 | dout("sync_fs (non-blocking) done\n"); |
113 | return 0; | 102 | return 0; |
114 | } | 103 | } |
115 | 104 | ||
116 | dout("sync_fs (blocking)\n"); | 105 | dout("sync_fs (blocking)\n"); |
117 | ceph_osdc_sync(&ceph_sb_to_client(sb)->osdc); | 106 | ceph_osdc_sync(&fsc->client->osdc); |
118 | ceph_mdsc_sync(&ceph_sb_to_client(sb)->mdsc); | 107 | ceph_mdsc_sync(fsc->mdsc); |
119 | dout("sync_fs (blocking) done\n"); | 108 | dout("sync_fs (blocking) done\n"); |
120 | return 0; | 109 | return 0; |
121 | } | 110 | } |
122 | 111 | ||
123 | static int default_congestion_kb(void) | ||
124 | { | ||
125 | int congestion_kb; | ||
126 | |||
127 | /* | ||
128 | * Copied from NFS | ||
129 | * | ||
130 | * congestion size, scale with available memory. | ||
131 | * | ||
132 | * 64MB: 8192k | ||
133 | * 128MB: 11585k | ||
134 | * 256MB: 16384k | ||
135 | * 512MB: 23170k | ||
136 | * 1GB: 32768k | ||
137 | * 2GB: 46340k | ||
138 | * 4GB: 65536k | ||
139 | * 8GB: 92681k | ||
140 | * 16GB: 131072k | ||
141 | * | ||
142 | * This allows larger machines to have larger/more transfers. | ||
143 | * Limit the default to 256M | ||
144 | */ | ||
145 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
146 | if (congestion_kb > 256*1024) | ||
147 | congestion_kb = 256*1024; | ||
148 | |||
149 | return congestion_kb; | ||
150 | } | ||
151 | |||
152 | /** | ||
153 | * ceph_show_options - Show mount options in /proc/mounts | ||
154 | * @m: seq_file to write to | ||
155 | * @mnt: mount descriptor | ||
156 | */ | ||
157 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
158 | { | ||
159 | struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb); | ||
160 | struct ceph_mount_args *args = client->mount_args; | ||
161 | |||
162 | if (args->flags & CEPH_OPT_FSID) | ||
163 | seq_printf(m, ",fsid=%pU", &args->fsid); | ||
164 | if (args->flags & CEPH_OPT_NOSHARE) | ||
165 | seq_puts(m, ",noshare"); | ||
166 | if (args->flags & CEPH_OPT_DIRSTAT) | ||
167 | seq_puts(m, ",dirstat"); | ||
168 | if ((args->flags & CEPH_OPT_RBYTES) == 0) | ||
169 | seq_puts(m, ",norbytes"); | ||
170 | if (args->flags & CEPH_OPT_NOCRC) | ||
171 | seq_puts(m, ",nocrc"); | ||
172 | if (args->flags & CEPH_OPT_NOASYNCREADDIR) | ||
173 | seq_puts(m, ",noasyncreaddir"); | ||
174 | |||
175 | if (args->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
176 | seq_printf(m, ",mount_timeout=%d", args->mount_timeout); | ||
177 | if (args->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
178 | seq_printf(m, ",osd_idle_ttl=%d", args->osd_idle_ttl); | ||
179 | if (args->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
180 | seq_printf(m, ",osdtimeout=%d", args->osd_timeout); | ||
181 | if (args->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
182 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
183 | args->osd_keepalive_timeout); | ||
184 | if (args->wsize) | ||
185 | seq_printf(m, ",wsize=%d", args->wsize); | ||
186 | if (args->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
187 | seq_printf(m, ",rsize=%d", args->rsize); | ||
188 | if (args->congestion_kb != default_congestion_kb()) | ||
189 | seq_printf(m, ",write_congestion_kb=%d", args->congestion_kb); | ||
190 | if (args->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
191 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
192 | args->caps_wanted_delay_min); | ||
193 | if (args->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
194 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
195 | args->caps_wanted_delay_max); | ||
196 | if (args->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
197 | seq_printf(m, ",cap_release_safety=%d", | ||
198 | args->cap_release_safety); | ||
199 | if (args->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
200 | seq_printf(m, ",readdir_max_entries=%d", args->max_readdir); | ||
201 | if (args->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
202 | seq_printf(m, ",readdir_max_bytes=%d", args->max_readdir_bytes); | ||
203 | if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
204 | seq_printf(m, ",snapdirname=%s", args->snapdir_name); | ||
205 | if (args->name) | ||
206 | seq_printf(m, ",name=%s", args->name); | ||
207 | if (args->secret) | ||
208 | seq_puts(m, ",secret=<hidden>"); | ||
209 | return 0; | ||
210 | } | ||
211 | |||
212 | /* | ||
213 | * caches | ||
214 | */ | ||
215 | struct kmem_cache *ceph_inode_cachep; | ||
216 | struct kmem_cache *ceph_cap_cachep; | ||
217 | struct kmem_cache *ceph_dentry_cachep; | ||
218 | struct kmem_cache *ceph_file_cachep; | ||
219 | |||
220 | static void ceph_inode_init_once(void *foo) | ||
221 | { | ||
222 | struct ceph_inode_info *ci = foo; | ||
223 | inode_init_once(&ci->vfs_inode); | ||
224 | } | ||
225 | |||
226 | static int __init init_caches(void) | ||
227 | { | ||
228 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", | ||
229 | sizeof(struct ceph_inode_info), | ||
230 | __alignof__(struct ceph_inode_info), | ||
231 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), | ||
232 | ceph_inode_init_once); | ||
233 | if (ceph_inode_cachep == NULL) | ||
234 | return -ENOMEM; | ||
235 | |||
236 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
237 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
238 | if (ceph_cap_cachep == NULL) | ||
239 | goto bad_cap; | ||
240 | |||
241 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
242 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
243 | if (ceph_dentry_cachep == NULL) | ||
244 | goto bad_dentry; | ||
245 | |||
246 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
247 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
248 | if (ceph_file_cachep == NULL) | ||
249 | goto bad_file; | ||
250 | |||
251 | return 0; | ||
252 | |||
253 | bad_file: | ||
254 | kmem_cache_destroy(ceph_dentry_cachep); | ||
255 | bad_dentry: | ||
256 | kmem_cache_destroy(ceph_cap_cachep); | ||
257 | bad_cap: | ||
258 | kmem_cache_destroy(ceph_inode_cachep); | ||
259 | return -ENOMEM; | ||
260 | } | ||
261 | |||
262 | static void destroy_caches(void) | ||
263 | { | ||
264 | kmem_cache_destroy(ceph_inode_cachep); | ||
265 | kmem_cache_destroy(ceph_cap_cachep); | ||
266 | kmem_cache_destroy(ceph_dentry_cachep); | ||
267 | kmem_cache_destroy(ceph_file_cachep); | ||
268 | } | ||
269 | |||
270 | |||
271 | /* | ||
272 | * ceph_umount_begin - initiate forced umount. Tear down down the | ||
273 | * mount, skipping steps that may hang while waiting for server(s). | ||
274 | */ | ||
275 | static void ceph_umount_begin(struct super_block *sb) | ||
276 | { | ||
277 | struct ceph_client *client = ceph_sb_to_client(sb); | ||
278 | |||
279 | dout("ceph_umount_begin - starting forced umount\n"); | ||
280 | if (!client) | ||
281 | return; | ||
282 | client->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
283 | return; | ||
284 | } | ||
285 | |||
286 | static const struct super_operations ceph_super_ops = { | ||
287 | .alloc_inode = ceph_alloc_inode, | ||
288 | .destroy_inode = ceph_destroy_inode, | ||
289 | .write_inode = ceph_write_inode, | ||
290 | .sync_fs = ceph_sync_fs, | ||
291 | .put_super = ceph_put_super, | ||
292 | .show_options = ceph_show_options, | ||
293 | .statfs = ceph_statfs, | ||
294 | .umount_begin = ceph_umount_begin, | ||
295 | }; | ||
296 | |||
297 | |||
298 | const char *ceph_msg_type_name(int type) | ||
299 | { | ||
300 | switch (type) { | ||
301 | case CEPH_MSG_SHUTDOWN: return "shutdown"; | ||
302 | case CEPH_MSG_PING: return "ping"; | ||
303 | case CEPH_MSG_AUTH: return "auth"; | ||
304 | case CEPH_MSG_AUTH_REPLY: return "auth_reply"; | ||
305 | case CEPH_MSG_MON_MAP: return "mon_map"; | ||
306 | case CEPH_MSG_MON_GET_MAP: return "mon_get_map"; | ||
307 | case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe"; | ||
308 | case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack"; | ||
309 | case CEPH_MSG_STATFS: return "statfs"; | ||
310 | case CEPH_MSG_STATFS_REPLY: return "statfs_reply"; | ||
311 | case CEPH_MSG_MDS_MAP: return "mds_map"; | ||
312 | case CEPH_MSG_CLIENT_SESSION: return "client_session"; | ||
313 | case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect"; | ||
314 | case CEPH_MSG_CLIENT_REQUEST: return "client_request"; | ||
315 | case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward"; | ||
316 | case CEPH_MSG_CLIENT_REPLY: return "client_reply"; | ||
317 | case CEPH_MSG_CLIENT_CAPS: return "client_caps"; | ||
318 | case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release"; | ||
319 | case CEPH_MSG_CLIENT_SNAP: return "client_snap"; | ||
320 | case CEPH_MSG_CLIENT_LEASE: return "client_lease"; | ||
321 | case CEPH_MSG_OSD_MAP: return "osd_map"; | ||
322 | case CEPH_MSG_OSD_OP: return "osd_op"; | ||
323 | case CEPH_MSG_OSD_OPREPLY: return "osd_opreply"; | ||
324 | default: return "unknown"; | ||
325 | } | ||
326 | } | ||
327 | |||
328 | |||
329 | /* | 112 | /* |
330 | * mount options | 113 | * mount options |
331 | */ | 114 | */ |
332 | enum { | 115 | enum { |
333 | Opt_wsize, | 116 | Opt_wsize, |
334 | Opt_rsize, | 117 | Opt_rsize, |
335 | Opt_osdtimeout, | ||
336 | Opt_osdkeepalivetimeout, | ||
337 | Opt_mount_timeout, | ||
338 | Opt_osd_idle_ttl, | ||
339 | Opt_caps_wanted_delay_min, | 118 | Opt_caps_wanted_delay_min, |
340 | Opt_caps_wanted_delay_max, | 119 | Opt_caps_wanted_delay_max, |
341 | Opt_cap_release_safety, | 120 | Opt_cap_release_safety, |
@@ -344,29 +123,19 @@ enum { | |||
344 | Opt_congestion_kb, | 123 | Opt_congestion_kb, |
345 | Opt_last_int, | 124 | Opt_last_int, |
346 | /* int args above */ | 125 | /* int args above */ |
347 | Opt_fsid, | ||
348 | Opt_snapdirname, | 126 | Opt_snapdirname, |
349 | Opt_name, | ||
350 | Opt_secret, | ||
351 | Opt_last_string, | 127 | Opt_last_string, |
352 | /* string args above */ | 128 | /* string args above */ |
353 | Opt_ip, | ||
354 | Opt_noshare, | ||
355 | Opt_dirstat, | 129 | Opt_dirstat, |
356 | Opt_nodirstat, | 130 | Opt_nodirstat, |
357 | Opt_rbytes, | 131 | Opt_rbytes, |
358 | Opt_norbytes, | 132 | Opt_norbytes, |
359 | Opt_nocrc, | ||
360 | Opt_noasyncreaddir, | 133 | Opt_noasyncreaddir, |
361 | }; | 134 | }; |
362 | 135 | ||
363 | static match_table_t arg_tokens = { | 136 | static match_table_t fsopt_tokens = { |
364 | {Opt_wsize, "wsize=%d"}, | 137 | {Opt_wsize, "wsize=%d"}, |
365 | {Opt_rsize, "rsize=%d"}, | 138 | {Opt_rsize, "rsize=%d"}, |
366 | {Opt_osdtimeout, "osdtimeout=%d"}, | ||
367 | {Opt_osdkeepalivetimeout, "osdkeepalive=%d"}, | ||
368 | {Opt_mount_timeout, "mount_timeout=%d"}, | ||
369 | {Opt_osd_idle_ttl, "osd_idle_ttl=%d"}, | ||
370 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, | 139 | {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"}, |
371 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, | 140 | {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"}, |
372 | {Opt_cap_release_safety, "cap_release_safety=%d"}, | 141 | {Opt_cap_release_safety, "cap_release_safety=%d"}, |
@@ -374,403 +143,459 @@ static match_table_t arg_tokens = { | |||
374 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, | 143 | {Opt_readdir_max_bytes, "readdir_max_bytes=%d"}, |
375 | {Opt_congestion_kb, "write_congestion_kb=%d"}, | 144 | {Opt_congestion_kb, "write_congestion_kb=%d"}, |
376 | /* int args above */ | 145 | /* int args above */ |
377 | {Opt_fsid, "fsid=%s"}, | ||
378 | {Opt_snapdirname, "snapdirname=%s"}, | 146 | {Opt_snapdirname, "snapdirname=%s"}, |
379 | {Opt_name, "name=%s"}, | ||
380 | {Opt_secret, "secret=%s"}, | ||
381 | /* string args above */ | 147 | /* string args above */ |
382 | {Opt_ip, "ip=%s"}, | ||
383 | {Opt_noshare, "noshare"}, | ||
384 | {Opt_dirstat, "dirstat"}, | 148 | {Opt_dirstat, "dirstat"}, |
385 | {Opt_nodirstat, "nodirstat"}, | 149 | {Opt_nodirstat, "nodirstat"}, |
386 | {Opt_rbytes, "rbytes"}, | 150 | {Opt_rbytes, "rbytes"}, |
387 | {Opt_norbytes, "norbytes"}, | 151 | {Opt_norbytes, "norbytes"}, |
388 | {Opt_nocrc, "nocrc"}, | ||
389 | {Opt_noasyncreaddir, "noasyncreaddir"}, | 152 | {Opt_noasyncreaddir, "noasyncreaddir"}, |
390 | {-1, NULL} | 153 | {-1, NULL} |
391 | }; | 154 | }; |
392 | 155 | ||
393 | static int parse_fsid(const char *str, struct ceph_fsid *fsid) | 156 | static int parse_fsopt_token(char *c, void *private) |
394 | { | 157 | { |
395 | int i = 0; | 158 | struct ceph_mount_options *fsopt = private; |
396 | char tmp[3]; | 159 | substring_t argstr[MAX_OPT_ARGS]; |
397 | int err = -EINVAL; | 160 | int token, intval, ret; |
398 | int d; | 161 | |
399 | 162 | token = match_token((char *)c, fsopt_tokens, argstr); | |
400 | dout("parse_fsid '%s'\n", str); | 163 | if (token < 0) |
401 | tmp[2] = 0; | 164 | return -EINVAL; |
402 | while (*str && i < 16) { | 165 | |
403 | if (ispunct(*str)) { | 166 | if (token < Opt_last_int) { |
404 | str++; | 167 | ret = match_int(&argstr[0], &intval); |
405 | continue; | 168 | if (ret < 0) { |
169 | pr_err("bad mount option arg (not int) " | ||
170 | "at '%s'\n", c); | ||
171 | return ret; | ||
406 | } | 172 | } |
407 | if (!isxdigit(str[0]) || !isxdigit(str[1])) | 173 | dout("got int token %d val %d\n", token, intval); |
408 | break; | 174 | } else if (token > Opt_last_int && token < Opt_last_string) { |
409 | tmp[0] = str[0]; | 175 | dout("got string token %d val %s\n", token, |
410 | tmp[1] = str[1]; | 176 | argstr[0].from); |
411 | if (sscanf(tmp, "%x", &d) < 1) | 177 | } else { |
412 | break; | 178 | dout("got token %d\n", token); |
413 | fsid->fsid[i] = d & 0xff; | ||
414 | i++; | ||
415 | str += 2; | ||
416 | } | 179 | } |
417 | 180 | ||
418 | if (i == 16) | 181 | switch (token) { |
419 | err = 0; | 182 | case Opt_snapdirname: |
420 | dout("parse_fsid ret %d got fsid %pU", err, fsid); | 183 | kfree(fsopt->snapdir_name); |
421 | return err; | 184 | fsopt->snapdir_name = kstrndup(argstr[0].from, |
185 | argstr[0].to-argstr[0].from, | ||
186 | GFP_KERNEL); | ||
187 | if (!fsopt->snapdir_name) | ||
188 | return -ENOMEM; | ||
189 | break; | ||
190 | |||
191 | /* misc */ | ||
192 | case Opt_wsize: | ||
193 | fsopt->wsize = intval; | ||
194 | break; | ||
195 | case Opt_rsize: | ||
196 | fsopt->rsize = intval; | ||
197 | break; | ||
198 | case Opt_caps_wanted_delay_min: | ||
199 | fsopt->caps_wanted_delay_min = intval; | ||
200 | break; | ||
201 | case Opt_caps_wanted_delay_max: | ||
202 | fsopt->caps_wanted_delay_max = intval; | ||
203 | break; | ||
204 | case Opt_readdir_max_entries: | ||
205 | fsopt->max_readdir = intval; | ||
206 | break; | ||
207 | case Opt_readdir_max_bytes: | ||
208 | fsopt->max_readdir_bytes = intval; | ||
209 | break; | ||
210 | case Opt_congestion_kb: | ||
211 | fsopt->congestion_kb = intval; | ||
212 | break; | ||
213 | case Opt_dirstat: | ||
214 | fsopt->flags |= CEPH_MOUNT_OPT_DIRSTAT; | ||
215 | break; | ||
216 | case Opt_nodirstat: | ||
217 | fsopt->flags &= ~CEPH_MOUNT_OPT_DIRSTAT; | ||
218 | break; | ||
219 | case Opt_rbytes: | ||
220 | fsopt->flags |= CEPH_MOUNT_OPT_RBYTES; | ||
221 | break; | ||
222 | case Opt_norbytes: | ||
223 | fsopt->flags &= ~CEPH_MOUNT_OPT_RBYTES; | ||
224 | break; | ||
225 | case Opt_noasyncreaddir: | ||
226 | fsopt->flags |= CEPH_MOUNT_OPT_NOASYNCREADDIR; | ||
227 | break; | ||
228 | default: | ||
229 | BUG_ON(token); | ||
230 | } | ||
231 | return 0; | ||
422 | } | 232 | } |
423 | 233 | ||
424 | static struct ceph_mount_args *parse_mount_args(int flags, char *options, | 234 | static void destroy_mount_options(struct ceph_mount_options *args) |
425 | const char *dev_name, | ||
426 | const char **path) | ||
427 | { | 235 | { |
428 | struct ceph_mount_args *args; | 236 | dout("destroy_mount_options %p\n", args); |
429 | const char *c; | 237 | kfree(args->snapdir_name); |
430 | int err = -ENOMEM; | 238 | kfree(args); |
431 | substring_t argstr[MAX_OPT_ARGS]; | 239 | } |
432 | 240 | ||
433 | args = kzalloc(sizeof(*args), GFP_KERNEL); | 241 | static int strcmp_null(const char *s1, const char *s2) |
434 | if (!args) | 242 | { |
435 | return ERR_PTR(-ENOMEM); | 243 | if (!s1 && !s2) |
436 | args->mon_addr = kcalloc(CEPH_MAX_MON, sizeof(*args->mon_addr), | 244 | return 0; |
437 | GFP_KERNEL); | 245 | if (s1 && !s2) |
438 | if (!args->mon_addr) | 246 | return -1; |
439 | goto out; | 247 | if (!s1 && s2) |
248 | return 1; | ||
249 | return strcmp(s1, s2); | ||
250 | } | ||
440 | 251 | ||
441 | dout("parse_mount_args %p, dev_name '%s'\n", args, dev_name); | 252 | static int compare_mount_options(struct ceph_mount_options *new_fsopt, |
442 | 253 | struct ceph_options *new_opt, | |
443 | /* start with defaults */ | 254 | struct ceph_fs_client *fsc) |
444 | args->sb_flags = flags; | 255 | { |
445 | args->flags = CEPH_OPT_DEFAULT; | 256 | struct ceph_mount_options *fsopt1 = new_fsopt; |
446 | args->osd_timeout = CEPH_OSD_TIMEOUT_DEFAULT; | 257 | struct ceph_mount_options *fsopt2 = fsc->mount_options; |
447 | args->osd_keepalive_timeout = CEPH_OSD_KEEPALIVE_DEFAULT; | 258 | int ofs = offsetof(struct ceph_mount_options, snapdir_name); |
448 | args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */ | 259 | int ret; |
449 | args->osd_idle_ttl = CEPH_OSD_IDLE_TTL_DEFAULT; /* seconds */ | ||
450 | args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT; | ||
451 | args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT; | ||
452 | args->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
453 | args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
454 | args->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
455 | args->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
456 | args->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
457 | args->congestion_kb = default_congestion_kb(); | ||
458 | |||
459 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
460 | err = -EINVAL; | ||
461 | if (!dev_name) | ||
462 | goto out; | ||
463 | *path = strstr(dev_name, ":/"); | ||
464 | if (*path == NULL) { | ||
465 | pr_err("device name is missing path (no :/ in %s)\n", | ||
466 | dev_name); | ||
467 | goto out; | ||
468 | } | ||
469 | 260 | ||
470 | /* get mon ip(s) */ | 261 | ret = memcmp(fsopt1, fsopt2, ofs); |
471 | err = ceph_parse_ips(dev_name, *path, args->mon_addr, | 262 | if (ret) |
472 | CEPH_MAX_MON, &args->num_mon); | 263 | return ret; |
473 | if (err < 0) | 264 | |
474 | goto out; | 265 | ret = strcmp_null(fsopt1->snapdir_name, fsopt2->snapdir_name); |
266 | if (ret) | ||
267 | return ret; | ||
268 | |||
269 | return ceph_compare_options(new_opt, fsc->client); | ||
270 | } | ||
271 | |||
272 | static int parse_mount_options(struct ceph_mount_options **pfsopt, | ||
273 | struct ceph_options **popt, | ||
274 | int flags, char *options, | ||
275 | const char *dev_name, | ||
276 | const char **path) | ||
277 | { | ||
278 | struct ceph_mount_options *fsopt; | ||
279 | const char *dev_name_end; | ||
280 | int err = -ENOMEM; | ||
281 | |||
282 | fsopt = kzalloc(sizeof(*fsopt), GFP_KERNEL); | ||
283 | if (!fsopt) | ||
284 | return -ENOMEM; | ||
285 | |||
286 | dout("parse_mount_options %p, dev_name '%s'\n", fsopt, dev_name); | ||
287 | |||
288 | fsopt->sb_flags = flags; | ||
289 | fsopt->flags = CEPH_MOUNT_OPT_DEFAULT; | ||
290 | |||
291 | fsopt->rsize = CEPH_MOUNT_RSIZE_DEFAULT; | ||
292 | fsopt->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL); | ||
293 | fsopt->cap_release_safety = CEPH_CAP_RELEASE_SAFETY_DEFAULT; | ||
294 | fsopt->max_readdir = CEPH_MAX_READDIR_DEFAULT; | ||
295 | fsopt->max_readdir_bytes = CEPH_MAX_READDIR_BYTES_DEFAULT; | ||
296 | fsopt->congestion_kb = default_congestion_kb(); | ||
297 | |||
298 | /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */ | ||
299 | err = -EINVAL; | ||
300 | if (!dev_name) | ||
301 | goto out; | ||
302 | *path = strstr(dev_name, ":/"); | ||
303 | if (*path == NULL) { | ||
304 | pr_err("device name is missing path (no :/ in %s)\n", | ||
305 | dev_name); | ||
306 | goto out; | ||
307 | } | ||
308 | dev_name_end = *path; | ||
309 | dout("device name '%.*s'\n", (int)(dev_name_end - dev_name), dev_name); | ||
475 | 310 | ||
476 | /* path on server */ | 311 | /* path on server */ |
477 | *path += 2; | 312 | *path += 2; |
478 | dout("server path '%s'\n", *path); | 313 | dout("server path '%s'\n", *path); |
479 | 314 | ||
480 | /* parse mount options */ | 315 | err = ceph_parse_options(popt, options, dev_name, dev_name_end, |
481 | while ((c = strsep(&options, ",")) != NULL) { | 316 | parse_fsopt_token, (void *)fsopt); |
482 | int token, intval, ret; | 317 | if (err) |
483 | if (!*c) | 318 | goto out; |
484 | continue; | 319 | |
485 | err = -EINVAL; | 320 | /* success */ |
486 | token = match_token((char *)c, arg_tokens, argstr); | 321 | *pfsopt = fsopt; |
487 | if (token < 0) { | 322 | return 0; |
488 | pr_err("bad mount option at '%s'\n", c); | ||
489 | goto out; | ||
490 | } | ||
491 | if (token < Opt_last_int) { | ||
492 | ret = match_int(&argstr[0], &intval); | ||
493 | if (ret < 0) { | ||
494 | pr_err("bad mount option arg (not int) " | ||
495 | "at '%s'\n", c); | ||
496 | continue; | ||
497 | } | ||
498 | dout("got int token %d val %d\n", token, intval); | ||
499 | } else if (token > Opt_last_int && token < Opt_last_string) { | ||
500 | dout("got string token %d val %s\n", token, | ||
501 | argstr[0].from); | ||
502 | } else { | ||
503 | dout("got token %d\n", token); | ||
504 | } | ||
505 | switch (token) { | ||
506 | case Opt_ip: | ||
507 | err = ceph_parse_ips(argstr[0].from, | ||
508 | argstr[0].to, | ||
509 | &args->my_addr, | ||
510 | 1, NULL); | ||
511 | if (err < 0) | ||
512 | goto out; | ||
513 | args->flags |= CEPH_OPT_MYIP; | ||
514 | break; | ||
515 | |||
516 | case Opt_fsid: | ||
517 | err = parse_fsid(argstr[0].from, &args->fsid); | ||
518 | if (err == 0) | ||
519 | args->flags |= CEPH_OPT_FSID; | ||
520 | break; | ||
521 | case Opt_snapdirname: | ||
522 | kfree(args->snapdir_name); | ||
523 | args->snapdir_name = kstrndup(argstr[0].from, | ||
524 | argstr[0].to-argstr[0].from, | ||
525 | GFP_KERNEL); | ||
526 | break; | ||
527 | case Opt_name: | ||
528 | args->name = kstrndup(argstr[0].from, | ||
529 | argstr[0].to-argstr[0].from, | ||
530 | GFP_KERNEL); | ||
531 | break; | ||
532 | case Opt_secret: | ||
533 | args->secret = kstrndup(argstr[0].from, | ||
534 | argstr[0].to-argstr[0].from, | ||
535 | GFP_KERNEL); | ||
536 | break; | ||
537 | |||
538 | /* misc */ | ||
539 | case Opt_wsize: | ||
540 | args->wsize = intval; | ||
541 | break; | ||
542 | case Opt_rsize: | ||
543 | args->rsize = intval; | ||
544 | break; | ||
545 | case Opt_osdtimeout: | ||
546 | args->osd_timeout = intval; | ||
547 | break; | ||
548 | case Opt_osdkeepalivetimeout: | ||
549 | args->osd_keepalive_timeout = intval; | ||
550 | break; | ||
551 | case Opt_osd_idle_ttl: | ||
552 | args->osd_idle_ttl = intval; | ||
553 | break; | ||
554 | case Opt_mount_timeout: | ||
555 | args->mount_timeout = intval; | ||
556 | break; | ||
557 | case Opt_caps_wanted_delay_min: | ||
558 | args->caps_wanted_delay_min = intval; | ||
559 | break; | ||
560 | case Opt_caps_wanted_delay_max: | ||
561 | args->caps_wanted_delay_max = intval; | ||
562 | break; | ||
563 | case Opt_readdir_max_entries: | ||
564 | args->max_readdir = intval; | ||
565 | break; | ||
566 | case Opt_readdir_max_bytes: | ||
567 | args->max_readdir_bytes = intval; | ||
568 | break; | ||
569 | case Opt_congestion_kb: | ||
570 | args->congestion_kb = intval; | ||
571 | break; | ||
572 | |||
573 | case Opt_noshare: | ||
574 | args->flags |= CEPH_OPT_NOSHARE; | ||
575 | break; | ||
576 | |||
577 | case Opt_dirstat: | ||
578 | args->flags |= CEPH_OPT_DIRSTAT; | ||
579 | break; | ||
580 | case Opt_nodirstat: | ||
581 | args->flags &= ~CEPH_OPT_DIRSTAT; | ||
582 | break; | ||
583 | case Opt_rbytes: | ||
584 | args->flags |= CEPH_OPT_RBYTES; | ||
585 | break; | ||
586 | case Opt_norbytes: | ||
587 | args->flags &= ~CEPH_OPT_RBYTES; | ||
588 | break; | ||
589 | case Opt_nocrc: | ||
590 | args->flags |= CEPH_OPT_NOCRC; | ||
591 | break; | ||
592 | case Opt_noasyncreaddir: | ||
593 | args->flags |= CEPH_OPT_NOASYNCREADDIR; | ||
594 | break; | ||
595 | |||
596 | default: | ||
597 | BUG_ON(token); | ||
598 | } | ||
599 | } | ||
600 | return args; | ||
601 | 323 | ||
602 | out: | 324 | out: |
603 | kfree(args->mon_addr); | 325 | destroy_mount_options(fsopt); |
604 | kfree(args); | 326 | return err; |
605 | return ERR_PTR(err); | ||
606 | } | 327 | } |
607 | 328 | ||
608 | static void destroy_mount_args(struct ceph_mount_args *args) | 329 | /** |
330 | * ceph_show_options - Show mount options in /proc/mounts | ||
331 | * @m: seq_file to write to | ||
332 | * @mnt: mount descriptor | ||
333 | */ | ||
334 | static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt) | ||
609 | { | 335 | { |
610 | dout("destroy_mount_args %p\n", args); | 336 | struct ceph_fs_client *fsc = ceph_sb_to_client(mnt->mnt_sb); |
611 | kfree(args->snapdir_name); | 337 | struct ceph_mount_options *fsopt = fsc->mount_options; |
612 | args->snapdir_name = NULL; | 338 | struct ceph_options *opt = fsc->client->options; |
613 | kfree(args->name); | 339 | |
614 | args->name = NULL; | 340 | if (opt->flags & CEPH_OPT_FSID) |
615 | kfree(args->secret); | 341 | seq_printf(m, ",fsid=%pU", &opt->fsid); |
616 | args->secret = NULL; | 342 | if (opt->flags & CEPH_OPT_NOSHARE) |
617 | kfree(args); | 343 | seq_puts(m, ",noshare"); |
344 | if (opt->flags & CEPH_OPT_NOCRC) | ||
345 | seq_puts(m, ",nocrc"); | ||
346 | |||
347 | if (opt->name) | ||
348 | seq_printf(m, ",name=%s", opt->name); | ||
349 | if (opt->secret) | ||
350 | seq_puts(m, ",secret=<hidden>"); | ||
351 | |||
352 | if (opt->mount_timeout != CEPH_MOUNT_TIMEOUT_DEFAULT) | ||
353 | seq_printf(m, ",mount_timeout=%d", opt->mount_timeout); | ||
354 | if (opt->osd_idle_ttl != CEPH_OSD_IDLE_TTL_DEFAULT) | ||
355 | seq_printf(m, ",osd_idle_ttl=%d", opt->osd_idle_ttl); | ||
356 | if (opt->osd_timeout != CEPH_OSD_TIMEOUT_DEFAULT) | ||
357 | seq_printf(m, ",osdtimeout=%d", opt->osd_timeout); | ||
358 | if (opt->osd_keepalive_timeout != CEPH_OSD_KEEPALIVE_DEFAULT) | ||
359 | seq_printf(m, ",osdkeepalivetimeout=%d", | ||
360 | opt->osd_keepalive_timeout); | ||
361 | |||
362 | if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT) | ||
363 | seq_puts(m, ",dirstat"); | ||
364 | if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0) | ||
365 | seq_puts(m, ",norbytes"); | ||
366 | if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR) | ||
367 | seq_puts(m, ",noasyncreaddir"); | ||
368 | |||
369 | if (fsopt->wsize) | ||
370 | seq_printf(m, ",wsize=%d", fsopt->wsize); | ||
371 | if (fsopt->rsize != CEPH_MOUNT_RSIZE_DEFAULT) | ||
372 | seq_printf(m, ",rsize=%d", fsopt->rsize); | ||
373 | if (fsopt->congestion_kb != default_congestion_kb()) | ||
374 | seq_printf(m, ",write_congestion_kb=%d", fsopt->congestion_kb); | ||
375 | if (fsopt->caps_wanted_delay_min != CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT) | ||
376 | seq_printf(m, ",caps_wanted_delay_min=%d", | ||
377 | fsopt->caps_wanted_delay_min); | ||
378 | if (fsopt->caps_wanted_delay_max != CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT) | ||
379 | seq_printf(m, ",caps_wanted_delay_max=%d", | ||
380 | fsopt->caps_wanted_delay_max); | ||
381 | if (fsopt->cap_release_safety != CEPH_CAP_RELEASE_SAFETY_DEFAULT) | ||
382 | seq_printf(m, ",cap_release_safety=%d", | ||
383 | fsopt->cap_release_safety); | ||
384 | if (fsopt->max_readdir != CEPH_MAX_READDIR_DEFAULT) | ||
385 | seq_printf(m, ",readdir_max_entries=%d", fsopt->max_readdir); | ||
386 | if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT) | ||
387 | seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes); | ||
388 | if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT)) | ||
389 | seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name); | ||
390 | return 0; | ||
618 | } | 391 | } |
619 | 392 | ||
620 | /* | 393 | /* |
621 | * create a fresh client instance | 394 | * handle any mon messages the standard library doesn't understand. |
395 | * return error if we don't either. | ||
622 | */ | 396 | */ |
623 | static struct ceph_client *ceph_create_client(struct ceph_mount_args *args) | 397 | static int extra_mon_dispatch(struct ceph_client *client, struct ceph_msg *msg) |
624 | { | 398 | { |
625 | struct ceph_client *client; | 399 | struct ceph_fs_client *fsc = client->private; |
400 | int type = le16_to_cpu(msg->hdr.type); | ||
401 | |||
402 | switch (type) { | ||
403 | case CEPH_MSG_MDS_MAP: | ||
404 | ceph_mdsc_handle_map(fsc->mdsc, msg); | ||
405 | return 0; | ||
406 | |||
407 | default: | ||
408 | return -1; | ||
409 | } | ||
410 | } | ||
411 | |||
412 | /* | ||
413 | * create a new fs client | ||
414 | */ | ||
415 | struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt, | ||
416 | struct ceph_options *opt) | ||
417 | { | ||
418 | struct ceph_fs_client *fsc; | ||
626 | int err = -ENOMEM; | 419 | int err = -ENOMEM; |
627 | 420 | ||
628 | client = kzalloc(sizeof(*client), GFP_KERNEL); | 421 | fsc = kzalloc(sizeof(*fsc), GFP_KERNEL); |
629 | if (client == NULL) | 422 | if (!fsc) |
630 | return ERR_PTR(-ENOMEM); | 423 | return ERR_PTR(-ENOMEM); |
631 | 424 | ||
632 | mutex_init(&client->mount_mutex); | 425 | fsc->client = ceph_create_client(opt, fsc); |
633 | 426 | if (IS_ERR(fsc->client)) { | |
634 | init_waitqueue_head(&client->auth_wq); | 427 | err = PTR_ERR(fsc->client); |
428 | goto fail; | ||
429 | } | ||
430 | fsc->client->extra_mon_dispatch = extra_mon_dispatch; | ||
431 | fsc->client->supported_features |= CEPH_FEATURE_FLOCK; | ||
432 | fsc->client->monc.want_mdsmap = 1; | ||
635 | 433 | ||
636 | client->sb = NULL; | 434 | fsc->mount_options = fsopt; |
637 | client->mount_state = CEPH_MOUNT_MOUNTING; | ||
638 | client->mount_args = args; | ||
639 | 435 | ||
640 | client->msgr = NULL; | 436 | fsc->sb = NULL; |
437 | fsc->mount_state = CEPH_MOUNT_MOUNTING; | ||
641 | 438 | ||
642 | client->auth_err = 0; | 439 | atomic_long_set(&fsc->writeback_count, 0); |
643 | atomic_long_set(&client->writeback_count, 0); | ||
644 | 440 | ||
645 | err = bdi_init(&client->backing_dev_info); | 441 | err = bdi_init(&fsc->backing_dev_info); |
646 | if (err < 0) | 442 | if (err < 0) |
647 | goto fail; | 443 | goto fail_client; |
648 | 444 | ||
649 | err = -ENOMEM; | 445 | err = -ENOMEM; |
650 | client->wb_wq = create_workqueue("ceph-writeback"); | 446 | fsc->wb_wq = create_workqueue("ceph-writeback"); |
651 | if (client->wb_wq == NULL) | 447 | if (fsc->wb_wq == NULL) |
652 | goto fail_bdi; | 448 | goto fail_bdi; |
653 | client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); | 449 | fsc->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid"); |
654 | if (client->pg_inv_wq == NULL) | 450 | if (fsc->pg_inv_wq == NULL) |
655 | goto fail_wb_wq; | 451 | goto fail_wb_wq; |
656 | client->trunc_wq = create_singlethread_workqueue("ceph-trunc"); | 452 | fsc->trunc_wq = create_singlethread_workqueue("ceph-trunc"); |
657 | if (client->trunc_wq == NULL) | 453 | if (fsc->trunc_wq == NULL) |
658 | goto fail_pg_inv_wq; | 454 | goto fail_pg_inv_wq; |
659 | 455 | ||
660 | /* set up mempools */ | 456 | /* set up mempools */ |
661 | err = -ENOMEM; | 457 | err = -ENOMEM; |
662 | client->wb_pagevec_pool = mempool_create_kmalloc_pool(10, | 458 | fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, |
663 | client->mount_args->wsize >> PAGE_CACHE_SHIFT); | 459 | fsc->mount_options->wsize >> PAGE_CACHE_SHIFT); |
664 | if (!client->wb_pagevec_pool) | 460 | if (!fsc->wb_pagevec_pool) |
665 | goto fail_trunc_wq; | 461 | goto fail_trunc_wq; |
666 | 462 | ||
667 | /* caps */ | 463 | /* caps */ |
668 | client->min_caps = args->max_readdir; | 464 | fsc->min_caps = fsopt->max_readdir; |
465 | |||
466 | return fsc; | ||
669 | 467 | ||
670 | /* subsystems */ | ||
671 | err = ceph_monc_init(&client->monc, client); | ||
672 | if (err < 0) | ||
673 | goto fail_mempool; | ||
674 | err = ceph_osdc_init(&client->osdc, client); | ||
675 | if (err < 0) | ||
676 | goto fail_monc; | ||
677 | err = ceph_mdsc_init(&client->mdsc, client); | ||
678 | if (err < 0) | ||
679 | goto fail_osdc; | ||
680 | return client; | ||
681 | |||
682 | fail_osdc: | ||
683 | ceph_osdc_stop(&client->osdc); | ||
684 | fail_monc: | ||
685 | ceph_monc_stop(&client->monc); | ||
686 | fail_mempool: | ||
687 | mempool_destroy(client->wb_pagevec_pool); | ||
688 | fail_trunc_wq: | 468 | fail_trunc_wq: |
689 | destroy_workqueue(client->trunc_wq); | 469 | destroy_workqueue(fsc->trunc_wq); |
690 | fail_pg_inv_wq: | 470 | fail_pg_inv_wq: |
691 | destroy_workqueue(client->pg_inv_wq); | 471 | destroy_workqueue(fsc->pg_inv_wq); |
692 | fail_wb_wq: | 472 | fail_wb_wq: |
693 | destroy_workqueue(client->wb_wq); | 473 | destroy_workqueue(fsc->wb_wq); |
694 | fail_bdi: | 474 | fail_bdi: |
695 | bdi_destroy(&client->backing_dev_info); | 475 | bdi_destroy(&fsc->backing_dev_info); |
476 | fail_client: | ||
477 | ceph_destroy_client(fsc->client); | ||
696 | fail: | 478 | fail: |
697 | kfree(client); | 479 | kfree(fsc); |
698 | return ERR_PTR(err); | 480 | return ERR_PTR(err); |
699 | } | 481 | } |
700 | 482 | ||
701 | static void ceph_destroy_client(struct ceph_client *client) | 483 | void destroy_fs_client(struct ceph_fs_client *fsc) |
702 | { | 484 | { |
703 | dout("destroy_client %p\n", client); | 485 | dout("destroy_fs_client %p\n", fsc); |
704 | 486 | ||
705 | /* unmount */ | 487 | destroy_workqueue(fsc->wb_wq); |
706 | ceph_mdsc_stop(&client->mdsc); | 488 | destroy_workqueue(fsc->pg_inv_wq); |
707 | ceph_osdc_stop(&client->osdc); | 489 | destroy_workqueue(fsc->trunc_wq); |
708 | 490 | ||
709 | /* | 491 | bdi_destroy(&fsc->backing_dev_info); |
710 | * make sure mds and osd connections close out before destroying | ||
711 | * the auth module, which is needed to free those connections' | ||
712 | * ceph_authorizers. | ||
713 | */ | ||
714 | ceph_msgr_flush(); | ||
715 | |||
716 | ceph_monc_stop(&client->monc); | ||
717 | 492 | ||
718 | ceph_debugfs_client_cleanup(client); | 493 | mempool_destroy(fsc->wb_pagevec_pool); |
719 | destroy_workqueue(client->wb_wq); | ||
720 | destroy_workqueue(client->pg_inv_wq); | ||
721 | destroy_workqueue(client->trunc_wq); | ||
722 | 494 | ||
723 | bdi_destroy(&client->backing_dev_info); | 495 | destroy_mount_options(fsc->mount_options); |
724 | 496 | ||
725 | if (client->msgr) | 497 | ceph_fs_debugfs_cleanup(fsc); |
726 | ceph_messenger_destroy(client->msgr); | ||
727 | mempool_destroy(client->wb_pagevec_pool); | ||
728 | 498 | ||
729 | destroy_mount_args(client->mount_args); | 499 | ceph_destroy_client(fsc->client); |
730 | 500 | ||
731 | kfree(client); | 501 | kfree(fsc); |
732 | dout("destroy_client %p done\n", client); | 502 | dout("destroy_fs_client %p done\n", fsc); |
733 | } | 503 | } |
734 | 504 | ||
735 | /* | 505 | /* |
736 | * Initially learn our fsid, or verify an fsid matches. | 506 | * caches |
737 | */ | 507 | */ |
738 | int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid) | 508 | struct kmem_cache *ceph_inode_cachep; |
509 | struct kmem_cache *ceph_cap_cachep; | ||
510 | struct kmem_cache *ceph_dentry_cachep; | ||
511 | struct kmem_cache *ceph_file_cachep; | ||
512 | |||
513 | static void ceph_inode_init_once(void *foo) | ||
739 | { | 514 | { |
740 | if (client->have_fsid) { | 515 | struct ceph_inode_info *ci = foo; |
741 | if (ceph_fsid_compare(&client->fsid, fsid)) { | 516 | inode_init_once(&ci->vfs_inode); |
742 | pr_err("bad fsid, had %pU got %pU", | 517 | } |
743 | &client->fsid, fsid); | 518 | |
744 | return -1; | 519 | static int __init init_caches(void) |
745 | } | 520 | { |
746 | } else { | 521 | ceph_inode_cachep = kmem_cache_create("ceph_inode_info", |
747 | pr_info("client%lld fsid %pU\n", client->monc.auth->global_id, | 522 | sizeof(struct ceph_inode_info), |
748 | fsid); | 523 | __alignof__(struct ceph_inode_info), |
749 | memcpy(&client->fsid, fsid, sizeof(*fsid)); | 524 | (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD), |
750 | ceph_debugfs_client_init(client); | 525 | ceph_inode_init_once); |
751 | client->have_fsid = true; | 526 | if (ceph_inode_cachep == NULL) |
752 | } | 527 | return -ENOMEM; |
528 | |||
529 | ceph_cap_cachep = KMEM_CACHE(ceph_cap, | ||
530 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
531 | if (ceph_cap_cachep == NULL) | ||
532 | goto bad_cap; | ||
533 | |||
534 | ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info, | ||
535 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
536 | if (ceph_dentry_cachep == NULL) | ||
537 | goto bad_dentry; | ||
538 | |||
539 | ceph_file_cachep = KMEM_CACHE(ceph_file_info, | ||
540 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD); | ||
541 | if (ceph_file_cachep == NULL) | ||
542 | goto bad_file; | ||
543 | |||
753 | return 0; | 544 | return 0; |
545 | |||
546 | bad_file: | ||
547 | kmem_cache_destroy(ceph_dentry_cachep); | ||
548 | bad_dentry: | ||
549 | kmem_cache_destroy(ceph_cap_cachep); | ||
550 | bad_cap: | ||
551 | kmem_cache_destroy(ceph_inode_cachep); | ||
552 | return -ENOMEM; | ||
754 | } | 553 | } |
755 | 554 | ||
555 | static void destroy_caches(void) | ||
556 | { | ||
557 | kmem_cache_destroy(ceph_inode_cachep); | ||
558 | kmem_cache_destroy(ceph_cap_cachep); | ||
559 | kmem_cache_destroy(ceph_dentry_cachep); | ||
560 | kmem_cache_destroy(ceph_file_cachep); | ||
561 | } | ||
562 | |||
563 | |||
756 | /* | 564 | /* |
757 | * true if we have the mon map (and have thus joined the cluster) | 565 | * ceph_umount_begin - initiate forced umount. Tear down down the |
566 | * mount, skipping steps that may hang while waiting for server(s). | ||
758 | */ | 567 | */ |
759 | static int have_mon_and_osd_map(struct ceph_client *client) | 568 | static void ceph_umount_begin(struct super_block *sb) |
760 | { | 569 | { |
761 | return client->monc.monmap && client->monc.monmap->epoch && | 570 | struct ceph_fs_client *fsc = ceph_sb_to_client(sb); |
762 | client->osdc.osdmap && client->osdc.osdmap->epoch; | 571 | |
572 | dout("ceph_umount_begin - starting forced umount\n"); | ||
573 | if (!fsc) | ||
574 | return; | ||
575 | fsc->mount_state = CEPH_MOUNT_SHUTDOWN; | ||
576 | return; | ||
763 | } | 577 | } |
764 | 578 | ||
579 | static const struct super_operations ceph_super_ops = { | ||
580 | .alloc_inode = ceph_alloc_inode, | ||
581 | .destroy_inode = ceph_destroy_inode, | ||
582 | .write_inode = ceph_write_inode, | ||
583 | .sync_fs = ceph_sync_fs, | ||
584 | .put_super = ceph_put_super, | ||
585 | .show_options = ceph_show_options, | ||
586 | .statfs = ceph_statfs, | ||
587 | .umount_begin = ceph_umount_begin, | ||
588 | }; | ||
589 | |||
765 | /* | 590 | /* |
766 | * Bootstrap mount by opening the root directory. Note the mount | 591 | * Bootstrap mount by opening the root directory. Note the mount |
767 | * @started time from caller, and time out if this takes too long. | 592 | * @started time from caller, and time out if this takes too long. |
768 | */ | 593 | */ |
769 | static struct dentry *open_root_dentry(struct ceph_client *client, | 594 | static struct dentry *open_root_dentry(struct ceph_fs_client *fsc, |
770 | const char *path, | 595 | const char *path, |
771 | unsigned long started) | 596 | unsigned long started) |
772 | { | 597 | { |
773 | struct ceph_mds_client *mdsc = &client->mdsc; | 598 | struct ceph_mds_client *mdsc = fsc->mdsc; |
774 | struct ceph_mds_request *req = NULL; | 599 | struct ceph_mds_request *req = NULL; |
775 | int err; | 600 | int err; |
776 | struct dentry *root; | 601 | struct dentry *root; |
@@ -784,14 +609,14 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
784 | req->r_ino1.ino = CEPH_INO_ROOT; | 609 | req->r_ino1.ino = CEPH_INO_ROOT; |
785 | req->r_ino1.snap = CEPH_NOSNAP; | 610 | req->r_ino1.snap = CEPH_NOSNAP; |
786 | req->r_started = started; | 611 | req->r_started = started; |
787 | req->r_timeout = client->mount_args->mount_timeout * HZ; | 612 | req->r_timeout = fsc->client->options->mount_timeout * HZ; |
788 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); | 613 | req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); |
789 | req->r_num_caps = 2; | 614 | req->r_num_caps = 2; |
790 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 615 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
791 | if (err == 0) { | 616 | if (err == 0) { |
792 | dout("open_root_inode success\n"); | 617 | dout("open_root_inode success\n"); |
793 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && | 618 | if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT && |
794 | client->sb->s_root == NULL) | 619 | fsc->sb->s_root == NULL) |
795 | root = d_alloc_root(req->r_target_inode); | 620 | root = d_alloc_root(req->r_target_inode); |
796 | else | 621 | else |
797 | root = d_obtain_alias(req->r_target_inode); | 622 | root = d_obtain_alias(req->r_target_inode); |
@@ -804,105 +629,86 @@ static struct dentry *open_root_dentry(struct ceph_client *client, | |||
804 | return root; | 629 | return root; |
805 | } | 630 | } |
806 | 631 | ||
632 | |||
633 | |||
634 | |||
807 | /* | 635 | /* |
808 | * mount: join the ceph cluster, and open root directory. | 636 | * mount: join the ceph cluster, and open root directory. |
809 | */ | 637 | */ |
810 | static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt, | 638 | static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt, |
811 | const char *path) | 639 | const char *path) |
812 | { | 640 | { |
813 | struct ceph_entity_addr *myaddr = NULL; | ||
814 | int err; | 641 | int err; |
815 | unsigned long timeout = client->mount_args->mount_timeout * HZ; | ||
816 | unsigned long started = jiffies; /* note the start time */ | 642 | unsigned long started = jiffies; /* note the start time */ |
817 | struct dentry *root; | 643 | struct dentry *root; |
644 | int first = 0; /* first vfsmount for this super_block */ | ||
818 | 645 | ||
819 | dout("mount start\n"); | 646 | dout("mount start\n"); |
820 | mutex_lock(&client->mount_mutex); | 647 | mutex_lock(&fsc->client->mount_mutex); |
821 | |||
822 | /* initialize the messenger */ | ||
823 | if (client->msgr == NULL) { | ||
824 | if (ceph_test_opt(client, MYIP)) | ||
825 | myaddr = &client->mount_args->my_addr; | ||
826 | client->msgr = ceph_messenger_create(myaddr); | ||
827 | if (IS_ERR(client->msgr)) { | ||
828 | err = PTR_ERR(client->msgr); | ||
829 | client->msgr = NULL; | ||
830 | goto out; | ||
831 | } | ||
832 | client->msgr->nocrc = ceph_test_opt(client, NOCRC); | ||
833 | } | ||
834 | 648 | ||
835 | /* open session, and wait for mon, mds, and osd maps */ | 649 | err = __ceph_open_session(fsc->client, started); |
836 | err = ceph_monc_open_session(&client->monc); | ||
837 | if (err < 0) | 650 | if (err < 0) |
838 | goto out; | 651 | goto out; |
839 | 652 | ||
840 | while (!have_mon_and_osd_map(client)) { | ||
841 | err = -EIO; | ||
842 | if (timeout && time_after_eq(jiffies, started + timeout)) | ||
843 | goto out; | ||
844 | |||
845 | /* wait */ | ||
846 | dout("mount waiting for mon_map\n"); | ||
847 | err = wait_event_interruptible_timeout(client->auth_wq, | ||
848 | have_mon_and_osd_map(client) || (client->auth_err < 0), | ||
849 | timeout); | ||
850 | if (err == -EINTR || err == -ERESTARTSYS) | ||
851 | goto out; | ||
852 | if (client->auth_err < 0) { | ||
853 | err = client->auth_err; | ||
854 | goto out; | ||
855 | } | ||
856 | } | ||
857 | |||
858 | dout("mount opening root\n"); | 653 | dout("mount opening root\n"); |
859 | root = open_root_dentry(client, "", started); | 654 | root = open_root_dentry(fsc, "", started); |
860 | if (IS_ERR(root)) { | 655 | if (IS_ERR(root)) { |
861 | err = PTR_ERR(root); | 656 | err = PTR_ERR(root); |
862 | goto out; | 657 | goto out; |
863 | } | 658 | } |
864 | if (client->sb->s_root) | 659 | if (fsc->sb->s_root) { |
865 | dput(root); | 660 | dput(root); |
866 | else | 661 | } else { |
867 | client->sb->s_root = root; | 662 | fsc->sb->s_root = root; |
663 | first = 1; | ||
664 | |||
665 | err = ceph_fs_debugfs_init(fsc); | ||
666 | if (err < 0) | ||
667 | goto fail; | ||
668 | } | ||
868 | 669 | ||
869 | if (path[0] == 0) { | 670 | if (path[0] == 0) { |
870 | dget(root); | 671 | dget(root); |
871 | } else { | 672 | } else { |
872 | dout("mount opening base mountpoint\n"); | 673 | dout("mount opening base mountpoint\n"); |
873 | root = open_root_dentry(client, path, started); | 674 | root = open_root_dentry(fsc, path, started); |
874 | if (IS_ERR(root)) { | 675 | if (IS_ERR(root)) { |
875 | err = PTR_ERR(root); | 676 | err = PTR_ERR(root); |
876 | dput(client->sb->s_root); | 677 | goto fail; |
877 | client->sb->s_root = NULL; | ||
878 | goto out; | ||
879 | } | 678 | } |
880 | } | 679 | } |
881 | 680 | ||
882 | mnt->mnt_root = root; | 681 | mnt->mnt_root = root; |
883 | mnt->mnt_sb = client->sb; | 682 | mnt->mnt_sb = fsc->sb; |
884 | 683 | ||
885 | client->mount_state = CEPH_MOUNT_MOUNTED; | 684 | fsc->mount_state = CEPH_MOUNT_MOUNTED; |
886 | dout("mount success\n"); | 685 | dout("mount success\n"); |
887 | err = 0; | 686 | err = 0; |
888 | 687 | ||
889 | out: | 688 | out: |
890 | mutex_unlock(&client->mount_mutex); | 689 | mutex_unlock(&fsc->client->mount_mutex); |
891 | return err; | 690 | return err; |
691 | |||
692 | fail: | ||
693 | if (first) { | ||
694 | dput(fsc->sb->s_root); | ||
695 | fsc->sb->s_root = NULL; | ||
696 | } | ||
697 | goto out; | ||
892 | } | 698 | } |
893 | 699 | ||
894 | static int ceph_set_super(struct super_block *s, void *data) | 700 | static int ceph_set_super(struct super_block *s, void *data) |
895 | { | 701 | { |
896 | struct ceph_client *client = data; | 702 | struct ceph_fs_client *fsc = data; |
897 | int ret; | 703 | int ret; |
898 | 704 | ||
899 | dout("set_super %p data %p\n", s, data); | 705 | dout("set_super %p data %p\n", s, data); |
900 | 706 | ||
901 | s->s_flags = client->mount_args->sb_flags; | 707 | s->s_flags = fsc->mount_options->sb_flags; |
902 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ | 708 | s->s_maxbytes = 1ULL << 40; /* temp value until we get mdsmap */ |
903 | 709 | ||
904 | s->s_fs_info = client; | 710 | s->s_fs_info = fsc; |
905 | client->sb = s; | 711 | fsc->sb = s; |
906 | 712 | ||
907 | s->s_op = &ceph_super_ops; | 713 | s->s_op = &ceph_super_ops; |
908 | s->s_export_op = &ceph_export_ops; | 714 | s->s_export_op = &ceph_export_ops; |
@@ -917,7 +723,7 @@ static int ceph_set_super(struct super_block *s, void *data) | |||
917 | 723 | ||
918 | fail: | 724 | fail: |
919 | s->s_fs_info = NULL; | 725 | s->s_fs_info = NULL; |
920 | client->sb = NULL; | 726 | fsc->sb = NULL; |
921 | return ret; | 727 | return ret; |
922 | } | 728 | } |
923 | 729 | ||
@@ -926,30 +732,23 @@ fail: | |||
926 | */ | 732 | */ |
927 | static int ceph_compare_super(struct super_block *sb, void *data) | 733 | static int ceph_compare_super(struct super_block *sb, void *data) |
928 | { | 734 | { |
929 | struct ceph_client *new = data; | 735 | struct ceph_fs_client *new = data; |
930 | struct ceph_mount_args *args = new->mount_args; | 736 | struct ceph_mount_options *fsopt = new->mount_options; |
931 | struct ceph_client *other = ceph_sb_to_client(sb); | 737 | struct ceph_options *opt = new->client->options; |
932 | int i; | 738 | struct ceph_fs_client *other = ceph_sb_to_client(sb); |
933 | 739 | ||
934 | dout("ceph_compare_super %p\n", sb); | 740 | dout("ceph_compare_super %p\n", sb); |
935 | if (args->flags & CEPH_OPT_FSID) { | 741 | |
936 | if (ceph_fsid_compare(&args->fsid, &other->fsid)) { | 742 | if (compare_mount_options(fsopt, opt, other)) { |
937 | dout("fsid doesn't match\n"); | 743 | dout("monitor(s)/mount options don't match\n"); |
938 | return 0; | 744 | return 0; |
939 | } | ||
940 | } else { | ||
941 | /* do we share (a) monitor? */ | ||
942 | for (i = 0; i < new->monc.monmap->num_mon; i++) | ||
943 | if (ceph_monmap_contains(other->monc.monmap, | ||
944 | &new->monc.monmap->mon_inst[i].addr)) | ||
945 | break; | ||
946 | if (i == new->monc.monmap->num_mon) { | ||
947 | dout("mon ip not part of monmap\n"); | ||
948 | return 0; | ||
949 | } | ||
950 | dout("mon ip matches existing sb %p\n", sb); | ||
951 | } | 745 | } |
952 | if (args->sb_flags != other->mount_args->sb_flags) { | 746 | if ((opt->flags & CEPH_OPT_FSID) && |
747 | ceph_fsid_compare(&opt->fsid, &other->client->fsid)) { | ||
748 | dout("fsid doesn't match\n"); | ||
749 | return 0; | ||
750 | } | ||
751 | if (fsopt->sb_flags != other->mount_options->sb_flags) { | ||
953 | dout("flags differ\n"); | 752 | dout("flags differ\n"); |
954 | return 0; | 753 | return 0; |
955 | } | 754 | } |
@@ -961,19 +760,20 @@ static int ceph_compare_super(struct super_block *sb, void *data) | |||
961 | */ | 760 | */ |
962 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); | 761 | static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); |
963 | 762 | ||
964 | static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) | 763 | static int ceph_register_bdi(struct super_block *sb, |
764 | struct ceph_fs_client *fsc) | ||
965 | { | 765 | { |
966 | int err; | 766 | int err; |
967 | 767 | ||
968 | /* set ra_pages based on rsize mount option? */ | 768 | /* set ra_pages based on rsize mount option? */ |
969 | if (client->mount_args->rsize >= PAGE_CACHE_SIZE) | 769 | if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE) |
970 | client->backing_dev_info.ra_pages = | 770 | fsc->backing_dev_info.ra_pages = |
971 | (client->mount_args->rsize + PAGE_CACHE_SIZE - 1) | 771 | (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) |
972 | >> PAGE_SHIFT; | 772 | >> PAGE_SHIFT; |
973 | err = bdi_register(&client->backing_dev_info, NULL, "ceph-%d", | 773 | err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", |
974 | atomic_long_inc_return(&bdi_seq)); | 774 | atomic_long_inc_return(&bdi_seq)); |
975 | if (!err) | 775 | if (!err) |
976 | sb->s_bdi = &client->backing_dev_info; | 776 | sb->s_bdi = &fsc->backing_dev_info; |
977 | return err; | 777 | return err; |
978 | } | 778 | } |
979 | 779 | ||
@@ -982,46 +782,52 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
982 | struct vfsmount *mnt) | 782 | struct vfsmount *mnt) |
983 | { | 783 | { |
984 | struct super_block *sb; | 784 | struct super_block *sb; |
985 | struct ceph_client *client; | 785 | struct ceph_fs_client *fsc; |
986 | int err; | 786 | int err; |
987 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; | 787 | int (*compare_super)(struct super_block *, void *) = ceph_compare_super; |
988 | const char *path = NULL; | 788 | const char *path = NULL; |
989 | struct ceph_mount_args *args; | 789 | struct ceph_mount_options *fsopt = NULL; |
790 | struct ceph_options *opt = NULL; | ||
990 | 791 | ||
991 | dout("ceph_get_sb\n"); | 792 | dout("ceph_get_sb\n"); |
992 | args = parse_mount_args(flags, data, dev_name, &path); | 793 | err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path); |
993 | if (IS_ERR(args)) { | 794 | if (err < 0) |
994 | err = PTR_ERR(args); | ||
995 | goto out_final; | 795 | goto out_final; |
996 | } | ||
997 | 796 | ||
998 | /* create client (which we may/may not use) */ | 797 | /* create client (which we may/may not use) */ |
999 | client = ceph_create_client(args); | 798 | fsc = create_fs_client(fsopt, opt); |
1000 | if (IS_ERR(client)) { | 799 | if (IS_ERR(fsc)) { |
1001 | err = PTR_ERR(client); | 800 | err = PTR_ERR(fsc); |
801 | kfree(fsopt); | ||
802 | kfree(opt); | ||
1002 | goto out_final; | 803 | goto out_final; |
1003 | } | 804 | } |
1004 | 805 | ||
1005 | if (client->mount_args->flags & CEPH_OPT_NOSHARE) | 806 | err = ceph_mdsc_init(fsc); |
807 | if (err < 0) | ||
808 | goto out; | ||
809 | |||
810 | if (ceph_test_opt(fsc->client, NOSHARE)) | ||
1006 | compare_super = NULL; | 811 | compare_super = NULL; |
1007 | sb = sget(fs_type, compare_super, ceph_set_super, client); | 812 | sb = sget(fs_type, compare_super, ceph_set_super, fsc); |
1008 | if (IS_ERR(sb)) { | 813 | if (IS_ERR(sb)) { |
1009 | err = PTR_ERR(sb); | 814 | err = PTR_ERR(sb); |
1010 | goto out; | 815 | goto out; |
1011 | } | 816 | } |
1012 | 817 | ||
1013 | if (ceph_sb_to_client(sb) != client) { | 818 | if (ceph_sb_to_client(sb) != fsc) { |
1014 | ceph_destroy_client(client); | 819 | ceph_mdsc_destroy(fsc); |
1015 | client = ceph_sb_to_client(sb); | 820 | destroy_fs_client(fsc); |
1016 | dout("get_sb got existing client %p\n", client); | 821 | fsc = ceph_sb_to_client(sb); |
822 | dout("get_sb got existing client %p\n", fsc); | ||
1017 | } else { | 823 | } else { |
1018 | dout("get_sb using new client %p\n", client); | 824 | dout("get_sb using new client %p\n", fsc); |
1019 | err = ceph_register_bdi(sb, client); | 825 | err = ceph_register_bdi(sb, fsc); |
1020 | if (err < 0) | 826 | if (err < 0) |
1021 | goto out_splat; | 827 | goto out_splat; |
1022 | } | 828 | } |
1023 | 829 | ||
1024 | err = ceph_mount(client, mnt, path); | 830 | err = ceph_mount(fsc, mnt, path); |
1025 | if (err < 0) | 831 | if (err < 0) |
1026 | goto out_splat; | 832 | goto out_splat; |
1027 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, | 833 | dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root, |
@@ -1029,12 +835,13 @@ static int ceph_get_sb(struct file_system_type *fs_type, | |||
1029 | return 0; | 835 | return 0; |
1030 | 836 | ||
1031 | out_splat: | 837 | out_splat: |
1032 | ceph_mdsc_close_sessions(&client->mdsc); | 838 | ceph_mdsc_close_sessions(fsc->mdsc); |
1033 | deactivate_locked_super(sb); | 839 | deactivate_locked_super(sb); |
1034 | goto out_final; | 840 | goto out_final; |
1035 | 841 | ||
1036 | out: | 842 | out: |
1037 | ceph_destroy_client(client); | 843 | ceph_mdsc_destroy(fsc); |
844 | destroy_fs_client(fsc); | ||
1038 | out_final: | 845 | out_final: |
1039 | dout("ceph_get_sb fail %d\n", err); | 846 | dout("ceph_get_sb fail %d\n", err); |
1040 | return err; | 847 | return err; |
@@ -1042,11 +849,12 @@ out_final: | |||
1042 | 849 | ||
1043 | static void ceph_kill_sb(struct super_block *s) | 850 | static void ceph_kill_sb(struct super_block *s) |
1044 | { | 851 | { |
1045 | struct ceph_client *client = ceph_sb_to_client(s); | 852 | struct ceph_fs_client *fsc = ceph_sb_to_client(s); |
1046 | dout("kill_sb %p\n", s); | 853 | dout("kill_sb %p\n", s); |
1047 | ceph_mdsc_pre_umount(&client->mdsc); | 854 | ceph_mdsc_pre_umount(fsc->mdsc); |
1048 | kill_anon_super(s); /* will call put_super after sb is r/o */ | 855 | kill_anon_super(s); /* will call put_super after sb is r/o */ |
1049 | ceph_destroy_client(client); | 856 | ceph_mdsc_destroy(fsc); |
857 | destroy_fs_client(fsc); | ||
1050 | } | 858 | } |
1051 | 859 | ||
1052 | static struct file_system_type ceph_fs_type = { | 860 | static struct file_system_type ceph_fs_type = { |
@@ -1062,36 +870,20 @@ static struct file_system_type ceph_fs_type = { | |||
1062 | 870 | ||
1063 | static int __init init_ceph(void) | 871 | static int __init init_ceph(void) |
1064 | { | 872 | { |
1065 | int ret = 0; | 873 | int ret = init_caches(); |
1066 | |||
1067 | ret = ceph_debugfs_init(); | ||
1068 | if (ret < 0) | ||
1069 | goto out; | ||
1070 | |||
1071 | ret = ceph_msgr_init(); | ||
1072 | if (ret < 0) | ||
1073 | goto out_debugfs; | ||
1074 | |||
1075 | ret = init_caches(); | ||
1076 | if (ret) | 874 | if (ret) |
1077 | goto out_msgr; | 875 | goto out; |
1078 | 876 | ||
1079 | ret = register_filesystem(&ceph_fs_type); | 877 | ret = register_filesystem(&ceph_fs_type); |
1080 | if (ret) | 878 | if (ret) |
1081 | goto out_icache; | 879 | goto out_icache; |
1082 | 880 | ||
1083 | pr_info("loaded (mon/mds/osd proto %d/%d/%d, osdmap %d/%d %d/%d)\n", | 881 | pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL); |
1084 | CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL, | 882 | |
1085 | CEPH_OSDMAP_VERSION, CEPH_OSDMAP_VERSION_EXT, | ||
1086 | CEPH_OSDMAP_INC_VERSION, CEPH_OSDMAP_INC_VERSION_EXT); | ||
1087 | return 0; | 883 | return 0; |
1088 | 884 | ||
1089 | out_icache: | 885 | out_icache: |
1090 | destroy_caches(); | 886 | destroy_caches(); |
1091 | out_msgr: | ||
1092 | ceph_msgr_exit(); | ||
1093 | out_debugfs: | ||
1094 | ceph_debugfs_cleanup(); | ||
1095 | out: | 887 | out: |
1096 | return ret; | 888 | return ret; |
1097 | } | 889 | } |
@@ -1101,8 +893,6 @@ static void __exit exit_ceph(void) | |||
1101 | dout("exit_ceph\n"); | 893 | dout("exit_ceph\n"); |
1102 | unregister_filesystem(&ceph_fs_type); | 894 | unregister_filesystem(&ceph_fs_type); |
1103 | destroy_caches(); | 895 | destroy_caches(); |
1104 | ceph_msgr_exit(); | ||
1105 | ceph_debugfs_cleanup(); | ||
1106 | } | 896 | } |
1107 | 897 | ||
1108 | module_init(init_ceph); | 898 | module_init(init_ceph); |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index c33897ae5725..1886294e12f7 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -1,7 +1,7 @@ | |||
1 | #ifndef _FS_CEPH_SUPER_H | 1 | #ifndef _FS_CEPH_SUPER_H |
2 | #define _FS_CEPH_SUPER_H | 2 | #define _FS_CEPH_SUPER_H |
3 | 3 | ||
4 | #include "ceph_debug.h" | 4 | #include <linux/ceph/ceph_debug.h> |
5 | 5 | ||
6 | #include <asm/unaligned.h> | 6 | #include <asm/unaligned.h> |
7 | #include <linux/backing-dev.h> | 7 | #include <linux/backing-dev.h> |
@@ -14,13 +14,7 @@ | |||
14 | #include <linux/writeback.h> | 14 | #include <linux/writeback.h> |
15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
16 | 16 | ||
17 | #include "types.h" | 17 | #include <linux/ceph/libceph.h> |
18 | #include "messenger.h" | ||
19 | #include "msgpool.h" | ||
20 | #include "mon_client.h" | ||
21 | #include "mds_client.h" | ||
22 | #include "osd_client.h" | ||
23 | #include "ceph_fs.h" | ||
24 | 18 | ||
25 | /* f_type in struct statfs */ | 19 | /* f_type in struct statfs */ |
26 | #define CEPH_SUPER_MAGIC 0x00c36400 | 20 | #define CEPH_SUPER_MAGIC 0x00c36400 |
@@ -30,42 +24,25 @@ | |||
30 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ | 24 | #define CEPH_BLOCK_SHIFT 20 /* 1 MB */ |
31 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) | 25 | #define CEPH_BLOCK (1 << CEPH_BLOCK_SHIFT) |
32 | 26 | ||
33 | /* | 27 | #define CEPH_MOUNT_OPT_DIRSTAT (1<<4) /* `cat dirname` for stats */ |
34 | * Supported features | 28 | #define CEPH_MOUNT_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ |
35 | */ | 29 | #define CEPH_MOUNT_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ |
36 | #define CEPH_FEATURE_SUPPORTED CEPH_FEATURE_NOSRCADDR | CEPH_FEATURE_FLOCK | ||
37 | #define CEPH_FEATURE_REQUIRED CEPH_FEATURE_NOSRCADDR | ||
38 | 30 | ||
39 | /* | 31 | #define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES) |
40 | * mount options | ||
41 | */ | ||
42 | #define CEPH_OPT_FSID (1<<0) | ||
43 | #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ | ||
44 | #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ | ||
45 | #define CEPH_OPT_DIRSTAT (1<<4) /* funky `cat dirname` for stats */ | ||
46 | #define CEPH_OPT_RBYTES (1<<5) /* dir st_bytes = rbytes */ | ||
47 | #define CEPH_OPT_NOCRC (1<<6) /* no data crc on writes */ | ||
48 | #define CEPH_OPT_NOASYNCREADDIR (1<<7) /* no dcache readdir */ | ||
49 | 32 | ||
50 | #define CEPH_OPT_DEFAULT (CEPH_OPT_RBYTES) | 33 | #define ceph_set_mount_opt(fsc, opt) \ |
34 | (fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt; | ||
35 | #define ceph_test_mount_opt(fsc, opt) \ | ||
36 | (!!((fsc)->mount_options->flags & CEPH_MOUNT_OPT_##opt)) | ||
51 | 37 | ||
52 | #define ceph_set_opt(client, opt) \ | 38 | #define CEPH_MAX_READDIR_DEFAULT 1024 |
53 | (client)->mount_args->flags |= CEPH_OPT_##opt; | 39 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) |
54 | #define ceph_test_opt(client, opt) \ | 40 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" |
55 | (!!((client)->mount_args->flags & CEPH_OPT_##opt)) | ||
56 | 41 | ||
57 | 42 | struct ceph_mount_options { | |
58 | struct ceph_mount_args { | ||
59 | int sb_flags; | ||
60 | int flags; | 43 | int flags; |
61 | struct ceph_fsid fsid; | 44 | int sb_flags; |
62 | struct ceph_entity_addr my_addr; | 45 | |
63 | int num_mon; | ||
64 | struct ceph_entity_addr *mon_addr; | ||
65 | int mount_timeout; | ||
66 | int osd_idle_ttl; | ||
67 | int osd_timeout; | ||
68 | int osd_keepalive_timeout; | ||
69 | int wsize; | 46 | int wsize; |
70 | int rsize; /* max readahead */ | 47 | int rsize; /* max readahead */ |
71 | int congestion_kb; /* max writeback in flight */ | 48 | int congestion_kb; /* max writeback in flight */ |
@@ -73,82 +50,25 @@ struct ceph_mount_args { | |||
73 | int cap_release_safety; | 50 | int cap_release_safety; |
74 | int max_readdir; /* max readdir result (entires) */ | 51 | int max_readdir; /* max readdir result (entires) */ |
75 | int max_readdir_bytes; /* max readdir result (bytes) */ | 52 | int max_readdir_bytes; /* max readdir result (bytes) */ |
76 | char *snapdir_name; /* default ".snap" */ | ||
77 | char *name; | ||
78 | char *secret; | ||
79 | }; | ||
80 | 53 | ||
81 | /* | 54 | /* |
82 | * defaults | 55 | * everything above this point can be memcmp'd; everything below |
83 | */ | 56 | * is handled in compare_mount_options() |
84 | #define CEPH_MOUNT_TIMEOUT_DEFAULT 60 | 57 | */ |
85 | #define CEPH_OSD_TIMEOUT_DEFAULT 60 /* seconds */ | ||
86 | #define CEPH_OSD_KEEPALIVE_DEFAULT 5 | ||
87 | #define CEPH_OSD_IDLE_TTL_DEFAULT 60 | ||
88 | #define CEPH_MOUNT_RSIZE_DEFAULT (512*1024) /* readahead */ | ||
89 | #define CEPH_MAX_READDIR_DEFAULT 1024 | ||
90 | #define CEPH_MAX_READDIR_BYTES_DEFAULT (512*1024) | ||
91 | |||
92 | #define CEPH_MSG_MAX_FRONT_LEN (16*1024*1024) | ||
93 | #define CEPH_MSG_MAX_DATA_LEN (16*1024*1024) | ||
94 | |||
95 | #define CEPH_SNAPDIRNAME_DEFAULT ".snap" | ||
96 | #define CEPH_AUTH_NAME_DEFAULT "guest" | ||
97 | /* | ||
98 | * Delay telling the MDS we no longer want caps, in case we reopen | ||
99 | * the file. Delay a minimum amount of time, even if we send a cap | ||
100 | * message for some other reason. Otherwise, take the oppotunity to | ||
101 | * update the mds to avoid sending another message later. | ||
102 | */ | ||
103 | #define CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT 5 /* cap release delay */ | ||
104 | #define CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT 60 /* cap release delay */ | ||
105 | |||
106 | #define CEPH_CAP_RELEASE_SAFETY_DEFAULT (CEPH_CAPS_PER_RELEASE * 4) | ||
107 | |||
108 | /* mount state */ | ||
109 | enum { | ||
110 | CEPH_MOUNT_MOUNTING, | ||
111 | CEPH_MOUNT_MOUNTED, | ||
112 | CEPH_MOUNT_UNMOUNTING, | ||
113 | CEPH_MOUNT_UNMOUNTED, | ||
114 | CEPH_MOUNT_SHUTDOWN, | ||
115 | }; | ||
116 | |||
117 | /* | ||
118 | * subtract jiffies | ||
119 | */ | ||
120 | static inline unsigned long time_sub(unsigned long a, unsigned long b) | ||
121 | { | ||
122 | BUG_ON(time_after(b, a)); | ||
123 | return (long)a - (long)b; | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * per-filesystem client state | ||
128 | * | ||
129 | * possibly shared by multiple mount points, if they are | ||
130 | * mounting the same ceph filesystem/cluster. | ||
131 | */ | ||
132 | struct ceph_client { | ||
133 | struct ceph_fsid fsid; | ||
134 | bool have_fsid; | ||
135 | 58 | ||
136 | struct mutex mount_mutex; /* serialize mount attempts */ | 59 | char *snapdir_name; /* default ".snap" */ |
137 | struct ceph_mount_args *mount_args; | 60 | }; |
138 | 61 | ||
62 | struct ceph_fs_client { | ||
139 | struct super_block *sb; | 63 | struct super_block *sb; |
140 | 64 | ||
141 | unsigned long mount_state; | 65 | struct ceph_mount_options *mount_options; |
142 | wait_queue_head_t auth_wq; | 66 | struct ceph_client *client; |
143 | |||
144 | int auth_err; | ||
145 | 67 | ||
68 | unsigned long mount_state; | ||
146 | int min_caps; /* min caps i added */ | 69 | int min_caps; /* min caps i added */ |
147 | 70 | ||
148 | struct ceph_messenger *msgr; /* messenger instance */ | 71 | struct ceph_mds_client *mdsc; |
149 | struct ceph_mon_client monc; | ||
150 | struct ceph_mds_client mdsc; | ||
151 | struct ceph_osd_client osdc; | ||
152 | 72 | ||
153 | /* writeback */ | 73 | /* writeback */ |
154 | mempool_t *wb_pagevec_pool; | 74 | mempool_t *wb_pagevec_pool; |
@@ -160,14 +80,14 @@ struct ceph_client { | |||
160 | struct backing_dev_info backing_dev_info; | 80 | struct backing_dev_info backing_dev_info; |
161 | 81 | ||
162 | #ifdef CONFIG_DEBUG_FS | 82 | #ifdef CONFIG_DEBUG_FS |
163 | struct dentry *debugfs_monmap; | 83 | struct dentry *debugfs_dentry_lru, *debugfs_caps; |
164 | struct dentry *debugfs_mdsmap, *debugfs_osdmap; | ||
165 | struct dentry *debugfs_dir, *debugfs_dentry_lru, *debugfs_caps; | ||
166 | struct dentry *debugfs_congestion_kb; | 84 | struct dentry *debugfs_congestion_kb; |
167 | struct dentry *debugfs_bdi; | 85 | struct dentry *debugfs_bdi; |
86 | struct dentry *debugfs_mdsc, *debugfs_mdsmap; | ||
168 | #endif | 87 | #endif |
169 | }; | 88 | }; |
170 | 89 | ||
90 | |||
171 | /* | 91 | /* |
172 | * File i/o capability. This tracks shared state with the metadata | 92 | * File i/o capability. This tracks shared state with the metadata |
173 | * server that allows us to cache or writeback attributes or to read | 93 | * server that allows us to cache or writeback attributes or to read |
@@ -275,6 +195,20 @@ struct ceph_inode_xattr { | |||
275 | int should_free_val; | 195 | int should_free_val; |
276 | }; | 196 | }; |
277 | 197 | ||
198 | /* | ||
199 | * Ceph dentry state | ||
200 | */ | ||
201 | struct ceph_dentry_info { | ||
202 | struct ceph_mds_session *lease_session; | ||
203 | u32 lease_gen, lease_shared_gen; | ||
204 | u32 lease_seq; | ||
205 | unsigned long lease_renew_after, lease_renew_from; | ||
206 | struct list_head lru; | ||
207 | struct dentry *dentry; | ||
208 | u64 time; | ||
209 | u64 offset; | ||
210 | }; | ||
211 | |||
278 | struct ceph_inode_xattrs_info { | 212 | struct ceph_inode_xattrs_info { |
279 | /* | 213 | /* |
280 | * (still encoded) xattr blob. we avoid the overhead of parsing | 214 | * (still encoded) xattr blob. we avoid the overhead of parsing |
@@ -296,11 +230,6 @@ struct ceph_inode_xattrs_info { | |||
296 | /* | 230 | /* |
297 | * Ceph inode. | 231 | * Ceph inode. |
298 | */ | 232 | */ |
299 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
300 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
301 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
302 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
303 | |||
304 | struct ceph_inode_info { | 233 | struct ceph_inode_info { |
305 | struct ceph_vino i_vino; /* ceph ino + snap */ | 234 | struct ceph_vino i_vino; /* ceph ino + snap */ |
306 | 235 | ||
@@ -391,6 +320,63 @@ static inline struct ceph_inode_info *ceph_inode(struct inode *inode) | |||
391 | return container_of(inode, struct ceph_inode_info, vfs_inode); | 320 | return container_of(inode, struct ceph_inode_info, vfs_inode); |
392 | } | 321 | } |
393 | 322 | ||
323 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
324 | { | ||
325 | return ceph_inode(inode)->i_vino; | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * ino_t is <64 bits on many architectures, blech. | ||
330 | * | ||
331 | * don't include snap in ino hash, at least for now. | ||
332 | */ | ||
333 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
334 | { | ||
335 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
336 | #if BITS_PER_LONG == 32 | ||
337 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
338 | if (!ino) | ||
339 | ino = 1; | ||
340 | #endif | ||
341 | return ino; | ||
342 | } | ||
343 | |||
344 | /* for printf-style formatting */ | ||
345 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
346 | |||
347 | static inline u64 ceph_ino(struct inode *inode) | ||
348 | { | ||
349 | return ceph_inode(inode)->i_vino.ino; | ||
350 | } | ||
351 | static inline u64 ceph_snap(struct inode *inode) | ||
352 | { | ||
353 | return ceph_inode(inode)->i_vino.snap; | ||
354 | } | ||
355 | |||
356 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
357 | { | ||
358 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
359 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
360 | return ci->i_vino.ino == pvino->ino && | ||
361 | ci->i_vino.snap == pvino->snap; | ||
362 | } | ||
363 | |||
364 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
365 | struct ceph_vino vino) | ||
366 | { | ||
367 | ino_t t = ceph_vino_to_ino(vino); | ||
368 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
369 | } | ||
370 | |||
371 | |||
372 | /* | ||
373 | * Ceph inode. | ||
374 | */ | ||
375 | #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ | ||
376 | #define CEPH_I_NODELAY 4 /* do not delay cap release */ | ||
377 | #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ | ||
378 | #define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ | ||
379 | |||
394 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) | 380 | static inline void ceph_i_clear(struct inode *inode, unsigned mask) |
395 | { | 381 | { |
396 | struct ceph_inode_info *ci = ceph_inode(inode); | 382 | struct ceph_inode_info *ci = ceph_inode(inode); |
@@ -414,8 +400,9 @@ static inline bool ceph_i_test(struct inode *inode, unsigned mask) | |||
414 | struct ceph_inode_info *ci = ceph_inode(inode); | 400 | struct ceph_inode_info *ci = ceph_inode(inode); |
415 | bool r; | 401 | bool r; |
416 | 402 | ||
417 | smp_mb(); | 403 | spin_lock(&inode->i_lock); |
418 | r = (ci->i_ceph_flags & mask) == mask; | 404 | r = (ci->i_ceph_flags & mask) == mask; |
405 | spin_unlock(&inode->i_lock); | ||
419 | return r; | 406 | return r; |
420 | } | 407 | } |
421 | 408 | ||
@@ -432,20 +419,6 @@ extern u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, | |||
432 | struct ceph_inode_frag *pfrag, | 419 | struct ceph_inode_frag *pfrag, |
433 | int *found); | 420 | int *found); |
434 | 421 | ||
435 | /* | ||
436 | * Ceph dentry state | ||
437 | */ | ||
438 | struct ceph_dentry_info { | ||
439 | struct ceph_mds_session *lease_session; | ||
440 | u32 lease_gen, lease_shared_gen; | ||
441 | u32 lease_seq; | ||
442 | unsigned long lease_renew_after, lease_renew_from; | ||
443 | struct list_head lru; | ||
444 | struct dentry *dentry; | ||
445 | u64 time; | ||
446 | u64 offset; | ||
447 | }; | ||
448 | |||
449 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) | 422 | static inline struct ceph_dentry_info *ceph_dentry(struct dentry *dentry) |
450 | { | 423 | { |
451 | return (struct ceph_dentry_info *)dentry->d_fsdata; | 424 | return (struct ceph_dentry_info *)dentry->d_fsdata; |
@@ -456,22 +429,6 @@ static inline loff_t ceph_make_fpos(unsigned frag, unsigned off) | |||
456 | return ((loff_t)frag << 32) | (loff_t)off; | 429 | return ((loff_t)frag << 32) | (loff_t)off; |
457 | } | 430 | } |
458 | 431 | ||
459 | /* | ||
460 | * ino_t is <64 bits on many architectures, blech. | ||
461 | * | ||
462 | * don't include snap in ino hash, at least for now. | ||
463 | */ | ||
464 | static inline ino_t ceph_vino_to_ino(struct ceph_vino vino) | ||
465 | { | ||
466 | ino_t ino = (ino_t)vino.ino; /* ^ (vino.snap << 20); */ | ||
467 | #if BITS_PER_LONG == 32 | ||
468 | ino ^= vino.ino >> (sizeof(u64)-sizeof(ino_t)) * 8; | ||
469 | if (!ino) | ||
470 | ino = 1; | ||
471 | #endif | ||
472 | return ino; | ||
473 | } | ||
474 | |||
475 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) | 432 | static inline int ceph_set_ino_cb(struct inode *inode, void *data) |
476 | { | 433 | { |
477 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; | 434 | ceph_inode(inode)->i_vino = *(struct ceph_vino *)data; |
@@ -479,39 +436,6 @@ static inline int ceph_set_ino_cb(struct inode *inode, void *data) | |||
479 | return 0; | 436 | return 0; |
480 | } | 437 | } |
481 | 438 | ||
482 | static inline struct ceph_vino ceph_vino(struct inode *inode) | ||
483 | { | ||
484 | return ceph_inode(inode)->i_vino; | ||
485 | } | ||
486 | |||
487 | /* for printf-style formatting */ | ||
488 | #define ceph_vinop(i) ceph_inode(i)->i_vino.ino, ceph_inode(i)->i_vino.snap | ||
489 | |||
490 | static inline u64 ceph_ino(struct inode *inode) | ||
491 | { | ||
492 | return ceph_inode(inode)->i_vino.ino; | ||
493 | } | ||
494 | static inline u64 ceph_snap(struct inode *inode) | ||
495 | { | ||
496 | return ceph_inode(inode)->i_vino.snap; | ||
497 | } | ||
498 | |||
499 | static inline int ceph_ino_compare(struct inode *inode, void *data) | ||
500 | { | ||
501 | struct ceph_vino *pvino = (struct ceph_vino *)data; | ||
502 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
503 | return ci->i_vino.ino == pvino->ino && | ||
504 | ci->i_vino.snap == pvino->snap; | ||
505 | } | ||
506 | |||
507 | static inline struct inode *ceph_find_inode(struct super_block *sb, | ||
508 | struct ceph_vino vino) | ||
509 | { | ||
510 | ino_t t = ceph_vino_to_ino(vino); | ||
511 | return ilookup5(sb, t, ceph_ino_compare, &vino); | ||
512 | } | ||
513 | |||
514 | |||
515 | /* | 439 | /* |
516 | * caps helpers | 440 | * caps helpers |
517 | */ | 441 | */ |
@@ -576,18 +500,18 @@ extern int ceph_reserve_caps(struct ceph_mds_client *mdsc, | |||
576 | struct ceph_cap_reservation *ctx, int need); | 500 | struct ceph_cap_reservation *ctx, int need); |
577 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, | 501 | extern int ceph_unreserve_caps(struct ceph_mds_client *mdsc, |
578 | struct ceph_cap_reservation *ctx); | 502 | struct ceph_cap_reservation *ctx); |
579 | extern void ceph_reservation_status(struct ceph_client *client, | 503 | extern void ceph_reservation_status(struct ceph_fs_client *client, |
580 | int *total, int *avail, int *used, | 504 | int *total, int *avail, int *used, |
581 | int *reserved, int *min); | 505 | int *reserved, int *min); |
582 | 506 | ||
583 | static inline struct ceph_client *ceph_inode_to_client(struct inode *inode) | 507 | static inline struct ceph_fs_client *ceph_inode_to_client(struct inode *inode) |
584 | { | 508 | { |
585 | return (struct ceph_client *)inode->i_sb->s_fs_info; | 509 | return (struct ceph_fs_client *)inode->i_sb->s_fs_info; |
586 | } | 510 | } |
587 | 511 | ||
588 | static inline struct ceph_client *ceph_sb_to_client(struct super_block *sb) | 512 | static inline struct ceph_fs_client *ceph_sb_to_client(struct super_block *sb) |
589 | { | 513 | { |
590 | return (struct ceph_client *)sb->s_fs_info; | 514 | return (struct ceph_fs_client *)sb->s_fs_info; |
591 | } | 515 | } |
592 | 516 | ||
593 | 517 | ||
@@ -617,51 +541,6 @@ struct ceph_file_info { | |||
617 | 541 | ||
618 | 542 | ||
619 | /* | 543 | /* |
620 | * snapshots | ||
621 | */ | ||
622 | |||
623 | /* | ||
624 | * A "snap context" is the set of existing snapshots when we | ||
625 | * write data. It is used by the OSD to guide its COW behavior. | ||
626 | * | ||
627 | * The ceph_snap_context is refcounted, and attached to each dirty | ||
628 | * page, indicating which context the dirty data belonged when it was | ||
629 | * dirtied. | ||
630 | */ | ||
631 | struct ceph_snap_context { | ||
632 | atomic_t nref; | ||
633 | u64 seq; | ||
634 | int num_snaps; | ||
635 | u64 snaps[]; | ||
636 | }; | ||
637 | |||
638 | static inline struct ceph_snap_context * | ||
639 | ceph_get_snap_context(struct ceph_snap_context *sc) | ||
640 | { | ||
641 | /* | ||
642 | printk("get_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
643 | atomic_read(&sc->nref)+1); | ||
644 | */ | ||
645 | if (sc) | ||
646 | atomic_inc(&sc->nref); | ||
647 | return sc; | ||
648 | } | ||
649 | |||
650 | static inline void ceph_put_snap_context(struct ceph_snap_context *sc) | ||
651 | { | ||
652 | if (!sc) | ||
653 | return; | ||
654 | /* | ||
655 | printk("put_snap_context %p %d -> %d\n", sc, atomic_read(&sc->nref), | ||
656 | atomic_read(&sc->nref)-1); | ||
657 | */ | ||
658 | if (atomic_dec_and_test(&sc->nref)) { | ||
659 | /*printk(" deleting snap_context %p\n", sc);*/ | ||
660 | kfree(sc); | ||
661 | } | ||
662 | } | ||
663 | |||
664 | /* | ||
665 | * A "snap realm" describes a subset of the file hierarchy sharing | 544 | * A "snap realm" describes a subset of the file hierarchy sharing |
666 | * the same set of snapshots that apply to it. The realms themselves | 545 | * the same set of snapshots that apply to it. The realms themselves |
667 | * are organized into a hierarchy, such that children inherit (some of) | 546 | * are organized into a hierarchy, such that children inherit (some of) |
@@ -690,6 +569,8 @@ struct ceph_snap_realm { | |||
690 | 569 | ||
691 | struct list_head empty_item; /* if i have ref==0 */ | 570 | struct list_head empty_item; /* if i have ref==0 */ |
692 | 571 | ||
572 | struct list_head dirty_item; /* if realm needs new context */ | ||
573 | |||
693 | /* the current set of snaps for this realm */ | 574 | /* the current set of snaps for this realm */ |
694 | struct ceph_snap_context *cached_context; | 575 | struct ceph_snap_context *cached_context; |
695 | 576 | ||
@@ -697,16 +578,33 @@ struct ceph_snap_realm { | |||
697 | spinlock_t inodes_with_caps_lock; | 578 | spinlock_t inodes_with_caps_lock; |
698 | }; | 579 | }; |
699 | 580 | ||
700 | 581 | static inline int default_congestion_kb(void) | |
701 | |||
702 | /* | ||
703 | * calculate the number of pages a given length and offset map onto, | ||
704 | * if we align the data. | ||
705 | */ | ||
706 | static inline int calc_pages_for(u64 off, u64 len) | ||
707 | { | 582 | { |
708 | return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - | 583 | int congestion_kb; |
709 | (off >> PAGE_CACHE_SHIFT); | 584 | |
585 | /* | ||
586 | * Copied from NFS | ||
587 | * | ||
588 | * congestion size, scale with available memory. | ||
589 | * | ||
590 | * 64MB: 8192k | ||
591 | * 128MB: 11585k | ||
592 | * 256MB: 16384k | ||
593 | * 512MB: 23170k | ||
594 | * 1GB: 32768k | ||
595 | * 2GB: 46340k | ||
596 | * 4GB: 65536k | ||
597 | * 8GB: 92681k | ||
598 | * 16GB: 131072k | ||
599 | * | ||
600 | * This allows larger machines to have larger/more transfers. | ||
601 | * Limit the default to 256M | ||
602 | */ | ||
603 | congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10); | ||
604 | if (congestion_kb > 256*1024) | ||
605 | congestion_kb = 256*1024; | ||
606 | |||
607 | return congestion_kb; | ||
710 | } | 608 | } |
711 | 609 | ||
712 | 610 | ||
@@ -739,16 +637,6 @@ static inline bool __ceph_have_pending_cap_snap(struct ceph_inode_info *ci) | |||
739 | ci_item)->writing; | 637 | ci_item)->writing; |
740 | } | 638 | } |
741 | 639 | ||
742 | |||
743 | /* super.c */ | ||
744 | extern struct kmem_cache *ceph_inode_cachep; | ||
745 | extern struct kmem_cache *ceph_cap_cachep; | ||
746 | extern struct kmem_cache *ceph_dentry_cachep; | ||
747 | extern struct kmem_cache *ceph_file_cachep; | ||
748 | |||
749 | extern const char *ceph_msg_type_name(int type); | ||
750 | extern int ceph_check_fsid(struct ceph_client *client, struct ceph_fsid *fsid); | ||
751 | |||
752 | /* inode.c */ | 640 | /* inode.c */ |
753 | extern const struct inode_operations ceph_file_iops; | 641 | extern const struct inode_operations ceph_file_iops; |
754 | 642 | ||
@@ -826,7 +714,8 @@ extern void ceph_put_cap_refs(struct ceph_inode_info *ci, int had); | |||
826 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, | 714 | extern void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr, |
827 | struct ceph_snap_context *snapc); | 715 | struct ceph_snap_context *snapc); |
828 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, | 716 | extern void __ceph_flush_snaps(struct ceph_inode_info *ci, |
829 | struct ceph_mds_session **psession); | 717 | struct ceph_mds_session **psession, |
718 | int again); | ||
830 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, | 719 | extern void ceph_check_caps(struct ceph_inode_info *ci, int flags, |
831 | struct ceph_mds_session *session); | 720 | struct ceph_mds_session *session); |
832 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); | 721 | extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc); |
@@ -854,12 +743,18 @@ extern int ceph_mmap(struct file *file, struct vm_area_struct *vma); | |||
854 | /* file.c */ | 743 | /* file.c */ |
855 | extern const struct file_operations ceph_file_fops; | 744 | extern const struct file_operations ceph_file_fops; |
856 | extern const struct address_space_operations ceph_aops; | 745 | extern const struct address_space_operations ceph_aops; |
746 | extern int ceph_copy_to_page_vector(struct page **pages, | ||
747 | const char *data, | ||
748 | loff_t off, size_t len); | ||
749 | extern int ceph_copy_from_page_vector(struct page **pages, | ||
750 | char *data, | ||
751 | loff_t off, size_t len); | ||
752 | extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags); | ||
857 | extern int ceph_open(struct inode *inode, struct file *file); | 753 | extern int ceph_open(struct inode *inode, struct file *file); |
858 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, | 754 | extern struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, |
859 | struct nameidata *nd, int mode, | 755 | struct nameidata *nd, int mode, |
860 | int locked_dir); | 756 | int locked_dir); |
861 | extern int ceph_release(struct inode *inode, struct file *filp); | 757 | extern int ceph_release(struct inode *inode, struct file *filp); |
862 | extern void ceph_release_page_vector(struct page **pages, int num_pages); | ||
863 | 758 | ||
864 | /* dir.c */ | 759 | /* dir.c */ |
865 | extern const struct file_operations ceph_dir_fops; | 760 | extern const struct file_operations ceph_dir_fops; |
@@ -889,12 +784,6 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
889 | /* export.c */ | 784 | /* export.c */ |
890 | extern const struct export_operations ceph_export_ops; | 785 | extern const struct export_operations ceph_export_ops; |
891 | 786 | ||
892 | /* debugfs.c */ | ||
893 | extern int ceph_debugfs_init(void); | ||
894 | extern void ceph_debugfs_cleanup(void); | ||
895 | extern int ceph_debugfs_client_init(struct ceph_client *client); | ||
896 | extern void ceph_debugfs_client_cleanup(struct ceph_client *client); | ||
897 | |||
898 | /* locks.c */ | 787 | /* locks.c */ |
899 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 788 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
900 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 789 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
@@ -911,4 +800,8 @@ static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) | |||
911 | return NULL; | 800 | return NULL; |
912 | } | 801 | } |
913 | 802 | ||
803 | /* debugfs.c */ | ||
804 | extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); | ||
805 | extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); | ||
806 | |||
914 | #endif /* _FS_CEPH_SUPER_H */ | 807 | #endif /* _FS_CEPH_SUPER_H */ |
diff --git a/fs/ceph/types.h b/fs/ceph/types.h deleted file mode 100644 index 28b35a005ec2..000000000000 --- a/fs/ceph/types.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | #ifndef _FS_CEPH_TYPES_H | ||
2 | #define _FS_CEPH_TYPES_H | ||
3 | |||
4 | /* needed before including ceph_fs.h */ | ||
5 | #include <linux/in.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/fcntl.h> | ||
8 | #include <linux/string.h> | ||
9 | |||
10 | #include "ceph_fs.h" | ||
11 | #include "ceph_frag.h" | ||
12 | #include "ceph_hash.h" | ||
13 | |||
14 | /* | ||
15 | * Identify inodes by both their ino AND snapshot id (a u64). | ||
16 | */ | ||
17 | struct ceph_vino { | ||
18 | u64 ino; | ||
19 | u64 snap; | ||
20 | }; | ||
21 | |||
22 | |||
23 | /* context for the caps reservation mechanism */ | ||
24 | struct ceph_cap_reservation { | ||
25 | int count; | ||
26 | }; | ||
27 | |||
28 | |||
29 | #endif | ||
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9578af610b73..6e12a6ba5f79 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -1,6 +1,9 @@ | |||
1 | #include "ceph_debug.h" | 1 | #include <linux/ceph/ceph_debug.h> |
2 | |||
2 | #include "super.h" | 3 | #include "super.h" |
3 | #include "decode.h" | 4 | #include "mds_client.h" |
5 | |||
6 | #include <linux/ceph/decode.h> | ||
4 | 7 | ||
5 | #include <linux/xattr.h> | 8 | #include <linux/xattr.h> |
6 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
@@ -620,12 +623,12 @@ out: | |||
620 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | 623 | static int ceph_sync_setxattr(struct dentry *dentry, const char *name, |
621 | const char *value, size_t size, int flags) | 624 | const char *value, size_t size, int flags) |
622 | { | 625 | { |
623 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 626 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
624 | struct inode *inode = dentry->d_inode; | 627 | struct inode *inode = dentry->d_inode; |
625 | struct ceph_inode_info *ci = ceph_inode(inode); | 628 | struct ceph_inode_info *ci = ceph_inode(inode); |
626 | struct inode *parent_inode = dentry->d_parent->d_inode; | 629 | struct inode *parent_inode = dentry->d_parent->d_inode; |
627 | struct ceph_mds_request *req; | 630 | struct ceph_mds_request *req; |
628 | struct ceph_mds_client *mdsc = &client->mdsc; | 631 | struct ceph_mds_client *mdsc = fsc->mdsc; |
629 | int err; | 632 | int err; |
630 | int i, nr_pages; | 633 | int i, nr_pages; |
631 | struct page **pages = NULL; | 634 | struct page **pages = NULL; |
@@ -713,10 +716,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name, | |||
713 | 716 | ||
714 | /* preallocate memory for xattr name, value, index node */ | 717 | /* preallocate memory for xattr name, value, index node */ |
715 | err = -ENOMEM; | 718 | err = -ENOMEM; |
716 | newname = kmalloc(name_len + 1, GFP_NOFS); | 719 | newname = kmemdup(name, name_len + 1, GFP_NOFS); |
717 | if (!newname) | 720 | if (!newname) |
718 | goto out; | 721 | goto out; |
719 | memcpy(newname, name, name_len + 1); | ||
720 | 722 | ||
721 | if (val_len) { | 723 | if (val_len) { |
722 | newval = kmalloc(val_len + 1, GFP_NOFS); | 724 | newval = kmalloc(val_len + 1, GFP_NOFS); |
@@ -777,8 +779,8 @@ out: | |||
777 | 779 | ||
778 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) | 780 | static int ceph_send_removexattr(struct dentry *dentry, const char *name) |
779 | { | 781 | { |
780 | struct ceph_client *client = ceph_sb_to_client(dentry->d_sb); | 782 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
781 | struct ceph_mds_client *mdsc = &client->mdsc; | 783 | struct ceph_mds_client *mdsc = fsc->mdsc; |
782 | struct inode *inode = dentry->d_inode; | 784 | struct inode *inode = dentry->d_inode; |
783 | struct inode *parent_inode = dentry->d_parent->d_inode; | 785 | struct inode *parent_inode = dentry->d_parent->d_inode; |
784 | struct ceph_mds_request *req; | 786 | struct ceph_mds_request *req; |
diff --git a/fs/char_dev.c b/fs/char_dev.c index f80a4f25123c..e5b9df993b93 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
@@ -40,7 +40,9 @@ struct backing_dev_info directly_mappable_cdev_bdi = { | |||
40 | #endif | 40 | #endif |
41 | /* permit direct mmap, for read, write or exec */ | 41 | /* permit direct mmap, for read, write or exec */ |
42 | BDI_CAP_MAP_DIRECT | | 42 | BDI_CAP_MAP_DIRECT | |
43 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP), | 43 | BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP | |
44 | /* no writeback happens */ | ||
45 | BDI_CAP_NO_ACCT_AND_WRITEBACK), | ||
44 | }; | 46 | }; |
45 | 47 | ||
46 | static struct kobj_map *cdev_map; | 48 | static struct kobj_map *cdev_map; |
@@ -454,6 +456,7 @@ static void cdev_purge(struct cdev *cdev) | |||
454 | */ | 456 | */ |
455 | const struct file_operations def_chr_fops = { | 457 | const struct file_operations def_chr_fops = { |
456 | .open = chrdev_open, | 458 | .open = chrdev_open, |
459 | .llseek = noop_llseek, | ||
457 | }; | 460 | }; |
458 | 461 | ||
459 | static struct kobject *exact_match(dev_t dev, int *part, void *data) | 462 | static struct kobject *exact_match(dev_t dev, int *part, void *data) |
diff --git a/fs/cifs/README b/fs/cifs/README index 7099a526f775..ee68d1036544 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -527,6 +527,11 @@ A partial list of the supported mount options follows: | |||
527 | SFU does). In the future the bottom 9 bits of the | 527 | SFU does). In the future the bottom 9 bits of the |
528 | mode also will be emulated using queries of the security | 528 | mode also will be emulated using queries of the security |
529 | descriptor (ACL). | 529 | descriptor (ACL). |
530 | mfsymlinks Enable support for Minshall+French symlinks | ||
531 | (see http://wiki.samba.org/index.php/UNIX_Extensions#Minshall.2BFrench_symlinks) | ||
532 | This option is ignored when specified together with the | ||
533 | 'sfu' option. Minshall+French symlinks are used even if | ||
534 | the server supports the CIFS Unix Extensions. | ||
530 | sign Must use packet signing (helps avoid unwanted data modification | 535 | sign Must use packet signing (helps avoid unwanted data modification |
531 | by intermediate systems in the route). Note that signing | 536 | by intermediate systems in the route). Note that signing |
532 | does not work with lanman or plaintext authentication. | 537 | does not work with lanman or plaintext authentication. |
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index eb1ba493489f..103ab8b605b0 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c | |||
@@ -148,7 +148,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
148 | seq_printf(m, "Servers:"); | 148 | seq_printf(m, "Servers:"); |
149 | 149 | ||
150 | i = 0; | 150 | i = 0; |
151 | read_lock(&cifs_tcp_ses_lock); | 151 | spin_lock(&cifs_tcp_ses_lock); |
152 | list_for_each(tmp1, &cifs_tcp_ses_list) { | 152 | list_for_each(tmp1, &cifs_tcp_ses_list) { |
153 | server = list_entry(tmp1, struct TCP_Server_Info, | 153 | server = list_entry(tmp1, struct TCP_Server_Info, |
154 | tcp_ses_list); | 154 | tcp_ses_list); |
@@ -230,7 +230,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) | |||
230 | spin_unlock(&GlobalMid_Lock); | 230 | spin_unlock(&GlobalMid_Lock); |
231 | } | 231 | } |
232 | } | 232 | } |
233 | read_unlock(&cifs_tcp_ses_lock); | 233 | spin_unlock(&cifs_tcp_ses_lock); |
234 | seq_putc(m, '\n'); | 234 | seq_putc(m, '\n'); |
235 | 235 | ||
236 | /* BB add code to dump additional info such as TCP session info now */ | 236 | /* BB add code to dump additional info such as TCP session info now */ |
@@ -270,7 +270,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, | |||
270 | atomic_set(&totBufAllocCount, 0); | 270 | atomic_set(&totBufAllocCount, 0); |
271 | atomic_set(&totSmBufAllocCount, 0); | 271 | atomic_set(&totSmBufAllocCount, 0); |
272 | #endif /* CONFIG_CIFS_STATS2 */ | 272 | #endif /* CONFIG_CIFS_STATS2 */ |
273 | read_lock(&cifs_tcp_ses_lock); | 273 | spin_lock(&cifs_tcp_ses_lock); |
274 | list_for_each(tmp1, &cifs_tcp_ses_list) { | 274 | list_for_each(tmp1, &cifs_tcp_ses_list) { |
275 | server = list_entry(tmp1, struct TCP_Server_Info, | 275 | server = list_entry(tmp1, struct TCP_Server_Info, |
276 | tcp_ses_list); | 276 | tcp_ses_list); |
@@ -303,7 +303,7 @@ static ssize_t cifs_stats_proc_write(struct file *file, | |||
303 | } | 303 | } |
304 | } | 304 | } |
305 | } | 305 | } |
306 | read_unlock(&cifs_tcp_ses_lock); | 306 | spin_unlock(&cifs_tcp_ses_lock); |
307 | } | 307 | } |
308 | 308 | ||
309 | return count; | 309 | return count; |
@@ -343,7 +343,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) | |||
343 | GlobalCurrentXid, GlobalMaxActiveXid); | 343 | GlobalCurrentXid, GlobalMaxActiveXid); |
344 | 344 | ||
345 | i = 0; | 345 | i = 0; |
346 | read_lock(&cifs_tcp_ses_lock); | 346 | spin_lock(&cifs_tcp_ses_lock); |
347 | list_for_each(tmp1, &cifs_tcp_ses_list) { | 347 | list_for_each(tmp1, &cifs_tcp_ses_list) { |
348 | server = list_entry(tmp1, struct TCP_Server_Info, | 348 | server = list_entry(tmp1, struct TCP_Server_Info, |
349 | tcp_ses_list); | 349 | tcp_ses_list); |
@@ -397,7 +397,7 @@ static int cifs_stats_proc_show(struct seq_file *m, void *v) | |||
397 | } | 397 | } |
398 | } | 398 | } |
399 | } | 399 | } |
400 | read_unlock(&cifs_tcp_ses_lock); | 400 | spin_unlock(&cifs_tcp_ses_lock); |
401 | 401 | ||
402 | seq_putc(m, '\n'); | 402 | seq_putc(m, '\n'); |
403 | return 0; | 403 | return 0; |
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h index aa316891ac0c..8942b28cf807 100644 --- a/fs/cifs/cifs_debug.h +++ b/fs/cifs/cifs_debug.h | |||
@@ -34,7 +34,7 @@ void cifs_dump_mids(struct TCP_Server_Info *); | |||
34 | extern int traceSMB; /* flag which enables the function below */ | 34 | extern int traceSMB; /* flag which enables the function below */ |
35 | void dump_smb(struct smb_hdr *, int); | 35 | void dump_smb(struct smb_hdr *, int); |
36 | #define CIFS_INFO 0x01 | 36 | #define CIFS_INFO 0x01 |
37 | #define CIFS_RC 0x02 | 37 | #define CIFS_RC 0x02 |
38 | #define CIFS_TIMER 0x04 | 38 | #define CIFS_TIMER 0x04 |
39 | 39 | ||
40 | /* | 40 | /* |
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index d6ced7aa23cf..c68a056f27fd 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c | |||
@@ -44,8 +44,7 @@ static void cifs_dfs_expire_automounts(struct work_struct *work) | |||
44 | void cifs_dfs_release_automount_timer(void) | 44 | void cifs_dfs_release_automount_timer(void) |
45 | { | 45 | { |
46 | BUG_ON(!list_empty(&cifs_dfs_automount_list)); | 46 | BUG_ON(!list_empty(&cifs_dfs_automount_list)); |
47 | cancel_delayed_work(&cifs_dfs_automount_task); | 47 | cancel_delayed_work_sync(&cifs_dfs_automount_task); |
48 | flush_scheduled_work(); | ||
49 | } | 48 | } |
50 | 49 | ||
51 | /** | 50 | /** |
@@ -306,6 +305,7 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
306 | int xid, i; | 305 | int xid, i; |
307 | int rc = 0; | 306 | int rc = 0; |
308 | struct vfsmount *mnt = ERR_PTR(-ENOENT); | 307 | struct vfsmount *mnt = ERR_PTR(-ENOENT); |
308 | struct tcon_link *tlink; | ||
309 | 309 | ||
310 | cFYI(1, "in %s", __func__); | 310 | cFYI(1, "in %s", __func__); |
311 | BUG_ON(IS_ROOT(dentry)); | 311 | BUG_ON(IS_ROOT(dentry)); |
@@ -315,14 +315,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
315 | dput(nd->path.dentry); | 315 | dput(nd->path.dentry); |
316 | nd->path.dentry = dget(dentry); | 316 | nd->path.dentry = dget(dentry); |
317 | 317 | ||
318 | cifs_sb = CIFS_SB(dentry->d_inode->i_sb); | ||
319 | ses = cifs_sb->tcon->ses; | ||
320 | |||
321 | if (!ses) { | ||
322 | rc = -EINVAL; | ||
323 | goto out_err; | ||
324 | } | ||
325 | |||
326 | /* | 318 | /* |
327 | * The MSDFS spec states that paths in DFS referral requests and | 319 | * The MSDFS spec states that paths in DFS referral requests and |
328 | * responses must be prefixed by a single '\' character instead of | 320 | * responses must be prefixed by a single '\' character instead of |
@@ -335,10 +327,20 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
335 | goto out_err; | 327 | goto out_err; |
336 | } | 328 | } |
337 | 329 | ||
338 | rc = get_dfs_path(xid, ses , full_path + 1, cifs_sb->local_nls, | 330 | cifs_sb = CIFS_SB(dentry->d_inode->i_sb); |
331 | tlink = cifs_sb_tlink(cifs_sb); | ||
332 | if (IS_ERR(tlink)) { | ||
333 | rc = PTR_ERR(tlink); | ||
334 | goto out_err; | ||
335 | } | ||
336 | ses = tlink_tcon(tlink)->ses; | ||
337 | |||
338 | rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, | ||
339 | &num_referrals, &referrals, | 339 | &num_referrals, &referrals, |
340 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 340 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
341 | 341 | ||
342 | cifs_put_tlink(tlink); | ||
343 | |||
342 | for (i = 0; i < num_referrals; i++) { | 344 | for (i = 0; i < num_referrals; i++) { |
343 | int len; | 345 | int len; |
344 | dump_referral(referrals+i); | 346 | dump_referral(referrals+i); |
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 9e771450c3b8..525ba59a4105 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h | |||
@@ -15,6 +15,8 @@ | |||
15 | * the GNU Lesser General Public License for more details. | 15 | * the GNU Lesser General Public License for more details. |
16 | * | 16 | * |
17 | */ | 17 | */ |
18 | #include <linux/radix-tree.h> | ||
19 | |||
18 | #ifndef _CIFS_FS_SB_H | 20 | #ifndef _CIFS_FS_SB_H |
19 | #define _CIFS_FS_SB_H | 21 | #define _CIFS_FS_SB_H |
20 | 22 | ||
@@ -36,23 +38,28 @@ | |||
36 | #define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */ | 38 | #define CIFS_MOUNT_NOPOSIXBRL 0x2000 /* mandatory not posix byte range lock */ |
37 | #define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/ | 39 | #define CIFS_MOUNT_NOSSYNC 0x4000 /* don't do slow SMBflush on every sync*/ |
38 | #define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */ | 40 | #define CIFS_MOUNT_FSCACHE 0x8000 /* local caching enabled */ |
41 | #define CIFS_MOUNT_MF_SYMLINKS 0x10000 /* Minshall+French Symlinks enabled */ | ||
42 | #define CIFS_MOUNT_MULTIUSER 0x20000 /* multiuser mount */ | ||
39 | 43 | ||
40 | struct cifs_sb_info { | 44 | struct cifs_sb_info { |
41 | struct cifsTconInfo *tcon; /* primary mount */ | 45 | struct radix_tree_root tlink_tree; |
42 | struct list_head nested_tcon_q; | 46 | #define CIFS_TLINK_MASTER_TAG 0 /* is "master" (mount) tcon */ |
47 | spinlock_t tlink_tree_lock; | ||
43 | struct nls_table *local_nls; | 48 | struct nls_table *local_nls; |
44 | unsigned int rsize; | 49 | unsigned int rsize; |
45 | unsigned int wsize; | 50 | unsigned int wsize; |
51 | atomic_t active; | ||
46 | uid_t mnt_uid; | 52 | uid_t mnt_uid; |
47 | gid_t mnt_gid; | 53 | gid_t mnt_gid; |
48 | mode_t mnt_file_mode; | 54 | mode_t mnt_file_mode; |
49 | mode_t mnt_dir_mode; | 55 | mode_t mnt_dir_mode; |
50 | int mnt_cifs_flags; | 56 | unsigned int mnt_cifs_flags; |
51 | int prepathlen; | 57 | int prepathlen; |
52 | char *prepath; /* relative path under the share to mount to */ | 58 | char *prepath; /* relative path under the share to mount to */ |
53 | #ifdef CONFIG_CIFS_DFS_UPCALL | 59 | #ifdef CONFIG_CIFS_DFS_UPCALL |
54 | char *mountdata; /* mount options received at mount time */ | 60 | char *mountdata; /* mount options received at mount time */ |
55 | #endif | 61 | #endif |
56 | struct backing_dev_info bdi; | 62 | struct backing_dev_info bdi; |
63 | struct delayed_work prune_tlinks; | ||
57 | }; | 64 | }; |
58 | #endif /* _CIFS_FS_SB_H */ | 65 | #endif /* _CIFS_FS_SB_H */ |
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 85d7cf7ff2c8..c9b4792ae825 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c | |||
@@ -557,11 +557,16 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, | |||
557 | { | 557 | { |
558 | struct cifs_ntsd *pntsd = NULL; | 558 | struct cifs_ntsd *pntsd = NULL; |
559 | int xid, rc; | 559 | int xid, rc; |
560 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
561 | |||
562 | if (IS_ERR(tlink)) | ||
563 | return NULL; | ||
560 | 564 | ||
561 | xid = GetXid(); | 565 | xid = GetXid(); |
562 | rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); | 566 | rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen); |
563 | FreeXid(xid); | 567 | FreeXid(xid); |
564 | 568 | ||
569 | cifs_put_tlink(tlink); | ||
565 | 570 | ||
566 | cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); | 571 | cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); |
567 | return pntsd; | 572 | return pntsd; |
@@ -574,10 +579,16 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | |||
574 | int oplock = 0; | 579 | int oplock = 0; |
575 | int xid, rc; | 580 | int xid, rc; |
576 | __u16 fid; | 581 | __u16 fid; |
582 | struct cifsTconInfo *tcon; | ||
583 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
584 | |||
585 | if (IS_ERR(tlink)) | ||
586 | return NULL; | ||
577 | 587 | ||
588 | tcon = tlink_tcon(tlink); | ||
578 | xid = GetXid(); | 589 | xid = GetXid(); |
579 | 590 | ||
580 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, READ_CONTROL, 0, | 591 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0, |
581 | &fid, &oplock, NULL, cifs_sb->local_nls, | 592 | &fid, &oplock, NULL, cifs_sb->local_nls, |
582 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 593 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
583 | if (rc) { | 594 | if (rc) { |
@@ -585,11 +596,12 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, | |||
585 | goto out; | 596 | goto out; |
586 | } | 597 | } |
587 | 598 | ||
588 | rc = CIFSSMBGetCIFSACL(xid, cifs_sb->tcon, fid, &pntsd, pacllen); | 599 | rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen); |
589 | cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); | 600 | cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen); |
590 | 601 | ||
591 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | 602 | CIFSSMBClose(xid, tcon, fid); |
592 | out: | 603 | out: |
604 | cifs_put_tlink(tlink); | ||
593 | FreeXid(xid); | 605 | FreeXid(xid); |
594 | return pntsd; | 606 | return pntsd; |
595 | } | 607 | } |
@@ -603,7 +615,7 @@ static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb, | |||
603 | struct cifsFileInfo *open_file = NULL; | 615 | struct cifsFileInfo *open_file = NULL; |
604 | 616 | ||
605 | if (inode) | 617 | if (inode) |
606 | open_file = find_readable_file(CIFS_I(inode)); | 618 | open_file = find_readable_file(CIFS_I(inode), true); |
607 | if (!open_file) | 619 | if (!open_file) |
608 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); | 620 | return get_cifs_acl_by_path(cifs_sb, path, pacllen); |
609 | 621 | ||
@@ -616,10 +628,15 @@ static int set_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb, __u16 fid, | |||
616 | struct cifs_ntsd *pnntsd, u32 acllen) | 628 | struct cifs_ntsd *pnntsd, u32 acllen) |
617 | { | 629 | { |
618 | int xid, rc; | 630 | int xid, rc; |
631 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
632 | |||
633 | if (IS_ERR(tlink)) | ||
634 | return PTR_ERR(tlink); | ||
619 | 635 | ||
620 | xid = GetXid(); | 636 | xid = GetXid(); |
621 | rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); | 637 | rc = CIFSSMBSetCIFSACL(xid, tlink_tcon(tlink), fid, pnntsd, acllen); |
622 | FreeXid(xid); | 638 | FreeXid(xid); |
639 | cifs_put_tlink(tlink); | ||
623 | 640 | ||
624 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); | 641 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); |
625 | return rc; | 642 | return rc; |
@@ -631,10 +648,16 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, | |||
631 | int oplock = 0; | 648 | int oplock = 0; |
632 | int xid, rc; | 649 | int xid, rc; |
633 | __u16 fid; | 650 | __u16 fid; |
651 | struct cifsTconInfo *tcon; | ||
652 | struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); | ||
634 | 653 | ||
654 | if (IS_ERR(tlink)) | ||
655 | return PTR_ERR(tlink); | ||
656 | |||
657 | tcon = tlink_tcon(tlink); | ||
635 | xid = GetXid(); | 658 | xid = GetXid(); |
636 | 659 | ||
637 | rc = CIFSSMBOpen(xid, cifs_sb->tcon, path, FILE_OPEN, WRITE_DAC, 0, | 660 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, WRITE_DAC, 0, |
638 | &fid, &oplock, NULL, cifs_sb->local_nls, | 661 | &fid, &oplock, NULL, cifs_sb->local_nls, |
639 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 662 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
640 | if (rc) { | 663 | if (rc) { |
@@ -642,12 +665,13 @@ static int set_cifs_acl_by_path(struct cifs_sb_info *cifs_sb, const char *path, | |||
642 | goto out; | 665 | goto out; |
643 | } | 666 | } |
644 | 667 | ||
645 | rc = CIFSSMBSetCIFSACL(xid, cifs_sb->tcon, fid, pnntsd, acllen); | 668 | rc = CIFSSMBSetCIFSACL(xid, tcon, fid, pnntsd, acllen); |
646 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); | 669 | cFYI(DBG2, "SetCIFSACL rc = %d", rc); |
647 | 670 | ||
648 | CIFSSMBClose(xid, cifs_sb->tcon, fid); | 671 | CIFSSMBClose(xid, tcon, fid); |
649 | out: | 672 | out: |
650 | FreeXid(xid); | 673 | FreeXid(xid); |
674 | cifs_put_tlink(tlink); | ||
651 | return rc; | 675 | return rc; |
652 | } | 676 | } |
653 | 677 | ||
@@ -661,7 +685,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen, | |||
661 | 685 | ||
662 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); | 686 | cFYI(DBG2, "set ACL for %s from mode 0x%x", path, inode->i_mode); |
663 | 687 | ||
664 | open_file = find_readable_file(CIFS_I(inode)); | 688 | open_file = find_readable_file(CIFS_I(inode), true); |
665 | if (!open_file) | 689 | if (!open_file) |
666 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); | 690 | return set_cifs_acl_by_path(cifs_sb, path, pnntsd, acllen); |
667 | 691 | ||
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 35042d8f7338..7ac0056294cf 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include "md5.h" | 27 | #include "md5.h" |
28 | #include "cifs_unicode.h" | 28 | #include "cifs_unicode.h" |
29 | #include "cifsproto.h" | 29 | #include "cifsproto.h" |
30 | #include "ntlmssp.h" | ||
30 | #include <linux/ctype.h> | 31 | #include <linux/ctype.h> |
31 | #include <linux/random.h> | 32 | #include <linux/random.h> |
32 | 33 | ||
@@ -42,7 +43,7 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8, | |||
42 | unsigned char *p24); | 43 | unsigned char *p24); |
43 | 44 | ||
44 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, | 45 | static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, |
45 | const struct mac_key *key, char *signature) | 46 | const struct session_key *key, char *signature) |
46 | { | 47 | { |
47 | struct MD5Context context; | 48 | struct MD5Context context; |
48 | 49 | ||
@@ -78,7 +79,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | |||
78 | server->sequence_number++; | 79 | server->sequence_number++; |
79 | spin_unlock(&GlobalMid_Lock); | 80 | spin_unlock(&GlobalMid_Lock); |
80 | 81 | ||
81 | rc = cifs_calculate_signature(cifs_pdu, &server->mac_signing_key, | 82 | rc = cifs_calculate_signature(cifs_pdu, &server->session_key, |
82 | smb_signature); | 83 | smb_signature); |
83 | if (rc) | 84 | if (rc) |
84 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | 85 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); |
@@ -89,7 +90,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server, | |||
89 | } | 90 | } |
90 | 91 | ||
91 | static int cifs_calc_signature2(const struct kvec *iov, int n_vec, | 92 | static int cifs_calc_signature2(const struct kvec *iov, int n_vec, |
92 | const struct mac_key *key, char *signature) | 93 | const struct session_key *key, char *signature) |
93 | { | 94 | { |
94 | struct MD5Context context; | 95 | struct MD5Context context; |
95 | int i; | 96 | int i; |
@@ -145,7 +146,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
145 | server->sequence_number++; | 146 | server->sequence_number++; |
146 | spin_unlock(&GlobalMid_Lock); | 147 | spin_unlock(&GlobalMid_Lock); |
147 | 148 | ||
148 | rc = cifs_calc_signature2(iov, n_vec, &server->mac_signing_key, | 149 | rc = cifs_calc_signature2(iov, n_vec, &server->session_key, |
149 | smb_signature); | 150 | smb_signature); |
150 | if (rc) | 151 | if (rc) |
151 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); | 152 | memset(cifs_pdu->Signature.SecuritySignature, 0, 8); |
@@ -156,14 +157,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server, | |||
156 | } | 157 | } |
157 | 158 | ||
158 | int cifs_verify_signature(struct smb_hdr *cifs_pdu, | 159 | int cifs_verify_signature(struct smb_hdr *cifs_pdu, |
159 | const struct mac_key *mac_key, | 160 | const struct session_key *session_key, |
160 | __u32 expected_sequence_number) | 161 | __u32 expected_sequence_number) |
161 | { | 162 | { |
162 | unsigned int rc; | 163 | unsigned int rc; |
163 | char server_response_sig[8]; | 164 | char server_response_sig[8]; |
164 | char what_we_think_sig_should_be[20]; | 165 | char what_we_think_sig_should_be[20]; |
165 | 166 | ||
166 | if ((cifs_pdu == NULL) || (mac_key == NULL)) | 167 | if (cifs_pdu == NULL || session_key == NULL) |
167 | return -EINVAL; | 168 | return -EINVAL; |
168 | 169 | ||
169 | if (cifs_pdu->Command == SMB_COM_NEGOTIATE) | 170 | if (cifs_pdu->Command == SMB_COM_NEGOTIATE) |
@@ -192,7 +193,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
192 | cpu_to_le32(expected_sequence_number); | 193 | cpu_to_le32(expected_sequence_number); |
193 | cifs_pdu->Signature.Sequence.Reserved = 0; | 194 | cifs_pdu->Signature.Sequence.Reserved = 0; |
194 | 195 | ||
195 | rc = cifs_calculate_signature(cifs_pdu, mac_key, | 196 | rc = cifs_calculate_signature(cifs_pdu, session_key, |
196 | what_we_think_sig_should_be); | 197 | what_we_think_sig_should_be); |
197 | 198 | ||
198 | if (rc) | 199 | if (rc) |
@@ -209,7 +210,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu, | |||
209 | } | 210 | } |
210 | 211 | ||
211 | /* We fill in key by putting in 40 byte array which was allocated by caller */ | 212 | /* We fill in key by putting in 40 byte array which was allocated by caller */ |
212 | int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | 213 | int cifs_calculate_session_key(struct session_key *key, const char *rn, |
213 | const char *password) | 214 | const char *password) |
214 | { | 215 | { |
215 | char temp_key[16]; | 216 | char temp_key[16]; |
@@ -262,6 +263,148 @@ void calc_lanman_hash(const char *password, const char *cryptkey, bool encrypt, | |||
262 | } | 263 | } |
263 | #endif /* CIFS_WEAK_PW_HASH */ | 264 | #endif /* CIFS_WEAK_PW_HASH */ |
264 | 265 | ||
266 | /* Build a proper attribute value/target info pairs blob. | ||
267 | * Fill in netbios and dns domain name and workstation name | ||
268 | * and client time (total five av pairs and + one end of fields indicator. | ||
269 | * Allocate domain name which gets freed when session struct is deallocated. | ||
270 | */ | ||
271 | static int | ||
272 | build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp) | ||
273 | { | ||
274 | unsigned int dlen; | ||
275 | unsigned int wlen; | ||
276 | unsigned int size = 6 * sizeof(struct ntlmssp2_name); | ||
277 | __le64 curtime; | ||
278 | char *defdmname = "WORKGROUP"; | ||
279 | unsigned char *blobptr; | ||
280 | struct ntlmssp2_name *attrptr; | ||
281 | |||
282 | if (!ses->domainName) { | ||
283 | ses->domainName = kstrdup(defdmname, GFP_KERNEL); | ||
284 | if (!ses->domainName) | ||
285 | return -ENOMEM; | ||
286 | } | ||
287 | |||
288 | dlen = strlen(ses->domainName); | ||
289 | wlen = strlen(ses->server->hostname); | ||
290 | |||
291 | /* The length of this blob is a size which is | ||
292 | * six times the size of a structure which holds name/size + | ||
293 | * two times the unicode length of a domain name + | ||
294 | * two times the unicode length of a server name + | ||
295 | * size of a timestamp (which is 8 bytes). | ||
296 | */ | ||
297 | ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8; | ||
298 | ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL); | ||
299 | if (!ses->tiblob) { | ||
300 | ses->tilen = 0; | ||
301 | cERROR(1, "Challenge target info allocation failure"); | ||
302 | return -ENOMEM; | ||
303 | } | ||
304 | |||
305 | blobptr = ses->tiblob; | ||
306 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
307 | |||
308 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME); | ||
309 | attrptr->length = cpu_to_le16(2 * dlen); | ||
310 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
311 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | ||
312 | |||
313 | blobptr += 2 * dlen; | ||
314 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
315 | |||
316 | attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_COMPUTER_NAME); | ||
317 | attrptr->length = cpu_to_le16(2 * wlen); | ||
318 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
319 | cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); | ||
320 | |||
321 | blobptr += 2 * wlen; | ||
322 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
323 | |||
324 | attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_DOMAIN_NAME); | ||
325 | attrptr->length = cpu_to_le16(2 * dlen); | ||
326 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
327 | cifs_strtoUCS((__le16 *)blobptr, ses->domainName, dlen, nls_cp); | ||
328 | |||
329 | blobptr += 2 * dlen; | ||
330 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
331 | |||
332 | attrptr->type = cpu_to_le16(NTLMSSP_AV_DNS_COMPUTER_NAME); | ||
333 | attrptr->length = cpu_to_le16(2 * wlen); | ||
334 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
335 | cifs_strtoUCS((__le16 *)blobptr, ses->server->hostname, wlen, nls_cp); | ||
336 | |||
337 | blobptr += 2 * wlen; | ||
338 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
339 | |||
340 | attrptr->type = cpu_to_le16(NTLMSSP_AV_TIMESTAMP); | ||
341 | attrptr->length = cpu_to_le16(sizeof(__le64)); | ||
342 | blobptr = (unsigned char *)attrptr + sizeof(struct ntlmssp2_name); | ||
343 | curtime = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); | ||
344 | memcpy(blobptr, &curtime, sizeof(__le64)); | ||
345 | |||
346 | return 0; | ||
347 | } | ||
348 | |||
349 | /* Server has provided av pairs/target info in the type 2 challenge | ||
350 | * packet and we have plucked it and stored within smb session. | ||
351 | * We parse that blob here to find netbios domain name to be used | ||
352 | * as part of ntlmv2 authentication (in Target String), if not already | ||
353 | * specified on the command line. | ||
354 | * If this function returns without any error but without fetching | ||
355 | * domain name, authentication may fail against some server but | ||
356 | * may not fail against other (those who are not very particular | ||
357 | * about target string i.e. for some, just user name might suffice. | ||
358 | */ | ||
359 | static int | ||
360 | find_domain_name(struct cifsSesInfo *ses) | ||
361 | { | ||
362 | unsigned int attrsize; | ||
363 | unsigned int type; | ||
364 | unsigned int onesize = sizeof(struct ntlmssp2_name); | ||
365 | unsigned char *blobptr; | ||
366 | unsigned char *blobend; | ||
367 | struct ntlmssp2_name *attrptr; | ||
368 | |||
369 | if (!ses->tilen || !ses->tiblob) | ||
370 | return 0; | ||
371 | |||
372 | blobptr = ses->tiblob; | ||
373 | blobend = ses->tiblob + ses->tilen; | ||
374 | |||
375 | while (blobptr + onesize < blobend) { | ||
376 | attrptr = (struct ntlmssp2_name *) blobptr; | ||
377 | type = le16_to_cpu(attrptr->type); | ||
378 | if (type == NTLMSSP_AV_EOL) | ||
379 | break; | ||
380 | blobptr += 2; /* advance attr type */ | ||
381 | attrsize = le16_to_cpu(attrptr->length); | ||
382 | blobptr += 2; /* advance attr size */ | ||
383 | if (blobptr + attrsize > blobend) | ||
384 | break; | ||
385 | if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { | ||
386 | if (!attrsize) | ||
387 | break; | ||
388 | if (!ses->domainName) { | ||
389 | struct nls_table *default_nls; | ||
390 | ses->domainName = | ||
391 | kmalloc(attrsize + 1, GFP_KERNEL); | ||
392 | if (!ses->domainName) | ||
393 | return -ENOMEM; | ||
394 | default_nls = load_nls_default(); | ||
395 | cifs_from_ucs2(ses->domainName, | ||
396 | (__le16 *)blobptr, attrsize, attrsize, | ||
397 | default_nls, false); | ||
398 | unload_nls(default_nls); | ||
399 | break; | ||
400 | } | ||
401 | } | ||
402 | blobptr += attrsize; /* advance attr value */ | ||
403 | } | ||
404 | |||
405 | return 0; | ||
406 | } | ||
407 | |||
265 | static int calc_ntlmv2_hash(struct cifsSesInfo *ses, | 408 | static int calc_ntlmv2_hash(struct cifsSesInfo *ses, |
266 | const struct nls_table *nls_cp) | 409 | const struct nls_table *nls_cp) |
267 | { | 410 | { |
@@ -315,13 +458,14 @@ calc_exit_1: | |||
315 | calc_exit_2: | 458 | calc_exit_2: |
316 | /* BB FIXME what about bytes 24 through 40 of the signing key? | 459 | /* BB FIXME what about bytes 24 through 40 of the signing key? |
317 | compare with the NTLM example */ | 460 | compare with the NTLM example */ |
318 | hmac_md5_final(ses->server->ntlmv2_hash, pctxt); | 461 | hmac_md5_final(ses->ntlmv2_hash, pctxt); |
319 | 462 | ||
320 | kfree(pctxt); | 463 | kfree(pctxt); |
321 | return rc; | 464 | return rc; |
322 | } | 465 | } |
323 | 466 | ||
324 | void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, | 467 | int |
468 | setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, | ||
325 | const struct nls_table *nls_cp) | 469 | const struct nls_table *nls_cp) |
326 | { | 470 | { |
327 | int rc; | 471 | int rc; |
@@ -333,25 +477,48 @@ void setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf, | |||
333 | buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); | 477 | buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME)); |
334 | get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); | 478 | get_random_bytes(&buf->client_chal, sizeof(buf->client_chal)); |
335 | buf->reserved2 = 0; | 479 | buf->reserved2 = 0; |
336 | buf->names[0].type = cpu_to_le16(NTLMSSP_DOMAIN_TYPE); | 480 | |
337 | buf->names[0].length = 0; | 481 | if (ses->server->secType == RawNTLMSSP) { |
338 | buf->names[1].type = 0; | 482 | if (!ses->domainName) { |
339 | buf->names[1].length = 0; | 483 | rc = find_domain_name(ses); |
484 | if (rc) { | ||
485 | cERROR(1, "error %d finding domain name", rc); | ||
486 | goto setup_ntlmv2_rsp_ret; | ||
487 | } | ||
488 | } | ||
489 | } else { | ||
490 | rc = build_avpair_blob(ses, nls_cp); | ||
491 | if (rc) { | ||
492 | cERROR(1, "error %d building av pair blob", rc); | ||
493 | return rc; | ||
494 | } | ||
495 | } | ||
340 | 496 | ||
341 | /* calculate buf->ntlmv2_hash */ | 497 | /* calculate buf->ntlmv2_hash */ |
342 | rc = calc_ntlmv2_hash(ses, nls_cp); | 498 | rc = calc_ntlmv2_hash(ses, nls_cp); |
343 | if (rc) | 499 | if (rc) { |
344 | cERROR(1, "could not get v2 hash rc %d", rc); | 500 | cERROR(1, "could not get v2 hash rc %d", rc); |
501 | goto setup_ntlmv2_rsp_ret; | ||
502 | } | ||
345 | CalcNTLMv2_response(ses, resp_buf); | 503 | CalcNTLMv2_response(ses, resp_buf); |
346 | 504 | ||
347 | /* now calculate the MAC key for NTLMv2 */ | 505 | /* now calculate the session key for NTLMv2 */ |
348 | hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); | 506 | hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); |
349 | hmac_md5_update(resp_buf, 16, &context); | 507 | hmac_md5_update(resp_buf, 16, &context); |
350 | hmac_md5_final(ses->server->mac_signing_key.data.ntlmv2.key, &context); | 508 | hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context); |
351 | 509 | ||
352 | memcpy(&ses->server->mac_signing_key.data.ntlmv2.resp, resp_buf, | 510 | memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf, |
353 | sizeof(struct ntlmv2_resp)); | 511 | sizeof(struct ntlmv2_resp)); |
354 | ses->server->mac_signing_key.len = 16 + sizeof(struct ntlmv2_resp); | 512 | ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp); |
513 | |||
514 | return 0; | ||
515 | |||
516 | setup_ntlmv2_rsp_ret: | ||
517 | kfree(ses->tiblob); | ||
518 | ses->tiblob = NULL; | ||
519 | ses->tilen = 0; | ||
520 | |||
521 | return rc; | ||
355 | } | 522 | } |
356 | 523 | ||
357 | void CalcNTLMv2_response(const struct cifsSesInfo *ses, | 524 | void CalcNTLMv2_response(const struct cifsSesInfo *ses, |
@@ -359,12 +526,15 @@ void CalcNTLMv2_response(const struct cifsSesInfo *ses, | |||
359 | { | 526 | { |
360 | struct HMACMD5Context context; | 527 | struct HMACMD5Context context; |
361 | /* rest of v2 struct already generated */ | 528 | /* rest of v2 struct already generated */ |
362 | memcpy(v2_session_response + 8, ses->server->cryptKey, 8); | 529 | memcpy(v2_session_response + 8, ses->cryptKey, 8); |
363 | hmac_md5_init_limK_to_64(ses->server->ntlmv2_hash, 16, &context); | 530 | hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context); |
364 | 531 | ||
365 | hmac_md5_update(v2_session_response+8, | 532 | hmac_md5_update(v2_session_response+8, |
366 | sizeof(struct ntlmv2_resp) - 8, &context); | 533 | sizeof(struct ntlmv2_resp) - 8, &context); |
367 | 534 | ||
535 | if (ses->tilen) | ||
536 | hmac_md5_update(ses->tiblob, ses->tilen, &context); | ||
537 | |||
368 | hmac_md5_final(v2_session_response, &context); | 538 | hmac_md5_final(v2_session_response, &context); |
369 | /* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ | 539 | /* cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */ |
370 | } | 540 | } |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index b7431afdd76d..34371637f210 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include <linux/delay.h> | 35 | #include <linux/delay.h> |
36 | #include <linux/kthread.h> | 36 | #include <linux/kthread.h> |
37 | #include <linux/freezer.h> | 37 | #include <linux/freezer.h> |
38 | #include <linux/smp_lock.h> | 38 | #include <net/ipv6.h> |
39 | #include "cifsfs.h" | 39 | #include "cifsfs.h" |
40 | #include "cifspdu.h" | 40 | #include "cifspdu.h" |
41 | #define DECLARE_GLOBALS_HERE | 41 | #define DECLARE_GLOBALS_HERE |
@@ -82,6 +82,24 @@ extern mempool_t *cifs_sm_req_poolp; | |||
82 | extern mempool_t *cifs_req_poolp; | 82 | extern mempool_t *cifs_req_poolp; |
83 | extern mempool_t *cifs_mid_poolp; | 83 | extern mempool_t *cifs_mid_poolp; |
84 | 84 | ||
85 | void | ||
86 | cifs_sb_active(struct super_block *sb) | ||
87 | { | ||
88 | struct cifs_sb_info *server = CIFS_SB(sb); | ||
89 | |||
90 | if (atomic_inc_return(&server->active) == 1) | ||
91 | atomic_inc(&sb->s_active); | ||
92 | } | ||
93 | |||
94 | void | ||
95 | cifs_sb_deactive(struct super_block *sb) | ||
96 | { | ||
97 | struct cifs_sb_info *server = CIFS_SB(sb); | ||
98 | |||
99 | if (atomic_dec_and_test(&server->active)) | ||
100 | deactivate_super(sb); | ||
101 | } | ||
102 | |||
85 | static int | 103 | static int |
86 | cifs_read_super(struct super_block *sb, void *data, | 104 | cifs_read_super(struct super_block *sb, void *data, |
87 | const char *devname, int silent) | 105 | const char *devname, int silent) |
@@ -97,6 +115,9 @@ cifs_read_super(struct super_block *sb, void *data, | |||
97 | if (cifs_sb == NULL) | 115 | if (cifs_sb == NULL) |
98 | return -ENOMEM; | 116 | return -ENOMEM; |
99 | 117 | ||
118 | spin_lock_init(&cifs_sb->tlink_tree_lock); | ||
119 | INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL); | ||
120 | |||
100 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); | 121 | rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY); |
101 | if (rc) { | 122 | if (rc) { |
102 | kfree(cifs_sb); | 123 | kfree(cifs_sb); |
@@ -136,9 +157,6 @@ cifs_read_super(struct super_block *sb, void *data, | |||
136 | sb->s_magic = CIFS_MAGIC_NUMBER; | 157 | sb->s_magic = CIFS_MAGIC_NUMBER; |
137 | sb->s_op = &cifs_super_ops; | 158 | sb->s_op = &cifs_super_ops; |
138 | sb->s_bdi = &cifs_sb->bdi; | 159 | sb->s_bdi = &cifs_sb->bdi; |
139 | /* if (cifs_sb->tcon->ses->server->maxBuf > MAX_CIFS_HDR_SIZE + 512) | ||
140 | sb->s_blocksize = | ||
141 | cifs_sb->tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE; */ | ||
142 | sb->s_blocksize = CIFS_MAX_MSGSIZE; | 160 | sb->s_blocksize = CIFS_MAX_MSGSIZE; |
143 | sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ | 161 | sb->s_blocksize_bits = 14; /* default 2**14 = CIFS_MAX_MSGSIZE */ |
144 | inode = cifs_root_iget(sb, ROOT_I); | 162 | inode = cifs_root_iget(sb, ROOT_I); |
@@ -200,8 +218,6 @@ cifs_put_super(struct super_block *sb) | |||
200 | return; | 218 | return; |
201 | } | 219 | } |
202 | 220 | ||
203 | lock_kernel(); | ||
204 | |||
205 | rc = cifs_umount(sb, cifs_sb); | 221 | rc = cifs_umount(sb, cifs_sb); |
206 | if (rc) | 222 | if (rc) |
207 | cERROR(1, "cifs_umount failed with return code %d", rc); | 223 | cERROR(1, "cifs_umount failed with return code %d", rc); |
@@ -215,8 +231,6 @@ cifs_put_super(struct super_block *sb) | |||
215 | unload_nls(cifs_sb->local_nls); | 231 | unload_nls(cifs_sb->local_nls); |
216 | bdi_destroy(&cifs_sb->bdi); | 232 | bdi_destroy(&cifs_sb->bdi); |
217 | kfree(cifs_sb); | 233 | kfree(cifs_sb); |
218 | |||
219 | unlock_kernel(); | ||
220 | } | 234 | } |
221 | 235 | ||
222 | static int | 236 | static int |
@@ -224,7 +238,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
224 | { | 238 | { |
225 | struct super_block *sb = dentry->d_sb; | 239 | struct super_block *sb = dentry->d_sb; |
226 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 240 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
227 | struct cifsTconInfo *tcon = cifs_sb->tcon; | 241 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); |
228 | int rc = -EOPNOTSUPP; | 242 | int rc = -EOPNOTSUPP; |
229 | int xid; | 243 | int xid; |
230 | 244 | ||
@@ -366,14 +380,36 @@ static int | |||
366 | cifs_show_options(struct seq_file *s, struct vfsmount *m) | 380 | cifs_show_options(struct seq_file *s, struct vfsmount *m) |
367 | { | 381 | { |
368 | struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); | 382 | struct cifs_sb_info *cifs_sb = CIFS_SB(m->mnt_sb); |
369 | struct cifsTconInfo *tcon = cifs_sb->tcon; | 383 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); |
384 | struct sockaddr *srcaddr; | ||
385 | srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr; | ||
370 | 386 | ||
371 | seq_printf(s, ",unc=%s", tcon->treeName); | 387 | seq_printf(s, ",unc=%s", tcon->treeName); |
372 | if (tcon->ses->userName) | 388 | |
389 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) | ||
390 | seq_printf(s, ",multiuser"); | ||
391 | else if (tcon->ses->userName) | ||
373 | seq_printf(s, ",username=%s", tcon->ses->userName); | 392 | seq_printf(s, ",username=%s", tcon->ses->userName); |
393 | |||
374 | if (tcon->ses->domainName) | 394 | if (tcon->ses->domainName) |
375 | seq_printf(s, ",domain=%s", tcon->ses->domainName); | 395 | seq_printf(s, ",domain=%s", tcon->ses->domainName); |
376 | 396 | ||
397 | if (srcaddr->sa_family != AF_UNSPEC) { | ||
398 | struct sockaddr_in *saddr4; | ||
399 | struct sockaddr_in6 *saddr6; | ||
400 | saddr4 = (struct sockaddr_in *)srcaddr; | ||
401 | saddr6 = (struct sockaddr_in6 *)srcaddr; | ||
402 | if (srcaddr->sa_family == AF_INET6) | ||
403 | seq_printf(s, ",srcaddr=%pI6c", | ||
404 | &saddr6->sin6_addr); | ||
405 | else if (srcaddr->sa_family == AF_INET) | ||
406 | seq_printf(s, ",srcaddr=%pI4", | ||
407 | &saddr4->sin_addr.s_addr); | ||
408 | else | ||
409 | seq_printf(s, ",srcaddr=BAD-AF:%i", | ||
410 | (int)(srcaddr->sa_family)); | ||
411 | } | ||
412 | |||
377 | seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); | 413 | seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); |
378 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) | 414 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) |
379 | seq_printf(s, ",forceuid"); | 415 | seq_printf(s, ",forceuid"); |
@@ -422,6 +458,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m) | |||
422 | seq_printf(s, ",dynperm"); | 458 | seq_printf(s, ",dynperm"); |
423 | if (m->mnt_sb->s_flags & MS_POSIXACL) | 459 | if (m->mnt_sb->s_flags & MS_POSIXACL) |
424 | seq_printf(s, ",acl"); | 460 | seq_printf(s, ",acl"); |
461 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) | ||
462 | seq_printf(s, ",mfsymlinks"); | ||
425 | 463 | ||
426 | seq_printf(s, ",rsize=%d", cifs_sb->rsize); | 464 | seq_printf(s, ",rsize=%d", cifs_sb->rsize); |
427 | seq_printf(s, ",wsize=%d", cifs_sb->wsize); | 465 | seq_printf(s, ",wsize=%d", cifs_sb->wsize); |
@@ -437,20 +475,18 @@ static void cifs_umount_begin(struct super_block *sb) | |||
437 | if (cifs_sb == NULL) | 475 | if (cifs_sb == NULL) |
438 | return; | 476 | return; |
439 | 477 | ||
440 | tcon = cifs_sb->tcon; | 478 | tcon = cifs_sb_master_tcon(cifs_sb); |
441 | if (tcon == NULL) | ||
442 | return; | ||
443 | 479 | ||
444 | read_lock(&cifs_tcp_ses_lock); | 480 | spin_lock(&cifs_tcp_ses_lock); |
445 | if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) { | 481 | if ((tcon->tc_count > 1) || (tcon->tidStatus == CifsExiting)) { |
446 | /* we have other mounts to same share or we have | 482 | /* we have other mounts to same share or we have |
447 | already tried to force umount this and woken up | 483 | already tried to force umount this and woken up |
448 | all waiting network requests, nothing to do */ | 484 | all waiting network requests, nothing to do */ |
449 | read_unlock(&cifs_tcp_ses_lock); | 485 | spin_unlock(&cifs_tcp_ses_lock); |
450 | return; | 486 | return; |
451 | } else if (tcon->tc_count == 1) | 487 | } else if (tcon->tc_count == 1) |
452 | tcon->tidStatus = CifsExiting; | 488 | tcon->tidStatus = CifsExiting; |
453 | read_unlock(&cifs_tcp_ses_lock); | 489 | spin_unlock(&cifs_tcp_ses_lock); |
454 | 490 | ||
455 | /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ | 491 | /* cancel_brl_requests(tcon); */ /* BB mark all brl mids as exiting */ |
456 | /* cancel_notify_requests(tcon); */ | 492 | /* cancel_notify_requests(tcon); */ |
@@ -514,7 +550,9 @@ cifs_get_sb(struct file_system_type *fs_type, | |||
514 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | 550 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) |
515 | { | 551 | { |
516 | int rc; | 552 | int rc; |
517 | struct super_block *sb = sget(fs_type, NULL, set_anon_super, NULL); | 553 | struct super_block *sb; |
554 | |||
555 | sb = sget(fs_type, NULL, set_anon_super, NULL); | ||
518 | 556 | ||
519 | cFYI(1, "Devname: %s flags: %d ", dev_name, flags); | 557 | cFYI(1, "Devname: %s flags: %d ", dev_name, flags); |
520 | 558 | ||
@@ -565,9 +603,10 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) | |||
565 | 603 | ||
566 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) | 604 | static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) |
567 | { | 605 | { |
568 | /* note that this is called by vfs setlease with the BKL held | 606 | /* note that this is called by vfs setlease with lock_flocks held |
569 | although I doubt that BKL is needed here in cifs */ | 607 | to protect *lease from going away */ |
570 | struct inode *inode = file->f_path.dentry->d_inode; | 608 | struct inode *inode = file->f_path.dentry->d_inode; |
609 | struct cifsFileInfo *cfile = file->private_data; | ||
571 | 610 | ||
572 | if (!(S_ISREG(inode->i_mode))) | 611 | if (!(S_ISREG(inode->i_mode))) |
573 | return -EINVAL; | 612 | return -EINVAL; |
@@ -578,8 +617,8 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease) | |||
578 | ((arg == F_WRLCK) && | 617 | ((arg == F_WRLCK) && |
579 | (CIFS_I(inode)->clientCanCacheAll))) | 618 | (CIFS_I(inode)->clientCanCacheAll))) |
580 | return generic_setlease(file, arg, lease); | 619 | return generic_setlease(file, arg, lease); |
581 | else if (CIFS_SB(inode->i_sb)->tcon->local_lease && | 620 | else if (tlink_tcon(cfile->tlink)->local_lease && |
582 | !CIFS_I(inode)->clientCanCacheRead) | 621 | !CIFS_I(inode)->clientCanCacheRead) |
583 | /* If the server claims to support oplock on this | 622 | /* If the server claims to support oplock on this |
584 | file, then we still need to check oplock even | 623 | file, then we still need to check oplock even |
585 | if the local_lease mount option is set, but there | 624 | if the local_lease mount option is set, but there |
@@ -898,8 +937,8 @@ init_cifs(void) | |||
898 | GlobalTotalActiveXid = 0; | 937 | GlobalTotalActiveXid = 0; |
899 | GlobalMaxActiveXid = 0; | 938 | GlobalMaxActiveXid = 0; |
900 | memset(Local_System_Name, 0, 15); | 939 | memset(Local_System_Name, 0, 15); |
901 | rwlock_init(&GlobalSMBSeslock); | 940 | spin_lock_init(&cifs_tcp_ses_lock); |
902 | rwlock_init(&cifs_tcp_ses_lock); | 941 | spin_lock_init(&cifs_file_list_lock); |
903 | spin_lock_init(&GlobalMid_Lock); | 942 | spin_lock_init(&GlobalMid_Lock); |
904 | 943 | ||
905 | if (cifs_max_pending < 2) { | 944 | if (cifs_max_pending < 2) { |
@@ -912,11 +951,11 @@ init_cifs(void) | |||
912 | 951 | ||
913 | rc = cifs_fscache_register(); | 952 | rc = cifs_fscache_register(); |
914 | if (rc) | 953 | if (rc) |
915 | goto out; | 954 | goto out_clean_proc; |
916 | 955 | ||
917 | rc = cifs_init_inodecache(); | 956 | rc = cifs_init_inodecache(); |
918 | if (rc) | 957 | if (rc) |
919 | goto out_clean_proc; | 958 | goto out_unreg_fscache; |
920 | 959 | ||
921 | rc = cifs_init_mids(); | 960 | rc = cifs_init_mids(); |
922 | if (rc) | 961 | if (rc) |
@@ -938,19 +977,19 @@ init_cifs(void) | |||
938 | return 0; | 977 | return 0; |
939 | 978 | ||
940 | #ifdef CONFIG_CIFS_UPCALL | 979 | #ifdef CONFIG_CIFS_UPCALL |
941 | out_unregister_filesystem: | 980 | out_unregister_filesystem: |
942 | unregister_filesystem(&cifs_fs_type); | 981 | unregister_filesystem(&cifs_fs_type); |
943 | #endif | 982 | #endif |
944 | out_destroy_request_bufs: | 983 | out_destroy_request_bufs: |
945 | cifs_destroy_request_bufs(); | 984 | cifs_destroy_request_bufs(); |
946 | out_destroy_mids: | 985 | out_destroy_mids: |
947 | cifs_destroy_mids(); | 986 | cifs_destroy_mids(); |
948 | out_destroy_inodecache: | 987 | out_destroy_inodecache: |
949 | cifs_destroy_inodecache(); | 988 | cifs_destroy_inodecache(); |
950 | out_clean_proc: | 989 | out_unreg_fscache: |
951 | cifs_proc_clean(); | ||
952 | cifs_fscache_unregister(); | 990 | cifs_fscache_unregister(); |
953 | out: | 991 | out_clean_proc: |
992 | cifs_proc_clean(); | ||
954 | return rc; | 993 | return rc; |
955 | } | 994 | } |
956 | 995 | ||
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d82f5fb4761e..f35795a16b42 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
@@ -42,10 +42,8 @@ extern const struct address_space_operations cifs_addr_ops; | |||
42 | extern const struct address_space_operations cifs_addr_ops_smallbuf; | 42 | extern const struct address_space_operations cifs_addr_ops_smallbuf; |
43 | 43 | ||
44 | /* Functions related to super block operations */ | 44 | /* Functions related to super block operations */ |
45 | /* extern const struct super_operations cifs_super_ops;*/ | 45 | extern void cifs_sb_active(struct super_block *sb); |
46 | extern void cifs_read_inode(struct inode *); | 46 | extern void cifs_sb_deactive(struct super_block *sb); |
47 | /*extern void cifs_delete_inode(struct inode *);*/ /* BB not needed yet */ | ||
48 | /* extern void cifs_write_inode(struct inode *); */ /* BB not needed yet */ | ||
49 | 47 | ||
50 | /* Functions related to inodes */ | 48 | /* Functions related to inodes */ |
51 | extern const struct inode_operations cifs_dir_inode_ops; | 49 | extern const struct inode_operations cifs_dir_inode_ops; |
@@ -104,7 +102,7 @@ extern int cifs_readlink(struct dentry *direntry, char __user *buffer, | |||
104 | extern int cifs_symlink(struct inode *inode, struct dentry *direntry, | 102 | extern int cifs_symlink(struct inode *inode, struct dentry *direntry, |
105 | const char *symname); | 103 | const char *symname); |
106 | extern int cifs_removexattr(struct dentry *, const char *); | 104 | extern int cifs_removexattr(struct dentry *, const char *); |
107 | extern int cifs_setxattr(struct dentry *, const char *, const void *, | 105 | extern int cifs_setxattr(struct dentry *, const char *, const void *, |
108 | size_t, int); | 106 | size_t, int); |
109 | extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); | 107 | extern ssize_t cifs_getxattr(struct dentry *, const char *, void *, size_t); |
110 | extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); | 108 | extern ssize_t cifs_listxattr(struct dentry *, char *, size_t); |
@@ -114,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); | |||
114 | extern const struct export_operations cifs_export_ops; | 112 | extern const struct export_operations cifs_export_ops; |
115 | #endif /* EXPERIMENTAL */ | 113 | #endif /* EXPERIMENTAL */ |
116 | 114 | ||
117 | #define CIFS_VERSION "1.65" | 115 | #define CIFS_VERSION "1.67" |
118 | #endif /* _CIFSFS_H */ | 116 | #endif /* _CIFSFS_H */ |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 0cdfb8c32ac6..3365e77f6f24 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -97,7 +97,7 @@ enum protocolEnum { | |||
97 | /* Netbios frames protocol not supported at this time */ | 97 | /* Netbios frames protocol not supported at this time */ |
98 | }; | 98 | }; |
99 | 99 | ||
100 | struct mac_key { | 100 | struct session_key { |
101 | unsigned int len; | 101 | unsigned int len; |
102 | union { | 102 | union { |
103 | char ntlm[CIFS_SESS_KEY_SIZE + 16]; | 103 | char ntlm[CIFS_SESS_KEY_SIZE + 16]; |
@@ -139,6 +139,7 @@ struct TCP_Server_Info { | |||
139 | struct sockaddr_in sockAddr; | 139 | struct sockaddr_in sockAddr; |
140 | struct sockaddr_in6 sockAddr6; | 140 | struct sockaddr_in6 sockAddr6; |
141 | } addr; | 141 | } addr; |
142 | struct sockaddr_storage srcaddr; /* locally bind to this IP */ | ||
142 | wait_queue_head_t response_q; | 143 | wait_queue_head_t response_q; |
143 | wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ | 144 | wait_queue_head_t request_q; /* if more than maxmpx to srvr must block*/ |
144 | struct list_head pending_mid_q; | 145 | struct list_head pending_mid_q; |
@@ -178,12 +179,10 @@ struct TCP_Server_Info { | |||
178 | int capabilities; /* allow selective disabling of caps by smb sess */ | 179 | int capabilities; /* allow selective disabling of caps by smb sess */ |
179 | int timeAdj; /* Adjust for difference in server time zone in sec */ | 180 | int timeAdj; /* Adjust for difference in server time zone in sec */ |
180 | __u16 CurrentMid; /* multiplex id - rotating counter */ | 181 | __u16 CurrentMid; /* multiplex id - rotating counter */ |
181 | char cryptKey[CIFS_CRYPTO_KEY_SIZE]; | ||
182 | /* 16th byte of RFC1001 workstation name is always null */ | 182 | /* 16th byte of RFC1001 workstation name is always null */ |
183 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; | 183 | char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL]; |
184 | __u32 sequence_number; /* needed for CIFS PDU signature */ | 184 | __u32 sequence_number; /* needed for CIFS PDU signature */ |
185 | struct mac_key mac_signing_key; | 185 | struct session_key session_key; |
186 | char ntlmv2_hash[16]; | ||
187 | unsigned long lstrp; /* when we got last response from this server */ | 186 | unsigned long lstrp; /* when we got last response from this server */ |
188 | u16 dialect; /* dialect index that server chose */ | 187 | u16 dialect; /* dialect index that server chose */ |
189 | /* extended security flavors that server supports */ | 188 | /* extended security flavors that server supports */ |
@@ -191,6 +190,7 @@ struct TCP_Server_Info { | |||
191 | bool sec_mskerberos; /* supports legacy MS Kerberos */ | 190 | bool sec_mskerberos; /* supports legacy MS Kerberos */ |
192 | bool sec_kerberosu2u; /* supports U2U Kerberos */ | 191 | bool sec_kerberosu2u; /* supports U2U Kerberos */ |
193 | bool sec_ntlmssp; /* supports NTLMSSP */ | 192 | bool sec_ntlmssp; /* supports NTLMSSP */ |
193 | bool session_estab; /* mark when very first sess is established */ | ||
194 | #ifdef CONFIG_CIFS_FSCACHE | 194 | #ifdef CONFIG_CIFS_FSCACHE |
195 | struct fscache_cookie *fscache; /* client index cache cookie */ | 195 | struct fscache_cookie *fscache; /* client index cache cookie */ |
196 | #endif | 196 | #endif |
@@ -222,6 +222,11 @@ struct cifsSesInfo { | |||
222 | char userName[MAX_USERNAME_SIZE + 1]; | 222 | char userName[MAX_USERNAME_SIZE + 1]; |
223 | char *domainName; | 223 | char *domainName; |
224 | char *password; | 224 | char *password; |
225 | char cryptKey[CIFS_CRYPTO_KEY_SIZE]; | ||
226 | struct session_key auth_key; | ||
227 | char ntlmv2_hash[16]; | ||
228 | unsigned int tilen; /* length of the target info blob */ | ||
229 | unsigned char *tiblob; /* target info blob in challenge response */ | ||
225 | bool need_reconnect:1; /* connection reset, uid now invalid */ | 230 | bool need_reconnect:1; /* connection reset, uid now invalid */ |
226 | }; | 231 | }; |
227 | /* no more than one of the following three session flags may be set */ | 232 | /* no more than one of the following three session flags may be set */ |
@@ -308,6 +313,44 @@ struct cifsTconInfo { | |||
308 | }; | 313 | }; |
309 | 314 | ||
310 | /* | 315 | /* |
316 | * This is a refcounted and timestamped container for a tcon pointer. The | ||
317 | * container holds a tcon reference. It is considered safe to free one of | ||
318 | * these when the tl_count goes to 0. The tl_time is the time of the last | ||
319 | * "get" on the container. | ||
320 | */ | ||
321 | struct tcon_link { | ||
322 | unsigned long tl_index; | ||
323 | unsigned long tl_flags; | ||
324 | #define TCON_LINK_MASTER 0 | ||
325 | #define TCON_LINK_PENDING 1 | ||
326 | #define TCON_LINK_IN_TREE 2 | ||
327 | unsigned long tl_time; | ||
328 | atomic_t tl_count; | ||
329 | struct cifsTconInfo *tl_tcon; | ||
330 | }; | ||
331 | |||
332 | extern struct tcon_link *cifs_sb_tlink(struct cifs_sb_info *cifs_sb); | ||
333 | |||
334 | static inline struct cifsTconInfo * | ||
335 | tlink_tcon(struct tcon_link *tlink) | ||
336 | { | ||
337 | return tlink->tl_tcon; | ||
338 | } | ||
339 | |||
340 | extern void cifs_put_tlink(struct tcon_link *tlink); | ||
341 | |||
342 | static inline struct tcon_link * | ||
343 | cifs_get_tlink(struct tcon_link *tlink) | ||
344 | { | ||
345 | if (tlink && !IS_ERR(tlink)) | ||
346 | atomic_inc(&tlink->tl_count); | ||
347 | return tlink; | ||
348 | } | ||
349 | |||
350 | /* This function is always expected to succeed */ | ||
351 | extern struct cifsTconInfo *cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb); | ||
352 | |||
353 | /* | ||
311 | * This info hangs off the cifsFileInfo structure, pointed to by llist. | 354 | * This info hangs off the cifsFileInfo structure, pointed to by llist. |
312 | * This is used to track byte stream locks on the file | 355 | * This is used to track byte stream locks on the file |
313 | */ | 356 | */ |
@@ -345,12 +388,11 @@ struct cifsFileInfo { | |||
345 | __u16 netfid; /* file id from remote */ | 388 | __u16 netfid; /* file id from remote */ |
346 | /* BB add lock scope info here if needed */ ; | 389 | /* BB add lock scope info here if needed */ ; |
347 | /* lock scope id (0 if none) */ | 390 | /* lock scope id (0 if none) */ |
348 | struct file *pfile; /* needed for writepage */ | 391 | struct dentry *dentry; |
349 | struct inode *pInode; /* needed for oplock break */ | 392 | unsigned int f_flags; |
350 | struct vfsmount *mnt; | 393 | struct tcon_link *tlink; |
351 | struct mutex lock_mutex; | 394 | struct mutex lock_mutex; |
352 | struct list_head llist; /* list of byte range locks we have. */ | 395 | struct list_head llist; /* list of byte range locks we have. */ |
353 | bool closePend:1; /* file is marked to close */ | ||
354 | bool invalidHandle:1; /* file closed via session abend */ | 396 | bool invalidHandle:1; /* file closed via session abend */ |
355 | bool oplock_break_cancelled:1; | 397 | bool oplock_break_cancelled:1; |
356 | atomic_t count; /* reference count */ | 398 | atomic_t count; /* reference count */ |
@@ -365,14 +407,7 @@ static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file) | |||
365 | atomic_inc(&cifs_file->count); | 407 | atomic_inc(&cifs_file->count); |
366 | } | 408 | } |
367 | 409 | ||
368 | /* Release a reference on the file private data */ | 410 | void cifsFileInfo_put(struct cifsFileInfo *cifs_file); |
369 | static inline void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | ||
370 | { | ||
371 | if (atomic_dec_and_test(&cifs_file->count)) { | ||
372 | iput(cifs_file->pInode); | ||
373 | kfree(cifs_file); | ||
374 | } | ||
375 | } | ||
376 | 411 | ||
377 | /* | 412 | /* |
378 | * One of these for each file inode | 413 | * One of these for each file inode |
@@ -474,16 +509,16 @@ struct oplock_q_entry { | |||
474 | 509 | ||
475 | /* for pending dnotify requests */ | 510 | /* for pending dnotify requests */ |
476 | struct dir_notify_req { | 511 | struct dir_notify_req { |
477 | struct list_head lhead; | 512 | struct list_head lhead; |
478 | __le16 Pid; | 513 | __le16 Pid; |
479 | __le16 PidHigh; | 514 | __le16 PidHigh; |
480 | __u16 Mid; | 515 | __u16 Mid; |
481 | __u16 Tid; | 516 | __u16 Tid; |
482 | __u16 Uid; | 517 | __u16 Uid; |
483 | __u16 netfid; | 518 | __u16 netfid; |
484 | __u32 filter; /* CompletionFilter (for multishot) */ | 519 | __u32 filter; /* CompletionFilter (for multishot) */ |
485 | int multishot; | 520 | int multishot; |
486 | struct file *pfile; | 521 | struct file *pfile; |
487 | }; | 522 | }; |
488 | 523 | ||
489 | struct dfs_info3_param { | 524 | struct dfs_info3_param { |
@@ -667,7 +702,7 @@ GLOBAL_EXTERN struct list_head cifs_tcp_ses_list; | |||
667 | * the reference counters for the server, smb session, and tcon. Finally, | 702 | * the reference counters for the server, smb session, and tcon. Finally, |
668 | * changes to the tcon->tidStatus should be done while holding this lock. | 703 | * changes to the tcon->tidStatus should be done while holding this lock. |
669 | */ | 704 | */ |
670 | GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; | 705 | GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock; |
671 | 706 | ||
672 | /* | 707 | /* |
673 | * This lock protects the cifs_file->llist and cifs_file->flist | 708 | * This lock protects the cifs_file->llist and cifs_file->flist |
@@ -676,7 +711,7 @@ GLOBAL_EXTERN rwlock_t cifs_tcp_ses_lock; | |||
676 | * If cifs_tcp_ses_lock and the lock below are both needed to be held, then | 711 | * If cifs_tcp_ses_lock and the lock below are both needed to be held, then |
677 | * the cifs_tcp_ses_lock must be grabbed first and released last. | 712 | * the cifs_tcp_ses_lock must be grabbed first and released last. |
678 | */ | 713 | */ |
679 | GLOBAL_EXTERN rwlock_t GlobalSMBSeslock; | 714 | GLOBAL_EXTERN spinlock_t cifs_file_list_lock; |
680 | 715 | ||
681 | /* Outstanding dir notify requests */ | 716 | /* Outstanding dir notify requests */ |
682 | GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; | 717 | GLOBAL_EXTERN struct list_head GlobalDnotifyReqList; |
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 14d036d8db11..b0f4b5656d4c 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h | |||
@@ -663,7 +663,6 @@ struct ntlmv2_resp { | |||
663 | __le64 time; | 663 | __le64 time; |
664 | __u64 client_chal; /* random */ | 664 | __u64 client_chal; /* random */ |
665 | __u32 reserved2; | 665 | __u32 reserved2; |
666 | struct ntlmssp2_name names[2]; | ||
667 | /* array of name entries could follow ending in minimum 4 byte struct */ | 666 | /* array of name entries could follow ending in minimum 4 byte struct */ |
668 | } __attribute__((packed)); | 667 | } __attribute__((packed)); |
669 | 668 | ||
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index 1d60c655e3e0..e593c40ba7ba 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
@@ -78,9 +78,9 @@ extern int checkSMB(struct smb_hdr *smb, __u16 mid, unsigned int length); | |||
78 | extern bool is_valid_oplock_break(struct smb_hdr *smb, | 78 | extern bool is_valid_oplock_break(struct smb_hdr *smb, |
79 | struct TCP_Server_Info *); | 79 | struct TCP_Server_Info *); |
80 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); | 80 | extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof); |
81 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *); | 81 | extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool); |
82 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 82 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
83 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *); | 83 | extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool); |
84 | #endif | 84 | #endif |
85 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); | 85 | extern unsigned int smbCalcSize(struct smb_hdr *ptr); |
86 | extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); | 86 | extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr); |
@@ -105,12 +105,12 @@ extern u64 cifs_UnixTimeToNT(struct timespec); | |||
105 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, | 105 | extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, |
106 | int offset); | 106 | int offset); |
107 | 107 | ||
108 | extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode, | 108 | extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle, |
109 | __u16 fileHandle, struct file *file, | 109 | struct file *file, struct tcon_link *tlink, |
110 | struct vfsmount *mnt, unsigned int oflags); | 110 | __u32 oplock); |
111 | extern int cifs_posix_open(char *full_path, struct inode **pinode, | 111 | extern int cifs_posix_open(char *full_path, struct inode **pinode, |
112 | struct super_block *sb, | 112 | struct super_block *sb, |
113 | int mode, int oflags, | 113 | int mode, unsigned int f_flags, |
114 | __u32 *poplock, __u16 *pnetfid, int xid); | 114 | __u32 *poplock, __u16 *pnetfid, int xid); |
115 | void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr); | 115 | void cifs_fill_uniqueid(struct super_block *sb, struct cifs_fattr *fattr); |
116 | extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, | 116 | extern void cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, |
@@ -362,12 +362,12 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *); | |||
362 | extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, | 362 | extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *, |
363 | __u32 *); | 363 | __u32 *); |
364 | extern int cifs_verify_signature(struct smb_hdr *, | 364 | extern int cifs_verify_signature(struct smb_hdr *, |
365 | const struct mac_key *mac_key, | 365 | const struct session_key *session_key, |
366 | __u32 expected_sequence_number); | 366 | __u32 expected_sequence_number); |
367 | extern int cifs_calculate_mac_key(struct mac_key *key, const char *rn, | 367 | extern int cifs_calculate_session_key(struct session_key *key, const char *rn, |
368 | const char *pass); | 368 | const char *pass); |
369 | extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); | 369 | extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *); |
370 | extern void setup_ntlmv2_rsp(struct cifsSesInfo *, char *, | 370 | extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *, |
371 | const struct nls_table *); | 371 | const struct nls_table *); |
372 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 372 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
373 | extern void calc_lanman_hash(const char *password, const char *cryptkey, | 373 | extern void calc_lanman_hash(const char *password, const char *cryptkey, |
@@ -408,4 +408,8 @@ extern int CIFSSMBSetPosixACL(const int xid, struct cifsTconInfo *tcon, | |||
408 | extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, | 408 | extern int CIFSGetExtAttr(const int xid, struct cifsTconInfo *tcon, |
409 | const int netfid, __u64 *pExtAttrBits, __u64 *pMask); | 409 | const int netfid, __u64 *pExtAttrBits, __u64 *pMask); |
410 | extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); | 410 | extern void cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb); |
411 | extern bool CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr); | ||
412 | extern int CIFSCheckMFSymlink(struct cifs_fattr *fattr, | ||
413 | const unsigned char *path, | ||
414 | struct cifs_sb_info *cifs_sb, int xid); | ||
411 | #endif /* _CIFSPROTO_H */ | 415 | #endif /* _CIFSPROTO_H */ |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index c65c3419dd37..e98f1f317b15 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
@@ -91,13 +91,13 @@ static void mark_open_files_invalid(struct cifsTconInfo *pTcon) | |||
91 | struct list_head *tmp1; | 91 | struct list_head *tmp1; |
92 | 92 | ||
93 | /* list all files open on tree connection and mark them invalid */ | 93 | /* list all files open on tree connection and mark them invalid */ |
94 | write_lock(&GlobalSMBSeslock); | 94 | spin_lock(&cifs_file_list_lock); |
95 | list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { | 95 | list_for_each_safe(tmp, tmp1, &pTcon->openFileList) { |
96 | open_file = list_entry(tmp, struct cifsFileInfo, tlist); | 96 | open_file = list_entry(tmp, struct cifsFileInfo, tlist); |
97 | open_file->invalidHandle = true; | 97 | open_file->invalidHandle = true; |
98 | open_file->oplock_break_cancelled = true; | 98 | open_file->oplock_break_cancelled = true; |
99 | } | 99 | } |
100 | write_unlock(&GlobalSMBSeslock); | 100 | spin_unlock(&cifs_file_list_lock); |
101 | /* BB Add call to invalidate_inodes(sb) for all superblocks mounted | 101 | /* BB Add call to invalidate_inodes(sb) for all superblocks mounted |
102 | to this tcon */ | 102 | to this tcon */ |
103 | } | 103 | } |
@@ -232,7 +232,7 @@ static int | |||
232 | small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | 232 | small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, |
233 | void **request_buf) | 233 | void **request_buf) |
234 | { | 234 | { |
235 | int rc = 0; | 235 | int rc; |
236 | 236 | ||
237 | rc = cifs_reconnect_tcon(tcon, smb_command); | 237 | rc = cifs_reconnect_tcon(tcon, smb_command); |
238 | if (rc) | 238 | if (rc) |
@@ -250,7 +250,7 @@ small_smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | |||
250 | if (tcon != NULL) | 250 | if (tcon != NULL) |
251 | cifs_stats_inc(&tcon->num_smbs_sent); | 251 | cifs_stats_inc(&tcon->num_smbs_sent); |
252 | 252 | ||
253 | return rc; | 253 | return 0; |
254 | } | 254 | } |
255 | 255 | ||
256 | int | 256 | int |
@@ -281,16 +281,9 @@ small_smb_init_no_tc(const int smb_command, const int wct, | |||
281 | 281 | ||
282 | /* If the return code is zero, this function must fill in request_buf pointer */ | 282 | /* If the return code is zero, this function must fill in request_buf pointer */ |
283 | static int | 283 | static int |
284 | smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | 284 | __smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, |
285 | void **request_buf /* returned */ , | 285 | void **request_buf, void **response_buf) |
286 | void **response_buf /* returned */ ) | ||
287 | { | 286 | { |
288 | int rc = 0; | ||
289 | |||
290 | rc = cifs_reconnect_tcon(tcon, smb_command); | ||
291 | if (rc) | ||
292 | return rc; | ||
293 | |||
294 | *request_buf = cifs_buf_get(); | 287 | *request_buf = cifs_buf_get(); |
295 | if (*request_buf == NULL) { | 288 | if (*request_buf == NULL) { |
296 | /* BB should we add a retry in here if not a writepage? */ | 289 | /* BB should we add a retry in here if not a writepage? */ |
@@ -309,7 +302,31 @@ smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | |||
309 | if (tcon != NULL) | 302 | if (tcon != NULL) |
310 | cifs_stats_inc(&tcon->num_smbs_sent); | 303 | cifs_stats_inc(&tcon->num_smbs_sent); |
311 | 304 | ||
312 | return rc; | 305 | return 0; |
306 | } | ||
307 | |||
308 | /* If the return code is zero, this function must fill in request_buf pointer */ | ||
309 | static int | ||
310 | smb_init(int smb_command, int wct, struct cifsTconInfo *tcon, | ||
311 | void **request_buf, void **response_buf) | ||
312 | { | ||
313 | int rc; | ||
314 | |||
315 | rc = cifs_reconnect_tcon(tcon, smb_command); | ||
316 | if (rc) | ||
317 | return rc; | ||
318 | |||
319 | return __smb_init(smb_command, wct, tcon, request_buf, response_buf); | ||
320 | } | ||
321 | |||
322 | static int | ||
323 | smb_init_no_reconnect(int smb_command, int wct, struct cifsTconInfo *tcon, | ||
324 | void **request_buf, void **response_buf) | ||
325 | { | ||
326 | if (tcon->ses->need_reconnect || tcon->need_reconnect) | ||
327 | return -EHOSTDOWN; | ||
328 | |||
329 | return __smb_init(smb_command, wct, tcon, request_buf, response_buf); | ||
313 | } | 330 | } |
314 | 331 | ||
315 | static int validate_t2(struct smb_t2_rsp *pSMB) | 332 | static int validate_t2(struct smb_t2_rsp *pSMB) |
@@ -486,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
486 | 503 | ||
487 | if (rsp->EncryptionKeyLength == | 504 | if (rsp->EncryptionKeyLength == |
488 | cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { | 505 | cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) { |
489 | memcpy(server->cryptKey, rsp->EncryptionKey, | 506 | memcpy(ses->cryptKey, rsp->EncryptionKey, |
490 | CIFS_CRYPTO_KEY_SIZE); | 507 | CIFS_CRYPTO_KEY_SIZE); |
491 | } else if (server->secMode & SECMODE_PW_ENCRYPT) { | 508 | } else if (server->secMode & SECMODE_PW_ENCRYPT) { |
492 | rc = -EIO; /* need cryptkey unless plain text */ | 509 | rc = -EIO; /* need cryptkey unless plain text */ |
@@ -557,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
557 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); | 574 | server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone); |
558 | server->timeAdj *= 60; | 575 | server->timeAdj *= 60; |
559 | if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { | 576 | if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) { |
560 | memcpy(server->cryptKey, pSMBr->u.EncryptionKey, | 577 | memcpy(ses->cryptKey, pSMBr->u.EncryptionKey, |
561 | CIFS_CRYPTO_KEY_SIZE); | 578 | CIFS_CRYPTO_KEY_SIZE); |
562 | } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) | 579 | } else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC) |
563 | && (pSMBr->EncryptionKeyLength == 0)) { | 580 | && (pSMBr->EncryptionKeyLength == 0)) { |
@@ -576,9 +593,9 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
576 | rc = -EIO; | 593 | rc = -EIO; |
577 | goto neg_err_exit; | 594 | goto neg_err_exit; |
578 | } | 595 | } |
579 | read_lock(&cifs_tcp_ses_lock); | 596 | spin_lock(&cifs_tcp_ses_lock); |
580 | if (server->srv_count > 1) { | 597 | if (server->srv_count > 1) { |
581 | read_unlock(&cifs_tcp_ses_lock); | 598 | spin_unlock(&cifs_tcp_ses_lock); |
582 | if (memcmp(server->server_GUID, | 599 | if (memcmp(server->server_GUID, |
583 | pSMBr->u.extended_response. | 600 | pSMBr->u.extended_response. |
584 | GUID, 16) != 0) { | 601 | GUID, 16) != 0) { |
@@ -588,7 +605,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
588 | 16); | 605 | 16); |
589 | } | 606 | } |
590 | } else { | 607 | } else { |
591 | read_unlock(&cifs_tcp_ses_lock); | 608 | spin_unlock(&cifs_tcp_ses_lock); |
592 | memcpy(server->server_GUID, | 609 | memcpy(server->server_GUID, |
593 | pSMBr->u.extended_response.GUID, 16); | 610 | pSMBr->u.extended_response.GUID, 16); |
594 | } | 611 | } |
@@ -603,13 +620,15 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses) | |||
603 | rc = 0; | 620 | rc = 0; |
604 | else | 621 | else |
605 | rc = -EINVAL; | 622 | rc = -EINVAL; |
606 | 623 | if (server->secType == Kerberos) { | |
607 | if (server->sec_kerberos || server->sec_mskerberos) | 624 | if (!server->sec_kerberos && |
608 | server->secType = Kerberos; | 625 | !server->sec_mskerberos) |
609 | else if (server->sec_ntlmssp) | 626 | rc = -EOPNOTSUPP; |
610 | server->secType = RawNTLMSSP; | 627 | } else if (server->secType == RawNTLMSSP) { |
611 | else | 628 | if (!server->sec_ntlmssp) |
612 | rc = -EOPNOTSUPP; | 629 | rc = -EOPNOTSUPP; |
630 | } else | ||
631 | rc = -EOPNOTSUPP; | ||
613 | } | 632 | } |
614 | } else | 633 | } else |
615 | server->capabilities &= ~CAP_EXTENDED_SECURITY; | 634 | server->capabilities &= ~CAP_EXTENDED_SECURITY; |
@@ -4534,8 +4553,8 @@ CIFSSMBQFSUnixInfo(const int xid, struct cifsTconInfo *tcon) | |||
4534 | 4553 | ||
4535 | cFYI(1, "In QFSUnixInfo"); | 4554 | cFYI(1, "In QFSUnixInfo"); |
4536 | QFSUnixRetry: | 4555 | QFSUnixRetry: |
4537 | rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, | 4556 | rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, |
4538 | (void **) &pSMBr); | 4557 | (void **) &pSMB, (void **) &pSMBr); |
4539 | if (rc) | 4558 | if (rc) |
4540 | return rc; | 4559 | return rc; |
4541 | 4560 | ||
@@ -4604,8 +4623,8 @@ CIFSSMBSetFSUnixInfo(const int xid, struct cifsTconInfo *tcon, __u64 cap) | |||
4604 | cFYI(1, "In SETFSUnixInfo"); | 4623 | cFYI(1, "In SETFSUnixInfo"); |
4605 | SETFSUnixRetry: | 4624 | SETFSUnixRetry: |
4606 | /* BB switch to small buf init to save memory */ | 4625 | /* BB switch to small buf init to save memory */ |
4607 | rc = smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB, | 4626 | rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, tcon, |
4608 | (void **) &pSMBr); | 4627 | (void **) &pSMB, (void **) &pSMBr); |
4609 | if (rc) | 4628 | if (rc) |
4610 | return rc; | 4629 | return rc; |
4611 | 4630 | ||
diff --git a/fs/cifs/cn_cifs.h b/fs/cifs/cn_cifs.h deleted file mode 100644 index ea59ccac2eb1..000000000000 --- a/fs/cifs/cn_cifs.h +++ /dev/null | |||
@@ -1,37 +0,0 @@ | |||
1 | /* | ||
2 | * fs/cifs/cn_cifs.h | ||
3 | * | ||
4 | * Copyright (c) International Business Machines Corp., 2002 | ||
5 | * Author(s): Steve French (sfrench@us.ibm.com) | ||
6 | * | ||
7 | * This library is free software; you can redistribute it and/or modify | ||
8 | * it under the terms of the GNU Lesser General Public License as published | ||
9 | * by the Free Software Foundation; either version 2.1 of the License, or | ||
10 | * (at your option) any later version. | ||
11 | * | ||
12 | * This library is distributed in the hope that it will be useful, | ||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See | ||
15 | * the GNU Lesser General Public License for more details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU Lesser General Public License | ||
18 | * along with this library; if not, write to the Free Software | ||
19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
20 | */ | ||
21 | |||
22 | #ifndef _CN_CIFS_H | ||
23 | #define _CN_CIFS_H | ||
24 | #ifdef CONFIG_CIFS_UPCALL | ||
25 | #include <linux/types.h> | ||
26 | #include <linux/connector.h> | ||
27 | |||
28 | struct cifs_upcall { | ||
29 | char signature[4]; /* CIFS */ | ||
30 | enum command { | ||
31 | CIFS_GET_IP = 0x00000001, /* get ip address for hostname */ | ||
32 | CIFS_GET_SECBLOB = 0x00000002, /* get SPNEGO wrapped blob */ | ||
33 | } command; | ||
34 | /* union cifs upcall data follows */ | ||
35 | }; | ||
36 | #endif /* CIFS_UPCALL */ | ||
37 | #endif /* _CN_CIFS_H */ | ||
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 88c84a38bccb..7e73176acb58 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -47,7 +47,6 @@ | |||
47 | #include "ntlmssp.h" | 47 | #include "ntlmssp.h" |
48 | #include "nterr.h" | 48 | #include "nterr.h" |
49 | #include "rfc1002pdu.h" | 49 | #include "rfc1002pdu.h" |
50 | #include "cn_cifs.h" | ||
51 | #include "fscache.h" | 50 | #include "fscache.h" |
52 | 51 | ||
53 | #define CIFS_PORT 445 | 52 | #define CIFS_PORT 445 |
@@ -100,16 +99,24 @@ struct smb_vol { | |||
100 | bool noautotune:1; | 99 | bool noautotune:1; |
101 | bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ | 100 | bool nostrictsync:1; /* do not force expensive SMBflush on every sync */ |
102 | bool fsc:1; /* enable fscache */ | 101 | bool fsc:1; /* enable fscache */ |
102 | bool mfsymlinks:1; /* use Minshall+French Symlinks */ | ||
103 | bool multiuser:1; | ||
103 | unsigned int rsize; | 104 | unsigned int rsize; |
104 | unsigned int wsize; | 105 | unsigned int wsize; |
105 | bool sockopt_tcp_nodelay:1; | 106 | bool sockopt_tcp_nodelay:1; |
106 | unsigned short int port; | 107 | unsigned short int port; |
107 | char *prepath; | 108 | char *prepath; |
109 | struct sockaddr_storage srcaddr; /* allow binding to a local IP */ | ||
108 | struct nls_table *local_nls; | 110 | struct nls_table *local_nls; |
109 | }; | 111 | }; |
110 | 112 | ||
113 | /* FIXME: should these be tunable? */ | ||
114 | #define TLINK_ERROR_EXPIRE (1 * HZ) | ||
115 | #define TLINK_IDLE_EXPIRE (600 * HZ) | ||
116 | |||
111 | static int ipv4_connect(struct TCP_Server_Info *server); | 117 | static int ipv4_connect(struct TCP_Server_Info *server); |
112 | static int ipv6_connect(struct TCP_Server_Info *server); | 118 | static int ipv6_connect(struct TCP_Server_Info *server); |
119 | static void cifs_prune_tlinks(struct work_struct *work); | ||
113 | 120 | ||
114 | /* | 121 | /* |
115 | * cifs tcp session reconnection | 122 | * cifs tcp session reconnection |
@@ -143,7 +150,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
143 | 150 | ||
144 | /* before reconnecting the tcp session, mark the smb session (uid) | 151 | /* before reconnecting the tcp session, mark the smb session (uid) |
145 | and the tid bad so they are not used until reconnected */ | 152 | and the tid bad so they are not used until reconnected */ |
146 | read_lock(&cifs_tcp_ses_lock); | 153 | spin_lock(&cifs_tcp_ses_lock); |
147 | list_for_each(tmp, &server->smb_ses_list) { | 154 | list_for_each(tmp, &server->smb_ses_list) { |
148 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); | 155 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); |
149 | ses->need_reconnect = true; | 156 | ses->need_reconnect = true; |
@@ -153,7 +160,7 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
153 | tcon->need_reconnect = true; | 160 | tcon->need_reconnect = true; |
154 | } | 161 | } |
155 | } | 162 | } |
156 | read_unlock(&cifs_tcp_ses_lock); | 163 | spin_unlock(&cifs_tcp_ses_lock); |
157 | /* do not want to be sending data on a socket we are freeing */ | 164 | /* do not want to be sending data on a socket we are freeing */ |
158 | mutex_lock(&server->srv_mutex); | 165 | mutex_lock(&server->srv_mutex); |
159 | if (server->ssocket) { | 166 | if (server->ssocket) { |
@@ -166,6 +173,8 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
166 | sock_release(server->ssocket); | 173 | sock_release(server->ssocket); |
167 | server->ssocket = NULL; | 174 | server->ssocket = NULL; |
168 | } | 175 | } |
176 | server->sequence_number = 0; | ||
177 | server->session_estab = false; | ||
169 | 178 | ||
170 | spin_lock(&GlobalMid_Lock); | 179 | spin_lock(&GlobalMid_Lock); |
171 | list_for_each(tmp, &server->pending_mid_q) { | 180 | list_for_each(tmp, &server->pending_mid_q) { |
@@ -198,7 +207,6 @@ cifs_reconnect(struct TCP_Server_Info *server) | |||
198 | spin_lock(&GlobalMid_Lock); | 207 | spin_lock(&GlobalMid_Lock); |
199 | if (server->tcpStatus != CifsExiting) | 208 | if (server->tcpStatus != CifsExiting) |
200 | server->tcpStatus = CifsGood; | 209 | server->tcpStatus = CifsGood; |
201 | server->sequence_number = 0; | ||
202 | spin_unlock(&GlobalMid_Lock); | 210 | spin_unlock(&GlobalMid_Lock); |
203 | /* atomic_set(&server->inFlight,0);*/ | 211 | /* atomic_set(&server->inFlight,0);*/ |
204 | wake_up(&server->response_q); | 212 | wake_up(&server->response_q); |
@@ -629,9 +637,9 @@ multi_t2_fnd: | |||
629 | } /* end while !EXITING */ | 637 | } /* end while !EXITING */ |
630 | 638 | ||
631 | /* take it off the list, if it's not already */ | 639 | /* take it off the list, if it's not already */ |
632 | write_lock(&cifs_tcp_ses_lock); | 640 | spin_lock(&cifs_tcp_ses_lock); |
633 | list_del_init(&server->tcp_ses_list); | 641 | list_del_init(&server->tcp_ses_list); |
634 | write_unlock(&cifs_tcp_ses_lock); | 642 | spin_unlock(&cifs_tcp_ses_lock); |
635 | 643 | ||
636 | spin_lock(&GlobalMid_Lock); | 644 | spin_lock(&GlobalMid_Lock); |
637 | server->tcpStatus = CifsExiting; | 645 | server->tcpStatus = CifsExiting; |
@@ -669,7 +677,7 @@ multi_t2_fnd: | |||
669 | * BB: we shouldn't have to do any of this. It shouldn't be | 677 | * BB: we shouldn't have to do any of this. It shouldn't be |
670 | * possible to exit from the thread with active SMB sessions | 678 | * possible to exit from the thread with active SMB sessions |
671 | */ | 679 | */ |
672 | read_lock(&cifs_tcp_ses_lock); | 680 | spin_lock(&cifs_tcp_ses_lock); |
673 | if (list_empty(&server->pending_mid_q)) { | 681 | if (list_empty(&server->pending_mid_q)) { |
674 | /* loop through server session structures attached to this and | 682 | /* loop through server session structures attached to this and |
675 | mark them dead */ | 683 | mark them dead */ |
@@ -679,7 +687,7 @@ multi_t2_fnd: | |||
679 | ses->status = CifsExiting; | 687 | ses->status = CifsExiting; |
680 | ses->server = NULL; | 688 | ses->server = NULL; |
681 | } | 689 | } |
682 | read_unlock(&cifs_tcp_ses_lock); | 690 | spin_unlock(&cifs_tcp_ses_lock); |
683 | } else { | 691 | } else { |
684 | /* although we can not zero the server struct pointer yet, | 692 | /* although we can not zero the server struct pointer yet, |
685 | since there are active requests which may depnd on them, | 693 | since there are active requests which may depnd on them, |
@@ -702,7 +710,7 @@ multi_t2_fnd: | |||
702 | } | 710 | } |
703 | } | 711 | } |
704 | spin_unlock(&GlobalMid_Lock); | 712 | spin_unlock(&GlobalMid_Lock); |
705 | read_unlock(&cifs_tcp_ses_lock); | 713 | spin_unlock(&cifs_tcp_ses_lock); |
706 | /* 1/8th of sec is more than enough time for them to exit */ | 714 | /* 1/8th of sec is more than enough time for them to exit */ |
707 | msleep(125); | 715 | msleep(125); |
708 | } | 716 | } |
@@ -725,12 +733,12 @@ multi_t2_fnd: | |||
725 | if a crazy root user tried to kill cifsd | 733 | if a crazy root user tried to kill cifsd |
726 | kernel thread explicitly this might happen) */ | 734 | kernel thread explicitly this might happen) */ |
727 | /* BB: This shouldn't be necessary, see above */ | 735 | /* BB: This shouldn't be necessary, see above */ |
728 | read_lock(&cifs_tcp_ses_lock); | 736 | spin_lock(&cifs_tcp_ses_lock); |
729 | list_for_each(tmp, &server->smb_ses_list) { | 737 | list_for_each(tmp, &server->smb_ses_list) { |
730 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); | 738 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); |
731 | ses->server = NULL; | 739 | ses->server = NULL; |
732 | } | 740 | } |
733 | read_unlock(&cifs_tcp_ses_lock); | 741 | spin_unlock(&cifs_tcp_ses_lock); |
734 | 742 | ||
735 | kfree(server->hostname); | 743 | kfree(server->hostname); |
736 | task_to_wake = xchg(&server->tsk, NULL); | 744 | task_to_wake = xchg(&server->tsk, NULL); |
@@ -1046,6 +1054,22 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1046 | "long\n"); | 1054 | "long\n"); |
1047 | return 1; | 1055 | return 1; |
1048 | } | 1056 | } |
1057 | } else if (strnicmp(data, "srcaddr", 7) == 0) { | ||
1058 | vol->srcaddr.ss_family = AF_UNSPEC; | ||
1059 | |||
1060 | if (!value || !*value) { | ||
1061 | printk(KERN_WARNING "CIFS: srcaddr value" | ||
1062 | " not specified.\n"); | ||
1063 | return 1; /* needs_arg; */ | ||
1064 | } | ||
1065 | i = cifs_convert_address((struct sockaddr *)&vol->srcaddr, | ||
1066 | value, strlen(value)); | ||
1067 | if (i < 0) { | ||
1068 | printk(KERN_WARNING "CIFS: Could not parse" | ||
1069 | " srcaddr: %s\n", | ||
1070 | value); | ||
1071 | return 1; | ||
1072 | } | ||
1049 | } else if (strnicmp(data, "prefixpath", 10) == 0) { | 1073 | } else if (strnicmp(data, "prefixpath", 10) == 0) { |
1050 | if (!value || !*value) { | 1074 | if (!value || !*value) { |
1051 | printk(KERN_WARNING | 1075 | printk(KERN_WARNING |
@@ -1325,6 +1349,10 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1325 | "/proc/fs/cifs/LookupCacheEnabled to 0\n"); | 1349 | "/proc/fs/cifs/LookupCacheEnabled to 0\n"); |
1326 | } else if (strnicmp(data, "fsc", 3) == 0) { | 1350 | } else if (strnicmp(data, "fsc", 3) == 0) { |
1327 | vol->fsc = true; | 1351 | vol->fsc = true; |
1352 | } else if (strnicmp(data, "mfsymlinks", 10) == 0) { | ||
1353 | vol->mfsymlinks = true; | ||
1354 | } else if (strnicmp(data, "multiuser", 8) == 0) { | ||
1355 | vol->multiuser = true; | ||
1328 | } else | 1356 | } else |
1329 | printk(KERN_WARNING "CIFS: Unknown mount option %s\n", | 1357 | printk(KERN_WARNING "CIFS: Unknown mount option %s\n", |
1330 | data); | 1358 | data); |
@@ -1356,6 +1384,13 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1356 | return 1; | 1384 | return 1; |
1357 | } | 1385 | } |
1358 | } | 1386 | } |
1387 | |||
1388 | if (vol->multiuser && !(vol->secFlg & CIFSSEC_MAY_KRB5)) { | ||
1389 | cERROR(1, "Multiuser mounts currently require krb5 " | ||
1390 | "authentication!"); | ||
1391 | return 1; | ||
1392 | } | ||
1393 | |||
1359 | if (vol->UNCip == NULL) | 1394 | if (vol->UNCip == NULL) |
1360 | vol->UNCip = &vol->UNC[2]; | 1395 | vol->UNCip = &vol->UNC[2]; |
1361 | 1396 | ||
@@ -1374,8 +1409,36 @@ cifs_parse_mount_options(char *options, const char *devname, | |||
1374 | return 0; | 1409 | return 0; |
1375 | } | 1410 | } |
1376 | 1411 | ||
1412 | /** Returns true if srcaddr isn't specified and rhs isn't | ||
1413 | * specified, or if srcaddr is specified and | ||
1414 | * matches the IP address of the rhs argument. | ||
1415 | */ | ||
1416 | static bool | ||
1417 | srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs) | ||
1418 | { | ||
1419 | switch (srcaddr->sa_family) { | ||
1420 | case AF_UNSPEC: | ||
1421 | return (rhs->sa_family == AF_UNSPEC); | ||
1422 | case AF_INET: { | ||
1423 | struct sockaddr_in *saddr4 = (struct sockaddr_in *)srcaddr; | ||
1424 | struct sockaddr_in *vaddr4 = (struct sockaddr_in *)rhs; | ||
1425 | return (saddr4->sin_addr.s_addr == vaddr4->sin_addr.s_addr); | ||
1426 | } | ||
1427 | case AF_INET6: { | ||
1428 | struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; | ||
1429 | struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; | ||
1430 | return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); | ||
1431 | } | ||
1432 | default: | ||
1433 | WARN_ON(1); | ||
1434 | return false; /* don't expect to be here */ | ||
1435 | } | ||
1436 | } | ||
1437 | |||
1438 | |||
1377 | static bool | 1439 | static bool |
1378 | match_address(struct TCP_Server_Info *server, struct sockaddr *addr) | 1440 | match_address(struct TCP_Server_Info *server, struct sockaddr *addr, |
1441 | struct sockaddr *srcaddr) | ||
1379 | { | 1442 | { |
1380 | struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; | 1443 | struct sockaddr_in *addr4 = (struct sockaddr_in *)addr; |
1381 | struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; | 1444 | struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr; |
@@ -1402,6 +1465,9 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr) | |||
1402 | break; | 1465 | break; |
1403 | } | 1466 | } |
1404 | 1467 | ||
1468 | if (!srcip_matches(srcaddr, (struct sockaddr *)&server->srcaddr)) | ||
1469 | return false; | ||
1470 | |||
1405 | return true; | 1471 | return true; |
1406 | } | 1472 | } |
1407 | 1473 | ||
@@ -1458,29 +1524,21 @@ cifs_find_tcp_session(struct sockaddr *addr, struct smb_vol *vol) | |||
1458 | { | 1524 | { |
1459 | struct TCP_Server_Info *server; | 1525 | struct TCP_Server_Info *server; |
1460 | 1526 | ||
1461 | write_lock(&cifs_tcp_ses_lock); | 1527 | spin_lock(&cifs_tcp_ses_lock); |
1462 | list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { | 1528 | list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { |
1463 | /* | 1529 | if (!match_address(server, addr, |
1464 | * the demux thread can exit on its own while still in CifsNew | 1530 | (struct sockaddr *)&vol->srcaddr)) |
1465 | * so don't accept any sockets in that state. Since the | ||
1466 | * tcpStatus never changes back to CifsNew it's safe to check | ||
1467 | * for this without a lock. | ||
1468 | */ | ||
1469 | if (server->tcpStatus == CifsNew) | ||
1470 | continue; | ||
1471 | |||
1472 | if (!match_address(server, addr)) | ||
1473 | continue; | 1531 | continue; |
1474 | 1532 | ||
1475 | if (!match_security(server, vol)) | 1533 | if (!match_security(server, vol)) |
1476 | continue; | 1534 | continue; |
1477 | 1535 | ||
1478 | ++server->srv_count; | 1536 | ++server->srv_count; |
1479 | write_unlock(&cifs_tcp_ses_lock); | 1537 | spin_unlock(&cifs_tcp_ses_lock); |
1480 | cFYI(1, "Existing tcp session with server found"); | 1538 | cFYI(1, "Existing tcp session with server found"); |
1481 | return server; | 1539 | return server; |
1482 | } | 1540 | } |
1483 | write_unlock(&cifs_tcp_ses_lock); | 1541 | spin_unlock(&cifs_tcp_ses_lock); |
1484 | return NULL; | 1542 | return NULL; |
1485 | } | 1543 | } |
1486 | 1544 | ||
@@ -1489,14 +1547,14 @@ cifs_put_tcp_session(struct TCP_Server_Info *server) | |||
1489 | { | 1547 | { |
1490 | struct task_struct *task; | 1548 | struct task_struct *task; |
1491 | 1549 | ||
1492 | write_lock(&cifs_tcp_ses_lock); | 1550 | spin_lock(&cifs_tcp_ses_lock); |
1493 | if (--server->srv_count > 0) { | 1551 | if (--server->srv_count > 0) { |
1494 | write_unlock(&cifs_tcp_ses_lock); | 1552 | spin_unlock(&cifs_tcp_ses_lock); |
1495 | return; | 1553 | return; |
1496 | } | 1554 | } |
1497 | 1555 | ||
1498 | list_del_init(&server->tcp_ses_list); | 1556 | list_del_init(&server->tcp_ses_list); |
1499 | write_unlock(&cifs_tcp_ses_lock); | 1557 | spin_unlock(&cifs_tcp_ses_lock); |
1500 | 1558 | ||
1501 | spin_lock(&GlobalMid_Lock); | 1559 | spin_lock(&GlobalMid_Lock); |
1502 | server->tcpStatus = CifsExiting; | 1560 | server->tcpStatus = CifsExiting; |
@@ -1574,6 +1632,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1574 | volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); | 1632 | volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); |
1575 | memcpy(tcp_ses->server_RFC1001_name, | 1633 | memcpy(tcp_ses->server_RFC1001_name, |
1576 | volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); | 1634 | volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL); |
1635 | tcp_ses->session_estab = false; | ||
1577 | tcp_ses->sequence_number = 0; | 1636 | tcp_ses->sequence_number = 0; |
1578 | INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); | 1637 | INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); |
1579 | INIT_LIST_HEAD(&tcp_ses->smb_ses_list); | 1638 | INIT_LIST_HEAD(&tcp_ses->smb_ses_list); |
@@ -1584,6 +1643,8 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1584 | * no need to spinlock this init of tcpStatus or srv_count | 1643 | * no need to spinlock this init of tcpStatus or srv_count |
1585 | */ | 1644 | */ |
1586 | tcp_ses->tcpStatus = CifsNew; | 1645 | tcp_ses->tcpStatus = CifsNew; |
1646 | memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr, | ||
1647 | sizeof(tcp_ses->srcaddr)); | ||
1587 | ++tcp_ses->srv_count; | 1648 | ++tcp_ses->srv_count; |
1588 | 1649 | ||
1589 | if (addr.ss_family == AF_INET6) { | 1650 | if (addr.ss_family == AF_INET6) { |
@@ -1618,9 +1679,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info) | |||
1618 | } | 1679 | } |
1619 | 1680 | ||
1620 | /* thread spawned, put it on the list */ | 1681 | /* thread spawned, put it on the list */ |
1621 | write_lock(&cifs_tcp_ses_lock); | 1682 | spin_lock(&cifs_tcp_ses_lock); |
1622 | list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); | 1683 | list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list); |
1623 | write_unlock(&cifs_tcp_ses_lock); | 1684 | spin_unlock(&cifs_tcp_ses_lock); |
1624 | 1685 | ||
1625 | cifs_fscache_get_client_cookie(tcp_ses); | 1686 | cifs_fscache_get_client_cookie(tcp_ses); |
1626 | 1687 | ||
@@ -1642,7 +1703,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
1642 | { | 1703 | { |
1643 | struct cifsSesInfo *ses; | 1704 | struct cifsSesInfo *ses; |
1644 | 1705 | ||
1645 | write_lock(&cifs_tcp_ses_lock); | 1706 | spin_lock(&cifs_tcp_ses_lock); |
1646 | list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { | 1707 | list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { |
1647 | switch (server->secType) { | 1708 | switch (server->secType) { |
1648 | case Kerberos: | 1709 | case Kerberos: |
@@ -1662,10 +1723,10 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol) | |||
1662 | continue; | 1723 | continue; |
1663 | } | 1724 | } |
1664 | ++ses->ses_count; | 1725 | ++ses->ses_count; |
1665 | write_unlock(&cifs_tcp_ses_lock); | 1726 | spin_unlock(&cifs_tcp_ses_lock); |
1666 | return ses; | 1727 | return ses; |
1667 | } | 1728 | } |
1668 | write_unlock(&cifs_tcp_ses_lock); | 1729 | spin_unlock(&cifs_tcp_ses_lock); |
1669 | return NULL; | 1730 | return NULL; |
1670 | } | 1731 | } |
1671 | 1732 | ||
@@ -1676,14 +1737,14 @@ cifs_put_smb_ses(struct cifsSesInfo *ses) | |||
1676 | struct TCP_Server_Info *server = ses->server; | 1737 | struct TCP_Server_Info *server = ses->server; |
1677 | 1738 | ||
1678 | cFYI(1, "%s: ses_count=%d\n", __func__, ses->ses_count); | 1739 | cFYI(1, "%s: ses_count=%d\n", __func__, ses->ses_count); |
1679 | write_lock(&cifs_tcp_ses_lock); | 1740 | spin_lock(&cifs_tcp_ses_lock); |
1680 | if (--ses->ses_count > 0) { | 1741 | if (--ses->ses_count > 0) { |
1681 | write_unlock(&cifs_tcp_ses_lock); | 1742 | spin_unlock(&cifs_tcp_ses_lock); |
1682 | return; | 1743 | return; |
1683 | } | 1744 | } |
1684 | 1745 | ||
1685 | list_del_init(&ses->smb_ses_list); | 1746 | list_del_init(&ses->smb_ses_list); |
1686 | write_unlock(&cifs_tcp_ses_lock); | 1747 | spin_unlock(&cifs_tcp_ses_lock); |
1687 | 1748 | ||
1688 | if (ses->status == CifsGood) { | 1749 | if (ses->status == CifsGood) { |
1689 | xid = GetXid(); | 1750 | xid = GetXid(); |
@@ -1740,6 +1801,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
1740 | if (ses == NULL) | 1801 | if (ses == NULL) |
1741 | goto get_ses_fail; | 1802 | goto get_ses_fail; |
1742 | 1803 | ||
1804 | ses->tilen = 0; | ||
1805 | ses->tiblob = NULL; | ||
1743 | /* new SMB session uses our server ref */ | 1806 | /* new SMB session uses our server ref */ |
1744 | ses->server = server; | 1807 | ses->server = server; |
1745 | if (server->addr.sockAddr6.sin6_family == AF_INET6) | 1808 | if (server->addr.sockAddr6.sin6_family == AF_INET6) |
@@ -1778,9 +1841,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info) | |||
1778 | goto get_ses_fail; | 1841 | goto get_ses_fail; |
1779 | 1842 | ||
1780 | /* success, put it on the list */ | 1843 | /* success, put it on the list */ |
1781 | write_lock(&cifs_tcp_ses_lock); | 1844 | spin_lock(&cifs_tcp_ses_lock); |
1782 | list_add(&ses->smb_ses_list, &server->smb_ses_list); | 1845 | list_add(&ses->smb_ses_list, &server->smb_ses_list); |
1783 | write_unlock(&cifs_tcp_ses_lock); | 1846 | spin_unlock(&cifs_tcp_ses_lock); |
1784 | 1847 | ||
1785 | FreeXid(xid); | 1848 | FreeXid(xid); |
1786 | return ses; | 1849 | return ses; |
@@ -1797,7 +1860,7 @@ cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) | |||
1797 | struct list_head *tmp; | 1860 | struct list_head *tmp; |
1798 | struct cifsTconInfo *tcon; | 1861 | struct cifsTconInfo *tcon; |
1799 | 1862 | ||
1800 | write_lock(&cifs_tcp_ses_lock); | 1863 | spin_lock(&cifs_tcp_ses_lock); |
1801 | list_for_each(tmp, &ses->tcon_list) { | 1864 | list_for_each(tmp, &ses->tcon_list) { |
1802 | tcon = list_entry(tmp, struct cifsTconInfo, tcon_list); | 1865 | tcon = list_entry(tmp, struct cifsTconInfo, tcon_list); |
1803 | if (tcon->tidStatus == CifsExiting) | 1866 | if (tcon->tidStatus == CifsExiting) |
@@ -1806,10 +1869,10 @@ cifs_find_tcon(struct cifsSesInfo *ses, const char *unc) | |||
1806 | continue; | 1869 | continue; |
1807 | 1870 | ||
1808 | ++tcon->tc_count; | 1871 | ++tcon->tc_count; |
1809 | write_unlock(&cifs_tcp_ses_lock); | 1872 | spin_unlock(&cifs_tcp_ses_lock); |
1810 | return tcon; | 1873 | return tcon; |
1811 | } | 1874 | } |
1812 | write_unlock(&cifs_tcp_ses_lock); | 1875 | spin_unlock(&cifs_tcp_ses_lock); |
1813 | return NULL; | 1876 | return NULL; |
1814 | } | 1877 | } |
1815 | 1878 | ||
@@ -1820,14 +1883,14 @@ cifs_put_tcon(struct cifsTconInfo *tcon) | |||
1820 | struct cifsSesInfo *ses = tcon->ses; | 1883 | struct cifsSesInfo *ses = tcon->ses; |
1821 | 1884 | ||
1822 | cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count); | 1885 | cFYI(1, "%s: tc_count=%d\n", __func__, tcon->tc_count); |
1823 | write_lock(&cifs_tcp_ses_lock); | 1886 | spin_lock(&cifs_tcp_ses_lock); |
1824 | if (--tcon->tc_count > 0) { | 1887 | if (--tcon->tc_count > 0) { |
1825 | write_unlock(&cifs_tcp_ses_lock); | 1888 | spin_unlock(&cifs_tcp_ses_lock); |
1826 | return; | 1889 | return; |
1827 | } | 1890 | } |
1828 | 1891 | ||
1829 | list_del_init(&tcon->tcon_list); | 1892 | list_del_init(&tcon->tcon_list); |
1830 | write_unlock(&cifs_tcp_ses_lock); | 1893 | spin_unlock(&cifs_tcp_ses_lock); |
1831 | 1894 | ||
1832 | xid = GetXid(); | 1895 | xid = GetXid(); |
1833 | CIFSSMBTDis(xid, tcon); | 1896 | CIFSSMBTDis(xid, tcon); |
@@ -1900,9 +1963,9 @@ cifs_get_tcon(struct cifsSesInfo *ses, struct smb_vol *volume_info) | |||
1900 | tcon->nocase = volume_info->nocase; | 1963 | tcon->nocase = volume_info->nocase; |
1901 | tcon->local_lease = volume_info->local_lease; | 1964 | tcon->local_lease = volume_info->local_lease; |
1902 | 1965 | ||
1903 | write_lock(&cifs_tcp_ses_lock); | 1966 | spin_lock(&cifs_tcp_ses_lock); |
1904 | list_add(&tcon->tcon_list, &ses->tcon_list); | 1967 | list_add(&tcon->tcon_list, &ses->tcon_list); |
1905 | write_unlock(&cifs_tcp_ses_lock); | 1968 | spin_unlock(&cifs_tcp_ses_lock); |
1906 | 1969 | ||
1907 | cifs_fscache_get_super_cookie(tcon); | 1970 | cifs_fscache_get_super_cookie(tcon); |
1908 | 1971 | ||
@@ -1913,6 +1976,23 @@ out_fail: | |||
1913 | return ERR_PTR(rc); | 1976 | return ERR_PTR(rc); |
1914 | } | 1977 | } |
1915 | 1978 | ||
1979 | void | ||
1980 | cifs_put_tlink(struct tcon_link *tlink) | ||
1981 | { | ||
1982 | if (!tlink || IS_ERR(tlink)) | ||
1983 | return; | ||
1984 | |||
1985 | if (!atomic_dec_and_test(&tlink->tl_count) || | ||
1986 | test_bit(TCON_LINK_IN_TREE, &tlink->tl_flags)) { | ||
1987 | tlink->tl_time = jiffies; | ||
1988 | return; | ||
1989 | } | ||
1990 | |||
1991 | if (!IS_ERR(tlink_tcon(tlink))) | ||
1992 | cifs_put_tcon(tlink_tcon(tlink)); | ||
1993 | kfree(tlink); | ||
1994 | return; | ||
1995 | } | ||
1916 | 1996 | ||
1917 | int | 1997 | int |
1918 | get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, | 1998 | get_dfs_path(int xid, struct cifsSesInfo *pSesInfo, const char *old_path, |
@@ -1997,6 +2077,33 @@ static void rfc1002mangle(char *target, char *source, unsigned int length) | |||
1997 | 2077 | ||
1998 | } | 2078 | } |
1999 | 2079 | ||
2080 | static int | ||
2081 | bind_socket(struct TCP_Server_Info *server) | ||
2082 | { | ||
2083 | int rc = 0; | ||
2084 | if (server->srcaddr.ss_family != AF_UNSPEC) { | ||
2085 | /* Bind to the specified local IP address */ | ||
2086 | struct socket *socket = server->ssocket; | ||
2087 | rc = socket->ops->bind(socket, | ||
2088 | (struct sockaddr *) &server->srcaddr, | ||
2089 | sizeof(server->srcaddr)); | ||
2090 | if (rc < 0) { | ||
2091 | struct sockaddr_in *saddr4; | ||
2092 | struct sockaddr_in6 *saddr6; | ||
2093 | saddr4 = (struct sockaddr_in *)&server->srcaddr; | ||
2094 | saddr6 = (struct sockaddr_in6 *)&server->srcaddr; | ||
2095 | if (saddr6->sin6_family == AF_INET6) | ||
2096 | cERROR(1, "cifs: " | ||
2097 | "Failed to bind to: %pI6c, error: %d\n", | ||
2098 | &saddr6->sin6_addr, rc); | ||
2099 | else | ||
2100 | cERROR(1, "cifs: " | ||
2101 | "Failed to bind to: %pI4, error: %d\n", | ||
2102 | &saddr4->sin_addr.s_addr, rc); | ||
2103 | } | ||
2104 | } | ||
2105 | return rc; | ||
2106 | } | ||
2000 | 2107 | ||
2001 | static int | 2108 | static int |
2002 | ipv4_connect(struct TCP_Server_Info *server) | 2109 | ipv4_connect(struct TCP_Server_Info *server) |
@@ -2022,6 +2129,10 @@ ipv4_connect(struct TCP_Server_Info *server) | |||
2022 | cifs_reclassify_socket4(socket); | 2129 | cifs_reclassify_socket4(socket); |
2023 | } | 2130 | } |
2024 | 2131 | ||
2132 | rc = bind_socket(server); | ||
2133 | if (rc < 0) | ||
2134 | return rc; | ||
2135 | |||
2025 | /* user overrode default port */ | 2136 | /* user overrode default port */ |
2026 | if (server->addr.sockAddr.sin_port) { | 2137 | if (server->addr.sockAddr.sin_port) { |
2027 | rc = socket->ops->connect(socket, (struct sockaddr *) | 2138 | rc = socket->ops->connect(socket, (struct sockaddr *) |
@@ -2184,6 +2295,10 @@ ipv6_connect(struct TCP_Server_Info *server) | |||
2184 | cifs_reclassify_socket6(socket); | 2295 | cifs_reclassify_socket6(socket); |
2185 | } | 2296 | } |
2186 | 2297 | ||
2298 | rc = bind_socket(server); | ||
2299 | if (rc < 0) | ||
2300 | return rc; | ||
2301 | |||
2187 | /* user overrode default port */ | 2302 | /* user overrode default port */ |
2188 | if (server->addr.sockAddr6.sin6_port) { | 2303 | if (server->addr.sockAddr6.sin6_port) { |
2189 | rc = socket->ops->connect(socket, | 2304 | rc = socket->ops->connect(socket, |
@@ -2383,6 +2498,8 @@ convert_delimiter(char *path, char delim) | |||
2383 | static void setup_cifs_sb(struct smb_vol *pvolume_info, | 2498 | static void setup_cifs_sb(struct smb_vol *pvolume_info, |
2384 | struct cifs_sb_info *cifs_sb) | 2499 | struct cifs_sb_info *cifs_sb) |
2385 | { | 2500 | { |
2501 | INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks); | ||
2502 | |||
2386 | if (pvolume_info->rsize > CIFSMaxBufSize) { | 2503 | if (pvolume_info->rsize > CIFSMaxBufSize) { |
2387 | cERROR(1, "rsize %d too large, using MaxBufSize", | 2504 | cERROR(1, "rsize %d too large, using MaxBufSize", |
2388 | pvolume_info->rsize); | 2505 | pvolume_info->rsize); |
@@ -2462,10 +2579,21 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info, | |||
2462 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; | 2579 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM; |
2463 | if (pvolume_info->fsc) | 2580 | if (pvolume_info->fsc) |
2464 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; | 2581 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE; |
2582 | if (pvolume_info->multiuser) | ||
2583 | cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER | | ||
2584 | CIFS_MOUNT_NO_PERM); | ||
2465 | if (pvolume_info->direct_io) { | 2585 | if (pvolume_info->direct_io) { |
2466 | cFYI(1, "mounting share using direct i/o"); | 2586 | cFYI(1, "mounting share using direct i/o"); |
2467 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; | 2587 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO; |
2468 | } | 2588 | } |
2589 | if (pvolume_info->mfsymlinks) { | ||
2590 | if (pvolume_info->sfu_emul) { | ||
2591 | cERROR(1, "mount option mfsymlinks ignored if sfu " | ||
2592 | "mount option is used"); | ||
2593 | } else { | ||
2594 | cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS; | ||
2595 | } | ||
2596 | } | ||
2469 | 2597 | ||
2470 | if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) | 2598 | if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm)) |
2471 | cERROR(1, "mount option dynperm ignored if cifsacl " | 2599 | cERROR(1, "mount option dynperm ignored if cifsacl " |
@@ -2552,6 +2680,7 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb, | |||
2552 | struct TCP_Server_Info *srvTcp; | 2680 | struct TCP_Server_Info *srvTcp; |
2553 | char *full_path; | 2681 | char *full_path; |
2554 | char *mount_data = mount_data_global; | 2682 | char *mount_data = mount_data_global; |
2683 | struct tcon_link *tlink; | ||
2555 | #ifdef CONFIG_CIFS_DFS_UPCALL | 2684 | #ifdef CONFIG_CIFS_DFS_UPCALL |
2556 | struct dfs_info3_param *referrals = NULL; | 2685 | struct dfs_info3_param *referrals = NULL; |
2557 | unsigned int num_referrals = 0; | 2686 | unsigned int num_referrals = 0; |
@@ -2563,6 +2692,7 @@ try_mount_again: | |||
2563 | pSesInfo = NULL; | 2692 | pSesInfo = NULL; |
2564 | srvTcp = NULL; | 2693 | srvTcp = NULL; |
2565 | full_path = NULL; | 2694 | full_path = NULL; |
2695 | tlink = NULL; | ||
2566 | 2696 | ||
2567 | xid = GetXid(); | 2697 | xid = GetXid(); |
2568 | 2698 | ||
@@ -2638,8 +2768,6 @@ try_mount_again: | |||
2638 | goto remote_path_check; | 2768 | goto remote_path_check; |
2639 | } | 2769 | } |
2640 | 2770 | ||
2641 | cifs_sb->tcon = tcon; | ||
2642 | |||
2643 | /* do not care if following two calls succeed - informational */ | 2771 | /* do not care if following two calls succeed - informational */ |
2644 | if (!tcon->ipc) { | 2772 | if (!tcon->ipc) { |
2645 | CIFSSMBQFSDeviceInfo(xid, tcon); | 2773 | CIFSSMBQFSDeviceInfo(xid, tcon); |
@@ -2748,6 +2876,38 @@ remote_path_check: | |||
2748 | #endif | 2876 | #endif |
2749 | } | 2877 | } |
2750 | 2878 | ||
2879 | if (rc) | ||
2880 | goto mount_fail_check; | ||
2881 | |||
2882 | /* now, hang the tcon off of the superblock */ | ||
2883 | tlink = kzalloc(sizeof *tlink, GFP_KERNEL); | ||
2884 | if (tlink == NULL) { | ||
2885 | rc = -ENOMEM; | ||
2886 | goto mount_fail_check; | ||
2887 | } | ||
2888 | |||
2889 | tlink->tl_index = pSesInfo->linux_uid; | ||
2890 | tlink->tl_tcon = tcon; | ||
2891 | tlink->tl_time = jiffies; | ||
2892 | set_bit(TCON_LINK_MASTER, &tlink->tl_flags); | ||
2893 | set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags); | ||
2894 | |||
2895 | rc = radix_tree_preload(GFP_KERNEL); | ||
2896 | if (rc == -ENOMEM) { | ||
2897 | kfree(tlink); | ||
2898 | goto mount_fail_check; | ||
2899 | } | ||
2900 | |||
2901 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
2902 | radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink); | ||
2903 | radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid, | ||
2904 | CIFS_TLINK_MASTER_TAG); | ||
2905 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
2906 | radix_tree_preload_end(); | ||
2907 | |||
2908 | queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, | ||
2909 | TLINK_IDLE_EXPIRE); | ||
2910 | |||
2751 | mount_fail_check: | 2911 | mount_fail_check: |
2752 | /* on error free sesinfo and tcon struct if needed */ | 2912 | /* on error free sesinfo and tcon struct if needed */ |
2753 | if (rc) { | 2913 | if (rc) { |
@@ -2825,14 +2985,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
2825 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 2985 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
2826 | if ((global_secflags & CIFSSEC_MAY_LANMAN) && | 2986 | if ((global_secflags & CIFSSEC_MAY_LANMAN) && |
2827 | (ses->server->secType == LANMAN)) | 2987 | (ses->server->secType == LANMAN)) |
2828 | calc_lanman_hash(tcon->password, ses->server->cryptKey, | 2988 | calc_lanman_hash(tcon->password, ses->cryptKey, |
2829 | ses->server->secMode & | 2989 | ses->server->secMode & |
2830 | SECMODE_PW_ENCRYPT ? true : false, | 2990 | SECMODE_PW_ENCRYPT ? true : false, |
2831 | bcc_ptr); | 2991 | bcc_ptr); |
2832 | else | 2992 | else |
2833 | #endif /* CIFS_WEAK_PW_HASH */ | 2993 | #endif /* CIFS_WEAK_PW_HASH */ |
2834 | SMBNTencrypt(tcon->password, ses->server->cryptKey, | 2994 | SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr); |
2835 | bcc_ptr); | ||
2836 | 2995 | ||
2837 | bcc_ptr += CIFS_SESS_KEY_SIZE; | 2996 | bcc_ptr += CIFS_SESS_KEY_SIZE; |
2838 | if (ses->capabilities & CAP_UNICODE) { | 2997 | if (ses->capabilities & CAP_UNICODE) { |
@@ -2934,19 +3093,39 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses, | |||
2934 | int | 3093 | int |
2935 | cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) | 3094 | cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb) |
2936 | { | 3095 | { |
2937 | int rc = 0; | 3096 | int i, ret; |
2938 | char *tmp; | 3097 | char *tmp; |
3098 | struct tcon_link *tlink[8]; | ||
3099 | unsigned long index = 0; | ||
3100 | |||
3101 | cancel_delayed_work_sync(&cifs_sb->prune_tlinks); | ||
3102 | |||
3103 | do { | ||
3104 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3105 | ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, | ||
3106 | (void **)tlink, index, | ||
3107 | ARRAY_SIZE(tlink)); | ||
3108 | /* increment index for next pass */ | ||
3109 | if (ret > 0) | ||
3110 | index = tlink[ret - 1]->tl_index + 1; | ||
3111 | for (i = 0; i < ret; i++) { | ||
3112 | cifs_get_tlink(tlink[i]); | ||
3113 | clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags); | ||
3114 | radix_tree_delete(&cifs_sb->tlink_tree, | ||
3115 | tlink[i]->tl_index); | ||
3116 | } | ||
3117 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
2939 | 3118 | ||
2940 | if (cifs_sb->tcon) | 3119 | for (i = 0; i < ret; i++) |
2941 | cifs_put_tcon(cifs_sb->tcon); | 3120 | cifs_put_tlink(tlink[i]); |
3121 | } while (ret != 0); | ||
2942 | 3122 | ||
2943 | cifs_sb->tcon = NULL; | ||
2944 | tmp = cifs_sb->prepath; | 3123 | tmp = cifs_sb->prepath; |
2945 | cifs_sb->prepathlen = 0; | 3124 | cifs_sb->prepathlen = 0; |
2946 | cifs_sb->prepath = NULL; | 3125 | cifs_sb->prepath = NULL; |
2947 | kfree(tmp); | 3126 | kfree(tmp); |
2948 | 3127 | ||
2949 | return rc; | 3128 | return 0; |
2950 | } | 3129 | } |
2951 | 3130 | ||
2952 | int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses) | 3131 | int cifs_negotiate_protocol(unsigned int xid, struct cifsSesInfo *ses) |
@@ -2997,6 +3176,15 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses, | |||
2997 | if (rc) { | 3176 | if (rc) { |
2998 | cERROR(1, "Send error in SessSetup = %d", rc); | 3177 | cERROR(1, "Send error in SessSetup = %d", rc); |
2999 | } else { | 3178 | } else { |
3179 | mutex_lock(&ses->server->srv_mutex); | ||
3180 | if (!server->session_estab) { | ||
3181 | memcpy(&server->session_key.data, | ||
3182 | &ses->auth_key.data, ses->auth_key.len); | ||
3183 | server->session_key.len = ses->auth_key.len; | ||
3184 | ses->server->session_estab = true; | ||
3185 | } | ||
3186 | mutex_unlock(&server->srv_mutex); | ||
3187 | |||
3000 | cFYI(1, "CIFS Session Established successfully"); | 3188 | cFYI(1, "CIFS Session Established successfully"); |
3001 | spin_lock(&GlobalMid_Lock); | 3189 | spin_lock(&GlobalMid_Lock); |
3002 | ses->status = CifsGood; | 3190 | ses->status = CifsGood; |
@@ -3007,3 +3195,237 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses, | |||
3007 | return rc; | 3195 | return rc; |
3008 | } | 3196 | } |
3009 | 3197 | ||
3198 | static struct cifsTconInfo * | ||
3199 | cifs_construct_tcon(struct cifs_sb_info *cifs_sb, uid_t fsuid) | ||
3200 | { | ||
3201 | struct cifsTconInfo *master_tcon = cifs_sb_master_tcon(cifs_sb); | ||
3202 | struct cifsSesInfo *ses; | ||
3203 | struct cifsTconInfo *tcon = NULL; | ||
3204 | struct smb_vol *vol_info; | ||
3205 | char username[MAX_USERNAME_SIZE + 1]; | ||
3206 | |||
3207 | vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL); | ||
3208 | if (vol_info == NULL) { | ||
3209 | tcon = ERR_PTR(-ENOMEM); | ||
3210 | goto out; | ||
3211 | } | ||
3212 | |||
3213 | snprintf(username, MAX_USERNAME_SIZE, "krb50x%x", fsuid); | ||
3214 | vol_info->username = username; | ||
3215 | vol_info->local_nls = cifs_sb->local_nls; | ||
3216 | vol_info->linux_uid = fsuid; | ||
3217 | vol_info->cred_uid = fsuid; | ||
3218 | vol_info->UNC = master_tcon->treeName; | ||
3219 | vol_info->retry = master_tcon->retry; | ||
3220 | vol_info->nocase = master_tcon->nocase; | ||
3221 | vol_info->local_lease = master_tcon->local_lease; | ||
3222 | vol_info->no_linux_ext = !master_tcon->unix_ext; | ||
3223 | |||
3224 | /* FIXME: allow for other secFlg settings */ | ||
3225 | vol_info->secFlg = CIFSSEC_MUST_KRB5; | ||
3226 | |||
3227 | /* get a reference for the same TCP session */ | ||
3228 | spin_lock(&cifs_tcp_ses_lock); | ||
3229 | ++master_tcon->ses->server->srv_count; | ||
3230 | spin_unlock(&cifs_tcp_ses_lock); | ||
3231 | |||
3232 | ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info); | ||
3233 | if (IS_ERR(ses)) { | ||
3234 | tcon = (struct cifsTconInfo *)ses; | ||
3235 | cifs_put_tcp_session(master_tcon->ses->server); | ||
3236 | goto out; | ||
3237 | } | ||
3238 | |||
3239 | tcon = cifs_get_tcon(ses, vol_info); | ||
3240 | if (IS_ERR(tcon)) { | ||
3241 | cifs_put_smb_ses(ses); | ||
3242 | goto out; | ||
3243 | } | ||
3244 | |||
3245 | if (ses->capabilities & CAP_UNIX) | ||
3246 | reset_cifs_unix_caps(0, tcon, NULL, vol_info); | ||
3247 | out: | ||
3248 | kfree(vol_info); | ||
3249 | |||
3250 | return tcon; | ||
3251 | } | ||
3252 | |||
3253 | static struct tcon_link * | ||
3254 | cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb) | ||
3255 | { | ||
3256 | struct tcon_link *tlink; | ||
3257 | unsigned int ret; | ||
3258 | |||
3259 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3260 | ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink, | ||
3261 | 0, 1, CIFS_TLINK_MASTER_TAG); | ||
3262 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3263 | |||
3264 | /* the master tcon should always be present */ | ||
3265 | if (ret == 0) | ||
3266 | BUG(); | ||
3267 | |||
3268 | return tlink; | ||
3269 | } | ||
3270 | |||
3271 | struct cifsTconInfo * | ||
3272 | cifs_sb_master_tcon(struct cifs_sb_info *cifs_sb) | ||
3273 | { | ||
3274 | return tlink_tcon(cifs_sb_master_tlink(cifs_sb)); | ||
3275 | } | ||
3276 | |||
3277 | static int | ||
3278 | cifs_sb_tcon_pending_wait(void *unused) | ||
3279 | { | ||
3280 | schedule(); | ||
3281 | return signal_pending(current) ? -ERESTARTSYS : 0; | ||
3282 | } | ||
3283 | |||
3284 | /* | ||
3285 | * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the | ||
3286 | * current task. | ||
3287 | * | ||
3288 | * If the superblock doesn't refer to a multiuser mount, then just return | ||
3289 | * the master tcon for the mount. | ||
3290 | * | ||
3291 | * First, search the radix tree for an existing tcon for this fsuid. If one | ||
3292 | * exists, then check to see if it's pending construction. If it is then wait | ||
3293 | * for construction to complete. Once it's no longer pending, check to see if | ||
3294 | * it failed and either return an error or retry construction, depending on | ||
3295 | * the timeout. | ||
3296 | * | ||
3297 | * If one doesn't exist then insert a new tcon_link struct into the tree and | ||
3298 | * try to construct a new one. | ||
3299 | */ | ||
3300 | struct tcon_link * | ||
3301 | cifs_sb_tlink(struct cifs_sb_info *cifs_sb) | ||
3302 | { | ||
3303 | int ret; | ||
3304 | unsigned long fsuid = (unsigned long) current_fsuid(); | ||
3305 | struct tcon_link *tlink, *newtlink; | ||
3306 | |||
3307 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) | ||
3308 | return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); | ||
3309 | |||
3310 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3311 | tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); | ||
3312 | if (tlink) | ||
3313 | cifs_get_tlink(tlink); | ||
3314 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3315 | |||
3316 | if (tlink == NULL) { | ||
3317 | newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL); | ||
3318 | if (newtlink == NULL) | ||
3319 | return ERR_PTR(-ENOMEM); | ||
3320 | newtlink->tl_index = fsuid; | ||
3321 | newtlink->tl_tcon = ERR_PTR(-EACCES); | ||
3322 | set_bit(TCON_LINK_PENDING, &newtlink->tl_flags); | ||
3323 | set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags); | ||
3324 | cifs_get_tlink(newtlink); | ||
3325 | |||
3326 | ret = radix_tree_preload(GFP_KERNEL); | ||
3327 | if (ret != 0) { | ||
3328 | kfree(newtlink); | ||
3329 | return ERR_PTR(ret); | ||
3330 | } | ||
3331 | |||
3332 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3333 | /* was one inserted after previous search? */ | ||
3334 | tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid); | ||
3335 | if (tlink) { | ||
3336 | cifs_get_tlink(tlink); | ||
3337 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3338 | radix_tree_preload_end(); | ||
3339 | kfree(newtlink); | ||
3340 | goto wait_for_construction; | ||
3341 | } | ||
3342 | ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink); | ||
3343 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3344 | radix_tree_preload_end(); | ||
3345 | if (ret) { | ||
3346 | kfree(newtlink); | ||
3347 | return ERR_PTR(ret); | ||
3348 | } | ||
3349 | tlink = newtlink; | ||
3350 | } else { | ||
3351 | wait_for_construction: | ||
3352 | ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, | ||
3353 | cifs_sb_tcon_pending_wait, | ||
3354 | TASK_INTERRUPTIBLE); | ||
3355 | if (ret) { | ||
3356 | cifs_put_tlink(tlink); | ||
3357 | return ERR_PTR(ret); | ||
3358 | } | ||
3359 | |||
3360 | /* if it's good, return it */ | ||
3361 | if (!IS_ERR(tlink->tl_tcon)) | ||
3362 | return tlink; | ||
3363 | |||
3364 | /* return error if we tried this already recently */ | ||
3365 | if (time_before(jiffies, tlink->tl_time + TLINK_ERROR_EXPIRE)) { | ||
3366 | cifs_put_tlink(tlink); | ||
3367 | return ERR_PTR(-EACCES); | ||
3368 | } | ||
3369 | |||
3370 | if (test_and_set_bit(TCON_LINK_PENDING, &tlink->tl_flags)) | ||
3371 | goto wait_for_construction; | ||
3372 | } | ||
3373 | |||
3374 | tlink->tl_tcon = cifs_construct_tcon(cifs_sb, fsuid); | ||
3375 | clear_bit(TCON_LINK_PENDING, &tlink->tl_flags); | ||
3376 | wake_up_bit(&tlink->tl_flags, TCON_LINK_PENDING); | ||
3377 | |||
3378 | if (IS_ERR(tlink->tl_tcon)) { | ||
3379 | cifs_put_tlink(tlink); | ||
3380 | return ERR_PTR(-EACCES); | ||
3381 | } | ||
3382 | |||
3383 | return tlink; | ||
3384 | } | ||
3385 | |||
3386 | /* | ||
3387 | * periodic workqueue job that scans tcon_tree for a superblock and closes | ||
3388 | * out tcons. | ||
3389 | */ | ||
3390 | static void | ||
3391 | cifs_prune_tlinks(struct work_struct *work) | ||
3392 | { | ||
3393 | struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info, | ||
3394 | prune_tlinks.work); | ||
3395 | struct tcon_link *tlink[8]; | ||
3396 | unsigned long now = jiffies; | ||
3397 | unsigned long index = 0; | ||
3398 | int i, ret; | ||
3399 | |||
3400 | do { | ||
3401 | spin_lock(&cifs_sb->tlink_tree_lock); | ||
3402 | ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree, | ||
3403 | (void **)tlink, index, | ||
3404 | ARRAY_SIZE(tlink)); | ||
3405 | /* increment index for next pass */ | ||
3406 | if (ret > 0) | ||
3407 | index = tlink[ret - 1]->tl_index + 1; | ||
3408 | for (i = 0; i < ret; i++) { | ||
3409 | if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) || | ||
3410 | atomic_read(&tlink[i]->tl_count) != 0 || | ||
3411 | time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE, | ||
3412 | now)) { | ||
3413 | tlink[i] = NULL; | ||
3414 | continue; | ||
3415 | } | ||
3416 | cifs_get_tlink(tlink[i]); | ||
3417 | clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags); | ||
3418 | radix_tree_delete(&cifs_sb->tlink_tree, | ||
3419 | tlink[i]->tl_index); | ||
3420 | } | ||
3421 | spin_unlock(&cifs_sb->tlink_tree_lock); | ||
3422 | |||
3423 | for (i = 0; i < ret; i++) { | ||
3424 | if (tlink[i] != NULL) | ||
3425 | cifs_put_tlink(tlink[i]); | ||
3426 | } | ||
3427 | } while (ret != 0); | ||
3428 | |||
3429 | queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks, | ||
3430 | TLINK_IDLE_EXPIRE); | ||
3431 | } | ||
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index f9ed0751cc12..3840eddbfb7a 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c | |||
@@ -54,18 +54,18 @@ build_path_from_dentry(struct dentry *direntry) | |||
54 | int dfsplen; | 54 | int dfsplen; |
55 | char *full_path; | 55 | char *full_path; |
56 | char dirsep; | 56 | char dirsep; |
57 | struct cifs_sb_info *cifs_sb; | 57 | struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); |
58 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
58 | 59 | ||
59 | if (direntry == NULL) | 60 | if (direntry == NULL) |
60 | return NULL; /* not much we can do if dentry is freed and | 61 | return NULL; /* not much we can do if dentry is freed and |
61 | we need to reopen the file after it was closed implicitly | 62 | we need to reopen the file after it was closed implicitly |
62 | when the server crashed */ | 63 | when the server crashed */ |
63 | 64 | ||
64 | cifs_sb = CIFS_SB(direntry->d_sb); | ||
65 | dirsep = CIFS_DIR_SEP(cifs_sb); | 65 | dirsep = CIFS_DIR_SEP(cifs_sb); |
66 | pplen = cifs_sb->prepathlen; | 66 | pplen = cifs_sb->prepathlen; |
67 | if (cifs_sb->tcon && (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS)) | 67 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) |
68 | dfsplen = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE + 1); | 68 | dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); |
69 | else | 69 | else |
70 | dfsplen = 0; | 70 | dfsplen = 0; |
71 | cifs_bp_rename_retry: | 71 | cifs_bp_rename_retry: |
@@ -117,7 +117,7 @@ cifs_bp_rename_retry: | |||
117 | /* BB test paths to Windows with '/' in the midst of prepath */ | 117 | /* BB test paths to Windows with '/' in the midst of prepath */ |
118 | 118 | ||
119 | if (dfsplen) { | 119 | if (dfsplen) { |
120 | strncpy(full_path, cifs_sb->tcon->treeName, dfsplen); | 120 | strncpy(full_path, tcon->treeName, dfsplen); |
121 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { | 121 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { |
122 | int i; | 122 | int i; |
123 | for (i = 0; i < dfsplen; i++) { | 123 | for (i = 0; i < dfsplen; i++) { |
@@ -130,135 +130,6 @@ cifs_bp_rename_retry: | |||
130 | return full_path; | 130 | return full_path; |
131 | } | 131 | } |
132 | 132 | ||
133 | struct cifsFileInfo * | ||
134 | cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle, | ||
135 | struct file *file, struct vfsmount *mnt, unsigned int oflags) | ||
136 | { | ||
137 | int oplock = 0; | ||
138 | struct cifsFileInfo *pCifsFile; | ||
139 | struct cifsInodeInfo *pCifsInode; | ||
140 | struct cifs_sb_info *cifs_sb = CIFS_SB(mnt->mnt_sb); | ||
141 | |||
142 | pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | ||
143 | if (pCifsFile == NULL) | ||
144 | return pCifsFile; | ||
145 | |||
146 | if (oplockEnabled) | ||
147 | oplock = REQ_OPLOCK; | ||
148 | |||
149 | pCifsFile->netfid = fileHandle; | ||
150 | pCifsFile->pid = current->tgid; | ||
151 | pCifsFile->pInode = igrab(newinode); | ||
152 | pCifsFile->mnt = mnt; | ||
153 | pCifsFile->pfile = file; | ||
154 | pCifsFile->invalidHandle = false; | ||
155 | pCifsFile->closePend = false; | ||
156 | mutex_init(&pCifsFile->fh_mutex); | ||
157 | mutex_init(&pCifsFile->lock_mutex); | ||
158 | INIT_LIST_HEAD(&pCifsFile->llist); | ||
159 | atomic_set(&pCifsFile->count, 1); | ||
160 | INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); | ||
161 | |||
162 | write_lock(&GlobalSMBSeslock); | ||
163 | list_add(&pCifsFile->tlist, &cifs_sb->tcon->openFileList); | ||
164 | pCifsInode = CIFS_I(newinode); | ||
165 | if (pCifsInode) { | ||
166 | /* if readable file instance put first in list*/ | ||
167 | if (oflags & FMODE_READ) | ||
168 | list_add(&pCifsFile->flist, &pCifsInode->openFileList); | ||
169 | else | ||
170 | list_add_tail(&pCifsFile->flist, | ||
171 | &pCifsInode->openFileList); | ||
172 | |||
173 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | ||
174 | pCifsInode->clientCanCacheAll = true; | ||
175 | pCifsInode->clientCanCacheRead = true; | ||
176 | cFYI(1, "Exclusive Oplock inode %p", newinode); | ||
177 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
178 | pCifsInode->clientCanCacheRead = true; | ||
179 | } | ||
180 | write_unlock(&GlobalSMBSeslock); | ||
181 | |||
182 | file->private_data = pCifsFile; | ||
183 | |||
184 | return pCifsFile; | ||
185 | } | ||
186 | |||
187 | int cifs_posix_open(char *full_path, struct inode **pinode, | ||
188 | struct super_block *sb, int mode, int oflags, | ||
189 | __u32 *poplock, __u16 *pnetfid, int xid) | ||
190 | { | ||
191 | int rc; | ||
192 | FILE_UNIX_BASIC_INFO *presp_data; | ||
193 | __u32 posix_flags = 0; | ||
194 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
195 | struct cifs_fattr fattr; | ||
196 | |||
197 | cFYI(1, "posix open %s", full_path); | ||
198 | |||
199 | presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | ||
200 | if (presp_data == NULL) | ||
201 | return -ENOMEM; | ||
202 | |||
203 | /* So far cifs posix extensions can only map the following flags. | ||
204 | There are other valid fmode oflags such as FMODE_LSEEK, FMODE_PREAD, but | ||
205 | so far we do not seem to need them, and we can treat them as local only */ | ||
206 | if ((oflags & (FMODE_READ | FMODE_WRITE)) == | ||
207 | (FMODE_READ | FMODE_WRITE)) | ||
208 | posix_flags = SMB_O_RDWR; | ||
209 | else if (oflags & FMODE_READ) | ||
210 | posix_flags = SMB_O_RDONLY; | ||
211 | else if (oflags & FMODE_WRITE) | ||
212 | posix_flags = SMB_O_WRONLY; | ||
213 | if (oflags & O_CREAT) | ||
214 | posix_flags |= SMB_O_CREAT; | ||
215 | if (oflags & O_EXCL) | ||
216 | posix_flags |= SMB_O_EXCL; | ||
217 | if (oflags & O_TRUNC) | ||
218 | posix_flags |= SMB_O_TRUNC; | ||
219 | /* be safe and imply O_SYNC for O_DSYNC */ | ||
220 | if (oflags & O_DSYNC) | ||
221 | posix_flags |= SMB_O_SYNC; | ||
222 | if (oflags & O_DIRECTORY) | ||
223 | posix_flags |= SMB_O_DIRECTORY; | ||
224 | if (oflags & O_NOFOLLOW) | ||
225 | posix_flags |= SMB_O_NOFOLLOW; | ||
226 | if (oflags & O_DIRECT) | ||
227 | posix_flags |= SMB_O_DIRECT; | ||
228 | |||
229 | mode &= ~current_umask(); | ||
230 | rc = CIFSPOSIXCreate(xid, cifs_sb->tcon, posix_flags, mode, | ||
231 | pnetfid, presp_data, poplock, full_path, | ||
232 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
233 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
234 | if (rc) | ||
235 | goto posix_open_ret; | ||
236 | |||
237 | if (presp_data->Type == cpu_to_le32(-1)) | ||
238 | goto posix_open_ret; /* open ok, caller does qpathinfo */ | ||
239 | |||
240 | if (!pinode) | ||
241 | goto posix_open_ret; /* caller does not need info */ | ||
242 | |||
243 | cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); | ||
244 | |||
245 | /* get new inode and set it up */ | ||
246 | if (*pinode == NULL) { | ||
247 | cifs_fill_uniqueid(sb, &fattr); | ||
248 | *pinode = cifs_iget(sb, &fattr); | ||
249 | if (!*pinode) { | ||
250 | rc = -ENOMEM; | ||
251 | goto posix_open_ret; | ||
252 | } | ||
253 | } else { | ||
254 | cifs_fattr_to_inode(*pinode, &fattr); | ||
255 | } | ||
256 | |||
257 | posix_open_ret: | ||
258 | kfree(presp_data); | ||
259 | return rc; | ||
260 | } | ||
261 | |||
262 | static void setup_cifs_dentry(struct cifsTconInfo *tcon, | 133 | static void setup_cifs_dentry(struct cifsTconInfo *tcon, |
263 | struct dentry *direntry, | 134 | struct dentry *direntry, |
264 | struct inode *newinode) | 135 | struct inode *newinode) |
@@ -291,6 +162,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
291 | int desiredAccess = GENERIC_READ | GENERIC_WRITE; | 162 | int desiredAccess = GENERIC_READ | GENERIC_WRITE; |
292 | __u16 fileHandle; | 163 | __u16 fileHandle; |
293 | struct cifs_sb_info *cifs_sb; | 164 | struct cifs_sb_info *cifs_sb; |
165 | struct tcon_link *tlink; | ||
294 | struct cifsTconInfo *tcon; | 166 | struct cifsTconInfo *tcon; |
295 | char *full_path = NULL; | 167 | char *full_path = NULL; |
296 | FILE_ALL_INFO *buf = NULL; | 168 | FILE_ALL_INFO *buf = NULL; |
@@ -300,21 +172,26 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
300 | xid = GetXid(); | 172 | xid = GetXid(); |
301 | 173 | ||
302 | cifs_sb = CIFS_SB(inode->i_sb); | 174 | cifs_sb = CIFS_SB(inode->i_sb); |
303 | tcon = cifs_sb->tcon; | 175 | tlink = cifs_sb_tlink(cifs_sb); |
304 | 176 | if (IS_ERR(tlink)) { | |
305 | full_path = build_path_from_dentry(direntry); | 177 | FreeXid(xid); |
306 | if (full_path == NULL) { | 178 | return PTR_ERR(tlink); |
307 | rc = -ENOMEM; | ||
308 | goto cifs_create_out; | ||
309 | } | 179 | } |
180 | tcon = tlink_tcon(tlink); | ||
310 | 181 | ||
311 | if (oplockEnabled) | 182 | if (oplockEnabled) |
312 | oplock = REQ_OPLOCK; | 183 | oplock = REQ_OPLOCK; |
313 | 184 | ||
314 | if (nd && (nd->flags & LOOKUP_OPEN)) | 185 | if (nd && (nd->flags & LOOKUP_OPEN)) |
315 | oflags = nd->intent.open.flags; | 186 | oflags = nd->intent.open.file->f_flags; |
316 | else | 187 | else |
317 | oflags = FMODE_READ | SMB_O_CREAT; | 188 | oflags = O_RDONLY | O_CREAT; |
189 | |||
190 | full_path = build_path_from_dentry(direntry); | ||
191 | if (full_path == NULL) { | ||
192 | rc = -ENOMEM; | ||
193 | goto cifs_create_out; | ||
194 | } | ||
318 | 195 | ||
319 | if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && | 196 | if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && |
320 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 197 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
@@ -344,9 +221,9 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
344 | /* if the file is going to stay open, then we | 221 | /* if the file is going to stay open, then we |
345 | need to set the desired access properly */ | 222 | need to set the desired access properly */ |
346 | desiredAccess = 0; | 223 | desiredAccess = 0; |
347 | if (oflags & FMODE_READ) | 224 | if (OPEN_FMODE(oflags) & FMODE_READ) |
348 | desiredAccess |= GENERIC_READ; /* is this too little? */ | 225 | desiredAccess |= GENERIC_READ; /* is this too little? */ |
349 | if (oflags & FMODE_WRITE) | 226 | if (OPEN_FMODE(oflags) & FMODE_WRITE) |
350 | desiredAccess |= GENERIC_WRITE; | 227 | desiredAccess |= GENERIC_WRITE; |
351 | 228 | ||
352 | if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) | 229 | if ((oflags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) |
@@ -375,7 +252,7 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode, | |||
375 | if (!tcon->unix_ext && (mode & S_IWUGO) == 0) | 252 | if (!tcon->unix_ext && (mode & S_IWUGO) == 0) |
376 | create_options |= CREATE_OPTION_READONLY; | 253 | create_options |= CREATE_OPTION_READONLY; |
377 | 254 | ||
378 | if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) | 255 | if (tcon->ses->capabilities & CAP_NT_SMBS) |
379 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, | 256 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, |
380 | desiredAccess, create_options, | 257 | desiredAccess, create_options, |
381 | &fileHandle, &oplock, buf, cifs_sb->local_nls, | 258 | &fileHandle, &oplock, buf, cifs_sb->local_nls, |
@@ -467,8 +344,7 @@ cifs_create_set_dentry: | |||
467 | goto cifs_create_out; | 344 | goto cifs_create_out; |
468 | } | 345 | } |
469 | 346 | ||
470 | pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp, | 347 | pfile_info = cifs_new_fileinfo(fileHandle, filp, tlink, oplock); |
471 | nd->path.mnt, oflags); | ||
472 | if (pfile_info == NULL) { | 348 | if (pfile_info == NULL) { |
473 | fput(filp); | 349 | fput(filp); |
474 | CIFSSMBClose(xid, tcon, fileHandle); | 350 | CIFSSMBClose(xid, tcon, fileHandle); |
@@ -481,6 +357,7 @@ cifs_create_set_dentry: | |||
481 | cifs_create_out: | 357 | cifs_create_out: |
482 | kfree(buf); | 358 | kfree(buf); |
483 | kfree(full_path); | 359 | kfree(full_path); |
360 | cifs_put_tlink(tlink); | ||
484 | FreeXid(xid); | 361 | FreeXid(xid); |
485 | return rc; | 362 | return rc; |
486 | } | 363 | } |
@@ -491,6 +368,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
491 | int rc = -EPERM; | 368 | int rc = -EPERM; |
492 | int xid; | 369 | int xid; |
493 | struct cifs_sb_info *cifs_sb; | 370 | struct cifs_sb_info *cifs_sb; |
371 | struct tcon_link *tlink; | ||
494 | struct cifsTconInfo *pTcon; | 372 | struct cifsTconInfo *pTcon; |
495 | char *full_path = NULL; | 373 | char *full_path = NULL; |
496 | struct inode *newinode = NULL; | 374 | struct inode *newinode = NULL; |
@@ -503,10 +381,14 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, | |||
503 | if (!old_valid_dev(device_number)) | 381 | if (!old_valid_dev(device_number)) |
504 | return -EINVAL; | 382 | return -EINVAL; |
505 | 383 | ||
506 | xid = GetXid(); | ||
507 | |||
508 | cifs_sb = CIFS_SB(inode->i_sb); | 384 | cifs_sb = CIFS_SB(inode->i_sb); |
509 | pTcon = cifs_sb->tcon; | 385 | tlink = cifs_sb_tlink(cifs_sb); |
386 | if (IS_ERR(tlink)) | ||
387 | return PTR_ERR(tlink); | ||
388 | |||
389 | pTcon = tlink_tcon(tlink); | ||
390 | |||
391 | xid = GetXid(); | ||
510 | 392 | ||
511 | full_path = build_path_from_dentry(direntry); | 393 | full_path = build_path_from_dentry(direntry); |
512 | if (full_path == NULL) { | 394 | if (full_path == NULL) { |
@@ -606,6 +488,7 @@ mknod_out: | |||
606 | kfree(full_path); | 488 | kfree(full_path); |
607 | kfree(buf); | 489 | kfree(buf); |
608 | FreeXid(xid); | 490 | FreeXid(xid); |
491 | cifs_put_tlink(tlink); | ||
609 | return rc; | 492 | return rc; |
610 | } | 493 | } |
611 | 494 | ||
@@ -619,6 +502,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
619 | __u16 fileHandle = 0; | 502 | __u16 fileHandle = 0; |
620 | bool posix_open = false; | 503 | bool posix_open = false; |
621 | struct cifs_sb_info *cifs_sb; | 504 | struct cifs_sb_info *cifs_sb; |
505 | struct tcon_link *tlink; | ||
622 | struct cifsTconInfo *pTcon; | 506 | struct cifsTconInfo *pTcon; |
623 | struct cifsFileInfo *cfile; | 507 | struct cifsFileInfo *cfile; |
624 | struct inode *newInode = NULL; | 508 | struct inode *newInode = NULL; |
@@ -633,7 +517,12 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
633 | /* check whether path exists */ | 517 | /* check whether path exists */ |
634 | 518 | ||
635 | cifs_sb = CIFS_SB(parent_dir_inode->i_sb); | 519 | cifs_sb = CIFS_SB(parent_dir_inode->i_sb); |
636 | pTcon = cifs_sb->tcon; | 520 | tlink = cifs_sb_tlink(cifs_sb); |
521 | if (IS_ERR(tlink)) { | ||
522 | FreeXid(xid); | ||
523 | return (struct dentry *)tlink; | ||
524 | } | ||
525 | pTcon = tlink_tcon(tlink); | ||
637 | 526 | ||
638 | /* | 527 | /* |
639 | * Don't allow the separator character in a path component. | 528 | * Don't allow the separator character in a path component. |
@@ -644,8 +533,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
644 | for (i = 0; i < direntry->d_name.len; i++) | 533 | for (i = 0; i < direntry->d_name.len; i++) |
645 | if (direntry->d_name.name[i] == '\\') { | 534 | if (direntry->d_name.name[i] == '\\') { |
646 | cFYI(1, "Invalid file name"); | 535 | cFYI(1, "Invalid file name"); |
647 | FreeXid(xid); | 536 | rc = -EINVAL; |
648 | return ERR_PTR(-EINVAL); | 537 | goto lookup_out; |
649 | } | 538 | } |
650 | } | 539 | } |
651 | 540 | ||
@@ -655,7 +544,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
655 | */ | 544 | */ |
656 | if (nd && (nd->flags & LOOKUP_EXCL)) { | 545 | if (nd && (nd->flags & LOOKUP_EXCL)) { |
657 | d_instantiate(direntry, NULL); | 546 | d_instantiate(direntry, NULL); |
658 | return NULL; | 547 | rc = 0; |
548 | goto lookup_out; | ||
659 | } | 549 | } |
660 | 550 | ||
661 | /* can not grab the rename sem here since it would | 551 | /* can not grab the rename sem here since it would |
@@ -663,8 +553,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
663 | in which we already have the sb rename sem */ | 553 | in which we already have the sb rename sem */ |
664 | full_path = build_path_from_dentry(direntry); | 554 | full_path = build_path_from_dentry(direntry); |
665 | if (full_path == NULL) { | 555 | if (full_path == NULL) { |
666 | FreeXid(xid); | 556 | rc = -ENOMEM; |
667 | return ERR_PTR(-ENOMEM); | 557 | goto lookup_out; |
668 | } | 558 | } |
669 | 559 | ||
670 | if (direntry->d_inode != NULL) { | 560 | if (direntry->d_inode != NULL) { |
@@ -687,11 +577,11 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
687 | if (pTcon->unix_ext) { | 577 | if (pTcon->unix_ext) { |
688 | if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && | 578 | if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) && |
689 | (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && | 579 | (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open && |
690 | (nd->intent.open.flags & O_CREAT)) { | 580 | (nd->intent.open.file->f_flags & O_CREAT)) { |
691 | rc = cifs_posix_open(full_path, &newInode, | 581 | rc = cifs_posix_open(full_path, &newInode, |
692 | parent_dir_inode->i_sb, | 582 | parent_dir_inode->i_sb, |
693 | nd->intent.open.create_mode, | 583 | nd->intent.open.create_mode, |
694 | nd->intent.open.flags, &oplock, | 584 | nd->intent.open.file->f_flags, &oplock, |
695 | &fileHandle, xid); | 585 | &fileHandle, xid); |
696 | /* | 586 | /* |
697 | * The check below works around a bug in POSIX | 587 | * The check below works around a bug in POSIX |
@@ -727,9 +617,8 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
727 | goto lookup_out; | 617 | goto lookup_out; |
728 | } | 618 | } |
729 | 619 | ||
730 | cfile = cifs_new_fileinfo(newInode, fileHandle, filp, | 620 | cfile = cifs_new_fileinfo(fileHandle, filp, tlink, |
731 | nd->path.mnt, | 621 | oplock); |
732 | nd->intent.open.flags); | ||
733 | if (cfile == NULL) { | 622 | if (cfile == NULL) { |
734 | fput(filp); | 623 | fput(filp); |
735 | CIFSSMBClose(xid, pTcon, fileHandle); | 624 | CIFSSMBClose(xid, pTcon, fileHandle); |
@@ -759,6 +648,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, | |||
759 | 648 | ||
760 | lookup_out: | 649 | lookup_out: |
761 | kfree(full_path); | 650 | kfree(full_path); |
651 | cifs_put_tlink(tlink); | ||
762 | FreeXid(xid); | 652 | FreeXid(xid); |
763 | return ERR_PTR(rc); | 653 | return ERR_PTR(rc); |
764 | } | 654 | } |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index de748c652d11..45af003865d2 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -60,34 +60,32 @@ static inline int cifs_convert_flags(unsigned int flags) | |||
60 | FILE_READ_DATA); | 60 | FILE_READ_DATA); |
61 | } | 61 | } |
62 | 62 | ||
63 | static inline fmode_t cifs_posix_convert_flags(unsigned int flags) | 63 | static u32 cifs_posix_convert_flags(unsigned int flags) |
64 | { | 64 | { |
65 | fmode_t posix_flags = 0; | 65 | u32 posix_flags = 0; |
66 | 66 | ||
67 | if ((flags & O_ACCMODE) == O_RDONLY) | 67 | if ((flags & O_ACCMODE) == O_RDONLY) |
68 | posix_flags = FMODE_READ; | 68 | posix_flags = SMB_O_RDONLY; |
69 | else if ((flags & O_ACCMODE) == O_WRONLY) | 69 | else if ((flags & O_ACCMODE) == O_WRONLY) |
70 | posix_flags = FMODE_WRITE; | 70 | posix_flags = SMB_O_WRONLY; |
71 | else if ((flags & O_ACCMODE) == O_RDWR) { | 71 | else if ((flags & O_ACCMODE) == O_RDWR) |
72 | /* GENERIC_ALL is too much permission to request | 72 | posix_flags = SMB_O_RDWR; |
73 | can cause unnecessary access denied on create */ | 73 | |
74 | /* return GENERIC_ALL; */ | 74 | if (flags & O_CREAT) |
75 | posix_flags = FMODE_READ | FMODE_WRITE; | 75 | posix_flags |= SMB_O_CREAT; |
76 | } | 76 | if (flags & O_EXCL) |
77 | /* can not map O_CREAT or O_EXCL or O_TRUNC flags when | 77 | posix_flags |= SMB_O_EXCL; |
78 | reopening a file. They had their effect on the original open */ | 78 | if (flags & O_TRUNC) |
79 | if (flags & O_APPEND) | 79 | posix_flags |= SMB_O_TRUNC; |
80 | posix_flags |= (fmode_t)O_APPEND; | 80 | /* be safe and imply O_SYNC for O_DSYNC */ |
81 | if (flags & O_DSYNC) | 81 | if (flags & O_DSYNC) |
82 | posix_flags |= (fmode_t)O_DSYNC; | 82 | posix_flags |= SMB_O_SYNC; |
83 | if (flags & __O_SYNC) | ||
84 | posix_flags |= (fmode_t)__O_SYNC; | ||
85 | if (flags & O_DIRECTORY) | 83 | if (flags & O_DIRECTORY) |
86 | posix_flags |= (fmode_t)O_DIRECTORY; | 84 | posix_flags |= SMB_O_DIRECTORY; |
87 | if (flags & O_NOFOLLOW) | 85 | if (flags & O_NOFOLLOW) |
88 | posix_flags |= (fmode_t)O_NOFOLLOW; | 86 | posix_flags |= SMB_O_NOFOLLOW; |
89 | if (flags & O_DIRECT) | 87 | if (flags & O_DIRECT) |
90 | posix_flags |= (fmode_t)O_DIRECT; | 88 | posix_flags |= SMB_O_DIRECT; |
91 | 89 | ||
92 | return posix_flags; | 90 | return posix_flags; |
93 | } | 91 | } |
@@ -106,66 +104,8 @@ static inline int cifs_get_disposition(unsigned int flags) | |||
106 | return FILE_OPEN; | 104 | return FILE_OPEN; |
107 | } | 105 | } |
108 | 106 | ||
109 | /* all arguments to this function must be checked for validity in caller */ | ||
110 | static inline int | ||
111 | cifs_posix_open_inode_helper(struct inode *inode, struct file *file, | ||
112 | struct cifsInodeInfo *pCifsInode, __u32 oplock, | ||
113 | u16 netfid) | ||
114 | { | ||
115 | |||
116 | write_lock(&GlobalSMBSeslock); | ||
117 | |||
118 | pCifsInode = CIFS_I(file->f_path.dentry->d_inode); | ||
119 | if (pCifsInode == NULL) { | ||
120 | write_unlock(&GlobalSMBSeslock); | ||
121 | return -EINVAL; | ||
122 | } | ||
123 | |||
124 | if (pCifsInode->clientCanCacheRead) { | ||
125 | /* we have the inode open somewhere else | ||
126 | no need to discard cache data */ | ||
127 | goto psx_client_can_cache; | ||
128 | } | ||
129 | |||
130 | /* BB FIXME need to fix this check to move it earlier into posix_open | ||
131 | BB fIX following section BB FIXME */ | ||
132 | |||
133 | /* if not oplocked, invalidate inode pages if mtime or file | ||
134 | size changed */ | ||
135 | /* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime)); | ||
136 | if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) && | ||
137 | (file->f_path.dentry->d_inode->i_size == | ||
138 | (loff_t)le64_to_cpu(buf->EndOfFile))) { | ||
139 | cFYI(1, "inode unchanged on server"); | ||
140 | } else { | ||
141 | if (file->f_path.dentry->d_inode->i_mapping) { | ||
142 | rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping); | ||
143 | if (rc != 0) | ||
144 | CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc; | ||
145 | } | ||
146 | cFYI(1, "invalidating remote inode since open detected it " | ||
147 | "changed"); | ||
148 | invalidate_remote_inode(file->f_path.dentry->d_inode); | ||
149 | } */ | ||
150 | |||
151 | psx_client_can_cache: | ||
152 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | ||
153 | pCifsInode->clientCanCacheAll = true; | ||
154 | pCifsInode->clientCanCacheRead = true; | ||
155 | cFYI(1, "Exclusive Oplock granted on inode %p", | ||
156 | file->f_path.dentry->d_inode); | ||
157 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
158 | pCifsInode->clientCanCacheRead = true; | ||
159 | |||
160 | /* will have to change the unlock if we reenable the | ||
161 | filemap_fdatawrite (which does not seem necessary */ | ||
162 | write_unlock(&GlobalSMBSeslock); | ||
163 | return 0; | ||
164 | } | ||
165 | |||
166 | /* all arguments to this function must be checked for validity in caller */ | ||
167 | static inline int cifs_open_inode_helper(struct inode *inode, | 107 | static inline int cifs_open_inode_helper(struct inode *inode, |
168 | struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf, | 108 | struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf, |
169 | char *full_path, int xid) | 109 | char *full_path, int xid) |
170 | { | 110 | { |
171 | struct cifsInodeInfo *pCifsInode = CIFS_I(inode); | 111 | struct cifsInodeInfo *pCifsInode = CIFS_I(inode); |
@@ -207,16 +147,175 @@ client_can_cache: | |||
207 | rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, | 147 | rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb, |
208 | xid, NULL); | 148 | xid, NULL); |
209 | 149 | ||
210 | if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) { | 150 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { |
211 | pCifsInode->clientCanCacheAll = true; | 151 | pCifsInode->clientCanCacheAll = true; |
212 | pCifsInode->clientCanCacheRead = true; | 152 | pCifsInode->clientCanCacheRead = true; |
213 | cFYI(1, "Exclusive Oplock granted on inode %p", inode); | 153 | cFYI(1, "Exclusive Oplock granted on inode %p", inode); |
214 | } else if ((*oplock & 0xF) == OPLOCK_READ) | 154 | } else if ((oplock & 0xF) == OPLOCK_READ) |
215 | pCifsInode->clientCanCacheRead = true; | 155 | pCifsInode->clientCanCacheRead = true; |
216 | 156 | ||
217 | return rc; | 157 | return rc; |
218 | } | 158 | } |
219 | 159 | ||
160 | int cifs_posix_open(char *full_path, struct inode **pinode, | ||
161 | struct super_block *sb, int mode, unsigned int f_flags, | ||
162 | __u32 *poplock, __u16 *pnetfid, int xid) | ||
163 | { | ||
164 | int rc; | ||
165 | FILE_UNIX_BASIC_INFO *presp_data; | ||
166 | __u32 posix_flags = 0; | ||
167 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
168 | struct cifs_fattr fattr; | ||
169 | struct tcon_link *tlink; | ||
170 | struct cifsTconInfo *tcon; | ||
171 | |||
172 | cFYI(1, "posix open %s", full_path); | ||
173 | |||
174 | presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | ||
175 | if (presp_data == NULL) | ||
176 | return -ENOMEM; | ||
177 | |||
178 | tlink = cifs_sb_tlink(cifs_sb); | ||
179 | if (IS_ERR(tlink)) { | ||
180 | rc = PTR_ERR(tlink); | ||
181 | goto posix_open_ret; | ||
182 | } | ||
183 | |||
184 | tcon = tlink_tcon(tlink); | ||
185 | mode &= ~current_umask(); | ||
186 | |||
187 | posix_flags = cifs_posix_convert_flags(f_flags); | ||
188 | rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, | ||
189 | poplock, full_path, cifs_sb->local_nls, | ||
190 | cifs_sb->mnt_cifs_flags & | ||
191 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
192 | cifs_put_tlink(tlink); | ||
193 | |||
194 | if (rc) | ||
195 | goto posix_open_ret; | ||
196 | |||
197 | if (presp_data->Type == cpu_to_le32(-1)) | ||
198 | goto posix_open_ret; /* open ok, caller does qpathinfo */ | ||
199 | |||
200 | if (!pinode) | ||
201 | goto posix_open_ret; /* caller does not need info */ | ||
202 | |||
203 | cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb); | ||
204 | |||
205 | /* get new inode and set it up */ | ||
206 | if (*pinode == NULL) { | ||
207 | cifs_fill_uniqueid(sb, &fattr); | ||
208 | *pinode = cifs_iget(sb, &fattr); | ||
209 | if (!*pinode) { | ||
210 | rc = -ENOMEM; | ||
211 | goto posix_open_ret; | ||
212 | } | ||
213 | } else { | ||
214 | cifs_fattr_to_inode(*pinode, &fattr); | ||
215 | } | ||
216 | |||
217 | posix_open_ret: | ||
218 | kfree(presp_data); | ||
219 | return rc; | ||
220 | } | ||
221 | |||
222 | struct cifsFileInfo * | ||
223 | cifs_new_fileinfo(__u16 fileHandle, struct file *file, | ||
224 | struct tcon_link *tlink, __u32 oplock) | ||
225 | { | ||
226 | struct dentry *dentry = file->f_path.dentry; | ||
227 | struct inode *inode = dentry->d_inode; | ||
228 | struct cifsInodeInfo *pCifsInode = CIFS_I(inode); | ||
229 | struct cifsFileInfo *pCifsFile; | ||
230 | |||
231 | pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | ||
232 | if (pCifsFile == NULL) | ||
233 | return pCifsFile; | ||
234 | |||
235 | pCifsFile->netfid = fileHandle; | ||
236 | pCifsFile->pid = current->tgid; | ||
237 | pCifsFile->uid = current_fsuid(); | ||
238 | pCifsFile->dentry = dget(dentry); | ||
239 | pCifsFile->f_flags = file->f_flags; | ||
240 | pCifsFile->invalidHandle = false; | ||
241 | pCifsFile->tlink = cifs_get_tlink(tlink); | ||
242 | mutex_init(&pCifsFile->fh_mutex); | ||
243 | mutex_init(&pCifsFile->lock_mutex); | ||
244 | INIT_LIST_HEAD(&pCifsFile->llist); | ||
245 | atomic_set(&pCifsFile->count, 1); | ||
246 | INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break); | ||
247 | |||
248 | spin_lock(&cifs_file_list_lock); | ||
249 | list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList)); | ||
250 | /* if readable file instance put first in list*/ | ||
251 | if (file->f_mode & FMODE_READ) | ||
252 | list_add(&pCifsFile->flist, &pCifsInode->openFileList); | ||
253 | else | ||
254 | list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList); | ||
255 | spin_unlock(&cifs_file_list_lock); | ||
256 | |||
257 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | ||
258 | pCifsInode->clientCanCacheAll = true; | ||
259 | pCifsInode->clientCanCacheRead = true; | ||
260 | cFYI(1, "Exclusive Oplock inode %p", inode); | ||
261 | } else if ((oplock & 0xF) == OPLOCK_READ) | ||
262 | pCifsInode->clientCanCacheRead = true; | ||
263 | |||
264 | file->private_data = pCifsFile; | ||
265 | return pCifsFile; | ||
266 | } | ||
267 | |||
268 | /* | ||
269 | * Release a reference on the file private data. This may involve closing | ||
270 | * the filehandle out on the server. | ||
271 | */ | ||
272 | void cifsFileInfo_put(struct cifsFileInfo *cifs_file) | ||
273 | { | ||
274 | struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink); | ||
275 | struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode); | ||
276 | struct cifsLockInfo *li, *tmp; | ||
277 | |||
278 | spin_lock(&cifs_file_list_lock); | ||
279 | if (!atomic_dec_and_test(&cifs_file->count)) { | ||
280 | spin_unlock(&cifs_file_list_lock); | ||
281 | return; | ||
282 | } | ||
283 | |||
284 | /* remove it from the lists */ | ||
285 | list_del(&cifs_file->flist); | ||
286 | list_del(&cifs_file->tlist); | ||
287 | |||
288 | if (list_empty(&cifsi->openFileList)) { | ||
289 | cFYI(1, "closing last open instance for inode %p", | ||
290 | cifs_file->dentry->d_inode); | ||
291 | cifsi->clientCanCacheRead = false; | ||
292 | cifsi->clientCanCacheAll = false; | ||
293 | } | ||
294 | spin_unlock(&cifs_file_list_lock); | ||
295 | |||
296 | if (!tcon->need_reconnect && !cifs_file->invalidHandle) { | ||
297 | int xid, rc; | ||
298 | |||
299 | xid = GetXid(); | ||
300 | rc = CIFSSMBClose(xid, tcon, cifs_file->netfid); | ||
301 | FreeXid(xid); | ||
302 | } | ||
303 | |||
304 | /* Delete any outstanding lock records. We'll lose them when the file | ||
305 | * is closed anyway. | ||
306 | */ | ||
307 | mutex_lock(&cifs_file->lock_mutex); | ||
308 | list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) { | ||
309 | list_del(&li->llist); | ||
310 | kfree(li); | ||
311 | } | ||
312 | mutex_unlock(&cifs_file->lock_mutex); | ||
313 | |||
314 | cifs_put_tlink(cifs_file->tlink); | ||
315 | dput(cifs_file->dentry); | ||
316 | kfree(cifs_file); | ||
317 | } | ||
318 | |||
220 | int cifs_open(struct inode *inode, struct file *file) | 319 | int cifs_open(struct inode *inode, struct file *file) |
221 | { | 320 | { |
222 | int rc = -EACCES; | 321 | int rc = -EACCES; |
@@ -224,6 +323,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
224 | __u32 oplock; | 323 | __u32 oplock; |
225 | struct cifs_sb_info *cifs_sb; | 324 | struct cifs_sb_info *cifs_sb; |
226 | struct cifsTconInfo *tcon; | 325 | struct cifsTconInfo *tcon; |
326 | struct tcon_link *tlink; | ||
227 | struct cifsFileInfo *pCifsFile = NULL; | 327 | struct cifsFileInfo *pCifsFile = NULL; |
228 | struct cifsInodeInfo *pCifsInode; | 328 | struct cifsInodeInfo *pCifsInode; |
229 | char *full_path = NULL; | 329 | char *full_path = NULL; |
@@ -235,7 +335,12 @@ int cifs_open(struct inode *inode, struct file *file) | |||
235 | xid = GetXid(); | 335 | xid = GetXid(); |
236 | 336 | ||
237 | cifs_sb = CIFS_SB(inode->i_sb); | 337 | cifs_sb = CIFS_SB(inode->i_sb); |
238 | tcon = cifs_sb->tcon; | 338 | tlink = cifs_sb_tlink(cifs_sb); |
339 | if (IS_ERR(tlink)) { | ||
340 | FreeXid(xid); | ||
341 | return PTR_ERR(tlink); | ||
342 | } | ||
343 | tcon = tlink_tcon(tlink); | ||
239 | 344 | ||
240 | pCifsInode = CIFS_I(file->f_path.dentry->d_inode); | 345 | pCifsInode = CIFS_I(file->f_path.dentry->d_inode); |
241 | 346 | ||
@@ -257,27 +362,15 @@ int cifs_open(struct inode *inode, struct file *file) | |||
257 | (tcon->ses->capabilities & CAP_UNIX) && | 362 | (tcon->ses->capabilities & CAP_UNIX) && |
258 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 363 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
259 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { | 364 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
260 | int oflags = (int) cifs_posix_convert_flags(file->f_flags); | ||
261 | oflags |= SMB_O_CREAT; | ||
262 | /* can not refresh inode info since size could be stale */ | 365 | /* can not refresh inode info since size could be stale */ |
263 | rc = cifs_posix_open(full_path, &inode, inode->i_sb, | 366 | rc = cifs_posix_open(full_path, &inode, inode->i_sb, |
264 | cifs_sb->mnt_file_mode /* ignored */, | 367 | cifs_sb->mnt_file_mode /* ignored */, |
265 | oflags, &oplock, &netfid, xid); | 368 | file->f_flags, &oplock, &netfid, xid); |
266 | if (rc == 0) { | 369 | if (rc == 0) { |
267 | cFYI(1, "posix open succeeded"); | 370 | cFYI(1, "posix open succeeded"); |
268 | /* no need for special case handling of setting mode | ||
269 | on read only files needed here */ | ||
270 | 371 | ||
271 | rc = cifs_posix_open_inode_helper(inode, file, | 372 | pCifsFile = cifs_new_fileinfo(netfid, file, tlink, |
272 | pCifsInode, oplock, netfid); | 373 | oplock); |
273 | if (rc != 0) { | ||
274 | CIFSSMBClose(xid, tcon, netfid); | ||
275 | goto out; | ||
276 | } | ||
277 | |||
278 | pCifsFile = cifs_new_fileinfo(inode, netfid, file, | ||
279 | file->f_path.mnt, | ||
280 | oflags); | ||
281 | if (pCifsFile == NULL) { | 374 | if (pCifsFile == NULL) { |
282 | CIFSSMBClose(xid, tcon, netfid); | 375 | CIFSSMBClose(xid, tcon, netfid); |
283 | rc = -ENOMEM; | 376 | rc = -ENOMEM; |
@@ -345,7 +438,7 @@ int cifs_open(struct inode *inode, struct file *file) | |||
345 | goto out; | 438 | goto out; |
346 | } | 439 | } |
347 | 440 | ||
348 | if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS) | 441 | if (tcon->ses->capabilities & CAP_NT_SMBS) |
349 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, | 442 | rc = CIFSSMBOpen(xid, tcon, full_path, disposition, |
350 | desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, | 443 | desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf, |
351 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags | 444 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags |
@@ -365,12 +458,11 @@ int cifs_open(struct inode *inode, struct file *file) | |||
365 | goto out; | 458 | goto out; |
366 | } | 459 | } |
367 | 460 | ||
368 | rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid); | 461 | rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid); |
369 | if (rc != 0) | 462 | if (rc != 0) |
370 | goto out; | 463 | goto out; |
371 | 464 | ||
372 | pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt, | 465 | pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock); |
373 | file->f_flags); | ||
374 | if (pCifsFile == NULL) { | 466 | if (pCifsFile == NULL) { |
375 | rc = -ENOMEM; | 467 | rc = -ENOMEM; |
376 | goto out; | 468 | goto out; |
@@ -402,6 +494,7 @@ out: | |||
402 | kfree(buf); | 494 | kfree(buf); |
403 | kfree(full_path); | 495 | kfree(full_path); |
404 | FreeXid(xid); | 496 | FreeXid(xid); |
497 | cifs_put_tlink(tlink); | ||
405 | return rc; | 498 | return rc; |
406 | } | 499 | } |
407 | 500 | ||
@@ -416,14 +509,13 @@ static int cifs_relock_file(struct cifsFileInfo *cifsFile) | |||
416 | return rc; | 509 | return rc; |
417 | } | 510 | } |
418 | 511 | ||
419 | static int cifs_reopen_file(struct file *file, bool can_flush) | 512 | static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush) |
420 | { | 513 | { |
421 | int rc = -EACCES; | 514 | int rc = -EACCES; |
422 | int xid; | 515 | int xid; |
423 | __u32 oplock; | 516 | __u32 oplock; |
424 | struct cifs_sb_info *cifs_sb; | 517 | struct cifs_sb_info *cifs_sb; |
425 | struct cifsTconInfo *tcon; | 518 | struct cifsTconInfo *tcon; |
426 | struct cifsFileInfo *pCifsFile; | ||
427 | struct cifsInodeInfo *pCifsInode; | 519 | struct cifsInodeInfo *pCifsInode; |
428 | struct inode *inode; | 520 | struct inode *inode; |
429 | char *full_path = NULL; | 521 | char *full_path = NULL; |
@@ -431,11 +523,6 @@ static int cifs_reopen_file(struct file *file, bool can_flush) | |||
431 | int disposition = FILE_OPEN; | 523 | int disposition = FILE_OPEN; |
432 | __u16 netfid; | 524 | __u16 netfid; |
433 | 525 | ||
434 | if (file->private_data) | ||
435 | pCifsFile = file->private_data; | ||
436 | else | ||
437 | return -EBADF; | ||
438 | |||
439 | xid = GetXid(); | 526 | xid = GetXid(); |
440 | mutex_lock(&pCifsFile->fh_mutex); | 527 | mutex_lock(&pCifsFile->fh_mutex); |
441 | if (!pCifsFile->invalidHandle) { | 528 | if (!pCifsFile->invalidHandle) { |
@@ -445,39 +532,24 @@ static int cifs_reopen_file(struct file *file, bool can_flush) | |||
445 | return rc; | 532 | return rc; |
446 | } | 533 | } |
447 | 534 | ||
448 | if (file->f_path.dentry == NULL) { | 535 | inode = pCifsFile->dentry->d_inode; |
449 | cERROR(1, "no valid name if dentry freed"); | ||
450 | dump_stack(); | ||
451 | rc = -EBADF; | ||
452 | goto reopen_error_exit; | ||
453 | } | ||
454 | |||
455 | inode = file->f_path.dentry->d_inode; | ||
456 | if (inode == NULL) { | ||
457 | cERROR(1, "inode not valid"); | ||
458 | dump_stack(); | ||
459 | rc = -EBADF; | ||
460 | goto reopen_error_exit; | ||
461 | } | ||
462 | |||
463 | cifs_sb = CIFS_SB(inode->i_sb); | 536 | cifs_sb = CIFS_SB(inode->i_sb); |
464 | tcon = cifs_sb->tcon; | 537 | tcon = tlink_tcon(pCifsFile->tlink); |
465 | 538 | ||
466 | /* can not grab rename sem here because various ops, including | 539 | /* can not grab rename sem here because various ops, including |
467 | those that already have the rename sem can end up causing writepage | 540 | those that already have the rename sem can end up causing writepage |
468 | to get called and if the server was down that means we end up here, | 541 | to get called and if the server was down that means we end up here, |
469 | and we can never tell if the caller already has the rename_sem */ | 542 | and we can never tell if the caller already has the rename_sem */ |
470 | full_path = build_path_from_dentry(file->f_path.dentry); | 543 | full_path = build_path_from_dentry(pCifsFile->dentry); |
471 | if (full_path == NULL) { | 544 | if (full_path == NULL) { |
472 | rc = -ENOMEM; | 545 | rc = -ENOMEM; |
473 | reopen_error_exit: | ||
474 | mutex_unlock(&pCifsFile->fh_mutex); | 546 | mutex_unlock(&pCifsFile->fh_mutex); |
475 | FreeXid(xid); | 547 | FreeXid(xid); |
476 | return rc; | 548 | return rc; |
477 | } | 549 | } |
478 | 550 | ||
479 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", | 551 | cFYI(1, "inode = 0x%p file flags 0x%x for %s", |
480 | inode, file->f_flags, full_path); | 552 | inode, pCifsFile->f_flags, full_path); |
481 | 553 | ||
482 | if (oplockEnabled) | 554 | if (oplockEnabled) |
483 | oplock = REQ_OPLOCK; | 555 | oplock = REQ_OPLOCK; |
@@ -487,8 +559,14 @@ reopen_error_exit: | |||
487 | if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && | 559 | if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) && |
488 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 560 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
489 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { | 561 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
490 | int oflags = (int) cifs_posix_convert_flags(file->f_flags); | 562 | |
491 | /* can not refresh inode info since size could be stale */ | 563 | /* |
564 | * O_CREAT, O_EXCL and O_TRUNC already had their effect on the | ||
565 | * original open. Must mask them off for a reopen. | ||
566 | */ | ||
567 | unsigned int oflags = pCifsFile->f_flags & | ||
568 | ~(O_CREAT | O_EXCL | O_TRUNC); | ||
569 | |||
492 | rc = cifs_posix_open(full_path, NULL, inode->i_sb, | 570 | rc = cifs_posix_open(full_path, NULL, inode->i_sb, |
493 | cifs_sb->mnt_file_mode /* ignored */, | 571 | cifs_sb->mnt_file_mode /* ignored */, |
494 | oflags, &oplock, &netfid, xid); | 572 | oflags, &oplock, &netfid, xid); |
@@ -500,7 +578,7 @@ reopen_error_exit: | |||
500 | in the reconnect path it is important to retry hard */ | 578 | in the reconnect path it is important to retry hard */ |
501 | } | 579 | } |
502 | 580 | ||
503 | desiredAccess = cifs_convert_flags(file->f_flags); | 581 | desiredAccess = cifs_convert_flags(pCifsFile->f_flags); |
504 | 582 | ||
505 | /* Can not refresh inode by passing in file_info buf to be returned | 583 | /* Can not refresh inode by passing in file_info buf to be returned |
506 | by SMBOpen and then calling get_inode_info with returned buf | 584 | by SMBOpen and then calling get_inode_info with returned buf |
@@ -516,49 +594,50 @@ reopen_error_exit: | |||
516 | mutex_unlock(&pCifsFile->fh_mutex); | 594 | mutex_unlock(&pCifsFile->fh_mutex); |
517 | cFYI(1, "cifs_open returned 0x%x", rc); | 595 | cFYI(1, "cifs_open returned 0x%x", rc); |
518 | cFYI(1, "oplock: %d", oplock); | 596 | cFYI(1, "oplock: %d", oplock); |
519 | } else { | 597 | goto reopen_error_exit; |
598 | } | ||
599 | |||
520 | reopen_success: | 600 | reopen_success: |
521 | pCifsFile->netfid = netfid; | 601 | pCifsFile->netfid = netfid; |
522 | pCifsFile->invalidHandle = false; | 602 | pCifsFile->invalidHandle = false; |
523 | mutex_unlock(&pCifsFile->fh_mutex); | 603 | mutex_unlock(&pCifsFile->fh_mutex); |
524 | pCifsInode = CIFS_I(inode); | 604 | pCifsInode = CIFS_I(inode); |
525 | if (pCifsInode) { | 605 | |
526 | if (can_flush) { | 606 | if (can_flush) { |
527 | rc = filemap_write_and_wait(inode->i_mapping); | 607 | rc = filemap_write_and_wait(inode->i_mapping); |
528 | if (rc != 0) | 608 | if (rc != 0) |
529 | CIFS_I(inode)->write_behind_rc = rc; | 609 | CIFS_I(inode)->write_behind_rc = rc; |
530 | /* temporarily disable caching while we | 610 | |
531 | go to server to get inode info */ | 611 | pCifsInode->clientCanCacheAll = false; |
532 | pCifsInode->clientCanCacheAll = false; | 612 | pCifsInode->clientCanCacheRead = false; |
533 | pCifsInode->clientCanCacheRead = false; | 613 | if (tcon->unix_ext) |
534 | if (tcon->unix_ext) | 614 | rc = cifs_get_inode_info_unix(&inode, |
535 | rc = cifs_get_inode_info_unix(&inode, | 615 | full_path, inode->i_sb, xid); |
536 | full_path, inode->i_sb, xid); | 616 | else |
537 | else | 617 | rc = cifs_get_inode_info(&inode, |
538 | rc = cifs_get_inode_info(&inode, | 618 | full_path, NULL, inode->i_sb, |
539 | full_path, NULL, inode->i_sb, | 619 | xid, NULL); |
540 | xid, NULL); | 620 | } /* else we are writing out data to server already |
541 | } /* else we are writing out data to server already | 621 | and could deadlock if we tried to flush data, and |
542 | and could deadlock if we tried to flush data, and | 622 | since we do not know if we have data that would |
543 | since we do not know if we have data that would | 623 | invalidate the current end of file on the server |
544 | invalidate the current end of file on the server | 624 | we can not go to the server to get the new inod |
545 | we can not go to the server to get the new inod | 625 | info */ |
546 | info */ | 626 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { |
547 | if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) { | 627 | pCifsInode->clientCanCacheAll = true; |
548 | pCifsInode->clientCanCacheAll = true; | 628 | pCifsInode->clientCanCacheRead = true; |
549 | pCifsInode->clientCanCacheRead = true; | 629 | cFYI(1, "Exclusive Oplock granted on inode %p", |
550 | cFYI(1, "Exclusive Oplock granted on inode %p", | 630 | pCifsFile->dentry->d_inode); |
551 | file->f_path.dentry->d_inode); | 631 | } else if ((oplock & 0xF) == OPLOCK_READ) { |
552 | } else if ((oplock & 0xF) == OPLOCK_READ) { | 632 | pCifsInode->clientCanCacheRead = true; |
553 | pCifsInode->clientCanCacheRead = true; | 633 | pCifsInode->clientCanCacheAll = false; |
554 | pCifsInode->clientCanCacheAll = false; | 634 | } else { |
555 | } else { | 635 | pCifsInode->clientCanCacheRead = false; |
556 | pCifsInode->clientCanCacheRead = false; | 636 | pCifsInode->clientCanCacheAll = false; |
557 | pCifsInode->clientCanCacheAll = false; | ||
558 | } | ||
559 | cifs_relock_file(pCifsFile); | ||
560 | } | ||
561 | } | 637 | } |
638 | cifs_relock_file(pCifsFile); | ||
639 | |||
640 | reopen_error_exit: | ||
562 | kfree(full_path); | 641 | kfree(full_path); |
563 | FreeXid(xid); | 642 | FreeXid(xid); |
564 | return rc; | 643 | return rc; |
@@ -566,79 +645,11 @@ reopen_success: | |||
566 | 645 | ||
567 | int cifs_close(struct inode *inode, struct file *file) | 646 | int cifs_close(struct inode *inode, struct file *file) |
568 | { | 647 | { |
569 | int rc = 0; | 648 | cifsFileInfo_put(file->private_data); |
570 | int xid, timeout; | 649 | file->private_data = NULL; |
571 | struct cifs_sb_info *cifs_sb; | ||
572 | struct cifsTconInfo *pTcon; | ||
573 | struct cifsFileInfo *pSMBFile = file->private_data; | ||
574 | |||
575 | xid = GetXid(); | ||
576 | |||
577 | cifs_sb = CIFS_SB(inode->i_sb); | ||
578 | pTcon = cifs_sb->tcon; | ||
579 | if (pSMBFile) { | ||
580 | struct cifsLockInfo *li, *tmp; | ||
581 | write_lock(&GlobalSMBSeslock); | ||
582 | pSMBFile->closePend = true; | ||
583 | if (pTcon) { | ||
584 | /* no sense reconnecting to close a file that is | ||
585 | already closed */ | ||
586 | if (!pTcon->need_reconnect) { | ||
587 | write_unlock(&GlobalSMBSeslock); | ||
588 | timeout = 2; | ||
589 | while ((atomic_read(&pSMBFile->count) != 1) | ||
590 | && (timeout <= 2048)) { | ||
591 | /* Give write a better chance to get to | ||
592 | server ahead of the close. We do not | ||
593 | want to add a wait_q here as it would | ||
594 | increase the memory utilization as | ||
595 | the struct would be in each open file, | ||
596 | but this should give enough time to | ||
597 | clear the socket */ | ||
598 | cFYI(DBG2, "close delay, write pending"); | ||
599 | msleep(timeout); | ||
600 | timeout *= 4; | ||
601 | } | ||
602 | if (!pTcon->need_reconnect && | ||
603 | !pSMBFile->invalidHandle) | ||
604 | rc = CIFSSMBClose(xid, pTcon, | ||
605 | pSMBFile->netfid); | ||
606 | } else | ||
607 | write_unlock(&GlobalSMBSeslock); | ||
608 | } else | ||
609 | write_unlock(&GlobalSMBSeslock); | ||
610 | |||
611 | /* Delete any outstanding lock records. | ||
612 | We'll lose them when the file is closed anyway. */ | ||
613 | mutex_lock(&pSMBFile->lock_mutex); | ||
614 | list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) { | ||
615 | list_del(&li->llist); | ||
616 | kfree(li); | ||
617 | } | ||
618 | mutex_unlock(&pSMBFile->lock_mutex); | ||
619 | 650 | ||
620 | write_lock(&GlobalSMBSeslock); | 651 | /* return code from the ->release op is always ignored */ |
621 | list_del(&pSMBFile->flist); | 652 | return 0; |
622 | list_del(&pSMBFile->tlist); | ||
623 | write_unlock(&GlobalSMBSeslock); | ||
624 | cifsFileInfo_put(file->private_data); | ||
625 | file->private_data = NULL; | ||
626 | } else | ||
627 | rc = -EBADF; | ||
628 | |||
629 | read_lock(&GlobalSMBSeslock); | ||
630 | if (list_empty(&(CIFS_I(inode)->openFileList))) { | ||
631 | cFYI(1, "closing last open instance for inode %p", inode); | ||
632 | /* if the file is not open we do not know if we can cache info | ||
633 | on this inode, much less write behind and read ahead */ | ||
634 | CIFS_I(inode)->clientCanCacheRead = false; | ||
635 | CIFS_I(inode)->clientCanCacheAll = false; | ||
636 | } | ||
637 | read_unlock(&GlobalSMBSeslock); | ||
638 | if ((rc == 0) && CIFS_I(inode)->write_behind_rc) | ||
639 | rc = CIFS_I(inode)->write_behind_rc; | ||
640 | FreeXid(xid); | ||
641 | return rc; | ||
642 | } | 653 | } |
643 | 654 | ||
644 | int cifs_closedir(struct inode *inode, struct file *file) | 655 | int cifs_closedir(struct inode *inode, struct file *file) |
@@ -653,25 +664,21 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
653 | xid = GetXid(); | 664 | xid = GetXid(); |
654 | 665 | ||
655 | if (pCFileStruct) { | 666 | if (pCFileStruct) { |
656 | struct cifsTconInfo *pTcon; | 667 | struct cifsTconInfo *pTcon = tlink_tcon(pCFileStruct->tlink); |
657 | struct cifs_sb_info *cifs_sb = | ||
658 | CIFS_SB(file->f_path.dentry->d_sb); | ||
659 | |||
660 | pTcon = cifs_sb->tcon; | ||
661 | 668 | ||
662 | cFYI(1, "Freeing private data in close dir"); | 669 | cFYI(1, "Freeing private data in close dir"); |
663 | write_lock(&GlobalSMBSeslock); | 670 | spin_lock(&cifs_file_list_lock); |
664 | if (!pCFileStruct->srch_inf.endOfSearch && | 671 | if (!pCFileStruct->srch_inf.endOfSearch && |
665 | !pCFileStruct->invalidHandle) { | 672 | !pCFileStruct->invalidHandle) { |
666 | pCFileStruct->invalidHandle = true; | 673 | pCFileStruct->invalidHandle = true; |
667 | write_unlock(&GlobalSMBSeslock); | 674 | spin_unlock(&cifs_file_list_lock); |
668 | rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); | 675 | rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid); |
669 | cFYI(1, "Closing uncompleted readdir with rc %d", | 676 | cFYI(1, "Closing uncompleted readdir with rc %d", |
670 | rc); | 677 | rc); |
671 | /* not much we can do if it fails anyway, ignore rc */ | 678 | /* not much we can do if it fails anyway, ignore rc */ |
672 | rc = 0; | 679 | rc = 0; |
673 | } else | 680 | } else |
674 | write_unlock(&GlobalSMBSeslock); | 681 | spin_unlock(&cifs_file_list_lock); |
675 | ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; | 682 | ptmp = pCFileStruct->srch_inf.ntwrk_buf_start; |
676 | if (ptmp) { | 683 | if (ptmp) { |
677 | cFYI(1, "closedir free smb buf in srch struct"); | 684 | cFYI(1, "closedir free smb buf in srch struct"); |
@@ -681,6 +688,7 @@ int cifs_closedir(struct inode *inode, struct file *file) | |||
681 | else | 688 | else |
682 | cifs_buf_release(ptmp); | 689 | cifs_buf_release(ptmp); |
683 | } | 690 | } |
691 | cifs_put_tlink(pCFileStruct->tlink); | ||
684 | kfree(file->private_data); | 692 | kfree(file->private_data); |
685 | file->private_data = NULL; | 693 | file->private_data = NULL; |
686 | } | 694 | } |
@@ -767,7 +775,7 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock) | |||
767 | cFYI(1, "Unknown type of lock"); | 775 | cFYI(1, "Unknown type of lock"); |
768 | 776 | ||
769 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 777 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
770 | tcon = cifs_sb->tcon; | 778 | tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink); |
771 | 779 | ||
772 | if (file->private_data == NULL) { | 780 | if (file->private_data == NULL) { |
773 | rc = -EBADF; | 781 | rc = -EBADF; |
@@ -960,14 +968,14 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
960 | 968 | ||
961 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 969 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
962 | 970 | ||
963 | pTcon = cifs_sb->tcon; | ||
964 | |||
965 | /* cFYI(1, " write %d bytes to offset %lld of %s", write_size, | 971 | /* cFYI(1, " write %d bytes to offset %lld of %s", write_size, |
966 | *poffset, file->f_path.dentry->d_name.name); */ | 972 | *poffset, file->f_path.dentry->d_name.name); */ |
967 | 973 | ||
968 | if (file->private_data == NULL) | 974 | if (file->private_data == NULL) |
969 | return -EBADF; | 975 | return -EBADF; |
976 | |||
970 | open_file = file->private_data; | 977 | open_file = file->private_data; |
978 | pTcon = tlink_tcon(open_file->tlink); | ||
971 | 979 | ||
972 | rc = generic_write_checks(file, poffset, &write_size, 0); | 980 | rc = generic_write_checks(file, poffset, &write_size, 0); |
973 | if (rc) | 981 | if (rc) |
@@ -988,19 +996,12 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
988 | we blocked so return what we managed to write */ | 996 | we blocked so return what we managed to write */ |
989 | return total_written; | 997 | return total_written; |
990 | } | 998 | } |
991 | if (open_file->closePend) { | ||
992 | FreeXid(xid); | ||
993 | if (total_written) | ||
994 | return total_written; | ||
995 | else | ||
996 | return -EBADF; | ||
997 | } | ||
998 | if (open_file->invalidHandle) { | 999 | if (open_file->invalidHandle) { |
999 | /* we could deadlock if we called | 1000 | /* we could deadlock if we called |
1000 | filemap_fdatawait from here so tell | 1001 | filemap_fdatawait from here so tell |
1001 | reopen_file not to flush data to server | 1002 | reopen_file not to flush data to server |
1002 | now */ | 1003 | now */ |
1003 | rc = cifs_reopen_file(file, false); | 1004 | rc = cifs_reopen_file(open_file, false); |
1004 | if (rc != 0) | 1005 | if (rc != 0) |
1005 | break; | 1006 | break; |
1006 | } | 1007 | } |
@@ -1048,8 +1049,9 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
1048 | return total_written; | 1049 | return total_written; |
1049 | } | 1050 | } |
1050 | 1051 | ||
1051 | static ssize_t cifs_write(struct file *file, const char *write_data, | 1052 | static ssize_t cifs_write(struct cifsFileInfo *open_file, |
1052 | size_t write_size, loff_t *poffset) | 1053 | const char *write_data, size_t write_size, |
1054 | loff_t *poffset) | ||
1053 | { | 1055 | { |
1054 | int rc = 0; | 1056 | int rc = 0; |
1055 | unsigned int bytes_written = 0; | 1057 | unsigned int bytes_written = 0; |
@@ -1057,19 +1059,15 @@ static ssize_t cifs_write(struct file *file, const char *write_data, | |||
1057 | struct cifs_sb_info *cifs_sb; | 1059 | struct cifs_sb_info *cifs_sb; |
1058 | struct cifsTconInfo *pTcon; | 1060 | struct cifsTconInfo *pTcon; |
1059 | int xid, long_op; | 1061 | int xid, long_op; |
1060 | struct cifsFileInfo *open_file; | 1062 | struct dentry *dentry = open_file->dentry; |
1061 | struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode); | 1063 | struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode); |
1062 | 1064 | ||
1063 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1065 | cifs_sb = CIFS_SB(dentry->d_sb); |
1064 | |||
1065 | pTcon = cifs_sb->tcon; | ||
1066 | 1066 | ||
1067 | cFYI(1, "write %zd bytes to offset %lld of %s", write_size, | 1067 | cFYI(1, "write %zd bytes to offset %lld of %s", write_size, |
1068 | *poffset, file->f_path.dentry->d_name.name); | 1068 | *poffset, dentry->d_name.name); |
1069 | 1069 | ||
1070 | if (file->private_data == NULL) | 1070 | pTcon = tlink_tcon(open_file->tlink); |
1071 | return -EBADF; | ||
1072 | open_file = file->private_data; | ||
1073 | 1071 | ||
1074 | xid = GetXid(); | 1072 | xid = GetXid(); |
1075 | 1073 | ||
@@ -1078,28 +1076,12 @@ static ssize_t cifs_write(struct file *file, const char *write_data, | |||
1078 | total_written += bytes_written) { | 1076 | total_written += bytes_written) { |
1079 | rc = -EAGAIN; | 1077 | rc = -EAGAIN; |
1080 | while (rc == -EAGAIN) { | 1078 | while (rc == -EAGAIN) { |
1081 | if (file->private_data == NULL) { | ||
1082 | /* file has been closed on us */ | ||
1083 | FreeXid(xid); | ||
1084 | /* if we have gotten here we have written some data | ||
1085 | and blocked, and the file has been freed on us | ||
1086 | while we blocked so return what we managed to | ||
1087 | write */ | ||
1088 | return total_written; | ||
1089 | } | ||
1090 | if (open_file->closePend) { | ||
1091 | FreeXid(xid); | ||
1092 | if (total_written) | ||
1093 | return total_written; | ||
1094 | else | ||
1095 | return -EBADF; | ||
1096 | } | ||
1097 | if (open_file->invalidHandle) { | 1079 | if (open_file->invalidHandle) { |
1098 | /* we could deadlock if we called | 1080 | /* we could deadlock if we called |
1099 | filemap_fdatawait from here so tell | 1081 | filemap_fdatawait from here so tell |
1100 | reopen_file not to flush data to | 1082 | reopen_file not to flush data to |
1101 | server now */ | 1083 | server now */ |
1102 | rc = cifs_reopen_file(file, false); | 1084 | rc = cifs_reopen_file(open_file, false); |
1103 | if (rc != 0) | 1085 | if (rc != 0) |
1104 | break; | 1086 | break; |
1105 | } | 1087 | } |
@@ -1146,43 +1128,41 @@ static ssize_t cifs_write(struct file *file, const char *write_data, | |||
1146 | 1128 | ||
1147 | cifs_stats_bytes_written(pTcon, total_written); | 1129 | cifs_stats_bytes_written(pTcon, total_written); |
1148 | 1130 | ||
1149 | /* since the write may have blocked check these pointers again */ | 1131 | if (total_written > 0) { |
1150 | if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) { | 1132 | spin_lock(&dentry->d_inode->i_lock); |
1151 | /*BB We could make this contingent on superblock ATIME flag too */ | 1133 | if (*poffset > dentry->d_inode->i_size) |
1152 | /* file->f_path.dentry->d_inode->i_ctime = | 1134 | i_size_write(dentry->d_inode, *poffset); |
1153 | file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/ | 1135 | spin_unlock(&dentry->d_inode->i_lock); |
1154 | if (total_written > 0) { | ||
1155 | spin_lock(&file->f_path.dentry->d_inode->i_lock); | ||
1156 | if (*poffset > file->f_path.dentry->d_inode->i_size) | ||
1157 | i_size_write(file->f_path.dentry->d_inode, | ||
1158 | *poffset); | ||
1159 | spin_unlock(&file->f_path.dentry->d_inode->i_lock); | ||
1160 | } | ||
1161 | mark_inode_dirty_sync(file->f_path.dentry->d_inode); | ||
1162 | } | 1136 | } |
1137 | mark_inode_dirty_sync(dentry->d_inode); | ||
1163 | FreeXid(xid); | 1138 | FreeXid(xid); |
1164 | return total_written; | 1139 | return total_written; |
1165 | } | 1140 | } |
1166 | 1141 | ||
1167 | #ifdef CONFIG_CIFS_EXPERIMENTAL | 1142 | #ifdef CONFIG_CIFS_EXPERIMENTAL |
1168 | struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | 1143 | struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode, |
1144 | bool fsuid_only) | ||
1169 | { | 1145 | { |
1170 | struct cifsFileInfo *open_file = NULL; | 1146 | struct cifsFileInfo *open_file = NULL; |
1147 | struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); | ||
1148 | |||
1149 | /* only filter by fsuid on multiuser mounts */ | ||
1150 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) | ||
1151 | fsuid_only = false; | ||
1171 | 1152 | ||
1172 | read_lock(&GlobalSMBSeslock); | 1153 | spin_lock(&cifs_file_list_lock); |
1173 | /* we could simply get the first_list_entry since write-only entries | 1154 | /* we could simply get the first_list_entry since write-only entries |
1174 | are always at the end of the list but since the first entry might | 1155 | are always at the end of the list but since the first entry might |
1175 | have a close pending, we go through the whole list */ | 1156 | have a close pending, we go through the whole list */ |
1176 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 1157 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
1177 | if (open_file->closePend) | 1158 | if (fsuid_only && open_file->uid != current_fsuid()) |
1178 | continue; | 1159 | continue; |
1179 | if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) || | 1160 | if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) { |
1180 | (open_file->pfile->f_flags & O_RDONLY))) { | ||
1181 | if (!open_file->invalidHandle) { | 1161 | if (!open_file->invalidHandle) { |
1182 | /* found a good file */ | 1162 | /* found a good file */ |
1183 | /* lock it so it will not be closed on us */ | 1163 | /* lock it so it will not be closed on us */ |
1184 | cifsFileInfo_get(open_file); | 1164 | cifsFileInfo_get(open_file); |
1185 | read_unlock(&GlobalSMBSeslock); | 1165 | spin_unlock(&cifs_file_list_lock); |
1186 | return open_file; | 1166 | return open_file; |
1187 | } /* else might as well continue, and look for | 1167 | } /* else might as well continue, and look for |
1188 | another, or simply have the caller reopen it | 1168 | another, or simply have the caller reopen it |
@@ -1190,14 +1170,16 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | |||
1190 | } else /* write only file */ | 1170 | } else /* write only file */ |
1191 | break; /* write only files are last so must be done */ | 1171 | break; /* write only files are last so must be done */ |
1192 | } | 1172 | } |
1193 | read_unlock(&GlobalSMBSeslock); | 1173 | spin_unlock(&cifs_file_list_lock); |
1194 | return NULL; | 1174 | return NULL; |
1195 | } | 1175 | } |
1196 | #endif | 1176 | #endif |
1197 | 1177 | ||
1198 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | 1178 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode, |
1179 | bool fsuid_only) | ||
1199 | { | 1180 | { |
1200 | struct cifsFileInfo *open_file; | 1181 | struct cifsFileInfo *open_file; |
1182 | struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb); | ||
1201 | bool any_available = false; | 1183 | bool any_available = false; |
1202 | int rc; | 1184 | int rc; |
1203 | 1185 | ||
@@ -1211,53 +1193,39 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | |||
1211 | return NULL; | 1193 | return NULL; |
1212 | } | 1194 | } |
1213 | 1195 | ||
1214 | read_lock(&GlobalSMBSeslock); | 1196 | /* only filter by fsuid on multiuser mounts */ |
1197 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) | ||
1198 | fsuid_only = false; | ||
1199 | |||
1200 | spin_lock(&cifs_file_list_lock); | ||
1215 | refind_writable: | 1201 | refind_writable: |
1216 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 1202 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
1217 | if (open_file->closePend || | 1203 | if (!any_available && open_file->pid != current->tgid) |
1218 | (!any_available && open_file->pid != current->tgid)) | ||
1219 | continue; | 1204 | continue; |
1220 | 1205 | if (fsuid_only && open_file->uid != current_fsuid()) | |
1221 | if (open_file->pfile && | 1206 | continue; |
1222 | ((open_file->pfile->f_flags & O_RDWR) || | 1207 | if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { |
1223 | (open_file->pfile->f_flags & O_WRONLY))) { | ||
1224 | cifsFileInfo_get(open_file); | 1208 | cifsFileInfo_get(open_file); |
1225 | 1209 | ||
1226 | if (!open_file->invalidHandle) { | 1210 | if (!open_file->invalidHandle) { |
1227 | /* found a good writable file */ | 1211 | /* found a good writable file */ |
1228 | read_unlock(&GlobalSMBSeslock); | 1212 | spin_unlock(&cifs_file_list_lock); |
1229 | return open_file; | 1213 | return open_file; |
1230 | } | 1214 | } |
1231 | 1215 | ||
1232 | read_unlock(&GlobalSMBSeslock); | 1216 | spin_unlock(&cifs_file_list_lock); |
1217 | |||
1233 | /* Had to unlock since following call can block */ | 1218 | /* Had to unlock since following call can block */ |
1234 | rc = cifs_reopen_file(open_file->pfile, false); | 1219 | rc = cifs_reopen_file(open_file, false); |
1235 | if (!rc) { | 1220 | if (!rc) |
1236 | if (!open_file->closePend) | 1221 | return open_file; |
1237 | return open_file; | ||
1238 | else { /* start over in case this was deleted */ | ||
1239 | /* since the list could be modified */ | ||
1240 | read_lock(&GlobalSMBSeslock); | ||
1241 | cifsFileInfo_put(open_file); | ||
1242 | goto refind_writable; | ||
1243 | } | ||
1244 | } | ||
1245 | 1222 | ||
1246 | /* if it fails, try another handle if possible - | 1223 | /* if it fails, try another handle if possible */ |
1247 | (we can not do this if closePending since | ||
1248 | loop could be modified - in which case we | ||
1249 | have to start at the beginning of the list | ||
1250 | again. Note that it would be bad | ||
1251 | to hold up writepages here (rather than | ||
1252 | in caller) with continuous retries */ | ||
1253 | cFYI(1, "wp failed on reopen file"); | 1224 | cFYI(1, "wp failed on reopen file"); |
1254 | read_lock(&GlobalSMBSeslock); | ||
1255 | /* can not use this handle, no write | ||
1256 | pending on this one after all */ | ||
1257 | cifsFileInfo_put(open_file); | 1225 | cifsFileInfo_put(open_file); |
1258 | 1226 | ||
1259 | if (open_file->closePend) /* list could have changed */ | 1227 | spin_lock(&cifs_file_list_lock); |
1260 | goto refind_writable; | 1228 | |
1261 | /* else we simply continue to the next entry. Thus | 1229 | /* else we simply continue to the next entry. Thus |
1262 | we do not loop on reopen errors. If we | 1230 | we do not loop on reopen errors. If we |
1263 | can not reopen the file, for example if we | 1231 | can not reopen the file, for example if we |
@@ -1272,7 +1240,7 @@ refind_writable: | |||
1272 | any_available = true; | 1240 | any_available = true; |
1273 | goto refind_writable; | 1241 | goto refind_writable; |
1274 | } | 1242 | } |
1275 | read_unlock(&GlobalSMBSeslock); | 1243 | spin_unlock(&cifs_file_list_lock); |
1276 | return NULL; | 1244 | return NULL; |
1277 | } | 1245 | } |
1278 | 1246 | ||
@@ -1284,7 +1252,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1284 | int rc = -EFAULT; | 1252 | int rc = -EFAULT; |
1285 | int bytes_written = 0; | 1253 | int bytes_written = 0; |
1286 | struct cifs_sb_info *cifs_sb; | 1254 | struct cifs_sb_info *cifs_sb; |
1287 | struct cifsTconInfo *pTcon; | ||
1288 | struct inode *inode; | 1255 | struct inode *inode; |
1289 | struct cifsFileInfo *open_file; | 1256 | struct cifsFileInfo *open_file; |
1290 | 1257 | ||
@@ -1293,7 +1260,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1293 | 1260 | ||
1294 | inode = page->mapping->host; | 1261 | inode = page->mapping->host; |
1295 | cifs_sb = CIFS_SB(inode->i_sb); | 1262 | cifs_sb = CIFS_SB(inode->i_sb); |
1296 | pTcon = cifs_sb->tcon; | ||
1297 | 1263 | ||
1298 | offset += (loff_t)from; | 1264 | offset += (loff_t)from; |
1299 | write_data = kmap(page); | 1265 | write_data = kmap(page); |
@@ -1314,10 +1280,10 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1314 | if (mapping->host->i_size - offset < (loff_t)to) | 1280 | if (mapping->host->i_size - offset < (loff_t)to) |
1315 | to = (unsigned)(mapping->host->i_size - offset); | 1281 | to = (unsigned)(mapping->host->i_size - offset); |
1316 | 1282 | ||
1317 | open_file = find_writable_file(CIFS_I(mapping->host)); | 1283 | open_file = find_writable_file(CIFS_I(mapping->host), false); |
1318 | if (open_file) { | 1284 | if (open_file) { |
1319 | bytes_written = cifs_write(open_file->pfile, write_data, | 1285 | bytes_written = cifs_write(open_file, write_data, |
1320 | to-from, &offset); | 1286 | to - from, &offset); |
1321 | cifsFileInfo_put(open_file); | 1287 | cifsFileInfo_put(open_file); |
1322 | /* Does mm or vfs already set times? */ | 1288 | /* Does mm or vfs already set times? */ |
1323 | inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); | 1289 | inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb); |
@@ -1337,7 +1303,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to) | |||
1337 | static int cifs_writepages(struct address_space *mapping, | 1303 | static int cifs_writepages(struct address_space *mapping, |
1338 | struct writeback_control *wbc) | 1304 | struct writeback_control *wbc) |
1339 | { | 1305 | { |
1340 | struct backing_dev_info *bdi = mapping->backing_dev_info; | ||
1341 | unsigned int bytes_to_write; | 1306 | unsigned int bytes_to_write; |
1342 | unsigned int bytes_written; | 1307 | unsigned int bytes_written; |
1343 | struct cifs_sb_info *cifs_sb; | 1308 | struct cifs_sb_info *cifs_sb; |
@@ -1352,6 +1317,7 @@ static int cifs_writepages(struct address_space *mapping, | |||
1352 | int nr_pages; | 1317 | int nr_pages; |
1353 | __u64 offset = 0; | 1318 | __u64 offset = 0; |
1354 | struct cifsFileInfo *open_file; | 1319 | struct cifsFileInfo *open_file; |
1320 | struct cifsTconInfo *tcon; | ||
1355 | struct cifsInodeInfo *cifsi = CIFS_I(mapping->host); | 1321 | struct cifsInodeInfo *cifsi = CIFS_I(mapping->host); |
1356 | struct page *page; | 1322 | struct page *page; |
1357 | struct pagevec pvec; | 1323 | struct pagevec pvec; |
@@ -1368,27 +1334,29 @@ static int cifs_writepages(struct address_space *mapping, | |||
1368 | if (cifs_sb->wsize < PAGE_CACHE_SIZE) | 1334 | if (cifs_sb->wsize < PAGE_CACHE_SIZE) |
1369 | return generic_writepages(mapping, wbc); | 1335 | return generic_writepages(mapping, wbc); |
1370 | 1336 | ||
1371 | if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server)) | ||
1372 | if (cifs_sb->tcon->ses->server->secMode & | ||
1373 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | ||
1374 | if (!experimEnabled) | ||
1375 | return generic_writepages(mapping, wbc); | ||
1376 | |||
1377 | iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL); | 1337 | iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL); |
1378 | if (iov == NULL) | 1338 | if (iov == NULL) |
1379 | return generic_writepages(mapping, wbc); | 1339 | return generic_writepages(mapping, wbc); |
1380 | 1340 | ||
1381 | |||
1382 | /* | 1341 | /* |
1383 | * BB: Is this meaningful for a non-block-device file system? | 1342 | * if there's no open file, then this is likely to fail too, |
1384 | * If it is, we should test it again after we do I/O | 1343 | * but it'll at least handle the return. Maybe it should be |
1344 | * a BUG() instead? | ||
1385 | */ | 1345 | */ |
1386 | if (wbc->nonblocking && bdi_write_congested(bdi)) { | 1346 | open_file = find_writable_file(CIFS_I(mapping->host), false); |
1387 | wbc->encountered_congestion = 1; | 1347 | if (!open_file) { |
1388 | kfree(iov); | 1348 | kfree(iov); |
1389 | return 0; | 1349 | return generic_writepages(mapping, wbc); |
1390 | } | 1350 | } |
1391 | 1351 | ||
1352 | tcon = tlink_tcon(open_file->tlink); | ||
1353 | if (!experimEnabled && tcon->ses->server->secMode & | ||
1354 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) { | ||
1355 | cifsFileInfo_put(open_file); | ||
1356 | return generic_writepages(mapping, wbc); | ||
1357 | } | ||
1358 | cifsFileInfo_put(open_file); | ||
1359 | |||
1392 | xid = GetXid(); | 1360 | xid = GetXid(); |
1393 | 1361 | ||
1394 | pagevec_init(&pvec, 0); | 1362 | pagevec_init(&pvec, 0); |
@@ -1492,38 +1460,34 @@ retry: | |||
1492 | break; | 1460 | break; |
1493 | } | 1461 | } |
1494 | if (n_iov) { | 1462 | if (n_iov) { |
1495 | /* Search for a writable handle every time we call | 1463 | open_file = find_writable_file(CIFS_I(mapping->host), |
1496 | * CIFSSMBWrite2. We can't rely on the last handle | 1464 | false); |
1497 | * we used to still be valid | ||
1498 | */ | ||
1499 | open_file = find_writable_file(CIFS_I(mapping->host)); | ||
1500 | if (!open_file) { | 1465 | if (!open_file) { |
1501 | cERROR(1, "No writable handles for inode"); | 1466 | cERROR(1, "No writable handles for inode"); |
1502 | rc = -EBADF; | 1467 | rc = -EBADF; |
1503 | } else { | 1468 | } else { |
1504 | long_op = cifs_write_timeout(cifsi, offset); | 1469 | long_op = cifs_write_timeout(cifsi, offset); |
1505 | rc = CIFSSMBWrite2(xid, cifs_sb->tcon, | 1470 | rc = CIFSSMBWrite2(xid, tcon, open_file->netfid, |
1506 | open_file->netfid, | ||
1507 | bytes_to_write, offset, | 1471 | bytes_to_write, offset, |
1508 | &bytes_written, iov, n_iov, | 1472 | &bytes_written, iov, n_iov, |
1509 | long_op); | 1473 | long_op); |
1510 | cifsFileInfo_put(open_file); | 1474 | cifsFileInfo_put(open_file); |
1511 | cifs_update_eof(cifsi, offset, bytes_written); | 1475 | cifs_update_eof(cifsi, offset, bytes_written); |
1476 | } | ||
1512 | 1477 | ||
1513 | if (rc || bytes_written < bytes_to_write) { | 1478 | if (rc || bytes_written < bytes_to_write) { |
1514 | cERROR(1, "Write2 ret %d, wrote %d", | 1479 | cERROR(1, "Write2 ret %d, wrote %d", |
1515 | rc, bytes_written); | 1480 | rc, bytes_written); |
1516 | /* BB what if continued retry is | 1481 | /* BB what if continued retry is |
1517 | requested via mount flags? */ | 1482 | requested via mount flags? */ |
1518 | if (rc == -ENOSPC) | 1483 | if (rc == -ENOSPC) |
1519 | set_bit(AS_ENOSPC, &mapping->flags); | 1484 | set_bit(AS_ENOSPC, &mapping->flags); |
1520 | else | 1485 | else |
1521 | set_bit(AS_EIO, &mapping->flags); | 1486 | set_bit(AS_EIO, &mapping->flags); |
1522 | } else { | 1487 | } else { |
1523 | cifs_stats_bytes_written(cifs_sb->tcon, | 1488 | cifs_stats_bytes_written(tcon, bytes_written); |
1524 | bytes_written); | ||
1525 | } | ||
1526 | } | 1489 | } |
1490 | |||
1527 | for (i = 0; i < n_iov; i++) { | 1491 | for (i = 0; i < n_iov; i++) { |
1528 | page = pvec.pages[first + i]; | 1492 | page = pvec.pages[first + i]; |
1529 | /* Should we also set page error on | 1493 | /* Should we also set page error on |
@@ -1624,7 +1588,8 @@ static int cifs_write_end(struct file *file, struct address_space *mapping, | |||
1624 | /* BB check if anything else missing out of ppw | 1588 | /* BB check if anything else missing out of ppw |
1625 | such as updating last write time */ | 1589 | such as updating last write time */ |
1626 | page_data = kmap(page); | 1590 | page_data = kmap(page); |
1627 | rc = cifs_write(file, page_data + offset, copied, &pos); | 1591 | rc = cifs_write(file->private_data, page_data + offset, |
1592 | copied, &pos); | ||
1628 | /* if (rc < 0) should we set writebehind rc? */ | 1593 | /* if (rc < 0) should we set writebehind rc? */ |
1629 | kunmap(page); | 1594 | kunmap(page); |
1630 | 1595 | ||
@@ -1665,7 +1630,7 @@ int cifs_fsync(struct file *file, int datasync) | |||
1665 | if (rc == 0) { | 1630 | if (rc == 0) { |
1666 | rc = CIFS_I(inode)->write_behind_rc; | 1631 | rc = CIFS_I(inode)->write_behind_rc; |
1667 | CIFS_I(inode)->write_behind_rc = 0; | 1632 | CIFS_I(inode)->write_behind_rc = 0; |
1668 | tcon = CIFS_SB(inode->i_sb)->tcon; | 1633 | tcon = tlink_tcon(smbfile->tlink); |
1669 | if (!rc && tcon && smbfile && | 1634 | if (!rc && tcon && smbfile && |
1670 | !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) | 1635 | !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) |
1671 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); | 1636 | rc = CIFSSMBFlush(xid, tcon, smbfile->netfid); |
@@ -1750,7 +1715,6 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, | |||
1750 | 1715 | ||
1751 | xid = GetXid(); | 1716 | xid = GetXid(); |
1752 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1717 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1753 | pTcon = cifs_sb->tcon; | ||
1754 | 1718 | ||
1755 | if (file->private_data == NULL) { | 1719 | if (file->private_data == NULL) { |
1756 | rc = -EBADF; | 1720 | rc = -EBADF; |
@@ -1758,6 +1722,7 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, | |||
1758 | return rc; | 1722 | return rc; |
1759 | } | 1723 | } |
1760 | open_file = file->private_data; | 1724 | open_file = file->private_data; |
1725 | pTcon = tlink_tcon(open_file->tlink); | ||
1761 | 1726 | ||
1762 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) | 1727 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) |
1763 | cFYI(1, "attempting read on write only file instance"); | 1728 | cFYI(1, "attempting read on write only file instance"); |
@@ -1771,9 +1736,8 @@ ssize_t cifs_user_read(struct file *file, char __user *read_data, | |||
1771 | smb_read_data = NULL; | 1736 | smb_read_data = NULL; |
1772 | while (rc == -EAGAIN) { | 1737 | while (rc == -EAGAIN) { |
1773 | int buf_type = CIFS_NO_BUFFER; | 1738 | int buf_type = CIFS_NO_BUFFER; |
1774 | if ((open_file->invalidHandle) && | 1739 | if (open_file->invalidHandle) { |
1775 | (!open_file->closePend)) { | 1740 | rc = cifs_reopen_file(open_file, true); |
1776 | rc = cifs_reopen_file(file, true); | ||
1777 | if (rc != 0) | 1741 | if (rc != 0) |
1778 | break; | 1742 | break; |
1779 | } | 1743 | } |
@@ -1831,7 +1795,6 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1831 | 1795 | ||
1832 | xid = GetXid(); | 1796 | xid = GetXid(); |
1833 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1797 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1834 | pTcon = cifs_sb->tcon; | ||
1835 | 1798 | ||
1836 | if (file->private_data == NULL) { | 1799 | if (file->private_data == NULL) { |
1837 | rc = -EBADF; | 1800 | rc = -EBADF; |
@@ -1839,6 +1802,7 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1839 | return rc; | 1802 | return rc; |
1840 | } | 1803 | } |
1841 | open_file = file->private_data; | 1804 | open_file = file->private_data; |
1805 | pTcon = tlink_tcon(open_file->tlink); | ||
1842 | 1806 | ||
1843 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) | 1807 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) |
1844 | cFYI(1, "attempting read on write only file instance"); | 1808 | cFYI(1, "attempting read on write only file instance"); |
@@ -1857,9 +1821,8 @@ static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size, | |||
1857 | } | 1821 | } |
1858 | rc = -EAGAIN; | 1822 | rc = -EAGAIN; |
1859 | while (rc == -EAGAIN) { | 1823 | while (rc == -EAGAIN) { |
1860 | if ((open_file->invalidHandle) && | 1824 | if (open_file->invalidHandle) { |
1861 | (!open_file->closePend)) { | 1825 | rc = cifs_reopen_file(open_file, true); |
1862 | rc = cifs_reopen_file(file, true); | ||
1863 | if (rc != 0) | 1826 | if (rc != 0) |
1864 | break; | 1827 | break; |
1865 | } | 1828 | } |
@@ -1974,7 +1937,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
1974 | } | 1937 | } |
1975 | open_file = file->private_data; | 1938 | open_file = file->private_data; |
1976 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 1939 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
1977 | pTcon = cifs_sb->tcon; | 1940 | pTcon = tlink_tcon(open_file->tlink); |
1978 | 1941 | ||
1979 | /* | 1942 | /* |
1980 | * Reads as many pages as possible from fscache. Returns -ENOBUFS | 1943 | * Reads as many pages as possible from fscache. Returns -ENOBUFS |
@@ -2022,9 +1985,8 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
2022 | read_size, contig_pages); | 1985 | read_size, contig_pages); |
2023 | rc = -EAGAIN; | 1986 | rc = -EAGAIN; |
2024 | while (rc == -EAGAIN) { | 1987 | while (rc == -EAGAIN) { |
2025 | if ((open_file->invalidHandle) && | 1988 | if (open_file->invalidHandle) { |
2026 | (!open_file->closePend)) { | 1989 | rc = cifs_reopen_file(open_file, true); |
2027 | rc = cifs_reopen_file(file, true); | ||
2028 | if (rc != 0) | 1990 | if (rc != 0) |
2029 | break; | 1991 | break; |
2030 | } | 1992 | } |
@@ -2173,18 +2135,14 @@ static int is_inode_writable(struct cifsInodeInfo *cifs_inode) | |||
2173 | { | 2135 | { |
2174 | struct cifsFileInfo *open_file; | 2136 | struct cifsFileInfo *open_file; |
2175 | 2137 | ||
2176 | read_lock(&GlobalSMBSeslock); | 2138 | spin_lock(&cifs_file_list_lock); |
2177 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 2139 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
2178 | if (open_file->closePend) | 2140 | if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) { |
2179 | continue; | 2141 | spin_unlock(&cifs_file_list_lock); |
2180 | if (open_file->pfile && | ||
2181 | ((open_file->pfile->f_flags & O_RDWR) || | ||
2182 | (open_file->pfile->f_flags & O_WRONLY))) { | ||
2183 | read_unlock(&GlobalSMBSeslock); | ||
2184 | return 1; | 2142 | return 1; |
2185 | } | 2143 | } |
2186 | } | 2144 | } |
2187 | read_unlock(&GlobalSMBSeslock); | 2145 | spin_unlock(&cifs_file_list_lock); |
2188 | return 0; | 2146 | return 0; |
2189 | } | 2147 | } |
2190 | 2148 | ||
@@ -2310,9 +2268,8 @@ void cifs_oplock_break(struct work_struct *work) | |||
2310 | { | 2268 | { |
2311 | struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, | 2269 | struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo, |
2312 | oplock_break); | 2270 | oplock_break); |
2313 | struct inode *inode = cfile->pInode; | 2271 | struct inode *inode = cfile->dentry->d_inode; |
2314 | struct cifsInodeInfo *cinode = CIFS_I(inode); | 2272 | struct cifsInodeInfo *cinode = CIFS_I(inode); |
2315 | struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb); | ||
2316 | int rc, waitrc = 0; | 2273 | int rc, waitrc = 0; |
2317 | 2274 | ||
2318 | if (inode && S_ISREG(inode->i_mode)) { | 2275 | if (inode && S_ISREG(inode->i_mode)) { |
@@ -2338,9 +2295,9 @@ void cifs_oplock_break(struct work_struct *work) | |||
2338 | * not bother sending an oplock release if session to server still is | 2295 | * not bother sending an oplock release if session to server still is |
2339 | * disconnected since oplock already released by the server | 2296 | * disconnected since oplock already released by the server |
2340 | */ | 2297 | */ |
2341 | if (!cfile->closePend && !cfile->oplock_break_cancelled) { | 2298 | if (!cfile->oplock_break_cancelled) { |
2342 | rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0, | 2299 | rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0, |
2343 | LOCKING_ANDX_OPLOCK_RELEASE, false); | 2300 | 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false); |
2344 | cFYI(1, "Oplock release rc = %d", rc); | 2301 | cFYI(1, "Oplock release rc = %d", rc); |
2345 | } | 2302 | } |
2346 | 2303 | ||
@@ -2349,22 +2306,22 @@ void cifs_oplock_break(struct work_struct *work) | |||
2349 | * finished grabbing reference for us. Make sure it's done by | 2306 | * finished grabbing reference for us. Make sure it's done by |
2350 | * waiting for GlobalSMSSeslock. | 2307 | * waiting for GlobalSMSSeslock. |
2351 | */ | 2308 | */ |
2352 | write_lock(&GlobalSMBSeslock); | 2309 | spin_lock(&cifs_file_list_lock); |
2353 | write_unlock(&GlobalSMBSeslock); | 2310 | spin_unlock(&cifs_file_list_lock); |
2354 | 2311 | ||
2355 | cifs_oplock_break_put(cfile); | 2312 | cifs_oplock_break_put(cfile); |
2356 | } | 2313 | } |
2357 | 2314 | ||
2358 | void cifs_oplock_break_get(struct cifsFileInfo *cfile) | 2315 | void cifs_oplock_break_get(struct cifsFileInfo *cfile) |
2359 | { | 2316 | { |
2360 | mntget(cfile->mnt); | 2317 | cifs_sb_active(cfile->dentry->d_sb); |
2361 | cifsFileInfo_get(cfile); | 2318 | cifsFileInfo_get(cfile); |
2362 | } | 2319 | } |
2363 | 2320 | ||
2364 | void cifs_oplock_break_put(struct cifsFileInfo *cfile) | 2321 | void cifs_oplock_break_put(struct cifsFileInfo *cfile) |
2365 | { | 2322 | { |
2366 | mntput(cfile->mnt); | ||
2367 | cifsFileInfo_put(cfile); | 2323 | cifsFileInfo_put(cfile); |
2324 | cifs_sb_deactive(cfile->dentry->d_sb); | ||
2368 | } | 2325 | } |
2369 | 2326 | ||
2370 | const struct address_space_operations cifs_addr_ops = { | 2327 | const struct address_space_operations cifs_addr_ops = { |
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index 9f3f5c4be161..a2ad94efcfe6 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c | |||
@@ -62,15 +62,15 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) | |||
62 | { | 62 | { |
63 | struct cifsInodeInfo *cifsi = CIFS_I(inode); | 63 | struct cifsInodeInfo *cifsi = CIFS_I(inode); |
64 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 64 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
65 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
65 | 66 | ||
66 | if (cifsi->fscache) | 67 | if (cifsi->fscache) |
67 | return; | 68 | return; |
68 | 69 | ||
69 | cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache, | 70 | cifsi->fscache = fscache_acquire_cookie(tcon->fscache, |
70 | &cifs_fscache_inode_object_def, | 71 | &cifs_fscache_inode_object_def, cifsi); |
71 | cifsi); | 72 | cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache, |
72 | cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", | 73 | cifsi->fscache); |
73 | cifs_sb->tcon->fscache, cifsi->fscache); | ||
74 | } | 74 | } |
75 | 75 | ||
76 | void cifs_fscache_release_inode_cookie(struct inode *inode) | 76 | void cifs_fscache_release_inode_cookie(struct inode *inode) |
@@ -117,7 +117,8 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) | |||
117 | /* retire the current fscache cache and get a new one */ | 117 | /* retire the current fscache cache and get a new one */ |
118 | fscache_relinquish_cookie(cifsi->fscache, 1); | 118 | fscache_relinquish_cookie(cifsi->fscache, 1); |
119 | 119 | ||
120 | cifsi->fscache = fscache_acquire_cookie(cifs_sb->tcon->fscache, | 120 | cifsi->fscache = fscache_acquire_cookie( |
121 | cifs_sb_master_tcon(cifs_sb)->fscache, | ||
121 | &cifs_fscache_inode_object_def, | 122 | &cifs_fscache_inode_object_def, |
122 | cifsi); | 123 | cifsi); |
123 | cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p", | 124 | cFYI(1, "CIFS: new cookie 0x%p oldcookie 0x%p", |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 93f77d438d3c..94979309698a 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -52,7 +52,7 @@ static void cifs_set_ops(struct inode *inode, const bool is_dfs_referral) | |||
52 | 52 | ||
53 | 53 | ||
54 | /* check if server can support readpages */ | 54 | /* check if server can support readpages */ |
55 | if (cifs_sb->tcon->ses->server->maxBuf < | 55 | if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf < |
56 | PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) | 56 | PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE) |
57 | inode->i_data.a_ops = &cifs_addr_ops_smallbuf; | 57 | inode->i_data.a_ops = &cifs_addr_ops_smallbuf; |
58 | else | 58 | else |
@@ -288,8 +288,8 @@ int cifs_get_file_info_unix(struct file *filp) | |||
288 | struct cifs_fattr fattr; | 288 | struct cifs_fattr fattr; |
289 | struct inode *inode = filp->f_path.dentry->d_inode; | 289 | struct inode *inode = filp->f_path.dentry->d_inode; |
290 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 290 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
291 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
292 | struct cifsFileInfo *cfile = filp->private_data; | 291 | struct cifsFileInfo *cfile = filp->private_data; |
292 | struct cifsTconInfo *tcon = tlink_tcon(cfile->tlink); | ||
293 | 293 | ||
294 | xid = GetXid(); | 294 | xid = GetXid(); |
295 | rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); | 295 | rc = CIFSSMBUnixQFileInfo(xid, tcon, cfile->netfid, &find_data); |
@@ -313,15 +313,21 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
313 | FILE_UNIX_BASIC_INFO find_data; | 313 | FILE_UNIX_BASIC_INFO find_data; |
314 | struct cifs_fattr fattr; | 314 | struct cifs_fattr fattr; |
315 | struct cifsTconInfo *tcon; | 315 | struct cifsTconInfo *tcon; |
316 | struct tcon_link *tlink; | ||
316 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 317 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
317 | 318 | ||
318 | tcon = cifs_sb->tcon; | ||
319 | cFYI(1, "Getting info on %s", full_path); | 319 | cFYI(1, "Getting info on %s", full_path); |
320 | 320 | ||
321 | tlink = cifs_sb_tlink(cifs_sb); | ||
322 | if (IS_ERR(tlink)) | ||
323 | return PTR_ERR(tlink); | ||
324 | tcon = tlink_tcon(tlink); | ||
325 | |||
321 | /* could have done a find first instead but this returns more info */ | 326 | /* could have done a find first instead but this returns more info */ |
322 | rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, | 327 | rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, |
323 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | 328 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & |
324 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 329 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
330 | cifs_put_tlink(tlink); | ||
325 | 331 | ||
326 | if (!rc) { | 332 | if (!rc) { |
327 | cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); | 333 | cifs_unix_basic_to_fattr(&fattr, &find_data, cifs_sb); |
@@ -332,6 +338,13 @@ int cifs_get_inode_info_unix(struct inode **pinode, | |||
332 | return rc; | 338 | return rc; |
333 | } | 339 | } |
334 | 340 | ||
341 | /* check for Minshall+French symlinks */ | ||
342 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { | ||
343 | int tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); | ||
344 | if (tmprc) | ||
345 | cFYI(1, "CIFSCheckMFSymlink: %d", tmprc); | ||
346 | } | ||
347 | |||
335 | if (*pinode == NULL) { | 348 | if (*pinode == NULL) { |
336 | /* get new inode */ | 349 | /* get new inode */ |
337 | cifs_fill_uniqueid(sb, &fattr); | 350 | cifs_fill_uniqueid(sb, &fattr); |
@@ -353,7 +366,8 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, | |||
353 | int rc; | 366 | int rc; |
354 | int oplock = 0; | 367 | int oplock = 0; |
355 | __u16 netfid; | 368 | __u16 netfid; |
356 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | 369 | struct tcon_link *tlink; |
370 | struct cifsTconInfo *tcon; | ||
357 | char buf[24]; | 371 | char buf[24]; |
358 | unsigned int bytes_read; | 372 | unsigned int bytes_read; |
359 | char *pbuf; | 373 | char *pbuf; |
@@ -372,7 +386,12 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, | |||
372 | return -EINVAL; /* EOPNOTSUPP? */ | 386 | return -EINVAL; /* EOPNOTSUPP? */ |
373 | } | 387 | } |
374 | 388 | ||
375 | rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ, | 389 | tlink = cifs_sb_tlink(cifs_sb); |
390 | if (IS_ERR(tlink)) | ||
391 | return PTR_ERR(tlink); | ||
392 | tcon = tlink_tcon(tlink); | ||
393 | |||
394 | rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, GENERIC_READ, | ||
376 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | 395 | CREATE_NOT_DIR, &netfid, &oplock, NULL, |
377 | cifs_sb->local_nls, | 396 | cifs_sb->local_nls, |
378 | cifs_sb->mnt_cifs_flags & | 397 | cifs_sb->mnt_cifs_flags & |
@@ -380,7 +399,7 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, | |||
380 | if (rc == 0) { | 399 | if (rc == 0) { |
381 | int buf_type = CIFS_NO_BUFFER; | 400 | int buf_type = CIFS_NO_BUFFER; |
382 | /* Read header */ | 401 | /* Read header */ |
383 | rc = CIFSSMBRead(xid, pTcon, netfid, | 402 | rc = CIFSSMBRead(xid, tcon, netfid, |
384 | 24 /* length */, 0 /* offset */, | 403 | 24 /* length */, 0 /* offset */, |
385 | &bytes_read, &pbuf, &buf_type); | 404 | &bytes_read, &pbuf, &buf_type); |
386 | if ((rc == 0) && (bytes_read >= 8)) { | 405 | if ((rc == 0) && (bytes_read >= 8)) { |
@@ -422,8 +441,9 @@ cifs_sfu_type(struct cifs_fattr *fattr, const unsigned char *path, | |||
422 | fattr->cf_dtype = DT_REG; | 441 | fattr->cf_dtype = DT_REG; |
423 | rc = -EOPNOTSUPP; /* or some unknown SFU type */ | 442 | rc = -EOPNOTSUPP; /* or some unknown SFU type */ |
424 | } | 443 | } |
425 | CIFSSMBClose(xid, pTcon, netfid); | 444 | CIFSSMBClose(xid, tcon, netfid); |
426 | } | 445 | } |
446 | cifs_put_tlink(tlink); | ||
427 | return rc; | 447 | return rc; |
428 | } | 448 | } |
429 | 449 | ||
@@ -441,11 +461,19 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, | |||
441 | ssize_t rc; | 461 | ssize_t rc; |
442 | char ea_value[4]; | 462 | char ea_value[4]; |
443 | __u32 mode; | 463 | __u32 mode; |
464 | struct tcon_link *tlink; | ||
465 | struct cifsTconInfo *tcon; | ||
444 | 466 | ||
445 | rc = CIFSSMBQAllEAs(xid, cifs_sb->tcon, path, "SETFILEBITS", | 467 | tlink = cifs_sb_tlink(cifs_sb); |
468 | if (IS_ERR(tlink)) | ||
469 | return PTR_ERR(tlink); | ||
470 | tcon = tlink_tcon(tlink); | ||
471 | |||
472 | rc = CIFSSMBQAllEAs(xid, tcon, path, "SETFILEBITS", | ||
446 | ea_value, 4 /* size of buf */, cifs_sb->local_nls, | 473 | ea_value, 4 /* size of buf */, cifs_sb->local_nls, |
447 | cifs_sb->mnt_cifs_flags & | 474 | cifs_sb->mnt_cifs_flags & |
448 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 475 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
476 | cifs_put_tlink(tlink); | ||
449 | if (rc < 0) | 477 | if (rc < 0) |
450 | return (int)rc; | 478 | return (int)rc; |
451 | else if (rc > 3) { | 479 | else if (rc > 3) { |
@@ -468,6 +496,8 @@ static void | |||
468 | cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, | 496 | cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, |
469 | struct cifs_sb_info *cifs_sb, bool adjust_tz) | 497 | struct cifs_sb_info *cifs_sb, bool adjust_tz) |
470 | { | 498 | { |
499 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
500 | |||
471 | memset(fattr, 0, sizeof(*fattr)); | 501 | memset(fattr, 0, sizeof(*fattr)); |
472 | fattr->cf_cifsattrs = le32_to_cpu(info->Attributes); | 502 | fattr->cf_cifsattrs = le32_to_cpu(info->Attributes); |
473 | if (info->DeletePending) | 503 | if (info->DeletePending) |
@@ -482,8 +512,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, | |||
482 | fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); | 512 | fattr->cf_mtime = cifs_NTtimeToUnix(info->LastWriteTime); |
483 | 513 | ||
484 | if (adjust_tz) { | 514 | if (adjust_tz) { |
485 | fattr->cf_ctime.tv_sec += cifs_sb->tcon->ses->server->timeAdj; | 515 | fattr->cf_ctime.tv_sec += tcon->ses->server->timeAdj; |
486 | fattr->cf_mtime.tv_sec += cifs_sb->tcon->ses->server->timeAdj; | 516 | fattr->cf_mtime.tv_sec += tcon->ses->server->timeAdj; |
487 | } | 517 | } |
488 | 518 | ||
489 | fattr->cf_eof = le64_to_cpu(info->EndOfFile); | 519 | fattr->cf_eof = le64_to_cpu(info->EndOfFile); |
@@ -515,8 +545,8 @@ int cifs_get_file_info(struct file *filp) | |||
515 | struct cifs_fattr fattr; | 545 | struct cifs_fattr fattr; |
516 | struct inode *inode = filp->f_path.dentry->d_inode; | 546 | struct inode *inode = filp->f_path.dentry->d_inode; |
517 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 547 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
518 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
519 | struct cifsFileInfo *cfile = filp->private_data; | 548 | struct cifsFileInfo *cfile = filp->private_data; |
549 | struct cifsTconInfo *tcon = tlink_tcon(cfile->tlink); | ||
520 | 550 | ||
521 | xid = GetXid(); | 551 | xid = GetXid(); |
522 | rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); | 552 | rc = CIFSSMBQFileInfo(xid, tcon, cfile->netfid, &find_data); |
@@ -554,26 +584,33 @@ int cifs_get_inode_info(struct inode **pinode, | |||
554 | { | 584 | { |
555 | int rc = 0, tmprc; | 585 | int rc = 0, tmprc; |
556 | struct cifsTconInfo *pTcon; | 586 | struct cifsTconInfo *pTcon; |
587 | struct tcon_link *tlink; | ||
557 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 588 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
558 | char *buf = NULL; | 589 | char *buf = NULL; |
559 | bool adjustTZ = false; | 590 | bool adjustTZ = false; |
560 | struct cifs_fattr fattr; | 591 | struct cifs_fattr fattr; |
561 | 592 | ||
562 | pTcon = cifs_sb->tcon; | 593 | tlink = cifs_sb_tlink(cifs_sb); |
594 | if (IS_ERR(tlink)) | ||
595 | return PTR_ERR(tlink); | ||
596 | pTcon = tlink_tcon(tlink); | ||
597 | |||
563 | cFYI(1, "Getting info on %s", full_path); | 598 | cFYI(1, "Getting info on %s", full_path); |
564 | 599 | ||
565 | if ((pfindData == NULL) && (*pinode != NULL)) { | 600 | if ((pfindData == NULL) && (*pinode != NULL)) { |
566 | if (CIFS_I(*pinode)->clientCanCacheRead) { | 601 | if (CIFS_I(*pinode)->clientCanCacheRead) { |
567 | cFYI(1, "No need to revalidate cached inode sizes"); | 602 | cFYI(1, "No need to revalidate cached inode sizes"); |
568 | return rc; | 603 | goto cgii_exit; |
569 | } | 604 | } |
570 | } | 605 | } |
571 | 606 | ||
572 | /* if file info not passed in then get it from server */ | 607 | /* if file info not passed in then get it from server */ |
573 | if (pfindData == NULL) { | 608 | if (pfindData == NULL) { |
574 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); | 609 | buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); |
575 | if (buf == NULL) | 610 | if (buf == NULL) { |
576 | return -ENOMEM; | 611 | rc = -ENOMEM; |
612 | goto cgii_exit; | ||
613 | } | ||
577 | pfindData = (FILE_ALL_INFO *)buf; | 614 | pfindData = (FILE_ALL_INFO *)buf; |
578 | 615 | ||
579 | /* could do find first instead but this returns more info */ | 616 | /* could do find first instead but this returns more info */ |
@@ -661,6 +698,13 @@ int cifs_get_inode_info(struct inode **pinode, | |||
661 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) | 698 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) |
662 | cifs_sfu_mode(&fattr, full_path, cifs_sb, xid); | 699 | cifs_sfu_mode(&fattr, full_path, cifs_sb, xid); |
663 | 700 | ||
701 | /* check for Minshall+French symlinks */ | ||
702 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) { | ||
703 | tmprc = CIFSCheckMFSymlink(&fattr, full_path, cifs_sb, xid); | ||
704 | if (tmprc) | ||
705 | cFYI(1, "CIFSCheckMFSymlink: %d", tmprc); | ||
706 | } | ||
707 | |||
664 | if (!*pinode) { | 708 | if (!*pinode) { |
665 | *pinode = cifs_iget(sb, &fattr); | 709 | *pinode = cifs_iget(sb, &fattr); |
666 | if (!*pinode) | 710 | if (!*pinode) |
@@ -671,6 +715,7 @@ int cifs_get_inode_info(struct inode **pinode, | |||
671 | 715 | ||
672 | cgii_exit: | 716 | cgii_exit: |
673 | kfree(buf); | 717 | kfree(buf); |
718 | cifs_put_tlink(tlink); | ||
674 | return rc; | 719 | return rc; |
675 | } | 720 | } |
676 | 721 | ||
@@ -683,6 +728,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) | |||
683 | int pplen = cifs_sb->prepathlen; | 728 | int pplen = cifs_sb->prepathlen; |
684 | int dfsplen; | 729 | int dfsplen; |
685 | char *full_path = NULL; | 730 | char *full_path = NULL; |
731 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
686 | 732 | ||
687 | /* if no prefix path, simply set path to the root of share to "" */ | 733 | /* if no prefix path, simply set path to the root of share to "" */ |
688 | if (pplen == 0) { | 734 | if (pplen == 0) { |
@@ -692,8 +738,8 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) | |||
692 | return full_path; | 738 | return full_path; |
693 | } | 739 | } |
694 | 740 | ||
695 | if (cifs_sb->tcon && (cifs_sb->tcon->Flags & SMB_SHARE_IS_IN_DFS)) | 741 | if (tcon->Flags & SMB_SHARE_IS_IN_DFS) |
696 | dfsplen = strnlen(cifs_sb->tcon->treeName, MAX_TREE_SIZE + 1); | 742 | dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); |
697 | else | 743 | else |
698 | dfsplen = 0; | 744 | dfsplen = 0; |
699 | 745 | ||
@@ -702,7 +748,7 @@ char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb) | |||
702 | return full_path; | 748 | return full_path; |
703 | 749 | ||
704 | if (dfsplen) { | 750 | if (dfsplen) { |
705 | strncpy(full_path, cifs_sb->tcon->treeName, dfsplen); | 751 | strncpy(full_path, tcon->treeName, dfsplen); |
706 | /* switch slash direction in prepath depending on whether | 752 | /* switch slash direction in prepath depending on whether |
707 | * windows or posix style path names | 753 | * windows or posix style path names |
708 | */ | 754 | */ |
@@ -801,6 +847,8 @@ retry_iget5_locked: | |||
801 | inode->i_flags |= S_NOATIME | S_NOCMTIME; | 847 | inode->i_flags |= S_NOATIME | S_NOCMTIME; |
802 | if (inode->i_state & I_NEW) { | 848 | if (inode->i_state & I_NEW) { |
803 | inode->i_ino = hash; | 849 | inode->i_ino = hash; |
850 | if (S_ISREG(inode->i_mode)) | ||
851 | inode->i_data.backing_dev_info = sb->s_bdi; | ||
804 | #ifdef CONFIG_CIFS_FSCACHE | 852 | #ifdef CONFIG_CIFS_FSCACHE |
805 | /* initialize per-inode cache cookie pointer */ | 853 | /* initialize per-inode cache cookie pointer */ |
806 | CIFS_I(inode)->fscache = NULL; | 854 | CIFS_I(inode)->fscache = NULL; |
@@ -816,18 +864,18 @@ retry_iget5_locked: | |||
816 | struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | 864 | struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) |
817 | { | 865 | { |
818 | int xid; | 866 | int xid; |
819 | struct cifs_sb_info *cifs_sb; | 867 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
820 | struct inode *inode = NULL; | 868 | struct inode *inode = NULL; |
821 | long rc; | 869 | long rc; |
822 | char *full_path; | 870 | char *full_path; |
871 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
823 | 872 | ||
824 | cifs_sb = CIFS_SB(sb); | ||
825 | full_path = cifs_build_path_to_root(cifs_sb); | 873 | full_path = cifs_build_path_to_root(cifs_sb); |
826 | if (full_path == NULL) | 874 | if (full_path == NULL) |
827 | return ERR_PTR(-ENOMEM); | 875 | return ERR_PTR(-ENOMEM); |
828 | 876 | ||
829 | xid = GetXid(); | 877 | xid = GetXid(); |
830 | if (cifs_sb->tcon->unix_ext) | 878 | if (tcon->unix_ext) |
831 | rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); | 879 | rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); |
832 | else | 880 | else |
833 | rc = cifs_get_inode_info(&inode, full_path, NULL, sb, | 881 | rc = cifs_get_inode_info(&inode, full_path, NULL, sb, |
@@ -838,10 +886,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino) | |||
838 | 886 | ||
839 | #ifdef CONFIG_CIFS_FSCACHE | 887 | #ifdef CONFIG_CIFS_FSCACHE |
840 | /* populate tcon->resource_id */ | 888 | /* populate tcon->resource_id */ |
841 | cifs_sb->tcon->resource_id = CIFS_I(inode)->uniqueid; | 889 | tcon->resource_id = CIFS_I(inode)->uniqueid; |
842 | #endif | 890 | #endif |
843 | 891 | ||
844 | if (rc && cifs_sb->tcon->ipc) { | 892 | if (rc && tcon->ipc) { |
845 | cFYI(1, "ipc connection - fake read inode"); | 893 | cFYI(1, "ipc connection - fake read inode"); |
846 | inode->i_mode |= S_IFDIR; | 894 | inode->i_mode |= S_IFDIR; |
847 | inode->i_nlink = 2; | 895 | inode->i_nlink = 2; |
@@ -877,7 +925,8 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | |||
877 | struct cifsFileInfo *open_file; | 925 | struct cifsFileInfo *open_file; |
878 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 926 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
879 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 927 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
880 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | 928 | struct tcon_link *tlink = NULL; |
929 | struct cifsTconInfo *pTcon; | ||
881 | FILE_BASIC_INFO info_buf; | 930 | FILE_BASIC_INFO info_buf; |
882 | 931 | ||
883 | if (attrs == NULL) | 932 | if (attrs == NULL) |
@@ -916,13 +965,22 @@ cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | |||
916 | /* | 965 | /* |
917 | * If the file is already open for write, just use that fileid | 966 | * If the file is already open for write, just use that fileid |
918 | */ | 967 | */ |
919 | open_file = find_writable_file(cifsInode); | 968 | open_file = find_writable_file(cifsInode, true); |
920 | if (open_file) { | 969 | if (open_file) { |
921 | netfid = open_file->netfid; | 970 | netfid = open_file->netfid; |
922 | netpid = open_file->pid; | 971 | netpid = open_file->pid; |
972 | pTcon = tlink_tcon(open_file->tlink); | ||
923 | goto set_via_filehandle; | 973 | goto set_via_filehandle; |
924 | } | 974 | } |
925 | 975 | ||
976 | tlink = cifs_sb_tlink(cifs_sb); | ||
977 | if (IS_ERR(tlink)) { | ||
978 | rc = PTR_ERR(tlink); | ||
979 | tlink = NULL; | ||
980 | goto out; | ||
981 | } | ||
982 | pTcon = tlink_tcon(tlink); | ||
983 | |||
926 | /* | 984 | /* |
927 | * NT4 apparently returns success on this call, but it doesn't | 985 | * NT4 apparently returns success on this call, but it doesn't |
928 | * really work. | 986 | * really work. |
@@ -966,6 +1024,8 @@ set_via_filehandle: | |||
966 | else | 1024 | else |
967 | cifsFileInfo_put(open_file); | 1025 | cifsFileInfo_put(open_file); |
968 | out: | 1026 | out: |
1027 | if (tlink != NULL) | ||
1028 | cifs_put_tlink(tlink); | ||
969 | return rc; | 1029 | return rc; |
970 | } | 1030 | } |
971 | 1031 | ||
@@ -983,10 +1043,16 @@ cifs_rename_pending_delete(char *full_path, struct dentry *dentry, int xid) | |||
983 | struct inode *inode = dentry->d_inode; | 1043 | struct inode *inode = dentry->d_inode; |
984 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 1044 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
985 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 1045 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
986 | struct cifsTconInfo *tcon = cifs_sb->tcon; | 1046 | struct tcon_link *tlink; |
1047 | struct cifsTconInfo *tcon; | ||
987 | __u32 dosattr, origattr; | 1048 | __u32 dosattr, origattr; |
988 | FILE_BASIC_INFO *info_buf = NULL; | 1049 | FILE_BASIC_INFO *info_buf = NULL; |
989 | 1050 | ||
1051 | tlink = cifs_sb_tlink(cifs_sb); | ||
1052 | if (IS_ERR(tlink)) | ||
1053 | return PTR_ERR(tlink); | ||
1054 | tcon = tlink_tcon(tlink); | ||
1055 | |||
990 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | 1056 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, |
991 | DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, | 1057 | DELETE|FILE_WRITE_ATTRIBUTES, CREATE_NOT_DIR, |
992 | &netfid, &oplock, NULL, cifs_sb->local_nls, | 1058 | &netfid, &oplock, NULL, cifs_sb->local_nls, |
@@ -1055,6 +1121,7 @@ out_close: | |||
1055 | CIFSSMBClose(xid, tcon, netfid); | 1121 | CIFSSMBClose(xid, tcon, netfid); |
1056 | out: | 1122 | out: |
1057 | kfree(info_buf); | 1123 | kfree(info_buf); |
1124 | cifs_put_tlink(tlink); | ||
1058 | return rc; | 1125 | return rc; |
1059 | 1126 | ||
1060 | /* | 1127 | /* |
@@ -1094,12 +1161,18 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) | |||
1094 | struct cifsInodeInfo *cifs_inode; | 1161 | struct cifsInodeInfo *cifs_inode; |
1095 | struct super_block *sb = dir->i_sb; | 1162 | struct super_block *sb = dir->i_sb; |
1096 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | 1163 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); |
1097 | struct cifsTconInfo *tcon = cifs_sb->tcon; | 1164 | struct tcon_link *tlink; |
1165 | struct cifsTconInfo *tcon; | ||
1098 | struct iattr *attrs = NULL; | 1166 | struct iattr *attrs = NULL; |
1099 | __u32 dosattr = 0, origattr = 0; | 1167 | __u32 dosattr = 0, origattr = 0; |
1100 | 1168 | ||
1101 | cFYI(1, "cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry); | 1169 | cFYI(1, "cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry); |
1102 | 1170 | ||
1171 | tlink = cifs_sb_tlink(cifs_sb); | ||
1172 | if (IS_ERR(tlink)) | ||
1173 | return PTR_ERR(tlink); | ||
1174 | tcon = tlink_tcon(tlink); | ||
1175 | |||
1103 | xid = GetXid(); | 1176 | xid = GetXid(); |
1104 | 1177 | ||
1105 | /* Unlink can be called from rename so we can not take the | 1178 | /* Unlink can be called from rename so we can not take the |
@@ -1107,8 +1180,7 @@ int cifs_unlink(struct inode *dir, struct dentry *dentry) | |||
1107 | full_path = build_path_from_dentry(dentry); | 1180 | full_path = build_path_from_dentry(dentry); |
1108 | if (full_path == NULL) { | 1181 | if (full_path == NULL) { |
1109 | rc = -ENOMEM; | 1182 | rc = -ENOMEM; |
1110 | FreeXid(xid); | 1183 | goto unlink_out; |
1111 | return rc; | ||
1112 | } | 1184 | } |
1113 | 1185 | ||
1114 | if ((tcon->ses->capabilities & CAP_UNIX) && | 1186 | if ((tcon->ses->capabilities & CAP_UNIX) && |
@@ -1174,10 +1246,11 @@ out_reval: | |||
1174 | dir->i_ctime = dir->i_mtime = current_fs_time(sb); | 1246 | dir->i_ctime = dir->i_mtime = current_fs_time(sb); |
1175 | cifs_inode = CIFS_I(dir); | 1247 | cifs_inode = CIFS_I(dir); |
1176 | CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ | 1248 | CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ |
1177 | 1249 | unlink_out: | |
1178 | kfree(full_path); | 1250 | kfree(full_path); |
1179 | kfree(attrs); | 1251 | kfree(attrs); |
1180 | FreeXid(xid); | 1252 | FreeXid(xid); |
1253 | cifs_put_tlink(tlink); | ||
1181 | return rc; | 1254 | return rc; |
1182 | } | 1255 | } |
1183 | 1256 | ||
@@ -1186,6 +1259,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
1186 | int rc = 0, tmprc; | 1259 | int rc = 0, tmprc; |
1187 | int xid; | 1260 | int xid; |
1188 | struct cifs_sb_info *cifs_sb; | 1261 | struct cifs_sb_info *cifs_sb; |
1262 | struct tcon_link *tlink; | ||
1189 | struct cifsTconInfo *pTcon; | 1263 | struct cifsTconInfo *pTcon; |
1190 | char *full_path = NULL; | 1264 | char *full_path = NULL; |
1191 | struct inode *newinode = NULL; | 1265 | struct inode *newinode = NULL; |
@@ -1193,16 +1267,18 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
1193 | 1267 | ||
1194 | cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode); | 1268 | cFYI(1, "In cifs_mkdir, mode = 0x%x inode = 0x%p", mode, inode); |
1195 | 1269 | ||
1196 | xid = GetXid(); | ||
1197 | |||
1198 | cifs_sb = CIFS_SB(inode->i_sb); | 1270 | cifs_sb = CIFS_SB(inode->i_sb); |
1199 | pTcon = cifs_sb->tcon; | 1271 | tlink = cifs_sb_tlink(cifs_sb); |
1272 | if (IS_ERR(tlink)) | ||
1273 | return PTR_ERR(tlink); | ||
1274 | pTcon = tlink_tcon(tlink); | ||
1275 | |||
1276 | xid = GetXid(); | ||
1200 | 1277 | ||
1201 | full_path = build_path_from_dentry(direntry); | 1278 | full_path = build_path_from_dentry(direntry); |
1202 | if (full_path == NULL) { | 1279 | if (full_path == NULL) { |
1203 | rc = -ENOMEM; | 1280 | rc = -ENOMEM; |
1204 | FreeXid(xid); | 1281 | goto mkdir_out; |
1205 | return rc; | ||
1206 | } | 1282 | } |
1207 | 1283 | ||
1208 | if ((pTcon->ses->capabilities & CAP_UNIX) && | 1284 | if ((pTcon->ses->capabilities & CAP_UNIX) && |
@@ -1360,6 +1436,7 @@ mkdir_get_info: | |||
1360 | mkdir_out: | 1436 | mkdir_out: |
1361 | kfree(full_path); | 1437 | kfree(full_path); |
1362 | FreeXid(xid); | 1438 | FreeXid(xid); |
1439 | cifs_put_tlink(tlink); | ||
1363 | return rc; | 1440 | return rc; |
1364 | } | 1441 | } |
1365 | 1442 | ||
@@ -1368,6 +1445,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
1368 | int rc = 0; | 1445 | int rc = 0; |
1369 | int xid; | 1446 | int xid; |
1370 | struct cifs_sb_info *cifs_sb; | 1447 | struct cifs_sb_info *cifs_sb; |
1448 | struct tcon_link *tlink; | ||
1371 | struct cifsTconInfo *pTcon; | 1449 | struct cifsTconInfo *pTcon; |
1372 | char *full_path = NULL; | 1450 | char *full_path = NULL; |
1373 | struct cifsInodeInfo *cifsInode; | 1451 | struct cifsInodeInfo *cifsInode; |
@@ -1376,18 +1454,23 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
1376 | 1454 | ||
1377 | xid = GetXid(); | 1455 | xid = GetXid(); |
1378 | 1456 | ||
1379 | cifs_sb = CIFS_SB(inode->i_sb); | ||
1380 | pTcon = cifs_sb->tcon; | ||
1381 | |||
1382 | full_path = build_path_from_dentry(direntry); | 1457 | full_path = build_path_from_dentry(direntry); |
1383 | if (full_path == NULL) { | 1458 | if (full_path == NULL) { |
1384 | rc = -ENOMEM; | 1459 | rc = -ENOMEM; |
1385 | FreeXid(xid); | 1460 | goto rmdir_exit; |
1386 | return rc; | ||
1387 | } | 1461 | } |
1388 | 1462 | ||
1463 | cifs_sb = CIFS_SB(inode->i_sb); | ||
1464 | tlink = cifs_sb_tlink(cifs_sb); | ||
1465 | if (IS_ERR(tlink)) { | ||
1466 | rc = PTR_ERR(tlink); | ||
1467 | goto rmdir_exit; | ||
1468 | } | ||
1469 | pTcon = tlink_tcon(tlink); | ||
1470 | |||
1389 | rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, | 1471 | rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls, |
1390 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 1472 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
1473 | cifs_put_tlink(tlink); | ||
1391 | 1474 | ||
1392 | if (!rc) { | 1475 | if (!rc) { |
1393 | drop_nlink(inode); | 1476 | drop_nlink(inode); |
@@ -1408,6 +1491,7 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
1408 | direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = | 1491 | direntry->d_inode->i_ctime = inode->i_ctime = inode->i_mtime = |
1409 | current_fs_time(inode->i_sb); | 1492 | current_fs_time(inode->i_sb); |
1410 | 1493 | ||
1494 | rmdir_exit: | ||
1411 | kfree(full_path); | 1495 | kfree(full_path); |
1412 | FreeXid(xid); | 1496 | FreeXid(xid); |
1413 | return rc; | 1497 | return rc; |
@@ -1418,10 +1502,16 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | |||
1418 | struct dentry *to_dentry, const char *toPath) | 1502 | struct dentry *to_dentry, const char *toPath) |
1419 | { | 1503 | { |
1420 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); | 1504 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); |
1421 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | 1505 | struct tcon_link *tlink; |
1506 | struct cifsTconInfo *pTcon; | ||
1422 | __u16 srcfid; | 1507 | __u16 srcfid; |
1423 | int oplock, rc; | 1508 | int oplock, rc; |
1424 | 1509 | ||
1510 | tlink = cifs_sb_tlink(cifs_sb); | ||
1511 | if (IS_ERR(tlink)) | ||
1512 | return PTR_ERR(tlink); | ||
1513 | pTcon = tlink_tcon(tlink); | ||
1514 | |||
1425 | /* try path-based rename first */ | 1515 | /* try path-based rename first */ |
1426 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, | 1516 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, |
1427 | cifs_sb->mnt_cifs_flags & | 1517 | cifs_sb->mnt_cifs_flags & |
@@ -1433,11 +1523,11 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | |||
1433 | * rename by filehandle to various Windows servers. | 1523 | * rename by filehandle to various Windows servers. |
1434 | */ | 1524 | */ |
1435 | if (rc == 0 || rc != -ETXTBSY) | 1525 | if (rc == 0 || rc != -ETXTBSY) |
1436 | return rc; | 1526 | goto do_rename_exit; |
1437 | 1527 | ||
1438 | /* open-file renames don't work across directories */ | 1528 | /* open-file renames don't work across directories */ |
1439 | if (to_dentry->d_parent != from_dentry->d_parent) | 1529 | if (to_dentry->d_parent != from_dentry->d_parent) |
1440 | return rc; | 1530 | goto do_rename_exit; |
1441 | 1531 | ||
1442 | /* open the file to be renamed -- we need DELETE perms */ | 1532 | /* open the file to be renamed -- we need DELETE perms */ |
1443 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, | 1533 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, |
@@ -1453,7 +1543,8 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | |||
1453 | 1543 | ||
1454 | CIFSSMBClose(xid, pTcon, srcfid); | 1544 | CIFSSMBClose(xid, pTcon, srcfid); |
1455 | } | 1545 | } |
1456 | 1546 | do_rename_exit: | |
1547 | cifs_put_tlink(tlink); | ||
1457 | return rc; | 1548 | return rc; |
1458 | } | 1549 | } |
1459 | 1550 | ||
@@ -1463,13 +1554,17 @@ int cifs_rename(struct inode *source_dir, struct dentry *source_dentry, | |||
1463 | char *fromName = NULL; | 1554 | char *fromName = NULL; |
1464 | char *toName = NULL; | 1555 | char *toName = NULL; |
1465 | struct cifs_sb_info *cifs_sb; | 1556 | struct cifs_sb_info *cifs_sb; |
1557 | struct tcon_link *tlink; | ||
1466 | struct cifsTconInfo *tcon; | 1558 | struct cifsTconInfo *tcon; |
1467 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | 1559 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; |
1468 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1560 | FILE_UNIX_BASIC_INFO *info_buf_target; |
1469 | int xid, rc, tmprc; | 1561 | int xid, rc, tmprc; |
1470 | 1562 | ||
1471 | cifs_sb = CIFS_SB(source_dir->i_sb); | 1563 | cifs_sb = CIFS_SB(source_dir->i_sb); |
1472 | tcon = cifs_sb->tcon; | 1564 | tlink = cifs_sb_tlink(cifs_sb); |
1565 | if (IS_ERR(tlink)) | ||
1566 | return PTR_ERR(tlink); | ||
1567 | tcon = tlink_tcon(tlink); | ||
1473 | 1568 | ||
1474 | xid = GetXid(); | 1569 | xid = GetXid(); |
1475 | 1570 | ||
@@ -1545,6 +1640,7 @@ cifs_rename_exit: | |||
1545 | kfree(fromName); | 1640 | kfree(fromName); |
1546 | kfree(toName); | 1641 | kfree(toName); |
1547 | FreeXid(xid); | 1642 | FreeXid(xid); |
1643 | cifs_put_tlink(tlink); | ||
1548 | return rc; | 1644 | return rc; |
1549 | } | 1645 | } |
1550 | 1646 | ||
@@ -1597,11 +1693,12 @@ int cifs_revalidate_file(struct file *filp) | |||
1597 | { | 1693 | { |
1598 | int rc = 0; | 1694 | int rc = 0; |
1599 | struct inode *inode = filp->f_path.dentry->d_inode; | 1695 | struct inode *inode = filp->f_path.dentry->d_inode; |
1696 | struct cifsFileInfo *cfile = (struct cifsFileInfo *) filp->private_data; | ||
1600 | 1697 | ||
1601 | if (!cifs_inode_needs_reval(inode)) | 1698 | if (!cifs_inode_needs_reval(inode)) |
1602 | goto check_inval; | 1699 | goto check_inval; |
1603 | 1700 | ||
1604 | if (CIFS_SB(inode->i_sb)->tcon->unix_ext) | 1701 | if (tlink_tcon(cfile->tlink)->unix_ext) |
1605 | rc = cifs_get_file_info_unix(filp); | 1702 | rc = cifs_get_file_info_unix(filp); |
1606 | else | 1703 | else |
1607 | rc = cifs_get_file_info(filp); | 1704 | rc = cifs_get_file_info(filp); |
@@ -1642,7 +1739,7 @@ int cifs_revalidate_dentry(struct dentry *dentry) | |||
1642 | "jiffies %ld", full_path, inode, inode->i_count.counter, | 1739 | "jiffies %ld", full_path, inode, inode->i_count.counter, |
1643 | dentry, dentry->d_time, jiffies); | 1740 | dentry, dentry->d_time, jiffies); |
1644 | 1741 | ||
1645 | if (CIFS_SB(sb)->tcon->unix_ext) | 1742 | if (cifs_sb_master_tcon(CIFS_SB(sb))->unix_ext) |
1646 | rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); | 1743 | rc = cifs_get_inode_info_unix(&inode, full_path, sb, xid); |
1647 | else | 1744 | else |
1648 | rc = cifs_get_inode_info(&inode, full_path, NULL, sb, | 1745 | rc = cifs_get_inode_info(&inode, full_path, NULL, sb, |
@@ -1658,13 +1755,29 @@ check_inval: | |||
1658 | } | 1755 | } |
1659 | 1756 | ||
1660 | int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1757 | int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, |
1661 | struct kstat *stat) | 1758 | struct kstat *stat) |
1662 | { | 1759 | { |
1760 | struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); | ||
1761 | struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb); | ||
1663 | int err = cifs_revalidate_dentry(dentry); | 1762 | int err = cifs_revalidate_dentry(dentry); |
1763 | |||
1664 | if (!err) { | 1764 | if (!err) { |
1665 | generic_fillattr(dentry->d_inode, stat); | 1765 | generic_fillattr(dentry->d_inode, stat); |
1666 | stat->blksize = CIFS_MAX_MSGSIZE; | 1766 | stat->blksize = CIFS_MAX_MSGSIZE; |
1667 | stat->ino = CIFS_I(dentry->d_inode)->uniqueid; | 1767 | stat->ino = CIFS_I(dentry->d_inode)->uniqueid; |
1768 | |||
1769 | /* | ||
1770 | * If on a multiuser mount without unix extensions, and the | ||
1771 | * admin hasn't overridden them, set the ownership to the | ||
1772 | * fsuid/fsgid of the current process. | ||
1773 | */ | ||
1774 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER) && | ||
1775 | !tcon->unix_ext) { | ||
1776 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) | ||
1777 | stat->uid = current_fsuid(); | ||
1778 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) | ||
1779 | stat->gid = current_fsgid(); | ||
1780 | } | ||
1668 | } | 1781 | } |
1669 | return err; | 1782 | return err; |
1670 | } | 1783 | } |
@@ -1706,7 +1819,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1706 | struct cifsFileInfo *open_file; | 1819 | struct cifsFileInfo *open_file; |
1707 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 1820 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
1708 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 1821 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
1709 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | 1822 | struct tcon_link *tlink = NULL; |
1823 | struct cifsTconInfo *pTcon = NULL; | ||
1710 | 1824 | ||
1711 | /* | 1825 | /* |
1712 | * To avoid spurious oplock breaks from server, in the case of | 1826 | * To avoid spurious oplock breaks from server, in the case of |
@@ -1717,10 +1831,11 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1717 | * writebehind data than the SMB timeout for the SetPathInfo | 1831 | * writebehind data than the SMB timeout for the SetPathInfo |
1718 | * request would allow | 1832 | * request would allow |
1719 | */ | 1833 | */ |
1720 | open_file = find_writable_file(cifsInode); | 1834 | open_file = find_writable_file(cifsInode, true); |
1721 | if (open_file) { | 1835 | if (open_file) { |
1722 | __u16 nfid = open_file->netfid; | 1836 | __u16 nfid = open_file->netfid; |
1723 | __u32 npid = open_file->pid; | 1837 | __u32 npid = open_file->pid; |
1838 | pTcon = tlink_tcon(open_file->tlink); | ||
1724 | rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, | 1839 | rc = CIFSSMBSetFileSize(xid, pTcon, attrs->ia_size, nfid, |
1725 | npid, false); | 1840 | npid, false); |
1726 | cifsFileInfo_put(open_file); | 1841 | cifsFileInfo_put(open_file); |
@@ -1735,6 +1850,13 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1735 | rc = -EINVAL; | 1850 | rc = -EINVAL; |
1736 | 1851 | ||
1737 | if (rc != 0) { | 1852 | if (rc != 0) { |
1853 | if (pTcon == NULL) { | ||
1854 | tlink = cifs_sb_tlink(cifs_sb); | ||
1855 | if (IS_ERR(tlink)) | ||
1856 | return PTR_ERR(tlink); | ||
1857 | pTcon = tlink_tcon(tlink); | ||
1858 | } | ||
1859 | |||
1738 | /* Set file size by pathname rather than by handle | 1860 | /* Set file size by pathname rather than by handle |
1739 | either because no valid, writeable file handle for | 1861 | either because no valid, writeable file handle for |
1740 | it was found or because there was an error setting | 1862 | it was found or because there was an error setting |
@@ -1764,6 +1886,8 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
1764 | CIFSSMBClose(xid, pTcon, netfid); | 1886 | CIFSSMBClose(xid, pTcon, netfid); |
1765 | } | 1887 | } |
1766 | } | 1888 | } |
1889 | if (tlink) | ||
1890 | cifs_put_tlink(tlink); | ||
1767 | } | 1891 | } |
1768 | 1892 | ||
1769 | if (rc == 0) { | 1893 | if (rc == 0) { |
@@ -1784,7 +1908,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
1784 | struct inode *inode = direntry->d_inode; | 1908 | struct inode *inode = direntry->d_inode; |
1785 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | 1909 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
1786 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 1910 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
1787 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | 1911 | struct tcon_link *tlink; |
1912 | struct cifsTconInfo *pTcon; | ||
1788 | struct cifs_unix_set_info_args *args = NULL; | 1913 | struct cifs_unix_set_info_args *args = NULL; |
1789 | struct cifsFileInfo *open_file; | 1914 | struct cifsFileInfo *open_file; |
1790 | 1915 | ||
@@ -1871,17 +1996,25 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | |||
1871 | args->ctime = NO_CHANGE_64; | 1996 | args->ctime = NO_CHANGE_64; |
1872 | 1997 | ||
1873 | args->device = 0; | 1998 | args->device = 0; |
1874 | open_file = find_writable_file(cifsInode); | 1999 | open_file = find_writable_file(cifsInode, true); |
1875 | if (open_file) { | 2000 | if (open_file) { |
1876 | u16 nfid = open_file->netfid; | 2001 | u16 nfid = open_file->netfid; |
1877 | u32 npid = open_file->pid; | 2002 | u32 npid = open_file->pid; |
2003 | pTcon = tlink_tcon(open_file->tlink); | ||
1878 | rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); | 2004 | rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); |
1879 | cifsFileInfo_put(open_file); | 2005 | cifsFileInfo_put(open_file); |
1880 | } else { | 2006 | } else { |
2007 | tlink = cifs_sb_tlink(cifs_sb); | ||
2008 | if (IS_ERR(tlink)) { | ||
2009 | rc = PTR_ERR(tlink); | ||
2010 | goto out; | ||
2011 | } | ||
2012 | pTcon = tlink_tcon(tlink); | ||
1881 | rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, | 2013 | rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, |
1882 | cifs_sb->local_nls, | 2014 | cifs_sb->local_nls, |
1883 | cifs_sb->mnt_cifs_flags & | 2015 | cifs_sb->mnt_cifs_flags & |
1884 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 2016 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
2017 | cifs_put_tlink(tlink); | ||
1885 | } | 2018 | } |
1886 | 2019 | ||
1887 | if (rc) | 2020 | if (rc) |
@@ -2062,7 +2195,7 @@ cifs_setattr(struct dentry *direntry, struct iattr *attrs) | |||
2062 | { | 2195 | { |
2063 | struct inode *inode = direntry->d_inode; | 2196 | struct inode *inode = direntry->d_inode; |
2064 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 2197 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
2065 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | 2198 | struct cifsTconInfo *pTcon = cifs_sb_master_tcon(cifs_sb); |
2066 | 2199 | ||
2067 | if (pTcon->unix_ext) | 2200 | if (pTcon->unix_ext) |
2068 | return cifs_setattr_unix(direntry, attrs); | 2201 | return cifs_setattr_unix(direntry, attrs); |
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c index 9d38a71c8e14..077bf756f342 100644 --- a/fs/cifs/ioctl.c +++ b/fs/cifs/ioctl.c | |||
@@ -37,11 +37,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
37 | int xid; | 37 | int xid; |
38 | struct cifs_sb_info *cifs_sb; | 38 | struct cifs_sb_info *cifs_sb; |
39 | #ifdef CONFIG_CIFS_POSIX | 39 | #ifdef CONFIG_CIFS_POSIX |
40 | struct cifsFileInfo *pSMBFile = filep->private_data; | ||
41 | struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink); | ||
40 | __u64 ExtAttrBits = 0; | 42 | __u64 ExtAttrBits = 0; |
41 | __u64 ExtAttrMask = 0; | 43 | __u64 ExtAttrMask = 0; |
42 | __u64 caps; | 44 | __u64 caps = le64_to_cpu(tcon->fsUnixInfo.Capability); |
43 | struct cifsTconInfo *tcon; | ||
44 | struct cifsFileInfo *pSMBFile = filep->private_data; | ||
45 | #endif /* CONFIG_CIFS_POSIX */ | 45 | #endif /* CONFIG_CIFS_POSIX */ |
46 | 46 | ||
47 | xid = GetXid(); | 47 | xid = GetXid(); |
@@ -50,17 +50,6 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg) | |||
50 | 50 | ||
51 | cifs_sb = CIFS_SB(inode->i_sb); | 51 | cifs_sb = CIFS_SB(inode->i_sb); |
52 | 52 | ||
53 | #ifdef CONFIG_CIFS_POSIX | ||
54 | tcon = cifs_sb->tcon; | ||
55 | if (tcon) | ||
56 | caps = le64_to_cpu(tcon->fsUnixInfo.Capability); | ||
57 | else { | ||
58 | rc = -EIO; | ||
59 | FreeXid(xid); | ||
60 | return -EIO; | ||
61 | } | ||
62 | #endif /* CONFIG_CIFS_POSIX */ | ||
63 | |||
64 | switch (command) { | 53 | switch (command) { |
65 | case CIFS_IOC_CHECKUMOUNT: | 54 | case CIFS_IOC_CHECKUMOUNT: |
66 | cFYI(1, "User unmount attempted"); | 55 | cFYI(1, "User unmount attempted"); |
diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 473ca8033656..85cdbf831e7b 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c | |||
@@ -28,6 +28,296 @@ | |||
28 | #include "cifsproto.h" | 28 | #include "cifsproto.h" |
29 | #include "cifs_debug.h" | 29 | #include "cifs_debug.h" |
30 | #include "cifs_fs_sb.h" | 30 | #include "cifs_fs_sb.h" |
31 | #include "md5.h" | ||
32 | |||
33 | #define CIFS_MF_SYMLINK_LEN_OFFSET (4+1) | ||
34 | #define CIFS_MF_SYMLINK_MD5_OFFSET (CIFS_MF_SYMLINK_LEN_OFFSET+(4+1)) | ||
35 | #define CIFS_MF_SYMLINK_LINK_OFFSET (CIFS_MF_SYMLINK_MD5_OFFSET+(32+1)) | ||
36 | #define CIFS_MF_SYMLINK_LINK_MAXLEN (1024) | ||
37 | #define CIFS_MF_SYMLINK_FILE_SIZE \ | ||
38 | (CIFS_MF_SYMLINK_LINK_OFFSET + CIFS_MF_SYMLINK_LINK_MAXLEN) | ||
39 | |||
40 | #define CIFS_MF_SYMLINK_LEN_FORMAT "XSym\n%04u\n" | ||
41 | #define CIFS_MF_SYMLINK_MD5_FORMAT \ | ||
42 | "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x\n" | ||
43 | #define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) \ | ||
44 | md5_hash[0], md5_hash[1], md5_hash[2], md5_hash[3], \ | ||
45 | md5_hash[4], md5_hash[5], md5_hash[6], md5_hash[7], \ | ||
46 | md5_hash[8], md5_hash[9], md5_hash[10], md5_hash[11],\ | ||
47 | md5_hash[12], md5_hash[13], md5_hash[14], md5_hash[15] | ||
48 | |||
49 | static int | ||
50 | CIFSParseMFSymlink(const u8 *buf, | ||
51 | unsigned int buf_len, | ||
52 | unsigned int *_link_len, | ||
53 | char **_link_str) | ||
54 | { | ||
55 | int rc; | ||
56 | unsigned int link_len; | ||
57 | const char *md5_str1; | ||
58 | const char *link_str; | ||
59 | struct MD5Context md5_ctx; | ||
60 | u8 md5_hash[16]; | ||
61 | char md5_str2[34]; | ||
62 | |||
63 | if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) | ||
64 | return -EINVAL; | ||
65 | |||
66 | md5_str1 = (const char *)&buf[CIFS_MF_SYMLINK_MD5_OFFSET]; | ||
67 | link_str = (const char *)&buf[CIFS_MF_SYMLINK_LINK_OFFSET]; | ||
68 | |||
69 | rc = sscanf(buf, CIFS_MF_SYMLINK_LEN_FORMAT, &link_len); | ||
70 | if (rc != 1) | ||
71 | return -EINVAL; | ||
72 | |||
73 | cifs_MD5_init(&md5_ctx); | ||
74 | cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); | ||
75 | cifs_MD5_final(md5_hash, &md5_ctx); | ||
76 | |||
77 | snprintf(md5_str2, sizeof(md5_str2), | ||
78 | CIFS_MF_SYMLINK_MD5_FORMAT, | ||
79 | CIFS_MF_SYMLINK_MD5_ARGS(md5_hash)); | ||
80 | |||
81 | if (strncmp(md5_str1, md5_str2, 17) != 0) | ||
82 | return -EINVAL; | ||
83 | |||
84 | if (_link_str) { | ||
85 | *_link_str = kstrndup(link_str, link_len, GFP_KERNEL); | ||
86 | if (!*_link_str) | ||
87 | return -ENOMEM; | ||
88 | } | ||
89 | |||
90 | *_link_len = link_len; | ||
91 | return 0; | ||
92 | } | ||
93 | |||
94 | static int | ||
95 | CIFSFormatMFSymlink(u8 *buf, unsigned int buf_len, const char *link_str) | ||
96 | { | ||
97 | unsigned int link_len; | ||
98 | unsigned int ofs; | ||
99 | struct MD5Context md5_ctx; | ||
100 | u8 md5_hash[16]; | ||
101 | |||
102 | if (buf_len != CIFS_MF_SYMLINK_FILE_SIZE) | ||
103 | return -EINVAL; | ||
104 | |||
105 | link_len = strlen(link_str); | ||
106 | |||
107 | if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN) | ||
108 | return -ENAMETOOLONG; | ||
109 | |||
110 | cifs_MD5_init(&md5_ctx); | ||
111 | cifs_MD5_update(&md5_ctx, (const u8 *)link_str, link_len); | ||
112 | cifs_MD5_final(md5_hash, &md5_ctx); | ||
113 | |||
114 | snprintf(buf, buf_len, | ||
115 | CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT, | ||
116 | link_len, | ||
117 | CIFS_MF_SYMLINK_MD5_ARGS(md5_hash)); | ||
118 | |||
119 | ofs = CIFS_MF_SYMLINK_LINK_OFFSET; | ||
120 | memcpy(buf + ofs, link_str, link_len); | ||
121 | |||
122 | ofs += link_len; | ||
123 | if (ofs < CIFS_MF_SYMLINK_FILE_SIZE) { | ||
124 | buf[ofs] = '\n'; | ||
125 | ofs++; | ||
126 | } | ||
127 | |||
128 | while (ofs < CIFS_MF_SYMLINK_FILE_SIZE) { | ||
129 | buf[ofs] = ' '; | ||
130 | ofs++; | ||
131 | } | ||
132 | |||
133 | return 0; | ||
134 | } | ||
135 | |||
136 | static int | ||
137 | CIFSCreateMFSymLink(const int xid, struct cifsTconInfo *tcon, | ||
138 | const char *fromName, const char *toName, | ||
139 | const struct nls_table *nls_codepage, int remap) | ||
140 | { | ||
141 | int rc; | ||
142 | int oplock = 0; | ||
143 | __u16 netfid = 0; | ||
144 | u8 *buf; | ||
145 | unsigned int bytes_written = 0; | ||
146 | |||
147 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | ||
148 | if (!buf) | ||
149 | return -ENOMEM; | ||
150 | |||
151 | rc = CIFSFormatMFSymlink(buf, CIFS_MF_SYMLINK_FILE_SIZE, toName); | ||
152 | if (rc != 0) { | ||
153 | kfree(buf); | ||
154 | return rc; | ||
155 | } | ||
156 | |||
157 | rc = CIFSSMBOpen(xid, tcon, fromName, FILE_CREATE, GENERIC_WRITE, | ||
158 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | ||
159 | nls_codepage, remap); | ||
160 | if (rc != 0) { | ||
161 | kfree(buf); | ||
162 | return rc; | ||
163 | } | ||
164 | |||
165 | rc = CIFSSMBWrite(xid, tcon, netfid, | ||
166 | CIFS_MF_SYMLINK_FILE_SIZE /* length */, | ||
167 | 0 /* offset */, | ||
168 | &bytes_written, buf, NULL, 0); | ||
169 | CIFSSMBClose(xid, tcon, netfid); | ||
170 | kfree(buf); | ||
171 | if (rc != 0) | ||
172 | return rc; | ||
173 | |||
174 | if (bytes_written != CIFS_MF_SYMLINK_FILE_SIZE) | ||
175 | return -EIO; | ||
176 | |||
177 | return 0; | ||
178 | } | ||
179 | |||
180 | static int | ||
181 | CIFSQueryMFSymLink(const int xid, struct cifsTconInfo *tcon, | ||
182 | const unsigned char *searchName, char **symlinkinfo, | ||
183 | const struct nls_table *nls_codepage, int remap) | ||
184 | { | ||
185 | int rc; | ||
186 | int oplock = 0; | ||
187 | __u16 netfid = 0; | ||
188 | u8 *buf; | ||
189 | char *pbuf; | ||
190 | unsigned int bytes_read = 0; | ||
191 | int buf_type = CIFS_NO_BUFFER; | ||
192 | unsigned int link_len = 0; | ||
193 | FILE_ALL_INFO file_info; | ||
194 | |||
195 | rc = CIFSSMBOpen(xid, tcon, searchName, FILE_OPEN, GENERIC_READ, | ||
196 | CREATE_NOT_DIR, &netfid, &oplock, &file_info, | ||
197 | nls_codepage, remap); | ||
198 | if (rc != 0) | ||
199 | return rc; | ||
200 | |||
201 | if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { | ||
202 | CIFSSMBClose(xid, tcon, netfid); | ||
203 | /* it's not a symlink */ | ||
204 | return -EINVAL; | ||
205 | } | ||
206 | |||
207 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | ||
208 | if (!buf) | ||
209 | return -ENOMEM; | ||
210 | pbuf = buf; | ||
211 | |||
212 | rc = CIFSSMBRead(xid, tcon, netfid, | ||
213 | CIFS_MF_SYMLINK_FILE_SIZE /* length */, | ||
214 | 0 /* offset */, | ||
215 | &bytes_read, &pbuf, &buf_type); | ||
216 | CIFSSMBClose(xid, tcon, netfid); | ||
217 | if (rc != 0) { | ||
218 | kfree(buf); | ||
219 | return rc; | ||
220 | } | ||
221 | |||
222 | rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, symlinkinfo); | ||
223 | kfree(buf); | ||
224 | if (rc != 0) | ||
225 | return rc; | ||
226 | |||
227 | return 0; | ||
228 | } | ||
229 | |||
230 | bool | ||
231 | CIFSCouldBeMFSymlink(const struct cifs_fattr *fattr) | ||
232 | { | ||
233 | if (!(fattr->cf_mode & S_IFREG)) | ||
234 | /* it's not a symlink */ | ||
235 | return false; | ||
236 | |||
237 | if (fattr->cf_eof != CIFS_MF_SYMLINK_FILE_SIZE) | ||
238 | /* it's not a symlink */ | ||
239 | return false; | ||
240 | |||
241 | return true; | ||
242 | } | ||
243 | |||
244 | int | ||
245 | CIFSCheckMFSymlink(struct cifs_fattr *fattr, | ||
246 | const unsigned char *path, | ||
247 | struct cifs_sb_info *cifs_sb, int xid) | ||
248 | { | ||
249 | int rc; | ||
250 | int oplock = 0; | ||
251 | __u16 netfid = 0; | ||
252 | struct tcon_link *tlink; | ||
253 | struct cifsTconInfo *pTcon; | ||
254 | u8 *buf; | ||
255 | char *pbuf; | ||
256 | unsigned int bytes_read = 0; | ||
257 | int buf_type = CIFS_NO_BUFFER; | ||
258 | unsigned int link_len = 0; | ||
259 | FILE_ALL_INFO file_info; | ||
260 | |||
261 | if (!CIFSCouldBeMFSymlink(fattr)) | ||
262 | /* it's not a symlink */ | ||
263 | return 0; | ||
264 | |||
265 | tlink = cifs_sb_tlink(cifs_sb); | ||
266 | if (IS_ERR(tlink)) | ||
267 | return PTR_ERR(tlink); | ||
268 | pTcon = tlink_tcon(tlink); | ||
269 | |||
270 | rc = CIFSSMBOpen(xid, pTcon, path, FILE_OPEN, GENERIC_READ, | ||
271 | CREATE_NOT_DIR, &netfid, &oplock, &file_info, | ||
272 | cifs_sb->local_nls, | ||
273 | cifs_sb->mnt_cifs_flags & | ||
274 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
275 | if (rc != 0) | ||
276 | goto out; | ||
277 | |||
278 | if (file_info.EndOfFile != CIFS_MF_SYMLINK_FILE_SIZE) { | ||
279 | CIFSSMBClose(xid, pTcon, netfid); | ||
280 | /* it's not a symlink */ | ||
281 | goto out; | ||
282 | } | ||
283 | |||
284 | buf = kmalloc(CIFS_MF_SYMLINK_FILE_SIZE, GFP_KERNEL); | ||
285 | if (!buf) { | ||
286 | rc = -ENOMEM; | ||
287 | goto out; | ||
288 | } | ||
289 | pbuf = buf; | ||
290 | |||
291 | rc = CIFSSMBRead(xid, pTcon, netfid, | ||
292 | CIFS_MF_SYMLINK_FILE_SIZE /* length */, | ||
293 | 0 /* offset */, | ||
294 | &bytes_read, &pbuf, &buf_type); | ||
295 | CIFSSMBClose(xid, pTcon, netfid); | ||
296 | if (rc != 0) { | ||
297 | kfree(buf); | ||
298 | goto out; | ||
299 | } | ||
300 | |||
301 | rc = CIFSParseMFSymlink(buf, bytes_read, &link_len, NULL); | ||
302 | kfree(buf); | ||
303 | if (rc == -EINVAL) { | ||
304 | /* it's not a symlink */ | ||
305 | rc = 0; | ||
306 | goto out; | ||
307 | } | ||
308 | |||
309 | if (rc != 0) | ||
310 | goto out; | ||
311 | |||
312 | /* it is a symlink */ | ||
313 | fattr->cf_eof = link_len; | ||
314 | fattr->cf_mode &= ~S_IFMT; | ||
315 | fattr->cf_mode |= S_IFLNK | S_IRWXU | S_IRWXG | S_IRWXO; | ||
316 | fattr->cf_dtype = DT_LNK; | ||
317 | out: | ||
318 | cifs_put_tlink(tlink); | ||
319 | return rc; | ||
320 | } | ||
31 | 321 | ||
32 | int | 322 | int |
33 | cifs_hardlink(struct dentry *old_file, struct inode *inode, | 323 | cifs_hardlink(struct dentry *old_file, struct inode *inode, |
@@ -37,17 +327,17 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, | |||
37 | int xid; | 327 | int xid; |
38 | char *fromName = NULL; | 328 | char *fromName = NULL; |
39 | char *toName = NULL; | 329 | char *toName = NULL; |
40 | struct cifs_sb_info *cifs_sb_target; | 330 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
331 | struct tcon_link *tlink; | ||
41 | struct cifsTconInfo *pTcon; | 332 | struct cifsTconInfo *pTcon; |
42 | struct cifsInodeInfo *cifsInode; | 333 | struct cifsInodeInfo *cifsInode; |
43 | 334 | ||
44 | xid = GetXid(); | 335 | tlink = cifs_sb_tlink(cifs_sb); |
45 | 336 | if (IS_ERR(tlink)) | |
46 | cifs_sb_target = CIFS_SB(inode->i_sb); | 337 | return PTR_ERR(tlink); |
47 | pTcon = cifs_sb_target->tcon; | 338 | pTcon = tlink_tcon(tlink); |
48 | 339 | ||
49 | /* No need to check for cross device links since server will do that | 340 | xid = GetXid(); |
50 | BB note DFS case in future though (when we may have to check) */ | ||
51 | 341 | ||
52 | fromName = build_path_from_dentry(old_file); | 342 | fromName = build_path_from_dentry(old_file); |
53 | toName = build_path_from_dentry(direntry); | 343 | toName = build_path_from_dentry(direntry); |
@@ -56,16 +346,15 @@ cifs_hardlink(struct dentry *old_file, struct inode *inode, | |||
56 | goto cifs_hl_exit; | 346 | goto cifs_hl_exit; |
57 | } | 347 | } |
58 | 348 | ||
59 | /* if (cifs_sb_target->tcon->ses->capabilities & CAP_UNIX)*/ | ||
60 | if (pTcon->unix_ext) | 349 | if (pTcon->unix_ext) |
61 | rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, | 350 | rc = CIFSUnixCreateHardLink(xid, pTcon, fromName, toName, |
62 | cifs_sb_target->local_nls, | 351 | cifs_sb->local_nls, |
63 | cifs_sb_target->mnt_cifs_flags & | 352 | cifs_sb->mnt_cifs_flags & |
64 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 353 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
65 | else { | 354 | else { |
66 | rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, | 355 | rc = CIFSCreateHardLink(xid, pTcon, fromName, toName, |
67 | cifs_sb_target->local_nls, | 356 | cifs_sb->local_nls, |
68 | cifs_sb_target->mnt_cifs_flags & | 357 | cifs_sb->mnt_cifs_flags & |
69 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 358 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
70 | if ((rc == -EIO) || (rc == -EINVAL)) | 359 | if ((rc == -EIO) || (rc == -EINVAL)) |
71 | rc = -EOPNOTSUPP; | 360 | rc = -EOPNOTSUPP; |
@@ -101,6 +390,7 @@ cifs_hl_exit: | |||
101 | kfree(fromName); | 390 | kfree(fromName); |
102 | kfree(toName); | 391 | kfree(toName); |
103 | FreeXid(xid); | 392 | FreeXid(xid); |
393 | cifs_put_tlink(tlink); | ||
104 | return rc; | 394 | return rc; |
105 | } | 395 | } |
106 | 396 | ||
@@ -113,10 +403,19 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) | |||
113 | char *full_path = NULL; | 403 | char *full_path = NULL; |
114 | char *target_path = NULL; | 404 | char *target_path = NULL; |
115 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | 405 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
116 | struct cifsTconInfo *tcon = cifs_sb->tcon; | 406 | struct tcon_link *tlink = NULL; |
407 | struct cifsTconInfo *tcon; | ||
117 | 408 | ||
118 | xid = GetXid(); | 409 | xid = GetXid(); |
119 | 410 | ||
411 | tlink = cifs_sb_tlink(cifs_sb); | ||
412 | if (IS_ERR(tlink)) { | ||
413 | rc = PTR_ERR(tlink); | ||
414 | tlink = NULL; | ||
415 | goto out; | ||
416 | } | ||
417 | tcon = tlink_tcon(tlink); | ||
418 | |||
120 | /* | 419 | /* |
121 | * For now, we just handle symlinks with unix extensions enabled. | 420 | * For now, we just handle symlinks with unix extensions enabled. |
122 | * Eventually we should handle NTFS reparse points, and MacOS | 421 | * Eventually we should handle NTFS reparse points, and MacOS |
@@ -130,7 +429,8 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) | |||
130 | * but there doesn't seem to be any harm in allowing the client to | 429 | * but there doesn't seem to be any harm in allowing the client to |
131 | * read them. | 430 | * read them. |
132 | */ | 431 | */ |
133 | if (!(tcon->ses->capabilities & CAP_UNIX)) { | 432 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) |
433 | && !(tcon->ses->capabilities & CAP_UNIX)) { | ||
134 | rc = -EACCES; | 434 | rc = -EACCES; |
135 | goto out; | 435 | goto out; |
136 | } | 436 | } |
@@ -141,8 +441,21 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) | |||
141 | 441 | ||
142 | cFYI(1, "Full path: %s inode = 0x%p", full_path, inode); | 442 | cFYI(1, "Full path: %s inode = 0x%p", full_path, inode); |
143 | 443 | ||
144 | rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, | 444 | rc = -EACCES; |
145 | cifs_sb->local_nls); | 445 | /* |
446 | * First try Minshall+French Symlinks, if configured | ||
447 | * and fallback to UNIX Extensions Symlinks. | ||
448 | */ | ||
449 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) | ||
450 | rc = CIFSQueryMFSymLink(xid, tcon, full_path, &target_path, | ||
451 | cifs_sb->local_nls, | ||
452 | cifs_sb->mnt_cifs_flags & | ||
453 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
454 | |||
455 | if ((rc != 0) && (tcon->ses->capabilities & CAP_UNIX)) | ||
456 | rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path, | ||
457 | cifs_sb->local_nls); | ||
458 | |||
146 | kfree(full_path); | 459 | kfree(full_path); |
147 | out: | 460 | out: |
148 | if (rc != 0) { | 461 | if (rc != 0) { |
@@ -151,6 +464,8 @@ out: | |||
151 | } | 464 | } |
152 | 465 | ||
153 | FreeXid(xid); | 466 | FreeXid(xid); |
467 | if (tlink) | ||
468 | cifs_put_tlink(tlink); | ||
154 | nd_set_link(nd, target_path); | 469 | nd_set_link(nd, target_path); |
155 | return NULL; | 470 | return NULL; |
156 | } | 471 | } |
@@ -160,29 +475,37 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) | |||
160 | { | 475 | { |
161 | int rc = -EOPNOTSUPP; | 476 | int rc = -EOPNOTSUPP; |
162 | int xid; | 477 | int xid; |
163 | struct cifs_sb_info *cifs_sb; | 478 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); |
479 | struct tcon_link *tlink; | ||
164 | struct cifsTconInfo *pTcon; | 480 | struct cifsTconInfo *pTcon; |
165 | char *full_path = NULL; | 481 | char *full_path = NULL; |
166 | struct inode *newinode = NULL; | 482 | struct inode *newinode = NULL; |
167 | 483 | ||
168 | xid = GetXid(); | 484 | xid = GetXid(); |
169 | 485 | ||
170 | cifs_sb = CIFS_SB(inode->i_sb); | 486 | tlink = cifs_sb_tlink(cifs_sb); |
171 | pTcon = cifs_sb->tcon; | 487 | if (IS_ERR(tlink)) { |
488 | rc = PTR_ERR(tlink); | ||
489 | goto symlink_exit; | ||
490 | } | ||
491 | pTcon = tlink_tcon(tlink); | ||
172 | 492 | ||
173 | full_path = build_path_from_dentry(direntry); | 493 | full_path = build_path_from_dentry(direntry); |
174 | |||
175 | if (full_path == NULL) { | 494 | if (full_path == NULL) { |
176 | rc = -ENOMEM; | 495 | rc = -ENOMEM; |
177 | FreeXid(xid); | 496 | goto symlink_exit; |
178 | return rc; | ||
179 | } | 497 | } |
180 | 498 | ||
181 | cFYI(1, "Full path: %s", full_path); | 499 | cFYI(1, "Full path: %s", full_path); |
182 | cFYI(1, "symname is %s", symname); | 500 | cFYI(1, "symname is %s", symname); |
183 | 501 | ||
184 | /* BB what if DFS and this volume is on different share? BB */ | 502 | /* BB what if DFS and this volume is on different share? BB */ |
185 | if (pTcon->unix_ext) | 503 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) |
504 | rc = CIFSCreateMFSymLink(xid, pTcon, full_path, symname, | ||
505 | cifs_sb->local_nls, | ||
506 | cifs_sb->mnt_cifs_flags & | ||
507 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
508 | else if (pTcon->unix_ext) | ||
186 | rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, | 509 | rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, |
187 | cifs_sb->local_nls); | 510 | cifs_sb->local_nls); |
188 | /* else | 511 | /* else |
@@ -208,8 +531,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) | |||
208 | d_instantiate(direntry, newinode); | 531 | d_instantiate(direntry, newinode); |
209 | } | 532 | } |
210 | } | 533 | } |
211 | 534 | symlink_exit: | |
212 | kfree(full_path); | 535 | kfree(full_path); |
536 | cifs_put_tlink(tlink); | ||
213 | FreeXid(xid); | 537 | FreeXid(xid); |
214 | return rc; | 538 | return rc; |
215 | } | 539 | } |
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 3ccadc1326d6..1c681f6a6803 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
@@ -347,7 +347,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
347 | if (current_fsuid() != treeCon->ses->linux_uid) { | 347 | if (current_fsuid() != treeCon->ses->linux_uid) { |
348 | cFYI(1, "Multiuser mode and UID " | 348 | cFYI(1, "Multiuser mode and UID " |
349 | "did not match tcon uid"); | 349 | "did not match tcon uid"); |
350 | read_lock(&cifs_tcp_ses_lock); | 350 | spin_lock(&cifs_tcp_ses_lock); |
351 | list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { | 351 | list_for_each(temp_item, &treeCon->ses->server->smb_ses_list) { |
352 | ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); | 352 | ses = list_entry(temp_item, struct cifsSesInfo, smb_ses_list); |
353 | if (ses->linux_uid == current_fsuid()) { | 353 | if (ses->linux_uid == current_fsuid()) { |
@@ -361,7 +361,7 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
361 | } | 361 | } |
362 | } | 362 | } |
363 | } | 363 | } |
364 | read_unlock(&cifs_tcp_ses_lock); | 364 | spin_unlock(&cifs_tcp_ses_lock); |
365 | } | 365 | } |
366 | } | 366 | } |
367 | } | 367 | } |
@@ -551,7 +551,7 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
551 | return false; | 551 | return false; |
552 | 552 | ||
553 | /* look up tcon based on tid & uid */ | 553 | /* look up tcon based on tid & uid */ |
554 | read_lock(&cifs_tcp_ses_lock); | 554 | spin_lock(&cifs_tcp_ses_lock); |
555 | list_for_each(tmp, &srv->smb_ses_list) { | 555 | list_for_each(tmp, &srv->smb_ses_list) { |
556 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); | 556 | ses = list_entry(tmp, struct cifsSesInfo, smb_ses_list); |
557 | list_for_each(tmp1, &ses->tcon_list) { | 557 | list_for_each(tmp1, &ses->tcon_list) { |
@@ -560,25 +560,15 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
560 | continue; | 560 | continue; |
561 | 561 | ||
562 | cifs_stats_inc(&tcon->num_oplock_brks); | 562 | cifs_stats_inc(&tcon->num_oplock_brks); |
563 | read_lock(&GlobalSMBSeslock); | 563 | spin_lock(&cifs_file_list_lock); |
564 | list_for_each(tmp2, &tcon->openFileList) { | 564 | list_for_each(tmp2, &tcon->openFileList) { |
565 | netfile = list_entry(tmp2, struct cifsFileInfo, | 565 | netfile = list_entry(tmp2, struct cifsFileInfo, |
566 | tlist); | 566 | tlist); |
567 | if (pSMB->Fid != netfile->netfid) | 567 | if (pSMB->Fid != netfile->netfid) |
568 | continue; | 568 | continue; |
569 | 569 | ||
570 | /* | ||
571 | * don't do anything if file is about to be | ||
572 | * closed anyway. | ||
573 | */ | ||
574 | if (netfile->closePend) { | ||
575 | read_unlock(&GlobalSMBSeslock); | ||
576 | read_unlock(&cifs_tcp_ses_lock); | ||
577 | return true; | ||
578 | } | ||
579 | |||
580 | cFYI(1, "file id match, oplock break"); | 570 | cFYI(1, "file id match, oplock break"); |
581 | pCifsInode = CIFS_I(netfile->pInode); | 571 | pCifsInode = CIFS_I(netfile->dentry->d_inode); |
582 | pCifsInode->clientCanCacheAll = false; | 572 | pCifsInode->clientCanCacheAll = false; |
583 | if (pSMB->OplockLevel == 0) | 573 | if (pSMB->OplockLevel == 0) |
584 | pCifsInode->clientCanCacheRead = false; | 574 | pCifsInode->clientCanCacheRead = false; |
@@ -594,17 +584,17 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv) | |||
594 | cifs_oplock_break_get(netfile); | 584 | cifs_oplock_break_get(netfile); |
595 | netfile->oplock_break_cancelled = false; | 585 | netfile->oplock_break_cancelled = false; |
596 | 586 | ||
597 | read_unlock(&GlobalSMBSeslock); | 587 | spin_unlock(&cifs_file_list_lock); |
598 | read_unlock(&cifs_tcp_ses_lock); | 588 | spin_unlock(&cifs_tcp_ses_lock); |
599 | return true; | 589 | return true; |
600 | } | 590 | } |
601 | read_unlock(&GlobalSMBSeslock); | 591 | spin_unlock(&cifs_file_list_lock); |
602 | read_unlock(&cifs_tcp_ses_lock); | 592 | spin_unlock(&cifs_tcp_ses_lock); |
603 | cFYI(1, "No matching file for oplock break"); | 593 | cFYI(1, "No matching file for oplock break"); |
604 | return true; | 594 | return true; |
605 | } | 595 | } |
606 | } | 596 | } |
607 | read_unlock(&cifs_tcp_ses_lock); | 597 | spin_unlock(&cifs_tcp_ses_lock); |
608 | cFYI(1, "Can not process oplock break for non-existent connection"); | 598 | cFYI(1, "Can not process oplock break for non-existent connection"); |
609 | return true; | 599 | return true; |
610 | } | 600 | } |
@@ -729,6 +719,6 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb) | |||
729 | "properly. Hardlinks will not be recognized on this " | 719 | "properly. Hardlinks will not be recognized on this " |
730 | "mount. Consider mounting with the \"noserverino\" " | 720 | "mount. Consider mounting with the \"noserverino\" " |
731 | "option to silence this message.", | 721 | "option to silence this message.", |
732 | cifs_sb->tcon->treeName); | 722 | cifs_sb_master_tcon(cifs_sb)->treeName); |
733 | } | 723 | } |
734 | } | 724 | } |
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h index 49c9a4e75319..5d52e4a3b1ed 100644 --- a/fs/cifs/ntlmssp.h +++ b/fs/cifs/ntlmssp.h | |||
@@ -61,6 +61,21 @@ | |||
61 | #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 | 61 | #define NTLMSSP_NEGOTIATE_KEY_XCH 0x40000000 |
62 | #define NTLMSSP_NEGOTIATE_56 0x80000000 | 62 | #define NTLMSSP_NEGOTIATE_56 0x80000000 |
63 | 63 | ||
64 | /* Define AV Pair Field IDs */ | ||
65 | enum av_field_type { | ||
66 | NTLMSSP_AV_EOL = 0, | ||
67 | NTLMSSP_AV_NB_COMPUTER_NAME, | ||
68 | NTLMSSP_AV_NB_DOMAIN_NAME, | ||
69 | NTLMSSP_AV_DNS_COMPUTER_NAME, | ||
70 | NTLMSSP_AV_DNS_DOMAIN_NAME, | ||
71 | NTLMSSP_AV_DNS_TREE_NAME, | ||
72 | NTLMSSP_AV_FLAGS, | ||
73 | NTLMSSP_AV_TIMESTAMP, | ||
74 | NTLMSSP_AV_RESTRICTION, | ||
75 | NTLMSSP_AV_TARGET_NAME, | ||
76 | NTLMSSP_AV_CHANNEL_BINDINGS | ||
77 | }; | ||
78 | |||
64 | /* Although typedefs are not commonly used for structure definitions */ | 79 | /* Although typedefs are not commonly used for structure definitions */ |
65 | /* in the Linux kernel, in this particular case they are useful */ | 80 | /* in the Linux kernel, in this particular case they are useful */ |
66 | /* to more closely match the standards document for NTLMSSP from */ | 81 | /* to more closely match the standards document for NTLMSSP from */ |
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index d5e591fab475..ef7bb7b50f58 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
@@ -102,7 +102,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, | |||
102 | return NULL; | 102 | return NULL; |
103 | } | 103 | } |
104 | 104 | ||
105 | if (CIFS_SB(sb)->tcon->nocase) | 105 | if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) |
106 | dentry->d_op = &cifs_ci_dentry_ops; | 106 | dentry->d_op = &cifs_ci_dentry_ops; |
107 | else | 107 | else |
108 | dentry->d_op = &cifs_dentry_ops; | 108 | dentry->d_op = &cifs_dentry_ops; |
@@ -171,7 +171,7 @@ static void | |||
171 | cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, | 171 | cifs_std_info_to_fattr(struct cifs_fattr *fattr, FIND_FILE_STANDARD_INFO *info, |
172 | struct cifs_sb_info *cifs_sb) | 172 | struct cifs_sb_info *cifs_sb) |
173 | { | 173 | { |
174 | int offset = cifs_sb->tcon->ses->server->timeAdj; | 174 | int offset = cifs_sb_master_tcon(cifs_sb)->ses->server->timeAdj; |
175 | 175 | ||
176 | memset(fattr, 0, sizeof(*fattr)); | 176 | memset(fattr, 0, sizeof(*fattr)); |
177 | fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate, | 177 | fattr->cf_atime = cnvrtDosUnixTm(info->LastAccessDate, |
@@ -199,7 +199,7 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, | |||
199 | int len; | 199 | int len; |
200 | int oplock = 0; | 200 | int oplock = 0; |
201 | int rc; | 201 | int rc; |
202 | struct cifsTconInfo *ptcon = cifs_sb->tcon; | 202 | struct cifsTconInfo *ptcon = cifs_sb_tcon(cifs_sb); |
203 | char *tmpbuffer; | 203 | char *tmpbuffer; |
204 | 204 | ||
205 | rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ, | 205 | rc = CIFSSMBOpen(xid, ptcon, full_path, FILE_OPEN, GENERIC_READ, |
@@ -223,34 +223,35 @@ int get_symlink_reparse_path(char *full_path, struct cifs_sb_info *cifs_sb, | |||
223 | static int initiate_cifs_search(const int xid, struct file *file) | 223 | static int initiate_cifs_search(const int xid, struct file *file) |
224 | { | 224 | { |
225 | int rc = 0; | 225 | int rc = 0; |
226 | char *full_path; | 226 | char *full_path = NULL; |
227 | struct cifsFileInfo *cifsFile; | 227 | struct cifsFileInfo *cifsFile; |
228 | struct cifs_sb_info *cifs_sb; | 228 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
229 | struct tcon_link *tlink; | ||
229 | struct cifsTconInfo *pTcon; | 230 | struct cifsTconInfo *pTcon; |
230 | 231 | ||
231 | if (file->private_data == NULL) { | 232 | tlink = cifs_sb_tlink(cifs_sb); |
233 | if (IS_ERR(tlink)) | ||
234 | return PTR_ERR(tlink); | ||
235 | pTcon = tlink_tcon(tlink); | ||
236 | |||
237 | if (file->private_data == NULL) | ||
232 | file->private_data = | 238 | file->private_data = |
233 | kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); | 239 | kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL); |
240 | if (file->private_data == NULL) { | ||
241 | rc = -ENOMEM; | ||
242 | goto error_exit; | ||
234 | } | 243 | } |
235 | 244 | ||
236 | if (file->private_data == NULL) | ||
237 | return -ENOMEM; | ||
238 | cifsFile = file->private_data; | 245 | cifsFile = file->private_data; |
239 | cifsFile->invalidHandle = true; | 246 | cifsFile->invalidHandle = true; |
240 | cifsFile->srch_inf.endOfSearch = false; | 247 | cifsFile->srch_inf.endOfSearch = false; |
241 | 248 | cifsFile->tlink = cifs_get_tlink(tlink); | |
242 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | ||
243 | if (cifs_sb == NULL) | ||
244 | return -EINVAL; | ||
245 | |||
246 | pTcon = cifs_sb->tcon; | ||
247 | if (pTcon == NULL) | ||
248 | return -EINVAL; | ||
249 | 249 | ||
250 | full_path = build_path_from_dentry(file->f_path.dentry); | 250 | full_path = build_path_from_dentry(file->f_path.dentry); |
251 | 251 | if (full_path == NULL) { | |
252 | if (full_path == NULL) | 252 | rc = -ENOMEM; |
253 | return -ENOMEM; | 253 | goto error_exit; |
254 | } | ||
254 | 255 | ||
255 | cFYI(1, "Full path: %s start at: %lld", full_path, file->f_pos); | 256 | cFYI(1, "Full path: %s start at: %lld", full_path, file->f_pos); |
256 | 257 | ||
@@ -283,7 +284,9 @@ ffirst_retry: | |||
283 | cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; | 284 | cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM; |
284 | goto ffirst_retry; | 285 | goto ffirst_retry; |
285 | } | 286 | } |
287 | error_exit: | ||
286 | kfree(full_path); | 288 | kfree(full_path); |
289 | cifs_put_tlink(tlink); | ||
287 | return rc; | 290 | return rc; |
288 | } | 291 | } |
289 | 292 | ||
@@ -525,14 +528,14 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
525 | (index_to_find < first_entry_in_buffer)) { | 528 | (index_to_find < first_entry_in_buffer)) { |
526 | /* close and restart search */ | 529 | /* close and restart search */ |
527 | cFYI(1, "search backing up - close and restart search"); | 530 | cFYI(1, "search backing up - close and restart search"); |
528 | write_lock(&GlobalSMBSeslock); | 531 | spin_lock(&cifs_file_list_lock); |
529 | if (!cifsFile->srch_inf.endOfSearch && | 532 | if (!cifsFile->srch_inf.endOfSearch && |
530 | !cifsFile->invalidHandle) { | 533 | !cifsFile->invalidHandle) { |
531 | cifsFile->invalidHandle = true; | 534 | cifsFile->invalidHandle = true; |
532 | write_unlock(&GlobalSMBSeslock); | 535 | spin_unlock(&cifs_file_list_lock); |
533 | CIFSFindClose(xid, pTcon, cifsFile->netfid); | 536 | CIFSFindClose(xid, pTcon, cifsFile->netfid); |
534 | } else | 537 | } else |
535 | write_unlock(&GlobalSMBSeslock); | 538 | spin_unlock(&cifs_file_list_lock); |
536 | if (cifsFile->srch_inf.ntwrk_buf_start) { | 539 | if (cifsFile->srch_inf.ntwrk_buf_start) { |
537 | cFYI(1, "freeing SMB ff cache buf on search rewind"); | 540 | cFYI(1, "freeing SMB ff cache buf on search rewind"); |
538 | if (cifsFile->srch_inf.smallBuf) | 541 | if (cifsFile->srch_inf.smallBuf) |
@@ -738,6 +741,15 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, | |||
738 | cifs_autodisable_serverino(cifs_sb); | 741 | cifs_autodisable_serverino(cifs_sb); |
739 | } | 742 | } |
740 | 743 | ||
744 | if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) && | ||
745 | CIFSCouldBeMFSymlink(&fattr)) | ||
746 | /* | ||
747 | * trying to get the type and mode can be slow, | ||
748 | * so just call those regular files for now, and mark | ||
749 | * for reval | ||
750 | */ | ||
751 | fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; | ||
752 | |||
741 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); | 753 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); |
742 | tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr); | 754 | tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr); |
743 | 755 | ||
@@ -777,9 +789,17 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
777 | xid = GetXid(); | 789 | xid = GetXid(); |
778 | 790 | ||
779 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 791 | cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
780 | pTcon = cifs_sb->tcon; | 792 | |
781 | if (pTcon == NULL) | 793 | /* |
782 | return -EINVAL; | 794 | * Ensure FindFirst doesn't fail before doing filldir() for '.' and |
795 | * '..'. Otherwise we won't be able to notify VFS in case of failure. | ||
796 | */ | ||
797 | if (file->private_data == NULL) { | ||
798 | rc = initiate_cifs_search(xid, file); | ||
799 | cFYI(1, "initiate cifs search rc %d", rc); | ||
800 | if (rc) | ||
801 | goto rddir2_exit; | ||
802 | } | ||
783 | 803 | ||
784 | switch ((int) file->f_pos) { | 804 | switch ((int) file->f_pos) { |
785 | case 0: | 805 | case 0: |
@@ -805,14 +825,6 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
805 | if after then keep searching till find it */ | 825 | if after then keep searching till find it */ |
806 | 826 | ||
807 | if (file->private_data == NULL) { | 827 | if (file->private_data == NULL) { |
808 | rc = initiate_cifs_search(xid, file); | ||
809 | cFYI(1, "initiate cifs search rc %d", rc); | ||
810 | if (rc) { | ||
811 | FreeXid(xid); | ||
812 | return rc; | ||
813 | } | ||
814 | } | ||
815 | if (file->private_data == NULL) { | ||
816 | rc = -EINVAL; | 828 | rc = -EINVAL; |
817 | FreeXid(xid); | 829 | FreeXid(xid); |
818 | return rc; | 830 | return rc; |
@@ -829,6 +841,7 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
829 | CIFSFindClose(xid, pTcon, cifsFile->netfid); | 841 | CIFSFindClose(xid, pTcon, cifsFile->netfid); |
830 | } */ | 842 | } */ |
831 | 843 | ||
844 | pTcon = tlink_tcon(cifsFile->tlink); | ||
832 | rc = find_cifs_entry(xid, pTcon, file, | 845 | rc = find_cifs_entry(xid, pTcon, file, |
833 | ¤t_entry, &num_to_fill); | 846 | ¤t_entry, &num_to_fill); |
834 | if (rc) { | 847 | if (rc) { |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 0a57cb7db5dd..2a11efd96592 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -80,7 +80,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses) | |||
80 | if (max_vcs < 2) | 80 | if (max_vcs < 2) |
81 | max_vcs = 0xFFFF; | 81 | max_vcs = 0xFFFF; |
82 | 82 | ||
83 | write_lock(&cifs_tcp_ses_lock); | 83 | spin_lock(&cifs_tcp_ses_lock); |
84 | if ((ses->need_reconnect) && is_first_ses_reconnect(ses)) | 84 | if ((ses->need_reconnect) && is_first_ses_reconnect(ses)) |
85 | goto get_vc_num_exit; /* vcnum will be zero */ | 85 | goto get_vc_num_exit; /* vcnum will be zero */ |
86 | for (i = ses->server->srv_count - 1; i < max_vcs; i++) { | 86 | for (i = ses->server->srv_count - 1; i < max_vcs; i++) { |
@@ -112,7 +112,7 @@ static __le16 get_next_vcnum(struct cifsSesInfo *ses) | |||
112 | vcnum = i; | 112 | vcnum = i; |
113 | ses->vcnum = vcnum; | 113 | ses->vcnum = vcnum; |
114 | get_vc_num_exit: | 114 | get_vc_num_exit: |
115 | write_unlock(&cifs_tcp_ses_lock); | 115 | spin_unlock(&cifs_tcp_ses_lock); |
116 | 116 | ||
117 | return cpu_to_le16(vcnum); | 117 | return cpu_to_le16(vcnum); |
118 | } | 118 | } |
@@ -383,6 +383,9 @@ static int decode_ascii_ssetup(char **pbcc_area, int bleft, | |||
383 | static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, | 383 | static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, |
384 | struct cifsSesInfo *ses) | 384 | struct cifsSesInfo *ses) |
385 | { | 385 | { |
386 | unsigned int tioffset; /* challenge message target info area */ | ||
387 | unsigned int tilen; /* challenge message target info area length */ | ||
388 | |||
386 | CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; | 389 | CHALLENGE_MESSAGE *pblob = (CHALLENGE_MESSAGE *)bcc_ptr; |
387 | 390 | ||
388 | if (blob_len < sizeof(CHALLENGE_MESSAGE)) { | 391 | if (blob_len < sizeof(CHALLENGE_MESSAGE)) { |
@@ -399,12 +402,25 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, | |||
399 | return -EINVAL; | 402 | return -EINVAL; |
400 | } | 403 | } |
401 | 404 | ||
402 | memcpy(ses->server->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); | 405 | memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE); |
403 | /* BB we could decode pblob->NegotiateFlags; some may be useful */ | 406 | /* BB we could decode pblob->NegotiateFlags; some may be useful */ |
404 | /* In particular we can examine sign flags */ | 407 | /* In particular we can examine sign flags */ |
405 | /* BB spec says that if AvId field of MsvAvTimestamp is populated then | 408 | /* BB spec says that if AvId field of MsvAvTimestamp is populated then |
406 | we must set the MIC field of the AUTHENTICATE_MESSAGE */ | 409 | we must set the MIC field of the AUTHENTICATE_MESSAGE */ |
407 | 410 | ||
411 | tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset); | ||
412 | tilen = cpu_to_le16(pblob->TargetInfoArray.Length); | ||
413 | ses->tilen = tilen; | ||
414 | if (ses->tilen) { | ||
415 | ses->tiblob = kmalloc(tilen, GFP_KERNEL); | ||
416 | if (!ses->tiblob) { | ||
417 | cERROR(1, "Challenge target info allocation failure"); | ||
418 | ses->tilen = 0; | ||
419 | return -ENOMEM; | ||
420 | } | ||
421 | memcpy(ses->tiblob, bcc_ptr + tioffset, ses->tilen); | ||
422 | } | ||
423 | |||
408 | return 0; | 424 | return 0; |
409 | } | 425 | } |
410 | 426 | ||
@@ -425,7 +441,7 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, | |||
425 | /* BB is NTLMV2 session security format easier to use here? */ | 441 | /* BB is NTLMV2 session security format easier to use here? */ |
426 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | | 442 | flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | |
427 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 443 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
428 | NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; | 444 | NTLMSSP_NEGOTIATE_NTLM; |
429 | if (ses->server->secMode & | 445 | if (ses->server->secMode & |
430 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 446 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) |
431 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 447 | flags |= NTLMSSP_NEGOTIATE_SIGN; |
@@ -448,13 +464,16 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, | |||
448 | maximum possible size is fixed and small, making this approach cleaner. | 464 | maximum possible size is fixed and small, making this approach cleaner. |
449 | This function returns the length of the data in the blob */ | 465 | This function returns the length of the data in the blob */ |
450 | static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | 466 | static int build_ntlmssp_auth_blob(unsigned char *pbuffer, |
467 | u16 *buflen, | ||
451 | struct cifsSesInfo *ses, | 468 | struct cifsSesInfo *ses, |
452 | const struct nls_table *nls_cp, bool first) | 469 | const struct nls_table *nls_cp) |
453 | { | 470 | { |
471 | int rc; | ||
472 | unsigned int size; | ||
454 | AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; | 473 | AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer; |
455 | __u32 flags; | 474 | __u32 flags; |
456 | unsigned char *tmp; | 475 | unsigned char *tmp; |
457 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; | 476 | struct ntlmv2_resp ntlmv2_response = {}; |
458 | 477 | ||
459 | memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); | 478 | memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8); |
460 | sec_blob->MessageType = NtLmAuthenticate; | 479 | sec_blob->MessageType = NtLmAuthenticate; |
@@ -462,7 +481,7 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
462 | flags = NTLMSSP_NEGOTIATE_56 | | 481 | flags = NTLMSSP_NEGOTIATE_56 | |
463 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | | 482 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | |
464 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | | 483 | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | |
465 | NTLMSSP_NEGOTIATE_NT_ONLY | NTLMSSP_NEGOTIATE_NTLM; | 484 | NTLMSSP_NEGOTIATE_NTLM; |
466 | if (ses->server->secMode & | 485 | if (ses->server->secMode & |
467 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) | 486 | (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) |
468 | flags |= NTLMSSP_NEGOTIATE_SIGN; | 487 | flags |= NTLMSSP_NEGOTIATE_SIGN; |
@@ -477,19 +496,26 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
477 | sec_blob->LmChallengeResponse.Length = 0; | 496 | sec_blob->LmChallengeResponse.Length = 0; |
478 | sec_blob->LmChallengeResponse.MaximumLength = 0; | 497 | sec_blob->LmChallengeResponse.MaximumLength = 0; |
479 | 498 | ||
480 | /* calculate session key, BB what about adding similar ntlmv2 path? */ | ||
481 | SMBNTencrypt(ses->password, ses->server->cryptKey, ntlm_session_key); | ||
482 | if (first) | ||
483 | cifs_calculate_mac_key(&ses->server->mac_signing_key, | ||
484 | ntlm_session_key, ses->password); | ||
485 | |||
486 | memcpy(tmp, ntlm_session_key, CIFS_SESS_KEY_SIZE); | ||
487 | sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); | 499 | sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer); |
488 | sec_blob->NtChallengeResponse.Length = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 500 | rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp); |
489 | sec_blob->NtChallengeResponse.MaximumLength = | 501 | if (rc) { |
490 | cpu_to_le16(CIFS_SESS_KEY_SIZE); | 502 | cERROR(1, "Error %d during NTLMSSP authentication", rc); |
503 | goto setup_ntlmv2_ret; | ||
504 | } | ||
505 | size = sizeof(struct ntlmv2_resp); | ||
506 | memcpy(tmp, (char *)&ntlmv2_response, size); | ||
507 | tmp += size; | ||
508 | if (ses->tilen > 0) { | ||
509 | memcpy(tmp, ses->tiblob, ses->tilen); | ||
510 | tmp += ses->tilen; | ||
511 | } | ||
491 | 512 | ||
492 | tmp += CIFS_SESS_KEY_SIZE; | 513 | sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen); |
514 | sec_blob->NtChallengeResponse.MaximumLength = | ||
515 | cpu_to_le16(size + ses->tilen); | ||
516 | kfree(ses->tiblob); | ||
517 | ses->tiblob = NULL; | ||
518 | ses->tilen = 0; | ||
493 | 519 | ||
494 | if (ses->domainName == NULL) { | 520 | if (ses->domainName == NULL) { |
495 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 521 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
@@ -501,7 +527,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
501 | len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, | 527 | len = cifs_strtoUCS((__le16 *)tmp, ses->domainName, |
502 | MAX_USERNAME_SIZE, nls_cp); | 528 | MAX_USERNAME_SIZE, nls_cp); |
503 | len *= 2; /* unicode is 2 bytes each */ | 529 | len *= 2; /* unicode is 2 bytes each */ |
504 | len += 2; /* trailing null */ | ||
505 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 530 | sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
506 | sec_blob->DomainName.Length = cpu_to_le16(len); | 531 | sec_blob->DomainName.Length = cpu_to_le16(len); |
507 | sec_blob->DomainName.MaximumLength = cpu_to_le16(len); | 532 | sec_blob->DomainName.MaximumLength = cpu_to_le16(len); |
@@ -518,7 +543,6 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
518 | len = cifs_strtoUCS((__le16 *)tmp, ses->userName, | 543 | len = cifs_strtoUCS((__le16 *)tmp, ses->userName, |
519 | MAX_USERNAME_SIZE, nls_cp); | 544 | MAX_USERNAME_SIZE, nls_cp); |
520 | len *= 2; /* unicode is 2 bytes each */ | 545 | len *= 2; /* unicode is 2 bytes each */ |
521 | len += 2; /* trailing null */ | ||
522 | sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); | 546 | sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer); |
523 | sec_blob->UserName.Length = cpu_to_le16(len); | 547 | sec_blob->UserName.Length = cpu_to_le16(len); |
524 | sec_blob->UserName.MaximumLength = cpu_to_le16(len); | 548 | sec_blob->UserName.MaximumLength = cpu_to_le16(len); |
@@ -533,7 +557,10 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer, | |||
533 | sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); | 557 | sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer); |
534 | sec_blob->SessionKey.Length = 0; | 558 | sec_blob->SessionKey.Length = 0; |
535 | sec_blob->SessionKey.MaximumLength = 0; | 559 | sec_blob->SessionKey.MaximumLength = 0; |
536 | return tmp - pbuffer; | 560 | |
561 | setup_ntlmv2_ret: | ||
562 | *buflen = tmp - pbuffer; | ||
563 | return rc; | ||
537 | } | 564 | } |
538 | 565 | ||
539 | 566 | ||
@@ -545,19 +572,6 @@ static void setup_ntlmssp_neg_req(SESSION_SETUP_ANDX *pSMB, | |||
545 | 572 | ||
546 | return; | 573 | return; |
547 | } | 574 | } |
548 | |||
549 | static int setup_ntlmssp_auth_req(SESSION_SETUP_ANDX *pSMB, | ||
550 | struct cifsSesInfo *ses, | ||
551 | const struct nls_table *nls, bool first_time) | ||
552 | { | ||
553 | int bloblen; | ||
554 | |||
555 | bloblen = build_ntlmssp_auth_blob(&pSMB->req.SecurityBlob[0], ses, nls, | ||
556 | first_time); | ||
557 | pSMB->req.SecurityBlobLength = cpu_to_le16(bloblen); | ||
558 | |||
559 | return bloblen; | ||
560 | } | ||
561 | #endif | 575 | #endif |
562 | 576 | ||
563 | int | 577 | int |
@@ -579,15 +593,12 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, | |||
579 | int bytes_remaining; | 593 | int bytes_remaining; |
580 | struct key *spnego_key = NULL; | 594 | struct key *spnego_key = NULL; |
581 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ | 595 | __le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */ |
582 | bool first_time; | 596 | u16 blob_len; |
597 | char *ntlmsspblob = NULL; | ||
583 | 598 | ||
584 | if (ses == NULL) | 599 | if (ses == NULL) |
585 | return -EINVAL; | 600 | return -EINVAL; |
586 | 601 | ||
587 | read_lock(&cifs_tcp_ses_lock); | ||
588 | first_time = is_first_ses_reconnect(ses); | ||
589 | read_unlock(&cifs_tcp_ses_lock); | ||
590 | |||
591 | type = ses->server->secType; | 602 | type = ses->server->secType; |
592 | 603 | ||
593 | cFYI(1, "sess setup type %d", type); | 604 | cFYI(1, "sess setup type %d", type); |
@@ -658,7 +669,7 @@ ssetup_ntlmssp_authenticate: | |||
658 | /* BB calculate hash with password */ | 669 | /* BB calculate hash with password */ |
659 | /* and copy into bcc */ | 670 | /* and copy into bcc */ |
660 | 671 | ||
661 | calc_lanman_hash(ses->password, ses->server->cryptKey, | 672 | calc_lanman_hash(ses->password, ses->cryptKey, |
662 | ses->server->secMode & SECMODE_PW_ENCRYPT ? | 673 | ses->server->secMode & SECMODE_PW_ENCRYPT ? |
663 | true : false, lnm_session_key); | 674 | true : false, lnm_session_key); |
664 | 675 | ||
@@ -685,15 +696,11 @@ ssetup_ntlmssp_authenticate: | |||
685 | cpu_to_le16(CIFS_SESS_KEY_SIZE); | 696 | cpu_to_le16(CIFS_SESS_KEY_SIZE); |
686 | 697 | ||
687 | /* calculate session key */ | 698 | /* calculate session key */ |
688 | SMBNTencrypt(ses->password, ses->server->cryptKey, | 699 | SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key); |
689 | ntlm_session_key); | ||
690 | 700 | ||
691 | if (first_time) /* should this be moved into common code | 701 | cifs_calculate_session_key(&ses->auth_key, |
692 | with similar ntlmv2 path? */ | 702 | ntlm_session_key, ses->password); |
693 | cifs_calculate_mac_key(&ses->server->mac_signing_key, | ||
694 | ntlm_session_key, ses->password); | ||
695 | /* copy session key */ | 703 | /* copy session key */ |
696 | |||
697 | memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE); | 704 | memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE); |
698 | bcc_ptr += CIFS_SESS_KEY_SIZE; | 705 | bcc_ptr += CIFS_SESS_KEY_SIZE; |
699 | memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE); | 706 | memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE); |
@@ -725,16 +732,31 @@ ssetup_ntlmssp_authenticate: | |||
725 | pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; | 732 | pSMB->req_no_secext.CaseInsensitivePasswordLength = 0; |
726 | /* cpu_to_le16(LM2_SESS_KEY_SIZE); */ | 733 | /* cpu_to_le16(LM2_SESS_KEY_SIZE); */ |
727 | 734 | ||
728 | pSMB->req_no_secext.CaseSensitivePasswordLength = | ||
729 | cpu_to_le16(sizeof(struct ntlmv2_resp)); | ||
730 | |||
731 | /* calculate session key */ | 735 | /* calculate session key */ |
732 | setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); | 736 | rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp); |
733 | /* FIXME: calculate MAC key */ | 737 | if (rc) { |
738 | cERROR(1, "Error %d during NTLMv2 authentication", rc); | ||
739 | kfree(v2_sess_key); | ||
740 | goto ssetup_exit; | ||
741 | } | ||
734 | memcpy(bcc_ptr, (char *)v2_sess_key, | 742 | memcpy(bcc_ptr, (char *)v2_sess_key, |
735 | sizeof(struct ntlmv2_resp)); | 743 | sizeof(struct ntlmv2_resp)); |
736 | bcc_ptr += sizeof(struct ntlmv2_resp); | 744 | bcc_ptr += sizeof(struct ntlmv2_resp); |
737 | kfree(v2_sess_key); | 745 | kfree(v2_sess_key); |
746 | /* set case sensitive password length after tilen may get | ||
747 | * assigned, tilen is 0 otherwise. | ||
748 | */ | ||
749 | pSMB->req_no_secext.CaseSensitivePasswordLength = | ||
750 | cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen); | ||
751 | if (ses->tilen > 0) { | ||
752 | memcpy(bcc_ptr, ses->tiblob, ses->tilen); | ||
753 | bcc_ptr += ses->tilen; | ||
754 | /* we never did allocate ses->domainName to free */ | ||
755 | kfree(ses->tiblob); | ||
756 | ses->tiblob = NULL; | ||
757 | ses->tilen = 0; | ||
758 | } | ||
759 | |||
738 | if (ses->capabilities & CAP_UNICODE) { | 760 | if (ses->capabilities & CAP_UNICODE) { |
739 | if (iov[0].iov_len % 2) { | 761 | if (iov[0].iov_len % 2) { |
740 | *bcc_ptr = 0; | 762 | *bcc_ptr = 0; |
@@ -765,17 +787,14 @@ ssetup_ntlmssp_authenticate: | |||
765 | } | 787 | } |
766 | /* bail out if key is too long */ | 788 | /* bail out if key is too long */ |
767 | if (msg->sesskey_len > | 789 | if (msg->sesskey_len > |
768 | sizeof(ses->server->mac_signing_key.data.krb5)) { | 790 | sizeof(ses->auth_key.data.krb5)) { |
769 | cERROR(1, "Kerberos signing key too long (%u bytes)", | 791 | cERROR(1, "Kerberos signing key too long (%u bytes)", |
770 | msg->sesskey_len); | 792 | msg->sesskey_len); |
771 | rc = -EOVERFLOW; | 793 | rc = -EOVERFLOW; |
772 | goto ssetup_exit; | 794 | goto ssetup_exit; |
773 | } | 795 | } |
774 | if (first_time) { | 796 | ses->auth_key.len = msg->sesskey_len; |
775 | ses->server->mac_signing_key.len = msg->sesskey_len; | 797 | memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len); |
776 | memcpy(ses->server->mac_signing_key.data.krb5, | ||
777 | msg->data, msg->sesskey_len); | ||
778 | } | ||
779 | pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; | 798 | pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC; |
780 | capabilities |= CAP_EXTENDED_SECURITY; | 799 | capabilities |= CAP_EXTENDED_SECURITY; |
781 | pSMB->req.Capabilities = cpu_to_le32(capabilities); | 800 | pSMB->req.Capabilities = cpu_to_le32(capabilities); |
@@ -815,12 +834,30 @@ ssetup_ntlmssp_authenticate: | |||
815 | if (phase == NtLmNegotiate) { | 834 | if (phase == NtLmNegotiate) { |
816 | setup_ntlmssp_neg_req(pSMB, ses); | 835 | setup_ntlmssp_neg_req(pSMB, ses); |
817 | iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); | 836 | iov[1].iov_len = sizeof(NEGOTIATE_MESSAGE); |
837 | iov[1].iov_base = &pSMB->req.SecurityBlob[0]; | ||
818 | } else if (phase == NtLmAuthenticate) { | 838 | } else if (phase == NtLmAuthenticate) { |
819 | int blob_len; | 839 | /* 5 is an empirical value, large enought to |
820 | blob_len = setup_ntlmssp_auth_req(pSMB, ses, | 840 | * hold authenticate message, max 10 of |
821 | nls_cp, | 841 | * av paris, doamin,user,workstation mames, |
822 | first_time); | 842 | * flags etc.. |
843 | */ | ||
844 | ntlmsspblob = kmalloc( | ||
845 | 5*sizeof(struct _AUTHENTICATE_MESSAGE), | ||
846 | GFP_KERNEL); | ||
847 | if (!ntlmsspblob) { | ||
848 | cERROR(1, "Can't allocate NTLMSSP"); | ||
849 | rc = -ENOMEM; | ||
850 | goto ssetup_exit; | ||
851 | } | ||
852 | |||
853 | rc = build_ntlmssp_auth_blob(ntlmsspblob, | ||
854 | &blob_len, ses, nls_cp); | ||
855 | if (rc) | ||
856 | goto ssetup_exit; | ||
823 | iov[1].iov_len = blob_len; | 857 | iov[1].iov_len = blob_len; |
858 | iov[1].iov_base = ntlmsspblob; | ||
859 | pSMB->req.SecurityBlobLength = | ||
860 | cpu_to_le16(blob_len); | ||
824 | /* Make sure that we tell the server that we | 861 | /* Make sure that we tell the server that we |
825 | are using the uid that it just gave us back | 862 | are using the uid that it just gave us back |
826 | on the response (challenge) */ | 863 | on the response (challenge) */ |
@@ -830,7 +867,6 @@ ssetup_ntlmssp_authenticate: | |||
830 | rc = -ENOSYS; | 867 | rc = -ENOSYS; |
831 | goto ssetup_exit; | 868 | goto ssetup_exit; |
832 | } | 869 | } |
833 | iov[1].iov_base = &pSMB->req.SecurityBlob[0]; | ||
834 | /* unicode strings must be word aligned */ | 870 | /* unicode strings must be word aligned */ |
835 | if ((iov[0].iov_len + iov[1].iov_len) % 2) { | 871 | if ((iov[0].iov_len + iov[1].iov_len) % 2) { |
836 | *bcc_ptr = 0; | 872 | *bcc_ptr = 0; |
@@ -895,7 +931,6 @@ ssetup_ntlmssp_authenticate: | |||
895 | bcc_ptr = pByteArea(smb_buf); | 931 | bcc_ptr = pByteArea(smb_buf); |
896 | 932 | ||
897 | if (smb_buf->WordCount == 4) { | 933 | if (smb_buf->WordCount == 4) { |
898 | __u16 blob_len; | ||
899 | blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); | 934 | blob_len = le16_to_cpu(pSMB->resp.SecurityBlobLength); |
900 | if (blob_len > bytes_remaining) { | 935 | if (blob_len > bytes_remaining) { |
901 | cERROR(1, "bad security blob length %d", blob_len); | 936 | cERROR(1, "bad security blob length %d", blob_len); |
@@ -931,6 +966,8 @@ ssetup_exit: | |||
931 | key_put(spnego_key); | 966 | key_put(spnego_key); |
932 | } | 967 | } |
933 | kfree(str_area); | 968 | kfree(str_area); |
969 | kfree(ntlmsspblob); | ||
970 | ntlmsspblob = NULL; | ||
934 | if (resp_buf_type == CIFS_SMALL_BUFFER) { | 971 | if (resp_buf_type == CIFS_SMALL_BUFFER) { |
935 | cFYI(1, "ssetup freeing small buf %p", iov[0].iov_base); | 972 | cFYI(1, "ssetup freeing small buf %p", iov[0].iov_base); |
936 | cifs_small_buf_release(iov[0].iov_base); | 973 | cifs_small_buf_release(iov[0].iov_base); |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index 82f78c4d6978..a66c91eb6eb4 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses, | |||
543 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | 543 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | |
544 | SECMODE_SIGN_ENABLED))) { | 544 | SECMODE_SIGN_ENABLED))) { |
545 | rc = cifs_verify_signature(midQ->resp_buf, | 545 | rc = cifs_verify_signature(midQ->resp_buf, |
546 | &ses->server->mac_signing_key, | 546 | &ses->server->session_key, |
547 | midQ->sequence_number+1); | 547 | midQ->sequence_number+1); |
548 | if (rc) { | 548 | if (rc) { |
549 | cERROR(1, "Unexpected SMB signature"); | 549 | cERROR(1, "Unexpected SMB signature"); |
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses, | |||
731 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | 731 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | |
732 | SECMODE_SIGN_ENABLED))) { | 732 | SECMODE_SIGN_ENABLED))) { |
733 | rc = cifs_verify_signature(out_buf, | 733 | rc = cifs_verify_signature(out_buf, |
734 | &ses->server->mac_signing_key, | 734 | &ses->server->session_key, |
735 | midQ->sequence_number+1); | 735 | midQ->sequence_number+1); |
736 | if (rc) { | 736 | if (rc) { |
737 | cERROR(1, "Unexpected SMB signature"); | 737 | cERROR(1, "Unexpected SMB signature"); |
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon, | |||
981 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | | 981 | (ses->server->secMode & (SECMODE_SIGN_REQUIRED | |
982 | SECMODE_SIGN_ENABLED))) { | 982 | SECMODE_SIGN_ENABLED))) { |
983 | rc = cifs_verify_signature(out_buf, | 983 | rc = cifs_verify_signature(out_buf, |
984 | &ses->server->mac_signing_key, | 984 | &ses->server->session_key, |
985 | midQ->sequence_number+1); | 985 | midQ->sequence_number+1); |
986 | if (rc) { | 986 | if (rc) { |
987 | cERROR(1, "Unexpected SMB signature"); | 987 | cERROR(1, "Unexpected SMB signature"); |
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index a1509207bfa6..a264b744bb41 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c | |||
@@ -47,9 +47,10 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) | |||
47 | #ifdef CONFIG_CIFS_XATTR | 47 | #ifdef CONFIG_CIFS_XATTR |
48 | int xid; | 48 | int xid; |
49 | struct cifs_sb_info *cifs_sb; | 49 | struct cifs_sb_info *cifs_sb; |
50 | struct tcon_link *tlink; | ||
50 | struct cifsTconInfo *pTcon; | 51 | struct cifsTconInfo *pTcon; |
51 | struct super_block *sb; | 52 | struct super_block *sb; |
52 | char *full_path; | 53 | char *full_path = NULL; |
53 | 54 | ||
54 | if (direntry == NULL) | 55 | if (direntry == NULL) |
55 | return -EIO; | 56 | return -EIO; |
@@ -58,16 +59,19 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) | |||
58 | sb = direntry->d_inode->i_sb; | 59 | sb = direntry->d_inode->i_sb; |
59 | if (sb == NULL) | 60 | if (sb == NULL) |
60 | return -EIO; | 61 | return -EIO; |
61 | xid = GetXid(); | ||
62 | 62 | ||
63 | cifs_sb = CIFS_SB(sb); | 63 | cifs_sb = CIFS_SB(sb); |
64 | pTcon = cifs_sb->tcon; | 64 | tlink = cifs_sb_tlink(cifs_sb); |
65 | if (IS_ERR(tlink)) | ||
66 | return PTR_ERR(tlink); | ||
67 | pTcon = tlink_tcon(tlink); | ||
68 | |||
69 | xid = GetXid(); | ||
65 | 70 | ||
66 | full_path = build_path_from_dentry(direntry); | 71 | full_path = build_path_from_dentry(direntry); |
67 | if (full_path == NULL) { | 72 | if (full_path == NULL) { |
68 | rc = -ENOMEM; | 73 | rc = -ENOMEM; |
69 | FreeXid(xid); | 74 | goto remove_ea_exit; |
70 | return rc; | ||
71 | } | 75 | } |
72 | if (ea_name == NULL) { | 76 | if (ea_name == NULL) { |
73 | cFYI(1, "Null xattr names not supported"); | 77 | cFYI(1, "Null xattr names not supported"); |
@@ -91,6 +95,7 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) | |||
91 | remove_ea_exit: | 95 | remove_ea_exit: |
92 | kfree(full_path); | 96 | kfree(full_path); |
93 | FreeXid(xid); | 97 | FreeXid(xid); |
98 | cifs_put_tlink(tlink); | ||
94 | #endif | 99 | #endif |
95 | return rc; | 100 | return rc; |
96 | } | 101 | } |
@@ -102,6 +107,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, | |||
102 | #ifdef CONFIG_CIFS_XATTR | 107 | #ifdef CONFIG_CIFS_XATTR |
103 | int xid; | 108 | int xid; |
104 | struct cifs_sb_info *cifs_sb; | 109 | struct cifs_sb_info *cifs_sb; |
110 | struct tcon_link *tlink; | ||
105 | struct cifsTconInfo *pTcon; | 111 | struct cifsTconInfo *pTcon; |
106 | struct super_block *sb; | 112 | struct super_block *sb; |
107 | char *full_path; | 113 | char *full_path; |
@@ -113,16 +119,19 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, | |||
113 | sb = direntry->d_inode->i_sb; | 119 | sb = direntry->d_inode->i_sb; |
114 | if (sb == NULL) | 120 | if (sb == NULL) |
115 | return -EIO; | 121 | return -EIO; |
116 | xid = GetXid(); | ||
117 | 122 | ||
118 | cifs_sb = CIFS_SB(sb); | 123 | cifs_sb = CIFS_SB(sb); |
119 | pTcon = cifs_sb->tcon; | 124 | tlink = cifs_sb_tlink(cifs_sb); |
125 | if (IS_ERR(tlink)) | ||
126 | return PTR_ERR(tlink); | ||
127 | pTcon = tlink_tcon(tlink); | ||
128 | |||
129 | xid = GetXid(); | ||
120 | 130 | ||
121 | full_path = build_path_from_dentry(direntry); | 131 | full_path = build_path_from_dentry(direntry); |
122 | if (full_path == NULL) { | 132 | if (full_path == NULL) { |
123 | rc = -ENOMEM; | 133 | rc = -ENOMEM; |
124 | FreeXid(xid); | 134 | goto set_ea_exit; |
125 | return rc; | ||
126 | } | 135 | } |
127 | /* return dos attributes as pseudo xattr */ | 136 | /* return dos attributes as pseudo xattr */ |
128 | /* return alt name if available as pseudo attr */ | 137 | /* return alt name if available as pseudo attr */ |
@@ -132,9 +141,8 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, | |||
132 | returns as xattrs */ | 141 | returns as xattrs */ |
133 | if (value_size > MAX_EA_VALUE_SIZE) { | 142 | if (value_size > MAX_EA_VALUE_SIZE) { |
134 | cFYI(1, "size of EA value too large"); | 143 | cFYI(1, "size of EA value too large"); |
135 | kfree(full_path); | 144 | rc = -EOPNOTSUPP; |
136 | FreeXid(xid); | 145 | goto set_ea_exit; |
137 | return -EOPNOTSUPP; | ||
138 | } | 146 | } |
139 | 147 | ||
140 | if (ea_name == NULL) { | 148 | if (ea_name == NULL) { |
@@ -198,6 +206,7 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, | |||
198 | set_ea_exit: | 206 | set_ea_exit: |
199 | kfree(full_path); | 207 | kfree(full_path); |
200 | FreeXid(xid); | 208 | FreeXid(xid); |
209 | cifs_put_tlink(tlink); | ||
201 | #endif | 210 | #endif |
202 | return rc; | 211 | return rc; |
203 | } | 212 | } |
@@ -209,6 +218,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
209 | #ifdef CONFIG_CIFS_XATTR | 218 | #ifdef CONFIG_CIFS_XATTR |
210 | int xid; | 219 | int xid; |
211 | struct cifs_sb_info *cifs_sb; | 220 | struct cifs_sb_info *cifs_sb; |
221 | struct tcon_link *tlink; | ||
212 | struct cifsTconInfo *pTcon; | 222 | struct cifsTconInfo *pTcon; |
213 | struct super_block *sb; | 223 | struct super_block *sb; |
214 | char *full_path; | 224 | char *full_path; |
@@ -221,16 +231,18 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
221 | if (sb == NULL) | 231 | if (sb == NULL) |
222 | return -EIO; | 232 | return -EIO; |
223 | 233 | ||
224 | xid = GetXid(); | ||
225 | |||
226 | cifs_sb = CIFS_SB(sb); | 234 | cifs_sb = CIFS_SB(sb); |
227 | pTcon = cifs_sb->tcon; | 235 | tlink = cifs_sb_tlink(cifs_sb); |
236 | if (IS_ERR(tlink)) | ||
237 | return PTR_ERR(tlink); | ||
238 | pTcon = tlink_tcon(tlink); | ||
239 | |||
240 | xid = GetXid(); | ||
228 | 241 | ||
229 | full_path = build_path_from_dentry(direntry); | 242 | full_path = build_path_from_dentry(direntry); |
230 | if (full_path == NULL) { | 243 | if (full_path == NULL) { |
231 | rc = -ENOMEM; | 244 | rc = -ENOMEM; |
232 | FreeXid(xid); | 245 | goto get_ea_exit; |
233 | return rc; | ||
234 | } | 246 | } |
235 | /* return dos attributes as pseudo xattr */ | 247 | /* return dos attributes as pseudo xattr */ |
236 | /* return alt name if available as pseudo attr */ | 248 | /* return alt name if available as pseudo attr */ |
@@ -323,6 +335,7 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, | |||
323 | get_ea_exit: | 335 | get_ea_exit: |
324 | kfree(full_path); | 336 | kfree(full_path); |
325 | FreeXid(xid); | 337 | FreeXid(xid); |
338 | cifs_put_tlink(tlink); | ||
326 | #endif | 339 | #endif |
327 | return rc; | 340 | return rc; |
328 | } | 341 | } |
@@ -333,6 +346,7 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) | |||
333 | #ifdef CONFIG_CIFS_XATTR | 346 | #ifdef CONFIG_CIFS_XATTR |
334 | int xid; | 347 | int xid; |
335 | struct cifs_sb_info *cifs_sb; | 348 | struct cifs_sb_info *cifs_sb; |
349 | struct tcon_link *tlink; | ||
336 | struct cifsTconInfo *pTcon; | 350 | struct cifsTconInfo *pTcon; |
337 | struct super_block *sb; | 351 | struct super_block *sb; |
338 | char *full_path; | 352 | char *full_path; |
@@ -346,18 +360,20 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) | |||
346 | return -EIO; | 360 | return -EIO; |
347 | 361 | ||
348 | cifs_sb = CIFS_SB(sb); | 362 | cifs_sb = CIFS_SB(sb); |
349 | pTcon = cifs_sb->tcon; | ||
350 | |||
351 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) | 363 | if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) |
352 | return -EOPNOTSUPP; | 364 | return -EOPNOTSUPP; |
353 | 365 | ||
366 | tlink = cifs_sb_tlink(cifs_sb); | ||
367 | if (IS_ERR(tlink)) | ||
368 | return PTR_ERR(tlink); | ||
369 | pTcon = tlink_tcon(tlink); | ||
370 | |||
354 | xid = GetXid(); | 371 | xid = GetXid(); |
355 | 372 | ||
356 | full_path = build_path_from_dentry(direntry); | 373 | full_path = build_path_from_dentry(direntry); |
357 | if (full_path == NULL) { | 374 | if (full_path == NULL) { |
358 | rc = -ENOMEM; | 375 | rc = -ENOMEM; |
359 | FreeXid(xid); | 376 | goto list_ea_exit; |
360 | return rc; | ||
361 | } | 377 | } |
362 | /* return dos attributes as pseudo xattr */ | 378 | /* return dos attributes as pseudo xattr */ |
363 | /* return alt name if available as pseudo attr */ | 379 | /* return alt name if available as pseudo attr */ |
@@ -370,8 +386,10 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) | |||
370 | cifs_sb->mnt_cifs_flags & | 386 | cifs_sb->mnt_cifs_flags & |
371 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 387 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
372 | 388 | ||
389 | list_ea_exit: | ||
373 | kfree(full_path); | 390 | kfree(full_path); |
374 | FreeXid(xid); | 391 | FreeXid(xid); |
392 | cifs_put_tlink(tlink); | ||
375 | #endif | 393 | #endif |
376 | return rc; | 394 | return rc; |
377 | } | 395 | } |
diff --git a/fs/coda/cache.c b/fs/coda/cache.c index a5bf5771a22a..9060f08e70cf 100644 --- a/fs/coda/cache.c +++ b/fs/coda/cache.c | |||
@@ -17,6 +17,7 @@ | |||
17 | #include <linux/string.h> | 17 | #include <linux/string.h> |
18 | #include <linux/list.h> | 18 | #include <linux/list.h> |
19 | #include <linux/sched.h> | 19 | #include <linux/sched.h> |
20 | #include <linux/spinlock.h> | ||
20 | 21 | ||
21 | #include <linux/coda.h> | 22 | #include <linux/coda.h> |
22 | #include <linux/coda_linux.h> | 23 | #include <linux/coda_linux.h> |
@@ -31,19 +32,23 @@ void coda_cache_enter(struct inode *inode, int mask) | |||
31 | { | 32 | { |
32 | struct coda_inode_info *cii = ITOC(inode); | 33 | struct coda_inode_info *cii = ITOC(inode); |
33 | 34 | ||
35 | spin_lock(&cii->c_lock); | ||
34 | cii->c_cached_epoch = atomic_read(&permission_epoch); | 36 | cii->c_cached_epoch = atomic_read(&permission_epoch); |
35 | if (cii->c_uid != current_fsuid()) { | 37 | if (cii->c_uid != current_fsuid()) { |
36 | cii->c_uid = current_fsuid(); | 38 | cii->c_uid = current_fsuid(); |
37 | cii->c_cached_perm = mask; | 39 | cii->c_cached_perm = mask; |
38 | } else | 40 | } else |
39 | cii->c_cached_perm |= mask; | 41 | cii->c_cached_perm |= mask; |
42 | spin_unlock(&cii->c_lock); | ||
40 | } | 43 | } |
41 | 44 | ||
42 | /* remove cached acl from an inode */ | 45 | /* remove cached acl from an inode */ |
43 | void coda_cache_clear_inode(struct inode *inode) | 46 | void coda_cache_clear_inode(struct inode *inode) |
44 | { | 47 | { |
45 | struct coda_inode_info *cii = ITOC(inode); | 48 | struct coda_inode_info *cii = ITOC(inode); |
49 | spin_lock(&cii->c_lock); | ||
46 | cii->c_cached_epoch = atomic_read(&permission_epoch) - 1; | 50 | cii->c_cached_epoch = atomic_read(&permission_epoch) - 1; |
51 | spin_unlock(&cii->c_lock); | ||
47 | } | 52 | } |
48 | 53 | ||
49 | /* remove all acl caches */ | 54 | /* remove all acl caches */ |
@@ -57,13 +62,15 @@ void coda_cache_clear_all(struct super_block *sb) | |||
57 | int coda_cache_check(struct inode *inode, int mask) | 62 | int coda_cache_check(struct inode *inode, int mask) |
58 | { | 63 | { |
59 | struct coda_inode_info *cii = ITOC(inode); | 64 | struct coda_inode_info *cii = ITOC(inode); |
60 | int hit; | 65 | int hit; |
61 | 66 | ||
62 | hit = (mask & cii->c_cached_perm) == mask && | 67 | spin_lock(&cii->c_lock); |
63 | cii->c_uid == current_fsuid() && | 68 | hit = (mask & cii->c_cached_perm) == mask && |
64 | cii->c_cached_epoch == atomic_read(&permission_epoch); | 69 | cii->c_uid == current_fsuid() && |
70 | cii->c_cached_epoch == atomic_read(&permission_epoch); | ||
71 | spin_unlock(&cii->c_lock); | ||
65 | 72 | ||
66 | return hit; | 73 | return hit; |
67 | } | 74 | } |
68 | 75 | ||
69 | 76 | ||
diff --git a/fs/coda/cnode.c b/fs/coda/cnode.c index a7a780929eec..602240569c89 100644 --- a/fs/coda/cnode.c +++ b/fs/coda/cnode.c | |||
@@ -45,13 +45,15 @@ static void coda_fill_inode(struct inode *inode, struct coda_vattr *attr) | |||
45 | static int coda_test_inode(struct inode *inode, void *data) | 45 | static int coda_test_inode(struct inode *inode, void *data) |
46 | { | 46 | { |
47 | struct CodaFid *fid = (struct CodaFid *)data; | 47 | struct CodaFid *fid = (struct CodaFid *)data; |
48 | return coda_fideq(&(ITOC(inode)->c_fid), fid); | 48 | struct coda_inode_info *cii = ITOC(inode); |
49 | return coda_fideq(&cii->c_fid, fid); | ||
49 | } | 50 | } |
50 | 51 | ||
51 | static int coda_set_inode(struct inode *inode, void *data) | 52 | static int coda_set_inode(struct inode *inode, void *data) |
52 | { | 53 | { |
53 | struct CodaFid *fid = (struct CodaFid *)data; | 54 | struct CodaFid *fid = (struct CodaFid *)data; |
54 | ITOC(inode)->c_fid = *fid; | 55 | struct coda_inode_info *cii = ITOC(inode); |
56 | cii->c_fid = *fid; | ||
55 | return 0; | 57 | return 0; |
56 | } | 58 | } |
57 | 59 | ||
@@ -71,6 +73,7 @@ struct inode * coda_iget(struct super_block * sb, struct CodaFid * fid, | |||
71 | cii = ITOC(inode); | 73 | cii = ITOC(inode); |
72 | /* we still need to set i_ino for things like stat(2) */ | 74 | /* we still need to set i_ino for things like stat(2) */ |
73 | inode->i_ino = hash; | 75 | inode->i_ino = hash; |
76 | /* inode is locked and unique, no need to grab cii->c_lock */ | ||
74 | cii->c_mapcount = 0; | 77 | cii->c_mapcount = 0; |
75 | unlock_new_inode(inode); | 78 | unlock_new_inode(inode); |
76 | } | 79 | } |
@@ -107,14 +110,20 @@ int coda_cnode_make(struct inode **inode, struct CodaFid *fid, struct super_bloc | |||
107 | } | 110 | } |
108 | 111 | ||
109 | 112 | ||
113 | /* Although we treat Coda file identifiers as immutable, there is one | ||
114 | * special case for files created during a disconnection where they may | ||
115 | * not be globally unique. When an identifier collision is detected we | ||
116 | * first try to flush the cached inode from the kernel and finally | ||
117 | * resort to renaming/rehashing in-place. Userspace remembers both old | ||
118 | * and new values of the identifier to handle any in-flight upcalls. | ||
119 | * The real solution is to use globally unique UUIDs as identifiers, but | ||
120 | * retrofitting the existing userspace code for this is non-trivial. */ | ||
110 | void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid, | 121 | void coda_replace_fid(struct inode *inode, struct CodaFid *oldfid, |
111 | struct CodaFid *newfid) | 122 | struct CodaFid *newfid) |
112 | { | 123 | { |
113 | struct coda_inode_info *cii; | 124 | struct coda_inode_info *cii = ITOC(inode); |
114 | unsigned long hash = coda_f2i(newfid); | 125 | unsigned long hash = coda_f2i(newfid); |
115 | 126 | ||
116 | cii = ITOC(inode); | ||
117 | |||
118 | BUG_ON(!coda_fideq(&cii->c_fid, oldfid)); | 127 | BUG_ON(!coda_fideq(&cii->c_fid, oldfid)); |
119 | 128 | ||
120 | /* replace fid and rehash inode */ | 129 | /* replace fid and rehash inode */ |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index ccd98b0f2b0b..5d8b35539601 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
@@ -17,7 +17,7 @@ | |||
17 | #include <linux/stat.h> | 17 | #include <linux/stat.h> |
18 | #include <linux/errno.h> | 18 | #include <linux/errno.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | #include <linux/smp_lock.h> | 20 | #include <linux/spinlock.h> |
21 | 21 | ||
22 | #include <asm/uaccess.h> | 22 | #include <asm/uaccess.h> |
23 | 23 | ||
@@ -116,15 +116,11 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, struc | |||
116 | goto exit; | 116 | goto exit; |
117 | } | 117 | } |
118 | 118 | ||
119 | lock_kernel(); | ||
120 | |||
121 | error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, | 119 | error = venus_lookup(dir->i_sb, coda_i2f(dir), name, length, |
122 | &type, &resfid); | 120 | &type, &resfid); |
123 | if (!error) | 121 | if (!error) |
124 | error = coda_cnode_make(&inode, &resfid, dir->i_sb); | 122 | error = coda_cnode_make(&inode, &resfid, dir->i_sb); |
125 | 123 | ||
126 | unlock_kernel(); | ||
127 | |||
128 | if (error && error != -ENOENT) | 124 | if (error && error != -ENOENT) |
129 | return ERR_PTR(error); | 125 | return ERR_PTR(error); |
130 | 126 | ||
@@ -140,28 +136,24 @@ exit: | |||
140 | 136 | ||
141 | int coda_permission(struct inode *inode, int mask) | 137 | int coda_permission(struct inode *inode, int mask) |
142 | { | 138 | { |
143 | int error = 0; | 139 | int error; |
144 | 140 | ||
145 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; | 141 | mask &= MAY_READ | MAY_WRITE | MAY_EXEC; |
146 | 142 | ||
147 | if (!mask) | 143 | if (!mask) |
148 | return 0; | 144 | return 0; |
149 | 145 | ||
150 | if ((mask & MAY_EXEC) && !execute_ok(inode)) | 146 | if ((mask & MAY_EXEC) && !execute_ok(inode)) |
151 | return -EACCES; | 147 | return -EACCES; |
152 | 148 | ||
153 | lock_kernel(); | ||
154 | |||
155 | if (coda_cache_check(inode, mask)) | 149 | if (coda_cache_check(inode, mask)) |
156 | goto out; | 150 | return 0; |
157 | 151 | ||
158 | error = venus_access(inode->i_sb, coda_i2f(inode), mask); | 152 | error = venus_access(inode->i_sb, coda_i2f(inode), mask); |
159 | 153 | ||
160 | if (!error) | 154 | if (!error) |
161 | coda_cache_enter(inode, mask); | 155 | coda_cache_enter(inode, mask); |
162 | 156 | ||
163 | out: | ||
164 | unlock_kernel(); | ||
165 | return error; | 157 | return error; |
166 | } | 158 | } |
167 | 159 | ||
@@ -200,41 +192,34 @@ static inline void coda_dir_drop_nlink(struct inode *dir) | |||
200 | /* creation routines: create, mknod, mkdir, link, symlink */ | 192 | /* creation routines: create, mknod, mkdir, link, symlink */ |
201 | static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) | 193 | static int coda_create(struct inode *dir, struct dentry *de, int mode, struct nameidata *nd) |
202 | { | 194 | { |
203 | int error=0; | 195 | int error; |
204 | const char *name=de->d_name.name; | 196 | const char *name=de->d_name.name; |
205 | int length=de->d_name.len; | 197 | int length=de->d_name.len; |
206 | struct inode *inode; | 198 | struct inode *inode; |
207 | struct CodaFid newfid; | 199 | struct CodaFid newfid; |
208 | struct coda_vattr attrs; | 200 | struct coda_vattr attrs; |
209 | 201 | ||
210 | lock_kernel(); | 202 | if (coda_isroot(dir) && coda_iscontrol(name, length)) |
211 | |||
212 | if (coda_isroot(dir) && coda_iscontrol(name, length)) { | ||
213 | unlock_kernel(); | ||
214 | return -EPERM; | 203 | return -EPERM; |
215 | } | ||
216 | 204 | ||
217 | error = venus_create(dir->i_sb, coda_i2f(dir), name, length, | 205 | error = venus_create(dir->i_sb, coda_i2f(dir), name, length, |
218 | 0, mode, &newfid, &attrs); | 206 | 0, mode, &newfid, &attrs); |
219 | 207 | if (error) | |
220 | if ( error ) { | 208 | goto err_out; |
221 | unlock_kernel(); | ||
222 | d_drop(de); | ||
223 | return error; | ||
224 | } | ||
225 | 209 | ||
226 | inode = coda_iget(dir->i_sb, &newfid, &attrs); | 210 | inode = coda_iget(dir->i_sb, &newfid, &attrs); |
227 | if ( IS_ERR(inode) ) { | 211 | if (IS_ERR(inode)) { |
228 | unlock_kernel(); | 212 | error = PTR_ERR(inode); |
229 | d_drop(de); | 213 | goto err_out; |
230 | return PTR_ERR(inode); | ||
231 | } | 214 | } |
232 | 215 | ||
233 | /* invalidate the directory cnode's attributes */ | 216 | /* invalidate the directory cnode's attributes */ |
234 | coda_dir_update_mtime(dir); | 217 | coda_dir_update_mtime(dir); |
235 | unlock_kernel(); | ||
236 | d_instantiate(de, inode); | 218 | d_instantiate(de, inode); |
237 | return 0; | 219 | return 0; |
220 | err_out: | ||
221 | d_drop(de); | ||
222 | return error; | ||
238 | } | 223 | } |
239 | 224 | ||
240 | static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) | 225 | static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) |
@@ -246,36 +231,29 @@ static int coda_mkdir(struct inode *dir, struct dentry *de, int mode) | |||
246 | int error; | 231 | int error; |
247 | struct CodaFid newfid; | 232 | struct CodaFid newfid; |
248 | 233 | ||
249 | lock_kernel(); | 234 | if (coda_isroot(dir) && coda_iscontrol(name, len)) |
250 | |||
251 | if (coda_isroot(dir) && coda_iscontrol(name, len)) { | ||
252 | unlock_kernel(); | ||
253 | return -EPERM; | 235 | return -EPERM; |
254 | } | ||
255 | 236 | ||
256 | attrs.va_mode = mode; | 237 | attrs.va_mode = mode; |
257 | error = venus_mkdir(dir->i_sb, coda_i2f(dir), | 238 | error = venus_mkdir(dir->i_sb, coda_i2f(dir), |
258 | name, len, &newfid, &attrs); | 239 | name, len, &newfid, &attrs); |
259 | 240 | if (error) | |
260 | if ( error ) { | 241 | goto err_out; |
261 | unlock_kernel(); | ||
262 | d_drop(de); | ||
263 | return error; | ||
264 | } | ||
265 | 242 | ||
266 | inode = coda_iget(dir->i_sb, &newfid, &attrs); | 243 | inode = coda_iget(dir->i_sb, &newfid, &attrs); |
267 | if ( IS_ERR(inode) ) { | 244 | if (IS_ERR(inode)) { |
268 | unlock_kernel(); | 245 | error = PTR_ERR(inode); |
269 | d_drop(de); | 246 | goto err_out; |
270 | return PTR_ERR(inode); | ||
271 | } | 247 | } |
272 | 248 | ||
273 | /* invalidate the directory cnode's attributes */ | 249 | /* invalidate the directory cnode's attributes */ |
274 | coda_dir_inc_nlink(dir); | 250 | coda_dir_inc_nlink(dir); |
275 | coda_dir_update_mtime(dir); | 251 | coda_dir_update_mtime(dir); |
276 | unlock_kernel(); | ||
277 | d_instantiate(de, inode); | 252 | d_instantiate(de, inode); |
278 | return 0; | 253 | return 0; |
254 | err_out: | ||
255 | d_drop(de); | ||
256 | return error; | ||
279 | } | 257 | } |
280 | 258 | ||
281 | /* try to make de an entry in dir_inodde linked to source_de */ | 259 | /* try to make de an entry in dir_inodde linked to source_de */ |
@@ -287,52 +265,38 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, | |||
287 | int len = de->d_name.len; | 265 | int len = de->d_name.len; |
288 | int error; | 266 | int error; |
289 | 267 | ||
290 | lock_kernel(); | 268 | if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) |
291 | |||
292 | if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) { | ||
293 | unlock_kernel(); | ||
294 | return -EPERM; | 269 | return -EPERM; |
295 | } | ||
296 | 270 | ||
297 | error = venus_link(dir_inode->i_sb, coda_i2f(inode), | 271 | error = venus_link(dir_inode->i_sb, coda_i2f(inode), |
298 | coda_i2f(dir_inode), (const char *)name, len); | 272 | coda_i2f(dir_inode), (const char *)name, len); |
299 | |||
300 | if (error) { | 273 | if (error) { |
301 | d_drop(de); | 274 | d_drop(de); |
302 | goto out; | 275 | return error; |
303 | } | 276 | } |
304 | 277 | ||
305 | coda_dir_update_mtime(dir_inode); | 278 | coda_dir_update_mtime(dir_inode); |
306 | atomic_inc(&inode->i_count); | 279 | ihold(inode); |
307 | d_instantiate(de, inode); | 280 | d_instantiate(de, inode); |
308 | inc_nlink(inode); | 281 | inc_nlink(inode); |
309 | 282 | return 0; | |
310 | out: | ||
311 | unlock_kernel(); | ||
312 | return(error); | ||
313 | } | 283 | } |
314 | 284 | ||
315 | 285 | ||
316 | static int coda_symlink(struct inode *dir_inode, struct dentry *de, | 286 | static int coda_symlink(struct inode *dir_inode, struct dentry *de, |
317 | const char *symname) | 287 | const char *symname) |
318 | { | 288 | { |
319 | const char *name = de->d_name.name; | 289 | const char *name = de->d_name.name; |
320 | int len = de->d_name.len; | 290 | int len = de->d_name.len; |
321 | int symlen; | 291 | int symlen; |
322 | int error = 0; | 292 | int error; |
323 | |||
324 | lock_kernel(); | ||
325 | 293 | ||
326 | if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) { | 294 | if (coda_isroot(dir_inode) && coda_iscontrol(name, len)) |
327 | unlock_kernel(); | ||
328 | return -EPERM; | 295 | return -EPERM; |
329 | } | ||
330 | 296 | ||
331 | symlen = strlen(symname); | 297 | symlen = strlen(symname); |
332 | if ( symlen > CODA_MAXPATHLEN ) { | 298 | if (symlen > CODA_MAXPATHLEN) |
333 | unlock_kernel(); | ||
334 | return -ENAMETOOLONG; | 299 | return -ENAMETOOLONG; |
335 | } | ||
336 | 300 | ||
337 | /* | 301 | /* |
338 | * This entry is now negative. Since we do not create | 302 | * This entry is now negative. Since we do not create |
@@ -343,10 +307,9 @@ static int coda_symlink(struct inode *dir_inode, struct dentry *de, | |||
343 | symname, symlen); | 307 | symname, symlen); |
344 | 308 | ||
345 | /* mtime is no good anymore */ | 309 | /* mtime is no good anymore */ |
346 | if ( !error ) | 310 | if (!error) |
347 | coda_dir_update_mtime(dir_inode); | 311 | coda_dir_update_mtime(dir_inode); |
348 | 312 | ||
349 | unlock_kernel(); | ||
350 | return error; | 313 | return error; |
351 | } | 314 | } |
352 | 315 | ||
@@ -357,17 +320,12 @@ static int coda_unlink(struct inode *dir, struct dentry *de) | |||
357 | const char *name = de->d_name.name; | 320 | const char *name = de->d_name.name; |
358 | int len = de->d_name.len; | 321 | int len = de->d_name.len; |
359 | 322 | ||
360 | lock_kernel(); | ||
361 | |||
362 | error = venus_remove(dir->i_sb, coda_i2f(dir), name, len); | 323 | error = venus_remove(dir->i_sb, coda_i2f(dir), name, len); |
363 | if ( error ) { | 324 | if (error) |
364 | unlock_kernel(); | ||
365 | return error; | 325 | return error; |
366 | } | ||
367 | 326 | ||
368 | coda_dir_update_mtime(dir); | 327 | coda_dir_update_mtime(dir); |
369 | drop_nlink(de->d_inode); | 328 | drop_nlink(de->d_inode); |
370 | unlock_kernel(); | ||
371 | return 0; | 329 | return 0; |
372 | } | 330 | } |
373 | 331 | ||
@@ -377,8 +335,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) | |||
377 | int len = de->d_name.len; | 335 | int len = de->d_name.len; |
378 | int error; | 336 | int error; |
379 | 337 | ||
380 | lock_kernel(); | ||
381 | |||
382 | error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); | 338 | error = venus_rmdir(dir->i_sb, coda_i2f(dir), name, len); |
383 | if (!error) { | 339 | if (!error) { |
384 | /* VFS may delete the child */ | 340 | /* VFS may delete the child */ |
@@ -389,7 +345,6 @@ static int coda_rmdir(struct inode *dir, struct dentry *de) | |||
389 | coda_dir_drop_nlink(dir); | 345 | coda_dir_drop_nlink(dir); |
390 | coda_dir_update_mtime(dir); | 346 | coda_dir_update_mtime(dir); |
391 | } | 347 | } |
392 | unlock_kernel(); | ||
393 | return error; | 348 | return error; |
394 | } | 349 | } |
395 | 350 | ||
@@ -403,15 +358,12 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
403 | int new_length = new_dentry->d_name.len; | 358 | int new_length = new_dentry->d_name.len; |
404 | int error; | 359 | int error; |
405 | 360 | ||
406 | lock_kernel(); | ||
407 | |||
408 | error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), | 361 | error = venus_rename(old_dir->i_sb, coda_i2f(old_dir), |
409 | coda_i2f(new_dir), old_length, new_length, | 362 | coda_i2f(new_dir), old_length, new_length, |
410 | (const char *) old_name, (const char *)new_name); | 363 | (const char *) old_name, (const char *)new_name); |
411 | 364 | if (!error) { | |
412 | if ( !error ) { | 365 | if (new_dentry->d_inode) { |
413 | if ( new_dentry->d_inode ) { | 366 | if (S_ISDIR(new_dentry->d_inode->i_mode)) { |
414 | if ( S_ISDIR(new_dentry->d_inode->i_mode) ) { | ||
415 | coda_dir_drop_nlink(old_dir); | 367 | coda_dir_drop_nlink(old_dir); |
416 | coda_dir_inc_nlink(new_dir); | 368 | coda_dir_inc_nlink(new_dir); |
417 | } | 369 | } |
@@ -423,8 +375,6 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
423 | coda_flag_inode(new_dir, C_VATTR); | 375 | coda_flag_inode(new_dir, C_VATTR); |
424 | } | 376 | } |
425 | } | 377 | } |
426 | unlock_kernel(); | ||
427 | |||
428 | return error; | 378 | return error; |
429 | } | 379 | } |
430 | 380 | ||
@@ -594,10 +544,7 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) | |||
594 | struct inode *inode = de->d_inode; | 544 | struct inode *inode = de->d_inode; |
595 | struct coda_inode_info *cii; | 545 | struct coda_inode_info *cii; |
596 | 546 | ||
597 | if (!inode) | 547 | if (!inode || coda_isroot(inode)) |
598 | return 1; | ||
599 | lock_kernel(); | ||
600 | if (coda_isroot(inode)) | ||
601 | goto out; | 548 | goto out; |
602 | if (is_bad_inode(inode)) | 549 | if (is_bad_inode(inode)) |
603 | goto bad; | 550 | goto bad; |
@@ -617,13 +564,12 @@ static int coda_dentry_revalidate(struct dentry *de, struct nameidata *nd) | |||
617 | goto out; | 564 | goto out; |
618 | 565 | ||
619 | /* clear the flags. */ | 566 | /* clear the flags. */ |
567 | spin_lock(&cii->c_lock); | ||
620 | cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH); | 568 | cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH); |
621 | 569 | spin_unlock(&cii->c_lock); | |
622 | bad: | 570 | bad: |
623 | unlock_kernel(); | ||
624 | return 0; | 571 | return 0; |
625 | out: | 572 | out: |
626 | unlock_kernel(); | ||
627 | return 1; | 573 | return 1; |
628 | } | 574 | } |
629 | 575 | ||
@@ -656,20 +602,19 @@ static int coda_dentry_delete(struct dentry * dentry) | |||
656 | int coda_revalidate_inode(struct dentry *dentry) | 602 | int coda_revalidate_inode(struct dentry *dentry) |
657 | { | 603 | { |
658 | struct coda_vattr attr; | 604 | struct coda_vattr attr; |
659 | int error = 0; | 605 | int error; |
660 | int old_mode; | 606 | int old_mode; |
661 | ino_t old_ino; | 607 | ino_t old_ino; |
662 | struct inode *inode = dentry->d_inode; | 608 | struct inode *inode = dentry->d_inode; |
663 | struct coda_inode_info *cii = ITOC(inode); | 609 | struct coda_inode_info *cii = ITOC(inode); |
664 | 610 | ||
665 | lock_kernel(); | 611 | if (!cii->c_flags) |
666 | if ( !cii->c_flags ) | 612 | return 0; |
667 | goto ok; | ||
668 | 613 | ||
669 | if (cii->c_flags & (C_VATTR | C_PURGE | C_FLUSH)) { | 614 | if (cii->c_flags & (C_VATTR | C_PURGE | C_FLUSH)) { |
670 | error = venus_getattr(inode->i_sb, &(cii->c_fid), &attr); | 615 | error = venus_getattr(inode->i_sb, &(cii->c_fid), &attr); |
671 | if ( error ) | 616 | if (error) |
672 | goto return_bad; | 617 | return -EIO; |
673 | 618 | ||
674 | /* this inode may be lost if: | 619 | /* this inode may be lost if: |
675 | - it's ino changed | 620 | - it's ino changed |
@@ -688,17 +633,13 @@ int coda_revalidate_inode(struct dentry *dentry) | |||
688 | /* the following can happen when a local fid is replaced | 633 | /* the following can happen when a local fid is replaced |
689 | with a global one, here we lose and declare the inode bad */ | 634 | with a global one, here we lose and declare the inode bad */ |
690 | if (inode->i_ino != old_ino) | 635 | if (inode->i_ino != old_ino) |
691 | goto return_bad; | 636 | return -EIO; |
692 | 637 | ||
693 | coda_flag_inode_children(inode, C_FLUSH); | 638 | coda_flag_inode_children(inode, C_FLUSH); |
639 | |||
640 | spin_lock(&cii->c_lock); | ||
694 | cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH); | 641 | cii->c_flags &= ~(C_VATTR | C_PURGE | C_FLUSH); |
642 | spin_unlock(&cii->c_lock); | ||
695 | } | 643 | } |
696 | |||
697 | ok: | ||
698 | unlock_kernel(); | ||
699 | return 0; | 644 | return 0; |
700 | |||
701 | return_bad: | ||
702 | unlock_kernel(); | ||
703 | return -EIO; | ||
704 | } | 645 | } |
diff --git a/fs/coda/file.c b/fs/coda/file.c index ad3cd2abeeb4..c8b50ba4366a 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c | |||
@@ -15,7 +15,7 @@ | |||
15 | #include <linux/stat.h> | 15 | #include <linux/stat.h> |
16 | #include <linux/cred.h> | 16 | #include <linux/cred.h> |
17 | #include <linux/errno.h> | 17 | #include <linux/errno.h> |
18 | #include <linux/smp_lock.h> | 18 | #include <linux/spinlock.h> |
19 | #include <linux/string.h> | 19 | #include <linux/string.h> |
20 | #include <linux/slab.h> | 20 | #include <linux/slab.h> |
21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
@@ -109,19 +109,24 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) | |||
109 | 109 | ||
110 | coda_inode = coda_file->f_path.dentry->d_inode; | 110 | coda_inode = coda_file->f_path.dentry->d_inode; |
111 | host_inode = host_file->f_path.dentry->d_inode; | 111 | host_inode = host_file->f_path.dentry->d_inode; |
112 | |||
113 | cii = ITOC(coda_inode); | ||
114 | spin_lock(&cii->c_lock); | ||
112 | coda_file->f_mapping = host_file->f_mapping; | 115 | coda_file->f_mapping = host_file->f_mapping; |
113 | if (coda_inode->i_mapping == &coda_inode->i_data) | 116 | if (coda_inode->i_mapping == &coda_inode->i_data) |
114 | coda_inode->i_mapping = host_inode->i_mapping; | 117 | coda_inode->i_mapping = host_inode->i_mapping; |
115 | 118 | ||
116 | /* only allow additional mmaps as long as userspace isn't changing | 119 | /* only allow additional mmaps as long as userspace isn't changing |
117 | * the container file on us! */ | 120 | * the container file on us! */ |
118 | else if (coda_inode->i_mapping != host_inode->i_mapping) | 121 | else if (coda_inode->i_mapping != host_inode->i_mapping) { |
122 | spin_unlock(&cii->c_lock); | ||
119 | return -EBUSY; | 123 | return -EBUSY; |
124 | } | ||
120 | 125 | ||
121 | /* keep track of how often the coda_inode/host_file has been mmapped */ | 126 | /* keep track of how often the coda_inode/host_file has been mmapped */ |
122 | cii = ITOC(coda_inode); | ||
123 | cii->c_mapcount++; | 127 | cii->c_mapcount++; |
124 | cfi->cfi_mapcount++; | 128 | cfi->cfi_mapcount++; |
129 | spin_unlock(&cii->c_lock); | ||
125 | 130 | ||
126 | return host_file->f_op->mmap(host_file, vma); | 131 | return host_file->f_op->mmap(host_file, vma); |
127 | } | 132 | } |
@@ -138,8 +143,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) | |||
138 | if (!cfi) | 143 | if (!cfi) |
139 | return -ENOMEM; | 144 | return -ENOMEM; |
140 | 145 | ||
141 | lock_kernel(); | ||
142 | |||
143 | error = venus_open(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, | 146 | error = venus_open(coda_inode->i_sb, coda_i2f(coda_inode), coda_flags, |
144 | &host_file); | 147 | &host_file); |
145 | if (!host_file) | 148 | if (!host_file) |
@@ -147,7 +150,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) | |||
147 | 150 | ||
148 | if (error) { | 151 | if (error) { |
149 | kfree(cfi); | 152 | kfree(cfi); |
150 | unlock_kernel(); | ||
151 | return error; | 153 | return error; |
152 | } | 154 | } |
153 | 155 | ||
@@ -159,8 +161,6 @@ int coda_open(struct inode *coda_inode, struct file *coda_file) | |||
159 | 161 | ||
160 | BUG_ON(coda_file->private_data != NULL); | 162 | BUG_ON(coda_file->private_data != NULL); |
161 | coda_file->private_data = cfi; | 163 | coda_file->private_data = cfi; |
162 | |||
163 | unlock_kernel(); | ||
164 | return 0; | 164 | return 0; |
165 | } | 165 | } |
166 | 166 | ||
@@ -171,9 +171,7 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) | |||
171 | struct coda_file_info *cfi; | 171 | struct coda_file_info *cfi; |
172 | struct coda_inode_info *cii; | 172 | struct coda_inode_info *cii; |
173 | struct inode *host_inode; | 173 | struct inode *host_inode; |
174 | int err = 0; | 174 | int err; |
175 | |||
176 | lock_kernel(); | ||
177 | 175 | ||
178 | cfi = CODA_FTOC(coda_file); | 176 | cfi = CODA_FTOC(coda_file); |
179 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); | 177 | BUG_ON(!cfi || cfi->cfi_magic != CODA_MAGIC); |
@@ -185,18 +183,18 @@ int coda_release(struct inode *coda_inode, struct file *coda_file) | |||
185 | cii = ITOC(coda_inode); | 183 | cii = ITOC(coda_inode); |
186 | 184 | ||
187 | /* did we mmap this file? */ | 185 | /* did we mmap this file? */ |
186 | spin_lock(&cii->c_lock); | ||
188 | if (coda_inode->i_mapping == &host_inode->i_data) { | 187 | if (coda_inode->i_mapping == &host_inode->i_data) { |
189 | cii->c_mapcount -= cfi->cfi_mapcount; | 188 | cii->c_mapcount -= cfi->cfi_mapcount; |
190 | if (!cii->c_mapcount) | 189 | if (!cii->c_mapcount) |
191 | coda_inode->i_mapping = &coda_inode->i_data; | 190 | coda_inode->i_mapping = &coda_inode->i_data; |
192 | } | 191 | } |
192 | spin_unlock(&cii->c_lock); | ||
193 | 193 | ||
194 | fput(cfi->cfi_container); | 194 | fput(cfi->cfi_container); |
195 | kfree(coda_file->private_data); | 195 | kfree(coda_file->private_data); |
196 | coda_file->private_data = NULL; | 196 | coda_file->private_data = NULL; |
197 | 197 | ||
198 | unlock_kernel(); | ||
199 | |||
200 | /* VFS fput ignores the return value from file_operations->release, so | 198 | /* VFS fput ignores the return value from file_operations->release, so |
201 | * there is no use returning an error here */ | 199 | * there is no use returning an error here */ |
202 | return 0; | 200 | return 0; |
@@ -207,7 +205,7 @@ int coda_fsync(struct file *coda_file, int datasync) | |||
207 | struct file *host_file; | 205 | struct file *host_file; |
208 | struct inode *coda_inode = coda_file->f_path.dentry->d_inode; | 206 | struct inode *coda_inode = coda_file->f_path.dentry->d_inode; |
209 | struct coda_file_info *cfi; | 207 | struct coda_file_info *cfi; |
210 | int err = 0; | 208 | int err; |
211 | 209 | ||
212 | if (!(S_ISREG(coda_inode->i_mode) || S_ISDIR(coda_inode->i_mode) || | 210 | if (!(S_ISREG(coda_inode->i_mode) || S_ISDIR(coda_inode->i_mode) || |
213 | S_ISLNK(coda_inode->i_mode))) | 211 | S_ISLNK(coda_inode->i_mode))) |
@@ -218,11 +216,8 @@ int coda_fsync(struct file *coda_file, int datasync) | |||
218 | host_file = cfi->cfi_container; | 216 | host_file = cfi->cfi_container; |
219 | 217 | ||
220 | err = vfs_fsync(host_file, datasync); | 218 | err = vfs_fsync(host_file, datasync); |
221 | if ( !err && !datasync ) { | 219 | if (!err && !datasync) |
222 | lock_kernel(); | ||
223 | err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); | 220 | err = venus_fsync(coda_inode->i_sb, coda_i2f(coda_inode)); |
224 | unlock_kernel(); | ||
225 | } | ||
226 | 221 | ||
227 | return err; | 222 | return err; |
228 | } | 223 | } |
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6526e6f21ecf..7993b96ca348 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -15,7 +15,8 @@ | |||
15 | #include <linux/stat.h> | 15 | #include <linux/stat.h> |
16 | #include <linux/errno.h> | 16 | #include <linux/errno.h> |
17 | #include <linux/unistd.h> | 17 | #include <linux/unistd.h> |
18 | #include <linux/smp_lock.h> | 18 | #include <linux/mutex.h> |
19 | #include <linux/spinlock.h> | ||
19 | #include <linux/file.h> | 20 | #include <linux/file.h> |
20 | #include <linux/vfs.h> | 21 | #include <linux/vfs.h> |
21 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
@@ -51,6 +52,7 @@ static struct inode *coda_alloc_inode(struct super_block *sb) | |||
51 | ei->c_flags = 0; | 52 | ei->c_flags = 0; |
52 | ei->c_uid = 0; | 53 | ei->c_uid = 0; |
53 | ei->c_cached_perm = 0; | 54 | ei->c_cached_perm = 0; |
55 | spin_lock_init(&ei->c_lock); | ||
54 | return &ei->vfs_inode; | 56 | return &ei->vfs_inode; |
55 | } | 57 | } |
56 | 58 | ||
@@ -143,7 +145,7 @@ static int get_device_index(struct coda_mount_data *data) | |||
143 | static int coda_fill_super(struct super_block *sb, void *data, int silent) | 145 | static int coda_fill_super(struct super_block *sb, void *data, int silent) |
144 | { | 146 | { |
145 | struct inode *root = NULL; | 147 | struct inode *root = NULL; |
146 | struct venus_comm *vc = NULL; | 148 | struct venus_comm *vc; |
147 | struct CodaFid fid; | 149 | struct CodaFid fid; |
148 | int error; | 150 | int error; |
149 | int idx; | 151 | int idx; |
@@ -157,21 +159,26 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
157 | printk(KERN_INFO "coda_read_super: device index: %i\n", idx); | 159 | printk(KERN_INFO "coda_read_super: device index: %i\n", idx); |
158 | 160 | ||
159 | vc = &coda_comms[idx]; | 161 | vc = &coda_comms[idx]; |
162 | mutex_lock(&vc->vc_mutex); | ||
163 | |||
160 | if (!vc->vc_inuse) { | 164 | if (!vc->vc_inuse) { |
161 | printk("coda_read_super: No pseudo device\n"); | 165 | printk("coda_read_super: No pseudo device\n"); |
162 | return -EINVAL; | 166 | error = -EINVAL; |
167 | goto unlock_out; | ||
163 | } | 168 | } |
164 | 169 | ||
165 | if ( vc->vc_sb ) { | 170 | if (vc->vc_sb) { |
166 | printk("coda_read_super: Device already mounted\n"); | 171 | printk("coda_read_super: Device already mounted\n"); |
167 | return -EBUSY; | 172 | error = -EBUSY; |
173 | goto unlock_out; | ||
168 | } | 174 | } |
169 | 175 | ||
170 | error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); | 176 | error = bdi_setup_and_register(&vc->bdi, "coda", BDI_CAP_MAP_COPY); |
171 | if (error) | 177 | if (error) |
172 | goto bdi_err; | 178 | goto unlock_out; |
173 | 179 | ||
174 | vc->vc_sb = sb; | 180 | vc->vc_sb = sb; |
181 | mutex_unlock(&vc->vc_mutex); | ||
175 | 182 | ||
176 | sb->s_fs_info = vc; | 183 | sb->s_fs_info = vc; |
177 | sb->s_flags |= MS_NOATIME; | 184 | sb->s_flags |= MS_NOATIME; |
@@ -200,26 +207,33 @@ static int coda_fill_super(struct super_block *sb, void *data, int silent) | |||
200 | printk("coda_read_super: rootinode is %ld dev %s\n", | 207 | printk("coda_read_super: rootinode is %ld dev %s\n", |
201 | root->i_ino, root->i_sb->s_id); | 208 | root->i_ino, root->i_sb->s_id); |
202 | sb->s_root = d_alloc_root(root); | 209 | sb->s_root = d_alloc_root(root); |
203 | if (!sb->s_root) | 210 | if (!sb->s_root) { |
211 | error = -EINVAL; | ||
204 | goto error; | 212 | goto error; |
205 | return 0; | 213 | } |
214 | return 0; | ||
206 | 215 | ||
207 | error: | 216 | error: |
208 | bdi_destroy(&vc->bdi); | ||
209 | bdi_err: | ||
210 | if (root) | 217 | if (root) |
211 | iput(root); | 218 | iput(root); |
212 | if (vc) | ||
213 | vc->vc_sb = NULL; | ||
214 | 219 | ||
215 | return -EINVAL; | 220 | mutex_lock(&vc->vc_mutex); |
221 | bdi_destroy(&vc->bdi); | ||
222 | vc->vc_sb = NULL; | ||
223 | sb->s_fs_info = NULL; | ||
224 | unlock_out: | ||
225 | mutex_unlock(&vc->vc_mutex); | ||
226 | return error; | ||
216 | } | 227 | } |
217 | 228 | ||
218 | static void coda_put_super(struct super_block *sb) | 229 | static void coda_put_super(struct super_block *sb) |
219 | { | 230 | { |
220 | bdi_destroy(&coda_vcp(sb)->bdi); | 231 | struct venus_comm *vcp = coda_vcp(sb); |
221 | coda_vcp(sb)->vc_sb = NULL; | 232 | mutex_lock(&vcp->vc_mutex); |
233 | bdi_destroy(&vcp->bdi); | ||
234 | vcp->vc_sb = NULL; | ||
222 | sb->s_fs_info = NULL; | 235 | sb->s_fs_info = NULL; |
236 | mutex_unlock(&vcp->vc_mutex); | ||
223 | 237 | ||
224 | printk("Coda: Bye bye.\n"); | 238 | printk("Coda: Bye bye.\n"); |
225 | } | 239 | } |
@@ -245,8 +259,6 @@ int coda_setattr(struct dentry *de, struct iattr *iattr) | |||
245 | struct coda_vattr vattr; | 259 | struct coda_vattr vattr; |
246 | int error; | 260 | int error; |
247 | 261 | ||
248 | lock_kernel(); | ||
249 | |||
250 | memset(&vattr, 0, sizeof(vattr)); | 262 | memset(&vattr, 0, sizeof(vattr)); |
251 | 263 | ||
252 | inode->i_ctime = CURRENT_TIME_SEC; | 264 | inode->i_ctime = CURRENT_TIME_SEC; |
@@ -256,13 +268,10 @@ int coda_setattr(struct dentry *de, struct iattr *iattr) | |||
256 | /* Venus is responsible for truncating the container-file!!! */ | 268 | /* Venus is responsible for truncating the container-file!!! */ |
257 | error = venus_setattr(inode->i_sb, coda_i2f(inode), &vattr); | 269 | error = venus_setattr(inode->i_sb, coda_i2f(inode), &vattr); |
258 | 270 | ||
259 | if ( !error ) { | 271 | if (!error) { |
260 | coda_vattr_to_iattr(inode, &vattr); | 272 | coda_vattr_to_iattr(inode, &vattr); |
261 | coda_cache_clear_inode(inode); | 273 | coda_cache_clear_inode(inode); |
262 | } | 274 | } |
263 | |||
264 | unlock_kernel(); | ||
265 | |||
266 | return error; | 275 | return error; |
267 | } | 276 | } |
268 | 277 | ||
@@ -276,12 +285,8 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
276 | { | 285 | { |
277 | int error; | 286 | int error; |
278 | 287 | ||
279 | lock_kernel(); | ||
280 | |||
281 | error = venus_statfs(dentry, buf); | 288 | error = venus_statfs(dentry, buf); |
282 | 289 | ||
283 | unlock_kernel(); | ||
284 | |||
285 | if (error) { | 290 | if (error) { |
286 | /* fake something like AFS does */ | 291 | /* fake something like AFS does */ |
287 | buf->f_blocks = 9000000; | 292 | buf->f_blocks = 9000000; |
diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index ca25d96d45c9..2fd89b5c5c7b 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c | |||
@@ -23,8 +23,6 @@ | |||
23 | #include <linux/coda_fs_i.h> | 23 | #include <linux/coda_fs_i.h> |
24 | #include <linux/coda_psdev.h> | 24 | #include <linux/coda_psdev.h> |
25 | 25 | ||
26 | #include <linux/smp_lock.h> | ||
27 | |||
28 | /* pioctl ops */ | 26 | /* pioctl ops */ |
29 | static int coda_ioctl_permission(struct inode *inode, int mask); | 27 | static int coda_ioctl_permission(struct inode *inode, int mask); |
30 | static long coda_pioctl(struct file *filp, unsigned int cmd, | 28 | static long coda_pioctl(struct file *filp, unsigned int cmd, |
@@ -39,6 +37,7 @@ const struct inode_operations coda_ioctl_inode_operations = { | |||
39 | const struct file_operations coda_ioctl_operations = { | 37 | const struct file_operations coda_ioctl_operations = { |
40 | .owner = THIS_MODULE, | 38 | .owner = THIS_MODULE, |
41 | .unlocked_ioctl = coda_pioctl, | 39 | .unlocked_ioctl = coda_pioctl, |
40 | .llseek = noop_llseek, | ||
42 | }; | 41 | }; |
43 | 42 | ||
44 | /* the coda pioctl inode ops */ | 43 | /* the coda pioctl inode ops */ |
@@ -57,13 +56,9 @@ static long coda_pioctl(struct file *filp, unsigned int cmd, | |||
57 | struct inode *target_inode = NULL; | 56 | struct inode *target_inode = NULL; |
58 | struct coda_inode_info *cnp; | 57 | struct coda_inode_info *cnp; |
59 | 58 | ||
60 | lock_kernel(); | ||
61 | |||
62 | /* get the Pioctl data arguments from user space */ | 59 | /* get the Pioctl data arguments from user space */ |
63 | if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) { | 60 | if (copy_from_user(&data, (void __user *)user_data, sizeof(data))) |
64 | error = -EINVAL; | 61 | return -EINVAL; |
65 | goto out; | ||
66 | } | ||
67 | 62 | ||
68 | /* | 63 | /* |
69 | * Look up the pathname. Note that the pathname is in | 64 | * Look up the pathname. Note that the pathname is in |
@@ -75,13 +70,12 @@ static long coda_pioctl(struct file *filp, unsigned int cmd, | |||
75 | error = user_lpath(data.path, &path); | 70 | error = user_lpath(data.path, &path); |
76 | 71 | ||
77 | if (error) | 72 | if (error) |
78 | goto out; | 73 | return error; |
79 | else | 74 | |
80 | target_inode = path.dentry->d_inode; | 75 | target_inode = path.dentry->d_inode; |
81 | 76 | ||
82 | /* return if it is not a Coda inode */ | 77 | /* return if it is not a Coda inode */ |
83 | if (target_inode->i_sb != inode->i_sb) { | 78 | if (target_inode->i_sb != inode->i_sb) { |
84 | path_put(&path); | ||
85 | error = -EINVAL; | 79 | error = -EINVAL; |
86 | goto out; | 80 | goto out; |
87 | } | 81 | } |
@@ -90,10 +84,7 @@ static long coda_pioctl(struct file *filp, unsigned int cmd, | |||
90 | cnp = ITOC(target_inode); | 84 | cnp = ITOC(target_inode); |
91 | 85 | ||
92 | error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); | 86 | error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); |
93 | |||
94 | path_put(&path); | ||
95 | |||
96 | out: | 87 | out: |
97 | unlock_kernel(); | 88 | path_put(&path); |
98 | return error; | 89 | return error; |
99 | } | 90 | } |
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 116af7546cf0..62647a8595e4 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
@@ -35,7 +35,7 @@ | |||
35 | #include <linux/poll.h> | 35 | #include <linux/poll.h> |
36 | #include <linux/init.h> | 36 | #include <linux/init.h> |
37 | #include <linux/list.h> | 37 | #include <linux/list.h> |
38 | #include <linux/smp_lock.h> | 38 | #include <linux/mutex.h> |
39 | #include <linux/device.h> | 39 | #include <linux/device.h> |
40 | #include <asm/io.h> | 40 | #include <asm/io.h> |
41 | #include <asm/system.h> | 41 | #include <asm/system.h> |
@@ -67,8 +67,10 @@ static unsigned int coda_psdev_poll(struct file *file, poll_table * wait) | |||
67 | unsigned int mask = POLLOUT | POLLWRNORM; | 67 | unsigned int mask = POLLOUT | POLLWRNORM; |
68 | 68 | ||
69 | poll_wait(file, &vcp->vc_waitq, wait); | 69 | poll_wait(file, &vcp->vc_waitq, wait); |
70 | mutex_lock(&vcp->vc_mutex); | ||
70 | if (!list_empty(&vcp->vc_pending)) | 71 | if (!list_empty(&vcp->vc_pending)) |
71 | mask |= POLLIN | POLLRDNORM; | 72 | mask |= POLLIN | POLLRDNORM; |
73 | mutex_unlock(&vcp->vc_mutex); | ||
72 | 74 | ||
73 | return mask; | 75 | return mask; |
74 | } | 76 | } |
@@ -108,16 +110,9 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, | |||
108 | return -EFAULT; | 110 | return -EFAULT; |
109 | 111 | ||
110 | if (DOWNCALL(hdr.opcode)) { | 112 | if (DOWNCALL(hdr.opcode)) { |
111 | struct super_block *sb = NULL; | 113 | union outputArgs *dcbuf; |
112 | union outputArgs *dcbuf; | ||
113 | int size = sizeof(*dcbuf); | 114 | int size = sizeof(*dcbuf); |
114 | 115 | ||
115 | sb = vcp->vc_sb; | ||
116 | if ( !sb ) { | ||
117 | count = nbytes; | ||
118 | goto out; | ||
119 | } | ||
120 | |||
121 | if ( nbytes < sizeof(struct coda_out_hdr) ) { | 116 | if ( nbytes < sizeof(struct coda_out_hdr) ) { |
122 | printk("coda_downcall opc %d uniq %d, not enough!\n", | 117 | printk("coda_downcall opc %d uniq %d, not enough!\n", |
123 | hdr.opcode, hdr.unique); | 118 | hdr.opcode, hdr.unique); |
@@ -137,9 +132,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, | |||
137 | } | 132 | } |
138 | 133 | ||
139 | /* what downcall errors does Venus handle ? */ | 134 | /* what downcall errors does Venus handle ? */ |
140 | lock_kernel(); | 135 | error = coda_downcall(vcp, hdr.opcode, dcbuf); |
141 | error = coda_downcall(hdr.opcode, dcbuf, sb); | ||
142 | unlock_kernel(); | ||
143 | 136 | ||
144 | CODA_FREE(dcbuf, nbytes); | 137 | CODA_FREE(dcbuf, nbytes); |
145 | if (error) { | 138 | if (error) { |
@@ -152,7 +145,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, | |||
152 | } | 145 | } |
153 | 146 | ||
154 | /* Look for the message on the processing queue. */ | 147 | /* Look for the message on the processing queue. */ |
155 | lock_kernel(); | 148 | mutex_lock(&vcp->vc_mutex); |
156 | list_for_each(lh, &vcp->vc_processing) { | 149 | list_for_each(lh, &vcp->vc_processing) { |
157 | tmp = list_entry(lh, struct upc_req , uc_chain); | 150 | tmp = list_entry(lh, struct upc_req , uc_chain); |
158 | if (tmp->uc_unique == hdr.unique) { | 151 | if (tmp->uc_unique == hdr.unique) { |
@@ -161,7 +154,7 @@ static ssize_t coda_psdev_write(struct file *file, const char __user *buf, | |||
161 | break; | 154 | break; |
162 | } | 155 | } |
163 | } | 156 | } |
164 | unlock_kernel(); | 157 | mutex_unlock(&vcp->vc_mutex); |
165 | 158 | ||
166 | if (!req) { | 159 | if (!req) { |
167 | printk("psdev_write: msg (%d, %d) not found\n", | 160 | printk("psdev_write: msg (%d, %d) not found\n", |
@@ -216,7 +209,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, | |||
216 | if (nbytes == 0) | 209 | if (nbytes == 0) |
217 | return 0; | 210 | return 0; |
218 | 211 | ||
219 | lock_kernel(); | 212 | mutex_lock(&vcp->vc_mutex); |
220 | 213 | ||
221 | add_wait_queue(&vcp->vc_waitq, &wait); | 214 | add_wait_queue(&vcp->vc_waitq, &wait); |
222 | set_current_state(TASK_INTERRUPTIBLE); | 215 | set_current_state(TASK_INTERRUPTIBLE); |
@@ -230,7 +223,9 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, | |||
230 | retval = -ERESTARTSYS; | 223 | retval = -ERESTARTSYS; |
231 | break; | 224 | break; |
232 | } | 225 | } |
226 | mutex_unlock(&vcp->vc_mutex); | ||
233 | schedule(); | 227 | schedule(); |
228 | mutex_lock(&vcp->vc_mutex); | ||
234 | } | 229 | } |
235 | 230 | ||
236 | set_current_state(TASK_RUNNING); | 231 | set_current_state(TASK_RUNNING); |
@@ -263,7 +258,7 @@ static ssize_t coda_psdev_read(struct file * file, char __user * buf, | |||
263 | CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); | 258 | CODA_FREE(req->uc_data, sizeof(struct coda_in_hdr)); |
264 | kfree(req); | 259 | kfree(req); |
265 | out: | 260 | out: |
266 | unlock_kernel(); | 261 | mutex_unlock(&vcp->vc_mutex); |
267 | return (count ? count : retval); | 262 | return (count ? count : retval); |
268 | } | 263 | } |
269 | 264 | ||
@@ -276,10 +271,10 @@ static int coda_psdev_open(struct inode * inode, struct file * file) | |||
276 | if (idx < 0 || idx >= MAX_CODADEVS) | 271 | if (idx < 0 || idx >= MAX_CODADEVS) |
277 | return -ENODEV; | 272 | return -ENODEV; |
278 | 273 | ||
279 | lock_kernel(); | ||
280 | |||
281 | err = -EBUSY; | 274 | err = -EBUSY; |
282 | vcp = &coda_comms[idx]; | 275 | vcp = &coda_comms[idx]; |
276 | mutex_lock(&vcp->vc_mutex); | ||
277 | |||
283 | if (!vcp->vc_inuse) { | 278 | if (!vcp->vc_inuse) { |
284 | vcp->vc_inuse++; | 279 | vcp->vc_inuse++; |
285 | 280 | ||
@@ -293,7 +288,7 @@ static int coda_psdev_open(struct inode * inode, struct file * file) | |||
293 | err = 0; | 288 | err = 0; |
294 | } | 289 | } |
295 | 290 | ||
296 | unlock_kernel(); | 291 | mutex_unlock(&vcp->vc_mutex); |
297 | return err; | 292 | return err; |
298 | } | 293 | } |
299 | 294 | ||
@@ -308,7 +303,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file) | |||
308 | return -1; | 303 | return -1; |
309 | } | 304 | } |
310 | 305 | ||
311 | lock_kernel(); | 306 | mutex_lock(&vcp->vc_mutex); |
312 | 307 | ||
313 | /* Wakeup clients so they can return. */ | 308 | /* Wakeup clients so they can return. */ |
314 | list_for_each_entry_safe(req, tmp, &vcp->vc_pending, uc_chain) { | 309 | list_for_each_entry_safe(req, tmp, &vcp->vc_pending, uc_chain) { |
@@ -333,7 +328,7 @@ static int coda_psdev_release(struct inode * inode, struct file * file) | |||
333 | 328 | ||
334 | file->private_data = NULL; | 329 | file->private_data = NULL; |
335 | vcp->vc_inuse--; | 330 | vcp->vc_inuse--; |
336 | unlock_kernel(); | 331 | mutex_unlock(&vcp->vc_mutex); |
337 | return 0; | 332 | return 0; |
338 | } | 333 | } |
339 | 334 | ||
@@ -346,6 +341,7 @@ static const struct file_operations coda_psdev_fops = { | |||
346 | .unlocked_ioctl = coda_psdev_ioctl, | 341 | .unlocked_ioctl = coda_psdev_ioctl, |
347 | .open = coda_psdev_open, | 342 | .open = coda_psdev_open, |
348 | .release = coda_psdev_release, | 343 | .release = coda_psdev_release, |
344 | .llseek = noop_llseek, | ||
349 | }; | 345 | }; |
350 | 346 | ||
351 | static int init_coda_psdev(void) | 347 | static int init_coda_psdev(void) |
@@ -361,9 +357,11 @@ static int init_coda_psdev(void) | |||
361 | err = PTR_ERR(coda_psdev_class); | 357 | err = PTR_ERR(coda_psdev_class); |
362 | goto out_chrdev; | 358 | goto out_chrdev; |
363 | } | 359 | } |
364 | for (i = 0; i < MAX_CODADEVS; i++) | 360 | for (i = 0; i < MAX_CODADEVS; i++) { |
361 | mutex_init(&(&coda_comms[i])->vc_mutex); | ||
365 | device_create(coda_psdev_class, NULL, | 362 | device_create(coda_psdev_class, NULL, |
366 | MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i); | 363 | MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i); |
364 | } | ||
367 | coda_sysctl_init(); | 365 | coda_sysctl_init(); |
368 | goto out; | 366 | goto out; |
369 | 367 | ||
diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index 4513b7258458..af78f007a2b0 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/stat.h> | 14 | #include <linux/stat.h> |
15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
16 | #include <linux/pagemap.h> | 16 | #include <linux/pagemap.h> |
17 | #include <linux/smp_lock.h> | ||
18 | 17 | ||
19 | #include <linux/coda.h> | 18 | #include <linux/coda.h> |
20 | #include <linux/coda_linux.h> | 19 | #include <linux/coda_linux.h> |
@@ -29,11 +28,9 @@ static int coda_symlink_filler(struct file *file, struct page *page) | |||
29 | unsigned int len = PAGE_SIZE; | 28 | unsigned int len = PAGE_SIZE; |
30 | char *p = kmap(page); | 29 | char *p = kmap(page); |
31 | 30 | ||
32 | lock_kernel(); | ||
33 | cii = ITOC(inode); | 31 | cii = ITOC(inode); |
34 | 32 | ||
35 | error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len); | 33 | error = venus_readlink(inode->i_sb, &cii->c_fid, p, &len); |
36 | unlock_kernel(); | ||
37 | if (error) | 34 | if (error) |
38 | goto fail; | 35 | goto fail; |
39 | SetPageUptodate(page); | 36 | SetPageUptodate(page); |
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index b8893ab6f9e6..c3563cab9758 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <linux/errno.h> | 27 | #include <linux/errno.h> |
28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
30 | #include <linux/mutex.h> | ||
30 | #include <asm/uaccess.h> | 31 | #include <asm/uaccess.h> |
31 | #include <linux/vmalloc.h> | 32 | #include <linux/vmalloc.h> |
32 | #include <linux/vfs.h> | 33 | #include <linux/vfs.h> |
@@ -606,7 +607,8 @@ static void coda_unblock_signals(sigset_t *old) | |||
606 | (r)->uc_opcode != CODA_RELEASE) || \ | 607 | (r)->uc_opcode != CODA_RELEASE) || \ |
607 | (r)->uc_flags & CODA_REQ_READ)) | 608 | (r)->uc_flags & CODA_REQ_READ)) |
608 | 609 | ||
609 | static inline void coda_waitfor_upcall(struct upc_req *req) | 610 | static inline void coda_waitfor_upcall(struct venus_comm *vcp, |
611 | struct upc_req *req) | ||
610 | { | 612 | { |
611 | DECLARE_WAITQUEUE(wait, current); | 613 | DECLARE_WAITQUEUE(wait, current); |
612 | unsigned long timeout = jiffies + coda_timeout * HZ; | 614 | unsigned long timeout = jiffies + coda_timeout * HZ; |
@@ -639,10 +641,12 @@ static inline void coda_waitfor_upcall(struct upc_req *req) | |||
639 | break; | 641 | break; |
640 | } | 642 | } |
641 | 643 | ||
644 | mutex_unlock(&vcp->vc_mutex); | ||
642 | if (blocked) | 645 | if (blocked) |
643 | schedule_timeout(HZ); | 646 | schedule_timeout(HZ); |
644 | else | 647 | else |
645 | schedule(); | 648 | schedule(); |
649 | mutex_lock(&vcp->vc_mutex); | ||
646 | } | 650 | } |
647 | if (blocked) | 651 | if (blocked) |
648 | coda_unblock_signals(&old); | 652 | coda_unblock_signals(&old); |
@@ -667,18 +671,23 @@ static int coda_upcall(struct venus_comm *vcp, | |||
667 | { | 671 | { |
668 | union outputArgs *out; | 672 | union outputArgs *out; |
669 | union inputArgs *sig_inputArgs; | 673 | union inputArgs *sig_inputArgs; |
670 | struct upc_req *req, *sig_req; | 674 | struct upc_req *req = NULL, *sig_req; |
671 | int error = 0; | 675 | int error; |
676 | |||
677 | mutex_lock(&vcp->vc_mutex); | ||
672 | 678 | ||
673 | if (!vcp->vc_inuse) { | 679 | if (!vcp->vc_inuse) { |
674 | printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n"); | 680 | printk(KERN_NOTICE "coda: Venus dead, not sending upcall\n"); |
675 | return -ENXIO; | 681 | error = -ENXIO; |
682 | goto exit; | ||
676 | } | 683 | } |
677 | 684 | ||
678 | /* Format the request message. */ | 685 | /* Format the request message. */ |
679 | req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); | 686 | req = kmalloc(sizeof(struct upc_req), GFP_KERNEL); |
680 | if (!req) | 687 | if (!req) { |
681 | return -ENOMEM; | 688 | error = -ENOMEM; |
689 | goto exit; | ||
690 | } | ||
682 | 691 | ||
683 | req->uc_data = (void *)buffer; | 692 | req->uc_data = (void *)buffer; |
684 | req->uc_flags = 0; | 693 | req->uc_flags = 0; |
@@ -705,7 +714,7 @@ static int coda_upcall(struct venus_comm *vcp, | |||
705 | * ENODEV. */ | 714 | * ENODEV. */ |
706 | 715 | ||
707 | /* Go to sleep. Wake up on signals only after the timeout. */ | 716 | /* Go to sleep. Wake up on signals only after the timeout. */ |
708 | coda_waitfor_upcall(req); | 717 | coda_waitfor_upcall(vcp, req); |
709 | 718 | ||
710 | /* Op went through, interrupt or not... */ | 719 | /* Op went through, interrupt or not... */ |
711 | if (req->uc_flags & CODA_REQ_WRITE) { | 720 | if (req->uc_flags & CODA_REQ_WRITE) { |
@@ -759,6 +768,7 @@ static int coda_upcall(struct venus_comm *vcp, | |||
759 | 768 | ||
760 | exit: | 769 | exit: |
761 | kfree(req); | 770 | kfree(req); |
771 | mutex_unlock(&vcp->vc_mutex); | ||
762 | return error; | 772 | return error; |
763 | } | 773 | } |
764 | 774 | ||
@@ -796,21 +806,24 @@ exit: | |||
796 | * | 806 | * |
797 | * CODA_REPLACE -- replace one CodaFid with another throughout the name cache */ | 807 | * CODA_REPLACE -- replace one CodaFid with another throughout the name cache */ |
798 | 808 | ||
799 | int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) | 809 | int coda_downcall(struct venus_comm *vcp, int opcode, union outputArgs *out) |
800 | { | 810 | { |
801 | struct inode *inode = NULL; | 811 | struct inode *inode = NULL; |
802 | struct CodaFid *fid, *newfid; | 812 | struct CodaFid *fid = NULL, *newfid; |
813 | struct super_block *sb; | ||
803 | 814 | ||
804 | /* Handle invalidation requests. */ | 815 | /* Handle invalidation requests. */ |
805 | if ( !sb || !sb->s_root) | 816 | mutex_lock(&vcp->vc_mutex); |
806 | return 0; | 817 | sb = vcp->vc_sb; |
818 | if (!sb || !sb->s_root) | ||
819 | goto unlock_out; | ||
807 | 820 | ||
808 | switch (opcode) { | 821 | switch (opcode) { |
809 | case CODA_FLUSH: | 822 | case CODA_FLUSH: |
810 | coda_cache_clear_all(sb); | 823 | coda_cache_clear_all(sb); |
811 | shrink_dcache_sb(sb); | 824 | shrink_dcache_sb(sb); |
812 | if (sb->s_root->d_inode) | 825 | if (sb->s_root->d_inode) |
813 | coda_flag_inode(sb->s_root->d_inode, C_FLUSH); | 826 | coda_flag_inode(sb->s_root->d_inode, C_FLUSH); |
814 | break; | 827 | break; |
815 | 828 | ||
816 | case CODA_PURGEUSER: | 829 | case CODA_PURGEUSER: |
@@ -819,45 +832,53 @@ int coda_downcall(int opcode, union outputArgs * out, struct super_block *sb) | |||
819 | 832 | ||
820 | case CODA_ZAPDIR: | 833 | case CODA_ZAPDIR: |
821 | fid = &out->coda_zapdir.CodaFid; | 834 | fid = &out->coda_zapdir.CodaFid; |
822 | inode = coda_fid_to_inode(fid, sb); | ||
823 | if (inode) { | ||
824 | coda_flag_inode_children(inode, C_PURGE); | ||
825 | coda_flag_inode(inode, C_VATTR); | ||
826 | } | ||
827 | break; | 835 | break; |
828 | 836 | ||
829 | case CODA_ZAPFILE: | 837 | case CODA_ZAPFILE: |
830 | fid = &out->coda_zapfile.CodaFid; | 838 | fid = &out->coda_zapfile.CodaFid; |
831 | inode = coda_fid_to_inode(fid, sb); | ||
832 | if (inode) | ||
833 | coda_flag_inode(inode, C_VATTR); | ||
834 | break; | 839 | break; |
835 | 840 | ||
836 | case CODA_PURGEFID: | 841 | case CODA_PURGEFID: |
837 | fid = &out->coda_purgefid.CodaFid; | 842 | fid = &out->coda_purgefid.CodaFid; |
843 | break; | ||
844 | |||
845 | case CODA_REPLACE: | ||
846 | fid = &out->coda_replace.OldFid; | ||
847 | break; | ||
848 | } | ||
849 | if (fid) | ||
838 | inode = coda_fid_to_inode(fid, sb); | 850 | inode = coda_fid_to_inode(fid, sb); |
839 | if (inode) { | ||
840 | coda_flag_inode_children(inode, C_PURGE); | ||
841 | 851 | ||
842 | /* catch the dentries later if some are still busy */ | 852 | unlock_out: |
843 | coda_flag_inode(inode, C_PURGE); | 853 | mutex_unlock(&vcp->vc_mutex); |
844 | d_prune_aliases(inode); | ||
845 | 854 | ||
846 | } | 855 | if (!inode) |
856 | return 0; | ||
857 | |||
858 | switch (opcode) { | ||
859 | case CODA_ZAPDIR: | ||
860 | coda_flag_inode_children(inode, C_PURGE); | ||
861 | coda_flag_inode(inode, C_VATTR); | ||
862 | break; | ||
863 | |||
864 | case CODA_ZAPFILE: | ||
865 | coda_flag_inode(inode, C_VATTR); | ||
866 | break; | ||
867 | |||
868 | case CODA_PURGEFID: | ||
869 | coda_flag_inode_children(inode, C_PURGE); | ||
870 | |||
871 | /* catch the dentries later if some are still busy */ | ||
872 | coda_flag_inode(inode, C_PURGE); | ||
873 | d_prune_aliases(inode); | ||
847 | break; | 874 | break; |
848 | 875 | ||
849 | case CODA_REPLACE: | 876 | case CODA_REPLACE: |
850 | fid = &out->coda_replace.OldFid; | ||
851 | newfid = &out->coda_replace.NewFid; | 877 | newfid = &out->coda_replace.NewFid; |
852 | inode = coda_fid_to_inode(fid, sb); | 878 | coda_replace_fid(inode, fid, newfid); |
853 | if (inode) | ||
854 | coda_replace_fid(inode, fid, newfid); | ||
855 | break; | 879 | break; |
856 | } | 880 | } |
857 | 881 | iput(inode); | |
858 | if (inode) | ||
859 | iput(inode); | ||
860 | |||
861 | return 0; | 882 | return 0; |
862 | } | 883 | } |
863 | 884 | ||
diff --git a/fs/compat.c b/fs/compat.c index 718c7062aec1..f03abdadc401 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -1153,7 +1153,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, | |||
1153 | { | 1153 | { |
1154 | compat_ssize_t tot_len; | 1154 | compat_ssize_t tot_len; |
1155 | struct iovec iovstack[UIO_FASTIOV]; | 1155 | struct iovec iovstack[UIO_FASTIOV]; |
1156 | struct iovec *iov; | 1156 | struct iovec *iov = iovstack; |
1157 | ssize_t ret; | 1157 | ssize_t ret; |
1158 | io_fn_t fn; | 1158 | io_fn_t fn; |
1159 | iov_fn_t fnv; | 1159 | iov_fn_t fnv; |
@@ -1963,7 +1963,7 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | |||
1963 | } | 1963 | } |
1964 | #endif /* HAVE_SET_RESTORE_SIGMASK */ | 1964 | #endif /* HAVE_SET_RESTORE_SIGMASK */ |
1965 | 1965 | ||
1966 | #if defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE) | 1966 | #if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) |
1967 | /* Stuff for NFS server syscalls... */ | 1967 | /* Stuff for NFS server syscalls... */ |
1968 | struct compat_nfsctl_svc { | 1968 | struct compat_nfsctl_svc { |
1969 | u16 svc32_port; | 1969 | u16 svc32_port; |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 03e59aa318eb..d0ad09d57789 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -599,69 +599,6 @@ static int do_smb_getmountuid(unsigned int fd, unsigned int cmd, | |||
599 | #define HIDPGETCONNLIST _IOR('H', 210, int) | 599 | #define HIDPGETCONNLIST _IOR('H', 210, int) |
600 | #define HIDPGETCONNINFO _IOR('H', 211, int) | 600 | #define HIDPGETCONNINFO _IOR('H', 211, int) |
601 | 601 | ||
602 | #ifdef CONFIG_BLOCK | ||
603 | struct raw32_config_request | ||
604 | { | ||
605 | compat_int_t raw_minor; | ||
606 | __u64 block_major; | ||
607 | __u64 block_minor; | ||
608 | } __attribute__((packed)); | ||
609 | |||
610 | static int get_raw32_request(struct raw_config_request *req, struct raw32_config_request __user *user_req) | ||
611 | { | ||
612 | int ret; | ||
613 | |||
614 | if (!access_ok(VERIFY_READ, user_req, sizeof(struct raw32_config_request))) | ||
615 | return -EFAULT; | ||
616 | |||
617 | ret = __get_user(req->raw_minor, &user_req->raw_minor); | ||
618 | ret |= __get_user(req->block_major, &user_req->block_major); | ||
619 | ret |= __get_user(req->block_minor, &user_req->block_minor); | ||
620 | |||
621 | return ret ? -EFAULT : 0; | ||
622 | } | ||
623 | |||
624 | static int set_raw32_request(struct raw_config_request *req, struct raw32_config_request __user *user_req) | ||
625 | { | ||
626 | int ret; | ||
627 | |||
628 | if (!access_ok(VERIFY_WRITE, user_req, sizeof(struct raw32_config_request))) | ||
629 | return -EFAULT; | ||
630 | |||
631 | ret = __put_user(req->raw_minor, &user_req->raw_minor); | ||
632 | ret |= __put_user(req->block_major, &user_req->block_major); | ||
633 | ret |= __put_user(req->block_minor, &user_req->block_minor); | ||
634 | |||
635 | return ret ? -EFAULT : 0; | ||
636 | } | ||
637 | |||
638 | static int raw_ioctl(unsigned fd, unsigned cmd, | ||
639 | struct raw32_config_request __user *user_req) | ||
640 | { | ||
641 | int ret; | ||
642 | |||
643 | switch (cmd) { | ||
644 | case RAW_SETBIND: | ||
645 | default: { /* RAW_GETBIND */ | ||
646 | struct raw_config_request req; | ||
647 | mm_segment_t oldfs = get_fs(); | ||
648 | |||
649 | if ((ret = get_raw32_request(&req, user_req))) | ||
650 | return ret; | ||
651 | |||
652 | set_fs(KERNEL_DS); | ||
653 | ret = sys_ioctl(fd,cmd,(unsigned long)&req); | ||
654 | set_fs(oldfs); | ||
655 | |||
656 | if ((!ret) && (cmd == RAW_GETBIND)) { | ||
657 | ret = set_raw32_request(&req, user_req); | ||
658 | } | ||
659 | break; | ||
660 | } | ||
661 | } | ||
662 | return ret; | ||
663 | } | ||
664 | #endif /* CONFIG_BLOCK */ | ||
665 | 602 | ||
666 | struct serial_struct32 { | 603 | struct serial_struct32 { |
667 | compat_int_t type; | 604 | compat_int_t type; |
@@ -1262,9 +1199,6 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5) | |||
1262 | COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) | 1199 | COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS) |
1263 | COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) | 1200 | COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS) |
1264 | COMPATIBLE_IOCTL(OSS_GETVERSION) | 1201 | COMPATIBLE_IOCTL(OSS_GETVERSION) |
1265 | /* Raw devices */ | ||
1266 | COMPATIBLE_IOCTL(RAW_SETBIND) | ||
1267 | COMPATIBLE_IOCTL(RAW_GETBIND) | ||
1268 | /* SMB ioctls which do not need any translations */ | 1202 | /* SMB ioctls which do not need any translations */ |
1269 | COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) | 1203 | COMPATIBLE_IOCTL(SMB_IOC_NEWCONN) |
1270 | /* Watchdog */ | 1204 | /* Watchdog */ |
@@ -1523,10 +1457,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd, | |||
1523 | case MTIOCGET32: | 1457 | case MTIOCGET32: |
1524 | case MTIOCPOS32: | 1458 | case MTIOCPOS32: |
1525 | return mt_ioctl_trans(fd, cmd, argp); | 1459 | return mt_ioctl_trans(fd, cmd, argp); |
1526 | /* Raw devices */ | ||
1527 | case RAW_SETBIND: | ||
1528 | case RAW_GETBIND: | ||
1529 | return raw_ioctl(fd, cmd, argp); | ||
1530 | #endif | 1460 | #endif |
1531 | /* One SMB ioctl needs translations. */ | 1461 | /* One SMB ioctl needs translations. */ |
1532 | #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) | 1462 | #define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t) |
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index cf78d44a8d6a..253476d78ed8 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c | |||
@@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) | |||
135 | { | 135 | { |
136 | struct inode * inode = new_inode(configfs_sb); | 136 | struct inode * inode = new_inode(configfs_sb); |
137 | if (inode) { | 137 | if (inode) { |
138 | inode->i_ino = get_next_ino(); | ||
138 | inode->i_mapping->a_ops = &configfs_aops; | 139 | inode->i_mapping->a_ops = &configfs_aops; |
139 | inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; | 140 | inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; |
140 | inode->i_op = &configfs_inode_operations; | 141 | inode->i_op = &configfs_inode_operations; |
diff --git a/fs/dcache.c b/fs/dcache.c index 83293be48149..23702a9d4e6d 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = { | |||
67 | .age_limit = 45, | 67 | .age_limit = 45, |
68 | }; | 68 | }; |
69 | 69 | ||
70 | static void __d_free(struct dentry *dentry) | 70 | static struct percpu_counter nr_dentry __cacheline_aligned_in_smp; |
71 | static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp; | ||
72 | |||
73 | #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) | ||
74 | int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, | ||
75 | size_t *lenp, loff_t *ppos) | ||
76 | { | ||
77 | dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry); | ||
78 | dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); | ||
79 | return proc_dointvec(table, write, buffer, lenp, ppos); | ||
80 | } | ||
81 | #endif | ||
82 | |||
83 | static void __d_free(struct rcu_head *head) | ||
71 | { | 84 | { |
85 | struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); | ||
86 | |||
72 | WARN_ON(!list_empty(&dentry->d_alias)); | 87 | WARN_ON(!list_empty(&dentry->d_alias)); |
73 | if (dname_external(dentry)) | 88 | if (dname_external(dentry)) |
74 | kfree(dentry->d_name.name); | 89 | kfree(dentry->d_name.name); |
75 | kmem_cache_free(dentry_cache, dentry); | 90 | kmem_cache_free(dentry_cache, dentry); |
76 | } | 91 | } |
77 | 92 | ||
78 | static void d_callback(struct rcu_head *head) | ||
79 | { | ||
80 | struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu); | ||
81 | __d_free(dentry); | ||
82 | } | ||
83 | |||
84 | /* | 93 | /* |
85 | * no dcache_lock, please. The caller must decrement dentry_stat.nr_dentry | 94 | * no dcache_lock, please. |
86 | * inside dcache_lock. | ||
87 | */ | 95 | */ |
88 | static void d_free(struct dentry *dentry) | 96 | static void d_free(struct dentry *dentry) |
89 | { | 97 | { |
98 | percpu_counter_dec(&nr_dentry); | ||
90 | if (dentry->d_op && dentry->d_op->d_release) | 99 | if (dentry->d_op && dentry->d_op->d_release) |
91 | dentry->d_op->d_release(dentry); | 100 | dentry->d_op->d_release(dentry); |
101 | |||
92 | /* if dentry was never inserted into hash, immediate free is OK */ | 102 | /* if dentry was never inserted into hash, immediate free is OK */ |
93 | if (hlist_unhashed(&dentry->d_hash)) | 103 | if (hlist_unhashed(&dentry->d_hash)) |
94 | __d_free(dentry); | 104 | __d_free(&dentry->d_u.d_rcu); |
95 | else | 105 | else |
96 | call_rcu(&dentry->d_u.d_rcu, d_callback); | 106 | call_rcu(&dentry->d_u.d_rcu, __d_free); |
97 | } | 107 | } |
98 | 108 | ||
99 | /* | 109 | /* |
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry) | |||
123 | } | 133 | } |
124 | 134 | ||
125 | /* | 135 | /* |
126 | * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held. | 136 | * dentry_lru_(add|del|move_tail) must be called with dcache_lock held. |
127 | */ | 137 | */ |
128 | static void dentry_lru_add(struct dentry *dentry) | 138 | static void dentry_lru_add(struct dentry *dentry) |
129 | { | 139 | { |
130 | list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | 140 | if (list_empty(&dentry->d_lru)) { |
131 | dentry->d_sb->s_nr_dentry_unused++; | 141 | list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); |
132 | dentry_stat.nr_unused++; | 142 | dentry->d_sb->s_nr_dentry_unused++; |
133 | } | 143 | percpu_counter_inc(&nr_dentry_unused); |
134 | 144 | } | |
135 | static void dentry_lru_add_tail(struct dentry *dentry) | ||
136 | { | ||
137 | list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | ||
138 | dentry->d_sb->s_nr_dentry_unused++; | ||
139 | dentry_stat.nr_unused++; | ||
140 | } | 145 | } |
141 | 146 | ||
142 | static void dentry_lru_del(struct dentry *dentry) | 147 | static void dentry_lru_del(struct dentry *dentry) |
143 | { | 148 | { |
144 | if (!list_empty(&dentry->d_lru)) { | 149 | if (!list_empty(&dentry->d_lru)) { |
145 | list_del(&dentry->d_lru); | 150 | list_del_init(&dentry->d_lru); |
146 | dentry->d_sb->s_nr_dentry_unused--; | 151 | dentry->d_sb->s_nr_dentry_unused--; |
147 | dentry_stat.nr_unused--; | 152 | percpu_counter_dec(&nr_dentry_unused); |
148 | } | 153 | } |
149 | } | 154 | } |
150 | 155 | ||
151 | static void dentry_lru_del_init(struct dentry *dentry) | 156 | static void dentry_lru_move_tail(struct dentry *dentry) |
152 | { | 157 | { |
153 | if (likely(!list_empty(&dentry->d_lru))) { | 158 | if (list_empty(&dentry->d_lru)) { |
154 | list_del_init(&dentry->d_lru); | 159 | list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); |
155 | dentry->d_sb->s_nr_dentry_unused--; | 160 | dentry->d_sb->s_nr_dentry_unused++; |
156 | dentry_stat.nr_unused--; | 161 | percpu_counter_inc(&nr_dentry_unused); |
162 | } else { | ||
163 | list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru); | ||
157 | } | 164 | } |
158 | } | 165 | } |
159 | 166 | ||
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry) | |||
172 | struct dentry *parent; | 179 | struct dentry *parent; |
173 | 180 | ||
174 | list_del(&dentry->d_u.d_child); | 181 | list_del(&dentry->d_u.d_child); |
175 | dentry_stat.nr_dentry--; /* For d_free, below */ | ||
176 | /*drops the locks, at that point nobody can reach this dentry */ | 182 | /*drops the locks, at that point nobody can reach this dentry */ |
177 | dentry_iput(dentry); | 183 | dentry_iput(dentry); |
178 | if (IS_ROOT(dentry)) | 184 | if (IS_ROOT(dentry)) |
@@ -237,13 +243,15 @@ repeat: | |||
237 | if (dentry->d_op->d_delete(dentry)) | 243 | if (dentry->d_op->d_delete(dentry)) |
238 | goto unhash_it; | 244 | goto unhash_it; |
239 | } | 245 | } |
246 | |||
240 | /* Unreachable? Get rid of it */ | 247 | /* Unreachable? Get rid of it */ |
241 | if (d_unhashed(dentry)) | 248 | if (d_unhashed(dentry)) |
242 | goto kill_it; | 249 | goto kill_it; |
243 | if (list_empty(&dentry->d_lru)) { | 250 | |
244 | dentry->d_flags |= DCACHE_REFERENCED; | 251 | /* Otherwise leave it cached and ensure it's on the LRU */ |
245 | dentry_lru_add(dentry); | 252 | dentry->d_flags |= DCACHE_REFERENCED; |
246 | } | 253 | dentry_lru_add(dentry); |
254 | |||
247 | spin_unlock(&dentry->d_lock); | 255 | spin_unlock(&dentry->d_lock); |
248 | spin_unlock(&dcache_lock); | 256 | spin_unlock(&dcache_lock); |
249 | return; | 257 | return; |
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry) | |||
318 | EXPORT_SYMBOL(d_invalidate); | 326 | EXPORT_SYMBOL(d_invalidate); |
319 | 327 | ||
320 | /* This should be called _only_ with dcache_lock held */ | 328 | /* This should be called _only_ with dcache_lock held */ |
321 | |||
322 | static inline struct dentry * __dget_locked(struct dentry *dentry) | 329 | static inline struct dentry * __dget_locked(struct dentry *dentry) |
323 | { | 330 | { |
324 | atomic_inc(&dentry->d_count); | 331 | atomic_inc(&dentry->d_count); |
325 | dentry_lru_del_init(dentry); | 332 | dentry_lru_del(dentry); |
326 | return dentry; | 333 | return dentry; |
327 | } | 334 | } |
328 | 335 | ||
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry) | |||
441 | 448 | ||
442 | if (dentry->d_op && dentry->d_op->d_delete) | 449 | if (dentry->d_op && dentry->d_op->d_delete) |
443 | dentry->d_op->d_delete(dentry); | 450 | dentry->d_op->d_delete(dentry); |
444 | dentry_lru_del_init(dentry); | 451 | dentry_lru_del(dentry); |
445 | __d_drop(dentry); | 452 | __d_drop(dentry); |
446 | dentry = d_kill(dentry); | 453 | dentry = d_kill(dentry); |
447 | spin_lock(&dcache_lock); | 454 | spin_lock(&dcache_lock); |
448 | } | 455 | } |
449 | } | 456 | } |
450 | 457 | ||
451 | /* | 458 | static void shrink_dentry_list(struct list_head *list) |
452 | * Shrink the dentry LRU on a given superblock. | ||
453 | * @sb : superblock to shrink dentry LRU. | ||
454 | * @count: If count is NULL, we prune all dentries on superblock. | ||
455 | * @flags: If flags is non-zero, we need to do special processing based on | ||
456 | * which flags are set. This means we don't need to maintain multiple | ||
457 | * similar copies of this loop. | ||
458 | */ | ||
459 | static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) | ||
460 | { | 459 | { |
461 | LIST_HEAD(referenced); | ||
462 | LIST_HEAD(tmp); | ||
463 | struct dentry *dentry; | 460 | struct dentry *dentry; |
464 | int cnt = 0; | ||
465 | 461 | ||
466 | BUG_ON(!sb); | 462 | while (!list_empty(list)) { |
467 | BUG_ON((flags & DCACHE_REFERENCED) && count == NULL); | 463 | dentry = list_entry(list->prev, struct dentry, d_lru); |
468 | spin_lock(&dcache_lock); | 464 | dentry_lru_del(dentry); |
469 | if (count != NULL) | ||
470 | /* called from prune_dcache() and shrink_dcache_parent() */ | ||
471 | cnt = *count; | ||
472 | restart: | ||
473 | if (count == NULL) | ||
474 | list_splice_init(&sb->s_dentry_lru, &tmp); | ||
475 | else { | ||
476 | while (!list_empty(&sb->s_dentry_lru)) { | ||
477 | dentry = list_entry(sb->s_dentry_lru.prev, | ||
478 | struct dentry, d_lru); | ||
479 | BUG_ON(dentry->d_sb != sb); | ||
480 | 465 | ||
481 | spin_lock(&dentry->d_lock); | ||
482 | /* | ||
483 | * If we are honouring the DCACHE_REFERENCED flag and | ||
484 | * the dentry has this flag set, don't free it. Clear | ||
485 | * the flag and put it back on the LRU. | ||
486 | */ | ||
487 | if ((flags & DCACHE_REFERENCED) | ||
488 | && (dentry->d_flags & DCACHE_REFERENCED)) { | ||
489 | dentry->d_flags &= ~DCACHE_REFERENCED; | ||
490 | list_move(&dentry->d_lru, &referenced); | ||
491 | spin_unlock(&dentry->d_lock); | ||
492 | } else { | ||
493 | list_move_tail(&dentry->d_lru, &tmp); | ||
494 | spin_unlock(&dentry->d_lock); | ||
495 | cnt--; | ||
496 | if (!cnt) | ||
497 | break; | ||
498 | } | ||
499 | cond_resched_lock(&dcache_lock); | ||
500 | } | ||
501 | } | ||
502 | while (!list_empty(&tmp)) { | ||
503 | dentry = list_entry(tmp.prev, struct dentry, d_lru); | ||
504 | dentry_lru_del_init(dentry); | ||
505 | spin_lock(&dentry->d_lock); | ||
506 | /* | 466 | /* |
507 | * We found an inuse dentry which was not removed from | 467 | * We found an inuse dentry which was not removed from |
508 | * the LRU because of laziness during lookup. Do not free | 468 | * the LRU because of laziness during lookup. Do not free |
509 | * it - just keep it off the LRU list. | 469 | * it - just keep it off the LRU list. |
510 | */ | 470 | */ |
471 | spin_lock(&dentry->d_lock); | ||
511 | if (atomic_read(&dentry->d_count)) { | 472 | if (atomic_read(&dentry->d_count)) { |
512 | spin_unlock(&dentry->d_lock); | 473 | spin_unlock(&dentry->d_lock); |
513 | continue; | 474 | continue; |
@@ -516,13 +477,60 @@ restart: | |||
516 | /* dentry->d_lock was dropped in prune_one_dentry() */ | 477 | /* dentry->d_lock was dropped in prune_one_dentry() */ |
517 | cond_resched_lock(&dcache_lock); | 478 | cond_resched_lock(&dcache_lock); |
518 | } | 479 | } |
519 | if (count == NULL && !list_empty(&sb->s_dentry_lru)) | 480 | } |
520 | goto restart; | 481 | |
521 | if (count != NULL) | 482 | /** |
522 | *count = cnt; | 483 | * __shrink_dcache_sb - shrink the dentry LRU on a given superblock |
484 | * @sb: superblock to shrink dentry LRU. | ||
485 | * @count: number of entries to prune | ||
486 | * @flags: flags to control the dentry processing | ||
487 | * | ||
488 | * If flags contains DCACHE_REFERENCED reference dentries will not be pruned. | ||
489 | */ | ||
490 | static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags) | ||
491 | { | ||
492 | /* called from prune_dcache() and shrink_dcache_parent() */ | ||
493 | struct dentry *dentry; | ||
494 | LIST_HEAD(referenced); | ||
495 | LIST_HEAD(tmp); | ||
496 | int cnt = *count; | ||
497 | |||
498 | spin_lock(&dcache_lock); | ||
499 | while (!list_empty(&sb->s_dentry_lru)) { | ||
500 | dentry = list_entry(sb->s_dentry_lru.prev, | ||
501 | struct dentry, d_lru); | ||
502 | BUG_ON(dentry->d_sb != sb); | ||
503 | |||
504 | /* | ||
505 | * If we are honouring the DCACHE_REFERENCED flag and the | ||
506 | * dentry has this flag set, don't free it. Clear the flag | ||
507 | * and put it back on the LRU. | ||
508 | */ | ||
509 | if (flags & DCACHE_REFERENCED) { | ||
510 | spin_lock(&dentry->d_lock); | ||
511 | if (dentry->d_flags & DCACHE_REFERENCED) { | ||
512 | dentry->d_flags &= ~DCACHE_REFERENCED; | ||
513 | list_move(&dentry->d_lru, &referenced); | ||
514 | spin_unlock(&dentry->d_lock); | ||
515 | cond_resched_lock(&dcache_lock); | ||
516 | continue; | ||
517 | } | ||
518 | spin_unlock(&dentry->d_lock); | ||
519 | } | ||
520 | |||
521 | list_move_tail(&dentry->d_lru, &tmp); | ||
522 | if (!--cnt) | ||
523 | break; | ||
524 | cond_resched_lock(&dcache_lock); | ||
525 | } | ||
526 | |||
527 | *count = cnt; | ||
528 | shrink_dentry_list(&tmp); | ||
529 | |||
523 | if (!list_empty(&referenced)) | 530 | if (!list_empty(&referenced)) |
524 | list_splice(&referenced, &sb->s_dentry_lru); | 531 | list_splice(&referenced, &sb->s_dentry_lru); |
525 | spin_unlock(&dcache_lock); | 532 | spin_unlock(&dcache_lock); |
533 | |||
526 | } | 534 | } |
527 | 535 | ||
528 | /** | 536 | /** |
@@ -538,7 +546,7 @@ static void prune_dcache(int count) | |||
538 | { | 546 | { |
539 | struct super_block *sb, *p = NULL; | 547 | struct super_block *sb, *p = NULL; |
540 | int w_count; | 548 | int w_count; |
541 | int unused = dentry_stat.nr_unused; | 549 | int unused = percpu_counter_sum_positive(&nr_dentry_unused); |
542 | int prune_ratio; | 550 | int prune_ratio; |
543 | int pruned; | 551 | int pruned; |
544 | 552 | ||
@@ -608,13 +616,19 @@ static void prune_dcache(int count) | |||
608 | * shrink_dcache_sb - shrink dcache for a superblock | 616 | * shrink_dcache_sb - shrink dcache for a superblock |
609 | * @sb: superblock | 617 | * @sb: superblock |
610 | * | 618 | * |
611 | * Shrink the dcache for the specified super block. This | 619 | * Shrink the dcache for the specified super block. This is used to free |
612 | * is used to free the dcache before unmounting a file | 620 | * the dcache before unmounting a file system. |
613 | * system | ||
614 | */ | 621 | */ |
615 | void shrink_dcache_sb(struct super_block * sb) | 622 | void shrink_dcache_sb(struct super_block *sb) |
616 | { | 623 | { |
617 | __shrink_dcache_sb(sb, NULL, 0); | 624 | LIST_HEAD(tmp); |
625 | |||
626 | spin_lock(&dcache_lock); | ||
627 | while (!list_empty(&sb->s_dentry_lru)) { | ||
628 | list_splice_init(&sb->s_dentry_lru, &tmp); | ||
629 | shrink_dentry_list(&tmp); | ||
630 | } | ||
631 | spin_unlock(&dcache_lock); | ||
618 | } | 632 | } |
619 | EXPORT_SYMBOL(shrink_dcache_sb); | 633 | EXPORT_SYMBOL(shrink_dcache_sb); |
620 | 634 | ||
@@ -632,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
632 | 646 | ||
633 | /* detach this root from the system */ | 647 | /* detach this root from the system */ |
634 | spin_lock(&dcache_lock); | 648 | spin_lock(&dcache_lock); |
635 | dentry_lru_del_init(dentry); | 649 | dentry_lru_del(dentry); |
636 | __d_drop(dentry); | 650 | __d_drop(dentry); |
637 | spin_unlock(&dcache_lock); | 651 | spin_unlock(&dcache_lock); |
638 | 652 | ||
@@ -646,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
646 | spin_lock(&dcache_lock); | 660 | spin_lock(&dcache_lock); |
647 | list_for_each_entry(loop, &dentry->d_subdirs, | 661 | list_for_each_entry(loop, &dentry->d_subdirs, |
648 | d_u.d_child) { | 662 | d_u.d_child) { |
649 | dentry_lru_del_init(loop); | 663 | dentry_lru_del(loop); |
650 | __d_drop(loop); | 664 | __d_drop(loop); |
651 | cond_resched_lock(&dcache_lock); | 665 | cond_resched_lock(&dcache_lock); |
652 | } | 666 | } |
@@ -703,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry) | |||
703 | * otherwise we ascend to the parent and move to the | 717 | * otherwise we ascend to the parent and move to the |
704 | * next sibling if there is one */ | 718 | * next sibling if there is one */ |
705 | if (!parent) | 719 | if (!parent) |
706 | goto out; | 720 | return; |
707 | |||
708 | dentry = parent; | 721 | dentry = parent; |
709 | |||
710 | } while (list_empty(&dentry->d_subdirs)); | 722 | } while (list_empty(&dentry->d_subdirs)); |
711 | 723 | ||
712 | dentry = list_entry(dentry->d_subdirs.next, | 724 | dentry = list_entry(dentry->d_subdirs.next, |
713 | struct dentry, d_u.d_child); | 725 | struct dentry, d_u.d_child); |
714 | } | 726 | } |
715 | out: | ||
716 | /* several dentries were freed, need to correct nr_dentry */ | ||
717 | spin_lock(&dcache_lock); | ||
718 | dentry_stat.nr_dentry -= detached; | ||
719 | spin_unlock(&dcache_lock); | ||
720 | } | 727 | } |
721 | 728 | ||
722 | /* | 729 | /* |
@@ -830,14 +837,15 @@ resume: | |||
830 | struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); | 837 | struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); |
831 | next = tmp->next; | 838 | next = tmp->next; |
832 | 839 | ||
833 | dentry_lru_del_init(dentry); | ||
834 | /* | 840 | /* |
835 | * move only zero ref count dentries to the end | 841 | * move only zero ref count dentries to the end |
836 | * of the unused list for prune_dcache | 842 | * of the unused list for prune_dcache |
837 | */ | 843 | */ |
838 | if (!atomic_read(&dentry->d_count)) { | 844 | if (!atomic_read(&dentry->d_count)) { |
839 | dentry_lru_add_tail(dentry); | 845 | dentry_lru_move_tail(dentry); |
840 | found++; | 846 | found++; |
847 | } else { | ||
848 | dentry_lru_del(dentry); | ||
841 | } | 849 | } |
842 | 850 | ||
843 | /* | 851 | /* |
@@ -900,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent); | |||
900 | */ | 908 | */ |
901 | static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 909 | static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) |
902 | { | 910 | { |
911 | int nr_unused; | ||
912 | |||
903 | if (nr) { | 913 | if (nr) { |
904 | if (!(gfp_mask & __GFP_FS)) | 914 | if (!(gfp_mask & __GFP_FS)) |
905 | return -1; | 915 | return -1; |
906 | prune_dcache(nr); | 916 | prune_dcache(nr); |
907 | } | 917 | } |
908 | return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 918 | |
919 | nr_unused = percpu_counter_sum_positive(&nr_dentry_unused); | ||
920 | return (nr_unused / 100) * sysctl_vfs_cache_pressure; | ||
909 | } | 921 | } |
910 | 922 | ||
911 | static struct shrinker dcache_shrinker = { | 923 | static struct shrinker dcache_shrinker = { |
@@ -972,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) | |||
972 | spin_lock(&dcache_lock); | 984 | spin_lock(&dcache_lock); |
973 | if (parent) | 985 | if (parent) |
974 | list_add(&dentry->d_u.d_child, &parent->d_subdirs); | 986 | list_add(&dentry->d_u.d_child, &parent->d_subdirs); |
975 | dentry_stat.nr_dentry++; | ||
976 | spin_unlock(&dcache_lock); | 987 | spin_unlock(&dcache_lock); |
977 | 988 | ||
989 | percpu_counter_inc(&nr_dentry); | ||
990 | |||
978 | return dentry; | 991 | return dentry; |
979 | } | 992 | } |
980 | EXPORT_SYMBOL(d_alloc); | 993 | EXPORT_SYMBOL(d_alloc); |
@@ -1478,33 +1491,26 @@ out: | |||
1478 | * This is used by ncpfs in its readdir implementation. | 1491 | * This is used by ncpfs in its readdir implementation. |
1479 | * Zero is returned in the dentry is invalid. | 1492 | * Zero is returned in the dentry is invalid. |
1480 | */ | 1493 | */ |
1481 | 1494 | int d_validate(struct dentry *dentry, struct dentry *parent) | |
1482 | int d_validate(struct dentry *dentry, struct dentry *dparent) | ||
1483 | { | 1495 | { |
1484 | struct hlist_head *base; | 1496 | struct hlist_head *head = d_hash(parent, dentry->d_name.hash); |
1485 | struct hlist_node *lhp; | 1497 | struct hlist_node *node; |
1498 | struct dentry *d; | ||
1486 | 1499 | ||
1487 | /* Check whether the ptr might be valid at all.. */ | 1500 | /* Check whether the ptr might be valid at all.. */ |
1488 | if (!kmem_ptr_validate(dentry_cache, dentry)) | 1501 | if (!kmem_ptr_validate(dentry_cache, dentry)) |
1489 | goto out; | 1502 | return 0; |
1490 | 1503 | if (dentry->d_parent != parent) | |
1491 | if (dentry->d_parent != dparent) | 1504 | return 0; |
1492 | goto out; | ||
1493 | 1505 | ||
1494 | spin_lock(&dcache_lock); | 1506 | rcu_read_lock(); |
1495 | base = d_hash(dparent, dentry->d_name.hash); | 1507 | hlist_for_each_entry_rcu(d, node, head, d_hash) { |
1496 | hlist_for_each(lhp,base) { | 1508 | if (d == dentry) { |
1497 | /* hlist_for_each_entry_rcu() not required for d_hash list | 1509 | dget(dentry); |
1498 | * as it is parsed under dcache_lock | ||
1499 | */ | ||
1500 | if (dentry == hlist_entry(lhp, struct dentry, d_hash)) { | ||
1501 | __dget_locked(dentry); | ||
1502 | spin_unlock(&dcache_lock); | ||
1503 | return 1; | 1510 | return 1; |
1504 | } | 1511 | } |
1505 | } | 1512 | } |
1506 | spin_unlock(&dcache_lock); | 1513 | rcu_read_unlock(); |
1507 | out: | ||
1508 | return 0; | 1514 | return 0; |
1509 | } | 1515 | } |
1510 | EXPORT_SYMBOL(d_validate); | 1516 | EXPORT_SYMBOL(d_validate); |
@@ -1994,7 +2000,7 @@ global_root: | |||
1994 | * Returns a pointer into the buffer or an error code if the | 2000 | * Returns a pointer into the buffer or an error code if the |
1995 | * path was too long. | 2001 | * path was too long. |
1996 | * | 2002 | * |
1997 | * "buflen" should be positive. Caller holds the dcache_lock. | 2003 | * "buflen" should be positive. |
1998 | * | 2004 | * |
1999 | * If path is not reachable from the supplied root, then the value of | 2005 | * If path is not reachable from the supplied root, then the value of |
2000 | * root is changed (without modifying refcounts). | 2006 | * root is changed (without modifying refcounts). |
@@ -2006,10 +2012,12 @@ char *__d_path(const struct path *path, struct path *root, | |||
2006 | int error; | 2012 | int error; |
2007 | 2013 | ||
2008 | prepend(&res, &buflen, "\0", 1); | 2014 | prepend(&res, &buflen, "\0", 1); |
2015 | spin_lock(&dcache_lock); | ||
2009 | error = prepend_path(path, root, &res, &buflen); | 2016 | error = prepend_path(path, root, &res, &buflen); |
2017 | spin_unlock(&dcache_lock); | ||
2018 | |||
2010 | if (error) | 2019 | if (error) |
2011 | return ERR_PTR(error); | 2020 | return ERR_PTR(error); |
2012 | |||
2013 | return res; | 2021 | return res; |
2014 | } | 2022 | } |
2015 | 2023 | ||
@@ -2419,6 +2427,9 @@ static void __init dcache_init(void) | |||
2419 | { | 2427 | { |
2420 | int loop; | 2428 | int loop; |
2421 | 2429 | ||
2430 | percpu_counter_init(&nr_dentry, 0); | ||
2431 | percpu_counter_init(&nr_dentry_unused, 0); | ||
2432 | |||
2422 | /* | 2433 | /* |
2423 | * A constructor could be added for stable state like the lists, | 2434 | * A constructor could be added for stable state like the lists, |
2424 | * but it is probably not worth it because of the cache nature | 2435 | * but it is probably not worth it because of the cache nature |
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 0210898458b2..89d394d8fe24 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c | |||
@@ -43,6 +43,7 @@ const struct file_operations debugfs_file_operations = { | |||
43 | .read = default_read_file, | 43 | .read = default_read_file, |
44 | .write = default_write_file, | 44 | .write = default_write_file, |
45 | .open = default_open, | 45 | .open = default_open, |
46 | .llseek = noop_llseek, | ||
46 | }; | 47 | }; |
47 | 48 | ||
48 | static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd) | 49 | static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd) |
@@ -454,6 +455,7 @@ static const struct file_operations fops_bool = { | |||
454 | .read = read_file_bool, | 455 | .read = read_file_bool, |
455 | .write = write_file_bool, | 456 | .write = write_file_bool, |
456 | .open = default_open, | 457 | .open = default_open, |
458 | .llseek = default_llseek, | ||
457 | }; | 459 | }; |
458 | 460 | ||
459 | /** | 461 | /** |
@@ -498,6 +500,7 @@ static ssize_t read_file_blob(struct file *file, char __user *user_buf, | |||
498 | static const struct file_operations fops_blob = { | 500 | static const struct file_operations fops_blob = { |
499 | .read = read_file_blob, | 501 | .read = read_file_blob, |
500 | .open = default_open, | 502 | .open = default_open, |
503 | .llseek = default_llseek, | ||
501 | }; | 504 | }; |
502 | 505 | ||
503 | /** | 506 | /** |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 30a87b3dbcac..a4ed8380e98a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d | |||
40 | struct inode *inode = new_inode(sb); | 40 | struct inode *inode = new_inode(sb); |
41 | 41 | ||
42 | if (inode) { | 42 | if (inode) { |
43 | inode->i_ino = get_next_ino(); | ||
43 | inode->i_mode = mode; | 44 | inode->i_mode = mode; |
44 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 45 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
45 | switch (mode & S_IFMT) { | 46 | switch (mode & S_IFMT) { |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 48d74c7391d1..85882f6ba5f7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio) | |||
218 | * filesystems can use it to hold additional state between get_block calls and | 218 | * filesystems can use it to hold additional state between get_block calls and |
219 | * dio_complete. | 219 | * dio_complete. |
220 | */ | 220 | */ |
221 | static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) | 221 | static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async) |
222 | { | 222 | { |
223 | ssize_t transferred = 0; | 223 | ssize_t transferred = 0; |
224 | 224 | ||
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c index c6cf25158746..6b42ba807dfd 100644 --- a/fs/dlm/debug_fs.c +++ b/fs/dlm/debug_fs.c | |||
@@ -643,7 +643,8 @@ static ssize_t waiters_read(struct file *file, char __user *userbuf, | |||
643 | static const struct file_operations waiters_fops = { | 643 | static const struct file_operations waiters_fops = { |
644 | .owner = THIS_MODULE, | 644 | .owner = THIS_MODULE, |
645 | .open = waiters_open, | 645 | .open = waiters_open, |
646 | .read = waiters_read | 646 | .read = waiters_read, |
647 | .llseek = default_llseek, | ||
647 | }; | 648 | }; |
648 | 649 | ||
649 | void dlm_delete_debug_file(struct dlm_ls *ls) | 650 | void dlm_delete_debug_file(struct dlm_ls *ls) |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 031dbe3a15ca..64e5f3efdd81 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -1846,6 +1846,9 @@ static void send_bast_queue(struct dlm_rsb *r, struct list_head *head, | |||
1846 | struct dlm_lkb *gr; | 1846 | struct dlm_lkb *gr; |
1847 | 1847 | ||
1848 | list_for_each_entry(gr, head, lkb_statequeue) { | 1848 | list_for_each_entry(gr, head, lkb_statequeue) { |
1849 | /* skip self when sending basts to convertqueue */ | ||
1850 | if (gr == lkb) | ||
1851 | continue; | ||
1849 | if (gr->lkb_bastfn && modes_require_bast(gr, lkb)) { | 1852 | if (gr->lkb_bastfn && modes_require_bast(gr, lkb)) { |
1850 | queue_bast(r, gr, lkb->lkb_rqmode); | 1853 | queue_bast(r, gr, lkb->lkb_rqmode); |
1851 | gr->lkb_highbast = lkb->lkb_rqmode; | 1854 | gr->lkb_highbast = lkb->lkb_rqmode; |
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index d45c02db6943..30d8b85febbf 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c | |||
@@ -412,7 +412,8 @@ static const struct file_operations dev_fops = { | |||
412 | .read = dev_read, | 412 | .read = dev_read, |
413 | .write = dev_write, | 413 | .write = dev_write, |
414 | .poll = dev_poll, | 414 | .poll = dev_poll, |
415 | .owner = THIS_MODULE | 415 | .owner = THIS_MODULE, |
416 | .llseek = noop_llseek, | ||
416 | }; | 417 | }; |
417 | 418 | ||
418 | static struct miscdevice plock_dev_misc = { | 419 | static struct miscdevice plock_dev_misc = { |
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index b6272853130c..66d6c16bf440 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
@@ -1009,6 +1009,7 @@ static const struct file_operations device_fops = { | |||
1009 | .write = device_write, | 1009 | .write = device_write, |
1010 | .poll = device_poll, | 1010 | .poll = device_poll, |
1011 | .owner = THIS_MODULE, | 1011 | .owner = THIS_MODULE, |
1012 | .llseek = noop_llseek, | ||
1012 | }; | 1013 | }; |
1013 | 1014 | ||
1014 | static const struct file_operations ctl_device_fops = { | 1015 | static const struct file_operations ctl_device_fops = { |
@@ -1017,6 +1018,7 @@ static const struct file_operations ctl_device_fops = { | |||
1017 | .read = device_read, | 1018 | .read = device_read, |
1018 | .write = device_write, | 1019 | .write = device_write, |
1019 | .owner = THIS_MODULE, | 1020 | .owner = THIS_MODULE, |
1021 | .llseek = noop_llseek, | ||
1020 | }; | 1022 | }; |
1021 | 1023 | ||
1022 | static struct miscdevice ctl_device = { | 1024 | static struct miscdevice ctl_device = { |
@@ -1029,6 +1031,7 @@ static const struct file_operations monitor_device_fops = { | |||
1029 | .open = monitor_device_open, | 1031 | .open = monitor_device_open, |
1030 | .release = monitor_device_close, | 1032 | .release = monitor_device_close, |
1031 | .owner = THIS_MODULE, | 1033 | .owner = THIS_MODULE, |
1034 | .llseek = noop_llseek, | ||
1032 | }; | 1035 | }; |
1033 | 1036 | ||
1034 | static struct miscdevice monitor_device = { | 1037 | static struct miscdevice monitor_device = { |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 0032a9f5a3a9..40186b959429 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
@@ -477,7 +477,7 @@ ecryptfs_lower_header_size(struct ecryptfs_crypt_stat *crypt_stat) | |||
477 | static inline struct ecryptfs_file_info * | 477 | static inline struct ecryptfs_file_info * |
478 | ecryptfs_file_to_private(struct file *file) | 478 | ecryptfs_file_to_private(struct file *file) |
479 | { | 479 | { |
480 | return (struct ecryptfs_file_info *)file->private_data; | 480 | return file->private_data; |
481 | } | 481 | } |
482 | 482 | ||
483 | static inline void | 483 | static inline void |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 622c95140802..91da02987bff 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
@@ -31,7 +31,6 @@ | |||
31 | #include <linux/security.h> | 31 | #include <linux/security.h> |
32 | #include <linux/compat.h> | 32 | #include <linux/compat.h> |
33 | #include <linux/fs_stack.h> | 33 | #include <linux/fs_stack.h> |
34 | #include <linux/smp_lock.h> | ||
35 | #include "ecryptfs_kernel.h" | 34 | #include "ecryptfs_kernel.h" |
36 | 35 | ||
37 | /** | 36 | /** |
@@ -284,11 +283,9 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag) | |||
284 | int rc = 0; | 283 | int rc = 0; |
285 | struct file *lower_file = NULL; | 284 | struct file *lower_file = NULL; |
286 | 285 | ||
287 | lock_kernel(); | ||
288 | lower_file = ecryptfs_file_to_lower(file); | 286 | lower_file = ecryptfs_file_to_lower(file); |
289 | if (lower_file->f_op && lower_file->f_op->fasync) | 287 | if (lower_file->f_op && lower_file->f_op->fasync) |
290 | rc = lower_file->f_op->fasync(fd, lower_file, flag); | 288 | rc = lower_file->f_op->fasync(fd, lower_file, flag); |
291 | unlock_kernel(); | ||
292 | return rc; | 289 | return rc; |
293 | } | 290 | } |
294 | 291 | ||
@@ -332,6 +329,7 @@ const struct file_operations ecryptfs_dir_fops = { | |||
332 | .fsync = ecryptfs_fsync, | 329 | .fsync = ecryptfs_fsync, |
333 | .fasync = ecryptfs_fasync, | 330 | .fasync = ecryptfs_fasync, |
334 | .splice_read = generic_file_splice_read, | 331 | .splice_read = generic_file_splice_read, |
332 | .llseek = default_llseek, | ||
335 | }; | 333 | }; |
336 | 334 | ||
337 | const struct file_operations ecryptfs_main_fops = { | 335 | const struct file_operations ecryptfs_main_fops = { |
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 00208c3d7e92..940a82e63dc3 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c | |||
@@ -482,6 +482,7 @@ static const struct file_operations ecryptfs_miscdev_fops = { | |||
482 | .read = ecryptfs_miscdev_read, | 482 | .read = ecryptfs_miscdev_read, |
483 | .write = ecryptfs_miscdev_write, | 483 | .write = ecryptfs_miscdev_write, |
484 | .release = ecryptfs_miscdev_release, | 484 | .release = ecryptfs_miscdev_release, |
485 | .llseek = noop_llseek, | ||
485 | }; | 486 | }; |
486 | 487 | ||
487 | static struct miscdevice ecryptfs_miscdev = { | 488 | static struct miscdevice ecryptfs_miscdev = { |
diff --git a/fs/eventfd.c b/fs/eventfd.c index 6bd3f76fdf88..e0194b3e14d6 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c | |||
@@ -293,6 +293,7 @@ static const struct file_operations eventfd_fops = { | |||
293 | .poll = eventfd_poll, | 293 | .poll = eventfd_poll, |
294 | .read = eventfd_read, | 294 | .read = eventfd_read, |
295 | .write = eventfd_write, | 295 | .write = eventfd_write, |
296 | .llseek = noop_llseek, | ||
296 | }; | 297 | }; |
297 | 298 | ||
298 | /** | 299 | /** |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 3817149919cb..8cf07242067d 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
@@ -77,9 +77,6 @@ | |||
77 | /* Maximum number of nesting allowed inside epoll sets */ | 77 | /* Maximum number of nesting allowed inside epoll sets */ |
78 | #define EP_MAX_NESTS 4 | 78 | #define EP_MAX_NESTS 4 |
79 | 79 | ||
80 | /* Maximum msec timeout value storeable in a long int */ | ||
81 | #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) | ||
82 | |||
83 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) | 80 | #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) |
84 | 81 | ||
85 | #define EP_UNACTIVE_PTR ((void *) -1L) | 82 | #define EP_UNACTIVE_PTR ((void *) -1L) |
@@ -674,7 +671,8 @@ static unsigned int ep_eventpoll_poll(struct file *file, poll_table *wait) | |||
674 | /* File callbacks that implement the eventpoll file behaviour */ | 671 | /* File callbacks that implement the eventpoll file behaviour */ |
675 | static const struct file_operations eventpoll_fops = { | 672 | static const struct file_operations eventpoll_fops = { |
676 | .release = ep_eventpoll_release, | 673 | .release = ep_eventpoll_release, |
677 | .poll = ep_eventpoll_poll | 674 | .poll = ep_eventpoll_poll, |
675 | .llseek = noop_llseek, | ||
678 | }; | 676 | }; |
679 | 677 | ||
680 | /* Fast test to see if the file is an evenpoll file */ | 678 | /* Fast test to see if the file is an evenpoll file */ |
@@ -1116,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep, | |||
1116 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, | 1114 | static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, |
1117 | int maxevents, long timeout) | 1115 | int maxevents, long timeout) |
1118 | { | 1116 | { |
1119 | int res, eavail; | 1117 | int res, eavail, timed_out = 0; |
1120 | unsigned long flags; | 1118 | unsigned long flags; |
1121 | long jtimeout; | 1119 | long slack; |
1122 | wait_queue_t wait; | 1120 | wait_queue_t wait; |
1123 | 1121 | struct timespec end_time; | |
1124 | /* | 1122 | ktime_t expires, *to = NULL; |
1125 | * Calculate the timeout by checking for the "infinite" value (-1) | 1123 | |
1126 | * and the overflow condition. The passed timeout is in milliseconds, | 1124 | if (timeout > 0) { |
1127 | * that why (t * HZ) / 1000. | 1125 | ktime_get_ts(&end_time); |
1128 | */ | 1126 | timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC); |
1129 | jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ? | 1127 | slack = select_estimate_accuracy(&end_time); |
1130 | MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000; | 1128 | to = &expires; |
1129 | *to = timespec_to_ktime(end_time); | ||
1130 | } else if (timeout == 0) { | ||
1131 | timed_out = 1; | ||
1132 | } | ||
1131 | 1133 | ||
1132 | retry: | 1134 | retry: |
1133 | spin_lock_irqsave(&ep->lock, flags); | 1135 | spin_lock_irqsave(&ep->lock, flags); |
@@ -1149,7 +1151,7 @@ retry: | |||
1149 | * to TASK_INTERRUPTIBLE before doing the checks. | 1151 | * to TASK_INTERRUPTIBLE before doing the checks. |
1150 | */ | 1152 | */ |
1151 | set_current_state(TASK_INTERRUPTIBLE); | 1153 | set_current_state(TASK_INTERRUPTIBLE); |
1152 | if (!list_empty(&ep->rdllist) || !jtimeout) | 1154 | if (!list_empty(&ep->rdllist) || timed_out) |
1153 | break; | 1155 | break; |
1154 | if (signal_pending(current)) { | 1156 | if (signal_pending(current)) { |
1155 | res = -EINTR; | 1157 | res = -EINTR; |
@@ -1157,7 +1159,9 @@ retry: | |||
1157 | } | 1159 | } |
1158 | 1160 | ||
1159 | spin_unlock_irqrestore(&ep->lock, flags); | 1161 | spin_unlock_irqrestore(&ep->lock, flags); |
1160 | jtimeout = schedule_timeout(jtimeout); | 1162 | if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) |
1163 | timed_out = 1; | ||
1164 | |||
1161 | spin_lock_irqsave(&ep->lock, flags); | 1165 | spin_lock_irqsave(&ep->lock, flags); |
1162 | } | 1166 | } |
1163 | __remove_wait_queue(&ep->wq, &wait); | 1167 | __remove_wait_queue(&ep->wq, &wait); |
@@ -1175,7 +1179,7 @@ retry: | |||
1175 | * more luck. | 1179 | * more luck. |
1176 | */ | 1180 | */ |
1177 | if (!res && eavail && | 1181 | if (!res && eavail && |
1178 | !(res = ep_send_events(ep, events, maxevents)) && jtimeout) | 1182 | !(res = ep_send_events(ep, events, maxevents)) && !timed_out) |
1179 | goto retry; | 1183 | goto retry; |
1180 | 1184 | ||
1181 | return res; | 1185 | return res; |
@@ -54,6 +54,7 @@ | |||
54 | #include <linux/fsnotify.h> | 54 | #include <linux/fsnotify.h> |
55 | #include <linux/fs_struct.h> | 55 | #include <linux/fs_struct.h> |
56 | #include <linux/pipe_fs_i.h> | 56 | #include <linux/pipe_fs_i.h> |
57 | #include <linux/oom.h> | ||
57 | 58 | ||
58 | #include <asm/uaccess.h> | 59 | #include <asm/uaccess.h> |
59 | #include <asm/mmu_context.h> | 60 | #include <asm/mmu_context.h> |
@@ -65,6 +66,12 @@ char core_pattern[CORENAME_MAX_SIZE] = "core"; | |||
65 | unsigned int core_pipe_limit; | 66 | unsigned int core_pipe_limit; |
66 | int suid_dumpable = 0; | 67 | int suid_dumpable = 0; |
67 | 68 | ||
69 | struct core_name { | ||
70 | char *corename; | ||
71 | int used, size; | ||
72 | }; | ||
73 | static atomic_t call_count = ATOMIC_INIT(1); | ||
74 | |||
68 | /* The maximal length of core_pattern is also specified in sysctl.c */ | 75 | /* The maximal length of core_pattern is also specified in sysctl.c */ |
69 | 76 | ||
70 | static LIST_HEAD(formats); | 77 | static LIST_HEAD(formats); |
@@ -759,6 +766,10 @@ static int exec_mmap(struct mm_struct *mm) | |||
759 | tsk->mm = mm; | 766 | tsk->mm = mm; |
760 | tsk->active_mm = mm; | 767 | tsk->active_mm = mm; |
761 | activate_mm(active_mm, mm); | 768 | activate_mm(active_mm, mm); |
769 | if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) { | ||
770 | atomic_dec(&old_mm->oom_disable_count); | ||
771 | atomic_inc(&tsk->mm->oom_disable_count); | ||
772 | } | ||
762 | task_unlock(tsk); | 773 | task_unlock(tsk); |
763 | arch_pick_mmap_layout(mm); | 774 | arch_pick_mmap_layout(mm); |
764 | if (old_mm) { | 775 | if (old_mm) { |
@@ -998,7 +1009,7 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
998 | 1009 | ||
999 | bprm->mm = NULL; /* We're using it now */ | 1010 | bprm->mm = NULL; /* We're using it now */ |
1000 | 1011 | ||
1001 | current->flags &= ~PF_RANDOMIZE; | 1012 | current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD); |
1002 | flush_thread(); | 1013 | flush_thread(); |
1003 | current->personality &= ~bprm->per_clear; | 1014 | current->personality &= ~bprm->per_clear; |
1004 | 1015 | ||
@@ -1078,14 +1089,14 @@ EXPORT_SYMBOL(setup_new_exec); | |||
1078 | */ | 1089 | */ |
1079 | int prepare_bprm_creds(struct linux_binprm *bprm) | 1090 | int prepare_bprm_creds(struct linux_binprm *bprm) |
1080 | { | 1091 | { |
1081 | if (mutex_lock_interruptible(¤t->cred_guard_mutex)) | 1092 | if (mutex_lock_interruptible(¤t->signal->cred_guard_mutex)) |
1082 | return -ERESTARTNOINTR; | 1093 | return -ERESTARTNOINTR; |
1083 | 1094 | ||
1084 | bprm->cred = prepare_exec_creds(); | 1095 | bprm->cred = prepare_exec_creds(); |
1085 | if (likely(bprm->cred)) | 1096 | if (likely(bprm->cred)) |
1086 | return 0; | 1097 | return 0; |
1087 | 1098 | ||
1088 | mutex_unlock(¤t->cred_guard_mutex); | 1099 | mutex_unlock(¤t->signal->cred_guard_mutex); |
1089 | return -ENOMEM; | 1100 | return -ENOMEM; |
1090 | } | 1101 | } |
1091 | 1102 | ||
@@ -1093,7 +1104,7 @@ void free_bprm(struct linux_binprm *bprm) | |||
1093 | { | 1104 | { |
1094 | free_arg_pages(bprm); | 1105 | free_arg_pages(bprm); |
1095 | if (bprm->cred) { | 1106 | if (bprm->cred) { |
1096 | mutex_unlock(¤t->cred_guard_mutex); | 1107 | mutex_unlock(¤t->signal->cred_guard_mutex); |
1097 | abort_creds(bprm->cred); | 1108 | abort_creds(bprm->cred); |
1098 | } | 1109 | } |
1099 | kfree(bprm); | 1110 | kfree(bprm); |
@@ -1114,13 +1125,13 @@ void install_exec_creds(struct linux_binprm *bprm) | |||
1114 | * credentials; any time after this it may be unlocked. | 1125 | * credentials; any time after this it may be unlocked. |
1115 | */ | 1126 | */ |
1116 | security_bprm_committed_creds(bprm); | 1127 | security_bprm_committed_creds(bprm); |
1117 | mutex_unlock(¤t->cred_guard_mutex); | 1128 | mutex_unlock(¤t->signal->cred_guard_mutex); |
1118 | } | 1129 | } |
1119 | EXPORT_SYMBOL(install_exec_creds); | 1130 | EXPORT_SYMBOL(install_exec_creds); |
1120 | 1131 | ||
1121 | /* | 1132 | /* |
1122 | * determine how safe it is to execute the proposed program | 1133 | * determine how safe it is to execute the proposed program |
1123 | * - the caller must hold current->cred_guard_mutex to protect against | 1134 | * - the caller must hold ->cred_guard_mutex to protect against |
1124 | * PTRACE_ATTACH | 1135 | * PTRACE_ATTACH |
1125 | */ | 1136 | */ |
1126 | int check_unsafe_exec(struct linux_binprm *bprm) | 1137 | int check_unsafe_exec(struct linux_binprm *bprm) |
@@ -1401,7 +1412,6 @@ int do_execve(const char * filename, | |||
1401 | if (retval < 0) | 1412 | if (retval < 0) |
1402 | goto out; | 1413 | goto out; |
1403 | 1414 | ||
1404 | current->flags &= ~PF_KTHREAD; | ||
1405 | retval = search_binary_handler(bprm,regs); | 1415 | retval = search_binary_handler(bprm,regs); |
1406 | if (retval < 0) | 1416 | if (retval < 0) |
1407 | goto out; | 1417 | goto out; |
@@ -1454,127 +1464,148 @@ void set_binfmt(struct linux_binfmt *new) | |||
1454 | 1464 | ||
1455 | EXPORT_SYMBOL(set_binfmt); | 1465 | EXPORT_SYMBOL(set_binfmt); |
1456 | 1466 | ||
1467 | static int expand_corename(struct core_name *cn) | ||
1468 | { | ||
1469 | char *old_corename = cn->corename; | ||
1470 | |||
1471 | cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count); | ||
1472 | cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL); | ||
1473 | |||
1474 | if (!cn->corename) { | ||
1475 | kfree(old_corename); | ||
1476 | return -ENOMEM; | ||
1477 | } | ||
1478 | |||
1479 | return 0; | ||
1480 | } | ||
1481 | |||
1482 | static int cn_printf(struct core_name *cn, const char *fmt, ...) | ||
1483 | { | ||
1484 | char *cur; | ||
1485 | int need; | ||
1486 | int ret; | ||
1487 | va_list arg; | ||
1488 | |||
1489 | va_start(arg, fmt); | ||
1490 | need = vsnprintf(NULL, 0, fmt, arg); | ||
1491 | va_end(arg); | ||
1492 | |||
1493 | if (likely(need < cn->size - cn->used - 1)) | ||
1494 | goto out_printf; | ||
1495 | |||
1496 | ret = expand_corename(cn); | ||
1497 | if (ret) | ||
1498 | goto expand_fail; | ||
1499 | |||
1500 | out_printf: | ||
1501 | cur = cn->corename + cn->used; | ||
1502 | va_start(arg, fmt); | ||
1503 | vsnprintf(cur, need + 1, fmt, arg); | ||
1504 | va_end(arg); | ||
1505 | cn->used += need; | ||
1506 | return 0; | ||
1507 | |||
1508 | expand_fail: | ||
1509 | return ret; | ||
1510 | } | ||
1511 | |||
1457 | /* format_corename will inspect the pattern parameter, and output a | 1512 | /* format_corename will inspect the pattern parameter, and output a |
1458 | * name into corename, which must have space for at least | 1513 | * name into corename, which must have space for at least |
1459 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | 1514 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. |
1460 | */ | 1515 | */ |
1461 | static int format_corename(char *corename, long signr) | 1516 | static int format_corename(struct core_name *cn, long signr) |
1462 | { | 1517 | { |
1463 | const struct cred *cred = current_cred(); | 1518 | const struct cred *cred = current_cred(); |
1464 | const char *pat_ptr = core_pattern; | 1519 | const char *pat_ptr = core_pattern; |
1465 | int ispipe = (*pat_ptr == '|'); | 1520 | int ispipe = (*pat_ptr == '|'); |
1466 | char *out_ptr = corename; | ||
1467 | char *const out_end = corename + CORENAME_MAX_SIZE; | ||
1468 | int rc; | ||
1469 | int pid_in_pattern = 0; | 1521 | int pid_in_pattern = 0; |
1522 | int err = 0; | ||
1523 | |||
1524 | cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count); | ||
1525 | cn->corename = kmalloc(cn->size, GFP_KERNEL); | ||
1526 | cn->used = 0; | ||
1527 | |||
1528 | if (!cn->corename) | ||
1529 | return -ENOMEM; | ||
1470 | 1530 | ||
1471 | /* Repeat as long as we have more pattern to process and more output | 1531 | /* Repeat as long as we have more pattern to process and more output |
1472 | space */ | 1532 | space */ |
1473 | while (*pat_ptr) { | 1533 | while (*pat_ptr) { |
1474 | if (*pat_ptr != '%') { | 1534 | if (*pat_ptr != '%') { |
1475 | if (out_ptr == out_end) | 1535 | if (*pat_ptr == 0) |
1476 | goto out; | 1536 | goto out; |
1477 | *out_ptr++ = *pat_ptr++; | 1537 | err = cn_printf(cn, "%c", *pat_ptr++); |
1478 | } else { | 1538 | } else { |
1479 | switch (*++pat_ptr) { | 1539 | switch (*++pat_ptr) { |
1540 | /* single % at the end, drop that */ | ||
1480 | case 0: | 1541 | case 0: |
1481 | goto out; | 1542 | goto out; |
1482 | /* Double percent, output one percent */ | 1543 | /* Double percent, output one percent */ |
1483 | case '%': | 1544 | case '%': |
1484 | if (out_ptr == out_end) | 1545 | err = cn_printf(cn, "%c", '%'); |
1485 | goto out; | ||
1486 | *out_ptr++ = '%'; | ||
1487 | break; | 1546 | break; |
1488 | /* pid */ | 1547 | /* pid */ |
1489 | case 'p': | 1548 | case 'p': |
1490 | pid_in_pattern = 1; | 1549 | pid_in_pattern = 1; |
1491 | rc = snprintf(out_ptr, out_end - out_ptr, | 1550 | err = cn_printf(cn, "%d", |
1492 | "%d", task_tgid_vnr(current)); | 1551 | task_tgid_vnr(current)); |
1493 | if (rc > out_end - out_ptr) | ||
1494 | goto out; | ||
1495 | out_ptr += rc; | ||
1496 | break; | 1552 | break; |
1497 | /* uid */ | 1553 | /* uid */ |
1498 | case 'u': | 1554 | case 'u': |
1499 | rc = snprintf(out_ptr, out_end - out_ptr, | 1555 | err = cn_printf(cn, "%d", cred->uid); |
1500 | "%d", cred->uid); | ||
1501 | if (rc > out_end - out_ptr) | ||
1502 | goto out; | ||
1503 | out_ptr += rc; | ||
1504 | break; | 1556 | break; |
1505 | /* gid */ | 1557 | /* gid */ |
1506 | case 'g': | 1558 | case 'g': |
1507 | rc = snprintf(out_ptr, out_end - out_ptr, | 1559 | err = cn_printf(cn, "%d", cred->gid); |
1508 | "%d", cred->gid); | ||
1509 | if (rc > out_end - out_ptr) | ||
1510 | goto out; | ||
1511 | out_ptr += rc; | ||
1512 | break; | 1560 | break; |
1513 | /* signal that caused the coredump */ | 1561 | /* signal that caused the coredump */ |
1514 | case 's': | 1562 | case 's': |
1515 | rc = snprintf(out_ptr, out_end - out_ptr, | 1563 | err = cn_printf(cn, "%ld", signr); |
1516 | "%ld", signr); | ||
1517 | if (rc > out_end - out_ptr) | ||
1518 | goto out; | ||
1519 | out_ptr += rc; | ||
1520 | break; | 1564 | break; |
1521 | /* UNIX time of coredump */ | 1565 | /* UNIX time of coredump */ |
1522 | case 't': { | 1566 | case 't': { |
1523 | struct timeval tv; | 1567 | struct timeval tv; |
1524 | do_gettimeofday(&tv); | 1568 | do_gettimeofday(&tv); |
1525 | rc = snprintf(out_ptr, out_end - out_ptr, | 1569 | err = cn_printf(cn, "%lu", tv.tv_sec); |
1526 | "%lu", tv.tv_sec); | ||
1527 | if (rc > out_end - out_ptr) | ||
1528 | goto out; | ||
1529 | out_ptr += rc; | ||
1530 | break; | 1570 | break; |
1531 | } | 1571 | } |
1532 | /* hostname */ | 1572 | /* hostname */ |
1533 | case 'h': | 1573 | case 'h': |
1534 | down_read(&uts_sem); | 1574 | down_read(&uts_sem); |
1535 | rc = snprintf(out_ptr, out_end - out_ptr, | 1575 | err = cn_printf(cn, "%s", |
1536 | "%s", utsname()->nodename); | 1576 | utsname()->nodename); |
1537 | up_read(&uts_sem); | 1577 | up_read(&uts_sem); |
1538 | if (rc > out_end - out_ptr) | ||
1539 | goto out; | ||
1540 | out_ptr += rc; | ||
1541 | break; | 1578 | break; |
1542 | /* executable */ | 1579 | /* executable */ |
1543 | case 'e': | 1580 | case 'e': |
1544 | rc = snprintf(out_ptr, out_end - out_ptr, | 1581 | err = cn_printf(cn, "%s", current->comm); |
1545 | "%s", current->comm); | ||
1546 | if (rc > out_end - out_ptr) | ||
1547 | goto out; | ||
1548 | out_ptr += rc; | ||
1549 | break; | 1582 | break; |
1550 | /* core limit size */ | 1583 | /* core limit size */ |
1551 | case 'c': | 1584 | case 'c': |
1552 | rc = snprintf(out_ptr, out_end - out_ptr, | 1585 | err = cn_printf(cn, "%lu", |
1553 | "%lu", rlimit(RLIMIT_CORE)); | 1586 | rlimit(RLIMIT_CORE)); |
1554 | if (rc > out_end - out_ptr) | ||
1555 | goto out; | ||
1556 | out_ptr += rc; | ||
1557 | break; | 1587 | break; |
1558 | default: | 1588 | default: |
1559 | break; | 1589 | break; |
1560 | } | 1590 | } |
1561 | ++pat_ptr; | 1591 | ++pat_ptr; |
1562 | } | 1592 | } |
1593 | |||
1594 | if (err) | ||
1595 | return err; | ||
1563 | } | 1596 | } |
1597 | |||
1564 | /* Backward compatibility with core_uses_pid: | 1598 | /* Backward compatibility with core_uses_pid: |
1565 | * | 1599 | * |
1566 | * If core_pattern does not include a %p (as is the default) | 1600 | * If core_pattern does not include a %p (as is the default) |
1567 | * and core_uses_pid is set, then .%pid will be appended to | 1601 | * and core_uses_pid is set, then .%pid will be appended to |
1568 | * the filename. Do not do this for piped commands. */ | 1602 | * the filename. Do not do this for piped commands. */ |
1569 | if (!ispipe && !pid_in_pattern && core_uses_pid) { | 1603 | if (!ispipe && !pid_in_pattern && core_uses_pid) { |
1570 | rc = snprintf(out_ptr, out_end - out_ptr, | 1604 | err = cn_printf(cn, ".%d", task_tgid_vnr(current)); |
1571 | ".%d", task_tgid_vnr(current)); | 1605 | if (err) |
1572 | if (rc > out_end - out_ptr) | 1606 | return err; |
1573 | goto out; | ||
1574 | out_ptr += rc; | ||
1575 | } | 1607 | } |
1576 | out: | 1608 | out: |
1577 | *out_ptr = 0; | ||
1578 | return ispipe; | 1609 | return ispipe; |
1579 | } | 1610 | } |
1580 | 1611 | ||
@@ -1851,7 +1882,7 @@ static int umh_pipe_setup(struct subprocess_info *info) | |||
1851 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) | 1882 | void do_coredump(long signr, int exit_code, struct pt_regs *regs) |
1852 | { | 1883 | { |
1853 | struct core_state core_state; | 1884 | struct core_state core_state; |
1854 | char corename[CORENAME_MAX_SIZE + 1]; | 1885 | struct core_name cn; |
1855 | struct mm_struct *mm = current->mm; | 1886 | struct mm_struct *mm = current->mm; |
1856 | struct linux_binfmt * binfmt; | 1887 | struct linux_binfmt * binfmt; |
1857 | const struct cred *old_cred; | 1888 | const struct cred *old_cred; |
@@ -1906,7 +1937,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1906 | */ | 1937 | */ |
1907 | clear_thread_flag(TIF_SIGPENDING); | 1938 | clear_thread_flag(TIF_SIGPENDING); |
1908 | 1939 | ||
1909 | ispipe = format_corename(corename, signr); | 1940 | ispipe = format_corename(&cn, signr); |
1941 | |||
1942 | if (ispipe == -ENOMEM) { | ||
1943 | printk(KERN_WARNING "format_corename failed\n"); | ||
1944 | printk(KERN_WARNING "Aborting core\n"); | ||
1945 | goto fail_corename; | ||
1946 | } | ||
1910 | 1947 | ||
1911 | if (ispipe) { | 1948 | if (ispipe) { |
1912 | int dump_count; | 1949 | int dump_count; |
@@ -1943,7 +1980,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1943 | goto fail_dropcount; | 1980 | goto fail_dropcount; |
1944 | } | 1981 | } |
1945 | 1982 | ||
1946 | helper_argv = argv_split(GFP_KERNEL, corename+1, NULL); | 1983 | helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL); |
1947 | if (!helper_argv) { | 1984 | if (!helper_argv) { |
1948 | printk(KERN_WARNING "%s failed to allocate memory\n", | 1985 | printk(KERN_WARNING "%s failed to allocate memory\n", |
1949 | __func__); | 1986 | __func__); |
@@ -1956,7 +1993,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1956 | argv_free(helper_argv); | 1993 | argv_free(helper_argv); |
1957 | if (retval) { | 1994 | if (retval) { |
1958 | printk(KERN_INFO "Core dump to %s pipe failed\n", | 1995 | printk(KERN_INFO "Core dump to %s pipe failed\n", |
1959 | corename); | 1996 | cn.corename); |
1960 | goto close_fail; | 1997 | goto close_fail; |
1961 | } | 1998 | } |
1962 | } else { | 1999 | } else { |
@@ -1965,7 +2002,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) | |||
1965 | if (cprm.limit < binfmt->min_coredump) | 2002 | if (cprm.limit < binfmt->min_coredump) |
1966 | goto fail_unlock; | 2003 | goto fail_unlock; |
1967 | 2004 | ||
1968 | cprm.file = filp_open(corename, | 2005 | cprm.file = filp_open(cn.corename, |
1969 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, | 2006 | O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag, |
1970 | 0600); | 2007 | 0600); |
1971 | if (IS_ERR(cprm.file)) | 2008 | if (IS_ERR(cprm.file)) |
@@ -2007,6 +2044,8 @@ fail_dropcount: | |||
2007 | if (ispipe) | 2044 | if (ispipe) |
2008 | atomic_dec(&core_dump_count); | 2045 | atomic_dec(&core_dump_count); |
2009 | fail_unlock: | 2046 | fail_unlock: |
2047 | kfree(cn.corename); | ||
2048 | fail_corename: | ||
2010 | coredump_finish(mm); | 2049 | coredump_finish(mm); |
2011 | revert_creds(old_cred); | 2050 | revert_creds(old_cred); |
2012 | fail_creds: | 2051 | fail_creds: |
@@ -2014,3 +2053,43 @@ fail_creds: | |||
2014 | fail: | 2053 | fail: |
2015 | return; | 2054 | return; |
2016 | } | 2055 | } |
2056 | |||
2057 | /* | ||
2058 | * Core dumping helper functions. These are the only things you should | ||
2059 | * do on a core-file: use only these functions to write out all the | ||
2060 | * necessary info. | ||
2061 | */ | ||
2062 | int dump_write(struct file *file, const void *addr, int nr) | ||
2063 | { | ||
2064 | return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr; | ||
2065 | } | ||
2066 | EXPORT_SYMBOL(dump_write); | ||
2067 | |||
2068 | int dump_seek(struct file *file, loff_t off) | ||
2069 | { | ||
2070 | int ret = 1; | ||
2071 | |||
2072 | if (file->f_op->llseek && file->f_op->llseek != no_llseek) { | ||
2073 | if (file->f_op->llseek(file, off, SEEK_CUR) < 0) | ||
2074 | return 0; | ||
2075 | } else { | ||
2076 | char *buf = (char *)get_zeroed_page(GFP_KERNEL); | ||
2077 | |||
2078 | if (!buf) | ||
2079 | return 0; | ||
2080 | while (off > 0) { | ||
2081 | unsigned long n = off; | ||
2082 | |||
2083 | if (n > PAGE_SIZE) | ||
2084 | n = PAGE_SIZE; | ||
2085 | if (!dump_write(file, buf, n)) { | ||
2086 | ret = 0; | ||
2087 | break; | ||
2088 | } | ||
2089 | off -= n; | ||
2090 | } | ||
2091 | free_page((unsigned long)buf); | ||
2092 | } | ||
2093 | return ret; | ||
2094 | } | ||
2095 | EXPORT_SYMBOL(dump_seek); | ||
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index d91e9d829bc1..dcc941d82d67 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
@@ -420,7 +420,7 @@ int exofs_set_link(struct inode *dir, struct exofs_dir_entry *de, | |||
420 | err = exofs_write_begin(NULL, page->mapping, pos, len, | 420 | err = exofs_write_begin(NULL, page->mapping, pos, len, |
421 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 421 | AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); |
422 | if (err) | 422 | if (err) |
423 | EXOFS_ERR("exofs_set_link: exofs_write_begin FAILD => %d\n", | 423 | EXOFS_ERR("exofs_set_link: exofs_write_begin FAILED => %d\n", |
424 | err); | 424 | err); |
425 | 425 | ||
426 | de->inode_no = cpu_to_le64(inode->i_ino); | 426 | de->inode_no = cpu_to_le64(inode->i_ino); |
@@ -556,7 +556,7 @@ int exofs_delete_entry(struct exofs_dir_entry *dir, struct page *page) | |||
556 | err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0, | 556 | err = exofs_write_begin(NULL, page->mapping, pos, to - from, 0, |
557 | &page, NULL); | 557 | &page, NULL); |
558 | if (err) | 558 | if (err) |
559 | EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILD => %d\n", | 559 | EXOFS_ERR("exofs_delete_entry: exofs_write_begin FAILED => %d\n", |
560 | err); | 560 | err); |
561 | if (pde) | 561 | if (pde) |
562 | pde->rec_len = cpu_to_le16(to - from); | 562 | pde->rec_len = cpu_to_le16(to - from); |
diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 68cb23e3bb98..b905c79b4f0a 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c | |||
@@ -46,10 +46,6 @@ static int exofs_file_fsync(struct file *filp, int datasync) | |||
46 | { | 46 | { |
47 | int ret; | 47 | int ret; |
48 | struct inode *inode = filp->f_mapping->host; | 48 | struct inode *inode = filp->f_mapping->host; |
49 | struct writeback_control wbc = { | ||
50 | .sync_mode = WB_SYNC_ALL, | ||
51 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ | ||
52 | }; | ||
53 | struct super_block *sb; | 49 | struct super_block *sb; |
54 | 50 | ||
55 | if (!(inode->i_state & I_DIRTY)) | 51 | if (!(inode->i_state & I_DIRTY)) |
@@ -57,7 +53,7 @@ static int exofs_file_fsync(struct file *filp, int datasync) | |||
57 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 53 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
58 | return 0; | 54 | return 0; |
59 | 55 | ||
60 | ret = sync_inode(inode, &wbc); | 56 | ret = sync_inode_metadata(inode, 1); |
61 | 57 | ||
62 | /* This is a good place to write the sb */ | 58 | /* This is a good place to write the sb */ |
63 | /* TODO: Sechedule an sb-sync on create */ | 59 | /* TODO: Sechedule an sb-sync on create */ |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index eb7368ebd8cd..42685424817b 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -54,6 +54,9 @@ struct page_collect { | |||
54 | unsigned nr_pages; | 54 | unsigned nr_pages; |
55 | unsigned long length; | 55 | unsigned long length; |
56 | loff_t pg_first; /* keep 64bit also in 32-arches */ | 56 | loff_t pg_first; /* keep 64bit also in 32-arches */ |
57 | bool read_4_write; /* This means two things: that the read is sync | ||
58 | * And the pages should not be unlocked. | ||
59 | */ | ||
57 | }; | 60 | }; |
58 | 61 | ||
59 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | 62 | static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, |
@@ -71,6 +74,7 @@ static void _pcol_init(struct page_collect *pcol, unsigned expected_pages, | |||
71 | pcol->nr_pages = 0; | 74 | pcol->nr_pages = 0; |
72 | pcol->length = 0; | 75 | pcol->length = 0; |
73 | pcol->pg_first = -1; | 76 | pcol->pg_first = -1; |
77 | pcol->read_4_write = false; | ||
74 | } | 78 | } |
75 | 79 | ||
76 | static void _pcol_reset(struct page_collect *pcol) | 80 | static void _pcol_reset(struct page_collect *pcol) |
@@ -181,7 +185,7 @@ static void update_write_page(struct page *page, int ret) | |||
181 | /* Called at the end of reads, to optionally unlock pages and update their | 185 | /* Called at the end of reads, to optionally unlock pages and update their |
182 | * status. | 186 | * status. |
183 | */ | 187 | */ |
184 | static int __readpages_done(struct page_collect *pcol, bool do_unlock) | 188 | static int __readpages_done(struct page_collect *pcol) |
185 | { | 189 | { |
186 | int i; | 190 | int i; |
187 | u64 resid; | 191 | u64 resid; |
@@ -217,7 +221,7 @@ static int __readpages_done(struct page_collect *pcol, bool do_unlock) | |||
217 | page_stat ? "bad_bytes" : "good_bytes"); | 221 | page_stat ? "bad_bytes" : "good_bytes"); |
218 | 222 | ||
219 | ret = update_read_page(page, page_stat); | 223 | ret = update_read_page(page, page_stat); |
220 | if (do_unlock) | 224 | if (!pcol->read_4_write) |
221 | unlock_page(page); | 225 | unlock_page(page); |
222 | length += PAGE_SIZE; | 226 | length += PAGE_SIZE; |
223 | } | 227 | } |
@@ -232,7 +236,7 @@ static void readpages_done(struct exofs_io_state *ios, void *p) | |||
232 | { | 236 | { |
233 | struct page_collect *pcol = p; | 237 | struct page_collect *pcol = p; |
234 | 238 | ||
235 | __readpages_done(pcol, true); | 239 | __readpages_done(pcol); |
236 | atomic_dec(&pcol->sbi->s_curr_pending); | 240 | atomic_dec(&pcol->sbi->s_curr_pending); |
237 | kfree(pcol); | 241 | kfree(pcol); |
238 | } | 242 | } |
@@ -253,7 +257,7 @@ static void _unlock_pcol_pages(struct page_collect *pcol, int ret, int rw) | |||
253 | } | 257 | } |
254 | } | 258 | } |
255 | 259 | ||
256 | static int read_exec(struct page_collect *pcol, bool is_sync) | 260 | static int read_exec(struct page_collect *pcol) |
257 | { | 261 | { |
258 | struct exofs_i_info *oi = exofs_i(pcol->inode); | 262 | struct exofs_i_info *oi = exofs_i(pcol->inode); |
259 | struct exofs_io_state *ios = pcol->ios; | 263 | struct exofs_io_state *ios = pcol->ios; |
@@ -263,17 +267,14 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
263 | if (!pcol->pages) | 267 | if (!pcol->pages) |
264 | return 0; | 268 | return 0; |
265 | 269 | ||
266 | /* see comment in _readpage() about sync reads */ | ||
267 | WARN_ON(is_sync && (pcol->nr_pages != 1)); | ||
268 | |||
269 | ios->pages = pcol->pages; | 270 | ios->pages = pcol->pages; |
270 | ios->nr_pages = pcol->nr_pages; | 271 | ios->nr_pages = pcol->nr_pages; |
271 | ios->length = pcol->length; | 272 | ios->length = pcol->length; |
272 | ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; | 273 | ios->offset = pcol->pg_first << PAGE_CACHE_SHIFT; |
273 | 274 | ||
274 | if (is_sync) { | 275 | if (pcol->read_4_write) { |
275 | exofs_oi_read(oi, pcol->ios); | 276 | exofs_oi_read(oi, pcol->ios); |
276 | return __readpages_done(pcol, false); | 277 | return __readpages_done(pcol); |
277 | } | 278 | } |
278 | 279 | ||
279 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 280 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
@@ -299,7 +300,7 @@ static int read_exec(struct page_collect *pcol, bool is_sync) | |||
299 | return 0; | 300 | return 0; |
300 | 301 | ||
301 | err: | 302 | err: |
302 | if (!is_sync) | 303 | if (!pcol->read_4_write) |
303 | _unlock_pcol_pages(pcol, ret, READ); | 304 | _unlock_pcol_pages(pcol, ret, READ); |
304 | 305 | ||
305 | pcol_free(pcol); | 306 | pcol_free(pcol); |
@@ -347,11 +348,12 @@ static int readpage_strip(void *data, struct page *page) | |||
347 | if (PageError(page)) | 348 | if (PageError(page)) |
348 | ClearPageError(page); | 349 | ClearPageError(page); |
349 | 350 | ||
350 | unlock_page(page); | 351 | if (!pcol->read_4_write) |
352 | unlock_page(page); | ||
351 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," | 353 | EXOFS_DBGMSG("readpage_strip(0x%lx, 0x%lx) empty page," |
352 | " splitting\n", inode->i_ino, page->index); | 354 | " splitting\n", inode->i_ino, page->index); |
353 | 355 | ||
354 | return read_exec(pcol, false); | 356 | return read_exec(pcol); |
355 | } | 357 | } |
356 | 358 | ||
357 | try_again: | 359 | try_again: |
@@ -361,7 +363,7 @@ try_again: | |||
361 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != | 363 | } else if (unlikely((pcol->pg_first + pcol->nr_pages) != |
362 | page->index)) { | 364 | page->index)) { |
363 | /* Discontinuity detected, split the request */ | 365 | /* Discontinuity detected, split the request */ |
364 | ret = read_exec(pcol, false); | 366 | ret = read_exec(pcol); |
365 | if (unlikely(ret)) | 367 | if (unlikely(ret)) |
366 | goto fail; | 368 | goto fail; |
367 | goto try_again; | 369 | goto try_again; |
@@ -386,7 +388,7 @@ try_again: | |||
386 | page, len, pcol->nr_pages, pcol->length); | 388 | page, len, pcol->nr_pages, pcol->length); |
387 | 389 | ||
388 | /* split the request, and start again with current page */ | 390 | /* split the request, and start again with current page */ |
389 | ret = read_exec(pcol, false); | 391 | ret = read_exec(pcol); |
390 | if (unlikely(ret)) | 392 | if (unlikely(ret)) |
391 | goto fail; | 393 | goto fail; |
392 | 394 | ||
@@ -415,26 +417,24 @@ static int exofs_readpages(struct file *file, struct address_space *mapping, | |||
415 | return ret; | 417 | return ret; |
416 | } | 418 | } |
417 | 419 | ||
418 | return read_exec(&pcol, false); | 420 | return read_exec(&pcol); |
419 | } | 421 | } |
420 | 422 | ||
421 | static int _readpage(struct page *page, bool is_sync) | 423 | static int _readpage(struct page *page, bool read_4_write) |
422 | { | 424 | { |
423 | struct page_collect pcol; | 425 | struct page_collect pcol; |
424 | int ret; | 426 | int ret; |
425 | 427 | ||
426 | _pcol_init(&pcol, 1, page->mapping->host); | 428 | _pcol_init(&pcol, 1, page->mapping->host); |
427 | 429 | ||
428 | /* readpage_strip might call read_exec(,is_sync==false) at several | 430 | pcol.read_4_write = read_4_write; |
429 | * places but not if we have a single page. | ||
430 | */ | ||
431 | ret = readpage_strip(&pcol, page); | 431 | ret = readpage_strip(&pcol, page); |
432 | if (ret) { | 432 | if (ret) { |
433 | EXOFS_ERR("_readpage => %d\n", ret); | 433 | EXOFS_ERR("_readpage => %d\n", ret); |
434 | return ret; | 434 | return ret; |
435 | } | 435 | } |
436 | 436 | ||
437 | return read_exec(&pcol, is_sync); | 437 | return read_exec(&pcol); |
438 | } | 438 | } |
439 | 439 | ||
440 | /* | 440 | /* |
@@ -505,7 +505,7 @@ static int write_exec(struct page_collect *pcol) | |||
505 | 505 | ||
506 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); | 506 | pcol_copy = kmalloc(sizeof(*pcol_copy), GFP_KERNEL); |
507 | if (!pcol_copy) { | 507 | if (!pcol_copy) { |
508 | EXOFS_ERR("write_exec: Faild to kmalloc(pcol)\n"); | 508 | EXOFS_ERR("write_exec: Failed to kmalloc(pcol)\n"); |
509 | ret = -ENOMEM; | 509 | ret = -ENOMEM; |
510 | goto err; | 510 | goto err; |
511 | } | 511 | } |
@@ -521,7 +521,7 @@ static int write_exec(struct page_collect *pcol) | |||
521 | 521 | ||
522 | ret = exofs_oi_write(oi, ios); | 522 | ret = exofs_oi_write(oi, ios); |
523 | if (unlikely(ret)) { | 523 | if (unlikely(ret)) { |
524 | EXOFS_ERR("write_exec: exofs_oi_write() Faild\n"); | 524 | EXOFS_ERR("write_exec: exofs_oi_write() Failed\n"); |
525 | goto err; | 525 | goto err; |
526 | } | 526 | } |
527 | 527 | ||
@@ -622,7 +622,7 @@ try_again: | |||
622 | /* split the request, next loop will start again */ | 622 | /* split the request, next loop will start again */ |
623 | ret = write_exec(pcol); | 623 | ret = write_exec(pcol); |
624 | if (unlikely(ret)) { | 624 | if (unlikely(ret)) { |
625 | EXOFS_DBGMSG("write_exec faild => %d", ret); | 625 | EXOFS_DBGMSG("write_exec failed => %d", ret); |
626 | goto fail; | 626 | goto fail; |
627 | } | 627 | } |
628 | 628 | ||
@@ -713,7 +713,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, | |||
713 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, | 713 | ret = simple_write_begin(file, mapping, pos, len, flags, pagep, |
714 | fsdata); | 714 | fsdata); |
715 | if (ret) { | 715 | if (ret) { |
716 | EXOFS_DBGMSG("simple_write_begin faild\n"); | 716 | EXOFS_DBGMSG("simple_write_begin failed\n"); |
717 | goto out; | 717 | goto out; |
718 | } | 718 | } |
719 | 719 | ||
@@ -726,7 +726,7 @@ int exofs_write_begin(struct file *file, struct address_space *mapping, | |||
726 | if (ret) { | 726 | if (ret) { |
727 | /*SetPageError was done by _readpage. Is it ok?*/ | 727 | /*SetPageError was done by _readpage. Is it ok?*/ |
728 | unlock_page(page); | 728 | unlock_page(page); |
729 | EXOFS_DBGMSG("__readpage_filler faild\n"); | 729 | EXOFS_DBGMSG("__readpage_filler failed\n"); |
730 | } | 730 | } |
731 | } | 731 | } |
732 | out: | 732 | out: |
@@ -1030,6 +1030,7 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) | |||
1030 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); | 1030 | memcpy(oi->i_data, fcb.i_data, sizeof(fcb.i_data)); |
1031 | } | 1031 | } |
1032 | 1032 | ||
1033 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1033 | if (S_ISREG(inode->i_mode)) { | 1034 | if (S_ISREG(inode->i_mode)) { |
1034 | inode->i_op = &exofs_file_inode_operations; | 1035 | inode->i_op = &exofs_file_inode_operations; |
1035 | inode->i_fop = &exofs_file_operations; | 1036 | inode->i_fop = &exofs_file_operations; |
@@ -1066,8 +1067,10 @@ bad_inode: | |||
1066 | int __exofs_wait_obj_created(struct exofs_i_info *oi) | 1067 | int __exofs_wait_obj_created(struct exofs_i_info *oi) |
1067 | { | 1068 | { |
1068 | if (!obj_created(oi)) { | 1069 | if (!obj_created(oi)) { |
1070 | EXOFS_DBGMSG("!obj_created\n"); | ||
1069 | BUG_ON(!obj_2bcreated(oi)); | 1071 | BUG_ON(!obj_2bcreated(oi)); |
1070 | wait_event(oi->i_wq, obj_created(oi)); | 1072 | wait_event(oi->i_wq, obj_created(oi)); |
1073 | EXOFS_DBGMSG("wait_event done\n"); | ||
1071 | } | 1074 | } |
1072 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; | 1075 | return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0; |
1073 | } | 1076 | } |
@@ -1089,7 +1092,7 @@ static void create_done(struct exofs_io_state *ios, void *p) | |||
1089 | atomic_dec(&sbi->s_curr_pending); | 1092 | atomic_dec(&sbi->s_curr_pending); |
1090 | 1093 | ||
1091 | if (unlikely(ret)) { | 1094 | if (unlikely(ret)) { |
1092 | EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", | 1095 | EXOFS_ERR("object=0x%llx creation failed in pid=0x%llx", |
1093 | _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); | 1096 | _LLU(exofs_oi_objno(oi)), _LLU(sbi->layout.s_pid)); |
1094 | /*TODO: When FS is corrupted creation can fail, object already | 1097 | /*TODO: When FS is corrupted creation can fail, object already |
1095 | * exist. Get rid of this asynchronous creation, if exist | 1098 | * exist. Get rid of this asynchronous creation, if exist |
@@ -1101,7 +1104,6 @@ static void create_done(struct exofs_io_state *ios, void *p) | |||
1101 | 1104 | ||
1102 | set_obj_created(oi); | 1105 | set_obj_created(oi); |
1103 | 1106 | ||
1104 | atomic_dec(&inode->i_count); | ||
1105 | wake_up(&oi->i_wq); | 1107 | wake_up(&oi->i_wq); |
1106 | } | 1108 | } |
1107 | 1109 | ||
@@ -1129,6 +1131,7 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1129 | 1131 | ||
1130 | sbi = sb->s_fs_info; | 1132 | sbi = sb->s_fs_info; |
1131 | 1133 | ||
1134 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
1132 | sb->s_dirt = 1; | 1135 | sb->s_dirt = 1; |
1133 | inode_init_owner(inode, dir, mode); | 1136 | inode_init_owner(inode, dir, mode); |
1134 | inode->i_ino = sbi->s_nextid++; | 1137 | inode->i_ino = sbi->s_nextid++; |
@@ -1151,17 +1154,11 @@ struct inode *exofs_new_inode(struct inode *dir, int mode) | |||
1151 | ios->obj.id = exofs_oi_objno(oi); | 1154 | ios->obj.id = exofs_oi_objno(oi); |
1152 | exofs_make_credential(oi->i_cred, &ios->obj); | 1155 | exofs_make_credential(oi->i_cred, &ios->obj); |
1153 | 1156 | ||
1154 | /* increment the refcount so that the inode will still be around when we | ||
1155 | * reach the callback | ||
1156 | */ | ||
1157 | atomic_inc(&inode->i_count); | ||
1158 | |||
1159 | ios->done = create_done; | 1157 | ios->done = create_done; |
1160 | ios->private = inode; | 1158 | ios->private = inode; |
1161 | ios->cred = oi->i_cred; | 1159 | ios->cred = oi->i_cred; |
1162 | ret = exofs_sbi_create(ios); | 1160 | ret = exofs_sbi_create(ios); |
1163 | if (ret) { | 1161 | if (ret) { |
1164 | atomic_dec(&inode->i_count); | ||
1165 | exofs_put_io_state(ios); | 1162 | exofs_put_io_state(ios); |
1166 | return ERR_PTR(ret); | 1163 | return ERR_PTR(ret); |
1167 | } | 1164 | } |
@@ -1209,7 +1206,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1209 | 1206 | ||
1210 | args = kzalloc(sizeof(*args), GFP_KERNEL); | 1207 | args = kzalloc(sizeof(*args), GFP_KERNEL); |
1211 | if (!args) { | 1208 | if (!args) { |
1212 | EXOFS_DBGMSG("Faild kzalloc of args\n"); | 1209 | EXOFS_DBGMSG("Failed kzalloc of args\n"); |
1213 | return -ENOMEM; | 1210 | return -ENOMEM; |
1214 | } | 1211 | } |
1215 | 1212 | ||
@@ -1251,12 +1248,7 @@ static int exofs_update_inode(struct inode *inode, int do_sync) | |||
1251 | ios->out_attr_len = 1; | 1248 | ios->out_attr_len = 1; |
1252 | ios->out_attr = &attr; | 1249 | ios->out_attr = &attr; |
1253 | 1250 | ||
1254 | if (!obj_created(oi)) { | 1251 | wait_obj_created(oi); |
1255 | EXOFS_DBGMSG("!obj_created\n"); | ||
1256 | BUG_ON(!obj_2bcreated(oi)); | ||
1257 | wait_event(oi->i_wq, obj_created(oi)); | ||
1258 | EXOFS_DBGMSG("wait_event done\n"); | ||
1259 | } | ||
1260 | 1252 | ||
1261 | if (!do_sync) { | 1253 | if (!do_sync) { |
1262 | args->sbi = sbi; | 1254 | args->sbi = sbi; |
@@ -1319,12 +1311,12 @@ void exofs_evict_inode(struct inode *inode) | |||
1319 | inode->i_size = 0; | 1311 | inode->i_size = 0; |
1320 | end_writeback(inode); | 1312 | end_writeback(inode); |
1321 | 1313 | ||
1322 | /* if we are deleting an obj that hasn't been created yet, wait */ | 1314 | /* if we are deleting an obj that hasn't been created yet, wait. |
1323 | if (!obj_created(oi)) { | 1315 | * This also makes sure that create_done cannot be called with an |
1324 | BUG_ON(!obj_2bcreated(oi)); | 1316 | * already evicted inode. |
1325 | wait_event(oi->i_wq, obj_created(oi)); | 1317 | */ |
1326 | /* ignore the error attempt a remove anyway */ | 1318 | wait_obj_created(oi); |
1327 | } | 1319 | /* ignore the error, attempt a remove anyway */ |
1328 | 1320 | ||
1329 | /* Now Remove the OSD objects */ | 1321 | /* Now Remove the OSD objects */ |
1330 | ret = exofs_get_io_state(&sbi->layout, &ios); | 1322 | ret = exofs_get_io_state(&sbi->layout, &ios); |
diff --git a/fs/exofs/ios.c b/fs/exofs/ios.c index 6550bf70e41d..f74a2ec027a6 100644 --- a/fs/exofs/ios.c +++ b/fs/exofs/ios.c | |||
@@ -55,7 +55,7 @@ int exofs_read_kern(struct osd_dev *od, u8 *cred, struct osd_obj_id *obj, | |||
55 | 55 | ||
56 | ret = osd_finalize_request(or, 0, cred, NULL); | 56 | ret = osd_finalize_request(or, 0, cred, NULL); |
57 | if (unlikely(ret)) { | 57 | if (unlikely(ret)) { |
58 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", ret); | 58 | EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", ret); |
59 | goto out; | 59 | goto out; |
60 | } | 60 | } |
61 | 61 | ||
@@ -79,7 +79,7 @@ int exofs_get_io_state(struct exofs_layout *layout, | |||
79 | */ | 79 | */ |
80 | ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); | 80 | ios = kzalloc(exofs_io_state_size(layout->s_numdevs), GFP_KERNEL); |
81 | if (unlikely(!ios)) { | 81 | if (unlikely(!ios)) { |
82 | EXOFS_DBGMSG("Faild kzalloc bytes=%d\n", | 82 | EXOFS_DBGMSG("Failed kzalloc bytes=%d\n", |
83 | exofs_io_state_size(layout->s_numdevs)); | 83 | exofs_io_state_size(layout->s_numdevs)); |
84 | *pios = NULL; | 84 | *pios = NULL; |
85 | return -ENOMEM; | 85 | return -ENOMEM; |
@@ -172,7 +172,7 @@ static int exofs_io_execute(struct exofs_io_state *ios) | |||
172 | 172 | ||
173 | ret = osd_finalize_request(or, 0, ios->cred, NULL); | 173 | ret = osd_finalize_request(or, 0, ios->cred, NULL); |
174 | if (unlikely(ret)) { | 174 | if (unlikely(ret)) { |
175 | EXOFS_DBGMSG("Faild to osd_finalize_request() => %d\n", | 175 | EXOFS_DBGMSG("Failed to osd_finalize_request() => %d\n", |
176 | ret); | 176 | ret); |
177 | return ret; | 177 | return ret; |
178 | } | 178 | } |
@@ -361,7 +361,7 @@ static int _add_stripe_unit(struct exofs_io_state *ios, unsigned *cur_pg, | |||
361 | 361 | ||
362 | per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); | 362 | per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); |
363 | if (unlikely(!per_dev->bio)) { | 363 | if (unlikely(!per_dev->bio)) { |
364 | EXOFS_DBGMSG("Faild to allocate BIO size=%u\n", | 364 | EXOFS_DBGMSG("Failed to allocate BIO size=%u\n", |
365 | bio_size); | 365 | bio_size); |
366 | return -ENOMEM; | 366 | return -ENOMEM; |
367 | } | 367 | } |
@@ -564,7 +564,7 @@ static int _sbi_write_mirror(struct exofs_io_state *ios, int cur_comp) | |||
564 | master_dev->bio->bi_max_vecs); | 564 | master_dev->bio->bi_max_vecs); |
565 | if (unlikely(!bio)) { | 565 | if (unlikely(!bio)) { |
566 | EXOFS_DBGMSG( | 566 | EXOFS_DBGMSG( |
567 | "Faild to allocate BIO size=%u\n", | 567 | "Failed to allocate BIO size=%u\n", |
568 | master_dev->bio->bi_max_vecs); | 568 | master_dev->bio->bi_max_vecs); |
569 | ret = -ENOMEM; | 569 | ret = -ENOMEM; |
570 | goto out; | 570 | goto out; |
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index b7dd0c236863..264e95d02830 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c | |||
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, | |||
153 | 153 | ||
154 | inode->i_ctime = CURRENT_TIME; | 154 | inode->i_ctime = CURRENT_TIME; |
155 | inode_inc_link_count(inode); | 155 | inode_inc_link_count(inode); |
156 | atomic_inc(&inode->i_count); | 156 | ihold(inode); |
157 | 157 | ||
158 | return exofs_add_nondir(dentry, inode); | 158 | return exofs_add_nondir(dentry, inode); |
159 | } | 159 | } |
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index e9e175949a63..51b304056f10 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
@@ -74,21 +74,20 @@ static struct dentry * | |||
74 | find_disconnected_root(struct dentry *dentry) | 74 | find_disconnected_root(struct dentry *dentry) |
75 | { | 75 | { |
76 | dget(dentry); | 76 | dget(dentry); |
77 | spin_lock(&dentry->d_lock); | 77 | while (!IS_ROOT(dentry)) { |
78 | while (!IS_ROOT(dentry) && | 78 | struct dentry *parent = dget_parent(dentry); |
79 | (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) { | 79 | |
80 | struct dentry *parent = dentry->d_parent; | 80 | if (!(parent->d_flags & DCACHE_DISCONNECTED)) { |
81 | dget(parent); | 81 | dput(parent); |
82 | spin_unlock(&dentry->d_lock); | 82 | break; |
83 | } | ||
84 | |||
83 | dput(dentry); | 85 | dput(dentry); |
84 | dentry = parent; | 86 | dentry = parent; |
85 | spin_lock(&dentry->d_lock); | ||
86 | } | 87 | } |
87 | spin_unlock(&dentry->d_lock); | ||
88 | return dentry; | 88 | return dentry; |
89 | } | 89 | } |
90 | 90 | ||
91 | |||
92 | /* | 91 | /* |
93 | * Make sure target_dir is fully connected to the dentry tree. | 92 | * Make sure target_dir is fully connected to the dentry tree. |
94 | * | 93 | * |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 764109886ec0..2709b34206ab 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) | |||
98 | if (IS_DIRSYNC(dir)) { | 98 | if (IS_DIRSYNC(dir)) { |
99 | err = write_one_page(page, 1); | 99 | err = write_one_page(page, 1); |
100 | if (!err) | 100 | if (!err) |
101 | err = ext2_sync_inode(dir); | 101 | err = sync_inode_metadata(dir, 1); |
102 | } else { | 102 | } else { |
103 | unlock_page(page); | 103 | unlock_page(page); |
104 | } | 104 | } |
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 416daa62242c..6346a2acf326 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
@@ -120,7 +120,6 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned); | |||
120 | extern struct inode *ext2_iget (struct super_block *, unsigned long); | 120 | extern struct inode *ext2_iget (struct super_block *, unsigned long); |
121 | extern int ext2_write_inode (struct inode *, struct writeback_control *); | 121 | extern int ext2_write_inode (struct inode *, struct writeback_control *); |
122 | extern void ext2_evict_inode(struct inode *); | 122 | extern void ext2_evict_inode(struct inode *); |
123 | extern int ext2_sync_inode (struct inode *); | ||
124 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); | 123 | extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int); |
125 | extern int ext2_setattr (struct dentry *, struct iattr *); | 124 | extern int ext2_setattr (struct dentry *, struct iattr *); |
126 | extern void ext2_set_inode_flags(struct inode *inode); | 125 | extern void ext2_set_inode_flags(struct inode *inode); |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 940c96168868..40ad210a5049 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -458,7 +458,7 @@ failed_out: | |||
458 | * the same format as ext2_get_branch() would do. We are calling it after | 458 | * the same format as ext2_get_branch() would do. We are calling it after |
459 | * we had read the existing part of chain and partial points to the last | 459 | * we had read the existing part of chain and partial points to the last |
460 | * triple of that (one with zero ->key). Upon the exit we have the same | 460 | * triple of that (one with zero ->key). Upon the exit we have the same |
461 | * picture as after the successful ext2_get_block(), excpet that in one | 461 | * picture as after the successful ext2_get_block(), except that in one |
462 | * place chain is disconnected - *branch->p is still zero (we did not | 462 | * place chain is disconnected - *branch->p is still zero (we did not |
463 | * set the last link), but branch->key contains the number that should | 463 | * set the last link), but branch->key contains the number that should |
464 | * be placed into *branch->p to fill that gap. | 464 | * be placed into *branch->p to fill that gap. |
@@ -662,7 +662,7 @@ static int ext2_get_blocks(struct inode *inode, | |||
662 | mutex_lock(&ei->truncate_mutex); | 662 | mutex_lock(&ei->truncate_mutex); |
663 | /* | 663 | /* |
664 | * If the indirect block is missing while we are reading | 664 | * If the indirect block is missing while we are reading |
665 | * the chain(ext3_get_branch() returns -EAGAIN err), or | 665 | * the chain(ext2_get_branch() returns -EAGAIN err), or |
666 | * if the chain has been changed after we grab the semaphore, | 666 | * if the chain has been changed after we grab the semaphore, |
667 | * (either because another process truncated this branch, or | 667 | * (either because another process truncated this branch, or |
668 | * another get_block allocated this branch) re-grab the chain to see if | 668 | * another get_block allocated this branch) re-grab the chain to see if |
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize) | |||
1203 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; | 1203 | inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; |
1204 | if (inode_needs_sync(inode)) { | 1204 | if (inode_needs_sync(inode)) { |
1205 | sync_mapping_buffers(inode->i_mapping); | 1205 | sync_mapping_buffers(inode->i_mapping); |
1206 | ext2_sync_inode (inode); | 1206 | sync_inode_metadata(inode, 1); |
1207 | } else { | 1207 | } else { |
1208 | mark_inode_dirty(inode); | 1208 | mark_inode_dirty(inode); |
1209 | } | 1209 | } |
@@ -1523,15 +1523,6 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
1523 | return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); | 1523 | return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL); |
1524 | } | 1524 | } |
1525 | 1525 | ||
1526 | int ext2_sync_inode(struct inode *inode) | ||
1527 | { | ||
1528 | struct writeback_control wbc = { | ||
1529 | .sync_mode = WB_SYNC_ALL, | ||
1530 | .nr_to_write = 0, /* sys_fsync did this */ | ||
1531 | }; | ||
1532 | return sync_inode(inode, &wbc); | ||
1533 | } | ||
1534 | |||
1535 | int ext2_setattr(struct dentry *dentry, struct iattr *iattr) | 1526 | int ext2_setattr(struct dentry *dentry, struct iattr *iattr) |
1536 | { | 1527 | { |
1537 | struct inode *inode = dentry->d_inode; | 1528 | struct inode *inode = dentry->d_inode; |
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 71efb0e9a3f2..f8aecd2e3297 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c | |||
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, | |||
206 | 206 | ||
207 | inode->i_ctime = CURRENT_TIME_SEC; | 207 | inode->i_ctime = CURRENT_TIME_SEC; |
208 | inode_inc_link_count(inode); | 208 | inode_inc_link_count(inode); |
209 | atomic_inc(&inode->i_count); | 209 | ihold(inode); |
210 | 210 | ||
211 | err = ext2_add_link(dentry, inode); | 211 | err = ext2_add_link(dentry, inode); |
212 | if (!err) { | 212 | if (!err) { |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 1ec602673ea8..0901320671da 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -747,15 +747,16 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
747 | __le32 features; | 747 | __le32 features; |
748 | int err; | 748 | int err; |
749 | 749 | ||
750 | err = -ENOMEM; | ||
750 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 751 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
751 | if (!sbi) | 752 | if (!sbi) |
752 | return -ENOMEM; | 753 | goto failed_unlock; |
753 | 754 | ||
754 | sbi->s_blockgroup_lock = | 755 | sbi->s_blockgroup_lock = |
755 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); | 756 | kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); |
756 | if (!sbi->s_blockgroup_lock) { | 757 | if (!sbi->s_blockgroup_lock) { |
757 | kfree(sbi); | 758 | kfree(sbi); |
758 | return -ENOMEM; | 759 | goto failed_unlock; |
759 | } | 760 | } |
760 | sb->s_fs_info = sbi; | 761 | sb->s_fs_info = sbi; |
761 | sbi->s_sb_block = sb_block; | 762 | sbi->s_sb_block = sb_block; |
@@ -1107,6 +1108,7 @@ failed_sbi: | |||
1107 | sb->s_fs_info = NULL; | 1108 | sb->s_fs_info = NULL; |
1108 | kfree(sbi->s_blockgroup_lock); | 1109 | kfree(sbi->s_blockgroup_lock); |
1109 | kfree(sbi); | 1110 | kfree(sbi); |
1111 | failed_unlock: | ||
1110 | return ret; | 1112 | return ret; |
1111 | } | 1113 | } |
1112 | 1114 | ||
@@ -1219,9 +1221,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1219 | } | 1221 | } |
1220 | 1222 | ||
1221 | es = sbi->s_es; | 1223 | es = sbi->s_es; |
1222 | if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) != | 1224 | if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) { |
1223 | (old_mount_opt & EXT2_MOUNT_XIP)) && | ||
1224 | invalidate_inodes(sb)) { | ||
1225 | ext2_msg(sb, KERN_WARNING, "warning: refusing change of " | 1225 | ext2_msg(sb, KERN_WARNING, "warning: refusing change of " |
1226 | "xip flag with busy inodes while remounting"); | 1226 | "xip flag with busy inodes while remounting"); |
1227 | sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; | 1227 | sbi->s_mount_opt &= ~EXT2_MOUNT_XIP; |
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 8c29ae15129e..f84700be3274 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c | |||
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh, | |||
699 | EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; | 699 | EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; |
700 | inode->i_ctime = CURRENT_TIME_SEC; | 700 | inode->i_ctime = CURRENT_TIME_SEC; |
701 | if (IS_SYNC(inode)) { | 701 | if (IS_SYNC(inode)) { |
702 | error = ext2_sync_inode (inode); | 702 | error = sync_inode_metadata(inode, 1); |
703 | /* In case sync failed due to ENOSPC the inode was actually | 703 | /* In case sync failed due to ENOSPC the inode was actually |
704 | * written (only some dirty data were not) so we just proceed | 704 | * written (only some dirty data were not) so we just proceed |
705 | * as if nothing happened and cleanup the unused block */ | 705 | * as if nothing happened and cleanup the unused block */ |
diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index d7e9f74dc3a6..09b13bb34c94 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c | |||
@@ -90,7 +90,6 @@ int ext3_sync_file(struct file *file, int datasync) | |||
90 | * storage | 90 | * storage |
91 | */ | 91 | */ |
92 | if (needs_barrier) | 92 | if (needs_barrier) |
93 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 93 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
94 | BLKDEV_IFL_WAIT); | ||
95 | return ret; | 94 | return ret; |
96 | } | 95 | } |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5e0faf4cda79..ad05353040a1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -1696,8 +1696,8 @@ static int ext3_journalled_writepage(struct page *page, | |||
1696 | * doesn't seem much point in redirtying the page here. | 1696 | * doesn't seem much point in redirtying the page here. |
1697 | */ | 1697 | */ |
1698 | ClearPageChecked(page); | 1698 | ClearPageChecked(page); |
1699 | ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, | 1699 | ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE, |
1700 | ext3_get_block); | 1700 | ext3_get_block); |
1701 | if (ret != 0) { | 1701 | if (ret != 0) { |
1702 | ext3_journal_stop(handle); | 1702 | ext3_journal_stop(handle); |
1703 | goto out_unlock; | 1703 | goto out_unlock; |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 2b35ddb70d65..bce9dce639b8 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
@@ -2260,7 +2260,7 @@ retry: | |||
2260 | 2260 | ||
2261 | inode->i_ctime = CURRENT_TIME_SEC; | 2261 | inode->i_ctime = CURRENT_TIME_SEC; |
2262 | inc_nlink(inode); | 2262 | inc_nlink(inode); |
2263 | atomic_inc(&inode->i_count); | 2263 | ihold(inode); |
2264 | 2264 | ||
2265 | err = ext3_add_entry(handle, dentry, inode); | 2265 | err = ext3_add_entry(handle, dentry, inode); |
2266 | if (!err) { | 2266 | if (!err) { |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 5dbf4dba03c4..377768009106 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -411,9 +411,6 @@ static void ext3_put_super (struct super_block * sb) | |||
411 | int i, err; | 411 | int i, err; |
412 | 412 | ||
413 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 413 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
414 | |||
415 | lock_kernel(); | ||
416 | |||
417 | ext3_xattr_put_super(sb); | 414 | ext3_xattr_put_super(sb); |
418 | err = journal_destroy(sbi->s_journal); | 415 | err = journal_destroy(sbi->s_journal); |
419 | sbi->s_journal = NULL; | 416 | sbi->s_journal = NULL; |
@@ -462,8 +459,6 @@ static void ext3_put_super (struct super_block * sb) | |||
462 | sb->s_fs_info = NULL; | 459 | sb->s_fs_info = NULL; |
463 | kfree(sbi->s_blockgroup_lock); | 460 | kfree(sbi->s_blockgroup_lock); |
464 | kfree(sbi); | 461 | kfree(sbi); |
465 | |||
466 | unlock_kernel(); | ||
467 | } | 462 | } |
468 | 463 | ||
469 | static struct kmem_cache *ext3_inode_cachep; | 464 | static struct kmem_cache *ext3_inode_cachep; |
@@ -1627,8 +1622,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1627 | sbi->s_resgid = EXT3_DEF_RESGID; | 1622 | sbi->s_resgid = EXT3_DEF_RESGID; |
1628 | sbi->s_sb_block = sb_block; | 1623 | sbi->s_sb_block = sb_block; |
1629 | 1624 | ||
1630 | unlock_kernel(); | ||
1631 | |||
1632 | blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); | 1625 | blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); |
1633 | if (!blocksize) { | 1626 | if (!blocksize) { |
1634 | ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); | 1627 | ext3_msg(sb, KERN_ERR, "error: unable to set blocksize"); |
@@ -1849,8 +1842,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
1849 | goto failed_mount; | 1842 | goto failed_mount; |
1850 | } | 1843 | } |
1851 | 1844 | ||
1852 | if (le32_to_cpu(es->s_blocks_count) > | 1845 | if (generic_check_addressable(sb->s_blocksize_bits, |
1853 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { | 1846 | le32_to_cpu(es->s_blocks_count))) { |
1854 | ext3_msg(sb, KERN_ERR, | 1847 | ext3_msg(sb, KERN_ERR, |
1855 | "error: filesystem is too large to mount safely"); | 1848 | "error: filesystem is too large to mount safely"); |
1856 | if (sizeof(sector_t) < 8) | 1849 | if (sizeof(sector_t) < 8) |
@@ -2025,7 +2018,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) | |||
2025 | test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": | 2018 | test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": |
2026 | "writeback"); | 2019 | "writeback"); |
2027 | 2020 | ||
2028 | lock_kernel(); | ||
2029 | return 0; | 2021 | return 0; |
2030 | 2022 | ||
2031 | cantfind_ext3: | 2023 | cantfind_ext3: |
@@ -2055,7 +2047,6 @@ out_fail: | |||
2055 | sb->s_fs_info = NULL; | 2047 | sb->s_fs_info = NULL; |
2056 | kfree(sbi->s_blockgroup_lock); | 2048 | kfree(sbi->s_blockgroup_lock); |
2057 | kfree(sbi); | 2049 | kfree(sbi); |
2058 | lock_kernel(); | ||
2059 | return ret; | 2050 | return ret; |
2060 | } | 2051 | } |
2061 | 2052 | ||
@@ -2538,8 +2529,6 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2538 | int i; | 2529 | int i; |
2539 | #endif | 2530 | #endif |
2540 | 2531 | ||
2541 | lock_kernel(); | ||
2542 | |||
2543 | /* Store the original options */ | 2532 | /* Store the original options */ |
2544 | lock_super(sb); | 2533 | lock_super(sb); |
2545 | old_sb_flags = sb->s_flags; | 2534 | old_sb_flags = sb->s_flags; |
@@ -2648,7 +2637,6 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2648 | kfree(old_opts.s_qf_names[i]); | 2637 | kfree(old_opts.s_qf_names[i]); |
2649 | #endif | 2638 | #endif |
2650 | unlock_super(sb); | 2639 | unlock_super(sb); |
2651 | unlock_kernel(); | ||
2652 | 2640 | ||
2653 | if (enable_quota) | 2641 | if (enable_quota) |
2654 | dquot_resume(sb, -1); | 2642 | dquot_resume(sb, -1); |
@@ -2669,7 +2657,6 @@ restore_opts: | |||
2669 | } | 2657 | } |
2670 | #endif | 2658 | #endif |
2671 | unlock_super(sb); | 2659 | unlock_super(sb); |
2672 | unlock_kernel(); | ||
2673 | return err; | 2660 | return err; |
2674 | } | 2661 | } |
2675 | 2662 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 592adf2e546e..3f3ff5ee8f9d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
@@ -128,10 +128,9 @@ int ext4_sync_file(struct file *file, int datasync) | |||
128 | (journal->j_fs_dev != journal->j_dev) && | 128 | (journal->j_fs_dev != journal->j_dev) && |
129 | (journal->j_flags & JBD2_BARRIER)) | 129 | (journal->j_flags & JBD2_BARRIER)) |
130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | 130 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, |
131 | NULL, BLKDEV_IFL_WAIT); | 131 | NULL); |
132 | ret = jbd2_log_wait_commit(journal, commit_tid); | 132 | ret = jbd2_log_wait_commit(journal, commit_tid); |
133 | } else if (journal->j_flags & JBD2_BARRIER) | 133 | } else if (journal->j_flags & JBD2_BARRIER) |
134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 134 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
135 | BLKDEV_IFL_WAIT); | ||
136 | return ret; | 135 | return ret; |
137 | } | 136 | } |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4b8debeb3965..49635ef236f8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -1538,10 +1538,10 @@ static int do_journal_get_write_access(handle_t *handle, | |||
1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) | 1538 | if (!buffer_mapped(bh) || buffer_freed(bh)) |
1539 | return 0; | 1539 | return 0; |
1540 | /* | 1540 | /* |
1541 | * __block_prepare_write() could have dirtied some buffers. Clean | 1541 | * __block_write_begin() could have dirtied some buffers. Clean |
1542 | * the dirty bit as jbd2_journal_get_write_access() could complain | 1542 | * the dirty bit as jbd2_journal_get_write_access() could complain |
1543 | * otherwise about fs integrity issues. Setting of the dirty bit | 1543 | * otherwise about fs integrity issues. Setting of the dirty bit |
1544 | * by __block_prepare_write() isn't a real problem here as we clear | 1544 | * by __block_write_begin() isn't a real problem here as we clear |
1545 | * the bit before releasing a page lock and thus writeback cannot | 1545 | * the bit before releasing a page lock and thus writeback cannot |
1546 | * ever write the buffer. | 1546 | * ever write the buffer. |
1547 | */ | 1547 | */ |
@@ -2550,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2550 | if (buffer_delay(bh)) | 2550 | if (buffer_delay(bh)) |
2551 | return 0; /* Not sure this could or should happen */ | 2551 | return 0; /* Not sure this could or should happen */ |
2552 | /* | 2552 | /* |
2553 | * XXX: __block_prepare_write() unmaps passed block, | 2553 | * XXX: __block_write_begin() unmaps passed block, is it OK? |
2554 | * is it OK? | ||
2555 | */ | 2554 | */ |
2556 | ret = ext4_da_reserve_space(inode, iblock); | 2555 | ret = ext4_da_reserve_space(inode, iblock); |
2557 | if (ret) | 2556 | if (ret) |
@@ -2583,7 +2582,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, | |||
2583 | /* | 2582 | /* |
2584 | * This function is used as a standard get_block_t calback function | 2583 | * This function is used as a standard get_block_t calback function |
2585 | * when there is no desire to allocate any blocks. It is used as a | 2584 | * when there is no desire to allocate any blocks. It is used as a |
2586 | * callback function for block_prepare_write() and block_write_full_page(). | 2585 | * callback function for block_write_begin() and block_write_full_page(). |
2587 | * These functions should only try to map a single block at a time. | 2586 | * These functions should only try to map a single block at a time. |
2588 | * | 2587 | * |
2589 | * Since this function doesn't do block allocations even if the caller | 2588 | * Since this function doesn't do block allocations even if the caller |
@@ -2743,7 +2742,7 @@ static int ext4_writepage(struct page *page, | |||
2743 | * all are mapped and non delay. We don't want to | 2742 | * all are mapped and non delay. We don't want to |
2744 | * do block allocation here. | 2743 | * do block allocation here. |
2745 | */ | 2744 | */ |
2746 | ret = block_prepare_write(page, 0, len, | 2745 | ret = __block_write_begin(page, 0, len, |
2747 | noalloc_get_block_write); | 2746 | noalloc_get_block_write); |
2748 | if (!ret) { | 2747 | if (!ret) { |
2749 | page_bufs = page_buffers(page); | 2748 | page_bufs = page_buffers(page); |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 4b4ad4b7ce57..42f77b1dc72d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2373 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); | 2373 | printk(KERN_ERR "EXT4-fs: can't get new inode\n"); |
2374 | goto err_freesgi; | 2374 | goto err_freesgi; |
2375 | } | 2375 | } |
2376 | sbi->s_buddy_cache->i_ino = get_next_ino(); | ||
2376 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; | 2377 | EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; |
2377 | for (i = 0; i < ngroups; i++) { | 2378 | for (i = 0; i < ngroups; i++) { |
2378 | desc = ext4_get_group_desc(sb, i, NULL); | 2379 | desc = ext4_get_group_desc(sb, i, NULL); |
@@ -2566,7 +2567,7 @@ static inline void ext4_issue_discard(struct super_block *sb, | |||
2566 | discard_block = block + ext4_group_first_block_no(sb, block_group); | 2567 | discard_block = block + ext4_group_first_block_no(sb, block_group); |
2567 | trace_ext4_discard_blocks(sb, | 2568 | trace_ext4_discard_blocks(sb, |
2568 | (unsigned long long) discard_block, count); | 2569 | (unsigned long long) discard_block, count); |
2569 | ret = sb_issue_discard(sb, discard_block, count); | 2570 | ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0); |
2570 | if (ret == EOPNOTSUPP) { | 2571 | if (ret == EOPNOTSUPP) { |
2571 | ext4_warning(sb, "discard not supported, disabling"); | 2572 | ext4_warning(sb, "discard not supported, disabling"); |
2572 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); | 2573 | clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD); |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 314c0d3b3fa9..bd39885b5998 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -2312,7 +2312,7 @@ retry: | |||
2312 | 2312 | ||
2313 | inode->i_ctime = ext4_current_time(inode); | 2313 | inode->i_ctime = ext4_current_time(inode); |
2314 | ext4_inc_count(handle, inode); | 2314 | ext4_inc_count(handle, inode); |
2315 | atomic_inc(&inode->i_count); | 2315 | ihold(inode); |
2316 | 2316 | ||
2317 | err = ext4_add_entry(handle, dentry, inode); | 2317 | err = ext4_add_entry(handle, dentry, inode); |
2318 | if (!err) { | 2318 | if (!err) { |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 26147746c272..8ecc1e590303 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -26,7 +26,6 @@ | |||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/blkdev.h> | 27 | #include <linux/blkdev.h> |
28 | #include <linux/parser.h> | 28 | #include <linux/parser.h> |
29 | #include <linux/smp_lock.h> | ||
30 | #include <linux/buffer_head.h> | 29 | #include <linux/buffer_head.h> |
31 | #include <linux/exportfs.h> | 30 | #include <linux/exportfs.h> |
32 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
@@ -708,7 +707,6 @@ static void ext4_put_super(struct super_block *sb) | |||
708 | destroy_workqueue(sbi->dio_unwritten_wq); | 707 | destroy_workqueue(sbi->dio_unwritten_wq); |
709 | 708 | ||
710 | lock_super(sb); | 709 | lock_super(sb); |
711 | lock_kernel(); | ||
712 | if (sb->s_dirt) | 710 | if (sb->s_dirt) |
713 | ext4_commit_super(sb, 1); | 711 | ext4_commit_super(sb, 1); |
714 | 712 | ||
@@ -775,7 +773,6 @@ static void ext4_put_super(struct super_block *sb) | |||
775 | * Now that we are completely done shutting down the | 773 | * Now that we are completely done shutting down the |
776 | * superblock, we need to actually destroy the kobject. | 774 | * superblock, we need to actually destroy the kobject. |
777 | */ | 775 | */ |
778 | unlock_kernel(); | ||
779 | unlock_super(sb); | 776 | unlock_super(sb); |
780 | kobject_put(&sbi->s_kobj); | 777 | kobject_put(&sbi->s_kobj); |
781 | wait_for_completion(&sbi->s_kobj_unregister); | 778 | wait_for_completion(&sbi->s_kobj_unregister); |
@@ -2588,8 +2585,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2588 | sbi->s_sectors_written_start = | 2585 | sbi->s_sectors_written_start = |
2589 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); | 2586 | part_stat_read(sb->s_bdev->bd_part, sectors[1]); |
2590 | 2587 | ||
2591 | unlock_kernel(); | ||
2592 | |||
2593 | /* Cleanup superblock name */ | 2588 | /* Cleanup superblock name */ |
2594 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | 2589 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) |
2595 | *cp = '!'; | 2590 | *cp = '!'; |
@@ -2831,15 +2826,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
2831 | * Test whether we have more sectors than will fit in sector_t, | 2826 | * Test whether we have more sectors than will fit in sector_t, |
2832 | * and whether the max offset is addressable by the page cache. | 2827 | * and whether the max offset is addressable by the page cache. |
2833 | */ | 2828 | */ |
2834 | if ((ext4_blocks_count(es) > | 2829 | ret = generic_check_addressable(sb->s_blocksize_bits, |
2835 | (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) || | 2830 | ext4_blocks_count(es)); |
2836 | (ext4_blocks_count(es) > | 2831 | if (ret) { |
2837 | (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) { | ||
2838 | ext4_msg(sb, KERN_ERR, "filesystem" | 2832 | ext4_msg(sb, KERN_ERR, "filesystem" |
2839 | " too large to mount safely on this system"); | 2833 | " too large to mount safely on this system"); |
2840 | if (sizeof(sector_t) < 8) | 2834 | if (sizeof(sector_t) < 8) |
2841 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); | 2835 | ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled"); |
2842 | ret = -EFBIG; | ||
2843 | goto failed_mount; | 2836 | goto failed_mount; |
2844 | } | 2837 | } |
2845 | 2838 | ||
@@ -3166,7 +3159,6 @@ no_journal: | |||
3166 | if (es->s_error_count) | 3159 | if (es->s_error_count) |
3167 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ | 3160 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ |
3168 | 3161 | ||
3169 | lock_kernel(); | ||
3170 | kfree(orig_data); | 3162 | kfree(orig_data); |
3171 | return 0; | 3163 | return 0; |
3172 | 3164 | ||
@@ -3213,7 +3205,6 @@ out_fail: | |||
3213 | sb->s_fs_info = NULL; | 3205 | sb->s_fs_info = NULL; |
3214 | kfree(sbi->s_blockgroup_lock); | 3206 | kfree(sbi->s_blockgroup_lock); |
3215 | kfree(sbi); | 3207 | kfree(sbi); |
3216 | lock_kernel(); | ||
3217 | out_free_orig: | 3208 | out_free_orig: |
3218 | kfree(orig_data); | 3209 | kfree(orig_data); |
3219 | return ret; | 3210 | return ret; |
@@ -3722,8 +3713,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3722 | #endif | 3713 | #endif |
3723 | char *orig_data = kstrdup(data, GFP_KERNEL); | 3714 | char *orig_data = kstrdup(data, GFP_KERNEL); |
3724 | 3715 | ||
3725 | lock_kernel(); | ||
3726 | |||
3727 | /* Store the original options */ | 3716 | /* Store the original options */ |
3728 | lock_super(sb); | 3717 | lock_super(sb); |
3729 | old_sb_flags = sb->s_flags; | 3718 | old_sb_flags = sb->s_flags; |
@@ -3858,7 +3847,6 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
3858 | kfree(old_opts.s_qf_names[i]); | 3847 | kfree(old_opts.s_qf_names[i]); |
3859 | #endif | 3848 | #endif |
3860 | unlock_super(sb); | 3849 | unlock_super(sb); |
3861 | unlock_kernel(); | ||
3862 | if (enable_quota) | 3850 | if (enable_quota) |
3863 | dquot_resume(sb, -1); | 3851 | dquot_resume(sb, -1); |
3864 | 3852 | ||
@@ -3884,7 +3872,6 @@ restore_opts: | |||
3884 | } | 3872 | } |
3885 | #endif | 3873 | #endif |
3886 | unlock_super(sb); | 3874 | unlock_super(sb); |
3887 | unlock_kernel(); | ||
3888 | kfree(orig_data); | 3875 | kfree(orig_data); |
3889 | return err; | 3876 | return err; |
3890 | } | 3877 | } |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 81184d3b75a3..b47d2c9f4fa1 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
@@ -577,7 +577,8 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
577 | 577 | ||
578 | sb_issue_discard(sb, | 578 | sb_issue_discard(sb, |
579 | fat_clus_to_blknr(sbi, first_cl), | 579 | fat_clus_to_blknr(sbi, first_cl), |
580 | nr_clus * sbi->sec_per_clus); | 580 | nr_clus * sbi->sec_per_clus, |
581 | GFP_NOFS, 0); | ||
581 | 582 | ||
582 | first_cl = cluster; | 583 | first_cl = cluster; |
583 | } | 584 | } |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 830058057d33..ad6998a92c30 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -14,7 +14,6 @@ | |||
14 | #include <linux/init.h> | 14 | #include <linux/init.h> |
15 | #include <linux/time.h> | 15 | #include <linux/time.h> |
16 | #include <linux/slab.h> | 16 | #include <linux/slab.h> |
17 | #include <linux/smp_lock.h> | ||
18 | #include <linux/seq_file.h> | 17 | #include <linux/seq_file.h> |
19 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
20 | #include <linux/mpage.h> | 19 | #include <linux/mpage.h> |
@@ -489,8 +488,6 @@ static void fat_put_super(struct super_block *sb) | |||
489 | { | 488 | { |
490 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 489 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
491 | 490 | ||
492 | lock_kernel(); | ||
493 | |||
494 | if (sb->s_dirt) | 491 | if (sb->s_dirt) |
495 | fat_write_super(sb); | 492 | fat_write_super(sb); |
496 | 493 | ||
@@ -504,8 +501,6 @@ static void fat_put_super(struct super_block *sb) | |||
504 | 501 | ||
505 | sb->s_fs_info = NULL; | 502 | sb->s_fs_info = NULL; |
506 | kfree(sbi); | 503 | kfree(sbi); |
507 | |||
508 | unlock_kernel(); | ||
509 | } | 504 | } |
510 | 505 | ||
511 | static struct kmem_cache *fat_inode_cachep; | 506 | static struct kmem_cache *fat_inode_cachep; |
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f2356388..970e682ea754 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
@@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) | |||
255 | 255 | ||
256 | for (i = 0; i < nr_bhs; i++) { | 256 | for (i = 0; i < nr_bhs; i++) { |
257 | wait_on_buffer(bhs[i]); | 257 | wait_on_buffer(bhs[i]); |
258 | if (buffer_eopnotsupp(bhs[i])) { | 258 | if (!err && !buffer_uptodate(bhs[i])) |
259 | clear_buffer_eopnotsupp(bhs[i]); | ||
260 | err = -EOPNOTSUPP; | ||
261 | } else if (!err && !buffer_uptodate(bhs[i])) | ||
262 | err = -EIO; | 259 | err = -EIO; |
263 | } | 260 | } |
264 | return err; | 261 | return err; |
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index bbc94ae4fd77..bbca5c186ae7 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
@@ -662,12 +662,16 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent) | |||
662 | { | 662 | { |
663 | int res; | 663 | int res; |
664 | 664 | ||
665 | lock_super(sb); | ||
665 | res = fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 0); | 666 | res = fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, 0); |
666 | if (res) | 667 | if (res) { |
668 | unlock_super(sb); | ||
667 | return res; | 669 | return res; |
670 | } | ||
668 | 671 | ||
669 | sb->s_flags |= MS_NOATIME; | 672 | sb->s_flags |= MS_NOATIME; |
670 | sb->s_root->d_op = &msdos_dentry_operations; | 673 | sb->s_root->d_op = &msdos_dentry_operations; |
674 | unlock_super(sb); | ||
671 | return 0; | 675 | return 0; |
672 | } | 676 | } |
673 | 677 | ||
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index 6fcc7e71fbaa..6f0f6c9a0152 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
@@ -1055,15 +1055,19 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent) | |||
1055 | { | 1055 | { |
1056 | int res; | 1056 | int res; |
1057 | 1057 | ||
1058 | lock_super(sb); | ||
1058 | res = fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1); | 1059 | res = fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, 1); |
1059 | if (res) | 1060 | if (res) { |
1061 | unlock_super(sb); | ||
1060 | return res; | 1062 | return res; |
1063 | } | ||
1061 | 1064 | ||
1062 | if (MSDOS_SB(sb)->options.name_check != 's') | 1065 | if (MSDOS_SB(sb)->options.name_check != 's') |
1063 | sb->s_root->d_op = &vfat_ci_dentry_ops; | 1066 | sb->s_root->d_op = &vfat_ci_dentry_ops; |
1064 | else | 1067 | else |
1065 | sb->s_root->d_op = &vfat_dentry_ops; | 1068 | sb->s_root->d_op = &vfat_dentry_ops; |
1066 | 1069 | ||
1070 | unlock_super(sb); | ||
1067 | return 0; | 1071 | return 0; |
1068 | } | 1072 | } |
1069 | 1073 | ||
diff --git a/fs/fcntl.c b/fs/fcntl.c index f8cc34f542c3..ecc8b3954ed6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head) | |||
640 | * match the state "is the filp on a fasync list". | 640 | * match the state "is the filp on a fasync list". |
641 | * | 641 | * |
642 | */ | 642 | */ |
643 | static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) | 643 | int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) |
644 | { | 644 | { |
645 | struct fasync_struct *fa, **fp; | 645 | struct fasync_struct *fa, **fp; |
646 | int result = 0; | 646 | int result = 0; |
@@ -666,21 +666,31 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) | |||
666 | return result; | 666 | return result; |
667 | } | 667 | } |
668 | 668 | ||
669 | struct fasync_struct *fasync_alloc(void) | ||
670 | { | ||
671 | return kmem_cache_alloc(fasync_cache, GFP_KERNEL); | ||
672 | } | ||
673 | |||
669 | /* | 674 | /* |
670 | * Add a fasync entry. Return negative on error, positive if | 675 | * NOTE! This can be used only for unused fasync entries: |
671 | * added, and zero if did nothing but change an existing one. | 676 | * entries that actually got inserted on the fasync list |
677 | * need to be released by rcu - see fasync_remove_entry. | ||
678 | */ | ||
679 | void fasync_free(struct fasync_struct *new) | ||
680 | { | ||
681 | kmem_cache_free(fasync_cache, new); | ||
682 | } | ||
683 | |||
684 | /* | ||
685 | * Insert a new entry into the fasync list. Return the pointer to the | ||
686 | * old one if we didn't use the new one. | ||
672 | * | 687 | * |
673 | * NOTE! It is very important that the FASYNC flag always | 688 | * NOTE! It is very important that the FASYNC flag always |
674 | * match the state "is the filp on a fasync list". | 689 | * match the state "is the filp on a fasync list". |
675 | */ | 690 | */ |
676 | static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) | 691 | struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new) |
677 | { | 692 | { |
678 | struct fasync_struct *new, *fa, **fp; | 693 | struct fasync_struct *fa, **fp; |
679 | int result = 0; | ||
680 | |||
681 | new = kmem_cache_alloc(fasync_cache, GFP_KERNEL); | ||
682 | if (!new) | ||
683 | return -ENOMEM; | ||
684 | 694 | ||
685 | spin_lock(&filp->f_lock); | 695 | spin_lock(&filp->f_lock); |
686 | spin_lock(&fasync_lock); | 696 | spin_lock(&fasync_lock); |
@@ -691,8 +701,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa | |||
691 | spin_lock_irq(&fa->fa_lock); | 701 | spin_lock_irq(&fa->fa_lock); |
692 | fa->fa_fd = fd; | 702 | fa->fa_fd = fd; |
693 | spin_unlock_irq(&fa->fa_lock); | 703 | spin_unlock_irq(&fa->fa_lock); |
694 | |||
695 | kmem_cache_free(fasync_cache, new); | ||
696 | goto out; | 704 | goto out; |
697 | } | 705 | } |
698 | 706 | ||
@@ -702,13 +710,39 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa | |||
702 | new->fa_fd = fd; | 710 | new->fa_fd = fd; |
703 | new->fa_next = *fapp; | 711 | new->fa_next = *fapp; |
704 | rcu_assign_pointer(*fapp, new); | 712 | rcu_assign_pointer(*fapp, new); |
705 | result = 1; | ||
706 | filp->f_flags |= FASYNC; | 713 | filp->f_flags |= FASYNC; |
707 | 714 | ||
708 | out: | 715 | out: |
709 | spin_unlock(&fasync_lock); | 716 | spin_unlock(&fasync_lock); |
710 | spin_unlock(&filp->f_lock); | 717 | spin_unlock(&filp->f_lock); |
711 | return result; | 718 | return fa; |
719 | } | ||
720 | |||
721 | /* | ||
722 | * Add a fasync entry. Return negative on error, positive if | ||
723 | * added, and zero if did nothing but change an existing one. | ||
724 | */ | ||
725 | static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp) | ||
726 | { | ||
727 | struct fasync_struct *new; | ||
728 | |||
729 | new = fasync_alloc(); | ||
730 | if (!new) | ||
731 | return -ENOMEM; | ||
732 | |||
733 | /* | ||
734 | * fasync_insert_entry() returns the old (update) entry if | ||
735 | * it existed. | ||
736 | * | ||
737 | * So free the (unused) new entry and return 0 to let the | ||
738 | * caller know that we didn't add any new fasync entries. | ||
739 | */ | ||
740 | if (fasync_insert_entry(fd, filp, fapp, new)) { | ||
741 | fasync_free(new); | ||
742 | return 0; | ||
743 | } | ||
744 | |||
745 | return 1; | ||
712 | } | 746 | } |
713 | 747 | ||
714 | /* | 748 | /* |
@@ -151,4 +151,5 @@ err_nocleanup: | |||
151 | */ | 151 | */ |
152 | const struct file_operations def_fifo_fops = { | 152 | const struct file_operations def_fifo_fops = { |
153 | .open = fifo_open, /* will set read_ or write_pipefifo_fops */ | 153 | .open = fifo_open, /* will set read_ or write_pipefifo_fops */ |
154 | .llseek = noop_llseek, | ||
154 | }; | 155 | }; |
diff --git a/fs/file_table.c b/fs/file_table.c index a04bdd81c11c..c3dee381f1b4 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -60,7 +60,7 @@ static inline void file_free(struct file *f) | |||
60 | /* | 60 | /* |
61 | * Return the total number of open files in the system | 61 | * Return the total number of open files in the system |
62 | */ | 62 | */ |
63 | static int get_nr_files(void) | 63 | static long get_nr_files(void) |
64 | { | 64 | { |
65 | return percpu_counter_read_positive(&nr_files); | 65 | return percpu_counter_read_positive(&nr_files); |
66 | } | 66 | } |
@@ -68,7 +68,7 @@ static int get_nr_files(void) | |||
68 | /* | 68 | /* |
69 | * Return the maximum number of open files in the system | 69 | * Return the maximum number of open files in the system |
70 | */ | 70 | */ |
71 | int get_max_files(void) | 71 | unsigned long get_max_files(void) |
72 | { | 72 | { |
73 | return files_stat.max_files; | 73 | return files_stat.max_files; |
74 | } | 74 | } |
@@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write, | |||
82 | void __user *buffer, size_t *lenp, loff_t *ppos) | 82 | void __user *buffer, size_t *lenp, loff_t *ppos) |
83 | { | 83 | { |
84 | files_stat.nr_files = get_nr_files(); | 84 | files_stat.nr_files = get_nr_files(); |
85 | return proc_dointvec(table, write, buffer, lenp, ppos); | 85 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
86 | } | 86 | } |
87 | #else | 87 | #else |
88 | int proc_nr_files(ctl_table *table, int write, | 88 | int proc_nr_files(ctl_table *table, int write, |
@@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write, | |||
105 | struct file *get_empty_filp(void) | 105 | struct file *get_empty_filp(void) |
106 | { | 106 | { |
107 | const struct cred *cred = current_cred(); | 107 | const struct cred *cred = current_cred(); |
108 | static int old_max; | 108 | static long old_max; |
109 | struct file * f; | 109 | struct file * f; |
110 | 110 | ||
111 | /* | 111 | /* |
@@ -140,8 +140,7 @@ struct file *get_empty_filp(void) | |||
140 | over: | 140 | over: |
141 | /* Ran out of filps - report that */ | 141 | /* Ran out of filps - report that */ |
142 | if (get_nr_files() > old_max) { | 142 | if (get_nr_files() > old_max) { |
143 | printk(KERN_INFO "VFS: file-max limit %d reached\n", | 143 | pr_info("VFS: file-max limit %lu reached\n", get_max_files()); |
144 | get_max_files()); | ||
145 | old_max = get_nr_files(); | 144 | old_max = get_nr_files(); |
146 | } | 145 | } |
147 | goto fail; | 146 | goto fail; |
@@ -487,7 +486,7 @@ retry: | |||
487 | 486 | ||
488 | void __init files_init(unsigned long mempages) | 487 | void __init files_init(unsigned long mempages) |
489 | { | 488 | { |
490 | int n; | 489 | unsigned long n; |
491 | 490 | ||
492 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, | 491 | filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0, |
493 | SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | 492 | SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
@@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages) | |||
498 | */ | 497 | */ |
499 | 498 | ||
500 | n = (mempages * (PAGE_SIZE / 1024)) / 10; | 499 | n = (mempages * (PAGE_SIZE / 1024)) / 10; |
501 | files_stat.max_files = n; | 500 | files_stat.max_files = max_t(unsigned long, n, NR_FILE); |
502 | if (files_stat.max_files < NR_FILE) | ||
503 | files_stat.max_files = NR_FILE; | ||
504 | files_defer_init(); | 501 | files_defer_init(); |
505 | lg_lock_init(files_lglock); | 502 | lg_lock_init(files_lglock); |
506 | percpu_counter_init(&nr_files, 0); | 503 | percpu_counter_init(&nr_files, 0); |
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 79d1b4ea13e7..8c04eac5079d 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c | |||
@@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) | |||
260 | struct inode *ip = NULL; | 260 | struct inode *ip = NULL; |
261 | 261 | ||
262 | if ((ip = new_inode(sbp))) { | 262 | if ((ip = new_inode(sbp))) { |
263 | ip->i_ino = get_next_ino(); | ||
263 | vxfs_iinit(ip, vip); | 264 | vxfs_iinit(ip, vip); |
264 | ip->i_mapping->a_ops = &vxfs_aops; | 265 | ip->i_mapping->a_ops = &vxfs_aops; |
265 | } | 266 | } |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 0ec7bb2c95c6..6c5131d592f0 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
@@ -36,7 +36,6 @@ | |||
36 | #include <linux/highmem.h> | 36 | #include <linux/highmem.h> |
37 | #include <linux/kernel.h> | 37 | #include <linux/kernel.h> |
38 | #include <linux/pagemap.h> | 38 | #include <linux/pagemap.h> |
39 | #include <linux/smp_lock.h> | ||
40 | 39 | ||
41 | #include "vxfs.h" | 40 | #include "vxfs.h" |
42 | #include "vxfs_dir.h" | 41 | #include "vxfs_dir.h" |
@@ -212,16 +211,12 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, struct nameidata *nd) | |||
212 | if (dp->d_name.len > VXFS_NAMELEN) | 211 | if (dp->d_name.len > VXFS_NAMELEN) |
213 | return ERR_PTR(-ENAMETOOLONG); | 212 | return ERR_PTR(-ENAMETOOLONG); |
214 | 213 | ||
215 | lock_kernel(); | ||
216 | ino = vxfs_inode_by_name(dip, dp); | 214 | ino = vxfs_inode_by_name(dip, dp); |
217 | if (ino) { | 215 | if (ino) { |
218 | ip = vxfs_iget(dip->i_sb, ino); | 216 | ip = vxfs_iget(dip->i_sb, ino); |
219 | if (IS_ERR(ip)) { | 217 | if (IS_ERR(ip)) |
220 | unlock_kernel(); | ||
221 | return ERR_CAST(ip); | 218 | return ERR_CAST(ip); |
222 | } | ||
223 | } | 219 | } |
224 | unlock_kernel(); | ||
225 | d_add(dp, ip); | 220 | d_add(dp, ip); |
226 | return NULL; | 221 | return NULL; |
227 | } | 222 | } |
@@ -248,8 +243,6 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
248 | u_long page, npages, block, pblocks, nblocks, offset; | 243 | u_long page, npages, block, pblocks, nblocks, offset; |
249 | loff_t pos; | 244 | loff_t pos; |
250 | 245 | ||
251 | lock_kernel(); | ||
252 | |||
253 | switch ((long)fp->f_pos) { | 246 | switch ((long)fp->f_pos) { |
254 | case 0: | 247 | case 0: |
255 | if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) | 248 | if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) |
@@ -265,10 +258,8 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
265 | 258 | ||
266 | pos = fp->f_pos - 2; | 259 | pos = fp->f_pos - 2; |
267 | 260 | ||
268 | if (pos > VXFS_DIRROUND(ip->i_size)) { | 261 | if (pos > VXFS_DIRROUND(ip->i_size)) |
269 | unlock_kernel(); | ||
270 | return 0; | 262 | return 0; |
271 | } | ||
272 | 263 | ||
273 | npages = dir_pages(ip); | 264 | npages = dir_pages(ip); |
274 | nblocks = dir_blocks(ip); | 265 | nblocks = dir_blocks(ip); |
@@ -327,6 +318,5 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
327 | done: | 318 | done: |
328 | fp->f_pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; | 319 | fp->f_pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; |
329 | out: | 320 | out: |
330 | unlock_kernel(); | ||
331 | return 0; | 321 | return 0; |
332 | } | 322 | } |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index dc0c041e85cb..71b0148b8784 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -38,7 +38,6 @@ | |||
38 | #include <linux/buffer_head.h> | 38 | #include <linux/buffer_head.h> |
39 | #include <linux/kernel.h> | 39 | #include <linux/kernel.h> |
40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
41 | #include <linux/smp_lock.h> | ||
42 | #include <linux/stat.h> | 41 | #include <linux/stat.h> |
43 | #include <linux/vfs.h> | 42 | #include <linux/vfs.h> |
44 | #include <linux/mount.h> | 43 | #include <linux/mount.h> |
@@ -81,16 +80,12 @@ vxfs_put_super(struct super_block *sbp) | |||
81 | { | 80 | { |
82 | struct vxfs_sb_info *infp = VXFS_SBI(sbp); | 81 | struct vxfs_sb_info *infp = VXFS_SBI(sbp); |
83 | 82 | ||
84 | lock_kernel(); | ||
85 | |||
86 | vxfs_put_fake_inode(infp->vsi_fship); | 83 | vxfs_put_fake_inode(infp->vsi_fship); |
87 | vxfs_put_fake_inode(infp->vsi_ilist); | 84 | vxfs_put_fake_inode(infp->vsi_ilist); |
88 | vxfs_put_fake_inode(infp->vsi_stilist); | 85 | vxfs_put_fake_inode(infp->vsi_stilist); |
89 | 86 | ||
90 | brelse(infp->vsi_bp); | 87 | brelse(infp->vsi_bp); |
91 | kfree(infp); | 88 | kfree(infp); |
92 | |||
93 | unlock_kernel(); | ||
94 | } | 89 | } |
95 | 90 | ||
96 | /** | 91 | /** |
@@ -148,7 +143,7 @@ static int vxfs_remount(struct super_block *sb, int *flags, char *data) | |||
148 | * The superblock on success, else %NULL. | 143 | * The superblock on success, else %NULL. |
149 | * | 144 | * |
150 | * Locking: | 145 | * Locking: |
151 | * We are under the bkl and @sbp->s_lock. | 146 | * We are under @sbp->s_lock. |
152 | */ | 147 | */ |
153 | static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) | 148 | static int vxfs_fill_super(struct super_block *sbp, void *dp, int silent) |
154 | { | 149 | { |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 81e086d8aa57..aed881a76b22 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -52,8 +52,6 @@ struct wb_writeback_work { | |||
52 | #define CREATE_TRACE_POINTS | 52 | #define CREATE_TRACE_POINTS |
53 | #include <trace/events/writeback.h> | 53 | #include <trace/events/writeback.h> |
54 | 54 | ||
55 | #define inode_to_bdi(inode) ((inode)->i_mapping->backing_dev_info) | ||
56 | |||
57 | /* | 55 | /* |
58 | * We don't actually have pdflush, but this one is exported though /proc... | 56 | * We don't actually have pdflush, but this one is exported though /proc... |
59 | */ | 57 | */ |
@@ -71,6 +69,21 @@ int writeback_in_progress(struct backing_dev_info *bdi) | |||
71 | return test_bit(BDI_writeback_running, &bdi->state); | 69 | return test_bit(BDI_writeback_running, &bdi->state); |
72 | } | 70 | } |
73 | 71 | ||
72 | static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) | ||
73 | { | ||
74 | struct super_block *sb = inode->i_sb; | ||
75 | |||
76 | if (strcmp(sb->s_type->name, "bdev") == 0) | ||
77 | return inode->i_mapping->backing_dev_info; | ||
78 | |||
79 | return sb->s_bdi; | ||
80 | } | ||
81 | |||
82 | static inline struct inode *wb_inode(struct list_head *head) | ||
83 | { | ||
84 | return list_entry(head, struct inode, i_wb_list); | ||
85 | } | ||
86 | |||
74 | static void bdi_queue_work(struct backing_dev_info *bdi, | 87 | static void bdi_queue_work(struct backing_dev_info *bdi, |
75 | struct wb_writeback_work *work) | 88 | struct wb_writeback_work *work) |
76 | { | 89 | { |
@@ -164,11 +177,11 @@ static void redirty_tail(struct inode *inode) | |||
164 | if (!list_empty(&wb->b_dirty)) { | 177 | if (!list_empty(&wb->b_dirty)) { |
165 | struct inode *tail; | 178 | struct inode *tail; |
166 | 179 | ||
167 | tail = list_entry(wb->b_dirty.next, struct inode, i_list); | 180 | tail = wb_inode(wb->b_dirty.next); |
168 | if (time_before(inode->dirtied_when, tail->dirtied_when)) | 181 | if (time_before(inode->dirtied_when, tail->dirtied_when)) |
169 | inode->dirtied_when = jiffies; | 182 | inode->dirtied_when = jiffies; |
170 | } | 183 | } |
171 | list_move(&inode->i_list, &wb->b_dirty); | 184 | list_move(&inode->i_wb_list, &wb->b_dirty); |
172 | } | 185 | } |
173 | 186 | ||
174 | /* | 187 | /* |
@@ -178,7 +191,7 @@ static void requeue_io(struct inode *inode) | |||
178 | { | 191 | { |
179 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; | 192 | struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; |
180 | 193 | ||
181 | list_move(&inode->i_list, &wb->b_more_io); | 194 | list_move(&inode->i_wb_list, &wb->b_more_io); |
182 | } | 195 | } |
183 | 196 | ||
184 | static void inode_sync_complete(struct inode *inode) | 197 | static void inode_sync_complete(struct inode *inode) |
@@ -219,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
219 | int do_sb_sort = 0; | 232 | int do_sb_sort = 0; |
220 | 233 | ||
221 | while (!list_empty(delaying_queue)) { | 234 | while (!list_empty(delaying_queue)) { |
222 | inode = list_entry(delaying_queue->prev, struct inode, i_list); | 235 | inode = wb_inode(delaying_queue->prev); |
223 | if (older_than_this && | 236 | if (older_than_this && |
224 | inode_dirtied_after(inode, *older_than_this)) | 237 | inode_dirtied_after(inode, *older_than_this)) |
225 | break; | 238 | break; |
226 | if (sb && sb != inode->i_sb) | 239 | if (sb && sb != inode->i_sb) |
227 | do_sb_sort = 1; | 240 | do_sb_sort = 1; |
228 | sb = inode->i_sb; | 241 | sb = inode->i_sb; |
229 | list_move(&inode->i_list, &tmp); | 242 | list_move(&inode->i_wb_list, &tmp); |
230 | } | 243 | } |
231 | 244 | ||
232 | /* just one sb in list, splice to dispatch_queue and we're done */ | 245 | /* just one sb in list, splice to dispatch_queue and we're done */ |
@@ -237,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue, | |||
237 | 250 | ||
238 | /* Move inodes from one superblock together */ | 251 | /* Move inodes from one superblock together */ |
239 | while (!list_empty(&tmp)) { | 252 | while (!list_empty(&tmp)) { |
240 | inode = list_entry(tmp.prev, struct inode, i_list); | 253 | sb = wb_inode(tmp.prev)->i_sb; |
241 | sb = inode->i_sb; | ||
242 | list_for_each_prev_safe(pos, node, &tmp) { | 254 | list_for_each_prev_safe(pos, node, &tmp) { |
243 | inode = list_entry(pos, struct inode, i_list); | 255 | inode = wb_inode(pos); |
244 | if (inode->i_sb == sb) | 256 | if (inode->i_sb == sb) |
245 | list_move(&inode->i_list, dispatch_queue); | 257 | list_move(&inode->i_wb_list, dispatch_queue); |
246 | } | 258 | } |
247 | } | 259 | } |
248 | } | 260 | } |
@@ -400,16 +412,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
400 | * completion. | 412 | * completion. |
401 | */ | 413 | */ |
402 | redirty_tail(inode); | 414 | redirty_tail(inode); |
403 | } else if (atomic_read(&inode->i_count)) { | ||
404 | /* | ||
405 | * The inode is clean, inuse | ||
406 | */ | ||
407 | list_move(&inode->i_list, &inode_in_use); | ||
408 | } else { | 415 | } else { |
409 | /* | 416 | /* |
410 | * The inode is clean, unused | 417 | * The inode is clean. At this point we either have |
418 | * a reference to the inode or it's on it's way out. | ||
419 | * No need to add it back to the LRU. | ||
411 | */ | 420 | */ |
412 | list_move(&inode->i_list, &inode_unused); | 421 | list_del_init(&inode->i_wb_list); |
413 | } | 422 | } |
414 | } | 423 | } |
415 | inode_sync_complete(inode); | 424 | inode_sync_complete(inode); |
@@ -457,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
457 | { | 466 | { |
458 | while (!list_empty(&wb->b_io)) { | 467 | while (!list_empty(&wb->b_io)) { |
459 | long pages_skipped; | 468 | long pages_skipped; |
460 | struct inode *inode = list_entry(wb->b_io.prev, | 469 | struct inode *inode = wb_inode(wb->b_io.prev); |
461 | struct inode, i_list); | ||
462 | 470 | ||
463 | if (inode->i_sb != sb) { | 471 | if (inode->i_sb != sb) { |
464 | if (only_this_sb) { | 472 | if (only_this_sb) { |
@@ -479,10 +487,16 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
479 | return 0; | 487 | return 0; |
480 | } | 488 | } |
481 | 489 | ||
482 | if (inode->i_state & (I_NEW | I_WILL_FREE)) { | 490 | /* |
491 | * Don't bother with new inodes or inodes beeing freed, first | ||
492 | * kind does not need peridic writeout yet, and for the latter | ||
493 | * kind writeout is handled by the freer. | ||
494 | */ | ||
495 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { | ||
483 | requeue_io(inode); | 496 | requeue_io(inode); |
484 | continue; | 497 | continue; |
485 | } | 498 | } |
499 | |||
486 | /* | 500 | /* |
487 | * Was this inode dirtied after sync_sb_inodes was called? | 501 | * Was this inode dirtied after sync_sb_inodes was called? |
488 | * This keeps sync from extra jobs and livelock. | 502 | * This keeps sync from extra jobs and livelock. |
@@ -490,7 +504,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, | |||
490 | if (inode_dirtied_after(inode, wbc->wb_start)) | 504 | if (inode_dirtied_after(inode, wbc->wb_start)) |
491 | return 1; | 505 | return 1; |
492 | 506 | ||
493 | BUG_ON(inode->i_state & I_FREEING); | ||
494 | __iget(inode); | 507 | __iget(inode); |
495 | pages_skipped = wbc->pages_skipped; | 508 | pages_skipped = wbc->pages_skipped; |
496 | writeback_single_inode(inode, wbc); | 509 | writeback_single_inode(inode, wbc); |
@@ -528,8 +541,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, | |||
528 | queue_io(wb, wbc->older_than_this); | 541 | queue_io(wb, wbc->older_than_this); |
529 | 542 | ||
530 | while (!list_empty(&wb->b_io)) { | 543 | while (!list_empty(&wb->b_io)) { |
531 | struct inode *inode = list_entry(wb->b_io.prev, | 544 | struct inode *inode = wb_inode(wb->b_io.prev); |
532 | struct inode, i_list); | ||
533 | struct super_block *sb = inode->i_sb; | 545 | struct super_block *sb = inode->i_sb; |
534 | 546 | ||
535 | if (!pin_sb_for_writeback(sb)) { | 547 | if (!pin_sb_for_writeback(sb)) { |
@@ -574,7 +586,7 @@ static inline bool over_bground_thresh(void) | |||
574 | global_dirty_limits(&background_thresh, &dirty_thresh); | 586 | global_dirty_limits(&background_thresh, &dirty_thresh); |
575 | 587 | ||
576 | return (global_page_state(NR_FILE_DIRTY) + | 588 | return (global_page_state(NR_FILE_DIRTY) + |
577 | global_page_state(NR_UNSTABLE_NFS) >= background_thresh); | 589 | global_page_state(NR_UNSTABLE_NFS) > background_thresh); |
578 | } | 590 | } |
579 | 591 | ||
580 | /* | 592 | /* |
@@ -667,8 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb, | |||
667 | */ | 679 | */ |
668 | spin_lock(&inode_lock); | 680 | spin_lock(&inode_lock); |
669 | if (!list_empty(&wb->b_more_io)) { | 681 | if (!list_empty(&wb->b_more_io)) { |
670 | inode = list_entry(wb->b_more_io.prev, | 682 | inode = wb_inode(wb->b_more_io.prev); |
671 | struct inode, i_list); | ||
672 | trace_wbc_writeback_wait(&wbc, wb->bdi); | 683 | trace_wbc_writeback_wait(&wbc, wb->bdi); |
673 | inode_wait_for_writeback(inode); | 684 | inode_wait_for_writeback(inode); |
674 | } | 685 | } |
@@ -713,9 +724,13 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) | |||
713 | return 0; | 724 | return 0; |
714 | 725 | ||
715 | wb->last_old_flush = jiffies; | 726 | wb->last_old_flush = jiffies; |
727 | /* | ||
728 | * Add in the number of potentially dirty inodes, because each inode | ||
729 | * write can dirty pagecache in the underlying blockdev. | ||
730 | */ | ||
716 | nr_pages = global_page_state(NR_FILE_DIRTY) + | 731 | nr_pages = global_page_state(NR_FILE_DIRTY) + |
717 | global_page_state(NR_UNSTABLE_NFS) + | 732 | global_page_state(NR_UNSTABLE_NFS) + |
718 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | 733 | get_nr_dirty_inodes(); |
719 | 734 | ||
720 | if (nr_pages) { | 735 | if (nr_pages) { |
721 | struct wb_writeback_work work = { | 736 | struct wb_writeback_work work = { |
@@ -782,7 +797,7 @@ int bdi_writeback_thread(void *data) | |||
782 | struct backing_dev_info *bdi = wb->bdi; | 797 | struct backing_dev_info *bdi = wb->bdi; |
783 | long pages_written; | 798 | long pages_written; |
784 | 799 | ||
785 | current->flags |= PF_FLUSHER | PF_SWAPWRITE; | 800 | current->flags |= PF_SWAPWRITE; |
786 | set_freezable(); | 801 | set_freezable(); |
787 | wb->last_active = jiffies; | 802 | wb->last_active = jiffies; |
788 | 803 | ||
@@ -954,7 +969,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
954 | * dirty list. Add blockdev inodes as well. | 969 | * dirty list. Add blockdev inodes as well. |
955 | */ | 970 | */ |
956 | if (!S_ISBLK(inode->i_mode)) { | 971 | if (!S_ISBLK(inode->i_mode)) { |
957 | if (hlist_unhashed(&inode->i_hash)) | 972 | if (inode_unhashed(inode)) |
958 | goto out; | 973 | goto out; |
959 | } | 974 | } |
960 | if (inode->i_state & I_FREEING) | 975 | if (inode->i_state & I_FREEING) |
@@ -982,7 +997,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) | |||
982 | } | 997 | } |
983 | 998 | ||
984 | inode->dirtied_when = jiffies; | 999 | inode->dirtied_when = jiffies; |
985 | list_move(&inode->i_list, &bdi->wb.b_dirty); | 1000 | list_move(&inode->i_wb_list, &bdi->wb.b_dirty); |
986 | } | 1001 | } |
987 | } | 1002 | } |
988 | out: | 1003 | out: |
@@ -1082,8 +1097,7 @@ void writeback_inodes_sb(struct super_block *sb) | |||
1082 | 1097 | ||
1083 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); | 1098 | WARN_ON(!rwsem_is_locked(&sb->s_umount)); |
1084 | 1099 | ||
1085 | work.nr_pages = nr_dirty + nr_unstable + | 1100 | work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes(); |
1086 | (inodes_stat.nr_inodes - inodes_stat.nr_unused); | ||
1087 | 1101 | ||
1088 | bdi_queue_work(sb->s_bdi, &work); | 1102 | bdi_queue_work(sb->s_bdi, &work); |
1089 | wait_for_completion(&done); | 1103 | wait_for_completion(&done); |
@@ -1190,3 +1204,23 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc) | |||
1190 | return ret; | 1204 | return ret; |
1191 | } | 1205 | } |
1192 | EXPORT_SYMBOL(sync_inode); | 1206 | EXPORT_SYMBOL(sync_inode); |
1207 | |||
1208 | /** | ||
1209 | * sync_inode - write an inode to disk | ||
1210 | * @inode: the inode to sync | ||
1211 | * @wait: wait for I/O to complete. | ||
1212 | * | ||
1213 | * Write an inode to disk and adjust it's dirty state after completion. | ||
1214 | * | ||
1215 | * Note: only writes the actual inode, no associated data or other metadata. | ||
1216 | */ | ||
1217 | int sync_inode_metadata(struct inode *inode, int wait) | ||
1218 | { | ||
1219 | struct writeback_control wbc = { | ||
1220 | .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE, | ||
1221 | .nr_to_write = 0, /* metadata-only */ | ||
1222 | }; | ||
1223 | |||
1224 | return sync_inode(inode, &wbc); | ||
1225 | } | ||
1226 | EXPORT_SYMBOL(sync_inode_metadata); | ||
diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 3773fd63d2f9..4eba07661e5c 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c | |||
@@ -179,23 +179,27 @@ static ssize_t fuse_conn_congestion_threshold_write(struct file *file, | |||
179 | static const struct file_operations fuse_ctl_abort_ops = { | 179 | static const struct file_operations fuse_ctl_abort_ops = { |
180 | .open = nonseekable_open, | 180 | .open = nonseekable_open, |
181 | .write = fuse_conn_abort_write, | 181 | .write = fuse_conn_abort_write, |
182 | .llseek = no_llseek, | ||
182 | }; | 183 | }; |
183 | 184 | ||
184 | static const struct file_operations fuse_ctl_waiting_ops = { | 185 | static const struct file_operations fuse_ctl_waiting_ops = { |
185 | .open = nonseekable_open, | 186 | .open = nonseekable_open, |
186 | .read = fuse_conn_waiting_read, | 187 | .read = fuse_conn_waiting_read, |
188 | .llseek = no_llseek, | ||
187 | }; | 189 | }; |
188 | 190 | ||
189 | static const struct file_operations fuse_conn_max_background_ops = { | 191 | static const struct file_operations fuse_conn_max_background_ops = { |
190 | .open = nonseekable_open, | 192 | .open = nonseekable_open, |
191 | .read = fuse_conn_max_background_read, | 193 | .read = fuse_conn_max_background_read, |
192 | .write = fuse_conn_max_background_write, | 194 | .write = fuse_conn_max_background_write, |
195 | .llseek = no_llseek, | ||
193 | }; | 196 | }; |
194 | 197 | ||
195 | static const struct file_operations fuse_conn_congestion_threshold_ops = { | 198 | static const struct file_operations fuse_conn_congestion_threshold_ops = { |
196 | .open = nonseekable_open, | 199 | .open = nonseekable_open, |
197 | .read = fuse_conn_congestion_threshold_read, | 200 | .read = fuse_conn_congestion_threshold_read, |
198 | .write = fuse_conn_congestion_threshold_write, | 201 | .write = fuse_conn_congestion_threshold_write, |
202 | .llseek = no_llseek, | ||
199 | }; | 203 | }; |
200 | 204 | ||
201 | static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, | 205 | static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, |
@@ -218,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, | |||
218 | if (!inode) | 222 | if (!inode) |
219 | return NULL; | 223 | return NULL; |
220 | 224 | ||
225 | inode->i_ino = get_next_ino(); | ||
221 | inode->i_mode = mode; | 226 | inode->i_mode = mode; |
222 | inode->i_uid = fc->user_id; | 227 | inode->i_uid = fc->user_id; |
223 | inode->i_gid = fc->group_id; | 228 | inode->i_gid = fc->group_id; |
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index e1f8171278bd..3e87cce5837d 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
@@ -182,6 +182,7 @@ static const struct file_operations cuse_frontend_fops = { | |||
182 | .unlocked_ioctl = cuse_file_ioctl, | 182 | .unlocked_ioctl = cuse_file_ioctl, |
183 | .compat_ioctl = cuse_file_compat_ioctl, | 183 | .compat_ioctl = cuse_file_compat_ioctl, |
184 | .poll = fuse_file_poll, | 184 | .poll = fuse_file_poll, |
185 | .llseek = noop_llseek, | ||
185 | }; | 186 | }; |
186 | 187 | ||
187 | 188 | ||
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index d367af1514ef..6e07696308dc 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c | |||
@@ -809,11 +809,9 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, | |||
809 | int err; | 809 | int err; |
810 | struct page *page = *pagep; | 810 | struct page *page = *pagep; |
811 | 811 | ||
812 | if (page && zeroing && count < PAGE_SIZE) { | 812 | if (page && zeroing && count < PAGE_SIZE) |
813 | void *mapaddr = kmap_atomic(page, KM_USER1); | 813 | clear_highpage(page); |
814 | memset(mapaddr, 0, PAGE_SIZE); | 814 | |
815 | kunmap_atomic(mapaddr, KM_USER1); | ||
816 | } | ||
817 | while (count) { | 815 | while (count) { |
818 | if (cs->write && cs->pipebufs && page) { | 816 | if (cs->write && cs->pipebufs && page) { |
819 | return fuse_ref_page(cs, page, offset, count); | 817 | return fuse_ref_page(cs, page, offset, count); |
@@ -830,10 +828,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, | |||
830 | } | 828 | } |
831 | } | 829 | } |
832 | if (page) { | 830 | if (page) { |
833 | void *mapaddr = kmap_atomic(page, KM_USER1); | 831 | void *mapaddr = kmap_atomic(page, KM_USER0); |
834 | void *buf = mapaddr + offset; | 832 | void *buf = mapaddr + offset; |
835 | offset += fuse_copy_do(cs, &buf, &count); | 833 | offset += fuse_copy_do(cs, &buf, &count); |
836 | kunmap_atomic(mapaddr, KM_USER1); | 834 | kunmap_atomic(mapaddr, KM_USER0); |
837 | } else | 835 | } else |
838 | offset += fuse_copy_do(cs, NULL, &count); | 836 | offset += fuse_copy_do(cs, NULL, &count); |
839 | } | 837 | } |
@@ -1336,12 +1334,7 @@ out_finish: | |||
1336 | 1334 | ||
1337 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) | 1335 | static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req) |
1338 | { | 1336 | { |
1339 | int i; | 1337 | release_pages(req->pages, req->num_pages, 0); |
1340 | |||
1341 | for (i = 0; i < req->num_pages; i++) { | ||
1342 | struct page *page = req->pages[i]; | ||
1343 | page_cache_release(page); | ||
1344 | } | ||
1345 | } | 1338 | } |
1346 | 1339 | ||
1347 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | 1340 | static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, |
@@ -1354,7 +1347,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode, | |||
1354 | loff_t file_size; | 1347 | loff_t file_size; |
1355 | unsigned int num; | 1348 | unsigned int num; |
1356 | unsigned int offset; | 1349 | unsigned int offset; |
1357 | size_t total_len; | 1350 | size_t total_len = 0; |
1358 | 1351 | ||
1359 | req = fuse_get_req(fc); | 1352 | req = fuse_get_req(fc); |
1360 | if (IS_ERR(req)) | 1353 | if (IS_ERR(req)) |
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index cc9665522148..c465ae066c62 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
@@ -1,6 +1,6 @@ | |||
1 | config GFS2_FS | 1 | config GFS2_FS |
2 | tristate "GFS2 file system support" | 2 | tristate "GFS2 file system support" |
3 | depends on EXPERIMENTAL && (64BIT || LBDAF) | 3 | depends on (64BIT || LBDAF) |
4 | select DLM if GFS2_FS_LOCKING_DLM | 4 | select DLM if GFS2_FS_LOCKING_DLM |
5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM | 5 | select CONFIGFS_FS if GFS2_FS_LOCKING_DLM |
6 | select SYSFS if GFS2_FS_LOCKING_DLM | 6 | select SYSFS if GFS2_FS_LOCKING_DLM |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 194fe16d8418..4f36f8832b9b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -36,8 +36,8 @@ | |||
36 | #include "glops.h" | 36 | #include "glops.h" |
37 | 37 | ||
38 | 38 | ||
39 | static void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | 39 | void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, |
40 | unsigned int from, unsigned int to) | 40 | unsigned int from, unsigned int to) |
41 | { | 41 | { |
42 | struct buffer_head *head = page_buffers(page); | 42 | struct buffer_head *head = page_buffers(page); |
43 | unsigned int bsize = head->b_size; | 43 | unsigned int bsize = head->b_size; |
@@ -615,10 +615,9 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | 615 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; |
616 | int alloc_required; | 616 | int alloc_required; |
617 | int error = 0; | 617 | int error = 0; |
618 | struct gfs2_alloc *al; | 618 | struct gfs2_alloc *al = NULL; |
619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | 619 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); | 620 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
621 | unsigned to = from + len; | ||
622 | struct page *page; | 621 | struct page *page; |
623 | 622 | ||
624 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | 623 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); |
@@ -663,6 +662,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
663 | rblocks += RES_STATFS + RES_QUOTA; | 662 | rblocks += RES_STATFS + RES_QUOTA; |
664 | if (&ip->i_inode == sdp->sd_rindex) | 663 | if (&ip->i_inode == sdp->sd_rindex) |
665 | rblocks += 2 * RES_STATFS; | 664 | rblocks += 2 * RES_STATFS; |
665 | if (alloc_required) | ||
666 | rblocks += gfs2_rg_blocks(al); | ||
666 | 667 | ||
667 | error = gfs2_trans_begin(sdp, rblocks, | 668 | error = gfs2_trans_begin(sdp, rblocks, |
668 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | 669 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); |
@@ -689,20 +690,18 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
689 | } | 690 | } |
690 | 691 | ||
691 | prepare_write: | 692 | prepare_write: |
692 | error = block_prepare_write(page, from, to, gfs2_block_map); | 693 | error = __block_write_begin(page, from, len, gfs2_block_map); |
693 | out: | 694 | out: |
694 | if (error == 0) | 695 | if (error == 0) |
695 | return 0; | 696 | return 0; |
696 | 697 | ||
697 | page_cache_release(page); | 698 | page_cache_release(page); |
698 | 699 | ||
699 | /* | 700 | gfs2_trans_end(sdp); |
700 | * XXX(truncate): the call below should probably be replaced with | ||
701 | * a call to the gfs2-specific truncate blocks helper to actually | ||
702 | * release disk blocks.. | ||
703 | */ | ||
704 | if (pos + len > ip->i_inode.i_size) | 701 | if (pos + len > ip->i_inode.i_size) |
705 | truncate_setsize(&ip->i_inode, ip->i_inode.i_size); | 702 | gfs2_trim_blocks(&ip->i_inode); |
703 | goto out_trans_fail; | ||
704 | |||
706 | out_endtrans: | 705 | out_endtrans: |
707 | gfs2_trans_end(sdp); | 706 | gfs2_trans_end(sdp); |
708 | out_trans_fail: | 707 | out_trans_fail: |
@@ -802,10 +801,8 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, | |||
802 | page_cache_release(page); | 801 | page_cache_release(page); |
803 | 802 | ||
804 | if (copied) { | 803 | if (copied) { |
805 | if (inode->i_size < to) { | 804 | if (inode->i_size < to) |
806 | i_size_write(inode, to); | 805 | i_size_write(inode, to); |
807 | ip->i_disksize = inode->i_size; | ||
808 | } | ||
809 | gfs2_dinode_out(ip, di); | 806 | gfs2_dinode_out(ip, di); |
810 | mark_inode_dirty(inode); | 807 | mark_inode_dirty(inode); |
811 | } | 808 | } |
@@ -876,8 +873,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, | |||
876 | 873 | ||
877 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); | 874 | ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); |
878 | if (ret > 0) { | 875 | if (ret > 0) { |
879 | if (inode->i_size > ip->i_disksize) | ||
880 | ip->i_disksize = inode->i_size; | ||
881 | gfs2_dinode_out(ip, dibh->b_data); | 876 | gfs2_dinode_out(ip, dibh->b_data); |
882 | mark_inode_dirty(inode); | 877 | mark_inode_dirty(inode); |
883 | } | 878 | } |
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 6f482809d1a3..5476c066d4ee 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -50,7 +50,7 @@ struct strip_mine { | |||
50 | * @ip: the inode | 50 | * @ip: the inode |
51 | * @dibh: the dinode buffer | 51 | * @dibh: the dinode buffer |
52 | * @block: the block number that was allocated | 52 | * @block: the block number that was allocated |
53 | * @private: any locked page held by the caller process | 53 | * @page: The (optional) page. This is looked up if @page is NULL |
54 | * | 54 | * |
55 | * Returns: errno | 55 | * Returns: errno |
56 | */ | 56 | */ |
@@ -109,8 +109,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh, | |||
109 | /** | 109 | /** |
110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big | 110 | * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big |
111 | * @ip: The GFS2 inode to unstuff | 111 | * @ip: The GFS2 inode to unstuff |
112 | * @unstuffer: the routine that handles unstuffing a non-zero length file | 112 | * @page: The (optional) page. This is looked up if the @page is NULL |
113 | * @private: private data for the unstuffer | ||
114 | * | 113 | * |
115 | * This routine unstuffs a dinode and returns it to a "normal" state such | 114 | * This routine unstuffs a dinode and returns it to a "normal" state such |
116 | * that the height can be grown in the traditional way. | 115 | * that the height can be grown in the traditional way. |
@@ -132,7 +131,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
132 | if (error) | 131 | if (error) |
133 | goto out; | 132 | goto out; |
134 | 133 | ||
135 | if (ip->i_disksize) { | 134 | if (i_size_read(&ip->i_inode)) { |
136 | /* Get a free block, fill it with the stuffed data, | 135 | /* Get a free block, fill it with the stuffed data, |
137 | and write it out to disk */ | 136 | and write it out to disk */ |
138 | 137 | ||
@@ -161,7 +160,7 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) | |||
161 | di = (struct gfs2_dinode *)dibh->b_data; | 160 | di = (struct gfs2_dinode *)dibh->b_data; |
162 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 161 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
163 | 162 | ||
164 | if (ip->i_disksize) { | 163 | if (i_size_read(&ip->i_inode)) { |
165 | *(__be64 *)(di + 1) = cpu_to_be64(block); | 164 | *(__be64 *)(di + 1) = cpu_to_be64(block); |
166 | gfs2_add_inode_blocks(&ip->i_inode, 1); | 165 | gfs2_add_inode_blocks(&ip->i_inode, 1); |
167 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 166 | di->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
@@ -885,83 +884,14 @@ out: | |||
885 | } | 884 | } |
886 | 885 | ||
887 | /** | 886 | /** |
888 | * do_grow - Make a file look bigger than it is | ||
889 | * @ip: the inode | ||
890 | * @size: the size to set the file to | ||
891 | * | ||
892 | * Called with an exclusive lock on @ip. | ||
893 | * | ||
894 | * Returns: errno | ||
895 | */ | ||
896 | |||
897 | static int do_grow(struct gfs2_inode *ip, u64 size) | ||
898 | { | ||
899 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
900 | struct gfs2_alloc *al; | ||
901 | struct buffer_head *dibh; | ||
902 | int error; | ||
903 | |||
904 | al = gfs2_alloc_get(ip); | ||
905 | if (!al) | ||
906 | return -ENOMEM; | ||
907 | |||
908 | error = gfs2_quota_lock_check(ip); | ||
909 | if (error) | ||
910 | goto out; | ||
911 | |||
912 | al->al_requested = sdp->sd_max_height + RES_DATA; | ||
913 | |||
914 | error = gfs2_inplace_reserve(ip); | ||
915 | if (error) | ||
916 | goto out_gunlock_q; | ||
917 | |||
918 | error = gfs2_trans_begin(sdp, | ||
919 | sdp->sd_max_height + al->al_rgd->rd_length + | ||
920 | RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0); | ||
921 | if (error) | ||
922 | goto out_ipres; | ||
923 | |||
924 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
925 | if (error) | ||
926 | goto out_end_trans; | ||
927 | |||
928 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) { | ||
929 | if (gfs2_is_stuffed(ip)) { | ||
930 | error = gfs2_unstuff_dinode(ip, NULL); | ||
931 | if (error) | ||
932 | goto out_brelse; | ||
933 | } | ||
934 | } | ||
935 | |||
936 | ip->i_disksize = size; | ||
937 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
938 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
939 | gfs2_dinode_out(ip, dibh->b_data); | ||
940 | |||
941 | out_brelse: | ||
942 | brelse(dibh); | ||
943 | out_end_trans: | ||
944 | gfs2_trans_end(sdp); | ||
945 | out_ipres: | ||
946 | gfs2_inplace_release(ip); | ||
947 | out_gunlock_q: | ||
948 | gfs2_quota_unlock(ip); | ||
949 | out: | ||
950 | gfs2_alloc_put(ip); | ||
951 | return error; | ||
952 | } | ||
953 | |||
954 | |||
955 | /** | ||
956 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate | 887 | * gfs2_block_truncate_page - Deal with zeroing out data for truncate |
957 | * | 888 | * |
958 | * This is partly borrowed from ext3. | 889 | * This is partly borrowed from ext3. |
959 | */ | 890 | */ |
960 | static int gfs2_block_truncate_page(struct address_space *mapping) | 891 | static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from) |
961 | { | 892 | { |
962 | struct inode *inode = mapping->host; | 893 | struct inode *inode = mapping->host; |
963 | struct gfs2_inode *ip = GFS2_I(inode); | 894 | struct gfs2_inode *ip = GFS2_I(inode); |
964 | loff_t from = inode->i_size; | ||
965 | unsigned long index = from >> PAGE_CACHE_SHIFT; | 895 | unsigned long index = from >> PAGE_CACHE_SHIFT; |
966 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | 896 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
967 | unsigned blocksize, iblock, length, pos; | 897 | unsigned blocksize, iblock, length, pos; |
@@ -1023,9 +953,11 @@ unlock: | |||
1023 | return err; | 953 | return err; |
1024 | } | 954 | } |
1025 | 955 | ||
1026 | static int trunc_start(struct gfs2_inode *ip, u64 size) | 956 | static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize) |
1027 | { | 957 | { |
1028 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 958 | struct gfs2_inode *ip = GFS2_I(inode); |
959 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
960 | struct address_space *mapping = inode->i_mapping; | ||
1029 | struct buffer_head *dibh; | 961 | struct buffer_head *dibh; |
1030 | int journaled = gfs2_is_jdata(ip); | 962 | int journaled = gfs2_is_jdata(ip); |
1031 | int error; | 963 | int error; |
@@ -1039,31 +971,26 @@ static int trunc_start(struct gfs2_inode *ip, u64 size) | |||
1039 | if (error) | 971 | if (error) |
1040 | goto out; | 972 | goto out; |
1041 | 973 | ||
974 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
975 | |||
1042 | if (gfs2_is_stuffed(ip)) { | 976 | if (gfs2_is_stuffed(ip)) { |
1043 | u64 dsize = size + sizeof(struct gfs2_dinode); | 977 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + newsize); |
1044 | ip->i_disksize = size; | ||
1045 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
1046 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1047 | gfs2_dinode_out(ip, dibh->b_data); | ||
1048 | if (dsize > dibh->b_size) | ||
1049 | dsize = dibh->b_size; | ||
1050 | gfs2_buffer_clear_tail(dibh, dsize); | ||
1051 | error = 1; | ||
1052 | } else { | 978 | } else { |
1053 | if (size & (u64)(sdp->sd_sb.sb_bsize - 1)) | 979 | if (newsize & (u64)(sdp->sd_sb.sb_bsize - 1)) { |
1054 | error = gfs2_block_truncate_page(ip->i_inode.i_mapping); | 980 | error = gfs2_block_truncate_page(mapping, newsize); |
1055 | 981 | if (error) | |
1056 | if (!error) { | 982 | goto out_brelse; |
1057 | ip->i_disksize = size; | ||
1058 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
1059 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
1060 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1061 | gfs2_dinode_out(ip, dibh->b_data); | ||
1062 | } | 983 | } |
984 | ip->i_diskflags |= GFS2_DIF_TRUNC_IN_PROG; | ||
1063 | } | 985 | } |
1064 | 986 | ||
1065 | brelse(dibh); | 987 | i_size_write(inode, newsize); |
988 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | ||
989 | gfs2_dinode_out(ip, dibh->b_data); | ||
1066 | 990 | ||
991 | truncate_pagecache(inode, oldsize, newsize); | ||
992 | out_brelse: | ||
993 | brelse(dibh); | ||
1067 | out: | 994 | out: |
1068 | gfs2_trans_end(sdp); | 995 | gfs2_trans_end(sdp); |
1069 | return error; | 996 | return error; |
@@ -1123,7 +1050,7 @@ static int trunc_end(struct gfs2_inode *ip) | |||
1123 | if (error) | 1050 | if (error) |
1124 | goto out; | 1051 | goto out; |
1125 | 1052 | ||
1126 | if (!ip->i_disksize) { | 1053 | if (!i_size_read(&ip->i_inode)) { |
1127 | ip->i_height = 0; | 1054 | ip->i_height = 0; |
1128 | ip->i_goal = ip->i_no_addr; | 1055 | ip->i_goal = ip->i_no_addr; |
1129 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); | 1056 | gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode)); |
@@ -1143,92 +1070,154 @@ out: | |||
1143 | 1070 | ||
1144 | /** | 1071 | /** |
1145 | * do_shrink - make a file smaller | 1072 | * do_shrink - make a file smaller |
1146 | * @ip: the inode | 1073 | * @inode: the inode |
1147 | * @size: the size to make the file | 1074 | * @oldsize: the current inode size |
1148 | * @truncator: function to truncate the last partial block | 1075 | * @newsize: the size to make the file |
1149 | * | 1076 | * |
1150 | * Called with an exclusive lock on @ip. | 1077 | * Called with an exclusive lock on @inode. The @size must |
1078 | * be equal to or smaller than the current inode size. | ||
1151 | * | 1079 | * |
1152 | * Returns: errno | 1080 | * Returns: errno |
1153 | */ | 1081 | */ |
1154 | 1082 | ||
1155 | static int do_shrink(struct gfs2_inode *ip, u64 size) | 1083 | static int do_shrink(struct inode *inode, u64 oldsize, u64 newsize) |
1156 | { | 1084 | { |
1085 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1157 | int error; | 1086 | int error; |
1158 | 1087 | ||
1159 | error = trunc_start(ip, size); | 1088 | error = trunc_start(inode, oldsize, newsize); |
1160 | if (error < 0) | 1089 | if (error < 0) |
1161 | return error; | 1090 | return error; |
1162 | if (error > 0) | 1091 | if (gfs2_is_stuffed(ip)) |
1163 | return 0; | 1092 | return 0; |
1164 | 1093 | ||
1165 | error = trunc_dealloc(ip, size); | 1094 | error = trunc_dealloc(ip, newsize); |
1166 | if (!error) | 1095 | if (error == 0) |
1167 | error = trunc_end(ip); | 1096 | error = trunc_end(ip); |
1168 | 1097 | ||
1169 | return error; | 1098 | return error; |
1170 | } | 1099 | } |
1171 | 1100 | ||
1172 | static int do_touch(struct gfs2_inode *ip, u64 size) | 1101 | void gfs2_trim_blocks(struct inode *inode) |
1173 | { | 1102 | { |
1174 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1103 | u64 size = inode->i_size; |
1104 | int ret; | ||
1105 | |||
1106 | ret = do_shrink(inode, size, size); | ||
1107 | WARN_ON(ret != 0); | ||
1108 | } | ||
1109 | |||
1110 | /** | ||
1111 | * do_grow - Touch and update inode size | ||
1112 | * @inode: The inode | ||
1113 | * @size: The new size | ||
1114 | * | ||
1115 | * This function updates the timestamps on the inode and | ||
1116 | * may also increase the size of the inode. This function | ||
1117 | * must not be called with @size any smaller than the current | ||
1118 | * inode size. | ||
1119 | * | ||
1120 | * Although it is not strictly required to unstuff files here, | ||
1121 | * earlier versions of GFS2 have a bug in the stuffed file reading | ||
1122 | * code which will result in a buffer overrun if the size is larger | ||
1123 | * than the max stuffed file size. In order to prevent this from | ||
1124 | * occuring, such files are unstuffed, but in other cases we can | ||
1125 | * just update the inode size directly. | ||
1126 | * | ||
1127 | * Returns: 0 on success, or -ve on error | ||
1128 | */ | ||
1129 | |||
1130 | static int do_grow(struct inode *inode, u64 size) | ||
1131 | { | ||
1132 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1133 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1175 | struct buffer_head *dibh; | 1134 | struct buffer_head *dibh; |
1135 | struct gfs2_alloc *al = NULL; | ||
1176 | int error; | 1136 | int error; |
1177 | 1137 | ||
1178 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 1138 | if (gfs2_is_stuffed(ip) && |
1139 | (size > (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)))) { | ||
1140 | al = gfs2_alloc_get(ip); | ||
1141 | if (al == NULL) | ||
1142 | return -ENOMEM; | ||
1143 | |||
1144 | error = gfs2_quota_lock_check(ip); | ||
1145 | if (error) | ||
1146 | goto do_grow_alloc_put; | ||
1147 | |||
1148 | al->al_requested = 1; | ||
1149 | error = gfs2_inplace_reserve(ip); | ||
1150 | if (error) | ||
1151 | goto do_grow_qunlock; | ||
1152 | } | ||
1153 | |||
1154 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); | ||
1179 | if (error) | 1155 | if (error) |
1180 | return error; | 1156 | goto do_grow_release; |
1181 | 1157 | ||
1182 | down_write(&ip->i_rw_mutex); | 1158 | if (al) { |
1159 | error = gfs2_unstuff_dinode(ip, NULL); | ||
1160 | if (error) | ||
1161 | goto do_end_trans; | ||
1162 | } | ||
1183 | 1163 | ||
1184 | error = gfs2_meta_inode_buffer(ip, &dibh); | 1164 | error = gfs2_meta_inode_buffer(ip, &dibh); |
1185 | if (error) | 1165 | if (error) |
1186 | goto do_touch_out; | 1166 | goto do_end_trans; |
1187 | 1167 | ||
1168 | i_size_write(inode, size); | ||
1188 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 1169 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
1189 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 1170 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
1190 | gfs2_dinode_out(ip, dibh->b_data); | 1171 | gfs2_dinode_out(ip, dibh->b_data); |
1191 | brelse(dibh); | 1172 | brelse(dibh); |
1192 | 1173 | ||
1193 | do_touch_out: | 1174 | do_end_trans: |
1194 | up_write(&ip->i_rw_mutex); | ||
1195 | gfs2_trans_end(sdp); | 1175 | gfs2_trans_end(sdp); |
1176 | do_grow_release: | ||
1177 | if (al) { | ||
1178 | gfs2_inplace_release(ip); | ||
1179 | do_grow_qunlock: | ||
1180 | gfs2_quota_unlock(ip); | ||
1181 | do_grow_alloc_put: | ||
1182 | gfs2_alloc_put(ip); | ||
1183 | } | ||
1196 | return error; | 1184 | return error; |
1197 | } | 1185 | } |
1198 | 1186 | ||
1199 | /** | 1187 | /** |
1200 | * gfs2_truncatei - make a file a given size | 1188 | * gfs2_setattr_size - make a file a given size |
1201 | * @ip: the inode | 1189 | * @inode: the inode |
1202 | * @size: the size to make the file | 1190 | * @newsize: the size to make the file |
1203 | * @truncator: function to truncate the last partial block | ||
1204 | * | 1191 | * |
1205 | * The file size can grow, shrink, or stay the same size. | 1192 | * The file size can grow, shrink, or stay the same size. This |
1193 | * is called holding i_mutex and an exclusive glock on the inode | ||
1194 | * in question. | ||
1206 | * | 1195 | * |
1207 | * Returns: errno | 1196 | * Returns: errno |
1208 | */ | 1197 | */ |
1209 | 1198 | ||
1210 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size) | 1199 | int gfs2_setattr_size(struct inode *inode, u64 newsize) |
1211 | { | 1200 | { |
1212 | int error; | 1201 | int ret; |
1202 | u64 oldsize; | ||
1213 | 1203 | ||
1214 | if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode))) | 1204 | BUG_ON(!S_ISREG(inode->i_mode)); |
1215 | return -EINVAL; | ||
1216 | 1205 | ||
1217 | if (size > ip->i_disksize) | 1206 | ret = inode_newsize_ok(inode, newsize); |
1218 | error = do_grow(ip, size); | 1207 | if (ret) |
1219 | else if (size < ip->i_disksize) | 1208 | return ret; |
1220 | error = do_shrink(ip, size); | ||
1221 | else | ||
1222 | /* update time stamps */ | ||
1223 | error = do_touch(ip, size); | ||
1224 | 1209 | ||
1225 | return error; | 1210 | oldsize = inode->i_size; |
1211 | if (newsize >= oldsize) | ||
1212 | return do_grow(inode, newsize); | ||
1213 | |||
1214 | return do_shrink(inode, oldsize, newsize); | ||
1226 | } | 1215 | } |
1227 | 1216 | ||
1228 | int gfs2_truncatei_resume(struct gfs2_inode *ip) | 1217 | int gfs2_truncatei_resume(struct gfs2_inode *ip) |
1229 | { | 1218 | { |
1230 | int error; | 1219 | int error; |
1231 | error = trunc_dealloc(ip, ip->i_disksize); | 1220 | error = trunc_dealloc(ip, i_size_read(&ip->i_inode)); |
1232 | if (!error) | 1221 | if (!error) |
1233 | error = trunc_end(ip); | 1222 | error = trunc_end(ip); |
1234 | return error; | 1223 | return error; |
@@ -1269,7 +1258,7 @@ int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | |||
1269 | 1258 | ||
1270 | shift = sdp->sd_sb.sb_bsize_shift; | 1259 | shift = sdp->sd_sb.sb_bsize_shift; |
1271 | BUG_ON(gfs2_is_dir(ip)); | 1260 | BUG_ON(gfs2_is_dir(ip)); |
1272 | end_of_file = (ip->i_disksize + sdp->sd_sb.sb_bsize - 1) >> shift; | 1261 | end_of_file = (i_size_read(&ip->i_inode) + sdp->sd_sb.sb_bsize - 1) >> shift; |
1273 | lblock = offset >> shift; | 1262 | lblock = offset >> shift; |
1274 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; | 1263 | lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift; |
1275 | if (lblock_stop > end_of_file) | 1264 | if (lblock_stop > end_of_file) |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index a20a5213135a..42fea03e2bd9 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
@@ -44,14 +44,16 @@ static inline void gfs2_write_calc_reserv(const struct gfs2_inode *ip, | |||
44 | } | 44 | } |
45 | } | 45 | } |
46 | 46 | ||
47 | int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); | 47 | extern int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page); |
48 | int gfs2_block_map(struct inode *inode, sector_t lblock, struct buffer_head *bh, int create); | 48 | extern int gfs2_block_map(struct inode *inode, sector_t lblock, |
49 | int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen); | 49 | struct buffer_head *bh, int create); |
50 | 50 | extern int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, | |
51 | int gfs2_truncatei(struct gfs2_inode *ip, u64 size); | 51 | u64 *dblock, unsigned *extlen); |
52 | int gfs2_truncatei_resume(struct gfs2_inode *ip); | 52 | extern int gfs2_setattr_size(struct inode *inode, u64 size); |
53 | int gfs2_file_dealloc(struct gfs2_inode *ip); | 53 | extern void gfs2_trim_blocks(struct inode *inode); |
54 | int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | 54 | extern int gfs2_truncatei_resume(struct gfs2_inode *ip); |
55 | unsigned int len); | 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); |
56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | ||
57 | unsigned int len); | ||
56 | 58 | ||
57 | #endif /* __BMAP_DOT_H__ */ | 59 | #endif /* __BMAP_DOT_H__ */ |
diff --git a/fs/gfs2/dentry.c b/fs/gfs2/dentry.c index bb7907bde3d8..6798755b3858 100644 --- a/fs/gfs2/dentry.c +++ b/fs/gfs2/dentry.c | |||
@@ -49,7 +49,7 @@ static int gfs2_drevalidate(struct dentry *dentry, struct nameidata *nd) | |||
49 | ip = GFS2_I(inode); | 49 | ip = GFS2_I(inode); |
50 | } | 50 | } |
51 | 51 | ||
52 | if (sdp->sd_args.ar_localcaching) | 52 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
53 | goto valid; | 53 | goto valid; |
54 | 54 | ||
55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); | 55 | had_lock = (gfs2_glock_is_locked_by_me(dip->i_gl) != NULL); |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index b9dd88a78dd4..5c356d09c321 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -79,6 +79,9 @@ | |||
79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) | 79 | #define gfs2_disk_hash2offset(h) (((u64)(h)) >> 1) |
80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) | 80 | #define gfs2_dir_offset2hash(p) ((u32)(((u64)(p)) << 1)) |
81 | 81 | ||
82 | struct qstr gfs2_qdot __read_mostly; | ||
83 | struct qstr gfs2_qdotdot __read_mostly; | ||
84 | |||
82 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, | 85 | typedef int (*leaf_call_t) (struct gfs2_inode *dip, u32 index, u32 len, |
83 | u64 leaf_no, void *data); | 86 | u64 leaf_no, void *data); |
84 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, | 87 | typedef int (*gfs2_dscan_t)(const struct gfs2_dirent *dent, |
@@ -127,8 +130,8 @@ static int gfs2_dir_write_stuffed(struct gfs2_inode *ip, const char *buf, | |||
127 | 130 | ||
128 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 131 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
129 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); | 132 | memcpy(dibh->b_data + offset + sizeof(struct gfs2_dinode), buf, size); |
130 | if (ip->i_disksize < offset + size) | 133 | if (ip->i_inode.i_size < offset + size) |
131 | ip->i_disksize = offset + size; | 134 | i_size_write(&ip->i_inode, offset + size); |
132 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 135 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
133 | gfs2_dinode_out(ip, dibh->b_data); | 136 | gfs2_dinode_out(ip, dibh->b_data); |
134 | 137 | ||
@@ -225,8 +228,8 @@ out: | |||
225 | if (error) | 228 | if (error) |
226 | return error; | 229 | return error; |
227 | 230 | ||
228 | if (ip->i_disksize < offset + copied) | 231 | if (ip->i_inode.i_size < offset + copied) |
229 | ip->i_disksize = offset + copied; | 232 | i_size_write(&ip->i_inode, offset + copied); |
230 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; | 233 | ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME; |
231 | 234 | ||
232 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 235 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
@@ -275,12 +278,13 @@ static int gfs2_dir_read_data(struct gfs2_inode *ip, char *buf, u64 offset, | |||
275 | unsigned int o; | 278 | unsigned int o; |
276 | int copied = 0; | 279 | int copied = 0; |
277 | int error = 0; | 280 | int error = 0; |
281 | u64 disksize = i_size_read(&ip->i_inode); | ||
278 | 282 | ||
279 | if (offset >= ip->i_disksize) | 283 | if (offset >= disksize) |
280 | return 0; | 284 | return 0; |
281 | 285 | ||
282 | if (offset + size > ip->i_disksize) | 286 | if (offset + size > disksize) |
283 | size = ip->i_disksize - offset; | 287 | size = disksize - offset; |
284 | 288 | ||
285 | if (!size) | 289 | if (!size) |
286 | return 0; | 290 | return 0; |
@@ -727,7 +731,7 @@ static struct gfs2_dirent *gfs2_dirent_search(struct inode *inode, | |||
727 | unsigned hsize = 1 << ip->i_depth; | 731 | unsigned hsize = 1 << ip->i_depth; |
728 | unsigned index; | 732 | unsigned index; |
729 | u64 ln; | 733 | u64 ln; |
730 | if (hsize * sizeof(u64) != ip->i_disksize) { | 734 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
731 | gfs2_consist_inode(ip); | 735 | gfs2_consist_inode(ip); |
732 | return ERR_PTR(-EIO); | 736 | return ERR_PTR(-EIO); |
733 | } | 737 | } |
@@ -879,7 +883,7 @@ static int dir_make_exhash(struct inode *inode) | |||
879 | for (x = sdp->sd_hash_ptrs; x--; lp++) | 883 | for (x = sdp->sd_hash_ptrs; x--; lp++) |
880 | *lp = cpu_to_be64(bn); | 884 | *lp = cpu_to_be64(bn); |
881 | 885 | ||
882 | dip->i_disksize = sdp->sd_sb.sb_bsize / 2; | 886 | i_size_write(inode, sdp->sd_sb.sb_bsize / 2); |
883 | gfs2_add_inode_blocks(&dip->i_inode, 1); | 887 | gfs2_add_inode_blocks(&dip->i_inode, 1); |
884 | dip->i_diskflags |= GFS2_DIF_EXHASH; | 888 | dip->i_diskflags |= GFS2_DIF_EXHASH; |
885 | 889 | ||
@@ -1057,11 +1061,12 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1057 | u64 *buf; | 1061 | u64 *buf; |
1058 | u64 *from, *to; | 1062 | u64 *from, *to; |
1059 | u64 block; | 1063 | u64 block; |
1064 | u64 disksize = i_size_read(&dip->i_inode); | ||
1060 | int x; | 1065 | int x; |
1061 | int error = 0; | 1066 | int error = 0; |
1062 | 1067 | ||
1063 | hsize = 1 << dip->i_depth; | 1068 | hsize = 1 << dip->i_depth; |
1064 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1069 | if (hsize * sizeof(u64) != disksize) { |
1065 | gfs2_consist_inode(dip); | 1070 | gfs2_consist_inode(dip); |
1066 | return -EIO; | 1071 | return -EIO; |
1067 | } | 1072 | } |
@@ -1072,7 +1077,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
1072 | if (!buf) | 1077 | if (!buf) |
1073 | return -ENOMEM; | 1078 | return -ENOMEM; |
1074 | 1079 | ||
1075 | for (block = dip->i_disksize >> sdp->sd_hash_bsize_shift; block--;) { | 1080 | for (block = disksize >> sdp->sd_hash_bsize_shift; block--;) { |
1076 | error = gfs2_dir_read_data(dip, (char *)buf, | 1081 | error = gfs2_dir_read_data(dip, (char *)buf, |
1077 | block * sdp->sd_hash_bsize, | 1082 | block * sdp->sd_hash_bsize, |
1078 | sdp->sd_hash_bsize, 1); | 1083 | sdp->sd_hash_bsize, 1); |
@@ -1370,7 +1375,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
1370 | unsigned depth = 0; | 1375 | unsigned depth = 0; |
1371 | 1376 | ||
1372 | hsize = 1 << dip->i_depth; | 1377 | hsize = 1 << dip->i_depth; |
1373 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1378 | if (hsize * sizeof(u64) != i_size_read(inode)) { |
1374 | gfs2_consist_inode(dip); | 1379 | gfs2_consist_inode(dip); |
1375 | return -EIO; | 1380 | return -EIO; |
1376 | } | 1381 | } |
@@ -1784,7 +1789,7 @@ static int foreach_leaf(struct gfs2_inode *dip, leaf_call_t lc, void *data) | |||
1784 | int error = 0; | 1789 | int error = 0; |
1785 | 1790 | ||
1786 | hsize = 1 << dip->i_depth; | 1791 | hsize = 1 << dip->i_depth; |
1787 | if (hsize * sizeof(u64) != dip->i_disksize) { | 1792 | if (hsize * sizeof(u64) != i_size_read(&dip->i_inode)) { |
1788 | gfs2_consist_inode(dip); | 1793 | gfs2_consist_inode(dip); |
1789 | return -EIO; | 1794 | return -EIO; |
1790 | } | 1795 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 4f919440c3be..a98f644bd3df 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
@@ -17,23 +17,24 @@ struct inode; | |||
17 | struct gfs2_inode; | 17 | struct gfs2_inode; |
18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
19 | 19 | ||
20 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *filename); | 20 | extern struct inode *gfs2_dir_search(struct inode *dir, |
21 | int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 21 | const struct qstr *filename); |
22 | const struct gfs2_inode *ip); | 22 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
23 | int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 23 | const struct gfs2_inode *ip); |
24 | const struct gfs2_inode *ip, unsigned int type); | 24 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
25 | int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); | 25 | const struct gfs2_inode *ip, unsigned int type); |
26 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 26 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct qstr *filename); |
27 | filldir_t filldir); | 27 | extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, |
28 | int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 28 | filldir_t filldir); |
29 | const struct gfs2_inode *nip, unsigned int new_type); | 29 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
30 | const struct gfs2_inode *nip, unsigned int new_type); | ||
30 | 31 | ||
31 | int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); | 32 | extern int gfs2_dir_exhash_dealloc(struct gfs2_inode *dip); |
32 | 33 | ||
33 | int gfs2_diradd_alloc_required(struct inode *dir, | 34 | extern int gfs2_diradd_alloc_required(struct inode *dir, |
34 | const struct qstr *filename); | 35 | const struct qstr *filename); |
35 | int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, | 36 | extern int gfs2_dir_get_new_buffer(struct gfs2_inode *ip, u64 block, |
36 | struct buffer_head **bhp); | 37 | struct buffer_head **bhp); |
37 | 38 | ||
38 | static inline u32 gfs2_disk_hash(const char *data, int len) | 39 | static inline u32 gfs2_disk_hash(const char *data, int len) |
39 | { | 40 | { |
@@ -61,4 +62,7 @@ static inline void gfs2_qstr2dirent(const struct qstr *name, u16 reclen, struct | |||
61 | memcpy(dent + 1, name->name, name->len); | 62 | memcpy(dent + 1, name->name, name->len); |
62 | } | 63 | } |
63 | 64 | ||
65 | extern struct qstr gfs2_qdot; | ||
66 | extern struct qstr gfs2_qdotdot; | ||
67 | |||
64 | #endif /* __DIR_DOT_H__ */ | 68 | #endif /* __DIR_DOT_H__ */ |
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index dfe237a3f8ad..06d582732d34 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
@@ -126,16 +126,9 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
126 | 126 | ||
127 | static struct dentry *gfs2_get_parent(struct dentry *child) | 127 | static struct dentry *gfs2_get_parent(struct dentry *child) |
128 | { | 128 | { |
129 | struct qstr dotdot; | ||
130 | struct dentry *dentry; | 129 | struct dentry *dentry; |
131 | 130 | ||
132 | /* | 131 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &gfs2_qdotdot, 1)); |
133 | * XXX(hch): it would be a good idea to keep this around as a | ||
134 | * static variable. | ||
135 | */ | ||
136 | gfs2_str2qstr(&dotdot, ".."); | ||
137 | |||
138 | dentry = d_obtain_alias(gfs2_lookupi(child->d_inode, &dotdot, 1)); | ||
139 | if (!IS_ERR(dentry)) | 132 | if (!IS_ERR(dentry)) |
140 | dentry->d_op = &gfs2_dops; | 133 | dentry->d_op = &gfs2_dops; |
141 | return dentry; | 134 | return dentry; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 4edd662c8232..aa996471ec5c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -382,8 +382,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
382 | rblocks = RES_DINODE + ind_blocks; | 382 | rblocks = RES_DINODE + ind_blocks; |
383 | if (gfs2_is_jdata(ip)) | 383 | if (gfs2_is_jdata(ip)) |
384 | rblocks += data_blocks ? data_blocks : 1; | 384 | rblocks += data_blocks ? data_blocks : 1; |
385 | if (ind_blocks || data_blocks) | 385 | if (ind_blocks || data_blocks) { |
386 | rblocks += RES_STATFS + RES_QUOTA; | 386 | rblocks += RES_STATFS + RES_QUOTA; |
387 | rblocks += gfs2_rg_blocks(al); | ||
388 | } | ||
387 | ret = gfs2_trans_begin(sdp, rblocks, 0); | 389 | ret = gfs2_trans_begin(sdp, rblocks, 0); |
388 | if (ret) | 390 | if (ret) |
389 | goto out_trans_fail; | 391 | goto out_trans_fail; |
@@ -491,7 +493,7 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
491 | goto fail; | 493 | goto fail; |
492 | 494 | ||
493 | if (!(file->f_flags & O_LARGEFILE) && | 495 | if (!(file->f_flags & O_LARGEFILE) && |
494 | ip->i_disksize > MAX_NON_LFS) { | 496 | i_size_read(inode) > MAX_NON_LFS) { |
495 | error = -EOVERFLOW; | 497 | error = -EOVERFLOW; |
496 | goto fail_gunlock; | 498 | goto fail_gunlock; |
497 | } | 499 | } |
@@ -620,6 +622,8 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
620 | * cluster; until we do, disable leases (by just returning -EINVAL), | 622 | * cluster; until we do, disable leases (by just returning -EINVAL), |
621 | * unless the administrator has requested purely local locking. | 623 | * unless the administrator has requested purely local locking. |
622 | * | 624 | * |
625 | * Locking: called under lock_flocks | ||
626 | * | ||
623 | * Returns: errno | 627 | * Returns: errno |
624 | */ | 628 | */ |
625 | 629 | ||
@@ -771,6 +775,7 @@ const struct file_operations gfs2_dir_fops = { | |||
771 | .fsync = gfs2_fsync, | 775 | .fsync = gfs2_fsync, |
772 | .lock = gfs2_lock, | 776 | .lock = gfs2_lock, |
773 | .flock = gfs2_flock, | 777 | .flock = gfs2_flock, |
778 | .llseek = default_llseek, | ||
774 | }; | 779 | }; |
775 | 780 | ||
776 | #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ | 781 | #endif /* CONFIG_GFS2_FS_LOCKING_DLM */ |
@@ -797,5 +802,6 @@ const struct file_operations gfs2_dir_fops_nolock = { | |||
797 | .open = gfs2_open, | 802 | .open = gfs2_open, |
798 | .release = gfs2_close, | 803 | .release = gfs2_close, |
799 | .fsync = gfs2_fsync, | 804 | .fsync = gfs2_fsync, |
805 | .llseek = default_llseek, | ||
800 | }; | 806 | }; |
801 | 807 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 9adf8f924e08..87778857f099 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -441,6 +441,8 @@ static void state_change(struct gfs2_glock *gl, unsigned int new_state) | |||
441 | else | 441 | else |
442 | gfs2_glock_put_nolock(gl); | 442 | gfs2_glock_put_nolock(gl); |
443 | } | 443 | } |
444 | if (held1 && held2 && list_empty(&gl->gl_holders)) | ||
445 | clear_bit(GLF_QUEUED, &gl->gl_flags); | ||
444 | 446 | ||
445 | gl->gl_state = new_state; | 447 | gl->gl_state = new_state; |
446 | gl->gl_tchange = jiffies; | 448 | gl->gl_tchange = jiffies; |
@@ -1012,6 +1014,7 @@ fail: | |||
1012 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) | 1014 | if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt)) |
1013 | insert_pt = &gh2->gh_list; | 1015 | insert_pt = &gh2->gh_list; |
1014 | } | 1016 | } |
1017 | set_bit(GLF_QUEUED, &gl->gl_flags); | ||
1015 | if (likely(insert_pt == NULL)) { | 1018 | if (likely(insert_pt == NULL)) { |
1016 | list_add_tail(&gh->gh_list, &gl->gl_holders); | 1019 | list_add_tail(&gh->gh_list, &gl->gl_holders); |
1017 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) | 1020 | if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY)) |
@@ -1310,10 +1313,12 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state) | |||
1310 | 1313 | ||
1311 | gfs2_glock_hold(gl); | 1314 | gfs2_glock_hold(gl); |
1312 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1315 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
1313 | if (time_before(now, holdtime)) | 1316 | if (test_bit(GLF_QUEUED, &gl->gl_flags)) { |
1314 | delay = holdtime - now; | 1317 | if (time_before(now, holdtime)) |
1315 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | 1318 | delay = holdtime - now; |
1316 | delay = gl->gl_ops->go_min_hold_time; | 1319 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) |
1320 | delay = gl->gl_ops->go_min_hold_time; | ||
1321 | } | ||
1317 | 1322 | ||
1318 | spin_lock(&gl->gl_spin); | 1323 | spin_lock(&gl->gl_spin); |
1319 | handle_callback(gl, state, delay); | 1324 | handle_callback(gl, state, delay); |
@@ -1512,7 +1517,7 @@ static void clear_glock(struct gfs2_glock *gl) | |||
1512 | spin_unlock(&lru_lock); | 1517 | spin_unlock(&lru_lock); |
1513 | 1518 | ||
1514 | spin_lock(&gl->gl_spin); | 1519 | spin_lock(&gl->gl_spin); |
1515 | if (find_first_holder(gl) == NULL && gl->gl_state != LM_ST_UNLOCKED) | 1520 | if (gl->gl_state != LM_ST_UNLOCKED) |
1516 | handle_callback(gl, LM_ST_UNLOCKED, 0); | 1521 | handle_callback(gl, LM_ST_UNLOCKED, 0); |
1517 | spin_unlock(&gl->gl_spin); | 1522 | spin_unlock(&gl->gl_spin); |
1518 | gfs2_glock_hold(gl); | 1523 | gfs2_glock_hold(gl); |
@@ -1660,6 +1665,8 @@ static const char *gflags2str(char *buf, const unsigned long *gflags) | |||
1660 | *p++ = 'I'; | 1665 | *p++ = 'I'; |
1661 | if (test_bit(GLF_FROZEN, gflags)) | 1666 | if (test_bit(GLF_FROZEN, gflags)) |
1662 | *p++ = 'F'; | 1667 | *p++ = 'F'; |
1668 | if (test_bit(GLF_QUEUED, gflags)) | ||
1669 | *p++ = 'q'; | ||
1663 | *p = 0; | 1670 | *p = 0; |
1664 | return buf; | 1671 | return buf; |
1665 | } | 1672 | } |
@@ -1776,10 +1783,12 @@ int __init gfs2_glock_init(void) | |||
1776 | } | 1783 | } |
1777 | #endif | 1784 | #endif |
1778 | 1785 | ||
1779 | glock_workqueue = create_workqueue("glock_workqueue"); | 1786 | glock_workqueue = alloc_workqueue("glock_workqueue", WQ_RESCUER | |
1787 | WQ_HIGHPRI | WQ_FREEZEABLE, 0); | ||
1780 | if (IS_ERR(glock_workqueue)) | 1788 | if (IS_ERR(glock_workqueue)) |
1781 | return PTR_ERR(glock_workqueue); | 1789 | return PTR_ERR(glock_workqueue); |
1782 | gfs2_delete_workqueue = create_workqueue("delete_workqueue"); | 1790 | gfs2_delete_workqueue = alloc_workqueue("delete_workqueue", WQ_RESCUER | |
1791 | WQ_FREEZEABLE, 0); | ||
1783 | if (IS_ERR(gfs2_delete_workqueue)) { | 1792 | if (IS_ERR(gfs2_delete_workqueue)) { |
1784 | destroy_workqueue(glock_workqueue); | 1793 | destroy_workqueue(glock_workqueue); |
1785 | return PTR_ERR(gfs2_delete_workqueue); | 1794 | return PTR_ERR(gfs2_delete_workqueue); |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 2bda1911b156..db1c26d6d220 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
@@ -215,7 +215,7 @@ void gfs2_glock_dq_uninit_m(unsigned int num_gh, struct gfs2_holder *ghs); | |||
215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); | 215 | void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...); |
216 | 216 | ||
217 | /** | 217 | /** |
218 | * gfs2_glock_nq_init - intialize a holder and enqueue it on a glock | 218 | * gfs2_glock_nq_init - initialize a holder and enqueue it on a glock |
219 | * @gl: the glock | 219 | * @gl: the glock |
220 | * @state: the state we're requesting | 220 | * @state: the state we're requesting |
221 | * @flags: the modifier flags | 221 | * @flags: the modifier flags |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 49f97d3bb690..0d149dcc04e5 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -262,13 +262,12 @@ static int inode_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) | |||
262 | const struct gfs2_inode *ip = gl->gl_object; | 262 | const struct gfs2_inode *ip = gl->gl_object; |
263 | if (ip == NULL) | 263 | if (ip == NULL) |
264 | return 0; | 264 | return 0; |
265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu/%llu\n", | 265 | gfs2_print_dbg(seq, " I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu\n", |
266 | (unsigned long long)ip->i_no_formal_ino, | 266 | (unsigned long long)ip->i_no_formal_ino, |
267 | (unsigned long long)ip->i_no_addr, | 267 | (unsigned long long)ip->i_no_addr, |
268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, | 268 | IF2DT(ip->i_inode.i_mode), ip->i_flags, |
269 | (unsigned int)ip->i_diskflags, | 269 | (unsigned int)ip->i_diskflags, |
270 | (unsigned long long)ip->i_inode.i_size, | 270 | (unsigned long long)i_size_read(&ip->i_inode)); |
271 | (unsigned long long)ip->i_disksize); | ||
272 | return 0; | 271 | return 0; |
273 | } | 272 | } |
274 | 273 | ||
@@ -453,7 +452,6 @@ const struct gfs2_glock_operations *gfs2_glops_list[] = { | |||
453 | [LM_TYPE_META] = &gfs2_meta_glops, | 452 | [LM_TYPE_META] = &gfs2_meta_glops, |
454 | [LM_TYPE_INODE] = &gfs2_inode_glops, | 453 | [LM_TYPE_INODE] = &gfs2_inode_glops, |
455 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, | 454 | [LM_TYPE_RGRP] = &gfs2_rgrp_glops, |
456 | [LM_TYPE_NONDISK] = &gfs2_trans_glops, | ||
457 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, | 455 | [LM_TYPE_IOPEN] = &gfs2_iopen_glops, |
458 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, | 456 | [LM_TYPE_FLOCK] = &gfs2_flock_glops, |
459 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, | 457 | [LM_TYPE_NONDISK] = &gfs2_nondisk_glops, |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index fdbf4b366fa5..764fbb49efc8 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -196,6 +196,7 @@ enum { | |||
196 | GLF_REPLY_PENDING = 9, | 196 | GLF_REPLY_PENDING = 9, |
197 | GLF_INITIAL = 10, | 197 | GLF_INITIAL = 10, |
198 | GLF_FROZEN = 11, | 198 | GLF_FROZEN = 11, |
199 | GLF_QUEUED = 12, | ||
199 | }; | 200 | }; |
200 | 201 | ||
201 | struct gfs2_glock { | 202 | struct gfs2_glock { |
@@ -267,7 +268,6 @@ struct gfs2_inode { | |||
267 | u64 i_no_formal_ino; | 268 | u64 i_no_formal_ino; |
268 | u64 i_generation; | 269 | u64 i_generation; |
269 | u64 i_eattr; | 270 | u64 i_eattr; |
270 | loff_t i_disksize; | ||
271 | unsigned long i_flags; /* GIF_... */ | 271 | unsigned long i_flags; /* GIF_... */ |
272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ | 272 | struct gfs2_glock *i_gl; /* Move into i_gh? */ |
273 | struct gfs2_holder i_iopen_gh; | 273 | struct gfs2_holder i_iopen_gh; |
@@ -416,11 +416,8 @@ struct gfs2_args { | |||
416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 416 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 417 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
418 | unsigned int ar_spectator:1; /* Don't get a journal */ | 418 | unsigned int ar_spectator:1; /* Don't get a journal */ |
419 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ | ||
420 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ | 419 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
421 | unsigned int ar_localcaching:1; /* Local caching */ | ||
422 | unsigned int ar_debug:1; /* Oops on errors */ | 420 | unsigned int ar_debug:1; /* Oops on errors */ |
423 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ | ||
424 | unsigned int ar_posix_acl:1; /* Enable posix acls */ | 421 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
425 | unsigned int ar_quota:2; /* off/account/on */ | 422 | unsigned int ar_quota:2; /* off/account/on */ |
426 | unsigned int ar_suiddir:1; /* suiddir support */ | 423 | unsigned int ar_suiddir:1; /* suiddir support */ |
@@ -497,7 +494,7 @@ struct gfs2_sb_host { | |||
497 | */ | 494 | */ |
498 | 495 | ||
499 | struct lm_lockstruct { | 496 | struct lm_lockstruct { |
500 | unsigned int ls_jid; | 497 | int ls_jid; |
501 | unsigned int ls_first; | 498 | unsigned int ls_first; |
502 | unsigned int ls_first_done; | 499 | unsigned int ls_first_done; |
503 | unsigned int ls_nodir; | 500 | unsigned int ls_nodir; |
@@ -572,6 +569,7 @@ struct gfs2_sbd { | |||
572 | struct list_head sd_rindex_mru_list; | 569 | struct list_head sd_rindex_mru_list; |
573 | struct gfs2_rgrpd *sd_rindex_forward; | 570 | struct gfs2_rgrpd *sd_rindex_forward; |
574 | unsigned int sd_rgrps; | 571 | unsigned int sd_rgrps; |
572 | unsigned int sd_max_rg_data; | ||
575 | 573 | ||
576 | /* Journal index stuff */ | 574 | /* Journal index stuff */ |
577 | 575 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 08140f185a37..06370f8bd8cf 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -359,8 +359,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
359 | * to do that. | 359 | * to do that. |
360 | */ | 360 | */ |
361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); | 361 | ip->i_inode.i_nlink = be32_to_cpu(str->di_nlink); |
362 | ip->i_disksize = be64_to_cpu(str->di_size); | 362 | i_size_write(&ip->i_inode, be64_to_cpu(str->di_size)); |
363 | i_size_write(&ip->i_inode, ip->i_disksize); | ||
364 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 363 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
365 | atime.tv_sec = be64_to_cpu(str->di_atime); | 364 | atime.tv_sec = be64_to_cpu(str->di_atime); |
366 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 365 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
@@ -1055,7 +1054,7 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf) | |||
1055 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); | 1054 | str->di_uid = cpu_to_be32(ip->i_inode.i_uid); |
1056 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); | 1055 | str->di_gid = cpu_to_be32(ip->i_inode.i_gid); |
1057 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); | 1056 | str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink); |
1058 | str->di_size = cpu_to_be64(ip->i_disksize); | 1057 | str->di_size = cpu_to_be64(i_size_read(&ip->i_inode)); |
1059 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); | 1058 | str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode)); |
1060 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | 1059 | str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); |
1061 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); | 1060 | str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec); |
@@ -1085,8 +1084,8 @@ void gfs2_dinode_print(const struct gfs2_inode *ip) | |||
1085 | (unsigned long long)ip->i_no_formal_ino); | 1084 | (unsigned long long)ip->i_no_formal_ino); |
1086 | printk(KERN_INFO " no_addr = %llu\n", | 1085 | printk(KERN_INFO " no_addr = %llu\n", |
1087 | (unsigned long long)ip->i_no_addr); | 1086 | (unsigned long long)ip->i_no_addr); |
1088 | printk(KERN_INFO " i_disksize = %llu\n", | 1087 | printk(KERN_INFO " i_size = %llu\n", |
1089 | (unsigned long long)ip->i_disksize); | 1088 | (unsigned long long)i_size_read(&ip->i_inode)); |
1090 | printk(KERN_INFO " blocks = %llu\n", | 1089 | printk(KERN_INFO " blocks = %llu\n", |
1091 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); | 1090 | (unsigned long long)gfs2_get_inode_blocks(&ip->i_inode)); |
1092 | printk(KERN_INFO " i_goal = %llu\n", | 1091 | printk(KERN_INFO " i_goal = %llu\n", |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 300ada3f21de..6720d7d5fbc6 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
@@ -19,6 +19,8 @@ extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask); | |||
19 | extern int gfs2_internal_read(struct gfs2_inode *ip, | 19 | extern int gfs2_internal_read(struct gfs2_inode *ip, |
20 | struct file_ra_state *ra_state, | 20 | struct file_ra_state *ra_state, |
21 | char *buf, loff_t *pos, unsigned size); | 21 | char *buf, loff_t *pos, unsigned size); |
22 | extern void gfs2_page_add_databufs(struct gfs2_inode *ip, struct page *page, | ||
23 | unsigned int from, unsigned int to); | ||
22 | extern void gfs2_set_aops(struct inode *inode); | 24 | extern void gfs2_set_aops(struct inode *inode); |
23 | 25 | ||
24 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) | 26 | static inline int gfs2_is_stuffed(const struct gfs2_inode *ip) |
@@ -80,6 +82,19 @@ static inline void gfs2_inum_out(const struct gfs2_inode *ip, | |||
80 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); | 82 | dent->de_inum.no_addr = cpu_to_be64(ip->i_no_addr); |
81 | } | 83 | } |
82 | 84 | ||
85 | static inline int gfs2_check_internal_file_size(struct inode *inode, | ||
86 | u64 minsize, u64 maxsize) | ||
87 | { | ||
88 | u64 size = i_size_read(inode); | ||
89 | if (size < minsize || size > maxsize) | ||
90 | goto err; | ||
91 | if (size & ((1 << inode->i_blkbits) - 1)) | ||
92 | goto err; | ||
93 | return 0; | ||
94 | err: | ||
95 | gfs2_consist_inode(GFS2_I(inode)); | ||
96 | return -EIO; | ||
97 | } | ||
83 | 98 | ||
84 | extern void gfs2_set_iop(struct inode *inode); | 99 | extern void gfs2_set_iop(struct inode *inode); |
85 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, | 100 | extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 0e0470ed34c2..1c09425b45fd 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -42,9 +42,9 @@ static void gdlm_ast(void *arg) | |||
42 | ret |= LM_OUT_CANCELED; | 42 | ret |= LM_OUT_CANCELED; |
43 | goto out; | 43 | goto out; |
44 | case -EAGAIN: /* Try lock fails */ | 44 | case -EAGAIN: /* Try lock fails */ |
45 | case -EDEADLK: /* Deadlock detected */ | ||
45 | goto out; | 46 | goto out; |
46 | case -EINVAL: /* Invalid */ | 47 | case -ETIMEDOUT: /* Canceled due to timeout */ |
47 | case -ENOMEM: /* Out of memory */ | ||
48 | ret |= LM_OUT_ERROR; | 48 | ret |= LM_OUT_ERROR; |
49 | goto out; | 49 | goto out; |
50 | case 0: /* Success */ | 50 | case 0: /* Success */ |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index ac750bd31a6f..eb01f3575e10 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -592,22 +592,13 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
592 | lh->lh_hash = cpu_to_be32(hash); | 592 | lh->lh_hash = cpu_to_be32(hash); |
593 | 593 | ||
594 | bh->b_end_io = end_buffer_write_sync; | 594 | bh->b_end_io = end_buffer_write_sync; |
595 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) | ||
596 | goto skip_barrier; | ||
597 | get_bh(bh); | 595 | get_bh(bh); |
598 | submit_bh(WRITE_BARRIER | REQ_META, bh); | 596 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
599 | wait_on_buffer(bh); | ||
600 | if (buffer_eopnotsupp(bh)) { | ||
601 | clear_buffer_eopnotsupp(bh); | ||
602 | set_buffer_uptodate(bh); | ||
603 | fs_info(sdp, "barrier sync failed - disabling barriers\n"); | ||
604 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
605 | lock_buffer(bh); | ||
606 | skip_barrier: | ||
607 | get_bh(bh); | ||
608 | submit_bh(WRITE_SYNC | REQ_META, bh); | 597 | submit_bh(WRITE_SYNC | REQ_META, bh); |
609 | wait_on_buffer(bh); | 598 | else |
610 | } | 599 | submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); |
600 | wait_on_buffer(bh); | ||
601 | |||
611 | if (!buffer_uptodate(bh)) | 602 | if (!buffer_uptodate(bh)) |
612 | gfs2_io_error_bh(sdp, bh); | 603 | gfs2_io_error_bh(sdp, bh); |
613 | brelse(bh); | 604 | brelse(bh); |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index b1e9630eb46a..ebef7ab6e17e 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include "glock.h" | 24 | #include "glock.h" |
25 | #include "quota.h" | 25 | #include "quota.h" |
26 | #include "recovery.h" | 26 | #include "recovery.h" |
27 | #include "dir.h" | ||
27 | 28 | ||
28 | static struct shrinker qd_shrinker = { | 29 | static struct shrinker qd_shrinker = { |
29 | .shrink = gfs2_shrink_qd_memory, | 30 | .shrink = gfs2_shrink_qd_memory, |
@@ -78,6 +79,9 @@ static int __init init_gfs2_fs(void) | |||
78 | { | 79 | { |
79 | int error; | 80 | int error; |
80 | 81 | ||
82 | gfs2_str2qstr(&gfs2_qdot, "."); | ||
83 | gfs2_str2qstr(&gfs2_qdotdot, ".."); | ||
84 | |||
81 | error = gfs2_sys_init(); | 85 | error = gfs2_sys_init(); |
82 | if (error) | 86 | if (error) |
83 | return error; | 87 | return error; |
@@ -140,7 +144,7 @@ static int __init init_gfs2_fs(void) | |||
140 | 144 | ||
141 | error = -ENOMEM; | 145 | error = -ENOMEM; |
142 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", | 146 | gfs_recovery_wq = alloc_workqueue("gfs_recovery", |
143 | WQ_NON_REENTRANT | WQ_RESCUER, 0); | 147 | WQ_MEM_RECLAIM | WQ_FREEZEABLE, 0); |
144 | if (!gfs_recovery_wq) | 148 | if (!gfs_recovery_wq) |
145 | goto fail_wq; | 149 | goto fail_wq; |
146 | 150 | ||
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index f3b071f921aa..939739c7b3f9 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb | |||
55 | * activity, but those code paths have their own higher-level | 55 | * activity, but those code paths have their own higher-level |
56 | * throttling. | 56 | * throttling. |
57 | */ | 57 | */ |
58 | if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { | 58 | if (wbc->sync_mode != WB_SYNC_NONE) { |
59 | lock_buffer(bh); | 59 | lock_buffer(bh); |
60 | } else if (!trylock_buffer(bh)) { | 60 | } else if (!trylock_buffer(bh)) { |
61 | redirty_page_for_writepage(wbc, page); | 61 | redirty_page_for_writepage(wbc, page); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 4d4b1e8ac64c..cade1acbcea9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -38,14 +38,6 @@ | |||
38 | #define DO 0 | 38 | #define DO 0 |
39 | #define UNDO 1 | 39 | #define UNDO 1 |
40 | 40 | ||
41 | static const u32 gfs2_old_fs_formats[] = { | ||
42 | 0 | ||
43 | }; | ||
44 | |||
45 | static const u32 gfs2_old_multihost_formats[] = { | ||
46 | 0 | ||
47 | }; | ||
48 | |||
49 | /** | 41 | /** |
50 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | 42 | * gfs2_tune_init - Fill a gfs2_tune structure with default values |
51 | * @gt: tune | 43 | * @gt: tune |
@@ -135,8 +127,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
135 | 127 | ||
136 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | 128 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) |
137 | { | 129 | { |
138 | unsigned int x; | ||
139 | |||
140 | if (sb->sb_magic != GFS2_MAGIC || | 130 | if (sb->sb_magic != GFS2_MAGIC || |
141 | sb->sb_type != GFS2_METATYPE_SB) { | 131 | sb->sb_type != GFS2_METATYPE_SB) { |
142 | if (!silent) | 132 | if (!silent) |
@@ -150,55 +140,9 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int sile | |||
150 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | 140 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) |
151 | return 0; | 141 | return 0; |
152 | 142 | ||
153 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | 143 | fs_warn(sdp, "Unknown on-disk format, unable to mount\n"); |
154 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
155 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
156 | break; | ||
157 | 144 | ||
158 | if (!gfs2_old_fs_formats[x]) { | 145 | return -EINVAL; |
159 | printk(KERN_WARNING | ||
160 | "GFS2: code version (%u, %u) is incompatible " | ||
161 | "with ondisk format (%u, %u)\n", | ||
162 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
163 | sb->sb_fs_format, sb->sb_multihost_format); | ||
164 | printk(KERN_WARNING | ||
165 | "GFS2: I don't know how to upgrade this FS\n"); | ||
166 | return -EINVAL; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
171 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
172 | if (gfs2_old_multihost_formats[x] == | ||
173 | sb->sb_multihost_format) | ||
174 | break; | ||
175 | |||
176 | if (!gfs2_old_multihost_formats[x]) { | ||
177 | printk(KERN_WARNING | ||
178 | "GFS2: code version (%u, %u) is incompatible " | ||
179 | "with ondisk format (%u, %u)\n", | ||
180 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
181 | sb->sb_fs_format, sb->sb_multihost_format); | ||
182 | printk(KERN_WARNING | ||
183 | "GFS2: I don't know how to upgrade this FS\n"); | ||
184 | return -EINVAL; | ||
185 | } | ||
186 | } | ||
187 | |||
188 | if (!sdp->sd_args.ar_upgrade) { | ||
189 | printk(KERN_WARNING | ||
190 | "GFS2: code version (%u, %u) is incompatible " | ||
191 | "with ondisk format (%u, %u)\n", | ||
192 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
193 | sb->sb_fs_format, sb->sb_multihost_format); | ||
194 | printk(KERN_INFO | ||
195 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
196 | "the FS\n"); | ||
197 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
198 | return -EINVAL; | ||
199 | } | ||
200 | |||
201 | return 0; | ||
202 | } | 146 | } |
203 | 147 | ||
204 | static void end_bio_io_page(struct bio *bio, int error) | 148 | static void end_bio_io_page(struct bio *bio, int error) |
@@ -586,7 +530,7 @@ static int map_journal_extents(struct gfs2_sbd *sdp) | |||
586 | 530 | ||
587 | prev_db = 0; | 531 | prev_db = 0; |
588 | 532 | ||
589 | for (lb = 0; lb < ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; lb++) { | 533 | for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { |
590 | bh.b_state = 0; | 534 | bh.b_state = 0; |
591 | bh.b_blocknr = 0; | 535 | bh.b_blocknr = 0; |
592 | bh.b_size = 1 << ip->i_inode.i_blkbits; | 536 | bh.b_size = 1 << ip->i_inode.i_blkbits; |
@@ -1022,7 +966,6 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
1022 | if (!strcmp("lock_nolock", proto)) { | 966 | if (!strcmp("lock_nolock", proto)) { |
1023 | lm = &nolock_ops; | 967 | lm = &nolock_ops; |
1024 | sdp->sd_args.ar_localflocks = 1; | 968 | sdp->sd_args.ar_localflocks = 1; |
1025 | sdp->sd_args.ar_localcaching = 1; | ||
1026 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM | 969 | #ifdef CONFIG_GFS2_FS_LOCKING_DLM |
1027 | } else if (!strcmp("lock_dlm", proto)) { | 970 | } else if (!strcmp("lock_dlm", proto)) { |
1028 | lm = &gfs2_dlm_ops; | 971 | lm = &gfs2_dlm_ops; |
@@ -1113,8 +1056,6 @@ static int gfs2_journalid_wait(void *word) | |||
1113 | 1056 | ||
1114 | static int wait_on_journal(struct gfs2_sbd *sdp) | 1057 | static int wait_on_journal(struct gfs2_sbd *sdp) |
1115 | { | 1058 | { |
1116 | if (sdp->sd_args.ar_spectator) | ||
1117 | return 0; | ||
1118 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 1059 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
1119 | return 0; | 1060 | return 0; |
1120 | 1061 | ||
@@ -1217,6 +1158,20 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1217 | if (error) | 1158 | if (error) |
1218 | goto fail_sb; | 1159 | goto fail_sb; |
1219 | 1160 | ||
1161 | /* | ||
1162 | * If user space has failed to join the cluster or some similar | ||
1163 | * failure has occurred, then the journal id will contain a | ||
1164 | * negative (error) number. This will then be returned to the | ||
1165 | * caller (of the mount syscall). We do this even for spectator | ||
1166 | * mounts (which just write a jid of 0 to indicate "ok" even though | ||
1167 | * the jid is unused in the spectator case) | ||
1168 | */ | ||
1169 | if (sdp->sd_lockstruct.ls_jid < 0) { | ||
1170 | error = sdp->sd_lockstruct.ls_jid; | ||
1171 | sdp->sd_lockstruct.ls_jid = 0; | ||
1172 | goto fail_sb; | ||
1173 | } | ||
1174 | |||
1220 | error = init_inodes(sdp, DO); | 1175 | error = init_inodes(sdp, DO); |
1221 | if (error) | 1176 | if (error) |
1222 | goto fail_sb; | 1177 | goto fail_sb; |
@@ -1264,7 +1219,6 @@ fail_sb: | |||
1264 | fail_locking: | 1219 | fail_locking: |
1265 | init_locking(sdp, &mount_gh, UNDO); | 1220 | init_locking(sdp, &mount_gh, UNDO); |
1266 | fail_lm: | 1221 | fail_lm: |
1267 | invalidate_inodes(sb); | ||
1268 | gfs2_gl_hash_clear(sdp); | 1222 | gfs2_gl_hash_clear(sdp); |
1269 | gfs2_lm_unmount(sdp); | 1223 | gfs2_lm_unmount(sdp); |
1270 | fail_sys: | 1224 | fail_sys: |
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 1009be2c9737..12cbea7502c2 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
@@ -18,6 +18,8 @@ | |||
18 | #include <linux/gfs2_ondisk.h> | 18 | #include <linux/gfs2_ondisk.h> |
19 | #include <linux/crc32.h> | 19 | #include <linux/crc32.h> |
20 | #include <linux/fiemap.h> | 20 | #include <linux/fiemap.h> |
21 | #include <linux/swap.h> | ||
22 | #include <linux/falloc.h> | ||
21 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
22 | 24 | ||
23 | #include "gfs2.h" | 25 | #include "gfs2.h" |
@@ -217,7 +219,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
217 | goto out_gunlock_q; | 219 | goto out_gunlock_q; |
218 | 220 | ||
219 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 221 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
220 | al->al_rgd->rd_length + | 222 | gfs2_rg_blocks(al) + |
221 | 2 * RES_DINODE + RES_STATFS + | 223 | 2 * RES_DINODE + RES_STATFS + |
222 | RES_QUOTA, 0); | 224 | RES_QUOTA, 0); |
223 | if (error) | 225 | if (error) |
@@ -253,7 +255,7 @@ out_parent: | |||
253 | gfs2_holder_uninit(ghs); | 255 | gfs2_holder_uninit(ghs); |
254 | gfs2_holder_uninit(ghs + 1); | 256 | gfs2_holder_uninit(ghs + 1); |
255 | if (!error) { | 257 | if (!error) { |
256 | atomic_inc(&inode->i_count); | 258 | ihold(inode); |
257 | d_instantiate(dentry, inode); | 259 | d_instantiate(dentry, inode); |
258 | mark_inode_dirty(inode); | 260 | mark_inode_dirty(inode); |
259 | } | 261 | } |
@@ -406,7 +408,6 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
406 | 408 | ||
407 | ip = ghs[1].gh_gl->gl_object; | 409 | ip = ghs[1].gh_gl->gl_object; |
408 | 410 | ||
409 | ip->i_disksize = size; | ||
410 | i_size_write(inode, size); | 411 | i_size_write(inode, size); |
411 | 412 | ||
412 | error = gfs2_meta_inode_buffer(ip, &dibh); | 413 | error = gfs2_meta_inode_buffer(ip, &dibh); |
@@ -461,7 +462,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
461 | ip = ghs[1].gh_gl->gl_object; | 462 | ip = ghs[1].gh_gl->gl_object; |
462 | 463 | ||
463 | ip->i_inode.i_nlink = 2; | 464 | ip->i_inode.i_nlink = 2; |
464 | ip->i_disksize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); | 465 | i_size_write(inode, sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)); |
465 | ip->i_diskflags |= GFS2_DIF_JDATA; | 466 | ip->i_diskflags |= GFS2_DIF_JDATA; |
466 | ip->i_entries = 2; | 467 | ip->i_entries = 2; |
467 | 468 | ||
@@ -470,18 +471,15 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
470 | if (!gfs2_assert_withdraw(sdp, !error)) { | 471 | if (!gfs2_assert_withdraw(sdp, !error)) { |
471 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; | 472 | struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data; |
472 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); | 473 | struct gfs2_dirent *dent = (struct gfs2_dirent *)(di+1); |
473 | struct qstr str; | ||
474 | 474 | ||
475 | gfs2_str2qstr(&str, "."); | ||
476 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 475 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
477 | gfs2_qstr2dirent(&str, GFS2_DIRENT_SIZE(str.len), dent); | 476 | gfs2_qstr2dirent(&gfs2_qdot, GFS2_DIRENT_SIZE(gfs2_qdot.len), dent); |
478 | dent->de_inum = di->di_num; /* already GFS2 endian */ | 477 | dent->de_inum = di->di_num; /* already GFS2 endian */ |
479 | dent->de_type = cpu_to_be16(DT_DIR); | 478 | dent->de_type = cpu_to_be16(DT_DIR); |
480 | di->di_entries = cpu_to_be32(1); | 479 | di->di_entries = cpu_to_be32(1); |
481 | 480 | ||
482 | gfs2_str2qstr(&str, ".."); | ||
483 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); | 481 | dent = (struct gfs2_dirent *)((char*)dent + GFS2_DIRENT_SIZE(1)); |
484 | gfs2_qstr2dirent(&str, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); | 482 | gfs2_qstr2dirent(&gfs2_qdotdot, dibh->b_size - GFS2_DIRENT_SIZE(1) - sizeof(struct gfs2_dinode), dent); |
485 | 483 | ||
486 | gfs2_inum_out(dip, dent); | 484 | gfs2_inum_out(dip, dent); |
487 | dent->de_type = cpu_to_be16(DT_DIR); | 485 | dent->de_type = cpu_to_be16(DT_DIR); |
@@ -522,7 +520,6 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
522 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | 520 | static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, |
523 | struct gfs2_inode *ip) | 521 | struct gfs2_inode *ip) |
524 | { | 522 | { |
525 | struct qstr dotname; | ||
526 | int error; | 523 | int error; |
527 | 524 | ||
528 | if (ip->i_entries != 2) { | 525 | if (ip->i_entries != 2) { |
@@ -539,13 +536,11 @@ static int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
539 | if (error) | 536 | if (error) |
540 | return error; | 537 | return error; |
541 | 538 | ||
542 | gfs2_str2qstr(&dotname, "."); | 539 | error = gfs2_dir_del(ip, &gfs2_qdot); |
543 | error = gfs2_dir_del(ip, &dotname); | ||
544 | if (error) | 540 | if (error) |
545 | return error; | 541 | return error; |
546 | 542 | ||
547 | gfs2_str2qstr(&dotname, ".."); | 543 | error = gfs2_dir_del(ip, &gfs2_qdotdot); |
548 | error = gfs2_dir_del(ip, &dotname); | ||
549 | if (error) | 544 | if (error) |
550 | return error; | 545 | return error; |
551 | 546 | ||
@@ -694,11 +689,8 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
694 | struct inode *dir = &to->i_inode; | 689 | struct inode *dir = &to->i_inode; |
695 | struct super_block *sb = dir->i_sb; | 690 | struct super_block *sb = dir->i_sb; |
696 | struct inode *tmp; | 691 | struct inode *tmp; |
697 | struct qstr dotdot; | ||
698 | int error = 0; | 692 | int error = 0; |
699 | 693 | ||
700 | gfs2_str2qstr(&dotdot, ".."); | ||
701 | |||
702 | igrab(dir); | 694 | igrab(dir); |
703 | 695 | ||
704 | for (;;) { | 696 | for (;;) { |
@@ -711,7 +703,7 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
711 | break; | 703 | break; |
712 | } | 704 | } |
713 | 705 | ||
714 | tmp = gfs2_lookupi(dir, &dotdot, 1); | 706 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); |
715 | if (IS_ERR(tmp)) { | 707 | if (IS_ERR(tmp)) { |
716 | error = PTR_ERR(tmp); | 708 | error = PTR_ERR(tmp); |
717 | break; | 709 | break; |
@@ -744,7 +736,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
744 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 736 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
745 | struct gfs2_inode *nip = NULL; | 737 | struct gfs2_inode *nip = NULL; |
746 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 738 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
747 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; | 739 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }, ri_gh; |
748 | struct gfs2_rgrpd *nrgd; | 740 | struct gfs2_rgrpd *nrgd; |
749 | unsigned int num_gh; | 741 | unsigned int num_gh; |
750 | int dir_rename = 0; | 742 | int dir_rename = 0; |
@@ -758,6 +750,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
758 | return 0; | 750 | return 0; |
759 | } | 751 | } |
760 | 752 | ||
753 | error = gfs2_rindex_hold(sdp, &ri_gh); | ||
754 | if (error) | ||
755 | return error; | ||
761 | 756 | ||
762 | if (odip != ndip) { | 757 | if (odip != ndip) { |
763 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, | 758 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
@@ -887,12 +882,12 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
887 | 882 | ||
888 | al->al_requested = sdp->sd_max_dirres; | 883 | al->al_requested = sdp->sd_max_dirres; |
889 | 884 | ||
890 | error = gfs2_inplace_reserve(ndip); | 885 | error = gfs2_inplace_reserve_ri(ndip); |
891 | if (error) | 886 | if (error) |
892 | goto out_gunlock_q; | 887 | goto out_gunlock_q; |
893 | 888 | ||
894 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + | 889 | error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + |
895 | al->al_rgd->rd_length + | 890 | gfs2_rg_blocks(al) + |
896 | 4 * RES_DINODE + 4 * RES_LEAF + | 891 | 4 * RES_DINODE + 4 * RES_LEAF + |
897 | RES_STATFS + RES_QUOTA + 4, 0); | 892 | RES_STATFS + RES_QUOTA + 4, 0); |
898 | if (error) | 893 | if (error) |
@@ -920,9 +915,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
920 | } | 915 | } |
921 | 916 | ||
922 | if (dir_rename) { | 917 | if (dir_rename) { |
923 | struct qstr name; | ||
924 | gfs2_str2qstr(&name, ".."); | ||
925 | |||
926 | error = gfs2_change_nlink(ndip, +1); | 918 | error = gfs2_change_nlink(ndip, +1); |
927 | if (error) | 919 | if (error) |
928 | goto out_end_trans; | 920 | goto out_end_trans; |
@@ -930,7 +922,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
930 | if (error) | 922 | if (error) |
931 | goto out_end_trans; | 923 | goto out_end_trans; |
932 | 924 | ||
933 | error = gfs2_dir_mvino(ip, &name, ndip, DT_DIR); | 925 | error = gfs2_dir_mvino(ip, &gfs2_qdotdot, ndip, DT_DIR); |
934 | if (error) | 926 | if (error) |
935 | goto out_end_trans; | 927 | goto out_end_trans; |
936 | } else { | 928 | } else { |
@@ -972,6 +964,7 @@ out_gunlock_r: | |||
972 | if (r_gh.gh_gl) | 964 | if (r_gh.gh_gl) |
973 | gfs2_glock_dq_uninit(&r_gh); | 965 | gfs2_glock_dq_uninit(&r_gh); |
974 | out: | 966 | out: |
967 | gfs2_glock_dq_uninit(&ri_gh); | ||
975 | return error; | 968 | return error; |
976 | } | 969 | } |
977 | 970 | ||
@@ -990,7 +983,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
990 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); | 983 | struct gfs2_inode *ip = GFS2_I(dentry->d_inode); |
991 | struct gfs2_holder i_gh; | 984 | struct gfs2_holder i_gh; |
992 | struct buffer_head *dibh; | 985 | struct buffer_head *dibh; |
993 | unsigned int x; | 986 | unsigned int x, size; |
994 | char *buf; | 987 | char *buf; |
995 | int error; | 988 | int error; |
996 | 989 | ||
@@ -1002,7 +995,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1002 | return NULL; | 995 | return NULL; |
1003 | } | 996 | } |
1004 | 997 | ||
1005 | if (!ip->i_disksize) { | 998 | size = (unsigned int)i_size_read(&ip->i_inode); |
999 | if (size == 0) { | ||
1006 | gfs2_consist_inode(ip); | 1000 | gfs2_consist_inode(ip); |
1007 | buf = ERR_PTR(-EIO); | 1001 | buf = ERR_PTR(-EIO); |
1008 | goto out; | 1002 | goto out; |
@@ -1014,7 +1008,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) | |||
1014 | goto out; | 1008 | goto out; |
1015 | } | 1009 | } |
1016 | 1010 | ||
1017 | x = ip->i_disksize + 1; | 1011 | x = size + 1; |
1018 | buf = kmalloc(x, GFP_NOFS); | 1012 | buf = kmalloc(x, GFP_NOFS); |
1019 | if (!buf) | 1013 | if (!buf) |
1020 | buf = ERR_PTR(-ENOMEM); | 1014 | buf = ERR_PTR(-ENOMEM); |
@@ -1071,30 +1065,6 @@ int gfs2_permission(struct inode *inode, int mask) | |||
1071 | return error; | 1065 | return error; |
1072 | } | 1066 | } |
1073 | 1067 | ||
1074 | /* | ||
1075 | * XXX(truncate): the truncate_setsize calls should be moved to the end. | ||
1076 | */ | ||
1077 | static int setattr_size(struct inode *inode, struct iattr *attr) | ||
1078 | { | ||
1079 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1080 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1081 | int error; | ||
1082 | |||
1083 | if (attr->ia_size != ip->i_disksize) { | ||
1084 | error = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); | ||
1085 | if (error) | ||
1086 | return error; | ||
1087 | truncate_setsize(inode, attr->ia_size); | ||
1088 | gfs2_trans_end(sdp); | ||
1089 | } | ||
1090 | |||
1091 | error = gfs2_truncatei(ip, attr->ia_size); | ||
1092 | if (error && (inode->i_size != ip->i_disksize)) | ||
1093 | i_size_write(inode, ip->i_disksize); | ||
1094 | |||
1095 | return error; | ||
1096 | } | ||
1097 | |||
1098 | static int setattr_chown(struct inode *inode, struct iattr *attr) | 1068 | static int setattr_chown(struct inode *inode, struct iattr *attr) |
1099 | { | 1069 | { |
1100 | struct gfs2_inode *ip = GFS2_I(inode); | 1070 | struct gfs2_inode *ip = GFS2_I(inode); |
@@ -1195,7 +1165,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
1195 | goto out; | 1165 | goto out; |
1196 | 1166 | ||
1197 | if (attr->ia_valid & ATTR_SIZE) | 1167 | if (attr->ia_valid & ATTR_SIZE) |
1198 | error = setattr_size(inode, attr); | 1168 | error = gfs2_setattr_size(inode, attr->ia_size); |
1199 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) | 1169 | else if (attr->ia_valid & (ATTR_UID | ATTR_GID)) |
1200 | error = setattr_chown(inode, attr); | 1170 | error = setattr_chown(inode, attr); |
1201 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) | 1171 | else if ((attr->ia_valid & ATTR_MODE) && IS_POSIXACL(inode)) |
@@ -1301,6 +1271,257 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name) | |||
1301 | return ret; | 1271 | return ret; |
1302 | } | 1272 | } |
1303 | 1273 | ||
1274 | static void empty_write_end(struct page *page, unsigned from, | ||
1275 | unsigned to) | ||
1276 | { | ||
1277 | struct gfs2_inode *ip = GFS2_I(page->mapping->host); | ||
1278 | |||
1279 | page_zero_new_buffers(page, from, to); | ||
1280 | flush_dcache_page(page); | ||
1281 | mark_page_accessed(page); | ||
1282 | |||
1283 | if (!gfs2_is_writeback(ip)) | ||
1284 | gfs2_page_add_databufs(ip, page, from, to); | ||
1285 | |||
1286 | block_commit_write(page, from, to); | ||
1287 | } | ||
1288 | |||
1289 | |||
1290 | static int write_empty_blocks(struct page *page, unsigned from, unsigned to) | ||
1291 | { | ||
1292 | unsigned start, end, next; | ||
1293 | struct buffer_head *bh, *head; | ||
1294 | int error; | ||
1295 | |||
1296 | if (!page_has_buffers(page)) { | ||
1297 | error = __block_write_begin(page, from, to - from, gfs2_block_map); | ||
1298 | if (unlikely(error)) | ||
1299 | return error; | ||
1300 | |||
1301 | empty_write_end(page, from, to); | ||
1302 | return 0; | ||
1303 | } | ||
1304 | |||
1305 | bh = head = page_buffers(page); | ||
1306 | next = end = 0; | ||
1307 | while (next < from) { | ||
1308 | next += bh->b_size; | ||
1309 | bh = bh->b_this_page; | ||
1310 | } | ||
1311 | start = next; | ||
1312 | do { | ||
1313 | next += bh->b_size; | ||
1314 | if (buffer_mapped(bh)) { | ||
1315 | if (end) { | ||
1316 | error = __block_write_begin(page, start, end - start, | ||
1317 | gfs2_block_map); | ||
1318 | if (unlikely(error)) | ||
1319 | return error; | ||
1320 | empty_write_end(page, start, end); | ||
1321 | end = 0; | ||
1322 | } | ||
1323 | start = next; | ||
1324 | } | ||
1325 | else | ||
1326 | end = next; | ||
1327 | bh = bh->b_this_page; | ||
1328 | } while (next < to); | ||
1329 | |||
1330 | if (end) { | ||
1331 | error = __block_write_begin(page, start, end - start, gfs2_block_map); | ||
1332 | if (unlikely(error)) | ||
1333 | return error; | ||
1334 | empty_write_end(page, start, end); | ||
1335 | } | ||
1336 | |||
1337 | return 0; | ||
1338 | } | ||
1339 | |||
1340 | static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, | ||
1341 | int mode) | ||
1342 | { | ||
1343 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1344 | struct buffer_head *dibh; | ||
1345 | int error; | ||
1346 | u64 start = offset >> PAGE_CACHE_SHIFT; | ||
1347 | unsigned int start_offset = offset & ~PAGE_CACHE_MASK; | ||
1348 | u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT; | ||
1349 | pgoff_t curr; | ||
1350 | struct page *page; | ||
1351 | unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK; | ||
1352 | unsigned int from, to; | ||
1353 | |||
1354 | if (!end_offset) | ||
1355 | end_offset = PAGE_CACHE_SIZE; | ||
1356 | |||
1357 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
1358 | if (unlikely(error)) | ||
1359 | goto out; | ||
1360 | |||
1361 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
1362 | |||
1363 | if (gfs2_is_stuffed(ip)) { | ||
1364 | error = gfs2_unstuff_dinode(ip, NULL); | ||
1365 | if (unlikely(error)) | ||
1366 | goto out; | ||
1367 | } | ||
1368 | |||
1369 | curr = start; | ||
1370 | offset = start << PAGE_CACHE_SHIFT; | ||
1371 | from = start_offset; | ||
1372 | to = PAGE_CACHE_SIZE; | ||
1373 | while (curr <= end) { | ||
1374 | page = grab_cache_page_write_begin(inode->i_mapping, curr, | ||
1375 | AOP_FLAG_NOFS); | ||
1376 | if (unlikely(!page)) { | ||
1377 | error = -ENOMEM; | ||
1378 | goto out; | ||
1379 | } | ||
1380 | |||
1381 | if (curr == end) | ||
1382 | to = end_offset; | ||
1383 | error = write_empty_blocks(page, from, to); | ||
1384 | if (!error && offset + to > inode->i_size && | ||
1385 | !(mode & FALLOC_FL_KEEP_SIZE)) { | ||
1386 | i_size_write(inode, offset + to); | ||
1387 | } | ||
1388 | unlock_page(page); | ||
1389 | page_cache_release(page); | ||
1390 | if (error) | ||
1391 | goto out; | ||
1392 | curr++; | ||
1393 | offset += PAGE_CACHE_SIZE; | ||
1394 | from = 0; | ||
1395 | } | ||
1396 | |||
1397 | gfs2_dinode_out(ip, dibh->b_data); | ||
1398 | mark_inode_dirty(inode); | ||
1399 | |||
1400 | brelse(dibh); | ||
1401 | |||
1402 | out: | ||
1403 | return error; | ||
1404 | } | ||
1405 | |||
1406 | static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len, | ||
1407 | unsigned int *data_blocks, unsigned int *ind_blocks) | ||
1408 | { | ||
1409 | const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
1410 | unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone; | ||
1411 | unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1); | ||
1412 | |||
1413 | for (tmp = max_data; tmp > sdp->sd_diptrs;) { | ||
1414 | tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs); | ||
1415 | max_data -= tmp; | ||
1416 | } | ||
1417 | /* This calculation isn't the exact reverse of gfs2_write_calc_reserve, | ||
1418 | so it might end up with fewer data blocks */ | ||
1419 | if (max_data <= *data_blocks) | ||
1420 | return; | ||
1421 | *data_blocks = max_data; | ||
1422 | *ind_blocks = max_blocks - max_data; | ||
1423 | *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift; | ||
1424 | if (*len > max) { | ||
1425 | *len = max; | ||
1426 | gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks); | ||
1427 | } | ||
1428 | } | ||
1429 | |||
1430 | static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset, | ||
1431 | loff_t len) | ||
1432 | { | ||
1433 | struct gfs2_sbd *sdp = GFS2_SB(inode); | ||
1434 | struct gfs2_inode *ip = GFS2_I(inode); | ||
1435 | unsigned int data_blocks = 0, ind_blocks = 0, rblocks; | ||
1436 | loff_t bytes, max_bytes; | ||
1437 | struct gfs2_alloc *al; | ||
1438 | int error; | ||
1439 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | ||
1440 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | ||
1441 | |||
1442 | offset = (offset >> sdp->sd_sb.sb_bsize_shift) << | ||
1443 | sdp->sd_sb.sb_bsize_shift; | ||
1444 | |||
1445 | len = next - offset; | ||
1446 | bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2; | ||
1447 | if (!bytes) | ||
1448 | bytes = UINT_MAX; | ||
1449 | |||
1450 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | ||
1451 | error = gfs2_glock_nq(&ip->i_gh); | ||
1452 | if (unlikely(error)) | ||
1453 | goto out_uninit; | ||
1454 | |||
1455 | if (!gfs2_write_alloc_required(ip, offset, len)) | ||
1456 | goto out_unlock; | ||
1457 | |||
1458 | while (len > 0) { | ||
1459 | if (len < bytes) | ||
1460 | bytes = len; | ||
1461 | al = gfs2_alloc_get(ip); | ||
1462 | if (!al) { | ||
1463 | error = -ENOMEM; | ||
1464 | goto out_unlock; | ||
1465 | } | ||
1466 | |||
1467 | error = gfs2_quota_lock_check(ip); | ||
1468 | if (error) | ||
1469 | goto out_alloc_put; | ||
1470 | |||
1471 | retry: | ||
1472 | gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks); | ||
1473 | |||
1474 | al->al_requested = data_blocks + ind_blocks; | ||
1475 | error = gfs2_inplace_reserve(ip); | ||
1476 | if (error) { | ||
1477 | if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) { | ||
1478 | bytes >>= 1; | ||
1479 | goto retry; | ||
1480 | } | ||
1481 | goto out_qunlock; | ||
1482 | } | ||
1483 | max_bytes = bytes; | ||
1484 | calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks); | ||
1485 | al->al_requested = data_blocks + ind_blocks; | ||
1486 | |||
1487 | rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + | ||
1488 | RES_RG_HDR + gfs2_rg_blocks(al); | ||
1489 | if (gfs2_is_jdata(ip)) | ||
1490 | rblocks += data_blocks ? data_blocks : 1; | ||
1491 | |||
1492 | error = gfs2_trans_begin(sdp, rblocks, | ||
1493 | PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); | ||
1494 | if (error) | ||
1495 | goto out_trans_fail; | ||
1496 | |||
1497 | error = fallocate_chunk(inode, offset, max_bytes, mode); | ||
1498 | gfs2_trans_end(sdp); | ||
1499 | |||
1500 | if (error) | ||
1501 | goto out_trans_fail; | ||
1502 | |||
1503 | len -= max_bytes; | ||
1504 | offset += max_bytes; | ||
1505 | gfs2_inplace_release(ip); | ||
1506 | gfs2_quota_unlock(ip); | ||
1507 | gfs2_alloc_put(ip); | ||
1508 | } | ||
1509 | goto out_unlock; | ||
1510 | |||
1511 | out_trans_fail: | ||
1512 | gfs2_inplace_release(ip); | ||
1513 | out_qunlock: | ||
1514 | gfs2_quota_unlock(ip); | ||
1515 | out_alloc_put: | ||
1516 | gfs2_alloc_put(ip); | ||
1517 | out_unlock: | ||
1518 | gfs2_glock_dq(&ip->i_gh); | ||
1519 | out_uninit: | ||
1520 | gfs2_holder_uninit(&ip->i_gh); | ||
1521 | return error; | ||
1522 | } | ||
1523 | |||
1524 | |||
1304 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 1525 | static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
1305 | u64 start, u64 len) | 1526 | u64 start, u64 len) |
1306 | { | 1527 | { |
@@ -1351,6 +1572,7 @@ const struct inode_operations gfs2_file_iops = { | |||
1351 | .getxattr = gfs2_getxattr, | 1572 | .getxattr = gfs2_getxattr, |
1352 | .listxattr = gfs2_listxattr, | 1573 | .listxattr = gfs2_listxattr, |
1353 | .removexattr = gfs2_removexattr, | 1574 | .removexattr = gfs2_removexattr, |
1575 | .fallocate = gfs2_fallocate, | ||
1354 | .fiemap = gfs2_fiemap, | 1576 | .fiemap = gfs2_fiemap, |
1355 | }; | 1577 | }; |
1356 | 1578 | ||
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 1bc6b5695e6d..58a9b9998b42 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -735,10 +735,8 @@ get_a_page: | |||
735 | goto out; | 735 | goto out; |
736 | 736 | ||
737 | size = loc + sizeof(struct gfs2_quota); | 737 | size = loc + sizeof(struct gfs2_quota); |
738 | if (size > inode->i_size) { | 738 | if (size > inode->i_size) |
739 | ip->i_disksize = size; | ||
740 | i_size_write(inode, size); | 739 | i_size_write(inode, size); |
741 | } | ||
742 | inode->i_mtime = inode->i_atime = CURRENT_TIME; | 740 | inode->i_mtime = inode->i_atime = CURRENT_TIME; |
743 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | 741 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); |
744 | gfs2_dinode_out(ip, dibh->b_data); | 742 | gfs2_dinode_out(ip, dibh->b_data); |
@@ -817,7 +815,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) | |||
817 | goto out_alloc; | 815 | goto out_alloc; |
818 | 816 | ||
819 | if (nalloc) | 817 | if (nalloc) |
820 | blocks += al->al_rgd->rd_length + nalloc * ind_blocks + RES_STATFS; | 818 | blocks += gfs2_rg_blocks(al) + nalloc * ind_blocks + RES_STATFS; |
821 | 819 | ||
822 | error = gfs2_trans_begin(sdp, blocks, 0); | 820 | error = gfs2_trans_begin(sdp, blocks, 0); |
823 | if (error) | 821 | if (error) |
@@ -1190,18 +1188,17 @@ static void gfs2_quota_change_in(struct gfs2_quota_change_host *qc, const void * | |||
1190 | int gfs2_quota_init(struct gfs2_sbd *sdp) | 1188 | int gfs2_quota_init(struct gfs2_sbd *sdp) |
1191 | { | 1189 | { |
1192 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); | 1190 | struct gfs2_inode *ip = GFS2_I(sdp->sd_qc_inode); |
1193 | unsigned int blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | 1191 | u64 size = i_size_read(sdp->sd_qc_inode); |
1192 | unsigned int blocks = size >> sdp->sd_sb.sb_bsize_shift; | ||
1194 | unsigned int x, slot = 0; | 1193 | unsigned int x, slot = 0; |
1195 | unsigned int found = 0; | 1194 | unsigned int found = 0; |
1196 | u64 dblock; | 1195 | u64 dblock; |
1197 | u32 extlen = 0; | 1196 | u32 extlen = 0; |
1198 | int error; | 1197 | int error; |
1199 | 1198 | ||
1200 | if (!ip->i_disksize || ip->i_disksize > (64 << 20) || | 1199 | if (gfs2_check_internal_file_size(sdp->sd_qc_inode, 1, 64 << 20)) |
1201 | ip->i_disksize & (sdp->sd_sb.sb_bsize - 1)) { | ||
1202 | gfs2_consist_inode(ip); | ||
1203 | return -EIO; | 1200 | return -EIO; |
1204 | } | 1201 | |
1205 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; | 1202 | sdp->sd_quota_slots = blocks * sdp->sd_qc_per_block; |
1206 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); | 1203 | sdp->sd_quota_chunks = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * PAGE_SIZE); |
1207 | 1204 | ||
@@ -1589,6 +1586,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, | |||
1589 | error = gfs2_inplace_reserve(ip); | 1586 | error = gfs2_inplace_reserve(ip); |
1590 | if (error) | 1587 | if (error) |
1591 | goto out_alloc; | 1588 | goto out_alloc; |
1589 | blocks += gfs2_rg_blocks(al); | ||
1592 | } | 1590 | } |
1593 | 1591 | ||
1594 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); | 1592 | error = gfs2_trans_begin(sdp, blocks + RES_DINODE + 1, 0); |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index f7f89a94a5a4..f2a02edcac8f 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -455,11 +455,13 @@ void gfs2_recover_func(struct work_struct *work) | |||
455 | int ro = 0; | 455 | int ro = 0; |
456 | unsigned int pass; | 456 | unsigned int pass; |
457 | int error; | 457 | int error; |
458 | int jlocked = 0; | ||
458 | 459 | ||
459 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 460 | if (sdp->sd_args.ar_spectator || |
461 | (jd->jd_jid != sdp->sd_lockstruct.ls_jid)) { | ||
460 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", | 462 | fs_info(sdp, "jid=%u: Trying to acquire journal lock...\n", |
461 | jd->jd_jid); | 463 | jd->jd_jid); |
462 | 464 | jlocked = 1; | |
463 | /* Acquire the journal lock so we can do recovery */ | 465 | /* Acquire the journal lock so we can do recovery */ |
464 | 466 | ||
465 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, | 467 | error = gfs2_glock_nq_num(sdp, jd->jd_jid, &gfs2_journal_glops, |
@@ -554,13 +556,12 @@ void gfs2_recover_func(struct work_struct *work) | |||
554 | jd->jd_jid, t); | 556 | jd->jd_jid, t); |
555 | } | 557 | } |
556 | 558 | ||
557 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | ||
558 | gfs2_glock_dq_uninit(&ji_gh); | ||
559 | |||
560 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); | 559 | gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_SUCCESS); |
561 | 560 | ||
562 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) | 561 | if (jlocked) { |
562 | gfs2_glock_dq_uninit(&ji_gh); | ||
563 | gfs2_glock_dq_uninit(&j_gh); | 563 | gfs2_glock_dq_uninit(&j_gh); |
564 | } | ||
564 | 565 | ||
565 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); | 566 | fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); |
566 | goto done; | 567 | goto done; |
@@ -568,7 +569,7 @@ void gfs2_recover_func(struct work_struct *work) | |||
568 | fail_gunlock_tr: | 569 | fail_gunlock_tr: |
569 | gfs2_glock_dq_uninit(&t_gh); | 570 | gfs2_glock_dq_uninit(&t_gh); |
570 | fail_gunlock_ji: | 571 | fail_gunlock_ji: |
571 | if (jd->jd_jid != sdp->sd_lockstruct.ls_jid) { | 572 | if (jlocked) { |
572 | gfs2_glock_dq_uninit(&ji_gh); | 573 | gfs2_glock_dq_uninit(&ji_gh); |
573 | fail_gunlock_j: | 574 | fail_gunlock_j: |
574 | gfs2_glock_dq_uninit(&j_gh); | 575 | gfs2_glock_dq_uninit(&j_gh); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 171a744f8e45..bef3ab6cf5c1 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -500,7 +500,7 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp) | |||
500 | for (rgrps = 0;; rgrps++) { | 500 | for (rgrps = 0;; rgrps++) { |
501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); | 501 | loff_t pos = rgrps * sizeof(struct gfs2_rindex); |
502 | 502 | ||
503 | if (pos + sizeof(struct gfs2_rindex) >= ip->i_disksize) | 503 | if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) |
504 | break; | 504 | break; |
505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, | 505 | error = gfs2_internal_read(ip, &ra_state, buf, &pos, |
506 | sizeof(struct gfs2_rindex)); | 506 | sizeof(struct gfs2_rindex)); |
@@ -588,7 +588,9 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 588 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
589 | struct inode *inode = &ip->i_inode; | 589 | struct inode *inode = &ip->i_inode; |
590 | struct file_ra_state ra_state; | 590 | struct file_ra_state ra_state; |
591 | u64 rgrp_count = ip->i_disksize; | 591 | u64 rgrp_count = i_size_read(inode); |
592 | struct gfs2_rgrpd *rgd; | ||
593 | unsigned int max_data = 0; | ||
592 | int error; | 594 | int error; |
593 | 595 | ||
594 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); | 596 | do_div(rgrp_count, sizeof(struct gfs2_rindex)); |
@@ -603,6 +605,10 @@ static int gfs2_ri_update(struct gfs2_inode *ip) | |||
603 | } | 605 | } |
604 | } | 606 | } |
605 | 607 | ||
608 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
609 | if (rgd->rd_data > max_data) | ||
610 | max_data = rgd->rd_data; | ||
611 | sdp->sd_max_rg_data = max_data; | ||
606 | sdp->sd_rindex_uptodate = 1; | 612 | sdp->sd_rindex_uptodate = 1; |
607 | return 0; | 613 | return 0; |
608 | } | 614 | } |
@@ -622,13 +628,15 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
622 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 628 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
623 | struct inode *inode = &ip->i_inode; | 629 | struct inode *inode = &ip->i_inode; |
624 | struct file_ra_state ra_state; | 630 | struct file_ra_state ra_state; |
631 | struct gfs2_rgrpd *rgd; | ||
632 | unsigned int max_data = 0; | ||
625 | int error; | 633 | int error; |
626 | 634 | ||
627 | file_ra_state_init(&ra_state, inode->i_mapping); | 635 | file_ra_state_init(&ra_state, inode->i_mapping); |
628 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { | 636 | for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { |
629 | /* Ignore partials */ | 637 | /* Ignore partials */ |
630 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > | 638 | if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > |
631 | ip->i_disksize) | 639 | i_size_read(inode)) |
632 | break; | 640 | break; |
633 | error = read_rindex_entry(ip, &ra_state); | 641 | error = read_rindex_entry(ip, &ra_state); |
634 | if (error) { | 642 | if (error) { |
@@ -636,6 +644,10 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip) | |||
636 | return error; | 644 | return error; |
637 | } | 645 | } |
638 | } | 646 | } |
647 | list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) | ||
648 | if (rgd->rd_data > max_data) | ||
649 | max_data = rgd->rd_data; | ||
650 | sdp->sd_max_rg_data = max_data; | ||
639 | 651 | ||
640 | sdp->sd_rindex_uptodate = 1; | 652 | sdp->sd_rindex_uptodate = 1; |
641 | return 0; | 653 | return 0; |
@@ -854,8 +866,7 @@ static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, | |||
854 | if ((start + nr_sects) != blk) { | 866 | if ((start + nr_sects) != blk) { |
855 | rv = blkdev_issue_discard(bdev, start, | 867 | rv = blkdev_issue_discard(bdev, start, |
856 | nr_sects, GFP_NOFS, | 868 | nr_sects, GFP_NOFS, |
857 | BLKDEV_IFL_WAIT | | 869 | 0); |
858 | BLKDEV_IFL_BARRIER); | ||
859 | if (rv) | 870 | if (rv) |
860 | goto fail; | 871 | goto fail; |
861 | nr_sects = 0; | 872 | nr_sects = 0; |
@@ -869,8 +880,7 @@ start_new_extent: | |||
869 | } | 880 | } |
870 | } | 881 | } |
871 | if (nr_sects) { | 882 | if (nr_sects) { |
872 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, | 883 | rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); |
873 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
874 | if (rv) | 884 | if (rv) |
875 | goto fail; | 885 | goto fail; |
876 | } | 886 | } |
@@ -1188,7 +1198,8 @@ out: | |||
1188 | * Returns: errno | 1198 | * Returns: errno |
1189 | */ | 1199 | */ |
1190 | 1200 | ||
1191 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | 1201 | int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
1202 | char *file, unsigned int line) | ||
1192 | { | 1203 | { |
1193 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1204 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1194 | struct gfs2_alloc *al = ip->i_alloc; | 1205 | struct gfs2_alloc *al = ip->i_alloc; |
@@ -1199,12 +1210,15 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, unsigned int line) | |||
1199 | return -EINVAL; | 1210 | return -EINVAL; |
1200 | 1211 | ||
1201 | try_again: | 1212 | try_again: |
1202 | /* We need to hold the rindex unless the inode we're using is | 1213 | if (hold_rindex) { |
1203 | the rindex itself, in which case it's already held. */ | 1214 | /* We need to hold the rindex unless the inode we're using is |
1204 | if (ip != GFS2_I(sdp->sd_rindex)) | 1215 | the rindex itself, in which case it's already held. */ |
1205 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); | 1216 | if (ip != GFS2_I(sdp->sd_rindex)) |
1206 | else if (!sdp->sd_rgrps) /* We may not have the rindex read in, so: */ | 1217 | error = gfs2_rindex_hold(sdp, &al->al_ri_gh); |
1207 | error = gfs2_ri_update_special(ip); | 1218 | else if (!sdp->sd_rgrps) /* We may not have the rindex read |
1219 | in, so: */ | ||
1220 | error = gfs2_ri_update_special(ip); | ||
1221 | } | ||
1208 | 1222 | ||
1209 | if (error) | 1223 | if (error) |
1210 | return error; | 1224 | return error; |
@@ -1215,7 +1229,7 @@ try_again: | |||
1215 | try to free it, and try the allocation again. */ | 1229 | try to free it, and try the allocation again. */ |
1216 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); | 1230 | error = get_local_rgrp(ip, &unlinked, &last_unlinked); |
1217 | if (error) { | 1231 | if (error) { |
1218 | if (ip != GFS2_I(sdp->sd_rindex)) | 1232 | if (hold_rindex && ip != GFS2_I(sdp->sd_rindex)) |
1219 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1233 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1220 | if (error != -EAGAIN) | 1234 | if (error != -EAGAIN) |
1221 | return error; | 1235 | return error; |
@@ -1257,7 +1271,7 @@ void gfs2_inplace_release(struct gfs2_inode *ip) | |||
1257 | al->al_rgd = NULL; | 1271 | al->al_rgd = NULL; |
1258 | if (al->al_rgd_gh.gh_gl) | 1272 | if (al->al_rgd_gh.gh_gl) |
1259 | gfs2_glock_dq_uninit(&al->al_rgd_gh); | 1273 | gfs2_glock_dq_uninit(&al->al_rgd_gh); |
1260 | if (ip != GFS2_I(sdp->sd_rindex)) | 1274 | if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl) |
1261 | gfs2_glock_dq_uninit(&al->al_ri_gh); | 1275 | gfs2_glock_dq_uninit(&al->al_ri_gh); |
1262 | } | 1276 | } |
1263 | 1277 | ||
@@ -1496,11 +1510,19 @@ int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) | |||
1496 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 1510 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
1497 | struct buffer_head *dibh; | 1511 | struct buffer_head *dibh; |
1498 | struct gfs2_alloc *al = ip->i_alloc; | 1512 | struct gfs2_alloc *al = ip->i_alloc; |
1499 | struct gfs2_rgrpd *rgd = al->al_rgd; | 1513 | struct gfs2_rgrpd *rgd; |
1500 | u32 goal, blk; | 1514 | u32 goal, blk; |
1501 | u64 block; | 1515 | u64 block; |
1502 | int error; | 1516 | int error; |
1503 | 1517 | ||
1518 | /* Only happens if there is a bug in gfs2, return something distinctive | ||
1519 | * to ensure that it is noticed. | ||
1520 | */ | ||
1521 | if (al == NULL) | ||
1522 | return -ECANCELED; | ||
1523 | |||
1524 | rgd = al->al_rgd; | ||
1525 | |||
1504 | if (rgrp_contains_block(rgd, ip->i_goal)) | 1526 | if (rgrp_contains_block(rgd, ip->i_goal)) |
1505 | goal = ip->i_goal - rgd->rd_data0; | 1527 | goal = ip->i_goal - rgd->rd_data0; |
1506 | else | 1528 | else |
diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index f07119d89557..0e35c0466f9a 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h | |||
@@ -39,10 +39,12 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) | |||
39 | ip->i_alloc = NULL; | 39 | ip->i_alloc = NULL; |
40 | } | 40 | } |
41 | 41 | ||
42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, | 42 | extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, |
43 | unsigned int line); | 43 | char *file, unsigned int line); |
44 | #define gfs2_inplace_reserve(ip) \ | 44 | #define gfs2_inplace_reserve(ip) \ |
45 | gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) | 45 | gfs2_inplace_reserve_i((ip), 1, __FILE__, __LINE__) |
46 | #define gfs2_inplace_reserve_ri(ip) \ | ||
47 | gfs2_inplace_reserve_i((ip), 0, __FILE__, __LINE__) | ||
46 | 48 | ||
47 | extern void gfs2_inplace_release(struct gfs2_inode *ip); | 49 | extern void gfs2_inplace_release(struct gfs2_inode *ip); |
48 | 50 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 77cb9f830ee4..2b2c4997430b 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -85,6 +85,7 @@ static const match_table_t tokens = { | |||
85 | {Opt_locktable, "locktable=%s"}, | 85 | {Opt_locktable, "locktable=%s"}, |
86 | {Opt_hostdata, "hostdata=%s"}, | 86 | {Opt_hostdata, "hostdata=%s"}, |
87 | {Opt_spectator, "spectator"}, | 87 | {Opt_spectator, "spectator"}, |
88 | {Opt_spectator, "norecovery"}, | ||
88 | {Opt_ignore_local_fs, "ignore_local_fs"}, | 89 | {Opt_ignore_local_fs, "ignore_local_fs"}, |
89 | {Opt_localflocks, "localflocks"}, | 90 | {Opt_localflocks, "localflocks"}, |
90 | {Opt_localcaching, "localcaching"}, | 91 | {Opt_localcaching, "localcaching"}, |
@@ -159,13 +160,13 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
159 | args->ar_spectator = 1; | 160 | args->ar_spectator = 1; |
160 | break; | 161 | break; |
161 | case Opt_ignore_local_fs: | 162 | case Opt_ignore_local_fs: |
162 | args->ar_ignore_local_fs = 1; | 163 | /* Retained for backwards compat only */ |
163 | break; | 164 | break; |
164 | case Opt_localflocks: | 165 | case Opt_localflocks: |
165 | args->ar_localflocks = 1; | 166 | args->ar_localflocks = 1; |
166 | break; | 167 | break; |
167 | case Opt_localcaching: | 168 | case Opt_localcaching: |
168 | args->ar_localcaching = 1; | 169 | /* Retained for backwards compat only */ |
169 | break; | 170 | break; |
170 | case Opt_debug: | 171 | case Opt_debug: |
171 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | 172 | if (args->ar_errors == GFS2_ERRORS_PANIC) { |
@@ -179,7 +180,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
179 | args->ar_debug = 0; | 180 | args->ar_debug = 0; |
180 | break; | 181 | break; |
181 | case Opt_upgrade: | 182 | case Opt_upgrade: |
182 | args->ar_upgrade = 1; | 183 | /* Retained for backwards compat only */ |
183 | break; | 184 | break; |
184 | case Opt_acl: | 185 | case Opt_acl: |
185 | args->ar_posix_acl = 1; | 186 | args->ar_posix_acl = 1; |
@@ -342,15 +343,14 @@ int gfs2_jdesc_check(struct gfs2_jdesc *jd) | |||
342 | { | 343 | { |
343 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | 344 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); |
344 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 345 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
346 | u64 size = i_size_read(jd->jd_inode); | ||
345 | 347 | ||
346 | if (ip->i_disksize < (8 << 20) || ip->i_disksize > (1 << 30) || | 348 | if (gfs2_check_internal_file_size(jd->jd_inode, 8 << 20, 1 << 30)) |
347 | (ip->i_disksize & (sdp->sd_sb.sb_bsize - 1))) { | ||
348 | gfs2_consist_inode(ip); | ||
349 | return -EIO; | 349 | return -EIO; |
350 | } | ||
351 | jd->jd_blocks = ip->i_disksize >> sdp->sd_sb.sb_bsize_shift; | ||
352 | 350 | ||
353 | if (gfs2_write_alloc_required(ip, 0, ip->i_disksize)) { | 351 | jd->jd_blocks = size >> sdp->sd_sb.sb_bsize_shift; |
352 | |||
353 | if (gfs2_write_alloc_required(ip, 0, size)) { | ||
354 | gfs2_consist_inode(ip); | 354 | gfs2_consist_inode(ip); |
355 | return -EIO; | 355 | return -EIO; |
356 | } | 356 | } |
@@ -857,7 +857,6 @@ restart: | |||
857 | gfs2_clear_rgrpd(sdp); | 857 | gfs2_clear_rgrpd(sdp); |
858 | gfs2_jindex_free(sdp); | 858 | gfs2_jindex_free(sdp); |
859 | /* Take apart glock structures and buffer lists */ | 859 | /* Take apart glock structures and buffer lists */ |
860 | invalidate_inodes(sdp->sd_vfs); | ||
861 | gfs2_gl_hash_clear(sdp); | 860 | gfs2_gl_hash_clear(sdp); |
862 | /* Unmount the locking protocol */ | 861 | /* Unmount the locking protocol */ |
863 | gfs2_lm_unmount(sdp); | 862 | gfs2_lm_unmount(sdp); |
@@ -1129,9 +1128,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1129 | 1128 | ||
1130 | /* Some flags must not be changed */ | 1129 | /* Some flags must not be changed */ |
1131 | if (args_neq(&args, &sdp->sd_args, spectator) || | 1130 | if (args_neq(&args, &sdp->sd_args, spectator) || |
1132 | args_neq(&args, &sdp->sd_args, ignore_local_fs) || | ||
1133 | args_neq(&args, &sdp->sd_args, localflocks) || | 1131 | args_neq(&args, &sdp->sd_args, localflocks) || |
1134 | args_neq(&args, &sdp->sd_args, localcaching) || | ||
1135 | args_neq(&args, &sdp->sd_args, meta)) | 1132 | args_neq(&args, &sdp->sd_args, meta)) |
1136 | return -EINVAL; | 1133 | return -EINVAL; |
1137 | 1134 | ||
@@ -1234,16 +1231,10 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1234 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); | 1231 | seq_printf(s, ",hostdata=%s", args->ar_hostdata); |
1235 | if (args->ar_spectator) | 1232 | if (args->ar_spectator) |
1236 | seq_printf(s, ",spectator"); | 1233 | seq_printf(s, ",spectator"); |
1237 | if (args->ar_ignore_local_fs) | ||
1238 | seq_printf(s, ",ignore_local_fs"); | ||
1239 | if (args->ar_localflocks) | 1234 | if (args->ar_localflocks) |
1240 | seq_printf(s, ",localflocks"); | 1235 | seq_printf(s, ",localflocks"); |
1241 | if (args->ar_localcaching) | ||
1242 | seq_printf(s, ",localcaching"); | ||
1243 | if (args->ar_debug) | 1236 | if (args->ar_debug) |
1244 | seq_printf(s, ",debug"); | 1237 | seq_printf(s, ",debug"); |
1245 | if (args->ar_upgrade) | ||
1246 | seq_printf(s, ",upgrade"); | ||
1247 | if (args->ar_posix_acl) | 1238 | if (args->ar_posix_acl) |
1248 | seq_printf(s, ",acl"); | 1239 | seq_printf(s, ",acl"); |
1249 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { | 1240 | if (args->ar_quota != GFS2_QUOTA_DEFAULT) { |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index ccacffd2faaa..748ccb557c18 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -230,7 +230,10 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len | |||
230 | 230 | ||
231 | if (gltype > LM_TYPE_JOURNAL) | 231 | if (gltype > LM_TYPE_JOURNAL) |
232 | return -EINVAL; | 232 | return -EINVAL; |
233 | glops = gfs2_glops_list[gltype]; | 233 | if (gltype == LM_TYPE_NONDISK && glnum == GFS2_TRANS_LOCK) |
234 | glops = &gfs2_trans_glops; | ||
235 | else | ||
236 | glops = gfs2_glops_list[gltype]; | ||
234 | if (glops == NULL) | 237 | if (glops == NULL) |
235 | return -EINVAL; | 238 | return -EINVAL; |
236 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) | 239 | if (!test_and_set_bit(SDF_DEMOTE, &sdp->sd_flags)) |
@@ -399,31 +402,32 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) | |||
399 | 402 | ||
400 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) | 403 | static ssize_t jid_show(struct gfs2_sbd *sdp, char *buf) |
401 | { | 404 | { |
402 | return sprintf(buf, "%u\n", sdp->sd_lockstruct.ls_jid); | 405 | return sprintf(buf, "%d\n", sdp->sd_lockstruct.ls_jid); |
403 | } | 406 | } |
404 | 407 | ||
405 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | 408 | static ssize_t jid_store(struct gfs2_sbd *sdp, const char *buf, size_t len) |
406 | { | 409 | { |
407 | unsigned jid; | 410 | int jid; |
408 | int rv; | 411 | int rv; |
409 | 412 | ||
410 | rv = sscanf(buf, "%u", &jid); | 413 | rv = sscanf(buf, "%d", &jid); |
411 | if (rv != 1) | 414 | if (rv != 1) |
412 | return -EINVAL; | 415 | return -EINVAL; |
413 | 416 | ||
414 | spin_lock(&sdp->sd_jindex_spin); | 417 | spin_lock(&sdp->sd_jindex_spin); |
415 | rv = -EINVAL; | 418 | rv = -EINVAL; |
416 | if (sdp->sd_args.ar_spectator) | ||
417 | goto out; | ||
418 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) | 419 | if (sdp->sd_lockstruct.ls_ops->lm_mount == NULL) |
419 | goto out; | 420 | goto out; |
420 | rv = -EBUSY; | 421 | rv = -EBUSY; |
421 | if (test_and_clear_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) | 422 | if (test_bit(SDF_NOJOURNALID, &sdp->sd_flags) == 0) |
422 | goto out; | 423 | goto out; |
424 | rv = 0; | ||
425 | if (sdp->sd_args.ar_spectator && jid > 0) | ||
426 | rv = jid = -EINVAL; | ||
423 | sdp->sd_lockstruct.ls_jid = jid; | 427 | sdp->sd_lockstruct.ls_jid = jid; |
428 | clear_bit(SDF_NOJOURNALID, &sdp->sd_flags); | ||
424 | smp_mb__after_clear_bit(); | 429 | smp_mb__after_clear_bit(); |
425 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); | 430 | wake_up_bit(&sdp->sd_flags, SDF_NOJOURNALID); |
426 | rv = 0; | ||
427 | out: | 431 | out: |
428 | spin_unlock(&sdp->sd_jindex_spin); | 432 | spin_unlock(&sdp->sd_jindex_spin); |
429 | return rv ? rv : len; | 433 | return rv ? rv : len; |
@@ -617,7 +621,7 @@ static int gfs2_uevent(struct kset *kset, struct kobject *kobj, | |||
617 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); | 621 | add_uevent_var(env, "LOCKTABLE=%s", sdp->sd_table_name); |
618 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); | 622 | add_uevent_var(env, "LOCKPROTO=%s", sdp->sd_proto_name); |
619 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) | 623 | if (!test_bit(SDF_NOJOURNALID, &sdp->sd_flags)) |
620 | add_uevent_var(env, "JOURNALID=%u", sdp->sd_lockstruct.ls_jid); | 624 | add_uevent_var(env, "JOURNALID=%d", sdp->sd_lockstruct.ls_jid); |
621 | if (gfs2_uuid_valid(uuid)) | 625 | if (gfs2_uuid_valid(uuid)) |
622 | add_uevent_var(env, "UUID=%pUB", uuid); | 626 | add_uevent_var(env, "UUID=%pUB", uuid); |
623 | return 0; | 627 | return 0; |
diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 148d55c14171..cedb0bb96d96 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h | |||
@@ -39,7 +39,8 @@ | |||
39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ | 39 | {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ |
40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ | 40 | {(1UL << GLF_REPLY_PENDING), "r" }, \ |
41 | {(1UL << GLF_INITIAL), "I" }, \ | 41 | {(1UL << GLF_INITIAL), "I" }, \ |
42 | {(1UL << GLF_FROZEN), "F" }) | 42 | {(1UL << GLF_FROZEN), "F" }, \ |
43 | {(1UL << GLF_QUEUED), "q" }) | ||
43 | 44 | ||
44 | #ifndef NUMPTY | 45 | #ifndef NUMPTY |
45 | #define NUMPTY | 46 | #define NUMPTY |
diff --git a/fs/gfs2/trans.h b/fs/gfs2/trans.h index edf9d4bd908e..fb56b783e028 100644 --- a/fs/gfs2/trans.h +++ b/fs/gfs2/trans.h | |||
@@ -20,11 +20,20 @@ struct gfs2_glock; | |||
20 | #define RES_JDATA 1 | 20 | #define RES_JDATA 1 |
21 | #define RES_DATA 1 | 21 | #define RES_DATA 1 |
22 | #define RES_LEAF 1 | 22 | #define RES_LEAF 1 |
23 | #define RES_RG_HDR 1 | ||
23 | #define RES_RG_BIT 2 | 24 | #define RES_RG_BIT 2 |
24 | #define RES_EATTR 1 | 25 | #define RES_EATTR 1 |
25 | #define RES_STATFS 1 | 26 | #define RES_STATFS 1 |
26 | #define RES_QUOTA 2 | 27 | #define RES_QUOTA 2 |
27 | 28 | ||
29 | /* reserve either the number of blocks to be allocated plus the rg header | ||
30 | * block, or all of the blocks in the rg, whichever is smaller */ | ||
31 | static inline unsigned int gfs2_rg_blocks(const struct gfs2_alloc *al) | ||
32 | { | ||
33 | return (al->al_requested < al->al_rgd->rd_length)? | ||
34 | al->al_requested + 1 : al->al_rgd->rd_length; | ||
35 | } | ||
36 | |||
28 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | 37 | int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, |
29 | unsigned int revokes); | 38 | unsigned int revokes); |
30 | 39 | ||
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 776af6eb4bcb..30b58f07c8a6 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c | |||
@@ -734,7 +734,7 @@ static int ea_alloc_skeleton(struct gfs2_inode *ip, struct gfs2_ea_request *er, | |||
734 | goto out_gunlock_q; | 734 | goto out_gunlock_q; |
735 | 735 | ||
736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), | 736 | error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), |
737 | blks + al->al_rgd->rd_length + | 737 | blks + gfs2_rg_blocks(al) + |
738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); | 738 | RES_DINODE + RES_STATFS + RES_QUOTA, 0); |
739 | if (error) | 739 | if (error) |
740 | goto out_ipres; | 740 | goto out_ipres; |
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index 4129cdb3f0d8..571abe97b42a 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c | |||
@@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
29 | 29 | ||
@@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
37 | } | 37 | } |
38 | 38 | ||
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c index 38a0a9917d7f..3ebc437736fe 100644 --- a/fs/hfs/btree.c +++ b/fs/hfs/btree.c | |||
@@ -27,7 +27,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke | |||
27 | if (!tree) | 27 | if (!tree) |
28 | return NULL; | 28 | return NULL; |
29 | 29 | ||
30 | init_MUTEX(&tree->tree_lock); | 30 | mutex_init(&tree->tree_lock); |
31 | spin_lock_init(&tree->hash_lock); | 31 | spin_lock_init(&tree->hash_lock); |
32 | /* Set the correct compare function */ | 32 | /* Set the correct compare function */ |
33 | tree->sb = sb; | 33 | tree->sb = sb; |
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h index cc51905ac21d..2a1d712f85dc 100644 --- a/fs/hfs/btree.h +++ b/fs/hfs/btree.h | |||
@@ -33,7 +33,7 @@ struct hfs_btree { | |||
33 | unsigned int depth; | 33 | unsigned int depth; |
34 | 34 | ||
35 | //unsigned int map1_size, map_size; | 35 | //unsigned int map1_size, map_size; |
36 | struct semaphore tree_lock; | 36 | struct mutex tree_lock; |
37 | 37 | ||
38 | unsigned int pages_per_bnode; | 38 | unsigned int pages_per_bnode; |
39 | spinlock_t hash_lock; | 39 | spinlock_t hash_lock; |
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h index 4f55651aaa51..c8cffb81e849 100644 --- a/fs/hfs/hfs_fs.h +++ b/fs/hfs/hfs_fs.h | |||
@@ -147,8 +147,6 @@ struct hfs_sb_info { | |||
147 | u16 blockoffset; | 147 | u16 blockoffset; |
148 | 148 | ||
149 | int fs_div; | 149 | int fs_div; |
150 | |||
151 | struct hlist_head rsrc_inodes; | ||
152 | }; | 150 | }; |
153 | 151 | ||
154 | #define HFS_FLG_BITMAP_DIRTY 0 | 152 | #define HFS_FLG_BITMAP_DIRTY 0 |
@@ -254,17 +252,6 @@ static inline void hfs_bitmap_dirty(struct super_block *sb) | |||
254 | sb->s_dirt = 1; | 252 | sb->s_dirt = 1; |
255 | } | 253 | } |
256 | 254 | ||
257 | static inline void hfs_buffer_sync(struct buffer_head *bh) | ||
258 | { | ||
259 | while (buffer_locked(bh)) { | ||
260 | wait_on_buffer(bh); | ||
261 | } | ||
262 | if (buffer_dirty(bh)) { | ||
263 | ll_rw_block(WRITE, 1, &bh); | ||
264 | wait_on_buffer(bh); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | #define sb_bread512(sb, sec, data) ({ \ | 255 | #define sb_bread512(sb, sec, data) ({ \ |
269 | struct buffer_head *__bh; \ | 256 | struct buffer_head *__bh; \ |
270 | sector_t __block; \ | 257 | sector_t __block; \ |
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 397b7adc7ce6..dffb4e996643 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry, | |||
524 | HFS_I(inode)->rsrc_inode = dir; | 524 | HFS_I(inode)->rsrc_inode = dir; |
525 | HFS_I(dir)->rsrc_inode = inode; | 525 | HFS_I(dir)->rsrc_inode = inode; |
526 | igrab(dir); | 526 | igrab(dir); |
527 | hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes); | 527 | hlist_add_fake(&inode->i_hash); |
528 | mark_inode_dirty(inode); | 528 | mark_inode_dirty(inode); |
529 | out: | 529 | out: |
530 | d_add(dentry, inode); | 530 | d_add(dentry, inode); |
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c index 86428f5ac991..1563d5ce5764 100644 --- a/fs/hfs/mdb.c +++ b/fs/hfs/mdb.c | |||
@@ -220,7 +220,7 @@ int hfs_mdb_get(struct super_block *sb) | |||
220 | mdb->drLsMod = hfs_mtime(); | 220 | mdb->drLsMod = hfs_mtime(); |
221 | 221 | ||
222 | mark_buffer_dirty(HFS_SB(sb)->mdb_bh); | 222 | mark_buffer_dirty(HFS_SB(sb)->mdb_bh); |
223 | hfs_buffer_sync(HFS_SB(sb)->mdb_bh); | 223 | sync_dirty_buffer(HFS_SB(sb)->mdb_bh); |
224 | } | 224 | } |
225 | 225 | ||
226 | return 0; | 226 | return 0; |
@@ -287,7 +287,7 @@ void hfs_mdb_commit(struct super_block *sb) | |||
287 | HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); | 287 | HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT); |
288 | HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); | 288 | HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT); |
289 | mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); | 289 | mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh); |
290 | hfs_buffer_sync(HFS_SB(sb)->alt_mdb_bh); | 290 | sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh); |
291 | } | 291 | } |
292 | 292 | ||
293 | if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) { | 293 | if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) { |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 34235d4bf08b..6ee1586f2334 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/parser.h> | 20 | #include <linux/parser.h> |
21 | #include <linux/seq_file.h> | 21 | #include <linux/seq_file.h> |
22 | #include <linux/slab.h> | 22 | #include <linux/slab.h> |
23 | #include <linux/smp_lock.h> | ||
24 | #include <linux/vfs.h> | 23 | #include <linux/vfs.h> |
25 | 24 | ||
26 | #include "hfs_fs.h" | 25 | #include "hfs_fs.h" |
@@ -79,15 +78,11 @@ static int hfs_sync_fs(struct super_block *sb, int wait) | |||
79 | */ | 78 | */ |
80 | static void hfs_put_super(struct super_block *sb) | 79 | static void hfs_put_super(struct super_block *sb) |
81 | { | 80 | { |
82 | lock_kernel(); | ||
83 | |||
84 | if (sb->s_dirt) | 81 | if (sb->s_dirt) |
85 | hfs_write_super(sb); | 82 | hfs_write_super(sb); |
86 | hfs_mdb_close(sb); | 83 | hfs_mdb_close(sb); |
87 | /* release the MDB's resources */ | 84 | /* release the MDB's resources */ |
88 | hfs_mdb_put(sb); | 85 | hfs_mdb_put(sb); |
89 | |||
90 | unlock_kernel(); | ||
91 | } | 86 | } |
92 | 87 | ||
93 | /* | 88 | /* |
@@ -385,8 +380,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent) | |||
385 | sbi = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); | 380 | sbi = kzalloc(sizeof(struct hfs_sb_info), GFP_KERNEL); |
386 | if (!sbi) | 381 | if (!sbi) |
387 | return -ENOMEM; | 382 | return -ENOMEM; |
383 | |||
388 | sb->s_fs_info = sbi; | 384 | sb->s_fs_info = sbi; |
389 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | ||
390 | 385 | ||
391 | res = -EINVAL; | 386 | res = -EINVAL; |
392 | if (!parse_options((char *)data, sbi)) { | 387 | if (!parse_options((char *)data, sbi)) { |
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c index 5007a41f1be9..d182438c7ae4 100644 --- a/fs/hfsplus/bfind.c +++ b/fs/hfsplus/bfind.c | |||
@@ -23,7 +23,7 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) | |||
23 | fd->search_key = ptr; | 23 | fd->search_key = ptr; |
24 | fd->key = ptr + tree->max_key_len + 2; | 24 | fd->key = ptr + tree->max_key_len + 2; |
25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); | 25 | dprint(DBG_BNODE_REFS, "find_init: %d (%p)\n", tree->cnid, __builtin_return_address(0)); |
26 | down(&tree->tree_lock); | 26 | mutex_lock(&tree->tree_lock); |
27 | return 0; | 27 | return 0; |
28 | } | 28 | } |
29 | 29 | ||
@@ -32,7 +32,7 @@ void hfs_find_exit(struct hfs_find_data *fd) | |||
32 | hfs_bnode_put(fd->bnode); | 32 | hfs_bnode_put(fd->bnode); |
33 | kfree(fd->search_key); | 33 | kfree(fd->search_key); |
34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); | 34 | dprint(DBG_BNODE_REFS, "find_exit: %d (%p)\n", fd->tree->cnid, __builtin_return_address(0)); |
35 | up(&fd->tree->tree_lock); | 35 | mutex_unlock(&fd->tree->tree_lock); |
36 | fd->tree = NULL; | 36 | fd->tree = NULL; |
37 | } | 37 | } |
38 | 38 | ||
@@ -52,6 +52,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
52 | rec = (e + b) / 2; | 52 | rec = (e + b) / 2; |
53 | len = hfs_brec_lenoff(bnode, rec, &off); | 53 | len = hfs_brec_lenoff(bnode, rec, &off); |
54 | keylen = hfs_brec_keylen(bnode, rec); | 54 | keylen = hfs_brec_keylen(bnode, rec); |
55 | if (keylen == 0) { | ||
56 | res = -EINVAL; | ||
57 | goto fail; | ||
58 | } | ||
55 | hfs_bnode_read(bnode, fd->key, off, keylen); | 59 | hfs_bnode_read(bnode, fd->key, off, keylen); |
56 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); | 60 | cmpval = bnode->tree->keycmp(fd->key, fd->search_key); |
57 | if (!cmpval) { | 61 | if (!cmpval) { |
@@ -67,6 +71,10 @@ int __hfs_brec_find(struct hfs_bnode *bnode, struct hfs_find_data *fd) | |||
67 | if (rec != e && e >= 0) { | 71 | if (rec != e && e >= 0) { |
68 | len = hfs_brec_lenoff(bnode, e, &off); | 72 | len = hfs_brec_lenoff(bnode, e, &off); |
69 | keylen = hfs_brec_keylen(bnode, e); | 73 | keylen = hfs_brec_keylen(bnode, e); |
74 | if (keylen == 0) { | ||
75 | res = -EINVAL; | ||
76 | goto fail; | ||
77 | } | ||
70 | hfs_bnode_read(bnode, fd->key, off, keylen); | 78 | hfs_bnode_read(bnode, fd->key, off, keylen); |
71 | } | 79 | } |
72 | done: | 80 | done: |
@@ -75,6 +83,7 @@ done: | |||
75 | fd->keylength = keylen; | 83 | fd->keylength = keylen; |
76 | fd->entryoffset = off + keylen; | 84 | fd->entryoffset = off + keylen; |
77 | fd->entrylength = len - keylen; | 85 | fd->entrylength = len - keylen; |
86 | fail: | ||
78 | return res; | 87 | return res; |
79 | } | 88 | } |
80 | 89 | ||
@@ -198,6 +207,10 @@ int hfs_brec_goto(struct hfs_find_data *fd, int cnt) | |||
198 | 207 | ||
199 | len = hfs_brec_lenoff(bnode, fd->record, &off); | 208 | len = hfs_brec_lenoff(bnode, fd->record, &off); |
200 | keylen = hfs_brec_keylen(bnode, fd->record); | 209 | keylen = hfs_brec_keylen(bnode, fd->record); |
210 | if (keylen == 0) { | ||
211 | res = -EINVAL; | ||
212 | goto out; | ||
213 | } | ||
201 | fd->keyoffset = off; | 214 | fd->keyoffset = off; |
202 | fd->keylength = keylen; | 215 | fd->keylength = keylen; |
203 | fd->entryoffset = off + keylen; | 216 | fd->entryoffset = off + keylen; |
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index ea30afc2a03c..ad57f5991eb1 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c | |||
@@ -17,6 +17,7 @@ | |||
17 | 17 | ||
18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) | 18 | int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *max) |
19 | { | 19 | { |
20 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
20 | struct page *page; | 21 | struct page *page; |
21 | struct address_space *mapping; | 22 | struct address_space *mapping; |
22 | __be32 *pptr, *curr, *end; | 23 | __be32 *pptr, *curr, *end; |
@@ -29,8 +30,8 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
29 | return size; | 30 | return size; |
30 | 31 | ||
31 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); | 32 | dprint(DBG_BITMAP, "block_allocate: %u,%u,%u\n", size, offset, len); |
32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 33 | mutex_lock(&sbi->alloc_mutex); |
33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 34 | mapping = sbi->alloc_file->i_mapping; |
34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); | 35 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); |
35 | if (IS_ERR(page)) { | 36 | if (IS_ERR(page)) { |
36 | start = size; | 37 | start = size; |
@@ -150,16 +151,17 @@ done: | |||
150 | set_page_dirty(page); | 151 | set_page_dirty(page); |
151 | kunmap(page); | 152 | kunmap(page); |
152 | *max = offset + (curr - pptr) * 32 + i - start; | 153 | *max = offset + (curr - pptr) * 32 + i - start; |
153 | HFSPLUS_SB(sb).free_blocks -= *max; | 154 | sbi->free_blocks -= *max; |
154 | sb->s_dirt = 1; | 155 | sb->s_dirt = 1; |
155 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); | 156 | dprint(DBG_BITMAP, "-> %u,%u\n", start, *max); |
156 | out: | 157 | out: |
157 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 158 | mutex_unlock(&sbi->alloc_mutex); |
158 | return start; | 159 | return start; |
159 | } | 160 | } |
160 | 161 | ||
161 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | 162 | int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) |
162 | { | 163 | { |
164 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
163 | struct page *page; | 165 | struct page *page; |
164 | struct address_space *mapping; | 166 | struct address_space *mapping; |
165 | __be32 *pptr, *curr, *end; | 167 | __be32 *pptr, *curr, *end; |
@@ -172,11 +174,11 @@ int hfsplus_block_free(struct super_block *sb, u32 offset, u32 count) | |||
172 | 174 | ||
173 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); | 175 | dprint(DBG_BITMAP, "block_free: %u,%u\n", offset, count); |
174 | /* are all of the bits in range? */ | 176 | /* are all of the bits in range? */ |
175 | if ((offset + count) > HFSPLUS_SB(sb).total_blocks) | 177 | if ((offset + count) > sbi->total_blocks) |
176 | return -2; | 178 | return -2; |
177 | 179 | ||
178 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 180 | mutex_lock(&sbi->alloc_mutex); |
179 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 181 | mapping = sbi->alloc_file->i_mapping; |
180 | pnr = offset / PAGE_CACHE_BITS; | 182 | pnr = offset / PAGE_CACHE_BITS; |
181 | page = read_mapping_page(mapping, pnr, NULL); | 183 | page = read_mapping_page(mapping, pnr, NULL); |
182 | pptr = kmap(page); | 184 | pptr = kmap(page); |
@@ -224,9 +226,9 @@ done: | |||
224 | out: | 226 | out: |
225 | set_page_dirty(page); | 227 | set_page_dirty(page); |
226 | kunmap(page); | 228 | kunmap(page); |
227 | HFSPLUS_SB(sb).free_blocks += len; | 229 | sbi->free_blocks += len; |
228 | sb->s_dirt = 1; | 230 | sb->s_dirt = 1; |
229 | mutex_unlock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 231 | mutex_unlock(&sbi->alloc_mutex); |
230 | 232 | ||
231 | return 0; | 233 | return 0; |
232 | } | 234 | } |
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c index c88e5d72a402..2f39d05443e1 100644 --- a/fs/hfsplus/brec.c +++ b/fs/hfsplus/brec.c | |||
@@ -42,10 +42,13 @@ u16 hfs_brec_keylen(struct hfs_bnode *node, u16 rec) | |||
42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); | 42 | recoff = hfs_bnode_read_u16(node, node->tree->node_size - (rec + 1) * 2); |
43 | if (!recoff) | 43 | if (!recoff) |
44 | return 0; | 44 | return 0; |
45 | if (node->tree->attributes & HFS_TREE_BIGKEYS) | 45 | |
46 | retval = hfs_bnode_read_u16(node, recoff) + 2; | 46 | retval = hfs_bnode_read_u16(node, recoff) + 2; |
47 | else | 47 | if (retval > node->tree->max_key_len + 2) { |
48 | retval = (hfs_bnode_read_u8(node, recoff) | 1) + 1; | 48 | printk(KERN_ERR "hfs: keylen %d too large\n", |
49 | retval); | ||
50 | retval = 0; | ||
51 | } | ||
49 | } | 52 | } |
50 | return retval; | 53 | return retval; |
51 | } | 54 | } |
@@ -216,7 +219,7 @@ skip: | |||
216 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | 219 | static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) |
217 | { | 220 | { |
218 | struct hfs_btree *tree; | 221 | struct hfs_btree *tree; |
219 | struct hfs_bnode *node, *new_node; | 222 | struct hfs_bnode *node, *new_node, *next_node; |
220 | struct hfs_bnode_desc node_desc; | 223 | struct hfs_bnode_desc node_desc; |
221 | int num_recs, new_rec_off, new_off, old_rec_off; | 224 | int num_recs, new_rec_off, new_off, old_rec_off; |
222 | int data_start, data_end, size; | 225 | int data_start, data_end, size; |
@@ -235,6 +238,17 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
235 | new_node->type = node->type; | 238 | new_node->type = node->type; |
236 | new_node->height = node->height; | 239 | new_node->height = node->height; |
237 | 240 | ||
241 | if (node->next) | ||
242 | next_node = hfs_bnode_find(tree, node->next); | ||
243 | else | ||
244 | next_node = NULL; | ||
245 | |||
246 | if (IS_ERR(next_node)) { | ||
247 | hfs_bnode_put(node); | ||
248 | hfs_bnode_put(new_node); | ||
249 | return next_node; | ||
250 | } | ||
251 | |||
238 | size = tree->node_size / 2 - node->num_recs * 2 - 14; | 252 | size = tree->node_size / 2 - node->num_recs * 2 - 14; |
239 | old_rec_off = tree->node_size - 4; | 253 | old_rec_off = tree->node_size - 4; |
240 | num_recs = 1; | 254 | num_recs = 1; |
@@ -248,6 +262,8 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
248 | /* panic? */ | 262 | /* panic? */ |
249 | hfs_bnode_put(node); | 263 | hfs_bnode_put(node); |
250 | hfs_bnode_put(new_node); | 264 | hfs_bnode_put(new_node); |
265 | if (next_node) | ||
266 | hfs_bnode_put(next_node); | ||
251 | return ERR_PTR(-ENOSPC); | 267 | return ERR_PTR(-ENOSPC); |
252 | } | 268 | } |
253 | 269 | ||
@@ -302,8 +318,7 @@ static struct hfs_bnode *hfs_bnode_split(struct hfs_find_data *fd) | |||
302 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); | 318 | hfs_bnode_write(node, &node_desc, 0, sizeof(node_desc)); |
303 | 319 | ||
304 | /* update next bnode header */ | 320 | /* update next bnode header */ |
305 | if (new_node->next) { | 321 | if (next_node) { |
306 | struct hfs_bnode *next_node = hfs_bnode_find(tree, new_node->next); | ||
307 | next_node->prev = new_node->this; | 322 | next_node->prev = new_node->this; |
308 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); | 323 | hfs_bnode_read(next_node, &node_desc, 0, sizeof(node_desc)); |
309 | node_desc.prev = cpu_to_be32(next_node->prev); | 324 | node_desc.prev = cpu_to_be32(next_node->prev); |
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c index e49fcee1e293..22e4d4e32999 100644 --- a/fs/hfsplus/btree.c +++ b/fs/hfsplus/btree.c | |||
@@ -30,7 +30,7 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
30 | if (!tree) | 30 | if (!tree) |
31 | return NULL; | 31 | return NULL; |
32 | 32 | ||
33 | init_MUTEX(&tree->tree_lock); | 33 | mutex_init(&tree->tree_lock); |
34 | spin_lock_init(&tree->hash_lock); | 34 | spin_lock_init(&tree->hash_lock); |
35 | tree->sb = sb; | 35 | tree->sb = sb; |
36 | tree->cnid = id; | 36 | tree->cnid = id; |
@@ -39,10 +39,16 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
39 | goto free_tree; | 39 | goto free_tree; |
40 | tree->inode = inode; | 40 | tree->inode = inode; |
41 | 41 | ||
42 | if (!HFSPLUS_I(tree->inode)->first_blocks) { | ||
43 | printk(KERN_ERR | ||
44 | "hfs: invalid btree extent records (0 size).\n"); | ||
45 | goto free_inode; | ||
46 | } | ||
47 | |||
42 | mapping = tree->inode->i_mapping; | 48 | mapping = tree->inode->i_mapping; |
43 | page = read_mapping_page(mapping, 0, NULL); | 49 | page = read_mapping_page(mapping, 0, NULL); |
44 | if (IS_ERR(page)) | 50 | if (IS_ERR(page)) |
45 | goto free_tree; | 51 | goto free_inode; |
46 | 52 | ||
47 | /* Load the header */ | 53 | /* Load the header */ |
48 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); | 54 | head = (struct hfs_btree_header_rec *)(kmap(page) + sizeof(struct hfs_bnode_desc)); |
@@ -57,27 +63,56 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
57 | tree->max_key_len = be16_to_cpu(head->max_key_len); | 63 | tree->max_key_len = be16_to_cpu(head->max_key_len); |
58 | tree->depth = be16_to_cpu(head->depth); | 64 | tree->depth = be16_to_cpu(head->depth); |
59 | 65 | ||
60 | /* Set the correct compare function */ | 66 | /* Verify the tree and set the correct compare function */ |
61 | if (id == HFSPLUS_EXT_CNID) { | 67 | switch (id) { |
68 | case HFSPLUS_EXT_CNID: | ||
69 | if (tree->max_key_len != HFSPLUS_EXT_KEYLEN - sizeof(u16)) { | ||
70 | printk(KERN_ERR "hfs: invalid extent max_key_len %d\n", | ||
71 | tree->max_key_len); | ||
72 | goto fail_page; | ||
73 | } | ||
74 | if (tree->attributes & HFS_TREE_VARIDXKEYS) { | ||
75 | printk(KERN_ERR "hfs: invalid extent btree flag\n"); | ||
76 | goto fail_page; | ||
77 | } | ||
78 | |||
62 | tree->keycmp = hfsplus_ext_cmp_key; | 79 | tree->keycmp = hfsplus_ext_cmp_key; |
63 | } else if (id == HFSPLUS_CAT_CNID) { | 80 | break; |
64 | if ((HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX) && | 81 | case HFSPLUS_CAT_CNID: |
82 | if (tree->max_key_len != HFSPLUS_CAT_KEYLEN - sizeof(u16)) { | ||
83 | printk(KERN_ERR "hfs: invalid catalog max_key_len %d\n", | ||
84 | tree->max_key_len); | ||
85 | goto fail_page; | ||
86 | } | ||
87 | if (!(tree->attributes & HFS_TREE_VARIDXKEYS)) { | ||
88 | printk(KERN_ERR "hfs: invalid catalog btree flag\n"); | ||
89 | goto fail_page; | ||
90 | } | ||
91 | |||
92 | if (test_bit(HFSPLUS_SB_HFSX, &HFSPLUS_SB(sb)->flags) && | ||
65 | (head->key_type == HFSPLUS_KEY_BINARY)) | 93 | (head->key_type == HFSPLUS_KEY_BINARY)) |
66 | tree->keycmp = hfsplus_cat_bin_cmp_key; | 94 | tree->keycmp = hfsplus_cat_bin_cmp_key; |
67 | else { | 95 | else { |
68 | tree->keycmp = hfsplus_cat_case_cmp_key; | 96 | tree->keycmp = hfsplus_cat_case_cmp_key; |
69 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_CASEFOLD; | 97 | set_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
70 | } | 98 | } |
71 | } else { | 99 | break; |
100 | default: | ||
72 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); | 101 | printk(KERN_ERR "hfs: unknown B*Tree requested\n"); |
73 | goto fail_page; | 102 | goto fail_page; |
74 | } | 103 | } |
75 | 104 | ||
105 | if (!(tree->attributes & HFS_TREE_BIGKEYS)) { | ||
106 | printk(KERN_ERR "hfs: invalid btree flag\n"); | ||
107 | goto fail_page; | ||
108 | } | ||
109 | |||
76 | size = tree->node_size; | 110 | size = tree->node_size; |
77 | if (!is_power_of_2(size)) | 111 | if (!is_power_of_2(size)) |
78 | goto fail_page; | 112 | goto fail_page; |
79 | if (!tree->node_count) | 113 | if (!tree->node_count) |
80 | goto fail_page; | 114 | goto fail_page; |
115 | |||
81 | tree->node_size_shift = ffs(size) - 1; | 116 | tree->node_size_shift = ffs(size) - 1; |
82 | 117 | ||
83 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 118 | tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
@@ -87,10 +122,11 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id) | |||
87 | return tree; | 122 | return tree; |
88 | 123 | ||
89 | fail_page: | 124 | fail_page: |
90 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
91 | page_cache_release(page); | 125 | page_cache_release(page); |
92 | free_tree: | 126 | free_inode: |
127 | tree->inode->i_mapping->a_ops = &hfsplus_aops; | ||
93 | iput(tree->inode); | 128 | iput(tree->inode); |
129 | free_tree: | ||
94 | kfree(tree); | 130 | kfree(tree); |
95 | return NULL; | 131 | return NULL; |
96 | } | 132 | } |
@@ -192,17 +228,18 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree) | |||
192 | 228 | ||
193 | while (!tree->free_nodes) { | 229 | while (!tree->free_nodes) { |
194 | struct inode *inode = tree->inode; | 230 | struct inode *inode = tree->inode; |
231 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
195 | u32 count; | 232 | u32 count; |
196 | int res; | 233 | int res; |
197 | 234 | ||
198 | res = hfsplus_file_extend(inode); | 235 | res = hfsplus_file_extend(inode); |
199 | if (res) | 236 | if (res) |
200 | return ERR_PTR(res); | 237 | return ERR_PTR(res); |
201 | HFSPLUS_I(inode).phys_size = inode->i_size = | 238 | hip->phys_size = inode->i_size = |
202 | (loff_t)HFSPLUS_I(inode).alloc_blocks << | 239 | (loff_t)hip->alloc_blocks << |
203 | HFSPLUS_SB(tree->sb).alloc_blksz_shift; | 240 | HFSPLUS_SB(tree->sb)->alloc_blksz_shift; |
204 | HFSPLUS_I(inode).fs_blocks = HFSPLUS_I(inode).alloc_blocks << | 241 | hip->fs_blocks = |
205 | HFSPLUS_SB(tree->sb).fs_shift; | 242 | hip->alloc_blocks << HFSPLUS_SB(tree->sb)->fs_shift; |
206 | inode_set_bytes(inode, inode->i_size); | 243 | inode_set_bytes(inode, inode->i_size); |
207 | count = inode->i_size >> tree->node_size_shift; | 244 | count = inode->i_size >> tree->node_size_shift; |
208 | tree->free_nodes = count - tree->node_count; | 245 | tree->free_nodes = count - tree->node_count; |
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index f6874acb2cf2..8af45fc5b051 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
@@ -67,7 +67,7 @@ static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, | |||
67 | key->key_len = cpu_to_be16(6 + ustrlen); | 67 | key->key_len = cpu_to_be16(6 + ustrlen); |
68 | } | 68 | } |
69 | 69 | ||
70 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | 70 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms) |
71 | { | 71 | { |
72 | if (inode->i_flags & S_IMMUTABLE) | 72 | if (inode->i_flags & S_IMMUTABLE) |
73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | 73 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; |
@@ -77,15 +77,24 @@ static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | |||
77 | perms->rootflags |= HFSPLUS_FLG_APPEND; | 77 | perms->rootflags |= HFSPLUS_FLG_APPEND; |
78 | else | 78 | else |
79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | 79 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; |
80 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 80 | |
81 | HFSPLUS_I(inode).userflags = perms->userflags; | 81 | perms->userflags = HFSPLUS_I(inode)->userflags; |
82 | perms->mode = cpu_to_be16(inode->i_mode); | 82 | perms->mode = cpu_to_be16(inode->i_mode); |
83 | perms->owner = cpu_to_be32(inode->i_uid); | 83 | perms->owner = cpu_to_be32(inode->i_uid); |
84 | perms->group = cpu_to_be32(inode->i_gid); | 84 | perms->group = cpu_to_be32(inode->i_gid); |
85 | |||
86 | if (S_ISREG(inode->i_mode)) | ||
87 | perms->dev = cpu_to_be32(inode->i_nlink); | ||
88 | else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) | ||
89 | perms->dev = cpu_to_be32(inode->i_rdev); | ||
90 | else | ||
91 | perms->dev = 0; | ||
85 | } | 92 | } |
86 | 93 | ||
87 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) | 94 | static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct inode *inode) |
88 | { | 95 | { |
96 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); | ||
97 | |||
89 | if (S_ISDIR(inode->i_mode)) { | 98 | if (S_ISDIR(inode->i_mode)) { |
90 | struct hfsplus_cat_folder *folder; | 99 | struct hfsplus_cat_folder *folder; |
91 | 100 | ||
@@ -93,13 +102,13 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
93 | memset(folder, 0, sizeof(*folder)); | 102 | memset(folder, 0, sizeof(*folder)); |
94 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); | 103 | folder->type = cpu_to_be16(HFSPLUS_FOLDER); |
95 | folder->id = cpu_to_be32(inode->i_ino); | 104 | folder->id = cpu_to_be32(inode->i_ino); |
96 | HFSPLUS_I(inode).create_date = | 105 | HFSPLUS_I(inode)->create_date = |
97 | folder->create_date = | 106 | folder->create_date = |
98 | folder->content_mod_date = | 107 | folder->content_mod_date = |
99 | folder->attribute_mod_date = | 108 | folder->attribute_mod_date = |
100 | folder->access_date = hfsp_now2mt(); | 109 | folder->access_date = hfsp_now2mt(); |
101 | hfsplus_set_perms(inode, &folder->permissions); | 110 | hfsplus_cat_set_perms(inode, &folder->permissions); |
102 | if (inode == HFSPLUS_SB(inode->i_sb).hidden_dir) | 111 | if (inode == sbi->hidden_dir) |
103 | /* invisible and namelocked */ | 112 | /* invisible and namelocked */ |
104 | folder->user_info.frFlags = cpu_to_be16(0x5000); | 113 | folder->user_info.frFlags = cpu_to_be16(0x5000); |
105 | return sizeof(*folder); | 114 | return sizeof(*folder); |
@@ -111,19 +120,19 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
111 | file->type = cpu_to_be16(HFSPLUS_FILE); | 120 | file->type = cpu_to_be16(HFSPLUS_FILE); |
112 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); | 121 | file->flags = cpu_to_be16(HFSPLUS_FILE_THREAD_EXISTS); |
113 | file->id = cpu_to_be32(cnid); | 122 | file->id = cpu_to_be32(cnid); |
114 | HFSPLUS_I(inode).create_date = | 123 | HFSPLUS_I(inode)->create_date = |
115 | file->create_date = | 124 | file->create_date = |
116 | file->content_mod_date = | 125 | file->content_mod_date = |
117 | file->attribute_mod_date = | 126 | file->attribute_mod_date = |
118 | file->access_date = hfsp_now2mt(); | 127 | file->access_date = hfsp_now2mt(); |
119 | if (cnid == inode->i_ino) { | 128 | if (cnid == inode->i_ino) { |
120 | hfsplus_set_perms(inode, &file->permissions); | 129 | hfsplus_cat_set_perms(inode, &file->permissions); |
121 | if (S_ISLNK(inode->i_mode)) { | 130 | if (S_ISLNK(inode->i_mode)) { |
122 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); | 131 | file->user_info.fdType = cpu_to_be32(HFSP_SYMLINK_TYPE); |
123 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); | 132 | file->user_info.fdCreator = cpu_to_be32(HFSP_SYMLINK_CREATOR); |
124 | } else { | 133 | } else { |
125 | file->user_info.fdType = cpu_to_be32(HFSPLUS_SB(inode->i_sb).type); | 134 | file->user_info.fdType = cpu_to_be32(sbi->type); |
126 | file->user_info.fdCreator = cpu_to_be32(HFSPLUS_SB(inode->i_sb).creator); | 135 | file->user_info.fdCreator = cpu_to_be32(sbi->creator); |
127 | } | 136 | } |
128 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 137 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
129 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 138 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
@@ -131,8 +140,8 @@ static int hfsplus_cat_build_record(hfsplus_cat_entry *entry, u32 cnid, struct i | |||
131 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); | 140 | file->user_info.fdType = cpu_to_be32(HFSP_HARDLINK_TYPE); |
132 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); | 141 | file->user_info.fdCreator = cpu_to_be32(HFSP_HFSPLUS_CREATOR); |
133 | file->user_info.fdFlags = cpu_to_be16(0x100); | 142 | file->user_info.fdFlags = cpu_to_be16(0x100); |
134 | file->create_date = HFSPLUS_I(HFSPLUS_SB(inode->i_sb).hidden_dir).create_date; | 143 | file->create_date = HFSPLUS_I(sbi->hidden_dir)->create_date; |
135 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode).dev); | 144 | file->permissions.dev = cpu_to_be32(HFSPLUS_I(inode)->linkid); |
136 | } | 145 | } |
137 | return sizeof(*file); | 146 | return sizeof(*file); |
138 | } | 147 | } |
@@ -180,15 +189,14 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
180 | 189 | ||
181 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) | 190 | int hfsplus_create_cat(u32 cnid, struct inode *dir, struct qstr *str, struct inode *inode) |
182 | { | 191 | { |
192 | struct super_block *sb = dir->i_sb; | ||
183 | struct hfs_find_data fd; | 193 | struct hfs_find_data fd; |
184 | struct super_block *sb; | ||
185 | hfsplus_cat_entry entry; | 194 | hfsplus_cat_entry entry; |
186 | int entry_size; | 195 | int entry_size; |
187 | int err; | 196 | int err; |
188 | 197 | ||
189 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); | 198 | dprint(DBG_CAT_MOD, "create_cat: %s,%u(%d)\n", str->name, cnid, inode->i_nlink); |
190 | sb = dir->i_sb; | 199 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
191 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
192 | 200 | ||
193 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); | 201 | hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); |
194 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? | 202 | entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? |
@@ -234,7 +242,7 @@ err2: | |||
234 | 242 | ||
235 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | 243 | int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) |
236 | { | 244 | { |
237 | struct super_block *sb; | 245 | struct super_block *sb = dir->i_sb; |
238 | struct hfs_find_data fd; | 246 | struct hfs_find_data fd; |
239 | struct hfsplus_fork_raw fork; | 247 | struct hfsplus_fork_raw fork; |
240 | struct list_head *pos; | 248 | struct list_head *pos; |
@@ -242,8 +250,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
242 | u16 type; | 250 | u16 type; |
243 | 251 | ||
244 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); | 252 | dprint(DBG_CAT_MOD, "delete_cat: %s,%u\n", str ? str->name : NULL, cnid); |
245 | sb = dir->i_sb; | 253 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
246 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | ||
247 | 254 | ||
248 | if (!str) { | 255 | if (!str) { |
249 | int len; | 256 | int len; |
@@ -279,7 +286,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) | |||
279 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); | 286 | hfsplus_free_fork(sb, cnid, &fork, HFSPLUS_TYPE_RSRC); |
280 | } | 287 | } |
281 | 288 | ||
282 | list_for_each(pos, &HFSPLUS_I(dir).open_dir_list) { | 289 | list_for_each(pos, &HFSPLUS_I(dir)->open_dir_list) { |
283 | struct hfsplus_readdir_data *rd = | 290 | struct hfsplus_readdir_data *rd = |
284 | list_entry(pos, struct hfsplus_readdir_data, list); | 291 | list_entry(pos, struct hfsplus_readdir_data, list); |
285 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) | 292 | if (fd.tree->keycmp(fd.search_key, (void *)&rd->key) < 0) |
@@ -312,7 +319,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
312 | struct inode *src_dir, struct qstr *src_name, | 319 | struct inode *src_dir, struct qstr *src_name, |
313 | struct inode *dst_dir, struct qstr *dst_name) | 320 | struct inode *dst_dir, struct qstr *dst_name) |
314 | { | 321 | { |
315 | struct super_block *sb; | 322 | struct super_block *sb = src_dir->i_sb; |
316 | struct hfs_find_data src_fd, dst_fd; | 323 | struct hfs_find_data src_fd, dst_fd; |
317 | hfsplus_cat_entry entry; | 324 | hfsplus_cat_entry entry; |
318 | int entry_size, type; | 325 | int entry_size, type; |
@@ -320,8 +327,7 @@ int hfsplus_rename_cat(u32 cnid, | |||
320 | 327 | ||
321 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, | 328 | dprint(DBG_CAT_MOD, "rename_cat: %u - %lu,%s - %lu,%s\n", cnid, src_dir->i_ino, src_name->name, |
322 | dst_dir->i_ino, dst_name->name); | 329 | dst_dir->i_ino, dst_name->name); |
323 | sb = src_dir->i_sb; | 330 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &src_fd); |
324 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &src_fd); | ||
325 | dst_fd = src_fd; | 331 | dst_fd = src_fd; |
326 | 332 | ||
327 | /* find the old dir entry and read the data */ | 333 | /* find the old dir entry and read the data */ |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 764fd1bdca88..e318bbc0daf6 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
@@ -39,7 +39,7 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, | |||
39 | 39 | ||
40 | dentry->d_op = &hfsplus_dentry_operations; | 40 | dentry->d_op = &hfsplus_dentry_operations; |
41 | dentry->d_fsdata = NULL; | 41 | dentry->d_fsdata = NULL; |
42 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 42 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); | 43 | hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); |
44 | again: | 44 | again: |
45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); | 45 | err = hfs_brec_read(&fd, &entry, sizeof(entry)); |
@@ -68,9 +68,9 @@ again: | |||
68 | cnid = be32_to_cpu(entry.file.id); | 68 | cnid = be32_to_cpu(entry.file.id); |
69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && | 69 | if (entry.file.user_info.fdType == cpu_to_be32(HFSP_HARDLINK_TYPE) && |
70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && | 70 | entry.file.user_info.fdCreator == cpu_to_be32(HFSP_HFSPLUS_CREATOR) && |
71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb).hidden_dir).create_date || | 71 | (entry.file.create_date == HFSPLUS_I(HFSPLUS_SB(sb)->hidden_dir)->create_date || |
72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode).create_date) && | 72 | entry.file.create_date == HFSPLUS_I(sb->s_root->d_inode)->create_date) && |
73 | HFSPLUS_SB(sb).hidden_dir) { | 73 | HFSPLUS_SB(sb)->hidden_dir) { |
74 | struct qstr str; | 74 | struct qstr str; |
75 | char name[32]; | 75 | char name[32]; |
76 | 76 | ||
@@ -86,7 +86,8 @@ again: | |||
86 | linkid = be32_to_cpu(entry.file.permissions.dev); | 86 | linkid = be32_to_cpu(entry.file.permissions.dev); |
87 | str.len = sprintf(name, "iNode%d", linkid); | 87 | str.len = sprintf(name, "iNode%d", linkid); |
88 | str.name = name; | 88 | str.name = name; |
89 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb).hidden_dir->i_ino, &str); | 89 | hfsplus_cat_build_key(sb, fd.search_key, |
90 | HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); | ||
90 | goto again; | 91 | goto again; |
91 | } | 92 | } |
92 | } else if (!dentry->d_fsdata) | 93 | } else if (!dentry->d_fsdata) |
@@ -101,7 +102,7 @@ again: | |||
101 | if (IS_ERR(inode)) | 102 | if (IS_ERR(inode)) |
102 | return ERR_CAST(inode); | 103 | return ERR_CAST(inode); |
103 | if (S_ISREG(inode->i_mode)) | 104 | if (S_ISREG(inode->i_mode)) |
104 | HFSPLUS_I(inode).dev = linkid; | 105 | HFSPLUS_I(inode)->linkid = linkid; |
105 | out: | 106 | out: |
106 | d_add(dentry, inode); | 107 | d_add(dentry, inode); |
107 | return NULL; | 108 | return NULL; |
@@ -124,7 +125,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
124 | if (filp->f_pos >= inode->i_size) | 125 | if (filp->f_pos >= inode->i_size) |
125 | return 0; | 126 | return 0; |
126 | 127 | ||
127 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 128 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
128 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); | 129 | hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); |
129 | err = hfs_brec_find(&fd); | 130 | err = hfs_brec_find(&fd); |
130 | if (err) | 131 | if (err) |
@@ -180,8 +181,9 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
180 | err = -EIO; | 181 | err = -EIO; |
181 | goto out; | 182 | goto out; |
182 | } | 183 | } |
183 | if (HFSPLUS_SB(sb).hidden_dir && | 184 | if (HFSPLUS_SB(sb)->hidden_dir && |
184 | HFSPLUS_SB(sb).hidden_dir->i_ino == be32_to_cpu(entry.folder.id)) | 185 | HFSPLUS_SB(sb)->hidden_dir->i_ino == |
186 | be32_to_cpu(entry.folder.id)) | ||
185 | goto next; | 187 | goto next; |
186 | if (filldir(dirent, strbuf, len, filp->f_pos, | 188 | if (filldir(dirent, strbuf, len, filp->f_pos, |
187 | be32_to_cpu(entry.folder.id), DT_DIR)) | 189 | be32_to_cpu(entry.folder.id), DT_DIR)) |
@@ -217,7 +219,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
217 | } | 219 | } |
218 | filp->private_data = rd; | 220 | filp->private_data = rd; |
219 | rd->file = filp; | 221 | rd->file = filp; |
220 | list_add(&rd->list, &HFSPLUS_I(inode).open_dir_list); | 222 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); |
221 | } | 223 | } |
222 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); | 224 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); |
223 | out: | 225 | out: |
@@ -229,38 +231,18 @@ static int hfsplus_dir_release(struct inode *inode, struct file *file) | |||
229 | { | 231 | { |
230 | struct hfsplus_readdir_data *rd = file->private_data; | 232 | struct hfsplus_readdir_data *rd = file->private_data; |
231 | if (rd) { | 233 | if (rd) { |
234 | mutex_lock(&inode->i_mutex); | ||
232 | list_del(&rd->list); | 235 | list_del(&rd->list); |
236 | mutex_unlock(&inode->i_mutex); | ||
233 | kfree(rd); | 237 | kfree(rd); |
234 | } | 238 | } |
235 | return 0; | 239 | return 0; |
236 | } | 240 | } |
237 | 241 | ||
238 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, | ||
239 | struct nameidata *nd) | ||
240 | { | ||
241 | struct inode *inode; | ||
242 | int res; | ||
243 | |||
244 | inode = hfsplus_new_inode(dir->i_sb, mode); | ||
245 | if (!inode) | ||
246 | return -ENOSPC; | ||
247 | |||
248 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
249 | if (res) { | ||
250 | inode->i_nlink = 0; | ||
251 | hfsplus_delete_inode(inode); | ||
252 | iput(inode); | ||
253 | return res; | ||
254 | } | ||
255 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
256 | mark_inode_dirty(inode); | ||
257 | return 0; | ||
258 | } | ||
259 | |||
260 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | 242 | static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, |
261 | struct dentry *dst_dentry) | 243 | struct dentry *dst_dentry) |
262 | { | 244 | { |
263 | struct super_block *sb = dst_dir->i_sb; | 245 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dst_dir->i_sb); |
264 | struct inode *inode = src_dentry->d_inode; | 246 | struct inode *inode = src_dentry->d_inode; |
265 | struct inode *src_dir = src_dentry->d_parent->d_inode; | 247 | struct inode *src_dir = src_dentry->d_parent->d_inode; |
266 | struct qstr str; | 248 | struct qstr str; |
@@ -270,7 +252,10 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
270 | 252 | ||
271 | if (HFSPLUS_IS_RSRC(inode)) | 253 | if (HFSPLUS_IS_RSRC(inode)) |
272 | return -EPERM; | 254 | return -EPERM; |
255 | if (!S_ISREG(inode->i_mode)) | ||
256 | return -EPERM; | ||
273 | 257 | ||
258 | mutex_lock(&sbi->vh_mutex); | ||
274 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { | 259 | if (inode->i_ino == (u32)(unsigned long)src_dentry->d_fsdata) { |
275 | for (;;) { | 260 | for (;;) { |
276 | get_random_bytes(&id, sizeof(cnid)); | 261 | get_random_bytes(&id, sizeof(cnid)); |
@@ -279,40 +264,41 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, | |||
279 | str.len = sprintf(name, "iNode%d", id); | 264 | str.len = sprintf(name, "iNode%d", id); |
280 | res = hfsplus_rename_cat(inode->i_ino, | 265 | res = hfsplus_rename_cat(inode->i_ino, |
281 | src_dir, &src_dentry->d_name, | 266 | src_dir, &src_dentry->d_name, |
282 | HFSPLUS_SB(sb).hidden_dir, &str); | 267 | sbi->hidden_dir, &str); |
283 | if (!res) | 268 | if (!res) |
284 | break; | 269 | break; |
285 | if (res != -EEXIST) | 270 | if (res != -EEXIST) |
286 | return res; | 271 | goto out; |
287 | } | 272 | } |
288 | HFSPLUS_I(inode).dev = id; | 273 | HFSPLUS_I(inode)->linkid = id; |
289 | cnid = HFSPLUS_SB(sb).next_cnid++; | 274 | cnid = sbi->next_cnid++; |
290 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; | 275 | src_dentry->d_fsdata = (void *)(unsigned long)cnid; |
291 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); | 276 | res = hfsplus_create_cat(cnid, src_dir, &src_dentry->d_name, inode); |
292 | if (res) | 277 | if (res) |
293 | /* panic? */ | 278 | /* panic? */ |
294 | return res; | 279 | goto out; |
295 | HFSPLUS_SB(sb).file_count++; | 280 | sbi->file_count++; |
296 | } | 281 | } |
297 | cnid = HFSPLUS_SB(sb).next_cnid++; | 282 | cnid = sbi->next_cnid++; |
298 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); | 283 | res = hfsplus_create_cat(cnid, dst_dir, &dst_dentry->d_name, inode); |
299 | if (res) | 284 | if (res) |
300 | return res; | 285 | goto out; |
301 | 286 | ||
302 | inc_nlink(inode); | 287 | inc_nlink(inode); |
303 | hfsplus_instantiate(dst_dentry, inode, cnid); | 288 | hfsplus_instantiate(dst_dentry, inode, cnid); |
304 | atomic_inc(&inode->i_count); | 289 | ihold(inode); |
305 | inode->i_ctime = CURRENT_TIME_SEC; | 290 | inode->i_ctime = CURRENT_TIME_SEC; |
306 | mark_inode_dirty(inode); | 291 | mark_inode_dirty(inode); |
307 | HFSPLUS_SB(sb).file_count++; | 292 | sbi->file_count++; |
308 | sb->s_dirt = 1; | 293 | dst_dir->i_sb->s_dirt = 1; |
309 | 294 | out: | |
310 | return 0; | 295 | mutex_unlock(&sbi->vh_mutex); |
296 | return res; | ||
311 | } | 297 | } |
312 | 298 | ||
313 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | 299 | static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) |
314 | { | 300 | { |
315 | struct super_block *sb = dir->i_sb; | 301 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
316 | struct inode *inode = dentry->d_inode; | 302 | struct inode *inode = dentry->d_inode; |
317 | struct qstr str; | 303 | struct qstr str; |
318 | char name[32]; | 304 | char name[32]; |
@@ -322,21 +308,22 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
322 | if (HFSPLUS_IS_RSRC(inode)) | 308 | if (HFSPLUS_IS_RSRC(inode)) |
323 | return -EPERM; | 309 | return -EPERM; |
324 | 310 | ||
311 | mutex_lock(&sbi->vh_mutex); | ||
325 | cnid = (u32)(unsigned long)dentry->d_fsdata; | 312 | cnid = (u32)(unsigned long)dentry->d_fsdata; |
326 | if (inode->i_ino == cnid && | 313 | if (inode->i_ino == cnid && |
327 | atomic_read(&HFSPLUS_I(inode).opencnt)) { | 314 | atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
328 | str.name = name; | 315 | str.name = name; |
329 | str.len = sprintf(name, "temp%lu", inode->i_ino); | 316 | str.len = sprintf(name, "temp%lu", inode->i_ino); |
330 | res = hfsplus_rename_cat(inode->i_ino, | 317 | res = hfsplus_rename_cat(inode->i_ino, |
331 | dir, &dentry->d_name, | 318 | dir, &dentry->d_name, |
332 | HFSPLUS_SB(sb).hidden_dir, &str); | 319 | sbi->hidden_dir, &str); |
333 | if (!res) | 320 | if (!res) |
334 | inode->i_flags |= S_DEAD; | 321 | inode->i_flags |= S_DEAD; |
335 | return res; | 322 | goto out; |
336 | } | 323 | } |
337 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); | 324 | res = hfsplus_delete_cat(cnid, dir, &dentry->d_name); |
338 | if (res) | 325 | if (res) |
339 | return res; | 326 | goto out; |
340 | 327 | ||
341 | if (inode->i_nlink > 0) | 328 | if (inode->i_nlink > 0) |
342 | drop_nlink(inode); | 329 | drop_nlink(inode); |
@@ -344,10 +331,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
344 | clear_nlink(inode); | 331 | clear_nlink(inode); |
345 | if (!inode->i_nlink) { | 332 | if (!inode->i_nlink) { |
346 | if (inode->i_ino != cnid) { | 333 | if (inode->i_ino != cnid) { |
347 | HFSPLUS_SB(sb).file_count--; | 334 | sbi->file_count--; |
348 | if (!atomic_read(&HFSPLUS_I(inode).opencnt)) { | 335 | if (!atomic_read(&HFSPLUS_I(inode)->opencnt)) { |
349 | res = hfsplus_delete_cat(inode->i_ino, | 336 | res = hfsplus_delete_cat(inode->i_ino, |
350 | HFSPLUS_SB(sb).hidden_dir, | 337 | sbi->hidden_dir, |
351 | NULL); | 338 | NULL); |
352 | if (!res) | 339 | if (!res) |
353 | hfsplus_delete_inode(inode); | 340 | hfsplus_delete_inode(inode); |
@@ -356,107 +343,108 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry) | |||
356 | } else | 343 | } else |
357 | hfsplus_delete_inode(inode); | 344 | hfsplus_delete_inode(inode); |
358 | } else | 345 | } else |
359 | HFSPLUS_SB(sb).file_count--; | 346 | sbi->file_count--; |
360 | inode->i_ctime = CURRENT_TIME_SEC; | 347 | inode->i_ctime = CURRENT_TIME_SEC; |
361 | mark_inode_dirty(inode); | 348 | mark_inode_dirty(inode); |
362 | 349 | out: | |
350 | mutex_unlock(&sbi->vh_mutex); | ||
363 | return res; | 351 | return res; |
364 | } | 352 | } |
365 | 353 | ||
366 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
367 | { | ||
368 | struct inode *inode; | ||
369 | int res; | ||
370 | |||
371 | inode = hfsplus_new_inode(dir->i_sb, S_IFDIR | mode); | ||
372 | if (!inode) | ||
373 | return -ENOSPC; | ||
374 | |||
375 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | ||
376 | if (res) { | ||
377 | inode->i_nlink = 0; | ||
378 | hfsplus_delete_inode(inode); | ||
379 | iput(inode); | ||
380 | return res; | ||
381 | } | ||
382 | hfsplus_instantiate(dentry, inode, inode->i_ino); | ||
383 | mark_inode_dirty(inode); | ||
384 | return 0; | ||
385 | } | ||
386 | |||
387 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) | 354 | static int hfsplus_rmdir(struct inode *dir, struct dentry *dentry) |
388 | { | 355 | { |
389 | struct inode *inode; | 356 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
357 | struct inode *inode = dentry->d_inode; | ||
390 | int res; | 358 | int res; |
391 | 359 | ||
392 | inode = dentry->d_inode; | ||
393 | if (inode->i_size != 2) | 360 | if (inode->i_size != 2) |
394 | return -ENOTEMPTY; | 361 | return -ENOTEMPTY; |
362 | |||
363 | mutex_lock(&sbi->vh_mutex); | ||
395 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); | 364 | res = hfsplus_delete_cat(inode->i_ino, dir, &dentry->d_name); |
396 | if (res) | 365 | if (res) |
397 | return res; | 366 | goto out; |
398 | clear_nlink(inode); | 367 | clear_nlink(inode); |
399 | inode->i_ctime = CURRENT_TIME_SEC; | 368 | inode->i_ctime = CURRENT_TIME_SEC; |
400 | hfsplus_delete_inode(inode); | 369 | hfsplus_delete_inode(inode); |
401 | mark_inode_dirty(inode); | 370 | mark_inode_dirty(inode); |
402 | return 0; | 371 | out: |
372 | mutex_unlock(&sbi->vh_mutex); | ||
373 | return res; | ||
403 | } | 374 | } |
404 | 375 | ||
405 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, | 376 | static int hfsplus_symlink(struct inode *dir, struct dentry *dentry, |
406 | const char *symname) | 377 | const char *symname) |
407 | { | 378 | { |
408 | struct super_block *sb; | 379 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
409 | struct inode *inode; | 380 | struct inode *inode; |
410 | int res; | 381 | int res = -ENOSPC; |
411 | 382 | ||
412 | sb = dir->i_sb; | 383 | mutex_lock(&sbi->vh_mutex); |
413 | inode = hfsplus_new_inode(sb, S_IFLNK | S_IRWXUGO); | 384 | inode = hfsplus_new_inode(dir->i_sb, S_IFLNK | S_IRWXUGO); |
414 | if (!inode) | 385 | if (!inode) |
415 | return -ENOSPC; | 386 | goto out; |
416 | 387 | ||
417 | res = page_symlink(inode, symname, strlen(symname) + 1); | 388 | res = page_symlink(inode, symname, strlen(symname) + 1); |
418 | if (res) { | 389 | if (res) |
419 | inode->i_nlink = 0; | 390 | goto out_err; |
420 | hfsplus_delete_inode(inode); | ||
421 | iput(inode); | ||
422 | return res; | ||
423 | } | ||
424 | 391 | ||
425 | mark_inode_dirty(inode); | ||
426 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 392 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
393 | if (res) | ||
394 | goto out_err; | ||
427 | 395 | ||
428 | if (!res) { | 396 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
429 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 397 | mark_inode_dirty(inode); |
430 | mark_inode_dirty(inode); | 398 | goto out; |
431 | } | ||
432 | 399 | ||
400 | out_err: | ||
401 | inode->i_nlink = 0; | ||
402 | hfsplus_delete_inode(inode); | ||
403 | iput(inode); | ||
404 | out: | ||
405 | mutex_unlock(&sbi->vh_mutex); | ||
433 | return res; | 406 | return res; |
434 | } | 407 | } |
435 | 408 | ||
436 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, | 409 | static int hfsplus_mknod(struct inode *dir, struct dentry *dentry, |
437 | int mode, dev_t rdev) | 410 | int mode, dev_t rdev) |
438 | { | 411 | { |
439 | struct super_block *sb; | 412 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb); |
440 | struct inode *inode; | 413 | struct inode *inode; |
441 | int res; | 414 | int res = -ENOSPC; |
442 | 415 | ||
443 | sb = dir->i_sb; | 416 | mutex_lock(&sbi->vh_mutex); |
444 | inode = hfsplus_new_inode(sb, mode); | 417 | inode = hfsplus_new_inode(dir->i_sb, mode); |
445 | if (!inode) | 418 | if (!inode) |
446 | return -ENOSPC; | 419 | goto out; |
420 | |||
421 | if (S_ISBLK(mode) || S_ISCHR(mode) || S_ISFIFO(mode) || S_ISSOCK(mode)) | ||
422 | init_special_inode(inode, mode, rdev); | ||
447 | 423 | ||
448 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); | 424 | res = hfsplus_create_cat(inode->i_ino, dir, &dentry->d_name, inode); |
449 | if (res) { | 425 | if (res) { |
450 | inode->i_nlink = 0; | 426 | inode->i_nlink = 0; |
451 | hfsplus_delete_inode(inode); | 427 | hfsplus_delete_inode(inode); |
452 | iput(inode); | 428 | iput(inode); |
453 | return res; | 429 | goto out; |
454 | } | 430 | } |
455 | init_special_inode(inode, mode, rdev); | 431 | |
456 | hfsplus_instantiate(dentry, inode, inode->i_ino); | 432 | hfsplus_instantiate(dentry, inode, inode->i_ino); |
457 | mark_inode_dirty(inode); | 433 | mark_inode_dirty(inode); |
434 | out: | ||
435 | mutex_unlock(&sbi->vh_mutex); | ||
436 | return res; | ||
437 | } | ||
458 | 438 | ||
459 | return 0; | 439 | static int hfsplus_create(struct inode *dir, struct dentry *dentry, int mode, |
440 | struct nameidata *nd) | ||
441 | { | ||
442 | return hfsplus_mknod(dir, dentry, mode, 0); | ||
443 | } | ||
444 | |||
445 | static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, int mode) | ||
446 | { | ||
447 | return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0); | ||
460 | } | 448 | } |
461 | 449 | ||
462 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | 450 | static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, |
@@ -466,7 +454,10 @@ static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
466 | 454 | ||
467 | /* Unlink destination if it already exists */ | 455 | /* Unlink destination if it already exists */ |
468 | if (new_dentry->d_inode) { | 456 | if (new_dentry->d_inode) { |
469 | res = hfsplus_unlink(new_dir, new_dentry); | 457 | if (S_ISDIR(new_dentry->d_inode->i_mode)) |
458 | res = hfsplus_rmdir(new_dir, new_dentry); | ||
459 | else | ||
460 | res = hfsplus_unlink(new_dir, new_dentry); | ||
470 | if (res) | 461 | if (res) |
471 | return res; | 462 | return res; |
472 | } | 463 | } |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index 0022eec63cda..0c9cb1820a52 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
@@ -85,35 +85,49 @@ static u32 hfsplus_ext_lastblock(struct hfsplus_extent *ext) | |||
85 | 85 | ||
86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) | 86 | static void __hfsplus_ext_write_extent(struct inode *inode, struct hfs_find_data *fd) |
87 | { | 87 | { |
88 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
88 | int res; | 89 | int res; |
89 | 90 | ||
90 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, HFSPLUS_I(inode).cached_start, | 91 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
91 | HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 92 | |
93 | hfsplus_ext_build_key(fd->search_key, inode->i_ino, hip->cached_start, | ||
94 | HFSPLUS_IS_RSRC(inode) ? | ||
95 | HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | ||
96 | |||
92 | res = hfs_brec_find(fd); | 97 | res = hfs_brec_find(fd); |
93 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_NEW) { | 98 | if (hip->flags & HFSPLUS_FLG_EXT_NEW) { |
94 | if (res != -ENOENT) | 99 | if (res != -ENOENT) |
95 | return; | 100 | return; |
96 | hfs_brec_insert(fd, HFSPLUS_I(inode).cached_extents, sizeof(hfsplus_extent_rec)); | 101 | hfs_brec_insert(fd, hip->cached_extents, |
97 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 102 | sizeof(hfsplus_extent_rec)); |
103 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | ||
98 | } else { | 104 | } else { |
99 | if (res) | 105 | if (res) |
100 | return; | 106 | return; |
101 | hfs_bnode_write(fd->bnode, HFSPLUS_I(inode).cached_extents, fd->entryoffset, fd->entrylength); | 107 | hfs_bnode_write(fd->bnode, hip->cached_extents, |
102 | HFSPLUS_I(inode).flags &= ~HFSPLUS_FLG_EXT_DIRTY; | 108 | fd->entryoffset, fd->entrylength); |
109 | hip->flags &= ~HFSPLUS_FLG_EXT_DIRTY; | ||
103 | } | 110 | } |
104 | } | 111 | } |
105 | 112 | ||
106 | void hfsplus_ext_write_extent(struct inode *inode) | 113 | static void hfsplus_ext_write_extent_locked(struct inode *inode) |
107 | { | 114 | { |
108 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) { | 115 | if (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_EXT_DIRTY) { |
109 | struct hfs_find_data fd; | 116 | struct hfs_find_data fd; |
110 | 117 | ||
111 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 118 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
112 | __hfsplus_ext_write_extent(inode, &fd); | 119 | __hfsplus_ext_write_extent(inode, &fd); |
113 | hfs_find_exit(&fd); | 120 | hfs_find_exit(&fd); |
114 | } | 121 | } |
115 | } | 122 | } |
116 | 123 | ||
124 | void hfsplus_ext_write_extent(struct inode *inode) | ||
125 | { | ||
126 | mutex_lock(&HFSPLUS_I(inode)->extents_lock); | ||
127 | hfsplus_ext_write_extent_locked(inode); | ||
128 | mutex_unlock(&HFSPLUS_I(inode)->extents_lock); | ||
129 | } | ||
130 | |||
117 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | 131 | static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, |
118 | struct hfsplus_extent *extent, | 132 | struct hfsplus_extent *extent, |
119 | u32 cnid, u32 block, u8 type) | 133 | u32 cnid, u32 block, u8 type) |
@@ -136,33 +150,39 @@ static inline int __hfsplus_ext_read_extent(struct hfs_find_data *fd, | |||
136 | 150 | ||
137 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) | 151 | static inline int __hfsplus_ext_cache_extent(struct hfs_find_data *fd, struct inode *inode, u32 block) |
138 | { | 152 | { |
153 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
139 | int res; | 154 | int res; |
140 | 155 | ||
141 | if (HFSPLUS_I(inode).flags & HFSPLUS_FLG_EXT_DIRTY) | 156 | WARN_ON(!mutex_is_locked(&hip->extents_lock)); |
157 | |||
158 | if (hip->flags & HFSPLUS_FLG_EXT_DIRTY) | ||
142 | __hfsplus_ext_write_extent(inode, fd); | 159 | __hfsplus_ext_write_extent(inode, fd); |
143 | 160 | ||
144 | res = __hfsplus_ext_read_extent(fd, HFSPLUS_I(inode).cached_extents, inode->i_ino, | 161 | res = __hfsplus_ext_read_extent(fd, hip->cached_extents, inode->i_ino, |
145 | block, HFSPLUS_IS_RSRC(inode) ? HFSPLUS_TYPE_RSRC : HFSPLUS_TYPE_DATA); | 162 | block, HFSPLUS_IS_RSRC(inode) ? |
163 | HFSPLUS_TYPE_RSRC : | ||
164 | HFSPLUS_TYPE_DATA); | ||
146 | if (!res) { | 165 | if (!res) { |
147 | HFSPLUS_I(inode).cached_start = be32_to_cpu(fd->key->ext.start_block); | 166 | hip->cached_start = be32_to_cpu(fd->key->ext.start_block); |
148 | HFSPLUS_I(inode).cached_blocks = hfsplus_ext_block_count(HFSPLUS_I(inode).cached_extents); | 167 | hip->cached_blocks = hfsplus_ext_block_count(hip->cached_extents); |
149 | } else { | 168 | } else { |
150 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 169 | hip->cached_start = hip->cached_blocks = 0; |
151 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 170 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
152 | } | 171 | } |
153 | return res; | 172 | return res; |
154 | } | 173 | } |
155 | 174 | ||
156 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | 175 | static int hfsplus_ext_read_extent(struct inode *inode, u32 block) |
157 | { | 176 | { |
177 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
158 | struct hfs_find_data fd; | 178 | struct hfs_find_data fd; |
159 | int res; | 179 | int res; |
160 | 180 | ||
161 | if (block >= HFSPLUS_I(inode).cached_start && | 181 | if (block >= hip->cached_start && |
162 | block < HFSPLUS_I(inode).cached_start + HFSPLUS_I(inode).cached_blocks) | 182 | block < hip->cached_start + hip->cached_blocks) |
163 | return 0; | 183 | return 0; |
164 | 184 | ||
165 | hfs_find_init(HFSPLUS_SB(inode->i_sb).ext_tree, &fd); | 185 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->ext_tree, &fd); |
166 | res = __hfsplus_ext_cache_extent(&fd, inode, block); | 186 | res = __hfsplus_ext_cache_extent(&fd, inode, block); |
167 | hfs_find_exit(&fd); | 187 | hfs_find_exit(&fd); |
168 | return res; | 188 | return res; |
@@ -172,21 +192,21 @@ static int hfsplus_ext_read_extent(struct inode *inode, u32 block) | |||
172 | int hfsplus_get_block(struct inode *inode, sector_t iblock, | 192 | int hfsplus_get_block(struct inode *inode, sector_t iblock, |
173 | struct buffer_head *bh_result, int create) | 193 | struct buffer_head *bh_result, int create) |
174 | { | 194 | { |
175 | struct super_block *sb; | 195 | struct super_block *sb = inode->i_sb; |
196 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
197 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
176 | int res = -EIO; | 198 | int res = -EIO; |
177 | u32 ablock, dblock, mask; | 199 | u32 ablock, dblock, mask; |
178 | int shift; | 200 | int shift; |
179 | 201 | ||
180 | sb = inode->i_sb; | ||
181 | |||
182 | /* Convert inode block to disk allocation block */ | 202 | /* Convert inode block to disk allocation block */ |
183 | shift = HFSPLUS_SB(sb).alloc_blksz_shift - sb->s_blocksize_bits; | 203 | shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
184 | ablock = iblock >> HFSPLUS_SB(sb).fs_shift; | 204 | ablock = iblock >> sbi->fs_shift; |
185 | 205 | ||
186 | if (iblock >= HFSPLUS_I(inode).fs_blocks) { | 206 | if (iblock >= hip->fs_blocks) { |
187 | if (iblock > HFSPLUS_I(inode).fs_blocks || !create) | 207 | if (iblock > hip->fs_blocks || !create) |
188 | return -EIO; | 208 | return -EIO; |
189 | if (ablock >= HFSPLUS_I(inode).alloc_blocks) { | 209 | if (ablock >= hip->alloc_blocks) { |
190 | res = hfsplus_file_extend(inode); | 210 | res = hfsplus_file_extend(inode); |
191 | if (res) | 211 | if (res) |
192 | return res; | 212 | return res; |
@@ -194,33 +214,33 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
194 | } else | 214 | } else |
195 | create = 0; | 215 | create = 0; |
196 | 216 | ||
197 | if (ablock < HFSPLUS_I(inode).first_blocks) { | 217 | if (ablock < hip->first_blocks) { |
198 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).first_extents, ablock); | 218 | dblock = hfsplus_ext_find_block(hip->first_extents, ablock); |
199 | goto done; | 219 | goto done; |
200 | } | 220 | } |
201 | 221 | ||
202 | if (inode->i_ino == HFSPLUS_EXT_CNID) | 222 | if (inode->i_ino == HFSPLUS_EXT_CNID) |
203 | return -EIO; | 223 | return -EIO; |
204 | 224 | ||
205 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 225 | mutex_lock(&hip->extents_lock); |
206 | res = hfsplus_ext_read_extent(inode, ablock); | 226 | res = hfsplus_ext_read_extent(inode, ablock); |
207 | if (!res) { | 227 | if (!res) { |
208 | dblock = hfsplus_ext_find_block(HFSPLUS_I(inode).cached_extents, ablock - | 228 | dblock = hfsplus_ext_find_block(hip->cached_extents, |
209 | HFSPLUS_I(inode).cached_start); | 229 | ablock - hip->cached_start); |
210 | } else { | 230 | } else { |
211 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 231 | mutex_unlock(&hip->extents_lock); |
212 | return -EIO; | 232 | return -EIO; |
213 | } | 233 | } |
214 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 234 | mutex_unlock(&hip->extents_lock); |
215 | 235 | ||
216 | done: | 236 | done: |
217 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); | 237 | dprint(DBG_EXTENT, "get_block(%lu): %llu - %u\n", inode->i_ino, (long long)iblock, dblock); |
218 | mask = (1 << HFSPLUS_SB(sb).fs_shift) - 1; | 238 | mask = (1 << sbi->fs_shift) - 1; |
219 | map_bh(bh_result, sb, (dblock << HFSPLUS_SB(sb).fs_shift) + HFSPLUS_SB(sb).blockoffset + (iblock & mask)); | 239 | map_bh(bh_result, sb, (dblock << sbi->fs_shift) + sbi->blockoffset + (iblock & mask)); |
220 | if (create) { | 240 | if (create) { |
221 | set_buffer_new(bh_result); | 241 | set_buffer_new(bh_result); |
222 | HFSPLUS_I(inode).phys_size += sb->s_blocksize; | 242 | hip->phys_size += sb->s_blocksize; |
223 | HFSPLUS_I(inode).fs_blocks++; | 243 | hip->fs_blocks++; |
224 | inode_add_bytes(inode, sb->s_blocksize); | 244 | inode_add_bytes(inode, sb->s_blocksize); |
225 | mark_inode_dirty(inode); | 245 | mark_inode_dirty(inode); |
226 | } | 246 | } |
@@ -327,7 +347,7 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
327 | if (total_blocks == blocks) | 347 | if (total_blocks == blocks) |
328 | return 0; | 348 | return 0; |
329 | 349 | ||
330 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 350 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
331 | do { | 351 | do { |
332 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, | 352 | res = __hfsplus_ext_read_extent(&fd, ext_entry, cnid, |
333 | total_blocks, type); | 353 | total_blocks, type); |
@@ -348,29 +368,33 @@ int hfsplus_free_fork(struct super_block *sb, u32 cnid, struct hfsplus_fork_raw | |||
348 | int hfsplus_file_extend(struct inode *inode) | 368 | int hfsplus_file_extend(struct inode *inode) |
349 | { | 369 | { |
350 | struct super_block *sb = inode->i_sb; | 370 | struct super_block *sb = inode->i_sb; |
371 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
372 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
351 | u32 start, len, goal; | 373 | u32 start, len, goal; |
352 | int res; | 374 | int res; |
353 | 375 | ||
354 | if (HFSPLUS_SB(sb).alloc_file->i_size * 8 < HFSPLUS_SB(sb).total_blocks - HFSPLUS_SB(sb).free_blocks + 8) { | 376 | if (sbi->alloc_file->i_size * 8 < |
377 | sbi->total_blocks - sbi->free_blocks + 8) { | ||
355 | // extend alloc file | 378 | // extend alloc file |
356 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", HFSPLUS_SB(sb).alloc_file->i_size * 8, | 379 | printk(KERN_ERR "hfs: extend alloc file! (%Lu,%u,%u)\n", |
357 | HFSPLUS_SB(sb).total_blocks, HFSPLUS_SB(sb).free_blocks); | 380 | sbi->alloc_file->i_size * 8, |
381 | sbi->total_blocks, sbi->free_blocks); | ||
358 | return -ENOSPC; | 382 | return -ENOSPC; |
359 | } | 383 | } |
360 | 384 | ||
361 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 385 | mutex_lock(&hip->extents_lock); |
362 | if (HFSPLUS_I(inode).alloc_blocks == HFSPLUS_I(inode).first_blocks) | 386 | if (hip->alloc_blocks == hip->first_blocks) |
363 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).first_extents); | 387 | goal = hfsplus_ext_lastblock(hip->first_extents); |
364 | else { | 388 | else { |
365 | res = hfsplus_ext_read_extent(inode, HFSPLUS_I(inode).alloc_blocks); | 389 | res = hfsplus_ext_read_extent(inode, hip->alloc_blocks); |
366 | if (res) | 390 | if (res) |
367 | goto out; | 391 | goto out; |
368 | goal = hfsplus_ext_lastblock(HFSPLUS_I(inode).cached_extents); | 392 | goal = hfsplus_ext_lastblock(hip->cached_extents); |
369 | } | 393 | } |
370 | 394 | ||
371 | len = HFSPLUS_I(inode).clump_blocks; | 395 | len = hip->clump_blocks; |
372 | start = hfsplus_block_allocate(sb, HFSPLUS_SB(sb).total_blocks, goal, &len); | 396 | start = hfsplus_block_allocate(sb, sbi->total_blocks, goal, &len); |
373 | if (start >= HFSPLUS_SB(sb).total_blocks) { | 397 | if (start >= sbi->total_blocks) { |
374 | start = hfsplus_block_allocate(sb, goal, 0, &len); | 398 | start = hfsplus_block_allocate(sb, goal, 0, &len); |
375 | if (start >= goal) { | 399 | if (start >= goal) { |
376 | res = -ENOSPC; | 400 | res = -ENOSPC; |
@@ -379,56 +403,56 @@ int hfsplus_file_extend(struct inode *inode) | |||
379 | } | 403 | } |
380 | 404 | ||
381 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); | 405 | dprint(DBG_EXTENT, "extend %lu: %u,%u\n", inode->i_ino, start, len); |
382 | if (HFSPLUS_I(inode).alloc_blocks <= HFSPLUS_I(inode).first_blocks) { | 406 | |
383 | if (!HFSPLUS_I(inode).first_blocks) { | 407 | if (hip->alloc_blocks <= hip->first_blocks) { |
408 | if (!hip->first_blocks) { | ||
384 | dprint(DBG_EXTENT, "first extents\n"); | 409 | dprint(DBG_EXTENT, "first extents\n"); |
385 | /* no extents yet */ | 410 | /* no extents yet */ |
386 | HFSPLUS_I(inode).first_extents[0].start_block = cpu_to_be32(start); | 411 | hip->first_extents[0].start_block = cpu_to_be32(start); |
387 | HFSPLUS_I(inode).first_extents[0].block_count = cpu_to_be32(len); | 412 | hip->first_extents[0].block_count = cpu_to_be32(len); |
388 | res = 0; | 413 | res = 0; |
389 | } else { | 414 | } else { |
390 | /* try to append to extents in inode */ | 415 | /* try to append to extents in inode */ |
391 | res = hfsplus_add_extent(HFSPLUS_I(inode).first_extents, | 416 | res = hfsplus_add_extent(hip->first_extents, |
392 | HFSPLUS_I(inode).alloc_blocks, | 417 | hip->alloc_blocks, |
393 | start, len); | 418 | start, len); |
394 | if (res == -ENOSPC) | 419 | if (res == -ENOSPC) |
395 | goto insert_extent; | 420 | goto insert_extent; |
396 | } | 421 | } |
397 | if (!res) { | 422 | if (!res) { |
398 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 423 | hfsplus_dump_extent(hip->first_extents); |
399 | HFSPLUS_I(inode).first_blocks += len; | 424 | hip->first_blocks += len; |
400 | } | 425 | } |
401 | } else { | 426 | } else { |
402 | res = hfsplus_add_extent(HFSPLUS_I(inode).cached_extents, | 427 | res = hfsplus_add_extent(hip->cached_extents, |
403 | HFSPLUS_I(inode).alloc_blocks - | 428 | hip->alloc_blocks - hip->cached_start, |
404 | HFSPLUS_I(inode).cached_start, | ||
405 | start, len); | 429 | start, len); |
406 | if (!res) { | 430 | if (!res) { |
407 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 431 | hfsplus_dump_extent(hip->cached_extents); |
408 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 432 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
409 | HFSPLUS_I(inode).cached_blocks += len; | 433 | hip->cached_blocks += len; |
410 | } else if (res == -ENOSPC) | 434 | } else if (res == -ENOSPC) |
411 | goto insert_extent; | 435 | goto insert_extent; |
412 | } | 436 | } |
413 | out: | 437 | out: |
414 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 438 | mutex_unlock(&hip->extents_lock); |
415 | if (!res) { | 439 | if (!res) { |
416 | HFSPLUS_I(inode).alloc_blocks += len; | 440 | hip->alloc_blocks += len; |
417 | mark_inode_dirty(inode); | 441 | mark_inode_dirty(inode); |
418 | } | 442 | } |
419 | return res; | 443 | return res; |
420 | 444 | ||
421 | insert_extent: | 445 | insert_extent: |
422 | dprint(DBG_EXTENT, "insert new extent\n"); | 446 | dprint(DBG_EXTENT, "insert new extent\n"); |
423 | hfsplus_ext_write_extent(inode); | 447 | hfsplus_ext_write_extent_locked(inode); |
424 | 448 | ||
425 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 449 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
426 | HFSPLUS_I(inode).cached_extents[0].start_block = cpu_to_be32(start); | 450 | hip->cached_extents[0].start_block = cpu_to_be32(start); |
427 | HFSPLUS_I(inode).cached_extents[0].block_count = cpu_to_be32(len); | 451 | hip->cached_extents[0].block_count = cpu_to_be32(len); |
428 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 452 | hfsplus_dump_extent(hip->cached_extents); |
429 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; | 453 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW; |
430 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).alloc_blocks; | 454 | hip->cached_start = hip->alloc_blocks; |
431 | HFSPLUS_I(inode).cached_blocks = len; | 455 | hip->cached_blocks = len; |
432 | 456 | ||
433 | res = 0; | 457 | res = 0; |
434 | goto out; | 458 | goto out; |
@@ -437,13 +461,15 @@ insert_extent: | |||
437 | void hfsplus_file_truncate(struct inode *inode) | 461 | void hfsplus_file_truncate(struct inode *inode) |
438 | { | 462 | { |
439 | struct super_block *sb = inode->i_sb; | 463 | struct super_block *sb = inode->i_sb; |
464 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
440 | struct hfs_find_data fd; | 465 | struct hfs_find_data fd; |
441 | u32 alloc_cnt, blk_cnt, start; | 466 | u32 alloc_cnt, blk_cnt, start; |
442 | int res; | 467 | int res; |
443 | 468 | ||
444 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", inode->i_ino, | 469 | dprint(DBG_INODE, "truncate: %lu, %Lu -> %Lu\n", |
445 | (long long)HFSPLUS_I(inode).phys_size, inode->i_size); | 470 | inode->i_ino, (long long)hip->phys_size, inode->i_size); |
446 | if (inode->i_size > HFSPLUS_I(inode).phys_size) { | 471 | |
472 | if (inode->i_size > hip->phys_size) { | ||
447 | struct address_space *mapping = inode->i_mapping; | 473 | struct address_space *mapping = inode->i_mapping; |
448 | struct page *page; | 474 | struct page *page; |
449 | void *fsdata; | 475 | void *fsdata; |
@@ -460,47 +486,48 @@ void hfsplus_file_truncate(struct inode *inode) | |||
460 | return; | 486 | return; |
461 | mark_inode_dirty(inode); | 487 | mark_inode_dirty(inode); |
462 | return; | 488 | return; |
463 | } else if (inode->i_size == HFSPLUS_I(inode).phys_size) | 489 | } else if (inode->i_size == hip->phys_size) |
464 | return; | 490 | return; |
465 | 491 | ||
466 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb).alloc_blksz - 1) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 492 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> |
467 | alloc_cnt = HFSPLUS_I(inode).alloc_blocks; | 493 | HFSPLUS_SB(sb)->alloc_blksz_shift; |
494 | alloc_cnt = hip->alloc_blocks; | ||
468 | if (blk_cnt == alloc_cnt) | 495 | if (blk_cnt == alloc_cnt) |
469 | goto out; | 496 | goto out; |
470 | 497 | ||
471 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 498 | mutex_lock(&hip->extents_lock); |
472 | hfs_find_init(HFSPLUS_SB(sb).ext_tree, &fd); | 499 | hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
473 | while (1) { | 500 | while (1) { |
474 | if (alloc_cnt == HFSPLUS_I(inode).first_blocks) { | 501 | if (alloc_cnt == hip->first_blocks) { |
475 | hfsplus_free_extents(sb, HFSPLUS_I(inode).first_extents, | 502 | hfsplus_free_extents(sb, hip->first_extents, |
476 | alloc_cnt, alloc_cnt - blk_cnt); | 503 | alloc_cnt, alloc_cnt - blk_cnt); |
477 | hfsplus_dump_extent(HFSPLUS_I(inode).first_extents); | 504 | hfsplus_dump_extent(hip->first_extents); |
478 | HFSPLUS_I(inode).first_blocks = blk_cnt; | 505 | hip->first_blocks = blk_cnt; |
479 | break; | 506 | break; |
480 | } | 507 | } |
481 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); | 508 | res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt); |
482 | if (res) | 509 | if (res) |
483 | break; | 510 | break; |
484 | start = HFSPLUS_I(inode).cached_start; | 511 | start = hip->cached_start; |
485 | hfsplus_free_extents(sb, HFSPLUS_I(inode).cached_extents, | 512 | hfsplus_free_extents(sb, hip->cached_extents, |
486 | alloc_cnt - start, alloc_cnt - blk_cnt); | 513 | alloc_cnt - start, alloc_cnt - blk_cnt); |
487 | hfsplus_dump_extent(HFSPLUS_I(inode).cached_extents); | 514 | hfsplus_dump_extent(hip->cached_extents); |
488 | if (blk_cnt > start) { | 515 | if (blk_cnt > start) { |
489 | HFSPLUS_I(inode).flags |= HFSPLUS_FLG_EXT_DIRTY; | 516 | hip->flags |= HFSPLUS_FLG_EXT_DIRTY; |
490 | break; | 517 | break; |
491 | } | 518 | } |
492 | alloc_cnt = start; | 519 | alloc_cnt = start; |
493 | HFSPLUS_I(inode).cached_start = HFSPLUS_I(inode).cached_blocks = 0; | 520 | hip->cached_start = hip->cached_blocks = 0; |
494 | HFSPLUS_I(inode).flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); | 521 | hip->flags &= ~(HFSPLUS_FLG_EXT_DIRTY | HFSPLUS_FLG_EXT_NEW); |
495 | hfs_brec_remove(&fd); | 522 | hfs_brec_remove(&fd); |
496 | } | 523 | } |
497 | hfs_find_exit(&fd); | 524 | hfs_find_exit(&fd); |
498 | mutex_unlock(&HFSPLUS_I(inode).extents_lock); | 525 | mutex_unlock(&hip->extents_lock); |
499 | 526 | ||
500 | HFSPLUS_I(inode).alloc_blocks = blk_cnt; | 527 | hip->alloc_blocks = blk_cnt; |
501 | out: | 528 | out: |
502 | HFSPLUS_I(inode).phys_size = inode->i_size; | 529 | hip->phys_size = inode->i_size; |
503 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 530 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
504 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 531 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
505 | mark_inode_dirty(inode); | 532 | mark_inode_dirty(inode); |
506 | } | 533 | } |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index dc856be3c2b0..cb3653efb57a 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -62,7 +62,7 @@ struct hfs_btree { | |||
62 | unsigned int depth; | 62 | unsigned int depth; |
63 | 63 | ||
64 | //unsigned int map1_size, map_size; | 64 | //unsigned int map1_size, map_size; |
65 | struct semaphore tree_lock; | 65 | struct mutex tree_lock; |
66 | 66 | ||
67 | unsigned int pages_per_bnode; | 67 | unsigned int pages_per_bnode; |
68 | spinlock_t hash_lock; | 68 | spinlock_t hash_lock; |
@@ -121,16 +121,21 @@ struct hfsplus_sb_info { | |||
121 | u32 sect_count; | 121 | u32 sect_count; |
122 | int fs_shift; | 122 | int fs_shift; |
123 | 123 | ||
124 | /* Stuff in host order from Vol Header */ | 124 | /* immutable data from the volume header */ |
125 | u32 alloc_blksz; | 125 | u32 alloc_blksz; |
126 | int alloc_blksz_shift; | 126 | int alloc_blksz_shift; |
127 | u32 total_blocks; | 127 | u32 total_blocks; |
128 | u32 data_clump_blocks, rsrc_clump_blocks; | ||
129 | |||
130 | /* mutable data from the volume header, protected by alloc_mutex */ | ||
128 | u32 free_blocks; | 131 | u32 free_blocks; |
129 | u32 next_alloc; | 132 | struct mutex alloc_mutex; |
133 | |||
134 | /* mutable data from the volume header, protected by vh_mutex */ | ||
130 | u32 next_cnid; | 135 | u32 next_cnid; |
131 | u32 file_count; | 136 | u32 file_count; |
132 | u32 folder_count; | 137 | u32 folder_count; |
133 | u32 data_clump_blocks, rsrc_clump_blocks; | 138 | struct mutex vh_mutex; |
134 | 139 | ||
135 | /* Config options */ | 140 | /* Config options */ |
136 | u32 creator; | 141 | u32 creator; |
@@ -143,40 +148,50 @@ struct hfsplus_sb_info { | |||
143 | int part, session; | 148 | int part, session; |
144 | 149 | ||
145 | unsigned long flags; | 150 | unsigned long flags; |
146 | |||
147 | struct hlist_head rsrc_inodes; | ||
148 | }; | 151 | }; |
149 | 152 | ||
150 | #define HFSPLUS_SB_WRITEBACKUP 0x0001 | 153 | #define HFSPLUS_SB_WRITEBACKUP 0 |
151 | #define HFSPLUS_SB_NODECOMPOSE 0x0002 | 154 | #define HFSPLUS_SB_NODECOMPOSE 1 |
152 | #define HFSPLUS_SB_FORCE 0x0004 | 155 | #define HFSPLUS_SB_FORCE 2 |
153 | #define HFSPLUS_SB_HFSX 0x0008 | 156 | #define HFSPLUS_SB_HFSX 3 |
154 | #define HFSPLUS_SB_CASEFOLD 0x0010 | 157 | #define HFSPLUS_SB_CASEFOLD 4 |
155 | 158 | ||
156 | 159 | ||
157 | struct hfsplus_inode_info { | 160 | struct hfsplus_inode_info { |
158 | struct mutex extents_lock; | ||
159 | u32 clump_blocks, alloc_blocks; | ||
160 | sector_t fs_blocks; | ||
161 | /* Allocation extents from catalog record or volume header */ | ||
162 | hfsplus_extent_rec first_extents; | ||
163 | u32 first_blocks; | ||
164 | hfsplus_extent_rec cached_extents; | ||
165 | u32 cached_start, cached_blocks; | ||
166 | atomic_t opencnt; | 161 | atomic_t opencnt; |
167 | 162 | ||
168 | struct inode *rsrc_inode; | 163 | /* |
164 | * Extent allocation information, protected by extents_lock. | ||
165 | */ | ||
166 | u32 first_blocks; | ||
167 | u32 clump_blocks; | ||
168 | u32 alloc_blocks; | ||
169 | u32 cached_start; | ||
170 | u32 cached_blocks; | ||
171 | hfsplus_extent_rec first_extents; | ||
172 | hfsplus_extent_rec cached_extents; | ||
169 | unsigned long flags; | 173 | unsigned long flags; |
174 | struct mutex extents_lock; | ||
170 | 175 | ||
176 | /* | ||
177 | * Immutable data. | ||
178 | */ | ||
179 | struct inode *rsrc_inode; | ||
171 | __be32 create_date; | 180 | __be32 create_date; |
172 | /* Device number in hfsplus_permissions in catalog */ | ||
173 | u32 dev; | ||
174 | /* BSD system and user file flags */ | ||
175 | u8 rootflags; | ||
176 | u8 userflags; | ||
177 | 181 | ||
182 | /* | ||
183 | * Protected by sbi->vh_mutex. | ||
184 | */ | ||
185 | u32 linkid; | ||
186 | |||
187 | /* | ||
188 | * Protected by i_mutex. | ||
189 | */ | ||
190 | sector_t fs_blocks; | ||
191 | u8 userflags; /* BSD user file flags */ | ||
178 | struct list_head open_dir_list; | 192 | struct list_head open_dir_list; |
179 | loff_t phys_size; | 193 | loff_t phys_size; |
194 | |||
180 | struct inode vfs_inode; | 195 | struct inode vfs_inode; |
181 | }; | 196 | }; |
182 | 197 | ||
@@ -184,8 +199,8 @@ struct hfsplus_inode_info { | |||
184 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 | 199 | #define HFSPLUS_FLG_EXT_DIRTY 0x0002 |
185 | #define HFSPLUS_FLG_EXT_NEW 0x0004 | 200 | #define HFSPLUS_FLG_EXT_NEW 0x0004 |
186 | 201 | ||
187 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC)) | 202 | #define HFSPLUS_IS_DATA(inode) (!(HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC)) |
188 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode).flags & HFSPLUS_FLG_RSRC) | 203 | #define HFSPLUS_IS_RSRC(inode) (HFSPLUS_I(inode)->flags & HFSPLUS_FLG_RSRC) |
189 | 204 | ||
190 | struct hfs_find_data { | 205 | struct hfs_find_data { |
191 | /* filled by caller */ | 206 | /* filled by caller */ |
@@ -311,6 +326,7 @@ int hfsplus_create_cat(u32, struct inode *, struct qstr *, struct inode *); | |||
311 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); | 326 | int hfsplus_delete_cat(u32, struct inode *, struct qstr *); |
312 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, | 327 | int hfsplus_rename_cat(u32, struct inode *, struct qstr *, |
313 | struct inode *, struct qstr *); | 328 | struct inode *, struct qstr *); |
329 | void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); | ||
314 | 330 | ||
315 | /* dir.c */ | 331 | /* dir.c */ |
316 | extern const struct inode_operations hfsplus_dir_inode_operations; | 332 | extern const struct inode_operations hfsplus_dir_inode_operations; |
@@ -372,26 +388,15 @@ int hfsplus_read_wrapper(struct super_block *); | |||
372 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); | 388 | int hfs_part_find(struct super_block *, sector_t *, sector_t *); |
373 | 389 | ||
374 | /* access macros */ | 390 | /* access macros */ |
375 | /* | ||
376 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) | 391 | static inline struct hfsplus_sb_info *HFSPLUS_SB(struct super_block *sb) |
377 | { | 392 | { |
378 | return sb->s_fs_info; | 393 | return sb->s_fs_info; |
379 | } | 394 | } |
395 | |||
380 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | 396 | static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) |
381 | { | 397 | { |
382 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); | 398 | return list_entry(inode, struct hfsplus_inode_info, vfs_inode); |
383 | } | 399 | } |
384 | */ | ||
385 | #define HFSPLUS_SB(super) (*(struct hfsplus_sb_info *)(super)->s_fs_info) | ||
386 | #define HFSPLUS_I(inode) (*list_entry(inode, struct hfsplus_inode_info, vfs_inode)) | ||
387 | |||
388 | #if 1 | ||
389 | #define hfsplus_kmap(p) ({ struct page *__p = (p); kmap(__p); }) | ||
390 | #define hfsplus_kunmap(p) ({ struct page *__p = (p); kunmap(__p); __p; }) | ||
391 | #else | ||
392 | #define hfsplus_kmap(p) kmap(p) | ||
393 | #define hfsplus_kunmap(p) kunmap(p) | ||
394 | #endif | ||
395 | 400 | ||
396 | #define sb_bread512(sb, sec, data) ({ \ | 401 | #define sb_bread512(sb, sec, data) ({ \ |
397 | struct buffer_head *__bh; \ | 402 | struct buffer_head *__bh; \ |
@@ -419,6 +424,4 @@ static inline struct hfsplus_inode_info *HFSPLUS_I(struct inode *inode) | |||
419 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) | 424 | #define hfsp_ut2mt(t) __hfsp_ut2mt((t).tv_sec) |
420 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) | 425 | #define hfsp_now2mt() __hfsp_ut2mt(get_seconds()) |
421 | 426 | ||
422 | #define kdev_t_to_nr(x) (x) | ||
423 | |||
424 | #endif | 427 | #endif |
diff --git a/fs/hfsplus/hfsplus_raw.h b/fs/hfsplus/hfsplus_raw.h index fe99fe8db61a..6892899fd6fb 100644 --- a/fs/hfsplus/hfsplus_raw.h +++ b/fs/hfsplus/hfsplus_raw.h | |||
@@ -200,6 +200,7 @@ struct hfsplus_cat_key { | |||
200 | struct hfsplus_unistr name; | 200 | struct hfsplus_unistr name; |
201 | } __packed; | 201 | } __packed; |
202 | 202 | ||
203 | #define HFSPLUS_CAT_KEYLEN (sizeof(struct hfsplus_cat_key)) | ||
203 | 204 | ||
204 | /* Structs from hfs.h */ | 205 | /* Structs from hfs.h */ |
205 | struct hfsp_point { | 206 | struct hfsp_point { |
@@ -323,7 +324,7 @@ struct hfsplus_ext_key { | |||
323 | __be32 start_block; | 324 | __be32 start_block; |
324 | } __packed; | 325 | } __packed; |
325 | 326 | ||
326 | #define HFSPLUS_EXT_KEYLEN 12 | 327 | #define HFSPLUS_EXT_KEYLEN sizeof(struct hfsplus_ext_key) |
327 | 328 | ||
328 | /* HFS+ generic BTree key */ | 329 | /* HFS+ generic BTree key */ |
329 | typedef union { | 330 | typedef union { |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index c5a979d62c65..8afd7e84f98d 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
@@ -36,7 +36,7 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, | |||
36 | *pagep = NULL; | 36 | *pagep = NULL; |
37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, | 37 | ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, |
38 | hfsplus_get_block, | 38 | hfsplus_get_block, |
39 | &HFSPLUS_I(mapping->host).phys_size); | 39 | &HFSPLUS_I(mapping->host)->phys_size); |
40 | if (unlikely(ret)) { | 40 | if (unlikely(ret)) { |
41 | loff_t isize = mapping->host->i_size; | 41 | loff_t isize = mapping->host->i_size; |
42 | if (pos + len > isize) | 42 | if (pos + len > isize) |
@@ -62,13 +62,13 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) | |||
62 | 62 | ||
63 | switch (inode->i_ino) { | 63 | switch (inode->i_ino) { |
64 | case HFSPLUS_EXT_CNID: | 64 | case HFSPLUS_EXT_CNID: |
65 | tree = HFSPLUS_SB(sb).ext_tree; | 65 | tree = HFSPLUS_SB(sb)->ext_tree; |
66 | break; | 66 | break; |
67 | case HFSPLUS_CAT_CNID: | 67 | case HFSPLUS_CAT_CNID: |
68 | tree = HFSPLUS_SB(sb).cat_tree; | 68 | tree = HFSPLUS_SB(sb)->cat_tree; |
69 | break; | 69 | break; |
70 | case HFSPLUS_ATTR_CNID: | 70 | case HFSPLUS_ATTR_CNID: |
71 | tree = HFSPLUS_SB(sb).attr_tree; | 71 | tree = HFSPLUS_SB(sb)->attr_tree; |
72 | break; | 72 | break; |
73 | default: | 73 | default: |
74 | BUG(); | 74 | BUG(); |
@@ -172,12 +172,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
172 | struct hfs_find_data fd; | 172 | struct hfs_find_data fd; |
173 | struct super_block *sb = dir->i_sb; | 173 | struct super_block *sb = dir->i_sb; |
174 | struct inode *inode = NULL; | 174 | struct inode *inode = NULL; |
175 | struct hfsplus_inode_info *hip; | ||
175 | int err; | 176 | int err; |
176 | 177 | ||
177 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) | 178 | if (HFSPLUS_IS_RSRC(dir) || strcmp(dentry->d_name.name, "rsrc")) |
178 | goto out; | 179 | goto out; |
179 | 180 | ||
180 | inode = HFSPLUS_I(dir).rsrc_inode; | 181 | inode = HFSPLUS_I(dir)->rsrc_inode; |
181 | if (inode) | 182 | if (inode) |
182 | goto out; | 183 | goto out; |
183 | 184 | ||
@@ -185,12 +186,13 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
185 | if (!inode) | 186 | if (!inode) |
186 | return ERR_PTR(-ENOMEM); | 187 | return ERR_PTR(-ENOMEM); |
187 | 188 | ||
189 | hip = HFSPLUS_I(inode); | ||
188 | inode->i_ino = dir->i_ino; | 190 | inode->i_ino = dir->i_ino; |
189 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 191 | INIT_LIST_HEAD(&hip->open_dir_list); |
190 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 192 | mutex_init(&hip->extents_lock); |
191 | HFSPLUS_I(inode).flags = HFSPLUS_FLG_RSRC; | 193 | hip->flags = HFSPLUS_FLG_RSRC; |
192 | 194 | ||
193 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 195 | hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
194 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); | 196 | err = hfsplus_find_cat(sb, dir->i_ino, &fd); |
195 | if (!err) | 197 | if (!err) |
196 | err = hfsplus_cat_read_inode(inode, &fd); | 198 | err = hfsplus_cat_read_inode(inode, &fd); |
@@ -199,10 +201,18 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent | |||
199 | iput(inode); | 201 | iput(inode); |
200 | return ERR_PTR(err); | 202 | return ERR_PTR(err); |
201 | } | 203 | } |
202 | HFSPLUS_I(inode).rsrc_inode = dir; | 204 | hip->rsrc_inode = dir; |
203 | HFSPLUS_I(dir).rsrc_inode = inode; | 205 | HFSPLUS_I(dir)->rsrc_inode = inode; |
204 | igrab(dir); | 206 | igrab(dir); |
205 | hlist_add_head(&inode->i_hash, &HFSPLUS_SB(sb).rsrc_inodes); | 207 | |
208 | /* | ||
209 | * __mark_inode_dirty expects inodes to be hashed. Since we don't | ||
210 | * want resource fork inodes in the regular inode space, we make them | ||
211 | * appear hashed, but do not put on any lists. hlist_del() | ||
212 | * will work fine and require no locking. | ||
213 | */ | ||
214 | hlist_add_fake(&inode->i_hash); | ||
215 | |||
206 | mark_inode_dirty(inode); | 216 | mark_inode_dirty(inode); |
207 | out: | 217 | out: |
208 | d_add(dentry, inode); | 218 | d_add(dentry, inode); |
@@ -211,30 +221,27 @@ out: | |||
211 | 221 | ||
212 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) | 222 | static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, int dir) |
213 | { | 223 | { |
214 | struct super_block *sb = inode->i_sb; | 224 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
215 | u16 mode; | 225 | u16 mode; |
216 | 226 | ||
217 | mode = be16_to_cpu(perms->mode); | 227 | mode = be16_to_cpu(perms->mode); |
218 | 228 | ||
219 | inode->i_uid = be32_to_cpu(perms->owner); | 229 | inode->i_uid = be32_to_cpu(perms->owner); |
220 | if (!inode->i_uid && !mode) | 230 | if (!inode->i_uid && !mode) |
221 | inode->i_uid = HFSPLUS_SB(sb).uid; | 231 | inode->i_uid = sbi->uid; |
222 | 232 | ||
223 | inode->i_gid = be32_to_cpu(perms->group); | 233 | inode->i_gid = be32_to_cpu(perms->group); |
224 | if (!inode->i_gid && !mode) | 234 | if (!inode->i_gid && !mode) |
225 | inode->i_gid = HFSPLUS_SB(sb).gid; | 235 | inode->i_gid = sbi->gid; |
226 | 236 | ||
227 | if (dir) { | 237 | if (dir) { |
228 | mode = mode ? (mode & S_IALLUGO) : | 238 | mode = mode ? (mode & S_IALLUGO) : (S_IRWXUGO & ~(sbi->umask)); |
229 | (S_IRWXUGO & ~(HFSPLUS_SB(sb).umask)); | ||
230 | mode |= S_IFDIR; | 239 | mode |= S_IFDIR; |
231 | } else if (!mode) | 240 | } else if (!mode) |
232 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & | 241 | mode = S_IFREG | ((S_IRUGO|S_IWUGO) & ~(sbi->umask)); |
233 | ~(HFSPLUS_SB(sb).umask)); | ||
234 | inode->i_mode = mode; | 242 | inode->i_mode = mode; |
235 | 243 | ||
236 | HFSPLUS_I(inode).rootflags = perms->rootflags; | 244 | HFSPLUS_I(inode)->userflags = perms->userflags; |
237 | HFSPLUS_I(inode).userflags = perms->userflags; | ||
238 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) | 245 | if (perms->rootflags & HFSPLUS_FLG_IMMUTABLE) |
239 | inode->i_flags |= S_IMMUTABLE; | 246 | inode->i_flags |= S_IMMUTABLE; |
240 | else | 247 | else |
@@ -245,30 +252,13 @@ static void hfsplus_get_perms(struct inode *inode, struct hfsplus_perm *perms, i | |||
245 | inode->i_flags &= ~S_APPEND; | 252 | inode->i_flags &= ~S_APPEND; |
246 | } | 253 | } |
247 | 254 | ||
248 | static void hfsplus_set_perms(struct inode *inode, struct hfsplus_perm *perms) | ||
249 | { | ||
250 | if (inode->i_flags & S_IMMUTABLE) | ||
251 | perms->rootflags |= HFSPLUS_FLG_IMMUTABLE; | ||
252 | else | ||
253 | perms->rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | ||
254 | if (inode->i_flags & S_APPEND) | ||
255 | perms->rootflags |= HFSPLUS_FLG_APPEND; | ||
256 | else | ||
257 | perms->rootflags &= ~HFSPLUS_FLG_APPEND; | ||
258 | perms->userflags = HFSPLUS_I(inode).userflags; | ||
259 | perms->mode = cpu_to_be16(inode->i_mode); | ||
260 | perms->owner = cpu_to_be32(inode->i_uid); | ||
261 | perms->group = cpu_to_be32(inode->i_gid); | ||
262 | perms->dev = cpu_to_be32(HFSPLUS_I(inode).dev); | ||
263 | } | ||
264 | |||
265 | static int hfsplus_file_open(struct inode *inode, struct file *file) | 255 | static int hfsplus_file_open(struct inode *inode, struct file *file) |
266 | { | 256 | { |
267 | if (HFSPLUS_IS_RSRC(inode)) | 257 | if (HFSPLUS_IS_RSRC(inode)) |
268 | inode = HFSPLUS_I(inode).rsrc_inode; | 258 | inode = HFSPLUS_I(inode)->rsrc_inode; |
269 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | 259 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) |
270 | return -EOVERFLOW; | 260 | return -EOVERFLOW; |
271 | atomic_inc(&HFSPLUS_I(inode).opencnt); | 261 | atomic_inc(&HFSPLUS_I(inode)->opencnt); |
272 | return 0; | 262 | return 0; |
273 | } | 263 | } |
274 | 264 | ||
@@ -277,12 +267,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file) | |||
277 | struct super_block *sb = inode->i_sb; | 267 | struct super_block *sb = inode->i_sb; |
278 | 268 | ||
279 | if (HFSPLUS_IS_RSRC(inode)) | 269 | if (HFSPLUS_IS_RSRC(inode)) |
280 | inode = HFSPLUS_I(inode).rsrc_inode; | 270 | inode = HFSPLUS_I(inode)->rsrc_inode; |
281 | if (atomic_dec_and_test(&HFSPLUS_I(inode).opencnt)) { | 271 | if (atomic_dec_and_test(&HFSPLUS_I(inode)->opencnt)) { |
282 | mutex_lock(&inode->i_mutex); | 272 | mutex_lock(&inode->i_mutex); |
283 | hfsplus_file_truncate(inode); | 273 | hfsplus_file_truncate(inode); |
284 | if (inode->i_flags & S_DEAD) { | 274 | if (inode->i_flags & S_DEAD) { |
285 | hfsplus_delete_cat(inode->i_ino, HFSPLUS_SB(sb).hidden_dir, NULL); | 275 | hfsplus_delete_cat(inode->i_ino, |
276 | HFSPLUS_SB(sb)->hidden_dir, NULL); | ||
286 | hfsplus_delete_inode(inode); | 277 | hfsplus_delete_inode(inode); |
287 | } | 278 | } |
288 | mutex_unlock(&inode->i_mutex); | 279 | mutex_unlock(&inode->i_mutex); |
@@ -361,47 +352,52 @@ static const struct file_operations hfsplus_file_operations = { | |||
361 | 352 | ||
362 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) | 353 | struct inode *hfsplus_new_inode(struct super_block *sb, int mode) |
363 | { | 354 | { |
355 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
364 | struct inode *inode = new_inode(sb); | 356 | struct inode *inode = new_inode(sb); |
357 | struct hfsplus_inode_info *hip; | ||
358 | |||
365 | if (!inode) | 359 | if (!inode) |
366 | return NULL; | 360 | return NULL; |
367 | 361 | ||
368 | inode->i_ino = HFSPLUS_SB(sb).next_cnid++; | 362 | inode->i_ino = sbi->next_cnid++; |
369 | inode->i_mode = mode; | 363 | inode->i_mode = mode; |
370 | inode->i_uid = current_fsuid(); | 364 | inode->i_uid = current_fsuid(); |
371 | inode->i_gid = current_fsgid(); | 365 | inode->i_gid = current_fsgid(); |
372 | inode->i_nlink = 1; | 366 | inode->i_nlink = 1; |
373 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; | 367 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; |
374 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 368 | |
375 | mutex_init(&HFSPLUS_I(inode).extents_lock); | 369 | hip = HFSPLUS_I(inode); |
376 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | 370 | INIT_LIST_HEAD(&hip->open_dir_list); |
377 | HFSPLUS_I(inode).flags = 0; | 371 | mutex_init(&hip->extents_lock); |
378 | memset(HFSPLUS_I(inode).first_extents, 0, sizeof(hfsplus_extent_rec)); | 372 | atomic_set(&hip->opencnt, 0); |
379 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 373 | hip->flags = 0; |
380 | HFSPLUS_I(inode).alloc_blocks = 0; | 374 | memset(hip->first_extents, 0, sizeof(hfsplus_extent_rec)); |
381 | HFSPLUS_I(inode).first_blocks = 0; | 375 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
382 | HFSPLUS_I(inode).cached_start = 0; | 376 | hip->alloc_blocks = 0; |
383 | HFSPLUS_I(inode).cached_blocks = 0; | 377 | hip->first_blocks = 0; |
384 | HFSPLUS_I(inode).phys_size = 0; | 378 | hip->cached_start = 0; |
385 | HFSPLUS_I(inode).fs_blocks = 0; | 379 | hip->cached_blocks = 0; |
386 | HFSPLUS_I(inode).rsrc_inode = NULL; | 380 | hip->phys_size = 0; |
381 | hip->fs_blocks = 0; | ||
382 | hip->rsrc_inode = NULL; | ||
387 | if (S_ISDIR(inode->i_mode)) { | 383 | if (S_ISDIR(inode->i_mode)) { |
388 | inode->i_size = 2; | 384 | inode->i_size = 2; |
389 | HFSPLUS_SB(sb).folder_count++; | 385 | sbi->folder_count++; |
390 | inode->i_op = &hfsplus_dir_inode_operations; | 386 | inode->i_op = &hfsplus_dir_inode_operations; |
391 | inode->i_fop = &hfsplus_dir_operations; | 387 | inode->i_fop = &hfsplus_dir_operations; |
392 | } else if (S_ISREG(inode->i_mode)) { | 388 | } else if (S_ISREG(inode->i_mode)) { |
393 | HFSPLUS_SB(sb).file_count++; | 389 | sbi->file_count++; |
394 | inode->i_op = &hfsplus_file_inode_operations; | 390 | inode->i_op = &hfsplus_file_inode_operations; |
395 | inode->i_fop = &hfsplus_file_operations; | 391 | inode->i_fop = &hfsplus_file_operations; |
396 | inode->i_mapping->a_ops = &hfsplus_aops; | 392 | inode->i_mapping->a_ops = &hfsplus_aops; |
397 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_SB(sb).data_clump_blocks; | 393 | hip->clump_blocks = sbi->data_clump_blocks; |
398 | } else if (S_ISLNK(inode->i_mode)) { | 394 | } else if (S_ISLNK(inode->i_mode)) { |
399 | HFSPLUS_SB(sb).file_count++; | 395 | sbi->file_count++; |
400 | inode->i_op = &page_symlink_inode_operations; | 396 | inode->i_op = &page_symlink_inode_operations; |
401 | inode->i_mapping->a_ops = &hfsplus_aops; | 397 | inode->i_mapping->a_ops = &hfsplus_aops; |
402 | HFSPLUS_I(inode).clump_blocks = 1; | 398 | hip->clump_blocks = 1; |
403 | } else | 399 | } else |
404 | HFSPLUS_SB(sb).file_count++; | 400 | sbi->file_count++; |
405 | insert_inode_hash(inode); | 401 | insert_inode_hash(inode); |
406 | mark_inode_dirty(inode); | 402 | mark_inode_dirty(inode); |
407 | sb->s_dirt = 1; | 403 | sb->s_dirt = 1; |
@@ -414,11 +410,11 @@ void hfsplus_delete_inode(struct inode *inode) | |||
414 | struct super_block *sb = inode->i_sb; | 410 | struct super_block *sb = inode->i_sb; |
415 | 411 | ||
416 | if (S_ISDIR(inode->i_mode)) { | 412 | if (S_ISDIR(inode->i_mode)) { |
417 | HFSPLUS_SB(sb).folder_count--; | 413 | HFSPLUS_SB(sb)->folder_count--; |
418 | sb->s_dirt = 1; | 414 | sb->s_dirt = 1; |
419 | return; | 415 | return; |
420 | } | 416 | } |
421 | HFSPLUS_SB(sb).file_count--; | 417 | HFSPLUS_SB(sb)->file_count--; |
422 | if (S_ISREG(inode->i_mode)) { | 418 | if (S_ISREG(inode->i_mode)) { |
423 | if (!inode->i_nlink) { | 419 | if (!inode->i_nlink) { |
424 | inode->i_size = 0; | 420 | inode->i_size = 0; |
@@ -434,34 +430,39 @@ void hfsplus_delete_inode(struct inode *inode) | |||
434 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 430 | void hfsplus_inode_read_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
435 | { | 431 | { |
436 | struct super_block *sb = inode->i_sb; | 432 | struct super_block *sb = inode->i_sb; |
433 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
434 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
437 | u32 count; | 435 | u32 count; |
438 | int i; | 436 | int i; |
439 | 437 | ||
440 | memcpy(&HFSPLUS_I(inode).first_extents, &fork->extents, | 438 | memcpy(&hip->first_extents, &fork->extents, sizeof(hfsplus_extent_rec)); |
441 | sizeof(hfsplus_extent_rec)); | ||
442 | for (count = 0, i = 0; i < 8; i++) | 439 | for (count = 0, i = 0; i < 8; i++) |
443 | count += be32_to_cpu(fork->extents[i].block_count); | 440 | count += be32_to_cpu(fork->extents[i].block_count); |
444 | HFSPLUS_I(inode).first_blocks = count; | 441 | hip->first_blocks = count; |
445 | memset(HFSPLUS_I(inode).cached_extents, 0, sizeof(hfsplus_extent_rec)); | 442 | memset(hip->cached_extents, 0, sizeof(hfsplus_extent_rec)); |
446 | HFSPLUS_I(inode).cached_start = 0; | 443 | hip->cached_start = 0; |
447 | HFSPLUS_I(inode).cached_blocks = 0; | 444 | hip->cached_blocks = 0; |
448 | 445 | ||
449 | HFSPLUS_I(inode).alloc_blocks = be32_to_cpu(fork->total_blocks); | 446 | hip->alloc_blocks = be32_to_cpu(fork->total_blocks); |
450 | inode->i_size = HFSPLUS_I(inode).phys_size = be64_to_cpu(fork->total_size); | 447 | hip->phys_size = inode->i_size = be64_to_cpu(fork->total_size); |
451 | HFSPLUS_I(inode).fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; | 448 | hip->fs_blocks = |
452 | inode_set_bytes(inode, HFSPLUS_I(inode).fs_blocks << sb->s_blocksize_bits); | 449 | (inode->i_size + sb->s_blocksize - 1) >> sb->s_blocksize_bits; |
453 | HFSPLUS_I(inode).clump_blocks = be32_to_cpu(fork->clump_size) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 450 | inode_set_bytes(inode, hip->fs_blocks << sb->s_blocksize_bits); |
454 | if (!HFSPLUS_I(inode).clump_blocks) | 451 | hip->clump_blocks = |
455 | HFSPLUS_I(inode).clump_blocks = HFSPLUS_IS_RSRC(inode) ? HFSPLUS_SB(sb).rsrc_clump_blocks : | 452 | be32_to_cpu(fork->clump_size) >> sbi->alloc_blksz_shift; |
456 | HFSPLUS_SB(sb).data_clump_blocks; | 453 | if (!hip->clump_blocks) { |
454 | hip->clump_blocks = HFSPLUS_IS_RSRC(inode) ? | ||
455 | sbi->rsrc_clump_blocks : | ||
456 | sbi->data_clump_blocks; | ||
457 | } | ||
457 | } | 458 | } |
458 | 459 | ||
459 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) | 460 | void hfsplus_inode_write_fork(struct inode *inode, struct hfsplus_fork_raw *fork) |
460 | { | 461 | { |
461 | memcpy(&fork->extents, &HFSPLUS_I(inode).first_extents, | 462 | memcpy(&fork->extents, &HFSPLUS_I(inode)->first_extents, |
462 | sizeof(hfsplus_extent_rec)); | 463 | sizeof(hfsplus_extent_rec)); |
463 | fork->total_size = cpu_to_be64(inode->i_size); | 464 | fork->total_size = cpu_to_be64(inode->i_size); |
464 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode).alloc_blocks); | 465 | fork->total_blocks = cpu_to_be32(HFSPLUS_I(inode)->alloc_blocks); |
465 | } | 466 | } |
466 | 467 | ||
467 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | 468 | int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) |
@@ -472,7 +473,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
472 | 473 | ||
473 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); | 474 | type = hfs_bnode_read_u16(fd->bnode, fd->entryoffset); |
474 | 475 | ||
475 | HFSPLUS_I(inode).dev = 0; | 476 | HFSPLUS_I(inode)->linkid = 0; |
476 | if (type == HFSPLUS_FOLDER) { | 477 | if (type == HFSPLUS_FOLDER) { |
477 | struct hfsplus_cat_folder *folder = &entry.folder; | 478 | struct hfsplus_cat_folder *folder = &entry.folder; |
478 | 479 | ||
@@ -486,8 +487,8 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
486 | inode->i_atime = hfsp_mt2ut(folder->access_date); | 487 | inode->i_atime = hfsp_mt2ut(folder->access_date); |
487 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); | 488 | inode->i_mtime = hfsp_mt2ut(folder->content_mod_date); |
488 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); | 489 | inode->i_ctime = hfsp_mt2ut(folder->attribute_mod_date); |
489 | HFSPLUS_I(inode).create_date = folder->create_date; | 490 | HFSPLUS_I(inode)->create_date = folder->create_date; |
490 | HFSPLUS_I(inode).fs_blocks = 0; | 491 | HFSPLUS_I(inode)->fs_blocks = 0; |
491 | inode->i_op = &hfsplus_dir_inode_operations; | 492 | inode->i_op = &hfsplus_dir_inode_operations; |
492 | inode->i_fop = &hfsplus_dir_operations; | 493 | inode->i_fop = &hfsplus_dir_operations; |
493 | } else if (type == HFSPLUS_FILE) { | 494 | } else if (type == HFSPLUS_FILE) { |
@@ -518,7 +519,7 @@ int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd) | |||
518 | inode->i_atime = hfsp_mt2ut(file->access_date); | 519 | inode->i_atime = hfsp_mt2ut(file->access_date); |
519 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); | 520 | inode->i_mtime = hfsp_mt2ut(file->content_mod_date); |
520 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); | 521 | inode->i_ctime = hfsp_mt2ut(file->attribute_mod_date); |
521 | HFSPLUS_I(inode).create_date = file->create_date; | 522 | HFSPLUS_I(inode)->create_date = file->create_date; |
522 | } else { | 523 | } else { |
523 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); | 524 | printk(KERN_ERR "hfs: bad catalog entry used to create inode\n"); |
524 | res = -EIO; | 525 | res = -EIO; |
@@ -533,12 +534,12 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
533 | hfsplus_cat_entry entry; | 534 | hfsplus_cat_entry entry; |
534 | 535 | ||
535 | if (HFSPLUS_IS_RSRC(inode)) | 536 | if (HFSPLUS_IS_RSRC(inode)) |
536 | main_inode = HFSPLUS_I(inode).rsrc_inode; | 537 | main_inode = HFSPLUS_I(inode)->rsrc_inode; |
537 | 538 | ||
538 | if (!main_inode->i_nlink) | 539 | if (!main_inode->i_nlink) |
539 | return 0; | 540 | return 0; |
540 | 541 | ||
541 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb).cat_tree, &fd)) | 542 | if (hfs_find_init(HFSPLUS_SB(main_inode->i_sb)->cat_tree, &fd)) |
542 | /* panic? */ | 543 | /* panic? */ |
543 | return -EIO; | 544 | return -EIO; |
544 | 545 | ||
@@ -554,7 +555,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
554 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 555 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
555 | sizeof(struct hfsplus_cat_folder)); | 556 | sizeof(struct hfsplus_cat_folder)); |
556 | /* simple node checks? */ | 557 | /* simple node checks? */ |
557 | hfsplus_set_perms(inode, &folder->permissions); | 558 | hfsplus_cat_set_perms(inode, &folder->permissions); |
558 | folder->access_date = hfsp_ut2mt(inode->i_atime); | 559 | folder->access_date = hfsp_ut2mt(inode->i_atime); |
559 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); | 560 | folder->content_mod_date = hfsp_ut2mt(inode->i_mtime); |
560 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); | 561 | folder->attribute_mod_date = hfsp_ut2mt(inode->i_ctime); |
@@ -576,11 +577,7 @@ int hfsplus_cat_write_inode(struct inode *inode) | |||
576 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, | 577 | hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, |
577 | sizeof(struct hfsplus_cat_file)); | 578 | sizeof(struct hfsplus_cat_file)); |
578 | hfsplus_inode_write_fork(inode, &file->data_fork); | 579 | hfsplus_inode_write_fork(inode, &file->data_fork); |
579 | if (S_ISREG(inode->i_mode)) | 580 | hfsplus_cat_set_perms(inode, &file->permissions); |
580 | HFSPLUS_I(inode).dev = inode->i_nlink; | ||
581 | if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) | ||
582 | HFSPLUS_I(inode).dev = kdev_t_to_nr(inode->i_rdev); | ||
583 | hfsplus_set_perms(inode, &file->permissions); | ||
584 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) | 581 | if ((file->permissions.rootflags | file->permissions.userflags) & HFSPLUS_FLG_IMMUTABLE) |
585 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); | 582 | file->flags |= cpu_to_be16(HFSPLUS_FILE_LOCKED); |
586 | else | 583 | else |
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c index ac405f099026..5b4667e08ef7 100644 --- a/fs/hfsplus/ioctl.c +++ b/fs/hfsplus/ioctl.c | |||
@@ -17,83 +17,98 @@ | |||
17 | #include <linux/mount.h> | 17 | #include <linux/mount.h> |
18 | #include <linux/sched.h> | 18 | #include <linux/sched.h> |
19 | #include <linux/xattr.h> | 19 | #include <linux/xattr.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <asm/uaccess.h> | 20 | #include <asm/uaccess.h> |
22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
23 | 22 | ||
24 | long hfsplus_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 23 | static int hfsplus_ioctl_getflags(struct file *file, int __user *user_flags) |
25 | { | 24 | { |
26 | struct inode *inode = filp->f_path.dentry->d_inode; | 25 | struct inode *inode = file->f_path.dentry->d_inode; |
26 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
27 | unsigned int flags = 0; | ||
28 | |||
29 | if (inode->i_flags & S_IMMUTABLE) | ||
30 | flags |= FS_IMMUTABLE_FL; | ||
31 | if (inode->i_flags |= S_APPEND) | ||
32 | flags |= FS_APPEND_FL; | ||
33 | if (hip->userflags & HFSPLUS_FLG_NODUMP) | ||
34 | flags |= FS_NODUMP_FL; | ||
35 | |||
36 | return put_user(flags, user_flags); | ||
37 | } | ||
38 | |||
39 | static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags) | ||
40 | { | ||
41 | struct inode *inode = file->f_path.dentry->d_inode; | ||
42 | struct hfsplus_inode_info *hip = HFSPLUS_I(inode); | ||
27 | unsigned int flags; | 43 | unsigned int flags; |
44 | int err = 0; | ||
28 | 45 | ||
29 | lock_kernel(); | 46 | err = mnt_want_write(file->f_path.mnt); |
30 | switch (cmd) { | 47 | if (err) |
31 | case HFSPLUS_IOC_EXT2_GETFLAGS: | 48 | goto out; |
32 | flags = 0; | ||
33 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_IMMUTABLE) | ||
34 | flags |= FS_IMMUTABLE_FL; /* EXT2_IMMUTABLE_FL */ | ||
35 | if (HFSPLUS_I(inode).rootflags & HFSPLUS_FLG_APPEND) | ||
36 | flags |= FS_APPEND_FL; /* EXT2_APPEND_FL */ | ||
37 | if (HFSPLUS_I(inode).userflags & HFSPLUS_FLG_NODUMP) | ||
38 | flags |= FS_NODUMP_FL; /* EXT2_NODUMP_FL */ | ||
39 | return put_user(flags, (int __user *)arg); | ||
40 | case HFSPLUS_IOC_EXT2_SETFLAGS: { | ||
41 | int err = 0; | ||
42 | err = mnt_want_write(filp->f_path.mnt); | ||
43 | if (err) { | ||
44 | unlock_kernel(); | ||
45 | return err; | ||
46 | } | ||
47 | 49 | ||
48 | if (!is_owner_or_cap(inode)) { | 50 | if (!is_owner_or_cap(inode)) { |
49 | err = -EACCES; | 51 | err = -EACCES; |
50 | goto setflags_out; | 52 | goto out_drop_write; |
51 | } | 53 | } |
52 | if (get_user(flags, (int __user *)arg)) { | ||
53 | err = -EFAULT; | ||
54 | goto setflags_out; | ||
55 | } | ||
56 | if (flags & (FS_IMMUTABLE_FL|FS_APPEND_FL) || | ||
57 | HFSPLUS_I(inode).rootflags & (HFSPLUS_FLG_IMMUTABLE|HFSPLUS_FLG_APPEND)) { | ||
58 | if (!capable(CAP_LINUX_IMMUTABLE)) { | ||
59 | err = -EPERM; | ||
60 | goto setflags_out; | ||
61 | } | ||
62 | } | ||
63 | 54 | ||
64 | /* don't silently ignore unsupported ext2 flags */ | 55 | if (get_user(flags, user_flags)) { |
65 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | 56 | err = -EFAULT; |
66 | err = -EOPNOTSUPP; | 57 | goto out_drop_write; |
67 | goto setflags_out; | 58 | } |
68 | } | 59 | |
69 | if (flags & FS_IMMUTABLE_FL) { /* EXT2_IMMUTABLE_FL */ | 60 | mutex_lock(&inode->i_mutex); |
70 | inode->i_flags |= S_IMMUTABLE; | 61 | |
71 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_IMMUTABLE; | 62 | if ((flags & (FS_IMMUTABLE_FL|FS_APPEND_FL)) || |
72 | } else { | 63 | inode->i_flags & (S_IMMUTABLE|S_APPEND)) { |
73 | inode->i_flags &= ~S_IMMUTABLE; | 64 | if (!capable(CAP_LINUX_IMMUTABLE)) { |
74 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_IMMUTABLE; | 65 | err = -EPERM; |
75 | } | 66 | goto out_unlock_inode; |
76 | if (flags & FS_APPEND_FL) { /* EXT2_APPEND_FL */ | ||
77 | inode->i_flags |= S_APPEND; | ||
78 | HFSPLUS_I(inode).rootflags |= HFSPLUS_FLG_APPEND; | ||
79 | } else { | ||
80 | inode->i_flags &= ~S_APPEND; | ||
81 | HFSPLUS_I(inode).rootflags &= ~HFSPLUS_FLG_APPEND; | ||
82 | } | 67 | } |
83 | if (flags & FS_NODUMP_FL) /* EXT2_NODUMP_FL */ | ||
84 | HFSPLUS_I(inode).userflags |= HFSPLUS_FLG_NODUMP; | ||
85 | else | ||
86 | HFSPLUS_I(inode).userflags &= ~HFSPLUS_FLG_NODUMP; | ||
87 | |||
88 | inode->i_ctime = CURRENT_TIME_SEC; | ||
89 | mark_inode_dirty(inode); | ||
90 | setflags_out: | ||
91 | mnt_drop_write(filp->f_path.mnt); | ||
92 | unlock_kernel(); | ||
93 | return err; | ||
94 | } | 68 | } |
69 | |||
70 | /* don't silently ignore unsupported ext2 flags */ | ||
71 | if (flags & ~(FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NODUMP_FL)) { | ||
72 | err = -EOPNOTSUPP; | ||
73 | goto out_unlock_inode; | ||
74 | } | ||
75 | |||
76 | if (flags & FS_IMMUTABLE_FL) | ||
77 | inode->i_flags |= S_IMMUTABLE; | ||
78 | else | ||
79 | inode->i_flags &= ~S_IMMUTABLE; | ||
80 | |||
81 | if (flags & FS_APPEND_FL) | ||
82 | inode->i_flags |= S_APPEND; | ||
83 | else | ||
84 | inode->i_flags &= ~S_APPEND; | ||
85 | |||
86 | if (flags & FS_NODUMP_FL) | ||
87 | hip->userflags |= HFSPLUS_FLG_NODUMP; | ||
88 | else | ||
89 | hip->userflags &= ~HFSPLUS_FLG_NODUMP; | ||
90 | |||
91 | inode->i_ctime = CURRENT_TIME_SEC; | ||
92 | mark_inode_dirty(inode); | ||
93 | |||
94 | out_unlock_inode: | ||
95 | mutex_lock(&inode->i_mutex); | ||
96 | out_drop_write: | ||
97 | mnt_drop_write(file->f_path.mnt); | ||
98 | out: | ||
99 | return err; | ||
100 | } | ||
101 | |||
102 | long hfsplus_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | ||
103 | { | ||
104 | void __user *argp = (void __user *)arg; | ||
105 | |||
106 | switch (cmd) { | ||
107 | case HFSPLUS_IOC_EXT2_GETFLAGS: | ||
108 | return hfsplus_ioctl_getflags(file, argp); | ||
109 | case HFSPLUS_IOC_EXT2_SETFLAGS: | ||
110 | return hfsplus_ioctl_setflags(file, argp); | ||
95 | default: | 111 | default: |
96 | unlock_kernel(); | ||
97 | return -ENOTTY; | 112 | return -ENOTTY; |
98 | } | 113 | } |
99 | } | 114 | } |
@@ -110,7 +125,7 @@ int hfsplus_setxattr(struct dentry *dentry, const char *name, | |||
110 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) | 125 | if (!S_ISREG(inode->i_mode) || HFSPLUS_IS_RSRC(inode)) |
111 | return -EOPNOTSUPP; | 126 | return -EOPNOTSUPP; |
112 | 127 | ||
113 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 128 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
114 | if (res) | 129 | if (res) |
115 | return res; | 130 | return res; |
116 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 131 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
@@ -153,7 +168,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
153 | return -EOPNOTSUPP; | 168 | return -EOPNOTSUPP; |
154 | 169 | ||
155 | if (size) { | 170 | if (size) { |
156 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | 171 | res = hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); |
157 | if (res) | 172 | if (res) |
158 | return res; | 173 | return res; |
159 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | 174 | res = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); |
@@ -177,7 +192,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name, | |||
177 | } else | 192 | } else |
178 | res = size ? -ERANGE : 4; | 193 | res = size ? -ERANGE : 4; |
179 | } else | 194 | } else |
180 | res = -ENODATA; | 195 | res = -EOPNOTSUPP; |
181 | out: | 196 | out: |
182 | if (size) | 197 | if (size) |
183 | hfs_find_exit(&fd); | 198 | hfs_find_exit(&fd); |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 572628b4b07d..f9ab276a4d8d 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
@@ -143,13 +143,13 @@ int hfsplus_parse_options(char *input, struct hfsplus_sb_info *sbi) | |||
143 | kfree(p); | 143 | kfree(p); |
144 | break; | 144 | break; |
145 | case opt_decompose: | 145 | case opt_decompose: |
146 | sbi->flags &= ~HFSPLUS_SB_NODECOMPOSE; | 146 | clear_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
147 | break; | 147 | break; |
148 | case opt_nodecompose: | 148 | case opt_nodecompose: |
149 | sbi->flags |= HFSPLUS_SB_NODECOMPOSE; | 149 | set_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags); |
150 | break; | 150 | break; |
151 | case opt_force: | 151 | case opt_force: |
152 | sbi->flags |= HFSPLUS_SB_FORCE; | 152 | set_bit(HFSPLUS_SB_FORCE, &sbi->flags); |
153 | break; | 153 | break; |
154 | default: | 154 | default: |
155 | return 0; | 155 | return 0; |
@@ -171,7 +171,7 @@ done: | |||
171 | 171 | ||
172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | 172 | int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) |
173 | { | 173 | { |
174 | struct hfsplus_sb_info *sbi = &HFSPLUS_SB(mnt->mnt_sb); | 174 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(mnt->mnt_sb); |
175 | 175 | ||
176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) | 176 | if (sbi->creator != HFSPLUS_DEF_CR_TYPE) |
177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); | 177 | seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator); |
@@ -184,7 +184,7 @@ int hfsplus_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
184 | seq_printf(seq, ",session=%u", sbi->session); | 184 | seq_printf(seq, ",session=%u", sbi->session); |
185 | if (sbi->nls) | 185 | if (sbi->nls) |
186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); | 186 | seq_printf(seq, ",nls=%s", sbi->nls->charset); |
187 | if (sbi->flags & HFSPLUS_SB_NODECOMPOSE) | 187 | if (test_bit(HFSPLUS_SB_NODECOMPOSE, &sbi->flags)) |
188 | seq_printf(seq, ",nodecompose"); | 188 | seq_printf(seq, ",nodecompose"); |
189 | return 0; | 189 | return 0; |
190 | } | 190 | } |
diff --git a/fs/hfsplus/part_tbl.c b/fs/hfsplus/part_tbl.c index 1528a6fd0299..208b16c645cc 100644 --- a/fs/hfsplus/part_tbl.c +++ b/fs/hfsplus/part_tbl.c | |||
@@ -74,6 +74,7 @@ struct old_pmap { | |||
74 | int hfs_part_find(struct super_block *sb, | 74 | int hfs_part_find(struct super_block *sb, |
75 | sector_t *part_start, sector_t *part_size) | 75 | sector_t *part_start, sector_t *part_size) |
76 | { | 76 | { |
77 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
77 | struct buffer_head *bh; | 78 | struct buffer_head *bh; |
78 | __be16 *data; | 79 | __be16 *data; |
79 | int i, size, res; | 80 | int i, size, res; |
@@ -95,7 +96,7 @@ int hfs_part_find(struct super_block *sb, | |||
95 | for (i = 0; i < size; p++, i++) { | 96 | for (i = 0; i < size; p++, i++) { |
96 | if (p->pdStart && p->pdSize && | 97 | if (p->pdStart && p->pdSize && |
97 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && | 98 | p->pdFSID == cpu_to_be32(0x54465331)/*"TFS1"*/ && |
98 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 99 | (sbi->part < 0 || sbi->part == i)) { |
99 | *part_start += be32_to_cpu(p->pdStart); | 100 | *part_start += be32_to_cpu(p->pdStart); |
100 | *part_size = be32_to_cpu(p->pdSize); | 101 | *part_size = be32_to_cpu(p->pdSize); |
101 | res = 0; | 102 | res = 0; |
@@ -111,7 +112,7 @@ int hfs_part_find(struct super_block *sb, | |||
111 | size = be32_to_cpu(pm->pmMapBlkCnt); | 112 | size = be32_to_cpu(pm->pmMapBlkCnt); |
112 | for (i = 0; i < size;) { | 113 | for (i = 0; i < size;) { |
113 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && | 114 | if (!memcmp(pm->pmPartType,"Apple_HFS", 9) && |
114 | (HFSPLUS_SB(sb).part < 0 || HFSPLUS_SB(sb).part == i)) { | 115 | (sbi->part < 0 || sbi->part == i)) { |
115 | *part_start += be32_to_cpu(pm->pmPyPartStart); | 116 | *part_start += be32_to_cpu(pm->pmPyPartStart); |
116 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); | 117 | *part_size = be32_to_cpu(pm->pmPartBlkCnt); |
117 | res = 0; | 118 | res = 0; |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 3b55c050c742..9a88d7536103 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/pagemap.h> | 12 | #include <linux/pagemap.h> |
13 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/vfs.h> | 15 | #include <linux/vfs.h> |
17 | #include <linux/nls.h> | 16 | #include <linux/nls.h> |
18 | 17 | ||
@@ -21,40 +20,11 @@ static void hfsplus_destroy_inode(struct inode *inode); | |||
21 | 20 | ||
22 | #include "hfsplus_fs.h" | 21 | #include "hfsplus_fs.h" |
23 | 22 | ||
24 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | 23 | static int hfsplus_system_read_inode(struct inode *inode) |
25 | { | 24 | { |
26 | struct hfs_find_data fd; | 25 | struct hfsplus_vh *vhdr = HFSPLUS_SB(inode->i_sb)->s_vhdr; |
27 | struct hfsplus_vh *vhdr; | ||
28 | struct inode *inode; | ||
29 | long err = -EIO; | ||
30 | |||
31 | inode = iget_locked(sb, ino); | ||
32 | if (!inode) | ||
33 | return ERR_PTR(-ENOMEM); | ||
34 | if (!(inode->i_state & I_NEW)) | ||
35 | return inode; | ||
36 | 26 | ||
37 | INIT_LIST_HEAD(&HFSPLUS_I(inode).open_dir_list); | 27 | switch (inode->i_ino) { |
38 | mutex_init(&HFSPLUS_I(inode).extents_lock); | ||
39 | HFSPLUS_I(inode).flags = 0; | ||
40 | HFSPLUS_I(inode).rsrc_inode = NULL; | ||
41 | atomic_set(&HFSPLUS_I(inode).opencnt, 0); | ||
42 | |||
43 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
44 | read_inode: | ||
45 | hfs_find_init(HFSPLUS_SB(inode->i_sb).cat_tree, &fd); | ||
46 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
47 | if (!err) | ||
48 | err = hfsplus_cat_read_inode(inode, &fd); | ||
49 | hfs_find_exit(&fd); | ||
50 | if (err) | ||
51 | goto bad_inode; | ||
52 | goto done; | ||
53 | } | ||
54 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
55 | switch(inode->i_ino) { | ||
56 | case HFSPLUS_ROOT_CNID: | ||
57 | goto read_inode; | ||
58 | case HFSPLUS_EXT_CNID: | 28 | case HFSPLUS_EXT_CNID: |
59 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); | 29 | hfsplus_inode_read_fork(inode, &vhdr->ext_file); |
60 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 30 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
@@ -75,74 +45,101 @@ struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | |||
75 | inode->i_mapping->a_ops = &hfsplus_btree_aops; | 45 | inode->i_mapping->a_ops = &hfsplus_btree_aops; |
76 | break; | 46 | break; |
77 | default: | 47 | default: |
78 | goto bad_inode; | 48 | return -EIO; |
49 | } | ||
50 | |||
51 | return 0; | ||
52 | } | ||
53 | |||
54 | struct inode *hfsplus_iget(struct super_block *sb, unsigned long ino) | ||
55 | { | ||
56 | struct hfs_find_data fd; | ||
57 | struct inode *inode; | ||
58 | int err; | ||
59 | |||
60 | inode = iget_locked(sb, ino); | ||
61 | if (!inode) | ||
62 | return ERR_PTR(-ENOMEM); | ||
63 | if (!(inode->i_state & I_NEW)) | ||
64 | return inode; | ||
65 | |||
66 | INIT_LIST_HEAD(&HFSPLUS_I(inode)->open_dir_list); | ||
67 | mutex_init(&HFSPLUS_I(inode)->extents_lock); | ||
68 | HFSPLUS_I(inode)->flags = 0; | ||
69 | HFSPLUS_I(inode)->rsrc_inode = NULL; | ||
70 | atomic_set(&HFSPLUS_I(inode)->opencnt, 0); | ||
71 | |||
72 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
73 | inode->i_ino == HFSPLUS_ROOT_CNID) { | ||
74 | hfs_find_init(HFSPLUS_SB(inode->i_sb)->cat_tree, &fd); | ||
75 | err = hfsplus_find_cat(inode->i_sb, inode->i_ino, &fd); | ||
76 | if (!err) | ||
77 | err = hfsplus_cat_read_inode(inode, &fd); | ||
78 | hfs_find_exit(&fd); | ||
79 | } else { | ||
80 | err = hfsplus_system_read_inode(inode); | ||
81 | } | ||
82 | |||
83 | if (err) { | ||
84 | iget_failed(inode); | ||
85 | return ERR_PTR(err); | ||
79 | } | 86 | } |
80 | 87 | ||
81 | done: | ||
82 | unlock_new_inode(inode); | 88 | unlock_new_inode(inode); |
83 | return inode; | 89 | return inode; |
84 | |||
85 | bad_inode: | ||
86 | iget_failed(inode); | ||
87 | return ERR_PTR(err); | ||
88 | } | 90 | } |
89 | 91 | ||
90 | static int hfsplus_write_inode(struct inode *inode, | 92 | static int hfsplus_system_write_inode(struct inode *inode) |
91 | struct writeback_control *wbc) | ||
92 | { | 93 | { |
93 | struct hfsplus_vh *vhdr; | 94 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(inode->i_sb); |
94 | int ret = 0; | 95 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
96 | struct hfsplus_fork_raw *fork; | ||
97 | struct hfs_btree *tree = NULL; | ||
95 | 98 | ||
96 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
97 | hfsplus_ext_write_extent(inode); | ||
98 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID) { | ||
99 | return hfsplus_cat_write_inode(inode); | ||
100 | } | ||
101 | vhdr = HFSPLUS_SB(inode->i_sb).s_vhdr; | ||
102 | switch (inode->i_ino) { | 99 | switch (inode->i_ino) { |
103 | case HFSPLUS_ROOT_CNID: | ||
104 | ret = hfsplus_cat_write_inode(inode); | ||
105 | break; | ||
106 | case HFSPLUS_EXT_CNID: | 100 | case HFSPLUS_EXT_CNID: |
107 | if (vhdr->ext_file.total_size != cpu_to_be64(inode->i_size)) { | 101 | fork = &vhdr->ext_file; |
108 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 102 | tree = sbi->ext_tree; |
109 | inode->i_sb->s_dirt = 1; | ||
110 | } | ||
111 | hfsplus_inode_write_fork(inode, &vhdr->ext_file); | ||
112 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).ext_tree); | ||
113 | break; | 103 | break; |
114 | case HFSPLUS_CAT_CNID: | 104 | case HFSPLUS_CAT_CNID: |
115 | if (vhdr->cat_file.total_size != cpu_to_be64(inode->i_size)) { | 105 | fork = &vhdr->cat_file; |
116 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 106 | tree = sbi->cat_tree; |
117 | inode->i_sb->s_dirt = 1; | ||
118 | } | ||
119 | hfsplus_inode_write_fork(inode, &vhdr->cat_file); | ||
120 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).cat_tree); | ||
121 | break; | 107 | break; |
122 | case HFSPLUS_ALLOC_CNID: | 108 | case HFSPLUS_ALLOC_CNID: |
123 | if (vhdr->alloc_file.total_size != cpu_to_be64(inode->i_size)) { | 109 | fork = &vhdr->alloc_file; |
124 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
125 | inode->i_sb->s_dirt = 1; | ||
126 | } | ||
127 | hfsplus_inode_write_fork(inode, &vhdr->alloc_file); | ||
128 | break; | 110 | break; |
129 | case HFSPLUS_START_CNID: | 111 | case HFSPLUS_START_CNID: |
130 | if (vhdr->start_file.total_size != cpu_to_be64(inode->i_size)) { | 112 | fork = &vhdr->start_file; |
131 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | ||
132 | inode->i_sb->s_dirt = 1; | ||
133 | } | ||
134 | hfsplus_inode_write_fork(inode, &vhdr->start_file); | ||
135 | break; | 113 | break; |
136 | case HFSPLUS_ATTR_CNID: | 114 | case HFSPLUS_ATTR_CNID: |
137 | if (vhdr->attr_file.total_size != cpu_to_be64(inode->i_size)) { | 115 | fork = &vhdr->attr_file; |
138 | HFSPLUS_SB(inode->i_sb).flags |= HFSPLUS_SB_WRITEBACKUP; | 116 | tree = sbi->attr_tree; |
139 | inode->i_sb->s_dirt = 1; | 117 | default: |
140 | } | 118 | return -EIO; |
141 | hfsplus_inode_write_fork(inode, &vhdr->attr_file); | 119 | } |
142 | hfs_btree_write(HFSPLUS_SB(inode->i_sb).attr_tree); | 120 | |
143 | break; | 121 | if (fork->total_size != cpu_to_be64(inode->i_size)) { |
122 | set_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags); | ||
123 | inode->i_sb->s_dirt = 1; | ||
144 | } | 124 | } |
145 | return ret; | 125 | hfsplus_inode_write_fork(inode, fork); |
126 | if (tree) | ||
127 | hfs_btree_write(tree); | ||
128 | return 0; | ||
129 | } | ||
130 | |||
131 | static int hfsplus_write_inode(struct inode *inode, | ||
132 | struct writeback_control *wbc) | ||
133 | { | ||
134 | dprint(DBG_INODE, "hfsplus_write_inode: %lu\n", inode->i_ino); | ||
135 | |||
136 | hfsplus_ext_write_extent(inode); | ||
137 | |||
138 | if (inode->i_ino >= HFSPLUS_FIRSTUSER_CNID || | ||
139 | inode->i_ino == HFSPLUS_ROOT_CNID) | ||
140 | return hfsplus_cat_write_inode(inode); | ||
141 | else | ||
142 | return hfsplus_system_write_inode(inode); | ||
146 | } | 143 | } |
147 | 144 | ||
148 | static void hfsplus_evict_inode(struct inode *inode) | 145 | static void hfsplus_evict_inode(struct inode *inode) |
@@ -151,51 +148,53 @@ static void hfsplus_evict_inode(struct inode *inode) | |||
151 | truncate_inode_pages(&inode->i_data, 0); | 148 | truncate_inode_pages(&inode->i_data, 0); |
152 | end_writeback(inode); | 149 | end_writeback(inode); |
153 | if (HFSPLUS_IS_RSRC(inode)) { | 150 | if (HFSPLUS_IS_RSRC(inode)) { |
154 | HFSPLUS_I(HFSPLUS_I(inode).rsrc_inode).rsrc_inode = NULL; | 151 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
155 | iput(HFSPLUS_I(inode).rsrc_inode); | 152 | iput(HFSPLUS_I(inode)->rsrc_inode); |
156 | } | 153 | } |
157 | } | 154 | } |
158 | 155 | ||
159 | int hfsplus_sync_fs(struct super_block *sb, int wait) | 156 | int hfsplus_sync_fs(struct super_block *sb, int wait) |
160 | { | 157 | { |
161 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 158 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); |
159 | struct hfsplus_vh *vhdr = sbi->s_vhdr; | ||
162 | 160 | ||
163 | dprint(DBG_SUPER, "hfsplus_write_super\n"); | 161 | dprint(DBG_SUPER, "hfsplus_write_super\n"); |
164 | 162 | ||
165 | lock_super(sb); | 163 | mutex_lock(&sbi->vh_mutex); |
164 | mutex_lock(&sbi->alloc_mutex); | ||
166 | sb->s_dirt = 0; | 165 | sb->s_dirt = 0; |
167 | 166 | ||
168 | vhdr->free_blocks = cpu_to_be32(HFSPLUS_SB(sb).free_blocks); | 167 | vhdr->free_blocks = cpu_to_be32(sbi->free_blocks); |
169 | vhdr->next_alloc = cpu_to_be32(HFSPLUS_SB(sb).next_alloc); | 168 | vhdr->next_cnid = cpu_to_be32(sbi->next_cnid); |
170 | vhdr->next_cnid = cpu_to_be32(HFSPLUS_SB(sb).next_cnid); | 169 | vhdr->folder_count = cpu_to_be32(sbi->folder_count); |
171 | vhdr->folder_count = cpu_to_be32(HFSPLUS_SB(sb).folder_count); | 170 | vhdr->file_count = cpu_to_be32(sbi->file_count); |
172 | vhdr->file_count = cpu_to_be32(HFSPLUS_SB(sb).file_count); | ||
173 | 171 | ||
174 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 172 | mark_buffer_dirty(sbi->s_vhbh); |
175 | if (HFSPLUS_SB(sb).flags & HFSPLUS_SB_WRITEBACKUP) { | 173 | if (test_and_clear_bit(HFSPLUS_SB_WRITEBACKUP, &sbi->flags)) { |
176 | if (HFSPLUS_SB(sb).sect_count) { | 174 | if (sbi->sect_count) { |
177 | struct buffer_head *bh; | 175 | struct buffer_head *bh; |
178 | u32 block, offset; | 176 | u32 block, offset; |
179 | 177 | ||
180 | block = HFSPLUS_SB(sb).blockoffset; | 178 | block = sbi->blockoffset; |
181 | block += (HFSPLUS_SB(sb).sect_count - 2) >> (sb->s_blocksize_bits - 9); | 179 | block += (sbi->sect_count - 2) >> (sb->s_blocksize_bits - 9); |
182 | offset = ((HFSPLUS_SB(sb).sect_count - 2) << 9) & (sb->s_blocksize - 1); | 180 | offset = ((sbi->sect_count - 2) << 9) & (sb->s_blocksize - 1); |
183 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", HFSPLUS_SB(sb).blockoffset, | 181 | printk(KERN_DEBUG "hfs: backup: %u,%u,%u,%u\n", |
184 | HFSPLUS_SB(sb).sect_count, block, offset); | 182 | sbi->blockoffset, sbi->sect_count, |
183 | block, offset); | ||
185 | bh = sb_bread(sb, block); | 184 | bh = sb_bread(sb, block); |
186 | if (bh) { | 185 | if (bh) { |
187 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); | 186 | vhdr = (struct hfsplus_vh *)(bh->b_data + offset); |
188 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { | 187 | if (be16_to_cpu(vhdr->signature) == HFSPLUS_VOLHEAD_SIG) { |
189 | memcpy(vhdr, HFSPLUS_SB(sb).s_vhdr, sizeof(*vhdr)); | 188 | memcpy(vhdr, sbi->s_vhdr, sizeof(*vhdr)); |
190 | mark_buffer_dirty(bh); | 189 | mark_buffer_dirty(bh); |
191 | brelse(bh); | 190 | brelse(bh); |
192 | } else | 191 | } else |
193 | printk(KERN_WARNING "hfs: backup not found!\n"); | 192 | printk(KERN_WARNING "hfs: backup not found!\n"); |
194 | } | 193 | } |
195 | } | 194 | } |
196 | HFSPLUS_SB(sb).flags &= ~HFSPLUS_SB_WRITEBACKUP; | ||
197 | } | 195 | } |
198 | unlock_super(sb); | 196 | mutex_unlock(&sbi->alloc_mutex); |
197 | mutex_unlock(&sbi->vh_mutex); | ||
199 | return 0; | 198 | return 0; |
200 | } | 199 | } |
201 | 200 | ||
@@ -209,48 +208,48 @@ static void hfsplus_write_super(struct super_block *sb) | |||
209 | 208 | ||
210 | static void hfsplus_put_super(struct super_block *sb) | 209 | static void hfsplus_put_super(struct super_block *sb) |
211 | { | 210 | { |
211 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
212 | |||
212 | dprint(DBG_SUPER, "hfsplus_put_super\n"); | 213 | dprint(DBG_SUPER, "hfsplus_put_super\n"); |
214 | |||
213 | if (!sb->s_fs_info) | 215 | if (!sb->s_fs_info) |
214 | return; | 216 | return; |
215 | 217 | ||
216 | lock_kernel(); | ||
217 | |||
218 | if (sb->s_dirt) | 218 | if (sb->s_dirt) |
219 | hfsplus_write_super(sb); | 219 | hfsplus_write_super(sb); |
220 | if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) { | 220 | if (!(sb->s_flags & MS_RDONLY) && sbi->s_vhdr) { |
221 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 221 | struct hfsplus_vh *vhdr = sbi->s_vhdr; |
222 | 222 | ||
223 | vhdr->modify_date = hfsp_now2mt(); | 223 | vhdr->modify_date = hfsp_now2mt(); |
224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); | 224 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_UNMNT); |
225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); | 225 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_INCNSTNT); |
226 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 226 | mark_buffer_dirty(sbi->s_vhbh); |
227 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 227 | sync_dirty_buffer(sbi->s_vhbh); |
228 | } | 228 | } |
229 | 229 | ||
230 | hfs_btree_close(HFSPLUS_SB(sb).cat_tree); | 230 | hfs_btree_close(sbi->cat_tree); |
231 | hfs_btree_close(HFSPLUS_SB(sb).ext_tree); | 231 | hfs_btree_close(sbi->ext_tree); |
232 | iput(HFSPLUS_SB(sb).alloc_file); | 232 | iput(sbi->alloc_file); |
233 | iput(HFSPLUS_SB(sb).hidden_dir); | 233 | iput(sbi->hidden_dir); |
234 | brelse(HFSPLUS_SB(sb).s_vhbh); | 234 | brelse(sbi->s_vhbh); |
235 | unload_nls(HFSPLUS_SB(sb).nls); | 235 | unload_nls(sbi->nls); |
236 | kfree(sb->s_fs_info); | 236 | kfree(sb->s_fs_info); |
237 | sb->s_fs_info = NULL; | 237 | sb->s_fs_info = NULL; |
238 | |||
239 | unlock_kernel(); | ||
240 | } | 238 | } |
241 | 239 | ||
242 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | 240 | static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) |
243 | { | 241 | { |
244 | struct super_block *sb = dentry->d_sb; | 242 | struct super_block *sb = dentry->d_sb; |
243 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
245 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 244 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
246 | 245 | ||
247 | buf->f_type = HFSPLUS_SUPER_MAGIC; | 246 | buf->f_type = HFSPLUS_SUPER_MAGIC; |
248 | buf->f_bsize = sb->s_blocksize; | 247 | buf->f_bsize = sb->s_blocksize; |
249 | buf->f_blocks = HFSPLUS_SB(sb).total_blocks << HFSPLUS_SB(sb).fs_shift; | 248 | buf->f_blocks = sbi->total_blocks << sbi->fs_shift; |
250 | buf->f_bfree = HFSPLUS_SB(sb).free_blocks << HFSPLUS_SB(sb).fs_shift; | 249 | buf->f_bfree = sbi->free_blocks << sbi->fs_shift; |
251 | buf->f_bavail = buf->f_bfree; | 250 | buf->f_bavail = buf->f_bfree; |
252 | buf->f_files = 0xFFFFFFFF; | 251 | buf->f_files = 0xFFFFFFFF; |
253 | buf->f_ffree = 0xFFFFFFFF - HFSPLUS_SB(sb).next_cnid; | 252 | buf->f_ffree = 0xFFFFFFFF - sbi->next_cnid; |
254 | buf->f_fsid.val[0] = (u32)id; | 253 | buf->f_fsid.val[0] = (u32)id; |
255 | buf->f_fsid.val[1] = (u32)(id >> 32); | 254 | buf->f_fsid.val[1] = (u32)(id >> 32); |
256 | buf->f_namelen = HFSPLUS_MAX_STRLEN; | 255 | buf->f_namelen = HFSPLUS_MAX_STRLEN; |
@@ -263,11 +262,11 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
263 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 262 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
264 | return 0; | 263 | return 0; |
265 | if (!(*flags & MS_RDONLY)) { | 264 | if (!(*flags & MS_RDONLY)) { |
266 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; | 265 | struct hfsplus_vh *vhdr = HFSPLUS_SB(sb)->s_vhdr; |
267 | struct hfsplus_sb_info sbi; | 266 | struct hfsplus_sb_info sbi; |
268 | 267 | ||
269 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); | 268 | memset(&sbi, 0, sizeof(struct hfsplus_sb_info)); |
270 | sbi.nls = HFSPLUS_SB(sb).nls; | 269 | sbi.nls = HFSPLUS_SB(sb)->nls; |
271 | if (!hfsplus_parse_options(data, &sbi)) | 270 | if (!hfsplus_parse_options(data, &sbi)) |
272 | return -EINVAL; | 271 | return -EINVAL; |
273 | 272 | ||
@@ -276,7 +275,7 @@ static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | |||
276 | "running fsck.hfsplus is recommended. leaving read-only.\n"); | 275 | "running fsck.hfsplus is recommended. leaving read-only.\n"); |
277 | sb->s_flags |= MS_RDONLY; | 276 | sb->s_flags |= MS_RDONLY; |
278 | *flags |= MS_RDONLY; | 277 | *flags |= MS_RDONLY; |
279 | } else if (sbi.flags & HFSPLUS_SB_FORCE) { | 278 | } else if (test_bit(HFSPLUS_SB_FORCE, &sbi.flags)) { |
280 | /* nothing */ | 279 | /* nothing */ |
281 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 280 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
282 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); | 281 | printk(KERN_WARNING "hfs: filesystem is marked locked, leaving read-only.\n"); |
@@ -320,7 +319,8 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
320 | return -ENOMEM; | 319 | return -ENOMEM; |
321 | 320 | ||
322 | sb->s_fs_info = sbi; | 321 | sb->s_fs_info = sbi; |
323 | INIT_HLIST_HEAD(&sbi->rsrc_inodes); | 322 | mutex_init(&sbi->alloc_mutex); |
323 | mutex_init(&sbi->vh_mutex); | ||
324 | hfsplus_fill_defaults(sbi); | 324 | hfsplus_fill_defaults(sbi); |
325 | if (!hfsplus_parse_options(data, sbi)) { | 325 | if (!hfsplus_parse_options(data, sbi)) { |
326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); | 326 | printk(KERN_ERR "hfs: unable to parse mount options\n"); |
@@ -344,7 +344,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
344 | err = -EINVAL; | 344 | err = -EINVAL; |
345 | goto cleanup; | 345 | goto cleanup; |
346 | } | 346 | } |
347 | vhdr = HFSPLUS_SB(sb).s_vhdr; | 347 | vhdr = sbi->s_vhdr; |
348 | 348 | ||
349 | /* Copy parts of the volume header into the superblock */ | 349 | /* Copy parts of the volume header into the superblock */ |
350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; | 350 | sb->s_magic = HFSPLUS_VOLHEAD_SIG; |
@@ -353,18 +353,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); | 353 | printk(KERN_ERR "hfs: wrong filesystem version\n"); |
354 | goto cleanup; | 354 | goto cleanup; |
355 | } | 355 | } |
356 | HFSPLUS_SB(sb).total_blocks = be32_to_cpu(vhdr->total_blocks); | 356 | sbi->total_blocks = be32_to_cpu(vhdr->total_blocks); |
357 | HFSPLUS_SB(sb).free_blocks = be32_to_cpu(vhdr->free_blocks); | 357 | sbi->free_blocks = be32_to_cpu(vhdr->free_blocks); |
358 | HFSPLUS_SB(sb).next_alloc = be32_to_cpu(vhdr->next_alloc); | 358 | sbi->next_cnid = be32_to_cpu(vhdr->next_cnid); |
359 | HFSPLUS_SB(sb).next_cnid = be32_to_cpu(vhdr->next_cnid); | 359 | sbi->file_count = be32_to_cpu(vhdr->file_count); |
360 | HFSPLUS_SB(sb).file_count = be32_to_cpu(vhdr->file_count); | 360 | sbi->folder_count = be32_to_cpu(vhdr->folder_count); |
361 | HFSPLUS_SB(sb).folder_count = be32_to_cpu(vhdr->folder_count); | 361 | sbi->data_clump_blocks = |
362 | HFSPLUS_SB(sb).data_clump_blocks = be32_to_cpu(vhdr->data_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 362 | be32_to_cpu(vhdr->data_clump_sz) >> sbi->alloc_blksz_shift; |
363 | if (!HFSPLUS_SB(sb).data_clump_blocks) | 363 | if (!sbi->data_clump_blocks) |
364 | HFSPLUS_SB(sb).data_clump_blocks = 1; | 364 | sbi->data_clump_blocks = 1; |
365 | HFSPLUS_SB(sb).rsrc_clump_blocks = be32_to_cpu(vhdr->rsrc_clump_sz) >> HFSPLUS_SB(sb).alloc_blksz_shift; | 365 | sbi->rsrc_clump_blocks = |
366 | if (!HFSPLUS_SB(sb).rsrc_clump_blocks) | 366 | be32_to_cpu(vhdr->rsrc_clump_sz) >> sbi->alloc_blksz_shift; |
367 | HFSPLUS_SB(sb).rsrc_clump_blocks = 1; | 367 | if (!sbi->rsrc_clump_blocks) |
368 | sbi->rsrc_clump_blocks = 1; | ||
368 | 369 | ||
369 | /* Set up operations so we can load metadata */ | 370 | /* Set up operations so we can load metadata */ |
370 | sb->s_op = &hfsplus_sops; | 371 | sb->s_op = &hfsplus_sops; |
@@ -374,7 +375,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
374 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " | 375 | printk(KERN_WARNING "hfs: Filesystem was not cleanly unmounted, " |
375 | "running fsck.hfsplus is recommended. mounting read-only.\n"); | 376 | "running fsck.hfsplus is recommended. mounting read-only.\n"); |
376 | sb->s_flags |= MS_RDONLY; | 377 | sb->s_flags |= MS_RDONLY; |
377 | } else if (sbi->flags & HFSPLUS_SB_FORCE) { | 378 | } else if (test_and_clear_bit(HFSPLUS_SB_FORCE, &sbi->flags)) { |
378 | /* nothing */ | 379 | /* nothing */ |
379 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 380 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
380 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); | 381 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); |
@@ -384,16 +385,15 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
384 | "use the force option at your own risk, mounting read-only.\n"); | 385 | "use the force option at your own risk, mounting read-only.\n"); |
385 | sb->s_flags |= MS_RDONLY; | 386 | sb->s_flags |= MS_RDONLY; |
386 | } | 387 | } |
387 | sbi->flags &= ~HFSPLUS_SB_FORCE; | ||
388 | 388 | ||
389 | /* Load metadata objects (B*Trees) */ | 389 | /* Load metadata objects (B*Trees) */ |
390 | HFSPLUS_SB(sb).ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); | 390 | sbi->ext_tree = hfs_btree_open(sb, HFSPLUS_EXT_CNID); |
391 | if (!HFSPLUS_SB(sb).ext_tree) { | 391 | if (!sbi->ext_tree) { |
392 | printk(KERN_ERR "hfs: failed to load extents file\n"); | 392 | printk(KERN_ERR "hfs: failed to load extents file\n"); |
393 | goto cleanup; | 393 | goto cleanup; |
394 | } | 394 | } |
395 | HFSPLUS_SB(sb).cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); | 395 | sbi->cat_tree = hfs_btree_open(sb, HFSPLUS_CAT_CNID); |
396 | if (!HFSPLUS_SB(sb).cat_tree) { | 396 | if (!sbi->cat_tree) { |
397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); | 397 | printk(KERN_ERR "hfs: failed to load catalog file\n"); |
398 | goto cleanup; | 398 | goto cleanup; |
399 | } | 399 | } |
@@ -404,7 +404,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
404 | err = PTR_ERR(inode); | 404 | err = PTR_ERR(inode); |
405 | goto cleanup; | 405 | goto cleanup; |
406 | } | 406 | } |
407 | HFSPLUS_SB(sb).alloc_file = inode; | 407 | sbi->alloc_file = inode; |
408 | 408 | ||
409 | /* Load the root directory */ | 409 | /* Load the root directory */ |
410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); | 410 | root = hfsplus_iget(sb, HFSPLUS_ROOT_CNID); |
@@ -423,7 +423,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
423 | 423 | ||
424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; | 424 | str.len = sizeof(HFSP_HIDDENDIR_NAME) - 1; |
425 | str.name = HFSP_HIDDENDIR_NAME; | 425 | str.name = HFSP_HIDDENDIR_NAME; |
426 | hfs_find_init(HFSPLUS_SB(sb).cat_tree, &fd); | 426 | hfs_find_init(sbi->cat_tree, &fd); |
427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); | 427 | hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); |
428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { | 428 | if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { |
429 | hfs_find_exit(&fd); | 429 | hfs_find_exit(&fd); |
@@ -434,7 +434,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
434 | err = PTR_ERR(inode); | 434 | err = PTR_ERR(inode); |
435 | goto cleanup; | 435 | goto cleanup; |
436 | } | 436 | } |
437 | HFSPLUS_SB(sb).hidden_dir = inode; | 437 | sbi->hidden_dir = inode; |
438 | } else | 438 | } else |
439 | hfs_find_exit(&fd); | 439 | hfs_find_exit(&fd); |
440 | 440 | ||
@@ -449,15 +449,19 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
449 | be32_add_cpu(&vhdr->write_count, 1); | 449 | be32_add_cpu(&vhdr->write_count, 1); |
450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); | 450 | vhdr->attributes &= cpu_to_be32(~HFSPLUS_VOL_UNMNT); |
451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); | 451 | vhdr->attributes |= cpu_to_be32(HFSPLUS_VOL_INCNSTNT); |
452 | mark_buffer_dirty(HFSPLUS_SB(sb).s_vhbh); | 452 | mark_buffer_dirty(sbi->s_vhbh); |
453 | sync_dirty_buffer(HFSPLUS_SB(sb).s_vhbh); | 453 | sync_dirty_buffer(sbi->s_vhbh); |
454 | 454 | ||
455 | if (!HFSPLUS_SB(sb).hidden_dir) { | 455 | if (!sbi->hidden_dir) { |
456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); | 456 | printk(KERN_DEBUG "hfs: create hidden dir...\n"); |
457 | HFSPLUS_SB(sb).hidden_dir = hfsplus_new_inode(sb, S_IFDIR); | 457 | |
458 | hfsplus_create_cat(HFSPLUS_SB(sb).hidden_dir->i_ino, sb->s_root->d_inode, | 458 | mutex_lock(&sbi->vh_mutex); |
459 | &str, HFSPLUS_SB(sb).hidden_dir); | 459 | sbi->hidden_dir = hfsplus_new_inode(sb, S_IFDIR); |
460 | mark_inode_dirty(HFSPLUS_SB(sb).hidden_dir); | 460 | hfsplus_create_cat(sbi->hidden_dir->i_ino, sb->s_root->d_inode, |
461 | &str, sbi->hidden_dir); | ||
462 | mutex_unlock(&sbi->vh_mutex); | ||
463 | |||
464 | mark_inode_dirty(sbi->hidden_dir); | ||
461 | } | 465 | } |
462 | out: | 466 | out: |
463 | unload_nls(sbi->nls); | 467 | unload_nls(sbi->nls); |
@@ -486,7 +490,7 @@ static struct inode *hfsplus_alloc_inode(struct super_block *sb) | |||
486 | 490 | ||
487 | static void hfsplus_destroy_inode(struct inode *inode) | 491 | static void hfsplus_destroy_inode(struct inode *inode) |
488 | { | 492 | { |
489 | kmem_cache_free(hfsplus_inode_cachep, &HFSPLUS_I(inode)); | 493 | kmem_cache_free(hfsplus_inode_cachep, HFSPLUS_I(inode)); |
490 | } | 494 | } |
491 | 495 | ||
492 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) | 496 | #define HFSPLUS_INODE_SIZE sizeof(struct hfsplus_inode_info) |
diff --git a/fs/hfsplus/unicode.c b/fs/hfsplus/unicode.c index 628ccf6fa402..b66d67de882c 100644 --- a/fs/hfsplus/unicode.c +++ b/fs/hfsplus/unicode.c | |||
@@ -121,7 +121,7 @@ static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) | |||
121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) | 121 | int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, char *astr, int *len_p) |
122 | { | 122 | { |
123 | const hfsplus_unichr *ip; | 123 | const hfsplus_unichr *ip; |
124 | struct nls_table *nls = HFSPLUS_SB(sb).nls; | 124 | struct nls_table *nls = HFSPLUS_SB(sb)->nls; |
125 | u8 *op; | 125 | u8 *op; |
126 | u16 cc, c0, c1; | 126 | u16 cc, c0, c1; |
127 | u16 *ce1, *ce2; | 127 | u16 *ce1, *ce2; |
@@ -132,7 +132,7 @@ int hfsplus_uni2asc(struct super_block *sb, const struct hfsplus_unistr *ustr, c | |||
132 | ustrlen = be16_to_cpu(ustr->length); | 132 | ustrlen = be16_to_cpu(ustr->length); |
133 | len = *len_p; | 133 | len = *len_p; |
134 | ce1 = NULL; | 134 | ce1 = NULL; |
135 | compose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 135 | compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
136 | 136 | ||
137 | while (ustrlen > 0) { | 137 | while (ustrlen > 0) { |
138 | c0 = be16_to_cpu(*ip++); | 138 | c0 = be16_to_cpu(*ip++); |
@@ -246,7 +246,7 @@ out: | |||
246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, | 246 | static inline int asc2unichar(struct super_block *sb, const char *astr, int len, |
247 | wchar_t *uc) | 247 | wchar_t *uc) |
248 | { | 248 | { |
249 | int size = HFSPLUS_SB(sb).nls->char2uni(astr, len, uc); | 249 | int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); |
250 | if (size <= 0) { | 250 | if (size <= 0) { |
251 | *uc = '?'; | 251 | *uc = '?'; |
252 | size = 1; | 252 | size = 1; |
@@ -293,7 +293,7 @@ int hfsplus_asc2uni(struct super_block *sb, struct hfsplus_unistr *ustr, | |||
293 | u16 *dstr, outlen = 0; | 293 | u16 *dstr, outlen = 0; |
294 | wchar_t c; | 294 | wchar_t c; |
295 | 295 | ||
296 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 296 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { | 297 | while (outlen < HFSPLUS_MAX_STRLEN && len > 0) { |
298 | size = asc2unichar(sb, astr, len, &c); | 298 | size = asc2unichar(sb, astr, len, &c); |
299 | 299 | ||
@@ -330,8 +330,8 @@ int hfsplus_hash_dentry(struct dentry *dentry, struct qstr *str) | |||
330 | wchar_t c; | 330 | wchar_t c; |
331 | u16 c2; | 331 | u16 c2; |
332 | 332 | ||
333 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 333 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
334 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 334 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
335 | hash = init_name_hash(); | 335 | hash = init_name_hash(); |
336 | astr = str->name; | 336 | astr = str->name; |
337 | len = str->len; | 337 | len = str->len; |
@@ -373,8 +373,8 @@ int hfsplus_compare_dentry(struct dentry *dentry, struct qstr *s1, struct qstr * | |||
373 | u16 c1, c2; | 373 | u16 c1, c2; |
374 | wchar_t c; | 374 | wchar_t c; |
375 | 375 | ||
376 | casefold = (HFSPLUS_SB(sb).flags & HFSPLUS_SB_CASEFOLD); | 376 | casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); |
377 | decompose = !(HFSPLUS_SB(sb).flags & HFSPLUS_SB_NODECOMPOSE); | 377 | decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); |
378 | astr1 = s1->name; | 378 | astr1 = s1->name; |
379 | len1 = s1->len; | 379 | len1 = s1->len; |
380 | astr2 = s2->name; | 380 | astr2 = s2->name; |
diff --git a/fs/hfsplus/wrapper.c b/fs/hfsplus/wrapper.c index bed78ac8f6d1..8972c20b3216 100644 --- a/fs/hfsplus/wrapper.c +++ b/fs/hfsplus/wrapper.c | |||
@@ -65,8 +65,8 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
65 | *start = 0; | 65 | *start = 0; |
66 | *size = sb->s_bdev->bd_inode->i_size >> 9; | 66 | *size = sb->s_bdev->bd_inode->i_size >> 9; |
67 | 67 | ||
68 | if (HFSPLUS_SB(sb).session >= 0) { | 68 | if (HFSPLUS_SB(sb)->session >= 0) { |
69 | te.cdte_track = HFSPLUS_SB(sb).session; | 69 | te.cdte_track = HFSPLUS_SB(sb)->session; |
70 | te.cdte_format = CDROM_LBA; | 70 | te.cdte_format = CDROM_LBA; |
71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); | 71 | res = ioctl_by_bdev(sb->s_bdev, CDROMREADTOCENTRY, (unsigned long)&te); |
72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { | 72 | if (!res && (te.cdte_ctrl & CDROM_DATA_TRACK) == 4) { |
@@ -87,6 +87,7 @@ static int hfsplus_get_last_session(struct super_block *sb, | |||
87 | /* Takes in super block, returns true if good data read */ | 87 | /* Takes in super block, returns true if good data read */ |
88 | int hfsplus_read_wrapper(struct super_block *sb) | 88 | int hfsplus_read_wrapper(struct super_block *sb) |
89 | { | 89 | { |
90 | struct hfsplus_sb_info *sbi = HFSPLUS_SB(sb); | ||
90 | struct buffer_head *bh; | 91 | struct buffer_head *bh; |
91 | struct hfsplus_vh *vhdr; | 92 | struct hfsplus_vh *vhdr; |
92 | struct hfsplus_wd wd; | 93 | struct hfsplus_wd wd; |
@@ -122,7 +123,7 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
122 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) | 123 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
123 | break; | 124 | break; |
124 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { | 125 | if (vhdr->signature == cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) { |
125 | HFSPLUS_SB(sb).flags |= HFSPLUS_SB_HFSX; | 126 | set_bit(HFSPLUS_SB_HFSX, &sbi->flags); |
126 | break; | 127 | break; |
127 | } | 128 | } |
128 | brelse(bh); | 129 | brelse(bh); |
@@ -143,11 +144,11 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
143 | if (blocksize < HFSPLUS_SECTOR_SIZE || | 144 | if (blocksize < HFSPLUS_SECTOR_SIZE || |
144 | ((blocksize - 1) & blocksize)) | 145 | ((blocksize - 1) & blocksize)) |
145 | return -EINVAL; | 146 | return -EINVAL; |
146 | HFSPLUS_SB(sb).alloc_blksz = blocksize; | 147 | sbi->alloc_blksz = blocksize; |
147 | HFSPLUS_SB(sb).alloc_blksz_shift = 0; | 148 | sbi->alloc_blksz_shift = 0; |
148 | while ((blocksize >>= 1) != 0) | 149 | while ((blocksize >>= 1) != 0) |
149 | HFSPLUS_SB(sb).alloc_blksz_shift++; | 150 | sbi->alloc_blksz_shift++; |
150 | blocksize = min(HFSPLUS_SB(sb).alloc_blksz, (u32)PAGE_SIZE); | 151 | blocksize = min(sbi->alloc_blksz, (u32)PAGE_SIZE); |
151 | 152 | ||
152 | /* align block size to block offset */ | 153 | /* align block size to block offset */ |
153 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) | 154 | while (part_start & ((blocksize >> HFSPLUS_SECTOR_SHIFT) - 1)) |
@@ -158,23 +159,26 @@ int hfsplus_read_wrapper(struct super_block *sb) | |||
158 | return -EINVAL; | 159 | return -EINVAL; |
159 | } | 160 | } |
160 | 161 | ||
161 | HFSPLUS_SB(sb).blockoffset = part_start >> | 162 | sbi->blockoffset = |
162 | (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); | 163 | part_start >> (sb->s_blocksize_bits - HFSPLUS_SECTOR_SHIFT); |
163 | HFSPLUS_SB(sb).sect_count = part_size; | 164 | sbi->sect_count = part_size; |
164 | HFSPLUS_SB(sb).fs_shift = HFSPLUS_SB(sb).alloc_blksz_shift - | 165 | sbi->fs_shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; |
165 | sb->s_blocksize_bits; | ||
166 | 166 | ||
167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); | 167 | bh = sb_bread512(sb, part_start + HFSPLUS_VOLHEAD_SECTOR, vhdr); |
168 | if (!bh) | 168 | if (!bh) |
169 | return -EIO; | 169 | return -EIO; |
170 | 170 | ||
171 | /* should still be the same... */ | 171 | /* should still be the same... */ |
172 | if (vhdr->signature != (HFSPLUS_SB(sb).flags & HFSPLUS_SB_HFSX ? | 172 | if (test_bit(HFSPLUS_SB_HFSX, &sbi->flags)) { |
173 | cpu_to_be16(HFSPLUS_VOLHEAD_SIGX) : | 173 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIGX)) |
174 | cpu_to_be16(HFSPLUS_VOLHEAD_SIG))) | 174 | goto error; |
175 | goto error; | 175 | } else { |
176 | HFSPLUS_SB(sb).s_vhbh = bh; | 176 | if (vhdr->signature != cpu_to_be16(HFSPLUS_VOLHEAD_SIG)) |
177 | HFSPLUS_SB(sb).s_vhdr = vhdr; | 177 | goto error; |
178 | } | ||
179 | |||
180 | sbi->s_vhbh = bh; | ||
181 | sbi->s_vhdr = vhdr; | ||
178 | 182 | ||
179 | return 0; | 183 | return 0; |
180 | error: | 184 | error: |
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h index 6bbd75c5589b..bf15a43016b9 100644 --- a/fs/hostfs/hostfs.h +++ b/fs/hostfs/hostfs.h | |||
@@ -28,12 +28,7 @@ | |||
28 | * #define ATTR_KILL_SUID 2048 | 28 | * #define ATTR_KILL_SUID 2048 |
29 | * #define ATTR_KILL_SGID 4096 | 29 | * #define ATTR_KILL_SGID 4096 |
30 | * | 30 | * |
31 | * and this is because they were added in 2.5 development in this patch: | 31 | * and this is because they were added in 2.5 development. |
32 | * | ||
33 | * http://linux.bkbits.net:8080/linux-2.5/ | ||
34 | * cset@3caf4a12k4XgDzK7wyK-TGpSZ9u2Ww?nav=index.html | ||
35 | * |src/.|src/include|src/include/linux|related/include/linux/fs.h | ||
36 | * | ||
37 | * Actually, they are not needed by most ->setattr() methods - they are set by | 32 | * Actually, they are not needed by most ->setattr() methods - they are set by |
38 | * callers of notify_change() to notify that the setuid/setgid bits must be | 33 | * callers of notify_change() to notify that the setuid/setgid bits must be |
39 | * dropped. | 34 | * dropped. |
@@ -96,7 +91,6 @@ extern int rename_file(char *from, char *to); | |||
96 | extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, | 91 | extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, |
97 | long long *bfree_out, long long *bavail_out, | 92 | long long *bfree_out, long long *bavail_out, |
98 | long long *files_out, long long *ffree_out, | 93 | long long *files_out, long long *ffree_out, |
99 | void *fsid_out, int fsid_size, long *namelen_out, | 94 | void *fsid_out, int fsid_size, long *namelen_out); |
100 | long *spare_out); | ||
101 | 95 | ||
102 | #endif | 96 | #endif |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index f7dc9b5f9ef8..cd7c93917cc7 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -217,7 +217,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf) | |||
217 | err = do_statfs(dentry->d_sb->s_fs_info, | 217 | err = do_statfs(dentry->d_sb->s_fs_info, |
218 | &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, | 218 | &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, |
219 | &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), | 219 | &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), |
220 | &sf->f_namelen, sf->f_spare); | 220 | &sf->f_namelen); |
221 | if (err) | 221 | if (err) |
222 | return err; | 222 | return err; |
223 | sf->f_blocks = f_blocks; | 223 | sf->f_blocks = f_blocks; |
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c index 6777aa06ce2c..d51a98384bc0 100644 --- a/fs/hostfs/hostfs_user.c +++ b/fs/hostfs/hostfs_user.c | |||
@@ -94,8 +94,7 @@ void *open_dir(char *path, int *err_out) | |||
94 | 94 | ||
95 | dir = opendir(path); | 95 | dir = opendir(path); |
96 | *err_out = errno; | 96 | *err_out = errno; |
97 | if (dir == NULL) | 97 | |
98 | return NULL; | ||
99 | return dir; | 98 | return dir; |
100 | } | 99 | } |
101 | 100 | ||
@@ -205,7 +204,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd) | |||
205 | if (attrs->ia_valid & HOSTFS_ATTR_MODE) { | 204 | if (attrs->ia_valid & HOSTFS_ATTR_MODE) { |
206 | if (fd >= 0) { | 205 | if (fd >= 0) { |
207 | if (fchmod(fd, attrs->ia_mode) != 0) | 206 | if (fchmod(fd, attrs->ia_mode) != 0) |
208 | return (-errno); | 207 | return -errno; |
209 | } else if (chmod(file, attrs->ia_mode) != 0) { | 208 | } else if (chmod(file, attrs->ia_mode) != 0) { |
210 | return -errno; | 209 | return -errno; |
211 | } | 210 | } |
@@ -364,8 +363,7 @@ int rename_file(char *from, char *to) | |||
364 | int do_statfs(char *root, long *bsize_out, long long *blocks_out, | 363 | int do_statfs(char *root, long *bsize_out, long long *blocks_out, |
365 | long long *bfree_out, long long *bavail_out, | 364 | long long *bfree_out, long long *bavail_out, |
366 | long long *files_out, long long *ffree_out, | 365 | long long *files_out, long long *ffree_out, |
367 | void *fsid_out, int fsid_size, long *namelen_out, | 366 | void *fsid_out, int fsid_size, long *namelen_out) |
368 | long *spare_out) | ||
369 | { | 367 | { |
370 | struct statfs64 buf; | 368 | struct statfs64 buf; |
371 | int err; | 369 | int err; |
@@ -384,10 +382,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out, | |||
384 | sizeof(buf.f_fsid) > fsid_size ? fsid_size : | 382 | sizeof(buf.f_fsid) > fsid_size ? fsid_size : |
385 | sizeof(buf.f_fsid)); | 383 | sizeof(buf.f_fsid)); |
386 | *namelen_out = buf.f_namelen; | 384 | *namelen_out = buf.f_namelen; |
387 | spare_out[0] = buf.f_spare[0]; | 385 | |
388 | spare_out[1] = buf.f_spare[1]; | ||
389 | spare_out[2] = buf.f_spare[2]; | ||
390 | spare_out[3] = buf.f_spare[3]; | ||
391 | spare_out[4] = buf.f_spare[4]; | ||
392 | return 0; | 386 | return 0; |
393 | } | 387 | } |
diff --git a/fs/hpfs/Kconfig b/fs/hpfs/Kconfig index 56bd15c5bf6c..63b6f5632318 100644 --- a/fs/hpfs/Kconfig +++ b/fs/hpfs/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config HPFS_FS | 1 | config HPFS_FS |
2 | tristate "OS/2 HPFS file system support" | 2 | tristate "OS/2 HPFS file system support" |
3 | depends on BLOCK | 3 | depends on BLOCK |
4 | depends on BKL # nontrivial to fix | ||
4 | help | 5 | help |
5 | OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS | 6 | OS/2 is IBM's operating system for PC's, the same as Warp, and HPFS |
6 | is the file system used for organizing files on OS/2 hard disk | 7 | is the file system used for organizing files on OS/2 hard disk |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 2607010be2fe..c969a1aa163a 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -477,11 +477,15 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
477 | 477 | ||
478 | int o; | 478 | int o; |
479 | 479 | ||
480 | lock_kernel(); | ||
481 | |||
480 | save_mount_options(s, options); | 482 | save_mount_options(s, options); |
481 | 483 | ||
482 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); | 484 | sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); |
483 | if (!sbi) | 485 | if (!sbi) { |
486 | unlock_kernel(); | ||
484 | return -ENOMEM; | 487 | return -ENOMEM; |
488 | } | ||
485 | s->s_fs_info = sbi; | 489 | s->s_fs_info = sbi; |
486 | 490 | ||
487 | sbi->sb_bmp_dir = NULL; | 491 | sbi->sb_bmp_dir = NULL; |
@@ -666,6 +670,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) | |||
666 | root->i_blocks = 5; | 670 | root->i_blocks = 5; |
667 | hpfs_brelse4(&qbh); | 671 | hpfs_brelse4(&qbh); |
668 | } | 672 | } |
673 | unlock_kernel(); | ||
669 | return 0; | 674 | return 0; |
670 | 675 | ||
671 | bail4: brelse(bh2); | 676 | bail4: brelse(bh2); |
@@ -677,6 +682,7 @@ bail0: | |||
677 | kfree(sbi->sb_cp_table); | 682 | kfree(sbi->sb_cp_table); |
678 | s->s_fs_info = NULL; | 683 | s->s_fs_info = NULL; |
679 | kfree(sbi); | 684 | kfree(sbi); |
685 | unlock_kernel(); | ||
680 | return -EINVAL; | 686 | return -EINVAL; |
681 | } | 687 | } |
682 | 688 | ||
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 7b027720d820..4e2a45ea6140 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
@@ -598,6 +598,7 @@ static const struct file_operations hppfs_dir_fops = { | |||
598 | .readdir = hppfs_readdir, | 598 | .readdir = hppfs_readdir, |
599 | .open = hppfs_dir_open, | 599 | .open = hppfs_dir_open, |
600 | .fsync = hppfs_fsync, | 600 | .fsync = hppfs_fsync, |
601 | .llseek = default_llseek, | ||
601 | }; | 602 | }; |
602 | 603 | ||
603 | static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) | 604 | static int hppfs_statfs(struct dentry *dentry, struct kstatfs *sf) |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 6e5bd42f3860..b14be3f781c7 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/statfs.h> | 31 | #include <linux/statfs.h> |
32 | #include <linux/security.h> | 32 | #include <linux/security.h> |
33 | #include <linux/magic.h> | 33 | #include <linux/magic.h> |
34 | #include <linux/migrate.h> | ||
34 | 35 | ||
35 | #include <asm/uaccess.h> | 36 | #include <asm/uaccess.h> |
36 | 37 | ||
@@ -455,6 +456,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, | |||
455 | inode = new_inode(sb); | 456 | inode = new_inode(sb); |
456 | if (inode) { | 457 | if (inode) { |
457 | struct hugetlbfs_inode_info *info; | 458 | struct hugetlbfs_inode_info *info; |
459 | inode->i_ino = get_next_ino(); | ||
458 | inode->i_mode = mode; | 460 | inode->i_mode = mode; |
459 | inode->i_uid = uid; | 461 | inode->i_uid = uid; |
460 | inode->i_gid = gid; | 462 | inode->i_gid = gid; |
@@ -573,6 +575,19 @@ static int hugetlbfs_set_page_dirty(struct page *page) | |||
573 | return 0; | 575 | return 0; |
574 | } | 576 | } |
575 | 577 | ||
578 | static int hugetlbfs_migrate_page(struct address_space *mapping, | ||
579 | struct page *newpage, struct page *page) | ||
580 | { | ||
581 | int rc; | ||
582 | |||
583 | rc = migrate_huge_page_move_mapping(mapping, newpage, page); | ||
584 | if (rc) | ||
585 | return rc; | ||
586 | migrate_page_copy(newpage, page); | ||
587 | |||
588 | return 0; | ||
589 | } | ||
590 | |||
576 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 591 | static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
577 | { | 592 | { |
578 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); | 593 | struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(dentry->d_sb); |
@@ -659,6 +674,7 @@ static const struct address_space_operations hugetlbfs_aops = { | |||
659 | .write_begin = hugetlbfs_write_begin, | 674 | .write_begin = hugetlbfs_write_begin, |
660 | .write_end = hugetlbfs_write_end, | 675 | .write_end = hugetlbfs_write_end, |
661 | .set_page_dirty = hugetlbfs_set_page_dirty, | 676 | .set_page_dirty = hugetlbfs_set_page_dirty, |
677 | .migratepage = hugetlbfs_migrate_page, | ||
662 | }; | 678 | }; |
663 | 679 | ||
664 | 680 | ||
@@ -674,6 +690,7 @@ const struct file_operations hugetlbfs_file_operations = { | |||
674 | .mmap = hugetlbfs_file_mmap, | 690 | .mmap = hugetlbfs_file_mmap, |
675 | .fsync = noop_fsync, | 691 | .fsync = noop_fsync, |
676 | .get_unmapped_area = hugetlb_get_unmapped_area, | 692 | .get_unmapped_area = hugetlb_get_unmapped_area, |
693 | .llseek = default_llseek, | ||
677 | }; | 694 | }; |
678 | 695 | ||
679 | static const struct inode_operations hugetlbfs_dir_inode_operations = { | 696 | static const struct inode_operations hugetlbfs_dir_inode_operations = { |
diff --git a/fs/inode.c b/fs/inode.c index 86464332e590..ae2727ab0c3a 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -24,11 +24,11 @@ | |||
24 | #include <linux/mount.h> | 24 | #include <linux/mount.h> |
25 | #include <linux/async.h> | 25 | #include <linux/async.h> |
26 | #include <linux/posix_acl.h> | 26 | #include <linux/posix_acl.h> |
27 | #include <linux/ima.h> | ||
27 | 28 | ||
28 | /* | 29 | /* |
29 | * This is needed for the following functions: | 30 | * This is needed for the following functions: |
30 | * - inode_has_buffers | 31 | * - inode_has_buffers |
31 | * - invalidate_inode_buffers | ||
32 | * - invalidate_bdev | 32 | * - invalidate_bdev |
33 | * | 33 | * |
34 | * FIXME: remove all knowledge of the buffer layer from this file | 34 | * FIXME: remove all knowledge of the buffer layer from this file |
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; | |||
72 | * allowing for low-overhead inode sync() operations. | 72 | * allowing for low-overhead inode sync() operations. |
73 | */ | 73 | */ |
74 | 74 | ||
75 | LIST_HEAD(inode_in_use); | 75 | static LIST_HEAD(inode_lru); |
76 | LIST_HEAD(inode_unused); | ||
77 | static struct hlist_head *inode_hashtable __read_mostly; | 76 | static struct hlist_head *inode_hashtable __read_mostly; |
78 | 77 | ||
79 | /* | 78 | /* |
@@ -103,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem); | |||
103 | */ | 102 | */ |
104 | struct inodes_stat_t inodes_stat; | 103 | struct inodes_stat_t inodes_stat; |
105 | 104 | ||
105 | static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; | ||
106 | static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; | ||
107 | |||
106 | static struct kmem_cache *inode_cachep __read_mostly; | 108 | static struct kmem_cache *inode_cachep __read_mostly; |
107 | 109 | ||
110 | static inline int get_nr_inodes(void) | ||
111 | { | ||
112 | return percpu_counter_sum_positive(&nr_inodes); | ||
113 | } | ||
114 | |||
115 | static inline int get_nr_inodes_unused(void) | ||
116 | { | ||
117 | return percpu_counter_sum_positive(&nr_inodes_unused); | ||
118 | } | ||
119 | |||
120 | int get_nr_dirty_inodes(void) | ||
121 | { | ||
122 | int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); | ||
123 | return nr_dirty > 0 ? nr_dirty : 0; | ||
124 | |||
125 | } | ||
126 | |||
127 | /* | ||
128 | * Handle nr_inode sysctl | ||
129 | */ | ||
130 | #ifdef CONFIG_SYSCTL | ||
131 | int proc_nr_inodes(ctl_table *table, int write, | ||
132 | void __user *buffer, size_t *lenp, loff_t *ppos) | ||
133 | { | ||
134 | inodes_stat.nr_inodes = get_nr_inodes(); | ||
135 | inodes_stat.nr_unused = get_nr_inodes_unused(); | ||
136 | return proc_dointvec(table, write, buffer, lenp, ppos); | ||
137 | } | ||
138 | #endif | ||
139 | |||
108 | static void wake_up_inode(struct inode *inode) | 140 | static void wake_up_inode(struct inode *inode) |
109 | { | 141 | { |
110 | /* | 142 | /* |
@@ -192,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) | |||
192 | inode->i_fsnotify_mask = 0; | 224 | inode->i_fsnotify_mask = 0; |
193 | #endif | 225 | #endif |
194 | 226 | ||
227 | percpu_counter_inc(&nr_inodes); | ||
228 | |||
195 | return 0; | 229 | return 0; |
196 | out: | 230 | out: |
197 | return -ENOMEM; | 231 | return -ENOMEM; |
@@ -232,11 +266,13 @@ void __destroy_inode(struct inode *inode) | |||
232 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) | 266 | if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) |
233 | posix_acl_release(inode->i_default_acl); | 267 | posix_acl_release(inode->i_default_acl); |
234 | #endif | 268 | #endif |
269 | percpu_counter_dec(&nr_inodes); | ||
235 | } | 270 | } |
236 | EXPORT_SYMBOL(__destroy_inode); | 271 | EXPORT_SYMBOL(__destroy_inode); |
237 | 272 | ||
238 | void destroy_inode(struct inode *inode) | 273 | static void destroy_inode(struct inode *inode) |
239 | { | 274 | { |
275 | BUG_ON(!list_empty(&inode->i_lru)); | ||
240 | __destroy_inode(inode); | 276 | __destroy_inode(inode); |
241 | if (inode->i_sb->s_op->destroy_inode) | 277 | if (inode->i_sb->s_op->destroy_inode) |
242 | inode->i_sb->s_op->destroy_inode(inode); | 278 | inode->i_sb->s_op->destroy_inode(inode); |
@@ -255,6 +291,8 @@ void inode_init_once(struct inode *inode) | |||
255 | INIT_HLIST_NODE(&inode->i_hash); | 291 | INIT_HLIST_NODE(&inode->i_hash); |
256 | INIT_LIST_HEAD(&inode->i_dentry); | 292 | INIT_LIST_HEAD(&inode->i_dentry); |
257 | INIT_LIST_HEAD(&inode->i_devices); | 293 | INIT_LIST_HEAD(&inode->i_devices); |
294 | INIT_LIST_HEAD(&inode->i_wb_list); | ||
295 | INIT_LIST_HEAD(&inode->i_lru); | ||
258 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); | 296 | INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); |
259 | spin_lock_init(&inode->i_data.tree_lock); | 297 | spin_lock_init(&inode->i_data.tree_lock); |
260 | spin_lock_init(&inode->i_data.i_mmap_lock); | 298 | spin_lock_init(&inode->i_data.i_mmap_lock); |
@@ -281,14 +319,109 @@ static void init_once(void *foo) | |||
281 | */ | 319 | */ |
282 | void __iget(struct inode *inode) | 320 | void __iget(struct inode *inode) |
283 | { | 321 | { |
284 | if (atomic_inc_return(&inode->i_count) != 1) | 322 | atomic_inc(&inode->i_count); |
285 | return; | 323 | } |
324 | |||
325 | /* | ||
326 | * get additional reference to inode; caller must already hold one. | ||
327 | */ | ||
328 | void ihold(struct inode *inode) | ||
329 | { | ||
330 | WARN_ON(atomic_inc_return(&inode->i_count) < 2); | ||
331 | } | ||
332 | EXPORT_SYMBOL(ihold); | ||
333 | |||
334 | static void inode_lru_list_add(struct inode *inode) | ||
335 | { | ||
336 | if (list_empty(&inode->i_lru)) { | ||
337 | list_add(&inode->i_lru, &inode_lru); | ||
338 | percpu_counter_inc(&nr_inodes_unused); | ||
339 | } | ||
340 | } | ||
286 | 341 | ||
287 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | 342 | static void inode_lru_list_del(struct inode *inode) |
288 | list_move(&inode->i_list, &inode_in_use); | 343 | { |
289 | inodes_stat.nr_unused--; | 344 | if (!list_empty(&inode->i_lru)) { |
345 | list_del_init(&inode->i_lru); | ||
346 | percpu_counter_dec(&nr_inodes_unused); | ||
347 | } | ||
348 | } | ||
349 | |||
350 | static inline void __inode_sb_list_add(struct inode *inode) | ||
351 | { | ||
352 | list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); | ||
290 | } | 353 | } |
291 | 354 | ||
355 | /** | ||
356 | * inode_sb_list_add - add inode to the superblock list of inodes | ||
357 | * @inode: inode to add | ||
358 | */ | ||
359 | void inode_sb_list_add(struct inode *inode) | ||
360 | { | ||
361 | spin_lock(&inode_lock); | ||
362 | __inode_sb_list_add(inode); | ||
363 | spin_unlock(&inode_lock); | ||
364 | } | ||
365 | EXPORT_SYMBOL_GPL(inode_sb_list_add); | ||
366 | |||
367 | static inline void __inode_sb_list_del(struct inode *inode) | ||
368 | { | ||
369 | list_del_init(&inode->i_sb_list); | ||
370 | } | ||
371 | |||
372 | static unsigned long hash(struct super_block *sb, unsigned long hashval) | ||
373 | { | ||
374 | unsigned long tmp; | ||
375 | |||
376 | tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / | ||
377 | L1_CACHE_BYTES; | ||
378 | tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); | ||
379 | return tmp & I_HASHMASK; | ||
380 | } | ||
381 | |||
382 | /** | ||
383 | * __insert_inode_hash - hash an inode | ||
384 | * @inode: unhashed inode | ||
385 | * @hashval: unsigned long value used to locate this object in the | ||
386 | * inode_hashtable. | ||
387 | * | ||
388 | * Add an inode to the inode hash for this superblock. | ||
389 | */ | ||
390 | void __insert_inode_hash(struct inode *inode, unsigned long hashval) | ||
391 | { | ||
392 | struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); | ||
393 | |||
394 | spin_lock(&inode_lock); | ||
395 | hlist_add_head(&inode->i_hash, b); | ||
396 | spin_unlock(&inode_lock); | ||
397 | } | ||
398 | EXPORT_SYMBOL(__insert_inode_hash); | ||
399 | |||
400 | /** | ||
401 | * __remove_inode_hash - remove an inode from the hash | ||
402 | * @inode: inode to unhash | ||
403 | * | ||
404 | * Remove an inode from the superblock. | ||
405 | */ | ||
406 | static void __remove_inode_hash(struct inode *inode) | ||
407 | { | ||
408 | hlist_del_init(&inode->i_hash); | ||
409 | } | ||
410 | |||
411 | /** | ||
412 | * remove_inode_hash - remove an inode from the hash | ||
413 | * @inode: inode to unhash | ||
414 | * | ||
415 | * Remove an inode from the superblock. | ||
416 | */ | ||
417 | void remove_inode_hash(struct inode *inode) | ||
418 | { | ||
419 | spin_lock(&inode_lock); | ||
420 | hlist_del_init(&inode->i_hash); | ||
421 | spin_unlock(&inode_lock); | ||
422 | } | ||
423 | EXPORT_SYMBOL(remove_inode_hash); | ||
424 | |||
292 | void end_writeback(struct inode *inode) | 425 | void end_writeback(struct inode *inode) |
293 | { | 426 | { |
294 | might_sleep(); | 427 | might_sleep(); |
@@ -327,101 +460,113 @@ static void evict(struct inode *inode) | |||
327 | */ | 460 | */ |
328 | static void dispose_list(struct list_head *head) | 461 | static void dispose_list(struct list_head *head) |
329 | { | 462 | { |
330 | int nr_disposed = 0; | ||
331 | |||
332 | while (!list_empty(head)) { | 463 | while (!list_empty(head)) { |
333 | struct inode *inode; | 464 | struct inode *inode; |
334 | 465 | ||
335 | inode = list_first_entry(head, struct inode, i_list); | 466 | inode = list_first_entry(head, struct inode, i_lru); |
336 | list_del(&inode->i_list); | 467 | list_del_init(&inode->i_lru); |
337 | 468 | ||
338 | evict(inode); | 469 | evict(inode); |
339 | 470 | ||
340 | spin_lock(&inode_lock); | 471 | spin_lock(&inode_lock); |
341 | hlist_del_init(&inode->i_hash); | 472 | __remove_inode_hash(inode); |
342 | list_del_init(&inode->i_sb_list); | 473 | __inode_sb_list_del(inode); |
343 | spin_unlock(&inode_lock); | 474 | spin_unlock(&inode_lock); |
344 | 475 | ||
345 | wake_up_inode(inode); | 476 | wake_up_inode(inode); |
346 | destroy_inode(inode); | 477 | destroy_inode(inode); |
347 | nr_disposed++; | ||
348 | } | 478 | } |
349 | spin_lock(&inode_lock); | ||
350 | inodes_stat.nr_inodes -= nr_disposed; | ||
351 | spin_unlock(&inode_lock); | ||
352 | } | 479 | } |
353 | 480 | ||
354 | /* | 481 | /** |
355 | * Invalidate all inodes for a device. | 482 | * evict_inodes - evict all evictable inodes for a superblock |
483 | * @sb: superblock to operate on | ||
484 | * | ||
485 | * Make sure that no inodes with zero refcount are retained. This is | ||
486 | * called by superblock shutdown after having MS_ACTIVE flag removed, | ||
487 | * so any inode reaching zero refcount during or after that call will | ||
488 | * be immediately evicted. | ||
356 | */ | 489 | */ |
357 | static int invalidate_list(struct list_head *head, struct list_head *dispose) | 490 | void evict_inodes(struct super_block *sb) |
358 | { | 491 | { |
359 | struct list_head *next; | 492 | struct inode *inode, *next; |
360 | int busy = 0, count = 0; | 493 | LIST_HEAD(dispose); |
361 | |||
362 | next = head->next; | ||
363 | for (;;) { | ||
364 | struct list_head *tmp = next; | ||
365 | struct inode *inode; | ||
366 | 494 | ||
367 | /* | 495 | down_write(&iprune_sem); |
368 | * We can reschedule here without worrying about the list's | ||
369 | * consistency because the per-sb list of inodes must not | ||
370 | * change during umount anymore, and because iprune_sem keeps | ||
371 | * shrink_icache_memory() away. | ||
372 | */ | ||
373 | cond_resched_lock(&inode_lock); | ||
374 | 496 | ||
375 | next = next->next; | 497 | spin_lock(&inode_lock); |
376 | if (tmp == head) | 498 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
377 | break; | 499 | if (atomic_read(&inode->i_count)) |
378 | inode = list_entry(tmp, struct inode, i_sb_list); | ||
379 | if (inode->i_state & I_NEW) | ||
380 | continue; | 500 | continue; |
381 | invalidate_inode_buffers(inode); | 501 | |
382 | if (!atomic_read(&inode->i_count)) { | 502 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { |
383 | list_move(&inode->i_list, dispose); | 503 | WARN_ON(1); |
384 | WARN_ON(inode->i_state & I_NEW); | ||
385 | inode->i_state |= I_FREEING; | ||
386 | count++; | ||
387 | continue; | 504 | continue; |
388 | } | 505 | } |
389 | busy = 1; | 506 | |
507 | inode->i_state |= I_FREEING; | ||
508 | |||
509 | /* | ||
510 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
511 | * set so that it won't get moved back on there if it is dirty. | ||
512 | */ | ||
513 | list_move(&inode->i_lru, &dispose); | ||
514 | list_del_init(&inode->i_wb_list); | ||
515 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) | ||
516 | percpu_counter_dec(&nr_inodes_unused); | ||
390 | } | 517 | } |
391 | /* only unused inodes may be cached with i_count zero */ | 518 | spin_unlock(&inode_lock); |
392 | inodes_stat.nr_unused -= count; | 519 | |
393 | return busy; | 520 | dispose_list(&dispose); |
521 | up_write(&iprune_sem); | ||
394 | } | 522 | } |
395 | 523 | ||
396 | /** | 524 | /** |
397 | * invalidate_inodes - discard the inodes on a device | 525 | * invalidate_inodes - attempt to free all inodes on a superblock |
398 | * @sb: superblock | 526 | * @sb: superblock to operate on |
399 | * | 527 | * |
400 | * Discard all of the inodes for a given superblock. If the discard | 528 | * Attempts to free all inodes for a given superblock. If there were any |
401 | * fails because there are busy inodes then a non zero value is returned. | 529 | * busy inodes return a non-zero value, else zero. |
402 | * If the discard is successful all the inodes have been discarded. | ||
403 | */ | 530 | */ |
404 | int invalidate_inodes(struct super_block *sb) | 531 | int invalidate_inodes(struct super_block *sb) |
405 | { | 532 | { |
406 | int busy; | 533 | int busy = 0; |
407 | LIST_HEAD(throw_away); | 534 | struct inode *inode, *next; |
535 | LIST_HEAD(dispose); | ||
408 | 536 | ||
409 | down_write(&iprune_sem); | 537 | down_write(&iprune_sem); |
538 | |||
410 | spin_lock(&inode_lock); | 539 | spin_lock(&inode_lock); |
411 | fsnotify_unmount_inodes(&sb->s_inodes); | 540 | list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { |
412 | busy = invalidate_list(&sb->s_inodes, &throw_away); | 541 | if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) |
542 | continue; | ||
543 | if (atomic_read(&inode->i_count)) { | ||
544 | busy = 1; | ||
545 | continue; | ||
546 | } | ||
547 | |||
548 | inode->i_state |= I_FREEING; | ||
549 | |||
550 | /* | ||
551 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
552 | * set so that it won't get moved back on there if it is dirty. | ||
553 | */ | ||
554 | list_move(&inode->i_lru, &dispose); | ||
555 | list_del_init(&inode->i_wb_list); | ||
556 | if (!(inode->i_state & (I_DIRTY | I_SYNC))) | ||
557 | percpu_counter_dec(&nr_inodes_unused); | ||
558 | } | ||
413 | spin_unlock(&inode_lock); | 559 | spin_unlock(&inode_lock); |
414 | 560 | ||
415 | dispose_list(&throw_away); | 561 | dispose_list(&dispose); |
416 | up_write(&iprune_sem); | 562 | up_write(&iprune_sem); |
417 | 563 | ||
418 | return busy; | 564 | return busy; |
419 | } | 565 | } |
420 | EXPORT_SYMBOL(invalidate_inodes); | ||
421 | 566 | ||
422 | static int can_unuse(struct inode *inode) | 567 | static int can_unuse(struct inode *inode) |
423 | { | 568 | { |
424 | if (inode->i_state) | 569 | if (inode->i_state & ~I_REFERENCED) |
425 | return 0; | 570 | return 0; |
426 | if (inode_has_buffers(inode)) | 571 | if (inode_has_buffers(inode)) |
427 | return 0; | 572 | return 0; |
@@ -433,22 +578,24 @@ static int can_unuse(struct inode *inode) | |||
433 | } | 578 | } |
434 | 579 | ||
435 | /* | 580 | /* |
436 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to | 581 | * Scan `goal' inodes on the unused list for freeable ones. They are moved to a |
437 | * a temporary list and then are freed outside inode_lock by dispose_list(). | 582 | * temporary list and then are freed outside inode_lock by dispose_list(). |
438 | * | 583 | * |
439 | * Any inodes which are pinned purely because of attached pagecache have their | 584 | * Any inodes which are pinned purely because of attached pagecache have their |
440 | * pagecache removed. We expect the final iput() on that inode to add it to | 585 | * pagecache removed. If the inode has metadata buffers attached to |
441 | * the front of the inode_unused list. So look for it there and if the | 586 | * mapping->private_list then try to remove them. |
442 | * inode is still freeable, proceed. The right inode is found 99.9% of the | ||
443 | * time in testing on a 4-way. | ||
444 | * | 587 | * |
445 | * If the inode has metadata buffers attached to mapping->private_list then | 588 | * If the inode has the I_REFERENCED flag set, then it means that it has been |
446 | * try to remove them. | 589 | * used recently - the flag is set in iput_final(). When we encounter such an |
590 | * inode, clear the flag and move it to the back of the LRU so it gets another | ||
591 | * pass through the LRU before it gets reclaimed. This is necessary because of | ||
592 | * the fact we are doing lazy LRU updates to minimise lock contention so the | ||
593 | * LRU does not have strict ordering. Hence we don't want to reclaim inodes | ||
594 | * with this flag set because they are the inodes that are out of order. | ||
447 | */ | 595 | */ |
448 | static void prune_icache(int nr_to_scan) | 596 | static void prune_icache(int nr_to_scan) |
449 | { | 597 | { |
450 | LIST_HEAD(freeable); | 598 | LIST_HEAD(freeable); |
451 | int nr_pruned = 0; | ||
452 | int nr_scanned; | 599 | int nr_scanned; |
453 | unsigned long reap = 0; | 600 | unsigned long reap = 0; |
454 | 601 | ||
@@ -457,13 +604,26 @@ static void prune_icache(int nr_to_scan) | |||
457 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { | 604 | for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { |
458 | struct inode *inode; | 605 | struct inode *inode; |
459 | 606 | ||
460 | if (list_empty(&inode_unused)) | 607 | if (list_empty(&inode_lru)) |
461 | break; | 608 | break; |
462 | 609 | ||
463 | inode = list_entry(inode_unused.prev, struct inode, i_list); | 610 | inode = list_entry(inode_lru.prev, struct inode, i_lru); |
464 | 611 | ||
465 | if (inode->i_state || atomic_read(&inode->i_count)) { | 612 | /* |
466 | list_move(&inode->i_list, &inode_unused); | 613 | * Referenced or dirty inodes are still in use. Give them |
614 | * another pass through the LRU as we canot reclaim them now. | ||
615 | */ | ||
616 | if (atomic_read(&inode->i_count) || | ||
617 | (inode->i_state & ~I_REFERENCED)) { | ||
618 | list_del_init(&inode->i_lru); | ||
619 | percpu_counter_dec(&nr_inodes_unused); | ||
620 | continue; | ||
621 | } | ||
622 | |||
623 | /* recently referenced inodes get one more pass */ | ||
624 | if (inode->i_state & I_REFERENCED) { | ||
625 | list_move(&inode->i_lru, &inode_lru); | ||
626 | inode->i_state &= ~I_REFERENCED; | ||
467 | continue; | 627 | continue; |
468 | } | 628 | } |
469 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { | 629 | if (inode_has_buffers(inode) || inode->i_data.nrpages) { |
@@ -475,18 +635,23 @@ static void prune_icache(int nr_to_scan) | |||
475 | iput(inode); | 635 | iput(inode); |
476 | spin_lock(&inode_lock); | 636 | spin_lock(&inode_lock); |
477 | 637 | ||
478 | if (inode != list_entry(inode_unused.next, | 638 | if (inode != list_entry(inode_lru.next, |
479 | struct inode, i_list)) | 639 | struct inode, i_lru)) |
480 | continue; /* wrong inode or list_empty */ | 640 | continue; /* wrong inode or list_empty */ |
481 | if (!can_unuse(inode)) | 641 | if (!can_unuse(inode)) |
482 | continue; | 642 | continue; |
483 | } | 643 | } |
484 | list_move(&inode->i_list, &freeable); | ||
485 | WARN_ON(inode->i_state & I_NEW); | 644 | WARN_ON(inode->i_state & I_NEW); |
486 | inode->i_state |= I_FREEING; | 645 | inode->i_state |= I_FREEING; |
487 | nr_pruned++; | 646 | |
647 | /* | ||
648 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
649 | * set so that it won't get moved back on there if it is dirty. | ||
650 | */ | ||
651 | list_move(&inode->i_lru, &freeable); | ||
652 | list_del_init(&inode->i_wb_list); | ||
653 | percpu_counter_dec(&nr_inodes_unused); | ||
488 | } | 654 | } |
489 | inodes_stat.nr_unused -= nr_pruned; | ||
490 | if (current_is_kswapd()) | 655 | if (current_is_kswapd()) |
491 | __count_vm_events(KSWAPD_INODESTEAL, reap); | 656 | __count_vm_events(KSWAPD_INODESTEAL, reap); |
492 | else | 657 | else |
@@ -518,7 +683,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | |||
518 | return -1; | 683 | return -1; |
519 | prune_icache(nr); | 684 | prune_icache(nr); |
520 | } | 685 | } |
521 | return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; | 686 | return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; |
522 | } | 687 | } |
523 | 688 | ||
524 | static struct shrinker icache_shrinker = { | 689 | static struct shrinker icache_shrinker = { |
@@ -529,9 +694,6 @@ static struct shrinker icache_shrinker = { | |||
529 | static void __wait_on_freeing_inode(struct inode *inode); | 694 | static void __wait_on_freeing_inode(struct inode *inode); |
530 | /* | 695 | /* |
531 | * Called with the inode lock held. | 696 | * Called with the inode lock held. |
532 | * NOTE: we are not increasing the inode-refcount, you must call __iget() | ||
533 | * by hand after calling find_inode now! This simplifies iunique and won't | ||
534 | * add any additional branch in the common code. | ||
535 | */ | 697 | */ |
536 | static struct inode *find_inode(struct super_block *sb, | 698 | static struct inode *find_inode(struct super_block *sb, |
537 | struct hlist_head *head, | 699 | struct hlist_head *head, |
@@ -551,9 +713,10 @@ repeat: | |||
551 | __wait_on_freeing_inode(inode); | 713 | __wait_on_freeing_inode(inode); |
552 | goto repeat; | 714 | goto repeat; |
553 | } | 715 | } |
554 | break; | 716 | __iget(inode); |
717 | return inode; | ||
555 | } | 718 | } |
556 | return node ? inode : NULL; | 719 | return NULL; |
557 | } | 720 | } |
558 | 721 | ||
559 | /* | 722 | /* |
@@ -576,53 +739,49 @@ repeat: | |||
576 | __wait_on_freeing_inode(inode); | 739 | __wait_on_freeing_inode(inode); |
577 | goto repeat; | 740 | goto repeat; |
578 | } | 741 | } |
579 | break; | 742 | __iget(inode); |
743 | return inode; | ||
580 | } | 744 | } |
581 | return node ? inode : NULL; | 745 | return NULL; |
582 | } | ||
583 | |||
584 | static unsigned long hash(struct super_block *sb, unsigned long hashval) | ||
585 | { | ||
586 | unsigned long tmp; | ||
587 | |||
588 | tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / | ||
589 | L1_CACHE_BYTES; | ||
590 | tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); | ||
591 | return tmp & I_HASHMASK; | ||
592 | } | ||
593 | |||
594 | static inline void | ||
595 | __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, | ||
596 | struct inode *inode) | ||
597 | { | ||
598 | inodes_stat.nr_inodes++; | ||
599 | list_add(&inode->i_list, &inode_in_use); | ||
600 | list_add(&inode->i_sb_list, &sb->s_inodes); | ||
601 | if (head) | ||
602 | hlist_add_head(&inode->i_hash, head); | ||
603 | } | 746 | } |
604 | 747 | ||
605 | /** | 748 | /* |
606 | * inode_add_to_lists - add a new inode to relevant lists | 749 | * Each cpu owns a range of LAST_INO_BATCH numbers. |
607 | * @sb: superblock inode belongs to | 750 | * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, |
608 | * @inode: inode to mark in use | 751 | * to renew the exhausted range. |
609 | * | 752 | * |
610 | * When an inode is allocated it needs to be accounted for, added to the in use | 753 | * This does not significantly increase overflow rate because every CPU can |
611 | * list, the owning superblock and the inode hash. This needs to be done under | 754 | * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is |
612 | * the inode_lock, so export a function to do this rather than the inode lock | 755 | * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the |
613 | * itself. We calculate the hash list to add to here so it is all internal | 756 | * 2^32 range, and is a worst-case. Even a 50% wastage would only increase |
614 | * which requires the caller to have already set up the inode number in the | 757 | * overflow rate by 2x, which does not seem too significant. |
615 | * inode to add. | 758 | * |
759 | * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW | ||
760 | * error if st_ino won't fit in target struct field. Use 32bit counter | ||
761 | * here to attempt to avoid that. | ||
616 | */ | 762 | */ |
617 | void inode_add_to_lists(struct super_block *sb, struct inode *inode) | 763 | #define LAST_INO_BATCH 1024 |
764 | static DEFINE_PER_CPU(unsigned int, last_ino); | ||
765 | |||
766 | unsigned int get_next_ino(void) | ||
618 | { | 767 | { |
619 | struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); | 768 | unsigned int *p = &get_cpu_var(last_ino); |
769 | unsigned int res = *p; | ||
620 | 770 | ||
621 | spin_lock(&inode_lock); | 771 | #ifdef CONFIG_SMP |
622 | __inode_add_to_lists(sb, head, inode); | 772 | if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { |
623 | spin_unlock(&inode_lock); | 773 | static atomic_t shared_last_ino; |
774 | int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino); | ||
775 | |||
776 | res = next - LAST_INO_BATCH; | ||
777 | } | ||
778 | #endif | ||
779 | |||
780 | *p = ++res; | ||
781 | put_cpu_var(last_ino); | ||
782 | return res; | ||
624 | } | 783 | } |
625 | EXPORT_SYMBOL_GPL(inode_add_to_lists); | 784 | EXPORT_SYMBOL(get_next_ino); |
626 | 785 | ||
627 | /** | 786 | /** |
628 | * new_inode - obtain an inode | 787 | * new_inode - obtain an inode |
@@ -638,12 +797,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists); | |||
638 | */ | 797 | */ |
639 | struct inode *new_inode(struct super_block *sb) | 798 | struct inode *new_inode(struct super_block *sb) |
640 | { | 799 | { |
641 | /* | ||
642 | * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW | ||
643 | * error if st_ino won't fit in target struct field. Use 32bit counter | ||
644 | * here to attempt to avoid that. | ||
645 | */ | ||
646 | static unsigned int last_ino; | ||
647 | struct inode *inode; | 800 | struct inode *inode; |
648 | 801 | ||
649 | spin_lock_prefetch(&inode_lock); | 802 | spin_lock_prefetch(&inode_lock); |
@@ -651,8 +804,7 @@ struct inode *new_inode(struct super_block *sb) | |||
651 | inode = alloc_inode(sb); | 804 | inode = alloc_inode(sb); |
652 | if (inode) { | 805 | if (inode) { |
653 | spin_lock(&inode_lock); | 806 | spin_lock(&inode_lock); |
654 | __inode_add_to_lists(sb, NULL, inode); | 807 | __inode_sb_list_add(inode); |
655 | inode->i_ino = ++last_ino; | ||
656 | inode->i_state = 0; | 808 | inode->i_state = 0; |
657 | spin_unlock(&inode_lock); | 809 | spin_unlock(&inode_lock); |
658 | } | 810 | } |
@@ -663,7 +815,7 @@ EXPORT_SYMBOL(new_inode); | |||
663 | void unlock_new_inode(struct inode *inode) | 815 | void unlock_new_inode(struct inode *inode) |
664 | { | 816 | { |
665 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 817 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
666 | if (inode->i_mode & S_IFDIR) { | 818 | if (S_ISDIR(inode->i_mode)) { |
667 | struct file_system_type *type = inode->i_sb->s_type; | 819 | struct file_system_type *type = inode->i_sb->s_type; |
668 | 820 | ||
669 | /* Set new key only if filesystem hasn't already changed it */ | 821 | /* Set new key only if filesystem hasn't already changed it */ |
@@ -720,7 +872,8 @@ static struct inode *get_new_inode(struct super_block *sb, | |||
720 | if (set(inode, data)) | 872 | if (set(inode, data)) |
721 | goto set_failed; | 873 | goto set_failed; |
722 | 874 | ||
723 | __inode_add_to_lists(sb, head, inode); | 875 | hlist_add_head(&inode->i_hash, head); |
876 | __inode_sb_list_add(inode); | ||
724 | inode->i_state = I_NEW; | 877 | inode->i_state = I_NEW; |
725 | spin_unlock(&inode_lock); | 878 | spin_unlock(&inode_lock); |
726 | 879 | ||
@@ -735,7 +888,6 @@ static struct inode *get_new_inode(struct super_block *sb, | |||
735 | * us. Use the old inode instead of the one we just | 888 | * us. Use the old inode instead of the one we just |
736 | * allocated. | 889 | * allocated. |
737 | */ | 890 | */ |
738 | __iget(old); | ||
739 | spin_unlock(&inode_lock); | 891 | spin_unlock(&inode_lock); |
740 | destroy_inode(inode); | 892 | destroy_inode(inode); |
741 | inode = old; | 893 | inode = old; |
@@ -767,7 +919,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb, | |||
767 | old = find_inode_fast(sb, head, ino); | 919 | old = find_inode_fast(sb, head, ino); |
768 | if (!old) { | 920 | if (!old) { |
769 | inode->i_ino = ino; | 921 | inode->i_ino = ino; |
770 | __inode_add_to_lists(sb, head, inode); | 922 | hlist_add_head(&inode->i_hash, head); |
923 | __inode_sb_list_add(inode); | ||
771 | inode->i_state = I_NEW; | 924 | inode->i_state = I_NEW; |
772 | spin_unlock(&inode_lock); | 925 | spin_unlock(&inode_lock); |
773 | 926 | ||
@@ -782,7 +935,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb, | |||
782 | * us. Use the old inode instead of the one we just | 935 | * us. Use the old inode instead of the one we just |
783 | * allocated. | 936 | * allocated. |
784 | */ | 937 | */ |
785 | __iget(old); | ||
786 | spin_unlock(&inode_lock); | 938 | spin_unlock(&inode_lock); |
787 | destroy_inode(inode); | 939 | destroy_inode(inode); |
788 | inode = old; | 940 | inode = old; |
@@ -791,6 +943,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb, | |||
791 | return inode; | 943 | return inode; |
792 | } | 944 | } |
793 | 945 | ||
946 | /* | ||
947 | * search the inode cache for a matching inode number. | ||
948 | * If we find one, then the inode number we are trying to | ||
949 | * allocate is not unique and so we should not use it. | ||
950 | * | ||
951 | * Returns 1 if the inode number is unique, 0 if it is not. | ||
952 | */ | ||
953 | static int test_inode_iunique(struct super_block *sb, unsigned long ino) | ||
954 | { | ||
955 | struct hlist_head *b = inode_hashtable + hash(sb, ino); | ||
956 | struct hlist_node *node; | ||
957 | struct inode *inode; | ||
958 | |||
959 | hlist_for_each_entry(inode, node, b, i_hash) { | ||
960 | if (inode->i_ino == ino && inode->i_sb == sb) | ||
961 | return 0; | ||
962 | } | ||
963 | |||
964 | return 1; | ||
965 | } | ||
966 | |||
794 | /** | 967 | /** |
795 | * iunique - get a unique inode number | 968 | * iunique - get a unique inode number |
796 | * @sb: superblock | 969 | * @sb: superblock |
@@ -812,19 +985,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) | |||
812 | * error if st_ino won't fit in target struct field. Use 32bit counter | 985 | * error if st_ino won't fit in target struct field. Use 32bit counter |
813 | * here to attempt to avoid that. | 986 | * here to attempt to avoid that. |
814 | */ | 987 | */ |
988 | static DEFINE_SPINLOCK(iunique_lock); | ||
815 | static unsigned int counter; | 989 | static unsigned int counter; |
816 | struct inode *inode; | ||
817 | struct hlist_head *head; | ||
818 | ino_t res; | 990 | ino_t res; |
819 | 991 | ||
820 | spin_lock(&inode_lock); | 992 | spin_lock(&inode_lock); |
993 | spin_lock(&iunique_lock); | ||
821 | do { | 994 | do { |
822 | if (counter <= max_reserved) | 995 | if (counter <= max_reserved) |
823 | counter = max_reserved + 1; | 996 | counter = max_reserved + 1; |
824 | res = counter++; | 997 | res = counter++; |
825 | head = inode_hashtable + hash(sb, res); | 998 | } while (!test_inode_iunique(sb, res)); |
826 | inode = find_inode_fast(sb, head, res); | 999 | spin_unlock(&iunique_lock); |
827 | } while (inode != NULL); | ||
828 | spin_unlock(&inode_lock); | 1000 | spin_unlock(&inode_lock); |
829 | 1001 | ||
830 | return res; | 1002 | return res; |
@@ -876,7 +1048,6 @@ static struct inode *ifind(struct super_block *sb, | |||
876 | spin_lock(&inode_lock); | 1048 | spin_lock(&inode_lock); |
877 | inode = find_inode(sb, head, test, data); | 1049 | inode = find_inode(sb, head, test, data); |
878 | if (inode) { | 1050 | if (inode) { |
879 | __iget(inode); | ||
880 | spin_unlock(&inode_lock); | 1051 | spin_unlock(&inode_lock); |
881 | if (likely(wait)) | 1052 | if (likely(wait)) |
882 | wait_on_inode(inode); | 1053 | wait_on_inode(inode); |
@@ -909,7 +1080,6 @@ static struct inode *ifind_fast(struct super_block *sb, | |||
909 | spin_lock(&inode_lock); | 1080 | spin_lock(&inode_lock); |
910 | inode = find_inode_fast(sb, head, ino); | 1081 | inode = find_inode_fast(sb, head, ino); |
911 | if (inode) { | 1082 | if (inode) { |
912 | __iget(inode); | ||
913 | spin_unlock(&inode_lock); | 1083 | spin_unlock(&inode_lock); |
914 | wait_on_inode(inode); | 1084 | wait_on_inode(inode); |
915 | return inode; | 1085 | return inode; |
@@ -1095,7 +1265,7 @@ int insert_inode_locked(struct inode *inode) | |||
1095 | __iget(old); | 1265 | __iget(old); |
1096 | spin_unlock(&inode_lock); | 1266 | spin_unlock(&inode_lock); |
1097 | wait_on_inode(old); | 1267 | wait_on_inode(old); |
1098 | if (unlikely(!hlist_unhashed(&old->i_hash))) { | 1268 | if (unlikely(!inode_unhashed(old))) { |
1099 | iput(old); | 1269 | iput(old); |
1100 | return -EBUSY; | 1270 | return -EBUSY; |
1101 | } | 1271 | } |
@@ -1134,7 +1304,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, | |||
1134 | __iget(old); | 1304 | __iget(old); |
1135 | spin_unlock(&inode_lock); | 1305 | spin_unlock(&inode_lock); |
1136 | wait_on_inode(old); | 1306 | wait_on_inode(old); |
1137 | if (unlikely(!hlist_unhashed(&old->i_hash))) { | 1307 | if (unlikely(!inode_unhashed(old))) { |
1138 | iput(old); | 1308 | iput(old); |
1139 | return -EBUSY; | 1309 | return -EBUSY; |
1140 | } | 1310 | } |
@@ -1143,36 +1313,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, | |||
1143 | } | 1313 | } |
1144 | EXPORT_SYMBOL(insert_inode_locked4); | 1314 | EXPORT_SYMBOL(insert_inode_locked4); |
1145 | 1315 | ||
1146 | /** | ||
1147 | * __insert_inode_hash - hash an inode | ||
1148 | * @inode: unhashed inode | ||
1149 | * @hashval: unsigned long value used to locate this object in the | ||
1150 | * inode_hashtable. | ||
1151 | * | ||
1152 | * Add an inode to the inode hash for this superblock. | ||
1153 | */ | ||
1154 | void __insert_inode_hash(struct inode *inode, unsigned long hashval) | ||
1155 | { | ||
1156 | struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); | ||
1157 | spin_lock(&inode_lock); | ||
1158 | hlist_add_head(&inode->i_hash, head); | ||
1159 | spin_unlock(&inode_lock); | ||
1160 | } | ||
1161 | EXPORT_SYMBOL(__insert_inode_hash); | ||
1162 | |||
1163 | /** | ||
1164 | * remove_inode_hash - remove an inode from the hash | ||
1165 | * @inode: inode to unhash | ||
1166 | * | ||
1167 | * Remove an inode from the superblock. | ||
1168 | */ | ||
1169 | void remove_inode_hash(struct inode *inode) | ||
1170 | { | ||
1171 | spin_lock(&inode_lock); | ||
1172 | hlist_del_init(&inode->i_hash); | ||
1173 | spin_unlock(&inode_lock); | ||
1174 | } | ||
1175 | EXPORT_SYMBOL(remove_inode_hash); | ||
1176 | 1316 | ||
1177 | int generic_delete_inode(struct inode *inode) | 1317 | int generic_delete_inode(struct inode *inode) |
1178 | { | 1318 | { |
@@ -1187,7 +1327,7 @@ EXPORT_SYMBOL(generic_delete_inode); | |||
1187 | */ | 1327 | */ |
1188 | int generic_drop_inode(struct inode *inode) | 1328 | int generic_drop_inode(struct inode *inode) |
1189 | { | 1329 | { |
1190 | return !inode->i_nlink || hlist_unhashed(&inode->i_hash); | 1330 | return !inode->i_nlink || inode_unhashed(inode); |
1191 | } | 1331 | } |
1192 | EXPORT_SYMBOL_GPL(generic_drop_inode); | 1332 | EXPORT_SYMBOL_GPL(generic_drop_inode); |
1193 | 1333 | ||
@@ -1213,10 +1353,11 @@ static void iput_final(struct inode *inode) | |||
1213 | drop = generic_drop_inode(inode); | 1353 | drop = generic_drop_inode(inode); |
1214 | 1354 | ||
1215 | if (!drop) { | 1355 | if (!drop) { |
1216 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) | ||
1217 | list_move(&inode->i_list, &inode_unused); | ||
1218 | inodes_stat.nr_unused++; | ||
1219 | if (sb->s_flags & MS_ACTIVE) { | 1356 | if (sb->s_flags & MS_ACTIVE) { |
1357 | inode->i_state |= I_REFERENCED; | ||
1358 | if (!(inode->i_state & (I_DIRTY|I_SYNC))) { | ||
1359 | inode_lru_list_add(inode); | ||
1360 | } | ||
1220 | spin_unlock(&inode_lock); | 1361 | spin_unlock(&inode_lock); |
1221 | return; | 1362 | return; |
1222 | } | 1363 | } |
@@ -1227,19 +1368,23 @@ static void iput_final(struct inode *inode) | |||
1227 | spin_lock(&inode_lock); | 1368 | spin_lock(&inode_lock); |
1228 | WARN_ON(inode->i_state & I_NEW); | 1369 | WARN_ON(inode->i_state & I_NEW); |
1229 | inode->i_state &= ~I_WILL_FREE; | 1370 | inode->i_state &= ~I_WILL_FREE; |
1230 | inodes_stat.nr_unused--; | 1371 | __remove_inode_hash(inode); |
1231 | hlist_del_init(&inode->i_hash); | ||
1232 | } | 1372 | } |
1233 | list_del_init(&inode->i_list); | 1373 | |
1234 | list_del_init(&inode->i_sb_list); | ||
1235 | WARN_ON(inode->i_state & I_NEW); | 1374 | WARN_ON(inode->i_state & I_NEW); |
1236 | inode->i_state |= I_FREEING; | 1375 | inode->i_state |= I_FREEING; |
1237 | inodes_stat.nr_inodes--; | 1376 | |
1377 | /* | ||
1378 | * Move the inode off the IO lists and LRU once I_FREEING is | ||
1379 | * set so that it won't get moved back on there if it is dirty. | ||
1380 | */ | ||
1381 | inode_lru_list_del(inode); | ||
1382 | list_del_init(&inode->i_wb_list); | ||
1383 | |||
1384 | __inode_sb_list_del(inode); | ||
1238 | spin_unlock(&inode_lock); | 1385 | spin_unlock(&inode_lock); |
1239 | evict(inode); | 1386 | evict(inode); |
1240 | spin_lock(&inode_lock); | 1387 | remove_inode_hash(inode); |
1241 | hlist_del_init(&inode->i_hash); | ||
1242 | spin_unlock(&inode_lock); | ||
1243 | wake_up_inode(inode); | 1388 | wake_up_inode(inode); |
1244 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); | 1389 | BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); |
1245 | destroy_inode(inode); | 1390 | destroy_inode(inode); |
@@ -1503,6 +1648,8 @@ void __init inode_init(void) | |||
1503 | SLAB_MEM_SPREAD), | 1648 | SLAB_MEM_SPREAD), |
1504 | init_once); | 1649 | init_once); |
1505 | register_shrinker(&icache_shrinker); | 1650 | register_shrinker(&icache_shrinker); |
1651 | percpu_counter_init(&nr_inodes, 0); | ||
1652 | percpu_counter_init(&nr_inodes_unused, 0); | ||
1506 | 1653 | ||
1507 | /* Hash may have been set up in inode_init_early */ | 1654 | /* Hash may have been set up in inode_init_early */ |
1508 | if (!hashdist) | 1655 | if (!hashdist) |
diff --git a/fs/internal.h b/fs/internal.h index a6910e91cee8..ebad3b90752d 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
@@ -101,3 +101,10 @@ extern void put_super(struct super_block *sb); | |||
101 | struct nameidata; | 101 | struct nameidata; |
102 | extern struct file *nameidata_to_filp(struct nameidata *); | 102 | extern struct file *nameidata_to_filp(struct nameidata *); |
103 | extern void release_open_intent(struct nameidata *); | 103 | extern void release_open_intent(struct nameidata *); |
104 | |||
105 | /* | ||
106 | * inode.c | ||
107 | */ | ||
108 | extern int get_nr_dirty_inodes(void); | ||
109 | extern int evict_inodes(struct super_block *); | ||
110 | extern int invalidate_inodes(struct super_block *); | ||
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index e0aca9a0ac68..0542b6eedf80 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
@@ -10,7 +10,6 @@ | |||
10 | * | 10 | * |
11 | * isofs directory handling functions | 11 | * isofs directory handling functions |
12 | */ | 12 | */ |
13 | #include <linux/smp_lock.h> | ||
14 | #include <linux/gfp.h> | 13 | #include <linux/gfp.h> |
15 | #include "isofs.h" | 14 | #include "isofs.h" |
16 | 15 | ||
@@ -255,18 +254,19 @@ static int isofs_readdir(struct file *filp, | |||
255 | char *tmpname; | 254 | char *tmpname; |
256 | struct iso_directory_record *tmpde; | 255 | struct iso_directory_record *tmpde; |
257 | struct inode *inode = filp->f_path.dentry->d_inode; | 256 | struct inode *inode = filp->f_path.dentry->d_inode; |
257 | struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); | ||
258 | 258 | ||
259 | tmpname = (char *)__get_free_page(GFP_KERNEL); | 259 | tmpname = (char *)__get_free_page(GFP_KERNEL); |
260 | if (tmpname == NULL) | 260 | if (tmpname == NULL) |
261 | return -ENOMEM; | 261 | return -ENOMEM; |
262 | 262 | ||
263 | lock_kernel(); | 263 | mutex_lock(&sbi->s_mutex); |
264 | tmpde = (struct iso_directory_record *) (tmpname+1024); | 264 | tmpde = (struct iso_directory_record *) (tmpname+1024); |
265 | 265 | ||
266 | result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); | 266 | result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); |
267 | 267 | ||
268 | free_page((unsigned long) tmpname); | 268 | free_page((unsigned long) tmpname); |
269 | unlock_kernel(); | 269 | mutex_unlock(&sbi->s_mutex); |
270 | return result; | 270 | return result; |
271 | } | 271 | } |
272 | 272 | ||
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 5a44811b5027..79cf7f616bbe 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -17,7 +17,6 @@ | |||
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/nls.h> | 18 | #include <linux/nls.h> |
19 | #include <linux/ctype.h> | 19 | #include <linux/ctype.h> |
20 | #include <linux/smp_lock.h> | ||
21 | #include <linux/statfs.h> | 20 | #include <linux/statfs.h> |
22 | #include <linux/cdrom.h> | 21 | #include <linux/cdrom.h> |
23 | #include <linux/parser.h> | 22 | #include <linux/parser.h> |
@@ -44,11 +43,7 @@ static void isofs_put_super(struct super_block *sb) | |||
44 | struct isofs_sb_info *sbi = ISOFS_SB(sb); | 43 | struct isofs_sb_info *sbi = ISOFS_SB(sb); |
45 | 44 | ||
46 | #ifdef CONFIG_JOLIET | 45 | #ifdef CONFIG_JOLIET |
47 | lock_kernel(); | ||
48 | |||
49 | unload_nls(sbi->s_nls_iocharset); | 46 | unload_nls(sbi->s_nls_iocharset); |
50 | |||
51 | unlock_kernel(); | ||
52 | #endif | 47 | #endif |
53 | 48 | ||
54 | kfree(sbi); | 49 | kfree(sbi); |
@@ -549,6 +544,34 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session) | |||
549 | } | 544 | } |
550 | 545 | ||
551 | /* | 546 | /* |
547 | * Check if root directory is empty (has less than 3 files). | ||
548 | * | ||
549 | * Used to detect broken CDs where ISO root directory is empty but Joliet root | ||
550 | * directory is OK. If such CD has Rock Ridge extensions, they will be disabled | ||
551 | * (and Joliet used instead) or else no files would be visible. | ||
552 | */ | ||
553 | static bool rootdir_empty(struct super_block *sb, unsigned long block) | ||
554 | { | ||
555 | int offset = 0, files = 0, de_len; | ||
556 | struct iso_directory_record *de; | ||
557 | struct buffer_head *bh; | ||
558 | |||
559 | bh = sb_bread(sb, block); | ||
560 | if (!bh) | ||
561 | return true; | ||
562 | while (files < 3) { | ||
563 | de = (struct iso_directory_record *) (bh->b_data + offset); | ||
564 | de_len = *(unsigned char *) de; | ||
565 | if (de_len == 0) | ||
566 | break; | ||
567 | files++; | ||
568 | offset += de_len; | ||
569 | } | ||
570 | brelse(bh); | ||
571 | return files < 3; | ||
572 | } | ||
573 | |||
574 | /* | ||
552 | * Initialize the superblock and read the root inode. | 575 | * Initialize the superblock and read the root inode. |
553 | * | 576 | * |
554 | * Note: a check_disk_change() has been done immediately prior | 577 | * Note: a check_disk_change() has been done immediately prior |
@@ -823,6 +846,7 @@ root_found: | |||
823 | sbi->s_utf8 = opt.utf8; | 846 | sbi->s_utf8 = opt.utf8; |
824 | sbi->s_nocompress = opt.nocompress; | 847 | sbi->s_nocompress = opt.nocompress; |
825 | sbi->s_overriderockperm = opt.overriderockperm; | 848 | sbi->s_overriderockperm = opt.overriderockperm; |
849 | mutex_init(&sbi->s_mutex); | ||
826 | /* | 850 | /* |
827 | * It would be incredibly stupid to allow people to mark every file | 851 | * It would be incredibly stupid to allow people to mark every file |
828 | * on the disk as suid, so we merely allow them to set the default | 852 | * on the disk as suid, so we merely allow them to set the default |
@@ -847,6 +871,18 @@ root_found: | |||
847 | goto out_no_root; | 871 | goto out_no_root; |
848 | 872 | ||
849 | /* | 873 | /* |
874 | * Fix for broken CDs with Rock Ridge and empty ISO root directory but | ||
875 | * correct Joliet root directory. | ||
876 | */ | ||
877 | if (sbi->s_rock == 1 && joliet_level && | ||
878 | rootdir_empty(s, sbi->s_firstdatazone)) { | ||
879 | printk(KERN_NOTICE | ||
880 | "ISOFS: primary root directory is empty. " | ||
881 | "Disabling Rock Ridge and switching to Joliet."); | ||
882 | sbi->s_rock = 0; | ||
883 | } | ||
884 | |||
885 | /* | ||
850 | * If this disk has both Rock Ridge and Joliet on it, then we | 886 | * If this disk has both Rock Ridge and Joliet on it, then we |
851 | * want to use Rock Ridge by default. This can be overridden | 887 | * want to use Rock Ridge by default. This can be overridden |
852 | * by using the norock mount option. There is still one other | 888 | * by using the norock mount option. There is still one other |
@@ -966,27 +1002,23 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf) | |||
966 | * or getblk() if they are not. Returns the number of blocks inserted | 1002 | * or getblk() if they are not. Returns the number of blocks inserted |
967 | * (-ve == error.) | 1003 | * (-ve == error.) |
968 | */ | 1004 | */ |
969 | int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | 1005 | int isofs_get_blocks(struct inode *inode, sector_t iblock, |
970 | struct buffer_head **bh, unsigned long nblocks) | 1006 | struct buffer_head **bh, unsigned long nblocks) |
971 | { | 1007 | { |
972 | unsigned long b_off; | 1008 | unsigned long b_off = iblock; |
973 | unsigned offset, sect_size; | 1009 | unsigned offset, sect_size; |
974 | unsigned int firstext; | 1010 | unsigned int firstext; |
975 | unsigned long nextblk, nextoff; | 1011 | unsigned long nextblk, nextoff; |
976 | long iblock = (long)iblock_s; | ||
977 | int section, rv, error; | 1012 | int section, rv, error; |
978 | struct iso_inode_info *ei = ISOFS_I(inode); | 1013 | struct iso_inode_info *ei = ISOFS_I(inode); |
979 | 1014 | ||
980 | lock_kernel(); | ||
981 | |||
982 | error = -EIO; | 1015 | error = -EIO; |
983 | rv = 0; | 1016 | rv = 0; |
984 | if (iblock < 0 || iblock != iblock_s) { | 1017 | if (iblock != b_off) { |
985 | printk(KERN_DEBUG "%s: block number too large\n", __func__); | 1018 | printk(KERN_DEBUG "%s: block number too large\n", __func__); |
986 | goto abort; | 1019 | goto abort; |
987 | } | 1020 | } |
988 | 1021 | ||
989 | b_off = iblock; | ||
990 | 1022 | ||
991 | offset = 0; | 1023 | offset = 0; |
992 | firstext = ei->i_first_extent; | 1024 | firstext = ei->i_first_extent; |
@@ -1004,8 +1036,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
1004 | * I/O errors. | 1036 | * I/O errors. |
1005 | */ | 1037 | */ |
1006 | if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { | 1038 | if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) { |
1007 | printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n", | 1039 | printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n", |
1008 | __func__, iblock, (unsigned long) inode->i_size); | 1040 | __func__, b_off, |
1041 | (unsigned long long)inode->i_size); | ||
1009 | goto abort; | 1042 | goto abort; |
1010 | } | 1043 | } |
1011 | 1044 | ||
@@ -1031,9 +1064,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
1031 | if (++section > 100) { | 1064 | if (++section > 100) { |
1032 | printk(KERN_DEBUG "%s: More than 100 file sections ?!?" | 1065 | printk(KERN_DEBUG "%s: More than 100 file sections ?!?" |
1033 | " aborting...\n", __func__); | 1066 | " aborting...\n", __func__); |
1034 | printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u " | 1067 | printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u " |
1035 | "nextblk=%lu nextoff=%lu\n", __func__, | 1068 | "nextblk=%lu nextoff=%lu\n", __func__, |
1036 | iblock, firstext, (unsigned) sect_size, | 1069 | b_off, firstext, (unsigned) sect_size, |
1037 | nextblk, nextoff); | 1070 | nextblk, nextoff); |
1038 | goto abort; | 1071 | goto abort; |
1039 | } | 1072 | } |
@@ -1054,7 +1087,6 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s, | |||
1054 | 1087 | ||
1055 | error = 0; | 1088 | error = 0; |
1056 | abort: | 1089 | abort: |
1057 | unlock_kernel(); | ||
1058 | return rv != 0 ? rv : error; | 1090 | return rv != 0 ? rv : error; |
1059 | } | 1091 | } |
1060 | 1092 | ||
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h index 7d33de84f52a..2882dc089f87 100644 --- a/fs/isofs/isofs.h +++ b/fs/isofs/isofs.h | |||
@@ -55,6 +55,7 @@ struct isofs_sb_info { | |||
55 | gid_t s_gid; | 55 | gid_t s_gid; |
56 | uid_t s_uid; | 56 | uid_t s_uid; |
57 | struct nls_table *s_nls_iocharset; /* Native language support table */ | 57 | struct nls_table *s_nls_iocharset; /* Native language support table */ |
58 | struct mutex s_mutex; /* replaces BKL, please remove if possible */ | ||
58 | }; | 59 | }; |
59 | 60 | ||
60 | #define ISOFS_INVALID_MODE ((mode_t) -1) | 61 | #define ISOFS_INVALID_MODE ((mode_t) -1) |
diff --git a/fs/isofs/namei.c b/fs/isofs/namei.c index ab438beb867c..0d23abfd4280 100644 --- a/fs/isofs/namei.c +++ b/fs/isofs/namei.c | |||
@@ -6,7 +6,6 @@ | |||
6 | * (C) 1991 Linus Torvalds - minix filesystem | 6 | * (C) 1991 Linus Torvalds - minix filesystem |
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/smp_lock.h> | ||
10 | #include <linux/gfp.h> | 9 | #include <linux/gfp.h> |
11 | #include "isofs.h" | 10 | #include "isofs.h" |
12 | 11 | ||
@@ -168,6 +167,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam | |||
168 | int found; | 167 | int found; |
169 | unsigned long uninitialized_var(block); | 168 | unsigned long uninitialized_var(block); |
170 | unsigned long uninitialized_var(offset); | 169 | unsigned long uninitialized_var(offset); |
170 | struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb); | ||
171 | struct inode *inode; | 171 | struct inode *inode; |
172 | struct page *page; | 172 | struct page *page; |
173 | 173 | ||
@@ -177,7 +177,7 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam | |||
177 | if (!page) | 177 | if (!page) |
178 | return ERR_PTR(-ENOMEM); | 178 | return ERR_PTR(-ENOMEM); |
179 | 179 | ||
180 | lock_kernel(); | 180 | mutex_lock(&sbi->s_mutex); |
181 | found = isofs_find_entry(dir, dentry, | 181 | found = isofs_find_entry(dir, dentry, |
182 | &block, &offset, | 182 | &block, &offset, |
183 | page_address(page), | 183 | page_address(page), |
@@ -188,10 +188,10 @@ struct dentry *isofs_lookup(struct inode *dir, struct dentry *dentry, struct nam | |||
188 | if (found) { | 188 | if (found) { |
189 | inode = isofs_iget(dir->i_sb, block, offset); | 189 | inode = isofs_iget(dir->i_sb, block, offset); |
190 | if (IS_ERR(inode)) { | 190 | if (IS_ERR(inode)) { |
191 | unlock_kernel(); | 191 | mutex_unlock(&sbi->s_mutex); |
192 | return ERR_CAST(inode); | 192 | return ERR_CAST(inode); |
193 | } | 193 | } |
194 | } | 194 | } |
195 | unlock_kernel(); | 195 | mutex_unlock(&sbi->s_mutex); |
196 | return d_splice_alias(inode, dentry); | 196 | return d_splice_alias(inode, dentry); |
197 | } | 197 | } |
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c index 96a685c550fd..f9cd04db6eab 100644 --- a/fs/isofs/rock.c +++ b/fs/isofs/rock.c | |||
@@ -8,7 +8,6 @@ | |||
8 | 8 | ||
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/pagemap.h> | 10 | #include <linux/pagemap.h> |
11 | #include <linux/smp_lock.h> | ||
12 | 11 | ||
13 | #include "isofs.h" | 12 | #include "isofs.h" |
14 | #include "rock.h" | 13 | #include "rock.h" |
@@ -661,6 +660,7 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) | |||
661 | { | 660 | { |
662 | struct inode *inode = page->mapping->host; | 661 | struct inode *inode = page->mapping->host; |
663 | struct iso_inode_info *ei = ISOFS_I(inode); | 662 | struct iso_inode_info *ei = ISOFS_I(inode); |
663 | struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); | ||
664 | char *link = kmap(page); | 664 | char *link = kmap(page); |
665 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 665 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
666 | struct buffer_head *bh; | 666 | struct buffer_head *bh; |
@@ -673,12 +673,12 @@ static int rock_ridge_symlink_readpage(struct file *file, struct page *page) | |||
673 | struct rock_state rs; | 673 | struct rock_state rs; |
674 | int ret; | 674 | int ret; |
675 | 675 | ||
676 | if (!ISOFS_SB(inode->i_sb)->s_rock) | 676 | if (!sbi->s_rock) |
677 | goto error; | 677 | goto error; |
678 | 678 | ||
679 | init_rock_state(&rs, inode); | 679 | init_rock_state(&rs, inode); |
680 | block = ei->i_iget5_block; | 680 | block = ei->i_iget5_block; |
681 | lock_kernel(); | 681 | mutex_lock(&sbi->s_mutex); |
682 | bh = sb_bread(inode->i_sb, block); | 682 | bh = sb_bread(inode->i_sb, block); |
683 | if (!bh) | 683 | if (!bh) |
684 | goto out_noread; | 684 | goto out_noread; |
@@ -748,7 +748,7 @@ repeat: | |||
748 | goto fail; | 748 | goto fail; |
749 | brelse(bh); | 749 | brelse(bh); |
750 | *rpnt = '\0'; | 750 | *rpnt = '\0'; |
751 | unlock_kernel(); | 751 | mutex_unlock(&sbi->s_mutex); |
752 | SetPageUptodate(page); | 752 | SetPageUptodate(page); |
753 | kunmap(page); | 753 | kunmap(page); |
754 | unlock_page(page); | 754 | unlock_page(page); |
@@ -765,7 +765,7 @@ out_bad_span: | |||
765 | printk("symlink spans iso9660 blocks\n"); | 765 | printk("symlink spans iso9660 blocks\n"); |
766 | fail: | 766 | fail: |
767 | brelse(bh); | 767 | brelse(bh); |
768 | unlock_kernel(); | 768 | mutex_unlock(&sbi->s_mutex); |
769 | error: | 769 | error: |
770 | SetPageError(page); | 770 | SetPageError(page); |
771 | kunmap(page); | 771 | kunmap(page); |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 95d8c11c929e..85a6883c0aca 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
@@ -137,34 +137,10 @@ static int journal_write_commit_record(journal_t *journal, | |||
137 | JBUFFER_TRACE(descriptor, "write commit block"); | 137 | JBUFFER_TRACE(descriptor, "write commit block"); |
138 | set_buffer_dirty(bh); | 138 | set_buffer_dirty(bh); |
139 | 139 | ||
140 | if (journal->j_flags & JFS_BARRIER) { | 140 | if (journal->j_flags & JFS_BARRIER) |
141 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); | 141 | ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_FLUSH_FUA); |
142 | 142 | else | |
143 | /* | ||
144 | * Is it possible for another commit to fail at roughly | ||
145 | * the same time as this one? If so, we don't want to | ||
146 | * trust the barrier flag in the super, but instead want | ||
147 | * to remember if we sent a barrier request | ||
148 | */ | ||
149 | if (ret == -EOPNOTSUPP) { | ||
150 | char b[BDEVNAME_SIZE]; | ||
151 | |||
152 | printk(KERN_WARNING | ||
153 | "JBD: barrier-based sync failed on %s - " | ||
154 | "disabling barriers\n", | ||
155 | bdevname(journal->j_dev, b)); | ||
156 | spin_lock(&journal->j_state_lock); | ||
157 | journal->j_flags &= ~JFS_BARRIER; | ||
158 | spin_unlock(&journal->j_state_lock); | ||
159 | |||
160 | /* And try again, without the barrier */ | ||
161 | set_buffer_uptodate(bh); | ||
162 | set_buffer_dirty(bh); | ||
163 | ret = sync_dirty_buffer(bh); | ||
164 | } | ||
165 | } else { | ||
166 | ret = sync_dirty_buffer(bh); | 143 | ret = sync_dirty_buffer(bh); |
167 | } | ||
168 | 144 | ||
169 | put_bh(bh); /* One for getblk() */ | 145 | put_bh(bh); /* One for getblk() */ |
170 | journal_put_journal_head(descriptor); | 146 | journal_put_journal_head(descriptor); |
@@ -318,7 +294,7 @@ void journal_commit_transaction(journal_t *journal) | |||
318 | int first_tag = 0; | 294 | int first_tag = 0; |
319 | int tag_flag; | 295 | int tag_flag; |
320 | int i; | 296 | int i; |
321 | int write_op = WRITE; | 297 | int write_op = WRITE_SYNC; |
322 | 298 | ||
323 | /* | 299 | /* |
324 | * First job: lock down the current transaction and wait for | 300 | * First job: lock down the current transaction and wait for |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 5247e7ffdcb4..6571a056e55d 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
@@ -532,8 +532,7 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
532 | */ | 532 | */ |
533 | if ((journal->j_fs_dev != journal->j_dev) && | 533 | if ((journal->j_fs_dev != journal->j_dev) && |
534 | (journal->j_flags & JBD2_BARRIER)) | 534 | (journal->j_flags & JBD2_BARRIER)) |
535 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, | 535 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
536 | BLKDEV_IFL_WAIT); | ||
537 | if (!(journal->j_flags & JBD2_ABORT)) | 536 | if (!(journal->j_flags & JBD2_ABORT)) |
538 | jbd2_journal_update_superblock(journal, 1); | 537 | jbd2_journal_update_superblock(journal, 1); |
539 | return 0; | 538 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 7c068c189d80..bc6be8bda1cc 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -134,25 +134,11 @@ static int journal_submit_commit_record(journal_t *journal, | |||
134 | 134 | ||
135 | if (journal->j_flags & JBD2_BARRIER && | 135 | if (journal->j_flags & JBD2_BARRIER && |
136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, | 136 | !JBD2_HAS_INCOMPAT_FEATURE(journal, |
137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { | 137 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) |
138 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); | 138 | ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh); |
139 | if (ret == -EOPNOTSUPP) { | 139 | else |
140 | printk(KERN_WARNING | ||
141 | "JBD2: Disabling barriers on %s, " | ||
142 | "not supported by device\n", journal->j_devname); | ||
143 | write_lock(&journal->j_state_lock); | ||
144 | journal->j_flags &= ~JBD2_BARRIER; | ||
145 | write_unlock(&journal->j_state_lock); | ||
146 | |||
147 | /* And try again, without the barrier */ | ||
148 | lock_buffer(bh); | ||
149 | set_buffer_uptodate(bh); | ||
150 | clear_buffer_dirty(bh); | ||
151 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
152 | } | ||
153 | } else { | ||
154 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | 140 | ret = submit_bh(WRITE_SYNC_PLUG, bh); |
155 | } | 141 | |
156 | *cbh = bh; | 142 | *cbh = bh; |
157 | return ret; | 143 | return ret; |
158 | } | 144 | } |
@@ -166,29 +152,8 @@ static int journal_wait_on_commit_record(journal_t *journal, | |||
166 | { | 152 | { |
167 | int ret = 0; | 153 | int ret = 0; |
168 | 154 | ||
169 | retry: | ||
170 | clear_buffer_dirty(bh); | 155 | clear_buffer_dirty(bh); |
171 | wait_on_buffer(bh); | 156 | wait_on_buffer(bh); |
172 | if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) { | ||
173 | printk(KERN_WARNING | ||
174 | "JBD2: %s: disabling barries on %s - not supported " | ||
175 | "by device\n", __func__, journal->j_devname); | ||
176 | write_lock(&journal->j_state_lock); | ||
177 | journal->j_flags &= ~JBD2_BARRIER; | ||
178 | write_unlock(&journal->j_state_lock); | ||
179 | |||
180 | lock_buffer(bh); | ||
181 | clear_buffer_dirty(bh); | ||
182 | set_buffer_uptodate(bh); | ||
183 | bh->b_end_io = journal_end_buffer_io_sync; | ||
184 | |||
185 | ret = submit_bh(WRITE_SYNC_PLUG, bh); | ||
186 | if (ret) { | ||
187 | unlock_buffer(bh); | ||
188 | return ret; | ||
189 | } | ||
190 | goto retry; | ||
191 | } | ||
192 | 157 | ||
193 | if (unlikely(!buffer_uptodate(bh))) | 158 | if (unlikely(!buffer_uptodate(bh))) |
194 | ret = -EIO; | 159 | ret = -EIO; |
@@ -360,7 +325,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
360 | int tag_bytes = journal_tag_bytes(journal); | 325 | int tag_bytes = journal_tag_bytes(journal); |
361 | struct buffer_head *cbh = NULL; /* For transactional checksums */ | 326 | struct buffer_head *cbh = NULL; /* For transactional checksums */ |
362 | __u32 crc32_sum = ~0; | 327 | __u32 crc32_sum = ~0; |
363 | int write_op = WRITE; | 328 | int write_op = WRITE_SYNC; |
364 | 329 | ||
365 | /* | 330 | /* |
366 | * First job: lock down the current transaction and wait for | 331 | * First job: lock down the current transaction and wait for |
@@ -701,6 +666,16 @@ start_journal_io: | |||
701 | } | 666 | } |
702 | } | 667 | } |
703 | 668 | ||
669 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
670 | if (err) { | ||
671 | printk(KERN_WARNING | ||
672 | "JBD2: Detected IO errors while flushing file data " | ||
673 | "on %s\n", journal->j_devname); | ||
674 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
675 | jbd2_journal_abort(journal, err); | ||
676 | err = 0; | ||
677 | } | ||
678 | |||
704 | /* | 679 | /* |
705 | * If the journal is not located on the file system device, | 680 | * If the journal is not located on the file system device, |
706 | * then we must flush the file system device before we issue | 681 | * then we must flush the file system device before we issue |
@@ -709,8 +684,7 @@ start_journal_io: | |||
709 | if (commit_transaction->t_flushed_data_blocks && | 684 | if (commit_transaction->t_flushed_data_blocks && |
710 | (journal->j_fs_dev != journal->j_dev) && | 685 | (journal->j_fs_dev != journal->j_dev) && |
711 | (journal->j_flags & JBD2_BARRIER)) | 686 | (journal->j_flags & JBD2_BARRIER)) |
712 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL, | 687 | blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); |
713 | BLKDEV_IFL_WAIT); | ||
714 | 688 | ||
715 | /* Done it all: now write the commit record asynchronously. */ | 689 | /* Done it all: now write the commit record asynchronously. */ |
716 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | 690 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, |
@@ -719,19 +693,6 @@ start_journal_io: | |||
719 | &cbh, crc32_sum); | 693 | &cbh, crc32_sum); |
720 | if (err) | 694 | if (err) |
721 | __jbd2_journal_abort_hard(journal); | 695 | __jbd2_journal_abort_hard(journal); |
722 | if (journal->j_flags & JBD2_BARRIER) | ||
723 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL, | ||
724 | BLKDEV_IFL_WAIT); | ||
725 | } | ||
726 | |||
727 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | ||
728 | if (err) { | ||
729 | printk(KERN_WARNING | ||
730 | "JBD2: Detected IO errors while flushing file data " | ||
731 | "on %s\n", journal->j_devname); | ||
732 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
733 | jbd2_journal_abort(journal, err); | ||
734 | err = 0; | ||
735 | } | 696 | } |
736 | 697 | ||
737 | /* Lo and behold: we have just managed to send a transaction to | 698 | /* Lo and behold: we have just managed to send a transaction to |
@@ -845,6 +806,11 @@ wait_for_iobuf: | |||
845 | } | 806 | } |
846 | if (!err && !is_journal_aborted(journal)) | 807 | if (!err && !is_journal_aborted(journal)) |
847 | err = journal_wait_on_commit_record(journal, cbh); | 808 | err = journal_wait_on_commit_record(journal, cbh); |
809 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, | ||
810 | JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && | ||
811 | journal->j_flags & JBD2_BARRIER) { | ||
812 | blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); | ||
813 | } | ||
848 | 814 | ||
849 | if (err) | 815 | if (err) |
850 | jbd2_journal_abort(journal, err); | 816 | jbd2_journal_abort(journal, err); |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 0e8014ea6b94..262419f83d80 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -1371,6 +1371,10 @@ int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, | |||
1371 | 1371 | ||
1372 | if (!compat && !ro && !incompat) | 1372 | if (!compat && !ro && !incompat) |
1373 | return 1; | 1373 | return 1; |
1374 | /* Load journal superblock if it is not loaded yet. */ | ||
1375 | if (journal->j_format_version == 0 && | ||
1376 | journal_get_superblock(journal) != 0) | ||
1377 | return 0; | ||
1374 | if (journal->j_format_version == 1) | 1378 | if (journal->j_format_version == 1) |
1375 | return 0; | 1379 | return 0; |
1376 | 1380 | ||
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index ed78a3cf3cb0..79121aa5858b 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de | |||
289 | mutex_unlock(&f->sem); | 289 | mutex_unlock(&f->sem); |
290 | d_instantiate(dentry, old_dentry->d_inode); | 290 | d_instantiate(dentry, old_dentry->d_inode); |
291 | dir_i->i_mtime = dir_i->i_ctime = ITIME(now); | 291 | dir_i->i_mtime = dir_i->i_ctime = ITIME(now); |
292 | atomic_inc(&old_dentry->d_inode->i_count); | 292 | ihold(old_dentry->d_inode); |
293 | } | 293 | } |
294 | return ret; | 294 | return ret; |
295 | } | 295 | } |
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, | |||
864 | printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); | 864 | printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); |
865 | /* Might as well let the VFS know */ | 865 | /* Might as well let the VFS know */ |
866 | d_instantiate(new_dentry, old_dentry->d_inode); | 866 | d_instantiate(new_dentry, old_dentry->d_inode); |
867 | atomic_inc(&old_dentry->d_inode->i_count); | 867 | ihold(old_dentry->d_inode); |
868 | new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); | 868 | new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); |
869 | return ret; | 869 | return ret; |
870 | } | 870 | } |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 6b2964a19850..d9beb06e6fca 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -21,7 +21,6 @@ | |||
21 | #include <linux/vmalloc.h> | 21 | #include <linux/vmalloc.h> |
22 | #include <linux/vfs.h> | 22 | #include <linux/vfs.h> |
23 | #include <linux/crc32.h> | 23 | #include <linux/crc32.h> |
24 | #include <linux/smp_lock.h> | ||
25 | #include "nodelist.h" | 24 | #include "nodelist.h" |
26 | 25 | ||
27 | static int jffs2_flash_setup(struct jffs2_sb_info *c); | 26 | static int jffs2_flash_setup(struct jffs2_sb_info *c); |
@@ -391,7 +390,6 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) | |||
391 | This also catches the case where it was stopped and this | 390 | This also catches the case where it was stopped and this |
392 | is just a remount to restart it. | 391 | is just a remount to restart it. |
393 | Flush the writebuffer, if neccecary, else we loose it */ | 392 | Flush the writebuffer, if neccecary, else we loose it */ |
394 | lock_kernel(); | ||
395 | if (!(sb->s_flags & MS_RDONLY)) { | 393 | if (!(sb->s_flags & MS_RDONLY)) { |
396 | jffs2_stop_garbage_collect_thread(c); | 394 | jffs2_stop_garbage_collect_thread(c); |
397 | mutex_lock(&c->alloc_sem); | 395 | mutex_lock(&c->alloc_sem); |
@@ -403,8 +401,6 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) | |||
403 | jffs2_start_garbage_collect_thread(c); | 401 | jffs2_start_garbage_collect_thread(c); |
404 | 402 | ||
405 | *flags |= MS_NOATIME; | 403 | *flags |= MS_NOATIME; |
406 | |||
407 | unlock_kernel(); | ||
408 | return 0; | 404 | return 0; |
409 | } | 405 | } |
410 | 406 | ||
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 662bba099501..d1ae5dfc22b9 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -12,7 +12,6 @@ | |||
12 | #include <linux/kernel.h> | 12 | #include <linux/kernel.h> |
13 | #include <linux/module.h> | 13 | #include <linux/module.h> |
14 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/init.h> | 15 | #include <linux/init.h> |
17 | #include <linux/list.h> | 16 | #include <linux/list.h> |
18 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
@@ -146,6 +145,7 @@ static const struct super_operations jffs2_super_operations = | |||
146 | static int jffs2_fill_super(struct super_block *sb, void *data, int silent) | 145 | static int jffs2_fill_super(struct super_block *sb, void *data, int silent) |
147 | { | 146 | { |
148 | struct jffs2_sb_info *c; | 147 | struct jffs2_sb_info *c; |
148 | int ret; | ||
149 | 149 | ||
150 | D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():" | 150 | D1(printk(KERN_DEBUG "jffs2_get_sb_mtd():" |
151 | " New superblock for device %d (\"%s\")\n", | 151 | " New superblock for device %d (\"%s\")\n", |
@@ -175,7 +175,8 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent) | |||
175 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL | 175 | #ifdef CONFIG_JFFS2_FS_POSIX_ACL |
176 | sb->s_flags |= MS_POSIXACL; | 176 | sb->s_flags |= MS_POSIXACL; |
177 | #endif | 177 | #endif |
178 | return jffs2_do_fill_super(sb, data, silent); | 178 | ret = jffs2_do_fill_super(sb, data, silent); |
179 | return ret; | ||
179 | } | 180 | } |
180 | 181 | ||
181 | static int jffs2_get_sb(struct file_system_type *fs_type, | 182 | static int jffs2_get_sb(struct file_system_type *fs_type, |
@@ -192,8 +193,6 @@ static void jffs2_put_super (struct super_block *sb) | |||
192 | 193 | ||
193 | D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); | 194 | D2(printk(KERN_DEBUG "jffs2: jffs2_put_super()\n")); |
194 | 195 | ||
195 | lock_kernel(); | ||
196 | |||
197 | if (sb->s_dirt) | 196 | if (sb->s_dirt) |
198 | jffs2_write_super(sb); | 197 | jffs2_write_super(sb); |
199 | 198 | ||
@@ -215,8 +214,6 @@ static void jffs2_put_super (struct super_block *sb) | |||
215 | if (c->mtd->sync) | 214 | if (c->mtd->sync) |
216 | c->mtd->sync(c->mtd); | 215 | c->mtd->sync(c->mtd); |
217 | 216 | ||
218 | unlock_kernel(); | ||
219 | |||
220 | D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); | 217 | D1(printk(KERN_DEBUG "jffs2_put_super returning\n")); |
221 | } | 218 | } |
222 | 219 | ||
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c index f8332dc8eeb2..3a09423b6c22 100644 --- a/fs/jfs/jfs_imap.c +++ b/fs/jfs/jfs_imap.c | |||
@@ -497,7 +497,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary) | |||
497 | * appear hashed, but do not put on any lists. hlist_del() | 497 | * appear hashed, but do not put on any lists. hlist_del() |
498 | * will work fine and require no locking. | 498 | * will work fine and require no locking. |
499 | */ | 499 | */ |
500 | ip->i_hash.pprev = &ip->i_hash.next; | 500 | hlist_add_fake(&ip->i_hash); |
501 | 501 | ||
502 | return (ip); | 502 | return (ip); |
503 | } | 503 | } |
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c index c51af2a14516..e1b8493b9aaa 100644 --- a/fs/jfs/jfs_logmgr.c +++ b/fs/jfs/jfs_logmgr.c | |||
@@ -1010,15 +1010,13 @@ static int lmLogSync(struct jfs_log * log, int hard_sync) | |||
1010 | * option 2 - shutdown file systems | 1010 | * option 2 - shutdown file systems |
1011 | * associated with log ? | 1011 | * associated with log ? |
1012 | * option 3 - extend log ? | 1012 | * option 3 - extend log ? |
1013 | */ | ||
1014 | /* | ||
1015 | * option 4 - second chance | 1013 | * option 4 - second chance |
1016 | * | 1014 | * |
1017 | * mark log wrapped, and continue. | 1015 | * mark log wrapped, and continue. |
1018 | * when all active transactions are completed, | 1016 | * when all active transactions are completed, |
1019 | * mark log vaild for recovery. | 1017 | * mark log valid for recovery. |
1020 | * if crashed during invalid state, log state | 1018 | * if crashed during invalid state, log state |
1021 | * implies invald log, forcing fsck(). | 1019 | * implies invalid log, forcing fsck(). |
1022 | */ | 1020 | */ |
1023 | /* mark log state log wrap in log superblock */ | 1021 | /* mark log state log wrap in log superblock */ |
1024 | /* log->state = LOGWRAP; */ | 1022 | /* log->state = LOGWRAP; */ |
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c index 7b698f2ec45a..9895595fd2f2 100644 --- a/fs/jfs/jfs_mount.c +++ b/fs/jfs/jfs_mount.c | |||
@@ -97,7 +97,7 @@ int jfs_mount(struct super_block *sb) | |||
97 | 97 | ||
98 | ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); | 98 | ipaimap = diReadSpecial(sb, AGGREGATE_I, 0); |
99 | if (ipaimap == NULL) { | 99 | if (ipaimap == NULL) { |
100 | jfs_err("jfs_mount: Faild to read AGGREGATE_I"); | 100 | jfs_err("jfs_mount: Failed to read AGGREGATE_I"); |
101 | rc = -EIO; | 101 | rc = -EIO; |
102 | goto errout20; | 102 | goto errout20; |
103 | } | 103 | } |
@@ -148,7 +148,7 @@ int jfs_mount(struct super_block *sb) | |||
148 | if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { | 148 | if ((sbi->mntflag & JFS_BAD_SAIT) == 0) { |
149 | ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); | 149 | ipaimap2 = diReadSpecial(sb, AGGREGATE_I, 1); |
150 | if (!ipaimap2) { | 150 | if (!ipaimap2) { |
151 | jfs_err("jfs_mount: Faild to read AGGREGATE_I"); | 151 | jfs_err("jfs_mount: Failed to read AGGREGATE_I"); |
152 | rc = -EIO; | 152 | rc = -EIO; |
153 | goto errout35; | 153 | goto errout35; |
154 | } | 154 | } |
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index d945ea76b445..9466957ec841 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c | |||
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */ | |||
1279 | * lazy commit thread finishes processing | 1279 | * lazy commit thread finishes processing |
1280 | */ | 1280 | */ |
1281 | if (tblk->xflag & COMMIT_DELETE) { | 1281 | if (tblk->xflag & COMMIT_DELETE) { |
1282 | atomic_inc(&tblk->u.ip->i_count); | 1282 | ihold(tblk->u.ip); |
1283 | /* | 1283 | /* |
1284 | * Avoid a rare deadlock | 1284 | * Avoid a rare deadlock |
1285 | * | 1285 | * |
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index a9cf8e8675be..231ca4af9bce 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry, | |||
839 | ip->i_ctime = CURRENT_TIME; | 839 | ip->i_ctime = CURRENT_TIME; |
840 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 840 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
841 | mark_inode_dirty(dir); | 841 | mark_inode_dirty(dir); |
842 | atomic_inc(&ip->i_count); | 842 | ihold(ip); |
843 | 843 | ||
844 | iplist[0] = ip; | 844 | iplist[0] = ip; |
845 | iplist[1] = dir; | 845 | iplist[1] = dir; |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index ec8c3e4baca3..68eee2bf629e 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -33,7 +33,6 @@ | |||
33 | #include <linux/slab.h> | 33 | #include <linux/slab.h> |
34 | #include <asm/uaccess.h> | 34 | #include <asm/uaccess.h> |
35 | #include <linux/seq_file.h> | 35 | #include <linux/seq_file.h> |
36 | #include <linux/smp_lock.h> | ||
37 | 36 | ||
38 | #include "jfs_incore.h" | 37 | #include "jfs_incore.h" |
39 | #include "jfs_filsys.h" | 38 | #include "jfs_filsys.h" |
@@ -176,8 +175,6 @@ static void jfs_put_super(struct super_block *sb) | |||
176 | 175 | ||
177 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 176 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
178 | 177 | ||
179 | lock_kernel(); | ||
180 | |||
181 | rc = jfs_umount(sb); | 178 | rc = jfs_umount(sb); |
182 | if (rc) | 179 | if (rc) |
183 | jfs_err("jfs_umount failed with return code %d", rc); | 180 | jfs_err("jfs_umount failed with return code %d", rc); |
@@ -188,8 +185,6 @@ static void jfs_put_super(struct super_block *sb) | |||
188 | iput(sbi->direct_inode); | 185 | iput(sbi->direct_inode); |
189 | 186 | ||
190 | kfree(sbi); | 187 | kfree(sbi); |
191 | |||
192 | unlock_kernel(); | ||
193 | } | 188 | } |
194 | 189 | ||
195 | enum { | 190 | enum { |
@@ -369,19 +364,16 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
369 | if (!parse_options(data, sb, &newLVSize, &flag)) { | 364 | if (!parse_options(data, sb, &newLVSize, &flag)) { |
370 | return -EINVAL; | 365 | return -EINVAL; |
371 | } | 366 | } |
372 | lock_kernel(); | 367 | |
373 | if (newLVSize) { | 368 | if (newLVSize) { |
374 | if (sb->s_flags & MS_RDONLY) { | 369 | if (sb->s_flags & MS_RDONLY) { |
375 | printk(KERN_ERR | 370 | printk(KERN_ERR |
376 | "JFS: resize requires volume to be mounted read-write\n"); | 371 | "JFS: resize requires volume to be mounted read-write\n"); |
377 | unlock_kernel(); | ||
378 | return -EROFS; | 372 | return -EROFS; |
379 | } | 373 | } |
380 | rc = jfs_extendfs(sb, newLVSize, 0); | 374 | rc = jfs_extendfs(sb, newLVSize, 0); |
381 | if (rc) { | 375 | if (rc) |
382 | unlock_kernel(); | ||
383 | return rc; | 376 | return rc; |
384 | } | ||
385 | } | 377 | } |
386 | 378 | ||
387 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | 379 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { |
@@ -397,36 +389,30 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
397 | /* mark the fs r/w for quota activity */ | 389 | /* mark the fs r/w for quota activity */ |
398 | sb->s_flags &= ~MS_RDONLY; | 390 | sb->s_flags &= ~MS_RDONLY; |
399 | 391 | ||
400 | unlock_kernel(); | ||
401 | dquot_resume(sb, -1); | 392 | dquot_resume(sb, -1); |
402 | return ret; | 393 | return ret; |
403 | } | 394 | } |
404 | if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { | 395 | if ((!(sb->s_flags & MS_RDONLY)) && (*flags & MS_RDONLY)) { |
405 | rc = dquot_suspend(sb, -1); | 396 | rc = dquot_suspend(sb, -1); |
406 | if (rc < 0) { | 397 | if (rc < 0) { |
407 | unlock_kernel(); | ||
408 | return rc; | 398 | return rc; |
409 | } | 399 | } |
410 | rc = jfs_umount_rw(sb); | 400 | rc = jfs_umount_rw(sb); |
411 | JFS_SBI(sb)->flag = flag; | 401 | JFS_SBI(sb)->flag = flag; |
412 | unlock_kernel(); | ||
413 | return rc; | 402 | return rc; |
414 | } | 403 | } |
415 | if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) | 404 | if ((JFS_SBI(sb)->flag & JFS_NOINTEGRITY) != (flag & JFS_NOINTEGRITY)) |
416 | if (!(sb->s_flags & MS_RDONLY)) { | 405 | if (!(sb->s_flags & MS_RDONLY)) { |
417 | rc = jfs_umount_rw(sb); | 406 | rc = jfs_umount_rw(sb); |
418 | if (rc) { | 407 | if (rc) |
419 | unlock_kernel(); | ||
420 | return rc; | 408 | return rc; |
421 | } | 409 | |
422 | JFS_SBI(sb)->flag = flag; | 410 | JFS_SBI(sb)->flag = flag; |
423 | ret = jfs_mount_rw(sb, 1); | 411 | ret = jfs_mount_rw(sb, 1); |
424 | unlock_kernel(); | ||
425 | return ret; | 412 | return ret; |
426 | } | 413 | } |
427 | JFS_SBI(sb)->flag = flag; | 414 | JFS_SBI(sb)->flag = flag; |
428 | 415 | ||
429 | unlock_kernel(); | ||
430 | return 0; | 416 | return 0; |
431 | } | 417 | } |
432 | 418 | ||
@@ -446,6 +432,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent) | |||
446 | sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); | 432 | sbi = kzalloc(sizeof (struct jfs_sb_info), GFP_KERNEL); |
447 | if (!sbi) | 433 | if (!sbi) |
448 | return -ENOMEM; | 434 | return -ENOMEM; |
435 | |||
449 | sb->s_fs_info = sbi; | 436 | sb->s_fs_info = sbi; |
450 | sbi->sb = sb; | 437 | sbi->sb = sb; |
451 | sbi->uid = sbi->gid = sbi->umask = -1; | 438 | sbi->uid = sbi->gid = sbi->umask = -1; |
diff --git a/fs/libfs.c b/fs/libfs.c index 0a9da95317f7..304a5132ca27 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
@@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den | |||
255 | 255 | ||
256 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 256 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
257 | inc_nlink(inode); | 257 | inc_nlink(inode); |
258 | atomic_inc(&inode->i_count); | 258 | ihold(inode); |
259 | dget(dentry); | 259 | dget(dentry); |
260 | d_instantiate(dentry, inode); | 260 | d_instantiate(dentry, inode); |
261 | return 0; | 261 | return 0; |
@@ -892,10 +892,6 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent); | |||
892 | */ | 892 | */ |
893 | int generic_file_fsync(struct file *file, int datasync) | 893 | int generic_file_fsync(struct file *file, int datasync) |
894 | { | 894 | { |
895 | struct writeback_control wbc = { | ||
896 | .sync_mode = WB_SYNC_ALL, | ||
897 | .nr_to_write = 0, /* metadata-only; caller takes care of data */ | ||
898 | }; | ||
899 | struct inode *inode = file->f_mapping->host; | 895 | struct inode *inode = file->f_mapping->host; |
900 | int err; | 896 | int err; |
901 | int ret; | 897 | int ret; |
@@ -906,13 +902,42 @@ int generic_file_fsync(struct file *file, int datasync) | |||
906 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | 902 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) |
907 | return ret; | 903 | return ret; |
908 | 904 | ||
909 | err = sync_inode(inode, &wbc); | 905 | err = sync_inode_metadata(inode, 1); |
910 | if (ret == 0) | 906 | if (ret == 0) |
911 | ret = err; | 907 | ret = err; |
912 | return ret; | 908 | return ret; |
913 | } | 909 | } |
914 | EXPORT_SYMBOL(generic_file_fsync); | 910 | EXPORT_SYMBOL(generic_file_fsync); |
915 | 911 | ||
912 | /** | ||
913 | * generic_check_addressable - Check addressability of file system | ||
914 | * @blocksize_bits: log of file system block size | ||
915 | * @num_blocks: number of blocks in file system | ||
916 | * | ||
917 | * Determine whether a file system with @num_blocks blocks (and a | ||
918 | * block size of 2**@blocksize_bits) is addressable by the sector_t | ||
919 | * and page cache of the system. Return 0 if so and -EFBIG otherwise. | ||
920 | */ | ||
921 | int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks) | ||
922 | { | ||
923 | u64 last_fs_block = num_blocks - 1; | ||
924 | u64 last_fs_page = | ||
925 | last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits); | ||
926 | |||
927 | if (unlikely(num_blocks == 0)) | ||
928 | return 0; | ||
929 | |||
930 | if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT)) | ||
931 | return -EINVAL; | ||
932 | |||
933 | if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) || | ||
934 | (last_fs_page > (pgoff_t)(~0ULL))) { | ||
935 | return -EFBIG; | ||
936 | } | ||
937 | return 0; | ||
938 | } | ||
939 | EXPORT_SYMBOL(generic_check_addressable); | ||
940 | |||
916 | /* | 941 | /* |
917 | * No-op implementation of ->fsync for in-memory filesystems. | 942 | * No-op implementation of ->fsync for in-memory filesystems. |
918 | */ | 943 | */ |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 64fd427c993c..d5bb86866e6c 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
@@ -42,6 +42,7 @@ struct nlm_wait { | |||
42 | }; | 42 | }; |
43 | 43 | ||
44 | static LIST_HEAD(nlm_blocked); | 44 | static LIST_HEAD(nlm_blocked); |
45 | static DEFINE_SPINLOCK(nlm_blocked_lock); | ||
45 | 46 | ||
46 | /** | 47 | /** |
47 | * nlmclnt_init - Set up per-NFS mount point lockd data structures | 48 | * nlmclnt_init - Set up per-NFS mount point lockd data structures |
@@ -97,7 +98,10 @@ struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock * | |||
97 | block->b_lock = fl; | 98 | block->b_lock = fl; |
98 | init_waitqueue_head(&block->b_wait); | 99 | init_waitqueue_head(&block->b_wait); |
99 | block->b_status = nlm_lck_blocked; | 100 | block->b_status = nlm_lck_blocked; |
101 | |||
102 | spin_lock(&nlm_blocked_lock); | ||
100 | list_add(&block->b_list, &nlm_blocked); | 103 | list_add(&block->b_list, &nlm_blocked); |
104 | spin_unlock(&nlm_blocked_lock); | ||
101 | } | 105 | } |
102 | return block; | 106 | return block; |
103 | } | 107 | } |
@@ -106,7 +110,9 @@ void nlmclnt_finish_block(struct nlm_wait *block) | |||
106 | { | 110 | { |
107 | if (block == NULL) | 111 | if (block == NULL) |
108 | return; | 112 | return; |
113 | spin_lock(&nlm_blocked_lock); | ||
109 | list_del(&block->b_list); | 114 | list_del(&block->b_list); |
115 | spin_unlock(&nlm_blocked_lock); | ||
110 | kfree(block); | 116 | kfree(block); |
111 | } | 117 | } |
112 | 118 | ||
@@ -154,6 +160,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) | |||
154 | * Look up blocked request based on arguments. | 160 | * Look up blocked request based on arguments. |
155 | * Warning: must not use cookie to match it! | 161 | * Warning: must not use cookie to match it! |
156 | */ | 162 | */ |
163 | spin_lock(&nlm_blocked_lock); | ||
157 | list_for_each_entry(block, &nlm_blocked, b_list) { | 164 | list_for_each_entry(block, &nlm_blocked, b_list) { |
158 | struct file_lock *fl_blocked = block->b_lock; | 165 | struct file_lock *fl_blocked = block->b_lock; |
159 | 166 | ||
@@ -178,6 +185,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) | |||
178 | wake_up(&block->b_wait); | 185 | wake_up(&block->b_wait); |
179 | res = nlm_granted; | 186 | res = nlm_granted; |
180 | } | 187 | } |
188 | spin_unlock(&nlm_blocked_lock); | ||
181 | return res; | 189 | return res; |
182 | } | 190 | } |
183 | 191 | ||
@@ -216,10 +224,6 @@ reclaimer(void *ptr) | |||
216 | allow_signal(SIGKILL); | 224 | allow_signal(SIGKILL); |
217 | 225 | ||
218 | down_write(&host->h_rwsem); | 226 | down_write(&host->h_rwsem); |
219 | |||
220 | /* This one ensures that our parent doesn't terminate while the | ||
221 | * reclaim is in progress */ | ||
222 | lock_kernel(); | ||
223 | lockd_up(); /* note: this cannot fail as lockd is already running */ | 227 | lockd_up(); /* note: this cannot fail as lockd is already running */ |
224 | 228 | ||
225 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); | 229 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); |
@@ -260,16 +264,17 @@ restart: | |||
260 | dprintk("NLM: done reclaiming locks for host %s\n", host->h_name); | 264 | dprintk("NLM: done reclaiming locks for host %s\n", host->h_name); |
261 | 265 | ||
262 | /* Now, wake up all processes that sleep on a blocked lock */ | 266 | /* Now, wake up all processes that sleep on a blocked lock */ |
267 | spin_lock(&nlm_blocked_lock); | ||
263 | list_for_each_entry(block, &nlm_blocked, b_list) { | 268 | list_for_each_entry(block, &nlm_blocked, b_list) { |
264 | if (block->b_host == host) { | 269 | if (block->b_host == host) { |
265 | block->b_status = nlm_lck_denied_grace_period; | 270 | block->b_status = nlm_lck_denied_grace_period; |
266 | wake_up(&block->b_wait); | 271 | wake_up(&block->b_wait); |
267 | } | 272 | } |
268 | } | 273 | } |
274 | spin_unlock(&nlm_blocked_lock); | ||
269 | 275 | ||
270 | /* Release host handle after use */ | 276 | /* Release host handle after use */ |
271 | nlm_release_host(host); | 277 | nlm_release_host(host); |
272 | lockd_down(); | 278 | lockd_down(); |
273 | unlock_kernel(); | ||
274 | return 0; | 279 | return 0; |
275 | } | 280 | } |
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 7932c399fab4..47ea1e1925b8 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c | |||
@@ -166,7 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) | |||
166 | /* Set up the argument struct */ | 166 | /* Set up the argument struct */ |
167 | nlmclnt_setlockargs(call, fl); | 167 | nlmclnt_setlockargs(call, fl); |
168 | 168 | ||
169 | lock_kernel(); | ||
170 | if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { | 169 | if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { |
171 | if (fl->fl_type != F_UNLCK) { | 170 | if (fl->fl_type != F_UNLCK) { |
172 | call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; | 171 | call->a_args.block = IS_SETLKW(cmd) ? 1 : 0; |
@@ -177,10 +176,8 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) | |||
177 | status = nlmclnt_test(call, fl); | 176 | status = nlmclnt_test(call, fl); |
178 | else | 177 | else |
179 | status = -EINVAL; | 178 | status = -EINVAL; |
180 | |||
181 | fl->fl_ops->fl_release_private(fl); | 179 | fl->fl_ops->fl_release_private(fl); |
182 | fl->fl_ops = NULL; | 180 | fl->fl_ops = NULL; |
183 | unlock_kernel(); | ||
184 | 181 | ||
185 | dprintk("lockd: clnt proc returns %d\n", status); | 182 | dprintk("lockd: clnt proc returns %d\n", status); |
186 | return status; | 183 | return status; |
@@ -226,9 +223,7 @@ void nlm_release_call(struct nlm_rqst *call) | |||
226 | 223 | ||
227 | static void nlmclnt_rpc_release(void *data) | 224 | static void nlmclnt_rpc_release(void *data) |
228 | { | 225 | { |
229 | lock_kernel(); | ||
230 | nlm_release_call(data); | 226 | nlm_release_call(data); |
231 | unlock_kernel(); | ||
232 | } | 227 | } |
233 | 228 | ||
234 | static int nlm_wait_on_grace(wait_queue_head_t *queue) | 229 | static int nlm_wait_on_grace(wait_queue_head_t *queue) |
@@ -448,14 +443,18 @@ out: | |||
448 | 443 | ||
449 | static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) | 444 | static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) |
450 | { | 445 | { |
446 | spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); | ||
451 | new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; | 447 | new->fl_u.nfs_fl.state = fl->fl_u.nfs_fl.state; |
452 | new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); | 448 | new->fl_u.nfs_fl.owner = nlm_get_lockowner(fl->fl_u.nfs_fl.owner); |
453 | list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); | 449 | list_add_tail(&new->fl_u.nfs_fl.list, &fl->fl_u.nfs_fl.owner->host->h_granted); |
450 | spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); | ||
454 | } | 451 | } |
455 | 452 | ||
456 | static void nlmclnt_locks_release_private(struct file_lock *fl) | 453 | static void nlmclnt_locks_release_private(struct file_lock *fl) |
457 | { | 454 | { |
455 | spin_lock(&fl->fl_u.nfs_fl.owner->host->h_lock); | ||
458 | list_del(&fl->fl_u.nfs_fl.list); | 456 | list_del(&fl->fl_u.nfs_fl.list); |
457 | spin_unlock(&fl->fl_u.nfs_fl.owner->host->h_lock); | ||
459 | nlm_put_lockowner(fl->fl_u.nfs_fl.owner); | 458 | nlm_put_lockowner(fl->fl_u.nfs_fl.owner); |
460 | } | 459 | } |
461 | 460 | ||
@@ -721,9 +720,7 @@ static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) | |||
721 | die: | 720 | die: |
722 | return; | 721 | return; |
723 | retry_rebind: | 722 | retry_rebind: |
724 | lock_kernel(); | ||
725 | nlm_rebind_host(req->a_host); | 723 | nlm_rebind_host(req->a_host); |
726 | unlock_kernel(); | ||
727 | retry_unlock: | 724 | retry_unlock: |
728 | rpc_restart_call(task); | 725 | rpc_restart_call(task); |
729 | } | 726 | } |
@@ -801,9 +798,7 @@ retry_cancel: | |||
801 | /* Don't ever retry more than 3 times */ | 798 | /* Don't ever retry more than 3 times */ |
802 | if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) | 799 | if (req->a_retries++ >= NLMCLNT_MAX_RETRIES) |
803 | goto die; | 800 | goto die; |
804 | lock_kernel(); | ||
805 | nlm_rebind_host(req->a_host); | 801 | nlm_rebind_host(req->a_host); |
806 | unlock_kernel(); | ||
807 | rpc_restart_call(task); | 802 | rpc_restart_call(task); |
808 | rpc_delay(task, 30 * HZ); | 803 | rpc_delay(task, 30 * HZ); |
809 | } | 804 | } |
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index bb464d12104c..25e21e4023b2 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
@@ -353,6 +353,7 @@ nlm_bind_host(struct nlm_host *host) | |||
353 | .to_retries = 5U, | 353 | .to_retries = 5U, |
354 | }; | 354 | }; |
355 | struct rpc_create_args args = { | 355 | struct rpc_create_args args = { |
356 | .net = &init_net, | ||
356 | .protocol = host->h_proto, | 357 | .protocol = host->h_proto, |
357 | .address = nlm_addr(host), | 358 | .address = nlm_addr(host), |
358 | .addrsize = host->h_addrlen, | 359 | .addrsize = host->h_addrlen, |
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e3015464fbab..e0c918949644 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
@@ -69,6 +69,7 @@ static struct rpc_clnt *nsm_create(void) | |||
69 | .sin_addr.s_addr = htonl(INADDR_LOOPBACK), | 69 | .sin_addr.s_addr = htonl(INADDR_LOOPBACK), |
70 | }; | 70 | }; |
71 | struct rpc_create_args args = { | 71 | struct rpc_create_args args = { |
72 | .net = &init_net, | ||
72 | .protocol = XPRT_TRANSPORT_UDP, | 73 | .protocol = XPRT_TRANSPORT_UDP, |
73 | .address = (struct sockaddr *)&sin, | 74 | .address = (struct sockaddr *)&sin, |
74 | .addrsize = sizeof(sin), | 75 | .addrsize = sizeof(sin), |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index f1bacf1a0391..abfff9d7979d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/in.h> | 22 | #include <linux/in.h> |
23 | #include <linux/uio.h> | 23 | #include <linux/uio.h> |
24 | #include <linux/smp.h> | 24 | #include <linux/smp.h> |
25 | #include <linux/smp_lock.h> | ||
26 | #include <linux/mutex.h> | 25 | #include <linux/mutex.h> |
27 | #include <linux/kthread.h> | 26 | #include <linux/kthread.h> |
28 | #include <linux/freezer.h> | 27 | #include <linux/freezer.h> |
@@ -130,15 +129,6 @@ lockd(void *vrqstp) | |||
130 | 129 | ||
131 | dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); | 130 | dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n"); |
132 | 131 | ||
133 | /* | ||
134 | * FIXME: it would be nice if lockd didn't spend its entire life | ||
135 | * running under the BKL. At the very least, it would be good to | ||
136 | * have someone clarify what it's intended to protect here. I've | ||
137 | * seen some handwavy posts about posix locking needing to be | ||
138 | * done under the BKL, but it's far from clear. | ||
139 | */ | ||
140 | lock_kernel(); | ||
141 | |||
142 | if (!nlm_timeout) | 132 | if (!nlm_timeout) |
143 | nlm_timeout = LOCKD_DFLT_TIMEO; | 133 | nlm_timeout = LOCKD_DFLT_TIMEO; |
144 | nlmsvc_timeout = nlm_timeout * HZ; | 134 | nlmsvc_timeout = nlm_timeout * HZ; |
@@ -195,7 +185,6 @@ lockd(void *vrqstp) | |||
195 | if (nlmsvc_ops) | 185 | if (nlmsvc_ops) |
196 | nlmsvc_invalidate_all(); | 186 | nlmsvc_invalidate_all(); |
197 | nlm_shutdown_hosts(); | 187 | nlm_shutdown_hosts(); |
198 | unlock_kernel(); | ||
199 | return 0; | 188 | return 0; |
200 | } | 189 | } |
201 | 190 | ||
@@ -206,7 +195,7 @@ static int create_lockd_listener(struct svc_serv *serv, const char *name, | |||
206 | 195 | ||
207 | xprt = svc_find_xprt(serv, name, family, 0); | 196 | xprt = svc_find_xprt(serv, name, family, 0); |
208 | if (xprt == NULL) | 197 | if (xprt == NULL) |
209 | return svc_create_xprt(serv, name, family, port, | 198 | return svc_create_xprt(serv, name, &init_net, family, port, |
210 | SVC_SOCK_DEFAULTS); | 199 | SVC_SOCK_DEFAULTS); |
211 | svc_xprt_put(xprt); | 200 | svc_xprt_put(xprt); |
212 | return 0; | 201 | return 0; |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 031c6569a134..a336e832475d 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
@@ -230,9 +230,7 @@ static void nlm4svc_callback_exit(struct rpc_task *task, void *data) | |||
230 | 230 | ||
231 | static void nlm4svc_callback_release(void *data) | 231 | static void nlm4svc_callback_release(void *data) |
232 | { | 232 | { |
233 | lock_kernel(); | ||
234 | nlm_release_call(data); | 233 | nlm_release_call(data); |
235 | unlock_kernel(); | ||
236 | } | 234 | } |
237 | 235 | ||
238 | static const struct rpc_call_ops nlm4svc_callback_ops = { | 236 | static const struct rpc_call_ops nlm4svc_callback_ops = { |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 84055d31bfc5..c462d346acbd 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
@@ -52,12 +52,13 @@ static const struct rpc_call_ops nlmsvc_grant_ops; | |||
52 | * The list of blocked locks to retry | 52 | * The list of blocked locks to retry |
53 | */ | 53 | */ |
54 | static LIST_HEAD(nlm_blocked); | 54 | static LIST_HEAD(nlm_blocked); |
55 | static DEFINE_SPINLOCK(nlm_blocked_lock); | ||
55 | 56 | ||
56 | /* | 57 | /* |
57 | * Insert a blocked lock into the global list | 58 | * Insert a blocked lock into the global list |
58 | */ | 59 | */ |
59 | static void | 60 | static void |
60 | nlmsvc_insert_block(struct nlm_block *block, unsigned long when) | 61 | nlmsvc_insert_block_locked(struct nlm_block *block, unsigned long when) |
61 | { | 62 | { |
62 | struct nlm_block *b; | 63 | struct nlm_block *b; |
63 | struct list_head *pos; | 64 | struct list_head *pos; |
@@ -87,6 +88,13 @@ nlmsvc_insert_block(struct nlm_block *block, unsigned long when) | |||
87 | block->b_when = when; | 88 | block->b_when = when; |
88 | } | 89 | } |
89 | 90 | ||
91 | static void nlmsvc_insert_block(struct nlm_block *block, unsigned long when) | ||
92 | { | ||
93 | spin_lock(&nlm_blocked_lock); | ||
94 | nlmsvc_insert_block_locked(block, when); | ||
95 | spin_unlock(&nlm_blocked_lock); | ||
96 | } | ||
97 | |||
90 | /* | 98 | /* |
91 | * Remove a block from the global list | 99 | * Remove a block from the global list |
92 | */ | 100 | */ |
@@ -94,7 +102,9 @@ static inline void | |||
94 | nlmsvc_remove_block(struct nlm_block *block) | 102 | nlmsvc_remove_block(struct nlm_block *block) |
95 | { | 103 | { |
96 | if (!list_empty(&block->b_list)) { | 104 | if (!list_empty(&block->b_list)) { |
105 | spin_lock(&nlm_blocked_lock); | ||
97 | list_del_init(&block->b_list); | 106 | list_del_init(&block->b_list); |
107 | spin_unlock(&nlm_blocked_lock); | ||
98 | nlmsvc_release_block(block); | 108 | nlmsvc_release_block(block); |
99 | } | 109 | } |
100 | } | 110 | } |
@@ -651,7 +661,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, | |||
651 | struct nlm_block *block; | 661 | struct nlm_block *block; |
652 | int rc = -ENOENT; | 662 | int rc = -ENOENT; |
653 | 663 | ||
654 | lock_kernel(); | 664 | spin_lock(&nlm_blocked_lock); |
655 | list_for_each_entry(block, &nlm_blocked, b_list) { | 665 | list_for_each_entry(block, &nlm_blocked, b_list) { |
656 | if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { | 666 | if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { |
657 | dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", | 667 | dprintk("lockd: nlmsvc_notify_blocked block %p flags %d\n", |
@@ -665,13 +675,13 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, | |||
665 | } else if (result == 0) | 675 | } else if (result == 0) |
666 | block->b_granted = 1; | 676 | block->b_granted = 1; |
667 | 677 | ||
668 | nlmsvc_insert_block(block, 0); | 678 | nlmsvc_insert_block_locked(block, 0); |
669 | svc_wake_up(block->b_daemon); | 679 | svc_wake_up(block->b_daemon); |
670 | rc = 0; | 680 | rc = 0; |
671 | break; | 681 | break; |
672 | } | 682 | } |
673 | } | 683 | } |
674 | unlock_kernel(); | 684 | spin_unlock(&nlm_blocked_lock); |
675 | if (rc == -ENOENT) | 685 | if (rc == -ENOENT) |
676 | printk(KERN_WARNING "lockd: grant for unknown block\n"); | 686 | printk(KERN_WARNING "lockd: grant for unknown block\n"); |
677 | return rc; | 687 | return rc; |
@@ -690,14 +700,16 @@ nlmsvc_notify_blocked(struct file_lock *fl) | |||
690 | struct nlm_block *block; | 700 | struct nlm_block *block; |
691 | 701 | ||
692 | dprintk("lockd: VFS unblock notification for block %p\n", fl); | 702 | dprintk("lockd: VFS unblock notification for block %p\n", fl); |
703 | spin_lock(&nlm_blocked_lock); | ||
693 | list_for_each_entry(block, &nlm_blocked, b_list) { | 704 | list_for_each_entry(block, &nlm_blocked, b_list) { |
694 | if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { | 705 | if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) { |
695 | nlmsvc_insert_block(block, 0); | 706 | nlmsvc_insert_block_locked(block, 0); |
707 | spin_unlock(&nlm_blocked_lock); | ||
696 | svc_wake_up(block->b_daemon); | 708 | svc_wake_up(block->b_daemon); |
697 | return; | 709 | return; |
698 | } | 710 | } |
699 | } | 711 | } |
700 | 712 | spin_unlock(&nlm_blocked_lock); | |
701 | printk(KERN_WARNING "lockd: notification for unknown block!\n"); | 713 | printk(KERN_WARNING "lockd: notification for unknown block!\n"); |
702 | } | 714 | } |
703 | 715 | ||
@@ -803,7 +815,7 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) | |||
803 | 815 | ||
804 | dprintk("lockd: GRANT_MSG RPC callback\n"); | 816 | dprintk("lockd: GRANT_MSG RPC callback\n"); |
805 | 817 | ||
806 | lock_kernel(); | 818 | spin_lock(&nlm_blocked_lock); |
807 | /* if the block is not on a list at this point then it has | 819 | /* if the block is not on a list at this point then it has |
808 | * been invalidated. Don't try to requeue it. | 820 | * been invalidated. Don't try to requeue it. |
809 | * | 821 | * |
@@ -825,19 +837,20 @@ static void nlmsvc_grant_callback(struct rpc_task *task, void *data) | |||
825 | /* Call was successful, now wait for client callback */ | 837 | /* Call was successful, now wait for client callback */ |
826 | timeout = 60 * HZ; | 838 | timeout = 60 * HZ; |
827 | } | 839 | } |
828 | nlmsvc_insert_block(block, timeout); | 840 | nlmsvc_insert_block_locked(block, timeout); |
829 | svc_wake_up(block->b_daemon); | 841 | svc_wake_up(block->b_daemon); |
830 | out: | 842 | out: |
831 | unlock_kernel(); | 843 | spin_unlock(&nlm_blocked_lock); |
832 | } | 844 | } |
833 | 845 | ||
846 | /* | ||
847 | * FIXME: nlmsvc_release_block() grabs a mutex. This is not allowed for an | ||
848 | * .rpc_release rpc_call_op | ||
849 | */ | ||
834 | static void nlmsvc_grant_release(void *data) | 850 | static void nlmsvc_grant_release(void *data) |
835 | { | 851 | { |
836 | struct nlm_rqst *call = data; | 852 | struct nlm_rqst *call = data; |
837 | |||
838 | lock_kernel(); | ||
839 | nlmsvc_release_block(call->a_block); | 853 | nlmsvc_release_block(call->a_block); |
840 | unlock_kernel(); | ||
841 | } | 854 | } |
842 | 855 | ||
843 | static const struct rpc_call_ops nlmsvc_grant_ops = { | 856 | static const struct rpc_call_ops nlmsvc_grant_ops = { |
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 0f2ab741ae7c..c3069f38d602 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
@@ -260,9 +260,7 @@ static void nlmsvc_callback_exit(struct rpc_task *task, void *data) | |||
260 | 260 | ||
261 | static void nlmsvc_callback_release(void *data) | 261 | static void nlmsvc_callback_release(void *data) |
262 | { | 262 | { |
263 | lock_kernel(); | ||
264 | nlm_release_call(data); | 263 | nlm_release_call(data); |
265 | unlock_kernel(); | ||
266 | } | 264 | } |
267 | 265 | ||
268 | static const struct rpc_call_ops nlmsvc_callback_ops = { | 266 | static const struct rpc_call_ops nlmsvc_callback_ops = { |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index d0ef94cfb3da..1ca0679c80bf 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
@@ -170,6 +170,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file, | |||
170 | 170 | ||
171 | again: | 171 | again: |
172 | file->f_locks = 0; | 172 | file->f_locks = 0; |
173 | lock_flocks(); /* protects i_flock list */ | ||
173 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 174 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { |
174 | if (fl->fl_lmops != &nlmsvc_lock_operations) | 175 | if (fl->fl_lmops != &nlmsvc_lock_operations) |
175 | continue; | 176 | continue; |
@@ -181,6 +182,7 @@ again: | |||
181 | if (match(lockhost, host)) { | 182 | if (match(lockhost, host)) { |
182 | struct file_lock lock = *fl; | 183 | struct file_lock lock = *fl; |
183 | 184 | ||
185 | unlock_flocks(); | ||
184 | lock.fl_type = F_UNLCK; | 186 | lock.fl_type = F_UNLCK; |
185 | lock.fl_start = 0; | 187 | lock.fl_start = 0; |
186 | lock.fl_end = OFFSET_MAX; | 188 | lock.fl_end = OFFSET_MAX; |
@@ -192,6 +194,7 @@ again: | |||
192 | goto again; | 194 | goto again; |
193 | } | 195 | } |
194 | } | 196 | } |
197 | unlock_flocks(); | ||
195 | 198 | ||
196 | return 0; | 199 | return 0; |
197 | } | 200 | } |
@@ -226,10 +229,14 @@ nlm_file_inuse(struct nlm_file *file) | |||
226 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) | 229 | if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares) |
227 | return 1; | 230 | return 1; |
228 | 231 | ||
232 | lock_flocks(); | ||
229 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { | 233 | for (fl = inode->i_flock; fl; fl = fl->fl_next) { |
230 | if (fl->fl_lmops == &nlmsvc_lock_operations) | 234 | if (fl->fl_lmops == &nlmsvc_lock_operations) { |
235 | unlock_flocks(); | ||
231 | return 1; | 236 | return 1; |
237 | } | ||
232 | } | 238 | } |
239 | unlock_flocks(); | ||
233 | file->f_locks = 0; | 240 | file->f_locks = 0; |
234 | return 0; | 241 | return 0; |
235 | } | 242 | } |
diff --git a/fs/locks.c b/fs/locks.c index ab24d49fc048..50ec15927aab 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -142,14 +142,32 @@ int lease_break_time = 45; | |||
142 | 142 | ||
143 | static LIST_HEAD(file_lock_list); | 143 | static LIST_HEAD(file_lock_list); |
144 | static LIST_HEAD(blocked_list); | 144 | static LIST_HEAD(blocked_list); |
145 | static DEFINE_SPINLOCK(file_lock_lock); | ||
146 | |||
147 | /* | ||
148 | * Protects the two list heads above, plus the inode->i_flock list | ||
149 | * FIXME: should use a spinlock, once lockd and ceph are ready. | ||
150 | */ | ||
151 | void lock_flocks(void) | ||
152 | { | ||
153 | spin_lock(&file_lock_lock); | ||
154 | } | ||
155 | EXPORT_SYMBOL_GPL(lock_flocks); | ||
156 | |||
157 | void unlock_flocks(void) | ||
158 | { | ||
159 | spin_unlock(&file_lock_lock); | ||
160 | } | ||
161 | EXPORT_SYMBOL_GPL(unlock_flocks); | ||
145 | 162 | ||
146 | static struct kmem_cache *filelock_cache __read_mostly; | 163 | static struct kmem_cache *filelock_cache __read_mostly; |
147 | 164 | ||
148 | /* Allocate an empty lock structure. */ | 165 | /* Allocate an empty lock structure. */ |
149 | static struct file_lock *locks_alloc_lock(void) | 166 | struct file_lock *locks_alloc_lock(void) |
150 | { | 167 | { |
151 | return kmem_cache_alloc(filelock_cache, GFP_KERNEL); | 168 | return kmem_cache_alloc(filelock_cache, GFP_KERNEL); |
152 | } | 169 | } |
170 | EXPORT_SYMBOL_GPL(locks_alloc_lock); | ||
153 | 171 | ||
154 | void locks_release_private(struct file_lock *fl) | 172 | void locks_release_private(struct file_lock *fl) |
155 | { | 173 | { |
@@ -511,9 +529,9 @@ static void __locks_delete_block(struct file_lock *waiter) | |||
511 | */ | 529 | */ |
512 | static void locks_delete_block(struct file_lock *waiter) | 530 | static void locks_delete_block(struct file_lock *waiter) |
513 | { | 531 | { |
514 | lock_kernel(); | 532 | lock_flocks(); |
515 | __locks_delete_block(waiter); | 533 | __locks_delete_block(waiter); |
516 | unlock_kernel(); | 534 | unlock_flocks(); |
517 | } | 535 | } |
518 | 536 | ||
519 | /* Insert waiter into blocker's block list. | 537 | /* Insert waiter into blocker's block list. |
@@ -644,7 +662,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
644 | { | 662 | { |
645 | struct file_lock *cfl; | 663 | struct file_lock *cfl; |
646 | 664 | ||
647 | lock_kernel(); | 665 | lock_flocks(); |
648 | for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { | 666 | for (cfl = filp->f_path.dentry->d_inode->i_flock; cfl; cfl = cfl->fl_next) { |
649 | if (!IS_POSIX(cfl)) | 667 | if (!IS_POSIX(cfl)) |
650 | continue; | 668 | continue; |
@@ -657,7 +675,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) | |||
657 | fl->fl_pid = pid_vnr(cfl->fl_nspid); | 675 | fl->fl_pid = pid_vnr(cfl->fl_nspid); |
658 | } else | 676 | } else |
659 | fl->fl_type = F_UNLCK; | 677 | fl->fl_type = F_UNLCK; |
660 | unlock_kernel(); | 678 | unlock_flocks(); |
661 | return; | 679 | return; |
662 | } | 680 | } |
663 | EXPORT_SYMBOL(posix_test_lock); | 681 | EXPORT_SYMBOL(posix_test_lock); |
@@ -730,18 +748,16 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
730 | int error = 0; | 748 | int error = 0; |
731 | int found = 0; | 749 | int found = 0; |
732 | 750 | ||
733 | lock_kernel(); | 751 | if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) { |
734 | if (request->fl_flags & FL_ACCESS) | ||
735 | goto find_conflict; | ||
736 | |||
737 | if (request->fl_type != F_UNLCK) { | ||
738 | error = -ENOMEM; | ||
739 | new_fl = locks_alloc_lock(); | 752 | new_fl = locks_alloc_lock(); |
740 | if (new_fl == NULL) | 753 | if (!new_fl) |
741 | goto out; | 754 | return -ENOMEM; |
742 | error = 0; | ||
743 | } | 755 | } |
744 | 756 | ||
757 | lock_flocks(); | ||
758 | if (request->fl_flags & FL_ACCESS) | ||
759 | goto find_conflict; | ||
760 | |||
745 | for_each_lock(inode, before) { | 761 | for_each_lock(inode, before) { |
746 | struct file_lock *fl = *before; | 762 | struct file_lock *fl = *before; |
747 | if (IS_POSIX(fl)) | 763 | if (IS_POSIX(fl)) |
@@ -767,8 +783,11 @@ static int flock_lock_file(struct file *filp, struct file_lock *request) | |||
767 | * If a higher-priority process was blocked on the old file lock, | 783 | * If a higher-priority process was blocked on the old file lock, |
768 | * give it the opportunity to lock the file. | 784 | * give it the opportunity to lock the file. |
769 | */ | 785 | */ |
770 | if (found) | 786 | if (found) { |
787 | unlock_flocks(); | ||
771 | cond_resched(); | 788 | cond_resched(); |
789 | lock_flocks(); | ||
790 | } | ||
772 | 791 | ||
773 | find_conflict: | 792 | find_conflict: |
774 | for_each_lock(inode, before) { | 793 | for_each_lock(inode, before) { |
@@ -794,7 +813,7 @@ find_conflict: | |||
794 | error = 0; | 813 | error = 0; |
795 | 814 | ||
796 | out: | 815 | out: |
797 | unlock_kernel(); | 816 | unlock_flocks(); |
798 | if (new_fl) | 817 | if (new_fl) |
799 | locks_free_lock(new_fl); | 818 | locks_free_lock(new_fl); |
800 | return error; | 819 | return error; |
@@ -823,7 +842,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
823 | new_fl2 = locks_alloc_lock(); | 842 | new_fl2 = locks_alloc_lock(); |
824 | } | 843 | } |
825 | 844 | ||
826 | lock_kernel(); | 845 | lock_flocks(); |
827 | if (request->fl_type != F_UNLCK) { | 846 | if (request->fl_type != F_UNLCK) { |
828 | for_each_lock(inode, before) { | 847 | for_each_lock(inode, before) { |
829 | fl = *before; | 848 | fl = *before; |
@@ -991,7 +1010,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str | |||
991 | locks_wake_up_blocks(left); | 1010 | locks_wake_up_blocks(left); |
992 | } | 1011 | } |
993 | out: | 1012 | out: |
994 | unlock_kernel(); | 1013 | unlock_flocks(); |
995 | /* | 1014 | /* |
996 | * Free any unused locks. | 1015 | * Free any unused locks. |
997 | */ | 1016 | */ |
@@ -1066,14 +1085,14 @@ int locks_mandatory_locked(struct inode *inode) | |||
1066 | /* | 1085 | /* |
1067 | * Search the lock list for this inode for any POSIX locks. | 1086 | * Search the lock list for this inode for any POSIX locks. |
1068 | */ | 1087 | */ |
1069 | lock_kernel(); | 1088 | lock_flocks(); |
1070 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1089 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1071 | if (!IS_POSIX(fl)) | 1090 | if (!IS_POSIX(fl)) |
1072 | continue; | 1091 | continue; |
1073 | if (fl->fl_owner != owner) | 1092 | if (fl->fl_owner != owner) |
1074 | break; | 1093 | break; |
1075 | } | 1094 | } |
1076 | unlock_kernel(); | 1095 | unlock_flocks(); |
1077 | return fl ? -EAGAIN : 0; | 1096 | return fl ? -EAGAIN : 0; |
1078 | } | 1097 | } |
1079 | 1098 | ||
@@ -1186,7 +1205,7 @@ int __break_lease(struct inode *inode, unsigned int mode) | |||
1186 | 1205 | ||
1187 | new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); | 1206 | new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK); |
1188 | 1207 | ||
1189 | lock_kernel(); | 1208 | lock_flocks(); |
1190 | 1209 | ||
1191 | time_out_leases(inode); | 1210 | time_out_leases(inode); |
1192 | 1211 | ||
@@ -1247,8 +1266,10 @@ restart: | |||
1247 | break_time++; | 1266 | break_time++; |
1248 | } | 1267 | } |
1249 | locks_insert_block(flock, new_fl); | 1268 | locks_insert_block(flock, new_fl); |
1269 | unlock_flocks(); | ||
1250 | error = wait_event_interruptible_timeout(new_fl->fl_wait, | 1270 | error = wait_event_interruptible_timeout(new_fl->fl_wait, |
1251 | !new_fl->fl_next, break_time); | 1271 | !new_fl->fl_next, break_time); |
1272 | lock_flocks(); | ||
1252 | __locks_delete_block(new_fl); | 1273 | __locks_delete_block(new_fl); |
1253 | if (error >= 0) { | 1274 | if (error >= 0) { |
1254 | if (error == 0) | 1275 | if (error == 0) |
@@ -1263,7 +1284,7 @@ restart: | |||
1263 | } | 1284 | } |
1264 | 1285 | ||
1265 | out: | 1286 | out: |
1266 | unlock_kernel(); | 1287 | unlock_flocks(); |
1267 | if (!IS_ERR(new_fl)) | 1288 | if (!IS_ERR(new_fl)) |
1268 | locks_free_lock(new_fl); | 1289 | locks_free_lock(new_fl); |
1269 | return error; | 1290 | return error; |
@@ -1319,7 +1340,7 @@ int fcntl_getlease(struct file *filp) | |||
1319 | struct file_lock *fl; | 1340 | struct file_lock *fl; |
1320 | int type = F_UNLCK; | 1341 | int type = F_UNLCK; |
1321 | 1342 | ||
1322 | lock_kernel(); | 1343 | lock_flocks(); |
1323 | time_out_leases(filp->f_path.dentry->d_inode); | 1344 | time_out_leases(filp->f_path.dentry->d_inode); |
1324 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); | 1345 | for (fl = filp->f_path.dentry->d_inode->i_flock; fl && IS_LEASE(fl); |
1325 | fl = fl->fl_next) { | 1346 | fl = fl->fl_next) { |
@@ -1328,7 +1349,7 @@ int fcntl_getlease(struct file *filp) | |||
1328 | break; | 1349 | break; |
1329 | } | 1350 | } |
1330 | } | 1351 | } |
1331 | unlock_kernel(); | 1352 | unlock_flocks(); |
1332 | return type; | 1353 | return type; |
1333 | } | 1354 | } |
1334 | 1355 | ||
@@ -1341,12 +1362,11 @@ int fcntl_getlease(struct file *filp) | |||
1341 | * The (input) flp->fl_lmops->fl_break function is required | 1362 | * The (input) flp->fl_lmops->fl_break function is required |
1342 | * by break_lease(). | 1363 | * by break_lease(). |
1343 | * | 1364 | * |
1344 | * Called with kernel lock held. | 1365 | * Called with file_lock_lock held. |
1345 | */ | 1366 | */ |
1346 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | 1367 | int generic_setlease(struct file *filp, long arg, struct file_lock **flp) |
1347 | { | 1368 | { |
1348 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1369 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
1349 | struct file_lock *new_fl = NULL; | ||
1350 | struct dentry *dentry = filp->f_path.dentry; | 1370 | struct dentry *dentry = filp->f_path.dentry; |
1351 | struct inode *inode = dentry->d_inode; | 1371 | struct inode *inode = dentry->d_inode; |
1352 | int error, rdlease_count = 0, wrlease_count = 0; | 1372 | int error, rdlease_count = 0, wrlease_count = 0; |
@@ -1366,11 +1386,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1366 | lease = *flp; | 1386 | lease = *flp; |
1367 | 1387 | ||
1368 | if (arg != F_UNLCK) { | 1388 | if (arg != F_UNLCK) { |
1369 | error = -ENOMEM; | ||
1370 | new_fl = locks_alloc_lock(); | ||
1371 | if (new_fl == NULL) | ||
1372 | goto out; | ||
1373 | |||
1374 | error = -EAGAIN; | 1389 | error = -EAGAIN; |
1375 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1390 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) |
1376 | goto out; | 1391 | goto out; |
@@ -1415,7 +1430,6 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1415 | goto out; | 1430 | goto out; |
1416 | } | 1431 | } |
1417 | 1432 | ||
1418 | error = 0; | ||
1419 | if (arg == F_UNLCK) | 1433 | if (arg == F_UNLCK) |
1420 | goto out; | 1434 | goto out; |
1421 | 1435 | ||
@@ -1423,20 +1437,24 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) | |||
1423 | if (!leases_enable) | 1437 | if (!leases_enable) |
1424 | goto out; | 1438 | goto out; |
1425 | 1439 | ||
1426 | locks_copy_lock(new_fl, lease); | 1440 | locks_insert_lock(before, lease); |
1427 | locks_insert_lock(before, new_fl); | ||
1428 | |||
1429 | *flp = new_fl; | ||
1430 | return 0; | 1441 | return 0; |
1431 | 1442 | ||
1432 | out: | 1443 | out: |
1433 | if (new_fl != NULL) | 1444 | locks_free_lock(lease); |
1434 | locks_free_lock(new_fl); | ||
1435 | return error; | 1445 | return error; |
1436 | } | 1446 | } |
1437 | EXPORT_SYMBOL(generic_setlease); | 1447 | EXPORT_SYMBOL(generic_setlease); |
1438 | 1448 | ||
1439 | /** | 1449 | static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) |
1450 | { | ||
1451 | if (filp->f_op && filp->f_op->setlease) | ||
1452 | return filp->f_op->setlease(filp, arg, lease); | ||
1453 | else | ||
1454 | return generic_setlease(filp, arg, lease); | ||
1455 | } | ||
1456 | |||
1457 | /** | ||
1440 | * vfs_setlease - sets a lease on an open file | 1458 | * vfs_setlease - sets a lease on an open file |
1441 | * @filp: file pointer | 1459 | * @filp: file pointer |
1442 | * @arg: type of lease to obtain | 1460 | * @arg: type of lease to obtain |
@@ -1467,12 +1485,9 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease) | |||
1467 | { | 1485 | { |
1468 | int error; | 1486 | int error; |
1469 | 1487 | ||
1470 | lock_kernel(); | 1488 | lock_flocks(); |
1471 | if (filp->f_op && filp->f_op->setlease) | 1489 | error = __vfs_setlease(filp, arg, lease); |
1472 | error = filp->f_op->setlease(filp, arg, lease); | 1490 | unlock_flocks(); |
1473 | else | ||
1474 | error = generic_setlease(filp, arg, lease); | ||
1475 | unlock_kernel(); | ||
1476 | 1491 | ||
1477 | return error; | 1492 | return error; |
1478 | } | 1493 | } |
@@ -1490,33 +1505,47 @@ EXPORT_SYMBOL_GPL(vfs_setlease); | |||
1490 | */ | 1505 | */ |
1491 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) | 1506 | int fcntl_setlease(unsigned int fd, struct file *filp, long arg) |
1492 | { | 1507 | { |
1493 | struct file_lock fl, *flp = &fl; | 1508 | struct file_lock *fl; |
1509 | struct fasync_struct *new; | ||
1494 | struct inode *inode = filp->f_path.dentry->d_inode; | 1510 | struct inode *inode = filp->f_path.dentry->d_inode; |
1495 | int error; | 1511 | int error; |
1496 | 1512 | ||
1497 | locks_init_lock(&fl); | 1513 | fl = lease_alloc(filp, arg); |
1498 | error = lease_init(filp, arg, &fl); | 1514 | if (IS_ERR(fl)) |
1499 | if (error) | 1515 | return PTR_ERR(fl); |
1500 | return error; | ||
1501 | 1516 | ||
1502 | lock_kernel(); | 1517 | new = fasync_alloc(); |
1503 | 1518 | if (!new) { | |
1504 | error = vfs_setlease(filp, arg, &flp); | 1519 | locks_free_lock(fl); |
1520 | return -ENOMEM; | ||
1521 | } | ||
1522 | lock_flocks(); | ||
1523 | error = __vfs_setlease(filp, arg, &fl); | ||
1505 | if (error || arg == F_UNLCK) | 1524 | if (error || arg == F_UNLCK) |
1506 | goto out_unlock; | 1525 | goto out_unlock; |
1507 | 1526 | ||
1508 | error = fasync_helper(fd, filp, 1, &flp->fl_fasync); | 1527 | /* |
1528 | * fasync_insert_entry() returns the old entry if any. | ||
1529 | * If there was no old entry, then it used 'new' and | ||
1530 | * inserted it into the fasync list. Clear new so that | ||
1531 | * we don't release it here. | ||
1532 | */ | ||
1533 | if (!fasync_insert_entry(fd, filp, &fl->fl_fasync, new)) | ||
1534 | new = NULL; | ||
1535 | |||
1509 | if (error < 0) { | 1536 | if (error < 0) { |
1510 | /* remove lease just inserted by setlease */ | 1537 | /* remove lease just inserted by setlease */ |
1511 | flp->fl_type = F_UNLCK | F_INPROGRESS; | 1538 | fl->fl_type = F_UNLCK | F_INPROGRESS; |
1512 | flp->fl_break_time = jiffies - 10; | 1539 | fl->fl_break_time = jiffies - 10; |
1513 | time_out_leases(inode); | 1540 | time_out_leases(inode); |
1514 | goto out_unlock; | 1541 | goto out_unlock; |
1515 | } | 1542 | } |
1516 | 1543 | ||
1517 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); | 1544 | error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); |
1518 | out_unlock: | 1545 | out_unlock: |
1519 | unlock_kernel(); | 1546 | unlock_flocks(); |
1547 | if (new) | ||
1548 | fasync_free(new); | ||
1520 | return error; | 1549 | return error; |
1521 | } | 1550 | } |
1522 | 1551 | ||
@@ -2020,7 +2049,7 @@ void locks_remove_flock(struct file *filp) | |||
2020 | fl.fl_ops->fl_release_private(&fl); | 2049 | fl.fl_ops->fl_release_private(&fl); |
2021 | } | 2050 | } |
2022 | 2051 | ||
2023 | lock_kernel(); | 2052 | lock_flocks(); |
2024 | before = &inode->i_flock; | 2053 | before = &inode->i_flock; |
2025 | 2054 | ||
2026 | while ((fl = *before) != NULL) { | 2055 | while ((fl = *before) != NULL) { |
@@ -2038,7 +2067,7 @@ void locks_remove_flock(struct file *filp) | |||
2038 | } | 2067 | } |
2039 | before = &fl->fl_next; | 2068 | before = &fl->fl_next; |
2040 | } | 2069 | } |
2041 | unlock_kernel(); | 2070 | unlock_flocks(); |
2042 | } | 2071 | } |
2043 | 2072 | ||
2044 | /** | 2073 | /** |
@@ -2053,12 +2082,12 @@ posix_unblock_lock(struct file *filp, struct file_lock *waiter) | |||
2053 | { | 2082 | { |
2054 | int status = 0; | 2083 | int status = 0; |
2055 | 2084 | ||
2056 | lock_kernel(); | 2085 | lock_flocks(); |
2057 | if (waiter->fl_next) | 2086 | if (waiter->fl_next) |
2058 | __locks_delete_block(waiter); | 2087 | __locks_delete_block(waiter); |
2059 | else | 2088 | else |
2060 | status = -ENOENT; | 2089 | status = -ENOENT; |
2061 | unlock_kernel(); | 2090 | unlock_flocks(); |
2062 | return status; | 2091 | return status; |
2063 | } | 2092 | } |
2064 | 2093 | ||
@@ -2085,7 +2114,7 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock); | |||
2085 | #include <linux/seq_file.h> | 2114 | #include <linux/seq_file.h> |
2086 | 2115 | ||
2087 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, | 2116 | static void lock_get_status(struct seq_file *f, struct file_lock *fl, |
2088 | int id, char *pfx) | 2117 | loff_t id, char *pfx) |
2089 | { | 2118 | { |
2090 | struct inode *inode = NULL; | 2119 | struct inode *inode = NULL; |
2091 | unsigned int fl_pid; | 2120 | unsigned int fl_pid; |
@@ -2098,7 +2127,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
2098 | if (fl->fl_file != NULL) | 2127 | if (fl->fl_file != NULL) |
2099 | inode = fl->fl_file->f_path.dentry->d_inode; | 2128 | inode = fl->fl_file->f_path.dentry->d_inode; |
2100 | 2129 | ||
2101 | seq_printf(f, "%d:%s ", id, pfx); | 2130 | seq_printf(f, "%lld:%s ", id, pfx); |
2102 | if (IS_POSIX(fl)) { | 2131 | if (IS_POSIX(fl)) { |
2103 | seq_printf(f, "%6s %s ", | 2132 | seq_printf(f, "%6s %s ", |
2104 | (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", | 2133 | (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", |
@@ -2161,30 +2190,33 @@ static int locks_show(struct seq_file *f, void *v) | |||
2161 | 2190 | ||
2162 | fl = list_entry(v, struct file_lock, fl_link); | 2191 | fl = list_entry(v, struct file_lock, fl_link); |
2163 | 2192 | ||
2164 | lock_get_status(f, fl, (long)f->private, ""); | 2193 | lock_get_status(f, fl, *((loff_t *)f->private), ""); |
2165 | 2194 | ||
2166 | list_for_each_entry(bfl, &fl->fl_block, fl_block) | 2195 | list_for_each_entry(bfl, &fl->fl_block, fl_block) |
2167 | lock_get_status(f, bfl, (long)f->private, " ->"); | 2196 | lock_get_status(f, bfl, *((loff_t *)f->private), " ->"); |
2168 | 2197 | ||
2169 | f->private++; | ||
2170 | return 0; | 2198 | return 0; |
2171 | } | 2199 | } |
2172 | 2200 | ||
2173 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2201 | static void *locks_start(struct seq_file *f, loff_t *pos) |
2174 | { | 2202 | { |
2175 | lock_kernel(); | 2203 | loff_t *p = f->private; |
2176 | f->private = (void *)1; | 2204 | |
2205 | lock_flocks(); | ||
2206 | *p = (*pos + 1); | ||
2177 | return seq_list_start(&file_lock_list, *pos); | 2207 | return seq_list_start(&file_lock_list, *pos); |
2178 | } | 2208 | } |
2179 | 2209 | ||
2180 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | 2210 | static void *locks_next(struct seq_file *f, void *v, loff_t *pos) |
2181 | { | 2211 | { |
2212 | loff_t *p = f->private; | ||
2213 | ++*p; | ||
2182 | return seq_list_next(v, &file_lock_list, pos); | 2214 | return seq_list_next(v, &file_lock_list, pos); |
2183 | } | 2215 | } |
2184 | 2216 | ||
2185 | static void locks_stop(struct seq_file *f, void *v) | 2217 | static void locks_stop(struct seq_file *f, void *v) |
2186 | { | 2218 | { |
2187 | unlock_kernel(); | 2219 | unlock_flocks(); |
2188 | } | 2220 | } |
2189 | 2221 | ||
2190 | static const struct seq_operations locks_seq_operations = { | 2222 | static const struct seq_operations locks_seq_operations = { |
@@ -2196,14 +2228,14 @@ static const struct seq_operations locks_seq_operations = { | |||
2196 | 2228 | ||
2197 | static int locks_open(struct inode *inode, struct file *filp) | 2229 | static int locks_open(struct inode *inode, struct file *filp) |
2198 | { | 2230 | { |
2199 | return seq_open(filp, &locks_seq_operations); | 2231 | return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t)); |
2200 | } | 2232 | } |
2201 | 2233 | ||
2202 | static const struct file_operations proc_locks_operations = { | 2234 | static const struct file_operations proc_locks_operations = { |
2203 | .open = locks_open, | 2235 | .open = locks_open, |
2204 | .read = seq_read, | 2236 | .read = seq_read, |
2205 | .llseek = seq_lseek, | 2237 | .llseek = seq_lseek, |
2206 | .release = seq_release, | 2238 | .release = seq_release_private, |
2207 | }; | 2239 | }; |
2208 | 2240 | ||
2209 | static int __init proc_locks_init(void) | 2241 | static int __init proc_locks_init(void) |
@@ -2231,7 +2263,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
2231 | { | 2263 | { |
2232 | struct file_lock *fl; | 2264 | struct file_lock *fl; |
2233 | int result = 1; | 2265 | int result = 1; |
2234 | lock_kernel(); | 2266 | lock_flocks(); |
2235 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2267 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2236 | if (IS_POSIX(fl)) { | 2268 | if (IS_POSIX(fl)) { |
2237 | if (fl->fl_type == F_RDLCK) | 2269 | if (fl->fl_type == F_RDLCK) |
@@ -2248,7 +2280,7 @@ int lock_may_read(struct inode *inode, loff_t start, unsigned long len) | |||
2248 | result = 0; | 2280 | result = 0; |
2249 | break; | 2281 | break; |
2250 | } | 2282 | } |
2251 | unlock_kernel(); | 2283 | unlock_flocks(); |
2252 | return result; | 2284 | return result; |
2253 | } | 2285 | } |
2254 | 2286 | ||
@@ -2271,7 +2303,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
2271 | { | 2303 | { |
2272 | struct file_lock *fl; | 2304 | struct file_lock *fl; |
2273 | int result = 1; | 2305 | int result = 1; |
2274 | lock_kernel(); | 2306 | lock_flocks(); |
2275 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 2307 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
2276 | if (IS_POSIX(fl)) { | 2308 | if (IS_POSIX(fl)) { |
2277 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) | 2309 | if ((fl->fl_end < start) || (fl->fl_start > (start + len))) |
@@ -2286,7 +2318,7 @@ int lock_may_write(struct inode *inode, loff_t start, unsigned long len) | |||
2286 | result = 0; | 2318 | result = 0; |
2287 | break; | 2319 | break; |
2288 | } | 2320 | } |
2289 | unlock_kernel(); | 2321 | unlock_flocks(); |
2290 | return result; | 2322 | return result; |
2291 | } | 2323 | } |
2292 | 2324 | ||
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 9777eb5b5522..409dfd65e9a1 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir, | |||
569 | return -EMLINK; | 569 | return -EMLINK; |
570 | 570 | ||
571 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 571 | inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
572 | atomic_inc(&inode->i_count); | 572 | ihold(inode); |
573 | inode->i_nlink++; | 573 | inode->i_nlink++; |
574 | mark_inode_dirty_sync(inode); | 574 | mark_inode_dirty_sync(inode); |
575 | 575 | ||
@@ -827,4 +827,5 @@ const struct file_operations logfs_dir_fops = { | |||
827 | .unlocked_ioctl = logfs_ioctl, | 827 | .unlocked_ioctl = logfs_ioctl, |
828 | .readdir = logfs_readdir, | 828 | .readdir = logfs_readdir, |
829 | .read = generic_read_dir, | 829 | .read = generic_read_dir, |
830 | .llseek = default_llseek, | ||
830 | }; | 831 | }; |
diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f3f3578393a4..c0d35a3accef 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c | |||
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, | |||
101 | 101 | ||
102 | inode->i_ctime = CURRENT_TIME_SEC; | 102 | inode->i_ctime = CURRENT_TIME_SEC; |
103 | inode_inc_link_count(inode); | 103 | inode_inc_link_count(inode); |
104 | atomic_inc(&inode->i_count); | 104 | ihold(inode); |
105 | return add_nondir(dentry, inode); | 105 | return add_nondir(dentry, inode); |
106 | } | 106 | } |
107 | 107 | ||
diff --git a/fs/namei.c b/fs/namei.c index 24896e833565..f7dbc06857ab 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1121,11 +1121,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, | |||
1121 | static struct dentry *__lookup_hash(struct qstr *name, | 1121 | static struct dentry *__lookup_hash(struct qstr *name, |
1122 | struct dentry *base, struct nameidata *nd) | 1122 | struct dentry *base, struct nameidata *nd) |
1123 | { | 1123 | { |
1124 | struct inode *inode = base->d_inode; | ||
1124 | struct dentry *dentry; | 1125 | struct dentry *dentry; |
1125 | struct inode *inode; | ||
1126 | int err; | 1126 | int err; |
1127 | 1127 | ||
1128 | inode = base->d_inode; | 1128 | err = exec_permission(inode); |
1129 | if (err) | ||
1130 | return ERR_PTR(err); | ||
1129 | 1131 | ||
1130 | /* | 1132 | /* |
1131 | * See if the low-level filesystem might want | 1133 | * See if the low-level filesystem might want |
@@ -1161,11 +1163,6 @@ out: | |||
1161 | */ | 1163 | */ |
1162 | static struct dentry *lookup_hash(struct nameidata *nd) | 1164 | static struct dentry *lookup_hash(struct nameidata *nd) |
1163 | { | 1165 | { |
1164 | int err; | ||
1165 | |||
1166 | err = exec_permission(nd->path.dentry->d_inode); | ||
1167 | if (err) | ||
1168 | return ERR_PTR(err); | ||
1169 | return __lookup_hash(&nd->last, nd->path.dentry, nd); | 1166 | return __lookup_hash(&nd->last, nd->path.dentry, nd); |
1170 | } | 1167 | } |
1171 | 1168 | ||
@@ -1213,9 +1210,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len) | |||
1213 | if (err) | 1210 | if (err) |
1214 | return ERR_PTR(err); | 1211 | return ERR_PTR(err); |
1215 | 1212 | ||
1216 | err = exec_permission(base->d_inode); | ||
1217 | if (err) | ||
1218 | return ERR_PTR(err); | ||
1219 | return __lookup_hash(&this, base, NULL); | 1213 | return __lookup_hash(&this, base, NULL); |
1220 | } | 1214 | } |
1221 | 1215 | ||
@@ -2291,7 +2285,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) | |||
2291 | goto slashes; | 2285 | goto slashes; |
2292 | inode = dentry->d_inode; | 2286 | inode = dentry->d_inode; |
2293 | if (inode) | 2287 | if (inode) |
2294 | atomic_inc(&inode->i_count); | 2288 | ihold(inode); |
2295 | error = mnt_want_write(nd.path.mnt); | 2289 | error = mnt_want_write(nd.path.mnt); |
2296 | if (error) | 2290 | if (error) |
2297 | goto exit2; | 2291 | goto exit2; |
diff --git a/fs/namespace.c b/fs/namespace.c index a72eaabfe8f2..8a415c9c5e55 100644 --- a/fs/namespace.c +++ b/fs/namespace.c | |||
@@ -595,7 +595,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root, | |||
595 | goto out_free; | 595 | goto out_free; |
596 | } | 596 | } |
597 | 597 | ||
598 | mnt->mnt_flags = old->mnt_flags; | 598 | mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD; |
599 | atomic_inc(&sb->s_active); | 599 | atomic_inc(&sb->s_active); |
600 | mnt->mnt_sb = sb; | 600 | mnt->mnt_sb = sb; |
601 | mnt->mnt_root = dget(root); | 601 | mnt->mnt_root = dget(root); |
@@ -1744,9 +1744,7 @@ static int do_new_mount(struct path *path, char *type, int flags, | |||
1744 | if (!capable(CAP_SYS_ADMIN)) | 1744 | if (!capable(CAP_SYS_ADMIN)) |
1745 | return -EPERM; | 1745 | return -EPERM; |
1746 | 1746 | ||
1747 | lock_kernel(); | ||
1748 | mnt = do_kern_mount(type, flags, name, data); | 1747 | mnt = do_kern_mount(type, flags, name, data); |
1749 | unlock_kernel(); | ||
1750 | if (IS_ERR(mnt)) | 1748 | if (IS_ERR(mnt)) |
1751 | return PTR_ERR(mnt); | 1749 | return PTR_ERR(mnt); |
1752 | 1750 | ||
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 9578cbe0cd58..aac8832e919e 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
@@ -95,6 +95,34 @@ const struct dentry_operations ncp_root_dentry_operations = | |||
95 | }; | 95 | }; |
96 | 96 | ||
97 | 97 | ||
98 | #define ncp_namespace(i) (NCP_SERVER(i)->name_space[NCP_FINFO(i)->volNumber]) | ||
99 | |||
100 | static inline int ncp_preserve_entry_case(struct inode *i, __u32 nscreator) | ||
101 | { | ||
102 | #ifdef CONFIG_NCPFS_SMALLDOS | ||
103 | int ns = ncp_namespace(i); | ||
104 | |||
105 | if ((ns == NW_NS_DOS) | ||
106 | #ifdef CONFIG_NCPFS_OS2_NS | ||
107 | || ((ns == NW_NS_OS2) && (nscreator == NW_NS_DOS)) | ||
108 | #endif /* CONFIG_NCPFS_OS2_NS */ | ||
109 | ) | ||
110 | return 0; | ||
111 | #endif /* CONFIG_NCPFS_SMALLDOS */ | ||
112 | return 1; | ||
113 | } | ||
114 | |||
115 | #define ncp_preserve_case(i) (ncp_namespace(i) != NW_NS_DOS) | ||
116 | |||
117 | static inline int ncp_case_sensitive(struct dentry *dentry) | ||
118 | { | ||
119 | #ifdef CONFIG_NCPFS_NFS_NS | ||
120 | return ncp_namespace(dentry->d_inode) == NW_NS_NFS; | ||
121 | #else | ||
122 | return 0; | ||
123 | #endif /* CONFIG_NCPFS_NFS_NS */ | ||
124 | } | ||
125 | |||
98 | /* | 126 | /* |
99 | * Note: leave the hash unchanged if the directory | 127 | * Note: leave the hash unchanged if the directory |
100 | * is case-sensitive. | 128 | * is case-sensitive. |
@@ -102,13 +130,12 @@ const struct dentry_operations ncp_root_dentry_operations = | |||
102 | static int | 130 | static int |
103 | ncp_hash_dentry(struct dentry *dentry, struct qstr *this) | 131 | ncp_hash_dentry(struct dentry *dentry, struct qstr *this) |
104 | { | 132 | { |
105 | struct nls_table *t; | 133 | if (!ncp_case_sensitive(dentry)) { |
106 | unsigned long hash; | 134 | struct nls_table *t; |
107 | int i; | 135 | unsigned long hash; |
108 | 136 | int i; | |
109 | t = NCP_IO_TABLE(dentry); | ||
110 | 137 | ||
111 | if (!ncp_case_sensitive(dentry->d_inode)) { | 138 | t = NCP_IO_TABLE(dentry); |
112 | hash = init_name_hash(); | 139 | hash = init_name_hash(); |
113 | for (i=0; i<this->len ; i++) | 140 | for (i=0; i<this->len ; i++) |
114 | hash = partial_name_hash(ncp_tolower(t, this->name[i]), | 141 | hash = partial_name_hash(ncp_tolower(t, this->name[i]), |
@@ -124,7 +151,7 @@ ncp_compare_dentry(struct dentry *dentry, struct qstr *a, struct qstr *b) | |||
124 | if (a->len != b->len) | 151 | if (a->len != b->len) |
125 | return 1; | 152 | return 1; |
126 | 153 | ||
127 | if (ncp_case_sensitive(dentry->d_inode)) | 154 | if (ncp_case_sensitive(dentry)) |
128 | return strncmp(a->name, b->name, a->len); | 155 | return strncmp(a->name, b->name, a->len); |
129 | 156 | ||
130 | return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); | 157 | return ncp_strnicmp(NCP_IO_TABLE(dentry), a->name, b->name, a->len); |
@@ -266,7 +293,7 @@ leave_me:; | |||
266 | 293 | ||
267 | 294 | ||
268 | static int | 295 | static int |
269 | __ncp_lookup_validate(struct dentry *dentry) | 296 | ncp_lookup_validate(struct dentry *dentry, struct nameidata *nd) |
270 | { | 297 | { |
271 | struct ncp_server *server; | 298 | struct ncp_server *server; |
272 | struct dentry *parent; | 299 | struct dentry *parent; |
@@ -283,9 +310,6 @@ __ncp_lookup_validate(struct dentry *dentry) | |||
283 | 310 | ||
284 | server = NCP_SERVER(dir); | 311 | server = NCP_SERVER(dir); |
285 | 312 | ||
286 | if (!ncp_conn_valid(server)) | ||
287 | goto finished; | ||
288 | |||
289 | /* | 313 | /* |
290 | * Inspired by smbfs: | 314 | * Inspired by smbfs: |
291 | * The default validation is based on dentry age: | 315 | * The default validation is based on dentry age: |
@@ -304,8 +328,11 @@ __ncp_lookup_validate(struct dentry *dentry) | |||
304 | if (ncp_is_server_root(dir)) { | 328 | if (ncp_is_server_root(dir)) { |
305 | res = ncp_io2vol(server, __name, &len, dentry->d_name.name, | 329 | res = ncp_io2vol(server, __name, &len, dentry->d_name.name, |
306 | dentry->d_name.len, 1); | 330 | dentry->d_name.len, 1); |
307 | if (!res) | 331 | if (!res) { |
308 | res = ncp_lookup_volume(server, __name, &(finfo.i)); | 332 | res = ncp_lookup_volume(server, __name, &(finfo.i)); |
333 | if (!res) | ||
334 | ncp_update_known_namespace(server, finfo.i.volNumber, NULL); | ||
335 | } | ||
309 | } else { | 336 | } else { |
310 | res = ncp_io2vol(server, __name, &len, dentry->d_name.name, | 337 | res = ncp_io2vol(server, __name, &len, dentry->d_name.name, |
311 | dentry->d_name.len, !ncp_preserve_case(dir)); | 338 | dentry->d_name.len, !ncp_preserve_case(dir)); |
@@ -320,13 +347,17 @@ __ncp_lookup_validate(struct dentry *dentry) | |||
320 | * what we remember, it's not valid any more. | 347 | * what we remember, it's not valid any more. |
321 | */ | 348 | */ |
322 | if (!res) { | 349 | if (!res) { |
323 | if (finfo.i.dirEntNum == NCP_FINFO(dentry->d_inode)->dirEntNum) { | 350 | struct inode *inode = dentry->d_inode; |
351 | |||
352 | mutex_lock(&inode->i_mutex); | ||
353 | if (finfo.i.dirEntNum == NCP_FINFO(inode)->dirEntNum) { | ||
324 | ncp_new_dentry(dentry); | 354 | ncp_new_dentry(dentry); |
325 | val=1; | 355 | val=1; |
326 | } else | 356 | } else |
327 | DDPRINTK("ncp_lookup_validate: found, but dirEntNum changed\n"); | 357 | DDPRINTK("ncp_lookup_validate: found, but dirEntNum changed\n"); |
328 | 358 | ||
329 | ncp_update_inode2(dentry->d_inode, &finfo); | 359 | ncp_update_inode2(inode, &finfo); |
360 | mutex_unlock(&inode->i_mutex); | ||
330 | } | 361 | } |
331 | 362 | ||
332 | finished: | 363 | finished: |
@@ -335,16 +366,6 @@ finished: | |||
335 | return val; | 366 | return val; |
336 | } | 367 | } |
337 | 368 | ||
338 | static int | ||
339 | ncp_lookup_validate(struct dentry * dentry, struct nameidata *nd) | ||
340 | { | ||
341 | int res; | ||
342 | lock_kernel(); | ||
343 | res = __ncp_lookup_validate(dentry); | ||
344 | unlock_kernel(); | ||
345 | return res; | ||
346 | } | ||
347 | |||
348 | static struct dentry * | 369 | static struct dentry * |
349 | ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) | 370 | ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos) |
350 | { | 371 | { |
@@ -411,8 +432,6 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
411 | int result, mtime_valid = 0; | 432 | int result, mtime_valid = 0; |
412 | time_t mtime = 0; | 433 | time_t mtime = 0; |
413 | 434 | ||
414 | lock_kernel(); | ||
415 | |||
416 | ctl.page = NULL; | 435 | ctl.page = NULL; |
417 | ctl.cache = NULL; | 436 | ctl.cache = NULL; |
418 | 437 | ||
@@ -421,6 +440,7 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
421 | (int) filp->f_pos); | 440 | (int) filp->f_pos); |
422 | 441 | ||
423 | result = -EIO; | 442 | result = -EIO; |
443 | /* Do not generate '.' and '..' when server is dead. */ | ||
424 | if (!ncp_conn_valid(server)) | 444 | if (!ncp_conn_valid(server)) |
425 | goto out; | 445 | goto out; |
426 | 446 | ||
@@ -532,6 +552,12 @@ read_really: | |||
532 | ctl.head.end = ctl.fpos - 1; | 552 | ctl.head.end = ctl.fpos - 1; |
533 | ctl.head.eof = ctl.valid; | 553 | ctl.head.eof = ctl.valid; |
534 | finished: | 554 | finished: |
555 | if (ctl.page) { | ||
556 | kunmap(ctl.page); | ||
557 | SetPageUptodate(ctl.page); | ||
558 | unlock_page(ctl.page); | ||
559 | page_cache_release(ctl.page); | ||
560 | } | ||
535 | if (page) { | 561 | if (page) { |
536 | cache->head = ctl.head; | 562 | cache->head = ctl.head; |
537 | kunmap(page); | 563 | kunmap(page); |
@@ -539,23 +565,17 @@ finished: | |||
539 | unlock_page(page); | 565 | unlock_page(page); |
540 | page_cache_release(page); | 566 | page_cache_release(page); |
541 | } | 567 | } |
542 | if (ctl.page) { | ||
543 | kunmap(ctl.page); | ||
544 | SetPageUptodate(ctl.page); | ||
545 | unlock_page(ctl.page); | ||
546 | page_cache_release(ctl.page); | ||
547 | } | ||
548 | out: | 568 | out: |
549 | unlock_kernel(); | ||
550 | return result; | 569 | return result; |
551 | } | 570 | } |
552 | 571 | ||
553 | static int | 572 | static int |
554 | ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 573 | ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, |
555 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry) | 574 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, |
575 | int inval_childs) | ||
556 | { | 576 | { |
557 | struct dentry *newdent, *dentry = filp->f_path.dentry; | 577 | struct dentry *newdent, *dentry = filp->f_path.dentry; |
558 | struct inode *newino, *inode = dentry->d_inode; | 578 | struct inode *dir = dentry->d_inode; |
559 | struct ncp_cache_control ctl = *ctrl; | 579 | struct ncp_cache_control ctl = *ctrl; |
560 | struct qstr qname; | 580 | struct qstr qname; |
561 | int valid = 0; | 581 | int valid = 0; |
@@ -564,9 +584,9 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | |||
564 | __u8 __name[NCP_MAXPATHLEN + 1]; | 584 | __u8 __name[NCP_MAXPATHLEN + 1]; |
565 | 585 | ||
566 | qname.len = sizeof(__name); | 586 | qname.len = sizeof(__name); |
567 | if (ncp_vol2io(NCP_SERVER(inode), __name, &qname.len, | 587 | if (ncp_vol2io(NCP_SERVER(dir), __name, &qname.len, |
568 | entry->i.entryName, entry->i.nameLen, | 588 | entry->i.entryName, entry->i.nameLen, |
569 | !ncp_preserve_entry_case(inode, entry->i.NSCreator))) | 589 | !ncp_preserve_entry_case(dir, entry->i.NSCreator))) |
570 | return 1; /* I'm not sure */ | 590 | return 1; /* I'm not sure */ |
571 | 591 | ||
572 | qname.name = __name; | 592 | qname.name = __name; |
@@ -584,22 +604,64 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | |||
584 | goto end_advance; | 604 | goto end_advance; |
585 | } else { | 605 | } else { |
586 | hashed = 1; | 606 | hashed = 1; |
587 | memcpy((char *) newdent->d_name.name, qname.name, | 607 | |
588 | newdent->d_name.len); | 608 | /* If case sensitivity changed for this volume, all entries below this one |
609 | should be thrown away. This entry itself is not affected, as its case | ||
610 | sensitivity is controlled by its own parent. */ | ||
611 | if (inval_childs) | ||
612 | shrink_dcache_parent(newdent); | ||
613 | |||
614 | /* | ||
615 | * It is not as dangerous as it looks. NetWare's OS2 namespace is | ||
616 | * case preserving yet case insensitive. So we update dentry's name | ||
617 | * as received from server. We found dentry via d_lookup with our | ||
618 | * hash, so we know that hash does not change, and so replacing name | ||
619 | * should be reasonably safe. | ||
620 | */ | ||
621 | if (qname.len == newdent->d_name.len && | ||
622 | memcmp(newdent->d_name.name, qname.name, newdent->d_name.len)) { | ||
623 | struct inode *inode = newdent->d_inode; | ||
624 | |||
625 | /* | ||
626 | * Inside ncpfs all uses of d_name are either for debugging, | ||
627 | * or on functions which acquire inode mutex (mknod, creat, | ||
628 | * lookup). So grab i_mutex here, to be sure. d_path | ||
629 | * uses dcache_lock when generating path, so we should too. | ||
630 | * And finally d_compare is protected by dentry's d_lock, so | ||
631 | * here we go. | ||
632 | */ | ||
633 | if (inode) | ||
634 | mutex_lock(&inode->i_mutex); | ||
635 | spin_lock(&dcache_lock); | ||
636 | spin_lock(&newdent->d_lock); | ||
637 | memcpy((char *) newdent->d_name.name, qname.name, | ||
638 | newdent->d_name.len); | ||
639 | spin_unlock(&newdent->d_lock); | ||
640 | spin_unlock(&dcache_lock); | ||
641 | if (inode) | ||
642 | mutex_unlock(&inode->i_mutex); | ||
643 | } | ||
589 | } | 644 | } |
590 | 645 | ||
591 | if (!newdent->d_inode) { | 646 | if (!newdent->d_inode) { |
647 | struct inode *inode; | ||
648 | |||
592 | entry->opened = 0; | 649 | entry->opened = 0; |
593 | entry->ino = iunique(inode->i_sb, 2); | 650 | entry->ino = iunique(dir->i_sb, 2); |
594 | newino = ncp_iget(inode->i_sb, entry); | 651 | inode = ncp_iget(dir->i_sb, entry); |
595 | if (newino) { | 652 | if (inode) { |
596 | newdent->d_op = &ncp_dentry_operations; | 653 | newdent->d_op = &ncp_dentry_operations; |
597 | d_instantiate(newdent, newino); | 654 | d_instantiate(newdent, inode); |
598 | if (!hashed) | 655 | if (!hashed) |
599 | d_rehash(newdent); | 656 | d_rehash(newdent); |
600 | } | 657 | } |
601 | } else | 658 | } else { |
602 | ncp_update_inode2(newdent->d_inode, entry); | 659 | struct inode *inode = newdent->d_inode; |
660 | |||
661 | mutex_lock(&inode->i_mutex); | ||
662 | ncp_update_inode2(inode, entry); | ||
663 | mutex_unlock(&inode->i_mutex); | ||
664 | } | ||
603 | 665 | ||
604 | if (newdent->d_inode) { | 666 | if (newdent->d_inode) { |
605 | ino = newdent->d_inode->i_ino; | 667 | ino = newdent->d_inode->i_ino; |
@@ -617,7 +679,7 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | |||
617 | ctl.cache = NULL; | 679 | ctl.cache = NULL; |
618 | ctl.idx -= NCP_DIRCACHE_SIZE; | 680 | ctl.idx -= NCP_DIRCACHE_SIZE; |
619 | ctl.ofs += 1; | 681 | ctl.ofs += 1; |
620 | ctl.page = grab_cache_page(&inode->i_data, ctl.ofs); | 682 | ctl.page = grab_cache_page(&dir->i_data, ctl.ofs); |
621 | if (ctl.page) | 683 | if (ctl.page) |
622 | ctl.cache = kmap(ctl.page); | 684 | ctl.cache = kmap(ctl.page); |
623 | } | 685 | } |
@@ -633,7 +695,7 @@ end_advance: | |||
633 | if (!ino) | 695 | if (!ino) |
634 | ino = find_inode_number(dentry, &qname); | 696 | ino = find_inode_number(dentry, &qname); |
635 | if (!ino) | 697 | if (!ino) |
636 | ino = iunique(inode->i_sb, 2); | 698 | ino = iunique(dir->i_sb, 2); |
637 | ctl.filled = filldir(dirent, qname.name, qname.len, | 699 | ctl.filled = filldir(dirent, qname.name, qname.len, |
638 | filp->f_pos, ino, DT_UNKNOWN); | 700 | filp->f_pos, ino, DT_UNKNOWN); |
639 | if (!ctl.filled) | 701 | if (!ctl.filled) |
@@ -660,6 +722,7 @@ ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | |||
660 | (unsigned long) filp->f_pos); | 722 | (unsigned long) filp->f_pos); |
661 | 723 | ||
662 | for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { | 724 | for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { |
725 | int inval_dentry; | ||
663 | 726 | ||
664 | if (ncp_get_volume_info_with_number(server, i, &info) != 0) | 727 | if (ncp_get_volume_info_with_number(server, i, &info) != 0) |
665 | return; | 728 | return; |
@@ -675,8 +738,9 @@ ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | |||
675 | info.volume_name); | 738 | info.volume_name); |
676 | continue; | 739 | continue; |
677 | } | 740 | } |
741 | inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL); | ||
678 | entry.volume = entry.i.volNumber; | 742 | entry.volume = entry.i.volNumber; |
679 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry)) | 743 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry, inval_dentry)) |
680 | return; | 744 | return; |
681 | } | 745 | } |
682 | } | 746 | } |
@@ -739,7 +803,7 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | |||
739 | rpl += onerpl; | 803 | rpl += onerpl; |
740 | rpls -= onerpl; | 804 | rpls -= onerpl; |
741 | entry.volume = entry.i.volNumber; | 805 | entry.volume = entry.i.volNumber; |
742 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry)) | 806 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry, 0)) |
743 | break; | 807 | break; |
744 | } | 808 | } |
745 | } while (more); | 809 | } while (more); |
@@ -775,17 +839,19 @@ int ncp_conn_logged_in(struct super_block *sb) | |||
775 | if (dent) { | 839 | if (dent) { |
776 | struct inode* ino = dent->d_inode; | 840 | struct inode* ino = dent->d_inode; |
777 | if (ino) { | 841 | if (ino) { |
842 | ncp_update_known_namespace(server, volNumber, NULL); | ||
778 | NCP_FINFO(ino)->volNumber = volNumber; | 843 | NCP_FINFO(ino)->volNumber = volNumber; |
779 | NCP_FINFO(ino)->dirEntNum = dirEntNum; | 844 | NCP_FINFO(ino)->dirEntNum = dirEntNum; |
780 | NCP_FINFO(ino)->DosDirNum = DosDirNum; | 845 | NCP_FINFO(ino)->DosDirNum = DosDirNum; |
846 | result = 0; | ||
781 | } else { | 847 | } else { |
782 | DPRINTK("ncpfs: sb->s_root->d_inode == NULL!\n"); | 848 | DPRINTK("ncpfs: sb->s_root->d_inode == NULL!\n"); |
783 | } | 849 | } |
784 | } else { | 850 | } else { |
785 | DPRINTK("ncpfs: sb->s_root == NULL!\n"); | 851 | DPRINTK("ncpfs: sb->s_root == NULL!\n"); |
786 | } | 852 | } |
787 | } | 853 | } else |
788 | result = 0; | 854 | result = 0; |
789 | 855 | ||
790 | out: | 856 | out: |
791 | return result; | 857 | return result; |
@@ -799,7 +865,6 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc | |||
799 | int error, res, len; | 865 | int error, res, len; |
800 | __u8 __name[NCP_MAXPATHLEN + 1]; | 866 | __u8 __name[NCP_MAXPATHLEN + 1]; |
801 | 867 | ||
802 | lock_kernel(); | ||
803 | error = -EIO; | 868 | error = -EIO; |
804 | if (!ncp_conn_valid(server)) | 869 | if (!ncp_conn_valid(server)) |
805 | goto finished; | 870 | goto finished; |
@@ -813,6 +878,8 @@ static struct dentry *ncp_lookup(struct inode *dir, struct dentry *dentry, struc | |||
813 | dentry->d_name.len, 1); | 878 | dentry->d_name.len, 1); |
814 | if (!res) | 879 | if (!res) |
815 | res = ncp_lookup_volume(server, __name, &(finfo.i)); | 880 | res = ncp_lookup_volume(server, __name, &(finfo.i)); |
881 | if (!res) | ||
882 | ncp_update_known_namespace(server, finfo.i.volNumber, NULL); | ||
816 | } else { | 883 | } else { |
817 | res = ncp_io2vol(server, __name, &len, dentry->d_name.name, | 884 | res = ncp_io2vol(server, __name, &len, dentry->d_name.name, |
818 | dentry->d_name.len, !ncp_preserve_case(dir)); | 885 | dentry->d_name.len, !ncp_preserve_case(dir)); |
@@ -846,7 +913,6 @@ add_entry: | |||
846 | 913 | ||
847 | finished: | 914 | finished: |
848 | PPRINTK("ncp_lookup: result=%d\n", error); | 915 | PPRINTK("ncp_lookup: result=%d\n", error); |
849 | unlock_kernel(); | ||
850 | return ERR_PTR(error); | 916 | return ERR_PTR(error); |
851 | } | 917 | } |
852 | 918 | ||
@@ -887,11 +953,6 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode, | |||
887 | PPRINTK("ncp_create_new: creating %s/%s, mode=%x\n", | 953 | PPRINTK("ncp_create_new: creating %s/%s, mode=%x\n", |
888 | dentry->d_parent->d_name.name, dentry->d_name.name, mode); | 954 | dentry->d_parent->d_name.name, dentry->d_name.name, mode); |
889 | 955 | ||
890 | error = -EIO; | ||
891 | lock_kernel(); | ||
892 | if (!ncp_conn_valid(server)) | ||
893 | goto out; | ||
894 | |||
895 | ncp_age_dentry(server, dentry); | 956 | ncp_age_dentry(server, dentry); |
896 | len = sizeof(__name); | 957 | len = sizeof(__name); |
897 | error = ncp_io2vol(server, __name, &len, dentry->d_name.name, | 958 | error = ncp_io2vol(server, __name, &len, dentry->d_name.name, |
@@ -917,6 +978,8 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode, | |||
917 | if (result) { | 978 | if (result) { |
918 | if (result == 0x87) | 979 | if (result == 0x87) |
919 | error = -ENAMETOOLONG; | 980 | error = -ENAMETOOLONG; |
981 | else if (result < 0) | ||
982 | error = result; | ||
920 | DPRINTK("ncp_create: %s/%s failed\n", | 983 | DPRINTK("ncp_create: %s/%s failed\n", |
921 | dentry->d_parent->d_name.name, dentry->d_name.name); | 984 | dentry->d_parent->d_name.name, dentry->d_name.name); |
922 | goto out; | 985 | goto out; |
@@ -935,7 +998,6 @@ int ncp_create_new(struct inode *dir, struct dentry *dentry, int mode, | |||
935 | 998 | ||
936 | error = ncp_instantiate(dir, dentry, &finfo); | 999 | error = ncp_instantiate(dir, dentry, &finfo); |
937 | out: | 1000 | out: |
938 | unlock_kernel(); | ||
939 | return error; | 1001 | return error; |
940 | } | 1002 | } |
941 | 1003 | ||
@@ -955,11 +1017,6 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
955 | DPRINTK("ncp_mkdir: making %s/%s\n", | 1017 | DPRINTK("ncp_mkdir: making %s/%s\n", |
956 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1018 | dentry->d_parent->d_name.name, dentry->d_name.name); |
957 | 1019 | ||
958 | error = -EIO; | ||
959 | lock_kernel(); | ||
960 | if (!ncp_conn_valid(server)) | ||
961 | goto out; | ||
962 | |||
963 | ncp_age_dentry(server, dentry); | 1020 | ncp_age_dentry(server, dentry); |
964 | len = sizeof(__name); | 1021 | len = sizeof(__name); |
965 | error = ncp_io2vol(server, __name, &len, dentry->d_name.name, | 1022 | error = ncp_io2vol(server, __name, &len, dentry->d_name.name, |
@@ -967,12 +1024,11 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
967 | if (error) | 1024 | if (error) |
968 | goto out; | 1025 | goto out; |
969 | 1026 | ||
970 | error = -EACCES; | 1027 | error = ncp_open_create_file_or_subdir(server, dir, __name, |
971 | if (ncp_open_create_file_or_subdir(server, dir, __name, | ||
972 | OC_MODE_CREATE, aDIR, | 1028 | OC_MODE_CREATE, aDIR, |
973 | cpu_to_le16(0xffff), | 1029 | cpu_to_le16(0xffff), |
974 | &finfo) == 0) | 1030 | &finfo); |
975 | { | 1031 | if (error == 0) { |
976 | if (ncp_is_nfs_extras(server, finfo.volume)) { | 1032 | if (ncp_is_nfs_extras(server, finfo.volume)) { |
977 | mode |= S_IFDIR; | 1033 | mode |= S_IFDIR; |
978 | finfo.i.nfs.mode = mode; | 1034 | finfo.i.nfs.mode = mode; |
@@ -983,9 +1039,10 @@ static int ncp_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
983 | goto out; | 1039 | goto out; |
984 | } | 1040 | } |
985 | error = ncp_instantiate(dir, dentry, &finfo); | 1041 | error = ncp_instantiate(dir, dentry, &finfo); |
1042 | } else if (error > 0) { | ||
1043 | error = -EACCES; | ||
986 | } | 1044 | } |
987 | out: | 1045 | out: |
988 | unlock_kernel(); | ||
989 | return error; | 1046 | return error; |
990 | } | 1047 | } |
991 | 1048 | ||
@@ -998,11 +1055,6 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) | |||
998 | DPRINTK("ncp_rmdir: removing %s/%s\n", | 1055 | DPRINTK("ncp_rmdir: removing %s/%s\n", |
999 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1056 | dentry->d_parent->d_name.name, dentry->d_name.name); |
1000 | 1057 | ||
1001 | error = -EIO; | ||
1002 | lock_kernel(); | ||
1003 | if (!ncp_conn_valid(server)) | ||
1004 | goto out; | ||
1005 | |||
1006 | error = -EBUSY; | 1058 | error = -EBUSY; |
1007 | if (!d_unhashed(dentry)) | 1059 | if (!d_unhashed(dentry)) |
1008 | goto out; | 1060 | goto out; |
@@ -1036,11 +1088,10 @@ static int ncp_rmdir(struct inode *dir, struct dentry *dentry) | |||
1036 | error = -ENOENT; | 1088 | error = -ENOENT; |
1037 | break; | 1089 | break; |
1038 | default: | 1090 | default: |
1039 | error = -EACCES; | 1091 | error = result < 0 ? result : -EACCES; |
1040 | break; | 1092 | break; |
1041 | } | 1093 | } |
1042 | out: | 1094 | out: |
1043 | unlock_kernel(); | ||
1044 | return error; | 1095 | return error; |
1045 | } | 1096 | } |
1046 | 1097 | ||
@@ -1050,15 +1101,10 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) | |||
1050 | struct ncp_server *server; | 1101 | struct ncp_server *server; |
1051 | int error; | 1102 | int error; |
1052 | 1103 | ||
1053 | lock_kernel(); | ||
1054 | server = NCP_SERVER(dir); | 1104 | server = NCP_SERVER(dir); |
1055 | DPRINTK("ncp_unlink: unlinking %s/%s\n", | 1105 | DPRINTK("ncp_unlink: unlinking %s/%s\n", |
1056 | dentry->d_parent->d_name.name, dentry->d_name.name); | 1106 | dentry->d_parent->d_name.name, dentry->d_name.name); |
1057 | 1107 | ||
1058 | error = -EIO; | ||
1059 | if (!ncp_conn_valid(server)) | ||
1060 | goto out; | ||
1061 | |||
1062 | /* | 1108 | /* |
1063 | * Check whether to close the file ... | 1109 | * Check whether to close the file ... |
1064 | */ | 1110 | */ |
@@ -1097,12 +1143,9 @@ static int ncp_unlink(struct inode *dir, struct dentry *dentry) | |||
1097 | error = -ENOENT; | 1143 | error = -ENOENT; |
1098 | break; | 1144 | break; |
1099 | default: | 1145 | default: |
1100 | error = -EACCES; | 1146 | error = error < 0 ? error : -EACCES; |
1101 | break; | 1147 | break; |
1102 | } | 1148 | } |
1103 | |||
1104 | out: | ||
1105 | unlock_kernel(); | ||
1106 | return error; | 1149 | return error; |
1107 | } | 1150 | } |
1108 | 1151 | ||
@@ -1118,11 +1161,6 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1118 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, | 1161 | old_dentry->d_parent->d_name.name, old_dentry->d_name.name, |
1119 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); | 1162 | new_dentry->d_parent->d_name.name, new_dentry->d_name.name); |
1120 | 1163 | ||
1121 | error = -EIO; | ||
1122 | lock_kernel(); | ||
1123 | if (!ncp_conn_valid(server)) | ||
1124 | goto out; | ||
1125 | |||
1126 | ncp_age_dentry(server, old_dentry); | 1164 | ncp_age_dentry(server, old_dentry); |
1127 | ncp_age_dentry(server, new_dentry); | 1165 | ncp_age_dentry(server, new_dentry); |
1128 | 1166 | ||
@@ -1161,11 +1199,10 @@ static int ncp_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1161 | error = -ENOENT; | 1199 | error = -ENOENT; |
1162 | break; | 1200 | break; |
1163 | default: | 1201 | default: |
1164 | error = -EACCES; | 1202 | error = error < 0 ? error : -EACCES; |
1165 | break; | 1203 | break; |
1166 | } | 1204 | } |
1167 | out: | 1205 | out: |
1168 | unlock_kernel(); | ||
1169 | return error; | 1206 | return error; |
1170 | } | 1207 | } |
1171 | 1208 | ||
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 3639cc5cbdae..6c754f70c529 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c | |||
@@ -113,9 +113,6 @@ ncp_file_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) | |||
113 | DPRINTK("ncp_file_read: enter %s/%s\n", | 113 | DPRINTK("ncp_file_read: enter %s/%s\n", |
114 | dentry->d_parent->d_name.name, dentry->d_name.name); | 114 | dentry->d_parent->d_name.name, dentry->d_name.name); |
115 | 115 | ||
116 | if (!ncp_conn_valid(NCP_SERVER(inode))) | ||
117 | return -EIO; | ||
118 | |||
119 | pos = *ppos; | 116 | pos = *ppos; |
120 | 117 | ||
121 | if ((ssize_t) count < 0) { | 118 | if ((ssize_t) count < 0) { |
@@ -192,13 +189,11 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
192 | 189 | ||
193 | DPRINTK("ncp_file_write: enter %s/%s\n", | 190 | DPRINTK("ncp_file_write: enter %s/%s\n", |
194 | dentry->d_parent->d_name.name, dentry->d_name.name); | 191 | dentry->d_parent->d_name.name, dentry->d_name.name); |
195 | if (!ncp_conn_valid(NCP_SERVER(inode))) | ||
196 | return -EIO; | ||
197 | if ((ssize_t) count < 0) | 192 | if ((ssize_t) count < 0) |
198 | return -EINVAL; | 193 | return -EINVAL; |
199 | pos = *ppos; | 194 | pos = *ppos; |
200 | if (file->f_flags & O_APPEND) { | 195 | if (file->f_flags & O_APPEND) { |
201 | pos = inode->i_size; | 196 | pos = i_size_read(inode); |
202 | } | 197 | } |
203 | 198 | ||
204 | if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { | 199 | if (pos + count > MAX_NON_LFS && !(file->f_flags&O_LARGEFILE)) { |
@@ -264,8 +259,11 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t * | |||
264 | 259 | ||
265 | *ppos = pos; | 260 | *ppos = pos; |
266 | 261 | ||
267 | if (pos > inode->i_size) { | 262 | if (pos > i_size_read(inode)) { |
268 | inode->i_size = pos; | 263 | mutex_lock(&inode->i_mutex); |
264 | if (pos > i_size_read(inode)) | ||
265 | i_size_write(inode, pos); | ||
266 | mutex_unlock(&inode->i_mutex); | ||
269 | } | 267 | } |
270 | DPRINTK("ncp_file_write: exit %s/%s\n", | 268 | DPRINTK("ncp_file_write: exit %s/%s\n", |
271 | dentry->d_parent->d_name.name, dentry->d_name.name); | 269 | dentry->d_parent->d_name.name, dentry->d_name.name); |
@@ -281,18 +279,9 @@ static int ncp_release(struct inode *inode, struct file *file) { | |||
281 | return 0; | 279 | return 0; |
282 | } | 280 | } |
283 | 281 | ||
284 | static loff_t ncp_remote_llseek(struct file *file, loff_t offset, int origin) | ||
285 | { | ||
286 | loff_t ret; | ||
287 | lock_kernel(); | ||
288 | ret = generic_file_llseek_unlocked(file, offset, origin); | ||
289 | unlock_kernel(); | ||
290 | return ret; | ||
291 | } | ||
292 | |||
293 | const struct file_operations ncp_file_operations = | 282 | const struct file_operations ncp_file_operations = |
294 | { | 283 | { |
295 | .llseek = ncp_remote_llseek, | 284 | .llseek = generic_file_llseek, |
296 | .read = ncp_file_read, | 285 | .read = ncp_file_read, |
297 | .write = ncp_file_write, | 286 | .write = ncp_file_write, |
298 | .unlocked_ioctl = ncp_ioctl, | 287 | .unlocked_ioctl = ncp_ioctl, |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index b4de38cf49f5..985fabb26aca 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -139,7 +139,7 @@ static void ncp_update_dates(struct inode *inode, struct nw_info_struct *nwi) | |||
139 | inode->i_mode = nwi->nfs.mode; | 139 | inode->i_mode = nwi->nfs.mode; |
140 | } | 140 | } |
141 | 141 | ||
142 | inode->i_blocks = (inode->i_size + NCP_BLOCK_SIZE - 1) >> NCP_BLOCK_SHIFT; | 142 | inode->i_blocks = (i_size_read(inode) + NCP_BLOCK_SIZE - 1) >> NCP_BLOCK_SHIFT; |
143 | 143 | ||
144 | inode->i_mtime.tv_sec = ncp_date_dos2unix(nwi->modifyTime, nwi->modifyDate); | 144 | inode->i_mtime.tv_sec = ncp_date_dos2unix(nwi->modifyTime, nwi->modifyDate); |
145 | inode->i_ctime.tv_sec = ncp_date_dos2unix(nwi->creationTime, nwi->creationDate); | 145 | inode->i_ctime.tv_sec = ncp_date_dos2unix(nwi->creationTime, nwi->creationDate); |
@@ -158,18 +158,21 @@ static void ncp_update_attrs(struct inode *inode, struct ncp_entry_info *nwinfo) | |||
158 | inode->i_mode = server->m.dir_mode; | 158 | inode->i_mode = server->m.dir_mode; |
159 | /* for directories dataStreamSize seems to be some | 159 | /* for directories dataStreamSize seems to be some |
160 | Object ID ??? */ | 160 | Object ID ??? */ |
161 | inode->i_size = NCP_BLOCK_SIZE; | 161 | i_size_write(inode, NCP_BLOCK_SIZE); |
162 | } else { | 162 | } else { |
163 | u32 size; | ||
164 | |||
163 | inode->i_mode = server->m.file_mode; | 165 | inode->i_mode = server->m.file_mode; |
164 | inode->i_size = le32_to_cpu(nwi->dataStreamSize); | 166 | size = le32_to_cpu(nwi->dataStreamSize); |
167 | i_size_write(inode, size); | ||
165 | #ifdef CONFIG_NCPFS_EXTRAS | 168 | #ifdef CONFIG_NCPFS_EXTRAS |
166 | if ((server->m.flags & (NCP_MOUNT_EXTRAS|NCP_MOUNT_SYMLINKS)) | 169 | if ((server->m.flags & (NCP_MOUNT_EXTRAS|NCP_MOUNT_SYMLINKS)) |
167 | && (nwi->attributes & aSHARED)) { | 170 | && (nwi->attributes & aSHARED)) { |
168 | switch (nwi->attributes & (aHIDDEN|aSYSTEM)) { | 171 | switch (nwi->attributes & (aHIDDEN|aSYSTEM)) { |
169 | case aHIDDEN: | 172 | case aHIDDEN: |
170 | if (server->m.flags & NCP_MOUNT_SYMLINKS) { | 173 | if (server->m.flags & NCP_MOUNT_SYMLINKS) { |
171 | if (/* (inode->i_size >= NCP_MIN_SYMLINK_SIZE) | 174 | if (/* (size >= NCP_MIN_SYMLINK_SIZE) |
172 | && */ (inode->i_size <= NCP_MAX_SYMLINK_SIZE)) { | 175 | && */ (size <= NCP_MAX_SYMLINK_SIZE)) { |
173 | inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFLNK; | 176 | inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFLNK; |
174 | NCP_FINFO(inode)->flags |= NCPI_KLUDGE_SYMLINK; | 177 | NCP_FINFO(inode)->flags |= NCPI_KLUDGE_SYMLINK; |
175 | break; | 178 | break; |
@@ -208,7 +211,7 @@ void ncp_update_inode2(struct inode* inode, struct ncp_entry_info *nwinfo) | |||
208 | } | 211 | } |
209 | 212 | ||
210 | /* | 213 | /* |
211 | * Fill in the inode based on the ncp_entry_info structure. | 214 | * Fill in the inode based on the ncp_entry_info structure. Used only for brand new inodes. |
212 | */ | 215 | */ |
213 | static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo) | 216 | static void ncp_set_attr(struct inode *inode, struct ncp_entry_info *nwinfo) |
214 | { | 217 | { |
@@ -254,6 +257,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) | |||
254 | if (inode) { | 257 | if (inode) { |
255 | atomic_set(&NCP_FINFO(inode)->opened, info->opened); | 258 | atomic_set(&NCP_FINFO(inode)->opened, info->opened); |
256 | 259 | ||
260 | inode->i_mapping->backing_dev_info = sb->s_bdi; | ||
257 | inode->i_ino = info->ino; | 261 | inode->i_ino = info->ino; |
258 | ncp_set_attr(inode, info); | 262 | ncp_set_attr(inode, info); |
259 | if (S_ISREG(inode->i_mode)) { | 263 | if (S_ISREG(inode->i_mode)) { |
@@ -299,10 +303,12 @@ ncp_evict_inode(struct inode *inode) | |||
299 | 303 | ||
300 | static void ncp_stop_tasks(struct ncp_server *server) { | 304 | static void ncp_stop_tasks(struct ncp_server *server) { |
301 | struct sock* sk = server->ncp_sock->sk; | 305 | struct sock* sk = server->ncp_sock->sk; |
302 | 306 | ||
307 | lock_sock(sk); | ||
303 | sk->sk_error_report = server->error_report; | 308 | sk->sk_error_report = server->error_report; |
304 | sk->sk_data_ready = server->data_ready; | 309 | sk->sk_data_ready = server->data_ready; |
305 | sk->sk_write_space = server->write_space; | 310 | sk->sk_write_space = server->write_space; |
311 | release_sock(sk); | ||
306 | del_timer_sync(&server->timeout_tm); | 312 | del_timer_sync(&server->timeout_tm); |
307 | flush_scheduled_work(); | 313 | flush_scheduled_work(); |
308 | } | 314 | } |
@@ -565,10 +571,12 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
565 | /* server->conn_status = 0; */ | 571 | /* server->conn_status = 0; */ |
566 | /* server->root_dentry = NULL; */ | 572 | /* server->root_dentry = NULL; */ |
567 | /* server->root_setuped = 0; */ | 573 | /* server->root_setuped = 0; */ |
574 | mutex_init(&server->root_setup_lock); | ||
568 | #ifdef CONFIG_NCPFS_PACKET_SIGNING | 575 | #ifdef CONFIG_NCPFS_PACKET_SIGNING |
569 | /* server->sign_wanted = 0; */ | 576 | /* server->sign_wanted = 0; */ |
570 | /* server->sign_active = 0; */ | 577 | /* server->sign_active = 0; */ |
571 | #endif | 578 | #endif |
579 | init_rwsem(&server->auth_rwsem); | ||
572 | server->auth.auth_type = NCP_AUTH_NONE; | 580 | server->auth.auth_type = NCP_AUTH_NONE; |
573 | /* server->auth.object_name_len = 0; */ | 581 | /* server->auth.object_name_len = 0; */ |
574 | /* server->auth.object_name = NULL; */ | 582 | /* server->auth.object_name = NULL; */ |
@@ -593,16 +601,12 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
593 | server->nls_io = load_nls_default(); | 601 | server->nls_io = load_nls_default(); |
594 | #endif /* CONFIG_NCPFS_NLS */ | 602 | #endif /* CONFIG_NCPFS_NLS */ |
595 | 603 | ||
596 | server->dentry_ttl = 0; /* no caching */ | 604 | atomic_set(&server->dentry_ttl, 0); /* no caching */ |
597 | 605 | ||
598 | INIT_LIST_HEAD(&server->tx.requests); | 606 | INIT_LIST_HEAD(&server->tx.requests); |
599 | mutex_init(&server->rcv.creq_mutex); | 607 | mutex_init(&server->rcv.creq_mutex); |
600 | server->tx.creq = NULL; | 608 | server->tx.creq = NULL; |
601 | server->rcv.creq = NULL; | 609 | server->rcv.creq = NULL; |
602 | server->data_ready = sock->sk->sk_data_ready; | ||
603 | server->write_space = sock->sk->sk_write_space; | ||
604 | server->error_report = sock->sk->sk_error_report; | ||
605 | sock->sk->sk_user_data = server; | ||
606 | 610 | ||
607 | init_timer(&server->timeout_tm); | 611 | init_timer(&server->timeout_tm); |
608 | #undef NCP_PACKET_SIZE | 612 | #undef NCP_PACKET_SIZE |
@@ -619,6 +623,11 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
619 | if (server->rxbuf == NULL) | 623 | if (server->rxbuf == NULL) |
620 | goto out_txbuf; | 624 | goto out_txbuf; |
621 | 625 | ||
626 | lock_sock(sock->sk); | ||
627 | server->data_ready = sock->sk->sk_data_ready; | ||
628 | server->write_space = sock->sk->sk_write_space; | ||
629 | server->error_report = sock->sk->sk_error_report; | ||
630 | sock->sk->sk_user_data = server; | ||
622 | sock->sk->sk_data_ready = ncp_tcp_data_ready; | 631 | sock->sk->sk_data_ready = ncp_tcp_data_ready; |
623 | sock->sk->sk_error_report = ncp_tcp_error_report; | 632 | sock->sk->sk_error_report = ncp_tcp_error_report; |
624 | if (sock->type == SOCK_STREAM) { | 633 | if (sock->type == SOCK_STREAM) { |
@@ -634,6 +643,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
634 | server->timeout_tm.data = (unsigned long)server; | 643 | server->timeout_tm.data = (unsigned long)server; |
635 | server->timeout_tm.function = ncpdgram_timeout_call; | 644 | server->timeout_tm.function = ncpdgram_timeout_call; |
636 | } | 645 | } |
646 | release_sock(sock->sk); | ||
637 | 647 | ||
638 | ncp_lock_server(server); | 648 | ncp_lock_server(server); |
639 | error = ncp_connect(server); | 649 | error = ncp_connect(server); |
@@ -658,8 +668,10 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
658 | goto out_disconnect; | 668 | goto out_disconnect; |
659 | } | 669 | } |
660 | } | 670 | } |
671 | ncp_lock_server(server); | ||
661 | if (options & 2) | 672 | if (options & 2) |
662 | server->sign_wanted = 1; | 673 | server->sign_wanted = 1; |
674 | ncp_unlock_server(server); | ||
663 | } | 675 | } |
664 | else | 676 | else |
665 | #endif /* CONFIG_NCPFS_PACKET_SIGNING */ | 677 | #endif /* CONFIG_NCPFS_PACKET_SIGNING */ |
@@ -720,6 +732,9 @@ out_nls: | |||
720 | unload_nls(server->nls_io); | 732 | unload_nls(server->nls_io); |
721 | unload_nls(server->nls_vol); | 733 | unload_nls(server->nls_vol); |
722 | #endif | 734 | #endif |
735 | mutex_destroy(&server->rcv.creq_mutex); | ||
736 | mutex_destroy(&server->root_setup_lock); | ||
737 | mutex_destroy(&server->mutex); | ||
723 | out_fput2: | 738 | out_fput2: |
724 | if (server->info_filp) | 739 | if (server->info_filp) |
725 | fput(server->info_filp); | 740 | fput(server->info_filp); |
@@ -743,8 +758,6 @@ static void ncp_put_super(struct super_block *sb) | |||
743 | { | 758 | { |
744 | struct ncp_server *server = NCP_SBP(sb); | 759 | struct ncp_server *server = NCP_SBP(sb); |
745 | 760 | ||
746 | lock_kernel(); | ||
747 | |||
748 | ncp_lock_server(server); | 761 | ncp_lock_server(server); |
749 | ncp_disconnect(server); | 762 | ncp_disconnect(server); |
750 | ncp_unlock_server(server); | 763 | ncp_unlock_server(server); |
@@ -756,6 +769,9 @@ static void ncp_put_super(struct super_block *sb) | |||
756 | unload_nls(server->nls_vol); | 769 | unload_nls(server->nls_vol); |
757 | unload_nls(server->nls_io); | 770 | unload_nls(server->nls_io); |
758 | #endif /* CONFIG_NCPFS_NLS */ | 771 | #endif /* CONFIG_NCPFS_NLS */ |
772 | mutex_destroy(&server->rcv.creq_mutex); | ||
773 | mutex_destroy(&server->root_setup_lock); | ||
774 | mutex_destroy(&server->mutex); | ||
759 | 775 | ||
760 | if (server->info_filp) | 776 | if (server->info_filp) |
761 | fput(server->info_filp); | 777 | fput(server->info_filp); |
@@ -771,8 +787,6 @@ static void ncp_put_super(struct super_block *sb) | |||
771 | vfree(server->packet); | 787 | vfree(server->packet); |
772 | sb->s_fs_info = NULL; | 788 | sb->s_fs_info = NULL; |
773 | kfree(server); | 789 | kfree(server); |
774 | |||
775 | unlock_kernel(); | ||
776 | } | 790 | } |
777 | 791 | ||
778 | static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) | 792 | static int ncp_statfs(struct dentry *dentry, struct kstatfs *buf) |
@@ -851,10 +865,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
851 | 865 | ||
852 | result = -EIO; | 866 | result = -EIO; |
853 | 867 | ||
854 | lock_kernel(); | ||
855 | |||
856 | server = NCP_SERVER(inode); | 868 | server = NCP_SERVER(inode); |
857 | if ((!server) || !ncp_conn_valid(server)) | 869 | if (!server) /* How this could happen? */ |
858 | goto out; | 870 | goto out; |
859 | 871 | ||
860 | /* ageing the dentry to force validation */ | 872 | /* ageing the dentry to force validation */ |
@@ -981,8 +993,6 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
981 | result = ncp_modify_file_or_subdir_dos_info(NCP_SERVER(inode), | 993 | result = ncp_modify_file_or_subdir_dos_info(NCP_SERVER(inode), |
982 | inode, info_mask, &info); | 994 | inode, info_mask, &info); |
983 | if (result != 0) { | 995 | if (result != 0) { |
984 | result = -EACCES; | ||
985 | |||
986 | if (info_mask == (DM_CREATE_TIME | DM_CREATE_DATE)) { | 996 | if (info_mask == (DM_CREATE_TIME | DM_CREATE_DATE)) { |
987 | /* NetWare seems not to allow this. I | 997 | /* NetWare seems not to allow this. I |
988 | do not know why. So, just tell the | 998 | do not know why. So, just tell the |
@@ -1005,7 +1015,8 @@ int ncp_notify_change(struct dentry *dentry, struct iattr *attr) | |||
1005 | mark_inode_dirty(inode); | 1015 | mark_inode_dirty(inode); |
1006 | 1016 | ||
1007 | out: | 1017 | out: |
1008 | unlock_kernel(); | 1018 | if (result > 0) |
1019 | result = -EACCES; | ||
1009 | return result; | 1020 | return result; |
1010 | } | 1021 | } |
1011 | 1022 | ||
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 84a8cfc4e38e..c2a1f9a155c3 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c | |||
@@ -35,16 +35,11 @@ | |||
35 | #define NCP_PACKET_SIZE_INTERNAL 65536 | 35 | #define NCP_PACKET_SIZE_INTERNAL 65536 |
36 | 36 | ||
37 | static int | 37 | static int |
38 | ncp_get_fs_info(struct ncp_server * server, struct file *file, | 38 | ncp_get_fs_info(struct ncp_server * server, struct inode *inode, |
39 | struct ncp_fs_info __user *arg) | 39 | struct ncp_fs_info __user *arg) |
40 | { | 40 | { |
41 | struct inode *inode = file->f_path.dentry->d_inode; | ||
42 | struct ncp_fs_info info; | 41 | struct ncp_fs_info info; |
43 | 42 | ||
44 | if (file_permission(file, MAY_WRITE) != 0 | ||
45 | && current_uid() != server->m.mounted_uid) | ||
46 | return -EACCES; | ||
47 | |||
48 | if (copy_from_user(&info, arg, sizeof(info))) | 43 | if (copy_from_user(&info, arg, sizeof(info))) |
49 | return -EFAULT; | 44 | return -EFAULT; |
50 | 45 | ||
@@ -65,16 +60,11 @@ ncp_get_fs_info(struct ncp_server * server, struct file *file, | |||
65 | } | 60 | } |
66 | 61 | ||
67 | static int | 62 | static int |
68 | ncp_get_fs_info_v2(struct ncp_server * server, struct file *file, | 63 | ncp_get_fs_info_v2(struct ncp_server * server, struct inode *inode, |
69 | struct ncp_fs_info_v2 __user * arg) | 64 | struct ncp_fs_info_v2 __user * arg) |
70 | { | 65 | { |
71 | struct inode *inode = file->f_path.dentry->d_inode; | ||
72 | struct ncp_fs_info_v2 info2; | 66 | struct ncp_fs_info_v2 info2; |
73 | 67 | ||
74 | if (file_permission(file, MAY_WRITE) != 0 | ||
75 | && current_uid() != server->m.mounted_uid) | ||
76 | return -EACCES; | ||
77 | |||
78 | if (copy_from_user(&info2, arg, sizeof(info2))) | 68 | if (copy_from_user(&info2, arg, sizeof(info2))) |
79 | return -EFAULT; | 69 | return -EFAULT; |
80 | 70 | ||
@@ -136,16 +126,11 @@ struct compat_ncp_privatedata_ioctl | |||
136 | #define NCP_IOC_SETPRIVATEDATA_32 _IOR('n', 10, struct compat_ncp_privatedata_ioctl) | 126 | #define NCP_IOC_SETPRIVATEDATA_32 _IOR('n', 10, struct compat_ncp_privatedata_ioctl) |
137 | 127 | ||
138 | static int | 128 | static int |
139 | ncp_get_compat_fs_info_v2(struct ncp_server * server, struct file *file, | 129 | ncp_get_compat_fs_info_v2(struct ncp_server * server, struct inode *inode, |
140 | struct compat_ncp_fs_info_v2 __user * arg) | 130 | struct compat_ncp_fs_info_v2 __user * arg) |
141 | { | 131 | { |
142 | struct inode *inode = file->f_path.dentry->d_inode; | ||
143 | struct compat_ncp_fs_info_v2 info2; | 132 | struct compat_ncp_fs_info_v2 info2; |
144 | 133 | ||
145 | if (file_permission(file, MAY_WRITE) != 0 | ||
146 | && current_uid() != server->m.mounted_uid) | ||
147 | return -EACCES; | ||
148 | |||
149 | if (copy_from_user(&info2, arg, sizeof(info2))) | 134 | if (copy_from_user(&info2, arg, sizeof(info2))) |
150 | return -EFAULT; | 135 | return -EFAULT; |
151 | 136 | ||
@@ -182,11 +167,8 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
182 | struct nls_table *iocharset; | 167 | struct nls_table *iocharset; |
183 | struct nls_table *oldset_io; | 168 | struct nls_table *oldset_io; |
184 | struct nls_table *oldset_cp; | 169 | struct nls_table *oldset_cp; |
185 | 170 | int utf8; | |
186 | if (!capable(CAP_SYS_ADMIN)) | 171 | int err; |
187 | return -EACCES; | ||
188 | if (server->root_setuped) | ||
189 | return -EBUSY; | ||
190 | 172 | ||
191 | if (copy_from_user(&user, arg, sizeof(user))) | 173 | if (copy_from_user(&user, arg, sizeof(user))) |
192 | return -EFAULT; | 174 | return -EFAULT; |
@@ -206,28 +188,40 @@ ncp_set_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
206 | user.iocharset[NCP_IOCSNAME_LEN] = 0; | 188 | user.iocharset[NCP_IOCSNAME_LEN] = 0; |
207 | if (!user.iocharset[0] || !strcmp(user.iocharset, "default")) { | 189 | if (!user.iocharset[0] || !strcmp(user.iocharset, "default")) { |
208 | iocharset = load_nls_default(); | 190 | iocharset = load_nls_default(); |
209 | NCP_CLR_FLAG(server, NCP_FLAG_UTF8); | 191 | utf8 = 0; |
210 | } else if (!strcmp(user.iocharset, "utf8")) { | 192 | } else if (!strcmp(user.iocharset, "utf8")) { |
211 | iocharset = load_nls_default(); | 193 | iocharset = load_nls_default(); |
212 | NCP_SET_FLAG(server, NCP_FLAG_UTF8); | 194 | utf8 = 1; |
213 | } else { | 195 | } else { |
214 | iocharset = load_nls(user.iocharset); | 196 | iocharset = load_nls(user.iocharset); |
215 | if (!iocharset) { | 197 | if (!iocharset) { |
216 | unload_nls(codepage); | 198 | unload_nls(codepage); |
217 | return -EBADRQC; | 199 | return -EBADRQC; |
218 | } | 200 | } |
219 | NCP_CLR_FLAG(server, NCP_FLAG_UTF8); | 201 | utf8 = 0; |
220 | } | 202 | } |
221 | 203 | ||
222 | oldset_cp = server->nls_vol; | 204 | mutex_lock(&server->root_setup_lock); |
223 | server->nls_vol = codepage; | 205 | if (server->root_setuped) { |
224 | oldset_io = server->nls_io; | 206 | oldset_cp = codepage; |
225 | server->nls_io = iocharset; | 207 | oldset_io = iocharset; |
226 | 208 | err = -EBUSY; | |
209 | } else { | ||
210 | if (utf8) | ||
211 | NCP_SET_FLAG(server, NCP_FLAG_UTF8); | ||
212 | else | ||
213 | NCP_CLR_FLAG(server, NCP_FLAG_UTF8); | ||
214 | oldset_cp = server->nls_vol; | ||
215 | server->nls_vol = codepage; | ||
216 | oldset_io = server->nls_io; | ||
217 | server->nls_io = iocharset; | ||
218 | err = 0; | ||
219 | } | ||
220 | mutex_unlock(&server->root_setup_lock); | ||
227 | unload_nls(oldset_cp); | 221 | unload_nls(oldset_cp); |
228 | unload_nls(oldset_io); | 222 | unload_nls(oldset_io); |
229 | 223 | ||
230 | return 0; | 224 | return err; |
231 | } | 225 | } |
232 | 226 | ||
233 | static int | 227 | static int |
@@ -237,6 +231,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
237 | int len; | 231 | int len; |
238 | 232 | ||
239 | memset(&user, 0, sizeof(user)); | 233 | memset(&user, 0, sizeof(user)); |
234 | mutex_lock(&server->root_setup_lock); | ||
240 | if (server->nls_vol && server->nls_vol->charset) { | 235 | if (server->nls_vol && server->nls_vol->charset) { |
241 | len = strlen(server->nls_vol->charset); | 236 | len = strlen(server->nls_vol->charset); |
242 | if (len > NCP_IOCSNAME_LEN) | 237 | if (len > NCP_IOCSNAME_LEN) |
@@ -254,6 +249,7 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
254 | strncpy(user.iocharset, server->nls_io->charset, len); | 249 | strncpy(user.iocharset, server->nls_io->charset, len); |
255 | user.iocharset[len] = 0; | 250 | user.iocharset[len] = 0; |
256 | } | 251 | } |
252 | mutex_unlock(&server->root_setup_lock); | ||
257 | 253 | ||
258 | if (copy_to_user(arg, &user, sizeof(user))) | 254 | if (copy_to_user(arg, &user, sizeof(user))) |
259 | return -EFAULT; | 255 | return -EFAULT; |
@@ -261,25 +257,19 @@ ncp_get_charsets(struct ncp_server* server, struct ncp_nls_ioctl __user *arg) | |||
261 | } | 257 | } |
262 | #endif /* CONFIG_NCPFS_NLS */ | 258 | #endif /* CONFIG_NCPFS_NLS */ |
263 | 259 | ||
264 | static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 260 | static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg) |
265 | { | 261 | { |
266 | struct inode *inode = filp->f_dentry->d_inode; | ||
267 | struct ncp_server *server = NCP_SERVER(inode); | 262 | struct ncp_server *server = NCP_SERVER(inode); |
268 | int result; | 263 | int result; |
269 | struct ncp_ioctl_request request; | 264 | struct ncp_ioctl_request request; |
270 | char* bouncebuffer; | 265 | char* bouncebuffer; |
271 | void __user *argp = (void __user *)arg; | 266 | void __user *argp = (void __user *)arg; |
272 | uid_t uid = current_uid(); | ||
273 | 267 | ||
274 | switch (cmd) { | 268 | switch (cmd) { |
275 | #ifdef CONFIG_COMPAT | 269 | #ifdef CONFIG_COMPAT |
276 | case NCP_IOC_NCPREQUEST_32: | 270 | case NCP_IOC_NCPREQUEST_32: |
277 | #endif | 271 | #endif |
278 | case NCP_IOC_NCPREQUEST: | 272 | case NCP_IOC_NCPREQUEST: |
279 | if (file_permission(filp, MAY_WRITE) != 0 | ||
280 | && uid != server->m.mounted_uid) | ||
281 | return -EACCES; | ||
282 | |||
283 | #ifdef CONFIG_COMPAT | 273 | #ifdef CONFIG_COMPAT |
284 | if (cmd == NCP_IOC_NCPREQUEST_32) { | 274 | if (cmd == NCP_IOC_NCPREQUEST_32) { |
285 | struct compat_ncp_ioctl_request request32; | 275 | struct compat_ncp_ioctl_request request32; |
@@ -314,7 +304,7 @@ static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
314 | server->current_size = request.size; | 304 | server->current_size = request.size; |
315 | memcpy(server->packet, bouncebuffer, request.size); | 305 | memcpy(server->packet, bouncebuffer, request.size); |
316 | 306 | ||
317 | result = ncp_request2(server, request.function, | 307 | result = ncp_request2(server, request.function, |
318 | bouncebuffer, NCP_PACKET_SIZE_INTERNAL); | 308 | bouncebuffer, NCP_PACKET_SIZE_INTERNAL); |
319 | if (result < 0) | 309 | if (result < 0) |
320 | result = -EIO; | 310 | result = -EIO; |
@@ -331,69 +321,69 @@ static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
331 | 321 | ||
332 | case NCP_IOC_CONN_LOGGED_IN: | 322 | case NCP_IOC_CONN_LOGGED_IN: |
333 | 323 | ||
334 | if (!capable(CAP_SYS_ADMIN)) | ||
335 | return -EACCES; | ||
336 | if (!(server->m.int_flags & NCP_IMOUNT_LOGGEDIN_POSSIBLE)) | 324 | if (!(server->m.int_flags & NCP_IMOUNT_LOGGEDIN_POSSIBLE)) |
337 | return -EINVAL; | 325 | return -EINVAL; |
326 | mutex_lock(&server->root_setup_lock); | ||
338 | if (server->root_setuped) | 327 | if (server->root_setuped) |
339 | return -EBUSY; | 328 | result = -EBUSY; |
340 | server->root_setuped = 1; | 329 | else { |
341 | return ncp_conn_logged_in(inode->i_sb); | 330 | result = ncp_conn_logged_in(inode->i_sb); |
331 | if (result == 0) | ||
332 | server->root_setuped = 1; | ||
333 | } | ||
334 | mutex_unlock(&server->root_setup_lock); | ||
335 | return result; | ||
342 | 336 | ||
343 | case NCP_IOC_GET_FS_INFO: | 337 | case NCP_IOC_GET_FS_INFO: |
344 | return ncp_get_fs_info(server, filp, argp); | 338 | return ncp_get_fs_info(server, inode, argp); |
345 | 339 | ||
346 | case NCP_IOC_GET_FS_INFO_V2: | 340 | case NCP_IOC_GET_FS_INFO_V2: |
347 | return ncp_get_fs_info_v2(server, filp, argp); | 341 | return ncp_get_fs_info_v2(server, inode, argp); |
348 | 342 | ||
349 | #ifdef CONFIG_COMPAT | 343 | #ifdef CONFIG_COMPAT |
350 | case NCP_IOC_GET_FS_INFO_V2_32: | 344 | case NCP_IOC_GET_FS_INFO_V2_32: |
351 | return ncp_get_compat_fs_info_v2(server, filp, argp); | 345 | return ncp_get_compat_fs_info_v2(server, inode, argp); |
352 | #endif | 346 | #endif |
353 | /* we have too many combinations of CONFIG_COMPAT, | 347 | /* we have too many combinations of CONFIG_COMPAT, |
354 | * CONFIG_64BIT and CONFIG_UID16, so just handle | 348 | * CONFIG_64BIT and CONFIG_UID16, so just handle |
355 | * any of the possible ioctls */ | 349 | * any of the possible ioctls */ |
356 | case NCP_IOC_GETMOUNTUID16: | 350 | case NCP_IOC_GETMOUNTUID16: |
357 | case NCP_IOC_GETMOUNTUID32: | 351 | { |
358 | case NCP_IOC_GETMOUNTUID64: | ||
359 | if (file_permission(filp, MAY_READ) != 0 | ||
360 | && uid != server->m.mounted_uid) | ||
361 | return -EACCES; | ||
362 | |||
363 | if (cmd == NCP_IOC_GETMOUNTUID16) { | ||
364 | u16 uid; | 352 | u16 uid; |
353 | |||
365 | SET_UID(uid, server->m.mounted_uid); | 354 | SET_UID(uid, server->m.mounted_uid); |
366 | if (put_user(uid, (u16 __user *)argp)) | 355 | if (put_user(uid, (u16 __user *)argp)) |
367 | return -EFAULT; | 356 | return -EFAULT; |
368 | } else if (cmd == NCP_IOC_GETMOUNTUID32) { | 357 | return 0; |
369 | if (put_user(server->m.mounted_uid, | ||
370 | (u32 __user *)argp)) | ||
371 | return -EFAULT; | ||
372 | } else { | ||
373 | if (put_user(server->m.mounted_uid, | ||
374 | (u64 __user *)argp)) | ||
375 | return -EFAULT; | ||
376 | } | 358 | } |
359 | case NCP_IOC_GETMOUNTUID32: | ||
360 | if (put_user(server->m.mounted_uid, | ||
361 | (u32 __user *)argp)) | ||
362 | return -EFAULT; | ||
363 | return 0; | ||
364 | case NCP_IOC_GETMOUNTUID64: | ||
365 | if (put_user(server->m.mounted_uid, | ||
366 | (u64 __user *)argp)) | ||
367 | return -EFAULT; | ||
377 | return 0; | 368 | return 0; |
378 | 369 | ||
379 | case NCP_IOC_GETROOT: | 370 | case NCP_IOC_GETROOT: |
380 | { | 371 | { |
381 | struct ncp_setroot_ioctl sr; | 372 | struct ncp_setroot_ioctl sr; |
382 | 373 | ||
383 | if (file_permission(filp, MAY_READ) != 0 | 374 | result = -EACCES; |
384 | && uid != server->m.mounted_uid) | 375 | mutex_lock(&server->root_setup_lock); |
385 | return -EACCES; | ||
386 | |||
387 | if (server->m.mounted_vol[0]) { | 376 | if (server->m.mounted_vol[0]) { |
388 | struct dentry* dentry = inode->i_sb->s_root; | 377 | struct dentry* dentry = inode->i_sb->s_root; |
389 | 378 | ||
390 | if (dentry) { | 379 | if (dentry) { |
391 | struct inode* s_inode = dentry->d_inode; | 380 | struct inode* s_inode = dentry->d_inode; |
392 | 381 | ||
393 | if (s_inode) { | 382 | if (s_inode) { |
394 | sr.volNumber = NCP_FINFO(s_inode)->volNumber; | 383 | sr.volNumber = NCP_FINFO(s_inode)->volNumber; |
395 | sr.dirEntNum = NCP_FINFO(s_inode)->dirEntNum; | 384 | sr.dirEntNum = NCP_FINFO(s_inode)->dirEntNum; |
396 | sr.namespace = server->name_space[sr.volNumber]; | 385 | sr.namespace = server->name_space[sr.volNumber]; |
386 | result = 0; | ||
397 | } else | 387 | } else |
398 | DPRINTK("ncpfs: s_root->d_inode==NULL\n"); | 388 | DPRINTK("ncpfs: s_root->d_inode==NULL\n"); |
399 | } else | 389 | } else |
@@ -402,10 +392,12 @@ static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
402 | sr.volNumber = -1; | 392 | sr.volNumber = -1; |
403 | sr.namespace = 0; | 393 | sr.namespace = 0; |
404 | sr.dirEntNum = 0; | 394 | sr.dirEntNum = 0; |
395 | result = 0; | ||
405 | } | 396 | } |
406 | if (copy_to_user(argp, &sr, sizeof(sr))) | 397 | mutex_unlock(&server->root_setup_lock); |
407 | return -EFAULT; | 398 | if (!result && copy_to_user(argp, &sr, sizeof(sr))) |
408 | return 0; | 399 | result = -EFAULT; |
400 | return result; | ||
409 | } | 401 | } |
410 | 402 | ||
411 | case NCP_IOC_SETROOT: | 403 | case NCP_IOC_SETROOT: |
@@ -416,103 +408,114 @@ static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
416 | __le32 dosde; | 408 | __le32 dosde; |
417 | struct dentry* dentry; | 409 | struct dentry* dentry; |
418 | 410 | ||
419 | if (!capable(CAP_SYS_ADMIN)) | ||
420 | { | ||
421 | return -EACCES; | ||
422 | } | ||
423 | if (server->root_setuped) return -EBUSY; | ||
424 | if (copy_from_user(&sr, argp, sizeof(sr))) | 411 | if (copy_from_user(&sr, argp, sizeof(sr))) |
425 | return -EFAULT; | 412 | return -EFAULT; |
426 | if (sr.volNumber < 0) { | 413 | mutex_lock(&server->root_setup_lock); |
427 | server->m.mounted_vol[0] = 0; | 414 | if (server->root_setuped) |
428 | vnum = NCP_NUMBER_OF_VOLUMES; | 415 | result = -EBUSY; |
429 | de = 0; | 416 | else { |
430 | dosde = 0; | 417 | if (sr.volNumber < 0) { |
431 | } else if (sr.volNumber >= NCP_NUMBER_OF_VOLUMES) { | 418 | server->m.mounted_vol[0] = 0; |
432 | return -EINVAL; | 419 | vnum = NCP_NUMBER_OF_VOLUMES; |
433 | } else if (ncp_mount_subdir(server, sr.volNumber, | 420 | de = 0; |
434 | sr.namespace, sr.dirEntNum, | 421 | dosde = 0; |
435 | &vnum, &de, &dosde)) { | 422 | result = 0; |
436 | return -ENOENT; | 423 | } else if (sr.volNumber >= NCP_NUMBER_OF_VOLUMES) { |
437 | } | 424 | result = -EINVAL; |
438 | 425 | } else if (ncp_mount_subdir(server, sr.volNumber, | |
439 | dentry = inode->i_sb->s_root; | 426 | sr.namespace, sr.dirEntNum, |
440 | server->root_setuped = 1; | 427 | &vnum, &de, &dosde)) { |
441 | if (dentry) { | 428 | result = -ENOENT; |
442 | struct inode* s_inode = dentry->d_inode; | ||
443 | |||
444 | if (s_inode) { | ||
445 | NCP_FINFO(s_inode)->volNumber = vnum; | ||
446 | NCP_FINFO(s_inode)->dirEntNum = de; | ||
447 | NCP_FINFO(s_inode)->DosDirNum = dosde; | ||
448 | } else | 429 | } else |
449 | DPRINTK("ncpfs: s_root->d_inode==NULL\n"); | 430 | result = 0; |
450 | } else | 431 | |
451 | DPRINTK("ncpfs: s_root==NULL\n"); | 432 | if (result == 0) { |
433 | dentry = inode->i_sb->s_root; | ||
434 | if (dentry) { | ||
435 | struct inode* s_inode = dentry->d_inode; | ||
436 | |||
437 | if (s_inode) { | ||
438 | NCP_FINFO(s_inode)->volNumber = vnum; | ||
439 | NCP_FINFO(s_inode)->dirEntNum = de; | ||
440 | NCP_FINFO(s_inode)->DosDirNum = dosde; | ||
441 | server->root_setuped = 1; | ||
442 | } else { | ||
443 | DPRINTK("ncpfs: s_root->d_inode==NULL\n"); | ||
444 | result = -EIO; | ||
445 | } | ||
446 | } else { | ||
447 | DPRINTK("ncpfs: s_root==NULL\n"); | ||
448 | result = -EIO; | ||
449 | } | ||
450 | } | ||
451 | result = 0; | ||
452 | } | ||
453 | mutex_unlock(&server->root_setup_lock); | ||
452 | 454 | ||
453 | return 0; | 455 | return result; |
454 | } | 456 | } |
455 | 457 | ||
456 | #ifdef CONFIG_NCPFS_PACKET_SIGNING | 458 | #ifdef CONFIG_NCPFS_PACKET_SIGNING |
457 | case NCP_IOC_SIGN_INIT: | 459 | case NCP_IOC_SIGN_INIT: |
458 | if (file_permission(filp, MAY_WRITE) != 0 | 460 | { |
459 | && uid != server->m.mounted_uid) | 461 | struct ncp_sign_init sign; |
460 | return -EACCES; | ||
461 | |||
462 | if (argp) { | ||
463 | if (server->sign_wanted) | ||
464 | { | ||
465 | struct ncp_sign_init sign; | ||
466 | 462 | ||
463 | if (argp) | ||
467 | if (copy_from_user(&sign, argp, sizeof(sign))) | 464 | if (copy_from_user(&sign, argp, sizeof(sign))) |
468 | return -EFAULT; | 465 | return -EFAULT; |
469 | memcpy(server->sign_root,sign.sign_root,8); | 466 | ncp_lock_server(server); |
470 | memcpy(server->sign_last,sign.sign_last,16); | 467 | mutex_lock(&server->rcv.creq_mutex); |
471 | server->sign_active = 1; | 468 | if (argp) { |
469 | if (server->sign_wanted) { | ||
470 | memcpy(server->sign_root,sign.sign_root,8); | ||
471 | memcpy(server->sign_last,sign.sign_last,16); | ||
472 | server->sign_active = 1; | ||
473 | } | ||
474 | /* ignore when signatures not wanted */ | ||
475 | } else { | ||
476 | server->sign_active = 0; | ||
472 | } | 477 | } |
473 | /* ignore when signatures not wanted */ | 478 | mutex_unlock(&server->rcv.creq_mutex); |
474 | } else { | 479 | ncp_unlock_server(server); |
475 | server->sign_active = 0; | 480 | return 0; |
476 | } | 481 | } |
477 | return 0; | 482 | |
478 | |||
479 | case NCP_IOC_SIGN_WANTED: | 483 | case NCP_IOC_SIGN_WANTED: |
480 | if (file_permission(filp, MAY_READ) != 0 | 484 | { |
481 | && uid != server->m.mounted_uid) | 485 | int state; |
482 | return -EACCES; | 486 | |
483 | 487 | ncp_lock_server(server); | |
484 | if (put_user(server->sign_wanted, (int __user *)argp)) | 488 | state = server->sign_wanted; |
485 | return -EFAULT; | 489 | ncp_unlock_server(server); |
486 | return 0; | 490 | if (put_user(state, (int __user *)argp)) |
491 | return -EFAULT; | ||
492 | return 0; | ||
493 | } | ||
487 | 494 | ||
488 | case NCP_IOC_SET_SIGN_WANTED: | 495 | case NCP_IOC_SET_SIGN_WANTED: |
489 | { | 496 | { |
490 | int newstate; | 497 | int newstate; |
491 | 498 | ||
492 | if (file_permission(filp, MAY_WRITE) != 0 | ||
493 | && uid != server->m.mounted_uid) | ||
494 | return -EACCES; | ||
495 | |||
496 | /* get only low 8 bits... */ | 499 | /* get only low 8 bits... */ |
497 | if (get_user(newstate, (unsigned char __user *)argp)) | 500 | if (get_user(newstate, (unsigned char __user *)argp)) |
498 | return -EFAULT; | 501 | return -EFAULT; |
502 | result = 0; | ||
503 | ncp_lock_server(server); | ||
499 | if (server->sign_active) { | 504 | if (server->sign_active) { |
500 | /* cannot turn signatures OFF when active */ | 505 | /* cannot turn signatures OFF when active */ |
501 | if (!newstate) return -EINVAL; | 506 | if (!newstate) |
507 | result = -EINVAL; | ||
502 | } else { | 508 | } else { |
503 | server->sign_wanted = newstate != 0; | 509 | server->sign_wanted = newstate != 0; |
504 | } | 510 | } |
505 | return 0; | 511 | ncp_unlock_server(server); |
512 | return result; | ||
506 | } | 513 | } |
507 | 514 | ||
508 | #endif /* CONFIG_NCPFS_PACKET_SIGNING */ | 515 | #endif /* CONFIG_NCPFS_PACKET_SIGNING */ |
509 | 516 | ||
510 | #ifdef CONFIG_NCPFS_IOCTL_LOCKING | 517 | #ifdef CONFIG_NCPFS_IOCTL_LOCKING |
511 | case NCP_IOC_LOCKUNLOCK: | 518 | case NCP_IOC_LOCKUNLOCK: |
512 | if (file_permission(filp, MAY_WRITE) != 0 | ||
513 | && uid != server->m.mounted_uid) | ||
514 | return -EACCES; | ||
515 | |||
516 | { | 519 | { |
517 | struct ncp_lock_ioctl rqdata; | 520 | struct ncp_lock_ioctl rqdata; |
518 | 521 | ||
@@ -541,16 +544,13 @@ static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
541 | { | 544 | { |
542 | return result; | 545 | return result; |
543 | } | 546 | } |
544 | result = -EIO; | ||
545 | if (!ncp_conn_valid(server)) | ||
546 | goto outrel; | ||
547 | result = -EISDIR; | 547 | result = -EISDIR; |
548 | if (!S_ISREG(inode->i_mode)) | 548 | if (!S_ISREG(inode->i_mode)) |
549 | goto outrel; | 549 | goto outrel; |
550 | if (rqdata.cmd == NCP_LOCK_CLEAR) | 550 | if (rqdata.cmd == NCP_LOCK_CLEAR) |
551 | { | 551 | { |
552 | result = ncp_ClearPhysicalRecord(NCP_SERVER(inode), | 552 | result = ncp_ClearPhysicalRecord(NCP_SERVER(inode), |
553 | NCP_FINFO(inode)->file_handle, | 553 | NCP_FINFO(inode)->file_handle, |
554 | rqdata.offset, | 554 | rqdata.offset, |
555 | rqdata.length); | 555 | rqdata.length); |
556 | if (result > 0) result = 0; /* no such lock */ | 556 | if (result > 0) result = 0; /* no such lock */ |
@@ -573,7 +573,7 @@ static long __ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
573 | rqdata.timeout); | 573 | rqdata.timeout); |
574 | if (result > 0) result = -EAGAIN; | 574 | if (result > 0) result = -EAGAIN; |
575 | } | 575 | } |
576 | outrel: | 576 | outrel: |
577 | ncp_inode_close(inode); | 577 | ncp_inode_close(inode); |
578 | return result; | 578 | return result; |
579 | } | 579 | } |
@@ -581,60 +581,62 @@ outrel: | |||
581 | 581 | ||
582 | #ifdef CONFIG_COMPAT | 582 | #ifdef CONFIG_COMPAT |
583 | case NCP_IOC_GETOBJECTNAME_32: | 583 | case NCP_IOC_GETOBJECTNAME_32: |
584 | if (uid != server->m.mounted_uid) | ||
585 | return -EACCES; | ||
586 | { | 584 | { |
587 | struct compat_ncp_objectname_ioctl user; | 585 | struct compat_ncp_objectname_ioctl user; |
588 | size_t outl; | 586 | size_t outl; |
589 | 587 | ||
590 | if (copy_from_user(&user, argp, sizeof(user))) | 588 | if (copy_from_user(&user, argp, sizeof(user))) |
591 | return -EFAULT; | 589 | return -EFAULT; |
590 | down_read(&server->auth_rwsem); | ||
592 | user.auth_type = server->auth.auth_type; | 591 | user.auth_type = server->auth.auth_type; |
593 | outl = user.object_name_len; | 592 | outl = user.object_name_len; |
594 | user.object_name_len = server->auth.object_name_len; | 593 | user.object_name_len = server->auth.object_name_len; |
595 | if (outl > user.object_name_len) | 594 | if (outl > user.object_name_len) |
596 | outl = user.object_name_len; | 595 | outl = user.object_name_len; |
596 | result = 0; | ||
597 | if (outl) { | 597 | if (outl) { |
598 | if (copy_to_user(compat_ptr(user.object_name), | 598 | if (copy_to_user(compat_ptr(user.object_name), |
599 | server->auth.object_name, | 599 | server->auth.object_name, |
600 | outl)) return -EFAULT; | 600 | outl)) |
601 | result = -EFAULT; | ||
601 | } | 602 | } |
602 | if (copy_to_user(argp, &user, sizeof(user))) | 603 | up_read(&server->auth_rwsem); |
603 | return -EFAULT; | 604 | if (!result && copy_to_user(argp, &user, sizeof(user))) |
604 | return 0; | 605 | result = -EFAULT; |
606 | return result; | ||
605 | } | 607 | } |
606 | #endif | 608 | #endif |
607 | 609 | ||
608 | case NCP_IOC_GETOBJECTNAME: | 610 | case NCP_IOC_GETOBJECTNAME: |
609 | if (uid != server->m.mounted_uid) | ||
610 | return -EACCES; | ||
611 | { | 611 | { |
612 | struct ncp_objectname_ioctl user; | 612 | struct ncp_objectname_ioctl user; |
613 | size_t outl; | 613 | size_t outl; |
614 | 614 | ||
615 | if (copy_from_user(&user, argp, sizeof(user))) | 615 | if (copy_from_user(&user, argp, sizeof(user))) |
616 | return -EFAULT; | 616 | return -EFAULT; |
617 | down_read(&server->auth_rwsem); | ||
617 | user.auth_type = server->auth.auth_type; | 618 | user.auth_type = server->auth.auth_type; |
618 | outl = user.object_name_len; | 619 | outl = user.object_name_len; |
619 | user.object_name_len = server->auth.object_name_len; | 620 | user.object_name_len = server->auth.object_name_len; |
620 | if (outl > user.object_name_len) | 621 | if (outl > user.object_name_len) |
621 | outl = user.object_name_len; | 622 | outl = user.object_name_len; |
623 | result = 0; | ||
622 | if (outl) { | 624 | if (outl) { |
623 | if (copy_to_user(user.object_name, | 625 | if (copy_to_user(user.object_name, |
624 | server->auth.object_name, | 626 | server->auth.object_name, |
625 | outl)) return -EFAULT; | 627 | outl)) |
628 | result = -EFAULT; | ||
626 | } | 629 | } |
627 | if (copy_to_user(argp, &user, sizeof(user))) | 630 | up_read(&server->auth_rwsem); |
628 | return -EFAULT; | 631 | if (!result && copy_to_user(argp, &user, sizeof(user))) |
629 | return 0; | 632 | result = -EFAULT; |
633 | return result; | ||
630 | } | 634 | } |
631 | 635 | ||
632 | #ifdef CONFIG_COMPAT | 636 | #ifdef CONFIG_COMPAT |
633 | case NCP_IOC_SETOBJECTNAME_32: | 637 | case NCP_IOC_SETOBJECTNAME_32: |
634 | #endif | 638 | #endif |
635 | case NCP_IOC_SETOBJECTNAME: | 639 | case NCP_IOC_SETOBJECTNAME: |
636 | if (uid != server->m.mounted_uid) | ||
637 | return -EACCES; | ||
638 | { | 640 | { |
639 | struct ncp_objectname_ioctl user; | 641 | struct ncp_objectname_ioctl user; |
640 | void* newname; | 642 | void* newname; |
@@ -666,9 +668,7 @@ outrel: | |||
666 | } else { | 668 | } else { |
667 | newname = NULL; | 669 | newname = NULL; |
668 | } | 670 | } |
669 | /* enter critical section */ | 671 | down_write(&server->auth_rwsem); |
670 | /* maybe that kfree can sleep so do that this way */ | ||
671 | /* it is at least more SMP friendly (in future...) */ | ||
672 | oldname = server->auth.object_name; | 672 | oldname = server->auth.object_name; |
673 | oldnamelen = server->auth.object_name_len; | 673 | oldnamelen = server->auth.object_name_len; |
674 | oldprivate = server->priv.data; | 674 | oldprivate = server->priv.data; |
@@ -678,7 +678,7 @@ outrel: | |||
678 | server->auth.object_name = newname; | 678 | server->auth.object_name = newname; |
679 | server->priv.len = 0; | 679 | server->priv.len = 0; |
680 | server->priv.data = NULL; | 680 | server->priv.data = NULL; |
681 | /* leave critical section */ | 681 | up_write(&server->auth_rwsem); |
682 | kfree(oldprivate); | 682 | kfree(oldprivate); |
683 | kfree(oldname); | 683 | kfree(oldname); |
684 | return 0; | 684 | return 0; |
@@ -688,8 +688,6 @@ outrel: | |||
688 | case NCP_IOC_GETPRIVATEDATA_32: | 688 | case NCP_IOC_GETPRIVATEDATA_32: |
689 | #endif | 689 | #endif |
690 | case NCP_IOC_GETPRIVATEDATA: | 690 | case NCP_IOC_GETPRIVATEDATA: |
691 | if (uid != server->m.mounted_uid) | ||
692 | return -EACCES; | ||
693 | { | 691 | { |
694 | struct ncp_privatedata_ioctl user; | 692 | struct ncp_privatedata_ioctl user; |
695 | size_t outl; | 693 | size_t outl; |
@@ -706,14 +704,20 @@ outrel: | |||
706 | if (copy_from_user(&user, argp, sizeof(user))) | 704 | if (copy_from_user(&user, argp, sizeof(user))) |
707 | return -EFAULT; | 705 | return -EFAULT; |
708 | 706 | ||
707 | down_read(&server->auth_rwsem); | ||
709 | outl = user.len; | 708 | outl = user.len; |
710 | user.len = server->priv.len; | 709 | user.len = server->priv.len; |
711 | if (outl > user.len) outl = user.len; | 710 | if (outl > user.len) outl = user.len; |
711 | result = 0; | ||
712 | if (outl) { | 712 | if (outl) { |
713 | if (copy_to_user(user.data, | 713 | if (copy_to_user(user.data, |
714 | server->priv.data, | 714 | server->priv.data, |
715 | outl)) return -EFAULT; | 715 | outl)) |
716 | result = -EFAULT; | ||
716 | } | 717 | } |
718 | up_read(&server->auth_rwsem); | ||
719 | if (result) | ||
720 | return result; | ||
717 | #ifdef CONFIG_COMPAT | 721 | #ifdef CONFIG_COMPAT |
718 | if (cmd == NCP_IOC_GETPRIVATEDATA_32) { | 722 | if (cmd == NCP_IOC_GETPRIVATEDATA_32) { |
719 | struct compat_ncp_privatedata_ioctl user32; | 723 | struct compat_ncp_privatedata_ioctl user32; |
@@ -733,8 +737,6 @@ outrel: | |||
733 | case NCP_IOC_SETPRIVATEDATA_32: | 737 | case NCP_IOC_SETPRIVATEDATA_32: |
734 | #endif | 738 | #endif |
735 | case NCP_IOC_SETPRIVATEDATA: | 739 | case NCP_IOC_SETPRIVATEDATA: |
736 | if (uid != server->m.mounted_uid) | ||
737 | return -EACCES; | ||
738 | { | 740 | { |
739 | struct ncp_privatedata_ioctl user; | 741 | struct ncp_privatedata_ioctl user; |
740 | void* new; | 742 | void* new; |
@@ -762,12 +764,12 @@ outrel: | |||
762 | } else { | 764 | } else { |
763 | new = NULL; | 765 | new = NULL; |
764 | } | 766 | } |
765 | /* enter critical section */ | 767 | down_write(&server->auth_rwsem); |
766 | old = server->priv.data; | 768 | old = server->priv.data; |
767 | oldlen = server->priv.len; | 769 | oldlen = server->priv.len; |
768 | server->priv.len = user.len; | 770 | server->priv.len = user.len; |
769 | server->priv.data = new; | 771 | server->priv.data = new; |
770 | /* leave critical section */ | 772 | up_write(&server->auth_rwsem); |
771 | kfree(old); | 773 | kfree(old); |
772 | return 0; | 774 | return 0; |
773 | } | 775 | } |
@@ -775,17 +777,13 @@ outrel: | |||
775 | #ifdef CONFIG_NCPFS_NLS | 777 | #ifdef CONFIG_NCPFS_NLS |
776 | case NCP_IOC_SETCHARSETS: | 778 | case NCP_IOC_SETCHARSETS: |
777 | return ncp_set_charsets(server, argp); | 779 | return ncp_set_charsets(server, argp); |
778 | 780 | ||
779 | case NCP_IOC_GETCHARSETS: | 781 | case NCP_IOC_GETCHARSETS: |
780 | return ncp_get_charsets(server, argp); | 782 | return ncp_get_charsets(server, argp); |
781 | 783 | ||
782 | #endif /* CONFIG_NCPFS_NLS */ | 784 | #endif /* CONFIG_NCPFS_NLS */ |
783 | 785 | ||
784 | case NCP_IOC_SETDENTRYTTL: | 786 | case NCP_IOC_SETDENTRYTTL: |
785 | if (file_permission(filp, MAY_WRITE) != 0 && | ||
786 | uid != server->m.mounted_uid) | ||
787 | return -EACCES; | ||
788 | |||
789 | { | 787 | { |
790 | u_int32_t user; | 788 | u_int32_t user; |
791 | 789 | ||
@@ -795,13 +793,13 @@ outrel: | |||
795 | if (user > 20000) | 793 | if (user > 20000) |
796 | return -EINVAL; | 794 | return -EINVAL; |
797 | user = (user * HZ) / 1000; | 795 | user = (user * HZ) / 1000; |
798 | server->dentry_ttl = user; | 796 | atomic_set(&server->dentry_ttl, user); |
799 | return 0; | 797 | return 0; |
800 | } | 798 | } |
801 | 799 | ||
802 | case NCP_IOC_GETDENTRYTTL: | 800 | case NCP_IOC_GETDENTRYTTL: |
803 | { | 801 | { |
804 | u_int32_t user = (server->dentry_ttl * 1000) / HZ; | 802 | u_int32_t user = (atomic_read(&server->dentry_ttl) * 1000) / HZ; |
805 | if (copy_to_user(argp, &user, sizeof(user))) | 803 | if (copy_to_user(argp, &user, sizeof(user))) |
806 | return -EFAULT; | 804 | return -EFAULT; |
807 | return 0; | 805 | return 0; |
@@ -811,59 +809,103 @@ outrel: | |||
811 | return -EINVAL; | 809 | return -EINVAL; |
812 | } | 810 | } |
813 | 811 | ||
814 | static int ncp_ioctl_need_write(unsigned int cmd) | 812 | long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
815 | { | 813 | { |
814 | struct inode *inode = filp->f_dentry->d_inode; | ||
815 | struct ncp_server *server = NCP_SERVER(inode); | ||
816 | uid_t uid = current_uid(); | ||
817 | int need_drop_write = 0; | ||
818 | long ret; | ||
819 | |||
816 | switch (cmd) { | 820 | switch (cmd) { |
817 | case NCP_IOC_GET_FS_INFO: | ||
818 | case NCP_IOC_GET_FS_INFO_V2: | ||
819 | case NCP_IOC_NCPREQUEST: | ||
820 | case NCP_IOC_SETDENTRYTTL: | ||
821 | case NCP_IOC_SIGN_INIT: | ||
822 | case NCP_IOC_LOCKUNLOCK: | ||
823 | case NCP_IOC_SET_SIGN_WANTED: | ||
824 | return 1; | ||
825 | case NCP_IOC_GETOBJECTNAME: | ||
826 | case NCP_IOC_SETOBJECTNAME: | ||
827 | case NCP_IOC_GETPRIVATEDATA: | ||
828 | case NCP_IOC_SETPRIVATEDATA: | ||
829 | case NCP_IOC_SETCHARSETS: | 821 | case NCP_IOC_SETCHARSETS: |
830 | case NCP_IOC_GETCHARSETS: | ||
831 | case NCP_IOC_CONN_LOGGED_IN: | 822 | case NCP_IOC_CONN_LOGGED_IN: |
832 | case NCP_IOC_GETDENTRYTTL: | ||
833 | case NCP_IOC_GETMOUNTUID2: | ||
834 | case NCP_IOC_SIGN_WANTED: | ||
835 | case NCP_IOC_GETROOT: | ||
836 | case NCP_IOC_SETROOT: | 823 | case NCP_IOC_SETROOT: |
837 | return 0; | 824 | if (!capable(CAP_SYS_ADMIN)) { |
838 | default: | 825 | ret = -EACCES; |
839 | /* unknown IOCTL command, assume write */ | 826 | goto out; |
840 | return 1; | 827 | } |
828 | break; | ||
841 | } | 829 | } |
842 | } | 830 | if (server->m.mounted_uid != uid) { |
843 | 831 | switch (cmd) { | |
844 | long ncp_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | ||
845 | { | ||
846 | long ret; | ||
847 | |||
848 | lock_kernel(); | ||
849 | if (ncp_ioctl_need_write(cmd)) { | ||
850 | /* | 832 | /* |
851 | * inside the ioctl(), any failures which | 833 | * Only mount owner can issue these ioctls. Information |
852 | * are because of file_permission() are | 834 | * necessary to authenticate to other NDS servers are |
853 | * -EACCESS, so it seems consistent to keep | 835 | * stored here. |
854 | * that here. | ||
855 | */ | 836 | */ |
856 | if (mnt_want_write(filp->f_path.mnt)) { | 837 | case NCP_IOC_GETOBJECTNAME: |
838 | case NCP_IOC_SETOBJECTNAME: | ||
839 | case NCP_IOC_GETPRIVATEDATA: | ||
840 | case NCP_IOC_SETPRIVATEDATA: | ||
841 | #ifdef CONFIG_COMPAT | ||
842 | case NCP_IOC_GETOBJECTNAME_32: | ||
843 | case NCP_IOC_SETOBJECTNAME_32: | ||
844 | case NCP_IOC_GETPRIVATEDATA_32: | ||
845 | case NCP_IOC_SETPRIVATEDATA_32: | ||
846 | #endif | ||
857 | ret = -EACCES; | 847 | ret = -EACCES; |
858 | goto out; | 848 | goto out; |
849 | /* | ||
850 | * These require write access on the inode if user id | ||
851 | * does not match. Note that they do not write to the | ||
852 | * file... But old code did mnt_want_write, so I keep | ||
853 | * it as is. Of course not for mountpoint owner, as | ||
854 | * that breaks read-only mounts altogether as ncpmount | ||
855 | * needs working NCP_IOC_NCPREQUEST and | ||
856 | * NCP_IOC_GET_FS_INFO. Some of these codes (setdentryttl, | ||
857 | * signinit, setsignwanted) should be probably restricted | ||
858 | * to owner only, or even more to CAP_SYS_ADMIN). | ||
859 | */ | ||
860 | case NCP_IOC_GET_FS_INFO: | ||
861 | case NCP_IOC_GET_FS_INFO_V2: | ||
862 | case NCP_IOC_NCPREQUEST: | ||
863 | case NCP_IOC_SETDENTRYTTL: | ||
864 | case NCP_IOC_SIGN_INIT: | ||
865 | case NCP_IOC_LOCKUNLOCK: | ||
866 | case NCP_IOC_SET_SIGN_WANTED: | ||
867 | #ifdef CONFIG_COMPAT | ||
868 | case NCP_IOC_GET_FS_INFO_V2_32: | ||
869 | case NCP_IOC_NCPREQUEST_32: | ||
870 | #endif | ||
871 | ret = mnt_want_write_file(filp); | ||
872 | if (ret) | ||
873 | goto out; | ||
874 | need_drop_write = 1; | ||
875 | ret = inode_permission(inode, MAY_WRITE); | ||
876 | if (ret) | ||
877 | goto outDropWrite; | ||
878 | break; | ||
879 | /* | ||
880 | * Read access required. | ||
881 | */ | ||
882 | case NCP_IOC_GETMOUNTUID16: | ||
883 | case NCP_IOC_GETMOUNTUID32: | ||
884 | case NCP_IOC_GETMOUNTUID64: | ||
885 | case NCP_IOC_GETROOT: | ||
886 | case NCP_IOC_SIGN_WANTED: | ||
887 | ret = inode_permission(inode, MAY_READ); | ||
888 | if (ret) | ||
889 | goto out; | ||
890 | break; | ||
891 | /* | ||
892 | * Anybody can read these. | ||
893 | */ | ||
894 | case NCP_IOC_GETCHARSETS: | ||
895 | case NCP_IOC_GETDENTRYTTL: | ||
896 | default: | ||
897 | /* Three codes below are protected by CAP_SYS_ADMIN above. */ | ||
898 | case NCP_IOC_SETCHARSETS: | ||
899 | case NCP_IOC_CONN_LOGGED_IN: | ||
900 | case NCP_IOC_SETROOT: | ||
901 | break; | ||
859 | } | 902 | } |
860 | } | 903 | } |
861 | ret = __ncp_ioctl(filp, cmd, arg); | 904 | ret = __ncp_ioctl(inode, cmd, arg); |
862 | if (ncp_ioctl_need_write(cmd)) | 905 | outDropWrite: |
906 | if (need_drop_write) | ||
863 | mnt_drop_write(filp->f_path.mnt); | 907 | mnt_drop_write(filp->f_path.mnt); |
864 | |||
865 | out: | 908 | out: |
866 | unlock_kernel(); | ||
867 | return ret; | 909 | return ret; |
868 | } | 910 | } |
869 | 911 | ||
@@ -872,10 +914,8 @@ long ncp_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
872 | { | 914 | { |
873 | long ret; | 915 | long ret; |
874 | 916 | ||
875 | lock_kernel(); | ||
876 | arg = (unsigned long) compat_ptr(arg); | 917 | arg = (unsigned long) compat_ptr(arg); |
877 | ret = ncp_ioctl(file, cmd, arg); | 918 | ret = ncp_ioctl(file, cmd, arg); |
878 | unlock_kernel(); | ||
879 | return ret; | 919 | return ret; |
880 | } | 920 | } |
881 | #endif | 921 | #endif |
diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 0ec6237a5970..a95615a0b6ac 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c | |||
@@ -107,17 +107,17 @@ ncp_reply_data(struct ncp_server *server, int offset) | |||
107 | return &(server->packet[sizeof(struct ncp_reply_header) + offset]); | 107 | return &(server->packet[sizeof(struct ncp_reply_header) + offset]); |
108 | } | 108 | } |
109 | 109 | ||
110 | static inline u8 BVAL(void *data) | 110 | static inline u8 BVAL(const void *data) |
111 | { | 111 | { |
112 | return *(u8 *)data; | 112 | return *(const u8 *)data; |
113 | } | 113 | } |
114 | 114 | ||
115 | static u8 ncp_reply_byte(struct ncp_server *server, int offset) | 115 | static u8 ncp_reply_byte(struct ncp_server *server, int offset) |
116 | { | 116 | { |
117 | return *(u8 *)ncp_reply_data(server, offset); | 117 | return *(const u8 *)ncp_reply_data(server, offset); |
118 | } | 118 | } |
119 | 119 | ||
120 | static inline u16 WVAL_LH(void *data) | 120 | static inline u16 WVAL_LH(const void *data) |
121 | { | 121 | { |
122 | return get_unaligned_le16(data); | 122 | return get_unaligned_le16(data); |
123 | } | 123 | } |
@@ -134,7 +134,7 @@ ncp_reply_be16(struct ncp_server *server, int offset) | |||
134 | return get_unaligned_be16(ncp_reply_data(server, offset)); | 134 | return get_unaligned_be16(ncp_reply_data(server, offset)); |
135 | } | 135 | } |
136 | 136 | ||
137 | static inline u32 DVAL_LH(void *data) | 137 | static inline u32 DVAL_LH(const void *data) |
138 | { | 138 | { |
139 | return get_unaligned_le32(data); | 139 | return get_unaligned_le32(data); |
140 | } | 140 | } |
@@ -349,9 +349,9 @@ int ncp_dirhandle_free(struct ncp_server* server, __u8 dirhandle) { | |||
349 | return result; | 349 | return result; |
350 | } | 350 | } |
351 | 351 | ||
352 | void ncp_extract_file_info(void *structure, struct nw_info_struct *target) | 352 | void ncp_extract_file_info(const void *structure, struct nw_info_struct *target) |
353 | { | 353 | { |
354 | __u8 *name_len; | 354 | const __u8 *name_len; |
355 | const int info_struct_size = offsetof(struct nw_info_struct, nameLen); | 355 | const int info_struct_size = offsetof(struct nw_info_struct, nameLen); |
356 | 356 | ||
357 | memcpy(target, structure, info_struct_size); | 357 | memcpy(target, structure, info_struct_size); |
@@ -364,7 +364,7 @@ void ncp_extract_file_info(void *structure, struct nw_info_struct *target) | |||
364 | } | 364 | } |
365 | 365 | ||
366 | #ifdef CONFIG_NCPFS_NFS_NS | 366 | #ifdef CONFIG_NCPFS_NFS_NS |
367 | static inline void ncp_extract_nfs_info(unsigned char *structure, | 367 | static inline void ncp_extract_nfs_info(const unsigned char *structure, |
368 | struct nw_nfs_info *target) | 368 | struct nw_nfs_info *target) |
369 | { | 369 | { |
370 | target->mode = DVAL_LH(structure); | 370 | target->mode = DVAL_LH(structure); |
@@ -417,7 +417,7 @@ int ncp_obtain_nfs_info(struct ncp_server *server, | |||
417 | * Returns information for a (one-component) name relative to | 417 | * Returns information for a (one-component) name relative to |
418 | * the specified directory. | 418 | * the specified directory. |
419 | */ | 419 | */ |
420 | int ncp_obtain_info(struct ncp_server *server, struct inode *dir, char *path, | 420 | int ncp_obtain_info(struct ncp_server *server, struct inode *dir, const char *path, |
421 | struct nw_info_struct *target) | 421 | struct nw_info_struct *target) |
422 | { | 422 | { |
423 | __u8 volnum = NCP_FINFO(dir)->volNumber; | 423 | __u8 volnum = NCP_FINFO(dir)->volNumber; |
@@ -452,16 +452,16 @@ out: | |||
452 | #ifdef CONFIG_NCPFS_NFS_NS | 452 | #ifdef CONFIG_NCPFS_NFS_NS |
453 | static int | 453 | static int |
454 | ncp_obtain_DOS_dir_base(struct ncp_server *server, | 454 | ncp_obtain_DOS_dir_base(struct ncp_server *server, |
455 | __u8 volnum, __le32 dirent, | 455 | __u8 ns, __u8 volnum, __le32 dirent, |
456 | char *path, /* At most 1 component */ | 456 | const char *path, /* At most 1 component */ |
457 | __le32 *DOS_dir_base) | 457 | __le32 *DOS_dir_base) |
458 | { | 458 | { |
459 | int result; | 459 | int result; |
460 | 460 | ||
461 | ncp_init_request(server); | 461 | ncp_init_request(server); |
462 | ncp_add_byte(server, 6); /* subfunction */ | 462 | ncp_add_byte(server, 6); /* subfunction */ |
463 | ncp_add_byte(server, server->name_space[volnum]); | 463 | ncp_add_byte(server, ns); |
464 | ncp_add_byte(server, server->name_space[volnum]); | 464 | ncp_add_byte(server, ns); |
465 | ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */ | 465 | ncp_add_word(server, cpu_to_le16(0x8006)); /* get all */ |
466 | ncp_add_dword(server, RIM_DIRECTORY); | 466 | ncp_add_dword(server, RIM_DIRECTORY); |
467 | ncp_add_handle_path(server, volnum, dirent, 1, path); | 467 | ncp_add_handle_path(server, volnum, dirent, 1, path); |
@@ -523,10 +523,27 @@ ncp_get_known_namespace(struct ncp_server *server, __u8 volume) | |||
523 | #endif /* defined(CONFIG_NCPFS_OS2_NS) || defined(CONFIG_NCPFS_NFS_NS) */ | 523 | #endif /* defined(CONFIG_NCPFS_OS2_NS) || defined(CONFIG_NCPFS_NFS_NS) */ |
524 | } | 524 | } |
525 | 525 | ||
526 | int | ||
527 | ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns) | ||
528 | { | ||
529 | int ns = ncp_get_known_namespace(server, volume); | ||
530 | |||
531 | if (ret_ns) | ||
532 | *ret_ns = ns; | ||
533 | |||
534 | DPRINTK("lookup_vol: namespace[%d] = %d\n", | ||
535 | volume, server->name_space[volume]); | ||
536 | |||
537 | if (server->name_space[volume] == ns) | ||
538 | return 0; | ||
539 | server->name_space[volume] = ns; | ||
540 | return 1; | ||
541 | } | ||
542 | |||
526 | static int | 543 | static int |
527 | ncp_ObtainSpecificDirBase(struct ncp_server *server, | 544 | ncp_ObtainSpecificDirBase(struct ncp_server *server, |
528 | __u8 nsSrc, __u8 nsDst, __u8 vol_num, __le32 dir_base, | 545 | __u8 nsSrc, __u8 nsDst, __u8 vol_num, __le32 dir_base, |
529 | char *path, /* At most 1 component */ | 546 | const char *path, /* At most 1 component */ |
530 | __le32 *dirEntNum, __le32 *DosDirNum) | 547 | __le32 *dirEntNum, __le32 *DosDirNum) |
531 | { | 548 | { |
532 | int result; | 549 | int result; |
@@ -560,14 +577,13 @@ ncp_mount_subdir(struct ncp_server *server, | |||
560 | { | 577 | { |
561 | int dstNS; | 578 | int dstNS; |
562 | int result; | 579 | int result; |
563 | 580 | ||
564 | dstNS = ncp_get_known_namespace(server, volNumber); | 581 | ncp_update_known_namespace(server, volNumber, &dstNS); |
565 | if ((result = ncp_ObtainSpecificDirBase(server, srcNS, dstNS, volNumber, | 582 | if ((result = ncp_ObtainSpecificDirBase(server, srcNS, dstNS, volNumber, |
566 | dirEntNum, NULL, newDirEnt, newDosEnt)) != 0) | 583 | dirEntNum, NULL, newDirEnt, newDosEnt)) != 0) |
567 | { | 584 | { |
568 | return result; | 585 | return result; |
569 | } | 586 | } |
570 | server->name_space[volNumber] = dstNS; | ||
571 | *volume = volNumber; | 587 | *volume = volNumber; |
572 | server->m.mounted_vol[1] = 0; | 588 | server->m.mounted_vol[1] = 0; |
573 | server->m.mounted_vol[0] = 'X'; | 589 | server->m.mounted_vol[0] = 'X'; |
@@ -575,11 +591,10 @@ ncp_mount_subdir(struct ncp_server *server, | |||
575 | } | 591 | } |
576 | 592 | ||
577 | int | 593 | int |
578 | ncp_get_volume_root(struct ncp_server *server, const char *volname, | 594 | ncp_get_volume_root(struct ncp_server *server, |
579 | __u32* volume, __le32* dirent, __le32* dosdirent) | 595 | const char *volname, __u32* volume, __le32* dirent, __le32* dosdirent) |
580 | { | 596 | { |
581 | int result; | 597 | int result; |
582 | __u8 volnum; | ||
583 | 598 | ||
584 | DPRINTK("ncp_get_volume_root: looking up vol %s\n", volname); | 599 | DPRINTK("ncp_get_volume_root: looking up vol %s\n", volname); |
585 | 600 | ||
@@ -601,21 +616,14 @@ ncp_get_volume_root(struct ncp_server *server, const char *volname, | |||
601 | return result; | 616 | return result; |
602 | } | 617 | } |
603 | *dirent = *dosdirent = ncp_reply_dword(server, 4); | 618 | *dirent = *dosdirent = ncp_reply_dword(server, 4); |
604 | volnum = ncp_reply_byte(server, 8); | 619 | *volume = ncp_reply_byte(server, 8); |
605 | ncp_unlock_server(server); | 620 | ncp_unlock_server(server); |
606 | *volume = volnum; | ||
607 | |||
608 | server->name_space[volnum] = ncp_get_known_namespace(server, volnum); | ||
609 | |||
610 | DPRINTK("lookup_vol: namespace[%d] = %d\n", | ||
611 | volnum, server->name_space[volnum]); | ||
612 | |||
613 | return 0; | 621 | return 0; |
614 | } | 622 | } |
615 | 623 | ||
616 | int | 624 | int |
617 | ncp_lookup_volume(struct ncp_server *server, const char *volname, | 625 | ncp_lookup_volume(struct ncp_server *server, |
618 | struct nw_info_struct *target) | 626 | const char *volname, struct nw_info_struct *target) |
619 | { | 627 | { |
620 | int result; | 628 | int result; |
621 | 629 | ||
@@ -625,6 +633,7 @@ ncp_lookup_volume(struct ncp_server *server, const char *volname, | |||
625 | if (result) { | 633 | if (result) { |
626 | return result; | 634 | return result; |
627 | } | 635 | } |
636 | ncp_update_known_namespace(server, target->volNumber, NULL); | ||
628 | target->nameLen = strlen(volname); | 637 | target->nameLen = strlen(volname); |
629 | memcpy(target->entryName, volname, target->nameLen+1); | 638 | memcpy(target->entryName, volname, target->nameLen+1); |
630 | target->attributes = aDIR; | 639 | target->attributes = aDIR; |
@@ -676,8 +685,8 @@ int ncp_modify_nfs_info(struct ncp_server *server, __u8 volnum, __le32 dirent, | |||
676 | { | 685 | { |
677 | int result = 0; | 686 | int result = 0; |
678 | 687 | ||
688 | ncp_init_request(server); | ||
679 | if (server->name_space[volnum] == NW_NS_NFS) { | 689 | if (server->name_space[volnum] == NW_NS_NFS) { |
680 | ncp_init_request(server); | ||
681 | ncp_add_byte(server, 25); /* subfunction */ | 690 | ncp_add_byte(server, 25); /* subfunction */ |
682 | ncp_add_byte(server, server->name_space[volnum]); | 691 | ncp_add_byte(server, server->name_space[volnum]); |
683 | ncp_add_byte(server, NW_NS_NFS); | 692 | ncp_add_byte(server, NW_NS_NFS); |
@@ -690,8 +699,8 @@ int ncp_modify_nfs_info(struct ncp_server *server, __u8 volnum, __le32 dirent, | |||
690 | ncp_add_dword_lh(server, 1); /* nlinks */ | 699 | ncp_add_dword_lh(server, 1); /* nlinks */ |
691 | ncp_add_dword_lh(server, rdev); | 700 | ncp_add_dword_lh(server, rdev); |
692 | result = ncp_request(server, 87); | 701 | result = ncp_request(server, 87); |
693 | ncp_unlock_server(server); | ||
694 | } | 702 | } |
703 | ncp_unlock_server(server); | ||
695 | return result; | 704 | return result; |
696 | } | 705 | } |
697 | #endif | 706 | #endif |
@@ -700,7 +709,7 @@ int ncp_modify_nfs_info(struct ncp_server *server, __u8 volnum, __le32 dirent, | |||
700 | static int | 709 | static int |
701 | ncp_DeleteNSEntry(struct ncp_server *server, | 710 | ncp_DeleteNSEntry(struct ncp_server *server, |
702 | __u8 have_dir_base, __u8 volnum, __le32 dirent, | 711 | __u8 have_dir_base, __u8 volnum, __le32 dirent, |
703 | char* name, __u8 ns, __le16 attr) | 712 | const char* name, __u8 ns, __le16 attr) |
704 | { | 713 | { |
705 | int result; | 714 | int result; |
706 | 715 | ||
@@ -734,23 +743,25 @@ ncp_del_file_or_subdir2(struct ncp_server *server, | |||
734 | 743 | ||
735 | int | 744 | int |
736 | ncp_del_file_or_subdir(struct ncp_server *server, | 745 | ncp_del_file_or_subdir(struct ncp_server *server, |
737 | struct inode *dir, char *name) | 746 | struct inode *dir, const char *name) |
738 | { | 747 | { |
739 | __u8 volnum = NCP_FINFO(dir)->volNumber; | 748 | __u8 volnum = NCP_FINFO(dir)->volNumber; |
740 | __le32 dirent = NCP_FINFO(dir)->dirEntNum; | 749 | __le32 dirent = NCP_FINFO(dir)->dirEntNum; |
750 | int name_space; | ||
741 | 751 | ||
752 | name_space = server->name_space[volnum]; | ||
742 | #ifdef CONFIG_NCPFS_NFS_NS | 753 | #ifdef CONFIG_NCPFS_NFS_NS |
743 | if (server->name_space[volnum]==NW_NS_NFS) | 754 | if (name_space == NW_NS_NFS) |
744 | { | 755 | { |
745 | int result; | 756 | int result; |
746 | 757 | ||
747 | result=ncp_obtain_DOS_dir_base(server, volnum, dirent, name, &dirent); | 758 | result=ncp_obtain_DOS_dir_base(server, name_space, volnum, dirent, name, &dirent); |
748 | if (result) return result; | 759 | if (result) return result; |
749 | return ncp_DeleteNSEntry(server, 1, volnum, dirent, NULL, NW_NS_DOS, cpu_to_le16(0x8006)); | 760 | name = NULL; |
761 | name_space = NW_NS_DOS; | ||
750 | } | 762 | } |
751 | else | ||
752 | #endif /* CONFIG_NCPFS_NFS_NS */ | 763 | #endif /* CONFIG_NCPFS_NFS_NS */ |
753 | return ncp_DeleteNSEntry(server, 1, volnum, dirent, name, server->name_space[volnum], cpu_to_le16(0x8006)); | 764 | return ncp_DeleteNSEntry(server, 1, volnum, dirent, name, name_space, cpu_to_le16(0x8006)); |
754 | } | 765 | } |
755 | 766 | ||
756 | static inline void ConvertToNWfromDWORD(__u16 v0, __u16 v1, __u8 ret[6]) | 767 | static inline void ConvertToNWfromDWORD(__u16 v0, __u16 v1, __u8 ret[6]) |
@@ -765,7 +776,7 @@ static inline void ConvertToNWfromDWORD(__u16 v0, __u16 v1, __u8 ret[6]) | |||
765 | /* If both dir and name are NULL, then in target there's already a | 776 | /* If both dir and name are NULL, then in target there's already a |
766 | looked-up entry that wants to be opened. */ | 777 | looked-up entry that wants to be opened. */ |
767 | int ncp_open_create_file_or_subdir(struct ncp_server *server, | 778 | int ncp_open_create_file_or_subdir(struct ncp_server *server, |
768 | struct inode *dir, char *name, | 779 | struct inode *dir, const char *name, |
769 | int open_create_mode, | 780 | int open_create_mode, |
770 | __le32 create_attributes, | 781 | __le32 create_attributes, |
771 | __le16 desired_acc_rights, | 782 | __le16 desired_acc_rights, |
@@ -890,8 +901,8 @@ int ncp_search_for_fileset(struct ncp_server *server, | |||
890 | 901 | ||
891 | static int | 902 | static int |
892 | ncp_RenameNSEntry(struct ncp_server *server, | 903 | ncp_RenameNSEntry(struct ncp_server *server, |
893 | struct inode *old_dir, char *old_name, __le16 old_type, | 904 | struct inode *old_dir, const char *old_name, __le16 old_type, |
894 | struct inode *new_dir, char *new_name) | 905 | struct inode *new_dir, const char *new_name) |
895 | { | 906 | { |
896 | int result = -EINVAL; | 907 | int result = -EINVAL; |
897 | 908 | ||
@@ -929,8 +940,8 @@ out: | |||
929 | } | 940 | } |
930 | 941 | ||
931 | int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, | 942 | int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, |
932 | struct inode *old_dir, char *old_name, | 943 | struct inode *old_dir, const char *old_name, |
933 | struct inode *new_dir, char *new_name) | 944 | struct inode *new_dir, const char *new_name) |
934 | { | 945 | { |
935 | int result; | 946 | int result; |
936 | __le16 old_type = cpu_to_le16(0x06); | 947 | __le16 old_type = cpu_to_le16(0x06); |
@@ -958,7 +969,7 @@ int | |||
958 | ncp_read_kernel(struct ncp_server *server, const char *file_id, | 969 | ncp_read_kernel(struct ncp_server *server, const char *file_id, |
959 | __u32 offset, __u16 to_read, char *target, int *bytes_read) | 970 | __u32 offset, __u16 to_read, char *target, int *bytes_read) |
960 | { | 971 | { |
961 | char *source; | 972 | const char *source; |
962 | int result; | 973 | int result; |
963 | 974 | ||
964 | ncp_init_request(server); | 975 | ncp_init_request(server); |
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h index 2441d1ab57dc..3c57eca634ce 100644 --- a/fs/ncpfs/ncplib_kernel.h +++ b/fs/ncpfs/ncplib_kernel.h | |||
@@ -65,10 +65,11 @@ static inline void ncp_inode_close(struct inode *inode) { | |||
65 | atomic_dec(&NCP_FINFO(inode)->opened); | 65 | atomic_dec(&NCP_FINFO(inode)->opened); |
66 | } | 66 | } |
67 | 67 | ||
68 | void ncp_extract_file_info(void* src, struct nw_info_struct* target); | 68 | void ncp_extract_file_info(const void* src, struct nw_info_struct* target); |
69 | int ncp_obtain_info(struct ncp_server *server, struct inode *, char *, | 69 | int ncp_obtain_info(struct ncp_server *server, struct inode *, const char *, |
70 | struct nw_info_struct *target); | 70 | struct nw_info_struct *target); |
71 | int ncp_obtain_nfs_info(struct ncp_server *server, struct nw_info_struct *target); | 71 | int ncp_obtain_nfs_info(struct ncp_server *server, struct nw_info_struct *target); |
72 | int ncp_update_known_namespace(struct ncp_server *server, __u8 volume, int *ret_ns); | ||
72 | int ncp_get_volume_root(struct ncp_server *server, const char *volname, | 73 | int ncp_get_volume_root(struct ncp_server *server, const char *volname, |
73 | __u32 *volume, __le32 *dirent, __le32 *dosdirent); | 74 | __u32 *volume, __le32 *dirent, __le32 *dosdirent); |
74 | int ncp_lookup_volume(struct ncp_server *, const char *, struct nw_info_struct *); | 75 | int ncp_lookup_volume(struct ncp_server *, const char *, struct nw_info_struct *); |
@@ -80,8 +81,8 @@ int ncp_modify_nfs_info(struct ncp_server *, __u8 volnum, __le32 dirent, | |||
80 | __u32 mode, __u32 rdev); | 81 | __u32 mode, __u32 rdev); |
81 | 82 | ||
82 | int ncp_del_file_or_subdir2(struct ncp_server *, struct dentry*); | 83 | int ncp_del_file_or_subdir2(struct ncp_server *, struct dentry*); |
83 | int ncp_del_file_or_subdir(struct ncp_server *, struct inode *, char *); | 84 | int ncp_del_file_or_subdir(struct ncp_server *, struct inode *, const char *); |
84 | int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, char *, | 85 | int ncp_open_create_file_or_subdir(struct ncp_server *, struct inode *, const char *, |
85 | int, __le32, __le16, struct ncp_entry_info *); | 86 | int, __le32, __le16, struct ncp_entry_info *); |
86 | 87 | ||
87 | int ncp_initialize_search(struct ncp_server *, struct inode *, | 88 | int ncp_initialize_search(struct ncp_server *, struct inode *, |
@@ -93,7 +94,7 @@ int ncp_search_for_fileset(struct ncp_server *server, | |||
93 | char** rbuf, size_t* rsize); | 94 | char** rbuf, size_t* rsize); |
94 | 95 | ||
95 | int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, | 96 | int ncp_ren_or_mov_file_or_subdir(struct ncp_server *server, |
96 | struct inode *, char *, struct inode *, char *); | 97 | struct inode *, const char *, struct inode *, const char *); |
97 | 98 | ||
98 | 99 | ||
99 | int | 100 | int |
@@ -170,13 +171,13 @@ static inline int ncp_strnicmp(struct nls_table *t, const unsigned char *s1, | |||
170 | #endif /* CONFIG_NCPFS_NLS */ | 171 | #endif /* CONFIG_NCPFS_NLS */ |
171 | 172 | ||
172 | #define NCP_GET_AGE(dentry) (jiffies - (dentry)->d_time) | 173 | #define NCP_GET_AGE(dentry) (jiffies - (dentry)->d_time) |
173 | #define NCP_MAX_AGE(server) ((server)->dentry_ttl) | 174 | #define NCP_MAX_AGE(server) atomic_read(&(server)->dentry_ttl) |
174 | #define NCP_TEST_AGE(server,dentry) (NCP_GET_AGE(dentry) < NCP_MAX_AGE(server)) | 175 | #define NCP_TEST_AGE(server,dentry) (NCP_GET_AGE(dentry) < NCP_MAX_AGE(server)) |
175 | 176 | ||
176 | static inline void | 177 | static inline void |
177 | ncp_age_dentry(struct ncp_server* server, struct dentry* dentry) | 178 | ncp_age_dentry(struct ncp_server* server, struct dentry* dentry) |
178 | { | 179 | { |
179 | dentry->d_time = jiffies - server->dentry_ttl; | 180 | dentry->d_time = jiffies - NCP_MAX_AGE(server); |
180 | } | 181 | } |
181 | 182 | ||
182 | static inline void | 183 | static inline void |
diff --git a/fs/ncpfs/ncpsign_kernel.c b/fs/ncpfs/ncpsign_kernel.c index 7c0b5c21e6cf..d8b2d7e6910b 100644 --- a/fs/ncpfs/ncpsign_kernel.c +++ b/fs/ncpfs/ncpsign_kernel.c | |||
@@ -15,21 +15,21 @@ | |||
15 | 15 | ||
16 | /* i386: 32-bit, little endian, handles mis-alignment */ | 16 | /* i386: 32-bit, little endian, handles mis-alignment */ |
17 | #ifdef __i386__ | 17 | #ifdef __i386__ |
18 | #define GET_LE32(p) (*(int *)(p)) | 18 | #define GET_LE32(p) (*(const int *)(p)) |
19 | #define PUT_LE32(p,v) { *(int *)(p)=v; } | 19 | #define PUT_LE32(p,v) { *(int *)(p)=v; } |
20 | #else | 20 | #else |
21 | /* from include/ncplib.h */ | 21 | /* from include/ncplib.h */ |
22 | #define BVAL(buf,pos) (((__u8 *)(buf))[pos]) | 22 | #define BVAL(buf,pos) (((const __u8 *)(buf))[pos]) |
23 | #define PVAL(buf,pos) ((unsigned)BVAL(buf,pos)) | 23 | #define PVAL(buf,pos) ((unsigned)BVAL(buf,pos)) |
24 | #define BSET(buf,pos,val) (BVAL(buf,pos) = (val)) | 24 | #define BSET(buf,pos,val) (((__u8 *)(buf))[pos] = (val)) |
25 | 25 | ||
26 | static inline __u16 | 26 | static inline __u16 |
27 | WVAL_LH(__u8 * buf, int pos) | 27 | WVAL_LH(const __u8 * buf, int pos) |
28 | { | 28 | { |
29 | return PVAL(buf, pos) | PVAL(buf, pos + 1) << 8; | 29 | return PVAL(buf, pos) | PVAL(buf, pos + 1) << 8; |
30 | } | 30 | } |
31 | static inline __u32 | 31 | static inline __u32 |
32 | DVAL_LH(__u8 * buf, int pos) | 32 | DVAL_LH(const __u8 * buf, int pos) |
33 | { | 33 | { |
34 | return WVAL_LH(buf, pos) | WVAL_LH(buf, pos + 2) << 16; | 34 | return WVAL_LH(buf, pos) | WVAL_LH(buf, pos + 2) << 16; |
35 | } | 35 | } |
diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index c7ff6c700a6e..668bd267346e 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c | |||
@@ -746,7 +746,6 @@ static int ncp_do_request(struct ncp_server *server, int size, | |||
746 | return -EIO; | 746 | return -EIO; |
747 | } | 747 | } |
748 | if (!ncp_conn_valid(server)) { | 748 | if (!ncp_conn_valid(server)) { |
749 | printk(KERN_ERR "ncpfs: Connection invalid!\n"); | ||
750 | return -EIO; | 749 | return -EIO; |
751 | } | 750 | } |
752 | { | 751 | { |
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index f7e13db613cb..ba306658a6db 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -76,13 +76,17 @@ config NFS_V4 | |||
76 | 76 | ||
77 | config NFS_V4_1 | 77 | config NFS_V4_1 |
78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" | 78 | bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" |
79 | depends on NFS_V4 && EXPERIMENTAL | 79 | depends on NFS_FS && NFS_V4 && EXPERIMENTAL |
80 | select PNFS_FILE_LAYOUT | ||
80 | help | 81 | help |
81 | This option enables support for minor version 1 of the NFSv4 protocol | 82 | This option enables support for minor version 1 of the NFSv4 protocol |
82 | (draft-ietf-nfsv4-minorversion1) in the kernel's NFS client. | 83 | (RFC 5661) in the kernel's NFS client. |
83 | 84 | ||
84 | If unsure, say N. | 85 | If unsure, say N. |
85 | 86 | ||
87 | config PNFS_FILE_LAYOUT | ||
88 | tristate | ||
89 | |||
86 | config ROOT_NFS | 90 | config ROOT_NFS |
87 | bool "Root file system on NFS" | 91 | bool "Root file system on NFS" |
88 | depends on NFS_FS=y && IP_PNP | 92 | depends on NFS_FS=y && IP_PNP |
@@ -117,3 +121,14 @@ config NFS_USE_KERNEL_DNS | |||
117 | select DNS_RESOLVER | 121 | select DNS_RESOLVER |
118 | select KEYS | 122 | select KEYS |
119 | default y | 123 | default y |
124 | |||
125 | config NFS_USE_NEW_IDMAPPER | ||
126 | bool "Use the new idmapper upcall routine" | ||
127 | depends on NFS_V4 && KEYS | ||
128 | help | ||
129 | Say Y here if you want NFS to use the new idmapper upcall functions. | ||
130 | You will need /sbin/request-key (usually provided by the keyutils | ||
131 | package). For details, read | ||
132 | <file:Documentation/filesystems/nfs/idmapper.txt>. | ||
133 | |||
134 | If you are unsure, say N. | ||
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index da7fda639eac..4776ff9e3814 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -15,5 +15,9 @@ nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ | |||
15 | delegation.o idmap.o \ | 15 | delegation.o idmap.o \ |
16 | callback.o callback_xdr.o callback_proc.o \ | 16 | callback.o callback_xdr.o callback_proc.o \ |
17 | nfs4namespace.o | 17 | nfs4namespace.o |
18 | nfs-$(CONFIG_NFS_V4_1) += pnfs.o | ||
18 | nfs-$(CONFIG_SYSCTL) += sysctl.o | 19 | nfs-$(CONFIG_SYSCTL) += sysctl.o |
19 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o | 20 | nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o |
21 | |||
22 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o | ||
23 | nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o | ||
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index e17b49e2eabd..aeec017fe814 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
@@ -109,7 +109,7 @@ nfs4_callback_up(struct svc_serv *serv) | |||
109 | { | 109 | { |
110 | int ret; | 110 | int ret; |
111 | 111 | ||
112 | ret = svc_create_xprt(serv, "tcp", PF_INET, | 112 | ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET, |
113 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | 113 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); |
114 | if (ret <= 0) | 114 | if (ret <= 0) |
115 | goto out_err; | 115 | goto out_err; |
@@ -117,7 +117,7 @@ nfs4_callback_up(struct svc_serv *serv) | |||
117 | dprintk("NFS: Callback listener port = %u (af %u)\n", | 117 | dprintk("NFS: Callback listener port = %u (af %u)\n", |
118 | nfs_callback_tcpport, PF_INET); | 118 | nfs_callback_tcpport, PF_INET); |
119 | 119 | ||
120 | ret = svc_create_xprt(serv, "tcp", PF_INET6, | 120 | ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6, |
121 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); | 121 | nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS); |
122 | if (ret > 0) { | 122 | if (ret > 0) { |
123 | nfs_callback_tcpport6 = ret; | 123 | nfs_callback_tcpport6 = ret; |
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 930d10fecdaf..2950fca0c61b 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -118,11 +118,11 @@ int nfs41_validate_delegation_stateid(struct nfs_delegation *delegation, const n | |||
118 | if (delegation == NULL) | 118 | if (delegation == NULL) |
119 | return 0; | 119 | return 0; |
120 | 120 | ||
121 | /* seqid is 4-bytes long */ | 121 | if (stateid->stateid.seqid != 0) |
122 | if (((u32 *) &stateid->data)[0] != 0) | ||
123 | return 0; | 122 | return 0; |
124 | if (memcmp(&delegation->stateid.data[4], &stateid->data[4], | 123 | if (memcmp(&delegation->stateid.stateid.other, |
125 | sizeof(stateid->data)-4)) | 124 | &stateid->stateid.other, |
125 | NFS4_STATEID_OTHER_SIZE)) | ||
126 | return 0; | 126 | return 0; |
127 | 127 | ||
128 | return 1; | 128 | return 1; |
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e7340729af89..0870d0d4efc0 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "iostat.h" | 48 | #include "iostat.h" |
49 | #include "internal.h" | 49 | #include "internal.h" |
50 | #include "fscache.h" | 50 | #include "fscache.h" |
51 | #include "pnfs.h" | ||
51 | 52 | ||
52 | #define NFSDBG_FACILITY NFSDBG_CLIENT | 53 | #define NFSDBG_FACILITY NFSDBG_CLIENT |
53 | 54 | ||
@@ -155,7 +156,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ | |||
155 | cred = rpc_lookup_machine_cred(); | 156 | cred = rpc_lookup_machine_cred(); |
156 | if (!IS_ERR(cred)) | 157 | if (!IS_ERR(cred)) |
157 | clp->cl_machine_cred = cred; | 158 | clp->cl_machine_cred = cred; |
158 | 159 | #if defined(CONFIG_NFS_V4_1) | |
160 | INIT_LIST_HEAD(&clp->cl_layouts); | ||
161 | #endif | ||
159 | nfs_fscache_get_client_cookie(clp); | 162 | nfs_fscache_get_client_cookie(clp); |
160 | 163 | ||
161 | return clp; | 164 | return clp; |
@@ -252,6 +255,7 @@ void nfs_put_client(struct nfs_client *clp) | |||
252 | nfs_free_client(clp); | 255 | nfs_free_client(clp); |
253 | } | 256 | } |
254 | } | 257 | } |
258 | EXPORT_SYMBOL_GPL(nfs_put_client); | ||
255 | 259 | ||
256 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 260 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
257 | /* | 261 | /* |
@@ -601,6 +605,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp, | |||
601 | { | 605 | { |
602 | struct rpc_clnt *clnt = NULL; | 606 | struct rpc_clnt *clnt = NULL; |
603 | struct rpc_create_args args = { | 607 | struct rpc_create_args args = { |
608 | .net = &init_net, | ||
604 | .protocol = clp->cl_proto, | 609 | .protocol = clp->cl_proto, |
605 | .address = (struct sockaddr *)&clp->cl_addr, | 610 | .address = (struct sockaddr *)&clp->cl_addr, |
606 | .addrsize = clp->cl_addrlen, | 611 | .addrsize = clp->cl_addrlen, |
@@ -635,7 +640,8 @@ static int nfs_create_rpc_client(struct nfs_client *clp, | |||
635 | */ | 640 | */ |
636 | static void nfs_destroy_server(struct nfs_server *server) | 641 | static void nfs_destroy_server(struct nfs_server *server) |
637 | { | 642 | { |
638 | if (!(server->flags & NFS_MOUNT_NONLM)) | 643 | if (!(server->flags & NFS_MOUNT_LOCAL_FLOCK) || |
644 | !(server->flags & NFS_MOUNT_LOCAL_FCNTL)) | ||
639 | nlmclnt_done(server->nlm_host); | 645 | nlmclnt_done(server->nlm_host); |
640 | } | 646 | } |
641 | 647 | ||
@@ -657,7 +663,8 @@ static int nfs_start_lockd(struct nfs_server *server) | |||
657 | 663 | ||
658 | if (nlm_init.nfs_version > 3) | 664 | if (nlm_init.nfs_version > 3) |
659 | return 0; | 665 | return 0; |
660 | if (server->flags & NFS_MOUNT_NONLM) | 666 | if ((server->flags & NFS_MOUNT_LOCAL_FLOCK) && |
667 | (server->flags & NFS_MOUNT_LOCAL_FCNTL)) | ||
661 | return 0; | 668 | return 0; |
662 | 669 | ||
663 | switch (clp->cl_proto) { | 670 | switch (clp->cl_proto) { |
@@ -898,11 +905,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
898 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) | 905 | if (server->wsize > NFS_MAX_FILE_IO_SIZE) |
899 | server->wsize = NFS_MAX_FILE_IO_SIZE; | 906 | server->wsize = NFS_MAX_FILE_IO_SIZE; |
900 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 907 | server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; |
908 | set_pnfs_layoutdriver(server, fsinfo->layouttype); | ||
909 | |||
901 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); | 910 | server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); |
902 | 911 | ||
903 | server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); | 912 | server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); |
904 | if (server->dtsize > PAGE_CACHE_SIZE) | 913 | if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES) |
905 | server->dtsize = PAGE_CACHE_SIZE; | 914 | server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES; |
906 | if (server->dtsize > server->rsize) | 915 | if (server->dtsize > server->rsize) |
907 | server->dtsize = server->rsize; | 916 | server->dtsize = server->rsize; |
908 | 917 | ||
@@ -913,6 +922,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, struct nfs_fsinfo * | |||
913 | 922 | ||
914 | server->maxfilesize = fsinfo->maxfilesize; | 923 | server->maxfilesize = fsinfo->maxfilesize; |
915 | 924 | ||
925 | server->time_delta = fsinfo->time_delta; | ||
926 | |||
916 | /* We're airborne Set socket buffersize */ | 927 | /* We're airborne Set socket buffersize */ |
917 | rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); | 928 | rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); |
918 | } | 929 | } |
@@ -935,6 +946,7 @@ static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, str | |||
935 | } | 946 | } |
936 | 947 | ||
937 | fsinfo.fattr = fattr; | 948 | fsinfo.fattr = fattr; |
949 | fsinfo.layouttype = 0; | ||
938 | error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); | 950 | error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); |
939 | if (error < 0) | 951 | if (error < 0) |
940 | goto out_error; | 952 | goto out_error; |
@@ -1017,6 +1029,7 @@ void nfs_free_server(struct nfs_server *server) | |||
1017 | { | 1029 | { |
1018 | dprintk("--> nfs_free_server()\n"); | 1030 | dprintk("--> nfs_free_server()\n"); |
1019 | 1031 | ||
1032 | unset_pnfs_layoutdriver(server); | ||
1020 | spin_lock(&nfs_client_lock); | 1033 | spin_lock(&nfs_client_lock); |
1021 | list_del(&server->client_link); | 1034 | list_del(&server->client_link); |
1022 | list_del(&server->master_link); | 1035 | list_del(&server->master_link); |
@@ -1356,8 +1369,9 @@ static int nfs4_init_server(struct nfs_server *server, | |||
1356 | 1369 | ||
1357 | /* Initialise the client representation from the mount data */ | 1370 | /* Initialise the client representation from the mount data */ |
1358 | server->flags = data->flags; | 1371 | server->flags = data->flags; |
1359 | server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR| | 1372 | server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK; |
1360 | NFS_CAP_POSIX_LOCK; | 1373 | if (!(data->flags & NFS_MOUNT_NORDIRPLUS)) |
1374 | server->caps |= NFS_CAP_READDIRPLUS; | ||
1361 | server->options = data->options; | 1375 | server->options = data->options; |
1362 | 1376 | ||
1363 | /* Get a client record */ | 1377 | /* Get a client record */ |
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index b9c3c43cea1d..232a7eead33a 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c | |||
@@ -71,20 +71,20 @@ static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_ | |||
71 | if (inode->i_flock == NULL) | 71 | if (inode->i_flock == NULL) |
72 | goto out; | 72 | goto out; |
73 | 73 | ||
74 | /* Protect inode->i_flock using the BKL */ | 74 | /* Protect inode->i_flock using the file locks lock */ |
75 | lock_kernel(); | 75 | lock_flocks(); |
76 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 76 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
77 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 77 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
78 | continue; | 78 | continue; |
79 | if (nfs_file_open_context(fl->fl_file) != ctx) | 79 | if (nfs_file_open_context(fl->fl_file) != ctx) |
80 | continue; | 80 | continue; |
81 | unlock_kernel(); | 81 | unlock_flocks(); |
82 | status = nfs4_lock_delegation_recall(state, fl); | 82 | status = nfs4_lock_delegation_recall(state, fl); |
83 | if (status < 0) | 83 | if (status < 0) |
84 | goto out; | 84 | goto out; |
85 | lock_kernel(); | 85 | lock_flocks(); |
86 | } | 86 | } |
87 | unlock_kernel(); | 87 | unlock_flocks(); |
88 | out: | 88 | out: |
89 | return status; | 89 | return status; |
90 | } | 90 | } |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e257172d438c..07ac3847e562 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -33,11 +33,12 @@ | |||
33 | #include <linux/namei.h> | 33 | #include <linux/namei.h> |
34 | #include <linux/mount.h> | 34 | #include <linux/mount.h> |
35 | #include <linux/sched.h> | 35 | #include <linux/sched.h> |
36 | #include <linux/vmalloc.h> | ||
36 | 37 | ||
37 | #include "nfs4_fs.h" | ||
38 | #include "delegation.h" | 38 | #include "delegation.h" |
39 | #include "iostat.h" | 39 | #include "iostat.h" |
40 | #include "internal.h" | 40 | #include "internal.h" |
41 | #include "fscache.h" | ||
41 | 42 | ||
42 | /* #define NFS_DEBUG_VERBOSE 1 */ | 43 | /* #define NFS_DEBUG_VERBOSE 1 */ |
43 | 44 | ||
@@ -55,6 +56,7 @@ static int nfs_rename(struct inode *, struct dentry *, | |||
55 | struct inode *, struct dentry *); | 56 | struct inode *, struct dentry *); |
56 | static int nfs_fsync_dir(struct file *, int); | 57 | static int nfs_fsync_dir(struct file *, int); |
57 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); | 58 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); |
59 | static int nfs_readdir_clear_array(struct page*, gfp_t); | ||
58 | 60 | ||
59 | const struct file_operations nfs_dir_operations = { | 61 | const struct file_operations nfs_dir_operations = { |
60 | .llseek = nfs_llseek_dir, | 62 | .llseek = nfs_llseek_dir, |
@@ -80,6 +82,10 @@ const struct inode_operations nfs_dir_inode_operations = { | |||
80 | .setattr = nfs_setattr, | 82 | .setattr = nfs_setattr, |
81 | }; | 83 | }; |
82 | 84 | ||
85 | const struct address_space_operations nfs_dir_addr_space_ops = { | ||
86 | .releasepage = nfs_readdir_clear_array, | ||
87 | }; | ||
88 | |||
83 | #ifdef CONFIG_NFS_V3 | 89 | #ifdef CONFIG_NFS_V3 |
84 | const struct inode_operations nfs3_dir_inode_operations = { | 90 | const struct inode_operations nfs3_dir_inode_operations = { |
85 | .create = nfs_create, | 91 | .create = nfs_create, |
@@ -104,8 +110,9 @@ const struct inode_operations nfs3_dir_inode_operations = { | |||
104 | #ifdef CONFIG_NFS_V4 | 110 | #ifdef CONFIG_NFS_V4 |
105 | 111 | ||
106 | static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); | 112 | static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); |
113 | static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, struct nameidata *nd); | ||
107 | const struct inode_operations nfs4_dir_inode_operations = { | 114 | const struct inode_operations nfs4_dir_inode_operations = { |
108 | .create = nfs_create, | 115 | .create = nfs_open_create, |
109 | .lookup = nfs_atomic_lookup, | 116 | .lookup = nfs_atomic_lookup, |
110 | .link = nfs_link, | 117 | .link = nfs_link, |
111 | .unlink = nfs_unlink, | 118 | .unlink = nfs_unlink, |
@@ -150,51 +157,197 @@ nfs_opendir(struct inode *inode, struct file *filp) | |||
150 | return res; | 157 | return res; |
151 | } | 158 | } |
152 | 159 | ||
153 | typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); | 160 | struct nfs_cache_array_entry { |
161 | u64 cookie; | ||
162 | u64 ino; | ||
163 | struct qstr string; | ||
164 | }; | ||
165 | |||
166 | struct nfs_cache_array { | ||
167 | unsigned int size; | ||
168 | int eof_index; | ||
169 | u64 last_cookie; | ||
170 | struct nfs_cache_array_entry array[0]; | ||
171 | }; | ||
172 | |||
173 | #define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry)) | ||
174 | |||
175 | typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); | ||
154 | typedef struct { | 176 | typedef struct { |
155 | struct file *file; | 177 | struct file *file; |
156 | struct page *page; | 178 | struct page *page; |
157 | unsigned long page_index; | 179 | unsigned long page_index; |
158 | __be32 *ptr; | ||
159 | u64 *dir_cookie; | 180 | u64 *dir_cookie; |
160 | loff_t current_index; | 181 | loff_t current_index; |
161 | struct nfs_entry *entry; | ||
162 | decode_dirent_t decode; | 182 | decode_dirent_t decode; |
163 | int plus; | 183 | |
164 | unsigned long timestamp; | 184 | unsigned long timestamp; |
165 | unsigned long gencount; | 185 | unsigned long gencount; |
166 | int timestamp_valid; | 186 | unsigned int cache_entry_index; |
187 | unsigned int plus:1; | ||
188 | unsigned int eof:1; | ||
167 | } nfs_readdir_descriptor_t; | 189 | } nfs_readdir_descriptor_t; |
168 | 190 | ||
169 | /* Now we cache directories properly, by stuffing the dirent | 191 | /* |
170 | * data directly in the page cache. | 192 | * The caller is responsible for calling nfs_readdir_release_array(page) |
171 | * | ||
172 | * Inode invalidation due to refresh etc. takes care of | ||
173 | * _everything_, no sloppy entry flushing logic, no extraneous | ||
174 | * copying, network direct to page cache, the way it was meant | ||
175 | * to be. | ||
176 | * | ||
177 | * NOTE: Dirent information verification is done always by the | ||
178 | * page-in of the RPC reply, nowhere else, this simplies | ||
179 | * things substantially. | ||
180 | */ | 193 | */ |
181 | static | 194 | static |
182 | int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | 195 | struct nfs_cache_array *nfs_readdir_get_array(struct page *page) |
196 | { | ||
197 | if (page == NULL) | ||
198 | return ERR_PTR(-EIO); | ||
199 | return (struct nfs_cache_array *)kmap(page); | ||
200 | } | ||
201 | |||
202 | static | ||
203 | void nfs_readdir_release_array(struct page *page) | ||
204 | { | ||
205 | kunmap(page); | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * we are freeing strings created by nfs_add_to_readdir_array() | ||
210 | */ | ||
211 | static | ||
212 | int nfs_readdir_clear_array(struct page *page, gfp_t mask) | ||
213 | { | ||
214 | struct nfs_cache_array *array = nfs_readdir_get_array(page); | ||
215 | int i; | ||
216 | for (i = 0; i < array->size; i++) | ||
217 | kfree(array->array[i].string.name); | ||
218 | nfs_readdir_release_array(page); | ||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | /* | ||
223 | * the caller is responsible for freeing qstr.name | ||
224 | * when called by nfs_readdir_add_to_array, the strings will be freed in | ||
225 | * nfs_clear_readdir_array() | ||
226 | */ | ||
227 | static | ||
228 | int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) | ||
229 | { | ||
230 | string->len = len; | ||
231 | string->name = kmemdup(name, len, GFP_KERNEL); | ||
232 | if (string->name == NULL) | ||
233 | return -ENOMEM; | ||
234 | string->hash = full_name_hash(name, len); | ||
235 | return 0; | ||
236 | } | ||
237 | |||
238 | static | ||
239 | int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) | ||
240 | { | ||
241 | struct nfs_cache_array *array = nfs_readdir_get_array(page); | ||
242 | struct nfs_cache_array_entry *cache_entry; | ||
243 | int ret; | ||
244 | |||
245 | if (IS_ERR(array)) | ||
246 | return PTR_ERR(array); | ||
247 | ret = -EIO; | ||
248 | if (array->size >= MAX_READDIR_ARRAY) | ||
249 | goto out; | ||
250 | |||
251 | cache_entry = &array->array[array->size]; | ||
252 | cache_entry->cookie = entry->prev_cookie; | ||
253 | cache_entry->ino = entry->ino; | ||
254 | ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); | ||
255 | if (ret) | ||
256 | goto out; | ||
257 | array->last_cookie = entry->cookie; | ||
258 | if (entry->eof == 1) | ||
259 | array->eof_index = array->size; | ||
260 | array->size++; | ||
261 | out: | ||
262 | nfs_readdir_release_array(page); | ||
263 | return ret; | ||
264 | } | ||
265 | |||
266 | static | ||
267 | int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) | ||
268 | { | ||
269 | loff_t diff = desc->file->f_pos - desc->current_index; | ||
270 | unsigned int index; | ||
271 | |||
272 | if (diff < 0) | ||
273 | goto out_eof; | ||
274 | if (diff >= array->size) { | ||
275 | if (array->eof_index > 0) | ||
276 | goto out_eof; | ||
277 | desc->current_index += array->size; | ||
278 | return -EAGAIN; | ||
279 | } | ||
280 | |||
281 | index = (unsigned int)diff; | ||
282 | *desc->dir_cookie = array->array[index].cookie; | ||
283 | desc->cache_entry_index = index; | ||
284 | if (index == array->eof_index) | ||
285 | desc->eof = 1; | ||
286 | return 0; | ||
287 | out_eof: | ||
288 | desc->eof = 1; | ||
289 | return -EBADCOOKIE; | ||
290 | } | ||
291 | |||
292 | static | ||
293 | int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) | ||
294 | { | ||
295 | int i; | ||
296 | int status = -EAGAIN; | ||
297 | |||
298 | for (i = 0; i < array->size; i++) { | ||
299 | if (i == array->eof_index) { | ||
300 | desc->eof = 1; | ||
301 | status = -EBADCOOKIE; | ||
302 | } | ||
303 | if (array->array[i].cookie == *desc->dir_cookie) { | ||
304 | desc->cache_entry_index = i; | ||
305 | status = 0; | ||
306 | break; | ||
307 | } | ||
308 | } | ||
309 | |||
310 | return status; | ||
311 | } | ||
312 | |||
313 | static | ||
314 | int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) | ||
315 | { | ||
316 | struct nfs_cache_array *array; | ||
317 | int status = -EBADCOOKIE; | ||
318 | |||
319 | if (desc->dir_cookie == NULL) | ||
320 | goto out; | ||
321 | |||
322 | array = nfs_readdir_get_array(desc->page); | ||
323 | if (IS_ERR(array)) { | ||
324 | status = PTR_ERR(array); | ||
325 | goto out; | ||
326 | } | ||
327 | |||
328 | if (*desc->dir_cookie == 0) | ||
329 | status = nfs_readdir_search_for_pos(array, desc); | ||
330 | else | ||
331 | status = nfs_readdir_search_for_cookie(array, desc); | ||
332 | |||
333 | nfs_readdir_release_array(desc->page); | ||
334 | out: | ||
335 | return status; | ||
336 | } | ||
337 | |||
338 | /* Fill a page with xdr information before transferring to the cache page */ | ||
339 | static | ||
340 | int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, | ||
341 | struct nfs_entry *entry, struct file *file, struct inode *inode) | ||
183 | { | 342 | { |
184 | struct file *file = desc->file; | ||
185 | struct inode *inode = file->f_path.dentry->d_inode; | ||
186 | struct rpc_cred *cred = nfs_file_cred(file); | 343 | struct rpc_cred *cred = nfs_file_cred(file); |
187 | unsigned long timestamp, gencount; | 344 | unsigned long timestamp, gencount; |
188 | int error; | 345 | int error; |
189 | 346 | ||
190 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", | ||
191 | __func__, (long long)desc->entry->cookie, | ||
192 | page->index); | ||
193 | |||
194 | again: | 347 | again: |
195 | timestamp = jiffies; | 348 | timestamp = jiffies; |
196 | gencount = nfs_inc_attr_generation_counter(); | 349 | gencount = nfs_inc_attr_generation_counter(); |
197 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, | 350 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages, |
198 | NFS_SERVER(inode)->dtsize, desc->plus); | 351 | NFS_SERVER(inode)->dtsize, desc->plus); |
199 | if (error < 0) { | 352 | if (error < 0) { |
200 | /* We requested READDIRPLUS, but the server doesn't grok it */ | 353 | /* We requested READDIRPLUS, but the server doesn't grok it */ |
@@ -208,190 +361,292 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
208 | } | 361 | } |
209 | desc->timestamp = timestamp; | 362 | desc->timestamp = timestamp; |
210 | desc->gencount = gencount; | 363 | desc->gencount = gencount; |
211 | desc->timestamp_valid = 1; | 364 | error: |
212 | SetPageUptodate(page); | 365 | return error; |
213 | /* Ensure consistent page alignment of the data. | ||
214 | * Note: assumes we have exclusive access to this mapping either | ||
215 | * through inode->i_mutex or some other mechanism. | ||
216 | */ | ||
217 | if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { | ||
218 | /* Should never happen */ | ||
219 | nfs_zap_mapping(inode, inode->i_mapping); | ||
220 | } | ||
221 | unlock_page(page); | ||
222 | return 0; | ||
223 | error: | ||
224 | unlock_page(page); | ||
225 | return -EIO; | ||
226 | } | 366 | } |
227 | 367 | ||
228 | static inline | 368 | /* Fill in an entry based on the xdr code stored in desc->page */ |
229 | int dir_decode(nfs_readdir_descriptor_t *desc) | 369 | static |
370 | int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct xdr_stream *stream) | ||
230 | { | 371 | { |
231 | __be32 *p = desc->ptr; | 372 | __be32 *p = desc->decode(stream, entry, NFS_SERVER(desc->file->f_path.dentry->d_inode), desc->plus); |
232 | p = desc->decode(p, desc->entry, desc->plus); | ||
233 | if (IS_ERR(p)) | 373 | if (IS_ERR(p)) |
234 | return PTR_ERR(p); | 374 | return PTR_ERR(p); |
235 | desc->ptr = p; | 375 | |
236 | if (desc->timestamp_valid) { | 376 | entry->fattr->time_start = desc->timestamp; |
237 | desc->entry->fattr->time_start = desc->timestamp; | 377 | entry->fattr->gencount = desc->gencount; |
238 | desc->entry->fattr->gencount = desc->gencount; | ||
239 | } else | ||
240 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; | ||
241 | return 0; | 378 | return 0; |
242 | } | 379 | } |
243 | 380 | ||
244 | static inline | 381 | static |
245 | void dir_page_release(nfs_readdir_descriptor_t *desc) | 382 | int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry) |
246 | { | 383 | { |
247 | kunmap(desc->page); | 384 | struct nfs_inode *node; |
248 | page_cache_release(desc->page); | 385 | if (dentry->d_inode == NULL) |
249 | desc->page = NULL; | 386 | goto different; |
250 | desc->ptr = NULL; | 387 | node = NFS_I(dentry->d_inode); |
388 | if (node->fh.size != entry->fh->size) | ||
389 | goto different; | ||
390 | if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0) | ||
391 | goto different; | ||
392 | return 1; | ||
393 | different: | ||
394 | return 0; | ||
251 | } | 395 | } |
252 | 396 | ||
253 | /* | 397 | static |
254 | * Given a pointer to a buffer that has already been filled by a call | 398 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) |
255 | * to readdir, find the next entry with cookie '*desc->dir_cookie'. | ||
256 | * | ||
257 | * If the end of the buffer has been reached, return -EAGAIN, if not, | ||
258 | * return the offset within the buffer of the next entry to be | ||
259 | * read. | ||
260 | */ | ||
261 | static inline | ||
262 | int find_dirent(nfs_readdir_descriptor_t *desc) | ||
263 | { | 399 | { |
264 | struct nfs_entry *entry = desc->entry; | 400 | struct qstr filename = { |
265 | int loop_count = 0, | 401 | .len = entry->len, |
266 | status; | 402 | .name = entry->name, |
403 | }; | ||
404 | struct dentry *dentry; | ||
405 | struct dentry *alias; | ||
406 | struct inode *dir = parent->d_inode; | ||
407 | struct inode *inode; | ||
267 | 408 | ||
268 | while((status = dir_decode(desc)) == 0) { | 409 | if (filename.name[0] == '.') { |
269 | dfprintk(DIRCACHE, "NFS: %s: examining cookie %Lu\n", | 410 | if (filename.len == 1) |
270 | __func__, (unsigned long long)entry->cookie); | 411 | return; |
271 | if (entry->prev_cookie == *desc->dir_cookie) | 412 | if (filename.len == 2 && filename.name[1] == '.') |
272 | break; | 413 | return; |
273 | if (loop_count++ > 200) { | 414 | } |
274 | loop_count = 0; | 415 | filename.hash = full_name_hash(filename.name, filename.len); |
275 | schedule(); | 416 | |
417 | dentry = d_lookup(parent, &filename); | ||
418 | if (dentry != NULL) { | ||
419 | if (nfs_same_file(dentry, entry)) { | ||
420 | nfs_refresh_inode(dentry->d_inode, entry->fattr); | ||
421 | goto out; | ||
422 | } else { | ||
423 | d_drop(dentry); | ||
424 | dput(dentry); | ||
276 | } | 425 | } |
277 | } | 426 | } |
278 | return status; | 427 | |
428 | dentry = d_alloc(parent, &filename); | ||
429 | if (dentry == NULL) | ||
430 | return; | ||
431 | |||
432 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; | ||
433 | inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); | ||
434 | if (IS_ERR(inode)) | ||
435 | goto out; | ||
436 | |||
437 | alias = d_materialise_unique(dentry, inode); | ||
438 | if (IS_ERR(alias)) | ||
439 | goto out; | ||
440 | else if (alias) { | ||
441 | nfs_set_verifier(alias, nfs_save_change_attribute(dir)); | ||
442 | dput(alias); | ||
443 | } else | ||
444 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
445 | |||
446 | out: | ||
447 | dput(dentry); | ||
448 | } | ||
449 | |||
450 | /* Perform conversion from xdr to cache array */ | ||
451 | static | ||
452 | void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, | ||
453 | void *xdr_page, struct page *page, unsigned int buflen) | ||
454 | { | ||
455 | struct xdr_stream stream; | ||
456 | struct xdr_buf buf; | ||
457 | __be32 *ptr = xdr_page; | ||
458 | int status; | ||
459 | struct nfs_cache_array *array; | ||
460 | |||
461 | buf.head->iov_base = xdr_page; | ||
462 | buf.head->iov_len = buflen; | ||
463 | buf.tail->iov_len = 0; | ||
464 | buf.page_base = 0; | ||
465 | buf.page_len = 0; | ||
466 | buf.buflen = buf.head->iov_len; | ||
467 | buf.len = buf.head->iov_len; | ||
468 | |||
469 | xdr_init_decode(&stream, &buf, ptr); | ||
470 | |||
471 | |||
472 | do { | ||
473 | status = xdr_decode(desc, entry, &stream); | ||
474 | if (status != 0) | ||
475 | break; | ||
476 | |||
477 | if (nfs_readdir_add_to_array(entry, page) == -1) | ||
478 | break; | ||
479 | if (desc->plus == 1) | ||
480 | nfs_prime_dcache(desc->file->f_path.dentry, entry); | ||
481 | } while (!entry->eof); | ||
482 | |||
483 | if (status == -EBADCOOKIE && entry->eof) { | ||
484 | array = nfs_readdir_get_array(page); | ||
485 | array->eof_index = array->size - 1; | ||
486 | status = 0; | ||
487 | nfs_readdir_release_array(page); | ||
488 | } | ||
489 | } | ||
490 | |||
491 | static | ||
492 | void nfs_readdir_free_pagearray(struct page **pages, unsigned int npages) | ||
493 | { | ||
494 | unsigned int i; | ||
495 | for (i = 0; i < npages; i++) | ||
496 | put_page(pages[i]); | ||
497 | } | ||
498 | |||
499 | static | ||
500 | void nfs_readdir_free_large_page(void *ptr, struct page **pages, | ||
501 | unsigned int npages) | ||
502 | { | ||
503 | vm_unmap_ram(ptr, npages); | ||
504 | nfs_readdir_free_pagearray(pages, npages); | ||
279 | } | 505 | } |
280 | 506 | ||
281 | /* | 507 | /* |
282 | * Given a pointer to a buffer that has already been filled by a call | 508 | * nfs_readdir_large_page will allocate pages that must be freed with a call |
283 | * to readdir, find the entry at offset 'desc->file->f_pos'. | 509 | * to nfs_readdir_free_large_page |
284 | * | ||
285 | * If the end of the buffer has been reached, return -EAGAIN, if not, | ||
286 | * return the offset within the buffer of the next entry to be | ||
287 | * read. | ||
288 | */ | 510 | */ |
289 | static inline | 511 | static |
290 | int find_dirent_index(nfs_readdir_descriptor_t *desc) | 512 | void *nfs_readdir_large_page(struct page **pages, unsigned int npages) |
291 | { | 513 | { |
292 | struct nfs_entry *entry = desc->entry; | 514 | void *ptr; |
293 | int loop_count = 0, | 515 | unsigned int i; |
294 | status; | 516 | |
517 | for (i = 0; i < npages; i++) { | ||
518 | struct page *page = alloc_page(GFP_KERNEL); | ||
519 | if (page == NULL) | ||
520 | goto out_freepages; | ||
521 | pages[i] = page; | ||
522 | } | ||
295 | 523 | ||
296 | for(;;) { | 524 | ptr = vm_map_ram(pages, npages, 0, PAGE_KERNEL); |
297 | status = dir_decode(desc); | 525 | if (!IS_ERR_OR_NULL(ptr)) |
298 | if (status) | 526 | return ptr; |
299 | break; | 527 | out_freepages: |
528 | nfs_readdir_free_pagearray(pages, i); | ||
529 | return NULL; | ||
530 | } | ||
531 | |||
532 | static | ||
533 | int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) | ||
534 | { | ||
535 | struct page *pages[NFS_MAX_READDIR_PAGES]; | ||
536 | void *pages_ptr = NULL; | ||
537 | struct nfs_entry entry; | ||
538 | struct file *file = desc->file; | ||
539 | struct nfs_cache_array *array; | ||
540 | int status = 0; | ||
541 | unsigned int array_size = ARRAY_SIZE(pages); | ||
542 | |||
543 | entry.prev_cookie = 0; | ||
544 | entry.cookie = *desc->dir_cookie; | ||
545 | entry.eof = 0; | ||
546 | entry.fh = nfs_alloc_fhandle(); | ||
547 | entry.fattr = nfs_alloc_fattr(); | ||
548 | if (entry.fh == NULL || entry.fattr == NULL) | ||
549 | goto out; | ||
300 | 550 | ||
301 | dfprintk(DIRCACHE, "NFS: found cookie %Lu at index %Ld\n", | 551 | array = nfs_readdir_get_array(page); |
302 | (unsigned long long)entry->cookie, desc->current_index); | 552 | memset(array, 0, sizeof(struct nfs_cache_array)); |
553 | array->eof_index = -1; | ||
303 | 554 | ||
304 | if (desc->file->f_pos == desc->current_index) { | 555 | pages_ptr = nfs_readdir_large_page(pages, array_size); |
305 | *desc->dir_cookie = entry->cookie; | 556 | if (!pages_ptr) |
557 | goto out_release_array; | ||
558 | do { | ||
559 | status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); | ||
560 | |||
561 | if (status < 0) | ||
306 | break; | 562 | break; |
307 | } | 563 | nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE); |
308 | desc->current_index++; | 564 | } while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY); |
309 | if (loop_count++ > 200) { | 565 | |
310 | loop_count = 0; | 566 | nfs_readdir_free_large_page(pages_ptr, pages, array_size); |
311 | schedule(); | 567 | out_release_array: |
312 | } | 568 | nfs_readdir_release_array(page); |
313 | } | 569 | out: |
570 | nfs_free_fattr(entry.fattr); | ||
571 | nfs_free_fhandle(entry.fh); | ||
314 | return status; | 572 | return status; |
315 | } | 573 | } |
316 | 574 | ||
317 | /* | 575 | /* |
318 | * Find the given page, and call find_dirent() or find_dirent_index in | 576 | * Now we cache directories properly, by converting xdr information |
319 | * order to try to return the next entry. | 577 | * to an array that can be used for lookups later. This results in |
578 | * fewer cache pages, since we can store more information on each page. | ||
579 | * We only need to convert from xdr once so future lookups are much simpler | ||
320 | */ | 580 | */ |
321 | static inline | 581 | static |
322 | int find_dirent_page(nfs_readdir_descriptor_t *desc) | 582 | int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page) |
323 | { | 583 | { |
324 | struct inode *inode = desc->file->f_path.dentry->d_inode; | 584 | struct inode *inode = desc->file->f_path.dentry->d_inode; |
325 | struct page *page; | ||
326 | int status; | ||
327 | 585 | ||
328 | dfprintk(DIRCACHE, "NFS: %s: searching page %ld for target %Lu\n", | 586 | if (nfs_readdir_xdr_to_array(desc, page, inode) < 0) |
329 | __func__, desc->page_index, | 587 | goto error; |
330 | (long long) *desc->dir_cookie); | 588 | SetPageUptodate(page); |
331 | 589 | ||
332 | /* If we find the page in the page_cache, we cannot be sure | 590 | if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { |
333 | * how fresh the data is, so we will ignore readdir_plus attributes. | 591 | /* Should never happen */ |
334 | */ | 592 | nfs_zap_mapping(inode, inode->i_mapping); |
335 | desc->timestamp_valid = 0; | ||
336 | page = read_cache_page(inode->i_mapping, desc->page_index, | ||
337 | (filler_t *)nfs_readdir_filler, desc); | ||
338 | if (IS_ERR(page)) { | ||
339 | status = PTR_ERR(page); | ||
340 | goto out; | ||
341 | } | 593 | } |
594 | unlock_page(page); | ||
595 | return 0; | ||
596 | error: | ||
597 | unlock_page(page); | ||
598 | return -EIO; | ||
599 | } | ||
342 | 600 | ||
343 | /* NOTE: Someone else may have changed the READDIRPLUS flag */ | 601 | static |
344 | desc->page = page; | 602 | void cache_page_release(nfs_readdir_descriptor_t *desc) |
345 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 603 | { |
346 | if (*desc->dir_cookie != 0) | 604 | page_cache_release(desc->page); |
347 | status = find_dirent(desc); | 605 | desc->page = NULL; |
348 | else | 606 | } |
349 | status = find_dirent_index(desc); | 607 | |
350 | if (status < 0) | 608 | static |
351 | dir_page_release(desc); | 609 | struct page *get_cache_page(nfs_readdir_descriptor_t *desc) |
352 | out: | 610 | { |
353 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); | 611 | struct page *page; |
354 | return status; | 612 | page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping, |
613 | desc->page_index, (filler_t *)nfs_readdir_filler, desc); | ||
614 | if (IS_ERR(page)) | ||
615 | desc->eof = 1; | ||
616 | return page; | ||
355 | } | 617 | } |
356 | 618 | ||
357 | /* | 619 | /* |
358 | * Recurse through the page cache pages, and return a | 620 | * Returns 0 if desc->dir_cookie was found on page desc->page_index |
359 | * filled nfs_entry structure of the next directory entry if possible. | ||
360 | * | ||
361 | * The target for the search is '*desc->dir_cookie' if non-0, | ||
362 | * 'desc->file->f_pos' otherwise | ||
363 | */ | 621 | */ |
622 | static | ||
623 | int find_cache_page(nfs_readdir_descriptor_t *desc) | ||
624 | { | ||
625 | int res; | ||
626 | |||
627 | desc->page = get_cache_page(desc); | ||
628 | if (IS_ERR(desc->page)) | ||
629 | return PTR_ERR(desc->page); | ||
630 | |||
631 | res = nfs_readdir_search_array(desc); | ||
632 | if (res == 0) | ||
633 | return 0; | ||
634 | cache_page_release(desc); | ||
635 | return res; | ||
636 | } | ||
637 | |||
638 | /* Search for desc->dir_cookie from the beginning of the page cache */ | ||
364 | static inline | 639 | static inline |
365 | int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) | 640 | int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) |
366 | { | 641 | { |
367 | int loop_count = 0; | 642 | int res = -EAGAIN; |
368 | int res; | ||
369 | |||
370 | /* Always search-by-index from the beginning of the cache */ | ||
371 | if (*desc->dir_cookie == 0) { | ||
372 | dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for offset %Ld\n", | ||
373 | (long long)desc->file->f_pos); | ||
374 | desc->page_index = 0; | ||
375 | desc->entry->cookie = desc->entry->prev_cookie = 0; | ||
376 | desc->entry->eof = 0; | ||
377 | desc->current_index = 0; | ||
378 | } else | ||
379 | dfprintk(DIRCACHE, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", | ||
380 | (unsigned long long)*desc->dir_cookie); | ||
381 | 643 | ||
382 | for (;;) { | 644 | while (1) { |
383 | res = find_dirent_page(desc); | 645 | res = find_cache_page(desc); |
384 | if (res != -EAGAIN) | 646 | if (res != -EAGAIN) |
385 | break; | 647 | break; |
386 | /* Align to beginning of next page */ | 648 | desc->page_index++; |
387 | desc->page_index ++; | ||
388 | if (loop_count++ > 200) { | ||
389 | loop_count = 0; | ||
390 | schedule(); | ||
391 | } | ||
392 | } | 649 | } |
393 | |||
394 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, res); | ||
395 | return res; | 650 | return res; |
396 | } | 651 | } |
397 | 652 | ||
@@ -400,8 +655,6 @@ static inline unsigned int dt_type(struct inode *inode) | |||
400 | return (inode->i_mode >> 12) & 15; | 655 | return (inode->i_mode >> 12) & 15; |
401 | } | 656 | } |
402 | 657 | ||
403 | static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc); | ||
404 | |||
405 | /* | 658 | /* |
406 | * Once we've found the start of the dirent within a page: fill 'er up... | 659 | * Once we've found the start of the dirent within a page: fill 'er up... |
407 | */ | 660 | */ |
@@ -410,49 +663,36 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
410 | filldir_t filldir) | 663 | filldir_t filldir) |
411 | { | 664 | { |
412 | struct file *file = desc->file; | 665 | struct file *file = desc->file; |
413 | struct nfs_entry *entry = desc->entry; | 666 | int i = 0; |
414 | struct dentry *dentry = NULL; | 667 | int res = 0; |
415 | u64 fileid; | 668 | struct nfs_cache_array *array = NULL; |
416 | int loop_count = 0, | 669 | unsigned int d_type = DT_UNKNOWN; |
417 | res; | 670 | struct dentry *dentry = NULL; |
418 | |||
419 | dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", | ||
420 | (unsigned long long)entry->cookie); | ||
421 | |||
422 | for(;;) { | ||
423 | unsigned d_type = DT_UNKNOWN; | ||
424 | /* Note: entry->prev_cookie contains the cookie for | ||
425 | * retrieving the current dirent on the server */ | ||
426 | fileid = entry->ino; | ||
427 | |||
428 | /* Get a dentry if we have one */ | ||
429 | if (dentry != NULL) | ||
430 | dput(dentry); | ||
431 | dentry = nfs_readdir_lookup(desc); | ||
432 | 671 | ||
433 | /* Use readdirplus info */ | 672 | array = nfs_readdir_get_array(desc->page); |
434 | if (dentry != NULL && dentry->d_inode != NULL) { | ||
435 | d_type = dt_type(dentry->d_inode); | ||
436 | fileid = NFS_FILEID(dentry->d_inode); | ||
437 | } | ||
438 | 673 | ||
439 | res = filldir(dirent, entry->name, entry->len, | 674 | for (i = desc->cache_entry_index; i < array->size; i++) { |
440 | file->f_pos, nfs_compat_user_ino64(fileid), | 675 | d_type = DT_UNKNOWN; |
441 | d_type); | 676 | |
677 | res = filldir(dirent, array->array[i].string.name, | ||
678 | array->array[i].string.len, file->f_pos, | ||
679 | nfs_compat_user_ino64(array->array[i].ino), d_type); | ||
442 | if (res < 0) | 680 | if (res < 0) |
443 | break; | 681 | break; |
444 | file->f_pos++; | 682 | file->f_pos++; |
445 | *desc->dir_cookie = entry->cookie; | 683 | desc->cache_entry_index = i; |
446 | if (dir_decode(desc) != 0) { | 684 | if (i < (array->size-1)) |
447 | desc->page_index ++; | 685 | *desc->dir_cookie = array->array[i+1].cookie; |
686 | else | ||
687 | *desc->dir_cookie = array->last_cookie; | ||
688 | if (i == array->eof_index) { | ||
689 | desc->eof = 1; | ||
448 | break; | 690 | break; |
449 | } | 691 | } |
450 | if (loop_count++ > 200) { | ||
451 | loop_count = 0; | ||
452 | schedule(); | ||
453 | } | ||
454 | } | 692 | } |
455 | dir_page_release(desc); | 693 | |
694 | nfs_readdir_release_array(desc->page); | ||
695 | cache_page_release(desc); | ||
456 | if (dentry != NULL) | 696 | if (dentry != NULL) |
457 | dput(dentry); | 697 | dput(dentry); |
458 | dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", | 698 | dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", |
@@ -476,12 +716,9 @@ static inline | |||
476 | int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | 716 | int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, |
477 | filldir_t filldir) | 717 | filldir_t filldir) |
478 | { | 718 | { |
479 | struct file *file = desc->file; | ||
480 | struct inode *inode = file->f_path.dentry->d_inode; | ||
481 | struct rpc_cred *cred = nfs_file_cred(file); | ||
482 | struct page *page = NULL; | 719 | struct page *page = NULL; |
483 | int status; | 720 | int status; |
484 | unsigned long timestamp, gencount; | 721 | struct inode *inode = desc->file->f_path.dentry->d_inode; |
485 | 722 | ||
486 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 723 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
487 | (unsigned long long)*desc->dir_cookie); | 724 | (unsigned long long)*desc->dir_cookie); |
@@ -491,38 +728,22 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
491 | status = -ENOMEM; | 728 | status = -ENOMEM; |
492 | goto out; | 729 | goto out; |
493 | } | 730 | } |
494 | timestamp = jiffies; | 731 | |
495 | gencount = nfs_inc_attr_generation_counter(); | 732 | if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) { |
496 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, | ||
497 | *desc->dir_cookie, page, | ||
498 | NFS_SERVER(inode)->dtsize, | ||
499 | desc->plus); | ||
500 | desc->page = page; | ||
501 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | ||
502 | if (status >= 0) { | ||
503 | desc->timestamp = timestamp; | ||
504 | desc->gencount = gencount; | ||
505 | desc->timestamp_valid = 1; | ||
506 | if ((status = dir_decode(desc)) == 0) | ||
507 | desc->entry->prev_cookie = *desc->dir_cookie; | ||
508 | } else | ||
509 | status = -EIO; | 733 | status = -EIO; |
510 | if (status < 0) | ||
511 | goto out_release; | 734 | goto out_release; |
735 | } | ||
512 | 736 | ||
737 | desc->page_index = 0; | ||
738 | desc->page = page; | ||
513 | status = nfs_do_filldir(desc, dirent, filldir); | 739 | status = nfs_do_filldir(desc, dirent, filldir); |
514 | 740 | ||
515 | /* Reset read descriptor so it searches the page cache from | ||
516 | * the start upon the next call to readdir_search_pagecache() */ | ||
517 | desc->page_index = 0; | ||
518 | desc->entry->cookie = desc->entry->prev_cookie = 0; | ||
519 | desc->entry->eof = 0; | ||
520 | out: | 741 | out: |
521 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", | 742 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", |
522 | __func__, status); | 743 | __func__, status); |
523 | return status; | 744 | return status; |
524 | out_release: | 745 | out_release: |
525 | dir_page_release(desc); | 746 | cache_page_release(desc); |
526 | goto out; | 747 | goto out; |
527 | } | 748 | } |
528 | 749 | ||
@@ -536,7 +757,6 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
536 | struct inode *inode = dentry->d_inode; | 757 | struct inode *inode = dentry->d_inode; |
537 | nfs_readdir_descriptor_t my_desc, | 758 | nfs_readdir_descriptor_t my_desc, |
538 | *desc = &my_desc; | 759 | *desc = &my_desc; |
539 | struct nfs_entry my_entry; | ||
540 | int res = -ENOMEM; | 760 | int res = -ENOMEM; |
541 | 761 | ||
542 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 762 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
@@ -557,26 +777,17 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
557 | desc->decode = NFS_PROTO(inode)->decode_dirent; | 777 | desc->decode = NFS_PROTO(inode)->decode_dirent; |
558 | desc->plus = NFS_USE_READDIRPLUS(inode); | 778 | desc->plus = NFS_USE_READDIRPLUS(inode); |
559 | 779 | ||
560 | my_entry.cookie = my_entry.prev_cookie = 0; | ||
561 | my_entry.eof = 0; | ||
562 | my_entry.fh = nfs_alloc_fhandle(); | ||
563 | my_entry.fattr = nfs_alloc_fattr(); | ||
564 | if (my_entry.fh == NULL || my_entry.fattr == NULL) | ||
565 | goto out_alloc_failed; | ||
566 | |||
567 | desc->entry = &my_entry; | ||
568 | |||
569 | nfs_block_sillyrename(dentry); | 780 | nfs_block_sillyrename(dentry); |
570 | res = nfs_revalidate_mapping(inode, filp->f_mapping); | 781 | res = nfs_revalidate_mapping(inode, filp->f_mapping); |
571 | if (res < 0) | 782 | if (res < 0) |
572 | goto out; | 783 | goto out; |
573 | 784 | ||
574 | while(!desc->entry->eof) { | 785 | while (desc->eof != 1) { |
575 | res = readdir_search_pagecache(desc); | 786 | res = readdir_search_pagecache(desc); |
576 | 787 | ||
577 | if (res == -EBADCOOKIE) { | 788 | if (res == -EBADCOOKIE) { |
578 | /* This means either end of directory */ | 789 | /* This means either end of directory */ |
579 | if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { | 790 | if (*desc->dir_cookie && desc->eof == 0) { |
580 | /* Or that the server has 'lost' a cookie */ | 791 | /* Or that the server has 'lost' a cookie */ |
581 | res = uncached_readdir(desc, dirent, filldir); | 792 | res = uncached_readdir(desc, dirent, filldir); |
582 | if (res >= 0) | 793 | if (res >= 0) |
@@ -588,8 +799,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
588 | if (res == -ETOOSMALL && desc->plus) { | 799 | if (res == -ETOOSMALL && desc->plus) { |
589 | clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); | 800 | clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
590 | nfs_zap_caches(inode); | 801 | nfs_zap_caches(inode); |
802 | desc->page_index = 0; | ||
591 | desc->plus = 0; | 803 | desc->plus = 0; |
592 | desc->entry->eof = 0; | 804 | desc->eof = 0; |
593 | continue; | 805 | continue; |
594 | } | 806 | } |
595 | if (res < 0) | 807 | if (res < 0) |
@@ -605,9 +817,6 @@ out: | |||
605 | nfs_unblock_sillyrename(dentry); | 817 | nfs_unblock_sillyrename(dentry); |
606 | if (res > 0) | 818 | if (res > 0) |
607 | res = 0; | 819 | res = 0; |
608 | out_alloc_failed: | ||
609 | nfs_free_fattr(my_entry.fattr); | ||
610 | nfs_free_fhandle(my_entry.fh); | ||
611 | dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", | 820 | dfprintk(FILE, "NFS: readdir(%s/%s) returns %d\n", |
612 | dentry->d_parent->d_name.name, dentry->d_name.name, | 821 | dentry->d_parent->d_name.name, dentry->d_name.name, |
613 | res); | 822 | res); |
@@ -1029,10 +1238,63 @@ static int is_atomic_open(struct nameidata *nd) | |||
1029 | return 1; | 1238 | return 1; |
1030 | } | 1239 | } |
1031 | 1240 | ||
1241 | static struct nfs_open_context *nameidata_to_nfs_open_context(struct dentry *dentry, struct nameidata *nd) | ||
1242 | { | ||
1243 | struct path path = { | ||
1244 | .mnt = nd->path.mnt, | ||
1245 | .dentry = dentry, | ||
1246 | }; | ||
1247 | struct nfs_open_context *ctx; | ||
1248 | struct rpc_cred *cred; | ||
1249 | fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); | ||
1250 | |||
1251 | cred = rpc_lookup_cred(); | ||
1252 | if (IS_ERR(cred)) | ||
1253 | return ERR_CAST(cred); | ||
1254 | ctx = alloc_nfs_open_context(&path, cred, fmode); | ||
1255 | put_rpccred(cred); | ||
1256 | if (ctx == NULL) | ||
1257 | return ERR_PTR(-ENOMEM); | ||
1258 | return ctx; | ||
1259 | } | ||
1260 | |||
1261 | static int do_open(struct inode *inode, struct file *filp) | ||
1262 | { | ||
1263 | nfs_fscache_set_inode_cookie(inode, filp); | ||
1264 | return 0; | ||
1265 | } | ||
1266 | |||
1267 | static int nfs_intent_set_file(struct nameidata *nd, struct nfs_open_context *ctx) | ||
1268 | { | ||
1269 | struct file *filp; | ||
1270 | int ret = 0; | ||
1271 | |||
1272 | /* If the open_intent is for execute, we have an extra check to make */ | ||
1273 | if (ctx->mode & FMODE_EXEC) { | ||
1274 | ret = nfs_may_open(ctx->path.dentry->d_inode, | ||
1275 | ctx->cred, | ||
1276 | nd->intent.open.flags); | ||
1277 | if (ret < 0) | ||
1278 | goto out; | ||
1279 | } | ||
1280 | filp = lookup_instantiate_filp(nd, ctx->path.dentry, do_open); | ||
1281 | if (IS_ERR(filp)) | ||
1282 | ret = PTR_ERR(filp); | ||
1283 | else | ||
1284 | nfs_file_set_open_context(filp, ctx); | ||
1285 | out: | ||
1286 | put_nfs_open_context(ctx); | ||
1287 | return ret; | ||
1288 | } | ||
1289 | |||
1032 | static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | 1290 | static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
1033 | { | 1291 | { |
1292 | struct nfs_open_context *ctx; | ||
1293 | struct iattr attr; | ||
1034 | struct dentry *res = NULL; | 1294 | struct dentry *res = NULL; |
1035 | int error; | 1295 | struct inode *inode; |
1296 | int open_flags; | ||
1297 | int err; | ||
1036 | 1298 | ||
1037 | dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", | 1299 | dfprintk(VFS, "NFS: atomic_lookup(%s/%ld), %s\n", |
1038 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1300 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
@@ -1054,13 +1316,32 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1054 | goto out; | 1316 | goto out; |
1055 | } | 1317 | } |
1056 | 1318 | ||
1319 | ctx = nameidata_to_nfs_open_context(dentry, nd); | ||
1320 | res = ERR_CAST(ctx); | ||
1321 | if (IS_ERR(ctx)) | ||
1322 | goto out; | ||
1323 | |||
1324 | open_flags = nd->intent.open.flags; | ||
1325 | if (nd->flags & LOOKUP_CREATE) { | ||
1326 | attr.ia_mode = nd->intent.open.create_mode; | ||
1327 | attr.ia_valid = ATTR_MODE; | ||
1328 | if (!IS_POSIXACL(dir)) | ||
1329 | attr.ia_mode &= ~current_umask(); | ||
1330 | } else { | ||
1331 | open_flags &= ~(O_EXCL | O_CREAT); | ||
1332 | attr.ia_valid = 0; | ||
1333 | } | ||
1334 | |||
1057 | /* Open the file on the server */ | 1335 | /* Open the file on the server */ |
1058 | res = nfs4_atomic_open(dir, dentry, nd); | 1336 | nfs_block_sillyrename(dentry->d_parent); |
1059 | if (IS_ERR(res)) { | 1337 | inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr); |
1060 | error = PTR_ERR(res); | 1338 | if (IS_ERR(inode)) { |
1061 | switch (error) { | 1339 | nfs_unblock_sillyrename(dentry->d_parent); |
1340 | put_nfs_open_context(ctx); | ||
1341 | switch (PTR_ERR(inode)) { | ||
1062 | /* Make a negative dentry */ | 1342 | /* Make a negative dentry */ |
1063 | case -ENOENT: | 1343 | case -ENOENT: |
1344 | d_add(dentry, NULL); | ||
1064 | res = NULL; | 1345 | res = NULL; |
1065 | goto out; | 1346 | goto out; |
1066 | /* This turned out not to be a regular file */ | 1347 | /* This turned out not to be a regular file */ |
@@ -1072,11 +1353,25 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry | |||
1072 | goto no_open; | 1353 | goto no_open; |
1073 | /* case -EINVAL: */ | 1354 | /* case -EINVAL: */ |
1074 | default: | 1355 | default: |
1356 | res = ERR_CAST(inode); | ||
1075 | goto out; | 1357 | goto out; |
1076 | } | 1358 | } |
1077 | } else if (res != NULL) | 1359 | } |
1360 | res = d_add_unique(dentry, inode); | ||
1361 | nfs_unblock_sillyrename(dentry->d_parent); | ||
1362 | if (res != NULL) { | ||
1363 | dput(ctx->path.dentry); | ||
1364 | ctx->path.dentry = dget(res); | ||
1078 | dentry = res; | 1365 | dentry = res; |
1366 | } | ||
1367 | err = nfs_intent_set_file(nd, ctx); | ||
1368 | if (err < 0) { | ||
1369 | if (res != NULL) | ||
1370 | dput(res); | ||
1371 | return ERR_PTR(err); | ||
1372 | } | ||
1079 | out: | 1373 | out: |
1374 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1080 | return res; | 1375 | return res; |
1081 | no_open: | 1376 | no_open: |
1082 | return nfs_lookup(dir, dentry, nd); | 1377 | return nfs_lookup(dir, dentry, nd); |
@@ -1087,12 +1382,15 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1087 | struct dentry *parent = NULL; | 1382 | struct dentry *parent = NULL; |
1088 | struct inode *inode = dentry->d_inode; | 1383 | struct inode *inode = dentry->d_inode; |
1089 | struct inode *dir; | 1384 | struct inode *dir; |
1385 | struct nfs_open_context *ctx; | ||
1090 | int openflags, ret = 0; | 1386 | int openflags, ret = 0; |
1091 | 1387 | ||
1092 | if (!is_atomic_open(nd) || d_mountpoint(dentry)) | 1388 | if (!is_atomic_open(nd) || d_mountpoint(dentry)) |
1093 | goto no_open; | 1389 | goto no_open; |
1390 | |||
1094 | parent = dget_parent(dentry); | 1391 | parent = dget_parent(dentry); |
1095 | dir = parent->d_inode; | 1392 | dir = parent->d_inode; |
1393 | |||
1096 | /* We can't create new files in nfs_open_revalidate(), so we | 1394 | /* We can't create new files in nfs_open_revalidate(), so we |
1097 | * optimize away revalidation of negative dentries. | 1395 | * optimize away revalidation of negative dentries. |
1098 | */ | 1396 | */ |
@@ -1112,99 +1410,96 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) | |||
1112 | /* We can't create new files, or truncate existing ones here */ | 1410 | /* We can't create new files, or truncate existing ones here */ |
1113 | openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); | 1411 | openflags &= ~(O_CREAT|O_EXCL|O_TRUNC); |
1114 | 1412 | ||
1413 | ctx = nameidata_to_nfs_open_context(dentry, nd); | ||
1414 | ret = PTR_ERR(ctx); | ||
1415 | if (IS_ERR(ctx)) | ||
1416 | goto out; | ||
1115 | /* | 1417 | /* |
1116 | * Note: we're not holding inode->i_mutex and so may be racing with | 1418 | * Note: we're not holding inode->i_mutex and so may be racing with |
1117 | * operations that change the directory. We therefore save the | 1419 | * operations that change the directory. We therefore save the |
1118 | * change attribute *before* we do the RPC call. | 1420 | * change attribute *before* we do the RPC call. |
1119 | */ | 1421 | */ |
1120 | ret = nfs4_open_revalidate(dir, dentry, openflags, nd); | 1422 | inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, NULL); |
1423 | if (IS_ERR(inode)) { | ||
1424 | ret = PTR_ERR(inode); | ||
1425 | switch (ret) { | ||
1426 | case -EPERM: | ||
1427 | case -EACCES: | ||
1428 | case -EDQUOT: | ||
1429 | case -ENOSPC: | ||
1430 | case -EROFS: | ||
1431 | goto out_put_ctx; | ||
1432 | default: | ||
1433 | goto out_drop; | ||
1434 | } | ||
1435 | } | ||
1436 | iput(inode); | ||
1437 | if (inode != dentry->d_inode) | ||
1438 | goto out_drop; | ||
1439 | |||
1440 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1441 | ret = nfs_intent_set_file(nd, ctx); | ||
1442 | if (ret >= 0) | ||
1443 | ret = 1; | ||
1121 | out: | 1444 | out: |
1122 | dput(parent); | 1445 | dput(parent); |
1123 | if (!ret) | ||
1124 | d_drop(dentry); | ||
1125 | return ret; | 1446 | return ret; |
1447 | out_drop: | ||
1448 | d_drop(dentry); | ||
1449 | ret = 0; | ||
1450 | out_put_ctx: | ||
1451 | put_nfs_open_context(ctx); | ||
1452 | goto out; | ||
1453 | |||
1126 | no_open_dput: | 1454 | no_open_dput: |
1127 | dput(parent); | 1455 | dput(parent); |
1128 | no_open: | 1456 | no_open: |
1129 | return nfs_lookup_revalidate(dentry, nd); | 1457 | return nfs_lookup_revalidate(dentry, nd); |
1130 | } | 1458 | } |
1131 | #endif /* CONFIG_NFSV4 */ | ||
1132 | 1459 | ||
1133 | static struct dentry *nfs_readdir_lookup(nfs_readdir_descriptor_t *desc) | 1460 | static int nfs_open_create(struct inode *dir, struct dentry *dentry, int mode, |
1461 | struct nameidata *nd) | ||
1134 | { | 1462 | { |
1135 | struct dentry *parent = desc->file->f_path.dentry; | 1463 | struct nfs_open_context *ctx = NULL; |
1136 | struct inode *dir = parent->d_inode; | 1464 | struct iattr attr; |
1137 | struct nfs_entry *entry = desc->entry; | 1465 | int error; |
1138 | struct dentry *dentry, *alias; | 1466 | int open_flags = 0; |
1139 | struct qstr name = { | ||
1140 | .name = entry->name, | ||
1141 | .len = entry->len, | ||
1142 | }; | ||
1143 | struct inode *inode; | ||
1144 | unsigned long verf = nfs_save_change_attribute(dir); | ||
1145 | 1467 | ||
1146 | switch (name.len) { | 1468 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", |
1147 | case 2: | 1469 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
1148 | if (name.name[0] == '.' && name.name[1] == '.') | ||
1149 | return dget_parent(parent); | ||
1150 | break; | ||
1151 | case 1: | ||
1152 | if (name.name[0] == '.') | ||
1153 | return dget(parent); | ||
1154 | } | ||
1155 | 1470 | ||
1156 | spin_lock(&dir->i_lock); | 1471 | attr.ia_mode = mode; |
1157 | if (NFS_I(dir)->cache_validity & NFS_INO_INVALID_DATA) { | 1472 | attr.ia_valid = ATTR_MODE; |
1158 | spin_unlock(&dir->i_lock); | ||
1159 | return NULL; | ||
1160 | } | ||
1161 | spin_unlock(&dir->i_lock); | ||
1162 | 1473 | ||
1163 | name.hash = full_name_hash(name.name, name.len); | 1474 | if ((nd->flags & LOOKUP_CREATE) != 0) { |
1164 | dentry = d_lookup(parent, &name); | 1475 | open_flags = nd->intent.open.flags; |
1165 | if (dentry != NULL) { | ||
1166 | /* Is this a positive dentry that matches the readdir info? */ | ||
1167 | if (dentry->d_inode != NULL && | ||
1168 | (NFS_FILEID(dentry->d_inode) == entry->ino || | ||
1169 | d_mountpoint(dentry))) { | ||
1170 | if (!desc->plus || entry->fh->size == 0) | ||
1171 | return dentry; | ||
1172 | if (nfs_compare_fh(NFS_FH(dentry->d_inode), | ||
1173 | entry->fh) == 0) | ||
1174 | goto out_renew; | ||
1175 | } | ||
1176 | /* No, so d_drop to allow one to be created */ | ||
1177 | d_drop(dentry); | ||
1178 | dput(dentry); | ||
1179 | } | ||
1180 | if (!desc->plus || !(entry->fattr->valid & NFS_ATTR_FATTR)) | ||
1181 | return NULL; | ||
1182 | if (name.len > NFS_SERVER(dir)->namelen) | ||
1183 | return NULL; | ||
1184 | /* Note: caller is already holding the dir->i_mutex! */ | ||
1185 | dentry = d_alloc(parent, &name); | ||
1186 | if (dentry == NULL) | ||
1187 | return NULL; | ||
1188 | dentry->d_op = NFS_PROTO(dir)->dentry_ops; | ||
1189 | inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr); | ||
1190 | if (IS_ERR(inode)) { | ||
1191 | dput(dentry); | ||
1192 | return NULL; | ||
1193 | } | ||
1194 | 1476 | ||
1195 | alias = d_materialise_unique(dentry, inode); | 1477 | ctx = nameidata_to_nfs_open_context(dentry, nd); |
1196 | if (alias != NULL) { | 1478 | error = PTR_ERR(ctx); |
1197 | dput(dentry); | 1479 | if (IS_ERR(ctx)) |
1198 | if (IS_ERR(alias)) | 1480 | goto out_err_drop; |
1199 | return NULL; | ||
1200 | dentry = alias; | ||
1201 | } | 1481 | } |
1202 | 1482 | ||
1203 | out_renew: | 1483 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, ctx); |
1204 | nfs_set_verifier(dentry, verf); | 1484 | if (error != 0) |
1205 | return dentry; | 1485 | goto out_put_ctx; |
1486 | if (ctx != NULL) { | ||
1487 | error = nfs_intent_set_file(nd, ctx); | ||
1488 | if (error < 0) | ||
1489 | goto out_err; | ||
1490 | } | ||
1491 | return 0; | ||
1492 | out_put_ctx: | ||
1493 | if (ctx != NULL) | ||
1494 | put_nfs_open_context(ctx); | ||
1495 | out_err_drop: | ||
1496 | d_drop(dentry); | ||
1497 | out_err: | ||
1498 | return error; | ||
1206 | } | 1499 | } |
1207 | 1500 | ||
1501 | #endif /* CONFIG_NFSV4 */ | ||
1502 | |||
1208 | /* | 1503 | /* |
1209 | * Code common to create, mkdir, and mknod. | 1504 | * Code common to create, mkdir, and mknod. |
1210 | */ | 1505 | */ |
@@ -1258,7 +1553,6 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1258 | { | 1553 | { |
1259 | struct iattr attr; | 1554 | struct iattr attr; |
1260 | int error; | 1555 | int error; |
1261 | int open_flags = 0; | ||
1262 | 1556 | ||
1263 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", | 1557 | dfprintk(VFS, "NFS: create(%s/%ld), %s\n", |
1264 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); | 1558 | dir->i_sb->s_id, dir->i_ino, dentry->d_name.name); |
@@ -1266,10 +1560,7 @@ static int nfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
1266 | attr.ia_mode = mode; | 1560 | attr.ia_mode = mode; |
1267 | attr.ia_valid = ATTR_MODE; | 1561 | attr.ia_valid = ATTR_MODE; |
1268 | 1562 | ||
1269 | if ((nd->flags & LOOKUP_CREATE) != 0) | 1563 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, 0, NULL); |
1270 | open_flags = nd->intent.open.flags; | ||
1271 | |||
1272 | error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); | ||
1273 | if (error != 0) | 1564 | if (error != 0) |
1274 | goto out_err; | 1565 | goto out_err; |
1275 | return 0; | 1566 | return 0; |
@@ -1351,76 +1642,6 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1351 | return error; | 1642 | return error; |
1352 | } | 1643 | } |
1353 | 1644 | ||
1354 | static int nfs_sillyrename(struct inode *dir, struct dentry *dentry) | ||
1355 | { | ||
1356 | static unsigned int sillycounter; | ||
1357 | const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; | ||
1358 | const int countersize = sizeof(sillycounter)*2; | ||
1359 | const int slen = sizeof(".nfs")+fileidsize+countersize-1; | ||
1360 | char silly[slen+1]; | ||
1361 | struct qstr qsilly; | ||
1362 | struct dentry *sdentry; | ||
1363 | int error = -EIO; | ||
1364 | |||
1365 | dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", | ||
1366 | dentry->d_parent->d_name.name, dentry->d_name.name, | ||
1367 | atomic_read(&dentry->d_count)); | ||
1368 | nfs_inc_stats(dir, NFSIOS_SILLYRENAME); | ||
1369 | |||
1370 | /* | ||
1371 | * We don't allow a dentry to be silly-renamed twice. | ||
1372 | */ | ||
1373 | error = -EBUSY; | ||
1374 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) | ||
1375 | goto out; | ||
1376 | |||
1377 | sprintf(silly, ".nfs%*.*Lx", | ||
1378 | fileidsize, fileidsize, | ||
1379 | (unsigned long long)NFS_FILEID(dentry->d_inode)); | ||
1380 | |||
1381 | /* Return delegation in anticipation of the rename */ | ||
1382 | nfs_inode_return_delegation(dentry->d_inode); | ||
1383 | |||
1384 | sdentry = NULL; | ||
1385 | do { | ||
1386 | char *suffix = silly + slen - countersize; | ||
1387 | |||
1388 | dput(sdentry); | ||
1389 | sillycounter++; | ||
1390 | sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); | ||
1391 | |||
1392 | dfprintk(VFS, "NFS: trying to rename %s to %s\n", | ||
1393 | dentry->d_name.name, silly); | ||
1394 | |||
1395 | sdentry = lookup_one_len(silly, dentry->d_parent, slen); | ||
1396 | /* | ||
1397 | * N.B. Better to return EBUSY here ... it could be | ||
1398 | * dangerous to delete the file while it's in use. | ||
1399 | */ | ||
1400 | if (IS_ERR(sdentry)) | ||
1401 | goto out; | ||
1402 | } while(sdentry->d_inode != NULL); /* need negative lookup */ | ||
1403 | |||
1404 | qsilly.name = silly; | ||
1405 | qsilly.len = strlen(silly); | ||
1406 | if (dentry->d_inode) { | ||
1407 | error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, | ||
1408 | dir, &qsilly); | ||
1409 | nfs_mark_for_revalidate(dentry->d_inode); | ||
1410 | } else | ||
1411 | error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, | ||
1412 | dir, &qsilly); | ||
1413 | if (!error) { | ||
1414 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
1415 | d_move(dentry, sdentry); | ||
1416 | error = nfs_async_unlink(dir, dentry); | ||
1417 | /* If we return 0 we don't unlink */ | ||
1418 | } | ||
1419 | dput(sdentry); | ||
1420 | out: | ||
1421 | return error; | ||
1422 | } | ||
1423 | |||
1424 | /* | 1645 | /* |
1425 | * Remove a file after making sure there are no pending writes, | 1646 | * Remove a file after making sure there are no pending writes, |
1426 | * and after checking that the file has only one user. | 1647 | * and after checking that the file has only one user. |
@@ -1580,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) | |||
1580 | d_drop(dentry); | 1801 | d_drop(dentry); |
1581 | error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); | 1802 | error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); |
1582 | if (error == 0) { | 1803 | if (error == 0) { |
1583 | atomic_inc(&inode->i_count); | 1804 | ihold(inode); |
1584 | d_add(dentry, inode); | 1805 | d_add(dentry, inode); |
1585 | } | 1806 | } |
1586 | return error; | 1807 | return error; |
@@ -1711,14 +1932,14 @@ static void nfs_access_free_list(struct list_head *head) | |||
1711 | int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 1932 | int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) |
1712 | { | 1933 | { |
1713 | LIST_HEAD(head); | 1934 | LIST_HEAD(head); |
1714 | struct nfs_inode *nfsi; | 1935 | struct nfs_inode *nfsi, *next; |
1715 | struct nfs_access_entry *cache; | 1936 | struct nfs_access_entry *cache; |
1716 | 1937 | ||
1717 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 1938 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
1718 | return (nr_to_scan == 0) ? 0 : -1; | 1939 | return (nr_to_scan == 0) ? 0 : -1; |
1719 | 1940 | ||
1720 | spin_lock(&nfs_access_lru_lock); | 1941 | spin_lock(&nfs_access_lru_lock); |
1721 | list_for_each_entry(nfsi, &nfs_access_lru_list, access_cache_inode_lru) { | 1942 | list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) { |
1722 | struct inode *inode; | 1943 | struct inode *inode; |
1723 | 1944 | ||
1724 | if (nr_to_scan-- == 0) | 1945 | if (nr_to_scan-- == 0) |
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index dba50a5625db..a6e711ad130f 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c | |||
@@ -167,7 +167,7 @@ static int nfs_dns_show(struct seq_file *m, struct cache_detail *cd, | |||
167 | return 0; | 167 | return 0; |
168 | } | 168 | } |
169 | item = container_of(h, struct nfs_dns_ent, h); | 169 | item = container_of(h, struct nfs_dns_ent, h); |
170 | ttl = (long)item->h.expiry_time - (long)get_seconds(); | 170 | ttl = item->h.expiry_time - seconds_since_boot(); |
171 | if (ttl < 0) | 171 | if (ttl < 0) |
172 | ttl = 0; | 172 | ttl = 0; |
173 | 173 | ||
@@ -239,7 +239,7 @@ static int nfs_dns_parse(struct cache_detail *cd, char *buf, int buflen) | |||
239 | ttl = get_expiry(&buf); | 239 | ttl = get_expiry(&buf); |
240 | if (ttl == 0) | 240 | if (ttl == 0) |
241 | goto out; | 241 | goto out; |
242 | key.h.expiry_time = ttl + get_seconds(); | 242 | key.h.expiry_time = ttl + seconds_since_boot(); |
243 | 243 | ||
244 | ret = -ENOMEM; | 244 | ret = -ENOMEM; |
245 | item = nfs_dns_lookup(cd, &key); | 245 | item = nfs_dns_lookup(cd, &key); |
@@ -301,7 +301,7 @@ static int do_cache_lookup_nowait(struct cache_detail *cd, | |||
301 | goto out_err; | 301 | goto out_err; |
302 | ret = -ETIMEDOUT; | 302 | ret = -ETIMEDOUT; |
303 | if (!test_bit(CACHE_VALID, &(*item)->h.flags) | 303 | if (!test_bit(CACHE_VALID, &(*item)->h.flags) |
304 | || (*item)->h.expiry_time < get_seconds() | 304 | || (*item)->h.expiry_time < seconds_since_boot() |
305 | || cd->flush_time > (*item)->h.last_refresh) | 305 | || cd->flush_time > (*item)->h.last_refresh) |
306 | goto out_put; | 306 | goto out_put; |
307 | ret = -ENOENT; | 307 | ret = -ENOENT; |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 05bf3c0dc751..e756075637b0 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include "internal.h" | 36 | #include "internal.h" |
37 | #include "iostat.h" | 37 | #include "iostat.h" |
38 | #include "fscache.h" | 38 | #include "fscache.h" |
39 | #include "pnfs.h" | ||
39 | 40 | ||
40 | #define NFSDBG_FACILITY NFSDBG_FILE | 41 | #define NFSDBG_FACILITY NFSDBG_FILE |
41 | 42 | ||
@@ -386,6 +387,10 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping, | |||
386 | file->f_path.dentry->d_name.name, | 387 | file->f_path.dentry->d_name.name, |
387 | mapping->host->i_ino, len, (long long) pos); | 388 | mapping->host->i_ino, len, (long long) pos); |
388 | 389 | ||
390 | pnfs_update_layout(mapping->host, | ||
391 | nfs_file_open_context(file), | ||
392 | IOMODE_RW); | ||
393 | |||
389 | start: | 394 | start: |
390 | /* | 395 | /* |
391 | * Prevent starvation issues if someone is doing a consistency | 396 | * Prevent starvation issues if someone is doing a consistency |
@@ -551,7 +556,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
551 | struct file *filp = vma->vm_file; | 556 | struct file *filp = vma->vm_file; |
552 | struct dentry *dentry = filp->f_path.dentry; | 557 | struct dentry *dentry = filp->f_path.dentry; |
553 | unsigned pagelen; | 558 | unsigned pagelen; |
554 | int ret = -EINVAL; | 559 | int ret = VM_FAULT_NOPAGE; |
555 | struct address_space *mapping; | 560 | struct address_space *mapping; |
556 | 561 | ||
557 | dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", | 562 | dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%s/%s(%ld), offset %lld)\n", |
@@ -567,21 +572,20 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
567 | if (mapping != dentry->d_inode->i_mapping) | 572 | if (mapping != dentry->d_inode->i_mapping) |
568 | goto out_unlock; | 573 | goto out_unlock; |
569 | 574 | ||
570 | ret = 0; | ||
571 | pagelen = nfs_page_length(page); | 575 | pagelen = nfs_page_length(page); |
572 | if (pagelen == 0) | 576 | if (pagelen == 0) |
573 | goto out_unlock; | 577 | goto out_unlock; |
574 | 578 | ||
575 | ret = nfs_flush_incompatible(filp, page); | 579 | ret = VM_FAULT_LOCKED; |
576 | if (ret != 0) | 580 | if (nfs_flush_incompatible(filp, page) == 0 && |
577 | goto out_unlock; | 581 | nfs_updatepage(filp, page, 0, pagelen) == 0) |
582 | goto out; | ||
578 | 583 | ||
579 | ret = nfs_updatepage(filp, page, 0, pagelen); | 584 | ret = VM_FAULT_SIGBUS; |
580 | out_unlock: | 585 | out_unlock: |
581 | if (!ret) | ||
582 | return VM_FAULT_LOCKED; | ||
583 | unlock_page(page); | 586 | unlock_page(page); |
584 | return VM_FAULT_SIGBUS; | 587 | out: |
588 | return ret; | ||
585 | } | 589 | } |
586 | 590 | ||
587 | static const struct vm_operations_struct nfs_file_vm_ops = { | 591 | static const struct vm_operations_struct nfs_file_vm_ops = { |
@@ -684,7 +688,8 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, | |||
684 | return ret; | 688 | return ret; |
685 | } | 689 | } |
686 | 690 | ||
687 | static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) | 691 | static int |
692 | do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | ||
688 | { | 693 | { |
689 | struct inode *inode = filp->f_mapping->host; | 694 | struct inode *inode = filp->f_mapping->host; |
690 | int status = 0; | 695 | int status = 0; |
@@ -699,7 +704,7 @@ static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) | |||
699 | if (nfs_have_delegation(inode, FMODE_READ)) | 704 | if (nfs_have_delegation(inode, FMODE_READ)) |
700 | goto out_noconflict; | 705 | goto out_noconflict; |
701 | 706 | ||
702 | if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) | 707 | if (is_local) |
703 | goto out_noconflict; | 708 | goto out_noconflict; |
704 | 709 | ||
705 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); | 710 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); |
@@ -726,7 +731,8 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl) | |||
726 | return res; | 731 | return res; |
727 | } | 732 | } |
728 | 733 | ||
729 | static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) | 734 | static int |
735 | do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | ||
730 | { | 736 | { |
731 | struct inode *inode = filp->f_mapping->host; | 737 | struct inode *inode = filp->f_mapping->host; |
732 | int status; | 738 | int status; |
@@ -741,15 +747,24 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) | |||
741 | * If we're signalled while cleaning up locks on process exit, we | 747 | * If we're signalled while cleaning up locks on process exit, we |
742 | * still need to complete the unlock. | 748 | * still need to complete the unlock. |
743 | */ | 749 | */ |
744 | /* Use local locking if mounted with "-onolock" */ | 750 | /* |
745 | if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) | 751 | * Use local locking if mounted with "-onolock" or with appropriate |
752 | * "-olocal_lock=" | ||
753 | */ | ||
754 | if (!is_local) | ||
746 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); | 755 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); |
747 | else | 756 | else |
748 | status = do_vfs_lock(filp, fl); | 757 | status = do_vfs_lock(filp, fl); |
749 | return status; | 758 | return status; |
750 | } | 759 | } |
751 | 760 | ||
752 | static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) | 761 | static int |
762 | is_time_granular(struct timespec *ts) { | ||
763 | return ((ts->tv_sec == 0) && (ts->tv_nsec <= 1000)); | ||
764 | } | ||
765 | |||
766 | static int | ||
767 | do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) | ||
753 | { | 768 | { |
754 | struct inode *inode = filp->f_mapping->host; | 769 | struct inode *inode = filp->f_mapping->host; |
755 | int status; | 770 | int status; |
@@ -762,20 +777,31 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) | |||
762 | if (status != 0) | 777 | if (status != 0) |
763 | goto out; | 778 | goto out; |
764 | 779 | ||
765 | /* Use local locking if mounted with "-onolock" */ | 780 | /* |
766 | if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) | 781 | * Use local locking if mounted with "-onolock" or with appropriate |
782 | * "-olocal_lock=" | ||
783 | */ | ||
784 | if (!is_local) | ||
767 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); | 785 | status = NFS_PROTO(inode)->lock(filp, cmd, fl); |
768 | else | 786 | else |
769 | status = do_vfs_lock(filp, fl); | 787 | status = do_vfs_lock(filp, fl); |
770 | if (status < 0) | 788 | if (status < 0) |
771 | goto out; | 789 | goto out; |
790 | |||
772 | /* | 791 | /* |
773 | * Make sure we clear the cache whenever we try to get the lock. | 792 | * Revalidate the cache if the server has time stamps granular |
793 | * enough to detect subsecond changes. Otherwise, clear the | ||
794 | * cache to prevent missing any changes. | ||
795 | * | ||
774 | * This makes locking act as a cache coherency point. | 796 | * This makes locking act as a cache coherency point. |
775 | */ | 797 | */ |
776 | nfs_sync_mapping(filp->f_mapping); | 798 | nfs_sync_mapping(filp->f_mapping); |
777 | if (!nfs_have_delegation(inode, FMODE_READ)) | 799 | if (!nfs_have_delegation(inode, FMODE_READ)) { |
778 | nfs_zap_caches(inode); | 800 | if (is_time_granular(&NFS_SERVER(inode)->time_delta)) |
801 | __nfs_revalidate_inode(NFS_SERVER(inode), inode); | ||
802 | else | ||
803 | nfs_zap_caches(inode); | ||
804 | } | ||
779 | out: | 805 | out: |
780 | return status; | 806 | return status; |
781 | } | 807 | } |
@@ -787,6 +813,7 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
787 | { | 813 | { |
788 | struct inode *inode = filp->f_mapping->host; | 814 | struct inode *inode = filp->f_mapping->host; |
789 | int ret = -ENOLCK; | 815 | int ret = -ENOLCK; |
816 | int is_local = 0; | ||
790 | 817 | ||
791 | dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", | 818 | dprintk("NFS: lock(%s/%s, t=%x, fl=%x, r=%lld:%lld)\n", |
792 | filp->f_path.dentry->d_parent->d_name.name, | 819 | filp->f_path.dentry->d_parent->d_name.name, |
@@ -800,6 +827,9 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
800 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) | 827 | if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) |
801 | goto out_err; | 828 | goto out_err; |
802 | 829 | ||
830 | if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL) | ||
831 | is_local = 1; | ||
832 | |||
803 | if (NFS_PROTO(inode)->lock_check_bounds != NULL) { | 833 | if (NFS_PROTO(inode)->lock_check_bounds != NULL) { |
804 | ret = NFS_PROTO(inode)->lock_check_bounds(fl); | 834 | ret = NFS_PROTO(inode)->lock_check_bounds(fl); |
805 | if (ret < 0) | 835 | if (ret < 0) |
@@ -807,11 +837,11 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) | |||
807 | } | 837 | } |
808 | 838 | ||
809 | if (IS_GETLK(cmd)) | 839 | if (IS_GETLK(cmd)) |
810 | ret = do_getlk(filp, cmd, fl); | 840 | ret = do_getlk(filp, cmd, fl, is_local); |
811 | else if (fl->fl_type == F_UNLCK) | 841 | else if (fl->fl_type == F_UNLCK) |
812 | ret = do_unlk(filp, cmd, fl); | 842 | ret = do_unlk(filp, cmd, fl, is_local); |
813 | else | 843 | else |
814 | ret = do_setlk(filp, cmd, fl); | 844 | ret = do_setlk(filp, cmd, fl, is_local); |
815 | out_err: | 845 | out_err: |
816 | return ret; | 846 | return ret; |
817 | } | 847 | } |
@@ -821,6 +851,9 @@ out_err: | |||
821 | */ | 851 | */ |
822 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | 852 | static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) |
823 | { | 853 | { |
854 | struct inode *inode = filp->f_mapping->host; | ||
855 | int is_local = 0; | ||
856 | |||
824 | dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", | 857 | dprintk("NFS: flock(%s/%s, t=%x, fl=%x)\n", |
825 | filp->f_path.dentry->d_parent->d_name.name, | 858 | filp->f_path.dentry->d_parent->d_name.name, |
826 | filp->f_path.dentry->d_name.name, | 859 | filp->f_path.dentry->d_name.name, |
@@ -829,14 +862,17 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | |||
829 | if (!(fl->fl_flags & FL_FLOCK)) | 862 | if (!(fl->fl_flags & FL_FLOCK)) |
830 | return -ENOLCK; | 863 | return -ENOLCK; |
831 | 864 | ||
865 | if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) | ||
866 | is_local = 1; | ||
867 | |||
832 | /* We're simulating flock() locks using posix locks on the server */ | 868 | /* We're simulating flock() locks using posix locks on the server */ |
833 | fl->fl_owner = (fl_owner_t)filp; | 869 | fl->fl_owner = (fl_owner_t)filp; |
834 | fl->fl_start = 0; | 870 | fl->fl_start = 0; |
835 | fl->fl_end = OFFSET_MAX; | 871 | fl->fl_end = OFFSET_MAX; |
836 | 872 | ||
837 | if (fl->fl_type == F_UNLCK) | 873 | if (fl->fl_type == F_UNLCK) |
838 | return do_unlk(filp, cmd, fl); | 874 | return do_unlk(filp, cmd, fl, is_local); |
839 | return do_setlk(filp, cmd, fl); | 875 | return do_setlk(filp, cmd, fl, is_local); |
840 | } | 876 | } |
841 | 877 | ||
842 | /* | 878 | /* |
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index a70e446e1605..ac7b814ce162 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c | |||
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i | |||
54 | iput(inode); | 54 | iput(inode); |
55 | return -ENOMEM; | 55 | return -ENOMEM; |
56 | } | 56 | } |
57 | /* Circumvent igrab(): we know the inode is not being freed */ | 57 | ihold(inode); |
58 | atomic_inc(&inode->i_count); | ||
59 | /* | 58 | /* |
60 | * Ensure that this dentry is invisible to d_find_alias(). | 59 | * Ensure that this dentry is invisible to d_find_alias(). |
61 | * Otherwise, it may be spliced into the tree by | 60 | * Otherwise, it may be spliced into the tree by |
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 21a84d45916f..dec47ed8b6b9 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -34,6 +34,212 @@ | |||
34 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 34 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
35 | */ | 35 | */ |
36 | 36 | ||
37 | #ifdef CONFIG_NFS_USE_NEW_IDMAPPER | ||
38 | |||
39 | #include <linux/slab.h> | ||
40 | #include <linux/cred.h> | ||
41 | #include <linux/nfs_idmap.h> | ||
42 | #include <linux/keyctl.h> | ||
43 | #include <linux/key-type.h> | ||
44 | #include <linux/rcupdate.h> | ||
45 | #include <linux/kernel.h> | ||
46 | #include <linux/err.h> | ||
47 | |||
48 | #include <keys/user-type.h> | ||
49 | |||
50 | #define NFS_UINT_MAXLEN 11 | ||
51 | |||
52 | const struct cred *id_resolver_cache; | ||
53 | |||
54 | struct key_type key_type_id_resolver = { | ||
55 | .name = "id_resolver", | ||
56 | .instantiate = user_instantiate, | ||
57 | .match = user_match, | ||
58 | .revoke = user_revoke, | ||
59 | .destroy = user_destroy, | ||
60 | .describe = user_describe, | ||
61 | .read = user_read, | ||
62 | }; | ||
63 | |||
64 | int nfs_idmap_init(void) | ||
65 | { | ||
66 | struct cred *cred; | ||
67 | struct key *keyring; | ||
68 | int ret = 0; | ||
69 | |||
70 | printk(KERN_NOTICE "Registering the %s key type\n", key_type_id_resolver.name); | ||
71 | |||
72 | cred = prepare_kernel_cred(NULL); | ||
73 | if (!cred) | ||
74 | return -ENOMEM; | ||
75 | |||
76 | keyring = key_alloc(&key_type_keyring, ".id_resolver", 0, 0, cred, | ||
77 | (KEY_POS_ALL & ~KEY_POS_SETATTR) | | ||
78 | KEY_USR_VIEW | KEY_USR_READ, | ||
79 | KEY_ALLOC_NOT_IN_QUOTA); | ||
80 | if (IS_ERR(keyring)) { | ||
81 | ret = PTR_ERR(keyring); | ||
82 | goto failed_put_cred; | ||
83 | } | ||
84 | |||
85 | ret = key_instantiate_and_link(keyring, NULL, 0, NULL, NULL); | ||
86 | if (ret < 0) | ||
87 | goto failed_put_key; | ||
88 | |||
89 | ret = register_key_type(&key_type_id_resolver); | ||
90 | if (ret < 0) | ||
91 | goto failed_put_key; | ||
92 | |||
93 | cred->thread_keyring = keyring; | ||
94 | cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING; | ||
95 | id_resolver_cache = cred; | ||
96 | return 0; | ||
97 | |||
98 | failed_put_key: | ||
99 | key_put(keyring); | ||
100 | failed_put_cred: | ||
101 | put_cred(cred); | ||
102 | return ret; | ||
103 | } | ||
104 | |||
105 | void nfs_idmap_quit(void) | ||
106 | { | ||
107 | key_revoke(id_resolver_cache->thread_keyring); | ||
108 | unregister_key_type(&key_type_id_resolver); | ||
109 | put_cred(id_resolver_cache); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * Assemble the description to pass to request_key() | ||
114 | * This function will allocate a new string and update dest to point | ||
115 | * at it. The caller is responsible for freeing dest. | ||
116 | * | ||
117 | * On error 0 is returned. Otherwise, the length of dest is returned. | ||
118 | */ | ||
119 | static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen, | ||
120 | const char *type, size_t typelen, char **desc) | ||
121 | { | ||
122 | char *cp; | ||
123 | size_t desclen = typelen + namelen + 2; | ||
124 | |||
125 | *desc = kmalloc(desclen, GFP_KERNEL); | ||
126 | if (!desc) | ||
127 | return -ENOMEM; | ||
128 | |||
129 | cp = *desc; | ||
130 | memcpy(cp, type, typelen); | ||
131 | cp += typelen; | ||
132 | *cp++ = ':'; | ||
133 | |||
134 | memcpy(cp, name, namelen); | ||
135 | cp += namelen; | ||
136 | *cp = '\0'; | ||
137 | return desclen; | ||
138 | } | ||
139 | |||
140 | static ssize_t nfs_idmap_request_key(const char *name, size_t namelen, | ||
141 | const char *type, void *data, size_t data_size) | ||
142 | { | ||
143 | const struct cred *saved_cred; | ||
144 | struct key *rkey; | ||
145 | char *desc; | ||
146 | struct user_key_payload *payload; | ||
147 | ssize_t ret; | ||
148 | |||
149 | ret = nfs_idmap_get_desc(name, namelen, type, strlen(type), &desc); | ||
150 | if (ret <= 0) | ||
151 | goto out; | ||
152 | |||
153 | saved_cred = override_creds(id_resolver_cache); | ||
154 | rkey = request_key(&key_type_id_resolver, desc, ""); | ||
155 | revert_creds(saved_cred); | ||
156 | kfree(desc); | ||
157 | if (IS_ERR(rkey)) { | ||
158 | ret = PTR_ERR(rkey); | ||
159 | goto out; | ||
160 | } | ||
161 | |||
162 | rcu_read_lock(); | ||
163 | rkey->perm |= KEY_USR_VIEW; | ||
164 | |||
165 | ret = key_validate(rkey); | ||
166 | if (ret < 0) | ||
167 | goto out_up; | ||
168 | |||
169 | payload = rcu_dereference(rkey->payload.data); | ||
170 | if (IS_ERR_OR_NULL(payload)) { | ||
171 | ret = PTR_ERR(payload); | ||
172 | goto out_up; | ||
173 | } | ||
174 | |||
175 | ret = payload->datalen; | ||
176 | if (ret > 0 && ret <= data_size) | ||
177 | memcpy(data, payload->data, ret); | ||
178 | else | ||
179 | ret = -EINVAL; | ||
180 | |||
181 | out_up: | ||
182 | rcu_read_unlock(); | ||
183 | key_put(rkey); | ||
184 | out: | ||
185 | return ret; | ||
186 | } | ||
187 | |||
188 | |||
189 | /* ID -> Name */ | ||
190 | static ssize_t nfs_idmap_lookup_name(__u32 id, const char *type, char *buf, size_t buflen) | ||
191 | { | ||
192 | char id_str[NFS_UINT_MAXLEN]; | ||
193 | int id_len; | ||
194 | ssize_t ret; | ||
195 | |||
196 | id_len = snprintf(id_str, sizeof(id_str), "%u", id); | ||
197 | ret = nfs_idmap_request_key(id_str, id_len, type, buf, buflen); | ||
198 | if (ret < 0) | ||
199 | return -EINVAL; | ||
200 | return ret; | ||
201 | } | ||
202 | |||
203 | /* Name -> ID */ | ||
204 | static int nfs_idmap_lookup_id(const char *name, size_t namelen, | ||
205 | const char *type, __u32 *id) | ||
206 | { | ||
207 | char id_str[NFS_UINT_MAXLEN]; | ||
208 | long id_long; | ||
209 | ssize_t data_size; | ||
210 | int ret = 0; | ||
211 | |||
212 | data_size = nfs_idmap_request_key(name, namelen, type, id_str, NFS_UINT_MAXLEN); | ||
213 | if (data_size <= 0) { | ||
214 | ret = -EINVAL; | ||
215 | } else { | ||
216 | ret = strict_strtol(id_str, 10, &id_long); | ||
217 | *id = (__u32)id_long; | ||
218 | } | ||
219 | return ret; | ||
220 | } | ||
221 | |||
222 | int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) | ||
223 | { | ||
224 | return nfs_idmap_lookup_id(name, namelen, "uid", uid); | ||
225 | } | ||
226 | |||
227 | int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) | ||
228 | { | ||
229 | return nfs_idmap_lookup_id(name, namelen, "gid", gid); | ||
230 | } | ||
231 | |||
232 | int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) | ||
233 | { | ||
234 | return nfs_idmap_lookup_name(uid, "user", buf, buflen); | ||
235 | } | ||
236 | int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) | ||
237 | { | ||
238 | return nfs_idmap_lookup_name(gid, "group", buf, buflen); | ||
239 | } | ||
240 | |||
241 | #else /* CONFIG_NFS_USE_IDMAPPER not defined */ | ||
242 | |||
37 | #include <linux/module.h> | 243 | #include <linux/module.h> |
38 | #include <linux/mutex.h> | 244 | #include <linux/mutex.h> |
39 | #include <linux/init.h> | 245 | #include <linux/init.h> |
@@ -503,16 +709,17 @@ int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namele | |||
503 | return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); | 709 | return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); |
504 | } | 710 | } |
505 | 711 | ||
506 | int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf) | 712 | int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) |
507 | { | 713 | { |
508 | struct idmap *idmap = clp->cl_idmap; | 714 | struct idmap *idmap = clp->cl_idmap; |
509 | 715 | ||
510 | return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); | 716 | return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); |
511 | } | 717 | } |
512 | int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf) | 718 | int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) |
513 | { | 719 | { |
514 | struct idmap *idmap = clp->cl_idmap; | 720 | struct idmap *idmap = clp->cl_idmap; |
515 | 721 | ||
516 | return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); | 722 | return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); |
517 | } | 723 | } |
518 | 724 | ||
725 | #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 7d2d6c72aa78..314f57164602 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -48,6 +48,7 @@ | |||
48 | #include "internal.h" | 48 | #include "internal.h" |
49 | #include "fscache.h" | 49 | #include "fscache.h" |
50 | #include "dns_resolve.h" | 50 | #include "dns_resolve.h" |
51 | #include "pnfs.h" | ||
51 | 52 | ||
52 | #define NFSDBG_FACILITY NFSDBG_VFS | 53 | #define NFSDBG_FACILITY NFSDBG_VFS |
53 | 54 | ||
@@ -234,9 +235,6 @@ nfs_init_locked(struct inode *inode, void *opaque) | |||
234 | return 0; | 235 | return 0; |
235 | } | 236 | } |
236 | 237 | ||
237 | /* Don't use READDIRPLUS on directories that we believe are too large */ | ||
238 | #define NFS_LIMIT_READDIRPLUS (8*PAGE_SIZE) | ||
239 | |||
240 | /* | 238 | /* |
241 | * This is our front-end to iget that looks up inodes by file handle | 239 | * This is our front-end to iget that looks up inodes by file handle |
242 | * instead of inode number. | 240 | * instead of inode number. |
@@ -291,8 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
291 | } else if (S_ISDIR(inode->i_mode)) { | 289 | } else if (S_ISDIR(inode->i_mode)) { |
292 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; | 290 | inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops; |
293 | inode->i_fop = &nfs_dir_operations; | 291 | inode->i_fop = &nfs_dir_operations; |
294 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) | 292 | if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) |
295 | && fattr->size <= NFS_LIMIT_READDIRPLUS) | ||
296 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); | 293 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); |
297 | /* Deal with crossing mountpoints */ | 294 | /* Deal with crossing mountpoints */ |
298 | if ((fattr->valid & NFS_ATTR_FATTR_FSID) | 295 | if ((fattr->valid & NFS_ATTR_FATTR_FSID) |
@@ -623,7 +620,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync) | |||
623 | nfs_revalidate_inode(server, inode); | 620 | nfs_revalidate_inode(server, inode); |
624 | } | 621 | } |
625 | 622 | ||
626 | static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred) | 623 | struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cred *cred, fmode_t f_mode) |
627 | { | 624 | { |
628 | struct nfs_open_context *ctx; | 625 | struct nfs_open_context *ctx; |
629 | 626 | ||
@@ -633,11 +630,13 @@ static struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct | |||
633 | path_get(&ctx->path); | 630 | path_get(&ctx->path); |
634 | ctx->cred = get_rpccred(cred); | 631 | ctx->cred = get_rpccred(cred); |
635 | ctx->state = NULL; | 632 | ctx->state = NULL; |
633 | ctx->mode = f_mode; | ||
636 | ctx->flags = 0; | 634 | ctx->flags = 0; |
637 | ctx->error = 0; | 635 | ctx->error = 0; |
638 | ctx->dir_cookie = 0; | 636 | ctx->dir_cookie = 0; |
639 | nfs_init_lock_context(&ctx->lock_context); | 637 | nfs_init_lock_context(&ctx->lock_context); |
640 | ctx->lock_context.open_context = ctx; | 638 | ctx->lock_context.open_context = ctx; |
639 | INIT_LIST_HEAD(&ctx->list); | ||
641 | } | 640 | } |
642 | return ctx; | 641 | return ctx; |
643 | } | 642 | } |
@@ -653,11 +652,15 @@ static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync) | |||
653 | { | 652 | { |
654 | struct inode *inode = ctx->path.dentry->d_inode; | 653 | struct inode *inode = ctx->path.dentry->d_inode; |
655 | 654 | ||
656 | if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) | 655 | if (!list_empty(&ctx->list)) { |
656 | if (!atomic_dec_and_lock(&ctx->lock_context.count, &inode->i_lock)) | ||
657 | return; | ||
658 | list_del(&ctx->list); | ||
659 | spin_unlock(&inode->i_lock); | ||
660 | } else if (!atomic_dec_and_test(&ctx->lock_context.count)) | ||
657 | return; | 661 | return; |
658 | list_del(&ctx->list); | 662 | if (inode != NULL) |
659 | spin_unlock(&inode->i_lock); | 663 | NFS_PROTO(inode)->close_context(ctx, is_sync); |
660 | NFS_PROTO(inode)->close_context(ctx, is_sync); | ||
661 | if (ctx->cred != NULL) | 664 | if (ctx->cred != NULL) |
662 | put_rpccred(ctx->cred); | 665 | put_rpccred(ctx->cred); |
663 | path_put(&ctx->path); | 666 | path_put(&ctx->path); |
@@ -673,7 +676,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx) | |||
673 | * Ensure that mmap has a recent RPC credential for use when writing out | 676 | * Ensure that mmap has a recent RPC credential for use when writing out |
674 | * shared pages | 677 | * shared pages |
675 | */ | 678 | */ |
676 | static void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) | 679 | void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) |
677 | { | 680 | { |
678 | struct inode *inode = filp->f_path.dentry->d_inode; | 681 | struct inode *inode = filp->f_path.dentry->d_inode; |
679 | struct nfs_inode *nfsi = NFS_I(inode); | 682 | struct nfs_inode *nfsi = NFS_I(inode); |
@@ -730,11 +733,10 @@ int nfs_open(struct inode *inode, struct file *filp) | |||
730 | cred = rpc_lookup_cred(); | 733 | cred = rpc_lookup_cred(); |
731 | if (IS_ERR(cred)) | 734 | if (IS_ERR(cred)) |
732 | return PTR_ERR(cred); | 735 | return PTR_ERR(cred); |
733 | ctx = alloc_nfs_open_context(&filp->f_path, cred); | 736 | ctx = alloc_nfs_open_context(&filp->f_path, cred, filp->f_mode); |
734 | put_rpccred(cred); | 737 | put_rpccred(cred); |
735 | if (ctx == NULL) | 738 | if (ctx == NULL) |
736 | return -ENOMEM; | 739 | return -ENOMEM; |
737 | ctx->mode = filp->f_mode; | ||
738 | nfs_file_set_open_context(filp, ctx); | 740 | nfs_file_set_open_context(filp, ctx); |
739 | put_nfs_open_context(ctx); | 741 | put_nfs_open_context(ctx); |
740 | nfs_fscache_set_inode_cookie(inode, filp); | 742 | nfs_fscache_set_inode_cookie(inode, filp); |
@@ -1409,6 +1411,7 @@ void nfs4_evict_inode(struct inode *inode) | |||
1409 | { | 1411 | { |
1410 | truncate_inode_pages(&inode->i_data, 0); | 1412 | truncate_inode_pages(&inode->i_data, 0); |
1411 | end_writeback(inode); | 1413 | end_writeback(inode); |
1414 | pnfs_destroy_layout(NFS_I(inode)); | ||
1412 | /* If we are holding a delegation, return it! */ | 1415 | /* If we are holding a delegation, return it! */ |
1413 | nfs_inode_return_delegation_noreclaim(inode); | 1416 | nfs_inode_return_delegation_noreclaim(inode); |
1414 | /* First call standard NFS clear_inode() code */ | 1417 | /* First call standard NFS clear_inode() code */ |
@@ -1446,6 +1449,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi) | |||
1446 | nfsi->delegation = NULL; | 1449 | nfsi->delegation = NULL; |
1447 | nfsi->delegation_state = 0; | 1450 | nfsi->delegation_state = 0; |
1448 | init_rwsem(&nfsi->rwsem); | 1451 | init_rwsem(&nfsi->rwsem); |
1452 | nfsi->layout = NULL; | ||
1449 | #endif | 1453 | #endif |
1450 | } | 1454 | } |
1451 | 1455 | ||
@@ -1493,7 +1497,7 @@ static int nfsiod_start(void) | |||
1493 | { | 1497 | { |
1494 | struct workqueue_struct *wq; | 1498 | struct workqueue_struct *wq; |
1495 | dprintk("RPC: creating workqueue nfsiod\n"); | 1499 | dprintk("RPC: creating workqueue nfsiod\n"); |
1496 | wq = create_singlethread_workqueue("nfsiod"); | 1500 | wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); |
1497 | if (wq == NULL) | 1501 | if (wq == NULL) |
1498 | return -ENOMEM; | 1502 | return -ENOMEM; |
1499 | nfsiod_workqueue = wq; | 1503 | nfsiod_workqueue = wq; |
@@ -1521,6 +1525,10 @@ static int __init init_nfs_fs(void) | |||
1521 | { | 1525 | { |
1522 | int err; | 1526 | int err; |
1523 | 1527 | ||
1528 | err = nfs_idmap_init(); | ||
1529 | if (err < 0) | ||
1530 | goto out9; | ||
1531 | |||
1524 | err = nfs_dns_resolver_init(); | 1532 | err = nfs_dns_resolver_init(); |
1525 | if (err < 0) | 1533 | if (err < 0) |
1526 | goto out8; | 1534 | goto out8; |
@@ -1585,6 +1593,8 @@ out6: | |||
1585 | out7: | 1593 | out7: |
1586 | nfs_dns_resolver_destroy(); | 1594 | nfs_dns_resolver_destroy(); |
1587 | out8: | 1595 | out8: |
1596 | nfs_idmap_quit(); | ||
1597 | out9: | ||
1588 | return err; | 1598 | return err; |
1589 | } | 1599 | } |
1590 | 1600 | ||
@@ -1597,6 +1607,7 @@ static void __exit exit_nfs_fs(void) | |||
1597 | nfs_destroy_nfspagecache(); | 1607 | nfs_destroy_nfspagecache(); |
1598 | nfs_fscache_unregister(); | 1608 | nfs_fscache_unregister(); |
1599 | nfs_dns_resolver_destroy(); | 1609 | nfs_dns_resolver_destroy(); |
1610 | nfs_idmap_quit(); | ||
1600 | #ifdef CONFIG_PROC_FS | 1611 | #ifdef CONFIG_PROC_FS |
1601 | rpc_proc_unregister("nfs"); | 1612 | rpc_proc_unregister("nfs"); |
1602 | #endif | 1613 | #endif |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c961bc92c107..db08ff3ff454 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -63,6 +63,12 @@ struct nfs_clone_mount { | |||
63 | #define NFS_UNSPEC_PORT (-1) | 63 | #define NFS_UNSPEC_PORT (-1) |
64 | 64 | ||
65 | /* | 65 | /* |
66 | * Maximum number of pages that readdir can use for creating | ||
67 | * a vmapped array of pages. | ||
68 | */ | ||
69 | #define NFS_MAX_READDIR_PAGES 8 | ||
70 | |||
71 | /* | ||
66 | * In-kernel mount arguments | 72 | * In-kernel mount arguments |
67 | */ | 73 | */ |
68 | struct nfs_parsed_mount_data { | 74 | struct nfs_parsed_mount_data { |
@@ -181,15 +187,15 @@ extern void nfs_destroy_directcache(void); | |||
181 | /* nfs2xdr.c */ | 187 | /* nfs2xdr.c */ |
182 | extern int nfs_stat_to_errno(int); | 188 | extern int nfs_stat_to_errno(int); |
183 | extern struct rpc_procinfo nfs_procedures[]; | 189 | extern struct rpc_procinfo nfs_procedures[]; |
184 | extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); | 190 | extern __be32 *nfs_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); |
185 | 191 | ||
186 | /* nfs3xdr.c */ | 192 | /* nfs3xdr.c */ |
187 | extern struct rpc_procinfo nfs3_procedures[]; | 193 | extern struct rpc_procinfo nfs3_procedures[]; |
188 | extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); | 194 | extern __be32 *nfs3_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); |
189 | 195 | ||
190 | /* nfs4xdr.c */ | 196 | /* nfs4xdr.c */ |
191 | #ifdef CONFIG_NFS_V4 | 197 | #ifdef CONFIG_NFS_V4 |
192 | extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); | 198 | extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); |
193 | #endif | 199 | #endif |
194 | #ifdef CONFIG_NFS_V4_1 | 200 | #ifdef CONFIG_NFS_V4_1 |
195 | extern const u32 nfs41_maxread_overhead; | 201 | extern const u32 nfs41_maxread_overhead; |
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 59047f8d7d72..eceafe74f473 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
@@ -153,6 +153,7 @@ int nfs_mount(struct nfs_mount_request *info) | |||
153 | .rpc_resp = &result, | 153 | .rpc_resp = &result, |
154 | }; | 154 | }; |
155 | struct rpc_create_args args = { | 155 | struct rpc_create_args args = { |
156 | .net = &init_net, | ||
156 | .protocol = info->protocol, | 157 | .protocol = info->protocol, |
157 | .address = info->sap, | 158 | .address = info->sap, |
158 | .addrsize = info->salen, | 159 | .addrsize = info->salen, |
@@ -224,6 +225,7 @@ void nfs_umount(const struct nfs_mount_request *info) | |||
224 | .to_retries = 2, | 225 | .to_retries = 2, |
225 | }; | 226 | }; |
226 | struct rpc_create_args args = { | 227 | struct rpc_create_args args = { |
228 | .net = &init_net, | ||
227 | .protocol = IPPROTO_UDP, | 229 | .protocol = IPPROTO_UDP, |
228 | .address = info->sap, | 230 | .address = info->sap, |
229 | .addrsize = info->salen, | 231 | .addrsize = info->salen, |
@@ -436,7 +438,7 @@ static int decode_auth_flavors(struct xdr_stream *xdr, struct mountres *res) | |||
436 | 438 | ||
437 | for (i = 0; i < entries; i++) { | 439 | for (i = 0; i < entries; i++) { |
438 | flavors[i] = ntohl(*p++); | 440 | flavors[i] = ntohl(*p++); |
439 | dprintk("NFS:\tflavor %u: %d\n", i, flavors[i]); | 441 | dprintk("NFS: auth flavor[%u]: %d\n", i, flavors[i]); |
440 | } | 442 | } |
441 | *count = i; | 443 | *count = i; |
442 | 444 | ||
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index db8846a0e82e..e6bf45710cc7 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c | |||
@@ -337,10 +337,10 @@ nfs_xdr_createargs(struct rpc_rqst *req, __be32 *p, struct nfs_createargs *args) | |||
337 | static int | 337 | static int |
338 | nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) | 338 | nfs_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) |
339 | { | 339 | { |
340 | p = xdr_encode_fhandle(p, args->fromfh); | 340 | p = xdr_encode_fhandle(p, args->old_dir); |
341 | p = xdr_encode_array(p, args->fromname, args->fromlen); | 341 | p = xdr_encode_array(p, args->old_name->name, args->old_name->len); |
342 | p = xdr_encode_fhandle(p, args->tofh); | 342 | p = xdr_encode_fhandle(p, args->new_dir); |
343 | p = xdr_encode_array(p, args->toname, args->tolen); | 343 | p = xdr_encode_array(p, args->new_name->name, args->new_name->len); |
344 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); | 344 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); |
345 | return 0; | 345 | return 0; |
346 | } | 346 | } |
@@ -423,9 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
423 | struct page **page; | 423 | struct page **page; |
424 | size_t hdrlen; | 424 | size_t hdrlen; |
425 | unsigned int pglen, recvd; | 425 | unsigned int pglen, recvd; |
426 | u32 len; | ||
427 | int status, nr = 0; | 426 | int status, nr = 0; |
428 | __be32 *end, *entry, *kaddr; | ||
429 | 427 | ||
430 | if ((status = ntohl(*p++))) | 428 | if ((status = ntohl(*p++))) |
431 | return nfs_stat_to_errno(status); | 429 | return nfs_stat_to_errno(status); |
@@ -445,80 +443,59 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
445 | if (pglen > recvd) | 443 | if (pglen > recvd) |
446 | pglen = recvd; | 444 | pglen = recvd; |
447 | page = rcvbuf->pages; | 445 | page = rcvbuf->pages; |
448 | kaddr = p = kmap_atomic(*page, KM_USER0); | ||
449 | end = (__be32 *)((char *)p + pglen); | ||
450 | entry = p; | ||
451 | |||
452 | /* Make sure the packet actually has a value_follows and EOF entry */ | ||
453 | if ((entry + 1) > end) | ||
454 | goto short_pkt; | ||
455 | |||
456 | for (; *p++; nr++) { | ||
457 | if (p + 2 > end) | ||
458 | goto short_pkt; | ||
459 | p++; /* fileid */ | ||
460 | len = ntohl(*p++); | ||
461 | p += XDR_QUADLEN(len) + 1; /* name plus cookie */ | ||
462 | if (len > NFS2_MAXNAMLEN) { | ||
463 | dprintk("NFS: giant filename in readdir (len 0x%x)!\n", | ||
464 | len); | ||
465 | goto err_unmap; | ||
466 | } | ||
467 | if (p + 2 > end) | ||
468 | goto short_pkt; | ||
469 | entry = p; | ||
470 | } | ||
471 | |||
472 | /* | ||
473 | * Apparently some server sends responses that are a valid size, but | ||
474 | * contain no entries, and have value_follows==0 and EOF==0. For | ||
475 | * those, just set the EOF marker. | ||
476 | */ | ||
477 | if (!nr && entry[1] == 0) { | ||
478 | dprintk("NFS: readdir reply truncated!\n"); | ||
479 | entry[1] = 1; | ||
480 | } | ||
481 | out: | ||
482 | kunmap_atomic(kaddr, KM_USER0); | ||
483 | return nr; | 446 | return nr; |
484 | short_pkt: | 447 | } |
485 | /* | 448 | |
486 | * When we get a short packet there are 2 possibilities. We can | 449 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
487 | * return an error, or fix up the response to look like a valid | 450 | { |
488 | * response and return what we have so far. If there are no | 451 | dprintk("nfs: %s: prematurely hit end of receive buffer. " |
489 | * entries and the packet was short, then return -EIO. If there | 452 | "Remaining buffer length is %tu words.\n", |
490 | * are valid entries in the response, return them and pretend that | 453 | func, xdr->end - xdr->p); |
491 | * the call was successful, but incomplete. The caller can retry the | ||
492 | * readdir starting at the last cookie. | ||
493 | */ | ||
494 | entry[0] = entry[1] = 0; | ||
495 | if (!nr) | ||
496 | nr = -errno_NFSERR_IO; | ||
497 | goto out; | ||
498 | err_unmap: | ||
499 | nr = -errno_NFSERR_IO; | ||
500 | goto out; | ||
501 | } | 454 | } |
502 | 455 | ||
503 | __be32 * | 456 | __be32 * |
504 | nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) | 457 | nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus) |
505 | { | 458 | { |
506 | if (!*p++) { | 459 | __be32 *p; |
507 | if (!*p) | 460 | p = xdr_inline_decode(xdr, 4); |
461 | if (unlikely(!p)) | ||
462 | goto out_overflow; | ||
463 | if (!ntohl(*p++)) { | ||
464 | p = xdr_inline_decode(xdr, 4); | ||
465 | if (unlikely(!p)) | ||
466 | goto out_overflow; | ||
467 | if (!ntohl(*p++)) | ||
508 | return ERR_PTR(-EAGAIN); | 468 | return ERR_PTR(-EAGAIN); |
509 | entry->eof = 1; | 469 | entry->eof = 1; |
510 | return ERR_PTR(-EBADCOOKIE); | 470 | return ERR_PTR(-EBADCOOKIE); |
511 | } | 471 | } |
512 | 472 | ||
473 | p = xdr_inline_decode(xdr, 8); | ||
474 | if (unlikely(!p)) | ||
475 | goto out_overflow; | ||
476 | |||
513 | entry->ino = ntohl(*p++); | 477 | entry->ino = ntohl(*p++); |
514 | entry->len = ntohl(*p++); | 478 | entry->len = ntohl(*p++); |
479 | |||
480 | p = xdr_inline_decode(xdr, entry->len + 4); | ||
481 | if (unlikely(!p)) | ||
482 | goto out_overflow; | ||
515 | entry->name = (const char *) p; | 483 | entry->name = (const char *) p; |
516 | p += XDR_QUADLEN(entry->len); | 484 | p += XDR_QUADLEN(entry->len); |
517 | entry->prev_cookie = entry->cookie; | 485 | entry->prev_cookie = entry->cookie; |
518 | entry->cookie = ntohl(*p++); | 486 | entry->cookie = ntohl(*p++); |
519 | entry->eof = !p[0] && p[1]; | 487 | |
488 | p = xdr_inline_peek(xdr, 8); | ||
489 | if (p != NULL) | ||
490 | entry->eof = !p[0] && p[1]; | ||
491 | else | ||
492 | entry->eof = 0; | ||
520 | 493 | ||
521 | return p; | 494 | return p; |
495 | |||
496 | out_overflow: | ||
497 | print_overflow_msg(__func__, xdr); | ||
498 | return ERR_PTR(-EIO); | ||
522 | } | 499 | } |
523 | 500 | ||
524 | /* | 501 | /* |
@@ -596,7 +573,6 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
596 | struct kvec *iov = rcvbuf->head; | 573 | struct kvec *iov = rcvbuf->head; |
597 | size_t hdrlen; | 574 | size_t hdrlen; |
598 | u32 len, recvd; | 575 | u32 len, recvd; |
599 | char *kaddr; | ||
600 | int status; | 576 | int status; |
601 | 577 | ||
602 | if ((status = ntohl(*p++))) | 578 | if ((status = ntohl(*p++))) |
@@ -623,10 +599,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy) | |||
623 | return -EIO; | 599 | return -EIO; |
624 | } | 600 | } |
625 | 601 | ||
626 | /* NULL terminate the string we got */ | 602 | xdr_terminate_string(rcvbuf, len); |
627 | kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); | ||
628 | kaddr[len+rcvbuf->page_base] = '\0'; | ||
629 | kunmap_atomic(kaddr, KM_USER0); | ||
630 | return 0; | 603 | return 0; |
631 | } | 604 | } |
632 | 605 | ||
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index fabb4f2849a1..ce939c062a52 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -313,7 +313,7 @@ static void nfs3_free_createdata(struct nfs3_createdata *data) | |||
313 | */ | 313 | */ |
314 | static int | 314 | static int |
315 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 315 | nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
316 | int flags, struct nameidata *nd) | 316 | int flags, struct nfs_open_context *ctx) |
317 | { | 317 | { |
318 | struct nfs3_createdata *data; | 318 | struct nfs3_createdata *data; |
319 | mode_t mode = sattr->ia_mode; | 319 | mode_t mode = sattr->ia_mode; |
@@ -438,19 +438,38 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
438 | return 1; | 438 | return 1; |
439 | } | 439 | } |
440 | 440 | ||
441 | static void | ||
442 | nfs3_proc_rename_setup(struct rpc_message *msg, struct inode *dir) | ||
443 | { | ||
444 | msg->rpc_proc = &nfs3_procedures[NFS3PROC_RENAME]; | ||
445 | } | ||
446 | |||
447 | static int | ||
448 | nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | ||
449 | struct inode *new_dir) | ||
450 | { | ||
451 | struct nfs_renameres *res; | ||
452 | |||
453 | if (nfs3_async_handle_jukebox(task, old_dir)) | ||
454 | return 0; | ||
455 | res = task->tk_msg.rpc_resp; | ||
456 | |||
457 | nfs_post_op_update_inode(old_dir, res->old_fattr); | ||
458 | nfs_post_op_update_inode(new_dir, res->new_fattr); | ||
459 | return 1; | ||
460 | } | ||
461 | |||
441 | static int | 462 | static int |
442 | nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, | 463 | nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, |
443 | struct inode *new_dir, struct qstr *new_name) | 464 | struct inode *new_dir, struct qstr *new_name) |
444 | { | 465 | { |
445 | struct nfs3_renameargs arg = { | 466 | struct nfs_renameargs arg = { |
446 | .fromfh = NFS_FH(old_dir), | 467 | .old_dir = NFS_FH(old_dir), |
447 | .fromname = old_name->name, | 468 | .old_name = old_name, |
448 | .fromlen = old_name->len, | 469 | .new_dir = NFS_FH(new_dir), |
449 | .tofh = NFS_FH(new_dir), | 470 | .new_name = new_name, |
450 | .toname = new_name->name, | ||
451 | .tolen = new_name->len | ||
452 | }; | 471 | }; |
453 | struct nfs3_renameres res; | 472 | struct nfs_renameres res; |
454 | struct rpc_message msg = { | 473 | struct rpc_message msg = { |
455 | .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], | 474 | .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], |
456 | .rpc_argp = &arg, | 475 | .rpc_argp = &arg, |
@@ -460,17 +479,17 @@ nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, | |||
460 | 479 | ||
461 | dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); | 480 | dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); |
462 | 481 | ||
463 | res.fromattr = nfs_alloc_fattr(); | 482 | res.old_fattr = nfs_alloc_fattr(); |
464 | res.toattr = nfs_alloc_fattr(); | 483 | res.new_fattr = nfs_alloc_fattr(); |
465 | if (res.fromattr == NULL || res.toattr == NULL) | 484 | if (res.old_fattr == NULL || res.new_fattr == NULL) |
466 | goto out; | 485 | goto out; |
467 | 486 | ||
468 | status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); | 487 | status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); |
469 | nfs_post_op_update_inode(old_dir, res.fromattr); | 488 | nfs_post_op_update_inode(old_dir, res.old_fattr); |
470 | nfs_post_op_update_inode(new_dir, res.toattr); | 489 | nfs_post_op_update_inode(new_dir, res.new_fattr); |
471 | out: | 490 | out: |
472 | nfs_free_fattr(res.toattr); | 491 | nfs_free_fattr(res.old_fattr); |
473 | nfs_free_fattr(res.fromattr); | 492 | nfs_free_fattr(res.new_fattr); |
474 | dprintk("NFS reply rename: %d\n", status); | 493 | dprintk("NFS reply rename: %d\n", status); |
475 | return status; | 494 | return status; |
476 | } | 495 | } |
@@ -611,7 +630,7 @@ out: | |||
611 | */ | 630 | */ |
612 | static int | 631 | static int |
613 | nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | 632 | nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, |
614 | u64 cookie, struct page *page, unsigned int count, int plus) | 633 | u64 cookie, struct page **pages, unsigned int count, int plus) |
615 | { | 634 | { |
616 | struct inode *dir = dentry->d_inode; | 635 | struct inode *dir = dentry->d_inode; |
617 | __be32 *verf = NFS_COOKIEVERF(dir); | 636 | __be32 *verf = NFS_COOKIEVERF(dir); |
@@ -621,7 +640,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
621 | .verf = {verf[0], verf[1]}, | 640 | .verf = {verf[0], verf[1]}, |
622 | .plus = plus, | 641 | .plus = plus, |
623 | .count = count, | 642 | .count = count, |
624 | .pages = &page | 643 | .pages = pages |
625 | }; | 644 | }; |
626 | struct nfs3_readdirres res = { | 645 | struct nfs3_readdirres res = { |
627 | .verf = verf, | 646 | .verf = verf, |
@@ -652,7 +671,8 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
652 | 671 | ||
653 | nfs_free_fattr(res.dir_attr); | 672 | nfs_free_fattr(res.dir_attr); |
654 | out: | 673 | out: |
655 | dprintk("NFS reply readdir: %d\n", status); | 674 | dprintk("NFS reply readdir%s: %d\n", |
675 | plus? "plus" : "", status); | ||
656 | return status; | 676 | return status; |
657 | } | 677 | } |
658 | 678 | ||
@@ -722,7 +742,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
722 | dprintk("NFS call fsstat\n"); | 742 | dprintk("NFS call fsstat\n"); |
723 | nfs_fattr_init(stat->fattr); | 743 | nfs_fattr_init(stat->fattr); |
724 | status = rpc_call_sync(server->client, &msg, 0); | 744 | status = rpc_call_sync(server->client, &msg, 0); |
725 | dprintk("NFS reply statfs: %d\n", status); | 745 | dprintk("NFS reply fsstat: %d\n", status); |
726 | return status; | 746 | return status; |
727 | } | 747 | } |
728 | 748 | ||
@@ -844,6 +864,8 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
844 | .unlink_setup = nfs3_proc_unlink_setup, | 864 | .unlink_setup = nfs3_proc_unlink_setup, |
845 | .unlink_done = nfs3_proc_unlink_done, | 865 | .unlink_done = nfs3_proc_unlink_done, |
846 | .rename = nfs3_proc_rename, | 866 | .rename = nfs3_proc_rename, |
867 | .rename_setup = nfs3_proc_rename_setup, | ||
868 | .rename_done = nfs3_proc_rename_done, | ||
847 | .link = nfs3_proc_link, | 869 | .link = nfs3_proc_link, |
848 | .symlink = nfs3_proc_symlink, | 870 | .symlink = nfs3_proc_symlink, |
849 | .mkdir = nfs3_proc_mkdir, | 871 | .mkdir = nfs3_proc_mkdir, |
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 9769704f8ce6..d9a5e832c257 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c | |||
@@ -100,6 +100,13 @@ static const umode_t nfs_type2fmt[] = { | |||
100 | [NF3FIFO] = S_IFIFO, | 100 | [NF3FIFO] = S_IFIFO, |
101 | }; | 101 | }; |
102 | 102 | ||
103 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | ||
104 | { | ||
105 | dprintk("nfs: %s: prematurely hit end of receive buffer. " | ||
106 | "Remaining buffer length is %tu words.\n", | ||
107 | func, xdr->end - xdr->p); | ||
108 | } | ||
109 | |||
103 | /* | 110 | /* |
104 | * Common NFS XDR functions as inlines | 111 | * Common NFS XDR functions as inlines |
105 | */ | 112 | */ |
@@ -119,6 +126,29 @@ xdr_decode_fhandle(__be32 *p, struct nfs_fh *fh) | |||
119 | return NULL; | 126 | return NULL; |
120 | } | 127 | } |
121 | 128 | ||
129 | static inline __be32 * | ||
130 | xdr_decode_fhandle_stream(struct xdr_stream *xdr, struct nfs_fh *fh) | ||
131 | { | ||
132 | __be32 *p; | ||
133 | p = xdr_inline_decode(xdr, 4); | ||
134 | if (unlikely(!p)) | ||
135 | goto out_overflow; | ||
136 | fh->size = ntohl(*p++); | ||
137 | |||
138 | if (fh->size <= NFS3_FHSIZE) { | ||
139 | p = xdr_inline_decode(xdr, fh->size); | ||
140 | if (unlikely(!p)) | ||
141 | goto out_overflow; | ||
142 | memcpy(fh->data, p, fh->size); | ||
143 | return p + XDR_QUADLEN(fh->size); | ||
144 | } | ||
145 | return NULL; | ||
146 | |||
147 | out_overflow: | ||
148 | print_overflow_msg(__func__, xdr); | ||
149 | return ERR_PTR(-EIO); | ||
150 | } | ||
151 | |||
122 | /* | 152 | /* |
123 | * Encode/decode time. | 153 | * Encode/decode time. |
124 | */ | 154 | */ |
@@ -241,6 +271,26 @@ xdr_decode_post_op_attr(__be32 *p, struct nfs_fattr *fattr) | |||
241 | } | 271 | } |
242 | 272 | ||
243 | static inline __be32 * | 273 | static inline __be32 * |
274 | xdr_decode_post_op_attr_stream(struct xdr_stream *xdr, struct nfs_fattr *fattr) | ||
275 | { | ||
276 | __be32 *p; | ||
277 | |||
278 | p = xdr_inline_decode(xdr, 4); | ||
279 | if (unlikely(!p)) | ||
280 | goto out_overflow; | ||
281 | if (ntohl(*p++)) { | ||
282 | p = xdr_inline_decode(xdr, 84); | ||
283 | if (unlikely(!p)) | ||
284 | goto out_overflow; | ||
285 | p = xdr_decode_fattr(p, fattr); | ||
286 | } | ||
287 | return p; | ||
288 | out_overflow: | ||
289 | print_overflow_msg(__func__, xdr); | ||
290 | return ERR_PTR(-EIO); | ||
291 | } | ||
292 | |||
293 | static inline __be32 * | ||
244 | xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) | 294 | xdr_decode_pre_op_attr(__be32 *p, struct nfs_fattr *fattr) |
245 | { | 295 | { |
246 | if (*p++) | 296 | if (*p++) |
@@ -442,12 +492,12 @@ nfs3_xdr_mknodargs(struct rpc_rqst *req, __be32 *p, struct nfs3_mknodargs *args) | |||
442 | * Encode RENAME arguments | 492 | * Encode RENAME arguments |
443 | */ | 493 | */ |
444 | static int | 494 | static int |
445 | nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs3_renameargs *args) | 495 | nfs3_xdr_renameargs(struct rpc_rqst *req, __be32 *p, struct nfs_renameargs *args) |
446 | { | 496 | { |
447 | p = xdr_encode_fhandle(p, args->fromfh); | 497 | p = xdr_encode_fhandle(p, args->old_dir); |
448 | p = xdr_encode_array(p, args->fromname, args->fromlen); | 498 | p = xdr_encode_array(p, args->old_name->name, args->old_name->len); |
449 | p = xdr_encode_fhandle(p, args->tofh); | 499 | p = xdr_encode_fhandle(p, args->new_dir); |
450 | p = xdr_encode_array(p, args->toname, args->tolen); | 500 | p = xdr_encode_array(p, args->new_name->name, args->new_name->len); |
451 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); | 501 | req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); |
452 | return 0; | 502 | return 0; |
453 | } | 503 | } |
@@ -504,9 +554,8 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
504 | struct kvec *iov = rcvbuf->head; | 554 | struct kvec *iov = rcvbuf->head; |
505 | struct page **page; | 555 | struct page **page; |
506 | size_t hdrlen; | 556 | size_t hdrlen; |
507 | u32 len, recvd, pglen; | 557 | u32 recvd, pglen; |
508 | int status, nr = 0; | 558 | int status, nr = 0; |
509 | __be32 *entry, *end, *kaddr; | ||
510 | 559 | ||
511 | status = ntohl(*p++); | 560 | status = ntohl(*p++); |
512 | /* Decode post_op_attrs */ | 561 | /* Decode post_op_attrs */ |
@@ -536,99 +585,38 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res | |||
536 | if (pglen > recvd) | 585 | if (pglen > recvd) |
537 | pglen = recvd; | 586 | pglen = recvd; |
538 | page = rcvbuf->pages; | 587 | page = rcvbuf->pages; |
539 | kaddr = p = kmap_atomic(*page, KM_USER0); | ||
540 | end = (__be32 *)((char *)p + pglen); | ||
541 | entry = p; | ||
542 | |||
543 | /* Make sure the packet actually has a value_follows and EOF entry */ | ||
544 | if ((entry + 1) > end) | ||
545 | goto short_pkt; | ||
546 | |||
547 | for (; *p++; nr++) { | ||
548 | if (p + 3 > end) | ||
549 | goto short_pkt; | ||
550 | p += 2; /* inode # */ | ||
551 | len = ntohl(*p++); /* string length */ | ||
552 | p += XDR_QUADLEN(len) + 2; /* name + cookie */ | ||
553 | if (len > NFS3_MAXNAMLEN) { | ||
554 | dprintk("NFS: giant filename in readdir (len 0x%x)!\n", | ||
555 | len); | ||
556 | goto err_unmap; | ||
557 | } | ||
558 | 588 | ||
559 | if (res->plus) { | ||
560 | /* post_op_attr */ | ||
561 | if (p + 2 > end) | ||
562 | goto short_pkt; | ||
563 | if (*p++) { | ||
564 | p += 21; | ||
565 | if (p + 1 > end) | ||
566 | goto short_pkt; | ||
567 | } | ||
568 | /* post_op_fh3 */ | ||
569 | if (*p++) { | ||
570 | if (p + 1 > end) | ||
571 | goto short_pkt; | ||
572 | len = ntohl(*p++); | ||
573 | if (len > NFS3_FHSIZE) { | ||
574 | dprintk("NFS: giant filehandle in " | ||
575 | "readdir (len 0x%x)!\n", len); | ||
576 | goto err_unmap; | ||
577 | } | ||
578 | p += XDR_QUADLEN(len); | ||
579 | } | ||
580 | } | ||
581 | |||
582 | if (p + 2 > end) | ||
583 | goto short_pkt; | ||
584 | entry = p; | ||
585 | } | ||
586 | |||
587 | /* | ||
588 | * Apparently some server sends responses that are a valid size, but | ||
589 | * contain no entries, and have value_follows==0 and EOF==0. For | ||
590 | * those, just set the EOF marker. | ||
591 | */ | ||
592 | if (!nr && entry[1] == 0) { | ||
593 | dprintk("NFS: readdir reply truncated!\n"); | ||
594 | entry[1] = 1; | ||
595 | } | ||
596 | out: | ||
597 | kunmap_atomic(kaddr, KM_USER0); | ||
598 | return nr; | 589 | return nr; |
599 | short_pkt: | ||
600 | /* | ||
601 | * When we get a short packet there are 2 possibilities. We can | ||
602 | * return an error, or fix up the response to look like a valid | ||
603 | * response and return what we have so far. If there are no | ||
604 | * entries and the packet was short, then return -EIO. If there | ||
605 | * are valid entries in the response, return them and pretend that | ||
606 | * the call was successful, but incomplete. The caller can retry the | ||
607 | * readdir starting at the last cookie. | ||
608 | */ | ||
609 | entry[0] = entry[1] = 0; | ||
610 | if (!nr) | ||
611 | nr = -errno_NFSERR_IO; | ||
612 | goto out; | ||
613 | err_unmap: | ||
614 | nr = -errno_NFSERR_IO; | ||
615 | goto out; | ||
616 | } | 590 | } |
617 | 591 | ||
618 | __be32 * | 592 | __be32 * |
619 | nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) | 593 | nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_server *server, int plus) |
620 | { | 594 | { |
595 | __be32 *p; | ||
621 | struct nfs_entry old = *entry; | 596 | struct nfs_entry old = *entry; |
622 | 597 | ||
623 | if (!*p++) { | 598 | p = xdr_inline_decode(xdr, 4); |
624 | if (!*p) | 599 | if (unlikely(!p)) |
600 | goto out_overflow; | ||
601 | if (!ntohl(*p++)) { | ||
602 | p = xdr_inline_decode(xdr, 4); | ||
603 | if (unlikely(!p)) | ||
604 | goto out_overflow; | ||
605 | if (!ntohl(*p++)) | ||
625 | return ERR_PTR(-EAGAIN); | 606 | return ERR_PTR(-EAGAIN); |
626 | entry->eof = 1; | 607 | entry->eof = 1; |
627 | return ERR_PTR(-EBADCOOKIE); | 608 | return ERR_PTR(-EBADCOOKIE); |
628 | } | 609 | } |
629 | 610 | ||
611 | p = xdr_inline_decode(xdr, 12); | ||
612 | if (unlikely(!p)) | ||
613 | goto out_overflow; | ||
630 | p = xdr_decode_hyper(p, &entry->ino); | 614 | p = xdr_decode_hyper(p, &entry->ino); |
631 | entry->len = ntohl(*p++); | 615 | entry->len = ntohl(*p++); |
616 | |||
617 | p = xdr_inline_decode(xdr, entry->len + 8); | ||
618 | if (unlikely(!p)) | ||
619 | goto out_overflow; | ||
632 | entry->name = (const char *) p; | 620 | entry->name = (const char *) p; |
633 | p += XDR_QUADLEN(entry->len); | 621 | p += XDR_QUADLEN(entry->len); |
634 | entry->prev_cookie = entry->cookie; | 622 | entry->prev_cookie = entry->cookie; |
@@ -636,10 +624,17 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) | |||
636 | 624 | ||
637 | if (plus) { | 625 | if (plus) { |
638 | entry->fattr->valid = 0; | 626 | entry->fattr->valid = 0; |
639 | p = xdr_decode_post_op_attr(p, entry->fattr); | 627 | p = xdr_decode_post_op_attr_stream(xdr, entry->fattr); |
628 | if (IS_ERR(p)) | ||
629 | goto out_overflow_exit; | ||
640 | /* In fact, a post_op_fh3: */ | 630 | /* In fact, a post_op_fh3: */ |
631 | p = xdr_inline_decode(xdr, 4); | ||
632 | if (unlikely(!p)) | ||
633 | goto out_overflow; | ||
641 | if (*p++) { | 634 | if (*p++) { |
642 | p = xdr_decode_fhandle(p, entry->fh); | 635 | p = xdr_decode_fhandle_stream(xdr, entry->fh); |
636 | if (IS_ERR(p)) | ||
637 | goto out_overflow_exit; | ||
643 | /* Ugh -- server reply was truncated */ | 638 | /* Ugh -- server reply was truncated */ |
644 | if (p == NULL) { | 639 | if (p == NULL) { |
645 | dprintk("NFS: FH truncated\n"); | 640 | dprintk("NFS: FH truncated\n"); |
@@ -650,8 +645,18 @@ nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) | |||
650 | memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); | 645 | memset((u8*)(entry->fh), 0, sizeof(*entry->fh)); |
651 | } | 646 | } |
652 | 647 | ||
653 | entry->eof = !p[0] && p[1]; | 648 | p = xdr_inline_peek(xdr, 8); |
649 | if (p != NULL) | ||
650 | entry->eof = !p[0] && p[1]; | ||
651 | else | ||
652 | entry->eof = 0; | ||
653 | |||
654 | return p; | 654 | return p; |
655 | |||
656 | out_overflow: | ||
657 | print_overflow_msg(__func__, xdr); | ||
658 | out_overflow_exit: | ||
659 | return ERR_PTR(-EIO); | ||
655 | } | 660 | } |
656 | 661 | ||
657 | /* | 662 | /* |
@@ -824,7 +829,6 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) | |||
824 | struct kvec *iov = rcvbuf->head; | 829 | struct kvec *iov = rcvbuf->head; |
825 | size_t hdrlen; | 830 | size_t hdrlen; |
826 | u32 len, recvd; | 831 | u32 len, recvd; |
827 | char *kaddr; | ||
828 | int status; | 832 | int status; |
829 | 833 | ||
830 | status = ntohl(*p++); | 834 | status = ntohl(*p++); |
@@ -857,10 +861,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr) | |||
857 | return -EIO; | 861 | return -EIO; |
858 | } | 862 | } |
859 | 863 | ||
860 | /* NULL terminate the string we got */ | 864 | xdr_terminate_string(rcvbuf, len); |
861 | kaddr = (char*)kmap_atomic(rcvbuf->pages[0], KM_USER0); | ||
862 | kaddr[len+rcvbuf->page_base] = '\0'; | ||
863 | kunmap_atomic(kaddr, KM_USER0); | ||
864 | return 0; | 865 | return 0; |
865 | } | 866 | } |
866 | 867 | ||
@@ -970,14 +971,14 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res) | |||
970 | * Decode RENAME reply | 971 | * Decode RENAME reply |
971 | */ | 972 | */ |
972 | static int | 973 | static int |
973 | nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res) | 974 | nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs_renameres *res) |
974 | { | 975 | { |
975 | int status; | 976 | int status; |
976 | 977 | ||
977 | if ((status = ntohl(*p++)) != 0) | 978 | if ((status = ntohl(*p++)) != 0) |
978 | status = nfs_stat_to_errno(status); | 979 | status = nfs_stat_to_errno(status); |
979 | p = xdr_decode_wcc_data(p, res->fromattr); | 980 | p = xdr_decode_wcc_data(p, res->old_fattr); |
980 | p = xdr_decode_wcc_data(p, res->toattr); | 981 | p = xdr_decode_wcc_data(p, res->new_fattr); |
981 | return status; | 982 | return status; |
982 | } | 983 | } |
983 | 984 | ||
@@ -1043,8 +1044,9 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res) | |||
1043 | res->wtmult = ntohl(*p++); | 1044 | res->wtmult = ntohl(*p++); |
1044 | res->dtpref = ntohl(*p++); | 1045 | res->dtpref = ntohl(*p++); |
1045 | p = xdr_decode_hyper(p, &res->maxfilesize); | 1046 | p = xdr_decode_hyper(p, &res->maxfilesize); |
1047 | p = xdr_decode_time3(p, &res->time_delta); | ||
1046 | 1048 | ||
1047 | /* ignore time_delta and properties */ | 1049 | /* ignore properties */ |
1048 | res->lease_time = 0; | 1050 | res->lease_time = 0; |
1049 | return 0; | 1051 | return 0; |
1050 | } | 1052 | } |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 311e15cc8af0..9fa496387fdf 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -242,8 +242,6 @@ extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *); | |||
242 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); | 242 | extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); |
243 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); | 243 | extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); |
244 | extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); | 244 | extern int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, int wait); |
245 | extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); | ||
246 | extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); | ||
247 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); | 245 | extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); |
248 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, | 246 | extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, |
249 | struct nfs4_fs_locations *fs_locations, struct page *page); | 247 | struct nfs4_fs_locations *fs_locations, struct page *page); |
@@ -333,7 +331,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid); | |||
333 | extern const nfs4_stateid zero_stateid; | 331 | extern const nfs4_stateid zero_stateid; |
334 | 332 | ||
335 | /* nfs4xdr.c */ | 333 | /* nfs4xdr.c */ |
336 | extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); | 334 | extern __be32 *nfs4_decode_dirent(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int); |
337 | extern struct rpc_procinfo nfs4_procedures[]; | 335 | extern struct rpc_procinfo nfs4_procedures[]; |
338 | 336 | ||
339 | struct nfs4_mount_data; | 337 | struct nfs4_mount_data; |
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c new file mode 100644 index 000000000000..2e92f0d8d654 --- /dev/null +++ b/fs/nfs/nfs4filelayout.c | |||
@@ -0,0 +1,280 @@ | |||
1 | /* | ||
2 | * Module for the pnfs nfs4 file layout driver. | ||
3 | * Defines all I/O and Policy interface operations, plus code | ||
4 | * to register itself with the pNFS client. | ||
5 | * | ||
6 | * Copyright (c) 2002 | ||
7 | * The Regents of the University of Michigan | ||
8 | * All Rights Reserved | ||
9 | * | ||
10 | * Dean Hildebrand <dhildebz@umich.edu> | ||
11 | * | ||
12 | * Permission is granted to use, copy, create derivative works, and | ||
13 | * redistribute this software and such derivative works for any purpose, | ||
14 | * so long as the name of the University of Michigan is not used in | ||
15 | * any advertising or publicity pertaining to the use or distribution | ||
16 | * of this software without specific, written prior authorization. If | ||
17 | * the above copyright notice or any other identification of the | ||
18 | * University of Michigan is included in any copy of any portion of | ||
19 | * this software, then the disclaimer below must also be included. | ||
20 | * | ||
21 | * This software is provided as is, without representation or warranty | ||
22 | * of any kind either express or implied, including without limitation | ||
23 | * the implied warranties of merchantability, fitness for a particular | ||
24 | * purpose, or noninfringement. The Regents of the University of | ||
25 | * Michigan shall not be liable for any damages, including special, | ||
26 | * indirect, incidental, or consequential damages, with respect to any | ||
27 | * claim arising out of or in connection with the use of the software, | ||
28 | * even if it has been or is hereafter advised of the possibility of | ||
29 | * such damages. | ||
30 | */ | ||
31 | |||
32 | #include <linux/nfs_fs.h> | ||
33 | |||
34 | #include "internal.h" | ||
35 | #include "nfs4filelayout.h" | ||
36 | |||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
38 | |||
39 | MODULE_LICENSE("GPL"); | ||
40 | MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); | ||
41 | MODULE_DESCRIPTION("The NFSv4 file layout driver"); | ||
42 | |||
43 | static int | ||
44 | filelayout_set_layoutdriver(struct nfs_server *nfss) | ||
45 | { | ||
46 | int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, | ||
47 | nfs4_fl_free_deviceid_callback); | ||
48 | if (status) { | ||
49 | printk(KERN_WARNING "%s: deviceid cache could not be " | ||
50 | "initialized\n", __func__); | ||
51 | return status; | ||
52 | } | ||
53 | dprintk("%s: deviceid cache has been initialized successfully\n", | ||
54 | __func__); | ||
55 | return 0; | ||
56 | } | ||
57 | |||
58 | /* Clear out the layout by destroying its device list */ | ||
59 | static int | ||
60 | filelayout_clear_layoutdriver(struct nfs_server *nfss) | ||
61 | { | ||
62 | dprintk("--> %s\n", __func__); | ||
63 | |||
64 | if (nfss->nfs_client->cl_devid_cache) | ||
65 | pnfs_put_deviceid_cache(nfss->nfs_client); | ||
66 | return 0; | ||
67 | } | ||
68 | |||
69 | /* | ||
70 | * filelayout_check_layout() | ||
71 | * | ||
72 | * Make sure layout segment parameters are sane WRT the device. | ||
73 | * At this point no generic layer initialization of the lseg has occurred, | ||
74 | * and nothing has been added to the layout_hdr cache. | ||
75 | * | ||
76 | */ | ||
77 | static int | ||
78 | filelayout_check_layout(struct pnfs_layout_hdr *lo, | ||
79 | struct nfs4_filelayout_segment *fl, | ||
80 | struct nfs4_layoutget_res *lgr, | ||
81 | struct nfs4_deviceid *id) | ||
82 | { | ||
83 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
84 | int status = -EINVAL; | ||
85 | struct nfs_server *nfss = NFS_SERVER(lo->inode); | ||
86 | |||
87 | dprintk("--> %s\n", __func__); | ||
88 | |||
89 | if (fl->pattern_offset > lgr->range.offset) { | ||
90 | dprintk("%s pattern_offset %lld to large\n", | ||
91 | __func__, fl->pattern_offset); | ||
92 | goto out; | ||
93 | } | ||
94 | |||
95 | if (fl->stripe_unit % PAGE_SIZE) { | ||
96 | dprintk("%s Stripe unit (%u) not page aligned\n", | ||
97 | __func__, fl->stripe_unit); | ||
98 | goto out; | ||
99 | } | ||
100 | |||
101 | /* find and reference the deviceid */ | ||
102 | dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); | ||
103 | if (dsaddr == NULL) { | ||
104 | dsaddr = get_device_info(lo->inode, id); | ||
105 | if (dsaddr == NULL) | ||
106 | goto out; | ||
107 | } | ||
108 | fl->dsaddr = dsaddr; | ||
109 | |||
110 | if (fl->first_stripe_index < 0 || | ||
111 | fl->first_stripe_index >= dsaddr->stripe_count) { | ||
112 | dprintk("%s Bad first_stripe_index %d\n", | ||
113 | __func__, fl->first_stripe_index); | ||
114 | goto out_put; | ||
115 | } | ||
116 | |||
117 | if ((fl->stripe_type == STRIPE_SPARSE && | ||
118 | fl->num_fh > 1 && fl->num_fh != dsaddr->ds_num) || | ||
119 | (fl->stripe_type == STRIPE_DENSE && | ||
120 | fl->num_fh != dsaddr->stripe_count)) { | ||
121 | dprintk("%s num_fh %u not valid for given packing\n", | ||
122 | __func__, fl->num_fh); | ||
123 | goto out_put; | ||
124 | } | ||
125 | |||
126 | if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) { | ||
127 | dprintk("%s Stripe unit (%u) not aligned with rsize %u " | ||
128 | "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize, | ||
129 | nfss->wsize); | ||
130 | } | ||
131 | |||
132 | status = 0; | ||
133 | out: | ||
134 | dprintk("--> %s returns %d\n", __func__, status); | ||
135 | return status; | ||
136 | out_put: | ||
137 | pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); | ||
138 | goto out; | ||
139 | } | ||
140 | |||
141 | static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl) | ||
142 | { | ||
143 | int i; | ||
144 | |||
145 | for (i = 0; i < fl->num_fh; i++) { | ||
146 | if (!fl->fh_array[i]) | ||
147 | break; | ||
148 | kfree(fl->fh_array[i]); | ||
149 | } | ||
150 | kfree(fl->fh_array); | ||
151 | fl->fh_array = NULL; | ||
152 | } | ||
153 | |||
154 | static void | ||
155 | _filelayout_free_lseg(struct nfs4_filelayout_segment *fl) | ||
156 | { | ||
157 | filelayout_free_fh_array(fl); | ||
158 | kfree(fl); | ||
159 | } | ||
160 | |||
161 | static int | ||
162 | filelayout_decode_layout(struct pnfs_layout_hdr *flo, | ||
163 | struct nfs4_filelayout_segment *fl, | ||
164 | struct nfs4_layoutget_res *lgr, | ||
165 | struct nfs4_deviceid *id) | ||
166 | { | ||
167 | uint32_t *p = (uint32_t *)lgr->layout.buf; | ||
168 | uint32_t nfl_util; | ||
169 | int i; | ||
170 | |||
171 | dprintk("%s: set_layout_map Begin\n", __func__); | ||
172 | |||
173 | memcpy(id, p, sizeof(*id)); | ||
174 | p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); | ||
175 | print_deviceid(id); | ||
176 | |||
177 | nfl_util = be32_to_cpup(p++); | ||
178 | if (nfl_util & NFL4_UFLG_COMMIT_THRU_MDS) | ||
179 | fl->commit_through_mds = 1; | ||
180 | if (nfl_util & NFL4_UFLG_DENSE) | ||
181 | fl->stripe_type = STRIPE_DENSE; | ||
182 | else | ||
183 | fl->stripe_type = STRIPE_SPARSE; | ||
184 | fl->stripe_unit = nfl_util & ~NFL4_UFLG_MASK; | ||
185 | |||
186 | fl->first_stripe_index = be32_to_cpup(p++); | ||
187 | p = xdr_decode_hyper(p, &fl->pattern_offset); | ||
188 | fl->num_fh = be32_to_cpup(p++); | ||
189 | |||
190 | dprintk("%s: nfl_util 0x%X num_fh %u fsi %u po %llu\n", | ||
191 | __func__, nfl_util, fl->num_fh, fl->first_stripe_index, | ||
192 | fl->pattern_offset); | ||
193 | |||
194 | fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), | ||
195 | GFP_KERNEL); | ||
196 | if (!fl->fh_array) | ||
197 | return -ENOMEM; | ||
198 | |||
199 | for (i = 0; i < fl->num_fh; i++) { | ||
200 | /* Do we want to use a mempool here? */ | ||
201 | fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); | ||
202 | if (!fl->fh_array[i]) { | ||
203 | filelayout_free_fh_array(fl); | ||
204 | return -ENOMEM; | ||
205 | } | ||
206 | fl->fh_array[i]->size = be32_to_cpup(p++); | ||
207 | if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { | ||
208 | printk(KERN_ERR "Too big fh %d received %d\n", | ||
209 | i, fl->fh_array[i]->size); | ||
210 | filelayout_free_fh_array(fl); | ||
211 | return -EIO; | ||
212 | } | ||
213 | memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); | ||
214 | p += XDR_QUADLEN(fl->fh_array[i]->size); | ||
215 | dprintk("DEBUG: %s: fh len %d\n", __func__, | ||
216 | fl->fh_array[i]->size); | ||
217 | } | ||
218 | |||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | static struct pnfs_layout_segment * | ||
223 | filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, | ||
224 | struct nfs4_layoutget_res *lgr) | ||
225 | { | ||
226 | struct nfs4_filelayout_segment *fl; | ||
227 | int rc; | ||
228 | struct nfs4_deviceid id; | ||
229 | |||
230 | dprintk("--> %s\n", __func__); | ||
231 | fl = kzalloc(sizeof(*fl), GFP_KERNEL); | ||
232 | if (!fl) | ||
233 | return NULL; | ||
234 | |||
235 | rc = filelayout_decode_layout(layoutid, fl, lgr, &id); | ||
236 | if (rc != 0 || filelayout_check_layout(layoutid, fl, lgr, &id)) { | ||
237 | _filelayout_free_lseg(fl); | ||
238 | return NULL; | ||
239 | } | ||
240 | return &fl->generic_hdr; | ||
241 | } | ||
242 | |||
243 | static void | ||
244 | filelayout_free_lseg(struct pnfs_layout_segment *lseg) | ||
245 | { | ||
246 | struct nfs_server *nfss = NFS_SERVER(lseg->layout->inode); | ||
247 | struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); | ||
248 | |||
249 | dprintk("--> %s\n", __func__); | ||
250 | pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, | ||
251 | &fl->dsaddr->deviceid); | ||
252 | _filelayout_free_lseg(fl); | ||
253 | } | ||
254 | |||
255 | static struct pnfs_layoutdriver_type filelayout_type = { | ||
256 | .id = LAYOUT_NFSV4_1_FILES, | ||
257 | .name = "LAYOUT_NFSV4_1_FILES", | ||
258 | .owner = THIS_MODULE, | ||
259 | .set_layoutdriver = filelayout_set_layoutdriver, | ||
260 | .clear_layoutdriver = filelayout_clear_layoutdriver, | ||
261 | .alloc_lseg = filelayout_alloc_lseg, | ||
262 | .free_lseg = filelayout_free_lseg, | ||
263 | }; | ||
264 | |||
265 | static int __init nfs4filelayout_init(void) | ||
266 | { | ||
267 | printk(KERN_INFO "%s: NFSv4 File Layout Driver Registering...\n", | ||
268 | __func__); | ||
269 | return pnfs_register_layoutdriver(&filelayout_type); | ||
270 | } | ||
271 | |||
272 | static void __exit nfs4filelayout_exit(void) | ||
273 | { | ||
274 | printk(KERN_INFO "%s: NFSv4 File Layout Driver Unregistering...\n", | ||
275 | __func__); | ||
276 | pnfs_unregister_layoutdriver(&filelayout_type); | ||
277 | } | ||
278 | |||
279 | module_init(nfs4filelayout_init); | ||
280 | module_exit(nfs4filelayout_exit); | ||
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h new file mode 100644 index 000000000000..bbf60dd2ab9d --- /dev/null +++ b/fs/nfs/nfs4filelayout.h | |||
@@ -0,0 +1,94 @@ | |||
1 | /* | ||
2 | * NFSv4 file layout driver data structures. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * | ||
10 | * Permission is granted to use, copy, create derivative works, and | ||
11 | * redistribute this software and such derivative works for any purpose, | ||
12 | * so long as the name of the University of Michigan is not used in | ||
13 | * any advertising or publicity pertaining to the use or distribution | ||
14 | * of this software without specific, written prior authorization. If | ||
15 | * the above copyright notice or any other identification of the | ||
16 | * University of Michigan is included in any copy of any portion of | ||
17 | * this software, then the disclaimer below must also be included. | ||
18 | * | ||
19 | * This software is provided as is, without representation or warranty | ||
20 | * of any kind either express or implied, including without limitation | ||
21 | * the implied warranties of merchantability, fitness for a particular | ||
22 | * purpose, or noninfringement. The Regents of the University of | ||
23 | * Michigan shall not be liable for any damages, including special, | ||
24 | * indirect, incidental, or consequential damages, with respect to any | ||
25 | * claim arising out of or in connection with the use of the software, | ||
26 | * even if it has been or is hereafter advised of the possibility of | ||
27 | * such damages. | ||
28 | */ | ||
29 | |||
30 | #ifndef FS_NFS_NFS4FILELAYOUT_H | ||
31 | #define FS_NFS_NFS4FILELAYOUT_H | ||
32 | |||
33 | #include "pnfs.h" | ||
34 | |||
35 | /* | ||
36 | * Field testing shows we need to support upto 4096 stripe indices. | ||
37 | * We store each index as a u8 (u32 on the wire) to keep the memory footprint | ||
38 | * reasonable. This in turn means we support a maximum of 256 | ||
39 | * RFC 5661 multipath_list4 structures. | ||
40 | */ | ||
41 | #define NFS4_PNFS_MAX_STRIPE_CNT 4096 | ||
42 | #define NFS4_PNFS_MAX_MULTI_CNT 256 /* 256 fit into a u8 stripe_index */ | ||
43 | |||
44 | enum stripetype4 { | ||
45 | STRIPE_SPARSE = 1, | ||
46 | STRIPE_DENSE = 2 | ||
47 | }; | ||
48 | |||
49 | /* Individual ip address */ | ||
50 | struct nfs4_pnfs_ds { | ||
51 | struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ | ||
52 | u32 ds_ip_addr; | ||
53 | u32 ds_port; | ||
54 | struct nfs_client *ds_clp; | ||
55 | atomic_t ds_count; | ||
56 | }; | ||
57 | |||
58 | struct nfs4_file_layout_dsaddr { | ||
59 | struct pnfs_deviceid_node deviceid; | ||
60 | u32 stripe_count; | ||
61 | u8 *stripe_indices; | ||
62 | u32 ds_num; | ||
63 | struct nfs4_pnfs_ds *ds_list[1]; | ||
64 | }; | ||
65 | |||
66 | struct nfs4_filelayout_segment { | ||
67 | struct pnfs_layout_segment generic_hdr; | ||
68 | u32 stripe_type; | ||
69 | u32 commit_through_mds; | ||
70 | u32 stripe_unit; | ||
71 | u32 first_stripe_index; | ||
72 | u64 pattern_offset; | ||
73 | struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ | ||
74 | unsigned int num_fh; | ||
75 | struct nfs_fh **fh_array; | ||
76 | }; | ||
77 | |||
78 | static inline struct nfs4_filelayout_segment * | ||
79 | FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) | ||
80 | { | ||
81 | return container_of(lseg, | ||
82 | struct nfs4_filelayout_segment, | ||
83 | generic_hdr); | ||
84 | } | ||
85 | |||
86 | extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); | ||
87 | extern void print_ds(struct nfs4_pnfs_ds *ds); | ||
88 | extern void print_deviceid(struct nfs4_deviceid *dev_id); | ||
89 | extern struct nfs4_file_layout_dsaddr * | ||
90 | nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); | ||
91 | struct nfs4_file_layout_dsaddr * | ||
92 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); | ||
93 | |||
94 | #endif /* FS_NFS_NFS4FILELAYOUT_H */ | ||
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c new file mode 100644 index 000000000000..51fe64ace55a --- /dev/null +++ b/fs/nfs/nfs4filelayoutdev.c | |||
@@ -0,0 +1,448 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs nfs4 file layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * Garth Goodson <Garth.Goodson@netapp.com> | ||
10 | * | ||
11 | * Permission is granted to use, copy, create derivative works, and | ||
12 | * redistribute this software and such derivative works for any purpose, | ||
13 | * so long as the name of the University of Michigan is not used in | ||
14 | * any advertising or publicity pertaining to the use or distribution | ||
15 | * of this software without specific, written prior authorization. If | ||
16 | * the above copyright notice or any other identification of the | ||
17 | * University of Michigan is included in any copy of any portion of | ||
18 | * this software, then the disclaimer below must also be included. | ||
19 | * | ||
20 | * This software is provided as is, without representation or warranty | ||
21 | * of any kind either express or implied, including without limitation | ||
22 | * the implied warranties of merchantability, fitness for a particular | ||
23 | * purpose, or noninfringement. The Regents of the University of | ||
24 | * Michigan shall not be liable for any damages, including special, | ||
25 | * indirect, incidental, or consequential damages, with respect to any | ||
26 | * claim arising out of or in connection with the use of the software, | ||
27 | * even if it has been or is hereafter advised of the possibility of | ||
28 | * such damages. | ||
29 | */ | ||
30 | |||
31 | #include <linux/nfs_fs.h> | ||
32 | #include <linux/vmalloc.h> | ||
33 | |||
34 | #include "internal.h" | ||
35 | #include "nfs4filelayout.h" | ||
36 | |||
37 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
38 | |||
39 | /* | ||
40 | * Data server cache | ||
41 | * | ||
42 | * Data servers can be mapped to different device ids. | ||
43 | * nfs4_pnfs_ds reference counting | ||
44 | * - set to 1 on allocation | ||
45 | * - incremented when a device id maps a data server already in the cache. | ||
46 | * - decremented when deviceid is removed from the cache. | ||
47 | */ | ||
48 | DEFINE_SPINLOCK(nfs4_ds_cache_lock); | ||
49 | static LIST_HEAD(nfs4_data_server_cache); | ||
50 | |||
51 | /* Debug routines */ | ||
52 | void | ||
53 | print_ds(struct nfs4_pnfs_ds *ds) | ||
54 | { | ||
55 | if (ds == NULL) { | ||
56 | printk("%s NULL device\n", __func__); | ||
57 | return; | ||
58 | } | ||
59 | printk(" ip_addr %x port %hu\n" | ||
60 | " ref count %d\n" | ||
61 | " client %p\n" | ||
62 | " cl_exchange_flags %x\n", | ||
63 | ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), | ||
64 | atomic_read(&ds->ds_count), ds->ds_clp, | ||
65 | ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); | ||
66 | } | ||
67 | |||
68 | void | ||
69 | print_ds_list(struct nfs4_file_layout_dsaddr *dsaddr) | ||
70 | { | ||
71 | int i; | ||
72 | |||
73 | ifdebug(FACILITY) { | ||
74 | printk("%s dsaddr->ds_num %d\n", __func__, | ||
75 | dsaddr->ds_num); | ||
76 | for (i = 0; i < dsaddr->ds_num; i++) | ||
77 | print_ds(dsaddr->ds_list[i]); | ||
78 | } | ||
79 | } | ||
80 | |||
81 | void print_deviceid(struct nfs4_deviceid *id) | ||
82 | { | ||
83 | u32 *p = (u32 *)id; | ||
84 | |||
85 | dprintk("%s: device id= [%x%x%x%x]\n", __func__, | ||
86 | p[0], p[1], p[2], p[3]); | ||
87 | } | ||
88 | |||
89 | /* nfs4_ds_cache_lock is held */ | ||
90 | static struct nfs4_pnfs_ds * | ||
91 | _data_server_lookup_locked(u32 ip_addr, u32 port) | ||
92 | { | ||
93 | struct nfs4_pnfs_ds *ds; | ||
94 | |||
95 | dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", | ||
96 | ntohl(ip_addr), ntohs(port)); | ||
97 | |||
98 | list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { | ||
99 | if (ds->ds_ip_addr == ip_addr && | ||
100 | ds->ds_port == port) { | ||
101 | return ds; | ||
102 | } | ||
103 | } | ||
104 | return NULL; | ||
105 | } | ||
106 | |||
107 | static void | ||
108 | destroy_ds(struct nfs4_pnfs_ds *ds) | ||
109 | { | ||
110 | dprintk("--> %s\n", __func__); | ||
111 | ifdebug(FACILITY) | ||
112 | print_ds(ds); | ||
113 | |||
114 | if (ds->ds_clp) | ||
115 | nfs_put_client(ds->ds_clp); | ||
116 | kfree(ds); | ||
117 | } | ||
118 | |||
119 | static void | ||
120 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) | ||
121 | { | ||
122 | struct nfs4_pnfs_ds *ds; | ||
123 | int i; | ||
124 | |||
125 | print_deviceid(&dsaddr->deviceid.de_id); | ||
126 | |||
127 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
128 | ds = dsaddr->ds_list[i]; | ||
129 | if (ds != NULL) { | ||
130 | if (atomic_dec_and_lock(&ds->ds_count, | ||
131 | &nfs4_ds_cache_lock)) { | ||
132 | list_del_init(&ds->ds_node); | ||
133 | spin_unlock(&nfs4_ds_cache_lock); | ||
134 | destroy_ds(ds); | ||
135 | } | ||
136 | } | ||
137 | } | ||
138 | kfree(dsaddr->stripe_indices); | ||
139 | kfree(dsaddr); | ||
140 | } | ||
141 | |||
142 | void | ||
143 | nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device) | ||
144 | { | ||
145 | struct nfs4_file_layout_dsaddr *dsaddr = | ||
146 | container_of(device, struct nfs4_file_layout_dsaddr, deviceid); | ||
147 | |||
148 | nfs4_fl_free_deviceid(dsaddr); | ||
149 | } | ||
150 | |||
151 | static struct nfs4_pnfs_ds * | ||
152 | nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) | ||
153 | { | ||
154 | struct nfs4_pnfs_ds *tmp_ds, *ds; | ||
155 | |||
156 | ds = kzalloc(sizeof(*tmp_ds), GFP_KERNEL); | ||
157 | if (!ds) | ||
158 | goto out; | ||
159 | |||
160 | spin_lock(&nfs4_ds_cache_lock); | ||
161 | tmp_ds = _data_server_lookup_locked(ip_addr, port); | ||
162 | if (tmp_ds == NULL) { | ||
163 | ds->ds_ip_addr = ip_addr; | ||
164 | ds->ds_port = port; | ||
165 | atomic_set(&ds->ds_count, 1); | ||
166 | INIT_LIST_HEAD(&ds->ds_node); | ||
167 | ds->ds_clp = NULL; | ||
168 | list_add(&ds->ds_node, &nfs4_data_server_cache); | ||
169 | dprintk("%s add new data server ip 0x%x\n", __func__, | ||
170 | ds->ds_ip_addr); | ||
171 | } else { | ||
172 | kfree(ds); | ||
173 | atomic_inc(&tmp_ds->ds_count); | ||
174 | dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", | ||
175 | __func__, tmp_ds->ds_ip_addr, | ||
176 | atomic_read(&tmp_ds->ds_count)); | ||
177 | ds = tmp_ds; | ||
178 | } | ||
179 | spin_unlock(&nfs4_ds_cache_lock); | ||
180 | out: | ||
181 | return ds; | ||
182 | } | ||
183 | |||
184 | /* | ||
185 | * Currently only supports ipv4, and one multi-path address. | ||
186 | */ | ||
187 | static struct nfs4_pnfs_ds * | ||
188 | decode_and_add_ds(__be32 **pp, struct inode *inode) | ||
189 | { | ||
190 | struct nfs4_pnfs_ds *ds = NULL; | ||
191 | char *buf; | ||
192 | const char *ipend, *pstr; | ||
193 | u32 ip_addr, port; | ||
194 | int nlen, rlen, i; | ||
195 | int tmp[2]; | ||
196 | __be32 *r_netid, *r_addr, *p = *pp; | ||
197 | |||
198 | /* r_netid */ | ||
199 | nlen = be32_to_cpup(p++); | ||
200 | r_netid = p; | ||
201 | p += XDR_QUADLEN(nlen); | ||
202 | |||
203 | /* r_addr */ | ||
204 | rlen = be32_to_cpup(p++); | ||
205 | r_addr = p; | ||
206 | p += XDR_QUADLEN(rlen); | ||
207 | *pp = p; | ||
208 | |||
209 | /* Check that netid is "tcp" */ | ||
210 | if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { | ||
211 | dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); | ||
212 | goto out_err; | ||
213 | } | ||
214 | |||
215 | /* ipv6 length plus port is legal */ | ||
216 | if (rlen > INET6_ADDRSTRLEN + 8) { | ||
217 | dprintk("%s Invalid address, length %d\n", __func__, | ||
218 | rlen); | ||
219 | goto out_err; | ||
220 | } | ||
221 | buf = kmalloc(rlen + 1, GFP_KERNEL); | ||
222 | buf[rlen] = '\0'; | ||
223 | memcpy(buf, r_addr, rlen); | ||
224 | |||
225 | /* replace the port dots with dashes for the in4_pton() delimiter */ | ||
226 | for (i = 0; i < 2; i++) { | ||
227 | char *res = strrchr(buf, '.'); | ||
228 | *res = '-'; | ||
229 | } | ||
230 | |||
231 | /* Currently only supports ipv4 addresses */ | ||
232 | if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { | ||
233 | dprintk("%s: Only ipv4 addresses supported\n", __func__); | ||
234 | goto out_free; | ||
235 | } | ||
236 | |||
237 | /* port */ | ||
238 | pstr = ipend; | ||
239 | sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); | ||
240 | port = htons((tmp[0] << 8) | (tmp[1])); | ||
241 | |||
242 | ds = nfs4_pnfs_ds_add(inode, ip_addr, port); | ||
243 | dprintk("%s Decoded address and port %s\n", __func__, buf); | ||
244 | out_free: | ||
245 | kfree(buf); | ||
246 | out_err: | ||
247 | return ds; | ||
248 | } | ||
249 | |||
250 | /* Decode opaque device data and return the result */ | ||
251 | static struct nfs4_file_layout_dsaddr* | ||
252 | decode_device(struct inode *ino, struct pnfs_device *pdev) | ||
253 | { | ||
254 | int i, dummy; | ||
255 | u32 cnt, num; | ||
256 | u8 *indexp; | ||
257 | __be32 *p = (__be32 *)pdev->area, *indicesp; | ||
258 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
259 | |||
260 | /* Get the stripe count (number of stripe index) */ | ||
261 | cnt = be32_to_cpup(p++); | ||
262 | dprintk("%s stripe count %d\n", __func__, cnt); | ||
263 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { | ||
264 | printk(KERN_WARNING "%s: stripe count %d greater than " | ||
265 | "supported maximum %d\n", __func__, | ||
266 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); | ||
267 | goto out_err; | ||
268 | } | ||
269 | |||
270 | /* Check the multipath list count */ | ||
271 | indicesp = p; | ||
272 | p += XDR_QUADLEN(cnt << 2); | ||
273 | num = be32_to_cpup(p++); | ||
274 | dprintk("%s ds_num %u\n", __func__, num); | ||
275 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { | ||
276 | printk(KERN_WARNING "%s: multipath count %d greater than " | ||
277 | "supported maximum %d\n", __func__, | ||
278 | num, NFS4_PNFS_MAX_MULTI_CNT); | ||
279 | goto out_err; | ||
280 | } | ||
281 | dsaddr = kzalloc(sizeof(*dsaddr) + | ||
282 | (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), | ||
283 | GFP_KERNEL); | ||
284 | if (!dsaddr) | ||
285 | goto out_err; | ||
286 | |||
287 | dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL); | ||
288 | if (!dsaddr->stripe_indices) | ||
289 | goto out_err_free; | ||
290 | |||
291 | dsaddr->stripe_count = cnt; | ||
292 | dsaddr->ds_num = num; | ||
293 | |||
294 | memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); | ||
295 | |||
296 | /* Go back and read stripe indices */ | ||
297 | p = indicesp; | ||
298 | indexp = &dsaddr->stripe_indices[0]; | ||
299 | for (i = 0; i < dsaddr->stripe_count; i++) { | ||
300 | *indexp = be32_to_cpup(p++); | ||
301 | if (*indexp >= num) | ||
302 | goto out_err_free; | ||
303 | indexp++; | ||
304 | } | ||
305 | /* Skip already read multipath list count */ | ||
306 | p++; | ||
307 | |||
308 | for (i = 0; i < dsaddr->ds_num; i++) { | ||
309 | int j; | ||
310 | |||
311 | dummy = be32_to_cpup(p++); /* multipath count */ | ||
312 | if (dummy > 1) { | ||
313 | printk(KERN_WARNING | ||
314 | "%s: Multipath count %d not supported, " | ||
315 | "skipping all greater than 1\n", __func__, | ||
316 | dummy); | ||
317 | } | ||
318 | for (j = 0; j < dummy; j++) { | ||
319 | if (j == 0) { | ||
320 | dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); | ||
321 | if (dsaddr->ds_list[i] == NULL) | ||
322 | goto out_err_free; | ||
323 | } else { | ||
324 | u32 len; | ||
325 | /* skip extra multipath */ | ||
326 | len = be32_to_cpup(p++); | ||
327 | p += XDR_QUADLEN(len); | ||
328 | len = be32_to_cpup(p++); | ||
329 | p += XDR_QUADLEN(len); | ||
330 | continue; | ||
331 | } | ||
332 | } | ||
333 | } | ||
334 | return dsaddr; | ||
335 | |||
336 | out_err_free: | ||
337 | nfs4_fl_free_deviceid(dsaddr); | ||
338 | out_err: | ||
339 | dprintk("%s ERROR: returning NULL\n", __func__); | ||
340 | return NULL; | ||
341 | } | ||
342 | |||
343 | /* | ||
344 | * Decode the opaque device specified in 'dev' | ||
345 | * and add it to the list of available devices. | ||
346 | * If the deviceid is already cached, nfs4_add_deviceid will return | ||
347 | * a pointer to the cached struct and throw away the new. | ||
348 | */ | ||
349 | static struct nfs4_file_layout_dsaddr* | ||
350 | decode_and_add_device(struct inode *inode, struct pnfs_device *dev) | ||
351 | { | ||
352 | struct nfs4_file_layout_dsaddr *dsaddr; | ||
353 | struct pnfs_deviceid_node *d; | ||
354 | |||
355 | dsaddr = decode_device(inode, dev); | ||
356 | if (!dsaddr) { | ||
357 | printk(KERN_WARNING "%s: Could not decode or add device\n", | ||
358 | __func__); | ||
359 | return NULL; | ||
360 | } | ||
361 | |||
362 | d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, | ||
363 | &dsaddr->deviceid); | ||
364 | |||
365 | return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | ||
366 | } | ||
367 | |||
368 | /* | ||
369 | * Retrieve the information for dev_id, add it to the list | ||
370 | * of available devices, and return it. | ||
371 | */ | ||
372 | struct nfs4_file_layout_dsaddr * | ||
373 | get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id) | ||
374 | { | ||
375 | struct pnfs_device *pdev = NULL; | ||
376 | u32 max_resp_sz; | ||
377 | int max_pages; | ||
378 | struct page **pages = NULL; | ||
379 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; | ||
380 | int rc, i; | ||
381 | struct nfs_server *server = NFS_SERVER(inode); | ||
382 | |||
383 | /* | ||
384 | * Use the session max response size as the basis for setting | ||
385 | * GETDEVICEINFO's maxcount | ||
386 | */ | ||
387 | max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; | ||
388 | max_pages = max_resp_sz >> PAGE_SHIFT; | ||
389 | dprintk("%s inode %p max_resp_sz %u max_pages %d\n", | ||
390 | __func__, inode, max_resp_sz, max_pages); | ||
391 | |||
392 | pdev = kzalloc(sizeof(struct pnfs_device), GFP_KERNEL); | ||
393 | if (pdev == NULL) | ||
394 | return NULL; | ||
395 | |||
396 | pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL); | ||
397 | if (pages == NULL) { | ||
398 | kfree(pdev); | ||
399 | return NULL; | ||
400 | } | ||
401 | for (i = 0; i < max_pages; i++) { | ||
402 | pages[i] = alloc_page(GFP_KERNEL); | ||
403 | if (!pages[i]) | ||
404 | goto out_free; | ||
405 | } | ||
406 | |||
407 | /* set pdev->area */ | ||
408 | pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL); | ||
409 | if (!pdev->area) | ||
410 | goto out_free; | ||
411 | |||
412 | memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); | ||
413 | pdev->layout_type = LAYOUT_NFSV4_1_FILES; | ||
414 | pdev->pages = pages; | ||
415 | pdev->pgbase = 0; | ||
416 | pdev->pglen = PAGE_SIZE * max_pages; | ||
417 | pdev->mincount = 0; | ||
418 | |||
419 | rc = nfs4_proc_getdeviceinfo(server, pdev); | ||
420 | dprintk("%s getdevice info returns %d\n", __func__, rc); | ||
421 | if (rc) | ||
422 | goto out_free; | ||
423 | |||
424 | /* | ||
425 | * Found new device, need to decode it and then add it to the | ||
426 | * list of known devices for this mountpoint. | ||
427 | */ | ||
428 | dsaddr = decode_and_add_device(inode, pdev); | ||
429 | out_free: | ||
430 | if (pdev->area != NULL) | ||
431 | vunmap(pdev->area); | ||
432 | for (i = 0; i < max_pages; i++) | ||
433 | __free_page(pages[i]); | ||
434 | kfree(pages); | ||
435 | kfree(pdev); | ||
436 | dprintk("<-- %s dsaddr %p\n", __func__, dsaddr); | ||
437 | return dsaddr; | ||
438 | } | ||
439 | |||
440 | struct nfs4_file_layout_dsaddr * | ||
441 | nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) | ||
442 | { | ||
443 | struct pnfs_deviceid_node *d; | ||
444 | |||
445 | d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); | ||
446 | return (d == NULL) ? NULL : | ||
447 | container_of(d, struct nfs4_file_layout_dsaddr, deviceid); | ||
448 | } | ||
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 089da5b5d20a..32c8758c99fd 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -55,6 +55,7 @@ | |||
55 | #include "internal.h" | 55 | #include "internal.h" |
56 | #include "iostat.h" | 56 | #include "iostat.h" |
57 | #include "callback.h" | 57 | #include "callback.h" |
58 | #include "pnfs.h" | ||
58 | 59 | ||
59 | #define NFSDBG_FACILITY NFSDBG_PROC | 60 | #define NFSDBG_FACILITY NFSDBG_PROC |
60 | 61 | ||
@@ -129,7 +130,8 @@ const u32 nfs4_fsinfo_bitmap[2] = { FATTR4_WORD0_MAXFILESIZE | |||
129 | | FATTR4_WORD0_MAXREAD | 130 | | FATTR4_WORD0_MAXREAD |
130 | | FATTR4_WORD0_MAXWRITE | 131 | | FATTR4_WORD0_MAXWRITE |
131 | | FATTR4_WORD0_LEASE_TIME, | 132 | | FATTR4_WORD0_LEASE_TIME, |
132 | 0 | 133 | FATTR4_WORD1_TIME_DELTA |
134 | | FATTR4_WORD1_FS_LAYOUT_TYPES | ||
133 | }; | 135 | }; |
134 | 136 | ||
135 | const u32 nfs4_fs_locations_bitmap[2] = { | 137 | const u32 nfs4_fs_locations_bitmap[2] = { |
@@ -255,9 +257,6 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, | |||
255 | nfs4_state_mark_reclaim_nograce(clp, state); | 257 | nfs4_state_mark_reclaim_nograce(clp, state); |
256 | goto do_state_recovery; | 258 | goto do_state_recovery; |
257 | case -NFS4ERR_STALE_STATEID: | 259 | case -NFS4ERR_STALE_STATEID: |
258 | if (state == NULL) | ||
259 | break; | ||
260 | nfs4_state_mark_reclaim_reboot(clp, state); | ||
261 | case -NFS4ERR_STALE_CLIENTID: | 260 | case -NFS4ERR_STALE_CLIENTID: |
262 | case -NFS4ERR_EXPIRED: | 261 | case -NFS4ERR_EXPIRED: |
263 | goto do_state_recovery; | 262 | goto do_state_recovery; |
@@ -334,10 +333,12 @@ static void renew_lease(const struct nfs_server *server, unsigned long timestamp | |||
334 | * Must be called while holding tbl->slot_tbl_lock | 333 | * Must be called while holding tbl->slot_tbl_lock |
335 | */ | 334 | */ |
336 | static void | 335 | static void |
337 | nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid) | 336 | nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *free_slot) |
338 | { | 337 | { |
338 | int free_slotid = free_slot - tbl->slots; | ||
339 | int slotid = free_slotid; | 339 | int slotid = free_slotid; |
340 | 340 | ||
341 | BUG_ON(slotid < 0 || slotid >= NFS4_MAX_SLOT_TABLE); | ||
341 | /* clear used bit in bitmap */ | 342 | /* clear used bit in bitmap */ |
342 | __clear_bit(slotid, tbl->used_slots); | 343 | __clear_bit(slotid, tbl->used_slots); |
343 | 344 | ||
@@ -379,7 +380,7 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) | |||
379 | struct nfs4_slot_table *tbl; | 380 | struct nfs4_slot_table *tbl; |
380 | 381 | ||
381 | tbl = &res->sr_session->fc_slot_table; | 382 | tbl = &res->sr_session->fc_slot_table; |
382 | if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) { | 383 | if (!res->sr_slot) { |
383 | /* just wake up the next guy waiting since | 384 | /* just wake up the next guy waiting since |
384 | * we may have not consumed a slot after all */ | 385 | * we may have not consumed a slot after all */ |
385 | dprintk("%s: No slot\n", __func__); | 386 | dprintk("%s: No slot\n", __func__); |
@@ -387,17 +388,15 @@ static void nfs41_sequence_free_slot(struct nfs4_sequence_res *res) | |||
387 | } | 388 | } |
388 | 389 | ||
389 | spin_lock(&tbl->slot_tbl_lock); | 390 | spin_lock(&tbl->slot_tbl_lock); |
390 | nfs4_free_slot(tbl, res->sr_slotid); | 391 | nfs4_free_slot(tbl, res->sr_slot); |
391 | nfs41_check_drain_session_complete(res->sr_session); | 392 | nfs41_check_drain_session_complete(res->sr_session); |
392 | spin_unlock(&tbl->slot_tbl_lock); | 393 | spin_unlock(&tbl->slot_tbl_lock); |
393 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; | 394 | res->sr_slot = NULL; |
394 | } | 395 | } |
395 | 396 | ||
396 | static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) | 397 | static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *res) |
397 | { | 398 | { |
398 | unsigned long timestamp; | 399 | unsigned long timestamp; |
399 | struct nfs4_slot_table *tbl; | ||
400 | struct nfs4_slot *slot; | ||
401 | struct nfs_client *clp; | 400 | struct nfs_client *clp; |
402 | 401 | ||
403 | /* | 402 | /* |
@@ -410,17 +409,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * | |||
410 | res->sr_status = NFS_OK; | 409 | res->sr_status = NFS_OK; |
411 | 410 | ||
412 | /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ | 411 | /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ |
413 | if (res->sr_slotid == NFS4_MAX_SLOT_TABLE) | 412 | if (!res->sr_slot) |
414 | goto out; | 413 | goto out; |
415 | 414 | ||
416 | tbl = &res->sr_session->fc_slot_table; | ||
417 | slot = tbl->slots + res->sr_slotid; | ||
418 | |||
419 | /* Check the SEQUENCE operation status */ | 415 | /* Check the SEQUENCE operation status */ |
420 | switch (res->sr_status) { | 416 | switch (res->sr_status) { |
421 | case 0: | 417 | case 0: |
422 | /* Update the slot's sequence and clientid lease timer */ | 418 | /* Update the slot's sequence and clientid lease timer */ |
423 | ++slot->seq_nr; | 419 | ++res->sr_slot->seq_nr; |
424 | timestamp = res->sr_renewal_time; | 420 | timestamp = res->sr_renewal_time; |
425 | clp = res->sr_session->clp; | 421 | clp = res->sr_session->clp; |
426 | do_renew_lease(clp, timestamp); | 422 | do_renew_lease(clp, timestamp); |
@@ -433,12 +429,14 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res * | |||
433 | * returned NFS4ERR_DELAY as per Section 2.10.6.2 | 429 | * returned NFS4ERR_DELAY as per Section 2.10.6.2 |
434 | * of RFC5661. | 430 | * of RFC5661. |
435 | */ | 431 | */ |
436 | dprintk("%s: slot=%d seq=%d: Operation in progress\n", | 432 | dprintk("%s: slot=%ld seq=%d: Operation in progress\n", |
437 | __func__, res->sr_slotid, slot->seq_nr); | 433 | __func__, |
434 | res->sr_slot - res->sr_session->fc_slot_table.slots, | ||
435 | res->sr_slot->seq_nr); | ||
438 | goto out_retry; | 436 | goto out_retry; |
439 | default: | 437 | default: |
440 | /* Just update the slot sequence no. */ | 438 | /* Just update the slot sequence no. */ |
441 | ++slot->seq_nr; | 439 | ++res->sr_slot->seq_nr; |
442 | } | 440 | } |
443 | out: | 441 | out: |
444 | /* The session may be reset by one of the error handlers. */ | 442 | /* The session may be reset by one of the error handlers. */ |
@@ -505,10 +503,9 @@ static int nfs41_setup_sequence(struct nfs4_session *session, | |||
505 | 503 | ||
506 | dprintk("--> %s\n", __func__); | 504 | dprintk("--> %s\n", __func__); |
507 | /* slot already allocated? */ | 505 | /* slot already allocated? */ |
508 | if (res->sr_slotid != NFS4_MAX_SLOT_TABLE) | 506 | if (res->sr_slot != NULL) |
509 | return 0; | 507 | return 0; |
510 | 508 | ||
511 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
512 | tbl = &session->fc_slot_table; | 509 | tbl = &session->fc_slot_table; |
513 | 510 | ||
514 | spin_lock(&tbl->slot_tbl_lock); | 511 | spin_lock(&tbl->slot_tbl_lock); |
@@ -550,7 +547,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session, | |||
550 | dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); | 547 | dprintk("<-- %s slotid=%d seqid=%d\n", __func__, slotid, slot->seq_nr); |
551 | 548 | ||
552 | res->sr_session = session; | 549 | res->sr_session = session; |
553 | res->sr_slotid = slotid; | 550 | res->sr_slot = slot; |
554 | res->sr_renewal_time = jiffies; | 551 | res->sr_renewal_time = jiffies; |
555 | res->sr_status_flags = 0; | 552 | res->sr_status_flags = 0; |
556 | /* | 553 | /* |
@@ -576,8 +573,9 @@ int nfs4_setup_sequence(const struct nfs_server *server, | |||
576 | goto out; | 573 | goto out; |
577 | } | 574 | } |
578 | 575 | ||
579 | dprintk("--> %s clp %p session %p sr_slotid %d\n", | 576 | dprintk("--> %s clp %p session %p sr_slot %ld\n", |
580 | __func__, session->clp, session, res->sr_slotid); | 577 | __func__, session->clp, session, res->sr_slot ? |
578 | res->sr_slot - session->fc_slot_table.slots : -1); | ||
581 | 579 | ||
582 | ret = nfs41_setup_sequence(session, args, res, cache_reply, | 580 | ret = nfs41_setup_sequence(session, args, res, cache_reply, |
583 | task); | 581 | task); |
@@ -650,7 +648,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server, | |||
650 | .callback_data = &data | 648 | .callback_data = &data |
651 | }; | 649 | }; |
652 | 650 | ||
653 | res->sr_slotid = NFS4_MAX_SLOT_TABLE; | 651 | res->sr_slot = NULL; |
654 | if (privileged) | 652 | if (privileged) |
655 | task_setup.callback_ops = &nfs41_call_priv_sync_ops; | 653 | task_setup.callback_ops = &nfs41_call_priv_sync_ops; |
656 | task = rpc_run_task(&task_setup); | 654 | task = rpc_run_task(&task_setup); |
@@ -735,7 +733,6 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p) | |||
735 | p->o_res.server = p->o_arg.server; | 733 | p->o_res.server = p->o_arg.server; |
736 | nfs_fattr_init(&p->f_attr); | 734 | nfs_fattr_init(&p->f_attr); |
737 | nfs_fattr_init(&p->dir_attr); | 735 | nfs_fattr_init(&p->dir_attr); |
738 | p->o_res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
739 | } | 736 | } |
740 | 737 | ||
741 | static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, | 738 | static struct nfs4_opendata *nfs4_opendata_alloc(struct path *path, |
@@ -1120,6 +1117,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1120 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 1117 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
1121 | smp_rmb(); | 1118 | smp_rmb(); |
1122 | if (state->n_rdwr != 0) { | 1119 | if (state->n_rdwr != 0) { |
1120 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
1123 | ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); | 1121 | ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); |
1124 | if (ret != 0) | 1122 | if (ret != 0) |
1125 | return ret; | 1123 | return ret; |
@@ -1127,6 +1125,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1127 | return -ESTALE; | 1125 | return -ESTALE; |
1128 | } | 1126 | } |
1129 | if (state->n_wronly != 0) { | 1127 | if (state->n_wronly != 0) { |
1128 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
1130 | ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); | 1129 | ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); |
1131 | if (ret != 0) | 1130 | if (ret != 0) |
1132 | return ret; | 1131 | return ret; |
@@ -1134,6 +1133,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1134 | return -ESTALE; | 1133 | return -ESTALE; |
1135 | } | 1134 | } |
1136 | if (state->n_rdonly != 0) { | 1135 | if (state->n_rdonly != 0) { |
1136 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
1137 | ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); | 1137 | ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); |
1138 | if (ret != 0) | 1138 | if (ret != 0) |
1139 | return ret; | 1139 | return ret; |
@@ -1188,7 +1188,7 @@ static int nfs4_do_open_reclaim(struct nfs_open_context *ctx, struct nfs4_state | |||
1188 | int err; | 1188 | int err; |
1189 | do { | 1189 | do { |
1190 | err = _nfs4_do_open_reclaim(ctx, state); | 1190 | err = _nfs4_do_open_reclaim(ctx, state); |
1191 | if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) | 1191 | if (err != -NFS4ERR_DELAY) |
1192 | break; | 1192 | break; |
1193 | nfs4_handle_exception(server, err, &exception); | 1193 | nfs4_handle_exception(server, err, &exception); |
1194 | } while (exception.retry); | 1194 | } while (exception.retry); |
@@ -1258,6 +1258,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state | |||
1258 | case -NFS4ERR_ADMIN_REVOKED: | 1258 | case -NFS4ERR_ADMIN_REVOKED: |
1259 | case -NFS4ERR_BAD_STATEID: | 1259 | case -NFS4ERR_BAD_STATEID: |
1260 | nfs4_state_mark_reclaim_nograce(server->nfs_client, state); | 1260 | nfs4_state_mark_reclaim_nograce(server->nfs_client, state); |
1261 | case -EKEYEXPIRED: | ||
1262 | /* | ||
1263 | * User RPCSEC_GSS context has expired. | ||
1264 | * We cannot recover this stateid now, so | ||
1265 | * skip it and allow recovery thread to | ||
1266 | * proceed. | ||
1267 | */ | ||
1261 | case -ENOMEM: | 1268 | case -ENOMEM: |
1262 | err = 0; | 1269 | err = 0; |
1263 | goto out; | 1270 | goto out; |
@@ -1605,7 +1612,6 @@ static int nfs4_do_open_expired(struct nfs_open_context *ctx, struct nfs4_state | |||
1605 | goto out; | 1612 | goto out; |
1606 | case -NFS4ERR_GRACE: | 1613 | case -NFS4ERR_GRACE: |
1607 | case -NFS4ERR_DELAY: | 1614 | case -NFS4ERR_DELAY: |
1608 | case -EKEYEXPIRED: | ||
1609 | nfs4_handle_exception(server, err, &exception); | 1615 | nfs4_handle_exception(server, err, &exception); |
1610 | err = 0; | 1616 | err = 0; |
1611 | } | 1617 | } |
@@ -1975,7 +1981,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, gfp_t gfp_mask, i | |||
1975 | calldata->res.fattr = &calldata->fattr; | 1981 | calldata->res.fattr = &calldata->fattr; |
1976 | calldata->res.seqid = calldata->arg.seqid; | 1982 | calldata->res.seqid = calldata->arg.seqid; |
1977 | calldata->res.server = server; | 1983 | calldata->res.server = server; |
1978 | calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
1979 | path_get(path); | 1984 | path_get(path); |
1980 | calldata->path = *path; | 1985 | calldata->path = *path; |
1981 | 1986 | ||
@@ -1998,120 +2003,17 @@ out: | |||
1998 | return status; | 2003 | return status; |
1999 | } | 2004 | } |
2000 | 2005 | ||
2001 | static int nfs4_intent_set_file(struct nameidata *nd, struct path *path, struct nfs4_state *state, fmode_t fmode) | 2006 | static struct inode * |
2007 | nfs4_atomic_open(struct inode *dir, struct nfs_open_context *ctx, int open_flags, struct iattr *attr) | ||
2002 | { | 2008 | { |
2003 | struct file *filp; | ||
2004 | int ret; | ||
2005 | |||
2006 | /* If the open_intent is for execute, we have an extra check to make */ | ||
2007 | if (fmode & FMODE_EXEC) { | ||
2008 | ret = nfs_may_open(state->inode, | ||
2009 | state->owner->so_cred, | ||
2010 | nd->intent.open.flags); | ||
2011 | if (ret < 0) | ||
2012 | goto out_close; | ||
2013 | } | ||
2014 | filp = lookup_instantiate_filp(nd, path->dentry, NULL); | ||
2015 | if (!IS_ERR(filp)) { | ||
2016 | struct nfs_open_context *ctx; | ||
2017 | ctx = nfs_file_open_context(filp); | ||
2018 | ctx->state = state; | ||
2019 | return 0; | ||
2020 | } | ||
2021 | ret = PTR_ERR(filp); | ||
2022 | out_close: | ||
2023 | nfs4_close_sync(path, state, fmode & (FMODE_READ|FMODE_WRITE)); | ||
2024 | return ret; | ||
2025 | } | ||
2026 | |||
2027 | struct dentry * | ||
2028 | nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | ||
2029 | { | ||
2030 | struct path path = { | ||
2031 | .mnt = nd->path.mnt, | ||
2032 | .dentry = dentry, | ||
2033 | }; | ||
2034 | struct dentry *parent; | ||
2035 | struct iattr attr; | ||
2036 | struct rpc_cred *cred; | ||
2037 | struct nfs4_state *state; | 2009 | struct nfs4_state *state; |
2038 | struct dentry *res; | ||
2039 | int open_flags = nd->intent.open.flags; | ||
2040 | fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC); | ||
2041 | |||
2042 | if (nd->flags & LOOKUP_CREATE) { | ||
2043 | attr.ia_mode = nd->intent.open.create_mode; | ||
2044 | attr.ia_valid = ATTR_MODE; | ||
2045 | if (!IS_POSIXACL(dir)) | ||
2046 | attr.ia_mode &= ~current_umask(); | ||
2047 | } else { | ||
2048 | open_flags &= ~O_EXCL; | ||
2049 | attr.ia_valid = 0; | ||
2050 | BUG_ON(open_flags & O_CREAT); | ||
2051 | } | ||
2052 | 2010 | ||
2053 | cred = rpc_lookup_cred(); | ||
2054 | if (IS_ERR(cred)) | ||
2055 | return (struct dentry *)cred; | ||
2056 | parent = dentry->d_parent; | ||
2057 | /* Protect against concurrent sillydeletes */ | 2011 | /* Protect against concurrent sillydeletes */ |
2058 | nfs_block_sillyrename(parent); | 2012 | state = nfs4_do_open(dir, &ctx->path, ctx->mode, open_flags, attr, ctx->cred); |
2059 | state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred); | 2013 | if (IS_ERR(state)) |
2060 | put_rpccred(cred); | 2014 | return ERR_CAST(state); |
2061 | if (IS_ERR(state)) { | 2015 | ctx->state = state; |
2062 | if (PTR_ERR(state) == -ENOENT) { | 2016 | return igrab(state->inode); |
2063 | d_add(dentry, NULL); | ||
2064 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
2065 | } | ||
2066 | nfs_unblock_sillyrename(parent); | ||
2067 | return (struct dentry *)state; | ||
2068 | } | ||
2069 | res = d_add_unique(dentry, igrab(state->inode)); | ||
2070 | if (res != NULL) | ||
2071 | path.dentry = res; | ||
2072 | nfs_set_verifier(path.dentry, nfs_save_change_attribute(dir)); | ||
2073 | nfs_unblock_sillyrename(parent); | ||
2074 | nfs4_intent_set_file(nd, &path, state, fmode); | ||
2075 | return res; | ||
2076 | } | ||
2077 | |||
2078 | int | ||
2079 | nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, struct nameidata *nd) | ||
2080 | { | ||
2081 | struct path path = { | ||
2082 | .mnt = nd->path.mnt, | ||
2083 | .dentry = dentry, | ||
2084 | }; | ||
2085 | struct rpc_cred *cred; | ||
2086 | struct nfs4_state *state; | ||
2087 | fmode_t fmode = openflags & (FMODE_READ | FMODE_WRITE); | ||
2088 | |||
2089 | cred = rpc_lookup_cred(); | ||
2090 | if (IS_ERR(cred)) | ||
2091 | return PTR_ERR(cred); | ||
2092 | state = nfs4_do_open(dir, &path, fmode, openflags, NULL, cred); | ||
2093 | put_rpccred(cred); | ||
2094 | if (IS_ERR(state)) { | ||
2095 | switch (PTR_ERR(state)) { | ||
2096 | case -EPERM: | ||
2097 | case -EACCES: | ||
2098 | case -EDQUOT: | ||
2099 | case -ENOSPC: | ||
2100 | case -EROFS: | ||
2101 | return PTR_ERR(state); | ||
2102 | default: | ||
2103 | goto out_drop; | ||
2104 | } | ||
2105 | } | ||
2106 | if (state->inode == dentry->d_inode) { | ||
2107 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | ||
2108 | nfs4_intent_set_file(nd, &path, state, fmode); | ||
2109 | return 1; | ||
2110 | } | ||
2111 | nfs4_close_sync(&path, state, fmode); | ||
2112 | out_drop: | ||
2113 | d_drop(dentry); | ||
2114 | return 0; | ||
2115 | } | 2017 | } |
2116 | 2018 | ||
2117 | static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) | 2019 | static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) |
@@ -2568,36 +2470,34 @@ static int nfs4_proc_readlink(struct inode *inode, struct page *page, | |||
2568 | 2470 | ||
2569 | static int | 2471 | static int |
2570 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 2472 | nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
2571 | int flags, struct nameidata *nd) | 2473 | int flags, struct nfs_open_context *ctx) |
2572 | { | 2474 | { |
2573 | struct path path = { | 2475 | struct path my_path = { |
2574 | .mnt = nd->path.mnt, | ||
2575 | .dentry = dentry, | 2476 | .dentry = dentry, |
2576 | }; | 2477 | }; |
2478 | struct path *path = &my_path; | ||
2577 | struct nfs4_state *state; | 2479 | struct nfs4_state *state; |
2578 | struct rpc_cred *cred; | 2480 | struct rpc_cred *cred = NULL; |
2579 | fmode_t fmode = flags & (FMODE_READ | FMODE_WRITE); | 2481 | fmode_t fmode = 0; |
2580 | int status = 0; | 2482 | int status = 0; |
2581 | 2483 | ||
2582 | cred = rpc_lookup_cred(); | 2484 | if (ctx != NULL) { |
2583 | if (IS_ERR(cred)) { | 2485 | cred = ctx->cred; |
2584 | status = PTR_ERR(cred); | 2486 | path = &ctx->path; |
2585 | goto out; | 2487 | fmode = ctx->mode; |
2586 | } | 2488 | } |
2587 | state = nfs4_do_open(dir, &path, fmode, flags, sattr, cred); | 2489 | state = nfs4_do_open(dir, path, fmode, flags, sattr, cred); |
2588 | d_drop(dentry); | 2490 | d_drop(dentry); |
2589 | if (IS_ERR(state)) { | 2491 | if (IS_ERR(state)) { |
2590 | status = PTR_ERR(state); | 2492 | status = PTR_ERR(state); |
2591 | goto out_putcred; | 2493 | goto out; |
2592 | } | 2494 | } |
2593 | d_add(dentry, igrab(state->inode)); | 2495 | d_add(dentry, igrab(state->inode)); |
2594 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); | 2496 | nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); |
2595 | if (status == 0 && (nd->flags & LOOKUP_OPEN) != 0) | 2497 | if (ctx != NULL) |
2596 | status = nfs4_intent_set_file(nd, &path, state, fmode); | 2498 | ctx->state = state; |
2597 | else | 2499 | else |
2598 | nfs4_close_sync(&path, state, fmode); | 2500 | nfs4_close_sync(path, state, fmode); |
2599 | out_putcred: | ||
2600 | put_rpccred(cred); | ||
2601 | out: | 2501 | out: |
2602 | return status; | 2502 | return status; |
2603 | } | 2503 | } |
@@ -2655,6 +2555,7 @@ static void nfs4_proc_unlink_setup(struct rpc_message *msg, struct inode *dir) | |||
2655 | 2555 | ||
2656 | args->bitmask = server->cache_consistency_bitmask; | 2556 | args->bitmask = server->cache_consistency_bitmask; |
2657 | res->server = server; | 2557 | res->server = server; |
2558 | res->seq_res.sr_slot = NULL; | ||
2658 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; | 2559 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE]; |
2659 | } | 2560 | } |
2660 | 2561 | ||
@@ -2671,18 +2572,46 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
2671 | return 1; | 2572 | return 1; |
2672 | } | 2573 | } |
2673 | 2574 | ||
2575 | static void nfs4_proc_rename_setup(struct rpc_message *msg, struct inode *dir) | ||
2576 | { | ||
2577 | struct nfs_server *server = NFS_SERVER(dir); | ||
2578 | struct nfs_renameargs *arg = msg->rpc_argp; | ||
2579 | struct nfs_renameres *res = msg->rpc_resp; | ||
2580 | |||
2581 | msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME]; | ||
2582 | arg->bitmask = server->attr_bitmask; | ||
2583 | res->server = server; | ||
2584 | } | ||
2585 | |||
2586 | static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | ||
2587 | struct inode *new_dir) | ||
2588 | { | ||
2589 | struct nfs_renameres *res = task->tk_msg.rpc_resp; | ||
2590 | |||
2591 | if (!nfs4_sequence_done(task, &res->seq_res)) | ||
2592 | return 0; | ||
2593 | if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN) | ||
2594 | return 0; | ||
2595 | |||
2596 | update_changeattr(old_dir, &res->old_cinfo); | ||
2597 | nfs_post_op_update_inode(old_dir, res->old_fattr); | ||
2598 | update_changeattr(new_dir, &res->new_cinfo); | ||
2599 | nfs_post_op_update_inode(new_dir, res->new_fattr); | ||
2600 | return 1; | ||
2601 | } | ||
2602 | |||
2674 | static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | 2603 | static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, |
2675 | struct inode *new_dir, struct qstr *new_name) | 2604 | struct inode *new_dir, struct qstr *new_name) |
2676 | { | 2605 | { |
2677 | struct nfs_server *server = NFS_SERVER(old_dir); | 2606 | struct nfs_server *server = NFS_SERVER(old_dir); |
2678 | struct nfs4_rename_arg arg = { | 2607 | struct nfs_renameargs arg = { |
2679 | .old_dir = NFS_FH(old_dir), | 2608 | .old_dir = NFS_FH(old_dir), |
2680 | .new_dir = NFS_FH(new_dir), | 2609 | .new_dir = NFS_FH(new_dir), |
2681 | .old_name = old_name, | 2610 | .old_name = old_name, |
2682 | .new_name = new_name, | 2611 | .new_name = new_name, |
2683 | .bitmask = server->attr_bitmask, | 2612 | .bitmask = server->attr_bitmask, |
2684 | }; | 2613 | }; |
2685 | struct nfs4_rename_res res = { | 2614 | struct nfs_renameres res = { |
2686 | .server = server, | 2615 | .server = server, |
2687 | }; | 2616 | }; |
2688 | struct rpc_message msg = { | 2617 | struct rpc_message msg = { |
@@ -2896,15 +2825,16 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, | |||
2896 | } | 2825 | } |
2897 | 2826 | ||
2898 | static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | 2827 | static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, |
2899 | u64 cookie, struct page *page, unsigned int count, int plus) | 2828 | u64 cookie, struct page **pages, unsigned int count, int plus) |
2900 | { | 2829 | { |
2901 | struct inode *dir = dentry->d_inode; | 2830 | struct inode *dir = dentry->d_inode; |
2902 | struct nfs4_readdir_arg args = { | 2831 | struct nfs4_readdir_arg args = { |
2903 | .fh = NFS_FH(dir), | 2832 | .fh = NFS_FH(dir), |
2904 | .pages = &page, | 2833 | .pages = pages, |
2905 | .pgbase = 0, | 2834 | .pgbase = 0, |
2906 | .count = count, | 2835 | .count = count, |
2907 | .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, | 2836 | .bitmask = NFS_SERVER(dentry->d_inode)->attr_bitmask, |
2837 | .plus = plus, | ||
2908 | }; | 2838 | }; |
2909 | struct nfs4_readdir_res res; | 2839 | struct nfs4_readdir_res res; |
2910 | struct rpc_message msg = { | 2840 | struct rpc_message msg = { |
@@ -2932,14 +2862,14 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | |||
2932 | } | 2862 | } |
2933 | 2863 | ||
2934 | static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | 2864 | static int nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, |
2935 | u64 cookie, struct page *page, unsigned int count, int plus) | 2865 | u64 cookie, struct page **pages, unsigned int count, int plus) |
2936 | { | 2866 | { |
2937 | struct nfs4_exception exception = { }; | 2867 | struct nfs4_exception exception = { }; |
2938 | int err; | 2868 | int err; |
2939 | do { | 2869 | do { |
2940 | err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), | 2870 | err = nfs4_handle_exception(NFS_SERVER(dentry->d_inode), |
2941 | _nfs4_proc_readdir(dentry, cred, cookie, | 2871 | _nfs4_proc_readdir(dentry, cred, cookie, |
2942 | page, count, plus), | 2872 | pages, count, plus), |
2943 | &exception); | 2873 | &exception); |
2944 | } while (exception.retry); | 2874 | } while (exception.retry); |
2945 | return err; | 2875 | return err; |
@@ -3490,9 +3420,6 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, | |||
3490 | nfs4_state_mark_reclaim_nograce(clp, state); | 3420 | nfs4_state_mark_reclaim_nograce(clp, state); |
3491 | goto do_state_recovery; | 3421 | goto do_state_recovery; |
3492 | case -NFS4ERR_STALE_STATEID: | 3422 | case -NFS4ERR_STALE_STATEID: |
3493 | if (state == NULL) | ||
3494 | break; | ||
3495 | nfs4_state_mark_reclaim_reboot(clp, state); | ||
3496 | case -NFS4ERR_STALE_CLIENTID: | 3423 | case -NFS4ERR_STALE_CLIENTID: |
3497 | case -NFS4ERR_EXPIRED: | 3424 | case -NFS4ERR_EXPIRED: |
3498 | goto do_state_recovery; | 3425 | goto do_state_recovery; |
@@ -3626,7 +3553,6 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp, | |||
3626 | case -NFS4ERR_RESOURCE: | 3553 | case -NFS4ERR_RESOURCE: |
3627 | /* The IBM lawyers misread another document! */ | 3554 | /* The IBM lawyers misread another document! */ |
3628 | case -NFS4ERR_DELAY: | 3555 | case -NFS4ERR_DELAY: |
3629 | case -EKEYEXPIRED: | ||
3630 | err = nfs4_delay(clp->cl_rpcclient, &timeout); | 3556 | err = nfs4_delay(clp->cl_rpcclient, &timeout); |
3631 | } | 3557 | } |
3632 | } while (err == 0); | 3558 | } while (err == 0); |
@@ -3721,7 +3647,6 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co | |||
3721 | memcpy(&data->stateid, stateid, sizeof(data->stateid)); | 3647 | memcpy(&data->stateid, stateid, sizeof(data->stateid)); |
3722 | data->res.fattr = &data->fattr; | 3648 | data->res.fattr = &data->fattr; |
3723 | data->res.server = server; | 3649 | data->res.server = server; |
3724 | data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
3725 | nfs_fattr_init(data->res.fattr); | 3650 | nfs_fattr_init(data->res.fattr); |
3726 | data->timestamp = jiffies; | 3651 | data->timestamp = jiffies; |
3727 | data->rpc_status = 0; | 3652 | data->rpc_status = 0; |
@@ -3874,7 +3799,6 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl, | |||
3874 | p->arg.fl = &p->fl; | 3799 | p->arg.fl = &p->fl; |
3875 | p->arg.seqid = seqid; | 3800 | p->arg.seqid = seqid; |
3876 | p->res.seqid = seqid; | 3801 | p->res.seqid = seqid; |
3877 | p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
3878 | p->arg.stateid = &lsp->ls_stateid; | 3802 | p->arg.stateid = &lsp->ls_stateid; |
3879 | p->lsp = lsp; | 3803 | p->lsp = lsp; |
3880 | atomic_inc(&lsp->ls_count); | 3804 | atomic_inc(&lsp->ls_count); |
@@ -4054,7 +3978,6 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, | |||
4054 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; | 3978 | p->arg.lock_owner.clientid = server->nfs_client->cl_clientid; |
4055 | p->arg.lock_owner.id = lsp->ls_id.id; | 3979 | p->arg.lock_owner.id = lsp->ls_id.id; |
4056 | p->res.lock_seqid = p->arg.lock_seqid; | 3980 | p->res.lock_seqid = p->arg.lock_seqid; |
4057 | p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
4058 | p->lsp = lsp; | 3981 | p->lsp = lsp; |
4059 | p->server = server; | 3982 | p->server = server; |
4060 | atomic_inc(&lsp->ls_count); | 3983 | atomic_inc(&lsp->ls_count); |
@@ -4241,7 +4164,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request | |||
4241 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) | 4164 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) |
4242 | return 0; | 4165 | return 0; |
4243 | err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); | 4166 | err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM); |
4244 | if (err != -NFS4ERR_DELAY && err != -EKEYEXPIRED) | 4167 | if (err != -NFS4ERR_DELAY) |
4245 | break; | 4168 | break; |
4246 | nfs4_handle_exception(server, err, &exception); | 4169 | nfs4_handle_exception(server, err, &exception); |
4247 | } while (exception.retry); | 4170 | } while (exception.retry); |
@@ -4266,7 +4189,6 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request | |||
4266 | goto out; | 4189 | goto out; |
4267 | case -NFS4ERR_GRACE: | 4190 | case -NFS4ERR_GRACE: |
4268 | case -NFS4ERR_DELAY: | 4191 | case -NFS4ERR_DELAY: |
4269 | case -EKEYEXPIRED: | ||
4270 | nfs4_handle_exception(server, err, &exception); | 4192 | nfs4_handle_exception(server, err, &exception); |
4271 | err = 0; | 4193 | err = 0; |
4272 | } | 4194 | } |
@@ -4412,13 +4334,21 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl) | |||
4412 | nfs4_state_mark_reclaim_nograce(server->nfs_client, state); | 4334 | nfs4_state_mark_reclaim_nograce(server->nfs_client, state); |
4413 | err = 0; | 4335 | err = 0; |
4414 | goto out; | 4336 | goto out; |
4337 | case -EKEYEXPIRED: | ||
4338 | /* | ||
4339 | * User RPCSEC_GSS context has expired. | ||
4340 | * We cannot recover this stateid now, so | ||
4341 | * skip it and allow recovery thread to | ||
4342 | * proceed. | ||
4343 | */ | ||
4344 | err = 0; | ||
4345 | goto out; | ||
4415 | case -ENOMEM: | 4346 | case -ENOMEM: |
4416 | case -NFS4ERR_DENIED: | 4347 | case -NFS4ERR_DENIED: |
4417 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ | 4348 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ |
4418 | err = 0; | 4349 | err = 0; |
4419 | goto out; | 4350 | goto out; |
4420 | case -NFS4ERR_DELAY: | 4351 | case -NFS4ERR_DELAY: |
4421 | case -EKEYEXPIRED: | ||
4422 | break; | 4352 | break; |
4423 | } | 4353 | } |
4424 | err = nfs4_handle_exception(server, err, &exception); | 4354 | err = nfs4_handle_exception(server, err, &exception); |
@@ -4647,7 +4577,6 @@ static void nfs4_get_lease_time_done(struct rpc_task *task, void *calldata) | |||
4647 | switch (task->tk_status) { | 4577 | switch (task->tk_status) { |
4648 | case -NFS4ERR_DELAY: | 4578 | case -NFS4ERR_DELAY: |
4649 | case -NFS4ERR_GRACE: | 4579 | case -NFS4ERR_GRACE: |
4650 | case -EKEYEXPIRED: | ||
4651 | dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); | 4580 | dprintk("%s Retry: tk_status %d\n", __func__, task->tk_status); |
4652 | rpc_delay(task, NFS4_POLL_RETRY_MIN); | 4581 | rpc_delay(task, NFS4_POLL_RETRY_MIN); |
4653 | task->tk_status = 0; | 4582 | task->tk_status = 0; |
@@ -4687,7 +4616,6 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo) | |||
4687 | }; | 4616 | }; |
4688 | int status; | 4617 | int status; |
4689 | 4618 | ||
4690 | res.lr_seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
4691 | dprintk("--> %s\n", __func__); | 4619 | dprintk("--> %s\n", __func__); |
4692 | task = rpc_run_task(&task_setup); | 4620 | task = rpc_run_task(&task_setup); |
4693 | 4621 | ||
@@ -4914,49 +4842,56 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args) | |||
4914 | args->bc_attrs.max_reqs); | 4842 | args->bc_attrs.max_reqs); |
4915 | } | 4843 | } |
4916 | 4844 | ||
4917 | static int _verify_channel_attr(char *chan, char *attr_name, u32 sent, u32 rcvd) | 4845 | static int nfs4_verify_fore_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) |
4918 | { | 4846 | { |
4919 | if (rcvd <= sent) | 4847 | struct nfs4_channel_attrs *sent = &args->fc_attrs; |
4920 | return 0; | 4848 | struct nfs4_channel_attrs *rcvd = &session->fc_attrs; |
4921 | printk(KERN_WARNING "%s: Session INVALID: %s channel %s increased. " | 4849 | |
4922 | "sent=%u rcvd=%u\n", __func__, chan, attr_name, sent, rcvd); | 4850 | if (rcvd->headerpadsz > sent->headerpadsz) |
4923 | return -EINVAL; | 4851 | return -EINVAL; |
4852 | if (rcvd->max_resp_sz > sent->max_resp_sz) | ||
4853 | return -EINVAL; | ||
4854 | /* | ||
4855 | * Our requested max_ops is the minimum we need; we're not | ||
4856 | * prepared to break up compounds into smaller pieces than that. | ||
4857 | * So, no point even trying to continue if the server won't | ||
4858 | * cooperate: | ||
4859 | */ | ||
4860 | if (rcvd->max_ops < sent->max_ops) | ||
4861 | return -EINVAL; | ||
4862 | if (rcvd->max_reqs == 0) | ||
4863 | return -EINVAL; | ||
4864 | return 0; | ||
4924 | } | 4865 | } |
4925 | 4866 | ||
4926 | #define _verify_fore_channel_attr(_name_) \ | 4867 | static int nfs4_verify_back_channel_attrs(struct nfs41_create_session_args *args, struct nfs4_session *session) |
4927 | _verify_channel_attr("fore", #_name_, \ | 4868 | { |
4928 | args->fc_attrs._name_, \ | 4869 | struct nfs4_channel_attrs *sent = &args->bc_attrs; |
4929 | session->fc_attrs._name_) | 4870 | struct nfs4_channel_attrs *rcvd = &session->bc_attrs; |
4930 | 4871 | ||
4931 | #define _verify_back_channel_attr(_name_) \ | 4872 | if (rcvd->max_rqst_sz > sent->max_rqst_sz) |
4932 | _verify_channel_attr("back", #_name_, \ | 4873 | return -EINVAL; |
4933 | args->bc_attrs._name_, \ | 4874 | if (rcvd->max_resp_sz < sent->max_resp_sz) |
4934 | session->bc_attrs._name_) | 4875 | return -EINVAL; |
4876 | if (rcvd->max_resp_sz_cached > sent->max_resp_sz_cached) | ||
4877 | return -EINVAL; | ||
4878 | /* These would render the backchannel useless: */ | ||
4879 | if (rcvd->max_ops == 0) | ||
4880 | return -EINVAL; | ||
4881 | if (rcvd->max_reqs == 0) | ||
4882 | return -EINVAL; | ||
4883 | return 0; | ||
4884 | } | ||
4935 | 4885 | ||
4936 | /* | ||
4937 | * The server is not allowed to increase the fore channel header pad size, | ||
4938 | * maximum response size, or maximum number of operations. | ||
4939 | * | ||
4940 | * The back channel attributes are only negotiatied down: We send what the | ||
4941 | * (back channel) server insists upon. | ||
4942 | */ | ||
4943 | static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, | 4886 | static int nfs4_verify_channel_attrs(struct nfs41_create_session_args *args, |
4944 | struct nfs4_session *session) | 4887 | struct nfs4_session *session) |
4945 | { | 4888 | { |
4946 | int ret = 0; | 4889 | int ret; |
4947 | |||
4948 | ret |= _verify_fore_channel_attr(headerpadsz); | ||
4949 | ret |= _verify_fore_channel_attr(max_resp_sz); | ||
4950 | ret |= _verify_fore_channel_attr(max_ops); | ||
4951 | |||
4952 | ret |= _verify_back_channel_attr(headerpadsz); | ||
4953 | ret |= _verify_back_channel_attr(max_rqst_sz); | ||
4954 | ret |= _verify_back_channel_attr(max_resp_sz); | ||
4955 | ret |= _verify_back_channel_attr(max_resp_sz_cached); | ||
4956 | ret |= _verify_back_channel_attr(max_ops); | ||
4957 | ret |= _verify_back_channel_attr(max_reqs); | ||
4958 | 4890 | ||
4959 | return ret; | 4891 | ret = nfs4_verify_fore_channel_attrs(args, session); |
4892 | if (ret) | ||
4893 | return ret; | ||
4894 | return nfs4_verify_back_channel_attrs(args, session); | ||
4960 | } | 4895 | } |
4961 | 4896 | ||
4962 | static int _nfs4_proc_create_session(struct nfs_client *clp) | 4897 | static int _nfs4_proc_create_session(struct nfs_client *clp) |
@@ -5111,7 +5046,6 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client | |||
5111 | { | 5046 | { |
5112 | switch(task->tk_status) { | 5047 | switch(task->tk_status) { |
5113 | case -NFS4ERR_DELAY: | 5048 | case -NFS4ERR_DELAY: |
5114 | case -EKEYEXPIRED: | ||
5115 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 5049 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
5116 | return -EAGAIN; | 5050 | return -EAGAIN; |
5117 | default: | 5051 | default: |
@@ -5180,12 +5114,11 @@ static struct rpc_task *_nfs41_proc_sequence(struct nfs_client *clp, struct rpc_ | |||
5180 | 5114 | ||
5181 | if (!atomic_inc_not_zero(&clp->cl_count)) | 5115 | if (!atomic_inc_not_zero(&clp->cl_count)) |
5182 | return ERR_PTR(-EIO); | 5116 | return ERR_PTR(-EIO); |
5183 | calldata = kmalloc(sizeof(*calldata), GFP_NOFS); | 5117 | calldata = kzalloc(sizeof(*calldata), GFP_NOFS); |
5184 | if (calldata == NULL) { | 5118 | if (calldata == NULL) { |
5185 | nfs_put_client(clp); | 5119 | nfs_put_client(clp); |
5186 | return ERR_PTR(-ENOMEM); | 5120 | return ERR_PTR(-ENOMEM); |
5187 | } | 5121 | } |
5188 | calldata->res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
5189 | msg.rpc_argp = &calldata->args; | 5122 | msg.rpc_argp = &calldata->args; |
5190 | msg.rpc_resp = &calldata->res; | 5123 | msg.rpc_resp = &calldata->res; |
5191 | calldata->clp = clp; | 5124 | calldata->clp = clp; |
@@ -5254,7 +5187,6 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf | |||
5254 | case -NFS4ERR_WRONG_CRED: /* What to do here? */ | 5187 | case -NFS4ERR_WRONG_CRED: /* What to do here? */ |
5255 | break; | 5188 | break; |
5256 | case -NFS4ERR_DELAY: | 5189 | case -NFS4ERR_DELAY: |
5257 | case -EKEYEXPIRED: | ||
5258 | rpc_delay(task, NFS4_POLL_RETRY_MAX); | 5190 | rpc_delay(task, NFS4_POLL_RETRY_MAX); |
5259 | return -EAGAIN; | 5191 | return -EAGAIN; |
5260 | default: | 5192 | default: |
@@ -5317,7 +5249,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp) | |||
5317 | goto out; | 5249 | goto out; |
5318 | calldata->clp = clp; | 5250 | calldata->clp = clp; |
5319 | calldata->arg.one_fs = 0; | 5251 | calldata->arg.one_fs = 0; |
5320 | calldata->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
5321 | 5252 | ||
5322 | msg.rpc_argp = &calldata->arg; | 5253 | msg.rpc_argp = &calldata->arg; |
5323 | msg.rpc_resp = &calldata->res; | 5254 | msg.rpc_resp = &calldata->res; |
@@ -5333,6 +5264,147 @@ out: | |||
5333 | dprintk("<-- %s status=%d\n", __func__, status); | 5264 | dprintk("<-- %s status=%d\n", __func__, status); |
5334 | return status; | 5265 | return status; |
5335 | } | 5266 | } |
5267 | |||
5268 | static void | ||
5269 | nfs4_layoutget_prepare(struct rpc_task *task, void *calldata) | ||
5270 | { | ||
5271 | struct nfs4_layoutget *lgp = calldata; | ||
5272 | struct inode *ino = lgp->args.inode; | ||
5273 | struct nfs_server *server = NFS_SERVER(ino); | ||
5274 | |||
5275 | dprintk("--> %s\n", __func__); | ||
5276 | if (nfs4_setup_sequence(server, &lgp->args.seq_args, | ||
5277 | &lgp->res.seq_res, 0, task)) | ||
5278 | return; | ||
5279 | rpc_call_start(task); | ||
5280 | } | ||
5281 | |||
5282 | static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) | ||
5283 | { | ||
5284 | struct nfs4_layoutget *lgp = calldata; | ||
5285 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
5286 | |||
5287 | dprintk("--> %s\n", __func__); | ||
5288 | |||
5289 | if (!nfs4_sequence_done(task, &lgp->res.seq_res)) | ||
5290 | return; | ||
5291 | |||
5292 | switch (task->tk_status) { | ||
5293 | case 0: | ||
5294 | break; | ||
5295 | case -NFS4ERR_LAYOUTTRYLATER: | ||
5296 | case -NFS4ERR_RECALLCONFLICT: | ||
5297 | task->tk_status = -NFS4ERR_DELAY; | ||
5298 | /* Fall through */ | ||
5299 | default: | ||
5300 | if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { | ||
5301 | rpc_restart_call_prepare(task); | ||
5302 | return; | ||
5303 | } | ||
5304 | } | ||
5305 | lgp->status = task->tk_status; | ||
5306 | dprintk("<-- %s\n", __func__); | ||
5307 | } | ||
5308 | |||
5309 | static void nfs4_layoutget_release(void *calldata) | ||
5310 | { | ||
5311 | struct nfs4_layoutget *lgp = calldata; | ||
5312 | |||
5313 | dprintk("--> %s\n", __func__); | ||
5314 | put_layout_hdr(lgp->args.inode); | ||
5315 | if (lgp->res.layout.buf != NULL) | ||
5316 | free_page((unsigned long) lgp->res.layout.buf); | ||
5317 | put_nfs_open_context(lgp->args.ctx); | ||
5318 | kfree(calldata); | ||
5319 | dprintk("<-- %s\n", __func__); | ||
5320 | } | ||
5321 | |||
5322 | static const struct rpc_call_ops nfs4_layoutget_call_ops = { | ||
5323 | .rpc_call_prepare = nfs4_layoutget_prepare, | ||
5324 | .rpc_call_done = nfs4_layoutget_done, | ||
5325 | .rpc_release = nfs4_layoutget_release, | ||
5326 | }; | ||
5327 | |||
5328 | int nfs4_proc_layoutget(struct nfs4_layoutget *lgp) | ||
5329 | { | ||
5330 | struct nfs_server *server = NFS_SERVER(lgp->args.inode); | ||
5331 | struct rpc_task *task; | ||
5332 | struct rpc_message msg = { | ||
5333 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTGET], | ||
5334 | .rpc_argp = &lgp->args, | ||
5335 | .rpc_resp = &lgp->res, | ||
5336 | }; | ||
5337 | struct rpc_task_setup task_setup_data = { | ||
5338 | .rpc_client = server->client, | ||
5339 | .rpc_message = &msg, | ||
5340 | .callback_ops = &nfs4_layoutget_call_ops, | ||
5341 | .callback_data = lgp, | ||
5342 | .flags = RPC_TASK_ASYNC, | ||
5343 | }; | ||
5344 | int status = 0; | ||
5345 | |||
5346 | dprintk("--> %s\n", __func__); | ||
5347 | |||
5348 | lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); | ||
5349 | if (lgp->res.layout.buf == NULL) { | ||
5350 | nfs4_layoutget_release(lgp); | ||
5351 | return -ENOMEM; | ||
5352 | } | ||
5353 | |||
5354 | lgp->res.seq_res.sr_slot = NULL; | ||
5355 | task = rpc_run_task(&task_setup_data); | ||
5356 | if (IS_ERR(task)) | ||
5357 | return PTR_ERR(task); | ||
5358 | status = nfs4_wait_for_completion_rpc_task(task); | ||
5359 | if (status != 0) | ||
5360 | goto out; | ||
5361 | status = lgp->status; | ||
5362 | if (status != 0) | ||
5363 | goto out; | ||
5364 | status = pnfs_layout_process(lgp); | ||
5365 | out: | ||
5366 | rpc_put_task(task); | ||
5367 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5368 | return status; | ||
5369 | } | ||
5370 | |||
5371 | static int | ||
5372 | _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | ||
5373 | { | ||
5374 | struct nfs4_getdeviceinfo_args args = { | ||
5375 | .pdev = pdev, | ||
5376 | }; | ||
5377 | struct nfs4_getdeviceinfo_res res = { | ||
5378 | .pdev = pdev, | ||
5379 | }; | ||
5380 | struct rpc_message msg = { | ||
5381 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICEINFO], | ||
5382 | .rpc_argp = &args, | ||
5383 | .rpc_resp = &res, | ||
5384 | }; | ||
5385 | int status; | ||
5386 | |||
5387 | dprintk("--> %s\n", __func__); | ||
5388 | status = nfs4_call_sync(server, &msg, &args, &res, 0); | ||
5389 | dprintk("<-- %s status=%d\n", __func__, status); | ||
5390 | |||
5391 | return status; | ||
5392 | } | ||
5393 | |||
5394 | int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev) | ||
5395 | { | ||
5396 | struct nfs4_exception exception = { }; | ||
5397 | int err; | ||
5398 | |||
5399 | do { | ||
5400 | err = nfs4_handle_exception(server, | ||
5401 | _nfs4_proc_getdeviceinfo(server, pdev), | ||
5402 | &exception); | ||
5403 | } while (exception.retry); | ||
5404 | return err; | ||
5405 | } | ||
5406 | EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); | ||
5407 | |||
5336 | #endif /* CONFIG_NFS_V4_1 */ | 5408 | #endif /* CONFIG_NFS_V4_1 */ |
5337 | 5409 | ||
5338 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { | 5410 | struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { |
@@ -5443,6 +5515,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
5443 | .unlink_setup = nfs4_proc_unlink_setup, | 5515 | .unlink_setup = nfs4_proc_unlink_setup, |
5444 | .unlink_done = nfs4_proc_unlink_done, | 5516 | .unlink_done = nfs4_proc_unlink_done, |
5445 | .rename = nfs4_proc_rename, | 5517 | .rename = nfs4_proc_rename, |
5518 | .rename_setup = nfs4_proc_rename_setup, | ||
5519 | .rename_done = nfs4_proc_rename_done, | ||
5446 | .link = nfs4_proc_link, | 5520 | .link = nfs4_proc_link, |
5447 | .symlink = nfs4_proc_symlink, | 5521 | .symlink = nfs4_proc_symlink, |
5448 | .mkdir = nfs4_proc_mkdir, | 5522 | .mkdir = nfs4_proc_mkdir, |
@@ -5463,6 +5537,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
5463 | .lock = nfs4_proc_lock, | 5537 | .lock = nfs4_proc_lock, |
5464 | .clear_acl_cache = nfs4_zap_acl_attr, | 5538 | .clear_acl_cache = nfs4_zap_acl_attr, |
5465 | .close_context = nfs4_close_context, | 5539 | .close_context = nfs4_close_context, |
5540 | .open_context = nfs4_atomic_open, | ||
5466 | }; | 5541 | }; |
5467 | 5542 | ||
5468 | /* | 5543 | /* |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 3e2f19b04c06..f575a3126737 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -40,12 +40,13 @@ | |||
40 | 40 | ||
41 | #include <linux/kernel.h> | 41 | #include <linux/kernel.h> |
42 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
43 | #include <linux/smp_lock.h> | 43 | #include <linux/fs.h> |
44 | #include <linux/nfs_fs.h> | 44 | #include <linux/nfs_fs.h> |
45 | #include <linux/nfs_idmap.h> | 45 | #include <linux/nfs_idmap.h> |
46 | #include <linux/kthread.h> | 46 | #include <linux/kthread.h> |
47 | #include <linux/module.h> | 47 | #include <linux/module.h> |
48 | #include <linux/random.h> | 48 | #include <linux/random.h> |
49 | #include <linux/ratelimit.h> | ||
49 | #include <linux/workqueue.h> | 50 | #include <linux/workqueue.h> |
50 | #include <linux/bitops.h> | 51 | #include <linux/bitops.h> |
51 | 52 | ||
@@ -53,6 +54,7 @@ | |||
53 | #include "callback.h" | 54 | #include "callback.h" |
54 | #include "delegation.h" | 55 | #include "delegation.h" |
55 | #include "internal.h" | 56 | #include "internal.h" |
57 | #include "pnfs.h" | ||
56 | 58 | ||
57 | #define OPENOWNER_POOL_SIZE 8 | 59 | #define OPENOWNER_POOL_SIZE 8 |
58 | 60 | ||
@@ -970,13 +972,13 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
970 | /* Guard against delegation returns and new lock/unlock calls */ | 972 | /* Guard against delegation returns and new lock/unlock calls */ |
971 | down_write(&nfsi->rwsem); | 973 | down_write(&nfsi->rwsem); |
972 | /* Protect inode->i_flock using the BKL */ | 974 | /* Protect inode->i_flock using the BKL */ |
973 | lock_kernel(); | 975 | lock_flocks(); |
974 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 976 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
975 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) | 977 | if (!(fl->fl_flags & (FL_POSIX|FL_FLOCK))) |
976 | continue; | 978 | continue; |
977 | if (nfs_file_open_context(fl->fl_file)->state != state) | 979 | if (nfs_file_open_context(fl->fl_file)->state != state) |
978 | continue; | 980 | continue; |
979 | unlock_kernel(); | 981 | unlock_flocks(); |
980 | status = ops->recover_lock(state, fl); | 982 | status = ops->recover_lock(state, fl); |
981 | switch (status) { | 983 | switch (status) { |
982 | case 0: | 984 | case 0: |
@@ -1003,9 +1005,9 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ | |||
1003 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ | 1005 | /* kill_proc(fl->fl_pid, SIGLOST, 1); */ |
1004 | status = 0; | 1006 | status = 0; |
1005 | } | 1007 | } |
1006 | lock_kernel(); | 1008 | lock_flocks(); |
1007 | } | 1009 | } |
1008 | unlock_kernel(); | 1010 | unlock_flocks(); |
1009 | out: | 1011 | out: |
1010 | up_write(&nfsi->rwsem); | 1012 | up_write(&nfsi->rwsem); |
1011 | return status; | 1013 | return status; |
@@ -1063,6 +1065,14 @@ restart: | |||
1063 | /* Mark the file as being 'closed' */ | 1065 | /* Mark the file as being 'closed' */ |
1064 | state->state = 0; | 1066 | state->state = 0; |
1065 | break; | 1067 | break; |
1068 | case -EKEYEXPIRED: | ||
1069 | /* | ||
1070 | * User RPCSEC_GSS context has expired. | ||
1071 | * We cannot recover this stateid now, so | ||
1072 | * skip it and allow recovery thread to | ||
1073 | * proceed. | ||
1074 | */ | ||
1075 | break; | ||
1066 | case -NFS4ERR_ADMIN_REVOKED: | 1076 | case -NFS4ERR_ADMIN_REVOKED: |
1067 | case -NFS4ERR_STALE_STATEID: | 1077 | case -NFS4ERR_STALE_STATEID: |
1068 | case -NFS4ERR_BAD_STATEID: | 1078 | case -NFS4ERR_BAD_STATEID: |
@@ -1138,16 +1148,14 @@ static void nfs4_reclaim_complete(struct nfs_client *clp, | |||
1138 | (void)ops->reclaim_complete(clp); | 1148 | (void)ops->reclaim_complete(clp); |
1139 | } | 1149 | } |
1140 | 1150 | ||
1141 | static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) | 1151 | static int nfs4_state_clear_reclaim_reboot(struct nfs_client *clp) |
1142 | { | 1152 | { |
1143 | struct nfs4_state_owner *sp; | 1153 | struct nfs4_state_owner *sp; |
1144 | struct rb_node *pos; | 1154 | struct rb_node *pos; |
1145 | struct nfs4_state *state; | 1155 | struct nfs4_state *state; |
1146 | 1156 | ||
1147 | if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) | 1157 | if (!test_and_clear_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) |
1148 | return; | 1158 | return 0; |
1149 | |||
1150 | nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); | ||
1151 | 1159 | ||
1152 | for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { | 1160 | for (pos = rb_first(&clp->cl_state_owners); pos != NULL; pos = rb_next(pos)) { |
1153 | sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); | 1161 | sp = rb_entry(pos, struct nfs4_state_owner, so_client_node); |
@@ -1161,6 +1169,14 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) | |||
1161 | } | 1169 | } |
1162 | 1170 | ||
1163 | nfs_delegation_reap_unclaimed(clp); | 1171 | nfs_delegation_reap_unclaimed(clp); |
1172 | return 1; | ||
1173 | } | ||
1174 | |||
1175 | static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) | ||
1176 | { | ||
1177 | if (!nfs4_state_clear_reclaim_reboot(clp)) | ||
1178 | return; | ||
1179 | nfs4_reclaim_complete(clp, clp->cl_mvops->reboot_recovery_ops); | ||
1164 | } | 1180 | } |
1165 | 1181 | ||
1166 | static void nfs_delegation_clear_all(struct nfs_client *clp) | 1182 | static void nfs_delegation_clear_all(struct nfs_client *clp) |
@@ -1175,6 +1191,14 @@ static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) | |||
1175 | nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); | 1191 | nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_nograce); |
1176 | } | 1192 | } |
1177 | 1193 | ||
1194 | static void nfs4_warn_keyexpired(const char *s) | ||
1195 | { | ||
1196 | printk_ratelimited(KERN_WARNING "Error: state manager" | ||
1197 | " encountered RPCSEC_GSS session" | ||
1198 | " expired against NFSv4 server %s.\n", | ||
1199 | s); | ||
1200 | } | ||
1201 | |||
1178 | static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) | 1202 | static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) |
1179 | { | 1203 | { |
1180 | switch (error) { | 1204 | switch (error) { |
@@ -1187,7 +1211,7 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) | |||
1187 | case -NFS4ERR_STALE_CLIENTID: | 1211 | case -NFS4ERR_STALE_CLIENTID: |
1188 | case -NFS4ERR_LEASE_MOVED: | 1212 | case -NFS4ERR_LEASE_MOVED: |
1189 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); | 1213 | set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); |
1190 | nfs4_state_end_reclaim_reboot(clp); | 1214 | nfs4_state_clear_reclaim_reboot(clp); |
1191 | nfs4_state_start_reclaim_reboot(clp); | 1215 | nfs4_state_start_reclaim_reboot(clp); |
1192 | break; | 1216 | break; |
1193 | case -NFS4ERR_EXPIRED: | 1217 | case -NFS4ERR_EXPIRED: |
@@ -1204,6 +1228,10 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) | |||
1204 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); | 1228 | set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state); |
1205 | /* Zero session reset errors */ | 1229 | /* Zero session reset errors */ |
1206 | return 0; | 1230 | return 0; |
1231 | case -EKEYEXPIRED: | ||
1232 | /* Nothing we can do */ | ||
1233 | nfs4_warn_keyexpired(clp->cl_hostname); | ||
1234 | return 0; | ||
1207 | } | 1235 | } |
1208 | return error; | 1236 | return error; |
1209 | } | 1237 | } |
@@ -1414,9 +1442,10 @@ static void nfs4_set_lease_expired(struct nfs_client *clp, int status) | |||
1414 | case -NFS4ERR_DELAY: | 1442 | case -NFS4ERR_DELAY: |
1415 | case -NFS4ERR_CLID_INUSE: | 1443 | case -NFS4ERR_CLID_INUSE: |
1416 | case -EAGAIN: | 1444 | case -EAGAIN: |
1417 | case -EKEYEXPIRED: | ||
1418 | break; | 1445 | break; |
1419 | 1446 | ||
1447 | case -EKEYEXPIRED: | ||
1448 | nfs4_warn_keyexpired(clp->cl_hostname); | ||
1420 | case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery | 1449 | case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery |
1421 | * in nfs4_exchange_id */ | 1450 | * in nfs4_exchange_id */ |
1422 | default: | 1451 | default: |
@@ -1447,6 +1476,7 @@ static void nfs4_state_manager(struct nfs_client *clp) | |||
1447 | } | 1476 | } |
1448 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); | 1477 | clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); |
1449 | set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); | 1478 | set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); |
1479 | pnfs_destroy_all_layouts(clp); | ||
1450 | } | 1480 | } |
1451 | 1481 | ||
1452 | if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { | 1482 | if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) { |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 08ef91291132..f313c4cce7e4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -52,6 +52,7 @@ | |||
52 | #include <linux/nfs_idmap.h> | 52 | #include <linux/nfs_idmap.h> |
53 | #include "nfs4_fs.h" | 53 | #include "nfs4_fs.h" |
54 | #include "internal.h" | 54 | #include "internal.h" |
55 | #include "pnfs.h" | ||
55 | 56 | ||
56 | #define NFSDBG_FACILITY NFSDBG_XDR | 57 | #define NFSDBG_FACILITY NFSDBG_XDR |
57 | 58 | ||
@@ -310,6 +311,19 @@ static int nfs4_stat_to_errno(int); | |||
310 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) | 311 | XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) |
311 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) | 312 | #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) |
312 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) | 313 | #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) |
314 | #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ | ||
315 | XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) | ||
316 | #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ | ||
317 | 1 /* layout type */ + \ | ||
318 | 1 /* opaque devaddr4 length */ + \ | ||
319 | /* devaddr4 payload is read into page */ \ | ||
320 | 1 /* notification bitmap length */ + \ | ||
321 | 1 /* notification bitmap */) | ||
322 | #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ | ||
323 | encode_stateid_maxsz) | ||
324 | #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ | ||
325 | decode_stateid_maxsz + \ | ||
326 | XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) | ||
313 | #else /* CONFIG_NFS_V4_1 */ | 327 | #else /* CONFIG_NFS_V4_1 */ |
314 | #define encode_sequence_maxsz 0 | 328 | #define encode_sequence_maxsz 0 |
315 | #define decode_sequence_maxsz 0 | 329 | #define decode_sequence_maxsz 0 |
@@ -699,6 +713,20 @@ static int nfs4_stat_to_errno(int); | |||
699 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ | 713 | #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ |
700 | decode_sequence_maxsz + \ | 714 | decode_sequence_maxsz + \ |
701 | decode_reclaim_complete_maxsz) | 715 | decode_reclaim_complete_maxsz) |
716 | #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ | ||
717 | encode_sequence_maxsz +\ | ||
718 | encode_getdeviceinfo_maxsz) | ||
719 | #define NFS4_dec_getdeviceinfo_sz (compound_decode_hdr_maxsz + \ | ||
720 | decode_sequence_maxsz + \ | ||
721 | decode_getdeviceinfo_maxsz) | ||
722 | #define NFS4_enc_layoutget_sz (compound_encode_hdr_maxsz + \ | ||
723 | encode_sequence_maxsz + \ | ||
724 | encode_putfh_maxsz + \ | ||
725 | encode_layoutget_maxsz) | ||
726 | #define NFS4_dec_layoutget_sz (compound_decode_hdr_maxsz + \ | ||
727 | decode_sequence_maxsz + \ | ||
728 | decode_putfh_maxsz + \ | ||
729 | decode_layoutget_maxsz) | ||
702 | 730 | ||
703 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + | 731 | const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + |
704 | compound_encode_hdr_maxsz + | 732 | compound_encode_hdr_maxsz + |
@@ -816,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
816 | if (iap->ia_valid & ATTR_MODE) | 844 | if (iap->ia_valid & ATTR_MODE) |
817 | len += 4; | 845 | len += 4; |
818 | if (iap->ia_valid & ATTR_UID) { | 846 | if (iap->ia_valid & ATTR_UID) { |
819 | owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name); | 847 | owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); |
820 | if (owner_namelen < 0) { | 848 | if (owner_namelen < 0) { |
821 | dprintk("nfs: couldn't resolve uid %d to string\n", | 849 | dprintk("nfs: couldn't resolve uid %d to string\n", |
822 | iap->ia_uid); | 850 | iap->ia_uid); |
@@ -828,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const | |||
828 | len += 4 + (XDR_QUADLEN(owner_namelen) << 2); | 856 | len += 4 + (XDR_QUADLEN(owner_namelen) << 2); |
829 | } | 857 | } |
830 | if (iap->ia_valid & ATTR_GID) { | 858 | if (iap->ia_valid & ATTR_GID) { |
831 | owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group); | 859 | owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); |
832 | if (owner_grouplen < 0) { | 860 | if (owner_grouplen < 0) { |
833 | dprintk("nfs: couldn't resolve gid %d to string\n", | 861 | dprintk("nfs: couldn't resolve gid %d to string\n", |
834 | iap->ia_gid); | 862 | iap->ia_gid); |
@@ -1385,24 +1413,35 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, | |||
1385 | 1413 | ||
1386 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) | 1414 | static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) |
1387 | { | 1415 | { |
1388 | uint32_t attrs[2] = { | 1416 | uint32_t attrs[2] = {0, 0}; |
1389 | FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID, | 1417 | uint32_t dircount = readdir->count >> 1; |
1390 | FATTR4_WORD1_MOUNTED_ON_FILEID, | ||
1391 | }; | ||
1392 | __be32 *p; | 1418 | __be32 *p; |
1393 | 1419 | ||
1420 | if (readdir->plus) { | ||
1421 | attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| | ||
1422 | FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; | ||
1423 | attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| | ||
1424 | FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| | ||
1425 | FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| | ||
1426 | FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; | ||
1427 | dircount >>= 1; | ||
1428 | } | ||
1429 | attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; | ||
1430 | attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; | ||
1431 | /* Switch to mounted_on_fileid if the server supports it */ | ||
1432 | if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) | ||
1433 | attrs[0] &= ~FATTR4_WORD0_FILEID; | ||
1434 | else | ||
1435 | attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; | ||
1436 | |||
1394 | p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); | 1437 | p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); |
1395 | *p++ = cpu_to_be32(OP_READDIR); | 1438 | *p++ = cpu_to_be32(OP_READDIR); |
1396 | p = xdr_encode_hyper(p, readdir->cookie); | 1439 | p = xdr_encode_hyper(p, readdir->cookie); |
1397 | p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); | 1440 | p = xdr_encode_opaque_fixed(p, readdir->verifier.data, NFS4_VERIFIER_SIZE); |
1398 | *p++ = cpu_to_be32(readdir->count >> 1); /* We're not doing readdirplus */ | 1441 | *p++ = cpu_to_be32(dircount); |
1399 | *p++ = cpu_to_be32(readdir->count); | 1442 | *p++ = cpu_to_be32(readdir->count); |
1400 | *p++ = cpu_to_be32(2); | 1443 | *p++ = cpu_to_be32(2); |
1401 | /* Switch to mounted_on_fileid if the server supports it */ | 1444 | |
1402 | if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID) | ||
1403 | attrs[0] &= ~FATTR4_WORD0_FILEID; | ||
1404 | else | ||
1405 | attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; | ||
1406 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); | 1445 | *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); |
1407 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); | 1446 | *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); |
1408 | hdr->nops++; | 1447 | hdr->nops++; |
@@ -1726,6 +1765,58 @@ static void encode_sequence(struct xdr_stream *xdr, | |||
1726 | #endif /* CONFIG_NFS_V4_1 */ | 1765 | #endif /* CONFIG_NFS_V4_1 */ |
1727 | } | 1766 | } |
1728 | 1767 | ||
1768 | #ifdef CONFIG_NFS_V4_1 | ||
1769 | static void | ||
1770 | encode_getdeviceinfo(struct xdr_stream *xdr, | ||
1771 | const struct nfs4_getdeviceinfo_args *args, | ||
1772 | struct compound_hdr *hdr) | ||
1773 | { | ||
1774 | __be32 *p; | ||
1775 | |||
1776 | p = reserve_space(xdr, 16 + NFS4_DEVICEID4_SIZE); | ||
1777 | *p++ = cpu_to_be32(OP_GETDEVICEINFO); | ||
1778 | p = xdr_encode_opaque_fixed(p, args->pdev->dev_id.data, | ||
1779 | NFS4_DEVICEID4_SIZE); | ||
1780 | *p++ = cpu_to_be32(args->pdev->layout_type); | ||
1781 | *p++ = cpu_to_be32(args->pdev->pglen); /* gdia_maxcount */ | ||
1782 | *p++ = cpu_to_be32(0); /* bitmap length 0 */ | ||
1783 | hdr->nops++; | ||
1784 | hdr->replen += decode_getdeviceinfo_maxsz; | ||
1785 | } | ||
1786 | |||
1787 | static void | ||
1788 | encode_layoutget(struct xdr_stream *xdr, | ||
1789 | const struct nfs4_layoutget_args *args, | ||
1790 | struct compound_hdr *hdr) | ||
1791 | { | ||
1792 | nfs4_stateid stateid; | ||
1793 | __be32 *p; | ||
1794 | |||
1795 | p = reserve_space(xdr, 44 + NFS4_STATEID_SIZE); | ||
1796 | *p++ = cpu_to_be32(OP_LAYOUTGET); | ||
1797 | *p++ = cpu_to_be32(0); /* Signal layout available */ | ||
1798 | *p++ = cpu_to_be32(args->type); | ||
1799 | *p++ = cpu_to_be32(args->range.iomode); | ||
1800 | p = xdr_encode_hyper(p, args->range.offset); | ||
1801 | p = xdr_encode_hyper(p, args->range.length); | ||
1802 | p = xdr_encode_hyper(p, args->minlength); | ||
1803 | pnfs_get_layout_stateid(&stateid, NFS_I(args->inode)->layout, | ||
1804 | args->ctx->state); | ||
1805 | p = xdr_encode_opaque_fixed(p, &stateid.data, NFS4_STATEID_SIZE); | ||
1806 | *p = cpu_to_be32(args->maxcount); | ||
1807 | |||
1808 | dprintk("%s: 1st type:0x%x iomode:%d off:%lu len:%lu mc:%d\n", | ||
1809 | __func__, | ||
1810 | args->type, | ||
1811 | args->range.iomode, | ||
1812 | (unsigned long)args->range.offset, | ||
1813 | (unsigned long)args->range.length, | ||
1814 | args->maxcount); | ||
1815 | hdr->nops++; | ||
1816 | hdr->replen += decode_layoutget_maxsz; | ||
1817 | } | ||
1818 | #endif /* CONFIG_NFS_V4_1 */ | ||
1819 | |||
1729 | /* | 1820 | /* |
1730 | * END OF "GENERIC" ENCODE ROUTINES. | 1821 | * END OF "GENERIC" ENCODE ROUTINES. |
1731 | */ | 1822 | */ |
@@ -1823,7 +1914,7 @@ static int nfs4_xdr_enc_remove(struct rpc_rqst *req, __be32 *p, const struct nfs | |||
1823 | /* | 1914 | /* |
1824 | * Encode RENAME request | 1915 | * Encode RENAME request |
1825 | */ | 1916 | */ |
1826 | static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs4_rename_arg *args) | 1917 | static int nfs4_xdr_enc_rename(struct rpc_rqst *req, __be32 *p, const struct nfs_renameargs *args) |
1827 | { | 1918 | { |
1828 | struct xdr_stream xdr; | 1919 | struct xdr_stream xdr; |
1829 | struct compound_hdr hdr = { | 1920 | struct compound_hdr hdr = { |
@@ -2543,6 +2634,51 @@ static int nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, uint32_t *p, | |||
2543 | return 0; | 2634 | return 0; |
2544 | } | 2635 | } |
2545 | 2636 | ||
2637 | /* | ||
2638 | * Encode GETDEVICEINFO request | ||
2639 | */ | ||
2640 | static int nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, uint32_t *p, | ||
2641 | struct nfs4_getdeviceinfo_args *args) | ||
2642 | { | ||
2643 | struct xdr_stream xdr; | ||
2644 | struct compound_hdr hdr = { | ||
2645 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2646 | }; | ||
2647 | |||
2648 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
2649 | encode_compound_hdr(&xdr, req, &hdr); | ||
2650 | encode_sequence(&xdr, &args->seq_args, &hdr); | ||
2651 | encode_getdeviceinfo(&xdr, args, &hdr); | ||
2652 | |||
2653 | /* set up reply kvec. Subtract notification bitmap max size (2) | ||
2654 | * so that notification bitmap is put in xdr_buf tail */ | ||
2655 | xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2, | ||
2656 | args->pdev->pages, args->pdev->pgbase, | ||
2657 | args->pdev->pglen); | ||
2658 | |||
2659 | encode_nops(&hdr); | ||
2660 | return 0; | ||
2661 | } | ||
2662 | |||
2663 | /* | ||
2664 | * Encode LAYOUTGET request | ||
2665 | */ | ||
2666 | static int nfs4_xdr_enc_layoutget(struct rpc_rqst *req, uint32_t *p, | ||
2667 | struct nfs4_layoutget_args *args) | ||
2668 | { | ||
2669 | struct xdr_stream xdr; | ||
2670 | struct compound_hdr hdr = { | ||
2671 | .minorversion = nfs4_xdr_minorversion(&args->seq_args), | ||
2672 | }; | ||
2673 | |||
2674 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | ||
2675 | encode_compound_hdr(&xdr, req, &hdr); | ||
2676 | encode_sequence(&xdr, &args->seq_args, &hdr); | ||
2677 | encode_putfh(&xdr, NFS_FH(args->inode), &hdr); | ||
2678 | encode_layoutget(&xdr, args, &hdr); | ||
2679 | encode_nops(&hdr); | ||
2680 | return 0; | ||
2681 | } | ||
2546 | #endif /* CONFIG_NFS_V4_1 */ | 2682 | #endif /* CONFIG_NFS_V4_1 */ |
2547 | 2683 | ||
2548 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) | 2684 | static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) |
@@ -2676,7 +2812,10 @@ out_overflow: | |||
2676 | static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) | 2812 | static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask) |
2677 | { | 2813 | { |
2678 | if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { | 2814 | if (likely(bitmap[0] & FATTR4_WORD0_SUPPORTED_ATTRS)) { |
2679 | decode_attr_bitmap(xdr, bitmask); | 2815 | int ret; |
2816 | ret = decode_attr_bitmap(xdr, bitmask); | ||
2817 | if (unlikely(ret < 0)) | ||
2818 | return ret; | ||
2680 | bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; | 2819 | bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS; |
2681 | } else | 2820 | } else |
2682 | bitmask[0] = bitmask[1] = 0; | 2821 | bitmask[0] = bitmask[1] = 0; |
@@ -2848,6 +2987,56 @@ out_overflow: | |||
2848 | return -EIO; | 2987 | return -EIO; |
2849 | } | 2988 | } |
2850 | 2989 | ||
2990 | static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap) | ||
2991 | { | ||
2992 | __be32 *p; | ||
2993 | |||
2994 | if (unlikely(bitmap[0] & (FATTR4_WORD0_RDATTR_ERROR - 1U))) | ||
2995 | return -EIO; | ||
2996 | if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) { | ||
2997 | p = xdr_inline_decode(xdr, 4); | ||
2998 | if (unlikely(!p)) | ||
2999 | goto out_overflow; | ||
3000 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; | ||
3001 | } | ||
3002 | return 0; | ||
3003 | out_overflow: | ||
3004 | print_overflow_msg(__func__, xdr); | ||
3005 | return -EIO; | ||
3006 | } | ||
3007 | |||
3008 | static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh) | ||
3009 | { | ||
3010 | __be32 *p; | ||
3011 | int len; | ||
3012 | |||
3013 | if (fh != NULL) | ||
3014 | memset(fh, 0, sizeof(*fh)); | ||
3015 | |||
3016 | if (unlikely(bitmap[0] & (FATTR4_WORD0_FILEHANDLE - 1U))) | ||
3017 | return -EIO; | ||
3018 | if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) { | ||
3019 | p = xdr_inline_decode(xdr, 4); | ||
3020 | if (unlikely(!p)) | ||
3021 | goto out_overflow; | ||
3022 | len = be32_to_cpup(p); | ||
3023 | if (len > NFS4_FHSIZE) | ||
3024 | return -EIO; | ||
3025 | p = xdr_inline_decode(xdr, len); | ||
3026 | if (unlikely(!p)) | ||
3027 | goto out_overflow; | ||
3028 | if (fh != NULL) { | ||
3029 | memcpy(fh->data, p, len); | ||
3030 | fh->size = len; | ||
3031 | } | ||
3032 | bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE; | ||
3033 | } | ||
3034 | return 0; | ||
3035 | out_overflow: | ||
3036 | print_overflow_msg(__func__, xdr); | ||
3037 | return -EIO; | ||
3038 | } | ||
3039 | |||
2851 | static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) | 3040 | static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res) |
2852 | { | 3041 | { |
2853 | __be32 *p; | 3042 | __be32 *p; |
@@ -3521,6 +3710,24 @@ static int decode_attr_time_metadata(struct xdr_stream *xdr, uint32_t *bitmap, s | |||
3521 | return status; | 3710 | return status; |
3522 | } | 3711 | } |
3523 | 3712 | ||
3713 | static int decode_attr_time_delta(struct xdr_stream *xdr, uint32_t *bitmap, | ||
3714 | struct timespec *time) | ||
3715 | { | ||
3716 | int status = 0; | ||
3717 | |||
3718 | time->tv_sec = 0; | ||
3719 | time->tv_nsec = 0; | ||
3720 | if (unlikely(bitmap[1] & (FATTR4_WORD1_TIME_DELTA - 1U))) | ||
3721 | return -EIO; | ||
3722 | if (likely(bitmap[1] & FATTR4_WORD1_TIME_DELTA)) { | ||
3723 | status = decode_attr_time(xdr, time); | ||
3724 | bitmap[1] &= ~FATTR4_WORD1_TIME_DELTA; | ||
3725 | } | ||
3726 | dprintk("%s: time_delta=%ld %ld\n", __func__, (long)time->tv_sec, | ||
3727 | (long)time->tv_nsec); | ||
3728 | return status; | ||
3729 | } | ||
3730 | |||
3524 | static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) | 3731 | static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time) |
3525 | { | 3732 | { |
3526 | int status = 0; | 3733 | int status = 0; |
@@ -3744,29 +3951,14 @@ xdr_error: | |||
3744 | return status; | 3951 | return status; |
3745 | } | 3952 | } |
3746 | 3953 | ||
3747 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | 3954 | static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, |
3955 | struct nfs_fattr *fattr, struct nfs_fh *fh, | ||
3748 | const struct nfs_server *server, int may_sleep) | 3956 | const struct nfs_server *server, int may_sleep) |
3749 | { | 3957 | { |
3750 | __be32 *savep; | ||
3751 | uint32_t attrlen, | ||
3752 | bitmap[2] = {0}, | ||
3753 | type; | ||
3754 | int status; | 3958 | int status; |
3755 | umode_t fmode = 0; | 3959 | umode_t fmode = 0; |
3756 | uint64_t fileid; | 3960 | uint64_t fileid; |
3757 | 3961 | uint32_t type; | |
3758 | status = decode_op_hdr(xdr, OP_GETATTR); | ||
3759 | if (status < 0) | ||
3760 | goto xdr_error; | ||
3761 | |||
3762 | status = decode_attr_bitmap(xdr, bitmap); | ||
3763 | if (status < 0) | ||
3764 | goto xdr_error; | ||
3765 | |||
3766 | status = decode_attr_length(xdr, &attrlen, &savep); | ||
3767 | if (status < 0) | ||
3768 | goto xdr_error; | ||
3769 | |||
3770 | 3962 | ||
3771 | status = decode_attr_type(xdr, bitmap, &type); | 3963 | status = decode_attr_type(xdr, bitmap, &type); |
3772 | if (status < 0) | 3964 | if (status < 0) |
@@ -3792,6 +3984,14 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | |||
3792 | goto xdr_error; | 3984 | goto xdr_error; |
3793 | fattr->valid |= status; | 3985 | fattr->valid |= status; |
3794 | 3986 | ||
3987 | status = decode_attr_error(xdr, bitmap); | ||
3988 | if (status < 0) | ||
3989 | goto xdr_error; | ||
3990 | |||
3991 | status = decode_attr_filehandle(xdr, bitmap, fh); | ||
3992 | if (status < 0) | ||
3993 | goto xdr_error; | ||
3994 | |||
3795 | status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); | 3995 | status = decode_attr_fileid(xdr, bitmap, &fattr->fileid); |
3796 | if (status < 0) | 3996 | if (status < 0) |
3797 | goto xdr_error; | 3997 | goto xdr_error; |
@@ -3862,12 +4062,101 @@ static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | |||
3862 | fattr->valid |= status; | 4062 | fattr->valid |= status; |
3863 | } | 4063 | } |
3864 | 4064 | ||
4065 | xdr_error: | ||
4066 | dprintk("%s: xdr returned %d\n", __func__, -status); | ||
4067 | return status; | ||
4068 | } | ||
4069 | |||
4070 | static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fattr, | ||
4071 | struct nfs_fh *fh, const struct nfs_server *server, int may_sleep) | ||
4072 | { | ||
4073 | __be32 *savep; | ||
4074 | uint32_t attrlen, | ||
4075 | bitmap[2] = {0}; | ||
4076 | int status; | ||
4077 | |||
4078 | status = decode_op_hdr(xdr, OP_GETATTR); | ||
4079 | if (status < 0) | ||
4080 | goto xdr_error; | ||
4081 | |||
4082 | status = decode_attr_bitmap(xdr, bitmap); | ||
4083 | if (status < 0) | ||
4084 | goto xdr_error; | ||
4085 | |||
4086 | status = decode_attr_length(xdr, &attrlen, &savep); | ||
4087 | if (status < 0) | ||
4088 | goto xdr_error; | ||
4089 | |||
4090 | status = decode_getfattr_attrs(xdr, bitmap, fattr, fh, server, may_sleep); | ||
4091 | if (status < 0) | ||
4092 | goto xdr_error; | ||
4093 | |||
3865 | status = verify_attr_len(xdr, savep, attrlen); | 4094 | status = verify_attr_len(xdr, savep, attrlen); |
3866 | xdr_error: | 4095 | xdr_error: |
3867 | dprintk("%s: xdr returned %d\n", __func__, -status); | 4096 | dprintk("%s: xdr returned %d\n", __func__, -status); |
3868 | return status; | 4097 | return status; |
3869 | } | 4098 | } |
3870 | 4099 | ||
4100 | static int decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *fattr, | ||
4101 | const struct nfs_server *server, int may_sleep) | ||
4102 | { | ||
4103 | return decode_getfattr_generic(xdr, fattr, NULL, server, may_sleep); | ||
4104 | } | ||
4105 | |||
4106 | /* | ||
4107 | * Decode potentially multiple layout types. Currently we only support | ||
4108 | * one layout driver per file system. | ||
4109 | */ | ||
4110 | static int decode_first_pnfs_layout_type(struct xdr_stream *xdr, | ||
4111 | uint32_t *layouttype) | ||
4112 | { | ||
4113 | uint32_t *p; | ||
4114 | int num; | ||
4115 | |||
4116 | p = xdr_inline_decode(xdr, 4); | ||
4117 | if (unlikely(!p)) | ||
4118 | goto out_overflow; | ||
4119 | num = be32_to_cpup(p); | ||
4120 | |||
4121 | /* pNFS is not supported by the underlying file system */ | ||
4122 | if (num == 0) { | ||
4123 | *layouttype = 0; | ||
4124 | return 0; | ||
4125 | } | ||
4126 | if (num > 1) | ||
4127 | printk(KERN_INFO "%s: Warning: Multiple pNFS layout drivers " | ||
4128 | "per filesystem not supported\n", __func__); | ||
4129 | |||
4130 | /* Decode and set first layout type, move xdr->p past unused types */ | ||
4131 | p = xdr_inline_decode(xdr, num * 4); | ||
4132 | if (unlikely(!p)) | ||
4133 | goto out_overflow; | ||
4134 | *layouttype = be32_to_cpup(p); | ||
4135 | return 0; | ||
4136 | out_overflow: | ||
4137 | print_overflow_msg(__func__, xdr); | ||
4138 | return -EIO; | ||
4139 | } | ||
4140 | |||
4141 | /* | ||
4142 | * The type of file system exported. | ||
4143 | * Note we must ensure that layouttype is set in any non-error case. | ||
4144 | */ | ||
4145 | static int decode_attr_pnfstype(struct xdr_stream *xdr, uint32_t *bitmap, | ||
4146 | uint32_t *layouttype) | ||
4147 | { | ||
4148 | int status = 0; | ||
4149 | |||
4150 | dprintk("%s: bitmap is %x\n", __func__, bitmap[1]); | ||
4151 | if (unlikely(bitmap[1] & (FATTR4_WORD1_FS_LAYOUT_TYPES - 1U))) | ||
4152 | return -EIO; | ||
4153 | if (bitmap[1] & FATTR4_WORD1_FS_LAYOUT_TYPES) { | ||
4154 | status = decode_first_pnfs_layout_type(xdr, layouttype); | ||
4155 | bitmap[1] &= ~FATTR4_WORD1_FS_LAYOUT_TYPES; | ||
4156 | } else | ||
4157 | *layouttype = 0; | ||
4158 | return status; | ||
4159 | } | ||
3871 | 4160 | ||
3872 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | 4161 | static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) |
3873 | { | 4162 | { |
@@ -3894,6 +4183,12 @@ static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) | |||
3894 | if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) | 4183 | if ((status = decode_attr_maxwrite(xdr, bitmap, &fsinfo->wtmax)) != 0) |
3895 | goto xdr_error; | 4184 | goto xdr_error; |
3896 | fsinfo->wtpref = fsinfo->wtmax; | 4185 | fsinfo->wtpref = fsinfo->wtmax; |
4186 | status = decode_attr_time_delta(xdr, bitmap, &fsinfo->time_delta); | ||
4187 | if (status != 0) | ||
4188 | goto xdr_error; | ||
4189 | status = decode_attr_pnfstype(xdr, bitmap, &fsinfo->layouttype); | ||
4190 | if (status != 0) | ||
4191 | goto xdr_error; | ||
3897 | 4192 | ||
3898 | status = verify_attr_len(xdr, savep, attrlen); | 4193 | status = verify_attr_len(xdr, savep, attrlen); |
3899 | xdr_error: | 4194 | xdr_error: |
@@ -3950,13 +4245,13 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) | |||
3950 | __be32 *p; | 4245 | __be32 *p; |
3951 | uint32_t namelen, type; | 4246 | uint32_t namelen, type; |
3952 | 4247 | ||
3953 | p = xdr_inline_decode(xdr, 32); | 4248 | p = xdr_inline_decode(xdr, 32); /* read 32 bytes */ |
3954 | if (unlikely(!p)) | 4249 | if (unlikely(!p)) |
3955 | goto out_overflow; | 4250 | goto out_overflow; |
3956 | p = xdr_decode_hyper(p, &offset); | 4251 | p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */ |
3957 | p = xdr_decode_hyper(p, &length); | 4252 | p = xdr_decode_hyper(p, &length); |
3958 | type = be32_to_cpup(p++); | 4253 | type = be32_to_cpup(p++); /* 4 byte read */ |
3959 | if (fl != NULL) { | 4254 | if (fl != NULL) { /* manipulate file lock */ |
3960 | fl->fl_start = (loff_t)offset; | 4255 | fl->fl_start = (loff_t)offset; |
3961 | fl->fl_end = fl->fl_start + (loff_t)length - 1; | 4256 | fl->fl_end = fl->fl_start + (loff_t)length - 1; |
3962 | if (length == ~(uint64_t)0) | 4257 | if (length == ~(uint64_t)0) |
@@ -3966,9 +4261,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl) | |||
3966 | fl->fl_type = F_RDLCK; | 4261 | fl->fl_type = F_RDLCK; |
3967 | fl->fl_pid = 0; | 4262 | fl->fl_pid = 0; |
3968 | } | 4263 | } |
3969 | p = xdr_decode_hyper(p, &clientid); | 4264 | p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */ |
3970 | namelen = be32_to_cpup(p); | 4265 | namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */ |
3971 | p = xdr_inline_decode(xdr, namelen); | 4266 | p = xdr_inline_decode(xdr, namelen); /* variable size field */ |
3972 | if (likely(p)) | 4267 | if (likely(p)) |
3973 | return -NFS4ERR_DENIED; | 4268 | return -NFS4ERR_DENIED; |
3974 | out_overflow: | 4269 | out_overflow: |
@@ -4200,12 +4495,9 @@ out_overflow: | |||
4200 | static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) | 4495 | static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) |
4201 | { | 4496 | { |
4202 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; | 4497 | struct xdr_buf *rcvbuf = &req->rq_rcv_buf; |
4203 | struct page *page = *rcvbuf->pages; | ||
4204 | struct kvec *iov = rcvbuf->head; | 4498 | struct kvec *iov = rcvbuf->head; |
4205 | size_t hdrlen; | 4499 | size_t hdrlen; |
4206 | u32 recvd, pglen = rcvbuf->page_len; | 4500 | u32 recvd, pglen = rcvbuf->page_len; |
4207 | __be32 *end, *entry, *p, *kaddr; | ||
4208 | unsigned int nr = 0; | ||
4209 | int status; | 4501 | int status; |
4210 | 4502 | ||
4211 | status = decode_op_hdr(xdr, OP_READDIR); | 4503 | status = decode_op_hdr(xdr, OP_READDIR); |
@@ -4225,71 +4517,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n | |||
4225 | pglen = recvd; | 4517 | pglen = recvd; |
4226 | xdr_read_pages(xdr, pglen); | 4518 | xdr_read_pages(xdr, pglen); |
4227 | 4519 | ||
4228 | BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); | 4520 | |
4229 | kaddr = p = kmap_atomic(page, KM_USER0); | ||
4230 | end = p + ((pglen + readdir->pgbase) >> 2); | ||
4231 | entry = p; | ||
4232 | |||
4233 | /* Make sure the packet actually has a value_follows and EOF entry */ | ||
4234 | if ((entry + 1) > end) | ||
4235 | goto short_pkt; | ||
4236 | |||
4237 | for (; *p++; nr++) { | ||
4238 | u32 len, attrlen, xlen; | ||
4239 | if (end - p < 3) | ||
4240 | goto short_pkt; | ||
4241 | dprintk("cookie = %Lu, ", *((unsigned long long *)p)); | ||
4242 | p += 2; /* cookie */ | ||
4243 | len = ntohl(*p++); /* filename length */ | ||
4244 | if (len > NFS4_MAXNAMLEN) { | ||
4245 | dprintk("NFS: giant filename in readdir (len 0x%x)\n", | ||
4246 | len); | ||
4247 | goto err_unmap; | ||
4248 | } | ||
4249 | xlen = XDR_QUADLEN(len); | ||
4250 | if (end - p < xlen + 1) | ||
4251 | goto short_pkt; | ||
4252 | dprintk("filename = %*s\n", len, (char *)p); | ||
4253 | p += xlen; | ||
4254 | len = ntohl(*p++); /* bitmap length */ | ||
4255 | if (end - p < len + 1) | ||
4256 | goto short_pkt; | ||
4257 | p += len; | ||
4258 | attrlen = XDR_QUADLEN(ntohl(*p++)); | ||
4259 | if (end - p < attrlen + 2) | ||
4260 | goto short_pkt; | ||
4261 | p += attrlen; /* attributes */ | ||
4262 | entry = p; | ||
4263 | } | ||
4264 | /* | ||
4265 | * Apparently some server sends responses that are a valid size, but | ||
4266 | * contain no entries, and have value_follows==0 and EOF==0. For | ||
4267 | * those, just set the EOF marker. | ||
4268 | */ | ||
4269 | if (!nr && entry[1] == 0) { | ||
4270 | dprintk("NFS: readdir reply truncated!\n"); | ||
4271 | entry[1] = 1; | ||
4272 | } | ||
4273 | out: | ||
4274 | kunmap_atomic(kaddr, KM_USER0); | ||
4275 | return 0; | 4521 | return 0; |
4276 | short_pkt: | ||
4277 | /* | ||
4278 | * When we get a short packet there are 2 possibilities. We can | ||
4279 | * return an error, or fix up the response to look like a valid | ||
4280 | * response and return what we have so far. If there are no | ||
4281 | * entries and the packet was short, then return -EIO. If there | ||
4282 | * are valid entries in the response, return them and pretend that | ||
4283 | * the call was successful, but incomplete. The caller can retry the | ||
4284 | * readdir starting at the last cookie. | ||
4285 | */ | ||
4286 | dprintk("%s: short packet at entry %d\n", __func__, nr); | ||
4287 | entry[0] = entry[1] = 0; | ||
4288 | if (nr) | ||
4289 | goto out; | ||
4290 | err_unmap: | ||
4291 | kunmap_atomic(kaddr, KM_USER0); | ||
4292 | return -errno_NFSERR_IO; | ||
4293 | } | 4522 | } |
4294 | 4523 | ||
4295 | static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | 4524 | static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) |
@@ -4299,7 +4528,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | |||
4299 | size_t hdrlen; | 4528 | size_t hdrlen; |
4300 | u32 len, recvd; | 4529 | u32 len, recvd; |
4301 | __be32 *p; | 4530 | __be32 *p; |
4302 | char *kaddr; | ||
4303 | int status; | 4531 | int status; |
4304 | 4532 | ||
4305 | status = decode_op_hdr(xdr, OP_READLINK); | 4533 | status = decode_op_hdr(xdr, OP_READLINK); |
@@ -4330,9 +4558,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) | |||
4330 | * and and null-terminate the text (the VFS expects | 4558 | * and and null-terminate the text (the VFS expects |
4331 | * null-termination). | 4559 | * null-termination). |
4332 | */ | 4560 | */ |
4333 | kaddr = (char *)kmap_atomic(rcvbuf->pages[0], KM_USER0); | 4561 | xdr_terminate_string(rcvbuf, len); |
4334 | kaddr[len+rcvbuf->page_base] = '\0'; | ||
4335 | kunmap_atomic(kaddr, KM_USER0); | ||
4336 | return 0; | 4562 | return 0; |
4337 | out_overflow: | 4563 | out_overflow: |
4338 | print_overflow_msg(__func__, xdr); | 4564 | print_overflow_msg(__func__, xdr); |
@@ -4668,7 +4894,6 @@ static int decode_sequence(struct xdr_stream *xdr, | |||
4668 | struct rpc_rqst *rqstp) | 4894 | struct rpc_rqst *rqstp) |
4669 | { | 4895 | { |
4670 | #if defined(CONFIG_NFS_V4_1) | 4896 | #if defined(CONFIG_NFS_V4_1) |
4671 | struct nfs4_slot *slot; | ||
4672 | struct nfs4_sessionid id; | 4897 | struct nfs4_sessionid id; |
4673 | u32 dummy; | 4898 | u32 dummy; |
4674 | int status; | 4899 | int status; |
@@ -4700,15 +4925,14 @@ static int decode_sequence(struct xdr_stream *xdr, | |||
4700 | goto out_overflow; | 4925 | goto out_overflow; |
4701 | 4926 | ||
4702 | /* seqid */ | 4927 | /* seqid */ |
4703 | slot = &res->sr_session->fc_slot_table.slots[res->sr_slotid]; | ||
4704 | dummy = be32_to_cpup(p++); | 4928 | dummy = be32_to_cpup(p++); |
4705 | if (dummy != slot->seq_nr) { | 4929 | if (dummy != res->sr_slot->seq_nr) { |
4706 | dprintk("%s Invalid sequence number\n", __func__); | 4930 | dprintk("%s Invalid sequence number\n", __func__); |
4707 | goto out_err; | 4931 | goto out_err; |
4708 | } | 4932 | } |
4709 | /* slot id */ | 4933 | /* slot id */ |
4710 | dummy = be32_to_cpup(p++); | 4934 | dummy = be32_to_cpup(p++); |
4711 | if (dummy != res->sr_slotid) { | 4935 | if (dummy != res->sr_slot - res->sr_session->fc_slot_table.slots) { |
4712 | dprintk("%s Invalid slot id\n", __func__); | 4936 | dprintk("%s Invalid slot id\n", __func__); |
4713 | goto out_err; | 4937 | goto out_err; |
4714 | } | 4938 | } |
@@ -4731,6 +4955,134 @@ out_overflow: | |||
4731 | #endif /* CONFIG_NFS_V4_1 */ | 4955 | #endif /* CONFIG_NFS_V4_1 */ |
4732 | } | 4956 | } |
4733 | 4957 | ||
4958 | #if defined(CONFIG_NFS_V4_1) | ||
4959 | |||
4960 | static int decode_getdeviceinfo(struct xdr_stream *xdr, | ||
4961 | struct pnfs_device *pdev) | ||
4962 | { | ||
4963 | __be32 *p; | ||
4964 | uint32_t len, type; | ||
4965 | int status; | ||
4966 | |||
4967 | status = decode_op_hdr(xdr, OP_GETDEVICEINFO); | ||
4968 | if (status) { | ||
4969 | if (status == -ETOOSMALL) { | ||
4970 | p = xdr_inline_decode(xdr, 4); | ||
4971 | if (unlikely(!p)) | ||
4972 | goto out_overflow; | ||
4973 | pdev->mincount = be32_to_cpup(p); | ||
4974 | dprintk("%s: Min count too small. mincnt = %u\n", | ||
4975 | __func__, pdev->mincount); | ||
4976 | } | ||
4977 | return status; | ||
4978 | } | ||
4979 | |||
4980 | p = xdr_inline_decode(xdr, 8); | ||
4981 | if (unlikely(!p)) | ||
4982 | goto out_overflow; | ||
4983 | type = be32_to_cpup(p++); | ||
4984 | if (type != pdev->layout_type) { | ||
4985 | dprintk("%s: layout mismatch req: %u pdev: %u\n", | ||
4986 | __func__, pdev->layout_type, type); | ||
4987 | return -EINVAL; | ||
4988 | } | ||
4989 | /* | ||
4990 | * Get the length of the opaque device_addr4. xdr_read_pages places | ||
4991 | * the opaque device_addr4 in the xdr_buf->pages (pnfs_device->pages) | ||
4992 | * and places the remaining xdr data in xdr_buf->tail | ||
4993 | */ | ||
4994 | pdev->mincount = be32_to_cpup(p); | ||
4995 | xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ | ||
4996 | |||
4997 | /* Parse notification bitmap, verifying that it is zero. */ | ||
4998 | p = xdr_inline_decode(xdr, 4); | ||
4999 | if (unlikely(!p)) | ||
5000 | goto out_overflow; | ||
5001 | len = be32_to_cpup(p); | ||
5002 | if (len) { | ||
5003 | int i; | ||
5004 | |||
5005 | p = xdr_inline_decode(xdr, 4 * len); | ||
5006 | if (unlikely(!p)) | ||
5007 | goto out_overflow; | ||
5008 | for (i = 0; i < len; i++, p++) { | ||
5009 | if (be32_to_cpup(p)) { | ||
5010 | dprintk("%s: notifications not supported\n", | ||
5011 | __func__); | ||
5012 | return -EIO; | ||
5013 | } | ||
5014 | } | ||
5015 | } | ||
5016 | return 0; | ||
5017 | out_overflow: | ||
5018 | print_overflow_msg(__func__, xdr); | ||
5019 | return -EIO; | ||
5020 | } | ||
5021 | |||
5022 | static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, | ||
5023 | struct nfs4_layoutget_res *res) | ||
5024 | { | ||
5025 | __be32 *p; | ||
5026 | int status; | ||
5027 | u32 layout_count; | ||
5028 | |||
5029 | status = decode_op_hdr(xdr, OP_LAYOUTGET); | ||
5030 | if (status) | ||
5031 | return status; | ||
5032 | p = xdr_inline_decode(xdr, 8 + NFS4_STATEID_SIZE); | ||
5033 | if (unlikely(!p)) | ||
5034 | goto out_overflow; | ||
5035 | res->return_on_close = be32_to_cpup(p++); | ||
5036 | p = xdr_decode_opaque_fixed(p, res->stateid.data, NFS4_STATEID_SIZE); | ||
5037 | layout_count = be32_to_cpup(p); | ||
5038 | if (!layout_count) { | ||
5039 | dprintk("%s: server responded with empty layout array\n", | ||
5040 | __func__); | ||
5041 | return -EINVAL; | ||
5042 | } | ||
5043 | |||
5044 | p = xdr_inline_decode(xdr, 24); | ||
5045 | if (unlikely(!p)) | ||
5046 | goto out_overflow; | ||
5047 | p = xdr_decode_hyper(p, &res->range.offset); | ||
5048 | p = xdr_decode_hyper(p, &res->range.length); | ||
5049 | res->range.iomode = be32_to_cpup(p++); | ||
5050 | res->type = be32_to_cpup(p++); | ||
5051 | |||
5052 | status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p); | ||
5053 | if (unlikely(status)) | ||
5054 | return status; | ||
5055 | |||
5056 | dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", | ||
5057 | __func__, | ||
5058 | (unsigned long)res->range.offset, | ||
5059 | (unsigned long)res->range.length, | ||
5060 | res->range.iomode, | ||
5061 | res->type, | ||
5062 | res->layout.len); | ||
5063 | |||
5064 | /* nfs4_proc_layoutget allocated a single page */ | ||
5065 | if (res->layout.len > PAGE_SIZE) | ||
5066 | return -ENOMEM; | ||
5067 | memcpy(res->layout.buf, p, res->layout.len); | ||
5068 | |||
5069 | if (layout_count > 1) { | ||
5070 | /* We only handle a length one array at the moment. Any | ||
5071 | * further entries are just ignored. Note that this means | ||
5072 | * the client may see a response that is less than the | ||
5073 | * minimum it requested. | ||
5074 | */ | ||
5075 | dprintk("%s: server responded with %d layouts, dropping tail\n", | ||
5076 | __func__, layout_count); | ||
5077 | } | ||
5078 | |||
5079 | return 0; | ||
5080 | out_overflow: | ||
5081 | print_overflow_msg(__func__, xdr); | ||
5082 | return -EIO; | ||
5083 | } | ||
5084 | #endif /* CONFIG_NFS_V4_1 */ | ||
5085 | |||
4734 | /* | 5086 | /* |
4735 | * END OF "GENERIC" DECODE ROUTINES. | 5087 | * END OF "GENERIC" DECODE ROUTINES. |
4736 | */ | 5088 | */ |
@@ -4873,7 +5225,7 @@ out: | |||
4873 | /* | 5225 | /* |
4874 | * Decode RENAME response | 5226 | * Decode RENAME response |
4875 | */ | 5227 | */ |
4876 | static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs4_rename_res *res) | 5228 | static int nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, __be32 *p, struct nfs_renameres *res) |
4877 | { | 5229 | { |
4878 | struct xdr_stream xdr; | 5230 | struct xdr_stream xdr; |
4879 | struct compound_hdr hdr; | 5231 | struct compound_hdr hdr; |
@@ -5758,25 +6110,84 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, uint32_t *p, | |||
5758 | status = decode_reclaim_complete(&xdr, (void *)NULL); | 6110 | status = decode_reclaim_complete(&xdr, (void *)NULL); |
5759 | return status; | 6111 | return status; |
5760 | } | 6112 | } |
6113 | |||
6114 | /* | ||
6115 | * Decode GETDEVINFO response | ||
6116 | */ | ||
6117 | static int nfs4_xdr_dec_getdeviceinfo(struct rpc_rqst *rqstp, uint32_t *p, | ||
6118 | struct nfs4_getdeviceinfo_res *res) | ||
6119 | { | ||
6120 | struct xdr_stream xdr; | ||
6121 | struct compound_hdr hdr; | ||
6122 | int status; | ||
6123 | |||
6124 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
6125 | status = decode_compound_hdr(&xdr, &hdr); | ||
6126 | if (status != 0) | ||
6127 | goto out; | ||
6128 | status = decode_sequence(&xdr, &res->seq_res, rqstp); | ||
6129 | if (status != 0) | ||
6130 | goto out; | ||
6131 | status = decode_getdeviceinfo(&xdr, res->pdev); | ||
6132 | out: | ||
6133 | return status; | ||
6134 | } | ||
6135 | |||
6136 | /* | ||
6137 | * Decode LAYOUTGET response | ||
6138 | */ | ||
6139 | static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp, uint32_t *p, | ||
6140 | struct nfs4_layoutget_res *res) | ||
6141 | { | ||
6142 | struct xdr_stream xdr; | ||
6143 | struct compound_hdr hdr; | ||
6144 | int status; | ||
6145 | |||
6146 | xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); | ||
6147 | status = decode_compound_hdr(&xdr, &hdr); | ||
6148 | if (status) | ||
6149 | goto out; | ||
6150 | status = decode_sequence(&xdr, &res->seq_res, rqstp); | ||
6151 | if (status) | ||
6152 | goto out; | ||
6153 | status = decode_putfh(&xdr); | ||
6154 | if (status) | ||
6155 | goto out; | ||
6156 | status = decode_layoutget(&xdr, rqstp, res); | ||
6157 | out: | ||
6158 | return status; | ||
6159 | } | ||
5761 | #endif /* CONFIG_NFS_V4_1 */ | 6160 | #endif /* CONFIG_NFS_V4_1 */ |
5762 | 6161 | ||
5763 | __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) | 6162 | __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, |
6163 | struct nfs_server *server, int plus) | ||
5764 | { | 6164 | { |
5765 | uint32_t bitmap[2] = {0}; | 6165 | uint32_t bitmap[2] = {0}; |
5766 | uint32_t len; | 6166 | uint32_t len; |
5767 | 6167 | __be32 *p = xdr_inline_decode(xdr, 4); | |
5768 | if (!*p++) { | 6168 | if (unlikely(!p)) |
5769 | if (!*p) | 6169 | goto out_overflow; |
6170 | if (!ntohl(*p++)) { | ||
6171 | p = xdr_inline_decode(xdr, 4); | ||
6172 | if (unlikely(!p)) | ||
6173 | goto out_overflow; | ||
6174 | if (!ntohl(*p++)) | ||
5770 | return ERR_PTR(-EAGAIN); | 6175 | return ERR_PTR(-EAGAIN); |
5771 | entry->eof = 1; | 6176 | entry->eof = 1; |
5772 | return ERR_PTR(-EBADCOOKIE); | 6177 | return ERR_PTR(-EBADCOOKIE); |
5773 | } | 6178 | } |
5774 | 6179 | ||
6180 | p = xdr_inline_decode(xdr, 12); | ||
6181 | if (unlikely(!p)) | ||
6182 | goto out_overflow; | ||
5775 | entry->prev_cookie = entry->cookie; | 6183 | entry->prev_cookie = entry->cookie; |
5776 | p = xdr_decode_hyper(p, &entry->cookie); | 6184 | p = xdr_decode_hyper(p, &entry->cookie); |
5777 | entry->len = ntohl(*p++); | 6185 | entry->len = ntohl(*p++); |
6186 | |||
6187 | p = xdr_inline_decode(xdr, entry->len); | ||
6188 | if (unlikely(!p)) | ||
6189 | goto out_overflow; | ||
5778 | entry->name = (const char *) p; | 6190 | entry->name = (const char *) p; |
5779 | p += XDR_QUADLEN(entry->len); | ||
5780 | 6191 | ||
5781 | /* | 6192 | /* |
5782 | * In case the server doesn't return an inode number, | 6193 | * In case the server doesn't return an inode number, |
@@ -5784,32 +6195,33 @@ __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) | |||
5784 | * since glibc seems to choke on it...) | 6195 | * since glibc seems to choke on it...) |
5785 | */ | 6196 | */ |
5786 | entry->ino = 1; | 6197 | entry->ino = 1; |
6198 | entry->fattr->valid = 0; | ||
5787 | 6199 | ||
5788 | len = ntohl(*p++); /* bitmap length */ | 6200 | if (decode_attr_bitmap(xdr, bitmap) < 0) |
5789 | if (len-- > 0) { | 6201 | goto out_overflow; |
5790 | bitmap[0] = ntohl(*p++); | 6202 | |
5791 | if (len-- > 0) { | 6203 | if (decode_attr_length(xdr, &len, &p) < 0) |
5792 | bitmap[1] = ntohl(*p++); | 6204 | goto out_overflow; |
5793 | p += len; | 6205 | |
5794 | } | 6206 | if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, server, 1) < 0) |
5795 | } | 6207 | goto out_overflow; |
5796 | len = XDR_QUADLEN(ntohl(*p++)); /* attribute buffer length */ | 6208 | if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) |
5797 | if (len > 0) { | 6209 | entry->ino = entry->fattr->fileid; |
5798 | if (bitmap[0] & FATTR4_WORD0_RDATTR_ERROR) { | 6210 | |
5799 | bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; | 6211 | if (verify_attr_len(xdr, p, len) < 0) |
5800 | /* Ignore the return value of rdattr_error for now */ | 6212 | goto out_overflow; |
5801 | p++; | 6213 | |
5802 | len--; | 6214 | p = xdr_inline_peek(xdr, 8); |
5803 | } | 6215 | if (p != NULL) |
5804 | if (bitmap[0] == 0 && bitmap[1] == FATTR4_WORD1_MOUNTED_ON_FILEID) | 6216 | entry->eof = !p[0] && p[1]; |
5805 | xdr_decode_hyper(p, &entry->ino); | 6217 | else |
5806 | else if (bitmap[0] == FATTR4_WORD0_FILEID) | 6218 | entry->eof = 0; |
5807 | xdr_decode_hyper(p, &entry->ino); | ||
5808 | p += len; | ||
5809 | } | ||
5810 | 6219 | ||
5811 | entry->eof = !p[0] && p[1]; | ||
5812 | return p; | 6220 | return p; |
6221 | |||
6222 | out_overflow: | ||
6223 | print_overflow_msg(__func__, xdr); | ||
6224 | return ERR_PTR(-EIO); | ||
5813 | } | 6225 | } |
5814 | 6226 | ||
5815 | /* | 6227 | /* |
@@ -5936,6 +6348,8 @@ struct rpc_procinfo nfs4_procedures[] = { | |||
5936 | PROC(SEQUENCE, enc_sequence, dec_sequence), | 6348 | PROC(SEQUENCE, enc_sequence, dec_sequence), |
5937 | PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), | 6349 | PROC(GET_LEASE_TIME, enc_get_lease_time, dec_get_lease_time), |
5938 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), | 6350 | PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), |
6351 | PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), | ||
6352 | PROC(LAYOUTGET, enc_layoutget, dec_layoutget), | ||
5939 | #endif /* CONFIG_NFS_V4_1 */ | 6353 | #endif /* CONFIG_NFS_V4_1 */ |
5940 | }; | 6354 | }; |
5941 | 6355 | ||
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index df101d9f546a..903908a20023 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
@@ -3,9 +3,10 @@ | |||
3 | * | 3 | * |
4 | * Allow an NFS filesystem to be mounted as root. The way this works is: | 4 | * Allow an NFS filesystem to be mounted as root. The way this works is: |
5 | * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. | 5 | * (1) Use the IP autoconfig mechanism to set local IP addresses and routes. |
6 | * (2) Handle RPC negotiation with the system which replied to RARP or | 6 | * (2) Construct the device string and the options string using DHCP |
7 | * was reported as a boot server by BOOTP or manually. | 7 | * option 17 and/or kernel command line options. |
8 | * (3) The actual mounting is done later, when init() is running. | 8 | * (3) When mount_root() sets up the root file system, pass these strings |
9 | * to the NFS client's regular mount interface via sys_mount(). | ||
9 | * | 10 | * |
10 | * | 11 | * |
11 | * Changes: | 12 | * Changes: |
@@ -65,470 +66,245 @@ | |||
65 | * Hua Qin : Support for mounting root file system via | 66 | * Hua Qin : Support for mounting root file system via |
66 | * NFS over TCP. | 67 | * NFS over TCP. |
67 | * Fabian Frederick: Option parser rebuilt (using parser lib) | 68 | * Fabian Frederick: Option parser rebuilt (using parser lib) |
68 | */ | 69 | * Chuck Lever : Use super.c's text-based mount option parsing |
70 | * Chuck Lever : Add "nfsrootdebug". | ||
71 | */ | ||
69 | 72 | ||
70 | #include <linux/types.h> | 73 | #include <linux/types.h> |
71 | #include <linux/string.h> | 74 | #include <linux/string.h> |
72 | #include <linux/kernel.h> | ||
73 | #include <linux/time.h> | ||
74 | #include <linux/fs.h> | ||
75 | #include <linux/init.h> | 75 | #include <linux/init.h> |
76 | #include <linux/sunrpc/clnt.h> | ||
77 | #include <linux/sunrpc/xprtsock.h> | ||
78 | #include <linux/nfs.h> | 76 | #include <linux/nfs.h> |
79 | #include <linux/nfs_fs.h> | 77 | #include <linux/nfs_fs.h> |
80 | #include <linux/nfs_mount.h> | ||
81 | #include <linux/in.h> | ||
82 | #include <linux/major.h> | ||
83 | #include <linux/utsname.h> | 78 | #include <linux/utsname.h> |
84 | #include <linux/inet.h> | ||
85 | #include <linux/root_dev.h> | 79 | #include <linux/root_dev.h> |
86 | #include <net/ipconfig.h> | 80 | #include <net/ipconfig.h> |
87 | #include <linux/parser.h> | ||
88 | 81 | ||
89 | #include "internal.h" | 82 | #include "internal.h" |
90 | 83 | ||
91 | /* Define this to allow debugging output */ | ||
92 | #undef NFSROOT_DEBUG | ||
93 | #define NFSDBG_FACILITY NFSDBG_ROOT | 84 | #define NFSDBG_FACILITY NFSDBG_ROOT |
94 | 85 | ||
95 | /* Default port to use if server is not running a portmapper */ | ||
96 | #define NFS_MNT_PORT 627 | ||
97 | |||
98 | /* Default path we try to mount. "%s" gets replaced by our IP address */ | 86 | /* Default path we try to mount. "%s" gets replaced by our IP address */ |
99 | #define NFS_ROOT "/tftpboot/%s" | 87 | #define NFS_ROOT "/tftpboot/%s" |
100 | 88 | ||
101 | /* Parameters passed from the kernel command line */ | 89 | /* Parameters passed from the kernel command line */ |
102 | static char nfs_root_name[256] __initdata = ""; | 90 | static char nfs_root_parms[256] __initdata = ""; |
91 | |||
92 | /* Text-based mount options passed to super.c */ | ||
93 | static char nfs_root_options[256] __initdata = ""; | ||
103 | 94 | ||
104 | /* Address of NFS server */ | 95 | /* Address of NFS server */ |
105 | static __be32 servaddr __initdata = 0; | 96 | static __be32 servaddr __initdata = htonl(INADDR_NONE); |
106 | 97 | ||
107 | /* Name of directory to mount */ | 98 | /* Name of directory to mount */ |
108 | static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = { 0, }; | 99 | static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = ""; |
109 | |||
110 | /* NFS-related data */ | ||
111 | static struct nfs_mount_data nfs_data __initdata = { 0, };/* NFS mount info */ | ||
112 | static int nfs_port __initdata = 0; /* Port to connect to for NFS */ | ||
113 | static int mount_port __initdata = 0; /* Mount daemon port number */ | ||
114 | |||
115 | |||
116 | /*************************************************************************** | ||
117 | |||
118 | Parsing of options | ||
119 | |||
120 | ***************************************************************************/ | ||
121 | |||
122 | enum { | ||
123 | /* Options that take integer arguments */ | ||
124 | Opt_port, Opt_rsize, Opt_wsize, Opt_timeo, Opt_retrans, Opt_acregmin, | ||
125 | Opt_acregmax, Opt_acdirmin, Opt_acdirmax, | ||
126 | /* Options that take no arguments */ | ||
127 | Opt_soft, Opt_hard, Opt_intr, | ||
128 | Opt_nointr, Opt_posix, Opt_noposix, Opt_cto, Opt_nocto, Opt_ac, | ||
129 | Opt_noac, Opt_lock, Opt_nolock, Opt_v2, Opt_v3, Opt_udp, Opt_tcp, | ||
130 | Opt_acl, Opt_noacl, | ||
131 | /* Error token */ | ||
132 | Opt_err | ||
133 | }; | ||
134 | |||
135 | static const match_table_t tokens __initconst = { | ||
136 | {Opt_port, "port=%u"}, | ||
137 | {Opt_rsize, "rsize=%u"}, | ||
138 | {Opt_wsize, "wsize=%u"}, | ||
139 | {Opt_timeo, "timeo=%u"}, | ||
140 | {Opt_retrans, "retrans=%u"}, | ||
141 | {Opt_acregmin, "acregmin=%u"}, | ||
142 | {Opt_acregmax, "acregmax=%u"}, | ||
143 | {Opt_acdirmin, "acdirmin=%u"}, | ||
144 | {Opt_acdirmax, "acdirmax=%u"}, | ||
145 | {Opt_soft, "soft"}, | ||
146 | {Opt_hard, "hard"}, | ||
147 | {Opt_intr, "intr"}, | ||
148 | {Opt_nointr, "nointr"}, | ||
149 | {Opt_posix, "posix"}, | ||
150 | {Opt_noposix, "noposix"}, | ||
151 | {Opt_cto, "cto"}, | ||
152 | {Opt_nocto, "nocto"}, | ||
153 | {Opt_ac, "ac"}, | ||
154 | {Opt_noac, "noac"}, | ||
155 | {Opt_lock, "lock"}, | ||
156 | {Opt_nolock, "nolock"}, | ||
157 | {Opt_v2, "nfsvers=2"}, | ||
158 | {Opt_v2, "v2"}, | ||
159 | {Opt_v3, "nfsvers=3"}, | ||
160 | {Opt_v3, "v3"}, | ||
161 | {Opt_udp, "proto=udp"}, | ||
162 | {Opt_udp, "udp"}, | ||
163 | {Opt_tcp, "proto=tcp"}, | ||
164 | {Opt_tcp, "tcp"}, | ||
165 | {Opt_acl, "acl"}, | ||
166 | {Opt_noacl, "noacl"}, | ||
167 | {Opt_err, NULL} | ||
168 | |||
169 | }; | ||
170 | 100 | ||
101 | /* server:export path string passed to super.c */ | ||
102 | static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = ""; | ||
103 | |||
104 | #ifdef RPC_DEBUG | ||
171 | /* | 105 | /* |
172 | * Parse option string. | 106 | * When the "nfsrootdebug" kernel command line option is specified, |
107 | * enable debugging messages for NFSROOT. | ||
173 | */ | 108 | */ |
174 | 109 | static int __init nfs_root_debug(char *__unused) | |
175 | static int __init root_nfs_parse(char *name, char *buf) | ||
176 | { | 110 | { |
177 | 111 | nfs_debug |= NFSDBG_ROOT | NFSDBG_MOUNT; | |
178 | char *p; | ||
179 | substring_t args[MAX_OPT_ARGS]; | ||
180 | int option; | ||
181 | |||
182 | if (!name) | ||
183 | return 1; | ||
184 | |||
185 | /* Set the NFS remote path */ | ||
186 | p = strsep(&name, ","); | ||
187 | if (p[0] != '\0' && strcmp(p, "default") != 0) | ||
188 | strlcpy(buf, p, NFS_MAXPATHLEN); | ||
189 | |||
190 | while ((p = strsep (&name, ",")) != NULL) { | ||
191 | int token; | ||
192 | if (!*p) | ||
193 | continue; | ||
194 | token = match_token(p, tokens, args); | ||
195 | |||
196 | /* %u tokens only. Beware if you add new tokens! */ | ||
197 | if (token < Opt_soft && match_int(&args[0], &option)) | ||
198 | return 0; | ||
199 | switch (token) { | ||
200 | case Opt_port: | ||
201 | nfs_port = option; | ||
202 | break; | ||
203 | case Opt_rsize: | ||
204 | nfs_data.rsize = option; | ||
205 | break; | ||
206 | case Opt_wsize: | ||
207 | nfs_data.wsize = option; | ||
208 | break; | ||
209 | case Opt_timeo: | ||
210 | nfs_data.timeo = option; | ||
211 | break; | ||
212 | case Opt_retrans: | ||
213 | nfs_data.retrans = option; | ||
214 | break; | ||
215 | case Opt_acregmin: | ||
216 | nfs_data.acregmin = option; | ||
217 | break; | ||
218 | case Opt_acregmax: | ||
219 | nfs_data.acregmax = option; | ||
220 | break; | ||
221 | case Opt_acdirmin: | ||
222 | nfs_data.acdirmin = option; | ||
223 | break; | ||
224 | case Opt_acdirmax: | ||
225 | nfs_data.acdirmax = option; | ||
226 | break; | ||
227 | case Opt_soft: | ||
228 | nfs_data.flags |= NFS_MOUNT_SOFT; | ||
229 | break; | ||
230 | case Opt_hard: | ||
231 | nfs_data.flags &= ~NFS_MOUNT_SOFT; | ||
232 | break; | ||
233 | case Opt_intr: | ||
234 | case Opt_nointr: | ||
235 | break; | ||
236 | case Opt_posix: | ||
237 | nfs_data.flags |= NFS_MOUNT_POSIX; | ||
238 | break; | ||
239 | case Opt_noposix: | ||
240 | nfs_data.flags &= ~NFS_MOUNT_POSIX; | ||
241 | break; | ||
242 | case Opt_cto: | ||
243 | nfs_data.flags &= ~NFS_MOUNT_NOCTO; | ||
244 | break; | ||
245 | case Opt_nocto: | ||
246 | nfs_data.flags |= NFS_MOUNT_NOCTO; | ||
247 | break; | ||
248 | case Opt_ac: | ||
249 | nfs_data.flags &= ~NFS_MOUNT_NOAC; | ||
250 | break; | ||
251 | case Opt_noac: | ||
252 | nfs_data.flags |= NFS_MOUNT_NOAC; | ||
253 | break; | ||
254 | case Opt_lock: | ||
255 | nfs_data.flags &= ~NFS_MOUNT_NONLM; | ||
256 | break; | ||
257 | case Opt_nolock: | ||
258 | nfs_data.flags |= NFS_MOUNT_NONLM; | ||
259 | break; | ||
260 | case Opt_v2: | ||
261 | nfs_data.flags &= ~NFS_MOUNT_VER3; | ||
262 | break; | ||
263 | case Opt_v3: | ||
264 | nfs_data.flags |= NFS_MOUNT_VER3; | ||
265 | break; | ||
266 | case Opt_udp: | ||
267 | nfs_data.flags &= ~NFS_MOUNT_TCP; | ||
268 | break; | ||
269 | case Opt_tcp: | ||
270 | nfs_data.flags |= NFS_MOUNT_TCP; | ||
271 | break; | ||
272 | case Opt_acl: | ||
273 | nfs_data.flags &= ~NFS_MOUNT_NOACL; | ||
274 | break; | ||
275 | case Opt_noacl: | ||
276 | nfs_data.flags |= NFS_MOUNT_NOACL; | ||
277 | break; | ||
278 | default: | ||
279 | printk(KERN_WARNING "Root-NFS: unknown " | ||
280 | "option: %s\n", p); | ||
281 | return 0; | ||
282 | } | ||
283 | } | ||
284 | |||
285 | return 1; | 112 | return 1; |
286 | } | 113 | } |
287 | 114 | ||
115 | __setup("nfsrootdebug", nfs_root_debug); | ||
116 | #endif | ||
117 | |||
288 | /* | 118 | /* |
289 | * Prepare the NFS data structure and parse all options. | 119 | * Parse NFS server and directory information passed on the kernel |
120 | * command line. | ||
121 | * | ||
122 | * nfsroot=[<server-ip>:]<root-dir>[,<nfs-options>] | ||
123 | * | ||
124 | * If there is a "%s" token in the <root-dir> string, it is replaced | ||
125 | * by the ASCII-representation of the client's IP address. | ||
290 | */ | 126 | */ |
291 | static int __init root_nfs_name(char *name) | 127 | static int __init nfs_root_setup(char *line) |
292 | { | 128 | { |
293 | static char buf[NFS_MAXPATHLEN] __initdata; | 129 | ROOT_DEV = Root_NFS; |
294 | char *cp; | 130 | |
295 | 131 | if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { | |
296 | /* Set some default values */ | 132 | strlcpy(nfs_root_parms, line, sizeof(nfs_root_parms)); |
297 | memset(&nfs_data, 0, sizeof(nfs_data)); | 133 | } else { |
298 | nfs_port = -1; | 134 | size_t n = strlen(line) + sizeof(NFS_ROOT) - 1; |
299 | nfs_data.version = NFS_MOUNT_VERSION; | 135 | if (n >= sizeof(nfs_root_parms)) |
300 | nfs_data.flags = NFS_MOUNT_NONLM; /* No lockd in nfs root yet */ | 136 | line[sizeof(nfs_root_parms) - sizeof(NFS_ROOT) - 2] = '\0'; |
301 | nfs_data.rsize = NFS_DEF_FILE_IO_SIZE; | 137 | sprintf(nfs_root_parms, NFS_ROOT, line); |
302 | nfs_data.wsize = NFS_DEF_FILE_IO_SIZE; | ||
303 | nfs_data.acregmin = NFS_DEF_ACREGMIN; | ||
304 | nfs_data.acregmax = NFS_DEF_ACREGMAX; | ||
305 | nfs_data.acdirmin = NFS_DEF_ACDIRMIN; | ||
306 | nfs_data.acdirmax = NFS_DEF_ACDIRMAX; | ||
307 | strcpy(buf, NFS_ROOT); | ||
308 | |||
309 | /* Process options received from the remote server */ | ||
310 | root_nfs_parse(root_server_path, buf); | ||
311 | |||
312 | /* Override them by options set on kernel command-line */ | ||
313 | root_nfs_parse(name, buf); | ||
314 | |||
315 | cp = utsname()->nodename; | ||
316 | if (strlen(buf) + strlen(cp) > NFS_MAXPATHLEN) { | ||
317 | printk(KERN_ERR "Root-NFS: Pathname for remote directory too long.\n"); | ||
318 | return -1; | ||
319 | } | 138 | } |
320 | sprintf(nfs_export_path, buf, cp); | 139 | |
140 | /* | ||
141 | * Extract the IP address of the NFS server containing our | ||
142 | * root file system, if one was specified. | ||
143 | * | ||
144 | * Note: root_nfs_parse_addr() removes the server-ip from | ||
145 | * nfs_root_parms, if it exists. | ||
146 | */ | ||
147 | root_server_addr = root_nfs_parse_addr(nfs_root_parms); | ||
321 | 148 | ||
322 | return 1; | 149 | return 1; |
323 | } | 150 | } |
324 | 151 | ||
152 | __setup("nfsroot=", nfs_root_setup); | ||
325 | 153 | ||
326 | /* | 154 | static int __init root_nfs_copy(char *dest, const char *src, |
327 | * Get NFS server address. | 155 | const size_t destlen) |
328 | */ | ||
329 | static int __init root_nfs_addr(void) | ||
330 | { | 156 | { |
331 | if ((servaddr = root_server_addr) == htonl(INADDR_NONE)) { | 157 | if (strlcpy(dest, src, destlen) > destlen) |
332 | printk(KERN_ERR "Root-NFS: No NFS server available, giving up.\n"); | ||
333 | return -1; | 158 | return -1; |
334 | } | 159 | return 0; |
160 | } | ||
335 | 161 | ||
336 | snprintf(nfs_data.hostname, sizeof(nfs_data.hostname), | 162 | static int __init root_nfs_cat(char *dest, const char *src, |
337 | "%pI4", &servaddr); | 163 | const size_t destlen) |
164 | { | ||
165 | if (strlcat(dest, src, destlen) > destlen) | ||
166 | return -1; | ||
338 | return 0; | 167 | return 0; |
339 | } | 168 | } |
340 | 169 | ||
341 | /* | 170 | /* |
342 | * Tell the user what's going on. | 171 | * Parse out root export path and mount options from |
172 | * passed-in string @incoming. | ||
173 | * | ||
174 | * Copy the export path into @exppath. | ||
343 | */ | 175 | */ |
344 | #ifdef NFSROOT_DEBUG | 176 | static int __init root_nfs_parse_options(char *incoming, char *exppath, |
345 | static void __init root_nfs_print(void) | 177 | const size_t exppathlen) |
346 | { | 178 | { |
347 | printk(KERN_NOTICE "Root-NFS: Mounting %s on server %s as root\n", | 179 | char *p; |
348 | nfs_export_path, nfs_data.hostname); | ||
349 | printk(KERN_NOTICE "Root-NFS: rsize = %d, wsize = %d, timeo = %d, retrans = %d\n", | ||
350 | nfs_data.rsize, nfs_data.wsize, nfs_data.timeo, nfs_data.retrans); | ||
351 | printk(KERN_NOTICE "Root-NFS: acreg (min,max) = (%d,%d), acdir (min,max) = (%d,%d)\n", | ||
352 | nfs_data.acregmin, nfs_data.acregmax, | ||
353 | nfs_data.acdirmin, nfs_data.acdirmax); | ||
354 | printk(KERN_NOTICE "Root-NFS: nfsd port = %d, mountd port = %d, flags = %08x\n", | ||
355 | nfs_port, mount_port, nfs_data.flags); | ||
356 | } | ||
357 | #endif | ||
358 | |||
359 | 180 | ||
360 | static int __init root_nfs_init(void) | 181 | /* |
361 | { | 182 | * Set the NFS remote path |
362 | #ifdef NFSROOT_DEBUG | 183 | */ |
363 | nfs_debug |= NFSDBG_ROOT; | 184 | p = strsep(&incoming, ","); |
364 | #endif | 185 | if (*p != '\0' && strcmp(p, "default") != 0) |
186 | if (root_nfs_copy(exppath, p, exppathlen)) | ||
187 | return -1; | ||
365 | 188 | ||
366 | /* | 189 | /* |
367 | * Decode the root directory path name and NFS options from | 190 | * @incoming now points to the rest of the string; if it |
368 | * the kernel command line. This has to go here in order to | 191 | * contains something, append it to our root options buffer |
369 | * be able to use the client IP address for the remote root | ||
370 | * directory (necessary for pure RARP booting). | ||
371 | */ | 192 | */ |
372 | if (root_nfs_name(nfs_root_name) < 0 || | 193 | if (incoming != NULL && *incoming != '\0') |
373 | root_nfs_addr() < 0) | 194 | if (root_nfs_cat(nfs_root_options, incoming, |
374 | return -1; | 195 | sizeof(nfs_root_options))) |
196 | return -1; | ||
375 | 197 | ||
376 | #ifdef NFSROOT_DEBUG | 198 | /* |
377 | root_nfs_print(); | 199 | * Possibly prepare for more options to be appended |
378 | #endif | 200 | */ |
201 | if (nfs_root_options[0] != '\0' && | ||
202 | nfs_root_options[strlen(nfs_root_options)] != ',') | ||
203 | if (root_nfs_cat(nfs_root_options, ",", | ||
204 | sizeof(nfs_root_options))) | ||
205 | return -1; | ||
379 | 206 | ||
380 | return 0; | 207 | return 0; |
381 | } | 208 | } |
382 | 209 | ||
383 | |||
384 | /* | 210 | /* |
385 | * Parse NFS server and directory information passed on the kernel | 211 | * Decode the export directory path name and NFS options from |
386 | * command line. | 212 | * the kernel command line. This has to be done late in order to |
213 | * use a dynamically acquired client IP address for the remote | ||
214 | * root directory path. | ||
215 | * | ||
216 | * Returns zero if successful; otherwise -1 is returned. | ||
387 | */ | 217 | */ |
388 | static int __init nfs_root_setup(char *line) | 218 | static int __init root_nfs_data(char *cmdline) |
389 | { | 219 | { |
390 | ROOT_DEV = Root_NFS; | 220 | char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; |
391 | if (line[0] == '/' || line[0] == ',' || (line[0] >= '0' && line[0] <= '9')) { | 221 | int len, retval = -1; |
392 | strlcpy(nfs_root_name, line, sizeof(nfs_root_name)); | 222 | char *tmp = NULL; |
393 | } else { | 223 | const size_t tmplen = sizeof(nfs_export_path); |
394 | int n = strlen(line) + sizeof(NFS_ROOT) - 1; | 224 | |
395 | if (n >= sizeof(nfs_root_name)) | 225 | tmp = kzalloc(tmplen, GFP_KERNEL); |
396 | line[sizeof(nfs_root_name) - sizeof(NFS_ROOT) - 2] = '\0'; | 226 | if (tmp == NULL) |
397 | sprintf(nfs_root_name, NFS_ROOT, line); | 227 | goto out_nomem; |
228 | strcpy(tmp, NFS_ROOT); | ||
229 | |||
230 | if (root_server_path[0] != '\0') { | ||
231 | dprintk("Root-NFS: DHCPv4 option 17: %s\n", | ||
232 | root_server_path); | ||
233 | if (root_nfs_parse_options(root_server_path, tmp, tmplen)) | ||
234 | goto out_optionstoolong; | ||
398 | } | 235 | } |
399 | root_server_addr = root_nfs_parse_addr(nfs_root_name); | ||
400 | return 1; | ||
401 | } | ||
402 | |||
403 | __setup("nfsroot=", nfs_root_setup); | ||
404 | |||
405 | /*************************************************************************** | ||
406 | 236 | ||
407 | Routines to actually mount the root directory | 237 | if (cmdline[0] != '\0') { |
238 | dprintk("Root-NFS: nfsroot=%s\n", cmdline); | ||
239 | if (root_nfs_parse_options(cmdline, tmp, tmplen)) | ||
240 | goto out_optionstoolong; | ||
241 | } | ||
408 | 242 | ||
409 | ***************************************************************************/ | 243 | /* |
244 | * Append mandatory options for nfsroot so they override | ||
245 | * what has come before | ||
246 | */ | ||
247 | snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", | ||
248 | &servaddr); | ||
249 | if (root_nfs_cat(nfs_root_options, addr_option, | ||
250 | sizeof(nfs_root_options))) | ||
251 | goto out_optionstoolong; | ||
410 | 252 | ||
411 | /* | 253 | /* |
412 | * Construct sockaddr_in from address and port number. | 254 | * Set up nfs_root_device. For NFS mounts, this looks like |
413 | */ | 255 | * |
414 | static inline void | 256 | * server:/path |
415 | set_sockaddr(struct sockaddr_in *sin, __be32 addr, __be16 port) | 257 | * |
416 | { | 258 | * At this point, utsname()->nodename contains our local |
417 | sin->sin_family = AF_INET; | 259 | * IP address or hostname, set by ipconfig. If "%s" exists |
418 | sin->sin_addr.s_addr = addr; | 260 | * in tmp, substitute the nodename, then shovel the whole |
419 | sin->sin_port = port; | 261 | * mess into nfs_root_device. |
420 | } | 262 | */ |
263 | len = snprintf(nfs_export_path, sizeof(nfs_export_path), | ||
264 | tmp, utsname()->nodename); | ||
265 | if (len > (int)sizeof(nfs_export_path)) | ||
266 | goto out_devnametoolong; | ||
267 | len = snprintf(nfs_root_device, sizeof(nfs_root_device), | ||
268 | "%pI4:%s", &servaddr, nfs_export_path); | ||
269 | if (len > (int)sizeof(nfs_root_device)) | ||
270 | goto out_devnametoolong; | ||
421 | 271 | ||
422 | /* | 272 | retval = 0; |
423 | * Query server portmapper for the port of a daemon program. | ||
424 | */ | ||
425 | static int __init root_nfs_getport(int program, int version, int proto) | ||
426 | { | ||
427 | struct sockaddr_in sin; | ||
428 | 273 | ||
429 | printk(KERN_NOTICE "Looking up port of RPC %d/%d on %pI4\n", | 274 | out: |
430 | program, version, &servaddr); | 275 | kfree(tmp); |
431 | set_sockaddr(&sin, servaddr, 0); | 276 | return retval; |
432 | return rpcb_getport_sync(&sin, program, version, proto); | 277 | out_nomem: |
278 | printk(KERN_ERR "Root-NFS: could not allocate memory\n"); | ||
279 | goto out; | ||
280 | out_optionstoolong: | ||
281 | printk(KERN_ERR "Root-NFS: mount options string too long\n"); | ||
282 | goto out; | ||
283 | out_devnametoolong: | ||
284 | printk(KERN_ERR "Root-NFS: root device name too long.\n"); | ||
285 | goto out; | ||
433 | } | 286 | } |
434 | 287 | ||
/**
 * nfs_root_data - Return prepared 'data' for NFSROOT mount
 * @root_device: OUT: address of string containing NFSROOT device
 * @root_data: OUT: address of string containing NFSROOT mount options
 *
 * Returns zero and sets @root_device and @root_data if successful,
 * otherwise -1 is returned.
 */
int __init nfs_root_data(char **root_device, char **root_data)
{
	/* Server address was discovered by ipconfig / "nfsroot=" parsing. */
	servaddr = root_server_addr;
	if (servaddr == htonl(INADDR_NONE)) {
		printk(KERN_ERR "Root-NFS: no NFS server address\n");
		return -1;
	}

	/* Build nfs_root_device and nfs_root_options from the command line. */
	if (root_nfs_data(nfs_root_parms) < 0)
		return -1;

	/* Hand back pointers into the static buffers filled above. */
	*root_device = nfs_root_device;
	*root_data = nfs_root_options;
	return 0;
}
483 | |||
484 | |||
485 | /* | ||
486 | * Get a file handle from the server for the directory which is to be | ||
487 | * mounted. | ||
488 | */ | ||
489 | static int __init root_nfs_get_handle(void) | ||
490 | { | ||
491 | struct sockaddr_in sin; | ||
492 | unsigned int auth_flav_len = 0; | ||
493 | struct nfs_mount_request request = { | ||
494 | .sap = (struct sockaddr *)&sin, | ||
495 | .salen = sizeof(sin), | ||
496 | .dirpath = nfs_export_path, | ||
497 | .version = (nfs_data.flags & NFS_MOUNT_VER3) ? | ||
498 | NFS_MNT3_VERSION : NFS_MNT_VERSION, | ||
499 | .protocol = (nfs_data.flags & NFS_MOUNT_TCP) ? | ||
500 | XPRT_TRANSPORT_TCP : XPRT_TRANSPORT_UDP, | ||
501 | .auth_flav_len = &auth_flav_len, | ||
502 | }; | ||
503 | int status = -ENOMEM; | ||
504 | |||
505 | request.fh = nfs_alloc_fhandle(); | ||
506 | if (!request.fh) | ||
507 | goto out; | ||
508 | set_sockaddr(&sin, servaddr, htons(mount_port)); | ||
509 | status = nfs_mount(&request); | ||
510 | if (status < 0) | ||
511 | printk(KERN_ERR "Root-NFS: Server returned error %d " | ||
512 | "while mounting %s\n", status, nfs_export_path); | ||
513 | else { | ||
514 | nfs_data.root.size = request.fh->size; | ||
515 | memcpy(&nfs_data.root.data, request.fh->data, request.fh->size); | ||
516 | } | ||
517 | nfs_free_fhandle(request.fh); | ||
518 | out: | ||
519 | return status; | ||
520 | } | ||
521 | |||
522 | /* | ||
523 | * Get the NFS port numbers and file handle, and return the prepared 'data' | ||
524 | * argument for mount() if everything went OK. Return NULL otherwise. | ||
525 | */ | ||
526 | void * __init nfs_root_data(void) | ||
527 | { | ||
528 | if (root_nfs_init() < 0 | ||
529 | || root_nfs_ports() < 0 | ||
530 | || root_nfs_get_handle() < 0) | ||
531 | return NULL; | ||
532 | set_sockaddr((struct sockaddr_in *) &nfs_data.addr, servaddr, htons(nfs_port)); | ||
533 | return (void*)&nfs_data; | ||
534 | } | ||
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c new file mode 100644 index 000000000000..db773428f95f --- /dev/null +++ b/fs/nfs/pnfs.c | |||
@@ -0,0 +1,783 @@ | |||
1 | /* | ||
2 | * pNFS functions to call and manage layout drivers. | ||
3 | * | ||
4 | * Copyright (c) 2002 [year of first publication] | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * | ||
10 | * Permission is granted to use, copy, create derivative works, and | ||
11 | * redistribute this software and such derivative works for any purpose, | ||
12 | * so long as the name of the University of Michigan is not used in | ||
13 | * any advertising or publicity pertaining to the use or distribution | ||
14 | * of this software without specific, written prior authorization. If | ||
15 | * the above copyright notice or any other identification of the | ||
16 | * University of Michigan is included in any copy of any portion of | ||
17 | * this software, then the disclaimer below must also be included. | ||
18 | * | ||
19 | * This software is provided as is, without representation or warranty | ||
20 | * of any kind either express or implied, including without limitation | ||
21 | * the implied warranties of merchantability, fitness for a particular | ||
22 | * purpose, or noninfringement. The Regents of the University of | ||
23 | * Michigan shall not be liable for any damages, including special, | ||
24 | * indirect, incidental, or consequential damages, with respect to any | ||
25 | * claim arising out of or in connection with the use of the software, | ||
26 | * even if it has been or is hereafter advised of the possibility of | ||
27 | * such damages. | ||
28 | */ | ||
29 | |||
30 | #include <linux/nfs_fs.h> | ||
31 | #include "internal.h" | ||
32 | #include "pnfs.h" | ||
33 | |||
34 | #define NFSDBG_FACILITY NFSDBG_PNFS | ||
35 | |||
36 | /* Locking: | ||
37 | * | ||
38 | * pnfs_spinlock: | ||
39 | * protects pnfs_modules_tbl. | ||
40 | */ | ||
41 | static DEFINE_SPINLOCK(pnfs_spinlock); | ||
42 | |||
43 | /* | ||
44 | * pnfs_modules_tbl holds all pnfs modules | ||
45 | */ | ||
46 | static LIST_HEAD(pnfs_modules_tbl); | ||
47 | |||
48 | /* Return the registered pnfs layout driver module matching given id */ | ||
49 | static struct pnfs_layoutdriver_type * | ||
50 | find_pnfs_driver_locked(u32 id) | ||
51 | { | ||
52 | struct pnfs_layoutdriver_type *local; | ||
53 | |||
54 | list_for_each_entry(local, &pnfs_modules_tbl, pnfs_tblid) | ||
55 | if (local->id == id) | ||
56 | goto out; | ||
57 | local = NULL; | ||
58 | out: | ||
59 | dprintk("%s: Searching for id %u, found %p\n", __func__, id, local); | ||
60 | return local; | ||
61 | } | ||
62 | |||
63 | static struct pnfs_layoutdriver_type * | ||
64 | find_pnfs_driver(u32 id) | ||
65 | { | ||
66 | struct pnfs_layoutdriver_type *local; | ||
67 | |||
68 | spin_lock(&pnfs_spinlock); | ||
69 | local = find_pnfs_driver_locked(id); | ||
70 | spin_unlock(&pnfs_spinlock); | ||
71 | return local; | ||
72 | } | ||
73 | |||
74 | void | ||
75 | unset_pnfs_layoutdriver(struct nfs_server *nfss) | ||
76 | { | ||
77 | if (nfss->pnfs_curr_ld) { | ||
78 | nfss->pnfs_curr_ld->clear_layoutdriver(nfss); | ||
79 | module_put(nfss->pnfs_curr_ld->owner); | ||
80 | } | ||
81 | nfss->pnfs_curr_ld = NULL; | ||
82 | } | ||
83 | |||
84 | /* | ||
85 | * Try to set the server's pnfs module to the pnfs layout type specified by id. | ||
86 | * Currently only one pNFS layout driver per filesystem is supported. | ||
87 | * | ||
88 | * @id layout type. Zero (illegal layout type) indicates pNFS not in use. | ||
89 | */ | ||
90 | void | ||
91 | set_pnfs_layoutdriver(struct nfs_server *server, u32 id) | ||
92 | { | ||
93 | struct pnfs_layoutdriver_type *ld_type = NULL; | ||
94 | |||
95 | if (id == 0) | ||
96 | goto out_no_driver; | ||
97 | if (!(server->nfs_client->cl_exchange_flags & | ||
98 | (EXCHGID4_FLAG_USE_NON_PNFS | EXCHGID4_FLAG_USE_PNFS_MDS))) { | ||
99 | printk(KERN_ERR "%s: id %u cl_exchange_flags 0x%x\n", __func__, | ||
100 | id, server->nfs_client->cl_exchange_flags); | ||
101 | goto out_no_driver; | ||
102 | } | ||
103 | ld_type = find_pnfs_driver(id); | ||
104 | if (!ld_type) { | ||
105 | request_module("%s-%u", LAYOUT_NFSV4_1_MODULE_PREFIX, id); | ||
106 | ld_type = find_pnfs_driver(id); | ||
107 | if (!ld_type) { | ||
108 | dprintk("%s: No pNFS module found for %u.\n", | ||
109 | __func__, id); | ||
110 | goto out_no_driver; | ||
111 | } | ||
112 | } | ||
113 | if (!try_module_get(ld_type->owner)) { | ||
114 | dprintk("%s: Could not grab reference on module\n", __func__); | ||
115 | goto out_no_driver; | ||
116 | } | ||
117 | server->pnfs_curr_ld = ld_type; | ||
118 | if (ld_type->set_layoutdriver(server)) { | ||
119 | printk(KERN_ERR | ||
120 | "%s: Error initializing mount point for layout driver %u.\n", | ||
121 | __func__, id); | ||
122 | module_put(ld_type->owner); | ||
123 | goto out_no_driver; | ||
124 | } | ||
125 | dprintk("%s: pNFS module for %u set\n", __func__, id); | ||
126 | return; | ||
127 | |||
128 | out_no_driver: | ||
129 | dprintk("%s: Using NFSv4 I/O\n", __func__); | ||
130 | server->pnfs_curr_ld = NULL; | ||
131 | } | ||
132 | |||
133 | int | ||
134 | pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *ld_type) | ||
135 | { | ||
136 | int status = -EINVAL; | ||
137 | struct pnfs_layoutdriver_type *tmp; | ||
138 | |||
139 | if (ld_type->id == 0) { | ||
140 | printk(KERN_ERR "%s id 0 is reserved\n", __func__); | ||
141 | return status; | ||
142 | } | ||
143 | if (!ld_type->alloc_lseg || !ld_type->free_lseg) { | ||
144 | printk(KERN_ERR "%s Layout driver must provide " | ||
145 | "alloc_lseg and free_lseg.\n", __func__); | ||
146 | return status; | ||
147 | } | ||
148 | |||
149 | spin_lock(&pnfs_spinlock); | ||
150 | tmp = find_pnfs_driver_locked(ld_type->id); | ||
151 | if (!tmp) { | ||
152 | list_add(&ld_type->pnfs_tblid, &pnfs_modules_tbl); | ||
153 | status = 0; | ||
154 | dprintk("%s Registering id:%u name:%s\n", __func__, ld_type->id, | ||
155 | ld_type->name); | ||
156 | } else { | ||
157 | printk(KERN_ERR "%s Module with id %d already loaded!\n", | ||
158 | __func__, ld_type->id); | ||
159 | } | ||
160 | spin_unlock(&pnfs_spinlock); | ||
161 | |||
162 | return status; | ||
163 | } | ||
164 | EXPORT_SYMBOL_GPL(pnfs_register_layoutdriver); | ||
165 | |||
/* Remove a layout driver from pnfs_modules_tbl under pnfs_spinlock. */
void
pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *ld_type)
{
	dprintk("%s Deregistering id:%u\n", __func__, ld_type->id);
	spin_lock(&pnfs_spinlock);
	list_del(&ld_type->pnfs_tblid);
	spin_unlock(&pnfs_spinlock);
}
EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
175 | |||
/*
 * pNFS client layout cache
 */

/* Take a reference on @lo; caller must hold lo->inode->i_lock. */
static void
get_layout_hdr_locked(struct pnfs_layout_hdr *lo)
{
	assert_spin_locked(&lo->inode->i_lock);
	lo->refcount++;
}
186 | |||
187 | static void | ||
188 | put_layout_hdr_locked(struct pnfs_layout_hdr *lo) | ||
189 | { | ||
190 | assert_spin_locked(&lo->inode->i_lock); | ||
191 | BUG_ON(lo->refcount == 0); | ||
192 | |||
193 | lo->refcount--; | ||
194 | if (!lo->refcount) { | ||
195 | dprintk("%s: freeing layout cache %p\n", __func__, lo); | ||
196 | BUG_ON(!list_empty(&lo->layouts)); | ||
197 | NFS_I(lo->inode)->layout = NULL; | ||
198 | kfree(lo); | ||
199 | } | ||
200 | } | ||
201 | |||
/* Drop a layout-header reference, taking/releasing the inode lock. */
void
put_layout_hdr(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	put_layout_hdr_locked(NFS_I(inode)->layout);
	spin_unlock(&inode->i_lock);
}
209 | |||
/* Initialize a freshly allocated layout segment and tie it to @lo. */
static void
init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
{
	INIT_LIST_HEAD(&lseg->fi_list);
	kref_init(&lseg->kref);		/* reference count starts at 1 */
	lseg->layout = lo;
}
217 | |||
218 | /* Called without i_lock held, as the free_lseg call may sleep */ | ||
219 | static void | ||
220 | destroy_lseg(struct kref *kref) | ||
221 | { | ||
222 | struct pnfs_layout_segment *lseg = | ||
223 | container_of(kref, struct pnfs_layout_segment, kref); | ||
224 | struct inode *ino = lseg->layout->inode; | ||
225 | |||
226 | dprintk("--> %s\n", __func__); | ||
227 | NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); | ||
228 | /* Matched by get_layout_hdr_locked in pnfs_insert_layout */ | ||
229 | put_layout_hdr(ino); | ||
230 | } | ||
231 | |||
232 | static void | ||
233 | put_lseg(struct pnfs_layout_segment *lseg) | ||
234 | { | ||
235 | if (!lseg) | ||
236 | return; | ||
237 | |||
238 | dprintk("%s: lseg %p ref %d\n", __func__, lseg, | ||
239 | atomic_read(&lseg->kref.refcount)); | ||
240 | kref_put(&lseg->kref, destroy_lseg); | ||
241 | } | ||
242 | |||
/*
 * Move every segment of @lo onto @tmp_list, unlink the layout from the
 * per-client cl_layouts list, and clear the cached layout stateid.
 * Caller holds lo->inode->i_lock; the segments are actually freed later,
 * outside the lock, by pnfs_free_lseg_list() (free_lseg may sleep).
 */
static void
pnfs_clear_lseg_list(struct pnfs_layout_hdr *lo, struct list_head *tmp_list)
{
	struct pnfs_layout_segment *lseg, *next;
	struct nfs_client *clp;

	dprintk("%s:Begin lo %p\n", __func__, lo);

	assert_spin_locked(&lo->inode->i_lock);
	list_for_each_entry_safe(lseg, next, &lo->segs, fi_list) {
		dprintk("%s: freeing lseg %p\n", __func__, lseg);
		list_move(&lseg->fi_list, tmp_list);
	}
	/* cl_lock nests inside i_lock here. */
	clp = NFS_SERVER(lo->inode)->nfs_client;
	spin_lock(&clp->cl_lock);
	/* List does not take a reference, so no need for put here */
	list_del_init(&lo->layouts);
	spin_unlock(&clp->cl_lock);
	write_seqlock(&lo->seqlock);
	clear_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
	write_sequnlock(&lo->seqlock);

	dprintk("%s:Return\n", __func__);
}
267 | |||
268 | static void | ||
269 | pnfs_free_lseg_list(struct list_head *tmp_list) | ||
270 | { | ||
271 | struct pnfs_layout_segment *lseg; | ||
272 | |||
273 | while (!list_empty(tmp_list)) { | ||
274 | lseg = list_entry(tmp_list->next, struct pnfs_layout_segment, | ||
275 | fi_list); | ||
276 | dprintk("%s calling put_lseg on %p\n", __func__, lseg); | ||
277 | list_del(&lseg->fi_list); | ||
278 | put_lseg(lseg); | ||
279 | } | ||
280 | } | ||
281 | |||
282 | void | ||
283 | pnfs_destroy_layout(struct nfs_inode *nfsi) | ||
284 | { | ||
285 | struct pnfs_layout_hdr *lo; | ||
286 | LIST_HEAD(tmp_list); | ||
287 | |||
288 | spin_lock(&nfsi->vfs_inode.i_lock); | ||
289 | lo = nfsi->layout; | ||
290 | if (lo) { | ||
291 | pnfs_clear_lseg_list(lo, &tmp_list); | ||
292 | /* Matched by refcount set to 1 in alloc_init_layout_hdr */ | ||
293 | put_layout_hdr_locked(lo); | ||
294 | } | ||
295 | spin_unlock(&nfsi->vfs_inode.i_lock); | ||
296 | pnfs_free_lseg_list(&tmp_list); | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * Called by the state manger to remove all layouts established under an | ||
301 | * expired lease. | ||
302 | */ | ||
303 | void | ||
304 | pnfs_destroy_all_layouts(struct nfs_client *clp) | ||
305 | { | ||
306 | struct pnfs_layout_hdr *lo; | ||
307 | LIST_HEAD(tmp_list); | ||
308 | |||
309 | spin_lock(&clp->cl_lock); | ||
310 | list_splice_init(&clp->cl_layouts, &tmp_list); | ||
311 | spin_unlock(&clp->cl_lock); | ||
312 | |||
313 | while (!list_empty(&tmp_list)) { | ||
314 | lo = list_entry(tmp_list.next, struct pnfs_layout_hdr, | ||
315 | layouts); | ||
316 | dprintk("%s freeing layout for inode %lu\n", __func__, | ||
317 | lo->inode->i_ino); | ||
318 | pnfs_destroy_layout(NFS_I(lo->inode)); | ||
319 | } | ||
320 | } | ||
321 | |||
322 | /* update lo->stateid with new if is more recent | ||
323 | * | ||
324 | * lo->stateid could be the open stateid, in which case we just use what given. | ||
325 | */ | ||
326 | static void | ||
327 | pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, | ||
328 | const nfs4_stateid *new) | ||
329 | { | ||
330 | nfs4_stateid *old = &lo->stateid; | ||
331 | bool overwrite = false; | ||
332 | |||
333 | write_seqlock(&lo->seqlock); | ||
334 | if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state) || | ||
335 | memcmp(old->stateid.other, new->stateid.other, sizeof(new->stateid.other))) | ||
336 | overwrite = true; | ||
337 | else { | ||
338 | u32 oldseq, newseq; | ||
339 | |||
340 | oldseq = be32_to_cpu(old->stateid.seqid); | ||
341 | newseq = be32_to_cpu(new->stateid.seqid); | ||
342 | if ((int)(newseq - oldseq) > 0) | ||
343 | overwrite = true; | ||
344 | } | ||
345 | if (overwrite) | ||
346 | memcpy(&old->stateid, &new->stateid, sizeof(new->stateid)); | ||
347 | write_sequnlock(&lo->seqlock); | ||
348 | } | ||
349 | |||
/*
 * Seed the layout stateid from the open stateid of @state.
 * Taking lo->seqlock for writing bumps its sequence count, which forces
 * the reader loop in pnfs_get_layout_stateid() to retry and pick up the
 * value stored here.
 */
static void
pnfs_layout_from_open_stateid(struct pnfs_layout_hdr *lo,
			      struct nfs4_state *state)
{
	int seq;

	dprintk("--> %s\n", __func__);
	write_seqlock(&lo->seqlock);
	do {
		/* Re-read until we see a stable copy of the open stateid. */
		seq = read_seqbegin(&state->seqlock);
		memcpy(lo->stateid.data, state->stateid.data,
		       sizeof(state->stateid.data));
	} while (read_seqretry(&state->seqlock, seq));
	set_bit(NFS_LAYOUT_STATEID_SET, &lo->state);
	write_sequnlock(&lo->seqlock);
	dprintk("<-- %s\n", __func__);
}
367 | |||
/*
 * Copy the current layout stateid into @dst. If no layout stateid has
 * been established yet, seed it from @open_state; that write bumps the
 * seqlock and restarts this read loop, so @dst always ends up with a
 * consistent value.
 */
void
pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
			struct nfs4_state *open_state)
{
	int seq;

	dprintk("--> %s\n", __func__);
	do {
		seq = read_seqbegin(&lo->seqlock);
		if (!test_bit(NFS_LAYOUT_STATEID_SET, &lo->state)) {
			/* This will trigger retry of the read */
			pnfs_layout_from_open_stateid(lo, open_state);
		} else
			memcpy(dst->data, lo->stateid.data,
			       sizeof(lo->stateid.data));
	} while (read_seqretry(&lo->seqlock, seq));
	dprintk("<-- %s\n", __func__);
}
386 | |||
387 | /* | ||
388 | * Get layout from server. | ||
389 | * for now, assume that whole file layouts are requested. | ||
390 | * arg->offset: 0 | ||
391 | * arg->length: all ones | ||
392 | */ | ||
393 | static struct pnfs_layout_segment * | ||
394 | send_layoutget(struct pnfs_layout_hdr *lo, | ||
395 | struct nfs_open_context *ctx, | ||
396 | u32 iomode) | ||
397 | { | ||
398 | struct inode *ino = lo->inode; | ||
399 | struct nfs_server *server = NFS_SERVER(ino); | ||
400 | struct nfs4_layoutget *lgp; | ||
401 | struct pnfs_layout_segment *lseg = NULL; | ||
402 | |||
403 | dprintk("--> %s\n", __func__); | ||
404 | |||
405 | BUG_ON(ctx == NULL); | ||
406 | lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); | ||
407 | if (lgp == NULL) { | ||
408 | put_layout_hdr(lo->inode); | ||
409 | return NULL; | ||
410 | } | ||
411 | lgp->args.minlength = NFS4_MAX_UINT64; | ||
412 | lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; | ||
413 | lgp->args.range.iomode = iomode; | ||
414 | lgp->args.range.offset = 0; | ||
415 | lgp->args.range.length = NFS4_MAX_UINT64; | ||
416 | lgp->args.type = server->pnfs_curr_ld->id; | ||
417 | lgp->args.inode = ino; | ||
418 | lgp->args.ctx = get_nfs_open_context(ctx); | ||
419 | lgp->lsegpp = &lseg; | ||
420 | |||
421 | /* Synchronously retrieve layout information from server and | ||
422 | * store in lseg. | ||
423 | */ | ||
424 | nfs4_proc_layoutget(lgp); | ||
425 | if (!lseg) { | ||
426 | /* remember that LAYOUTGET failed and suspend trying */ | ||
427 | set_bit(lo_fail_bit(iomode), &lo->state); | ||
428 | } | ||
429 | return lseg; | ||
430 | } | ||
431 | |||
432 | /* | ||
433 | * Compare two layout segments for sorting into layout cache. | ||
434 | * We want to preferentially return RW over RO layouts, so ensure those | ||
435 | * are seen first. | ||
436 | */ | ||
437 | static s64 | ||
438 | cmp_layout(u32 iomode1, u32 iomode2) | ||
439 | { | ||
440 | /* read > read/write */ | ||
441 | return (int)(iomode2 == IOMODE_READ) - (int)(iomode1 == IOMODE_READ); | ||
442 | } | ||
443 | |||
444 | static void | ||
445 | pnfs_insert_layout(struct pnfs_layout_hdr *lo, | ||
446 | struct pnfs_layout_segment *lseg) | ||
447 | { | ||
448 | struct pnfs_layout_segment *lp; | ||
449 | int found = 0; | ||
450 | |||
451 | dprintk("%s:Begin\n", __func__); | ||
452 | |||
453 | assert_spin_locked(&lo->inode->i_lock); | ||
454 | if (list_empty(&lo->segs)) { | ||
455 | struct nfs_client *clp = NFS_SERVER(lo->inode)->nfs_client; | ||
456 | |||
457 | spin_lock(&clp->cl_lock); | ||
458 | BUG_ON(!list_empty(&lo->layouts)); | ||
459 | list_add_tail(&lo->layouts, &clp->cl_layouts); | ||
460 | spin_unlock(&clp->cl_lock); | ||
461 | } | ||
462 | list_for_each_entry(lp, &lo->segs, fi_list) { | ||
463 | if (cmp_layout(lp->range.iomode, lseg->range.iomode) > 0) | ||
464 | continue; | ||
465 | list_add_tail(&lseg->fi_list, &lp->fi_list); | ||
466 | dprintk("%s: inserted lseg %p " | ||
467 | "iomode %d offset %llu length %llu before " | ||
468 | "lp %p iomode %d offset %llu length %llu\n", | ||
469 | __func__, lseg, lseg->range.iomode, | ||
470 | lseg->range.offset, lseg->range.length, | ||
471 | lp, lp->range.iomode, lp->range.offset, | ||
472 | lp->range.length); | ||
473 | found = 1; | ||
474 | break; | ||
475 | } | ||
476 | if (!found) { | ||
477 | list_add_tail(&lseg->fi_list, &lo->segs); | ||
478 | dprintk("%s: inserted lseg %p " | ||
479 | "iomode %d offset %llu length %llu at tail\n", | ||
480 | __func__, lseg, lseg->range.iomode, | ||
481 | lseg->range.offset, lseg->range.length); | ||
482 | } | ||
483 | get_layout_hdr_locked(lo); | ||
484 | |||
485 | dprintk("%s:Return\n", __func__); | ||
486 | } | ||
487 | |||
488 | static struct pnfs_layout_hdr * | ||
489 | alloc_init_layout_hdr(struct inode *ino) | ||
490 | { | ||
491 | struct pnfs_layout_hdr *lo; | ||
492 | |||
493 | lo = kzalloc(sizeof(struct pnfs_layout_hdr), GFP_KERNEL); | ||
494 | if (!lo) | ||
495 | return NULL; | ||
496 | lo->refcount = 1; | ||
497 | INIT_LIST_HEAD(&lo->layouts); | ||
498 | INIT_LIST_HEAD(&lo->segs); | ||
499 | seqlock_init(&lo->seqlock); | ||
500 | lo->inode = ino; | ||
501 | return lo; | ||
502 | } | ||
503 | |||
504 | static struct pnfs_layout_hdr * | ||
505 | pnfs_find_alloc_layout(struct inode *ino) | ||
506 | { | ||
507 | struct nfs_inode *nfsi = NFS_I(ino); | ||
508 | struct pnfs_layout_hdr *new = NULL; | ||
509 | |||
510 | dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); | ||
511 | |||
512 | assert_spin_locked(&ino->i_lock); | ||
513 | if (nfsi->layout) | ||
514 | return nfsi->layout; | ||
515 | |||
516 | spin_unlock(&ino->i_lock); | ||
517 | new = alloc_init_layout_hdr(ino); | ||
518 | spin_lock(&ino->i_lock); | ||
519 | |||
520 | if (likely(nfsi->layout == NULL)) /* Won the race? */ | ||
521 | nfsi->layout = new; | ||
522 | else | ||
523 | kfree(new); | ||
524 | return nfsi->layout; | ||
525 | } | ||
526 | |||
527 | /* | ||
528 | * iomode matching rules: | ||
529 | * iomode lseg match | ||
530 | * ----- ----- ----- | ||
531 | * ANY READ true | ||
532 | * ANY RW true | ||
533 | * RW READ false | ||
534 | * RW RW true | ||
535 | * READ READ true | ||
536 | * READ RW true | ||
537 | */ | ||
538 | static int | ||
539 | is_matching_lseg(struct pnfs_layout_segment *lseg, u32 iomode) | ||
540 | { | ||
541 | return (iomode != IOMODE_RW || lseg->range.iomode == IOMODE_RW); | ||
542 | } | ||
543 | |||
544 | /* | ||
545 | * lookup range in layout | ||
546 | */ | ||
547 | static struct pnfs_layout_segment * | ||
548 | pnfs_has_layout(struct pnfs_layout_hdr *lo, u32 iomode) | ||
549 | { | ||
550 | struct pnfs_layout_segment *lseg, *ret = NULL; | ||
551 | |||
552 | dprintk("%s:Begin\n", __func__); | ||
553 | |||
554 | assert_spin_locked(&lo->inode->i_lock); | ||
555 | list_for_each_entry(lseg, &lo->segs, fi_list) { | ||
556 | if (is_matching_lseg(lseg, iomode)) { | ||
557 | ret = lseg; | ||
558 | break; | ||
559 | } | ||
560 | if (cmp_layout(iomode, lseg->range.iomode) > 0) | ||
561 | break; | ||
562 | } | ||
563 | |||
564 | dprintk("%s:Return lseg %p ref %d\n", | ||
565 | __func__, ret, ret ? atomic_read(&ret->kref.refcount) : 0); | ||
566 | return ret; | ||
567 | } | ||
568 | |||
569 | /* | ||
570 | * Layout segment is retreived from the server if not cached. | ||
571 | * The appropriate layout segment is referenced and returned to the caller. | ||
572 | */ | ||
573 | struct pnfs_layout_segment * | ||
574 | pnfs_update_layout(struct inode *ino, | ||
575 | struct nfs_open_context *ctx, | ||
576 | enum pnfs_iomode iomode) | ||
577 | { | ||
578 | struct nfs_inode *nfsi = NFS_I(ino); | ||
579 | struct pnfs_layout_hdr *lo; | ||
580 | struct pnfs_layout_segment *lseg = NULL; | ||
581 | |||
582 | if (!pnfs_enabled_sb(NFS_SERVER(ino))) | ||
583 | return NULL; | ||
584 | spin_lock(&ino->i_lock); | ||
585 | lo = pnfs_find_alloc_layout(ino); | ||
586 | if (lo == NULL) { | ||
587 | dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); | ||
588 | goto out_unlock; | ||
589 | } | ||
590 | |||
591 | /* Check to see if the layout for the given range already exists */ | ||
592 | lseg = pnfs_has_layout(lo, iomode); | ||
593 | if (lseg) { | ||
594 | dprintk("%s: Using cached lseg %p for iomode %d)\n", | ||
595 | __func__, lseg, iomode); | ||
596 | goto out_unlock; | ||
597 | } | ||
598 | |||
599 | /* if LAYOUTGET already failed once we don't try again */ | ||
600 | if (test_bit(lo_fail_bit(iomode), &nfsi->layout->state)) | ||
601 | goto out_unlock; | ||
602 | |||
603 | get_layout_hdr_locked(lo); /* Matched in nfs4_layoutget_release */ | ||
604 | spin_unlock(&ino->i_lock); | ||
605 | |||
606 | lseg = send_layoutget(lo, ctx, iomode); | ||
607 | out: | ||
608 | dprintk("%s end, state 0x%lx lseg %p\n", __func__, | ||
609 | nfsi->layout->state, lseg); | ||
610 | return lseg; | ||
611 | out_unlock: | ||
612 | spin_unlock(&ino->i_lock); | ||
613 | goto out; | ||
614 | } | ||
615 | |||
616 | int | ||
617 | pnfs_layout_process(struct nfs4_layoutget *lgp) | ||
618 | { | ||
619 | struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; | ||
620 | struct nfs4_layoutget_res *res = &lgp->res; | ||
621 | struct pnfs_layout_segment *lseg; | ||
622 | struct inode *ino = lo->inode; | ||
623 | int status = 0; | ||
624 | |||
625 | /* Inject layout blob into I/O device driver */ | ||
626 | lseg = NFS_SERVER(ino)->pnfs_curr_ld->alloc_lseg(lo, res); | ||
627 | if (!lseg || IS_ERR(lseg)) { | ||
628 | if (!lseg) | ||
629 | status = -ENOMEM; | ||
630 | else | ||
631 | status = PTR_ERR(lseg); | ||
632 | dprintk("%s: Could not allocate layout: error %d\n", | ||
633 | __func__, status); | ||
634 | goto out; | ||
635 | } | ||
636 | |||
637 | spin_lock(&ino->i_lock); | ||
638 | init_lseg(lo, lseg); | ||
639 | lseg->range = res->range; | ||
640 | *lgp->lsegpp = lseg; | ||
641 | pnfs_insert_layout(lo, lseg); | ||
642 | |||
643 | /* Done processing layoutget. Set the layout stateid */ | ||
644 | pnfs_set_layout_stateid(lo, &res->stateid); | ||
645 | spin_unlock(&ino->i_lock); | ||
646 | out: | ||
647 | return status; | ||
648 | } | ||
649 | |||
650 | /* | ||
651 | * Device ID cache. Currently supports one layout type per struct nfs_client. | ||
652 | * Add layout type to the lookup key to expand to support multiple types. | ||
653 | */ | ||
654 | int | ||
655 | pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, | ||
656 | void (*free_callback)(struct pnfs_deviceid_node *)) | ||
657 | { | ||
658 | struct pnfs_deviceid_cache *c; | ||
659 | |||
660 | c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); | ||
661 | if (!c) | ||
662 | return -ENOMEM; | ||
663 | spin_lock(&clp->cl_lock); | ||
664 | if (clp->cl_devid_cache != NULL) { | ||
665 | atomic_inc(&clp->cl_devid_cache->dc_ref); | ||
666 | dprintk("%s [kref [%d]]\n", __func__, | ||
667 | atomic_read(&clp->cl_devid_cache->dc_ref)); | ||
668 | kfree(c); | ||
669 | } else { | ||
670 | /* kzalloc initializes hlists */ | ||
671 | spin_lock_init(&c->dc_lock); | ||
672 | atomic_set(&c->dc_ref, 1); | ||
673 | c->dc_free_callback = free_callback; | ||
674 | clp->cl_devid_cache = c; | ||
675 | dprintk("%s [new]\n", __func__); | ||
676 | } | ||
677 | spin_unlock(&clp->cl_lock); | ||
678 | return 0; | ||
679 | } | ||
680 | EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache); | ||
681 | |||
/*
 * Called from pnfs_layoutdriver_type->free_lseg
 * last layout segment reference frees deviceid
 */
void
pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
		  struct pnfs_deviceid_node *devid)
{
	struct nfs4_deviceid *id = &devid->de_id;
	struct pnfs_deviceid_node *d;
	struct hlist_node *n;
	long h = nfs4_deviceid_hash(id);

	dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref));
	/* Drop our reference; on the final put we return holding
	 * c->dc_lock and must unhash and free the node. */
	if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock))
		return;

	hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node)
		if (!memcmp(&d->de_id, id, sizeof(*id))) {
			hlist_del_rcu(&d->de_node);
			spin_unlock(&c->dc_lock);
			/* Wait out concurrent RCU readers before handing
			 * the node to the driver's free callback. */
			synchronize_rcu();
			c->dc_free_callback(devid);
			return;
		}
	spin_unlock(&c->dc_lock);
	/* Why wasn't it found in the list? */
	BUG();
}
EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
712 | |||
713 | /* Find and reference a deviceid */ | ||
714 | struct pnfs_deviceid_node * | ||
715 | pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id) | ||
716 | { | ||
717 | struct pnfs_deviceid_node *d; | ||
718 | struct hlist_node *n; | ||
719 | long hash = nfs4_deviceid_hash(id); | ||
720 | |||
721 | dprintk("--> %s hash %ld\n", __func__, hash); | ||
722 | rcu_read_lock(); | ||
723 | hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) { | ||
724 | if (!memcmp(&d->de_id, id, sizeof(*id))) { | ||
725 | if (!atomic_inc_not_zero(&d->de_ref)) { | ||
726 | goto fail; | ||
727 | } else { | ||
728 | rcu_read_unlock(); | ||
729 | return d; | ||
730 | } | ||
731 | } | ||
732 | } | ||
733 | fail: | ||
734 | rcu_read_unlock(); | ||
735 | return NULL; | ||
736 | } | ||
737 | EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); | ||
738 | |||
739 | /* | ||
740 | * Add a deviceid to the cache. | ||
741 | * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new | ||
742 | */ | ||
743 | struct pnfs_deviceid_node * | ||
744 | pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) | ||
745 | { | ||
746 | struct pnfs_deviceid_node *d; | ||
747 | long hash = nfs4_deviceid_hash(&new->de_id); | ||
748 | |||
749 | dprintk("--> %s hash %ld\n", __func__, hash); | ||
750 | spin_lock(&c->dc_lock); | ||
751 | d = pnfs_find_get_deviceid(c, &new->de_id); | ||
752 | if (d) { | ||
753 | spin_unlock(&c->dc_lock); | ||
754 | dprintk("%s [discard]\n", __func__); | ||
755 | c->dc_free_callback(new); | ||
756 | return d; | ||
757 | } | ||
758 | INIT_HLIST_NODE(&new->de_node); | ||
759 | atomic_set(&new->de_ref, 1); | ||
760 | hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]); | ||
761 | spin_unlock(&c->dc_lock); | ||
762 | dprintk("%s [new]\n", __func__); | ||
763 | return new; | ||
764 | } | ||
765 | EXPORT_SYMBOL_GPL(pnfs_add_deviceid); | ||
766 | |||
767 | void | ||
768 | pnfs_put_deviceid_cache(struct nfs_client *clp) | ||
769 | { | ||
770 | struct pnfs_deviceid_cache *local = clp->cl_devid_cache; | ||
771 | |||
772 | dprintk("--> %s cl_devid_cache %p\n", __func__, clp->cl_devid_cache); | ||
773 | if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { | ||
774 | int i; | ||
775 | /* Verify cache is empty */ | ||
776 | for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) | ||
777 | BUG_ON(!hlist_empty(&local->dc_deviceids[i])); | ||
778 | clp->cl_devid_cache = NULL; | ||
779 | spin_unlock(&clp->cl_lock); | ||
780 | kfree(local); | ||
781 | } | ||
782 | } | ||
783 | EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache); | ||
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h new file mode 100644 index 000000000000..e12367d50489 --- /dev/null +++ b/fs/nfs/pnfs.h | |||
@@ -0,0 +1,189 @@ | |||
1 | /* | ||
2 | * pNFS client data structures. | ||
3 | * | ||
4 | * Copyright (c) 2002 | ||
5 | * The Regents of the University of Michigan | ||
6 | * All Rights Reserved | ||
7 | * | ||
8 | * Dean Hildebrand <dhildebz@umich.edu> | ||
9 | * | ||
10 | * Permission is granted to use, copy, create derivative works, and | ||
11 | * redistribute this software and such derivative works for any purpose, | ||
12 | * so long as the name of the University of Michigan is not used in | ||
13 | * any advertising or publicity pertaining to the use or distribution | ||
14 | * of this software without specific, written prior authorization. If | ||
15 | * the above copyright notice or any other identification of the | ||
16 | * University of Michigan is included in any copy of any portion of | ||
17 | * this software, then the disclaimer below must also be included. | ||
18 | * | ||
19 | * This software is provided as is, without representation or warranty | ||
20 | * of any kind either express or implied, including without limitation | ||
21 | * the implied warranties of merchantability, fitness for a particular | ||
22 | * purpose, or noninfringement. The Regents of the University of | ||
23 | * Michigan shall not be liable for any damages, including special, | ||
24 | * indirect, incidental, or consequential damages, with respect to any | ||
25 | * claim arising out of or in connection with the use of the software, | ||
26 | * even if it has been or is hereafter advised of the possibility of | ||
27 | * such damages. | ||
28 | */ | ||
29 | |||
30 | #ifndef FS_NFS_PNFS_H | ||
31 | #define FS_NFS_PNFS_H | ||
32 | |||
33 | struct pnfs_layout_segment { | ||
34 | struct list_head fi_list; | ||
35 | struct pnfs_layout_range range; | ||
36 | struct kref kref; | ||
37 | struct pnfs_layout_hdr *layout; | ||
38 | }; | ||
39 | |||
40 | #ifdef CONFIG_NFS_V4_1 | ||
41 | |||
42 | #define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" | ||
43 | |||
44 | enum { | ||
45 | NFS_LAYOUT_RO_FAILED = 0, /* get ro layout failed stop trying */ | ||
46 | NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ | ||
47 | NFS_LAYOUT_STATEID_SET, /* have a valid layout stateid */ | ||
48 | }; | ||
49 | |||
50 | /* Per-layout driver specific registration structure */ | ||
51 | struct pnfs_layoutdriver_type { | ||
52 | struct list_head pnfs_tblid; | ||
53 | const u32 id; | ||
54 | const char *name; | ||
55 | struct module *owner; | ||
56 | int (*set_layoutdriver) (struct nfs_server *); | ||
57 | int (*clear_layoutdriver) (struct nfs_server *); | ||
58 | struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); | ||
59 | void (*free_lseg) (struct pnfs_layout_segment *lseg); | ||
60 | }; | ||
61 | |||
62 | struct pnfs_layout_hdr { | ||
63 | unsigned long refcount; | ||
64 | struct list_head layouts; /* other client layouts */ | ||
65 | struct list_head segs; /* layout segments list */ | ||
66 | seqlock_t seqlock; /* Protects the stateid */ | ||
67 | nfs4_stateid stateid; | ||
68 | unsigned long state; | ||
69 | struct inode *inode; | ||
70 | }; | ||
71 | |||
72 | struct pnfs_device { | ||
73 | struct nfs4_deviceid dev_id; | ||
74 | unsigned int layout_type; | ||
75 | unsigned int mincount; | ||
76 | struct page **pages; | ||
77 | void *area; | ||
78 | unsigned int pgbase; | ||
79 | unsigned int pglen; | ||
80 | }; | ||
81 | |||
82 | /* | ||
83 | * Device ID RCU cache. A device ID is unique per client ID and layout type. | ||
84 | */ | ||
85 | #define NFS4_DEVICE_ID_HASH_BITS 5 | ||
86 | #define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) | ||
87 | #define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) | ||
88 | |||
89 | static inline u32 | ||
90 | nfs4_deviceid_hash(struct nfs4_deviceid *id) | ||
91 | { | ||
92 | unsigned char *cptr = (unsigned char *)id->data; | ||
93 | unsigned int nbytes = NFS4_DEVICEID4_SIZE; | ||
94 | u32 x = 0; | ||
95 | |||
96 | while (nbytes--) { | ||
97 | x *= 37; | ||
98 | x += *cptr++; | ||
99 | } | ||
100 | return x & NFS4_DEVICE_ID_HASH_MASK; | ||
101 | } | ||
102 | |||
103 | struct pnfs_deviceid_node { | ||
104 | struct hlist_node de_node; | ||
105 | struct nfs4_deviceid de_id; | ||
106 | atomic_t de_ref; | ||
107 | }; | ||
108 | |||
109 | struct pnfs_deviceid_cache { | ||
110 | spinlock_t dc_lock; | ||
111 | atomic_t dc_ref; | ||
112 | void (*dc_free_callback)(struct pnfs_deviceid_node *); | ||
113 | struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE]; | ||
114 | }; | ||
115 | |||
116 | extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *, | ||
117 | void (*free_callback)(struct pnfs_deviceid_node *)); | ||
118 | extern void pnfs_put_deviceid_cache(struct nfs_client *); | ||
119 | extern struct pnfs_deviceid_node *pnfs_find_get_deviceid( | ||
120 | struct pnfs_deviceid_cache *, | ||
121 | struct nfs4_deviceid *); | ||
122 | extern struct pnfs_deviceid_node *pnfs_add_deviceid( | ||
123 | struct pnfs_deviceid_cache *, | ||
124 | struct pnfs_deviceid_node *); | ||
125 | extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c, | ||
126 | struct pnfs_deviceid_node *devid); | ||
127 | |||
128 | extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); | ||
129 | extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); | ||
130 | |||
131 | /* nfs4proc.c */ | ||
132 | extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, | ||
133 | struct pnfs_device *dev); | ||
134 | extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp); | ||
135 | |||
136 | /* pnfs.c */ | ||
137 | struct pnfs_layout_segment * | ||
138 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | ||
139 | enum pnfs_iomode access_type); | ||
140 | void set_pnfs_layoutdriver(struct nfs_server *, u32 id); | ||
141 | void unset_pnfs_layoutdriver(struct nfs_server *); | ||
142 | int pnfs_layout_process(struct nfs4_layoutget *lgp); | ||
143 | void pnfs_destroy_layout(struct nfs_inode *); | ||
144 | void pnfs_destroy_all_layouts(struct nfs_client *); | ||
145 | void put_layout_hdr(struct inode *inode); | ||
146 | void pnfs_get_layout_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo, | ||
147 | struct nfs4_state *open_state); | ||
148 | |||
149 | |||
150 | static inline int lo_fail_bit(u32 iomode) | ||
151 | { | ||
152 | return iomode == IOMODE_RW ? | ||
153 | NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; | ||
154 | } | ||
155 | |||
156 | /* Return true if a layout driver is being used for this mountpoint */ | ||
157 | static inline int pnfs_enabled_sb(struct nfs_server *nfss) | ||
158 | { | ||
159 | return nfss->pnfs_curr_ld != NULL; | ||
160 | } | ||
161 | |||
162 | #else /* CONFIG_NFS_V4_1 */ | ||
163 | |||
164 | static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) | ||
165 | { | ||
166 | } | ||
167 | |||
168 | static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) | ||
169 | { | ||
170 | } | ||
171 | |||
172 | static inline struct pnfs_layout_segment * | ||
173 | pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, | ||
174 | enum pnfs_iomode access_type) | ||
175 | { | ||
176 | return NULL; | ||
177 | } | ||
178 | |||
179 | static inline void set_pnfs_layoutdriver(struct nfs_server *s, u32 id) | ||
180 | { | ||
181 | } | ||
182 | |||
183 | static inline void unset_pnfs_layoutdriver(struct nfs_server *s) | ||
184 | { | ||
185 | } | ||
186 | |||
187 | #endif /* CONFIG_NFS_V4_1 */ | ||
188 | |||
189 | #endif /* FS_NFS_PNFS_H */ | ||
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 611bec22f552..58e7f84fc1fd 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -258,7 +258,7 @@ static void nfs_free_createdata(const struct nfs_createdata *data) | |||
258 | 258 | ||
259 | static int | 259 | static int |
260 | nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, | 260 | nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, |
261 | int flags, struct nameidata *nd) | 261 | int flags, struct nfs_open_context *ctx) |
262 | { | 262 | { |
263 | struct nfs_createdata *data; | 263 | struct nfs_createdata *data; |
264 | struct rpc_message msg = { | 264 | struct rpc_message msg = { |
@@ -365,17 +365,32 @@ static int nfs_proc_unlink_done(struct rpc_task *task, struct inode *dir) | |||
365 | return 1; | 365 | return 1; |
366 | } | 366 | } |
367 | 367 | ||
368 | static void | ||
369 | nfs_proc_rename_setup(struct rpc_message *msg, struct inode *dir) | ||
370 | { | ||
371 | msg->rpc_proc = &nfs_procedures[NFSPROC_RENAME]; | ||
372 | } | ||
373 | |||
374 | static int | ||
375 | nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | ||
376 | struct inode *new_dir) | ||
377 | { | ||
378 | if (nfs_async_handle_expired_key(task)) | ||
379 | return 0; | ||
380 | nfs_mark_for_revalidate(old_dir); | ||
381 | nfs_mark_for_revalidate(new_dir); | ||
382 | return 1; | ||
383 | } | ||
384 | |||
368 | static int | 385 | static int |
369 | nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, | 386 | nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, |
370 | struct inode *new_dir, struct qstr *new_name) | 387 | struct inode *new_dir, struct qstr *new_name) |
371 | { | 388 | { |
372 | struct nfs_renameargs arg = { | 389 | struct nfs_renameargs arg = { |
373 | .fromfh = NFS_FH(old_dir), | 390 | .old_dir = NFS_FH(old_dir), |
374 | .fromname = old_name->name, | 391 | .old_name = old_name, |
375 | .fromlen = old_name->len, | 392 | .new_dir = NFS_FH(new_dir), |
376 | .tofh = NFS_FH(new_dir), | 393 | .new_name = new_name, |
377 | .toname = new_name->name, | ||
378 | .tolen = new_name->len | ||
379 | }; | 394 | }; |
380 | struct rpc_message msg = { | 395 | struct rpc_message msg = { |
381 | .rpc_proc = &nfs_procedures[NFSPROC_RENAME], | 396 | .rpc_proc = &nfs_procedures[NFSPROC_RENAME], |
@@ -519,14 +534,14 @@ nfs_proc_rmdir(struct inode *dir, struct qstr *name) | |||
519 | */ | 534 | */ |
520 | static int | 535 | static int |
521 | nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, | 536 | nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, |
522 | u64 cookie, struct page *page, unsigned int count, int plus) | 537 | u64 cookie, struct page **pages, unsigned int count, int plus) |
523 | { | 538 | { |
524 | struct inode *dir = dentry->d_inode; | 539 | struct inode *dir = dentry->d_inode; |
525 | struct nfs_readdirargs arg = { | 540 | struct nfs_readdirargs arg = { |
526 | .fh = NFS_FH(dir), | 541 | .fh = NFS_FH(dir), |
527 | .cookie = cookie, | 542 | .cookie = cookie, |
528 | .count = count, | 543 | .count = count, |
529 | .pages = &page, | 544 | .pages = pages, |
530 | }; | 545 | }; |
531 | struct rpc_message msg = { | 546 | struct rpc_message msg = { |
532 | .rpc_proc = &nfs_procedures[NFSPROC_READDIR], | 547 | .rpc_proc = &nfs_procedures[NFSPROC_READDIR], |
@@ -705,6 +720,8 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
705 | .unlink_setup = nfs_proc_unlink_setup, | 720 | .unlink_setup = nfs_proc_unlink_setup, |
706 | .unlink_done = nfs_proc_unlink_done, | 721 | .unlink_done = nfs_proc_unlink_done, |
707 | .rename = nfs_proc_rename, | 722 | .rename = nfs_proc_rename, |
723 | .rename_setup = nfs_proc_rename_setup, | ||
724 | .rename_done = nfs_proc_rename_done, | ||
708 | .link = nfs_proc_link, | 725 | .link = nfs_proc_link, |
709 | .symlink = nfs_proc_symlink, | 726 | .symlink = nfs_proc_symlink, |
710 | .mkdir = nfs_proc_mkdir, | 727 | .mkdir = nfs_proc_mkdir, |
diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 87adc2744246..e4b62c6f5a6e 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c | |||
@@ -25,6 +25,7 @@ | |||
25 | #include "internal.h" | 25 | #include "internal.h" |
26 | #include "iostat.h" | 26 | #include "iostat.h" |
27 | #include "fscache.h" | 27 | #include "fscache.h" |
28 | #include "pnfs.h" | ||
28 | 29 | ||
29 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE | 30 | #define NFSDBG_FACILITY NFSDBG_PAGECACHE |
30 | 31 | ||
@@ -46,7 +47,6 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) | |||
46 | memset(p, 0, sizeof(*p)); | 47 | memset(p, 0, sizeof(*p)); |
47 | INIT_LIST_HEAD(&p->pages); | 48 | INIT_LIST_HEAD(&p->pages); |
48 | p->npages = pagecount; | 49 | p->npages = pagecount; |
49 | p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
50 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 50 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
51 | p->pagevec = p->page_array; | 51 | p->pagevec = p->page_array; |
52 | else { | 52 | else { |
@@ -121,6 +121,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, | |||
121 | len = nfs_page_length(page); | 121 | len = nfs_page_length(page); |
122 | if (len == 0) | 122 | if (len == 0) |
123 | return nfs_return_empty_page(page); | 123 | return nfs_return_empty_page(page); |
124 | pnfs_update_layout(inode, ctx, IOMODE_READ); | ||
124 | new = nfs_create_request(ctx, inode, page, 0, len); | 125 | new = nfs_create_request(ctx, inode, page, 0, len); |
125 | if (IS_ERR(new)) { | 126 | if (IS_ERR(new)) { |
126 | unlock_page(page); | 127 | unlock_page(page); |
@@ -625,6 +626,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, | |||
625 | if (ret == 0) | 626 | if (ret == 0) |
626 | goto read_complete; /* all pages were read */ | 627 | goto read_complete; /* all pages were read */ |
627 | 628 | ||
629 | pnfs_update_layout(inode, desc.ctx, IOMODE_READ); | ||
628 | if (rsize < PAGE_CACHE_SIZE) | 630 | if (rsize < PAGE_CACHE_SIZE) |
629 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); | 631 | nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); |
630 | else | 632 | else |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index f4cbf0c306c6..3600ec700d58 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -100,6 +100,7 @@ enum { | |||
100 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 100 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
101 | Opt_lookupcache, | 101 | Opt_lookupcache, |
102 | Opt_fscache_uniq, | 102 | Opt_fscache_uniq, |
103 | Opt_local_lock, | ||
103 | 104 | ||
104 | /* Special mount options */ | 105 | /* Special mount options */ |
105 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 106 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
@@ -171,6 +172,7 @@ static const match_table_t nfs_mount_option_tokens = { | |||
171 | 172 | ||
172 | { Opt_lookupcache, "lookupcache=%s" }, | 173 | { Opt_lookupcache, "lookupcache=%s" }, |
173 | { Opt_fscache_uniq, "fsc=%s" }, | 174 | { Opt_fscache_uniq, "fsc=%s" }, |
175 | { Opt_local_lock, "local_lock=%s" }, | ||
174 | 176 | ||
175 | { Opt_err, NULL } | 177 | { Opt_err, NULL } |
176 | }; | 178 | }; |
@@ -236,6 +238,22 @@ static match_table_t nfs_lookupcache_tokens = { | |||
236 | { Opt_lookupcache_err, NULL } | 238 | { Opt_lookupcache_err, NULL } |
237 | }; | 239 | }; |
238 | 240 | ||
241 | enum { | ||
242 | Opt_local_lock_all, Opt_local_lock_flock, Opt_local_lock_posix, | ||
243 | Opt_local_lock_none, | ||
244 | |||
245 | Opt_local_lock_err | ||
246 | }; | ||
247 | |||
248 | static match_table_t nfs_local_lock_tokens = { | ||
249 | { Opt_local_lock_all, "all" }, | ||
250 | { Opt_local_lock_flock, "flock" }, | ||
251 | { Opt_local_lock_posix, "posix" }, | ||
252 | { Opt_local_lock_none, "none" }, | ||
253 | |||
254 | { Opt_local_lock_err, NULL } | ||
255 | }; | ||
256 | |||
239 | 257 | ||
240 | static void nfs_umount_begin(struct super_block *); | 258 | static void nfs_umount_begin(struct super_block *); |
241 | static int nfs_statfs(struct dentry *, struct kstatfs *); | 259 | static int nfs_statfs(struct dentry *, struct kstatfs *); |
@@ -622,6 +640,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
622 | const struct proc_nfs_info *nfs_infop; | 640 | const struct proc_nfs_info *nfs_infop; |
623 | struct nfs_client *clp = nfss->nfs_client; | 641 | struct nfs_client *clp = nfss->nfs_client; |
624 | u32 version = clp->rpc_ops->version; | 642 | u32 version = clp->rpc_ops->version; |
643 | int local_flock, local_fcntl; | ||
625 | 644 | ||
626 | seq_printf(m, ",vers=%u", version); | 645 | seq_printf(m, ",vers=%u", version); |
627 | seq_printf(m, ",rsize=%u", nfss->rsize); | 646 | seq_printf(m, ",rsize=%u", nfss->rsize); |
@@ -670,6 +689,18 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, | |||
670 | else | 689 | else |
671 | seq_printf(m, ",lookupcache=pos"); | 690 | seq_printf(m, ",lookupcache=pos"); |
672 | } | 691 | } |
692 | |||
693 | local_flock = nfss->flags & NFS_MOUNT_LOCAL_FLOCK; | ||
694 | local_fcntl = nfss->flags & NFS_MOUNT_LOCAL_FCNTL; | ||
695 | |||
696 | if (!local_flock && !local_fcntl) | ||
697 | seq_printf(m, ",local_lock=none"); | ||
698 | else if (local_flock && local_fcntl) | ||
699 | seq_printf(m, ",local_lock=all"); | ||
700 | else if (local_flock) | ||
701 | seq_printf(m, ",local_lock=flock"); | ||
702 | else | ||
703 | seq_printf(m, ",local_lock=posix"); | ||
673 | } | 704 | } |
674 | 705 | ||
675 | /* | 706 | /* |
@@ -1017,9 +1048,13 @@ static int nfs_parse_mount_options(char *raw, | |||
1017 | break; | 1048 | break; |
1018 | case Opt_lock: | 1049 | case Opt_lock: |
1019 | mnt->flags &= ~NFS_MOUNT_NONLM; | 1050 | mnt->flags &= ~NFS_MOUNT_NONLM; |
1051 | mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | | ||
1052 | NFS_MOUNT_LOCAL_FCNTL); | ||
1020 | break; | 1053 | break; |
1021 | case Opt_nolock: | 1054 | case Opt_nolock: |
1022 | mnt->flags |= NFS_MOUNT_NONLM; | 1055 | mnt->flags |= NFS_MOUNT_NONLM; |
1056 | mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | | ||
1057 | NFS_MOUNT_LOCAL_FCNTL); | ||
1023 | break; | 1058 | break; |
1024 | case Opt_v2: | 1059 | case Opt_v2: |
1025 | mnt->flags &= ~NFS_MOUNT_VER3; | 1060 | mnt->flags &= ~NFS_MOUNT_VER3; |
@@ -1420,6 +1455,34 @@ static int nfs_parse_mount_options(char *raw, | |||
1420 | mnt->fscache_uniq = string; | 1455 | mnt->fscache_uniq = string; |
1421 | mnt->options |= NFS_OPTION_FSCACHE; | 1456 | mnt->options |= NFS_OPTION_FSCACHE; |
1422 | break; | 1457 | break; |
1458 | case Opt_local_lock: | ||
1459 | string = match_strdup(args); | ||
1460 | if (string == NULL) | ||
1461 | goto out_nomem; | ||
1462 | token = match_token(string, nfs_local_lock_tokens, | ||
1463 | args); | ||
1464 | kfree(string); | ||
1465 | switch (token) { | ||
1466 | case Opt_local_lock_all: | ||
1467 | mnt->flags |= (NFS_MOUNT_LOCAL_FLOCK | | ||
1468 | NFS_MOUNT_LOCAL_FCNTL); | ||
1469 | break; | ||
1470 | case Opt_local_lock_flock: | ||
1471 | mnt->flags |= NFS_MOUNT_LOCAL_FLOCK; | ||
1472 | break; | ||
1473 | case Opt_local_lock_posix: | ||
1474 | mnt->flags |= NFS_MOUNT_LOCAL_FCNTL; | ||
1475 | break; | ||
1476 | case Opt_local_lock_none: | ||
1477 | mnt->flags &= ~(NFS_MOUNT_LOCAL_FLOCK | | ||
1478 | NFS_MOUNT_LOCAL_FCNTL); | ||
1479 | break; | ||
1480 | default: | ||
1481 | dfprintk(MOUNT, "NFS: invalid " | ||
1482 | "local_lock argument\n"); | ||
1483 | return 0; | ||
1484 | }; | ||
1485 | break; | ||
1423 | 1486 | ||
1424 | /* | 1487 | /* |
1425 | * Special options | 1488 | * Special options |
@@ -1825,6 +1888,12 @@ static int nfs_validate_mount_data(void *options, | |||
1825 | if (!args->nfs_server.hostname) | 1888 | if (!args->nfs_server.hostname) |
1826 | goto out_nomem; | 1889 | goto out_nomem; |
1827 | 1890 | ||
1891 | if (!(data->flags & NFS_MOUNT_NONLM)) | ||
1892 | args->flags &= ~(NFS_MOUNT_LOCAL_FLOCK| | ||
1893 | NFS_MOUNT_LOCAL_FCNTL); | ||
1894 | else | ||
1895 | args->flags |= (NFS_MOUNT_LOCAL_FLOCK| | ||
1896 | NFS_MOUNT_LOCAL_FCNTL); | ||
1828 | /* | 1897 | /* |
1829 | * The legacy version 6 binary mount data from userspace has a | 1898 | * The legacy version 6 binary mount data from userspace has a |
1830 | * field used only to transport selinux information into the | 1899 | * field used only to transport selinux information into the |
@@ -2441,7 +2510,8 @@ static void nfs4_fill_super(struct super_block *sb) | |||
2441 | 2510 | ||
2442 | static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) | 2511 | static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args) |
2443 | { | 2512 | { |
2444 | args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3); | 2513 | args->flags &= ~(NFS_MOUNT_NONLM|NFS_MOUNT_NOACL|NFS_MOUNT_VER3| |
2514 | NFS_MOUNT_LOCAL_FLOCK|NFS_MOUNT_LOCAL_FCNTL); | ||
2445 | } | 2515 | } |
2446 | 2516 | ||
2447 | static int nfs4_validate_text_mount_data(void *options, | 2517 | static int nfs4_validate_text_mount_data(void *options, |
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index ad4d2e787b20..978aaeb8a093 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c | |||
@@ -32,6 +32,7 @@ static ctl_table nfs_cb_sysctls[] = { | |||
32 | .extra1 = (int *)&nfs_set_port_min, | 32 | .extra1 = (int *)&nfs_set_port_min, |
33 | .extra2 = (int *)&nfs_set_port_max, | 33 | .extra2 = (int *)&nfs_set_port_max, |
34 | }, | 34 | }, |
35 | #ifndef CONFIG_NFS_USE_NEW_IDMAPPER | ||
35 | { | 36 | { |
36 | .procname = "idmap_cache_timeout", | 37 | .procname = "idmap_cache_timeout", |
37 | .data = &nfs_idmap_cache_timeout, | 38 | .data = &nfs_idmap_cache_timeout, |
@@ -39,6 +40,7 @@ static ctl_table nfs_cb_sysctls[] = { | |||
39 | .mode = 0644, | 40 | .mode = 0644, |
40 | .proc_handler = proc_dointvec_jiffies, | 41 | .proc_handler = proc_dointvec_jiffies, |
41 | }, | 42 | }, |
43 | #endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ | ||
42 | #endif | 44 | #endif |
43 | { | 45 | { |
44 | .procname = "nfs_mountpoint_timeout", | 46 | .procname = "nfs_mountpoint_timeout", |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 2f84adaad427..9a16bad5d2ea 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -13,9 +13,12 @@ | |||
13 | #include <linux/nfs_fs.h> | 13 | #include <linux/nfs_fs.h> |
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/wait.h> | 15 | #include <linux/wait.h> |
16 | #include <linux/namei.h> | ||
16 | 17 | ||
17 | #include "internal.h" | 18 | #include "internal.h" |
18 | #include "nfs4_fs.h" | 19 | #include "nfs4_fs.h" |
20 | #include "iostat.h" | ||
21 | #include "delegation.h" | ||
19 | 22 | ||
20 | struct nfs_unlinkdata { | 23 | struct nfs_unlinkdata { |
21 | struct hlist_node list; | 24 | struct hlist_node list; |
@@ -244,7 +247,7 @@ void nfs_unblock_sillyrename(struct dentry *dentry) | |||
244 | * @dir: parent directory of dentry | 247 | * @dir: parent directory of dentry |
245 | * @dentry: dentry to unlink | 248 | * @dentry: dentry to unlink |
246 | */ | 249 | */ |
247 | int | 250 | static int |
248 | nfs_async_unlink(struct inode *dir, struct dentry *dentry) | 251 | nfs_async_unlink(struct inode *dir, struct dentry *dentry) |
249 | { | 252 | { |
250 | struct nfs_unlinkdata *data; | 253 | struct nfs_unlinkdata *data; |
@@ -259,7 +262,6 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry) | |||
259 | status = PTR_ERR(data->cred); | 262 | status = PTR_ERR(data->cred); |
260 | goto out_free; | 263 | goto out_free; |
261 | } | 264 | } |
262 | data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
263 | data->res.dir_attr = &data->dir_attr; | 265 | data->res.dir_attr = &data->dir_attr; |
264 | 266 | ||
265 | status = -EBUSY; | 267 | status = -EBUSY; |
@@ -303,3 +305,256 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode) | |||
303 | if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) | 305 | if (data != NULL && (NFS_STALE(inode) || !nfs_call_unlink(dentry, data))) |
304 | nfs_free_unlinkdata(data); | 306 | nfs_free_unlinkdata(data); |
305 | } | 307 | } |
308 | |||
309 | /* Cancel a queued async unlink. Called when a sillyrename run fails. */ | ||
310 | static void | ||
311 | nfs_cancel_async_unlink(struct dentry *dentry) | ||
312 | { | ||
313 | spin_lock(&dentry->d_lock); | ||
314 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { | ||
315 | struct nfs_unlinkdata *data = dentry->d_fsdata; | ||
316 | |||
317 | dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; | ||
318 | spin_unlock(&dentry->d_lock); | ||
319 | nfs_free_unlinkdata(data); | ||
320 | return; | ||
321 | } | ||
322 | spin_unlock(&dentry->d_lock); | ||
323 | } | ||
324 | |||
325 | struct nfs_renamedata { | ||
326 | struct nfs_renameargs args; | ||
327 | struct nfs_renameres res; | ||
328 | struct rpc_cred *cred; | ||
329 | struct inode *old_dir; | ||
330 | struct dentry *old_dentry; | ||
331 | struct nfs_fattr old_fattr; | ||
332 | struct inode *new_dir; | ||
333 | struct dentry *new_dentry; | ||
334 | struct nfs_fattr new_fattr; | ||
335 | }; | ||
336 | |||
337 | /** | ||
338 | * nfs_async_rename_done - Sillyrename post-processing | ||
339 | * @task: rpc_task of the sillyrename | ||
340 | * @calldata: nfs_renamedata for the sillyrename | ||
341 | * | ||
342 | * Do the directory attribute updates and the d_move | ||
343 | */ | ||
344 | static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | ||
345 | { | ||
346 | struct nfs_renamedata *data = calldata; | ||
347 | struct inode *old_dir = data->old_dir; | ||
348 | struct inode *new_dir = data->new_dir; | ||
349 | |||
350 | if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { | ||
351 | nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); | ||
352 | return; | ||
353 | } | ||
354 | |||
355 | if (task->tk_status != 0) { | ||
356 | nfs_cancel_async_unlink(data->old_dentry); | ||
357 | return; | ||
358 | } | ||
359 | |||
360 | nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); | ||
361 | d_move(data->old_dentry, data->new_dentry); | ||
362 | } | ||
363 | |||
364 | /** | ||
365 | * nfs_async_rename_release - Release the sillyrename data. | ||
366 | * @calldata: the struct nfs_renamedata to be released | ||
367 | */ | ||
368 | static void nfs_async_rename_release(void *calldata) | ||
369 | { | ||
370 | struct nfs_renamedata *data = calldata; | ||
371 | struct super_block *sb = data->old_dir->i_sb; | ||
372 | |||
373 | if (data->old_dentry->d_inode) | ||
374 | nfs_mark_for_revalidate(data->old_dentry->d_inode); | ||
375 | |||
376 | dput(data->old_dentry); | ||
377 | dput(data->new_dentry); | ||
378 | iput(data->old_dir); | ||
379 | iput(data->new_dir); | ||
380 | nfs_sb_deactive(sb); | ||
381 | put_rpccred(data->cred); | ||
382 | kfree(data); | ||
383 | } | ||
384 | |||
385 | #if defined(CONFIG_NFS_V4_1) | ||
386 | static void nfs_rename_prepare(struct rpc_task *task, void *calldata) | ||
387 | { | ||
388 | struct nfs_renamedata *data = calldata; | ||
389 | struct nfs_server *server = NFS_SERVER(data->old_dir); | ||
390 | |||
391 | if (nfs4_setup_sequence(server, &data->args.seq_args, | ||
392 | &data->res.seq_res, 1, task)) | ||
393 | return; | ||
394 | rpc_call_start(task); | ||
395 | } | ||
396 | #endif /* CONFIG_NFS_V4_1 */ | ||
397 | |||
398 | static const struct rpc_call_ops nfs_rename_ops = { | ||
399 | .rpc_call_done = nfs_async_rename_done, | ||
400 | .rpc_release = nfs_async_rename_release, | ||
401 | #if defined(CONFIG_NFS_V4_1) | ||
402 | .rpc_call_prepare = nfs_rename_prepare, | ||
403 | #endif /* CONFIG_NFS_V4_1 */ | ||
404 | }; | ||
405 | |||
406 | /** | ||
407 | * nfs_async_rename - perform an asynchronous rename operation | ||
408 | * @old_dir: directory that currently holds the dentry to be renamed | ||
409 | * @new_dir: target directory for the rename | ||
410 | * @old_dentry: original dentry to be renamed | ||
411 | * @new_dentry: dentry to which the old_dentry should be renamed | ||
412 | * | ||
413 | * It's expected that valid references to the dentries and inodes are held | ||
414 | */ | ||
415 | static struct rpc_task * | ||
416 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | ||
417 | struct dentry *old_dentry, struct dentry *new_dentry) | ||
418 | { | ||
419 | struct nfs_renamedata *data; | ||
420 | struct rpc_message msg = { }; | ||
421 | struct rpc_task_setup task_setup_data = { | ||
422 | .rpc_message = &msg, | ||
423 | .callback_ops = &nfs_rename_ops, | ||
424 | .workqueue = nfsiod_workqueue, | ||
425 | .rpc_client = NFS_CLIENT(old_dir), | ||
426 | .flags = RPC_TASK_ASYNC, | ||
427 | }; | ||
428 | |||
429 | data = kzalloc(sizeof(*data), GFP_KERNEL); | ||
430 | if (data == NULL) | ||
431 | return ERR_PTR(-ENOMEM); | ||
432 | task_setup_data.callback_data = data, | ||
433 | |||
434 | data->cred = rpc_lookup_cred(); | ||
435 | if (IS_ERR(data->cred)) { | ||
436 | struct rpc_task *task = ERR_CAST(data->cred); | ||
437 | kfree(data); | ||
438 | return task; | ||
439 | } | ||
440 | |||
441 | msg.rpc_argp = &data->args; | ||
442 | msg.rpc_resp = &data->res; | ||
443 | msg.rpc_cred = data->cred; | ||
444 | |||
445 | /* set up nfs_renamedata */ | ||
446 | data->old_dir = old_dir; | ||
447 | atomic_inc(&old_dir->i_count); | ||
448 | data->new_dir = new_dir; | ||
449 | atomic_inc(&new_dir->i_count); | ||
450 | data->old_dentry = dget(old_dentry); | ||
451 | data->new_dentry = dget(new_dentry); | ||
452 | nfs_fattr_init(&data->old_fattr); | ||
453 | nfs_fattr_init(&data->new_fattr); | ||
454 | |||
455 | /* set up nfs_renameargs */ | ||
456 | data->args.old_dir = NFS_FH(old_dir); | ||
457 | data->args.old_name = &old_dentry->d_name; | ||
458 | data->args.new_dir = NFS_FH(new_dir); | ||
459 | data->args.new_name = &new_dentry->d_name; | ||
460 | |||
461 | /* set up nfs_renameres */ | ||
462 | data->res.old_fattr = &data->old_fattr; | ||
463 | data->res.new_fattr = &data->new_fattr; | ||
464 | |||
465 | nfs_sb_active(old_dir->i_sb); | ||
466 | |||
467 | NFS_PROTO(data->old_dir)->rename_setup(&msg, old_dir); | ||
468 | |||
469 | return rpc_run_task(&task_setup_data); | ||
470 | } | ||
471 | |||
472 | /** | ||
473 | * nfs_sillyrename - Perform a silly-rename of a dentry | ||
474 | * @dir: inode of directory that contains dentry | ||
475 | * @dentry: dentry to be sillyrenamed | ||
476 | * | ||
477 | * NFSv2/3 is stateless and the server doesn't know when the client is | ||
478 | * holding a file open. To prevent application problems when a file is | ||
479 | * unlinked while it's still open, the client performs a "silly-rename". | ||
480 | * That is, it renames the file to a hidden file in the same directory, | ||
481 | * and only performs the unlink once the last reference to it is put. | ||
482 | * | ||
483 | * The final cleanup is done during dentry_iput. | ||
484 | */ | ||
485 | int | ||
486 | nfs_sillyrename(struct inode *dir, struct dentry *dentry) | ||
487 | { | ||
488 | static unsigned int sillycounter; | ||
489 | const int fileidsize = sizeof(NFS_FILEID(dentry->d_inode))*2; | ||
490 | const int countersize = sizeof(sillycounter)*2; | ||
491 | const int slen = sizeof(".nfs")+fileidsize+countersize-1; | ||
492 | char silly[slen+1]; | ||
493 | struct dentry *sdentry; | ||
494 | struct rpc_task *task; | ||
495 | int error = -EIO; | ||
496 | |||
497 | dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", | ||
498 | dentry->d_parent->d_name.name, dentry->d_name.name, | ||
499 | atomic_read(&dentry->d_count)); | ||
500 | nfs_inc_stats(dir, NFSIOS_SILLYRENAME); | ||
501 | |||
502 | /* | ||
503 | * We don't allow a dentry to be silly-renamed twice. | ||
504 | */ | ||
505 | error = -EBUSY; | ||
506 | if (dentry->d_flags & DCACHE_NFSFS_RENAMED) | ||
507 | goto out; | ||
508 | |||
509 | sprintf(silly, ".nfs%*.*Lx", | ||
510 | fileidsize, fileidsize, | ||
511 | (unsigned long long)NFS_FILEID(dentry->d_inode)); | ||
512 | |||
513 | /* Return delegation in anticipation of the rename */ | ||
514 | nfs_inode_return_delegation(dentry->d_inode); | ||
515 | |||
516 | sdentry = NULL; | ||
517 | do { | ||
518 | char *suffix = silly + slen - countersize; | ||
519 | |||
520 | dput(sdentry); | ||
521 | sillycounter++; | ||
522 | sprintf(suffix, "%*.*x", countersize, countersize, sillycounter); | ||
523 | |||
524 | dfprintk(VFS, "NFS: trying to rename %s to %s\n", | ||
525 | dentry->d_name.name, silly); | ||
526 | |||
527 | sdentry = lookup_one_len(silly, dentry->d_parent, slen); | ||
528 | /* | ||
529 | * N.B. Better to return EBUSY here ... it could be | ||
530 | * dangerous to delete the file while it's in use. | ||
531 | */ | ||
532 | if (IS_ERR(sdentry)) | ||
533 | goto out; | ||
534 | } while (sdentry->d_inode != NULL); /* need negative lookup */ | ||
535 | |||
536 | /* queue unlink first. Can't do this from rpc_release as it | ||
537 | * has to allocate memory | ||
538 | */ | ||
539 | error = nfs_async_unlink(dir, dentry); | ||
540 | if (error) | ||
541 | goto out_dput; | ||
542 | |||
543 | /* run the rename task, undo unlink if it fails */ | ||
544 | task = nfs_async_rename(dir, dir, dentry, sdentry); | ||
545 | if (IS_ERR(task)) { | ||
546 | error = -EBUSY; | ||
547 | nfs_cancel_async_unlink(dentry); | ||
548 | goto out_dput; | ||
549 | } | ||
550 | |||
551 | /* wait for the RPC task to complete, unless a SIGKILL intervenes */ | ||
552 | error = rpc_wait_for_completion_task(task); | ||
553 | if (error == 0) | ||
554 | error = task->tk_status; | ||
555 | rpc_put_task(task); | ||
556 | out_dput: | ||
557 | dput(sdentry); | ||
558 | out: | ||
559 | return error; | ||
560 | } | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 874972d9427c..4c14c17a5276 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
@@ -55,7 +55,6 @@ struct nfs_write_data *nfs_commitdata_alloc(void) | |||
55 | if (p) { | 55 | if (p) { |
56 | memset(p, 0, sizeof(*p)); | 56 | memset(p, 0, sizeof(*p)); |
57 | INIT_LIST_HEAD(&p->pages); | 57 | INIT_LIST_HEAD(&p->pages); |
58 | p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
59 | } | 58 | } |
60 | return p; | 59 | return p; |
61 | } | 60 | } |
@@ -75,7 +74,6 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) | |||
75 | memset(p, 0, sizeof(*p)); | 74 | memset(p, 0, sizeof(*p)); |
76 | INIT_LIST_HEAD(&p->pages); | 75 | INIT_LIST_HEAD(&p->pages); |
77 | p->npages = pagecount; | 76 | p->npages = pagecount; |
78 | p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; | ||
79 | if (pagecount <= ARRAY_SIZE(p->page_array)) | 77 | if (pagecount <= ARRAY_SIZE(p->page_array)) |
80 | p->pagevec = p->page_array; | 78 | p->pagevec = p->page_array; |
81 | else { | 79 | else { |
@@ -292,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st | |||
292 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); | 290 | nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1); |
293 | 291 | ||
294 | nfs_pageio_cond_complete(pgio, page->index); | 292 | nfs_pageio_cond_complete(pgio, page->index); |
295 | ret = nfs_page_async_flush(pgio, page, | 293 | ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); |
296 | wbc->sync_mode == WB_SYNC_NONE || | ||
297 | wbc->nonblocking != 0); | ||
298 | if (ret == -EAGAIN) { | 294 | if (ret == -EAGAIN) { |
299 | redirty_page_for_writepage(wbc, page); | 295 | redirty_page_for_writepage(wbc, page); |
300 | ret = 0; | 296 | ret = 0; |
@@ -1433,15 +1429,17 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr | |||
1433 | int flags = FLUSH_SYNC; | 1429 | int flags = FLUSH_SYNC; |
1434 | int ret = 0; | 1430 | int ret = 0; |
1435 | 1431 | ||
1436 | /* Don't commit yet if this is a non-blocking flush and there are | 1432 | if (wbc->sync_mode == WB_SYNC_NONE) { |
1437 | * lots of outstanding writes for this mapping. | 1433 | /* Don't commit yet if this is a non-blocking flush and there |
1438 | */ | 1434 | * are a lot of outstanding writes for this mapping. |
1439 | if (wbc->sync_mode == WB_SYNC_NONE && | 1435 | */ |
1440 | nfsi->ncommit <= (nfsi->npages >> 1)) | 1436 | if (nfsi->ncommit <= (nfsi->npages >> 1)) |
1441 | goto out_mark_dirty; | 1437 | goto out_mark_dirty; |
1442 | 1438 | ||
1443 | if (wbc->nonblocking || wbc->for_background) | 1439 | /* don't wait for the COMMIT response */ |
1444 | flags = 0; | 1440 | flags = 0; |
1441 | } | ||
1442 | |||
1445 | ret = nfs_commit_inode(inode, flags); | 1443 | ret = nfs_commit_inode(inode, flags); |
1446 | if (ret >= 0) { | 1444 | if (ret >= 0) { |
1447 | if (wbc->sync_mode == WB_SYNC_NONE) { | 1445 | if (wbc->sync_mode == WB_SYNC_NONE) { |
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index 4264377552e2..18b3e8975fe0 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig | |||
@@ -28,6 +28,18 @@ config NFSD | |||
28 | 28 | ||
29 | If unsure, say N. | 29 | If unsure, say N. |
30 | 30 | ||
31 | config NFSD_DEPRECATED | ||
32 | bool "Include support for deprecated syscall interface to NFSD" | ||
33 | depends on NFSD | ||
34 | default y | ||
35 | help | ||
36 | The syscall interface to nfsd was obsoleted in 2.6.0 by a new | ||
37 | filesystem based interface. The old interface is due for removal | ||
38 | in 2.6.40. If you wish to remove the interface before then, | ||
39 | say N. | ||
40 | |||
41 | If unsure, say Y. | ||
42 | |||
31 | config NFSD_V2_ACL | 43 | config NFSD_V2_ACL |
32 | bool | 44 | bool |
33 | depends on NFSD | 45 | depends on NFSD |
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index c2a4f71d87dd..c0fcb7ab7f6d 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c | |||
@@ -28,9 +28,6 @@ | |||
28 | typedef struct auth_domain svc_client; | 28 | typedef struct auth_domain svc_client; |
29 | typedef struct svc_export svc_export; | 29 | typedef struct svc_export svc_export; |
30 | 30 | ||
31 | static void exp_do_unexport(svc_export *unexp); | ||
32 | static int exp_verify_string(char *cp, int max); | ||
33 | |||
34 | /* | 31 | /* |
35 | * We have two caches. | 32 | * We have two caches. |
36 | * One maps client+vfsmnt+dentry to export options - the export map | 33 | * One maps client+vfsmnt+dentry to export options - the export map |
@@ -802,6 +799,7 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) | |||
802 | return ek; | 799 | return ek; |
803 | } | 800 | } |
804 | 801 | ||
802 | #ifdef CONFIG_NFSD_DEPRECATED | ||
805 | static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, | 803 | static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, |
806 | struct svc_export *exp) | 804 | struct svc_export *exp) |
807 | { | 805 | { |
@@ -852,6 +850,7 @@ exp_get_fsid_key(svc_client *clp, int fsid) | |||
852 | 850 | ||
853 | return exp_find_key(clp, FSID_NUM, fsidv, NULL); | 851 | return exp_find_key(clp, FSID_NUM, fsidv, NULL); |
854 | } | 852 | } |
853 | #endif | ||
855 | 854 | ||
856 | static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, | 855 | static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, |
857 | struct cache_req *reqp) | 856 | struct cache_req *reqp) |
@@ -893,6 +892,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path) | |||
893 | return exp; | 892 | return exp; |
894 | } | 893 | } |
895 | 894 | ||
895 | #ifdef CONFIG_NFSD_DEPRECATED | ||
896 | /* | 896 | /* |
897 | * Hashtable locking. Write locks are placed only by user processes | 897 | * Hashtable locking. Write locks are placed only by user processes |
898 | * wanting to modify export information. | 898 | * wanting to modify export information. |
@@ -925,6 +925,19 @@ exp_writeunlock(void) | |||
925 | { | 925 | { |
926 | up_write(&hash_sem); | 926 | up_write(&hash_sem); |
927 | } | 927 | } |
928 | #else | ||
929 | |||
930 | /* hash_sem not needed once deprecated interface is removed */ | ||
931 | void exp_readlock(void) {} | ||
932 | static inline void exp_writelock(void){} | ||
933 | void exp_readunlock(void) {} | ||
934 | static inline void exp_writeunlock(void){} | ||
935 | |||
936 | #endif | ||
937 | |||
938 | #ifdef CONFIG_NFSD_DEPRECATED | ||
939 | static void exp_do_unexport(svc_export *unexp); | ||
940 | static int exp_verify_string(char *cp, int max); | ||
928 | 941 | ||
929 | static void exp_fsid_unhash(struct svc_export *exp) | 942 | static void exp_fsid_unhash(struct svc_export *exp) |
930 | { | 943 | { |
@@ -935,10 +948,9 @@ static void exp_fsid_unhash(struct svc_export *exp) | |||
935 | 948 | ||
936 | ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); | 949 | ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); |
937 | if (!IS_ERR(ek)) { | 950 | if (!IS_ERR(ek)) { |
938 | ek->h.expiry_time = get_seconds()-1; | 951 | sunrpc_invalidate(&ek->h, &svc_expkey_cache); |
939 | cache_put(&ek->h, &svc_expkey_cache); | 952 | cache_put(&ek->h, &svc_expkey_cache); |
940 | } | 953 | } |
941 | svc_expkey_cache.nextcheck = get_seconds(); | ||
942 | } | 954 | } |
943 | 955 | ||
944 | static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) | 956 | static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) |
@@ -973,10 +985,9 @@ static void exp_unhash(struct svc_export *exp) | |||
973 | 985 | ||
974 | ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); | 986 | ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); |
975 | if (!IS_ERR(ek)) { | 987 | if (!IS_ERR(ek)) { |
976 | ek->h.expiry_time = get_seconds()-1; | 988 | sunrpc_invalidate(&ek->h, &svc_expkey_cache); |
977 | cache_put(&ek->h, &svc_expkey_cache); | 989 | cache_put(&ek->h, &svc_expkey_cache); |
978 | } | 990 | } |
979 | svc_expkey_cache.nextcheck = get_seconds(); | ||
980 | } | 991 | } |
981 | 992 | ||
982 | /* | 993 | /* |
@@ -1097,8 +1108,7 @@ out: | |||
1097 | static void | 1108 | static void |
1098 | exp_do_unexport(svc_export *unexp) | 1109 | exp_do_unexport(svc_export *unexp) |
1099 | { | 1110 | { |
1100 | unexp->h.expiry_time = get_seconds()-1; | 1111 | sunrpc_invalidate(&unexp->h, &svc_export_cache); |
1101 | svc_export_cache.nextcheck = get_seconds(); | ||
1102 | exp_unhash(unexp); | 1112 | exp_unhash(unexp); |
1103 | exp_fsid_unhash(unexp); | 1113 | exp_fsid_unhash(unexp); |
1104 | } | 1114 | } |
@@ -1150,6 +1160,7 @@ out_unlock: | |||
1150 | exp_writeunlock(); | 1160 | exp_writeunlock(); |
1151 | return err; | 1161 | return err; |
1152 | } | 1162 | } |
1163 | #endif /* CONFIG_NFSD_DEPRECATED */ | ||
1153 | 1164 | ||
1154 | /* | 1165 | /* |
1155 | * Obtain the root fh on behalf of a client. | 1166 | * Obtain the root fh on behalf of a client. |
@@ -1459,25 +1470,43 @@ static void show_secinfo_flags(struct seq_file *m, int flags) | |||
1459 | show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); | 1470 | show_expflags(m, flags, NFSEXP_SECINFO_FLAGS); |
1460 | } | 1471 | } |
1461 | 1472 | ||
1473 | static bool secinfo_flags_equal(int f, int g) | ||
1474 | { | ||
1475 | f &= NFSEXP_SECINFO_FLAGS; | ||
1476 | g &= NFSEXP_SECINFO_FLAGS; | ||
1477 | return f == g; | ||
1478 | } | ||
1479 | |||
1480 | static int show_secinfo_run(struct seq_file *m, struct exp_flavor_info **fp, struct exp_flavor_info *end) | ||
1481 | { | ||
1482 | int flags; | ||
1483 | |||
1484 | flags = (*fp)->flags; | ||
1485 | seq_printf(m, ",sec=%d", (*fp)->pseudoflavor); | ||
1486 | (*fp)++; | ||
1487 | while (*fp != end && secinfo_flags_equal(flags, (*fp)->flags)) { | ||
1488 | seq_printf(m, ":%d", (*fp)->pseudoflavor); | ||
1489 | (*fp)++; | ||
1490 | } | ||
1491 | return flags; | ||
1492 | } | ||
1493 | |||
1462 | static void show_secinfo(struct seq_file *m, struct svc_export *exp) | 1494 | static void show_secinfo(struct seq_file *m, struct svc_export *exp) |
1463 | { | 1495 | { |
1464 | struct exp_flavor_info *f; | 1496 | struct exp_flavor_info *f; |
1465 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; | 1497 | struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors; |
1466 | int lastflags = 0, first = 0; | 1498 | int flags; |
1467 | 1499 | ||
1468 | if (exp->ex_nflavors == 0) | 1500 | if (exp->ex_nflavors == 0) |
1469 | return; | 1501 | return; |
1470 | for (f = exp->ex_flavors; f < end; f++) { | 1502 | f = exp->ex_flavors; |
1471 | if (first || f->flags != lastflags) { | 1503 | flags = show_secinfo_run(m, &f, end); |
1472 | if (!first) | 1504 | if (!secinfo_flags_equal(flags, exp->ex_flags)) |
1473 | show_secinfo_flags(m, lastflags); | 1505 | show_secinfo_flags(m, flags); |
1474 | seq_printf(m, ",sec=%d", f->pseudoflavor); | 1506 | while (f != end) { |
1475 | lastflags = f->flags; | 1507 | flags = show_secinfo_run(m, &f, end); |
1476 | } else { | 1508 | show_secinfo_flags(m, flags); |
1477 | seq_printf(m, ":%d", f->pseudoflavor); | ||
1478 | } | ||
1479 | } | 1509 | } |
1480 | show_secinfo_flags(m, lastflags); | ||
1481 | } | 1510 | } |
1482 | 1511 | ||
1483 | static void exp_flags(struct seq_file *m, int flag, int fsid, | 1512 | static void exp_flags(struct seq_file *m, int flag, int fsid, |
@@ -1532,6 +1561,7 @@ const struct seq_operations nfs_exports_op = { | |||
1532 | .show = e_show, | 1561 | .show = e_show, |
1533 | }; | 1562 | }; |
1534 | 1563 | ||
1564 | #ifdef CONFIG_NFSD_DEPRECATED | ||
1535 | /* | 1565 | /* |
1536 | * Add or modify a client. | 1566 | * Add or modify a client. |
1537 | * Change requests may involve the list of host addresses. The list of | 1567 | * Change requests may involve the list of host addresses. The list of |
@@ -1563,7 +1593,7 @@ exp_addclient(struct nfsctl_client *ncp) | |||
1563 | /* Insert client into hashtable. */ | 1593 | /* Insert client into hashtable. */ |
1564 | for (i = 0; i < ncp->cl_naddr; i++) { | 1594 | for (i = 0; i < ncp->cl_naddr; i++) { |
1565 | ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); | 1595 | ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); |
1566 | auth_unix_add_addr(&addr6, dom); | 1596 | auth_unix_add_addr(&init_net, &addr6, dom); |
1567 | } | 1597 | } |
1568 | auth_unix_forget_old(dom); | 1598 | auth_unix_forget_old(dom); |
1569 | auth_domain_put(dom); | 1599 | auth_domain_put(dom); |
@@ -1621,6 +1651,7 @@ exp_verify_string(char *cp, int max) | |||
1621 | printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); | 1651 | printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); |
1622 | return 0; | 1652 | return 0; |
1623 | } | 1653 | } |
1654 | #endif /* CONFIG_NFSD_DEPRECATED */ | ||
1624 | 1655 | ||
1625 | /* | 1656 | /* |
1626 | * Initialize the exports module. | 1657 | * Initialize the exports module. |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 988cbb3a19b6..143da2eecd7b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
@@ -41,7 +41,6 @@ | |||
41 | 41 | ||
42 | #define NFSPROC4_CB_NULL 0 | 42 | #define NFSPROC4_CB_NULL 0 |
43 | #define NFSPROC4_CB_COMPOUND 1 | 43 | #define NFSPROC4_CB_COMPOUND 1 |
44 | #define NFS4_STATEID_SIZE 16 | ||
45 | 44 | ||
46 | /* Index of predefined Linux callback client operations */ | 45 | /* Index of predefined Linux callback client operations */ |
47 | 46 | ||
@@ -248,10 +247,11 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_delegation *dp, | |||
248 | } | 247 | } |
249 | 248 | ||
250 | static void | 249 | static void |
251 | encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, | 250 | encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, |
252 | struct nfs4_cb_compound_hdr *hdr) | 251 | struct nfs4_cb_compound_hdr *hdr) |
253 | { | 252 | { |
254 | __be32 *p; | 253 | __be32 *p; |
254 | struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; | ||
255 | 255 | ||
256 | if (hdr->minorversion == 0) | 256 | if (hdr->minorversion == 0) |
257 | return; | 257 | return; |
@@ -259,8 +259,8 @@ encode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *args, | |||
259 | RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); | 259 | RESERVE_SPACE(1 + NFS4_MAX_SESSIONID_LEN + 20); |
260 | 260 | ||
261 | WRITE32(OP_CB_SEQUENCE); | 261 | WRITE32(OP_CB_SEQUENCE); |
262 | WRITEMEM(args->cbs_clp->cl_sessionid.data, NFS4_MAX_SESSIONID_LEN); | 262 | WRITEMEM(ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN); |
263 | WRITE32(args->cbs_clp->cl_cb_seq_nr); | 263 | WRITE32(ses->se_cb_seq_nr); |
264 | WRITE32(0); /* slotid, always 0 */ | 264 | WRITE32(0); /* slotid, always 0 */ |
265 | WRITE32(0); /* highest slotid always 0 */ | 265 | WRITE32(0); /* highest slotid always 0 */ |
266 | WRITE32(0); /* cachethis always 0 */ | 266 | WRITE32(0); /* cachethis always 0 */ |
@@ -280,18 +280,18 @@ nfs4_xdr_enc_cb_null(struct rpc_rqst *req, __be32 *p) | |||
280 | 280 | ||
281 | static int | 281 | static int |
282 | nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, | 282 | nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, __be32 *p, |
283 | struct nfs4_rpc_args *rpc_args) | 283 | struct nfsd4_callback *cb) |
284 | { | 284 | { |
285 | struct xdr_stream xdr; | 285 | struct xdr_stream xdr; |
286 | struct nfs4_delegation *args = rpc_args->args_op; | 286 | struct nfs4_delegation *args = cb->cb_op; |
287 | struct nfs4_cb_compound_hdr hdr = { | 287 | struct nfs4_cb_compound_hdr hdr = { |
288 | .ident = args->dl_ident, | 288 | .ident = cb->cb_clp->cl_cb_ident, |
289 | .minorversion = rpc_args->args_seq.cbs_minorversion, | 289 | .minorversion = cb->cb_minorversion, |
290 | }; | 290 | }; |
291 | 291 | ||
292 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); | 292 | xdr_init_encode(&xdr, &req->rq_snd_buf, p); |
293 | encode_cb_compound_hdr(&xdr, &hdr); | 293 | encode_cb_compound_hdr(&xdr, &hdr); |
294 | encode_cb_sequence(&xdr, &rpc_args->args_seq, &hdr); | 294 | encode_cb_sequence(&xdr, cb, &hdr); |
295 | encode_cb_recall(&xdr, args, &hdr); | 295 | encode_cb_recall(&xdr, args, &hdr); |
296 | encode_cb_nops(&hdr); | 296 | encode_cb_nops(&hdr); |
297 | return 0; | 297 | return 0; |
@@ -339,15 +339,16 @@ decode_cb_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) | |||
339 | * with a single slot. | 339 | * with a single slot. |
340 | */ | 340 | */ |
341 | static int | 341 | static int |
342 | decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, | 342 | decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_callback *cb, |
343 | struct rpc_rqst *rqstp) | 343 | struct rpc_rqst *rqstp) |
344 | { | 344 | { |
345 | struct nfsd4_session *ses = cb->cb_clp->cl_cb_session; | ||
345 | struct nfs4_sessionid id; | 346 | struct nfs4_sessionid id; |
346 | int status; | 347 | int status; |
347 | u32 dummy; | 348 | u32 dummy; |
348 | __be32 *p; | 349 | __be32 *p; |
349 | 350 | ||
350 | if (res->cbs_minorversion == 0) | 351 | if (cb->cb_minorversion == 0) |
351 | return 0; | 352 | return 0; |
352 | 353 | ||
353 | status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); | 354 | status = decode_cb_op_hdr(xdr, OP_CB_SEQUENCE); |
@@ -363,13 +364,12 @@ decode_cb_sequence(struct xdr_stream *xdr, struct nfsd4_cb_sequence *res, | |||
363 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); | 364 | READ_BUF(NFS4_MAX_SESSIONID_LEN + 16); |
364 | memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); | 365 | memcpy(id.data, p, NFS4_MAX_SESSIONID_LEN); |
365 | p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); | 366 | p += XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN); |
366 | if (memcmp(id.data, res->cbs_clp->cl_sessionid.data, | 367 | if (memcmp(id.data, ses->se_sessionid.data, NFS4_MAX_SESSIONID_LEN)) { |
367 | NFS4_MAX_SESSIONID_LEN)) { | ||
368 | dprintk("%s Invalid session id\n", __func__); | 368 | dprintk("%s Invalid session id\n", __func__); |
369 | goto out; | 369 | goto out; |
370 | } | 370 | } |
371 | READ32(dummy); | 371 | READ32(dummy); |
372 | if (dummy != res->cbs_clp->cl_cb_seq_nr) { | 372 | if (dummy != ses->se_cb_seq_nr) { |
373 | dprintk("%s Invalid sequence number\n", __func__); | 373 | dprintk("%s Invalid sequence number\n", __func__); |
374 | goto out; | 374 | goto out; |
375 | } | 375 | } |
@@ -393,7 +393,7 @@ nfs4_xdr_dec_cb_null(struct rpc_rqst *req, __be32 *p) | |||
393 | 393 | ||
394 | static int | 394 | static int |
395 | nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, | 395 | nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, |
396 | struct nfsd4_cb_sequence *seq) | 396 | struct nfsd4_callback *cb) |
397 | { | 397 | { |
398 | struct xdr_stream xdr; | 398 | struct xdr_stream xdr; |
399 | struct nfs4_cb_compound_hdr hdr; | 399 | struct nfs4_cb_compound_hdr hdr; |
@@ -403,8 +403,8 @@ nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, __be32 *p, | |||
403 | status = decode_cb_compound_hdr(&xdr, &hdr); | 403 | status = decode_cb_compound_hdr(&xdr, &hdr); |
404 | if (status) | 404 | if (status) |
405 | goto out; | 405 | goto out; |
406 | if (seq) { | 406 | if (cb) { |
407 | status = decode_cb_sequence(&xdr, seq, rqstp); | 407 | status = decode_cb_sequence(&xdr, cb, rqstp); |
408 | if (status) | 408 | if (status) |
409 | goto out; | 409 | goto out; |
410 | } | 410 | } |
@@ -473,30 +473,34 @@ static int max_cb_time(void) | |||
473 | /* Reference counting, callback cleanup, etc., all look racy as heck. | 473 | /* Reference counting, callback cleanup, etc., all look racy as heck. |
474 | * And why is cl_cb_set an atomic? */ | 474 | * And why is cl_cb_set an atomic? */ |
475 | 475 | ||
476 | int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) | 476 | int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn) |
477 | { | 477 | { |
478 | struct rpc_timeout timeparms = { | 478 | struct rpc_timeout timeparms = { |
479 | .to_initval = max_cb_time(), | 479 | .to_initval = max_cb_time(), |
480 | .to_retries = 0, | 480 | .to_retries = 0, |
481 | }; | 481 | }; |
482 | struct rpc_create_args args = { | 482 | struct rpc_create_args args = { |
483 | .protocol = XPRT_TRANSPORT_TCP, | 483 | .net = &init_net, |
484 | .address = (struct sockaddr *) &cb->cb_addr, | 484 | .address = (struct sockaddr *) &conn->cb_addr, |
485 | .addrsize = cb->cb_addrlen, | 485 | .addrsize = conn->cb_addrlen, |
486 | .timeout = &timeparms, | 486 | .timeout = &timeparms, |
487 | .program = &cb_program, | 487 | .program = &cb_program, |
488 | .prognumber = cb->cb_prog, | ||
489 | .version = 0, | 488 | .version = 0, |
490 | .authflavor = clp->cl_flavor, | 489 | .authflavor = clp->cl_flavor, |
491 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), | 490 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
492 | .client_name = clp->cl_principal, | ||
493 | }; | 491 | }; |
494 | struct rpc_clnt *client; | 492 | struct rpc_clnt *client; |
495 | 493 | ||
496 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) | 494 | if (clp->cl_minorversion == 0) { |
497 | return -EINVAL; | 495 | if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5)) |
498 | if (cb->cb_minorversion) { | 496 | return -EINVAL; |
499 | args.bc_xprt = cb->cb_xprt; | 497 | args.client_name = clp->cl_principal; |
498 | args.prognumber = conn->cb_prog, | ||
499 | args.protocol = XPRT_TRANSPORT_TCP; | ||
500 | clp->cl_cb_ident = conn->cb_ident; | ||
501 | } else { | ||
502 | args.bc_xprt = conn->cb_xprt; | ||
503 | args.prognumber = clp->cl_cb_session->se_cb_prog; | ||
500 | args.protocol = XPRT_TRANSPORT_BC_TCP; | 504 | args.protocol = XPRT_TRANSPORT_BC_TCP; |
501 | } | 505 | } |
502 | /* Create RPC client */ | 506 | /* Create RPC client */ |
@@ -506,7 +510,7 @@ int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *cb) | |||
506 | PTR_ERR(client)); | 510 | PTR_ERR(client)); |
507 | return PTR_ERR(client); | 511 | return PTR_ERR(client); |
508 | } | 512 | } |
509 | nfsd4_set_callback_client(clp, client); | 513 | clp->cl_cb_client = client; |
510 | return 0; | 514 | return 0; |
511 | 515 | ||
512 | } | 516 | } |
@@ -519,7 +523,7 @@ static void warn_no_callback_path(struct nfs4_client *clp, int reason) | |||
519 | 523 | ||
520 | static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) | 524 | static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) |
521 | { | 525 | { |
522 | struct nfs4_client *clp = calldata; | 526 | struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null); |
523 | 527 | ||
524 | if (task->tk_status) | 528 | if (task->tk_status) |
525 | warn_no_callback_path(clp, task->tk_status); | 529 | warn_no_callback_path(clp, task->tk_status); |
@@ -528,6 +532,8 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata) | |||
528 | } | 532 | } |
529 | 533 | ||
530 | static const struct rpc_call_ops nfsd4_cb_probe_ops = { | 534 | static const struct rpc_call_ops nfsd4_cb_probe_ops = { |
535 | /* XXX: release method to ensure we set the cb channel down if | ||
536 | * necessary on early failure? */ | ||
531 | .rpc_call_done = nfsd4_cb_probe_done, | 537 | .rpc_call_done = nfsd4_cb_probe_done, |
532 | }; | 538 | }; |
533 | 539 | ||
@@ -543,38 +549,42 @@ int set_callback_cred(void) | |||
543 | return 0; | 549 | return 0; |
544 | } | 550 | } |
545 | 551 | ||
552 | static struct workqueue_struct *callback_wq; | ||
546 | 553 | ||
547 | void do_probe_callback(struct nfs4_client *clp) | 554 | static void do_probe_callback(struct nfs4_client *clp) |
548 | { | 555 | { |
549 | struct rpc_message msg = { | 556 | struct nfsd4_callback *cb = &clp->cl_cb_null; |
550 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL], | ||
551 | .rpc_argp = clp, | ||
552 | .rpc_cred = callback_cred | ||
553 | }; | ||
554 | int status; | ||
555 | 557 | ||
556 | status = rpc_call_async(clp->cl_cb_client, &msg, | 558 | cb->cb_op = NULL; |
557 | RPC_TASK_SOFT | RPC_TASK_SOFTCONN, | 559 | cb->cb_clp = clp; |
558 | &nfsd4_cb_probe_ops, (void *)clp); | 560 | |
559 | if (status) | 561 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_NULL]; |
560 | warn_no_callback_path(clp, status); | 562 | cb->cb_msg.rpc_argp = NULL; |
563 | cb->cb_msg.rpc_resp = NULL; | ||
564 | cb->cb_msg.rpc_cred = callback_cred; | ||
565 | |||
566 | cb->cb_ops = &nfsd4_cb_probe_ops; | ||
567 | |||
568 | queue_work(callback_wq, &cb->cb_work); | ||
561 | } | 569 | } |
562 | 570 | ||
563 | /* | 571 | /* |
564 | * Set up the callback client and put a NFSPROC4_CB_NULL on the wire... | 572 | * Poke the callback thread to process any updates to the callback |
573 | * parameters, and send a null probe. | ||
565 | */ | 574 | */ |
566 | void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) | 575 | void nfsd4_probe_callback(struct nfs4_client *clp) |
567 | { | 576 | { |
568 | int status; | 577 | set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); |
578 | do_probe_callback(clp); | ||
579 | } | ||
569 | 580 | ||
581 | void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *conn) | ||
582 | { | ||
570 | BUG_ON(atomic_read(&clp->cl_cb_set)); | 583 | BUG_ON(atomic_read(&clp->cl_cb_set)); |
571 | 584 | ||
572 | status = setup_callback_client(clp, cb); | 585 | spin_lock(&clp->cl_lock); |
573 | if (status) { | 586 | memcpy(&clp->cl_cb_conn, conn, sizeof(struct nfs4_cb_conn)); |
574 | warn_no_callback_path(clp, status); | 587 | spin_unlock(&clp->cl_lock); |
575 | return; | ||
576 | } | ||
577 | do_probe_callback(clp); | ||
578 | } | 588 | } |
579 | 589 | ||
580 | /* | 590 | /* |
@@ -585,8 +595,7 @@ void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *cb) | |||
585 | static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, | 595 | static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, |
586 | struct rpc_task *task) | 596 | struct rpc_task *task) |
587 | { | 597 | { |
588 | struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; | 598 | u32 *ptr = (u32 *)clp->cl_cb_session->se_sessionid.data; |
589 | u32 *ptr = (u32 *)clp->cl_sessionid.data; | ||
590 | int status = 0; | 599 | int status = 0; |
591 | 600 | ||
592 | dprintk("%s: %u:%u:%u:%u\n", __func__, | 601 | dprintk("%s: %u:%u:%u:%u\n", __func__, |
@@ -598,14 +607,6 @@ static int nfsd41_cb_setup_sequence(struct nfs4_client *clp, | |||
598 | status = -EAGAIN; | 607 | status = -EAGAIN; |
599 | goto out; | 608 | goto out; |
600 | } | 609 | } |
601 | |||
602 | /* | ||
603 | * We'll need the clp during XDR encoding and decoding, | ||
604 | * and the sequence during decoding to verify the reply | ||
605 | */ | ||
606 | args->args_seq.cbs_clp = clp; | ||
607 | task->tk_msg.rpc_resp = &args->args_seq; | ||
608 | |||
609 | out: | 610 | out: |
610 | dprintk("%s status=%d\n", __func__, status); | 611 | dprintk("%s status=%d\n", __func__, status); |
611 | return status; | 612 | return status; |
@@ -617,13 +618,13 @@ out: | |||
617 | */ | 618 | */ |
618 | static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | 619 | static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) |
619 | { | 620 | { |
620 | struct nfs4_delegation *dp = calldata; | 621 | struct nfsd4_callback *cb = calldata; |
622 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
621 | struct nfs4_client *clp = dp->dl_client; | 623 | struct nfs4_client *clp = dp->dl_client; |
622 | struct nfs4_rpc_args *args = task->tk_msg.rpc_argp; | 624 | u32 minorversion = clp->cl_minorversion; |
623 | u32 minorversion = clp->cl_cb_conn.cb_minorversion; | ||
624 | int status = 0; | 625 | int status = 0; |
625 | 626 | ||
626 | args->args_seq.cbs_minorversion = minorversion; | 627 | cb->cb_minorversion = minorversion; |
627 | if (minorversion) { | 628 | if (minorversion) { |
628 | status = nfsd41_cb_setup_sequence(clp, task); | 629 | status = nfsd41_cb_setup_sequence(clp, task); |
629 | if (status) { | 630 | if (status) { |
@@ -640,19 +641,20 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) | |||
640 | 641 | ||
641 | static void nfsd4_cb_done(struct rpc_task *task, void *calldata) | 642 | static void nfsd4_cb_done(struct rpc_task *task, void *calldata) |
642 | { | 643 | { |
643 | struct nfs4_delegation *dp = calldata; | 644 | struct nfsd4_callback *cb = calldata; |
645 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
644 | struct nfs4_client *clp = dp->dl_client; | 646 | struct nfs4_client *clp = dp->dl_client; |
645 | 647 | ||
646 | dprintk("%s: minorversion=%d\n", __func__, | 648 | dprintk("%s: minorversion=%d\n", __func__, |
647 | clp->cl_cb_conn.cb_minorversion); | 649 | clp->cl_minorversion); |
648 | 650 | ||
649 | if (clp->cl_cb_conn.cb_minorversion) { | 651 | if (clp->cl_minorversion) { |
650 | /* No need for lock, access serialized in nfsd4_cb_prepare */ | 652 | /* No need for lock, access serialized in nfsd4_cb_prepare */ |
651 | ++clp->cl_cb_seq_nr; | 653 | ++clp->cl_cb_session->se_cb_seq_nr; |
652 | clear_bit(0, &clp->cl_cb_slot_busy); | 654 | clear_bit(0, &clp->cl_cb_slot_busy); |
653 | rpc_wake_up_next(&clp->cl_cb_waitq); | 655 | rpc_wake_up_next(&clp->cl_cb_waitq); |
654 | dprintk("%s: freed slot, new seqid=%d\n", __func__, | 656 | dprintk("%s: freed slot, new seqid=%d\n", __func__, |
655 | clp->cl_cb_seq_nr); | 657 | clp->cl_cb_session->se_cb_seq_nr); |
656 | 658 | ||
657 | /* We're done looking into the sequence information */ | 659 | /* We're done looking into the sequence information */ |
658 | task->tk_msg.rpc_resp = NULL; | 660 | task->tk_msg.rpc_resp = NULL; |
@@ -662,7 +664,8 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) | |||
662 | 664 | ||
663 | static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | 665 | static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) |
664 | { | 666 | { |
665 | struct nfs4_delegation *dp = calldata; | 667 | struct nfsd4_callback *cb = calldata; |
668 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
666 | struct nfs4_client *clp = dp->dl_client; | 669 | struct nfs4_client *clp = dp->dl_client; |
667 | struct rpc_clnt *current_rpc_client = clp->cl_cb_client; | 670 | struct rpc_clnt *current_rpc_client = clp->cl_cb_client; |
668 | 671 | ||
@@ -707,7 +710,8 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata) | |||
707 | 710 | ||
708 | static void nfsd4_cb_recall_release(void *calldata) | 711 | static void nfsd4_cb_recall_release(void *calldata) |
709 | { | 712 | { |
710 | struct nfs4_delegation *dp = calldata; | 713 | struct nfsd4_callback *cb = calldata; |
714 | struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall); | ||
711 | 715 | ||
712 | nfs4_put_delegation(dp); | 716 | nfs4_put_delegation(dp); |
713 | } | 717 | } |
@@ -718,8 +722,6 @@ static const struct rpc_call_ops nfsd4_cb_recall_ops = { | |||
718 | .rpc_release = nfsd4_cb_recall_release, | 722 | .rpc_release = nfsd4_cb_recall_release, |
719 | }; | 723 | }; |
720 | 724 | ||
721 | static struct workqueue_struct *callback_wq; | ||
722 | |||
723 | int nfsd4_create_callback_queue(void) | 725 | int nfsd4_create_callback_queue(void) |
724 | { | 726 | { |
725 | callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); | 727 | callback_wq = create_singlethread_workqueue("nfsd4_callbacks"); |
@@ -734,57 +736,88 @@ void nfsd4_destroy_callback_queue(void) | |||
734 | } | 736 | } |
735 | 737 | ||
736 | /* must be called under the state lock */ | 738 | /* must be called under the state lock */ |
737 | void nfsd4_set_callback_client(struct nfs4_client *clp, struct rpc_clnt *new) | 739 | void nfsd4_shutdown_callback(struct nfs4_client *clp) |
738 | { | 740 | { |
739 | struct rpc_clnt *old = clp->cl_cb_client; | 741 | set_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags); |
740 | |||
741 | clp->cl_cb_client = new; | ||
742 | /* | 742 | /* |
743 | * After this, any work that saw the old value of cl_cb_client will | 743 | * Note this won't actually result in a null callback; |
744 | * be gone: | 744 | * instead, nfsd4_do_callback_rpc() will detect the killed |
745 | * client, destroy the rpc client, and stop: | ||
745 | */ | 746 | */ |
747 | do_probe_callback(clp); | ||
746 | flush_workqueue(callback_wq); | 748 | flush_workqueue(callback_wq); |
747 | /* So we can safely shut it down: */ | ||
748 | if (old) | ||
749 | rpc_shutdown_client(old); | ||
750 | } | 749 | } |
751 | 750 | ||
752 | /* | 751 | void nfsd4_release_cb(struct nfsd4_callback *cb) |
753 | * called with dp->dl_count inc'ed. | ||
754 | */ | ||
755 | static void _nfsd4_cb_recall(struct nfs4_delegation *dp) | ||
756 | { | 752 | { |
757 | struct nfs4_client *clp = dp->dl_client; | 753 | if (cb->cb_ops->rpc_release) |
758 | struct rpc_clnt *clnt = clp->cl_cb_client; | 754 | cb->cb_ops->rpc_release(cb); |
759 | struct nfs4_rpc_args *args = &dp->dl_recall.cb_args; | 755 | } |
760 | struct rpc_message msg = { | ||
761 | .rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL], | ||
762 | .rpc_cred = callback_cred | ||
763 | }; | ||
764 | 756 | ||
765 | if (clnt == NULL) { | 757 | void nfsd4_process_cb_update(struct nfsd4_callback *cb) |
766 | nfs4_put_delegation(dp); | 758 | { |
767 | return; /* Client is shutting down; give up. */ | 759 | struct nfs4_cb_conn conn; |
760 | struct nfs4_client *clp = cb->cb_clp; | ||
761 | int err; | ||
762 | |||
763 | /* | ||
764 | * This is either an update, or the client dying; in either case, | ||
765 | * kill the old client: | ||
766 | */ | ||
767 | if (clp->cl_cb_client) { | ||
768 | rpc_shutdown_client(clp->cl_cb_client); | ||
769 | clp->cl_cb_client = NULL; | ||
768 | } | 770 | } |
771 | if (test_bit(NFSD4_CLIENT_KILL, &clp->cl_cb_flags)) | ||
772 | return; | ||
773 | spin_lock(&clp->cl_lock); | ||
774 | /* | ||
775 | * Only serialized callback code is allowed to clear these | ||
776 | * flags; main nfsd code can only set them: | ||
777 | */ | ||
778 | BUG_ON(!clp->cl_cb_flags); | ||
779 | clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_cb_flags); | ||
780 | memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn)); | ||
781 | spin_unlock(&clp->cl_lock); | ||
769 | 782 | ||
770 | args->args_op = dp; | 783 | err = setup_callback_client(clp, &conn); |
771 | msg.rpc_argp = args; | 784 | if (err) |
772 | dp->dl_retries = 1; | 785 | warn_no_callback_path(clp, err); |
773 | rpc_call_async(clnt, &msg, RPC_TASK_SOFT, &nfsd4_cb_recall_ops, dp); | ||
774 | } | 786 | } |
775 | 787 | ||
776 | void nfsd4_do_callback_rpc(struct work_struct *w) | 788 | void nfsd4_do_callback_rpc(struct work_struct *w) |
777 | { | 789 | { |
778 | /* XXX: for now, just send off delegation recall. */ | 790 | struct nfsd4_callback *cb = container_of(w, struct nfsd4_callback, cb_work); |
779 | /* In future, generalize to handle any sort of callback. */ | 791 | struct nfs4_client *clp = cb->cb_clp; |
780 | struct nfsd4_callback *c = container_of(w, struct nfsd4_callback, cb_work); | 792 | struct rpc_clnt *clnt; |
781 | struct nfs4_delegation *dp = container_of(c, struct nfs4_delegation, dl_recall); | ||
782 | 793 | ||
783 | _nfsd4_cb_recall(dp); | 794 | if (clp->cl_cb_flags) |
784 | } | 795 | nfsd4_process_cb_update(cb); |
785 | 796 | ||
797 | clnt = clp->cl_cb_client; | ||
798 | if (!clnt) { | ||
799 | /* Callback channel broken, or client killed; give up: */ | ||
800 | nfsd4_release_cb(cb); | ||
801 | return; | ||
802 | } | ||
803 | rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, | ||
804 | cb->cb_ops, cb); | ||
805 | } | ||
786 | 806 | ||
787 | void nfsd4_cb_recall(struct nfs4_delegation *dp) | 807 | void nfsd4_cb_recall(struct nfs4_delegation *dp) |
788 | { | 808 | { |
809 | struct nfsd4_callback *cb = &dp->dl_recall; | ||
810 | |||
811 | dp->dl_retries = 1; | ||
812 | cb->cb_op = dp; | ||
813 | cb->cb_clp = dp->dl_client; | ||
814 | cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_RECALL]; | ||
815 | cb->cb_msg.rpc_argp = cb; | ||
816 | cb->cb_msg.rpc_resp = cb; | ||
817 | cb->cb_msg.rpc_cred = callback_cred; | ||
818 | |||
819 | cb->cb_ops = &nfsd4_cb_recall_ops; | ||
820 | dp->dl_retries = 1; | ||
821 | |||
789 | queue_work(callback_wq, &dp->dl_recall.cb_work); | 822 | queue_work(callback_wq, &dp->dl_recall.cb_work); |
790 | } | 823 | } |
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index c78dbf493424..f0695e815f0e 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c | |||
@@ -482,109 +482,26 @@ nfsd_idmap_shutdown(void) | |||
482 | cache_unregister(&nametoid_cache); | 482 | cache_unregister(&nametoid_cache); |
483 | } | 483 | } |
484 | 484 | ||
485 | /* | ||
486 | * Deferred request handling | ||
487 | */ | ||
488 | |||
489 | struct idmap_defer_req { | ||
490 | struct cache_req req; | ||
491 | struct cache_deferred_req deferred_req; | ||
492 | wait_queue_head_t waitq; | ||
493 | atomic_t count; | ||
494 | }; | ||
495 | |||
496 | static inline void | ||
497 | put_mdr(struct idmap_defer_req *mdr) | ||
498 | { | ||
499 | if (atomic_dec_and_test(&mdr->count)) | ||
500 | kfree(mdr); | ||
501 | } | ||
502 | |||
503 | static inline void | ||
504 | get_mdr(struct idmap_defer_req *mdr) | ||
505 | { | ||
506 | atomic_inc(&mdr->count); | ||
507 | } | ||
508 | |||
509 | static void | ||
510 | idmap_revisit(struct cache_deferred_req *dreq, int toomany) | ||
511 | { | ||
512 | struct idmap_defer_req *mdr = | ||
513 | container_of(dreq, struct idmap_defer_req, deferred_req); | ||
514 | |||
515 | wake_up(&mdr->waitq); | ||
516 | put_mdr(mdr); | ||
517 | } | ||
518 | |||
519 | static struct cache_deferred_req * | ||
520 | idmap_defer(struct cache_req *req) | ||
521 | { | ||
522 | struct idmap_defer_req *mdr = | ||
523 | container_of(req, struct idmap_defer_req, req); | ||
524 | |||
525 | mdr->deferred_req.revisit = idmap_revisit; | ||
526 | get_mdr(mdr); | ||
527 | return (&mdr->deferred_req); | ||
528 | } | ||
529 | |||
530 | static inline int | ||
531 | do_idmap_lookup(struct ent *(*lookup_fn)(struct ent *), struct ent *key, | ||
532 | struct cache_detail *detail, struct ent **item, | ||
533 | struct idmap_defer_req *mdr) | ||
534 | { | ||
535 | *item = lookup_fn(key); | ||
536 | if (!*item) | ||
537 | return -ENOMEM; | ||
538 | return cache_check(detail, &(*item)->h, &mdr->req); | ||
539 | } | ||
540 | |||
541 | static inline int | ||
542 | do_idmap_lookup_nowait(struct ent *(*lookup_fn)(struct ent *), | ||
543 | struct ent *key, struct cache_detail *detail, | ||
544 | struct ent **item) | ||
545 | { | ||
546 | int ret = -ENOMEM; | ||
547 | |||
548 | *item = lookup_fn(key); | ||
549 | if (!*item) | ||
550 | goto out_err; | ||
551 | ret = -ETIMEDOUT; | ||
552 | if (!test_bit(CACHE_VALID, &(*item)->h.flags) | ||
553 | || (*item)->h.expiry_time < get_seconds() | ||
554 | || detail->flush_time > (*item)->h.last_refresh) | ||
555 | goto out_put; | ||
556 | ret = -ENOENT; | ||
557 | if (test_bit(CACHE_NEGATIVE, &(*item)->h.flags)) | ||
558 | goto out_put; | ||
559 | return 0; | ||
560 | out_put: | ||
561 | cache_put(&(*item)->h, detail); | ||
562 | out_err: | ||
563 | *item = NULL; | ||
564 | return ret; | ||
565 | } | ||
566 | |||
567 | static int | 485 | static int |
568 | idmap_lookup(struct svc_rqst *rqstp, | 486 | idmap_lookup(struct svc_rqst *rqstp, |
569 | struct ent *(*lookup_fn)(struct ent *), struct ent *key, | 487 | struct ent *(*lookup_fn)(struct ent *), struct ent *key, |
570 | struct cache_detail *detail, struct ent **item) | 488 | struct cache_detail *detail, struct ent **item) |
571 | { | 489 | { |
572 | struct idmap_defer_req *mdr; | ||
573 | int ret; | 490 | int ret; |
574 | 491 | ||
575 | mdr = kzalloc(sizeof(*mdr), GFP_KERNEL); | 492 | *item = lookup_fn(key); |
576 | if (!mdr) | 493 | if (!*item) |
577 | return -ENOMEM; | 494 | return -ENOMEM; |
578 | atomic_set(&mdr->count, 1); | 495 | retry: |
579 | init_waitqueue_head(&mdr->waitq); | 496 | ret = cache_check(detail, &(*item)->h, &rqstp->rq_chandle); |
580 | mdr->req.defer = idmap_defer; | 497 | |
581 | ret = do_idmap_lookup(lookup_fn, key, detail, item, mdr); | 498 | if (ret == -ETIMEDOUT) { |
582 | if (ret == -EAGAIN) { | 499 | struct ent *prev_item = *item; |
583 | wait_event_interruptible_timeout(mdr->waitq, | 500 | *item = lookup_fn(key); |
584 | test_bit(CACHE_VALID, &(*item)->h.flags), 1 * HZ); | 501 | if (*item != prev_item) |
585 | ret = do_idmap_lookup_nowait(lookup_fn, key, detail, item); | 502 | goto retry; |
503 | cache_put(&(*item)->h, detail); | ||
586 | } | 504 | } |
587 | put_mdr(mdr); | ||
588 | return ret; | 505 | return ret; |
589 | } | 506 | } |
590 | 507 | ||
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 59ec449b0c7f..0cdfd022bb7b 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -1031,8 +1031,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
1031 | resp->cstate.session = NULL; | 1031 | resp->cstate.session = NULL; |
1032 | fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); | 1032 | fh_init(&resp->cstate.current_fh, NFS4_FHSIZE); |
1033 | fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); | 1033 | fh_init(&resp->cstate.save_fh, NFS4_FHSIZE); |
1034 | /* Use the deferral mechanism only for NFSv4.0 compounds */ | 1034 | /* |
1035 | rqstp->rq_usedeferral = (args->minorversion == 0); | 1035 | * Don't use the deferral mechanism for NFSv4; compounds make it |
1036 | * too hard to avoid non-idempotency problems. | ||
1037 | */ | ||
1038 | rqstp->rq_usedeferral = 0; | ||
1036 | 1039 | ||
1037 | /* | 1040 | /* |
1038 | * According to RFC3010, this takes precedence over all other errors. | 1041 | * According to RFC3010, this takes precedence over all other errors. |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index cf0d2ffb3c84..56347e0ac88d 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
@@ -33,7 +33,7 @@ | |||
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/file.h> | 35 | #include <linux/file.h> |
36 | #include <linux/smp_lock.h> | 36 | #include <linux/fs.h> |
37 | #include <linux/slab.h> | 37 | #include <linux/slab.h> |
38 | #include <linux/namei.h> | 38 | #include <linux/namei.h> |
39 | #include <linux/swap.h> | 39 | #include <linux/swap.h> |
@@ -207,7 +207,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
207 | { | 207 | { |
208 | struct nfs4_delegation *dp; | 208 | struct nfs4_delegation *dp; |
209 | struct nfs4_file *fp = stp->st_file; | 209 | struct nfs4_file *fp = stp->st_file; |
210 | struct nfs4_cb_conn *cb = &stp->st_stateowner->so_client->cl_cb_conn; | ||
211 | 210 | ||
212 | dprintk("NFSD alloc_init_deleg\n"); | 211 | dprintk("NFSD alloc_init_deleg\n"); |
213 | /* | 212 | /* |
@@ -234,7 +233,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f | |||
234 | nfs4_file_get_access(fp, O_RDONLY); | 233 | nfs4_file_get_access(fp, O_RDONLY); |
235 | dp->dl_flock = NULL; | 234 | dp->dl_flock = NULL; |
236 | dp->dl_type = type; | 235 | dp->dl_type = type; |
237 | dp->dl_ident = cb->cb_ident; | ||
238 | dp->dl_stateid.si_boot = boot_time; | 236 | dp->dl_stateid.si_boot = boot_time; |
239 | dp->dl_stateid.si_stateownerid = current_delegid++; | 237 | dp->dl_stateid.si_stateownerid = current_delegid++; |
240 | dp->dl_stateid.si_fileid = 0; | 238 | dp->dl_stateid.si_fileid = 0; |
@@ -535,171 +533,258 @@ gen_sessionid(struct nfsd4_session *ses) | |||
535 | */ | 533 | */ |
536 | #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) | 534 | #define NFSD_MIN_HDR_SEQ_SZ (24 + 12 + 44) |
537 | 535 | ||
536 | static void | ||
537 | free_session_slots(struct nfsd4_session *ses) | ||
538 | { | ||
539 | int i; | ||
540 | |||
541 | for (i = 0; i < ses->se_fchannel.maxreqs; i++) | ||
542 | kfree(ses->se_slots[i]); | ||
543 | } | ||
544 | |||
538 | /* | 545 | /* |
539 | * Give the client the number of ca_maxresponsesize_cached slots it | 546 | * We don't actually need to cache the rpc and session headers, so we |
540 | * requests, of size bounded by NFSD_SLOT_CACHE_SIZE, | 547 | * can allocate a little less for each slot: |
541 | * NFSD_MAX_MEM_PER_SESSION, and nfsd_drc_max_mem. Do not allow more | 548 | */ |
542 | * than NFSD_MAX_SLOTS_PER_SESSION. | 549 | static inline int slot_bytes(struct nfsd4_channel_attrs *ca) |
543 | * | 550 | { |
544 | * If we run out of reserved DRC memory we should (up to a point) | 551 | return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; |
552 | } | ||
553 | |||
554 | static int nfsd4_sanitize_slot_size(u32 size) | ||
555 | { | ||
556 | size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */ | ||
557 | size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE); | ||
558 | |||
559 | return size; | ||
560 | } | ||
561 | |||
562 | /* | ||
563 | * XXX: If we run out of reserved DRC memory we could (up to a point) | ||
545 | * re-negotiate active sessions and reduce their slot usage to make | 564 | * re-negotiate active sessions and reduce their slot usage to make |
546 | * rooom for new connections. For now we just fail the create session. | 565 | * rooom for new connections. For now we just fail the create session. |
547 | */ | 566 | */ |
548 | static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan) | 567 | static int nfsd4_get_drc_mem(int slotsize, u32 num) |
549 | { | 568 | { |
550 | int mem, size = fchan->maxresp_cached; | 569 | int avail; |
551 | 570 | ||
552 | if (fchan->maxreqs < 1) | 571 | num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION); |
553 | return nfserr_inval; | ||
554 | 572 | ||
555 | if (size < NFSD_MIN_HDR_SEQ_SZ) | 573 | spin_lock(&nfsd_drc_lock); |
556 | size = NFSD_MIN_HDR_SEQ_SZ; | 574 | avail = min_t(int, NFSD_MAX_MEM_PER_SESSION, |
557 | size -= NFSD_MIN_HDR_SEQ_SZ; | 575 | nfsd_drc_max_mem - nfsd_drc_mem_used); |
558 | if (size > NFSD_SLOT_CACHE_SIZE) | 576 | num = min_t(int, num, avail / slotsize); |
559 | size = NFSD_SLOT_CACHE_SIZE; | 577 | nfsd_drc_mem_used += num * slotsize; |
560 | 578 | spin_unlock(&nfsd_drc_lock); | |
561 | /* bound the maxreqs by NFSD_MAX_MEM_PER_SESSION */ | ||
562 | mem = fchan->maxreqs * size; | ||
563 | if (mem > NFSD_MAX_MEM_PER_SESSION) { | ||
564 | fchan->maxreqs = NFSD_MAX_MEM_PER_SESSION / size; | ||
565 | if (fchan->maxreqs > NFSD_MAX_SLOTS_PER_SESSION) | ||
566 | fchan->maxreqs = NFSD_MAX_SLOTS_PER_SESSION; | ||
567 | mem = fchan->maxreqs * size; | ||
568 | } | ||
569 | 579 | ||
580 | return num; | ||
581 | } | ||
582 | |||
583 | static void nfsd4_put_drc_mem(int slotsize, int num) | ||
584 | { | ||
570 | spin_lock(&nfsd_drc_lock); | 585 | spin_lock(&nfsd_drc_lock); |
571 | /* bound the total session drc memory ussage */ | 586 | nfsd_drc_mem_used -= slotsize * num; |
572 | if (mem + nfsd_drc_mem_used > nfsd_drc_max_mem) { | ||
573 | fchan->maxreqs = (nfsd_drc_max_mem - nfsd_drc_mem_used) / size; | ||
574 | mem = fchan->maxreqs * size; | ||
575 | } | ||
576 | nfsd_drc_mem_used += mem; | ||
577 | spin_unlock(&nfsd_drc_lock); | 587 | spin_unlock(&nfsd_drc_lock); |
588 | } | ||
578 | 589 | ||
579 | if (fchan->maxreqs == 0) | 590 | static struct nfsd4_session *alloc_session(int slotsize, int numslots) |
580 | return nfserr_jukebox; | 591 | { |
592 | struct nfsd4_session *new; | ||
593 | int mem, i; | ||
581 | 594 | ||
582 | fchan->maxresp_cached = size + NFSD_MIN_HDR_SEQ_SZ; | 595 | BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot *) |
583 | return 0; | 596 | + sizeof(struct nfsd4_session) > PAGE_SIZE); |
597 | mem = numslots * sizeof(struct nfsd4_slot *); | ||
598 | |||
599 | new = kzalloc(sizeof(*new) + mem, GFP_KERNEL); | ||
600 | if (!new) | ||
601 | return NULL; | ||
602 | /* allocate each struct nfsd4_slot and data cache in one piece */ | ||
603 | for (i = 0; i < numslots; i++) { | ||
604 | mem = sizeof(struct nfsd4_slot) + slotsize; | ||
605 | new->se_slots[i] = kzalloc(mem, GFP_KERNEL); | ||
606 | if (!new->se_slots[i]) | ||
607 | goto out_free; | ||
608 | } | ||
609 | return new; | ||
610 | out_free: | ||
611 | while (i--) | ||
612 | kfree(new->se_slots[i]); | ||
613 | kfree(new); | ||
614 | return NULL; | ||
584 | } | 615 | } |
585 | 616 | ||
586 | /* | 617 | static void init_forechannel_attrs(struct nfsd4_channel_attrs *new, struct nfsd4_channel_attrs *req, int numslots, int slotsize) |
587 | * fchan holds the client values on input, and the server values on output | ||
588 | * sv_max_mesg is the maximum payload plus one page for overhead. | ||
589 | */ | ||
590 | static int init_forechannel_attrs(struct svc_rqst *rqstp, | ||
591 | struct nfsd4_channel_attrs *session_fchan, | ||
592 | struct nfsd4_channel_attrs *fchan) | ||
593 | { | 618 | { |
594 | int status = 0; | 619 | u32 maxrpc = nfsd_serv->sv_max_mesg; |
595 | __u32 maxcount = nfsd_serv->sv_max_mesg; | ||
596 | 620 | ||
597 | /* headerpadsz set to zero in encode routine */ | 621 | new->maxreqs = numslots; |
622 | new->maxresp_cached = slotsize + NFSD_MIN_HDR_SEQ_SZ; | ||
623 | new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc); | ||
624 | new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc); | ||
625 | new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND); | ||
626 | } | ||
598 | 627 | ||
599 | /* Use the client's max request and max response size if possible */ | 628 | static void free_conn(struct nfsd4_conn *c) |
600 | if (fchan->maxreq_sz > maxcount) | 629 | { |
601 | fchan->maxreq_sz = maxcount; | 630 | svc_xprt_put(c->cn_xprt); |
602 | session_fchan->maxreq_sz = fchan->maxreq_sz; | 631 | kfree(c); |
632 | } | ||
603 | 633 | ||
604 | if (fchan->maxresp_sz > maxcount) | 634 | static void nfsd4_conn_lost(struct svc_xpt_user *u) |
605 | fchan->maxresp_sz = maxcount; | 635 | { |
606 | session_fchan->maxresp_sz = fchan->maxresp_sz; | 636 | struct nfsd4_conn *c = container_of(u, struct nfsd4_conn, cn_xpt_user); |
637 | struct nfs4_client *clp = c->cn_session->se_client; | ||
607 | 638 | ||
608 | /* Use the client's maxops if possible */ | 639 | spin_lock(&clp->cl_lock); |
609 | if (fchan->maxops > NFSD_MAX_OPS_PER_COMPOUND) | 640 | if (!list_empty(&c->cn_persession)) { |
610 | fchan->maxops = NFSD_MAX_OPS_PER_COMPOUND; | 641 | list_del(&c->cn_persession); |
611 | session_fchan->maxops = fchan->maxops; | 642 | free_conn(c); |
643 | } | ||
644 | spin_unlock(&clp->cl_lock); | ||
645 | } | ||
612 | 646 | ||
613 | /* FIXME: Error means no more DRC pages so the server should | 647 | static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags) |
614 | * recover pages from existing sessions. For now fail session | 648 | { |
615 | * creation. | 649 | struct nfsd4_conn *conn; |
616 | */ | ||
617 | status = set_forechannel_drc_size(fchan); | ||
618 | 650 | ||
619 | session_fchan->maxresp_cached = fchan->maxresp_cached; | 651 | conn = kmalloc(sizeof(struct nfsd4_conn), GFP_KERNEL); |
620 | session_fchan->maxreqs = fchan->maxreqs; | 652 | if (!conn) |
653 | return NULL; | ||
654 | svc_xprt_get(rqstp->rq_xprt); | ||
655 | conn->cn_xprt = rqstp->rq_xprt; | ||
656 | conn->cn_flags = flags; | ||
657 | INIT_LIST_HEAD(&conn->cn_xpt_user.list); | ||
658 | return conn; | ||
659 | } | ||
621 | 660 | ||
622 | dprintk("%s status %d\n", __func__, status); | 661 | static void __nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) |
623 | return status; | 662 | { |
663 | conn->cn_session = ses; | ||
664 | list_add(&conn->cn_persession, &ses->se_conns); | ||
624 | } | 665 | } |
625 | 666 | ||
626 | static void | 667 | static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses) |
627 | free_session_slots(struct nfsd4_session *ses) | ||
628 | { | 668 | { |
629 | int i; | 669 | struct nfs4_client *clp = ses->se_client; |
630 | 670 | ||
631 | for (i = 0; i < ses->se_fchannel.maxreqs; i++) | 671 | spin_lock(&clp->cl_lock); |
632 | kfree(ses->se_slots[i]); | 672 | __nfsd4_hash_conn(conn, ses); |
673 | spin_unlock(&clp->cl_lock); | ||
633 | } | 674 | } |
634 | 675 | ||
635 | /* | 676 | static void nfsd4_register_conn(struct nfsd4_conn *conn) |
636 | * We don't actually need to cache the rpc and session headers, so we | ||
637 | * can allocate a little less for each slot: | ||
638 | */ | ||
639 | static inline int slot_bytes(struct nfsd4_channel_attrs *ca) | ||
640 | { | 677 | { |
641 | return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ; | 678 | conn->cn_xpt_user.callback = nfsd4_conn_lost; |
679 | register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user); | ||
642 | } | 680 | } |
643 | 681 | ||
644 | static int | 682 | static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses) |
645 | alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, | ||
646 | struct nfsd4_create_session *cses) | ||
647 | { | 683 | { |
648 | struct nfsd4_session *new, tmp; | 684 | struct nfsd4_conn *conn; |
649 | struct nfsd4_slot *sp; | 685 | u32 flags = NFS4_CDFC4_FORE; |
650 | int idx, slotsize, cachesize, i; | ||
651 | int status; | ||
652 | 686 | ||
653 | memset(&tmp, 0, sizeof(tmp)); | 687 | if (ses->se_flags & SESSION4_BACK_CHAN) |
688 | flags |= NFS4_CDFC4_BACK; | ||
689 | conn = alloc_conn(rqstp, flags); | ||
690 | if (!conn) | ||
691 | return nfserr_jukebox; | ||
692 | nfsd4_hash_conn(conn, ses); | ||
693 | nfsd4_register_conn(conn); | ||
694 | return nfs_ok; | ||
695 | } | ||
654 | 696 | ||
655 | /* FIXME: For now, we just accept the client back channel attributes. */ | 697 | static void nfsd4_del_conns(struct nfsd4_session *s) |
656 | tmp.se_bchannel = cses->back_channel; | 698 | { |
657 | status = init_forechannel_attrs(rqstp, &tmp.se_fchannel, | 699 | struct nfs4_client *clp = s->se_client; |
658 | &cses->fore_channel); | 700 | struct nfsd4_conn *c; |
659 | if (status) | ||
660 | goto out; | ||
661 | 701 | ||
662 | BUILD_BUG_ON(NFSD_MAX_SLOTS_PER_SESSION * sizeof(struct nfsd4_slot) | 702 | spin_lock(&clp->cl_lock); |
663 | + sizeof(struct nfsd4_session) > PAGE_SIZE); | 703 | while (!list_empty(&s->se_conns)) { |
704 | c = list_first_entry(&s->se_conns, struct nfsd4_conn, cn_persession); | ||
705 | list_del_init(&c->cn_persession); | ||
706 | spin_unlock(&clp->cl_lock); | ||
664 | 707 | ||
665 | status = nfserr_jukebox; | 708 | unregister_xpt_user(c->cn_xprt, &c->cn_xpt_user); |
666 | /* allocate struct nfsd4_session and slot table pointers in one piece */ | 709 | free_conn(c); |
667 | slotsize = tmp.se_fchannel.maxreqs * sizeof(struct nfsd4_slot *); | ||
668 | new = kzalloc(sizeof(*new) + slotsize, GFP_KERNEL); | ||
669 | if (!new) | ||
670 | goto out; | ||
671 | 710 | ||
672 | memcpy(new, &tmp, sizeof(*new)); | 711 | spin_lock(&clp->cl_lock); |
712 | } | ||
713 | spin_unlock(&clp->cl_lock); | ||
714 | } | ||
673 | 715 | ||
674 | /* allocate each struct nfsd4_slot and data cache in one piece */ | 716 | void free_session(struct kref *kref) |
675 | cachesize = slot_bytes(&new->se_fchannel); | 717 | { |
676 | for (i = 0; i < new->se_fchannel.maxreqs; i++) { | 718 | struct nfsd4_session *ses; |
677 | sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL); | 719 | int mem; |
678 | if (!sp) | 720 | |
679 | goto out_free; | 721 | ses = container_of(kref, struct nfsd4_session, se_ref); |
680 | new->se_slots[i] = sp; | 722 | nfsd4_del_conns(ses); |
723 | spin_lock(&nfsd_drc_lock); | ||
724 | mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); | ||
725 | nfsd_drc_mem_used -= mem; | ||
726 | spin_unlock(&nfsd_drc_lock); | ||
727 | free_session_slots(ses); | ||
728 | kfree(ses); | ||
729 | } | ||
730 | |||
731 | static struct nfsd4_session *alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp, struct nfsd4_create_session *cses) | ||
732 | { | ||
733 | struct nfsd4_session *new; | ||
734 | struct nfsd4_channel_attrs *fchan = &cses->fore_channel; | ||
735 | int numslots, slotsize; | ||
736 | int status; | ||
737 | int idx; | ||
738 | |||
739 | /* | ||
740 | * Note decreasing slot size below client's request may | ||
741 | * make it difficult for client to function correctly, whereas | ||
742 | * decreasing the number of slots will (just?) affect | ||
743 | * performance. When short on memory we therefore prefer to | ||
744 | * decrease number of slots instead of their size. | ||
745 | */ | ||
746 | slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached); | ||
747 | numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs); | ||
748 | |||
749 | new = alloc_session(slotsize, numslots); | ||
750 | if (!new) { | ||
751 | nfsd4_put_drc_mem(slotsize, fchan->maxreqs); | ||
752 | return NULL; | ||
681 | } | 753 | } |
754 | init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize); | ||
682 | 755 | ||
683 | new->se_client = clp; | 756 | new->se_client = clp; |
684 | gen_sessionid(new); | 757 | gen_sessionid(new); |
685 | idx = hash_sessionid(&new->se_sessionid); | ||
686 | memcpy(clp->cl_sessionid.data, new->se_sessionid.data, | ||
687 | NFS4_MAX_SESSIONID_LEN); | ||
688 | 758 | ||
759 | INIT_LIST_HEAD(&new->se_conns); | ||
760 | |||
761 | new->se_cb_seq_nr = 1; | ||
689 | new->se_flags = cses->flags; | 762 | new->se_flags = cses->flags; |
763 | new->se_cb_prog = cses->callback_prog; | ||
690 | kref_init(&new->se_ref); | 764 | kref_init(&new->se_ref); |
765 | idx = hash_sessionid(&new->se_sessionid); | ||
691 | spin_lock(&client_lock); | 766 | spin_lock(&client_lock); |
692 | list_add(&new->se_hash, &sessionid_hashtbl[idx]); | 767 | list_add(&new->se_hash, &sessionid_hashtbl[idx]); |
693 | list_add(&new->se_perclnt, &clp->cl_sessions); | 768 | list_add(&new->se_perclnt, &clp->cl_sessions); |
694 | spin_unlock(&client_lock); | 769 | spin_unlock(&client_lock); |
695 | 770 | ||
696 | status = nfs_ok; | 771 | status = nfsd4_new_conn(rqstp, new); |
697 | out: | 772 | /* whoops: benny points out, status is ignored! (err, or bogus) */ |
698 | return status; | 773 | if (status) { |
699 | out_free: | 774 | free_session(&new->se_ref); |
700 | free_session_slots(new); | 775 | return NULL; |
701 | kfree(new); | 776 | } |
702 | goto out; | 777 | if (!clp->cl_cb_session && (cses->flags & SESSION4_BACK_CHAN)) { |
778 | struct sockaddr *sa = svc_addr(rqstp); | ||
779 | |||
780 | clp->cl_cb_session = new; | ||
781 | clp->cl_cb_conn.cb_xprt = rqstp->rq_xprt; | ||
782 | svc_xprt_get(rqstp->rq_xprt); | ||
783 | rpc_copy_addr((struct sockaddr *)&clp->cl_cb_conn.cb_addr, sa); | ||
784 | clp->cl_cb_conn.cb_addrlen = svc_addr_len(sa); | ||
785 | nfsd4_probe_callback(clp); | ||
786 | } | ||
787 | return new; | ||
703 | } | 788 | } |
704 | 789 | ||
705 | /* caller must hold client_lock */ | 790 | /* caller must hold client_lock */ |
@@ -731,21 +816,6 @@ unhash_session(struct nfsd4_session *ses) | |||
731 | list_del(&ses->se_perclnt); | 816 | list_del(&ses->se_perclnt); |
732 | } | 817 | } |
733 | 818 | ||
734 | void | ||
735 | free_session(struct kref *kref) | ||
736 | { | ||
737 | struct nfsd4_session *ses; | ||
738 | int mem; | ||
739 | |||
740 | ses = container_of(kref, struct nfsd4_session, se_ref); | ||
741 | spin_lock(&nfsd_drc_lock); | ||
742 | mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel); | ||
743 | nfsd_drc_mem_used -= mem; | ||
744 | spin_unlock(&nfsd_drc_lock); | ||
745 | free_session_slots(ses); | ||
746 | kfree(ses); | ||
747 | } | ||
748 | |||
749 | /* must be called under the client_lock */ | 819 | /* must be called under the client_lock */ |
750 | static inline void | 820 | static inline void |
751 | renew_client_locked(struct nfs4_client *clp) | 821 | renew_client_locked(struct nfs4_client *clp) |
@@ -812,6 +882,13 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) | |||
812 | static inline void | 882 | static inline void |
813 | free_client(struct nfs4_client *clp) | 883 | free_client(struct nfs4_client *clp) |
814 | { | 884 | { |
885 | while (!list_empty(&clp->cl_sessions)) { | ||
886 | struct nfsd4_session *ses; | ||
887 | ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, | ||
888 | se_perclnt); | ||
889 | list_del(&ses->se_perclnt); | ||
890 | nfsd4_put_session(ses); | ||
891 | } | ||
815 | if (clp->cl_cred.cr_group_info) | 892 | if (clp->cl_cred.cr_group_info) |
816 | put_group_info(clp->cl_cred.cr_group_info); | 893 | put_group_info(clp->cl_cred.cr_group_info); |
817 | kfree(clp->cl_principal); | 894 | kfree(clp->cl_principal); |
@@ -838,15 +915,12 @@ release_session_client(struct nfsd4_session *session) | |||
838 | static inline void | 915 | static inline void |
839 | unhash_client_locked(struct nfs4_client *clp) | 916 | unhash_client_locked(struct nfs4_client *clp) |
840 | { | 917 | { |
918 | struct nfsd4_session *ses; | ||
919 | |||
841 | mark_client_expired(clp); | 920 | mark_client_expired(clp); |
842 | list_del(&clp->cl_lru); | 921 | list_del(&clp->cl_lru); |
843 | while (!list_empty(&clp->cl_sessions)) { | 922 | list_for_each_entry(ses, &clp->cl_sessions, se_perclnt) |
844 | struct nfsd4_session *ses; | 923 | list_del_init(&ses->se_hash); |
845 | ses = list_entry(clp->cl_sessions.next, struct nfsd4_session, | ||
846 | se_perclnt); | ||
847 | unhash_session(ses); | ||
848 | nfsd4_put_session(ses); | ||
849 | } | ||
850 | } | 924 | } |
851 | 925 | ||
852 | static void | 926 | static void |
@@ -875,7 +949,7 @@ expire_client(struct nfs4_client *clp) | |||
875 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); | 949 | sop = list_entry(clp->cl_openowners.next, struct nfs4_stateowner, so_perclient); |
876 | release_openowner(sop); | 950 | release_openowner(sop); |
877 | } | 951 | } |
878 | nfsd4_set_callback_client(clp, NULL); | 952 | nfsd4_shutdown_callback(clp); |
879 | if (clp->cl_cb_conn.cb_xprt) | 953 | if (clp->cl_cb_conn.cb_xprt) |
880 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); | 954 | svc_xprt_put(clp->cl_cb_conn.cb_xprt); |
881 | list_del(&clp->cl_idhash); | 955 | list_del(&clp->cl_idhash); |
@@ -960,6 +1034,8 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
960 | if (clp == NULL) | 1034 | if (clp == NULL) |
961 | return NULL; | 1035 | return NULL; |
962 | 1036 | ||
1037 | INIT_LIST_HEAD(&clp->cl_sessions); | ||
1038 | |||
963 | princ = svc_gss_principal(rqstp); | 1039 | princ = svc_gss_principal(rqstp); |
964 | if (princ) { | 1040 | if (princ) { |
965 | clp->cl_principal = kstrdup(princ, GFP_KERNEL); | 1041 | clp->cl_principal = kstrdup(princ, GFP_KERNEL); |
@@ -976,8 +1052,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
976 | INIT_LIST_HEAD(&clp->cl_strhash); | 1052 | INIT_LIST_HEAD(&clp->cl_strhash); |
977 | INIT_LIST_HEAD(&clp->cl_openowners); | 1053 | INIT_LIST_HEAD(&clp->cl_openowners); |
978 | INIT_LIST_HEAD(&clp->cl_delegations); | 1054 | INIT_LIST_HEAD(&clp->cl_delegations); |
979 | INIT_LIST_HEAD(&clp->cl_sessions); | ||
980 | INIT_LIST_HEAD(&clp->cl_lru); | 1055 | INIT_LIST_HEAD(&clp->cl_lru); |
1056 | spin_lock_init(&clp->cl_lock); | ||
1057 | INIT_WORK(&clp->cl_cb_null.cb_work, nfsd4_do_callback_rpc); | ||
981 | clp->cl_time = get_seconds(); | 1058 | clp->cl_time = get_seconds(); |
982 | clear_bit(0, &clp->cl_cb_slot_busy); | 1059 | clear_bit(0, &clp->cl_cb_slot_busy); |
983 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); | 1060 | rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); |
@@ -986,7 +1063,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir, | |||
986 | clp->cl_flavor = rqstp->rq_flavor; | 1063 | clp->cl_flavor = rqstp->rq_flavor; |
987 | copy_cred(&clp->cl_cred, &rqstp->rq_cred); | 1064 | copy_cred(&clp->cl_cred, &rqstp->rq_cred); |
988 | gen_confirm(clp); | 1065 | gen_confirm(clp); |
989 | 1066 | clp->cl_cb_session = NULL; | |
990 | return clp; | 1067 | return clp; |
991 | } | 1068 | } |
992 | 1069 | ||
@@ -1098,7 +1175,7 @@ find_unconfirmed_client_by_str(const char *dname, unsigned int hashval, | |||
1098 | static void | 1175 | static void |
1099 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) | 1176 | gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) |
1100 | { | 1177 | { |
1101 | struct nfs4_cb_conn *cb = &clp->cl_cb_conn; | 1178 | struct nfs4_cb_conn *conn = &clp->cl_cb_conn; |
1102 | unsigned short expected_family; | 1179 | unsigned short expected_family; |
1103 | 1180 | ||
1104 | /* Currently, we only support tcp and tcp6 for the callback channel */ | 1181 | /* Currently, we only support tcp and tcp6 for the callback channel */ |
@@ -1111,24 +1188,23 @@ gen_callback(struct nfs4_client *clp, struct nfsd4_setclientid *se, u32 scopeid) | |||
1111 | else | 1188 | else |
1112 | goto out_err; | 1189 | goto out_err; |
1113 | 1190 | ||
1114 | cb->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, | 1191 | conn->cb_addrlen = rpc_uaddr2sockaddr(se->se_callback_addr_val, |
1115 | se->se_callback_addr_len, | 1192 | se->se_callback_addr_len, |
1116 | (struct sockaddr *) &cb->cb_addr, | 1193 | (struct sockaddr *)&conn->cb_addr, |
1117 | sizeof(cb->cb_addr)); | 1194 | sizeof(conn->cb_addr)); |
1118 | 1195 | ||
1119 | if (!cb->cb_addrlen || cb->cb_addr.ss_family != expected_family) | 1196 | if (!conn->cb_addrlen || conn->cb_addr.ss_family != expected_family) |
1120 | goto out_err; | 1197 | goto out_err; |
1121 | 1198 | ||
1122 | if (cb->cb_addr.ss_family == AF_INET6) | 1199 | if (conn->cb_addr.ss_family == AF_INET6) |
1123 | ((struct sockaddr_in6 *) &cb->cb_addr)->sin6_scope_id = scopeid; | 1200 | ((struct sockaddr_in6 *)&conn->cb_addr)->sin6_scope_id = scopeid; |
1124 | 1201 | ||
1125 | cb->cb_minorversion = 0; | 1202 | conn->cb_prog = se->se_callback_prog; |
1126 | cb->cb_prog = se->se_callback_prog; | 1203 | conn->cb_ident = se->se_callback_ident; |
1127 | cb->cb_ident = se->se_callback_ident; | ||
1128 | return; | 1204 | return; |
1129 | out_err: | 1205 | out_err: |
1130 | cb->cb_addr.ss_family = AF_UNSPEC; | 1206 | conn->cb_addr.ss_family = AF_UNSPEC; |
1131 | cb->cb_addrlen = 0; | 1207 | conn->cb_addrlen = 0; |
1132 | dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " | 1208 | dprintk(KERN_INFO "NFSD: this client (clientid %08x/%08x) " |
1133 | "will not receive delegations\n", | 1209 | "will not receive delegations\n", |
1134 | clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); | 1210 | clp->cl_clientid.cl_boot, clp->cl_clientid.cl_id); |
@@ -1415,7 +1491,9 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1415 | { | 1491 | { |
1416 | struct sockaddr *sa = svc_addr(rqstp); | 1492 | struct sockaddr *sa = svc_addr(rqstp); |
1417 | struct nfs4_client *conf, *unconf; | 1493 | struct nfs4_client *conf, *unconf; |
1494 | struct nfsd4_session *new; | ||
1418 | struct nfsd4_clid_slot *cs_slot = NULL; | 1495 | struct nfsd4_clid_slot *cs_slot = NULL; |
1496 | bool confirm_me = false; | ||
1419 | int status = 0; | 1497 | int status = 0; |
1420 | 1498 | ||
1421 | nfs4_lock_state(); | 1499 | nfs4_lock_state(); |
@@ -1438,7 +1516,6 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1438 | cs_slot->sl_seqid, cr_ses->seqid); | 1516 | cs_slot->sl_seqid, cr_ses->seqid); |
1439 | goto out; | 1517 | goto out; |
1440 | } | 1518 | } |
1441 | cs_slot->sl_seqid++; | ||
1442 | } else if (unconf) { | 1519 | } else if (unconf) { |
1443 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || | 1520 | if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) || |
1444 | !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { | 1521 | !rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) { |
@@ -1451,25 +1528,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1451 | if (status) { | 1528 | if (status) { |
1452 | /* an unconfirmed replay returns misordered */ | 1529 | /* an unconfirmed replay returns misordered */ |
1453 | status = nfserr_seq_misordered; | 1530 | status = nfserr_seq_misordered; |
1454 | goto out_cache; | 1531 | goto out; |
1455 | } | 1532 | } |
1456 | 1533 | ||
1457 | cs_slot->sl_seqid++; /* from 0 to 1 */ | 1534 | confirm_me = true; |
1458 | move_to_confirmed(unconf); | ||
1459 | |||
1460 | if (cr_ses->flags & SESSION4_BACK_CHAN) { | ||
1461 | unconf->cl_cb_conn.cb_xprt = rqstp->rq_xprt; | ||
1462 | svc_xprt_get(rqstp->rq_xprt); | ||
1463 | rpc_copy_addr( | ||
1464 | (struct sockaddr *)&unconf->cl_cb_conn.cb_addr, | ||
1465 | sa); | ||
1466 | unconf->cl_cb_conn.cb_addrlen = svc_addr_len(sa); | ||
1467 | unconf->cl_cb_conn.cb_minorversion = | ||
1468 | cstate->minorversion; | ||
1469 | unconf->cl_cb_conn.cb_prog = cr_ses->callback_prog; | ||
1470 | unconf->cl_cb_seq_nr = 1; | ||
1471 | nfsd4_probe_callback(unconf, &unconf->cl_cb_conn); | ||
1472 | } | ||
1473 | conf = unconf; | 1535 | conf = unconf; |
1474 | } else { | 1536 | } else { |
1475 | status = nfserr_stale_clientid; | 1537 | status = nfserr_stale_clientid; |
@@ -1477,22 +1539,30 @@ nfsd4_create_session(struct svc_rqst *rqstp, | |||
1477 | } | 1539 | } |
1478 | 1540 | ||
1479 | /* | 1541 | /* |
1542 | * XXX: we should probably set this at creation time, and check | ||
1543 | * for consistent minorversion use throughout: | ||
1544 | */ | ||
1545 | conf->cl_minorversion = 1; | ||
1546 | /* | ||
1480 | * We do not support RDMA or persistent sessions | 1547 | * We do not support RDMA or persistent sessions |
1481 | */ | 1548 | */ |
1482 | cr_ses->flags &= ~SESSION4_PERSIST; | 1549 | cr_ses->flags &= ~SESSION4_PERSIST; |
1483 | cr_ses->flags &= ~SESSION4_RDMA; | 1550 | cr_ses->flags &= ~SESSION4_RDMA; |
1484 | 1551 | ||
1485 | status = alloc_init_session(rqstp, conf, cr_ses); | 1552 | status = nfserr_jukebox; |
1486 | if (status) | 1553 | new = alloc_init_session(rqstp, conf, cr_ses); |
1554 | if (!new) | ||
1487 | goto out; | 1555 | goto out; |
1488 | 1556 | status = nfs_ok; | |
1489 | memcpy(cr_ses->sessionid.data, conf->cl_sessionid.data, | 1557 | memcpy(cr_ses->sessionid.data, new->se_sessionid.data, |
1490 | NFS4_MAX_SESSIONID_LEN); | 1558 | NFS4_MAX_SESSIONID_LEN); |
1559 | cs_slot->sl_seqid++; | ||
1491 | cr_ses->seqid = cs_slot->sl_seqid; | 1560 | cr_ses->seqid = cs_slot->sl_seqid; |
1492 | 1561 | ||
1493 | out_cache: | ||
1494 | /* cache solo and embedded create sessions under the state lock */ | 1562 | /* cache solo and embedded create sessions under the state lock */ |
1495 | nfsd4_cache_create_session(cr_ses, cs_slot, status); | 1563 | nfsd4_cache_create_session(cr_ses, cs_slot, status); |
1564 | if (confirm_me) | ||
1565 | move_to_confirmed(conf); | ||
1496 | out: | 1566 | out: |
1497 | nfs4_unlock_state(); | 1567 | nfs4_unlock_state(); |
1498 | dprintk("%s returns %d\n", __func__, ntohl(status)); | 1568 | dprintk("%s returns %d\n", __func__, ntohl(status)); |
@@ -1546,8 +1616,11 @@ nfsd4_destroy_session(struct svc_rqst *r, | |||
1546 | 1616 | ||
1547 | nfs4_lock_state(); | 1617 | nfs4_lock_state(); |
1548 | /* wait for callbacks */ | 1618 | /* wait for callbacks */ |
1549 | nfsd4_set_callback_client(ses->se_client, NULL); | 1619 | nfsd4_shutdown_callback(ses->se_client); |
1550 | nfs4_unlock_state(); | 1620 | nfs4_unlock_state(); |
1621 | |||
1622 | nfsd4_del_conns(ses); | ||
1623 | |||
1551 | nfsd4_put_session(ses); | 1624 | nfsd4_put_session(ses); |
1552 | status = nfs_ok; | 1625 | status = nfs_ok; |
1553 | out: | 1626 | out: |
@@ -1555,6 +1628,36 @@ out: | |||
1555 | return status; | 1628 | return status; |
1556 | } | 1629 | } |
1557 | 1630 | ||
1631 | static struct nfsd4_conn *__nfsd4_find_conn(struct svc_xprt *xpt, struct nfsd4_session *s) | ||
1632 | { | ||
1633 | struct nfsd4_conn *c; | ||
1634 | |||
1635 | list_for_each_entry(c, &s->se_conns, cn_persession) { | ||
1636 | if (c->cn_xprt == xpt) { | ||
1637 | return c; | ||
1638 | } | ||
1639 | } | ||
1640 | return NULL; | ||
1641 | } | ||
1642 | |||
1643 | static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_session *ses) | ||
1644 | { | ||
1645 | struct nfs4_client *clp = ses->se_client; | ||
1646 | struct nfsd4_conn *c; | ||
1647 | |||
1648 | spin_lock(&clp->cl_lock); | ||
1649 | c = __nfsd4_find_conn(new->cn_xprt, ses); | ||
1650 | if (c) { | ||
1651 | spin_unlock(&clp->cl_lock); | ||
1652 | free_conn(new); | ||
1653 | return; | ||
1654 | } | ||
1655 | __nfsd4_hash_conn(new, ses); | ||
1656 | spin_unlock(&clp->cl_lock); | ||
1657 | nfsd4_register_conn(new); | ||
1658 | return; | ||
1659 | } | ||
1660 | |||
1558 | __be32 | 1661 | __be32 |
1559 | nfsd4_sequence(struct svc_rqst *rqstp, | 1662 | nfsd4_sequence(struct svc_rqst *rqstp, |
1560 | struct nfsd4_compound_state *cstate, | 1663 | struct nfsd4_compound_state *cstate, |
@@ -1563,11 +1666,20 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1563 | struct nfsd4_compoundres *resp = rqstp->rq_resp; | 1666 | struct nfsd4_compoundres *resp = rqstp->rq_resp; |
1564 | struct nfsd4_session *session; | 1667 | struct nfsd4_session *session; |
1565 | struct nfsd4_slot *slot; | 1668 | struct nfsd4_slot *slot; |
1669 | struct nfsd4_conn *conn; | ||
1566 | int status; | 1670 | int status; |
1567 | 1671 | ||
1568 | if (resp->opcnt != 1) | 1672 | if (resp->opcnt != 1) |
1569 | return nfserr_sequence_pos; | 1673 | return nfserr_sequence_pos; |
1570 | 1674 | ||
1675 | /* | ||
1676 | * Will be either used or freed by nfsd4_sequence_check_conn | ||
1677 | * below. | ||
1678 | */ | ||
1679 | conn = alloc_conn(rqstp, NFS4_CDFC4_FORE); | ||
1680 | if (!conn) | ||
1681 | return nfserr_jukebox; | ||
1682 | |||
1571 | spin_lock(&client_lock); | 1683 | spin_lock(&client_lock); |
1572 | status = nfserr_badsession; | 1684 | status = nfserr_badsession; |
1573 | session = find_in_sessionid_hashtbl(&seq->sessionid); | 1685 | session = find_in_sessionid_hashtbl(&seq->sessionid); |
@@ -1599,6 +1711,9 @@ nfsd4_sequence(struct svc_rqst *rqstp, | |||
1599 | if (status) | 1711 | if (status) |
1600 | goto out; | 1712 | goto out; |
1601 | 1713 | ||
1714 | nfsd4_sequence_check_conn(conn, session); | ||
1715 | conn = NULL; | ||
1716 | |||
1602 | /* Success! bump slot seqid */ | 1717 | /* Success! bump slot seqid */ |
1603 | slot->sl_inuse = true; | 1718 | slot->sl_inuse = true; |
1604 | slot->sl_seqid = seq->seqid; | 1719 | slot->sl_seqid = seq->seqid; |
@@ -1613,6 +1728,7 @@ out: | |||
1613 | nfsd4_get_session(cstate->session); | 1728 | nfsd4_get_session(cstate->session); |
1614 | atomic_inc(&session->se_client->cl_refcount); | 1729 | atomic_inc(&session->se_client->cl_refcount); |
1615 | } | 1730 | } |
1731 | kfree(conn); | ||
1616 | spin_unlock(&client_lock); | 1732 | spin_unlock(&client_lock); |
1617 | dprintk("%s: return %d\n", __func__, ntohl(status)); | 1733 | dprintk("%s: return %d\n", __func__, ntohl(status)); |
1618 | return status; | 1734 | return status; |
@@ -1747,6 +1863,11 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
1747 | goto out; | 1863 | goto out; |
1748 | gen_clid(new); | 1864 | gen_clid(new); |
1749 | } | 1865 | } |
1866 | /* | ||
1867 | * XXX: we should probably set this at creation time, and check | ||
1868 | * for consistent minorversion use throughout: | ||
1869 | */ | ||
1870 | new->cl_minorversion = 0; | ||
1750 | gen_callback(new, setclid, rpc_get_scope_id(sa)); | 1871 | gen_callback(new, setclid, rpc_get_scope_id(sa)); |
1751 | add_to_unconfirmed(new, strhashval); | 1872 | add_to_unconfirmed(new, strhashval); |
1752 | setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; | 1873 | setclid->se_clientid.cl_boot = new->cl_clientid.cl_boot; |
@@ -1807,7 +1928,8 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
1807 | status = nfserr_clid_inuse; | 1928 | status = nfserr_clid_inuse; |
1808 | else { | 1929 | else { |
1809 | atomic_set(&conf->cl_cb_set, 0); | 1930 | atomic_set(&conf->cl_cb_set, 0); |
1810 | nfsd4_probe_callback(conf, &unconf->cl_cb_conn); | 1931 | nfsd4_change_callback(conf, &unconf->cl_cb_conn); |
1932 | nfsd4_probe_callback(conf); | ||
1811 | expire_client(unconf); | 1933 | expire_client(unconf); |
1812 | status = nfs_ok; | 1934 | status = nfs_ok; |
1813 | 1935 | ||
@@ -1841,7 +1963,7 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp, | |||
1841 | } | 1963 | } |
1842 | move_to_confirmed(unconf); | 1964 | move_to_confirmed(unconf); |
1843 | conf = unconf; | 1965 | conf = unconf; |
1844 | nfsd4_probe_callback(conf, &conf->cl_cb_conn); | 1966 | nfsd4_probe_callback(conf); |
1845 | status = nfs_ok; | 1967 | status = nfs_ok; |
1846 | } | 1968 | } |
1847 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) | 1969 | } else if ((!conf || (conf && !same_verf(&conf->cl_confirm, &confirm))) |
@@ -2492,7 +2614,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2492 | struct nfs4_delegation *dp; | 2614 | struct nfs4_delegation *dp; |
2493 | struct nfs4_stateowner *sop = stp->st_stateowner; | 2615 | struct nfs4_stateowner *sop = stp->st_stateowner; |
2494 | int cb_up = atomic_read(&sop->so_client->cl_cb_set); | 2616 | int cb_up = atomic_read(&sop->so_client->cl_cb_set); |
2495 | struct file_lock fl, *flp = &fl; | 2617 | struct file_lock *fl; |
2496 | int status, flag = 0; | 2618 | int status, flag = 0; |
2497 | 2619 | ||
2498 | flag = NFS4_OPEN_DELEGATE_NONE; | 2620 | flag = NFS4_OPEN_DELEGATE_NONE; |
@@ -2526,20 +2648,24 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
2526 | flag = NFS4_OPEN_DELEGATE_NONE; | 2648 | flag = NFS4_OPEN_DELEGATE_NONE; |
2527 | goto out; | 2649 | goto out; |
2528 | } | 2650 | } |
2529 | locks_init_lock(&fl); | 2651 | status = -ENOMEM; |
2530 | fl.fl_lmops = &nfsd_lease_mng_ops; | 2652 | fl = locks_alloc_lock(); |
2531 | fl.fl_flags = FL_LEASE; | 2653 | if (!fl) |
2532 | fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; | 2654 | goto out; |
2533 | fl.fl_end = OFFSET_MAX; | 2655 | locks_init_lock(fl); |
2534 | fl.fl_owner = (fl_owner_t)dp; | 2656 | fl->fl_lmops = &nfsd_lease_mng_ops; |
2535 | fl.fl_file = find_readable_file(stp->st_file); | 2657 | fl->fl_flags = FL_LEASE; |
2536 | BUG_ON(!fl.fl_file); | 2658 | fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK; |
2537 | fl.fl_pid = current->tgid; | 2659 | fl->fl_end = OFFSET_MAX; |
2660 | fl->fl_owner = (fl_owner_t)dp; | ||
2661 | fl->fl_file = find_readable_file(stp->st_file); | ||
2662 | BUG_ON(!fl->fl_file); | ||
2663 | fl->fl_pid = current->tgid; | ||
2538 | 2664 | ||
2539 | /* vfs_setlease checks to see if delegation should be handed out. | 2665 | /* vfs_setlease checks to see if delegation should be handed out. |
2540 | * the lock_manager callbacks fl_mylease and fl_change are used | 2666 | * the lock_manager callbacks fl_mylease and fl_change are used |
2541 | */ | 2667 | */ |
2542 | if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) { | 2668 | if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) { |
2543 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); | 2669 | dprintk("NFSD: setlease failed [%d], no delegation\n", status); |
2544 | unhash_delegation(dp); | 2670 | unhash_delegation(dp); |
2545 | flag = NFS4_OPEN_DELEGATE_NONE; | 2671 | flag = NFS4_OPEN_DELEGATE_NONE; |
@@ -2944,7 +3070,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
2944 | if (STALE_STATEID(stateid)) | 3070 | if (STALE_STATEID(stateid)) |
2945 | goto out; | 3071 | goto out; |
2946 | 3072 | ||
2947 | status = nfserr_bad_stateid; | 3073 | /* |
3074 | * We assume that any stateid that has the current boot time, | ||
3075 | * but that we can't find, is expired: | ||
3076 | */ | ||
3077 | status = nfserr_expired; | ||
2948 | if (is_delegation_stateid(stateid)) { | 3078 | if (is_delegation_stateid(stateid)) { |
2949 | dp = find_delegation_stateid(ino, stateid); | 3079 | dp = find_delegation_stateid(ino, stateid); |
2950 | if (!dp) | 3080 | if (!dp) |
@@ -2964,6 +3094,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate, | |||
2964 | stp = find_stateid(stateid, flags); | 3094 | stp = find_stateid(stateid, flags); |
2965 | if (!stp) | 3095 | if (!stp) |
2966 | goto out; | 3096 | goto out; |
3097 | status = nfserr_bad_stateid; | ||
2967 | if (nfs4_check_fh(current_fh, stp)) | 3098 | if (nfs4_check_fh(current_fh, stp)) |
2968 | goto out; | 3099 | goto out; |
2969 | if (!stp->st_stateowner->so_confirmed) | 3100 | if (!stp->st_stateowner->so_confirmed) |
@@ -3038,8 +3169,9 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid, | |||
3038 | * a replayed close: | 3169 | * a replayed close: |
3039 | */ | 3170 | */ |
3040 | sop = search_close_lru(stateid->si_stateownerid, flags); | 3171 | sop = search_close_lru(stateid->si_stateownerid, flags); |
3172 | /* It's not stale; let's assume it's expired: */ | ||
3041 | if (sop == NULL) | 3173 | if (sop == NULL) |
3042 | return nfserr_bad_stateid; | 3174 | return nfserr_expired; |
3043 | *sopp = sop; | 3175 | *sopp = sop; |
3044 | goto check_replay; | 3176 | goto check_replay; |
3045 | } | 3177 | } |
@@ -3304,6 +3436,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
3304 | status = nfserr_bad_stateid; | 3436 | status = nfserr_bad_stateid; |
3305 | if (!is_delegation_stateid(stateid)) | 3437 | if (!is_delegation_stateid(stateid)) |
3306 | goto out; | 3438 | goto out; |
3439 | status = nfserr_expired; | ||
3307 | dp = find_delegation_stateid(inode, stateid); | 3440 | dp = find_delegation_stateid(inode, stateid); |
3308 | if (!dp) | 3441 | if (!dp) |
3309 | goto out; | 3442 | goto out; |
@@ -3895,7 +4028,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) | |||
3895 | struct inode *inode = filp->fi_inode; | 4028 | struct inode *inode = filp->fi_inode; |
3896 | int status = 0; | 4029 | int status = 0; |
3897 | 4030 | ||
3898 | lock_kernel(); | 4031 | lock_flocks(); |
3899 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { | 4032 | for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { |
3900 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { | 4033 | if ((*flpp)->fl_owner == (fl_owner_t)lowner) { |
3901 | status = 1; | 4034 | status = 1; |
@@ -3903,7 +4036,7 @@ check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner) | |||
3903 | } | 4036 | } |
3904 | } | 4037 | } |
3905 | out: | 4038 | out: |
3906 | unlock_kernel(); | 4039 | unlock_flocks(); |
3907 | return status; | 4040 | return status; |
3908 | } | 4041 | } |
3909 | 4042 | ||
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 1a468bbd330f..f35a94a04026 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
@@ -1805,19 +1805,23 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1805 | goto out_nfserr; | 1805 | goto out_nfserr; |
1806 | } | 1806 | } |
1807 | } | 1807 | } |
1808 | if ((buflen -= 16) < 0) | ||
1809 | goto out_resource; | ||
1810 | 1808 | ||
1811 | if (unlikely(bmval2)) { | 1809 | if (bmval2) { |
1810 | if ((buflen -= 16) < 0) | ||
1811 | goto out_resource; | ||
1812 | WRITE32(3); | 1812 | WRITE32(3); |
1813 | WRITE32(bmval0); | 1813 | WRITE32(bmval0); |
1814 | WRITE32(bmval1); | 1814 | WRITE32(bmval1); |
1815 | WRITE32(bmval2); | 1815 | WRITE32(bmval2); |
1816 | } else if (likely(bmval1)) { | 1816 | } else if (bmval1) { |
1817 | if ((buflen -= 12) < 0) | ||
1818 | goto out_resource; | ||
1817 | WRITE32(2); | 1819 | WRITE32(2); |
1818 | WRITE32(bmval0); | 1820 | WRITE32(bmval0); |
1819 | WRITE32(bmval1); | 1821 | WRITE32(bmval1); |
1820 | } else { | 1822 | } else { |
1823 | if ((buflen -= 8) < 0) | ||
1824 | goto out_resource; | ||
1821 | WRITE32(1); | 1825 | WRITE32(1); |
1822 | WRITE32(bmval0); | 1826 | WRITE32(bmval0); |
1823 | } | 1827 | } |
@@ -1828,15 +1832,17 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, | |||
1828 | u32 word1 = nfsd_suppattrs1(minorversion); | 1832 | u32 word1 = nfsd_suppattrs1(minorversion); |
1829 | u32 word2 = nfsd_suppattrs2(minorversion); | 1833 | u32 word2 = nfsd_suppattrs2(minorversion); |
1830 | 1834 | ||
1831 | if ((buflen -= 12) < 0) | ||
1832 | goto out_resource; | ||
1833 | if (!aclsupport) | 1835 | if (!aclsupport) |
1834 | word0 &= ~FATTR4_WORD0_ACL; | 1836 | word0 &= ~FATTR4_WORD0_ACL; |
1835 | if (!word2) { | 1837 | if (!word2) { |
1838 | if ((buflen -= 12) < 0) | ||
1839 | goto out_resource; | ||
1836 | WRITE32(2); | 1840 | WRITE32(2); |
1837 | WRITE32(word0); | 1841 | WRITE32(word0); |
1838 | WRITE32(word1); | 1842 | WRITE32(word1); |
1839 | } else { | 1843 | } else { |
1844 | if ((buflen -= 16) < 0) | ||
1845 | goto out_resource; | ||
1840 | WRITE32(3); | 1846 | WRITE32(3); |
1841 | WRITE32(word0); | 1847 | WRITE32(word0); |
1842 | WRITE32(word1); | 1848 | WRITE32(word1); |
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index b53b1d042f1f..d6dc3f61f8ba 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
@@ -22,6 +22,7 @@ | |||
22 | */ | 22 | */ |
23 | enum { | 23 | enum { |
24 | NFSD_Root = 1, | 24 | NFSD_Root = 1, |
25 | #ifdef CONFIG_NFSD_DEPRECATED | ||
25 | NFSD_Svc, | 26 | NFSD_Svc, |
26 | NFSD_Add, | 27 | NFSD_Add, |
27 | NFSD_Del, | 28 | NFSD_Del, |
@@ -29,6 +30,7 @@ enum { | |||
29 | NFSD_Unexport, | 30 | NFSD_Unexport, |
30 | NFSD_Getfd, | 31 | NFSD_Getfd, |
31 | NFSD_Getfs, | 32 | NFSD_Getfs, |
33 | #endif | ||
32 | NFSD_List, | 34 | NFSD_List, |
33 | NFSD_Export_features, | 35 | NFSD_Export_features, |
34 | NFSD_Fh, | 36 | NFSD_Fh, |
@@ -54,6 +56,7 @@ enum { | |||
54 | /* | 56 | /* |
55 | * write() for these nodes. | 57 | * write() for these nodes. |
56 | */ | 58 | */ |
59 | #ifdef CONFIG_NFSD_DEPRECATED | ||
57 | static ssize_t write_svc(struct file *file, char *buf, size_t size); | 60 | static ssize_t write_svc(struct file *file, char *buf, size_t size); |
58 | static ssize_t write_add(struct file *file, char *buf, size_t size); | 61 | static ssize_t write_add(struct file *file, char *buf, size_t size); |
59 | static ssize_t write_del(struct file *file, char *buf, size_t size); | 62 | static ssize_t write_del(struct file *file, char *buf, size_t size); |
@@ -61,6 +64,7 @@ static ssize_t write_export(struct file *file, char *buf, size_t size); | |||
61 | static ssize_t write_unexport(struct file *file, char *buf, size_t size); | 64 | static ssize_t write_unexport(struct file *file, char *buf, size_t size); |
62 | static ssize_t write_getfd(struct file *file, char *buf, size_t size); | 65 | static ssize_t write_getfd(struct file *file, char *buf, size_t size); |
63 | static ssize_t write_getfs(struct file *file, char *buf, size_t size); | 66 | static ssize_t write_getfs(struct file *file, char *buf, size_t size); |
67 | #endif | ||
64 | static ssize_t write_filehandle(struct file *file, char *buf, size_t size); | 68 | static ssize_t write_filehandle(struct file *file, char *buf, size_t size); |
65 | static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); | 69 | static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); |
66 | static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); | 70 | static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); |
@@ -76,6 +80,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); | |||
76 | #endif | 80 | #endif |
77 | 81 | ||
78 | static ssize_t (*write_op[])(struct file *, char *, size_t) = { | 82 | static ssize_t (*write_op[])(struct file *, char *, size_t) = { |
83 | #ifdef CONFIG_NFSD_DEPRECATED | ||
79 | [NFSD_Svc] = write_svc, | 84 | [NFSD_Svc] = write_svc, |
80 | [NFSD_Add] = write_add, | 85 | [NFSD_Add] = write_add, |
81 | [NFSD_Del] = write_del, | 86 | [NFSD_Del] = write_del, |
@@ -83,6 +88,7 @@ static ssize_t (*write_op[])(struct file *, char *, size_t) = { | |||
83 | [NFSD_Unexport] = write_unexport, | 88 | [NFSD_Unexport] = write_unexport, |
84 | [NFSD_Getfd] = write_getfd, | 89 | [NFSD_Getfd] = write_getfd, |
85 | [NFSD_Getfs] = write_getfs, | 90 | [NFSD_Getfs] = write_getfs, |
91 | #endif | ||
86 | [NFSD_Fh] = write_filehandle, | 92 | [NFSD_Fh] = write_filehandle, |
87 | [NFSD_FO_UnlockIP] = write_unlock_ip, | 93 | [NFSD_FO_UnlockIP] = write_unlock_ip, |
88 | [NFSD_FO_UnlockFS] = write_unlock_fs, | 94 | [NFSD_FO_UnlockFS] = write_unlock_fs, |
@@ -121,6 +127,14 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu | |||
121 | 127 | ||
122 | static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) | 128 | static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) |
123 | { | 129 | { |
130 | static int warned; | ||
131 | if (file->f_dentry->d_name.name[0] == '.' && !warned) { | ||
132 | printk(KERN_INFO | ||
133 | "Warning: \"%s\" uses deprecated NFSD interface: %s." | ||
134 | " This will be removed in 2.6.40\n", | ||
135 | current->comm, file->f_dentry->d_name.name); | ||
136 | warned = 1; | ||
137 | } | ||
124 | if (! file->private_data) { | 138 | if (! file->private_data) { |
125 | /* An attempt to read a transaction file without writing | 139 | /* An attempt to read a transaction file without writing |
126 | * causes a 0-byte write so that the file can return | 140 | * causes a 0-byte write so that the file can return |
@@ -137,6 +151,7 @@ static const struct file_operations transaction_ops = { | |||
137 | .write = nfsctl_transaction_write, | 151 | .write = nfsctl_transaction_write, |
138 | .read = nfsctl_transaction_read, | 152 | .read = nfsctl_transaction_read, |
139 | .release = simple_transaction_release, | 153 | .release = simple_transaction_release, |
154 | .llseek = default_llseek, | ||
140 | }; | 155 | }; |
141 | 156 | ||
142 | static int exports_open(struct inode *inode, struct file *file) | 157 | static int exports_open(struct inode *inode, struct file *file) |
@@ -186,6 +201,7 @@ static const struct file_operations pool_stats_operations = { | |||
186 | * payload - write methods | 201 | * payload - write methods |
187 | */ | 202 | */ |
188 | 203 | ||
204 | #ifdef CONFIG_NFSD_DEPRECATED | ||
189 | /** | 205 | /** |
190 | * write_svc - Start kernel's NFSD server | 206 | * write_svc - Start kernel's NFSD server |
191 | * | 207 | * |
@@ -401,7 +417,7 @@ static ssize_t write_getfs(struct file *file, char *buf, size_t size) | |||
401 | 417 | ||
402 | ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); | 418 | ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); |
403 | 419 | ||
404 | clp = auth_unix_lookup(&in6); | 420 | clp = auth_unix_lookup(&init_net, &in6); |
405 | if (!clp) | 421 | if (!clp) |
406 | err = -EPERM; | 422 | err = -EPERM; |
407 | else { | 423 | else { |
@@ -464,7 +480,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) | |||
464 | 480 | ||
465 | ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); | 481 | ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); |
466 | 482 | ||
467 | clp = auth_unix_lookup(&in6); | 483 | clp = auth_unix_lookup(&init_net, &in6); |
468 | if (!clp) | 484 | if (!clp) |
469 | err = -EPERM; | 485 | err = -EPERM; |
470 | else { | 486 | else { |
@@ -481,6 +497,7 @@ static ssize_t write_getfd(struct file *file, char *buf, size_t size) | |||
481 | out: | 497 | out: |
482 | return err; | 498 | return err; |
483 | } | 499 | } |
500 | #endif /* CONFIG_NFSD_DEPRECATED */ | ||
484 | 501 | ||
485 | /** | 502 | /** |
486 | * write_unlock_ip - Release all locks used by a client | 503 | * write_unlock_ip - Release all locks used by a client |
@@ -999,12 +1016,12 @@ static ssize_t __write_ports_addxprt(char *buf) | |||
999 | if (err != 0) | 1016 | if (err != 0) |
1000 | return err; | 1017 | return err; |
1001 | 1018 | ||
1002 | err = svc_create_xprt(nfsd_serv, transport, | 1019 | err = svc_create_xprt(nfsd_serv, transport, &init_net, |
1003 | PF_INET, port, SVC_SOCK_ANONYMOUS); | 1020 | PF_INET, port, SVC_SOCK_ANONYMOUS); |
1004 | if (err < 0) | 1021 | if (err < 0) |
1005 | goto out_err; | 1022 | goto out_err; |
1006 | 1023 | ||
1007 | err = svc_create_xprt(nfsd_serv, transport, | 1024 | err = svc_create_xprt(nfsd_serv, transport, &init_net, |
1008 | PF_INET6, port, SVC_SOCK_ANONYMOUS); | 1025 | PF_INET6, port, SVC_SOCK_ANONYMOUS); |
1009 | if (err < 0 && err != -EAFNOSUPPORT) | 1026 | if (err < 0 && err != -EAFNOSUPPORT) |
1010 | goto out_close; | 1027 | goto out_close; |
@@ -1355,6 +1372,7 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) | |||
1355 | static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | 1372 | static int nfsd_fill_super(struct super_block * sb, void * data, int silent) |
1356 | { | 1373 | { |
1357 | static struct tree_descr nfsd_files[] = { | 1374 | static struct tree_descr nfsd_files[] = { |
1375 | #ifdef CONFIG_NFSD_DEPRECATED | ||
1358 | [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, | 1376 | [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, |
1359 | [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, | 1377 | [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, |
1360 | [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, | 1378 | [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, |
@@ -1362,6 +1380,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) | |||
1362 | [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, | 1380 | [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, |
1363 | [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, | 1381 | [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, |
1364 | [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, | 1382 | [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, |
1383 | #endif | ||
1365 | [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, | 1384 | [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, |
1366 | [NFSD_Export_features] = {"export_features", | 1385 | [NFSD_Export_features] = {"export_features", |
1367 | &export_features_operations, S_IRUGO}, | 1386 | &export_features_operations, S_IRUGO}, |
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index b76ac3a82e39..6b641cf2c19a 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h | |||
@@ -249,7 +249,7 @@ extern time_t nfsd4_grace; | |||
249 | #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ | 249 | #define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */ |
250 | #define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ | 250 | #define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */ |
251 | 251 | ||
252 | #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ | 252 | #define NFSD_LAUNDROMAT_MINTIMEOUT 1 /* seconds */ |
253 | 253 | ||
254 | /* | 254 | /* |
255 | * The following attributes are currently not supported by the NFSv4 server: | 255 | * The following attributes are currently not supported by the NFSv4 server: |
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h index cdfb8c6a4206..c16f8d8331b5 100644 --- a/fs/nfsd/nfsfh.h +++ b/fs/nfsd/nfsfh.h | |||
@@ -196,8 +196,6 @@ fh_lock(struct svc_fh *fhp) | |||
196 | static inline void | 196 | static inline void |
197 | fh_unlock(struct svc_fh *fhp) | 197 | fh_unlock(struct svc_fh *fhp) |
198 | { | 198 | { |
199 | BUG_ON(!fhp->fh_dentry); | ||
200 | |||
201 | if (fhp->fh_locked) { | 199 | if (fhp->fh_locked) { |
202 | fill_post_wcc(fhp); | 200 | fill_post_wcc(fhp); |
203 | mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); | 201 | mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex); |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e2c43464f237..2bae1d86f5f2 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/lockd/bind.h> | 16 | #include <linux/lockd/bind.h> |
17 | #include <linux/nfsacl.h> | 17 | #include <linux/nfsacl.h> |
18 | #include <linux/seq_file.h> | 18 | #include <linux/seq_file.h> |
19 | #include <net/net_namespace.h> | ||
19 | #include "nfsd.h" | 20 | #include "nfsd.h" |
20 | #include "cache.h" | 21 | #include "cache.h" |
21 | #include "vfs.h" | 22 | #include "vfs.h" |
@@ -186,12 +187,12 @@ static int nfsd_init_socks(int port) | |||
186 | if (!list_empty(&nfsd_serv->sv_permsocks)) | 187 | if (!list_empty(&nfsd_serv->sv_permsocks)) |
187 | return 0; | 188 | return 0; |
188 | 189 | ||
189 | error = svc_create_xprt(nfsd_serv, "udp", PF_INET, port, | 190 | error = svc_create_xprt(nfsd_serv, "udp", &init_net, PF_INET, port, |
190 | SVC_SOCK_DEFAULTS); | 191 | SVC_SOCK_DEFAULTS); |
191 | if (error < 0) | 192 | if (error < 0) |
192 | return error; | 193 | return error; |
193 | 194 | ||
194 | error = svc_create_xprt(nfsd_serv, "tcp", PF_INET, port, | 195 | error = svc_create_xprt(nfsd_serv, "tcp", &init_net, PF_INET, port, |
195 | SVC_SOCK_DEFAULTS); | 196 | SVC_SOCK_DEFAULTS); |
196 | if (error < 0) | 197 | if (error < 0) |
197 | return error; | 198 | return error; |
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 322518c88e4b..39adc27b0685 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h | |||
@@ -35,6 +35,7 @@ | |||
35 | #ifndef _NFSD4_STATE_H | 35 | #ifndef _NFSD4_STATE_H |
36 | #define _NFSD4_STATE_H | 36 | #define _NFSD4_STATE_H |
37 | 37 | ||
38 | #include <linux/sunrpc/svc_xprt.h> | ||
38 | #include <linux/nfsd/nfsfh.h> | 39 | #include <linux/nfsd/nfsfh.h> |
39 | #include "nfsfh.h" | 40 | #include "nfsfh.h" |
40 | 41 | ||
@@ -64,19 +65,12 @@ typedef struct { | |||
64 | (s)->si_fileid, \ | 65 | (s)->si_fileid, \ |
65 | (s)->si_generation | 66 | (s)->si_generation |
66 | 67 | ||
67 | struct nfsd4_cb_sequence { | ||
68 | /* args/res */ | ||
69 | u32 cbs_minorversion; | ||
70 | struct nfs4_client *cbs_clp; | ||
71 | }; | ||
72 | |||
73 | struct nfs4_rpc_args { | ||
74 | void *args_op; | ||
75 | struct nfsd4_cb_sequence args_seq; | ||
76 | }; | ||
77 | |||
78 | struct nfsd4_callback { | 68 | struct nfsd4_callback { |
79 | struct nfs4_rpc_args cb_args; | 69 | void *cb_op; |
70 | struct nfs4_client *cb_clp; | ||
71 | u32 cb_minorversion; | ||
72 | struct rpc_message cb_msg; | ||
73 | const struct rpc_call_ops *cb_ops; | ||
80 | struct work_struct cb_work; | 74 | struct work_struct cb_work; |
81 | }; | 75 | }; |
82 | 76 | ||
@@ -91,7 +85,6 @@ struct nfs4_delegation { | |||
91 | u32 dl_type; | 85 | u32 dl_type; |
92 | time_t dl_time; | 86 | time_t dl_time; |
93 | /* For recall: */ | 87 | /* For recall: */ |
94 | u32 dl_ident; | ||
95 | stateid_t dl_stateid; | 88 | stateid_t dl_stateid; |
96 | struct knfsd_fh dl_fh; | 89 | struct knfsd_fh dl_fh; |
97 | int dl_retries; | 90 | int dl_retries; |
@@ -103,8 +96,8 @@ struct nfs4_cb_conn { | |||
103 | /* SETCLIENTID info */ | 96 | /* SETCLIENTID info */ |
104 | struct sockaddr_storage cb_addr; | 97 | struct sockaddr_storage cb_addr; |
105 | size_t cb_addrlen; | 98 | size_t cb_addrlen; |
106 | u32 cb_prog; | 99 | u32 cb_prog; /* used only in 4.0 case; |
107 | u32 cb_minorversion; | 100 | per-session otherwise */ |
108 | u32 cb_ident; /* minorversion 0 only */ | 101 | u32 cb_ident; /* minorversion 0 only */ |
109 | struct svc_xprt *cb_xprt; /* minorversion 1 only */ | 102 | struct svc_xprt *cb_xprt; /* minorversion 1 only */ |
110 | }; | 103 | }; |
@@ -160,6 +153,15 @@ struct nfsd4_clid_slot { | |||
160 | struct nfsd4_create_session sl_cr_ses; | 153 | struct nfsd4_create_session sl_cr_ses; |
161 | }; | 154 | }; |
162 | 155 | ||
156 | struct nfsd4_conn { | ||
157 | struct list_head cn_persession; | ||
158 | struct svc_xprt *cn_xprt; | ||
159 | struct svc_xpt_user cn_xpt_user; | ||
160 | struct nfsd4_session *cn_session; | ||
161 | /* CDFC4_FORE, CDFC4_BACK: */ | ||
162 | unsigned char cn_flags; | ||
163 | }; | ||
164 | |||
163 | struct nfsd4_session { | 165 | struct nfsd4_session { |
164 | struct kref se_ref; | 166 | struct kref se_ref; |
165 | struct list_head se_hash; /* hash by sessionid */ | 167 | struct list_head se_hash; /* hash by sessionid */ |
@@ -169,6 +171,9 @@ struct nfsd4_session { | |||
169 | struct nfs4_sessionid se_sessionid; | 171 | struct nfs4_sessionid se_sessionid; |
170 | struct nfsd4_channel_attrs se_fchannel; | 172 | struct nfsd4_channel_attrs se_fchannel; |
171 | struct nfsd4_channel_attrs se_bchannel; | 173 | struct nfsd4_channel_attrs se_bchannel; |
174 | struct list_head se_conns; | ||
175 | u32 se_cb_prog; | ||
176 | u32 se_cb_seq_nr; | ||
172 | struct nfsd4_slot *se_slots[]; /* forward channel slots */ | 177 | struct nfsd4_slot *se_slots[]; /* forward channel slots */ |
173 | }; | 178 | }; |
174 | 179 | ||
@@ -221,24 +226,32 @@ struct nfs4_client { | |||
221 | clientid_t cl_clientid; /* generated by server */ | 226 | clientid_t cl_clientid; /* generated by server */ |
222 | nfs4_verifier cl_confirm; /* generated by server */ | 227 | nfs4_verifier cl_confirm; /* generated by server */ |
223 | u32 cl_firststate; /* recovery dir creation */ | 228 | u32 cl_firststate; /* recovery dir creation */ |
229 | u32 cl_minorversion; | ||
224 | 230 | ||
225 | /* for v4.0 and v4.1 callbacks: */ | 231 | /* for v4.0 and v4.1 callbacks: */ |
226 | struct nfs4_cb_conn cl_cb_conn; | 232 | struct nfs4_cb_conn cl_cb_conn; |
233 | #define NFSD4_CLIENT_CB_UPDATE 1 | ||
234 | #define NFSD4_CLIENT_KILL 2 | ||
235 | unsigned long cl_cb_flags; | ||
227 | struct rpc_clnt *cl_cb_client; | 236 | struct rpc_clnt *cl_cb_client; |
237 | u32 cl_cb_ident; | ||
228 | atomic_t cl_cb_set; | 238 | atomic_t cl_cb_set; |
239 | struct nfsd4_callback cl_cb_null; | ||
240 | struct nfsd4_session *cl_cb_session; | ||
241 | |||
242 | /* for all client information that callback code might need: */ | ||
243 | spinlock_t cl_lock; | ||
229 | 244 | ||
230 | /* for nfs41 */ | 245 | /* for nfs41 */ |
231 | struct list_head cl_sessions; | 246 | struct list_head cl_sessions; |
232 | struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ | 247 | struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */ |
233 | u32 cl_exchange_flags; | 248 | u32 cl_exchange_flags; |
234 | struct nfs4_sessionid cl_sessionid; | ||
235 | /* number of rpc's in progress over an associated session: */ | 249 | /* number of rpc's in progress over an associated session: */ |
236 | atomic_t cl_refcount; | 250 | atomic_t cl_refcount; |
237 | 251 | ||
238 | /* for nfs41 callbacks */ | 252 | /* for nfs41 callbacks */ |
239 | /* We currently support a single back channel with a single slot */ | 253 | /* We currently support a single back channel with a single slot */ |
240 | unsigned long cl_cb_slot_busy; | 254 | unsigned long cl_cb_slot_busy; |
241 | u32 cl_cb_seq_nr; | ||
242 | struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ | 255 | struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ |
243 | /* wait here for slots */ | 256 | /* wait here for slots */ |
244 | }; | 257 | }; |
@@ -440,12 +453,13 @@ extern int nfs4_in_grace(void); | |||
440 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid); | 453 | extern __be32 nfs4_check_open_reclaim(clientid_t *clid); |
441 | extern void nfs4_free_stateowner(struct kref *kref); | 454 | extern void nfs4_free_stateowner(struct kref *kref); |
442 | extern int set_callback_cred(void); | 455 | extern int set_callback_cred(void); |
443 | extern void nfsd4_probe_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); | 456 | extern void nfsd4_probe_callback(struct nfs4_client *clp); |
457 | extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); | ||
444 | extern void nfsd4_do_callback_rpc(struct work_struct *); | 458 | extern void nfsd4_do_callback_rpc(struct work_struct *); |
445 | extern void nfsd4_cb_recall(struct nfs4_delegation *dp); | 459 | extern void nfsd4_cb_recall(struct nfs4_delegation *dp); |
446 | extern int nfsd4_create_callback_queue(void); | 460 | extern int nfsd4_create_callback_queue(void); |
447 | extern void nfsd4_destroy_callback_queue(void); | 461 | extern void nfsd4_destroy_callback_queue(void); |
448 | extern void nfsd4_set_callback_client(struct nfs4_client *, struct rpc_clnt *); | 462 | extern void nfsd4_shutdown_callback(struct nfs4_client *); |
449 | extern void nfs4_put_delegation(struct nfs4_delegation *dp); | 463 | extern void nfs4_put_delegation(struct nfs4_delegation *dp); |
450 | extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); | 464 | extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); |
451 | extern void nfsd4_init_recdir(char *recdir_name); | 465 | extern void nfsd4_init_recdir(char *recdir_name); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 661a6cf8e826..184938fcff04 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -281,23 +281,13 @@ commit_metadata(struct svc_fh *fhp) | |||
281 | { | 281 | { |
282 | struct inode *inode = fhp->fh_dentry->d_inode; | 282 | struct inode *inode = fhp->fh_dentry->d_inode; |
283 | const struct export_operations *export_ops = inode->i_sb->s_export_op; | 283 | const struct export_operations *export_ops = inode->i_sb->s_export_op; |
284 | int error = 0; | ||
285 | 284 | ||
286 | if (!EX_ISSYNC(fhp->fh_export)) | 285 | if (!EX_ISSYNC(fhp->fh_export)) |
287 | return 0; | 286 | return 0; |
288 | 287 | ||
289 | if (export_ops->commit_metadata) { | 288 | if (export_ops->commit_metadata) |
290 | error = export_ops->commit_metadata(inode); | 289 | return export_ops->commit_metadata(inode); |
291 | } else { | 290 | return sync_inode_metadata(inode, 1); |
292 | struct writeback_control wbc = { | ||
293 | .sync_mode = WB_SYNC_ALL, | ||
294 | .nr_to_write = 0, /* metadata only */ | ||
295 | }; | ||
296 | |||
297 | error = sync_inode(inode, &wbc); | ||
298 | } | ||
299 | |||
300 | return error; | ||
301 | } | 291 | } |
302 | 292 | ||
303 | /* | 293 | /* |
diff --git a/fs/nilfs2/Makefile b/fs/nilfs2/Makefile index df3e62c1ddc5..85c98737a146 100644 --- a/fs/nilfs2/Makefile +++ b/fs/nilfs2/Makefile | |||
@@ -2,4 +2,4 @@ obj-$(CONFIG_NILFS2_FS) += nilfs2.o | |||
2 | nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ | 2 | nilfs2-y := inode.o file.o dir.o super.o namei.o page.o mdt.o \ |
3 | btnode.o bmap.o btree.o direct.o dat.o recovery.o \ | 3 | btnode.o bmap.o btree.o direct.o dat.o recovery.o \ |
4 | the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ | 4 | the_nilfs.o segbuf.o segment.o cpfile.o sufile.o \ |
5 | ifile.o alloc.o gcinode.o ioctl.o gcdat.o | 5 | ifile.o alloc.o gcinode.o ioctl.o |
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c index 3dbdc1d356bf..8b782b062baa 100644 --- a/fs/nilfs2/bmap.c +++ b/fs/nilfs2/bmap.c | |||
@@ -533,18 +533,20 @@ void nilfs_bmap_init_gc(struct nilfs_bmap *bmap) | |||
533 | nilfs_btree_init_gc(bmap); | 533 | nilfs_btree_init_gc(bmap); |
534 | } | 534 | } |
535 | 535 | ||
536 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | 536 | void nilfs_bmap_save(const struct nilfs_bmap *bmap, |
537 | struct nilfs_bmap_store *store) | ||
537 | { | 538 | { |
538 | memcpy(gcbmap, bmap, sizeof(*bmap)); | 539 | memcpy(store->data, bmap->b_u.u_data, sizeof(store->data)); |
539 | init_rwsem(&gcbmap->b_sem); | 540 | store->last_allocated_key = bmap->b_last_allocated_key; |
540 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | 541 | store->last_allocated_ptr = bmap->b_last_allocated_ptr; |
541 | gcbmap->b_inode = &NILFS_BMAP_I(gcbmap)->vfs_inode; | 542 | store->state = bmap->b_state; |
542 | } | 543 | } |
543 | 544 | ||
544 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *gcbmap, struct nilfs_bmap *bmap) | 545 | void nilfs_bmap_restore(struct nilfs_bmap *bmap, |
546 | const struct nilfs_bmap_store *store) | ||
545 | { | 547 | { |
546 | memcpy(bmap, gcbmap, sizeof(*bmap)); | 548 | memcpy(bmap->b_u.u_data, store->data, sizeof(store->data)); |
547 | init_rwsem(&bmap->b_sem); | 549 | bmap->b_last_allocated_key = store->last_allocated_key; |
548 | lockdep_set_class(&bmap->b_sem, &nilfs_bmap_dat_lock_key); | 550 | bmap->b_last_allocated_ptr = store->last_allocated_ptr; |
549 | bmap->b_inode = &NILFS_BMAP_I(bmap)->vfs_inode; | 551 | bmap->b_state = store->state; |
550 | } | 552 | } |
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h index a20569b19929..bde1c0aa2e15 100644 --- a/fs/nilfs2/bmap.h +++ b/fs/nilfs2/bmap.h | |||
@@ -135,6 +135,12 @@ struct nilfs_bmap { | |||
135 | /* state */ | 135 | /* state */ |
136 | #define NILFS_BMAP_DIRTY 0x00000001 | 136 | #define NILFS_BMAP_DIRTY 0x00000001 |
137 | 137 | ||
138 | struct nilfs_bmap_store { | ||
139 | __le64 data[NILFS_BMAP_SIZE / sizeof(__le64)]; | ||
140 | __u64 last_allocated_key; | ||
141 | __u64 last_allocated_ptr; | ||
142 | int state; | ||
143 | }; | ||
138 | 144 | ||
139 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); | 145 | int nilfs_bmap_test_and_clear_dirty(struct nilfs_bmap *); |
140 | int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); | 146 | int nilfs_bmap_read(struct nilfs_bmap *, struct nilfs_inode *); |
@@ -153,9 +159,9 @@ int nilfs_bmap_lookup_at_level(struct nilfs_bmap *, __u64, int, __u64 *); | |||
153 | int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); | 159 | int nilfs_bmap_mark(struct nilfs_bmap *, __u64, int); |
154 | 160 | ||
155 | void nilfs_bmap_init_gc(struct nilfs_bmap *); | 161 | void nilfs_bmap_init_gc(struct nilfs_bmap *); |
156 | void nilfs_bmap_init_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | ||
157 | void nilfs_bmap_commit_gcdat(struct nilfs_bmap *, struct nilfs_bmap *); | ||
158 | 162 | ||
163 | void nilfs_bmap_save(const struct nilfs_bmap *, struct nilfs_bmap_store *); | ||
164 | void nilfs_bmap_restore(struct nilfs_bmap *, const struct nilfs_bmap_store *); | ||
159 | 165 | ||
160 | static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key, | 166 | static inline int nilfs_bmap_lookup(struct nilfs_bmap *bmap, __u64 key, |
161 | __u64 *ptr) | 167 | __u64 *ptr) |
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index f78ab1044d1d..5115814cb745 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c | |||
@@ -37,15 +37,7 @@ | |||
37 | 37 | ||
38 | void nilfs_btnode_cache_init_once(struct address_space *btnc) | 38 | void nilfs_btnode_cache_init_once(struct address_space *btnc) |
39 | { | 39 | { |
40 | memset(btnc, 0, sizeof(*btnc)); | 40 | nilfs_mapping_init_once(btnc); |
41 | INIT_RADIX_TREE(&btnc->page_tree, GFP_ATOMIC); | ||
42 | spin_lock_init(&btnc->tree_lock); | ||
43 | INIT_LIST_HEAD(&btnc->private_list); | ||
44 | spin_lock_init(&btnc->private_lock); | ||
45 | |||
46 | spin_lock_init(&btnc->i_mmap_lock); | ||
47 | INIT_RAW_PRIO_TREE_ROOT(&btnc->i_mmap); | ||
48 | INIT_LIST_HEAD(&btnc->i_mmap_nonlinear); | ||
49 | } | 41 | } |
50 | 42 | ||
51 | static const struct address_space_operations def_btnode_aops = { | 43 | static const struct address_space_operations def_btnode_aops = { |
@@ -55,12 +47,7 @@ static const struct address_space_operations def_btnode_aops = { | |||
55 | void nilfs_btnode_cache_init(struct address_space *btnc, | 47 | void nilfs_btnode_cache_init(struct address_space *btnc, |
56 | struct backing_dev_info *bdi) | 48 | struct backing_dev_info *bdi) |
57 | { | 49 | { |
58 | btnc->host = NULL; /* can safely set to host inode ? */ | 50 | nilfs_mapping_init(btnc, bdi, &def_btnode_aops); |
59 | btnc->flags = 0; | ||
60 | mapping_set_gfp_mask(btnc, GFP_NOFS); | ||
61 | btnc->assoc_mapping = NULL; | ||
62 | btnc->backing_dev_info = bdi; | ||
63 | btnc->a_ops = &def_btnode_aops; | ||
64 | } | 51 | } |
65 | 52 | ||
66 | void nilfs_btnode_cache_clear(struct address_space *btnc) | 53 | void nilfs_btnode_cache_clear(struct address_space *btnc) |
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index 18737818db63..5ff15a8a1024 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c | |||
@@ -863,26 +863,19 @@ int nilfs_cpfile_is_snapshot(struct inode *cpfile, __u64 cno) | |||
863 | */ | 863 | */ |
864 | int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) | 864 | int nilfs_cpfile_change_cpmode(struct inode *cpfile, __u64 cno, int mode) |
865 | { | 865 | { |
866 | struct the_nilfs *nilfs; | ||
867 | int ret; | 866 | int ret; |
868 | 867 | ||
869 | nilfs = NILFS_MDT(cpfile)->mi_nilfs; | ||
870 | |||
871 | switch (mode) { | 868 | switch (mode) { |
872 | case NILFS_CHECKPOINT: | 869 | case NILFS_CHECKPOINT: |
873 | /* | 870 | if (nilfs_checkpoint_is_mounted(cpfile->i_sb, cno)) |
874 | * Check for protecting existing snapshot mounts: | 871 | /* |
875 | * ns_mount_mutex is used to make this operation atomic and | 872 | * Current implementation does not have to protect |
876 | * exclusive with a new mount job. Though it doesn't cover | 873 | * plain read-only mounts since they are exclusive |
877 | * umount, it's enough for the purpose. | 874 | * with a read/write mount and are protected from the |
878 | */ | 875 | * cleaner. |
879 | if (nilfs_checkpoint_is_mounted(nilfs, cno, 1)) { | 876 | */ |
880 | /* Current implementation does not have to protect | ||
881 | plain read-only mounts since they are exclusive | ||
882 | with a read/write mount and are protected from the | ||
883 | cleaner. */ | ||
884 | ret = -EBUSY; | 877 | ret = -EBUSY; |
885 | } else | 878 | else |
886 | ret = nilfs_cpfile_clear_snapshot(cpfile, cno); | 879 | ret = nilfs_cpfile_clear_snapshot(cpfile, cno); |
887 | return ret; | 880 | return ret; |
888 | case NILFS_SNAPSHOT: | 881 | case NILFS_SNAPSHOT: |
@@ -933,27 +926,40 @@ int nilfs_cpfile_get_stat(struct inode *cpfile, struct nilfs_cpstat *cpstat) | |||
933 | } | 926 | } |
934 | 927 | ||
935 | /** | 928 | /** |
936 | * nilfs_cpfile_read - read cpfile inode | 929 | * nilfs_cpfile_read - read or get cpfile inode |
937 | * @cpfile: cpfile inode | 930 | * @sb: super block instance |
938 | * @raw_inode: on-disk cpfile inode | ||
939 | */ | ||
940 | int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode) | ||
941 | { | ||
942 | return nilfs_read_inode_common(cpfile, raw_inode); | ||
943 | } | ||
944 | |||
945 | /** | ||
946 | * nilfs_cpfile_new - create cpfile | ||
947 | * @nilfs: nilfs object | ||
948 | * @cpsize: size of a checkpoint entry | 931 | * @cpsize: size of a checkpoint entry |
932 | * @raw_inode: on-disk cpfile inode | ||
933 | * @inodep: buffer to store the inode | ||
949 | */ | 934 | */ |
950 | struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize) | 935 | int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, |
936 | struct nilfs_inode *raw_inode, struct inode **inodep) | ||
951 | { | 937 | { |
952 | struct inode *cpfile; | 938 | struct inode *cpfile; |
939 | int err; | ||
940 | |||
941 | cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); | ||
942 | if (unlikely(!cpfile)) | ||
943 | return -ENOMEM; | ||
944 | if (!(cpfile->i_state & I_NEW)) | ||
945 | goto out; | ||
946 | |||
947 | err = nilfs_mdt_init(cpfile, NILFS_MDT_GFP, 0); | ||
948 | if (err) | ||
949 | goto failed; | ||
953 | 950 | ||
954 | cpfile = nilfs_mdt_new(nilfs, NULL, NILFS_CPFILE_INO, 0); | 951 | nilfs_mdt_set_entry_size(cpfile, cpsize, |
955 | if (cpfile) | 952 | sizeof(struct nilfs_cpfile_header)); |
956 | nilfs_mdt_set_entry_size(cpfile, cpsize, | 953 | |
957 | sizeof(struct nilfs_cpfile_header)); | 954 | err = nilfs_read_inode_common(cpfile, raw_inode); |
958 | return cpfile; | 955 | if (err) |
956 | goto failed; | ||
957 | |||
958 | unlock_new_inode(cpfile); | ||
959 | out: | ||
960 | *inodep = cpfile; | ||
961 | return 0; | ||
962 | failed: | ||
963 | iget_failed(cpfile); | ||
964 | return err; | ||
959 | } | 965 | } |
diff --git a/fs/nilfs2/cpfile.h b/fs/nilfs2/cpfile.h index bc0809e0ab43..a242b9a314f9 100644 --- a/fs/nilfs2/cpfile.h +++ b/fs/nilfs2/cpfile.h | |||
@@ -40,7 +40,7 @@ int nilfs_cpfile_get_stat(struct inode *, struct nilfs_cpstat *); | |||
40 | ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, | 40 | ssize_t nilfs_cpfile_get_cpinfo(struct inode *, __u64 *, int, void *, unsigned, |
41 | size_t); | 41 | size_t); |
42 | 42 | ||
43 | int nilfs_cpfile_read(struct inode *cpfile, struct nilfs_inode *raw_inode); | 43 | int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, |
44 | struct inode *nilfs_cpfile_new(struct the_nilfs *nilfs, size_t cpsize); | 44 | struct nilfs_inode *raw_inode, struct inode **inodep); |
45 | 45 | ||
46 | #endif /* _NILFS_CPFILE_H */ | 46 | #endif /* _NILFS_CPFILE_H */ |
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index 013146755683..49c844dab33a 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c | |||
@@ -36,6 +36,7 @@ | |||
36 | struct nilfs_dat_info { | 36 | struct nilfs_dat_info { |
37 | struct nilfs_mdt_info mi; | 37 | struct nilfs_mdt_info mi; |
38 | struct nilfs_palloc_cache palloc_cache; | 38 | struct nilfs_palloc_cache palloc_cache; |
39 | struct nilfs_shadow_map shadow; | ||
39 | }; | 40 | }; |
40 | 41 | ||
41 | static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) | 42 | static inline struct nilfs_dat_info *NILFS_DAT_I(struct inode *dat) |
@@ -102,7 +103,8 @@ void nilfs_dat_abort_alloc(struct inode *dat, struct nilfs_palloc_req *req) | |||
102 | nilfs_palloc_abort_alloc_entry(dat, req); | 103 | nilfs_palloc_abort_alloc_entry(dat, req); |
103 | } | 104 | } |
104 | 105 | ||
105 | void nilfs_dat_commit_free(struct inode *dat, struct nilfs_palloc_req *req) | 106 | static void nilfs_dat_commit_free(struct inode *dat, |
107 | struct nilfs_palloc_req *req) | ||
106 | { | 108 | { |
107 | struct nilfs_dat_entry *entry; | 109 | struct nilfs_dat_entry *entry; |
108 | void *kaddr; | 110 | void *kaddr; |
@@ -327,6 +329,23 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) | |||
327 | ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); | 329 | ret = nilfs_palloc_get_entry_block(dat, vblocknr, 0, &entry_bh); |
328 | if (ret < 0) | 330 | if (ret < 0) |
329 | return ret; | 331 | return ret; |
332 | |||
333 | /* | ||
334 | * The given disk block number (blocknr) is not yet written to | ||
335 | * the device at this point. | ||
336 | * | ||
337 | * To prevent nilfs_dat_translate() from returning the | ||
338 | * uncommited block number, this makes a copy of the entry | ||
339 | * buffer and redirects nilfs_dat_translate() to the copy. | ||
340 | */ | ||
341 | if (!buffer_nilfs_redirected(entry_bh)) { | ||
342 | ret = nilfs_mdt_freeze_buffer(dat, entry_bh); | ||
343 | if (ret) { | ||
344 | brelse(entry_bh); | ||
345 | return ret; | ||
346 | } | ||
347 | } | ||
348 | |||
330 | kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); | 349 | kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); |
331 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); | 350 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); |
332 | if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { | 351 | if (unlikely(entry->de_blocknr == cpu_to_le64(0))) { |
@@ -371,7 +390,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr) | |||
371 | */ | 390 | */ |
372 | int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) | 391 | int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) |
373 | { | 392 | { |
374 | struct buffer_head *entry_bh; | 393 | struct buffer_head *entry_bh, *bh; |
375 | struct nilfs_dat_entry *entry; | 394 | struct nilfs_dat_entry *entry; |
376 | sector_t blocknr; | 395 | sector_t blocknr; |
377 | void *kaddr; | 396 | void *kaddr; |
@@ -381,6 +400,15 @@ int nilfs_dat_translate(struct inode *dat, __u64 vblocknr, sector_t *blocknrp) | |||
381 | if (ret < 0) | 400 | if (ret < 0) |
382 | return ret; | 401 | return ret; |
383 | 402 | ||
403 | if (!nilfs_doing_gc() && buffer_nilfs_redirected(entry_bh)) { | ||
404 | bh = nilfs_mdt_get_frozen_buffer(dat, entry_bh); | ||
405 | if (bh) { | ||
406 | WARN_ON(!buffer_uptodate(bh)); | ||
407 | brelse(entry_bh); | ||
408 | entry_bh = bh; | ||
409 | } | ||
410 | } | ||
411 | |||
384 | kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); | 412 | kaddr = kmap_atomic(entry_bh->b_page, KM_USER0); |
385 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); | 413 | entry = nilfs_palloc_block_get_entry(dat, vblocknr, entry_bh, kaddr); |
386 | blocknr = le64_to_cpu(entry->de_blocknr); | 414 | blocknr = le64_to_cpu(entry->de_blocknr); |
@@ -436,38 +464,48 @@ ssize_t nilfs_dat_get_vinfo(struct inode *dat, void *buf, unsigned visz, | |||
436 | } | 464 | } |
437 | 465 | ||
438 | /** | 466 | /** |
439 | * nilfs_dat_read - read dat inode | 467 | * nilfs_dat_read - read or get dat inode |
440 | * @dat: dat inode | 468 | * @sb: super block instance |
441 | * @raw_inode: on-disk dat inode | ||
442 | */ | ||
443 | int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode) | ||
444 | { | ||
445 | return nilfs_read_inode_common(dat, raw_inode); | ||
446 | } | ||
447 | |||
448 | /** | ||
449 | * nilfs_dat_new - create dat file | ||
450 | * @nilfs: nilfs object | ||
451 | * @entry_size: size of a dat entry | 469 | * @entry_size: size of a dat entry |
470 | * @raw_inode: on-disk dat inode | ||
471 | * @inodep: buffer to store the inode | ||
452 | */ | 472 | */ |
453 | struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size) | 473 | int nilfs_dat_read(struct super_block *sb, size_t entry_size, |
474 | struct nilfs_inode *raw_inode, struct inode **inodep) | ||
454 | { | 475 | { |
455 | static struct lock_class_key dat_lock_key; | 476 | static struct lock_class_key dat_lock_key; |
456 | struct inode *dat; | 477 | struct inode *dat; |
457 | struct nilfs_dat_info *di; | 478 | struct nilfs_dat_info *di; |
458 | int err; | 479 | int err; |
459 | 480 | ||
460 | dat = nilfs_mdt_new(nilfs, NULL, NILFS_DAT_INO, sizeof(*di)); | 481 | dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); |
461 | if (dat) { | 482 | if (unlikely(!dat)) |
462 | err = nilfs_palloc_init_blockgroup(dat, entry_size); | 483 | return -ENOMEM; |
463 | if (unlikely(err)) { | 484 | if (!(dat->i_state & I_NEW)) |
464 | nilfs_mdt_destroy(dat); | 485 | goto out; |
465 | return NULL; | ||
466 | } | ||
467 | 486 | ||
468 | di = NILFS_DAT_I(dat); | 487 | err = nilfs_mdt_init(dat, NILFS_MDT_GFP, sizeof(*di)); |
469 | lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); | 488 | if (err) |
470 | nilfs_palloc_setup_cache(dat, &di->palloc_cache); | 489 | goto failed; |
471 | } | 490 | |
472 | return dat; | 491 | err = nilfs_palloc_init_blockgroup(dat, entry_size); |
492 | if (err) | ||
493 | goto failed; | ||
494 | |||
495 | di = NILFS_DAT_I(dat); | ||
496 | lockdep_set_class(&di->mi.mi_sem, &dat_lock_key); | ||
497 | nilfs_palloc_setup_cache(dat, &di->palloc_cache); | ||
498 | nilfs_mdt_setup_shadow_map(dat, &di->shadow); | ||
499 | |||
500 | err = nilfs_read_inode_common(dat, raw_inode); | ||
501 | if (err) | ||
502 | goto failed; | ||
503 | |||
504 | unlock_new_inode(dat); | ||
505 | out: | ||
506 | *inodep = dat; | ||
507 | return 0; | ||
508 | failed: | ||
509 | iget_failed(dat); | ||
510 | return err; | ||
473 | } | 511 | } |
diff --git a/fs/nilfs2/dat.h b/fs/nilfs2/dat.h index d31c3aab0efe..cbd8e9732503 100644 --- a/fs/nilfs2/dat.h +++ b/fs/nilfs2/dat.h | |||
@@ -53,7 +53,7 @@ int nilfs_dat_freev(struct inode *, __u64 *, size_t); | |||
53 | int nilfs_dat_move(struct inode *, __u64, sector_t); | 53 | int nilfs_dat_move(struct inode *, __u64, sector_t); |
54 | ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); | 54 | ssize_t nilfs_dat_get_vinfo(struct inode *, void *, unsigned, size_t); |
55 | 55 | ||
56 | int nilfs_dat_read(struct inode *dat, struct nilfs_inode *raw_inode); | 56 | int nilfs_dat_read(struct super_block *sb, size_t entry_size, |
57 | struct inode *nilfs_dat_new(struct the_nilfs *nilfs, size_t entry_size); | 57 | struct nilfs_inode *raw_inode, struct inode **inodep); |
58 | 58 | ||
59 | #endif /* _NILFS_DAT_H */ | 59 | #endif /* _NILFS_DAT_H */ |
diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h new file mode 100644 index 000000000000..a71cc412b651 --- /dev/null +++ b/fs/nilfs2/export.h | |||
@@ -0,0 +1,17 @@ | |||
1 | #ifndef NILFS_EXPORT_H | ||
2 | #define NILFS_EXPORT_H | ||
3 | |||
4 | #include <linux/exportfs.h> | ||
5 | |||
6 | extern const struct export_operations nilfs_export_ops; | ||
7 | |||
8 | struct nilfs_fid { | ||
9 | u64 cno; | ||
10 | u64 ino; | ||
11 | u32 gen; | ||
12 | |||
13 | u32 parent_gen; | ||
14 | u64 parent_ino; | ||
15 | } __attribute__ ((packed)); | ||
16 | |||
17 | #endif | ||
diff --git a/fs/nilfs2/gcdat.c b/fs/nilfs2/gcdat.c deleted file mode 100644 index 84a45d1d5464..000000000000 --- a/fs/nilfs2/gcdat.c +++ /dev/null | |||
@@ -1,87 +0,0 @@ | |||
1 | /* | ||
2 | * gcdat.c - NILFS shadow DAT inode for GC | ||
3 | * | ||
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, | ||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
14 | * GNU General Public License for more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License | ||
17 | * along with this program; if not, write to the Free Software | ||
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
19 | * | ||
20 | * Written by Seiji Kihara <kihara@osrg.net>, Amagai Yoshiji <amagai@osrg.net>, | ||
21 | * and Ryusuke Konishi <ryusuke@osrg.net>. | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | #include <linux/buffer_head.h> | ||
26 | #include "nilfs.h" | ||
27 | #include "page.h" | ||
28 | #include "mdt.h" | ||
29 | |||
30 | int nilfs_init_gcdat_inode(struct the_nilfs *nilfs) | ||
31 | { | ||
32 | struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; | ||
33 | struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); | ||
34 | int err; | ||
35 | |||
36 | gcdat->i_state = 0; | ||
37 | gcdat->i_blocks = dat->i_blocks; | ||
38 | gii->i_flags = dii->i_flags; | ||
39 | gii->i_state = dii->i_state | (1 << NILFS_I_GCDAT); | ||
40 | gii->i_cno = 0; | ||
41 | nilfs_bmap_init_gcdat(gii->i_bmap, dii->i_bmap); | ||
42 | err = nilfs_copy_dirty_pages(gcdat->i_mapping, dat->i_mapping); | ||
43 | if (unlikely(err)) | ||
44 | return err; | ||
45 | |||
46 | return nilfs_copy_dirty_pages(&gii->i_btnode_cache, | ||
47 | &dii->i_btnode_cache); | ||
48 | } | ||
49 | |||
50 | void nilfs_commit_gcdat_inode(struct the_nilfs *nilfs) | ||
51 | { | ||
52 | struct inode *dat = nilfs->ns_dat, *gcdat = nilfs->ns_gc_dat; | ||
53 | struct nilfs_inode_info *dii = NILFS_I(dat), *gii = NILFS_I(gcdat); | ||
54 | struct address_space *mapping = dat->i_mapping; | ||
55 | struct address_space *gmapping = gcdat->i_mapping; | ||
56 | |||
57 | down_write(&NILFS_MDT(dat)->mi_sem); | ||
58 | dat->i_blocks = gcdat->i_blocks; | ||
59 | dii->i_flags = gii->i_flags; | ||
60 | dii->i_state = gii->i_state & ~(1 << NILFS_I_GCDAT); | ||
61 | |||
62 | nilfs_bmap_commit_gcdat(gii->i_bmap, dii->i_bmap); | ||
63 | |||
64 | nilfs_palloc_clear_cache(dat); | ||
65 | nilfs_palloc_clear_cache(gcdat); | ||
66 | nilfs_clear_dirty_pages(mapping); | ||
67 | nilfs_copy_back_pages(mapping, gmapping); | ||
68 | /* note: mdt dirty flags should be cleared by segctor. */ | ||
69 | |||
70 | nilfs_clear_dirty_pages(&dii->i_btnode_cache); | ||
71 | nilfs_copy_back_pages(&dii->i_btnode_cache, &gii->i_btnode_cache); | ||
72 | |||
73 | up_write(&NILFS_MDT(dat)->mi_sem); | ||
74 | } | ||
75 | |||
76 | void nilfs_clear_gcdat_inode(struct the_nilfs *nilfs) | ||
77 | { | ||
78 | struct inode *gcdat = nilfs->ns_gc_dat; | ||
79 | struct nilfs_inode_info *gii = NILFS_I(gcdat); | ||
80 | |||
81 | gcdat->i_state = I_FREEING | I_CLEAR; | ||
82 | gii->i_flags = 0; | ||
83 | |||
84 | nilfs_palloc_clear_cache(gcdat); | ||
85 | truncate_inode_pages(gcdat->i_mapping, 0); | ||
86 | truncate_inode_pages(&gii->i_btnode_cache, 0); | ||
87 | } | ||
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index bed3a783129b..33ad25ddd5c4 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c | |||
@@ -28,13 +28,6 @@ | |||
28 | * gcinodes), and this file provides lookup function of the dummy | 28 | * gcinodes), and this file provides lookup function of the dummy |
29 | * inodes and their buffer read function. | 29 | * inodes and their buffer read function. |
30 | * | 30 | * |
31 | * Since NILFS2 keeps up multiple checkpoints/snapshots across GC, it | ||
32 | * has to treat blocks that belong to a same file but have different | ||
33 | * checkpoint numbers. To avoid interference among generations, dummy | ||
34 | * inodes are managed separately from actual inodes, and their lookup | ||
35 | * function (nilfs_gc_iget) is designed to be specified with a | ||
36 | * checkpoint number argument as well as an inode number. | ||
37 | * | ||
38 | * Buffers and pages held by the dummy inodes will be released each | 31 | * Buffers and pages held by the dummy inodes will be released each |
39 | * time after they are copied to a new log. Dirty blocks made on the | 32 | * time after they are copied to a new log. Dirty blocks made on the |
40 | * current generation and the blocks to be moved by GC never overlap | 33 | * current generation and the blocks to be moved by GC never overlap |
@@ -175,125 +168,46 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh) | |||
175 | } | 168 | } |
176 | nilfs_btnode_mark_dirty(bh); | 169 | nilfs_btnode_mark_dirty(bh); |
177 | } else { | 170 | } else { |
178 | nilfs_mdt_mark_buffer_dirty(bh); | 171 | nilfs_mark_buffer_dirty(bh); |
179 | } | 172 | } |
180 | return 0; | 173 | return 0; |
181 | } | 174 | } |
182 | 175 | ||
183 | /* | 176 | int nilfs_init_gcinode(struct inode *inode) |
184 | * nilfs_init_gccache() - allocate and initialize gc_inode hash table | ||
185 | * @nilfs - the_nilfs | ||
186 | * | ||
187 | * Return Value: On success, 0. | ||
188 | * On error, a negative error code is returned. | ||
189 | */ | ||
190 | int nilfs_init_gccache(struct the_nilfs *nilfs) | ||
191 | { | 177 | { |
192 | int loop; | 178 | struct nilfs_inode_info *ii = NILFS_I(inode); |
193 | 179 | struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs; | |
194 | BUG_ON(nilfs->ns_gc_inodes_h); | ||
195 | |||
196 | INIT_LIST_HEAD(&nilfs->ns_gc_inodes); | ||
197 | |||
198 | nilfs->ns_gc_inodes_h = | ||
199 | kmalloc(sizeof(struct hlist_head) * NILFS_GCINODE_HASH_SIZE, | ||
200 | GFP_NOFS); | ||
201 | if (nilfs->ns_gc_inodes_h == NULL) | ||
202 | return -ENOMEM; | ||
203 | |||
204 | for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++) | ||
205 | INIT_HLIST_HEAD(&nilfs->ns_gc_inodes_h[loop]); | ||
206 | return 0; | ||
207 | } | ||
208 | |||
209 | /* | ||
210 | * nilfs_destroy_gccache() - free gc_inode hash table | ||
211 | * @nilfs - the nilfs | ||
212 | */ | ||
213 | void nilfs_destroy_gccache(struct the_nilfs *nilfs) | ||
214 | { | ||
215 | if (nilfs->ns_gc_inodes_h) { | ||
216 | nilfs_remove_all_gcinode(nilfs); | ||
217 | kfree(nilfs->ns_gc_inodes_h); | ||
218 | nilfs->ns_gc_inodes_h = NULL; | ||
219 | } | ||
220 | } | ||
221 | |||
222 | static struct inode *alloc_gcinode(struct the_nilfs *nilfs, ino_t ino, | ||
223 | __u64 cno) | ||
224 | { | ||
225 | struct inode *inode; | ||
226 | struct nilfs_inode_info *ii; | ||
227 | |||
228 | inode = nilfs_mdt_new_common(nilfs, NULL, ino, GFP_NOFS, 0); | ||
229 | if (!inode) | ||
230 | return NULL; | ||
231 | 180 | ||
232 | inode->i_op = NULL; | 181 | inode->i_mode = S_IFREG; |
233 | inode->i_fop = NULL; | 182 | mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); |
234 | inode->i_mapping->a_ops = &def_gcinode_aops; | 183 | inode->i_mapping->a_ops = &def_gcinode_aops; |
184 | inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; | ||
235 | 185 | ||
236 | ii = NILFS_I(inode); | ||
237 | ii->i_cno = cno; | ||
238 | ii->i_flags = 0; | 186 | ii->i_flags = 0; |
239 | ii->i_state = 1 << NILFS_I_GCINODE; | ||
240 | ii->i_bh = NULL; | ||
241 | nilfs_bmap_init_gc(ii->i_bmap); | 187 | nilfs_bmap_init_gc(ii->i_bmap); |
242 | 188 | ||
243 | return inode; | 189 | /* |
244 | } | 190 | * Add the inode to GC inode list. Garbage Collection |
245 | 191 | * is serialized and no two processes manipulate the | |
246 | static unsigned long ihash(ino_t ino, __u64 cno) | 192 | * list simultaneously. |
247 | { | 193 | */ |
248 | return hash_long((unsigned long)((ino << 2) + cno), | 194 | igrab(inode); |
249 | NILFS_GCINODE_HASH_BITS); | 195 | list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); |
250 | } | ||
251 | |||
252 | /* | ||
253 | * nilfs_gc_iget() - find or create gc inode with specified (ino,cno) | ||
254 | */ | ||
255 | struct inode *nilfs_gc_iget(struct the_nilfs *nilfs, ino_t ino, __u64 cno) | ||
256 | { | ||
257 | struct hlist_head *head = nilfs->ns_gc_inodes_h + ihash(ino, cno); | ||
258 | struct hlist_node *node; | ||
259 | struct inode *inode; | ||
260 | |||
261 | hlist_for_each_entry(inode, node, head, i_hash) { | ||
262 | if (inode->i_ino == ino && NILFS_I(inode)->i_cno == cno) | ||
263 | return inode; | ||
264 | } | ||
265 | 196 | ||
266 | inode = alloc_gcinode(nilfs, ino, cno); | 197 | return 0; |
267 | if (likely(inode)) { | ||
268 | hlist_add_head(&inode->i_hash, head); | ||
269 | list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes); | ||
270 | } | ||
271 | return inode; | ||
272 | } | ||
273 | |||
274 | /* | ||
275 | * nilfs_clear_gcinode() - clear and free a gc inode | ||
276 | */ | ||
277 | void nilfs_clear_gcinode(struct inode *inode) | ||
278 | { | ||
279 | nilfs_mdt_destroy(inode); | ||
280 | } | 198 | } |
281 | 199 | ||
282 | /* | 200 | /** |
283 | * nilfs_remove_all_gcinode() - remove all inodes from the_nilfs | 201 | * nilfs_remove_all_gcinodes() - remove all unprocessed gc inodes |
284 | */ | 202 | */ |
285 | void nilfs_remove_all_gcinode(struct the_nilfs *nilfs) | 203 | void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs) |
286 | { | 204 | { |
287 | struct hlist_head *head = nilfs->ns_gc_inodes_h; | 205 | struct list_head *head = &nilfs->ns_gc_inodes; |
288 | struct hlist_node *node, *n; | 206 | struct nilfs_inode_info *ii; |
289 | struct inode *inode; | ||
290 | int loop; | ||
291 | 207 | ||
292 | for (loop = 0; loop < NILFS_GCINODE_HASH_SIZE; loop++, head++) { | 208 | while (!list_empty(head)) { |
293 | hlist_for_each_entry_safe(inode, node, n, head, i_hash) { | 209 | ii = list_first_entry(head, struct nilfs_inode_info, i_dirty); |
294 | hlist_del_init(&inode->i_hash); | 210 | list_del_init(&ii->i_dirty); |
295 | list_del_init(&NILFS_I(inode)->i_dirty); | 211 | iput(&ii->vfs_inode); |
296 | nilfs_clear_gcinode(inode); /* might sleep */ | ||
297 | } | ||
298 | } | 212 | } |
299 | } | 213 | } |
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c index 922d9dd42c8f..9f8a2da67f90 100644 --- a/fs/nilfs2/ifile.c +++ b/fs/nilfs2/ifile.c | |||
@@ -161,25 +161,46 @@ int nilfs_ifile_get_inode_block(struct inode *ifile, ino_t ino, | |||
161 | } | 161 | } |
162 | 162 | ||
163 | /** | 163 | /** |
164 | * nilfs_ifile_new - create inode file | 164 | * nilfs_ifile_read - read or get ifile inode |
165 | * @sbi: nilfs_sb_info struct | 165 | * @sb: super block instance |
166 | * @root: root object | ||
166 | * @inode_size: size of an inode | 167 | * @inode_size: size of an inode |
168 | * @raw_inode: on-disk ifile inode | ||
169 | * @inodep: buffer to store the inode | ||
167 | */ | 170 | */ |
168 | struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size) | 171 | int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, |
172 | size_t inode_size, struct nilfs_inode *raw_inode, | ||
173 | struct inode **inodep) | ||
169 | { | 174 | { |
170 | struct inode *ifile; | 175 | struct inode *ifile; |
171 | int err; | 176 | int err; |
172 | 177 | ||
173 | ifile = nilfs_mdt_new(sbi->s_nilfs, sbi->s_super, NILFS_IFILE_INO, | 178 | ifile = nilfs_iget_locked(sb, root, NILFS_IFILE_INO); |
174 | sizeof(struct nilfs_ifile_info)); | 179 | if (unlikely(!ifile)) |
175 | if (ifile) { | 180 | return -ENOMEM; |
176 | err = nilfs_palloc_init_blockgroup(ifile, inode_size); | 181 | if (!(ifile->i_state & I_NEW)) |
177 | if (unlikely(err)) { | 182 | goto out; |
178 | nilfs_mdt_destroy(ifile); | 183 | |
179 | return NULL; | 184 | err = nilfs_mdt_init(ifile, NILFS_MDT_GFP, |
180 | } | 185 | sizeof(struct nilfs_ifile_info)); |
181 | nilfs_palloc_setup_cache(ifile, | 186 | if (err) |
182 | &NILFS_IFILE_I(ifile)->palloc_cache); | 187 | goto failed; |
183 | } | 188 | |
184 | return ifile; | 189 | err = nilfs_palloc_init_blockgroup(ifile, inode_size); |
190 | if (err) | ||
191 | goto failed; | ||
192 | |||
193 | nilfs_palloc_setup_cache(ifile, &NILFS_IFILE_I(ifile)->palloc_cache); | ||
194 | |||
195 | err = nilfs_read_inode_common(ifile, raw_inode); | ||
196 | if (err) | ||
197 | goto failed; | ||
198 | |||
199 | unlock_new_inode(ifile); | ||
200 | out: | ||
201 | *inodep = ifile; | ||
202 | return 0; | ||
203 | failed: | ||
204 | iget_failed(ifile); | ||
205 | return err; | ||
185 | } | 206 | } |
diff --git a/fs/nilfs2/ifile.h b/fs/nilfs2/ifile.h index cbca32e498f2..59b6f2b51df6 100644 --- a/fs/nilfs2/ifile.h +++ b/fs/nilfs2/ifile.h | |||
@@ -49,6 +49,8 @@ int nilfs_ifile_create_inode(struct inode *, ino_t *, struct buffer_head **); | |||
49 | int nilfs_ifile_delete_inode(struct inode *, ino_t); | 49 | int nilfs_ifile_delete_inode(struct inode *, ino_t); |
50 | int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); | 50 | int nilfs_ifile_get_inode_block(struct inode *, ino_t, struct buffer_head **); |
51 | 51 | ||
52 | struct inode *nilfs_ifile_new(struct nilfs_sb_info *sbi, size_t inode_size); | 52 | int nilfs_ifile_read(struct super_block *sb, struct nilfs_root *root, |
53 | size_t inode_size, struct nilfs_inode *raw_inode, | ||
54 | struct inode **inodep); | ||
53 | 55 | ||
54 | #endif /* _NILFS_IFILE_H */ | 56 | #endif /* _NILFS_IFILE_H */ |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index eccb2f2e2315..71d4bc8464e0 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -34,6 +34,12 @@ | |||
34 | #include "cpfile.h" | 34 | #include "cpfile.h" |
35 | #include "ifile.h" | 35 | #include "ifile.h" |
36 | 36 | ||
37 | struct nilfs_iget_args { | ||
38 | u64 ino; | ||
39 | __u64 cno; | ||
40 | struct nilfs_root *root; | ||
41 | int for_gc; | ||
42 | }; | ||
37 | 43 | ||
38 | /** | 44 | /** |
39 | * nilfs_get_block() - get a file block on the filesystem (callback function) | 45 | * nilfs_get_block() - get a file block on the filesystem (callback function) |
@@ -279,6 +285,7 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) | |||
279 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 285 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
280 | struct inode *inode; | 286 | struct inode *inode; |
281 | struct nilfs_inode_info *ii; | 287 | struct nilfs_inode_info *ii; |
288 | struct nilfs_root *root; | ||
282 | int err = -ENOMEM; | 289 | int err = -ENOMEM; |
283 | ino_t ino; | 290 | ino_t ino; |
284 | 291 | ||
@@ -289,15 +296,17 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) | |||
289 | mapping_set_gfp_mask(inode->i_mapping, | 296 | mapping_set_gfp_mask(inode->i_mapping, |
290 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); | 297 | mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); |
291 | 298 | ||
299 | root = NILFS_I(dir)->i_root; | ||
292 | ii = NILFS_I(inode); | 300 | ii = NILFS_I(inode); |
293 | ii->i_state = 1 << NILFS_I_NEW; | 301 | ii->i_state = 1 << NILFS_I_NEW; |
302 | ii->i_root = root; | ||
294 | 303 | ||
295 | err = nilfs_ifile_create_inode(sbi->s_ifile, &ino, &ii->i_bh); | 304 | err = nilfs_ifile_create_inode(root->ifile, &ino, &ii->i_bh); |
296 | if (unlikely(err)) | 305 | if (unlikely(err)) |
297 | goto failed_ifile_create_inode; | 306 | goto failed_ifile_create_inode; |
298 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ | 307 | /* reference count of i_bh inherits from nilfs_mdt_read_block() */ |
299 | 308 | ||
300 | atomic_inc(&sbi->s_inodes_count); | 309 | atomic_inc(&root->inodes_count); |
301 | inode_init_owner(inode, dir, mode); | 310 | inode_init_owner(inode, dir, mode); |
302 | inode->i_ino = ino; | 311 | inode->i_ino = ino; |
303 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 312 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
@@ -320,7 +329,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) | |||
320 | /* ii->i_file_acl = 0; */ | 329 | /* ii->i_file_acl = 0; */ |
321 | /* ii->i_dir_acl = 0; */ | 330 | /* ii->i_dir_acl = 0; */ |
322 | ii->i_dir_start_lookup = 0; | 331 | ii->i_dir_start_lookup = 0; |
323 | ii->i_cno = 0; | ||
324 | nilfs_set_inode_flags(inode); | 332 | nilfs_set_inode_flags(inode); |
325 | spin_lock(&sbi->s_next_gen_lock); | 333 | spin_lock(&sbi->s_next_gen_lock); |
326 | inode->i_generation = sbi->s_next_generation++; | 334 | inode->i_generation = sbi->s_next_generation++; |
@@ -350,16 +358,6 @@ struct inode *nilfs_new_inode(struct inode *dir, int mode) | |||
350 | return ERR_PTR(err); | 358 | return ERR_PTR(err); |
351 | } | 359 | } |
352 | 360 | ||
353 | void nilfs_free_inode(struct inode *inode) | ||
354 | { | ||
355 | struct super_block *sb = inode->i_sb; | ||
356 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
357 | |||
358 | /* XXX: check error code? Is there any thing I can do? */ | ||
359 | (void) nilfs_ifile_delete_inode(sbi->s_ifile, inode->i_ino); | ||
360 | atomic_dec(&sbi->s_inodes_count); | ||
361 | } | ||
362 | |||
363 | void nilfs_set_inode_flags(struct inode *inode) | 361 | void nilfs_set_inode_flags(struct inode *inode) |
364 | { | 362 | { |
365 | unsigned int flags = NILFS_I(inode)->i_flags; | 363 | unsigned int flags = NILFS_I(inode)->i_flags; |
@@ -410,7 +408,6 @@ int nilfs_read_inode_common(struct inode *inode, | |||
410 | 0 : le32_to_cpu(raw_inode->i_dir_acl); | 408 | 0 : le32_to_cpu(raw_inode->i_dir_acl); |
411 | #endif | 409 | #endif |
412 | ii->i_dir_start_lookup = 0; | 410 | ii->i_dir_start_lookup = 0; |
413 | ii->i_cno = 0; | ||
414 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); | 411 | inode->i_generation = le32_to_cpu(raw_inode->i_generation); |
415 | 412 | ||
416 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 413 | if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
@@ -424,7 +421,8 @@ int nilfs_read_inode_common(struct inode *inode, | |||
424 | return 0; | 421 | return 0; |
425 | } | 422 | } |
426 | 423 | ||
427 | static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | 424 | static int __nilfs_read_inode(struct super_block *sb, |
425 | struct nilfs_root *root, unsigned long ino, | ||
428 | struct inode *inode) | 426 | struct inode *inode) |
429 | { | 427 | { |
430 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 428 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
@@ -434,11 +432,11 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | |||
434 | int err; | 432 | int err; |
435 | 433 | ||
436 | down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | 434 | down_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ |
437 | err = nilfs_ifile_get_inode_block(sbi->s_ifile, ino, &bh); | 435 | err = nilfs_ifile_get_inode_block(root->ifile, ino, &bh); |
438 | if (unlikely(err)) | 436 | if (unlikely(err)) |
439 | goto bad_inode; | 437 | goto bad_inode; |
440 | 438 | ||
441 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, bh); | 439 | raw_inode = nilfs_ifile_map_inode(root->ifile, ino, bh); |
442 | 440 | ||
443 | err = nilfs_read_inode_common(inode, raw_inode); | 441 | err = nilfs_read_inode_common(inode, raw_inode); |
444 | if (err) | 442 | if (err) |
@@ -461,14 +459,14 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | |||
461 | inode, inode->i_mode, | 459 | inode, inode->i_mode, |
462 | huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); | 460 | huge_decode_dev(le64_to_cpu(raw_inode->i_device_code))); |
463 | } | 461 | } |
464 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); | 462 | nilfs_ifile_unmap_inode(root->ifile, ino, bh); |
465 | brelse(bh); | 463 | brelse(bh); |
466 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ | 464 | up_read(&NILFS_MDT(dat)->mi_sem); /* XXX */ |
467 | nilfs_set_inode_flags(inode); | 465 | nilfs_set_inode_flags(inode); |
468 | return 0; | 466 | return 0; |
469 | 467 | ||
470 | failed_unmap: | 468 | failed_unmap: |
471 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, bh); | 469 | nilfs_ifile_unmap_inode(root->ifile, ino, bh); |
472 | brelse(bh); | 470 | brelse(bh); |
473 | 471 | ||
474 | bad_inode: | 472 | bad_inode: |
@@ -476,18 +474,95 @@ static int __nilfs_read_inode(struct super_block *sb, unsigned long ino, | |||
476 | return err; | 474 | return err; |
477 | } | 475 | } |
478 | 476 | ||
479 | struct inode *nilfs_iget(struct super_block *sb, unsigned long ino) | 477 | static int nilfs_iget_test(struct inode *inode, void *opaque) |
478 | { | ||
479 | struct nilfs_iget_args *args = opaque; | ||
480 | struct nilfs_inode_info *ii; | ||
481 | |||
482 | if (args->ino != inode->i_ino || args->root != NILFS_I(inode)->i_root) | ||
483 | return 0; | ||
484 | |||
485 | ii = NILFS_I(inode); | ||
486 | if (!test_bit(NILFS_I_GCINODE, &ii->i_state)) | ||
487 | return !args->for_gc; | ||
488 | |||
489 | return args->for_gc && args->cno == ii->i_cno; | ||
490 | } | ||
491 | |||
492 | static int nilfs_iget_set(struct inode *inode, void *opaque) | ||
493 | { | ||
494 | struct nilfs_iget_args *args = opaque; | ||
495 | |||
496 | inode->i_ino = args->ino; | ||
497 | if (args->for_gc) { | ||
498 | NILFS_I(inode)->i_state = 1 << NILFS_I_GCINODE; | ||
499 | NILFS_I(inode)->i_cno = args->cno; | ||
500 | NILFS_I(inode)->i_root = NULL; | ||
501 | } else { | ||
502 | if (args->root && args->ino == NILFS_ROOT_INO) | ||
503 | nilfs_get_root(args->root); | ||
504 | NILFS_I(inode)->i_root = args->root; | ||
505 | } | ||
506 | return 0; | ||
507 | } | ||
508 | |||
509 | struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, | ||
510 | unsigned long ino) | ||
511 | { | ||
512 | struct nilfs_iget_args args = { | ||
513 | .ino = ino, .root = root, .cno = 0, .for_gc = 0 | ||
514 | }; | ||
515 | |||
516 | return ilookup5(sb, ino, nilfs_iget_test, &args); | ||
517 | } | ||
518 | |||
519 | struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, | ||
520 | unsigned long ino) | ||
521 | { | ||
522 | struct nilfs_iget_args args = { | ||
523 | .ino = ino, .root = root, .cno = 0, .for_gc = 0 | ||
524 | }; | ||
525 | |||
526 | return iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); | ||
527 | } | ||
528 | |||
529 | struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, | ||
530 | unsigned long ino) | ||
480 | { | 531 | { |
481 | struct inode *inode; | 532 | struct inode *inode; |
482 | int err; | 533 | int err; |
483 | 534 | ||
484 | inode = iget_locked(sb, ino); | 535 | inode = nilfs_iget_locked(sb, root, ino); |
485 | if (unlikely(!inode)) | 536 | if (unlikely(!inode)) |
486 | return ERR_PTR(-ENOMEM); | 537 | return ERR_PTR(-ENOMEM); |
487 | if (!(inode->i_state & I_NEW)) | 538 | if (!(inode->i_state & I_NEW)) |
488 | return inode; | 539 | return inode; |
489 | 540 | ||
490 | err = __nilfs_read_inode(sb, ino, inode); | 541 | err = __nilfs_read_inode(sb, root, ino, inode); |
542 | if (unlikely(err)) { | ||
543 | iget_failed(inode); | ||
544 | return ERR_PTR(err); | ||
545 | } | ||
546 | unlock_new_inode(inode); | ||
547 | return inode; | ||
548 | } | ||
549 | |||
550 | struct inode *nilfs_iget_for_gc(struct super_block *sb, unsigned long ino, | ||
551 | __u64 cno) | ||
552 | { | ||
553 | struct nilfs_iget_args args = { | ||
554 | .ino = ino, .root = NULL, .cno = cno, .for_gc = 1 | ||
555 | }; | ||
556 | struct inode *inode; | ||
557 | int err; | ||
558 | |||
559 | inode = iget5_locked(sb, ino, nilfs_iget_test, nilfs_iget_set, &args); | ||
560 | if (unlikely(!inode)) | ||
561 | return ERR_PTR(-ENOMEM); | ||
562 | if (!(inode->i_state & I_NEW)) | ||
563 | return inode; | ||
564 | |||
565 | err = nilfs_init_gcinode(inode); | ||
491 | if (unlikely(err)) { | 566 | if (unlikely(err)) { |
492 | iget_failed(inode); | 567 | iget_failed(inode); |
493 | return ERR_PTR(err); | 568 | return ERR_PTR(err); |
@@ -528,21 +603,20 @@ void nilfs_update_inode(struct inode *inode, struct buffer_head *ibh) | |||
528 | { | 603 | { |
529 | ino_t ino = inode->i_ino; | 604 | ino_t ino = inode->i_ino; |
530 | struct nilfs_inode_info *ii = NILFS_I(inode); | 605 | struct nilfs_inode_info *ii = NILFS_I(inode); |
531 | struct super_block *sb = inode->i_sb; | 606 | struct inode *ifile = ii->i_root->ifile; |
532 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
533 | struct nilfs_inode *raw_inode; | 607 | struct nilfs_inode *raw_inode; |
534 | 608 | ||
535 | raw_inode = nilfs_ifile_map_inode(sbi->s_ifile, ino, ibh); | 609 | raw_inode = nilfs_ifile_map_inode(ifile, ino, ibh); |
536 | 610 | ||
537 | if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) | 611 | if (test_and_clear_bit(NILFS_I_NEW, &ii->i_state)) |
538 | memset(raw_inode, 0, NILFS_MDT(sbi->s_ifile)->mi_entry_size); | 612 | memset(raw_inode, 0, NILFS_MDT(ifile)->mi_entry_size); |
539 | set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); | 613 | set_bit(NILFS_I_INODE_DIRTY, &ii->i_state); |
540 | 614 | ||
541 | nilfs_write_inode_common(inode, raw_inode, 0); | 615 | nilfs_write_inode_common(inode, raw_inode, 0); |
542 | /* XXX: call with has_bmap = 0 is a workaround to avoid | 616 | /* XXX: call with has_bmap = 0 is a workaround to avoid |
543 | deadlock of bmap. This delays update of i_bmap to just | 617 | deadlock of bmap. This delays update of i_bmap to just |
544 | before writing */ | 618 | before writing */ |
545 | nilfs_ifile_unmap_inode(sbi->s_ifile, ino, ibh); | 619 | nilfs_ifile_unmap_inode(ifile, ino, ibh); |
546 | } | 620 | } |
547 | 621 | ||
548 | #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ | 622 | #define NILFS_MAX_TRUNCATE_BLOCKS 16384 /* 64MB for 4KB block */ |
@@ -617,6 +691,7 @@ void nilfs_truncate(struct inode *inode) | |||
617 | static void nilfs_clear_inode(struct inode *inode) | 691 | static void nilfs_clear_inode(struct inode *inode) |
618 | { | 692 | { |
619 | struct nilfs_inode_info *ii = NILFS_I(inode); | 693 | struct nilfs_inode_info *ii = NILFS_I(inode); |
694 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | ||
620 | 695 | ||
621 | /* | 696 | /* |
622 | * Free resources allocated in nilfs_read_inode(), here. | 697 | * Free resources allocated in nilfs_read_inode(), here. |
@@ -625,10 +700,16 @@ static void nilfs_clear_inode(struct inode *inode) | |||
625 | brelse(ii->i_bh); | 700 | brelse(ii->i_bh); |
626 | ii->i_bh = NULL; | 701 | ii->i_bh = NULL; |
627 | 702 | ||
703 | if (mdi && mdi->mi_palloc_cache) | ||
704 | nilfs_palloc_destroy_cache(inode); | ||
705 | |||
628 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) | 706 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) |
629 | nilfs_bmap_clear(ii->i_bmap); | 707 | nilfs_bmap_clear(ii->i_bmap); |
630 | 708 | ||
631 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); | 709 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); |
710 | |||
711 | if (ii->i_root && inode->i_ino == NILFS_ROOT_INO) | ||
712 | nilfs_put_root(ii->i_root); | ||
632 | } | 713 | } |
633 | 714 | ||
634 | void nilfs_evict_inode(struct inode *inode) | 715 | void nilfs_evict_inode(struct inode *inode) |
@@ -637,7 +718,7 @@ void nilfs_evict_inode(struct inode *inode) | |||
637 | struct super_block *sb = inode->i_sb; | 718 | struct super_block *sb = inode->i_sb; |
638 | struct nilfs_inode_info *ii = NILFS_I(inode); | 719 | struct nilfs_inode_info *ii = NILFS_I(inode); |
639 | 720 | ||
640 | if (inode->i_nlink || unlikely(is_bad_inode(inode))) { | 721 | if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { |
641 | if (inode->i_data.nrpages) | 722 | if (inode->i_data.nrpages) |
642 | truncate_inode_pages(&inode->i_data, 0); | 723 | truncate_inode_pages(&inode->i_data, 0); |
643 | end_writeback(inode); | 724 | end_writeback(inode); |
@@ -649,12 +730,16 @@ void nilfs_evict_inode(struct inode *inode) | |||
649 | if (inode->i_data.nrpages) | 730 | if (inode->i_data.nrpages) |
650 | truncate_inode_pages(&inode->i_data, 0); | 731 | truncate_inode_pages(&inode->i_data, 0); |
651 | 732 | ||
733 | /* TODO: some of the following operations may fail. */ | ||
652 | nilfs_truncate_bmap(ii, 0); | 734 | nilfs_truncate_bmap(ii, 0); |
653 | nilfs_mark_inode_dirty(inode); | 735 | nilfs_mark_inode_dirty(inode); |
654 | end_writeback(inode); | 736 | end_writeback(inode); |
737 | |||
738 | nilfs_ifile_delete_inode(ii->i_root->ifile, inode->i_ino); | ||
739 | atomic_dec(&ii->i_root->inodes_count); | ||
740 | |||
655 | nilfs_clear_inode(inode); | 741 | nilfs_clear_inode(inode); |
656 | nilfs_free_inode(inode); | 742 | |
657 | /* nilfs_free_inode() marks inode buffer dirty */ | ||
658 | if (IS_SYNC(inode)) | 743 | if (IS_SYNC(inode)) |
659 | nilfs_set_transaction_flag(NILFS_TI_SYNC); | 744 | nilfs_set_transaction_flag(NILFS_TI_SYNC); |
660 | nilfs_transaction_commit(sb); | 745 | nilfs_transaction_commit(sb); |
@@ -700,6 +785,17 @@ out_err: | |||
700 | return err; | 785 | return err; |
701 | } | 786 | } |
702 | 787 | ||
788 | int nilfs_permission(struct inode *inode, int mask) | ||
789 | { | ||
790 | struct nilfs_root *root = NILFS_I(inode)->i_root; | ||
791 | |||
792 | if ((mask & MAY_WRITE) && root && | ||
793 | root->cno != NILFS_CPTREE_CURRENT_CNO) | ||
794 | return -EROFS; /* snapshot is not writable */ | ||
795 | |||
796 | return generic_permission(inode, mask, NULL); | ||
797 | } | ||
798 | |||
703 | int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, | 799 | int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, |
704 | struct buffer_head **pbh) | 800 | struct buffer_head **pbh) |
705 | { | 801 | { |
@@ -709,8 +805,8 @@ int nilfs_load_inode_block(struct nilfs_sb_info *sbi, struct inode *inode, | |||
709 | spin_lock(&sbi->s_inode_lock); | 805 | spin_lock(&sbi->s_inode_lock); |
710 | if (ii->i_bh == NULL) { | 806 | if (ii->i_bh == NULL) { |
711 | spin_unlock(&sbi->s_inode_lock); | 807 | spin_unlock(&sbi->s_inode_lock); |
712 | err = nilfs_ifile_get_inode_block(sbi->s_ifile, inode->i_ino, | 808 | err = nilfs_ifile_get_inode_block(ii->i_root->ifile, |
713 | pbh); | 809 | inode->i_ino, pbh); |
714 | if (unlikely(err)) | 810 | if (unlikely(err)) |
715 | return err; | 811 | return err; |
716 | spin_lock(&sbi->s_inode_lock); | 812 | spin_lock(&sbi->s_inode_lock); |
@@ -790,7 +886,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) | |||
790 | } | 886 | } |
791 | nilfs_update_inode(inode, ibh); | 887 | nilfs_update_inode(inode, ibh); |
792 | nilfs_mdt_mark_buffer_dirty(ibh); | 888 | nilfs_mdt_mark_buffer_dirty(ibh); |
793 | nilfs_mdt_mark_dirty(sbi->s_ifile); | 889 | nilfs_mdt_mark_dirty(NILFS_I(inode)->i_root->ifile); |
794 | brelse(ibh); | 890 | brelse(ibh); |
795 | return 0; | 891 | return 0; |
796 | } | 892 | } |
@@ -808,6 +904,7 @@ int nilfs_mark_inode_dirty(struct inode *inode) | |||
808 | void nilfs_dirty_inode(struct inode *inode) | 904 | void nilfs_dirty_inode(struct inode *inode) |
809 | { | 905 | { |
810 | struct nilfs_transaction_info ti; | 906 | struct nilfs_transaction_info ti; |
907 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | ||
811 | 908 | ||
812 | if (is_bad_inode(inode)) { | 909 | if (is_bad_inode(inode)) { |
813 | nilfs_warning(inode->i_sb, __func__, | 910 | nilfs_warning(inode->i_sb, __func__, |
@@ -815,6 +912,10 @@ void nilfs_dirty_inode(struct inode *inode) | |||
815 | dump_stack(); | 912 | dump_stack(); |
816 | return; | 913 | return; |
817 | } | 914 | } |
915 | if (mdi) { | ||
916 | nilfs_mdt_mark_dirty(inode); | ||
917 | return; | ||
918 | } | ||
818 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | 919 | nilfs_transaction_begin(inode->i_sb, &ti, 0); |
819 | nilfs_mark_inode_dirty(inode); | 920 | nilfs_mark_inode_dirty(inode); |
820 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | 921 | nilfs_transaction_commit(inode->i_sb); /* never fails */ |
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f90a33d9a5b0..3e90f86d5bfe 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -22,7 +22,6 @@ | |||
22 | 22 | ||
23 | #include <linux/fs.h> | 23 | #include <linux/fs.h> |
24 | #include <linux/wait.h> | 24 | #include <linux/wait.h> |
25 | #include <linux/smp_lock.h> /* lock_kernel(), unlock_kernel() */ | ||
26 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
27 | #include <linux/capability.h> /* capable() */ | 26 | #include <linux/capability.h> /* capable() */ |
28 | #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ | 27 | #include <linux/uaccess.h> /* copy_from_user(), copy_to_user() */ |
@@ -118,7 +117,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, | |||
118 | if (copy_from_user(&cpmode, argp, sizeof(cpmode))) | 117 | if (copy_from_user(&cpmode, argp, sizeof(cpmode))) |
119 | goto out; | 118 | goto out; |
120 | 119 | ||
121 | mutex_lock(&nilfs->ns_mount_mutex); | 120 | down_read(&inode->i_sb->s_umount); |
122 | 121 | ||
123 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | 122 | nilfs_transaction_begin(inode->i_sb, &ti, 0); |
124 | ret = nilfs_cpfile_change_cpmode( | 123 | ret = nilfs_cpfile_change_cpmode( |
@@ -128,7 +127,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp, | |||
128 | else | 127 | else |
129 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | 128 | nilfs_transaction_commit(inode->i_sb); /* never fails */ |
130 | 129 | ||
131 | mutex_unlock(&nilfs->ns_mount_mutex); | 130 | up_read(&inode->i_sb->s_umount); |
132 | out: | 131 | out: |
133 | mnt_drop_write(filp->f_path.mnt); | 132 | mnt_drop_write(filp->f_path.mnt); |
134 | return ret; | 133 | return ret; |
@@ -334,7 +333,7 @@ static int nilfs_ioctl_move_inode_block(struct inode *inode, | |||
334 | return 0; | 333 | return 0; |
335 | } | 334 | } |
336 | 335 | ||
337 | static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, | 336 | static int nilfs_ioctl_move_blocks(struct super_block *sb, |
338 | struct nilfs_argv *argv, void *buf) | 337 | struct nilfs_argv *argv, void *buf) |
339 | { | 338 | { |
340 | size_t nmembs = argv->v_nmembs; | 339 | size_t nmembs = argv->v_nmembs; |
@@ -349,7 +348,7 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, | |||
349 | for (i = 0, vdesc = buf; i < nmembs; ) { | 348 | for (i = 0, vdesc = buf; i < nmembs; ) { |
350 | ino = vdesc->vd_ino; | 349 | ino = vdesc->vd_ino; |
351 | cno = vdesc->vd_cno; | 350 | cno = vdesc->vd_cno; |
352 | inode = nilfs_gc_iget(nilfs, ino, cno); | 351 | inode = nilfs_iget_for_gc(sb, ino, cno); |
353 | if (unlikely(inode == NULL)) { | 352 | if (unlikely(inode == NULL)) { |
354 | ret = -ENOMEM; | 353 | ret = -ENOMEM; |
355 | goto failed; | 354 | goto failed; |
@@ -357,11 +356,15 @@ static int nilfs_ioctl_move_blocks(struct the_nilfs *nilfs, | |||
357 | do { | 356 | do { |
358 | ret = nilfs_ioctl_move_inode_block(inode, vdesc, | 357 | ret = nilfs_ioctl_move_inode_block(inode, vdesc, |
359 | &buffers); | 358 | &buffers); |
360 | if (unlikely(ret < 0)) | 359 | if (unlikely(ret < 0)) { |
360 | iput(inode); | ||
361 | goto failed; | 361 | goto failed; |
362 | } | ||
362 | vdesc++; | 363 | vdesc++; |
363 | } while (++i < nmembs && | 364 | } while (++i < nmembs && |
364 | vdesc->vd_ino == ino && vdesc->vd_cno == cno); | 365 | vdesc->vd_ino == ino && vdesc->vd_cno == cno); |
366 | |||
367 | iput(inode); /* The inode still remains in GC inode list */ | ||
365 | } | 368 | } |
366 | 369 | ||
367 | list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { | 370 | list_for_each_entry_safe(bh, n, &buffers, b_assoc_buffers) { |
@@ -567,7 +570,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | |||
567 | } | 570 | } |
568 | 571 | ||
569 | /* | 572 | /* |
570 | * nilfs_ioctl_move_blocks() will call nilfs_gc_iget(), | 573 | * nilfs_ioctl_move_blocks() will call nilfs_iget_for_gc(), |
571 | * which will operates an inode list without blocking. | 574 | * which will operates an inode list without blocking. |
572 | * To protect the list from concurrent operations, | 575 | * To protect the list from concurrent operations, |
573 | * nilfs_ioctl_move_blocks should be atomic operation. | 576 | * nilfs_ioctl_move_blocks should be atomic operation. |
@@ -577,15 +580,16 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp, | |||
577 | goto out_free; | 580 | goto out_free; |
578 | } | 581 | } |
579 | 582 | ||
580 | ret = nilfs_ioctl_move_blocks(nilfs, &argv[0], kbufs[0]); | 583 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
584 | |||
585 | ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]); | ||
581 | if (ret < 0) | 586 | if (ret < 0) |
582 | printk(KERN_ERR "NILFS: GC failed during preparation: " | 587 | printk(KERN_ERR "NILFS: GC failed during preparation: " |
583 | "cannot read source blocks: err=%d\n", ret); | 588 | "cannot read source blocks: err=%d\n", ret); |
584 | else | 589 | else |
585 | ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); | 590 | ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); |
586 | 591 | ||
587 | if (ret < 0) | 592 | nilfs_remove_all_gcinodes(nilfs); |
588 | nilfs_remove_all_gcinode(nilfs); | ||
589 | clear_nilfs_gc_running(nilfs); | 593 | clear_nilfs_gc_running(nilfs); |
590 | 594 | ||
591 | out_free: | 595 | out_free: |
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index d01aff4957d9..39a5b84e2c9f 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c | |||
@@ -36,7 +36,6 @@ | |||
36 | 36 | ||
37 | #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) | 37 | #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) |
38 | 38 | ||
39 | #define INIT_UNUSED_INODE_FIELDS | ||
40 | 39 | ||
41 | static int | 40 | static int |
42 | nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, | 41 | nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, |
@@ -78,25 +77,11 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, | |||
78 | struct buffer_head *, | 77 | struct buffer_head *, |
79 | void *)) | 78 | void *)) |
80 | { | 79 | { |
81 | struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; | ||
82 | struct super_block *sb = inode->i_sb; | 80 | struct super_block *sb = inode->i_sb; |
83 | struct nilfs_transaction_info ti; | 81 | struct nilfs_transaction_info ti; |
84 | struct buffer_head *bh; | 82 | struct buffer_head *bh; |
85 | int err; | 83 | int err; |
86 | 84 | ||
87 | if (!sb) { | ||
88 | /* | ||
89 | * Make sure this function is not called from any | ||
90 | * read-only context. | ||
91 | */ | ||
92 | if (!nilfs->ns_writer) { | ||
93 | WARN_ON(1); | ||
94 | err = -EROFS; | ||
95 | goto out; | ||
96 | } | ||
97 | sb = nilfs->ns_writer->s_super; | ||
98 | } | ||
99 | |||
100 | nilfs_transaction_begin(sb, &ti, 0); | 85 | nilfs_transaction_begin(sb, &ti, 0); |
101 | 86 | ||
102 | err = -ENOMEM; | 87 | err = -ENOMEM; |
@@ -112,7 +97,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, | |||
112 | if (buffer_uptodate(bh)) | 97 | if (buffer_uptodate(bh)) |
113 | goto failed_bh; | 98 | goto failed_bh; |
114 | 99 | ||
115 | bh->b_bdev = nilfs->ns_bdev; | 100 | bh->b_bdev = sb->s_bdev; |
116 | err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); | 101 | err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); |
117 | if (likely(!err)) { | 102 | if (likely(!err)) { |
118 | get_bh(bh); | 103 | get_bh(bh); |
@@ -129,7 +114,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, | |||
129 | err = nilfs_transaction_commit(sb); | 114 | err = nilfs_transaction_commit(sb); |
130 | else | 115 | else |
131 | nilfs_transaction_abort(sb); | 116 | nilfs_transaction_abort(sb); |
132 | out: | 117 | |
133 | return err; | 118 | return err; |
134 | } | 119 | } |
135 | 120 | ||
@@ -167,9 +152,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, | |||
167 | unlock_buffer(bh); | 152 | unlock_buffer(bh); |
168 | goto failed_bh; | 153 | goto failed_bh; |
169 | } | 154 | } |
170 | bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; | 155 | map_bh(bh, inode->i_sb, (sector_t)blknum); |
171 | bh->b_blocknr = (sector_t)blknum; | ||
172 | set_buffer_mapped(bh); | ||
173 | 156 | ||
174 | bh->b_end_io = end_buffer_read_sync; | 157 | bh->b_end_io = end_buffer_read_sync; |
175 | get_bh(bh); | 158 | get_bh(bh); |
@@ -398,35 +381,24 @@ int nilfs_mdt_fetch_dirty(struct inode *inode) | |||
398 | static int | 381 | static int |
399 | nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) | 382 | nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) |
400 | { | 383 | { |
401 | struct inode *inode = container_of(page->mapping, | 384 | struct inode *inode; |
402 | struct inode, i_data); | 385 | struct super_block *sb; |
403 | struct super_block *sb = inode->i_sb; | ||
404 | struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; | ||
405 | struct nilfs_sb_info *writer = NULL; | ||
406 | int err = 0; | 386 | int err = 0; |
407 | 387 | ||
408 | redirty_page_for_writepage(wbc, page); | 388 | redirty_page_for_writepage(wbc, page); |
409 | unlock_page(page); | 389 | unlock_page(page); |
410 | 390 | ||
411 | if (page->mapping->assoc_mapping) | 391 | inode = page->mapping->host; |
412 | return 0; /* Do not request flush for shadow page cache */ | 392 | if (!inode) |
413 | if (!sb) { | 393 | return 0; |
414 | down_read(&nilfs->ns_writer_sem); | 394 | |
415 | writer = nilfs->ns_writer; | 395 | sb = inode->i_sb; |
416 | if (!writer) { | ||
417 | up_read(&nilfs->ns_writer_sem); | ||
418 | return -EROFS; | ||
419 | } | ||
420 | sb = writer->s_super; | ||
421 | } | ||
422 | 396 | ||
423 | if (wbc->sync_mode == WB_SYNC_ALL) | 397 | if (wbc->sync_mode == WB_SYNC_ALL) |
424 | err = nilfs_construct_segment(sb); | 398 | err = nilfs_construct_segment(sb); |
425 | else if (wbc->for_reclaim) | 399 | else if (wbc->for_reclaim) |
426 | nilfs_flush_segment(sb, inode->i_ino); | 400 | nilfs_flush_segment(sb, inode->i_ino); |
427 | 401 | ||
428 | if (writer) | ||
429 | up_read(&nilfs->ns_writer_sem); | ||
430 | return err; | 402 | return err; |
431 | } | 403 | } |
432 | 404 | ||
@@ -439,105 +411,27 @@ static const struct address_space_operations def_mdt_aops = { | |||
439 | static const struct inode_operations def_mdt_iops; | 411 | static const struct inode_operations def_mdt_iops; |
440 | static const struct file_operations def_mdt_fops; | 412 | static const struct file_operations def_mdt_fops; |
441 | 413 | ||
442 | /* | 414 | |
443 | * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, | 415 | int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz) |
444 | * ifile, or gcinodes. This allows the B-tree code and segment constructor | ||
445 | * to treat them like regular files, and this helps to simplify the | ||
446 | * implementation. | ||
447 | * On the other hand, some of the pseudo inodes have an irregular point: | ||
448 | * They don't have valid inode->i_sb pointer because their lifetimes are | ||
449 | * longer than those of the super block structs; they may continue for | ||
450 | * several consecutive mounts/umounts. This would need discussions. | ||
451 | */ | ||
452 | /** | ||
453 | * nilfs_mdt_new_common - allocate a pseudo inode for metadata file | ||
454 | * @nilfs: nilfs object | ||
455 | * @sb: super block instance the metadata file belongs to | ||
456 | * @ino: inode number | ||
457 | * @gfp_mask: gfp mask for data pages | ||
458 | * @objsz: size of the private object attached to inode->i_private | ||
459 | */ | ||
460 | struct inode * | ||
461 | nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, | ||
462 | ino_t ino, gfp_t gfp_mask, size_t objsz) | ||
463 | { | 416 | { |
464 | struct inode *inode = nilfs_alloc_inode_common(nilfs); | 417 | struct nilfs_mdt_info *mi; |
465 | 418 | ||
466 | if (!inode) | 419 | mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); |
467 | return NULL; | 420 | if (!mi) |
468 | else { | 421 | return -ENOMEM; |
469 | struct address_space * const mapping = &inode->i_data; | ||
470 | struct nilfs_mdt_info *mi; | ||
471 | |||
472 | mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); | ||
473 | if (!mi) { | ||
474 | nilfs_destroy_inode(inode); | ||
475 | return NULL; | ||
476 | } | ||
477 | mi->mi_nilfs = nilfs; | ||
478 | init_rwsem(&mi->mi_sem); | ||
479 | |||
480 | inode->i_sb = sb; /* sb may be NULL for some meta data files */ | ||
481 | inode->i_blkbits = nilfs->ns_blocksize_bits; | ||
482 | inode->i_flags = 0; | ||
483 | atomic_set(&inode->i_count, 1); | ||
484 | inode->i_nlink = 1; | ||
485 | inode->i_ino = ino; | ||
486 | inode->i_mode = S_IFREG; | ||
487 | inode->i_private = mi; | ||
488 | |||
489 | #ifdef INIT_UNUSED_INODE_FIELDS | ||
490 | atomic_set(&inode->i_writecount, 0); | ||
491 | inode->i_size = 0; | ||
492 | inode->i_blocks = 0; | ||
493 | inode->i_bytes = 0; | ||
494 | inode->i_generation = 0; | ||
495 | #ifdef CONFIG_QUOTA | ||
496 | memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); | ||
497 | #endif | ||
498 | inode->i_pipe = NULL; | ||
499 | inode->i_bdev = NULL; | ||
500 | inode->i_cdev = NULL; | ||
501 | inode->i_rdev = 0; | ||
502 | #ifdef CONFIG_SECURITY | ||
503 | inode->i_security = NULL; | ||
504 | #endif | ||
505 | inode->dirtied_when = 0; | ||
506 | |||
507 | INIT_LIST_HEAD(&inode->i_list); | ||
508 | INIT_LIST_HEAD(&inode->i_sb_list); | ||
509 | inode->i_state = 0; | ||
510 | #endif | ||
511 | |||
512 | spin_lock_init(&inode->i_lock); | ||
513 | mutex_init(&inode->i_mutex); | ||
514 | init_rwsem(&inode->i_alloc_sem); | ||
515 | |||
516 | mapping->host = NULL; /* instead of inode */ | ||
517 | mapping->flags = 0; | ||
518 | mapping_set_gfp_mask(mapping, gfp_mask); | ||
519 | mapping->assoc_mapping = NULL; | ||
520 | mapping->backing_dev_info = nilfs->ns_bdi; | ||
521 | |||
522 | inode->i_mapping = mapping; | ||
523 | } | ||
524 | 422 | ||
525 | return inode; | 423 | init_rwsem(&mi->mi_sem); |
526 | } | 424 | inode->i_private = mi; |
527 | 425 | ||
528 | struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, | 426 | inode->i_mode = S_IFREG; |
529 | ino_t ino, size_t objsz) | 427 | mapping_set_gfp_mask(inode->i_mapping, gfp_mask); |
530 | { | 428 | inode->i_mapping->backing_dev_info = inode->i_sb->s_bdi; |
531 | struct inode *inode; | ||
532 | |||
533 | inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz); | ||
534 | if (!inode) | ||
535 | return NULL; | ||
536 | 429 | ||
537 | inode->i_op = &def_mdt_iops; | 430 | inode->i_op = &def_mdt_iops; |
538 | inode->i_fop = &def_mdt_fops; | 431 | inode->i_fop = &def_mdt_fops; |
539 | inode->i_mapping->a_ops = &def_mdt_aops; | 432 | inode->i_mapping->a_ops = &def_mdt_aops; |
540 | return inode; | 433 | |
434 | return 0; | ||
541 | } | 435 | } |
542 | 436 | ||
543 | void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, | 437 | void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, |
@@ -550,34 +444,159 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, | |||
550 | mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); | 444 | mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); |
551 | } | 445 | } |
552 | 446 | ||
553 | void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) | 447 | static const struct address_space_operations shadow_map_aops = { |
448 | .sync_page = block_sync_page, | ||
449 | }; | ||
450 | |||
451 | /** | ||
452 | * nilfs_mdt_setup_shadow_map - setup shadow map and bind it to metadata file | ||
453 | * @inode: inode of the metadata file | ||
454 | * @shadow: shadow mapping | ||
455 | */ | ||
456 | int nilfs_mdt_setup_shadow_map(struct inode *inode, | ||
457 | struct nilfs_shadow_map *shadow) | ||
554 | { | 458 | { |
555 | shadow->i_mapping->assoc_mapping = orig->i_mapping; | 459 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); |
556 | NILFS_I(shadow)->i_btnode_cache.assoc_mapping = | 460 | struct backing_dev_info *bdi = inode->i_sb->s_bdi; |
557 | &NILFS_I(orig)->i_btnode_cache; | 461 | |
462 | INIT_LIST_HEAD(&shadow->frozen_buffers); | ||
463 | nilfs_mapping_init_once(&shadow->frozen_data); | ||
464 | nilfs_mapping_init(&shadow->frozen_data, bdi, &shadow_map_aops); | ||
465 | nilfs_mapping_init_once(&shadow->frozen_btnodes); | ||
466 | nilfs_mapping_init(&shadow->frozen_btnodes, bdi, &shadow_map_aops); | ||
467 | mi->mi_shadow = shadow; | ||
468 | return 0; | ||
558 | } | 469 | } |
559 | 470 | ||
560 | static void nilfs_mdt_clear(struct inode *inode) | 471 | /** |
472 | * nilfs_mdt_save_to_shadow_map - copy bmap and dirty pages to shadow map | ||
473 | * @inode: inode of the metadata file | ||
474 | */ | ||
475 | int nilfs_mdt_save_to_shadow_map(struct inode *inode) | ||
561 | { | 476 | { |
477 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
562 | struct nilfs_inode_info *ii = NILFS_I(inode); | 478 | struct nilfs_inode_info *ii = NILFS_I(inode); |
479 | struct nilfs_shadow_map *shadow = mi->mi_shadow; | ||
480 | int ret; | ||
563 | 481 | ||
564 | invalidate_mapping_pages(inode->i_mapping, 0, -1); | 482 | ret = nilfs_copy_dirty_pages(&shadow->frozen_data, inode->i_mapping); |
565 | truncate_inode_pages(inode->i_mapping, 0); | 483 | if (ret) |
484 | goto out; | ||
485 | |||
486 | ret = nilfs_copy_dirty_pages(&shadow->frozen_btnodes, | ||
487 | &ii->i_btnode_cache); | ||
488 | if (ret) | ||
489 | goto out; | ||
566 | 490 | ||
567 | if (test_bit(NILFS_I_BMAP, &ii->i_state)) | 491 | nilfs_bmap_save(ii->i_bmap, &shadow->bmap_store); |
568 | nilfs_bmap_clear(ii->i_bmap); | 492 | out: |
569 | nilfs_btnode_cache_clear(&ii->i_btnode_cache); | 493 | return ret; |
570 | } | 494 | } |
571 | 495 | ||
572 | void nilfs_mdt_destroy(struct inode *inode) | 496 | int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh) |
573 | { | 497 | { |
574 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | 498 | struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; |
499 | struct buffer_head *bh_frozen; | ||
500 | struct page *page; | ||
501 | int blkbits = inode->i_blkbits; | ||
502 | int ret = -ENOMEM; | ||
503 | |||
504 | page = grab_cache_page(&shadow->frozen_data, bh->b_page->index); | ||
505 | if (!page) | ||
506 | return ret; | ||
507 | |||
508 | if (!page_has_buffers(page)) | ||
509 | create_empty_buffers(page, 1 << blkbits, 0); | ||
510 | |||
511 | bh_frozen = nilfs_page_get_nth_block(page, bh_offset(bh) >> blkbits); | ||
512 | if (bh_frozen) { | ||
513 | if (!buffer_uptodate(bh_frozen)) | ||
514 | nilfs_copy_buffer(bh_frozen, bh); | ||
515 | if (list_empty(&bh_frozen->b_assoc_buffers)) { | ||
516 | list_add_tail(&bh_frozen->b_assoc_buffers, | ||
517 | &shadow->frozen_buffers); | ||
518 | set_buffer_nilfs_redirected(bh); | ||
519 | } else { | ||
520 | brelse(bh_frozen); /* already frozen */ | ||
521 | } | ||
522 | ret = 0; | ||
523 | } | ||
524 | unlock_page(page); | ||
525 | page_cache_release(page); | ||
526 | return ret; | ||
527 | } | ||
528 | |||
529 | struct buffer_head * | ||
530 | nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh) | ||
531 | { | ||
532 | struct nilfs_shadow_map *shadow = NILFS_MDT(inode)->mi_shadow; | ||
533 | struct buffer_head *bh_frozen = NULL; | ||
534 | struct page *page; | ||
535 | int n; | ||
536 | |||
537 | page = find_lock_page(&shadow->frozen_data, bh->b_page->index); | ||
538 | if (page) { | ||
539 | if (page_has_buffers(page)) { | ||
540 | n = bh_offset(bh) >> inode->i_blkbits; | ||
541 | bh_frozen = nilfs_page_get_nth_block(page, n); | ||
542 | } | ||
543 | unlock_page(page); | ||
544 | page_cache_release(page); | ||
545 | } | ||
546 | return bh_frozen; | ||
547 | } | ||
548 | |||
549 | static void nilfs_release_frozen_buffers(struct nilfs_shadow_map *shadow) | ||
550 | { | ||
551 | struct list_head *head = &shadow->frozen_buffers; | ||
552 | struct buffer_head *bh; | ||
553 | |||
554 | while (!list_empty(head)) { | ||
555 | bh = list_first_entry(head, struct buffer_head, | ||
556 | b_assoc_buffers); | ||
557 | list_del_init(&bh->b_assoc_buffers); | ||
558 | brelse(bh); /* drop ref-count to make it releasable */ | ||
559 | } | ||
560 | } | ||
561 | |||
562 | /** | ||
563 | * nilfs_mdt_restore_from_shadow_map - restore dirty pages and bmap state | ||
564 | * @inode: inode of the metadata file | ||
565 | */ | ||
566 | void nilfs_mdt_restore_from_shadow_map(struct inode *inode) | ||
567 | { | ||
568 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
569 | struct nilfs_inode_info *ii = NILFS_I(inode); | ||
570 | struct nilfs_shadow_map *shadow = mi->mi_shadow; | ||
571 | |||
572 | down_write(&mi->mi_sem); | ||
575 | 573 | ||
576 | if (mdi->mi_palloc_cache) | 574 | if (mi->mi_palloc_cache) |
577 | nilfs_palloc_destroy_cache(inode); | 575 | nilfs_palloc_clear_cache(inode); |
578 | nilfs_mdt_clear(inode); | 576 | |
577 | nilfs_clear_dirty_pages(inode->i_mapping); | ||
578 | nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data); | ||
579 | |||
580 | nilfs_clear_dirty_pages(&ii->i_btnode_cache); | ||
581 | nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes); | ||
582 | |||
583 | nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store); | ||
584 | |||
585 | up_write(&mi->mi_sem); | ||
586 | } | ||
587 | |||
588 | /** | ||
589 | * nilfs_mdt_clear_shadow_map - truncate pages in shadow map caches | ||
590 | * @inode: inode of the metadata file | ||
591 | */ | ||
592 | void nilfs_mdt_clear_shadow_map(struct inode *inode) | ||
593 | { | ||
594 | struct nilfs_mdt_info *mi = NILFS_MDT(inode); | ||
595 | struct nilfs_shadow_map *shadow = mi->mi_shadow; | ||
579 | 596 | ||
580 | kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ | 597 | down_write(&mi->mi_sem); |
581 | kfree(mdi); | 598 | nilfs_release_frozen_buffers(shadow); |
582 | nilfs_destroy_inode(inode); | 599 | truncate_inode_pages(&shadow->frozen_data, 0); |
600 | truncate_inode_pages(&shadow->frozen_btnodes, 0); | ||
601 | up_write(&mi->mi_sem); | ||
583 | } | 602 | } |
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h index 6c4bbb0470fc..b13734bf3521 100644 --- a/fs/nilfs2/mdt.h +++ b/fs/nilfs2/mdt.h | |||
@@ -28,26 +28,33 @@ | |||
28 | #include "nilfs.h" | 28 | #include "nilfs.h" |
29 | #include "page.h" | 29 | #include "page.h" |
30 | 30 | ||
31 | struct nilfs_shadow_map { | ||
32 | struct nilfs_bmap_store bmap_store; | ||
33 | struct address_space frozen_data; | ||
34 | struct address_space frozen_btnodes; | ||
35 | struct list_head frozen_buffers; | ||
36 | }; | ||
37 | |||
31 | /** | 38 | /** |
32 | * struct nilfs_mdt_info - on-memory private data of meta data files | 39 | * struct nilfs_mdt_info - on-memory private data of meta data files |
33 | * @mi_nilfs: back pointer to the_nilfs struct | ||
34 | * @mi_sem: reader/writer semaphore for meta data operations | 40 | * @mi_sem: reader/writer semaphore for meta data operations |
35 | * @mi_bgl: per-blockgroup locking | 41 | * @mi_bgl: per-blockgroup locking |
36 | * @mi_entry_size: size of an entry | 42 | * @mi_entry_size: size of an entry |
37 | * @mi_first_entry_offset: offset to the first entry | 43 | * @mi_first_entry_offset: offset to the first entry |
38 | * @mi_entries_per_block: number of entries in a block | 44 | * @mi_entries_per_block: number of entries in a block |
39 | * @mi_palloc_cache: persistent object allocator cache | 45 | * @mi_palloc_cache: persistent object allocator cache |
46 | * @mi_shadow: shadow of bmap and page caches | ||
40 | * @mi_blocks_per_group: number of blocks in a group | 47 | * @mi_blocks_per_group: number of blocks in a group |
41 | * @mi_blocks_per_desc_block: number of blocks per descriptor block | 48 | * @mi_blocks_per_desc_block: number of blocks per descriptor block |
42 | */ | 49 | */ |
43 | struct nilfs_mdt_info { | 50 | struct nilfs_mdt_info { |
44 | struct the_nilfs *mi_nilfs; | ||
45 | struct rw_semaphore mi_sem; | 51 | struct rw_semaphore mi_sem; |
46 | struct blockgroup_lock *mi_bgl; | 52 | struct blockgroup_lock *mi_bgl; |
47 | unsigned mi_entry_size; | 53 | unsigned mi_entry_size; |
48 | unsigned mi_first_entry_offset; | 54 | unsigned mi_first_entry_offset; |
49 | unsigned long mi_entries_per_block; | 55 | unsigned long mi_entries_per_block; |
50 | struct nilfs_palloc_cache *mi_palloc_cache; | 56 | struct nilfs_palloc_cache *mi_palloc_cache; |
57 | struct nilfs_shadow_map *mi_shadow; | ||
51 | unsigned long mi_blocks_per_group; | 58 | unsigned long mi_blocks_per_group; |
52 | unsigned long mi_blocks_per_desc_block; | 59 | unsigned long mi_blocks_per_desc_block; |
53 | }; | 60 | }; |
@@ -59,9 +66,7 @@ static inline struct nilfs_mdt_info *NILFS_MDT(const struct inode *inode) | |||
59 | 66 | ||
60 | static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) | 67 | static inline struct the_nilfs *NILFS_I_NILFS(struct inode *inode) |
61 | { | 68 | { |
62 | struct super_block *sb = inode->i_sb; | 69 | return NILFS_SB(inode->i_sb)->s_nilfs; |
63 | |||
64 | return sb ? NILFS_SB(sb)->s_nilfs : NILFS_MDT(inode)->mi_nilfs; | ||
65 | } | 70 | } |
66 | 71 | ||
67 | /* Default GFP flags using highmem */ | 72 | /* Default GFP flags using highmem */ |
@@ -76,14 +81,17 @@ int nilfs_mdt_forget_block(struct inode *, unsigned long); | |||
76 | int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); | 81 | int nilfs_mdt_mark_block_dirty(struct inode *, unsigned long); |
77 | int nilfs_mdt_fetch_dirty(struct inode *); | 82 | int nilfs_mdt_fetch_dirty(struct inode *); |
78 | 83 | ||
79 | struct inode *nilfs_mdt_new(struct the_nilfs *, struct super_block *, ino_t, | 84 | int nilfs_mdt_init(struct inode *inode, gfp_t gfp_mask, size_t objsz); |
80 | size_t); | ||
81 | struct inode *nilfs_mdt_new_common(struct the_nilfs *, struct super_block *, | ||
82 | ino_t, gfp_t, size_t); | ||
83 | void nilfs_mdt_destroy(struct inode *); | ||
84 | void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); | 85 | void nilfs_mdt_set_entry_size(struct inode *, unsigned, unsigned); |
85 | void nilfs_mdt_set_shadow(struct inode *, struct inode *); | ||
86 | 86 | ||
87 | int nilfs_mdt_setup_shadow_map(struct inode *inode, | ||
88 | struct nilfs_shadow_map *shadow); | ||
89 | int nilfs_mdt_save_to_shadow_map(struct inode *inode); | ||
90 | void nilfs_mdt_restore_from_shadow_map(struct inode *inode); | ||
91 | void nilfs_mdt_clear_shadow_map(struct inode *inode); | ||
92 | int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh); | ||
93 | struct buffer_head *nilfs_mdt_get_frozen_buffer(struct inode *inode, | ||
94 | struct buffer_head *bh); | ||
87 | 95 | ||
88 | #define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) | 96 | #define nilfs_mdt_mark_buffer_dirty(bh) nilfs_mark_buffer_dirty(bh) |
89 | 97 | ||
@@ -100,7 +108,7 @@ static inline void nilfs_mdt_clear_dirty(struct inode *inode) | |||
100 | 108 | ||
101 | static inline __u64 nilfs_mdt_cno(struct inode *inode) | 109 | static inline __u64 nilfs_mdt_cno(struct inode *inode) |
102 | { | 110 | { |
103 | return NILFS_MDT(inode)->mi_nilfs->ns_cno; | 111 | return NILFS_I_NILFS(inode)->ns_cno; |
104 | } | 112 | } |
105 | 113 | ||
106 | #define nilfs_mdt_bgl_lock(inode, bg) \ | 114 | #define nilfs_mdt_bgl_lock(inode, bg) \ |
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index ad6ed2cf19b4..6e9557ecf161 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c | |||
@@ -40,7 +40,11 @@ | |||
40 | 40 | ||
41 | #include <linux/pagemap.h> | 41 | #include <linux/pagemap.h> |
42 | #include "nilfs.h" | 42 | #include "nilfs.h" |
43 | #include "export.h" | ||
43 | 44 | ||
45 | #define NILFS_FID_SIZE_NON_CONNECTABLE \ | ||
46 | (offsetof(struct nilfs_fid, parent_gen) / 4) | ||
47 | #define NILFS_FID_SIZE_CONNECTABLE (sizeof(struct nilfs_fid) / 4) | ||
44 | 48 | ||
45 | static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) | 49 | static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode) |
46 | { | 50 | { |
@@ -70,29 +74,13 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) | |||
70 | ino = nilfs_inode_by_name(dir, &dentry->d_name); | 74 | ino = nilfs_inode_by_name(dir, &dentry->d_name); |
71 | inode = NULL; | 75 | inode = NULL; |
72 | if (ino) { | 76 | if (ino) { |
73 | inode = nilfs_iget(dir->i_sb, ino); | 77 | inode = nilfs_iget(dir->i_sb, NILFS_I(dir)->i_root, ino); |
74 | if (IS_ERR(inode)) | 78 | if (IS_ERR(inode)) |
75 | return ERR_CAST(inode); | 79 | return ERR_CAST(inode); |
76 | } | 80 | } |
77 | return d_splice_alias(inode, dentry); | 81 | return d_splice_alias(inode, dentry); |
78 | } | 82 | } |
79 | 83 | ||
80 | struct dentry *nilfs_get_parent(struct dentry *child) | ||
81 | { | ||
82 | unsigned long ino; | ||
83 | struct inode *inode; | ||
84 | struct qstr dotdot = {.name = "..", .len = 2}; | ||
85 | |||
86 | ino = nilfs_inode_by_name(child->d_inode, &dotdot); | ||
87 | if (!ino) | ||
88 | return ERR_PTR(-ENOENT); | ||
89 | |||
90 | inode = nilfs_iget(child->d_inode->i_sb, ino); | ||
91 | if (IS_ERR(inode)) | ||
92 | return ERR_CAST(inode); | ||
93 | return d_obtain_alias(inode); | ||
94 | } | ||
95 | |||
96 | /* | 84 | /* |
97 | * By the time this is called, we already have created | 85 | * By the time this is called, we already have created |
98 | * the directory cache entry for the new file, but it | 86 | * the directory cache entry for the new file, but it |
@@ -219,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, | |||
219 | 207 | ||
220 | inode->i_ctime = CURRENT_TIME; | 208 | inode->i_ctime = CURRENT_TIME; |
221 | inode_inc_link_count(inode); | 209 | inode_inc_link_count(inode); |
222 | atomic_inc(&inode->i_count); | 210 | ihold(inode); |
223 | 211 | ||
224 | err = nilfs_add_nondir(dentry, inode); | 212 | err = nilfs_add_nondir(dentry, inode); |
225 | if (!err) | 213 | if (!err) |
@@ -468,6 +456,115 @@ out: | |||
468 | return err; | 456 | return err; |
469 | } | 457 | } |
470 | 458 | ||
459 | /* | ||
460 | * Export operations | ||
461 | */ | ||
462 | static struct dentry *nilfs_get_parent(struct dentry *child) | ||
463 | { | ||
464 | unsigned long ino; | ||
465 | struct inode *inode; | ||
466 | struct qstr dotdot = {.name = "..", .len = 2}; | ||
467 | struct nilfs_root *root; | ||
468 | |||
469 | ino = nilfs_inode_by_name(child->d_inode, &dotdot); | ||
470 | if (!ino) | ||
471 | return ERR_PTR(-ENOENT); | ||
472 | |||
473 | root = NILFS_I(child->d_inode)->i_root; | ||
474 | |||
475 | inode = nilfs_iget(child->d_inode->i_sb, root, ino); | ||
476 | if (IS_ERR(inode)) | ||
477 | return ERR_CAST(inode); | ||
478 | |||
479 | return d_obtain_alias(inode); | ||
480 | } | ||
481 | |||
482 | static struct dentry *nilfs_get_dentry(struct super_block *sb, u64 cno, | ||
483 | u64 ino, u32 gen) | ||
484 | { | ||
485 | struct nilfs_root *root; | ||
486 | struct inode *inode; | ||
487 | |||
488 | if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO) | ||
489 | return ERR_PTR(-ESTALE); | ||
490 | |||
491 | root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); | ||
492 | if (!root) | ||
493 | return ERR_PTR(-ESTALE); | ||
494 | |||
495 | inode = nilfs_iget(sb, root, ino); | ||
496 | nilfs_put_root(root); | ||
497 | |||
498 | if (IS_ERR(inode)) | ||
499 | return ERR_CAST(inode); | ||
500 | if (gen && inode->i_generation != gen) { | ||
501 | iput(inode); | ||
502 | return ERR_PTR(-ESTALE); | ||
503 | } | ||
504 | return d_obtain_alias(inode); | ||
505 | } | ||
506 | |||
507 | static struct dentry *nilfs_fh_to_dentry(struct super_block *sb, struct fid *fh, | ||
508 | int fh_len, int fh_type) | ||
509 | { | ||
510 | struct nilfs_fid *fid = (struct nilfs_fid *)fh; | ||
511 | |||
512 | if ((fh_len != NILFS_FID_SIZE_NON_CONNECTABLE && | ||
513 | fh_len != NILFS_FID_SIZE_CONNECTABLE) || | ||
514 | (fh_type != FILEID_NILFS_WITH_PARENT && | ||
515 | fh_type != FILEID_NILFS_WITHOUT_PARENT)) | ||
516 | return NULL; | ||
517 | |||
518 | return nilfs_get_dentry(sb, fid->cno, fid->ino, fid->gen); | ||
519 | } | ||
520 | |||
521 | static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh, | ||
522 | int fh_len, int fh_type) | ||
523 | { | ||
524 | struct nilfs_fid *fid = (struct nilfs_fid *)fh; | ||
525 | |||
526 | if (fh_len != NILFS_FID_SIZE_CONNECTABLE || | ||
527 | fh_type != FILEID_NILFS_WITH_PARENT) | ||
528 | return NULL; | ||
529 | |||
530 | return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen); | ||
531 | } | ||
532 | |||
533 | static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp, | ||
534 | int connectable) | ||
535 | { | ||
536 | struct nilfs_fid *fid = (struct nilfs_fid *)fh; | ||
537 | struct inode *inode = dentry->d_inode; | ||
538 | struct nilfs_root *root = NILFS_I(inode)->i_root; | ||
539 | int type; | ||
540 | |||
541 | if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE || | ||
542 | (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE)) | ||
543 | return 255; | ||
544 | |||
545 | fid->cno = root->cno; | ||
546 | fid->ino = inode->i_ino; | ||
547 | fid->gen = inode->i_generation; | ||
548 | |||
549 | if (connectable && !S_ISDIR(inode->i_mode)) { | ||
550 | struct inode *parent; | ||
551 | |||
552 | spin_lock(&dentry->d_lock); | ||
553 | parent = dentry->d_parent->d_inode; | ||
554 | fid->parent_ino = parent->i_ino; | ||
555 | fid->parent_gen = parent->i_generation; | ||
556 | spin_unlock(&dentry->d_lock); | ||
557 | |||
558 | type = FILEID_NILFS_WITH_PARENT; | ||
559 | *lenp = NILFS_FID_SIZE_CONNECTABLE; | ||
560 | } else { | ||
561 | type = FILEID_NILFS_WITHOUT_PARENT; | ||
562 | *lenp = NILFS_FID_SIZE_NON_CONNECTABLE; | ||
563 | } | ||
564 | |||
565 | return type; | ||
566 | } | ||
567 | |||
471 | const struct inode_operations nilfs_dir_inode_operations = { | 568 | const struct inode_operations nilfs_dir_inode_operations = { |
472 | .create = nilfs_create, | 569 | .create = nilfs_create, |
473 | .lookup = nilfs_lookup, | 570 | .lookup = nilfs_lookup, |
@@ -491,4 +588,12 @@ const struct inode_operations nilfs_symlink_inode_operations = { | |||
491 | .readlink = generic_readlink, | 588 | .readlink = generic_readlink, |
492 | .follow_link = page_follow_link_light, | 589 | .follow_link = page_follow_link_light, |
493 | .put_link = page_put_link, | 590 | .put_link = page_put_link, |
591 | .permission = nilfs_permission, | ||
592 | }; | ||
593 | |||
594 | const struct export_operations nilfs_export_ops = { | ||
595 | .encode_fh = nilfs_encode_fh, | ||
596 | .fh_to_dentry = nilfs_fh_to_dentry, | ||
597 | .fh_to_parent = nilfs_fh_to_parent, | ||
598 | .get_parent = nilfs_get_parent, | ||
494 | }; | 599 | }; |
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h index d3d54046e5f8..f7560da5a567 100644 --- a/fs/nilfs2/nilfs.h +++ b/fs/nilfs2/nilfs.h | |||
@@ -59,6 +59,7 @@ struct nilfs_inode_info { | |||
59 | #endif | 59 | #endif |
60 | struct buffer_head *i_bh; /* i_bh contains a new or dirty | 60 | struct buffer_head *i_bh; /* i_bh contains a new or dirty |
61 | disk inode */ | 61 | disk inode */ |
62 | struct nilfs_root *i_root; | ||
62 | struct inode vfs_inode; | 63 | struct inode vfs_inode; |
63 | }; | 64 | }; |
64 | 65 | ||
@@ -100,7 +101,6 @@ enum { | |||
100 | NILFS_I_INODE_DIRTY, /* write_inode is requested */ | 101 | NILFS_I_INODE_DIRTY, /* write_inode is requested */ |
101 | NILFS_I_BMAP, /* has bmap and btnode_cache */ | 102 | NILFS_I_BMAP, /* has bmap and btnode_cache */ |
102 | NILFS_I_GCINODE, /* inode for GC, on memory only */ | 103 | NILFS_I_GCINODE, /* inode for GC, on memory only */ |
103 | NILFS_I_GCDAT, /* shadow DAT, on memory only */ | ||
104 | }; | 104 | }; |
105 | 105 | ||
106 | /* | 106 | /* |
@@ -192,7 +192,7 @@ static inline int nilfs_doing_construction(void) | |||
192 | 192 | ||
193 | static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) | 193 | static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) |
194 | { | 194 | { |
195 | return nilfs_doing_gc() ? nilfs->ns_gc_dat : nilfs->ns_dat; | 195 | return nilfs->ns_dat; |
196 | } | 196 | } |
197 | 197 | ||
198 | /* | 198 | /* |
@@ -200,12 +200,9 @@ static inline struct inode *nilfs_dat_inode(const struct the_nilfs *nilfs) | |||
200 | */ | 200 | */ |
201 | #ifdef CONFIG_NILFS_POSIX_ACL | 201 | #ifdef CONFIG_NILFS_POSIX_ACL |
202 | #error "NILFS: not yet supported POSIX ACL" | 202 | #error "NILFS: not yet supported POSIX ACL" |
203 | extern int nilfs_permission(struct inode *, int, struct nameidata *); | ||
204 | extern int nilfs_acl_chmod(struct inode *); | 203 | extern int nilfs_acl_chmod(struct inode *); |
205 | extern int nilfs_init_acl(struct inode *, struct inode *); | 204 | extern int nilfs_init_acl(struct inode *, struct inode *); |
206 | #else | 205 | #else |
207 | #define nilfs_permission NULL | ||
208 | |||
209 | static inline int nilfs_acl_chmod(struct inode *inode) | 206 | static inline int nilfs_acl_chmod(struct inode *inode) |
210 | { | 207 | { |
211 | return 0; | 208 | return 0; |
@@ -247,11 +244,19 @@ extern int nilfs_get_block(struct inode *, sector_t, struct buffer_head *, int); | |||
247 | extern void nilfs_set_inode_flags(struct inode *); | 244 | extern void nilfs_set_inode_flags(struct inode *); |
248 | extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); | 245 | extern int nilfs_read_inode_common(struct inode *, struct nilfs_inode *); |
249 | extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); | 246 | extern void nilfs_write_inode_common(struct inode *, struct nilfs_inode *, int); |
250 | extern struct inode *nilfs_iget(struct super_block *, unsigned long); | 247 | struct inode *nilfs_ilookup(struct super_block *sb, struct nilfs_root *root, |
248 | unsigned long ino); | ||
249 | struct inode *nilfs_iget_locked(struct super_block *sb, struct nilfs_root *root, | ||
250 | unsigned long ino); | ||
251 | struct inode *nilfs_iget(struct super_block *sb, struct nilfs_root *root, | ||
252 | unsigned long ino); | ||
253 | extern struct inode *nilfs_iget_for_gc(struct super_block *sb, | ||
254 | unsigned long ino, __u64 cno); | ||
251 | extern void nilfs_update_inode(struct inode *, struct buffer_head *); | 255 | extern void nilfs_update_inode(struct inode *, struct buffer_head *); |
252 | extern void nilfs_truncate(struct inode *); | 256 | extern void nilfs_truncate(struct inode *); |
253 | extern void nilfs_evict_inode(struct inode *); | 257 | extern void nilfs_evict_inode(struct inode *); |
254 | extern int nilfs_setattr(struct dentry *, struct iattr *); | 258 | extern int nilfs_setattr(struct dentry *, struct iattr *); |
259 | int nilfs_permission(struct inode *inode, int mask); | ||
255 | extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, | 260 | extern int nilfs_load_inode_block(struct nilfs_sb_info *, struct inode *, |
256 | struct buffer_head **); | 261 | struct buffer_head **); |
257 | extern int nilfs_inode_dirty(struct inode *); | 262 | extern int nilfs_inode_dirty(struct inode *); |
@@ -260,11 +265,7 @@ extern int nilfs_set_file_dirty(struct nilfs_sb_info *, struct inode *, | |||
260 | extern int nilfs_mark_inode_dirty(struct inode *); | 265 | extern int nilfs_mark_inode_dirty(struct inode *); |
261 | extern void nilfs_dirty_inode(struct inode *); | 266 | extern void nilfs_dirty_inode(struct inode *); |
262 | 267 | ||
263 | /* namei.c */ | ||
264 | extern struct dentry *nilfs_get_parent(struct dentry *); | ||
265 | |||
266 | /* super.c */ | 268 | /* super.c */ |
267 | extern struct inode *nilfs_alloc_inode_common(struct the_nilfs *); | ||
268 | extern struct inode *nilfs_alloc_inode(struct super_block *); | 269 | extern struct inode *nilfs_alloc_inode(struct super_block *); |
269 | extern void nilfs_destroy_inode(struct inode *); | 270 | extern void nilfs_destroy_inode(struct inode *); |
270 | extern void nilfs_error(struct super_block *, const char *, const char *, ...) | 271 | extern void nilfs_error(struct super_block *, const char *, const char *, ...) |
@@ -283,8 +284,9 @@ extern struct nilfs_super_block **nilfs_prepare_super(struct nilfs_sb_info *, | |||
283 | int flip); | 284 | int flip); |
284 | extern int nilfs_commit_super(struct nilfs_sb_info *, int); | 285 | extern int nilfs_commit_super(struct nilfs_sb_info *, int); |
285 | extern int nilfs_cleanup_super(struct nilfs_sb_info *); | 286 | extern int nilfs_cleanup_super(struct nilfs_sb_info *); |
286 | extern int nilfs_attach_checkpoint(struct nilfs_sb_info *, __u64); | 287 | int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, |
287 | extern void nilfs_detach_checkpoint(struct nilfs_sb_info *); | 288 | struct nilfs_root **root); |
289 | int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno); | ||
288 | 290 | ||
289 | /* gcinode.c */ | 291 | /* gcinode.c */ |
290 | int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, | 292 | int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, |
@@ -292,16 +294,8 @@ int nilfs_gccache_submit_read_data(struct inode *, sector_t, sector_t, __u64, | |||
292 | int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, | 294 | int nilfs_gccache_submit_read_node(struct inode *, sector_t, __u64, |
293 | struct buffer_head **); | 295 | struct buffer_head **); |
294 | int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); | 296 | int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *); |
295 | int nilfs_init_gccache(struct the_nilfs *); | 297 | int nilfs_init_gcinode(struct inode *inode); |
296 | void nilfs_destroy_gccache(struct the_nilfs *); | 298 | void nilfs_remove_all_gcinodes(struct the_nilfs *nilfs); |
297 | void nilfs_clear_gcinode(struct inode *); | ||
298 | struct inode *nilfs_gc_iget(struct the_nilfs *, ino_t, __u64); | ||
299 | void nilfs_remove_all_gcinode(struct the_nilfs *); | ||
300 | |||
301 | /* gcdat.c */ | ||
302 | int nilfs_init_gcdat_inode(struct the_nilfs *); | ||
303 | void nilfs_commit_gcdat_inode(struct the_nilfs *); | ||
304 | void nilfs_clear_gcdat_inode(struct the_nilfs *); | ||
305 | 299 | ||
306 | /* | 300 | /* |
307 | * Inodes and files operations | 301 | * Inodes and files operations |
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index aab11db2cb08..a6c3c2e817f8 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c | |||
@@ -79,8 +79,8 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, | |||
79 | { | 79 | { |
80 | int blkbits = inode->i_blkbits; | 80 | int blkbits = inode->i_blkbits; |
81 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | 81 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); |
82 | struct page *page, *opage; | 82 | struct page *page; |
83 | struct buffer_head *bh, *obh; | 83 | struct buffer_head *bh; |
84 | 84 | ||
85 | page = grab_cache_page(mapping, index); | 85 | page = grab_cache_page(mapping, index); |
86 | if (unlikely(!page)) | 86 | if (unlikely(!page)) |
@@ -92,30 +92,6 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode, | |||
92 | page_cache_release(page); | 92 | page_cache_release(page); |
93 | return NULL; | 93 | return NULL; |
94 | } | 94 | } |
95 | if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { | ||
96 | /* | ||
97 | * Shadow page cache uses assoc_mapping to point its original | ||
98 | * page cache. The following code tries the original cache | ||
99 | * if the given cache is a shadow and it didn't hit. | ||
100 | */ | ||
101 | opage = find_lock_page(mapping->assoc_mapping, index); | ||
102 | if (!opage) | ||
103 | return bh; | ||
104 | |||
105 | obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, | ||
106 | b_state); | ||
107 | if (buffer_uptodate(obh)) { | ||
108 | nilfs_copy_buffer(bh, obh); | ||
109 | if (buffer_dirty(obh)) { | ||
110 | nilfs_mark_buffer_dirty(bh); | ||
111 | if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) | ||
112 | nilfs_mdt_mark_dirty(inode); | ||
113 | } | ||
114 | } | ||
115 | brelse(obh); | ||
116 | unlock_page(opage); | ||
117 | page_cache_release(opage); | ||
118 | } | ||
119 | return bh; | 95 | return bh; |
120 | } | 96 | } |
121 | 97 | ||
@@ -131,6 +107,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) | |||
131 | lock_buffer(bh); | 107 | lock_buffer(bh); |
132 | clear_buffer_nilfs_volatile(bh); | 108 | clear_buffer_nilfs_volatile(bh); |
133 | clear_buffer_nilfs_checked(bh); | 109 | clear_buffer_nilfs_checked(bh); |
110 | clear_buffer_nilfs_redirected(bh); | ||
134 | clear_buffer_dirty(bh); | 111 | clear_buffer_dirty(bh); |
135 | if (nilfs_page_buffers_clean(page)) | 112 | if (nilfs_page_buffers_clean(page)) |
136 | __nilfs_clear_page_dirty(page); | 113 | __nilfs_clear_page_dirty(page); |
@@ -483,6 +460,7 @@ void nilfs_clear_dirty_pages(struct address_space *mapping) | |||
483 | clear_buffer_dirty(bh); | 460 | clear_buffer_dirty(bh); |
484 | clear_buffer_nilfs_volatile(bh); | 461 | clear_buffer_nilfs_volatile(bh); |
485 | clear_buffer_nilfs_checked(bh); | 462 | clear_buffer_nilfs_checked(bh); |
463 | clear_buffer_nilfs_redirected(bh); | ||
486 | clear_buffer_uptodate(bh); | 464 | clear_buffer_uptodate(bh); |
487 | clear_buffer_mapped(bh); | 465 | clear_buffer_mapped(bh); |
488 | unlock_buffer(bh); | 466 | unlock_buffer(bh); |
@@ -513,6 +491,31 @@ unsigned nilfs_page_count_clean_buffers(struct page *page, | |||
513 | } | 491 | } |
514 | return nc; | 492 | return nc; |
515 | } | 493 | } |
494 | |||
495 | void nilfs_mapping_init_once(struct address_space *mapping) | ||
496 | { | ||
497 | memset(mapping, 0, sizeof(*mapping)); | ||
498 | INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); | ||
499 | spin_lock_init(&mapping->tree_lock); | ||
500 | INIT_LIST_HEAD(&mapping->private_list); | ||
501 | spin_lock_init(&mapping->private_lock); | ||
502 | |||
503 | spin_lock_init(&mapping->i_mmap_lock); | ||
504 | INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); | ||
505 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); | ||
506 | } | ||
507 | |||
508 | void nilfs_mapping_init(struct address_space *mapping, | ||
509 | struct backing_dev_info *bdi, | ||
510 | const struct address_space_operations *aops) | ||
511 | { | ||
512 | mapping->host = NULL; | ||
513 | mapping->flags = 0; | ||
514 | mapping_set_gfp_mask(mapping, GFP_NOFS); | ||
515 | mapping->assoc_mapping = NULL; | ||
516 | mapping->backing_dev_info = bdi; | ||
517 | mapping->a_ops = aops; | ||
518 | } | ||
516 | 519 | ||
517 | /* | 520 | /* |
518 | * NILFS2 needs clear_page_dirty() in the following two cases: | 521 | * NILFS2 needs clear_page_dirty() in the following two cases: |
diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h index f53d8da41ed7..fb9e8a8a2038 100644 --- a/fs/nilfs2/page.h +++ b/fs/nilfs2/page.h | |||
@@ -35,12 +35,14 @@ enum { | |||
35 | BH_NILFS_Node, | 35 | BH_NILFS_Node, |
36 | BH_NILFS_Volatile, | 36 | BH_NILFS_Volatile, |
37 | BH_NILFS_Checked, | 37 | BH_NILFS_Checked, |
38 | BH_NILFS_Redirected, | ||
38 | }; | 39 | }; |
39 | 40 | ||
40 | BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ | 41 | BUFFER_FNS(NILFS_Allocated, nilfs_allocated) /* nilfs private buffers */ |
41 | BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ | 42 | BUFFER_FNS(NILFS_Node, nilfs_node) /* nilfs node buffers */ |
42 | BUFFER_FNS(NILFS_Volatile, nilfs_volatile) | 43 | BUFFER_FNS(NILFS_Volatile, nilfs_volatile) |
43 | BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ | 44 | BUFFER_FNS(NILFS_Checked, nilfs_checked) /* buffer is verified */ |
45 | BUFFER_FNS(NILFS_Redirected, nilfs_redirected) /* redirected to a copy */ | ||
44 | 46 | ||
45 | 47 | ||
46 | void nilfs_mark_buffer_dirty(struct buffer_head *bh); | 48 | void nilfs_mark_buffer_dirty(struct buffer_head *bh); |
@@ -59,6 +61,10 @@ void nilfs_free_private_page(struct page *); | |||
59 | int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); | 61 | int nilfs_copy_dirty_pages(struct address_space *, struct address_space *); |
60 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); | 62 | void nilfs_copy_back_pages(struct address_space *, struct address_space *); |
61 | void nilfs_clear_dirty_pages(struct address_space *); | 63 | void nilfs_clear_dirty_pages(struct address_space *); |
64 | void nilfs_mapping_init_once(struct address_space *mapping); | ||
65 | void nilfs_mapping_init(struct address_space *mapping, | ||
66 | struct backing_dev_info *bdi, | ||
67 | const struct address_space_operations *aops); | ||
62 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); | 68 | unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned); |
63 | 69 | ||
64 | #define NILFS_PAGE_BUG(page, m, a...) \ | 70 | #define NILFS_PAGE_BUG(page, m, a...) \ |
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index d0c35ef39f6a..5d2711c28da7 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c | |||
@@ -440,7 +440,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, | |||
440 | segnum[2] = ri->ri_segnum; | 440 | segnum[2] = ri->ri_segnum; |
441 | segnum[3] = ri->ri_nextnum; | 441 | segnum[3] = ri->ri_nextnum; |
442 | 442 | ||
443 | nilfs_attach_writer(nilfs, sbi); | ||
444 | /* | 443 | /* |
445 | * Releasing the next segment of the latest super root. | 444 | * Releasing the next segment of the latest super root. |
446 | * The next segment is invalidated by this recovery. | 445 | * The next segment is invalidated by this recovery. |
@@ -480,7 +479,6 @@ static int nilfs_prepare_segment_for_recovery(struct the_nilfs *nilfs, | |||
480 | 479 | ||
481 | failed: | 480 | failed: |
482 | /* No need to recover sufile because it will be destroyed on error */ | 481 | /* No need to recover sufile because it will be destroyed on error */ |
483 | nilfs_detach_writer(nilfs, sbi); | ||
484 | return err; | 482 | return err; |
485 | } | 483 | } |
486 | 484 | ||
@@ -504,6 +502,7 @@ static int nilfs_recovery_copy_block(struct the_nilfs *nilfs, | |||
504 | 502 | ||
505 | static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, | 503 | static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, |
506 | struct nilfs_sb_info *sbi, | 504 | struct nilfs_sb_info *sbi, |
505 | struct nilfs_root *root, | ||
507 | struct list_head *head, | 506 | struct list_head *head, |
508 | unsigned long *nr_salvaged_blocks) | 507 | unsigned long *nr_salvaged_blocks) |
509 | { | 508 | { |
@@ -515,7 +514,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, | |||
515 | int err = 0, err2 = 0; | 514 | int err = 0, err2 = 0; |
516 | 515 | ||
517 | list_for_each_entry_safe(rb, n, head, list) { | 516 | list_for_each_entry_safe(rb, n, head, list) { |
518 | inode = nilfs_iget(sbi->s_super, rb->ino); | 517 | inode = nilfs_iget(sbi->s_super, root, rb->ino); |
519 | if (IS_ERR(inode)) { | 518 | if (IS_ERR(inode)) { |
520 | err = PTR_ERR(inode); | 519 | err = PTR_ERR(inode); |
521 | inode = NULL; | 520 | inode = NULL; |
@@ -578,6 +577,7 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs, | |||
578 | */ | 577 | */ |
579 | static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | 578 | static int nilfs_do_roll_forward(struct the_nilfs *nilfs, |
580 | struct nilfs_sb_info *sbi, | 579 | struct nilfs_sb_info *sbi, |
580 | struct nilfs_root *root, | ||
581 | struct nilfs_recovery_info *ri) | 581 | struct nilfs_recovery_info *ri) |
582 | { | 582 | { |
583 | struct buffer_head *bh_sum = NULL; | 583 | struct buffer_head *bh_sum = NULL; |
@@ -597,7 +597,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
597 | }; | 597 | }; |
598 | int state = RF_INIT_ST; | 598 | int state = RF_INIT_ST; |
599 | 599 | ||
600 | nilfs_attach_writer(nilfs, sbi); | ||
601 | pseg_start = ri->ri_lsegs_start; | 600 | pseg_start = ri->ri_lsegs_start; |
602 | seg_seq = ri->ri_lsegs_start_seq; | 601 | seg_seq = ri->ri_lsegs_start_seq; |
603 | segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); | 602 | segnum = nilfs_get_segnum_of_block(nilfs, pseg_start); |
@@ -649,7 +648,7 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
649 | goto failed; | 648 | goto failed; |
650 | if (flags & NILFS_SS_LOGEND) { | 649 | if (flags & NILFS_SS_LOGEND) { |
651 | err = nilfs_recover_dsync_blocks( | 650 | err = nilfs_recover_dsync_blocks( |
652 | nilfs, sbi, &dsync_blocks, | 651 | nilfs, sbi, root, &dsync_blocks, |
653 | &nsalvaged_blocks); | 652 | &nsalvaged_blocks); |
654 | if (unlikely(err)) | 653 | if (unlikely(err)) |
655 | goto failed; | 654 | goto failed; |
@@ -688,7 +687,6 @@ static int nilfs_do_roll_forward(struct the_nilfs *nilfs, | |||
688 | out: | 687 | out: |
689 | brelse(bh_sum); | 688 | brelse(bh_sum); |
690 | dispose_recovery_list(&dsync_blocks); | 689 | dispose_recovery_list(&dsync_blocks); |
691 | nilfs_detach_writer(nilfs, sbi); | ||
692 | return err; | 690 | return err; |
693 | 691 | ||
694 | confused: | 692 | confused: |
@@ -746,19 +744,20 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, | |||
746 | struct nilfs_sb_info *sbi, | 744 | struct nilfs_sb_info *sbi, |
747 | struct nilfs_recovery_info *ri) | 745 | struct nilfs_recovery_info *ri) |
748 | { | 746 | { |
747 | struct nilfs_root *root; | ||
749 | int err; | 748 | int err; |
750 | 749 | ||
751 | if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) | 750 | if (ri->ri_lsegs_start == 0 || ri->ri_lsegs_end == 0) |
752 | return 0; | 751 | return 0; |
753 | 752 | ||
754 | err = nilfs_attach_checkpoint(sbi, ri->ri_cno); | 753 | err = nilfs_attach_checkpoint(sbi, ri->ri_cno, true, &root); |
755 | if (unlikely(err)) { | 754 | if (unlikely(err)) { |
756 | printk(KERN_ERR | 755 | printk(KERN_ERR |
757 | "NILFS: error loading the latest checkpoint.\n"); | 756 | "NILFS: error loading the latest checkpoint.\n"); |
758 | return err; | 757 | return err; |
759 | } | 758 | } |
760 | 759 | ||
761 | err = nilfs_do_roll_forward(nilfs, sbi, ri); | 760 | err = nilfs_do_roll_forward(nilfs, sbi, root, ri); |
762 | if (unlikely(err)) | 761 | if (unlikely(err)) |
763 | goto failed; | 762 | goto failed; |
764 | 763 | ||
@@ -770,7 +769,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, | |||
770 | goto failed; | 769 | goto failed; |
771 | } | 770 | } |
772 | 771 | ||
773 | err = nilfs_attach_segment_constructor(sbi); | 772 | err = nilfs_attach_segment_constructor(sbi, root); |
774 | if (unlikely(err)) | 773 | if (unlikely(err)) |
775 | goto failed; | 774 | goto failed; |
776 | 775 | ||
@@ -788,7 +787,7 @@ int nilfs_salvage_orphan_logs(struct the_nilfs *nilfs, | |||
788 | } | 787 | } |
789 | 788 | ||
790 | failed: | 789 | failed: |
791 | nilfs_detach_checkpoint(sbi); | 790 | nilfs_put_root(root); |
792 | return err; | 791 | return err; |
793 | } | 792 | } |
794 | 793 | ||
diff --git a/fs/nilfs2/sb.h b/fs/nilfs2/sb.h index 0776ccc2504a..35a07157b980 100644 --- a/fs/nilfs2/sb.h +++ b/fs/nilfs2/sb.h | |||
@@ -42,11 +42,6 @@ struct nilfs_sc_info; | |||
42 | * NILFS super-block data in memory | 42 | * NILFS super-block data in memory |
43 | */ | 43 | */ |
44 | struct nilfs_sb_info { | 44 | struct nilfs_sb_info { |
45 | /* Snapshot status */ | ||
46 | __u64 s_snapshot_cno; /* Checkpoint number */ | ||
47 | atomic_t s_inodes_count; | ||
48 | atomic_t s_blocks_count; /* Reserved (might be deleted) */ | ||
49 | |||
50 | /* Mount options */ | 45 | /* Mount options */ |
51 | unsigned long s_mount_opt; | 46 | unsigned long s_mount_opt; |
52 | uid_t s_resuid; | 47 | uid_t s_resuid; |
@@ -59,8 +54,6 @@ struct nilfs_sb_info { | |||
59 | /* Fundamental members */ | 54 | /* Fundamental members */ |
60 | struct super_block *s_super; /* reverse pointer to super_block */ | 55 | struct super_block *s_super; /* reverse pointer to super_block */ |
61 | struct the_nilfs *s_nilfs; | 56 | struct the_nilfs *s_nilfs; |
62 | struct list_head s_list; /* list head for nilfs->ns_supers */ | ||
63 | atomic_t s_count; /* reference count */ | ||
64 | 57 | ||
65 | /* Segment constructor */ | 58 | /* Segment constructor */ |
66 | struct list_head s_dirty_files; /* dirty files list */ | 59 | struct list_head s_dirty_files; /* dirty files list */ |
@@ -68,9 +61,6 @@ struct nilfs_sb_info { | |||
68 | spinlock_t s_inode_lock; /* Lock for the nilfs inode. | 61 | spinlock_t s_inode_lock; /* Lock for the nilfs inode. |
69 | It covers s_dirty_files list */ | 62 | It covers s_dirty_files list */ |
70 | 63 | ||
71 | /* Metadata files */ | ||
72 | struct inode *s_ifile; /* index file inode */ | ||
73 | |||
74 | /* Inode allocator */ | 64 | /* Inode allocator */ |
75 | spinlock_t s_next_gen_lock; | 65 | spinlock_t s_next_gen_lock; |
76 | u32 s_next_generation; | 66 | u32 s_next_generation; |
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index 4588fb9e93df..0f83e93935b2 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c | |||
@@ -371,7 +371,8 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, | |||
371 | struct bio *bio = wi->bio; | 371 | struct bio *bio = wi->bio; |
372 | int err; | 372 | int err; |
373 | 373 | ||
374 | if (segbuf->sb_nbio > 0 && bdi_write_congested(wi->nilfs->ns_bdi)) { | 374 | if (segbuf->sb_nbio > 0 && |
375 | bdi_write_congested(segbuf->sb_super->s_bdi)) { | ||
375 | wait_for_completion(&segbuf->sb_bio_event); | 376 | wait_for_completion(&segbuf->sb_bio_event); |
376 | segbuf->sb_nbio--; | 377 | segbuf->sb_nbio--; |
377 | if (unlikely(atomic_read(&segbuf->sb_err))) { | 378 | if (unlikely(atomic_read(&segbuf->sb_err))) { |
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 9fd051a33c4f..687d090cea34 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c | |||
@@ -191,6 +191,8 @@ int nilfs_transaction_begin(struct super_block *sb, | |||
191 | if (ret > 0) | 191 | if (ret > 0) |
192 | return 0; | 192 | return 0; |
193 | 193 | ||
194 | vfs_check_frozen(sb, SB_FREEZE_WRITE); | ||
195 | |||
194 | sbi = NILFS_SB(sb); | 196 | sbi = NILFS_SB(sb); |
195 | nilfs = sbi->s_nilfs; | 197 | nilfs = sbi->s_nilfs; |
196 | down_read(&nilfs->ns_segctor_sem); | 198 | down_read(&nilfs->ns_segctor_sem); |
@@ -366,8 +368,7 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci) | |||
366 | 368 | ||
367 | if (nilfs_doing_gc()) | 369 | if (nilfs_doing_gc()) |
368 | flags = NILFS_SS_GC; | 370 | flags = NILFS_SS_GC; |
369 | err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, | 371 | err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno); |
370 | sci->sc_sbi->s_nilfs->ns_cno); | ||
371 | if (unlikely(err)) | 372 | if (unlikely(err)) |
372 | return err; | 373 | return err; |
373 | 374 | ||
@@ -440,17 +441,26 @@ static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci, | |||
440 | struct nilfs_finfo *finfo; | 441 | struct nilfs_finfo *finfo; |
441 | struct nilfs_inode_info *ii; | 442 | struct nilfs_inode_info *ii; |
442 | struct nilfs_segment_buffer *segbuf; | 443 | struct nilfs_segment_buffer *segbuf; |
444 | __u64 cno; | ||
443 | 445 | ||
444 | if (sci->sc_blk_cnt == 0) | 446 | if (sci->sc_blk_cnt == 0) |
445 | return; | 447 | return; |
446 | 448 | ||
447 | ii = NILFS_I(inode); | 449 | ii = NILFS_I(inode); |
450 | |||
451 | if (test_bit(NILFS_I_GCINODE, &ii->i_state)) | ||
452 | cno = ii->i_cno; | ||
453 | else if (NILFS_ROOT_METADATA_FILE(inode->i_ino)) | ||
454 | cno = 0; | ||
455 | else | ||
456 | cno = sci->sc_cno; | ||
457 | |||
448 | finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, | 458 | finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr, |
449 | sizeof(*finfo)); | 459 | sizeof(*finfo)); |
450 | finfo->fi_ino = cpu_to_le64(inode->i_ino); | 460 | finfo->fi_ino = cpu_to_le64(inode->i_ino); |
451 | finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); | 461 | finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt); |
452 | finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); | 462 | finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt); |
453 | finfo->fi_cno = cpu_to_le64(ii->i_cno); | 463 | finfo->fi_cno = cpu_to_le64(cno); |
454 | 464 | ||
455 | segbuf = sci->sc_curseg; | 465 | segbuf = sci->sc_curseg; |
456 | segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + | 466 | segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset + |
@@ -755,12 +765,12 @@ static void nilfs_dispose_list(struct nilfs_sb_info *sbi, | |||
755 | } | 765 | } |
756 | } | 766 | } |
757 | 767 | ||
758 | static int nilfs_test_metadata_dirty(struct nilfs_sb_info *sbi) | 768 | static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs, |
769 | struct nilfs_root *root) | ||
759 | { | 770 | { |
760 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
761 | int ret = 0; | 771 | int ret = 0; |
762 | 772 | ||
763 | if (nilfs_mdt_fetch_dirty(sbi->s_ifile)) | 773 | if (nilfs_mdt_fetch_dirty(root->ifile)) |
764 | ret++; | 774 | ret++; |
765 | if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) | 775 | if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile)) |
766 | ret++; | 776 | ret++; |
@@ -785,7 +795,7 @@ static int nilfs_segctor_confirm(struct nilfs_sc_info *sci) | |||
785 | struct nilfs_sb_info *sbi = sci->sc_sbi; | 795 | struct nilfs_sb_info *sbi = sci->sc_sbi; |
786 | int ret = 0; | 796 | int ret = 0; |
787 | 797 | ||
788 | if (nilfs_test_metadata_dirty(sbi)) | 798 | if (nilfs_test_metadata_dirty(sbi->s_nilfs, sci->sc_root)) |
789 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); | 799 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); |
790 | 800 | ||
791 | spin_lock(&sbi->s_inode_lock); | 801 | spin_lock(&sbi->s_inode_lock); |
@@ -801,7 +811,7 @@ static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci) | |||
801 | struct nilfs_sb_info *sbi = sci->sc_sbi; | 811 | struct nilfs_sb_info *sbi = sci->sc_sbi; |
802 | struct the_nilfs *nilfs = sbi->s_nilfs; | 812 | struct the_nilfs *nilfs = sbi->s_nilfs; |
803 | 813 | ||
804 | nilfs_mdt_clear_dirty(sbi->s_ifile); | 814 | nilfs_mdt_clear_dirty(sci->sc_root->ifile); |
805 | nilfs_mdt_clear_dirty(nilfs->ns_cpfile); | 815 | nilfs_mdt_clear_dirty(nilfs->ns_cpfile); |
806 | nilfs_mdt_clear_dirty(nilfs->ns_sufile); | 816 | nilfs_mdt_clear_dirty(nilfs->ns_sufile); |
807 | nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); | 817 | nilfs_mdt_clear_dirty(nilfs_dat_inode(nilfs)); |
@@ -848,9 +858,9 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) | |||
848 | raw_cp->cp_snapshot_list.ssl_next = 0; | 858 | raw_cp->cp_snapshot_list.ssl_next = 0; |
849 | raw_cp->cp_snapshot_list.ssl_prev = 0; | 859 | raw_cp->cp_snapshot_list.ssl_prev = 0; |
850 | raw_cp->cp_inodes_count = | 860 | raw_cp->cp_inodes_count = |
851 | cpu_to_le64(atomic_read(&sbi->s_inodes_count)); | 861 | cpu_to_le64(atomic_read(&sci->sc_root->inodes_count)); |
852 | raw_cp->cp_blocks_count = | 862 | raw_cp->cp_blocks_count = |
853 | cpu_to_le64(atomic_read(&sbi->s_blocks_count)); | 863 | cpu_to_le64(atomic_read(&sci->sc_root->blocks_count)); |
854 | raw_cp->cp_nblk_inc = | 864 | raw_cp->cp_nblk_inc = |
855 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); | 865 | cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc); |
856 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); | 866 | raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime); |
@@ -861,7 +871,8 @@ static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci) | |||
861 | else | 871 | else |
862 | nilfs_checkpoint_set_minor(raw_cp); | 872 | nilfs_checkpoint_set_minor(raw_cp); |
863 | 873 | ||
864 | nilfs_write_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode, 1); | 874 | nilfs_write_inode_common(sci->sc_root->ifile, |
875 | &raw_cp->cp_ifile_inode, 1); | ||
865 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); | 876 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp); |
866 | return 0; | 877 | return 0; |
867 | 878 | ||
@@ -886,13 +897,12 @@ static void nilfs_fill_in_file_bmap(struct inode *ifile, | |||
886 | } | 897 | } |
887 | } | 898 | } |
888 | 899 | ||
889 | static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci, | 900 | static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci) |
890 | struct inode *ifile) | ||
891 | { | 901 | { |
892 | struct nilfs_inode_info *ii; | 902 | struct nilfs_inode_info *ii; |
893 | 903 | ||
894 | list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { | 904 | list_for_each_entry(ii, &sci->sc_dirty_files, i_dirty) { |
895 | nilfs_fill_in_file_bmap(ifile, ii); | 905 | nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii); |
896 | set_bit(NILFS_I_COLLECTED, &ii->i_state); | 906 | set_bit(NILFS_I_COLLECTED, &ii->i_state); |
897 | } | 907 | } |
898 | } | 908 | } |
@@ -1135,7 +1145,7 @@ static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode) | |||
1135 | sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; | 1145 | sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED; |
1136 | /* Fall through */ | 1146 | /* Fall through */ |
1137 | case NILFS_ST_IFILE: | 1147 | case NILFS_ST_IFILE: |
1138 | err = nilfs_segctor_scan_file(sci, sbi->s_ifile, | 1148 | err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile, |
1139 | &nilfs_sc_file_ops); | 1149 | &nilfs_sc_file_ops); |
1140 | if (unlikely(err)) | 1150 | if (unlikely(err)) |
1141 | break; | 1151 | break; |
@@ -1599,7 +1609,7 @@ nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out) | |||
1599 | kunmap_atomic(kaddr, KM_USER0); | 1609 | kunmap_atomic(kaddr, KM_USER0); |
1600 | 1610 | ||
1601 | if (!TestSetPageWriteback(clone_page)) | 1611 | if (!TestSetPageWriteback(clone_page)) |
1602 | inc_zone_page_state(clone_page, NR_WRITEBACK); | 1612 | account_page_writeback(clone_page); |
1603 | unlock_page(clone_page); | 1613 | unlock_page(clone_page); |
1604 | 1614 | ||
1605 | return 0; | 1615 | return 0; |
@@ -1900,6 +1910,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) | |||
1900 | set_buffer_uptodate(bh); | 1910 | set_buffer_uptodate(bh); |
1901 | clear_buffer_dirty(bh); | 1911 | clear_buffer_dirty(bh); |
1902 | clear_buffer_nilfs_volatile(bh); | 1912 | clear_buffer_nilfs_volatile(bh); |
1913 | clear_buffer_nilfs_redirected(bh); | ||
1903 | if (bh == segbuf->sb_super_root) { | 1914 | if (bh == segbuf->sb_super_root) { |
1904 | if (bh->b_page != bd_page) { | 1915 | if (bh->b_page != bd_page) { |
1905 | end_page_writeback(bd_page); | 1916 | end_page_writeback(bd_page); |
@@ -1936,11 +1947,9 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) | |||
1936 | 1947 | ||
1937 | nilfs_drop_collected_inodes(&sci->sc_dirty_files); | 1948 | nilfs_drop_collected_inodes(&sci->sc_dirty_files); |
1938 | 1949 | ||
1939 | if (nilfs_doing_gc()) { | 1950 | if (nilfs_doing_gc()) |
1940 | nilfs_drop_collected_inodes(&sci->sc_gc_inodes); | 1951 | nilfs_drop_collected_inodes(&sci->sc_gc_inodes); |
1941 | if (update_sr) | 1952 | else |
1942 | nilfs_commit_gcdat_inode(nilfs); | ||
1943 | } else | ||
1944 | nilfs->ns_nongc_ctime = sci->sc_seg_ctime; | 1953 | nilfs->ns_nongc_ctime = sci->sc_seg_ctime; |
1945 | 1954 | ||
1946 | sci->sc_nblk_inc += sci->sc_nblk_this_inc; | 1955 | sci->sc_nblk_inc += sci->sc_nblk_this_inc; |
@@ -1976,7 +1985,7 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, | |||
1976 | struct nilfs_sb_info *sbi) | 1985 | struct nilfs_sb_info *sbi) |
1977 | { | 1986 | { |
1978 | struct nilfs_inode_info *ii, *n; | 1987 | struct nilfs_inode_info *ii, *n; |
1979 | __u64 cno = sbi->s_nilfs->ns_cno; | 1988 | struct inode *ifile = sci->sc_root->ifile; |
1980 | 1989 | ||
1981 | spin_lock(&sbi->s_inode_lock); | 1990 | spin_lock(&sbi->s_inode_lock); |
1982 | retry: | 1991 | retry: |
@@ -1987,14 +1996,14 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, | |||
1987 | 1996 | ||
1988 | spin_unlock(&sbi->s_inode_lock); | 1997 | spin_unlock(&sbi->s_inode_lock); |
1989 | err = nilfs_ifile_get_inode_block( | 1998 | err = nilfs_ifile_get_inode_block( |
1990 | sbi->s_ifile, ii->vfs_inode.i_ino, &ibh); | 1999 | ifile, ii->vfs_inode.i_ino, &ibh); |
1991 | if (unlikely(err)) { | 2000 | if (unlikely(err)) { |
1992 | nilfs_warning(sbi->s_super, __func__, | 2001 | nilfs_warning(sbi->s_super, __func__, |
1993 | "failed to get inode block.\n"); | 2002 | "failed to get inode block.\n"); |
1994 | return err; | 2003 | return err; |
1995 | } | 2004 | } |
1996 | nilfs_mdt_mark_buffer_dirty(ibh); | 2005 | nilfs_mdt_mark_buffer_dirty(ibh); |
1997 | nilfs_mdt_mark_dirty(sbi->s_ifile); | 2006 | nilfs_mdt_mark_dirty(ifile); |
1998 | spin_lock(&sbi->s_inode_lock); | 2007 | spin_lock(&sbi->s_inode_lock); |
1999 | if (likely(!ii->i_bh)) | 2008 | if (likely(!ii->i_bh)) |
2000 | ii->i_bh = ibh; | 2009 | ii->i_bh = ibh; |
@@ -2002,7 +2011,6 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, | |||
2002 | brelse(ibh); | 2011 | brelse(ibh); |
2003 | goto retry; | 2012 | goto retry; |
2004 | } | 2013 | } |
2005 | ii->i_cno = cno; | ||
2006 | 2014 | ||
2007 | clear_bit(NILFS_I_QUEUED, &ii->i_state); | 2015 | clear_bit(NILFS_I_QUEUED, &ii->i_state); |
2008 | set_bit(NILFS_I_BUSY, &ii->i_state); | 2016 | set_bit(NILFS_I_BUSY, &ii->i_state); |
@@ -2011,8 +2019,6 @@ static int nilfs_segctor_check_in_files(struct nilfs_sc_info *sci, | |||
2011 | } | 2019 | } |
2012 | spin_unlock(&sbi->s_inode_lock); | 2020 | spin_unlock(&sbi->s_inode_lock); |
2013 | 2021 | ||
2014 | NILFS_I(sbi->s_ifile)->i_cno = cno; | ||
2015 | |||
2016 | return 0; | 2022 | return 0; |
2017 | } | 2023 | } |
2018 | 2024 | ||
@@ -2021,19 +2027,13 @@ static void nilfs_segctor_check_out_files(struct nilfs_sc_info *sci, | |||
2021 | { | 2027 | { |
2022 | struct nilfs_transaction_info *ti = current->journal_info; | 2028 | struct nilfs_transaction_info *ti = current->journal_info; |
2023 | struct nilfs_inode_info *ii, *n; | 2029 | struct nilfs_inode_info *ii, *n; |
2024 | __u64 cno = sbi->s_nilfs->ns_cno; | ||
2025 | 2030 | ||
2026 | spin_lock(&sbi->s_inode_lock); | 2031 | spin_lock(&sbi->s_inode_lock); |
2027 | list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { | 2032 | list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) { |
2028 | if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || | 2033 | if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) || |
2029 | test_bit(NILFS_I_DIRTY, &ii->i_state)) { | 2034 | test_bit(NILFS_I_DIRTY, &ii->i_state)) |
2030 | /* The current checkpoint number (=nilfs->ns_cno) is | ||
2031 | changed between check-in and check-out only if the | ||
2032 | super root is written out. So, we can update i_cno | ||
2033 | for the inodes that remain in the dirty list. */ | ||
2034 | ii->i_cno = cno; | ||
2035 | continue; | 2035 | continue; |
2036 | } | 2036 | |
2037 | clear_bit(NILFS_I_BUSY, &ii->i_state); | 2037 | clear_bit(NILFS_I_BUSY, &ii->i_state); |
2038 | brelse(ii->i_bh); | 2038 | brelse(ii->i_bh); |
2039 | ii->i_bh = NULL; | 2039 | ii->i_bh = NULL; |
@@ -2054,12 +2054,13 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) | |||
2054 | int err; | 2054 | int err; |
2055 | 2055 | ||
2056 | sci->sc_stage.scnt = NILFS_ST_INIT; | 2056 | sci->sc_stage.scnt = NILFS_ST_INIT; |
2057 | sci->sc_cno = nilfs->ns_cno; | ||
2057 | 2058 | ||
2058 | err = nilfs_segctor_check_in_files(sci, sbi); | 2059 | err = nilfs_segctor_check_in_files(sci, sbi); |
2059 | if (unlikely(err)) | 2060 | if (unlikely(err)) |
2060 | goto out; | 2061 | goto out; |
2061 | 2062 | ||
2062 | if (nilfs_test_metadata_dirty(sbi)) | 2063 | if (nilfs_test_metadata_dirty(nilfs, sci->sc_root)) |
2063 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); | 2064 | set_bit(NILFS_SC_DIRTY, &sci->sc_flags); |
2064 | 2065 | ||
2065 | if (nilfs_segctor_clean(sci)) | 2066 | if (nilfs_segctor_clean(sci)) |
@@ -2091,7 +2092,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode) | |||
2091 | goto failed; | 2092 | goto failed; |
2092 | 2093 | ||
2093 | if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) | 2094 | if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED) |
2094 | nilfs_segctor_fill_in_file_bmap(sci, sbi->s_ifile); | 2095 | nilfs_segctor_fill_in_file_bmap(sci); |
2095 | 2096 | ||
2096 | if (mode == SC_LSEG_SR && | 2097 | if (mode == SC_LSEG_SR && |
2097 | sci->sc_stage.scnt >= NILFS_ST_CPFILE) { | 2098 | sci->sc_stage.scnt >= NILFS_ST_CPFILE) { |
@@ -2452,9 +2453,8 @@ nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head) | |||
2452 | list_for_each_entry_safe(ii, n, head, i_dirty) { | 2453 | list_for_each_entry_safe(ii, n, head, i_dirty) { |
2453 | if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) | 2454 | if (!test_bit(NILFS_I_UPDATED, &ii->i_state)) |
2454 | continue; | 2455 | continue; |
2455 | hlist_del_init(&ii->vfs_inode.i_hash); | ||
2456 | list_del_init(&ii->i_dirty); | 2456 | list_del_init(&ii->i_dirty); |
2457 | nilfs_clear_gcinode(&ii->vfs_inode); | 2457 | iput(&ii->vfs_inode); |
2458 | } | 2458 | } |
2459 | } | 2459 | } |
2460 | 2460 | ||
@@ -2472,13 +2472,15 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, | |||
2472 | 2472 | ||
2473 | nilfs_transaction_lock(sbi, &ti, 1); | 2473 | nilfs_transaction_lock(sbi, &ti, 1); |
2474 | 2474 | ||
2475 | err = nilfs_init_gcdat_inode(nilfs); | 2475 | err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat); |
2476 | if (unlikely(err)) | 2476 | if (unlikely(err)) |
2477 | goto out_unlock; | 2477 | goto out_unlock; |
2478 | 2478 | ||
2479 | err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); | 2479 | err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs); |
2480 | if (unlikely(err)) | 2480 | if (unlikely(err)) { |
2481 | nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat); | ||
2481 | goto out_unlock; | 2482 | goto out_unlock; |
2483 | } | ||
2482 | 2484 | ||
2483 | sci->sc_freesegs = kbufs[4]; | 2485 | sci->sc_freesegs = kbufs[4]; |
2484 | sci->sc_nfreesegs = argv[4].v_nmembs; | 2486 | sci->sc_nfreesegs = argv[4].v_nmembs; |
@@ -2510,7 +2512,7 @@ int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv, | |||
2510 | out_unlock: | 2512 | out_unlock: |
2511 | sci->sc_freesegs = NULL; | 2513 | sci->sc_freesegs = NULL; |
2512 | sci->sc_nfreesegs = 0; | 2514 | sci->sc_nfreesegs = 0; |
2513 | nilfs_clear_gcdat_inode(nilfs); | 2515 | nilfs_mdt_clear_shadow_map(nilfs->ns_dat); |
2514 | nilfs_transaction_unlock(sbi); | 2516 | nilfs_transaction_unlock(sbi); |
2515 | return err; | 2517 | return err; |
2516 | } | 2518 | } |
@@ -2672,6 +2674,8 @@ static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci) | |||
2672 | } | 2674 | } |
2673 | 2675 | ||
2674 | static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) | 2676 | static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) |
2677 | __acquires(&sci->sc_state_lock) | ||
2678 | __releases(&sci->sc_state_lock) | ||
2675 | { | 2679 | { |
2676 | sci->sc_state |= NILFS_SEGCTOR_QUIT; | 2680 | sci->sc_state |= NILFS_SEGCTOR_QUIT; |
2677 | 2681 | ||
@@ -2686,7 +2690,8 @@ static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci) | |||
2686 | /* | 2690 | /* |
2687 | * Setup & clean-up functions | 2691 | * Setup & clean-up functions |
2688 | */ | 2692 | */ |
2689 | static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) | 2693 | static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi, |
2694 | struct nilfs_root *root) | ||
2690 | { | 2695 | { |
2691 | struct nilfs_sc_info *sci; | 2696 | struct nilfs_sc_info *sci; |
2692 | 2697 | ||
@@ -2697,6 +2702,9 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct nilfs_sb_info *sbi) | |||
2697 | sci->sc_sbi = sbi; | 2702 | sci->sc_sbi = sbi; |
2698 | sci->sc_super = sbi->s_super; | 2703 | sci->sc_super = sbi->s_super; |
2699 | 2704 | ||
2705 | nilfs_get_root(root); | ||
2706 | sci->sc_root = root; | ||
2707 | |||
2700 | init_waitqueue_head(&sci->sc_wait_request); | 2708 | init_waitqueue_head(&sci->sc_wait_request); |
2701 | init_waitqueue_head(&sci->sc_wait_daemon); | 2709 | init_waitqueue_head(&sci->sc_wait_daemon); |
2702 | init_waitqueue_head(&sci->sc_wait_task); | 2710 | init_waitqueue_head(&sci->sc_wait_task); |
@@ -2771,6 +2779,8 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) | |||
2771 | WARN_ON(!list_empty(&sci->sc_segbufs)); | 2779 | WARN_ON(!list_empty(&sci->sc_segbufs)); |
2772 | WARN_ON(!list_empty(&sci->sc_write_logs)); | 2780 | WARN_ON(!list_empty(&sci->sc_write_logs)); |
2773 | 2781 | ||
2782 | nilfs_put_root(sci->sc_root); | ||
2783 | |||
2774 | down_write(&sbi->s_nilfs->ns_segctor_sem); | 2784 | down_write(&sbi->s_nilfs->ns_segctor_sem); |
2775 | 2785 | ||
2776 | del_timer_sync(&sci->sc_timer); | 2786 | del_timer_sync(&sci->sc_timer); |
@@ -2780,6 +2790,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) | |||
2780 | /** | 2790 | /** |
2781 | * nilfs_attach_segment_constructor - attach a segment constructor | 2791 | * nilfs_attach_segment_constructor - attach a segment constructor |
2782 | * @sbi: nilfs_sb_info | 2792 | * @sbi: nilfs_sb_info |
2793 | * @root: root object of the current filesystem tree | ||
2783 | * | 2794 | * |
2784 | * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, | 2795 | * nilfs_attach_segment_constructor() allocates a struct nilfs_sc_info, |
2785 | * initializes it, and starts the segment constructor. | 2796 | * initializes it, and starts the segment constructor. |
@@ -2789,9 +2800,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) | |||
2789 | * | 2800 | * |
2790 | * %-ENOMEM - Insufficient memory available. | 2801 | * %-ENOMEM - Insufficient memory available. |
2791 | */ | 2802 | */ |
2792 | int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) | 2803 | int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, |
2804 | struct nilfs_root *root) | ||
2793 | { | 2805 | { |
2794 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
2795 | int err; | 2806 | int err; |
2796 | 2807 | ||
2797 | if (NILFS_SC(sbi)) { | 2808 | if (NILFS_SC(sbi)) { |
@@ -2803,14 +2814,12 @@ int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi) | |||
2803 | nilfs_detach_segment_constructor(sbi); | 2814 | nilfs_detach_segment_constructor(sbi); |
2804 | } | 2815 | } |
2805 | 2816 | ||
2806 | sbi->s_sc_info = nilfs_segctor_new(sbi); | 2817 | sbi->s_sc_info = nilfs_segctor_new(sbi, root); |
2807 | if (!sbi->s_sc_info) | 2818 | if (!sbi->s_sc_info) |
2808 | return -ENOMEM; | 2819 | return -ENOMEM; |
2809 | 2820 | ||
2810 | nilfs_attach_writer(nilfs, sbi); | ||
2811 | err = nilfs_segctor_start_thread(NILFS_SC(sbi)); | 2821 | err = nilfs_segctor_start_thread(NILFS_SC(sbi)); |
2812 | if (err) { | 2822 | if (err) { |
2813 | nilfs_detach_writer(nilfs, sbi); | ||
2814 | kfree(sbi->s_sc_info); | 2823 | kfree(sbi->s_sc_info); |
2815 | sbi->s_sc_info = NULL; | 2824 | sbi->s_sc_info = NULL; |
2816 | } | 2825 | } |
@@ -2847,5 +2856,4 @@ void nilfs_detach_segment_constructor(struct nilfs_sb_info *sbi) | |||
2847 | up_write(&nilfs->ns_segctor_sem); | 2856 | up_write(&nilfs->ns_segctor_sem); |
2848 | 2857 | ||
2849 | nilfs_dispose_list(sbi, &garbage_list, 1); | 2858 | nilfs_dispose_list(sbi, &garbage_list, 1); |
2850 | nilfs_detach_writer(nilfs, sbi); | ||
2851 | } | 2859 | } |
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h index 17c487bd8152..cd8056e7cbed 100644 --- a/fs/nilfs2/segment.h +++ b/fs/nilfs2/segment.h | |||
@@ -29,6 +29,8 @@ | |||
29 | #include <linux/nilfs2_fs.h> | 29 | #include <linux/nilfs2_fs.h> |
30 | #include "sb.h" | 30 | #include "sb.h" |
31 | 31 | ||
32 | struct nilfs_root; | ||
33 | |||
32 | /** | 34 | /** |
33 | * struct nilfs_recovery_info - Recovery information | 35 | * struct nilfs_recovery_info - Recovery information |
34 | * @ri_need_recovery: Recovery status | 36 | * @ri_need_recovery: Recovery status |
@@ -87,6 +89,7 @@ struct nilfs_segsum_pointer { | |||
87 | * struct nilfs_sc_info - Segment constructor information | 89 | * struct nilfs_sc_info - Segment constructor information |
88 | * @sc_super: Back pointer to super_block struct | 90 | * @sc_super: Back pointer to super_block struct |
89 | * @sc_sbi: Back pointer to nilfs_sb_info struct | 91 | * @sc_sbi: Back pointer to nilfs_sb_info struct |
92 | * @sc_root: root object of the current filesystem tree | ||
90 | * @sc_nblk_inc: Block count of current generation | 93 | * @sc_nblk_inc: Block count of current generation |
91 | * @sc_dirty_files: List of files to be written | 94 | * @sc_dirty_files: List of files to be written |
92 | * @sc_gc_inodes: List of GC inodes having blocks to be written | 95 | * @sc_gc_inodes: List of GC inodes having blocks to be written |
@@ -107,6 +110,7 @@ struct nilfs_segsum_pointer { | |||
107 | * @sc_datablk_cnt: Data block count of a file | 110 | * @sc_datablk_cnt: Data block count of a file |
108 | * @sc_nblk_this_inc: Number of blocks included in the current logical segment | 111 | * @sc_nblk_this_inc: Number of blocks included in the current logical segment |
109 | * @sc_seg_ctime: Creation time | 112 | * @sc_seg_ctime: Creation time |
113 | * @sc_cno: checkpoint number of current log | ||
110 | * @sc_flags: Internal flags | 114 | * @sc_flags: Internal flags |
111 | * @sc_state_lock: spinlock for sc_state and so on | 115 | * @sc_state_lock: spinlock for sc_state and so on |
112 | * @sc_state: Segctord state flags | 116 | * @sc_state: Segctord state flags |
@@ -128,6 +132,7 @@ struct nilfs_segsum_pointer { | |||
128 | struct nilfs_sc_info { | 132 | struct nilfs_sc_info { |
129 | struct super_block *sc_super; | 133 | struct super_block *sc_super; |
130 | struct nilfs_sb_info *sc_sbi; | 134 | struct nilfs_sb_info *sc_sbi; |
135 | struct nilfs_root *sc_root; | ||
131 | 136 | ||
132 | unsigned long sc_nblk_inc; | 137 | unsigned long sc_nblk_inc; |
133 | 138 | ||
@@ -156,7 +161,7 @@ struct nilfs_sc_info { | |||
156 | unsigned long sc_datablk_cnt; | 161 | unsigned long sc_datablk_cnt; |
157 | unsigned long sc_nblk_this_inc; | 162 | unsigned long sc_nblk_this_inc; |
158 | time_t sc_seg_ctime; | 163 | time_t sc_seg_ctime; |
159 | 164 | __u64 sc_cno; | |
160 | unsigned long sc_flags; | 165 | unsigned long sc_flags; |
161 | 166 | ||
162 | spinlock_t sc_state_lock; | 167 | spinlock_t sc_state_lock; |
@@ -230,7 +235,8 @@ extern void nilfs_flush_segment(struct super_block *, ino_t); | |||
230 | extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, | 235 | extern int nilfs_clean_segments(struct super_block *, struct nilfs_argv *, |
231 | void **); | 236 | void **); |
232 | 237 | ||
233 | extern int nilfs_attach_segment_constructor(struct nilfs_sb_info *); | 238 | int nilfs_attach_segment_constructor(struct nilfs_sb_info *sbi, |
239 | struct nilfs_root *root); | ||
234 | extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); | 240 | extern void nilfs_detach_segment_constructor(struct nilfs_sb_info *); |
235 | 241 | ||
236 | /* recovery.c */ | 242 | /* recovery.c */ |
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3c6cc6005c2e..1d6f488ccae8 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c | |||
@@ -505,7 +505,7 @@ int nilfs_sufile_get_stat(struct inode *sufile, struct nilfs_sustat *sustat) | |||
505 | { | 505 | { |
506 | struct buffer_head *header_bh; | 506 | struct buffer_head *header_bh; |
507 | struct nilfs_sufile_header *header; | 507 | struct nilfs_sufile_header *header; |
508 | struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; | 508 | struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); |
509 | void *kaddr; | 509 | void *kaddr; |
510 | int ret; | 510 | int ret; |
511 | 511 | ||
@@ -583,7 +583,7 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, | |||
583 | struct nilfs_segment_usage *su; | 583 | struct nilfs_segment_usage *su; |
584 | struct nilfs_suinfo *si = buf; | 584 | struct nilfs_suinfo *si = buf; |
585 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; | 585 | size_t susz = NILFS_MDT(sufile)->mi_entry_size; |
586 | struct the_nilfs *nilfs = NILFS_MDT(sufile)->mi_nilfs; | 586 | struct the_nilfs *nilfs = NILFS_I_NILFS(sufile); |
587 | void *kaddr; | 587 | void *kaddr; |
588 | unsigned long nsegs, segusages_per_block; | 588 | unsigned long nsegs, segusages_per_block; |
589 | ssize_t n; | 589 | ssize_t n; |
@@ -635,46 +635,55 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, | |||
635 | } | 635 | } |
636 | 636 | ||
637 | /** | 637 | /** |
638 | * nilfs_sufile_read - read sufile inode | 638 | * nilfs_sufile_read - read or get sufile inode |
639 | * @sufile: sufile inode | 639 | * @sb: super block instance |
640 | * @susize: size of a segment usage entry | ||
640 | * @raw_inode: on-disk sufile inode | 641 | * @raw_inode: on-disk sufile inode |
642 | * @inodep: buffer to store the inode | ||
641 | */ | 643 | */ |
642 | int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode) | 644 | int nilfs_sufile_read(struct super_block *sb, size_t susize, |
645 | struct nilfs_inode *raw_inode, struct inode **inodep) | ||
643 | { | 646 | { |
644 | struct nilfs_sufile_info *sui = NILFS_SUI(sufile); | 647 | struct inode *sufile; |
648 | struct nilfs_sufile_info *sui; | ||
645 | struct buffer_head *header_bh; | 649 | struct buffer_head *header_bh; |
646 | struct nilfs_sufile_header *header; | 650 | struct nilfs_sufile_header *header; |
647 | void *kaddr; | 651 | void *kaddr; |
648 | int ret; | 652 | int err; |
649 | 653 | ||
650 | ret = nilfs_read_inode_common(sufile, raw_inode); | 654 | sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); |
651 | if (ret < 0) | 655 | if (unlikely(!sufile)) |
652 | return ret; | 656 | return -ENOMEM; |
657 | if (!(sufile->i_state & I_NEW)) | ||
658 | goto out; | ||
653 | 659 | ||
654 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | 660 | err = nilfs_mdt_init(sufile, NILFS_MDT_GFP, sizeof(*sui)); |
655 | if (!ret) { | 661 | if (err) |
656 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); | 662 | goto failed; |
657 | header = kaddr + bh_offset(header_bh); | ||
658 | sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); | ||
659 | kunmap_atomic(kaddr, KM_USER0); | ||
660 | brelse(header_bh); | ||
661 | } | ||
662 | return ret; | ||
663 | } | ||
664 | 663 | ||
665 | /** | 664 | nilfs_mdt_set_entry_size(sufile, susize, |
666 | * nilfs_sufile_new - create sufile | 665 | sizeof(struct nilfs_sufile_header)); |
667 | * @nilfs: nilfs object | 666 | |
668 | * @susize: size of a segment usage entry | 667 | err = nilfs_read_inode_common(sufile, raw_inode); |
669 | */ | 668 | if (err) |
670 | struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize) | 669 | goto failed; |
671 | { | 670 | |
672 | struct inode *sufile; | 671 | err = nilfs_sufile_get_header_block(sufile, &header_bh); |
672 | if (err) | ||
673 | goto failed; | ||
673 | 674 | ||
674 | sufile = nilfs_mdt_new(nilfs, NULL, NILFS_SUFILE_INO, | 675 | sui = NILFS_SUI(sufile); |
675 | sizeof(struct nilfs_sufile_info)); | 676 | kaddr = kmap_atomic(header_bh->b_page, KM_USER0); |
676 | if (sufile) | 677 | header = kaddr + bh_offset(header_bh); |
677 | nilfs_mdt_set_entry_size(sufile, susize, | 678 | sui->ncleansegs = le64_to_cpu(header->sh_ncleansegs); |
678 | sizeof(struct nilfs_sufile_header)); | 679 | kunmap_atomic(kaddr, KM_USER0); |
679 | return sufile; | 680 | brelse(header_bh); |
681 | |||
682 | unlock_new_inode(sufile); | ||
683 | out: | ||
684 | *inodep = sufile; | ||
685 | return 0; | ||
686 | failed: | ||
687 | iget_failed(sufile); | ||
688 | return err; | ||
680 | } | 689 | } |
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index 15163b8aff7d..a943fbacb45b 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h | |||
@@ -31,7 +31,7 @@ | |||
31 | 31 | ||
32 | static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) | 32 | static inline unsigned long nilfs_sufile_get_nsegments(struct inode *sufile) |
33 | { | 33 | { |
34 | return NILFS_MDT(sufile)->mi_nilfs->ns_nsegments; | 34 | return NILFS_I_NILFS(sufile)->ns_nsegments; |
35 | } | 35 | } |
36 | 36 | ||
37 | unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); | 37 | unsigned long nilfs_sufile_get_ncleansegs(struct inode *sufile); |
@@ -61,8 +61,8 @@ void nilfs_sufile_do_cancel_free(struct inode *, __u64, struct buffer_head *, | |||
61 | void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, | 61 | void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, |
62 | struct buffer_head *); | 62 | struct buffer_head *); |
63 | 63 | ||
64 | int nilfs_sufile_read(struct inode *sufile, struct nilfs_inode *raw_inode); | 64 | int nilfs_sufile_read(struct super_block *sb, size_t susize, |
65 | struct inode *nilfs_sufile_new(struct the_nilfs *nilfs, size_t susize); | 65 | struct nilfs_inode *raw_inode, struct inode **inodep); |
66 | 66 | ||
67 | /** | 67 | /** |
68 | * nilfs_sufile_scrap - make a segment garbage | 68 | * nilfs_sufile_scrap - make a segment garbage |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 922263393c76..35ae03c0db86 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -45,14 +45,13 @@ | |||
45 | #include <linux/parser.h> | 45 | #include <linux/parser.h> |
46 | #include <linux/random.h> | 46 | #include <linux/random.h> |
47 | #include <linux/crc32.h> | 47 | #include <linux/crc32.h> |
48 | #include <linux/smp_lock.h> | ||
49 | #include <linux/vfs.h> | 48 | #include <linux/vfs.h> |
50 | #include <linux/writeback.h> | 49 | #include <linux/writeback.h> |
51 | #include <linux/kobject.h> | 50 | #include <linux/kobject.h> |
52 | #include <linux/exportfs.h> | ||
53 | #include <linux/seq_file.h> | 51 | #include <linux/seq_file.h> |
54 | #include <linux/mount.h> | 52 | #include <linux/mount.h> |
55 | #include "nilfs.h" | 53 | #include "nilfs.h" |
54 | #include "export.h" | ||
56 | #include "mdt.h" | 55 | #include "mdt.h" |
57 | #include "alloc.h" | 56 | #include "alloc.h" |
58 | #include "btree.h" | 57 | #include "btree.h" |
@@ -69,11 +68,12 @@ MODULE_DESCRIPTION("A New Implementation of the Log-structured Filesystem " | |||
69 | "(NILFS)"); | 68 | "(NILFS)"); |
70 | MODULE_LICENSE("GPL"); | 69 | MODULE_LICENSE("GPL"); |
71 | 70 | ||
72 | struct kmem_cache *nilfs_inode_cachep; | 71 | static struct kmem_cache *nilfs_inode_cachep; |
73 | struct kmem_cache *nilfs_transaction_cachep; | 72 | struct kmem_cache *nilfs_transaction_cachep; |
74 | struct kmem_cache *nilfs_segbuf_cachep; | 73 | struct kmem_cache *nilfs_segbuf_cachep; |
75 | struct kmem_cache *nilfs_btree_path_cache; | 74 | struct kmem_cache *nilfs_btree_path_cache; |
76 | 75 | ||
76 | static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount); | ||
77 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); | 77 | static int nilfs_remount(struct super_block *sb, int *flags, char *data); |
78 | 78 | ||
79 | static void nilfs_set_error(struct nilfs_sb_info *sbi) | 79 | static void nilfs_set_error(struct nilfs_sb_info *sbi) |
@@ -147,7 +147,7 @@ void nilfs_warning(struct super_block *sb, const char *function, | |||
147 | } | 147 | } |
148 | 148 | ||
149 | 149 | ||
150 | struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) | 150 | struct inode *nilfs_alloc_inode(struct super_block *sb) |
151 | { | 151 | { |
152 | struct nilfs_inode_info *ii; | 152 | struct nilfs_inode_info *ii; |
153 | 153 | ||
@@ -156,18 +156,20 @@ struct inode *nilfs_alloc_inode_common(struct the_nilfs *nilfs) | |||
156 | return NULL; | 156 | return NULL; |
157 | ii->i_bh = NULL; | 157 | ii->i_bh = NULL; |
158 | ii->i_state = 0; | 158 | ii->i_state = 0; |
159 | ii->i_cno = 0; | ||
159 | ii->vfs_inode.i_version = 1; | 160 | ii->vfs_inode.i_version = 1; |
160 | nilfs_btnode_cache_init(&ii->i_btnode_cache, nilfs->ns_bdi); | 161 | nilfs_btnode_cache_init(&ii->i_btnode_cache, sb->s_bdi); |
161 | return &ii->vfs_inode; | 162 | return &ii->vfs_inode; |
162 | } | 163 | } |
163 | 164 | ||
164 | struct inode *nilfs_alloc_inode(struct super_block *sb) | ||
165 | { | ||
166 | return nilfs_alloc_inode_common(NILFS_SB(sb)->s_nilfs); | ||
167 | } | ||
168 | |||
169 | void nilfs_destroy_inode(struct inode *inode) | 165 | void nilfs_destroy_inode(struct inode *inode) |
170 | { | 166 | { |
167 | struct nilfs_mdt_info *mdi = NILFS_MDT(inode); | ||
168 | |||
169 | if (mdi) { | ||
170 | kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ | ||
171 | kfree(mdi); | ||
172 | } | ||
171 | kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); | 173 | kmem_cache_free(nilfs_inode_cachep, NILFS_I(inode)); |
172 | } | 174 | } |
173 | 175 | ||
@@ -178,17 +180,9 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) | |||
178 | 180 | ||
179 | retry: | 181 | retry: |
180 | set_buffer_dirty(nilfs->ns_sbh[0]); | 182 | set_buffer_dirty(nilfs->ns_sbh[0]); |
181 | |||
182 | if (nilfs_test_opt(sbi, BARRIER)) { | 183 | if (nilfs_test_opt(sbi, BARRIER)) { |
183 | err = __sync_dirty_buffer(nilfs->ns_sbh[0], | 184 | err = __sync_dirty_buffer(nilfs->ns_sbh[0], |
184 | WRITE_SYNC | WRITE_BARRIER); | 185 | WRITE_SYNC | WRITE_FLUSH_FUA); |
185 | if (err == -EOPNOTSUPP) { | ||
186 | nilfs_warning(sbi->s_super, __func__, | ||
187 | "barrier-based sync failed. " | ||
188 | "disabling barriers\n"); | ||
189 | nilfs_clear_opt(sbi, BARRIER); | ||
190 | goto retry; | ||
191 | } | ||
192 | } else { | 186 | } else { |
193 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); | 187 | err = sync_dirty_buffer(nilfs->ns_sbh[0]); |
194 | } | 188 | } |
@@ -342,8 +336,6 @@ static void nilfs_put_super(struct super_block *sb) | |||
342 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 336 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
343 | struct the_nilfs *nilfs = sbi->s_nilfs; | 337 | struct the_nilfs *nilfs = sbi->s_nilfs; |
344 | 338 | ||
345 | lock_kernel(); | ||
346 | |||
347 | nilfs_detach_segment_constructor(sbi); | 339 | nilfs_detach_segment_constructor(sbi); |
348 | 340 | ||
349 | if (!(sb->s_flags & MS_RDONLY)) { | 341 | if (!(sb->s_flags & MS_RDONLY)) { |
@@ -351,18 +343,15 @@ static void nilfs_put_super(struct super_block *sb) | |||
351 | nilfs_cleanup_super(sbi); | 343 | nilfs_cleanup_super(sbi); |
352 | up_write(&nilfs->ns_sem); | 344 | up_write(&nilfs->ns_sem); |
353 | } | 345 | } |
354 | down_write(&nilfs->ns_super_sem); | ||
355 | if (nilfs->ns_current == sbi) | ||
356 | nilfs->ns_current = NULL; | ||
357 | up_write(&nilfs->ns_super_sem); | ||
358 | 346 | ||
359 | nilfs_detach_checkpoint(sbi); | 347 | iput(nilfs->ns_sufile); |
360 | put_nilfs(sbi->s_nilfs); | 348 | iput(nilfs->ns_cpfile); |
349 | iput(nilfs->ns_dat); | ||
350 | |||
351 | destroy_nilfs(nilfs); | ||
361 | sbi->s_super = NULL; | 352 | sbi->s_super = NULL; |
362 | sb->s_fs_info = NULL; | 353 | sb->s_fs_info = NULL; |
363 | nilfs_put_sbinfo(sbi); | 354 | kfree(sbi); |
364 | |||
365 | unlock_kernel(); | ||
366 | } | 355 | } |
367 | 356 | ||
368 | static int nilfs_sync_fs(struct super_block *sb, int wait) | 357 | static int nilfs_sync_fs(struct super_block *sb, int wait) |
@@ -389,21 +378,22 @@ static int nilfs_sync_fs(struct super_block *sb, int wait) | |||
389 | return err; | 378 | return err; |
390 | } | 379 | } |
391 | 380 | ||
392 | int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | 381 | int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno, int curr_mnt, |
382 | struct nilfs_root **rootp) | ||
393 | { | 383 | { |
394 | struct the_nilfs *nilfs = sbi->s_nilfs; | 384 | struct the_nilfs *nilfs = sbi->s_nilfs; |
385 | struct nilfs_root *root; | ||
395 | struct nilfs_checkpoint *raw_cp; | 386 | struct nilfs_checkpoint *raw_cp; |
396 | struct buffer_head *bh_cp; | 387 | struct buffer_head *bh_cp; |
397 | int err; | 388 | int err = -ENOMEM; |
398 | 389 | ||
399 | down_write(&nilfs->ns_super_sem); | 390 | root = nilfs_find_or_create_root( |
400 | list_add(&sbi->s_list, &nilfs->ns_supers); | 391 | nilfs, curr_mnt ? NILFS_CPTREE_CURRENT_CNO : cno); |
401 | up_write(&nilfs->ns_super_sem); | 392 | if (!root) |
393 | return err; | ||
402 | 394 | ||
403 | err = -ENOMEM; | 395 | if (root->ifile) |
404 | sbi->s_ifile = nilfs_ifile_new(sbi, nilfs->ns_inode_size); | 396 | goto reuse; /* already attached checkpoint */ |
405 | if (!sbi->s_ifile) | ||
406 | goto delist; | ||
407 | 397 | ||
408 | down_read(&nilfs->ns_segctor_sem); | 398 | down_read(&nilfs->ns_segctor_sem); |
409 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, | 399 | err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, cno, 0, &raw_cp, |
@@ -419,45 +409,64 @@ int nilfs_attach_checkpoint(struct nilfs_sb_info *sbi, __u64 cno) | |||
419 | } | 409 | } |
420 | goto failed; | 410 | goto failed; |
421 | } | 411 | } |
422 | err = nilfs_read_inode_common(sbi->s_ifile, &raw_cp->cp_ifile_inode); | 412 | |
423 | if (unlikely(err)) | 413 | err = nilfs_ifile_read(sbi->s_super, root, nilfs->ns_inode_size, |
414 | &raw_cp->cp_ifile_inode, &root->ifile); | ||
415 | if (err) | ||
424 | goto failed_bh; | 416 | goto failed_bh; |
425 | atomic_set(&sbi->s_inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); | 417 | |
426 | atomic_set(&sbi->s_blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); | 418 | atomic_set(&root->inodes_count, le64_to_cpu(raw_cp->cp_inodes_count)); |
419 | atomic_set(&root->blocks_count, le64_to_cpu(raw_cp->cp_blocks_count)); | ||
427 | 420 | ||
428 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); | 421 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); |
422 | |||
423 | reuse: | ||
424 | *rootp = root; | ||
429 | return 0; | 425 | return 0; |
430 | 426 | ||
431 | failed_bh: | 427 | failed_bh: |
432 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); | 428 | nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, cno, bh_cp); |
433 | failed: | 429 | failed: |
434 | nilfs_mdt_destroy(sbi->s_ifile); | 430 | nilfs_put_root(root); |
435 | sbi->s_ifile = NULL; | ||
436 | 431 | ||
437 | delist: | 432 | return err; |
438 | down_write(&nilfs->ns_super_sem); | 433 | } |
439 | list_del_init(&sbi->s_list); | ||
440 | up_write(&nilfs->ns_super_sem); | ||
441 | 434 | ||
435 | static int nilfs_freeze(struct super_block *sb) | ||
436 | { | ||
437 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
438 | struct the_nilfs *nilfs = sbi->s_nilfs; | ||
439 | int err; | ||
440 | |||
441 | if (sb->s_flags & MS_RDONLY) | ||
442 | return 0; | ||
443 | |||
444 | /* Mark super block clean */ | ||
445 | down_write(&nilfs->ns_sem); | ||
446 | err = nilfs_cleanup_super(sbi); | ||
447 | up_write(&nilfs->ns_sem); | ||
442 | return err; | 448 | return err; |
443 | } | 449 | } |
444 | 450 | ||
445 | void nilfs_detach_checkpoint(struct nilfs_sb_info *sbi) | 451 | static int nilfs_unfreeze(struct super_block *sb) |
446 | { | 452 | { |
453 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | ||
447 | struct the_nilfs *nilfs = sbi->s_nilfs; | 454 | struct the_nilfs *nilfs = sbi->s_nilfs; |
448 | 455 | ||
449 | nilfs_mdt_destroy(sbi->s_ifile); | 456 | if (sb->s_flags & MS_RDONLY) |
450 | sbi->s_ifile = NULL; | 457 | return 0; |
451 | down_write(&nilfs->ns_super_sem); | 458 | |
452 | list_del_init(&sbi->s_list); | 459 | down_write(&nilfs->ns_sem); |
453 | up_write(&nilfs->ns_super_sem); | 460 | nilfs_setup_super(sbi, false); |
461 | up_write(&nilfs->ns_sem); | ||
462 | return 0; | ||
454 | } | 463 | } |
455 | 464 | ||
456 | static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | 465 | static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) |
457 | { | 466 | { |
458 | struct super_block *sb = dentry->d_sb; | 467 | struct super_block *sb = dentry->d_sb; |
459 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 468 | struct nilfs_root *root = NILFS_I(dentry->d_inode)->i_root; |
460 | struct the_nilfs *nilfs = sbi->s_nilfs; | 469 | struct the_nilfs *nilfs = root->nilfs; |
461 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 470 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
462 | unsigned long long blocks; | 471 | unsigned long long blocks; |
463 | unsigned long overhead; | 472 | unsigned long overhead; |
@@ -493,7 +502,7 @@ static int nilfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
493 | buf->f_bfree = nfreeblocks; | 502 | buf->f_bfree = nfreeblocks; |
494 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? | 503 | buf->f_bavail = (buf->f_bfree >= nrsvblocks) ? |
495 | (buf->f_bfree - nrsvblocks) : 0; | 504 | (buf->f_bfree - nrsvblocks) : 0; |
496 | buf->f_files = atomic_read(&sbi->s_inodes_count); | 505 | buf->f_files = atomic_read(&root->inodes_count); |
497 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ | 506 | buf->f_ffree = 0; /* nilfs_count_free_inodes(sb); */ |
498 | buf->f_namelen = NILFS_NAME_LEN; | 507 | buf->f_namelen = NILFS_NAME_LEN; |
499 | buf->f_fsid.val[0] = (u32)id; | 508 | buf->f_fsid.val[0] = (u32)id; |
@@ -506,12 +515,12 @@ static int nilfs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
506 | { | 515 | { |
507 | struct super_block *sb = vfs->mnt_sb; | 516 | struct super_block *sb = vfs->mnt_sb; |
508 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 517 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
518 | struct nilfs_root *root = NILFS_I(vfs->mnt_root->d_inode)->i_root; | ||
509 | 519 | ||
510 | if (!nilfs_test_opt(sbi, BARRIER)) | 520 | if (!nilfs_test_opt(sbi, BARRIER)) |
511 | seq_puts(seq, ",nobarrier"); | 521 | seq_puts(seq, ",nobarrier"); |
512 | if (nilfs_test_opt(sbi, SNAPSHOT)) | 522 | if (root->cno != NILFS_CPTREE_CURRENT_CNO) |
513 | seq_printf(seq, ",cp=%llu", | 523 | seq_printf(seq, ",cp=%llu", (unsigned long long)root->cno); |
514 | (unsigned long long int)sbi->s_snapshot_cno); | ||
515 | if (nilfs_test_opt(sbi, ERRORS_PANIC)) | 524 | if (nilfs_test_opt(sbi, ERRORS_PANIC)) |
516 | seq_puts(seq, ",errors=panic"); | 525 | seq_puts(seq, ",errors=panic"); |
517 | if (nilfs_test_opt(sbi, ERRORS_CONT)) | 526 | if (nilfs_test_opt(sbi, ERRORS_CONT)) |
@@ -537,6 +546,8 @@ static const struct super_operations nilfs_sops = { | |||
537 | .put_super = nilfs_put_super, | 546 | .put_super = nilfs_put_super, |
538 | /* .write_super = nilfs_write_super, */ | 547 | /* .write_super = nilfs_write_super, */ |
539 | .sync_fs = nilfs_sync_fs, | 548 | .sync_fs = nilfs_sync_fs, |
549 | .freeze_fs = nilfs_freeze, | ||
550 | .unfreeze_fs = nilfs_unfreeze, | ||
540 | /* .write_super_lockfs */ | 551 | /* .write_super_lockfs */ |
541 | /* .unlockfs */ | 552 | /* .unlockfs */ |
542 | .statfs = nilfs_statfs, | 553 | .statfs = nilfs_statfs, |
@@ -545,48 +556,6 @@ static const struct super_operations nilfs_sops = { | |||
545 | .show_options = nilfs_show_options | 556 | .show_options = nilfs_show_options |
546 | }; | 557 | }; |
547 | 558 | ||
548 | static struct inode * | ||
549 | nilfs_nfs_get_inode(struct super_block *sb, u64 ino, u32 generation) | ||
550 | { | ||
551 | struct inode *inode; | ||
552 | |||
553 | if (ino < NILFS_FIRST_INO(sb) && ino != NILFS_ROOT_INO && | ||
554 | ino != NILFS_SKETCH_INO) | ||
555 | return ERR_PTR(-ESTALE); | ||
556 | |||
557 | inode = nilfs_iget(sb, ino); | ||
558 | if (IS_ERR(inode)) | ||
559 | return ERR_CAST(inode); | ||
560 | if (generation && inode->i_generation != generation) { | ||
561 | iput(inode); | ||
562 | return ERR_PTR(-ESTALE); | ||
563 | } | ||
564 | |||
565 | return inode; | ||
566 | } | ||
567 | |||
568 | static struct dentry * | ||
569 | nilfs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len, | ||
570 | int fh_type) | ||
571 | { | ||
572 | return generic_fh_to_dentry(sb, fid, fh_len, fh_type, | ||
573 | nilfs_nfs_get_inode); | ||
574 | } | ||
575 | |||
576 | static struct dentry * | ||
577 | nilfs_fh_to_parent(struct super_block *sb, struct fid *fid, int fh_len, | ||
578 | int fh_type) | ||
579 | { | ||
580 | return generic_fh_to_parent(sb, fid, fh_len, fh_type, | ||
581 | nilfs_nfs_get_inode); | ||
582 | } | ||
583 | |||
584 | static const struct export_operations nilfs_export_ops = { | ||
585 | .fh_to_dentry = nilfs_fh_to_dentry, | ||
586 | .fh_to_parent = nilfs_fh_to_parent, | ||
587 | .get_parent = nilfs_get_parent, | ||
588 | }; | ||
589 | |||
590 | enum { | 559 | enum { |
591 | Opt_err_cont, Opt_err_panic, Opt_err_ro, | 560 | Opt_err_cont, Opt_err_panic, Opt_err_ro, |
592 | Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, | 561 | Opt_barrier, Opt_nobarrier, Opt_snapshot, Opt_order, Opt_norecovery, |
@@ -612,7 +581,6 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) | |||
612 | struct nilfs_sb_info *sbi = NILFS_SB(sb); | 581 | struct nilfs_sb_info *sbi = NILFS_SB(sb); |
613 | char *p; | 582 | char *p; |
614 | substring_t args[MAX_OPT_ARGS]; | 583 | substring_t args[MAX_OPT_ARGS]; |
615 | int option; | ||
616 | 584 | ||
617 | if (!options) | 585 | if (!options) |
618 | return 1; | 586 | return 1; |
@@ -650,30 +618,12 @@ static int parse_options(char *options, struct super_block *sb, int is_remount) | |||
650 | nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); | 618 | nilfs_write_opt(sbi, ERROR_MODE, ERRORS_CONT); |
651 | break; | 619 | break; |
652 | case Opt_snapshot: | 620 | case Opt_snapshot: |
653 | if (match_int(&args[0], &option) || option <= 0) | ||
654 | return 0; | ||
655 | if (is_remount) { | 621 | if (is_remount) { |
656 | if (!nilfs_test_opt(sbi, SNAPSHOT)) { | 622 | printk(KERN_ERR |
657 | printk(KERN_ERR | 623 | "NILFS: \"%s\" option is invalid " |
658 | "NILFS: cannot change regular " | 624 | "for remount.\n", p); |
659 | "mount to snapshot.\n"); | ||
660 | return 0; | ||
661 | } else if (option != sbi->s_snapshot_cno) { | ||
662 | printk(KERN_ERR | ||
663 | "NILFS: cannot remount to a " | ||
664 | "different snapshot.\n"); | ||
665 | return 0; | ||
666 | } | ||
667 | break; | ||
668 | } | ||
669 | if (!(sb->s_flags & MS_RDONLY)) { | ||
670 | printk(KERN_ERR "NILFS: cannot mount snapshot " | ||
671 | "read/write. A read-only option is " | ||
672 | "required.\n"); | ||
673 | return 0; | 625 | return 0; |
674 | } | 626 | } |
675 | sbi->s_snapshot_cno = option; | ||
676 | nilfs_set_opt(sbi, SNAPSHOT); | ||
677 | break; | 627 | break; |
678 | case Opt_norecovery: | 628 | case Opt_norecovery: |
679 | nilfs_set_opt(sbi, NORECOVERY); | 629 | nilfs_set_opt(sbi, NORECOVERY); |
@@ -701,7 +651,7 @@ nilfs_set_default_options(struct nilfs_sb_info *sbi, | |||
701 | NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; | 651 | NILFS_MOUNT_ERRORS_RO | NILFS_MOUNT_BARRIER; |
702 | } | 652 | } |
703 | 653 | ||
704 | static int nilfs_setup_super(struct nilfs_sb_info *sbi) | 654 | static int nilfs_setup_super(struct nilfs_sb_info *sbi, int is_mount) |
705 | { | 655 | { |
706 | struct the_nilfs *nilfs = sbi->s_nilfs; | 656 | struct the_nilfs *nilfs = sbi->s_nilfs; |
707 | struct nilfs_super_block **sbp; | 657 | struct nilfs_super_block **sbp; |
@@ -713,6 +663,9 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) | |||
713 | if (!sbp) | 663 | if (!sbp) |
714 | return -EIO; | 664 | return -EIO; |
715 | 665 | ||
666 | if (!is_mount) | ||
667 | goto skip_mount_setup; | ||
668 | |||
716 | max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); | 669 | max_mnt_count = le16_to_cpu(sbp[0]->s_max_mnt_count); |
717 | mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); | 670 | mnt_count = le16_to_cpu(sbp[0]->s_mnt_count); |
718 | 671 | ||
@@ -729,9 +682,11 @@ static int nilfs_setup_super(struct nilfs_sb_info *sbi) | |||
729 | sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); | 682 | sbp[0]->s_max_mnt_count = cpu_to_le16(NILFS_DFL_MAX_MNT_COUNT); |
730 | 683 | ||
731 | sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); | 684 | sbp[0]->s_mnt_count = cpu_to_le16(mnt_count + 1); |
685 | sbp[0]->s_mtime = cpu_to_le64(get_seconds()); | ||
686 | |||
687 | skip_mount_setup: | ||
732 | sbp[0]->s_state = | 688 | sbp[0]->s_state = |
733 | cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); | 689 | cpu_to_le16(le16_to_cpu(sbp[0]->s_state) & ~NILFS_VALID_FS); |
734 | sbp[0]->s_mtime = cpu_to_le64(get_seconds()); | ||
735 | /* synchronize sbp[1] with sbp[0] */ | 690 | /* synchronize sbp[1] with sbp[0] */ |
736 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); | 691 | memcpy(sbp[1], sbp[0], nilfs->ns_sbsize); |
737 | return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); | 692 | return nilfs_commit_super(sbi, NILFS_SB_COMMIT_ALL); |
@@ -798,22 +753,156 @@ int nilfs_check_feature_compatibility(struct super_block *sb, | |||
798 | return 0; | 753 | return 0; |
799 | } | 754 | } |
800 | 755 | ||
756 | static int nilfs_get_root_dentry(struct super_block *sb, | ||
757 | struct nilfs_root *root, | ||
758 | struct dentry **root_dentry) | ||
759 | { | ||
760 | struct inode *inode; | ||
761 | struct dentry *dentry; | ||
762 | int ret = 0; | ||
763 | |||
764 | inode = nilfs_iget(sb, root, NILFS_ROOT_INO); | ||
765 | if (IS_ERR(inode)) { | ||
766 | printk(KERN_ERR "NILFS: get root inode failed\n"); | ||
767 | ret = PTR_ERR(inode); | ||
768 | goto out; | ||
769 | } | ||
770 | if (!S_ISDIR(inode->i_mode) || !inode->i_blocks || !inode->i_size) { | ||
771 | iput(inode); | ||
772 | printk(KERN_ERR "NILFS: corrupt root inode.\n"); | ||
773 | ret = -EINVAL; | ||
774 | goto out; | ||
775 | } | ||
776 | |||
777 | if (root->cno == NILFS_CPTREE_CURRENT_CNO) { | ||
778 | dentry = d_find_alias(inode); | ||
779 | if (!dentry) { | ||
780 | dentry = d_alloc_root(inode); | ||
781 | if (!dentry) { | ||
782 | iput(inode); | ||
783 | ret = -ENOMEM; | ||
784 | goto failed_dentry; | ||
785 | } | ||
786 | } else { | ||
787 | iput(inode); | ||
788 | } | ||
789 | } else { | ||
790 | dentry = d_obtain_alias(inode); | ||
791 | if (IS_ERR(dentry)) { | ||
792 | ret = PTR_ERR(dentry); | ||
793 | goto failed_dentry; | ||
794 | } | ||
795 | } | ||
796 | *root_dentry = dentry; | ||
797 | out: | ||
798 | return ret; | ||
799 | |||
800 | failed_dentry: | ||
801 | printk(KERN_ERR "NILFS: get root dentry failed\n"); | ||
802 | goto out; | ||
803 | } | ||
804 | |||
805 | static int nilfs_attach_snapshot(struct super_block *s, __u64 cno, | ||
806 | struct dentry **root_dentry) | ||
807 | { | ||
808 | struct the_nilfs *nilfs = NILFS_SB(s)->s_nilfs; | ||
809 | struct nilfs_root *root; | ||
810 | int ret; | ||
811 | |||
812 | down_read(&nilfs->ns_segctor_sem); | ||
813 | ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno); | ||
814 | up_read(&nilfs->ns_segctor_sem); | ||
815 | if (ret < 0) { | ||
816 | ret = (ret == -ENOENT) ? -EINVAL : ret; | ||
817 | goto out; | ||
818 | } else if (!ret) { | ||
819 | printk(KERN_ERR "NILFS: The specified checkpoint is " | ||
820 | "not a snapshot (checkpoint number=%llu).\n", | ||
821 | (unsigned long long)cno); | ||
822 | ret = -EINVAL; | ||
823 | goto out; | ||
824 | } | ||
825 | |||
826 | ret = nilfs_attach_checkpoint(NILFS_SB(s), cno, false, &root); | ||
827 | if (ret) { | ||
828 | printk(KERN_ERR "NILFS: error loading snapshot " | ||
829 | "(checkpoint number=%llu).\n", | ||
830 | (unsigned long long)cno); | ||
831 | goto out; | ||
832 | } | ||
833 | ret = nilfs_get_root_dentry(s, root, root_dentry); | ||
834 | nilfs_put_root(root); | ||
835 | out: | ||
836 | return ret; | ||
837 | } | ||
838 | |||
839 | static int nilfs_tree_was_touched(struct dentry *root_dentry) | ||
840 | { | ||
841 | return atomic_read(&root_dentry->d_count) > 1; | ||
842 | } | ||
843 | |||
844 | /** | ||
845 | * nilfs_try_to_shrink_tree() - try to shrink dentries of a checkpoint | ||
846 | * @root_dentry: root dentry of the tree to be shrunk | ||
847 | * | ||
848 | * This function returns true if the tree was in-use. | ||
849 | */ | ||
850 | static int nilfs_try_to_shrink_tree(struct dentry *root_dentry) | ||
851 | { | ||
852 | if (have_submounts(root_dentry)) | ||
853 | return true; | ||
854 | shrink_dcache_parent(root_dentry); | ||
855 | return nilfs_tree_was_touched(root_dentry); | ||
856 | } | ||
857 | |||
858 | int nilfs_checkpoint_is_mounted(struct super_block *sb, __u64 cno) | ||
859 | { | ||
860 | struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs; | ||
861 | struct nilfs_root *root; | ||
862 | struct inode *inode; | ||
863 | struct dentry *dentry; | ||
864 | int ret; | ||
865 | |||
866 | if (cno < 0 || cno > nilfs->ns_cno) | ||
867 | return false; | ||
868 | |||
869 | if (cno >= nilfs_last_cno(nilfs)) | ||
870 | return true; /* protect recent checkpoints */ | ||
871 | |||
872 | ret = false; | ||
873 | root = nilfs_lookup_root(NILFS_SB(sb)->s_nilfs, cno); | ||
874 | if (root) { | ||
875 | inode = nilfs_ilookup(sb, root, NILFS_ROOT_INO); | ||
876 | if (inode) { | ||
877 | dentry = d_find_alias(inode); | ||
878 | if (dentry) { | ||
879 | if (nilfs_tree_was_touched(dentry)) | ||
880 | ret = nilfs_try_to_shrink_tree(dentry); | ||
881 | dput(dentry); | ||
882 | } | ||
883 | iput(inode); | ||
884 | } | ||
885 | nilfs_put_root(root); | ||
886 | } | ||
887 | return ret; | ||
888 | } | ||
889 | |||
801 | /** | 890 | /** |
802 | * nilfs_fill_super() - initialize a super block instance | 891 | * nilfs_fill_super() - initialize a super block instance |
803 | * @sb: super_block | 892 | * @sb: super_block |
804 | * @data: mount options | 893 | * @data: mount options |
805 | * @silent: silent mode flag | 894 | * @silent: silent mode flag |
806 | * @nilfs: the_nilfs struct | ||
807 | * | 895 | * |
808 | * This function is called exclusively by nilfs->ns_mount_mutex. | 896 | * This function is called exclusively by nilfs->ns_mount_mutex. |
809 | * So, the recovery process is protected from other simultaneous mounts. | 897 | * So, the recovery process is protected from other simultaneous mounts. |
810 | */ | 898 | */ |
811 | static int | 899 | static int |
812 | nilfs_fill_super(struct super_block *sb, void *data, int silent, | 900 | nilfs_fill_super(struct super_block *sb, void *data, int silent) |
813 | struct the_nilfs *nilfs) | ||
814 | { | 901 | { |
902 | struct the_nilfs *nilfs; | ||
815 | struct nilfs_sb_info *sbi; | 903 | struct nilfs_sb_info *sbi; |
816 | struct inode *root; | 904 | struct nilfs_root *fsroot; |
905 | struct backing_dev_info *bdi; | ||
817 | __u64 cno; | 906 | __u64 cno; |
818 | int err; | 907 | int err; |
819 | 908 | ||
@@ -822,19 +911,21 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
822 | return -ENOMEM; | 911 | return -ENOMEM; |
823 | 912 | ||
824 | sb->s_fs_info = sbi; | 913 | sb->s_fs_info = sbi; |
914 | sbi->s_super = sb; | ||
825 | 915 | ||
826 | get_nilfs(nilfs); | 916 | nilfs = alloc_nilfs(sb->s_bdev); |
917 | if (!nilfs) { | ||
918 | err = -ENOMEM; | ||
919 | goto failed_sbi; | ||
920 | } | ||
827 | sbi->s_nilfs = nilfs; | 921 | sbi->s_nilfs = nilfs; |
828 | sbi->s_super = sb; | ||
829 | atomic_set(&sbi->s_count, 1); | ||
830 | 922 | ||
831 | err = init_nilfs(nilfs, sbi, (char *)data); | 923 | err = init_nilfs(nilfs, sbi, (char *)data); |
832 | if (err) | 924 | if (err) |
833 | goto failed_sbi; | 925 | goto failed_nilfs; |
834 | 926 | ||
835 | spin_lock_init(&sbi->s_inode_lock); | 927 | spin_lock_init(&sbi->s_inode_lock); |
836 | INIT_LIST_HEAD(&sbi->s_dirty_files); | 928 | INIT_LIST_HEAD(&sbi->s_dirty_files); |
837 | INIT_LIST_HEAD(&sbi->s_list); | ||
838 | 929 | ||
839 | /* | 930 | /* |
840 | * Following initialization is overlapped because | 931 | * Following initialization is overlapped because |
@@ -850,94 +941,59 @@ nilfs_fill_super(struct super_block *sb, void *data, int silent, | |||
850 | sb->s_export_op = &nilfs_export_ops; | 941 | sb->s_export_op = &nilfs_export_ops; |
851 | sb->s_root = NULL; | 942 | sb->s_root = NULL; |
852 | sb->s_time_gran = 1; | 943 | sb->s_time_gran = 1; |
853 | sb->s_bdi = nilfs->ns_bdi; | 944 | |
945 | bdi = sb->s_bdev->bd_inode->i_mapping->backing_dev_info; | ||
946 | sb->s_bdi = bdi ? : &default_backing_dev_info; | ||
854 | 947 | ||
855 | err = load_nilfs(nilfs, sbi); | 948 | err = load_nilfs(nilfs, sbi); |
856 | if (err) | 949 | if (err) |
857 | goto failed_sbi; | 950 | goto failed_nilfs; |
858 | 951 | ||
859 | cno = nilfs_last_cno(nilfs); | 952 | cno = nilfs_last_cno(nilfs); |
860 | 953 | err = nilfs_attach_checkpoint(sbi, cno, true, &fsroot); | |
861 | if (sb->s_flags & MS_RDONLY) { | ||
862 | if (nilfs_test_opt(sbi, SNAPSHOT)) { | ||
863 | down_read(&nilfs->ns_segctor_sem); | ||
864 | err = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, | ||
865 | sbi->s_snapshot_cno); | ||
866 | up_read(&nilfs->ns_segctor_sem); | ||
867 | if (err < 0) { | ||
868 | if (err == -ENOENT) | ||
869 | err = -EINVAL; | ||
870 | goto failed_sbi; | ||
871 | } | ||
872 | if (!err) { | ||
873 | printk(KERN_ERR | ||
874 | "NILFS: The specified checkpoint is " | ||
875 | "not a snapshot " | ||
876 | "(checkpoint number=%llu).\n", | ||
877 | (unsigned long long)sbi->s_snapshot_cno); | ||
878 | err = -EINVAL; | ||
879 | goto failed_sbi; | ||
880 | } | ||
881 | cno = sbi->s_snapshot_cno; | ||
882 | } | ||
883 | } | ||
884 | |||
885 | err = nilfs_attach_checkpoint(sbi, cno); | ||
886 | if (err) { | 954 | if (err) { |
887 | printk(KERN_ERR "NILFS: error loading a checkpoint" | 955 | printk(KERN_ERR "NILFS: error loading last checkpoint " |
888 | " (checkpoint number=%llu).\n", (unsigned long long)cno); | 956 | "(checkpoint number=%llu).\n", (unsigned long long)cno); |
889 | goto failed_sbi; | 957 | goto failed_unload; |
890 | } | 958 | } |
891 | 959 | ||
892 | if (!(sb->s_flags & MS_RDONLY)) { | 960 | if (!(sb->s_flags & MS_RDONLY)) { |
893 | err = nilfs_attach_segment_constructor(sbi); | 961 | err = nilfs_attach_segment_constructor(sbi, fsroot); |
894 | if (err) | 962 | if (err) |
895 | goto failed_checkpoint; | 963 | goto failed_checkpoint; |
896 | } | 964 | } |
897 | 965 | ||
898 | root = nilfs_iget(sb, NILFS_ROOT_INO); | 966 | err = nilfs_get_root_dentry(sb, fsroot, &sb->s_root); |
899 | if (IS_ERR(root)) { | 967 | if (err) |
900 | printk(KERN_ERR "NILFS: get root inode failed\n"); | ||
901 | err = PTR_ERR(root); | ||
902 | goto failed_segctor; | ||
903 | } | ||
904 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | ||
905 | iput(root); | ||
906 | printk(KERN_ERR "NILFS: corrupt root inode.\n"); | ||
907 | err = -EINVAL; | ||
908 | goto failed_segctor; | ||
909 | } | ||
910 | sb->s_root = d_alloc_root(root); | ||
911 | if (!sb->s_root) { | ||
912 | iput(root); | ||
913 | printk(KERN_ERR "NILFS: get root dentry failed\n"); | ||
914 | err = -ENOMEM; | ||
915 | goto failed_segctor; | 968 | goto failed_segctor; |
916 | } | 969 | |
970 | nilfs_put_root(fsroot); | ||
917 | 971 | ||
918 | if (!(sb->s_flags & MS_RDONLY)) { | 972 | if (!(sb->s_flags & MS_RDONLY)) { |
919 | down_write(&nilfs->ns_sem); | 973 | down_write(&nilfs->ns_sem); |
920 | nilfs_setup_super(sbi); | 974 | nilfs_setup_super(sbi, true); |
921 | up_write(&nilfs->ns_sem); | 975 | up_write(&nilfs->ns_sem); |
922 | } | 976 | } |
923 | 977 | ||
924 | down_write(&nilfs->ns_super_sem); | ||
925 | if (!nilfs_test_opt(sbi, SNAPSHOT)) | ||
926 | nilfs->ns_current = sbi; | ||
927 | up_write(&nilfs->ns_super_sem); | ||
928 | |||
929 | return 0; | 978 | return 0; |
930 | 979 | ||
931 | failed_segctor: | 980 | failed_segctor: |
932 | nilfs_detach_segment_constructor(sbi); | 981 | nilfs_detach_segment_constructor(sbi); |
933 | 982 | ||
934 | failed_checkpoint: | 983 | failed_checkpoint: |
935 | nilfs_detach_checkpoint(sbi); | 984 | nilfs_put_root(fsroot); |
985 | |||
986 | failed_unload: | ||
987 | iput(nilfs->ns_sufile); | ||
988 | iput(nilfs->ns_cpfile); | ||
989 | iput(nilfs->ns_dat); | ||
990 | |||
991 | failed_nilfs: | ||
992 | destroy_nilfs(nilfs); | ||
936 | 993 | ||
937 | failed_sbi: | 994 | failed_sbi: |
938 | put_nilfs(nilfs); | ||
939 | sb->s_fs_info = NULL; | 995 | sb->s_fs_info = NULL; |
940 | nilfs_put_sbinfo(sbi); | 996 | kfree(sbi); |
941 | return err; | 997 | return err; |
942 | } | 998 | } |
943 | 999 | ||
@@ -947,15 +1003,10 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
947 | struct the_nilfs *nilfs = sbi->s_nilfs; | 1003 | struct the_nilfs *nilfs = sbi->s_nilfs; |
948 | unsigned long old_sb_flags; | 1004 | unsigned long old_sb_flags; |
949 | struct nilfs_mount_options old_opts; | 1005 | struct nilfs_mount_options old_opts; |
950 | int was_snapshot, err; | 1006 | int err; |
951 | |||
952 | lock_kernel(); | ||
953 | 1007 | ||
954 | down_write(&nilfs->ns_super_sem); | ||
955 | old_sb_flags = sb->s_flags; | 1008 | old_sb_flags = sb->s_flags; |
956 | old_opts.mount_opt = sbi->s_mount_opt; | 1009 | old_opts.mount_opt = sbi->s_mount_opt; |
957 | old_opts.snapshot_cno = sbi->s_snapshot_cno; | ||
958 | was_snapshot = nilfs_test_opt(sbi, SNAPSHOT); | ||
959 | 1010 | ||
960 | if (!parse_options(data, sb, 1)) { | 1011 | if (!parse_options(data, sb, 1)) { |
961 | err = -EINVAL; | 1012 | err = -EINVAL; |
@@ -964,11 +1015,6 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
964 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL); | 1015 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL); |
965 | 1016 | ||
966 | err = -EINVAL; | 1017 | err = -EINVAL; |
967 | if (was_snapshot && !(*flags & MS_RDONLY)) { | ||
968 | printk(KERN_ERR "NILFS (device %s): cannot remount snapshot " | ||
969 | "read/write.\n", sb->s_id); | ||
970 | goto restore_opts; | ||
971 | } | ||
972 | 1018 | ||
973 | if (!nilfs_valid_fs(nilfs)) { | 1019 | if (!nilfs_valid_fs(nilfs)) { |
974 | printk(KERN_WARNING "NILFS (device %s): couldn't " | 1020 | printk(KERN_WARNING "NILFS (device %s): couldn't " |
@@ -993,6 +1039,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
993 | up_write(&nilfs->ns_sem); | 1039 | up_write(&nilfs->ns_sem); |
994 | } else { | 1040 | } else { |
995 | __u64 features; | 1041 | __u64 features; |
1042 | struct nilfs_root *root; | ||
996 | 1043 | ||
997 | /* | 1044 | /* |
998 | * Mounting a RDONLY partition read-write, so reread and | 1045 | * Mounting a RDONLY partition read-write, so reread and |
@@ -1014,25 +1061,21 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
1014 | 1061 | ||
1015 | sb->s_flags &= ~MS_RDONLY; | 1062 | sb->s_flags &= ~MS_RDONLY; |
1016 | 1063 | ||
1017 | err = nilfs_attach_segment_constructor(sbi); | 1064 | root = NILFS_I(sb->s_root->d_inode)->i_root; |
1065 | err = nilfs_attach_segment_constructor(sbi, root); | ||
1018 | if (err) | 1066 | if (err) |
1019 | goto restore_opts; | 1067 | goto restore_opts; |
1020 | 1068 | ||
1021 | down_write(&nilfs->ns_sem); | 1069 | down_write(&nilfs->ns_sem); |
1022 | nilfs_setup_super(sbi); | 1070 | nilfs_setup_super(sbi, true); |
1023 | up_write(&nilfs->ns_sem); | 1071 | up_write(&nilfs->ns_sem); |
1024 | } | 1072 | } |
1025 | out: | 1073 | out: |
1026 | up_write(&nilfs->ns_super_sem); | ||
1027 | unlock_kernel(); | ||
1028 | return 0; | 1074 | return 0; |
1029 | 1075 | ||
1030 | restore_opts: | 1076 | restore_opts: |
1031 | sb->s_flags = old_sb_flags; | 1077 | sb->s_flags = old_sb_flags; |
1032 | sbi->s_mount_opt = old_opts.mount_opt; | 1078 | sbi->s_mount_opt = old_opts.mount_opt; |
1033 | sbi->s_snapshot_cno = old_opts.snapshot_cno; | ||
1034 | up_write(&nilfs->ns_super_sem); | ||
1035 | unlock_kernel(); | ||
1036 | return err; | 1079 | return err; |
1037 | } | 1080 | } |
1038 | 1081 | ||
@@ -1052,7 +1095,7 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) | |||
1052 | { | 1095 | { |
1053 | char *p, *options = data; | 1096 | char *p, *options = data; |
1054 | substring_t args[MAX_OPT_ARGS]; | 1097 | substring_t args[MAX_OPT_ARGS]; |
1055 | int option, token; | 1098 | int token; |
1056 | int ret = 0; | 1099 | int ret = 0; |
1057 | 1100 | ||
1058 | do { | 1101 | do { |
@@ -1060,16 +1103,18 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) | |||
1060 | if (p != NULL && *p) { | 1103 | if (p != NULL && *p) { |
1061 | token = match_token(p, tokens, args); | 1104 | token = match_token(p, tokens, args); |
1062 | if (token == Opt_snapshot) { | 1105 | if (token == Opt_snapshot) { |
1063 | if (!(sd->flags & MS_RDONLY)) | 1106 | if (!(sd->flags & MS_RDONLY)) { |
1064 | ret++; | 1107 | ret++; |
1065 | else { | 1108 | } else { |
1066 | ret = match_int(&args[0], &option); | 1109 | sd->cno = simple_strtoull(args[0].from, |
1067 | if (!ret) { | 1110 | NULL, 0); |
1068 | if (option > 0) | 1111 | /* |
1069 | sd->cno = option; | 1112 | * No need to see the end pointer; |
1070 | else | 1113 | * match_token() has done syntax |
1071 | ret++; | 1114 | * checking. |
1072 | } | 1115 | */ |
1116 | if (sd->cno == 0) | ||
1117 | ret++; | ||
1073 | } | 1118 | } |
1074 | } | 1119 | } |
1075 | if (ret) | 1120 | if (ret) |
@@ -1086,18 +1131,14 @@ static int nilfs_identify(char *data, struct nilfs_super_data *sd) | |||
1086 | 1131 | ||
1087 | static int nilfs_set_bdev_super(struct super_block *s, void *data) | 1132 | static int nilfs_set_bdev_super(struct super_block *s, void *data) |
1088 | { | 1133 | { |
1089 | struct nilfs_super_data *sd = data; | 1134 | s->s_bdev = data; |
1090 | |||
1091 | s->s_bdev = sd->bdev; | ||
1092 | s->s_dev = s->s_bdev->bd_dev; | 1135 | s->s_dev = s->s_bdev->bd_dev; |
1093 | return 0; | 1136 | return 0; |
1094 | } | 1137 | } |
1095 | 1138 | ||
1096 | static int nilfs_test_bdev_super(struct super_block *s, void *data) | 1139 | static int nilfs_test_bdev_super(struct super_block *s, void *data) |
1097 | { | 1140 | { |
1098 | struct nilfs_super_data *sd = data; | 1141 | return (void *)s->s_bdev == data; |
1099 | |||
1100 | return sd->sbi && s->s_fs_info == (void *)sd->sbi; | ||
1101 | } | 1142 | } |
1102 | 1143 | ||
1103 | static int | 1144 | static int |
@@ -1107,8 +1148,8 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1107 | struct nilfs_super_data sd; | 1148 | struct nilfs_super_data sd; |
1108 | struct super_block *s; | 1149 | struct super_block *s; |
1109 | fmode_t mode = FMODE_READ; | 1150 | fmode_t mode = FMODE_READ; |
1110 | struct the_nilfs *nilfs; | 1151 | struct dentry *root_dentry; |
1111 | int err, need_to_close = 1; | 1152 | int err, s_new = false; |
1112 | 1153 | ||
1113 | if (!(flags & MS_RDONLY)) | 1154 | if (!(flags & MS_RDONLY)) |
1114 | mode |= FMODE_WRITE; | 1155 | mode |= FMODE_WRITE; |
@@ -1117,12 +1158,6 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1117 | if (IS_ERR(sd.bdev)) | 1158 | if (IS_ERR(sd.bdev)) |
1118 | return PTR_ERR(sd.bdev); | 1159 | return PTR_ERR(sd.bdev); |
1119 | 1160 | ||
1120 | /* | ||
1121 | * To get mount instance using sget() vfs-routine, NILFS needs | ||
1122 | * much more information than normal filesystems to identify mount | ||
1123 | * instance. For snapshot mounts, not only a mount type (ro-mount | ||
1124 | * or rw-mount) but also a checkpoint number is required. | ||
1125 | */ | ||
1126 | sd.cno = 0; | 1161 | sd.cno = 0; |
1127 | sd.flags = flags; | 1162 | sd.flags = flags; |
1128 | if (nilfs_identify((char *)data, &sd)) { | 1163 | if (nilfs_identify((char *)data, &sd)) { |
@@ -1130,94 +1165,86 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags, | |||
1130 | goto failed; | 1165 | goto failed; |
1131 | } | 1166 | } |
1132 | 1167 | ||
1133 | nilfs = find_or_create_nilfs(sd.bdev); | ||
1134 | if (!nilfs) { | ||
1135 | err = -ENOMEM; | ||
1136 | goto failed; | ||
1137 | } | ||
1138 | |||
1139 | mutex_lock(&nilfs->ns_mount_mutex); | ||
1140 | |||
1141 | if (!sd.cno) { | ||
1142 | /* | ||
1143 | * Check if an exclusive mount exists or not. | ||
1144 | * Snapshot mounts coexist with a current mount | ||
1145 | * (i.e. rw-mount or ro-mount), whereas rw-mount and | ||
1146 | * ro-mount are mutually exclusive. | ||
1147 | */ | ||
1148 | down_read(&nilfs->ns_super_sem); | ||
1149 | if (nilfs->ns_current && | ||
1150 | ((nilfs->ns_current->s_super->s_flags ^ flags) | ||
1151 | & MS_RDONLY)) { | ||
1152 | up_read(&nilfs->ns_super_sem); | ||
1153 | err = -EBUSY; | ||
1154 | goto failed_unlock; | ||
1155 | } | ||
1156 | up_read(&nilfs->ns_super_sem); | ||
1157 | } | ||
1158 | |||
1159 | /* | ||
1160 | * Find existing nilfs_sb_info struct | ||
1161 | */ | ||
1162 | sd.sbi = nilfs_find_sbinfo(nilfs, !(flags & MS_RDONLY), sd.cno); | ||
1163 | |||
1164 | /* | 1168 | /* |
1165 | * Get super block instance holding the nilfs_sb_info struct. | 1169 | * once the super is inserted into the list by sget, s_umount |
1166 | * A new instance is allocated if no existing mount is present or | 1170 | * will protect the lockfs code from trying to start a snapshot |
1167 | * existing instance has been unmounted. | 1171 | * while we are mounting |
1168 | */ | 1172 | */ |
1169 | s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, &sd); | 1173 | mutex_lock(&sd.bdev->bd_fsfreeze_mutex); |
1170 | if (sd.sbi) | 1174 | if (sd.bdev->bd_fsfreeze_count > 0) { |
1171 | nilfs_put_sbinfo(sd.sbi); | 1175 | mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); |
1172 | 1176 | err = -EBUSY; | |
1177 | goto failed; | ||
1178 | } | ||
1179 | s = sget(fs_type, nilfs_test_bdev_super, nilfs_set_bdev_super, sd.bdev); | ||
1180 | mutex_unlock(&sd.bdev->bd_fsfreeze_mutex); | ||
1173 | if (IS_ERR(s)) { | 1181 | if (IS_ERR(s)) { |
1174 | err = PTR_ERR(s); | 1182 | err = PTR_ERR(s); |
1175 | goto failed_unlock; | 1183 | goto failed; |
1176 | } | 1184 | } |
1177 | 1185 | ||
1178 | if (!s->s_root) { | 1186 | if (!s->s_root) { |
1179 | char b[BDEVNAME_SIZE]; | 1187 | char b[BDEVNAME_SIZE]; |
1180 | 1188 | ||
1189 | s_new = true; | ||
1190 | |||
1181 | /* New superblock instance created */ | 1191 | /* New superblock instance created */ |
1182 | s->s_flags = flags; | 1192 | s->s_flags = flags; |
1183 | s->s_mode = mode; | 1193 | s->s_mode = mode; |
1184 | strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); | 1194 | strlcpy(s->s_id, bdevname(sd.bdev, b), sizeof(s->s_id)); |
1185 | sb_set_blocksize(s, block_size(sd.bdev)); | 1195 | sb_set_blocksize(s, block_size(sd.bdev)); |
1186 | 1196 | ||
1187 | err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0, | 1197 | err = nilfs_fill_super(s, data, flags & MS_SILENT ? 1 : 0); |
1188 | nilfs); | ||
1189 | if (err) | 1198 | if (err) |
1190 | goto cancel_new; | 1199 | goto failed_super; |
1191 | 1200 | ||
1192 | s->s_flags |= MS_ACTIVE; | 1201 | s->s_flags |= MS_ACTIVE; |
1193 | need_to_close = 0; | 1202 | } else if (!sd.cno) { |
1203 | int busy = false; | ||
1204 | |||
1205 | if (nilfs_tree_was_touched(s->s_root)) { | ||
1206 | busy = nilfs_try_to_shrink_tree(s->s_root); | ||
1207 | if (busy && (flags ^ s->s_flags) & MS_RDONLY) { | ||
1208 | printk(KERN_ERR "NILFS: the device already " | ||
1209 | "has a %s mount.\n", | ||
1210 | (s->s_flags & MS_RDONLY) ? | ||
1211 | "read-only" : "read/write"); | ||
1212 | err = -EBUSY; | ||
1213 | goto failed_super; | ||
1214 | } | ||
1215 | } | ||
1216 | if (!busy) { | ||
1217 | /* | ||
1218 | * Try remount to setup mount states if the current | ||
1219 | * tree is not mounted and only snapshots use this sb. | ||
1220 | */ | ||
1221 | err = nilfs_remount(s, &flags, data); | ||
1222 | if (err) | ||
1223 | goto failed_super; | ||
1224 | } | ||
1194 | } | 1225 | } |
1195 | 1226 | ||
1196 | mutex_unlock(&nilfs->ns_mount_mutex); | 1227 | if (sd.cno) { |
1197 | put_nilfs(nilfs); | 1228 | err = nilfs_attach_snapshot(s, sd.cno, &root_dentry); |
1198 | if (need_to_close) | 1229 | if (err) |
1199 | close_bdev_exclusive(sd.bdev, mode); | 1230 | goto failed_super; |
1200 | simple_set_mnt(mnt, s); | 1231 | } else { |
1201 | return 0; | 1232 | root_dentry = dget(s->s_root); |
1233 | } | ||
1202 | 1234 | ||
1203 | failed_unlock: | 1235 | if (!s_new) |
1204 | mutex_unlock(&nilfs->ns_mount_mutex); | 1236 | close_bdev_exclusive(sd.bdev, mode); |
1205 | put_nilfs(nilfs); | ||
1206 | failed: | ||
1207 | close_bdev_exclusive(sd.bdev, mode); | ||
1208 | 1237 | ||
1209 | return err; | 1238 | mnt->mnt_sb = s; |
1239 | mnt->mnt_root = root_dentry; | ||
1240 | return 0; | ||
1210 | 1241 | ||
1211 | cancel_new: | 1242 | failed_super: |
1212 | /* Abandoning the newly allocated superblock */ | ||
1213 | mutex_unlock(&nilfs->ns_mount_mutex); | ||
1214 | put_nilfs(nilfs); | ||
1215 | deactivate_locked_super(s); | 1243 | deactivate_locked_super(s); |
1216 | /* | 1244 | |
1217 | * deactivate_locked_super() invokes close_bdev_exclusive(). | 1245 | failed: |
1218 | * We must finish all post-cleaning before this call; | 1246 | if (!s_new) |
1219 | * put_nilfs() needs the block device. | 1247 | close_bdev_exclusive(sd.bdev, mode); |
1220 | */ | ||
1221 | return err; | 1248 | return err; |
1222 | } | 1249 | } |
1223 | 1250 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index ba7c10c917fc..0254be2d73c6 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -35,9 +35,6 @@ | |||
35 | #include "segbuf.h" | 35 | #include "segbuf.h" |
36 | 36 | ||
37 | 37 | ||
38 | static LIST_HEAD(nilfs_objects); | ||
39 | static DEFINE_SPINLOCK(nilfs_lock); | ||
40 | |||
41 | static int nilfs_valid_sb(struct nilfs_super_block *sbp); | 38 | static int nilfs_valid_sb(struct nilfs_super_block *sbp); |
42 | 39 | ||
43 | void nilfs_set_last_segment(struct the_nilfs *nilfs, | 40 | void nilfs_set_last_segment(struct the_nilfs *nilfs, |
@@ -61,16 +58,13 @@ void nilfs_set_last_segment(struct the_nilfs *nilfs, | |||
61 | } | 58 | } |
62 | 59 | ||
63 | /** | 60 | /** |
64 | * alloc_nilfs - allocate the_nilfs structure | 61 | * alloc_nilfs - allocate a nilfs object |
65 | * @bdev: block device to which the_nilfs is related | 62 | * @bdev: block device to which the_nilfs is related |
66 | * | 63 | * |
67 | * alloc_nilfs() allocates memory for the_nilfs and | ||
68 | * initializes its reference count and locks. | ||
69 | * | ||
70 | * Return Value: On success, pointer to the_nilfs is returned. | 64 | * Return Value: On success, pointer to the_nilfs is returned. |
71 | * On error, NULL is returned. | 65 | * On error, NULL is returned. |
72 | */ | 66 | */ |
73 | static struct the_nilfs *alloc_nilfs(struct block_device *bdev) | 67 | struct the_nilfs *alloc_nilfs(struct block_device *bdev) |
74 | { | 68 | { |
75 | struct the_nilfs *nilfs; | 69 | struct the_nilfs *nilfs; |
76 | 70 | ||
@@ -79,103 +73,38 @@ static struct the_nilfs *alloc_nilfs(struct block_device *bdev) | |||
79 | return NULL; | 73 | return NULL; |
80 | 74 | ||
81 | nilfs->ns_bdev = bdev; | 75 | nilfs->ns_bdev = bdev; |
82 | atomic_set(&nilfs->ns_count, 1); | ||
83 | atomic_set(&nilfs->ns_ndirtyblks, 0); | 76 | atomic_set(&nilfs->ns_ndirtyblks, 0); |
84 | init_rwsem(&nilfs->ns_sem); | 77 | init_rwsem(&nilfs->ns_sem); |
85 | init_rwsem(&nilfs->ns_super_sem); | 78 | INIT_LIST_HEAD(&nilfs->ns_gc_inodes); |
86 | mutex_init(&nilfs->ns_mount_mutex); | ||
87 | init_rwsem(&nilfs->ns_writer_sem); | ||
88 | INIT_LIST_HEAD(&nilfs->ns_list); | ||
89 | INIT_LIST_HEAD(&nilfs->ns_supers); | ||
90 | spin_lock_init(&nilfs->ns_last_segment_lock); | 79 | spin_lock_init(&nilfs->ns_last_segment_lock); |
91 | nilfs->ns_gc_inodes_h = NULL; | 80 | nilfs->ns_cptree = RB_ROOT; |
81 | spin_lock_init(&nilfs->ns_cptree_lock); | ||
92 | init_rwsem(&nilfs->ns_segctor_sem); | 82 | init_rwsem(&nilfs->ns_segctor_sem); |
93 | 83 | ||
94 | return nilfs; | 84 | return nilfs; |
95 | } | 85 | } |
96 | 86 | ||
97 | /** | 87 | /** |
98 | * find_or_create_nilfs - find or create nilfs object | 88 | * destroy_nilfs - destroy nilfs object |
99 | * @bdev: block device to which the_nilfs is related | 89 | * @nilfs: nilfs object to be released |
100 | * | ||
101 | * find_nilfs() looks up an existent nilfs object created on the | ||
102 | * device and gets the reference count of the object. If no nilfs object | ||
103 | * is found on the device, a new nilfs object is allocated. | ||
104 | * | ||
105 | * Return Value: On success, pointer to the nilfs object is returned. | ||
106 | * On error, NULL is returned. | ||
107 | */ | ||
108 | struct the_nilfs *find_or_create_nilfs(struct block_device *bdev) | ||
109 | { | ||
110 | struct the_nilfs *nilfs, *new = NULL; | ||
111 | |||
112 | retry: | ||
113 | spin_lock(&nilfs_lock); | ||
114 | list_for_each_entry(nilfs, &nilfs_objects, ns_list) { | ||
115 | if (nilfs->ns_bdev == bdev) { | ||
116 | get_nilfs(nilfs); | ||
117 | spin_unlock(&nilfs_lock); | ||
118 | if (new) | ||
119 | put_nilfs(new); | ||
120 | return nilfs; /* existing object */ | ||
121 | } | ||
122 | } | ||
123 | if (new) { | ||
124 | list_add_tail(&new->ns_list, &nilfs_objects); | ||
125 | spin_unlock(&nilfs_lock); | ||
126 | return new; /* new object */ | ||
127 | } | ||
128 | spin_unlock(&nilfs_lock); | ||
129 | |||
130 | new = alloc_nilfs(bdev); | ||
131 | if (new) | ||
132 | goto retry; | ||
133 | return NULL; /* insufficient memory */ | ||
134 | } | ||
135 | |||
136 | /** | ||
137 | * put_nilfs - release a reference to the_nilfs | ||
138 | * @nilfs: the_nilfs structure to be released | ||
139 | * | ||
140 | * put_nilfs() decrements a reference counter of the_nilfs. | ||
141 | * If the reference count reaches zero, the_nilfs is freed. | ||
142 | */ | 90 | */ |
143 | void put_nilfs(struct the_nilfs *nilfs) | 91 | void destroy_nilfs(struct the_nilfs *nilfs) |
144 | { | 92 | { |
145 | spin_lock(&nilfs_lock); | ||
146 | if (!atomic_dec_and_test(&nilfs->ns_count)) { | ||
147 | spin_unlock(&nilfs_lock); | ||
148 | return; | ||
149 | } | ||
150 | list_del_init(&nilfs->ns_list); | ||
151 | spin_unlock(&nilfs_lock); | ||
152 | |||
153 | /* | ||
154 | * Increment of ns_count never occurs below because the caller | ||
155 | * of get_nilfs() holds at least one reference to the_nilfs. | ||
156 | * Thus its exclusion control is not required here. | ||
157 | */ | ||
158 | |||
159 | might_sleep(); | 93 | might_sleep(); |
160 | if (nilfs_loaded(nilfs)) { | ||
161 | nilfs_mdt_destroy(nilfs->ns_sufile); | ||
162 | nilfs_mdt_destroy(nilfs->ns_cpfile); | ||
163 | nilfs_mdt_destroy(nilfs->ns_dat); | ||
164 | nilfs_mdt_destroy(nilfs->ns_gc_dat); | ||
165 | } | ||
166 | if (nilfs_init(nilfs)) { | 94 | if (nilfs_init(nilfs)) { |
167 | nilfs_destroy_gccache(nilfs); | ||
168 | brelse(nilfs->ns_sbh[0]); | 95 | brelse(nilfs->ns_sbh[0]); |
169 | brelse(nilfs->ns_sbh[1]); | 96 | brelse(nilfs->ns_sbh[1]); |
170 | } | 97 | } |
171 | kfree(nilfs); | 98 | kfree(nilfs); |
172 | } | 99 | } |
173 | 100 | ||
174 | static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) | 101 | static int nilfs_load_super_root(struct the_nilfs *nilfs, |
102 | struct super_block *sb, sector_t sr_block) | ||
175 | { | 103 | { |
176 | struct buffer_head *bh_sr; | 104 | struct buffer_head *bh_sr; |
177 | struct nilfs_super_root *raw_sr; | 105 | struct nilfs_super_root *raw_sr; |
178 | struct nilfs_super_block **sbp = nilfs->ns_sbp; | 106 | struct nilfs_super_block **sbp = nilfs->ns_sbp; |
107 | struct nilfs_inode *rawi; | ||
179 | unsigned dat_entry_size, segment_usage_size, checkpoint_size; | 108 | unsigned dat_entry_size, segment_usage_size, checkpoint_size; |
180 | unsigned inode_size; | 109 | unsigned inode_size; |
181 | int err; | 110 | int err; |
@@ -192,40 +121,22 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) | |||
192 | 121 | ||
193 | inode_size = nilfs->ns_inode_size; | 122 | inode_size = nilfs->ns_inode_size; |
194 | 123 | ||
195 | err = -ENOMEM; | 124 | rawi = (void *)bh_sr->b_data + NILFS_SR_DAT_OFFSET(inode_size); |
196 | nilfs->ns_dat = nilfs_dat_new(nilfs, dat_entry_size); | 125 | err = nilfs_dat_read(sb, dat_entry_size, rawi, &nilfs->ns_dat); |
197 | if (unlikely(!nilfs->ns_dat)) | 126 | if (err) |
198 | goto failed; | 127 | goto failed; |
199 | 128 | ||
200 | nilfs->ns_gc_dat = nilfs_dat_new(nilfs, dat_entry_size); | 129 | rawi = (void *)bh_sr->b_data + NILFS_SR_CPFILE_OFFSET(inode_size); |
201 | if (unlikely(!nilfs->ns_gc_dat)) | 130 | err = nilfs_cpfile_read(sb, checkpoint_size, rawi, &nilfs->ns_cpfile); |
131 | if (err) | ||
202 | goto failed_dat; | 132 | goto failed_dat; |
203 | 133 | ||
204 | nilfs->ns_cpfile = nilfs_cpfile_new(nilfs, checkpoint_size); | 134 | rawi = (void *)bh_sr->b_data + NILFS_SR_SUFILE_OFFSET(inode_size); |
205 | if (unlikely(!nilfs->ns_cpfile)) | 135 | err = nilfs_sufile_read(sb, segment_usage_size, rawi, |
206 | goto failed_gc_dat; | 136 | &nilfs->ns_sufile); |
207 | 137 | if (err) | |
208 | nilfs->ns_sufile = nilfs_sufile_new(nilfs, segment_usage_size); | ||
209 | if (unlikely(!nilfs->ns_sufile)) | ||
210 | goto failed_cpfile; | 138 | goto failed_cpfile; |
211 | 139 | ||
212 | nilfs_mdt_set_shadow(nilfs->ns_dat, nilfs->ns_gc_dat); | ||
213 | |||
214 | err = nilfs_dat_read(nilfs->ns_dat, (void *)bh_sr->b_data + | ||
215 | NILFS_SR_DAT_OFFSET(inode_size)); | ||
216 | if (unlikely(err)) | ||
217 | goto failed_sufile; | ||
218 | |||
219 | err = nilfs_cpfile_read(nilfs->ns_cpfile, (void *)bh_sr->b_data + | ||
220 | NILFS_SR_CPFILE_OFFSET(inode_size)); | ||
221 | if (unlikely(err)) | ||
222 | goto failed_sufile; | ||
223 | |||
224 | err = nilfs_sufile_read(nilfs->ns_sufile, (void *)bh_sr->b_data + | ||
225 | NILFS_SR_SUFILE_OFFSET(inode_size)); | ||
226 | if (unlikely(err)) | ||
227 | goto failed_sufile; | ||
228 | |||
229 | raw_sr = (struct nilfs_super_root *)bh_sr->b_data; | 140 | raw_sr = (struct nilfs_super_root *)bh_sr->b_data; |
230 | nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); | 141 | nilfs->ns_nongc_ctime = le64_to_cpu(raw_sr->sr_nongc_ctime); |
231 | 142 | ||
@@ -233,17 +144,11 @@ static int nilfs_load_super_root(struct the_nilfs *nilfs, sector_t sr_block) | |||
233 | brelse(bh_sr); | 144 | brelse(bh_sr); |
234 | return err; | 145 | return err; |
235 | 146 | ||
236 | failed_sufile: | ||
237 | nilfs_mdt_destroy(nilfs->ns_sufile); | ||
238 | |||
239 | failed_cpfile: | 147 | failed_cpfile: |
240 | nilfs_mdt_destroy(nilfs->ns_cpfile); | 148 | iput(nilfs->ns_cpfile); |
241 | |||
242 | failed_gc_dat: | ||
243 | nilfs_mdt_destroy(nilfs->ns_gc_dat); | ||
244 | 149 | ||
245 | failed_dat: | 150 | failed_dat: |
246 | nilfs_mdt_destroy(nilfs->ns_dat); | 151 | iput(nilfs->ns_dat); |
247 | goto failed; | 152 | goto failed; |
248 | } | 153 | } |
249 | 154 | ||
@@ -306,15 +211,6 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
306 | int valid_fs = nilfs_valid_fs(nilfs); | 211 | int valid_fs = nilfs_valid_fs(nilfs); |
307 | int err; | 212 | int err; |
308 | 213 | ||
309 | if (nilfs_loaded(nilfs)) { | ||
310 | if (valid_fs || | ||
311 | ((s_flags & MS_RDONLY) && nilfs_test_opt(sbi, NORECOVERY))) | ||
312 | return 0; | ||
313 | printk(KERN_ERR "NILFS: the filesystem is in an incomplete " | ||
314 | "recovery state.\n"); | ||
315 | return -EINVAL; | ||
316 | } | ||
317 | |||
318 | if (!valid_fs) { | 214 | if (!valid_fs) { |
319 | printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); | 215 | printk(KERN_WARNING "NILFS warning: mounting unchecked fs\n"); |
320 | if (s_flags & MS_RDONLY) { | 216 | if (s_flags & MS_RDONLY) { |
@@ -375,7 +271,7 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
375 | goto scan_error; | 271 | goto scan_error; |
376 | } | 272 | } |
377 | 273 | ||
378 | err = nilfs_load_super_root(nilfs, ri.ri_super_root); | 274 | err = nilfs_load_super_root(nilfs, sbi->s_super, ri.ri_super_root); |
379 | if (unlikely(err)) { | 275 | if (unlikely(err)) { |
380 | printk(KERN_ERR "NILFS: error loading super root.\n"); | 276 | printk(KERN_ERR "NILFS: error loading super root.\n"); |
381 | goto failed; | 277 | goto failed; |
@@ -443,10 +339,9 @@ int load_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | |||
443 | goto failed; | 339 | goto failed; |
444 | 340 | ||
445 | failed_unload: | 341 | failed_unload: |
446 | nilfs_mdt_destroy(nilfs->ns_cpfile); | 342 | iput(nilfs->ns_cpfile); |
447 | nilfs_mdt_destroy(nilfs->ns_sufile); | 343 | iput(nilfs->ns_sufile); |
448 | nilfs_mdt_destroy(nilfs->ns_dat); | 344 | iput(nilfs->ns_dat); |
449 | nilfs_mdt_destroy(nilfs->ns_gc_dat); | ||
450 | 345 | ||
451 | failed: | 346 | failed: |
452 | nilfs_clear_recovery_info(&ri); | 347 | nilfs_clear_recovery_info(&ri); |
@@ -468,8 +363,8 @@ static unsigned long long nilfs_max_size(unsigned int blkbits) | |||
468 | static int nilfs_store_disk_layout(struct the_nilfs *nilfs, | 363 | static int nilfs_store_disk_layout(struct the_nilfs *nilfs, |
469 | struct nilfs_super_block *sbp) | 364 | struct nilfs_super_block *sbp) |
470 | { | 365 | { |
471 | if (le32_to_cpu(sbp->s_rev_level) != NILFS_CURRENT_REV) { | 366 | if (le32_to_cpu(sbp->s_rev_level) < NILFS_MIN_SUPP_REV) { |
472 | printk(KERN_ERR "NILFS: revision mismatch " | 367 | printk(KERN_ERR "NILFS: unsupported revision " |
473 | "(superblock rev.=%d.%d, current rev.=%d.%d). " | 368 | "(superblock rev.=%d.%d, current rev.=%d.%d). " |
474 | "Please check the version of mkfs.nilfs.\n", | 369 | "Please check the version of mkfs.nilfs.\n", |
475 | le32_to_cpu(sbp->s_rev_level), | 370 | le32_to_cpu(sbp->s_rev_level), |
@@ -631,12 +526,7 @@ static int nilfs_load_super_block(struct the_nilfs *nilfs, | |||
631 | * | 526 | * |
632 | * init_nilfs() performs common initialization per block device (e.g. | 527 | * init_nilfs() performs common initialization per block device (e.g. |
633 | * reading the super block, getting disk layout information, initializing | 528 | * reading the super block, getting disk layout information, initializing |
634 | * shared fields in the_nilfs). It takes on some portion of the jobs | 529 | * shared fields in the_nilfs). |
635 | * typically done by a fill_super() routine. This division arises from | ||
636 | * the nature that multiple NILFS instances may be simultaneously | ||
637 | * mounted on a device. | ||
638 | * For multiple mounts on the same device, only the first mount | ||
639 | * invokes these tasks. | ||
640 | * | 530 | * |
641 | * Return Value: On success, 0 is returned. On error, a negative error | 531 | * Return Value: On success, 0 is returned. On error, a negative error |
642 | * code is returned. | 532 | * code is returned. |
@@ -645,32 +535,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
645 | { | 535 | { |
646 | struct super_block *sb = sbi->s_super; | 536 | struct super_block *sb = sbi->s_super; |
647 | struct nilfs_super_block *sbp; | 537 | struct nilfs_super_block *sbp; |
648 | struct backing_dev_info *bdi; | ||
649 | int blocksize; | 538 | int blocksize; |
650 | int err; | 539 | int err; |
651 | 540 | ||
652 | down_write(&nilfs->ns_sem); | 541 | down_write(&nilfs->ns_sem); |
653 | if (nilfs_init(nilfs)) { | ||
654 | /* Load values from existing the_nilfs */ | ||
655 | sbp = nilfs->ns_sbp[0]; | ||
656 | err = nilfs_store_magic_and_option(sb, sbp, data); | ||
657 | if (err) | ||
658 | goto out; | ||
659 | |||
660 | err = nilfs_check_feature_compatibility(sb, sbp); | ||
661 | if (err) | ||
662 | goto out; | ||
663 | |||
664 | blocksize = BLOCK_SIZE << le32_to_cpu(sbp->s_log_block_size); | ||
665 | if (sb->s_blocksize != blocksize && | ||
666 | !sb_set_blocksize(sb, blocksize)) { | ||
667 | printk(KERN_ERR "NILFS: blocksize %d unfit to device\n", | ||
668 | blocksize); | ||
669 | err = -EINVAL; | ||
670 | } | ||
671 | sb->s_maxbytes = nilfs_max_size(sb->s_blocksize_bits); | ||
672 | goto out; | ||
673 | } | ||
674 | 542 | ||
675 | blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); | 543 | blocksize = sb_min_blocksize(sb, NILFS_MIN_BLOCK_SIZE); |
676 | if (!blocksize) { | 544 | if (!blocksize) { |
@@ -729,18 +597,10 @@ int init_nilfs(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi, char *data) | |||
729 | 597 | ||
730 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); | 598 | nilfs->ns_mount_state = le16_to_cpu(sbp->s_state); |
731 | 599 | ||
732 | bdi = nilfs->ns_bdev->bd_inode->i_mapping->backing_dev_info; | ||
733 | nilfs->ns_bdi = bdi ? : &default_backing_dev_info; | ||
734 | |||
735 | err = nilfs_store_log_cursor(nilfs, sbp); | 600 | err = nilfs_store_log_cursor(nilfs, sbp); |
736 | if (err) | 601 | if (err) |
737 | goto failed_sbh; | 602 | goto failed_sbh; |
738 | 603 | ||
739 | /* Initialize gcinode cache */ | ||
740 | err = nilfs_init_gccache(nilfs); | ||
741 | if (err) | ||
742 | goto failed_sbh; | ||
743 | |||
744 | set_nilfs_init(nilfs); | 604 | set_nilfs_init(nilfs); |
745 | err = 0; | 605 | err = 0; |
746 | out: | 606 | out: |
@@ -775,9 +635,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
775 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 635 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
776 | start * sects_per_block, | 636 | start * sects_per_block, |
777 | nblocks * sects_per_block, | 637 | nblocks * sects_per_block, |
778 | GFP_NOFS, | 638 | GFP_NOFS, 0); |
779 | BLKDEV_IFL_WAIT | | ||
780 | BLKDEV_IFL_BARRIER); | ||
781 | if (ret < 0) | 639 | if (ret < 0) |
782 | return ret; | 640 | return ret; |
783 | nblocks = 0; | 641 | nblocks = 0; |
@@ -787,8 +645,7 @@ int nilfs_discard_segments(struct the_nilfs *nilfs, __u64 *segnump, | |||
787 | ret = blkdev_issue_discard(nilfs->ns_bdev, | 645 | ret = blkdev_issue_discard(nilfs->ns_bdev, |
788 | start * sects_per_block, | 646 | start * sects_per_block, |
789 | nblocks * sects_per_block, | 647 | nblocks * sects_per_block, |
790 | GFP_NOFS, | 648 | GFP_NOFS, 0); |
791 | BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); | ||
792 | return ret; | 649 | return ret; |
793 | } | 650 | } |
794 | 651 | ||
@@ -815,79 +672,92 @@ int nilfs_near_disk_full(struct the_nilfs *nilfs) | |||
815 | return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; | 672 | return ncleansegs <= nilfs->ns_nrsvsegs + nincsegs; |
816 | } | 673 | } |
817 | 674 | ||
818 | /** | 675 | struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno) |
819 | * nilfs_find_sbinfo - find existing nilfs_sb_info structure | ||
820 | * @nilfs: nilfs object | ||
821 | * @rw_mount: mount type (non-zero value for read/write mount) | ||
822 | * @cno: checkpoint number (zero for read-only mount) | ||
823 | * | ||
824 | * nilfs_find_sbinfo() returns the nilfs_sb_info structure which | ||
825 | * @rw_mount and @cno (in case of snapshots) matched. If no instance | ||
826 | * was found, NULL is returned. Although the super block instance can | ||
827 | * be unmounted after this function returns, the nilfs_sb_info struct | ||
828 | * is kept on memory until nilfs_put_sbinfo() is called. | ||
829 | */ | ||
830 | struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *nilfs, | ||
831 | int rw_mount, __u64 cno) | ||
832 | { | 676 | { |
833 | struct nilfs_sb_info *sbi; | 677 | struct rb_node *n; |
834 | 678 | struct nilfs_root *root; | |
835 | down_read(&nilfs->ns_super_sem); | 679 | |
836 | /* | 680 | spin_lock(&nilfs->ns_cptree_lock); |
837 | * The SNAPSHOT flag and sb->s_flags are supposed to be | 681 | n = nilfs->ns_cptree.rb_node; |
838 | * protected with nilfs->ns_super_sem. | 682 | while (n) { |
839 | */ | 683 | root = rb_entry(n, struct nilfs_root, rb_node); |
840 | sbi = nilfs->ns_current; | 684 | |
841 | if (rw_mount) { | 685 | if (cno < root->cno) { |
842 | if (sbi && !(sbi->s_super->s_flags & MS_RDONLY)) | 686 | n = n->rb_left; |
843 | goto found; /* read/write mount */ | 687 | } else if (cno > root->cno) { |
844 | else | 688 | n = n->rb_right; |
845 | goto out; | 689 | } else { |
846 | } else if (cno == 0) { | 690 | atomic_inc(&root->count); |
847 | if (sbi && (sbi->s_super->s_flags & MS_RDONLY)) | 691 | spin_unlock(&nilfs->ns_cptree_lock); |
848 | goto found; /* read-only mount */ | 692 | return root; |
849 | else | 693 | } |
850 | goto out; | ||
851 | } | 694 | } |
695 | spin_unlock(&nilfs->ns_cptree_lock); | ||
852 | 696 | ||
853 | list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { | ||
854 | if (nilfs_test_opt(sbi, SNAPSHOT) && | ||
855 | sbi->s_snapshot_cno == cno) | ||
856 | goto found; /* snapshot mount */ | ||
857 | } | ||
858 | out: | ||
859 | up_read(&nilfs->ns_super_sem); | ||
860 | return NULL; | 697 | return NULL; |
861 | |||
862 | found: | ||
863 | atomic_inc(&sbi->s_count); | ||
864 | up_read(&nilfs->ns_super_sem); | ||
865 | return sbi; | ||
866 | } | 698 | } |
867 | 699 | ||
868 | int nilfs_checkpoint_is_mounted(struct the_nilfs *nilfs, __u64 cno, | 700 | struct nilfs_root * |
869 | int snapshot_mount) | 701 | nilfs_find_or_create_root(struct the_nilfs *nilfs, __u64 cno) |
870 | { | 702 | { |
871 | struct nilfs_sb_info *sbi; | 703 | struct rb_node **p, *parent; |
872 | int ret = 0; | 704 | struct nilfs_root *root, *new; |
873 | 705 | ||
874 | down_read(&nilfs->ns_super_sem); | 706 | root = nilfs_lookup_root(nilfs, cno); |
875 | if (cno == 0 || cno > nilfs->ns_cno) | 707 | if (root) |
876 | goto out_unlock; | 708 | return root; |
877 | 709 | ||
878 | list_for_each_entry(sbi, &nilfs->ns_supers, s_list) { | 710 | new = kmalloc(sizeof(*root), GFP_KERNEL); |
879 | if (sbi->s_snapshot_cno == cno && | 711 | if (!new) |
880 | (!snapshot_mount || nilfs_test_opt(sbi, SNAPSHOT))) { | 712 | return NULL; |
881 | /* exclude read-only mounts */ | 713 | |
882 | ret++; | 714 | spin_lock(&nilfs->ns_cptree_lock); |
883 | break; | 715 | |
716 | p = &nilfs->ns_cptree.rb_node; | ||
717 | parent = NULL; | ||
718 | |||
719 | while (*p) { | ||
720 | parent = *p; | ||
721 | root = rb_entry(parent, struct nilfs_root, rb_node); | ||
722 | |||
723 | if (cno < root->cno) { | ||
724 | p = &(*p)->rb_left; | ||
725 | } else if (cno > root->cno) { | ||
726 | p = &(*p)->rb_right; | ||
727 | } else { | ||
728 | atomic_inc(&root->count); | ||
729 | spin_unlock(&nilfs->ns_cptree_lock); | ||
730 | kfree(new); | ||
731 | return root; | ||
884 | } | 732 | } |
885 | } | 733 | } |
886 | /* for protecting recent checkpoints */ | ||
887 | if (cno >= nilfs_last_cno(nilfs)) | ||
888 | ret++; | ||
889 | 734 | ||
890 | out_unlock: | 735 | new->cno = cno; |
891 | up_read(&nilfs->ns_super_sem); | 736 | new->ifile = NULL; |
892 | return ret; | 737 | new->nilfs = nilfs; |
738 | atomic_set(&new->count, 1); | ||
739 | atomic_set(&new->inodes_count, 0); | ||
740 | atomic_set(&new->blocks_count, 0); | ||
741 | |||
742 | rb_link_node(&new->rb_node, parent, p); | ||
743 | rb_insert_color(&new->rb_node, &nilfs->ns_cptree); | ||
744 | |||
745 | spin_unlock(&nilfs->ns_cptree_lock); | ||
746 | |||
747 | return new; | ||
748 | } | ||
749 | |||
750 | void nilfs_put_root(struct nilfs_root *root) | ||
751 | { | ||
752 | if (atomic_dec_and_test(&root->count)) { | ||
753 | struct the_nilfs *nilfs = root->nilfs; | ||
754 | |||
755 | spin_lock(&nilfs->ns_cptree_lock); | ||
756 | rb_erase(&root->rb_node, &nilfs->ns_cptree); | ||
757 | spin_unlock(&nilfs->ns_cptree_lock); | ||
758 | if (root->ifile) | ||
759 | iput(root->ifile); | ||
760 | |||
761 | kfree(root); | ||
762 | } | ||
893 | } | 763 | } |
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h index f785a7b0ab99..69226e14b745 100644 --- a/fs/nilfs2/the_nilfs.h +++ b/fs/nilfs2/the_nilfs.h | |||
@@ -26,6 +26,7 @@ | |||
26 | 26 | ||
27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
28 | #include <linux/buffer_head.h> | 28 | #include <linux/buffer_head.h> |
29 | #include <linux/rbtree.h> | ||
29 | #include <linux/fs.h> | 30 | #include <linux/fs.h> |
30 | #include <linux/blkdev.h> | 31 | #include <linux/blkdev.h> |
31 | #include <linux/backing-dev.h> | 32 | #include <linux/backing-dev.h> |
@@ -45,22 +46,13 @@ enum { | |||
45 | /** | 46 | /** |
46 | * struct the_nilfs - struct to supervise multiple nilfs mount points | 47 | * struct the_nilfs - struct to supervise multiple nilfs mount points |
47 | * @ns_flags: flags | 48 | * @ns_flags: flags |
48 | * @ns_count: reference count | ||
49 | * @ns_list: list head for nilfs_list | ||
50 | * @ns_bdev: block device | 49 | * @ns_bdev: block device |
51 | * @ns_bdi: backing dev info | ||
52 | * @ns_writer: back pointer to writable nilfs_sb_info | ||
53 | * @ns_sem: semaphore for shared states | 50 | * @ns_sem: semaphore for shared states |
54 | * @ns_super_sem: semaphore for global operations across super block instances | ||
55 | * @ns_mount_mutex: mutex protecting mount process of nilfs | ||
56 | * @ns_writer_sem: semaphore protecting ns_writer attach/detach | ||
57 | * @ns_current: back pointer to current mount | ||
58 | * @ns_sbh: buffer heads of on-disk super blocks | 51 | * @ns_sbh: buffer heads of on-disk super blocks |
59 | * @ns_sbp: pointers to super block data | 52 | * @ns_sbp: pointers to super block data |
60 | * @ns_sbwtime: previous write time of super block | 53 | * @ns_sbwtime: previous write time of super block |
61 | * @ns_sbwcount: write count of super block | 54 | * @ns_sbwcount: write count of super block |
62 | * @ns_sbsize: size of valid data in super block | 55 | * @ns_sbsize: size of valid data in super block |
63 | * @ns_supers: list of nilfs super block structs | ||
64 | * @ns_seg_seq: segment sequence counter | 56 | * @ns_seg_seq: segment sequence counter |
65 | * @ns_segnum: index number of the latest full segment. | 57 | * @ns_segnum: index number of the latest full segment. |
66 | * @ns_nextnum: index number of the full segment index to be used next | 58 | * @ns_nextnum: index number of the full segment index to be used next |
@@ -79,9 +71,9 @@ enum { | |||
79 | * @ns_dat: DAT file inode | 71 | * @ns_dat: DAT file inode |
80 | * @ns_cpfile: checkpoint file inode | 72 | * @ns_cpfile: checkpoint file inode |
81 | * @ns_sufile: segusage file inode | 73 | * @ns_sufile: segusage file inode |
82 | * @ns_gc_dat: shadow inode of the DAT file inode for GC | 74 | * @ns_cptree: rb-tree of all mounted checkpoints (nilfs_root) |
75 | * @ns_cptree_lock: lock protecting @ns_cptree | ||
83 | * @ns_gc_inodes: dummy inodes to keep live blocks | 76 | * @ns_gc_inodes: dummy inodes to keep live blocks |
84 | * @ns_gc_inodes_h: hash list to keep dummy inode holding live blocks | ||
85 | * @ns_blocksize_bits: bit length of block size | 77 | * @ns_blocksize_bits: bit length of block size |
86 | * @ns_blocksize: block size | 78 | * @ns_blocksize: block size |
87 | * @ns_nsegments: number of segments in filesystem | 79 | * @ns_nsegments: number of segments in filesystem |
@@ -95,22 +87,9 @@ enum { | |||
95 | */ | 87 | */ |
96 | struct the_nilfs { | 88 | struct the_nilfs { |
97 | unsigned long ns_flags; | 89 | unsigned long ns_flags; |
98 | atomic_t ns_count; | ||
99 | struct list_head ns_list; | ||
100 | 90 | ||
101 | struct block_device *ns_bdev; | 91 | struct block_device *ns_bdev; |
102 | struct backing_dev_info *ns_bdi; | ||
103 | struct nilfs_sb_info *ns_writer; | ||
104 | struct rw_semaphore ns_sem; | 92 | struct rw_semaphore ns_sem; |
105 | struct rw_semaphore ns_super_sem; | ||
106 | struct mutex ns_mount_mutex; | ||
107 | struct rw_semaphore ns_writer_sem; | ||
108 | |||
109 | /* | ||
110 | * components protected by ns_super_sem | ||
111 | */ | ||
112 | struct nilfs_sb_info *ns_current; | ||
113 | struct list_head ns_supers; | ||
114 | 93 | ||
115 | /* | 94 | /* |
116 | * used for | 95 | * used for |
@@ -163,11 +142,13 @@ struct the_nilfs { | |||
163 | struct inode *ns_dat; | 142 | struct inode *ns_dat; |
164 | struct inode *ns_cpfile; | 143 | struct inode *ns_cpfile; |
165 | struct inode *ns_sufile; | 144 | struct inode *ns_sufile; |
166 | struct inode *ns_gc_dat; | ||
167 | 145 | ||
168 | /* GC inode list and hash table head */ | 146 | /* Checkpoint tree */ |
147 | struct rb_root ns_cptree; | ||
148 | spinlock_t ns_cptree_lock; | ||
149 | |||
150 | /* GC inode list */ | ||
169 | struct list_head ns_gc_inodes; | 151 | struct list_head ns_gc_inodes; |
170 | struct hlist_head *ns_gc_inodes_h; | ||
171 | 152 | ||
172 | /* Disk layout information (static) */ | 153 | /* Disk layout information (static) */ |
173 | unsigned int ns_blocksize_bits; | 154 | unsigned int ns_blocksize_bits; |
@@ -182,9 +163,6 @@ struct the_nilfs { | |||
182 | u32 ns_crc_seed; | 163 | u32 ns_crc_seed; |
183 | }; | 164 | }; |
184 | 165 | ||
185 | #define NILFS_GCINODE_HASH_BITS 8 | ||
186 | #define NILFS_GCINODE_HASH_SIZE (1<<NILFS_GCINODE_HASH_BITS) | ||
187 | |||
188 | #define THE_NILFS_FNS(bit, name) \ | 166 | #define THE_NILFS_FNS(bit, name) \ |
189 | static inline void set_nilfs_##name(struct the_nilfs *nilfs) \ | 167 | static inline void set_nilfs_##name(struct the_nilfs *nilfs) \ |
190 | { \ | 168 | { \ |
@@ -205,6 +183,32 @@ THE_NILFS_FNS(DISCONTINUED, discontinued) | |||
205 | THE_NILFS_FNS(GC_RUNNING, gc_running) | 183 | THE_NILFS_FNS(GC_RUNNING, gc_running) |
206 | THE_NILFS_FNS(SB_DIRTY, sb_dirty) | 184 | THE_NILFS_FNS(SB_DIRTY, sb_dirty) |
207 | 185 | ||
186 | /** | ||
187 | * struct nilfs_root - nilfs root object | ||
188 | * @cno: checkpoint number | ||
189 | * @rb_node: red-black tree node | ||
190 | * @count: refcount of this structure | ||
191 | * @nilfs: nilfs object | ||
192 | * @ifile: inode file | ||
193 | * @root: root inode | ||
194 | * @inodes_count: number of inodes | ||
195 | * @blocks_count: number of blocks (Reserved) | ||
196 | */ | ||
197 | struct nilfs_root { | ||
198 | __u64 cno; | ||
199 | struct rb_node rb_node; | ||
200 | |||
201 | atomic_t count; | ||
202 | struct the_nilfs *nilfs; | ||
203 | struct inode *ifile; | ||
204 | |||
205 | atomic_t inodes_count; | ||
206 | atomic_t blocks_count; | ||
207 | }; | ||
208 | |||
209 | /* Special checkpoint number */ | ||
210 | #define NILFS_CPTREE_CURRENT_CNO 0 | ||
211 | |||
208 | /* Minimum interval of periodical update of superblocks (in seconds) */ | 212 | /* Minimum interval of periodical update of superblocks (in seconds) */ |
209 | #define NILFS_SB_FREQ 10 | 213 | #define NILFS_SB_FREQ 10 |
210 | 214 | ||
@@ -221,46 +225,25 @@ static inline int nilfs_sb_will_flip(struct the_nilfs *nilfs) | |||
221 | } | 225 | } |
222 | 226 | ||
223 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); | 227 | void nilfs_set_last_segment(struct the_nilfs *, sector_t, u64, __u64); |
224 | struct the_nilfs *find_or_create_nilfs(struct block_device *); | 228 | struct the_nilfs *alloc_nilfs(struct block_device *bdev); |
225 | void put_nilfs(struct the_nilfs *); | 229 | void destroy_nilfs(struct the_nilfs *nilfs); |
226 | int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); | 230 | int init_nilfs(struct the_nilfs *, struct nilfs_sb_info *, char *); |
227 | int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); | 231 | int load_nilfs(struct the_nilfs *, struct nilfs_sb_info *); |
228 | int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); | 232 | int nilfs_discard_segments(struct the_nilfs *, __u64 *, size_t); |
229 | int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); | 233 | int nilfs_count_free_blocks(struct the_nilfs *, sector_t *); |
234 | struct nilfs_root *nilfs_lookup_root(struct the_nilfs *nilfs, __u64 cno); | ||
235 | struct nilfs_root *nilfs_find_or_create_root(struct the_nilfs *nilfs, | ||
236 | __u64 cno); | ||
237 | void nilfs_put_root(struct nilfs_root *root); | ||
230 | struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); | 238 | struct nilfs_sb_info *nilfs_find_sbinfo(struct the_nilfs *, int, __u64); |
231 | int nilfs_checkpoint_is_mounted(struct the_nilfs *, __u64, int); | ||
232 | int nilfs_near_disk_full(struct the_nilfs *); | 239 | int nilfs_near_disk_full(struct the_nilfs *); |
233 | void nilfs_fall_back_super_block(struct the_nilfs *); | 240 | void nilfs_fall_back_super_block(struct the_nilfs *); |
234 | void nilfs_swap_super_block(struct the_nilfs *); | 241 | void nilfs_swap_super_block(struct the_nilfs *); |
235 | 242 | ||
236 | 243 | ||
237 | static inline void get_nilfs(struct the_nilfs *nilfs) | 244 | static inline void nilfs_get_root(struct nilfs_root *root) |
238 | { | ||
239 | /* Caller must have at least one reference of the_nilfs. */ | ||
240 | atomic_inc(&nilfs->ns_count); | ||
241 | } | ||
242 | |||
243 | static inline void | ||
244 | nilfs_attach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | ||
245 | { | ||
246 | down_write(&nilfs->ns_writer_sem); | ||
247 | nilfs->ns_writer = sbi; | ||
248 | up_write(&nilfs->ns_writer_sem); | ||
249 | } | ||
250 | |||
251 | static inline void | ||
252 | nilfs_detach_writer(struct the_nilfs *nilfs, struct nilfs_sb_info *sbi) | ||
253 | { | ||
254 | down_write(&nilfs->ns_writer_sem); | ||
255 | if (sbi == nilfs->ns_writer) | ||
256 | nilfs->ns_writer = NULL; | ||
257 | up_write(&nilfs->ns_writer_sem); | ||
258 | } | ||
259 | |||
260 | static inline void nilfs_put_sbinfo(struct nilfs_sb_info *sbi) | ||
261 | { | 245 | { |
262 | if (atomic_dec_and_test(&sbi->s_count)) | 246 | atomic_inc(&root->count); |
263 | kfree(sbi); | ||
264 | } | 247 | } |
265 | 248 | ||
266 | static inline int nilfs_valid_fs(struct the_nilfs *nilfs) | 249 | static inline int nilfs_valid_fs(struct the_nilfs *nilfs) |
diff --git a/fs/no-block.c b/fs/no-block.c index d269a93d3467..6e40e42a43de 100644 --- a/fs/no-block.c +++ b/fs/no-block.c | |||
@@ -19,4 +19,5 @@ static int no_blkdev_open(struct inode * inode, struct file * filp) | |||
19 | 19 | ||
20 | const struct file_operations def_blk_fops = { | 20 | const struct file_operations def_blk_fops = { |
21 | .open = no_blkdev_open, | 21 | .open = no_blkdev_open, |
22 | .llseek = noop_llseek, | ||
22 | }; | 23 | }; |
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig index 22c629eedd82..b388443c3a09 100644 --- a/fs/notify/Kconfig +++ b/fs/notify/Kconfig | |||
@@ -3,4 +3,4 @@ config FSNOTIFY | |||
3 | 3 | ||
4 | source "fs/notify/dnotify/Kconfig" | 4 | source "fs/notify/dnotify/Kconfig" |
5 | source "fs/notify/inotify/Kconfig" | 5 | source "fs/notify/inotify/Kconfig" |
6 | source "fs/notify/fanotify/Kconfig" | 6 | #source "fs/notify/fanotify/Kconfig" |
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 5ed8e58d7bfc..bbcb98e7fcc6 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -433,6 +433,7 @@ static const struct file_operations fanotify_fops = { | |||
433 | .release = fanotify_release, | 433 | .release = fanotify_release, |
434 | .unlocked_ioctl = fanotify_ioctl, | 434 | .unlocked_ioctl = fanotify_ioctl, |
435 | .compat_ioctl = fanotify_ioctl, | 435 | .compat_ioctl = fanotify_ioctl, |
436 | .llseek = noop_llseek, | ||
436 | }; | 437 | }; |
437 | 438 | ||
438 | static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) | 439 | static void fanotify_free_mark(struct fsnotify_mark *fsn_mark) |
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index 36802420d69a..4498a208df94 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c | |||
@@ -88,8 +88,6 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) | |||
88 | { | 88 | { |
89 | struct dentry *parent; | 89 | struct dentry *parent; |
90 | struct inode *p_inode; | 90 | struct inode *p_inode; |
91 | bool send = false; | ||
92 | bool should_update_children = false; | ||
93 | 91 | ||
94 | if (!dentry) | 92 | if (!dentry) |
95 | dentry = path->dentry; | 93 | dentry = path->dentry; |
@@ -97,29 +95,12 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) | |||
97 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) | 95 | if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED)) |
98 | return; | 96 | return; |
99 | 97 | ||
100 | spin_lock(&dentry->d_lock); | 98 | parent = dget_parent(dentry); |
101 | parent = dentry->d_parent; | ||
102 | p_inode = parent->d_inode; | 99 | p_inode = parent->d_inode; |
103 | 100 | ||
104 | if (fsnotify_inode_watches_children(p_inode)) { | 101 | if (unlikely(!fsnotify_inode_watches_children(p_inode))) |
105 | if (p_inode->i_fsnotify_mask & mask) { | 102 | __fsnotify_update_child_dentry_flags(p_inode); |
106 | dget(parent); | 103 | else if (p_inode->i_fsnotify_mask & mask) { |
107 | send = true; | ||
108 | } | ||
109 | } else { | ||
110 | /* | ||
111 | * The parent doesn't care about events on it's children but | ||
112 | * at least one child thought it did. We need to run all the | ||
113 | * children and update their d_flags to let them know p_inode | ||
114 | * doesn't care about them any more. | ||
115 | */ | ||
116 | dget(parent); | ||
117 | should_update_children = true; | ||
118 | } | ||
119 | |||
120 | spin_unlock(&dentry->d_lock); | ||
121 | |||
122 | if (send) { | ||
123 | /* we are notifying a parent so come up with the new mask which | 104 | /* we are notifying a parent so come up with the new mask which |
124 | * specifies these are events which came from a child. */ | 105 | * specifies these are events which came from a child. */ |
125 | mask |= FS_EVENT_ON_CHILD; | 106 | mask |= FS_EVENT_ON_CHILD; |
@@ -130,13 +111,9 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask) | |||
130 | else | 111 | else |
131 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, | 112 | fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE, |
132 | dentry->d_name.name, 0); | 113 | dentry->d_name.name, 0); |
133 | dput(parent); | ||
134 | } | 114 | } |
135 | 115 | ||
136 | if (unlikely(should_update_children)) { | 116 | dput(parent); |
137 | __fsnotify_update_child_dentry_flags(p_inode); | ||
138 | dput(parent); | ||
139 | } | ||
140 | } | 117 | } |
141 | EXPORT_SYMBOL_GPL(__fsnotify_parent); | 118 | EXPORT_SYMBOL_GPL(__fsnotify_parent); |
142 | 119 | ||
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 33297c005060..21ed10660b80 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c | |||
@@ -240,6 +240,7 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
240 | { | 240 | { |
241 | struct inode *inode, *next_i, *need_iput = NULL; | 241 | struct inode *inode, *next_i, *need_iput = NULL; |
242 | 242 | ||
243 | spin_lock(&inode_lock); | ||
243 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { | 244 | list_for_each_entry_safe(inode, next_i, list, i_sb_list) { |
244 | struct inode *need_iput_tmp; | 245 | struct inode *need_iput_tmp; |
245 | 246 | ||
@@ -297,4 +298,5 @@ void fsnotify_unmount_inodes(struct list_head *list) | |||
297 | 298 | ||
298 | spin_lock(&inode_lock); | 299 | spin_lock(&inode_lock); |
299 | } | 300 | } |
301 | spin_unlock(&inode_lock); | ||
300 | } | 302 | } |
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index bf7f6d776c31..24edc1185d53 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c | |||
@@ -344,6 +344,7 @@ static const struct file_operations inotify_fops = { | |||
344 | .release = inotify_release, | 344 | .release = inotify_release, |
345 | .unlocked_ioctl = inotify_ioctl, | 345 | .unlocked_ioctl = inotify_ioctl, |
346 | .compat_ioctl = inotify_ioctl, | 346 | .compat_ioctl = inotify_ioctl, |
347 | .llseek = noop_llseek, | ||
347 | }; | 348 | }; |
348 | 349 | ||
349 | 350 | ||
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 512806171bfa..d3fbe5730bfc 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/buffer_head.h> | 30 | #include <linux/buffer_head.h> |
31 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
32 | #include <linux/moduleparam.h> | 32 | #include <linux/moduleparam.h> |
33 | #include <linux/smp_lock.h> | ||
34 | #include <linux/bitmap.h> | 33 | #include <linux/bitmap.h> |
35 | 34 | ||
36 | #include "sysctl.h" | 35 | #include "sysctl.h" |
@@ -445,7 +444,6 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
445 | 444 | ||
446 | ntfs_debug("Entering with remount options string: %s", opt); | 445 | ntfs_debug("Entering with remount options string: %s", opt); |
447 | 446 | ||
448 | lock_kernel(); | ||
449 | #ifndef NTFS_RW | 447 | #ifndef NTFS_RW |
450 | /* For read-only compiled driver, enforce read-only flag. */ | 448 | /* For read-only compiled driver, enforce read-only flag. */ |
451 | *flags |= MS_RDONLY; | 449 | *flags |= MS_RDONLY; |
@@ -469,18 +467,15 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
469 | if (NVolErrors(vol)) { | 467 | if (NVolErrors(vol)) { |
470 | ntfs_error(sb, "Volume has errors and is read-only%s", | 468 | ntfs_error(sb, "Volume has errors and is read-only%s", |
471 | es); | 469 | es); |
472 | unlock_kernel(); | ||
473 | return -EROFS; | 470 | return -EROFS; |
474 | } | 471 | } |
475 | if (vol->vol_flags & VOLUME_IS_DIRTY) { | 472 | if (vol->vol_flags & VOLUME_IS_DIRTY) { |
476 | ntfs_error(sb, "Volume is dirty and read-only%s", es); | 473 | ntfs_error(sb, "Volume is dirty and read-only%s", es); |
477 | unlock_kernel(); | ||
478 | return -EROFS; | 474 | return -EROFS; |
479 | } | 475 | } |
480 | if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { | 476 | if (vol->vol_flags & VOLUME_MODIFIED_BY_CHKDSK) { |
481 | ntfs_error(sb, "Volume has been modified by chkdsk " | 477 | ntfs_error(sb, "Volume has been modified by chkdsk " |
482 | "and is read-only%s", es); | 478 | "and is read-only%s", es); |
483 | unlock_kernel(); | ||
484 | return -EROFS; | 479 | return -EROFS; |
485 | } | 480 | } |
486 | if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { | 481 | if (vol->vol_flags & VOLUME_MUST_MOUNT_RO_MASK) { |
@@ -488,13 +483,11 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
488 | "(0x%x) and is read-only%s", | 483 | "(0x%x) and is read-only%s", |
489 | (unsigned)le16_to_cpu(vol->vol_flags), | 484 | (unsigned)le16_to_cpu(vol->vol_flags), |
490 | es); | 485 | es); |
491 | unlock_kernel(); | ||
492 | return -EROFS; | 486 | return -EROFS; |
493 | } | 487 | } |
494 | if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { | 488 | if (ntfs_set_volume_flags(vol, VOLUME_IS_DIRTY)) { |
495 | ntfs_error(sb, "Failed to set dirty bit in volume " | 489 | ntfs_error(sb, "Failed to set dirty bit in volume " |
496 | "information flags%s", es); | 490 | "information flags%s", es); |
497 | unlock_kernel(); | ||
498 | return -EROFS; | 491 | return -EROFS; |
499 | } | 492 | } |
500 | #if 0 | 493 | #if 0 |
@@ -514,21 +507,18 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
514 | ntfs_error(sb, "Failed to empty journal $LogFile%s", | 507 | ntfs_error(sb, "Failed to empty journal $LogFile%s", |
515 | es); | 508 | es); |
516 | NVolSetErrors(vol); | 509 | NVolSetErrors(vol); |
517 | unlock_kernel(); | ||
518 | return -EROFS; | 510 | return -EROFS; |
519 | } | 511 | } |
520 | if (!ntfs_mark_quotas_out_of_date(vol)) { | 512 | if (!ntfs_mark_quotas_out_of_date(vol)) { |
521 | ntfs_error(sb, "Failed to mark quotas out of date%s", | 513 | ntfs_error(sb, "Failed to mark quotas out of date%s", |
522 | es); | 514 | es); |
523 | NVolSetErrors(vol); | 515 | NVolSetErrors(vol); |
524 | unlock_kernel(); | ||
525 | return -EROFS; | 516 | return -EROFS; |
526 | } | 517 | } |
527 | if (!ntfs_stamp_usnjrnl(vol)) { | 518 | if (!ntfs_stamp_usnjrnl(vol)) { |
528 | ntfs_error(sb, "Failed to stamp transation log " | 519 | ntfs_error(sb, "Failed to stamp transation log " |
529 | "($UsnJrnl)%s", es); | 520 | "($UsnJrnl)%s", es); |
530 | NVolSetErrors(vol); | 521 | NVolSetErrors(vol); |
531 | unlock_kernel(); | ||
532 | return -EROFS; | 522 | return -EROFS; |
533 | } | 523 | } |
534 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { | 524 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { |
@@ -544,11 +534,9 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
544 | 534 | ||
545 | // TODO: Deal with *flags. | 535 | // TODO: Deal with *flags. |
546 | 536 | ||
547 | if (!parse_options(vol, opt)) { | 537 | if (!parse_options(vol, opt)) |
548 | unlock_kernel(); | ||
549 | return -EINVAL; | 538 | return -EINVAL; |
550 | } | 539 | |
551 | unlock_kernel(); | ||
552 | ntfs_debug("Done."); | 540 | ntfs_debug("Done."); |
553 | return 0; | 541 | return 0; |
554 | } | 542 | } |
@@ -2261,8 +2249,6 @@ static void ntfs_put_super(struct super_block *sb) | |||
2261 | 2249 | ||
2262 | ntfs_debug("Entering."); | 2250 | ntfs_debug("Entering."); |
2263 | 2251 | ||
2264 | lock_kernel(); | ||
2265 | |||
2266 | #ifdef NTFS_RW | 2252 | #ifdef NTFS_RW |
2267 | /* | 2253 | /* |
2268 | * Commit all inodes while they are still open in case some of them | 2254 | * Commit all inodes while they are still open in case some of them |
@@ -2433,8 +2419,6 @@ static void ntfs_put_super(struct super_block *sb) | |||
2433 | 2419 | ||
2434 | sb->s_fs_info = NULL; | 2420 | sb->s_fs_info = NULL; |
2435 | kfree(vol); | 2421 | kfree(vol); |
2436 | |||
2437 | unlock_kernel(); | ||
2438 | } | 2422 | } |
2439 | 2423 | ||
2440 | /** | 2424 | /** |
@@ -2772,8 +2756,6 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2772 | init_rwsem(&vol->mftbmp_lock); | 2756 | init_rwsem(&vol->mftbmp_lock); |
2773 | init_rwsem(&vol->lcnbmp_lock); | 2757 | init_rwsem(&vol->lcnbmp_lock); |
2774 | 2758 | ||
2775 | unlock_kernel(); | ||
2776 | |||
2777 | /* By default, enable sparse support. */ | 2759 | /* By default, enable sparse support. */ |
2778 | NVolSetSparseEnabled(vol); | 2760 | NVolSetSparseEnabled(vol); |
2779 | 2761 | ||
@@ -2929,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2929 | goto unl_upcase_iput_tmp_ino_err_out_now; | 2911 | goto unl_upcase_iput_tmp_ino_err_out_now; |
2930 | } | 2912 | } |
2931 | if ((sb->s_root = d_alloc_root(vol->root_ino))) { | 2913 | if ((sb->s_root = d_alloc_root(vol->root_ino))) { |
2932 | /* We increment i_count simulating an ntfs_iget(). */ | 2914 | /* We grab a reference, simulating an ntfs_iget(). */ |
2933 | atomic_inc(&vol->root_ino->i_count); | 2915 | ihold(vol->root_ino); |
2934 | ntfs_debug("Exiting, status successful."); | 2916 | ntfs_debug("Exiting, status successful."); |
2935 | /* Release the default upcase if it has no users. */ | 2917 | /* Release the default upcase if it has no users. */ |
2936 | mutex_lock(&ntfs_lock); | 2918 | mutex_lock(&ntfs_lock); |
@@ -2940,7 +2922,6 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) | |||
2940 | } | 2922 | } |
2941 | mutex_unlock(&ntfs_lock); | 2923 | mutex_unlock(&ntfs_lock); |
2942 | sb->s_export_op = &ntfs_export_ops; | 2924 | sb->s_export_op = &ntfs_export_ops; |
2943 | lock_kernel(); | ||
2944 | lockdep_on(); | 2925 | lockdep_on(); |
2945 | return 0; | 2926 | return 0; |
2946 | } | 2927 | } |
@@ -3040,24 +3021,8 @@ iput_tmp_ino_err_out_now: | |||
3040 | if (vol->mft_ino && vol->mft_ino != tmp_ino) | 3021 | if (vol->mft_ino && vol->mft_ino != tmp_ino) |
3041 | iput(vol->mft_ino); | 3022 | iput(vol->mft_ino); |
3042 | vol->mft_ino = NULL; | 3023 | vol->mft_ino = NULL; |
3043 | /* | ||
3044 | * This is needed to get ntfs_clear_extent_inode() called for each | ||
3045 | * inode we have ever called ntfs_iget()/iput() on, otherwise we A) | ||
3046 | * leak resources and B) a subsequent mount fails automatically due to | ||
3047 | * ntfs_iget() never calling down into our ntfs_read_locked_inode() | ||
3048 | * method again... FIXME: Do we need to do this twice now because of | ||
3049 | * attribute inodes? I think not, so leave as is for now... (AIA) | ||
3050 | */ | ||
3051 | if (invalidate_inodes(sb)) { | ||
3052 | ntfs_error(sb, "Busy inodes left. This is most likely a NTFS " | ||
3053 | "driver bug."); | ||
3054 | /* Copied from fs/super.c. I just love this message. (-; */ | ||
3055 | printk("NTFS: Busy inodes after umount. Self-destruct in 5 " | ||
3056 | "seconds. Have a nice day...\n"); | ||
3057 | } | ||
3058 | /* Errors at this stage are irrelevant. */ | 3024 | /* Errors at this stage are irrelevant. */ |
3059 | err_out_now: | 3025 | err_out_now: |
3060 | lock_kernel(); | ||
3061 | sb->s_fs_info = NULL; | 3026 | sb->s_fs_info = NULL; |
3062 | kfree(vol); | 3027 | kfree(vol); |
3063 | ntfs_debug("Failed, returning -EINVAL."); | 3028 | ntfs_debug("Failed, returning -EINVAL."); |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a76e0aa5cd3f..391915093fe1 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | |||
209 | } | 209 | } |
210 | 210 | ||
211 | inode->i_mode = new_mode; | 211 | inode->i_mode = new_mode; |
212 | inode->i_ctime = CURRENT_TIME; | ||
212 | di->i_mode = cpu_to_le16(inode->i_mode); | 213 | di->i_mode = cpu_to_le16(inode->i_mode); |
214 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
215 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
213 | 216 | ||
214 | ocfs2_journal_dirty(handle, di_bh); | 217 | ocfs2_journal_dirty(handle, di_bh); |
215 | 218 | ||
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 0de69c9a08be..f1e962cb3b73 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, | |||
165 | * ocfs2 never allocates in this function - the only time we | 165 | * ocfs2 never allocates in this function - the only time we |
166 | * need to use BH_New is when we're extending i_size on a file | 166 | * need to use BH_New is when we're extending i_size on a file |
167 | * system which doesn't support holes, in which case BH_New | 167 | * system which doesn't support holes, in which case BH_New |
168 | * allows block_prepare_write() to zero. | 168 | * allows __block_write_begin() to zero. |
169 | * | 169 | * |
170 | * If we see this on a sparse file system, then a truncate has | 170 | * If we see this on a sparse file system, then a truncate has |
171 | * raced us and removed the cluster. In this case, we clear | 171 | * raced us and removed the cluster. In this case, we clear |
@@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) | |||
407 | return ret; | 407 | return ret; |
408 | } | 408 | } |
409 | 409 | ||
410 | /* | ||
411 | * This is called from ocfs2_write_zero_page() which has handled it's | ||
412 | * own cluster locking and has ensured allocation exists for those | ||
413 | * blocks to be written. | ||
414 | */ | ||
415 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, | ||
416 | unsigned from, unsigned to) | ||
417 | { | ||
418 | int ret; | ||
419 | |||
420 | ret = block_prepare_write(page, from, to, ocfs2_get_block); | ||
421 | |||
422 | return ret; | ||
423 | } | ||
424 | |||
425 | /* Taken from ext3. We don't necessarily need the full blown | 410 | /* Taken from ext3. We don't necessarily need the full blown |
426 | * functionality yet, but IMHO it's better to cut and paste the whole | 411 | * functionality yet, but IMHO it's better to cut and paste the whole |
427 | * thing so we can avoid introducing our own bugs (and easily pick up | 412 | * thing so we can avoid introducing our own bugs (and easily pick up |
@@ -732,7 +717,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page, | |||
732 | } | 717 | } |
733 | 718 | ||
734 | /* | 719 | /* |
735 | * Some of this taken from block_prepare_write(). We already have our | 720 | * Some of this taken from __block_write_begin(). We already have our |
736 | * mapping by now though, and the entire write will be allocating or | 721 | * mapping by now though, and the entire write will be allocating or |
737 | * it won't, so not much need to use BH_New. | 722 | * it won't, so not much need to use BH_New. |
738 | * | 723 | * |
@@ -883,8 +868,8 @@ struct ocfs2_write_ctxt { | |||
883 | * out in so that future reads from that region will get | 868 | * out in so that future reads from that region will get |
884 | * zero's. | 869 | * zero's. |
885 | */ | 870 | */ |
886 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
887 | unsigned int w_num_pages; | 871 | unsigned int w_num_pages; |
872 | struct page *w_pages[OCFS2_MAX_CTXT_PAGES]; | ||
888 | struct page *w_target_page; | 873 | struct page *w_target_page; |
889 | 874 | ||
890 | /* | 875 | /* |
@@ -1642,7 +1627,8 @@ static int ocfs2_zero_tail(struct inode *inode, struct buffer_head *di_bh, | |||
1642 | return ret; | 1627 | return ret; |
1643 | } | 1628 | } |
1644 | 1629 | ||
1645 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 1630 | int ocfs2_write_begin_nolock(struct file *filp, |
1631 | struct address_space *mapping, | ||
1646 | loff_t pos, unsigned len, unsigned flags, | 1632 | loff_t pos, unsigned len, unsigned flags, |
1647 | struct page **pagep, void **fsdata, | 1633 | struct page **pagep, void **fsdata, |
1648 | struct buffer_head *di_bh, struct page *mmap_page) | 1634 | struct buffer_head *di_bh, struct page *mmap_page) |
@@ -1692,7 +1678,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
1692 | mlog_errno(ret); | 1678 | mlog_errno(ret); |
1693 | goto out; | 1679 | goto out; |
1694 | } else if (ret == 1) { | 1680 | } else if (ret == 1) { |
1695 | ret = ocfs2_refcount_cow(inode, di_bh, | 1681 | ret = ocfs2_refcount_cow(inode, filp, di_bh, |
1696 | wc->w_cpos, wc->w_clen, UINT_MAX); | 1682 | wc->w_cpos, wc->w_clen, UINT_MAX); |
1697 | if (ret) { | 1683 | if (ret) { |
1698 | mlog_errno(ret); | 1684 | mlog_errno(ret); |
@@ -1854,7 +1840,7 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping, | |||
1854 | */ | 1840 | */ |
1855 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 1841 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
1856 | 1842 | ||
1857 | ret = ocfs2_write_begin_nolock(mapping, pos, len, flags, pagep, | 1843 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep, |
1858 | fsdata, di_bh, NULL); | 1844 | fsdata, di_bh, NULL); |
1859 | if (ret) { | 1845 | if (ret) { |
1860 | mlog_errno(ret); | 1846 | mlog_errno(ret); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index c48e93ffc513..76bfdfda691a 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -22,9 +22,6 @@ | |||
22 | #ifndef OCFS2_AOPS_H | 22 | #ifndef OCFS2_AOPS_H |
23 | #define OCFS2_AOPS_H | 23 | #define OCFS2_AOPS_H |
24 | 24 | ||
25 | int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, | ||
26 | unsigned from, unsigned to); | ||
27 | |||
28 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | 25 | handle_t *ocfs2_start_walk_page_trans(struct inode *inode, |
29 | struct page *page, | 26 | struct page *page, |
30 | unsigned from, | 27 | unsigned from, |
@@ -48,7 +45,8 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
48 | loff_t pos, unsigned len, unsigned copied, | 45 | loff_t pos, unsigned len, unsigned copied, |
49 | struct page *page, void *fsdata); | 46 | struct page *page, void *fsdata); |
50 | 47 | ||
51 | int ocfs2_write_begin_nolock(struct address_space *mapping, | 48 | int ocfs2_write_begin_nolock(struct file *filp, |
49 | struct address_space *mapping, | ||
52 | loff_t pos, unsigned len, unsigned flags, | 50 | loff_t pos, unsigned len, unsigned flags, |
53 | struct page **pagep, void **fsdata, | 51 | struct page **pagep, void **fsdata, |
54 | struct buffer_head *di_bh, struct page *mmap_page); | 52 | struct buffer_head *di_bh, struct page *mmap_page); |
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 41d5f1f92d56..52c7557f3e25 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c | |||
@@ -62,10 +62,51 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | |||
62 | static LIST_HEAD(o2hb_node_events); | 62 | static LIST_HEAD(o2hb_node_events); |
63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); | 63 | static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); |
64 | 64 | ||
65 | /* | ||
66 | * In global heartbeat, we maintain a series of region bitmaps. | ||
67 | * - o2hb_region_bitmap allows us to limit the region number to max region. | ||
68 | * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). | ||
69 | * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes | ||
70 | * heartbeat on it. | ||
71 | * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts. | ||
72 | */ | ||
73 | static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
74 | static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
75 | static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
76 | static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; | ||
77 | |||
78 | #define O2HB_DB_TYPE_LIVENODES 0 | ||
79 | #define O2HB_DB_TYPE_LIVEREGIONS 1 | ||
80 | #define O2HB_DB_TYPE_QUORUMREGIONS 2 | ||
81 | #define O2HB_DB_TYPE_FAILEDREGIONS 3 | ||
82 | #define O2HB_DB_TYPE_REGION_LIVENODES 4 | ||
83 | #define O2HB_DB_TYPE_REGION_NUMBER 5 | ||
84 | #define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 | ||
85 | struct o2hb_debug_buf { | ||
86 | int db_type; | ||
87 | int db_size; | ||
88 | int db_len; | ||
89 | void *db_data; | ||
90 | }; | ||
91 | |||
92 | static struct o2hb_debug_buf *o2hb_db_livenodes; | ||
93 | static struct o2hb_debug_buf *o2hb_db_liveregions; | ||
94 | static struct o2hb_debug_buf *o2hb_db_quorumregions; | ||
95 | static struct o2hb_debug_buf *o2hb_db_failedregions; | ||
96 | |||
65 | #define O2HB_DEBUG_DIR "o2hb" | 97 | #define O2HB_DEBUG_DIR "o2hb" |
66 | #define O2HB_DEBUG_LIVENODES "livenodes" | 98 | #define O2HB_DEBUG_LIVENODES "livenodes" |
99 | #define O2HB_DEBUG_LIVEREGIONS "live_regions" | ||
100 | #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" | ||
101 | #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" | ||
102 | #define O2HB_DEBUG_REGION_NUMBER "num" | ||
103 | #define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" | ||
104 | |||
67 | static struct dentry *o2hb_debug_dir; | 105 | static struct dentry *o2hb_debug_dir; |
68 | static struct dentry *o2hb_debug_livenodes; | 106 | static struct dentry *o2hb_debug_livenodes; |
107 | static struct dentry *o2hb_debug_liveregions; | ||
108 | static struct dentry *o2hb_debug_quorumregions; | ||
109 | static struct dentry *o2hb_debug_failedregions; | ||
69 | 110 | ||
70 | static LIST_HEAD(o2hb_all_regions); | 111 | static LIST_HEAD(o2hb_all_regions); |
71 | 112 | ||
@@ -77,7 +118,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); | |||
77 | 118 | ||
78 | #define O2HB_DEFAULT_BLOCK_BITS 9 | 119 | #define O2HB_DEFAULT_BLOCK_BITS 9 |
79 | 120 | ||
121 | enum o2hb_heartbeat_modes { | ||
122 | O2HB_HEARTBEAT_LOCAL = 0, | ||
123 | O2HB_HEARTBEAT_GLOBAL, | ||
124 | O2HB_HEARTBEAT_NUM_MODES, | ||
125 | }; | ||
126 | |||
127 | char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { | ||
128 | "local", /* O2HB_HEARTBEAT_LOCAL */ | ||
129 | "global", /* O2HB_HEARTBEAT_GLOBAL */ | ||
130 | }; | ||
131 | |||
80 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; | 132 | unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; |
133 | unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; | ||
81 | 134 | ||
82 | /* Only sets a new threshold if there are no active regions. | 135 | /* Only sets a new threshold if there are no active regions. |
83 | * | 136 | * |
@@ -94,6 +147,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) | |||
94 | } | 147 | } |
95 | } | 148 | } |
96 | 149 | ||
150 | static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) | ||
151 | { | ||
152 | int ret = -1; | ||
153 | |||
154 | if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { | ||
155 | spin_lock(&o2hb_live_lock); | ||
156 | if (list_empty(&o2hb_all_regions)) { | ||
157 | o2hb_heartbeat_mode = hb_mode; | ||
158 | ret = 0; | ||
159 | } | ||
160 | spin_unlock(&o2hb_live_lock); | ||
161 | } | ||
162 | |||
163 | return ret; | ||
164 | } | ||
165 | |||
97 | struct o2hb_node_event { | 166 | struct o2hb_node_event { |
98 | struct list_head hn_item; | 167 | struct list_head hn_item; |
99 | enum o2hb_callback_type hn_event_type; | 168 | enum o2hb_callback_type hn_event_type; |
@@ -135,6 +204,18 @@ struct o2hb_region { | |||
135 | struct block_device *hr_bdev; | 204 | struct block_device *hr_bdev; |
136 | struct o2hb_disk_slot *hr_slots; | 205 | struct o2hb_disk_slot *hr_slots; |
137 | 206 | ||
207 | /* live node map of this region */ | ||
208 | unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
209 | unsigned int hr_region_num; | ||
210 | |||
211 | struct dentry *hr_debug_dir; | ||
212 | struct dentry *hr_debug_livenodes; | ||
213 | struct dentry *hr_debug_regnum; | ||
214 | struct dentry *hr_debug_elapsed_time; | ||
215 | struct o2hb_debug_buf *hr_db_livenodes; | ||
216 | struct o2hb_debug_buf *hr_db_regnum; | ||
217 | struct o2hb_debug_buf *hr_db_elapsed_time; | ||
218 | |||
138 | /* let the person setting up hb wait for it to return until it | 219 | /* let the person setting up hb wait for it to return until it |
139 | * has reached a 'steady' state. This will be fixed when we have | 220 | * has reached a 'steady' state. This will be fixed when we have |
140 | * a more complete api that doesn't lead to this sort of fragility. */ | 221 | * a more complete api that doesn't lead to this sort of fragility. */ |
@@ -163,8 +244,19 @@ struct o2hb_bio_wait_ctxt { | |||
163 | int wc_error; | 244 | int wc_error; |
164 | }; | 245 | }; |
165 | 246 | ||
247 | static int o2hb_pop_count(void *map, int count) | ||
248 | { | ||
249 | int i = -1, pop = 0; | ||
250 | |||
251 | while ((i = find_next_bit(map, count, i + 1)) < count) | ||
252 | pop++; | ||
253 | return pop; | ||
254 | } | ||
255 | |||
166 | static void o2hb_write_timeout(struct work_struct *work) | 256 | static void o2hb_write_timeout(struct work_struct *work) |
167 | { | 257 | { |
258 | int failed, quorum; | ||
259 | unsigned long flags; | ||
168 | struct o2hb_region *reg = | 260 | struct o2hb_region *reg = |
169 | container_of(work, struct o2hb_region, | 261 | container_of(work, struct o2hb_region, |
170 | hr_write_timeout_work.work); | 262 | hr_write_timeout_work.work); |
@@ -172,6 +264,28 @@ static void o2hb_write_timeout(struct work_struct *work) | |||
172 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " | 264 | mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " |
173 | "milliseconds\n", reg->hr_dev_name, | 265 | "milliseconds\n", reg->hr_dev_name, |
174 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); | 266 | jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); |
267 | |||
268 | if (o2hb_global_heartbeat_active()) { | ||
269 | spin_lock_irqsave(&o2hb_live_lock, flags); | ||
270 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
271 | set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
272 | failed = o2hb_pop_count(&o2hb_failed_region_bitmap, | ||
273 | O2NM_MAX_REGIONS); | ||
274 | quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, | ||
275 | O2NM_MAX_REGIONS); | ||
276 | spin_unlock_irqrestore(&o2hb_live_lock, flags); | ||
277 | |||
278 | mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", | ||
279 | quorum, failed); | ||
280 | |||
281 | /* | ||
282 | * Fence if the number of failed regions >= half the number | ||
283 | * of quorum regions | ||
284 | */ | ||
285 | if ((failed << 1) < quorum) | ||
286 | return; | ||
287 | } | ||
288 | |||
175 | o2quo_disk_timeout(); | 289 | o2quo_disk_timeout(); |
176 | } | 290 | } |
177 | 291 | ||
@@ -180,6 +294,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) | |||
180 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", | 294 | mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", |
181 | O2HB_MAX_WRITE_TIMEOUT_MS); | 295 | O2HB_MAX_WRITE_TIMEOUT_MS); |
182 | 296 | ||
297 | if (o2hb_global_heartbeat_active()) { | ||
298 | spin_lock(&o2hb_live_lock); | ||
299 | clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap); | ||
300 | spin_unlock(&o2hb_live_lock); | ||
301 | } | ||
183 | cancel_delayed_work(®->hr_write_timeout_work); | 302 | cancel_delayed_work(®->hr_write_timeout_work); |
184 | reg->hr_last_timeout_start = jiffies; | 303 | reg->hr_last_timeout_start = jiffies; |
185 | schedule_delayed_work(®->hr_write_timeout_work, | 304 | schedule_delayed_work(®->hr_write_timeout_work, |
@@ -513,6 +632,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event, | |||
513 | { | 632 | { |
514 | assert_spin_locked(&o2hb_live_lock); | 633 | assert_spin_locked(&o2hb_live_lock); |
515 | 634 | ||
635 | BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB)); | ||
636 | |||
516 | event->hn_event_type = type; | 637 | event->hn_event_type = type; |
517 | event->hn_node = node; | 638 | event->hn_node = node; |
518 | event->hn_node_num = node_num; | 639 | event->hn_node_num = node_num; |
@@ -554,6 +675,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) | |||
554 | o2nm_node_put(node); | 675 | o2nm_node_put(node); |
555 | } | 676 | } |
556 | 677 | ||
678 | static void o2hb_set_quorum_device(struct o2hb_region *reg, | ||
679 | struct o2hb_disk_slot *slot) | ||
680 | { | ||
681 | assert_spin_locked(&o2hb_live_lock); | ||
682 | |||
683 | if (!o2hb_global_heartbeat_active()) | ||
684 | return; | ||
685 | |||
686 | if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) | ||
687 | return; | ||
688 | |||
689 | /* | ||
690 | * A region can be added to the quorum only when it sees all | ||
691 | * live nodes heartbeat on it. In other words, the region has been | ||
692 | * added to all nodes. | ||
693 | */ | ||
694 | if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, | ||
695 | sizeof(o2hb_live_node_bitmap))) | ||
696 | return; | ||
697 | |||
698 | if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) | ||
699 | return; | ||
700 | |||
701 | printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", | ||
702 | config_item_name(®->hr_item)); | ||
703 | |||
704 | set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); | ||
705 | } | ||
706 | |||
557 | static int o2hb_check_slot(struct o2hb_region *reg, | 707 | static int o2hb_check_slot(struct o2hb_region *reg, |
558 | struct o2hb_disk_slot *slot) | 708 | struct o2hb_disk_slot *slot) |
559 | { | 709 | { |
@@ -565,14 +715,22 @@ static int o2hb_check_slot(struct o2hb_region *reg, | |||
565 | u64 cputime; | 715 | u64 cputime; |
566 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; | 716 | unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; |
567 | unsigned int slot_dead_ms; | 717 | unsigned int slot_dead_ms; |
718 | int tmp; | ||
568 | 719 | ||
569 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); | 720 | memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); |
570 | 721 | ||
571 | /* Is this correct? Do we assume that the node doesn't exist | 722 | /* |
572 | * if we're not configured for him? */ | 723 | * If a node is no longer configured but is still in the livemap, we |
724 | * may need to clear that bit from the livemap. | ||
725 | */ | ||
573 | node = o2nm_get_node_by_num(slot->ds_node_num); | 726 | node = o2nm_get_node_by_num(slot->ds_node_num); |
574 | if (!node) | 727 | if (!node) { |
575 | return 0; | 728 | spin_lock(&o2hb_live_lock); |
729 | tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap); | ||
730 | spin_unlock(&o2hb_live_lock); | ||
731 | if (!tmp) | ||
732 | return 0; | ||
733 | } | ||
576 | 734 | ||
577 | if (!o2hb_verify_crc(reg, hb_block)) { | 735 | if (!o2hb_verify_crc(reg, hb_block)) { |
578 | /* all paths from here will drop o2hb_live_lock for | 736 | /* all paths from here will drop o2hb_live_lock for |
@@ -639,8 +797,12 @@ fire_callbacks: | |||
639 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", | 797 | mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", |
640 | slot->ds_node_num, (long long)slot->ds_last_generation); | 798 | slot->ds_node_num, (long long)slot->ds_last_generation); |
641 | 799 | ||
800 | set_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
801 | |||
642 | /* first on the list generates a callback */ | 802 | /* first on the list generates a callback */ |
643 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 803 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
804 | mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes " | ||
805 | "bitmap\n", slot->ds_node_num); | ||
644 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 806 | set_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
645 | 807 | ||
646 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, | 808 | o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, |
@@ -684,13 +846,18 @@ fire_callbacks: | |||
684 | mlog(ML_HEARTBEAT, "Node %d left my region\n", | 846 | mlog(ML_HEARTBEAT, "Node %d left my region\n", |
685 | slot->ds_node_num); | 847 | slot->ds_node_num); |
686 | 848 | ||
849 | clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap); | ||
850 | |||
687 | /* last off the live_slot generates a callback */ | 851 | /* last off the live_slot generates a callback */ |
688 | list_del_init(&slot->ds_live_item); | 852 | list_del_init(&slot->ds_live_item); |
689 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { | 853 | if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { |
854 | mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live " | ||
855 | "nodes bitmap\n", slot->ds_node_num); | ||
690 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); | 856 | clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); |
691 | 857 | ||
692 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, | 858 | /* node can be null */ |
693 | slot->ds_node_num); | 859 | o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, |
860 | node, slot->ds_node_num); | ||
694 | 861 | ||
695 | changed = 1; | 862 | changed = 1; |
696 | } | 863 | } |
@@ -706,11 +873,14 @@ fire_callbacks: | |||
706 | slot->ds_equal_samples = 0; | 873 | slot->ds_equal_samples = 0; |
707 | } | 874 | } |
708 | out: | 875 | out: |
876 | o2hb_set_quorum_device(reg, slot); | ||
877 | |||
709 | spin_unlock(&o2hb_live_lock); | 878 | spin_unlock(&o2hb_live_lock); |
710 | 879 | ||
711 | o2hb_run_event_list(&event); | 880 | o2hb_run_event_list(&event); |
712 | 881 | ||
713 | o2nm_node_put(node); | 882 | if (node) |
883 | o2nm_node_put(node); | ||
714 | return changed; | 884 | return changed; |
715 | } | 885 | } |
716 | 886 | ||
@@ -737,6 +907,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
737 | { | 907 | { |
738 | int i, ret, highest_node, change = 0; | 908 | int i, ret, highest_node, change = 0; |
739 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 909 | unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
910 | unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; | ||
740 | struct o2hb_bio_wait_ctxt write_wc; | 911 | struct o2hb_bio_wait_ctxt write_wc; |
741 | 912 | ||
742 | ret = o2nm_configured_node_map(configured_nodes, | 913 | ret = o2nm_configured_node_map(configured_nodes, |
@@ -746,6 +917,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) | |||
746 | return ret; | 917 | return ret; |
747 | } | 918 | } |
748 | 919 | ||
920 | /* | ||
921 | * If a node is not configured but is in the livemap, we still need | ||
922 | * to read the slot so as to be able to remove it from the livemap. | ||
923 | */ | ||
924 | o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); | ||
925 | i = -1; | ||
926 | while ((i = find_next_bit(live_node_bitmap, | ||
927 | O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { | ||
928 | set_bit(i, configured_nodes); | ||
929 | } | ||
930 | |||
749 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); | 931 | highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); |
750 | if (highest_node >= O2NM_MAX_NODES) { | 932 | if (highest_node >= O2NM_MAX_NODES) { |
751 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); | 933 | mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); |
@@ -917,21 +1099,59 @@ static int o2hb_thread(void *data) | |||
917 | #ifdef CONFIG_DEBUG_FS | 1099 | #ifdef CONFIG_DEBUG_FS |
918 | static int o2hb_debug_open(struct inode *inode, struct file *file) | 1100 | static int o2hb_debug_open(struct inode *inode, struct file *file) |
919 | { | 1101 | { |
1102 | struct o2hb_debug_buf *db = inode->i_private; | ||
1103 | struct o2hb_region *reg; | ||
920 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; | 1104 | unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; |
921 | char *buf = NULL; | 1105 | char *buf = NULL; |
922 | int i = -1; | 1106 | int i = -1; |
923 | int out = 0; | 1107 | int out = 0; |
924 | 1108 | ||
1109 | /* max_nodes should be the largest bitmap we pass here */ | ||
1110 | BUG_ON(sizeof(map) < db->db_size); | ||
1111 | |||
925 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); | 1112 | buf = kmalloc(PAGE_SIZE, GFP_KERNEL); |
926 | if (!buf) | 1113 | if (!buf) |
927 | goto bail; | 1114 | goto bail; |
928 | 1115 | ||
929 | o2hb_fill_node_map(map, sizeof(map)); | 1116 | switch (db->db_type) { |
1117 | case O2HB_DB_TYPE_LIVENODES: | ||
1118 | case O2HB_DB_TYPE_LIVEREGIONS: | ||
1119 | case O2HB_DB_TYPE_QUORUMREGIONS: | ||
1120 | case O2HB_DB_TYPE_FAILEDREGIONS: | ||
1121 | spin_lock(&o2hb_live_lock); | ||
1122 | memcpy(map, db->db_data, db->db_size); | ||
1123 | spin_unlock(&o2hb_live_lock); | ||
1124 | break; | ||
1125 | |||
1126 | case O2HB_DB_TYPE_REGION_LIVENODES: | ||
1127 | spin_lock(&o2hb_live_lock); | ||
1128 | reg = (struct o2hb_region *)db->db_data; | ||
1129 | memcpy(map, reg->hr_live_node_bitmap, db->db_size); | ||
1130 | spin_unlock(&o2hb_live_lock); | ||
1131 | break; | ||
1132 | |||
1133 | case O2HB_DB_TYPE_REGION_NUMBER: | ||
1134 | reg = (struct o2hb_region *)db->db_data; | ||
1135 | out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", | ||
1136 | reg->hr_region_num); | ||
1137 | goto done; | ||
1138 | |||
1139 | case O2HB_DB_TYPE_REGION_ELAPSED_TIME: | ||
1140 | reg = (struct o2hb_region *)db->db_data; | ||
1141 | out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", | ||
1142 | jiffies_to_msecs(jiffies - | ||
1143 | reg->hr_last_timeout_start)); | ||
1144 | goto done; | ||
1145 | |||
1146 | default: | ||
1147 | goto done; | ||
1148 | } | ||
930 | 1149 | ||
931 | while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) | 1150 | while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) |
932 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); | 1151 | out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); |
933 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); | 1152 | out += snprintf(buf + out, PAGE_SIZE - out, "\n"); |
934 | 1153 | ||
1154 | done: | ||
935 | i_size_write(inode, out); | 1155 | i_size_write(inode, out); |
936 | 1156 | ||
937 | file->private_data = buf; | 1157 | file->private_data = buf; |
@@ -978,10 +1198,104 @@ static const struct file_operations o2hb_debug_fops = { | |||
978 | 1198 | ||
979 | void o2hb_exit(void) | 1199 | void o2hb_exit(void) |
980 | { | 1200 | { |
981 | if (o2hb_debug_livenodes) | 1201 | kfree(o2hb_db_livenodes); |
982 | debugfs_remove(o2hb_debug_livenodes); | 1202 | kfree(o2hb_db_liveregions); |
983 | if (o2hb_debug_dir) | 1203 | kfree(o2hb_db_quorumregions); |
984 | debugfs_remove(o2hb_debug_dir); | 1204 | kfree(o2hb_db_failedregions); |
1205 | debugfs_remove(o2hb_debug_failedregions); | ||
1206 | debugfs_remove(o2hb_debug_quorumregions); | ||
1207 | debugfs_remove(o2hb_debug_liveregions); | ||
1208 | debugfs_remove(o2hb_debug_livenodes); | ||
1209 | debugfs_remove(o2hb_debug_dir); | ||
1210 | } | ||
1211 | |||
1212 | static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, | ||
1213 | struct o2hb_debug_buf **db, int db_len, | ||
1214 | int type, int size, int len, void *data) | ||
1215 | { | ||
1216 | *db = kmalloc(db_len, GFP_KERNEL); | ||
1217 | if (!*db) | ||
1218 | return NULL; | ||
1219 | |||
1220 | (*db)->db_type = type; | ||
1221 | (*db)->db_size = size; | ||
1222 | (*db)->db_len = len; | ||
1223 | (*db)->db_data = data; | ||
1224 | |||
1225 | return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, | ||
1226 | &o2hb_debug_fops); | ||
1227 | } | ||
1228 | |||
1229 | static int o2hb_debug_init(void) | ||
1230 | { | ||
1231 | int ret = -ENOMEM; | ||
1232 | |||
1233 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | ||
1234 | if (!o2hb_debug_dir) { | ||
1235 | mlog_errno(ret); | ||
1236 | goto bail; | ||
1237 | } | ||
1238 | |||
1239 | o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1240 | o2hb_debug_dir, | ||
1241 | &o2hb_db_livenodes, | ||
1242 | sizeof(*o2hb_db_livenodes), | ||
1243 | O2HB_DB_TYPE_LIVENODES, | ||
1244 | sizeof(o2hb_live_node_bitmap), | ||
1245 | O2NM_MAX_NODES, | ||
1246 | o2hb_live_node_bitmap); | ||
1247 | if (!o2hb_debug_livenodes) { | ||
1248 | mlog_errno(ret); | ||
1249 | goto bail; | ||
1250 | } | ||
1251 | |||
1252 | o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, | ||
1253 | o2hb_debug_dir, | ||
1254 | &o2hb_db_liveregions, | ||
1255 | sizeof(*o2hb_db_liveregions), | ||
1256 | O2HB_DB_TYPE_LIVEREGIONS, | ||
1257 | sizeof(o2hb_live_region_bitmap), | ||
1258 | O2NM_MAX_REGIONS, | ||
1259 | o2hb_live_region_bitmap); | ||
1260 | if (!o2hb_debug_liveregions) { | ||
1261 | mlog_errno(ret); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | o2hb_debug_quorumregions = | ||
1266 | o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, | ||
1267 | o2hb_debug_dir, | ||
1268 | &o2hb_db_quorumregions, | ||
1269 | sizeof(*o2hb_db_quorumregions), | ||
1270 | O2HB_DB_TYPE_QUORUMREGIONS, | ||
1271 | sizeof(o2hb_quorum_region_bitmap), | ||
1272 | O2NM_MAX_REGIONS, | ||
1273 | o2hb_quorum_region_bitmap); | ||
1274 | if (!o2hb_debug_quorumregions) { | ||
1275 | mlog_errno(ret); | ||
1276 | goto bail; | ||
1277 | } | ||
1278 | |||
1279 | o2hb_debug_failedregions = | ||
1280 | o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, | ||
1281 | o2hb_debug_dir, | ||
1282 | &o2hb_db_failedregions, | ||
1283 | sizeof(*o2hb_db_failedregions), | ||
1284 | O2HB_DB_TYPE_FAILEDREGIONS, | ||
1285 | sizeof(o2hb_failed_region_bitmap), | ||
1286 | O2NM_MAX_REGIONS, | ||
1287 | o2hb_failed_region_bitmap); | ||
1288 | if (!o2hb_debug_failedregions) { | ||
1289 | mlog_errno(ret); | ||
1290 | goto bail; | ||
1291 | } | ||
1292 | |||
1293 | ret = 0; | ||
1294 | bail: | ||
1295 | if (ret) | ||
1296 | o2hb_exit(); | ||
1297 | |||
1298 | return ret; | ||
985 | } | 1299 | } |
986 | 1300 | ||
987 | int o2hb_init(void) | 1301 | int o2hb_init(void) |
@@ -997,24 +1311,12 @@ int o2hb_init(void) | |||
997 | INIT_LIST_HEAD(&o2hb_node_events); | 1311 | INIT_LIST_HEAD(&o2hb_node_events); |
998 | 1312 | ||
999 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); | 1313 | memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); |
1314 | memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); | ||
1315 | memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); | ||
1316 | memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); | ||
1317 | memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); | ||
1000 | 1318 | ||
1001 | o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); | 1319 | return o2hb_debug_init(); |
1002 | if (!o2hb_debug_dir) { | ||
1003 | mlog_errno(-ENOMEM); | ||
1004 | return -ENOMEM; | ||
1005 | } | ||
1006 | |||
1007 | o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, | ||
1008 | S_IFREG|S_IRUSR, | ||
1009 | o2hb_debug_dir, NULL, | ||
1010 | &o2hb_debug_fops); | ||
1011 | if (!o2hb_debug_livenodes) { | ||
1012 | mlog_errno(-ENOMEM); | ||
1013 | debugfs_remove(o2hb_debug_dir); | ||
1014 | return -ENOMEM; | ||
1015 | } | ||
1016 | |||
1017 | return 0; | ||
1018 | } | 1320 | } |
1019 | 1321 | ||
1020 | /* if we're already in a callback then we're already serialized by the sem */ | 1322 | /* if we're already in a callback then we're already serialized by the sem */ |
@@ -1078,6 +1380,13 @@ static void o2hb_region_release(struct config_item *item) | |||
1078 | if (reg->hr_slots) | 1380 | if (reg->hr_slots) |
1079 | kfree(reg->hr_slots); | 1381 | kfree(reg->hr_slots); |
1080 | 1382 | ||
1383 | kfree(reg->hr_db_regnum); | ||
1384 | kfree(reg->hr_db_livenodes); | ||
1385 | debugfs_remove(reg->hr_debug_livenodes); | ||
1386 | debugfs_remove(reg->hr_debug_regnum); | ||
1387 | debugfs_remove(reg->hr_debug_elapsed_time); | ||
1388 | debugfs_remove(reg->hr_debug_dir); | ||
1389 | |||
1081 | spin_lock(&o2hb_live_lock); | 1390 | spin_lock(&o2hb_live_lock); |
1082 | list_del(®->hr_all_item); | 1391 | list_del(®->hr_all_item); |
1083 | spin_unlock(&o2hb_live_lock); | 1392 | spin_unlock(&o2hb_live_lock); |
@@ -1441,6 +1750,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1441 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ | 1750 | /* Ok, we were woken. Make sure it wasn't by drop_item() */ |
1442 | spin_lock(&o2hb_live_lock); | 1751 | spin_lock(&o2hb_live_lock); |
1443 | hb_task = reg->hr_task; | 1752 | hb_task = reg->hr_task; |
1753 | if (o2hb_global_heartbeat_active()) | ||
1754 | set_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
1444 | spin_unlock(&o2hb_live_lock); | 1755 | spin_unlock(&o2hb_live_lock); |
1445 | 1756 | ||
1446 | if (hb_task) | 1757 | if (hb_task) |
@@ -1448,6 +1759,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, | |||
1448 | else | 1759 | else |
1449 | ret = -EIO; | 1760 | ret = -EIO; |
1450 | 1761 | ||
1762 | if (hb_task && o2hb_global_heartbeat_active()) | ||
1763 | printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", | ||
1764 | config_item_name(®->hr_item)); | ||
1765 | |||
1451 | out: | 1766 | out: |
1452 | if (filp) | 1767 | if (filp) |
1453 | fput(filp); | 1768 | fput(filp); |
@@ -1586,21 +1901,94 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group | |||
1586 | : NULL; | 1901 | : NULL; |
1587 | } | 1902 | } |
1588 | 1903 | ||
1904 | static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) | ||
1905 | { | ||
1906 | int ret = -ENOMEM; | ||
1907 | |||
1908 | reg->hr_debug_dir = | ||
1909 | debugfs_create_dir(config_item_name(®->hr_item), dir); | ||
1910 | if (!reg->hr_debug_dir) { | ||
1911 | mlog_errno(ret); | ||
1912 | goto bail; | ||
1913 | } | ||
1914 | |||
1915 | reg->hr_debug_livenodes = | ||
1916 | o2hb_debug_create(O2HB_DEBUG_LIVENODES, | ||
1917 | reg->hr_debug_dir, | ||
1918 | &(reg->hr_db_livenodes), | ||
1919 | sizeof(*(reg->hr_db_livenodes)), | ||
1920 | O2HB_DB_TYPE_REGION_LIVENODES, | ||
1921 | sizeof(reg->hr_live_node_bitmap), | ||
1922 | O2NM_MAX_NODES, reg); | ||
1923 | if (!reg->hr_debug_livenodes) { | ||
1924 | mlog_errno(ret); | ||
1925 | goto bail; | ||
1926 | } | ||
1927 | |||
1928 | reg->hr_debug_regnum = | ||
1929 | o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, | ||
1930 | reg->hr_debug_dir, | ||
1931 | &(reg->hr_db_regnum), | ||
1932 | sizeof(*(reg->hr_db_regnum)), | ||
1933 | O2HB_DB_TYPE_REGION_NUMBER, | ||
1934 | 0, O2NM_MAX_NODES, reg); | ||
1935 | if (!reg->hr_debug_regnum) { | ||
1936 | mlog_errno(ret); | ||
1937 | goto bail; | ||
1938 | } | ||
1939 | |||
1940 | reg->hr_debug_elapsed_time = | ||
1941 | o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, | ||
1942 | reg->hr_debug_dir, | ||
1943 | &(reg->hr_db_elapsed_time), | ||
1944 | sizeof(*(reg->hr_db_elapsed_time)), | ||
1945 | O2HB_DB_TYPE_REGION_ELAPSED_TIME, | ||
1946 | 0, 0, reg); | ||
1947 | if (!reg->hr_debug_elapsed_time) { | ||
1948 | mlog_errno(ret); | ||
1949 | goto bail; | ||
1950 | } | ||
1951 | |||
1952 | ret = 0; | ||
1953 | bail: | ||
1954 | return ret; | ||
1955 | } | ||
1956 | |||
1589 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, | 1957 | static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, |
1590 | const char *name) | 1958 | const char *name) |
1591 | { | 1959 | { |
1592 | struct o2hb_region *reg = NULL; | 1960 | struct o2hb_region *reg = NULL; |
1961 | int ret; | ||
1593 | 1962 | ||
1594 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); | 1963 | reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); |
1595 | if (reg == NULL) | 1964 | if (reg == NULL) |
1596 | return ERR_PTR(-ENOMEM); | 1965 | return ERR_PTR(-ENOMEM); |
1597 | 1966 | ||
1598 | config_item_init_type_name(®->hr_item, name, &o2hb_region_type); | 1967 | if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) |
1968 | return ERR_PTR(-ENAMETOOLONG); | ||
1599 | 1969 | ||
1600 | spin_lock(&o2hb_live_lock); | 1970 | spin_lock(&o2hb_live_lock); |
1971 | reg->hr_region_num = 0; | ||
1972 | if (o2hb_global_heartbeat_active()) { | ||
1973 | reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap, | ||
1974 | O2NM_MAX_REGIONS); | ||
1975 | if (reg->hr_region_num >= O2NM_MAX_REGIONS) { | ||
1976 | spin_unlock(&o2hb_live_lock); | ||
1977 | return ERR_PTR(-EFBIG); | ||
1978 | } | ||
1979 | set_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
1980 | } | ||
1601 | list_add_tail(®->hr_all_item, &o2hb_all_regions); | 1981 | list_add_tail(®->hr_all_item, &o2hb_all_regions); |
1602 | spin_unlock(&o2hb_live_lock); | 1982 | spin_unlock(&o2hb_live_lock); |
1603 | 1983 | ||
1984 | config_item_init_type_name(®->hr_item, name, &o2hb_region_type); | ||
1985 | |||
1986 | ret = o2hb_debug_region_init(reg, o2hb_debug_dir); | ||
1987 | if (ret) { | ||
1988 | config_item_put(®->hr_item); | ||
1989 | return ERR_PTR(ret); | ||
1990 | } | ||
1991 | |||
1604 | return ®->hr_item; | 1992 | return ®->hr_item; |
1605 | } | 1993 | } |
1606 | 1994 | ||
@@ -1612,6 +2000,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1612 | 2000 | ||
1613 | /* stop the thread when the user removes the region dir */ | 2001 | /* stop the thread when the user removes the region dir */ |
1614 | spin_lock(&o2hb_live_lock); | 2002 | spin_lock(&o2hb_live_lock); |
2003 | if (o2hb_global_heartbeat_active()) { | ||
2004 | clear_bit(reg->hr_region_num, o2hb_region_bitmap); | ||
2005 | clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); | ||
2006 | } | ||
1615 | hb_task = reg->hr_task; | 2007 | hb_task = reg->hr_task; |
1616 | reg->hr_task = NULL; | 2008 | reg->hr_task = NULL; |
1617 | spin_unlock(&o2hb_live_lock); | 2009 | spin_unlock(&o2hb_live_lock); |
@@ -1628,6 +2020,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, | |||
1628 | wake_up(&o2hb_steady_queue); | 2020 | wake_up(&o2hb_steady_queue); |
1629 | } | 2021 | } |
1630 | 2022 | ||
2023 | if (o2hb_global_heartbeat_active()) | ||
2024 | printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", | ||
2025 | config_item_name(®->hr_item)); | ||
1631 | config_item_put(item); | 2026 | config_item_put(item); |
1632 | } | 2027 | } |
1633 | 2028 | ||
@@ -1688,6 +2083,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group | |||
1688 | return count; | 2083 | return count; |
1689 | } | 2084 | } |
1690 | 2085 | ||
2086 | static | ||
2087 | ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, | ||
2088 | char *page) | ||
2089 | { | ||
2090 | return sprintf(page, "%s\n", | ||
2091 | o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); | ||
2092 | } | ||
2093 | |||
2094 | static | ||
2095 | ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, | ||
2096 | const char *page, size_t count) | ||
2097 | { | ||
2098 | unsigned int i; | ||
2099 | int ret; | ||
2100 | size_t len; | ||
2101 | |||
2102 | len = (page[count - 1] == '\n') ? count - 1 : count; | ||
2103 | if (!len) | ||
2104 | return -EINVAL; | ||
2105 | |||
2106 | for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { | ||
2107 | if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) | ||
2108 | continue; | ||
2109 | |||
2110 | ret = o2hb_global_hearbeat_mode_set(i); | ||
2111 | if (!ret) | ||
2112 | printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", | ||
2113 | o2hb_heartbeat_mode_desc[i]); | ||
2114 | return count; | ||
2115 | } | ||
2116 | |||
2117 | return -EINVAL; | ||
2118 | |||
2119 | } | ||
2120 | |||
1691 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { | 2121 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { |
1692 | .attr = { .ca_owner = THIS_MODULE, | 2122 | .attr = { .ca_owner = THIS_MODULE, |
1693 | .ca_name = "dead_threshold", | 2123 | .ca_name = "dead_threshold", |
@@ -1696,8 +2126,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold | |||
1696 | .store = o2hb_heartbeat_group_threshold_store, | 2126 | .store = o2hb_heartbeat_group_threshold_store, |
1697 | }; | 2127 | }; |
1698 | 2128 | ||
2129 | static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { | ||
2130 | .attr = { .ca_owner = THIS_MODULE, | ||
2131 | .ca_name = "mode", | ||
2132 | .ca_mode = S_IRUGO | S_IWUSR }, | ||
2133 | .show = o2hb_heartbeat_group_mode_show, | ||
2134 | .store = o2hb_heartbeat_group_mode_store, | ||
2135 | }; | ||
2136 | |||
1699 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { | 2137 | static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { |
1700 | &o2hb_heartbeat_group_attr_threshold.attr, | 2138 | &o2hb_heartbeat_group_attr_threshold.attr, |
2139 | &o2hb_heartbeat_group_attr_mode.attr, | ||
1701 | NULL, | 2140 | NULL, |
1702 | }; | 2141 | }; |
1703 | 2142 | ||
@@ -1963,3 +2402,34 @@ void o2hb_stop_all_regions(void) | |||
1963 | spin_unlock(&o2hb_live_lock); | 2402 | spin_unlock(&o2hb_live_lock); |
1964 | } | 2403 | } |
1965 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); | 2404 | EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); |
2405 | |||
2406 | int o2hb_get_all_regions(char *region_uuids, u8 max_regions) | ||
2407 | { | ||
2408 | struct o2hb_region *reg; | ||
2409 | int numregs = 0; | ||
2410 | char *p; | ||
2411 | |||
2412 | spin_lock(&o2hb_live_lock); | ||
2413 | |||
2414 | p = region_uuids; | ||
2415 | list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { | ||
2416 | mlog(0, "Region: %s\n", config_item_name(®->hr_item)); | ||
2417 | if (numregs < max_regions) { | ||
2418 | memcpy(p, config_item_name(®->hr_item), | ||
2419 | O2HB_MAX_REGION_NAME_LEN); | ||
2420 | p += O2HB_MAX_REGION_NAME_LEN; | ||
2421 | } | ||
2422 | numregs++; | ||
2423 | } | ||
2424 | |||
2425 | spin_unlock(&o2hb_live_lock); | ||
2426 | |||
2427 | return numregs; | ||
2428 | } | ||
2429 | EXPORT_SYMBOL_GPL(o2hb_get_all_regions); | ||
2430 | |||
2431 | int o2hb_global_heartbeat_active(void) | ||
2432 | { | ||
2433 | return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); | ||
2434 | } | ||
2435 | EXPORT_SYMBOL(o2hb_global_heartbeat_active); | ||
diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 2f1649253b49..00ad8e8fea51 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h | |||
@@ -31,6 +31,8 @@ | |||
31 | 31 | ||
32 | #define O2HB_REGION_TIMEOUT_MS 2000 | 32 | #define O2HB_REGION_TIMEOUT_MS 2000 |
33 | 33 | ||
34 | #define O2HB_MAX_REGION_NAME_LEN 32 | ||
35 | |||
34 | /* number of changes to be seen as live */ | 36 | /* number of changes to be seen as live */ |
35 | #define O2HB_LIVE_THRESHOLD 2 | 37 | #define O2HB_LIVE_THRESHOLD 2 |
36 | /* number of equal samples to be seen as dead */ | 38 | /* number of equal samples to be seen as dead */ |
@@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); | |||
81 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); | 83 | int o2hb_check_node_heartbeating_from_callback(u8 node_num); |
82 | int o2hb_check_local_node_heartbeating(void); | 84 | int o2hb_check_local_node_heartbeating(void); |
83 | void o2hb_stop_all_regions(void); | 85 | void o2hb_stop_all_regions(void); |
86 | int o2hb_get_all_regions(char *region_uuids, u8 numregions); | ||
87 | int o2hb_global_heartbeat_active(void); | ||
84 | 88 | ||
85 | #endif /* O2CLUSTER_HEARTBEAT_H */ | 89 | #endif /* O2CLUSTER_HEARTBEAT_H */ |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index fd96e2a2fa56..ea2ed9f56c94 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
@@ -119,7 +119,8 @@ | |||
119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 119 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 120 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ | 121 | #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ |
122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ | 122 | #define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ |
123 | #define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */ | ||
123 | 124 | ||
124 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) | 125 | #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) |
125 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) | 126 | #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) |
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index ed0c9f367fed..bb240647ca5f 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c | |||
@@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group, | |||
711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); | 711 | config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); |
712 | spin_lock_init(&node->nd_lock); | 712 | spin_lock_init(&node->nd_lock); |
713 | 713 | ||
714 | mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); | ||
715 | |||
714 | return &node->nd_item; | 716 | return &node->nd_item; |
715 | } | 717 | } |
716 | 718 | ||
@@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group, | |||
744 | } | 746 | } |
745 | write_unlock(&cluster->cl_nodes_lock); | 747 | write_unlock(&cluster->cl_nodes_lock); |
746 | 748 | ||
749 | mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", | ||
750 | config_item_name(&node->nd_item)); | ||
751 | |||
747 | config_item_put(item); | 752 | config_item_put(item); |
748 | } | 753 | } |
749 | 754 | ||
diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 5b9854bad571..49b594325bec 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h | |||
@@ -36,4 +36,10 @@ | |||
36 | /* host name, group name, cluster name all 64 bytes */ | 36 | /* host name, group name, cluster name all 64 bytes */ |
37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN | 37 | #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN |
38 | 38 | ||
39 | /* | ||
40 | * Maximum number of global heartbeat regions allowed. | ||
41 | * **CAUTION** Changing this number will break dlm compatibility. | ||
42 | */ | ||
43 | #define O2NM_MAX_REGIONS 32 | ||
44 | |||
39 | #endif /* _OCFS2_NODEMANAGER_H */ | 45 | #endif /* _OCFS2_NODEMANAGER_H */ |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1361997cf205..9aa426e42123 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, | |||
977 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, | 977 | int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, |
978 | size_t caller_veclen, u8 target_node, int *status) | 978 | size_t caller_veclen, u8 target_node, int *status) |
979 | { | 979 | { |
980 | int ret; | 980 | int ret = 0; |
981 | struct o2net_msg *msg = NULL; | 981 | struct o2net_msg *msg = NULL; |
982 | size_t veclen, caller_bytes = 0; | 982 | size_t veclen, caller_bytes = 0; |
983 | struct kvec *vec = NULL; | 983 | struct kvec *vec = NULL; |
@@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, | |||
1696 | { | 1696 | { |
1697 | o2quo_hb_down(node_num); | 1697 | o2quo_hb_down(node_num); |
1698 | 1698 | ||
1699 | if (!node) | ||
1700 | return; | ||
1701 | |||
1699 | if (node_num != o2nm_this_node()) | 1702 | if (node_num != o2nm_this_node()) |
1700 | o2net_disconnect_node(node); | 1703 | o2net_disconnect_node(node); |
1701 | 1704 | ||
@@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, | |||
1709 | 1712 | ||
1710 | o2quo_hb_up(node_num); | 1713 | o2quo_hb_up(node_num); |
1711 | 1714 | ||
1715 | BUG_ON(!node); | ||
1716 | |||
1712 | /* ensure an immediate connect attempt */ | 1717 | /* ensure an immediate connect attempt */ |
1713 | nn->nn_last_connect_attempt = jiffies - | 1718 | nn->nn_last_connect_attempt = jiffies - |
1714 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); | 1719 | (msecs_to_jiffies(o2net_reconnect_delay()) + 1); |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 96fa7ebc530c..15fdbdf9eb4b 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -129,7 +129,7 @@ struct o2net_node { | |||
129 | 129 | ||
130 | struct o2net_sock_container { | 130 | struct o2net_sock_container { |
131 | struct kref sc_kref; | 131 | struct kref sc_kref; |
132 | /* the next two are vaild for the life time of the sc */ | 132 | /* the next two are valid for the life time of the sc */ |
133 | struct socket *sc_sock; | 133 | struct socket *sc_sock; |
134 | struct o2nm_node *sc_node; | 134 | struct o2nm_node *sc_node; |
135 | 135 | ||
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index b4957c7d9fe2..edaded48e7e9 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -40,6 +40,14 @@ | |||
40 | #include "inode.h" | 40 | #include "inode.h" |
41 | #include "super.h" | 41 | #include "super.h" |
42 | 42 | ||
43 | void ocfs2_dentry_attach_gen(struct dentry *dentry) | ||
44 | { | ||
45 | unsigned long gen = | ||
46 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; | ||
47 | BUG_ON(dentry->d_inode); | ||
48 | dentry->d_fsdata = (void *)gen; | ||
49 | } | ||
50 | |||
43 | 51 | ||
44 | static int ocfs2_dentry_revalidate(struct dentry *dentry, | 52 | static int ocfs2_dentry_revalidate(struct dentry *dentry, |
45 | struct nameidata *nd) | 53 | struct nameidata *nd) |
@@ -51,11 +59,20 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
51 | mlog_entry("(0x%p, '%.*s')\n", dentry, | 59 | mlog_entry("(0x%p, '%.*s')\n", dentry, |
52 | dentry->d_name.len, dentry->d_name.name); | 60 | dentry->d_name.len, dentry->d_name.name); |
53 | 61 | ||
54 | /* Never trust a negative dentry - force a new lookup. */ | 62 | /* For a negative dentry - |
63 | * check the generation number of the parent and compare with the | ||
64 | * one stored in the inode. | ||
65 | */ | ||
55 | if (inode == NULL) { | 66 | if (inode == NULL) { |
56 | mlog(0, "negative dentry: %.*s\n", dentry->d_name.len, | 67 | unsigned long gen = (unsigned long) dentry->d_fsdata; |
57 | dentry->d_name.name); | 68 | unsigned long pgen = |
58 | goto bail; | 69 | OCFS2_I(dentry->d_parent->d_inode)->ip_dir_lock_gen; |
70 | mlog(0, "negative dentry: %.*s parent gen: %lu " | ||
71 | "dentry gen: %lu\n", | ||
72 | dentry->d_name.len, dentry->d_name.name, pgen, gen); | ||
73 | if (gen != pgen) | ||
74 | goto bail; | ||
75 | goto valid; | ||
59 | } | 76 | } |
60 | 77 | ||
61 | BUG_ON(!osb); | 78 | BUG_ON(!osb); |
@@ -96,6 +113,7 @@ static int ocfs2_dentry_revalidate(struct dentry *dentry, | |||
96 | goto bail; | 113 | goto bail; |
97 | } | 114 | } |
98 | 115 | ||
116 | valid: | ||
99 | ret = 1; | 117 | ret = 1; |
100 | 118 | ||
101 | bail: | 119 | bail: |
@@ -227,6 +245,12 @@ int ocfs2_dentry_attach_lock(struct dentry *dentry, | |||
227 | if (!inode) | 245 | if (!inode) |
228 | return 0; | 246 | return 0; |
229 | 247 | ||
248 | if (!dentry->d_inode && dentry->d_fsdata) { | ||
249 | /* Converting a negative dentry to positive | ||
250 | Clear dentry->d_fsdata */ | ||
251 | dentry->d_fsdata = dl = NULL; | ||
252 | } | ||
253 | |||
230 | if (dl) { | 254 | if (dl) { |
231 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, | 255 | mlog_bug_on_msg(dl->dl_parent_blkno != parent_blkno, |
232 | " \"%.*s\": old parent: %llu, new: %llu\n", | 256 | " \"%.*s\": old parent: %llu, new: %llu\n", |
@@ -452,6 +476,7 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode) | |||
452 | 476 | ||
453 | out: | 477 | out: |
454 | iput(inode); | 478 | iput(inode); |
479 | ocfs2_dentry_attach_gen(dentry); | ||
455 | } | 480 | } |
456 | 481 | ||
457 | /* | 482 | /* |
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index f5dd1789acf1..b79eff709958 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
@@ -64,5 +64,6 @@ void ocfs2_dentry_move(struct dentry *dentry, struct dentry *target, | |||
64 | struct inode *old_dir, struct inode *new_dir); | 64 | struct inode *old_dir, struct inode *new_dir); |
65 | 65 | ||
66 | extern spinlock_t dentry_attach_lock; | 66 | extern spinlock_t dentry_attach_lock; |
67 | void ocfs2_dentry_attach_gen(struct dentry *dentry); | ||
67 | 68 | ||
68 | #endif /* OCFS2_DCACHE_H */ | 69 | #endif /* OCFS2_DCACHE_H */ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f04ebcfffc4a..c49f6de0e7ab 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3931 | goto out_commit; | 3931 | goto out_commit; |
3932 | } | 3932 | } |
3933 | 3933 | ||
3934 | cpos = split_hash; | ||
3935 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | ||
3936 | data_ac, meta_ac, new_dx_leaves, | ||
3937 | num_dx_leaves); | ||
3938 | if (ret) { | ||
3939 | mlog_errno(ret); | ||
3940 | goto out_commit; | ||
3941 | } | ||
3942 | |||
3934 | for (i = 0; i < num_dx_leaves; i++) { | 3943 | for (i = 0; i < num_dx_leaves; i++) { |
3935 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), | 3944 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3936 | orig_dx_leaves[i], | 3945 | orig_dx_leaves[i], |
@@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, | |||
3939 | mlog_errno(ret); | 3948 | mlog_errno(ret); |
3940 | goto out_commit; | 3949 | goto out_commit; |
3941 | } | 3950 | } |
3942 | } | ||
3943 | 3951 | ||
3944 | cpos = split_hash; | 3952 | ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), |
3945 | ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, | 3953 | new_dx_leaves[i], |
3946 | data_ac, meta_ac, new_dx_leaves, | 3954 | OCFS2_JOURNAL_ACCESS_WRITE); |
3947 | num_dx_leaves); | 3955 | if (ret) { |
3948 | if (ret) { | 3956 | mlog_errno(ret); |
3949 | mlog_errno(ret); | 3957 | goto out_commit; |
3950 | goto out_commit; | 3958 | } |
3951 | } | 3959 | } |
3952 | 3960 | ||
3953 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, | 3961 | ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, |
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4b6ae2c13b47..b36d0bf77a5a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h | |||
@@ -445,7 +445,9 @@ enum { | |||
445 | DLM_LOCK_REQUEST_MSG, /* 515 */ | 445 | DLM_LOCK_REQUEST_MSG, /* 515 */ |
446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ | 446 | DLM_RECO_DATA_DONE_MSG, /* 516 */ |
447 | DLM_BEGIN_RECO_MSG, /* 517 */ | 447 | DLM_BEGIN_RECO_MSG, /* 517 */ |
448 | DLM_FINALIZE_RECO_MSG /* 518 */ | 448 | DLM_FINALIZE_RECO_MSG, /* 518 */ |
449 | DLM_QUERY_REGION, /* 519 */ | ||
450 | DLM_QUERY_NODEINFO, /* 520 */ | ||
449 | }; | 451 | }; |
450 | 452 | ||
451 | struct dlm_reco_node_data | 453 | struct dlm_reco_node_data |
@@ -727,6 +729,31 @@ struct dlm_cancel_join | |||
727 | u8 domain[O2NM_MAX_NAME_LEN]; | 729 | u8 domain[O2NM_MAX_NAME_LEN]; |
728 | }; | 730 | }; |
729 | 731 | ||
732 | struct dlm_query_region { | ||
733 | u8 qr_node; | ||
734 | u8 qr_numregions; | ||
735 | u8 qr_namelen; | ||
736 | u8 pad1; | ||
737 | u8 qr_domain[O2NM_MAX_NAME_LEN]; | ||
738 | u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; | ||
739 | }; | ||
740 | |||
741 | struct dlm_node_info { | ||
742 | u8 ni_nodenum; | ||
743 | u8 pad1; | ||
744 | u16 ni_ipv4_port; | ||
745 | u32 ni_ipv4_address; | ||
746 | }; | ||
747 | |||
748 | struct dlm_query_nodeinfo { | ||
749 | u8 qn_nodenum; | ||
750 | u8 qn_numnodes; | ||
751 | u8 qn_namelen; | ||
752 | u8 pad1; | ||
753 | u8 qn_domain[O2NM_MAX_NAME_LEN]; | ||
754 | struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; | ||
755 | }; | ||
756 | |||
730 | struct dlm_exit_domain | 757 | struct dlm_exit_domain |
731 | { | 758 | { |
732 | u8 node_idx; | 759 | u8 node_idx; |
@@ -1030,6 +1057,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, | |||
1030 | struct dlm_lock_resource *res); | 1057 | struct dlm_lock_resource *res); |
1031 | void dlm_clean_master_list(struct dlm_ctxt *dlm, | 1058 | void dlm_clean_master_list(struct dlm_ctxt *dlm, |
1032 | u8 dead_node); | 1059 | u8 dead_node); |
1060 | void dlm_force_free_mles(struct dlm_ctxt *dlm); | ||
1033 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); | 1061 | int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); |
1034 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); | 1062 | int __dlm_lockres_has_locks(struct dlm_lock_resource *res); |
1035 | int __dlm_lockres_unused(struct dlm_lock_resource *res); | 1063 | int __dlm_lockres_unused(struct dlm_lock_resource *res); |
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5efdd37dfe48..272ec8631a51 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c | |||
@@ -493,7 +493,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
493 | struct hlist_head *bucket; | 493 | struct hlist_head *bucket; |
494 | struct hlist_node *list; | 494 | struct hlist_node *list; |
495 | int i, out = 0; | 495 | int i, out = 0; |
496 | unsigned long total = 0, longest = 0, bktcnt; | 496 | unsigned long total = 0, longest = 0, bucket_count = 0; |
497 | 497 | ||
498 | out += snprintf(db->buf + out, db->len - out, | 498 | out += snprintf(db->buf + out, db->len - out, |
499 | "Dumping MLEs for Domain: %s\n", dlm->name); | 499 | "Dumping MLEs for Domain: %s\n", dlm->name); |
@@ -505,13 +505,13 @@ static int debug_mle_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
505 | mle = hlist_entry(list, struct dlm_master_list_entry, | 505 | mle = hlist_entry(list, struct dlm_master_list_entry, |
506 | master_hash_node); | 506 | master_hash_node); |
507 | ++total; | 507 | ++total; |
508 | ++bktcnt; | 508 | ++bucket_count; |
509 | if (db->len - out < 200) | 509 | if (db->len - out < 200) |
510 | continue; | 510 | continue; |
511 | out += dump_mle(mle, db->buf + out, db->len - out); | 511 | out += dump_mle(mle, db->buf + out, db->len - out); |
512 | } | 512 | } |
513 | longest = max(longest, bktcnt); | 513 | longest = max(longest, bucket_count); |
514 | bktcnt = 0; | 514 | bucket_count = 0; |
515 | } | 515 | } |
516 | spin_unlock(&dlm->master_lock); | 516 | spin_unlock(&dlm->master_lock); |
517 | 517 | ||
@@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | |||
636 | spin_lock(&dlm->track_lock); | 636 | spin_lock(&dlm->track_lock); |
637 | if (oldres) | 637 | if (oldres) |
638 | track_list = &oldres->tracking; | 638 | track_list = &oldres->tracking; |
639 | else | 639 | else { |
640 | track_list = &dlm->tracking_list; | 640 | track_list = &dlm->tracking_list; |
641 | if (list_empty(track_list)) { | ||
642 | dl = NULL; | ||
643 | spin_unlock(&dlm->track_lock); | ||
644 | goto bail; | ||
645 | } | ||
646 | } | ||
641 | 647 | ||
642 | list_for_each_entry(res, track_list, tracking) { | 648 | list_for_each_entry(res, track_list, tracking) { |
643 | if (&res->tracking == &dlm->tracking_list) | 649 | if (&res->tracking == &dlm->tracking_list) |
@@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) | |||
660 | } else | 666 | } else |
661 | dl = NULL; | 667 | dl = NULL; |
662 | 668 | ||
669 | bail: | ||
663 | /* passed to seq_show */ | 670 | /* passed to seq_show */ |
664 | return dl; | 671 | return dl; |
665 | } | 672 | } |
@@ -775,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) | |||
775 | 782 | ||
776 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ | 783 | /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ |
777 | out += snprintf(db->buf + out, db->len - out, | 784 | out += snprintf(db->buf + out, db->len - out, |
778 | "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); | 785 | "Domain: %s Key: 0x%08x Protocol: %d.%d\n", |
786 | dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, | ||
787 | dlm->dlm_locking_proto.pv_minor); | ||
779 | 788 | ||
780 | /* Thread Pid: xxx Node: xxx State: xxxxx */ | 789 | /* Thread Pid: xxx Node: xxx State: xxxxx */ |
781 | out += snprintf(db->buf + out, db->len - out, | 790 | out += snprintf(db->buf + out, db->len - out, |
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5abef0..58a93b953735 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -128,10 +128,14 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); | |||
128 | * will have a negotiated version with the same major number and a minor | 128 | * will have a negotiated version with the same major number and a minor |
129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should | 129 | * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should |
130 | * be used to determine what a running domain is actually using. | 130 | * be used to determine what a running domain is actually using. |
131 | * | ||
132 | * New in version 1.1: | ||
133 | * - Message DLM_QUERY_REGION added to support global heartbeat | ||
134 | * - Message DLM_QUERY_NODEINFO added to allow online node removes | ||
131 | */ | 135 | */ |
132 | static const struct dlm_protocol_version dlm_protocol = { | 136 | static const struct dlm_protocol_version dlm_protocol = { |
133 | .pv_major = 1, | 137 | .pv_major = 1, |
134 | .pv_minor = 0, | 138 | .pv_minor = 1, |
135 | }; | 139 | }; |
136 | 140 | ||
137 | #define DLM_DOMAIN_BACKOFF_MS 200 | 141 | #define DLM_DOMAIN_BACKOFF_MS 200 |
@@ -142,6 +146,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
142 | void **ret_data); | 146 | void **ret_data); |
143 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 147 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
144 | void **ret_data); | 148 | void **ret_data); |
149 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
150 | void *data, void **ret_data); | ||
145 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, | 151 | static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, |
146 | void **ret_data); | 152 | void **ret_data); |
147 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, | 153 | static int dlm_protocol_compare(struct dlm_protocol_version *existing, |
@@ -693,6 +699,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) | |||
693 | 699 | ||
694 | dlm_mark_domain_leaving(dlm); | 700 | dlm_mark_domain_leaving(dlm); |
695 | dlm_leave_domain(dlm); | 701 | dlm_leave_domain(dlm); |
702 | dlm_force_free_mles(dlm); | ||
696 | dlm_complete_dlm_shutdown(dlm); | 703 | dlm_complete_dlm_shutdown(dlm); |
697 | } | 704 | } |
698 | dlm_put(dlm); | 705 | dlm_put(dlm); |
@@ -920,6 +927,370 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, | |||
920 | return 0; | 927 | return 0; |
921 | } | 928 | } |
922 | 929 | ||
930 | static int dlm_match_regions(struct dlm_ctxt *dlm, | ||
931 | struct dlm_query_region *qr) | ||
932 | { | ||
933 | char *local = NULL, *remote = qr->qr_regions; | ||
934 | char *l, *r; | ||
935 | int localnr, i, j, foundit; | ||
936 | int status = 0; | ||
937 | |||
938 | if (!o2hb_global_heartbeat_active()) { | ||
939 | if (qr->qr_numregions) { | ||
940 | mlog(ML_ERROR, "Domain %s: Joining node %d has global " | ||
941 | "heartbeat enabled but local node %d does not\n", | ||
942 | qr->qr_domain, qr->qr_node, dlm->node_num); | ||
943 | status = -EINVAL; | ||
944 | } | ||
945 | goto bail; | ||
946 | } | ||
947 | |||
948 | if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { | ||
949 | mlog(ML_ERROR, "Domain %s: Local node %d has global " | ||
950 | "heartbeat enabled but joining node %d does not\n", | ||
951 | qr->qr_domain, dlm->node_num, qr->qr_node); | ||
952 | status = -EINVAL; | ||
953 | goto bail; | ||
954 | } | ||
955 | |||
956 | r = remote; | ||
957 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
958 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); | ||
959 | r += O2HB_MAX_REGION_NAME_LEN; | ||
960 | } | ||
961 | |||
962 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | ||
963 | if (!local) { | ||
964 | status = -ENOMEM; | ||
965 | goto bail; | ||
966 | } | ||
967 | |||
968 | localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS); | ||
969 | |||
970 | /* compare local regions with remote */ | ||
971 | l = local; | ||
972 | for (i = 0; i < localnr; ++i) { | ||
973 | foundit = 0; | ||
974 | r = remote; | ||
975 | for (j = 0; j <= qr->qr_numregions; ++j) { | ||
976 | if (!memcmp(l, r, O2HB_MAX_REGION_NAME_LEN)) { | ||
977 | foundit = 1; | ||
978 | break; | ||
979 | } | ||
980 | r += O2HB_MAX_REGION_NAME_LEN; | ||
981 | } | ||
982 | if (!foundit) { | ||
983 | status = -EINVAL; | ||
984 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
985 | "in local node %d but not in joining node %d\n", | ||
986 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, | ||
987 | dlm->node_num, qr->qr_node); | ||
988 | goto bail; | ||
989 | } | ||
990 | l += O2HB_MAX_REGION_NAME_LEN; | ||
991 | } | ||
992 | |||
993 | /* compare remote with local regions */ | ||
994 | r = remote; | ||
995 | for (i = 0; i < qr->qr_numregions; ++i) { | ||
996 | foundit = 0; | ||
997 | l = local; | ||
998 | for (j = 0; j < localnr; ++j) { | ||
999 | if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { | ||
1000 | foundit = 1; | ||
1001 | break; | ||
1002 | } | ||
1003 | l += O2HB_MAX_REGION_NAME_LEN; | ||
1004 | } | ||
1005 | if (!foundit) { | ||
1006 | status = -EINVAL; | ||
1007 | mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " | ||
1008 | "in joining node %d but not in local node %d\n", | ||
1009 | qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, | ||
1010 | qr->qr_node, dlm->node_num); | ||
1011 | goto bail; | ||
1012 | } | ||
1013 | r += O2HB_MAX_REGION_NAME_LEN; | ||
1014 | } | ||
1015 | |||
1016 | bail: | ||
1017 | kfree(local); | ||
1018 | |||
1019 | return status; | ||
1020 | } | ||
1021 | |||
1022 | static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1023 | { | ||
1024 | struct dlm_query_region *qr = NULL; | ||
1025 | int status, ret = 0, i; | ||
1026 | char *p; | ||
1027 | |||
1028 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1029 | goto bail; | ||
1030 | |||
1031 | qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); | ||
1032 | if (!qr) { | ||
1033 | ret = -ENOMEM; | ||
1034 | mlog_errno(ret); | ||
1035 | goto bail; | ||
1036 | } | ||
1037 | |||
1038 | qr->qr_node = dlm->node_num; | ||
1039 | qr->qr_namelen = strlen(dlm->name); | ||
1040 | memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); | ||
1041 | /* if local hb, the numregions will be zero */ | ||
1042 | if (o2hb_global_heartbeat_active()) | ||
1043 | qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, | ||
1044 | O2NM_MAX_REGIONS); | ||
1045 | |||
1046 | p = qr->qr_regions; | ||
1047 | for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) | ||
1048 | mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); | ||
1049 | |||
1050 | i = -1; | ||
1051 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1052 | i + 1)) < O2NM_MAX_NODES) { | ||
1053 | if (i == dlm->node_num) | ||
1054 | continue; | ||
1055 | |||
1056 | mlog(0, "Sending regions to node %d\n", i); | ||
1057 | |||
1058 | ret = o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, | ||
1059 | sizeof(struct dlm_query_region), | ||
1060 | i, &status); | ||
1061 | if (ret >= 0) | ||
1062 | ret = status; | ||
1063 | if (ret) { | ||
1064 | mlog(ML_ERROR, "Region mismatch %d, node %d\n", | ||
1065 | ret, i); | ||
1066 | break; | ||
1067 | } | ||
1068 | } | ||
1069 | |||
1070 | bail: | ||
1071 | kfree(qr); | ||
1072 | return ret; | ||
1073 | } | ||
1074 | |||
1075 | static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | ||
1076 | void *data, void **ret_data) | ||
1077 | { | ||
1078 | struct dlm_query_region *qr; | ||
1079 | struct dlm_ctxt *dlm = NULL; | ||
1080 | int status = 0; | ||
1081 | int locked = 0; | ||
1082 | |||
1083 | qr = (struct dlm_query_region *) msg->buf; | ||
1084 | |||
1085 | mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, | ||
1086 | qr->qr_domain); | ||
1087 | |||
1088 | status = -EINVAL; | ||
1089 | |||
1090 | spin_lock(&dlm_domain_lock); | ||
1091 | dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); | ||
1092 | if (!dlm) { | ||
1093 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1094 | "before join domain\n", qr->qr_node, qr->qr_domain); | ||
1095 | goto bail; | ||
1096 | } | ||
1097 | |||
1098 | spin_lock(&dlm->spinlock); | ||
1099 | locked = 1; | ||
1100 | if (dlm->joining_node != qr->qr_node) { | ||
1101 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1102 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | ||
1103 | dlm->joining_node); | ||
1104 | goto bail; | ||
1105 | } | ||
1106 | |||
1107 | /* Support for global heartbeat was added in 1.1 */ | ||
1108 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1109 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1110 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | ||
1111 | "but active dlm protocol is %d.%d\n", qr->qr_node, | ||
1112 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | ||
1113 | dlm->dlm_locking_proto.pv_minor); | ||
1114 | goto bail; | ||
1115 | } | ||
1116 | |||
1117 | status = dlm_match_regions(dlm, qr); | ||
1118 | |||
1119 | bail: | ||
1120 | if (locked) | ||
1121 | spin_unlock(&dlm->spinlock); | ||
1122 | spin_unlock(&dlm_domain_lock); | ||
1123 | |||
1124 | return status; | ||
1125 | } | ||
1126 | |||
1127 | static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) | ||
1128 | { | ||
1129 | struct o2nm_node *local; | ||
1130 | struct dlm_node_info *remote; | ||
1131 | int i, j; | ||
1132 | int status = 0; | ||
1133 | |||
1134 | for (j = 0; j < qn->qn_numnodes; ++j) | ||
1135 | mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, | ||
1136 | &(qn->qn_nodes[j].ni_ipv4_address), | ||
1137 | ntohs(qn->qn_nodes[j].ni_ipv4_port)); | ||
1138 | |||
1139 | for (i = 0; i < O2NM_MAX_NODES && !status; ++i) { | ||
1140 | local = o2nm_get_node_by_num(i); | ||
1141 | remote = NULL; | ||
1142 | for (j = 0; j < qn->qn_numnodes; ++j) { | ||
1143 | if (qn->qn_nodes[j].ni_nodenum == i) { | ||
1144 | remote = &(qn->qn_nodes[j]); | ||
1145 | break; | ||
1146 | } | ||
1147 | } | ||
1148 | |||
1149 | if (!local && !remote) | ||
1150 | continue; | ||
1151 | |||
1152 | if ((local && !remote) || (!local && remote)) | ||
1153 | status = -EINVAL; | ||
1154 | |||
1155 | if (!status && | ||
1156 | ((remote->ni_nodenum != local->nd_num) || | ||
1157 | (remote->ni_ipv4_port != local->nd_ipv4_port) || | ||
1158 | (remote->ni_ipv4_address != local->nd_ipv4_address))) | ||
1159 | status = -EINVAL; | ||
1160 | |||
1161 | if (status) { | ||
1162 | if (remote && !local) | ||
1163 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1164 | "registered in joining node %d but not in " | ||
1165 | "local node %d\n", qn->qn_domain, | ||
1166 | remote->ni_nodenum, | ||
1167 | &(remote->ni_ipv4_address), | ||
1168 | ntohs(remote->ni_ipv4_port), | ||
1169 | qn->qn_nodenum, dlm->node_num); | ||
1170 | if (local && !remote) | ||
1171 | mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " | ||
1172 | "registered in local node %d but not in " | ||
1173 | "joining node %d\n", qn->qn_domain, | ||
1174 | local->nd_num, &(local->nd_ipv4_address), | ||
1175 | ntohs(local->nd_ipv4_port), | ||
1176 | dlm->node_num, qn->qn_nodenum); | ||
1177 | BUG_ON((!local && !remote)); | ||
1178 | } | ||
1179 | |||
1180 | if (local) | ||
1181 | o2nm_node_put(local); | ||
1182 | } | ||
1183 | |||
1184 | return status; | ||
1185 | } | ||
1186 | |||
1187 | static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) | ||
1188 | { | ||
1189 | struct dlm_query_nodeinfo *qn = NULL; | ||
1190 | struct o2nm_node *node; | ||
1191 | int ret = 0, status, count, i; | ||
1192 | |||
1193 | if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) | ||
1194 | goto bail; | ||
1195 | |||
1196 | qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); | ||
1197 | if (!qn) { | ||
1198 | ret = -ENOMEM; | ||
1199 | mlog_errno(ret); | ||
1200 | goto bail; | ||
1201 | } | ||
1202 | |||
1203 | for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { | ||
1204 | node = o2nm_get_node_by_num(i); | ||
1205 | if (!node) | ||
1206 | continue; | ||
1207 | qn->qn_nodes[count].ni_nodenum = node->nd_num; | ||
1208 | qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; | ||
1209 | qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; | ||
1210 | mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, | ||
1211 | &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); | ||
1212 | ++count; | ||
1213 | o2nm_node_put(node); | ||
1214 | } | ||
1215 | |||
1216 | qn->qn_nodenum = dlm->node_num; | ||
1217 | qn->qn_numnodes = count; | ||
1218 | qn->qn_namelen = strlen(dlm->name); | ||
1219 | memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); | ||
1220 | |||
1221 | i = -1; | ||
1222 | while ((i = find_next_bit(node_map, O2NM_MAX_NODES, | ||
1223 | i + 1)) < O2NM_MAX_NODES) { | ||
1224 | if (i == dlm->node_num) | ||
1225 | continue; | ||
1226 | |||
1227 | mlog(0, "Sending nodeinfo to node %d\n", i); | ||
1228 | |||
1229 | ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
1230 | qn, sizeof(struct dlm_query_nodeinfo), | ||
1231 | i, &status); | ||
1232 | if (ret >= 0) | ||
1233 | ret = status; | ||
1234 | if (ret) { | ||
1235 | mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); | ||
1236 | break; | ||
1237 | } | ||
1238 | } | ||
1239 | |||
1240 | bail: | ||
1241 | kfree(qn); | ||
1242 | return ret; | ||
1243 | } | ||
1244 | |||
1245 | static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, | ||
1246 | void *data, void **ret_data) | ||
1247 | { | ||
1248 | struct dlm_query_nodeinfo *qn; | ||
1249 | struct dlm_ctxt *dlm = NULL; | ||
1250 | int locked = 0, status = -EINVAL; | ||
1251 | |||
1252 | qn = (struct dlm_query_nodeinfo *) msg->buf; | ||
1253 | |||
1254 | mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, | ||
1255 | qn->qn_domain); | ||
1256 | |||
1257 | spin_lock(&dlm_domain_lock); | ||
1258 | dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); | ||
1259 | if (!dlm) { | ||
1260 | mlog(ML_ERROR, "Node %d queried nodes on domain %s before " | ||
1261 | "join domain\n", qn->qn_nodenum, qn->qn_domain); | ||
1262 | goto bail; | ||
1263 | } | ||
1264 | |||
1265 | spin_lock(&dlm->spinlock); | ||
1266 | locked = 1; | ||
1267 | if (dlm->joining_node != qn->qn_nodenum) { | ||
1268 | mlog(ML_ERROR, "Node %d queried nodes on domain %s but " | ||
1269 | "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, | ||
1270 | dlm->joining_node); | ||
1271 | goto bail; | ||
1272 | } | ||
1273 | |||
1274 | /* Support for node query was added in 1.1 */ | ||
1275 | if (dlm->dlm_locking_proto.pv_major == 1 && | ||
1276 | dlm->dlm_locking_proto.pv_minor == 0) { | ||
1277 | mlog(ML_ERROR, "Node %d queried nodes on domain %s " | ||
1278 | "but active dlm protocol is %d.%d\n", qn->qn_nodenum, | ||
1279 | qn->qn_domain, dlm->dlm_locking_proto.pv_major, | ||
1280 | dlm->dlm_locking_proto.pv_minor); | ||
1281 | goto bail; | ||
1282 | } | ||
1283 | |||
1284 | status = dlm_match_nodes(dlm, qn); | ||
1285 | |||
1286 | bail: | ||
1287 | if (locked) | ||
1288 | spin_unlock(&dlm->spinlock); | ||
1289 | spin_unlock(&dlm_domain_lock); | ||
1290 | |||
1291 | return status; | ||
1292 | } | ||
1293 | |||
923 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, | 1294 | static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, |
924 | void **ret_data) | 1295 | void **ret_data) |
925 | { | 1296 | { |
@@ -1240,6 +1611,20 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) | |||
1240 | set_bit(dlm->node_num, dlm->domain_map); | 1611 | set_bit(dlm->node_num, dlm->domain_map); |
1241 | spin_unlock(&dlm->spinlock); | 1612 | spin_unlock(&dlm->spinlock); |
1242 | 1613 | ||
1614 | /* Support for global heartbeat and node info was added in 1.1 */ | ||
1615 | if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { | ||
1616 | status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); | ||
1617 | if (status) { | ||
1618 | mlog_errno(status); | ||
1619 | goto bail; | ||
1620 | } | ||
1621 | status = dlm_send_regions(dlm, ctxt->yes_resp_map); | ||
1622 | if (status) { | ||
1623 | mlog_errno(status); | ||
1624 | goto bail; | ||
1625 | } | ||
1626 | } | ||
1627 | |||
1243 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); | 1628 | dlm_send_join_asserts(dlm, ctxt->yes_resp_map); |
1244 | 1629 | ||
1245 | /* Joined state *must* be set before the joining node | 1630 | /* Joined state *must* be set before the joining node |
@@ -1806,7 +2191,21 @@ static int dlm_register_net_handlers(void) | |||
1806 | sizeof(struct dlm_cancel_join), | 2191 | sizeof(struct dlm_cancel_join), |
1807 | dlm_cancel_join_handler, | 2192 | dlm_cancel_join_handler, |
1808 | NULL, NULL, &dlm_join_handlers); | 2193 | NULL, NULL, &dlm_join_handlers); |
2194 | if (status) | ||
2195 | goto bail; | ||
2196 | |||
2197 | status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, | ||
2198 | sizeof(struct dlm_query_region), | ||
2199 | dlm_query_region_handler, | ||
2200 | NULL, NULL, &dlm_join_handlers); | ||
1809 | 2201 | ||
2202 | if (status) | ||
2203 | goto bail; | ||
2204 | |||
2205 | status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, | ||
2206 | sizeof(struct dlm_query_nodeinfo), | ||
2207 | dlm_query_nodeinfo_handler, | ||
2208 | NULL, NULL, &dlm_join_handlers); | ||
1810 | bail: | 2209 | bail: |
1811 | if (status < 0) | 2210 | if (status < 0) |
1812 | dlm_unregister_net_handlers(); | 2211 | dlm_unregister_net_handlers(); |
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ffb4c68dafa4..f564b0e5f80d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c | |||
@@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, | |||
3433 | wake_up(&res->wq); | 3433 | wake_up(&res->wq); |
3434 | wake_up(&dlm->migration_wq); | 3434 | wake_up(&dlm->migration_wq); |
3435 | } | 3435 | } |
3436 | |||
3437 | void dlm_force_free_mles(struct dlm_ctxt *dlm) | ||
3438 | { | ||
3439 | int i; | ||
3440 | struct hlist_head *bucket; | ||
3441 | struct dlm_master_list_entry *mle; | ||
3442 | struct hlist_node *tmp, *list; | ||
3443 | |||
3444 | /* | ||
3445 | * We notified all other nodes that we are exiting the domain and | ||
3446 | * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still | ||
3447 | * around we force free them and wake any processes that are waiting | ||
3448 | * on the mles | ||
3449 | */ | ||
3450 | spin_lock(&dlm->spinlock); | ||
3451 | spin_lock(&dlm->master_lock); | ||
3452 | |||
3453 | BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); | ||
3454 | BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); | ||
3455 | |||
3456 | for (i = 0; i < DLM_HASH_BUCKETS; i++) { | ||
3457 | bucket = dlm_master_hash(dlm, i); | ||
3458 | hlist_for_each_safe(list, tmp, bucket) { | ||
3459 | mle = hlist_entry(list, struct dlm_master_list_entry, | ||
3460 | master_hash_node); | ||
3461 | if (mle->type != DLM_MLE_BLOCK) { | ||
3462 | mlog(ML_ERROR, "bad mle: %p\n", mle); | ||
3463 | dlm_print_one_mle(mle); | ||
3464 | } | ||
3465 | atomic_set(&mle->woken, 1); | ||
3466 | wake_up(&mle->wq); | ||
3467 | |||
3468 | __dlm_unlink_mle(dlm, mle); | ||
3469 | __dlm_mle_detach_hb_events(dlm, mle); | ||
3470 | __dlm_put_mle(mle); | ||
3471 | } | ||
3472 | } | ||
3473 | spin_unlock(&dlm->master_lock); | ||
3474 | spin_unlock(&dlm->spinlock); | ||
3475 | } | ||
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index c2903b84bb7a..75e115f1bd73 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c | |||
@@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) | |||
400 | if (inode) { | 400 | if (inode) { |
401 | ip = DLMFS_I(inode); | 401 | ip = DLMFS_I(inode); |
402 | 402 | ||
403 | inode->i_ino = get_next_ino(); | ||
403 | inode->i_mode = mode; | 404 | inode->i_mode = mode; |
404 | inode->i_uid = current_fsuid(); | 405 | inode->i_uid = current_fsuid(); |
405 | inode->i_gid = current_fsgid(); | 406 | inode->i_gid = current_fsgid(); |
@@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, | |||
425 | if (!inode) | 426 | if (!inode) |
426 | return NULL; | 427 | return NULL; |
427 | 428 | ||
429 | inode->i_ino = get_next_ino(); | ||
428 | inode->i_mode = mode; | 430 | inode->i_mode = mode; |
429 | inode->i_uid = current_fsuid(); | 431 | inode->i_uid = current_fsuid(); |
430 | inode->i_gid = current_fsgid(); | 432 | inode->i_gid = current_fsgid(); |
@@ -612,6 +614,7 @@ static const struct file_operations dlmfs_file_operations = { | |||
612 | .poll = dlmfs_file_poll, | 614 | .poll = dlmfs_file_poll, |
613 | .read = dlmfs_file_read, | 615 | .read = dlmfs_file_read, |
614 | .write = dlmfs_file_write, | 616 | .write = dlmfs_file_write, |
617 | .llseek = default_llseek, | ||
615 | }; | 618 | }; |
616 | 619 | ||
617 | static const struct inode_operations dlmfs_dir_inode_operations = { | 620 | static const struct inode_operations dlmfs_dir_inode_operations = { |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 5e02a893f46e..e8d94d722ecb 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -3635,10 +3635,18 @@ static int ocfs2_data_convert_worker(struct ocfs2_lock_res *lockres, | |||
3635 | { | 3635 | { |
3636 | struct inode *inode; | 3636 | struct inode *inode; |
3637 | struct address_space *mapping; | 3637 | struct address_space *mapping; |
3638 | struct ocfs2_inode_info *oi; | ||
3638 | 3639 | ||
3639 | inode = ocfs2_lock_res_inode(lockres); | 3640 | inode = ocfs2_lock_res_inode(lockres); |
3640 | mapping = inode->i_mapping; | 3641 | mapping = inode->i_mapping; |
3641 | 3642 | ||
3643 | if (S_ISDIR(inode->i_mode)) { | ||
3644 | oi = OCFS2_I(inode); | ||
3645 | oi->ip_dir_lock_gen++; | ||
3646 | mlog(0, "generation: %u\n", oi->ip_dir_lock_gen); | ||
3647 | goto out; | ||
3648 | } | ||
3649 | |||
3642 | if (!S_ISREG(inode->i_mode)) | 3650 | if (!S_ISREG(inode->i_mode)) |
3643 | goto out; | 3651 | goto out; |
3644 | 3652 | ||
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d1ce48e1b3d6..1d596d8c4a4a 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -84,6 +84,7 @@ enum { | |||
84 | OI_LS_PARENT, | 84 | OI_LS_PARENT, |
85 | OI_LS_RENAME1, | 85 | OI_LS_RENAME1, |
86 | OI_LS_RENAME2, | 86 | OI_LS_RENAME2, |
87 | OI_LS_REFLINK_TARGET, | ||
87 | }; | 88 | }; |
88 | 89 | ||
89 | int ocfs2_dlm_init(struct ocfs2_super *osb); | 90 | int ocfs2_dlm_init(struct ocfs2_super *osb); |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 9a03c151b5ce..77b4c04a2809 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -64,12 +64,6 @@ | |||
64 | 64 | ||
65 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
66 | 66 | ||
67 | static int ocfs2_sync_inode(struct inode *inode) | ||
68 | { | ||
69 | filemap_fdatawrite(inode->i_mapping); | ||
70 | return sync_mapping_buffers(inode->i_mapping); | ||
71 | } | ||
72 | |||
73 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) | 67 | static int ocfs2_init_file_private(struct inode *inode, struct file *file) |
74 | { | 68 | { |
75 | struct ocfs2_file_private *fp; | 69 | struct ocfs2_file_private *fp; |
@@ -180,16 +174,12 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
180 | { | 174 | { |
181 | int err = 0; | 175 | int err = 0; |
182 | journal_t *journal; | 176 | journal_t *journal; |
183 | struct dentry *dentry = file->f_path.dentry; | ||
184 | struct inode *inode = file->f_mapping->host; | 177 | struct inode *inode = file->f_mapping->host; |
185 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 178 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
186 | 179 | ||
187 | mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", file, dentry, datasync, | 180 | mlog_entry("(0x%p, %d, 0x%p, '%.*s')\n", file, datasync, |
188 | dentry->d_name.len, dentry->d_name.name); | 181 | file->f_path.dentry, file->f_path.dentry->d_name.len, |
189 | 182 | file->f_path.dentry->d_name.name); | |
190 | err = ocfs2_sync_inode(dentry->d_inode); | ||
191 | if (err) | ||
192 | goto bail; | ||
193 | 183 | ||
194 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { | 184 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { |
195 | /* | 185 | /* |
@@ -197,8 +187,7 @@ static int ocfs2_sync_file(struct file *file, int datasync) | |||
197 | * platter | 187 | * platter |
198 | */ | 188 | */ |
199 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 189 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
200 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, | 190 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
201 | NULL, BLKDEV_IFL_WAIT); | ||
202 | goto bail; | 191 | goto bail; |
203 | } | 192 | } |
204 | 193 | ||
@@ -370,7 +359,7 @@ static int ocfs2_cow_file_pos(struct inode *inode, | |||
370 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) | 359 | if (!(ext_flags & OCFS2_EXT_REFCOUNTED)) |
371 | goto out; | 360 | goto out; |
372 | 361 | ||
373 | return ocfs2_refcount_cow(inode, fe_bh, cpos, 1, cpos+1); | 362 | return ocfs2_refcount_cow(inode, NULL, fe_bh, cpos, 1, cpos+1); |
374 | 363 | ||
375 | out: | 364 | out: |
376 | return status; | 365 | return status; |
@@ -807,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
807 | block_end = block_start + (1 << inode->i_blkbits); | 796 | block_end = block_start + (1 << inode->i_blkbits); |
808 | 797 | ||
809 | /* | 798 | /* |
810 | * block_start is block-aligned. Bump it by one to | 799 | * block_start is block-aligned. Bump it by one to force |
811 | * force ocfs2_{prepare,commit}_write() to zero the | 800 | * __block_write_begin and block_commit_write to zero the |
812 | * whole block. | 801 | * whole block. |
813 | */ | 802 | */ |
814 | ret = ocfs2_prepare_write_nolock(inode, page, | 803 | ret = __block_write_begin(page, block_start + 1, 0, |
815 | block_start + 1, | 804 | ocfs2_get_block); |
816 | block_start + 1); | ||
817 | if (ret < 0) { | 805 | if (ret < 0) { |
818 | mlog_errno(ret); | 806 | mlog_errno(ret); |
819 | goto out_unlock; | 807 | goto out_unlock; |
@@ -913,8 +901,8 @@ static int ocfs2_zero_extend_get_range(struct inode *inode, | |||
913 | zero_clusters = last_cpos - zero_cpos; | 901 | zero_clusters = last_cpos - zero_cpos; |
914 | 902 | ||
915 | if (needs_cow) { | 903 | if (needs_cow) { |
916 | rc = ocfs2_refcount_cow(inode, di_bh, zero_cpos, zero_clusters, | 904 | rc = ocfs2_refcount_cow(inode, NULL, di_bh, zero_cpos, |
917 | UINT_MAX); | 905 | zero_clusters, UINT_MAX); |
918 | if (rc) { | 906 | if (rc) { |
919 | mlog_errno(rc); | 907 | mlog_errno(rc); |
920 | goto out; | 908 | goto out; |
@@ -2062,6 +2050,7 @@ out: | |||
2062 | } | 2050 | } |
2063 | 2051 | ||
2064 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | 2052 | static int ocfs2_prepare_inode_for_refcount(struct inode *inode, |
2053 | struct file *file, | ||
2065 | loff_t pos, size_t count, | 2054 | loff_t pos, size_t count, |
2066 | int *meta_level) | 2055 | int *meta_level) |
2067 | { | 2056 | { |
@@ -2079,7 +2068,7 @@ static int ocfs2_prepare_inode_for_refcount(struct inode *inode, | |||
2079 | 2068 | ||
2080 | *meta_level = 1; | 2069 | *meta_level = 1; |
2081 | 2070 | ||
2082 | ret = ocfs2_refcount_cow(inode, di_bh, cpos, clusters, UINT_MAX); | 2071 | ret = ocfs2_refcount_cow(inode, file, di_bh, cpos, clusters, UINT_MAX); |
2083 | if (ret) | 2072 | if (ret) |
2084 | mlog_errno(ret); | 2073 | mlog_errno(ret); |
2085 | out: | 2074 | out: |
@@ -2087,7 +2076,7 @@ out: | |||
2087 | return ret; | 2076 | return ret; |
2088 | } | 2077 | } |
2089 | 2078 | ||
2090 | static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | 2079 | static int ocfs2_prepare_inode_for_write(struct file *file, |
2091 | loff_t *ppos, | 2080 | loff_t *ppos, |
2092 | size_t count, | 2081 | size_t count, |
2093 | int appending, | 2082 | int appending, |
@@ -2095,6 +2084,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
2095 | int *has_refcount) | 2084 | int *has_refcount) |
2096 | { | 2085 | { |
2097 | int ret = 0, meta_level = 0; | 2086 | int ret = 0, meta_level = 0; |
2087 | struct dentry *dentry = file->f_path.dentry; | ||
2098 | struct inode *inode = dentry->d_inode; | 2088 | struct inode *inode = dentry->d_inode; |
2099 | loff_t saved_pos, end; | 2089 | loff_t saved_pos, end; |
2100 | 2090 | ||
@@ -2150,6 +2140,7 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry, | |||
2150 | meta_level = -1; | 2140 | meta_level = -1; |
2151 | 2141 | ||
2152 | ret = ocfs2_prepare_inode_for_refcount(inode, | 2142 | ret = ocfs2_prepare_inode_for_refcount(inode, |
2143 | file, | ||
2153 | saved_pos, | 2144 | saved_pos, |
2154 | count, | 2145 | count, |
2155 | &meta_level); | 2146 | &meta_level); |
@@ -2232,6 +2223,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, | |||
2232 | struct file *file = iocb->ki_filp; | 2223 | struct file *file = iocb->ki_filp; |
2233 | struct inode *inode = file->f_path.dentry->d_inode; | 2224 | struct inode *inode = file->f_path.dentry->d_inode; |
2234 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 2225 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
2226 | int full_coherency = !(osb->s_mount_opt & | ||
2227 | OCFS2_MOUNT_COHERENCY_BUFFERED); | ||
2235 | 2228 | ||
2236 | mlog_entry("(0x%p, %u, '%.*s')\n", file, | 2229 | mlog_entry("(0x%p, %u, '%.*s')\n", file, |
2237 | (unsigned int)nr_segs, | 2230 | (unsigned int)nr_segs, |
@@ -2255,16 +2248,39 @@ relock: | |||
2255 | have_alloc_sem = 1; | 2248 | have_alloc_sem = 1; |
2256 | } | 2249 | } |
2257 | 2250 | ||
2258 | /* concurrent O_DIRECT writes are allowed */ | 2251 | /* |
2259 | rw_level = !direct_io; | 2252 | * Concurrent O_DIRECT writes are allowed with |
2253 | * mount_option "coherency=buffered". | ||
2254 | */ | ||
2255 | rw_level = (!direct_io || full_coherency); | ||
2256 | |||
2260 | ret = ocfs2_rw_lock(inode, rw_level); | 2257 | ret = ocfs2_rw_lock(inode, rw_level); |
2261 | if (ret < 0) { | 2258 | if (ret < 0) { |
2262 | mlog_errno(ret); | 2259 | mlog_errno(ret); |
2263 | goto out_sems; | 2260 | goto out_sems; |
2264 | } | 2261 | } |
2265 | 2262 | ||
2263 | /* | ||
2264 | * O_DIRECT writes with "coherency=full" need to take EX cluster | ||
2265 | * inode_lock to guarantee coherency. | ||
2266 | */ | ||
2267 | if (direct_io && full_coherency) { | ||
2268 | /* | ||
2269 | * We need to take and drop the inode lock to force | ||
2270 | * other nodes to drop their caches. Buffered I/O | ||
2271 | * already does this in write_begin(). | ||
2272 | */ | ||
2273 | ret = ocfs2_inode_lock(inode, NULL, 1); | ||
2274 | if (ret < 0) { | ||
2275 | mlog_errno(ret); | ||
2276 | goto out_sems; | ||
2277 | } | ||
2278 | |||
2279 | ocfs2_inode_unlock(inode, 1); | ||
2280 | } | ||
2281 | |||
2266 | can_do_direct = direct_io; | 2282 | can_do_direct = direct_io; |
2267 | ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos, | 2283 | ret = ocfs2_prepare_inode_for_write(file, ppos, |
2268 | iocb->ki_left, appending, | 2284 | iocb->ki_left, appending, |
2269 | &can_do_direct, &has_refcount); | 2285 | &can_do_direct, &has_refcount); |
2270 | if (ret < 0) { | 2286 | if (ret < 0) { |
@@ -2312,17 +2328,6 @@ relock: | |||
2312 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, | 2328 | written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, |
2313 | ppos, count, ocount); | 2329 | ppos, count, ocount); |
2314 | if (written < 0) { | 2330 | if (written < 0) { |
2315 | /* | ||
2316 | * direct write may have instantiated a few | ||
2317 | * blocks outside i_size. Trim these off again. | ||
2318 | * Don't need i_size_read because we hold i_mutex. | ||
2319 | * | ||
2320 | * XXX(truncate): this looks buggy because ocfs2 did not | ||
2321 | * actually implement ->truncate. Take a look at | ||
2322 | * the new truncate sequence and update this accordingly | ||
2323 | */ | ||
2324 | if (*ppos + count > inode->i_size) | ||
2325 | truncate_setsize(inode, inode->i_size); | ||
2326 | ret = written; | 2331 | ret = written; |
2327 | goto out_dio; | 2332 | goto out_dio; |
2328 | } | 2333 | } |
@@ -2394,7 +2399,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, | |||
2394 | { | 2399 | { |
2395 | int ret; | 2400 | int ret; |
2396 | 2401 | ||
2397 | ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos, | 2402 | ret = ocfs2_prepare_inode_for_write(out, &sd->pos, |
2398 | sd->total_len, 0, NULL, NULL); | 2403 | sd->total_len, 0, NULL, NULL); |
2399 | if (ret < 0) { | 2404 | if (ret < 0) { |
2400 | mlog_errno(ret); | 2405 | mlog_errno(ret); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index eece3e05d9d0..f935fd6600dd 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -335,6 +335,7 @@ void ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
335 | else | 335 | else |
336 | inode->i_fop = &ocfs2_dops_no_plocks; | 336 | inode->i_fop = &ocfs2_dops_no_plocks; |
337 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 337 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
338 | OCFS2_I(inode)->ip_dir_lock_gen = 1; | ||
338 | break; | 339 | break; |
339 | case S_IFLNK: | 340 | case S_IFLNK: |
340 | if (ocfs2_inode_is_fast_symlink(inode)) | 341 | if (ocfs2_inode_is_fast_symlink(inode)) |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 6de5a869db30..1c508b149b3a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -46,30 +46,28 @@ struct ocfs2_inode_info | |||
46 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
47 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
48 | u32 ip_open_count; | 48 | u32 ip_open_count; |
49 | u32 ip_clusters; | ||
50 | struct list_head ip_io_markers; | 49 | struct list_head ip_io_markers; |
50 | u32 ip_clusters; | ||
51 | 51 | ||
52 | u16 ip_dyn_features; | ||
52 | struct mutex ip_io_mutex; | 53 | struct mutex ip_io_mutex; |
53 | |||
54 | u32 ip_flags; /* see below */ | 54 | u32 ip_flags; /* see below */ |
55 | u32 ip_attr; /* inode attributes */ | 55 | u32 ip_attr; /* inode attributes */ |
56 | u16 ip_dyn_features; | ||
57 | 56 | ||
58 | /* protected by recovery_lock. */ | 57 | /* protected by recovery_lock. */ |
59 | struct inode *ip_next_orphan; | 58 | struct inode *ip_next_orphan; |
60 | 59 | ||
61 | u32 ip_dir_start_lookup; | ||
62 | |||
63 | struct ocfs2_caching_info ip_metadata_cache; | 60 | struct ocfs2_caching_info ip_metadata_cache; |
64 | |||
65 | struct ocfs2_extent_map ip_extent_map; | 61 | struct ocfs2_extent_map ip_extent_map; |
66 | |||
67 | struct inode vfs_inode; | 62 | struct inode vfs_inode; |
68 | struct jbd2_inode ip_jinode; | 63 | struct jbd2_inode ip_jinode; |
69 | 64 | ||
65 | u32 ip_dir_start_lookup; | ||
66 | |||
70 | /* Only valid if the inode is the dir. */ | 67 | /* Only valid if the inode is the dir. */ |
71 | u32 ip_last_used_slot; | 68 | u32 ip_last_used_slot; |
72 | u64 ip_last_used_group; | 69 | u64 ip_last_used_group; |
70 | u32 ip_dir_lock_gen; | ||
73 | 71 | ||
74 | struct ocfs2_alloc_reservation ip_la_data_resv; | 72 | struct ocfs2_alloc_reservation ip_la_data_resv; |
75 | }; | 73 | }; |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7d9d9c132cef..7a4868196152 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -26,6 +26,26 @@ | |||
26 | 26 | ||
27 | #include <linux/ext2_fs.h> | 27 | #include <linux/ext2_fs.h> |
28 | 28 | ||
29 | #define o2info_from_user(a, b) \ | ||
30 | copy_from_user(&(a), (b), sizeof(a)) | ||
31 | #define o2info_to_user(a, b) \ | ||
32 | copy_to_user((typeof(a) __user *)b, &(a), sizeof(a)) | ||
33 | |||
34 | /* | ||
35 | * This call is void because we are already reporting an error that may | ||
36 | * be -EFAULT. The error will be returned from the ioctl(2) call. It's | ||
37 | * just a best-effort to tell userspace that this request caused the error. | ||
38 | */ | ||
39 | static inline void __o2info_set_request_error(struct ocfs2_info_request *kreq, | ||
40 | struct ocfs2_info_request __user *req) | ||
41 | { | ||
42 | kreq->ir_flags |= OCFS2_INFO_FL_ERROR; | ||
43 | (void)put_user(kreq->ir_flags, (__u32 __user *)&(req->ir_flags)); | ||
44 | } | ||
45 | |||
46 | #define o2info_set_request_error(a, b) \ | ||
47 | __o2info_set_request_error((struct ocfs2_info_request *)&(a), b) | ||
48 | |||
29 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) | 49 | static int ocfs2_get_inode_attr(struct inode *inode, unsigned *flags) |
30 | { | 50 | { |
31 | int status; | 51 | int status; |
@@ -109,6 +129,328 @@ bail: | |||
109 | return status; | 129 | return status; |
110 | } | 130 | } |
111 | 131 | ||
132 | int ocfs2_info_handle_blocksize(struct inode *inode, | ||
133 | struct ocfs2_info_request __user *req) | ||
134 | { | ||
135 | int status = -EFAULT; | ||
136 | struct ocfs2_info_blocksize oib; | ||
137 | |||
138 | if (o2info_from_user(oib, req)) | ||
139 | goto bail; | ||
140 | |||
141 | oib.ib_blocksize = inode->i_sb->s_blocksize; | ||
142 | oib.ib_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
143 | |||
144 | if (o2info_to_user(oib, req)) | ||
145 | goto bail; | ||
146 | |||
147 | status = 0; | ||
148 | bail: | ||
149 | if (status) | ||
150 | o2info_set_request_error(oib, req); | ||
151 | |||
152 | return status; | ||
153 | } | ||
154 | |||
155 | int ocfs2_info_handle_clustersize(struct inode *inode, | ||
156 | struct ocfs2_info_request __user *req) | ||
157 | { | ||
158 | int status = -EFAULT; | ||
159 | struct ocfs2_info_clustersize oic; | ||
160 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
161 | |||
162 | if (o2info_from_user(oic, req)) | ||
163 | goto bail; | ||
164 | |||
165 | oic.ic_clustersize = osb->s_clustersize; | ||
166 | oic.ic_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
167 | |||
168 | if (o2info_to_user(oic, req)) | ||
169 | goto bail; | ||
170 | |||
171 | status = 0; | ||
172 | bail: | ||
173 | if (status) | ||
174 | o2info_set_request_error(oic, req); | ||
175 | |||
176 | return status; | ||
177 | } | ||
178 | |||
179 | int ocfs2_info_handle_maxslots(struct inode *inode, | ||
180 | struct ocfs2_info_request __user *req) | ||
181 | { | ||
182 | int status = -EFAULT; | ||
183 | struct ocfs2_info_maxslots oim; | ||
184 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
185 | |||
186 | if (o2info_from_user(oim, req)) | ||
187 | goto bail; | ||
188 | |||
189 | oim.im_max_slots = osb->max_slots; | ||
190 | oim.im_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
191 | |||
192 | if (o2info_to_user(oim, req)) | ||
193 | goto bail; | ||
194 | |||
195 | status = 0; | ||
196 | bail: | ||
197 | if (status) | ||
198 | o2info_set_request_error(oim, req); | ||
199 | |||
200 | return status; | ||
201 | } | ||
202 | |||
203 | int ocfs2_info_handle_label(struct inode *inode, | ||
204 | struct ocfs2_info_request __user *req) | ||
205 | { | ||
206 | int status = -EFAULT; | ||
207 | struct ocfs2_info_label oil; | ||
208 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
209 | |||
210 | if (o2info_from_user(oil, req)) | ||
211 | goto bail; | ||
212 | |||
213 | memcpy(oil.il_label, osb->vol_label, OCFS2_MAX_VOL_LABEL_LEN); | ||
214 | oil.il_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
215 | |||
216 | if (o2info_to_user(oil, req)) | ||
217 | goto bail; | ||
218 | |||
219 | status = 0; | ||
220 | bail: | ||
221 | if (status) | ||
222 | o2info_set_request_error(oil, req); | ||
223 | |||
224 | return status; | ||
225 | } | ||
226 | |||
227 | int ocfs2_info_handle_uuid(struct inode *inode, | ||
228 | struct ocfs2_info_request __user *req) | ||
229 | { | ||
230 | int status = -EFAULT; | ||
231 | struct ocfs2_info_uuid oiu; | ||
232 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
233 | |||
234 | if (o2info_from_user(oiu, req)) | ||
235 | goto bail; | ||
236 | |||
237 | memcpy(oiu.iu_uuid_str, osb->uuid_str, OCFS2_TEXT_UUID_LEN + 1); | ||
238 | oiu.iu_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
239 | |||
240 | if (o2info_to_user(oiu, req)) | ||
241 | goto bail; | ||
242 | |||
243 | status = 0; | ||
244 | bail: | ||
245 | if (status) | ||
246 | o2info_set_request_error(oiu, req); | ||
247 | |||
248 | return status; | ||
249 | } | ||
250 | |||
251 | int ocfs2_info_handle_fs_features(struct inode *inode, | ||
252 | struct ocfs2_info_request __user *req) | ||
253 | { | ||
254 | int status = -EFAULT; | ||
255 | struct ocfs2_info_fs_features oif; | ||
256 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
257 | |||
258 | if (o2info_from_user(oif, req)) | ||
259 | goto bail; | ||
260 | |||
261 | oif.if_compat_features = osb->s_feature_compat; | ||
262 | oif.if_incompat_features = osb->s_feature_incompat; | ||
263 | oif.if_ro_compat_features = osb->s_feature_ro_compat; | ||
264 | oif.if_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
265 | |||
266 | if (o2info_to_user(oif, req)) | ||
267 | goto bail; | ||
268 | |||
269 | status = 0; | ||
270 | bail: | ||
271 | if (status) | ||
272 | o2info_set_request_error(oif, req); | ||
273 | |||
274 | return status; | ||
275 | } | ||
276 | |||
277 | int ocfs2_info_handle_journal_size(struct inode *inode, | ||
278 | struct ocfs2_info_request __user *req) | ||
279 | { | ||
280 | int status = -EFAULT; | ||
281 | struct ocfs2_info_journal_size oij; | ||
282 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
283 | |||
284 | if (o2info_from_user(oij, req)) | ||
285 | goto bail; | ||
286 | |||
287 | oij.ij_journal_size = osb->journal->j_inode->i_size; | ||
288 | |||
289 | oij.ij_req.ir_flags |= OCFS2_INFO_FL_FILLED; | ||
290 | |||
291 | if (o2info_to_user(oij, req)) | ||
292 | goto bail; | ||
293 | |||
294 | status = 0; | ||
295 | bail: | ||
296 | if (status) | ||
297 | o2info_set_request_error(oij, req); | ||
298 | |||
299 | return status; | ||
300 | } | ||
301 | |||
302 | int ocfs2_info_handle_unknown(struct inode *inode, | ||
303 | struct ocfs2_info_request __user *req) | ||
304 | { | ||
305 | int status = -EFAULT; | ||
306 | struct ocfs2_info_request oir; | ||
307 | |||
308 | if (o2info_from_user(oir, req)) | ||
309 | goto bail; | ||
310 | |||
311 | oir.ir_flags &= ~OCFS2_INFO_FL_FILLED; | ||
312 | |||
313 | if (o2info_to_user(oir, req)) | ||
314 | goto bail; | ||
315 | |||
316 | status = 0; | ||
317 | bail: | ||
318 | if (status) | ||
319 | o2info_set_request_error(oir, req); | ||
320 | |||
321 | return status; | ||
322 | } | ||
323 | |||
324 | /* | ||
325 | * Validate and distinguish OCFS2_IOC_INFO requests. | ||
326 | * | ||
327 | * - validate the magic number. | ||
328 | * - distinguish different requests. | ||
329 | * - validate size of different requests. | ||
330 | */ | ||
331 | int ocfs2_info_handle_request(struct inode *inode, | ||
332 | struct ocfs2_info_request __user *req) | ||
333 | { | ||
334 | int status = -EFAULT; | ||
335 | struct ocfs2_info_request oir; | ||
336 | |||
337 | if (o2info_from_user(oir, req)) | ||
338 | goto bail; | ||
339 | |||
340 | status = -EINVAL; | ||
341 | if (oir.ir_magic != OCFS2_INFO_MAGIC) | ||
342 | goto bail; | ||
343 | |||
344 | switch (oir.ir_code) { | ||
345 | case OCFS2_INFO_BLOCKSIZE: | ||
346 | if (oir.ir_size == sizeof(struct ocfs2_info_blocksize)) | ||
347 | status = ocfs2_info_handle_blocksize(inode, req); | ||
348 | break; | ||
349 | case OCFS2_INFO_CLUSTERSIZE: | ||
350 | if (oir.ir_size == sizeof(struct ocfs2_info_clustersize)) | ||
351 | status = ocfs2_info_handle_clustersize(inode, req); | ||
352 | break; | ||
353 | case OCFS2_INFO_MAXSLOTS: | ||
354 | if (oir.ir_size == sizeof(struct ocfs2_info_maxslots)) | ||
355 | status = ocfs2_info_handle_maxslots(inode, req); | ||
356 | break; | ||
357 | case OCFS2_INFO_LABEL: | ||
358 | if (oir.ir_size == sizeof(struct ocfs2_info_label)) | ||
359 | status = ocfs2_info_handle_label(inode, req); | ||
360 | break; | ||
361 | case OCFS2_INFO_UUID: | ||
362 | if (oir.ir_size == sizeof(struct ocfs2_info_uuid)) | ||
363 | status = ocfs2_info_handle_uuid(inode, req); | ||
364 | break; | ||
365 | case OCFS2_INFO_FS_FEATURES: | ||
366 | if (oir.ir_size == sizeof(struct ocfs2_info_fs_features)) | ||
367 | status = ocfs2_info_handle_fs_features(inode, req); | ||
368 | break; | ||
369 | case OCFS2_INFO_JOURNAL_SIZE: | ||
370 | if (oir.ir_size == sizeof(struct ocfs2_info_journal_size)) | ||
371 | status = ocfs2_info_handle_journal_size(inode, req); | ||
372 | break; | ||
373 | default: | ||
374 | status = ocfs2_info_handle_unknown(inode, req); | ||
375 | break; | ||
376 | } | ||
377 | |||
378 | bail: | ||
379 | return status; | ||
380 | } | ||
381 | |||
382 | int ocfs2_get_request_ptr(struct ocfs2_info *info, int idx, | ||
383 | u64 *req_addr, int compat_flag) | ||
384 | { | ||
385 | int status = -EFAULT; | ||
386 | u64 __user *bp = NULL; | ||
387 | |||
388 | if (compat_flag) { | ||
389 | #ifdef CONFIG_COMPAT | ||
390 | /* | ||
391 | * pointer bp stores the base address of a pointers array, | ||
392 | * which collects all addresses of separate request. | ||
393 | */ | ||
394 | bp = (u64 __user *)(unsigned long)compat_ptr(info->oi_requests); | ||
395 | #else | ||
396 | BUG(); | ||
397 | #endif | ||
398 | } else | ||
399 | bp = (u64 __user *)(unsigned long)(info->oi_requests); | ||
400 | |||
401 | if (o2info_from_user(*req_addr, bp + idx)) | ||
402 | goto bail; | ||
403 | |||
404 | status = 0; | ||
405 | bail: | ||
406 | return status; | ||
407 | } | ||
408 | |||
409 | /* | ||
410 | * OCFS2_IOC_INFO handles an array of requests passed from userspace. | ||
411 | * | ||
412 | * ocfs2_info_handle() recevies a large info aggregation, grab and | ||
413 | * validate the request count from header, then break it into small | ||
414 | * pieces, later specific handlers can handle them one by one. | ||
415 | * | ||
416 | * Idea here is to make each separate request small enough to ensure | ||
417 | * a better backward&forward compatibility, since a small piece of | ||
418 | * request will be less likely to be broken if disk layout get changed. | ||
419 | */ | ||
420 | int ocfs2_info_handle(struct inode *inode, struct ocfs2_info *info, | ||
421 | int compat_flag) | ||
422 | { | ||
423 | int i, status = 0; | ||
424 | u64 req_addr; | ||
425 | struct ocfs2_info_request __user *reqp; | ||
426 | |||
427 | if ((info->oi_count > OCFS2_INFO_MAX_REQUEST) || | ||
428 | (!info->oi_requests)) { | ||
429 | status = -EINVAL; | ||
430 | goto bail; | ||
431 | } | ||
432 | |||
433 | for (i = 0; i < info->oi_count; i++) { | ||
434 | |||
435 | status = ocfs2_get_request_ptr(info, i, &req_addr, compat_flag); | ||
436 | if (status) | ||
437 | break; | ||
438 | |||
439 | reqp = (struct ocfs2_info_request *)(unsigned long)req_addr; | ||
440 | if (!reqp) { | ||
441 | status = -EINVAL; | ||
442 | goto bail; | ||
443 | } | ||
444 | |||
445 | status = ocfs2_info_handle_request(inode, reqp); | ||
446 | if (status) | ||
447 | break; | ||
448 | } | ||
449 | |||
450 | bail: | ||
451 | return status; | ||
452 | } | ||
453 | |||
112 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 454 | long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
113 | { | 455 | { |
114 | struct inode *inode = filp->f_path.dentry->d_inode; | 456 | struct inode *inode = filp->f_path.dentry->d_inode; |
@@ -120,6 +462,7 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
120 | struct reflink_arguments args; | 462 | struct reflink_arguments args; |
121 | const char *old_path, *new_path; | 463 | const char *old_path, *new_path; |
122 | bool preserve; | 464 | bool preserve; |
465 | struct ocfs2_info info; | ||
123 | 466 | ||
124 | switch (cmd) { | 467 | switch (cmd) { |
125 | case OCFS2_IOC_GETFLAGS: | 468 | case OCFS2_IOC_GETFLAGS: |
@@ -174,6 +517,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
174 | preserve = (args.preserve != 0); | 517 | preserve = (args.preserve != 0); |
175 | 518 | ||
176 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); | 519 | return ocfs2_reflink_ioctl(inode, old_path, new_path, preserve); |
520 | case OCFS2_IOC_INFO: | ||
521 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
522 | sizeof(struct ocfs2_info))) | ||
523 | return -EFAULT; | ||
524 | |||
525 | return ocfs2_info_handle(inode, &info, 0); | ||
177 | default: | 526 | default: |
178 | return -ENOTTY; | 527 | return -ENOTTY; |
179 | } | 528 | } |
@@ -185,6 +534,7 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
185 | bool preserve; | 534 | bool preserve; |
186 | struct reflink_arguments args; | 535 | struct reflink_arguments args; |
187 | struct inode *inode = file->f_path.dentry->d_inode; | 536 | struct inode *inode = file->f_path.dentry->d_inode; |
537 | struct ocfs2_info info; | ||
188 | 538 | ||
189 | switch (cmd) { | 539 | switch (cmd) { |
190 | case OCFS2_IOC32_GETFLAGS: | 540 | case OCFS2_IOC32_GETFLAGS: |
@@ -209,6 +559,12 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg) | |||
209 | 559 | ||
210 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), | 560 | return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path), |
211 | compat_ptr(args.new_path), preserve); | 561 | compat_ptr(args.new_path), preserve); |
562 | case OCFS2_IOC_INFO: | ||
563 | if (copy_from_user(&info, (struct ocfs2_info __user *)arg, | ||
564 | sizeof(struct ocfs2_info))) | ||
565 | return -EFAULT; | ||
566 | |||
567 | return ocfs2_info_handle(inode, &info, 1); | ||
212 | default: | 568 | default: |
213 | return -ENOIOCTLCMD; | 569 | return -ENOIOCTLCMD; |
214 | } | 570 | } |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9b57c0350ff9..faa2303dbf0a 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -301,7 +301,6 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
301 | { | 301 | { |
302 | int status = 0; | 302 | int status = 0; |
303 | unsigned int flushed; | 303 | unsigned int flushed; |
304 | unsigned long old_id; | ||
305 | struct ocfs2_journal *journal = NULL; | 304 | struct ocfs2_journal *journal = NULL; |
306 | 305 | ||
307 | mlog_entry_void(); | 306 | mlog_entry_void(); |
@@ -326,7 +325,7 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
326 | goto finally; | 325 | goto finally; |
327 | } | 326 | } |
328 | 327 | ||
329 | old_id = ocfs2_inc_trans_id(journal); | 328 | ocfs2_inc_trans_id(journal); |
330 | 329 | ||
331 | flushed = atomic_read(&journal->j_num_trans); | 330 | flushed = atomic_read(&journal->j_num_trans); |
332 | atomic_set(&journal->j_num_trans, 0); | 331 | atomic_set(&journal->j_num_trans, 0); |
@@ -342,9 +341,6 @@ finally: | |||
342 | return status; | 341 | return status; |
343 | } | 342 | } |
344 | 343 | ||
345 | /* pass it NULL and it will allocate a new handle object for you. If | ||
346 | * you pass it a handle however, it may still return error, in which | ||
347 | * case it has free'd the passed handle for you. */ | ||
348 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | 344 | handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) |
349 | { | 345 | { |
350 | journal_t *journal = osb->journal->j_journal; | 346 | journal_t *journal = osb->journal->j_journal; |
@@ -1888,6 +1884,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1888 | 1884 | ||
1889 | os = &osb->osb_orphan_scan; | 1885 | os = &osb->osb_orphan_scan; |
1890 | 1886 | ||
1887 | mlog(0, "Begin orphan scan\n"); | ||
1888 | |||
1891 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) | 1889 | if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) |
1892 | goto out; | 1890 | goto out; |
1893 | 1891 | ||
@@ -1920,6 +1918,7 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) | |||
1920 | unlock: | 1918 | unlock: |
1921 | ocfs2_orphan_scan_unlock(osb, seqno); | 1919 | ocfs2_orphan_scan_unlock(osb, seqno); |
1922 | out: | 1920 | out: |
1921 | mlog(0, "Orphan scan completed\n"); | ||
1923 | return; | 1922 | return; |
1924 | } | 1923 | } |
1925 | 1924 | ||
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index b5baaa8e710f..43e56b97f9c0 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -67,11 +67,12 @@ struct ocfs2_journal { | |||
67 | struct buffer_head *j_bh; /* Journal disk inode block */ | 67 | struct buffer_head *j_bh; /* Journal disk inode block */ |
68 | atomic_t j_num_trans; /* Number of transactions | 68 | atomic_t j_num_trans; /* Number of transactions |
69 | * currently in the system. */ | 69 | * currently in the system. */ |
70 | spinlock_t j_lock; | ||
70 | unsigned long j_trans_id; | 71 | unsigned long j_trans_id; |
71 | struct rw_semaphore j_trans_barrier; | 72 | struct rw_semaphore j_trans_barrier; |
72 | wait_queue_head_t j_checkpointed; | 73 | wait_queue_head_t j_checkpointed; |
73 | 74 | ||
74 | spinlock_t j_lock; | 75 | /* both fields protected by j_lock*/ |
75 | struct list_head j_la_cleanups; | 76 | struct list_head j_la_cleanups; |
76 | struct work_struct j_recovery_work; | 77 | struct work_struct j_recovery_work; |
77 | }; | 78 | }; |
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c index 4c18f4ad93b4..7e32db9c2c99 100644 --- a/fs/ocfs2/mmap.c +++ b/fs/ocfs2/mmap.c | |||
@@ -59,10 +59,11 @@ static int ocfs2_fault(struct vm_area_struct *area, struct vm_fault *vmf) | |||
59 | return ret; | 59 | return ret; |
60 | } | 60 | } |
61 | 61 | ||
62 | static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | 62 | static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh, |
63 | struct page *page) | 63 | struct page *page) |
64 | { | 64 | { |
65 | int ret; | 65 | int ret; |
66 | struct inode *inode = file->f_path.dentry->d_inode; | ||
66 | struct address_space *mapping = inode->i_mapping; | 67 | struct address_space *mapping = inode->i_mapping; |
67 | loff_t pos = page_offset(page); | 68 | loff_t pos = page_offset(page); |
68 | unsigned int len = PAGE_CACHE_SIZE; | 69 | unsigned int len = PAGE_CACHE_SIZE; |
@@ -111,7 +112,7 @@ static int __ocfs2_page_mkwrite(struct inode *inode, struct buffer_head *di_bh, | |||
111 | if (page->index == last_index) | 112 | if (page->index == last_index) |
112 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; | 113 | len = ((size - 1) & ~PAGE_CACHE_MASK) + 1; |
113 | 114 | ||
114 | ret = ocfs2_write_begin_nolock(mapping, pos, len, 0, &locked_page, | 115 | ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page, |
115 | &fsdata, di_bh, page); | 116 | &fsdata, di_bh, page); |
116 | if (ret) { | 117 | if (ret) { |
117 | if (ret != -ENOSPC) | 118 | if (ret != -ENOSPC) |
@@ -159,7 +160,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
159 | */ | 160 | */ |
160 | down_write(&OCFS2_I(inode)->ip_alloc_sem); | 161 | down_write(&OCFS2_I(inode)->ip_alloc_sem); |
161 | 162 | ||
162 | ret = __ocfs2_page_mkwrite(inode, di_bh, page); | 163 | ret = __ocfs2_page_mkwrite(vma->vm_file, di_bh, page); |
163 | 164 | ||
164 | up_write(&OCFS2_I(inode)->ip_alloc_sem); | 165 | up_write(&OCFS2_I(inode)->ip_alloc_sem); |
165 | 166 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a00dda2e4f16..ff5744e1e36f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -171,7 +171,8 @@ bail_add: | |||
171 | ret = ERR_PTR(status); | 171 | ret = ERR_PTR(status); |
172 | goto bail_unlock; | 172 | goto bail_unlock; |
173 | } | 173 | } |
174 | } | 174 | } else |
175 | ocfs2_dentry_attach_gen(dentry); | ||
175 | 176 | ||
176 | bail_unlock: | 177 | bail_unlock: |
177 | /* Don't drop the cluster lock until *after* the d_add -- | 178 | /* Don't drop the cluster lock until *after* the d_add -- |
@@ -741,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry, | |||
741 | goto out_commit; | 742 | goto out_commit; |
742 | } | 743 | } |
743 | 744 | ||
744 | atomic_inc(&inode->i_count); | 745 | ihold(inode); |
745 | dentry->d_op = &ocfs2_dentry_ops; | 746 | dentry->d_op = &ocfs2_dentry_ops; |
746 | d_instantiate(dentry, inode); | 747 | d_instantiate(dentry, inode); |
747 | 748 | ||
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..d8408217e3bd 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -150,26 +150,33 @@ typedef void (*ocfs2_lock_callback)(int status, unsigned long data); | |||
150 | struct ocfs2_lock_res { | 150 | struct ocfs2_lock_res { |
151 | void *l_priv; | 151 | void *l_priv; |
152 | struct ocfs2_lock_res_ops *l_ops; | 152 | struct ocfs2_lock_res_ops *l_ops; |
153 | spinlock_t l_lock; | 153 | |
154 | 154 | ||
155 | struct list_head l_blocked_list; | 155 | struct list_head l_blocked_list; |
156 | struct list_head l_mask_waiters; | 156 | struct list_head l_mask_waiters; |
157 | 157 | ||
158 | enum ocfs2_lock_type l_type; | ||
159 | unsigned long l_flags; | 158 | unsigned long l_flags; |
160 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; | 159 | char l_name[OCFS2_LOCK_ID_MAX_LEN]; |
161 | int l_level; | ||
162 | unsigned int l_ro_holders; | 160 | unsigned int l_ro_holders; |
163 | unsigned int l_ex_holders; | 161 | unsigned int l_ex_holders; |
164 | struct ocfs2_dlm_lksb l_lksb; | 162 | unsigned char l_level; |
163 | |||
164 | /* Data packed - type enum ocfs2_lock_type */ | ||
165 | unsigned char l_type; | ||
165 | 166 | ||
166 | /* used from AST/BAST funcs. */ | 167 | /* used from AST/BAST funcs. */ |
167 | enum ocfs2_ast_action l_action; | 168 | /* Data packed - enum type ocfs2_ast_action */ |
168 | enum ocfs2_unlock_action l_unlock_action; | 169 | unsigned char l_action; |
169 | int l_requested; | 170 | /* Data packed - enum type ocfs2_unlock_action */ |
170 | int l_blocking; | 171 | unsigned char l_unlock_action; |
172 | unsigned char l_requested; | ||
173 | unsigned char l_blocking; | ||
171 | unsigned int l_pending_gen; | 174 | unsigned int l_pending_gen; |
172 | 175 | ||
176 | spinlock_t l_lock; | ||
177 | |||
178 | struct ocfs2_dlm_lksb l_lksb; | ||
179 | |||
173 | wait_queue_head_t l_event; | 180 | wait_queue_head_t l_event; |
174 | 181 | ||
175 | struct list_head l_debug_list; | 182 | struct list_head l_debug_list; |
@@ -243,7 +250,7 @@ enum ocfs2_local_alloc_state | |||
243 | 250 | ||
244 | enum ocfs2_mount_options | 251 | enum ocfs2_mount_options |
245 | { | 252 | { |
246 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ | 253 | OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ |
247 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ | 254 | OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ |
248 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ | 255 | OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ |
249 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 256 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
@@ -256,6 +263,10 @@ enum ocfs2_mount_options | |||
256 | control lists */ | 263 | control lists */ |
257 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ | 264 | OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ |
258 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ | 265 | OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ |
266 | OCFS2_MOUNT_COHERENCY_BUFFERED = 1 << 12, /* Allow concurrent O_DIRECT | ||
267 | writes */ | ||
268 | OCFS2_MOUNT_HB_NONE = 1 << 13, /* No heartbeat */ | ||
269 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | ||
259 | }; | 270 | }; |
260 | 271 | ||
261 | #define OCFS2_OSB_SOFT_RO 0x0001 | 272 | #define OCFS2_OSB_SOFT_RO 0x0001 |
@@ -277,7 +288,8 @@ struct ocfs2_super | |||
277 | struct super_block *sb; | 288 | struct super_block *sb; |
278 | struct inode *root_inode; | 289 | struct inode *root_inode; |
279 | struct inode *sys_root_inode; | 290 | struct inode *sys_root_inode; |
280 | struct inode *system_inodes[NUM_SYSTEM_INODES]; | 291 | struct inode *global_system_inodes[NUM_GLOBAL_SYSTEM_INODES]; |
292 | struct inode **local_system_inodes; | ||
281 | 293 | ||
282 | struct ocfs2_slot_info *slot_info; | 294 | struct ocfs2_slot_info *slot_info; |
283 | 295 | ||
@@ -368,6 +380,8 @@ struct ocfs2_super | |||
368 | struct ocfs2_alloc_stats alloc_stats; | 380 | struct ocfs2_alloc_stats alloc_stats; |
369 | char dev_str[20]; /* "major,minor" of the device */ | 381 | char dev_str[20]; /* "major,minor" of the device */ |
370 | 382 | ||
383 | u8 osb_stackflags; | ||
384 | |||
371 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; | 385 | char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; |
372 | struct ocfs2_cluster_connection *cconn; | 386 | struct ocfs2_cluster_connection *cconn; |
373 | struct ocfs2_lock_res osb_super_lockres; | 387 | struct ocfs2_lock_res osb_super_lockres; |
@@ -601,10 +615,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) | |||
601 | return ret; | 615 | return ret; |
602 | } | 616 | } |
603 | 617 | ||
604 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | 618 | static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) |
605 | { | 619 | { |
606 | return (osb->s_feature_incompat & | 620 | return (osb->s_feature_incompat & |
607 | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); | 621 | (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | |
622 | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); | ||
623 | } | ||
624 | |||
625 | static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) | ||
626 | { | ||
627 | if (ocfs2_clusterinfo_valid(osb) && | ||
628 | memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
629 | OCFS2_STACK_LABEL_LEN)) | ||
630 | return 1; | ||
631 | return 0; | ||
632 | } | ||
633 | |||
634 | static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) | ||
635 | { | ||
636 | if (ocfs2_clusterinfo_valid(osb) && | ||
637 | !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, | ||
638 | OCFS2_STACK_LABEL_LEN)) | ||
639 | return 1; | ||
640 | return 0; | ||
641 | } | ||
642 | |||
643 | static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) | ||
644 | { | ||
645 | return ocfs2_o2cb_stack(osb) && | ||
646 | (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); | ||
608 | } | 647 | } |
609 | 648 | ||
610 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) | 649 | static inline int ocfs2_mount_local(struct ocfs2_super *osb) |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 33f1c9a8258d..c2e4f8222e2f 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
@@ -101,7 +101,8 @@ | |||
101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ | 101 | | OCFS2_FEATURE_INCOMPAT_META_ECC \ |
102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | 102 | | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ |
103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ | 103 | | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ |
104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) | 104 | | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ |
105 | | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) | ||
105 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | 106 | #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ |
106 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | 107 | | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ |
107 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) | 108 | | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) |
@@ -170,6 +171,13 @@ | |||
170 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 | 171 | #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 |
171 | 172 | ||
172 | /* | 173 | /* |
174 | * Incompat bit to indicate useable clusterinfo with stackflags for all | ||
175 | * cluster stacks (userspace adnd o2cb). If this bit is set, | ||
176 | * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. | ||
177 | */ | ||
178 | #define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 | ||
179 | |||
180 | /* | ||
173 | * backup superblock flag is used to indicate that this volume | 181 | * backup superblock flag is used to indicate that this volume |
174 | * has backup superblocks. | 182 | * has backup superblocks. |
175 | */ | 183 | */ |
@@ -235,18 +243,31 @@ | |||
235 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) | 243 | #define OCFS2_HAS_REFCOUNT_FL (0x0010) |
236 | 244 | ||
237 | /* Inode attributes, keep in sync with EXT2 */ | 245 | /* Inode attributes, keep in sync with EXT2 */ |
238 | #define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ | 246 | #define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */ |
239 | #define OCFS2_UNRM_FL (0x00000002) /* Undelete */ | 247 | #define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */ |
240 | #define OCFS2_COMPR_FL (0x00000004) /* Compress file */ | 248 | #define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */ |
241 | #define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ | 249 | #define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ |
242 | #define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ | 250 | #define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ |
243 | #define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ | 251 | #define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ |
244 | #define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ | 252 | #define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ |
245 | #define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ | 253 | #define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ |
246 | #define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ | 254 | /* Reserved for compression usage... */ |
247 | 255 | #define OCFS2_DIRTY_FL FS_DIRTY_FL | |
248 | #define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ | 256 | #define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ |
249 | #define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ | 257 | #define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ |
258 | #define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ | ||
259 | /* End compression flags --- maybe not all used */ | ||
260 | #define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */ | ||
261 | #define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ | ||
262 | #define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ | ||
263 | #define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ | ||
264 | #define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ | ||
265 | #define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ | ||
266 | #define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ | ||
267 | #define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ | ||
268 | |||
269 | #define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ | ||
270 | #define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ | ||
250 | 271 | ||
251 | /* | 272 | /* |
252 | * Extent record flags (e_node.leaf.flags) | 273 | * Extent record flags (e_node.leaf.flags) |
@@ -279,10 +300,13 @@ | |||
279 | #define OCFS2_VOL_UUID_LEN 16 | 300 | #define OCFS2_VOL_UUID_LEN 16 |
280 | #define OCFS2_MAX_VOL_LABEL_LEN 64 | 301 | #define OCFS2_MAX_VOL_LABEL_LEN 64 |
281 | 302 | ||
282 | /* The alternate, userspace stack fields */ | 303 | /* The cluster stack fields */ |
283 | #define OCFS2_STACK_LABEL_LEN 4 | 304 | #define OCFS2_STACK_LABEL_LEN 4 |
284 | #define OCFS2_CLUSTER_NAME_LEN 16 | 305 | #define OCFS2_CLUSTER_NAME_LEN 16 |
285 | 306 | ||
307 | /* Classic (historically speaking) cluster stack */ | ||
308 | #define OCFS2_CLASSIC_CLUSTER_STACK "o2cb" | ||
309 | |||
286 | /* Journal limits (in bytes) */ | 310 | /* Journal limits (in bytes) */ |
287 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) | 311 | #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) |
288 | 312 | ||
@@ -292,6 +316,11 @@ | |||
292 | */ | 316 | */ |
293 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | 317 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 |
294 | 318 | ||
319 | /* | ||
320 | * Cluster info flags (ocfs2_cluster_info.ci_stackflags) | ||
321 | */ | ||
322 | #define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01) | ||
323 | |||
295 | struct ocfs2_system_inode_info { | 324 | struct ocfs2_system_inode_info { |
296 | char *si_name; | 325 | char *si_name; |
297 | int si_iflags; | 326 | int si_iflags; |
@@ -309,6 +338,7 @@ enum { | |||
309 | USER_QUOTA_SYSTEM_INODE, | 338 | USER_QUOTA_SYSTEM_INODE, |
310 | GROUP_QUOTA_SYSTEM_INODE, | 339 | GROUP_QUOTA_SYSTEM_INODE, |
311 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE | 340 | #define OCFS2_LAST_GLOBAL_SYSTEM_INODE GROUP_QUOTA_SYSTEM_INODE |
341 | #define OCFS2_FIRST_LOCAL_SYSTEM_INODE ORPHAN_DIR_SYSTEM_INODE | ||
312 | ORPHAN_DIR_SYSTEM_INODE, | 342 | ORPHAN_DIR_SYSTEM_INODE, |
313 | EXTENT_ALLOC_SYSTEM_INODE, | 343 | EXTENT_ALLOC_SYSTEM_INODE, |
314 | INODE_ALLOC_SYSTEM_INODE, | 344 | INODE_ALLOC_SYSTEM_INODE, |
@@ -317,8 +347,12 @@ enum { | |||
317 | TRUNCATE_LOG_SYSTEM_INODE, | 347 | TRUNCATE_LOG_SYSTEM_INODE, |
318 | LOCAL_USER_QUOTA_SYSTEM_INODE, | 348 | LOCAL_USER_QUOTA_SYSTEM_INODE, |
319 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, | 349 | LOCAL_GROUP_QUOTA_SYSTEM_INODE, |
350 | #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE | ||
320 | NUM_SYSTEM_INODES | 351 | NUM_SYSTEM_INODES |
321 | }; | 352 | }; |
353 | #define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE | ||
354 | #define NUM_LOCAL_SYSTEM_INODES \ | ||
355 | (NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE) | ||
322 | 356 | ||
323 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | 357 | static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { |
324 | /* Global system inodes (single copy) */ | 358 | /* Global system inodes (single copy) */ |
@@ -347,6 +381,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { | |||
347 | /* Parameter passed from mount.ocfs2 to module */ | 381 | /* Parameter passed from mount.ocfs2 to module */ |
348 | #define OCFS2_HB_NONE "heartbeat=none" | 382 | #define OCFS2_HB_NONE "heartbeat=none" |
349 | #define OCFS2_HB_LOCAL "heartbeat=local" | 383 | #define OCFS2_HB_LOCAL "heartbeat=local" |
384 | #define OCFS2_HB_GLOBAL "heartbeat=global" | ||
350 | 385 | ||
351 | /* | 386 | /* |
352 | * OCFS2 directory file types. Only the low 3 bits are used. The | 387 | * OCFS2 directory file types. Only the low 3 bits are used. The |
@@ -553,9 +588,21 @@ struct ocfs2_slot_map_extended { | |||
553 | */ | 588 | */ |
554 | }; | 589 | }; |
555 | 590 | ||
591 | /* | ||
592 | * ci_stackflags is only valid if the incompat bit | ||
593 | * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set. | ||
594 | */ | ||
556 | struct ocfs2_cluster_info { | 595 | struct ocfs2_cluster_info { |
557 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; | 596 | /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; |
558 | __le32 ci_reserved; | 597 | union { |
598 | __le32 ci_reserved; | ||
599 | struct { | ||
600 | __u8 ci_stackflags; | ||
601 | __u8 ci_reserved1; | ||
602 | __u8 ci_reserved2; | ||
603 | __u8 ci_reserved3; | ||
604 | }; | ||
605 | }; | ||
559 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; | 606 | /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; |
560 | /*18*/ | 607 | /*18*/ |
561 | }; | 608 | }; |
@@ -592,9 +639,9 @@ struct ocfs2_super_block { | |||
592 | * group header */ | 639 | * group header */ |
593 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 640 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
594 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ | 641 | /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ |
595 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 642 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either |
596 | stack. Only valid | 643 | userspace or clusterinfo |
597 | with INCOMPAT flag. */ | 644 | INCOMPAT flag set. */ |
598 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size | 645 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
599 | for this fs*/ | 646 | for this fs*/ |
600 | __le16 s_reserved0; | 647 | __le16 s_reserved0; |
diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..b46f39bf7438 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h | |||
@@ -23,10 +23,10 @@ | |||
23 | /* | 23 | /* |
24 | * ioctl commands | 24 | * ioctl commands |
25 | */ | 25 | */ |
26 | #define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) | 26 | #define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS |
27 | #define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) | 27 | #define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS |
28 | #define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) | 28 | #define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS |
29 | #define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) | 29 | #define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS |
30 | 30 | ||
31 | /* | 31 | /* |
32 | * Space reservation / allocation / free ioctls and argument structure | 32 | * Space reservation / allocation / free ioctls and argument structure |
@@ -76,4 +76,99 @@ struct reflink_arguments { | |||
76 | }; | 76 | }; |
77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) | 77 | #define OCFS2_IOC_REFLINK _IOW('o', 4, struct reflink_arguments) |
78 | 78 | ||
79 | /* Following definitions dedicated for ocfs2_info_request ioctls. */ | ||
80 | #define OCFS2_INFO_MAX_REQUEST (50) | ||
81 | #define OCFS2_TEXT_UUID_LEN (OCFS2_VOL_UUID_LEN * 2) | ||
82 | |||
83 | /* Magic number of all requests */ | ||
84 | #define OCFS2_INFO_MAGIC (0x4F32494E) | ||
85 | |||
86 | /* | ||
87 | * Always try to separate info request into small pieces to | ||
88 | * guarantee the backward&forward compatibility. | ||
89 | */ | ||
90 | struct ocfs2_info { | ||
91 | __u64 oi_requests; /* Array of __u64 pointers to requests */ | ||
92 | __u32 oi_count; /* Number of requests in info_requests */ | ||
93 | __u32 oi_pad; | ||
94 | }; | ||
95 | |||
96 | struct ocfs2_info_request { | ||
97 | /*00*/ __u32 ir_magic; /* Magic number */ | ||
98 | __u32 ir_code; /* Info request code */ | ||
99 | __u32 ir_size; /* Size of request */ | ||
100 | __u32 ir_flags; /* Request flags */ | ||
101 | /*10*/ /* Request specific fields */ | ||
102 | }; | ||
103 | |||
104 | struct ocfs2_info_clustersize { | ||
105 | struct ocfs2_info_request ic_req; | ||
106 | __u32 ic_clustersize; | ||
107 | __u32 ic_pad; | ||
108 | }; | ||
109 | |||
110 | struct ocfs2_info_blocksize { | ||
111 | struct ocfs2_info_request ib_req; | ||
112 | __u32 ib_blocksize; | ||
113 | __u32 ib_pad; | ||
114 | }; | ||
115 | |||
116 | struct ocfs2_info_maxslots { | ||
117 | struct ocfs2_info_request im_req; | ||
118 | __u32 im_max_slots; | ||
119 | __u32 im_pad; | ||
120 | }; | ||
121 | |||
122 | struct ocfs2_info_label { | ||
123 | struct ocfs2_info_request il_req; | ||
124 | __u8 il_label[OCFS2_MAX_VOL_LABEL_LEN]; | ||
125 | } __attribute__ ((packed)); | ||
126 | |||
127 | struct ocfs2_info_uuid { | ||
128 | struct ocfs2_info_request iu_req; | ||
129 | __u8 iu_uuid_str[OCFS2_TEXT_UUID_LEN + 1]; | ||
130 | } __attribute__ ((packed)); | ||
131 | |||
132 | struct ocfs2_info_fs_features { | ||
133 | struct ocfs2_info_request if_req; | ||
134 | __u32 if_compat_features; | ||
135 | __u32 if_incompat_features; | ||
136 | __u32 if_ro_compat_features; | ||
137 | __u32 if_pad; | ||
138 | }; | ||
139 | |||
140 | struct ocfs2_info_journal_size { | ||
141 | struct ocfs2_info_request ij_req; | ||
142 | __u64 ij_journal_size; | ||
143 | }; | ||
144 | |||
145 | /* Codes for ocfs2_info_request */ | ||
146 | enum ocfs2_info_type { | ||
147 | OCFS2_INFO_CLUSTERSIZE = 1, | ||
148 | OCFS2_INFO_BLOCKSIZE, | ||
149 | OCFS2_INFO_MAXSLOTS, | ||
150 | OCFS2_INFO_LABEL, | ||
151 | OCFS2_INFO_UUID, | ||
152 | OCFS2_INFO_FS_FEATURES, | ||
153 | OCFS2_INFO_JOURNAL_SIZE, | ||
154 | OCFS2_INFO_NUM_TYPES | ||
155 | }; | ||
156 | |||
157 | /* Flags for struct ocfs2_info_request */ | ||
158 | /* Filled by the caller */ | ||
159 | #define OCFS2_INFO_FL_NON_COHERENT (0x00000001) /* Cluster coherency not | ||
160 | required. This is a hint. | ||
161 | It is up to ocfs2 whether | ||
162 | the request can be fulfilled | ||
163 | without locking. */ | ||
164 | /* Filled by ocfs2 */ | ||
165 | #define OCFS2_INFO_FL_FILLED (0x40000000) /* Filesystem understood | ||
166 | this request and | ||
167 | filled in the answer */ | ||
168 | |||
169 | #define OCFS2_INFO_FL_ERROR (0x80000000) /* Error happened during | ||
170 | request handling. */ | ||
171 | |||
172 | #define OCFS2_IOC_INFO _IOR('o', 5, struct ocfs2_info) | ||
173 | |||
79 | #endif /* OCFS2_IOCTL_H */ | 174 | #endif /* OCFS2_IOCTL_H */ |
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 0afeda83120f..b5f9160e93e9 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c | |||
@@ -49,6 +49,7 @@ | |||
49 | 49 | ||
50 | struct ocfs2_cow_context { | 50 | struct ocfs2_cow_context { |
51 | struct inode *inode; | 51 | struct inode *inode; |
52 | struct file *file; | ||
52 | u32 cow_start; | 53 | u32 cow_start; |
53 | u32 cow_len; | 54 | u32 cow_len; |
54 | struct ocfs2_extent_tree data_et; | 55 | struct ocfs2_extent_tree data_et; |
@@ -2932,13 +2933,16 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2932 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); | 2933 | u64 new_block = ocfs2_clusters_to_blocks(sb, new_cluster); |
2933 | struct page *page; | 2934 | struct page *page; |
2934 | pgoff_t page_index; | 2935 | pgoff_t page_index; |
2935 | unsigned int from, to; | 2936 | unsigned int from, to, readahead_pages; |
2936 | loff_t offset, end, map_end; | 2937 | loff_t offset, end, map_end; |
2937 | struct address_space *mapping = context->inode->i_mapping; | 2938 | struct address_space *mapping = context->inode->i_mapping; |
2938 | 2939 | ||
2939 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, | 2940 | mlog(0, "old_cluster %u, new %u, len %u at offset %u\n", old_cluster, |
2940 | new_cluster, new_len, cpos); | 2941 | new_cluster, new_len, cpos); |
2941 | 2942 | ||
2943 | readahead_pages = | ||
2944 | (ocfs2_cow_contig_clusters(sb) << | ||
2945 | OCFS2_SB(sb)->s_clustersize_bits) >> PAGE_CACHE_SHIFT; | ||
2942 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; | 2946 | offset = ((loff_t)cpos) << OCFS2_SB(sb)->s_clustersize_bits; |
2943 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); | 2947 | end = offset + (new_len << OCFS2_SB(sb)->s_clustersize_bits); |
2944 | /* | 2948 | /* |
@@ -2969,6 +2973,14 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle, | |||
2969 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) | 2973 | if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize) |
2970 | BUG_ON(PageDirty(page)); | 2974 | BUG_ON(PageDirty(page)); |
2971 | 2975 | ||
2976 | if (PageReadahead(page) && context->file) { | ||
2977 | page_cache_async_readahead(mapping, | ||
2978 | &context->file->f_ra, | ||
2979 | context->file, | ||
2980 | page, page_index, | ||
2981 | readahead_pages); | ||
2982 | } | ||
2983 | |||
2972 | if (!PageUptodate(page)) { | 2984 | if (!PageUptodate(page)) { |
2973 | ret = block_read_full_page(page, ocfs2_get_block); | 2985 | ret = block_read_full_page(page, ocfs2_get_block); |
2974 | if (ret) { | 2986 | if (ret) { |
@@ -3409,12 +3421,35 @@ static int ocfs2_replace_cow(struct ocfs2_cow_context *context) | |||
3409 | return ret; | 3421 | return ret; |
3410 | } | 3422 | } |
3411 | 3423 | ||
3424 | static void ocfs2_readahead_for_cow(struct inode *inode, | ||
3425 | struct file *file, | ||
3426 | u32 start, u32 len) | ||
3427 | { | ||
3428 | struct address_space *mapping; | ||
3429 | pgoff_t index; | ||
3430 | unsigned long num_pages; | ||
3431 | int cs_bits = OCFS2_SB(inode->i_sb)->s_clustersize_bits; | ||
3432 | |||
3433 | if (!file) | ||
3434 | return; | ||
3435 | |||
3436 | mapping = file->f_mapping; | ||
3437 | num_pages = (len << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3438 | if (!num_pages) | ||
3439 | num_pages = 1; | ||
3440 | |||
3441 | index = ((loff_t)start << cs_bits) >> PAGE_CACHE_SHIFT; | ||
3442 | page_cache_sync_readahead(mapping, &file->f_ra, file, | ||
3443 | index, num_pages); | ||
3444 | } | ||
3445 | |||
3412 | /* | 3446 | /* |
3413 | * Starting at cpos, try to CoW write_len clusters. Don't CoW | 3447 | * Starting at cpos, try to CoW write_len clusters. Don't CoW |
3414 | * past max_cpos. This will stop when it runs into a hole or an | 3448 | * past max_cpos. This will stop when it runs into a hole or an |
3415 | * unrefcounted extent. | 3449 | * unrefcounted extent. |
3416 | */ | 3450 | */ |
3417 | static int ocfs2_refcount_cow_hunk(struct inode *inode, | 3451 | static int ocfs2_refcount_cow_hunk(struct inode *inode, |
3452 | struct file *file, | ||
3418 | struct buffer_head *di_bh, | 3453 | struct buffer_head *di_bh, |
3419 | u32 cpos, u32 write_len, u32 max_cpos) | 3454 | u32 cpos, u32 write_len, u32 max_cpos) |
3420 | { | 3455 | { |
@@ -3443,6 +3478,8 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3443 | 3478 | ||
3444 | BUG_ON(cow_len == 0); | 3479 | BUG_ON(cow_len == 0); |
3445 | 3480 | ||
3481 | ocfs2_readahead_for_cow(inode, file, cow_start, cow_len); | ||
3482 | |||
3446 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); | 3483 | context = kzalloc(sizeof(struct ocfs2_cow_context), GFP_NOFS); |
3447 | if (!context) { | 3484 | if (!context) { |
3448 | ret = -ENOMEM; | 3485 | ret = -ENOMEM; |
@@ -3464,6 +3501,7 @@ static int ocfs2_refcount_cow_hunk(struct inode *inode, | |||
3464 | context->ref_root_bh = ref_root_bh; | 3501 | context->ref_root_bh = ref_root_bh; |
3465 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; | 3502 | context->cow_duplicate_clusters = ocfs2_duplicate_clusters_by_page; |
3466 | context->get_clusters = ocfs2_di_get_clusters; | 3503 | context->get_clusters = ocfs2_di_get_clusters; |
3504 | context->file = file; | ||
3467 | 3505 | ||
3468 | ocfs2_init_dinode_extent_tree(&context->data_et, | 3506 | ocfs2_init_dinode_extent_tree(&context->data_et, |
3469 | INODE_CACHE(inode), di_bh); | 3507 | INODE_CACHE(inode), di_bh); |
@@ -3492,6 +3530,7 @@ out: | |||
3492 | * clusters between cpos and cpos+write_len are safe to modify. | 3530 | * clusters between cpos and cpos+write_len are safe to modify. |
3493 | */ | 3531 | */ |
3494 | int ocfs2_refcount_cow(struct inode *inode, | 3532 | int ocfs2_refcount_cow(struct inode *inode, |
3533 | struct file *file, | ||
3495 | struct buffer_head *di_bh, | 3534 | struct buffer_head *di_bh, |
3496 | u32 cpos, u32 write_len, u32 max_cpos) | 3535 | u32 cpos, u32 write_len, u32 max_cpos) |
3497 | { | 3536 | { |
@@ -3511,7 +3550,7 @@ int ocfs2_refcount_cow(struct inode *inode, | |||
3511 | num_clusters = write_len; | 3550 | num_clusters = write_len; |
3512 | 3551 | ||
3513 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { | 3552 | if (ext_flags & OCFS2_EXT_REFCOUNTED) { |
3514 | ret = ocfs2_refcount_cow_hunk(inode, di_bh, cpos, | 3553 | ret = ocfs2_refcount_cow_hunk(inode, file, di_bh, cpos, |
3515 | num_clusters, max_cpos); | 3554 | num_clusters, max_cpos); |
3516 | if (ret) { | 3555 | if (ret) { |
3517 | mlog_errno(ret); | 3556 | mlog_errno(ret); |
@@ -4201,8 +4240,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, | |||
4201 | goto out; | 4240 | goto out; |
4202 | } | 4241 | } |
4203 | 4242 | ||
4204 | mutex_lock(&new_inode->i_mutex); | 4243 | mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); |
4205 | ret = ocfs2_inode_lock(new_inode, &new_bh, 1); | 4244 | ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, |
4245 | OI_LS_REFLINK_TARGET); | ||
4206 | if (ret) { | 4246 | if (ret) { |
4207 | mlog_errno(ret); | 4247 | mlog_errno(ret); |
4208 | goto out_unlock; | 4248 | goto out_unlock; |
diff --git a/fs/ocfs2/refcounttree.h b/fs/ocfs2/refcounttree.h index 9983ba1570e2..c8ce46f7d8e3 100644 --- a/fs/ocfs2/refcounttree.h +++ b/fs/ocfs2/refcounttree.h | |||
@@ -21,14 +21,14 @@ struct ocfs2_refcount_tree { | |||
21 | struct rb_node rf_node; | 21 | struct rb_node rf_node; |
22 | u64 rf_blkno; | 22 | u64 rf_blkno; |
23 | u32 rf_generation; | 23 | u32 rf_generation; |
24 | struct kref rf_getcnt; | ||
24 | struct rw_semaphore rf_sem; | 25 | struct rw_semaphore rf_sem; |
25 | struct ocfs2_lock_res rf_lockres; | 26 | struct ocfs2_lock_res rf_lockres; |
26 | struct kref rf_getcnt; | ||
27 | int rf_removed; | 27 | int rf_removed; |
28 | 28 | ||
29 | /* the following 4 fields are used by caching_info. */ | 29 | /* the following 4 fields are used by caching_info. */ |
30 | struct ocfs2_caching_info rf_ci; | ||
31 | spinlock_t rf_lock; | 30 | spinlock_t rf_lock; |
31 | struct ocfs2_caching_info rf_ci; | ||
32 | struct mutex rf_io_mutex; | 32 | struct mutex rf_io_mutex; |
33 | struct super_block *rf_sb; | 33 | struct super_block *rf_sb; |
34 | }; | 34 | }; |
@@ -52,7 +52,8 @@ int ocfs2_prepare_refcount_change_for_del(struct inode *inode, | |||
52 | u32 clusters, | 52 | u32 clusters, |
53 | int *credits, | 53 | int *credits, |
54 | int *ref_blocks); | 54 | int *ref_blocks); |
55 | int ocfs2_refcount_cow(struct inode *inode, struct buffer_head *di_bh, | 55 | int ocfs2_refcount_cow(struct inode *inode, |
56 | struct file *filep, struct buffer_head *di_bh, | ||
56 | u32 cpos, u32 write_len, u32 max_cpos); | 57 | u32 cpos, u32 write_len, u32 max_cpos); |
57 | 58 | ||
58 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, | 59 | typedef int (ocfs2_post_refcount_func)(struct inode *inode, |
diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e4259b80..3e78db361bc7 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c | |||
@@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, | |||
732 | struct ocfs2_alloc_reservation *resv, | 732 | struct ocfs2_alloc_reservation *resv, |
733 | int *cstart, int *clen) | 733 | int *cstart, int *clen) |
734 | { | 734 | { |
735 | unsigned int wanted = *clen; | ||
736 | |||
737 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) | 735 | if (resv == NULL || ocfs2_resmap_disabled(resmap)) |
738 | return -ENOSPC; | 736 | return -ENOSPC; |
739 | 737 | ||
740 | spin_lock(&resv_lock); | 738 | spin_lock(&resv_lock); |
741 | 739 | ||
742 | /* | ||
743 | * We don't want to over-allocate for temporary | ||
744 | * windows. Otherwise, we run the risk of fragmenting the | ||
745 | * allocation space. | ||
746 | */ | ||
747 | wanted = ocfs2_resv_window_bits(resmap, resv); | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | if (ocfs2_resv_empty(resv)) { | 740 | if (ocfs2_resv_empty(resv)) { |
752 | mlog(0, "empty reservation, find new window\n"); | 741 | /* |
742 | * We don't want to over-allocate for temporary | ||
743 | * windows. Otherwise, we run the risk of fragmenting the | ||
744 | * allocation space. | ||
745 | */ | ||
746 | unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); | ||
753 | 747 | ||
748 | if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) | ||
749 | wanted = *clen; | ||
750 | |||
751 | mlog(0, "empty reservation, find new window\n"); | ||
754 | /* | 752 | /* |
755 | * Try to get a window here. If it works, we must fall | 753 | * Try to get a window here. If it works, we must fall |
756 | * through and test the bitmap . This avoids some | 754 | * through and test the bitmap . This avoids some |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bfbd7e9e949f..ab4e0172cc1d 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
@@ -357,7 +357,7 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
357 | { | 357 | { |
358 | int status = 0; | 358 | int status = 0; |
359 | u64 blkno; | 359 | u64 blkno; |
360 | unsigned long long blocks, bytes; | 360 | unsigned long long blocks, bytes = 0; |
361 | unsigned int i; | 361 | unsigned int i; |
362 | struct buffer_head *bh; | 362 | struct buffer_head *bh; |
363 | 363 | ||
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 0d3049f696c5..19965b00c43c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c | |||
@@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) | |||
283 | /* for now we only have one cluster/node, make sure we see it | 283 | /* for now we only have one cluster/node, make sure we see it |
284 | * in the heartbeat universe */ | 284 | * in the heartbeat universe */ |
285 | if (!o2hb_check_local_node_heartbeating()) { | 285 | if (!o2hb_check_local_node_heartbeating()) { |
286 | if (o2hb_global_heartbeat_active()) | ||
287 | mlog(ML_ERROR, "Global heartbeat not started\n"); | ||
286 | rc = -EINVAL; | 288 | rc = -EINVAL; |
287 | goto out; | 289 | goto out; |
288 | } | 290 | } |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 2dc57bca0688..252e7c82f929 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
@@ -22,7 +22,6 @@ | |||
22 | #include <linux/miscdevice.h> | 22 | #include <linux/miscdevice.h> |
23 | #include <linux/mutex.h> | 23 | #include <linux/mutex.h> |
24 | #include <linux/slab.h> | 24 | #include <linux/slab.h> |
25 | #include <linux/smp_lock.h> | ||
26 | #include <linux/reboot.h> | 25 | #include <linux/reboot.h> |
27 | #include <asm/uaccess.h> | 26 | #include <asm/uaccess.h> |
28 | 27 | ||
@@ -612,12 +611,10 @@ static int ocfs2_control_open(struct inode *inode, struct file *file) | |||
612 | return -ENOMEM; | 611 | return -ENOMEM; |
613 | p->op_this_node = -1; | 612 | p->op_this_node = -1; |
614 | 613 | ||
615 | lock_kernel(); | ||
616 | mutex_lock(&ocfs2_control_lock); | 614 | mutex_lock(&ocfs2_control_lock); |
617 | file->private_data = p; | 615 | file->private_data = p; |
618 | list_add(&p->op_list, &ocfs2_control_private_list); | 616 | list_add(&p->op_list, &ocfs2_control_private_list); |
619 | mutex_unlock(&ocfs2_control_lock); | 617 | mutex_unlock(&ocfs2_control_lock); |
620 | unlock_kernel(); | ||
621 | 618 | ||
622 | return 0; | 619 | return 0; |
623 | } | 620 | } |
@@ -628,6 +625,7 @@ static const struct file_operations ocfs2_control_fops = { | |||
628 | .read = ocfs2_control_read, | 625 | .read = ocfs2_control_read, |
629 | .write = ocfs2_control_write, | 626 | .write = ocfs2_control_write, |
630 | .owner = THIS_MODULE, | 627 | .owner = THIS_MODULE, |
628 | .llseek = default_llseek, | ||
631 | }; | 629 | }; |
632 | 630 | ||
633 | static struct miscdevice ocfs2_control_device = { | 631 | static struct miscdevice ocfs2_control_device = { |
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8a286f54dca1..5fed60de7630 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -357,7 +357,7 @@ out: | |||
357 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | 357 | static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, |
358 | struct ocfs2_group_desc *bg, | 358 | struct ocfs2_group_desc *bg, |
359 | struct ocfs2_chain_list *cl, | 359 | struct ocfs2_chain_list *cl, |
360 | u64 p_blkno, u32 clusters) | 360 | u64 p_blkno, unsigned int clusters) |
361 | { | 361 | { |
362 | struct ocfs2_extent_list *el = &bg->bg_list; | 362 | struct ocfs2_extent_list *el = &bg->bg_list; |
363 | struct ocfs2_extent_rec *rec; | 363 | struct ocfs2_extent_rec *rec; |
@@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, | |||
369 | rec->e_blkno = cpu_to_le64(p_blkno); | 369 | rec->e_blkno = cpu_to_le64(p_blkno); |
370 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / | 370 | rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / |
371 | le16_to_cpu(cl->cl_bpc)); | 371 | le16_to_cpu(cl->cl_bpc)); |
372 | rec->e_leaf_clusters = cpu_to_le32(clusters); | 372 | rec->e_leaf_clusters = cpu_to_le16(clusters); |
373 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); | 373 | le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); |
374 | le16_add_cpu(&bg->bg_free_bits_count, | 374 | le16_add_cpu(&bg->bg_free_bits_count, |
375 | clusters * le16_to_cpu(cl->cl_bpc)); | 375 | clusters * le16_to_cpu(cl->cl_bpc)); |
@@ -1380,6 +1380,14 @@ static inline int ocfs2_block_group_set_bits(handle_t *handle, | |||
1380 | } | 1380 | } |
1381 | 1381 | ||
1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); | 1382 | le16_add_cpu(&bg->bg_free_bits_count, -num_bits); |
1383 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
1384 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
1385 | " count %u but claims %u are freed. num_bits %d", | ||
1386 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
1387 | le16_to_cpu(bg->bg_bits), | ||
1388 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
1389 | return -EROFS; | ||
1390 | } | ||
1383 | while(num_bits--) | 1391 | while(num_bits--) |
1384 | ocfs2_set_bit(bit_off++, bitmap); | 1392 | ocfs2_set_bit(bit_off++, bitmap); |
1385 | 1393 | ||
@@ -2419,6 +2427,14 @@ static int ocfs2_block_group_clear_bits(handle_t *handle, | |||
2419 | (unsigned long *) undo_bg->bg_bitmap); | 2427 | (unsigned long *) undo_bg->bg_bitmap); |
2420 | } | 2428 | } |
2421 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); | 2429 | le16_add_cpu(&bg->bg_free_bits_count, num_bits); |
2430 | if (le16_to_cpu(bg->bg_free_bits_count) > le16_to_cpu(bg->bg_bits)) { | ||
2431 | ocfs2_error(alloc_inode->i_sb, "Group descriptor # %llu has bit" | ||
2432 | " count %u but claims %u are freed. num_bits %d", | ||
2433 | (unsigned long long)le64_to_cpu(bg->bg_blkno), | ||
2434 | le16_to_cpu(bg->bg_bits), | ||
2435 | le16_to_cpu(bg->bg_free_bits_count), num_bits); | ||
2436 | return -EROFS; | ||
2437 | } | ||
2422 | 2438 | ||
2423 | if (undo_fn) | 2439 | if (undo_fn) |
2424 | jbd_unlock_bh_state(group_bh); | 2440 | jbd_unlock_bh_state(group_bh); |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d1..56f0cb395820 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -162,6 +162,7 @@ enum { | |||
162 | Opt_nointr, | 162 | Opt_nointr, |
163 | Opt_hb_none, | 163 | Opt_hb_none, |
164 | Opt_hb_local, | 164 | Opt_hb_local, |
165 | Opt_hb_global, | ||
165 | Opt_data_ordered, | 166 | Opt_data_ordered, |
166 | Opt_data_writeback, | 167 | Opt_data_writeback, |
167 | Opt_atime_quantum, | 168 | Opt_atime_quantum, |
@@ -177,6 +178,8 @@ enum { | |||
177 | Opt_noacl, | 178 | Opt_noacl, |
178 | Opt_usrquota, | 179 | Opt_usrquota, |
179 | Opt_grpquota, | 180 | Opt_grpquota, |
181 | Opt_coherency_buffered, | ||
182 | Opt_coherency_full, | ||
180 | Opt_resv_level, | 183 | Opt_resv_level, |
181 | Opt_dir_resv_level, | 184 | Opt_dir_resv_level, |
182 | Opt_err, | 185 | Opt_err, |
@@ -190,6 +193,7 @@ static const match_table_t tokens = { | |||
190 | {Opt_nointr, "nointr"}, | 193 | {Opt_nointr, "nointr"}, |
191 | {Opt_hb_none, OCFS2_HB_NONE}, | 194 | {Opt_hb_none, OCFS2_HB_NONE}, |
192 | {Opt_hb_local, OCFS2_HB_LOCAL}, | 195 | {Opt_hb_local, OCFS2_HB_LOCAL}, |
196 | {Opt_hb_global, OCFS2_HB_GLOBAL}, | ||
193 | {Opt_data_ordered, "data=ordered"}, | 197 | {Opt_data_ordered, "data=ordered"}, |
194 | {Opt_data_writeback, "data=writeback"}, | 198 | {Opt_data_writeback, "data=writeback"}, |
195 | {Opt_atime_quantum, "atime_quantum=%u"}, | 199 | {Opt_atime_quantum, "atime_quantum=%u"}, |
@@ -205,6 +209,8 @@ static const match_table_t tokens = { | |||
205 | {Opt_noacl, "noacl"}, | 209 | {Opt_noacl, "noacl"}, |
206 | {Opt_usrquota, "usrquota"}, | 210 | {Opt_usrquota, "usrquota"}, |
207 | {Opt_grpquota, "grpquota"}, | 211 | {Opt_grpquota, "grpquota"}, |
212 | {Opt_coherency_buffered, "coherency=buffered"}, | ||
213 | {Opt_coherency_full, "coherency=full"}, | ||
208 | {Opt_resv_level, "resv_level=%u"}, | 214 | {Opt_resv_level, "resv_level=%u"}, |
209 | {Opt_dir_resv_level, "dir_resv_level=%u"}, | 215 | {Opt_dir_resv_level, "dir_resv_level=%u"}, |
210 | {Opt_err, NULL} | 216 | {Opt_err, NULL} |
@@ -514,11 +520,11 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
514 | 520 | ||
515 | mlog_entry_void(); | 521 | mlog_entry_void(); |
516 | 522 | ||
517 | for (i = 0; i < NUM_SYSTEM_INODES; i++) { | 523 | for (i = 0; i < NUM_GLOBAL_SYSTEM_INODES; i++) { |
518 | inode = osb->system_inodes[i]; | 524 | inode = osb->global_system_inodes[i]; |
519 | if (inode) { | 525 | if (inode) { |
520 | iput(inode); | 526 | iput(inode); |
521 | osb->system_inodes[i] = NULL; | 527 | osb->global_system_inodes[i] = NULL; |
522 | } | 528 | } |
523 | } | 529 | } |
524 | 530 | ||
@@ -534,6 +540,20 @@ static void ocfs2_release_system_inodes(struct ocfs2_super *osb) | |||
534 | osb->root_inode = NULL; | 540 | osb->root_inode = NULL; |
535 | } | 541 | } |
536 | 542 | ||
543 | if (!osb->local_system_inodes) | ||
544 | goto out; | ||
545 | |||
546 | for (i = 0; i < NUM_LOCAL_SYSTEM_INODES * osb->max_slots; i++) { | ||
547 | if (osb->local_system_inodes[i]) { | ||
548 | iput(osb->local_system_inodes[i]); | ||
549 | osb->local_system_inodes[i] = NULL; | ||
550 | } | ||
551 | } | ||
552 | |||
553 | kfree(osb->local_system_inodes); | ||
554 | osb->local_system_inodes = NULL; | ||
555 | |||
556 | out: | ||
537 | mlog_exit(0); | 557 | mlog_exit(0); |
538 | } | 558 | } |
539 | 559 | ||
@@ -608,8 +628,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
608 | int ret = 0; | 628 | int ret = 0; |
609 | struct mount_options parsed_options; | 629 | struct mount_options parsed_options; |
610 | struct ocfs2_super *osb = OCFS2_SB(sb); | 630 | struct ocfs2_super *osb = OCFS2_SB(sb); |
611 | 631 | u32 tmp; | |
612 | lock_kernel(); | ||
613 | 632 | ||
614 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || | 633 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || |
615 | !ocfs2_check_set_options(sb, &parsed_options)) { | 634 | !ocfs2_check_set_options(sb, &parsed_options)) { |
@@ -617,8 +636,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
617 | goto out; | 636 | goto out; |
618 | } | 637 | } |
619 | 638 | ||
620 | if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != | 639 | tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | |
621 | (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 640 | OCFS2_MOUNT_HB_NONE; |
641 | if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { | ||
622 | ret = -EINVAL; | 642 | ret = -EINVAL; |
623 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); | 643 | mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); |
624 | goto out; | 644 | goto out; |
@@ -717,7 +737,6 @@ unlock_osb: | |||
717 | MS_POSIXACL : 0); | 737 | MS_POSIXACL : 0); |
718 | } | 738 | } |
719 | out: | 739 | out: |
720 | unlock_kernel(); | ||
721 | return ret; | 740 | return ret; |
722 | } | 741 | } |
723 | 742 | ||
@@ -809,23 +828,29 @@ bail: | |||
809 | 828 | ||
810 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) | 829 | static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) |
811 | { | 830 | { |
812 | if (ocfs2_mount_local(osb)) { | 831 | u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; |
813 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | 832 | |
833 | if (osb->s_mount_opt & hb_enabled) { | ||
834 | if (ocfs2_mount_local(osb)) { | ||
814 | mlog(ML_ERROR, "Cannot heartbeat on a locally " | 835 | mlog(ML_ERROR, "Cannot heartbeat on a locally " |
815 | "mounted device.\n"); | 836 | "mounted device.\n"); |
816 | return -EINVAL; | 837 | return -EINVAL; |
817 | } | 838 | } |
818 | } | 839 | if (ocfs2_userspace_stack(osb)) { |
819 | |||
820 | if (ocfs2_userspace_stack(osb)) { | ||
821 | if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { | ||
822 | mlog(ML_ERROR, "Userspace stack expected, but " | 840 | mlog(ML_ERROR, "Userspace stack expected, but " |
823 | "o2cb heartbeat arguments passed to mount\n"); | 841 | "o2cb heartbeat arguments passed to mount\n"); |
824 | return -EINVAL; | 842 | return -EINVAL; |
825 | } | 843 | } |
844 | if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && | ||
845 | !ocfs2_cluster_o2cb_global_heartbeat(osb)) || | ||
846 | ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && | ||
847 | ocfs2_cluster_o2cb_global_heartbeat(osb))) { | ||
848 | mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); | ||
849 | return -EINVAL; | ||
850 | } | ||
826 | } | 851 | } |
827 | 852 | ||
828 | if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { | 853 | if (!(osb->s_mount_opt & hb_enabled)) { |
829 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && | 854 | if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && |
830 | !ocfs2_userspace_stack(osb)) { | 855 | !ocfs2_userspace_stack(osb)) { |
831 | mlog(ML_ERROR, "Heartbeat has to be started to mount " | 856 | mlog(ML_ERROR, "Heartbeat has to be started to mount " |
@@ -1291,6 +1316,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1291 | { | 1316 | { |
1292 | int status; | 1317 | int status; |
1293 | char *p; | 1318 | char *p; |
1319 | u32 tmp; | ||
1294 | 1320 | ||
1295 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, | 1321 | mlog_entry("remount: %d, options: \"%s\"\n", is_remount, |
1296 | options ? options : "(none)"); | 1322 | options ? options : "(none)"); |
@@ -1322,7 +1348,10 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1322 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; | 1348 | mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; |
1323 | break; | 1349 | break; |
1324 | case Opt_hb_none: | 1350 | case Opt_hb_none: |
1325 | mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; | 1351 | mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; |
1352 | break; | ||
1353 | case Opt_hb_global: | ||
1354 | mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; | ||
1326 | break; | 1355 | break; |
1327 | case Opt_barrier: | 1356 | case Opt_barrier: |
1328 | if (match_int(&args[0], &option)) { | 1357 | if (match_int(&args[0], &option)) { |
@@ -1438,6 +1467,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1438 | case Opt_grpquota: | 1467 | case Opt_grpquota: |
1439 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; | 1468 | mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA; |
1440 | break; | 1469 | break; |
1470 | case Opt_coherency_buffered: | ||
1471 | mopt->mount_opt |= OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1472 | break; | ||
1473 | case Opt_coherency_full: | ||
1474 | mopt->mount_opt &= ~OCFS2_MOUNT_COHERENCY_BUFFERED; | ||
1475 | break; | ||
1441 | case Opt_acl: | 1476 | case Opt_acl: |
1442 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; | 1477 | mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL; |
1443 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; | 1478 | mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL; |
@@ -1477,6 +1512,15 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
1477 | } | 1512 | } |
1478 | } | 1513 | } |
1479 | 1514 | ||
1515 | /* Ensure only one heartbeat mode */ | ||
1516 | tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | | ||
1517 | OCFS2_MOUNT_HB_NONE); | ||
1518 | if (hweight32(tmp) != 1) { | ||
1519 | mlog(ML_ERROR, "Invalid heartbeat mount options\n"); | ||
1520 | status = 0; | ||
1521 | goto bail; | ||
1522 | } | ||
1523 | |||
1480 | status = 1; | 1524 | status = 1; |
1481 | 1525 | ||
1482 | bail: | 1526 | bail: |
@@ -1490,10 +1534,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1490 | unsigned long opts = osb->s_mount_opt; | 1534 | unsigned long opts = osb->s_mount_opt; |
1491 | unsigned int local_alloc_megs; | 1535 | unsigned int local_alloc_megs; |
1492 | 1536 | ||
1493 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 1537 | if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { |
1494 | seq_printf(s, ",_netdev,heartbeat=local"); | 1538 | seq_printf(s, ",_netdev"); |
1495 | else | 1539 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
1496 | seq_printf(s, ",heartbeat=none"); | 1540 | seq_printf(s, ",%s", OCFS2_HB_LOCAL); |
1541 | else | ||
1542 | seq_printf(s, ",%s", OCFS2_HB_GLOBAL); | ||
1543 | } else | ||
1544 | seq_printf(s, ",%s", OCFS2_HB_NONE); | ||
1497 | 1545 | ||
1498 | if (opts & OCFS2_MOUNT_NOINTR) | 1546 | if (opts & OCFS2_MOUNT_NOINTR) |
1499 | seq_printf(s, ",nointr"); | 1547 | seq_printf(s, ",nointr"); |
@@ -1536,6 +1584,11 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
1536 | if (opts & OCFS2_MOUNT_GRPQUOTA) | 1584 | if (opts & OCFS2_MOUNT_GRPQUOTA) |
1537 | seq_printf(s, ",grpquota"); | 1585 | seq_printf(s, ",grpquota"); |
1538 | 1586 | ||
1587 | if (opts & OCFS2_MOUNT_COHERENCY_BUFFERED) | ||
1588 | seq_printf(s, ",coherency=buffered"); | ||
1589 | else | ||
1590 | seq_printf(s, ",coherency=full"); | ||
1591 | |||
1539 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | 1592 | if (opts & OCFS2_MOUNT_NOUSERXATTR) |
1540 | seq_printf(s, ",nouser_xattr"); | 1593 | seq_printf(s, ",nouser_xattr"); |
1541 | else | 1594 | else |
@@ -1640,13 +1693,9 @@ static void ocfs2_put_super(struct super_block *sb) | |||
1640 | { | 1693 | { |
1641 | mlog_entry("(0x%p)\n", sb); | 1694 | mlog_entry("(0x%p)\n", sb); |
1642 | 1695 | ||
1643 | lock_kernel(); | ||
1644 | |||
1645 | ocfs2_sync_blockdev(sb); | 1696 | ocfs2_sync_blockdev(sb); |
1646 | ocfs2_dismount_volume(sb, 0); | 1697 | ocfs2_dismount_volume(sb, 0); |
1647 | 1698 | ||
1648 | unlock_kernel(); | ||
1649 | |||
1650 | mlog_exit_void(); | 1699 | mlog_exit_void(); |
1651 | } | 1700 | } |
1652 | 1701 | ||
@@ -1990,6 +2039,36 @@ static int ocfs2_setup_osb_uuid(struct ocfs2_super *osb, const unsigned char *uu | |||
1990 | return 0; | 2039 | return 0; |
1991 | } | 2040 | } |
1992 | 2041 | ||
2042 | /* Make sure entire volume is addressable by our journal. Requires | ||
2043 | osb_clusters_at_boot to be valid and for the journal to have been | ||
2044 | initialized by ocfs2_journal_init(). */ | ||
2045 | static int ocfs2_journal_addressable(struct ocfs2_super *osb) | ||
2046 | { | ||
2047 | int status = 0; | ||
2048 | u64 max_block = | ||
2049 | ocfs2_clusters_to_blocks(osb->sb, | ||
2050 | osb->osb_clusters_at_boot) - 1; | ||
2051 | |||
2052 | /* 32-bit block number is always OK. */ | ||
2053 | if (max_block <= (u32)~0ULL) | ||
2054 | goto out; | ||
2055 | |||
2056 | /* Volume is "huge", so see if our journal is new enough to | ||
2057 | support it. */ | ||
2058 | if (!(OCFS2_HAS_COMPAT_FEATURE(osb->sb, | ||
2059 | OCFS2_FEATURE_COMPAT_JBD2_SB) && | ||
2060 | jbd2_journal_check_used_features(osb->journal->j_journal, 0, 0, | ||
2061 | JBD2_FEATURE_INCOMPAT_64BIT))) { | ||
2062 | mlog(ML_ERROR, "The journal cannot address the entire volume. " | ||
2063 | "Enable the 'block64' journal option with tunefs.ocfs2"); | ||
2064 | status = -EFBIG; | ||
2065 | goto out; | ||
2066 | } | ||
2067 | |||
2068 | out: | ||
2069 | return status; | ||
2070 | } | ||
2071 | |||
1993 | static int ocfs2_initialize_super(struct super_block *sb, | 2072 | static int ocfs2_initialize_super(struct super_block *sb, |
1994 | struct buffer_head *bh, | 2073 | struct buffer_head *bh, |
1995 | int sector_size, | 2074 | int sector_size, |
@@ -2002,6 +2081,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2002 | struct ocfs2_journal *journal; | 2081 | struct ocfs2_journal *journal; |
2003 | __le32 uuid_net_key; | 2082 | __le32 uuid_net_key; |
2004 | struct ocfs2_super *osb; | 2083 | struct ocfs2_super *osb; |
2084 | u64 total_blocks; | ||
2005 | 2085 | ||
2006 | mlog_entry_void(); | 2086 | mlog_entry_void(); |
2007 | 2087 | ||
@@ -2060,6 +2140,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2060 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", | 2140 | snprintf(osb->dev_str, sizeof(osb->dev_str), "%u,%u", |
2061 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); | 2141 | MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); |
2062 | 2142 | ||
2143 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
2144 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
2145 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
2146 | osb->max_slots); | ||
2147 | status = -EINVAL; | ||
2148 | goto bail; | ||
2149 | } | ||
2150 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
2151 | |||
2063 | ocfs2_orphan_scan_init(osb); | 2152 | ocfs2_orphan_scan_init(osb); |
2064 | 2153 | ||
2065 | status = ocfs2_recovery_init(osb); | 2154 | status = ocfs2_recovery_init(osb); |
@@ -2098,15 +2187,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2098 | goto bail; | 2187 | goto bail; |
2099 | } | 2188 | } |
2100 | 2189 | ||
2101 | osb->max_slots = le16_to_cpu(di->id2.i_super.s_max_slots); | ||
2102 | if (osb->max_slots > OCFS2_MAX_SLOTS || osb->max_slots == 0) { | ||
2103 | mlog(ML_ERROR, "Invalid number of node slots (%u)\n", | ||
2104 | osb->max_slots); | ||
2105 | status = -EINVAL; | ||
2106 | goto bail; | ||
2107 | } | ||
2108 | mlog(0, "max_slots for this device: %u\n", osb->max_slots); | ||
2109 | |||
2110 | osb->slot_recovery_generations = | 2190 | osb->slot_recovery_generations = |
2111 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), | 2191 | kcalloc(osb->max_slots, sizeof(*osb->slot_recovery_generations), |
2112 | GFP_KERNEL); | 2192 | GFP_KERNEL); |
@@ -2149,7 +2229,9 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2149 | goto bail; | 2229 | goto bail; |
2150 | } | 2230 | } |
2151 | 2231 | ||
2152 | if (ocfs2_userspace_stack(osb)) { | 2232 | if (ocfs2_clusterinfo_valid(osb)) { |
2233 | osb->osb_stackflags = | ||
2234 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; | ||
2153 | memcpy(osb->osb_cluster_stack, | 2235 | memcpy(osb->osb_cluster_stack, |
2154 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, | 2236 | OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, |
2155 | OCFS2_STACK_LABEL_LEN); | 2237 | OCFS2_STACK_LABEL_LEN); |
@@ -2214,11 +2296,15 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2214 | goto bail; | 2296 | goto bail; |
2215 | } | 2297 | } |
2216 | 2298 | ||
2217 | if (ocfs2_clusters_to_blocks(osb->sb, le32_to_cpu(di->i_clusters) - 1) | 2299 | total_blocks = ocfs2_clusters_to_blocks(osb->sb, |
2218 | > (u32)~0UL) { | 2300 | le32_to_cpu(di->i_clusters)); |
2219 | mlog(ML_ERROR, "Volume might try to write to blocks beyond " | 2301 | |
2220 | "what jbd can address in 32 bits.\n"); | 2302 | status = generic_check_addressable(osb->sb->s_blocksize_bits, |
2221 | status = -EINVAL; | 2303 | total_blocks); |
2304 | if (status) { | ||
2305 | mlog(ML_ERROR, "Volume too large " | ||
2306 | "to mount safely on this system"); | ||
2307 | status = -EFBIG; | ||
2222 | goto bail; | 2308 | goto bail; |
2223 | } | 2309 | } |
2224 | 2310 | ||
@@ -2380,6 +2466,12 @@ static int ocfs2_check_volume(struct ocfs2_super *osb) | |||
2380 | goto finally; | 2466 | goto finally; |
2381 | } | 2467 | } |
2382 | 2468 | ||
2469 | /* Now that journal has been initialized, check to make sure | ||
2470 | entire volume is addressable. */ | ||
2471 | status = ocfs2_journal_addressable(osb); | ||
2472 | if (status) | ||
2473 | goto finally; | ||
2474 | |||
2383 | /* If the journal was unmounted cleanly then we don't want to | 2475 | /* If the journal was unmounted cleanly then we don't want to |
2384 | * recover anything. Otherwise, journal_load will do that | 2476 | * recover anything. Otherwise, journal_load will do that |
2385 | * dirty work for us :) */ | 2477 | * dirty work for us :) */ |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 32499d213fc4..9975457c981f 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
@@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, | |||
128 | } | 128 | } |
129 | 129 | ||
130 | /* Fast symlinks can't be large */ | 130 | /* Fast symlinks can't be large */ |
131 | len = strlen(target); | 131 | len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); |
132 | link = kzalloc(len + 1, GFP_NOFS); | 132 | link = kzalloc(len + 1, GFP_NOFS); |
133 | if (!link) { | 133 | if (!link) { |
134 | status = -ENOMEM; | 134 | status = -ENOMEM; |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index bfe7190cdbf1..902efb23b6a6 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -44,11 +44,6 @@ static struct inode * _ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
44 | int type, | 44 | int type, |
45 | u32 slot); | 45 | u32 slot); |
46 | 46 | ||
47 | static inline int is_global_system_inode(int type); | ||
48 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | ||
49 | int type, | ||
50 | u32 slot); | ||
51 | |||
52 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
53 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; | 48 | static struct lock_class_key ocfs2_sysfile_cluster_lock_key[NUM_SYSTEM_INODES]; |
54 | #endif | 49 | #endif |
@@ -59,11 +54,52 @@ static inline int is_global_system_inode(int type) | |||
59 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; | 54 | type <= OCFS2_LAST_GLOBAL_SYSTEM_INODE; |
60 | } | 55 | } |
61 | 56 | ||
62 | static inline int is_in_system_inode_array(struct ocfs2_super *osb, | 57 | static struct inode **get_local_system_inode(struct ocfs2_super *osb, |
63 | int type, | 58 | int type, |
64 | u32 slot) | 59 | u32 slot) |
65 | { | 60 | { |
66 | return slot == osb->slot_num || is_global_system_inode(type); | 61 | int index; |
62 | struct inode **local_system_inodes, **free = NULL; | ||
63 | |||
64 | BUG_ON(slot == OCFS2_INVALID_SLOT); | ||
65 | BUG_ON(type < OCFS2_FIRST_LOCAL_SYSTEM_INODE || | ||
66 | type > OCFS2_LAST_LOCAL_SYSTEM_INODE); | ||
67 | |||
68 | spin_lock(&osb->osb_lock); | ||
69 | local_system_inodes = osb->local_system_inodes; | ||
70 | spin_unlock(&osb->osb_lock); | ||
71 | |||
72 | if (unlikely(!local_system_inodes)) { | ||
73 | local_system_inodes = kzalloc(sizeof(struct inode *) * | ||
74 | NUM_LOCAL_SYSTEM_INODES * | ||
75 | osb->max_slots, | ||
76 | GFP_NOFS); | ||
77 | if (!local_system_inodes) { | ||
78 | mlog_errno(-ENOMEM); | ||
79 | /* | ||
80 | * return NULL here so that ocfs2_get_sytem_file_inodes | ||
81 | * will try to create an inode and use it. We will try | ||
82 | * to initialize local_system_inodes next time. | ||
83 | */ | ||
84 | return NULL; | ||
85 | } | ||
86 | |||
87 | spin_lock(&osb->osb_lock); | ||
88 | if (osb->local_system_inodes) { | ||
89 | /* Someone has initialized it for us. */ | ||
90 | free = local_system_inodes; | ||
91 | local_system_inodes = osb->local_system_inodes; | ||
92 | } else | ||
93 | osb->local_system_inodes = local_system_inodes; | ||
94 | spin_unlock(&osb->osb_lock); | ||
95 | if (unlikely(free)) | ||
96 | kfree(free); | ||
97 | } | ||
98 | |||
99 | index = (slot * NUM_LOCAL_SYSTEM_INODES) + | ||
100 | (type - OCFS2_FIRST_LOCAL_SYSTEM_INODE); | ||
101 | |||
102 | return &local_system_inodes[index]; | ||
67 | } | 103 | } |
68 | 104 | ||
69 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | 105 | struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, |
@@ -74,8 +110,10 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
74 | struct inode **arr = NULL; | 110 | struct inode **arr = NULL; |
75 | 111 | ||
76 | /* avoid the lookup if cached in local system file array */ | 112 | /* avoid the lookup if cached in local system file array */ |
77 | if (is_in_system_inode_array(osb, type, slot)) | 113 | if (is_global_system_inode(type)) { |
78 | arr = &(osb->system_inodes[type]); | 114 | arr = &(osb->global_system_inodes[type]); |
115 | } else | ||
116 | arr = get_local_system_inode(osb, type, slot); | ||
79 | 117 | ||
80 | if (arr && ((inode = *arr) != NULL)) { | 118 | if (arr && ((inode = *arr) != NULL)) { |
81 | /* get a ref in addition to the array ref */ | 119 | /* get a ref in addition to the array ref */ |
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f61801..67cd43914641 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, | |||
1286 | xis.inode_bh = xbs.inode_bh = di_bh; | 1286 | xis.inode_bh = xbs.inode_bh = di_bh; |
1287 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1287 | di = (struct ocfs2_dinode *)di_bh->b_data; |
1288 | 1288 | ||
1289 | down_read(&oi->ip_xattr_sem); | ||
1290 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | 1289 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, |
1291 | buffer_size, &xis); | 1290 | buffer_size, &xis); |
1292 | if (ret == -ENODATA && di->i_xattr_loc) | 1291 | if (ret == -ENODATA && di->i_xattr_loc) |
1293 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | 1292 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, |
1294 | buffer_size, &xbs); | 1293 | buffer_size, &xbs); |
1295 | up_read(&oi->ip_xattr_sem); | ||
1296 | 1294 | ||
1297 | return ret; | 1295 | return ret; |
1298 | } | 1296 | } |
@@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, | |||
1316 | mlog_errno(ret); | 1314 | mlog_errno(ret); |
1317 | return ret; | 1315 | return ret; |
1318 | } | 1316 | } |
1317 | down_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1319 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, | 1318 | ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, |
1320 | name, buffer, buffer_size); | 1319 | name, buffer, buffer_size); |
1320 | up_read(&OCFS2_I(inode)->ip_xattr_sem); | ||
1321 | 1321 | ||
1322 | ocfs2_inode_unlock(inode, 0); | 1322 | ocfs2_inode_unlock(inode, 0); |
1323 | 1323 | ||
@@ -7081,7 +7081,7 @@ static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args, | |||
7081 | goto out; | 7081 | goto out; |
7082 | } | 7082 | } |
7083 | 7083 | ||
7084 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) | 7084 | if (!indexed) |
7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); | 7085 | ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh); |
7086 | else | 7086 | else |
7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); | 7087 | ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh); |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 79fbf3f390f0..0a8b0ad0c7e2 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
@@ -352,6 +352,7 @@ static void part_release(struct device *dev) | |||
352 | { | 352 | { |
353 | struct hd_struct *p = dev_to_part(dev); | 353 | struct hd_struct *p = dev_to_part(dev); |
354 | free_part_stats(p); | 354 | free_part_stats(p); |
355 | free_part_info(p); | ||
355 | kfree(p); | 356 | kfree(p); |
356 | } | 357 | } |
357 | 358 | ||
@@ -401,7 +402,8 @@ static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, | |||
401 | whole_disk_show, NULL); | 402 | whole_disk_show, NULL); |
402 | 403 | ||
403 | struct hd_struct *add_partition(struct gendisk *disk, int partno, | 404 | struct hd_struct *add_partition(struct gendisk *disk, int partno, |
404 | sector_t start, sector_t len, int flags) | 405 | sector_t start, sector_t len, int flags, |
406 | struct partition_meta_info *info) | ||
405 | { | 407 | { |
406 | struct hd_struct *p; | 408 | struct hd_struct *p; |
407 | dev_t devt = MKDEV(0, 0); | 409 | dev_t devt = MKDEV(0, 0); |
@@ -438,6 +440,14 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
438 | p->partno = partno; | 440 | p->partno = partno; |
439 | p->policy = get_disk_ro(disk); | 441 | p->policy = get_disk_ro(disk); |
440 | 442 | ||
443 | if (info) { | ||
444 | struct partition_meta_info *pinfo = alloc_part_info(disk); | ||
445 | if (!pinfo) | ||
446 | goto out_free_stats; | ||
447 | memcpy(pinfo, info, sizeof(*info)); | ||
448 | p->info = pinfo; | ||
449 | } | ||
450 | |||
441 | dname = dev_name(ddev); | 451 | dname = dev_name(ddev); |
442 | if (isdigit(dname[strlen(dname) - 1])) | 452 | if (isdigit(dname[strlen(dname) - 1])) |
443 | dev_set_name(pdev, "%sp%d", dname, partno); | 453 | dev_set_name(pdev, "%sp%d", dname, partno); |
@@ -451,7 +461,7 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
451 | 461 | ||
452 | err = blk_alloc_devt(p, &devt); | 462 | err = blk_alloc_devt(p, &devt); |
453 | if (err) | 463 | if (err) |
454 | goto out_free_stats; | 464 | goto out_free_info; |
455 | pdev->devt = devt; | 465 | pdev->devt = devt; |
456 | 466 | ||
457 | /* delay uevent until 'holders' subdir is created */ | 467 | /* delay uevent until 'holders' subdir is created */ |
@@ -481,6 +491,8 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
481 | 491 | ||
482 | return p; | 492 | return p; |
483 | 493 | ||
494 | out_free_info: | ||
495 | free_part_info(p); | ||
484 | out_free_stats: | 496 | out_free_stats: |
485 | free_part_stats(p); | 497 | free_part_stats(p); |
486 | out_free: | 498 | out_free: |
@@ -513,14 +525,14 @@ void register_disk(struct gendisk *disk) | |||
513 | 525 | ||
514 | if (device_add(ddev)) | 526 | if (device_add(ddev)) |
515 | return; | 527 | return; |
516 | #ifndef CONFIG_SYSFS_DEPRECATED | 528 | if (!sysfs_deprecated) { |
517 | err = sysfs_create_link(block_depr, &ddev->kobj, | 529 | err = sysfs_create_link(block_depr, &ddev->kobj, |
518 | kobject_name(&ddev->kobj)); | 530 | kobject_name(&ddev->kobj)); |
519 | if (err) { | 531 | if (err) { |
520 | device_del(ddev); | 532 | device_del(ddev); |
521 | return; | 533 | return; |
534 | } | ||
522 | } | 535 | } |
523 | #endif | ||
524 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); | 536 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); |
525 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | 537 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); |
526 | 538 | ||
@@ -642,6 +654,7 @@ rescan: | |||
642 | /* add partitions */ | 654 | /* add partitions */ |
643 | for (p = 1; p < state->limit; p++) { | 655 | for (p = 1; p < state->limit; p++) { |
644 | sector_t size, from; | 656 | sector_t size, from; |
657 | struct partition_meta_info *info = NULL; | ||
645 | 658 | ||
646 | size = state->parts[p].size; | 659 | size = state->parts[p].size; |
647 | if (!size) | 660 | if (!size) |
@@ -675,8 +688,12 @@ rescan: | |||
675 | size = get_capacity(disk) - from; | 688 | size = get_capacity(disk) - from; |
676 | } | 689 | } |
677 | } | 690 | } |
691 | |||
692 | if (state->parts[p].has_info) | ||
693 | info = &state->parts[p].info; | ||
678 | part = add_partition(disk, p, from, size, | 694 | part = add_partition(disk, p, from, size, |
679 | state->parts[p].flags); | 695 | state->parts[p].flags, |
696 | &state->parts[p].info); | ||
680 | if (IS_ERR(part)) { | 697 | if (IS_ERR(part)) { |
681 | printk(KERN_ERR " %s: p%d could not be added: %ld\n", | 698 | printk(KERN_ERR " %s: p%d could not be added: %ld\n", |
682 | disk->disk_name, p, -PTR_ERR(part)); | 699 | disk->disk_name, p, -PTR_ERR(part)); |
@@ -737,8 +754,7 @@ void del_gendisk(struct gendisk *disk) | |||
737 | kobject_put(disk->part0.holder_dir); | 754 | kobject_put(disk->part0.holder_dir); |
738 | kobject_put(disk->slave_dir); | 755 | kobject_put(disk->slave_dir); |
739 | disk->driverfs_dev = NULL; | 756 | disk->driverfs_dev = NULL; |
740 | #ifndef CONFIG_SYSFS_DEPRECATED | 757 | if (!sysfs_deprecated) |
741 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); | 758 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); |
742 | #endif | ||
743 | device_del(disk_to_dev(disk)); | 759 | device_del(disk_to_dev(disk)); |
744 | } | 760 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 8e4e103ba216..d68bf4dc3bc2 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
@@ -1,5 +1,6 @@ | |||
1 | #include <linux/pagemap.h> | 1 | #include <linux/pagemap.h> |
2 | #include <linux/blkdev.h> | 2 | #include <linux/blkdev.h> |
3 | #include <linux/genhd.h> | ||
3 | 4 | ||
4 | /* | 5 | /* |
5 | * add_gd_partition adds a partitions details to the devices partition | 6 | * add_gd_partition adds a partitions details to the devices partition |
@@ -12,6 +13,8 @@ struct parsed_partitions { | |||
12 | sector_t from; | 13 | sector_t from; |
13 | sector_t size; | 14 | sector_t size; |
14 | int flags; | 15 | int flags; |
16 | bool has_info; | ||
17 | struct partition_meta_info info; | ||
15 | } parts[DISK_MAX_PARTS]; | 18 | } parts[DISK_MAX_PARTS]; |
16 | int next; | 19 | int next; |
17 | int limit; | 20 | int limit; |
diff --git a/fs/partitions/efi.c b/fs/partitions/efi.c index dbb44d4bb8a7..ac0ccb5026a2 100644 --- a/fs/partitions/efi.c +++ b/fs/partitions/efi.c | |||
@@ -94,6 +94,7 @@ | |||
94 | * | 94 | * |
95 | ************************************************************/ | 95 | ************************************************************/ |
96 | #include <linux/crc32.h> | 96 | #include <linux/crc32.h> |
97 | #include <linux/ctype.h> | ||
97 | #include <linux/math64.h> | 98 | #include <linux/math64.h> |
98 | #include <linux/slab.h> | 99 | #include <linux/slab.h> |
99 | #include "check.h" | 100 | #include "check.h" |
@@ -604,6 +605,7 @@ int efi_partition(struct parsed_partitions *state) | |||
604 | gpt_entry *ptes = NULL; | 605 | gpt_entry *ptes = NULL; |
605 | u32 i; | 606 | u32 i; |
606 | unsigned ssz = bdev_logical_block_size(state->bdev) / 512; | 607 | unsigned ssz = bdev_logical_block_size(state->bdev) / 512; |
608 | u8 unparsed_guid[37]; | ||
607 | 609 | ||
608 | if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { | 610 | if (!find_valid_gpt(state, &gpt, &ptes) || !gpt || !ptes) { |
609 | kfree(gpt); | 611 | kfree(gpt); |
@@ -614,6 +616,9 @@ int efi_partition(struct parsed_partitions *state) | |||
614 | pr_debug("GUID Partition Table is valid! Yea!\n"); | 616 | pr_debug("GUID Partition Table is valid! Yea!\n"); |
615 | 617 | ||
616 | for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { | 618 | for (i = 0; i < le32_to_cpu(gpt->num_partition_entries) && i < state->limit-1; i++) { |
619 | struct partition_meta_info *info; | ||
620 | unsigned label_count = 0; | ||
621 | unsigned label_max; | ||
617 | u64 start = le64_to_cpu(ptes[i].starting_lba); | 622 | u64 start = le64_to_cpu(ptes[i].starting_lba); |
618 | u64 size = le64_to_cpu(ptes[i].ending_lba) - | 623 | u64 size = le64_to_cpu(ptes[i].ending_lba) - |
619 | le64_to_cpu(ptes[i].starting_lba) + 1ULL; | 624 | le64_to_cpu(ptes[i].starting_lba) + 1ULL; |
@@ -627,6 +632,26 @@ int efi_partition(struct parsed_partitions *state) | |||
627 | if (!efi_guidcmp(ptes[i].partition_type_guid, | 632 | if (!efi_guidcmp(ptes[i].partition_type_guid, |
628 | PARTITION_LINUX_RAID_GUID)) | 633 | PARTITION_LINUX_RAID_GUID)) |
629 | state->parts[i + 1].flags = ADDPART_FLAG_RAID; | 634 | state->parts[i + 1].flags = ADDPART_FLAG_RAID; |
635 | |||
636 | info = &state->parts[i + 1].info; | ||
637 | /* Instead of doing a manual swap to big endian, reuse the | ||
638 | * common ASCII hex format as the interim. | ||
639 | */ | ||
640 | efi_guid_unparse(&ptes[i].unique_partition_guid, unparsed_guid); | ||
641 | part_pack_uuid(unparsed_guid, info->uuid); | ||
642 | |||
643 | /* Naively convert UTF16-LE to 7 bits. */ | ||
644 | label_max = min(sizeof(info->volname) - 1, | ||
645 | sizeof(ptes[i].partition_name)); | ||
646 | info->volname[label_max] = 0; | ||
647 | while (label_count < label_max) { | ||
648 | u8 c = ptes[i].partition_name[label_count] & 0xff; | ||
649 | if (c && !isprint(c)) | ||
650 | c = '!'; | ||
651 | info->volname[label_count] = c; | ||
652 | label_count++; | ||
653 | } | ||
654 | state->parts[i + 1].has_info = true; | ||
630 | } | 655 | } |
631 | kfree(ptes); | 656 | kfree(ptes); |
632 | kfree(gpt); | 657 | kfree(gpt); |
diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 5bf8a04b5d9b..789c625c7aa5 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c | |||
@@ -5,7 +5,7 @@ | |||
5 | * Copyright (c) 2001-2007 Anton Altaparmakov | 5 | * Copyright (c) 2001-2007 Anton Altaparmakov |
6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> | 6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> |
7 | * | 7 | * |
8 | * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/ | 8 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify it under | 10 | * This program is free software; you can redistribute it and/or modify it under |
11 | * the terms of the GNU General Public License as published by the Free Software | 11 | * the terms of the GNU General Public License as published by the Free Software |
diff --git a/fs/partitions/ldm.h b/fs/partitions/ldm.h index d1fb50b28d86..374242c0971a 100644 --- a/fs/partitions/ldm.h +++ b/fs/partitions/ldm.h | |||
@@ -5,7 +5,7 @@ | |||
5 | * Copyright (c) 2001-2007 Anton Altaparmakov | 5 | * Copyright (c) 2001-2007 Anton Altaparmakov |
6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> | 6 | * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> |
7 | * | 7 | * |
8 | * Documentation is available at http://www.linux-ntfs.org/content/view/19/37/ | 8 | * Documentation is available at http://www.linux-ntfs.org/doku.php?id=downloads |
9 | * | 9 | * |
10 | * This program is free software; you can redistribute it and/or modify it | 10 | * This program is free software; you can redistribute it and/or modify it |
11 | * under the terms of the GNU General Public License as published by the Free | 11 | * under the terms of the GNU General Public License as published by the Free |
@@ -382,7 +382,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, | |||
382 | error = ops->confirm(pipe, buf); | 382 | error = ops->confirm(pipe, buf); |
383 | if (error) { | 383 | if (error) { |
384 | if (!ret) | 384 | if (!ret) |
385 | error = ret; | 385 | ret = error; |
386 | break; | 386 | break; |
387 | } | 387 | } |
388 | 388 | ||
@@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void) | |||
954 | if (!inode) | 954 | if (!inode) |
955 | goto fail_inode; | 955 | goto fail_inode; |
956 | 956 | ||
957 | inode->i_ino = get_next_ino(); | ||
958 | |||
957 | pipe = alloc_pipe_info(inode); | 959 | pipe = alloc_pipe_info(inode); |
958 | if (!pipe) | 960 | if (!pipe) |
959 | goto fail_iput; | 961 | goto fail_iput; |
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 50f8f0600f06..6a0068841d96 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig | |||
@@ -33,8 +33,8 @@ config PROC_KCORE | |||
33 | depends on PROC_FS && MMU | 33 | depends on PROC_FS && MMU |
34 | 34 | ||
35 | config PROC_VMCORE | 35 | config PROC_VMCORE |
36 | bool "/proc/vmcore support (EXPERIMENTAL)" | 36 | bool "/proc/vmcore support" |
37 | depends on PROC_FS && CRASH_DUMP | 37 | depends on PROC_FS && CRASH_DUMP |
38 | default y | 38 | default y |
39 | help | 39 | help |
40 | Exports the dump image of crashed kernel in ELF format. | 40 | Exports the dump image of crashed kernel in ELF format. |
diff --git a/fs/proc/base.c b/fs/proc/base.c index a1c43e7c8a7b..f3d02ca461ec 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) | |||
226 | { | 226 | { |
227 | struct mm_struct *mm; | 227 | struct mm_struct *mm; |
228 | 228 | ||
229 | if (mutex_lock_killable(&task->cred_guard_mutex)) | 229 | if (mutex_lock_killable(&task->signal->cred_guard_mutex)) |
230 | return NULL; | 230 | return NULL; |
231 | 231 | ||
232 | mm = get_task_mm(task); | 232 | mm = get_task_mm(task); |
@@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task) | |||
235 | mmput(mm); | 235 | mmput(mm); |
236 | mm = NULL; | 236 | mm = NULL; |
237 | } | 237 | } |
238 | mutex_unlock(&task->cred_guard_mutex); | 238 | mutex_unlock(&task->signal->cred_guard_mutex); |
239 | 239 | ||
240 | return mm; | 240 | return mm; |
241 | } | 241 | } |
@@ -771,6 +771,8 @@ static const struct file_operations proc_single_file_operations = { | |||
771 | static int mem_open(struct inode* inode, struct file* file) | 771 | static int mem_open(struct inode* inode, struct file* file) |
772 | { | 772 | { |
773 | file->private_data = (void*)((long)current->self_exec_id); | 773 | file->private_data = (void*)((long)current->self_exec_id); |
774 | /* OK to pass negative loff_t, we can catch out-of-range */ | ||
775 | file->f_mode |= FMODE_UNSIGNED_OFFSET; | ||
774 | return 0; | 776 | return 0; |
775 | } | 777 | } |
776 | 778 | ||
@@ -1023,28 +1025,47 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1023 | memset(buffer, 0, sizeof(buffer)); | 1025 | memset(buffer, 0, sizeof(buffer)); |
1024 | if (count > sizeof(buffer) - 1) | 1026 | if (count > sizeof(buffer) - 1) |
1025 | count = sizeof(buffer) - 1; | 1027 | count = sizeof(buffer) - 1; |
1026 | if (copy_from_user(buffer, buf, count)) | 1028 | if (copy_from_user(buffer, buf, count)) { |
1027 | return -EFAULT; | 1029 | err = -EFAULT; |
1030 | goto out; | ||
1031 | } | ||
1028 | 1032 | ||
1029 | err = strict_strtol(strstrip(buffer), 0, &oom_adjust); | 1033 | err = strict_strtol(strstrip(buffer), 0, &oom_adjust); |
1030 | if (err) | 1034 | if (err) |
1031 | return -EINVAL; | 1035 | goto out; |
1032 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && | 1036 | if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && |
1033 | oom_adjust != OOM_DISABLE) | 1037 | oom_adjust != OOM_DISABLE) { |
1034 | return -EINVAL; | 1038 | err = -EINVAL; |
1039 | goto out; | ||
1040 | } | ||
1035 | 1041 | ||
1036 | task = get_proc_task(file->f_path.dentry->d_inode); | 1042 | task = get_proc_task(file->f_path.dentry->d_inode); |
1037 | if (!task) | 1043 | if (!task) { |
1038 | return -ESRCH; | 1044 | err = -ESRCH; |
1045 | goto out; | ||
1046 | } | ||
1047 | |||
1048 | task_lock(task); | ||
1049 | if (!task->mm) { | ||
1050 | err = -EINVAL; | ||
1051 | goto err_task_lock; | ||
1052 | } | ||
1053 | |||
1039 | if (!lock_task_sighand(task, &flags)) { | 1054 | if (!lock_task_sighand(task, &flags)) { |
1040 | put_task_struct(task); | 1055 | err = -ESRCH; |
1041 | return -ESRCH; | 1056 | goto err_task_lock; |
1042 | } | 1057 | } |
1043 | 1058 | ||
1044 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { | 1059 | if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) { |
1045 | unlock_task_sighand(task, &flags); | 1060 | err = -EACCES; |
1046 | put_task_struct(task); | 1061 | goto err_sighand; |
1047 | return -EACCES; | 1062 | } |
1063 | |||
1064 | if (oom_adjust != task->signal->oom_adj) { | ||
1065 | if (oom_adjust == OOM_DISABLE) | ||
1066 | atomic_inc(&task->mm->oom_disable_count); | ||
1067 | if (task->signal->oom_adj == OOM_DISABLE) | ||
1068 | atomic_dec(&task->mm->oom_disable_count); | ||
1048 | } | 1069 | } |
1049 | 1070 | ||
1050 | /* | 1071 | /* |
@@ -1065,10 +1086,13 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, | |||
1065 | else | 1086 | else |
1066 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / | 1087 | task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) / |
1067 | -OOM_DISABLE; | 1088 | -OOM_DISABLE; |
1089 | err_sighand: | ||
1068 | unlock_task_sighand(task, &flags); | 1090 | unlock_task_sighand(task, &flags); |
1091 | err_task_lock: | ||
1092 | task_unlock(task); | ||
1069 | put_task_struct(task); | 1093 | put_task_struct(task); |
1070 | 1094 | out: | |
1071 | return count; | 1095 | return err < 0 ? err : count; |
1072 | } | 1096 | } |
1073 | 1097 | ||
1074 | static const struct file_operations proc_oom_adjust_operations = { | 1098 | static const struct file_operations proc_oom_adjust_operations = { |
@@ -1109,30 +1133,49 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1109 | memset(buffer, 0, sizeof(buffer)); | 1133 | memset(buffer, 0, sizeof(buffer)); |
1110 | if (count > sizeof(buffer) - 1) | 1134 | if (count > sizeof(buffer) - 1) |
1111 | count = sizeof(buffer) - 1; | 1135 | count = sizeof(buffer) - 1; |
1112 | if (copy_from_user(buffer, buf, count)) | 1136 | if (copy_from_user(buffer, buf, count)) { |
1113 | return -EFAULT; | 1137 | err = -EFAULT; |
1138 | goto out; | ||
1139 | } | ||
1114 | 1140 | ||
1115 | err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); | 1141 | err = strict_strtol(strstrip(buffer), 0, &oom_score_adj); |
1116 | if (err) | 1142 | if (err) |
1117 | return -EINVAL; | 1143 | goto out; |
1118 | if (oom_score_adj < OOM_SCORE_ADJ_MIN || | 1144 | if (oom_score_adj < OOM_SCORE_ADJ_MIN || |
1119 | oom_score_adj > OOM_SCORE_ADJ_MAX) | 1145 | oom_score_adj > OOM_SCORE_ADJ_MAX) { |
1120 | return -EINVAL; | 1146 | err = -EINVAL; |
1147 | goto out; | ||
1148 | } | ||
1121 | 1149 | ||
1122 | task = get_proc_task(file->f_path.dentry->d_inode); | 1150 | task = get_proc_task(file->f_path.dentry->d_inode); |
1123 | if (!task) | 1151 | if (!task) { |
1124 | return -ESRCH; | 1152 | err = -ESRCH; |
1153 | goto out; | ||
1154 | } | ||
1155 | |||
1156 | task_lock(task); | ||
1157 | if (!task->mm) { | ||
1158 | err = -EINVAL; | ||
1159 | goto err_task_lock; | ||
1160 | } | ||
1161 | |||
1125 | if (!lock_task_sighand(task, &flags)) { | 1162 | if (!lock_task_sighand(task, &flags)) { |
1126 | put_task_struct(task); | 1163 | err = -ESRCH; |
1127 | return -ESRCH; | 1164 | goto err_task_lock; |
1128 | } | 1165 | } |
1166 | |||
1129 | if (oom_score_adj < task->signal->oom_score_adj && | 1167 | if (oom_score_adj < task->signal->oom_score_adj && |
1130 | !capable(CAP_SYS_RESOURCE)) { | 1168 | !capable(CAP_SYS_RESOURCE)) { |
1131 | unlock_task_sighand(task, &flags); | 1169 | err = -EACCES; |
1132 | put_task_struct(task); | 1170 | goto err_sighand; |
1133 | return -EACCES; | ||
1134 | } | 1171 | } |
1135 | 1172 | ||
1173 | if (oom_score_adj != task->signal->oom_score_adj) { | ||
1174 | if (oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1175 | atomic_inc(&task->mm->oom_disable_count); | ||
1176 | if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) | ||
1177 | atomic_dec(&task->mm->oom_disable_count); | ||
1178 | } | ||
1136 | task->signal->oom_score_adj = oom_score_adj; | 1179 | task->signal->oom_score_adj = oom_score_adj; |
1137 | /* | 1180 | /* |
1138 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is | 1181 | * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is |
@@ -1143,14 +1186,19 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf, | |||
1143 | else | 1186 | else |
1144 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / | 1187 | task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) / |
1145 | OOM_SCORE_ADJ_MAX; | 1188 | OOM_SCORE_ADJ_MAX; |
1189 | err_sighand: | ||
1146 | unlock_task_sighand(task, &flags); | 1190 | unlock_task_sighand(task, &flags); |
1191 | err_task_lock: | ||
1192 | task_unlock(task); | ||
1147 | put_task_struct(task); | 1193 | put_task_struct(task); |
1148 | return count; | 1194 | out: |
1195 | return err < 0 ? err : count; | ||
1149 | } | 1196 | } |
1150 | 1197 | ||
1151 | static const struct file_operations proc_oom_score_adj_operations = { | 1198 | static const struct file_operations proc_oom_score_adj_operations = { |
1152 | .read = oom_score_adj_read, | 1199 | .read = oom_score_adj_read, |
1153 | .write = oom_score_adj_write, | 1200 | .write = oom_score_adj_write, |
1201 | .llseek = default_llseek, | ||
1154 | }; | 1202 | }; |
1155 | 1203 | ||
1156 | #ifdef CONFIG_AUDITSYSCALL | 1204 | #ifdef CONFIG_AUDITSYSCALL |
@@ -1600,6 +1648,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st | |||
1600 | 1648 | ||
1601 | /* Common stuff */ | 1649 | /* Common stuff */ |
1602 | ei = PROC_I(inode); | 1650 | ei = PROC_I(inode); |
1651 | inode->i_ino = get_next_ino(); | ||
1603 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 1652 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
1604 | inode->i_op = &proc_def_inode_operations; | 1653 | inode->i_op = &proc_def_inode_operations; |
1605 | 1654 | ||
@@ -2039,11 +2088,13 @@ static ssize_t proc_fdinfo_read(struct file *file, char __user *buf, | |||
2039 | static const struct file_operations proc_fdinfo_file_operations = { | 2088 | static const struct file_operations proc_fdinfo_file_operations = { |
2040 | .open = nonseekable_open, | 2089 | .open = nonseekable_open, |
2041 | .read = proc_fdinfo_read, | 2090 | .read = proc_fdinfo_read, |
2091 | .llseek = no_llseek, | ||
2042 | }; | 2092 | }; |
2043 | 2093 | ||
2044 | static const struct file_operations proc_fd_operations = { | 2094 | static const struct file_operations proc_fd_operations = { |
2045 | .read = generic_read_dir, | 2095 | .read = generic_read_dir, |
2046 | .readdir = proc_readfd, | 2096 | .readdir = proc_readfd, |
2097 | .llseek = default_llseek, | ||
2047 | }; | 2098 | }; |
2048 | 2099 | ||
2049 | /* | 2100 | /* |
@@ -2112,6 +2163,7 @@ static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | |||
2112 | static const struct file_operations proc_fdinfo_operations = { | 2163 | static const struct file_operations proc_fdinfo_operations = { |
2113 | .read = generic_read_dir, | 2164 | .read = generic_read_dir, |
2114 | .readdir = proc_readfdinfo, | 2165 | .readdir = proc_readfdinfo, |
2166 | .llseek = default_llseek, | ||
2115 | }; | 2167 | }; |
2116 | 2168 | ||
2117 | /* | 2169 | /* |
@@ -2302,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf, | |||
2302 | goto out_free; | 2354 | goto out_free; |
2303 | 2355 | ||
2304 | /* Guard against adverse ptrace interaction */ | 2356 | /* Guard against adverse ptrace interaction */ |
2305 | length = mutex_lock_interruptible(&task->cred_guard_mutex); | 2357 | length = mutex_lock_interruptible(&task->signal->cred_guard_mutex); |
2306 | if (length < 0) | 2358 | if (length < 0) |
2307 | goto out_free; | 2359 | goto out_free; |
2308 | 2360 | ||
2309 | length = security_setprocattr(task, | 2361 | length = security_setprocattr(task, |
2310 | (char*)file->f_path.dentry->d_name.name, | 2362 | (char*)file->f_path.dentry->d_name.name, |
2311 | (void*)page, count); | 2363 | (void*)page, count); |
2312 | mutex_unlock(&task->cred_guard_mutex); | 2364 | mutex_unlock(&task->signal->cred_guard_mutex); |
2313 | out_free: | 2365 | out_free: |
2314 | free_page((unsigned long) page); | 2366 | free_page((unsigned long) page); |
2315 | out: | 2367 | out: |
@@ -2343,6 +2395,7 @@ static int proc_attr_dir_readdir(struct file * filp, | |||
2343 | static const struct file_operations proc_attr_dir_operations = { | 2395 | static const struct file_operations proc_attr_dir_operations = { |
2344 | .read = generic_read_dir, | 2396 | .read = generic_read_dir, |
2345 | .readdir = proc_attr_dir_readdir, | 2397 | .readdir = proc_attr_dir_readdir, |
2398 | .llseek = default_llseek, | ||
2346 | }; | 2399 | }; |
2347 | 2400 | ||
2348 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, | 2401 | static struct dentry *proc_attr_dir_lookup(struct inode *dir, |
@@ -2542,6 +2595,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, | |||
2542 | 2595 | ||
2543 | /* Initialize the inode */ | 2596 | /* Initialize the inode */ |
2544 | ei = PROC_I(inode); | 2597 | ei = PROC_I(inode); |
2598 | inode->i_ino = get_next_ino(); | ||
2545 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 2599 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
2546 | 2600 | ||
2547 | /* | 2601 | /* |
@@ -2675,7 +2729,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2675 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2729 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2676 | ONE("status", S_IRUGO, proc_pid_status), | 2730 | ONE("status", S_IRUGO, proc_pid_status), |
2677 | ONE("personality", S_IRUSR, proc_pid_personality), | 2731 | ONE("personality", S_IRUSR, proc_pid_personality), |
2678 | INF("limits", S_IRUSR, proc_pid_limits), | 2732 | INF("limits", S_IRUGO, proc_pid_limits), |
2679 | #ifdef CONFIG_SCHED_DEBUG | 2733 | #ifdef CONFIG_SCHED_DEBUG |
2680 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2734 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
2681 | #endif | 2735 | #endif |
@@ -2751,6 +2805,7 @@ static int proc_tgid_base_readdir(struct file * filp, | |||
2751 | static const struct file_operations proc_tgid_base_operations = { | 2805 | static const struct file_operations proc_tgid_base_operations = { |
2752 | .read = generic_read_dir, | 2806 | .read = generic_read_dir, |
2753 | .readdir = proc_tgid_base_readdir, | 2807 | .readdir = proc_tgid_base_readdir, |
2808 | .llseek = default_llseek, | ||
2754 | }; | 2809 | }; |
2755 | 2810 | ||
2756 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ | 2811 | static struct dentry *proc_tgid_base_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd){ |
@@ -3011,7 +3066,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
3011 | INF("auxv", S_IRUSR, proc_pid_auxv), | 3066 | INF("auxv", S_IRUSR, proc_pid_auxv), |
3012 | ONE("status", S_IRUGO, proc_pid_status), | 3067 | ONE("status", S_IRUGO, proc_pid_status), |
3013 | ONE("personality", S_IRUSR, proc_pid_personality), | 3068 | ONE("personality", S_IRUSR, proc_pid_personality), |
3014 | INF("limits", S_IRUSR, proc_pid_limits), | 3069 | INF("limits", S_IRUGO, proc_pid_limits), |
3015 | #ifdef CONFIG_SCHED_DEBUG | 3070 | #ifdef CONFIG_SCHED_DEBUG |
3016 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 3071 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
3017 | #endif | 3072 | #endif |
@@ -3088,6 +3143,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den | |||
3088 | static const struct file_operations proc_tid_base_operations = { | 3143 | static const struct file_operations proc_tid_base_operations = { |
3089 | .read = generic_read_dir, | 3144 | .read = generic_read_dir, |
3090 | .readdir = proc_tid_base_readdir, | 3145 | .readdir = proc_tid_base_readdir, |
3146 | .llseek = default_llseek, | ||
3091 | }; | 3147 | }; |
3092 | 3148 | ||
3093 | static const struct inode_operations proc_tid_base_inode_operations = { | 3149 | static const struct inode_operations proc_tid_base_inode_operations = { |
@@ -3324,4 +3380,5 @@ static const struct inode_operations proc_task_inode_operations = { | |||
3324 | static const struct file_operations proc_task_operations = { | 3380 | static const struct file_operations proc_task_operations = { |
3325 | .read = generic_read_dir, | 3381 | .read = generic_read_dir, |
3326 | .readdir = proc_task_readdir, | 3382 | .readdir = proc_task_readdir, |
3383 | .llseek = default_llseek, | ||
3327 | }; | 3384 | }; |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 5be436ea088e..b652cb00906b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
@@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, | |||
23 | if (!inode) | 23 | if (!inode) |
24 | goto out; | 24 | goto out; |
25 | 25 | ||
26 | inode->i_ino = get_next_ino(); | ||
27 | |||
26 | sysctl_head_get(head); | 28 | sysctl_head_get(head); |
27 | ei = PROC_I(inode); | 29 | ei = PROC_I(inode); |
28 | ei->sysctl = head; | 30 | ei->sysctl = head; |
@@ -364,6 +366,7 @@ static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
364 | static const struct file_operations proc_sys_file_operations = { | 366 | static const struct file_operations proc_sys_file_operations = { |
365 | .read = proc_sys_read, | 367 | .read = proc_sys_read, |
366 | .write = proc_sys_write, | 368 | .write = proc_sys_write, |
369 | .llseek = default_llseek, | ||
367 | }; | 370 | }; |
368 | 371 | ||
369 | static const struct file_operations proc_sys_dir_file_operations = { | 372 | static const struct file_operations proc_sys_dir_file_operations = { |
diff --git a/fs/proc/root.c b/fs/proc/root.c index 4258384ed22d..93d99b316325 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -179,6 +179,7 @@ static int proc_root_readdir(struct file * filp, | |||
179 | static const struct file_operations proc_root_operations = { | 179 | static const struct file_operations proc_root_operations = { |
180 | .read = generic_read_dir, | 180 | .read = generic_read_dir, |
181 | .readdir = proc_root_readdir, | 181 | .readdir = proc_root_readdir, |
182 | .llseek = default_llseek, | ||
182 | }; | 183 | }; |
183 | 184 | ||
184 | /* | 185 | /* |
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c index 1807c2419f17..37994737c983 100644 --- a/fs/proc/softirqs.c +++ b/fs/proc/softirqs.c | |||
@@ -10,13 +10,13 @@ static int show_softirqs(struct seq_file *p, void *v) | |||
10 | { | 10 | { |
11 | int i, j; | 11 | int i, j; |
12 | 12 | ||
13 | seq_printf(p, " "); | 13 | seq_printf(p, " "); |
14 | for_each_possible_cpu(i) | 14 | for_each_possible_cpu(i) |
15 | seq_printf(p, "CPU%-8d", i); | 15 | seq_printf(p, "CPU%-8d", i); |
16 | seq_printf(p, "\n"); | 16 | seq_printf(p, "\n"); |
17 | 17 | ||
18 | for (i = 0; i < NR_SOFTIRQS; i++) { | 18 | for (i = 0; i < NR_SOFTIRQS; i++) { |
19 | seq_printf(p, "%8s:", softirq_to_name[i]); | 19 | seq_printf(p, "%12s:", softirq_to_name[i]); |
20 | for_each_possible_cpu(j) | 20 | for_each_possible_cpu(j) |
21 | seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); | 21 | seq_printf(p, " %10u", kstat_softirqs_cpu(i, j)); |
22 | seq_printf(p, "\n"); | 22 | seq_printf(p, "\n"); |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index bf31b03fc275..e15a19c93bae 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v) | |||
31 | u64 sum_softirq = 0; | 31 | u64 sum_softirq = 0; |
32 | unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; | 32 | unsigned int per_softirq_sums[NR_SOFTIRQS] = {0}; |
33 | struct timespec boottime; | 33 | struct timespec boottime; |
34 | unsigned int per_irq_sum; | ||
35 | 34 | ||
36 | user = nice = system = idle = iowait = | 35 | user = nice = system = idle = iowait = |
37 | irq = softirq = steal = cputime64_zero; | 36 | irq = softirq = steal = cputime64_zero; |
@@ -52,9 +51,7 @@ static int show_stat(struct seq_file *p, void *v) | |||
52 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); | 51 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); |
53 | guest_nice = cputime64_add(guest_nice, | 52 | guest_nice = cputime64_add(guest_nice, |
54 | kstat_cpu(i).cpustat.guest_nice); | 53 | kstat_cpu(i).cpustat.guest_nice); |
55 | for_each_irq_nr(j) { | 54 | sum += kstat_cpu_irqs_sum(i); |
56 | sum += kstat_irqs_cpu(j, i); | ||
57 | } | ||
58 | sum += arch_irq_stat_cpu(i); | 55 | sum += arch_irq_stat_cpu(i); |
59 | 56 | ||
60 | for (j = 0; j < NR_SOFTIRQS; j++) { | 57 | for (j = 0; j < NR_SOFTIRQS; j++) { |
@@ -110,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v) | |||
110 | seq_printf(p, "intr %llu", (unsigned long long)sum); | 107 | seq_printf(p, "intr %llu", (unsigned long long)sum); |
111 | 108 | ||
112 | /* sum again ? it could be updated? */ | 109 | /* sum again ? it could be updated? */ |
113 | for_each_irq_nr(j) { | 110 | for_each_irq_nr(j) |
114 | per_irq_sum = 0; | 111 | seq_printf(p, " %u", kstat_irqs(j)); |
115 | for_each_possible_cpu(i) | ||
116 | per_irq_sum += kstat_irqs_cpu(j, i); | ||
117 | |||
118 | seq_printf(p, " %u", per_irq_sum); | ||
119 | } | ||
120 | 112 | ||
121 | seq_printf(p, | 113 | seq_printf(p, |
122 | "\nctxt %llu\n" | 114 | "\nctxt %llu\n" |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 271afc48b9a5..da6b01d70f01 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -327,6 +327,7 @@ struct mem_size_stats { | |||
327 | unsigned long private_clean; | 327 | unsigned long private_clean; |
328 | unsigned long private_dirty; | 328 | unsigned long private_dirty; |
329 | unsigned long referenced; | 329 | unsigned long referenced; |
330 | unsigned long anonymous; | ||
330 | unsigned long swap; | 331 | unsigned long swap; |
331 | u64 pss; | 332 | u64 pss; |
332 | }; | 333 | }; |
@@ -357,19 +358,22 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, | |||
357 | if (!page) | 358 | if (!page) |
358 | continue; | 359 | continue; |
359 | 360 | ||
361 | if (PageAnon(page)) | ||
362 | mss->anonymous += PAGE_SIZE; | ||
363 | |||
360 | mss->resident += PAGE_SIZE; | 364 | mss->resident += PAGE_SIZE; |
361 | /* Accumulate the size in pages that have been accessed. */ | 365 | /* Accumulate the size in pages that have been accessed. */ |
362 | if (pte_young(ptent) || PageReferenced(page)) | 366 | if (pte_young(ptent) || PageReferenced(page)) |
363 | mss->referenced += PAGE_SIZE; | 367 | mss->referenced += PAGE_SIZE; |
364 | mapcount = page_mapcount(page); | 368 | mapcount = page_mapcount(page); |
365 | if (mapcount >= 2) { | 369 | if (mapcount >= 2) { |
366 | if (pte_dirty(ptent)) | 370 | if (pte_dirty(ptent) || PageDirty(page)) |
367 | mss->shared_dirty += PAGE_SIZE; | 371 | mss->shared_dirty += PAGE_SIZE; |
368 | else | 372 | else |
369 | mss->shared_clean += PAGE_SIZE; | 373 | mss->shared_clean += PAGE_SIZE; |
370 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; | 374 | mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount; |
371 | } else { | 375 | } else { |
372 | if (pte_dirty(ptent)) | 376 | if (pte_dirty(ptent) || PageDirty(page)) |
373 | mss->private_dirty += PAGE_SIZE; | 377 | mss->private_dirty += PAGE_SIZE; |
374 | else | 378 | else |
375 | mss->private_clean += PAGE_SIZE; | 379 | mss->private_clean += PAGE_SIZE; |
@@ -410,6 +414,7 @@ static int show_smap(struct seq_file *m, void *v) | |||
410 | "Private_Clean: %8lu kB\n" | 414 | "Private_Clean: %8lu kB\n" |
411 | "Private_Dirty: %8lu kB\n" | 415 | "Private_Dirty: %8lu kB\n" |
412 | "Referenced: %8lu kB\n" | 416 | "Referenced: %8lu kB\n" |
417 | "Anonymous: %8lu kB\n" | ||
413 | "Swap: %8lu kB\n" | 418 | "Swap: %8lu kB\n" |
414 | "KernelPageSize: %8lu kB\n" | 419 | "KernelPageSize: %8lu kB\n" |
415 | "MMUPageSize: %8lu kB\n", | 420 | "MMUPageSize: %8lu kB\n", |
@@ -421,6 +426,7 @@ static int show_smap(struct seq_file *m, void *v) | |||
421 | mss.private_clean >> 10, | 426 | mss.private_clean >> 10, |
422 | mss.private_dirty >> 10, | 427 | mss.private_dirty >> 10, |
423 | mss.referenced >> 10, | 428 | mss.referenced >> 10, |
429 | mss.anonymous >> 10, | ||
424 | mss.swap >> 10, | 430 | mss.swap >> 10, |
425 | vma_kernel_pagesize(vma) >> 10, | 431 | vma_kernel_pagesize(vma) >> 10, |
426 | vma_mmu_pagesize(vma) >> 10); | 432 | vma_mmu_pagesize(vma) >> 10); |
@@ -539,6 +545,7 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf, | |||
539 | 545 | ||
540 | const struct file_operations proc_clear_refs_operations = { | 546 | const struct file_operations proc_clear_refs_operations = { |
541 | .write = clear_refs_write, | 547 | .write = clear_refs_write, |
548 | .llseek = noop_llseek, | ||
542 | }; | 549 | }; |
543 | 550 | ||
544 | struct pagemapread { | 551 | struct pagemapread { |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 91c817ff02c3..2367fb3f70bc 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -163,7 +163,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
163 | 163 | ||
164 | static const struct file_operations proc_vmcore_operations = { | 164 | static const struct file_operations proc_vmcore_operations = { |
165 | .read = read_vmcore, | 165 | .read = read_vmcore, |
166 | .llseek = generic_file_llseek, | 166 | .llseek = default_llseek, |
167 | }; | 167 | }; |
168 | 168 | ||
169 | static struct vmcore* __init get_new_element(void) | 169 | static struct vmcore* __init get_new_element(void) |
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 6e8fc62b40a8..7b0329468a5d 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
@@ -11,7 +11,6 @@ | |||
11 | * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. | 11 | * 20-06-1998 by Frank Denis : Linux 2.1.99+ & dcache support. |
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/smp_lock.h> | ||
15 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
16 | #include "qnx4.h" | 15 | #include "qnx4.h" |
17 | 16 | ||
@@ -29,8 +28,6 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
29 | QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); | 28 | QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); |
30 | QNX4DEBUG((KERN_INFO "filp->f_pos = %ld\n", (long) filp->f_pos)); | 29 | QNX4DEBUG((KERN_INFO "filp->f_pos = %ld\n", (long) filp->f_pos)); |
31 | 30 | ||
32 | lock_kernel(); | ||
33 | |||
34 | while (filp->f_pos < inode->i_size) { | 31 | while (filp->f_pos < inode->i_size) { |
35 | blknum = qnx4_block_map( inode, filp->f_pos >> QNX4_BLOCK_SIZE_BITS ); | 32 | blknum = qnx4_block_map( inode, filp->f_pos >> QNX4_BLOCK_SIZE_BITS ); |
36 | bh = sb_bread(inode->i_sb, blknum); | 33 | bh = sb_bread(inode->i_sb, blknum); |
@@ -71,7 +68,6 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
71 | brelse(bh); | 68 | brelse(bh); |
72 | } | 69 | } |
73 | out: | 70 | out: |
74 | unlock_kernel(); | ||
75 | return 0; | 71 | return 0; |
76 | } | 72 | } |
77 | 73 | ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 16829722be93..01bad30026fc 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -16,7 +16,6 @@ | |||
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/highuid.h> | 18 | #include <linux/highuid.h> |
19 | #include <linux/smp_lock.h> | ||
20 | #include <linux/pagemap.h> | 19 | #include <linux/pagemap.h> |
21 | #include <linux/buffer_head.h> | 20 | #include <linux/buffer_head.h> |
22 | #include <linux/writeback.h> | 21 | #include <linux/writeback.h> |
@@ -157,8 +156,6 @@ static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
157 | struct super_block *sb = dentry->d_sb; | 156 | struct super_block *sb = dentry->d_sb; |
158 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 157 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
159 | 158 | ||
160 | lock_kernel(); | ||
161 | |||
162 | buf->f_type = sb->s_magic; | 159 | buf->f_type = sb->s_magic; |
163 | buf->f_bsize = sb->s_blocksize; | 160 | buf->f_bsize = sb->s_blocksize; |
164 | buf->f_blocks = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size) * 8; | 161 | buf->f_blocks = le32_to_cpu(qnx4_sb(sb)->BitMap->di_size) * 8; |
@@ -168,8 +165,6 @@ static int qnx4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
168 | buf->f_fsid.val[0] = (u32)id; | 165 | buf->f_fsid.val[0] = (u32)id; |
169 | buf->f_fsid.val[1] = (u32)(id >> 32); | 166 | buf->f_fsid.val[1] = (u32)(id >> 32); |
170 | 167 | ||
171 | unlock_kernel(); | ||
172 | |||
173 | return 0; | 168 | return 0; |
174 | } | 169 | } |
175 | 170 | ||
@@ -283,7 +278,6 @@ static int qnx4_fill_super(struct super_block *s, void *data, int silent) | |||
283 | goto outi; | 278 | goto outi; |
284 | 279 | ||
285 | brelse(bh); | 280 | brelse(bh); |
286 | |||
287 | return 0; | 281 | return 0; |
288 | 282 | ||
289 | outi: | 283 | outi: |
diff --git a/fs/qnx4/namei.c b/fs/qnx4/namei.c index 58703ebba879..275327b5615e 100644 --- a/fs/qnx4/namei.c +++ b/fs/qnx4/namei.c | |||
@@ -12,7 +12,6 @@ | |||
12 | * 04-07-1998 by Frank Denis : first step for rmdir/unlink. | 12 | * 04-07-1998 by Frank Denis : first step for rmdir/unlink. |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/buffer_head.h> | 15 | #include <linux/buffer_head.h> |
17 | #include "qnx4.h" | 16 | #include "qnx4.h" |
18 | 17 | ||
@@ -109,7 +108,6 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam | |||
109 | int len = dentry->d_name.len; | 108 | int len = dentry->d_name.len; |
110 | struct inode *foundinode = NULL; | 109 | struct inode *foundinode = NULL; |
111 | 110 | ||
112 | lock_kernel(); | ||
113 | if (!(bh = qnx4_find_entry(len, dir, name, &de, &ino))) | 111 | if (!(bh = qnx4_find_entry(len, dir, name, &de, &ino))) |
114 | goto out; | 112 | goto out; |
115 | /* The entry is linked, let's get the real info */ | 113 | /* The entry is linked, let's get the real info */ |
@@ -123,13 +121,11 @@ struct dentry * qnx4_lookup(struct inode *dir, struct dentry *dentry, struct nam | |||
123 | 121 | ||
124 | foundinode = qnx4_iget(dir->i_sb, ino); | 122 | foundinode = qnx4_iget(dir->i_sb, ino); |
125 | if (IS_ERR(foundinode)) { | 123 | if (IS_ERR(foundinode)) { |
126 | unlock_kernel(); | ||
127 | QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n", | 124 | QNX4DEBUG((KERN_ERR "qnx4: lookup->iget -> error %ld\n", |
128 | PTR_ERR(foundinode))); | 125 | PTR_ERR(foundinode))); |
129 | return ERR_CAST(foundinode); | 126 | return ERR_CAST(foundinode); |
130 | } | 127 | } |
131 | out: | 128 | out: |
132 | unlock_kernel(); | ||
133 | d_add(dentry, foundinode); | 129 | d_add(dentry, foundinode); |
134 | 130 | ||
135 | return NULL; | 131 | return NULL; |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a5ebae70dc6d..67fadb1ad2c1 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
@@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, | |||
58 | struct inode * inode = new_inode(sb); | 58 | struct inode * inode = new_inode(sb); |
59 | 59 | ||
60 | if (inode) { | 60 | if (inode) { |
61 | inode->i_ino = get_next_ino(); | ||
61 | inode_init_owner(inode, dir, mode); | 62 | inode_init_owner(inode, dir, mode); |
62 | inode->i_mapping->a_ops = &ramfs_aops; | 63 | inode->i_mapping->a_ops = &ramfs_aops; |
63 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 64 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
diff --git a/fs/read_write.c b/fs/read_write.c index 74e36586e4d3..9cd9d148105d 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -31,6 +31,20 @@ const struct file_operations generic_ro_fops = { | |||
31 | 31 | ||
32 | EXPORT_SYMBOL(generic_ro_fops); | 32 | EXPORT_SYMBOL(generic_ro_fops); |
33 | 33 | ||
34 | static int | ||
35 | __negative_fpos_check(struct file *file, loff_t pos, size_t count) | ||
36 | { | ||
37 | /* | ||
38 | * pos or pos+count is negative here, check overflow. | ||
39 | * too big "count" will be caught in rw_verify_area(). | ||
40 | */ | ||
41 | if ((pos < 0) && (pos + count < pos)) | ||
42 | return -EOVERFLOW; | ||
43 | if (file->f_mode & FMODE_UNSIGNED_OFFSET) | ||
44 | return 0; | ||
45 | return -EINVAL; | ||
46 | } | ||
47 | |||
34 | /** | 48 | /** |
35 | * generic_file_llseek_unlocked - lockless generic llseek implementation | 49 | * generic_file_llseek_unlocked - lockless generic llseek implementation |
36 | * @file: file structure to seek on | 50 | * @file: file structure to seek on |
@@ -62,7 +76,9 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) | |||
62 | break; | 76 | break; |
63 | } | 77 | } |
64 | 78 | ||
65 | if (offset < 0 || offset > inode->i_sb->s_maxbytes) | 79 | if (offset < 0 && __negative_fpos_check(file, offset, 0)) |
80 | return -EINVAL; | ||
81 | if (offset > inode->i_sb->s_maxbytes) | ||
66 | return -EINVAL; | 82 | return -EINVAL; |
67 | 83 | ||
68 | /* Special lock needed here? */ | 84 | /* Special lock needed here? */ |
@@ -124,7 +140,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) | |||
124 | { | 140 | { |
125 | loff_t retval; | 141 | loff_t retval; |
126 | 142 | ||
127 | lock_kernel(); | 143 | mutex_lock(&file->f_dentry->d_inode->i_mutex); |
128 | switch (origin) { | 144 | switch (origin) { |
129 | case SEEK_END: | 145 | case SEEK_END: |
130 | offset += i_size_read(file->f_path.dentry->d_inode); | 146 | offset += i_size_read(file->f_path.dentry->d_inode); |
@@ -137,7 +153,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) | |||
137 | offset += file->f_pos; | 153 | offset += file->f_pos; |
138 | } | 154 | } |
139 | retval = -EINVAL; | 155 | retval = -EINVAL; |
140 | if (offset >= 0) { | 156 | if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) { |
141 | if (offset != file->f_pos) { | 157 | if (offset != file->f_pos) { |
142 | file->f_pos = offset; | 158 | file->f_pos = offset; |
143 | file->f_version = 0; | 159 | file->f_version = 0; |
@@ -145,7 +161,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin) | |||
145 | retval = offset; | 161 | retval = offset; |
146 | } | 162 | } |
147 | out: | 163 | out: |
148 | unlock_kernel(); | 164 | mutex_unlock(&file->f_dentry->d_inode->i_mutex); |
149 | return retval; | 165 | return retval; |
150 | } | 166 | } |
151 | EXPORT_SYMBOL(default_llseek); | 167 | EXPORT_SYMBOL(default_llseek); |
@@ -156,7 +172,6 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int origin) | |||
156 | 172 | ||
157 | fn = no_llseek; | 173 | fn = no_llseek; |
158 | if (file->f_mode & FMODE_LSEEK) { | 174 | if (file->f_mode & FMODE_LSEEK) { |
159 | fn = default_llseek; | ||
160 | if (file->f_op && file->f_op->llseek) | 175 | if (file->f_op && file->f_op->llseek) |
161 | fn = file->f_op->llseek; | 176 | fn = file->f_op->llseek; |
162 | } | 177 | } |
@@ -222,6 +237,7 @@ bad: | |||
222 | } | 237 | } |
223 | #endif | 238 | #endif |
224 | 239 | ||
240 | |||
225 | /* | 241 | /* |
226 | * rw_verify_area doesn't like huge counts. We limit | 242 | * rw_verify_area doesn't like huge counts. We limit |
227 | * them to something that fits in "int" so that others | 243 | * them to something that fits in "int" so that others |
@@ -239,8 +255,11 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count | |||
239 | if (unlikely((ssize_t) count < 0)) | 255 | if (unlikely((ssize_t) count < 0)) |
240 | return retval; | 256 | return retval; |
241 | pos = *ppos; | 257 | pos = *ppos; |
242 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) | 258 | if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) { |
243 | return retval; | 259 | retval = __negative_fpos_check(file, pos, count); |
260 | if (retval) | ||
261 | return retval; | ||
262 | } | ||
244 | 263 | ||
245 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { | 264 | if (unlikely(inode->i_flock && mandatory_lock(inode))) { |
246 | retval = locks_mandatory_area( | 265 | retval = locks_mandatory_area( |
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig index 513f431038f9..7cd46666ba2c 100644 --- a/fs/reiserfs/Kconfig +++ b/fs/reiserfs/Kconfig | |||
@@ -10,7 +10,8 @@ config REISERFS_FS | |||
10 | 10 | ||
11 | In general, ReiserFS is as fast as ext2, but is very efficient with | 11 | In general, ReiserFS is as fast as ext2, but is very efficient with |
12 | large directories and small files. Additional patches are needed | 12 | large directories and small files. Additional patches are needed |
13 | for NFS and quotas, please see <http://www.namesys.com/> for links. | 13 | for NFS and quotas, please see |
14 | <https://reiser4.wiki.kernel.org/index.php/Main_Page> for links. | ||
14 | 15 | ||
15 | It is more easily extended to have features currently found in | 16 | It is more easily extended to have features currently found in |
16 | database and keyword search systems than block allocation based file | 17 | database and keyword search systems than block allocation based file |
@@ -18,7 +19,8 @@ config REISERFS_FS | |||
18 | plugins consistent with our motto ``It takes more than a license to | 19 | plugins consistent with our motto ``It takes more than a license to |
19 | make source code open.'' | 20 | make source code open.'' |
20 | 21 | ||
21 | Read <http://www.namesys.com/> to learn more about reiserfs. | 22 | Read <https://reiser4.wiki.kernel.org/index.php/Main_Page> |
23 | to learn more about reiserfs. | ||
22 | 24 | ||
23 | Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com. | 25 | Sponsored by Threshold Networks, Emusic.com, and Bigstorage.com. |
24 | 26 | ||
diff --git a/fs/reiserfs/README b/fs/reiserfs/README index 14e8c9d460e5..e2f7a264e3ff 100644 --- a/fs/reiserfs/README +++ b/fs/reiserfs/README | |||
@@ -43,7 +43,7 @@ to address the fair crediting issue in the next GPL version.) | |||
43 | [END LICENSING] | 43 | [END LICENSING] |
44 | 44 | ||
45 | Reiserfs is a file system based on balanced tree algorithms, which is | 45 | Reiserfs is a file system based on balanced tree algorithms, which is |
46 | described at http://devlinux.com/namesys. | 46 | described at https://reiser4.wiki.kernel.org/index.php/Main_Page |
47 | 47 | ||
48 | Stop reading here. Go there, then return. | 48 | Stop reading here. Go there, then return. |
49 | 49 | ||
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 6846371498b6..91f080cc76c8 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c | |||
@@ -152,8 +152,7 @@ static int reiserfs_sync_file(struct file *filp, int datasync) | |||
152 | barrier_done = reiserfs_commit_for_inode(inode); | 152 | barrier_done = reiserfs_commit_for_inode(inode); |
153 | reiserfs_write_unlock(inode->i_sb); | 153 | reiserfs_write_unlock(inode->i_sb); |
154 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) | 154 | if (barrier_done != 1 && reiserfs_barrier_flush(inode->i_sb)) |
155 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL, | 155 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
156 | BLKDEV_IFL_WAIT); | ||
157 | if (barrier_done < 0) | 156 | if (barrier_done < 0) |
158 | return barrier_done; | 157 | return barrier_done; |
159 | return (err < 0) ? -EIO : 0; | 158 | return (err < 0) ? -EIO : 0; |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index caa758377d66..41656d40dc5c 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -22,8 +22,6 @@ | |||
22 | 22 | ||
23 | int reiserfs_commit_write(struct file *f, struct page *page, | 23 | int reiserfs_commit_write(struct file *f, struct page *page, |
24 | unsigned from, unsigned to); | 24 | unsigned from, unsigned to); |
25 | int reiserfs_prepare_write(struct file *f, struct page *page, | ||
26 | unsigned from, unsigned to); | ||
27 | 25 | ||
28 | void reiserfs_evict_inode(struct inode *inode) | 26 | void reiserfs_evict_inode(struct inode *inode) |
29 | { | 27 | { |
@@ -165,7 +163,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, | |||
165 | ** but tail is still sitting in a direct item, and we can't write to | 163 | ** but tail is still sitting in a direct item, and we can't write to |
166 | ** it. So, look through this page, and check all the mapped buffers | 164 | ** it. So, look through this page, and check all the mapped buffers |
167 | ** to make sure they have valid block numbers. Any that don't need | 165 | ** to make sure they have valid block numbers. Any that don't need |
168 | ** to be unmapped, so that block_prepare_write will correctly call | 166 | ** to be unmapped, so that __block_write_begin will correctly call |
169 | ** reiserfs_get_block to convert the tail into an unformatted node | 167 | ** reiserfs_get_block to convert the tail into an unformatted node |
170 | */ | 168 | */ |
171 | static inline void fix_tail_page_for_writing(struct page *page) | 169 | static inline void fix_tail_page_for_writing(struct page *page) |
@@ -439,13 +437,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block, | |||
439 | } | 437 | } |
440 | 438 | ||
441 | /* special version of get_block that is only used by grab_tail_page right | 439 | /* special version of get_block that is only used by grab_tail_page right |
442 | ** now. It is sent to block_prepare_write, and when you try to get a | 440 | ** now. It is sent to __block_write_begin, and when you try to get a |
443 | ** block past the end of the file (or a block from a hole) it returns | 441 | ** block past the end of the file (or a block from a hole) it returns |
444 | ** -ENOENT instead of a valid buffer. block_prepare_write expects to | 442 | ** -ENOENT instead of a valid buffer. __block_write_begin expects to |
445 | ** be able to do i/o on the buffers returned, unless an error value | 443 | ** be able to do i/o on the buffers returned, unless an error value |
446 | ** is also returned. | 444 | ** is also returned. |
447 | ** | 445 | ** |
448 | ** So, this allows block_prepare_write to be used for reading a single block | 446 | ** So, this allows __block_write_begin to be used for reading a single block |
449 | ** in a page. Where it does not produce a valid page for holes, or past the | 447 | ** in a page. Where it does not produce a valid page for holes, or past the |
450 | ** end of the file. This turns out to be exactly what we need for reading | 448 | ** end of the file. This turns out to be exactly what we need for reading |
451 | ** tails for conversion. | 449 | ** tails for conversion. |
@@ -558,11 +556,12 @@ static int convert_tail_for_hole(struct inode *inode, | |||
558 | ** | 556 | ** |
559 | ** We must fix the tail page for writing because it might have buffers | 557 | ** We must fix the tail page for writing because it might have buffers |
560 | ** that are mapped, but have a block number of 0. This indicates tail | 558 | ** that are mapped, but have a block number of 0. This indicates tail |
561 | ** data that has been read directly into the page, and block_prepare_write | 559 | ** data that has been read directly into the page, and |
562 | ** won't trigger a get_block in this case. | 560 | ** __block_write_begin won't trigger a get_block in this case. |
563 | */ | 561 | */ |
564 | fix_tail_page_for_writing(tail_page); | 562 | fix_tail_page_for_writing(tail_page); |
565 | retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); | 563 | retval = __reiserfs_write_begin(tail_page, tail_start, |
564 | tail_end - tail_start); | ||
566 | if (retval) | 565 | if (retval) |
567 | goto unlock; | 566 | goto unlock; |
568 | 567 | ||
@@ -2033,7 +2032,7 @@ static int grab_tail_page(struct inode *inode, | |||
2033 | /* start within the page of the last block in the file */ | 2032 | /* start within the page of the last block in the file */ |
2034 | start = (offset / blocksize) * blocksize; | 2033 | start = (offset / blocksize) * blocksize; |
2035 | 2034 | ||
2036 | error = block_prepare_write(page, start, offset, | 2035 | error = __block_write_begin(page, start, offset - start, |
2037 | reiserfs_get_block_create_0); | 2036 | reiserfs_get_block_create_0); |
2038 | if (error) | 2037 | if (error) |
2039 | goto unlock; | 2038 | goto unlock; |
@@ -2438,7 +2437,7 @@ static int reiserfs_write_full_page(struct page *page, | |||
2438 | /* from this point on, we know the buffer is mapped to a | 2437 | /* from this point on, we know the buffer is mapped to a |
2439 | * real block and not a direct item | 2438 | * real block and not a direct item |
2440 | */ | 2439 | */ |
2441 | if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { | 2440 | if (wbc->sync_mode != WB_SYNC_NONE) { |
2442 | lock_buffer(bh); | 2441 | lock_buffer(bh); |
2443 | } else { | 2442 | } else { |
2444 | if (!trylock_buffer(bh)) { | 2443 | if (!trylock_buffer(bh)) { |
@@ -2628,8 +2627,7 @@ static int reiserfs_write_begin(struct file *file, | |||
2628 | return ret; | 2627 | return ret; |
2629 | } | 2628 | } |
2630 | 2629 | ||
2631 | int reiserfs_prepare_write(struct file *f, struct page *page, | 2630 | int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) |
2632 | unsigned from, unsigned to) | ||
2633 | { | 2631 | { |
2634 | struct inode *inode = page->mapping->host; | 2632 | struct inode *inode = page->mapping->host; |
2635 | int ret; | 2633 | int ret; |
@@ -2650,7 +2648,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page, | |||
2650 | th->t_refcount++; | 2648 | th->t_refcount++; |
2651 | } | 2649 | } |
2652 | 2650 | ||
2653 | ret = block_prepare_write(page, from, to, reiserfs_get_block); | 2651 | ret = __block_write_begin(page, from, len, reiserfs_get_block); |
2654 | if (ret && reiserfs_transaction_running(inode->i_sb)) { | 2652 | if (ret && reiserfs_transaction_running(inode->i_sb)) { |
2655 | struct reiserfs_transaction_handle *th = current->journal_info; | 2653 | struct reiserfs_transaction_handle *th = current->journal_info; |
2656 | /* this gets a little ugly. If reiserfs_get_block returned an | 2654 | /* this gets a little ugly. If reiserfs_get_block returned an |
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index f53505de0712..adf22b485cea 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c | |||
@@ -160,8 +160,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, | |||
160 | 160 | ||
161 | int reiserfs_commit_write(struct file *f, struct page *page, | 161 | int reiserfs_commit_write(struct file *f, struct page *page, |
162 | unsigned from, unsigned to); | 162 | unsigned from, unsigned to); |
163 | int reiserfs_prepare_write(struct file *f, struct page *page, | ||
164 | unsigned from, unsigned to); | ||
165 | /* | 163 | /* |
166 | ** reiserfs_unpack | 164 | ** reiserfs_unpack |
167 | ** Function try to convert tail from direct item into indirect. | 165 | ** Function try to convert tail from direct item into indirect. |
@@ -170,6 +168,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page, | |||
170 | int reiserfs_unpack(struct inode *inode, struct file *filp) | 168 | int reiserfs_unpack(struct inode *inode, struct file *filp) |
171 | { | 169 | { |
172 | int retval = 0; | 170 | int retval = 0; |
171 | int depth; | ||
173 | int index; | 172 | int index; |
174 | struct page *page; | 173 | struct page *page; |
175 | struct address_space *mapping; | 174 | struct address_space *mapping; |
@@ -188,8 +187,8 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
188 | /* we need to make sure nobody is changing the file size beneath | 187 | /* we need to make sure nobody is changing the file size beneath |
189 | ** us | 188 | ** us |
190 | */ | 189 | */ |
191 | mutex_lock(&inode->i_mutex); | 190 | reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb); |
192 | reiserfs_write_lock(inode->i_sb); | 191 | depth = reiserfs_write_lock_once(inode->i_sb); |
193 | 192 | ||
194 | write_from = inode->i_size & (blocksize - 1); | 193 | write_from = inode->i_size & (blocksize - 1); |
195 | /* if we are on a block boundary, we are already unpacked. */ | 194 | /* if we are on a block boundary, we are already unpacked. */ |
@@ -199,7 +198,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
199 | } | 198 | } |
200 | 199 | ||
201 | /* we unpack by finding the page with the tail, and calling | 200 | /* we unpack by finding the page with the tail, and calling |
202 | ** reiserfs_prepare_write on that page. This will force a | 201 | ** __reiserfs_write_begin on that page. This will force a |
203 | ** reiserfs_get_block to unpack the tail for us. | 202 | ** reiserfs_get_block to unpack the tail for us. |
204 | */ | 203 | */ |
205 | index = inode->i_size >> PAGE_CACHE_SHIFT; | 204 | index = inode->i_size >> PAGE_CACHE_SHIFT; |
@@ -209,7 +208,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
209 | if (!page) { | 208 | if (!page) { |
210 | goto out; | 209 | goto out; |
211 | } | 210 | } |
212 | retval = reiserfs_prepare_write(NULL, page, write_from, write_from); | 211 | retval = __reiserfs_write_begin(page, write_from, 0); |
213 | if (retval) | 212 | if (retval) |
214 | goto out_unlock; | 213 | goto out_unlock; |
215 | 214 | ||
@@ -224,6 +223,6 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) | |||
224 | 223 | ||
225 | out: | 224 | out: |
226 | mutex_unlock(&inode->i_mutex); | 225 | mutex_unlock(&inode->i_mutex); |
227 | reiserfs_write_unlock(inode->i_sb); | 226 | reiserfs_write_unlock_once(inode->i_sb, depth); |
228 | return retval; | 227 | return retval; |
229 | } | 228 | } |
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 812e2c05aa29..076c8b194682 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c | |||
@@ -138,13 +138,6 @@ static int reiserfs_clean_and_file_buffer(struct buffer_head *bh) | |||
138 | return 0; | 138 | return 0; |
139 | } | 139 | } |
140 | 140 | ||
141 | static void disable_barrier(struct super_block *s) | ||
142 | { | ||
143 | REISERFS_SB(s)->s_mount_opt &= ~(1 << REISERFS_BARRIER_FLUSH); | ||
144 | printk("reiserfs: disabling flush barriers on %s\n", | ||
145 | reiserfs_bdevname(s)); | ||
146 | } | ||
147 | |||
148 | static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block | 141 | static struct reiserfs_bitmap_node *allocate_bitmap_node(struct super_block |
149 | *sb) | 142 | *sb) |
150 | { | 143 | { |
@@ -677,30 +670,6 @@ static void submit_ordered_buffer(struct buffer_head *bh) | |||
677 | submit_bh(WRITE, bh); | 670 | submit_bh(WRITE, bh); |
678 | } | 671 | } |
679 | 672 | ||
680 | static int submit_barrier_buffer(struct buffer_head *bh) | ||
681 | { | ||
682 | get_bh(bh); | ||
683 | bh->b_end_io = reiserfs_end_ordered_io; | ||
684 | clear_buffer_dirty(bh); | ||
685 | if (!buffer_uptodate(bh)) | ||
686 | BUG(); | ||
687 | return submit_bh(WRITE_BARRIER, bh); | ||
688 | } | ||
689 | |||
690 | static void check_barrier_completion(struct super_block *s, | ||
691 | struct buffer_head *bh) | ||
692 | { | ||
693 | if (buffer_eopnotsupp(bh)) { | ||
694 | clear_buffer_eopnotsupp(bh); | ||
695 | disable_barrier(s); | ||
696 | set_buffer_uptodate(bh); | ||
697 | set_buffer_dirty(bh); | ||
698 | reiserfs_write_unlock(s); | ||
699 | sync_dirty_buffer(bh); | ||
700 | reiserfs_write_lock(s); | ||
701 | } | ||
702 | } | ||
703 | |||
704 | #define CHUNK_SIZE 32 | 673 | #define CHUNK_SIZE 32 |
705 | struct buffer_chunk { | 674 | struct buffer_chunk { |
706 | struct buffer_head *bh[CHUNK_SIZE]; | 675 | struct buffer_head *bh[CHUNK_SIZE]; |
@@ -1009,7 +978,6 @@ static int flush_commit_list(struct super_block *s, | |||
1009 | struct buffer_head *tbh = NULL; | 978 | struct buffer_head *tbh = NULL; |
1010 | unsigned int trans_id = jl->j_trans_id; | 979 | unsigned int trans_id = jl->j_trans_id; |
1011 | struct reiserfs_journal *journal = SB_JOURNAL(s); | 980 | struct reiserfs_journal *journal = SB_JOURNAL(s); |
1012 | int barrier = 0; | ||
1013 | int retval = 0; | 981 | int retval = 0; |
1014 | int write_len; | 982 | int write_len; |
1015 | 983 | ||
@@ -1094,24 +1062,6 @@ static int flush_commit_list(struct super_block *s, | |||
1094 | } | 1062 | } |
1095 | atomic_dec(&journal->j_async_throttle); | 1063 | atomic_dec(&journal->j_async_throttle); |
1096 | 1064 | ||
1097 | /* We're skipping the commit if there's an error */ | ||
1098 | if (retval || reiserfs_is_journal_aborted(journal)) | ||
1099 | barrier = 0; | ||
1100 | |||
1101 | /* wait on everything written so far before writing the commit | ||
1102 | * if we are in barrier mode, send the commit down now | ||
1103 | */ | ||
1104 | barrier = reiserfs_barrier_flush(s); | ||
1105 | if (barrier) { | ||
1106 | int ret; | ||
1107 | lock_buffer(jl->j_commit_bh); | ||
1108 | ret = submit_barrier_buffer(jl->j_commit_bh); | ||
1109 | if (ret == -EOPNOTSUPP) { | ||
1110 | set_buffer_uptodate(jl->j_commit_bh); | ||
1111 | disable_barrier(s); | ||
1112 | barrier = 0; | ||
1113 | } | ||
1114 | } | ||
1115 | for (i = 0; i < (jl->j_len + 1); i++) { | 1065 | for (i = 0; i < (jl->j_len + 1); i++) { |
1116 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + | 1066 | bn = SB_ONDISK_JOURNAL_1st_BLOCK(s) + |
1117 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); | 1067 | (jl->j_start + i) % SB_ONDISK_JOURNAL_SIZE(s); |
@@ -1143,27 +1093,22 @@ static int flush_commit_list(struct super_block *s, | |||
1143 | 1093 | ||
1144 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); | 1094 | BUG_ON(atomic_read(&(jl->j_commit_left)) != 1); |
1145 | 1095 | ||
1146 | if (!barrier) { | 1096 | /* If there was a write error in the journal - we can't commit |
1147 | /* If there was a write error in the journal - we can't commit | 1097 | * this transaction - it will be invalid and, if successful, |
1148 | * this transaction - it will be invalid and, if successful, | 1098 | * will just end up propagating the write error out to |
1149 | * will just end up propagating the write error out to | 1099 | * the file system. */ |
1150 | * the file system. */ | 1100 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { |
1151 | if (likely(!retval && !reiserfs_is_journal_aborted (journal))) { | 1101 | if (buffer_dirty(jl->j_commit_bh)) |
1152 | if (buffer_dirty(jl->j_commit_bh)) | 1102 | BUG(); |
1153 | BUG(); | 1103 | mark_buffer_dirty(jl->j_commit_bh) ; |
1154 | mark_buffer_dirty(jl->j_commit_bh) ; | ||
1155 | reiserfs_write_unlock(s); | ||
1156 | sync_dirty_buffer(jl->j_commit_bh) ; | ||
1157 | reiserfs_write_lock(s); | ||
1158 | } | ||
1159 | } else { | ||
1160 | reiserfs_write_unlock(s); | 1104 | reiserfs_write_unlock(s); |
1161 | wait_on_buffer(jl->j_commit_bh); | 1105 | if (reiserfs_barrier_flush(s)) |
1106 | __sync_dirty_buffer(jl->j_commit_bh, WRITE_FLUSH_FUA); | ||
1107 | else | ||
1108 | sync_dirty_buffer(jl->j_commit_bh); | ||
1162 | reiserfs_write_lock(s); | 1109 | reiserfs_write_lock(s); |
1163 | } | 1110 | } |
1164 | 1111 | ||
1165 | check_barrier_completion(s, jl->j_commit_bh); | ||
1166 | |||
1167 | /* If there was a write error in the journal - we can't commit this | 1112 | /* If there was a write error in the journal - we can't commit this |
1168 | * transaction - it will be invalid and, if successful, will just end | 1113 | * transaction - it will be invalid and, if successful, will just end |
1169 | * up propagating the write error out to the filesystem. */ | 1114 | * up propagating the write error out to the filesystem. */ |
@@ -1319,26 +1264,15 @@ static int _update_journal_header_block(struct super_block *sb, | |||
1319 | jh->j_first_unflushed_offset = cpu_to_le32(offset); | 1264 | jh->j_first_unflushed_offset = cpu_to_le32(offset); |
1320 | jh->j_mount_id = cpu_to_le32(journal->j_mount_id); | 1265 | jh->j_mount_id = cpu_to_le32(journal->j_mount_id); |
1321 | 1266 | ||
1322 | if (reiserfs_barrier_flush(sb)) { | 1267 | set_buffer_dirty(journal->j_header_bh); |
1323 | int ret; | 1268 | reiserfs_write_unlock(sb); |
1324 | lock_buffer(journal->j_header_bh); | 1269 | |
1325 | ret = submit_barrier_buffer(journal->j_header_bh); | 1270 | if (reiserfs_barrier_flush(sb)) |
1326 | if (ret == -EOPNOTSUPP) { | 1271 | __sync_dirty_buffer(journal->j_header_bh, WRITE_FLUSH_FUA); |
1327 | set_buffer_uptodate(journal->j_header_bh); | 1272 | else |
1328 | disable_barrier(sb); | ||
1329 | goto sync; | ||
1330 | } | ||
1331 | reiserfs_write_unlock(sb); | ||
1332 | wait_on_buffer(journal->j_header_bh); | ||
1333 | reiserfs_write_lock(sb); | ||
1334 | check_barrier_completion(sb, journal->j_header_bh); | ||
1335 | } else { | ||
1336 | sync: | ||
1337 | set_buffer_dirty(journal->j_header_bh); | ||
1338 | reiserfs_write_unlock(sb); | ||
1339 | sync_dirty_buffer(journal->j_header_bh); | 1273 | sync_dirty_buffer(journal->j_header_bh); |
1340 | reiserfs_write_lock(sb); | 1274 | |
1341 | } | 1275 | reiserfs_write_lock(sb); |
1342 | if (!buffer_uptodate(journal->j_header_bh)) { | 1276 | if (!buffer_uptodate(journal->j_header_bh)) { |
1343 | reiserfs_warning(sb, "journal-837", | 1277 | reiserfs_warning(sb, "journal-837", |
1344 | "IO error during journal replay"); | 1278 | "IO error during journal replay"); |
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index ee78d4a0086a..ba5f51ec3458 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c | |||
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, | |||
1156 | inode->i_ctime = CURRENT_TIME_SEC; | 1156 | inode->i_ctime = CURRENT_TIME_SEC; |
1157 | reiserfs_update_sd(&th, inode); | 1157 | reiserfs_update_sd(&th, inode); |
1158 | 1158 | ||
1159 | atomic_inc(&inode->i_count); | 1159 | ihold(inode); |
1160 | d_instantiate(dentry, inode); | 1160 | d_instantiate(dentry, inode); |
1161 | retval = journal_end(&th, dir->i_sb, jbegin_count); | 1161 | retval = journal_end(&th, dir->i_sb, jbegin_count); |
1162 | reiserfs_write_unlock(dir->i_sb); | 1162 | reiserfs_write_unlock(dir->i_sb); |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8c4cf273c672..5d04a7828e7a 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
@@ -418,13 +418,11 @@ static inline __u32 xattr_hash(const char *msg, int len) | |||
418 | 418 | ||
419 | int reiserfs_commit_write(struct file *f, struct page *page, | 419 | int reiserfs_commit_write(struct file *f, struct page *page, |
420 | unsigned from, unsigned to); | 420 | unsigned from, unsigned to); |
421 | int reiserfs_prepare_write(struct file *f, struct page *page, | ||
422 | unsigned from, unsigned to); | ||
423 | 421 | ||
424 | static void update_ctime(struct inode *inode) | 422 | static void update_ctime(struct inode *inode) |
425 | { | 423 | { |
426 | struct timespec now = current_fs_time(inode->i_sb); | 424 | struct timespec now = current_fs_time(inode->i_sb); |
427 | if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink || | 425 | if (inode_unhashed(inode) || !inode->i_nlink || |
428 | timespec_equal(&inode->i_ctime, &now)) | 426 | timespec_equal(&inode->i_ctime, &now)) |
429 | return; | 427 | return; |
430 | 428 | ||
@@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, | |||
532 | rxh->h_hash = cpu_to_le32(xahash); | 530 | rxh->h_hash = cpu_to_le32(xahash); |
533 | } | 531 | } |
534 | 532 | ||
535 | err = reiserfs_prepare_write(NULL, page, page_offset, | 533 | err = __reiserfs_write_begin(page, page_offset, chunk + skip); |
536 | page_offset + chunk + skip); | ||
537 | if (!err) { | 534 | if (!err) { |
538 | if (buffer) | 535 | if (buffer) |
539 | memcpy(data + skip, buffer + buffer_pos, chunk); | 536 | memcpy(data + skip, buffer + buffer_pos, chunk); |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 42d213546894..268580535c92 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -282,6 +282,7 @@ error: | |||
282 | static const struct file_operations romfs_dir_operations = { | 282 | static const struct file_operations romfs_dir_operations = { |
283 | .read = generic_read_dir, | 283 | .read = generic_read_dir, |
284 | .readdir = romfs_readdir, | 284 | .readdir = romfs_readdir, |
285 | .llseek = default_llseek, | ||
285 | }; | 286 | }; |
286 | 287 | ||
287 | static const struct inode_operations romfs_dir_inode_operations = { | 288 | static const struct inode_operations romfs_dir_inode_operations = { |
diff --git a/fs/select.c b/fs/select.c index 500a669f7790..b7b10aa30861 100644 --- a/fs/select.c +++ b/fs/select.c | |||
@@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv) | |||
67 | return slack; | 67 | return slack; |
68 | } | 68 | } |
69 | 69 | ||
70 | static long estimate_accuracy(struct timespec *tv) | 70 | long select_estimate_accuracy(struct timespec *tv) |
71 | { | 71 | { |
72 | unsigned long ret; | 72 | unsigned long ret; |
73 | struct timespec now; | 73 | struct timespec now; |
@@ -417,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time) | |||
417 | } | 417 | } |
418 | 418 | ||
419 | if (end_time && !timed_out) | 419 | if (end_time && !timed_out) |
420 | slack = estimate_accuracy(end_time); | 420 | slack = select_estimate_accuracy(end_time); |
421 | 421 | ||
422 | retval = 0; | 422 | retval = 0; |
423 | for (;;) { | 423 | for (;;) { |
@@ -769,7 +769,7 @@ static int do_poll(unsigned int nfds, struct poll_list *list, | |||
769 | } | 769 | } |
770 | 770 | ||
771 | if (end_time && !timed_out) | 771 | if (end_time && !timed_out) |
772 | slack = estimate_accuracy(end_time); | 772 | slack = select_estimate_accuracy(end_time); |
773 | 773 | ||
774 | for (;;) { | 774 | for (;;) { |
775 | struct poll_list *walk; | 775 | struct poll_list *walk; |
diff --git a/fs/seq_file.c b/fs/seq_file.c index e1f437be6c3c..05d6b0e78c95 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -131,7 +131,7 @@ Eoverflow: | |||
131 | */ | 131 | */ |
132 | ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | 132 | ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) |
133 | { | 133 | { |
134 | struct seq_file *m = (struct seq_file *)file->private_data; | 134 | struct seq_file *m = file->private_data; |
135 | size_t copied = 0; | 135 | size_t copied = 0; |
136 | loff_t pos; | 136 | loff_t pos; |
137 | size_t n; | 137 | size_t n; |
@@ -280,7 +280,7 @@ EXPORT_SYMBOL(seq_read); | |||
280 | */ | 280 | */ |
281 | loff_t seq_lseek(struct file *file, loff_t offset, int origin) | 281 | loff_t seq_lseek(struct file *file, loff_t offset, int origin) |
282 | { | 282 | { |
283 | struct seq_file *m = (struct seq_file *)file->private_data; | 283 | struct seq_file *m = file->private_data; |
284 | loff_t retval = -EINVAL; | 284 | loff_t retval = -EINVAL; |
285 | 285 | ||
286 | mutex_lock(&m->lock); | 286 | mutex_lock(&m->lock); |
@@ -324,7 +324,7 @@ EXPORT_SYMBOL(seq_lseek); | |||
324 | */ | 324 | */ |
325 | int seq_release(struct inode *inode, struct file *file) | 325 | int seq_release(struct inode *inode, struct file *file) |
326 | { | 326 | { |
327 | struct seq_file *m = (struct seq_file *)file->private_data; | 327 | struct seq_file *m = file->private_data; |
328 | kfree(m->buf); | 328 | kfree(m->buf); |
329 | kfree(m); | 329 | kfree(m); |
330 | return 0; | 330 | return 0; |
@@ -462,9 +462,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root, | |||
462 | if (size) { | 462 | if (size) { |
463 | char *p; | 463 | char *p; |
464 | 464 | ||
465 | spin_lock(&dcache_lock); | ||
466 | p = __d_path(path, root, buf, size); | 465 | p = __d_path(path, root, buf, size); |
467 | spin_unlock(&dcache_lock); | ||
468 | res = PTR_ERR(p); | 466 | res = PTR_ERR(p); |
469 | if (!IS_ERR(p)) { | 467 | if (!IS_ERR(p)) { |
470 | char *end = mangle_path(buf, p, esc); | 468 | char *end = mangle_path(buf, p, esc); |
diff --git a/fs/signalfd.c b/fs/signalfd.c index 1c5a6add779d..492465b451dd 100644 --- a/fs/signalfd.c +++ b/fs/signalfd.c | |||
@@ -99,6 +99,16 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo, | |||
99 | #ifdef __ARCH_SI_TRAPNO | 99 | #ifdef __ARCH_SI_TRAPNO |
100 | err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); | 100 | err |= __put_user(kinfo->si_trapno, &uinfo->ssi_trapno); |
101 | #endif | 101 | #endif |
102 | #ifdef BUS_MCEERR_AO | ||
103 | /* | ||
104 | * Other callers might not initialize the si_lsb field, | ||
105 | * so check explicitly for the right codes here. | ||
106 | */ | ||
107 | if (kinfo->si_code == BUS_MCEERR_AR || | ||
108 | kinfo->si_code == BUS_MCEERR_AO) | ||
109 | err |= __put_user((short) kinfo->si_addr_lsb, | ||
110 | &uinfo->ssi_addr_lsb); | ||
111 | #endif | ||
102 | break; | 112 | break; |
103 | case __SI_CHLD: | 113 | case __SI_CHLD: |
104 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); | 114 | err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid); |
@@ -206,6 +216,7 @@ static const struct file_operations signalfd_fops = { | |||
206 | .release = signalfd_release, | 216 | .release = signalfd_release, |
207 | .poll = signalfd_poll, | 217 | .poll = signalfd_poll, |
208 | .read = signalfd_read, | 218 | .read = signalfd_read, |
219 | .llseek = noop_llseek, | ||
209 | }; | 220 | }; |
210 | 221 | ||
211 | SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, | 222 | SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask, |
diff --git a/fs/smbfs/Kconfig b/fs/smbfs/Kconfig index e668127c8b2e..2bc24a8c4039 100644 --- a/fs/smbfs/Kconfig +++ b/fs/smbfs/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config SMB_FS | 1 | config SMB_FS |
2 | tristate "SMB file system support (OBSOLETE, please use CIFS)" | 2 | tristate "SMB file system support (OBSOLETE, please use CIFS)" |
3 | depends on BKL # probably unfixable | ||
3 | depends on INET | 4 | depends on INET |
4 | select NLS | 5 | select NLS |
5 | help | 6 | help |
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index 00a70cab1f36..f678d421e541 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c | |||
@@ -406,21 +406,15 @@ void | |||
406 | smb_renew_times(struct dentry * dentry) | 406 | smb_renew_times(struct dentry * dentry) |
407 | { | 407 | { |
408 | dget(dentry); | 408 | dget(dentry); |
409 | spin_lock(&dentry->d_lock); | 409 | dentry->d_time = jiffies; |
410 | for (;;) { | ||
411 | struct dentry *parent; | ||
412 | 410 | ||
413 | dentry->d_time = jiffies; | 411 | while (!IS_ROOT(dentry)) { |
414 | if (IS_ROOT(dentry)) | 412 | struct dentry *parent = dget_parent(dentry); |
415 | break; | ||
416 | parent = dentry->d_parent; | ||
417 | dget(parent); | ||
418 | spin_unlock(&dentry->d_lock); | ||
419 | dput(dentry); | 413 | dput(dentry); |
420 | dentry = parent; | 414 | dentry = parent; |
421 | spin_lock(&dentry->d_lock); | 415 | |
416 | dentry->d_time = jiffies; | ||
422 | } | 417 | } |
423 | spin_unlock(&dentry->d_lock); | ||
424 | dput(dentry); | 418 | dput(dentry); |
425 | } | 419 | } |
426 | 420 | ||
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c index 450c91941988..f6e9ee59757e 100644 --- a/fs/smbfs/inode.c +++ b/fs/smbfs/inode.c | |||
@@ -229,7 +229,6 @@ smb_invalidate_inodes(struct smb_sb_info *server) | |||
229 | { | 229 | { |
230 | VERBOSE("\n"); | 230 | VERBOSE("\n"); |
231 | shrink_dcache_sb(SB_of(server)); | 231 | shrink_dcache_sb(SB_of(server)); |
232 | invalidate_inodes(SB_of(server)); | ||
233 | } | 232 | } |
234 | 233 | ||
235 | /* | 234 | /* |
@@ -501,6 +500,8 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
501 | void *mem; | 500 | void *mem; |
502 | static int warn_count; | 501 | static int warn_count; |
503 | 502 | ||
503 | lock_kernel(); | ||
504 | |||
504 | if (warn_count < 5) { | 505 | if (warn_count < 5) { |
505 | warn_count++; | 506 | warn_count++; |
506 | printk(KERN_EMERG "smbfs is deprecated and will be removed" | 507 | printk(KERN_EMERG "smbfs is deprecated and will be removed" |
@@ -621,6 +622,7 @@ static int smb_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
621 | 622 | ||
622 | smb_new_dentry(sb->s_root); | 623 | smb_new_dentry(sb->s_root); |
623 | 624 | ||
625 | unlock_kernel(); | ||
624 | return 0; | 626 | return 0; |
625 | 627 | ||
626 | out_no_root: | 628 | out_no_root: |
@@ -643,9 +645,11 @@ out_wrong_data: | |||
643 | out_no_data: | 645 | out_no_data: |
644 | printk(KERN_ERR "smb_fill_super: missing data argument\n"); | 646 | printk(KERN_ERR "smb_fill_super: missing data argument\n"); |
645 | out_fail: | 647 | out_fail: |
648 | unlock_kernel(); | ||
646 | return -EINVAL; | 649 | return -EINVAL; |
647 | out_no_server: | 650 | out_no_server: |
648 | printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n"); | 651 | printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n"); |
652 | unlock_kernel(); | ||
649 | return -ENOMEM; | 653 | return -ENOMEM; |
650 | } | 654 | } |
651 | 655 | ||
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c index 71c29b6670b4..3dcf638d4d3a 100644 --- a/fs/smbfs/proc.c +++ b/fs/smbfs/proc.c | |||
@@ -332,16 +332,15 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf, | |||
332 | * and store it in reversed order [see reverse_string()] | 332 | * and store it in reversed order [see reverse_string()] |
333 | */ | 333 | */ |
334 | dget(entry); | 334 | dget(entry); |
335 | spin_lock(&entry->d_lock); | ||
336 | while (!IS_ROOT(entry)) { | 335 | while (!IS_ROOT(entry)) { |
337 | struct dentry *parent; | 336 | struct dentry *parent; |
338 | 337 | ||
339 | if (maxlen < (3<<unicode)) { | 338 | if (maxlen < (3<<unicode)) { |
340 | spin_unlock(&entry->d_lock); | ||
341 | dput(entry); | 339 | dput(entry); |
342 | return -ENAMETOOLONG; | 340 | return -ENAMETOOLONG; |
343 | } | 341 | } |
344 | 342 | ||
343 | spin_lock(&entry->d_lock); | ||
345 | len = server->ops->convert(path, maxlen-2, | 344 | len = server->ops->convert(path, maxlen-2, |
346 | entry->d_name.name, entry->d_name.len, | 345 | entry->d_name.name, entry->d_name.len, |
347 | server->local_nls, server->remote_nls); | 346 | server->local_nls, server->remote_nls); |
@@ -359,15 +358,12 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf, | |||
359 | } | 358 | } |
360 | *path++ = '\\'; | 359 | *path++ = '\\'; |
361 | maxlen -= len+1; | 360 | maxlen -= len+1; |
362 | |||
363 | parent = entry->d_parent; | ||
364 | dget(parent); | ||
365 | spin_unlock(&entry->d_lock); | 361 | spin_unlock(&entry->d_lock); |
362 | |||
363 | parent = dget_parent(entry); | ||
366 | dput(entry); | 364 | dput(entry); |
367 | entry = parent; | 365 | entry = parent; |
368 | spin_lock(&entry->d_lock); | ||
369 | } | 366 | } |
370 | spin_unlock(&entry->d_lock); | ||
371 | dput(entry); | 367 | dput(entry); |
372 | reverse_string(buf, path-buf); | 368 | reverse_string(buf, path-buf); |
373 | 369 | ||
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 12b933ac6585..0dc340aa2be9 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c | |||
@@ -230,5 +230,6 @@ failed_read: | |||
230 | 230 | ||
231 | const struct file_operations squashfs_dir_ops = { | 231 | const struct file_operations squashfs_dir_ops = { |
232 | .read = generic_read_dir, | 232 | .read = generic_read_dir, |
233 | .readdir = squashfs_readdir | 233 | .readdir = squashfs_readdir, |
234 | .llseek = default_llseek, | ||
234 | }; | 235 | }; |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 88b4f8606652..07a4f1156048 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -30,7 +30,6 @@ | |||
30 | #include <linux/fs.h> | 30 | #include <linux/fs.h> |
31 | #include <linux/vfs.h> | 31 | #include <linux/vfs.h> |
32 | #include <linux/slab.h> | 32 | #include <linux/slab.h> |
33 | #include <linux/smp_lock.h> | ||
34 | #include <linux/mutex.h> | 33 | #include <linux/mutex.h> |
35 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
36 | #include <linux/init.h> | 35 | #include <linux/init.h> |
@@ -354,8 +353,6 @@ static int squashfs_remount(struct super_block *sb, int *flags, char *data) | |||
354 | 353 | ||
355 | static void squashfs_put_super(struct super_block *sb) | 354 | static void squashfs_put_super(struct super_block *sb) |
356 | { | 355 | { |
357 | lock_kernel(); | ||
358 | |||
359 | if (sb->s_fs_info) { | 356 | if (sb->s_fs_info) { |
360 | struct squashfs_sb_info *sbi = sb->s_fs_info; | 357 | struct squashfs_sb_info *sbi = sb->s_fs_info; |
361 | squashfs_cache_delete(sbi->block_cache); | 358 | squashfs_cache_delete(sbi->block_cache); |
@@ -370,8 +367,6 @@ static void squashfs_put_super(struct super_block *sb) | |||
370 | kfree(sb->s_fs_info); | 367 | kfree(sb->s_fs_info); |
371 | sb->s_fs_info = NULL; | 368 | sb->s_fs_info = NULL; |
372 | } | 369 | } |
373 | |||
374 | unlock_kernel(); | ||
375 | } | 370 | } |
376 | 371 | ||
377 | 372 | ||
diff --git a/fs/super.c b/fs/super.c index 8819e3a7ff20..b9c9869165db 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb) | |||
273 | get_fs_excl(); | 273 | get_fs_excl(); |
274 | sb->s_flags &= ~MS_ACTIVE; | 274 | sb->s_flags &= ~MS_ACTIVE; |
275 | 275 | ||
276 | /* bad name - it should be evict_inodes() */ | 276 | fsnotify_unmount_inodes(&sb->s_inodes); |
277 | invalidate_inodes(sb); | 277 | |
278 | evict_inodes(sb); | ||
278 | 279 | ||
279 | if (sop->put_super) | 280 | if (sop->put_super) |
280 | sop->put_super(sb); | 281 | sop->put_super(sb); |
281 | 282 | ||
282 | /* Forget any remaining inodes */ | 283 | if (!list_empty(&sb->s_inodes)) { |
283 | if (invalidate_inodes(sb)) { | ||
284 | printk("VFS: Busy inodes after unmount of %s. " | 284 | printk("VFS: Busy inodes after unmount of %s. " |
285 | "Self-destruct in 5 seconds. Have a nice day...\n", | 285 | "Self-destruct in 5 seconds. Have a nice day...\n", |
286 | sb->s_id); | 286 | sb->s_id); |
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 4e321f7353fa..a4759833d62d 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
@@ -179,30 +179,14 @@ static void bin_vma_open(struct vm_area_struct *vma) | |||
179 | struct bin_buffer *bb = file->private_data; | 179 | struct bin_buffer *bb = file->private_data; |
180 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 180 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
181 | 181 | ||
182 | if (!bb->vm_ops || !bb->vm_ops->open) | 182 | if (!bb->vm_ops) |
183 | return; | ||
184 | |||
185 | if (!sysfs_get_active(attr_sd)) | ||
186 | return; | ||
187 | |||
188 | bb->vm_ops->open(vma); | ||
189 | |||
190 | sysfs_put_active(attr_sd); | ||
191 | } | ||
192 | |||
193 | static void bin_vma_close(struct vm_area_struct *vma) | ||
194 | { | ||
195 | struct file *file = vma->vm_file; | ||
196 | struct bin_buffer *bb = file->private_data; | ||
197 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | ||
198 | |||
199 | if (!bb->vm_ops || !bb->vm_ops->close) | ||
200 | return; | 183 | return; |
201 | 184 | ||
202 | if (!sysfs_get_active(attr_sd)) | 185 | if (!sysfs_get_active(attr_sd)) |
203 | return; | 186 | return; |
204 | 187 | ||
205 | bb->vm_ops->close(vma); | 188 | if (bb->vm_ops->open) |
189 | bb->vm_ops->open(vma); | ||
206 | 190 | ||
207 | sysfs_put_active(attr_sd); | 191 | sysfs_put_active(attr_sd); |
208 | } | 192 | } |
@@ -214,13 +198,15 @@ static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
214 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 198 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
215 | int ret; | 199 | int ret; |
216 | 200 | ||
217 | if (!bb->vm_ops || !bb->vm_ops->fault) | 201 | if (!bb->vm_ops) |
218 | return VM_FAULT_SIGBUS; | 202 | return VM_FAULT_SIGBUS; |
219 | 203 | ||
220 | if (!sysfs_get_active(attr_sd)) | 204 | if (!sysfs_get_active(attr_sd)) |
221 | return VM_FAULT_SIGBUS; | 205 | return VM_FAULT_SIGBUS; |
222 | 206 | ||
223 | ret = bb->vm_ops->fault(vma, vmf); | 207 | ret = VM_FAULT_SIGBUS; |
208 | if (bb->vm_ops->fault) | ||
209 | ret = bb->vm_ops->fault(vma, vmf); | ||
224 | 210 | ||
225 | sysfs_put_active(attr_sd); | 211 | sysfs_put_active(attr_sd); |
226 | return ret; | 212 | return ret; |
@@ -236,13 +222,12 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
236 | if (!bb->vm_ops) | 222 | if (!bb->vm_ops) |
237 | return VM_FAULT_SIGBUS; | 223 | return VM_FAULT_SIGBUS; |
238 | 224 | ||
239 | if (!bb->vm_ops->page_mkwrite) | ||
240 | return 0; | ||
241 | |||
242 | if (!sysfs_get_active(attr_sd)) | 225 | if (!sysfs_get_active(attr_sd)) |
243 | return VM_FAULT_SIGBUS; | 226 | return VM_FAULT_SIGBUS; |
244 | 227 | ||
245 | ret = bb->vm_ops->page_mkwrite(vma, vmf); | 228 | ret = 0; |
229 | if (bb->vm_ops->page_mkwrite) | ||
230 | ret = bb->vm_ops->page_mkwrite(vma, vmf); | ||
246 | 231 | ||
247 | sysfs_put_active(attr_sd); | 232 | sysfs_put_active(attr_sd); |
248 | return ret; | 233 | return ret; |
@@ -256,13 +241,15 @@ static int bin_access(struct vm_area_struct *vma, unsigned long addr, | |||
256 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 241 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
257 | int ret; | 242 | int ret; |
258 | 243 | ||
259 | if (!bb->vm_ops || !bb->vm_ops->access) | 244 | if (!bb->vm_ops) |
260 | return -EINVAL; | 245 | return -EINVAL; |
261 | 246 | ||
262 | if (!sysfs_get_active(attr_sd)) | 247 | if (!sysfs_get_active(attr_sd)) |
263 | return -EINVAL; | 248 | return -EINVAL; |
264 | 249 | ||
265 | ret = bb->vm_ops->access(vma, addr, buf, len, write); | 250 | ret = -EINVAL; |
251 | if (bb->vm_ops->access) | ||
252 | ret = bb->vm_ops->access(vma, addr, buf, len, write); | ||
266 | 253 | ||
267 | sysfs_put_active(attr_sd); | 254 | sysfs_put_active(attr_sd); |
268 | return ret; | 255 | return ret; |
@@ -276,13 +263,15 @@ static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new) | |||
276 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 263 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
277 | int ret; | 264 | int ret; |
278 | 265 | ||
279 | if (!bb->vm_ops || !bb->vm_ops->set_policy) | 266 | if (!bb->vm_ops) |
280 | return 0; | 267 | return 0; |
281 | 268 | ||
282 | if (!sysfs_get_active(attr_sd)) | 269 | if (!sysfs_get_active(attr_sd)) |
283 | return -EINVAL; | 270 | return -EINVAL; |
284 | 271 | ||
285 | ret = bb->vm_ops->set_policy(vma, new); | 272 | ret = 0; |
273 | if (bb->vm_ops->set_policy) | ||
274 | ret = bb->vm_ops->set_policy(vma, new); | ||
286 | 275 | ||
287 | sysfs_put_active(attr_sd); | 276 | sysfs_put_active(attr_sd); |
288 | return ret; | 277 | return ret; |
@@ -296,13 +285,15 @@ static struct mempolicy *bin_get_policy(struct vm_area_struct *vma, | |||
296 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 285 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
297 | struct mempolicy *pol; | 286 | struct mempolicy *pol; |
298 | 287 | ||
299 | if (!bb->vm_ops || !bb->vm_ops->get_policy) | 288 | if (!bb->vm_ops) |
300 | return vma->vm_policy; | 289 | return vma->vm_policy; |
301 | 290 | ||
302 | if (!sysfs_get_active(attr_sd)) | 291 | if (!sysfs_get_active(attr_sd)) |
303 | return vma->vm_policy; | 292 | return vma->vm_policy; |
304 | 293 | ||
305 | pol = bb->vm_ops->get_policy(vma, addr); | 294 | pol = vma->vm_policy; |
295 | if (bb->vm_ops->get_policy) | ||
296 | pol = bb->vm_ops->get_policy(vma, addr); | ||
306 | 297 | ||
307 | sysfs_put_active(attr_sd); | 298 | sysfs_put_active(attr_sd); |
308 | return pol; | 299 | return pol; |
@@ -316,13 +307,15 @@ static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, | |||
316 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; | 307 | struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; |
317 | int ret; | 308 | int ret; |
318 | 309 | ||
319 | if (!bb->vm_ops || !bb->vm_ops->migrate) | 310 | if (!bb->vm_ops) |
320 | return 0; | 311 | return 0; |
321 | 312 | ||
322 | if (!sysfs_get_active(attr_sd)) | 313 | if (!sysfs_get_active(attr_sd)) |
323 | return 0; | 314 | return 0; |
324 | 315 | ||
325 | ret = bb->vm_ops->migrate(vma, from, to, flags); | 316 | ret = 0; |
317 | if (bb->vm_ops->migrate) | ||
318 | ret = bb->vm_ops->migrate(vma, from, to, flags); | ||
326 | 319 | ||
327 | sysfs_put_active(attr_sd); | 320 | sysfs_put_active(attr_sd); |
328 | return ret; | 321 | return ret; |
@@ -331,7 +324,6 @@ static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, | |||
331 | 324 | ||
332 | static const struct vm_operations_struct bin_vm_ops = { | 325 | static const struct vm_operations_struct bin_vm_ops = { |
333 | .open = bin_vma_open, | 326 | .open = bin_vma_open, |
334 | .close = bin_vma_close, | ||
335 | .fault = bin_fault, | 327 | .fault = bin_fault, |
336 | .page_mkwrite = bin_page_mkwrite, | 328 | .page_mkwrite = bin_page_mkwrite, |
337 | .access = bin_access, | 329 | .access = bin_access, |
@@ -377,6 +369,14 @@ static int mmap(struct file *file, struct vm_area_struct *vma) | |||
377 | if (bb->mmapped && bb->vm_ops != vma->vm_ops) | 369 | if (bb->mmapped && bb->vm_ops != vma->vm_ops) |
378 | goto out_put; | 370 | goto out_put; |
379 | 371 | ||
372 | /* | ||
373 | * It is not possible to successfully wrap close. | ||
374 | * So error if someone is trying to use close. | ||
375 | */ | ||
376 | rc = -EINVAL; | ||
377 | if (vma->vm_ops && vma->vm_ops->close) | ||
378 | goto out_put; | ||
379 | |||
380 | rc = 0; | 380 | rc = 0; |
381 | bb->mmapped = 1; | 381 | bb->mmapped = 1; |
382 | bb->vm_ops = vma->vm_ops; | 382 | bb->vm_ops = vma->vm_ops; |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 23c1e598792a..442f34ff1af8 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -148,6 +148,65 @@ void sysfs_remove_group(struct kobject * kobj, | |||
148 | sysfs_put(sd); | 148 | sysfs_put(sd); |
149 | } | 149 | } |
150 | 150 | ||
151 | /** | ||
152 | * sysfs_merge_group - merge files into a pre-existing attribute group. | ||
153 | * @kobj: The kobject containing the group. | ||
154 | * @grp: The files to create and the attribute group they belong to. | ||
155 | * | ||
156 | * This function returns an error if the group doesn't exist or any of the | ||
157 | * files already exist in that group, in which case none of the new files | ||
158 | * are created. | ||
159 | */ | ||
160 | int sysfs_merge_group(struct kobject *kobj, | ||
161 | const struct attribute_group *grp) | ||
162 | { | ||
163 | struct sysfs_dirent *dir_sd; | ||
164 | int error = 0; | ||
165 | struct attribute *const *attr; | ||
166 | int i; | ||
167 | |||
168 | if (grp) | ||
169 | dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); | ||
170 | else | ||
171 | dir_sd = sysfs_get(kobj->sd); | ||
172 | if (!dir_sd) | ||
173 | return -ENOENT; | ||
174 | |||
175 | for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) | ||
176 | error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); | ||
177 | if (error) { | ||
178 | while (--i >= 0) | ||
179 | sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name); | ||
180 | } | ||
181 | sysfs_put(dir_sd); | ||
182 | |||
183 | return error; | ||
184 | } | ||
185 | EXPORT_SYMBOL_GPL(sysfs_merge_group); | ||
186 | |||
187 | /** | ||
188 | * sysfs_unmerge_group - remove files from a pre-existing attribute group. | ||
189 | * @kobj: The kobject containing the group. | ||
190 | * @grp: The files to remove and the attribute group they belong to. | ||
191 | */ | ||
192 | void sysfs_unmerge_group(struct kobject *kobj, | ||
193 | const struct attribute_group *grp) | ||
194 | { | ||
195 | struct sysfs_dirent *dir_sd; | ||
196 | struct attribute *const *attr; | ||
197 | |||
198 | if (grp) | ||
199 | dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); | ||
200 | else | ||
201 | dir_sd = sysfs_get(kobj->sd); | ||
202 | if (dir_sd) { | ||
203 | for (attr = grp->attrs; *attr; ++attr) | ||
204 | sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); | ||
205 | sysfs_put(dir_sd); | ||
206 | } | ||
207 | } | ||
208 | EXPORT_SYMBOL_GPL(sysfs_unmerge_group); | ||
209 | |||
151 | 210 | ||
152 | EXPORT_SYMBOL_GPL(sysfs_create_group); | 211 | EXPORT_SYMBOL_GPL(sysfs_create_group); |
153 | EXPORT_SYMBOL_GPL(sysfs_update_group); | 212 | EXPORT_SYMBOL_GPL(sysfs_update_group); |
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 33e047b59b8d..11e7f7d11cd0 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c | |||
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, | |||
126 | 126 | ||
127 | inode->i_ctime = CURRENT_TIME_SEC; | 127 | inode->i_ctime = CURRENT_TIME_SEC; |
128 | inode_inc_link_count(inode); | 128 | inode_inc_link_count(inode); |
129 | atomic_inc(&inode->i_count); | 129 | ihold(inode); |
130 | 130 | ||
131 | return add_nondir(dentry, inode); | 131 | return add_nondir(dentry, inode); |
132 | } | 132 | } |
diff --git a/fs/timerfd.c b/fs/timerfd.c index b86ab8eff79a..8c4fc1425b3e 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -144,6 +144,7 @@ static const struct file_operations timerfd_fops = { | |||
144 | .release = timerfd_release, | 144 | .release = timerfd_release, |
145 | .poll = timerfd_poll, | 145 | .poll = timerfd_poll, |
146 | .read = timerfd_read, | 146 | .read = timerfd_read, |
147 | .llseek = noop_llseek, | ||
147 | }; | 148 | }; |
148 | 149 | ||
149 | static struct file *timerfd_fget(int fd) | 150 | static struct file *timerfd_fget(int fd) |
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 37fa7ed062d8..02429d81ca33 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c | |||
@@ -63,7 +63,9 @@ static int do_commit(struct ubifs_info *c) | |||
63 | struct ubifs_lp_stats lst; | 63 | struct ubifs_lp_stats lst; |
64 | 64 | ||
65 | dbg_cmt("start"); | 65 | dbg_cmt("start"); |
66 | if (c->ro_media) { | 66 | ubifs_assert(!c->ro_media && !c->ro_mount); |
67 | |||
68 | if (c->ro_error) { | ||
67 | err = -EROFS; | 69 | err = -EROFS; |
68 | goto out_up; | 70 | goto out_up; |
69 | } | 71 | } |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index c2a68baa782f..0bee4dbffc31 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
@@ -2239,6 +2239,162 @@ out_free: | |||
2239 | return err; | 2239 | return err; |
2240 | } | 2240 | } |
2241 | 2241 | ||
2242 | /** | ||
2243 | * dbg_check_data_nodes_order - check that list of data nodes is sorted. | ||
2244 | * @c: UBIFS file-system description object | ||
2245 | * @head: the list of nodes ('struct ubifs_scan_node' objects) | ||
2246 | * | ||
2247 | * This function returns zero if the list of data nodes is sorted correctly, | ||
2248 | * and %-EINVAL if not. | ||
2249 | */ | ||
2250 | int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) | ||
2251 | { | ||
2252 | struct list_head *cur; | ||
2253 | struct ubifs_scan_node *sa, *sb; | ||
2254 | |||
2255 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
2256 | return 0; | ||
2257 | |||
2258 | for (cur = head->next; cur->next != head; cur = cur->next) { | ||
2259 | ino_t inuma, inumb; | ||
2260 | uint32_t blka, blkb; | ||
2261 | |||
2262 | cond_resched(); | ||
2263 | sa = container_of(cur, struct ubifs_scan_node, list); | ||
2264 | sb = container_of(cur->next, struct ubifs_scan_node, list); | ||
2265 | |||
2266 | if (sa->type != UBIFS_DATA_NODE) { | ||
2267 | ubifs_err("bad node type %d", sa->type); | ||
2268 | dbg_dump_node(c, sa->node); | ||
2269 | return -EINVAL; | ||
2270 | } | ||
2271 | if (sb->type != UBIFS_DATA_NODE) { | ||
2272 | ubifs_err("bad node type %d", sb->type); | ||
2273 | dbg_dump_node(c, sb->node); | ||
2274 | return -EINVAL; | ||
2275 | } | ||
2276 | |||
2277 | inuma = key_inum(c, &sa->key); | ||
2278 | inumb = key_inum(c, &sb->key); | ||
2279 | |||
2280 | if (inuma < inumb) | ||
2281 | continue; | ||
2282 | if (inuma > inumb) { | ||
2283 | ubifs_err("larger inum %lu goes before inum %lu", | ||
2284 | (unsigned long)inuma, (unsigned long)inumb); | ||
2285 | goto error_dump; | ||
2286 | } | ||
2287 | |||
2288 | blka = key_block(c, &sa->key); | ||
2289 | blkb = key_block(c, &sb->key); | ||
2290 | |||
2291 | if (blka > blkb) { | ||
2292 | ubifs_err("larger block %u goes before %u", blka, blkb); | ||
2293 | goto error_dump; | ||
2294 | } | ||
2295 | if (blka == blkb) { | ||
2296 | ubifs_err("two data nodes for the same block"); | ||
2297 | goto error_dump; | ||
2298 | } | ||
2299 | } | ||
2300 | |||
2301 | return 0; | ||
2302 | |||
2303 | error_dump: | ||
2304 | dbg_dump_node(c, sa->node); | ||
2305 | dbg_dump_node(c, sb->node); | ||
2306 | return -EINVAL; | ||
2307 | } | ||
2308 | |||
2309 | /** | ||
2310 | * dbg_check_nondata_nodes_order - check that list of data nodes is sorted. | ||
2311 | * @c: UBIFS file-system description object | ||
2312 | * @head: the list of nodes ('struct ubifs_scan_node' objects) | ||
2313 | * | ||
2314 | * This function returns zero if the list of non-data nodes is sorted correctly, | ||
2315 | * and %-EINVAL if not. | ||
2316 | */ | ||
2317 | int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) | ||
2318 | { | ||
2319 | struct list_head *cur; | ||
2320 | struct ubifs_scan_node *sa, *sb; | ||
2321 | |||
2322 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
2323 | return 0; | ||
2324 | |||
2325 | for (cur = head->next; cur->next != head; cur = cur->next) { | ||
2326 | ino_t inuma, inumb; | ||
2327 | uint32_t hasha, hashb; | ||
2328 | |||
2329 | cond_resched(); | ||
2330 | sa = container_of(cur, struct ubifs_scan_node, list); | ||
2331 | sb = container_of(cur->next, struct ubifs_scan_node, list); | ||
2332 | |||
2333 | if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && | ||
2334 | sa->type != UBIFS_XENT_NODE) { | ||
2335 | ubifs_err("bad node type %d", sa->type); | ||
2336 | dbg_dump_node(c, sa->node); | ||
2337 | return -EINVAL; | ||
2338 | } | ||
2339 | if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && | ||
2340 | sa->type != UBIFS_XENT_NODE) { | ||
2341 | ubifs_err("bad node type %d", sb->type); | ||
2342 | dbg_dump_node(c, sb->node); | ||
2343 | return -EINVAL; | ||
2344 | } | ||
2345 | |||
2346 | if (sa->type != UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { | ||
2347 | ubifs_err("non-inode node goes before inode node"); | ||
2348 | goto error_dump; | ||
2349 | } | ||
2350 | |||
2351 | if (sa->type == UBIFS_INO_NODE && sb->type != UBIFS_INO_NODE) | ||
2352 | continue; | ||
2353 | |||
2354 | if (sa->type == UBIFS_INO_NODE && sb->type == UBIFS_INO_NODE) { | ||
2355 | /* Inode nodes are sorted in descending size order */ | ||
2356 | if (sa->len < sb->len) { | ||
2357 | ubifs_err("smaller inode node goes first"); | ||
2358 | goto error_dump; | ||
2359 | } | ||
2360 | continue; | ||
2361 | } | ||
2362 | |||
2363 | /* | ||
2364 | * This is either a dentry or xentry, which should be sorted in | ||
2365 | * ascending (parent ino, hash) order. | ||
2366 | */ | ||
2367 | inuma = key_inum(c, &sa->key); | ||
2368 | inumb = key_inum(c, &sb->key); | ||
2369 | |||
2370 | if (inuma < inumb) | ||
2371 | continue; | ||
2372 | if (inuma > inumb) { | ||
2373 | ubifs_err("larger inum %lu goes before inum %lu", | ||
2374 | (unsigned long)inuma, (unsigned long)inumb); | ||
2375 | goto error_dump; | ||
2376 | } | ||
2377 | |||
2378 | hasha = key_block(c, &sa->key); | ||
2379 | hashb = key_block(c, &sb->key); | ||
2380 | |||
2381 | if (hasha > hashb) { | ||
2382 | ubifs_err("larger hash %u goes before %u", hasha, hashb); | ||
2383 | goto error_dump; | ||
2384 | } | ||
2385 | } | ||
2386 | |||
2387 | return 0; | ||
2388 | |||
2389 | error_dump: | ||
2390 | ubifs_msg("dumping first node"); | ||
2391 | dbg_dump_node(c, sa->node); | ||
2392 | ubifs_msg("dumping second node"); | ||
2393 | dbg_dump_node(c, sb->node); | ||
2394 | return -EINVAL; | ||
2395 | return 0; | ||
2396 | } | ||
2397 | |||
2242 | static int invocation_cnt; | 2398 | static int invocation_cnt; |
2243 | 2399 | ||
2244 | int dbg_force_in_the_gaps(void) | 2400 | int dbg_force_in_the_gaps(void) |
@@ -2625,6 +2781,7 @@ static const struct file_operations dfs_fops = { | |||
2625 | .open = open_debugfs_file, | 2781 | .open = open_debugfs_file, |
2626 | .write = write_debugfs_file, | 2782 | .write = write_debugfs_file, |
2627 | .owner = THIS_MODULE, | 2783 | .owner = THIS_MODULE, |
2784 | .llseek = default_llseek, | ||
2628 | }; | 2785 | }; |
2629 | 2786 | ||
2630 | /** | 2787 | /** |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 29d960101ea6..69ebe4729151 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
@@ -324,6 +324,8 @@ int dbg_check_lpt_nodes(struct ubifs_info *c, struct ubifs_cnode *cnode, | |||
324 | int row, int col); | 324 | int row, int col); |
325 | int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, | 325 | int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode, |
326 | loff_t size); | 326 | loff_t size); |
327 | int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); | ||
328 | int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); | ||
327 | 329 | ||
328 | /* Force the use of in-the-gaps method for testing */ | 330 | /* Force the use of in-the-gaps method for testing */ |
329 | 331 | ||
@@ -465,6 +467,8 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
465 | #define dbg_check_lprops(c) 0 | 467 | #define dbg_check_lprops(c) 0 |
466 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 | 468 | #define dbg_check_lpt_nodes(c, cnode, row, col) 0 |
467 | #define dbg_check_inode_size(c, inode, size) 0 | 469 | #define dbg_check_inode_size(c, inode, size) 0 |
470 | #define dbg_check_data_nodes_order(c, head) 0 | ||
471 | #define dbg_check_nondata_nodes_order(c, head) 0 | ||
468 | #define dbg_force_in_the_gaps_enabled 0 | 472 | #define dbg_force_in_the_gaps_enabled 0 |
469 | #define dbg_force_in_the_gaps() 0 | 473 | #define dbg_force_in_the_gaps() 0 |
470 | #define dbg_failure_mode 0 | 474 | #define dbg_failure_mode 0 |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 87ebcce72213..14f64b689d7f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, | |||
550 | 550 | ||
551 | lock_2_inodes(dir, inode); | 551 | lock_2_inodes(dir, inode); |
552 | inc_nlink(inode); | 552 | inc_nlink(inode); |
553 | atomic_inc(&inode->i_count); | 553 | ihold(inode); |
554 | inode->i_ctime = ubifs_current_time(inode); | 554 | inode->i_ctime = ubifs_current_time(inode); |
555 | dir->i_size += sz_change; | 555 | dir->i_size += sz_change; |
556 | dir_ui->ui_size = dir->i_size; | 556 | dir_ui->ui_size = dir->i_size; |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 03ae894c45de..d77db7e36484 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -433,8 +433,9 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping, | |||
433 | struct page *page; | 433 | struct page *page; |
434 | 434 | ||
435 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); | 435 | ubifs_assert(ubifs_inode(inode)->ui_size == inode->i_size); |
436 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
436 | 437 | ||
437 | if (unlikely(c->ro_media)) | 438 | if (unlikely(c->ro_error)) |
438 | return -EROFS; | 439 | return -EROFS; |
439 | 440 | ||
440 | /* Try out the fast-path part first */ | 441 | /* Try out the fast-path part first */ |
@@ -1439,9 +1440,9 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vm | |||
1439 | 1440 | ||
1440 | dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, | 1441 | dbg_gen("ino %lu, pg %lu, i_size %lld", inode->i_ino, page->index, |
1441 | i_size_read(inode)); | 1442 | i_size_read(inode)); |
1442 | ubifs_assert(!(inode->i_sb->s_flags & MS_RDONLY)); | 1443 | ubifs_assert(!c->ro_media && !c->ro_mount); |
1443 | 1444 | ||
1444 | if (unlikely(c->ro_media)) | 1445 | if (unlikely(c->ro_error)) |
1445 | return VM_FAULT_SIGBUS; /* -EROFS */ | 1446 | return VM_FAULT_SIGBUS; /* -EROFS */ |
1446 | 1447 | ||
1447 | /* | 1448 | /* |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 918d1582ca05..151f10882820 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -125,10 +125,16 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
125 | struct ubifs_scan_node *sa, *sb; | 125 | struct ubifs_scan_node *sa, *sb; |
126 | 126 | ||
127 | cond_resched(); | 127 | cond_resched(); |
128 | if (a == b) | ||
129 | return 0; | ||
130 | |||
128 | sa = list_entry(a, struct ubifs_scan_node, list); | 131 | sa = list_entry(a, struct ubifs_scan_node, list); |
129 | sb = list_entry(b, struct ubifs_scan_node, list); | 132 | sb = list_entry(b, struct ubifs_scan_node, list); |
133 | |||
130 | ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); | 134 | ubifs_assert(key_type(c, &sa->key) == UBIFS_DATA_KEY); |
131 | ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); | 135 | ubifs_assert(key_type(c, &sb->key) == UBIFS_DATA_KEY); |
136 | ubifs_assert(sa->type == UBIFS_DATA_NODE); | ||
137 | ubifs_assert(sb->type == UBIFS_DATA_NODE); | ||
132 | 138 | ||
133 | inuma = key_inum(c, &sa->key); | 139 | inuma = key_inum(c, &sa->key); |
134 | inumb = key_inum(c, &sb->key); | 140 | inumb = key_inum(c, &sb->key); |
@@ -157,28 +163,40 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
157 | */ | 163 | */ |
158 | int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | 164 | int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) |
159 | { | 165 | { |
160 | int typea, typeb; | ||
161 | ino_t inuma, inumb; | 166 | ino_t inuma, inumb; |
162 | struct ubifs_info *c = priv; | 167 | struct ubifs_info *c = priv; |
163 | struct ubifs_scan_node *sa, *sb; | 168 | struct ubifs_scan_node *sa, *sb; |
164 | 169 | ||
165 | cond_resched(); | 170 | cond_resched(); |
171 | if (a == b) | ||
172 | return 0; | ||
173 | |||
166 | sa = list_entry(a, struct ubifs_scan_node, list); | 174 | sa = list_entry(a, struct ubifs_scan_node, list); |
167 | sb = list_entry(b, struct ubifs_scan_node, list); | 175 | sb = list_entry(b, struct ubifs_scan_node, list); |
168 | typea = key_type(c, &sa->key); | 176 | |
169 | typeb = key_type(c, &sb->key); | 177 | ubifs_assert(key_type(c, &sa->key) != UBIFS_DATA_KEY && |
170 | ubifs_assert(typea != UBIFS_DATA_KEY && typeb != UBIFS_DATA_KEY); | 178 | key_type(c, &sb->key) != UBIFS_DATA_KEY); |
179 | ubifs_assert(sa->type != UBIFS_DATA_NODE && | ||
180 | sb->type != UBIFS_DATA_NODE); | ||
171 | 181 | ||
172 | /* Inodes go before directory entries */ | 182 | /* Inodes go before directory entries */ |
173 | if (typea == UBIFS_INO_KEY) { | 183 | if (sa->type == UBIFS_INO_NODE) { |
174 | if (typeb == UBIFS_INO_KEY) | 184 | if (sb->type == UBIFS_INO_NODE) |
175 | return sb->len - sa->len; | 185 | return sb->len - sa->len; |
176 | return -1; | 186 | return -1; |
177 | } | 187 | } |
178 | if (typeb == UBIFS_INO_KEY) | 188 | if (sb->type == UBIFS_INO_NODE) |
179 | return 1; | 189 | return 1; |
180 | 190 | ||
181 | ubifs_assert(typea == UBIFS_DENT_KEY && typeb == UBIFS_DENT_KEY); | 191 | ubifs_assert(key_type(c, &sa->key) == UBIFS_DENT_KEY || |
192 | key_type(c, &sa->key) == UBIFS_XENT_KEY); | ||
193 | ubifs_assert(key_type(c, &sb->key) == UBIFS_DENT_KEY || | ||
194 | key_type(c, &sb->key) == UBIFS_XENT_KEY); | ||
195 | ubifs_assert(sa->type == UBIFS_DENT_NODE || | ||
196 | sa->type == UBIFS_XENT_NODE); | ||
197 | ubifs_assert(sb->type == UBIFS_DENT_NODE || | ||
198 | sb->type == UBIFS_XENT_NODE); | ||
199 | |||
182 | inuma = key_inum(c, &sa->key); | 200 | inuma = key_inum(c, &sa->key); |
183 | inumb = key_inum(c, &sb->key); | 201 | inumb = key_inum(c, &sb->key); |
184 | 202 | ||
@@ -224,17 +242,33 @@ int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
224 | static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | 242 | static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, |
225 | struct list_head *nondata, int *min) | 243 | struct list_head *nondata, int *min) |
226 | { | 244 | { |
245 | int err; | ||
227 | struct ubifs_scan_node *snod, *tmp; | 246 | struct ubifs_scan_node *snod, *tmp; |
228 | 247 | ||
229 | *min = INT_MAX; | 248 | *min = INT_MAX; |
230 | 249 | ||
231 | /* Separate data nodes and non-data nodes */ | 250 | /* Separate data nodes and non-data nodes */ |
232 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | 251 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { |
233 | int err; | 252 | ubifs_assert(snod->type == UBIFS_INO_NODE || |
253 | snod->type == UBIFS_DATA_NODE || | ||
254 | snod->type == UBIFS_DENT_NODE || | ||
255 | snod->type == UBIFS_XENT_NODE || | ||
256 | snod->type == UBIFS_TRUN_NODE); | ||
257 | |||
258 | if (snod->type != UBIFS_INO_NODE && | ||
259 | snod->type != UBIFS_DATA_NODE && | ||
260 | snod->type != UBIFS_DENT_NODE && | ||
261 | snod->type != UBIFS_XENT_NODE) { | ||
262 | /* Probably truncation node, zap it */ | ||
263 | list_del(&snod->list); | ||
264 | kfree(snod); | ||
265 | continue; | ||
266 | } | ||
234 | 267 | ||
235 | ubifs_assert(snod->type != UBIFS_IDX_NODE); | 268 | ubifs_assert(key_type(c, &snod->key) == UBIFS_DATA_KEY || |
236 | ubifs_assert(snod->type != UBIFS_REF_NODE); | 269 | key_type(c, &snod->key) == UBIFS_INO_KEY || |
237 | ubifs_assert(snod->type != UBIFS_CS_NODE); | 270 | key_type(c, &snod->key) == UBIFS_DENT_KEY || |
271 | key_type(c, &snod->key) == UBIFS_XENT_KEY); | ||
238 | 272 | ||
239 | err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, | 273 | err = ubifs_tnc_has_node(c, &snod->key, 0, sleb->lnum, |
240 | snod->offs, 0); | 274 | snod->offs, 0); |
@@ -258,6 +292,13 @@ static int sort_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
258 | /* Sort data and non-data nodes */ | 292 | /* Sort data and non-data nodes */ |
259 | list_sort(c, &sleb->nodes, &data_nodes_cmp); | 293 | list_sort(c, &sleb->nodes, &data_nodes_cmp); |
260 | list_sort(c, nondata, &nondata_nodes_cmp); | 294 | list_sort(c, nondata, &nondata_nodes_cmp); |
295 | |||
296 | err = dbg_check_data_nodes_order(c, &sleb->nodes); | ||
297 | if (err) | ||
298 | return err; | ||
299 | err = dbg_check_nondata_nodes_order(c, nondata); | ||
300 | if (err) | ||
301 | return err; | ||
261 | return 0; | 302 | return 0; |
262 | } | 303 | } |
263 | 304 | ||
@@ -575,13 +616,14 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) | |||
575 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | 616 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; |
576 | 617 | ||
577 | ubifs_assert_cmt_locked(c); | 618 | ubifs_assert_cmt_locked(c); |
619 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
578 | 620 | ||
579 | if (ubifs_gc_should_commit(c)) | 621 | if (ubifs_gc_should_commit(c)) |
580 | return -EAGAIN; | 622 | return -EAGAIN; |
581 | 623 | ||
582 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); | 624 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
583 | 625 | ||
584 | if (c->ro_media) { | 626 | if (c->ro_error) { |
585 | ret = -EROFS; | 627 | ret = -EROFS; |
586 | goto out_unlock; | 628 | goto out_unlock; |
587 | } | 629 | } |
@@ -677,14 +719,12 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) | |||
677 | 719 | ||
678 | ret = ubifs_garbage_collect_leb(c, &lp); | 720 | ret = ubifs_garbage_collect_leb(c, &lp); |
679 | if (ret < 0) { | 721 | if (ret < 0) { |
680 | if (ret == -EAGAIN || ret == -ENOSPC) { | 722 | if (ret == -EAGAIN) { |
681 | /* | 723 | /* |
682 | * These codes are not errors, so we have to | 724 | * This is not error, so we have to return the |
683 | * return the LEB to lprops. But if the | 725 | * LEB to lprops. But if 'ubifs_return_leb()' |
684 | * 'ubifs_return_leb()' function fails, its | 726 | * fails, its failure code is propagated to the |
685 | * failure code is propagated to the caller | 727 | * caller instead of the original '-EAGAIN'. |
686 | * instead of the original '-EAGAIN' or | ||
687 | * '-ENOSPC'. | ||
688 | */ | 728 | */ |
689 | err = ubifs_return_leb(c, lp.lnum); | 729 | err = ubifs_return_leb(c, lp.lnum); |
690 | if (err) | 730 | if (err) |
@@ -774,8 +814,8 @@ out_unlock: | |||
774 | out: | 814 | out: |
775 | ubifs_assert(ret < 0); | 815 | ubifs_assert(ret < 0); |
776 | ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); | 816 | ubifs_assert(ret != -ENOSPC && ret != -EAGAIN); |
777 | ubifs_ro_mode(c, ret); | ||
778 | ubifs_wbuf_sync_nolock(wbuf); | 817 | ubifs_wbuf_sync_nolock(wbuf); |
818 | ubifs_ro_mode(c, ret); | ||
779 | mutex_unlock(&wbuf->io_mutex); | 819 | mutex_unlock(&wbuf->io_mutex); |
780 | ubifs_return_leb(c, lp.lnum); | 820 | ubifs_return_leb(c, lp.lnum); |
781 | return ret; | 821 | return ret; |
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index bcf5a16f30bb..d82173182eeb 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
@@ -61,8 +61,8 @@ | |||
61 | */ | 61 | */ |
62 | void ubifs_ro_mode(struct ubifs_info *c, int err) | 62 | void ubifs_ro_mode(struct ubifs_info *c, int err) |
63 | { | 63 | { |
64 | if (!c->ro_media) { | 64 | if (!c->ro_error) { |
65 | c->ro_media = 1; | 65 | c->ro_error = 1; |
66 | c->no_chk_data_crc = 0; | 66 | c->no_chk_data_crc = 0; |
67 | c->vfs_sb->s_flags |= MS_RDONLY; | 67 | c->vfs_sb->s_flags |= MS_RDONLY; |
68 | ubifs_warn("switched to read-only mode, error %d", err); | 68 | ubifs_warn("switched to read-only mode, error %d", err); |
@@ -356,11 +356,11 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
356 | 356 | ||
357 | dbg_io("LEB %d:%d, %d bytes, jhead %s", | 357 | dbg_io("LEB %d:%d, %d bytes, jhead %s", |
358 | wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); | 358 | wbuf->lnum, wbuf->offs, wbuf->used, dbg_jhead(wbuf->jhead)); |
359 | ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); | ||
360 | ubifs_assert(!(wbuf->avail & 7)); | 359 | ubifs_assert(!(wbuf->avail & 7)); |
361 | ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); | 360 | ubifs_assert(wbuf->offs + c->min_io_size <= c->leb_size); |
361 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
362 | 362 | ||
363 | if (c->ro_media) | 363 | if (c->ro_error) |
364 | return -EROFS; | 364 | return -EROFS; |
365 | 365 | ||
366 | ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); | 366 | ubifs_pad(c, wbuf->buf + wbuf->used, wbuf->avail); |
@@ -440,11 +440,12 @@ int ubifs_bg_wbufs_sync(struct ubifs_info *c) | |||
440 | { | 440 | { |
441 | int err, i; | 441 | int err, i; |
442 | 442 | ||
443 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
443 | if (!c->need_wbuf_sync) | 444 | if (!c->need_wbuf_sync) |
444 | return 0; | 445 | return 0; |
445 | c->need_wbuf_sync = 0; | 446 | c->need_wbuf_sync = 0; |
446 | 447 | ||
447 | if (c->ro_media) { | 448 | if (c->ro_error) { |
448 | err = -EROFS; | 449 | err = -EROFS; |
449 | goto out_timers; | 450 | goto out_timers; |
450 | } | 451 | } |
@@ -519,6 +520,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
519 | ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); | 520 | ubifs_assert(!(wbuf->offs & 7) && wbuf->offs <= c->leb_size); |
520 | ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); | 521 | ubifs_assert(wbuf->avail > 0 && wbuf->avail <= c->min_io_size); |
521 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); | 522 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
523 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
522 | 524 | ||
523 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { | 525 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { |
524 | err = -ENOSPC; | 526 | err = -ENOSPC; |
@@ -527,7 +529,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
527 | 529 | ||
528 | cancel_wbuf_timer_nolock(wbuf); | 530 | cancel_wbuf_timer_nolock(wbuf); |
529 | 531 | ||
530 | if (c->ro_media) | 532 | if (c->ro_error) |
531 | return -EROFS; | 533 | return -EROFS; |
532 | 534 | ||
533 | if (aligned_len <= wbuf->avail) { | 535 | if (aligned_len <= wbuf->avail) { |
@@ -663,8 +665,9 @@ int ubifs_write_node(struct ubifs_info *c, void *buf, int len, int lnum, | |||
663 | buf_len); | 665 | buf_len); |
664 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | 666 | ubifs_assert(lnum >= 0 && lnum < c->leb_cnt && offs >= 0); |
665 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); | 667 | ubifs_assert(offs % c->min_io_size == 0 && offs < c->leb_size); |
668 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
666 | 669 | ||
667 | if (c->ro_media) | 670 | if (c->ro_error) |
668 | return -EROFS; | 671 | return -EROFS; |
669 | 672 | ||
670 | ubifs_prepare_node(c, buf, len, 1); | 673 | ubifs_prepare_node(c, buf, len, 1); |
@@ -815,7 +818,8 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, | |||
815 | return 0; | 818 | return 0; |
816 | 819 | ||
817 | out: | 820 | out: |
818 | ubifs_err("bad node at LEB %d:%d", lnum, offs); | 821 | ubifs_err("bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, |
822 | ubi_is_mapped(c->ubi, lnum)); | ||
819 | dbg_dump_node(c, buf); | 823 | dbg_dump_node(c, buf); |
820 | dbg_dump_stack(); | 824 | dbg_dump_stack(); |
821 | return -EINVAL; | 825 | return -EINVAL; |
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index d321baeca68d..914f1bd89e57 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
@@ -122,11 +122,12 @@ static int reserve_space(struct ubifs_info *c, int jhead, int len) | |||
122 | * better to try to allocate space at the ends of eraseblocks. This is | 122 | * better to try to allocate space at the ends of eraseblocks. This is |
123 | * what the squeeze parameter does. | 123 | * what the squeeze parameter does. |
124 | */ | 124 | */ |
125 | ubifs_assert(!c->ro_media && !c->ro_mount); | ||
125 | squeeze = (jhead == BASEHD); | 126 | squeeze = (jhead == BASEHD); |
126 | again: | 127 | again: |
127 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); | 128 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
128 | 129 | ||
129 | if (c->ro_media) { | 130 | if (c->ro_error) { |
130 | err = -EROFS; | 131 | err = -EROFS; |
131 | goto out_unlock; | 132 | goto out_unlock; |
132 | } | 133 | } |
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 0f530c684f0b..92a8491a8f8c 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h | |||
@@ -306,6 +306,20 @@ static inline void trun_key_init(const struct ubifs_info *c, | |||
306 | } | 306 | } |
307 | 307 | ||
308 | /** | 308 | /** |
309 | * invalid_key_init - initialize invalid node key. | ||
310 | * @c: UBIFS file-system description object | ||
311 | * @key: key to initialize | ||
312 | * | ||
313 | * This is a helper function which marks a @key object as invalid. | ||
314 | */ | ||
315 | static inline void invalid_key_init(const struct ubifs_info *c, | ||
316 | union ubifs_key *key) | ||
317 | { | ||
318 | key->u32[0] = 0xDEADBEAF; | ||
319 | key->u32[1] = UBIFS_INVALID_KEY; | ||
320 | } | ||
321 | |||
322 | /** | ||
309 | * key_type - get key type. | 323 | * key_type - get key type. |
310 | * @c: UBIFS file-system description object | 324 | * @c: UBIFS file-system description object |
311 | * @key: key to get type of | 325 | * @key: key to get type of |
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index c345e125f42c..4d0cb1241460 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
@@ -159,7 +159,7 @@ void ubifs_add_bud(struct ubifs_info *c, struct ubifs_bud *bud) | |||
159 | jhead = &c->jheads[bud->jhead]; | 159 | jhead = &c->jheads[bud->jhead]; |
160 | list_add_tail(&bud->list, &jhead->buds_list); | 160 | list_add_tail(&bud->list, &jhead->buds_list); |
161 | } else | 161 | } else |
162 | ubifs_assert(c->replaying && (c->vfs_sb->s_flags & MS_RDONLY)); | 162 | ubifs_assert(c->replaying && c->ro_mount); |
163 | 163 | ||
164 | /* | 164 | /* |
165 | * Note, although this is a new bud, we anyway account this space now, | 165 | * Note, although this is a new bud, we anyway account this space now, |
@@ -223,8 +223,8 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) | |||
223 | } | 223 | } |
224 | 224 | ||
225 | mutex_lock(&c->log_mutex); | 225 | mutex_lock(&c->log_mutex); |
226 | 226 | ubifs_assert(!c->ro_media && !c->ro_mount); | |
227 | if (c->ro_media) { | 227 | if (c->ro_error) { |
228 | err = -EROFS; | 228 | err = -EROFS; |
229 | goto out_unlock; | 229 | goto out_unlock; |
230 | } | 230 | } |
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 0084a33c4c69..72775d35b99e 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c | |||
@@ -1363,6 +1363,7 @@ static int read_lsave(struct ubifs_info *c) | |||
1363 | goto out; | 1363 | goto out; |
1364 | for (i = 0; i < c->lsave_cnt; i++) { | 1364 | for (i = 0; i < c->lsave_cnt; i++) { |
1365 | int lnum = c->lsave[i]; | 1365 | int lnum = c->lsave[i]; |
1366 | struct ubifs_lprops *lprops; | ||
1366 | 1367 | ||
1367 | /* | 1368 | /* |
1368 | * Due to automatic resizing, the values in the lsave table | 1369 | * Due to automatic resizing, the values in the lsave table |
@@ -1370,7 +1371,11 @@ static int read_lsave(struct ubifs_info *c) | |||
1370 | */ | 1371 | */ |
1371 | if (lnum >= c->leb_cnt) | 1372 | if (lnum >= c->leb_cnt) |
1372 | continue; | 1373 | continue; |
1373 | ubifs_lpt_lookup(c, lnum); | 1374 | lprops = ubifs_lpt_lookup(c, lnum); |
1375 | if (IS_ERR(lprops)) { | ||
1376 | err = PTR_ERR(lprops); | ||
1377 | goto out; | ||
1378 | } | ||
1374 | } | 1379 | } |
1375 | out: | 1380 | out: |
1376 | vfree(buf); | 1381 | vfree(buf); |
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index d12535b7fc78..5c90dec5db0b 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
@@ -705,6 +705,9 @@ static int make_tree_dirty(struct ubifs_info *c) | |||
705 | struct ubifs_pnode *pnode; | 705 | struct ubifs_pnode *pnode; |
706 | 706 | ||
707 | pnode = pnode_lookup(c, 0); | 707 | pnode = pnode_lookup(c, 0); |
708 | if (IS_ERR(pnode)) | ||
709 | return PTR_ERR(pnode); | ||
710 | |||
708 | while (pnode) { | 711 | while (pnode) { |
709 | do_make_pnode_dirty(c, pnode); | 712 | do_make_pnode_dirty(c, pnode); |
710 | pnode = next_pnode_to_dirty(c, pnode); | 713 | pnode = next_pnode_to_dirty(c, pnode); |
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 28beaeedadc0..21f47afdacff 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c | |||
@@ -361,7 +361,8 @@ int ubifs_write_master(struct ubifs_info *c) | |||
361 | { | 361 | { |
362 | int err, lnum, offs, len; | 362 | int err, lnum, offs, len; |
363 | 363 | ||
364 | if (c->ro_media) | 364 | ubifs_assert(!c->ro_media && !c->ro_mount); |
365 | if (c->ro_error) | ||
365 | return -EROFS; | 366 | return -EROFS; |
366 | 367 | ||
367 | lnum = UBIFS_MST_LNUM; | 368 | lnum = UBIFS_MST_LNUM; |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4fa81d867e41..c3de04dc952a 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -132,7 +132,8 @@ static inline int ubifs_leb_unmap(const struct ubifs_info *c, int lnum) | |||
132 | { | 132 | { |
133 | int err; | 133 | int err; |
134 | 134 | ||
135 | if (c->ro_media) | 135 | ubifs_assert(!c->ro_media && !c->ro_mount); |
136 | if (c->ro_error) | ||
136 | return -EROFS; | 137 | return -EROFS; |
137 | err = ubi_leb_unmap(c->ubi, lnum); | 138 | err = ubi_leb_unmap(c->ubi, lnum); |
138 | if (err) { | 139 | if (err) { |
@@ -159,7 +160,8 @@ static inline int ubifs_leb_write(const struct ubifs_info *c, int lnum, | |||
159 | { | 160 | { |
160 | int err; | 161 | int err; |
161 | 162 | ||
162 | if (c->ro_media) | 163 | ubifs_assert(!c->ro_media && !c->ro_mount); |
164 | if (c->ro_error) | ||
163 | return -EROFS; | 165 | return -EROFS; |
164 | err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); | 166 | err = ubi_leb_write(c->ubi, lnum, buf, offs, len, dtype); |
165 | if (err) { | 167 | if (err) { |
@@ -186,7 +188,8 @@ static inline int ubifs_leb_change(const struct ubifs_info *c, int lnum, | |||
186 | { | 188 | { |
187 | int err; | 189 | int err; |
188 | 190 | ||
189 | if (c->ro_media) | 191 | ubifs_assert(!c->ro_media && !c->ro_mount); |
192 | if (c->ro_error) | ||
190 | return -EROFS; | 193 | return -EROFS; |
191 | err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); | 194 | err = ubi_leb_change(c->ubi, lnum, buf, len, dtype); |
192 | if (err) { | 195 | if (err) { |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index daae9e1f5382..77e9b874b6c2 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
@@ -292,7 +292,7 @@ int ubifs_recover_master_node(struct ubifs_info *c) | |||
292 | 292 | ||
293 | memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); | 293 | memcpy(c->mst_node, mst, UBIFS_MST_NODE_SZ); |
294 | 294 | ||
295 | if ((c->vfs_sb->s_flags & MS_RDONLY)) { | 295 | if (c->ro_mount) { |
296 | /* Read-only mode. Keep a copy for switching to rw mode */ | 296 | /* Read-only mode. Keep a copy for switching to rw mode */ |
297 | c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); | 297 | c->rcvrd_mst_node = kmalloc(sz, GFP_KERNEL); |
298 | if (!c->rcvrd_mst_node) { | 298 | if (!c->rcvrd_mst_node) { |
@@ -469,7 +469,7 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
469 | endpt = snod->offs + snod->len; | 469 | endpt = snod->offs + snod->len; |
470 | } | 470 | } |
471 | 471 | ||
472 | if ((c->vfs_sb->s_flags & MS_RDONLY) && !c->remounting_rw) { | 472 | if (c->ro_mount && !c->remounting_rw) { |
473 | /* Add to recovery list */ | 473 | /* Add to recovery list */ |
474 | struct ubifs_unclean_leb *ucleb; | 474 | struct ubifs_unclean_leb *ucleb; |
475 | 475 | ||
@@ -772,7 +772,8 @@ out_free: | |||
772 | * @sbuf: LEB-sized buffer to use | 772 | * @sbuf: LEB-sized buffer to use |
773 | * | 773 | * |
774 | * This function does a scan of a LEB, but caters for errors that might have | 774 | * This function does a scan of a LEB, but caters for errors that might have |
775 | * been caused by the unclean unmount from which we are attempting to recover. | 775 | * been caused by unclean reboots from which we are attempting to recover |
776 | * (assume that only the last log LEB can be corrupted by an unclean reboot). | ||
776 | * | 777 | * |
777 | * This function returns %0 on success and a negative error code on failure. | 778 | * This function returns %0 on success and a negative error code on failure. |
778 | */ | 779 | */ |
@@ -883,7 +884,7 @@ int ubifs_recover_inl_heads(const struct ubifs_info *c, void *sbuf) | |||
883 | { | 884 | { |
884 | int err; | 885 | int err; |
885 | 886 | ||
886 | ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY) || c->remounting_rw); | 887 | ubifs_assert(!c->ro_mount || c->remounting_rw); |
887 | 888 | ||
888 | dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); | 889 | dbg_rcvry("checking index head at %d:%d", c->ihead_lnum, c->ihead_offs); |
889 | err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); | 890 | err = recover_head(c, c->ihead_lnum, c->ihead_offs, sbuf); |
@@ -1461,7 +1462,7 @@ int ubifs_recover_size(struct ubifs_info *c) | |||
1461 | } | 1462 | } |
1462 | } | 1463 | } |
1463 | if (e->exists && e->i_size < e->d_size) { | 1464 | if (e->exists && e->i_size < e->d_size) { |
1464 | if (!e->inode && (c->vfs_sb->s_flags & MS_RDONLY)) { | 1465 | if (!e->inode && c->ro_mount) { |
1465 | /* Fix the inode size and pin it in memory */ | 1466 | /* Fix the inode size and pin it in memory */ |
1466 | struct inode *inode; | 1467 | struct inode *inode; |
1467 | 1468 | ||
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 5c2d6d759a3e..eed0fcff8d73 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
@@ -627,8 +627,7 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | |||
627 | ubifs_assert(sleb->endpt - offs >= used); | 627 | ubifs_assert(sleb->endpt - offs >= used); |
628 | ubifs_assert(sleb->endpt % c->min_io_size == 0); | 628 | ubifs_assert(sleb->endpt % c->min_io_size == 0); |
629 | 629 | ||
630 | if (sleb->endpt + c->min_io_size <= c->leb_size && | 630 | if (sleb->endpt + c->min_io_size <= c->leb_size && !c->ro_mount) |
631 | !(c->vfs_sb->s_flags & MS_RDONLY)) | ||
632 | err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, | 631 | err = ubifs_wbuf_seek_nolock(&c->jheads[jhead].wbuf, lnum, |
633 | sleb->endpt, UBI_SHORTTERM); | 632 | sleb->endpt, UBI_SHORTTERM); |
634 | 633 | ||
@@ -840,6 +839,11 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | |||
840 | if (IS_ERR(sleb)) { | 839 | if (IS_ERR(sleb)) { |
841 | if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) | 840 | if (PTR_ERR(sleb) != -EUCLEAN || !c->need_recovery) |
842 | return PTR_ERR(sleb); | 841 | return PTR_ERR(sleb); |
842 | /* | ||
843 | * Note, the below function will recover this log LEB only if | ||
844 | * it is the last, because unclean reboots can possibly corrupt | ||
845 | * only the tail of the log. | ||
846 | */ | ||
843 | sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); | 847 | sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); |
844 | if (IS_ERR(sleb)) | 848 | if (IS_ERR(sleb)) |
845 | return PTR_ERR(sleb); | 849 | return PTR_ERR(sleb); |
@@ -851,7 +855,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | |||
851 | } | 855 | } |
852 | 856 | ||
853 | node = sleb->buf; | 857 | node = sleb->buf; |
854 | |||
855 | snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); | 858 | snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); |
856 | if (c->cs_sqnum == 0) { | 859 | if (c->cs_sqnum == 0) { |
857 | /* | 860 | /* |
@@ -898,7 +901,6 @@ static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) | |||
898 | } | 901 | } |
899 | 902 | ||
900 | list_for_each_entry(snod, &sleb->nodes, list) { | 903 | list_for_each_entry(snod, &sleb->nodes, list) { |
901 | |||
902 | cond_resched(); | 904 | cond_resched(); |
903 | 905 | ||
904 | if (snod->sqnum >= SQNUM_WATERMARK) { | 906 | if (snod->sqnum >= SQNUM_WATERMARK) { |
@@ -1011,7 +1013,6 @@ out: | |||
1011 | int ubifs_replay_journal(struct ubifs_info *c) | 1013 | int ubifs_replay_journal(struct ubifs_info *c) |
1012 | { | 1014 | { |
1013 | int err, i, lnum, offs, free; | 1015 | int err, i, lnum, offs, free; |
1014 | void *sbuf = NULL; | ||
1015 | 1016 | ||
1016 | BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); | 1017 | BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); |
1017 | 1018 | ||
@@ -1026,14 +1027,8 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1026 | return -EINVAL; | 1027 | return -EINVAL; |
1027 | } | 1028 | } |
1028 | 1029 | ||
1029 | sbuf = vmalloc(c->leb_size); | ||
1030 | if (!sbuf) | ||
1031 | return -ENOMEM; | ||
1032 | |||
1033 | dbg_mnt("start replaying the journal"); | 1030 | dbg_mnt("start replaying the journal"); |
1034 | |||
1035 | c->replaying = 1; | 1031 | c->replaying = 1; |
1036 | |||
1037 | lnum = c->ltail_lnum = c->lhead_lnum; | 1032 | lnum = c->ltail_lnum = c->lhead_lnum; |
1038 | offs = c->lhead_offs; | 1033 | offs = c->lhead_offs; |
1039 | 1034 | ||
@@ -1046,7 +1041,7 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1046 | lnum = UBIFS_LOG_LNUM; | 1041 | lnum = UBIFS_LOG_LNUM; |
1047 | offs = 0; | 1042 | offs = 0; |
1048 | } | 1043 | } |
1049 | err = replay_log_leb(c, lnum, offs, sbuf); | 1044 | err = replay_log_leb(c, lnum, offs, c->sbuf); |
1050 | if (err == 1) | 1045 | if (err == 1) |
1051 | /* We hit the end of the log */ | 1046 | /* We hit the end of the log */ |
1052 | break; | 1047 | break; |
@@ -1079,7 +1074,6 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
1079 | out: | 1074 | out: |
1080 | destroy_replay_tree(c); | 1075 | destroy_replay_tree(c); |
1081 | destroy_bud_list(c); | 1076 | destroy_bud_list(c); |
1082 | vfree(sbuf); | ||
1083 | c->replaying = 0; | 1077 | c->replaying = 0; |
1084 | return err; | 1078 | return err; |
1085 | } | 1079 | } |
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index 96cb62c8a9dd..bf31b4729e51 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
@@ -542,11 +542,8 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
542 | * due to the unavailability of time-travelling equipment. | 542 | * due to the unavailability of time-travelling equipment. |
543 | */ | 543 | */ |
544 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { | 544 | if (c->fmt_version > UBIFS_FORMAT_VERSION) { |
545 | struct super_block *sb = c->vfs_sb; | 545 | ubifs_assert(!c->ro_media || c->ro_mount); |
546 | int mounting_ro = sb->s_flags & MS_RDONLY; | 546 | if (!c->ro_mount || |
547 | |||
548 | ubifs_assert(!c->ro_media || mounting_ro); | ||
549 | if (!mounting_ro || | ||
550 | c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { | 547 | c->ro_compat_version > UBIFS_RO_COMPAT_VERSION) { |
551 | ubifs_err("on-flash format version is w%d/r%d, but " | 548 | ubifs_err("on-flash format version is w%d/r%d, but " |
552 | "software only supports up to version " | 549 | "software only supports up to version " |
@@ -624,7 +621,7 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
624 | c->old_leb_cnt = c->leb_cnt; | 621 | c->old_leb_cnt = c->leb_cnt; |
625 | if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { | 622 | if (c->leb_cnt < c->vi.size && c->leb_cnt < c->max_leb_cnt) { |
626 | c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); | 623 | c->leb_cnt = min_t(int, c->max_leb_cnt, c->vi.size); |
627 | if (c->vfs_sb->s_flags & MS_RDONLY) | 624 | if (c->ro_mount) |
628 | dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", | 625 | dbg_mnt("Auto resizing (ro) from %d LEBs to %d LEBs", |
629 | c->old_leb_cnt, c->leb_cnt); | 626 | c->old_leb_cnt, c->leb_cnt); |
630 | else { | 627 | else { |
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index 96c525384191..3e1ee57dbeaa 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c | |||
@@ -197,7 +197,7 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
197 | struct ubifs_ino_node *ino = buf; | 197 | struct ubifs_ino_node *ino = buf; |
198 | struct ubifs_scan_node *snod; | 198 | struct ubifs_scan_node *snod; |
199 | 199 | ||
200 | snod = kzalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); | 200 | snod = kmalloc(sizeof(struct ubifs_scan_node), GFP_NOFS); |
201 | if (!snod) | 201 | if (!snod) |
202 | return -ENOMEM; | 202 | return -ENOMEM; |
203 | 203 | ||
@@ -212,13 +212,15 @@ int ubifs_add_snod(const struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
212 | case UBIFS_DENT_NODE: | 212 | case UBIFS_DENT_NODE: |
213 | case UBIFS_XENT_NODE: | 213 | case UBIFS_XENT_NODE: |
214 | case UBIFS_DATA_NODE: | 214 | case UBIFS_DATA_NODE: |
215 | case UBIFS_TRUN_NODE: | ||
216 | /* | 215 | /* |
217 | * The key is in the same place in all keyed | 216 | * The key is in the same place in all keyed |
218 | * nodes. | 217 | * nodes. |
219 | */ | 218 | */ |
220 | key_read(c, &ino->key, &snod->key); | 219 | key_read(c, &ino->key, &snod->key); |
221 | break; | 220 | break; |
221 | default: | ||
222 | invalid_key_init(c, &snod->key); | ||
223 | break; | ||
222 | } | 224 | } |
223 | list_add_tail(&snod->list, &sleb->nodes); | 225 | list_add_tail(&snod->list, &sleb->nodes); |
224 | sleb->nodes_cnt += 1; | 226 | sleb->nodes_cnt += 1; |
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c index 0b201114a5ad..46961c003236 100644 --- a/fs/ubifs/shrinker.c +++ b/fs/ubifs/shrinker.c | |||
@@ -250,7 +250,7 @@ static int kick_a_thread(void) | |||
250 | dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); | 250 | dirty_zn_cnt = atomic_long_read(&c->dirty_zn_cnt); |
251 | 251 | ||
252 | if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || | 252 | if (!dirty_zn_cnt || c->cmt_state == COMMIT_BROKEN || |
253 | c->ro_media) { | 253 | c->ro_mount || c->ro_error) { |
254 | mutex_unlock(&c->umount_mutex); | 254 | mutex_unlock(&c->umount_mutex); |
255 | continue; | 255 | continue; |
256 | } | 256 | } |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index cd5900b85d38..9a47c9f0ad07 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -1137,11 +1137,11 @@ static int check_free_space(struct ubifs_info *c) | |||
1137 | */ | 1137 | */ |
1138 | static int mount_ubifs(struct ubifs_info *c) | 1138 | static int mount_ubifs(struct ubifs_info *c) |
1139 | { | 1139 | { |
1140 | struct super_block *sb = c->vfs_sb; | 1140 | int err; |
1141 | int err, mounted_read_only = (sb->s_flags & MS_RDONLY); | ||
1142 | long long x; | 1141 | long long x; |
1143 | size_t sz; | 1142 | size_t sz; |
1144 | 1143 | ||
1144 | c->ro_mount = !!(c->vfs_sb->s_flags & MS_RDONLY); | ||
1145 | err = init_constants_early(c); | 1145 | err = init_constants_early(c); |
1146 | if (err) | 1146 | if (err) |
1147 | return err; | 1147 | return err; |
@@ -1154,7 +1154,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1154 | if (err) | 1154 | if (err) |
1155 | goto out_free; | 1155 | goto out_free; |
1156 | 1156 | ||
1157 | if (c->empty && (mounted_read_only || c->ro_media)) { | 1157 | if (c->empty && (c->ro_mount || c->ro_media)) { |
1158 | /* | 1158 | /* |
1159 | * This UBI volume is empty, and read-only, or the file system | 1159 | * This UBI volume is empty, and read-only, or the file system |
1160 | * is mounted read-only - we cannot format it. | 1160 | * is mounted read-only - we cannot format it. |
@@ -1165,7 +1165,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1165 | goto out_free; | 1165 | goto out_free; |
1166 | } | 1166 | } |
1167 | 1167 | ||
1168 | if (c->ro_media && !mounted_read_only) { | 1168 | if (c->ro_media && !c->ro_mount) { |
1169 | ubifs_err("cannot mount read-write - read-only media"); | 1169 | ubifs_err("cannot mount read-write - read-only media"); |
1170 | err = -EROFS; | 1170 | err = -EROFS; |
1171 | goto out_free; | 1171 | goto out_free; |
@@ -1185,7 +1185,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1185 | if (!c->sbuf) | 1185 | if (!c->sbuf) |
1186 | goto out_free; | 1186 | goto out_free; |
1187 | 1187 | ||
1188 | if (!mounted_read_only) { | 1188 | if (!c->ro_mount) { |
1189 | c->ileb_buf = vmalloc(c->leb_size); | 1189 | c->ileb_buf = vmalloc(c->leb_size); |
1190 | if (!c->ileb_buf) | 1190 | if (!c->ileb_buf) |
1191 | goto out_free; | 1191 | goto out_free; |
@@ -1228,7 +1228,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1228 | } | 1228 | } |
1229 | 1229 | ||
1230 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); | 1230 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); |
1231 | if (!mounted_read_only) { | 1231 | if (!c->ro_mount) { |
1232 | err = alloc_wbufs(c); | 1232 | err = alloc_wbufs(c); |
1233 | if (err) | 1233 | if (err) |
1234 | goto out_cbuf; | 1234 | goto out_cbuf; |
@@ -1254,12 +1254,12 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1254 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { | 1254 | if ((c->mst_node->flags & cpu_to_le32(UBIFS_MST_DIRTY)) != 0) { |
1255 | ubifs_msg("recovery needed"); | 1255 | ubifs_msg("recovery needed"); |
1256 | c->need_recovery = 1; | 1256 | c->need_recovery = 1; |
1257 | if (!mounted_read_only) { | 1257 | if (!c->ro_mount) { |
1258 | err = ubifs_recover_inl_heads(c, c->sbuf); | 1258 | err = ubifs_recover_inl_heads(c, c->sbuf); |
1259 | if (err) | 1259 | if (err) |
1260 | goto out_master; | 1260 | goto out_master; |
1261 | } | 1261 | } |
1262 | } else if (!mounted_read_only) { | 1262 | } else if (!c->ro_mount) { |
1263 | /* | 1263 | /* |
1264 | * Set the "dirty" flag so that if we reboot uncleanly we | 1264 | * Set the "dirty" flag so that if we reboot uncleanly we |
1265 | * will notice this immediately on the next mount. | 1265 | * will notice this immediately on the next mount. |
@@ -1270,7 +1270,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1270 | goto out_master; | 1270 | goto out_master; |
1271 | } | 1271 | } |
1272 | 1272 | ||
1273 | err = ubifs_lpt_init(c, 1, !mounted_read_only); | 1273 | err = ubifs_lpt_init(c, 1, !c->ro_mount); |
1274 | if (err) | 1274 | if (err) |
1275 | goto out_lpt; | 1275 | goto out_lpt; |
1276 | 1276 | ||
@@ -1285,11 +1285,11 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1285 | /* Calculate 'min_idx_lebs' after journal replay */ | 1285 | /* Calculate 'min_idx_lebs' after journal replay */ |
1286 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 1286 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
1287 | 1287 | ||
1288 | err = ubifs_mount_orphans(c, c->need_recovery, mounted_read_only); | 1288 | err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); |
1289 | if (err) | 1289 | if (err) |
1290 | goto out_orphans; | 1290 | goto out_orphans; |
1291 | 1291 | ||
1292 | if (!mounted_read_only) { | 1292 | if (!c->ro_mount) { |
1293 | int lnum; | 1293 | int lnum; |
1294 | 1294 | ||
1295 | err = check_free_space(c); | 1295 | err = check_free_space(c); |
@@ -1351,7 +1351,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1351 | spin_unlock(&ubifs_infos_lock); | 1351 | spin_unlock(&ubifs_infos_lock); |
1352 | 1352 | ||
1353 | if (c->need_recovery) { | 1353 | if (c->need_recovery) { |
1354 | if (mounted_read_only) | 1354 | if (c->ro_mount) |
1355 | ubifs_msg("recovery deferred"); | 1355 | ubifs_msg("recovery deferred"); |
1356 | else { | 1356 | else { |
1357 | c->need_recovery = 0; | 1357 | c->need_recovery = 0; |
@@ -1378,7 +1378,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
1378 | 1378 | ||
1379 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", | 1379 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", |
1380 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); | 1380 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); |
1381 | if (mounted_read_only) | 1381 | if (c->ro_mount) |
1382 | ubifs_msg("mounted read-only"); | 1382 | ubifs_msg("mounted read-only"); |
1383 | x = (long long)c->main_lebs * c->leb_size; | 1383 | x = (long long)c->main_lebs * c->leb_size; |
1384 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " | 1384 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " |
@@ -1640,7 +1640,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
1640 | } | 1640 | } |
1641 | 1641 | ||
1642 | dbg_gen("re-mounted read-write"); | 1642 | dbg_gen("re-mounted read-write"); |
1643 | c->vfs_sb->s_flags &= ~MS_RDONLY; | 1643 | c->ro_mount = 0; |
1644 | c->remounting_rw = 0; | 1644 | c->remounting_rw = 0; |
1645 | c->always_chk_crc = 0; | 1645 | c->always_chk_crc = 0; |
1646 | err = dbg_check_space_info(c); | 1646 | err = dbg_check_space_info(c); |
@@ -1676,7 +1676,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) | |||
1676 | int i, err; | 1676 | int i, err; |
1677 | 1677 | ||
1678 | ubifs_assert(!c->need_recovery); | 1678 | ubifs_assert(!c->need_recovery); |
1679 | ubifs_assert(!(c->vfs_sb->s_flags & MS_RDONLY)); | 1679 | ubifs_assert(!c->ro_mount); |
1680 | 1680 | ||
1681 | mutex_lock(&c->umount_mutex); | 1681 | mutex_lock(&c->umount_mutex); |
1682 | if (c->bgt) { | 1682 | if (c->bgt) { |
@@ -1686,10 +1686,8 @@ static void ubifs_remount_ro(struct ubifs_info *c) | |||
1686 | 1686 | ||
1687 | dbg_save_space_info(c); | 1687 | dbg_save_space_info(c); |
1688 | 1688 | ||
1689 | for (i = 0; i < c->jhead_cnt; i++) { | 1689 | for (i = 0; i < c->jhead_cnt; i++) |
1690 | ubifs_wbuf_sync(&c->jheads[i].wbuf); | 1690 | ubifs_wbuf_sync(&c->jheads[i].wbuf); |
1691 | hrtimer_cancel(&c->jheads[i].wbuf.timer); | ||
1692 | } | ||
1693 | 1691 | ||
1694 | c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); | 1692 | c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); |
1695 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); | 1693 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); |
@@ -1704,6 +1702,7 @@ static void ubifs_remount_ro(struct ubifs_info *c) | |||
1704 | vfree(c->ileb_buf); | 1702 | vfree(c->ileb_buf); |
1705 | c->ileb_buf = NULL; | 1703 | c->ileb_buf = NULL; |
1706 | ubifs_lpt_free(c, 1); | 1704 | ubifs_lpt_free(c, 1); |
1705 | c->ro_mount = 1; | ||
1707 | err = dbg_check_space_info(c); | 1706 | err = dbg_check_space_info(c); |
1708 | if (err) | 1707 | if (err) |
1709 | ubifs_ro_mode(c, err); | 1708 | ubifs_ro_mode(c, err); |
@@ -1735,7 +1734,7 @@ static void ubifs_put_super(struct super_block *sb) | |||
1735 | * the mutex is locked. | 1734 | * the mutex is locked. |
1736 | */ | 1735 | */ |
1737 | mutex_lock(&c->umount_mutex); | 1736 | mutex_lock(&c->umount_mutex); |
1738 | if (!(c->vfs_sb->s_flags & MS_RDONLY)) { | 1737 | if (!c->ro_mount) { |
1739 | /* | 1738 | /* |
1740 | * First of all kill the background thread to make sure it does | 1739 | * First of all kill the background thread to make sure it does |
1741 | * not interfere with un-mounting and freeing resources. | 1740 | * not interfere with un-mounting and freeing resources. |
@@ -1745,23 +1744,22 @@ static void ubifs_put_super(struct super_block *sb) | |||
1745 | c->bgt = NULL; | 1744 | c->bgt = NULL; |
1746 | } | 1745 | } |
1747 | 1746 | ||
1748 | /* Synchronize write-buffers */ | ||
1749 | if (c->jheads) | ||
1750 | for (i = 0; i < c->jhead_cnt; i++) | ||
1751 | ubifs_wbuf_sync(&c->jheads[i].wbuf); | ||
1752 | |||
1753 | /* | 1747 | /* |
1754 | * On fatal errors c->ro_media is set to 1, in which case we do | 1748 | * On fatal errors c->ro_error is set to 1, in which case we do |
1755 | * not write the master node. | 1749 | * not write the master node. |
1756 | */ | 1750 | */ |
1757 | if (!c->ro_media) { | 1751 | if (!c->ro_error) { |
1752 | int err; | ||
1753 | |||
1754 | /* Synchronize write-buffers */ | ||
1755 | for (i = 0; i < c->jhead_cnt; i++) | ||
1756 | ubifs_wbuf_sync(&c->jheads[i].wbuf); | ||
1757 | |||
1758 | /* | 1758 | /* |
1759 | * We are being cleanly unmounted which means the | 1759 | * We are being cleanly unmounted which means the |
1760 | * orphans were killed - indicate this in the master | 1760 | * orphans were killed - indicate this in the master |
1761 | * node. Also save the reserved GC LEB number. | 1761 | * node. Also save the reserved GC LEB number. |
1762 | */ | 1762 | */ |
1763 | int err; | ||
1764 | |||
1765 | c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); | 1763 | c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); |
1766 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); | 1764 | c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); |
1767 | c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); | 1765 | c->mst_node->gc_lnum = cpu_to_le32(c->gc_lnum); |
@@ -1774,6 +1772,10 @@ static void ubifs_put_super(struct super_block *sb) | |||
1774 | */ | 1772 | */ |
1775 | ubifs_err("failed to write master node, " | 1773 | ubifs_err("failed to write master node, " |
1776 | "error %d", err); | 1774 | "error %d", err); |
1775 | } else { | ||
1776 | for (i = 0; i < c->jhead_cnt; i++) | ||
1777 | /* Make sure write-buffer timers are canceled */ | ||
1778 | hrtimer_cancel(&c->jheads[i].wbuf.timer); | ||
1777 | } | 1779 | } |
1778 | } | 1780 | } |
1779 | 1781 | ||
@@ -1797,17 +1799,21 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1797 | return err; | 1799 | return err; |
1798 | } | 1800 | } |
1799 | 1801 | ||
1800 | if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) { | 1802 | if (c->ro_mount && !(*flags & MS_RDONLY)) { |
1803 | if (c->ro_error) { | ||
1804 | ubifs_msg("cannot re-mount R/W due to prior errors"); | ||
1805 | return -EROFS; | ||
1806 | } | ||
1801 | if (c->ro_media) { | 1807 | if (c->ro_media) { |
1802 | ubifs_msg("cannot re-mount due to prior errors"); | 1808 | ubifs_msg("cannot re-mount R/W - UBI volume is R/O"); |
1803 | return -EROFS; | 1809 | return -EROFS; |
1804 | } | 1810 | } |
1805 | err = ubifs_remount_rw(c); | 1811 | err = ubifs_remount_rw(c); |
1806 | if (err) | 1812 | if (err) |
1807 | return err; | 1813 | return err; |
1808 | } else if (!(sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) { | 1814 | } else if (!c->ro_mount && (*flags & MS_RDONLY)) { |
1809 | if (c->ro_media) { | 1815 | if (c->ro_error) { |
1810 | ubifs_msg("cannot re-mount due to prior errors"); | 1816 | ubifs_msg("cannot re-mount R/O due to prior errors"); |
1811 | return -EROFS; | 1817 | return -EROFS; |
1812 | } | 1818 | } |
1813 | ubifs_remount_ro(c); | 1819 | ubifs_remount_ro(c); |
@@ -2049,8 +2055,8 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, | |||
2049 | */ | 2055 | */ |
2050 | ubi = open_ubi(name, UBI_READONLY); | 2056 | ubi = open_ubi(name, UBI_READONLY); |
2051 | if (IS_ERR(ubi)) { | 2057 | if (IS_ERR(ubi)) { |
2052 | ubifs_err("cannot open \"%s\", error %d", | 2058 | dbg_err("cannot open \"%s\", error %d", |
2053 | name, (int)PTR_ERR(ubi)); | 2059 | name, (int)PTR_ERR(ubi)); |
2054 | return PTR_ERR(ubi); | 2060 | return PTR_ERR(ubi); |
2055 | } | 2061 | } |
2056 | ubi_get_volume_info(ubi, &vi); | 2062 | ubi_get_volume_info(ubi, &vi); |
@@ -2064,9 +2070,11 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags, | |||
2064 | } | 2070 | } |
2065 | 2071 | ||
2066 | if (sb->s_root) { | 2072 | if (sb->s_root) { |
2073 | struct ubifs_info *c1 = sb->s_fs_info; | ||
2074 | |||
2067 | /* A new mount point for already mounted UBIFS */ | 2075 | /* A new mount point for already mounted UBIFS */ |
2068 | dbg_gen("this ubi volume is already mounted"); | 2076 | dbg_gen("this ubi volume is already mounted"); |
2069 | if ((flags ^ sb->s_flags) & MS_RDONLY) { | 2077 | if (!!(flags & MS_RDONLY) != c1->ro_mount) { |
2070 | err = -EBUSY; | 2078 | err = -EBUSY; |
2071 | goto out_deact; | 2079 | goto out_deact; |
2072 | } | 2080 | } |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 2194915220e5..ad9cf0133622 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -1177,6 +1177,7 @@ int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | |||
1177 | unsigned long time = get_seconds(); | 1177 | unsigned long time = get_seconds(); |
1178 | 1178 | ||
1179 | dbg_tnc("search key %s", DBGKEY(key)); | 1179 | dbg_tnc("search key %s", DBGKEY(key)); |
1180 | ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); | ||
1180 | 1181 | ||
1181 | znode = c->zroot.znode; | 1182 | znode = c->zroot.znode; |
1182 | if (unlikely(!znode)) { | 1183 | if (unlikely(!znode)) { |
@@ -2966,7 +2967,7 @@ static struct ubifs_znode *right_znode(struct ubifs_info *c, | |||
2966 | * | 2967 | * |
2967 | * This function searches an indexing node by its first key @key and its | 2968 | * This function searches an indexing node by its first key @key and its |
2968 | * address @lnum:@offs. It looks up the indexing tree by pulling all indexing | 2969 | * address @lnum:@offs. It looks up the indexing tree by pulling all indexing |
2969 | * nodes it traverses to TNC. This function is called fro indexing nodes which | 2970 | * nodes it traverses to TNC. This function is called for indexing nodes which |
2970 | * were found on the media by scanning, for example when garbage-collecting or | 2971 | * were found on the media by scanning, for example when garbage-collecting or |
2971 | * when doing in-the-gaps commit. This means that the indexing node which is | 2972 | * when doing in-the-gaps commit. This means that the indexing node which is |
2972 | * looked for does not have to have exactly the same leftmost key @key, because | 2973 | * looked for does not have to have exactly the same leftmost key @key, because |
@@ -2988,6 +2989,8 @@ static struct ubifs_znode *lookup_znode(struct ubifs_info *c, | |||
2988 | struct ubifs_znode *znode, *zn; | 2989 | struct ubifs_znode *znode, *zn; |
2989 | int n, nn; | 2990 | int n, nn; |
2990 | 2991 | ||
2992 | ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY); | ||
2993 | |||
2991 | /* | 2994 | /* |
2992 | * The arguments have probably been read off flash, so don't assume | 2995 | * The arguments have probably been read off flash, so don't assume |
2993 | * they are valid. | 2996 | * they are valid. |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 0c9876b396dd..381d6b207a52 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -119,8 +119,12 @@ | |||
119 | * in TNC. However, when replaying, it is handy to introduce fake "truncation" | 119 | * in TNC. However, when replaying, it is handy to introduce fake "truncation" |
120 | * keys for truncation nodes because the code becomes simpler. So we define | 120 | * keys for truncation nodes because the code becomes simpler. So we define |
121 | * %UBIFS_TRUN_KEY type. | 121 | * %UBIFS_TRUN_KEY type. |
122 | * | ||
123 | * But otherwise, out of the journal reply scope, the truncation keys are | ||
124 | * invalid. | ||
122 | */ | 125 | */ |
123 | #define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT | 126 | #define UBIFS_TRUN_KEY UBIFS_KEY_TYPES_CNT |
127 | #define UBIFS_INVALID_KEY UBIFS_KEY_TYPES_CNT | ||
124 | 128 | ||
125 | /* | 129 | /* |
126 | * How much a directory entry/extended attribute entry adds to the parent/host | 130 | * How much a directory entry/extended attribute entry adds to the parent/host |
@@ -1028,6 +1032,8 @@ struct ubifs_debug_info; | |||
1028 | * @max_leb_cnt: maximum count of logical eraseblocks | 1032 | * @max_leb_cnt: maximum count of logical eraseblocks |
1029 | * @old_leb_cnt: count of logical eraseblocks before re-size | 1033 | * @old_leb_cnt: count of logical eraseblocks before re-size |
1030 | * @ro_media: the underlying UBI volume is read-only | 1034 | * @ro_media: the underlying UBI volume is read-only |
1035 | * @ro_mount: the file-system was mounted as read-only | ||
1036 | * @ro_error: UBIFS switched to R/O mode because an error happened | ||
1031 | * | 1037 | * |
1032 | * @dirty_pg_cnt: number of dirty pages (not used) | 1038 | * @dirty_pg_cnt: number of dirty pages (not used) |
1033 | * @dirty_zn_cnt: number of dirty znodes | 1039 | * @dirty_zn_cnt: number of dirty znodes |
@@ -1168,11 +1174,14 @@ struct ubifs_debug_info; | |||
1168 | * @replay_sqnum: sequence number of node currently being replayed | 1174 | * @replay_sqnum: sequence number of node currently being replayed |
1169 | * @need_recovery: file-system needs recovery | 1175 | * @need_recovery: file-system needs recovery |
1170 | * @replaying: set to %1 during journal replay | 1176 | * @replaying: set to %1 during journal replay |
1171 | * @unclean_leb_list: LEBs to recover when mounting ro to rw | 1177 | * @unclean_leb_list: LEBs to recover when re-mounting R/O mounted FS to R/W |
1172 | * @rcvrd_mst_node: recovered master node to write when mounting ro to rw | 1178 | * mode |
1179 | * @rcvrd_mst_node: recovered master node to write when re-mounting R/O mounted | ||
1180 | * FS to R/W mode | ||
1173 | * @size_tree: inode size information for recovery | 1181 | * @size_tree: inode size information for recovery |
1174 | * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) | 1182 | * @remounting_rw: set while re-mounting from R/O mode to R/W mode |
1175 | * @always_chk_crc: always check CRCs (while mounting and remounting rw) | 1183 | * @always_chk_crc: always check CRCs (while mounting and remounting to R/W |
1184 | * mode) | ||
1176 | * @mount_opts: UBIFS-specific mount options | 1185 | * @mount_opts: UBIFS-specific mount options |
1177 | * | 1186 | * |
1178 | * @dbg: debugging-related information | 1187 | * @dbg: debugging-related information |
@@ -1268,7 +1277,9 @@ struct ubifs_info { | |||
1268 | int leb_cnt; | 1277 | int leb_cnt; |
1269 | int max_leb_cnt; | 1278 | int max_leb_cnt; |
1270 | int old_leb_cnt; | 1279 | int old_leb_cnt; |
1271 | int ro_media; | 1280 | unsigned int ro_media:1; |
1281 | unsigned int ro_mount:1; | ||
1282 | unsigned int ro_error:1; | ||
1272 | 1283 | ||
1273 | atomic_long_t dirty_pg_cnt; | 1284 | atomic_long_t dirty_pg_cnt; |
1274 | atomic_long_t dirty_zn_cnt; | 1285 | atomic_long_t dirty_zn_cnt; |
diff --git a/fs/udf/Kconfig b/fs/udf/Kconfig index 0e0e99bd6bce..f8def3c8ea4c 100644 --- a/fs/udf/Kconfig +++ b/fs/udf/Kconfig | |||
@@ -1,5 +1,6 @@ | |||
1 | config UDF_FS | 1 | config UDF_FS |
2 | tristate "UDF file system support" | 2 | tristate "UDF file system support" |
3 | depends on BKL # needs serious work to remove | ||
3 | select CRC_ITU_T | 4 | select CRC_ITU_T |
4 | help | 5 | help |
5 | This is the new file system used on some CD-ROMs and DVDs. Say Y if | 6 | This is the new file system used on some CD-ROMs and DVDs. Say Y if |
diff --git a/fs/udf/namei.c b/fs/udf/namei.c index bf5fc674193c..6d8dc02baebb 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c | |||
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, | |||
1101 | inc_nlink(inode); | 1101 | inc_nlink(inode); |
1102 | inode->i_ctime = current_fs_time(inode->i_sb); | 1102 | inode->i_ctime = current_fs_time(inode->i_sb); |
1103 | mark_inode_dirty(inode); | 1103 | mark_inode_dirty(inode); |
1104 | atomic_inc(&inode->i_count); | 1104 | ihold(inode); |
1105 | d_instantiate(dentry, inode); | 1105 | d_instantiate(dentry, inode); |
1106 | unlock_kernel(); | 1106 | unlock_kernel(); |
1107 | 1107 | ||
diff --git a/fs/udf/super.c b/fs/udf/super.c index 65412d84a45d..76f3d6d97b40 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -1880,6 +1880,8 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1880 | struct kernel_lb_addr rootdir, fileset; | 1880 | struct kernel_lb_addr rootdir, fileset; |
1881 | struct udf_sb_info *sbi; | 1881 | struct udf_sb_info *sbi; |
1882 | 1882 | ||
1883 | lock_kernel(); | ||
1884 | |||
1883 | uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); | 1885 | uopt.flags = (1 << UDF_FLAG_USE_AD_IN_ICB) | (1 << UDF_FLAG_STRICT); |
1884 | uopt.uid = -1; | 1886 | uopt.uid = -1; |
1885 | uopt.gid = -1; | 1887 | uopt.gid = -1; |
@@ -1888,8 +1890,10 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
1888 | uopt.dmode = UDF_INVALID_MODE; | 1890 | uopt.dmode = UDF_INVALID_MODE; |
1889 | 1891 | ||
1890 | sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); | 1892 | sbi = kzalloc(sizeof(struct udf_sb_info), GFP_KERNEL); |
1891 | if (!sbi) | 1893 | if (!sbi) { |
1894 | unlock_kernel(); | ||
1892 | return -ENOMEM; | 1895 | return -ENOMEM; |
1896 | } | ||
1893 | 1897 | ||
1894 | sb->s_fs_info = sbi; | 1898 | sb->s_fs_info = sbi; |
1895 | 1899 | ||
@@ -2035,6 +2039,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) | |||
2035 | goto error_out; | 2039 | goto error_out; |
2036 | } | 2040 | } |
2037 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 2041 | sb->s_maxbytes = MAX_LFS_FILESIZE; |
2042 | unlock_kernel(); | ||
2038 | return 0; | 2043 | return 0; |
2039 | 2044 | ||
2040 | error_out: | 2045 | error_out: |
@@ -2055,6 +2060,7 @@ error_out: | |||
2055 | kfree(sbi); | 2060 | kfree(sbi); |
2056 | sb->s_fs_info = NULL; | 2061 | sb->s_fs_info = NULL; |
2057 | 2062 | ||
2063 | unlock_kernel(); | ||
2058 | return -EINVAL; | 2064 | return -EINVAL; |
2059 | } | 2065 | } |
2060 | 2066 | ||
diff --git a/fs/ufs/Kconfig b/fs/ufs/Kconfig index e4f10a40768a..30c8f223253d 100644 --- a/fs/ufs/Kconfig +++ b/fs/ufs/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config UFS_FS | 1 | config UFS_FS |
2 | tristate "UFS file system support (read only)" | 2 | tristate "UFS file system support (read only)" |
3 | depends on BLOCK | 3 | depends on BLOCK |
4 | depends on BKL # probably fixable | ||
4 | help | 5 | help |
5 | BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, | 6 | BSD and derivate versions of Unix (such as SunOS, FreeBSD, NetBSD, |
6 | OpenBSD and NeXTstep) use a file system called UFS. Some System V | 7 | OpenBSD and NeXTstep) use a file system called UFS. Some System V |
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index b056f02b1fb3..12f39b9e4437 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c | |||
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, | |||
180 | 180 | ||
181 | inode->i_ctime = CURRENT_TIME_SEC; | 181 | inode->i_ctime = CURRENT_TIME_SEC; |
182 | inode_inc_link_count(inode); | 182 | inode_inc_link_count(inode); |
183 | atomic_inc(&inode->i_count); | 183 | ihold(inode); |
184 | 184 | ||
185 | error = ufs_add_nondir(dentry, inode); | 185 | error = ufs_add_nondir(dentry, inode); |
186 | unlock_kernel(); | 186 | unlock_kernel(); |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index d510c1b91817..6b9be90dae7d 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -696,6 +696,8 @@ static int ufs_fill_super(struct super_block *sb, void *data, int silent) | |||
696 | unsigned maxsymlen; | 696 | unsigned maxsymlen; |
697 | int ret = -EINVAL; | 697 | int ret = -EINVAL; |
698 | 698 | ||
699 | lock_kernel(); | ||
700 | |||
699 | uspi = NULL; | 701 | uspi = NULL; |
700 | ubh = NULL; | 702 | ubh = NULL; |
701 | flags = 0; | 703 | flags = 0; |
@@ -1163,6 +1165,7 @@ magic_found: | |||
1163 | goto failed; | 1165 | goto failed; |
1164 | 1166 | ||
1165 | UFSD("EXIT\n"); | 1167 | UFSD("EXIT\n"); |
1168 | unlock_kernel(); | ||
1166 | return 0; | 1169 | return 0; |
1167 | 1170 | ||
1168 | dalloc_failed: | 1171 | dalloc_failed: |
@@ -1174,10 +1177,12 @@ failed: | |||
1174 | kfree(sbi); | 1177 | kfree(sbi); |
1175 | sb->s_fs_info = NULL; | 1178 | sb->s_fs_info = NULL; |
1176 | UFSD("EXIT (FAILED)\n"); | 1179 | UFSD("EXIT (FAILED)\n"); |
1180 | unlock_kernel(); | ||
1177 | return ret; | 1181 | return ret; |
1178 | 1182 | ||
1179 | failed_nomem: | 1183 | failed_nomem: |
1180 | UFSD("EXIT (NOMEM)\n"); | 1184 | UFSD("EXIT (NOMEM)\n"); |
1185 | unlock_kernel(); | ||
1181 | return -ENOMEM; | 1186 | return -ENOMEM; |
1182 | } | 1187 | } |
1183 | 1188 | ||
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index b552f816de15..c9af48fffcd7 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
@@ -1139,8 +1139,7 @@ xfs_vm_writepage( | |||
1139 | type = IO_DELAY; | 1139 | type = IO_DELAY; |
1140 | flags = BMAPI_ALLOCATE; | 1140 | flags = BMAPI_ALLOCATE; |
1141 | 1141 | ||
1142 | if (wbc->sync_mode == WB_SYNC_NONE && | 1142 | if (wbc->sync_mode == WB_SYNC_NONE) |
1143 | wbc->nonblocking) | ||
1144 | flags |= BMAPI_TRYLOCK; | 1143 | flags |= BMAPI_TRYLOCK; |
1145 | } | 1144 | } |
1146 | 1145 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 286e36e21dae..63fd2c07cb57 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
@@ -188,8 +188,8 @@ _xfs_buf_initialize( | |||
188 | atomic_set(&bp->b_hold, 1); | 188 | atomic_set(&bp->b_hold, 1); |
189 | init_completion(&bp->b_iowait); | 189 | init_completion(&bp->b_iowait); |
190 | INIT_LIST_HEAD(&bp->b_list); | 190 | INIT_LIST_HEAD(&bp->b_list); |
191 | INIT_LIST_HEAD(&bp->b_hash_list); | 191 | RB_CLEAR_NODE(&bp->b_rbnode); |
192 | init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */ | 192 | sema_init(&bp->b_sema, 0); /* held, no waiters */ |
193 | XB_SET_OWNER(bp); | 193 | XB_SET_OWNER(bp); |
194 | bp->b_target = target; | 194 | bp->b_target = target; |
195 | bp->b_file_offset = range_base; | 195 | bp->b_file_offset = range_base; |
@@ -262,8 +262,6 @@ xfs_buf_free( | |||
262 | { | 262 | { |
263 | trace_xfs_buf_free(bp, _RET_IP_); | 263 | trace_xfs_buf_free(bp, _RET_IP_); |
264 | 264 | ||
265 | ASSERT(list_empty(&bp->b_hash_list)); | ||
266 | |||
267 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { | 265 | if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) { |
268 | uint i; | 266 | uint i; |
269 | 267 | ||
@@ -422,8 +420,10 @@ _xfs_buf_find( | |||
422 | { | 420 | { |
423 | xfs_off_t range_base; | 421 | xfs_off_t range_base; |
424 | size_t range_length; | 422 | size_t range_length; |
425 | xfs_bufhash_t *hash; | 423 | struct xfs_perag *pag; |
426 | xfs_buf_t *bp, *n; | 424 | struct rb_node **rbp; |
425 | struct rb_node *parent; | ||
426 | xfs_buf_t *bp; | ||
427 | 427 | ||
428 | range_base = (ioff << BBSHIFT); | 428 | range_base = (ioff << BBSHIFT); |
429 | range_length = (isize << BBSHIFT); | 429 | range_length = (isize << BBSHIFT); |
@@ -432,14 +432,37 @@ _xfs_buf_find( | |||
432 | ASSERT(!(range_length < (1 << btp->bt_sshift))); | 432 | ASSERT(!(range_length < (1 << btp->bt_sshift))); |
433 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); | 433 | ASSERT(!(range_base & (xfs_off_t)btp->bt_smask)); |
434 | 434 | ||
435 | hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)]; | 435 | /* get tree root */ |
436 | 436 | pag = xfs_perag_get(btp->bt_mount, | |
437 | spin_lock(&hash->bh_lock); | 437 | xfs_daddr_to_agno(btp->bt_mount, ioff)); |
438 | 438 | ||
439 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | 439 | /* walk tree */ |
440 | ASSERT(btp == bp->b_target); | 440 | spin_lock(&pag->pag_buf_lock); |
441 | if (bp->b_file_offset == range_base && | 441 | rbp = &pag->pag_buf_tree.rb_node; |
442 | bp->b_buffer_length == range_length) { | 442 | parent = NULL; |
443 | bp = NULL; | ||
444 | while (*rbp) { | ||
445 | parent = *rbp; | ||
446 | bp = rb_entry(parent, struct xfs_buf, b_rbnode); | ||
447 | |||
448 | if (range_base < bp->b_file_offset) | ||
449 | rbp = &(*rbp)->rb_left; | ||
450 | else if (range_base > bp->b_file_offset) | ||
451 | rbp = &(*rbp)->rb_right; | ||
452 | else { | ||
453 | /* | ||
454 | * found a block offset match. If the range doesn't | ||
455 | * match, the only way this is allowed is if the buffer | ||
456 | * in the cache is stale and the transaction that made | ||
457 | * it stale has not yet committed. i.e. we are | ||
458 | * reallocating a busy extent. Skip this buffer and | ||
459 | * continue searching to the right for an exact match. | ||
460 | */ | ||
461 | if (bp->b_buffer_length != range_length) { | ||
462 | ASSERT(bp->b_flags & XBF_STALE); | ||
463 | rbp = &(*rbp)->rb_right; | ||
464 | continue; | ||
465 | } | ||
443 | atomic_inc(&bp->b_hold); | 466 | atomic_inc(&bp->b_hold); |
444 | goto found; | 467 | goto found; |
445 | } | 468 | } |
@@ -449,17 +472,21 @@ _xfs_buf_find( | |||
449 | if (new_bp) { | 472 | if (new_bp) { |
450 | _xfs_buf_initialize(new_bp, btp, range_base, | 473 | _xfs_buf_initialize(new_bp, btp, range_base, |
451 | range_length, flags); | 474 | range_length, flags); |
452 | new_bp->b_hash = hash; | 475 | rb_link_node(&new_bp->b_rbnode, parent, rbp); |
453 | list_add(&new_bp->b_hash_list, &hash->bh_list); | 476 | rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree); |
477 | /* the buffer keeps the perag reference until it is freed */ | ||
478 | new_bp->b_pag = pag; | ||
479 | spin_unlock(&pag->pag_buf_lock); | ||
454 | } else { | 480 | } else { |
455 | XFS_STATS_INC(xb_miss_locked); | 481 | XFS_STATS_INC(xb_miss_locked); |
482 | spin_unlock(&pag->pag_buf_lock); | ||
483 | xfs_perag_put(pag); | ||
456 | } | 484 | } |
457 | |||
458 | spin_unlock(&hash->bh_lock); | ||
459 | return new_bp; | 485 | return new_bp; |
460 | 486 | ||
461 | found: | 487 | found: |
462 | spin_unlock(&hash->bh_lock); | 488 | spin_unlock(&pag->pag_buf_lock); |
489 | xfs_perag_put(pag); | ||
463 | 490 | ||
464 | /* Attempt to get the semaphore without sleeping, | 491 | /* Attempt to get the semaphore without sleeping, |
465 | * if this does not work then we need to drop the | 492 | * if this does not work then we need to drop the |
@@ -625,8 +652,7 @@ void | |||
625 | xfs_buf_readahead( | 652 | xfs_buf_readahead( |
626 | xfs_buftarg_t *target, | 653 | xfs_buftarg_t *target, |
627 | xfs_off_t ioff, | 654 | xfs_off_t ioff, |
628 | size_t isize, | 655 | size_t isize) |
629 | xfs_buf_flags_t flags) | ||
630 | { | 656 | { |
631 | struct backing_dev_info *bdi; | 657 | struct backing_dev_info *bdi; |
632 | 658 | ||
@@ -634,8 +660,42 @@ xfs_buf_readahead( | |||
634 | if (bdi_read_congested(bdi)) | 660 | if (bdi_read_congested(bdi)) |
635 | return; | 661 | return; |
636 | 662 | ||
637 | flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD); | 663 | xfs_buf_read(target, ioff, isize, |
638 | xfs_buf_read(target, ioff, isize, flags); | 664 | XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK); |
665 | } | ||
666 | |||
667 | /* | ||
668 | * Read an uncached buffer from disk. Allocates and returns a locked | ||
669 | * buffer containing the disk contents or nothing. | ||
670 | */ | ||
671 | struct xfs_buf * | ||
672 | xfs_buf_read_uncached( | ||
673 | struct xfs_mount *mp, | ||
674 | struct xfs_buftarg *target, | ||
675 | xfs_daddr_t daddr, | ||
676 | size_t length, | ||
677 | int flags) | ||
678 | { | ||
679 | xfs_buf_t *bp; | ||
680 | int error; | ||
681 | |||
682 | bp = xfs_buf_get_uncached(target, length, flags); | ||
683 | if (!bp) | ||
684 | return NULL; | ||
685 | |||
686 | /* set up the buffer for a read IO */ | ||
687 | xfs_buf_lock(bp); | ||
688 | XFS_BUF_SET_ADDR(bp, daddr); | ||
689 | XFS_BUF_READ(bp); | ||
690 | XFS_BUF_BUSY(bp); | ||
691 | |||
692 | xfsbdstrat(mp, bp); | ||
693 | error = xfs_buf_iowait(bp); | ||
694 | if (error || bp->b_error) { | ||
695 | xfs_buf_relse(bp); | ||
696 | return NULL; | ||
697 | } | ||
698 | return bp; | ||
639 | } | 699 | } |
640 | 700 | ||
641 | xfs_buf_t * | 701 | xfs_buf_t * |
@@ -707,9 +767,10 @@ xfs_buf_associate_memory( | |||
707 | } | 767 | } |
708 | 768 | ||
709 | xfs_buf_t * | 769 | xfs_buf_t * |
710 | xfs_buf_get_noaddr( | 770 | xfs_buf_get_uncached( |
771 | struct xfs_buftarg *target, | ||
711 | size_t len, | 772 | size_t len, |
712 | xfs_buftarg_t *target) | 773 | int flags) |
713 | { | 774 | { |
714 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; | 775 | unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT; |
715 | int error, i; | 776 | int error, i; |
@@ -725,7 +786,7 @@ xfs_buf_get_noaddr( | |||
725 | goto fail_free_buf; | 786 | goto fail_free_buf; |
726 | 787 | ||
727 | for (i = 0; i < page_count; i++) { | 788 | for (i = 0; i < page_count; i++) { |
728 | bp->b_pages[i] = alloc_page(GFP_KERNEL); | 789 | bp->b_pages[i] = alloc_page(xb_to_gfp(flags)); |
729 | if (!bp->b_pages[i]) | 790 | if (!bp->b_pages[i]) |
730 | goto fail_free_mem; | 791 | goto fail_free_mem; |
731 | } | 792 | } |
@@ -740,7 +801,7 @@ xfs_buf_get_noaddr( | |||
740 | 801 | ||
741 | xfs_buf_unlock(bp); | 802 | xfs_buf_unlock(bp); |
742 | 803 | ||
743 | trace_xfs_buf_get_noaddr(bp, _RET_IP_); | 804 | trace_xfs_buf_get_uncached(bp, _RET_IP_); |
744 | return bp; | 805 | return bp; |
745 | 806 | ||
746 | fail_free_mem: | 807 | fail_free_mem: |
@@ -774,29 +835,30 @@ void | |||
774 | xfs_buf_rele( | 835 | xfs_buf_rele( |
775 | xfs_buf_t *bp) | 836 | xfs_buf_t *bp) |
776 | { | 837 | { |
777 | xfs_bufhash_t *hash = bp->b_hash; | 838 | struct xfs_perag *pag = bp->b_pag; |
778 | 839 | ||
779 | trace_xfs_buf_rele(bp, _RET_IP_); | 840 | trace_xfs_buf_rele(bp, _RET_IP_); |
780 | 841 | ||
781 | if (unlikely(!hash)) { | 842 | if (!pag) { |
782 | ASSERT(!bp->b_relse); | 843 | ASSERT(!bp->b_relse); |
844 | ASSERT(RB_EMPTY_NODE(&bp->b_rbnode)); | ||
783 | if (atomic_dec_and_test(&bp->b_hold)) | 845 | if (atomic_dec_and_test(&bp->b_hold)) |
784 | xfs_buf_free(bp); | 846 | xfs_buf_free(bp); |
785 | return; | 847 | return; |
786 | } | 848 | } |
787 | 849 | ||
850 | ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode)); | ||
788 | ASSERT(atomic_read(&bp->b_hold) > 0); | 851 | ASSERT(atomic_read(&bp->b_hold) > 0); |
789 | if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) { | 852 | if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) { |
790 | if (bp->b_relse) { | 853 | if (bp->b_relse) { |
791 | atomic_inc(&bp->b_hold); | 854 | atomic_inc(&bp->b_hold); |
792 | spin_unlock(&hash->bh_lock); | 855 | spin_unlock(&pag->pag_buf_lock); |
793 | (*(bp->b_relse)) (bp); | 856 | bp->b_relse(bp); |
794 | } else if (bp->b_flags & XBF_FS_MANAGED) { | ||
795 | spin_unlock(&hash->bh_lock); | ||
796 | } else { | 857 | } else { |
797 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); | 858 | ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q))); |
798 | list_del_init(&bp->b_hash_list); | 859 | rb_erase(&bp->b_rbnode, &pag->pag_buf_tree); |
799 | spin_unlock(&hash->bh_lock); | 860 | spin_unlock(&pag->pag_buf_lock); |
861 | xfs_perag_put(pag); | ||
800 | xfs_buf_free(bp); | 862 | xfs_buf_free(bp); |
801 | } | 863 | } |
802 | } | 864 | } |
@@ -859,7 +921,7 @@ xfs_buf_lock( | |||
859 | trace_xfs_buf_lock(bp, _RET_IP_); | 921 | trace_xfs_buf_lock(bp, _RET_IP_); |
860 | 922 | ||
861 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) | 923 | if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE)) |
862 | xfs_log_force(bp->b_mount, 0); | 924 | xfs_log_force(bp->b_target->bt_mount, 0); |
863 | if (atomic_read(&bp->b_io_remaining)) | 925 | if (atomic_read(&bp->b_io_remaining)) |
864 | blk_run_address_space(bp->b_target->bt_mapping); | 926 | blk_run_address_space(bp->b_target->bt_mapping); |
865 | down(&bp->b_sema); | 927 | down(&bp->b_sema); |
@@ -924,19 +986,7 @@ xfs_buf_iodone_work( | |||
924 | xfs_buf_t *bp = | 986 | xfs_buf_t *bp = |
925 | container_of(work, xfs_buf_t, b_iodone_work); | 987 | container_of(work, xfs_buf_t, b_iodone_work); |
926 | 988 | ||
927 | /* | 989 | if (bp->b_iodone) |
928 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | ||
929 | * ordered flag and reissue them. Because we can't tell the higher | ||
930 | * layers directly that they should not issue ordered I/O anymore, they | ||
931 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. | ||
932 | */ | ||
933 | if ((bp->b_error == EOPNOTSUPP) && | ||
934 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | ||
935 | trace_xfs_buf_ordered_retry(bp, _RET_IP_); | ||
936 | bp->b_flags &= ~XBF_ORDERED; | ||
937 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
938 | xfs_buf_iorequest(bp); | ||
939 | } else if (bp->b_iodone) | ||
940 | (*(bp->b_iodone))(bp); | 990 | (*(bp->b_iodone))(bp); |
941 | else if (bp->b_flags & XBF_ASYNC) | 991 | else if (bp->b_flags & XBF_ASYNC) |
942 | xfs_buf_relse(bp); | 992 | xfs_buf_relse(bp); |
@@ -982,7 +1032,6 @@ xfs_bwrite( | |||
982 | { | 1032 | { |
983 | int error; | 1033 | int error; |
984 | 1034 | ||
985 | bp->b_mount = mp; | ||
986 | bp->b_flags |= XBF_WRITE; | 1035 | bp->b_flags |= XBF_WRITE; |
987 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); | 1036 | bp->b_flags &= ~(XBF_ASYNC | XBF_READ); |
988 | 1037 | ||
@@ -1003,8 +1052,6 @@ xfs_bdwrite( | |||
1003 | { | 1052 | { |
1004 | trace_xfs_buf_bdwrite(bp, _RET_IP_); | 1053 | trace_xfs_buf_bdwrite(bp, _RET_IP_); |
1005 | 1054 | ||
1006 | bp->b_mount = mp; | ||
1007 | |||
1008 | bp->b_flags &= ~XBF_READ; | 1055 | bp->b_flags &= ~XBF_READ; |
1009 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); | 1056 | bp->b_flags |= (XBF_DELWRI | XBF_ASYNC); |
1010 | 1057 | ||
@@ -1013,7 +1060,7 @@ xfs_bdwrite( | |||
1013 | 1060 | ||
1014 | /* | 1061 | /* |
1015 | * Called when we want to stop a buffer from getting written or read. | 1062 | * Called when we want to stop a buffer from getting written or read. |
1016 | * We attach the EIO error, muck with its flags, and call biodone | 1063 | * We attach the EIO error, muck with its flags, and call xfs_buf_ioend |
1017 | * so that the proper iodone callbacks get called. | 1064 | * so that the proper iodone callbacks get called. |
1018 | */ | 1065 | */ |
1019 | STATIC int | 1066 | STATIC int |
@@ -1030,21 +1077,21 @@ xfs_bioerror( | |||
1030 | XFS_BUF_ERROR(bp, EIO); | 1077 | XFS_BUF_ERROR(bp, EIO); |
1031 | 1078 | ||
1032 | /* | 1079 | /* |
1033 | * We're calling biodone, so delete XBF_DONE flag. | 1080 | * We're calling xfs_buf_ioend, so delete XBF_DONE flag. |
1034 | */ | 1081 | */ |
1035 | XFS_BUF_UNREAD(bp); | 1082 | XFS_BUF_UNREAD(bp); |
1036 | XFS_BUF_UNDELAYWRITE(bp); | 1083 | XFS_BUF_UNDELAYWRITE(bp); |
1037 | XFS_BUF_UNDONE(bp); | 1084 | XFS_BUF_UNDONE(bp); |
1038 | XFS_BUF_STALE(bp); | 1085 | XFS_BUF_STALE(bp); |
1039 | 1086 | ||
1040 | xfs_biodone(bp); | 1087 | xfs_buf_ioend(bp, 0); |
1041 | 1088 | ||
1042 | return EIO; | 1089 | return EIO; |
1043 | } | 1090 | } |
1044 | 1091 | ||
1045 | /* | 1092 | /* |
1046 | * Same as xfs_bioerror, except that we are releasing the buffer | 1093 | * Same as xfs_bioerror, except that we are releasing the buffer |
1047 | * here ourselves, and avoiding the biodone call. | 1094 | * here ourselves, and avoiding the xfs_buf_ioend call. |
1048 | * This is meant for userdata errors; metadata bufs come with | 1095 | * This is meant for userdata errors; metadata bufs come with |
1049 | * iodone functions attached, so that we can track down errors. | 1096 | * iodone functions attached, so that we can track down errors. |
1050 | */ | 1097 | */ |
@@ -1093,7 +1140,7 @@ int | |||
1093 | xfs_bdstrat_cb( | 1140 | xfs_bdstrat_cb( |
1094 | struct xfs_buf *bp) | 1141 | struct xfs_buf *bp) |
1095 | { | 1142 | { |
1096 | if (XFS_FORCED_SHUTDOWN(bp->b_mount)) { | 1143 | if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) { |
1097 | trace_xfs_bdstrat_shut(bp, _RET_IP_); | 1144 | trace_xfs_bdstrat_shut(bp, _RET_IP_); |
1098 | /* | 1145 | /* |
1099 | * Metadata write that didn't get logged but | 1146 | * Metadata write that didn't get logged but |
@@ -1195,7 +1242,7 @@ _xfs_buf_ioapply( | |||
1195 | 1242 | ||
1196 | if (bp->b_flags & XBF_ORDERED) { | 1243 | if (bp->b_flags & XBF_ORDERED) { |
1197 | ASSERT(!(bp->b_flags & XBF_READ)); | 1244 | ASSERT(!(bp->b_flags & XBF_READ)); |
1198 | rw = WRITE_BARRIER; | 1245 | rw = WRITE_FLUSH_FUA; |
1199 | } else if (bp->b_flags & XBF_LOG_BUFFER) { | 1246 | } else if (bp->b_flags & XBF_LOG_BUFFER) { |
1200 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); | 1247 | ASSERT(!(bp->b_flags & XBF_READ_AHEAD)); |
1201 | bp->b_flags &= ~_XBF_RUN_QUEUES; | 1248 | bp->b_flags &= ~_XBF_RUN_QUEUES; |
@@ -1399,62 +1446,24 @@ xfs_buf_iomove( | |||
1399 | */ | 1446 | */ |
1400 | void | 1447 | void |
1401 | xfs_wait_buftarg( | 1448 | xfs_wait_buftarg( |
1402 | xfs_buftarg_t *btp) | 1449 | struct xfs_buftarg *btp) |
1403 | { | ||
1404 | xfs_buf_t *bp, *n; | ||
1405 | xfs_bufhash_t *hash; | ||
1406 | uint i; | ||
1407 | |||
1408 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | ||
1409 | hash = &btp->bt_hash[i]; | ||
1410 | again: | ||
1411 | spin_lock(&hash->bh_lock); | ||
1412 | list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) { | ||
1413 | ASSERT(btp == bp->b_target); | ||
1414 | if (!(bp->b_flags & XBF_FS_MANAGED)) { | ||
1415 | spin_unlock(&hash->bh_lock); | ||
1416 | /* | ||
1417 | * Catch superblock reference count leaks | ||
1418 | * immediately | ||
1419 | */ | ||
1420 | BUG_ON(bp->b_bn == 0); | ||
1421 | delay(100); | ||
1422 | goto again; | ||
1423 | } | ||
1424 | } | ||
1425 | spin_unlock(&hash->bh_lock); | ||
1426 | } | ||
1427 | } | ||
1428 | |||
1429 | /* | ||
1430 | * Allocate buffer hash table for a given target. | ||
1431 | * For devices containing metadata (i.e. not the log/realtime devices) | ||
1432 | * we need to allocate a much larger hash table. | ||
1433 | */ | ||
1434 | STATIC void | ||
1435 | xfs_alloc_bufhash( | ||
1436 | xfs_buftarg_t *btp, | ||
1437 | int external) | ||
1438 | { | 1450 | { |
1439 | unsigned int i; | 1451 | struct xfs_perag *pag; |
1452 | uint i; | ||
1440 | 1453 | ||
1441 | btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */ | 1454 | for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) { |
1442 | btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) * | 1455 | pag = xfs_perag_get(btp->bt_mount, i); |
1443 | sizeof(xfs_bufhash_t)); | 1456 | spin_lock(&pag->pag_buf_lock); |
1444 | for (i = 0; i < (1 << btp->bt_hashshift); i++) { | 1457 | while (rb_first(&pag->pag_buf_tree)) { |
1445 | spin_lock_init(&btp->bt_hash[i].bh_lock); | 1458 | spin_unlock(&pag->pag_buf_lock); |
1446 | INIT_LIST_HEAD(&btp->bt_hash[i].bh_list); | 1459 | delay(100); |
1460 | spin_lock(&pag->pag_buf_lock); | ||
1461 | } | ||
1462 | spin_unlock(&pag->pag_buf_lock); | ||
1463 | xfs_perag_put(pag); | ||
1447 | } | 1464 | } |
1448 | } | 1465 | } |
1449 | 1466 | ||
1450 | STATIC void | ||
1451 | xfs_free_bufhash( | ||
1452 | xfs_buftarg_t *btp) | ||
1453 | { | ||
1454 | kmem_free_large(btp->bt_hash); | ||
1455 | btp->bt_hash = NULL; | ||
1456 | } | ||
1457 | |||
1458 | /* | 1467 | /* |
1459 | * buftarg list for delwrite queue processing | 1468 | * buftarg list for delwrite queue processing |
1460 | */ | 1469 | */ |
@@ -1487,7 +1496,6 @@ xfs_free_buftarg( | |||
1487 | xfs_flush_buftarg(btp, 1); | 1496 | xfs_flush_buftarg(btp, 1); |
1488 | if (mp->m_flags & XFS_MOUNT_BARRIER) | 1497 | if (mp->m_flags & XFS_MOUNT_BARRIER) |
1489 | xfs_blkdev_issue_flush(btp); | 1498 | xfs_blkdev_issue_flush(btp); |
1490 | xfs_free_bufhash(btp); | ||
1491 | iput(btp->bt_mapping->host); | 1499 | iput(btp->bt_mapping->host); |
1492 | 1500 | ||
1493 | /* Unregister the buftarg first so that we don't get a | 1501 | /* Unregister the buftarg first so that we don't get a |
@@ -1572,6 +1580,7 @@ xfs_mapping_buftarg( | |||
1572 | XFS_BUFTARG_NAME(btp)); | 1580 | XFS_BUFTARG_NAME(btp)); |
1573 | return ENOMEM; | 1581 | return ENOMEM; |
1574 | } | 1582 | } |
1583 | inode->i_ino = get_next_ino(); | ||
1575 | inode->i_mode = S_IFBLK; | 1584 | inode->i_mode = S_IFBLK; |
1576 | inode->i_bdev = bdev; | 1585 | inode->i_bdev = bdev; |
1577 | inode->i_rdev = bdev->bd_dev; | 1586 | inode->i_rdev = bdev->bd_dev; |
@@ -1609,6 +1618,7 @@ out_error: | |||
1609 | 1618 | ||
1610 | xfs_buftarg_t * | 1619 | xfs_buftarg_t * |
1611 | xfs_alloc_buftarg( | 1620 | xfs_alloc_buftarg( |
1621 | struct xfs_mount *mp, | ||
1612 | struct block_device *bdev, | 1622 | struct block_device *bdev, |
1613 | int external, | 1623 | int external, |
1614 | const char *fsname) | 1624 | const char *fsname) |
@@ -1617,6 +1627,7 @@ xfs_alloc_buftarg( | |||
1617 | 1627 | ||
1618 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); | 1628 | btp = kmem_zalloc(sizeof(*btp), KM_SLEEP); |
1619 | 1629 | ||
1630 | btp->bt_mount = mp; | ||
1620 | btp->bt_dev = bdev->bd_dev; | 1631 | btp->bt_dev = bdev->bd_dev; |
1621 | btp->bt_bdev = bdev; | 1632 | btp->bt_bdev = bdev; |
1622 | if (xfs_setsize_buftarg_early(btp, bdev)) | 1633 | if (xfs_setsize_buftarg_early(btp, bdev)) |
@@ -1625,7 +1636,6 @@ xfs_alloc_buftarg( | |||
1625 | goto error; | 1636 | goto error; |
1626 | if (xfs_alloc_delwrite_queue(btp, fsname)) | 1637 | if (xfs_alloc_delwrite_queue(btp, fsname)) |
1627 | goto error; | 1638 | goto error; |
1628 | xfs_alloc_bufhash(btp, external); | ||
1629 | return btp; | 1639 | return btp; |
1630 | 1640 | ||
1631 | error: | 1641 | error: |
@@ -1916,7 +1926,7 @@ xfs_flush_buftarg( | |||
1916 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); | 1926 | bp = list_first_entry(&wait_list, struct xfs_buf, b_list); |
1917 | 1927 | ||
1918 | list_del_init(&bp->b_list); | 1928 | list_del_init(&bp->b_list); |
1919 | xfs_iowait(bp); | 1929 | xfs_buf_iowait(bp); |
1920 | xfs_buf_relse(bp); | 1930 | xfs_buf_relse(bp); |
1921 | } | 1931 | } |
1922 | } | 1932 | } |
@@ -1933,7 +1943,7 @@ xfs_buf_init(void) | |||
1933 | goto out; | 1943 | goto out; |
1934 | 1944 | ||
1935 | xfslogd_workqueue = alloc_workqueue("xfslogd", | 1945 | xfslogd_workqueue = alloc_workqueue("xfslogd", |
1936 | WQ_RESCUER | WQ_HIGHPRI, 1); | 1946 | WQ_MEM_RECLAIM | WQ_HIGHPRI, 1); |
1937 | if (!xfslogd_workqueue) | 1947 | if (!xfslogd_workqueue) |
1938 | goto out_free_buf_zone; | 1948 | goto out_free_buf_zone; |
1939 | 1949 | ||
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index 2a05614f0b92..383a3f37cf98 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
@@ -51,7 +51,6 @@ typedef enum { | |||
51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ | 51 | #define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */ |
52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ | 52 | #define XBF_DELWRI (1 << 6) /* buffer has dirty pages */ |
53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ | 53 | #define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */ |
54 | #define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */ | ||
55 | #define XBF_ORDERED (1 << 11)/* use ordered writes */ | 54 | #define XBF_ORDERED (1 << 11)/* use ordered writes */ |
56 | #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ | 55 | #define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */ |
57 | #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ | 56 | #define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */ |
@@ -86,14 +85,6 @@ typedef enum { | |||
86 | */ | 85 | */ |
87 | #define _XBF_PAGE_LOCKED (1 << 22) | 86 | #define _XBF_PAGE_LOCKED (1 << 22) |
88 | 87 | ||
89 | /* | ||
90 | * If we try a barrier write, but it fails we have to communicate | ||
91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
92 | * when the buffer is re-issued so we have to add another flag to | ||
93 | * keep this information. | ||
94 | */ | ||
95 | #define _XFS_BARRIER_FAILED (1 << 23) | ||
96 | |||
97 | typedef unsigned int xfs_buf_flags_t; | 88 | typedef unsigned int xfs_buf_flags_t; |
98 | 89 | ||
99 | #define XFS_BUF_FLAGS \ | 90 | #define XFS_BUF_FLAGS \ |
@@ -104,7 +95,6 @@ typedef unsigned int xfs_buf_flags_t; | |||
104 | { XBF_DONE, "DONE" }, \ | 95 | { XBF_DONE, "DONE" }, \ |
105 | { XBF_DELWRI, "DELWRI" }, \ | 96 | { XBF_DELWRI, "DELWRI" }, \ |
106 | { XBF_STALE, "STALE" }, \ | 97 | { XBF_STALE, "STALE" }, \ |
107 | { XBF_FS_MANAGED, "FS_MANAGED" }, \ | ||
108 | { XBF_ORDERED, "ORDERED" }, \ | 98 | { XBF_ORDERED, "ORDERED" }, \ |
109 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ | 99 | { XBF_READ_AHEAD, "READ_AHEAD" }, \ |
110 | { XBF_LOCK, "LOCK" }, /* should never be set */\ | 100 | { XBF_LOCK, "LOCK" }, /* should never be set */\ |
@@ -114,8 +104,7 @@ typedef unsigned int xfs_buf_flags_t; | |||
114 | { _XBF_PAGES, "PAGES" }, \ | 104 | { _XBF_PAGES, "PAGES" }, \ |
115 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ | 105 | { _XBF_RUN_QUEUES, "RUN_QUEUES" }, \ |
116 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ | 106 | { _XBF_DELWRI_Q, "DELWRI_Q" }, \ |
117 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \ | 107 | { _XBF_PAGE_LOCKED, "PAGE_LOCKED" } |
118 | { _XFS_BARRIER_FAILED, "BARRIER_FAILED" } | ||
119 | 108 | ||
120 | 109 | ||
121 | typedef enum { | 110 | typedef enum { |
@@ -132,14 +121,11 @@ typedef struct xfs_buftarg { | |||
132 | dev_t bt_dev; | 121 | dev_t bt_dev; |
133 | struct block_device *bt_bdev; | 122 | struct block_device *bt_bdev; |
134 | struct address_space *bt_mapping; | 123 | struct address_space *bt_mapping; |
124 | struct xfs_mount *bt_mount; | ||
135 | unsigned int bt_bsize; | 125 | unsigned int bt_bsize; |
136 | unsigned int bt_sshift; | 126 | unsigned int bt_sshift; |
137 | size_t bt_smask; | 127 | size_t bt_smask; |
138 | 128 | ||
139 | /* per device buffer hash table */ | ||
140 | uint bt_hashshift; | ||
141 | xfs_bufhash_t *bt_hash; | ||
142 | |||
143 | /* per device delwri queue */ | 129 | /* per device delwri queue */ |
144 | struct task_struct *bt_task; | 130 | struct task_struct *bt_task; |
145 | struct list_head bt_list; | 131 | struct list_head bt_list; |
@@ -167,34 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *); | |||
167 | #define XB_PAGES 2 | 153 | #define XB_PAGES 2 |
168 | 154 | ||
169 | typedef struct xfs_buf { | 155 | typedef struct xfs_buf { |
156 | /* | ||
157 | * first cacheline holds all the fields needed for an uncontended cache | ||
158 | * hit to be fully processed. The semaphore straddles the cacheline | ||
159 | * boundary, but the counter and lock sits on the first cacheline, | ||
160 | * which is the only bit that is touched if we hit the semaphore | ||
161 | * fast-path on locking. | ||
162 | */ | ||
163 | struct rb_node b_rbnode; /* rbtree node */ | ||
164 | xfs_off_t b_file_offset; /* offset in file */ | ||
165 | size_t b_buffer_length;/* size of buffer in bytes */ | ||
166 | atomic_t b_hold; /* reference count */ | ||
167 | xfs_buf_flags_t b_flags; /* status flags */ | ||
170 | struct semaphore b_sema; /* semaphore for lockables */ | 168 | struct semaphore b_sema; /* semaphore for lockables */ |
171 | unsigned long b_queuetime; /* time buffer was queued */ | 169 | |
172 | atomic_t b_pin_count; /* pin count */ | ||
173 | wait_queue_head_t b_waiters; /* unpin waiters */ | 170 | wait_queue_head_t b_waiters; /* unpin waiters */ |
174 | struct list_head b_list; | 171 | struct list_head b_list; |
175 | xfs_buf_flags_t b_flags; /* status flags */ | 172 | struct xfs_perag *b_pag; /* contains rbtree root */ |
176 | struct list_head b_hash_list; /* hash table list */ | ||
177 | xfs_bufhash_t *b_hash; /* hash table list start */ | ||
178 | xfs_buftarg_t *b_target; /* buffer target (device) */ | 173 | xfs_buftarg_t *b_target; /* buffer target (device) */ |
179 | atomic_t b_hold; /* reference count */ | ||
180 | xfs_daddr_t b_bn; /* block number for I/O */ | 174 | xfs_daddr_t b_bn; /* block number for I/O */ |
181 | xfs_off_t b_file_offset; /* offset in file */ | ||
182 | size_t b_buffer_length;/* size of buffer in bytes */ | ||
183 | size_t b_count_desired;/* desired transfer size */ | 175 | size_t b_count_desired;/* desired transfer size */ |
184 | void *b_addr; /* virtual address of buffer */ | 176 | void *b_addr; /* virtual address of buffer */ |
185 | struct work_struct b_iodone_work; | 177 | struct work_struct b_iodone_work; |
186 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | ||
187 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ | 178 | xfs_buf_iodone_t b_iodone; /* I/O completion function */ |
188 | xfs_buf_relse_t b_relse; /* releasing function */ | 179 | xfs_buf_relse_t b_relse; /* releasing function */ |
189 | struct completion b_iowait; /* queue for I/O waiters */ | 180 | struct completion b_iowait; /* queue for I/O waiters */ |
190 | void *b_fspriv; | 181 | void *b_fspriv; |
191 | void *b_fspriv2; | 182 | void *b_fspriv2; |
192 | struct xfs_mount *b_mount; | ||
193 | unsigned short b_error; /* error code on I/O */ | ||
194 | unsigned int b_page_count; /* size of page array */ | ||
195 | unsigned int b_offset; /* page offset in first page */ | ||
196 | struct page **b_pages; /* array of page pointers */ | 183 | struct page **b_pages; /* array of page pointers */ |
197 | struct page *b_page_array[XB_PAGES]; /* inline pages */ | 184 | struct page *b_page_array[XB_PAGES]; /* inline pages */ |
185 | unsigned long b_queuetime; /* time buffer was queued */ | ||
186 | atomic_t b_pin_count; /* pin count */ | ||
187 | atomic_t b_io_remaining; /* #outstanding I/O requests */ | ||
188 | unsigned int b_page_count; /* size of page array */ | ||
189 | unsigned int b_offset; /* page offset in first page */ | ||
190 | unsigned short b_error; /* error code on I/O */ | ||
198 | #ifdef XFS_BUF_LOCK_TRACKING | 191 | #ifdef XFS_BUF_LOCK_TRACKING |
199 | int b_last_holder; | 192 | int b_last_holder; |
200 | #endif | 193 | #endif |
@@ -213,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | |||
213 | xfs_buf_flags_t); | 206 | xfs_buf_flags_t); |
214 | 207 | ||
215 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 208 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); |
216 | extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *); | 209 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); |
217 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 210 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
218 | extern void xfs_buf_hold(xfs_buf_t *); | 211 | extern void xfs_buf_hold(xfs_buf_t *); |
219 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t, | 212 | extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t); |
220 | xfs_buf_flags_t); | 213 | struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp, |
214 | struct xfs_buftarg *target, | ||
215 | xfs_daddr_t daddr, size_t length, int flags); | ||
221 | 216 | ||
222 | /* Releasing Buffers */ | 217 | /* Releasing Buffers */ |
223 | extern void xfs_buf_free(xfs_buf_t *); | 218 | extern void xfs_buf_free(xfs_buf_t *); |
@@ -242,6 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *); | |||
242 | extern int xfs_buf_iowait(xfs_buf_t *); | 237 | extern int xfs_buf_iowait(xfs_buf_t *); |
243 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, | 238 | extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *, |
244 | xfs_buf_rw_t); | 239 | xfs_buf_rw_t); |
240 | #define xfs_buf_zero(bp, off, len) \ | ||
241 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | ||
245 | 242 | ||
246 | static inline int xfs_buf_geterror(xfs_buf_t *bp) | 243 | static inline int xfs_buf_geterror(xfs_buf_t *bp) |
247 | { | 244 | { |
@@ -276,8 +273,6 @@ extern void xfs_buf_terminate(void); | |||
276 | XFS_BUF_DONE(bp); \ | 273 | XFS_BUF_DONE(bp); \ |
277 | } while (0) | 274 | } while (0) |
278 | 275 | ||
279 | #define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED) | ||
280 | |||
281 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) | 276 | #define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI) |
282 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) | 277 | #define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp) |
283 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) | 278 | #define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI) |
@@ -356,25 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) | |||
356 | xfs_buf_rele(bp); | 351 | xfs_buf_rele(bp); |
357 | } | 352 | } |
358 | 353 | ||
359 | #define xfs_biodone(bp) xfs_buf_ioend(bp, 0) | ||
360 | |||
361 | #define xfs_biomove(bp, off, len, data, rw) \ | ||
362 | xfs_buf_iomove((bp), (off), (len), (data), \ | ||
363 | ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ) | ||
364 | |||
365 | #define xfs_biozero(bp, off, len) \ | ||
366 | xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) | ||
367 | |||
368 | #define xfs_iowait(bp) xfs_buf_iowait(bp) | ||
369 | |||
370 | #define xfs_baread(target, rablkno, ralen) \ | ||
371 | xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK) | ||
372 | |||
373 | |||
374 | /* | 354 | /* |
375 | * Handling of buftargs. | 355 | * Handling of buftargs. |
376 | */ | 356 | */ |
377 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *); | 357 | extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, |
358 | struct block_device *, int, const char *); | ||
378 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); | 359 | extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *); |
379 | extern void xfs_wait_buftarg(xfs_buftarg_t *); | 360 | extern void xfs_wait_buftarg(xfs_buftarg_t *); |
380 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); | 361 | extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); |
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h deleted file mode 100644 index 55bddf3b6091..000000000000 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ /dev/null | |||
@@ -1,28 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_CRED_H__ | ||
19 | #define __XFS_CRED_H__ | ||
20 | |||
21 | #include <linux/capability.h> | ||
22 | |||
23 | /* | ||
24 | * Credentials | ||
25 | */ | ||
26 | typedef const struct cred cred_t; | ||
27 | |||
28 | #endif /* __XFS_CRED_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c index 1f279b012f94..ed88ed16811c 100644 --- a/fs/xfs/linux-2.6/xfs_fs_subr.c +++ b/fs/xfs/linux-2.6/xfs_fs_subr.c | |||
@@ -32,10 +32,9 @@ xfs_tosspages( | |||
32 | xfs_off_t last, | 32 | xfs_off_t last, |
33 | int fiopt) | 33 | int fiopt) |
34 | { | 34 | { |
35 | struct address_space *mapping = VFS_I(ip)->i_mapping; | 35 | /* can't toss partial tail pages, so mask them out */ |
36 | 36 | last &= ~(PAGE_SIZE - 1); | |
37 | if (mapping->nrpages) | 37 | truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1); |
38 | truncate_inode_pages(mapping, first); | ||
39 | } | 38 | } |
40 | 39 | ||
41 | int | 40 | int |
@@ -50,12 +49,11 @@ xfs_flushinval_pages( | |||
50 | 49 | ||
51 | trace_xfs_pagecache_inval(ip, first, last); | 50 | trace_xfs_pagecache_inval(ip, first, last); |
52 | 51 | ||
53 | if (mapping->nrpages) { | 52 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
54 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 53 | ret = filemap_write_and_wait_range(mapping, first, |
55 | ret = filemap_write_and_wait(mapping); | 54 | last == -1 ? LLONG_MAX : last); |
56 | if (!ret) | 55 | if (!ret) |
57 | truncate_inode_pages(mapping, first); | 56 | truncate_inode_pages_range(mapping, first, last); |
58 | } | ||
59 | return -ret; | 57 | return -ret; |
60 | } | 58 | } |
61 | 59 | ||
@@ -71,10 +69,9 @@ xfs_flush_pages( | |||
71 | int ret = 0; | 69 | int ret = 0; |
72 | int ret2; | 70 | int ret2; |
73 | 71 | ||
74 | if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | 72 | xfs_iflags_clear(ip, XFS_ITRUNCATED); |
75 | xfs_iflags_clear(ip, XFS_ITRUNCATED); | 73 | ret = -filemap_fdatawrite_range(mapping, first, |
76 | ret = -filemap_fdatawrite(mapping); | 74 | last == -1 ? LLONG_MAX : last); |
77 | } | ||
78 | if (flags & XBF_ASYNC) | 75 | if (flags & XBF_ASYNC) |
79 | return ret; | 76 | return ret; |
80 | ret2 = xfs_wait_on_pages(ip, first, last); | 77 | ret2 = xfs_wait_on_pages(ip, first, last); |
@@ -91,7 +88,9 @@ xfs_wait_on_pages( | |||
91 | { | 88 | { |
92 | struct address_space *mapping = VFS_I(ip)->i_mapping; | 89 | struct address_space *mapping = VFS_I(ip)->i_mapping; |
93 | 90 | ||
94 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) | 91 | if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) { |
95 | return -filemap_fdatawait(mapping); | 92 | return -filemap_fdatawait_range(mapping, first, |
93 | last == -1 ? ip->i_size - 1 : last); | ||
94 | } | ||
96 | return 0; | 95 | return 0; |
97 | } | 96 | } |
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c index 2ae8b1ccb02e..76e81cff70b9 100644 --- a/fs/xfs/linux-2.6/xfs_globals.c +++ b/fs/xfs/linux-2.6/xfs_globals.c | |||
@@ -16,7 +16,6 @@ | |||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | 16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
17 | */ | 17 | */ |
18 | #include "xfs.h" | 18 | #include "xfs.h" |
19 | #include "xfs_cred.h" | ||
20 | #include "xfs_sysctl.h" | 19 | #include "xfs_sysctl.h" |
21 | 20 | ||
22 | /* | 21 | /* |
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h deleted file mode 100644 index 69f71caf061c..000000000000 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ /dev/null | |||
@@ -1,23 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_GLOBALS_H__ | ||
19 | #define __XFS_GLOBALS_H__ | ||
20 | |||
21 | extern uint64_t xfs_panic_mask; /* set to cause more panics */ | ||
22 | |||
23 | #endif /* __XFS_GLOBALS_H__ */ | ||
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c index 3b9e626f7cd1..2ea238f6d38e 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl.c +++ b/fs/xfs/linux-2.6/xfs_ioctl.c | |||
@@ -790,7 +790,7 @@ xfs_ioc_fsgetxattr( | |||
790 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 790 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
791 | fa.fsx_xflags = xfs_ip2xflags(ip); | 791 | fa.fsx_xflags = xfs_ip2xflags(ip); |
792 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; | 792 | fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog; |
793 | fa.fsx_projid = ip->i_d.di_projid; | 793 | fa.fsx_projid = xfs_get_projid(ip); |
794 | 794 | ||
795 | if (attr) { | 795 | if (attr) { |
796 | if (ip->i_afp) { | 796 | if (ip->i_afp) { |
@@ -909,10 +909,10 @@ xfs_ioctl_setattr( | |||
909 | return XFS_ERROR(EIO); | 909 | return XFS_ERROR(EIO); |
910 | 910 | ||
911 | /* | 911 | /* |
912 | * Disallow 32bit project ids because on-disk structure | 912 | * Disallow 32bit project ids when projid32bit feature is not enabled. |
913 | * is 16bit only. | ||
914 | */ | 913 | */ |
915 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1)) | 914 | if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) && |
915 | !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb)) | ||
916 | return XFS_ERROR(EINVAL); | 916 | return XFS_ERROR(EINVAL); |
917 | 917 | ||
918 | /* | 918 | /* |
@@ -961,7 +961,7 @@ xfs_ioctl_setattr( | |||
961 | if (mask & FSX_PROJID) { | 961 | if (mask & FSX_PROJID) { |
962 | if (XFS_IS_QUOTA_RUNNING(mp) && | 962 | if (XFS_IS_QUOTA_RUNNING(mp) && |
963 | XFS_IS_PQUOTA_ON(mp) && | 963 | XFS_IS_PQUOTA_ON(mp) && |
964 | ip->i_d.di_projid != fa->fsx_projid) { | 964 | xfs_get_projid(ip) != fa->fsx_projid) { |
965 | ASSERT(tp); | 965 | ASSERT(tp); |
966 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, | 966 | code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp, |
967 | capable(CAP_FOWNER) ? | 967 | capable(CAP_FOWNER) ? |
@@ -1063,12 +1063,12 @@ xfs_ioctl_setattr( | |||
1063 | * Change the ownerships and register quota modifications | 1063 | * Change the ownerships and register quota modifications |
1064 | * in the transaction. | 1064 | * in the transaction. |
1065 | */ | 1065 | */ |
1066 | if (ip->i_d.di_projid != fa->fsx_projid) { | 1066 | if (xfs_get_projid(ip) != fa->fsx_projid) { |
1067 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { | 1067 | if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) { |
1068 | olddquot = xfs_qm_vop_chown(tp, ip, | 1068 | olddquot = xfs_qm_vop_chown(tp, ip, |
1069 | &ip->i_gdquot, gdqp); | 1069 | &ip->i_gdquot, gdqp); |
1070 | } | 1070 | } |
1071 | ip->i_d.di_projid = fa->fsx_projid; | 1071 | xfs_set_projid(ip, fa->fsx_projid); |
1072 | 1072 | ||
1073 | /* | 1073 | /* |
1074 | * We may have to rev the inode as well as | 1074 | * We may have to rev the inode as well as |
@@ -1088,8 +1088,8 @@ xfs_ioctl_setattr( | |||
1088 | xfs_diflags_to_linux(ip); | 1088 | xfs_diflags_to_linux(ip); |
1089 | } | 1089 | } |
1090 | 1090 | ||
1091 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | ||
1091 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | 1092 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); |
1092 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | ||
1093 | 1093 | ||
1094 | XFS_STATS_INC(xs_ig_attrchg); | 1094 | XFS_STATS_INC(xs_ig_attrchg); |
1095 | 1095 | ||
@@ -1301,7 +1301,8 @@ xfs_file_ioctl( | |||
1301 | case XFS_IOC_ALLOCSP64: | 1301 | case XFS_IOC_ALLOCSP64: |
1302 | case XFS_IOC_FREESP64: | 1302 | case XFS_IOC_FREESP64: |
1303 | case XFS_IOC_RESVSP64: | 1303 | case XFS_IOC_RESVSP64: |
1304 | case XFS_IOC_UNRESVSP64: { | 1304 | case XFS_IOC_UNRESVSP64: |
1305 | case XFS_IOC_ZERO_RANGE: { | ||
1305 | xfs_flock64_t bf; | 1306 | xfs_flock64_t bf; |
1306 | 1307 | ||
1307 | if (copy_from_user(&bf, arg, sizeof(bf))) | 1308 | if (copy_from_user(&bf, arg, sizeof(bf))) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index 6c83f7f62dc9..b3486dfa5520 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
@@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin( | |||
164 | get_user(bstat->bs_extsize, &bstat32->bs_extsize) || | 164 | get_user(bstat->bs_extsize, &bstat32->bs_extsize) || |
165 | get_user(bstat->bs_extents, &bstat32->bs_extents) || | 165 | get_user(bstat->bs_extents, &bstat32->bs_extents) || |
166 | get_user(bstat->bs_gen, &bstat32->bs_gen) || | 166 | get_user(bstat->bs_gen, &bstat32->bs_gen) || |
167 | get_user(bstat->bs_projid, &bstat32->bs_projid) || | 167 | get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) || |
168 | get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) || | ||
168 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || | 169 | get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) || |
169 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || | 170 | get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) || |
170 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) | 171 | get_user(bstat->bs_aextents, &bstat32->bs_aextents)) |
@@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat( | |||
218 | put_user(buffer->bs_extents, &p32->bs_extents) || | 219 | put_user(buffer->bs_extents, &p32->bs_extents) || |
219 | put_user(buffer->bs_gen, &p32->bs_gen) || | 220 | put_user(buffer->bs_gen, &p32->bs_gen) || |
220 | put_user(buffer->bs_projid, &p32->bs_projid) || | 221 | put_user(buffer->bs_projid, &p32->bs_projid) || |
222 | put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) || | ||
221 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || | 223 | put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) || |
222 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || | 224 | put_user(buffer->bs_dmstate, &p32->bs_dmstate) || |
223 | put_user(buffer->bs_aextents, &p32->bs_aextents)) | 225 | put_user(buffer->bs_aextents, &p32->bs_aextents)) |
@@ -574,6 +576,7 @@ xfs_file_compat_ioctl( | |||
574 | case XFS_IOC_FSGEOMETRY_V1: | 576 | case XFS_IOC_FSGEOMETRY_V1: |
575 | case XFS_IOC_FSGROWFSDATA: | 577 | case XFS_IOC_FSGROWFSDATA: |
576 | case XFS_IOC_FSGROWFSRT: | 578 | case XFS_IOC_FSGROWFSRT: |
579 | case XFS_IOC_ZERO_RANGE: | ||
577 | return xfs_file_ioctl(filp, cmd, p); | 580 | return xfs_file_ioctl(filp, cmd, p); |
578 | #else | 581 | #else |
579 | case XFS_IOC_ALLOCSP_32: | 582 | case XFS_IOC_ALLOCSP_32: |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 1024c4f8ba0d..08b605792a99 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h | |||
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat { | |||
65 | __s32 bs_extsize; /* extent size */ | 65 | __s32 bs_extsize; /* extent size */ |
66 | __s32 bs_extents; /* number of extents */ | 66 | __s32 bs_extents; /* number of extents */ |
67 | __u32 bs_gen; /* generation count */ | 67 | __u32 bs_gen; /* generation count */ |
68 | __u16 bs_projid; /* project id */ | 68 | __u16 bs_projid_lo; /* lower part of project id */ |
69 | unsigned char bs_pad[14]; /* pad space, unused */ | 69 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ |
70 | __u16 bs_projid_hi; /* high part of project id */ | ||
71 | unsigned char bs_pad[12]; /* pad space, unused */ | ||
70 | __u32 bs_dmevmask; /* DMIG event mask */ | 72 | __u32 bs_dmevmask; /* DMIG event mask */ |
71 | __u16 bs_dmstate; /* DMIG state info */ | 73 | __u16 bs_dmstate; /* DMIG state info */ |
72 | __u16 bs_aextents; /* attribute number of extents */ | 74 | __u16 bs_aextents; /* attribute number of extents */ |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index b1fc2a6bfe83..96107efc0c61 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -95,41 +95,6 @@ xfs_mark_inode_dirty( | |||
95 | } | 95 | } |
96 | 96 | ||
97 | /* | 97 | /* |
98 | * Change the requested timestamp in the given inode. | ||
99 | * We don't lock across timestamp updates, and we don't log them but | ||
100 | * we do record the fact that there is dirty information in core. | ||
101 | */ | ||
102 | void | ||
103 | xfs_ichgtime( | ||
104 | xfs_inode_t *ip, | ||
105 | int flags) | ||
106 | { | ||
107 | struct inode *inode = VFS_I(ip); | ||
108 | timespec_t tv; | ||
109 | int sync_it = 0; | ||
110 | |||
111 | tv = current_fs_time(inode->i_sb); | ||
112 | |||
113 | if ((flags & XFS_ICHGTIME_MOD) && | ||
114 | !timespec_equal(&inode->i_mtime, &tv)) { | ||
115 | inode->i_mtime = tv; | ||
116 | sync_it = 1; | ||
117 | } | ||
118 | if ((flags & XFS_ICHGTIME_CHG) && | ||
119 | !timespec_equal(&inode->i_ctime, &tv)) { | ||
120 | inode->i_ctime = tv; | ||
121 | sync_it = 1; | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * Update complete - now make sure everyone knows that the inode | ||
126 | * is dirty. | ||
127 | */ | ||
128 | if (sync_it) | ||
129 | xfs_mark_inode_dirty_sync(ip); | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * Hook in SELinux. This is not quite correct yet, what we really need | 98 | * Hook in SELinux. This is not quite correct yet, what we really need |
134 | * here (as we do for default ACLs) is a mechanism by which creation of | 99 | * here (as we do for default ACLs) is a mechanism by which creation of |
135 | * these attrs can be journalled at inode creation time (along with the | 100 | * these attrs can be journalled at inode creation time (along with the |
@@ -224,7 +189,7 @@ xfs_vn_mknod( | |||
224 | } | 189 | } |
225 | 190 | ||
226 | xfs_dentry_to_name(&name, dentry); | 191 | xfs_dentry_to_name(&name, dentry); |
227 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL); | 192 | error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); |
228 | if (unlikely(error)) | 193 | if (unlikely(error)) |
229 | goto out_free_acl; | 194 | goto out_free_acl; |
230 | 195 | ||
@@ -352,7 +317,7 @@ xfs_vn_link( | |||
352 | if (unlikely(error)) | 317 | if (unlikely(error)) |
353 | return -error; | 318 | return -error; |
354 | 319 | ||
355 | atomic_inc(&inode->i_count); | 320 | ihold(inode); |
356 | d_instantiate(dentry, inode); | 321 | d_instantiate(dentry, inode); |
357 | return 0; | 322 | return 0; |
358 | } | 323 | } |
@@ -397,7 +362,7 @@ xfs_vn_symlink( | |||
397 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); | 362 | (irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO); |
398 | xfs_dentry_to_name(&name, dentry); | 363 | xfs_dentry_to_name(&name, dentry); |
399 | 364 | ||
400 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL); | 365 | error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip); |
401 | if (unlikely(error)) | 366 | if (unlikely(error)) |
402 | goto out; | 367 | goto out; |
403 | 368 | ||
@@ -795,7 +760,9 @@ xfs_setup_inode( | |||
795 | 760 | ||
796 | inode->i_ino = ip->i_ino; | 761 | inode->i_ino = ip->i_ino; |
797 | inode->i_state = I_NEW; | 762 | inode->i_state = I_NEW; |
798 | inode_add_to_lists(ip->i_mount->m_super, inode); | 763 | |
764 | inode_sb_list_add(inode); | ||
765 | insert_inode_hash(inode); | ||
799 | 766 | ||
800 | inode->i_mode = ip->i_d.di_mode; | 767 | inode->i_mode = ip->i_d.di_mode; |
801 | inode->i_nlink = ip->i_d.di_nlink; | 768 | inode->i_nlink = ip->i_d.di_nlink; |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 2fa0bd9ebc7f..214ddd71ff79 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
@@ -71,6 +71,7 @@ | |||
71 | #include <linux/random.h> | 71 | #include <linux/random.h> |
72 | #include <linux/ctype.h> | 72 | #include <linux/ctype.h> |
73 | #include <linux/writeback.h> | 73 | #include <linux/writeback.h> |
74 | #include <linux/capability.h> | ||
74 | 75 | ||
75 | #include <asm/page.h> | 76 | #include <asm/page.h> |
76 | #include <asm/div64.h> | 77 | #include <asm/div64.h> |
@@ -79,14 +80,12 @@ | |||
79 | #include <asm/byteorder.h> | 80 | #include <asm/byteorder.h> |
80 | #include <asm/unaligned.h> | 81 | #include <asm/unaligned.h> |
81 | 82 | ||
82 | #include <xfs_cred.h> | ||
83 | #include <xfs_vnode.h> | 83 | #include <xfs_vnode.h> |
84 | #include <xfs_stats.h> | 84 | #include <xfs_stats.h> |
85 | #include <xfs_sysctl.h> | 85 | #include <xfs_sysctl.h> |
86 | #include <xfs_iops.h> | 86 | #include <xfs_iops.h> |
87 | #include <xfs_aops.h> | 87 | #include <xfs_aops.h> |
88 | #include <xfs_super.h> | 88 | #include <xfs_super.h> |
89 | #include <xfs_globals.h> | ||
90 | #include <xfs_buf.h> | 89 | #include <xfs_buf.h> |
91 | 90 | ||
92 | /* | 91 | /* |
@@ -144,7 +143,7 @@ | |||
144 | #define SYNCHRONIZE() barrier() | 143 | #define SYNCHRONIZE() barrier() |
145 | #define __return_address __builtin_return_address(0) | 144 | #define __return_address __builtin_return_address(0) |
146 | 145 | ||
147 | #define dfltprid 0 | 146 | #define XFS_PROJID_DEFAULT 0 |
148 | #define MAXPATHLEN 1024 | 147 | #define MAXPATHLEN 1024 |
149 | 148 | ||
150 | #define MIN(a,b) (min(a,b)) | 149 | #define MIN(a,b) (min(a,b)) |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index a4e07974955b..cf808782c065 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include "xfs_buf_item.h" | 44 | #include "xfs_buf_item.h" |
45 | #include "xfs_utils.h" | 45 | #include "xfs_utils.h" |
46 | #include "xfs_vnodeops.h" | 46 | #include "xfs_vnodeops.h" |
47 | #include "xfs_version.h" | ||
48 | #include "xfs_log_priv.h" | 47 | #include "xfs_log_priv.h" |
49 | #include "xfs_trans_priv.h" | 48 | #include "xfs_trans_priv.h" |
50 | #include "xfs_filestream.h" | 49 | #include "xfs_filestream.h" |
@@ -577,7 +576,7 @@ xfs_max_file_offset( | |||
577 | 576 | ||
578 | /* Figure out maximum filesize, on Linux this can depend on | 577 | /* Figure out maximum filesize, on Linux this can depend on |
579 | * the filesystem blocksize (on 32 bit platforms). | 578 | * the filesystem blocksize (on 32 bit platforms). |
580 | * __block_prepare_write does this in an [unsigned] long... | 579 | * __block_write_begin does this in an [unsigned] long... |
581 | * page->index << (PAGE_CACHE_SHIFT - bbits) | 580 | * page->index << (PAGE_CACHE_SHIFT - bbits) |
582 | * So, for page sized blocks (4K on 32 bit platforms), | 581 | * So, for page sized blocks (4K on 32 bit platforms), |
583 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is | 582 | * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is |
@@ -645,7 +644,7 @@ xfs_barrier_test( | |||
645 | XFS_BUF_ORDERED(sbp); | 644 | XFS_BUF_ORDERED(sbp); |
646 | 645 | ||
647 | xfsbdstrat(mp, sbp); | 646 | xfsbdstrat(mp, sbp); |
648 | error = xfs_iowait(sbp); | 647 | error = xfs_buf_iowait(sbp); |
649 | 648 | ||
650 | /* | 649 | /* |
651 | * Clear all the flags we set and possible error state in the | 650 | * Clear all the flags we set and possible error state in the |
@@ -693,8 +692,7 @@ void | |||
693 | xfs_blkdev_issue_flush( | 692 | xfs_blkdev_issue_flush( |
694 | xfs_buftarg_t *buftarg) | 693 | xfs_buftarg_t *buftarg) |
695 | { | 694 | { |
696 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL, | 695 | blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL); |
697 | BLKDEV_IFL_WAIT); | ||
698 | } | 696 | } |
699 | 697 | ||
700 | STATIC void | 698 | STATIC void |
@@ -758,18 +756,20 @@ xfs_open_devices( | |||
758 | * Setup xfs_mount buffer target pointers | 756 | * Setup xfs_mount buffer target pointers |
759 | */ | 757 | */ |
760 | error = ENOMEM; | 758 | error = ENOMEM; |
761 | mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname); | 759 | mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); |
762 | if (!mp->m_ddev_targp) | 760 | if (!mp->m_ddev_targp) |
763 | goto out_close_rtdev; | 761 | goto out_close_rtdev; |
764 | 762 | ||
765 | if (rtdev) { | 763 | if (rtdev) { |
766 | mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname); | 764 | mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, |
765 | mp->m_fsname); | ||
767 | if (!mp->m_rtdev_targp) | 766 | if (!mp->m_rtdev_targp) |
768 | goto out_free_ddev_targ; | 767 | goto out_free_ddev_targ; |
769 | } | 768 | } |
770 | 769 | ||
771 | if (logdev && logdev != ddev) { | 770 | if (logdev && logdev != ddev) { |
772 | mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname); | 771 | mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, |
772 | mp->m_fsname); | ||
773 | if (!mp->m_logdev_targp) | 773 | if (!mp->m_logdev_targp) |
774 | goto out_free_rtdev_targ; | 774 | goto out_free_rtdev_targ; |
775 | } else { | 775 | } else { |
@@ -972,12 +972,7 @@ xfs_fs_inode_init_once( | |||
972 | 972 | ||
973 | /* | 973 | /* |
974 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that | 974 | * Dirty the XFS inode when mark_inode_dirty_sync() is called so that |
975 | * we catch unlogged VFS level updates to the inode. Care must be taken | 975 | * we catch unlogged VFS level updates to the inode. |
976 | * here - the transaction code calls mark_inode_dirty_sync() to mark the | ||
977 | * VFS inode dirty in a transaction and clears the i_update_core field; | ||
978 | * it must clear the field after calling mark_inode_dirty_sync() to | ||
979 | * correctly indicate that the dirty state has been propagated into the | ||
980 | * inode log item. | ||
981 | * | 976 | * |
982 | * We need the barrier() to maintain correct ordering between unlogged | 977 | * We need the barrier() to maintain correct ordering between unlogged |
983 | * updates and the transaction commit code that clears the i_update_core | 978 | * updates and the transaction commit code that clears the i_update_core |
@@ -1521,8 +1516,9 @@ xfs_fs_fill_super( | |||
1521 | if (error) | 1516 | if (error) |
1522 | goto out_free_fsname; | 1517 | goto out_free_fsname; |
1523 | 1518 | ||
1524 | if (xfs_icsb_init_counters(mp)) | 1519 | error = xfs_icsb_init_counters(mp); |
1525 | mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB; | 1520 | if (error) |
1521 | goto out_close_devices; | ||
1526 | 1522 | ||
1527 | error = xfs_readsb(mp, flags); | 1523 | error = xfs_readsb(mp, flags); |
1528 | if (error) | 1524 | if (error) |
@@ -1583,6 +1579,7 @@ xfs_fs_fill_super( | |||
1583 | xfs_freesb(mp); | 1579 | xfs_freesb(mp); |
1584 | out_destroy_counters: | 1580 | out_destroy_counters: |
1585 | xfs_icsb_destroy_counters(mp); | 1581 | xfs_icsb_destroy_counters(mp); |
1582 | out_close_devices: | ||
1586 | xfs_close_devices(mp); | 1583 | xfs_close_devices(mp); |
1587 | out_free_fsname: | 1584 | out_free_fsname: |
1588 | xfs_free_fsname(mp); | 1585 | xfs_free_fsname(mp); |
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h index 1ef4a4d2d997..50a3266c999e 100644 --- a/fs/xfs/linux-2.6/xfs_super.h +++ b/fs/xfs/linux-2.6/xfs_super.h | |||
@@ -62,6 +62,7 @@ extern void xfs_qm_exit(void); | |||
62 | # define XFS_DBG_STRING "no debug" | 62 | # define XFS_DBG_STRING "no debug" |
63 | #endif | 63 | #endif |
64 | 64 | ||
65 | #define XFS_VERSION_STRING "SGI XFS" | ||
65 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ | 66 | #define XFS_BUILD_OPTIONS XFS_ACL_STRING \ |
66 | XFS_SECURITY_STRING \ | 67 | XFS_SECURITY_STRING \ |
67 | XFS_REALTIME_STRING \ | 68 | XFS_REALTIME_STRING \ |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index d59c4a65d492..37d33254981d 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
@@ -39,42 +39,39 @@ | |||
39 | #include <linux/kthread.h> | 39 | #include <linux/kthread.h> |
40 | #include <linux/freezer.h> | 40 | #include <linux/freezer.h> |
41 | 41 | ||
42 | /* | ||
43 | * The inode lookup is done in batches to keep the amount of lock traffic and | ||
44 | * radix tree lookups to a minimum. The batch size is a trade off between | ||
45 | * lookup reduction and stack usage. This is in the reclaim path, so we can't | ||
46 | * be too greedy. | ||
47 | */ | ||
48 | #define XFS_LOOKUP_BATCH 32 | ||
42 | 49 | ||
43 | STATIC xfs_inode_t * | 50 | STATIC int |
44 | xfs_inode_ag_lookup( | 51 | xfs_inode_ag_walk_grab( |
45 | struct xfs_mount *mp, | 52 | struct xfs_inode *ip) |
46 | struct xfs_perag *pag, | ||
47 | uint32_t *first_index, | ||
48 | int tag) | ||
49 | { | 53 | { |
50 | int nr_found; | 54 | struct inode *inode = VFS_I(ip); |
51 | struct xfs_inode *ip; | ||
52 | 55 | ||
53 | /* | 56 | /* nothing to sync during shutdown */ |
54 | * use a gang lookup to find the next inode in the tree | 57 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
55 | * as the tree is sparse and a gang lookup walks to find | 58 | return EFSCORRUPTED; |
56 | * the number of objects requested. | 59 | |
57 | */ | 60 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ |
58 | if (tag == XFS_ICI_NO_TAG) { | 61 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) |
59 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, | 62 | return ENOENT; |
60 | (void **)&ip, *first_index, 1); | 63 | |
61 | } else { | 64 | /* If we can't grab the inode, it must on it's way to reclaim. */ |
62 | nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root, | 65 | if (!igrab(inode)) |
63 | (void **)&ip, *first_index, 1, tag); | 66 | return ENOENT; |
67 | |||
68 | if (is_bad_inode(inode)) { | ||
69 | IRELE(ip); | ||
70 | return ENOENT; | ||
64 | } | 71 | } |
65 | if (!nr_found) | ||
66 | return NULL; | ||
67 | 72 | ||
68 | /* | 73 | /* inode is valid */ |
69 | * Update the index for the next lookup. Catch overflows | 74 | return 0; |
70 | * into the next AG range which can occur if we have inodes | ||
71 | * in the last block of the AG and we are currently | ||
72 | * pointing to the last inode. | ||
73 | */ | ||
74 | *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
75 | if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
76 | return NULL; | ||
77 | return ip; | ||
78 | } | 75 | } |
79 | 76 | ||
80 | STATIC int | 77 | STATIC int |
@@ -83,49 +80,75 @@ xfs_inode_ag_walk( | |||
83 | struct xfs_perag *pag, | 80 | struct xfs_perag *pag, |
84 | int (*execute)(struct xfs_inode *ip, | 81 | int (*execute)(struct xfs_inode *ip, |
85 | struct xfs_perag *pag, int flags), | 82 | struct xfs_perag *pag, int flags), |
86 | int flags, | 83 | int flags) |
87 | int tag, | ||
88 | int exclusive, | ||
89 | int *nr_to_scan) | ||
90 | { | 84 | { |
91 | uint32_t first_index; | 85 | uint32_t first_index; |
92 | int last_error = 0; | 86 | int last_error = 0; |
93 | int skipped; | 87 | int skipped; |
88 | int done; | ||
89 | int nr_found; | ||
94 | 90 | ||
95 | restart: | 91 | restart: |
92 | done = 0; | ||
96 | skipped = 0; | 93 | skipped = 0; |
97 | first_index = 0; | 94 | first_index = 0; |
95 | nr_found = 0; | ||
98 | do { | 96 | do { |
97 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | ||
99 | int error = 0; | 98 | int error = 0; |
100 | xfs_inode_t *ip; | 99 | int i; |
101 | 100 | ||
102 | if (exclusive) | 101 | read_lock(&pag->pag_ici_lock); |
103 | write_lock(&pag->pag_ici_lock); | 102 | nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, |
104 | else | 103 | (void **)batch, first_index, |
105 | read_lock(&pag->pag_ici_lock); | 104 | XFS_LOOKUP_BATCH); |
106 | ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag); | 105 | if (!nr_found) { |
107 | if (!ip) { | 106 | read_unlock(&pag->pag_ici_lock); |
108 | if (exclusive) | ||
109 | write_unlock(&pag->pag_ici_lock); | ||
110 | else | ||
111 | read_unlock(&pag->pag_ici_lock); | ||
112 | break; | 107 | break; |
113 | } | 108 | } |
114 | 109 | ||
115 | /* execute releases pag->pag_ici_lock */ | 110 | /* |
116 | error = execute(ip, pag, flags); | 111 | * Grab the inodes before we drop the lock. if we found |
117 | if (error == EAGAIN) { | 112 | * nothing, nr == 0 and the loop will be skipped. |
118 | skipped++; | 113 | */ |
119 | continue; | 114 | for (i = 0; i < nr_found; i++) { |
115 | struct xfs_inode *ip = batch[i]; | ||
116 | |||
117 | if (done || xfs_inode_ag_walk_grab(ip)) | ||
118 | batch[i] = NULL; | ||
119 | |||
120 | /* | ||
121 | * Update the index for the next lookup. Catch overflows | ||
122 | * into the next AG range which can occur if we have inodes | ||
123 | * in the last block of the AG and we are currently | ||
124 | * pointing to the last inode. | ||
125 | */ | ||
126 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
127 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
128 | done = 1; | ||
129 | } | ||
130 | |||
131 | /* unlock now we've grabbed the inodes. */ | ||
132 | read_unlock(&pag->pag_ici_lock); | ||
133 | |||
134 | for (i = 0; i < nr_found; i++) { | ||
135 | if (!batch[i]) | ||
136 | continue; | ||
137 | error = execute(batch[i], pag, flags); | ||
138 | IRELE(batch[i]); | ||
139 | if (error == EAGAIN) { | ||
140 | skipped++; | ||
141 | continue; | ||
142 | } | ||
143 | if (error && last_error != EFSCORRUPTED) | ||
144 | last_error = error; | ||
120 | } | 145 | } |
121 | if (error) | ||
122 | last_error = error; | ||
123 | 146 | ||
124 | /* bail out if the filesystem is corrupted. */ | 147 | /* bail out if the filesystem is corrupted. */ |
125 | if (error == EFSCORRUPTED) | 148 | if (error == EFSCORRUPTED) |
126 | break; | 149 | break; |
127 | 150 | ||
128 | } while ((*nr_to_scan)--); | 151 | } while (nr_found && !done); |
129 | 152 | ||
130 | if (skipped) { | 153 | if (skipped) { |
131 | delay(1); | 154 | delay(1); |
@@ -134,110 +157,32 @@ restart: | |||
134 | return last_error; | 157 | return last_error; |
135 | } | 158 | } |
136 | 159 | ||
137 | /* | ||
138 | * Select the next per-ag structure to iterate during the walk. The reclaim | ||
139 | * walk is optimised only to walk AGs with reclaimable inodes in them. | ||
140 | */ | ||
141 | static struct xfs_perag * | ||
142 | xfs_inode_ag_iter_next_pag( | ||
143 | struct xfs_mount *mp, | ||
144 | xfs_agnumber_t *first, | ||
145 | int tag) | ||
146 | { | ||
147 | struct xfs_perag *pag = NULL; | ||
148 | |||
149 | if (tag == XFS_ICI_RECLAIM_TAG) { | ||
150 | int found; | ||
151 | int ref; | ||
152 | |||
153 | spin_lock(&mp->m_perag_lock); | ||
154 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
155 | (void **)&pag, *first, 1, tag); | ||
156 | if (found <= 0) { | ||
157 | spin_unlock(&mp->m_perag_lock); | ||
158 | return NULL; | ||
159 | } | ||
160 | *first = pag->pag_agno + 1; | ||
161 | /* open coded pag reference increment */ | ||
162 | ref = atomic_inc_return(&pag->pag_ref); | ||
163 | spin_unlock(&mp->m_perag_lock); | ||
164 | trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_); | ||
165 | } else { | ||
166 | pag = xfs_perag_get(mp, *first); | ||
167 | (*first)++; | ||
168 | } | ||
169 | return pag; | ||
170 | } | ||
171 | |||
172 | int | 160 | int |
173 | xfs_inode_ag_iterator( | 161 | xfs_inode_ag_iterator( |
174 | struct xfs_mount *mp, | 162 | struct xfs_mount *mp, |
175 | int (*execute)(struct xfs_inode *ip, | 163 | int (*execute)(struct xfs_inode *ip, |
176 | struct xfs_perag *pag, int flags), | 164 | struct xfs_perag *pag, int flags), |
177 | int flags, | 165 | int flags) |
178 | int tag, | ||
179 | int exclusive, | ||
180 | int *nr_to_scan) | ||
181 | { | 166 | { |
182 | struct xfs_perag *pag; | 167 | struct xfs_perag *pag; |
183 | int error = 0; | 168 | int error = 0; |
184 | int last_error = 0; | 169 | int last_error = 0; |
185 | xfs_agnumber_t ag; | 170 | xfs_agnumber_t ag; |
186 | int nr; | ||
187 | 171 | ||
188 | nr = nr_to_scan ? *nr_to_scan : INT_MAX; | ||
189 | ag = 0; | 172 | ag = 0; |
190 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) { | 173 | while ((pag = xfs_perag_get(mp, ag))) { |
191 | error = xfs_inode_ag_walk(mp, pag, execute, flags, tag, | 174 | ag = pag->pag_agno + 1; |
192 | exclusive, &nr); | 175 | error = xfs_inode_ag_walk(mp, pag, execute, flags); |
193 | xfs_perag_put(pag); | 176 | xfs_perag_put(pag); |
194 | if (error) { | 177 | if (error) { |
195 | last_error = error; | 178 | last_error = error; |
196 | if (error == EFSCORRUPTED) | 179 | if (error == EFSCORRUPTED) |
197 | break; | 180 | break; |
198 | } | 181 | } |
199 | if (nr <= 0) | ||
200 | break; | ||
201 | } | 182 | } |
202 | if (nr_to_scan) | ||
203 | *nr_to_scan = nr; | ||
204 | return XFS_ERROR(last_error); | 183 | return XFS_ERROR(last_error); |
205 | } | 184 | } |
206 | 185 | ||
207 | /* must be called with pag_ici_lock held and releases it */ | ||
208 | int | ||
209 | xfs_sync_inode_valid( | ||
210 | struct xfs_inode *ip, | ||
211 | struct xfs_perag *pag) | ||
212 | { | ||
213 | struct inode *inode = VFS_I(ip); | ||
214 | int error = EFSCORRUPTED; | ||
215 | |||
216 | /* nothing to sync during shutdown */ | ||
217 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | ||
218 | goto out_unlock; | ||
219 | |||
220 | /* avoid new or reclaimable inodes. Leave for reclaim code to flush */ | ||
221 | error = ENOENT; | ||
222 | if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM)) | ||
223 | goto out_unlock; | ||
224 | |||
225 | /* If we can't grab the inode, it must on it's way to reclaim. */ | ||
226 | if (!igrab(inode)) | ||
227 | goto out_unlock; | ||
228 | |||
229 | if (is_bad_inode(inode)) { | ||
230 | IRELE(ip); | ||
231 | goto out_unlock; | ||
232 | } | ||
233 | |||
234 | /* inode is valid */ | ||
235 | error = 0; | ||
236 | out_unlock: | ||
237 | read_unlock(&pag->pag_ici_lock); | ||
238 | return error; | ||
239 | } | ||
240 | |||
241 | STATIC int | 186 | STATIC int |
242 | xfs_sync_inode_data( | 187 | xfs_sync_inode_data( |
243 | struct xfs_inode *ip, | 188 | struct xfs_inode *ip, |
@@ -248,10 +193,6 @@ xfs_sync_inode_data( | |||
248 | struct address_space *mapping = inode->i_mapping; | 193 | struct address_space *mapping = inode->i_mapping; |
249 | int error = 0; | 194 | int error = 0; |
250 | 195 | ||
251 | error = xfs_sync_inode_valid(ip, pag); | ||
252 | if (error) | ||
253 | return error; | ||
254 | |||
255 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 196 | if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
256 | goto out_wait; | 197 | goto out_wait; |
257 | 198 | ||
@@ -268,7 +209,6 @@ xfs_sync_inode_data( | |||
268 | out_wait: | 209 | out_wait: |
269 | if (flags & SYNC_WAIT) | 210 | if (flags & SYNC_WAIT) |
270 | xfs_ioend_wait(ip); | 211 | xfs_ioend_wait(ip); |
271 | IRELE(ip); | ||
272 | return error; | 212 | return error; |
273 | } | 213 | } |
274 | 214 | ||
@@ -280,10 +220,6 @@ xfs_sync_inode_attr( | |||
280 | { | 220 | { |
281 | int error = 0; | 221 | int error = 0; |
282 | 222 | ||
283 | error = xfs_sync_inode_valid(ip, pag); | ||
284 | if (error) | ||
285 | return error; | ||
286 | |||
287 | xfs_ilock(ip, XFS_ILOCK_SHARED); | 223 | xfs_ilock(ip, XFS_ILOCK_SHARED); |
288 | if (xfs_inode_clean(ip)) | 224 | if (xfs_inode_clean(ip)) |
289 | goto out_unlock; | 225 | goto out_unlock; |
@@ -302,7 +238,6 @@ xfs_sync_inode_attr( | |||
302 | 238 | ||
303 | out_unlock: | 239 | out_unlock: |
304 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 240 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
305 | IRELE(ip); | ||
306 | return error; | 241 | return error; |
307 | } | 242 | } |
308 | 243 | ||
@@ -318,8 +253,7 @@ xfs_sync_data( | |||
318 | 253 | ||
319 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); | 254 | ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0); |
320 | 255 | ||
321 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags, | 256 | error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags); |
322 | XFS_ICI_NO_TAG, 0, NULL); | ||
323 | if (error) | 257 | if (error) |
324 | return XFS_ERROR(error); | 258 | return XFS_ERROR(error); |
325 | 259 | ||
@@ -337,8 +271,7 @@ xfs_sync_attr( | |||
337 | { | 271 | { |
338 | ASSERT((flags & ~SYNC_WAIT) == 0); | 272 | ASSERT((flags & ~SYNC_WAIT) == 0); |
339 | 273 | ||
340 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags, | 274 | return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags); |
341 | XFS_ICI_NO_TAG, 0, NULL); | ||
342 | } | 275 | } |
343 | 276 | ||
344 | STATIC int | 277 | STATIC int |
@@ -668,14 +601,11 @@ xfs_inode_set_reclaim_tag( | |||
668 | xfs_perag_put(pag); | 601 | xfs_perag_put(pag); |
669 | } | 602 | } |
670 | 603 | ||
671 | void | 604 | STATIC void |
672 | __xfs_inode_clear_reclaim_tag( | 605 | __xfs_inode_clear_reclaim( |
673 | xfs_mount_t *mp, | ||
674 | xfs_perag_t *pag, | 606 | xfs_perag_t *pag, |
675 | xfs_inode_t *ip) | 607 | xfs_inode_t *ip) |
676 | { | 608 | { |
677 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
678 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
679 | pag->pag_ici_reclaimable--; | 609 | pag->pag_ici_reclaimable--; |
680 | if (!pag->pag_ici_reclaimable) { | 610 | if (!pag->pag_ici_reclaimable) { |
681 | /* clear the reclaim tag from the perag radix tree */ | 611 | /* clear the reclaim tag from the perag radix tree */ |
@@ -689,6 +619,54 @@ __xfs_inode_clear_reclaim_tag( | |||
689 | } | 619 | } |
690 | } | 620 | } |
691 | 621 | ||
622 | void | ||
623 | __xfs_inode_clear_reclaim_tag( | ||
624 | xfs_mount_t *mp, | ||
625 | xfs_perag_t *pag, | ||
626 | xfs_inode_t *ip) | ||
627 | { | ||
628 | radix_tree_tag_clear(&pag->pag_ici_root, | ||
629 | XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG); | ||
630 | __xfs_inode_clear_reclaim(pag, ip); | ||
631 | } | ||
632 | |||
633 | /* | ||
634 | * Grab the inode for reclaim exclusively. | ||
635 | * Return 0 if we grabbed it, non-zero otherwise. | ||
636 | */ | ||
637 | STATIC int | ||
638 | xfs_reclaim_inode_grab( | ||
639 | struct xfs_inode *ip, | ||
640 | int flags) | ||
641 | { | ||
642 | |||
643 | /* | ||
644 | * do some unlocked checks first to avoid unnecceary lock traffic. | ||
645 | * The first is a flush lock check, the second is a already in reclaim | ||
646 | * check. Only do these checks if we are not going to block on locks. | ||
647 | */ | ||
648 | if ((flags & SYNC_TRYLOCK) && | ||
649 | (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) { | ||
650 | return 1; | ||
651 | } | ||
652 | |||
653 | /* | ||
654 | * The radix tree lock here protects a thread in xfs_iget from racing | ||
655 | * with us starting reclaim on the inode. Once we have the | ||
656 | * XFS_IRECLAIM flag set it will not touch us. | ||
657 | */ | ||
658 | spin_lock(&ip->i_flags_lock); | ||
659 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
660 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
661 | /* ignore as it is already under reclaim */ | ||
662 | spin_unlock(&ip->i_flags_lock); | ||
663 | return 1; | ||
664 | } | ||
665 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
666 | spin_unlock(&ip->i_flags_lock); | ||
667 | return 0; | ||
668 | } | ||
669 | |||
692 | /* | 670 | /* |
693 | * Inodes in different states need to be treated differently, and the return | 671 | * Inodes in different states need to be treated differently, and the return |
694 | * value of xfs_iflush is not sufficient to get this right. The following table | 672 | * value of xfs_iflush is not sufficient to get this right. The following table |
@@ -747,23 +725,6 @@ xfs_reclaim_inode( | |||
747 | { | 725 | { |
748 | int error = 0; | 726 | int error = 0; |
749 | 727 | ||
750 | /* | ||
751 | * The radix tree lock here protects a thread in xfs_iget from racing | ||
752 | * with us starting reclaim on the inode. Once we have the | ||
753 | * XFS_IRECLAIM flag set it will not touch us. | ||
754 | */ | ||
755 | spin_lock(&ip->i_flags_lock); | ||
756 | ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE)); | ||
757 | if (__xfs_iflags_test(ip, XFS_IRECLAIM)) { | ||
758 | /* ignore as it is already under reclaim */ | ||
759 | spin_unlock(&ip->i_flags_lock); | ||
760 | write_unlock(&pag->pag_ici_lock); | ||
761 | return 0; | ||
762 | } | ||
763 | __xfs_iflags_set(ip, XFS_IRECLAIM); | ||
764 | spin_unlock(&ip->i_flags_lock); | ||
765 | write_unlock(&pag->pag_ici_lock); | ||
766 | |||
767 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 728 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
768 | if (!xfs_iflock_nowait(ip)) { | 729 | if (!xfs_iflock_nowait(ip)) { |
769 | if (!(sync_mode & SYNC_WAIT)) | 730 | if (!(sync_mode & SYNC_WAIT)) |
@@ -838,6 +799,7 @@ reclaim: | |||
838 | if (!radix_tree_delete(&pag->pag_ici_root, | 799 | if (!radix_tree_delete(&pag->pag_ici_root, |
839 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) | 800 | XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino))) |
840 | ASSERT(0); | 801 | ASSERT(0); |
802 | __xfs_inode_clear_reclaim(pag, ip); | ||
841 | write_unlock(&pag->pag_ici_lock); | 803 | write_unlock(&pag->pag_ici_lock); |
842 | 804 | ||
843 | /* | 805 | /* |
@@ -859,13 +821,126 @@ reclaim: | |||
859 | 821 | ||
860 | } | 822 | } |
861 | 823 | ||
824 | /* | ||
825 | * Walk the AGs and reclaim the inodes in them. Even if the filesystem is | ||
826 | * corrupted, we still want to try to reclaim all the inodes. If we don't, | ||
827 | * then a shut down during filesystem unmount reclaim walk leak all the | ||
828 | * unreclaimed inodes. | ||
829 | */ | ||
830 | int | ||
831 | xfs_reclaim_inodes_ag( | ||
832 | struct xfs_mount *mp, | ||
833 | int flags, | ||
834 | int *nr_to_scan) | ||
835 | { | ||
836 | struct xfs_perag *pag; | ||
837 | int error = 0; | ||
838 | int last_error = 0; | ||
839 | xfs_agnumber_t ag; | ||
840 | int trylock = flags & SYNC_TRYLOCK; | ||
841 | int skipped; | ||
842 | |||
843 | restart: | ||
844 | ag = 0; | ||
845 | skipped = 0; | ||
846 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { | ||
847 | unsigned long first_index = 0; | ||
848 | int done = 0; | ||
849 | int nr_found = 0; | ||
850 | |||
851 | ag = pag->pag_agno + 1; | ||
852 | |||
853 | if (trylock) { | ||
854 | if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) { | ||
855 | skipped++; | ||
856 | continue; | ||
857 | } | ||
858 | first_index = pag->pag_ici_reclaim_cursor; | ||
859 | } else | ||
860 | mutex_lock(&pag->pag_ici_reclaim_lock); | ||
861 | |||
862 | do { | ||
863 | struct xfs_inode *batch[XFS_LOOKUP_BATCH]; | ||
864 | int i; | ||
865 | |||
866 | write_lock(&pag->pag_ici_lock); | ||
867 | nr_found = radix_tree_gang_lookup_tag( | ||
868 | &pag->pag_ici_root, | ||
869 | (void **)batch, first_index, | ||
870 | XFS_LOOKUP_BATCH, | ||
871 | XFS_ICI_RECLAIM_TAG); | ||
872 | if (!nr_found) { | ||
873 | write_unlock(&pag->pag_ici_lock); | ||
874 | break; | ||
875 | } | ||
876 | |||
877 | /* | ||
878 | * Grab the inodes before we drop the lock. if we found | ||
879 | * nothing, nr == 0 and the loop will be skipped. | ||
880 | */ | ||
881 | for (i = 0; i < nr_found; i++) { | ||
882 | struct xfs_inode *ip = batch[i]; | ||
883 | |||
884 | if (done || xfs_reclaim_inode_grab(ip, flags)) | ||
885 | batch[i] = NULL; | ||
886 | |||
887 | /* | ||
888 | * Update the index for the next lookup. Catch | ||
889 | * overflows into the next AG range which can | ||
890 | * occur if we have inodes in the last block of | ||
891 | * the AG and we are currently pointing to the | ||
892 | * last inode. | ||
893 | */ | ||
894 | first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1); | ||
895 | if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino)) | ||
896 | done = 1; | ||
897 | } | ||
898 | |||
899 | /* unlock now we've grabbed the inodes. */ | ||
900 | write_unlock(&pag->pag_ici_lock); | ||
901 | |||
902 | for (i = 0; i < nr_found; i++) { | ||
903 | if (!batch[i]) | ||
904 | continue; | ||
905 | error = xfs_reclaim_inode(batch[i], pag, flags); | ||
906 | if (error && last_error != EFSCORRUPTED) | ||
907 | last_error = error; | ||
908 | } | ||
909 | |||
910 | *nr_to_scan -= XFS_LOOKUP_BATCH; | ||
911 | |||
912 | } while (nr_found && !done && *nr_to_scan > 0); | ||
913 | |||
914 | if (trylock && !done) | ||
915 | pag->pag_ici_reclaim_cursor = first_index; | ||
916 | else | ||
917 | pag->pag_ici_reclaim_cursor = 0; | ||
918 | mutex_unlock(&pag->pag_ici_reclaim_lock); | ||
919 | xfs_perag_put(pag); | ||
920 | } | ||
921 | |||
922 | /* | ||
923 | * if we skipped any AG, and we still have scan count remaining, do | ||
924 | * another pass this time using blocking reclaim semantics (i.e | ||
925 | * waiting on the reclaim locks and ignoring the reclaim cursors). This | ||
926 | * ensure that when we get more reclaimers than AGs we block rather | ||
927 | * than spin trying to execute reclaim. | ||
928 | */ | ||
929 | if (trylock && skipped && *nr_to_scan > 0) { | ||
930 | trylock = 0; | ||
931 | goto restart; | ||
932 | } | ||
933 | return XFS_ERROR(last_error); | ||
934 | } | ||
935 | |||
862 | int | 936 | int |
863 | xfs_reclaim_inodes( | 937 | xfs_reclaim_inodes( |
864 | xfs_mount_t *mp, | 938 | xfs_mount_t *mp, |
865 | int mode) | 939 | int mode) |
866 | { | 940 | { |
867 | return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode, | 941 | int nr_to_scan = INT_MAX; |
868 | XFS_ICI_RECLAIM_TAG, 1, NULL); | 942 | |
943 | return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan); | ||
869 | } | 944 | } |
870 | 945 | ||
871 | /* | 946 | /* |
@@ -887,17 +962,16 @@ xfs_reclaim_inode_shrink( | |||
887 | if (!(gfp_mask & __GFP_FS)) | 962 | if (!(gfp_mask & __GFP_FS)) |
888 | return -1; | 963 | return -1; |
889 | 964 | ||
890 | xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0, | 965 | xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan); |
891 | XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan); | 966 | /* terminate if we don't exhaust the scan */ |
892 | /* if we don't exhaust the scan, don't bother coming back */ | ||
893 | if (nr_to_scan > 0) | 967 | if (nr_to_scan > 0) |
894 | return -1; | 968 | return -1; |
895 | } | 969 | } |
896 | 970 | ||
897 | reclaimable = 0; | 971 | reclaimable = 0; |
898 | ag = 0; | 972 | ag = 0; |
899 | while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, | 973 | while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) { |
900 | XFS_ICI_RECLAIM_TAG))) { | 974 | ag = pag->pag_agno + 1; |
901 | reclaimable += pag->pag_ici_reclaimable; | 975 | reclaimable += pag->pag_ici_reclaimable; |
902 | xfs_perag_put(pag); | 976 | xfs_perag_put(pag); |
903 | } | 977 | } |
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h index fe78726196f8..32ba6628290c 100644 --- a/fs/xfs/linux-2.6/xfs_sync.h +++ b/fs/xfs/linux-2.6/xfs_sync.h | |||
@@ -47,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip); | |||
47 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, | 47 | void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag, |
48 | struct xfs_inode *ip); | 48 | struct xfs_inode *ip); |
49 | 49 | ||
50 | int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag); | 50 | int xfs_sync_inode_grab(struct xfs_inode *ip); |
51 | int xfs_inode_ag_iterator(struct xfs_mount *mp, | 51 | int xfs_inode_ag_iterator(struct xfs_mount *mp, |
52 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), | 52 | int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags), |
53 | int flags, int tag, int write_lock, int *nr_to_scan); | 53 | int flags); |
54 | 54 | ||
55 | void xfs_inode_shrinker_register(struct xfs_mount *mp); | 55 | void xfs_inode_shrinker_register(struct xfs_mount *mp); |
56 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); | 56 | void xfs_inode_shrinker_unregister(struct xfs_mount *mp); |
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index be5dffd282a1..acef2e98c594 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \ | |||
124 | unsigned long caller_ip), \ | 124 | unsigned long caller_ip), \ |
125 | TP_ARGS(mp, agno, refcount, caller_ip)) | 125 | TP_ARGS(mp, agno, refcount, caller_ip)) |
126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); | 126 | DEFINE_PERAG_REF_EVENT(xfs_perag_get); |
127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim); | 127 | DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag); |
128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); | 128 | DEFINE_PERAG_REF_EVENT(xfs_perag_put); |
129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); | 129 | DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim); |
130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); | 130 | DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim); |
@@ -325,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock); | |||
325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); | 325 | DEFINE_BUF_EVENT(xfs_buf_lock_done); |
326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); | 326 | DEFINE_BUF_EVENT(xfs_buf_cond_lock); |
327 | DEFINE_BUF_EVENT(xfs_buf_unlock); | 327 | DEFINE_BUF_EVENT(xfs_buf_unlock); |
328 | DEFINE_BUF_EVENT(xfs_buf_ordered_retry); | ||
329 | DEFINE_BUF_EVENT(xfs_buf_iowait); | 328 | DEFINE_BUF_EVENT(xfs_buf_iowait); |
330 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); | 329 | DEFINE_BUF_EVENT(xfs_buf_iowait_done); |
331 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); | 330 | DEFINE_BUF_EVENT(xfs_buf_delwri_queue); |
332 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); | 331 | DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue); |
333 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); | 332 | DEFINE_BUF_EVENT(xfs_buf_delwri_split); |
334 | DEFINE_BUF_EVENT(xfs_buf_get_noaddr); | 333 | DEFINE_BUF_EVENT(xfs_buf_get_uncached); |
335 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); | 334 | DEFINE_BUF_EVENT(xfs_bdstrat_shut); |
336 | DEFINE_BUF_EVENT(xfs_buf_item_relse); | 335 | DEFINE_BUF_EVENT(xfs_buf_item_relse); |
337 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); | 336 | DEFINE_BUF_EVENT(xfs_buf_item_iodone); |
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h deleted file mode 100644 index f8d279d7563a..000000000000 --- a/fs/xfs/linux-2.6/xfs_version.h +++ /dev/null | |||
@@ -1,29 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_VERSION_H__ | ||
19 | #define __XFS_VERSION_H__ | ||
20 | |||
21 | /* | ||
22 | * Dummy file that can contain a timestamp to put into the | ||
23 | * XFS init string, to help users keep track of what they're | ||
24 | * running | ||
25 | */ | ||
26 | |||
27 | #define XFS_VERSION_STRING "SGI XFS" | ||
28 | |||
29 | #endif /* __XFS_VERSION_H__ */ | ||
diff --git a/fs/xfs/quota/xfs_dquot.c b/fs/xfs/quota/xfs_dquot.c index e1a2f6800e01..faf8e1a83a12 100644 --- a/fs/xfs/quota/xfs_dquot.c +++ b/fs/xfs/quota/xfs_dquot.c | |||
@@ -463,87 +463,68 @@ xfs_qm_dqtobp( | |||
463 | uint flags) | 463 | uint flags) |
464 | { | 464 | { |
465 | xfs_bmbt_irec_t map; | 465 | xfs_bmbt_irec_t map; |
466 | int nmaps, error; | 466 | int nmaps = 1, error; |
467 | xfs_buf_t *bp; | 467 | xfs_buf_t *bp; |
468 | xfs_inode_t *quotip; | 468 | xfs_inode_t *quotip = XFS_DQ_TO_QIP(dqp); |
469 | xfs_mount_t *mp; | 469 | xfs_mount_t *mp = dqp->q_mount; |
470 | xfs_disk_dquot_t *ddq; | 470 | xfs_disk_dquot_t *ddq; |
471 | xfs_dqid_t id; | 471 | xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); |
472 | boolean_t newdquot; | ||
473 | xfs_trans_t *tp = (tpp ? *tpp : NULL); | 472 | xfs_trans_t *tp = (tpp ? *tpp : NULL); |
474 | 473 | ||
475 | mp = dqp->q_mount; | 474 | dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; |
476 | id = be32_to_cpu(dqp->q_core.d_id); | ||
477 | nmaps = 1; | ||
478 | newdquot = B_FALSE; | ||
479 | 475 | ||
480 | /* | 476 | xfs_ilock(quotip, XFS_ILOCK_SHARED); |
481 | * If we don't know where the dquot lives, find out. | 477 | if (XFS_IS_THIS_QUOTA_OFF(dqp)) { |
482 | */ | ||
483 | if (dqp->q_blkno == (xfs_daddr_t) 0) { | ||
484 | /* We use the id as an index */ | ||
485 | dqp->q_fileoffset = (xfs_fileoff_t)id / | ||
486 | mp->m_quotainfo->qi_dqperchunk; | ||
487 | nmaps = 1; | ||
488 | quotip = XFS_DQ_TO_QIP(dqp); | ||
489 | xfs_ilock(quotip, XFS_ILOCK_SHARED); | ||
490 | /* | 478 | /* |
491 | * Return if this type of quotas is turned off while we didn't | 479 | * Return if this type of quotas is turned off while we |
492 | * have an inode lock | 480 | * didn't have the quota inode lock. |
493 | */ | 481 | */ |
494 | if (XFS_IS_THIS_QUOTA_OFF(dqp)) { | 482 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); |
495 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 483 | return ESRCH; |
496 | return (ESRCH); | 484 | } |
497 | } | 485 | |
486 | /* | ||
487 | * Find the block map; no allocations yet | ||
488 | */ | ||
489 | error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, | ||
490 | XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, | ||
491 | NULL, 0, &map, &nmaps, NULL); | ||
492 | |||
493 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | ||
494 | if (error) | ||
495 | return error; | ||
496 | |||
497 | ASSERT(nmaps == 1); | ||
498 | ASSERT(map.br_blockcount == 1); | ||
499 | |||
500 | /* | ||
501 | * Offset of dquot in the (fixed sized) dquot chunk. | ||
502 | */ | ||
503 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * | ||
504 | sizeof(xfs_dqblk_t); | ||
505 | |||
506 | ASSERT(map.br_startblock != DELAYSTARTBLOCK); | ||
507 | if (map.br_startblock == HOLESTARTBLOCK) { | ||
498 | /* | 508 | /* |
499 | * Find the block map; no allocations yet | 509 | * We don't allocate unless we're asked to |
500 | */ | 510 | */ |
501 | error = xfs_bmapi(NULL, quotip, dqp->q_fileoffset, | 511 | if (!(flags & XFS_QMOPT_DQALLOC)) |
502 | XFS_DQUOT_CLUSTER_SIZE_FSB, | 512 | return ENOENT; |
503 | XFS_BMAPI_METADATA, | ||
504 | NULL, 0, &map, &nmaps, NULL); | ||
505 | 513 | ||
506 | xfs_iunlock(quotip, XFS_ILOCK_SHARED); | 514 | ASSERT(tp); |
515 | error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, | ||
516 | dqp->q_fileoffset, &bp); | ||
507 | if (error) | 517 | if (error) |
508 | return (error); | 518 | return error; |
509 | ASSERT(nmaps == 1); | 519 | tp = *tpp; |
510 | ASSERT(map.br_blockcount == 1); | 520 | } else { |
521 | trace_xfs_dqtobp_read(dqp); | ||
511 | 522 | ||
512 | /* | 523 | /* |
513 | * offset of dquot in the (fixed sized) dquot chunk. | 524 | * store the blkno etc so that we don't have to do the |
525 | * mapping all the time | ||
514 | */ | 526 | */ |
515 | dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * | 527 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); |
516 | sizeof(xfs_dqblk_t); | ||
517 | if (map.br_startblock == HOLESTARTBLOCK) { | ||
518 | /* | ||
519 | * We don't allocate unless we're asked to | ||
520 | */ | ||
521 | if (!(flags & XFS_QMOPT_DQALLOC)) | ||
522 | return (ENOENT); | ||
523 | |||
524 | ASSERT(tp); | ||
525 | if ((error = xfs_qm_dqalloc(tpp, mp, dqp, quotip, | ||
526 | dqp->q_fileoffset, &bp))) | ||
527 | return (error); | ||
528 | tp = *tpp; | ||
529 | newdquot = B_TRUE; | ||
530 | } else { | ||
531 | /* | ||
532 | * store the blkno etc so that we don't have to do the | ||
533 | * mapping all the time | ||
534 | */ | ||
535 | dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); | ||
536 | } | ||
537 | } | ||
538 | ASSERT(dqp->q_blkno != DELAYSTARTBLOCK); | ||
539 | ASSERT(dqp->q_blkno != HOLESTARTBLOCK); | ||
540 | |||
541 | /* | ||
542 | * Read in the buffer, unless we've just done the allocation | ||
543 | * (in which case we already have the buf). | ||
544 | */ | ||
545 | if (!newdquot) { | ||
546 | trace_xfs_dqtobp_read(dqp); | ||
547 | 528 | ||
548 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, | 529 | error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, |
549 | dqp->q_blkno, | 530 | dqp->q_blkno, |
@@ -552,13 +533,14 @@ xfs_qm_dqtobp( | |||
552 | if (error || !bp) | 533 | if (error || !bp) |
553 | return XFS_ERROR(error); | 534 | return XFS_ERROR(error); |
554 | } | 535 | } |
536 | |||
555 | ASSERT(XFS_BUF_ISBUSY(bp)); | 537 | ASSERT(XFS_BUF_ISBUSY(bp)); |
556 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | 538 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); |
557 | 539 | ||
558 | /* | 540 | /* |
559 | * calculate the location of the dquot inside the buffer. | 541 | * calculate the location of the dquot inside the buffer. |
560 | */ | 542 | */ |
561 | ddq = (xfs_disk_dquot_t *)((char *)XFS_BUF_PTR(bp) + dqp->q_bufoffset); | 543 | ddq = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); |
562 | 544 | ||
563 | /* | 545 | /* |
564 | * A simple sanity check in case we got a corrupted dquot... | 546 | * A simple sanity check in case we got a corrupted dquot... |
@@ -1176,18 +1158,18 @@ xfs_qm_dqflush( | |||
1176 | xfs_dquot_t *dqp, | 1158 | xfs_dquot_t *dqp, |
1177 | uint flags) | 1159 | uint flags) |
1178 | { | 1160 | { |
1179 | xfs_mount_t *mp; | 1161 | struct xfs_mount *mp = dqp->q_mount; |
1180 | xfs_buf_t *bp; | 1162 | struct xfs_buf *bp; |
1181 | xfs_disk_dquot_t *ddqp; | 1163 | struct xfs_disk_dquot *ddqp; |
1182 | int error; | 1164 | int error; |
1183 | 1165 | ||
1184 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 1166 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); |
1185 | ASSERT(!completion_done(&dqp->q_flush)); | 1167 | ASSERT(!completion_done(&dqp->q_flush)); |
1168 | |||
1186 | trace_xfs_dqflush(dqp); | 1169 | trace_xfs_dqflush(dqp); |
1187 | 1170 | ||
1188 | /* | 1171 | /* |
1189 | * If not dirty, or it's pinned and we are not supposed to | 1172 | * If not dirty, or it's pinned and we are not supposed to block, nada. |
1190 | * block, nada. | ||
1191 | */ | 1173 | */ |
1192 | if (!XFS_DQ_IS_DIRTY(dqp) || | 1174 | if (!XFS_DQ_IS_DIRTY(dqp) || |
1193 | (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { | 1175 | (!(flags & SYNC_WAIT) && atomic_read(&dqp->q_pincount) > 0)) { |
@@ -1201,40 +1183,46 @@ xfs_qm_dqflush( | |||
1201 | * down forcibly. If that's the case we must not write this dquot | 1183 | * down forcibly. If that's the case we must not write this dquot |
1202 | * to disk, because the log record didn't make it to disk! | 1184 | * to disk, because the log record didn't make it to disk! |
1203 | */ | 1185 | */ |
1204 | if (XFS_FORCED_SHUTDOWN(dqp->q_mount)) { | 1186 | if (XFS_FORCED_SHUTDOWN(mp)) { |
1205 | dqp->dq_flags &= ~(XFS_DQ_DIRTY); | 1187 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
1206 | xfs_dqfunlock(dqp); | 1188 | xfs_dqfunlock(dqp); |
1207 | return XFS_ERROR(EIO); | 1189 | return XFS_ERROR(EIO); |
1208 | } | 1190 | } |
1209 | 1191 | ||
1210 | /* | 1192 | /* |
1211 | * Get the buffer containing the on-disk dquot | 1193 | * Get the buffer containing the on-disk dquot |
1212 | * We don't need a transaction envelope because we know that the | ||
1213 | * the ondisk-dquot has already been allocated for. | ||
1214 | */ | 1194 | */ |
1215 | if ((error = xfs_qm_dqtobp(NULL, dqp, &ddqp, &bp, XFS_QMOPT_DOWARN))) { | 1195 | error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, |
1196 | mp->m_quotainfo->qi_dqchunklen, 0, &bp); | ||
1197 | if (error) { | ||
1216 | ASSERT(error != ENOENT); | 1198 | ASSERT(error != ENOENT); |
1217 | /* | ||
1218 | * Quotas could have gotten turned off (ESRCH) | ||
1219 | */ | ||
1220 | xfs_dqfunlock(dqp); | 1199 | xfs_dqfunlock(dqp); |
1221 | return (error); | 1200 | return error; |
1222 | } | 1201 | } |
1223 | 1202 | ||
1224 | if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), | 1203 | /* |
1225 | 0, XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { | 1204 | * Calculate the location of the dquot inside the buffer. |
1226 | xfs_force_shutdown(dqp->q_mount, SHUTDOWN_CORRUPT_INCORE); | 1205 | */ |
1206 | ddqp = (struct xfs_disk_dquot *)(XFS_BUF_PTR(bp) + dqp->q_bufoffset); | ||
1207 | |||
1208 | /* | ||
1209 | * A simple sanity check in case we got a corrupted dquot.. | ||
1210 | */ | ||
1211 | if (xfs_qm_dqcheck(&dqp->q_core, be32_to_cpu(ddqp->d_id), 0, | ||
1212 | XFS_QMOPT_DOWARN, "dqflush (incore copy)")) { | ||
1213 | xfs_buf_relse(bp); | ||
1214 | xfs_dqfunlock(dqp); | ||
1215 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); | ||
1227 | return XFS_ERROR(EIO); | 1216 | return XFS_ERROR(EIO); |
1228 | } | 1217 | } |
1229 | 1218 | ||
1230 | /* This is the only portion of data that needs to persist */ | 1219 | /* This is the only portion of data that needs to persist */ |
1231 | memcpy(ddqp, &(dqp->q_core), sizeof(xfs_disk_dquot_t)); | 1220 | memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t)); |
1232 | 1221 | ||
1233 | /* | 1222 | /* |
1234 | * Clear the dirty field and remember the flush lsn for later use. | 1223 | * Clear the dirty field and remember the flush lsn for later use. |
1235 | */ | 1224 | */ |
1236 | dqp->dq_flags &= ~(XFS_DQ_DIRTY); | 1225 | dqp->dq_flags &= ~XFS_DQ_DIRTY; |
1237 | mp = dqp->q_mount; | ||
1238 | 1226 | ||
1239 | xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, | 1227 | xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, |
1240 | &dqp->q_logitem.qli_item.li_lsn); | 1228 | &dqp->q_logitem.qli_item.li_lsn); |
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 9a92407109a1..f8e854b4fde8 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
@@ -55,8 +55,6 @@ uint ndquot; | |||
55 | kmem_zone_t *qm_dqzone; | 55 | kmem_zone_t *qm_dqzone; |
56 | kmem_zone_t *qm_dqtrxzone; | 56 | kmem_zone_t *qm_dqtrxzone; |
57 | 57 | ||
58 | static cred_t xfs_zerocr; | ||
59 | |||
60 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); | 58 | STATIC void xfs_qm_list_init(xfs_dqlist_t *, char *, int); |
61 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | 59 | STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); |
62 | 60 | ||
@@ -837,7 +835,7 @@ xfs_qm_dqattach_locked( | |||
837 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, | 835 | xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP, |
838 | flags & XFS_QMOPT_DQALLOC, | 836 | flags & XFS_QMOPT_DQALLOC, |
839 | ip->i_udquot, &ip->i_gdquot) : | 837 | ip->i_udquot, &ip->i_gdquot) : |
840 | xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ, | 838 | xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ, |
841 | flags & XFS_QMOPT_DQALLOC, | 839 | flags & XFS_QMOPT_DQALLOC, |
842 | ip->i_udquot, &ip->i_gdquot); | 840 | ip->i_udquot, &ip->i_gdquot); |
843 | /* | 841 | /* |
@@ -1199,87 +1197,6 @@ xfs_qm_list_destroy( | |||
1199 | mutex_destroy(&(list->qh_lock)); | 1197 | mutex_destroy(&(list->qh_lock)); |
1200 | } | 1198 | } |
1201 | 1199 | ||
1202 | |||
1203 | /* | ||
1204 | * Stripped down version of dqattach. This doesn't attach, or even look at the | ||
1205 | * dquots attached to the inode. The rationale is that there won't be any | ||
1206 | * attached at the time this is called from quotacheck. | ||
1207 | */ | ||
1208 | STATIC int | ||
1209 | xfs_qm_dqget_noattach( | ||
1210 | xfs_inode_t *ip, | ||
1211 | xfs_dquot_t **O_udqpp, | ||
1212 | xfs_dquot_t **O_gdqpp) | ||
1213 | { | ||
1214 | int error; | ||
1215 | xfs_mount_t *mp; | ||
1216 | xfs_dquot_t *udqp, *gdqp; | ||
1217 | |||
1218 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
1219 | mp = ip->i_mount; | ||
1220 | udqp = NULL; | ||
1221 | gdqp = NULL; | ||
1222 | |||
1223 | if (XFS_IS_UQUOTA_ON(mp)) { | ||
1224 | ASSERT(ip->i_udquot == NULL); | ||
1225 | /* | ||
1226 | * We want the dquot allocated if it doesn't exist. | ||
1227 | */ | ||
1228 | if ((error = xfs_qm_dqget(mp, ip, ip->i_d.di_uid, XFS_DQ_USER, | ||
1229 | XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, | ||
1230 | &udqp))) { | ||
1231 | /* | ||
1232 | * Shouldn't be able to turn off quotas here. | ||
1233 | */ | ||
1234 | ASSERT(error != ESRCH); | ||
1235 | ASSERT(error != ENOENT); | ||
1236 | return error; | ||
1237 | } | ||
1238 | ASSERT(udqp); | ||
1239 | } | ||
1240 | |||
1241 | if (XFS_IS_OQUOTA_ON(mp)) { | ||
1242 | ASSERT(ip->i_gdquot == NULL); | ||
1243 | if (udqp) | ||
1244 | xfs_dqunlock(udqp); | ||
1245 | error = XFS_IS_GQUOTA_ON(mp) ? | ||
1246 | xfs_qm_dqget(mp, ip, | ||
1247 | ip->i_d.di_gid, XFS_DQ_GROUP, | ||
1248 | XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, | ||
1249 | &gdqp) : | ||
1250 | xfs_qm_dqget(mp, ip, | ||
1251 | ip->i_d.di_projid, XFS_DQ_PROJ, | ||
1252 | XFS_QMOPT_DQALLOC|XFS_QMOPT_DOWARN, | ||
1253 | &gdqp); | ||
1254 | if (error) { | ||
1255 | if (udqp) | ||
1256 | xfs_qm_dqrele(udqp); | ||
1257 | ASSERT(error != ESRCH); | ||
1258 | ASSERT(error != ENOENT); | ||
1259 | return error; | ||
1260 | } | ||
1261 | ASSERT(gdqp); | ||
1262 | |||
1263 | /* Reacquire the locks in the right order */ | ||
1264 | if (udqp) { | ||
1265 | if (! xfs_qm_dqlock_nowait(udqp)) { | ||
1266 | xfs_dqunlock(gdqp); | ||
1267 | xfs_dqlock(udqp); | ||
1268 | xfs_dqlock(gdqp); | ||
1269 | } | ||
1270 | } | ||
1271 | } | ||
1272 | |||
1273 | *O_udqpp = udqp; | ||
1274 | *O_gdqpp = gdqp; | ||
1275 | |||
1276 | #ifdef QUOTADEBUG | ||
1277 | if (udqp) ASSERT(XFS_DQ_IS_LOCKED(udqp)); | ||
1278 | if (gdqp) ASSERT(XFS_DQ_IS_LOCKED(gdqp)); | ||
1279 | #endif | ||
1280 | return 0; | ||
1281 | } | ||
1282 | |||
1283 | /* | 1200 | /* |
1284 | * Create an inode and return with a reference already taken, but unlocked | 1201 | * Create an inode and return with a reference already taken, but unlocked |
1285 | * This is how we create quota inodes | 1202 | * This is how we create quota inodes |
@@ -1305,8 +1222,8 @@ xfs_qm_qino_alloc( | |||
1305 | return error; | 1222 | return error; |
1306 | } | 1223 | } |
1307 | 1224 | ||
1308 | if ((error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, | 1225 | error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, &committed); |
1309 | &xfs_zerocr, 0, 1, ip, &committed))) { | 1226 | if (error) { |
1310 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | | 1227 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | |
1311 | XFS_TRANS_ABORT); | 1228 | XFS_TRANS_ABORT); |
1312 | return error; | 1229 | return error; |
@@ -1516,7 +1433,7 @@ xfs_qm_dqiterate( | |||
1516 | rablkcnt = map[i+1].br_blockcount; | 1433 | rablkcnt = map[i+1].br_blockcount; |
1517 | rablkno = map[i+1].br_startblock; | 1434 | rablkno = map[i+1].br_startblock; |
1518 | while (rablkcnt--) { | 1435 | while (rablkcnt--) { |
1519 | xfs_baread(mp->m_ddev_targp, | 1436 | xfs_buf_readahead(mp->m_ddev_targp, |
1520 | XFS_FSB_TO_DADDR(mp, rablkno), | 1437 | XFS_FSB_TO_DADDR(mp, rablkno), |
1521 | mp->m_quotainfo->qi_dqchunklen); | 1438 | mp->m_quotainfo->qi_dqchunklen); |
1522 | rablkno++; | 1439 | rablkno++; |
@@ -1546,18 +1463,34 @@ xfs_qm_dqiterate( | |||
1546 | 1463 | ||
1547 | /* | 1464 | /* |
1548 | * Called by dqusage_adjust in doing a quotacheck. | 1465 | * Called by dqusage_adjust in doing a quotacheck. |
1549 | * Given the inode, and a dquot (either USR or GRP, doesn't matter), | 1466 | * |
1550 | * this updates its incore copy as well as the buffer copy. This is | 1467 | * Given the inode, and a dquot id this updates both the incore dqout as well |
1551 | * so that once the quotacheck is done, we can just log all the buffers, | 1468 | * as the buffer copy. This is so that once the quotacheck is done, we can |
1552 | * as opposed to logging numerous updates to individual dquots. | 1469 | * just log all the buffers, as opposed to logging numerous updates to |
1470 | * individual dquots. | ||
1553 | */ | 1471 | */ |
1554 | STATIC void | 1472 | STATIC int |
1555 | xfs_qm_quotacheck_dqadjust( | 1473 | xfs_qm_quotacheck_dqadjust( |
1556 | xfs_dquot_t *dqp, | 1474 | struct xfs_inode *ip, |
1475 | xfs_dqid_t id, | ||
1476 | uint type, | ||
1557 | xfs_qcnt_t nblks, | 1477 | xfs_qcnt_t nblks, |
1558 | xfs_qcnt_t rtblks) | 1478 | xfs_qcnt_t rtblks) |
1559 | { | 1479 | { |
1560 | ASSERT(XFS_DQ_IS_LOCKED(dqp)); | 1480 | struct xfs_mount *mp = ip->i_mount; |
1481 | struct xfs_dquot *dqp; | ||
1482 | int error; | ||
1483 | |||
1484 | error = xfs_qm_dqget(mp, ip, id, type, | ||
1485 | XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp); | ||
1486 | if (error) { | ||
1487 | /* | ||
1488 | * Shouldn't be able to turn off quotas here. | ||
1489 | */ | ||
1490 | ASSERT(error != ESRCH); | ||
1491 | ASSERT(error != ENOENT); | ||
1492 | return error; | ||
1493 | } | ||
1561 | 1494 | ||
1562 | trace_xfs_dqadjust(dqp); | 1495 | trace_xfs_dqadjust(dqp); |
1563 | 1496 | ||
@@ -1582,11 +1515,13 @@ xfs_qm_quotacheck_dqadjust( | |||
1582 | * There are no timers for the default values set in the root dquot. | 1515 | * There are no timers for the default values set in the root dquot. |
1583 | */ | 1516 | */ |
1584 | if (dqp->q_core.d_id) { | 1517 | if (dqp->q_core.d_id) { |
1585 | xfs_qm_adjust_dqlimits(dqp->q_mount, &dqp->q_core); | 1518 | xfs_qm_adjust_dqlimits(mp, &dqp->q_core); |
1586 | xfs_qm_adjust_dqtimers(dqp->q_mount, &dqp->q_core); | 1519 | xfs_qm_adjust_dqtimers(mp, &dqp->q_core); |
1587 | } | 1520 | } |
1588 | 1521 | ||
1589 | dqp->dq_flags |= XFS_DQ_DIRTY; | 1522 | dqp->dq_flags |= XFS_DQ_DIRTY; |
1523 | xfs_qm_dqput(dqp); | ||
1524 | return 0; | ||
1590 | } | 1525 | } |
1591 | 1526 | ||
1592 | STATIC int | 1527 | STATIC int |
@@ -1629,8 +1564,7 @@ xfs_qm_dqusage_adjust( | |||
1629 | int *res) /* result code value */ | 1564 | int *res) /* result code value */ |
1630 | { | 1565 | { |
1631 | xfs_inode_t *ip; | 1566 | xfs_inode_t *ip; |
1632 | xfs_dquot_t *udqp, *gdqp; | 1567 | xfs_qcnt_t nblks, rtblks = 0; |
1633 | xfs_qcnt_t nblks, rtblks; | ||
1634 | int error; | 1568 | int error; |
1635 | 1569 | ||
1636 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); | 1570 | ASSERT(XFS_IS_QUOTA_RUNNING(mp)); |
@@ -1650,51 +1584,24 @@ xfs_qm_dqusage_adjust( | |||
1650 | * the case in all other instances. It's OK that we do this because | 1584 | * the case in all other instances. It's OK that we do this because |
1651 | * quotacheck is done only at mount time. | 1585 | * quotacheck is done only at mount time. |
1652 | */ | 1586 | */ |
1653 | if ((error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip))) { | 1587 | error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip); |
1588 | if (error) { | ||
1654 | *res = BULKSTAT_RV_NOTHING; | 1589 | *res = BULKSTAT_RV_NOTHING; |
1655 | return error; | 1590 | return error; |
1656 | } | 1591 | } |
1657 | 1592 | ||
1658 | /* | 1593 | ASSERT(ip->i_delayed_blks == 0); |
1659 | * Obtain the locked dquots. In case of an error (eg. allocation | ||
1660 | * fails for ENOSPC), we return the negative of the error number | ||
1661 | * to bulkstat, so that it can get propagated to quotacheck() and | ||
1662 | * making us disable quotas for the file system. | ||
1663 | */ | ||
1664 | if ((error = xfs_qm_dqget_noattach(ip, &udqp, &gdqp))) { | ||
1665 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1666 | IRELE(ip); | ||
1667 | *res = BULKSTAT_RV_GIVEUP; | ||
1668 | return error; | ||
1669 | } | ||
1670 | 1594 | ||
1671 | rtblks = 0; | 1595 | if (XFS_IS_REALTIME_INODE(ip)) { |
1672 | if (! XFS_IS_REALTIME_INODE(ip)) { | ||
1673 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks; | ||
1674 | } else { | ||
1675 | /* | 1596 | /* |
1676 | * Walk thru the extent list and count the realtime blocks. | 1597 | * Walk thru the extent list and count the realtime blocks. |
1677 | */ | 1598 | */ |
1678 | if ((error = xfs_qm_get_rtblks(ip, &rtblks))) { | 1599 | error = xfs_qm_get_rtblks(ip, &rtblks); |
1679 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 1600 | if (error) |
1680 | IRELE(ip); | 1601 | goto error0; |
1681 | if (udqp) | ||
1682 | xfs_qm_dqput(udqp); | ||
1683 | if (gdqp) | ||
1684 | xfs_qm_dqput(gdqp); | ||
1685 | *res = BULKSTAT_RV_GIVEUP; | ||
1686 | return error; | ||
1687 | } | ||
1688 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; | ||
1689 | } | 1602 | } |
1690 | ASSERT(ip->i_delayed_blks == 0); | ||
1691 | 1603 | ||
1692 | /* | 1604 | nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks; |
1693 | * We can't release the inode while holding its dquot locks. | ||
1694 | * The inode can go into inactive and might try to acquire the dquotlocks. | ||
1695 | * So, just unlock here and do a vn_rele at the end. | ||
1696 | */ | ||
1697 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1698 | 1605 | ||
1699 | /* | 1606 | /* |
1700 | * Add the (disk blocks and inode) resources occupied by this | 1607 | * Add the (disk blocks and inode) resources occupied by this |
@@ -1709,26 +1616,36 @@ xfs_qm_dqusage_adjust( | |||
1709 | * and quotaoffs don't race. (Quotachecks happen at mount time only). | 1616 | * and quotaoffs don't race. (Quotachecks happen at mount time only). |
1710 | */ | 1617 | */ |
1711 | if (XFS_IS_UQUOTA_ON(mp)) { | 1618 | if (XFS_IS_UQUOTA_ON(mp)) { |
1712 | ASSERT(udqp); | 1619 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid, |
1713 | xfs_qm_quotacheck_dqadjust(udqp, nblks, rtblks); | 1620 | XFS_DQ_USER, nblks, rtblks); |
1714 | xfs_qm_dqput(udqp); | 1621 | if (error) |
1622 | goto error0; | ||
1715 | } | 1623 | } |
1716 | if (XFS_IS_OQUOTA_ON(mp)) { | 1624 | |
1717 | ASSERT(gdqp); | 1625 | if (XFS_IS_GQUOTA_ON(mp)) { |
1718 | xfs_qm_quotacheck_dqadjust(gdqp, nblks, rtblks); | 1626 | error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid, |
1719 | xfs_qm_dqput(gdqp); | 1627 | XFS_DQ_GROUP, nblks, rtblks); |
1628 | if (error) | ||
1629 | goto error0; | ||
1720 | } | 1630 | } |
1721 | /* | ||
1722 | * Now release the inode. This will send it to 'inactive', and | ||
1723 | * possibly even free blocks. | ||
1724 | */ | ||
1725 | IRELE(ip); | ||
1726 | 1631 | ||
1727 | /* | 1632 | if (XFS_IS_PQUOTA_ON(mp)) { |
1728 | * Goto next inode. | 1633 | error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip), |
1729 | */ | 1634 | XFS_DQ_PROJ, nblks, rtblks); |
1635 | if (error) | ||
1636 | goto error0; | ||
1637 | } | ||
1638 | |||
1639 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1640 | IRELE(ip); | ||
1730 | *res = BULKSTAT_RV_DIDONE; | 1641 | *res = BULKSTAT_RV_DIDONE; |
1731 | return 0; | 1642 | return 0; |
1643 | |||
1644 | error0: | ||
1645 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1646 | IRELE(ip); | ||
1647 | *res = BULKSTAT_RV_GIVEUP; | ||
1648 | return error; | ||
1732 | } | 1649 | } |
1733 | 1650 | ||
1734 | /* | 1651 | /* |
@@ -2224,7 +2141,7 @@ xfs_qm_write_sb_changes( | |||
2224 | 2141 | ||
2225 | 2142 | ||
2226 | /* | 2143 | /* |
2227 | * Given an inode, a uid and gid (from cred_t) make sure that we have | 2144 | * Given an inode, a uid, gid and prid make sure that we have |
2228 | * allocated relevant dquot(s) on disk, and that we won't exceed inode | 2145 | * allocated relevant dquot(s) on disk, and that we won't exceed inode |
2229 | * quotas by creating this file. | 2146 | * quotas by creating this file. |
2230 | * This also attaches dquot(s) to the given inode after locking it, | 2147 | * This also attaches dquot(s) to the given inode after locking it, |
@@ -2332,7 +2249,7 @@ xfs_qm_vop_dqalloc( | |||
2332 | xfs_dqunlock(gq); | 2249 | xfs_dqunlock(gq); |
2333 | } | 2250 | } |
2334 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { | 2251 | } else if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) { |
2335 | if (ip->i_d.di_projid != prid) { | 2252 | if (xfs_get_projid(ip) != prid) { |
2336 | xfs_iunlock(ip, lockflags); | 2253 | xfs_iunlock(ip, lockflags); |
2337 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, | 2254 | if ((error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid, |
2338 | XFS_DQ_PROJ, | 2255 | XFS_DQ_PROJ, |
@@ -2454,7 +2371,7 @@ xfs_qm_vop_chown_reserve( | |||
2454 | } | 2371 | } |
2455 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { | 2372 | if (XFS_IS_OQUOTA_ON(ip->i_mount) && gdqp) { |
2456 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && | 2373 | if (XFS_IS_PQUOTA_ON(ip->i_mount) && |
2457 | ip->i_d.di_projid != be32_to_cpu(gdqp->q_core.d_id)) | 2374 | xfs_get_projid(ip) != be32_to_cpu(gdqp->q_core.d_id)) |
2458 | prjflags = XFS_QMOPT_ENOSPC; | 2375 | prjflags = XFS_QMOPT_ENOSPC; |
2459 | 2376 | ||
2460 | if (prjflags || | 2377 | if (prjflags || |
@@ -2558,7 +2475,7 @@ xfs_qm_vop_create_dqattach( | |||
2558 | ip->i_gdquot = gdqp; | 2475 | ip->i_gdquot = gdqp; |
2559 | ASSERT(XFS_IS_OQUOTA_ON(mp)); | 2476 | ASSERT(XFS_IS_OQUOTA_ON(mp)); |
2560 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? | 2477 | ASSERT((XFS_IS_GQUOTA_ON(mp) ? |
2561 | ip->i_d.di_gid : ip->i_d.di_projid) == | 2478 | ip->i_d.di_gid : xfs_get_projid(ip)) == |
2562 | be32_to_cpu(gdqp->q_core.d_id)); | 2479 | be32_to_cpu(gdqp->q_core.d_id)); |
2563 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); | 2480 | xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1); |
2564 | } | 2481 | } |
diff --git a/fs/xfs/quota/xfs_qm_bhv.c b/fs/xfs/quota/xfs_qm_bhv.c index bea02d786c5d..45b5cb1788ab 100644 --- a/fs/xfs/quota/xfs_qm_bhv.c +++ b/fs/xfs/quota/xfs_qm_bhv.c | |||
@@ -81,7 +81,7 @@ xfs_qm_statvfs( | |||
81 | xfs_mount_t *mp = ip->i_mount; | 81 | xfs_mount_t *mp = ip->i_mount; |
82 | xfs_dquot_t *dqp; | 82 | xfs_dquot_t *dqp; |
83 | 83 | ||
84 | if (!xfs_qm_dqget(mp, NULL, ip->i_d.di_projid, XFS_DQ_PROJ, 0, &dqp)) { | 84 | if (!xfs_qm_dqget(mp, NULL, xfs_get_projid(ip), XFS_DQ_PROJ, 0, &dqp)) { |
85 | xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); | 85 | xfs_fill_statvfs_from_dquot(statp, &dqp->q_core); |
86 | xfs_qm_dqput(dqp); | 86 | xfs_qm_dqput(dqp); |
87 | } | 87 | } |
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c index 45e5849df238..bdebc183223e 100644 --- a/fs/xfs/quota/xfs_qm_syscalls.c +++ b/fs/xfs/quota/xfs_qm_syscalls.c | |||
@@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile( | |||
276 | goto out_unlock; | 276 | goto out_unlock; |
277 | } | 277 | } |
278 | 278 | ||
279 | xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 279 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
280 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | 280 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); |
281 | 281 | ||
282 | out_unlock: | 282 | out_unlock: |
@@ -875,21 +875,14 @@ xfs_dqrele_inode( | |||
875 | struct xfs_perag *pag, | 875 | struct xfs_perag *pag, |
876 | int flags) | 876 | int flags) |
877 | { | 877 | { |
878 | int error; | ||
879 | |||
880 | /* skip quota inodes */ | 878 | /* skip quota inodes */ |
881 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || | 879 | if (ip == ip->i_mount->m_quotainfo->qi_uquotaip || |
882 | ip == ip->i_mount->m_quotainfo->qi_gquotaip) { | 880 | ip == ip->i_mount->m_quotainfo->qi_gquotaip) { |
883 | ASSERT(ip->i_udquot == NULL); | 881 | ASSERT(ip->i_udquot == NULL); |
884 | ASSERT(ip->i_gdquot == NULL); | 882 | ASSERT(ip->i_gdquot == NULL); |
885 | read_unlock(&pag->pag_ici_lock); | ||
886 | return 0; | 883 | return 0; |
887 | } | 884 | } |
888 | 885 | ||
889 | error = xfs_sync_inode_valid(ip, pag); | ||
890 | if (error) | ||
891 | return error; | ||
892 | |||
893 | xfs_ilock(ip, XFS_ILOCK_EXCL); | 886 | xfs_ilock(ip, XFS_ILOCK_EXCL); |
894 | if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { | 887 | if ((flags & XFS_UQUOTA_ACCT) && ip->i_udquot) { |
895 | xfs_qm_dqrele(ip->i_udquot); | 888 | xfs_qm_dqrele(ip->i_udquot); |
@@ -900,8 +893,6 @@ xfs_dqrele_inode( | |||
900 | ip->i_gdquot = NULL; | 893 | ip->i_gdquot = NULL; |
901 | } | 894 | } |
902 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 895 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
903 | |||
904 | IRELE(ip); | ||
905 | return 0; | 896 | return 0; |
906 | } | 897 | } |
907 | 898 | ||
@@ -918,8 +909,7 @@ xfs_qm_dqrele_all_inodes( | |||
918 | uint flags) | 909 | uint flags) |
919 | { | 910 | { |
920 | ASSERT(mp->m_quotainfo); | 911 | ASSERT(mp->m_quotainfo); |
921 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, | 912 | xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags); |
922 | XFS_ICI_NO_TAG, 0, NULL); | ||
923 | } | 913 | } |
924 | 914 | ||
925 | /*------------------------------------------------------------------------*/ | 915 | /*------------------------------------------------------------------------*/ |
@@ -1175,7 +1165,7 @@ xfs_qm_internalqcheck_adjust( | |||
1175 | } | 1165 | } |
1176 | xfs_qm_internalqcheck_get_dquots(mp, | 1166 | xfs_qm_internalqcheck_get_dquots(mp, |
1177 | (xfs_dqid_t) ip->i_d.di_uid, | 1167 | (xfs_dqid_t) ip->i_d.di_uid, |
1178 | (xfs_dqid_t) ip->i_d.di_projid, | 1168 | (xfs_dqid_t) xfs_get_projid(ip), |
1179 | (xfs_dqid_t) ip->i_d.di_gid, | 1169 | (xfs_dqid_t) ip->i_d.di_gid, |
1180 | &ud, &gd); | 1170 | &ud, &gd); |
1181 | if (XFS_IS_UQUOTA_ON(mp)) { | 1171 | if (XFS_IS_UQUOTA_ON(mp)) { |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 4917d4eed4ed..63c7a1a6c022 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -230,6 +230,15 @@ typedef struct xfs_perag { | |||
230 | rwlock_t pag_ici_lock; /* incore inode lock */ | 230 | rwlock_t pag_ici_lock; /* incore inode lock */ |
231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ | 231 | struct radix_tree_root pag_ici_root; /* incore inode cache root */ |
232 | int pag_ici_reclaimable; /* reclaimable inodes */ | 232 | int pag_ici_reclaimable; /* reclaimable inodes */ |
233 | struct mutex pag_ici_reclaim_lock; /* serialisation point */ | ||
234 | unsigned long pag_ici_reclaim_cursor; /* reclaim restart point */ | ||
235 | |||
236 | /* buffer cache index */ | ||
237 | spinlock_t pag_buf_lock; /* lock for pag_buf_tree */ | ||
238 | struct rb_root pag_buf_tree; /* ordered tree of active buffers */ | ||
239 | |||
240 | /* for rcu-safe freeing */ | ||
241 | struct rcu_head rcu_head; | ||
233 | #endif | 242 | #endif |
234 | int pagb_count; /* pagb slots in use */ | 243 | int pagb_count; /* pagb slots in use */ |
235 | } xfs_perag_t; | 244 | } xfs_perag_t; |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index af168faccc7a..112abc439ca5 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -675,7 +675,7 @@ xfs_alloc_ag_vextent_near( | |||
675 | xfs_agblock_t gtbnoa; /* aligned ... */ | 675 | xfs_agblock_t gtbnoa; /* aligned ... */ |
676 | xfs_extlen_t gtdiff; /* difference to right side entry */ | 676 | xfs_extlen_t gtdiff; /* difference to right side entry */ |
677 | xfs_extlen_t gtlen; /* length of right side entry */ | 677 | xfs_extlen_t gtlen; /* length of right side entry */ |
678 | xfs_extlen_t gtlena; /* aligned ... */ | 678 | xfs_extlen_t gtlena = 0; /* aligned ... */ |
679 | xfs_agblock_t gtnew; /* useful start bno of right side */ | 679 | xfs_agblock_t gtnew; /* useful start bno of right side */ |
680 | int error; /* error code */ | 680 | int error; /* error code */ |
681 | int i; /* result code, temporary */ | 681 | int i; /* result code, temporary */ |
@@ -684,7 +684,7 @@ xfs_alloc_ag_vextent_near( | |||
684 | xfs_agblock_t ltbnoa; /* aligned ... */ | 684 | xfs_agblock_t ltbnoa; /* aligned ... */ |
685 | xfs_extlen_t ltdiff; /* difference to left side entry */ | 685 | xfs_extlen_t ltdiff; /* difference to left side entry */ |
686 | xfs_extlen_t ltlen; /* length of left side entry */ | 686 | xfs_extlen_t ltlen; /* length of left side entry */ |
687 | xfs_extlen_t ltlena; /* aligned ... */ | 687 | xfs_extlen_t ltlena = 0; /* aligned ... */ |
688 | xfs_agblock_t ltnew; /* useful start bno of left side */ | 688 | xfs_agblock_t ltnew; /* useful start bno of left side */ |
689 | xfs_extlen_t rlen; /* length of returned extent */ | 689 | xfs_extlen_t rlen; /* length of returned extent */ |
690 | #if defined(DEBUG) && defined(__KERNEL__) | 690 | #if defined(DEBUG) && defined(__KERNEL__) |
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 97f7328967fd..3916925e2584 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -280,38 +280,6 @@ xfs_allocbt_key_diff( | |||
280 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; | 280 | return (__int64_t)be32_to_cpu(kp->ar_startblock) - rec->ar_startblock; |
281 | } | 281 | } |
282 | 282 | ||
283 | STATIC int | ||
284 | xfs_allocbt_kill_root( | ||
285 | struct xfs_btree_cur *cur, | ||
286 | struct xfs_buf *bp, | ||
287 | int level, | ||
288 | union xfs_btree_ptr *newroot) | ||
289 | { | ||
290 | int error; | ||
291 | |||
292 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
293 | XFS_BTREE_STATS_INC(cur, killroot); | ||
294 | |||
295 | /* | ||
296 | * Update the root pointer, decreasing the level by 1 and then | ||
297 | * free the old root. | ||
298 | */ | ||
299 | xfs_allocbt_set_root(cur, newroot, -1); | ||
300 | error = xfs_allocbt_free_block(cur, bp); | ||
301 | if (error) { | ||
302 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
303 | return error; | ||
304 | } | ||
305 | |||
306 | XFS_BTREE_STATS_INC(cur, free); | ||
307 | |||
308 | xfs_btree_setbuf(cur, level, NULL); | ||
309 | cur->bc_nlevels--; | ||
310 | |||
311 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
312 | return 0; | ||
313 | } | ||
314 | |||
315 | #ifdef DEBUG | 283 | #ifdef DEBUG |
316 | STATIC int | 284 | STATIC int |
317 | xfs_allocbt_keys_inorder( | 285 | xfs_allocbt_keys_inorder( |
@@ -423,7 +391,6 @@ static const struct xfs_btree_ops xfs_allocbt_ops = { | |||
423 | 391 | ||
424 | .dup_cursor = xfs_allocbt_dup_cursor, | 392 | .dup_cursor = xfs_allocbt_dup_cursor, |
425 | .set_root = xfs_allocbt_set_root, | 393 | .set_root = xfs_allocbt_set_root, |
426 | .kill_root = xfs_allocbt_kill_root, | ||
427 | .alloc_block = xfs_allocbt_alloc_block, | 394 | .alloc_block = xfs_allocbt_alloc_block, |
428 | .free_block = xfs_allocbt_free_block, | 395 | .free_block = xfs_allocbt_free_block, |
429 | .update_lastrec = xfs_allocbt_update_lastrec, | 396 | .update_lastrec = xfs_allocbt_update_lastrec, |
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index c2568242a901..c86375378810 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c | |||
@@ -355,16 +355,15 @@ xfs_attr_set_int( | |||
355 | if (mp->m_flags & XFS_MOUNT_WSYNC) { | 355 | if (mp->m_flags & XFS_MOUNT_WSYNC) { |
356 | xfs_trans_set_sync(args.trans); | 356 | xfs_trans_set_sync(args.trans); |
357 | } | 357 | } |
358 | |||
359 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
360 | xfs_trans_ichgtime(args.trans, dp, | ||
361 | XFS_ICHGTIME_CHG); | ||
362 | } | ||
358 | err2 = xfs_trans_commit(args.trans, | 363 | err2 = xfs_trans_commit(args.trans, |
359 | XFS_TRANS_RELEASE_LOG_RES); | 364 | XFS_TRANS_RELEASE_LOG_RES); |
360 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 365 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
361 | 366 | ||
362 | /* | ||
363 | * Hit the inode change time. | ||
364 | */ | ||
365 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
366 | xfs_ichgtime(dp, XFS_ICHGTIME_CHG); | ||
367 | } | ||
368 | return(error == 0 ? err2 : error); | 367 | return(error == 0 ? err2 : error); |
369 | } | 368 | } |
370 | 369 | ||
@@ -420,6 +419,9 @@ xfs_attr_set_int( | |||
420 | xfs_trans_set_sync(args.trans); | 419 | xfs_trans_set_sync(args.trans); |
421 | } | 420 | } |
422 | 421 | ||
422 | if ((flags & ATTR_KERNOTIME) == 0) | ||
423 | xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); | ||
424 | |||
423 | /* | 425 | /* |
424 | * Commit the last in the sequence of transactions. | 426 | * Commit the last in the sequence of transactions. |
425 | */ | 427 | */ |
@@ -427,13 +429,6 @@ xfs_attr_set_int( | |||
427 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); | 429 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); |
428 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 430 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
429 | 431 | ||
430 | /* | ||
431 | * Hit the inode change time. | ||
432 | */ | ||
433 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
434 | xfs_ichgtime(dp, XFS_ICHGTIME_CHG); | ||
435 | } | ||
436 | |||
437 | return(error); | 432 | return(error); |
438 | 433 | ||
439 | out: | 434 | out: |
@@ -567,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) | |||
567 | xfs_trans_set_sync(args.trans); | 562 | xfs_trans_set_sync(args.trans); |
568 | } | 563 | } |
569 | 564 | ||
565 | if ((flags & ATTR_KERNOTIME) == 0) | ||
566 | xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); | ||
567 | |||
570 | /* | 568 | /* |
571 | * Commit the last in the sequence of transactions. | 569 | * Commit the last in the sequence of transactions. |
572 | */ | 570 | */ |
@@ -574,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) | |||
574 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); | 572 | error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES); |
575 | xfs_iunlock(dp, XFS_ILOCK_EXCL); | 573 | xfs_iunlock(dp, XFS_ILOCK_EXCL); |
576 | 574 | ||
577 | /* | ||
578 | * Hit the inode change time. | ||
579 | */ | ||
580 | if (!error && (flags & ATTR_KERNOTIME) == 0) { | ||
581 | xfs_ichgtime(dp, XFS_ICHGTIME_CHG); | ||
582 | } | ||
583 | |||
584 | return(error); | 575 | return(error); |
585 | 576 | ||
586 | out: | 577 | out: |
@@ -1995,7 +1986,7 @@ xfs_attr_rmtval_get(xfs_da_args_t *args) | |||
1995 | 1986 | ||
1996 | tmp = (valuelen < XFS_BUF_SIZE(bp)) | 1987 | tmp = (valuelen < XFS_BUF_SIZE(bp)) |
1997 | ? valuelen : XFS_BUF_SIZE(bp); | 1988 | ? valuelen : XFS_BUF_SIZE(bp); |
1998 | xfs_biomove(bp, 0, tmp, dst, XBF_READ); | 1989 | xfs_buf_iomove(bp, 0, tmp, dst, XBRW_READ); |
1999 | xfs_buf_relse(bp); | 1990 | xfs_buf_relse(bp); |
2000 | dst += tmp; | 1991 | dst += tmp; |
2001 | valuelen -= tmp; | 1992 | valuelen -= tmp; |
@@ -2125,9 +2116,9 @@ xfs_attr_rmtval_set(xfs_da_args_t *args) | |||
2125 | 2116 | ||
2126 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : | 2117 | tmp = (valuelen < XFS_BUF_SIZE(bp)) ? valuelen : |
2127 | XFS_BUF_SIZE(bp); | 2118 | XFS_BUF_SIZE(bp); |
2128 | xfs_biomove(bp, 0, tmp, src, XBF_WRITE); | 2119 | xfs_buf_iomove(bp, 0, tmp, src, XBRW_WRITE); |
2129 | if (tmp < XFS_BUF_SIZE(bp)) | 2120 | if (tmp < XFS_BUF_SIZE(bp)) |
2130 | xfs_biozero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); | 2121 | xfs_buf_zero(bp, tmp, XFS_BUF_SIZE(bp) - tmp); |
2131 | if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ | 2122 | if ((error = xfs_bwrite(mp, bp))) {/* GROT: NOTE: synchronous write */ |
2132 | return (error); | 2123 | return (error); |
2133 | } | 2124 | } |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index f90dadd5a968..8abd12e32e13 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -614,7 +614,7 @@ xfs_bmap_add_extent( | |||
614 | nblks += cur->bc_private.b.allocated; | 614 | nblks += cur->bc_private.b.allocated; |
615 | ASSERT(nblks <= da_old); | 615 | ASSERT(nblks <= da_old); |
616 | if (nblks < da_old) | 616 | if (nblks < da_old) |
617 | xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, | 617 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
618 | (int64_t)(da_old - nblks), rsvd); | 618 | (int64_t)(da_old - nblks), rsvd); |
619 | } | 619 | } |
620 | /* | 620 | /* |
@@ -1079,7 +1079,8 @@ xfs_bmap_add_extent_delay_real( | |||
1079 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - | 1079 | diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) - |
1080 | (cur ? cur->bc_private.b.allocated : 0)); | 1080 | (cur ? cur->bc_private.b.allocated : 0)); |
1081 | if (diff > 0 && | 1081 | if (diff > 0 && |
1082 | xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) { | 1082 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
1083 | -((int64_t)diff), rsvd)) { | ||
1083 | /* | 1084 | /* |
1084 | * Ick gross gag me with a spoon. | 1085 | * Ick gross gag me with a spoon. |
1085 | */ | 1086 | */ |
@@ -1089,16 +1090,18 @@ xfs_bmap_add_extent_delay_real( | |||
1089 | temp--; | 1090 | temp--; |
1090 | diff--; | 1091 | diff--; |
1091 | if (!diff || | 1092 | if (!diff || |
1092 | !xfs_mod_incore_sb(ip->i_mount, | 1093 | !xfs_icsb_modify_counters(ip->i_mount, |
1093 | XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) | 1094 | XFS_SBS_FDBLOCKS, |
1095 | -((int64_t)diff), rsvd)) | ||
1094 | break; | 1096 | break; |
1095 | } | 1097 | } |
1096 | if (temp2) { | 1098 | if (temp2) { |
1097 | temp2--; | 1099 | temp2--; |
1098 | diff--; | 1100 | diff--; |
1099 | if (!diff || | 1101 | if (!diff || |
1100 | !xfs_mod_incore_sb(ip->i_mount, | 1102 | !xfs_icsb_modify_counters(ip->i_mount, |
1101 | XFS_SBS_FDBLOCKS, -((int64_t)diff), rsvd)) | 1103 | XFS_SBS_FDBLOCKS, |
1104 | -((int64_t)diff), rsvd)) | ||
1102 | break; | 1105 | break; |
1103 | } | 1106 | } |
1104 | } | 1107 | } |
@@ -1766,7 +1769,7 @@ xfs_bmap_add_extent_hole_delay( | |||
1766 | } | 1769 | } |
1767 | if (oldlen != newlen) { | 1770 | if (oldlen != newlen) { |
1768 | ASSERT(oldlen > newlen); | 1771 | ASSERT(oldlen > newlen); |
1769 | xfs_mod_incore_sb(ip->i_mount, XFS_SBS_FDBLOCKS, | 1772 | xfs_icsb_modify_counters(ip->i_mount, XFS_SBS_FDBLOCKS, |
1770 | (int64_t)(oldlen - newlen), rsvd); | 1773 | (int64_t)(oldlen - newlen), rsvd); |
1771 | /* | 1774 | /* |
1772 | * Nothing to do for disk quota accounting here. | 1775 | * Nothing to do for disk quota accounting here. |
@@ -3111,9 +3114,10 @@ xfs_bmap_del_extent( | |||
3111 | * Nothing to do for disk quota accounting here. | 3114 | * Nothing to do for disk quota accounting here. |
3112 | */ | 3115 | */ |
3113 | ASSERT(da_old >= da_new); | 3116 | ASSERT(da_old >= da_new); |
3114 | if (da_old > da_new) | 3117 | if (da_old > da_new) { |
3115 | xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, (int64_t)(da_old - da_new), | 3118 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
3116 | rsvd); | 3119 | (int64_t)(da_old - da_new), rsvd); |
3120 | } | ||
3117 | done: | 3121 | done: |
3118 | *logflagsp = flags; | 3122 | *logflagsp = flags; |
3119 | return error; | 3123 | return error; |
@@ -4526,13 +4530,13 @@ xfs_bmapi( | |||
4526 | -((int64_t)extsz), (flags & | 4530 | -((int64_t)extsz), (flags & |
4527 | XFS_BMAPI_RSVBLOCKS)); | 4531 | XFS_BMAPI_RSVBLOCKS)); |
4528 | } else { | 4532 | } else { |
4529 | error = xfs_mod_incore_sb(mp, | 4533 | error = xfs_icsb_modify_counters(mp, |
4530 | XFS_SBS_FDBLOCKS, | 4534 | XFS_SBS_FDBLOCKS, |
4531 | -((int64_t)alen), (flags & | 4535 | -((int64_t)alen), (flags & |
4532 | XFS_BMAPI_RSVBLOCKS)); | 4536 | XFS_BMAPI_RSVBLOCKS)); |
4533 | } | 4537 | } |
4534 | if (!error) { | 4538 | if (!error) { |
4535 | error = xfs_mod_incore_sb(mp, | 4539 | error = xfs_icsb_modify_counters(mp, |
4536 | XFS_SBS_FDBLOCKS, | 4540 | XFS_SBS_FDBLOCKS, |
4537 | -((int64_t)indlen), (flags & | 4541 | -((int64_t)indlen), (flags & |
4538 | XFS_BMAPI_RSVBLOCKS)); | 4542 | XFS_BMAPI_RSVBLOCKS)); |
@@ -4542,7 +4546,7 @@ xfs_bmapi( | |||
4542 | (int64_t)extsz, (flags & | 4546 | (int64_t)extsz, (flags & |
4543 | XFS_BMAPI_RSVBLOCKS)); | 4547 | XFS_BMAPI_RSVBLOCKS)); |
4544 | else if (error) | 4548 | else if (error) |
4545 | xfs_mod_incore_sb(mp, | 4549 | xfs_icsb_modify_counters(mp, |
4546 | XFS_SBS_FDBLOCKS, | 4550 | XFS_SBS_FDBLOCKS, |
4547 | (int64_t)alen, (flags & | 4551 | (int64_t)alen, (flags & |
4548 | XFS_BMAPI_RSVBLOCKS)); | 4552 | XFS_BMAPI_RSVBLOCKS)); |
@@ -4744,8 +4748,12 @@ xfs_bmapi( | |||
4744 | * Check if writing previously allocated but | 4748 | * Check if writing previously allocated but |
4745 | * unwritten extents. | 4749 | * unwritten extents. |
4746 | */ | 4750 | */ |
4747 | if (wr && mval->br_state == XFS_EXT_UNWRITTEN && | 4751 | if (wr && |
4748 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) { | 4752 | ((mval->br_state == XFS_EXT_UNWRITTEN && |
4753 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_DELAY)) == 0)) || | ||
4754 | (mval->br_state == XFS_EXT_NORM && | ||
4755 | ((flags & (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT)) == | ||
4756 | (XFS_BMAPI_PREALLOC|XFS_BMAPI_CONVERT))))) { | ||
4749 | /* | 4757 | /* |
4750 | * Modify (by adding) the state flag, if writing. | 4758 | * Modify (by adding) the state flag, if writing. |
4751 | */ | 4759 | */ |
@@ -4757,7 +4765,9 @@ xfs_bmapi( | |||
4757 | *firstblock; | 4765 | *firstblock; |
4758 | cur->bc_private.b.flist = flist; | 4766 | cur->bc_private.b.flist = flist; |
4759 | } | 4767 | } |
4760 | mval->br_state = XFS_EXT_NORM; | 4768 | mval->br_state = (mval->br_state == XFS_EXT_UNWRITTEN) |
4769 | ? XFS_EXT_NORM | ||
4770 | : XFS_EXT_UNWRITTEN; | ||
4761 | error = xfs_bmap_add_extent(ip, lastx, &cur, mval, | 4771 | error = xfs_bmap_add_extent(ip, lastx, &cur, mval, |
4762 | firstblock, flist, &tmp_logflags, | 4772 | firstblock, flist, &tmp_logflags, |
4763 | whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); | 4773 | whichfork, (flags & XFS_BMAPI_RSVBLOCKS)); |
@@ -5200,7 +5210,7 @@ xfs_bunmapi( | |||
5200 | ip, -((long)del.br_blockcount), 0, | 5210 | ip, -((long)del.br_blockcount), 0, |
5201 | XFS_QMOPT_RES_RTBLKS); | 5211 | XFS_QMOPT_RES_RTBLKS); |
5202 | } else { | 5212 | } else { |
5203 | xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, | 5213 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
5204 | (int64_t)del.br_blockcount, rsvd); | 5214 | (int64_t)del.br_blockcount, rsvd); |
5205 | (void)xfs_trans_reserve_quota_nblks(NULL, | 5215 | (void)xfs_trans_reserve_quota_nblks(NULL, |
5206 | ip, -((long)del.br_blockcount), 0, | 5216 | ip, -((long)del.br_blockcount), 0, |
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index b13569a6179b..71ec9b6ecdfc 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -74,9 +74,12 @@ typedef struct xfs_bmap_free | |||
74 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ | 74 | #define XFS_BMAPI_IGSTATE 0x080 /* Ignore state - */ |
75 | /* combine contig. space */ | 75 | /* combine contig. space */ |
76 | #define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ | 76 | #define XFS_BMAPI_CONTIG 0x100 /* must allocate only one extent */ |
77 | #define XFS_BMAPI_CONVERT 0x200 /* unwritten extent conversion - */ | 77 | /* |
78 | /* need write cache flushing and no */ | 78 | * unwritten extent conversion - this needs write cache flushing and no additional |
79 | /* additional allocation alignments */ | 79 | * allocation alignments. When specified with XFS_BMAPI_PREALLOC it converts |
80 | * from written to unwritten, otherwise convert from unwritten to written. | ||
81 | */ | ||
82 | #define XFS_BMAPI_CONVERT 0x200 | ||
80 | 83 | ||
81 | #define XFS_BMAPI_FLAGS \ | 84 | #define XFS_BMAPI_FLAGS \ |
82 | { XFS_BMAPI_WRITE, "WRITE" }, \ | 85 | { XFS_BMAPI_WRITE, "WRITE" }, \ |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 829af92f0fba..04f9cca8da7e 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -217,7 +217,7 @@ xfs_btree_del_cursor( | |||
217 | */ | 217 | */ |
218 | for (i = 0; i < cur->bc_nlevels; i++) { | 218 | for (i = 0; i < cur->bc_nlevels; i++) { |
219 | if (cur->bc_bufs[i]) | 219 | if (cur->bc_bufs[i]) |
220 | xfs_btree_setbuf(cur, i, NULL); | 220 | xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); |
221 | else if (!error) | 221 | else if (!error) |
222 | break; | 222 | break; |
223 | } | 223 | } |
@@ -656,7 +656,7 @@ xfs_btree_reada_bufl( | |||
656 | 656 | ||
657 | ASSERT(fsbno != NULLFSBLOCK); | 657 | ASSERT(fsbno != NULLFSBLOCK); |
658 | d = XFS_FSB_TO_DADDR(mp, fsbno); | 658 | d = XFS_FSB_TO_DADDR(mp, fsbno); |
659 | xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); | 659 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); |
660 | } | 660 | } |
661 | 661 | ||
662 | /* | 662 | /* |
@@ -676,7 +676,7 @@ xfs_btree_reada_bufs( | |||
676 | ASSERT(agno != NULLAGNUMBER); | 676 | ASSERT(agno != NULLAGNUMBER); |
677 | ASSERT(agbno != NULLAGBLOCK); | 677 | ASSERT(agbno != NULLAGBLOCK); |
678 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); | 678 | d = XFS_AGB_TO_DADDR(mp, agno, agbno); |
679 | xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); | 679 | xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count); |
680 | } | 680 | } |
681 | 681 | ||
682 | STATIC int | 682 | STATIC int |
@@ -763,22 +763,19 @@ xfs_btree_readahead( | |||
763 | * Set the buffer for level "lev" in the cursor to bp, releasing | 763 | * Set the buffer for level "lev" in the cursor to bp, releasing |
764 | * any previous buffer. | 764 | * any previous buffer. |
765 | */ | 765 | */ |
766 | void | 766 | STATIC void |
767 | xfs_btree_setbuf( | 767 | xfs_btree_setbuf( |
768 | xfs_btree_cur_t *cur, /* btree cursor */ | 768 | xfs_btree_cur_t *cur, /* btree cursor */ |
769 | int lev, /* level in btree */ | 769 | int lev, /* level in btree */ |
770 | xfs_buf_t *bp) /* new buffer to set */ | 770 | xfs_buf_t *bp) /* new buffer to set */ |
771 | { | 771 | { |
772 | struct xfs_btree_block *b; /* btree block */ | 772 | struct xfs_btree_block *b; /* btree block */ |
773 | xfs_buf_t *obp; /* old buffer pointer */ | ||
774 | 773 | ||
775 | obp = cur->bc_bufs[lev]; | 774 | if (cur->bc_bufs[lev]) |
776 | if (obp) | 775 | xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); |
777 | xfs_trans_brelse(cur->bc_tp, obp); | ||
778 | cur->bc_bufs[lev] = bp; | 776 | cur->bc_bufs[lev] = bp; |
779 | cur->bc_ra[lev] = 0; | 777 | cur->bc_ra[lev] = 0; |
780 | if (!bp) | 778 | |
781 | return; | ||
782 | b = XFS_BUF_TO_BLOCK(bp); | 779 | b = XFS_BUF_TO_BLOCK(bp); |
783 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { | 780 | if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { |
784 | if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) | 781 | if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) |
@@ -3011,6 +3008,43 @@ out0: | |||
3011 | return 0; | 3008 | return 0; |
3012 | } | 3009 | } |
3013 | 3010 | ||
3011 | /* | ||
3012 | * Kill the current root node, and replace it with it's only child node. | ||
3013 | */ | ||
3014 | STATIC int | ||
3015 | xfs_btree_kill_root( | ||
3016 | struct xfs_btree_cur *cur, | ||
3017 | struct xfs_buf *bp, | ||
3018 | int level, | ||
3019 | union xfs_btree_ptr *newroot) | ||
3020 | { | ||
3021 | int error; | ||
3022 | |||
3023 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
3024 | XFS_BTREE_STATS_INC(cur, killroot); | ||
3025 | |||
3026 | /* | ||
3027 | * Update the root pointer, decreasing the level by 1 and then | ||
3028 | * free the old root. | ||
3029 | */ | ||
3030 | cur->bc_ops->set_root(cur, newroot, -1); | ||
3031 | |||
3032 | error = cur->bc_ops->free_block(cur, bp); | ||
3033 | if (error) { | ||
3034 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
3035 | return error; | ||
3036 | } | ||
3037 | |||
3038 | XFS_BTREE_STATS_INC(cur, free); | ||
3039 | |||
3040 | cur->bc_bufs[level] = NULL; | ||
3041 | cur->bc_ra[level] = 0; | ||
3042 | cur->bc_nlevels--; | ||
3043 | |||
3044 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
3045 | return 0; | ||
3046 | } | ||
3047 | |||
3014 | STATIC int | 3048 | STATIC int |
3015 | xfs_btree_dec_cursor( | 3049 | xfs_btree_dec_cursor( |
3016 | struct xfs_btree_cur *cur, | 3050 | struct xfs_btree_cur *cur, |
@@ -3195,7 +3229,7 @@ xfs_btree_delrec( | |||
3195 | * Make it the new root of the btree. | 3229 | * Make it the new root of the btree. |
3196 | */ | 3230 | */ |
3197 | pp = xfs_btree_ptr_addr(cur, 1, block); | 3231 | pp = xfs_btree_ptr_addr(cur, 1, block); |
3198 | error = cur->bc_ops->kill_root(cur, bp, level, pp); | 3232 | error = xfs_btree_kill_root(cur, bp, level, pp); |
3199 | if (error) | 3233 | if (error) |
3200 | goto error0; | 3234 | goto error0; |
3201 | } else if (level > 0) { | 3235 | } else if (level > 0) { |
diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 7fa07062bdda..82fafc66bd1f 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h | |||
@@ -152,9 +152,7 @@ struct xfs_btree_ops { | |||
152 | 152 | ||
153 | /* update btree root pointer */ | 153 | /* update btree root pointer */ |
154 | void (*set_root)(struct xfs_btree_cur *cur, | 154 | void (*set_root)(struct xfs_btree_cur *cur, |
155 | union xfs_btree_ptr *nptr, int level_change); | 155 | union xfs_btree_ptr *nptr, int level_change); |
156 | int (*kill_root)(struct xfs_btree_cur *cur, struct xfs_buf *bp, | ||
157 | int level, union xfs_btree_ptr *newroot); | ||
158 | 156 | ||
159 | /* block allocation / freeing */ | 157 | /* block allocation / freeing */ |
160 | int (*alloc_block)(struct xfs_btree_cur *cur, | 158 | int (*alloc_block)(struct xfs_btree_cur *cur, |
@@ -399,16 +397,6 @@ xfs_btree_reada_bufs( | |||
399 | xfs_agblock_t agbno, /* allocation group block number */ | 397 | xfs_agblock_t agbno, /* allocation group block number */ |
400 | xfs_extlen_t count); /* count of filesystem blocks */ | 398 | xfs_extlen_t count); /* count of filesystem blocks */ |
401 | 399 | ||
402 | /* | ||
403 | * Set the buffer for level "lev" in the cursor to bp, releasing | ||
404 | * any previous buffer. | ||
405 | */ | ||
406 | void | ||
407 | xfs_btree_setbuf( | ||
408 | xfs_btree_cur_t *cur, /* btree cursor */ | ||
409 | int lev, /* level in btree */ | ||
410 | struct xfs_buf *bp); /* new buffer to set */ | ||
411 | |||
412 | 400 | ||
413 | /* | 401 | /* |
414 | * Common btree core entry points. | 402 | * Common btree core entry points. |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 1b09d7a280df..2686d0d54c5b 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -692,8 +692,7 @@ xfs_buf_item_init( | |||
692 | * the first. If we do already have one, there is | 692 | * the first. If we do already have one, there is |
693 | * nothing to do here so return. | 693 | * nothing to do here so return. |
694 | */ | 694 | */ |
695 | if (bp->b_mount != mp) | 695 | ASSERT(bp->b_target->bt_mount == mp); |
696 | bp->b_mount = mp; | ||
697 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { | 696 | if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) { |
698 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); | 697 | lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *); |
699 | if (lip->li_type == XFS_LI_BUF) { | 698 | if (lip->li_type == XFS_LI_BUF) { |
@@ -974,7 +973,7 @@ xfs_buf_iodone_callbacks( | |||
974 | xfs_buf_do_callbacks(bp, lip); | 973 | xfs_buf_do_callbacks(bp, lip); |
975 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 974 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
976 | XFS_BUF_CLR_IODONE_FUNC(bp); | 975 | XFS_BUF_CLR_IODONE_FUNC(bp); |
977 | xfs_biodone(bp); | 976 | xfs_buf_ioend(bp, 0); |
978 | return; | 977 | return; |
979 | } | 978 | } |
980 | 979 | ||
@@ -1033,7 +1032,7 @@ xfs_buf_iodone_callbacks( | |||
1033 | xfs_buf_do_callbacks(bp, lip); | 1032 | xfs_buf_do_callbacks(bp, lip); |
1034 | XFS_BUF_SET_FSPRIVATE(bp, NULL); | 1033 | XFS_BUF_SET_FSPRIVATE(bp, NULL); |
1035 | XFS_BUF_CLR_IODONE_FUNC(bp); | 1034 | XFS_BUF_CLR_IODONE_FUNC(bp); |
1036 | xfs_biodone(bp); | 1035 | xfs_buf_ioend(bp, 0); |
1037 | } | 1036 | } |
1038 | 1037 | ||
1039 | /* | 1038 | /* |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 30fa0e206fba..1c00bedb3175 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -2042,7 +2042,7 @@ xfs_da_do_buf( | |||
2042 | mappedbno, nmapped, 0, &bp); | 2042 | mappedbno, nmapped, 0, &bp); |
2043 | break; | 2043 | break; |
2044 | case 3: | 2044 | case 3: |
2045 | xfs_baread(mp->m_ddev_targp, mappedbno, nmapped); | 2045 | xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped); |
2046 | error = 0; | 2046 | error = 0; |
2047 | bp = NULL; | 2047 | bp = NULL; |
2048 | break; | 2048 | break; |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5b153b2e6a3..dffba9ba0db6 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -49,8 +49,9 @@ typedef struct xfs_dinode { | |||
49 | __be32 di_uid; /* owner's user id */ | 49 | __be32 di_uid; /* owner's user id */ |
50 | __be32 di_gid; /* owner's group id */ | 50 | __be32 di_gid; /* owner's group id */ |
51 | __be32 di_nlink; /* number of links to file */ | 51 | __be32 di_nlink; /* number of links to file */ |
52 | __be16 di_projid; /* owner's project id */ | 52 | __be16 di_projid_lo; /* lower part of owner's project id */ |
53 | __u8 di_pad[8]; /* unused, zeroed space */ | 53 | __be16 di_projid_hi; /* higher part owner's project id */ |
54 | __u8 di_pad[6]; /* unused, zeroed space */ | ||
54 | __be16 di_flushiter; /* incremented on flush */ | 55 | __be16 di_flushiter; /* incremented on flush */ |
55 | xfs_timestamp_t di_atime; /* time last accessed */ | 56 | xfs_timestamp_t di_atime; /* time last accessed */ |
56 | xfs_timestamp_t di_mtime; /* time last modified */ | 57 | xfs_timestamp_t di_mtime; /* time last modified */ |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 504be8640e91..ae891223be90 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -961,7 +961,7 @@ xfs_dir2_leaf_getdents( | |||
961 | if (i > ra_current && | 961 | if (i > ra_current && |
962 | map[ra_index].br_blockcount >= | 962 | map[ra_index].br_blockcount >= |
963 | mp->m_dirblkfsbs) { | 963 | mp->m_dirblkfsbs) { |
964 | xfs_baread(mp->m_ddev_targp, | 964 | xfs_buf_readahead(mp->m_ddev_targp, |
965 | XFS_FSB_TO_DADDR(mp, | 965 | XFS_FSB_TO_DADDR(mp, |
966 | map[ra_index].br_startblock + | 966 | map[ra_index].br_startblock + |
967 | ra_offset), | 967 | ra_offset), |
diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 87c2e9d02288..8f6fc1a96386 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h | |||
@@ -293,9 +293,11 @@ typedef struct xfs_bstat { | |||
293 | __s32 bs_extsize; /* extent size */ | 293 | __s32 bs_extsize; /* extent size */ |
294 | __s32 bs_extents; /* number of extents */ | 294 | __s32 bs_extents; /* number of extents */ |
295 | __u32 bs_gen; /* generation count */ | 295 | __u32 bs_gen; /* generation count */ |
296 | __u16 bs_projid; /* project id */ | 296 | __u16 bs_projid_lo; /* lower part of project id */ |
297 | #define bs_projid bs_projid_lo /* (previously just bs_projid) */ | ||
297 | __u16 bs_forkoff; /* inode fork offset in bytes */ | 298 | __u16 bs_forkoff; /* inode fork offset in bytes */ |
298 | unsigned char bs_pad[12]; /* pad space, unused */ | 299 | __u16 bs_projid_hi; /* higher part of project id */ |
300 | unsigned char bs_pad[10]; /* pad space, unused */ | ||
299 | __u32 bs_dmevmask; /* DMIG event mask */ | 301 | __u32 bs_dmevmask; /* DMIG event mask */ |
300 | __u16 bs_dmstate; /* DMIG state info */ | 302 | __u16 bs_dmstate; /* DMIG state info */ |
301 | __u16 bs_aextents; /* attribute number of extents */ | 303 | __u16 bs_aextents; /* attribute number of extents */ |
@@ -448,6 +450,7 @@ typedef struct xfs_handle { | |||
448 | /* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ | 450 | /* XFS_IOC_SETBIOSIZE ---- deprecated 46 */ |
449 | /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ | 451 | /* XFS_IOC_GETBIOSIZE ---- deprecated 47 */ |
450 | #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) | 452 | #define XFS_IOC_GETBMAPX _IOWR('X', 56, struct getbmap) |
453 | #define XFS_IOC_ZERO_RANGE _IOW ('X', 57, struct xfs_flock64) | ||
451 | 454 | ||
452 | /* | 455 | /* |
453 | * ioctl commands that replace IRIX syssgi()'s | 456 | * ioctl commands that replace IRIX syssgi()'s |
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 43b1d5699335..a7c116e814af 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c | |||
@@ -144,12 +144,11 @@ xfs_growfs_data_private( | |||
144 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) | 144 | if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb))) |
145 | return error; | 145 | return error; |
146 | dpct = pct - mp->m_sb.sb_imax_pct; | 146 | dpct = pct - mp->m_sb.sb_imax_pct; |
147 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 147 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
148 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), | 148 | XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1), |
149 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 149 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); |
150 | if (error) | 150 | if (!bp) |
151 | return error; | 151 | return EIO; |
152 | ASSERT(bp); | ||
153 | xfs_buf_relse(bp); | 152 | xfs_buf_relse(bp); |
154 | 153 | ||
155 | new = nb; /* use new as a temporary here */ | 154 | new = nb; /* use new as a temporary here */ |
@@ -597,7 +596,8 @@ out: | |||
597 | * the extra reserve blocks from the reserve..... | 596 | * the extra reserve blocks from the reserve..... |
598 | */ | 597 | */ |
599 | int error; | 598 | int error; |
600 | error = xfs_mod_incore_sb(mp, XFS_SBS_FDBLOCKS, fdblks_delta, 0); | 599 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
600 | fdblks_delta, 0); | ||
601 | if (error == ENOSPC) | 601 | if (error == ENOSPC) |
602 | goto retry; | 602 | goto retry; |
603 | } | 603 | } |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5371d2dc360e..0626a32c3447 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -212,7 +212,7 @@ xfs_ialloc_inode_init( | |||
212 | * to log a whole cluster of inodes instead of all the | 212 | * to log a whole cluster of inodes instead of all the |
213 | * individual transactions causing a lot of log traffic. | 213 | * individual transactions causing a lot of log traffic. |
214 | */ | 214 | */ |
215 | xfs_biozero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); | 215 | xfs_buf_zero(fbuf, 0, ninodes << mp->m_sb.sb_inodelog); |
216 | for (i = 0; i < ninodes; i++) { | 216 | for (i = 0; i < ninodes; i++) { |
217 | int ioffset = i << mp->m_sb.sb_inodelog; | 217 | int ioffset = i << mp->m_sb.sb_inodelog; |
218 | uint isize = sizeof(struct xfs_dinode); | 218 | uint isize = sizeof(struct xfs_dinode); |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index d352862cefa0..16921f55c542 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -183,38 +183,6 @@ xfs_inobt_key_diff( | |||
183 | cur->bc_rec.i.ir_startino; | 183 | cur->bc_rec.i.ir_startino; |
184 | } | 184 | } |
185 | 185 | ||
186 | STATIC int | ||
187 | xfs_inobt_kill_root( | ||
188 | struct xfs_btree_cur *cur, | ||
189 | struct xfs_buf *bp, | ||
190 | int level, | ||
191 | union xfs_btree_ptr *newroot) | ||
192 | { | ||
193 | int error; | ||
194 | |||
195 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); | ||
196 | XFS_BTREE_STATS_INC(cur, killroot); | ||
197 | |||
198 | /* | ||
199 | * Update the root pointer, decreasing the level by 1 and then | ||
200 | * free the old root. | ||
201 | */ | ||
202 | xfs_inobt_set_root(cur, newroot, -1); | ||
203 | error = xfs_inobt_free_block(cur, bp); | ||
204 | if (error) { | ||
205 | XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); | ||
206 | return error; | ||
207 | } | ||
208 | |||
209 | XFS_BTREE_STATS_INC(cur, free); | ||
210 | |||
211 | cur->bc_bufs[level] = NULL; | ||
212 | cur->bc_nlevels--; | ||
213 | |||
214 | XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); | ||
215 | return 0; | ||
216 | } | ||
217 | |||
218 | #ifdef DEBUG | 186 | #ifdef DEBUG |
219 | STATIC int | 187 | STATIC int |
220 | xfs_inobt_keys_inorder( | 188 | xfs_inobt_keys_inorder( |
@@ -309,7 +277,6 @@ static const struct xfs_btree_ops xfs_inobt_ops = { | |||
309 | 277 | ||
310 | .dup_cursor = xfs_inobt_dup_cursor, | 278 | .dup_cursor = xfs_inobt_dup_cursor, |
311 | .set_root = xfs_inobt_set_root, | 279 | .set_root = xfs_inobt_set_root, |
312 | .kill_root = xfs_inobt_kill_root, | ||
313 | .alloc_block = xfs_inobt_alloc_block, | 280 | .alloc_block = xfs_inobt_alloc_block, |
314 | .free_block = xfs_inobt_free_block, | 281 | .free_block = xfs_inobt_free_block, |
315 | .get_minrecs = xfs_inobt_get_minrecs, | 282 | .get_minrecs = xfs_inobt_get_minrecs, |
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c index b1ecc6f97ade..0cdd26932d8e 100644 --- a/fs/xfs/xfs_iget.c +++ b/fs/xfs/xfs_iget.c | |||
@@ -365,8 +365,8 @@ xfs_iget( | |||
365 | xfs_perag_t *pag; | 365 | xfs_perag_t *pag; |
366 | xfs_agino_t agino; | 366 | xfs_agino_t agino; |
367 | 367 | ||
368 | /* the radix tree exists only in inode capable AGs */ | 368 | /* reject inode numbers outside existing AGs */ |
369 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_maxagi) | 369 | if (XFS_INO_TO_AGNO(mp, ino) >= mp->m_sb.sb_agcount) |
370 | return EINVAL; | 370 | return EINVAL; |
371 | 371 | ||
372 | /* get the perag structure and ensure that it's inode capable */ | 372 | /* get the perag structure and ensure that it's inode capable */ |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 34798f391c49..108c7a085f94 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -660,7 +660,8 @@ xfs_dinode_from_disk( | |||
660 | to->di_uid = be32_to_cpu(from->di_uid); | 660 | to->di_uid = be32_to_cpu(from->di_uid); |
661 | to->di_gid = be32_to_cpu(from->di_gid); | 661 | to->di_gid = be32_to_cpu(from->di_gid); |
662 | to->di_nlink = be32_to_cpu(from->di_nlink); | 662 | to->di_nlink = be32_to_cpu(from->di_nlink); |
663 | to->di_projid = be16_to_cpu(from->di_projid); | 663 | to->di_projid_lo = be16_to_cpu(from->di_projid_lo); |
664 | to->di_projid_hi = be16_to_cpu(from->di_projid_hi); | ||
664 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 665 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
665 | to->di_flushiter = be16_to_cpu(from->di_flushiter); | 666 | to->di_flushiter = be16_to_cpu(from->di_flushiter); |
666 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); | 667 | to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec); |
@@ -695,7 +696,8 @@ xfs_dinode_to_disk( | |||
695 | to->di_uid = cpu_to_be32(from->di_uid); | 696 | to->di_uid = cpu_to_be32(from->di_uid); |
696 | to->di_gid = cpu_to_be32(from->di_gid); | 697 | to->di_gid = cpu_to_be32(from->di_gid); |
697 | to->di_nlink = cpu_to_be32(from->di_nlink); | 698 | to->di_nlink = cpu_to_be32(from->di_nlink); |
698 | to->di_projid = cpu_to_be16(from->di_projid); | 699 | to->di_projid_lo = cpu_to_be16(from->di_projid_lo); |
700 | to->di_projid_hi = cpu_to_be16(from->di_projid_hi); | ||
699 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); | 701 | memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad)); |
700 | to->di_flushiter = cpu_to_be16(from->di_flushiter); | 702 | to->di_flushiter = cpu_to_be16(from->di_flushiter); |
701 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); | 703 | to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec); |
@@ -874,7 +876,7 @@ xfs_iread( | |||
874 | if (ip->i_d.di_version == 1) { | 876 | if (ip->i_d.di_version == 1) { |
875 | ip->i_d.di_nlink = ip->i_d.di_onlink; | 877 | ip->i_d.di_nlink = ip->i_d.di_onlink; |
876 | ip->i_d.di_onlink = 0; | 878 | ip->i_d.di_onlink = 0; |
877 | ip->i_d.di_projid = 0; | 879 | xfs_set_projid(ip, 0); |
878 | } | 880 | } |
879 | 881 | ||
880 | ip->i_delayed_blks = 0; | 882 | ip->i_delayed_blks = 0; |
@@ -982,8 +984,7 @@ xfs_ialloc( | |||
982 | mode_t mode, | 984 | mode_t mode, |
983 | xfs_nlink_t nlink, | 985 | xfs_nlink_t nlink, |
984 | xfs_dev_t rdev, | 986 | xfs_dev_t rdev, |
985 | cred_t *cr, | 987 | prid_t prid, |
986 | xfs_prid_t prid, | ||
987 | int okalloc, | 988 | int okalloc, |
988 | xfs_buf_t **ialloc_context, | 989 | xfs_buf_t **ialloc_context, |
989 | boolean_t *call_again, | 990 | boolean_t *call_again, |
@@ -1027,7 +1028,7 @@ xfs_ialloc( | |||
1027 | ASSERT(ip->i_d.di_nlink == nlink); | 1028 | ASSERT(ip->i_d.di_nlink == nlink); |
1028 | ip->i_d.di_uid = current_fsuid(); | 1029 | ip->i_d.di_uid = current_fsuid(); |
1029 | ip->i_d.di_gid = current_fsgid(); | 1030 | ip->i_d.di_gid = current_fsgid(); |
1030 | ip->i_d.di_projid = prid; | 1031 | xfs_set_projid(ip, prid); |
1031 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 1032 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
1032 | 1033 | ||
1033 | /* | 1034 | /* |
@@ -2725,7 +2726,7 @@ cluster_corrupt_out: | |||
2725 | XFS_BUF_UNDONE(bp); | 2726 | XFS_BUF_UNDONE(bp); |
2726 | XFS_BUF_STALE(bp); | 2727 | XFS_BUF_STALE(bp); |
2727 | XFS_BUF_ERROR(bp,EIO); | 2728 | XFS_BUF_ERROR(bp,EIO); |
2728 | xfs_biodone(bp); | 2729 | xfs_buf_ioend(bp, 0); |
2729 | } else { | 2730 | } else { |
2730 | XFS_BUF_STALE(bp); | 2731 | XFS_BUF_STALE(bp); |
2731 | xfs_buf_relse(bp); | 2732 | xfs_buf_relse(bp); |
@@ -3008,7 +3009,7 @@ xfs_iflush_int( | |||
3008 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); | 3009 | memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); |
3009 | memset(&(dip->di_pad[0]), 0, | 3010 | memset(&(dip->di_pad[0]), 0, |
3010 | sizeof(dip->di_pad)); | 3011 | sizeof(dip->di_pad)); |
3011 | ASSERT(ip->i_d.di_projid == 0); | 3012 | ASSERT(xfs_get_projid(ip) == 0); |
3012 | } | 3013 | } |
3013 | } | 3014 | } |
3014 | 3015 | ||
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 0898c5417d12..fb2ca2e4cdc9 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -134,8 +134,9 @@ typedef struct xfs_icdinode { | |||
134 | __uint32_t di_uid; /* owner's user id */ | 134 | __uint32_t di_uid; /* owner's user id */ |
135 | __uint32_t di_gid; /* owner's group id */ | 135 | __uint32_t di_gid; /* owner's group id */ |
136 | __uint32_t di_nlink; /* number of links to file */ | 136 | __uint32_t di_nlink; /* number of links to file */ |
137 | __uint16_t di_projid; /* owner's project id */ | 137 | __uint16_t di_projid_lo; /* lower part of owner's project id */ |
138 | __uint8_t di_pad[8]; /* unused, zeroed space */ | 138 | __uint16_t di_projid_hi; /* higher part of owner's project id */ |
139 | __uint8_t di_pad[6]; /* unused, zeroed space */ | ||
139 | __uint16_t di_flushiter; /* incremented on flush */ | 140 | __uint16_t di_flushiter; /* incremented on flush */ |
140 | xfs_ictimestamp_t di_atime; /* time last accessed */ | 141 | xfs_ictimestamp_t di_atime; /* time last accessed */ |
141 | xfs_ictimestamp_t di_mtime; /* time last modified */ | 142 | xfs_ictimestamp_t di_mtime; /* time last modified */ |
@@ -212,7 +213,6 @@ typedef struct xfs_icdinode { | |||
212 | #ifdef __KERNEL__ | 213 | #ifdef __KERNEL__ |
213 | 214 | ||
214 | struct bhv_desc; | 215 | struct bhv_desc; |
215 | struct cred; | ||
216 | struct xfs_buf; | 216 | struct xfs_buf; |
217 | struct xfs_bmap_free; | 217 | struct xfs_bmap_free; |
218 | struct xfs_bmbt_irec; | 218 | struct xfs_bmbt_irec; |
@@ -335,6 +335,25 @@ xfs_iflags_test_and_clear(xfs_inode_t *ip, unsigned short flags) | |||
335 | } | 335 | } |
336 | 336 | ||
337 | /* | 337 | /* |
338 | * Project quota id helpers (previously projid was 16bit only | ||
339 | * and using two 16bit values to hold new 32bit projid was choosen | ||
340 | * to retain compatibility with "old" filesystems). | ||
341 | */ | ||
342 | static inline prid_t | ||
343 | xfs_get_projid(struct xfs_inode *ip) | ||
344 | { | ||
345 | return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo; | ||
346 | } | ||
347 | |||
348 | static inline void | ||
349 | xfs_set_projid(struct xfs_inode *ip, | ||
350 | prid_t projid) | ||
351 | { | ||
352 | ip->i_d.di_projid_hi = (__uint16_t) (projid >> 16); | ||
353 | ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); | ||
354 | } | ||
355 | |||
356 | /* | ||
338 | * Manage the i_flush queue embedded in the inode. This completion | 357 | * Manage the i_flush queue embedded in the inode. This completion |
339 | * queue synchronizes processes attempting to flush the in-core | 358 | * queue synchronizes processes attempting to flush the in-core |
340 | * inode back to disk. | 359 | * inode back to disk. |
@@ -456,8 +475,8 @@ void xfs_inode_free(struct xfs_inode *ip); | |||
456 | * xfs_inode.c prototypes. | 475 | * xfs_inode.c prototypes. |
457 | */ | 476 | */ |
458 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, | 477 | int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, |
459 | xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, | 478 | xfs_nlink_t, xfs_dev_t, prid_t, int, |
460 | int, struct xfs_buf **, boolean_t *, xfs_inode_t **); | 479 | struct xfs_buf **, boolean_t *, xfs_inode_t **); |
461 | 480 | ||
462 | uint xfs_ip2xflags(struct xfs_inode *); | 481 | uint xfs_ip2xflags(struct xfs_inode *); |
463 | uint xfs_dic2xflags(struct xfs_dinode *); | 482 | uint xfs_dic2xflags(struct xfs_dinode *); |
@@ -471,7 +490,6 @@ int xfs_iunlink(struct xfs_trans *, xfs_inode_t *); | |||
471 | void xfs_iext_realloc(xfs_inode_t *, int, int); | 490 | void xfs_iext_realloc(xfs_inode_t *, int, int); |
472 | void xfs_iunpin_wait(xfs_inode_t *); | 491 | void xfs_iunpin_wait(xfs_inode_t *); |
473 | int xfs_iflush(xfs_inode_t *, uint); | 492 | int xfs_iflush(xfs_inode_t *, uint); |
474 | void xfs_ichgtime(xfs_inode_t *, int); | ||
475 | void xfs_lock_inodes(xfs_inode_t **, int, uint); | 493 | void xfs_lock_inodes(xfs_inode_t **, int, uint); |
476 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); | 494 | void xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint); |
477 | 495 | ||
@@ -482,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); | |||
482 | #define IHOLD(ip) \ | 500 | #define IHOLD(ip) \ |
483 | do { \ | 501 | do { \ |
484 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ | 502 | ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ |
485 | atomic_inc(&(VFS_I(ip)->i_count)); \ | 503 | ihold(VFS_I(ip)); \ |
486 | trace_xfs_ihold(ip, _THIS_IP_); \ | 504 | trace_xfs_ihold(ip, _THIS_IP_); \ |
487 | } while (0) | 505 | } while (0) |
488 | 506 | ||
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index fe00777e2796..c7ac020705df 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
@@ -223,15 +223,6 @@ xfs_inode_item_format( | |||
223 | nvecs = 1; | 223 | nvecs = 1; |
224 | 224 | ||
225 | /* | 225 | /* |
226 | * Make sure the linux inode is dirty. We do this before | ||
227 | * clearing i_update_core as the VFS will call back into | ||
228 | * XFS here and set i_update_core, so we need to dirty the | ||
229 | * inode first so that the ordering of i_update_core and | ||
230 | * unlogged modifications still works as described below. | ||
231 | */ | ||
232 | xfs_mark_inode_dirty_sync(ip); | ||
233 | |||
234 | /* | ||
235 | * Clear i_update_core if the timestamps (or any other | 226 | * Clear i_update_core if the timestamps (or any other |
236 | * non-transactional modification) need flushing/logging | 227 | * non-transactional modification) need flushing/logging |
237 | * and we're about to log them with the rest of the core. | 228 | * and we're about to log them with the rest of the core. |
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 7e3626e5925c..dc1882adaf54 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c | |||
@@ -92,7 +92,8 @@ xfs_bulkstat_one_int( | |||
92 | * further change. | 92 | * further change. |
93 | */ | 93 | */ |
94 | buf->bs_nlink = dic->di_nlink; | 94 | buf->bs_nlink = dic->di_nlink; |
95 | buf->bs_projid = dic->di_projid; | 95 | buf->bs_projid_lo = dic->di_projid_lo; |
96 | buf->bs_projid_hi = dic->di_projid_hi; | ||
96 | buf->bs_ino = ino; | 97 | buf->bs_ino = ino; |
97 | buf->bs_mode = dic->di_mode; | 98 | buf->bs_mode = dic->di_mode; |
98 | buf->bs_uid = dic->di_uid; | 99 | buf->bs_uid = dic->di_uid; |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 33f718f92a48..cee4ab9f8a9e 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
@@ -917,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp) | |||
917 | l = iclog->ic_log; | 917 | l = iclog->ic_log; |
918 | 918 | ||
919 | /* | 919 | /* |
920 | * If the _XFS_BARRIER_FAILED flag was set by a lower | ||
921 | * layer, it means the underlying device no longer supports | ||
922 | * barrier I/O. Warn loudly and turn off barriers. | ||
923 | */ | ||
924 | if (bp->b_flags & _XFS_BARRIER_FAILED) { | ||
925 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
926 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | ||
927 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | ||
928 | "xlog_iodone: Barriers are no longer supported" | ||
929 | " by device. Disabling barriers\n"); | ||
930 | } | ||
931 | |||
932 | /* | ||
933 | * Race to shutdown the filesystem if we see an error. | 920 | * Race to shutdown the filesystem if we see an error. |
934 | */ | 921 | */ |
935 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, | 922 | if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp, |
@@ -1131,7 +1118,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
1131 | iclog->ic_prev = prev_iclog; | 1118 | iclog->ic_prev = prev_iclog; |
1132 | prev_iclog = iclog; | 1119 | prev_iclog = iclog; |
1133 | 1120 | ||
1134 | bp = xfs_buf_get_noaddr(log->l_iclog_size, mp->m_logdev_targp); | 1121 | bp = xfs_buf_get_uncached(mp->m_logdev_targp, |
1122 | log->l_iclog_size, 0); | ||
1135 | if (!bp) | 1123 | if (!bp) |
1136 | goto out_free_iclog; | 1124 | goto out_free_iclog; |
1137 | if (!XFS_BUF_CPSEMA(bp)) | 1125 | if (!XFS_BUF_CPSEMA(bp)) |
@@ -1309,7 +1297,7 @@ xlog_bdstrat( | |||
1309 | if (iclog->ic_state & XLOG_STATE_IOERROR) { | 1297 | if (iclog->ic_state & XLOG_STATE_IOERROR) { |
1310 | XFS_BUF_ERROR(bp, EIO); | 1298 | XFS_BUF_ERROR(bp, EIO); |
1311 | XFS_BUF_STALE(bp); | 1299 | XFS_BUF_STALE(bp); |
1312 | xfs_biodone(bp); | 1300 | xfs_buf_ioend(bp, 0); |
1313 | /* | 1301 | /* |
1314 | * It would seem logical to return EIO here, but we rely on | 1302 | * It would seem logical to return EIO here, but we rely on |
1315 | * the log state machine to propagate I/O errors instead of | 1303 | * the log state machine to propagate I/O errors instead of |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ed575fb4b495..23d6ceb5e97b 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -146,102 +146,6 @@ xlog_cil_init_post_recovery( | |||
146 | } | 146 | } |
147 | 147 | ||
148 | /* | 148 | /* |
149 | * Insert the log item into the CIL and calculate the difference in space | ||
150 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
151 | * if the change requires additional log metadata. If it does, take that space | ||
152 | * as well. Remove the amount of space we addded to the checkpoint ticket from | ||
153 | * the current transaction ticket so that the accounting works out correctly. | ||
154 | * | ||
155 | * If this is the first time the item is being placed into the CIL in this | ||
156 | * context, pin it so it can't be written to disk until the CIL is flushed to | ||
157 | * the iclog and the iclog written to disk. | ||
158 | */ | ||
159 | static void | ||
160 | xlog_cil_insert( | ||
161 | struct log *log, | ||
162 | struct xlog_ticket *ticket, | ||
163 | struct xfs_log_item *item, | ||
164 | struct xfs_log_vec *lv) | ||
165 | { | ||
166 | struct xfs_cil *cil = log->l_cilp; | ||
167 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
168 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | ||
169 | int len; | ||
170 | int diff_iovecs; | ||
171 | int iclog_space; | ||
172 | |||
173 | if (old) { | ||
174 | /* existing lv on log item, space used is a delta */ | ||
175 | ASSERT(!list_empty(&item->li_cil)); | ||
176 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
177 | |||
178 | len = lv->lv_buf_len - old->lv_buf_len; | ||
179 | diff_iovecs = lv->lv_niovecs - old->lv_niovecs; | ||
180 | kmem_free(old->lv_buf); | ||
181 | kmem_free(old); | ||
182 | } else { | ||
183 | /* new lv, must pin the log item */ | ||
184 | ASSERT(!lv->lv_item->li_lv); | ||
185 | ASSERT(list_empty(&item->li_cil)); | ||
186 | |||
187 | len = lv->lv_buf_len; | ||
188 | diff_iovecs = lv->lv_niovecs; | ||
189 | IOP_PIN(lv->lv_item); | ||
190 | |||
191 | } | ||
192 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
193 | |||
194 | /* attach new log vector to log item */ | ||
195 | lv->lv_item->li_lv = lv; | ||
196 | |||
197 | spin_lock(&cil->xc_cil_lock); | ||
198 | list_move_tail(&item->li_cil, &cil->xc_cil); | ||
199 | ctx->nvecs += diff_iovecs; | ||
200 | |||
201 | /* | ||
202 | * If this is the first time the item is being committed to the CIL, | ||
203 | * store the sequence number on the log item so we can tell | ||
204 | * in future commits whether this is the first checkpoint the item is | ||
205 | * being committed into. | ||
206 | */ | ||
207 | if (!item->li_seq) | ||
208 | item->li_seq = ctx->sequence; | ||
209 | |||
210 | /* | ||
211 | * Now transfer enough transaction reservation to the context ticket | ||
212 | * for the checkpoint. The context ticket is special - the unit | ||
213 | * reservation has to grow as well as the current reservation as we | ||
214 | * steal from tickets so we can correctly determine the space used | ||
215 | * during the transaction commit. | ||
216 | */ | ||
217 | if (ctx->ticket->t_curr_res == 0) { | ||
218 | /* first commit in checkpoint, steal the header reservation */ | ||
219 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
220 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
221 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
222 | } | ||
223 | |||
224 | /* do we need space for more log record headers? */ | ||
225 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
226 | if (len > 0 && (ctx->space_used / iclog_space != | ||
227 | (ctx->space_used + len) / iclog_space)) { | ||
228 | int hdrs; | ||
229 | |||
230 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
231 | /* need to take into account split region headers, too */ | ||
232 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
233 | ctx->ticket->t_unit_res += hdrs; | ||
234 | ctx->ticket->t_curr_res += hdrs; | ||
235 | ticket->t_curr_res -= hdrs; | ||
236 | ASSERT(ticket->t_curr_res >= len); | ||
237 | } | ||
238 | ticket->t_curr_res -= len; | ||
239 | ctx->space_used += len; | ||
240 | |||
241 | spin_unlock(&cil->xc_cil_lock); | ||
242 | } | ||
243 | |||
244 | /* | ||
245 | * Format log item into a flat buffers | 149 | * Format log item into a flat buffers |
246 | * | 150 | * |
247 | * For delayed logging, we need to hold a formatted buffer containing all the | 151 | * For delayed logging, we need to hold a formatted buffer containing all the |
@@ -286,7 +190,7 @@ xlog_cil_format_items( | |||
286 | len += lv->lv_iovecp[index].i_len; | 190 | len += lv->lv_iovecp[index].i_len; |
287 | 191 | ||
288 | lv->lv_buf_len = len; | 192 | lv->lv_buf_len = len; |
289 | lv->lv_buf = kmem_zalloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); | 193 | lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS); |
290 | ptr = lv->lv_buf; | 194 | ptr = lv->lv_buf; |
291 | 195 | ||
292 | for (index = 0; index < lv->lv_niovecs; index++) { | 196 | for (index = 0; index < lv->lv_niovecs; index++) { |
@@ -300,21 +204,136 @@ xlog_cil_format_items( | |||
300 | } | 204 | } |
301 | } | 205 | } |
302 | 206 | ||
207 | /* | ||
208 | * Prepare the log item for insertion into the CIL. Calculate the difference in | ||
209 | * log space and vectors it will consume, and if it is a new item pin it as | ||
210 | * well. | ||
211 | */ | ||
212 | STATIC void | ||
213 | xfs_cil_prepare_item( | ||
214 | struct log *log, | ||
215 | struct xfs_log_vec *lv, | ||
216 | int *len, | ||
217 | int *diff_iovecs) | ||
218 | { | ||
219 | struct xfs_log_vec *old = lv->lv_item->li_lv; | ||
220 | |||
221 | if (old) { | ||
222 | /* existing lv on log item, space used is a delta */ | ||
223 | ASSERT(!list_empty(&lv->lv_item->li_cil)); | ||
224 | ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); | ||
225 | |||
226 | *len += lv->lv_buf_len - old->lv_buf_len; | ||
227 | *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; | ||
228 | kmem_free(old->lv_buf); | ||
229 | kmem_free(old); | ||
230 | } else { | ||
231 | /* new lv, must pin the log item */ | ||
232 | ASSERT(!lv->lv_item->li_lv); | ||
233 | ASSERT(list_empty(&lv->lv_item->li_cil)); | ||
234 | |||
235 | *len += lv->lv_buf_len; | ||
236 | *diff_iovecs += lv->lv_niovecs; | ||
237 | IOP_PIN(lv->lv_item); | ||
238 | |||
239 | } | ||
240 | |||
241 | /* attach new log vector to log item */ | ||
242 | lv->lv_item->li_lv = lv; | ||
243 | |||
244 | /* | ||
245 | * If this is the first time the item is being committed to the | ||
246 | * CIL, store the sequence number on the log item so we can | ||
247 | * tell in future commits whether this is the first checkpoint | ||
248 | * the item is being committed into. | ||
249 | */ | ||
250 | if (!lv->lv_item->li_seq) | ||
251 | lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; | ||
252 | } | ||
253 | |||
254 | /* | ||
255 | * Insert the log items into the CIL and calculate the difference in space | ||
256 | * consumed by the item. Add the space to the checkpoint ticket and calculate | ||
257 | * if the change requires additional log metadata. If it does, take that space | ||
258 | * as well. Remove the amount of space we addded to the checkpoint ticket from | ||
259 | * the current transaction ticket so that the accounting works out correctly. | ||
260 | */ | ||
303 | static void | 261 | static void |
304 | xlog_cil_insert_items( | 262 | xlog_cil_insert_items( |
305 | struct log *log, | 263 | struct log *log, |
306 | struct xfs_log_vec *log_vector, | 264 | struct xfs_log_vec *log_vector, |
307 | struct xlog_ticket *ticket, | 265 | struct xlog_ticket *ticket) |
308 | xfs_lsn_t *start_lsn) | ||
309 | { | 266 | { |
310 | struct xfs_log_vec *lv; | 267 | struct xfs_cil *cil = log->l_cilp; |
311 | 268 | struct xfs_cil_ctx *ctx = cil->xc_ctx; | |
312 | if (start_lsn) | 269 | struct xfs_log_vec *lv; |
313 | *start_lsn = log->l_cilp->xc_ctx->sequence; | 270 | int len = 0; |
271 | int diff_iovecs = 0; | ||
272 | int iclog_space; | ||
314 | 273 | ||
315 | ASSERT(log_vector); | 274 | ASSERT(log_vector); |
275 | |||
276 | /* | ||
277 | * Do all the accounting aggregation and switching of log vectors | ||
278 | * around in a separate loop to the insertion of items into the CIL. | ||
279 | * Then we can do a separate loop to update the CIL within a single | ||
280 | * lock/unlock pair. This reduces the number of round trips on the CIL | ||
281 | * lock from O(nr_logvectors) to O(1) and greatly reduces the overall | ||
282 | * hold time for the transaction commit. | ||
283 | * | ||
284 | * If this is the first time the item is being placed into the CIL in | ||
285 | * this context, pin it so it can't be written to disk until the CIL is | ||
286 | * flushed to the iclog and the iclog written to disk. | ||
287 | * | ||
288 | * We can do this safely because the context can't checkpoint until we | ||
289 | * are done so it doesn't matter exactly how we update the CIL. | ||
290 | */ | ||
316 | for (lv = log_vector; lv; lv = lv->lv_next) | 291 | for (lv = log_vector; lv; lv = lv->lv_next) |
317 | xlog_cil_insert(log, ticket, lv->lv_item, lv); | 292 | xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); |
293 | |||
294 | /* account for space used by new iovec headers */ | ||
295 | len += diff_iovecs * sizeof(xlog_op_header_t); | ||
296 | |||
297 | spin_lock(&cil->xc_cil_lock); | ||
298 | |||
299 | /* move the items to the tail of the CIL */ | ||
300 | for (lv = log_vector; lv; lv = lv->lv_next) | ||
301 | list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); | ||
302 | |||
303 | ctx->nvecs += diff_iovecs; | ||
304 | |||
305 | /* | ||
306 | * Now transfer enough transaction reservation to the context ticket | ||
307 | * for the checkpoint. The context ticket is special - the unit | ||
308 | * reservation has to grow as well as the current reservation as we | ||
309 | * steal from tickets so we can correctly determine the space used | ||
310 | * during the transaction commit. | ||
311 | */ | ||
312 | if (ctx->ticket->t_curr_res == 0) { | ||
313 | /* first commit in checkpoint, steal the header reservation */ | ||
314 | ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); | ||
315 | ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; | ||
316 | ticket->t_curr_res -= ctx->ticket->t_unit_res; | ||
317 | } | ||
318 | |||
319 | /* do we need space for more log record headers? */ | ||
320 | iclog_space = log->l_iclog_size - log->l_iclog_hsize; | ||
321 | if (len > 0 && (ctx->space_used / iclog_space != | ||
322 | (ctx->space_used + len) / iclog_space)) { | ||
323 | int hdrs; | ||
324 | |||
325 | hdrs = (len + iclog_space - 1) / iclog_space; | ||
326 | /* need to take into account split region headers, too */ | ||
327 | hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); | ||
328 | ctx->ticket->t_unit_res += hdrs; | ||
329 | ctx->ticket->t_curr_res += hdrs; | ||
330 | ticket->t_curr_res -= hdrs; | ||
331 | ASSERT(ticket->t_curr_res >= len); | ||
332 | } | ||
333 | ticket->t_curr_res -= len; | ||
334 | ctx->space_used += len; | ||
335 | |||
336 | spin_unlock(&cil->xc_cil_lock); | ||
318 | } | 337 | } |
319 | 338 | ||
320 | static void | 339 | static void |
@@ -405,9 +424,15 @@ xlog_cil_push( | |||
405 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); | 424 | new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS); |
406 | new_ctx->ticket = xlog_cil_ticket_alloc(log); | 425 | new_ctx->ticket = xlog_cil_ticket_alloc(log); |
407 | 426 | ||
408 | /* lock out transaction commit, but don't block on background push */ | 427 | /* |
428 | * Lock out transaction commit, but don't block for background pushes | ||
429 | * unless we are well over the CIL space limit. See the definition of | ||
430 | * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic | ||
431 | * used here. | ||
432 | */ | ||
409 | if (!down_write_trylock(&cil->xc_ctx_lock)) { | 433 | if (!down_write_trylock(&cil->xc_ctx_lock)) { |
410 | if (!push_seq) | 434 | if (!push_seq && |
435 | cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log)) | ||
411 | goto out_free_ticket; | 436 | goto out_free_ticket; |
412 | down_write(&cil->xc_ctx_lock); | 437 | down_write(&cil->xc_ctx_lock); |
413 | } | 438 | } |
@@ -422,7 +447,7 @@ xlog_cil_push( | |||
422 | goto out_skip; | 447 | goto out_skip; |
423 | 448 | ||
424 | /* check for a previously pushed seqeunce */ | 449 | /* check for a previously pushed seqeunce */ |
425 | if (push_seq < cil->xc_ctx->sequence) | 450 | if (push_seq && push_seq < cil->xc_ctx->sequence) |
426 | goto out_skip; | 451 | goto out_skip; |
427 | 452 | ||
428 | /* | 453 | /* |
@@ -632,7 +657,10 @@ xfs_log_commit_cil( | |||
632 | 657 | ||
633 | /* lock out background commit */ | 658 | /* lock out background commit */ |
634 | down_read(&log->l_cilp->xc_ctx_lock); | 659 | down_read(&log->l_cilp->xc_ctx_lock); |
635 | xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn); | 660 | if (commit_lsn) |
661 | *commit_lsn = log->l_cilp->xc_ctx->sequence; | ||
662 | |||
663 | xlog_cil_insert_items(log, log_vector, tp->t_ticket); | ||
636 | 664 | ||
637 | /* check we didn't blow the reservation */ | 665 | /* check we didn't blow the reservation */ |
638 | if (tp->t_ticket->t_curr_res < 0) | 666 | if (tp->t_ticket->t_curr_res < 0) |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index ced52b98b322..edcdfe01617f 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
@@ -426,13 +426,13 @@ struct xfs_cil { | |||
426 | }; | 426 | }; |
427 | 427 | ||
428 | /* | 428 | /* |
429 | * The amount of log space we should the CIL to aggregate is difficult to size. | 429 | * The amount of log space we allow the CIL to aggregate is difficult to size. |
430 | * Whatever we chose we have to make we can get a reservation for the log space | 430 | * Whatever we choose, we have to make sure we can get a reservation for the |
431 | * effectively, that it is large enough to capture sufficient relogging to | 431 | * log space effectively, that it is large enough to capture sufficient |
432 | * reduce log buffer IO significantly, but it is not too large for the log or | 432 | * relogging to reduce log buffer IO significantly, but it is not too large for |
433 | * induces too much latency when writing out through the iclogs. We track both | 433 | * the log or induces too much latency when writing out through the iclogs. We |
434 | * space consumed and the number of vectors in the checkpoint context, so we | 434 | * track both space consumed and the number of vectors in the checkpoint |
435 | * need to decide which to use for limiting. | 435 | * context, so we need to decide which to use for limiting. |
436 | * | 436 | * |
437 | * Every log buffer we write out during a push needs a header reserved, which | 437 | * Every log buffer we write out during a push needs a header reserved, which |
438 | * is at least one sector and more for v2 logs. Hence we need a reservation of | 438 | * is at least one sector and more for v2 logs. Hence we need a reservation of |
@@ -459,16 +459,21 @@ struct xfs_cil { | |||
459 | * checkpoint transaction ticket is specific to the checkpoint context, rather | 459 | * checkpoint transaction ticket is specific to the checkpoint context, rather |
460 | * than the CIL itself. | 460 | * than the CIL itself. |
461 | * | 461 | * |
462 | * With dynamic reservations, we can basically make up arbitrary limits for the | 462 | * With dynamic reservations, we can effectively make up arbitrary limits for |
463 | * checkpoint size so long as they don't violate any other size rules. Hence | 463 | * the checkpoint size so long as they don't violate any other size rules. |
464 | * the initial maximum size for the checkpoint transaction will be set to a | 464 | * Recovery imposes a rule that no transaction exceed half the log, so we are |
465 | * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit | 465 | * limited by that. Furthermore, the log transaction reservation subsystem |
466 | * right now based on the latency of writing out a large amount of data through | 466 | * tries to keep 25% of the log free, so we need to keep below that limit or we |
467 | * the circular iclog buffers. | 467 | * risk running out of free log space to start any new transactions. |
468 | * | ||
469 | * In order to keep background CIL push efficient, we will set a lower | ||
470 | * threshold at which background pushing is attempted without blocking current | ||
471 | * transaction commits. A separate, higher bound defines when CIL pushes are | ||
472 | * enforced to ensure we stay within our maximum checkpoint size bounds. | ||
473 | * threshold, yet give us plenty of space for aggregation on large logs. | ||
468 | */ | 474 | */ |
469 | 475 | #define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3) | |
470 | #define XLOG_CIL_SPACE_LIMIT(log) \ | 476 | #define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4)) |
471 | (min((log->l_logsize >> 2), (8 * 1024 * 1024))) | ||
472 | 477 | ||
473 | /* | 478 | /* |
474 | * The reservation head lsn is not made up of a cycle number and block number. | 479 | * The reservation head lsn is not made up of a cycle number and block number. |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 6f3f5fa37acf..966d3f97458c 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
@@ -107,7 +107,8 @@ xlog_get_bp( | |||
107 | nbblks += log->l_sectBBsize; | 107 | nbblks += log->l_sectBBsize; |
108 | nbblks = round_up(nbblks, log->l_sectBBsize); | 108 | nbblks = round_up(nbblks, log->l_sectBBsize); |
109 | 109 | ||
110 | return xfs_buf_get_noaddr(BBTOB(nbblks), log->l_mp->m_logdev_targp); | 110 | return xfs_buf_get_uncached(log->l_mp->m_logdev_targp, |
111 | BBTOB(nbblks), 0); | ||
111 | } | 112 | } |
112 | 113 | ||
113 | STATIC void | 114 | STATIC void |
@@ -167,7 +168,7 @@ xlog_bread_noalign( | |||
167 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); | 168 | XFS_BUF_SET_TARGET(bp, log->l_mp->m_logdev_targp); |
168 | 169 | ||
169 | xfsbdstrat(log->l_mp, bp); | 170 | xfsbdstrat(log->l_mp, bp); |
170 | error = xfs_iowait(bp); | 171 | error = xfs_buf_iowait(bp); |
171 | if (error) | 172 | if (error) |
172 | xfs_ioerror_alert("xlog_bread", log->l_mp, | 173 | xfs_ioerror_alert("xlog_bread", log->l_mp, |
173 | bp, XFS_BUF_ADDR(bp)); | 174 | bp, XFS_BUF_ADDR(bp)); |
@@ -321,12 +322,13 @@ xlog_recover_iodone( | |||
321 | * this during recovery. One strike! | 322 | * this during recovery. One strike! |
322 | */ | 323 | */ |
323 | xfs_ioerror_alert("xlog_recover_iodone", | 324 | xfs_ioerror_alert("xlog_recover_iodone", |
324 | bp->b_mount, bp, XFS_BUF_ADDR(bp)); | 325 | bp->b_target->bt_mount, bp, |
325 | xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR); | 326 | XFS_BUF_ADDR(bp)); |
327 | xfs_force_shutdown(bp->b_target->bt_mount, | ||
328 | SHUTDOWN_META_IO_ERROR); | ||
326 | } | 329 | } |
327 | bp->b_mount = NULL; | ||
328 | XFS_BUF_CLR_IODONE_FUNC(bp); | 330 | XFS_BUF_CLR_IODONE_FUNC(bp); |
329 | xfs_biodone(bp); | 331 | xfs_buf_ioend(bp, 0); |
330 | } | 332 | } |
331 | 333 | ||
332 | /* | 334 | /* |
@@ -2275,8 +2277,7 @@ xlog_recover_do_buffer_trans( | |||
2275 | XFS_BUF_STALE(bp); | 2277 | XFS_BUF_STALE(bp); |
2276 | error = xfs_bwrite(mp, bp); | 2278 | error = xfs_bwrite(mp, bp); |
2277 | } else { | 2279 | } else { |
2278 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2280 | ASSERT(bp->b_target->bt_mount == mp); |
2279 | bp->b_mount = mp; | ||
2280 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2281 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2281 | xfs_bdwrite(mp, bp); | 2282 | xfs_bdwrite(mp, bp); |
2282 | } | 2283 | } |
@@ -2540,8 +2541,7 @@ xlog_recover_do_inode_trans( | |||
2540 | } | 2541 | } |
2541 | 2542 | ||
2542 | write_inode_buffer: | 2543 | write_inode_buffer: |
2543 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2544 | ASSERT(bp->b_target->bt_mount == mp); |
2544 | bp->b_mount = mp; | ||
2545 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2545 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2546 | xfs_bdwrite(mp, bp); | 2546 | xfs_bdwrite(mp, bp); |
2547 | error: | 2547 | error: |
@@ -2678,8 +2678,7 @@ xlog_recover_do_dquot_trans( | |||
2678 | memcpy(ddq, recddq, item->ri_buf[1].i_len); | 2678 | memcpy(ddq, recddq, item->ri_buf[1].i_len); |
2679 | 2679 | ||
2680 | ASSERT(dq_f->qlf_size == 2); | 2680 | ASSERT(dq_f->qlf_size == 2); |
2681 | ASSERT(bp->b_mount == NULL || bp->b_mount == mp); | 2681 | ASSERT(bp->b_target->bt_mount == mp); |
2682 | bp->b_mount = mp; | ||
2683 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); | 2682 | XFS_BUF_SET_IODONE_FUNC(bp, xlog_recover_iodone); |
2684 | xfs_bdwrite(mp, bp); | 2683 | xfs_bdwrite(mp, bp); |
2685 | 2684 | ||
@@ -3817,7 +3816,7 @@ xlog_do_recover( | |||
3817 | XFS_BUF_READ(bp); | 3816 | XFS_BUF_READ(bp); |
3818 | XFS_BUF_UNASYNC(bp); | 3817 | XFS_BUF_UNASYNC(bp); |
3819 | xfsbdstrat(log->l_mp, bp); | 3818 | xfsbdstrat(log->l_mp, bp); |
3820 | error = xfs_iowait(bp); | 3819 | error = xfs_buf_iowait(bp); |
3821 | if (error) { | 3820 | if (error) { |
3822 | xfs_ioerror_alert("xlog_do_recover", | 3821 | xfs_ioerror_alert("xlog_do_recover", |
3823 | log->l_mp, bp, XFS_BUF_ADDR(bp)); | 3822 | log->l_mp, bp, XFS_BUF_ADDR(bp)); |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index aeb9d72ebf6e..b1498ab5a399 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -52,16 +52,11 @@ STATIC void xfs_icsb_balance_counter(xfs_mount_t *, xfs_sb_field_t, | |||
52 | int); | 52 | int); |
53 | STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, | 53 | STATIC void xfs_icsb_balance_counter_locked(xfs_mount_t *, xfs_sb_field_t, |
54 | int); | 54 | int); |
55 | STATIC int xfs_icsb_modify_counters(xfs_mount_t *, xfs_sb_field_t, | ||
56 | int64_t, int); | ||
57 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); | 55 | STATIC void xfs_icsb_disable_counter(xfs_mount_t *, xfs_sb_field_t); |
58 | |||
59 | #else | 56 | #else |
60 | 57 | ||
61 | #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) | 58 | #define xfs_icsb_balance_counter(mp, a, b) do { } while (0) |
62 | #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) | 59 | #define xfs_icsb_balance_counter_locked(mp, a, b) do { } while (0) |
63 | #define xfs_icsb_modify_counters(mp, a, b, c) do { } while (0) | ||
64 | |||
65 | #endif | 60 | #endif |
66 | 61 | ||
67 | static const struct { | 62 | static const struct { |
@@ -199,6 +194,8 @@ xfs_uuid_unmount( | |||
199 | 194 | ||
200 | /* | 195 | /* |
201 | * Reference counting access wrappers to the perag structures. | 196 | * Reference counting access wrappers to the perag structures. |
197 | * Because we never free per-ag structures, the only thing we | ||
198 | * have to protect against changes is the tree structure itself. | ||
202 | */ | 199 | */ |
203 | struct xfs_perag * | 200 | struct xfs_perag * |
204 | xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) | 201 | xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) |
@@ -206,19 +203,43 @@ xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno) | |||
206 | struct xfs_perag *pag; | 203 | struct xfs_perag *pag; |
207 | int ref = 0; | 204 | int ref = 0; |
208 | 205 | ||
209 | spin_lock(&mp->m_perag_lock); | 206 | rcu_read_lock(); |
210 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); | 207 | pag = radix_tree_lookup(&mp->m_perag_tree, agno); |
211 | if (pag) { | 208 | if (pag) { |
212 | ASSERT(atomic_read(&pag->pag_ref) >= 0); | 209 | ASSERT(atomic_read(&pag->pag_ref) >= 0); |
213 | /* catch leaks in the positive direction during testing */ | ||
214 | ASSERT(atomic_read(&pag->pag_ref) < 1000); | ||
215 | ref = atomic_inc_return(&pag->pag_ref); | 210 | ref = atomic_inc_return(&pag->pag_ref); |
216 | } | 211 | } |
217 | spin_unlock(&mp->m_perag_lock); | 212 | rcu_read_unlock(); |
218 | trace_xfs_perag_get(mp, agno, ref, _RET_IP_); | 213 | trace_xfs_perag_get(mp, agno, ref, _RET_IP_); |
219 | return pag; | 214 | return pag; |
220 | } | 215 | } |
221 | 216 | ||
217 | /* | ||
218 | * search from @first to find the next perag with the given tag set. | ||
219 | */ | ||
220 | struct xfs_perag * | ||
221 | xfs_perag_get_tag( | ||
222 | struct xfs_mount *mp, | ||
223 | xfs_agnumber_t first, | ||
224 | int tag) | ||
225 | { | ||
226 | struct xfs_perag *pag; | ||
227 | int found; | ||
228 | int ref; | ||
229 | |||
230 | rcu_read_lock(); | ||
231 | found = radix_tree_gang_lookup_tag(&mp->m_perag_tree, | ||
232 | (void **)&pag, first, 1, tag); | ||
233 | if (found <= 0) { | ||
234 | rcu_read_unlock(); | ||
235 | return NULL; | ||
236 | } | ||
237 | ref = atomic_inc_return(&pag->pag_ref); | ||
238 | rcu_read_unlock(); | ||
239 | trace_xfs_perag_get_tag(mp, pag->pag_agno, ref, _RET_IP_); | ||
240 | return pag; | ||
241 | } | ||
242 | |||
222 | void | 243 | void |
223 | xfs_perag_put(struct xfs_perag *pag) | 244 | xfs_perag_put(struct xfs_perag *pag) |
224 | { | 245 | { |
@@ -229,10 +250,18 @@ xfs_perag_put(struct xfs_perag *pag) | |||
229 | trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); | 250 | trace_xfs_perag_put(pag->pag_mount, pag->pag_agno, ref, _RET_IP_); |
230 | } | 251 | } |
231 | 252 | ||
253 | STATIC void | ||
254 | __xfs_free_perag( | ||
255 | struct rcu_head *head) | ||
256 | { | ||
257 | struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); | ||
258 | |||
259 | ASSERT(atomic_read(&pag->pag_ref) == 0); | ||
260 | kmem_free(pag); | ||
261 | } | ||
262 | |||
232 | /* | 263 | /* |
233 | * Free up the resources associated with a mount structure. Assume that | 264 | * Free up the per-ag resources associated with the mount structure. |
234 | * the structure was initially zeroed, so we can tell which fields got | ||
235 | * initialized. | ||
236 | */ | 265 | */ |
237 | STATIC void | 266 | STATIC void |
238 | xfs_free_perag( | 267 | xfs_free_perag( |
@@ -244,10 +273,9 @@ xfs_free_perag( | |||
244 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { | 273 | for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { |
245 | spin_lock(&mp->m_perag_lock); | 274 | spin_lock(&mp->m_perag_lock); |
246 | pag = radix_tree_delete(&mp->m_perag_tree, agno); | 275 | pag = radix_tree_delete(&mp->m_perag_tree, agno); |
247 | ASSERT(pag); | ||
248 | ASSERT(atomic_read(&pag->pag_ref) == 0); | ||
249 | spin_unlock(&mp->m_perag_lock); | 276 | spin_unlock(&mp->m_perag_lock); |
250 | kmem_free(pag); | 277 | ASSERT(pag); |
278 | call_rcu(&pag->rcu_head, __xfs_free_perag); | ||
251 | } | 279 | } |
252 | } | 280 | } |
253 | 281 | ||
@@ -444,7 +472,10 @@ xfs_initialize_perag( | |||
444 | pag->pag_agno = index; | 472 | pag->pag_agno = index; |
445 | pag->pag_mount = mp; | 473 | pag->pag_mount = mp; |
446 | rwlock_init(&pag->pag_ici_lock); | 474 | rwlock_init(&pag->pag_ici_lock); |
475 | mutex_init(&pag->pag_ici_reclaim_lock); | ||
447 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); | 476 | INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); |
477 | spin_lock_init(&pag->pag_buf_lock); | ||
478 | pag->pag_buf_tree = RB_ROOT; | ||
448 | 479 | ||
449 | if (radix_tree_preload(GFP_NOFS)) | 480 | if (radix_tree_preload(GFP_NOFS)) |
450 | goto out_unwind; | 481 | goto out_unwind; |
@@ -639,7 +670,6 @@ int | |||
639 | xfs_readsb(xfs_mount_t *mp, int flags) | 670 | xfs_readsb(xfs_mount_t *mp, int flags) |
640 | { | 671 | { |
641 | unsigned int sector_size; | 672 | unsigned int sector_size; |
642 | unsigned int extra_flags; | ||
643 | xfs_buf_t *bp; | 673 | xfs_buf_t *bp; |
644 | int error; | 674 | int error; |
645 | 675 | ||
@@ -652,28 +682,24 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
652 | * access to the superblock. | 682 | * access to the superblock. |
653 | */ | 683 | */ |
654 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); | 684 | sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); |
655 | extra_flags = XBF_LOCK | XBF_FS_MANAGED | XBF_MAPPED; | ||
656 | 685 | ||
657 | bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, BTOBB(sector_size), | 686 | reread: |
658 | extra_flags); | 687 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
659 | if (!bp || XFS_BUF_ISERROR(bp)) { | 688 | XFS_SB_DADDR, sector_size, 0); |
660 | xfs_fs_mount_cmn_err(flags, "SB read failed"); | 689 | if (!bp) { |
661 | error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; | 690 | xfs_fs_mount_cmn_err(flags, "SB buffer read failed"); |
662 | goto fail; | 691 | return EIO; |
663 | } | 692 | } |
664 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
665 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | ||
666 | 693 | ||
667 | /* | 694 | /* |
668 | * Initialize the mount structure from the superblock. | 695 | * Initialize the mount structure from the superblock. |
669 | * But first do some basic consistency checking. | 696 | * But first do some basic consistency checking. |
670 | */ | 697 | */ |
671 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); | 698 | xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); |
672 | |||
673 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); | 699 | error = xfs_mount_validate_sb(mp, &(mp->m_sb), flags); |
674 | if (error) { | 700 | if (error) { |
675 | xfs_fs_mount_cmn_err(flags, "SB validate failed"); | 701 | xfs_fs_mount_cmn_err(flags, "SB validate failed"); |
676 | goto fail; | 702 | goto release_buf; |
677 | } | 703 | } |
678 | 704 | ||
679 | /* | 705 | /* |
@@ -684,7 +710,7 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
684 | "device supports only %u byte sectors (not %u)", | 710 | "device supports only %u byte sectors (not %u)", |
685 | sector_size, mp->m_sb.sb_sectsize); | 711 | sector_size, mp->m_sb.sb_sectsize); |
686 | error = ENOSYS; | 712 | error = ENOSYS; |
687 | goto fail; | 713 | goto release_buf; |
688 | } | 714 | } |
689 | 715 | ||
690 | /* | 716 | /* |
@@ -692,33 +718,20 @@ xfs_readsb(xfs_mount_t *mp, int flags) | |||
692 | * re-read the superblock so the buffer is correctly sized. | 718 | * re-read the superblock so the buffer is correctly sized. |
693 | */ | 719 | */ |
694 | if (sector_size < mp->m_sb.sb_sectsize) { | 720 | if (sector_size < mp->m_sb.sb_sectsize) { |
695 | XFS_BUF_UNMANAGE(bp); | ||
696 | xfs_buf_relse(bp); | 721 | xfs_buf_relse(bp); |
697 | sector_size = mp->m_sb.sb_sectsize; | 722 | sector_size = mp->m_sb.sb_sectsize; |
698 | bp = xfs_buf_read(mp->m_ddev_targp, XFS_SB_DADDR, | 723 | goto reread; |
699 | BTOBB(sector_size), extra_flags); | ||
700 | if (!bp || XFS_BUF_ISERROR(bp)) { | ||
701 | xfs_fs_mount_cmn_err(flags, "SB re-read failed"); | ||
702 | error = bp ? XFS_BUF_GETERROR(bp) : ENOMEM; | ||
703 | goto fail; | ||
704 | } | ||
705 | ASSERT(XFS_BUF_ISBUSY(bp)); | ||
706 | ASSERT(XFS_BUF_VALUSEMA(bp) <= 0); | ||
707 | } | 724 | } |
708 | 725 | ||
709 | /* Initialize per-cpu counters */ | 726 | /* Initialize per-cpu counters */ |
710 | xfs_icsb_reinit_counters(mp); | 727 | xfs_icsb_reinit_counters(mp); |
711 | 728 | ||
712 | mp->m_sb_bp = bp; | 729 | mp->m_sb_bp = bp; |
713 | xfs_buf_relse(bp); | 730 | xfs_buf_unlock(bp); |
714 | ASSERT(XFS_BUF_VALUSEMA(bp) > 0); | ||
715 | return 0; | 731 | return 0; |
716 | 732 | ||
717 | fail: | 733 | release_buf: |
718 | if (bp) { | 734 | xfs_buf_relse(bp); |
719 | XFS_BUF_UNMANAGE(bp); | ||
720 | xfs_buf_relse(bp); | ||
721 | } | ||
722 | return error; | 735 | return error; |
723 | } | 736 | } |
724 | 737 | ||
@@ -991,42 +1004,35 @@ xfs_check_sizes(xfs_mount_t *mp) | |||
991 | { | 1004 | { |
992 | xfs_buf_t *bp; | 1005 | xfs_buf_t *bp; |
993 | xfs_daddr_t d; | 1006 | xfs_daddr_t d; |
994 | int error; | ||
995 | 1007 | ||
996 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); | 1008 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks); |
997 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { | 1009 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_dblocks) { |
998 | cmn_err(CE_WARN, "XFS: size check 1 failed"); | 1010 | cmn_err(CE_WARN, "XFS: filesystem size mismatch detected"); |
999 | return XFS_ERROR(EFBIG); | 1011 | return XFS_ERROR(EFBIG); |
1000 | } | 1012 | } |
1001 | error = xfs_read_buf(mp, mp->m_ddev_targp, | 1013 | bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, |
1002 | d - XFS_FSS_TO_BB(mp, 1), | 1014 | d - XFS_FSS_TO_BB(mp, 1), |
1003 | XFS_FSS_TO_BB(mp, 1), 0, &bp); | 1015 | BBTOB(XFS_FSS_TO_BB(mp, 1)), 0); |
1004 | if (!error) { | 1016 | if (!bp) { |
1005 | xfs_buf_relse(bp); | 1017 | cmn_err(CE_WARN, "XFS: last sector read failed"); |
1006 | } else { | 1018 | return EIO; |
1007 | cmn_err(CE_WARN, "XFS: size check 2 failed"); | ||
1008 | if (error == ENOSPC) | ||
1009 | error = XFS_ERROR(EFBIG); | ||
1010 | return error; | ||
1011 | } | 1019 | } |
1020 | xfs_buf_relse(bp); | ||
1012 | 1021 | ||
1013 | if (mp->m_logdev_targp != mp->m_ddev_targp) { | 1022 | if (mp->m_logdev_targp != mp->m_ddev_targp) { |
1014 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); | 1023 | d = (xfs_daddr_t)XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks); |
1015 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { | 1024 | if (XFS_BB_TO_FSB(mp, d) != mp->m_sb.sb_logblocks) { |
1016 | cmn_err(CE_WARN, "XFS: size check 3 failed"); | 1025 | cmn_err(CE_WARN, "XFS: log size mismatch detected"); |
1017 | return XFS_ERROR(EFBIG); | 1026 | return XFS_ERROR(EFBIG); |
1018 | } | 1027 | } |
1019 | error = xfs_read_buf(mp, mp->m_logdev_targp, | 1028 | bp = xfs_buf_read_uncached(mp, mp->m_logdev_targp, |
1020 | d - XFS_FSB_TO_BB(mp, 1), | 1029 | d - XFS_FSB_TO_BB(mp, 1), |
1021 | XFS_FSB_TO_BB(mp, 1), 0, &bp); | 1030 | XFS_FSB_TO_B(mp, 1), 0); |
1022 | if (!error) { | 1031 | if (!bp) { |
1023 | xfs_buf_relse(bp); | 1032 | cmn_err(CE_WARN, "XFS: log device read failed"); |
1024 | } else { | 1033 | return EIO; |
1025 | cmn_err(CE_WARN, "XFS: size check 3 failed"); | ||
1026 | if (error == ENOSPC) | ||
1027 | error = XFS_ERROR(EFBIG); | ||
1028 | return error; | ||
1029 | } | 1034 | } |
1035 | xfs_buf_relse(bp); | ||
1030 | } | 1036 | } |
1031 | return 0; | 1037 | return 0; |
1032 | } | 1038 | } |
@@ -1601,7 +1607,7 @@ xfs_unmountfs_writesb(xfs_mount_t *mp) | |||
1601 | XFS_BUF_UNASYNC(sbp); | 1607 | XFS_BUF_UNASYNC(sbp); |
1602 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); | 1608 | ASSERT(XFS_BUF_TARGET(sbp) == mp->m_ddev_targp); |
1603 | xfsbdstrat(mp, sbp); | 1609 | xfsbdstrat(mp, sbp); |
1604 | error = xfs_iowait(sbp); | 1610 | error = xfs_buf_iowait(sbp); |
1605 | if (error) | 1611 | if (error) |
1606 | xfs_ioerror_alert("xfs_unmountfs_writesb", | 1612 | xfs_ioerror_alert("xfs_unmountfs_writesb", |
1607 | mp, sbp, XFS_BUF_ADDR(sbp)); | 1613 | mp, sbp, XFS_BUF_ADDR(sbp)); |
@@ -1832,135 +1838,72 @@ xfs_mod_incore_sb_unlocked( | |||
1832 | */ | 1838 | */ |
1833 | int | 1839 | int |
1834 | xfs_mod_incore_sb( | 1840 | xfs_mod_incore_sb( |
1835 | xfs_mount_t *mp, | 1841 | struct xfs_mount *mp, |
1836 | xfs_sb_field_t field, | 1842 | xfs_sb_field_t field, |
1837 | int64_t delta, | 1843 | int64_t delta, |
1838 | int rsvd) | 1844 | int rsvd) |
1839 | { | 1845 | { |
1840 | int status; | 1846 | int status; |
1841 | 1847 | ||
1842 | /* check for per-cpu counters */ | ||
1843 | switch (field) { | ||
1844 | #ifdef HAVE_PERCPU_SB | 1848 | #ifdef HAVE_PERCPU_SB |
1845 | case XFS_SBS_ICOUNT: | 1849 | ASSERT(field < XFS_SBS_ICOUNT || field > XFS_SBS_FDBLOCKS); |
1846 | case XFS_SBS_IFREE: | ||
1847 | case XFS_SBS_FDBLOCKS: | ||
1848 | if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { | ||
1849 | status = xfs_icsb_modify_counters(mp, field, | ||
1850 | delta, rsvd); | ||
1851 | break; | ||
1852 | } | ||
1853 | /* FALLTHROUGH */ | ||
1854 | #endif | 1850 | #endif |
1855 | default: | 1851 | spin_lock(&mp->m_sb_lock); |
1856 | spin_lock(&mp->m_sb_lock); | 1852 | status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); |
1857 | status = xfs_mod_incore_sb_unlocked(mp, field, delta, rsvd); | 1853 | spin_unlock(&mp->m_sb_lock); |
1858 | spin_unlock(&mp->m_sb_lock); | ||
1859 | break; | ||
1860 | } | ||
1861 | 1854 | ||
1862 | return status; | 1855 | return status; |
1863 | } | 1856 | } |
1864 | 1857 | ||
1865 | /* | 1858 | /* |
1866 | * xfs_mod_incore_sb_batch() is used to change more than one field | 1859 | * Change more than one field in the in-core superblock structure at a time. |
1867 | * in the in-core superblock structure at a time. This modification | ||
1868 | * is protected by a lock internal to this module. The fields and | ||
1869 | * changes to those fields are specified in the array of xfs_mod_sb | ||
1870 | * structures passed in. | ||
1871 | * | 1860 | * |
1872 | * Either all of the specified deltas will be applied or none of | 1861 | * The fields and changes to those fields are specified in the array of |
1873 | * them will. If any modified field dips below 0, then all modifications | 1862 | * xfs_mod_sb structures passed in. Either all of the specified deltas |
1874 | * will be backed out and EINVAL will be returned. | 1863 | * will be applied or none of them will. If any modified field dips below 0, |
1864 | * then all modifications will be backed out and EINVAL will be returned. | ||
1865 | * | ||
1866 | * Note that this function may not be used for the superblock values that | ||
1867 | * are tracked with the in-memory per-cpu counters - a direct call to | ||
1868 | * xfs_icsb_modify_counters is required for these. | ||
1875 | */ | 1869 | */ |
1876 | int | 1870 | int |
1877 | xfs_mod_incore_sb_batch(xfs_mount_t *mp, xfs_mod_sb_t *msb, uint nmsb, int rsvd) | 1871 | xfs_mod_incore_sb_batch( |
1872 | struct xfs_mount *mp, | ||
1873 | xfs_mod_sb_t *msb, | ||
1874 | uint nmsb, | ||
1875 | int rsvd) | ||
1878 | { | 1876 | { |
1879 | int status=0; | 1877 | xfs_mod_sb_t *msbp = &msb[0]; |
1880 | xfs_mod_sb_t *msbp; | 1878 | int error = 0; |
1881 | 1879 | ||
1882 | /* | 1880 | /* |
1883 | * Loop through the array of mod structures and apply each | 1881 | * Loop through the array of mod structures and apply each individually. |
1884 | * individually. If any fail, then back out all those | 1882 | * If any fail, then back out all those which have already been applied. |
1885 | * which have already been applied. Do all of this within | 1883 | * Do all of this within the scope of the m_sb_lock so that all of the |
1886 | * the scope of the m_sb_lock so that all of the changes will | 1884 | * changes will be atomic. |
1887 | * be atomic. | ||
1888 | */ | 1885 | */ |
1889 | spin_lock(&mp->m_sb_lock); | 1886 | spin_lock(&mp->m_sb_lock); |
1890 | msbp = &msb[0]; | ||
1891 | for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { | 1887 | for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { |
1892 | /* | 1888 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || |
1893 | * Apply the delta at index n. If it fails, break | 1889 | msbp->msb_field > XFS_SBS_FDBLOCKS); |
1894 | * from the loop so we'll fall into the undo loop | ||
1895 | * below. | ||
1896 | */ | ||
1897 | switch (msbp->msb_field) { | ||
1898 | #ifdef HAVE_PERCPU_SB | ||
1899 | case XFS_SBS_ICOUNT: | ||
1900 | case XFS_SBS_IFREE: | ||
1901 | case XFS_SBS_FDBLOCKS: | ||
1902 | if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { | ||
1903 | spin_unlock(&mp->m_sb_lock); | ||
1904 | status = xfs_icsb_modify_counters(mp, | ||
1905 | msbp->msb_field, | ||
1906 | msbp->msb_delta, rsvd); | ||
1907 | spin_lock(&mp->m_sb_lock); | ||
1908 | break; | ||
1909 | } | ||
1910 | /* FALLTHROUGH */ | ||
1911 | #endif | ||
1912 | default: | ||
1913 | status = xfs_mod_incore_sb_unlocked(mp, | ||
1914 | msbp->msb_field, | ||
1915 | msbp->msb_delta, rsvd); | ||
1916 | break; | ||
1917 | } | ||
1918 | 1890 | ||
1919 | if (status != 0) { | 1891 | error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, |
1920 | break; | 1892 | msbp->msb_delta, rsvd); |
1921 | } | 1893 | if (error) |
1894 | goto unwind; | ||
1922 | } | 1895 | } |
1896 | spin_unlock(&mp->m_sb_lock); | ||
1897 | return 0; | ||
1923 | 1898 | ||
1924 | /* | 1899 | unwind: |
1925 | * If we didn't complete the loop above, then back out | 1900 | while (--msbp >= msb) { |
1926 | * any changes made to the superblock. If you add code | 1901 | error = xfs_mod_incore_sb_unlocked(mp, msbp->msb_field, |
1927 | * between the loop above and here, make sure that you | 1902 | -msbp->msb_delta, rsvd); |
1928 | * preserve the value of status. Loop back until | 1903 | ASSERT(error == 0); |
1929 | * we step below the beginning of the array. Make sure | ||
1930 | * we don't touch anything back there. | ||
1931 | */ | ||
1932 | if (status != 0) { | ||
1933 | msbp--; | ||
1934 | while (msbp >= msb) { | ||
1935 | switch (msbp->msb_field) { | ||
1936 | #ifdef HAVE_PERCPU_SB | ||
1937 | case XFS_SBS_ICOUNT: | ||
1938 | case XFS_SBS_IFREE: | ||
1939 | case XFS_SBS_FDBLOCKS: | ||
1940 | if (!(mp->m_flags & XFS_MOUNT_NO_PERCPU_SB)) { | ||
1941 | spin_unlock(&mp->m_sb_lock); | ||
1942 | status = xfs_icsb_modify_counters(mp, | ||
1943 | msbp->msb_field, | ||
1944 | -(msbp->msb_delta), | ||
1945 | rsvd); | ||
1946 | spin_lock(&mp->m_sb_lock); | ||
1947 | break; | ||
1948 | } | ||
1949 | /* FALLTHROUGH */ | ||
1950 | #endif | ||
1951 | default: | ||
1952 | status = xfs_mod_incore_sb_unlocked(mp, | ||
1953 | msbp->msb_field, | ||
1954 | -(msbp->msb_delta), | ||
1955 | rsvd); | ||
1956 | break; | ||
1957 | } | ||
1958 | ASSERT(status == 0); | ||
1959 | msbp--; | ||
1960 | } | ||
1961 | } | 1904 | } |
1962 | spin_unlock(&mp->m_sb_lock); | 1905 | spin_unlock(&mp->m_sb_lock); |
1963 | return status; | 1906 | return error; |
1964 | } | 1907 | } |
1965 | 1908 | ||
1966 | /* | 1909 | /* |
@@ -1998,18 +1941,13 @@ xfs_getsb( | |||
1998 | */ | 1941 | */ |
1999 | void | 1942 | void |
2000 | xfs_freesb( | 1943 | xfs_freesb( |
2001 | xfs_mount_t *mp) | 1944 | struct xfs_mount *mp) |
2002 | { | 1945 | { |
2003 | xfs_buf_t *bp; | 1946 | struct xfs_buf *bp = mp->m_sb_bp; |
2004 | 1947 | ||
2005 | /* | 1948 | xfs_buf_lock(bp); |
2006 | * Use xfs_getsb() so that the buffer will be locked | ||
2007 | * when we call xfs_buf_relse(). | ||
2008 | */ | ||
2009 | bp = xfs_getsb(mp, 0); | ||
2010 | XFS_BUF_UNMANAGE(bp); | ||
2011 | xfs_buf_relse(bp); | ||
2012 | mp->m_sb_bp = NULL; | 1949 | mp->m_sb_bp = NULL; |
1950 | xfs_buf_relse(bp); | ||
2013 | } | 1951 | } |
2014 | 1952 | ||
2015 | /* | 1953 | /* |
@@ -2496,7 +2434,7 @@ xfs_icsb_balance_counter( | |||
2496 | spin_unlock(&mp->m_sb_lock); | 2434 | spin_unlock(&mp->m_sb_lock); |
2497 | } | 2435 | } |
2498 | 2436 | ||
2499 | STATIC int | 2437 | int |
2500 | xfs_icsb_modify_counters( | 2438 | xfs_icsb_modify_counters( |
2501 | xfs_mount_t *mp, | 2439 | xfs_mount_t *mp, |
2502 | xfs_sb_field_t field, | 2440 | xfs_sb_field_t field, |
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 622da2179a57..5861b4980740 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h | |||
@@ -53,7 +53,6 @@ typedef struct xfs_trans_reservations { | |||
53 | 53 | ||
54 | #include "xfs_sync.h" | 54 | #include "xfs_sync.h" |
55 | 55 | ||
56 | struct cred; | ||
57 | struct log; | 56 | struct log; |
58 | struct xfs_mount_args; | 57 | struct xfs_mount_args; |
59 | struct xfs_inode; | 58 | struct xfs_inode; |
@@ -91,6 +90,8 @@ extern void xfs_icsb_reinit_counters(struct xfs_mount *); | |||
91 | extern void xfs_icsb_destroy_counters(struct xfs_mount *); | 90 | extern void xfs_icsb_destroy_counters(struct xfs_mount *); |
92 | extern void xfs_icsb_sync_counters(struct xfs_mount *, int); | 91 | extern void xfs_icsb_sync_counters(struct xfs_mount *, int); |
93 | extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); | 92 | extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); |
93 | extern int xfs_icsb_modify_counters(struct xfs_mount *, xfs_sb_field_t, | ||
94 | int64_t, int); | ||
94 | 95 | ||
95 | #else | 96 | #else |
96 | #define xfs_icsb_init_counters(mp) (0) | 97 | #define xfs_icsb_init_counters(mp) (0) |
@@ -98,6 +99,8 @@ extern void xfs_icsb_sync_counters_locked(struct xfs_mount *, int); | |||
98 | #define xfs_icsb_reinit_counters(mp) do { } while (0) | 99 | #define xfs_icsb_reinit_counters(mp) do { } while (0) |
99 | #define xfs_icsb_sync_counters(mp, flags) do { } while (0) | 100 | #define xfs_icsb_sync_counters(mp, flags) do { } while (0) |
100 | #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) | 101 | #define xfs_icsb_sync_counters_locked(mp, flags) do { } while (0) |
102 | #define xfs_icsb_modify_counters(mp, field, delta, rsvd) \ | ||
103 | xfs_mod_incore_sb(mp, field, delta, rsvd) | ||
101 | #endif | 104 | #endif |
102 | 105 | ||
103 | typedef struct xfs_mount { | 106 | typedef struct xfs_mount { |
@@ -232,8 +235,6 @@ typedef struct xfs_mount { | |||
232 | #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ | 235 | #define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */ |
233 | #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred | 236 | #define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred |
234 | * I/O size in stat() */ | 237 | * I/O size in stat() */ |
235 | #define XFS_MOUNT_NO_PERCPU_SB (1ULL << 23) /* don't use per-cpu superblock | ||
236 | counters */ | ||
237 | #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams | 238 | #define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams |
238 | allocator */ | 239 | allocator */ |
239 | #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ | 240 | #define XFS_MOUNT_NOATTR2 (1ULL << 25) /* disable use of attr2 format */ |
@@ -327,6 +328,8 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d) | |||
327 | * perag get/put wrappers for ref counting | 328 | * perag get/put wrappers for ref counting |
328 | */ | 329 | */ |
329 | struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); | 330 | struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno); |
331 | struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
332 | int tag); | ||
330 | void xfs_perag_put(struct xfs_perag *pag); | 333 | void xfs_perag_put(struct xfs_perag *pag); |
331 | 334 | ||
332 | /* | 335 | /* |
diff --git a/fs/xfs/xfs_refcache.h b/fs/xfs/xfs_refcache.h deleted file mode 100644 index 2dec79edb510..000000000000 --- a/fs/xfs/xfs_refcache.h +++ /dev/null | |||
@@ -1,52 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. | ||
3 | * All Rights Reserved. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or | ||
6 | * modify it under the terms of the GNU General Public License as | ||
7 | * published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope that it would be useful, | ||
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
12 | * GNU General Public License for more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License | ||
15 | * along with this program; if not, write the Free Software Foundation, | ||
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | ||
17 | */ | ||
18 | #ifndef __XFS_REFCACHE_H__ | ||
19 | #define __XFS_REFCACHE_H__ | ||
20 | |||
21 | #ifdef HAVE_REFCACHE | ||
22 | /* | ||
23 | * Maximum size (in inodes) for the NFS reference cache | ||
24 | */ | ||
25 | #define XFS_REFCACHE_SIZE_MAX 512 | ||
26 | |||
27 | struct xfs_inode; | ||
28 | struct xfs_mount; | ||
29 | |||
30 | extern void xfs_refcache_insert(struct xfs_inode *); | ||
31 | extern void xfs_refcache_purge_ip(struct xfs_inode *); | ||
32 | extern void xfs_refcache_purge_mp(struct xfs_mount *); | ||
33 | extern void xfs_refcache_purge_some(struct xfs_mount *); | ||
34 | extern void xfs_refcache_resize(int); | ||
35 | extern void xfs_refcache_destroy(void); | ||
36 | |||
37 | extern void xfs_refcache_iunlock(struct xfs_inode *, uint); | ||
38 | |||
39 | #else | ||
40 | |||
41 | #define xfs_refcache_insert(ip) do { } while (0) | ||
42 | #define xfs_refcache_purge_ip(ip) do { } while (0) | ||
43 | #define xfs_refcache_purge_mp(mp) do { } while (0) | ||
44 | #define xfs_refcache_purge_some(mp) do { } while (0) | ||
45 | #define xfs_refcache_resize(size) do { } while (0) | ||
46 | #define xfs_refcache_destroy() do { } while (0) | ||
47 | |||
48 | #define xfs_refcache_iunlock(ip, flags) xfs_iunlock(ip, flags) | ||
49 | |||
50 | #endif | ||
51 | |||
52 | #endif /* __XFS_REFCACHE_H__ */ | ||
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c index 8fca957200df..d2af0a8381a6 100644 --- a/fs/xfs/xfs_rename.c +++ b/fs/xfs/xfs_rename.c | |||
@@ -183,7 +183,7 @@ xfs_rename( | |||
183 | * tree quota mechanism would be circumvented. | 183 | * tree quota mechanism would be circumvented. |
184 | */ | 184 | */ |
185 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 185 | if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
186 | (target_dp->i_d.di_projid != src_ip->i_d.di_projid))) { | 186 | (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) { |
187 | error = XFS_ERROR(EXDEV); | 187 | error = XFS_ERROR(EXDEV); |
188 | goto error_return; | 188 | goto error_return; |
189 | } | 189 | } |
@@ -211,7 +211,9 @@ xfs_rename( | |||
211 | goto error_return; | 211 | goto error_return; |
212 | if (error) | 212 | if (error) |
213 | goto abort_return; | 213 | goto abort_return; |
214 | xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 214 | |
215 | xfs_trans_ichgtime(tp, target_dp, | ||
216 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
215 | 217 | ||
216 | if (new_parent && src_is_directory) { | 218 | if (new_parent && src_is_directory) { |
217 | error = xfs_bumplink(tp, target_dp); | 219 | error = xfs_bumplink(tp, target_dp); |
@@ -249,7 +251,9 @@ xfs_rename( | |||
249 | &first_block, &free_list, spaceres); | 251 | &first_block, &free_list, spaceres); |
250 | if (error) | 252 | if (error) |
251 | goto abort_return; | 253 | goto abort_return; |
252 | xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 254 | |
255 | xfs_trans_ichgtime(tp, target_dp, | ||
256 | XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | ||
253 | 257 | ||
254 | /* | 258 | /* |
255 | * Decrement the link count on the target since the target | 259 | * Decrement the link count on the target since the target |
@@ -292,7 +296,7 @@ xfs_rename( | |||
292 | * inode isn't really being changed, but old unix file systems did | 296 | * inode isn't really being changed, but old unix file systems did |
293 | * it and some incremental backup programs won't work without it. | 297 | * it and some incremental backup programs won't work without it. |
294 | */ | 298 | */ |
295 | xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG); | 299 | xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG); |
296 | 300 | ||
297 | /* | 301 | /* |
298 | * Adjust the link count on src_dp. This is necessary when | 302 | * Adjust the link count on src_dp. This is necessary when |
@@ -315,7 +319,7 @@ xfs_rename( | |||
315 | if (error) | 319 | if (error) |
316 | goto abort_return; | 320 | goto abort_return; |
317 | 321 | ||
318 | xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 322 | xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
319 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); | 323 | xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE); |
320 | if (new_parent) | 324 | if (new_parent) |
321 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); | 325 | xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE); |
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 891260fea11e..12a191385310 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_trans_space.h" | 39 | #include "xfs_trans_space.h" |
40 | #include "xfs_utils.h" | 40 | #include "xfs_utils.h" |
41 | #include "xfs_trace.h" | 41 | #include "xfs_trace.h" |
42 | #include "xfs_buf.h" | ||
42 | 43 | ||
43 | 44 | ||
44 | /* | 45 | /* |
@@ -1883,13 +1884,13 @@ xfs_growfs_rt( | |||
1883 | /* | 1884 | /* |
1884 | * Read in the last block of the device, make sure it exists. | 1885 | * Read in the last block of the device, make sure it exists. |
1885 | */ | 1886 | */ |
1886 | error = xfs_read_buf(mp, mp->m_rtdev_targp, | 1887 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, |
1887 | XFS_FSB_TO_BB(mp, nrblocks - 1), | 1888 | XFS_FSB_TO_BB(mp, nrblocks - 1), |
1888 | XFS_FSB_TO_BB(mp, 1), 0, &bp); | 1889 | XFS_FSB_TO_B(mp, 1), 0); |
1889 | if (error) | 1890 | if (!bp) |
1890 | return error; | 1891 | return EIO; |
1891 | ASSERT(bp); | ||
1892 | xfs_buf_relse(bp); | 1892 | xfs_buf_relse(bp); |
1893 | |||
1893 | /* | 1894 | /* |
1894 | * Calculate new parameters. These are the final values to be reached. | 1895 | * Calculate new parameters. These are the final values to be reached. |
1895 | */ | 1896 | */ |
@@ -2215,7 +2216,6 @@ xfs_rtmount_init( | |||
2215 | { | 2216 | { |
2216 | xfs_buf_t *bp; /* buffer for last block of subvolume */ | 2217 | xfs_buf_t *bp; /* buffer for last block of subvolume */ |
2217 | xfs_daddr_t d; /* address of last block of subvolume */ | 2218 | xfs_daddr_t d; /* address of last block of subvolume */ |
2218 | int error; /* error return value */ | ||
2219 | xfs_sb_t *sbp; /* filesystem superblock copy in mount */ | 2219 | xfs_sb_t *sbp; /* filesystem superblock copy in mount */ |
2220 | 2220 | ||
2221 | sbp = &mp->m_sb; | 2221 | sbp = &mp->m_sb; |
@@ -2242,15 +2242,12 @@ xfs_rtmount_init( | |||
2242 | (unsigned long long) mp->m_sb.sb_rblocks); | 2242 | (unsigned long long) mp->m_sb.sb_rblocks); |
2243 | return XFS_ERROR(EFBIG); | 2243 | return XFS_ERROR(EFBIG); |
2244 | } | 2244 | } |
2245 | error = xfs_read_buf(mp, mp->m_rtdev_targp, | 2245 | bp = xfs_buf_read_uncached(mp, mp->m_rtdev_targp, |
2246 | d - XFS_FSB_TO_BB(mp, 1), | 2246 | d - XFS_FSB_TO_BB(mp, 1), |
2247 | XFS_FSB_TO_BB(mp, 1), 0, &bp); | 2247 | XFS_FSB_TO_B(mp, 1), 0); |
2248 | if (error) { | 2248 | if (!bp) { |
2249 | cmn_err(CE_WARN, | 2249 | cmn_err(CE_WARN, "XFS: realtime device size check failed"); |
2250 | "XFS: realtime mount -- xfs_read_buf failed, returned %d", error); | 2250 | return EIO; |
2251 | if (error == ENOSPC) | ||
2252 | return XFS_ERROR(EFBIG); | ||
2253 | return error; | ||
2254 | } | 2251 | } |
2255 | xfs_buf_relse(bp); | 2252 | xfs_buf_relse(bp); |
2256 | return 0; | 2253 | return 0; |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 1b017c657494..1eb2ba586814 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -80,10 +80,12 @@ struct xfs_mount; | |||
80 | #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 | 80 | #define XFS_SB_VERSION2_RESERVED4BIT 0x00000004 |
81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ | 81 | #define XFS_SB_VERSION2_ATTR2BIT 0x00000008 /* Inline attr rework */ |
82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ | 82 | #define XFS_SB_VERSION2_PARENTBIT 0x00000010 /* parent pointers */ |
83 | #define XFS_SB_VERSION2_PROJID32BIT 0x00000080 /* 32 bit project id */ | ||
83 | 84 | ||
84 | #define XFS_SB_VERSION2_OKREALFBITS \ | 85 | #define XFS_SB_VERSION2_OKREALFBITS \ |
85 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ | 86 | (XFS_SB_VERSION2_LAZYSBCOUNTBIT | \ |
86 | XFS_SB_VERSION2_ATTR2BIT) | 87 | XFS_SB_VERSION2_ATTR2BIT | \ |
88 | XFS_SB_VERSION2_PROJID32BIT) | ||
87 | #define XFS_SB_VERSION2_OKSASHFBITS \ | 89 | #define XFS_SB_VERSION2_OKSASHFBITS \ |
88 | (0) | 90 | (0) |
89 | #define XFS_SB_VERSION2_OKREALBITS \ | 91 | #define XFS_SB_VERSION2_OKREALBITS \ |
@@ -495,6 +497,12 @@ static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) | |||
495 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; | 497 | sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT; |
496 | } | 498 | } |
497 | 499 | ||
500 | static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) | ||
501 | { | ||
502 | return xfs_sb_version_hasmorebits(sbp) && | ||
503 | (sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT); | ||
504 | } | ||
505 | |||
498 | /* | 506 | /* |
499 | * end of superblock version macros | 507 | * end of superblock version macros |
500 | */ | 508 | */ |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 1c47edaea0d2..f6d956b7711e 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -696,7 +696,7 @@ xfs_trans_reserve( | |||
696 | * fail if the count would go below zero. | 696 | * fail if the count would go below zero. |
697 | */ | 697 | */ |
698 | if (blocks > 0) { | 698 | if (blocks > 0) { |
699 | error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, | 699 | error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, |
700 | -((int64_t)blocks), rsvd); | 700 | -((int64_t)blocks), rsvd); |
701 | if (error != 0) { | 701 | if (error != 0) { |
702 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 702 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
@@ -767,7 +767,7 @@ undo_log: | |||
767 | 767 | ||
768 | undo_blocks: | 768 | undo_blocks: |
769 | if (blocks > 0) { | 769 | if (blocks > 0) { |
770 | (void) xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FDBLOCKS, | 770 | xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, |
771 | (int64_t)blocks, rsvd); | 771 | (int64_t)blocks, rsvd); |
772 | tp->t_blk_res = 0; | 772 | tp->t_blk_res = 0; |
773 | } | 773 | } |
@@ -1009,7 +1009,7 @@ void | |||
1009 | xfs_trans_unreserve_and_mod_sb( | 1009 | xfs_trans_unreserve_and_mod_sb( |
1010 | xfs_trans_t *tp) | 1010 | xfs_trans_t *tp) |
1011 | { | 1011 | { |
1012 | xfs_mod_sb_t msb[14]; /* If you add cases, add entries */ | 1012 | xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ |
1013 | xfs_mod_sb_t *msbp; | 1013 | xfs_mod_sb_t *msbp; |
1014 | xfs_mount_t *mp = tp->t_mountp; | 1014 | xfs_mount_t *mp = tp->t_mountp; |
1015 | /* REFERENCED */ | 1015 | /* REFERENCED */ |
@@ -1017,55 +1017,61 @@ xfs_trans_unreserve_and_mod_sb( | |||
1017 | int rsvd; | 1017 | int rsvd; |
1018 | int64_t blkdelta = 0; | 1018 | int64_t blkdelta = 0; |
1019 | int64_t rtxdelta = 0; | 1019 | int64_t rtxdelta = 0; |
1020 | int64_t idelta = 0; | ||
1021 | int64_t ifreedelta = 0; | ||
1020 | 1022 | ||
1021 | msbp = msb; | 1023 | msbp = msb; |
1022 | rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; | 1024 | rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
1023 | 1025 | ||
1024 | /* calculate free blocks delta */ | 1026 | /* calculate deltas */ |
1025 | if (tp->t_blk_res > 0) | 1027 | if (tp->t_blk_res > 0) |
1026 | blkdelta = tp->t_blk_res; | 1028 | blkdelta = tp->t_blk_res; |
1027 | |||
1028 | if ((tp->t_fdblocks_delta != 0) && | 1029 | if ((tp->t_fdblocks_delta != 0) && |
1029 | (xfs_sb_version_haslazysbcount(&mp->m_sb) || | 1030 | (xfs_sb_version_haslazysbcount(&mp->m_sb) || |
1030 | (tp->t_flags & XFS_TRANS_SB_DIRTY))) | 1031 | (tp->t_flags & XFS_TRANS_SB_DIRTY))) |
1031 | blkdelta += tp->t_fdblocks_delta; | 1032 | blkdelta += tp->t_fdblocks_delta; |
1032 | 1033 | ||
1033 | if (blkdelta != 0) { | ||
1034 | msbp->msb_field = XFS_SBS_FDBLOCKS; | ||
1035 | msbp->msb_delta = blkdelta; | ||
1036 | msbp++; | ||
1037 | } | ||
1038 | |||
1039 | /* calculate free realtime extents delta */ | ||
1040 | if (tp->t_rtx_res > 0) | 1034 | if (tp->t_rtx_res > 0) |
1041 | rtxdelta = tp->t_rtx_res; | 1035 | rtxdelta = tp->t_rtx_res; |
1042 | |||
1043 | if ((tp->t_frextents_delta != 0) && | 1036 | if ((tp->t_frextents_delta != 0) && |
1044 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) | 1037 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) |
1045 | rtxdelta += tp->t_frextents_delta; | 1038 | rtxdelta += tp->t_frextents_delta; |
1046 | 1039 | ||
1040 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) || | ||
1041 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) { | ||
1042 | idelta = tp->t_icount_delta; | ||
1043 | ifreedelta = tp->t_ifree_delta; | ||
1044 | } | ||
1045 | |||
1046 | /* apply the per-cpu counters */ | ||
1047 | if (blkdelta) { | ||
1048 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, | ||
1049 | blkdelta, rsvd); | ||
1050 | if (error) | ||
1051 | goto out; | ||
1052 | } | ||
1053 | |||
1054 | if (idelta) { | ||
1055 | error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, | ||
1056 | idelta, rsvd); | ||
1057 | if (error) | ||
1058 | goto out_undo_fdblocks; | ||
1059 | } | ||
1060 | |||
1061 | if (ifreedelta) { | ||
1062 | error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, | ||
1063 | ifreedelta, rsvd); | ||
1064 | if (error) | ||
1065 | goto out_undo_icount; | ||
1066 | } | ||
1067 | |||
1068 | /* apply remaining deltas */ | ||
1047 | if (rtxdelta != 0) { | 1069 | if (rtxdelta != 0) { |
1048 | msbp->msb_field = XFS_SBS_FREXTENTS; | 1070 | msbp->msb_field = XFS_SBS_FREXTENTS; |
1049 | msbp->msb_delta = rtxdelta; | 1071 | msbp->msb_delta = rtxdelta; |
1050 | msbp++; | 1072 | msbp++; |
1051 | } | 1073 | } |
1052 | 1074 | ||
1053 | /* apply remaining deltas */ | ||
1054 | |||
1055 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) || | ||
1056 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) { | ||
1057 | if (tp->t_icount_delta != 0) { | ||
1058 | msbp->msb_field = XFS_SBS_ICOUNT; | ||
1059 | msbp->msb_delta = tp->t_icount_delta; | ||
1060 | msbp++; | ||
1061 | } | ||
1062 | if (tp->t_ifree_delta != 0) { | ||
1063 | msbp->msb_field = XFS_SBS_IFREE; | ||
1064 | msbp->msb_delta = tp->t_ifree_delta; | ||
1065 | msbp++; | ||
1066 | } | ||
1067 | } | ||
1068 | |||
1069 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) { | 1075 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) { |
1070 | if (tp->t_dblocks_delta != 0) { | 1076 | if (tp->t_dblocks_delta != 0) { |
1071 | msbp->msb_field = XFS_SBS_DBLOCKS; | 1077 | msbp->msb_field = XFS_SBS_DBLOCKS; |
@@ -1115,8 +1121,24 @@ xfs_trans_unreserve_and_mod_sb( | |||
1115 | if (msbp > msb) { | 1121 | if (msbp > msb) { |
1116 | error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, | 1122 | error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, |
1117 | (uint)(msbp - msb), rsvd); | 1123 | (uint)(msbp - msb), rsvd); |
1118 | ASSERT(error == 0); | 1124 | if (error) |
1125 | goto out_undo_ifreecount; | ||
1119 | } | 1126 | } |
1127 | |||
1128 | return; | ||
1129 | |||
1130 | out_undo_ifreecount: | ||
1131 | if (ifreedelta) | ||
1132 | xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); | ||
1133 | out_undo_icount: | ||
1134 | if (idelta) | ||
1135 | xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); | ||
1136 | out_undo_fdblocks: | ||
1137 | if (blkdelta) | ||
1138 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); | ||
1139 | out: | ||
1140 | ASSERT(error = 0); | ||
1141 | return; | ||
1120 | } | 1142 | } |
1121 | 1143 | ||
1122 | /* | 1144 | /* |
@@ -1389,15 +1411,12 @@ xfs_trans_item_committed( | |||
1389 | */ | 1411 | */ |
1390 | STATIC void | 1412 | STATIC void |
1391 | xfs_trans_committed( | 1413 | xfs_trans_committed( |
1392 | struct xfs_trans *tp, | 1414 | void *arg, |
1393 | int abortflag) | 1415 | int abortflag) |
1394 | { | 1416 | { |
1417 | struct xfs_trans *tp = arg; | ||
1395 | struct xfs_log_item_desc *lidp, *next; | 1418 | struct xfs_log_item_desc *lidp, *next; |
1396 | 1419 | ||
1397 | /* Call the transaction's completion callback if there is one. */ | ||
1398 | if (tp->t_callback != NULL) | ||
1399 | tp->t_callback(tp, tp->t_callarg); | ||
1400 | |||
1401 | list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { | 1420 | list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { |
1402 | xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); | 1421 | xfs_trans_item_committed(lidp->lid_item, tp->t_lsn, abortflag); |
1403 | xfs_trans_free_item_desc(lidp); | 1422 | xfs_trans_free_item_desc(lidp); |
@@ -1525,7 +1544,7 @@ xfs_trans_commit_iclog( | |||
1525 | * running in simulation mode (the log is explicitly turned | 1544 | * running in simulation mode (the log is explicitly turned |
1526 | * off). | 1545 | * off). |
1527 | */ | 1546 | */ |
1528 | tp->t_logcb.cb_func = (void(*)(void*, int))xfs_trans_committed; | 1547 | tp->t_logcb.cb_func = xfs_trans_committed; |
1529 | tp->t_logcb.cb_arg = tp; | 1548 | tp->t_logcb.cb_arg = tp; |
1530 | 1549 | ||
1531 | /* | 1550 | /* |
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index c13c0f97b494..246286b77a86 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h | |||
@@ -399,8 +399,6 @@ typedef struct xfs_trans { | |||
399 | * transaction. */ | 399 | * transaction. */ |
400 | struct xfs_mount *t_mountp; /* ptr to fs mount struct */ | 400 | struct xfs_mount *t_mountp; /* ptr to fs mount struct */ |
401 | struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ | 401 | struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */ |
402 | xfs_trans_callback_t t_callback; /* transaction callback */ | ||
403 | void *t_callarg; /* callback arg */ | ||
404 | unsigned int t_flags; /* misc flags */ | 402 | unsigned int t_flags; /* misc flags */ |
405 | int64_t t_icount_delta; /* superblock icount change */ | 403 | int64_t t_icount_delta; /* superblock icount change */ |
406 | int64_t t_ifree_delta; /* superblock ifree change */ | 404 | int64_t t_ifree_delta; /* superblock ifree change */ |
@@ -473,6 +471,7 @@ void xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint); | |||
473 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); | 471 | void xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *); |
474 | int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, | 472 | int xfs_trans_iget(struct xfs_mount *, xfs_trans_t *, |
475 | xfs_ino_t , uint, uint, struct xfs_inode **); | 473 | xfs_ino_t , uint, uint, struct xfs_inode **); |
474 | void xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int); | ||
476 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); | 475 | void xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint); |
477 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); | 476 | void xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *); |
478 | void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); | 477 | void xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 90af025e6839..c47918c302a5 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -336,7 +336,7 @@ xfs_trans_read_buf( | |||
336 | ASSERT(!XFS_BUF_ISASYNC(bp)); | 336 | ASSERT(!XFS_BUF_ISASYNC(bp)); |
337 | XFS_BUF_READ(bp); | 337 | XFS_BUF_READ(bp); |
338 | xfsbdstrat(tp->t_mountp, bp); | 338 | xfsbdstrat(tp->t_mountp, bp); |
339 | error = xfs_iowait(bp); | 339 | error = xfs_buf_iowait(bp); |
340 | if (error) { | 340 | if (error) { |
341 | xfs_ioerror_alert("xfs_trans_read_buf", mp, | 341 | xfs_ioerror_alert("xfs_trans_read_buf", mp, |
342 | bp, blkno); | 342 | bp, blkno); |
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index cdc53a1050c5..ccb34532768b 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c | |||
@@ -118,6 +118,36 @@ xfs_trans_ijoin_ref( | |||
118 | } | 118 | } |
119 | 119 | ||
120 | /* | 120 | /* |
121 | * Transactional inode timestamp update. Requires the inode to be locked and | ||
122 | * joined to the transaction supplied. Relies on the transaction subsystem to | ||
123 | * track dirty state and update/writeback the inode accordingly. | ||
124 | */ | ||
125 | void | ||
126 | xfs_trans_ichgtime( | ||
127 | struct xfs_trans *tp, | ||
128 | struct xfs_inode *ip, | ||
129 | int flags) | ||
130 | { | ||
131 | struct inode *inode = VFS_I(ip); | ||
132 | timespec_t tv; | ||
133 | |||
134 | ASSERT(tp); | ||
135 | ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); | ||
136 | ASSERT(ip->i_transp == tp); | ||
137 | |||
138 | tv = current_fs_time(inode->i_sb); | ||
139 | |||
140 | if ((flags & XFS_ICHGTIME_MOD) && | ||
141 | !timespec_equal(&inode->i_mtime, &tv)) { | ||
142 | inode->i_mtime = tv; | ||
143 | } | ||
144 | if ((flags & XFS_ICHGTIME_CHG) && | ||
145 | !timespec_equal(&inode->i_ctime, &tv)) { | ||
146 | inode->i_ctime = tv; | ||
147 | } | ||
148 | } | ||
149 | |||
150 | /* | ||
121 | * This is called to mark the fields indicated in fieldmask as needing | 151 | * This is called to mark the fields indicated in fieldmask as needing |
122 | * to be logged when the transaction is committed. The inode must | 152 | * to be logged when the transaction is committed. The inode must |
123 | * already be associated with the given transaction. | 153 | * already be associated with the given transaction. |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 320775295e32..26d1867d8156 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
@@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ | |||
73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ | 73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ |
74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | 74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ |
75 | 75 | ||
76 | typedef __uint16_t xfs_prid_t; /* prid_t truncated to 16bits in XFS */ | ||
77 | |||
78 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | 76 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ |
79 | 77 | ||
80 | /* | 78 | /* |
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c index b7d5769d2df0..8b32d1a4c5a1 100644 --- a/fs/xfs/xfs_utils.c +++ b/fs/xfs/xfs_utils.c | |||
@@ -56,7 +56,6 @@ xfs_dir_ialloc( | |||
56 | mode_t mode, | 56 | mode_t mode, |
57 | xfs_nlink_t nlink, | 57 | xfs_nlink_t nlink, |
58 | xfs_dev_t rdev, | 58 | xfs_dev_t rdev, |
59 | cred_t *credp, | ||
60 | prid_t prid, /* project id */ | 59 | prid_t prid, /* project id */ |
61 | int okalloc, /* ok to allocate new space */ | 60 | int okalloc, /* ok to allocate new space */ |
62 | xfs_inode_t **ipp, /* pointer to inode; it will be | 61 | xfs_inode_t **ipp, /* pointer to inode; it will be |
@@ -93,7 +92,7 @@ xfs_dir_ialloc( | |||
93 | * transaction commit so that no other process can steal | 92 | * transaction commit so that no other process can steal |
94 | * the inode(s) that we've just allocated. | 93 | * the inode(s) that we've just allocated. |
95 | */ | 94 | */ |
96 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, okalloc, | 95 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, |
97 | &ialloc_context, &call_again, &ip); | 96 | &ialloc_context, &call_again, &ip); |
98 | 97 | ||
99 | /* | 98 | /* |
@@ -197,7 +196,7 @@ xfs_dir_ialloc( | |||
197 | * other allocations in this allocation group, | 196 | * other allocations in this allocation group, |
198 | * this call should always succeed. | 197 | * this call should always succeed. |
199 | */ | 198 | */ |
200 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, credp, prid, | 199 | code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, |
201 | okalloc, &ialloc_context, &call_again, &ip); | 200 | okalloc, &ialloc_context, &call_again, &ip); |
202 | 201 | ||
203 | /* | 202 | /* |
@@ -235,7 +234,7 @@ xfs_droplink( | |||
235 | { | 234 | { |
236 | int error; | 235 | int error; |
237 | 236 | ||
238 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | 237 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
239 | 238 | ||
240 | ASSERT (ip->i_d.di_nlink > 0); | 239 | ASSERT (ip->i_d.di_nlink > 0); |
241 | ip->i_d.di_nlink--; | 240 | ip->i_d.di_nlink--; |
@@ -299,7 +298,7 @@ xfs_bumplink( | |||
299 | { | 298 | { |
300 | if (ip->i_d.di_nlink >= XFS_MAXLINK) | 299 | if (ip->i_d.di_nlink >= XFS_MAXLINK) |
301 | return XFS_ERROR(EMLINK); | 300 | return XFS_ERROR(EMLINK); |
302 | xfs_ichgtime(ip, XFS_ICHGTIME_CHG); | 301 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
303 | 302 | ||
304 | ASSERT(ip->i_d.di_nlink > 0); | 303 | ASSERT(ip->i_d.di_nlink > 0); |
305 | ip->i_d.di_nlink++; | 304 | ip->i_d.di_nlink++; |
diff --git a/fs/xfs/xfs_utils.h b/fs/xfs/xfs_utils.h index f55b9678264f..456fca314933 100644 --- a/fs/xfs/xfs_utils.h +++ b/fs/xfs/xfs_utils.h | |||
@@ -19,8 +19,7 @@ | |||
19 | #define __XFS_UTILS_H__ | 19 | #define __XFS_UTILS_H__ |
20 | 20 | ||
21 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, | 21 | extern int xfs_dir_ialloc(xfs_trans_t **, xfs_inode_t *, mode_t, xfs_nlink_t, |
22 | xfs_dev_t, cred_t *, prid_t, int, | 22 | xfs_dev_t, prid_t, int, xfs_inode_t **, int *); |
23 | xfs_inode_t **, int *); | ||
24 | extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); | 23 | extern int xfs_droplink(xfs_trans_t *, xfs_inode_t *); |
25 | extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); | 24 | extern int xfs_bumplink(xfs_trans_t *, xfs_inode_t *); |
26 | extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); | 25 | extern void xfs_bump_ino_vers2(xfs_trans_t *, xfs_inode_t *); |
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index 4c7c7bfb2b2f..8e4a63c4151a 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
@@ -114,7 +114,7 @@ xfs_setattr( | |||
114 | */ | 114 | */ |
115 | ASSERT(udqp == NULL); | 115 | ASSERT(udqp == NULL); |
116 | ASSERT(gdqp == NULL); | 116 | ASSERT(gdqp == NULL); |
117 | code = xfs_qm_vop_dqalloc(ip, uid, gid, ip->i_d.di_projid, | 117 | code = xfs_qm_vop_dqalloc(ip, uid, gid, xfs_get_projid(ip), |
118 | qflags, &udqp, &gdqp); | 118 | qflags, &udqp, &gdqp); |
119 | if (code) | 119 | if (code) |
120 | return code; | 120 | return code; |
@@ -184,8 +184,11 @@ xfs_setattr( | |||
184 | ip->i_size == 0 && ip->i_d.di_nextents == 0) { | 184 | ip->i_size == 0 && ip->i_d.di_nextents == 0) { |
185 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 185 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
186 | lock_flags &= ~XFS_ILOCK_EXCL; | 186 | lock_flags &= ~XFS_ILOCK_EXCL; |
187 | if (mask & ATTR_CTIME) | 187 | if (mask & ATTR_CTIME) { |
188 | xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 188 | inode->i_mtime = inode->i_ctime = |
189 | current_fs_time(inode->i_sb); | ||
190 | xfs_mark_inode_dirty_sync(ip); | ||
191 | } | ||
189 | code = 0; | 192 | code = 0; |
190 | goto error_return; | 193 | goto error_return; |
191 | } | 194 | } |
@@ -1253,8 +1256,7 @@ xfs_create( | |||
1253 | struct xfs_name *name, | 1256 | struct xfs_name *name, |
1254 | mode_t mode, | 1257 | mode_t mode, |
1255 | xfs_dev_t rdev, | 1258 | xfs_dev_t rdev, |
1256 | xfs_inode_t **ipp, | 1259 | xfs_inode_t **ipp) |
1257 | cred_t *credp) | ||
1258 | { | 1260 | { |
1259 | int is_dir = S_ISDIR(mode); | 1261 | int is_dir = S_ISDIR(mode); |
1260 | struct xfs_mount *mp = dp->i_mount; | 1262 | struct xfs_mount *mp = dp->i_mount; |
@@ -1266,7 +1268,7 @@ xfs_create( | |||
1266 | boolean_t unlock_dp_on_error = B_FALSE; | 1268 | boolean_t unlock_dp_on_error = B_FALSE; |
1267 | uint cancel_flags; | 1269 | uint cancel_flags; |
1268 | int committed; | 1270 | int committed; |
1269 | xfs_prid_t prid; | 1271 | prid_t prid; |
1270 | struct xfs_dquot *udqp = NULL; | 1272 | struct xfs_dquot *udqp = NULL; |
1271 | struct xfs_dquot *gdqp = NULL; | 1273 | struct xfs_dquot *gdqp = NULL; |
1272 | uint resblks; | 1274 | uint resblks; |
@@ -1279,9 +1281,9 @@ xfs_create( | |||
1279 | return XFS_ERROR(EIO); | 1281 | return XFS_ERROR(EIO); |
1280 | 1282 | ||
1281 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1283 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) |
1282 | prid = dp->i_d.di_projid; | 1284 | prid = xfs_get_projid(dp); |
1283 | else | 1285 | else |
1284 | prid = dfltprid; | 1286 | prid = XFS_PROJID_DEFAULT; |
1285 | 1287 | ||
1286 | /* | 1288 | /* |
1287 | * Make sure that we have allocated dquot(s) on disk. | 1289 | * Make sure that we have allocated dquot(s) on disk. |
@@ -1360,7 +1362,7 @@ xfs_create( | |||
1360 | * entry pointing to them, but a directory also the "." entry | 1362 | * entry pointing to them, but a directory also the "." entry |
1361 | * pointing to itself. | 1363 | * pointing to itself. |
1362 | */ | 1364 | */ |
1363 | error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, credp, | 1365 | error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, |
1364 | prid, resblks > 0, &ip, &committed); | 1366 | prid, resblks > 0, &ip, &committed); |
1365 | if (error) { | 1367 | if (error) { |
1366 | if (error == ENOSPC) | 1368 | if (error == ENOSPC) |
@@ -1391,7 +1393,7 @@ xfs_create( | |||
1391 | ASSERT(error != ENOSPC); | 1393 | ASSERT(error != ENOSPC); |
1392 | goto out_trans_abort; | 1394 | goto out_trans_abort; |
1393 | } | 1395 | } |
1394 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1396 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1395 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 1397 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
1396 | 1398 | ||
1397 | if (is_dir) { | 1399 | if (is_dir) { |
@@ -1742,7 +1744,7 @@ xfs_remove( | |||
1742 | ASSERT(error != ENOENT); | 1744 | ASSERT(error != ENOENT); |
1743 | goto out_bmap_cancel; | 1745 | goto out_bmap_cancel; |
1744 | } | 1746 | } |
1745 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1747 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1746 | 1748 | ||
1747 | if (is_dir) { | 1749 | if (is_dir) { |
1748 | /* | 1750 | /* |
@@ -1880,7 +1882,7 @@ xfs_link( | |||
1880 | * the tree quota mechanism could be circumvented. | 1882 | * the tree quota mechanism could be circumvented. |
1881 | */ | 1883 | */ |
1882 | if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && | 1884 | if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) && |
1883 | (tdp->i_d.di_projid != sip->i_d.di_projid))) { | 1885 | (xfs_get_projid(tdp) != xfs_get_projid(sip)))) { |
1884 | error = XFS_ERROR(EXDEV); | 1886 | error = XFS_ERROR(EXDEV); |
1885 | goto error_return; | 1887 | goto error_return; |
1886 | } | 1888 | } |
@@ -1895,7 +1897,7 @@ xfs_link( | |||
1895 | &first_block, &free_list, resblks); | 1897 | &first_block, &free_list, resblks); |
1896 | if (error) | 1898 | if (error) |
1897 | goto abort_return; | 1899 | goto abort_return; |
1898 | xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 1900 | xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
1899 | xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); | 1901 | xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE); |
1900 | 1902 | ||
1901 | error = xfs_bumplink(tp, sip); | 1903 | error = xfs_bumplink(tp, sip); |
@@ -1933,8 +1935,7 @@ xfs_symlink( | |||
1933 | struct xfs_name *link_name, | 1935 | struct xfs_name *link_name, |
1934 | const char *target_path, | 1936 | const char *target_path, |
1935 | mode_t mode, | 1937 | mode_t mode, |
1936 | xfs_inode_t **ipp, | 1938 | xfs_inode_t **ipp) |
1937 | cred_t *credp) | ||
1938 | { | 1939 | { |
1939 | xfs_mount_t *mp = dp->i_mount; | 1940 | xfs_mount_t *mp = dp->i_mount; |
1940 | xfs_trans_t *tp; | 1941 | xfs_trans_t *tp; |
@@ -1955,7 +1956,7 @@ xfs_symlink( | |||
1955 | int byte_cnt; | 1956 | int byte_cnt; |
1956 | int n; | 1957 | int n; |
1957 | xfs_buf_t *bp; | 1958 | xfs_buf_t *bp; |
1958 | xfs_prid_t prid; | 1959 | prid_t prid; |
1959 | struct xfs_dquot *udqp, *gdqp; | 1960 | struct xfs_dquot *udqp, *gdqp; |
1960 | uint resblks; | 1961 | uint resblks; |
1961 | 1962 | ||
@@ -1978,9 +1979,9 @@ xfs_symlink( | |||
1978 | 1979 | ||
1979 | udqp = gdqp = NULL; | 1980 | udqp = gdqp = NULL; |
1980 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1981 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) |
1981 | prid = dp->i_d.di_projid; | 1982 | prid = xfs_get_projid(dp); |
1982 | else | 1983 | else |
1983 | prid = (xfs_prid_t)dfltprid; | 1984 | prid = XFS_PROJID_DEFAULT; |
1984 | 1985 | ||
1985 | /* | 1986 | /* |
1986 | * Make sure that we have allocated dquot(s) on disk. | 1987 | * Make sure that we have allocated dquot(s) on disk. |
@@ -2046,8 +2047,8 @@ xfs_symlink( | |||
2046 | /* | 2047 | /* |
2047 | * Allocate an inode for the symlink. | 2048 | * Allocate an inode for the symlink. |
2048 | */ | 2049 | */ |
2049 | error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), | 2050 | error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, |
2050 | 1, 0, credp, prid, resblks > 0, &ip, NULL); | 2051 | prid, resblks > 0, &ip, NULL); |
2051 | if (error) { | 2052 | if (error) { |
2052 | if (error == ENOSPC) | 2053 | if (error == ENOSPC) |
2053 | goto error_return; | 2054 | goto error_return; |
@@ -2129,7 +2130,7 @@ xfs_symlink( | |||
2129 | &first_block, &free_list, resblks); | 2130 | &first_block, &free_list, resblks); |
2130 | if (error) | 2131 | if (error) |
2131 | goto error1; | 2132 | goto error1; |
2132 | xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2133 | xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
2133 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); | 2134 | xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); |
2134 | 2135 | ||
2135 | /* | 2136 | /* |
@@ -2272,7 +2273,7 @@ xfs_alloc_file_space( | |||
2272 | count = len; | 2273 | count = len; |
2273 | imapp = &imaps[0]; | 2274 | imapp = &imaps[0]; |
2274 | nimaps = 1; | 2275 | nimaps = 1; |
2275 | bmapi_flag = XFS_BMAPI_WRITE | (alloc_type ? XFS_BMAPI_PREALLOC : 0); | 2276 | bmapi_flag = XFS_BMAPI_WRITE | alloc_type; |
2276 | startoffset_fsb = XFS_B_TO_FSBT(mp, offset); | 2277 | startoffset_fsb = XFS_B_TO_FSBT(mp, offset); |
2277 | allocatesize_fsb = XFS_B_TO_FSB(mp, count); | 2278 | allocatesize_fsb = XFS_B_TO_FSB(mp, count); |
2278 | 2279 | ||
@@ -2431,9 +2432,9 @@ xfs_zero_remaining_bytes( | |||
2431 | if (endoff > ip->i_size) | 2432 | if (endoff > ip->i_size) |
2432 | endoff = ip->i_size; | 2433 | endoff = ip->i_size; |
2433 | 2434 | ||
2434 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 2435 | bp = xfs_buf_get_uncached(XFS_IS_REALTIME_INODE(ip) ? |
2435 | XFS_IS_REALTIME_INODE(ip) ? | 2436 | mp->m_rtdev_targp : mp->m_ddev_targp, |
2436 | mp->m_rtdev_targp : mp->m_ddev_targp); | 2437 | mp->m_sb.sb_blocksize, XBF_DONT_BLOCK); |
2437 | if (!bp) | 2438 | if (!bp) |
2438 | return XFS_ERROR(ENOMEM); | 2439 | return XFS_ERROR(ENOMEM); |
2439 | 2440 | ||
@@ -2459,7 +2460,7 @@ xfs_zero_remaining_bytes( | |||
2459 | XFS_BUF_READ(bp); | 2460 | XFS_BUF_READ(bp); |
2460 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); | 2461 | XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); |
2461 | xfsbdstrat(mp, bp); | 2462 | xfsbdstrat(mp, bp); |
2462 | error = xfs_iowait(bp); | 2463 | error = xfs_buf_iowait(bp); |
2463 | if (error) { | 2464 | if (error) { |
2464 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", | 2465 | xfs_ioerror_alert("xfs_zero_remaining_bytes(read)", |
2465 | mp, bp, XFS_BUF_ADDR(bp)); | 2466 | mp, bp, XFS_BUF_ADDR(bp)); |
@@ -2472,7 +2473,7 @@ xfs_zero_remaining_bytes( | |||
2472 | XFS_BUF_UNREAD(bp); | 2473 | XFS_BUF_UNREAD(bp); |
2473 | XFS_BUF_WRITE(bp); | 2474 | XFS_BUF_WRITE(bp); |
2474 | xfsbdstrat(mp, bp); | 2475 | xfsbdstrat(mp, bp); |
2475 | error = xfs_iowait(bp); | 2476 | error = xfs_buf_iowait(bp); |
2476 | if (error) { | 2477 | if (error) { |
2477 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", | 2478 | xfs_ioerror_alert("xfs_zero_remaining_bytes(write)", |
2478 | mp, bp, XFS_BUF_ADDR(bp)); | 2479 | mp, bp, XFS_BUF_ADDR(bp)); |
@@ -2711,6 +2712,7 @@ xfs_change_file_space( | |||
2711 | xfs_off_t llen; | 2712 | xfs_off_t llen; |
2712 | xfs_trans_t *tp; | 2713 | xfs_trans_t *tp; |
2713 | struct iattr iattr; | 2714 | struct iattr iattr; |
2715 | int prealloc_type; | ||
2714 | 2716 | ||
2715 | if (!S_ISREG(ip->i_d.di_mode)) | 2717 | if (!S_ISREG(ip->i_d.di_mode)) |
2716 | return XFS_ERROR(EINVAL); | 2718 | return XFS_ERROR(EINVAL); |
@@ -2753,12 +2755,17 @@ xfs_change_file_space( | |||
2753 | * size to be changed. | 2755 | * size to be changed. |
2754 | */ | 2756 | */ |
2755 | setprealloc = clrprealloc = 0; | 2757 | setprealloc = clrprealloc = 0; |
2758 | prealloc_type = XFS_BMAPI_PREALLOC; | ||
2756 | 2759 | ||
2757 | switch (cmd) { | 2760 | switch (cmd) { |
2761 | case XFS_IOC_ZERO_RANGE: | ||
2762 | prealloc_type |= XFS_BMAPI_CONVERT; | ||
2763 | xfs_tosspages(ip, startoffset, startoffset + bf->l_len, 0); | ||
2764 | /* FALLTHRU */ | ||
2758 | case XFS_IOC_RESVSP: | 2765 | case XFS_IOC_RESVSP: |
2759 | case XFS_IOC_RESVSP64: | 2766 | case XFS_IOC_RESVSP64: |
2760 | error = xfs_alloc_file_space(ip, startoffset, bf->l_len, | 2767 | error = xfs_alloc_file_space(ip, startoffset, bf->l_len, |
2761 | 1, attr_flags); | 2768 | prealloc_type, attr_flags); |
2762 | if (error) | 2769 | if (error) |
2763 | return error; | 2770 | return error; |
2764 | setprealloc = 1; | 2771 | setprealloc = 1; |
@@ -2827,7 +2834,7 @@ xfs_change_file_space( | |||
2827 | if (ip->i_d.di_mode & S_IXGRP) | 2834 | if (ip->i_d.di_mode & S_IXGRP) |
2828 | ip->i_d.di_mode &= ~S_ISGID; | 2835 | ip->i_d.di_mode &= ~S_ISGID; |
2829 | 2836 | ||
2830 | xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 2837 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
2831 | } | 2838 | } |
2832 | if (setprealloc) | 2839 | if (setprealloc) |
2833 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | 2840 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index d8dfa8d0dadd..f6702927eee4 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
@@ -2,7 +2,6 @@ | |||
2 | #define _XFS_VNODEOPS_H 1 | 2 | #define _XFS_VNODEOPS_H 1 |
3 | 3 | ||
4 | struct attrlist_cursor_kern; | 4 | struct attrlist_cursor_kern; |
5 | struct cred; | ||
6 | struct file; | 5 | struct file; |
7 | struct iattr; | 6 | struct iattr; |
8 | struct inode; | 7 | struct inode; |
@@ -26,7 +25,7 @@ int xfs_inactive(struct xfs_inode *ip); | |||
26 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | 25 | int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, |
27 | struct xfs_inode **ipp, struct xfs_name *ci_name); | 26 | struct xfs_inode **ipp, struct xfs_name *ci_name); |
28 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, | 27 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, |
29 | xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); | 28 | xfs_dev_t rdev, struct xfs_inode **ipp); |
30 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | 29 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
31 | struct xfs_inode *ip); | 30 | struct xfs_inode *ip); |
32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 31 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
@@ -34,8 +33,7 @@ int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | |||
34 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 33 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, |
35 | xfs_off_t *offset, filldir_t filldir); | 34 | xfs_off_t *offset, filldir_t filldir); |
36 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 35 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
37 | const char *target_path, mode_t mode, struct xfs_inode **ipp, | 36 | const char *target_path, mode_t mode, struct xfs_inode **ipp); |
38 | cred_t *credp); | ||
39 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 37 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |
40 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, | 38 | int xfs_change_file_space(struct xfs_inode *ip, int cmd, |
41 | xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); | 39 | xfs_flock64_t *bf, xfs_off_t offset, int attr_flags); |