diff options
author | Ingo Molnar <mingo@elte.hu> | 2008-09-10 05:32:52 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-09-10 05:32:52 -0400 |
commit | e92b4fdacc6a7d8cc7895b81347671d5fcd6c5e1 (patch) | |
tree | 4f84567261682d8ec2ad4102bce1ff970a6eed1a /fs | |
parent | 9fcaff0e660d886e9a766460adbe558dd25de31b (diff) | |
parent | adee14b2e1557d0a8559f29681732d05a89dfc35 (diff) |
Merge commit 'v2.6.27-rc6' into x86/iommu
Diffstat (limited to 'fs')
73 files changed, 1021 insertions, 751 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 88e3787c6ea9..e298fe194093 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
@@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
119 | 119 | ||
120 | const struct file_operations v9fs_dir_operations = { | 120 | const struct file_operations v9fs_dir_operations = { |
121 | .read = generic_read_dir, | 121 | .read = generic_read_dir, |
122 | .llseek = generic_file_llseek, | ||
122 | .readdir = v9fs_dir_readdir, | 123 | .readdir = v9fs_dir_readdir, |
123 | .open = v9fs_file_open, | 124 | .open = v9fs_file_open, |
124 | .release = v9fs_dir_release, | 125 | .release = v9fs_dir_release, |
diff --git a/fs/Kconfig b/fs/Kconfig index d3873583360b..abccb5dab9a8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH | |||
1930 | 1930 | ||
1931 | If unsure, say N. | 1931 | If unsure, say N. |
1932 | 1932 | ||
1933 | config CIFS_UPCALL | ||
1934 | bool "Kerberos/SPNEGO advanced session setup" | ||
1935 | depends on CIFS && KEYS | ||
1936 | help | ||
1937 | Enables an upcall mechanism for CIFS which accesses | ||
1938 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1939 | Kerberos tickets which are needed to mount to certain secure servers | ||
1940 | (for which more secure Kerberos authentication is required). If | ||
1941 | unsure, say N. | ||
1942 | |||
1933 | config CIFS_XATTR | 1943 | config CIFS_XATTR |
1934 | bool "CIFS extended attributes" | 1944 | bool "CIFS extended attributes" |
1935 | depends on CIFS | 1945 | depends on CIFS |
@@ -1982,17 +1992,6 @@ config CIFS_EXPERIMENTAL | |||
1982 | (which is disabled by default). See the file fs/cifs/README | 1992 | (which is disabled by default). See the file fs/cifs/README |
1983 | for more details. If unsure, say N. | 1993 | for more details. If unsure, say N. |
1984 | 1994 | ||
1985 | config CIFS_UPCALL | ||
1986 | bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" | ||
1987 | depends on CIFS_EXPERIMENTAL | ||
1988 | depends on KEYS | ||
1989 | help | ||
1990 | Enables an upcall mechanism for CIFS which accesses | ||
1991 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
1992 | Kerberos tickets which are needed to mount to certain secure servers | ||
1993 | (for which more secure Kerberos authentication is required). If | ||
1994 | unsure, say N. | ||
1995 | |||
1996 | config CIFS_DFS_UPCALL | 1995 | config CIFS_DFS_UPCALL |
1997 | bool "DFS feature support (EXPERIMENTAL)" | 1996 | bool "DFS feature support (EXPERIMENTAL)" |
1998 | depends on CIFS_EXPERIMENTAL | 1997 | depends on CIFS_EXPERIMENTAL |
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fc1a8dc64d78..85a30e929800 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c | |||
@@ -197,6 +197,7 @@ out: | |||
197 | 197 | ||
198 | const struct file_operations adfs_dir_operations = { | 198 | const struct file_operations adfs_dir_operations = { |
199 | .read = generic_read_dir, | 199 | .read = generic_read_dir, |
200 | .llseek = generic_file_llseek, | ||
200 | .readdir = adfs_readdir, | 201 | .readdir = adfs_readdir, |
201 | .fsync = file_fsync, | 202 | .fsync = file_fsync, |
202 | }; | 203 | }; |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 6e3f282424b0..7b36904dbeac 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
@@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t); | |||
19 | 19 | ||
20 | const struct file_operations affs_dir_operations = { | 20 | const struct file_operations affs_dir_operations = { |
21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
22 | .llseek = generic_file_llseek, | ||
22 | .readdir = affs_readdir, | 23 | .readdir = affs_readdir, |
23 | .fsync = file_fsync, | 24 | .fsync = file_fsync, |
24 | }; | 25 | }; |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bcfb2dc0a61b..2a41c2a7fc52 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
@@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = { | |||
36 | .release = dcache_dir_close, | 36 | .release = dcache_dir_close, |
37 | .read = generic_read_dir, | 37 | .read = generic_read_dir, |
38 | .readdir = dcache_readdir, | 38 | .readdir = dcache_readdir, |
39 | .llseek = dcache_dir_lseek, | ||
39 | .ioctl = autofs4_root_ioctl, | 40 | .ioctl = autofs4_root_ioctl, |
40 | }; | 41 | }; |
41 | 42 | ||
@@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = { | |||
44 | .release = dcache_dir_close, | 45 | .release = dcache_dir_close, |
45 | .read = generic_read_dir, | 46 | .read = generic_read_dir, |
46 | .readdir = dcache_readdir, | 47 | .readdir = dcache_readdir, |
48 | .llseek = dcache_dir_lseek, | ||
47 | }; | 49 | }; |
48 | 50 | ||
49 | const struct inode_operations autofs4_indirect_root_inode_operations = { | 51 | const struct inode_operations autofs4_indirect_root_inode_operations = { |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 02c6e62b72f8..740f53672a8a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep; | |||
66 | static const struct file_operations befs_dir_operations = { | 66 | static const struct file_operations befs_dir_operations = { |
67 | .read = generic_read_dir, | 67 | .read = generic_read_dir, |
68 | .readdir = befs_readdir, | 68 | .readdir = befs_readdir, |
69 | .llseek = generic_file_llseek, | ||
69 | }; | 70 | }; |
70 | 71 | ||
71 | static const struct inode_operations befs_dir_inode_operations = { | 72 | static const struct inode_operations befs_dir_inode_operations = { |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 56372ecf1690..dfc0197905ca 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
@@ -914,7 +914,9 @@ static int load_flat_binary(struct linux_binprm * bprm, struct pt_regs * regs) | |||
914 | /* Stash our initial stack pointer into the mm structure */ | 914 | /* Stash our initial stack pointer into the mm structure */ |
915 | current->mm->start_stack = (unsigned long )sp; | 915 | current->mm->start_stack = (unsigned long )sp; |
916 | 916 | ||
917 | 917 | #ifdef FLAT_PLAT_INIT | |
918 | FLAT_PLAT_INIT(regs); | ||
919 | #endif | ||
918 | DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", | 920 | DBG_FLT("start_thread(regs=0x%x, entry=0x%x, start_stack=0x%x)\n", |
919 | (int)regs, (int)start_addr, (int)current->mm->start_stack); | 921 | (int)regs, (int)start_addr, (int)current->mm->start_stack); |
920 | 922 | ||
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 756205314c24..8d7e88e02e0f 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -120,8 +120,6 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
120 | if (bprm->misc_bang) | 120 | if (bprm->misc_bang) |
121 | goto _ret; | 121 | goto _ret; |
122 | 122 | ||
123 | bprm->misc_bang = 1; | ||
124 | |||
125 | /* to keep locking time low, we copy the interpreter string */ | 123 | /* to keep locking time low, we copy the interpreter string */ |
126 | read_lock(&entries_lock); | 124 | read_lock(&entries_lock); |
127 | fmt = check_file(bprm); | 125 | fmt = check_file(bprm); |
@@ -199,6 +197,8 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
199 | if (retval < 0) | 197 | if (retval < 0) |
200 | goto _error; | 198 | goto _error; |
201 | 199 | ||
200 | bprm->misc_bang = 1; | ||
201 | |||
202 | retval = search_binary_handler (bprm, regs); | 202 | retval = search_binary_handler (bprm, regs); |
203 | if (retval < 0) | 203 | if (retval < 0) |
204 | goto _error; | 204 | goto _error; |
@@ -469,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd) | |||
469 | kfree(bmd); | 469 | kfree(bmd); |
470 | } | 470 | } |
471 | 471 | ||
472 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | 472 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, |
473 | gfp_t gfp_mask) | ||
473 | { | 474 | { |
474 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); | 475 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); |
475 | 476 | ||
476 | if (!bmd) | 477 | if (!bmd) |
477 | return NULL; | 478 | return NULL; |
478 | 479 | ||
479 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); | 480 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask); |
480 | if (!bmd->iovecs) { | 481 | if (!bmd->iovecs) { |
481 | kfree(bmd); | 482 | kfree(bmd); |
482 | return NULL; | 483 | return NULL; |
483 | } | 484 | } |
484 | 485 | ||
485 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); | 486 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask); |
486 | if (bmd->sgvecs) | 487 | if (bmd->sgvecs) |
487 | return bmd; | 488 | return bmd; |
488 | 489 | ||
@@ -491,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | |||
491 | return NULL; | 492 | return NULL; |
492 | } | 493 | } |
493 | 494 | ||
494 | static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | 495 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
495 | int uncopy) | 496 | struct sg_iovec *iov, int iov_count, int uncopy) |
496 | { | 497 | { |
497 | int ret = 0, i; | 498 | int ret = 0, i; |
498 | struct bio_vec *bvec; | 499 | struct bio_vec *bvec; |
@@ -502,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
502 | 503 | ||
503 | __bio_for_each_segment(bvec, bio, i, 0) { | 504 | __bio_for_each_segment(bvec, bio, i, 0) { |
504 | char *bv_addr = page_address(bvec->bv_page); | 505 | char *bv_addr = page_address(bvec->bv_page); |
505 | unsigned int bv_len = bvec->bv_len; | 506 | unsigned int bv_len = iovecs[i].bv_len; |
506 | 507 | ||
507 | while (bv_len && iov_idx < iov_count) { | 508 | while (bv_len && iov_idx < iov_count) { |
508 | unsigned int bytes; | 509 | unsigned int bytes; |
@@ -554,7 +555,7 @@ int bio_uncopy_user(struct bio *bio) | |||
554 | struct bio_map_data *bmd = bio->bi_private; | 555 | struct bio_map_data *bmd = bio->bi_private; |
555 | int ret; | 556 | int ret; |
556 | 557 | ||
557 | ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); | 558 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); |
558 | 559 | ||
559 | bio_free_map_data(bmd); | 560 | bio_free_map_data(bmd); |
560 | bio_put(bio); | 561 | bio_put(bio); |
@@ -596,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
596 | len += iov[i].iov_len; | 597 | len += iov[i].iov_len; |
597 | } | 598 | } |
598 | 599 | ||
599 | bmd = bio_alloc_map_data(nr_pages, iov_count); | 600 | bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); |
600 | if (!bmd) | 601 | if (!bmd) |
601 | return ERR_PTR(-ENOMEM); | 602 | return ERR_PTR(-ENOMEM); |
602 | 603 | ||
@@ -633,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
633 | * success | 634 | * success |
634 | */ | 635 | */ |
635 | if (!write_to_vm) { | 636 | if (!write_to_vm) { |
636 | ret = __bio_copy_iov(bio, iov, iov_count, 0); | 637 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); |
637 | if (ret) | 638 | if (ret) |
638 | goto cleanup; | 639 | goto cleanup; |
639 | } | 640 | } |
@@ -942,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
942 | { | 943 | { |
943 | struct bio_vec *bvec; | 944 | struct bio_vec *bvec; |
944 | const int read = bio_data_dir(bio) == READ; | 945 | const int read = bio_data_dir(bio) == READ; |
945 | char *p = bio->bi_private; | 946 | struct bio_map_data *bmd = bio->bi_private; |
946 | int i; | 947 | int i; |
948 | char *p = bmd->sgvecs[0].iov_base; | ||
947 | 949 | ||
948 | __bio_for_each_segment(bvec, bio, i, 0) { | 950 | __bio_for_each_segment(bvec, bio, i, 0) { |
949 | char *addr = page_address(bvec->bv_page); | 951 | char *addr = page_address(bvec->bv_page); |
952 | int len = bmd->iovecs[i].bv_len; | ||
950 | 953 | ||
951 | if (read && !err) | 954 | if (read && !err) |
952 | memcpy(p, addr, bvec->bv_len); | 955 | memcpy(p, addr, len); |
953 | 956 | ||
954 | __free_page(bvec->bv_page); | 957 | __free_page(bvec->bv_page); |
955 | p += bvec->bv_len; | 958 | p += len; |
956 | } | 959 | } |
957 | 960 | ||
961 | bio_free_map_data(bmd); | ||
958 | bio_put(bio); | 962 | bio_put(bio); |
959 | } | 963 | } |
960 | 964 | ||
@@ -978,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
978 | const int nr_pages = end - start; | 982 | const int nr_pages = end - start; |
979 | struct bio *bio; | 983 | struct bio *bio; |
980 | struct bio_vec *bvec; | 984 | struct bio_vec *bvec; |
985 | struct bio_map_data *bmd; | ||
981 | int i, ret; | 986 | int i, ret; |
987 | struct sg_iovec iov; | ||
988 | |||
989 | iov.iov_base = data; | ||
990 | iov.iov_len = len; | ||
991 | |||
992 | bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); | ||
993 | if (!bmd) | ||
994 | return ERR_PTR(-ENOMEM); | ||
982 | 995 | ||
996 | ret = -ENOMEM; | ||
983 | bio = bio_alloc(gfp_mask, nr_pages); | 997 | bio = bio_alloc(gfp_mask, nr_pages); |
984 | if (!bio) | 998 | if (!bio) |
985 | return ERR_PTR(-ENOMEM); | 999 | goto out_bmd; |
986 | 1000 | ||
987 | while (len) { | 1001 | while (len) { |
988 | struct page *page; | 1002 | struct page *page; |
@@ -1016,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
1016 | } | 1030 | } |
1017 | } | 1031 | } |
1018 | 1032 | ||
1019 | bio->bi_private = data; | 1033 | bio->bi_private = bmd; |
1020 | bio->bi_end_io = bio_copy_kern_endio; | 1034 | bio->bi_end_io = bio_copy_kern_endio; |
1035 | |||
1036 | bio_set_map_data(bmd, bio, &iov, 1); | ||
1021 | return bio; | 1037 | return bio; |
1022 | cleanup: | 1038 | cleanup: |
1023 | bio_for_each_segment(bvec, bio, i) | 1039 | bio_for_each_segment(bvec, bio, i) |
1024 | __free_page(bvec->bv_page); | 1040 | __free_page(bvec->bv_page); |
1025 | 1041 | ||
1026 | bio_put(bio); | 1042 | bio_put(bio); |
1043 | out_bmd: | ||
1044 | bio_free_map_data(bmd); | ||
1027 | 1045 | ||
1028 | return ERR_PTR(ret); | 1046 | return ERR_PTR(ret); |
1029 | } | 1047 | } |
diff --git a/fs/buffer.c b/fs/buffer.c index 38653e36e225..ac78d4c19b3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
2926 | BUG_ON(!buffer_mapped(bh)); | 2926 | BUG_ON(!buffer_mapped(bh)); |
2927 | BUG_ON(!bh->b_end_io); | 2927 | BUG_ON(!bh->b_end_io); |
2928 | 2928 | ||
2929 | if (buffer_ordered(bh) && (rw == WRITE)) | 2929 | /* |
2930 | rw = WRITE_BARRIER; | 2930 | * Mask in barrier bit for a write (could be either a WRITE or a |
2931 | * WRITE_SYNC | ||
2932 | */ | ||
2933 | if (buffer_ordered(bh) && (rw & WRITE)) | ||
2934 | rw |= WRITE_BARRIER; | ||
2931 | 2935 | ||
2932 | /* | 2936 | /* |
2933 | * Only clear out a write error when rewriting, should this | 2937 | * Only clear out a write error when rewriting |
2934 | * include WRITE_SYNC as well? | ||
2935 | */ | 2938 | */ |
2936 | if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) | 2939 | if (test_set_buffer_req(bh) && (rw & WRITE)) |
2937 | clear_buffer_write_io_error(bh); | 2940 | clear_buffer_write_io_error(bh); |
2938 | 2941 | ||
2939 | /* | 2942 | /* |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f5d0083e09fa..06e521a945c3 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
@@ -4,7 +4,15 @@ Fix premature write failure on congested networks (we would give up | |||
4 | on EAGAIN from the socket too quickly on large writes). | 4 | on EAGAIN from the socket too quickly on large writes). |
5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. | 5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. |
6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian | 6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian |
7 | architectures. | 7 | architectures. Fix problems with preserving timestamps on copying open |
8 | files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit | ||
9 | on parent directory when server supports Unix Extensions but not POSIX | ||
10 | create. Update cifs.upcall version to handle new Kerberos sec flags | ||
11 | (this requires update of cifs.upcall program from Samba). Fix memory leak | ||
12 | on dns_upcall (resolving DFS referrals). Fix plain text password | ||
13 | authentication (requires setting SecurityFlags to 0x30030 to enable | ||
14 | lanman and plain text though). Fix writes to be at correct offset when | ||
15 | file is open with O_APPEND and file is on a directio (forcedirectio) mount. | ||
8 | 16 | ||
9 | Version 1.53 | 17 | Version 1.53 |
10 | ------------ | 18 | ------------ |
diff --git a/fs/cifs/README b/fs/cifs/README index 2bd6fe556f88..bd2343d4c6a6 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
@@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and | |||
542 | hashing mechanisms (as "must use") on the other hand | 542 | hashing mechanisms (as "must use") on the other hand |
543 | does not make much sense. Default flags are | 543 | does not make much sense. Default flags are |
544 | 0x07007 | 544 | 0x07007 |
545 | (NTLM, NTLMv2 and packet signing allowed). Maximum | 545 | (NTLM, NTLMv2 and packet signing allowed). The maximum |
546 | allowable flags if you want to allow mounts to servers | 546 | allowable flags if you want to allow mounts to servers |
547 | using weaker password hashes is 0x37037 (lanman, | 547 | using weaker password hashes is 0x37037 (lanman, |
548 | plaintext, ntlm, ntlmv2, signing allowed): | 548 | plaintext, ntlm, ntlmv2, signing allowed). Some |
549 | SecurityFlags require the corresponding menuconfig | ||
550 | options to be enabled (lanman and plaintext require | ||
551 | CONFIG_CIFS_WEAK_PW_HASH for example). Enabling | ||
552 | plaintext authentication currently requires also | ||
553 | enabling lanman authentication in the security flags | ||
554 | because the cifs module only supports sending | ||
555 | plaintext passwords using the older lanman dialect | ||
556 | form of the session setup SMB. (e.g. for authentication | ||
557 | using plain text passwords, set the SecurityFlags | ||
558 | to 0x30030): | ||
549 | 559 | ||
550 | may use packet signing 0x00001 | 560 | may use packet signing 0x00001 |
551 | must use packet signing 0x01001 | 561 | must use packet signing 0x01001 |
@@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in | |||
642 | that they represent all for that share, not just those for which the server | 652 | that they represent all for that share, not just those for which the server |
643 | returned success. | 653 | returned success. |
644 | 654 | ||
645 | Also note that "cat /proc/fs/cifs/DebugData" will display information about | 655 | Also note that "cat /proc/fs/cifs/DebugData" will display information about |
646 | the active sessions and the shares that are mounted. | 656 | the active sessions and the shares that are mounted. |
647 | Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is | 657 | |
648 | on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and | 658 | Enabling Kerberos (extended security) works but requires version 1.2 or later |
649 | LANMAN support do not require this helper. | 659 | of the helper program cifs.upcall to be present and to be configured in the |
660 | /etc/request-key.conf file. The cifs.upcall helper program is from the Samba | ||
661 | project (http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not | ||
662 | require this helper. Note that NTLMv2 security (which does not require the | ||
663 | cifs.upcall helper program), instead of using Kerberos, is sufficient for | ||
664 | some use cases. | ||
665 | |||
666 | Enabling DFS support (used to access shares transparently in an MS-DFS | ||
667 | global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In | ||
668 | addition, DFS support for target shares which are specified as UNC | ||
669 | names which begin with host names (rather than IP addresses) requires | ||
670 | a user space helper (such as cifs.upcall) to be present in order to | ||
671 | translate host names to IP addresses, and the user space helper must also | ||
672 | be configured in the file /etc/request-key.conf | ||
673 | |||
674 | To use cifs Kerberos and DFS support, the Linux keyutils package should be | ||
675 | installed and something like the following lines should be added to the | ||
676 | /etc/request-key.conf file: | ||
677 | |||
678 | create cifs.spnego * * /usr/local/sbin/cifs.upcall %k | ||
679 | create dns_resolver * * /usr/local/sbin/cifs.upcall %k | ||
680 | |||
681 | |||
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 5fabd2caf93c..1b09f1670061 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c | |||
@@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
476 | unsigned int cls, con, tag, oidlen, rc; | 476 | unsigned int cls, con, tag, oidlen, rc; |
477 | bool use_ntlmssp = false; | 477 | bool use_ntlmssp = false; |
478 | bool use_kerberos = false; | 478 | bool use_kerberos = false; |
479 | bool use_mskerberos = false; | ||
479 | 480 | ||
480 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ | 481 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ |
481 | 482 | ||
@@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
574 | *(oid + 1), *(oid + 2), *(oid + 3))); | 575 | *(oid + 1), *(oid + 2), *(oid + 3))); |
575 | 576 | ||
576 | if (compare_oid(oid, oidlen, MSKRB5_OID, | 577 | if (compare_oid(oid, oidlen, MSKRB5_OID, |
577 | MSKRB5_OID_LEN)) | 578 | MSKRB5_OID_LEN) && |
578 | use_kerberos = true; | 579 | !use_kerberos) |
580 | use_mskerberos = true; | ||
579 | else if (compare_oid(oid, oidlen, KRB5_OID, | 581 | else if (compare_oid(oid, oidlen, KRB5_OID, |
580 | KRB5_OID_LEN)) | 582 | KRB5_OID_LEN) && |
583 | !use_mskerberos) | ||
581 | use_kerberos = true; | 584 | use_kerberos = true; |
582 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, | 585 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, |
583 | NTLMSSP_OID_LEN)) | 586 | NTLMSSP_OID_LEN)) |
@@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
630 | 633 | ||
631 | if (use_kerberos) | 634 | if (use_kerberos) |
632 | *secType = Kerberos; | 635 | *secType = Kerberos; |
636 | else if (use_mskerberos) | ||
637 | *secType = MSKerberos; | ||
633 | else if (use_ntlmssp) | 638 | else if (use_ntlmssp) |
634 | *secType = NTLMSSP; | 639 | *secType = NTLMSSP; |
635 | 640 | ||
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2434ab0e8791..117ef4bba68e 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
@@ -114,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
114 | 114 | ||
115 | dp = description + strlen(description); | 115 | dp = description + strlen(description); |
116 | 116 | ||
117 | /* for now, only sec=krb5 is valid */ | 117 | /* for now, only sec=krb5 and sec=mskrb5 are valid */ |
118 | if (server->secType == Kerberos) | 118 | if (server->secType == Kerberos) |
119 | sprintf(dp, ";sec=krb5"); | 119 | sprintf(dp, ";sec=krb5"); |
120 | else if (server->secType == MSKerberos) | ||
121 | sprintf(dp, ";sec=mskrb5"); | ||
120 | else | 122 | else |
121 | goto out; | 123 | goto out; |
122 | 124 | ||
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 05a34b17a1ab..e4041ec4d712 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h | |||
@@ -23,7 +23,7 @@ | |||
23 | #ifndef _CIFS_SPNEGO_H | 23 | #ifndef _CIFS_SPNEGO_H |
24 | #define _CIFS_SPNEGO_H | 24 | #define _CIFS_SPNEGO_H |
25 | 25 | ||
26 | #define CIFS_SPNEGO_UPCALL_VERSION 1 | 26 | #define CIFS_SPNEGO_UPCALL_VERSION 2 |
27 | 27 | ||
28 | /* | 28 | /* |
29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. | 29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 83fd40dc1ef0..bd5f13d38450 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
@@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) | |||
294 | 294 | ||
295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) | 295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) |
296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { | 296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { |
297 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
297 | memcpy(lnm_session_key, password_with_pad, | 298 | memcpy(lnm_session_key, password_with_pad, |
298 | CIFS_ENCPWD_SIZE); | 299 | CIFS_ENCPWD_SIZE); |
299 | return; | 300 | return; |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7e1cf262effe..8dfd6f24d488 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
@@ -80,7 +80,8 @@ enum securityEnum { | |||
80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ | 80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ |
81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ | 81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ |
82 | NTLMSSP, /* NTLMSSP via SPNEGO */ | 82 | NTLMSSP, /* NTLMSSP via SPNEGO */ |
83 | Kerberos /* Kerberos via SPNEGO */ | 83 | Kerberos, /* Kerberos via SPNEGO */ |
84 | MSKerberos, /* MS Kerberos via SPNEGO */ | ||
84 | }; | 85 | }; |
85 | 86 | ||
86 | enum protocolEnum { | 87 | enum protocolEnum { |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0711db65afe8..4c13bcdb92a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
@@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; | 3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; |
3599 | bool ntlmv2_flag = false; | 3599 | bool ntlmv2_flag = false; |
3600 | int first_time = 0; | 3600 | int first_time = 0; |
3601 | struct TCP_Server_Info *server = pSesInfo->server; | ||
3601 | 3602 | ||
3602 | /* what if server changes its buffer size after dropping the session? */ | 3603 | /* what if server changes its buffer size after dropping the session? */ |
3603 | if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { | 3604 | if (server->maxBuf == 0) /* no need to send on reconnect */ { |
3604 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3605 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
3605 | if (rc == -EAGAIN) /* retry only once on 1st time connection */ { | 3606 | if (rc == -EAGAIN) { |
3607 | /* retry only once on 1st time connection */ | ||
3606 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3608 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
3607 | if (rc == -EAGAIN) | 3609 | if (rc == -EAGAIN) |
3608 | rc = -EHOSTDOWN; | 3610 | rc = -EHOSTDOWN; |
3609 | } | 3611 | } |
3610 | if (rc == 0) { | 3612 | if (rc == 0) { |
3611 | spin_lock(&GlobalMid_Lock); | 3613 | spin_lock(&GlobalMid_Lock); |
3612 | if (pSesInfo->server->tcpStatus != CifsExiting) | 3614 | if (server->tcpStatus != CifsExiting) |
3613 | pSesInfo->server->tcpStatus = CifsGood; | 3615 | server->tcpStatus = CifsGood; |
3614 | else | 3616 | else |
3615 | rc = -EHOSTDOWN; | 3617 | rc = -EHOSTDOWN; |
3616 | spin_unlock(&GlobalMid_Lock); | 3618 | spin_unlock(&GlobalMid_Lock); |
@@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3623 | goto ss_err_exit; | 3625 | goto ss_err_exit; |
3624 | 3626 | ||
3625 | pSesInfo->flags = 0; | 3627 | pSesInfo->flags = 0; |
3626 | pSesInfo->capabilities = pSesInfo->server->capabilities; | 3628 | pSesInfo->capabilities = server->capabilities; |
3627 | if (linuxExtEnabled == 0) | 3629 | if (linuxExtEnabled == 0) |
3628 | pSesInfo->capabilities &= (~CAP_UNIX); | 3630 | pSesInfo->capabilities &= (~CAP_UNIX); |
3629 | /* pSesInfo->sequence_number = 0;*/ | 3631 | /* pSesInfo->sequence_number = 0;*/ |
3630 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", | 3632 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", |
3631 | pSesInfo->server->secMode, | 3633 | server->secMode, server->capabilities, server->timeAdj)); |
3632 | pSesInfo->server->capabilities, | 3634 | |
3633 | pSesInfo->server->timeAdj)); | ||
3634 | if (experimEnabled < 2) | 3635 | if (experimEnabled < 2) |
3635 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); | 3636 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); |
3636 | else if (extended_security | 3637 | else if (extended_security |
3637 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3638 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
3638 | && (pSesInfo->server->secType == NTLMSSP)) { | 3639 | && (server->secType == NTLMSSP)) { |
3639 | rc = -EOPNOTSUPP; | 3640 | rc = -EOPNOTSUPP; |
3640 | } else if (extended_security | 3641 | } else if (extended_security |
3641 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3642 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
3642 | && (pSesInfo->server->secType == RawNTLMSSP)) { | 3643 | && (server->secType == RawNTLMSSP)) { |
3643 | cFYI(1, ("NTLMSSP sesssetup")); | 3644 | cFYI(1, ("NTLMSSP sesssetup")); |
3644 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, | 3645 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, |
3645 | nls_info); | 3646 | nls_info); |
@@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3668 | 3669 | ||
3669 | } else { | 3670 | } else { |
3670 | SMBNTencrypt(pSesInfo->password, | 3671 | SMBNTencrypt(pSesInfo->password, |
3671 | pSesInfo->server->cryptKey, | 3672 | server->cryptKey, |
3672 | ntlm_session_key); | 3673 | ntlm_session_key); |
3673 | 3674 | ||
3674 | if (first_time) | 3675 | if (first_time) |
3675 | cifs_calculate_mac_key( | 3676 | cifs_calculate_mac_key( |
3676 | &pSesInfo->server->mac_signing_key, | 3677 | &server->mac_signing_key, |
3677 | ntlm_session_key, | 3678 | ntlm_session_key, |
3678 | pSesInfo->password); | 3679 | pSesInfo->password); |
3679 | } | 3680 | } |
@@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
3686 | nls_info); | 3687 | nls_info); |
3687 | } | 3688 | } |
3688 | } else { /* old style NTLM 0.12 session setup */ | 3689 | } else { /* old style NTLM 0.12 session setup */ |
3689 | SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey, | 3690 | SMBNTencrypt(pSesInfo->password, server->cryptKey, |
3690 | ntlm_session_key); | 3691 | ntlm_session_key); |
3691 | 3692 | ||
3692 | if (first_time) | 3693 | if (first_time) |
3693 | cifs_calculate_mac_key( | 3694 | cifs_calculate_mac_key(&server->mac_signing_key, |
3694 | &pSesInfo->server->mac_signing_key, | 3695 | ntlm_session_key, |
3695 | ntlm_session_key, pSesInfo->password); | 3696 | pSesInfo->password); |
3696 | 3697 | ||
3697 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); | 3698 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); |
3698 | } | 3699 | } |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index f730ef35499e..a2e0673e1b08 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
@@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data, | |||
47 | return rc; | 47 | return rc; |
48 | } | 48 | } |
49 | 49 | ||
50 | static void | ||
51 | dns_resolver_destroy(struct key *key) | ||
52 | { | ||
53 | kfree(key->payload.data); | ||
54 | } | ||
55 | |||
50 | struct key_type key_type_dns_resolver = { | 56 | struct key_type key_type_dns_resolver = { |
51 | .name = "dns_resolver", | 57 | .name = "dns_resolver", |
52 | .def_datalen = sizeof(struct in_addr), | 58 | .def_datalen = sizeof(struct in_addr), |
53 | .describe = user_describe, | 59 | .describe = user_describe, |
54 | .instantiate = dns_resolver_instantiate, | 60 | .instantiate = dns_resolver_instantiate, |
61 | .destroy = dns_resolver_destroy, | ||
55 | .match = user_match, | 62 | .match = user_match, |
56 | }; | 63 | }; |
57 | 64 | ||
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff14d14903a0..cbefe1f1f9fe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
833 | return -EBADF; | 833 | return -EBADF; |
834 | open_file = (struct cifsFileInfo *) file->private_data; | 834 | open_file = (struct cifsFileInfo *) file->private_data; |
835 | 835 | ||
836 | rc = generic_write_checks(file, poffset, &write_size, 0); | ||
837 | if (rc) | ||
838 | return rc; | ||
839 | |||
836 | xid = GetXid(); | 840 | xid = GetXid(); |
837 | 841 | ||
838 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 842 | if (*poffset > file->f_path.dentry->d_inode->i_size) |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 848286861c31..9c548f110102 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
@@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode, | |||
546 | if ((inode->i_mode & S_IWUGO) == 0 && | 546 | if ((inode->i_mode & S_IWUGO) == 0 && |
547 | (attr & ATTR_READONLY) == 0) | 547 | (attr & ATTR_READONLY) == 0) |
548 | inode->i_mode |= (S_IWUGO & default_mode); | 548 | inode->i_mode |= (S_IWUGO & default_mode); |
549 | inode->i_mode &= ~S_IFMT; | 549 | |
550 | inode->i_mode &= ~S_IFMT; | ||
550 | } | 551 | } |
551 | /* clear write bits if ATTR_READONLY is set */ | 552 | /* clear write bits if ATTR_READONLY is set */ |
552 | if (attr & ATTR_READONLY) | 553 | if (attr & ATTR_READONLY) |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ed150efbe27c..252fdc0567f1 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
@@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; | 410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; |
411 | 411 | ||
412 | pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; | ||
413 | |||
412 | /* no capabilities flags in old lanman negotiation */ | 414 | /* no capabilities flags in old lanman negotiation */ |
413 | 415 | ||
414 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 416 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); |
@@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
505 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); | 507 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); |
506 | } else | 508 | } else |
507 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); | 509 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); |
508 | } else if (type == Kerberos) { | 510 | } else if (type == Kerberos || type == MSKerberos) { |
509 | #ifdef CONFIG_CIFS_UPCALL | 511 | #ifdef CONFIG_CIFS_UPCALL |
510 | struct cifs_spnego_msg *msg; | 512 | struct cifs_spnego_msg *msg; |
511 | spnego_key = cifs_get_spnego_key(ses); | 513 | spnego_key = cifs_get_spnego_key(ses); |
@@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
516 | } | 518 | } |
517 | 519 | ||
518 | msg = spnego_key->payload.data; | 520 | msg = spnego_key->payload.data; |
521 | /* check version field to make sure that cifs.upcall is | ||
522 | sending us a response in an expected form */ | ||
523 | if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { | ||
524 | cERROR(1, ("incorrect version of cifs.upcall (expected" | ||
525 | " %d but got %d)", | ||
526 | CIFS_SPNEGO_UPCALL_VERSION, msg->version)); | ||
527 | rc = -EKEYREJECTED; | ||
528 | goto ssetup_exit; | ||
529 | } | ||
519 | /* bail out if key is too long */ | 530 | /* bail out if key is too long */ |
520 | if (msg->sesskey_len > | 531 | if (msg->sesskey_len > |
521 | sizeof(ses->server->mac_signing_key.data.krb5)) { | 532 | sizeof(ses->server->mac_signing_key.data.krb5)) { |
diff --git a/fs/compat.c b/fs/compat.c index c9d1472e65c5..075d0509970d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen, | |||
792 | if (buf->result) | 792 | if (buf->result) |
793 | return -EINVAL; | 793 | return -EINVAL; |
794 | d_ino = ino; | 794 | d_ino = ino; |
795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
796 | buf->result = -EOVERFLOW; | ||
796 | return -EOVERFLOW; | 797 | return -EOVERFLOW; |
798 | } | ||
797 | buf->result++; | 799 | buf->result++; |
798 | dirent = buf->dirent; | 800 | dirent = buf->dirent; |
799 | if (!access_ok(VERIFY_WRITE, dirent, | 801 | if (!access_ok(VERIFY_WRITE, dirent, |
@@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen, | |||
862 | if (reclen > buf->count) | 864 | if (reclen > buf->count) |
863 | return -EINVAL; | 865 | return -EINVAL; |
864 | d_ino = ino; | 866 | d_ino = ino; |
865 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 867 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
868 | buf->error = -EOVERFLOW; | ||
866 | return -EOVERFLOW; | 869 | return -EOVERFLOW; |
870 | } | ||
867 | dirent = buf->previous; | 871 | dirent = buf->previous; |
868 | if (dirent) { | 872 | if (dirent) { |
869 | if (__put_user(offset, &dirent->d_off)) | 873 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7a8db78a91d2..8e93341f3e82 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
@@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1311 | * Ensure that no racing symlink() will make detach_prep() fail while | 1311 | * Ensure that no racing symlink() will make detach_prep() fail while |
1312 | * the new link is temporarily attached | 1312 | * the new link is temporarily attached |
1313 | */ | 1313 | */ |
1314 | mutex_lock(&configfs_symlink_mutex); | ||
1315 | spin_lock(&configfs_dirent_lock); | ||
1316 | do { | 1314 | do { |
1317 | struct mutex *wait_mutex; | 1315 | struct mutex *wait_mutex; |
1318 | 1316 | ||
1317 | mutex_lock(&configfs_symlink_mutex); | ||
1318 | spin_lock(&configfs_dirent_lock); | ||
1319 | ret = configfs_detach_prep(dentry, &wait_mutex); | 1319 | ret = configfs_detach_prep(dentry, &wait_mutex); |
1320 | if (ret) { | 1320 | if (ret) |
1321 | configfs_detach_rollback(dentry); | 1321 | configfs_detach_rollback(dentry); |
1322 | spin_unlock(&configfs_dirent_lock); | 1322 | spin_unlock(&configfs_dirent_lock); |
1323 | mutex_unlock(&configfs_symlink_mutex); | 1323 | mutex_unlock(&configfs_symlink_mutex); |
1324 | |||
1325 | if (ret) { | ||
1324 | if (ret != -EAGAIN) { | 1326 | if (ret != -EAGAIN) { |
1325 | config_item_put(parent_item); | 1327 | config_item_put(parent_item); |
1326 | return ret; | 1328 | return ret; |
@@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
1329 | /* Wait until the racing operation terminates */ | 1331 | /* Wait until the racing operation terminates */ |
1330 | mutex_lock(wait_mutex); | 1332 | mutex_lock(wait_mutex); |
1331 | mutex_unlock(wait_mutex); | 1333 | mutex_unlock(wait_mutex); |
1332 | |||
1333 | mutex_lock(&configfs_symlink_mutex); | ||
1334 | spin_lock(&configfs_dirent_lock); | ||
1335 | } | 1334 | } |
1336 | } while (ret == -EAGAIN); | 1335 | } while (ret == -EAGAIN); |
1337 | spin_unlock(&configfs_dirent_lock); | ||
1338 | mutex_unlock(&configfs_symlink_mutex); | ||
1339 | 1336 | ||
1340 | /* Get a working ref for the duration of this function */ | 1337 | /* Get a working ref for the duration of this function */ |
1341 | item = configfs_get_config_item(dentry); | 1338 | item = configfs_get_config_item(dentry); |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 0c3b618c15b3..f40423eb1a14 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -43,58 +43,13 @@ static DEFINE_MUTEX(read_mutex); | |||
43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) | 43 | static int cramfs_iget5_test(struct inode *inode, void *opaque) |
44 | { | 44 | { |
45 | struct cramfs_inode *cramfs_inode = opaque; | 45 | struct cramfs_inode *cramfs_inode = opaque; |
46 | 46 | return inode->i_ino == CRAMINO(cramfs_inode) && inode->i_ino != 1; | |
47 | if (inode->i_ino != CRAMINO(cramfs_inode)) | ||
48 | return 0; /* does not match */ | ||
49 | |||
50 | if (inode->i_ino != 1) | ||
51 | return 1; | ||
52 | |||
53 | /* all empty directories, char, block, pipe, and sock, share inode #1 */ | ||
54 | |||
55 | if ((inode->i_mode != cramfs_inode->mode) || | ||
56 | (inode->i_gid != cramfs_inode->gid) || | ||
57 | (inode->i_uid != cramfs_inode->uid)) | ||
58 | return 0; /* does not match */ | ||
59 | |||
60 | if ((S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) && | ||
61 | (inode->i_rdev != old_decode_dev(cramfs_inode->size))) | ||
62 | return 0; /* does not match */ | ||
63 | |||
64 | return 1; /* matches */ | ||
65 | } | 47 | } |
66 | 48 | ||
67 | static int cramfs_iget5_set(struct inode *inode, void *opaque) | 49 | static int cramfs_iget5_set(struct inode *inode, void *opaque) |
68 | { | 50 | { |
69 | static struct timespec zerotime; | ||
70 | struct cramfs_inode *cramfs_inode = opaque; | 51 | struct cramfs_inode *cramfs_inode = opaque; |
71 | inode->i_mode = cramfs_inode->mode; | ||
72 | inode->i_uid = cramfs_inode->uid; | ||
73 | inode->i_size = cramfs_inode->size; | ||
74 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
75 | inode->i_gid = cramfs_inode->gid; | ||
76 | /* Struct copy intentional */ | ||
77 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
78 | inode->i_ino = CRAMINO(cramfs_inode); | 52 | inode->i_ino = CRAMINO(cramfs_inode); |
79 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
80 | but it's the best we can do without reading the directory | ||
81 | contents. 1 yields the right result in GNU find, even | ||
82 | without -noleaf option. */ | ||
83 | if (S_ISREG(inode->i_mode)) { | ||
84 | inode->i_fop = &generic_ro_fops; | ||
85 | inode->i_data.a_ops = &cramfs_aops; | ||
86 | } else if (S_ISDIR(inode->i_mode)) { | ||
87 | inode->i_op = &cramfs_dir_inode_operations; | ||
88 | inode->i_fop = &cramfs_directory_operations; | ||
89 | } else if (S_ISLNK(inode->i_mode)) { | ||
90 | inode->i_op = &page_symlink_inode_operations; | ||
91 | inode->i_data.a_ops = &cramfs_aops; | ||
92 | } else { | ||
93 | inode->i_size = 0; | ||
94 | inode->i_blocks = 0; | ||
95 | init_special_inode(inode, inode->i_mode, | ||
96 | old_decode_dev(cramfs_inode->size)); | ||
97 | } | ||
98 | return 0; | 53 | return 0; |
99 | } | 54 | } |
100 | 55 | ||
@@ -104,12 +59,48 @@ static struct inode *get_cramfs_inode(struct super_block *sb, | |||
104 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), | 59 | struct inode *inode = iget5_locked(sb, CRAMINO(cramfs_inode), |
105 | cramfs_iget5_test, cramfs_iget5_set, | 60 | cramfs_iget5_test, cramfs_iget5_set, |
106 | cramfs_inode); | 61 | cramfs_inode); |
62 | static struct timespec zerotime; | ||
63 | |||
107 | if (inode && (inode->i_state & I_NEW)) { | 64 | if (inode && (inode->i_state & I_NEW)) { |
65 | inode->i_mode = cramfs_inode->mode; | ||
66 | inode->i_uid = cramfs_inode->uid; | ||
67 | inode->i_size = cramfs_inode->size; | ||
68 | inode->i_blocks = (cramfs_inode->size - 1) / 512 + 1; | ||
69 | inode->i_gid = cramfs_inode->gid; | ||
70 | /* Struct copy intentional */ | ||
71 | inode->i_mtime = inode->i_atime = inode->i_ctime = zerotime; | ||
72 | /* inode->i_nlink is left 1 - arguably wrong for directories, | ||
73 | but it's the best we can do without reading the directory | ||
74 | contents. 1 yields the right result in GNU find, even | ||
75 | without -noleaf option. */ | ||
76 | if (S_ISREG(inode->i_mode)) { | ||
77 | inode->i_fop = &generic_ro_fops; | ||
78 | inode->i_data.a_ops = &cramfs_aops; | ||
79 | } else if (S_ISDIR(inode->i_mode)) { | ||
80 | inode->i_op = &cramfs_dir_inode_operations; | ||
81 | inode->i_fop = &cramfs_directory_operations; | ||
82 | } else if (S_ISLNK(inode->i_mode)) { | ||
83 | inode->i_op = &page_symlink_inode_operations; | ||
84 | inode->i_data.a_ops = &cramfs_aops; | ||
85 | } else { | ||
86 | inode->i_size = 0; | ||
87 | inode->i_blocks = 0; | ||
88 | init_special_inode(inode, inode->i_mode, | ||
89 | old_decode_dev(cramfs_inode->size)); | ||
90 | } | ||
108 | unlock_new_inode(inode); | 91 | unlock_new_inode(inode); |
109 | } | 92 | } |
110 | return inode; | 93 | return inode; |
111 | } | 94 | } |
112 | 95 | ||
96 | static void cramfs_drop_inode(struct inode *inode) | ||
97 | { | ||
98 | if (inode->i_ino == 1) | ||
99 | generic_delete_inode(inode); | ||
100 | else | ||
101 | generic_drop_inode(inode); | ||
102 | } | ||
103 | |||
113 | /* | 104 | /* |
114 | * We have our own block cache: don't fill up the buffer cache | 105 | * We have our own block cache: don't fill up the buffer cache |
115 | * with the rom-image, because the way the filesystem is set | 106 | * with the rom-image, because the way the filesystem is set |
@@ -534,6 +525,7 @@ static const struct super_operations cramfs_ops = { | |||
534 | .put_super = cramfs_put_super, | 525 | .put_super = cramfs_put_super, |
535 | .remount_fs = cramfs_remount, | 526 | .remount_fs = cramfs_remount, |
536 | .statfs = cramfs_statfs, | 527 | .statfs = cramfs_statfs, |
528 | .drop_inode = cramfs_drop_inode, | ||
537 | }; | 529 | }; |
538 | 530 | ||
539 | static int cramfs_get_sb(struct file_system_type *fs_type, | 531 | static int cramfs_get_sb(struct file_system_type *fs_type, |
diff --git a/fs/dcache.c b/fs/dcache.c index 101663d15e9f..80e93956aced 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
1236 | * If no entry exists with the exact case name, allocate new dentry with | 1236 | * If no entry exists with the exact case name, allocate new dentry with |
1237 | * the exact case, and return the spliced entry. | 1237 | * the exact case, and return the spliced entry. |
1238 | */ | 1238 | */ |
1239 | struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, | 1239 | struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, |
1240 | struct qstr *name) | 1240 | struct qstr *name) |
1241 | { | 1241 | { |
1242 | int error; | 1242 | int error; |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 3a404e7fad53..291abb11e20e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
@@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
74 | } | 74 | } |
75 | unlock_kernel(); | 75 | unlock_kernel(); |
76 | 76 | ||
77 | d_add(dentry, inode); | 77 | return d_splice_alias(inode, dentry); |
78 | return NULL; | ||
79 | } | 78 | } |
80 | 79 | ||
81 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, | 80 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ae5004e93fc..e9fa960ba6da 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
@@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | |||
1626 | free_blocks = | 1626 | free_blocks = |
1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); |
1628 | #endif | 1628 | #endif |
1629 | if (free_blocks <= root_blocks) | ||
1630 | /* we don't have free space */ | ||
1631 | return 0; | ||
1629 | if (free_blocks - root_blocks < nblocks) | 1632 | if (free_blocks - root_blocks < nblocks) |
1630 | return free_blocks - root_blocks; | 1633 | return free_blocks - root_blocks; |
1631 | return nblocks; | 1634 | return nblocks; |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..ec8e33b45219 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
@@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
411 | get_dtype(sb, fname->file_type)); | 411 | get_dtype(sb, fname->file_type)); |
412 | if (error) { | 412 | if (error) { |
413 | filp->f_pos = curr_pos; | 413 | filp->f_pos = curr_pos; |
414 | info->extra_fname = fname->next; | 414 | info->extra_fname = fname; |
415 | return error; | 415 | return error; |
416 | } | 416 | } |
417 | fname = fname->next; | 417 | fname = fname->next; |
@@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp, | |||
450 | * If there are any leftover names on the hash collision | 450 | * If there are any leftover names on the hash collision |
451 | * chain, return them first. | 451 | * chain, return them first. |
452 | */ | 452 | */ |
453 | if (info->extra_fname && | 453 | if (info->extra_fname) { |
454 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 454 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
455 | goto finished; | 455 | goto finished; |
456 | 456 | ||
457 | if (!info->curr_node) | 457 | info->extra_fname = NULL; |
458 | info->curr_node = rb_next(info->curr_node); | ||
459 | if (!info->curr_node) { | ||
460 | if (info->next_hash == ~0) { | ||
461 | filp->f_pos = EXT4_HTREE_EOF; | ||
462 | goto finished; | ||
463 | } | ||
464 | info->curr_hash = info->next_hash; | ||
465 | info->curr_minor_hash = 0; | ||
466 | } | ||
467 | } else if (!info->curr_node) | ||
458 | info->curr_node = rb_first(&info->root); | 468 | info->curr_node = rb_first(&info->root); |
459 | 469 | ||
460 | while (1) { | 470 | while (1) { |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c7924d9e358..295003241d3d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -1072,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *); | |||
1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
1073 | extern void ext4_set_aops(struct inode *inode); | 1073 | extern void ext4_set_aops(struct inode *inode); |
1074 | extern int ext4_writepage_trans_blocks(struct inode *); | 1074 | extern int ext4_writepage_trans_blocks(struct inode *); |
1075 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
1076 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | ||
1075 | extern int ext4_block_truncate_page(handle_t *handle, | 1077 | extern int ext4_block_truncate_page(handle_t *handle, |
1076 | struct address_space *mapping, loff_t from); | 1078 | struct address_space *mapping, loff_t from); |
1077 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1079 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
@@ -1227,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
1227 | /* extents.c */ | 1229 | /* extents.c */ |
1228 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1230 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
1229 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1231 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
1232 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | ||
1233 | int chunk); | ||
1230 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1234 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
1231 | ext4_lblk_t iblock, | 1235 | ext4_lblk_t iblock, |
1232 | unsigned long max_blocks, struct buffer_head *bh_result, | 1236 | unsigned long max_blocks, struct buffer_head *bh_result, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..d33dc56d6986 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
@@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | |||
216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
219 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); | 219 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
220 | int num, | ||
221 | struct ext4_ext_path *path); | ||
220 | extern int ext4_ext_try_to_merge(struct inode *inode, | 222 | extern int ext4_ext_try_to_merge(struct inode *inode, |
221 | struct ext4_ext_path *path, | 223 | struct ext4_ext_path *path, |
222 | struct ext4_extent *); | 224 | struct ext4_extent *); |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
@@ -51,6 +51,14 @@ | |||
51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ | 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ |
52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
53 | 53 | ||
54 | /* | ||
55 | * Define the number of metadata blocks we need to account to modify data. | ||
56 | * | ||
58 | * This includes super block, inode block, quota blocks and xattr blocks | ||
58 | */ | ||
59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ | ||
60 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | ||
61 | |||
54 | /* Delete operations potentially hit one directory's namespace plus an | 62 | /* Delete operations potentially hit one directory's namespace plus an |
55 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be | 63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be |
56 | * generous. We can grow the delete transaction later if necessary. */ | 64 | * generous. We can grow the delete transaction later if necessary. */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 612c3d2c3824..b24d3c53f20c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
1747 | } | 1747 | } |
1748 | 1748 | ||
1749 | /* | 1749 | /* |
1750 | * ext4_ext_calc_credits_for_insert: | 1750 | * ext4_ext_calc_credits_for_single_extent: |
1751 | * This routine returns max. credits that the extent tree can consume. | 1751 | * This routine returns the max. credits needed to insert an extent |
1752 | * It should be OK for low-performance paths like ->writepage() | 1752 | * to the extent tree. |
1753 | * To allow many writing processes to fit into a single transaction, | 1753 | * When passing the actual path, the caller should calculate credits |
1754 | * the caller should calculate credits under i_data_sem and | 1754 | * under i_data_sem. |
1755 | * pass the actual path. | ||
1756 | */ | 1755 | */ |
1757 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1756 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, |
1758 | struct ext4_ext_path *path) | 1757 | struct ext4_ext_path *path) |
1759 | { | 1758 | { |
1760 | int depth, needed; | ||
1761 | |||
1762 | if (path) { | 1759 | if (path) { |
1760 | int depth = ext_depth(inode); | ||
1761 | int ret = 0; | ||
1762 | |||
1763 | /* probably there is space in leaf? */ | 1763 | /* probably there is space in leaf? */ |
1764 | depth = ext_depth(inode); | ||
1765 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) | 1764 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) |
1766 | < le16_to_cpu(path[depth].p_hdr->eh_max)) | 1765 | < le16_to_cpu(path[depth].p_hdr->eh_max)) { |
1767 | return 1; | ||
1768 | } | ||
1769 | 1766 | ||
1770 | /* | 1767 | /* |
1771 | * given 32-bit logical block (4294967296 blocks), max. tree | 1768 | * There is some space in the leaf tree, no |
1772 | * can be 4 levels in depth -- 4 * 340^4 == 53453440000. | 1769 | * need to account for leaf block credit |
1773 | * Let's also add one more level for imbalance. | 1770 | * |
1774 | */ | 1771 | * bitmaps and block group descriptor blocks |
1775 | depth = 5; | 1772 | * and other metadata blocks still need to be |
1776 | 1773 | * accounted. | |
1777 | /* allocation of new data block(s) */ | 1774 | */ |
1778 | needed = 2; | 1775 | /* 1 bitmap, 1 block group descriptor */ |
1776 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
1777 | } | ||
1778 | } | ||
1779 | 1779 | ||
1780 | /* | 1780 | return ext4_chunk_trans_blocks(inode, nrblocks); |
1781 | * tree can be full, so it would need to grow in depth: | 1781 | } |
1782 | * we need one credit to modify old root, credits for | ||
1783 | * new root will be added in split accounting | ||
1784 | */ | ||
1785 | needed += 1; | ||
1786 | 1782 | ||
1787 | /* | 1783 | /* |
1788 | * Index split can happen, we would need: | 1784 | * How many index/leaf blocks need to change/allocate to modify nrblocks? |
1789 | * allocate intermediate indexes (bitmap + group) | 1785 | * |
1790 | * + change two blocks at each level, but root (already included) | 1786 | * if nrblocks are fit in a single extent (chunk flag is 1), then |
1791 | */ | 1787 | * in the worst case, each tree level index/leaf need to be changed |
1792 | needed += (depth * 2) + (depth * 2); | 1788 | * if the tree split due to insert a new extent, then the old tree |
1789 | * index/leaf need to be updated too | ||
1790 | * | ||
1791 | * If the nrblocks are discontiguous, they could cause | ||
1792 | * the whole tree split more than once, but this is really rare. | ||
1793 | */ | ||
1794 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
1795 | { | ||
1796 | int index; | ||
1797 | int depth = ext_depth(inode); | ||
1793 | 1798 | ||
1794 | /* any allocation modifies superblock */ | 1799 | if (chunk) |
1795 | needed += 1; | 1800 | index = depth * 2; |
1801 | else | ||
1802 | index = depth * 3; | ||
1796 | 1803 | ||
1797 | return needed; | 1804 | return index; |
1798 | } | 1805 | } |
1799 | 1806 | ||
1800 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 1807 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
@@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
1921 | correct_index = 1; | 1928 | correct_index = 1; |
1922 | credits += (ext_depth(inode)) + 1; | 1929 | credits += (ext_depth(inode)) + 1; |
1923 | } | 1930 | } |
1924 | #ifdef CONFIG_QUOTA | ||
1925 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 1931 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
1926 | #endif | ||
1927 | 1932 | ||
1928 | err = ext4_ext_journal_restart(handle, credits); | 1933 | err = ext4_ext_journal_restart(handle, credits); |
1929 | if (err) | 1934 | if (err) |
@@ -2805,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2805 | /* | 2810 | /* |
2806 | * probably first extent we're gonna free will be last in block | 2811 | * probably first extent we're gonna free will be last in block |
2807 | */ | 2812 | */ |
2808 | err = ext4_writepage_trans_blocks(inode) + 3; | 2813 | err = ext4_writepage_trans_blocks(inode); |
2809 | handle = ext4_journal_start(inode, err); | 2814 | handle = ext4_journal_start(inode, err); |
2810 | if (IS_ERR(handle)) | 2815 | if (IS_ERR(handle)) |
2811 | return; | 2816 | return; |
@@ -2819,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
2819 | down_write(&EXT4_I(inode)->i_data_sem); | 2824 | down_write(&EXT4_I(inode)->i_data_sem); |
2820 | ext4_ext_invalidate_cache(inode); | 2825 | ext4_ext_invalidate_cache(inode); |
2821 | 2826 | ||
2822 | ext4_mb_discard_inode_preallocations(inode); | 2827 | ext4_discard_reservation(inode); |
2823 | 2828 | ||
2824 | /* | 2829 | /* |
2825 | * TODO: optimization is possible here. | 2830 | * TODO: optimization is possible here. |
@@ -2858,27 +2863,6 @@ out_stop: | |||
2858 | ext4_journal_stop(handle); | 2863 | ext4_journal_stop(handle); |
2859 | } | 2864 | } |
2860 | 2865 | ||
2861 | /* | ||
2862 | * ext4_ext_writepage_trans_blocks: | ||
2863 | * calculate max number of blocks we could modify | ||
2864 | * in order to allocate new block for an inode | ||
2865 | */ | ||
2866 | int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) | ||
2867 | { | ||
2868 | int needed; | ||
2869 | |||
2870 | needed = ext4_ext_calc_credits_for_insert(inode, NULL); | ||
2871 | |||
2872 | /* caller wants to allocate num blocks, but note it includes sb */ | ||
2873 | needed = needed * num - (num - 1); | ||
2874 | |||
2875 | #ifdef CONFIG_QUOTA | ||
2876 | needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
2877 | #endif | ||
2878 | |||
2879 | return needed; | ||
2880 | } | ||
2881 | |||
2882 | static void ext4_falloc_update_inode(struct inode *inode, | 2866 | static void ext4_falloc_update_inode(struct inode *inode, |
2883 | int mode, loff_t new_size, int update_ctime) | 2867 | int mode, loff_t new_size, int update_ctime) |
2884 | { | 2868 | { |
@@ -2939,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
2939 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 2923 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
2940 | - block; | 2924 | - block; |
2941 | /* | 2925 | /* |
2942 | * credits to insert 1 extent into extent tree + buffers to be able to | 2926 | * credits to insert 1 extent into extent tree |
2943 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | ||
2944 | */ | 2927 | */ |
2945 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 2928 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
2946 | mutex_lock(&inode->i_mutex); | 2929 | mutex_lock(&inode->i_mutex); |
2947 | retry: | 2930 | retry: |
2948 | while (ret >= 0 && ret < max_blocks) { | 2931 | while (ret >= 0 && ret < max_blocks) { |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 655e760212b8..f344834bbf58 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
@@ -351,7 +351,7 @@ find_close_to_parent: | |||
351 | goto found_flexbg; | 351 | goto found_flexbg; |
352 | } | 352 | } |
353 | 353 | ||
354 | if (best_flex < 0 || | 354 | if (flex_group[best_flex].free_inodes == 0 || |
355 | (flex_group[i].free_blocks > | 355 | (flex_group[i].free_blocks > |
356 | flex_group[best_flex].free_blocks && | 356 | flex_group[best_flex].free_blocks && |
357 | flex_group[i].free_inodes)) | 357 | flex_group[i].free_inodes)) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 59fbbe899acc..7e91913e325b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -41,6 +41,8 @@ | |||
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | #include "ext4_extents.h" | 42 | #include "ext4_extents.h" |
43 | 43 | ||
44 | #define MPAGE_DA_EXTENT_TAIL 0x01 | ||
45 | |||
44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 46 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
45 | loff_t new_size) | 47 | loff_t new_size) |
46 | { | 48 | { |
@@ -1005,6 +1007,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | |||
1005 | */ | 1007 | */ |
1006 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 1008 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) |
1007 | { | 1009 | { |
1010 | if (!blocks) | ||
1011 | return 0; | ||
1012 | |||
1008 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1013 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
1009 | return ext4_ext_calc_metadata_amount(inode, blocks); | 1014 | return ext4_ext_calc_metadata_amount(inode, blocks); |
1010 | 1015 | ||
@@ -1041,18 +1046,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
1041 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1046 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
1042 | } | 1047 | } |
1043 | 1048 | ||
1044 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1045 | #define DIO_MAX_BLOCKS 4096 | ||
1046 | /* | ||
1047 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
1048 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
1049 | * For B blocks with A block pointers per block we need: | ||
1050 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
1051 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
1052 | */ | ||
1053 | #define DIO_CREDITS 25 | ||
1054 | |||
1055 | |||
1056 | /* | 1049 | /* |
1057 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1050 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, |
1058 | * and returns if the blocks are already mapped. | 1051 | * and returns if the blocks are already mapped. |
@@ -1164,19 +1157,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
1164 | return retval; | 1157 | return retval; |
1165 | } | 1158 | } |
1166 | 1159 | ||
1160 | /* Maximum number of blocks we map for direct IO at once. */ | ||
1161 | #define DIO_MAX_BLOCKS 4096 | ||
1162 | |||
1167 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1163 | static int ext4_get_block(struct inode *inode, sector_t iblock, |
1168 | struct buffer_head *bh_result, int create) | 1164 | struct buffer_head *bh_result, int create) |
1169 | { | 1165 | { |
1170 | handle_t *handle = ext4_journal_current_handle(); | 1166 | handle_t *handle = ext4_journal_current_handle(); |
1171 | int ret = 0, started = 0; | 1167 | int ret = 0, started = 0; |
1172 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 1168 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
1169 | int dio_credits; | ||
1173 | 1170 | ||
1174 | if (create && !handle) { | 1171 | if (create && !handle) { |
1175 | /* Direct IO write... */ | 1172 | /* Direct IO write... */ |
1176 | if (max_blocks > DIO_MAX_BLOCKS) | 1173 | if (max_blocks > DIO_MAX_BLOCKS) |
1177 | max_blocks = DIO_MAX_BLOCKS; | 1174 | max_blocks = DIO_MAX_BLOCKS; |
1178 | handle = ext4_journal_start(inode, DIO_CREDITS + | 1175 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
1179 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | 1176 | handle = ext4_journal_start(inode, dio_credits); |
1180 | if (IS_ERR(handle)) { | 1177 | if (IS_ERR(handle)) { |
1181 | ret = PTR_ERR(handle); | 1178 | ret = PTR_ERR(handle); |
1182 | goto out; | 1179 | goto out; |
@@ -1559,7 +1556,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
1559 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1556 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
1560 | int total, mdb, mdb_free, release; | 1557 | int total, mdb, mdb_free, release; |
1561 | 1558 | ||
1559 | if (!to_free) | ||
1560 | return; /* Nothing to release, exit */ | ||
1561 | |||
1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
1563 | |||
1564 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | ||
1565 | /* | ||
1566 | * if there is no reserved blocks, but we try to free some | ||
1567 | * then the counter is messed up somewhere. | ||
1568 | * but since this function is called from invalidate | ||
1569 | * page, it's harmless to return without any action | ||
1570 | */ | ||
1571 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | ||
1572 | "blocks for inode %lu, but there is no reserved " | ||
1573 | "data blocks\n", to_free, inode->i_ino); | ||
1574 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
1575 | return; | ||
1576 | } | ||
1577 | |||
1563 | /* recalculate the number of metablocks still need to be reserved */ | 1578 | /* recalculate the number of metablocks still need to be reserved */ |
1564 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1579 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; |
1565 | mdb = ext4_calc_metadata_amount(inode, total); | 1580 | mdb = ext4_calc_metadata_amount(inode, total); |
@@ -1613,11 +1628,13 @@ struct mpage_da_data { | |||
1613 | unsigned long first_page, next_page; /* extent of pages */ | 1628 | unsigned long first_page, next_page; /* extent of pages */ |
1614 | get_block_t *get_block; | 1629 | get_block_t *get_block; |
1615 | struct writeback_control *wbc; | 1630 | struct writeback_control *wbc; |
1631 | int io_done; | ||
1632 | long pages_written; | ||
1616 | }; | 1633 | }; |
1617 | 1634 | ||
1618 | /* | 1635 | /* |
1619 | * mpage_da_submit_io - walks through extent of pages and try to write | 1636 | * mpage_da_submit_io - walks through extent of pages and try to write |
1620 | * them with __mpage_writepage() | 1637 | * them with writepage() call back |
1621 | * | 1638 | * |
1622 | * @mpd->inode: inode | 1639 | * @mpd->inode: inode |
1623 | * @mpd->first_page: first page of the extent | 1640 | * @mpd->first_page: first page of the extent |
@@ -1632,18 +1649,11 @@ struct mpage_da_data { | |||
1632 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 1649 | static int mpage_da_submit_io(struct mpage_da_data *mpd) |
1633 | { | 1650 | { |
1634 | struct address_space *mapping = mpd->inode->i_mapping; | 1651 | struct address_space *mapping = mpd->inode->i_mapping; |
1635 | struct mpage_data mpd_pp = { | ||
1636 | .bio = NULL, | ||
1637 | .last_block_in_bio = 0, | ||
1638 | .get_block = mpd->get_block, | ||
1639 | .use_writepage = 1, | ||
1640 | }; | ||
1641 | int ret = 0, err, nr_pages, i; | 1652 | int ret = 0, err, nr_pages, i; |
1642 | unsigned long index, end; | 1653 | unsigned long index, end; |
1643 | struct pagevec pvec; | 1654 | struct pagevec pvec; |
1644 | 1655 | ||
1645 | BUG_ON(mpd->next_page <= mpd->first_page); | 1656 | BUG_ON(mpd->next_page <= mpd->first_page); |
1646 | |||
1647 | pagevec_init(&pvec, 0); | 1657 | pagevec_init(&pvec, 0); |
1648 | index = mpd->first_page; | 1658 | index = mpd->first_page; |
1649 | end = mpd->next_page - 1; | 1659 | end = mpd->next_page - 1; |
@@ -1661,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1661 | break; | 1671 | break; |
1662 | index++; | 1672 | index++; |
1663 | 1673 | ||
1664 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | 1674 | err = mapping->a_ops->writepage(page, mpd->wbc); |
1665 | 1675 | if (!err) | |
1676 | mpd->pages_written++; | ||
1666 | /* | 1677 | /* |
1667 | * In error case, we have to continue because | 1678 | * In error case, we have to continue because |
1668 | * remaining pages are still locked | 1679 | * remaining pages are still locked |
@@ -1673,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
1673 | } | 1684 | } |
1674 | pagevec_release(&pvec); | 1685 | pagevec_release(&pvec); |
1675 | } | 1686 | } |
1676 | if (mpd_pp.bio) | ||
1677 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
1678 | |||
1679 | return ret; | 1687 | return ret; |
1680 | } | 1688 | } |
1681 | 1689 | ||
@@ -1698,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1698 | int blocks = exbh->b_size >> inode->i_blkbits; | 1706 | int blocks = exbh->b_size >> inode->i_blkbits; |
1699 | sector_t pblock = exbh->b_blocknr, cur_logical; | 1707 | sector_t pblock = exbh->b_blocknr, cur_logical; |
1700 | struct buffer_head *head, *bh; | 1708 | struct buffer_head *head, *bh; |
1701 | unsigned long index, end; | 1709 | pgoff_t index, end; |
1702 | struct pagevec pvec; | 1710 | struct pagevec pvec; |
1703 | int nr_pages, i; | 1711 | int nr_pages, i; |
1704 | 1712 | ||
@@ -1741,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
1741 | if (buffer_delay(bh)) { | 1749 | if (buffer_delay(bh)) { |
1742 | bh->b_blocknr = pblock; | 1750 | bh->b_blocknr = pblock; |
1743 | clear_buffer_delay(bh); | 1751 | clear_buffer_delay(bh); |
1752 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1753 | } else if (buffer_unwritten(bh)) { | ||
1754 | bh->b_blocknr = pblock; | ||
1755 | clear_buffer_unwritten(bh); | ||
1756 | set_buffer_mapped(bh); | ||
1757 | set_buffer_new(bh); | ||
1758 | bh->b_bdev = inode->i_sb->s_bdev; | ||
1744 | } else if (buffer_mapped(bh)) | 1759 | } else if (buffer_mapped(bh)) |
1745 | BUG_ON(bh->b_blocknr != pblock); | 1760 | BUG_ON(bh->b_blocknr != pblock); |
1746 | 1761 | ||
@@ -1776,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
1776 | * | 1791 | * |
1777 | * The function skips space we know is already mapped to disk blocks. | 1792 | * The function skips space we know is already mapped to disk blocks. |
1778 | * | 1793 | * |
1779 | * The function ignores errors ->get_block() returns, thus real | ||
1780 | * error handling is postponed to __mpage_writepage() | ||
1781 | */ | 1794 | */ |
1782 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1795 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) |
1783 | { | 1796 | { |
1797 | int err = 0; | ||
1784 | struct buffer_head *lbh = &mpd->lbh; | 1798 | struct buffer_head *lbh = &mpd->lbh; |
1785 | int err = 0, remain = lbh->b_size; | ||
1786 | sector_t next = lbh->b_blocknr; | 1799 | sector_t next = lbh->b_blocknr; |
1787 | struct buffer_head new; | 1800 | struct buffer_head new; |
1788 | 1801 | ||
@@ -1792,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1792 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1805 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
1793 | return; | 1806 | return; |
1794 | 1807 | ||
1795 | while (remain) { | 1808 | new.b_state = lbh->b_state; |
1796 | new.b_state = lbh->b_state; | 1809 | new.b_blocknr = 0; |
1797 | new.b_blocknr = 0; | 1810 | new.b_size = lbh->b_size; |
1798 | new.b_size = remain; | ||
1799 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1800 | if (err) { | ||
1801 | /* | ||
1802 | * Rather than implement own error handling | ||
1803 | * here, we just leave remaining blocks | ||
1804 | * unallocated and try again with ->writepage() | ||
1805 | */ | ||
1806 | break; | ||
1807 | } | ||
1808 | BUG_ON(new.b_size == 0); | ||
1809 | 1811 | ||
1810 | if (buffer_new(&new)) | 1812 | /* |
1811 | __unmap_underlying_blocks(mpd->inode, &new); | 1813 | * If we didn't accumulate anything |
1814 | * to write simply return | ||
1815 | */ | ||
1816 | if (!new.b_size) | ||
1817 | return; | ||
1818 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
1819 | if (err) | ||
1820 | return; | ||
1821 | BUG_ON(new.b_size == 0); | ||
1812 | 1822 | ||
1813 | /* | 1823 | if (buffer_new(&new)) |
1814 | * If blocks are delayed marked, we need to | 1824 | __unmap_underlying_blocks(mpd->inode, &new); |
1815 | * put actual blocknr and drop delayed bit | ||
1816 | */ | ||
1817 | if (buffer_delay(lbh)) | ||
1818 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
1819 | 1825 | ||
1820 | /* go for the remaining blocks */ | 1826 | /* |
1821 | next += new.b_size >> mpd->inode->i_blkbits; | 1827 | * If blocks are delayed marked, we need to |
1822 | remain -= new.b_size; | 1828 | * put actual blocknr and drop delayed bit |
1823 | } | 1829 | */ |
1830 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | ||
1831 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
1832 | |||
1833 | return; | ||
1824 | } | 1834 | } |
1825 | 1835 | ||
1826 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | 1836 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
1837 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
1827 | 1838 | ||
1828 | /* | 1839 | /* |
1829 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | 1840 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks |
@@ -1837,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
1837 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 1848 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
1838 | sector_t logical, struct buffer_head *bh) | 1849 | sector_t logical, struct buffer_head *bh) |
1839 | { | 1850 | { |
1840 | struct buffer_head *lbh = &mpd->lbh; | ||
1841 | sector_t next; | 1851 | sector_t next; |
1852 | size_t b_size = bh->b_size; | ||
1853 | struct buffer_head *lbh = &mpd->lbh; | ||
1854 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
1842 | 1855 | ||
1843 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | 1856 | /* check if the reserved journal credits might overflow */ |
1844 | 1857 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | |
1858 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
1859 | /* | ||
1860 | * With non-extent format we are limited by the journal | ||
1861 | * credit available. Total credit needed to insert | ||
1862 | * nrblocks contiguous blocks is dependent on the | ||
1863 | * nrblocks. So limit nrblocks. | ||
1864 | */ | ||
1865 | goto flush_it; | ||
1866 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | ||
1867 | EXT4_MAX_TRANS_DATA) { | ||
1868 | /* | ||
1869 | * Adding the new buffer_head would make it cross the | ||
1870 | * allowed limit for which we have journal credit | ||
1871 | * reserved. So limit the new bh->b_size | ||
1872 | */ | ||
1873 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | ||
1874 | mpd->inode->i_blkbits; | ||
1875 | /* we will do mpage_da_submit_io in the next loop */ | ||
1876 | } | ||
1877 | } | ||
1845 | /* | 1878 | /* |
1846 | * First block in the extent | 1879 | * First block in the extent |
1847 | */ | 1880 | */ |
1848 | if (lbh->b_size == 0) { | 1881 | if (lbh->b_size == 0) { |
1849 | lbh->b_blocknr = logical; | 1882 | lbh->b_blocknr = logical; |
1850 | lbh->b_size = bh->b_size; | 1883 | lbh->b_size = b_size; |
1851 | lbh->b_state = bh->b_state & BH_FLAGS; | 1884 | lbh->b_state = bh->b_state & BH_FLAGS; |
1852 | return; | 1885 | return; |
1853 | } | 1886 | } |
1854 | 1887 | ||
1888 | next = lbh->b_blocknr + nrblocks; | ||
1855 | /* | 1889 | /* |
1856 | * Can we merge the block to our big extent? | 1890 | * Can we merge the block to our big extent? |
1857 | */ | 1891 | */ |
1858 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 1892 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { |
1859 | lbh->b_size += bh->b_size; | 1893 | lbh->b_size += b_size; |
1860 | return; | 1894 | return; |
1861 | } | 1895 | } |
1862 | 1896 | ||
1897 | flush_it: | ||
1863 | /* | 1898 | /* |
1864 | * We couldn't merge the block to our extent, so we | 1899 | * We couldn't merge the block to our extent, so we |
1865 | * need to flush current extent and start new one | 1900 | * need to flush current extent and start new one |
1866 | */ | 1901 | */ |
1867 | mpage_da_map_blocks(mpd); | 1902 | mpage_da_map_blocks(mpd); |
1868 | 1903 | mpage_da_submit_io(mpd); | |
1869 | /* | 1904 | mpd->io_done = 1; |
1870 | * Now start a new extent | 1905 | return; |
1871 | */ | ||
1872 | lbh->b_size = bh->b_size; | ||
1873 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
1874 | lbh->b_blocknr = logical; | ||
1875 | } | 1906 | } |
1876 | 1907 | ||
1877 | /* | 1908 | /* |
@@ -1891,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page, | |||
1891 | struct buffer_head *bh, *head, fake; | 1922 | struct buffer_head *bh, *head, fake; |
1892 | sector_t logical; | 1923 | sector_t logical; |
1893 | 1924 | ||
1925 | if (mpd->io_done) { | ||
1926 | /* | ||
1927 | * Rest of the pages in the page_vec: | ||
1928 | * redirty them and skip them. We will | ||
1929 | * try to write them again after | ||
1930 | * starting a new transaction | ||
1931 | */ | ||
1932 | redirty_page_for_writepage(wbc, page); | ||
1933 | unlock_page(page); | ||
1934 | return MPAGE_DA_EXTENT_TAIL; | ||
1935 | } | ||
1894 | /* | 1936 | /* |
1895 | * Can we merge this page to current extent? | 1937 | * Can we merge this page to current extent? |
1896 | */ | 1938 | */ |
1897 | if (mpd->next_page != page->index) { | 1939 | if (mpd->next_page != page->index) { |
1898 | /* | 1940 | /* |
1899 | * Nope, we can't. So, we map non-allocated blocks | 1941 | * Nope, we can't. So, we map non-allocated blocks |
1900 | * and start IO on them using __mpage_writepage() | 1942 | * and start IO on them using writepage() |
1901 | */ | 1943 | */ |
1902 | if (mpd->next_page != mpd->first_page) { | 1944 | if (mpd->next_page != mpd->first_page) { |
1903 | mpage_da_map_blocks(mpd); | 1945 | mpage_da_map_blocks(mpd); |
1904 | mpage_da_submit_io(mpd); | 1946 | mpage_da_submit_io(mpd); |
1947 | /* | ||
1948 | * skip rest of the page in the page_vec | ||
1949 | */ | ||
1950 | mpd->io_done = 1; | ||
1951 | redirty_page_for_writepage(wbc, page); | ||
1952 | unlock_page(page); | ||
1953 | return MPAGE_DA_EXTENT_TAIL; | ||
1905 | } | 1954 | } |
1906 | 1955 | ||
1907 | /* | 1956 | /* |
@@ -1932,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page, | |||
1932 | set_buffer_dirty(bh); | 1981 | set_buffer_dirty(bh); |
1933 | set_buffer_uptodate(bh); | 1982 | set_buffer_uptodate(bh); |
1934 | mpage_add_bh_to_extent(mpd, logical, bh); | 1983 | mpage_add_bh_to_extent(mpd, logical, bh); |
1984 | if (mpd->io_done) | ||
1985 | return MPAGE_DA_EXTENT_TAIL; | ||
1935 | } else { | 1986 | } else { |
1936 | /* | 1987 | /* |
1937 | * Page with regular buffer heads, just add all dirty ones | 1988 | * Page with regular buffer heads, just add all dirty ones |
@@ -1940,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page, | |||
1940 | bh = head; | 1991 | bh = head; |
1941 | do { | 1992 | do { |
1942 | BUG_ON(buffer_locked(bh)); | 1993 | BUG_ON(buffer_locked(bh)); |
1943 | if (buffer_dirty(bh)) | 1994 | if (buffer_dirty(bh) && |
1995 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
1944 | mpage_add_bh_to_extent(mpd, logical, bh); | 1996 | mpage_add_bh_to_extent(mpd, logical, bh); |
1997 | if (mpd->io_done) | ||
1998 | return MPAGE_DA_EXTENT_TAIL; | ||
1999 | } | ||
1945 | logical++; | 2000 | logical++; |
1946 | } while ((bh = bh->b_this_page) != head); | 2001 | } while ((bh = bh->b_this_page) != head); |
1947 | } | 2002 | } |
@@ -1960,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page, | |||
1960 | * | 2015 | * |
1961 | * This is a library function, which implements the writepages() | 2016 | * This is a library function, which implements the writepages() |
1962 | * address_space_operation. | 2017 | * address_space_operation. |
1963 | * | ||
1964 | * In order to avoid duplication of logic that deals with partial pages, | ||
1965 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
1966 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
1967 | * | ||
1968 | * It's important that we call __mpage_writepage() only once for each | ||
1969 | * involved page, otherwise we'd have to implement more complicated logic | ||
1970 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
1971 | * | ||
1972 | * See comments to mpage_writepages() | ||
1973 | */ | 2018 | */ |
1974 | static int mpage_da_writepages(struct address_space *mapping, | 2019 | static int mpage_da_writepages(struct address_space *mapping, |
1975 | struct writeback_control *wbc, | 2020 | struct writeback_control *wbc, |
1976 | get_block_t get_block) | 2021 | get_block_t get_block) |
1977 | { | 2022 | { |
1978 | struct mpage_da_data mpd; | 2023 | struct mpage_da_data mpd; |
2024 | long to_write; | ||
1979 | int ret; | 2025 | int ret; |
1980 | 2026 | ||
1981 | if (!get_block) | 2027 | if (!get_block) |
@@ -1989,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
1989 | mpd.first_page = 0; | 2035 | mpd.first_page = 0; |
1990 | mpd.next_page = 0; | 2036 | mpd.next_page = 0; |
1991 | mpd.get_block = get_block; | 2037 | mpd.get_block = get_block; |
2038 | mpd.io_done = 0; | ||
2039 | mpd.pages_written = 0; | ||
2040 | |||
2041 | to_write = wbc->nr_to_write; | ||
1992 | 2042 | ||
1993 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | 2043 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); |
1994 | 2044 | ||
1995 | /* | 2045 | /* |
1996 | * Handle last extent of pages | 2046 | * Handle last extent of pages |
1997 | */ | 2047 | */ |
1998 | if (mpd.next_page != mpd.first_page) { | 2048 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
1999 | mpage_da_map_blocks(&mpd); | 2049 | mpage_da_map_blocks(&mpd); |
2000 | mpage_da_submit_io(&mpd); | 2050 | mpage_da_submit_io(&mpd); |
2001 | } | 2051 | } |
2002 | 2052 | ||
2053 | wbc->nr_to_write = to_write - mpd.pages_written; | ||
2003 | return ret; | 2054 | return ret; |
2004 | } | 2055 | } |
2005 | 2056 | ||
@@ -2204,63 +2255,95 @@ static int ext4_da_writepage(struct page *page, | |||
2204 | } | 2255 | } |
2205 | 2256 | ||
2206 | /* | 2257 | /* |
2207 | * For now just follow the DIO way to estimate the max credits | 2258 | * This is called via ext4_da_writepages() to |
2208 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2259 | * calculate the total number of credits to reserve to fit |
2209 | * todo: need to calculate the max credits need for | 2260 | * a single extent allocation into a single transaction, |
2210 | * extent based files, currently the DIO credits is based on | 2261 | * ext4_da_writepages() will loop calling this before |
2211 | * indirect-blocks mapping way. | 2262 | * the block allocation. |
2212 | * | ||
2213 | * Probably should have a generic way to calculate credits | ||
2214 | * for DIO, writepages, and truncate | ||
2215 | */ | 2263 | */ |
2216 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2264 | |
2217 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | 2265 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
2266 | { | ||
2267 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
2268 | |||
2269 | /* | ||
2270 | * With non-extent format the journal credit needed to | ||
2271 | * insert nrblocks contiguous block is dependent on | ||
2272 | * number of contiguous block. So we will limit | ||
2273 | * number of contiguous block to a sane value | ||
2274 | */ | ||
2275 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | ||
2276 | (max_blocks > EXT4_MAX_TRANS_DATA)) | ||
2277 | max_blocks = EXT4_MAX_TRANS_DATA; | ||
2278 | |||
2279 | return ext4_chunk_trans_blocks(inode, max_blocks); | ||
2280 | } | ||
2218 | 2281 | ||
2219 | static int ext4_da_writepages(struct address_space *mapping, | 2282 | static int ext4_da_writepages(struct address_space *mapping, |
2220 | struct writeback_control *wbc) | 2283 | struct writeback_control *wbc) |
2221 | { | 2284 | { |
2222 | struct inode *inode = mapping->host; | ||
2223 | handle_t *handle = NULL; | 2285 | handle_t *handle = NULL; |
2224 | int needed_blocks; | ||
2225 | int ret = 0; | ||
2226 | long to_write; | ||
2227 | loff_t range_start = 0; | 2286 | loff_t range_start = 0; |
2287 | struct inode *inode = mapping->host; | ||
2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | ||
2289 | long to_write, pages_skipped = 0; | ||
2290 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | ||
2228 | 2291 | ||
2229 | /* | 2292 | /* |
2230 | * No pages to write? This is mainly a kludge to avoid starting | 2293 | * No pages to write? This is mainly a kludge to avoid starting |
2231 | * a transaction for special inodes like journal inode on last iput() | 2294 | * a transaction for special inodes like journal inode on last iput() |
2232 | * because that could violate lock ordering on umount | 2295 | * because that could violate lock ordering on umount |
2233 | */ | 2296 | */ |
2234 | if (!mapping->nrpages) | 2297 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
2235 | return 0; | 2298 | return 0; |
2236 | |||
2237 | /* | 2299 | /* |
2238 | * Estimate the worse case needed credits to write out | 2300 | * Make sure nr_to_write is >= sbi->s_mb_stream_request |
2239 | * EXT4_MAX_BUF_BLOCKS pages | 2301 | * This make sure small files blocks are allocated in |
2302 | * single attempt. This ensure that small files | ||
2303 | * get less fragmented. | ||
2240 | */ | 2304 | */ |
2241 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | 2305 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { |
2306 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
2307 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
2308 | } | ||
2242 | 2309 | ||
2243 | to_write = wbc->nr_to_write; | 2310 | if (!wbc->range_cyclic) |
2244 | if (!wbc->range_cyclic) { | ||
2245 | /* | 2311 | /* |
2246 | * If range_cyclic is not set force range_cont | 2312 | * If range_cyclic is not set force range_cont |
2247 | * and save the old writeback_index | 2313 | * and save the old writeback_index |
2248 | */ | 2314 | */ |
2249 | wbc->range_cont = 1; | 2315 | wbc->range_cont = 1; |
2250 | range_start = wbc->range_start; | ||
2251 | } | ||
2252 | 2316 | ||
2253 | while (!ret && to_write) { | 2317 | range_start = wbc->range_start; |
2318 | pages_skipped = wbc->pages_skipped; | ||
2319 | |||
2320 | restart_loop: | ||
2321 | to_write = wbc->nr_to_write; | ||
2322 | while (!ret && to_write > 0) { | ||
2323 | |||
2324 | /* | ||
2325 | * we insert one extent at a time. So we need | ||
2326 | * credit needed for single extent allocation. | ||
2327 | * journalled mode is currently not supported | ||
2328 | * by delalloc | ||
2329 | */ | ||
2330 | BUG_ON(ext4_should_journal_data(inode)); | ||
2331 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | ||
2332 | |||
2254 | /* start a new transaction*/ | 2333 | /* start a new transaction*/ |
2255 | handle = ext4_journal_start(inode, needed_blocks); | 2334 | handle = ext4_journal_start(inode, needed_blocks); |
2256 | if (IS_ERR(handle)) { | 2335 | if (IS_ERR(handle)) { |
2257 | ret = PTR_ERR(handle); | 2336 | ret = PTR_ERR(handle); |
2337 | printk(KERN_EMERG "%s: jbd2_start: " | ||
2338 | "%ld pages, ino %lu; err %d\n", __func__, | ||
2339 | wbc->nr_to_write, inode->i_ino, ret); | ||
2340 | dump_stack(); | ||
2258 | goto out_writepages; | 2341 | goto out_writepages; |
2259 | } | 2342 | } |
2260 | if (ext4_should_order_data(inode)) { | 2343 | if (ext4_should_order_data(inode)) { |
2261 | /* | 2344 | /* |
2262 | * With ordered mode we need to add | 2345 | * With ordered mode we need to add |
2263 | * the inode to the journal handle | 2346 | * the inode to the journal handle |
2264 | * when we do block allocation. | 2347 | * when we do block allocation. |
2265 | */ | 2348 | */ |
2266 | ret = ext4_jbd2_file_inode(handle, inode); | 2349 | ret = ext4_jbd2_file_inode(handle, inode); |
@@ -2268,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2268 | ext4_journal_stop(handle); | 2351 | ext4_journal_stop(handle); |
2269 | goto out_writepages; | 2352 | goto out_writepages; |
2270 | } | 2353 | } |
2271 | |||
2272 | } | 2354 | } |
2273 | /* | ||
2274 | * set the max dirty pages could be write at a time | ||
2275 | * to fit into the reserved transaction credits | ||
2276 | */ | ||
2277 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
2278 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
2279 | 2355 | ||
2280 | to_write -= wbc->nr_to_write; | 2356 | to_write -= wbc->nr_to_write; |
2281 | ret = mpage_da_writepages(mapping, wbc, | 2357 | ret = mpage_da_writepages(mapping, wbc, |
2282 | ext4_da_get_block_write); | 2358 | ext4_da_get_block_write); |
2283 | ext4_journal_stop(handle); | 2359 | ext4_journal_stop(handle); |
2284 | if (wbc->nr_to_write) { | 2360 | if (ret == MPAGE_DA_EXTENT_TAIL) { |
2361 | /* | ||
2362 | * got one extent now try with | ||
2363 | * rest of the pages | ||
2364 | */ | ||
2365 | to_write += wbc->nr_to_write; | ||
2366 | ret = 0; | ||
2367 | } else if (wbc->nr_to_write) { | ||
2285 | /* | 2368 | /* |
2286 | * There is no more writeout needed | 2369 | * There is no more writeout needed |
2287 | * or we requested for a noblocking writeout | 2370 | * or we requested for a noblocking writeout |
@@ -2293,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2293 | wbc->nr_to_write = to_write; | 2376 | wbc->nr_to_write = to_write; |
2294 | } | 2377 | } |
2295 | 2378 | ||
2296 | out_writepages: | 2379 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { |
2297 | wbc->nr_to_write = to_write; | 2380 | /* We skipped pages in this loop */ |
2298 | if (range_start) | ||
2299 | wbc->range_start = range_start; | 2381 | wbc->range_start = range_start; |
2382 | wbc->nr_to_write = to_write + | ||
2383 | wbc->pages_skipped - pages_skipped; | ||
2384 | wbc->pages_skipped = pages_skipped; | ||
2385 | goto restart_loop; | ||
2386 | } | ||
2387 | |||
2388 | out_writepages: | ||
2389 | wbc->nr_to_write = to_write - nr_to_writebump; | ||
2390 | wbc->range_start = range_start; | ||
2300 | return ret; | 2391 | return ret; |
2301 | } | 2392 | } |
2302 | 2393 | ||
@@ -3486,6 +3577,9 @@ void ext4_truncate(struct inode *inode) | |||
3486 | * modify the block allocation tree. | 3577 | * modify the block allocation tree. |
3487 | */ | 3578 | */ |
3488 | down_write(&ei->i_data_sem); | 3579 | down_write(&ei->i_data_sem); |
3580 | |||
3581 | ext4_discard_reservation(inode); | ||
3582 | |||
3489 | /* | 3583 | /* |
3490 | * The orphan list entry will now protect us from any crash which | 3584 | * The orphan list entry will now protect us from any crash which |
3491 | * occurs before the truncate completes, so it is now safe to propagate | 3585 | * occurs before the truncate completes, so it is now safe to propagate |
@@ -3555,8 +3649,6 @@ do_indirects: | |||
3555 | ; | 3649 | ; |
3556 | } | 3650 | } |
3557 | 3651 | ||
3558 | ext4_discard_reservation(inode); | ||
3559 | |||
3560 | up_write(&ei->i_data_sem); | 3652 | up_write(&ei->i_data_sem); |
3561 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3653 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3562 | ext4_mark_inode_dirty(handle, inode); | 3654 | ext4_mark_inode_dirty(handle, inode); |
@@ -4324,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
4324 | return 0; | 4416 | return 0; |
4325 | } | 4417 | } |
4326 | 4418 | ||
4419 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | ||
4420 | int chunk) | ||
4421 | { | ||
4422 | int indirects; | ||
4423 | |||
4424 | /* if nrblocks are contiguous */ | ||
4425 | if (chunk) { | ||
4426 | /* | ||
4427 | * With N contiguous data blocks, it need at most | ||
4428 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | ||
4429 | * 2 dindirect blocks | ||
4430 | * 1 tindirect block | ||
4431 | */ | ||
4432 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
4433 | return indirects + 3; | ||
4434 | } | ||
4435 | /* | ||
4436 | * if nrblocks are not contiguous, worse case, each block touch | ||
4437 | * a indirect block, and each indirect block touch a double indirect | ||
4438 | * block, plus a triple indirect block | ||
4439 | */ | ||
4440 | indirects = nrblocks * 2 + 1; | ||
4441 | return indirects; | ||
4442 | } | ||
4443 | |||
4444 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4445 | { | ||
4446 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
4447 | return ext4_indirect_trans_blocks(inode, nrblocks, 0); | ||
4448 | return ext4_ext_index_trans_blocks(inode, nrblocks, 0); | ||
4449 | } | ||
4327 | /* | 4450 | /* |
4328 | * How many blocks doth make a writepage()? | 4451 | * Account for index blocks, block groups bitmaps and block group |
4329 | * | 4452 | * descriptor blocks if modify datablocks and index blocks |
4330 | * With N blocks per page, it may be: | 4453 | * worse case, the indexs blocks spread over different block groups |
4331 | * N data blocks | ||
4332 | * 2 indirect block | ||
4333 | * 2 dindirect | ||
4334 | * 1 tindirect | ||
4335 | * N+5 bitmap blocks (from the above) | ||
4336 | * N+5 group descriptor summary blocks | ||
4337 | * 1 inode block | ||
4338 | * 1 superblock. | ||
4339 | * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files | ||
4340 | * | 4454 | * |
4341 | * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS | 4455 | * If datablocks are discontiguous, they are possible to spread over |
4456 | * different block groups too. If they are contiugous, with flexbg, | ||
4457 | * they could still across block group boundary. | ||
4342 | * | 4458 | * |
4343 | * With ordered or writeback data it's the same, less the N data blocks. | 4459 | * Also account for superblock, inode, quota and xattr blocks |
4460 | */ | ||
4461 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
4462 | { | ||
4463 | int groups, gdpblocks; | ||
4464 | int idxblocks; | ||
4465 | int ret = 0; | ||
4466 | |||
4467 | /* | ||
4468 | * How many index blocks need to touch to modify nrblocks? | ||
4469 | * The "Chunk" flag indicating whether the nrblocks is | ||
4470 | * physically contiguous on disk | ||
4471 | * | ||
4472 | * For Direct IO and fallocate, they calls get_block to allocate | ||
4473 | * one single extent at a time, so they could set the "Chunk" flag | ||
4474 | */ | ||
4475 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | ||
4476 | |||
4477 | ret = idxblocks; | ||
4478 | |||
4479 | /* | ||
4480 | * Now let's see how many group bitmaps and group descriptors need | ||
4481 | * to account | ||
4482 | */ | ||
4483 | groups = idxblocks; | ||
4484 | if (chunk) | ||
4485 | groups += 1; | ||
4486 | else | ||
4487 | groups += nrblocks; | ||
4488 | |||
4489 | gdpblocks = groups; | ||
4490 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | ||
4491 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | ||
4492 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | ||
4493 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | ||
4494 | |||
4495 | /* bitmaps and block group descriptor blocks */ | ||
4496 | ret += groups + gdpblocks; | ||
4497 | |||
4498 | /* Blocks for super block, inode, quota and xattr blocks */ | ||
4499 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
4500 | |||
4501 | return ret; | ||
4502 | } | ||
4503 | |||
4504 | /* | ||
4505 | * Calulate the total number of credits to reserve to fit | ||
4506 | * the modification of a single pages into a single transaction, | ||
4507 | * which may include multiple chunks of block allocations. | ||
4344 | * | 4508 | * |
4345 | * If the inode's direct blocks can hold an integral number of pages then a | 4509 | * This could be called via ext4_write_begin() |
4346 | * page cannot straddle two indirect blocks, and we can only touch one indirect | ||
4347 | * and dindirect block, and the "5" above becomes "3". | ||
4348 | * | 4510 | * |
4349 | * This still overestimates under most circumstances. If we were to pass the | 4511 | * We need to consider the worse case, when |
4350 | * start and end offsets in here as well we could do block_to_path() on each | 4512 | * one new block per extent. |
4351 | * block and work out the exact number of indirects which are touched. Pah. | ||
4352 | */ | 4513 | */ |
4353 | |||
4354 | int ext4_writepage_trans_blocks(struct inode *inode) | 4514 | int ext4_writepage_trans_blocks(struct inode *inode) |
4355 | { | 4515 | { |
4356 | int bpp = ext4_journal_blocks_per_page(inode); | 4516 | int bpp = ext4_journal_blocks_per_page(inode); |
4357 | int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; | ||
4358 | int ret; | 4517 | int ret; |
4359 | 4518 | ||
4360 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 4519 | ret = ext4_meta_trans_blocks(inode, bpp, 0); |
4361 | return ext4_ext_writepage_trans_blocks(inode, bpp); | ||
4362 | 4520 | ||
4521 | /* Account for data blocks for journalled mode */ | ||
4363 | if (ext4_should_journal_data(inode)) | 4522 | if (ext4_should_journal_data(inode)) |
4364 | ret = 3 * (bpp + indirects) + 2; | 4523 | ret += bpp; |
4365 | else | ||
4366 | ret = 2 * (bpp + indirects) + 2; | ||
4367 | |||
4368 | #ifdef CONFIG_QUOTA | ||
4369 | /* We know that structure was already allocated during DQUOT_INIT so | ||
4370 | * we will be updating only the data blocks + inodes */ | ||
4371 | ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
4372 | #endif | ||
4373 | |||
4374 | return ret; | 4524 | return ret; |
4375 | } | 4525 | } |
4376 | 4526 | ||
4377 | /* | 4527 | /* |
4528 | * Calculate the journal credits for a chunk of data modification. | ||
4529 | * | ||
4530 | * This is called from DIO, fallocate or whoever calling | ||
4531 | * ext4_get_blocks_wrap() to map/allocate a chunk of contigous disk blocks. | ||
4532 | * | ||
4533 | * journal buffers for data blocks are not included here, as DIO | ||
4534 | * and fallocate do no need to journal data buffers. | ||
4535 | */ | ||
4536 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | ||
4537 | { | ||
4538 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | ||
4539 | } | ||
4540 | |||
4541 | /* | ||
4378 | * The caller must have previously called ext4_reserve_inode_write(). | 4542 | * The caller must have previously called ext4_reserve_inode_write(). |
4379 | * Give this, we know that the caller already has write access to iloc->bh. | 4543 | * Give this, we know that the caller already has write access to iloc->bh. |
4380 | */ | 4544 | */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..e0e3a5eb1ddb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
3282 | } | 3282 | } |
3283 | 3283 | ||
3284 | /* | 3284 | /* |
3285 | * Return the prealloc space that have minimal distance | ||
3286 | * from the goal block. @cpa is the prealloc | ||
3287 | * space that is having currently known minimal distance | ||
3288 | * from the goal block. | ||
3289 | */ | ||
3290 | static struct ext4_prealloc_space * | ||
3291 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
3292 | struct ext4_prealloc_space *pa, | ||
3293 | struct ext4_prealloc_space *cpa) | ||
3294 | { | ||
3295 | ext4_fsblk_t cur_distance, new_distance; | ||
3296 | |||
3297 | if (cpa == NULL) { | ||
3298 | atomic_inc(&pa->pa_count); | ||
3299 | return pa; | ||
3300 | } | ||
3301 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
3302 | new_distance = abs(goal_block - pa->pa_pstart); | ||
3303 | |||
3304 | if (cur_distance < new_distance) | ||
3305 | return cpa; | ||
3306 | |||
3307 | /* drop the previous reference */ | ||
3308 | atomic_dec(&cpa->pa_count); | ||
3309 | atomic_inc(&pa->pa_count); | ||
3310 | return pa; | ||
3311 | } | ||
3312 | |||
3313 | /* | ||
3285 | * search goal blocks in preallocated space | 3314 | * search goal blocks in preallocated space |
3286 | */ | 3315 | */ |
3287 | static noinline_for_stack int | 3316 | static noinline_for_stack int |
@@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3290 | int order, i; | 3319 | int order, i; |
3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3320 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
3292 | struct ext4_locality_group *lg; | 3321 | struct ext4_locality_group *lg; |
3293 | struct ext4_prealloc_space *pa; | 3322 | struct ext4_prealloc_space *pa, *cpa = NULL; |
3323 | ext4_fsblk_t goal_block; | ||
3294 | 3324 | ||
3295 | /* only data can be preallocated */ | 3325 | /* only data can be preallocated */ |
3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3326 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
@@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3363 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
3334 | order = PREALLOC_TB_SIZE - 1; | 3364 | order = PREALLOC_TB_SIZE - 1; |
3335 | 3365 | ||
3366 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
3367 | ac->ac_g_ex.fe_start + | ||
3368 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
3369 | /* | ||
3370 | * search for the prealloc space that is having | ||
3371 | * minimal distance from the goal block. | ||
3372 | */ | ||
3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3373 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
3337 | rcu_read_lock(); | 3374 | rcu_read_lock(); |
3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3375 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
@@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
3340 | spin_lock(&pa->pa_lock); | 3377 | spin_lock(&pa->pa_lock); |
3341 | if (pa->pa_deleted == 0 && | 3378 | if (pa->pa_deleted == 0 && |
3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3379 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
3343 | atomic_inc(&pa->pa_count); | 3380 | |
3344 | ext4_mb_use_group_pa(ac, pa); | 3381 | cpa = ext4_mb_check_group_pa(goal_block, |
3345 | spin_unlock(&pa->pa_lock); | 3382 | pa, cpa); |
3346 | ac->ac_criteria = 20; | ||
3347 | rcu_read_unlock(); | ||
3348 | return 1; | ||
3349 | } | 3383 | } |
3350 | spin_unlock(&pa->pa_lock); | 3384 | spin_unlock(&pa->pa_lock); |
3351 | } | 3385 | } |
3352 | rcu_read_unlock(); | 3386 | rcu_read_unlock(); |
3353 | } | 3387 | } |
3388 | if (cpa) { | ||
3389 | ext4_mb_use_group_pa(ac, cpa); | ||
3390 | ac->ac_criteria = 20; | ||
3391 | return 1; | ||
3392 | } | ||
3354 | return 0; | 3393 | return 0; |
3355 | } | 3394 | } |
3356 | 3395 | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..46fc0b5b12ba 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
53 | * credit. But below we try to not accumalate too much | 53 | * credit. But below we try to not accumalate too much |
54 | * of them by restarting the journal. | 54 | * of them by restarting the journal. |
55 | */ | 55 | */ |
56 | needed = ext4_ext_calc_credits_for_insert(inode, path); | 56 | needed = ext4_ext_calc_credits_for_single_extent(inode, |
57 | lb->last_block - lb->first_block + 1, path); | ||
57 | 58 | ||
58 | /* | 59 | /* |
59 | * Make sure the credit we accumalated is not really high | 60 | * Make sure the credit we accumalated is not really high |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a9265164265..b3d35604ea18 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
773 | 773 | ||
774 | if (reserved_gdb || gdb_off == 0) { | 774 | if (reserved_gdb || gdb_off == 0) { |
775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, | 775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, |
776 | EXT4_FEATURE_COMPAT_RESIZE_INODE)){ | 776 | EXT4_FEATURE_COMPAT_RESIZE_INODE) |
777 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
777 | ext4_warning(sb, __func__, | 778 | ext4_warning(sb, __func__, |
778 | "No reserved GDT blocks, can't resize"); | 779 | "No reserved GDT blocks, can't resize"); |
779 | return -EPERM; | 780 | return -EPERM; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d77958b861..566344b926b7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
568 | #endif | 568 | #endif |
569 | ei->i_block_alloc_info = NULL; | 569 | ei->i_block_alloc_info = NULL; |
570 | ei->vfs_inode.i_version = 1; | 570 | ei->vfs_inode.i_version = 1; |
571 | ei->vfs_inode.i_data.writeback_index = 0; | ||
571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 573 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
573 | spin_lock_init(&ei->i_prealloc_lock); | 574 | spin_lock_init(&ei->i_prealloc_lock); |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 6d266d793e2c..80ff3381fa21 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -562,26 +562,23 @@ static int fat_write_inode(struct inode *inode, int wait) | |||
562 | struct buffer_head *bh; | 562 | struct buffer_head *bh; |
563 | struct msdos_dir_entry *raw_entry; | 563 | struct msdos_dir_entry *raw_entry; |
564 | loff_t i_pos; | 564 | loff_t i_pos; |
565 | int err = 0; | 565 | int err; |
566 | 566 | ||
567 | retry: | 567 | retry: |
568 | i_pos = MSDOS_I(inode)->i_pos; | 568 | i_pos = MSDOS_I(inode)->i_pos; |
569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) | 569 | if (inode->i_ino == MSDOS_ROOT_INO || !i_pos) |
570 | return 0; | 570 | return 0; |
571 | 571 | ||
572 | lock_super(sb); | ||
573 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 572 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
574 | if (!bh) { | 573 | if (!bh) { |
575 | printk(KERN_ERR "FAT: unable to read inode block " | 574 | printk(KERN_ERR "FAT: unable to read inode block " |
576 | "for updating (i_pos %lld)\n", i_pos); | 575 | "for updating (i_pos %lld)\n", i_pos); |
577 | err = -EIO; | 576 | return -EIO; |
578 | goto out; | ||
579 | } | 577 | } |
580 | spin_lock(&sbi->inode_hash_lock); | 578 | spin_lock(&sbi->inode_hash_lock); |
581 | if (i_pos != MSDOS_I(inode)->i_pos) { | 579 | if (i_pos != MSDOS_I(inode)->i_pos) { |
582 | spin_unlock(&sbi->inode_hash_lock); | 580 | spin_unlock(&sbi->inode_hash_lock); |
583 | brelse(bh); | 581 | brelse(bh); |
584 | unlock_super(sb); | ||
585 | goto retry; | 582 | goto retry; |
586 | } | 583 | } |
587 | 584 | ||
@@ -607,11 +604,10 @@ retry: | |||
607 | } | 604 | } |
608 | spin_unlock(&sbi->inode_hash_lock); | 605 | spin_unlock(&sbi->inode_hash_lock); |
609 | mark_buffer_dirty(bh); | 606 | mark_buffer_dirty(bh); |
607 | err = 0; | ||
610 | if (wait) | 608 | if (wait) |
611 | err = sync_dirty_buffer(bh); | 609 | err = sync_dirty_buffer(bh); |
612 | brelse(bh); | 610 | brelse(bh); |
613 | out: | ||
614 | unlock_super(sb); | ||
615 | return err; | 611 | return err; |
616 | } | 612 | } |
617 | 613 | ||
diff --git a/fs/ioprio.c b/fs/ioprio.c index c4a1c3c65aac..da3cc460d4df 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c | |||
@@ -115,11 +115,11 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) | |||
115 | pgrp = task_pgrp(current); | 115 | pgrp = task_pgrp(current); |
116 | else | 116 | else |
117 | pgrp = find_vpid(who); | 117 | pgrp = find_vpid(who); |
118 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 118 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
119 | ret = set_task_ioprio(p, ioprio); | 119 | ret = set_task_ioprio(p, ioprio); |
120 | if (ret) | 120 | if (ret) |
121 | break; | 121 | break; |
122 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 122 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
123 | break; | 123 | break; |
124 | case IOPRIO_WHO_USER: | 124 | case IOPRIO_WHO_USER: |
125 | if (!who) | 125 | if (!who) |
@@ -204,7 +204,7 @@ asmlinkage long sys_ioprio_get(int which, int who) | |||
204 | pgrp = task_pgrp(current); | 204 | pgrp = task_pgrp(current); |
205 | else | 205 | else |
206 | pgrp = find_vpid(who); | 206 | pgrp = find_vpid(who); |
207 | do_each_pid_task(pgrp, PIDTYPE_PGID, p) { | 207 | do_each_pid_thread(pgrp, PIDTYPE_PGID, p) { |
208 | tmpio = get_task_ioprio(p); | 208 | tmpio = get_task_ioprio(p); |
209 | if (tmpio < 0) | 209 | if (tmpio < 0) |
210 | continue; | 210 | continue; |
@@ -212,7 +212,7 @@ asmlinkage long sys_ioprio_get(int which, int who) | |||
212 | ret = tmpio; | 212 | ret = tmpio; |
213 | else | 213 | else |
214 | ret = ioprio_best(ret, tmpio); | 214 | ret = ioprio_best(ret, tmpio); |
215 | } while_each_pid_task(pgrp, PIDTYPE_PGID, p); | 215 | } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); |
216 | break; | 216 | break; |
217 | case IOPRIO_WHO_USER: | 217 | case IOPRIO_WHO_USER: |
218 | if (!who) | 218 | if (!who) |
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
@@ -12,7 +12,6 @@ | |||
12 | #ifndef _JFFS2_FS_I | 12 | #ifndef _JFFS2_FS_I |
13 | #define _JFFS2_FS_I | 13 | #define _JFFS2_FS_I |
14 | 14 | ||
15 | #include <linux/version.h> | ||
16 | #include <linux/rbtree.h> | 15 | #include <linux/rbtree.h> |
17 | #include <linux/posix_acl.h> | 16 | #include <linux/posix_acl.h> |
18 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..e9b20173fef3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, | |||
1279 | } | 1279 | } |
1280 | } | 1280 | } |
1281 | 1281 | ||
1282 | if (errors > 0) { | ||
1283 | dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", | ||
1284 | errors, (errors == 1 ? "" : "s")); | ||
1285 | if (!sloppy) | ||
1286 | return 0; | ||
1287 | } | ||
1282 | return 1; | 1288 | return 1; |
1283 | 1289 | ||
1284 | out_nomem: | 1290 | out_nomem: |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab8..54b8b4140c8f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
@@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt) | |||
443 | * enough space for either: | 443 | * enough space for either: |
444 | */ | 444 | */ |
445 | alloc = sizeof(struct posix_ace_state_array) | 445 | alloc = sizeof(struct posix_ace_state_array) |
446 | + cnt*sizeof(struct posix_ace_state); | 446 | + cnt*sizeof(struct posix_user_ace_state); |
447 | state->users = kzalloc(alloc, GFP_KERNEL); | 447 | state->users = kzalloc(alloc, GFP_KERNEL); |
448 | if (!state->users) | 448 | if (!state->users) |
449 | return -ENOMEM; | 449 | return -ENOMEM; |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2e51adac65de..e5b51ffafc6c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
@@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
867 | int slack_bytes; | 867 | int slack_bytes; |
868 | __be32 status; | 868 | __be32 status; |
869 | 869 | ||
870 | status = nfserr_resource; | ||
871 | cstate = cstate_alloc(); | ||
872 | if (cstate == NULL) | ||
873 | goto out; | ||
874 | |||
875 | resp->xbuf = &rqstp->rq_res; | 870 | resp->xbuf = &rqstp->rq_res; |
876 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; |
877 | resp->tagp = resp->p; | 872 | resp->tagp = resp->p; |
@@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
890 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) |
891 | goto out; | 886 | goto out; |
892 | 887 | ||
888 | status = nfserr_resource; | ||
889 | cstate = cstate_alloc(); | ||
890 | if (cstate == NULL) | ||
891 | goto out; | ||
892 | |||
893 | status = nfs_ok; | 893 | status = nfs_ok; |
894 | while (!status && resp->opcnt < args->opcnt) { | 894 | while (!status && resp->opcnt < args->opcnt) { |
895 | op = &args->ops[resp->opcnt++]; | 895 | op = &args->ops[resp->opcnt++]; |
@@ -957,9 +957,9 @@ encode_op: | |||
957 | nfsd4_increment_op_stats(op->opnum); | 957 | nfsd4_increment_op_stats(op->opnum); |
958 | } | 958 | } |
959 | 959 | ||
960 | cstate_free(cstate); | ||
960 | out: | 961 | out: |
961 | nfsd4_release_compoundargs(args); | 962 | nfsd4_release_compoundargs(args); |
962 | cstate_free(cstate); | ||
963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
964 | return status; | 964 | return status; |
965 | } | 965 | } |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e1781c8b1650..9e8a95be7a1e 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
@@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, | |||
174 | // TODO: Consider moving this lot to a separate function! (AIA) | 174 | // TODO: Consider moving this lot to a separate function! (AIA) |
175 | handle_name: | 175 | handle_name: |
176 | { | 176 | { |
177 | struct dentry *real_dent, *new_dent; | ||
178 | MFT_RECORD *m; | 177 | MFT_RECORD *m; |
179 | ntfs_attr_search_ctx *ctx; | 178 | ntfs_attr_search_ctx *ctx; |
180 | ntfs_inode *ni = NTFS_I(dent_inode); | 179 | ntfs_inode *ni = NTFS_I(dent_inode); |
@@ -255,93 +254,9 @@ handle_name: | |||
255 | } | 254 | } |
256 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); | 255 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); |
257 | 256 | ||
258 | /* | 257 | dent = d_add_ci(dent, dent_inode, &nls_name); |
259 | * Note: No need for dent->d_lock lock as i_mutex is held on the | ||
260 | * parent inode. | ||
261 | */ | ||
262 | |||
263 | /* Does a dentry matching the nls_name exist already? */ | ||
264 | real_dent = d_lookup(dent->d_parent, &nls_name); | ||
265 | /* If not, create it now. */ | ||
266 | if (!real_dent) { | ||
267 | real_dent = d_alloc(dent->d_parent, &nls_name); | ||
268 | kfree(nls_name.name); | ||
269 | if (!real_dent) { | ||
270 | err = -ENOMEM; | ||
271 | goto err_out; | ||
272 | } | ||
273 | new_dent = d_splice_alias(dent_inode, real_dent); | ||
274 | if (new_dent) | ||
275 | dput(real_dent); | ||
276 | else | ||
277 | new_dent = real_dent; | ||
278 | ntfs_debug("Done. (Created new dentry.)"); | ||
279 | return new_dent; | ||
280 | } | ||
281 | kfree(nls_name.name); | 258 | kfree(nls_name.name); |
282 | /* Matching dentry exists, check if it is negative. */ | 259 | return dent; |
283 | if (real_dent->d_inode) { | ||
284 | if (unlikely(real_dent->d_inode != dent_inode)) { | ||
285 | /* This can happen because bad inodes are unhashed. */ | ||
286 | BUG_ON(!is_bad_inode(dent_inode)); | ||
287 | BUG_ON(!is_bad_inode(real_dent->d_inode)); | ||
288 | } | ||
289 | /* | ||
290 | * Already have the inode and the dentry attached, decrement | ||
291 | * the reference count to balance the ntfs_iget() we did | ||
292 | * earlier on. We found the dentry using d_lookup() so it | ||
293 | * cannot be disconnected and thus we do not need to worry | ||
294 | * about any NFS/disconnectedness issues here. | ||
295 | */ | ||
296 | iput(dent_inode); | ||
297 | ntfs_debug("Done. (Already had inode and dentry.)"); | ||
298 | return real_dent; | ||
299 | } | ||
300 | /* | ||
301 | * Negative dentry: instantiate it unless the inode is a directory and | ||
302 | * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), | ||
303 | * in which case d_move() that in place of the found dentry. | ||
304 | */ | ||
305 | if (!S_ISDIR(dent_inode->i_mode)) { | ||
306 | /* Not a directory; everything is easy. */ | ||
307 | d_instantiate(real_dent, dent_inode); | ||
308 | ntfs_debug("Done. (Already had negative file dentry.)"); | ||
309 | return real_dent; | ||
310 | } | ||
311 | spin_lock(&dcache_lock); | ||
312 | if (list_empty(&dent_inode->i_dentry)) { | ||
313 | /* | ||
314 | * Directory without a 'disconnected' dentry; we need to do | ||
315 | * d_instantiate() by hand because it takes dcache_lock which | ||
316 | * we already hold. | ||
317 | */ | ||
318 | list_add(&real_dent->d_alias, &dent_inode->i_dentry); | ||
319 | real_dent->d_inode = dent_inode; | ||
320 | spin_unlock(&dcache_lock); | ||
321 | security_d_instantiate(real_dent, dent_inode); | ||
322 | ntfs_debug("Done. (Already had negative directory dentry.)"); | ||
323 | return real_dent; | ||
324 | } | ||
325 | /* | ||
326 | * Directory with a 'disconnected' dentry; get a reference to the | ||
327 | * 'disconnected' dentry. | ||
328 | */ | ||
329 | new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, | ||
330 | d_alias); | ||
331 | dget_locked(new_dent); | ||
332 | spin_unlock(&dcache_lock); | ||
333 | /* Do security vodoo. */ | ||
334 | security_d_instantiate(real_dent, dent_inode); | ||
335 | /* Move new_dent in place of real_dent. */ | ||
336 | d_move(new_dent, real_dent); | ||
337 | /* Balance the ntfs_iget() we did above. */ | ||
338 | iput(dent_inode); | ||
339 | /* Throw away real_dent. */ | ||
340 | dput(real_dent); | ||
341 | /* Use new_dent as the actual dentry. */ | ||
342 | ntfs_debug("Done. (Already had negative, disconnected directory " | ||
343 | "dentry.)"); | ||
344 | return new_dent; | ||
345 | 260 | ||
346 | eio_err_out: | 261 | eio_err_out: |
347 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); | 262 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); |
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h | |||
@@ -113,7 +113,7 @@ typedef struct { | |||
113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the | 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the |
114 | * file since it was last opened. I think the names speak for themselves but | 114 | * file since it was last opened. I think the names speak for themselves but |
115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS | 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS |
116 | * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 116 | * documentation: http://www.linux-ntfs.org/ |
117 | */ | 117 | */ |
118 | enum { | 118 | enum { |
119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), | 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), |
@@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; | |||
145 | * Source info flags (32-bit). Information about the source of the change(s) | 145 | * Source info flags (32-bit). Information about the source of the change(s) |
146 | * to the file. For detailed descriptions of what these mean, see the Linux | 146 | * to the file. For detailed descriptions of what these mean, see the Linux |
147 | * NTFS project NTFS documentation: | 147 | * NTFS project NTFS documentation: |
148 | * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 148 | * http://www.linux-ntfs.org/ |
149 | */ | 149 | */ |
150 | enum { | 150 | enum { |
151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), | 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index d8bfa0eb41b2..52276c02f710 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
@@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v) | |||
138 | " message id: %d\n" | 138 | " message id: %d\n" |
139 | " message type: %u\n" | 139 | " message type: %u\n" |
140 | " message key: 0x%08x\n" | 140 | " message key: 0x%08x\n" |
141 | " sock acquiry: %lu.%lu\n" | 141 | " sock acquiry: %lu.%ld\n" |
142 | " send start: %lu.%lu\n" | 142 | " send start: %lu.%ld\n" |
143 | " wait start: %lu.%lu\n", | 143 | " wait start: %lu.%ld\n", |
144 | nst, (unsigned long)nst->st_task->pid, | 144 | nst, (unsigned long)nst->st_task->pid, |
145 | (unsigned long)nst->st_task->tgid, | 145 | (unsigned long)nst->st_task->tgid, |
146 | nst->st_task->comm, nst->st_node, | 146 | nst->st_task->comm, nst->st_node, |
147 | nst->st_sc, nst->st_id, nst->st_msg_type, | 147 | nst->st_sc, nst->st_id, nst->st_msg_type, |
148 | nst->st_msg_key, | 148 | nst->st_msg_key, |
149 | nst->st_sock_time.tv_sec, | 149 | nst->st_sock_time.tv_sec, |
150 | (unsigned long)nst->st_sock_time.tv_usec, | 150 | (long)nst->st_sock_time.tv_usec, |
151 | nst->st_send_time.tv_sec, | 151 | nst->st_send_time.tv_sec, |
152 | (unsigned long)nst->st_send_time.tv_usec, | 152 | (long)nst->st_send_time.tv_usec, |
153 | nst->st_status_time.tv_sec, | 153 | nst->st_status_time.tv_sec, |
154 | nst->st_status_time.tv_usec); | 154 | (long)nst->st_status_time.tv_usec); |
155 | } | 155 | } |
156 | 156 | ||
157 | spin_unlock(&o2net_debug_lock); | 157 | spin_unlock(&o2net_debug_lock); |
@@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
276 | return sc; /* unused, just needs to be null when done */ | 276 | return sc; /* unused, just needs to be null when done */ |
277 | } | 277 | } |
278 | 278 | ||
279 | #define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec | 279 | #define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec |
280 | 280 | ||
281 | static int sc_seq_show(struct seq_file *seq, void *v) | 281 | static int sc_seq_show(struct seq_file *seq, void *v) |
282 | { | 282 | { |
@@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v) | |||
309 | " remote node: %s\n" | 309 | " remote node: %s\n" |
310 | " page off: %zu\n" | 310 | " page off: %zu\n" |
311 | " handshake ok: %u\n" | 311 | " handshake ok: %u\n" |
312 | " timer: %lu.%lu\n" | 312 | " timer: %lu.%ld\n" |
313 | " data ready: %lu.%lu\n" | 313 | " data ready: %lu.%ld\n" |
314 | " advance start: %lu.%lu\n" | 314 | " advance start: %lu.%ld\n" |
315 | " advance stop: %lu.%lu\n" | 315 | " advance stop: %lu.%ld\n" |
316 | " func start: %lu.%lu\n" | 316 | " func start: %lu.%ld\n" |
317 | " func stop: %lu.%lu\n" | 317 | " func stop: %lu.%ld\n" |
318 | " func key: %u\n" | 318 | " func key: %u\n" |
319 | " func type: %u\n", | 319 | " func type: %u\n", |
320 | sc, | 320 | sc, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index a27d61581bd6..2bcf706d9dd3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | |||
143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
144 | 144 | ||
145 | #ifdef CONFIG_DEBUG_FS | 145 | #ifdef CONFIG_DEBUG_FS |
146 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | 146 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
147 | u32 msgkey, struct task_struct *task, u8 node) | 147 | u32 msgkey, struct task_struct *task, u8 node) |
148 | { | 148 | { |
149 | INIT_LIST_HEAD(&nst->st_net_debug_item); | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
150 | nst->st_task = task; | 150 | nst->st_task = task; |
@@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | |||
153 | nst->st_node = node; | 153 | nst->st_node = node; |
154 | } | 154 | } |
155 | 155 | ||
156 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | 156 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) |
157 | { | 157 | { |
158 | do_gettimeofday(&nst->st_sock_time); | 158 | do_gettimeofday(&nst->st_sock_time); |
159 | } | 159 | } |
160 | 160 | ||
161 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | 161 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) |
162 | { | 162 | { |
163 | do_gettimeofday(&nst->st_send_time); | 163 | do_gettimeofday(&nst->st_send_time); |
164 | } | 164 | } |
165 | 165 | ||
166 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | 166 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) |
167 | { | 167 | { |
168 | do_gettimeofday(&nst->st_status_time); | 168 | do_gettimeofday(&nst->st_status_time); |
169 | } | 169 | } |
170 | 170 | ||
171 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | 171 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, |
172 | struct o2net_sock_container *sc) | 172 | struct o2net_sock_container *sc) |
173 | { | 173 | { |
174 | nst->st_sc = sc; | 174 | nst->st_sc = sc; |
175 | } | 175 | } |
176 | 176 | ||
177 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | 177 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) |
178 | { | 178 | { |
179 | nst->st_id = msg_id; | 179 | nst->st_id = msg_id; |
180 | } | 180 | } |
181 | |||
182 | #else /* CONFIG_DEBUG_FS */ | ||
183 | |||
184 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
185 | u32 msgkey, struct task_struct *task, u8 node) | ||
186 | { | ||
187 | } | ||
188 | |||
189 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
190 | { | ||
191 | } | ||
192 | |||
193 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
194 | { | ||
195 | } | ||
196 | |||
197 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
198 | { | ||
199 | } | ||
200 | |||
201 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
202 | struct o2net_sock_container *sc) | ||
203 | { | ||
204 | } | ||
205 | |||
206 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
207 | u32 msg_id) | ||
208 | { | ||
209 | } | ||
210 | |||
181 | #endif /* CONFIG_DEBUG_FS */ | 211 | #endif /* CONFIG_DEBUG_FS */ |
182 | 212 | ||
183 | static inline int o2net_reconnect_delay(void) | 213 | static inline int o2net_reconnect_delay(void) |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 18307ff81b77..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
@@ -224,42 +224,10 @@ struct o2net_send_tracking { | |||
224 | struct timeval st_send_time; | 224 | struct timeval st_send_time; |
225 | struct timeval st_status_time; | 225 | struct timeval st_status_time; |
226 | }; | 226 | }; |
227 | |||
228 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
229 | u32 msgkey, struct task_struct *task, u8 node); | ||
230 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); | ||
231 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst); | ||
232 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst); | ||
233 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
234 | struct o2net_sock_container *sc); | ||
235 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); | ||
236 | |||
237 | #else | 227 | #else |
238 | struct o2net_send_tracking { | 228 | struct o2net_send_tracking { |
239 | u32 dummy; | 229 | u32 dummy; |
240 | }; | 230 | }; |
241 | |||
242 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
243 | u32 msgkey, struct task_struct *task, u8 node) | ||
244 | { | ||
245 | } | ||
246 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
247 | { | ||
248 | } | ||
249 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
250 | { | ||
251 | } | ||
252 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
253 | { | ||
254 | } | ||
255 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
256 | struct o2net_sock_container *sc) | ||
257 | { | ||
258 | } | ||
259 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
260 | u32 msg_id) | ||
261 | { | ||
262 | } | ||
263 | #endif /* CONFIG_DEBUG_FS */ | 231 | #endif /* CONFIG_DEBUG_FS */ |
264 | 232 | ||
265 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8a1875848080..9cce563fd627 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1300 | di->i_size = cpu_to_le64(sb->s_blocksize); | 1300 | di->i_size = cpu_to_le64(sb->s_blocksize); |
1301 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); | 1301 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); |
1302 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); | 1302 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); |
1303 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
1304 | 1303 | ||
1305 | /* | 1304 | /* |
1306 | * This should never fail as our extent list is empty and all | 1305 | * This should never fail as our extent list is empty and all |
@@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1310 | NULL); | 1309 | NULL); |
1311 | if (ret) { | 1310 | if (ret) { |
1312 | mlog_errno(ret); | 1311 | mlog_errno(ret); |
1313 | goto out; | 1312 | goto out_commit; |
1314 | } | 1313 | } |
1315 | 1314 | ||
1315 | /* | ||
1316 | * Set i_blocks after the extent insert for the most up to | ||
1317 | * date ip_clusters value. | ||
1318 | */ | ||
1319 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
1320 | |||
1316 | ret = ocfs2_journal_dirty(handle, di_bh); | 1321 | ret = ocfs2_journal_dirty(handle, di_bh); |
1317 | if (ret) { | 1322 | if (ret) { |
1318 | mlog_errno(ret); | 1323 | mlog_errno(ret); |
@@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
1336 | len, 0, NULL); | 1341 | len, 0, NULL); |
1337 | if (ret) { | 1342 | if (ret) { |
1338 | mlog_errno(ret); | 1343 | mlog_errno(ret); |
1339 | goto out; | 1344 | goto out_commit; |
1340 | } | 1345 | } |
1341 | } | 1346 | } |
1342 | 1347 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 7a37240f7a31..c47bc2a809c2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1418 | { | 1418 | { |
1419 | unsigned int node_num; | 1419 | unsigned int node_num; |
1420 | int status, i; | 1420 | int status, i; |
1421 | u32 gen; | ||
1421 | struct buffer_head *bh = NULL; | 1422 | struct buffer_head *bh = NULL; |
1422 | struct ocfs2_dinode *di; | 1423 | struct ocfs2_dinode *di; |
1423 | 1424 | ||
1424 | /* This is called with the super block cluster lock, so we | 1425 | /* This is called with the super block cluster lock, so we |
1425 | * know that the slot map can't change underneath us. */ | 1426 | * know that the slot map can't change underneath us. */ |
1426 | 1427 | ||
1427 | spin_lock(&osb->osb_lock); | ||
1428 | for (i = 0; i < osb->max_slots; i++) { | 1428 | for (i = 0; i < osb->max_slots; i++) { |
1429 | /* Read journal inode to get the recovery generation */ | 1429 | /* Read journal inode to get the recovery generation */ |
1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); | 1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); |
@@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1433 | goto bail; | 1433 | goto bail; |
1434 | } | 1434 | } |
1435 | di = (struct ocfs2_dinode *)bh->b_data; | 1435 | di = (struct ocfs2_dinode *)bh->b_data; |
1436 | osb->slot_recovery_generations[i] = | 1436 | gen = ocfs2_get_recovery_generation(di); |
1437 | ocfs2_get_recovery_generation(di); | ||
1438 | brelse(bh); | 1437 | brelse(bh); |
1439 | bh = NULL; | 1438 | bh = NULL; |
1440 | 1439 | ||
1440 | spin_lock(&osb->osb_lock); | ||
1441 | osb->slot_recovery_generations[i] = gen; | ||
1442 | |||
1441 | mlog(0, "Slot %u recovery generation is %u\n", i, | 1443 | mlog(0, "Slot %u recovery generation is %u\n", i, |
1442 | osb->slot_recovery_generations[i]); | 1444 | osb->slot_recovery_generations[i]); |
1443 | 1445 | ||
1444 | if (i == osb->slot_num) | 1446 | if (i == osb->slot_num) { |
1447 | spin_unlock(&osb->osb_lock); | ||
1445 | continue; | 1448 | continue; |
1449 | } | ||
1446 | 1450 | ||
1447 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | 1451 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); |
1448 | if (status == -ENOENT) | 1452 | if (status == -ENOENT) { |
1453 | spin_unlock(&osb->osb_lock); | ||
1449 | continue; | 1454 | continue; |
1455 | } | ||
1450 | 1456 | ||
1451 | if (__ocfs2_recovery_map_test(osb, node_num)) | 1457 | if (__ocfs2_recovery_map_test(osb, node_num)) { |
1458 | spin_unlock(&osb->osb_lock); | ||
1452 | continue; | 1459 | continue; |
1460 | } | ||
1453 | spin_unlock(&osb->osb_lock); | 1461 | spin_unlock(&osb->osb_lock); |
1454 | 1462 | ||
1455 | /* Ok, we have a slot occupied by another node which | 1463 | /* Ok, we have a slot occupied by another node which |
@@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
1465 | mlog_errno(status); | 1473 | mlog_errno(status); |
1466 | goto bail; | 1474 | goto bail; |
1467 | } | 1475 | } |
1468 | |||
1469 | spin_lock(&osb->osb_lock); | ||
1470 | } | 1476 | } |
1471 | spin_unlock(&osb->osb_lock); | ||
1472 | 1477 | ||
1473 | status = 0; | 1478 | status = 0; |
1474 | bail: | 1479 | bail: |
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 10e149ae5e3a..07f348b8d721 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name, | |||
97 | goto out; | 97 | goto out; |
98 | } | 98 | } |
99 | 99 | ||
100 | /* Ok, the stack is pinned */ | ||
101 | p->sp_count++; | ||
102 | active_stack = p; | 100 | active_stack = p; |
103 | |||
104 | rc = 0; | 101 | rc = 0; |
105 | 102 | ||
106 | out: | 103 | out: |
104 | /* If we found it, pin it */ | ||
105 | if (!rc) | ||
106 | active_stack->sp_count++; | ||
107 | |||
107 | spin_unlock(&ocfs2_stack_lock); | 108 | spin_unlock(&ocfs2_stack_lock); |
108 | return rc; | 109 | return rc; |
109 | } | 110 | } |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..71c9be59c9c2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
337 | return 0; | 337 | return 0; |
338 | } | 338 | } |
339 | 339 | ||
340 | /* | ||
341 | * Use precise platform statistics if available: | ||
342 | */ | ||
343 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
344 | static cputime_t task_utime(struct task_struct *p) | ||
345 | { | ||
346 | return p->utime; | ||
347 | } | ||
348 | |||
349 | static cputime_t task_stime(struct task_struct *p) | ||
350 | { | ||
351 | return p->stime; | ||
352 | } | ||
353 | #else | ||
354 | static cputime_t task_utime(struct task_struct *p) | ||
355 | { | ||
356 | clock_t utime = cputime_to_clock_t(p->utime), | ||
357 | total = utime + cputime_to_clock_t(p->stime); | ||
358 | u64 temp; | ||
359 | |||
360 | /* | ||
361 | * Use CFS's precise accounting: | ||
362 | */ | ||
363 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
364 | |||
365 | if (total) { | ||
366 | temp *= utime; | ||
367 | do_div(temp, total); | ||
368 | } | ||
369 | utime = (clock_t)temp; | ||
370 | |||
371 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
372 | return p->prev_utime; | ||
373 | } | ||
374 | |||
375 | static cputime_t task_stime(struct task_struct *p) | ||
376 | { | ||
377 | clock_t stime; | ||
378 | |||
379 | /* | ||
380 | * Use CFS's precise accounting. (we subtract utime from | ||
381 | * the total, to make sure the total observed by userspace | ||
382 | * grows monotonically - apps rely on that): | ||
383 | */ | ||
384 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
385 | cputime_to_clock_t(task_utime(p)); | ||
386 | |||
387 | if (stime >= 0) | ||
388 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
389 | |||
390 | return p->prev_stime; | ||
391 | } | ||
392 | #endif | ||
393 | |||
394 | static cputime_t task_gtime(struct task_struct *p) | ||
395 | { | ||
396 | return p->gtime; | ||
397 | } | ||
398 | |||
399 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 340 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
400 | struct pid *pid, struct task_struct *task, int whole) | 341 | struct pid *pid, struct task_struct *task, int whole) |
401 | { | 342 | { |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4fb81e9c94e3..bca0f81eb687 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
@@ -330,6 +330,7 @@ retry: | |||
330 | spin_lock(&proc_inum_lock); | 330 | spin_lock(&proc_inum_lock); |
331 | ida_remove(&proc_inum_ida, i); | 331 | ida_remove(&proc_inum_ida, i); |
332 | spin_unlock(&proc_inum_lock); | 332 | spin_unlock(&proc_inum_lock); |
333 | return 0; | ||
333 | } | 334 | } |
334 | return PROC_DYNAMIC_FIRST + i; | 335 | return PROC_DYNAMIC_FIRST + i; |
335 | } | 336 | } |
diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 79ecd281d2cb..3f87d2632947 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c | |||
@@ -52,14 +52,14 @@ int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) | |||
52 | } | 52 | } |
53 | 53 | ||
54 | seq_printf(m, | 54 | seq_printf(m, |
55 | "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 55 | "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
56 | vma->vm_start, | 56 | vma->vm_start, |
57 | vma->vm_end, | 57 | vma->vm_end, |
58 | flags & VM_READ ? 'r' : '-', | 58 | flags & VM_READ ? 'r' : '-', |
59 | flags & VM_WRITE ? 'w' : '-', | 59 | flags & VM_WRITE ? 'w' : '-', |
60 | flags & VM_EXEC ? 'x' : '-', | 60 | flags & VM_EXEC ? 'x' : '-', |
61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', | 61 | flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', |
62 | vma->vm_pgoff << PAGE_SHIFT, | 62 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, |
63 | MAJOR(dev), MINOR(dev), ino, &len); | 63 | MAJOR(dev), MINOR(dev), ino, &len); |
64 | 64 | ||
65 | if (file) { | 65 | if (file) { |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..00f10a2dcf12 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/tty.h> | 24 | #include <linux/tty.h> |
25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
27 | #include <linux/quicklist.h> | ||
27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
@@ -189,7 +190,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
189 | "Committed_AS: %8lu kB\n" | 190 | "Committed_AS: %8lu kB\n" |
190 | "VmallocTotal: %8lu kB\n" | 191 | "VmallocTotal: %8lu kB\n" |
191 | "VmallocUsed: %8lu kB\n" | 192 | "VmallocUsed: %8lu kB\n" |
192 | "VmallocChunk: %8lu kB\n", | 193 | "VmallocChunk: %8lu kB\n" |
194 | "Quicklists: %8lu kB\n", | ||
193 | K(i.totalram), | 195 | K(i.totalram), |
194 | K(i.freeram), | 196 | K(i.freeram), |
195 | K(i.bufferram), | 197 | K(i.bufferram), |
@@ -221,7 +223,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
221 | K(committed), | 223 | K(committed), |
222 | (unsigned long)VMALLOC_TOTAL >> 10, | 224 | (unsigned long)VMALLOC_TOTAL >> 10, |
223 | vmi.used >> 10, | 225 | vmi.used >> 10, |
224 | vmi.largest_chunk >> 10 | 226 | vmi.largest_chunk >> 10, |
227 | K(quicklist_total_size()) | ||
225 | ); | 228 | ); |
226 | 229 | ||
227 | len += hugetlb_report_meminfo(page + len); | 230 | len += hugetlb_report_meminfo(page + len); |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 7546a918f790..73d1891ee625 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -219,14 +219,14 @@ static int show_map(struct seq_file *m, void *v) | |||
219 | ino = inode->i_ino; | 219 | ino = inode->i_ino; |
220 | } | 220 | } |
221 | 221 | ||
222 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", | 222 | seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", |
223 | vma->vm_start, | 223 | vma->vm_start, |
224 | vma->vm_end, | 224 | vma->vm_end, |
225 | flags & VM_READ ? 'r' : '-', | 225 | flags & VM_READ ? 'r' : '-', |
226 | flags & VM_WRITE ? 'w' : '-', | 226 | flags & VM_WRITE ? 'w' : '-', |
227 | flags & VM_EXEC ? 'x' : '-', | 227 | flags & VM_EXEC ? 'x' : '-', |
228 | flags & VM_MAYSHARE ? 's' : 'p', | 228 | flags & VM_MAYSHARE ? 's' : 'p', |
229 | vma->vm_pgoff << PAGE_SHIFT, | 229 | ((loff_t)vma->vm_pgoff) << PAGE_SHIFT, |
230 | MAJOR(dev), MINOR(dev), ino, &len); | 230 | MAJOR(dev), MINOR(dev), ino, &len); |
231 | 231 | ||
232 | /* | 232 | /* |
diff --git a/fs/readdir.c b/fs/readdir.c index 4e026e5407fb..93a7559bbfd8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
@@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset | |||
80 | if (buf->result) | 80 | if (buf->result) |
81 | return -EINVAL; | 81 | return -EINVAL; |
82 | d_ino = ino; | 82 | d_ino = ino; |
83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
84 | buf->result = -EOVERFLOW; | ||
84 | return -EOVERFLOW; | 85 | return -EOVERFLOW; |
86 | } | ||
85 | buf->result++; | 87 | buf->result++; |
86 | dirent = buf->dirent; | 88 | dirent = buf->dirent; |
87 | if (!access_ok(VERIFY_WRITE, dirent, | 89 | if (!access_ok(VERIFY_WRITE, dirent, |
@@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, | |||
155 | if (reclen > buf->count) | 157 | if (reclen > buf->count) |
156 | return -EINVAL; | 158 | return -EINVAL; |
157 | d_ino = ino; | 159 | d_ino = ino; |
158 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 160 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
161 | buf->error = -EOVERFLOW; | ||
159 | return -EOVERFLOW; | 162 | return -EOVERFLOW; |
163 | } | ||
160 | dirent = buf->previous; | 164 | dirent = buf->previous; |
161 | if (dirent) { | 165 | if (dirent) { |
162 | if (__put_user(offset, &dirent->d_off)) | 166 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 5d54205e486b..bd20f7f5a933 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
@@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
108 | goto Done; | 108 | goto Done; |
109 | } | 109 | } |
110 | /* we need at least one record in buffer */ | 110 | /* we need at least one record in buffer */ |
111 | pos = m->index; | ||
112 | p = m->op->start(m, &pos); | ||
111 | while (1) { | 113 | while (1) { |
112 | pos = m->index; | ||
113 | p = m->op->start(m, &pos); | ||
114 | err = PTR_ERR(p); | 114 | err = PTR_ERR(p); |
115 | if (!p || IS_ERR(p)) | 115 | if (!p || IS_ERR(p)) |
116 | break; | 116 | break; |
@@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
119 | break; | 119 | break; |
120 | if (unlikely(err)) | 120 | if (unlikely(err)) |
121 | m->count = 0; | 121 | m->count = 0; |
122 | if (unlikely(!m->count)) { | ||
123 | p = m->op->next(m, p, &pos); | ||
124 | m->index = pos; | ||
125 | continue; | ||
126 | } | ||
122 | if (m->count < m->size) | 127 | if (m->count < m->size) |
123 | goto Fill; | 128 | goto Fill; |
124 | m->op->stop(m, p); | 129 | m->op->stop(m, p); |
@@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
128 | goto Enomem; | 133 | goto Enomem; |
129 | m->count = 0; | 134 | m->count = 0; |
130 | m->version = 0; | 135 | m->version = 0; |
136 | pos = m->index; | ||
137 | p = m->op->start(m, &pos); | ||
131 | } | 138 | } |
132 | m->op->stop(m, p); | 139 | m->op->stop(m, p); |
133 | m->count = 0; | 140 | m->count = 0; |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
@@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
302 | int subtract_lebs; | 302 | int subtract_lebs; |
303 | long long available; | 303 | long long available; |
304 | 304 | ||
305 | /* | ||
306 | * Force the amount available to the total size reported if the used | ||
307 | * space is zero. | ||
308 | */ | ||
309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
311 | /* Do the same calculation as for c->block_cnt */ | ||
312 | available = c->main_lebs - 2; | ||
313 | available *= c->leb_size - c->dark_wm; | ||
314 | return available; | ||
315 | } | ||
316 | |||
317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
318 | 306 | ||
319 | /* | 307 | /* |
@@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
714 | } | 702 | } |
715 | 703 | ||
716 | /** | 704 | /** |
717 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
706 | * @c: the UBIFS file-system description object | ||
707 | * @free: amount of free space | ||
708 | * | ||
709 | * This function calculates amount of free space which will be reported to | ||
710 | * user-space. User-space applications tend to expect that if the file-system | ||
711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
713 | * node and it has to write indexing nodes as well. This introduces additional | ||
714 | * overhead, and UBIFS has to report slightly less free space to meet the | ||
715 | * above expectation. | ||
716 | * | ||
717 | * This function assumes free space is made up of uncompressed data nodes and | ||
718 | * full index nodes (one per data node, tripled because we always allow enough | ||
719 | * space to write the index thrice). | ||
720 | * | ||
721 | * Note, the calculation is pessimistic, which means that most of the time | ||
722 | * UBIFS reports less space than it actually has. | ||
723 | */ | ||
724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
725 | { | ||
726 | int divisor, factor, f; | ||
727 | |||
728 | /* | ||
729 | * Reported space size is @free * X, where X is UBIFS block size | ||
730 | * divided by UBIFS block size + all overhead one data block | ||
731 | * introduces. The overhead is the node header + indexing overhead. | ||
732 | * | ||
733 | * Indexing overhead calculations are based on the following formula: | ||
734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
735 | * of data nodes, f - fanout. Because the effective UBIFS fanout is half | ||
736 | * of the maximum fanout, we assume that each data node | ||
737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | ||
738 | * Note, the multiplier 3 is because UBIFS reserves three times as much space | ||
739 | * for the index. | ||
740 | */ | ||
741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
742 | factor = UBIFS_BLOCK_SIZE; | ||
743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
745 | free *= factor; | ||
746 | do_div(free, divisor); | ||
747 | return free; | ||
748 | } | ||
749 | |||
750 | /** | ||
751 | * ubifs_get_free_space - return amount of free space. | ||
718 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
719 | * | 753 | * |
720 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
755 | * | ||
756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
757 | * alignment, wastage at the end of eraseblocks, etc), it cannot report the real | ||
758 | * amount of free flash space it has (well, because not all dirty space is | ||
759 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, | ||
760 | * it would break user expectations about what free space is. Users seem to | ||
761 | * be accustomed to assuming that if the file-system reports N bytes of free space, | ||
762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
721 | */ | 765 | */ |
722 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
723 | { | 767 | { |
724 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
725 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
726 | 770 | ||
727 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
728 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
729 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
730 | 774 | ||
731 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
732 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
733 | else | 777 | * space is zero. |
734 | rsvd_idx_lebs = 0; | 778 | */ |
735 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
736 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
737 | - c->lst.taken_empty_lebs) { | ||
738 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
739 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
740 | } | 782 | } |
741 | 783 | ||
742 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
743 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
744 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
788 | * possible to write a file of free space size. This means that for | ||
789 | * empty LEBs we may use more precise calculations than | ||
790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
792 | * Thus, amend the available space. | ||
793 | * | ||
794 | * Note, the calculations below are similar to what we have in | ||
795 | * 'do_budget_space()', so refer there for comments. | ||
796 | */ | ||
797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
799 | else | ||
800 | rsvd_idx_lebs = 0; | ||
801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
802 | c->lst.taken_empty_lebs; | ||
803 | lebs -= rsvd_idx_lebs; | ||
804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
745 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
746 | 806 | ||
747 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..2b267c9a1806 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
@@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
587 | if (err) { | 587 | if (err) { |
588 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
589 | return err; | 589 | return err; |
590 | err = 0; | ||
591 | budgeted = 0; | 590 | budgeted = 0; |
592 | } | 591 | } |
593 | 592 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
793 | int err; | 793 | int err; |
794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
798 | 798 | ||
799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
@@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
814 | if (err) | 814 | if (err) { |
815 | return err; | 815 | /* |
816 | * Treat truncations to zero as deletion and always allow them, | ||
817 | * just like we do for '->unlink()'. | ||
818 | */ | ||
819 | if (new_size || err != -ENOSPC) | ||
820 | return err; | ||
821 | budgeted = 0; | ||
822 | } | ||
816 | 823 | ||
817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
818 | if (err) | 825 | if (err) |
@@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
871 | out_budg: | 878 | out_budg: |
872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
880 | ubifs_release_budget(c, &req); | ||
881 | else { | ||
882 | c->nospace = c->nospace_rp = 0; | ||
883 | smp_wmb(); | ||
884 | } | ||
873 | return err; | 885 | return err; |
874 | } | 886 | } |
875 | 887 | ||
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..e045c8b55423 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
@@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
213 | * | 213 | * |
214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
216 | * by this function; | ||
217 | * | ||
218 | * Returns zero and the LEB properties of | ||
219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
220 | * negative error code in case of other failures. The returned LEB is marked as | ||
221 | * "taken". | ||
222 | * | 216 | * |
223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
224 | * free or freeable LEB if one is present. For example, GC must set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must set it to %1, |
@@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
231 | * | 225 | * |
232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
228 | * | ||
229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
231 | * case of other failures. The returned LEB is marked as "taken". | ||
234 | */ | 232 | */ |
235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
@@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
246 | 244 | ||
247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
250 | 248 | ||
251 | /* | 249 | /* |
@@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
317 | lp = idx_lp; | 315 | lp = idx_lp; |
318 | 316 | ||
319 | if (lp) { | 317 | if (lp) { |
320 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
321 | goto found; | 319 | goto found; |
322 | } | 320 | } |
323 | 321 | ||
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..13f1019c859f 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
@@ -344,6 +344,12 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
344 | if (err) | 344 | if (err) |
345 | goto out; | 345 | goto out; |
346 | 346 | ||
347 | /* Allow for races with TNC */ | ||
348 | c->gced_lnum = lnum; | ||
349 | smp_wmb(); | ||
350 | c->gc_seq += 1; | ||
351 | smp_wmb(); | ||
352 | |||
347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
349 | err = LEB_RETAINED; | 355 | err = LEB_RETAINED; |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
@@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
284 | } | 284 | } |
285 | 285 | ||
286 | /** | 286 | /** |
287 | * ubifs_reported_space - calculate reported free space. | ||
288 | * @c: the UBIFS file-system description object | ||
289 | * @free: amount of free space | ||
290 | * | ||
291 | * This function calculates amount of free space which will be reported to | ||
292 | * user-space. User-space applications tend to expect that if the file-system | ||
293 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
294 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
295 | * node and it has to write indexing nodes as well. This introduces additional | ||
296 | * overhead, and UBIFS has to report slightly less free space to meet the | ||
297 | * above expectation. | ||
298 | * | ||
299 | * This function assumes free space is made up of uncompressed data nodes and | ||
300 | * full index nodes (one per data node, doubled because we always allow enough | ||
301 | * space to write the index twice). | ||
302 | * | ||
303 | * Note, the calculation is pessimistic, which means that most of the time | ||
304 | * UBIFS reports less space than it actually has. | ||
305 | */ | ||
306 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
307 | uint64_t free) | ||
308 | { | ||
309 | int divisor, factor; | ||
310 | |||
311 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); | ||
312 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
313 | do_div(free, divisor); | ||
314 | |||
315 | return free * factor; | ||
316 | } | ||
317 | |||
318 | /** | ||
319 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
320 | * @inode: inode | 288 | * @inode: inode |
321 | */ | 289 | */ |
@@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
326 | } | 294 | } |
327 | 295 | ||
296 | /** | ||
297 | * ubifs_tnc_lookup - look up a file-system node. | ||
298 | * @c: UBIFS file-system description object | ||
299 | * @key: node key to lookup | ||
300 | * @node: the node is returned here | ||
301 | * | ||
302 | * This function looks up and reads the node with key @key. The caller has to make | ||
303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
305 | * case of failure. | ||
306 | */ | ||
307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
308 | const union ubifs_key *key, void *node) | ||
309 | { | ||
310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
311 | } | ||
312 | |||
328 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..7562464ac83f 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
370 | { | 370 | { |
371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
372 | unsigned long long free; | 372 | unsigned long long free; |
373 | __le32 *uuid = (__le32 *)c->uuid; | ||
373 | 374 | ||
374 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
375 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
376 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
377 | 378 | ||
@@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
386 | buf->f_files = 0; | 387 | buf->f_files = 0; |
387 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
388 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
389 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
390 | return 0; | 392 | return 0; |
391 | } | 393 | } |
392 | 394 | ||
@@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
530 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
531 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
532 | 534 | ||
535 | /* | ||
536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
537 | * fully filled with data nodes of maximum size. This is used in | ||
538 | * calculations when reporting free space. | ||
539 | */ | ||
540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
533 | return 0; | 541 | return 0; |
534 | } | 542 | } |
535 | 543 | ||
@@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
647 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
648 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
649 | * | 657 | * |
650 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
651 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
652 | * | ||
653 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
654 | */ | 660 | */ |
655 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
656 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
657 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
658 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
659 | 665 | ||
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7da209ab9378 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
@@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
507 | ret = 0; | 507 | ret = 0; |
508 | } | 508 | } |
509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
512 | return ret; | 512 | return ret; |
@@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
1382 | } | 1382 | } |
1383 | 1383 | ||
1384 | /** | 1384 | /** |
1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
1389 | * | 1389 | * |
1390 | * This function looks up and reads the node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
1393 | * case of failure. | ||
1394 | */ | 1393 | */ |
1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
1396 | void *node) | ||
1397 | { | 1395 | { |
1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
1399 | struct ubifs_znode *znode; | ||
1400 | struct ubifs_zbranch zbr, *zt; | ||
1401 | 1397 | ||
1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
1407 | } else if (found < 0) { | 1403 | return 0; |
1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
1410 | } | 1406 | return 1; |
1411 | zt = &znode->zbranch[n]; | 1407 | /* |
1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
1418 | goto out; | 1414 | /* Finally we can check lnum */ |
1419 | } | 1415 | if (gced_lnum == lnum) |
1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
1422 | |||
1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1424 | return err; | ||
1425 | |||
1426 | out: | ||
1427 | mutex_unlock(&c->tnc_mutex); | ||
1428 | return err; | ||
1429 | } | 1418 | } |
1430 | 1419 | ||
1431 | /** | 1420 | /** |
@@ -1436,16 +1425,19 @@ out: | |||
1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
1438 | * | 1427 | * |
1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function looks up and reads the node with key @key. The caller has to make |
1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
1441 | */ | 1432 | */ |
1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
1444 | { | 1435 | { |
1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
1448 | 1439 | ||
1440 | again: | ||
1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
1451 | if (!found) { | 1443 | if (!found) { |
@@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
1456 | goto out; | 1448 | goto out; |
1457 | } | 1449 | } |
1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
1451 | if (lnum) { | ||
1452 | *lnum = zt->lnum; | ||
1453 | *offs = zt->offs; | ||
1454 | } | ||
1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
1460 | /* | 1456 | /* |
1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
1463 | */ | 1459 | */ |
1464 | *lnum = zt->lnum; | ||
1465 | *offs = zt->offs; | ||
1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
1467 | goto out; | 1461 | goto out; |
1468 | } | 1462 | } |
1463 | if (safely) { | ||
1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
1465 | goto out; | ||
1466 | } | ||
1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
1469 | gc_seq1 = c->gc_seq; | ||
1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
1471 | 1471 | ||
1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
1475 | return err; | ||
1476 | } | ||
1474 | 1477 | ||
1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
1476 | return err; | 1479 | if (maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
1480 | /* | ||
1481 | * The node may have been GC'ed out from under us so try again | ||
1482 | * while keeping the TNC mutex locked. | ||
1483 | */ | ||
1484 | safely = 1; | ||
1485 | goto again; | ||
1486 | } | ||
1487 | return 0; | ||
1477 | 1488 | ||
1478 | out: | 1489 | out: |
1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
@@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1498 | { | 1509 | { |
1499 | int found, n, err; | 1510 | int found, n, err; |
1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
1501 | struct ubifs_zbranch zbr; | ||
1502 | 1512 | ||
1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
@@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
1522 | goto out_unlock; | 1532 | goto out_unlock; |
1523 | } | 1533 | } |
1524 | 1534 | ||
1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
1526 | mutex_unlock(&c->tnc_mutex); | ||
1527 | |||
1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
1529 | return err; | ||
1530 | 1536 | ||
1531 | out_unlock: | 1537 | out_unlock: |
1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
@@ -87,7 +87,7 @@ | |||
87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
88 | 88 | ||
89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
91 | 91 | ||
92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
@@ -995,6 +995,9 @@ struct ubifs_mount_opts { | |||
995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
996 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
997 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
998 | * | ||
999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
1000 | * data nodes of maximum size - used in free space reporting | ||
998 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
999 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
1000 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
@@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { | |||
1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
1031 | * | 1036 | * |
1032 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
1033 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
@@ -1224,6 +1229,8 @@ struct ubifs_info { | |||
1224 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
1225 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
1226 | int max_znode_sz; | 1231 | int max_znode_sz; |
1232 | |||
1233 | int leb_overhead; | ||
1227 | int dead_wm; | 1234 | int dead_wm; |
1228 | int dark_wm; | 1235 | int dark_wm; |
1229 | int block_cnt; | 1236 | int block_cnt; |
@@ -1257,6 +1264,8 @@ struct ubifs_info { | |||
1257 | void *sbuf; | 1264 | void *sbuf; |
1258 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
1259 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
1267 | volatile int gc_seq; | ||
1268 | volatile int gced_lnum; | ||
1260 | 1269 | ||
1261 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
1262 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
@@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
1434 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
1435 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
1436 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
1437 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
1438 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
1439 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
1440 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
1441 | 1451 | ||
1442 | /* find.c */ | 1452 | /* find.c */ |
@@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
1451 | /* tnc.c */ | 1461 | /* tnc.c */ |
1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
1453 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
1455 | void *node); | ||
1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
1457 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5f60363b9343..5311c1acdd40 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
@@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = { | |||
475 | const struct file_operations xfs_dir_file_operations = { | 475 | const struct file_operations xfs_dir_file_operations = { |
476 | .read = generic_read_dir, | 476 | .read = generic_read_dir, |
477 | .readdir = xfs_file_readdir, | 477 | .readdir = xfs_file_readdir, |
478 | .llseek = generic_file_llseek, | ||
478 | .unlocked_ioctl = xfs_file_ioctl, | 479 | .unlocked_ioctl = xfs_file_ioctl, |
479 | #ifdef CONFIG_COMPAT | 480 | #ifdef CONFIG_COMPAT |
480 | .compat_ioctl = xfs_file_compat_ioctl, | 481 | .compat_ioctl = xfs_file_compat_ioctl, |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 91bcd979242c..095d271f3434 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
@@ -355,7 +355,7 @@ xfs_vn_ci_lookup( | |||
355 | /* else case-insensitive match... */ | 355 | /* else case-insensitive match... */ |
356 | dname.name = ci_name.name; | 356 | dname.name = ci_name.name; |
357 | dname.len = ci_name.len; | 357 | dname.len = ci_name.len; |
358 | dentry = d_add_ci(VFS_I(ip), dentry, &dname); | 358 | dentry = d_add_ci(dentry, VFS_I(ip), &dname); |
359 | kmem_free(ci_name.name); | 359 | kmem_free(ci_name.name); |
360 | return dentry; | 360 | return dentry; |
361 | } | 361 | } |
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h | |||
@@ -18,7 +18,6 @@ | |||
18 | #ifndef __XFS_DMAPI_H__ | 18 | #ifndef __XFS_DMAPI_H__ |
19 | #define __XFS_DMAPI_H__ | 19 | #define __XFS_DMAPI_H__ |
20 | 20 | ||
21 | #include <linux/version.h> | ||
22 | /* Values used to define the on-disk version of dm_attrname_t. All | 21 | /* Values used to define the on-disk version of dm_attrname_t. All |
23 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". | 22 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". |
24 | * | 23 | * |