diff options
Diffstat (limited to 'fs')
84 files changed, 1160 insertions, 877 deletions
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index 88e3787c6ea9..e298fe194093 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
| @@ -119,6 +119,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
| 119 | 119 | ||
| 120 | const struct file_operations v9fs_dir_operations = { | 120 | const struct file_operations v9fs_dir_operations = { |
| 121 | .read = generic_read_dir, | 121 | .read = generic_read_dir, |
| 122 | .llseek = generic_file_llseek, | ||
| 122 | .readdir = v9fs_dir_readdir, | 123 | .readdir = v9fs_dir_readdir, |
| 123 | .open = v9fs_file_open, | 124 | .open = v9fs_file_open, |
| 124 | .release = v9fs_dir_release, | 125 | .release = v9fs_dir_release, |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index c95295c65045..e83aa5ebe861 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
| @@ -626,8 +626,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, | |||
| 626 | return NULL; | 626 | return NULL; |
| 627 | 627 | ||
| 628 | error: | 628 | error: |
| 629 | if (fid) | 629 | p9_client_clunk(fid); |
| 630 | p9_client_clunk(fid); | ||
| 631 | 630 | ||
| 632 | return ERR_PTR(result); | 631 | return ERR_PTR(result); |
| 633 | } | 632 | } |
diff --git a/fs/Kconfig b/fs/Kconfig index d3873583360b..abccb5dab9a8 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
| @@ -1930,6 +1930,16 @@ config CIFS_WEAK_PW_HASH | |||
| 1930 | 1930 | ||
| 1931 | If unsure, say N. | 1931 | If unsure, say N. |
| 1932 | 1932 | ||
| 1933 | config CIFS_UPCALL | ||
| 1934 | bool "Kerberos/SPNEGO advanced session setup" | ||
| 1935 | depends on CIFS && KEYS | ||
| 1936 | help | ||
| 1937 | Enables an upcall mechanism for CIFS which accesses | ||
| 1938 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
| 1939 | Kerberos tickets which are needed to mount to certain secure servers | ||
| 1940 | (for which more secure Kerberos authentication is required). If | ||
| 1941 | unsure, say N. | ||
| 1942 | |||
| 1933 | config CIFS_XATTR | 1943 | config CIFS_XATTR |
| 1934 | bool "CIFS extended attributes" | 1944 | bool "CIFS extended attributes" |
| 1935 | depends on CIFS | 1945 | depends on CIFS |
| @@ -1982,17 +1992,6 @@ config CIFS_EXPERIMENTAL | |||
| 1982 | (which is disabled by default). See the file fs/cifs/README | 1992 | (which is disabled by default). See the file fs/cifs/README |
| 1983 | for more details. If unsure, say N. | 1993 | for more details. If unsure, say N. |
| 1984 | 1994 | ||
| 1985 | config CIFS_UPCALL | ||
| 1986 | bool "Kerberos/SPNEGO advanced session setup (EXPERIMENTAL)" | ||
| 1987 | depends on CIFS_EXPERIMENTAL | ||
| 1988 | depends on KEYS | ||
| 1989 | help | ||
| 1990 | Enables an upcall mechanism for CIFS which accesses | ||
| 1991 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
| 1992 | Kerberos tickets which are needed to mount to certain secure servers | ||
| 1993 | (for which more secure Kerberos authentication is required). If | ||
| 1994 | unsure, say N. | ||
| 1995 | |||
| 1996 | config CIFS_DFS_UPCALL | 1995 | config CIFS_DFS_UPCALL |
| 1997 | bool "DFS feature support (EXPERIMENTAL)" | 1996 | bool "DFS feature support (EXPERIMENTAL)" |
| 1998 | depends on CIFS_EXPERIMENTAL | 1997 | depends on CIFS_EXPERIMENTAL |
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index fc1a8dc64d78..85a30e929800 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c | |||
| @@ -197,6 +197,7 @@ out: | |||
| 197 | 197 | ||
| 198 | const struct file_operations adfs_dir_operations = { | 198 | const struct file_operations adfs_dir_operations = { |
| 199 | .read = generic_read_dir, | 199 | .read = generic_read_dir, |
| 200 | .llseek = generic_file_llseek, | ||
| 200 | .readdir = adfs_readdir, | 201 | .readdir = adfs_readdir, |
| 201 | .fsync = file_fsync, | 202 | .fsync = file_fsync, |
| 202 | }; | 203 | }; |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index 6e3f282424b0..7b36904dbeac 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
| @@ -19,6 +19,7 @@ static int affs_readdir(struct file *, void *, filldir_t); | |||
| 19 | 19 | ||
| 20 | const struct file_operations affs_dir_operations = { | 20 | const struct file_operations affs_dir_operations = { |
| 21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
| 22 | .llseek = generic_file_llseek, | ||
| 22 | .readdir = affs_readdir, | 23 | .readdir = affs_readdir, |
| 23 | .fsync = file_fsync, | 24 | .fsync = file_fsync, |
| 24 | }; | 25 | }; |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index bcfb2dc0a61b..2a41c2a7fc52 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
| @@ -36,6 +36,7 @@ const struct file_operations autofs4_root_operations = { | |||
| 36 | .release = dcache_dir_close, | 36 | .release = dcache_dir_close, |
| 37 | .read = generic_read_dir, | 37 | .read = generic_read_dir, |
| 38 | .readdir = dcache_readdir, | 38 | .readdir = dcache_readdir, |
| 39 | .llseek = dcache_dir_lseek, | ||
| 39 | .ioctl = autofs4_root_ioctl, | 40 | .ioctl = autofs4_root_ioctl, |
| 40 | }; | 41 | }; |
| 41 | 42 | ||
| @@ -44,6 +45,7 @@ const struct file_operations autofs4_dir_operations = { | |||
| 44 | .release = dcache_dir_close, | 45 | .release = dcache_dir_close, |
| 45 | .read = generic_read_dir, | 46 | .read = generic_read_dir, |
| 46 | .readdir = dcache_readdir, | 47 | .readdir = dcache_readdir, |
| 48 | .llseek = dcache_dir_lseek, | ||
| 47 | }; | 49 | }; |
| 48 | 50 | ||
| 49 | const struct inode_operations autofs4_indirect_root_inode_operations = { | 51 | const struct inode_operations autofs4_indirect_root_inode_operations = { |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 02c6e62b72f8..740f53672a8a 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
| @@ -66,6 +66,7 @@ static struct kmem_cache *befs_inode_cachep; | |||
| 66 | static const struct file_operations befs_dir_operations = { | 66 | static const struct file_operations befs_dir_operations = { |
| 67 | .read = generic_read_dir, | 67 | .read = generic_read_dir, |
| 68 | .readdir = befs_readdir, | 68 | .readdir = befs_readdir, |
| 69 | .llseek = generic_file_llseek, | ||
| 69 | }; | 70 | }; |
| 70 | 71 | ||
| 71 | static const struct inode_operations befs_dir_inode_operations = { | 72 | static const struct inode_operations befs_dir_inode_operations = { |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 87ee5ccee348..ed8feb052df9 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
| @@ -125,8 +125,8 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, int mode, | |||
| 125 | inode->i_ino); | 125 | inode->i_ino); |
| 126 | if (err) { | 126 | if (err) { |
| 127 | inode_dec_link_count(inode); | 127 | inode_dec_link_count(inode); |
| 128 | iput(inode); | ||
| 129 | mutex_unlock(&info->bfs_lock); | 128 | mutex_unlock(&info->bfs_lock); |
| 129 | iput(inode); | ||
| 130 | return err; | 130 | return err; |
| 131 | } | 131 | } |
| 132 | mutex_unlock(&info->bfs_lock); | 132 | mutex_unlock(&info->bfs_lock); |
| @@ -469,20 +469,21 @@ static void bio_free_map_data(struct bio_map_data *bmd) | |||
| 469 | kfree(bmd); | 469 | kfree(bmd); |
| 470 | } | 470 | } |
| 471 | 471 | ||
| 472 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | 472 | static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, |
| 473 | gfp_t gfp_mask) | ||
| 473 | { | 474 | { |
| 474 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), GFP_KERNEL); | 475 | struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask); |
| 475 | 476 | ||
| 476 | if (!bmd) | 477 | if (!bmd) |
| 477 | return NULL; | 478 | return NULL; |
| 478 | 479 | ||
| 479 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, GFP_KERNEL); | 480 | bmd->iovecs = kmalloc(sizeof(struct bio_vec) * nr_segs, gfp_mask); |
| 480 | if (!bmd->iovecs) { | 481 | if (!bmd->iovecs) { |
| 481 | kfree(bmd); | 482 | kfree(bmd); |
| 482 | return NULL; | 483 | return NULL; |
| 483 | } | 484 | } |
| 484 | 485 | ||
| 485 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, GFP_KERNEL); | 486 | bmd->sgvecs = kmalloc(sizeof(struct sg_iovec) * iov_count, gfp_mask); |
| 486 | if (bmd->sgvecs) | 487 | if (bmd->sgvecs) |
| 487 | return bmd; | 488 | return bmd; |
| 488 | 489 | ||
| @@ -491,8 +492,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count) | |||
| 491 | return NULL; | 492 | return NULL; |
| 492 | } | 493 | } |
| 493 | 494 | ||
| 494 | static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | 495 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
| 495 | int uncopy) | 496 | struct sg_iovec *iov, int iov_count, int uncopy) |
| 496 | { | 497 | { |
| 497 | int ret = 0, i; | 498 | int ret = 0, i; |
| 498 | struct bio_vec *bvec; | 499 | struct bio_vec *bvec; |
| @@ -502,7 +503,7 @@ static int __bio_copy_iov(struct bio *bio, struct sg_iovec *iov, int iov_count, | |||
| 502 | 503 | ||
| 503 | __bio_for_each_segment(bvec, bio, i, 0) { | 504 | __bio_for_each_segment(bvec, bio, i, 0) { |
| 504 | char *bv_addr = page_address(bvec->bv_page); | 505 | char *bv_addr = page_address(bvec->bv_page); |
| 505 | unsigned int bv_len = bvec->bv_len; | 506 | unsigned int bv_len = iovecs[i].bv_len; |
| 506 | 507 | ||
| 507 | while (bv_len && iov_idx < iov_count) { | 508 | while (bv_len && iov_idx < iov_count) { |
| 508 | unsigned int bytes; | 509 | unsigned int bytes; |
| @@ -554,7 +555,7 @@ int bio_uncopy_user(struct bio *bio) | |||
| 554 | struct bio_map_data *bmd = bio->bi_private; | 555 | struct bio_map_data *bmd = bio->bi_private; |
| 555 | int ret; | 556 | int ret; |
| 556 | 557 | ||
| 557 | ret = __bio_copy_iov(bio, bmd->sgvecs, bmd->nr_sgvecs, 1); | 558 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); |
| 558 | 559 | ||
| 559 | bio_free_map_data(bmd); | 560 | bio_free_map_data(bmd); |
| 560 | bio_put(bio); | 561 | bio_put(bio); |
| @@ -596,7 +597,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
| 596 | len += iov[i].iov_len; | 597 | len += iov[i].iov_len; |
| 597 | } | 598 | } |
| 598 | 599 | ||
| 599 | bmd = bio_alloc_map_data(nr_pages, iov_count); | 600 | bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); |
| 600 | if (!bmd) | 601 | if (!bmd) |
| 601 | return ERR_PTR(-ENOMEM); | 602 | return ERR_PTR(-ENOMEM); |
| 602 | 603 | ||
| @@ -633,7 +634,7 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
| 633 | * success | 634 | * success |
| 634 | */ | 635 | */ |
| 635 | if (!write_to_vm) { | 636 | if (!write_to_vm) { |
| 636 | ret = __bio_copy_iov(bio, iov, iov_count, 0); | 637 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); |
| 637 | if (ret) | 638 | if (ret) |
| 638 | goto cleanup; | 639 | goto cleanup; |
| 639 | } | 640 | } |
| @@ -942,19 +943,22 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
| 942 | { | 943 | { |
| 943 | struct bio_vec *bvec; | 944 | struct bio_vec *bvec; |
| 944 | const int read = bio_data_dir(bio) == READ; | 945 | const int read = bio_data_dir(bio) == READ; |
| 945 | char *p = bio->bi_private; | 946 | struct bio_map_data *bmd = bio->bi_private; |
| 946 | int i; | 947 | int i; |
| 948 | char *p = bmd->sgvecs[0].iov_base; | ||
| 947 | 949 | ||
| 948 | __bio_for_each_segment(bvec, bio, i, 0) { | 950 | __bio_for_each_segment(bvec, bio, i, 0) { |
| 949 | char *addr = page_address(bvec->bv_page); | 951 | char *addr = page_address(bvec->bv_page); |
| 952 | int len = bmd->iovecs[i].bv_len; | ||
| 950 | 953 | ||
| 951 | if (read && !err) | 954 | if (read && !err) |
| 952 | memcpy(p, addr, bvec->bv_len); | 955 | memcpy(p, addr, len); |
| 953 | 956 | ||
| 954 | __free_page(bvec->bv_page); | 957 | __free_page(bvec->bv_page); |
| 955 | p += bvec->bv_len; | 958 | p += len; |
| 956 | } | 959 | } |
| 957 | 960 | ||
| 961 | bio_free_map_data(bmd); | ||
| 958 | bio_put(bio); | 962 | bio_put(bio); |
| 959 | } | 963 | } |
| 960 | 964 | ||
| @@ -978,11 +982,21 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
| 978 | const int nr_pages = end - start; | 982 | const int nr_pages = end - start; |
| 979 | struct bio *bio; | 983 | struct bio *bio; |
| 980 | struct bio_vec *bvec; | 984 | struct bio_vec *bvec; |
| 985 | struct bio_map_data *bmd; | ||
| 981 | int i, ret; | 986 | int i, ret; |
| 987 | struct sg_iovec iov; | ||
| 988 | |||
| 989 | iov.iov_base = data; | ||
| 990 | iov.iov_len = len; | ||
| 991 | |||
| 992 | bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); | ||
| 993 | if (!bmd) | ||
| 994 | return ERR_PTR(-ENOMEM); | ||
| 982 | 995 | ||
| 996 | ret = -ENOMEM; | ||
| 983 | bio = bio_alloc(gfp_mask, nr_pages); | 997 | bio = bio_alloc(gfp_mask, nr_pages); |
| 984 | if (!bio) | 998 | if (!bio) |
| 985 | return ERR_PTR(-ENOMEM); | 999 | goto out_bmd; |
| 986 | 1000 | ||
| 987 | while (len) { | 1001 | while (len) { |
| 988 | struct page *page; | 1002 | struct page *page; |
| @@ -1016,14 +1030,18 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
| 1016 | } | 1030 | } |
| 1017 | } | 1031 | } |
| 1018 | 1032 | ||
| 1019 | bio->bi_private = data; | 1033 | bio->bi_private = bmd; |
| 1020 | bio->bi_end_io = bio_copy_kern_endio; | 1034 | bio->bi_end_io = bio_copy_kern_endio; |
| 1035 | |||
| 1036 | bio_set_map_data(bmd, bio, &iov, 1); | ||
| 1021 | return bio; | 1037 | return bio; |
| 1022 | cleanup: | 1038 | cleanup: |
| 1023 | bio_for_each_segment(bvec, bio, i) | 1039 | bio_for_each_segment(bvec, bio, i) |
| 1024 | __free_page(bvec->bv_page); | 1040 | __free_page(bvec->bv_page); |
| 1025 | 1041 | ||
| 1026 | bio_put(bio); | 1042 | bio_put(bio); |
| 1043 | out_bmd: | ||
| 1044 | bio_free_map_data(bmd); | ||
| 1027 | 1045 | ||
| 1028 | return ERR_PTR(ret); | 1046 | return ERR_PTR(ret); |
| 1029 | } | 1047 | } |
diff --git a/fs/buffer.c b/fs/buffer.c index 38653e36e225..ac78d4c19b3b 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -2926,14 +2926,17 @@ int submit_bh(int rw, struct buffer_head * bh) | |||
| 2926 | BUG_ON(!buffer_mapped(bh)); | 2926 | BUG_ON(!buffer_mapped(bh)); |
| 2927 | BUG_ON(!bh->b_end_io); | 2927 | BUG_ON(!bh->b_end_io); |
| 2928 | 2928 | ||
| 2929 | if (buffer_ordered(bh) && (rw == WRITE)) | 2929 | /* |
| 2930 | rw = WRITE_BARRIER; | 2930 | * Mask in barrier bit for a write (could be either a WRITE or a |
| 2931 | * WRITE_SYNC | ||
| 2932 | */ | ||
| 2933 | if (buffer_ordered(bh) && (rw & WRITE)) | ||
| 2934 | rw |= WRITE_BARRIER; | ||
| 2931 | 2935 | ||
| 2932 | /* | 2936 | /* |
| 2933 | * Only clear out a write error when rewriting, should this | 2937 | * Only clear out a write error when rewriting |
| 2934 | * include WRITE_SYNC as well? | ||
| 2935 | */ | 2938 | */ |
| 2936 | if (test_set_buffer_req(bh) && (rw == WRITE || rw == WRITE_BARRIER)) | 2939 | if (test_set_buffer_req(bh) && (rw & WRITE)) |
| 2937 | clear_buffer_write_io_error(bh); | 2940 | clear_buffer_write_io_error(bh); |
| 2938 | 2941 | ||
| 2939 | /* | 2942 | /* |
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES index f5d0083e09fa..06e521a945c3 100644 --- a/fs/cifs/CHANGES +++ b/fs/cifs/CHANGES | |||
| @@ -4,7 +4,15 @@ Fix premature write failure on congested networks (we would give up | |||
| 4 | on EAGAIN from the socket too quickly on large writes). | 4 | on EAGAIN from the socket too quickly on large writes). |
| 5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. | 5 | Cifs_mkdir and cifs_create now respect the setgid bit on parent dir. |
| 6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian | 6 | Fix endian problems in acl (mode from/to cifs acl) on bigendian |
| 7 | architectures. | 7 | architectures. Fix problems with preserving timestamps on copying open |
| 8 | files (e.g. "cp -a") to Windows servers. For mkdir and create honor setgid bit | ||
| 9 | on parent directory when server supports Unix Extensions but not POSIX | ||
| 10 | create. Update cifs.upcall version to handle new Kerberos sec flags | ||
| 11 | (this requires update of cifs.upcall program from Samba). Fix memory leak | ||
| 12 | on dns_upcall (resolving DFS referrals). Fix plain text password | ||
| 13 | authentication (requires setting SecurityFlags to 0x30030 to enable | ||
| 14 | lanman and plain text though). Fix writes to be at correct offset when | ||
| 15 | file is open with O_APPEND and file is on a directio (forcedirectio) mount. | ||
| 8 | 16 | ||
| 9 | Version 1.53 | 17 | Version 1.53 |
| 10 | ------------ | 18 | ------------ |
diff --git a/fs/cifs/README b/fs/cifs/README index 2bd6fe556f88..bd2343d4c6a6 100644 --- a/fs/cifs/README +++ b/fs/cifs/README | |||
| @@ -542,10 +542,20 @@ SecurityFlags Flags which control security negotiation and | |||
| 542 | hashing mechanisms (as "must use") on the other hand | 542 | hashing mechanisms (as "must use") on the other hand |
| 543 | does not make much sense. Default flags are | 543 | does not make much sense. Default flags are |
| 544 | 0x07007 | 544 | 0x07007 |
| 545 | (NTLM, NTLMv2 and packet signing allowed). Maximum | 545 | (NTLM, NTLMv2 and packet signing allowed). The maximum |
| 546 | allowable flags if you want to allow mounts to servers | 546 | allowable flags if you want to allow mounts to servers |
| 547 | using weaker password hashes is 0x37037 (lanman, | 547 | using weaker password hashes is 0x37037 (lanman, |
| 548 | plaintext, ntlm, ntlmv2, signing allowed): | 548 | plaintext, ntlm, ntlmv2, signing allowed). Some |
| 549 | SecurityFlags require the corresponding menuconfig | ||
| 550 | options to be enabled (lanman and plaintext require | ||
| 551 | CONFIG_CIFS_WEAK_PW_HASH for example). Enabling | ||
| 552 | plaintext authentication currently requires also | ||
| 553 | enabling lanman authentication in the security flags | ||
| 554 | because the cifs module only supports sending | ||
| 555 | plaintext passwords using the older lanman dialect | ||
| 556 | form of the session setup SMB. (e.g. for authentication | ||
| 557 | using plain text passwords, set the SecurityFlags | ||
| 558 | to 0x30030): | ||
| 549 | 559 | ||
| 550 | may use packet signing 0x00001 | 560 | may use packet signing 0x00001 |
| 551 | must use packet signing 0x01001 | 561 | must use packet signing 0x01001 |
| @@ -642,8 +652,30 @@ The statistics for the number of total SMBs and oplock breaks are different in | |||
| 642 | that they represent all for that share, not just those for which the server | 652 | that they represent all for that share, not just those for which the server |
| 643 | returned success. | 653 | returned success. |
| 644 | 654 | ||
| 645 | Also note that "cat /proc/fs/cifs/DebugData" will display information about | 655 | Also note that "cat /proc/fs/cifs/DebugData" will display information about |
| 646 | the active sessions and the shares that are mounted. | 656 | the active sessions and the shares that are mounted. |
| 647 | Enabling Kerberos (extended security) works when CONFIG_CIFS_EXPERIMENTAL is | 657 | |
| 648 | on but requires a user space helper (from the Samba project). NTLM and NTLMv2 and | 658 | Enabling Kerberos (extended security) works but requires version 1.2 or later |
| 649 | LANMAN support do not require this helper. | 659 | of the helper program cifs.upcall to be present and to be configured in the |
| 660 | /etc/request-key.conf file. The cifs.upcall helper program is from the Samba | ||
| 661 | project (http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not | ||
| 662 | require this helper. Note that NTLMv2 security (which does not require the | ||
| 663 | cifs.upcall helper program), instead of using Kerberos, is sufficient for | ||
| 664 | some use cases. | ||
| 665 | |||
| 666 | Enabling DFS support (used to access shares transparently in an MS-DFS | ||
| 667 | global name space) requires that CONFIG_CIFS_EXPERIMENTAL be enabled. In | ||
| 668 | addition, DFS support for target shares which are specified as UNC | ||
| 669 | names which begin with host names (rather than IP addresses) requires | ||
| 670 | a user space helper (such as cifs.upcall) to be present in order to | ||
| 671 | translate host names to IP addresses, and the user space helper must also | ||
| 672 | be configured in the file /etc/request-key.conf | ||
| 673 | |||
| 674 | To use cifs Kerberos and DFS support, the Linux keyutils package should be | ||
| 675 | installed and something like the following lines should be added to the | ||
| 676 | /etc/request-key.conf file: | ||
| 677 | |||
| 678 | create cifs.spnego * * /usr/local/sbin/cifs.upcall %k | ||
| 679 | create dns_resolver * * /usr/local/sbin/cifs.upcall %k | ||
| 680 | |||
| 681 | |||
diff --git a/fs/cifs/asn1.c b/fs/cifs/asn1.c index 5fabd2caf93c..1b09f1670061 100644 --- a/fs/cifs/asn1.c +++ b/fs/cifs/asn1.c | |||
| @@ -476,6 +476,7 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
| 476 | unsigned int cls, con, tag, oidlen, rc; | 476 | unsigned int cls, con, tag, oidlen, rc; |
| 477 | bool use_ntlmssp = false; | 477 | bool use_ntlmssp = false; |
| 478 | bool use_kerberos = false; | 478 | bool use_kerberos = false; |
| 479 | bool use_mskerberos = false; | ||
| 479 | 480 | ||
| 480 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ | 481 | *secType = NTLM; /* BB eventually make Kerberos or NLTMSSP the default*/ |
| 481 | 482 | ||
| @@ -574,10 +575,12 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
| 574 | *(oid + 1), *(oid + 2), *(oid + 3))); | 575 | *(oid + 1), *(oid + 2), *(oid + 3))); |
| 575 | 576 | ||
| 576 | if (compare_oid(oid, oidlen, MSKRB5_OID, | 577 | if (compare_oid(oid, oidlen, MSKRB5_OID, |
| 577 | MSKRB5_OID_LEN)) | 578 | MSKRB5_OID_LEN) && |
| 578 | use_kerberos = true; | 579 | !use_kerberos) |
| 580 | use_mskerberos = true; | ||
| 579 | else if (compare_oid(oid, oidlen, KRB5_OID, | 581 | else if (compare_oid(oid, oidlen, KRB5_OID, |
| 580 | KRB5_OID_LEN)) | 582 | KRB5_OID_LEN) && |
| 583 | !use_mskerberos) | ||
| 581 | use_kerberos = true; | 584 | use_kerberos = true; |
| 582 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, | 585 | else if (compare_oid(oid, oidlen, NTLMSSP_OID, |
| 583 | NTLMSSP_OID_LEN)) | 586 | NTLMSSP_OID_LEN)) |
| @@ -630,6 +633,8 @@ decode_negTokenInit(unsigned char *security_blob, int length, | |||
| 630 | 633 | ||
| 631 | if (use_kerberos) | 634 | if (use_kerberos) |
| 632 | *secType = Kerberos; | 635 | *secType = Kerberos; |
| 636 | else if (use_mskerberos) | ||
| 637 | *secType = MSKerberos; | ||
| 633 | else if (use_ntlmssp) | 638 | else if (use_ntlmssp) |
| 634 | *secType = NTLMSSP; | 639 | *secType = NTLMSSP; |
| 635 | 640 | ||
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 2434ab0e8791..117ef4bba68e 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
| @@ -114,9 +114,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
| 114 | 114 | ||
| 115 | dp = description + strlen(description); | 115 | dp = description + strlen(description); |
| 116 | 116 | ||
| 117 | /* for now, only sec=krb5 is valid */ | 117 | /* for now, only sec=krb5 and sec=mskrb5 are valid */ |
| 118 | if (server->secType == Kerberos) | 118 | if (server->secType == Kerberos) |
| 119 | sprintf(dp, ";sec=krb5"); | 119 | sprintf(dp, ";sec=krb5"); |
| 120 | else if (server->secType == MSKerberos) | ||
| 121 | sprintf(dp, ";sec=mskrb5"); | ||
| 120 | else | 122 | else |
| 121 | goto out; | 123 | goto out; |
| 122 | 124 | ||
diff --git a/fs/cifs/cifs_spnego.h b/fs/cifs/cifs_spnego.h index 05a34b17a1ab..e4041ec4d712 100644 --- a/fs/cifs/cifs_spnego.h +++ b/fs/cifs/cifs_spnego.h | |||
| @@ -23,7 +23,7 @@ | |||
| 23 | #ifndef _CIFS_SPNEGO_H | 23 | #ifndef _CIFS_SPNEGO_H |
| 24 | #define _CIFS_SPNEGO_H | 24 | #define _CIFS_SPNEGO_H |
| 25 | 25 | ||
| 26 | #define CIFS_SPNEGO_UPCALL_VERSION 1 | 26 | #define CIFS_SPNEGO_UPCALL_VERSION 2 |
| 27 | 27 | ||
| 28 | /* | 28 | /* |
| 29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. | 29 | * The version field should always be set to CIFS_SPNEGO_UPCALL_VERSION. |
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 83fd40dc1ef0..bd5f13d38450 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c | |||
| @@ -294,6 +294,7 @@ void calc_lanman_hash(struct cifsSesInfo *ses, char *lnm_session_key) | |||
| 294 | 294 | ||
| 295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) | 295 | if ((ses->server->secMode & SECMODE_PW_ENCRYPT) == 0) |
| 296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { | 296 | if (extended_security & CIFSSEC_MAY_PLNTXT) { |
| 297 | memset(lnm_session_key, 0, CIFS_SESS_KEY_SIZE); | ||
| 297 | memcpy(lnm_session_key, password_with_pad, | 298 | memcpy(lnm_session_key, password_with_pad, |
| 298 | CIFS_ENCPWD_SIZE); | 299 | CIFS_ENCPWD_SIZE); |
| 299 | return; | 300 | return; |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7e1cf262effe..8dfd6f24d488 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
| @@ -80,7 +80,8 @@ enum securityEnum { | |||
| 80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ | 80 | NTLMv2, /* Legacy NTLM auth with NTLMv2 hash */ |
| 81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ | 81 | RawNTLMSSP, /* NTLMSSP without SPNEGO */ |
| 82 | NTLMSSP, /* NTLMSSP via SPNEGO */ | 82 | NTLMSSP, /* NTLMSSP via SPNEGO */ |
| 83 | Kerberos /* Kerberos via SPNEGO */ | 83 | Kerberos, /* Kerberos via SPNEGO */ |
| 84 | MSKerberos, /* MS Kerberos via SPNEGO */ | ||
| 84 | }; | 85 | }; |
| 85 | 86 | ||
| 86 | enum protocolEnum { | 87 | enum protocolEnum { |
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 0711db65afe8..4c13bcdb92a5 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c | |||
| @@ -3598,19 +3598,21 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
| 3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; | 3598 | char ntlm_session_key[CIFS_SESS_KEY_SIZE]; |
| 3599 | bool ntlmv2_flag = false; | 3599 | bool ntlmv2_flag = false; |
| 3600 | int first_time = 0; | 3600 | int first_time = 0; |
| 3601 | struct TCP_Server_Info *server = pSesInfo->server; | ||
| 3601 | 3602 | ||
| 3602 | /* what if server changes its buffer size after dropping the session? */ | 3603 | /* what if server changes its buffer size after dropping the session? */ |
| 3603 | if (pSesInfo->server->maxBuf == 0) /* no need to send on reconnect */ { | 3604 | if (server->maxBuf == 0) /* no need to send on reconnect */ { |
| 3604 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3605 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
| 3605 | if (rc == -EAGAIN) /* retry only once on 1st time connection */ { | 3606 | if (rc == -EAGAIN) { |
| 3607 | /* retry only once on 1st time connection */ | ||
| 3606 | rc = CIFSSMBNegotiate(xid, pSesInfo); | 3608 | rc = CIFSSMBNegotiate(xid, pSesInfo); |
| 3607 | if (rc == -EAGAIN) | 3609 | if (rc == -EAGAIN) |
| 3608 | rc = -EHOSTDOWN; | 3610 | rc = -EHOSTDOWN; |
| 3609 | } | 3611 | } |
| 3610 | if (rc == 0) { | 3612 | if (rc == 0) { |
| 3611 | spin_lock(&GlobalMid_Lock); | 3613 | spin_lock(&GlobalMid_Lock); |
| 3612 | if (pSesInfo->server->tcpStatus != CifsExiting) | 3614 | if (server->tcpStatus != CifsExiting) |
| 3613 | pSesInfo->server->tcpStatus = CifsGood; | 3615 | server->tcpStatus = CifsGood; |
| 3614 | else | 3616 | else |
| 3615 | rc = -EHOSTDOWN; | 3617 | rc = -EHOSTDOWN; |
| 3616 | spin_unlock(&GlobalMid_Lock); | 3618 | spin_unlock(&GlobalMid_Lock); |
| @@ -3623,23 +3625,22 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
| 3623 | goto ss_err_exit; | 3625 | goto ss_err_exit; |
| 3624 | 3626 | ||
| 3625 | pSesInfo->flags = 0; | 3627 | pSesInfo->flags = 0; |
| 3626 | pSesInfo->capabilities = pSesInfo->server->capabilities; | 3628 | pSesInfo->capabilities = server->capabilities; |
| 3627 | if (linuxExtEnabled == 0) | 3629 | if (linuxExtEnabled == 0) |
| 3628 | pSesInfo->capabilities &= (~CAP_UNIX); | 3630 | pSesInfo->capabilities &= (~CAP_UNIX); |
| 3629 | /* pSesInfo->sequence_number = 0;*/ | 3631 | /* pSesInfo->sequence_number = 0;*/ |
| 3630 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", | 3632 | cFYI(1, ("Security Mode: 0x%x Capabilities: 0x%x TimeAdjust: %d", |
| 3631 | pSesInfo->server->secMode, | 3633 | server->secMode, server->capabilities, server->timeAdj)); |
| 3632 | pSesInfo->server->capabilities, | 3634 | |
| 3633 | pSesInfo->server->timeAdj)); | ||
| 3634 | if (experimEnabled < 2) | 3635 | if (experimEnabled < 2) |
| 3635 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); | 3636 | rc = CIFS_SessSetup(xid, pSesInfo, first_time, nls_info); |
| 3636 | else if (extended_security | 3637 | else if (extended_security |
| 3637 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3638 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
| 3638 | && (pSesInfo->server->secType == NTLMSSP)) { | 3639 | && (server->secType == NTLMSSP)) { |
| 3639 | rc = -EOPNOTSUPP; | 3640 | rc = -EOPNOTSUPP; |
| 3640 | } else if (extended_security | 3641 | } else if (extended_security |
| 3641 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) | 3642 | && (pSesInfo->capabilities & CAP_EXTENDED_SECURITY) |
| 3642 | && (pSesInfo->server->secType == RawNTLMSSP)) { | 3643 | && (server->secType == RawNTLMSSP)) { |
| 3643 | cFYI(1, ("NTLMSSP sesssetup")); | 3644 | cFYI(1, ("NTLMSSP sesssetup")); |
| 3644 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, | 3645 | rc = CIFSNTLMSSPNegotiateSessSetup(xid, pSesInfo, &ntlmv2_flag, |
| 3645 | nls_info); | 3646 | nls_info); |
| @@ -3668,12 +3669,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
| 3668 | 3669 | ||
| 3669 | } else { | 3670 | } else { |
| 3670 | SMBNTencrypt(pSesInfo->password, | 3671 | SMBNTencrypt(pSesInfo->password, |
| 3671 | pSesInfo->server->cryptKey, | 3672 | server->cryptKey, |
| 3672 | ntlm_session_key); | 3673 | ntlm_session_key); |
| 3673 | 3674 | ||
| 3674 | if (first_time) | 3675 | if (first_time) |
| 3675 | cifs_calculate_mac_key( | 3676 | cifs_calculate_mac_key( |
| 3676 | &pSesInfo->server->mac_signing_key, | 3677 | &server->mac_signing_key, |
| 3677 | ntlm_session_key, | 3678 | ntlm_session_key, |
| 3678 | pSesInfo->password); | 3679 | pSesInfo->password); |
| 3679 | } | 3680 | } |
| @@ -3686,13 +3687,13 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *pSesInfo, | |||
| 3686 | nls_info); | 3687 | nls_info); |
| 3687 | } | 3688 | } |
| 3688 | } else { /* old style NTLM 0.12 session setup */ | 3689 | } else { /* old style NTLM 0.12 session setup */ |
| 3689 | SMBNTencrypt(pSesInfo->password, pSesInfo->server->cryptKey, | 3690 | SMBNTencrypt(pSesInfo->password, server->cryptKey, |
| 3690 | ntlm_session_key); | 3691 | ntlm_session_key); |
| 3691 | 3692 | ||
| 3692 | if (first_time) | 3693 | if (first_time) |
| 3693 | cifs_calculate_mac_key( | 3694 | cifs_calculate_mac_key(&server->mac_signing_key, |
| 3694 | &pSesInfo->server->mac_signing_key, | 3695 | ntlm_session_key, |
| 3695 | ntlm_session_key, pSesInfo->password); | 3696 | pSesInfo->password); |
| 3696 | 3697 | ||
| 3697 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); | 3698 | rc = CIFSSessSetup(xid, pSesInfo, ntlm_session_key, nls_info); |
| 3698 | } | 3699 | } |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index f730ef35499e..a2e0673e1b08 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
| @@ -47,11 +47,18 @@ static int dns_resolver_instantiate(struct key *key, const void *data, | |||
| 47 | return rc; | 47 | return rc; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | static void | ||
| 51 | dns_resolver_destroy(struct key *key) | ||
| 52 | { | ||
| 53 | kfree(key->payload.data); | ||
| 54 | } | ||
| 55 | |||
| 50 | struct key_type key_type_dns_resolver = { | 56 | struct key_type key_type_dns_resolver = { |
| 51 | .name = "dns_resolver", | 57 | .name = "dns_resolver", |
| 52 | .def_datalen = sizeof(struct in_addr), | 58 | .def_datalen = sizeof(struct in_addr), |
| 53 | .describe = user_describe, | 59 | .describe = user_describe, |
| 54 | .instantiate = dns_resolver_instantiate, | 60 | .instantiate = dns_resolver_instantiate, |
| 61 | .destroy = dns_resolver_destroy, | ||
| 55 | .match = user_match, | 62 | .match = user_match, |
| 56 | }; | 63 | }; |
| 57 | 64 | ||
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index ff14d14903a0..cbefe1f1f9fe 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -833,6 +833,10 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
| 833 | return -EBADF; | 833 | return -EBADF; |
| 834 | open_file = (struct cifsFileInfo *) file->private_data; | 834 | open_file = (struct cifsFileInfo *) file->private_data; |
| 835 | 835 | ||
| 836 | rc = generic_write_checks(file, poffset, &write_size, 0); | ||
| 837 | if (rc) | ||
| 838 | return rc; | ||
| 839 | |||
| 836 | xid = GetXid(); | 840 | xid = GetXid(); |
| 837 | 841 | ||
| 838 | if (*poffset > file->f_path.dentry->d_inode->i_size) | 842 | if (*poffset > file->f_path.dentry->d_inode->i_size) |
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 848286861c31..9c548f110102 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
| @@ -546,7 +546,8 @@ int cifs_get_inode_info(struct inode **pinode, | |||
| 546 | if ((inode->i_mode & S_IWUGO) == 0 && | 546 | if ((inode->i_mode & S_IWUGO) == 0 && |
| 547 | (attr & ATTR_READONLY) == 0) | 547 | (attr & ATTR_READONLY) == 0) |
| 548 | inode->i_mode |= (S_IWUGO & default_mode); | 548 | inode->i_mode |= (S_IWUGO & default_mode); |
| 549 | inode->i_mode &= ~S_IFMT; | 549 | |
| 550 | inode->i_mode &= ~S_IFMT; | ||
| 550 | } | 551 | } |
| 551 | /* clear write bits if ATTR_READONLY is set */ | 552 | /* clear write bits if ATTR_READONLY is set */ |
| 552 | if (attr & ATTR_READONLY) | 553 | if (attr & ATTR_READONLY) |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index ed150efbe27c..252fdc0567f1 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
| @@ -409,6 +409,8 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
| 409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH | 409 | #ifdef CONFIG_CIFS_WEAK_PW_HASH |
| 410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; | 410 | char lnm_session_key[CIFS_SESS_KEY_SIZE]; |
| 411 | 411 | ||
| 412 | pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE; | ||
| 413 | |||
| 412 | /* no capabilities flags in old lanman negotiation */ | 414 | /* no capabilities flags in old lanman negotiation */ |
| 413 | 415 | ||
| 414 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); | 416 | pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE); |
| @@ -505,7 +507,7 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
| 505 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); | 507 | unicode_ssetup_strings(&bcc_ptr, ses, nls_cp); |
| 506 | } else | 508 | } else |
| 507 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); | 509 | ascii_ssetup_strings(&bcc_ptr, ses, nls_cp); |
| 508 | } else if (type == Kerberos) { | 510 | } else if (type == Kerberos || type == MSKerberos) { |
| 509 | #ifdef CONFIG_CIFS_UPCALL | 511 | #ifdef CONFIG_CIFS_UPCALL |
| 510 | struct cifs_spnego_msg *msg; | 512 | struct cifs_spnego_msg *msg; |
| 511 | spnego_key = cifs_get_spnego_key(ses); | 513 | spnego_key = cifs_get_spnego_key(ses); |
| @@ -516,6 +518,15 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
| 516 | } | 518 | } |
| 517 | 519 | ||
| 518 | msg = spnego_key->payload.data; | 520 | msg = spnego_key->payload.data; |
| 521 | /* check version field to make sure that cifs.upcall is | ||
| 522 | sending us a response in an expected form */ | ||
| 523 | if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) { | ||
| 524 | cERROR(1, ("incorrect version of cifs.upcall (expected" | ||
| 525 | " %d but got %d)", | ||
| 526 | CIFS_SPNEGO_UPCALL_VERSION, msg->version)); | ||
| 527 | rc = -EKEYREJECTED; | ||
| 528 | goto ssetup_exit; | ||
| 529 | } | ||
| 519 | /* bail out if key is too long */ | 530 | /* bail out if key is too long */ |
| 520 | if (msg->sesskey_len > | 531 | if (msg->sesskey_len > |
| 521 | sizeof(ses->server->mac_signing_key.data.krb5)) { | 532 | sizeof(ses->server->mac_signing_key.data.krb5)) { |
diff --git a/fs/compat.c b/fs/compat.c index c9d1472e65c5..075d0509970d 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
| @@ -792,8 +792,10 @@ static int compat_fillonedir(void *__buf, const char *name, int namlen, | |||
| 792 | if (buf->result) | 792 | if (buf->result) |
| 793 | return -EINVAL; | 793 | return -EINVAL; |
| 794 | d_ino = ino; | 794 | d_ino = ino; |
| 795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 795 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
| 796 | buf->result = -EOVERFLOW; | ||
| 796 | return -EOVERFLOW; | 797 | return -EOVERFLOW; |
| 798 | } | ||
| 797 | buf->result++; | 799 | buf->result++; |
| 798 | dirent = buf->dirent; | 800 | dirent = buf->dirent; |
| 799 | if (!access_ok(VERIFY_WRITE, dirent, | 801 | if (!access_ok(VERIFY_WRITE, dirent, |
| @@ -862,8 +864,10 @@ static int compat_filldir(void *__buf, const char *name, int namlen, | |||
| 862 | if (reclen > buf->count) | 864 | if (reclen > buf->count) |
| 863 | return -EINVAL; | 865 | return -EINVAL; |
| 864 | d_ino = ino; | 866 | d_ino = ino; |
| 865 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 867 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
| 868 | buf->error = -EOVERFLOW; | ||
| 866 | return -EOVERFLOW; | 869 | return -EOVERFLOW; |
| 870 | } | ||
| 867 | dirent = buf->previous; | 871 | dirent = buf->previous; |
| 868 | if (dirent) { | 872 | if (dirent) { |
| 869 | if (__put_user(offset, &dirent->d_off)) | 873 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7a8db78a91d2..8e93341f3e82 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -1311,16 +1311,18 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 1311 | * Ensure that no racing symlink() will make detach_prep() fail while | 1311 | * Ensure that no racing symlink() will make detach_prep() fail while |
| 1312 | * the new link is temporarily attached | 1312 | * the new link is temporarily attached |
| 1313 | */ | 1313 | */ |
| 1314 | mutex_lock(&configfs_symlink_mutex); | ||
| 1315 | spin_lock(&configfs_dirent_lock); | ||
| 1316 | do { | 1314 | do { |
| 1317 | struct mutex *wait_mutex; | 1315 | struct mutex *wait_mutex; |
| 1318 | 1316 | ||
| 1317 | mutex_lock(&configfs_symlink_mutex); | ||
| 1318 | spin_lock(&configfs_dirent_lock); | ||
| 1319 | ret = configfs_detach_prep(dentry, &wait_mutex); | 1319 | ret = configfs_detach_prep(dentry, &wait_mutex); |
| 1320 | if (ret) { | 1320 | if (ret) |
| 1321 | configfs_detach_rollback(dentry); | 1321 | configfs_detach_rollback(dentry); |
| 1322 | spin_unlock(&configfs_dirent_lock); | 1322 | spin_unlock(&configfs_dirent_lock); |
| 1323 | mutex_unlock(&configfs_symlink_mutex); | 1323 | mutex_unlock(&configfs_symlink_mutex); |
| 1324 | |||
| 1325 | if (ret) { | ||
| 1324 | if (ret != -EAGAIN) { | 1326 | if (ret != -EAGAIN) { |
| 1325 | config_item_put(parent_item); | 1327 | config_item_put(parent_item); |
| 1326 | return ret; | 1328 | return ret; |
| @@ -1329,13 +1331,8 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 1329 | /* Wait until the racing operation terminates */ | 1331 | /* Wait until the racing operation terminates */ |
| 1330 | mutex_lock(wait_mutex); | 1332 | mutex_lock(wait_mutex); |
| 1331 | mutex_unlock(wait_mutex); | 1333 | mutex_unlock(wait_mutex); |
| 1332 | |||
| 1333 | mutex_lock(&configfs_symlink_mutex); | ||
| 1334 | spin_lock(&configfs_dirent_lock); | ||
| 1335 | } | 1334 | } |
| 1336 | } while (ret == -EAGAIN); | 1335 | } while (ret == -EAGAIN); |
| 1337 | spin_unlock(&configfs_dirent_lock); | ||
| 1338 | mutex_unlock(&configfs_symlink_mutex); | ||
| 1339 | 1336 | ||
| 1340 | /* Get a working ref for the duration of this function */ | 1337 | /* Get a working ref for the duration of this function */ |
| 1341 | item = configfs_get_config_item(dentry); | 1338 | item = configfs_get_config_item(dentry); |
diff --git a/fs/dcache.c b/fs/dcache.c index 101663d15e9f..e7a1a99b7464 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -1236,7 +1236,7 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
| 1236 | * If no entry exists with the exact case name, allocate new dentry with | 1236 | * If no entry exists with the exact case name, allocate new dentry with |
| 1237 | * the exact case, and return the spliced entry. | 1237 | * the exact case, and return the spliced entry. |
| 1238 | */ | 1238 | */ |
| 1239 | struct dentry *d_add_ci(struct inode *inode, struct dentry *dentry, | 1239 | struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode, |
| 1240 | struct qstr *name) | 1240 | struct qstr *name) |
| 1241 | { | 1241 | { |
| 1242 | int error; | 1242 | int error; |
| @@ -1395,6 +1395,10 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
| 1395 | if (dentry->d_parent != parent) | 1395 | if (dentry->d_parent != parent) |
| 1396 | goto next; | 1396 | goto next; |
| 1397 | 1397 | ||
| 1398 | /* non-existing due to RCU? */ | ||
| 1399 | if (d_unhashed(dentry)) | ||
| 1400 | goto next; | ||
| 1401 | |||
| 1398 | /* | 1402 | /* |
| 1399 | * It is safe to compare names since d_move() cannot | 1403 | * It is safe to compare names since d_move() cannot |
| 1400 | * change the qstr (protected by d_lock). | 1404 | * change the qstr (protected by d_lock). |
| @@ -1410,10 +1414,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name) | |||
| 1410 | goto next; | 1414 | goto next; |
| 1411 | } | 1415 | } |
| 1412 | 1416 | ||
| 1413 | if (!d_unhashed(dentry)) { | 1417 | atomic_inc(&dentry->d_count); |
| 1414 | atomic_inc(&dentry->d_count); | 1418 | found = dentry; |
| 1415 | found = dentry; | ||
| 1416 | } | ||
| 1417 | spin_unlock(&dentry->d_lock); | 1419 | spin_unlock(&dentry->d_lock); |
| 1418 | break; | 1420 | break; |
| 1419 | next: | 1421 | next: |
diff --git a/fs/efs/namei.c b/fs/efs/namei.c index 3a404e7fad53..291abb11e20e 100644 --- a/fs/efs/namei.c +++ b/fs/efs/namei.c | |||
| @@ -74,8 +74,7 @@ struct dentry *efs_lookup(struct inode *dir, struct dentry *dentry, struct namei | |||
| 74 | } | 74 | } |
| 75 | unlock_kernel(); | 75 | unlock_kernel(); |
| 76 | 76 | ||
| 77 | d_add(dentry, inode); | 77 | return d_splice_alias(inode, dentry); |
| 78 | return NULL; | ||
| 79 | } | 78 | } |
| 80 | 79 | ||
| 81 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, | 80 | static struct inode *efs_nfs_get_inode(struct super_block *sb, u64 ino, |
| @@ -752,11 +752,11 @@ static int exec_mmap(struct mm_struct *mm) | |||
| 752 | tsk->active_mm = mm; | 752 | tsk->active_mm = mm; |
| 753 | activate_mm(active_mm, mm); | 753 | activate_mm(active_mm, mm); |
| 754 | task_unlock(tsk); | 754 | task_unlock(tsk); |
| 755 | mm_update_next_owner(old_mm); | ||
| 756 | arch_pick_mmap_layout(mm); | 755 | arch_pick_mmap_layout(mm); |
| 757 | if (old_mm) { | 756 | if (old_mm) { |
| 758 | up_read(&old_mm->mmap_sem); | 757 | up_read(&old_mm->mmap_sem); |
| 759 | BUG_ON(active_mm != old_mm); | 758 | BUG_ON(active_mm != old_mm); |
| 759 | mm_update_next_owner(old_mm); | ||
| 760 | mmput(old_mm); | 760 | mmput(old_mm); |
| 761 | return 0; | 761 | return 0; |
| 762 | } | 762 | } |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 1ae5004e93fc..e9fa960ba6da 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -1626,6 +1626,9 @@ ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | |||
| 1626 | free_blocks = | 1626 | free_blocks = |
| 1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); |
| 1628 | #endif | 1628 | #endif |
| 1629 | if (free_blocks <= root_blocks) | ||
| 1630 | /* we don't have free space */ | ||
| 1631 | return 0; | ||
| 1629 | if (free_blocks - root_blocks < nblocks) | 1632 | if (free_blocks - root_blocks < nblocks) |
| 1630 | return free_blocks - root_blocks; | 1633 | return free_blocks - root_blocks; |
| 1631 | return nblocks; | 1634 | return nblocks; |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d3d23d73c08b..ec8e33b45219 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
| @@ -411,7 +411,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
| 411 | get_dtype(sb, fname->file_type)); | 411 | get_dtype(sb, fname->file_type)); |
| 412 | if (error) { | 412 | if (error) { |
| 413 | filp->f_pos = curr_pos; | 413 | filp->f_pos = curr_pos; |
| 414 | info->extra_fname = fname->next; | 414 | info->extra_fname = fname; |
| 415 | return error; | 415 | return error; |
| 416 | } | 416 | } |
| 417 | fname = fname->next; | 417 | fname = fname->next; |
| @@ -450,11 +450,21 @@ static int ext4_dx_readdir(struct file * filp, | |||
| 450 | * If there are any leftover names on the hash collision | 450 | * If there are any leftover names on the hash collision |
| 451 | * chain, return them first. | 451 | * chain, return them first. |
| 452 | */ | 452 | */ |
| 453 | if (info->extra_fname && | 453 | if (info->extra_fname) { |
| 454 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 454 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
| 455 | goto finished; | 455 | goto finished; |
| 456 | 456 | ||
| 457 | if (!info->curr_node) | 457 | info->extra_fname = NULL; |
| 458 | info->curr_node = rb_next(info->curr_node); | ||
| 459 | if (!info->curr_node) { | ||
| 460 | if (info->next_hash == ~0) { | ||
| 461 | filp->f_pos = EXT4_HTREE_EOF; | ||
| 462 | goto finished; | ||
| 463 | } | ||
| 464 | info->curr_hash = info->next_hash; | ||
| 465 | info->curr_minor_hash = 0; | ||
| 466 | } | ||
| 467 | } else if (!info->curr_node) | ||
| 458 | info->curr_node = rb_first(&info->root); | 468 | info->curr_node = rb_first(&info->root); |
| 459 | 469 | ||
| 460 | while (1) { | 470 | while (1) { |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6c7924d9e358..295003241d3d 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -1072,6 +1072,8 @@ extern void ext4_set_inode_flags(struct inode *); | |||
| 1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
| 1073 | extern void ext4_set_aops(struct inode *inode); | 1073 | extern void ext4_set_aops(struct inode *inode); |
| 1074 | extern int ext4_writepage_trans_blocks(struct inode *); | 1074 | extern int ext4_writepage_trans_blocks(struct inode *); |
| 1075 | extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks); | ||
| 1076 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | ||
| 1075 | extern int ext4_block_truncate_page(handle_t *handle, | 1077 | extern int ext4_block_truncate_page(handle_t *handle, |
| 1076 | struct address_space *mapping, loff_t from); | 1078 | struct address_space *mapping, loff_t from); |
| 1077 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | 1079 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); |
| @@ -1227,6 +1229,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations; | |||
| 1227 | /* extents.c */ | 1229 | /* extents.c */ |
| 1228 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 1230 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
| 1229 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 1231 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
| 1232 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | ||
| 1233 | int chunk); | ||
| 1230 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | 1234 | extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, |
| 1231 | ext4_lblk_t iblock, | 1235 | ext4_lblk_t iblock, |
| 1232 | unsigned long max_blocks, struct buffer_head *bh_result, | 1236 | unsigned long max_blocks, struct buffer_head *bh_result, |
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index 6c166c0a54b7..d33dc56d6986 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
| @@ -216,7 +216,9 @@ extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks); | |||
| 216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); | 216 | extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *); |
| 217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); | 217 | extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t); |
| 218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); | 218 | extern int ext4_extent_tree_init(handle_t *, struct inode *); |
| 219 | extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); | 219 | extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, |
| 220 | int num, | ||
| 221 | struct ext4_ext_path *path); | ||
| 220 | extern int ext4_ext_try_to_merge(struct inode *inode, | 222 | extern int ext4_ext_try_to_merge(struct inode *inode, |
| 221 | struct ext4_ext_path *path, | 223 | struct ext4_ext_path *path, |
| 222 | struct ext4_extent *); | 224 | struct ext4_extent *); |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index eb8bc3afe6e9..b455c685a98b 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
| @@ -51,6 +51,14 @@ | |||
| 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ | 51 | EXT4_XATTR_TRANS_BLOCKS - 2 + \ |
| 52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | 52 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) |
| 53 | 53 | ||
| 54 | /* | ||
| 55 | * Define the number of metadata blocks we need to account to modify data. | ||
| 56 | * | ||
| 57 | * This includes super block, inode block, quota blocks and xattr blocks | ||
| 58 | */ | ||
| 59 | #define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ | ||
| 60 | 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) | ||
| 61 | |||
| 54 | /* Delete operations potentially hit one directory's namespace plus an | 62 | /* Delete operations potentially hit one directory's namespace plus an |
| 55 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be | 63 | * entire inode, plus arbitrary amounts of bitmap/indirection data. Be |
| 56 | * generous. We can grow the delete transaction later if necessary. */ | 64 | * generous. We can grow the delete transaction later if necessary. */ |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 612c3d2c3824..b24d3c53f20c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -1747,54 +1747,61 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, | |||
| 1747 | } | 1747 | } |
| 1748 | 1748 | ||
| 1749 | /* | 1749 | /* |
| 1750 | * ext4_ext_calc_credits_for_insert: | 1750 | * ext4_ext_calc_credits_for_single_extent: |
| 1751 | * This routine returns max. credits that the extent tree can consume. | 1751 | * This routine returns max. credits that are needed to insert an extent |
| 1752 | * It should be OK for low-performance paths like ->writepage() | 1752 | * to the extent tree. |
| 1753 | * To allow many writing processes to fit into a single transaction, | 1753 | * When passing the actual path, the caller should calculate credits |
| 1754 | * the caller should calculate credits under i_data_sem and | 1754 | * under i_data_sem. |
| 1755 | * pass the actual path. | ||
| 1756 | */ | 1755 | */ |
| 1757 | int ext4_ext_calc_credits_for_insert(struct inode *inode, | 1756 | int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, |
| 1758 | struct ext4_ext_path *path) | 1757 | struct ext4_ext_path *path) |
| 1759 | { | 1758 | { |
| 1760 | int depth, needed; | ||
| 1761 | |||
| 1762 | if (path) { | 1759 | if (path) { |
| 1760 | int depth = ext_depth(inode); | ||
| 1761 | int ret = 0; | ||
| 1762 | |||
| 1763 | /* probably there is space in leaf? */ | 1763 | /* probably there is space in leaf? */ |
| 1764 | depth = ext_depth(inode); | ||
| 1765 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) | 1764 | if (le16_to_cpu(path[depth].p_hdr->eh_entries) |
| 1766 | < le16_to_cpu(path[depth].p_hdr->eh_max)) | 1765 | < le16_to_cpu(path[depth].p_hdr->eh_max)) { |
| 1767 | return 1; | ||
| 1768 | } | ||
| 1769 | 1766 | ||
| 1770 | /* | 1767 | /* |
| 1771 | * given 32-bit logical block (4294967296 blocks), max. tree | 1768 | * There is some space in the leaf tree, no |
| 1772 | * can be 4 levels in depth -- 4 * 340^4 == 53453440000. | 1769 | * need to account for leaf block credit |
| 1773 | * Let's also add one more level for imbalance. | 1770 | * |
| 1774 | */ | 1771 | * bitmaps and block group descriptor blocks |
| 1775 | depth = 5; | 1772 | * and other metadata blocks still need to be |
| 1776 | 1773 | * accounted. | |
| 1777 | /* allocation of new data block(s) */ | 1774 | */ |
| 1778 | needed = 2; | 1775 | /* 1 bitmap, 1 block group descriptor */ |
| 1776 | ret = 2 + EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
| 1777 | } | ||
| 1778 | } | ||
| 1779 | 1779 | ||
| 1780 | /* | 1780 | return ext4_chunk_trans_blocks(inode, nrblocks); |
| 1781 | * tree can be full, so it would need to grow in depth: | 1781 | } |
| 1782 | * we need one credit to modify old root, credits for | ||
| 1783 | * new root will be added in split accounting | ||
| 1784 | */ | ||
| 1785 | needed += 1; | ||
| 1786 | 1782 | ||
| 1787 | /* | 1783 | /* |
| 1788 | * Index split can happen, we would need: | 1784 | * How many index/leaf blocks need to change/allocate to modify nrblocks? |
| 1789 | * allocate intermediate indexes (bitmap + group) | 1785 | * |
| 1790 | * + change two blocks at each level, but root (already included) | 1786 | * if nrblocks are fit in a single extent (chunk flag is 1), then |
| 1791 | */ | 1787 | * in the worst case, each tree level index/leaf needs to be changed |
| 1792 | needed += (depth * 2) + (depth * 2); | 1788 | * if the tree split due to insert a new extent, then the old tree |
| 1789 | * index/leaf need to be updated too | ||
| 1790 | * | ||
| 1791 | * If the nrblocks are discontiguous, they could cause | ||
| 1792 | * the whole tree split more than once, but this is really rare. | ||
| 1793 | */ | ||
| 1794 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
| 1795 | { | ||
| 1796 | int index; | ||
| 1797 | int depth = ext_depth(inode); | ||
| 1793 | 1798 | ||
| 1794 | /* any allocation modifies superblock */ | 1799 | if (chunk) |
| 1795 | needed += 1; | 1800 | index = depth * 2; |
| 1801 | else | ||
| 1802 | index = depth * 3; | ||
| 1796 | 1803 | ||
| 1797 | return needed; | 1804 | return index; |
| 1798 | } | 1805 | } |
| 1799 | 1806 | ||
| 1800 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 1807 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
| @@ -1921,9 +1928,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 1921 | correct_index = 1; | 1928 | correct_index = 1; |
| 1922 | credits += (ext_depth(inode)) + 1; | 1929 | credits += (ext_depth(inode)) + 1; |
| 1923 | } | 1930 | } |
| 1924 | #ifdef CONFIG_QUOTA | ||
| 1925 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | 1931 | credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); |
| 1926 | #endif | ||
| 1927 | 1932 | ||
| 1928 | err = ext4_ext_journal_restart(handle, credits); | 1933 | err = ext4_ext_journal_restart(handle, credits); |
| 1929 | if (err) | 1934 | if (err) |
| @@ -2805,7 +2810,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
| 2805 | /* | 2810 | /* |
| 2806 | * probably first extent we're gonna free will be last in block | 2811 | * probably first extent we're gonna free will be last in block |
| 2807 | */ | 2812 | */ |
| 2808 | err = ext4_writepage_trans_blocks(inode) + 3; | 2813 | err = ext4_writepage_trans_blocks(inode); |
| 2809 | handle = ext4_journal_start(inode, err); | 2814 | handle = ext4_journal_start(inode, err); |
| 2810 | if (IS_ERR(handle)) | 2815 | if (IS_ERR(handle)) |
| 2811 | return; | 2816 | return; |
| @@ -2819,7 +2824,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
| 2819 | down_write(&EXT4_I(inode)->i_data_sem); | 2824 | down_write(&EXT4_I(inode)->i_data_sem); |
| 2820 | ext4_ext_invalidate_cache(inode); | 2825 | ext4_ext_invalidate_cache(inode); |
| 2821 | 2826 | ||
| 2822 | ext4_mb_discard_inode_preallocations(inode); | 2827 | ext4_discard_reservation(inode); |
| 2823 | 2828 | ||
| 2824 | /* | 2829 | /* |
| 2825 | * TODO: optimization is possible here. | 2830 | * TODO: optimization is possible here. |
| @@ -2858,27 +2863,6 @@ out_stop: | |||
| 2858 | ext4_journal_stop(handle); | 2863 | ext4_journal_stop(handle); |
| 2859 | } | 2864 | } |
| 2860 | 2865 | ||
| 2861 | /* | ||
| 2862 | * ext4_ext_writepage_trans_blocks: | ||
| 2863 | * calculate max number of blocks we could modify | ||
| 2864 | * in order to allocate new block for an inode | ||
| 2865 | */ | ||
| 2866 | int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) | ||
| 2867 | { | ||
| 2868 | int needed; | ||
| 2869 | |||
| 2870 | needed = ext4_ext_calc_credits_for_insert(inode, NULL); | ||
| 2871 | |||
| 2872 | /* caller wants to allocate num blocks, but note it includes sb */ | ||
| 2873 | needed = needed * num - (num - 1); | ||
| 2874 | |||
| 2875 | #ifdef CONFIG_QUOTA | ||
| 2876 | needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
| 2877 | #endif | ||
| 2878 | |||
| 2879 | return needed; | ||
| 2880 | } | ||
| 2881 | |||
| 2882 | static void ext4_falloc_update_inode(struct inode *inode, | 2866 | static void ext4_falloc_update_inode(struct inode *inode, |
| 2883 | int mode, loff_t new_size, int update_ctime) | 2867 | int mode, loff_t new_size, int update_ctime) |
| 2884 | { | 2868 | { |
| @@ -2939,10 +2923,9 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len) | |||
| 2939 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 2923 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
| 2940 | - block; | 2924 | - block; |
| 2941 | /* | 2925 | /* |
| 2942 | * credits to insert 1 extent into extent tree + buffers to be able to | 2926 | * credits to insert 1 extent into extent tree |
| 2943 | * modify 1 super block, 1 block bitmap and 1 group descriptor. | ||
| 2944 | */ | 2927 | */ |
| 2945 | credits = EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 3; | 2928 | credits = ext4_chunk_trans_blocks(inode, max_blocks); |
| 2946 | mutex_lock(&inode->i_mutex); | 2929 | mutex_lock(&inode->i_mutex); |
| 2947 | retry: | 2930 | retry: |
| 2948 | while (ret >= 0 && ret < max_blocks) { | 2931 | while (ret >= 0 && ret < max_blocks) { |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 655e760212b8..f344834bbf58 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -351,7 +351,7 @@ find_close_to_parent: | |||
| 351 | goto found_flexbg; | 351 | goto found_flexbg; |
| 352 | } | 352 | } |
| 353 | 353 | ||
| 354 | if (best_flex < 0 || | 354 | if (flex_group[best_flex].free_inodes == 0 || |
| 355 | (flex_group[i].free_blocks > | 355 | (flex_group[i].free_blocks > |
| 356 | flex_group[best_flex].free_blocks && | 356 | flex_group[best_flex].free_blocks && |
| 357 | flex_group[i].free_inodes)) | 357 | flex_group[i].free_inodes)) |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 59fbbe899acc..7e91913e325b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -41,6 +41,8 @@ | |||
| 41 | #include "acl.h" | 41 | #include "acl.h" |
| 42 | #include "ext4_extents.h" | 42 | #include "ext4_extents.h" |
| 43 | 43 | ||
| 44 | #define MPAGE_DA_EXTENT_TAIL 0x01 | ||
| 45 | |||
| 44 | static inline int ext4_begin_ordered_truncate(struct inode *inode, | 46 | static inline int ext4_begin_ordered_truncate(struct inode *inode, |
| 45 | loff_t new_size) | 47 | loff_t new_size) |
| 46 | { | 48 | { |
| @@ -1005,6 +1007,9 @@ static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks) | |||
| 1005 | */ | 1007 | */ |
| 1006 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) | 1008 | static int ext4_calc_metadata_amount(struct inode *inode, int blocks) |
| 1007 | { | 1009 | { |
| 1010 | if (!blocks) | ||
| 1011 | return 0; | ||
| 1012 | |||
| 1008 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 1013 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) |
| 1009 | return ext4_ext_calc_metadata_amount(inode, blocks); | 1014 | return ext4_ext_calc_metadata_amount(inode, blocks); |
| 1010 | 1015 | ||
| @@ -1041,18 +1046,6 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
| 1041 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1046 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1042 | } | 1047 | } |
| 1043 | 1048 | ||
| 1044 | /* Maximum number of blocks we map for direct IO at once. */ | ||
| 1045 | #define DIO_MAX_BLOCKS 4096 | ||
| 1046 | /* | ||
| 1047 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
| 1048 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
| 1049 | * For B blocks with A block pointers per block we need: | ||
| 1050 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
| 1051 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
| 1052 | */ | ||
| 1053 | #define DIO_CREDITS 25 | ||
| 1054 | |||
| 1055 | |||
| 1056 | /* | 1049 | /* |
| 1057 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, | 1050 | * The ext4_get_blocks_wrap() function try to look up the requested blocks, |
| 1058 | * and returns if the blocks are already mapped. | 1051 | * and returns if the blocks are already mapped. |
| @@ -1164,19 +1157,23 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
| 1164 | return retval; | 1157 | return retval; |
| 1165 | } | 1158 | } |
| 1166 | 1159 | ||
| 1160 | /* Maximum number of blocks we map for direct IO at once. */ | ||
| 1161 | #define DIO_MAX_BLOCKS 4096 | ||
| 1162 | |||
| 1167 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1163 | static int ext4_get_block(struct inode *inode, sector_t iblock, |
| 1168 | struct buffer_head *bh_result, int create) | 1164 | struct buffer_head *bh_result, int create) |
| 1169 | { | 1165 | { |
| 1170 | handle_t *handle = ext4_journal_current_handle(); | 1166 | handle_t *handle = ext4_journal_current_handle(); |
| 1171 | int ret = 0, started = 0; | 1167 | int ret = 0, started = 0; |
| 1172 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 1168 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
| 1169 | int dio_credits; | ||
| 1173 | 1170 | ||
| 1174 | if (create && !handle) { | 1171 | if (create && !handle) { |
| 1175 | /* Direct IO write... */ | 1172 | /* Direct IO write... */ |
| 1176 | if (max_blocks > DIO_MAX_BLOCKS) | 1173 | if (max_blocks > DIO_MAX_BLOCKS) |
| 1177 | max_blocks = DIO_MAX_BLOCKS; | 1174 | max_blocks = DIO_MAX_BLOCKS; |
| 1178 | handle = ext4_journal_start(inode, DIO_CREDITS + | 1175 | dio_credits = ext4_chunk_trans_blocks(inode, max_blocks); |
| 1179 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | 1176 | handle = ext4_journal_start(inode, dio_credits); |
| 1180 | if (IS_ERR(handle)) { | 1177 | if (IS_ERR(handle)) { |
| 1181 | ret = PTR_ERR(handle); | 1178 | ret = PTR_ERR(handle); |
| 1182 | goto out; | 1179 | goto out; |
| @@ -1559,7 +1556,25 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
| 1559 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1556 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1560 | int total, mdb, mdb_free, release; | 1557 | int total, mdb, mdb_free, release; |
| 1561 | 1558 | ||
| 1559 | if (!to_free) | ||
| 1560 | return; /* Nothing to release, exit */ | ||
| 1561 | |||
| 1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1562 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1563 | |||
| 1564 | if (!EXT4_I(inode)->i_reserved_data_blocks) { | ||
| 1565 | /* | ||
| 1566 | * if there is no reserved blocks, but we try to free some | ||
| 1567 | * then the counter is messed up somewhere. | ||
| 1568 | * but since this function is called from invalidate | ||
| 1569 | * page, it's harmless to return without any action | ||
| 1570 | */ | ||
| 1571 | printk(KERN_INFO "ext4 delalloc try to release %d reserved " | ||
| 1572 | "blocks for inode %lu, but there is no reserved " | ||
| 1573 | "data blocks\n", to_free, inode->i_ino); | ||
| 1574 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | ||
| 1575 | return; | ||
| 1576 | } | ||
| 1577 | |||
| 1563 | /* recalculate the number of metablocks still need to be reserved */ | 1578 | /* recalculate the number of metablocks still need to be reserved */ |
| 1564 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; | 1579 | total = EXT4_I(inode)->i_reserved_data_blocks - to_free; |
| 1565 | mdb = ext4_calc_metadata_amount(inode, total); | 1580 | mdb = ext4_calc_metadata_amount(inode, total); |
| @@ -1613,11 +1628,13 @@ struct mpage_da_data { | |||
| 1613 | unsigned long first_page, next_page; /* extent of pages */ | 1628 | unsigned long first_page, next_page; /* extent of pages */ |
| 1614 | get_block_t *get_block; | 1629 | get_block_t *get_block; |
| 1615 | struct writeback_control *wbc; | 1630 | struct writeback_control *wbc; |
| 1631 | int io_done; | ||
| 1632 | long pages_written; | ||
| 1616 | }; | 1633 | }; |
| 1617 | 1634 | ||
| 1618 | /* | 1635 | /* |
| 1619 | * mpage_da_submit_io - walks through extent of pages and try to write | 1636 | * mpage_da_submit_io - walks through extent of pages and try to write |
| 1620 | * them with __mpage_writepage() | 1637 | * them with writepage() call back |
| 1621 | * | 1638 | * |
| 1622 | * @mpd->inode: inode | 1639 | * @mpd->inode: inode |
| 1623 | * @mpd->first_page: first page of the extent | 1640 | * @mpd->first_page: first page of the extent |
| @@ -1632,18 +1649,11 @@ struct mpage_da_data { | |||
| 1632 | static int mpage_da_submit_io(struct mpage_da_data *mpd) | 1649 | static int mpage_da_submit_io(struct mpage_da_data *mpd) |
| 1633 | { | 1650 | { |
| 1634 | struct address_space *mapping = mpd->inode->i_mapping; | 1651 | struct address_space *mapping = mpd->inode->i_mapping; |
| 1635 | struct mpage_data mpd_pp = { | ||
| 1636 | .bio = NULL, | ||
| 1637 | .last_block_in_bio = 0, | ||
| 1638 | .get_block = mpd->get_block, | ||
| 1639 | .use_writepage = 1, | ||
| 1640 | }; | ||
| 1641 | int ret = 0, err, nr_pages, i; | 1652 | int ret = 0, err, nr_pages, i; |
| 1642 | unsigned long index, end; | 1653 | unsigned long index, end; |
| 1643 | struct pagevec pvec; | 1654 | struct pagevec pvec; |
| 1644 | 1655 | ||
| 1645 | BUG_ON(mpd->next_page <= mpd->first_page); | 1656 | BUG_ON(mpd->next_page <= mpd->first_page); |
| 1646 | |||
| 1647 | pagevec_init(&pvec, 0); | 1657 | pagevec_init(&pvec, 0); |
| 1648 | index = mpd->first_page; | 1658 | index = mpd->first_page; |
| 1649 | end = mpd->next_page - 1; | 1659 | end = mpd->next_page - 1; |
| @@ -1661,8 +1671,9 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
| 1661 | break; | 1671 | break; |
| 1662 | index++; | 1672 | index++; |
| 1663 | 1673 | ||
| 1664 | err = __mpage_writepage(page, mpd->wbc, &mpd_pp); | 1674 | err = mapping->a_ops->writepage(page, mpd->wbc); |
| 1665 | 1675 | if (!err) | |
| 1676 | mpd->pages_written++; | ||
| 1666 | /* | 1677 | /* |
| 1667 | * In error case, we have to continue because | 1678 | * In error case, we have to continue because |
| 1668 | * remaining pages are still locked | 1679 | * remaining pages are still locked |
| @@ -1673,9 +1684,6 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
| 1673 | } | 1684 | } |
| 1674 | pagevec_release(&pvec); | 1685 | pagevec_release(&pvec); |
| 1675 | } | 1686 | } |
| 1676 | if (mpd_pp.bio) | ||
| 1677 | mpage_bio_submit(WRITE, mpd_pp.bio); | ||
| 1678 | |||
| 1679 | return ret; | 1687 | return ret; |
| 1680 | } | 1688 | } |
| 1681 | 1689 | ||
| @@ -1698,7 +1706,7 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
| 1698 | int blocks = exbh->b_size >> inode->i_blkbits; | 1706 | int blocks = exbh->b_size >> inode->i_blkbits; |
| 1699 | sector_t pblock = exbh->b_blocknr, cur_logical; | 1707 | sector_t pblock = exbh->b_blocknr, cur_logical; |
| 1700 | struct buffer_head *head, *bh; | 1708 | struct buffer_head *head, *bh; |
| 1701 | unsigned long index, end; | 1709 | pgoff_t index, end; |
| 1702 | struct pagevec pvec; | 1710 | struct pagevec pvec; |
| 1703 | int nr_pages, i; | 1711 | int nr_pages, i; |
| 1704 | 1712 | ||
| @@ -1741,6 +1749,13 @@ static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd, sector_t logical, | |||
| 1741 | if (buffer_delay(bh)) { | 1749 | if (buffer_delay(bh)) { |
| 1742 | bh->b_blocknr = pblock; | 1750 | bh->b_blocknr = pblock; |
| 1743 | clear_buffer_delay(bh); | 1751 | clear_buffer_delay(bh); |
| 1752 | bh->b_bdev = inode->i_sb->s_bdev; | ||
| 1753 | } else if (buffer_unwritten(bh)) { | ||
| 1754 | bh->b_blocknr = pblock; | ||
| 1755 | clear_buffer_unwritten(bh); | ||
| 1756 | set_buffer_mapped(bh); | ||
| 1757 | set_buffer_new(bh); | ||
| 1758 | bh->b_bdev = inode->i_sb->s_bdev; | ||
| 1744 | } else if (buffer_mapped(bh)) | 1759 | } else if (buffer_mapped(bh)) |
| 1745 | BUG_ON(bh->b_blocknr != pblock); | 1760 | BUG_ON(bh->b_blocknr != pblock); |
| 1746 | 1761 | ||
| @@ -1776,13 +1791,11 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
| 1776 | * | 1791 | * |
| 1777 | * The function skips space we know is already mapped to disk blocks. | 1792 | * The function skips space we know is already mapped to disk blocks. |
| 1778 | * | 1793 | * |
| 1779 | * The function ignores errors ->get_block() returns, thus real | ||
| 1780 | * error handling is postponed to __mpage_writepage() | ||
| 1781 | */ | 1794 | */ |
| 1782 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1795 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) |
| 1783 | { | 1796 | { |
| 1797 | int err = 0; | ||
| 1784 | struct buffer_head *lbh = &mpd->lbh; | 1798 | struct buffer_head *lbh = &mpd->lbh; |
| 1785 | int err = 0, remain = lbh->b_size; | ||
| 1786 | sector_t next = lbh->b_blocknr; | 1799 | sector_t next = lbh->b_blocknr; |
| 1787 | struct buffer_head new; | 1800 | struct buffer_head new; |
| 1788 | 1801 | ||
| @@ -1792,38 +1805,36 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 1792 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1805 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
| 1793 | return; | 1806 | return; |
| 1794 | 1807 | ||
| 1795 | while (remain) { | 1808 | new.b_state = lbh->b_state; |
| 1796 | new.b_state = lbh->b_state; | 1809 | new.b_blocknr = 0; |
| 1797 | new.b_blocknr = 0; | 1810 | new.b_size = lbh->b_size; |
| 1798 | new.b_size = remain; | ||
| 1799 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
| 1800 | if (err) { | ||
| 1801 | /* | ||
| 1802 | * Rather than implement own error handling | ||
| 1803 | * here, we just leave remaining blocks | ||
| 1804 | * unallocated and try again with ->writepage() | ||
| 1805 | */ | ||
| 1806 | break; | ||
| 1807 | } | ||
| 1808 | BUG_ON(new.b_size == 0); | ||
| 1809 | 1811 | ||
| 1810 | if (buffer_new(&new)) | 1812 | /* |
| 1811 | __unmap_underlying_blocks(mpd->inode, &new); | 1813 | * If we didn't accumulate anything |
| 1814 | * to write simply return | ||
| 1815 | */ | ||
| 1816 | if (!new.b_size) | ||
| 1817 | return; | ||
| 1818 | err = mpd->get_block(mpd->inode, next, &new, 1); | ||
| 1819 | if (err) | ||
| 1820 | return; | ||
| 1821 | BUG_ON(new.b_size == 0); | ||
| 1812 | 1822 | ||
| 1813 | /* | 1823 | if (buffer_new(&new)) |
| 1814 | * If blocks are delayed marked, we need to | 1824 | __unmap_underlying_blocks(mpd->inode, &new); |
| 1815 | * put actual blocknr and drop delayed bit | ||
| 1816 | */ | ||
| 1817 | if (buffer_delay(lbh)) | ||
| 1818 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
| 1819 | 1825 | ||
| 1820 | /* go for the remaining blocks */ | 1826 | /* |
| 1821 | next += new.b_size >> mpd->inode->i_blkbits; | 1827 | * If blocks are delayed marked, we need to |
| 1822 | remain -= new.b_size; | 1828 | * put actual blocknr and drop delayed bit |
| 1823 | } | 1829 | */ |
| 1830 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | ||
| 1831 | mpage_put_bnr_to_bhs(mpd, next, &new); | ||
| 1832 | |||
| 1833 | return; | ||
| 1824 | } | 1834 | } |
| 1825 | 1835 | ||
| 1826 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | (1 << BH_Delay)) | 1836 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
| 1837 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
| 1827 | 1838 | ||
| 1828 | /* | 1839 | /* |
| 1829 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | 1840 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks |
| @@ -1837,41 +1848,61 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 1837 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, | 1848 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, |
| 1838 | sector_t logical, struct buffer_head *bh) | 1849 | sector_t logical, struct buffer_head *bh) |
| 1839 | { | 1850 | { |
| 1840 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1841 | sector_t next; | 1851 | sector_t next; |
| 1852 | size_t b_size = bh->b_size; | ||
| 1853 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1854 | int nrblocks = lbh->b_size >> mpd->inode->i_blkbits; | ||
| 1842 | 1855 | ||
| 1843 | next = lbh->b_blocknr + (lbh->b_size >> mpd->inode->i_blkbits); | 1856 | /* check if thereserved journal credits might overflow */ |
| 1844 | 1857 | if (!(EXT4_I(mpd->inode)->i_flags & EXT4_EXTENTS_FL)) { | |
| 1858 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
| 1859 | /* | ||
| 1860 | * With non-extent format we are limited by the journal | ||
| 1861 | * credit available. Total credit needed to insert | ||
| 1862 | * nrblocks contiguous blocks is dependent on the | ||
| 1863 | * nrblocks. So limit nrblocks. | ||
| 1864 | */ | ||
| 1865 | goto flush_it; | ||
| 1866 | } else if ((nrblocks + (b_size >> mpd->inode->i_blkbits)) > | ||
| 1867 | EXT4_MAX_TRANS_DATA) { | ||
| 1868 | /* | ||
| 1869 | * Adding the new buffer_head would make it cross the | ||
| 1870 | * allowed limit for which we have journal credit | ||
| 1871 | * reserved. So limit the new bh->b_size | ||
| 1872 | */ | ||
| 1873 | b_size = (EXT4_MAX_TRANS_DATA - nrblocks) << | ||
| 1874 | mpd->inode->i_blkbits; | ||
| 1875 | /* we will do mpage_da_submit_io in the next loop */ | ||
| 1876 | } | ||
| 1877 | } | ||
| 1845 | /* | 1878 | /* |
| 1846 | * First block in the extent | 1879 | * First block in the extent |
| 1847 | */ | 1880 | */ |
| 1848 | if (lbh->b_size == 0) { | 1881 | if (lbh->b_size == 0) { |
| 1849 | lbh->b_blocknr = logical; | 1882 | lbh->b_blocknr = logical; |
| 1850 | lbh->b_size = bh->b_size; | 1883 | lbh->b_size = b_size; |
| 1851 | lbh->b_state = bh->b_state & BH_FLAGS; | 1884 | lbh->b_state = bh->b_state & BH_FLAGS; |
| 1852 | return; | 1885 | return; |
| 1853 | } | 1886 | } |
| 1854 | 1887 | ||
| 1888 | next = lbh->b_blocknr + nrblocks; | ||
| 1855 | /* | 1889 | /* |
| 1856 | * Can we merge the block to our big extent? | 1890 | * Can we merge the block to our big extent? |
| 1857 | */ | 1891 | */ |
| 1858 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { | 1892 | if (logical == next && (bh->b_state & BH_FLAGS) == lbh->b_state) { |
| 1859 | lbh->b_size += bh->b_size; | 1893 | lbh->b_size += b_size; |
| 1860 | return; | 1894 | return; |
| 1861 | } | 1895 | } |
| 1862 | 1896 | ||
| 1897 | flush_it: | ||
| 1863 | /* | 1898 | /* |
| 1864 | * We couldn't merge the block to our extent, so we | 1899 | * We couldn't merge the block to our extent, so we |
| 1865 | * need to flush current extent and start new one | 1900 | * need to flush current extent and start new one |
| 1866 | */ | 1901 | */ |
| 1867 | mpage_da_map_blocks(mpd); | 1902 | mpage_da_map_blocks(mpd); |
| 1868 | 1903 | mpage_da_submit_io(mpd); | |
| 1869 | /* | 1904 | mpd->io_done = 1; |
| 1870 | * Now start a new extent | 1905 | return; |
| 1871 | */ | ||
| 1872 | lbh->b_size = bh->b_size; | ||
| 1873 | lbh->b_state = bh->b_state & BH_FLAGS; | ||
| 1874 | lbh->b_blocknr = logical; | ||
| 1875 | } | 1906 | } |
| 1876 | 1907 | ||
| 1877 | /* | 1908 | /* |
| @@ -1891,17 +1922,35 @@ static int __mpage_da_writepage(struct page *page, | |||
| 1891 | struct buffer_head *bh, *head, fake; | 1922 | struct buffer_head *bh, *head, fake; |
| 1892 | sector_t logical; | 1923 | sector_t logical; |
| 1893 | 1924 | ||
| 1925 | if (mpd->io_done) { | ||
| 1926 | /* | ||
| 1927 | * Rest of the page in the page_vec | ||
| 1928 | * redirty then and skip then. We will | ||
| 1929 | * try to to write them again after | ||
| 1930 | * starting a new transaction | ||
| 1931 | */ | ||
| 1932 | redirty_page_for_writepage(wbc, page); | ||
| 1933 | unlock_page(page); | ||
| 1934 | return MPAGE_DA_EXTENT_TAIL; | ||
| 1935 | } | ||
| 1894 | /* | 1936 | /* |
| 1895 | * Can we merge this page to current extent? | 1937 | * Can we merge this page to current extent? |
| 1896 | */ | 1938 | */ |
| 1897 | if (mpd->next_page != page->index) { | 1939 | if (mpd->next_page != page->index) { |
| 1898 | /* | 1940 | /* |
| 1899 | * Nope, we can't. So, we map non-allocated blocks | 1941 | * Nope, we can't. So, we map non-allocated blocks |
| 1900 | * and start IO on them using __mpage_writepage() | 1942 | * and start IO on them using writepage() |
| 1901 | */ | 1943 | */ |
| 1902 | if (mpd->next_page != mpd->first_page) { | 1944 | if (mpd->next_page != mpd->first_page) { |
| 1903 | mpage_da_map_blocks(mpd); | 1945 | mpage_da_map_blocks(mpd); |
| 1904 | mpage_da_submit_io(mpd); | 1946 | mpage_da_submit_io(mpd); |
| 1947 | /* | ||
| 1948 | * skip rest of the page in the page_vec | ||
| 1949 | */ | ||
| 1950 | mpd->io_done = 1; | ||
| 1951 | redirty_page_for_writepage(wbc, page); | ||
| 1952 | unlock_page(page); | ||
| 1953 | return MPAGE_DA_EXTENT_TAIL; | ||
| 1905 | } | 1954 | } |
| 1906 | 1955 | ||
| 1907 | /* | 1956 | /* |
| @@ -1932,6 +1981,8 @@ static int __mpage_da_writepage(struct page *page, | |||
| 1932 | set_buffer_dirty(bh); | 1981 | set_buffer_dirty(bh); |
| 1933 | set_buffer_uptodate(bh); | 1982 | set_buffer_uptodate(bh); |
| 1934 | mpage_add_bh_to_extent(mpd, logical, bh); | 1983 | mpage_add_bh_to_extent(mpd, logical, bh); |
| 1984 | if (mpd->io_done) | ||
| 1985 | return MPAGE_DA_EXTENT_TAIL; | ||
| 1935 | } else { | 1986 | } else { |
| 1936 | /* | 1987 | /* |
| 1937 | * Page with regular buffer heads, just add all dirty ones | 1988 | * Page with regular buffer heads, just add all dirty ones |
| @@ -1940,8 +1991,12 @@ static int __mpage_da_writepage(struct page *page, | |||
| 1940 | bh = head; | 1991 | bh = head; |
| 1941 | do { | 1992 | do { |
| 1942 | BUG_ON(buffer_locked(bh)); | 1993 | BUG_ON(buffer_locked(bh)); |
| 1943 | if (buffer_dirty(bh)) | 1994 | if (buffer_dirty(bh) && |
| 1995 | (!buffer_mapped(bh) || buffer_delay(bh))) { | ||
| 1944 | mpage_add_bh_to_extent(mpd, logical, bh); | 1996 | mpage_add_bh_to_extent(mpd, logical, bh); |
| 1997 | if (mpd->io_done) | ||
| 1998 | return MPAGE_DA_EXTENT_TAIL; | ||
| 1999 | } | ||
| 1945 | logical++; | 2000 | logical++; |
| 1946 | } while ((bh = bh->b_this_page) != head); | 2001 | } while ((bh = bh->b_this_page) != head); |
| 1947 | } | 2002 | } |
| @@ -1960,22 +2015,13 @@ static int __mpage_da_writepage(struct page *page, | |||
| 1960 | * | 2015 | * |
| 1961 | * This is a library function, which implements the writepages() | 2016 | * This is a library function, which implements the writepages() |
| 1962 | * address_space_operation. | 2017 | * address_space_operation. |
| 1963 | * | ||
| 1964 | * In order to avoid duplication of logic that deals with partial pages, | ||
| 1965 | * multiple bio per page, etc, we find non-allocated blocks, allocate | ||
| 1966 | * them with minimal calls to ->get_block() and re-use __mpage_writepage() | ||
| 1967 | * | ||
| 1968 | * It's important that we call __mpage_writepage() only once for each | ||
| 1969 | * involved page, otherwise we'd have to implement more complicated logic | ||
| 1970 | * to deal with pages w/o PG_lock or w/ PG_writeback and so on. | ||
| 1971 | * | ||
| 1972 | * See comments to mpage_writepages() | ||
| 1973 | */ | 2018 | */ |
| 1974 | static int mpage_da_writepages(struct address_space *mapping, | 2019 | static int mpage_da_writepages(struct address_space *mapping, |
| 1975 | struct writeback_control *wbc, | 2020 | struct writeback_control *wbc, |
| 1976 | get_block_t get_block) | 2021 | get_block_t get_block) |
| 1977 | { | 2022 | { |
| 1978 | struct mpage_da_data mpd; | 2023 | struct mpage_da_data mpd; |
| 2024 | long to_write; | ||
| 1979 | int ret; | 2025 | int ret; |
| 1980 | 2026 | ||
| 1981 | if (!get_block) | 2027 | if (!get_block) |
| @@ -1989,17 +2035,22 @@ static int mpage_da_writepages(struct address_space *mapping, | |||
| 1989 | mpd.first_page = 0; | 2035 | mpd.first_page = 0; |
| 1990 | mpd.next_page = 0; | 2036 | mpd.next_page = 0; |
| 1991 | mpd.get_block = get_block; | 2037 | mpd.get_block = get_block; |
| 2038 | mpd.io_done = 0; | ||
| 2039 | mpd.pages_written = 0; | ||
| 2040 | |||
| 2041 | to_write = wbc->nr_to_write; | ||
| 1992 | 2042 | ||
| 1993 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | 2043 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); |
| 1994 | 2044 | ||
| 1995 | /* | 2045 | /* |
| 1996 | * Handle last extent of pages | 2046 | * Handle last extent of pages |
| 1997 | */ | 2047 | */ |
| 1998 | if (mpd.next_page != mpd.first_page) { | 2048 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { |
| 1999 | mpage_da_map_blocks(&mpd); | 2049 | mpage_da_map_blocks(&mpd); |
| 2000 | mpage_da_submit_io(&mpd); | 2050 | mpage_da_submit_io(&mpd); |
| 2001 | } | 2051 | } |
| 2002 | 2052 | ||
| 2053 | wbc->nr_to_write = to_write - mpd.pages_written; | ||
| 2003 | return ret; | 2054 | return ret; |
| 2004 | } | 2055 | } |
| 2005 | 2056 | ||
| @@ -2204,63 +2255,95 @@ static int ext4_da_writepage(struct page *page, | |||
| 2204 | } | 2255 | } |
| 2205 | 2256 | ||
| 2206 | /* | 2257 | /* |
| 2207 | * For now just follow the DIO way to estimate the max credits | 2258 | * This is called via ext4_da_writepages() to |
| 2208 | * needed to write out EXT4_MAX_WRITEBACK_PAGES. | 2259 | * calulate the total number of credits to reserve to fit |
| 2209 | * todo: need to calculate the max credits need for | 2260 | * a single extent allocation into a single transaction, |
| 2210 | * extent based files, currently the DIO credits is based on | 2261 | * ext4_da_writpeages() will loop calling this before |
| 2211 | * indirect-blocks mapping way. | 2262 | * the block allocation. |
| 2212 | * | ||
| 2213 | * Probably should have a generic way to calculate credits | ||
| 2214 | * for DIO, writepages, and truncate | ||
| 2215 | */ | 2263 | */ |
| 2216 | #define EXT4_MAX_WRITEBACK_PAGES DIO_MAX_BLOCKS | 2264 | |
| 2217 | #define EXT4_MAX_WRITEBACK_CREDITS DIO_CREDITS | 2265 | static int ext4_da_writepages_trans_blocks(struct inode *inode) |
| 2266 | { | ||
| 2267 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | ||
| 2268 | |||
| 2269 | /* | ||
| 2270 | * With non-extent format the journal credit needed to | ||
| 2271 | * insert nrblocks contiguous block is dependent on | ||
| 2272 | * number of contiguous block. So we will limit | ||
| 2273 | * number of contiguous block to a sane value | ||
| 2274 | */ | ||
| 2275 | if (!(inode->i_flags & EXT4_EXTENTS_FL) && | ||
| 2276 | (max_blocks > EXT4_MAX_TRANS_DATA)) | ||
| 2277 | max_blocks = EXT4_MAX_TRANS_DATA; | ||
| 2278 | |||
| 2279 | return ext4_chunk_trans_blocks(inode, max_blocks); | ||
| 2280 | } | ||
| 2218 | 2281 | ||
| 2219 | static int ext4_da_writepages(struct address_space *mapping, | 2282 | static int ext4_da_writepages(struct address_space *mapping, |
| 2220 | struct writeback_control *wbc) | 2283 | struct writeback_control *wbc) |
| 2221 | { | 2284 | { |
| 2222 | struct inode *inode = mapping->host; | ||
| 2223 | handle_t *handle = NULL; | 2285 | handle_t *handle = NULL; |
| 2224 | int needed_blocks; | ||
| 2225 | int ret = 0; | ||
| 2226 | long to_write; | ||
| 2227 | loff_t range_start = 0; | 2286 | loff_t range_start = 0; |
| 2287 | struct inode *inode = mapping->host; | ||
| 2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | ||
| 2289 | long to_write, pages_skipped = 0; | ||
| 2290 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | ||
| 2228 | 2291 | ||
| 2229 | /* | 2292 | /* |
| 2230 | * No pages to write? This is mainly a kludge to avoid starting | 2293 | * No pages to write? This is mainly a kludge to avoid starting |
| 2231 | * a transaction for special inodes like journal inode on last iput() | 2294 | * a transaction for special inodes like journal inode on last iput() |
| 2232 | * because that could violate lock ordering on umount | 2295 | * because that could violate lock ordering on umount |
| 2233 | */ | 2296 | */ |
| 2234 | if (!mapping->nrpages) | 2297 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
| 2235 | return 0; | 2298 | return 0; |
| 2236 | |||
| 2237 | /* | 2299 | /* |
| 2238 | * Estimate the worse case needed credits to write out | 2300 | * Make sure nr_to_write is >= sbi->s_mb_stream_request |
| 2239 | * EXT4_MAX_BUF_BLOCKS pages | 2301 | * This make sure small files blocks are allocated in |
| 2302 | * single attempt. This ensure that small files | ||
| 2303 | * get less fragmented. | ||
| 2240 | */ | 2304 | */ |
| 2241 | needed_blocks = EXT4_MAX_WRITEBACK_CREDITS; | 2305 | if (wbc->nr_to_write < sbi->s_mb_stream_request) { |
| 2306 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | ||
| 2307 | wbc->nr_to_write = sbi->s_mb_stream_request; | ||
| 2308 | } | ||
| 2242 | 2309 | ||
| 2243 | to_write = wbc->nr_to_write; | 2310 | if (!wbc->range_cyclic) |
| 2244 | if (!wbc->range_cyclic) { | ||
| 2245 | /* | 2311 | /* |
| 2246 | * If range_cyclic is not set force range_cont | 2312 | * If range_cyclic is not set force range_cont |
| 2247 | * and save the old writeback_index | 2313 | * and save the old writeback_index |
| 2248 | */ | 2314 | */ |
| 2249 | wbc->range_cont = 1; | 2315 | wbc->range_cont = 1; |
| 2250 | range_start = wbc->range_start; | ||
| 2251 | } | ||
| 2252 | 2316 | ||
| 2253 | while (!ret && to_write) { | 2317 | range_start = wbc->range_start; |
| 2318 | pages_skipped = wbc->pages_skipped; | ||
| 2319 | |||
| 2320 | restart_loop: | ||
| 2321 | to_write = wbc->nr_to_write; | ||
| 2322 | while (!ret && to_write > 0) { | ||
| 2323 | |||
| 2324 | /* | ||
| 2325 | * we insert one extent at a time. So we need | ||
| 2326 | * credit needed for single extent allocation. | ||
| 2327 | * journalled mode is currently not supported | ||
| 2328 | * by delalloc | ||
| 2329 | */ | ||
| 2330 | BUG_ON(ext4_should_journal_data(inode)); | ||
| 2331 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | ||
| 2332 | |||
| 2254 | /* start a new transaction*/ | 2333 | /* start a new transaction*/ |
| 2255 | handle = ext4_journal_start(inode, needed_blocks); | 2334 | handle = ext4_journal_start(inode, needed_blocks); |
| 2256 | if (IS_ERR(handle)) { | 2335 | if (IS_ERR(handle)) { |
| 2257 | ret = PTR_ERR(handle); | 2336 | ret = PTR_ERR(handle); |
| 2337 | printk(KERN_EMERG "%s: jbd2_start: " | ||
| 2338 | "%ld pages, ino %lu; err %d\n", __func__, | ||
| 2339 | wbc->nr_to_write, inode->i_ino, ret); | ||
| 2340 | dump_stack(); | ||
| 2258 | goto out_writepages; | 2341 | goto out_writepages; |
| 2259 | } | 2342 | } |
| 2260 | if (ext4_should_order_data(inode)) { | 2343 | if (ext4_should_order_data(inode)) { |
| 2261 | /* | 2344 | /* |
| 2262 | * With ordered mode we need to add | 2345 | * With ordered mode we need to add |
| 2263 | * the inode to the journal handle | 2346 | * the inode to the journal handl |
| 2264 | * when we do block allocation. | 2347 | * when we do block allocation. |
| 2265 | */ | 2348 | */ |
| 2266 | ret = ext4_jbd2_file_inode(handle, inode); | 2349 | ret = ext4_jbd2_file_inode(handle, inode); |
| @@ -2268,20 +2351,20 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2268 | ext4_journal_stop(handle); | 2351 | ext4_journal_stop(handle); |
| 2269 | goto out_writepages; | 2352 | goto out_writepages; |
| 2270 | } | 2353 | } |
| 2271 | |||
| 2272 | } | 2354 | } |
| 2273 | /* | ||
| 2274 | * set the max dirty pages could be write at a time | ||
| 2275 | * to fit into the reserved transaction credits | ||
| 2276 | */ | ||
| 2277 | if (wbc->nr_to_write > EXT4_MAX_WRITEBACK_PAGES) | ||
| 2278 | wbc->nr_to_write = EXT4_MAX_WRITEBACK_PAGES; | ||
| 2279 | 2355 | ||
| 2280 | to_write -= wbc->nr_to_write; | 2356 | to_write -= wbc->nr_to_write; |
| 2281 | ret = mpage_da_writepages(mapping, wbc, | 2357 | ret = mpage_da_writepages(mapping, wbc, |
| 2282 | ext4_da_get_block_write); | 2358 | ext4_da_get_block_write); |
| 2283 | ext4_journal_stop(handle); | 2359 | ext4_journal_stop(handle); |
| 2284 | if (wbc->nr_to_write) { | 2360 | if (ret == MPAGE_DA_EXTENT_TAIL) { |
| 2361 | /* | ||
| 2362 | * got one extent now try with | ||
| 2363 | * rest of the pages | ||
| 2364 | */ | ||
| 2365 | to_write += wbc->nr_to_write; | ||
| 2366 | ret = 0; | ||
| 2367 | } else if (wbc->nr_to_write) { | ||
| 2285 | /* | 2368 | /* |
| 2286 | * There is no more writeout needed | 2369 | * There is no more writeout needed |
| 2287 | * or we requested for a noblocking writeout | 2370 | * or we requested for a noblocking writeout |
| @@ -2293,10 +2376,18 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2293 | wbc->nr_to_write = to_write; | 2376 | wbc->nr_to_write = to_write; |
| 2294 | } | 2377 | } |
| 2295 | 2378 | ||
| 2296 | out_writepages: | 2379 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { |
| 2297 | wbc->nr_to_write = to_write; | 2380 | /* We skipped pages in this loop */ |
| 2298 | if (range_start) | ||
| 2299 | wbc->range_start = range_start; | 2381 | wbc->range_start = range_start; |
| 2382 | wbc->nr_to_write = to_write + | ||
| 2383 | wbc->pages_skipped - pages_skipped; | ||
| 2384 | wbc->pages_skipped = pages_skipped; | ||
| 2385 | goto restart_loop; | ||
| 2386 | } | ||
| 2387 | |||
| 2388 | out_writepages: | ||
| 2389 | wbc->nr_to_write = to_write - nr_to_writebump; | ||
| 2390 | wbc->range_start = range_start; | ||
| 2300 | return ret; | 2391 | return ret; |
| 2301 | } | 2392 | } |
| 2302 | 2393 | ||
| @@ -3486,6 +3577,9 @@ void ext4_truncate(struct inode *inode) | |||
| 3486 | * modify the block allocation tree. | 3577 | * modify the block allocation tree. |
| 3487 | */ | 3578 | */ |
| 3488 | down_write(&ei->i_data_sem); | 3579 | down_write(&ei->i_data_sem); |
| 3580 | |||
| 3581 | ext4_discard_reservation(inode); | ||
| 3582 | |||
| 3489 | /* | 3583 | /* |
| 3490 | * The orphan list entry will now protect us from any crash which | 3584 | * The orphan list entry will now protect us from any crash which |
| 3491 | * occurs before the truncate completes, so it is now safe to propagate | 3585 | * occurs before the truncate completes, so it is now safe to propagate |
| @@ -3555,8 +3649,6 @@ do_indirects: | |||
| 3555 | ; | 3649 | ; |
| 3556 | } | 3650 | } |
| 3557 | 3651 | ||
| 3558 | ext4_discard_reservation(inode); | ||
| 3559 | |||
| 3560 | up_write(&ei->i_data_sem); | 3652 | up_write(&ei->i_data_sem); |
| 3561 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3653 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
| 3562 | ext4_mark_inode_dirty(handle, inode); | 3654 | ext4_mark_inode_dirty(handle, inode); |
| @@ -4324,57 +4416,129 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
| 4324 | return 0; | 4416 | return 0; |
| 4325 | } | 4417 | } |
| 4326 | 4418 | ||
| 4419 | static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | ||
| 4420 | int chunk) | ||
| 4421 | { | ||
| 4422 | int indirects; | ||
| 4423 | |||
| 4424 | /* if nrblocks are contiguous */ | ||
| 4425 | if (chunk) { | ||
| 4426 | /* | ||
| 4427 | * With N contiguous data blocks, it need at most | ||
| 4428 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | ||
| 4429 | * 2 dindirect blocks | ||
| 4430 | * 1 tindirect block | ||
| 4431 | */ | ||
| 4432 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | ||
| 4433 | return indirects + 3; | ||
| 4434 | } | ||
| 4435 | /* | ||
| 4436 | * if nrblocks are not contiguous, worse case, each block touch | ||
| 4437 | * a indirect block, and each indirect block touch a double indirect | ||
| 4438 | * block, plus a triple indirect block | ||
| 4439 | */ | ||
| 4440 | indirects = nrblocks * 2 + 1; | ||
| 4441 | return indirects; | ||
| 4442 | } | ||
| 4443 | |||
| 4444 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
| 4445 | { | ||
| 4446 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
| 4447 | return ext4_indirect_trans_blocks(inode, nrblocks, 0); | ||
| 4448 | return ext4_ext_index_trans_blocks(inode, nrblocks, 0); | ||
| 4449 | } | ||
| 4327 | /* | 4450 | /* |
| 4328 | * How many blocks doth make a writepage()? | 4451 | * Account for index blocks, block groups bitmaps and block group |
| 4329 | * | 4452 | * descriptor blocks if modify datablocks and index blocks |
| 4330 | * With N blocks per page, it may be: | 4453 | * worse case, the indexs blocks spread over different block groups |
| 4331 | * N data blocks | ||
| 4332 | * 2 indirect block | ||
| 4333 | * 2 dindirect | ||
| 4334 | * 1 tindirect | ||
| 4335 | * N+5 bitmap blocks (from the above) | ||
| 4336 | * N+5 group descriptor summary blocks | ||
| 4337 | * 1 inode block | ||
| 4338 | * 1 superblock. | ||
| 4339 | * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files | ||
| 4340 | * | 4454 | * |
| 4341 | * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS | 4455 | * If datablocks are discontiguous, they are possible to spread over |
| 4456 | * different block groups too. If they are contiugous, with flexbg, | ||
| 4457 | * they could still across block group boundary. | ||
| 4342 | * | 4458 | * |
| 4343 | * With ordered or writeback data it's the same, less the N data blocks. | 4459 | * Also account for superblock, inode, quota and xattr blocks |
| 4460 | */ | ||
| 4461 | int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | ||
| 4462 | { | ||
| 4463 | int groups, gdpblocks; | ||
| 4464 | int idxblocks; | ||
| 4465 | int ret = 0; | ||
| 4466 | |||
| 4467 | /* | ||
| 4468 | * How many index blocks need to touch to modify nrblocks? | ||
| 4469 | * The "Chunk" flag indicating whether the nrblocks is | ||
| 4470 | * physically contiguous on disk | ||
| 4471 | * | ||
| 4472 | * For Direct IO and fallocate, they calls get_block to allocate | ||
| 4473 | * one single extent at a time, so they could set the "Chunk" flag | ||
| 4474 | */ | ||
| 4475 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | ||
| 4476 | |||
| 4477 | ret = idxblocks; | ||
| 4478 | |||
| 4479 | /* | ||
| 4480 | * Now let's see how many group bitmaps and group descriptors need | ||
| 4481 | * to account | ||
| 4482 | */ | ||
| 4483 | groups = idxblocks; | ||
| 4484 | if (chunk) | ||
| 4485 | groups += 1; | ||
| 4486 | else | ||
| 4487 | groups += nrblocks; | ||
| 4488 | |||
| 4489 | gdpblocks = groups; | ||
| 4490 | if (groups > EXT4_SB(inode->i_sb)->s_groups_count) | ||
| 4491 | groups = EXT4_SB(inode->i_sb)->s_groups_count; | ||
| 4492 | if (groups > EXT4_SB(inode->i_sb)->s_gdb_count) | ||
| 4493 | gdpblocks = EXT4_SB(inode->i_sb)->s_gdb_count; | ||
| 4494 | |||
| 4495 | /* bitmaps and block group descriptor blocks */ | ||
| 4496 | ret += groups + gdpblocks; | ||
| 4497 | |||
| 4498 | /* Blocks for super block, inode, quota and xattr blocks */ | ||
| 4499 | ret += EXT4_META_TRANS_BLOCKS(inode->i_sb); | ||
| 4500 | |||
| 4501 | return ret; | ||
| 4502 | } | ||
| 4503 | |||
| 4504 | /* | ||
| 4505 | * Calculate the total number of credits to reserve to fit | ||
| 4506 | * the modification of a single page into a single transaction, | ||
| 4507 | * which may include multiple chunks of block allocations. | ||
| 4344 | * | 4508 | * |
| 4345 | * If the inode's direct blocks can hold an integral number of pages then a | 4509 | * This could be called via ext4_write_begin() |
| 4346 | * page cannot straddle two indirect blocks, and we can only touch one indirect | ||
| 4347 | * and dindirect block, and the "5" above becomes "3". | ||
| 4348 | * | 4510 | * |
| 4349 | * This still overestimates under most circumstances. If we were to pass the | 4511 | * We need to consider the worst case, when |
| 4350 | * start and end offsets in here as well we could do block_to_path() on each | 4512 | * one new block per extent. |
| 4351 | * block and work out the exact number of indirects which are touched. Pah. | ||
| 4352 | */ | 4513 | */ |
| 4353 | |||
| 4354 | int ext4_writepage_trans_blocks(struct inode *inode) | 4514 | int ext4_writepage_trans_blocks(struct inode *inode) |
| 4355 | { | 4515 | { |
| 4356 | int bpp = ext4_journal_blocks_per_page(inode); | 4516 | int bpp = ext4_journal_blocks_per_page(inode); |
| 4357 | int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; | ||
| 4358 | int ret; | 4517 | int ret; |
| 4359 | 4518 | ||
| 4360 | if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) | 4519 | ret = ext4_meta_trans_blocks(inode, bpp, 0); |
| 4361 | return ext4_ext_writepage_trans_blocks(inode, bpp); | ||
| 4362 | 4520 | ||
| 4521 | /* Account for data blocks for journalled mode */ | ||
| 4363 | if (ext4_should_journal_data(inode)) | 4522 | if (ext4_should_journal_data(inode)) |
| 4364 | ret = 3 * (bpp + indirects) + 2; | 4523 | ret += bpp; |
| 4365 | else | ||
| 4366 | ret = 2 * (bpp + indirects) + 2; | ||
| 4367 | |||
| 4368 | #ifdef CONFIG_QUOTA | ||
| 4369 | /* We know that structure was already allocated during DQUOT_INIT so | ||
| 4370 | * we will be updating only the data blocks + inodes */ | ||
| 4371 | ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); | ||
| 4372 | #endif | ||
| 4373 | |||
| 4374 | return ret; | 4524 | return ret; |
| 4375 | } | 4525 | } |
| 4376 | 4526 | ||
| 4377 | /* | 4527 | /* |
| 4528 | * Calculate the journal credits for a chunk of data modification. | ||
| 4529 | * | ||
| 4530 | * This is called from DIO, fallocate or whoever calling | ||
| 4531 | * ext4_get_blocks_wrap() to map/allocate a chunk of contiguous disk blocks. | ||
| 4532 | * | ||
| 4533 | * journal buffers for data blocks are not included here, as DIO | ||
| 4534 | * and fallocate do not need to journal data buffers. | ||
| 4535 | */ | ||
| 4536 | int ext4_chunk_trans_blocks(struct inode *inode, int nrblocks) | ||
| 4537 | { | ||
| 4538 | return ext4_meta_trans_blocks(inode, nrblocks, 1); | ||
| 4539 | } | ||
| 4540 | |||
| 4541 | /* | ||
| 4378 | * The caller must have previously called ext4_reserve_inode_write(). | 4542 | * The caller must have previously called ext4_reserve_inode_write(). |
| 4379 | * Give this, we know that the caller already has write access to iloc->bh. | 4543 | * Give this, we know that the caller already has write access to iloc->bh. |
| 4380 | */ | 4544 | */ |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 865e9ddb44d4..e0e3a5eb1ddb 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -3282,6 +3282,35 @@ static void ext4_mb_use_group_pa(struct ext4_allocation_context *ac, | |||
| 3282 | } | 3282 | } |
| 3283 | 3283 | ||
| 3284 | /* | 3284 | /* |
| 3285 | * Return the prealloc space that have minimal distance | ||
| 3286 | * from the goal block. @cpa is the prealloc | ||
| 3287 | * space that is having currently known minimal distance | ||
| 3288 | * from the goal block. | ||
| 3289 | */ | ||
| 3290 | static struct ext4_prealloc_space * | ||
| 3291 | ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | ||
| 3292 | struct ext4_prealloc_space *pa, | ||
| 3293 | struct ext4_prealloc_space *cpa) | ||
| 3294 | { | ||
| 3295 | ext4_fsblk_t cur_distance, new_distance; | ||
| 3296 | |||
| 3297 | if (cpa == NULL) { | ||
| 3298 | atomic_inc(&pa->pa_count); | ||
| 3299 | return pa; | ||
| 3300 | } | ||
| 3301 | cur_distance = abs(goal_block - cpa->pa_pstart); | ||
| 3302 | new_distance = abs(goal_block - pa->pa_pstart); | ||
| 3303 | |||
| 3304 | if (cur_distance < new_distance) | ||
| 3305 | return cpa; | ||
| 3306 | |||
| 3307 | /* drop the previous reference */ | ||
| 3308 | atomic_dec(&cpa->pa_count); | ||
| 3309 | atomic_inc(&pa->pa_count); | ||
| 3310 | return pa; | ||
| 3311 | } | ||
| 3312 | |||
| 3313 | /* | ||
| 3285 | * search goal blocks in preallocated space | 3314 | * search goal blocks in preallocated space |
| 3286 | */ | 3315 | */ |
| 3287 | static noinline_for_stack int | 3316 | static noinline_for_stack int |
| @@ -3290,7 +3319,8 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
| 3290 | int order, i; | 3319 | int order, i; |
| 3291 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); | 3320 | struct ext4_inode_info *ei = EXT4_I(ac->ac_inode); |
| 3292 | struct ext4_locality_group *lg; | 3321 | struct ext4_locality_group *lg; |
| 3293 | struct ext4_prealloc_space *pa; | 3322 | struct ext4_prealloc_space *pa, *cpa = NULL; |
| 3323 | ext4_fsblk_t goal_block; | ||
| 3294 | 3324 | ||
| 3295 | /* only data can be preallocated */ | 3325 | /* only data can be preallocated */ |
| 3296 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) | 3326 | if (!(ac->ac_flags & EXT4_MB_HINT_DATA)) |
| @@ -3333,6 +3363,13 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
| 3333 | /* The max size of hash table is PREALLOC_TB_SIZE */ | 3363 | /* The max size of hash table is PREALLOC_TB_SIZE */ |
| 3334 | order = PREALLOC_TB_SIZE - 1; | 3364 | order = PREALLOC_TB_SIZE - 1; |
| 3335 | 3365 | ||
| 3366 | goal_block = ac->ac_g_ex.fe_group * EXT4_BLOCKS_PER_GROUP(ac->ac_sb) + | ||
| 3367 | ac->ac_g_ex.fe_start + | ||
| 3368 | le32_to_cpu(EXT4_SB(ac->ac_sb)->s_es->s_first_data_block); | ||
| 3369 | /* | ||
| 3370 | * search for the prealloc space that is having | ||
| 3371 | * minimal distance from the goal block. | ||
| 3372 | */ | ||
| 3336 | for (i = order; i < PREALLOC_TB_SIZE; i++) { | 3373 | for (i = order; i < PREALLOC_TB_SIZE; i++) { |
| 3337 | rcu_read_lock(); | 3374 | rcu_read_lock(); |
| 3338 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], | 3375 | list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[i], |
| @@ -3340,17 +3377,19 @@ ext4_mb_use_preallocated(struct ext4_allocation_context *ac) | |||
| 3340 | spin_lock(&pa->pa_lock); | 3377 | spin_lock(&pa->pa_lock); |
| 3341 | if (pa->pa_deleted == 0 && | 3378 | if (pa->pa_deleted == 0 && |
| 3342 | pa->pa_free >= ac->ac_o_ex.fe_len) { | 3379 | pa->pa_free >= ac->ac_o_ex.fe_len) { |
| 3343 | atomic_inc(&pa->pa_count); | 3380 | |
| 3344 | ext4_mb_use_group_pa(ac, pa); | 3381 | cpa = ext4_mb_check_group_pa(goal_block, |
| 3345 | spin_unlock(&pa->pa_lock); | 3382 | pa, cpa); |
| 3346 | ac->ac_criteria = 20; | ||
| 3347 | rcu_read_unlock(); | ||
| 3348 | return 1; | ||
| 3349 | } | 3383 | } |
| 3350 | spin_unlock(&pa->pa_lock); | 3384 | spin_unlock(&pa->pa_lock); |
| 3351 | } | 3385 | } |
| 3352 | rcu_read_unlock(); | 3386 | rcu_read_unlock(); |
| 3353 | } | 3387 | } |
| 3388 | if (cpa) { | ||
| 3389 | ext4_mb_use_group_pa(ac, cpa); | ||
| 3390 | ac->ac_criteria = 20; | ||
| 3391 | return 1; | ||
| 3392 | } | ||
| 3354 | return 0; | 3393 | return 0; |
| 3355 | } | 3394 | } |
| 3356 | 3395 | ||
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b9e077ba07e9..46fc0b5b12ba 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
| @@ -53,7 +53,8 @@ static int finish_range(handle_t *handle, struct inode *inode, | |||
| 53 | * credit. But below we try to not accumalate too much | 53 | * credit. But below we try to not accumalate too much |
| 54 | * of them by restarting the journal. | 54 | * of them by restarting the journal. |
| 55 | */ | 55 | */ |
| 56 | needed = ext4_ext_calc_credits_for_insert(inode, path); | 56 | needed = ext4_ext_calc_credits_for_single_extent(inode, |
| 57 | lb->last_block - lb->first_block + 1, path); | ||
| 57 | 58 | ||
| 58 | /* | 59 | /* |
| 59 | * Make sure the credit we accumalated is not really high | 60 | * Make sure the credit we accumalated is not really high |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 0a9265164265..b3d35604ea18 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -773,7 +773,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 773 | 773 | ||
| 774 | if (reserved_gdb || gdb_off == 0) { | 774 | if (reserved_gdb || gdb_off == 0) { |
| 775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, | 775 | if (!EXT4_HAS_COMPAT_FEATURE(sb, |
| 776 | EXT4_FEATURE_COMPAT_RESIZE_INODE)){ | 776 | EXT4_FEATURE_COMPAT_RESIZE_INODE) |
| 777 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
| 777 | ext4_warning(sb, __func__, | 778 | ext4_warning(sb, __func__, |
| 778 | "No reserved GDT blocks, can't resize"); | 779 | "No reserved GDT blocks, can't resize"); |
| 779 | return -EPERM; | 780 | return -EPERM; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d5d77958b861..566344b926b7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -568,6 +568,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 568 | #endif | 568 | #endif |
| 569 | ei->i_block_alloc_info = NULL; | 569 | ei->i_block_alloc_info = NULL; |
| 570 | ei->vfs_inode.i_version = 1; | 570 | ei->vfs_inode.i_version = 1; |
| 571 | ei->vfs_inode.i_data.writeback_index = 0; | ||
| 571 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
| 572 | INIT_LIST_HEAD(&ei->i_prealloc_list); | 573 | INIT_LIST_HEAD(&ei->i_prealloc_list); |
| 573 | spin_lock_init(&ei->i_prealloc_lock); | 574 | spin_lock_init(&ei->i_prealloc_lock); |
diff --git a/fs/inotify_user.c b/fs/inotify_user.c index 60249429a253..d85c7d931cdf 100644 --- a/fs/inotify_user.c +++ b/fs/inotify_user.c | |||
| @@ -323,7 +323,7 @@ out: | |||
| 323 | } | 323 | } |
| 324 | 324 | ||
| 325 | /* | 325 | /* |
| 326 | * remove_kevent - cleans up and ultimately frees the given kevent | 326 | * remove_kevent - cleans up the given kevent |
| 327 | * | 327 | * |
| 328 | * Caller must hold dev->ev_mutex. | 328 | * Caller must hold dev->ev_mutex. |
| 329 | */ | 329 | */ |
| @@ -334,7 +334,13 @@ static void remove_kevent(struct inotify_device *dev, | |||
| 334 | 334 | ||
| 335 | dev->event_count--; | 335 | dev->event_count--; |
| 336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; | 336 | dev->queue_size -= sizeof(struct inotify_event) + kevent->event.len; |
| 337 | } | ||
| 337 | 338 | ||
| 339 | /* | ||
| 340 | * free_kevent - frees the given kevent. | ||
| 341 | */ | ||
| 342 | static void free_kevent(struct inotify_kernel_event *kevent) | ||
| 343 | { | ||
| 338 | kfree(kevent->name); | 344 | kfree(kevent->name); |
| 339 | kmem_cache_free(event_cachep, kevent); | 345 | kmem_cache_free(event_cachep, kevent); |
| 340 | } | 346 | } |
| @@ -350,6 +356,7 @@ static void inotify_dev_event_dequeue(struct inotify_device *dev) | |||
| 350 | struct inotify_kernel_event *kevent; | 356 | struct inotify_kernel_event *kevent; |
| 351 | kevent = inotify_dev_get_event(dev); | 357 | kevent = inotify_dev_get_event(dev); |
| 352 | remove_kevent(dev, kevent); | 358 | remove_kevent(dev, kevent); |
| 359 | free_kevent(kevent); | ||
| 353 | } | 360 | } |
| 354 | } | 361 | } |
| 355 | 362 | ||
| @@ -433,17 +440,15 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
| 433 | dev = file->private_data; | 440 | dev = file->private_data; |
| 434 | 441 | ||
| 435 | while (1) { | 442 | while (1) { |
| 436 | int events; | ||
| 437 | 443 | ||
| 438 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); | 444 | prepare_to_wait(&dev->wq, &wait, TASK_INTERRUPTIBLE); |
| 439 | 445 | ||
| 440 | mutex_lock(&dev->ev_mutex); | 446 | mutex_lock(&dev->ev_mutex); |
| 441 | events = !list_empty(&dev->events); | 447 | if (!list_empty(&dev->events)) { |
| 442 | mutex_unlock(&dev->ev_mutex); | ||
| 443 | if (events) { | ||
| 444 | ret = 0; | 448 | ret = 0; |
| 445 | break; | 449 | break; |
| 446 | } | 450 | } |
| 451 | mutex_unlock(&dev->ev_mutex); | ||
| 447 | 452 | ||
| 448 | if (file->f_flags & O_NONBLOCK) { | 453 | if (file->f_flags & O_NONBLOCK) { |
| 449 | ret = -EAGAIN; | 454 | ret = -EAGAIN; |
| @@ -462,7 +467,6 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
| 462 | if (ret) | 467 | if (ret) |
| 463 | return ret; | 468 | return ret; |
| 464 | 469 | ||
| 465 | mutex_lock(&dev->ev_mutex); | ||
| 466 | while (1) { | 470 | while (1) { |
| 467 | struct inotify_kernel_event *kevent; | 471 | struct inotify_kernel_event *kevent; |
| 468 | 472 | ||
| @@ -481,6 +485,13 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
| 481 | } | 485 | } |
| 482 | break; | 486 | break; |
| 483 | } | 487 | } |
| 488 | remove_kevent(dev, kevent); | ||
| 489 | |||
| 490 | /* | ||
| 491 | * Must perform the copy_to_user outside the mutex in order | ||
| 492 | * to avoid a lock order reversal with mmap_sem. | ||
| 493 | */ | ||
| 494 | mutex_unlock(&dev->ev_mutex); | ||
| 484 | 495 | ||
| 485 | if (copy_to_user(buf, &kevent->event, event_size)) { | 496 | if (copy_to_user(buf, &kevent->event, event_size)) { |
| 486 | ret = -EFAULT; | 497 | ret = -EFAULT; |
| @@ -498,7 +509,9 @@ static ssize_t inotify_read(struct file *file, char __user *buf, | |||
| 498 | count -= kevent->event.len; | 509 | count -= kevent->event.len; |
| 499 | } | 510 | } |
| 500 | 511 | ||
| 501 | remove_kevent(dev, kevent); | 512 | free_kevent(kevent); |
| 513 | |||
| 514 | mutex_lock(&dev->ev_mutex); | ||
| 502 | } | 515 | } |
| 503 | mutex_unlock(&dev->ev_mutex); | 516 | mutex_unlock(&dev->ev_mutex); |
| 504 | 517 | ||
diff --git a/fs/jffs2/jffs2_fs_i.h b/fs/jffs2/jffs2_fs_i.h index 31559f45fdde..4c41db91eaa4 100644 --- a/fs/jffs2/jffs2_fs_i.h +++ b/fs/jffs2/jffs2_fs_i.h | |||
| @@ -12,7 +12,6 @@ | |||
| 12 | #ifndef _JFFS2_FS_I | 12 | #ifndef _JFFS2_FS_I |
| 13 | #define _JFFS2_FS_I | 13 | #define _JFFS2_FS_I |
| 14 | 14 | ||
| 15 | #include <linux/version.h> | ||
| 16 | #include <linux/rbtree.h> | 15 | #include <linux/rbtree.h> |
| 17 | #include <linux/posix_acl.h> | 16 | #include <linux/posix_acl.h> |
| 18 | #include <linux/mutex.h> | 17 | #include <linux/mutex.h> |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 9abcd2b329f7..e9b20173fef3 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -1279,6 +1279,12 @@ static int nfs_parse_mount_options(char *raw, | |||
| 1279 | } | 1279 | } |
| 1280 | } | 1280 | } |
| 1281 | 1281 | ||
| 1282 | if (errors > 0) { | ||
| 1283 | dfprintk(MOUNT, "NFS: parsing encountered %d error%s\n", | ||
| 1284 | errors, (errors == 1 ? "" : "s")); | ||
| 1285 | if (!sloppy) | ||
| 1286 | return 0; | ||
| 1287 | } | ||
| 1282 | return 1; | 1288 | return 1; |
| 1283 | 1289 | ||
| 1284 | out_nomem: | 1290 | out_nomem: |
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c index b6ed38380ab8..54b8b4140c8f 100644 --- a/fs/nfsd/nfs4acl.c +++ b/fs/nfsd/nfs4acl.c | |||
| @@ -443,7 +443,7 @@ init_state(struct posix_acl_state *state, int cnt) | |||
| 443 | * enough space for either: | 443 | * enough space for either: |
| 444 | */ | 444 | */ |
| 445 | alloc = sizeof(struct posix_ace_state_array) | 445 | alloc = sizeof(struct posix_ace_state_array) |
| 446 | + cnt*sizeof(struct posix_ace_state); | 446 | + cnt*sizeof(struct posix_user_ace_state); |
| 447 | state->users = kzalloc(alloc, GFP_KERNEL); | 447 | state->users = kzalloc(alloc, GFP_KERNEL); |
| 448 | if (!state->users) | 448 | if (!state->users) |
| 449 | return -ENOMEM; | 449 | return -ENOMEM; |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2e51adac65de..e5b51ffafc6c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -867,11 +867,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
| 867 | int slack_bytes; | 867 | int slack_bytes; |
| 868 | __be32 status; | 868 | __be32 status; |
| 869 | 869 | ||
| 870 | status = nfserr_resource; | ||
| 871 | cstate = cstate_alloc(); | ||
| 872 | if (cstate == NULL) | ||
| 873 | goto out; | ||
| 874 | |||
| 875 | resp->xbuf = &rqstp->rq_res; | 870 | resp->xbuf = &rqstp->rq_res; |
| 876 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; | 871 | resp->p = rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len; |
| 877 | resp->tagp = resp->p; | 872 | resp->tagp = resp->p; |
| @@ -890,6 +885,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, | |||
| 890 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) | 885 | if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) |
| 891 | goto out; | 886 | goto out; |
| 892 | 887 | ||
| 888 | status = nfserr_resource; | ||
| 889 | cstate = cstate_alloc(); | ||
| 890 | if (cstate == NULL) | ||
| 891 | goto out; | ||
| 892 | |||
| 893 | status = nfs_ok; | 893 | status = nfs_ok; |
| 894 | while (!status && resp->opcnt < args->opcnt) { | 894 | while (!status && resp->opcnt < args->opcnt) { |
| 895 | op = &args->ops[resp->opcnt++]; | 895 | op = &args->ops[resp->opcnt++]; |
| @@ -957,9 +957,9 @@ encode_op: | |||
| 957 | nfsd4_increment_op_stats(op->opnum); | 957 | nfsd4_increment_op_stats(op->opnum); |
| 958 | } | 958 | } |
| 959 | 959 | ||
| 960 | cstate_free(cstate); | ||
| 960 | out: | 961 | out: |
| 961 | nfsd4_release_compoundargs(args); | 962 | nfsd4_release_compoundargs(args); |
| 962 | cstate_free(cstate); | ||
| 963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); | 963 | dprintk("nfsv4 compound returned %d\n", ntohl(status)); |
| 964 | return status; | 964 | return status; |
| 965 | } | 965 | } |
diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index e1781c8b1650..9e8a95be7a1e 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c | |||
| @@ -174,7 +174,6 @@ static struct dentry *ntfs_lookup(struct inode *dir_ino, struct dentry *dent, | |||
| 174 | // TODO: Consider moving this lot to a separate function! (AIA) | 174 | // TODO: Consider moving this lot to a separate function! (AIA) |
| 175 | handle_name: | 175 | handle_name: |
| 176 | { | 176 | { |
| 177 | struct dentry *real_dent, *new_dent; | ||
| 178 | MFT_RECORD *m; | 177 | MFT_RECORD *m; |
| 179 | ntfs_attr_search_ctx *ctx; | 178 | ntfs_attr_search_ctx *ctx; |
| 180 | ntfs_inode *ni = NTFS_I(dent_inode); | 179 | ntfs_inode *ni = NTFS_I(dent_inode); |
| @@ -255,93 +254,9 @@ handle_name: | |||
| 255 | } | 254 | } |
| 256 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); | 255 | nls_name.hash = full_name_hash(nls_name.name, nls_name.len); |
| 257 | 256 | ||
| 258 | /* | 257 | dent = d_add_ci(dent, dent_inode, &nls_name); |
| 259 | * Note: No need for dent->d_lock lock as i_mutex is held on the | ||
| 260 | * parent inode. | ||
| 261 | */ | ||
| 262 | |||
| 263 | /* Does a dentry matching the nls_name exist already? */ | ||
| 264 | real_dent = d_lookup(dent->d_parent, &nls_name); | ||
| 265 | /* If not, create it now. */ | ||
| 266 | if (!real_dent) { | ||
| 267 | real_dent = d_alloc(dent->d_parent, &nls_name); | ||
| 268 | kfree(nls_name.name); | ||
| 269 | if (!real_dent) { | ||
| 270 | err = -ENOMEM; | ||
| 271 | goto err_out; | ||
| 272 | } | ||
| 273 | new_dent = d_splice_alias(dent_inode, real_dent); | ||
| 274 | if (new_dent) | ||
| 275 | dput(real_dent); | ||
| 276 | else | ||
| 277 | new_dent = real_dent; | ||
| 278 | ntfs_debug("Done. (Created new dentry.)"); | ||
| 279 | return new_dent; | ||
| 280 | } | ||
| 281 | kfree(nls_name.name); | 258 | kfree(nls_name.name); |
| 282 | /* Matching dentry exists, check if it is negative. */ | 259 | return dent; |
| 283 | if (real_dent->d_inode) { | ||
| 284 | if (unlikely(real_dent->d_inode != dent_inode)) { | ||
| 285 | /* This can happen because bad inodes are unhashed. */ | ||
| 286 | BUG_ON(!is_bad_inode(dent_inode)); | ||
| 287 | BUG_ON(!is_bad_inode(real_dent->d_inode)); | ||
| 288 | } | ||
| 289 | /* | ||
| 290 | * Already have the inode and the dentry attached, decrement | ||
| 291 | * the reference count to balance the ntfs_iget() we did | ||
| 292 | * earlier on. We found the dentry using d_lookup() so it | ||
| 293 | * cannot be disconnected and thus we do not need to worry | ||
| 294 | * about any NFS/disconnectedness issues here. | ||
| 295 | */ | ||
| 296 | iput(dent_inode); | ||
| 297 | ntfs_debug("Done. (Already had inode and dentry.)"); | ||
| 298 | return real_dent; | ||
| 299 | } | ||
| 300 | /* | ||
| 301 | * Negative dentry: instantiate it unless the inode is a directory and | ||
| 302 | * has a 'disconnected' dentry (i.e. IS_ROOT and DCACHE_DISCONNECTED), | ||
| 303 | * in which case d_move() that in place of the found dentry. | ||
| 304 | */ | ||
| 305 | if (!S_ISDIR(dent_inode->i_mode)) { | ||
| 306 | /* Not a directory; everything is easy. */ | ||
| 307 | d_instantiate(real_dent, dent_inode); | ||
| 308 | ntfs_debug("Done. (Already had negative file dentry.)"); | ||
| 309 | return real_dent; | ||
| 310 | } | ||
| 311 | spin_lock(&dcache_lock); | ||
| 312 | if (list_empty(&dent_inode->i_dentry)) { | ||
| 313 | /* | ||
| 314 | * Directory without a 'disconnected' dentry; we need to do | ||
| 315 | * d_instantiate() by hand because it takes dcache_lock which | ||
| 316 | * we already hold. | ||
| 317 | */ | ||
| 318 | list_add(&real_dent->d_alias, &dent_inode->i_dentry); | ||
| 319 | real_dent->d_inode = dent_inode; | ||
| 320 | spin_unlock(&dcache_lock); | ||
| 321 | security_d_instantiate(real_dent, dent_inode); | ||
| 322 | ntfs_debug("Done. (Already had negative directory dentry.)"); | ||
| 323 | return real_dent; | ||
| 324 | } | ||
| 325 | /* | ||
| 326 | * Directory with a 'disconnected' dentry; get a reference to the | ||
| 327 | * 'disconnected' dentry. | ||
| 328 | */ | ||
| 329 | new_dent = list_entry(dent_inode->i_dentry.next, struct dentry, | ||
| 330 | d_alias); | ||
| 331 | dget_locked(new_dent); | ||
| 332 | spin_unlock(&dcache_lock); | ||
| 333 | /* Do security vodoo. */ | ||
| 334 | security_d_instantiate(real_dent, dent_inode); | ||
| 335 | /* Move new_dent in place of real_dent. */ | ||
| 336 | d_move(new_dent, real_dent); | ||
| 337 | /* Balance the ntfs_iget() we did above. */ | ||
| 338 | iput(dent_inode); | ||
| 339 | /* Throw away real_dent. */ | ||
| 340 | dput(real_dent); | ||
| 341 | /* Use new_dent as the actual dentry. */ | ||
| 342 | ntfs_debug("Done. (Already had negative, disconnected directory " | ||
| 343 | "dentry.)"); | ||
| 344 | return new_dent; | ||
| 345 | 260 | ||
| 346 | eio_err_out: | 261 | eio_err_out: |
| 347 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); | 262 | ntfs_error(vol->sb, "Illegal file name attribute. Run chkdsk."); |
diff --git a/fs/ntfs/usnjrnl.h b/fs/ntfs/usnjrnl.h index 3a8af75351e8..4087fbdac327 100644 --- a/fs/ntfs/usnjrnl.h +++ b/fs/ntfs/usnjrnl.h | |||
| @@ -113,7 +113,7 @@ typedef struct { | |||
| 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the | 113 | * Reason flags (32-bit). Cumulative flags describing the change(s) to the |
| 114 | * file since it was last opened. I think the names speak for themselves but | 114 | * file since it was last opened. I think the names speak for themselves but |
| 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS | 115 | * if you disagree check out the descriptions in the Linux NTFS project NTFS |
| 116 | * documentation: http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 116 | * documentation: http://www.linux-ntfs.org/ |
| 117 | */ | 117 | */ |
| 118 | enum { | 118 | enum { |
| 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), | 119 | USN_REASON_DATA_OVERWRITE = const_cpu_to_le32(0x00000001), |
| @@ -145,7 +145,7 @@ typedef le32 USN_REASON_FLAGS; | |||
| 145 | * Source info flags (32-bit). Information about the source of the change(s) | 145 | * Source info flags (32-bit). Information about the source of the change(s) |
| 146 | * to the file. For detailed descriptions of what these mean, see the Linux | 146 | * to the file. For detailed descriptions of what these mean, see the Linux |
| 147 | * NTFS project NTFS documentation: | 147 | * NTFS project NTFS documentation: |
| 148 | * http://linux-ntfs.sourceforge.net/ntfs/files/usnjrnl.html | 148 | * http://www.linux-ntfs.org/ |
| 149 | */ | 149 | */ |
| 150 | enum { | 150 | enum { |
| 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), | 151 | USN_SOURCE_DATA_MANAGEMENT = const_cpu_to_le32(0x00000001), |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 506c24fb5078..a53da1466277 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -594,7 +594,7 @@ static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock, | |||
| 594 | goto bail; | 594 | goto bail; |
| 595 | } | 595 | } |
| 596 | 596 | ||
| 597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno) { | 597 | if (!ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)) && !p_blkno && create) { |
| 598 | ocfs2_error(inode->i_sb, | 598 | ocfs2_error(inode->i_sb, |
| 599 | "Inode %llu has a hole at block %llu\n", | 599 | "Inode %llu has a hole at block %llu\n", |
| 600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | 600 | (unsigned long long)OCFS2_I(inode)->ip_blkno, |
diff --git a/fs/ocfs2/cluster/netdebug.c b/fs/ocfs2/cluster/netdebug.c index d8bfa0eb41b2..52276c02f710 100644 --- a/fs/ocfs2/cluster/netdebug.c +++ b/fs/ocfs2/cluster/netdebug.c | |||
| @@ -138,20 +138,20 @@ static int nst_seq_show(struct seq_file *seq, void *v) | |||
| 138 | " message id: %d\n" | 138 | " message id: %d\n" |
| 139 | " message type: %u\n" | 139 | " message type: %u\n" |
| 140 | " message key: 0x%08x\n" | 140 | " message key: 0x%08x\n" |
| 141 | " sock acquiry: %lu.%lu\n" | 141 | " sock acquiry: %lu.%ld\n" |
| 142 | " send start: %lu.%lu\n" | 142 | " send start: %lu.%ld\n" |
| 143 | " wait start: %lu.%lu\n", | 143 | " wait start: %lu.%ld\n", |
| 144 | nst, (unsigned long)nst->st_task->pid, | 144 | nst, (unsigned long)nst->st_task->pid, |
| 145 | (unsigned long)nst->st_task->tgid, | 145 | (unsigned long)nst->st_task->tgid, |
| 146 | nst->st_task->comm, nst->st_node, | 146 | nst->st_task->comm, nst->st_node, |
| 147 | nst->st_sc, nst->st_id, nst->st_msg_type, | 147 | nst->st_sc, nst->st_id, nst->st_msg_type, |
| 148 | nst->st_msg_key, | 148 | nst->st_msg_key, |
| 149 | nst->st_sock_time.tv_sec, | 149 | nst->st_sock_time.tv_sec, |
| 150 | (unsigned long)nst->st_sock_time.tv_usec, | 150 | (long)nst->st_sock_time.tv_usec, |
| 151 | nst->st_send_time.tv_sec, | 151 | nst->st_send_time.tv_sec, |
| 152 | (unsigned long)nst->st_send_time.tv_usec, | 152 | (long)nst->st_send_time.tv_usec, |
| 153 | nst->st_status_time.tv_sec, | 153 | nst->st_status_time.tv_sec, |
| 154 | nst->st_status_time.tv_usec); | 154 | (long)nst->st_status_time.tv_usec); |
| 155 | } | 155 | } |
| 156 | 156 | ||
| 157 | spin_unlock(&o2net_debug_lock); | 157 | spin_unlock(&o2net_debug_lock); |
| @@ -276,7 +276,7 @@ static void *sc_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |||
| 276 | return sc; /* unused, just needs to be null when done */ | 276 | return sc; /* unused, just needs to be null when done */ |
| 277 | } | 277 | } |
| 278 | 278 | ||
| 279 | #define TV_SEC_USEC(TV) TV.tv_sec, (unsigned long)TV.tv_usec | 279 | #define TV_SEC_USEC(TV) TV.tv_sec, (long)TV.tv_usec |
| 280 | 280 | ||
| 281 | static int sc_seq_show(struct seq_file *seq, void *v) | 281 | static int sc_seq_show(struct seq_file *seq, void *v) |
| 282 | { | 282 | { |
| @@ -309,12 +309,12 @@ static int sc_seq_show(struct seq_file *seq, void *v) | |||
| 309 | " remote node: %s\n" | 309 | " remote node: %s\n" |
| 310 | " page off: %zu\n" | 310 | " page off: %zu\n" |
| 311 | " handshake ok: %u\n" | 311 | " handshake ok: %u\n" |
| 312 | " timer: %lu.%lu\n" | 312 | " timer: %lu.%ld\n" |
| 313 | " data ready: %lu.%lu\n" | 313 | " data ready: %lu.%ld\n" |
| 314 | " advance start: %lu.%lu\n" | 314 | " advance start: %lu.%ld\n" |
| 315 | " advance stop: %lu.%lu\n" | 315 | " advance stop: %lu.%ld\n" |
| 316 | " func start: %lu.%lu\n" | 316 | " func start: %lu.%ld\n" |
| 317 | " func stop: %lu.%lu\n" | 317 | " func stop: %lu.%ld\n" |
| 318 | " func key: %u\n" | 318 | " func key: %u\n" |
| 319 | " func type: %u\n", | 319 | " func type: %u\n", |
| 320 | sc, | 320 | sc, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index a27d61581bd6..2bcf706d9dd3 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
| @@ -143,8 +143,8 @@ static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); | |||
| 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); | 143 | static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc); |
| 144 | 144 | ||
| 145 | #ifdef CONFIG_DEBUG_FS | 145 | #ifdef CONFIG_DEBUG_FS |
| 146 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | 146 | static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, |
| 147 | u32 msgkey, struct task_struct *task, u8 node) | 147 | u32 msgkey, struct task_struct *task, u8 node) |
| 148 | { | 148 | { |
| 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); | 149 | INIT_LIST_HEAD(&nst->st_net_debug_item); |
| 150 | nst->st_task = task; | 150 | nst->st_task = task; |
| @@ -153,31 +153,61 @@ void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | |||
| 153 | nst->st_node = node; | 153 | nst->st_node = node; |
| 154 | } | 154 | } |
| 155 | 155 | ||
| 156 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | 156 | static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) |
| 157 | { | 157 | { |
| 158 | do_gettimeofday(&nst->st_sock_time); | 158 | do_gettimeofday(&nst->st_sock_time); |
| 159 | } | 159 | } |
| 160 | 160 | ||
| 161 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | 161 | static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) |
| 162 | { | 162 | { |
| 163 | do_gettimeofday(&nst->st_send_time); | 163 | do_gettimeofday(&nst->st_send_time); |
| 164 | } | 164 | } |
| 165 | 165 | ||
| 166 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | 166 | static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) |
| 167 | { | 167 | { |
| 168 | do_gettimeofday(&nst->st_status_time); | 168 | do_gettimeofday(&nst->st_status_time); |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | 171 | static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, |
| 172 | struct o2net_sock_container *sc) | 172 | struct o2net_sock_container *sc) |
| 173 | { | 173 | { |
| 174 | nst->st_sc = sc; | 174 | nst->st_sc = sc; |
| 175 | } | 175 | } |
| 176 | 176 | ||
| 177 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) | 177 | static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) |
| 178 | { | 178 | { |
| 179 | nst->st_id = msg_id; | 179 | nst->st_id = msg_id; |
| 180 | } | 180 | } |
| 181 | |||
| 182 | #else /* CONFIG_DEBUG_FS */ | ||
| 183 | |||
| 184 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
| 185 | u32 msgkey, struct task_struct *task, u8 node) | ||
| 186 | { | ||
| 187 | } | ||
| 188 | |||
| 189 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
| 190 | { | ||
| 191 | } | ||
| 192 | |||
| 193 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
| 194 | { | ||
| 195 | } | ||
| 196 | |||
| 197 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
| 198 | { | ||
| 199 | } | ||
| 200 | |||
| 201 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
| 202 | struct o2net_sock_container *sc) | ||
| 203 | { | ||
| 204 | } | ||
| 205 | |||
| 206 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
| 207 | u32 msg_id) | ||
| 208 | { | ||
| 209 | } | ||
| 210 | |||
| 181 | #endif /* CONFIG_DEBUG_FS */ | 211 | #endif /* CONFIG_DEBUG_FS */ |
| 182 | 212 | ||
| 183 | static inline int o2net_reconnect_delay(void) | 213 | static inline int o2net_reconnect_delay(void) |
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h index 18307ff81b77..8d58cfe410b1 100644 --- a/fs/ocfs2/cluster/tcp_internal.h +++ b/fs/ocfs2/cluster/tcp_internal.h | |||
| @@ -224,42 +224,10 @@ struct o2net_send_tracking { | |||
| 224 | struct timeval st_send_time; | 224 | struct timeval st_send_time; |
| 225 | struct timeval st_status_time; | 225 | struct timeval st_status_time; |
| 226 | }; | 226 | }; |
| 227 | |||
| 228 | void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
| 229 | u32 msgkey, struct task_struct *task, u8 node); | ||
| 230 | void o2net_set_nst_sock_time(struct o2net_send_tracking *nst); | ||
| 231 | void o2net_set_nst_send_time(struct o2net_send_tracking *nst); | ||
| 232 | void o2net_set_nst_status_time(struct o2net_send_tracking *nst); | ||
| 233 | void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
| 234 | struct o2net_sock_container *sc); | ||
| 235 | void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id); | ||
| 236 | |||
| 237 | #else | 227 | #else |
| 238 | struct o2net_send_tracking { | 228 | struct o2net_send_tracking { |
| 239 | u32 dummy; | 229 | u32 dummy; |
| 240 | }; | 230 | }; |
| 241 | |||
| 242 | static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, | ||
| 243 | u32 msgkey, struct task_struct *task, u8 node) | ||
| 244 | { | ||
| 245 | } | ||
| 246 | static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) | ||
| 247 | { | ||
| 248 | } | ||
| 249 | static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) | ||
| 250 | { | ||
| 251 | } | ||
| 252 | static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) | ||
| 253 | { | ||
| 254 | } | ||
| 255 | static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, | ||
| 256 | struct o2net_sock_container *sc) | ||
| 257 | { | ||
| 258 | } | ||
| 259 | static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, | ||
| 260 | u32 msg_id) | ||
| 261 | { | ||
| 262 | } | ||
| 263 | #endif /* CONFIG_DEBUG_FS */ | 231 | #endif /* CONFIG_DEBUG_FS */ |
| 264 | 232 | ||
| 265 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ | 233 | #endif /* O2CLUSTER_TCP_INTERNAL_H */ |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 8a1875848080..9cce563fd627 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -1300,7 +1300,6 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 1300 | di->i_size = cpu_to_le64(sb->s_blocksize); | 1300 | di->i_size = cpu_to_le64(sb->s_blocksize); |
| 1301 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); | 1301 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); |
| 1302 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); | 1302 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); |
| 1303 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
| 1304 | 1303 | ||
| 1305 | /* | 1304 | /* |
| 1306 | * This should never fail as our extent list is empty and all | 1305 | * This should never fail as our extent list is empty and all |
| @@ -1310,9 +1309,15 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 1310 | NULL); | 1309 | NULL); |
| 1311 | if (ret) { | 1310 | if (ret) { |
| 1312 | mlog_errno(ret); | 1311 | mlog_errno(ret); |
| 1313 | goto out; | 1312 | goto out_commit; |
| 1314 | } | 1313 | } |
| 1315 | 1314 | ||
| 1315 | /* | ||
| 1316 | * Set i_blocks after the extent insert for the most up to | ||
| 1317 | * date ip_clusters value. | ||
| 1318 | */ | ||
| 1319 | dir->i_blocks = ocfs2_inode_sector_count(dir); | ||
| 1320 | |||
| 1316 | ret = ocfs2_journal_dirty(handle, di_bh); | 1321 | ret = ocfs2_journal_dirty(handle, di_bh); |
| 1317 | if (ret) { | 1322 | if (ret) { |
| 1318 | mlog_errno(ret); | 1323 | mlog_errno(ret); |
| @@ -1336,7 +1341,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 1336 | len, 0, NULL); | 1341 | len, 0, NULL); |
| 1337 | if (ret) { | 1342 | if (ret) { |
| 1338 | mlog_errno(ret); | 1343 | mlog_errno(ret); |
| 1339 | goto out; | 1344 | goto out_commit; |
| 1340 | } | 1345 | } |
| 1341 | } | 1346 | } |
| 1342 | 1347 | ||
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 7a37240f7a31..c47bc2a809c2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -1418,13 +1418,13 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
| 1418 | { | 1418 | { |
| 1419 | unsigned int node_num; | 1419 | unsigned int node_num; |
| 1420 | int status, i; | 1420 | int status, i; |
| 1421 | u32 gen; | ||
| 1421 | struct buffer_head *bh = NULL; | 1422 | struct buffer_head *bh = NULL; |
| 1422 | struct ocfs2_dinode *di; | 1423 | struct ocfs2_dinode *di; |
| 1423 | 1424 | ||
| 1424 | /* This is called with the super block cluster lock, so we | 1425 | /* This is called with the super block cluster lock, so we |
| 1425 | * know that the slot map can't change underneath us. */ | 1426 | * know that the slot map can't change underneath us. */ |
| 1426 | 1427 | ||
| 1427 | spin_lock(&osb->osb_lock); | ||
| 1428 | for (i = 0; i < osb->max_slots; i++) { | 1428 | for (i = 0; i < osb->max_slots; i++) { |
| 1429 | /* Read journal inode to get the recovery generation */ | 1429 | /* Read journal inode to get the recovery generation */ |
| 1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); | 1430 | status = ocfs2_read_journal_inode(osb, i, &bh, NULL); |
| @@ -1433,23 +1433,31 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
| 1433 | goto bail; | 1433 | goto bail; |
| 1434 | } | 1434 | } |
| 1435 | di = (struct ocfs2_dinode *)bh->b_data; | 1435 | di = (struct ocfs2_dinode *)bh->b_data; |
| 1436 | osb->slot_recovery_generations[i] = | 1436 | gen = ocfs2_get_recovery_generation(di); |
| 1437 | ocfs2_get_recovery_generation(di); | ||
| 1438 | brelse(bh); | 1437 | brelse(bh); |
| 1439 | bh = NULL; | 1438 | bh = NULL; |
| 1440 | 1439 | ||
| 1440 | spin_lock(&osb->osb_lock); | ||
| 1441 | osb->slot_recovery_generations[i] = gen; | ||
| 1442 | |||
| 1441 | mlog(0, "Slot %u recovery generation is %u\n", i, | 1443 | mlog(0, "Slot %u recovery generation is %u\n", i, |
| 1442 | osb->slot_recovery_generations[i]); | 1444 | osb->slot_recovery_generations[i]); |
| 1443 | 1445 | ||
| 1444 | if (i == osb->slot_num) | 1446 | if (i == osb->slot_num) { |
| 1447 | spin_unlock(&osb->osb_lock); | ||
| 1445 | continue; | 1448 | continue; |
| 1449 | } | ||
| 1446 | 1450 | ||
| 1447 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); | 1451 | status = ocfs2_slot_to_node_num_locked(osb, i, &node_num); |
| 1448 | if (status == -ENOENT) | 1452 | if (status == -ENOENT) { |
| 1453 | spin_unlock(&osb->osb_lock); | ||
| 1449 | continue; | 1454 | continue; |
| 1455 | } | ||
| 1450 | 1456 | ||
| 1451 | if (__ocfs2_recovery_map_test(osb, node_num)) | 1457 | if (__ocfs2_recovery_map_test(osb, node_num)) { |
| 1458 | spin_unlock(&osb->osb_lock); | ||
| 1452 | continue; | 1459 | continue; |
| 1460 | } | ||
| 1453 | spin_unlock(&osb->osb_lock); | 1461 | spin_unlock(&osb->osb_lock); |
| 1454 | 1462 | ||
| 1455 | /* Ok, we have a slot occupied by another node which | 1463 | /* Ok, we have a slot occupied by another node which |
| @@ -1465,10 +1473,7 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) | |||
| 1465 | mlog_errno(status); | 1473 | mlog_errno(status); |
| 1466 | goto bail; | 1474 | goto bail; |
| 1467 | } | 1475 | } |
| 1468 | |||
| 1469 | spin_lock(&osb->osb_lock); | ||
| 1470 | } | 1476 | } |
| 1471 | spin_unlock(&osb->osb_lock); | ||
| 1472 | 1477 | ||
| 1473 | status = 0; | 1478 | status = 0; |
| 1474 | bail: | 1479 | bail: |
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 10e149ae5e3a..07f348b8d721 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
| @@ -97,13 +97,14 @@ static int ocfs2_stack_driver_request(const char *stack_name, | |||
| 97 | goto out; | 97 | goto out; |
| 98 | } | 98 | } |
| 99 | 99 | ||
| 100 | /* Ok, the stack is pinned */ | ||
| 101 | p->sp_count++; | ||
| 102 | active_stack = p; | 100 | active_stack = p; |
| 103 | |||
| 104 | rc = 0; | 101 | rc = 0; |
| 105 | 102 | ||
| 106 | out: | 103 | out: |
| 104 | /* If we found it, pin it */ | ||
| 105 | if (!rc) | ||
| 106 | active_stack->sp_count++; | ||
| 107 | |||
| 107 | spin_unlock(&ocfs2_stack_lock); | 108 | spin_unlock(&ocfs2_stack_lock); |
| 108 | return rc; | 109 | return rc; |
| 109 | } | 110 | } |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 7d6b34e201db..ecc3330972e5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
| @@ -499,9 +499,9 @@ int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | |||
| 499 | if (!size) | 499 | if (!size) |
| 500 | continue; | 500 | continue; |
| 501 | if (from + size > get_capacity(disk)) { | 501 | if (from + size > get_capacity(disk)) { |
| 502 | printk(KERN_ERR " %s: p%d exceeds device capacity\n", | 502 | printk(KERN_WARNING |
| 503 | "%s: p%d exceeds device capacity\n", | ||
| 503 | disk->disk_name, p); | 504 | disk->disk_name, p); |
| 504 | continue; | ||
| 505 | } | 505 | } |
| 506 | res = add_partition(disk, p, from, size, state->parts[p].flags); | 506 | res = add_partition(disk, p, from, size, state->parts[p].flags); |
| 507 | if (res) { | 507 | if (res) { |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 0d6eb33597c6..71c9be59c9c2 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
| @@ -337,65 +337,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns, | |||
| 337 | return 0; | 337 | return 0; |
| 338 | } | 338 | } |
| 339 | 339 | ||
| 340 | /* | ||
| 341 | * Use precise platform statistics if available: | ||
| 342 | */ | ||
| 343 | #ifdef CONFIG_VIRT_CPU_ACCOUNTING | ||
| 344 | static cputime_t task_utime(struct task_struct *p) | ||
| 345 | { | ||
| 346 | return p->utime; | ||
| 347 | } | ||
| 348 | |||
| 349 | static cputime_t task_stime(struct task_struct *p) | ||
| 350 | { | ||
| 351 | return p->stime; | ||
| 352 | } | ||
| 353 | #else | ||
| 354 | static cputime_t task_utime(struct task_struct *p) | ||
| 355 | { | ||
| 356 | clock_t utime = cputime_to_clock_t(p->utime), | ||
| 357 | total = utime + cputime_to_clock_t(p->stime); | ||
| 358 | u64 temp; | ||
| 359 | |||
| 360 | /* | ||
| 361 | * Use CFS's precise accounting: | ||
| 362 | */ | ||
| 363 | temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime); | ||
| 364 | |||
| 365 | if (total) { | ||
| 366 | temp *= utime; | ||
| 367 | do_div(temp, total); | ||
| 368 | } | ||
| 369 | utime = (clock_t)temp; | ||
| 370 | |||
| 371 | p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime)); | ||
| 372 | return p->prev_utime; | ||
| 373 | } | ||
| 374 | |||
| 375 | static cputime_t task_stime(struct task_struct *p) | ||
| 376 | { | ||
| 377 | clock_t stime; | ||
| 378 | |||
| 379 | /* | ||
| 380 | * Use CFS's precise accounting. (we subtract utime from | ||
| 381 | * the total, to make sure the total observed by userspace | ||
| 382 | * grows monotonically - apps rely on that): | ||
| 383 | */ | ||
| 384 | stime = nsec_to_clock_t(p->se.sum_exec_runtime) - | ||
| 385 | cputime_to_clock_t(task_utime(p)); | ||
| 386 | |||
| 387 | if (stime >= 0) | ||
| 388 | p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime)); | ||
| 389 | |||
| 390 | return p->prev_stime; | ||
| 391 | } | ||
| 392 | #endif | ||
| 393 | |||
| 394 | static cputime_t task_gtime(struct task_struct *p) | ||
| 395 | { | ||
| 396 | return p->gtime; | ||
| 397 | } | ||
| 398 | |||
| 399 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | 340 | static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, |
| 400 | struct pid *pid, struct task_struct *task, int whole) | 341 | struct pid *pid, struct task_struct *task, int whole) |
| 401 | { | 342 | { |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 4fb81e9c94e3..7821589a17d5 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
| @@ -330,6 +330,7 @@ retry: | |||
| 330 | spin_lock(&proc_inum_lock); | 330 | spin_lock(&proc_inum_lock); |
| 331 | ida_remove(&proc_inum_ida, i); | 331 | ida_remove(&proc_inum_ida, i); |
| 332 | spin_unlock(&proc_inum_lock); | 332 | spin_unlock(&proc_inum_lock); |
| 333 | return 0; | ||
| 333 | } | 334 | } |
| 334 | return PROC_DYNAMIC_FIRST + i; | 335 | return PROC_DYNAMIC_FIRST + i; |
| 335 | } | 336 | } |
| @@ -546,8 +547,8 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp | |||
| 546 | 547 | ||
| 547 | for (tmp = dir->subdir; tmp; tmp = tmp->next) | 548 | for (tmp = dir->subdir; tmp; tmp = tmp->next) |
| 548 | if (strcmp(tmp->name, dp->name) == 0) { | 549 | if (strcmp(tmp->name, dp->name) == 0) { |
| 549 | printk(KERN_WARNING "proc_dir_entry '%s' already " | 550 | printk(KERN_WARNING "proc_dir_entry '%s/%s' already registered\n", |
| 550 | "registered\n", dp->name); | 551 | dir->name, dp->name); |
| 551 | dump_stack(); | 552 | dump_stack(); |
| 552 | break; | 553 | break; |
| 553 | } | 554 | } |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index ded969862960..29e20c6b1f7f 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | #include <linux/tty.h> | 24 | #include <linux/tty.h> |
| 25 | #include <linux/string.h> | 25 | #include <linux/string.h> |
| 26 | #include <linux/mman.h> | 26 | #include <linux/mman.h> |
| 27 | #include <linux/quicklist.h> | ||
| 27 | #include <linux/proc_fs.h> | 28 | #include <linux/proc_fs.h> |
| 28 | #include <linux/ioport.h> | 29 | #include <linux/ioport.h> |
| 29 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
| @@ -182,6 +183,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 182 | "SReclaimable: %8lu kB\n" | 183 | "SReclaimable: %8lu kB\n" |
| 183 | "SUnreclaim: %8lu kB\n" | 184 | "SUnreclaim: %8lu kB\n" |
| 184 | "PageTables: %8lu kB\n" | 185 | "PageTables: %8lu kB\n" |
| 186 | #ifdef CONFIG_QUICKLIST | ||
| 187 | "Quicklists: %8lu kB\n" | ||
| 188 | #endif | ||
| 185 | "NFS_Unstable: %8lu kB\n" | 189 | "NFS_Unstable: %8lu kB\n" |
| 186 | "Bounce: %8lu kB\n" | 190 | "Bounce: %8lu kB\n" |
| 187 | "WritebackTmp: %8lu kB\n" | 191 | "WritebackTmp: %8lu kB\n" |
| @@ -214,6 +218,9 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 214 | K(global_page_state(NR_SLAB_RECLAIMABLE)), | 218 | K(global_page_state(NR_SLAB_RECLAIMABLE)), |
| 215 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), | 219 | K(global_page_state(NR_SLAB_UNRECLAIMABLE)), |
| 216 | K(global_page_state(NR_PAGETABLE)), | 220 | K(global_page_state(NR_PAGETABLE)), |
| 221 | #ifdef CONFIG_QUICKLIST | ||
| 222 | K(quicklist_total_size()), | ||
| 223 | #endif | ||
| 217 | K(global_page_state(NR_UNSTABLE_NFS)), | 224 | K(global_page_state(NR_UNSTABLE_NFS)), |
| 218 | K(global_page_state(NR_BOUNCE)), | 225 | K(global_page_state(NR_BOUNCE)), |
| 219 | K(global_page_state(NR_WRITEBACK_TEMP)), | 226 | K(global_page_state(NR_WRITEBACK_TEMP)), |
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 52312ec93ff4..5145cb9125af 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
| @@ -58,7 +58,7 @@ const struct inode_operations ramfs_file_inode_operations = { | |||
| 58 | * size 0 on the assumption that it's going to be used for an mmap of shared | 58 | * size 0 on the assumption that it's going to be used for an mmap of shared |
| 59 | * memory | 59 | * memory |
| 60 | */ | 60 | */ |
| 61 | static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | 61 | int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) |
| 62 | { | 62 | { |
| 63 | struct pagevec lru_pvec; | 63 | struct pagevec lru_pvec; |
| 64 | unsigned long npages, xpages, loop, limit; | 64 | unsigned long npages, xpages, loop, limit; |
diff --git a/fs/readdir.c b/fs/readdir.c index 4e026e5407fb..93a7559bbfd8 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
| @@ -80,8 +80,10 @@ static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset | |||
| 80 | if (buf->result) | 80 | if (buf->result) |
| 81 | return -EINVAL; | 81 | return -EINVAL; |
| 82 | d_ino = ino; | 82 | d_ino = ino; |
| 83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 83 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
| 84 | buf->result = -EOVERFLOW; | ||
| 84 | return -EOVERFLOW; | 85 | return -EOVERFLOW; |
| 86 | } | ||
| 85 | buf->result++; | 87 | buf->result++; |
| 86 | dirent = buf->dirent; | 88 | dirent = buf->dirent; |
| 87 | if (!access_ok(VERIFY_WRITE, dirent, | 89 | if (!access_ok(VERIFY_WRITE, dirent, |
| @@ -155,8 +157,10 @@ static int filldir(void * __buf, const char * name, int namlen, loff_t offset, | |||
| 155 | if (reclen > buf->count) | 157 | if (reclen > buf->count) |
| 156 | return -EINVAL; | 158 | return -EINVAL; |
| 157 | d_ino = ino; | 159 | d_ino = ino; |
| 158 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) | 160 | if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { |
| 161 | buf->error = -EOVERFLOW; | ||
| 159 | return -EOVERFLOW; | 162 | return -EOVERFLOW; |
| 163 | } | ||
| 160 | dirent = buf->previous; | 164 | dirent = buf->previous; |
| 161 | if (dirent) { | 165 | if (dirent) { |
| 162 | if (__put_user(offset, &dirent->d_off)) | 166 | if (__put_user(offset, &dirent->d_off)) |
diff --git a/fs/seq_file.c b/fs/seq_file.c index 5d54205e486b..bd20f7f5a933 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
| @@ -108,9 +108,9 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
| 108 | goto Done; | 108 | goto Done; |
| 109 | } | 109 | } |
| 110 | /* we need at least one record in buffer */ | 110 | /* we need at least one record in buffer */ |
| 111 | pos = m->index; | ||
| 112 | p = m->op->start(m, &pos); | ||
| 111 | while (1) { | 113 | while (1) { |
| 112 | pos = m->index; | ||
| 113 | p = m->op->start(m, &pos); | ||
| 114 | err = PTR_ERR(p); | 114 | err = PTR_ERR(p); |
| 115 | if (!p || IS_ERR(p)) | 115 | if (!p || IS_ERR(p)) |
| 116 | break; | 116 | break; |
| @@ -119,6 +119,11 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
| 119 | break; | 119 | break; |
| 120 | if (unlikely(err)) | 120 | if (unlikely(err)) |
| 121 | m->count = 0; | 121 | m->count = 0; |
| 122 | if (unlikely(!m->count)) { | ||
| 123 | p = m->op->next(m, p, &pos); | ||
| 124 | m->index = pos; | ||
| 125 | continue; | ||
| 126 | } | ||
| 122 | if (m->count < m->size) | 127 | if (m->count < m->size) |
| 123 | goto Fill; | 128 | goto Fill; |
| 124 | m->op->stop(m, p); | 129 | m->op->stop(m, p); |
| @@ -128,6 +133,8 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) | |||
| 128 | goto Enomem; | 133 | goto Enomem; |
| 129 | m->count = 0; | 134 | m->count = 0; |
| 130 | m->version = 0; | 135 | m->version = 0; |
| 136 | pos = m->index; | ||
| 137 | p = m->op->start(m, &pos); | ||
| 131 | } | 138 | } |
| 132 | m->op->stop(m, p); | 139 | m->op->stop(m, p); |
| 133 | m->count = 0; | 140 | m->count = 0; |
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 154098157473..73db464cd08b 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
| @@ -302,18 +302,6 @@ long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs) | |||
| 302 | int subtract_lebs; | 302 | int subtract_lebs; |
| 303 | long long available; | 303 | long long available; |
| 304 | 304 | ||
| 305 | /* | ||
| 306 | * Force the amount available to the total size reported if the used | ||
| 307 | * space is zero. | ||
| 308 | */ | ||
| 309 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && | ||
| 310 | c->budg_data_growth + c->budg_dd_growth == 0) { | ||
| 311 | /* Do the same calculation as for c->block_cnt */ | ||
| 312 | available = c->main_lebs - 2; | ||
| 313 | available *= c->leb_size - c->dark_wm; | ||
| 314 | return available; | ||
| 315 | } | ||
| 316 | |||
| 317 | available = c->main_bytes - c->lst.total_used; | 305 | available = c->main_bytes - c->lst.total_used; |
| 318 | 306 | ||
| 319 | /* | 307 | /* |
| @@ -714,34 +702,106 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
| 714 | } | 702 | } |
| 715 | 703 | ||
| 716 | /** | 704 | /** |
| 717 | * ubifs_budg_get_free_space - return amount of free space. | 705 | * ubifs_reported_space - calculate reported free space. |
| 706 | * @c: the UBIFS file-system description object | ||
| 707 | * @free: amount of free space | ||
| 708 | * | ||
| 709 | * This function calculates amount of free space which will be reported to | ||
| 710 | * user-space. User-space applications tend to expect that if the file-system | ||
| 711 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
| 712 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
| 713 | * node and it has to write indexing nodes as well. This introduces additional | ||
| 714 | * overhead, and UBIFS has to report slightly less free space to meet the | ||
| 715 | * above expectation. | ||
| 716 | * | ||
| 717 | * This function assumes free space is made up of uncompressed data nodes and | ||
| 718 | * full index nodes (one per data node, tripled because we always allow enough | ||
| 719 | * space to write the index thrice). | ||
| 720 | * | ||
| 721 | * Note, the calculation is pessimistic, which means that most of the time | ||
| 722 | * UBIFS reports less space than it actually has. | ||
| 723 | */ | ||
| 724 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free) | ||
| 725 | { | ||
| 726 | int divisor, factor, f; | ||
| 727 | |||
| 728 | /* | ||
| 729 | * Reported space size is @free * X, where X is UBIFS block size | ||
| 730 | * divided by UBIFS block size + all overhead one data block | ||
| 731 | * introduces. The overhead is the node header + indexing overhead. | ||
| 732 | * | ||
| 733 | * Indexing overhead calculations are based on the following formula: | ||
| 734 | * I = N/(f - 1) + 1, where I - number of indexing nodes, N - number | ||
| 735 | * of data nodes, f - fanout. Because the effective UBIFS fanout is half | ||
| 736 | * the maximum fanout, we assume that each data node | ||
| 737 | * introduces 3 * @c->max_idx_node_sz / (@c->fanout/2 - 1) bytes. | ||
| 738 | * Note, the multiplier 3 is because UBIFS reserves three times as much | ||
| 739 | * space for the index. | ||
| 740 | */ | ||
| 741 | f = c->fanout > 3 ? c->fanout >> 1 : 2; | ||
| 742 | factor = UBIFS_BLOCK_SIZE; | ||
| 743 | divisor = UBIFS_MAX_DATA_NODE_SZ; | ||
| 744 | divisor += (c->max_idx_node_sz * 3) / (f - 1); | ||
| 745 | free *= factor; | ||
| 746 | do_div(free, divisor); | ||
| 747 | return free; | ||
| 748 | } | ||
| 749 | |||
| 750 | /** | ||
| 751 | * ubifs_get_free_space - return amount of free space. | ||
| 718 | * @c: UBIFS file-system description object | 752 | * @c: UBIFS file-system description object |
| 719 | * | 753 | * |
| 720 | * This function returns amount of free space on the file-system. | 754 | * This function calculates amount of free space to report to user-space. |
| 755 | * | ||
| 756 | * Because UBIFS may introduce substantial overhead (the index, node headers, | ||
| 757 | * alignment, wastage at the end of eraseblocks, etc), it cannot report real | ||
| 758 | * amount of free flash space it has (well, because not all dirty space is | ||
| 759 | * reclaimable, UBIFS does not actually know the real amount). If UBIFS did so, | ||
| 760 | * it would break user expectations about what free space is. Users seem to | ||
| 761 | * be accustomed to assuming that if the file-system reports N bytes of free space, | ||
| 762 | * they would be able to fit a file of N bytes to the FS. This almost works for | ||
| 763 | * traditional file-systems, because they have way less overhead than UBIFS. | ||
| 764 | * So, to keep users happy, UBIFS tries to take the overhead into account. | ||
| 721 | */ | 765 | */ |
| 722 | long long ubifs_budg_get_free_space(struct ubifs_info *c) | 766 | long long ubifs_get_free_space(struct ubifs_info *c) |
| 723 | { | 767 | { |
| 724 | int min_idx_lebs, rsvd_idx_lebs; | 768 | int min_idx_lebs, rsvd_idx_lebs, lebs; |
| 725 | long long available, outstanding, free; | 769 | long long available, outstanding, free; |
| 726 | 770 | ||
| 727 | /* Do exactly the same calculations as in 'do_budget_space()' */ | ||
| 728 | spin_lock(&c->space_lock); | 771 | spin_lock(&c->space_lock); |
| 729 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 772 | min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 773 | outstanding = c->budg_data_growth + c->budg_dd_growth; | ||
| 730 | 774 | ||
| 731 | if (min_idx_lebs > c->lst.idx_lebs) | 775 | /* |
| 732 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | 776 | * Force the amount available to the total size reported if the used |
| 733 | else | 777 | * space is zero. |
| 734 | rsvd_idx_lebs = 0; | 778 | */ |
| 735 | 779 | if (c->lst.total_used <= UBIFS_INO_NODE_SZ && !outstanding) { | |
| 736 | if (rsvd_idx_lebs > c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt | ||
| 737 | - c->lst.taken_empty_lebs) { | ||
| 738 | spin_unlock(&c->space_lock); | 780 | spin_unlock(&c->space_lock); |
| 739 | return 0; | 781 | return (long long)c->block_cnt << UBIFS_BLOCK_SHIFT; |
| 740 | } | 782 | } |
| 741 | 783 | ||
| 742 | available = ubifs_calc_available(c, min_idx_lebs); | 784 | available = ubifs_calc_available(c, min_idx_lebs); |
| 743 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 785 | |
| 744 | c->min_idx_lebs = min_idx_lebs; | 786 | /* |
| 787 | * When reporting free space to user-space, UBIFS guarantees that it is | ||
| 788 | * possible to write a file of free space size. This means that for | ||
| 789 | * empty LEBs we may use more precise calculations than | ||
| 790 | * 'ubifs_calc_available()' is using. Namely, we know that in empty | ||
| 791 | * LEBs we would waste only @c->leb_overhead bytes, not @c->dark_wm. | ||
| 792 | * Thus, amend the available space. | ||
| 793 | * | ||
| 794 | * Note, the calculations below are similar to what we have in | ||
| 795 | * 'do_budget_space()', so refer there for comments. | ||
| 796 | */ | ||
| 797 | if (min_idx_lebs > c->lst.idx_lebs) | ||
| 798 | rsvd_idx_lebs = min_idx_lebs - c->lst.idx_lebs; | ||
| 799 | else | ||
| 800 | rsvd_idx_lebs = 0; | ||
| 801 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | ||
| 802 | c->lst.taken_empty_lebs; | ||
| 803 | lebs -= rsvd_idx_lebs; | ||
| 804 | available += lebs * (c->dark_wm - c->leb_overhead); | ||
| 745 | spin_unlock(&c->space_lock); | 805 | spin_unlock(&c->space_lock); |
| 746 | 806 | ||
| 747 | if (available > outstanding) | 807 | if (available > outstanding) |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index b9cb77473758..d7f7645779f2 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
| @@ -538,7 +538,7 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
| 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); | 538 | printk(KERN_DEBUG "\t%d orphan inode numbers:\n", n); |
| 539 | for (i = 0; i < n; i++) | 539 | for (i = 0; i < n; i++) |
| 540 | printk(KERN_DEBUG "\t ino %llu\n", | 540 | printk(KERN_DEBUG "\t ino %llu\n", |
| 541 | le64_to_cpu(orph->inos[i])); | 541 | (unsigned long long)le64_to_cpu(orph->inos[i])); |
| 542 | break; | 542 | break; |
| 543 | } | 543 | } |
| 544 | default: | 544 | default: |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 5c96f1fb7016..526c01ec8003 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
| @@ -426,7 +426,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 426 | 426 | ||
| 427 | while (1) { | 427 | while (1) { |
| 428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", | 428 | dbg_gen("feed '%s', ino %llu, new f_pos %#x", |
| 429 | dent->name, le64_to_cpu(dent->inum), | 429 | dent->name, (unsigned long long)le64_to_cpu(dent->inum), |
| 430 | key_hash_flash(c, &dent->key)); | 430 | key_hash_flash(c, &dent->key)); |
| 431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); | 431 | ubifs_assert(dent->ch.sqnum > ubifs_inode(dir)->creat_sqnum); |
| 432 | 432 | ||
| @@ -587,7 +587,6 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 587 | if (err) { | 587 | if (err) { |
| 588 | if (err != -ENOSPC) | 588 | if (err != -ENOSPC) |
| 589 | return err; | 589 | return err; |
| 590 | err = 0; | ||
| 591 | budgeted = 0; | 590 | budgeted = 0; |
| 592 | } | 591 | } |
| 593 | 592 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 4071d1cae29f..3d698e2022b1 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -793,7 +793,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
| 793 | int err; | 793 | int err; |
| 794 | struct ubifs_budget_req req; | 794 | struct ubifs_budget_req req; |
| 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; | 795 | loff_t old_size = inode->i_size, new_size = attr->ia_size; |
| 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1); | 796 | int offset = new_size & (UBIFS_BLOCK_SIZE - 1), budgeted = 1; |
| 797 | struct ubifs_inode *ui = ubifs_inode(inode); | 797 | struct ubifs_inode *ui = ubifs_inode(inode); |
| 798 | 798 | ||
| 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); | 799 | dbg_gen("ino %lu, size %lld -> %lld", inode->i_ino, old_size, new_size); |
| @@ -811,8 +811,15 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
| 811 | /* A funny way to budget for truncation node */ | 811 | /* A funny way to budget for truncation node */ |
| 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; | 812 | req.dirtied_ino_d = UBIFS_TRUN_NODE_SZ; |
| 813 | err = ubifs_budget_space(c, &req); | 813 | err = ubifs_budget_space(c, &req); |
| 814 | if (err) | 814 | if (err) { |
| 815 | return err; | 815 | /* |
| 816 | * Treat truncations to zero as deletion and always allow them, | ||
| 817 | * just like we do for '->unlink()'. | ||
| 818 | */ | ||
| 819 | if (new_size || err != -ENOSPC) | ||
| 820 | return err; | ||
| 821 | budgeted = 0; | ||
| 822 | } | ||
| 816 | 823 | ||
| 817 | err = vmtruncate(inode, new_size); | 824 | err = vmtruncate(inode, new_size); |
| 818 | if (err) | 825 | if (err) |
| @@ -869,7 +876,12 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode, | |||
| 869 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); | 876 | err = ubifs_jnl_truncate(c, inode, old_size, new_size); |
| 870 | mutex_unlock(&ui->ui_mutex); | 877 | mutex_unlock(&ui->ui_mutex); |
| 871 | out_budg: | 878 | out_budg: |
| 872 | ubifs_release_budget(c, &req); | 879 | if (budgeted) |
| 880 | ubifs_release_budget(c, &req); | ||
| 881 | else { | ||
| 882 | c->nospace = c->nospace_rp = 0; | ||
| 883 | smp_wmb(); | ||
| 884 | } | ||
| 873 | return err; | 885 | return err; |
| 874 | } | 886 | } |
| 875 | 887 | ||
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index adee7b5ddeab..47814cde2407 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
| @@ -211,14 +211,8 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
| 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty | 211 | * dirty index heap, and it falls-back to LPT scanning if the heaps are empty |
| 212 | * or do not have an LEB which satisfies the @min_space criteria. | 212 | * or do not have an LEB which satisfies the @min_space criteria. |
| 213 | * | 213 | * |
| 214 | * Note: | 214 | * Note, LEBs which have less than dead watermark of free + dirty space are |
| 215 | * o LEBs which have less than dead watermark of dirty space are never picked | 215 | * never picked by this function. |
| 216 | * by this function; | ||
| 217 | * | ||
| 218 | * Returns zero and the LEB properties of | ||
| 219 | * found dirty LEB in case of success, %-ENOSPC if no dirty LEB was found and a | ||
| 220 | * negative error code in case of other failures. The returned LEB is marked as | ||
| 221 | * "taken". | ||
| 222 | * | 216 | * |
| 223 | * The additional @pick_free argument controls if this function has to return a | 217 | * The additional @pick_free argument controls if this function has to return a |
| 224 | * free or freeable LEB if one is present. For example, GC must set it to %1, | 218 | * free or freeable LEB if one is present. For example, GC must set it to %1, |
| @@ -231,6 +225,10 @@ static const struct ubifs_lprops *scan_for_dirty(struct ubifs_info *c, | |||
| 231 | * | 225 | * |
| 232 | * In addition @pick_free is set to %2 by the recovery process in order to | 226 | * In addition @pick_free is set to %2 by the recovery process in order to |
| 233 | * recover gc_lnum in which case an index LEB must not be returned. | 227 | * recover gc_lnum in which case an index LEB must not be returned. |
| 228 | * | ||
| 229 | * This function returns zero and the LEB properties of found dirty LEB in case | ||
| 230 | * of success, %-ENOSPC if no dirty LEB was found and a negative error code in | ||
| 231 | * case of other failures. The returned LEB is marked as "taken". | ||
| 234 | */ | 232 | */ |
| 235 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | 233 | int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, |
| 236 | int min_space, int pick_free) | 234 | int min_space, int pick_free) |
| @@ -245,7 +243,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
| 245 | int lebs, rsvd_idx_lebs = 0; | 243 | int lebs, rsvd_idx_lebs = 0; |
| 246 | 244 | ||
| 247 | spin_lock(&c->space_lock); | 245 | spin_lock(&c->space_lock); |
| 248 | lebs = c->lst.empty_lebs; | 246 | lebs = c->lst.empty_lebs + c->idx_gc_cnt; |
| 249 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; | 247 | lebs += c->freeable_cnt - c->lst.taken_empty_lebs; |
| 250 | 248 | ||
| 251 | /* | 249 | /* |
| @@ -317,7 +315,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
| 317 | lp = idx_lp; | 315 | lp = idx_lp; |
| 318 | 316 | ||
| 319 | if (lp) { | 317 | if (lp) { |
| 320 | ubifs_assert(lp->dirty >= c->dead_wm); | 318 | ubifs_assert(lp->free + lp->dirty >= c->dead_wm); |
| 321 | goto found; | 319 | goto found; |
| 322 | } | 320 | } |
| 323 | 321 | ||
| @@ -509,7 +507,6 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *free, | |||
| 509 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
| 510 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
| 511 | c->lst.taken_empty_lebs; | 509 | c->lst.taken_empty_lebs; |
| 512 | ubifs_assert(lebs + c->lst.idx_lebs >= c->min_idx_lebs); | ||
| 513 | if (rsvd_idx_lebs < lebs) | 510 | if (rsvd_idx_lebs < lebs) |
| 514 | /* | 511 | /* |
| 515 | * OK to allocate an empty LEB, but we still don't want to go | 512 | * OK to allocate an empty LEB, but we still don't want to go |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index d0f3dac29081..02aba36fe3d4 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
| @@ -334,15 +334,21 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
| 334 | 334 | ||
| 335 | err = move_nodes(c, sleb); | 335 | err = move_nodes(c, sleb); |
| 336 | if (err) | 336 | if (err) |
| 337 | goto out; | 337 | goto out_inc_seq; |
| 338 | 338 | ||
| 339 | err = gc_sync_wbufs(c); | 339 | err = gc_sync_wbufs(c); |
| 340 | if (err) | 340 | if (err) |
| 341 | goto out; | 341 | goto out_inc_seq; |
| 342 | 342 | ||
| 343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); | 343 | err = ubifs_change_one_lp(c, lnum, c->leb_size, 0, 0, 0, 0); |
| 344 | if (err) | 344 | if (err) |
| 345 | goto out; | 345 | goto out_inc_seq; |
| 346 | |||
| 347 | /* Allow for races with TNC */ | ||
| 348 | c->gced_lnum = lnum; | ||
| 349 | smp_wmb(); | ||
| 350 | c->gc_seq += 1; | ||
| 351 | smp_wmb(); | ||
| 346 | 352 | ||
| 347 | if (c->gc_lnum == -1) { | 353 | if (c->gc_lnum == -1) { |
| 348 | c->gc_lnum = lnum; | 354 | c->gc_lnum = lnum; |
| @@ -363,6 +369,14 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
| 363 | out: | 369 | out: |
| 364 | ubifs_scan_destroy(sleb); | 370 | ubifs_scan_destroy(sleb); |
| 365 | return err; | 371 | return err; |
| 372 | |||
| 373 | out_inc_seq: | ||
| 374 | /* We may have moved at least some nodes so allow for races with TNC */ | ||
| 375 | c->gced_lnum = lnum; | ||
| 376 | smp_wmb(); | ||
| 377 | c->gc_seq += 1; | ||
| 378 | smp_wmb(); | ||
| 379 | goto out; | ||
| 366 | } | 380 | } |
| 367 | 381 | ||
| 368 | /** | 382 | /** |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 87dabf9fe742..4c12a9215d7f 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
| @@ -284,38 +284,6 @@ static inline void *ubifs_idx_key(const struct ubifs_info *c, | |||
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | /** | 286 | /** |
| 287 | * ubifs_reported_space - calculate reported free space. | ||
| 288 | * @c: the UBIFS file-system description object | ||
| 289 | * @free: amount of free space | ||
| 290 | * | ||
| 291 | * This function calculates amount of free space which will be reported to | ||
| 292 | * user-space. User-space application tend to expect that if the file-system | ||
| 293 | * (e.g., via the 'statfs()' call) reports that it has N bytes available, they | ||
| 294 | * are able to write a file of size N. UBIFS attaches node headers to each data | ||
| 295 | * node and it has to write indexing nodes as well. This introduces additional | | |
| 296 | * overhead, and UBIFS has to report slightly less free space to meet the | | |
| 297 | * above expectation. | | |
| 298 | * | ||
| 299 | * This function assumes free space is made up of uncompressed data nodes and | ||
| 300 | * full index nodes (one per data node, doubled because we always allow enough | ||
| 301 | * space to write the index twice). | ||
| 302 | * | ||
| 303 | * Note, the calculation is pessimistic, which means that most of the time | ||
| 304 | * UBIFS reports less space than it actually has. | ||
| 305 | */ | ||
| 306 | static inline long long ubifs_reported_space(const struct ubifs_info *c, | ||
| 307 | uint64_t free) | ||
| 308 | { | ||
| 309 | int divisor, factor; | ||
| 310 | |||
| 311 | divisor = UBIFS_MAX_DATA_NODE_SZ + (c->max_idx_node_sz * 3); | ||
| 312 | factor = UBIFS_MAX_DATA_NODE_SZ - UBIFS_DATA_NODE_SZ; | ||
| 313 | do_div(free, divisor); | ||
| 314 | |||
| 315 | return free * factor; | ||
| 316 | } | ||
| 317 | |||
| 318 | /** | ||
| 319 | * ubifs_current_time - round current time to time granularity. | 287 | * ubifs_current_time - round current time to time granularity. |
| 320 | * @inode: inode | 288 | * @inode: inode |
| 321 | */ | 289 | */ |
| @@ -325,4 +293,21 @@ static inline struct timespec ubifs_current_time(struct inode *inode) | |||
| 325 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; | 293 | current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; |
| 326 | } | 294 | } |
| 327 | 295 | ||
| 296 | /** | ||
| 297 | * ubifs_tnc_lookup - look up a file-system node. | ||
| 298 | * @c: UBIFS file-system description object | ||
| 299 | * @key: node key to lookup | ||
| 300 | * @node: the node is returned here | ||
| 301 | * | ||
| | | 302 | * This function looks up and reads the node with key @key. The caller has to make |
| 303 | * sure the @node buffer is large enough to fit the node. Returns zero in case | ||
| 304 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
| 305 | * case of failure. | ||
| 306 | */ | ||
| 307 | static inline int ubifs_tnc_lookup(struct ubifs_info *c, | ||
| 308 | const union ubifs_key *key, void *node) | ||
| 309 | { | ||
| 310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | ||
| 311 | } | ||
| 312 | |||
| 328 | #endif /* __UBIFS_MISC_H__ */ | 313 | #endif /* __UBIFS_MISC_H__ */ |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f71e6b8822c4..3f4902060c7a 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -370,8 +370,9 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 370 | { | 370 | { |
| 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; | 371 | struct ubifs_info *c = dentry->d_sb->s_fs_info; |
| 372 | unsigned long long free; | 372 | unsigned long long free; |
| 373 | __le32 *uuid = (__le32 *)c->uuid; | ||
| 373 | 374 | ||
| 374 | free = ubifs_budg_get_free_space(c); | 375 | free = ubifs_get_free_space(c); |
| 375 | dbg_gen("free space %lld bytes (%lld blocks)", | 376 | dbg_gen("free space %lld bytes (%lld blocks)", |
| 376 | free, free >> UBIFS_BLOCK_SHIFT); | 377 | free, free >> UBIFS_BLOCK_SHIFT); |
| 377 | 378 | ||
| @@ -386,7 +387,8 @@ static int ubifs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 386 | buf->f_files = 0; | 387 | buf->f_files = 0; |
| 387 | buf->f_ffree = 0; | 388 | buf->f_ffree = 0; |
| 388 | buf->f_namelen = UBIFS_MAX_NLEN; | 389 | buf->f_namelen = UBIFS_MAX_NLEN; |
| 389 | 390 | buf->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[2]); | |
| 391 | buf->f_fsid.val[1] = le32_to_cpu(uuid[1]) ^ le32_to_cpu(uuid[3]); | ||
| 390 | return 0; | 392 | return 0; |
| 391 | } | 393 | } |
| 392 | 394 | ||
| @@ -530,6 +532,12 @@ static int init_constants_early(struct ubifs_info *c) | |||
| 530 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); | 532 | c->dead_wm = ALIGN(MIN_WRITE_SZ, c->min_io_size); |
| 531 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); | 533 | c->dark_wm = ALIGN(UBIFS_MAX_NODE_SZ, c->min_io_size); |
| 532 | 534 | ||
| 535 | /* | ||
| 536 | * Calculate how many bytes would be wasted at the end of LEB if it was | ||
| 537 | * fully filled with data nodes of maximum size. This is used in | ||
| 538 | * calculations when reporting free space. | ||
| 539 | */ | ||
| 540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | ||
| 533 | return 0; | 541 | return 0; |
| 534 | } | 542 | } |
| 535 | 543 | ||
| @@ -647,13 +655,11 @@ static int init_constants_late(struct ubifs_info *c) | |||
| 647 | * internally because it does not make much sense for UBIFS, but it is | 655 | * internally because it does not make much sense for UBIFS, but it is |
| 648 | * necessary to report something for the 'statfs()' call. | 656 | * necessary to report something for the 'statfs()' call. |
| 649 | * | 657 | * |
| 650 | * Subtract the LEB reserved for GC and the LEB which is reserved for | 658 | * Subtract the LEB reserved for GC, the LEB which is reserved for |
| 651 | * deletions. | 659 | * deletions, and assume only one journal head is available. |
| 652 | * | ||
| 653 | * Review 'ubifs_calc_available()' if changing this calculation. | ||
| 654 | */ | 660 | */ |
| 655 | tmp64 = c->main_lebs - 2; | 661 | tmp64 = c->main_lebs - 2 - c->jhead_cnt + 1; |
| 656 | tmp64 *= (uint64_t)c->leb_size - c->dark_wm; | 662 | tmp64 *= (uint64_t)c->leb_size - c->leb_overhead; |
| 657 | tmp64 = ubifs_reported_space(c, tmp64); | 663 | tmp64 = ubifs_reported_space(c, tmp64); |
| 658 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; | 664 | c->block_cnt = tmp64 >> UBIFS_BLOCK_SHIFT; |
| 659 | 665 | ||
| @@ -1018,14 +1024,13 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1018 | goto out_dereg; | 1024 | goto out_dereg; |
| 1019 | } | 1025 | } |
| 1020 | 1026 | ||
| 1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, c->vi.vol_id); | ||
| 1021 | if (!mounted_read_only) { | 1028 | if (!mounted_read_only) { |
| 1022 | err = alloc_wbufs(c); | 1029 | err = alloc_wbufs(c); |
| 1023 | if (err) | 1030 | if (err) |
| 1024 | goto out_cbuf; | 1031 | goto out_cbuf; |
| 1025 | 1032 | ||
| 1026 | /* Create background thread */ | 1033 | /* Create background thread */ |
| 1027 | sprintf(c->bgt_name, BGT_NAME_PATTERN, c->vi.ubi_num, | ||
| 1028 | c->vi.vol_id); | ||
| 1029 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
| 1030 | if (!c->bgt) | 1035 | if (!c->bgt) |
| 1031 | c->bgt = ERR_PTR(-EINVAL); | 1036 | c->bgt = ERR_PTR(-EINVAL); |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index e909f4a96443..7634c5970887 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
| @@ -506,7 +506,7 @@ static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key, | |||
| 506 | if (keys_cmp(c, key, &node_key) != 0) | 506 | if (keys_cmp(c, key, &node_key) != 0) |
| 507 | ret = 0; | 507 | ret = 0; |
| 508 | } | 508 | } |
| 509 | if (ret == 0) | 509 | if (ret == 0 && c->replaying) |
| 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", | 510 | dbg_mnt("dangling branch LEB %d:%d len %d, key %s", |
| 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); | 511 | zbr->lnum, zbr->offs, zbr->len, DBGKEY(key)); |
| 512 | return ret; | 512 | return ret; |
| @@ -1382,50 +1382,39 @@ static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key, | |||
| 1382 | } | 1382 | } |
| 1383 | 1383 | ||
| 1384 | /** | 1384 | /** |
| 1385 | * ubifs_tnc_lookup - look up a file-system node. | 1385 | * maybe_leb_gced - determine if a LEB may have been garbage collected. |
| 1386 | * @c: UBIFS file-system description object | 1386 | * @c: UBIFS file-system description object |
| 1387 | * @key: node key to lookup | 1387 | * @lnum: LEB number |
| 1388 | * @node: the node is returned here | 1388 | * @gc_seq1: garbage collection sequence number |
| 1389 | * | 1389 | * |
| 1390 | * This function look up and reads node with key @key. The caller has to make | 1390 | * This function determines if @lnum may have been garbage collected since |
| 1391 | * sure the @node buffer is large enough to fit the node. Returns zero in case | 1391 | * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise |
| 1392 | * of success, %-ENOENT if the node was not found, and a negative error code in | 1392 | * %0 is returned. |
| 1393 | * case of failure. | ||
| 1394 | */ | 1393 | */ |
| 1395 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | 1394 | static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1) |
| 1396 | void *node) | ||
| 1397 | { | 1395 | { |
| 1398 | int found, n, err; | 1396 | int gc_seq2, gced_lnum; |
| 1399 | struct ubifs_znode *znode; | ||
| 1400 | struct ubifs_zbranch zbr, *zt; | ||
| 1401 | 1397 | ||
| 1402 | mutex_lock(&c->tnc_mutex); | 1398 | gced_lnum = c->gced_lnum; |
| 1403 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1399 | smp_rmb(); |
| 1404 | if (!found) { | 1400 | gc_seq2 = c->gc_seq; |
| 1405 | err = -ENOENT; | 1401 | /* Same seq means no GC */ |
| 1406 | goto out; | 1402 | if (gc_seq1 == gc_seq2) |
| 1407 | } else if (found < 0) { | 1403 | return 0; |
| 1408 | err = found; | 1404 | /* Different by more than 1 means we don't know */ |
| 1409 | goto out; | 1405 | if (gc_seq1 + 1 != gc_seq2) |
| 1410 | } | 1406 | return 1; |
| 1411 | zt = &znode->zbranch[n]; | 1407 | /* |
| 1412 | if (is_hash_key(c, key)) { | 1408 | * We have seen the sequence number has increased by 1. Now we need to |
| 1413 | /* | 1409 | * be sure we read the right LEB number, so read it again. |
| 1414 | * In this case the leaf node cache gets used, so we pass the | 1410 | */ |
| 1415 | * address of the zbranch and keep the mutex locked | 1411 | smp_rmb(); |
| 1416 | */ | 1412 | if (gced_lnum != c->gced_lnum) |
| 1417 | err = tnc_read_node_nm(c, zt, node); | 1413 | return 1; |
| 1418 | goto out; | 1414 | /* Finally we can check lnum */ |
| 1419 | } | 1415 | if (gced_lnum == lnum) |
| 1420 | zbr = znode->zbranch[n]; | 1416 | return 1; |
| 1421 | mutex_unlock(&c->tnc_mutex); | 1417 | return 0; |
| 1422 | |||
| 1423 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
| 1424 | return err; | ||
| 1425 | |||
| 1426 | out: | ||
| 1427 | mutex_unlock(&c->tnc_mutex); | ||
| 1428 | return err; | ||
| 1429 | } | 1418 | } |
| 1430 | 1419 | ||
| 1431 | /** | 1420 | /** |
| @@ -1436,16 +1425,19 @@ out: | |||
| 1436 | * @lnum: LEB number is returned here | 1425 | * @lnum: LEB number is returned here |
| 1437 | * @offs: offset is returned here | 1426 | * @offs: offset is returned here |
| 1438 | * | 1427 | * |
| 1439 | * This function is the same as 'ubifs_tnc_lookup()' but it returns the node | 1428 | * This function looks up and reads the node with key @key. The caller has to make |
| 1440 | * location also. See 'ubifs_tnc_lookup()'. | 1429 | * sure the @node buffer is large enough to fit the node. Returns zero in case |
| 1430 | * of success, %-ENOENT if the node was not found, and a negative error code in | ||
| 1431 | * case of failure. The node location can be returned in @lnum and @offs. | ||
| 1441 | */ | 1432 | */ |
| 1442 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1433 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
| 1443 | void *node, int *lnum, int *offs) | 1434 | void *node, int *lnum, int *offs) |
| 1444 | { | 1435 | { |
| 1445 | int found, n, err; | 1436 | int found, n, err, safely = 0, gc_seq1; |
| 1446 | struct ubifs_znode *znode; | 1437 | struct ubifs_znode *znode; |
| 1447 | struct ubifs_zbranch zbr, *zt; | 1438 | struct ubifs_zbranch zbr, *zt; |
| 1448 | 1439 | ||
| 1440 | again: | ||
| 1449 | mutex_lock(&c->tnc_mutex); | 1441 | mutex_lock(&c->tnc_mutex); |
| 1450 | found = ubifs_lookup_level0(c, key, &znode, &n); | 1442 | found = ubifs_lookup_level0(c, key, &znode, &n); |
| 1451 | if (!found) { | 1443 | if (!found) { |
| @@ -1456,24 +1448,43 @@ int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | |||
| 1456 | goto out; | 1448 | goto out; |
| 1457 | } | 1449 | } |
| 1458 | zt = &znode->zbranch[n]; | 1450 | zt = &znode->zbranch[n]; |
| 1451 | if (lnum) { | ||
| 1452 | *lnum = zt->lnum; | ||
| 1453 | *offs = zt->offs; | ||
| 1454 | } | ||
| 1459 | if (is_hash_key(c, key)) { | 1455 | if (is_hash_key(c, key)) { |
| 1460 | /* | 1456 | /* |
| 1461 | * In this case the leaf node cache gets used, so we pass the | 1457 | * In this case the leaf node cache gets used, so we pass the |
| 1462 | * address of the zbranch and keep the mutex locked | 1458 | * address of the zbranch and keep the mutex locked |
| 1463 | */ | 1459 | */ |
| 1464 | *lnum = zt->lnum; | ||
| 1465 | *offs = zt->offs; | ||
| 1466 | err = tnc_read_node_nm(c, zt, node); | 1460 | err = tnc_read_node_nm(c, zt, node); |
| 1467 | goto out; | 1461 | goto out; |
| 1468 | } | 1462 | } |
| 1463 | if (safely) { | ||
| 1464 | err = ubifs_tnc_read_node(c, zt, node); | ||
| 1465 | goto out; | ||
| 1466 | } | ||
| 1467 | /* Drop the TNC mutex prematurely and race with garbage collection */ | ||
| 1469 | zbr = znode->zbranch[n]; | 1468 | zbr = znode->zbranch[n]; |
| 1469 | gc_seq1 = c->gc_seq; | ||
| 1470 | mutex_unlock(&c->tnc_mutex); | 1470 | mutex_unlock(&c->tnc_mutex); |
| 1471 | 1471 | ||
| 1472 | *lnum = zbr.lnum; | 1472 | if (ubifs_get_wbuf(c, zbr.lnum)) { |
| 1473 | *offs = zbr.offs; | 1473 | /* We do not GC journal heads */ |
| 1474 | err = ubifs_tnc_read_node(c, &zbr, node); | ||
| 1475 | return err; | ||
| 1476 | } | ||
| 1474 | 1477 | ||
| 1475 | err = ubifs_tnc_read_node(c, &zbr, node); | 1478 | err = fallible_read_node(c, key, &zbr, node); |
| 1476 | return err; | 1479 | if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) { |
| 1480 | /* | ||
| 1481 | * The node may have been GC'ed out from under us so try again | ||
| 1482 | * while keeping the TNC mutex locked. | ||
| 1483 | */ | ||
| 1484 | safely = 1; | ||
| 1485 | goto again; | ||
| 1486 | } | ||
| 1487 | return 0; | ||
| 1477 | 1488 | ||
| 1478 | out: | 1489 | out: |
| 1479 | mutex_unlock(&c->tnc_mutex); | 1490 | mutex_unlock(&c->tnc_mutex); |
| @@ -1498,7 +1509,6 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
| 1498 | { | 1509 | { |
| 1499 | int found, n, err; | 1510 | int found, n, err; |
| 1500 | struct ubifs_znode *znode; | 1511 | struct ubifs_znode *znode; |
| 1501 | struct ubifs_zbranch zbr; | ||
| 1502 | 1512 | ||
| 1503 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); | 1513 | dbg_tnc("name '%.*s' key %s", nm->len, nm->name, DBGKEY(key)); |
| 1504 | mutex_lock(&c->tnc_mutex); | 1514 | mutex_lock(&c->tnc_mutex); |
| @@ -1522,11 +1532,7 @@ static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
| 1522 | goto out_unlock; | 1532 | goto out_unlock; |
| 1523 | } | 1533 | } |
| 1524 | 1534 | ||
| 1525 | zbr = znode->zbranch[n]; | 1535 | err = tnc_read_node_nm(c, &znode->zbranch[n], node); |
| 1526 | mutex_unlock(&c->tnc_mutex); | ||
| 1527 | |||
| 1528 | err = tnc_read_node_nm(c, &zbr, node); | ||
| 1529 | return err; | ||
| 1530 | 1536 | ||
| 1531 | out_unlock: | 1537 | out_unlock: |
| 1532 | mutex_unlock(&c->tnc_mutex); | 1538 | mutex_unlock(&c->tnc_mutex); |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index bd2121f3426e..a9ecbd9af20d 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
| @@ -87,7 +87,7 @@ | |||
| 87 | #define UBIFS_SK_LEN 8 | 87 | #define UBIFS_SK_LEN 8 |
| 88 | 88 | ||
| 89 | /* Minimum index tree fanout */ | 89 | /* Minimum index tree fanout */ |
| 90 | #define UBIFS_MIN_FANOUT 2 | 90 | #define UBIFS_MIN_FANOUT 3 |
| 91 | 91 | ||
| 92 | /* Maximum number of levels in UBIFS indexing B-tree */ | 92 | /* Maximum number of levels in UBIFS indexing B-tree */ |
| 93 | #define UBIFS_MAX_LEVELS 512 | 93 | #define UBIFS_MAX_LEVELS 512 |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index d7f706f7a302..17c620b93eec 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
| @@ -995,6 +995,9 @@ struct ubifs_mount_opts { | |||
| 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 995 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
| 996 | * @max_inode_sz: maximum possible inode size in bytes | 996 | * @max_inode_sz: maximum possible inode size in bytes |
| 997 | * @max_znode_sz: size of znode in bytes | 997 | * @max_znode_sz: size of znode in bytes |
| 998 | * | ||
| 999 | * @leb_overhead: how many bytes are wasted in an LEB when it is filled with | ||
| 1000 | * data nodes of maximum size - used in free space reporting | ||
| 998 | * @dead_wm: LEB dead space watermark | 1001 | * @dead_wm: LEB dead space watermark |
| 999 | * @dark_wm: LEB dark space watermark | 1002 | * @dark_wm: LEB dark space watermark |
| 1000 | * @block_cnt: count of 4KiB blocks on the FS | 1003 | * @block_cnt: count of 4KiB blocks on the FS |
| @@ -1028,6 +1031,8 @@ struct ubifs_mount_opts { | |||
| 1028 | * @sbuf: a buffer of LEB size used by GC and replay for scanning | 1031 | * @sbuf: a buffer of LEB size used by GC and replay for scanning |
| 1029 | * @idx_gc: list of index LEBs that have been garbage collected | 1032 | * @idx_gc: list of index LEBs that have been garbage collected |
| 1030 | * @idx_gc_cnt: number of elements on the idx_gc list | 1033 | * @idx_gc_cnt: number of elements on the idx_gc list |
| 1034 | * @gc_seq: incremented for every non-index LEB garbage collected | ||
| 1035 | * @gced_lnum: last non-index LEB that was garbage collected | ||
| 1031 | * | 1036 | * |
| 1032 | * @infos_list: links all 'ubifs_info' objects | 1037 | * @infos_list: links all 'ubifs_info' objects |
| 1033 | * @umount_mutex: serializes shrinker and un-mount | 1038 | * @umount_mutex: serializes shrinker and un-mount |
| @@ -1224,6 +1229,8 @@ struct ubifs_info { | |||
| 1224 | int max_idx_node_sz; | 1229 | int max_idx_node_sz; |
| 1225 | long long max_inode_sz; | 1230 | long long max_inode_sz; |
| 1226 | int max_znode_sz; | 1231 | int max_znode_sz; |
| 1232 | |||
| 1233 | int leb_overhead; | ||
| 1227 | int dead_wm; | 1234 | int dead_wm; |
| 1228 | int dark_wm; | 1235 | int dark_wm; |
| 1229 | int block_cnt; | 1236 | int block_cnt; |
| @@ -1257,6 +1264,8 @@ struct ubifs_info { | |||
| 1257 | void *sbuf; | 1264 | void *sbuf; |
| 1258 | struct list_head idx_gc; | 1265 | struct list_head idx_gc; |
| 1259 | int idx_gc_cnt; | 1266 | int idx_gc_cnt; |
| 1267 | volatile int gc_seq; | ||
| 1268 | volatile int gced_lnum; | ||
| 1260 | 1269 | ||
| 1261 | struct list_head infos_list; | 1270 | struct list_head infos_list; |
| 1262 | struct mutex umount_mutex; | 1271 | struct mutex umount_mutex; |
| @@ -1434,9 +1443,10 @@ void ubifs_release_ino_dirty(struct ubifs_info *c, struct inode *inode, | |||
| 1434 | struct ubifs_budget_req *req); | 1443 | struct ubifs_budget_req *req); |
| 1435 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, | 1444 | void ubifs_cancel_ino_op(struct ubifs_info *c, struct inode *inode, |
| 1436 | struct ubifs_budget_req *req); | 1445 | struct ubifs_budget_req *req); |
| 1437 | long long ubifs_budg_get_free_space(struct ubifs_info *c); | 1446 | long long ubifs_get_free_space(struct ubifs_info *c); |
| 1438 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); | 1447 | int ubifs_calc_min_idx_lebs(struct ubifs_info *c); |
| 1439 | void ubifs_convert_page_budget(struct ubifs_info *c); | 1448 | void ubifs_convert_page_budget(struct ubifs_info *c); |
| 1449 | long long ubifs_reported_space(const struct ubifs_info *c, uint64_t free); | ||
| 1440 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); | 1450 | long long ubifs_calc_available(const struct ubifs_info *c, int min_idx_lebs); |
| 1441 | 1451 | ||
| 1442 | /* find.c */ | 1452 | /* find.c */ |
| @@ -1451,8 +1461,6 @@ int ubifs_save_dirty_idx_lnums(struct ubifs_info *c); | |||
| 1451 | /* tnc.c */ | 1461 | /* tnc.c */ |
| 1452 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, | 1462 | int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key, |
| 1453 | struct ubifs_znode **zn, int *n); | 1463 | struct ubifs_znode **zn, int *n); |
| 1454 | int ubifs_tnc_lookup(struct ubifs_info *c, const union ubifs_key *key, | ||
| 1455 | void *node); | ||
| 1456 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, | 1464 | int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key, |
| 1457 | void *node, const struct qstr *nm); | 1465 | void *node, const struct qstr *nm); |
| 1458 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, | 1466 | int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key, |
diff --git a/fs/udf/file.c b/fs/udf/file.c index 0ed6e146a0d9..eb91f3b70320 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c | |||
| @@ -211,6 +211,7 @@ const struct file_operations udf_file_operations = { | |||
| 211 | .release = udf_release_file, | 211 | .release = udf_release_file, |
| 212 | .fsync = udf_fsync_file, | 212 | .fsync = udf_fsync_file, |
| 213 | .splice_read = generic_file_splice_read, | 213 | .splice_read = generic_file_splice_read, |
| 214 | .llseek = generic_file_llseek, | ||
| 214 | }; | 215 | }; |
| 215 | 216 | ||
| 216 | const struct inode_operations udf_file_inode_operations = { | 217 | const struct inode_operations udf_file_inode_operations = { |
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c index eb9cfa23dc3d..a4f2b3ce45b0 100644 --- a/fs/udf/ialloc.c +++ b/fs/udf/ialloc.c | |||
| @@ -76,11 +76,24 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
| 76 | *err = -ENOSPC; | 76 | *err = -ENOSPC; |
| 77 | 77 | ||
| 78 | iinfo = UDF_I(inode); | 78 | iinfo = UDF_I(inode); |
| 79 | iinfo->i_unique = 0; | 79 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { |
| 80 | iinfo->i_lenExtents = 0; | 80 | iinfo->i_efe = 1; |
| 81 | iinfo->i_next_alloc_block = 0; | 81 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) |
| 82 | iinfo->i_next_alloc_goal = 0; | 82 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; |
| 83 | iinfo->i_strat4096 = 0; | 83 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - |
| 84 | sizeof(struct extendedFileEntry), | ||
| 85 | GFP_KERNEL); | ||
| 86 | } else { | ||
| 87 | iinfo->i_efe = 0; | ||
| 88 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
| 89 | sizeof(struct fileEntry), | ||
| 90 | GFP_KERNEL); | ||
| 91 | } | ||
| 92 | if (!iinfo->i_ext.i_data) { | ||
| 93 | iput(inode); | ||
| 94 | *err = -ENOMEM; | ||
| 95 | return NULL; | ||
| 96 | } | ||
| 84 | 97 | ||
| 85 | block = udf_new_block(dir->i_sb, NULL, | 98 | block = udf_new_block(dir->i_sb, NULL, |
| 86 | dinfo->i_location.partitionReferenceNum, | 99 | dinfo->i_location.partitionReferenceNum, |
| @@ -111,6 +124,7 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
| 111 | lvhd->uniqueID = cpu_to_le64(uniqueID); | 124 | lvhd->uniqueID = cpu_to_le64(uniqueID); |
| 112 | mark_buffer_dirty(sbi->s_lvid_bh); | 125 | mark_buffer_dirty(sbi->s_lvid_bh); |
| 113 | } | 126 | } |
| 127 | mutex_unlock(&sbi->s_alloc_mutex); | ||
| 114 | inode->i_mode = mode; | 128 | inode->i_mode = mode; |
| 115 | inode->i_uid = current->fsuid; | 129 | inode->i_uid = current->fsuid; |
| 116 | if (dir->i_mode & S_ISGID) { | 130 | if (dir->i_mode & S_ISGID) { |
| @@ -129,25 +143,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
| 129 | iinfo->i_lenEAttr = 0; | 143 | iinfo->i_lenEAttr = 0; |
| 130 | iinfo->i_lenAlloc = 0; | 144 | iinfo->i_lenAlloc = 0; |
| 131 | iinfo->i_use = 0; | 145 | iinfo->i_use = 0; |
| 132 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_EXTENDED_FE)) { | ||
| 133 | iinfo->i_efe = 1; | ||
| 134 | if (UDF_VERS_USE_EXTENDED_FE > sbi->s_udfrev) | ||
| 135 | sbi->s_udfrev = UDF_VERS_USE_EXTENDED_FE; | ||
| 136 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
| 137 | sizeof(struct extendedFileEntry), | ||
| 138 | GFP_KERNEL); | ||
| 139 | } else { | ||
| 140 | iinfo->i_efe = 0; | ||
| 141 | iinfo->i_ext.i_data = kzalloc(inode->i_sb->s_blocksize - | ||
| 142 | sizeof(struct fileEntry), | ||
| 143 | GFP_KERNEL); | ||
| 144 | } | ||
| 145 | if (!iinfo->i_ext.i_data) { | ||
| 146 | iput(inode); | ||
| 147 | *err = -ENOMEM; | ||
| 148 | mutex_unlock(&sbi->s_alloc_mutex); | ||
| 149 | return NULL; | ||
| 150 | } | ||
| 151 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) | 146 | if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_AD_IN_ICB)) |
| 152 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; | 147 | iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; |
| 153 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) | 148 | else if (UDF_QUERY_FLAG(inode->i_sb, UDF_FLAG_USE_SHORT_AD)) |
| @@ -158,7 +153,6 @@ struct inode *udf_new_inode(struct inode *dir, int mode, int *err) | |||
| 158 | iinfo->i_crtime = current_fs_time(inode->i_sb); | 153 | iinfo->i_crtime = current_fs_time(inode->i_sb); |
| 159 | insert_inode_hash(inode); | 154 | insert_inode_hash(inode); |
| 160 | mark_inode_dirty(inode); | 155 | mark_inode_dirty(inode); |
| 161 | mutex_unlock(&sbi->s_alloc_mutex); | ||
| 162 | 156 | ||
| 163 | if (DQUOT_ALLOC_INODE(inode)) { | 157 | if (DQUOT_ALLOC_INODE(inode)) { |
| 164 | DQUOT_DROP(inode); | 158 | DQUOT_DROP(inode); |
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index f42f80a3b1fa..a44d68eb50b5 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c | |||
| @@ -1338,6 +1338,10 @@ __xfs_get_blocks( | |||
| 1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; | 1338 | offset = (xfs_off_t)iblock << inode->i_blkbits; |
| 1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); | 1339 | ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); |
| 1340 | size = bh_result->b_size; | 1340 | size = bh_result->b_size; |
| 1341 | |||
| 1342 | if (!create && direct && offset >= i_size_read(inode)) | ||
| 1343 | return 0; | ||
| 1344 | |||
| 1341 | error = xfs_iomap(XFS_I(inode), offset, size, | 1345 | error = xfs_iomap(XFS_I(inode), offset, size, |
| 1342 | create ? flags : BMAPI_READ, &iomap, &niomap); | 1346 | create ? flags : BMAPI_READ, &iomap, &niomap); |
| 1343 | if (error) | 1347 | if (error) |
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 5f60363b9343..5311c1acdd40 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c | |||
| @@ -475,6 +475,7 @@ const struct file_operations xfs_invis_file_operations = { | |||
| 475 | const struct file_operations xfs_dir_file_operations = { | 475 | const struct file_operations xfs_dir_file_operations = { |
| 476 | .read = generic_read_dir, | 476 | .read = generic_read_dir, |
| 477 | .readdir = xfs_file_readdir, | 477 | .readdir = xfs_file_readdir, |
| 478 | .llseek = generic_file_llseek, | ||
| 478 | .unlocked_ioctl = xfs_file_ioctl, | 479 | .unlocked_ioctl = xfs_file_ioctl, |
| 479 | #ifdef CONFIG_COMPAT | 480 | #ifdef CONFIG_COMPAT |
| 480 | .compat_ioctl = xfs_file_compat_ioctl, | 481 | .compat_ioctl = xfs_file_compat_ioctl, |
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 91bcd979242c..095d271f3434 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c | |||
| @@ -355,7 +355,7 @@ xfs_vn_ci_lookup( | |||
| 355 | /* else case-insensitive match... */ | 355 | /* else case-insensitive match... */ |
| 356 | dname.name = ci_name.name; | 356 | dname.name = ci_name.name; |
| 357 | dname.len = ci_name.len; | 357 | dname.len = ci_name.len; |
| 358 | dentry = d_add_ci(VFS_I(ip), dentry, &dname); | 358 | dentry = d_add_ci(dentry, VFS_I(ip), &dname); |
| 359 | kmem_free(ci_name.name); | 359 | kmem_free(ci_name.name); |
| 360 | return dentry; | 360 | return dentry; |
| 361 | } | 361 | } |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 73c65f19e549..18d3c8487835 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -1302,9 +1302,29 @@ xfs_fs_remount( | |||
| 1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1302 | mp->m_flags &= ~XFS_MOUNT_BARRIER; |
| 1303 | break; | 1303 | break; |
| 1304 | default: | 1304 | default: |
| 1305 | /* | ||
| 1306 | * Logically we would return an error here to prevent | ||
| 1307 | * users from believing they might have changed | ||
| 1308 | * mount options using remount which can't be changed. | ||
| 1309 | * | ||
| 1310 | * But unfortunately mount(8) adds all options from | ||
| 1311 | * mtab and fstab to the mount arguments in some cases | ||
| 1312 | * so we can't blindly reject options, but have to | ||
| 1313 | * check for each specified option if it actually | ||
| 1314 | * differs from the currently set option and only | ||
| 1315 | * reject it if that's the case. | ||
| 1316 | * | ||
| 1317 | * Until that is implemented we return success for | ||
| 1318 | * every remount request, and silently ignore all | ||
| 1319 | * options that we can't actually change. | ||
| 1320 | */ | ||
| 1321 | #if 0 | ||
| 1305 | printk(KERN_INFO | 1322 | printk(KERN_INFO |
| 1306 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); |
| 1307 | return -EINVAL; | 1324 | return -EINVAL; |
| 1325 | #else | ||
| 1326 | return 0; | ||
| 1327 | #endif | ||
| 1308 | } | 1328 | } |
| 1309 | } | 1329 | } |
| 1310 | 1330 | ||
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 608c30c3f76b..002fc2617c8e 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
| @@ -732,6 +732,7 @@ xfs_buf_item_init( | |||
| 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; | 732 | bip->bli_item.li_ops = &xfs_buf_item_ops; |
| 733 | bip->bli_item.li_mountp = mp; | 733 | bip->bli_item.li_mountp = mp; |
| 734 | bip->bli_buf = bp; | 734 | bip->bli_buf = bp; |
| 735 | xfs_buf_hold(bp); | ||
| 735 | bip->bli_format.blf_type = XFS_LI_BUF; | 736 | bip->bli_format.blf_type = XFS_LI_BUF; |
| 736 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); | 737 | bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp); |
| 737 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); | 738 | bip->bli_format.blf_len = (ushort)BTOBB(XFS_BUF_COUNT(bp)); |
| @@ -867,6 +868,21 @@ xfs_buf_item_dirty( | |||
| 867 | return (bip->bli_flags & XFS_BLI_DIRTY); | 868 | return (bip->bli_flags & XFS_BLI_DIRTY); |
| 868 | } | 869 | } |
| 869 | 870 | ||
| 871 | STATIC void | ||
| 872 | xfs_buf_item_free( | ||
| 873 | xfs_buf_log_item_t *bip) | ||
| 874 | { | ||
| 875 | #ifdef XFS_TRANS_DEBUG | ||
| 876 | kmem_free(bip->bli_orig); | ||
| 877 | kmem_free(bip->bli_logged); | ||
| 878 | #endif /* XFS_TRANS_DEBUG */ | ||
| 879 | |||
| 880 | #ifdef XFS_BLI_TRACE | ||
| 881 | ktrace_free(bip->bli_trace); | ||
| 882 | #endif | ||
| 883 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
| 884 | } | ||
| 885 | |||
| 870 | /* | 886 | /* |
| 871 | * This is called when the buf log item is no longer needed. It should | 887 | * This is called when the buf log item is no longer needed. It should |
| 872 | * free the buf log item associated with the given buffer and clear | 888 | * free the buf log item associated with the given buffer and clear |
| @@ -887,18 +903,8 @@ xfs_buf_item_relse( | |||
| 887 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { | 903 | (XFS_BUF_IODONE_FUNC(bp) != NULL)) { |
| 888 | XFS_BUF_CLR_IODONE_FUNC(bp); | 904 | XFS_BUF_CLR_IODONE_FUNC(bp); |
| 889 | } | 905 | } |
| 890 | 906 | xfs_buf_rele(bp); | |
| 891 | #ifdef XFS_TRANS_DEBUG | 907 | xfs_buf_item_free(bip); |
| 892 | kmem_free(bip->bli_orig); | ||
| 893 | bip->bli_orig = NULL; | ||
| 894 | kmem_free(bip->bli_logged); | ||
| 895 | bip->bli_logged = NULL; | ||
| 896 | #endif /* XFS_TRANS_DEBUG */ | ||
| 897 | |||
| 898 | #ifdef XFS_BLI_TRACE | ||
| 899 | ktrace_free(bip->bli_trace); | ||
| 900 | #endif | ||
| 901 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
| 902 | } | 908 | } |
| 903 | 909 | ||
| 904 | 910 | ||
| @@ -1120,6 +1126,7 @@ xfs_buf_iodone( | |||
| 1120 | 1126 | ||
| 1121 | ASSERT(bip->bli_buf == bp); | 1127 | ASSERT(bip->bli_buf == bp); |
| 1122 | 1128 | ||
| 1129 | xfs_buf_rele(bp); | ||
| 1123 | mp = bip->bli_item.li_mountp; | 1130 | mp = bip->bli_item.li_mountp; |
| 1124 | 1131 | ||
| 1125 | /* | 1132 | /* |
| @@ -1136,18 +1143,7 @@ xfs_buf_iodone( | |||
| 1136 | * xfs_trans_delete_ail() drops the AIL lock. | 1143 | * xfs_trans_delete_ail() drops the AIL lock. |
| 1137 | */ | 1144 | */ |
| 1138 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); | 1145 | xfs_trans_delete_ail(mp, (xfs_log_item_t *)bip); |
| 1139 | 1146 | xfs_buf_item_free(bip); | |
| 1140 | #ifdef XFS_TRANS_DEBUG | ||
| 1141 | kmem_free(bip->bli_orig); | ||
| 1142 | bip->bli_orig = NULL; | ||
| 1143 | kmem_free(bip->bli_logged); | ||
| 1144 | bip->bli_logged = NULL; | ||
| 1145 | #endif /* XFS_TRANS_DEBUG */ | ||
| 1146 | |||
| 1147 | #ifdef XFS_BLI_TRACE | ||
| 1148 | ktrace_free(bip->bli_trace); | ||
| 1149 | #endif | ||
| 1150 | kmem_zone_free(xfs_buf_item_zone, bip); | ||
| 1151 | } | 1147 | } |
| 1152 | 1148 | ||
| 1153 | #if defined(XFS_BLI_TRACE) | 1149 | #if defined(XFS_BLI_TRACE) |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index 760f4c5b5160..75b0cd4da0ea 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
| @@ -149,7 +149,14 @@ xfs_swap_extents( | |||
| 149 | 149 | ||
| 150 | sbp = &sxp->sx_stat; | 150 | sbp = &sxp->sx_stat; |
| 151 | 151 | ||
| 152 | xfs_lock_two_inodes(ip, tip, lock_flags); | 152 | /* |
| 153 | * we have to do two separate lock calls here to keep lockdep | ||
| 154 | * happy. If we try to get all the locks in one call, lockdep will | ||
| 155 | * report false positives when we drop the ILOCK and regain them | ||
| 156 | * below. | ||
| 157 | */ | ||
| 158 | xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL); | ||
| 159 | xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL); | ||
| 153 | locked = 1; | 160 | locked = 1; |
| 154 | 161 | ||
| 155 | /* Verify that both files have the same format */ | 162 | /* Verify that both files have the same format */ |
diff --git a/fs/xfs/xfs_dmapi.h b/fs/xfs/xfs_dmapi.h index cdc2d3464a1a..2813cdd72375 100644 --- a/fs/xfs/xfs_dmapi.h +++ b/fs/xfs/xfs_dmapi.h | |||
| @@ -18,7 +18,6 @@ | |||
| 18 | #ifndef __XFS_DMAPI_H__ | 18 | #ifndef __XFS_DMAPI_H__ |
| 19 | #define __XFS_DMAPI_H__ | 19 | #define __XFS_DMAPI_H__ |
| 20 | 20 | ||
| 21 | #include <linux/version.h> | ||
| 22 | /* Values used to define the on-disk version of dm_attrname_t. All | 21 | /* Values used to define the on-disk version of dm_attrname_t. All |
| 23 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". | 22 | * on-disk attribute names start with the 8-byte string "SGI_DMI_". |
| 24 | * | 23 | * |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 00e80df9dd9d..dbd9cef852ec 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -4118,7 +4118,7 @@ xfs_iext_indirect_to_direct( | |||
| 4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); | 4118 | ASSERT(nextents <= XFS_LINEAR_EXTS); |
| 4119 | size = nextents * sizeof(xfs_bmbt_rec_t); | 4119 | size = nextents * sizeof(xfs_bmbt_rec_t); |
| 4120 | 4120 | ||
| 4121 | xfs_iext_irec_compact_full(ifp); | 4121 | xfs_iext_irec_compact_pages(ifp); |
| 4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); | 4122 | ASSERT(ifp->if_real_bytes == XFS_IEXT_BUFSZ); |
| 4123 | 4123 | ||
| 4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; | 4124 | ep = ifp->if_u1.if_ext_irec->er_extbuf; |
| @@ -4449,8 +4449,7 @@ xfs_iext_irec_remove( | |||
| 4449 | * compaction policy is as follows: | 4449 | * compaction policy is as follows: |
| 4450 | * | 4450 | * |
| 4451 | * Full Compaction: Extents fit into a single page (or inline buffer) | 4451 | * Full Compaction: Extents fit into a single page (or inline buffer) |
| 4452 | * Full Compaction: Extents occupy less than 10% of allocated space | 4452 | * Partial Compaction: Extents occupy less than 50% of allocated space |
| 4453 | * Partial Compaction: Extents occupy > 10% and < 50% of allocated space | ||
| 4454 | * No Compaction: Extents occupy at least 50% of allocated space | 4453 | * No Compaction: Extents occupy at least 50% of allocated space |
| 4455 | */ | 4454 | */ |
| 4456 | void | 4455 | void |
| @@ -4471,8 +4470,6 @@ xfs_iext_irec_compact( | |||
| 4471 | xfs_iext_direct_to_inline(ifp, nextents); | 4470 | xfs_iext_direct_to_inline(ifp, nextents); |
| 4472 | } else if (nextents <= XFS_LINEAR_EXTS) { | 4471 | } else if (nextents <= XFS_LINEAR_EXTS) { |
| 4473 | xfs_iext_indirect_to_direct(ifp); | 4472 | xfs_iext_indirect_to_direct(ifp); |
| 4474 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 3) { | ||
| 4475 | xfs_iext_irec_compact_full(ifp); | ||
| 4476 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { | 4473 | } else if (nextents < (nlists * XFS_LINEAR_EXTS) >> 1) { |
| 4477 | xfs_iext_irec_compact_pages(ifp); | 4474 | xfs_iext_irec_compact_pages(ifp); |
| 4478 | } | 4475 | } |
| @@ -4496,7 +4493,7 @@ xfs_iext_irec_compact_pages( | |||
| 4496 | erp_next = erp + 1; | 4493 | erp_next = erp + 1; |
| 4497 | if (erp_next->er_extcount <= | 4494 | if (erp_next->er_extcount <= |
| 4498 | (XFS_LINEAR_EXTS - erp->er_extcount)) { | 4495 | (XFS_LINEAR_EXTS - erp->er_extcount)) { |
| 4499 | memmove(&erp->er_extbuf[erp->er_extcount], | 4496 | memcpy(&erp->er_extbuf[erp->er_extcount], |
| 4500 | erp_next->er_extbuf, erp_next->er_extcount * | 4497 | erp_next->er_extbuf, erp_next->er_extcount * |
| 4501 | sizeof(xfs_bmbt_rec_t)); | 4498 | sizeof(xfs_bmbt_rec_t)); |
| 4502 | erp->er_extcount += erp_next->er_extcount; | 4499 | erp->er_extcount += erp_next->er_extcount; |
| @@ -4516,91 +4513,6 @@ xfs_iext_irec_compact_pages( | |||
| 4516 | } | 4513 | } |
| 4517 | 4514 | ||
| 4518 | /* | 4515 | /* |
| 4519 | * Fully compact the extent records managed by the indirection array. | ||
| 4520 | */ | ||
| 4521 | void | ||
| 4522 | xfs_iext_irec_compact_full( | ||
| 4523 | xfs_ifork_t *ifp) /* inode fork pointer */ | ||
| 4524 | { | ||
| 4525 | xfs_bmbt_rec_host_t *ep, *ep_next; /* extent record pointers */ | ||
| 4526 | xfs_ext_irec_t *erp, *erp_next; /* extent irec pointers */ | ||
| 4527 | int erp_idx = 0; /* extent irec index */ | ||
| 4528 | int ext_avail; /* empty entries in ex list */ | ||
| 4529 | int ext_diff; /* number of exts to add */ | ||
| 4530 | int nlists; /* number of irec's (ex lists) */ | ||
| 4531 | |||
| 4532 | ASSERT(ifp->if_flags & XFS_IFEXTIREC); | ||
| 4533 | |||
| 4534 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
| 4535 | erp = ifp->if_u1.if_ext_irec; | ||
| 4536 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
| 4537 | erp_next = erp + 1; | ||
| 4538 | ep_next = erp_next->er_extbuf; | ||
| 4539 | |||
| 4540 | while (erp_idx < nlists - 1) { | ||
| 4541 | /* | ||
| 4542 | * Check how many extent records are available in this irec. | ||
| 4543 | * If there is none skip the whole exercise. | ||
| 4544 | */ | ||
| 4545 | ext_avail = XFS_LINEAR_EXTS - erp->er_extcount; | ||
| 4546 | if (ext_avail) { | ||
| 4547 | |||
| 4548 | /* | ||
| 4549 | * Copy over as many as possible extent records into | ||
| 4550 | * the previous page. | ||
| 4551 | */ | ||
| 4552 | ext_diff = MIN(ext_avail, erp_next->er_extcount); | ||
| 4553 | memcpy(ep, ep_next, ext_diff * sizeof(xfs_bmbt_rec_t)); | ||
| 4554 | erp->er_extcount += ext_diff; | ||
| 4555 | erp_next->er_extcount -= ext_diff; | ||
| 4556 | |||
| 4557 | /* | ||
| 4558 | * If the next irec is empty now we can simply | ||
| 4559 | * remove it. | ||
| 4560 | */ | ||
| 4561 | if (erp_next->er_extcount == 0) { | ||
| 4562 | /* | ||
| 4563 | * Free page before removing extent record | ||
| 4564 | * so er_extoffs don't get modified in | ||
| 4565 | * xfs_iext_irec_remove. | ||
| 4566 | */ | ||
| 4567 | kmem_free(erp_next->er_extbuf); | ||
| 4568 | erp_next->er_extbuf = NULL; | ||
| 4569 | xfs_iext_irec_remove(ifp, erp_idx + 1); | ||
| 4570 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
| 4571 | nlists = ifp->if_real_bytes / XFS_IEXT_BUFSZ; | ||
| 4572 | |||
| 4573 | /* | ||
| 4574 | * If the next irec is not empty move up the content | ||
| 4575 | * that has not been copied to the previous page to | ||
| 4576 | * the beginning of this one. | ||
| 4577 | */ | ||
| 4578 | } else { | ||
| 4579 | memmove(erp_next->er_extbuf, &ep_next[ext_diff], | ||
| 4580 | erp_next->er_extcount * | ||
| 4581 | sizeof(xfs_bmbt_rec_t)); | ||
| 4582 | ep_next = erp_next->er_extbuf; | ||
| 4583 | memset(&ep_next[erp_next->er_extcount], 0, | ||
| 4584 | (XFS_LINEAR_EXTS - | ||
| 4585 | erp_next->er_extcount) * | ||
| 4586 | sizeof(xfs_bmbt_rec_t)); | ||
| 4587 | } | ||
| 4588 | } | ||
| 4589 | |||
| 4590 | if (erp->er_extcount == XFS_LINEAR_EXTS) { | ||
| 4591 | erp_idx++; | ||
| 4592 | if (erp_idx < nlists) | ||
| 4593 | erp = &ifp->if_u1.if_ext_irec[erp_idx]; | ||
| 4594 | else | ||
| 4595 | break; | ||
| 4596 | } | ||
| 4597 | ep = &erp->er_extbuf[erp->er_extcount]; | ||
| 4598 | erp_next = erp + 1; | ||
| 4599 | ep_next = erp_next->er_extbuf; | ||
| 4600 | } | ||
| 4601 | } | ||
| 4602 | |||
| 4603 | /* | ||
| 4604 | * This is called to update the er_extoff field in the indirection | 4516 | * This is called to update the er_extoff field in the indirection |
| 4605 | * array when extents have been added or removed from one of the | 4517 | * array when extents have been added or removed from one of the |
| 4606 | * extent lists. erp_idx contains the irec index to begin updating | 4518 | * extent lists. erp_idx contains the irec index to begin updating |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index ccba14eb9dbe..503ea89e8b9a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -124,16 +124,27 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, | |||
| 124 | STATIC int xlog_iclogs_empty(xlog_t *log); | 124 | STATIC int xlog_iclogs_empty(xlog_t *log); |
| 125 | 125 | ||
| 126 | #if defined(XFS_LOG_TRACE) | 126 | #if defined(XFS_LOG_TRACE) |
| 127 | |||
| 128 | #define XLOG_TRACE_LOGGRANT_SIZE 2048 | ||
| 129 | #define XLOG_TRACE_ICLOG_SIZE 256 | ||
| 130 | |||
| 131 | void | ||
| 132 | xlog_trace_loggrant_alloc(xlog_t *log) | ||
| 133 | { | ||
| 134 | log->l_grant_trace = ktrace_alloc(XLOG_TRACE_LOGGRANT_SIZE, KM_NOFS); | ||
| 135 | } | ||
| 136 | |||
| 137 | void | ||
| 138 | xlog_trace_loggrant_dealloc(xlog_t *log) | ||
| 139 | { | ||
| 140 | ktrace_free(log->l_grant_trace); | ||
| 141 | } | ||
| 142 | |||
| 127 | void | 143 | void |
| 128 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | 144 | xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) |
| 129 | { | 145 | { |
| 130 | unsigned long cnts; | 146 | unsigned long cnts; |
| 131 | 147 | ||
| 132 | if (!log->l_grant_trace) { | ||
| 133 | log->l_grant_trace = ktrace_alloc(2048, KM_NOSLEEP); | ||
| 134 | if (!log->l_grant_trace) | ||
| 135 | return; | ||
| 136 | } | ||
| 137 | /* ticket counts are 1 byte each */ | 148 | /* ticket counts are 1 byte each */ |
| 138 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; | 149 | cnts = ((unsigned long)tic->t_ocnt) | ((unsigned long)tic->t_cnt) << 8; |
| 139 | 150 | ||
| @@ -157,10 +168,20 @@ xlog_trace_loggrant(xlog_t *log, xlog_ticket_t *tic, xfs_caddr_t string) | |||
| 157 | } | 168 | } |
| 158 | 169 | ||
| 159 | void | 170 | void |
| 171 | xlog_trace_iclog_alloc(xlog_in_core_t *iclog) | ||
| 172 | { | ||
| 173 | iclog->ic_trace = ktrace_alloc(XLOG_TRACE_ICLOG_SIZE, KM_NOFS); | ||
| 174 | } | ||
| 175 | |||
| 176 | void | ||
| 177 | xlog_trace_iclog_dealloc(xlog_in_core_t *iclog) | ||
| 178 | { | ||
| 179 | ktrace_free(iclog->ic_trace); | ||
| 180 | } | ||
| 181 | |||
| 182 | void | ||
| 160 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | 183 | xlog_trace_iclog(xlog_in_core_t *iclog, uint state) |
| 161 | { | 184 | { |
| 162 | if (!iclog->ic_trace) | ||
| 163 | iclog->ic_trace = ktrace_alloc(256, KM_NOFS); | ||
| 164 | ktrace_enter(iclog->ic_trace, | 185 | ktrace_enter(iclog->ic_trace, |
| 165 | (void *)((unsigned long)state), | 186 | (void *)((unsigned long)state), |
| 166 | (void *)((unsigned long)current_pid()), | 187 | (void *)((unsigned long)current_pid()), |
| @@ -170,8 +191,15 @@ xlog_trace_iclog(xlog_in_core_t *iclog, uint state) | |||
| 170 | (void *)NULL, (void *)NULL); | 191 | (void *)NULL, (void *)NULL); |
| 171 | } | 192 | } |
| 172 | #else | 193 | #else |
| 194 | |||
| 195 | #define xlog_trace_loggrant_alloc(log) | ||
| 196 | #define xlog_trace_loggrant_dealloc(log) | ||
| 173 | #define xlog_trace_loggrant(log,tic,string) | 197 | #define xlog_trace_loggrant(log,tic,string) |
| 198 | |||
| 199 | #define xlog_trace_iclog_alloc(iclog) | ||
| 200 | #define xlog_trace_iclog_dealloc(iclog) | ||
| 174 | #define xlog_trace_iclog(iclog,state) | 201 | #define xlog_trace_iclog(iclog,state) |
| 202 | |||
| 175 | #endif /* XFS_LOG_TRACE */ | 203 | #endif /* XFS_LOG_TRACE */ |
| 176 | 204 | ||
| 177 | 205 | ||
| @@ -1009,7 +1037,7 @@ xlog_iodone(xfs_buf_t *bp) | |||
| 1009 | * layer, it means the underlying device no longer supports | 1037 | * layer, it means the underlying device no longer supports |
| 1010 | * barrier I/O. Warn loudly and turn off barriers. | 1038 | * barrier I/O. Warn loudly and turn off barriers. |
| 1011 | */ | 1039 | */ |
| 1012 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ORDERED(bp)) { | 1040 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) { |
| 1013 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1041 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; |
| 1014 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | 1042 | xfs_fs_cmn_err(CE_WARN, l->l_mp, |
| 1015 | "xlog_iodone: Barriers are no longer supported" | 1043 | "xlog_iodone: Barriers are no longer supported" |
| @@ -1231,6 +1259,7 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1231 | spin_lock_init(&log->l_grant_lock); | 1259 | spin_lock_init(&log->l_grant_lock); |
| 1232 | sv_init(&log->l_flush_wait, 0, "flush_wait"); | 1260 | sv_init(&log->l_flush_wait, 0, "flush_wait"); |
| 1233 | 1261 | ||
| 1262 | xlog_trace_loggrant_alloc(log); | ||
| 1234 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ | 1263 | /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ |
| 1235 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); | 1264 | ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); |
| 1236 | 1265 | ||
| @@ -1285,6 +1314,8 @@ xlog_alloc_log(xfs_mount_t *mp, | |||
| 1285 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); | 1314 | sv_init(&iclog->ic_force_wait, SV_DEFAULT, "iclog-force"); |
| 1286 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); | 1315 | sv_init(&iclog->ic_write_wait, SV_DEFAULT, "iclog-write"); |
| 1287 | 1316 | ||
| 1317 | xlog_trace_iclog_alloc(iclog); | ||
| 1318 | |||
| 1288 | iclogp = &iclog->ic_next; | 1319 | iclogp = &iclog->ic_next; |
| 1289 | } | 1320 | } |
| 1290 | *iclogp = log->l_iclog; /* complete ring */ | 1321 | *iclogp = log->l_iclog; /* complete ring */ |
| @@ -1565,11 +1596,7 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1565 | sv_destroy(&iclog->ic_force_wait); | 1596 | sv_destroy(&iclog->ic_force_wait); |
| 1566 | sv_destroy(&iclog->ic_write_wait); | 1597 | sv_destroy(&iclog->ic_write_wait); |
| 1567 | xfs_buf_free(iclog->ic_bp); | 1598 | xfs_buf_free(iclog->ic_bp); |
| 1568 | #ifdef XFS_LOG_TRACE | 1599 | xlog_trace_iclog_dealloc(iclog); |
| 1569 | if (iclog->ic_trace != NULL) { | ||
| 1570 | ktrace_free(iclog->ic_trace); | ||
| 1571 | } | ||
| 1572 | #endif | ||
| 1573 | next_iclog = iclog->ic_next; | 1600 | next_iclog = iclog->ic_next; |
| 1574 | kmem_free(iclog); | 1601 | kmem_free(iclog); |
| 1575 | iclog = next_iclog; | 1602 | iclog = next_iclog; |
| @@ -1578,14 +1605,7 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1578 | spinlock_destroy(&log->l_grant_lock); | 1605 | spinlock_destroy(&log->l_grant_lock); |
| 1579 | 1606 | ||
| 1580 | xfs_buf_free(log->l_xbuf); | 1607 | xfs_buf_free(log->l_xbuf); |
| 1581 | #ifdef XFS_LOG_TRACE | 1608 | xlog_trace_loggrant_dealloc(log); |
| 1582 | if (log->l_trace != NULL) { | ||
| 1583 | ktrace_free(log->l_trace); | ||
| 1584 | } | ||
| 1585 | if (log->l_grant_trace != NULL) { | ||
| 1586 | ktrace_free(log->l_grant_trace); | ||
| 1587 | } | ||
| 1588 | #endif | ||
| 1589 | log->l_mp->m_log = NULL; | 1609 | log->l_mp->m_log = NULL; |
| 1590 | kmem_free(log); | 1610 | kmem_free(log); |
| 1591 | } /* xlog_dealloc_log */ | 1611 | } /* xlog_dealloc_log */ |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index c8a5b22ee3e3..e7d8f84443fa 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
| @@ -448,7 +448,6 @@ typedef struct log { | |||
| 448 | int l_grant_write_bytes; | 448 | int l_grant_write_bytes; |
| 449 | 449 | ||
| 450 | #ifdef XFS_LOG_TRACE | 450 | #ifdef XFS_LOG_TRACE |
| 451 | struct ktrace *l_trace; | ||
| 452 | struct ktrace *l_grant_trace; | 451 | struct ktrace *l_grant_trace; |
| 453 | #endif | 452 | #endif |
| 454 | 453 | ||
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c index aa238c8fbd7a..8b6812f66a15 100644 --- a/fs/xfs/xfs_vnodeops.c +++ b/fs/xfs/xfs_vnodeops.c | |||
| @@ -1838,6 +1838,12 @@ again: | |||
| 1838 | #endif | 1838 | #endif |
| 1839 | } | 1839 | } |
| 1840 | 1840 | ||
| 1841 | /* | ||
| 1842 | * xfs_lock_two_inodes() can only be used to lock one type of lock | ||
| 1843 | * at a time - the iolock or the ilock, but not both at once. If | ||
| 1844 | * we lock both at once, lockdep will report false positives saying | ||
| 1845 | * we have violated locking orders. | ||
| 1846 | */ | ||
| 1841 | void | 1847 | void |
| 1842 | xfs_lock_two_inodes( | 1848 | xfs_lock_two_inodes( |
| 1843 | xfs_inode_t *ip0, | 1849 | xfs_inode_t *ip0, |
| @@ -1848,6 +1854,8 @@ xfs_lock_two_inodes( | |||
| 1848 | int attempts = 0; | 1854 | int attempts = 0; |
| 1849 | xfs_log_item_t *lp; | 1855 | xfs_log_item_t *lp; |
| 1850 | 1856 | ||
| 1857 | if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) | ||
| 1858 | ASSERT((lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) == 0); | ||
| 1851 | ASSERT(ip0->i_ino != ip1->i_ino); | 1859 | ASSERT(ip0->i_ino != ip1->i_ino); |
| 1852 | 1860 | ||
| 1853 | if (ip0->i_ino > ip1->i_ino) { | 1861 | if (ip0->i_ino > ip1->i_ino) { |
| @@ -3152,6 +3160,13 @@ error1: /* Just cancel transaction */ | |||
| 3152 | /* | 3160 | /* |
| 3153 | * Zero file bytes between startoff and endoff inclusive. | 3161 | * Zero file bytes between startoff and endoff inclusive. |
| 3154 | * The iolock is held exclusive and no blocks are buffered. | 3162 | * The iolock is held exclusive and no blocks are buffered. |
| 3163 | * | ||
| 3164 | * This function is used by xfs_free_file_space() to zero | ||
| 3165 | * partial blocks when the range to free is not block aligned. | ||
| 3166 | * When unreserving space with boundaries that are not block | ||
| 3167 | * aligned we round up the start and round down the end | ||
| 3168 | * boundaries and then use this function to zero the parts of | ||
| 3169 | * the blocks that got dropped during the rounding. | ||
| 3155 | */ | 3170 | */ |
| 3156 | STATIC int | 3171 | STATIC int |
| 3157 | xfs_zero_remaining_bytes( | 3172 | xfs_zero_remaining_bytes( |
| @@ -3168,6 +3183,17 @@ xfs_zero_remaining_bytes( | |||
| 3168 | int nimap; | 3183 | int nimap; |
| 3169 | int error = 0; | 3184 | int error = 0; |
| 3170 | 3185 | ||
| 3186 | /* | ||
| 3187 | * Avoid doing I/O beyond eof - it's not necessary | ||
| 3188 | * since nothing can read beyond eof. The space will | ||
| 3189 | * be zeroed when the file is extended anyway. | ||
| 3190 | */ | ||
| 3191 | if (startoff >= ip->i_size) | ||
| 3192 | return 0; | ||
| 3193 | |||
| 3194 | if (endoff > ip->i_size) | ||
| 3195 | endoff = ip->i_size; | ||
| 3196 | |||
| 3171 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, | 3197 | bp = xfs_buf_get_noaddr(mp->m_sb.sb_blocksize, |
| 3172 | XFS_IS_REALTIME_INODE(ip) ? | 3198 | XFS_IS_REALTIME_INODE(ip) ? |
| 3173 | mp->m_rtdev_targp : mp->m_ddev_targp); | 3199 | mp->m_rtdev_targp : mp->m_ddev_targp); |
