Diffstat (limited to 'fs')
104 files changed, 2745 insertions, 1637 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 814ac4e213a8..0a93dc1cb4ac 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -1,6 +1,6 @@
 config 9P_FS
-	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
-	depends on INET && NET_9P && EXPERIMENTAL
+	tristate "Plan 9 Resource Sharing Support (9P2000)"
+	depends on INET && NET_9P
 	help
 	  If you say Y here, you will get experimental support for
 	  Plan 9 resource sharing via the 9P2000 protocol.
@@ -10,7 +10,6 @@ config 9P_FS
 	  If unsure, say N.
 
 if 9P_FS
-
 config 9P_FSCACHE
 	bool "Enable 9P client caching support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 82a7c38ddad0..691c78f58bef 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -259,7 +259,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		if (IS_ERR(inode_fid)) {
 			err = PTR_ERR(inode_fid);
 			mutex_unlock(&v9inode->v_mutex);
-			goto error;
+			goto err_clunk_old_fid;
 		}
 		v9inode->writeback_fid = (void *) inode_fid;
 	}
@@ -267,8 +267,8 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	/* Since we are opening a file, assign the open fid to the file */
 	filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
 	if (IS_ERR(filp)) {
-		p9_client_clunk(ofid);
-		return PTR_ERR(filp);
+		err = PTR_ERR(filp);
+		goto err_clunk_old_fid;
 	}
 	filp->private_data = ofid;
 #ifdef CONFIG_9P_FSCACHE
@@ -278,10 +278,11 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 	return 0;
 
 error:
-	if (ofid)
-		p9_client_clunk(ofid);
 	if (fid)
 		p9_client_clunk(fid);
+err_clunk_old_fid:
+	if (ofid)
+		p9_client_clunk(ofid);
 	return err;
 }
 
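The label reordering above matters because the two error paths must clunk different fids: the full error path releases both fids, while the later label releases only the old one (ofid). It follows the common kernel idiom of stacked cleanup labels that unwind resources in reverse acquisition order. A minimal sketch of that idiom, with acquire_a/release_a and friends as hypothetical stand-ins rather than 9p calls:

/* Sketch of the stacked-cleanup idiom; all helpers are hypothetical. */
static int do_setup(void)
{
	int err;

	err = acquire_a();		/* e.g. the old fid */
	if (err)
		return err;

	err = acquire_b();		/* e.g. the new fid */
	if (err)
		goto err_release_a;	/* only A exists at this point */

	err = use_both();
	if (err)
		goto err_release_b;	/* unwind in reverse order */

	return 0;

err_release_b:
	release_b();
err_release_a:
	release_a();
	return err;
}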
diff --git a/fs/Kconfig b/fs/Kconfig
index f3aa9b08b228..979992dcb386 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -121,9 +121,25 @@ config TMPFS
 
 	  See <file:Documentation/filesystems/tmpfs.txt> for details.
 
+config TMPFS_XATTR
+	bool "Tmpfs extended attributes"
+	depends on TMPFS
+	default n
+	help
+	  Extended attributes are name:value pairs associated with inodes by
+	  the kernel or by users (see the attr(5) manual page, or visit
+	  <http://acl.bestbits.at/> for details).
+
+	  Currently this enables support for the trusted.* and
+	  security.* namespaces.
+
+	  If unsure, say N.
+
+	  You need this for POSIX ACL support on tmpfs.
+
 config TMPFS_POSIX_ACL
 	bool "Tmpfs POSIX Access Control Lists"
-	depends on TMPFS
+	depends on TMPFS_XATTR
 	select GENERIC_ACL
 	help
 	  POSIX Access Control Lists (ACLs) support permissions for users and
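With TMPFS_XATTR enabled, the trusted.* and security.* attributes described in the help text become settable on tmpfs files. A minimal userspace sketch (the path and attribute name are made-up examples; trusted.* writes need CAP_SYS_ADMIN):

#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	/* /tmp is assumed to be a tmpfs mount */
	const char *path = "/tmp/example";
	const char *val = "1";

	if (setxattr(path, "trusted.example", val, strlen(val), 0) != 0) {
		perror("setxattr");
		return 1;
	}
	return 0;
}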
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index 397d3057d336..1bffbe0ed778 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -820,6 +820,8 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 	int res;
 	char buf[16];
 
+	memset(&bprm, 0, sizeof(bprm));
+
 	/* Create the file name */
 	sprintf(buf, "/lib/lib%d.so", id);
 
@@ -835,6 +837,12 @@ static int load_flat_shared_library(int id, struct lib_info *libs)
 	if (!bprm.cred)
 		goto out;
 
+	/* We don't really care about recalculating credentials at this point
+	 * as we're past the point of no return and are dealing with shared
+	 * libraries.
+	 */
+	bprm.cred_prepared = 1;
+
 	res = prepare_binprm(&bprm);
 
 	if (!IS_ERR_VALUE(res))
diff --git a/fs/block_dev.c b/fs/block_dev.c
index bf9c7a720371..1f2b19978333 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1238,6 +1238,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 	res = __blkdev_get(bdev, mode, 0);
 
 	if (whole) {
+		struct gendisk *disk = whole->bd_disk;
+
 		/* finish claiming */
 		mutex_lock(&bdev->bd_mutex);
 		spin_lock(&bdev_lock);
@@ -1264,15 +1266,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
 		spin_unlock(&bdev_lock);
 
 		/*
-		 * Block event polling for write claims.  Any write
-		 * holder makes the write_holder state stick until all
-		 * are released.  This is good enough and tracking
-		 * individual writeable reference is too fragile given
-		 * the way @mode is used in blkdev_get/put().
+		 * Block event polling for write claims if requested.  Any
+		 * write holder makes the write_holder state stick until
+		 * all are released.  This is good enough and tracking
+		 * individual writeable reference is too fragile given the
+		 * way @mode is used in blkdev_get/put().
 		 */
-		if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
+		if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) &&
+		    !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) {
 			bdev->bd_write_holder = true;
-			disk_block_events(bdev->bd_disk);
+			disk_block_events(disk);
 		}
 
 		mutex_unlock(&bdev->bd_mutex);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 38b8ab554924..33da49dc3cc6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -848,7 +848,8 @@ get_more_pages:
 		op->payload_len = cpu_to_le32(len);
 		req->r_request->hdr.data_len = cpu_to_le32(len);
 
-		ceph_osdc_start_request(&fsc->client->osdc, req, true);
+		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
+		BUG_ON(rc);
 		req = NULL;
 
 		/* continue? */
@@ -880,8 +881,6 @@ release_pvec_pages:
 out:
 	if (req)
 		ceph_osdc_put_request(req);
-	if (rc > 0)
-		rc = 0;  /* vfs expects us to return 0 */
 	ceph_put_snap_context(snapc);
 	dout("writepages done, rc = %d\n", rc);
 	return rc;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 2a5404c1c42f..1f72b00447c4 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -569,7 +569,8 @@ retry:
 		list_add_tail(&cap->session_caps, &session->s_caps);
 		session->s_nr_caps++;
 		spin_unlock(&session->s_cap_lock);
-	}
+	} else if (new_cap)
+		ceph_put_cap(mdsc, new_cap);
 
 	if (!ci->i_snap_realm) {
 		/*
@@ -2634,6 +2635,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			      struct ceph_mds_session *session,
 			      int *open_target_sessions)
 {
+	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
 	unsigned mseq = le32_to_cpu(ex->migrate_seq);
@@ -2670,6 +2672,19 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
 			 * export targets, so that we get the matching IMPORT
 			 */
 			*open_target_sessions = 1;
+
+			/*
+			 * we can't flush dirty caps that we've seen the
+			 * EXPORT but no IMPORT for
+			 */
+			spin_lock(&mdsc->cap_dirty_lock);
+			if (!list_empty(&ci->i_dirty_item)) {
+				dout(" moving %p to cap_dirty_migrating\n",
+				     inode);
+				list_move(&ci->i_dirty_item,
+					  &mdsc->cap_dirty_migrating);
+			}
+			spin_unlock(&mdsc->cap_dirty_lock);
 		}
 		__ceph_remove_cap(cap);
 	}
@@ -2707,6 +2722,13 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 		ci->i_cap_exporting_issued = 0;
 		ci->i_cap_exporting_mseq = 0;
 		ci->i_cap_exporting_mds = -1;
+
+		spin_lock(&mdsc->cap_dirty_lock);
+		if (!list_empty(&ci->i_dirty_item)) {
+			dout(" moving %p back to cap_dirty\n", inode);
+			list_move(&ci->i_dirty_item, &mdsc->cap_dirty);
+		}
+		spin_unlock(&mdsc->cap_dirty_lock);
 	} else {
 		dout("handle_cap_import inode %p ci %p mds%d mseq %d\n",
 		     inode, ci, mds, mseq);
@@ -2910,38 +2932,16 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
  */
 void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 {
-	struct ceph_inode_info *ci, *nci = NULL;
-	struct inode *inode, *ninode = NULL;
-	struct list_head *p, *n;
+	struct ceph_inode_info *ci;
+	struct inode *inode;
 
 	dout("flush_dirty_caps\n");
 	spin_lock(&mdsc->cap_dirty_lock);
-	list_for_each_safe(p, n, &mdsc->cap_dirty) {
-		if (nci) {
-			ci = nci;
-			inode = ninode;
-			ci->i_ceph_flags &= ~CEPH_I_NOFLUSH;
-			dout("flush_dirty_caps inode %p (was next inode)\n",
-			     inode);
-		} else {
-			ci = list_entry(p, struct ceph_inode_info,
-					i_dirty_item);
-			inode = igrab(&ci->vfs_inode);
-			BUG_ON(!inode);
-			dout("flush_dirty_caps inode %p\n", inode);
-		}
-		if (n != &mdsc->cap_dirty) {
-			nci = list_entry(n, struct ceph_inode_info,
-					 i_dirty_item);
-			ninode = igrab(&nci->vfs_inode);
-			BUG_ON(!ninode);
-			nci->i_ceph_flags |= CEPH_I_NOFLUSH;
-			dout("flush_dirty_caps next inode %p, noflush\n",
-			     ninode);
-		} else {
-			nci = NULL;
-			ninode = NULL;
-		}
+	while (!list_empty(&mdsc->cap_dirty)) {
+		ci = list_first_entry(&mdsc->cap_dirty, struct ceph_inode_info,
+				      i_dirty_item);
+		inode = igrab(&ci->vfs_inode);
+		dout("flush_dirty_caps %p\n", inode);
 		spin_unlock(&mdsc->cap_dirty_lock);
 		if (inode) {
 			ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH,
@@ -2951,6 +2951,7 @@ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
 		spin_lock(&mdsc->cap_dirty_lock);
 	}
 	spin_unlock(&mdsc->cap_dirty_lock);
+	dout("flush_dirty_caps done\n");
 }
 
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 1a867a3601ae..33729e822bb9 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -360,7 +360,7 @@ more:
 	rinfo = &fi->last_readdir->r_reply_info;
 	dout("readdir frag %x num %d off %d chunkoff %d\n", frag,
 	     rinfo->dir_nr, off, fi->offset);
-	while (off - fi->offset >= 0 && off - fi->offset < rinfo->dir_nr) {
+	while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) {
 		u64 pos = ceph_make_fpos(frag, off);
 		struct ceph_mds_reply_inode *in =
 			rinfo->dir_in[off - fi->offset].in;
@@ -1066,16 +1066,17 @@ static ssize_t ceph_read_dir(struct file *file, char __user *buf, size_t size,
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int left;
+	const int bufsize = 1024;
 
 	if (!ceph_test_mount_opt(ceph_sb_to_client(inode->i_sb), DIRSTAT))
 		return -EISDIR;
 
 	if (!cf->dir_info) {
-		cf->dir_info = kmalloc(1024, GFP_NOFS);
+		cf->dir_info = kmalloc(bufsize, GFP_NOFS);
 		if (!cf->dir_info)
 			return -ENOMEM;
 		cf->dir_info_len =
-			sprintf(cf->dir_info,
+			snprintf(cf->dir_info, bufsize,
 				"entries:   %20lld\n"
 				" files:    %20lld\n"
 				" subdirs:  %20lld\n"
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index e41056174bf8..a610d3d67488 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -86,6 +86,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len,
 static struct dentry *__fh_to_dentry(struct super_block *sb,
 				     struct ceph_nfs_fh *fh)
 {
+	struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc;
 	struct inode *inode;
 	struct dentry *dentry;
 	struct ceph_vino vino;
@@ -95,8 +96,24 @@ static struct dentry *__fh_to_dentry(struct super_block *sb,
 	vino.ino = fh->ino;
 	vino.snap = CEPH_NOSNAP;
 	inode = ceph_find_inode(sb, vino);
-	if (!inode)
-		return ERR_PTR(-ESTALE);
+	if (!inode) {
+		struct ceph_mds_request *req;
+
+		req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
+					       USE_ANY_MDS);
+		if (IS_ERR(req))
+			return ERR_CAST(req);
+
+		req->r_ino1 = vino;
+		req->r_num_caps = 1;
+		err = ceph_mdsc_do_request(mdsc, NULL, req);
+		inode = req->r_target_inode;
+		if (inode)
+			igrab(inode);
+		ceph_mdsc_put_request(req);
+		if (!inode)
+			return ERR_PTR(-ESTALE);
+	}
 
 	dentry = d_obtain_alias(inode);
 	if (IS_ERR(dentry)) {
@@ -148,8 +165,10 @@ static struct dentry *__cfh_to_dentry(struct super_block *sb,
 		snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash);
 		req->r_num_caps = 1;
 		err = ceph_mdsc_do_request(mdsc, NULL, req);
+		inode = req->r_target_inode;
+		if (inode)
+			igrab(inode);
 		ceph_mdsc_put_request(req);
-		inode = ceph_find_inode(sb, vino);
 		if (!inode)
 			return ERR_PTR(err ? err : -ESTALE);
 	}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index d0fae4ce9ba5..79743d146be6 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -578,6 +578,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
 	if (dir) {
 		struct ceph_inode_info *ci = ceph_inode(dir);
 
+		ihold(dir);
 		spin_lock(&ci->i_unsafe_lock);
 		req->r_unsafe_dir = dir;
 		list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
@@ -598,6 +599,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
 		spin_lock(&ci->i_unsafe_lock);
 		list_del_init(&req->r_unsafe_dir_item);
 		spin_unlock(&ci->i_unsafe_lock);
+
+		iput(req->r_unsafe_dir);
+		req->r_unsafe_dir = NULL;
 	}
 
 	ceph_mdsc_put_request(req);
@@ -2691,7 +2695,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 {
 	struct super_block *sb = mdsc->fsc->sb;
 	struct inode *inode;
-	struct ceph_inode_info *ci;
 	struct dentry *parent, *dentry;
 	struct ceph_dentry_info *di;
 	int mds = session->s_mds;
@@ -2728,7 +2731,6 @@ static void handle_lease(struct ceph_mds_client *mdsc,
 		dout("handle_lease no inode %llx\n", vino.ino);
 		goto release;
 	}
-	ci = ceph_inode(inode);
 
 	/* dentry */
 	parent = d_find_alias(inode);
@@ -3002,6 +3004,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
 	spin_lock_init(&mdsc->snap_flush_lock);
 	mdsc->cap_flush_seq = 0;
 	INIT_LIST_HEAD(&mdsc->cap_dirty);
+	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
 	mdsc->num_cap_flushing = 0;
 	spin_lock_init(&mdsc->cap_dirty_lock);
 	init_waitqueue_head(&mdsc->cap_flushing_wq);
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 4e3a9cc0bba6..7d8a0d662d56 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -278,6 +278,7 @@ struct ceph_mds_client {
 
 	u64               cap_flush_seq;
 	struct list_head  cap_dirty;        /* inodes with dirty caps */
+	struct list_head  cap_dirty_migrating; /* ...that are migrating */
 	int               num_cap_flushing; /* # caps we are flushing */
 	spinlock_t        cap_dirty_lock;   /* protects above items */
 	wait_queue_head_t cap_flushing_wq;
diff --git a/fs/dcache.c b/fs/dcache.c
index 18b2a1f10ed8..37f72ee5bf7c 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1220,7 +1220,7 @@ void shrink_dcache_parent(struct dentry * parent)
 EXPORT_SYMBOL(shrink_dcache_parent);
 
 /*
- * Scan `nr' dentries and return the number which remain.
+ * Scan `sc->nr_to_scan' dentries and return the number which remain.
  *
  * We need to avoid reentering the filesystem if the caller is performing a
  * GFP_NOFS allocation attempt.  One example deadlock is:
@@ -1231,8 +1231,12 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  *
  * In this case we return -1 to tell the caller that we baled.
  */
-static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
+static int shrink_dcache_memory(struct shrinker *shrink,
+				struct shrink_control *sc)
 {
+	int nr = sc->nr_to_scan;
+	gfp_t gfp_mask = sc->gfp_mask;
+
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
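The new signature reflects the shrinker API change in this series: nr_to_scan and gfp_mask now arrive packed in a struct shrink_control rather than as separate arguments (the fs/drop_caches.c hunk below shows the caller side). A sketch of a shrinker written against the struct-based callback, with my_object_count and my_free_some() as hypothetical stand-ins, not any real cache:

/* Sketch of a shrink_control-based shrinker callback. */
static int my_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	int nr = sc->nr_to_scan;

	if (nr) {
		if (!(sc->gfp_mask & __GFP_FS))
			return -1;	/* can't recurse into the fs */
		my_free_some(nr);
	}
	return my_object_count;		/* objects remaining */
}

static struct shrinker my_shrinker = {
	.shrink = my_shrink,
	.seeks = DEFAULT_SEEKS,
};

/* register_shrinker(&my_shrinker) at init, unregister_shrinker() on exit */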
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 0d329ff8ed4c..9b026ea8baa9 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -100,6 +100,7 @@ struct dlm_cluster {
 	unsigned int cl_log_debug;
 	unsigned int cl_protocol;
 	unsigned int cl_timewarn_cs;
+	unsigned int cl_waitwarn_us;
 };
 
 enum {
@@ -114,6 +115,7 @@ enum {
 	CLUSTER_ATTR_LOG_DEBUG,
 	CLUSTER_ATTR_PROTOCOL,
 	CLUSTER_ATTR_TIMEWARN_CS,
+	CLUSTER_ATTR_WAITWARN_US,
 };
 
 struct cluster_attribute {
@@ -166,6 +168,7 @@ CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
 CLUSTER_ATTR(timewarn_cs, 1);
+CLUSTER_ATTR(waitwarn_us, 0);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -179,6 +182,7 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
 	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
 	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
+	[CLUSTER_ATTR_WAITWARN_US] = &cluster_attr_waitwarn_us.attr,
 	NULL,
 };
 
@@ -439,6 +443,7 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_log_debug = dlm_config.ci_log_debug;
 	cl->cl_protocol = dlm_config.ci_protocol;
 	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
+	cl->cl_waitwarn_us = dlm_config.ci_waitwarn_us;
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -986,6 +991,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
 #define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
+#define DEFAULT_WAITWARN_US        0
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -998,6 +1004,7 @@ struct dlm_config_info dlm_config = {
 	.ci_scan_secs = DEFAULT_SCAN_SECS,
 	.ci_log_debug = DEFAULT_LOG_DEBUG,
 	.ci_protocol = DEFAULT_PROTOCOL,
-	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS
+	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS,
+	.ci_waitwarn_us = DEFAULT_WAITWARN_US
 };
 
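Like timewarn_cs, the new waitwarn_us value would be set through dlm's configfs cluster directory; the exact path in this sketch is an assumption based on how the sibling attributes are exposed, not something this diff states:

/* Sketch: set waitwarn_us from userspace (path assumed). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	const char *path = "/sys/kernel/config/dlm/cluster/waitwarn_us";
	int fd = open(path, O_WRONLY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (write(fd, "500000", 6) != 6)	/* warn after 0.5s */
		perror("write");
	close(fd);
	return 0;
}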
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 4f1d6fce58c5..dd0ce24d5a80 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -28,6 +28,7 @@ struct dlm_config_info {
 	int ci_log_debug;
 	int ci_protocol;
 	int ci_timewarn_cs;
+	int ci_waitwarn_us;
 };
 
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index b94204913011..0262451eb9c6 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -209,6 +209,7 @@ struct dlm_args {
 #define DLM_IFL_WATCH_TIMEWARN	0x00400000
 #define DLM_IFL_TIMEOUT_CANCEL	0x00800000
 #define DLM_IFL_DEADLOCK_CANCEL	0x01000000
+#define DLM_IFL_STUB_MS		0x02000000 /* magic number for m_flags */
 #define DLM_IFL_USER		0x00000001
 #define DLM_IFL_ORPHAN		0x00000002
 
@@ -245,6 +246,7 @@ struct dlm_lkb {
 
 	int8_t			lkb_wait_type;	/* type of reply waiting for */
 	int8_t			lkb_wait_count;
+	int			lkb_wait_nodeid; /* for debugging */
 
 	struct list_head	lkb_idtbl_list;	/* lockspace lkbtbl */
 	struct list_head	lkb_statequeue;	/* rsb g/c/w list */
@@ -254,6 +256,7 @@ struct dlm_lkb {
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
 	struct list_head	lkb_time_list;
 	ktime_t			lkb_timestamp;
+	ktime_t			lkb_wait_time;
 	unsigned long		lkb_timeout_cs;
 
 	struct dlm_callback	lkb_callbacks[DLM_CALLBACKS_SIZE];
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index 56d6bfcc1e48..f71d0b5abd95 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -799,10 +799,84 @@ static int msg_reply_type(int mstype)
 	return -1;
 }
 
+static int nodeid_warned(int nodeid, int num_nodes, int *warned)
+{
+	int i;
+
+	for (i = 0; i < num_nodes; i++) {
+		if (!warned[i]) {
+			warned[i] = nodeid;
+			return 0;
+		}
+		if (warned[i] == nodeid)
+			return 1;
+	}
+	return 0;
+}
+
+void dlm_scan_waiters(struct dlm_ls *ls)
+{
+	struct dlm_lkb *lkb;
+	ktime_t zero = ktime_set(0, 0);
+	s64 us;
+	s64 debug_maxus = 0;
+	u32 debug_scanned = 0;
+	u32 debug_expired = 0;
+	int num_nodes = 0;
+	int *warned = NULL;
+
+	if (!dlm_config.ci_waitwarn_us)
+		return;
+
+	mutex_lock(&ls->ls_waiters_mutex);
+
+	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+		if (ktime_equal(lkb->lkb_wait_time, zero))
+			continue;
+
+		debug_scanned++;
+
+		us = ktime_to_us(ktime_sub(ktime_get(), lkb->lkb_wait_time));
+
+		if (us < dlm_config.ci_waitwarn_us)
+			continue;
+
+		lkb->lkb_wait_time = zero;
+
+		debug_expired++;
+		if (us > debug_maxus)
+			debug_maxus = us;
+
+		if (!num_nodes) {
+			num_nodes = ls->ls_num_nodes;
+			warned = kmalloc(num_nodes * sizeof(int), GFP_KERNEL);
+			if (warned)
+				memset(warned, 0, num_nodes * sizeof(int));
+		}
+		if (!warned)
+			continue;
+		if (nodeid_warned(lkb->lkb_wait_nodeid, num_nodes, warned))
+			continue;
+
+		log_error(ls, "waitwarn %x %lld %d us check connection to "
+			  "node %d", lkb->lkb_id, (long long)us,
+			  dlm_config.ci_waitwarn_us, lkb->lkb_wait_nodeid);
+	}
+	mutex_unlock(&ls->ls_waiters_mutex);
+
+	if (warned)
+		kfree(warned);
+
+	if (debug_expired)
+		log_debug(ls, "scan_waiters %u warn %u over %d us max %lld us",
+			  debug_scanned, debug_expired,
+			  dlm_config.ci_waitwarn_us, (long long)debug_maxus);
+}
+
 /* add/remove lkb from global waiters list of lkb's waiting for
    a reply from a remote node */
 
-static int add_to_waiters(struct dlm_lkb *lkb, int mstype)
+static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
 {
 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
 	int error = 0;
@@ -842,6 +916,8 @@ static int add_to_waiters(struct dlm_lkb *lkb, int mstype, int to_nodeid)
 
 	lkb->lkb_wait_count++;
 	lkb->lkb_wait_type = mstype;
+	lkb->lkb_wait_time = ktime_get();
+	lkb->lkb_wait_nodeid = to_nodeid; /* for debugging */
 	hold_lkb(lkb);
 	list_add(&lkb->lkb_wait_reply, &ls->ls_waiters);
 out:
@@ -961,10 +1037,10 @@ static int remove_from_waiters_ms(struct dlm_lkb *lkb, struct dlm_message *ms)
 	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
 	int error;
 
-	if (ms != &ls->ls_stub_ms)
+	if (ms->m_flags != DLM_IFL_STUB_MS)
 		mutex_lock(&ls->ls_waiters_mutex);
 	error = _remove_from_waiters(lkb, ms->m_type, ms);
-	if (ms != &ls->ls_stub_ms)
+	if (ms->m_flags != DLM_IFL_STUB_MS)
 		mutex_unlock(&ls->ls_waiters_mutex);
 	return error;
 }
@@ -1157,6 +1233,16 @@ void dlm_adjust_timeouts(struct dlm_ls *ls)
 	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
 		lkb->lkb_timestamp = ktime_add_us(lkb->lkb_timestamp, adj_us);
 	mutex_unlock(&ls->ls_timeout_mutex);
+
+	if (!dlm_config.ci_waitwarn_us)
+		return;
+
+	mutex_lock(&ls->ls_waiters_mutex);
+	list_for_each_entry(lkb, &ls->ls_waiters, lkb_wait_reply) {
+		if (ktime_to_us(lkb->lkb_wait_time))
+			lkb->lkb_wait_time = ktime_get();
+	}
+	mutex_unlock(&ls->ls_waiters_mutex);
 }
 
 /* lkb is master or local copy */
@@ -1376,14 +1462,8 @@ static void grant_lock_pending(struct dlm_rsb *r, struct dlm_lkb *lkb)
    ALTPR/ALTCW: our rqmode may have been changed to PR or CW to become
    compatible with other granted locks */
 
-static void munge_demoted(struct dlm_lkb *lkb, struct dlm_message *ms)
+static void munge_demoted(struct dlm_lkb *lkb)
 {
-	if (ms->m_type != DLM_MSG_CONVERT_REPLY) {
-		log_print("munge_demoted %x invalid reply type %d",
-			  lkb->lkb_id, ms->m_type);
-		return;
-	}
-
 	if (lkb->lkb_rqmode == DLM_LOCK_IV || lkb->lkb_grmode == DLM_LOCK_IV) {
 		log_print("munge_demoted %x invalid modes gr %d rq %d",
 			  lkb->lkb_id, lkb->lkb_grmode, lkb->lkb_rqmode);
@@ -2844,12 +2924,12 @@ static int send_common(struct dlm_rsb *r, struct dlm_lkb *lkb, int mstype)
 	struct dlm_mhandle *mh;
 	int to_nodeid, error;
 
-	error = add_to_waiters(lkb, mstype);
+	to_nodeid = r->res_nodeid;
+
+	error = add_to_waiters(lkb, mstype, to_nodeid);
 	if (error)
 		return error;
 
-	to_nodeid = r->res_nodeid;
-
 	error = create_message(r, lkb, to_nodeid, mstype, &ms, &mh);
 	if (error)
 		goto fail;
@@ -2880,9 +2960,9 @@ static int send_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 	/* down conversions go without a reply from the master */
 	if (!error && down_conversion(lkb)) {
 		remove_from_waiters(lkb, DLM_MSG_CONVERT_REPLY);
+		r->res_ls->ls_stub_ms.m_flags = DLM_IFL_STUB_MS;
 		r->res_ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
 		r->res_ls->ls_stub_ms.m_result = 0;
-		r->res_ls->ls_stub_ms.m_flags = lkb->lkb_flags;
 		__receive_convert_reply(r, lkb, &r->res_ls->ls_stub_ms);
 	}
 
@@ -2951,12 +3031,12 @@ static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb)
 	struct dlm_mhandle *mh;
 	int to_nodeid, error;
 
-	error = add_to_waiters(lkb, DLM_MSG_LOOKUP);
+	to_nodeid = dlm_dir_nodeid(r);
+
+	error = add_to_waiters(lkb, DLM_MSG_LOOKUP, to_nodeid);
 	if (error)
 		return error;
 
-	to_nodeid = dlm_dir_nodeid(r);
-
 	error = create_message(r, NULL, to_nodeid, DLM_MSG_LOOKUP, &ms, &mh);
 	if (error)
 		goto fail;
@@ -3070,6 +3150,9 @@ static void receive_flags(struct dlm_lkb *lkb, struct dlm_message *ms)
 
 static void receive_flags_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
 {
+	if (ms->m_flags == DLM_IFL_STUB_MS)
+		return;
+
 	lkb->lkb_sbflags = ms->m_sbflags;
 	lkb->lkb_flags = (lkb->lkb_flags & 0xFFFF0000) |
 			 (ms->m_flags & 0x0000FFFF);
@@ -3612,7 +3695,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 		/* convert was queued on remote master */
 		receive_flags_reply(lkb, ms);
 		if (is_demoted(lkb))
-			munge_demoted(lkb, ms);
+			munge_demoted(lkb);
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
 		add_timeout(lkb);
@@ -3622,7 +3705,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 		/* convert was granted on remote master */
 		receive_flags_reply(lkb, ms);
 		if (is_demoted(lkb))
-			munge_demoted(lkb, ms);
+			munge_demoted(lkb);
 		grant_lock_pc(r, lkb, ms);
 		queue_cast(r, lkb, 0);
 		break;
@@ -3996,15 +4079,17 @@ void dlm_receive_buffer(union dlm_packet *p, int nodeid)
 	dlm_put_lockspace(ls);
 }
 
-static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb)
+static void recover_convert_waiter(struct dlm_ls *ls, struct dlm_lkb *lkb,
+				   struct dlm_message *ms_stub)
 {
 	if (middle_conversion(lkb)) {
 		hold_lkb(lkb);
-		ls->ls_stub_ms.m_type = DLM_MSG_CONVERT_REPLY;
-		ls->ls_stub_ms.m_result = -EINPROGRESS;
-		ls->ls_stub_ms.m_flags = lkb->lkb_flags;
-		ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
-		_receive_convert_reply(lkb, &ls->ls_stub_ms);
+		memset(ms_stub, 0, sizeof(struct dlm_message));
+		ms_stub->m_flags = DLM_IFL_STUB_MS;
+		ms_stub->m_type = DLM_MSG_CONVERT_REPLY;
+		ms_stub->m_result = -EINPROGRESS;
+		ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+		_receive_convert_reply(lkb, ms_stub);
 
 		/* Same special case as in receive_rcom_lock_args() */
 		lkb->lkb_grmode = DLM_LOCK_IV;
@@ -4045,13 +4130,27 @@ static int waiter_needs_recovery(struct dlm_ls *ls, struct dlm_lkb *lkb)
 void dlm_recover_waiters_pre(struct dlm_ls *ls)
 {
 	struct dlm_lkb *lkb, *safe;
+	struct dlm_message *ms_stub;
 	int wait_type, stub_unlock_result, stub_cancel_result;
 
+	ms_stub = kmalloc(sizeof(struct dlm_message), GFP_KERNEL);
+	if (!ms_stub) {
+		log_error(ls, "dlm_recover_waiters_pre no mem");
+		return;
+	}
+
 	mutex_lock(&ls->ls_waiters_mutex);
 
 	list_for_each_entry_safe(lkb, safe, &ls->ls_waiters, lkb_wait_reply) {
-		log_debug(ls, "pre recover waiter lkid %x type %d flags %x",
-			  lkb->lkb_id, lkb->lkb_wait_type, lkb->lkb_flags);
+
+		/* exclude debug messages about unlocks because there can be so
+		   many and they aren't very interesting */
+
+		if (lkb->lkb_wait_type != DLM_MSG_UNLOCK) {
+			log_debug(ls, "recover_waiter %x nodeid %d "
+				  "msg %d to %d", lkb->lkb_id, lkb->lkb_nodeid,
+				  lkb->lkb_wait_type, lkb->lkb_wait_nodeid);
+		}
 
 		/* all outstanding lookups, regardless of destination will be
 		   resent after recovery is done */
@@ -4097,26 +4196,28 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 			break;
 
 		case DLM_MSG_CONVERT:
-			recover_convert_waiter(ls, lkb);
+			recover_convert_waiter(ls, lkb, ms_stub);
 			break;
 
 		case DLM_MSG_UNLOCK:
 			hold_lkb(lkb);
-			ls->ls_stub_ms.m_type = DLM_MSG_UNLOCK_REPLY;
-			ls->ls_stub_ms.m_result = stub_unlock_result;
-			ls->ls_stub_ms.m_flags = lkb->lkb_flags;
-			ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
-			_receive_unlock_reply(lkb, &ls->ls_stub_ms);
+			memset(ms_stub, 0, sizeof(struct dlm_message));
+			ms_stub->m_flags = DLM_IFL_STUB_MS;
+			ms_stub->m_type = DLM_MSG_UNLOCK_REPLY;
+			ms_stub->m_result = stub_unlock_result;
+			ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+			_receive_unlock_reply(lkb, ms_stub);
 			dlm_put_lkb(lkb);
 			break;
 
 		case DLM_MSG_CANCEL:
 			hold_lkb(lkb);
-			ls->ls_stub_ms.m_type = DLM_MSG_CANCEL_REPLY;
-			ls->ls_stub_ms.m_result = stub_cancel_result;
-			ls->ls_stub_ms.m_flags = lkb->lkb_flags;
-			ls->ls_stub_ms.m_header.h_nodeid = lkb->lkb_nodeid;
-			_receive_cancel_reply(lkb, &ls->ls_stub_ms);
+			memset(ms_stub, 0, sizeof(struct dlm_message));
+			ms_stub->m_flags = DLM_IFL_STUB_MS;
+			ms_stub->m_type = DLM_MSG_CANCEL_REPLY;
+			ms_stub->m_result = stub_cancel_result;
+			ms_stub->m_header.h_nodeid = lkb->lkb_nodeid;
+			_receive_cancel_reply(lkb, ms_stub);
 			dlm_put_lkb(lkb);
 			break;
 
@@ -4127,6 +4228,7 @@ void dlm_recover_waiters_pre(struct dlm_ls *ls)
 		schedule();
 	}
 	mutex_unlock(&ls->ls_waiters_mutex);
+	kfree(ms_stub);
 }
 
 static struct dlm_lkb *find_resend_waiter(struct dlm_ls *ls)
@@ -4191,8 +4293,8 @@ int dlm_recover_waiters_post(struct dlm_ls *ls)
 		ou = is_overlap_unlock(lkb);
 		err = 0;
 
-		log_debug(ls, "recover_waiters_post %x type %d flags %x %s",
-			  lkb->lkb_id, mstype, lkb->lkb_flags, r->res_name);
+		log_debug(ls, "recover_waiter %x nodeid %d msg %d r_nodeid %d",
+			  lkb->lkb_id, lkb->lkb_nodeid, mstype, r->res_nodeid);
 
 		/* At this point we assume that we won't get a reply to any
 		   previous op or overlap op on this lock.  First, do a big
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 88e93c80cc22..265017a7c3e7 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -24,6 +24,7 @@ int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
 int dlm_lock_recovery_try(struct dlm_ls *ls);
 void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_waiters(struct dlm_ls *ls);
 void dlm_scan_timeout(struct dlm_ls *ls);
 void dlm_adjust_timeouts(struct dlm_ls *ls);
 
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index f994a7dfda85..14cbf4099753 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -243,7 +243,6 @@ static struct dlm_ls *find_ls_to_scan(void)
 static int dlm_scand(void *data)
 {
 	struct dlm_ls *ls;
-	int timeout_jiffies = dlm_config.ci_scan_secs * HZ;
 
 	while (!kthread_should_stop()) {
 		ls = find_ls_to_scan();
@@ -252,13 +251,14 @@ static int dlm_scand(void *data)
 				ls->ls_scan_time = jiffies;
 				dlm_scan_rsbs(ls);
 				dlm_scan_timeout(ls);
+				dlm_scan_waiters(ls);
 				dlm_unlock_recovery(ls);
 			} else {
 				ls->ls_scan_time += HZ;
 			}
-		} else {
-			schedule_timeout_interruptible(timeout_jiffies);
+			continue;
 		}
+		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
 	}
 	return 0;
 }
diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
index 30d8b85febbf..e2b878004364 100644
--- a/fs/dlm/plock.c
+++ b/fs/dlm/plock.c
@@ -71,6 +71,36 @@ static void send_op(struct plock_op *op)
 	wake_up(&send_wq);
 }
 
+/* If a process was killed while waiting for the only plock on a file,
+   locks_remove_posix will not see any lock on the file so it won't
+   send an unlock-close to us to pass on to userspace to clean up the
+   abandoned waiter.  So, we have to insert the unlock-close when the
+   lock call is interrupted. */
+
+static void do_unlock_close(struct dlm_ls *ls, u64 number,
+			    struct file *file, struct file_lock *fl)
+{
+	struct plock_op *op;
+
+	op = kzalloc(sizeof(*op), GFP_NOFS);
+	if (!op)
+		return;
+
+	op->info.optype = DLM_PLOCK_OP_UNLOCK;
+	op->info.pid = fl->fl_pid;
+	op->info.fsid = ls->ls_global_id;
+	op->info.number = number;
+	op->info.start = 0;
+	op->info.end = OFFSET_MAX;
+	if (fl->fl_lmops && fl->fl_lmops->fl_grant)
+		op->info.owner = (__u64) fl->fl_pid;
+	else
+		op->info.owner = (__u64)(long) fl->fl_owner;
+
+	op->info.flags |= DLM_PLOCK_FL_CLOSE;
+	send_op(op);
+}
+
 int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 		   int cmd, struct file_lock *fl)
 {
@@ -114,9 +144,19 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 
 	send_op(op);
 
-	if (xop->callback == NULL)
-		wait_event(recv_wq, (op->done != 0));
-	else {
+	if (xop->callback == NULL) {
+		rv = wait_event_killable(recv_wq, (op->done != 0));
+		if (rv == -ERESTARTSYS) {
+			log_debug(ls, "dlm_posix_lock: wait killed %llx",
+				  (unsigned long long)number);
+			spin_lock(&ops_lock);
+			list_del(&op->list);
+			spin_unlock(&ops_lock);
+			kfree(xop);
+			do_unlock_close(ls, number, file, fl);
+			goto out;
+		}
+	} else {
 		rv = FILE_LOCK_DEFERRED;
 		goto out;
 	}
@@ -233,6 +273,13 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
 	else
 		op->info.owner = (__u64)(long) fl->fl_owner;
 
+	if (fl->fl_flags & FL_CLOSE) {
+		op->info.flags |= DLM_PLOCK_FL_CLOSE;
+		send_op(op);
+		rv = 0;
+		goto out;
+	}
+
 	send_op(op);
 	wait_event(recv_wq, (op->done != 0));
 
@@ -334,7 +381,10 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
 	spin_lock(&ops_lock);
 	if (!list_empty(&send_list)) {
 		op = list_entry(send_list.next, struct plock_op, list);
-		list_move(&op->list, &recv_list);
+		if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+			list_del(&op->list);
+		else
+			list_move(&op->list, &recv_list);
 		memcpy(&info, &op->info, sizeof(info));
 	}
 	spin_unlock(&ops_lock);
@@ -342,6 +392,13 @@ static ssize_t dev_read(struct file *file, char __user *u, size_t count,
 	if (!op)
 		return -EAGAIN;
 
+	/* there is no need to get a reply from userspace for unlocks
+	   that were generated by the vfs cleaning up for a close
+	   (the process did not make an unlock call). */
+
+	if (op->info.flags & DLM_PLOCK_FL_CLOSE)
+		kfree(op);
+
 	if (copy_to_user(u, &info, sizeof(info)))
 		return -EFAULT;
 	return sizeof(info);
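The dlm_posix_lock() change above swaps an uninterruptible wait_event() for wait_event_killable(), so a fatal signal returns -ERESTARTSYS and the submitter itself must unlink the request and synthesize the unlock-close that locks_remove_posix() will never send. The general shape of that kill-aware wait, sketched with hypothetical names (my_req, queue_lock, undo_request):

/* Sketch of the killable-wait-plus-cleanup pattern; all names are
 * hypothetical stand-ins, not dlm code. */
static int submit_and_wait(struct my_req *req)
{
	int rv;

	queue_req(req);

	rv = wait_event_killable(recv_waitq, req->done != 0);
	if (rv == -ERESTARTSYS) {
		/* fatal signal: nobody will complete this request, so
		 * the submitter must unlink it and undo side effects */
		spin_lock(&queue_lock);
		list_del(&req->list);
		spin_unlock(&queue_lock);
		undo_request(req);
		return rv;
	}
	return req->status;
}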
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index d5ab3fe7c198..e96bf3e9be88 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -611,7 +611,6 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 
 out_sig:
 	sigprocmask(SIG_SETMASK, &tmpsig, NULL);
-	recalc_sigpending();
 out_free:
 	kfree(kbuf);
 	return error;
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index 98b77c89494c..c00e055b6282 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -40,9 +40,12 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 static void drop_slab(void)
 {
 	int nr_objects;
+	struct shrink_control shrink = {
+		.gfp_mask = GFP_KERNEL,
+	};
 
 	do {
-		nr_objects = shrink_slab(1000, GFP_KERNEL, 1000);
+		nr_objects = shrink_slab(&shrink, 1000, 1000);
 	} while (nr_objects > 10);
 }
 
| @@ -200,7 +200,7 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, | |||
| 200 | 200 | ||
| 201 | #ifdef CONFIG_STACK_GROWSUP | 201 | #ifdef CONFIG_STACK_GROWSUP |
| 202 | if (write) { | 202 | if (write) { |
| 203 | ret = expand_stack_downwards(bprm->vma, pos); | 203 | ret = expand_downwards(bprm->vma, pos); |
| 204 | if (ret < 0) | 204 | if (ret < 0) |
| 205 | return NULL; | 205 | return NULL; |
| 206 | } | 206 | } |
| @@ -600,7 +600,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
| 600 | unsigned long length = old_end - old_start; | 600 | unsigned long length = old_end - old_start; |
| 601 | unsigned long new_start = old_start - shift; | 601 | unsigned long new_start = old_start - shift; |
| 602 | unsigned long new_end = old_end - shift; | 602 | unsigned long new_end = old_end - shift; |
| 603 | struct mmu_gather *tlb; | 603 | struct mmu_gather tlb; |
| 604 | 604 | ||
| 605 | BUG_ON(new_start > new_end); | 605 | BUG_ON(new_start > new_end); |
| 606 | 606 | ||
| @@ -626,12 +626,12 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
| 626 | return -ENOMEM; | 626 | return -ENOMEM; |
| 627 | 627 | ||
| 628 | lru_add_drain(); | 628 | lru_add_drain(); |
| 629 | tlb = tlb_gather_mmu(mm, 0); | 629 | tlb_gather_mmu(&tlb, mm, 0); |
| 630 | if (new_end > old_start) { | 630 | if (new_end > old_start) { |
| 631 | /* | 631 | /* |
| 632 | * when the old and new regions overlap clear from new_end. | 632 | * when the old and new regions overlap clear from new_end. |
| 633 | */ | 633 | */ |
| 634 | free_pgd_range(tlb, new_end, old_end, new_end, | 634 | free_pgd_range(&tlb, new_end, old_end, new_end, |
| 635 | vma->vm_next ? vma->vm_next->vm_start : 0); | 635 | vma->vm_next ? vma->vm_next->vm_start : 0); |
| 636 | } else { | 636 | } else { |
| 637 | /* | 637 | /* |
| @@ -640,10 +640,10 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) | |||
| 640 | * have constraints on va-space that make this illegal (IA64) - | 640 | * have constraints on va-space that make this illegal (IA64) - |
| 641 | * for the others its just a little faster. | 641 | * for the others its just a little faster. |
| 642 | */ | 642 | */ |
| 643 | free_pgd_range(tlb, old_start, old_end, new_end, | 643 | free_pgd_range(&tlb, old_start, old_end, new_end, |
| 644 | vma->vm_next ? vma->vm_next->vm_start : 0); | 644 | vma->vm_next ? vma->vm_next->vm_start : 0); |
| 645 | } | 645 | } |
| 646 | tlb_finish_mmu(tlb, new_end, old_end); | 646 | tlb_finish_mmu(&tlb, new_end, old_end); |
| 647 | 647 | ||
| 648 | /* | 648 | /* |
| 649 | * Shrink the vma to just the new range. Always succeeds. | 649 | * Shrink the vma to just the new range. Always succeeds. |
| @@ -1051,6 +1051,7 @@ char *get_task_comm(char *buf, struct task_struct *tsk) | |||
| 1051 | task_unlock(tsk); | 1051 | task_unlock(tsk); |
| 1052 | return buf; | 1052 | return buf; |
| 1053 | } | 1053 | } |
| 1054 | EXPORT_SYMBOL_GPL(get_task_comm); | ||
| 1054 | 1055 | ||
| 1055 | void set_task_comm(struct task_struct *tsk, char *buf) | 1056 | void set_task_comm(struct task_struct *tsk, char *buf) |
| 1056 | { | 1057 | { |
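The exec.c hunks track a parallel mm API change: tlb_gather_mmu() no longer returns a pointer to per-CPU state, so shift_arg_pages() now keeps the struct mmu_gather on its own stack and passes it by address. The converted pattern, condensed from the hunk above:

struct mmu_gather tlb;

tlb_gather_mmu(&tlb, mm, 0);		/* initialise caller-owned state */
free_pgd_range(&tlb, start, end, floor, ceiling);
tlb_finish_mmu(&tlb, start, end);	/* flush TLBs, free batched pages */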
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 0a78dae7e2cb..1dd62ed35b85 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -898,7 +898,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 898 | brelse(bh); | 898 | brelse(bh); |
| 899 | 899 | ||
| 900 | if (!sb_set_blocksize(sb, blocksize)) { | 900 | if (!sb_set_blocksize(sb, blocksize)) { |
| 901 | ext2_msg(sb, KERN_ERR, "error: blocksize is too small"); | 901 | ext2_msg(sb, KERN_ERR, |
| 902 | "error: bad blocksize %d", blocksize); | ||
| 902 | goto failed_sbi; | 903 | goto failed_sbi; |
| 903 | } | 904 | } |
| 904 | 905 | ||
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 32f3b8695859..34b6d9bfc48a 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
| @@ -1416,10 +1416,19 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1416 | frame->at = entries; | 1416 | frame->at = entries; |
| 1417 | frame->bh = bh; | 1417 | frame->bh = bh; |
| 1418 | bh = bh2; | 1418 | bh = bh2; |
| 1419 | /* | ||
| 1420 | * Mark buffers dirty here so that if do_split() fails we write a | ||
| 1421 | * consistent set of buffers to disk. | ||
| 1422 | */ | ||
| 1423 | ext3_journal_dirty_metadata(handle, frame->bh); | ||
| 1424 | ext3_journal_dirty_metadata(handle, bh); | ||
| 1419 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); | 1425 | de = do_split(handle,dir, &bh, frame, &hinfo, &retval); |
| 1420 | dx_release (frames); | 1426 | if (!de) { |
| 1421 | if (!(de)) | 1427 | ext3_mark_inode_dirty(handle, dir); |
| 1428 | dx_release(frames); | ||
| 1422 | return retval; | 1429 | return retval; |
| 1430 | } | ||
| 1431 | dx_release(frames); | ||
| 1423 | 1432 | ||
| 1424 | return add_dirent_to_buf(handle, dentry, inode, de, bh); | 1433 | return add_dirent_to_buf(handle, dentry, inode, de, bh); |
| 1425 | } | 1434 | } |
| @@ -2189,6 +2198,7 @@ static int ext3_symlink (struct inode * dir, | |||
| 2189 | handle_t *handle; | 2198 | handle_t *handle; |
| 2190 | struct inode * inode; | 2199 | struct inode * inode; |
| 2191 | int l, err, retries = 0; | 2200 | int l, err, retries = 0; |
| 2201 | int credits; | ||
| 2192 | 2202 | ||
| 2193 | l = strlen(symname)+1; | 2203 | l = strlen(symname)+1; |
| 2194 | if (l > dir->i_sb->s_blocksize) | 2204 | if (l > dir->i_sb->s_blocksize) |
| @@ -2196,10 +2206,26 @@ static int ext3_symlink (struct inode * dir, | |||
| 2196 | 2206 | ||
| 2197 | dquot_initialize(dir); | 2207 | dquot_initialize(dir); |
| 2198 | 2208 | ||
| 2209 | if (l > EXT3_N_BLOCKS * 4) { | ||
| 2210 | /* | ||
| 2211 | * For non-fast symlinks, we just allocate inode and put it on | ||
| 2212 | * orphan list in the first transaction => we need bitmap, | ||
| 2213 | * group descriptor, sb, inode block, quota blocks. | ||
| 2214 | */ | ||
| 2215 | credits = 4 + EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
| 2216 | } else { | ||
| 2217 | /* | ||
| 2218 | * Fast symlink. We have to add entry to directory | ||
| 2219 | * (EXT3_DATA_TRANS_BLOCKS + EXT3_INDEX_EXTRA_TRANS_BLOCKS), | ||
| 2220 | * allocate new inode (bitmap, group descriptor, inode block, | ||
| 2221 | * quota blocks, sb is already counted in previous macros). | ||
| 2222 | */ | ||
| 2223 | credits = EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
| 2224 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + | ||
| 2225 | EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb); | ||
| 2226 | } | ||
| 2199 | retry: | 2227 | retry: |
| 2200 | handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | 2228 | handle = ext3_journal_start(dir, credits); |
| 2201 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + | ||
| 2202 | EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb)); | ||
| 2203 | if (IS_ERR(handle)) | 2229 | if (IS_ERR(handle)) |
| 2204 | return PTR_ERR(handle); | 2230 | return PTR_ERR(handle); |
| 2205 | 2231 | ||
| @@ -2211,21 +2237,45 @@ retry: | |||
| 2211 | if (IS_ERR(inode)) | 2237 | if (IS_ERR(inode)) |
| 2212 | goto out_stop; | 2238 | goto out_stop; |
| 2213 | 2239 | ||
| 2214 | if (l > sizeof (EXT3_I(inode)->i_data)) { | 2240 | if (l > EXT3_N_BLOCKS * 4) { |
| 2215 | inode->i_op = &ext3_symlink_inode_operations; | 2241 | inode->i_op = &ext3_symlink_inode_operations; |
| 2216 | ext3_set_aops(inode); | 2242 | ext3_set_aops(inode); |
| 2217 | /* | 2243 | /* |
| 2218 | * page_symlink() calls into ext3_prepare/commit_write. | 2244 | * We cannot call page_symlink() with a transaction started |
| 2219 | * We have a transaction open. All is sweetness. It also sets | 2245 | * because it calls into ext3_write_begin() which acquires the page |
| 2220 | * i_size in generic_commit_write(). | 2246 | * lock, which ranks below transaction start (and it can also |
| 2247 | * wait for journal commit if we are running out of space). So | ||
| 2248 | * we have to stop the transaction now and restart it when symlink | ||
| 2249 | * contents are written. | ||
| 2250 | * | ||
| 2251 | * To keep the fs consistent in case of a crash, we have to put the | ||
| 2252 | * inode on the orphan list in the meantime. | ||
| 2221 | */ | 2253 | */ |
| 2254 | drop_nlink(inode); | ||
| 2255 | err = ext3_orphan_add(handle, inode); | ||
| 2256 | ext3_journal_stop(handle); | ||
| 2257 | if (err) | ||
| 2258 | goto err_drop_inode; | ||
| 2222 | err = __page_symlink(inode, symname, l, 1); | 2259 | err = __page_symlink(inode, symname, l, 1); |
| 2260 | if (err) | ||
| 2261 | goto err_drop_inode; | ||
| 2262 | /* | ||
| 2263 | * Now inode is being linked into dir (EXT3_DATA_TRANS_BLOCKS | ||
| 2264 | * + EXT3_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified | ||
| 2265 | */ | ||
| 2266 | handle = ext3_journal_start(dir, | ||
| 2267 | EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + | ||
| 2268 | EXT3_INDEX_EXTRA_TRANS_BLOCKS + 1); | ||
| 2269 | if (IS_ERR(handle)) { | ||
| 2270 | err = PTR_ERR(handle); | ||
| 2271 | goto err_drop_inode; | ||
| 2272 | } | ||
| 2273 | inc_nlink(inode); | ||
| 2274 | err = ext3_orphan_del(handle, inode); | ||
| 2223 | if (err) { | 2275 | if (err) { |
| 2276 | ext3_journal_stop(handle); | ||
| 2224 | drop_nlink(inode); | 2277 | drop_nlink(inode); |
| 2225 | unlock_new_inode(inode); | 2278 | goto err_drop_inode; |
| 2226 | ext3_mark_inode_dirty(handle, inode); | ||
| 2227 | iput (inode); | ||
| 2228 | goto out_stop; | ||
| 2229 | } | 2279 | } |
| 2230 | } else { | 2280 | } else { |
| 2231 | inode->i_op = &ext3_fast_symlink_inode_operations; | 2281 | inode->i_op = &ext3_fast_symlink_inode_operations; |
| @@ -2239,6 +2289,10 @@ out_stop: | |||
| 2239 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) | 2289 | if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) |
| 2240 | goto retry; | 2290 | goto retry; |
| 2241 | return err; | 2291 | return err; |
| 2292 | err_drop_inode: | ||
| 2293 | unlock_new_inode(inode); | ||
| 2294 | iput(inode); | ||
| 2295 | return err; | ||
| 2242 | } | 2296 | } |
| 2243 | 2297 | ||
| 2244 | static int ext3_link (struct dentry * old_dentry, | 2298 | static int ext3_link (struct dentry * old_dentry, |
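The ext3_symlink() rework is the substantive change in this file: page_symlink() takes the page lock and may wait for a journal commit, both of which rank below a running transaction, so it must not be called with a handle open. For long (page-backed) symlinks the function therefore uses two transactions and parks the inode on the orphan list across the unjournaled data write, so a crash in between leaves nothing dangling. The ordering, condensed from the hunk with error paths elided (see err_drop_inode above for the full cleanup):

handle = ext3_journal_start(dir, credits);	/* txn 1: inode + orphan */
inode = ext3_new_inode(handle, dir, ...);	/* args condensed */

drop_nlink(inode);				/* not linked anywhere yet */
err = ext3_orphan_add(handle, inode);		/* crash => auto cleanup */
ext3_journal_stop(handle);			/* nothing held across I/O */

err = __page_symlink(inode, symname, l, 1);	/* page lock, maybe commit */

handle = ext3_journal_start(dir, ...);		/* txn 2: link + de-orphan */
inc_nlink(inode);
err = ext3_orphan_del(handle, inode);
/* falls through to add the directory entry, as before */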
diff --git a/fs/fat/cache.c b/fs/fat/cache.c index ae8200f84e39..1cc7038e273d 100644 --- a/fs/fat/cache.c +++ b/fs/fat/cache.c | |||
| @@ -151,6 +151,13 @@ static void fat_cache_add(struct inode *inode, struct fat_cache_id *new) | |||
| 151 | spin_unlock(&MSDOS_I(inode)->cache_lru_lock); | 151 | spin_unlock(&MSDOS_I(inode)->cache_lru_lock); |
| 152 | 152 | ||
| 153 | tmp = fat_cache_alloc(inode); | 153 | tmp = fat_cache_alloc(inode); |
| 154 | if (!tmp) { | ||
| 155 | spin_lock(&MSDOS_I(inode)->cache_lru_lock); | ||
| 156 | MSDOS_I(inode)->nr_caches--; | ||
| 157 | spin_unlock(&MSDOS_I(inode)->cache_lru_lock); | ||
| 158 | return; | ||
| 159 | } | ||
| 160 | |||
| 154 | spin_lock(&MSDOS_I(inode)->cache_lru_lock); | 161 | spin_lock(&MSDOS_I(inode)->cache_lru_lock); |
| 155 | cache = fat_cache_merge(inode, new); | 162 | cache = fat_cache_merge(inode, new); |
| 156 | if (cache != NULL) { | 163 | if (cache != NULL) { |
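The fat_cache_add() fix closes an allocation-failure hole: nr_caches is bumped under cache_lru_lock as a reservation before the lock is dropped for fat_cache_alloc(), so a NULL return must re-take the lock and hand the reservation back. The general shape of the pattern, as a sketch with generic names:

spin_lock(&lock);
counter++;			/* optimistic reservation */
spin_unlock(&lock);

obj = alloc_object();		/* may sleep, hence the dropped lock */
if (!obj) {
	spin_lock(&lock);
	counter--;		/* give the reservation back on failure */
	spin_unlock(&lock);
	return;
}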
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index ee42b9e0b16a..4ad64732cbce 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
| @@ -98,7 +98,7 @@ next: | |||
| 98 | 98 | ||
| 99 | *bh = sb_bread(sb, phys); | 99 | *bh = sb_bread(sb, phys); |
| 100 | if (*bh == NULL) { | 100 | if (*bh == NULL) { |
| 101 | printk(KERN_ERR "FAT: Directory bread(block %llu) failed\n", | 101 | fat_msg(sb, KERN_ERR, "Directory bread(block %llu) failed", |
| 102 | (llu)phys); | 102 | (llu)phys); |
| 103 | /* skip this block */ | 103 | /* skip this block */ |
| 104 | *pos = (iblock + 1) << sb->s_blocksize_bits; | 104 | *pos = (iblock + 1) << sb->s_blocksize_bits; |
| @@ -136,9 +136,10 @@ static inline int fat_get_entry(struct inode *dir, loff_t *pos, | |||
| 136 | * but ignore that right now. | 136 | * but ignore that right now. |
| 137 | * Ahem... Stack smashing in ring 0 isn't fun. Fixed. | 137 | * Ahem... Stack smashing in ring 0 isn't fun. Fixed. |
| 138 | */ | 138 | */ |
| 139 | static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, | 139 | static int uni16_to_x8(struct super_block *sb, unsigned char *ascii, |
| 140 | int uni_xlate, struct nls_table *nls) | 140 | const wchar_t *uni, int len, struct nls_table *nls) |
| 141 | { | 141 | { |
| 142 | int uni_xlate = MSDOS_SB(sb)->options.unicode_xlate; | ||
| 142 | const wchar_t *ip; | 143 | const wchar_t *ip; |
| 143 | wchar_t ec; | 144 | wchar_t ec; |
| 144 | unsigned char *op; | 145 | unsigned char *op; |
| @@ -166,23 +167,23 @@ static int uni16_to_x8(unsigned char *ascii, const wchar_t *uni, int len, | |||
| 166 | } | 167 | } |
| 167 | 168 | ||
| 168 | if (unlikely(*ip)) { | 169 | if (unlikely(*ip)) { |
| 169 | printk(KERN_WARNING "FAT: filename was truncated while " | 170 | fat_msg(sb, KERN_WARNING, "filename was truncated while " |
| 170 | "converting."); | 171 | "converting."); |
| 171 | } | 172 | } |
| 172 | 173 | ||
| 173 | *op = 0; | 174 | *op = 0; |
| 174 | return (op - ascii); | 175 | return (op - ascii); |
| 175 | } | 176 | } |
| 176 | 177 | ||
| 177 | static inline int fat_uni_to_x8(struct msdos_sb_info *sbi, const wchar_t *uni, | 178 | static inline int fat_uni_to_x8(struct super_block *sb, const wchar_t *uni, |
| 178 | unsigned char *buf, int size) | 179 | unsigned char *buf, int size) |
| 179 | { | 180 | { |
| 181 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | ||
| 180 | if (sbi->options.utf8) | 182 | if (sbi->options.utf8) |
| 181 | return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, | 183 | return utf16s_to_utf8s(uni, FAT_MAX_UNI_CHARS, |
| 182 | UTF16_HOST_ENDIAN, buf, size); | 184 | UTF16_HOST_ENDIAN, buf, size); |
| 183 | else | 185 | else |
| 184 | return uni16_to_x8(buf, uni, size, sbi->options.unicode_xlate, | 186 | return uni16_to_x8(sb, buf, uni, size, sbi->nls_io); |
| 185 | sbi->nls_io); | ||
| 186 | } | 187 | } |
| 187 | 188 | ||
| 188 | static inline int | 189 | static inline int |
| @@ -419,7 +420,7 @@ parse_record: | |||
| 419 | 420 | ||
| 420 | /* Compare shortname */ | 421 | /* Compare shortname */ |
| 421 | bufuname[last_u] = 0x0000; | 422 | bufuname[last_u] = 0x0000; |
| 422 | len = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); | 423 | len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); |
| 423 | if (fat_name_match(sbi, name, name_len, bufname, len)) | 424 | if (fat_name_match(sbi, name, name_len, bufname, len)) |
| 424 | goto found; | 425 | goto found; |
| 425 | 426 | ||
| @@ -428,7 +429,7 @@ parse_record: | |||
| 428 | int size = PATH_MAX - FAT_MAX_UNI_SIZE; | 429 | int size = PATH_MAX - FAT_MAX_UNI_SIZE; |
| 429 | 430 | ||
| 430 | /* Compare longname */ | 431 | /* Compare longname */ |
| 431 | len = fat_uni_to_x8(sbi, unicode, longname, size); | 432 | len = fat_uni_to_x8(sb, unicode, longname, size); |
| 432 | if (fat_name_match(sbi, name, name_len, longname, len)) | 433 | if (fat_name_match(sbi, name, name_len, longname, len)) |
| 433 | goto found; | 434 | goto found; |
| 434 | } | 435 | } |
| @@ -545,7 +546,7 @@ parse_record: | |||
| 545 | if (nr_slots) { | 546 | if (nr_slots) { |
| 546 | void *longname = unicode + FAT_MAX_UNI_CHARS; | 547 | void *longname = unicode + FAT_MAX_UNI_CHARS; |
| 547 | int size = PATH_MAX - FAT_MAX_UNI_SIZE; | 548 | int size = PATH_MAX - FAT_MAX_UNI_SIZE; |
| 548 | int len = fat_uni_to_x8(sbi, unicode, longname, size); | 549 | int len = fat_uni_to_x8(sb, unicode, longname, size); |
| 549 | 550 | ||
| 550 | fill_name = longname; | 551 | fill_name = longname; |
| 551 | fill_len = len; | 552 | fill_len = len; |
| @@ -621,7 +622,7 @@ parse_record: | |||
| 621 | 622 | ||
| 622 | if (isvfat) { | 623 | if (isvfat) { |
| 623 | bufuname[j] = 0x0000; | 624 | bufuname[j] = 0x0000; |
| 624 | i = fat_uni_to_x8(sbi, bufuname, bufname, sizeof(bufname)); | 625 | i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname)); |
| 625 | } | 626 | } |
| 626 | if (nr_slots) { | 627 | if (nr_slots) { |
| 627 | /* hack for fat_ioctl_filldir() */ | 628 | /* hack for fat_ioctl_filldir() */ |
| @@ -979,6 +980,7 @@ static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots) | |||
| 979 | 980 | ||
| 980 | int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) | 981 | int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) |
| 981 | { | 982 | { |
| 983 | struct super_block *sb = dir->i_sb; | ||
| 982 | struct msdos_dir_entry *de; | 984 | struct msdos_dir_entry *de; |
| 983 | struct buffer_head *bh; | 985 | struct buffer_head *bh; |
| 984 | int err = 0, nr_slots; | 986 | int err = 0, nr_slots; |
| @@ -1013,8 +1015,8 @@ int fat_remove_entries(struct inode *dir, struct fat_slot_info *sinfo) | |||
| 1013 | */ | 1015 | */ |
| 1014 | err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots); | 1016 | err = __fat_remove_entries(dir, sinfo->slot_off, nr_slots); |
| 1015 | if (err) { | 1017 | if (err) { |
| 1016 | printk(KERN_WARNING | 1018 | fat_msg(sb, KERN_WARNING, |
| 1017 | "FAT: Couldn't remove the long name slots\n"); | 1019 | "Couldn't remove the long name slots"); |
| 1018 | } | 1020 | } |
| 1019 | } | 1021 | } |
| 1020 | 1022 | ||
| @@ -1265,7 +1267,7 @@ int fat_add_entries(struct inode *dir, void *slots, int nr_slots, | |||
| 1265 | if (sbi->fat_bits != 32) | 1267 | if (sbi->fat_bits != 32) |
| 1266 | goto error; | 1268 | goto error; |
| 1267 | } else if (MSDOS_I(dir)->i_start == 0) { | 1269 | } else if (MSDOS_I(dir)->i_start == 0) { |
| 1268 | printk(KERN_ERR "FAT: Corrupted directory (i_pos %lld)\n", | 1270 | fat_msg(sb, KERN_ERR, "Corrupted directory (i_pos %lld)", |
| 1269 | MSDOS_I(dir)->i_pos); | 1271 | MSDOS_I(dir)->i_pos); |
| 1270 | err = -EIO; | 1272 | err = -EIO; |
| 1271 | goto error; | 1273 | goto error; |
diff --git a/fs/fat/fat.h b/fs/fat/fat.h index f50408901f7e..8276cc282dec 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h | |||
| @@ -319,19 +319,20 @@ extern struct inode *fat_build_inode(struct super_block *sb, | |||
| 319 | struct msdos_dir_entry *de, loff_t i_pos); | 319 | struct msdos_dir_entry *de, loff_t i_pos); |
| 320 | extern int fat_sync_inode(struct inode *inode); | 320 | extern int fat_sync_inode(struct inode *inode); |
| 321 | extern int fat_fill_super(struct super_block *sb, void *data, int silent, | 321 | extern int fat_fill_super(struct super_block *sb, void *data, int silent, |
| 322 | const struct inode_operations *fs_dir_inode_ops, | 322 | int isvfat, void (*setup)(struct super_block *)); |
| 323 | int isvfat, void (*setup)(struct super_block *)); | ||
| 324 | 323 | ||
| 325 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, | 324 | extern int fat_flush_inodes(struct super_block *sb, struct inode *i1, |
| 326 | struct inode *i2); | 325 | struct inode *i2); |
| 327 | /* fat/misc.c */ | 326 | /* fat/misc.c */ |
| 328 | extern void | 327 | extern void |
| 329 | __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) | 328 | __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) |
| 329 | __attribute__ ((format (printf, 3, 4))) __cold; | ||
| 330 | #define fat_fs_error(sb, fmt, args...) \ | ||
| 331 | __fat_fs_error(sb, 1, fmt , ## args) | ||
| 332 | #define fat_fs_error_ratelimit(sb, fmt, args...) \ | ||
| 333 | __fat_fs_error(sb, __ratelimit(&MSDOS_SB(sb)->ratelimit), fmt , ## args) | ||
| 334 | void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...) | ||
| 330 | __attribute__ ((format (printf, 3, 4))) __cold; | 335 | __attribute__ ((format (printf, 3, 4))) __cold; |
| 331 | #define fat_fs_error(s, fmt, args...) \ | ||
| 332 | __fat_fs_error(s, 1, fmt , ## args) | ||
| 333 | #define fat_fs_error_ratelimit(s, fmt, args...) \ | ||
| 334 | __fat_fs_error(s, __ratelimit(&MSDOS_SB(s)->ratelimit), fmt , ## args) | ||
| 335 | extern int fat_clusters_flush(struct super_block *sb); | 336 | extern int fat_clusters_flush(struct super_block *sb); |
| 336 | extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); | 337 | extern int fat_chain_add(struct inode *inode, int new_dclus, int nr_cluster); |
| 337 | extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, | 338 | extern void fat_time_fat2unix(struct msdos_sb_info *sbi, struct timespec *ts, |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index b47d2c9f4fa1..2e81ac0df7e2 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
| @@ -95,7 +95,7 @@ static int fat12_ent_bread(struct super_block *sb, struct fat_entry *fatent, | |||
| 95 | err_brelse: | 95 | err_brelse: |
| 96 | brelse(bhs[0]); | 96 | brelse(bhs[0]); |
| 97 | err: | 97 | err: |
| 98 | printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", (llu)blocknr); | 98 | fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", (llu)blocknr); |
| 99 | return -EIO; | 99 | return -EIO; |
| 100 | } | 100 | } |
| 101 | 101 | ||
| @@ -108,7 +108,7 @@ static int fat_ent_bread(struct super_block *sb, struct fat_entry *fatent, | |||
| 108 | fatent->fat_inode = MSDOS_SB(sb)->fat_inode; | 108 | fatent->fat_inode = MSDOS_SB(sb)->fat_inode; |
| 109 | fatent->bhs[0] = sb_bread(sb, blocknr); | 109 | fatent->bhs[0] = sb_bread(sb, blocknr); |
| 110 | if (!fatent->bhs[0]) { | 110 | if (!fatent->bhs[0]) { |
| 111 | printk(KERN_ERR "FAT: FAT read failed (blocknr %llu)\n", | 111 | fat_msg(sb, KERN_ERR, "FAT read failed (blocknr %llu)", |
| 112 | (llu)blocknr); | 112 | (llu)blocknr); |
| 113 | return -EIO; | 113 | return -EIO; |
| 114 | } | 114 | } |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 8d68690bdcf1..cb8d8391ac0b 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -581,7 +581,8 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 581 | buf->f_bavail = sbi->free_clusters; | 581 | buf->f_bavail = sbi->free_clusters; |
| 582 | buf->f_fsid.val[0] = (u32)id; | 582 | buf->f_fsid.val[0] = (u32)id; |
| 583 | buf->f_fsid.val[1] = (u32)(id >> 32); | 583 | buf->f_fsid.val[1] = (u32)(id >> 32); |
| 584 | buf->f_namelen = sbi->options.isvfat ? FAT_LFN_LEN : 12; | 584 | buf->f_namelen = |
| 585 | (sbi->options.isvfat ? FAT_LFN_LEN : 12) * NLS_MAX_CHARSET_SIZE; | ||
| 585 | 586 | ||
| 586 | return 0; | 587 | return 0; |
| 587 | } | 588 | } |
| @@ -619,8 +620,8 @@ retry: | |||
| 619 | 620 | ||
| 620 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); | 621 | bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits); |
| 621 | if (!bh) { | 622 | if (!bh) { |
| 622 | printk(KERN_ERR "FAT: unable to read inode block " | 623 | fat_msg(sb, KERN_ERR, "unable to read inode block " |
| 623 | "for updating (i_pos %lld)\n", i_pos); | 624 | "for updating (i_pos %lld)", i_pos); |
| 624 | return -EIO; | 625 | return -EIO; |
| 625 | } | 626 | } |
| 626 | spin_lock(&sbi->inode_hash_lock); | 627 | spin_lock(&sbi->inode_hash_lock); |
| @@ -976,8 +977,8 @@ static const match_table_t vfat_tokens = { | |||
| 976 | {Opt_err, NULL} | 977 | {Opt_err, NULL} |
| 977 | }; | 978 | }; |
| 978 | 979 | ||
| 979 | static int parse_options(char *options, int is_vfat, int silent, int *debug, | 980 | static int parse_options(struct super_block *sb, char *options, int is_vfat, |
| 980 | struct fat_mount_options *opts) | 981 | int silent, int *debug, struct fat_mount_options *opts) |
| 981 | { | 982 | { |
| 982 | char *p; | 983 | char *p; |
| 983 | substring_t args[MAX_OPT_ARGS]; | 984 | substring_t args[MAX_OPT_ARGS]; |
| @@ -1168,15 +1169,15 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, | |||
| 1168 | 1169 | ||
| 1169 | /* obsolete mount options */ | 1170 | /* obsolete mount options */ |
| 1170 | case Opt_obsolate: | 1171 | case Opt_obsolate: |
| 1171 | printk(KERN_INFO "FAT: \"%s\" option is obsolete, " | 1172 | fat_msg(sb, KERN_INFO, "\"%s\" option is obsolete, " |
| 1172 | "not supported now\n", p); | 1173 | "not supported now", p); |
| 1173 | break; | 1174 | break; |
| 1174 | /* unknown option */ | 1175 | /* unknown option */ |
| 1175 | default: | 1176 | default: |
| 1176 | if (!silent) { | 1177 | if (!silent) { |
| 1177 | printk(KERN_ERR | 1178 | fat_msg(sb, KERN_ERR, |
| 1178 | "FAT: Unrecognized mount option \"%s\" " | 1179 | "Unrecognized mount option \"%s\" " |
| 1179 | "or missing value\n", p); | 1180 | "or missing value", p); |
| 1180 | } | 1181 | } |
| 1181 | return -EINVAL; | 1182 | return -EINVAL; |
| 1182 | } | 1183 | } |
| @@ -1185,7 +1186,7 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug, | |||
| 1185 | out: | 1186 | out: |
| 1186 | /* UTF-8 doesn't provide FAT semantics */ | 1187 | /* UTF-8 doesn't provide FAT semantics */ |
| 1187 | if (!strcmp(opts->iocharset, "utf8")) { | 1188 | if (!strcmp(opts->iocharset, "utf8")) { |
| 1188 | printk(KERN_ERR "FAT: utf8 is not a recommended IO charset" | 1189 | fat_msg(sb, KERN_ERR, "utf8 is not a recommended IO charset" |
| 1189 | " for FAT filesystems, filesystem will be " | 1190 | " for FAT filesystems, filesystem will be " |
| 1190 | "case sensitive!\n"); | 1191 | "case sensitive!\n"); |
| 1191 | } | 1192 | } |
| @@ -1238,8 +1239,7 @@ static int fat_read_root(struct inode *inode) | |||
| 1238 | /* | 1239 | /* |
| 1239 | * Read the super block of an MS-DOS FS. | 1240 | * Read the super block of an MS-DOS FS. |
| 1240 | */ | 1241 | */ |
| 1241 | int fat_fill_super(struct super_block *sb, void *data, int silent, | 1242 | int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, |
| 1242 | const struct inode_operations *fs_dir_inode_ops, int isvfat, | ||
| 1243 | void (*setup)(struct super_block *)) | 1243 | void (*setup)(struct super_block *)) |
| 1244 | { | 1244 | { |
| 1245 | struct inode *root_inode = NULL, *fat_inode = NULL; | 1245 | struct inode *root_inode = NULL, *fat_inode = NULL; |
| @@ -1268,11 +1268,10 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1268 | sb->s_magic = MSDOS_SUPER_MAGIC; | 1268 | sb->s_magic = MSDOS_SUPER_MAGIC; |
| 1269 | sb->s_op = &fat_sops; | 1269 | sb->s_op = &fat_sops; |
| 1270 | sb->s_export_op = &fat_export_ops; | 1270 | sb->s_export_op = &fat_export_ops; |
| 1271 | sbi->dir_ops = fs_dir_inode_ops; | ||
| 1272 | ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, | 1271 | ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL, |
| 1273 | DEFAULT_RATELIMIT_BURST); | 1272 | DEFAULT_RATELIMIT_BURST); |
| 1274 | 1273 | ||
| 1275 | error = parse_options(data, isvfat, silent, &debug, &sbi->options); | 1274 | error = parse_options(sb, data, isvfat, silent, &debug, &sbi->options); |
| 1276 | if (error) | 1275 | if (error) |
| 1277 | goto out_fail; | 1276 | goto out_fail; |
| 1278 | 1277 | ||
| @@ -1282,20 +1281,20 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1282 | sb_min_blocksize(sb, 512); | 1281 | sb_min_blocksize(sb, 512); |
| 1283 | bh = sb_bread(sb, 0); | 1282 | bh = sb_bread(sb, 0); |
| 1284 | if (bh == NULL) { | 1283 | if (bh == NULL) { |
| 1285 | printk(KERN_ERR "FAT: unable to read boot sector\n"); | 1284 | fat_msg(sb, KERN_ERR, "unable to read boot sector"); |
| 1286 | goto out_fail; | 1285 | goto out_fail; |
| 1287 | } | 1286 | } |
| 1288 | 1287 | ||
| 1289 | b = (struct fat_boot_sector *) bh->b_data; | 1288 | b = (struct fat_boot_sector *) bh->b_data; |
| 1290 | if (!b->reserved) { | 1289 | if (!b->reserved) { |
| 1291 | if (!silent) | 1290 | if (!silent) |
| 1292 | printk(KERN_ERR "FAT: bogus number of reserved sectors\n"); | 1291 | fat_msg(sb, KERN_ERR, "bogus number of reserved sectors"); |
| 1293 | brelse(bh); | 1292 | brelse(bh); |
| 1294 | goto out_invalid; | 1293 | goto out_invalid; |
| 1295 | } | 1294 | } |
| 1296 | if (!b->fats) { | 1295 | if (!b->fats) { |
| 1297 | if (!silent) | 1296 | if (!silent) |
| 1298 | printk(KERN_ERR "FAT: bogus number of FAT structure\n"); | 1297 | fat_msg(sb, KERN_ERR, "bogus number of FAT structure"); |
| 1299 | brelse(bh); | 1298 | brelse(bh); |
| 1300 | goto out_invalid; | 1299 | goto out_invalid; |
| 1301 | } | 1300 | } |
| @@ -1308,7 +1307,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1308 | media = b->media; | 1307 | media = b->media; |
| 1309 | if (!fat_valid_media(media)) { | 1308 | if (!fat_valid_media(media)) { |
| 1310 | if (!silent) | 1309 | if (!silent) |
| 1311 | printk(KERN_ERR "FAT: invalid media value (0x%02x)\n", | 1310 | fat_msg(sb, KERN_ERR, "invalid media value (0x%02x)", |
| 1312 | media); | 1311 | media); |
| 1313 | brelse(bh); | 1312 | brelse(bh); |
| 1314 | goto out_invalid; | 1313 | goto out_invalid; |
| @@ -1318,7 +1317,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1318 | || (logical_sector_size < 512) | 1317 | || (logical_sector_size < 512) |
| 1319 | || (logical_sector_size > 4096)) { | 1318 | || (logical_sector_size > 4096)) { |
| 1320 | if (!silent) | 1319 | if (!silent) |
| 1321 | printk(KERN_ERR "FAT: bogus logical sector size %u\n", | 1320 | fat_msg(sb, KERN_ERR, "bogus logical sector size %u", |
| 1322 | logical_sector_size); | 1321 | logical_sector_size); |
| 1323 | brelse(bh); | 1322 | brelse(bh); |
| 1324 | goto out_invalid; | 1323 | goto out_invalid; |
| @@ -1326,15 +1325,15 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1326 | sbi->sec_per_clus = b->sec_per_clus; | 1325 | sbi->sec_per_clus = b->sec_per_clus; |
| 1327 | if (!is_power_of_2(sbi->sec_per_clus)) { | 1326 | if (!is_power_of_2(sbi->sec_per_clus)) { |
| 1328 | if (!silent) | 1327 | if (!silent) |
| 1329 | printk(KERN_ERR "FAT: bogus sectors per cluster %u\n", | 1328 | fat_msg(sb, KERN_ERR, "bogus sectors per cluster %u", |
| 1330 | sbi->sec_per_clus); | 1329 | sbi->sec_per_clus); |
| 1331 | brelse(bh); | 1330 | brelse(bh); |
| 1332 | goto out_invalid; | 1331 | goto out_invalid; |
| 1333 | } | 1332 | } |
| 1334 | 1333 | ||
| 1335 | if (logical_sector_size < sb->s_blocksize) { | 1334 | if (logical_sector_size < sb->s_blocksize) { |
| 1336 | printk(KERN_ERR "FAT: logical sector size too small for device" | 1335 | fat_msg(sb, KERN_ERR, "logical sector size too small for device" |
| 1337 | " (logical sector size = %u)\n", logical_sector_size); | 1336 | " (logical sector size = %u)", logical_sector_size); |
| 1338 | brelse(bh); | 1337 | brelse(bh); |
| 1339 | goto out_fail; | 1338 | goto out_fail; |
| 1340 | } | 1339 | } |
| @@ -1342,14 +1341,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1342 | brelse(bh); | 1341 | brelse(bh); |
| 1343 | 1342 | ||
| 1344 | if (!sb_set_blocksize(sb, logical_sector_size)) { | 1343 | if (!sb_set_blocksize(sb, logical_sector_size)) { |
| 1345 | printk(KERN_ERR "FAT: unable to set blocksize %u\n", | 1344 | fat_msg(sb, KERN_ERR, "unable to set blocksize %u", |
| 1346 | logical_sector_size); | 1345 | logical_sector_size); |
| 1347 | goto out_fail; | 1346 | goto out_fail; |
| 1348 | } | 1347 | } |
| 1349 | bh = sb_bread(sb, 0); | 1348 | bh = sb_bread(sb, 0); |
| 1350 | if (bh == NULL) { | 1349 | if (bh == NULL) { |
| 1351 | printk(KERN_ERR "FAT: unable to read boot sector" | 1350 | fat_msg(sb, KERN_ERR, "unable to read boot sector" |
| 1352 | " (logical sector size = %lu)\n", | 1351 | " (logical sector size = %lu)", |
| 1353 | sb->s_blocksize); | 1352 | sb->s_blocksize); |
| 1354 | goto out_fail; | 1353 | goto out_fail; |
| 1355 | } | 1354 | } |
| @@ -1385,16 +1384,16 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1385 | 1384 | ||
| 1386 | fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector); | 1385 | fsinfo_bh = sb_bread(sb, sbi->fsinfo_sector); |
| 1387 | if (fsinfo_bh == NULL) { | 1386 | if (fsinfo_bh == NULL) { |
| 1388 | printk(KERN_ERR "FAT: bread failed, FSINFO block" | 1387 | fat_msg(sb, KERN_ERR, "bread failed, FSINFO block" |
| 1389 | " (sector = %lu)\n", sbi->fsinfo_sector); | 1388 | " (sector = %lu)", sbi->fsinfo_sector); |
| 1390 | brelse(bh); | 1389 | brelse(bh); |
| 1391 | goto out_fail; | 1390 | goto out_fail; |
| 1392 | } | 1391 | } |
| 1393 | 1392 | ||
| 1394 | fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; | 1393 | fsinfo = (struct fat_boot_fsinfo *)fsinfo_bh->b_data; |
| 1395 | if (!IS_FSINFO(fsinfo)) { | 1394 | if (!IS_FSINFO(fsinfo)) { |
| 1396 | printk(KERN_WARNING "FAT: Invalid FSINFO signature: " | 1395 | fat_msg(sb, KERN_WARNING, "Invalid FSINFO signature: " |
| 1397 | "0x%08x, 0x%08x (sector = %lu)\n", | 1396 | "0x%08x, 0x%08x (sector = %lu)", |
| 1398 | le32_to_cpu(fsinfo->signature1), | 1397 | le32_to_cpu(fsinfo->signature1), |
| 1399 | le32_to_cpu(fsinfo->signature2), | 1398 | le32_to_cpu(fsinfo->signature2), |
| 1400 | sbi->fsinfo_sector); | 1399 | sbi->fsinfo_sector); |
| @@ -1415,8 +1414,8 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1415 | sbi->dir_entries = get_unaligned_le16(&b->dir_entries); | 1414 | sbi->dir_entries = get_unaligned_le16(&b->dir_entries); |
| 1416 | if (sbi->dir_entries & (sbi->dir_per_block - 1)) { | 1415 | if (sbi->dir_entries & (sbi->dir_per_block - 1)) { |
| 1417 | if (!silent) | 1416 | if (!silent) |
| 1418 | printk(KERN_ERR "FAT: bogus directroy-entries per block" | 1417 | fat_msg(sb, KERN_ERR, "bogus directroy-entries per block" |
| 1419 | " (%u)\n", sbi->dir_entries); | 1418 | " (%u)", sbi->dir_entries); |
| 1420 | brelse(bh); | 1419 | brelse(bh); |
| 1421 | goto out_invalid; | 1420 | goto out_invalid; |
| 1422 | } | 1421 | } |
| @@ -1438,7 +1437,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1438 | total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); | 1437 | total_clusters = min(total_clusters, fat_clusters - FAT_START_ENT); |
| 1439 | if (total_clusters > MAX_FAT(sb)) { | 1438 | if (total_clusters > MAX_FAT(sb)) { |
| 1440 | if (!silent) | 1439 | if (!silent) |
| 1441 | printk(KERN_ERR "FAT: count of clusters too big (%u)\n", | 1440 | fat_msg(sb, KERN_ERR, "count of clusters too big (%u)", |
| 1442 | total_clusters); | 1441 | total_clusters); |
| 1443 | brelse(bh); | 1442 | brelse(bh); |
| 1444 | goto out_invalid; | 1443 | goto out_invalid; |
| @@ -1471,7 +1470,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1471 | sprintf(buf, "cp%d", sbi->options.codepage); | 1470 | sprintf(buf, "cp%d", sbi->options.codepage); |
| 1472 | sbi->nls_disk = load_nls(buf); | 1471 | sbi->nls_disk = load_nls(buf); |
| 1473 | if (!sbi->nls_disk) { | 1472 | if (!sbi->nls_disk) { |
| 1474 | printk(KERN_ERR "FAT: codepage %s not found\n", buf); | 1473 | fat_msg(sb, KERN_ERR, "codepage %s not found", buf); |
| 1475 | goto out_fail; | 1474 | goto out_fail; |
| 1476 | } | 1475 | } |
| 1477 | 1476 | ||
| @@ -1479,7 +1478,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1479 | if (sbi->options.isvfat) { | 1478 | if (sbi->options.isvfat) { |
| 1480 | sbi->nls_io = load_nls(sbi->options.iocharset); | 1479 | sbi->nls_io = load_nls(sbi->options.iocharset); |
| 1481 | if (!sbi->nls_io) { | 1480 | if (!sbi->nls_io) { |
| 1482 | printk(KERN_ERR "FAT: IO charset %s not found\n", | 1481 | fat_msg(sb, KERN_ERR, "IO charset %s not found", |
| 1483 | sbi->options.iocharset); | 1482 | sbi->options.iocharset); |
| 1484 | goto out_fail; | 1483 | goto out_fail; |
| 1485 | } | 1484 | } |
| @@ -1503,7 +1502,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1503 | insert_inode_hash(root_inode); | 1502 | insert_inode_hash(root_inode); |
| 1504 | sb->s_root = d_alloc_root(root_inode); | 1503 | sb->s_root = d_alloc_root(root_inode); |
| 1505 | if (!sb->s_root) { | 1504 | if (!sb->s_root) { |
| 1506 | printk(KERN_ERR "FAT: get root inode failed\n"); | 1505 | fat_msg(sb, KERN_ERR, "get root inode failed"); |
| 1507 | goto out_fail; | 1506 | goto out_fail; |
| 1508 | } | 1507 | } |
| 1509 | 1508 | ||
| @@ -1512,8 +1511,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, | |||
| 1512 | out_invalid: | 1511 | out_invalid: |
| 1513 | error = -EINVAL; | 1512 | error = -EINVAL; |
| 1514 | if (!silent) | 1513 | if (!silent) |
| 1515 | printk(KERN_INFO "VFS: Can't find a valid FAT filesystem" | 1514 | fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem"); |
| 1516 | " on dev %s.\n", sb->s_id); | ||
| 1517 | 1515 | ||
| 1518 | out_fail: | 1516 | out_fail: |
| 1519 | if (fat_inode) | 1517 | if (fat_inode) |
diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 970e682ea754..6d93360ca0cc 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c | |||
| @@ -20,30 +20,46 @@ | |||
| 20 | * In case the file system is remounted read-only, it can be made writable | 20 | * In case the file system is remounted read-only, it can be made writable |
| 21 | * again by remounting it. | 21 | * again by remounting it. |
| 22 | */ | 22 | */ |
| 23 | void __fat_fs_error(struct super_block *s, int report, const char *fmt, ...) | 23 | void __fat_fs_error(struct super_block *sb, int report, const char *fmt, ...) |
| 24 | { | 24 | { |
| 25 | struct fat_mount_options *opts = &MSDOS_SB(s)->options; | 25 | struct fat_mount_options *opts = &MSDOS_SB(sb)->options; |
| 26 | va_list args; | 26 | va_list args; |
| 27 | struct va_format vaf; | ||
| 27 | 28 | ||
| 28 | if (report) { | 29 | if (report) { |
| 29 | printk(KERN_ERR "FAT: Filesystem error (dev %s)\n", s->s_id); | ||
| 30 | |||
| 31 | printk(KERN_ERR " "); | ||
| 32 | va_start(args, fmt); | 30 | va_start(args, fmt); |
| 33 | vprintk(fmt, args); | 31 | vaf.fmt = fmt; |
| 32 | vaf.va = &args; | ||
| 33 | printk(KERN_ERR "FAT-fs (%s): error, %pV\n", sb->s_id, &vaf); | ||
| 34 | va_end(args); | 34 | va_end(args); |
| 35 | printk("\n"); | ||
| 36 | } | 35 | } |
| 37 | 36 | ||
| 38 | if (opts->errors == FAT_ERRORS_PANIC) | 37 | if (opts->errors == FAT_ERRORS_PANIC) |
| 39 | panic("FAT: fs panic from previous error\n"); | 38 | panic("FAT-fs (%s): fs panic from previous error\n", sb->s_id); |
| 40 | else if (opts->errors == FAT_ERRORS_RO && !(s->s_flags & MS_RDONLY)) { | 39 | else if (opts->errors == FAT_ERRORS_RO && !(sb->s_flags & MS_RDONLY)) { |
| 41 | s->s_flags |= MS_RDONLY; | 40 | sb->s_flags |= MS_RDONLY; |
| 42 | printk(KERN_ERR "FAT: Filesystem has been set read-only\n"); | 41 | printk(KERN_ERR "FAT-fs (%s): Filesystem has been " |
| 42 | "set read-only\n", sb->s_id); | ||
| 43 | } | 43 | } |
| 44 | } | 44 | } |
| 45 | EXPORT_SYMBOL_GPL(__fat_fs_error); | 45 | EXPORT_SYMBOL_GPL(__fat_fs_error); |
| 46 | 46 | ||
| 47 | /** | ||
| 48 | * fat_msg() - print preformatted FAT specific messages. Everything that is | ||
| 49 | * not fat_fs_error() should be fat_msg(). | ||
| 50 | */ | ||
| 51 | void fat_msg(struct super_block *sb, const char *level, const char *fmt, ...) | ||
| 52 | { | ||
| 53 | struct va_format vaf; | ||
| 54 | va_list args; | ||
| 55 | |||
| 56 | va_start(args, fmt); | ||
| 57 | vaf.fmt = fmt; | ||
| 58 | vaf.va = &args; | ||
| 59 | printk("%sFAT-fs (%s): %pV\n", level, sb->s_id, &vaf); | ||
| 60 | va_end(args); | ||
| 61 | } | ||
| 62 | |||
| 47 | /* Flushes the number of free clusters on FAT32 */ | 63 | /* Flushes the number of free clusters on FAT32 */ |
| 48 | /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ | 64 | /* XXX: Need to write one per FSINFO block. Currently only writes 1 */ |
| 49 | int fat_clusters_flush(struct super_block *sb) | 65 | int fat_clusters_flush(struct super_block *sb) |
| @@ -57,15 +73,15 @@ int fat_clusters_flush(struct super_block *sb) | |||
| 57 | 73 | ||
| 58 | bh = sb_bread(sb, sbi->fsinfo_sector); | 74 | bh = sb_bread(sb, sbi->fsinfo_sector); |
| 59 | if (bh == NULL) { | 75 | if (bh == NULL) { |
| 60 | printk(KERN_ERR "FAT: bread failed in fat_clusters_flush\n"); | 76 | fat_msg(sb, KERN_ERR, "bread failed in fat_clusters_flush"); |
| 61 | return -EIO; | 77 | return -EIO; |
| 62 | } | 78 | } |
| 63 | 79 | ||
| 64 | fsinfo = (struct fat_boot_fsinfo *)bh->b_data; | 80 | fsinfo = (struct fat_boot_fsinfo *)bh->b_data; |
| 65 | /* Sanity check */ | 81 | /* Sanity check */ |
| 66 | if (!IS_FSINFO(fsinfo)) { | 82 | if (!IS_FSINFO(fsinfo)) { |
| 67 | printk(KERN_ERR "FAT: Invalid FSINFO signature: " | 83 | fat_msg(sb, KERN_ERR, "Invalid FSINFO signature: " |
| 68 | "0x%08x, 0x%08x (sector = %lu)\n", | 84 | "0x%08x, 0x%08x (sector = %lu)", |
| 69 | le32_to_cpu(fsinfo->signature1), | 85 | le32_to_cpu(fsinfo->signature1), |
| 70 | le32_to_cpu(fsinfo->signature2), | 86 | le32_to_cpu(fsinfo->signature2), |
| 71 | sbi->fsinfo_sector); | 87 | sbi->fsinfo_sector); |
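Everything in the FAT series above funnels through fat_msg(), whose body in the misc.c hunk relies on printk's %pV extension: a struct va_format carries the caller's format string and va_list so a single printk() can expand them in place, keeping the "FAT-fs (%s):" prefix and the message in one non-interleaved line. A minimal sketch of the idiom with a hypothetical "myfs" prefix:

#include <linux/kernel.h>
#include <linux/fs.h>

void my_msg(struct super_block *sb, const char *level,
	    const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);
	vaf.fmt = fmt;
	vaf.va = &args;		/* %pV expands fmt against this va_list */
	printk("%smyfs (%s): %pV\n", level, sb->s_id, &vaf);
	va_end(args);
}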
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c index 711499040eb6..3b222dafd15b 100644 --- a/fs/fat/namei_msdos.c +++ b/fs/fat/namei_msdos.c | |||
| @@ -659,14 +659,14 @@ static const struct inode_operations msdos_dir_inode_operations = { | |||
| 659 | 659 | ||
| 660 | static void setup(struct super_block *sb) | 660 | static void setup(struct super_block *sb) |
| 661 | { | 661 | { |
| 662 | MSDOS_SB(sb)->dir_ops = &msdos_dir_inode_operations; | ||
| 662 | sb->s_d_op = &msdos_dentry_operations; | 663 | sb->s_d_op = &msdos_dentry_operations; |
| 663 | sb->s_flags |= MS_NOATIME; | 664 | sb->s_flags |= MS_NOATIME; |
| 664 | } | 665 | } |
| 665 | 666 | ||
| 666 | static int msdos_fill_super(struct super_block *sb, void *data, int silent) | 667 | static int msdos_fill_super(struct super_block *sb, void *data, int silent) |
| 667 | { | 668 | { |
| 668 | return fat_fill_super(sb, data, silent, &msdos_dir_inode_operations, | 669 | return fat_fill_super(sb, data, silent, 0, setup); |
| 669 | 0, setup); | ||
| 670 | } | 670 | } |
| 671 | 671 | ||
| 672 | static struct dentry *msdos_mount(struct file_system_type *fs_type, | 672 | static struct dentry *msdos_mount(struct file_system_type *fs_type, |
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c index adae3fb7451a..20b4ea53fdc4 100644 --- a/fs/fat/namei_vfat.c +++ b/fs/fat/namei_vfat.c | |||
| @@ -1065,6 +1065,7 @@ static const struct inode_operations vfat_dir_inode_operations = { | |||
| 1065 | 1065 | ||
| 1066 | static void setup(struct super_block *sb) | 1066 | static void setup(struct super_block *sb) |
| 1067 | { | 1067 | { |
| 1068 | MSDOS_SB(sb)->dir_ops = &vfat_dir_inode_operations; | ||
| 1068 | if (MSDOS_SB(sb)->options.name_check != 's') | 1069 | if (MSDOS_SB(sb)->options.name_check != 's') |
| 1069 | sb->s_d_op = &vfat_ci_dentry_ops; | 1070 | sb->s_d_op = &vfat_ci_dentry_ops; |
| 1070 | else | 1071 | else |
| @@ -1073,8 +1074,7 @@ static void setup(struct super_block *sb) | |||
| 1073 | 1074 | ||
| 1074 | static int vfat_fill_super(struct super_block *sb, void *data, int silent) | 1075 | static int vfat_fill_super(struct super_block *sb, void *data, int silent) |
| 1075 | { | 1076 | { |
| 1076 | return fat_fill_super(sb, data, silent, &vfat_dir_inode_operations, | 1077 | return fat_fill_super(sb, data, silent, 1, setup); |
| 1077 | 1, setup); | ||
| 1078 | } | 1078 | } |
| 1079 | 1079 | ||
| 1080 | static struct dentry *vfat_mount(struct file_system_type *fs_type, | 1080 | static struct dentry *vfat_mount(struct file_system_type *fs_type, |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 48a18f184d50..30afdfa7aec7 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
| @@ -33,8 +33,6 @@ void fscache_enqueue_operation(struct fscache_operation *op) | |||
| 33 | _enter("{OBJ%x OP%x,%u}", | 33 | _enter("{OBJ%x OP%x,%u}", |
| 34 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); | 34 | op->object->debug_id, op->debug_id, atomic_read(&op->usage)); |
| 35 | 35 | ||
| 36 | fscache_set_op_state(op, "EnQ"); | ||
| 37 | |||
| 38 | ASSERT(list_empty(&op->pend_link)); | 36 | ASSERT(list_empty(&op->pend_link)); |
| 39 | ASSERT(op->processor != NULL); | 37 | ASSERT(op->processor != NULL); |
| 40 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); | 38 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); |
| @@ -66,8 +64,6 @@ EXPORT_SYMBOL(fscache_enqueue_operation); | |||
| 66 | static void fscache_run_op(struct fscache_object *object, | 64 | static void fscache_run_op(struct fscache_object *object, |
| 67 | struct fscache_operation *op) | 65 | struct fscache_operation *op) |
| 68 | { | 66 | { |
| 69 | fscache_set_op_state(op, "Run"); | ||
| 70 | |||
| 71 | object->n_in_progress++; | 67 | object->n_in_progress++; |
| 72 | if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) | 68 | if (test_and_clear_bit(FSCACHE_OP_WAITING, &op->flags)) |
| 73 | wake_up_bit(&op->flags, FSCACHE_OP_WAITING); | 69 | wake_up_bit(&op->flags, FSCACHE_OP_WAITING); |
| @@ -88,8 +84,6 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
| 88 | 84 | ||
| 89 | _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); | 85 | _enter("{OBJ%x OP%x},", object->debug_id, op->debug_id); |
| 90 | 86 | ||
| 91 | fscache_set_op_state(op, "SubmitX"); | ||
| 92 | |||
| 93 | spin_lock(&object->lock); | 87 | spin_lock(&object->lock); |
| 94 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | 88 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); |
| 95 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | 89 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); |
| @@ -194,8 +188,6 @@ int fscache_submit_op(struct fscache_object *object, | |||
| 194 | 188 | ||
| 195 | ASSERTCMP(atomic_read(&op->usage), >, 0); | 189 | ASSERTCMP(atomic_read(&op->usage), >, 0); |
| 196 | 190 | ||
| 197 | fscache_set_op_state(op, "Submit"); | ||
| 198 | |||
| 199 | spin_lock(&object->lock); | 191 | spin_lock(&object->lock); |
| 200 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); | 192 | ASSERTCMP(object->n_ops, >=, object->n_in_progress); |
| 201 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); | 193 | ASSERTCMP(object->n_ops, >=, object->n_exclusive); |
| @@ -335,8 +327,6 @@ void fscache_put_operation(struct fscache_operation *op) | |||
| 335 | if (!atomic_dec_and_test(&op->usage)) | 327 | if (!atomic_dec_and_test(&op->usage)) |
| 336 | return; | 328 | return; |
| 337 | 329 | ||
| 338 | fscache_set_op_state(op, "Put"); | ||
| 339 | |||
| 340 | _debug("PUT OP"); | 330 | _debug("PUT OP"); |
| 341 | if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) | 331 | if (test_and_set_bit(FSCACHE_OP_DEAD, &op->flags)) |
| 342 | BUG(); | 332 | BUG(); |
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 41c441c2058d..a2a5d19ece6a 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
| @@ -155,11 +155,9 @@ static void fscache_attr_changed_op(struct fscache_operation *op) | |||
| 155 | fscache_stat(&fscache_n_attr_changed_calls); | 155 | fscache_stat(&fscache_n_attr_changed_calls); |
| 156 | 156 | ||
| 157 | if (fscache_object_is_active(object)) { | 157 | if (fscache_object_is_active(object)) { |
| 158 | fscache_set_op_state(op, "CallFS"); | ||
| 159 | fscache_stat(&fscache_n_cop_attr_changed); | 158 | fscache_stat(&fscache_n_cop_attr_changed); |
| 160 | ret = object->cache->ops->attr_changed(object); | 159 | ret = object->cache->ops->attr_changed(object); |
| 161 | fscache_stat_d(&fscache_n_cop_attr_changed); | 160 | fscache_stat_d(&fscache_n_cop_attr_changed); |
| 162 | fscache_set_op_state(op, "Done"); | ||
| 163 | if (ret < 0) | 161 | if (ret < 0) |
| 164 | fscache_abort_object(object); | 162 | fscache_abort_object(object); |
| 165 | } | 163 | } |
| @@ -190,7 +188,6 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) | |||
| 190 | 188 | ||
| 191 | fscache_operation_init(op, fscache_attr_changed_op, NULL); | 189 | fscache_operation_init(op, fscache_attr_changed_op, NULL); |
| 192 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); | 190 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); |
| 193 | fscache_set_op_name(op, "Attr"); | ||
| 194 | 191 | ||
| 195 | spin_lock(&cookie->lock); | 192 | spin_lock(&cookie->lock); |
| 196 | 193 | ||
| @@ -257,7 +254,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval( | |||
| 257 | op->context = context; | 254 | op->context = context; |
| 258 | op->start_time = jiffies; | 255 | op->start_time = jiffies; |
| 259 | INIT_LIST_HEAD(&op->to_do); | 256 | INIT_LIST_HEAD(&op->to_do); |
| 260 | fscache_set_op_name(&op->op, "Retr"); | ||
| 261 | return op; | 257 | return op; |
| 262 | } | 258 | } |
| 263 | 259 | ||
| @@ -368,7 +364,6 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | |||
| 368 | _leave(" = -ENOMEM"); | 364 | _leave(" = -ENOMEM"); |
| 369 | return -ENOMEM; | 365 | return -ENOMEM; |
| 370 | } | 366 | } |
| 371 | fscache_set_op_name(&op->op, "RetrRA1"); | ||
| 372 | 367 | ||
| 373 | spin_lock(&cookie->lock); | 368 | spin_lock(&cookie->lock); |
| 374 | 369 | ||
| @@ -487,7 +482,6 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | |||
| 487 | op = fscache_alloc_retrieval(mapping, end_io_func, context); | 482 | op = fscache_alloc_retrieval(mapping, end_io_func, context); |
| 488 | if (!op) | 483 | if (!op) |
| 489 | return -ENOMEM; | 484 | return -ENOMEM; |
| 490 | fscache_set_op_name(&op->op, "RetrRAN"); | ||
| 491 | 485 | ||
| 492 | spin_lock(&cookie->lock); | 486 | spin_lock(&cookie->lock); |
| 493 | 487 | ||
| @@ -589,7 +583,6 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, | |||
| 589 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); | 583 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); |
| 590 | if (!op) | 584 | if (!op) |
| 591 | return -ENOMEM; | 585 | return -ENOMEM; |
| 592 | fscache_set_op_name(&op->op, "RetrAL1"); | ||
| 593 | 586 | ||
| 594 | spin_lock(&cookie->lock); | 587 | spin_lock(&cookie->lock); |
| 595 | 588 | ||
| @@ -662,8 +655,6 @@ static void fscache_write_op(struct fscache_operation *_op) | |||
| 662 | 655 | ||
| 663 | _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); | 656 | _enter("{OP%x,%d}", op->op.debug_id, atomic_read(&op->op.usage)); |
| 664 | 657 | ||
| 665 | fscache_set_op_state(&op->op, "GetPage"); | ||
| 666 | |||
| 667 | spin_lock(&object->lock); | 658 | spin_lock(&object->lock); |
| 668 | cookie = object->cookie; | 659 | cookie = object->cookie; |
| 669 | 660 | ||
| @@ -698,15 +689,12 @@ static void fscache_write_op(struct fscache_operation *_op) | |||
| 698 | spin_unlock(&cookie->stores_lock); | 689 | spin_unlock(&cookie->stores_lock); |
| 699 | spin_unlock(&object->lock); | 690 | spin_unlock(&object->lock); |
| 700 | 691 | ||
| 701 | fscache_set_op_state(&op->op, "Store"); | ||
| 702 | fscache_stat(&fscache_n_store_pages); | 692 | fscache_stat(&fscache_n_store_pages); |
| 703 | fscache_stat(&fscache_n_cop_write_page); | 693 | fscache_stat(&fscache_n_cop_write_page); |
| 704 | ret = object->cache->ops->write_page(op, page); | 694 | ret = object->cache->ops->write_page(op, page); |
| 705 | fscache_stat_d(&fscache_n_cop_write_page); | 695 | fscache_stat_d(&fscache_n_cop_write_page); |
| 706 | fscache_set_op_state(&op->op, "EndWrite"); | ||
| 707 | fscache_end_page_write(object, page); | 696 | fscache_end_page_write(object, page); |
| 708 | if (ret < 0) { | 697 | if (ret < 0) { |
| 709 | fscache_set_op_state(&op->op, "Abort"); | ||
| 710 | fscache_abort_object(object); | 698 | fscache_abort_object(object); |
| 711 | } else { | 699 | } else { |
| 712 | fscache_enqueue_operation(&op->op); | 700 | fscache_enqueue_operation(&op->op); |
| @@ -778,7 +766,6 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
| 778 | fscache_operation_init(&op->op, fscache_write_op, | 766 | fscache_operation_init(&op->op, fscache_write_op, |
| 779 | fscache_release_write_op); | 767 | fscache_release_write_op); |
| 780 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); | 768 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); |
| 781 | fscache_set_op_name(&op->op, "Write1"); | ||
| 782 | 769 | ||
| 783 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 770 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); |
| 784 | if (ret < 0) | 771 | if (ret < 0) |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index a2a6abbccc07..2792a790e50b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -1346,11 +1346,14 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) | |||
| 1346 | } | 1346 | } |
| 1347 | 1347 | ||
| 1348 | 1348 | ||
| 1349 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 1349 | static int gfs2_shrink_glock_memory(struct shrinker *shrink, |
| 1350 | struct shrink_control *sc) | ||
| 1350 | { | 1351 | { |
| 1351 | struct gfs2_glock *gl; | 1352 | struct gfs2_glock *gl; |
| 1352 | int may_demote; | 1353 | int may_demote; |
| 1353 | int nr_skipped = 0; | 1354 | int nr_skipped = 0; |
| 1355 | int nr = sc->nr_to_scan; | ||
| 1356 | gfp_t gfp_mask = sc->gfp_mask; | ||
| 1354 | LIST_HEAD(skipped); | 1357 | LIST_HEAD(skipped); |
| 1355 | 1358 | ||
| 1356 | if (nr == 0) | 1359 | if (nr == 0) |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index e23d9864c418..42e8d23bc047 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -38,6 +38,7 @@ | |||
| 38 | 38 | ||
| 39 | #include <linux/sched.h> | 39 | #include <linux/sched.h> |
| 40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
| 41 | #include <linux/mm.h> | ||
| 41 | #include <linux/spinlock.h> | 42 | #include <linux/spinlock.h> |
| 42 | #include <linux/completion.h> | 43 | #include <linux/completion.h> |
| 43 | #include <linux/buffer_head.h> | 44 | #include <linux/buffer_head.h> |
| @@ -77,19 +78,20 @@ static LIST_HEAD(qd_lru_list); | |||
| 77 | static atomic_t qd_lru_count = ATOMIC_INIT(0); | 78 | static atomic_t qd_lru_count = ATOMIC_INIT(0); |
| 78 | static DEFINE_SPINLOCK(qd_lru_lock); | 79 | static DEFINE_SPINLOCK(qd_lru_lock); |
| 79 | 80 | ||
| 80 | int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 81 | int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc) |
| 81 | { | 82 | { |
| 82 | struct gfs2_quota_data *qd; | 83 | struct gfs2_quota_data *qd; |
| 83 | struct gfs2_sbd *sdp; | 84 | struct gfs2_sbd *sdp; |
| 85 | int nr_to_scan = sc->nr_to_scan; | ||
| 84 | 86 | ||
| 85 | if (nr == 0) | 87 | if (nr_to_scan == 0) |
| 86 | goto out; | 88 | goto out; |
| 87 | 89 | ||
| 88 | if (!(gfp_mask & __GFP_FS)) | 90 | if (!(sc->gfp_mask & __GFP_FS)) |
| 89 | return -1; | 91 | return -1; |
| 90 | 92 | ||
| 91 | spin_lock(&qd_lru_lock); | 93 | spin_lock(&qd_lru_lock); |
| 92 | while (nr && !list_empty(&qd_lru_list)) { | 94 | while (nr_to_scan && !list_empty(&qd_lru_list)) { |
| 93 | qd = list_entry(qd_lru_list.next, | 95 | qd = list_entry(qd_lru_list.next, |
| 94 | struct gfs2_quota_data, qd_reclaim); | 96 | struct gfs2_quota_data, qd_reclaim); |
| 95 | sdp = qd->qd_gl->gl_sbd; | 97 | sdp = qd->qd_gl->gl_sbd; |
| @@ -110,7 +112,7 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | |||
| 110 | spin_unlock(&qd_lru_lock); | 112 | spin_unlock(&qd_lru_lock); |
| 111 | kmem_cache_free(gfs2_quotad_cachep, qd); | 113 | kmem_cache_free(gfs2_quotad_cachep, qd); |
| 112 | spin_lock(&qd_lru_lock); | 114 | spin_lock(&qd_lru_lock); |
| 113 | nr--; | 115 | nr_to_scan--; |
| 114 | } | 116 | } |
| 115 | spin_unlock(&qd_lru_lock); | 117 | spin_unlock(&qd_lru_lock); |
| 116 | 118 | ||
diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index e7d236ca48bd..90bf1c302a98 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h | |||
| @@ -12,6 +12,7 @@ | |||
| 12 | 12 | ||
| 13 | struct gfs2_inode; | 13 | struct gfs2_inode; |
| 14 | struct gfs2_sbd; | 14 | struct gfs2_sbd; |
| 15 | struct shrink_control; | ||
| 15 | 16 | ||
| 16 | #define NO_QUOTA_CHANGE ((u32)-1) | 17 | #define NO_QUOTA_CHANGE ((u32)-1) |
| 17 | 18 | ||
| @@ -51,7 +52,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip) | |||
| 51 | return ret; | 52 | return ret; |
| 52 | } | 53 | } |
| 53 | 54 | ||
| 54 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask); | 55 | extern int gfs2_shrink_qd_memory(struct shrinker *shrink, |
| 56 | struct shrink_control *sc); | ||
| 55 | extern const struct quotactl_ops gfs2_quotactl_ops; | 57 | extern const struct quotactl_ops gfs2_quotactl_ops; |
| 56 | 58 | ||
| 57 | #endif /* __QUOTA_DOT_H__ */ | 59 | #endif /* __QUOTA_DOT_H__ */ |
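Note the bare `struct shrink_control;` added to quota.h: the header only names the type in a prototype, so a forward declaration suffices and quota.h avoids dragging in <linux/mm.h>; quota.c, which actually dereferences sc, gains the include instead. Sketch of the split:

/* quota.h: pointer use only, so a forward declaration is enough. */
struct shrink_control;
extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
				 struct shrink_control *sc);

/* quota.c: reads sc->nr_to_scan, so it needs the real definition. */
#include <linux/mm.h>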
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index b9eeb1cd03ff..e7a035781b7d 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -412,10 +412,10 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset) | |||
| 412 | pgoff = offset >> PAGE_SHIFT; | 412 | pgoff = offset >> PAGE_SHIFT; |
| 413 | 413 | ||
| 414 | i_size_write(inode, offset); | 414 | i_size_write(inode, offset); |
| 415 | spin_lock(&mapping->i_mmap_lock); | 415 | mutex_lock(&mapping->i_mmap_mutex); |
| 416 | if (!prio_tree_empty(&mapping->i_mmap)) | 416 | if (!prio_tree_empty(&mapping->i_mmap)) |
| 417 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); | 417 | hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); |
| 418 | spin_unlock(&mapping->i_mmap_lock); | 418 | mutex_unlock(&mapping->i_mmap_mutex); |
| 419 | truncate_hugepages(inode, offset); | 419 | truncate_hugepages(inode, offset); |
| 420 | return 0; | 420 | return 0; |
| 421 | } | 421 | } |
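The hugetlbfs hunk belongs to a tree-wide conversion in this series: the address_space i_mmap_lock spinlock becomes the sleepable i_mmap_mutex (initialised in the fs/inode.c hunk just below), so walkers of the i_mmap prio tree may now block. The truncation path accordingly becomes:

mutex_lock(&mapping->i_mmap_mutex);	/* was spin_lock(&...->i_mmap_lock) */
if (!prio_tree_empty(&mapping->i_mmap))
	hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff); /* may sleep now */
mutex_unlock(&mapping->i_mmap_mutex);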
diff --git a/fs/inode.c b/fs/inode.c index 05f4fa521325..990d284877a1 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
| @@ -326,12 +326,11 @@ void address_space_init_once(struct address_space *mapping) | |||
| 326 | memset(mapping, 0, sizeof(*mapping)); | 326 | memset(mapping, 0, sizeof(*mapping)); |
| 327 | INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); | 327 | INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC); |
| 328 | spin_lock_init(&mapping->tree_lock); | 328 | spin_lock_init(&mapping->tree_lock); |
| 329 | spin_lock_init(&mapping->i_mmap_lock); | 329 | mutex_init(&mapping->i_mmap_mutex); |
| 330 | INIT_LIST_HEAD(&mapping->private_list); | 330 | INIT_LIST_HEAD(&mapping->private_list); |
| 331 | spin_lock_init(&mapping->private_lock); | 331 | spin_lock_init(&mapping->private_lock); |
| 332 | INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); | 332 | INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap); |
| 333 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); | 333 | INIT_LIST_HEAD(&mapping->i_mmap_nonlinear); |
| 334 | mutex_init(&mapping->unmap_mutex); | ||
| 335 | } | 334 | } |
| 336 | EXPORT_SYMBOL(address_space_init_once); | 335 | EXPORT_SYMBOL(address_space_init_once); |
| 337 | 336 | ||
| @@ -752,8 +751,12 @@ static void prune_icache(int nr_to_scan) | |||
| 752 | * This function is passed the number of inodes to scan, and it returns the | 751 | * This function is passed the number of inodes to scan, and it returns the |
| 753 | * total number of remaining possibly-reclaimable inodes. | 752 | * total number of remaining possibly-reclaimable inodes. |
| 754 | */ | 753 | */ |
| 755 | static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 754 | static int shrink_icache_memory(struct shrinker *shrink, |
| 755 | struct shrink_control *sc) | ||
| 756 | { | 756 | { |
| 757 | int nr = sc->nr_to_scan; | ||
| 758 | gfp_t gfp_mask = sc->gfp_mask; | ||
| 759 | |||
| 757 | if (nr) { | 760 | if (nr) { |
| 758 | /* | 761 | /* |
| 759 | * Nasty deadlock avoidance. We may hold various FS locks, | 762 | * Nasty deadlock avoidance. We may hold various FS locks, |
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 69b180459463..72ffa974b0b8 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
| @@ -302,12 +302,6 @@ void journal_commit_transaction(journal_t *journal) | |||
| 302 | * all outstanding updates to complete. | 302 | * all outstanding updates to complete. |
| 303 | */ | 303 | */ |
| 304 | 304 | ||
| 305 | #ifdef COMMIT_STATS | ||
| 306 | spin_lock(&journal->j_list_lock); | ||
| 307 | summarise_journal_usage(journal); | ||
| 308 | spin_unlock(&journal->j_list_lock); | ||
| 309 | #endif | ||
| 310 | |||
| 311 | /* Do we need to erase the effects of a prior journal_flush? */ | 305 | /* Do we need to erase the effects of a prior journal_flush? */ |
| 312 | if (journal->j_flags & JFS_FLUSHED) { | 306 | if (journal->j_flags & JFS_FLUSHED) { |
| 313 | jbd_debug(3, "super block updated\n"); | 307 | jbd_debug(3, "super block updated\n"); |
| @@ -722,8 +716,13 @@ wait_for_iobuf: | |||
| 722 | required. */ | 716 | required. */ |
| 723 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 717 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
| 724 | journal_file_buffer(jh, commit_transaction, BJ_Forget); | 718 | journal_file_buffer(jh, commit_transaction, BJ_Forget); |
| 725 | /* Wake up any transactions which were waiting for this | 719 | /* |
| 726 | IO to complete */ | 720 | * Wake up any transactions which were waiting for this |
| 721 | * IO to complete. The barrier must be here so that changes | ||
| 722 | * by journal_file_buffer() take effect before wake_up_bit() | ||
| 723 | * does the waitqueue check. | ||
| 724 | */ | ||
| 725 | smp_mb(); | ||
| 727 | wake_up_bit(&bh->b_state, BH_Unshadow); | 726 | wake_up_bit(&bh->b_state, BH_Unshadow); |
| 728 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 727 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
| 729 | __brelse(bh); | 728 | __brelse(bh); |
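
The barrier added above pairs with the bit-wait on the other side of BH_Unshadow: wake_up_bit() internally skips the wakeup when it sees no waiters on the hashed waitqueue, so the list update made by journal_file_buffer() must be ordered before that check. Schematically — the waiter line is a sketch of the pattern, and sleep_fn stands for whatever action callback the waiting side uses:

    /* waker (this hunk) */
    journal_file_buffer(jh, commit_transaction, BJ_Forget);
    smp_mb();                               /* publish before the check ... */
    wake_up_bit(&bh->b_state, BH_Unshadow); /* ... inside wake_up_bit()     */

    /* waiter (sketch) */
    wait_on_bit(&bh->b_state, BH_Unshadow, sleep_fn, TASK_UNINTERRUPTIBLE);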
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index b3713afaaa9e..e2d4285fbe90 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c | |||
| @@ -437,9 +437,12 @@ int __log_space_left(journal_t *journal) | |||
| 437 | int __log_start_commit(journal_t *journal, tid_t target) | 437 | int __log_start_commit(journal_t *journal, tid_t target) |
| 438 | { | 438 | { |
| 439 | /* | 439 | /* |
| 440 | * Are we already doing a recent enough commit? | 440 | * The only transaction we can possibly wait upon is the |
| 441 | * currently running transaction (if it exists). Otherwise, | ||
| 442 | * the target tid must be an old one. | ||
| 441 | */ | 443 | */ |
| 442 | if (!tid_geq(journal->j_commit_request, target)) { | 444 | if (journal->j_running_transaction && |
| 445 | journal->j_running_transaction->t_tid == target) { | ||
| 443 | /* | 446 | /* |
| 444 | * We want a new commit: OK, mark the request and wakeup the | 447 | * We want a new commit: OK, mark the request and wakeup the |
| 445 | * commit thread. We do _not_ do the commit ourselves. | 448 | * commit thread. We do _not_ do the commit ourselves. |
| @@ -451,7 +454,14 @@ int __log_start_commit(journal_t *journal, tid_t target) | |||
| 451 | journal->j_commit_sequence); | 454 | journal->j_commit_sequence); |
| 452 | wake_up(&journal->j_wait_commit); | 455 | wake_up(&journal->j_wait_commit); |
| 453 | return 1; | 456 | return 1; |
| 454 | } | 457 | } else if (!tid_geq(journal->j_commit_request, target)) |
| 458 | /* This should never happen, but if it does, preserve | ||
| 459 | the evidence before kjournald goes into a loop and | ||
| 460 | increments j_commit_sequence beyond all recognition. */ | ||
| 461 | WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", | ||
| 462 | journal->j_commit_request, journal->j_commit_sequence, | ||
| 463 | target, journal->j_running_transaction ? | ||
| 464 | journal->j_running_transaction->t_tid : 0); | ||
| 455 | return 0; | 465 | return 0; |
| 456 | } | 466 | } |
| 457 | 467 | ||
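
The rewritten check leans on the journal tid helpers: tid_geq() is a wrap-safe sequence comparison (roughly the following, per include/linux/jbd.h), which is why a target tid that is neither the running transaction nor an already-committed one indicates corruption worth a WARN_ONCE():

    static inline int tid_geq(tid_t x, tid_t y)
    {
            int difference = (x - y);

            return difference >= 0;     /* tolerates tid wraparound */
    }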
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 60d2319651b2..f7ee81a065da 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
| @@ -266,7 +266,8 @@ static handle_t *new_handle(int nblocks) | |||
| 266 | * This function is visible to journal users (like ext3fs), so is not | 266 | * This function is visible to journal users (like ext3fs), so is not |
| 267 | * called with the journal already locked. | 267 | * called with the journal already locked. |
| 268 | * | 268 | * |
| 269 | * Return a pointer to a newly allocated handle, or NULL on failure | 269 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
| 270 | * on failure. | ||
| 270 | */ | 271 | */ |
| 271 | handle_t *journal_start(journal_t *journal, int nblocks) | 272 | handle_t *journal_start(journal_t *journal, int nblocks) |
| 272 | { | 273 | { |
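
The corrected kernel-doc matters to callers: with ERR_PTR() returns, a NULL check silently misses failures. A hypothetical caller should do:

    handle_t *handle = journal_start(journal, nblocks);

    if (IS_ERR(handle))
            return PTR_ERR(handle);     /* e.g. -ENOMEM, never NULL */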
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6e28000a4b21..29148a81c783 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -338,12 +338,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 338 | * all outstanding updates to complete. | 338 | * all outstanding updates to complete. |
| 339 | */ | 339 | */ |
| 340 | 340 | ||
| 341 | #ifdef COMMIT_STATS | ||
| 342 | spin_lock(&journal->j_list_lock); | ||
| 343 | summarise_journal_usage(journal); | ||
| 344 | spin_unlock(&journal->j_list_lock); | ||
| 345 | #endif | ||
| 346 | |||
| 347 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ | 341 | /* Do we need to erase the effects of a prior jbd2_journal_flush? */ |
| 348 | if (journal->j_flags & JBD2_FLUSHED) { | 342 | if (journal->j_flags & JBD2_FLUSHED) { |
| 349 | jbd_debug(3, "super block updated\n"); | 343 | jbd_debug(3, "super block updated\n"); |
diff --git a/fs/mbcache.c b/fs/mbcache.c index 2f174be06555..8c32ef3ba88e 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
| @@ -90,7 +90,8 @@ static DEFINE_SPINLOCK(mb_cache_spinlock); | |||
| 90 | * What the mbcache registers as to get shrunk dynamically. | 90 | * What the mbcache registers as to get shrunk dynamically. |
| 91 | */ | 91 | */ |
| 92 | 92 | ||
| 93 | static int mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask); | 93 | static int mb_cache_shrink_fn(struct shrinker *shrink, |
| 94 | struct shrink_control *sc); | ||
| 94 | 95 | ||
| 95 | static struct shrinker mb_cache_shrinker = { | 96 | static struct shrinker mb_cache_shrinker = { |
| 96 | .shrink = mb_cache_shrink_fn, | 97 | .shrink = mb_cache_shrink_fn, |
| @@ -156,18 +157,19 @@ forget: | |||
| 156 | * gets low. | 157 | * gets low. |
| 157 | * | 158 | * |
| 158 | * @shrink: (ignored) | 159 | * @shrink: (ignored) |
| 159 | * @nr_to_scan: Number of objects to scan | 160 | * @sc: shrink_control passed from reclaim |
| 160 | * @gfp_mask: (ignored) | ||
| 161 | * | 161 | * |
| 162 | * Returns the number of objects which are present in the cache. | 162 | * Returns the number of objects which are present in the cache. |
| 163 | */ | 163 | */ |
| 164 | static int | 164 | static int |
| 165 | mb_cache_shrink_fn(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 165 | mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc) |
| 166 | { | 166 | { |
| 167 | LIST_HEAD(free_list); | 167 | LIST_HEAD(free_list); |
| 168 | struct mb_cache *cache; | 168 | struct mb_cache *cache; |
| 169 | struct mb_cache_entry *entry, *tmp; | 169 | struct mb_cache_entry *entry, *tmp; |
| 170 | int count = 0; | 170 | int count = 0; |
| 171 | int nr_to_scan = sc->nr_to_scan; | ||
| 172 | gfp_t gfp_mask = sc->gfp_mask; | ||
| 171 | 173 | ||
| 172 | mb_debug("trying to free %d entries", nr_to_scan); | 174 | mb_debug("trying to free %d entries", nr_to_scan); |
| 173 | spin_lock(&mb_cache_spinlock); | 175 | spin_lock(&mb_cache_spinlock); |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 0250e4ce4893..202f370526a7 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
| @@ -461,7 +461,7 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
| 461 | #endif | 461 | #endif |
| 462 | struct ncp_entry_info finfo; | 462 | struct ncp_entry_info finfo; |
| 463 | 463 | ||
| 464 | data.wdog_pid = NULL; | 464 | memset(&data, 0, sizeof(data)); |
| 465 | server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); | 465 | server = kzalloc(sizeof(struct ncp_server), GFP_KERNEL); |
| 466 | if (!server) | 466 | if (!server) |
| 467 | return -ENOMEM; | 467 | return -ENOMEM; |
| @@ -496,7 +496,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
| 496 | struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; | 496 | struct ncp_mount_data_v4* md = (struct ncp_mount_data_v4*)raw_data; |
| 497 | 497 | ||
| 498 | data.flags = md->flags; | 498 | data.flags = md->flags; |
| 499 | data.int_flags = 0; | ||
| 500 | data.mounted_uid = md->mounted_uid; | 499 | data.mounted_uid = md->mounted_uid; |
| 501 | data.wdog_pid = find_get_pid(md->wdog_pid); | 500 | data.wdog_pid = find_get_pid(md->wdog_pid); |
| 502 | data.ncp_fd = md->ncp_fd; | 501 | data.ncp_fd = md->ncp_fd; |
| @@ -507,7 +506,6 @@ static int ncp_fill_super(struct super_block *sb, void *raw_data, int silent) | |||
| 507 | data.file_mode = md->file_mode; | 506 | data.file_mode = md->file_mode; |
| 508 | data.dir_mode = md->dir_mode; | 507 | data.dir_mode = md->dir_mode; |
| 509 | data.info_fd = -1; | 508 | data.info_fd = -1; |
| 510 | data.mounted_vol[0] = 0; | ||
| 511 | } | 509 | } |
| 512 | break; | 510 | break; |
| 513 | default: | 511 | default: |
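
The ncpfs change replaces piecemeal clearing of individual fields (the deleted data.int_flags = 0 and data.mounted_vol[0] = 0 lines) with a single memset() of the whole mount-data struct, so any field the v3/v4 unpacking paths never assign cannot carry uninitialized stack contents. The general pattern, sketched with the struct name used by ncp_fill_super():

    struct ncp_mount_data_kernel data;  /* local struct, many fields */

    memset(&data, 0, sizeof(data));     /* every field starts at zero */
    data.flags = md->flags;             /* then set only what is known */
    data.mounted_uid = md->mounted_uid;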
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 7237672216c8..424e47773a84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -2042,11 +2042,14 @@ static void nfs_access_free_list(struct list_head *head) | |||
| 2042 | } | 2042 | } |
| 2043 | } | 2043 | } |
| 2044 | 2044 | ||
| 2045 | int nfs_access_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask) | 2045 | int nfs_access_cache_shrinker(struct shrinker *shrink, |
| 2046 | struct shrink_control *sc) | ||
| 2046 | { | 2047 | { |
| 2047 | LIST_HEAD(head); | 2048 | LIST_HEAD(head); |
| 2048 | struct nfs_inode *nfsi, *next; | 2049 | struct nfs_inode *nfsi, *next; |
| 2049 | struct nfs_access_entry *cache; | 2050 | struct nfs_access_entry *cache; |
| 2051 | int nr_to_scan = sc->nr_to_scan; | ||
| 2052 | gfp_t gfp_mask = sc->gfp_mask; | ||
| 2050 | 2053 | ||
| 2051 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) | 2054 | if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL) |
| 2052 | return (nr_to_scan == 0) ? 0 : -1; | 2055 | return (nr_to_scan == 0) ? 0 : -1; |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index ce118ce885dd..2df6ca7b5898 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -234,7 +234,7 @@ extern int nfs_init_client(struct nfs_client *clp, | |||
| 234 | 234 | ||
| 235 | /* dir.c */ | 235 | /* dir.c */ |
| 236 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, | 236 | extern int nfs_access_cache_shrinker(struct shrinker *shrink, |
| 237 | int nr_to_scan, gfp_t gfp_mask); | 237 | struct shrink_control *sc); |
| 238 | 238 | ||
| 239 | /* inode.c */ | 239 | /* inode.c */ |
| 240 | extern struct workqueue_struct *nfsiod_workqueue; | 240 | extern struct workqueue_struct *nfsiod_workqueue; |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index d545e97d99c3..8ed4d3433199 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
| @@ -255,7 +255,11 @@ ssize_t part_discard_alignment_show(struct device *dev, | |||
| 255 | struct device_attribute *attr, char *buf) | 255 | struct device_attribute *attr, char *buf) |
| 256 | { | 256 | { |
| 257 | struct hd_struct *p = dev_to_part(dev); | 257 | struct hd_struct *p = dev_to_part(dev); |
| 258 | return sprintf(buf, "%u\n", p->discard_alignment); | 258 | struct gendisk *disk = dev_to_disk(dev); |
| 259 | |||
| 260 | return sprintf(buf, "%u\n", | ||
| 261 | queue_limit_discard_alignment(&disk->queue->limits, | ||
| 262 | p->start_sect)); | ||
| 259 | } | 263 | } |
| 260 | 264 | ||
| 261 | ssize_t part_stat_show(struct device *dev, | 265 | ssize_t part_stat_show(struct device *dev, |
| @@ -449,8 +453,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, | |||
| 449 | p->start_sect = start; | 453 | p->start_sect = start; |
| 450 | p->alignment_offset = | 454 | p->alignment_offset = |
| 451 | queue_limit_alignment_offset(&disk->queue->limits, start); | 455 | queue_limit_alignment_offset(&disk->queue->limits, start); |
| 452 | p->discard_alignment = | ||
| 453 | queue_limit_discard_alignment(&disk->queue->limits, start); | ||
| 454 | p->nr_sects = len; | 456 | p->nr_sects = len; |
| 455 | p->partno = partno; | 457 | p->partno = partno; |
| 456 | p->policy = get_disk_ro(disk); | 458 | p->policy = get_disk_ro(disk); |
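
A rationale sketch for the partition change: queue limits are not fixed at partition-scan time, so a discard alignment cached in add_partition() can go stale, while recomputing in the sysfs show path always reflects the live limits. Illustratively — the granularity update below is a hypothetical driver action, not part of this diff:

    /* Some time after partitions were scanned, a driver may adjust: */
    q->limits.discard_granularity = 4096;

    /* A p->discard_alignment cached at add_partition() time would still
     * describe the old granularity; the show path above recomputes the
     * value from the current q->limits on every read instead. */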
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index c03e8d3a3a5b..3763b436e69d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -61,6 +61,14 @@ extern const struct file_operations proc_pagemap_operations; | |||
| 61 | extern const struct file_operations proc_net_operations; | 61 | extern const struct file_operations proc_net_operations; |
| 62 | extern const struct inode_operations proc_net_inode_operations; | 62 | extern const struct inode_operations proc_net_inode_operations; |
| 63 | 63 | ||
| 64 | struct proc_maps_private { | ||
| 65 | struct pid *pid; | ||
| 66 | struct task_struct *task; | ||
| 67 | #ifdef CONFIG_MMU | ||
| 68 | struct vm_area_struct *tail_vma; | ||
| 69 | #endif | ||
| 70 | }; | ||
| 71 | |||
| 64 | void proc_init_inodecache(void); | 72 | void proc_init_inodecache(void); |
| 65 | 73 | ||
| 66 | static inline struct pid *proc_pid(struct inode *inode) | 74 | static inline struct pid *proc_pid(struct inode *inode) |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 318d8654989b..2c9db29ea358 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -858,7 +858,192 @@ const struct file_operations proc_pagemap_operations = { | |||
| 858 | #endif /* CONFIG_PROC_PAGE_MONITOR */ | 858 | #endif /* CONFIG_PROC_PAGE_MONITOR */ |
| 859 | 859 | ||
| 860 | #ifdef CONFIG_NUMA | 860 | #ifdef CONFIG_NUMA |
| 861 | extern int show_numa_map(struct seq_file *m, void *v); | 861 | |
| 862 | struct numa_maps { | ||
| 863 | struct vm_area_struct *vma; | ||
| 864 | unsigned long pages; | ||
| 865 | unsigned long anon; | ||
| 866 | unsigned long active; | ||
| 867 | unsigned long writeback; | ||
| 868 | unsigned long mapcount_max; | ||
| 869 | unsigned long dirty; | ||
| 870 | unsigned long swapcache; | ||
| 871 | unsigned long node[MAX_NUMNODES]; | ||
| 872 | }; | ||
| 873 | |||
| 874 | struct numa_maps_private { | ||
| 875 | struct proc_maps_private proc_maps; | ||
| 876 | struct numa_maps md; | ||
| 877 | }; | ||
| 878 | |||
| 879 | static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty) | ||
| 880 | { | ||
| 881 | int count = page_mapcount(page); | ||
| 882 | |||
| 883 | md->pages++; | ||
| 884 | if (pte_dirty || PageDirty(page)) | ||
| 885 | md->dirty++; | ||
| 886 | |||
| 887 | if (PageSwapCache(page)) | ||
| 888 | md->swapcache++; | ||
| 889 | |||
| 890 | if (PageActive(page) || PageUnevictable(page)) | ||
| 891 | md->active++; | ||
| 892 | |||
| 893 | if (PageWriteback(page)) | ||
| 894 | md->writeback++; | ||
| 895 | |||
| 896 | if (PageAnon(page)) | ||
| 897 | md->anon++; | ||
| 898 | |||
| 899 | if (count > md->mapcount_max) | ||
| 900 | md->mapcount_max = count; | ||
| 901 | |||
| 902 | md->node[page_to_nid(page)]++; | ||
| 903 | } | ||
| 904 | |||
| 905 | static int gather_pte_stats(pmd_t *pmd, unsigned long addr, | ||
| 906 | unsigned long end, struct mm_walk *walk) | ||
| 907 | { | ||
| 908 | struct numa_maps *md; | ||
| 909 | spinlock_t *ptl; | ||
| 910 | pte_t *orig_pte; | ||
| 911 | pte_t *pte; | ||
| 912 | |||
| 913 | md = walk->private; | ||
| 914 | orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl); | ||
| 915 | do { | ||
| 916 | struct page *page; | ||
| 917 | int nid; | ||
| 918 | |||
| 919 | if (!pte_present(*pte)) | ||
| 920 | continue; | ||
| 921 | |||
| 922 | page = vm_normal_page(md->vma, addr, *pte); | ||
| 923 | if (!page) | ||
| 924 | continue; | ||
| 925 | |||
| 926 | if (PageReserved(page)) | ||
| 927 | continue; | ||
| 928 | |||
| 929 | nid = page_to_nid(page); | ||
| 930 | if (!node_isset(nid, node_states[N_HIGH_MEMORY])) | ||
| 931 | continue; | ||
| 932 | |||
| 933 | gather_stats(page, md, pte_dirty(*pte)); | ||
| 934 | |||
| 935 | } while (pte++, addr += PAGE_SIZE, addr != end); | ||
| 936 | pte_unmap_unlock(orig_pte, ptl); | ||
| 937 | return 0; | ||
| 938 | } | ||
| 939 | #ifdef CONFIG_HUGETLB_PAGE | ||
| 940 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | ||
| 941 | unsigned long addr, unsigned long end, struct mm_walk *walk) | ||
| 942 | { | ||
| 943 | struct numa_maps *md; | ||
| 944 | struct page *page; | ||
| 945 | |||
| 946 | if (pte_none(*pte)) | ||
| 947 | return 0; | ||
| 948 | |||
| 949 | page = pte_page(*pte); | ||
| 950 | if (!page) | ||
| 951 | return 0; | ||
| 952 | |||
| 953 | md = walk->private; | ||
| 954 | gather_stats(page, md, pte_dirty(*pte)); | ||
| 955 | return 0; | ||
| 956 | } | ||
| 957 | |||
| 958 | #else | ||
| 959 | static int gather_hugetbl_stats(pte_t *pte, unsigned long hmask, | ||
| 960 | unsigned long addr, unsigned long end, struct mm_walk *walk) | ||
| 961 | { | ||
| 962 | return 0; | ||
| 963 | } | ||
| 964 | #endif | ||
| 965 | |||
| 966 | /* | ||
| 967 | * Display pages allocated per node and memory policy via /proc. | ||
| 968 | */ | ||
| 969 | static int show_numa_map(struct seq_file *m, void *v) | ||
| 970 | { | ||
| 971 | struct numa_maps_private *numa_priv = m->private; | ||
| 972 | struct proc_maps_private *proc_priv = &numa_priv->proc_maps; | ||
| 973 | struct vm_area_struct *vma = v; | ||
| 974 | struct numa_maps *md = &numa_priv->md; | ||
| 975 | struct file *file = vma->vm_file; | ||
| 976 | struct mm_struct *mm = vma->vm_mm; | ||
| 977 | struct mm_walk walk = {}; | ||
| 978 | struct mempolicy *pol; | ||
| 979 | int n; | ||
| 980 | char buffer[50]; | ||
| 981 | |||
| 982 | if (!mm) | ||
| 983 | return 0; | ||
| 984 | |||
| 985 | /* Ensure we start with an empty set of numa_maps statistics. */ | ||
| 986 | memset(md, 0, sizeof(*md)); | ||
| 987 | |||
| 988 | md->vma = vma; | ||
| 989 | |||
| 990 | walk.hugetlb_entry = gather_hugetbl_stats; | ||
| 991 | walk.pmd_entry = gather_pte_stats; | ||
| 992 | walk.private = md; | ||
| 993 | walk.mm = mm; | ||
| 994 | |||
| 995 | pol = get_vma_policy(proc_priv->task, vma, vma->vm_start); | ||
| 996 | mpol_to_str(buffer, sizeof(buffer), pol, 0); | ||
| 997 | mpol_cond_put(pol); | ||
| 998 | |||
| 999 | seq_printf(m, "%08lx %s", vma->vm_start, buffer); | ||
| 1000 | |||
| 1001 | if (file) { | ||
| 1002 | seq_printf(m, " file="); | ||
| 1003 | seq_path(m, &file->f_path, "\n\t= "); | ||
| 1004 | } else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) { | ||
| 1005 | seq_printf(m, " heap"); | ||
| 1006 | } else if (vma->vm_start <= mm->start_stack && | ||
| 1007 | vma->vm_end >= mm->start_stack) { | ||
| 1008 | seq_printf(m, " stack"); | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | walk_page_range(vma->vm_start, vma->vm_end, &walk); | ||
| 1012 | |||
| 1013 | if (!md->pages) | ||
| 1014 | goto out; | ||
| 1015 | |||
| 1016 | if (md->anon) | ||
| 1017 | seq_printf(m, " anon=%lu", md->anon); | ||
| 1018 | |||
| 1019 | if (md->dirty) | ||
| 1020 | seq_printf(m, " dirty=%lu", md->dirty); | ||
| 1021 | |||
| 1022 | if (md->pages != md->anon && md->pages != md->dirty) | ||
| 1023 | seq_printf(m, " mapped=%lu", md->pages); | ||
| 1024 | |||
| 1025 | if (md->mapcount_max > 1) | ||
| 1026 | seq_printf(m, " mapmax=%lu", md->mapcount_max); | ||
| 1027 | |||
| 1028 | if (md->swapcache) | ||
| 1029 | seq_printf(m, " swapcache=%lu", md->swapcache); | ||
| 1030 | |||
| 1031 | if (md->active < md->pages && !is_vm_hugetlb_page(vma)) | ||
| 1032 | seq_printf(m, " active=%lu", md->active); | ||
| 1033 | |||
| 1034 | if (md->writeback) | ||
| 1035 | seq_printf(m, " writeback=%lu", md->writeback); | ||
| 1036 | |||
| 1037 | for_each_node_state(n, N_HIGH_MEMORY) | ||
| 1038 | if (md->node[n]) | ||
| 1039 | seq_printf(m, " N%d=%lu", n, md->node[n]); | ||
| 1040 | out: | ||
| 1041 | seq_putc(m, '\n'); | ||
| 1042 | |||
| 1043 | if (m->count < m->size) | ||
| 1044 | m->version = (vma != proc_priv->tail_vma) ? vma->vm_start : 0; | ||
| 1045 | return 0; | ||
| 1046 | } | ||
| 862 | 1047 | ||
| 863 | static const struct seq_operations proc_pid_numa_maps_op = { | 1048 | static const struct seq_operations proc_pid_numa_maps_op = { |
| 864 | .start = m_start, | 1049 | .start = m_start, |
| @@ -869,7 +1054,20 @@ static const struct seq_operations proc_pid_numa_maps_op = { | |||
| 869 | 1054 | ||
| 870 | static int numa_maps_open(struct inode *inode, struct file *file) | 1055 | static int numa_maps_open(struct inode *inode, struct file *file) |
| 871 | { | 1056 | { |
| 872 | return do_maps_open(inode, file, &proc_pid_numa_maps_op); | 1057 | struct numa_maps_private *priv; |
| 1058 | int ret = -ENOMEM; | ||
| 1059 | priv = kzalloc(sizeof(*priv), GFP_KERNEL); | ||
| 1060 | if (priv) { | ||
| 1061 | priv->proc_maps.pid = proc_pid(inode); | ||
| 1062 | ret = seq_open(file, &proc_pid_numa_maps_op); | ||
| 1063 | if (!ret) { | ||
| 1064 | struct seq_file *m = file->private_data; | ||
| 1065 | m->private = priv; | ||
| 1066 | } else { | ||
| 1067 | kfree(priv); | ||
| 1068 | } | ||
| 1069 | } | ||
| 1070 | return ret; | ||
| 873 | } | 1071 | } |
| 874 | 1072 | ||
| 875 | const struct file_operations proc_numa_maps_operations = { | 1073 | const struct file_operations proc_numa_maps_operations = { |
| @@ -878,4 +1076,4 @@ const struct file_operations proc_numa_maps_operations = { | |||
| 878 | .llseek = seq_lseek, | 1076 | .llseek = seq_lseek, |
| 879 | .release = seq_release_private, | 1077 | .release = seq_release_private, |
| 880 | }; | 1078 | }; |
| 881 | #endif | 1079 | #endif /* CONFIG_NUMA */ |
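
With show_numa_map() moved out of mm/mempolicy.c (note the deleted extern above), /proc/<pid>/numa_maps now carries its own seq_file private state. Output is one line per VMA, following the seq_printf() calls above: start address, policy string, then only the non-zero counters. A minimal userspace reader; the sample line in the comment is illustrative:

    #include <stdio.h>

    int main(void)
    {
            char line[512];
            FILE *f = fopen("/proc/self/numa_maps", "r");

            if (!f)
                    return 1;
            /* e.g.: "00400000 default file=/bin/cat mapped=9 mapmax=2 N0=9" */
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }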
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index d3c032f5fa0a..5b572c89e6c4 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
| @@ -691,8 +691,11 @@ static void prune_dqcache(int count) | |||
| 691 | * This is called from kswapd when we think we need some | 691 | * This is called from kswapd when we think we need some |
| 692 | * more memory | 692 | * more memory |
| 693 | */ | 693 | */ |
| 694 | static int shrink_dqcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) | 694 | static int shrink_dqcache_memory(struct shrinker *shrink, |
| 695 | struct shrink_control *sc) | ||
| 695 | { | 696 | { |
| 697 | int nr = sc->nr_to_scan; | ||
| 698 | |||
| 696 | if (nr) { | 699 | if (nr) { |
| 697 | spin_lock(&dq_list_lock); | 700 | spin_lock(&dq_list_lock); |
| 698 | prune_dqcache(nr); | 701 | prune_dqcache(nr); |
diff --git a/fs/splice.c b/fs/splice.c index 50a5d978da16..aa866d309695 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -162,6 +162,14 @@ static const struct pipe_buf_operations user_page_pipe_buf_ops = { | |||
| 162 | .get = generic_pipe_buf_get, | 162 | .get = generic_pipe_buf_get, |
| 163 | }; | 163 | }; |
| 164 | 164 | ||
| 165 | static void wakeup_pipe_readers(struct pipe_inode_info *pipe) | ||
| 166 | { | ||
| 167 | smp_mb(); | ||
| 168 | if (waitqueue_active(&pipe->wait)) | ||
| 169 | wake_up_interruptible(&pipe->wait); | ||
| 170 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
| 171 | } | ||
| 172 | |||
| 165 | /** | 173 | /** |
| 166 | * splice_to_pipe - fill passed data into a pipe | 174 | * splice_to_pipe - fill passed data into a pipe |
| 167 | * @pipe: pipe to fill | 175 | * @pipe: pipe to fill |
| @@ -247,12 +255,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, | |||
| 247 | 255 | ||
| 248 | pipe_unlock(pipe); | 256 | pipe_unlock(pipe); |
| 249 | 257 | ||
| 250 | if (do_wakeup) { | 258 | if (do_wakeup) |
| 251 | smp_mb(); | 259 | wakeup_pipe_readers(pipe); |
| 252 | if (waitqueue_active(&pipe->wait)) | ||
| 253 | wake_up_interruptible(&pipe->wait); | ||
| 254 | kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); | ||
| 255 | } | ||
| 256 | 260 | ||
| 257 | while (page_nr < spd_pages) | 261 | while (page_nr < spd_pages) |
| 258 | spd->spd_release(spd, page_nr++); | 262 | spd->spd_release(spd, page_nr++); |
| @@ -1892,12 +1896,9 @@ retry: | |||
| 1892 | /* | 1896 | /* |
| 1893 | * If we put data in the output pipe, wakeup any potential readers. | 1897 | * If we put data in the output pipe, wakeup any potential readers. |
| 1894 | */ | 1898 | */ |
| 1895 | if (ret > 0) { | 1899 | if (ret > 0) |
| 1896 | smp_mb(); | 1900 | wakeup_pipe_readers(opipe); |
| 1897 | if (waitqueue_active(&opipe->wait)) | 1901 | |
| 1898 | wake_up_interruptible(&opipe->wait); | ||
| 1899 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
| 1900 | } | ||
| 1901 | if (input_wakeup) | 1902 | if (input_wakeup) |
| 1902 | wakeup_pipe_writers(ipipe); | 1903 | wakeup_pipe_writers(ipipe); |
| 1903 | 1904 | ||
| @@ -1976,12 +1977,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, | |||
| 1976 | /* | 1977 | /* |
| 1977 | * If we put data in the output pipe, wakeup any potential readers. | 1978 | * If we put data in the output pipe, wakeup any potential readers. |
| 1978 | */ | 1979 | */ |
| 1979 | if (ret > 0) { | 1980 | if (ret > 0) |
| 1980 | smp_mb(); | 1981 | wakeup_pipe_readers(opipe); |
| 1981 | if (waitqueue_active(&opipe->wait)) | ||
| 1982 | wake_up_interruptible(&opipe->wait); | ||
| 1983 | kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); | ||
| 1984 | } | ||
| 1985 | 1982 | ||
| 1986 | return ret; | 1983 | return ret; |
| 1987 | } | 1984 | } |
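
The splice change is a pure refactor: three open-coded copies of the reader-wakeup sequence collapse into wakeup_pipe_readers(). The smp_mb()/waitqueue_active() pairing it preserves is the usual lockless-wakeup idiom, sketched here with a generic condition flag cond and waitqueue wq:

    /* waker */
    cond = true;                    /* publish the new data/condition      */
    smp_mb();                       /* order it before the emptiness check */
    if (waitqueue_active(&wq))      /* skip the wakeup only if no sleeper  */
            wake_up_interruptible(&wq);

    /* sleeper: queues itself *before* re-checking cond, so it either
     * sees cond set or is on the queue when the waker looks */
    wait_event_interruptible(wq, cond);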
diff --git a/fs/timerfd.c b/fs/timerfd.c index 8c4fc1425b3e..f67acbdda5e8 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
| @@ -22,16 +22,24 @@ | |||
| 22 | #include <linux/anon_inodes.h> | 22 | #include <linux/anon_inodes.h> |
| 23 | #include <linux/timerfd.h> | 23 | #include <linux/timerfd.h> |
| 24 | #include <linux/syscalls.h> | 24 | #include <linux/syscalls.h> |
| 25 | #include <linux/rcupdate.h> | ||
| 25 | 26 | ||
| 26 | struct timerfd_ctx { | 27 | struct timerfd_ctx { |
| 27 | struct hrtimer tmr; | 28 | struct hrtimer tmr; |
| 28 | ktime_t tintv; | 29 | ktime_t tintv; |
| 30 | ktime_t moffs; | ||
| 29 | wait_queue_head_t wqh; | 31 | wait_queue_head_t wqh; |
| 30 | u64 ticks; | 32 | u64 ticks; |
| 31 | int expired; | 33 | int expired; |
| 32 | int clockid; | 34 | int clockid; |
| 35 | struct rcu_head rcu; | ||
| 36 | struct list_head clist; | ||
| 37 | bool might_cancel; | ||
| 33 | }; | 38 | }; |
| 34 | 39 | ||
| 40 | static LIST_HEAD(cancel_list); | ||
| 41 | static DEFINE_SPINLOCK(cancel_lock); | ||
| 42 | |||
| 35 | /* | 43 | /* |
| 36 | * This gets called when the timer event triggers. We set the "expired" | 44 | * This gets called when the timer event triggers. We set the "expired" |
| 37 | * flag, but we do not re-arm the timer (in case it's necessary, | 45 | * flag, but we do not re-arm the timer (in case it's necessary, |
| @@ -51,6 +59,63 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
| 51 | return HRTIMER_NORESTART; | 59 | return HRTIMER_NORESTART; |
| 52 | } | 60 | } |
| 53 | 61 | ||
| 62 | /* | ||
| 63 | * Called when the clock was set to cancel the timers in the cancel | ||
| 64 | * list. | ||
| 65 | */ | ||
| 66 | void timerfd_clock_was_set(void) | ||
| 67 | { | ||
| 68 | ktime_t moffs = ktime_get_monotonic_offset(); | ||
| 69 | struct timerfd_ctx *ctx; | ||
| 70 | unsigned long flags; | ||
| 71 | |||
| 72 | rcu_read_lock(); | ||
| 73 | list_for_each_entry_rcu(ctx, &cancel_list, clist) { | ||
| 74 | if (!ctx->might_cancel) | ||
| 75 | continue; | ||
| 76 | spin_lock_irqsave(&ctx->wqh.lock, flags); | ||
| 77 | if (ctx->moffs.tv64 != moffs.tv64) { | ||
| 78 | ctx->moffs.tv64 = KTIME_MAX; | ||
| 79 | wake_up_locked(&ctx->wqh); | ||
| 80 | } | ||
| 81 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | ||
| 82 | } | ||
| 83 | rcu_read_unlock(); | ||
| 84 | } | ||
| 85 | |||
| 86 | static void timerfd_remove_cancel(struct timerfd_ctx *ctx) | ||
| 87 | { | ||
| 88 | if (ctx->might_cancel) { | ||
| 89 | ctx->might_cancel = false; | ||
| 90 | spin_lock(&cancel_lock); | ||
| 91 | list_del_rcu(&ctx->clist); | ||
| 92 | spin_unlock(&cancel_lock); | ||
| 93 | } | ||
| 94 | } | ||
| 95 | |||
| 96 | static bool timerfd_canceled(struct timerfd_ctx *ctx) | ||
| 97 | { | ||
| 98 | if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) | ||
| 99 | return false; | ||
| 100 | ctx->moffs = ktime_get_monotonic_offset(); | ||
| 101 | return true; | ||
| 102 | } | ||
| 103 | |||
| 104 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) | ||
| 105 | { | ||
| 106 | if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && | ||
| 107 | (flags & TFD_TIMER_CANCEL_ON_SET)) { | ||
| 108 | if (!ctx->might_cancel) { | ||
| 109 | ctx->might_cancel = true; | ||
| 110 | spin_lock(&cancel_lock); | ||
| 111 | list_add_rcu(&ctx->clist, &cancel_list); | ||
| 112 | spin_unlock(&cancel_lock); | ||
| 113 | } | ||
| 114 | } else if (ctx->might_cancel) { | ||
| 115 | timerfd_remove_cancel(ctx); | ||
| 116 | } | ||
| 117 | } | ||
| 118 | |||
| 54 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | 119 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) |
| 55 | { | 120 | { |
| 56 | ktime_t remaining; | 121 | ktime_t remaining; |
| @@ -59,11 +124,12 @@ static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | |||
| 59 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | 124 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
| 60 | } | 125 | } |
| 61 | 126 | ||
| 62 | static void timerfd_setup(struct timerfd_ctx *ctx, int flags, | 127 | static int timerfd_setup(struct timerfd_ctx *ctx, int flags, |
| 63 | const struct itimerspec *ktmr) | 128 | const struct itimerspec *ktmr) |
| 64 | { | 129 | { |
| 65 | enum hrtimer_mode htmode; | 130 | enum hrtimer_mode htmode; |
| 66 | ktime_t texp; | 131 | ktime_t texp; |
| 132 | int clockid = ctx->clockid; | ||
| 67 | 133 | ||
| 68 | htmode = (flags & TFD_TIMER_ABSTIME) ? | 134 | htmode = (flags & TFD_TIMER_ABSTIME) ? |
| 69 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; | 135 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; |
| @@ -72,19 +138,24 @@ static void timerfd_setup(struct timerfd_ctx *ctx, int flags, | |||
| 72 | ctx->expired = 0; | 138 | ctx->expired = 0; |
| 73 | ctx->ticks = 0; | 139 | ctx->ticks = 0; |
| 74 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); | 140 | ctx->tintv = timespec_to_ktime(ktmr->it_interval); |
| 75 | hrtimer_init(&ctx->tmr, ctx->clockid, htmode); | 141 | hrtimer_init(&ctx->tmr, clockid, htmode); |
| 76 | hrtimer_set_expires(&ctx->tmr, texp); | 142 | hrtimer_set_expires(&ctx->tmr, texp); |
| 77 | ctx->tmr.function = timerfd_tmrproc; | 143 | ctx->tmr.function = timerfd_tmrproc; |
| 78 | if (texp.tv64 != 0) | 144 | if (texp.tv64 != 0) { |
| 79 | hrtimer_start(&ctx->tmr, texp, htmode); | 145 | hrtimer_start(&ctx->tmr, texp, htmode); |
| 146 | if (timerfd_canceled(ctx)) | ||
| 147 | return -ECANCELED; | ||
| 148 | } | ||
| 149 | return 0; | ||
| 80 | } | 150 | } |
| 81 | 151 | ||
| 82 | static int timerfd_release(struct inode *inode, struct file *file) | 152 | static int timerfd_release(struct inode *inode, struct file *file) |
| 83 | { | 153 | { |
| 84 | struct timerfd_ctx *ctx = file->private_data; | 154 | struct timerfd_ctx *ctx = file->private_data; |
| 85 | 155 | ||
| 156 | timerfd_remove_cancel(ctx); | ||
| 86 | hrtimer_cancel(&ctx->tmr); | 157 | hrtimer_cancel(&ctx->tmr); |
| 87 | kfree(ctx); | 158 | kfree_rcu(ctx, rcu); |
| 88 | return 0; | 159 | return 0; |
| 89 | } | 160 | } |
| 90 | 161 | ||
| @@ -118,8 +189,21 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | |||
| 118 | res = -EAGAIN; | 189 | res = -EAGAIN; |
| 119 | else | 190 | else |
| 120 | res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); | 191 | res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); |
| 192 | |||
| 193 | /* | ||
| 194 | * If clock has changed, we do not care about the | ||
| 195 | * ticks and we do not rearm the timer. Userspace must | ||
| 196 | * reevaluate anyway. | ||
| 197 | */ | ||
| 198 | if (timerfd_canceled(ctx)) { | ||
| 199 | ctx->ticks = 0; | ||
| 200 | ctx->expired = 0; | ||
| 201 | res = -ECANCELED; | ||
| 202 | } | ||
| 203 | |||
| 121 | if (ctx->ticks) { | 204 | if (ctx->ticks) { |
| 122 | ticks = ctx->ticks; | 205 | ticks = ctx->ticks; |
| 206 | |||
| 123 | if (ctx->expired && ctx->tintv.tv64) { | 207 | if (ctx->expired && ctx->tintv.tv64) { |
| 124 | /* | 208 | /* |
| 125 | * If tintv.tv64 != 0, this is a periodic timer that | 209 | * If tintv.tv64 != 0, this is a periodic timer that |
| @@ -183,6 +267,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
| 183 | init_waitqueue_head(&ctx->wqh); | 267 | init_waitqueue_head(&ctx->wqh); |
| 184 | ctx->clockid = clockid; | 268 | ctx->clockid = clockid; |
| 185 | hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); | 269 | hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS); |
| 270 | ctx->moffs = ktime_get_monotonic_offset(); | ||
| 186 | 271 | ||
| 187 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, | 272 | ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx, |
| 188 | O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); | 273 | O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS)); |
| @@ -199,6 +284,7 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
| 199 | struct file *file; | 284 | struct file *file; |
| 200 | struct timerfd_ctx *ctx; | 285 | struct timerfd_ctx *ctx; |
| 201 | struct itimerspec ktmr, kotmr; | 286 | struct itimerspec ktmr, kotmr; |
| 287 | int ret; | ||
| 202 | 288 | ||
| 203 | if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) | 289 | if (copy_from_user(&ktmr, utmr, sizeof(ktmr))) |
| 204 | return -EFAULT; | 290 | return -EFAULT; |
| @@ -213,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
| 213 | return PTR_ERR(file); | 299 | return PTR_ERR(file); |
| 214 | ctx = file->private_data; | 300 | ctx = file->private_data; |
| 215 | 301 | ||
| 302 | timerfd_setup_cancel(ctx, flags); | ||
| 303 | |||
| 216 | /* | 304 | /* |
| 217 | * We need to stop the existing timer before reprogramming | 305 | * We need to stop the existing timer before reprogramming |
| 218 | * it to the new values. | 306 | * it to the new values. |
| @@ -240,14 +328,14 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
| 240 | /* | 328 | /* |
| 241 | * Re-program the timer to the new value ... | 329 | * Re-program the timer to the new value ... |
| 242 | */ | 330 | */ |
| 243 | timerfd_setup(ctx, flags, &ktmr); | 331 | ret = timerfd_setup(ctx, flags, &ktmr); |
| 244 | 332 | ||
| 245 | spin_unlock_irq(&ctx->wqh.lock); | 333 | spin_unlock_irq(&ctx->wqh.lock); |
| 246 | fput(file); | 334 | fput(file); |
| 247 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) | 335 | if (otmr && copy_to_user(otmr, &kotmr, sizeof(kotmr))) |
| 248 | return -EFAULT; | 336 | return -EFAULT; |
| 249 | 337 | ||
| 250 | return 0; | 338 | return ret; |
| 251 | } | 339 | } |
| 252 | 340 | ||
| 253 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) | 341 | SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct itimerspec __user *, otmr) |
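
The new cancel-on-clock-set behaviour is visible from userspace: an absolute CLOCK_REALTIME timerfd armed with TFD_TIMER_CANCEL_ON_SET has read() fail with ECANCELED when someone sets the wall clock, instead of sleeping toward a now-meaningless deadline. A minimal sketch, assuming a libc that exposes the flag (on older headers it can be defined as 1 << 1):

    #include <sys/timerfd.h>
    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <time.h>
    #include <unistd.h>

    int main(void)
    {
            struct itimerspec its = { .it_value = { .tv_sec = time(NULL) + 3600 } };
            uint64_t ticks;
            int fd = timerfd_create(CLOCK_REALTIME, 0);

            if (fd < 0)
                    return 1;
            if (timerfd_settime(fd, TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET,
                                &its, NULL) < 0)
                    return 1;
            /* Blocks for an hour -- unless the wall clock is changed. */
            if (read(fd, &ticks, sizeof(ticks)) < 0 && errno == ECANCELED)
                    fprintf(stderr, "clock was set; re-evaluate timeouts\n");
            close(fd);
            return 0;
    }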
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 8b3a7da531eb..315de66e52b2 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
| @@ -106,7 +106,7 @@ static long long get_liability(struct ubifs_info *c) | |||
| 106 | long long liab; | 106 | long long liab; |
| 107 | 107 | ||
| 108 | spin_lock(&c->space_lock); | 108 | spin_lock(&c->space_lock); |
| 109 | liab = c->budg_idx_growth + c->budg_data_growth + c->budg_dd_growth; | 109 | liab = c->bi.idx_growth + c->bi.data_growth + c->bi.dd_growth; |
| 110 | spin_unlock(&c->space_lock); | 110 | spin_unlock(&c->space_lock); |
| 111 | return liab; | 111 | return liab; |
| 112 | } | 112 | } |
| @@ -180,7 +180,7 @@ int ubifs_calc_min_idx_lebs(struct ubifs_info *c) | |||
| 180 | int idx_lebs; | 180 | int idx_lebs; |
| 181 | long long idx_size; | 181 | long long idx_size; |
| 182 | 182 | ||
| 183 | idx_size = c->old_idx_sz + c->budg_idx_growth + c->budg_uncommitted_idx; | 183 | idx_size = c->bi.old_idx_sz + c->bi.idx_growth + c->bi.uncommitted_idx; |
| 184 | /* And make sure we have thrice the index size of space reserved */ | 184 | /* And make sure we have thrice the index size of space reserved */ |
| 185 | idx_size += idx_size << 1; | 185 | idx_size += idx_size << 1; |
| 186 | /* | 186 | /* |
| @@ -292,13 +292,13 @@ static int can_use_rp(struct ubifs_info *c) | |||
| 292 | * budgeted index space to the size of the current index, multiplies this by 3, | 292 | * budgeted index space to the size of the current index, multiplies this by 3, |
| 293 | * and makes sure this does not exceed the amount of free LEBs. | 293 | * and makes sure this does not exceed the amount of free LEBs. |
| 294 | * | 294 | * |
| 295 | * Notes about @c->min_idx_lebs and @c->lst.idx_lebs variables: | 295 | * Notes about @c->bi.min_idx_lebs and @c->lst.idx_lebs variables: |
| 296 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might | 296 | * o @c->lst.idx_lebs is the number of LEBs the index currently uses. It might |
| 297 | * be large, because UBIFS does not do any index consolidation as long as | 297 | * be large, because UBIFS does not do any index consolidation as long as |
| 298 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs | 298 | * there is free space. IOW, the index may take a lot of LEBs, but the LEBs |
| 299 | * will contain a lot of dirt. | 299 | * will contain a lot of dirt. |
| 300 | * o @c->min_idx_lebs is the number of LEBS the index presumably takes. IOW, | 300 | * o @c->bi.min_idx_lebs is the number of LEBS the index presumably takes. IOW, |
| 301 | * the index may be consolidated to take up to @c->min_idx_lebs LEBs. | 301 | * the index may be consolidated to take up to @c->bi.min_idx_lebs LEBs. |
| 302 | * | 302 | * |
| 303 | * This function returns zero in case of success, and %-ENOSPC in case of | 303 | * This function returns zero in case of success, and %-ENOSPC in case of |
| 304 | * failure. | 304 | * failure. |
| @@ -343,13 +343,13 @@ static int do_budget_space(struct ubifs_info *c) | |||
| 343 | c->lst.taken_empty_lebs; | 343 | c->lst.taken_empty_lebs; |
| 344 | if (unlikely(rsvd_idx_lebs > lebs)) { | 344 | if (unlikely(rsvd_idx_lebs > lebs)) { |
| 345 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " | 345 | dbg_budg("out of indexing space: min_idx_lebs %d (old %d), " |
| 346 | "rsvd_idx_lebs %d", min_idx_lebs, c->min_idx_lebs, | 346 | "rsvd_idx_lebs %d", min_idx_lebs, c->bi.min_idx_lebs, |
| 347 | rsvd_idx_lebs); | 347 | rsvd_idx_lebs); |
| 348 | return -ENOSPC; | 348 | return -ENOSPC; |
| 349 | } | 349 | } |
| 350 | 350 | ||
| 351 | available = ubifs_calc_available(c, min_idx_lebs); | 351 | available = ubifs_calc_available(c, min_idx_lebs); |
| 352 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 352 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
| 353 | 353 | ||
| 354 | if (unlikely(available < outstanding)) { | 354 | if (unlikely(available < outstanding)) { |
| 355 | dbg_budg("out of data space: available %lld, outstanding %lld", | 355 | dbg_budg("out of data space: available %lld, outstanding %lld", |
| @@ -360,7 +360,7 @@ static int do_budget_space(struct ubifs_info *c) | |||
| 360 | if (available - outstanding <= c->rp_size && !can_use_rp(c)) | 360 | if (available - outstanding <= c->rp_size && !can_use_rp(c)) |
| 361 | return -ENOSPC; | 361 | return -ENOSPC; |
| 362 | 362 | ||
| 363 | c->min_idx_lebs = min_idx_lebs; | 363 | c->bi.min_idx_lebs = min_idx_lebs; |
| 364 | return 0; | 364 | return 0; |
| 365 | } | 365 | } |
| 366 | 366 | ||
| @@ -393,11 +393,11 @@ static int calc_data_growth(const struct ubifs_info *c, | |||
| 393 | { | 393 | { |
| 394 | int data_growth; | 394 | int data_growth; |
| 395 | 395 | ||
| 396 | data_growth = req->new_ino ? c->inode_budget : 0; | 396 | data_growth = req->new_ino ? c->bi.inode_budget : 0; |
| 397 | if (req->new_page) | 397 | if (req->new_page) |
| 398 | data_growth += c->page_budget; | 398 | data_growth += c->bi.page_budget; |
| 399 | if (req->new_dent) | 399 | if (req->new_dent) |
| 400 | data_growth += c->dent_budget; | 400 | data_growth += c->bi.dent_budget; |
| 401 | data_growth += req->new_ino_d; | 401 | data_growth += req->new_ino_d; |
| 402 | return data_growth; | 402 | return data_growth; |
| 403 | } | 403 | } |
| @@ -413,12 +413,12 @@ static int calc_dd_growth(const struct ubifs_info *c, | |||
| 413 | { | 413 | { |
| 414 | int dd_growth; | 414 | int dd_growth; |
| 415 | 415 | ||
| 416 | dd_growth = req->dirtied_page ? c->page_budget : 0; | 416 | dd_growth = req->dirtied_page ? c->bi.page_budget : 0; |
| 417 | 417 | ||
| 418 | if (req->dirtied_ino) | 418 | if (req->dirtied_ino) |
| 419 | dd_growth += c->inode_budget << (req->dirtied_ino - 1); | 419 | dd_growth += c->bi.inode_budget << (req->dirtied_ino - 1); |
| 420 | if (req->mod_dent) | 420 | if (req->mod_dent) |
| 421 | dd_growth += c->dent_budget; | 421 | dd_growth += c->bi.dent_budget; |
| 422 | dd_growth += req->dirtied_ino_d; | 422 | dd_growth += req->dirtied_ino_d; |
| 423 | return dd_growth; | 423 | return dd_growth; |
| 424 | } | 424 | } |
| @@ -460,19 +460,19 @@ int ubifs_budget_space(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
| 460 | 460 | ||
| 461 | again: | 461 | again: |
| 462 | spin_lock(&c->space_lock); | 462 | spin_lock(&c->space_lock); |
| 463 | ubifs_assert(c->budg_idx_growth >= 0); | 463 | ubifs_assert(c->bi.idx_growth >= 0); |
| 464 | ubifs_assert(c->budg_data_growth >= 0); | 464 | ubifs_assert(c->bi.data_growth >= 0); |
| 465 | ubifs_assert(c->budg_dd_growth >= 0); | 465 | ubifs_assert(c->bi.dd_growth >= 0); |
| 466 | 466 | ||
| 467 | if (unlikely(c->nospace) && (c->nospace_rp || !can_use_rp(c))) { | 467 | if (unlikely(c->bi.nospace) && (c->bi.nospace_rp || !can_use_rp(c))) { |
| 468 | dbg_budg("no space"); | 468 | dbg_budg("no space"); |
| 469 | spin_unlock(&c->space_lock); | 469 | spin_unlock(&c->space_lock); |
| 470 | return -ENOSPC; | 470 | return -ENOSPC; |
| 471 | } | 471 | } |
| 472 | 472 | ||
| 473 | c->budg_idx_growth += idx_growth; | 473 | c->bi.idx_growth += idx_growth; |
| 474 | c->budg_data_growth += data_growth; | 474 | c->bi.data_growth += data_growth; |
| 475 | c->budg_dd_growth += dd_growth; | 475 | c->bi.dd_growth += dd_growth; |
| 476 | 476 | ||
| 477 | err = do_budget_space(c); | 477 | err = do_budget_space(c); |
| 478 | if (likely(!err)) { | 478 | if (likely(!err)) { |
| @@ -484,9 +484,9 @@ again: | |||
| 484 | } | 484 | } |
| 485 | 485 | ||
| 486 | /* Restore the old values */ | 486 | /* Restore the old values */ |
| 487 | c->budg_idx_growth -= idx_growth; | 487 | c->bi.idx_growth -= idx_growth; |
| 488 | c->budg_data_growth -= data_growth; | 488 | c->bi.data_growth -= data_growth; |
| 489 | c->budg_dd_growth -= dd_growth; | 489 | c->bi.dd_growth -= dd_growth; |
| 490 | spin_unlock(&c->space_lock); | 490 | spin_unlock(&c->space_lock); |
| 491 | 491 | ||
| 492 | if (req->fast) { | 492 | if (req->fast) { |
| @@ -506,9 +506,9 @@ again: | |||
| 506 | goto again; | 506 | goto again; |
| 507 | } | 507 | } |
| 508 | dbg_budg("FS is full, -ENOSPC"); | 508 | dbg_budg("FS is full, -ENOSPC"); |
| 509 | c->nospace = 1; | 509 | c->bi.nospace = 1; |
| 510 | if (can_use_rp(c) || c->rp_size == 0) | 510 | if (can_use_rp(c) || c->rp_size == 0) |
| 511 | c->nospace_rp = 1; | 511 | c->bi.nospace_rp = 1; |
| 512 | smp_wmb(); | 512 | smp_wmb(); |
| 513 | } else | 513 | } else |
| 514 | ubifs_err("cannot budget space, error %d", err); | 514 | ubifs_err("cannot budget space, error %d", err); |
| @@ -523,8 +523,8 @@ again: | |||
| 523 | * This function releases the space budgeted by 'ubifs_budget_space()'. Note, | 523 | * This function releases the space budgeted by 'ubifs_budget_space()'. Note, |
| 524 | * since the index changes (which were budgeted for in @req->idx_growth) will | 524 | * since the index changes (which were budgeted for in @req->idx_growth) will |
| 525 | * only be written to the media on commit, this function moves the index budget | 525 | * only be written to the media on commit, this function moves the index budget |
| 526 | * from @c->budg_idx_growth to @c->budg_uncommitted_idx. The latter will be | 526 | * from @c->bi.idx_growth to @c->bi.uncommitted_idx. The latter will be zeroed |
| 527 | * zeroed by the commit operation. | 527 | * by the commit operation. |
| 528 | */ | 528 | */ |
| 529 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | 529 | void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) |
| 530 | { | 530 | { |
| @@ -553,23 +553,23 @@ void ubifs_release_budget(struct ubifs_info *c, struct ubifs_budget_req *req) | |||
| 553 | if (!req->data_growth && !req->dd_growth) | 553 | if (!req->data_growth && !req->dd_growth) |
| 554 | return; | 554 | return; |
| 555 | 555 | ||
| 556 | c->nospace = c->nospace_rp = 0; | 556 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 557 | smp_wmb(); | 557 | smp_wmb(); |
| 558 | 558 | ||
| 559 | spin_lock(&c->space_lock); | 559 | spin_lock(&c->space_lock); |
| 560 | c->budg_idx_growth -= req->idx_growth; | 560 | c->bi.idx_growth -= req->idx_growth; |
| 561 | c->budg_uncommitted_idx += req->idx_growth; | 561 | c->bi.uncommitted_idx += req->idx_growth; |
| 562 | c->budg_data_growth -= req->data_growth; | 562 | c->bi.data_growth -= req->data_growth; |
| 563 | c->budg_dd_growth -= req->dd_growth; | 563 | c->bi.dd_growth -= req->dd_growth; |
| 564 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 564 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 565 | 565 | ||
| 566 | ubifs_assert(c->budg_idx_growth >= 0); | 566 | ubifs_assert(c->bi.idx_growth >= 0); |
| 567 | ubifs_assert(c->budg_data_growth >= 0); | 567 | ubifs_assert(c->bi.data_growth >= 0); |
| 568 | ubifs_assert(c->budg_dd_growth >= 0); | 568 | ubifs_assert(c->bi.dd_growth >= 0); |
| 569 | ubifs_assert(c->min_idx_lebs < c->main_lebs); | 569 | ubifs_assert(c->bi.min_idx_lebs < c->main_lebs); |
| 570 | ubifs_assert(!(c->budg_idx_growth & 7)); | 570 | ubifs_assert(!(c->bi.idx_growth & 7)); |
| 571 | ubifs_assert(!(c->budg_data_growth & 7)); | 571 | ubifs_assert(!(c->bi.data_growth & 7)); |
| 572 | ubifs_assert(!(c->budg_dd_growth & 7)); | 572 | ubifs_assert(!(c->bi.dd_growth & 7)); |
| 573 | spin_unlock(&c->space_lock); | 573 | spin_unlock(&c->space_lock); |
| 574 | } | 574 | } |
| 575 | 575 | ||
| @@ -586,13 +586,13 @@ void ubifs_convert_page_budget(struct ubifs_info *c) | |||
| 586 | { | 586 | { |
| 587 | spin_lock(&c->space_lock); | 587 | spin_lock(&c->space_lock); |
| 588 | /* Release the index growth reservation */ | 588 | /* Release the index growth reservation */ |
| 589 | c->budg_idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; | 589 | c->bi.idx_growth -= c->max_idx_node_sz << UBIFS_BLOCKS_PER_PAGE_SHIFT; |
| 590 | /* Release the data growth reservation */ | 590 | /* Release the data growth reservation */ |
| 591 | c->budg_data_growth -= c->page_budget; | 591 | c->bi.data_growth -= c->bi.page_budget; |
| 592 | /* Increase the dirty data growth reservation instead */ | 592 | /* Increase the dirty data growth reservation instead */ |
| 593 | c->budg_dd_growth += c->page_budget; | 593 | c->bi.dd_growth += c->bi.page_budget; |
| 594 | /* And re-calculate the indexing space reservation */ | 594 | /* And re-calculate the indexing space reservation */ |
| 595 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 595 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 596 | spin_unlock(&c->space_lock); | 596 | spin_unlock(&c->space_lock); |
| 597 | } | 597 | } |
| 598 | 598 | ||
| @@ -612,7 +612,7 @@ void ubifs_release_dirty_inode_budget(struct ubifs_info *c, | |||
| 612 | 612 | ||
| 613 | memset(&req, 0, sizeof(struct ubifs_budget_req)); | 613 | memset(&req, 0, sizeof(struct ubifs_budget_req)); |
| 614 | /* The "no space" flags will be cleared because dd_growth is > 0 */ | 614 | /* The "no space" flags will be cleared because dd_growth is > 0 */ |
| 615 | req.dd_growth = c->inode_budget + ALIGN(ui->data_len, 8); | 615 | req.dd_growth = c->bi.inode_budget + ALIGN(ui->data_len, 8); |
| 616 | ubifs_release_budget(c, &req); | 616 | ubifs_release_budget(c, &req); |
| 617 | } | 617 | } |
| 618 | 618 | ||
| @@ -682,9 +682,9 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) | |||
| 682 | int rsvd_idx_lebs, lebs; | 682 | int rsvd_idx_lebs, lebs; |
| 683 | long long available, outstanding, free; | 683 | long long available, outstanding, free; |
| 684 | 684 | ||
| 685 | ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); | 685 | ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); |
| 686 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 686 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
| 687 | available = ubifs_calc_available(c, c->min_idx_lebs); | 687 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); |
| 688 | 688 | ||
| 689 | /* | 689 | /* |
| 690 | * When reporting free space to user-space, UBIFS guarantees that it is | 690 | * When reporting free space to user-space, UBIFS guarantees that it is |
| @@ -697,8 +697,8 @@ long long ubifs_get_free_space_nolock(struct ubifs_info *c) | |||
| 697 | * Note, the calculations below are similar to what we have in | 697 | * Note, the calculations below are similar to what we have in |
| 698 | * 'do_budget_space()', so refer there for comments. | 698 | * 'do_budget_space()', so refer there for comments. |
| 699 | */ | 699 | */ |
| 700 | if (c->min_idx_lebs > c->lst.idx_lebs) | 700 | if (c->bi.min_idx_lebs > c->lst.idx_lebs) |
| 701 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | 701 | rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; |
| 702 | else | 702 | else |
| 703 | rsvd_idx_lebs = 0; | 703 | rsvd_idx_lebs = 0; |
| 704 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 704 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
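
The ubifs renames above (and in the hunks that follow) are mechanical: the scattered budg_* and nospace fields of struct ubifs_info move into an embedded budgeting struct at c->bi, which lets dbg_dump_budg() print either the live state or a saved copy of it. Reconstructed from the fields these hunks touch — the exact types are an assumption, so treat this as a rough shape rather than the header's definition:

    struct ubifs_budg_info {
            long long idx_growth;       /* was c->budg_idx_growth       */
            long long data_growth;      /* was c->budg_data_growth      */
            long long dd_growth;        /* was c->budg_dd_growth        */
            long long uncommitted_idx;  /* was c->budg_uncommitted_idx  */
            unsigned long long old_idx_sz;
            int min_idx_lebs;
            unsigned int nospace:1;
            unsigned int nospace_rp:1;
            int page_budget;
            int inode_budget;
            int dent_budget;
    };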
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index 1bd01ded7123..87cd0ead8633 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c | |||
| @@ -182,7 +182,7 @@ static int do_commit(struct ubifs_info *c) | |||
| 182 | c->mst_node->root_len = cpu_to_le32(zroot.len); | 182 | c->mst_node->root_len = cpu_to_le32(zroot.len); |
| 183 | c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); | 183 | c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); |
| 184 | c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); | 184 | c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); |
| 185 | c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); | 185 | c->mst_node->index_size = cpu_to_le64(c->bi.old_idx_sz); |
| 186 | c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); | 186 | c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); |
| 187 | c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); | 187 | c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); |
| 188 | c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); | 188 | c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 004d3745dc45..0bb2bcef0de9 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
| @@ -34,7 +34,6 @@ | |||
| 34 | #include <linux/moduleparam.h> | 34 | #include <linux/moduleparam.h> |
| 35 | #include <linux/debugfs.h> | 35 | #include <linux/debugfs.h> |
| 36 | #include <linux/math64.h> | 36 | #include <linux/math64.h> |
| 37 | #include <linux/slab.h> | ||
| 38 | 37 | ||
| 39 | #ifdef CONFIG_UBIFS_FS_DEBUG | 38 | #ifdef CONFIG_UBIFS_FS_DEBUG |
| 40 | 39 | ||
| @@ -43,15 +42,12 @@ DEFINE_SPINLOCK(dbg_lock); | |||
| 43 | static char dbg_key_buf0[128]; | 42 | static char dbg_key_buf0[128]; |
| 44 | static char dbg_key_buf1[128]; | 43 | static char dbg_key_buf1[128]; |
| 45 | 44 | ||
| 46 | unsigned int ubifs_msg_flags; | ||
| 47 | unsigned int ubifs_chk_flags; | 45 | unsigned int ubifs_chk_flags; |
| 48 | unsigned int ubifs_tst_flags; | 46 | unsigned int ubifs_tst_flags; |
| 49 | 47 | ||
| 50 | module_param_named(debug_msgs, ubifs_msg_flags, uint, S_IRUGO | S_IWUSR); | ||
| 51 | module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); | 48 | module_param_named(debug_chks, ubifs_chk_flags, uint, S_IRUGO | S_IWUSR); |
| 52 | module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); | 49 | module_param_named(debug_tsts, ubifs_tst_flags, uint, S_IRUGO | S_IWUSR); |
| 53 | 50 | ||
| 54 | MODULE_PARM_DESC(debug_msgs, "Debug message type flags"); | ||
| 55 | MODULE_PARM_DESC(debug_chks, "Debug check flags"); | 51 | MODULE_PARM_DESC(debug_chks, "Debug check flags"); |
| 56 | MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); | 52 | MODULE_PARM_DESC(debug_tsts, "Debug special test flags"); |
| 57 | 53 | ||
| @@ -317,6 +313,8 @@ void dbg_dump_node(const struct ubifs_info *c, const void *node) | |||
| 317 | printk(KERN_DEBUG "\tflags %#x\n", sup_flags); | 313 | printk(KERN_DEBUG "\tflags %#x\n", sup_flags); |
| 318 | printk(KERN_DEBUG "\t big_lpt %u\n", | 314 | printk(KERN_DEBUG "\t big_lpt %u\n", |
| 319 | !!(sup_flags & UBIFS_FLG_BIGLPT)); | 315 | !!(sup_flags & UBIFS_FLG_BIGLPT)); |
| 316 | printk(KERN_DEBUG "\t space_fixup %u\n", | ||
| 317 | !!(sup_flags & UBIFS_FLG_SPACE_FIXUP)); | ||
| 320 | printk(KERN_DEBUG "\tmin_io_size %u\n", | 318 | printk(KERN_DEBUG "\tmin_io_size %u\n", |
| 321 | le32_to_cpu(sup->min_io_size)); | 319 | le32_to_cpu(sup->min_io_size)); |
| 322 | printk(KERN_DEBUG "\tleb_size %u\n", | 320 | printk(KERN_DEBUG "\tleb_size %u\n", |
| @@ -602,7 +600,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst) | |||
| 602 | spin_unlock(&dbg_lock); | 600 | spin_unlock(&dbg_lock); |
| 603 | } | 601 | } |
| 604 | 602 | ||
| 605 | void dbg_dump_budg(struct ubifs_info *c) | 603 | void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi) |
| 606 | { | 604 | { |
| 607 | int i; | 605 | int i; |
| 608 | struct rb_node *rb; | 606 | struct rb_node *rb; |
| @@ -610,26 +608,42 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
| 610 | struct ubifs_gced_idx_leb *idx_gc; | 608 | struct ubifs_gced_idx_leb *idx_gc; |
| 611 | long long available, outstanding, free; | 609 | long long available, outstanding, free; |
| 612 | 610 | ||
| 613 | ubifs_assert(spin_is_locked(&c->space_lock)); | 611 | spin_lock(&c->space_lock); |
| 614 | spin_lock(&dbg_lock); | 612 | spin_lock(&dbg_lock); |
| 615 | printk(KERN_DEBUG "(pid %d) Budgeting info: budg_data_growth %lld, " | 613 | printk(KERN_DEBUG "(pid %d) Budgeting info: data budget sum %lld, " |
| 616 | "budg_dd_growth %lld, budg_idx_growth %lld\n", current->pid, | 614 | "total budget sum %lld\n", current->pid, |
| 617 | c->budg_data_growth, c->budg_dd_growth, c->budg_idx_growth); | 615 | bi->data_growth + bi->dd_growth, |
| 618 | printk(KERN_DEBUG "\tdata budget sum %lld, total budget sum %lld, " | 616 | bi->data_growth + bi->dd_growth + bi->idx_growth); |
| 619 | "freeable_cnt %d\n", c->budg_data_growth + c->budg_dd_growth, | 617 | printk(KERN_DEBUG "\tbudg_data_growth %lld, budg_dd_growth %lld, " |
| 620 | c->budg_data_growth + c->budg_dd_growth + c->budg_idx_growth, | 618 | "budg_idx_growth %lld\n", bi->data_growth, bi->dd_growth, |
| 621 | c->freeable_cnt); | 619 | bi->idx_growth); |
| 622 | printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %lld, " | 620 | printk(KERN_DEBUG "\tmin_idx_lebs %d, old_idx_sz %llu, " |
| 623 | "calc_idx_sz %lld, idx_gc_cnt %d\n", c->min_idx_lebs, | 621 | "uncommitted_idx %lld\n", bi->min_idx_lebs, bi->old_idx_sz, |
| 624 | c->old_idx_sz, c->calc_idx_sz, c->idx_gc_cnt); | 622 | bi->uncommitted_idx); |
| 623 | printk(KERN_DEBUG "\tpage_budget %d, inode_budget %d, dent_budget %d\n", | ||
| 624 | bi->page_budget, bi->inode_budget, bi->dent_budget); | ||
| 625 | printk(KERN_DEBUG "\tnospace %u, nospace_rp %u\n", | ||
| 626 | bi->nospace, bi->nospace_rp); | ||
| 627 | printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", | ||
| 628 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); | ||
| 629 | |||
| 630 | if (bi != &c->bi) | ||
| 631 | /* | ||
| 632 | * If we are dumping saved budgeting data, do not print | ||
| 633 | * additional information which is about the current state, not | ||
| 634 | * the old one which corresponded to the saved budgeting data. | ||
| 635 | */ | ||
| 636 | goto out_unlock; | ||
| 637 | |||
| 638 | printk(KERN_DEBUG "\tfreeable_cnt %d, calc_idx_sz %lld, idx_gc_cnt %d\n", | ||
| 639 | c->freeable_cnt, c->calc_idx_sz, c->idx_gc_cnt); | ||
| 625 | printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " | 640 | printk(KERN_DEBUG "\tdirty_pg_cnt %ld, dirty_zn_cnt %ld, " |
| 626 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), | 641 | "clean_zn_cnt %ld\n", atomic_long_read(&c->dirty_pg_cnt), |
| 627 | atomic_long_read(&c->dirty_zn_cnt), | 642 | atomic_long_read(&c->dirty_zn_cnt), |
| 628 | atomic_long_read(&c->clean_zn_cnt)); | 643 | atomic_long_read(&c->clean_zn_cnt)); |
| 629 | printk(KERN_DEBUG "\tdark_wm %d, dead_wm %d, max_idx_node_sz %d\n", | ||
| 630 | c->dark_wm, c->dead_wm, c->max_idx_node_sz); | ||
| 631 | printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", | 644 | printk(KERN_DEBUG "\tgc_lnum %d, ihead_lnum %d\n", |
| 632 | c->gc_lnum, c->ihead_lnum); | 645 | c->gc_lnum, c->ihead_lnum); |
| 646 | |||
| 633 | /* If we are in R/O mode, journal heads do not exist */ | 647 | /* If we are in R/O mode, journal heads do not exist */ |
| 634 | if (c->jheads) | 648 | if (c->jheads) |
| 635 | for (i = 0; i < c->jhead_cnt; i++) | 649 | for (i = 0; i < c->jhead_cnt; i++) |
| @@ -648,13 +662,15 @@ void dbg_dump_budg(struct ubifs_info *c) | |||
| 648 | printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); | 662 | printk(KERN_DEBUG "\tcommit state %d\n", c->cmt_state); |
| 649 | 663 | ||
| 650 | /* Print budgeting predictions */ | 664 | /* Print budgeting predictions */ |
| 651 | available = ubifs_calc_available(c, c->min_idx_lebs); | 665 | available = ubifs_calc_available(c, c->bi.min_idx_lebs); |
| 652 | outstanding = c->budg_data_growth + c->budg_dd_growth; | 666 | outstanding = c->bi.data_growth + c->bi.dd_growth; |
| 653 | free = ubifs_get_free_space_nolock(c); | 667 | free = ubifs_get_free_space_nolock(c); |
| 654 | printk(KERN_DEBUG "Budgeting predictions:\n"); | 668 | printk(KERN_DEBUG "Budgeting predictions:\n"); |
| 655 | printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", | 669 | printk(KERN_DEBUG "\tavailable: %lld, outstanding %lld, free %lld\n", |
| 656 | available, outstanding, free); | 670 | available, outstanding, free); |
| 671 | out_unlock: | ||
| 657 | spin_unlock(&dbg_lock); | 672 | spin_unlock(&dbg_lock); |
| 673 | spin_unlock(&c->space_lock); | ||
| 658 | } | 674 | } |
| 659 | 675 | ||
| 660 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | 676 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) |
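The hunk above moves the @c->space_lock acquisition into dbg_dump_budg() itself (together with the new out_unlock label), so callers no longer wrap the call in the lock. A minimal userspace sketch of the resulting calling convention, with a pthread mutex standing in for the kernel spinlock; the struct and names are illustrative, not the kernel API:

#include <pthread.h>
#include <stdio.h>

struct budg_info { long long data_growth, dd_growth, idx_growth; };

static pthread_mutex_t space_lock = PTHREAD_MUTEX_INITIALIZER;
static struct budg_info current_bi;

/* The dump function now takes the lock itself, so it is safe to call
 * from any context that does not already hold space_lock. */
static void dump_budg(const struct budg_info *bi)
{
	pthread_mutex_lock(&space_lock);
	printf("data budget sum %lld, total budget sum %lld\n",
	       bi->data_growth + bi->dd_growth,
	       bi->data_growth + bi->dd_growth + bi->idx_growth);
	pthread_mutex_unlock(&space_lock);
}

int main(void)
{
	dump_budg(&current_bi);	/* no external locking required */
	return 0;
}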
| @@ -729,7 +745,13 @@ void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp) | |||
| 729 | if (bud->lnum == lp->lnum) { | 745 | if (bud->lnum == lp->lnum) { |
| 730 | int head = 0; | 746 | int head = 0; |
| 731 | for (i = 0; i < c->jhead_cnt; i++) { | 747 | for (i = 0; i < c->jhead_cnt; i++) { |
| 732 | if (lp->lnum == c->jheads[i].wbuf.lnum) { | 748 | /* |
| 749 | * Note, if we are in R/O mode or in the middle | ||
| 750 | * of mounting/re-mounting, the write-buffers do | ||
| 751 | * not exist. | ||
| 752 | */ | ||
| 753 | if (c->jheads && | ||
| 754 | lp->lnum == c->jheads[i].wbuf.lnum) { | ||
| 733 | printk(KERN_CONT ", jhead %s", | 755 | printk(KERN_CONT ", jhead %s", |
| 734 | dbg_jhead(i)); | 756 | dbg_jhead(i)); |
| 735 | head = 1; | 757 | head = 1; |
| @@ -976,6 +998,8 @@ void dbg_save_space_info(struct ubifs_info *c) | |||
| 976 | 998 | ||
| 977 | spin_lock(&c->space_lock); | 999 | spin_lock(&c->space_lock); |
| 978 | memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); | 1000 | memcpy(&d->saved_lst, &c->lst, sizeof(struct ubifs_lp_stats)); |
| 1001 | memcpy(&d->saved_bi, &c->bi, sizeof(struct ubifs_budg_info)); | ||
| 1002 | d->saved_idx_gc_cnt = c->idx_gc_cnt; | ||
| 979 | 1003 | ||
| 980 | /* | 1004 | /* |
| 981 | * We use a dirty hack here and zero out @c->freeable_cnt, because it | 1005 | * We use a dirty hack here and zero out @c->freeable_cnt, because it |
| @@ -1042,14 +1066,14 @@ int dbg_check_space_info(struct ubifs_info *c) | |||
| 1042 | out: | 1066 | out: |
| 1043 | ubifs_msg("saved lprops statistics dump"); | 1067 | ubifs_msg("saved lprops statistics dump"); |
| 1044 | dbg_dump_lstats(&d->saved_lst); | 1068 | dbg_dump_lstats(&d->saved_lst); |
| 1045 | ubifs_get_lp_stats(c, &lst); | 1069 | ubifs_msg("saved budgeting info dump"); |
| 1046 | 1070 | dbg_dump_budg(c, &d->saved_bi); | |
| 1071 | ubifs_msg("saved idx_gc_cnt %d", d->saved_idx_gc_cnt); | ||
| 1047 | ubifs_msg("current lprops statistics dump"); | 1072 | ubifs_msg("current lprops statistics dump"); |
| 1073 | ubifs_get_lp_stats(c, &lst); | ||
| 1048 | dbg_dump_lstats(&lst); | 1074 | dbg_dump_lstats(&lst); |
| 1049 | 1075 | ubifs_msg("current budgeting info dump"); | |
| 1050 | spin_lock(&c->space_lock); | 1076 | dbg_dump_budg(c, &c->bi); |
| 1051 | dbg_dump_budg(c); | ||
| 1052 | spin_unlock(&c->space_lock); | ||
| 1053 | dump_stack(); | 1077 | dump_stack(); |
| 1054 | return -EINVAL; | 1078 | return -EINVAL; |
| 1055 | } | 1079 | } |
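dbg_save_space_info() now also snapshots the budgeting info and @c->idx_gc_cnt, and on a mismatch dbg_check_space_info() dumps the saved state next to the current one. A compact userspace model of this save-then-verify pattern; the struct and field names are illustrative:

#include <stdio.h>
#include <string.h>

struct budg_info { long long data_growth, dd_growth, idx_growth; };

static struct budg_info cur = { 10, 20, 30 };
static struct budg_info saved;

static void save_space_info(void)
{
	memcpy(&saved, &cur, sizeof(saved));	/* snapshot for later comparison */
}

static int check_space_info(void)
{
	if (!memcmp(&saved, &cur, sizeof(saved)))
		return 0;
	/* Dump both states so the divergence can be analyzed. */
	fprintf(stderr, "saved data_growth %lld, current data_growth %lld\n",
		saved.data_growth, cur.data_growth);
	return -1;
}

int main(void)
{
	save_space_info();
	cur.data_growth += 5;	/* simulate a budgeting leak */
	return check_space_info() ? 1 : 0;
}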
| @@ -1793,6 +1817,8 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, | |||
| 1793 | struct rb_node **p, *parent = NULL; | 1817 | struct rb_node **p, *parent = NULL; |
| 1794 | struct fsck_inode *fscki; | 1818 | struct fsck_inode *fscki; |
| 1795 | ino_t inum = key_inum_flash(c, &ino->key); | 1819 | ino_t inum = key_inum_flash(c, &ino->key); |
| 1820 | struct inode *inode; | ||
| 1821 | struct ubifs_inode *ui; | ||
| 1796 | 1822 | ||
| 1797 | p = &fsckd->inodes.rb_node; | 1823 | p = &fsckd->inodes.rb_node; |
| 1798 | while (*p) { | 1824 | while (*p) { |
| @@ -1816,19 +1842,46 @@ static struct fsck_inode *add_inode(struct ubifs_info *c, | |||
| 1816 | if (!fscki) | 1842 | if (!fscki) |
| 1817 | return ERR_PTR(-ENOMEM); | 1843 | return ERR_PTR(-ENOMEM); |
| 1818 | 1844 | ||
| 1845 | inode = ilookup(c->vfs_sb, inum); | ||
| 1846 | |||
| 1819 | fscki->inum = inum; | 1847 | fscki->inum = inum; |
| 1820 | fscki->nlink = le32_to_cpu(ino->nlink); | 1848 | /* |
| 1821 | fscki->size = le64_to_cpu(ino->size); | 1849 | * If the inode is present in the VFS inode cache, use it instead of |
| 1822 | fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); | 1850 | * the on-flash inode which might be out-of-date. E.g., the size might |
| 1823 | fscki->xattr_sz = le32_to_cpu(ino->xattr_size); | 1851 | * be out-of-date. If we do not do this, the following may happen, for |
| 1824 | fscki->xattr_nms = le32_to_cpu(ino->xattr_names); | 1852 | * example: |
| 1825 | fscki->mode = le32_to_cpu(ino->mode); | 1853 | * 1. A power cut happens |
| 1854 | * 2. We mount the file-system R/O, the replay process fixes up the | ||
| 1855 | * inode size in the VFS cache, but not on-flash. | ||
| 1856 | * 3. 'check_leaf()' fails because it hits a data node beyond inode | ||
| 1857 | * size. | ||
| 1858 | */ | ||
| 1859 | if (!inode) { | ||
| 1860 | fscki->nlink = le32_to_cpu(ino->nlink); | ||
| 1861 | fscki->size = le64_to_cpu(ino->size); | ||
| 1862 | fscki->xattr_cnt = le32_to_cpu(ino->xattr_cnt); | ||
| 1863 | fscki->xattr_sz = le32_to_cpu(ino->xattr_size); | ||
| 1864 | fscki->xattr_nms = le32_to_cpu(ino->xattr_names); | ||
| 1865 | fscki->mode = le32_to_cpu(ino->mode); | ||
| 1866 | } else { | ||
| 1867 | ui = ubifs_inode(inode); | ||
| 1868 | fscki->nlink = inode->i_nlink; | ||
| 1869 | fscki->size = inode->i_size; | ||
| 1870 | fscki->xattr_cnt = ui->xattr_cnt; | ||
| 1871 | fscki->xattr_sz = ui->xattr_size; | ||
| 1872 | fscki->xattr_nms = ui->xattr_names; | ||
| 1873 | fscki->mode = inode->i_mode; | ||
| 1874 | iput(inode); | ||
| 1875 | } | ||
| 1876 | |||
| 1826 | if (S_ISDIR(fscki->mode)) { | 1877 | if (S_ISDIR(fscki->mode)) { |
| 1827 | fscki->calc_sz = UBIFS_INO_NODE_SZ; | 1878 | fscki->calc_sz = UBIFS_INO_NODE_SZ; |
| 1828 | fscki->calc_cnt = 2; | 1879 | fscki->calc_cnt = 2; |
| 1829 | } | 1880 | } |
| 1881 | |||
| 1830 | rb_link_node(&fscki->rb, parent, p); | 1882 | rb_link_node(&fscki->rb, parent, p); |
| 1831 | rb_insert_color(&fscki->rb, &fsckd->inodes); | 1883 | rb_insert_color(&fscki->rb, &fsckd->inodes); |
| 1884 | |||
| 1832 | return fscki; | 1885 | return fscki; |
| 1833 | } | 1886 | } |
| 1834 | 1887 | ||
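The point of this hunk, as its comment explains, is that check data is taken from the cached VFS inode when one exists, because replay may already have corrected it in memory while flash is stale. A userspace sketch of the cache-first preference; the one-entry cache stands in for ilookup()/iput() and all names are illustrative:

#include <stdio.h>

struct inode_rec { unsigned long ino; long long size; unsigned nlink; };

/* Toy one-entry inode cache standing in for the VFS inode cache. */
static struct inode_rec cached = { .ino = 42, .size = 8192, .nlink = 1 };

static const struct inode_rec *cache_lookup(unsigned long ino)
{
	return ino == cached.ino ? &cached : NULL;
}

static void fill_check_info(struct inode_rec *out,
			    const struct inode_rec *on_flash)
{
	const struct inode_rec *mem = cache_lookup(on_flash->ino);

	if (!mem) {
		*out = *on_flash;	/* no cached copy: trust the flash node */
	} else {
		/* The in-memory inode may have been fixed up (e.g. by
		 * replay after a power cut), so it wins over flash. */
		*out = *mem;
	}
}

int main(void)
{
	struct inode_rec flash = { .ino = 42, .size = 4096, .nlink = 1 };
	struct inode_rec info;

	fill_check_info(&info, &flash);
	printf("checking against size %lld\n", info.size);	/* 8192 */
	return 0;
}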
| @@ -2421,7 +2474,8 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) | |||
| 2421 | hashb = key_block(c, &sb->key); | 2474 | hashb = key_block(c, &sb->key); |
| 2422 | 2475 | ||
| 2423 | if (hasha > hashb) { | 2476 | if (hasha > hashb) { |
| 2424 | ubifs_err("larger hash %u goes before %u", hasha, hashb); | 2477 | ubifs_err("larger hash %u goes before %u", |
| 2478 | hasha, hashb); | ||
| 2425 | goto error_dump; | 2479 | goto error_dump; |
| 2426 | } | 2480 | } |
| 2427 | } | 2481 | } |
| @@ -2437,14 +2491,12 @@ error_dump: | |||
| 2437 | return 0; | 2491 | return 0; |
| 2438 | } | 2492 | } |
| 2439 | 2493 | ||
| 2440 | static int invocation_cnt; | ||
| 2441 | |||
| 2442 | int dbg_force_in_the_gaps(void) | 2494 | int dbg_force_in_the_gaps(void) |
| 2443 | { | 2495 | { |
| 2444 | if (!dbg_force_in_the_gaps_enabled) | 2496 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) |
| 2445 | return 0; | 2497 | return 0; |
| 2446 | /* Force in-the-gaps every 8th commit */ | 2498 | |
| 2447 | return !((invocation_cnt++) & 0x7); | 2499 | return !(random32() & 7); |
| 2448 | } | 2500 | } |
| 2449 | 2501 | ||
| 2450 | /* Failure mode for recovery testing */ | 2502 | /* Failure mode for recovery testing */ |
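The static invocation counter is gone: in-the-gaps is now forced with probability 1/8 per call rather than deterministically on every 8th commit, and only when general checks are enabled. A userspace equivalent, with rand() standing in for the kernel's random32():

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

static int force_in_the_gaps(void)
{
	/* Fires on average once per eight calls, but at unpredictable
	 * points, which exercises more commit interleavings than a
	 * fixed every-8th counter. */
	return !(rand() & 7);
}

int main(void)
{
	int i, hits = 0;

	srand((unsigned)time(NULL));
	for (i = 0; i < 8000; i++)
		hits += force_in_the_gaps();
	printf("forced %d times out of 8000 (expect ~1000)\n", hits);
	return 0;
}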
| @@ -2632,7 +2684,7 @@ int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | |||
| 2632 | int len, int check) | 2684 | int len, int check) |
| 2633 | { | 2685 | { |
| 2634 | if (in_failure_mode(desc)) | 2686 | if (in_failure_mode(desc)) |
| 2635 | return -EIO; | 2687 | return -EROFS; |
| 2636 | return ubi_leb_read(desc, lnum, buf, offset, len, check); | 2688 | return ubi_leb_read(desc, lnum, buf, offset, len, check); |
| 2637 | } | 2689 | } |
| 2638 | 2690 | ||
| @@ -2642,7 +2694,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | |||
| 2642 | int err, failing; | 2694 | int err, failing; |
| 2643 | 2695 | ||
| 2644 | if (in_failure_mode(desc)) | 2696 | if (in_failure_mode(desc)) |
| 2645 | return -EIO; | 2697 | return -EROFS; |
| 2646 | failing = do_fail(desc, lnum, 1); | 2698 | failing = do_fail(desc, lnum, 1); |
| 2647 | if (failing) | 2699 | if (failing) |
| 2648 | cut_data(buf, len); | 2700 | cut_data(buf, len); |
| @@ -2650,7 +2702,7 @@ int dbg_leb_write(struct ubi_volume_desc *desc, int lnum, const void *buf, | |||
| 2650 | if (err) | 2702 | if (err) |
| 2651 | return err; | 2703 | return err; |
| 2652 | if (failing) | 2704 | if (failing) |
| 2653 | return -EIO; | 2705 | return -EROFS; |
| 2654 | return 0; | 2706 | return 0; |
| 2655 | } | 2707 | } |
| 2656 | 2708 | ||
| @@ -2660,12 +2712,12 @@ int dbg_leb_change(struct ubi_volume_desc *desc, int lnum, const void *buf, | |||
| 2660 | int err; | 2712 | int err; |
| 2661 | 2713 | ||
| 2662 | if (do_fail(desc, lnum, 1)) | 2714 | if (do_fail(desc, lnum, 1)) |
| 2663 | return -EIO; | 2715 | return -EROFS; |
| 2664 | err = ubi_leb_change(desc, lnum, buf, len, dtype); | 2716 | err = ubi_leb_change(desc, lnum, buf, len, dtype); |
| 2665 | if (err) | 2717 | if (err) |
| 2666 | return err; | 2718 | return err; |
| 2667 | if (do_fail(desc, lnum, 1)) | 2719 | if (do_fail(desc, lnum, 1)) |
| 2668 | return -EIO; | 2720 | return -EROFS; |
| 2669 | return 0; | 2721 | return 0; |
| 2670 | } | 2722 | } |
| 2671 | 2723 | ||
| @@ -2674,12 +2726,12 @@ int dbg_leb_erase(struct ubi_volume_desc *desc, int lnum) | |||
| 2674 | int err; | 2726 | int err; |
| 2675 | 2727 | ||
| 2676 | if (do_fail(desc, lnum, 0)) | 2728 | if (do_fail(desc, lnum, 0)) |
| 2677 | return -EIO; | 2729 | return -EROFS; |
| 2678 | err = ubi_leb_erase(desc, lnum); | 2730 | err = ubi_leb_erase(desc, lnum); |
| 2679 | if (err) | 2731 | if (err) |
| 2680 | return err; | 2732 | return err; |
| 2681 | if (do_fail(desc, lnum, 0)) | 2733 | if (do_fail(desc, lnum, 0)) |
| 2682 | return -EIO; | 2734 | return -EROFS; |
| 2683 | return 0; | 2735 | return 0; |
| 2684 | } | 2736 | } |
| 2685 | 2737 | ||
| @@ -2688,19 +2740,19 @@ int dbg_leb_unmap(struct ubi_volume_desc *desc, int lnum) | |||
| 2688 | int err; | 2740 | int err; |
| 2689 | 2741 | ||
| 2690 | if (do_fail(desc, lnum, 0)) | 2742 | if (do_fail(desc, lnum, 0)) |
| 2691 | return -EIO; | 2743 | return -EROFS; |
| 2692 | err = ubi_leb_unmap(desc, lnum); | 2744 | err = ubi_leb_unmap(desc, lnum); |
| 2693 | if (err) | 2745 | if (err) |
| 2694 | return err; | 2746 | return err; |
| 2695 | if (do_fail(desc, lnum, 0)) | 2747 | if (do_fail(desc, lnum, 0)) |
| 2696 | return -EIO; | 2748 | return -EROFS; |
| 2697 | return 0; | 2749 | return 0; |
| 2698 | } | 2750 | } |
| 2699 | 2751 | ||
| 2700 | int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) | 2752 | int dbg_is_mapped(struct ubi_volume_desc *desc, int lnum) |
| 2701 | { | 2753 | { |
| 2702 | if (in_failure_mode(desc)) | 2754 | if (in_failure_mode(desc)) |
| 2703 | return -EIO; | 2755 | return -EROFS; |
| 2704 | return ubi_is_mapped(desc, lnum); | 2756 | return ubi_is_mapped(desc, lnum); |
| 2705 | } | 2757 | } |
| 2706 | 2758 | ||
| @@ -2709,12 +2761,12 @@ int dbg_leb_map(struct ubi_volume_desc *desc, int lnum, int dtype) | |||
| 2709 | int err; | 2761 | int err; |
| 2710 | 2762 | ||
| 2711 | if (do_fail(desc, lnum, 0)) | 2763 | if (do_fail(desc, lnum, 0)) |
| 2712 | return -EIO; | 2764 | return -EROFS; |
| 2713 | err = ubi_leb_map(desc, lnum, dtype); | 2765 | err = ubi_leb_map(desc, lnum, dtype); |
| 2714 | if (err) | 2766 | if (err) |
| 2715 | return err; | 2767 | return err; |
| 2716 | if (do_fail(desc, lnum, 0)) | 2768 | if (do_fail(desc, lnum, 0)) |
| 2717 | return -EIO; | 2769 | return -EROFS; |
| 2718 | return 0; | 2770 | return 0; |
| 2719 | } | 2771 | } |
| 2720 | 2772 | ||
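Every interposer in this block now fails with -EROFS instead of -EIO, matching what UBIFS itself reports once an emulated power cut has driven it read-only. A userspace model of the wrapper pattern; the fault probability and the stubbed underlying operation are illustrative:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

static int failure_mode;	/* latched once the emulated power cut trips */

static int do_fail(void)
{
	if (!failure_mode && !(rand() % 100))	/* ~1% chance per operation */
		failure_mode = 1;
	return failure_mode;
}

static int real_leb_write(int lnum) { (void)lnum; return 0; }	/* stub */

static int dbg_leb_write(int lnum)
{
	int err;

	if (do_fail())
		return -EROFS;	/* after the cut, act like an R/O file-system */
	err = real_leb_write(lnum);
	if (err)
		return err;
	if (do_fail())		/* the cut may also hit just after the write */
		return -EROFS;
	return 0;
}

int main(void)
{
	int i, err = 0;

	for (i = 0; i < 1000 && !err; i++)
		err = dbg_leb_write(i);
	printf("stopped at LEB %d with %d\n", i, err);
	return 0;
}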
| @@ -2784,7 +2836,7 @@ void dbg_debugfs_exit(void) | |||
| 2784 | static int open_debugfs_file(struct inode *inode, struct file *file) | 2836 | static int open_debugfs_file(struct inode *inode, struct file *file) |
| 2785 | { | 2837 | { |
| 2786 | file->private_data = inode->i_private; | 2838 | file->private_data = inode->i_private; |
| 2787 | return 0; | 2839 | return nonseekable_open(inode, file); |
| 2788 | } | 2840 | } |
| 2789 | 2841 | ||
| 2790 | static ssize_t write_debugfs_file(struct file *file, const char __user *buf, | 2842 | static ssize_t write_debugfs_file(struct file *file, const char __user *buf, |
| @@ -2795,18 +2847,15 @@ static ssize_t write_debugfs_file(struct file *file, const char __user *buf, | |||
| 2795 | 2847 | ||
| 2796 | if (file->f_path.dentry == d->dfs_dump_lprops) | 2848 | if (file->f_path.dentry == d->dfs_dump_lprops) |
| 2797 | dbg_dump_lprops(c); | 2849 | dbg_dump_lprops(c); |
| 2798 | else if (file->f_path.dentry == d->dfs_dump_budg) { | 2850 | else if (file->f_path.dentry == d->dfs_dump_budg) |
| 2799 | spin_lock(&c->space_lock); | 2851 | dbg_dump_budg(c, &c->bi); |
| 2800 | dbg_dump_budg(c); | 2852 | else if (file->f_path.dentry == d->dfs_dump_tnc) { |
| 2801 | spin_unlock(&c->space_lock); | ||
| 2802 | } else if (file->f_path.dentry == d->dfs_dump_tnc) { | ||
| 2803 | mutex_lock(&c->tnc_mutex); | 2853 | mutex_lock(&c->tnc_mutex); |
| 2804 | dbg_dump_tnc(c); | 2854 | dbg_dump_tnc(c); |
| 2805 | mutex_unlock(&c->tnc_mutex); | 2855 | mutex_unlock(&c->tnc_mutex); |
| 2806 | } else | 2856 | } else |
| 2807 | return -EINVAL; | 2857 | return -EINVAL; |
| 2808 | 2858 | ||
| 2809 | *ppos += count; | ||
| 2810 | return count; | 2859 | return count; |
| 2811 | } | 2860 | } |
| 2812 | 2861 | ||
| @@ -2814,7 +2863,7 @@ static const struct file_operations dfs_fops = { | |||
| 2814 | .open = open_debugfs_file, | 2863 | .open = open_debugfs_file, |
| 2815 | .write = write_debugfs_file, | 2864 | .write = write_debugfs_file, |
| 2816 | .owner = THIS_MODULE, | 2865 | .owner = THIS_MODULE, |
| 2817 | .llseek = default_llseek, | 2866 | .llseek = no_llseek, |
| 2818 | }; | 2867 | }; |
| 2819 | 2868 | ||
| 2820 | /** | 2869 | /** |
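The knob files are pure write-triggers, so this file's last hunks remove seek semantics end to end: nonseekable_open() at open time, no_llseek in the fops, and no *ppos advance in the write handler. A userspace model of a position-less command file; types and names are illustrative:

#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Each write is a complete command, so the file position is never
 * consulted or advanced. */
static ssize_t knob_write(const char *buf, size_t count, long long *ppos)
{
	(void)ppos;		/* intentionally untouched: no seek semantics */
	if (!count)
		return -1;
	printf("triggered: %.*s\n", (int)count, buf);
	return (ssize_t)count;	/* report everything consumed */
}

int main(void)
{
	long long pos = 0;

	knob_write("dump_budg", strlen("dump_budg"), &pos);
	return pos != 0;	/* position must remain where it started */
}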
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index e6493cac193d..a811ac4a26bb 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
| @@ -31,6 +31,8 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, | |||
| 31 | 31 | ||
| 32 | #ifdef CONFIG_UBIFS_FS_DEBUG | 32 | #ifdef CONFIG_UBIFS_FS_DEBUG |
| 33 | 33 | ||
| 34 | #include <linux/random.h> | ||
| 35 | |||
| 34 | /** | 36 | /** |
| 35 | * ubifs_debug_info - per-FS debugging information. | 37 | * ubifs_debug_info - per-FS debugging information. |
| 36 | * @old_zroot: old index root - used by 'dbg_check_old_index()' | 38 | * @old_zroot: old index root - used by 'dbg_check_old_index()' |
| @@ -50,13 +52,15 @@ typedef int (*dbg_znode_callback)(struct ubifs_info *c, | |||
| 50 | * @new_ihead_offs: used by debugging to check @c->ihead_offs | 52 | * @new_ihead_offs: used by debugging to check @c->ihead_offs |
| 51 | * | 53 | * |
| 52 | * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') | 54 | * @saved_lst: saved lprops statistics (used by 'dbg_save_space_info()') |
| 53 | * @saved_free: saved free space (used by 'dbg_save_space_info()') | 55 | * @saved_bi: saved budgeting information |
| 56 | * @saved_free: saved amount of free space | ||
| 57 | * @saved_idx_gc_cnt: saved value of @c->idx_gc_cnt | ||
| 54 | * | 58 | * |
| 55 | * dfs_dir_name: name of debugfs directory containing this file-system's files | 59 | * @dfs_dir_name: name of debugfs directory containing this file-system's files |
| 56 | * dfs_dir: direntry object of the file-system debugfs directory | 60 | * @dfs_dir: direntry object of the file-system debugfs directory |
| 57 | * dfs_dump_lprops: "dump lprops" debugfs knob | 61 | * @dfs_dump_lprops: "dump lprops" debugfs knob |
| 58 | * dfs_dump_budg: "dump budgeting information" debugfs knob | 62 | * @dfs_dump_budg: "dump budgeting information" debugfs knob |
| 59 | * dfs_dump_tnc: "dump TNC" debugfs knob | 63 | * @dfs_dump_tnc: "dump TNC" debugfs knob |
| 60 | */ | 64 | */ |
| 61 | struct ubifs_debug_info { | 65 | struct ubifs_debug_info { |
| 62 | struct ubifs_zbranch old_zroot; | 66 | struct ubifs_zbranch old_zroot; |
| @@ -76,7 +80,9 @@ struct ubifs_debug_info { | |||
| 76 | int new_ihead_offs; | 80 | int new_ihead_offs; |
| 77 | 81 | ||
| 78 | struct ubifs_lp_stats saved_lst; | 82 | struct ubifs_lp_stats saved_lst; |
| 83 | struct ubifs_budg_info saved_bi; | ||
| 79 | long long saved_free; | 84 | long long saved_free; |
| 85 | int saved_idx_gc_cnt; | ||
| 80 | 86 | ||
| 81 | char dfs_dir_name[100]; | 87 | char dfs_dir_name[100]; |
| 82 | struct dentry *dfs_dir; | 88 | struct dentry *dfs_dir; |
| @@ -101,23 +107,7 @@ struct ubifs_debug_info { | |||
| 101 | } \ | 107 | } \ |
| 102 | } while (0) | 108 | } while (0) |
| 103 | 109 | ||
| 104 | #define dbg_dump_stack() do { \ | 110 | #define dbg_dump_stack() dump_stack() |
| 105 | if (!dbg_failure_mode) \ | ||
| 106 | dump_stack(); \ | ||
| 107 | } while (0) | ||
| 108 | |||
| 109 | /* Generic debugging messages */ | ||
| 110 | #define dbg_msg(fmt, ...) do { \ | ||
| 111 | spin_lock(&dbg_lock); \ | ||
| 112 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", current->pid, \ | ||
| 113 | __func__, ##__VA_ARGS__); \ | ||
| 114 | spin_unlock(&dbg_lock); \ | ||
| 115 | } while (0) | ||
| 116 | |||
| 117 | #define dbg_do_msg(typ, fmt, ...) do { \ | ||
| 118 | if (ubifs_msg_flags & typ) \ | ||
| 119 | dbg_msg(fmt, ##__VA_ARGS__); \ | ||
| 120 | } while (0) | ||
| 121 | 111 | ||
| 122 | #define dbg_err(fmt, ...) do { \ | 112 | #define dbg_err(fmt, ...) do { \ |
| 123 | spin_lock(&dbg_lock); \ | 113 | spin_lock(&dbg_lock); \ |
| @@ -137,77 +127,40 @@ const char *dbg_key_str1(const struct ubifs_info *c, | |||
| 137 | #define DBGKEY(key) dbg_key_str0(c, (key)) | 127 | #define DBGKEY(key) dbg_key_str0(c, (key)) |
| 138 | #define DBGKEY1(key) dbg_key_str1(c, (key)) | 128 | #define DBGKEY1(key) dbg_key_str1(c, (key)) |
| 139 | 129 | ||
| 140 | /* General messages */ | 130 | #define ubifs_dbg_msg(type, fmt, ...) do { \ |
| 141 | #define dbg_gen(fmt, ...) dbg_do_msg(UBIFS_MSG_GEN, fmt, ##__VA_ARGS__) | 131 | spin_lock(&dbg_lock); \ |
| 132 | pr_debug("UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__); \ | ||
| 133 | spin_unlock(&dbg_lock); \ | ||
| 134 | } while (0) | ||
| 142 | 135 | ||
| 136 | /* Just debugging messages not related to any specific UBIFS subsystem */ | ||
| 137 | #define dbg_msg(fmt, ...) ubifs_dbg_msg("msg", fmt, ##__VA_ARGS__) | ||
| 138 | /* General messages */ | ||
| 139 | #define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__) | ||
| 143 | /* Additional journal messages */ | 140 | /* Additional journal messages */ |
| 144 | #define dbg_jnl(fmt, ...) dbg_do_msg(UBIFS_MSG_JNL, fmt, ##__VA_ARGS__) | 141 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__) |
| 145 | |||
| 146 | /* Additional TNC messages */ | 142 | /* Additional TNC messages */ |
| 147 | #define dbg_tnc(fmt, ...) dbg_do_msg(UBIFS_MSG_TNC, fmt, ##__VA_ARGS__) | 143 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg("tnc", fmt, ##__VA_ARGS__) |
| 148 | |||
| 149 | /* Additional lprops messages */ | 144 | /* Additional lprops messages */ |
| 150 | #define dbg_lp(fmt, ...) dbg_do_msg(UBIFS_MSG_LP, fmt, ##__VA_ARGS__) | 145 | #define dbg_lp(fmt, ...) ubifs_dbg_msg("lp", fmt, ##__VA_ARGS__) |
| 151 | |||
| 152 | /* Additional LEB find messages */ | 146 | /* Additional LEB find messages */ |
| 153 | #define dbg_find(fmt, ...) dbg_do_msg(UBIFS_MSG_FIND, fmt, ##__VA_ARGS__) | 147 | #define dbg_find(fmt, ...) ubifs_dbg_msg("find", fmt, ##__VA_ARGS__) |
| 154 | |||
| 155 | /* Additional mount messages */ | 148 | /* Additional mount messages */ |
| 156 | #define dbg_mnt(fmt, ...) dbg_do_msg(UBIFS_MSG_MNT, fmt, ##__VA_ARGS__) | 149 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg("mnt", fmt, ##__VA_ARGS__) |
| 157 | |||
| 158 | /* Additional I/O messages */ | 150 | /* Additional I/O messages */ |
| 159 | #define dbg_io(fmt, ...) dbg_do_msg(UBIFS_MSG_IO, fmt, ##__VA_ARGS__) | 151 | #define dbg_io(fmt, ...) ubifs_dbg_msg("io", fmt, ##__VA_ARGS__) |
| 160 | |||
| 161 | /* Additional commit messages */ | 152 | /* Additional commit messages */ |
| 162 | #define dbg_cmt(fmt, ...) dbg_do_msg(UBIFS_MSG_CMT, fmt, ##__VA_ARGS__) | 153 | #define dbg_cmt(fmt, ...) ubifs_dbg_msg("cmt", fmt, ##__VA_ARGS__) |
| 163 | |||
| 164 | /* Additional budgeting messages */ | 154 | /* Additional budgeting messages */ |
| 165 | #define dbg_budg(fmt, ...) dbg_do_msg(UBIFS_MSG_BUDG, fmt, ##__VA_ARGS__) | 155 | #define dbg_budg(fmt, ...) ubifs_dbg_msg("budg", fmt, ##__VA_ARGS__) |
| 166 | |||
| 167 | /* Additional log messages */ | 156 | /* Additional log messages */ |
| 168 | #define dbg_log(fmt, ...) dbg_do_msg(UBIFS_MSG_LOG, fmt, ##__VA_ARGS__) | 157 | #define dbg_log(fmt, ...) ubifs_dbg_msg("log", fmt, ##__VA_ARGS__) |
| 169 | |||
| 170 | /* Additional gc messages */ | 158 | /* Additional gc messages */ |
| 171 | #define dbg_gc(fmt, ...) dbg_do_msg(UBIFS_MSG_GC, fmt, ##__VA_ARGS__) | 159 | #define dbg_gc(fmt, ...) ubifs_dbg_msg("gc", fmt, ##__VA_ARGS__) |
| 172 | |||
| 173 | /* Additional scan messages */ | 160 | /* Additional scan messages */ |
| 174 | #define dbg_scan(fmt, ...) dbg_do_msg(UBIFS_MSG_SCAN, fmt, ##__VA_ARGS__) | 161 | #define dbg_scan(fmt, ...) ubifs_dbg_msg("scan", fmt, ##__VA_ARGS__) |
| 175 | |||
| 176 | /* Additional recovery messages */ | 162 | /* Additional recovery messages */ |
| 177 | #define dbg_rcvry(fmt, ...) dbg_do_msg(UBIFS_MSG_RCVRY, fmt, ##__VA_ARGS__) | 163 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg("rcvry", fmt, ##__VA_ARGS__) |
| 178 | |||
| 179 | /* | ||
| 180 | * Debugging message type flags. | ||
| 181 | * | ||
| 182 | * UBIFS_MSG_GEN: general messages | ||
| 183 | * UBIFS_MSG_JNL: journal messages | ||
| 184 | * UBIFS_MSG_MNT: mount messages | ||
| 185 | * UBIFS_MSG_CMT: commit messages | ||
| 186 | * UBIFS_MSG_FIND: LEB find messages | ||
| 187 | * UBIFS_MSG_BUDG: budgeting messages | ||
| 188 | * UBIFS_MSG_GC: garbage collection messages | ||
| 189 | * UBIFS_MSG_TNC: TNC messages | ||
| 190 | * UBIFS_MSG_LP: lprops messages | ||
| 191 | * UBIFS_MSG_IO: I/O messages | ||
| 192 | * UBIFS_MSG_LOG: log messages | ||
| 193 | * UBIFS_MSG_SCAN: scan messages | ||
| 194 | * UBIFS_MSG_RCVRY: recovery messages | ||
| 195 | */ | ||
| 196 | enum { | ||
| 197 | UBIFS_MSG_GEN = 0x1, | ||
| 198 | UBIFS_MSG_JNL = 0x2, | ||
| 199 | UBIFS_MSG_MNT = 0x4, | ||
| 200 | UBIFS_MSG_CMT = 0x8, | ||
| 201 | UBIFS_MSG_FIND = 0x10, | ||
| 202 | UBIFS_MSG_BUDG = 0x20, | ||
| 203 | UBIFS_MSG_GC = 0x40, | ||
| 204 | UBIFS_MSG_TNC = 0x80, | ||
| 205 | UBIFS_MSG_LP = 0x100, | ||
| 206 | UBIFS_MSG_IO = 0x200, | ||
| 207 | UBIFS_MSG_LOG = 0x400, | ||
| 208 | UBIFS_MSG_SCAN = 0x800, | ||
| 209 | UBIFS_MSG_RCVRY = 0x1000, | ||
| 210 | }; | ||
| 211 | 164 | ||
| 212 | /* | 165 | /* |
| 213 | * Debugging check flags. | 166 | * Debugging check flags. |
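Thirteen flag-gated message macros collapse into one ubifs_dbg_msg() parameterized by a subsystem tag, with filtering delegated to pr_debug() and the kernel's dynamic debug facility instead of the removed module parameter. A userspace approximation of the consolidated macro (fprintf stands in for pr_debug, GNU-style variadics as in the kernel, and the lock is omitted):

#include <stdio.h>

#define ubifs_dbg_msg(type, fmt, ...) \
	fprintf(stderr, "UBIFS DBG " type ": " fmt "\n", ##__VA_ARGS__)

/* Every subsystem macro is now a one-line alias with its own tag. */
#define dbg_gen(fmt, ...) ubifs_dbg_msg("gen", fmt, ##__VA_ARGS__)
#define dbg_jnl(fmt, ...) ubifs_dbg_msg("jnl", fmt, ##__VA_ARGS__)

int main(void)
{
	dbg_gen("mounted volume %d", 3);
	dbg_jnl("wrote %d bytes", 4096);
	return 0;
}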
| @@ -233,11 +186,9 @@ enum { | |||
| 233 | /* | 186 | /* |
| 234 | * Special testing flags. | 187 | * Special testing flags. |
| 235 | * | 188 | * |
| 236 | * UBIFS_TST_FORCE_IN_THE_GAPS: force the use of in-the-gaps method | ||
| 237 | * UBIFS_TST_RCVRY: failure mode for recovery testing | 189 | * UBIFS_TST_RCVRY: failure mode for recovery testing |
| 238 | */ | 190 | */ |
| 239 | enum { | 191 | enum { |
| 240 | UBIFS_TST_FORCE_IN_THE_GAPS = 0x2, | ||
| 241 | UBIFS_TST_RCVRY = 0x4, | 192 | UBIFS_TST_RCVRY = 0x4, |
| 242 | }; | 193 | }; |
| 243 | 194 | ||
| @@ -262,7 +213,7 @@ void dbg_dump_lpt_node(const struct ubifs_info *c, void *node, int lnum, | |||
| 262 | int offs); | 213 | int offs); |
| 263 | void dbg_dump_budget_req(const struct ubifs_budget_req *req); | 214 | void dbg_dump_budget_req(const struct ubifs_budget_req *req); |
| 264 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst); | 215 | void dbg_dump_lstats(const struct ubifs_lp_stats *lst); |
| 265 | void dbg_dump_budg(struct ubifs_info *c); | 216 | void dbg_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); |
| 266 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); | 217 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); |
| 267 | void dbg_dump_lprops(struct ubifs_info *c); | 218 | void dbg_dump_lprops(struct ubifs_info *c); |
| 268 | void dbg_dump_lpt_info(struct ubifs_info *c); | 219 | void dbg_dump_lpt_info(struct ubifs_info *c); |
| @@ -304,18 +255,16 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head); | |||
| 304 | int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); | 255 | int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head); |
| 305 | 256 | ||
| 306 | /* Force the use of in-the-gaps method for testing */ | 257 | /* Force the use of in-the-gaps method for testing */ |
| 307 | 258 | static inline int dbg_force_in_the_gaps_enabled(void) | |
| 308 | #define dbg_force_in_the_gaps_enabled \ | 259 | { |
| 309 | (ubifs_tst_flags & UBIFS_TST_FORCE_IN_THE_GAPS) | 260 | return ubifs_chk_flags & UBIFS_CHK_GEN; |
| 310 | 261 | } | |
| 311 | int dbg_force_in_the_gaps(void); | 262 | int dbg_force_in_the_gaps(void); |
| 312 | 263 | ||
| 313 | /* Failure mode for recovery testing */ | 264 | /* Failure mode for recovery testing */ |
| 314 | |||
| 315 | #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) | 265 | #define dbg_failure_mode (ubifs_tst_flags & UBIFS_TST_RCVRY) |
| 316 | 266 | ||
| 317 | #ifndef UBIFS_DBG_PRESERVE_UBI | 267 | #ifndef UBIFS_DBG_PRESERVE_UBI |
| 318 | |||
| 319 | #define ubi_leb_read dbg_leb_read | 268 | #define ubi_leb_read dbg_leb_read |
| 320 | #define ubi_leb_write dbg_leb_write | 269 | #define ubi_leb_write dbg_leb_write |
| 321 | #define ubi_leb_change dbg_leb_change | 270 | #define ubi_leb_change dbg_leb_change |
| @@ -323,7 +272,6 @@ int dbg_force_in_the_gaps(void); | |||
| 323 | #define ubi_leb_unmap dbg_leb_unmap | 272 | #define ubi_leb_unmap dbg_leb_unmap |
| 324 | #define ubi_is_mapped dbg_is_mapped | 273 | #define ubi_is_mapped dbg_is_mapped |
| 325 | #define ubi_leb_map dbg_leb_map | 274 | #define ubi_leb_map dbg_leb_map |
| 326 | |||
| 327 | #endif | 275 | #endif |
| 328 | 276 | ||
| 329 | int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, | 277 | int dbg_leb_read(struct ubi_volume_desc *desc, int lnum, char *buf, int offset, |
| @@ -370,33 +318,33 @@ void dbg_debugfs_exit_fs(struct ubifs_info *c); | |||
| 370 | __func__, __LINE__, current->pid); \ | 318 | __func__, __LINE__, current->pid); \ |
| 371 | } while (0) | 319 | } while (0) |
| 372 | 320 | ||
| 373 | #define dbg_err(fmt, ...) do { \ | 321 | #define dbg_err(fmt, ...) do { \ |
| 374 | if (0) \ | 322 | if (0) \ |
| 375 | ubifs_err(fmt, ##__VA_ARGS__); \ | 323 | ubifs_err(fmt, ##__VA_ARGS__); \ |
| 376 | } while (0) | 324 | } while (0) |
| 377 | 325 | ||
| 378 | #define dbg_msg(fmt, ...) do { \ | 326 | #define ubifs_dbg_msg(fmt, ...) do { \ |
| 379 | if (0) \ | 327 | if (0) \ |
| 380 | printk(KERN_DEBUG "UBIFS DBG (pid %d): %s: " fmt "\n", \ | 328 | pr_debug(fmt "\n", ##__VA_ARGS__); \ |
| 381 | current->pid, __func__, ##__VA_ARGS__); \ | ||
| 382 | } while (0) | 329 | } while (0) |
| 383 | 330 | ||
| 384 | #define dbg_dump_stack() | 331 | #define dbg_dump_stack() |
| 385 | #define ubifs_assert_cmt_locked(c) | 332 | #define ubifs_assert_cmt_locked(c) |
| 386 | 333 | ||
| 387 | #define dbg_gen(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 334 | #define dbg_msg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 388 | #define dbg_jnl(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 335 | #define dbg_gen(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 389 | #define dbg_tnc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 336 | #define dbg_jnl(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 390 | #define dbg_lp(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 337 | #define dbg_tnc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 391 | #define dbg_find(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 338 | #define dbg_lp(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 392 | #define dbg_mnt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 339 | #define dbg_find(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 393 | #define dbg_io(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 340 | #define dbg_mnt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 394 | #define dbg_cmt(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 341 | #define dbg_io(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 395 | #define dbg_budg(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 342 | #define dbg_cmt(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 396 | #define dbg_log(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 343 | #define dbg_budg(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 397 | #define dbg_gc(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 344 | #define dbg_log(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 398 | #define dbg_scan(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 345 | #define dbg_gc(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 399 | #define dbg_rcvry(fmt, ...) dbg_msg(fmt, ##__VA_ARGS__) | 346 | #define dbg_scan(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) |
| 347 | #define dbg_rcvry(fmt, ...) ubifs_dbg_msg(fmt, ##__VA_ARGS__) | ||
| 400 | 348 | ||
| 401 | #define DBGKEY(key) ((char *)(key)) | 349 | #define DBGKEY(key) ((char *)(key)) |
| 402 | #define DBGKEY1(key) ((char *)(key)) | 350 | #define DBGKEY1(key) ((char *)(key)) |
| @@ -420,7 +368,9 @@ static inline void | |||
| 420 | dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } | 368 | dbg_dump_budget_req(const struct ubifs_budget_req *req) { return; } |
| 421 | static inline void | 369 | static inline void |
| 422 | dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } | 370 | dbg_dump_lstats(const struct ubifs_lp_stats *lst) { return; } |
| 423 | static inline void dbg_dump_budg(struct ubifs_info *c) { return; } | 371 | static inline void |
| 372 | dbg_dump_budg(struct ubifs_info *c, | ||
| 373 | const struct ubifs_budg_info *bi) { return; } | ||
| 424 | static inline void dbg_dump_lprop(const struct ubifs_info *c, | 374 | static inline void dbg_dump_lprop(const struct ubifs_info *c, |
| 425 | const struct ubifs_lprops *lp) { return; } | 375 | const struct ubifs_lprops *lp) { return; } |
| 426 | static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } | 376 | static inline void dbg_dump_lprops(struct ubifs_info *c) { return; } |
| @@ -482,8 +432,8 @@ dbg_check_nondata_nodes_order(struct ubifs_info *c, | |||
| 482 | struct list_head *head) { return 0; } | 432 | struct list_head *head) { return 0; } |
| 483 | 433 | ||
| 484 | static inline int dbg_force_in_the_gaps(void) { return 0; } | 434 | static inline int dbg_force_in_the_gaps(void) { return 0; } |
| 485 | #define dbg_force_in_the_gaps_enabled 0 | 435 | #define dbg_force_in_the_gaps_enabled() 0 |
| 486 | #define dbg_failure_mode 0 | 436 | #define dbg_failure_mode 0 |
| 487 | 437 | ||
| 488 | static inline int dbg_debugfs_init(void) { return 0; } | 438 | static inline int dbg_debugfs_init(void) { return 0; } |
| 489 | static inline void dbg_debugfs_exit(void) { return; } | 439 | static inline void dbg_debugfs_exit(void) { return; } |
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7217d67a80a6..ef5abd38f0bf 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
| @@ -603,7 +603,7 @@ static int ubifs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 603 | ubifs_release_budget(c, &req); | 603 | ubifs_release_budget(c, &req); |
| 604 | else { | 604 | else { |
| 605 | /* We've deleted something - clean the "no space" flags */ | 605 | /* We've deleted something - clean the "no space" flags */ |
| 606 | c->nospace = c->nospace_rp = 0; | 606 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 607 | smp_wmb(); | 607 | smp_wmb(); |
| 608 | } | 608 | } |
| 609 | return 0; | 609 | return 0; |
| @@ -693,7 +693,7 @@ static int ubifs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 693 | ubifs_release_budget(c, &req); | 693 | ubifs_release_budget(c, &req); |
| 694 | else { | 694 | else { |
| 695 | /* We've deleted something - clean the "no space" flags */ | 695 | /* We've deleted something - clean the "no space" flags */ |
| 696 | c->nospace = c->nospace_rp = 0; | 696 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 697 | smp_wmb(); | 697 | smp_wmb(); |
| 698 | } | 698 | } |
| 699 | return 0; | 699 | return 0; |
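Both unlink and rmdir clear the sticky "no space" flags after a successful deletion, since freed space invalidates a cached ENOSPC verdict; the flags themselves merely moved into the new @c->bi container. A small model of the sticky-flag idea (the smp_wmb() barrier is elided and the names are illustrative):

#include <stdio.h>

struct budg_info { unsigned nospace:1, nospace_rp:1; };
static struct budg_info bi;

static int budget_space(long long need, long long avail)
{
	if (bi.nospace)
		return -1;	/* cached verdict: skip expensive retries */
	if (need > avail) {
		bi.nospace = 1;	/* latch the failure */
		return -1;
	}
	return 0;
}

static void after_deletion(void)
{
	/* Deleting anything frees space, so the cached verdict is stale. */
	bi.nospace = bi.nospace_rp = 0;
}

int main(void)
{
	budget_space(100, 50);	/* fails and latches nospace */
	after_deletion();
	printf("nospace=%u\n", (unsigned)bi.nospace);	/* 0 again */
	return 0;
}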
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b286db79c686..5e7fccfc4b29 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -212,7 +212,7 @@ static void release_new_page_budget(struct ubifs_info *c) | |||
| 212 | */ | 212 | */ |
| 213 | static void release_existing_page_budget(struct ubifs_info *c) | 213 | static void release_existing_page_budget(struct ubifs_info *c) |
| 214 | { | 214 | { |
| 215 | struct ubifs_budget_req req = { .dd_growth = c->page_budget}; | 215 | struct ubifs_budget_req req = { .dd_growth = c->bi.page_budget}; |
| 216 | 216 | ||
| 217 | ubifs_release_budget(c, &req); | 217 | ubifs_release_budget(c, &req); |
| 218 | } | 218 | } |
| @@ -971,11 +971,11 @@ static int do_writepage(struct page *page, int len) | |||
| 971 | * the page locked, and it locks @ui_mutex. However, write-back does take inode | 971 | * the page locked, and it locks @ui_mutex. However, write-back does take inode |
| 972 | * @i_mutex, which means other VFS operations may be run on this inode at the | 972 | * @i_mutex, which means other VFS operations may be run on this inode at the |
| 973 | * same time. And the problematic one is truncation to smaller size, from where | 973 | * same time. And the problematic one is truncation to smaller size, from where |
| 974 | * we have to call 'truncate_setsize()', which first changes @inode->i_size, then | 974 | * we have to call 'truncate_setsize()', which first changes @inode->i_size, |
| 975 | * drops the truncated pages. And while dropping the pages, it takes the page | 975 | * then drops the truncated pages. And while dropping the pages, it takes the |
| 976 | * lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' with | 976 | * page lock. This means that 'do_truncation()' cannot call 'truncate_setsize()' |
| 977 | * @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. This | 977 | * with @ui_mutex locked, because it would deadlock with 'ubifs_writepage()'. |
| 978 | * means that @inode->i_size is changed while @ui_mutex is unlocked. | 978 | * This means that @inode->i_size is changed while @ui_mutex is unlocked. |
| 979 | * | 979 | * |
| 980 | * XXX(truncate): with the new truncate sequence this is not true anymore, | 980 | * XXX(truncate): with the new truncate sequence this is not true anymore, |
| 981 | * and the calls to truncate_setsize can be moved around freely. They should | 981 | * and the calls to truncate_setsize can be moved around freely. They should |
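The comment reflowed above describes a lock-order inversion: write-back holds the page lock and then takes @ui_mutex, while a truncation holding @ui_mutex would need the page lock inside truncate_setsize(). A minimal pthread illustration of the two orders; trylock is used in the second thread so the demo reports the inversion instead of actually deadlocking:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t page_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t ui_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Models write-back: page lock first, then ui_mutex. */
static void *writeback(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&page_lock);
	pthread_mutex_lock(&ui_mutex);
	pthread_mutex_unlock(&ui_mutex);
	pthread_mutex_unlock(&page_lock);
	return NULL;
}

/* Models the forbidden truncation order: ui_mutex first, then the page
 * lock. A failed trylock here is exactly the cycle that would deadlock
 * with blocking locks, which is why UBIFS changes i_size with ui_mutex
 * released. */
static void *truncation(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&ui_mutex);
	if (pthread_mutex_trylock(&page_lock))
		puts("inversion: would deadlock with a blocking lock");
	else
		pthread_mutex_unlock(&page_lock);
	pthread_mutex_unlock(&ui_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, writeback, NULL);
	pthread_create(&b, NULL, truncation, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}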
| @@ -1189,7 +1189,7 @@ out_budg: | |||
| 1189 | if (budgeted) | 1189 | if (budgeted) |
| 1190 | ubifs_release_budget(c, &req); | 1190 | ubifs_release_budget(c, &req); |
| 1191 | else { | 1191 | else { |
| 1192 | c->nospace = c->nospace_rp = 0; | 1192 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 1193 | smp_wmb(); | 1193 | smp_wmb(); |
| 1194 | } | 1194 | } |
| 1195 | return err; | 1195 | return err; |
| @@ -1312,7 +1312,11 @@ int ubifs_fsync(struct file *file, int datasync) | |||
| 1312 | 1312 | ||
| 1313 | dbg_gen("syncing inode %lu", inode->i_ino); | 1313 | dbg_gen("syncing inode %lu", inode->i_ino); |
| 1314 | 1314 | ||
| 1315 | if (inode->i_sb->s_flags & MS_RDONLY) | 1315 | if (c->ro_mount) |
| 1316 | /* | ||
| 1317 | * For some really strange reason, VFS does not filter out | ||
| 1318 | * 'fsync()' for R/O mounted file-systems as of 2.6.39. | ||
| 1319 | */ | ||
| 1316 | return 0; | 1320 | return 0; |
| 1317 | 1321 | ||
| 1318 | /* | 1322 | /* |
| @@ -1432,10 +1436,11 @@ static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags) | |||
| 1432 | } | 1436 | } |
| 1433 | 1437 | ||
| 1434 | /* | 1438 | /* |
| 1435 | * mmap()d file has taken write protection fault and is being made | 1439 | * mmap()d file has taken write protection fault and is being made writable. |
| 1436 | * writable. UBIFS must ensure page is budgeted for. | 1440 | * UBIFS must ensure page is budgeted for. |
| 1437 | */ | 1441 | */ |
| 1438 | static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | 1442 | static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma, |
| 1443 | struct vm_fault *vmf) | ||
| 1439 | { | 1444 | { |
| 1440 | struct page *page = vmf->page; | 1445 | struct page *page = vmf->page; |
| 1441 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 1446 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
| @@ -1536,7 +1541,6 @@ static int ubifs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 1536 | { | 1541 | { |
| 1537 | int err; | 1542 | int err; |
| 1538 | 1543 | ||
| 1539 | /* 'generic_file_mmap()' takes care of NOMMU case */ | ||
| 1540 | err = generic_file_mmap(file, vma); | 1544 | err = generic_file_mmap(file, vma); |
| 1541 | if (err) | 1545 | if (err) |
| 1542 | return err; | 1546 | return err; |
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 1d54383d1269..2559d174e004 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
| @@ -252,8 +252,8 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
| 252 | * But if the index takes fewer LEBs than it is reserved for it, | 252 | * But if the index takes fewer LEBs than it is reserved for it, |
| 253 | * this function must avoid picking those reserved LEBs. | 253 | * this function must avoid picking those reserved LEBs. |
| 254 | */ | 254 | */ |
| 255 | if (c->min_idx_lebs >= c->lst.idx_lebs) { | 255 | if (c->bi.min_idx_lebs >= c->lst.idx_lebs) { |
| 256 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | 256 | rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; |
| 257 | exclude_index = 1; | 257 | exclude_index = 1; |
| 258 | } | 258 | } |
| 259 | spin_unlock(&c->space_lock); | 259 | spin_unlock(&c->space_lock); |
| @@ -276,7 +276,7 @@ int ubifs_find_dirty_leb(struct ubifs_info *c, struct ubifs_lprops *ret_lp, | |||
| 276 | pick_free = 0; | 276 | pick_free = 0; |
| 277 | } else { | 277 | } else { |
| 278 | spin_lock(&c->space_lock); | 278 | spin_lock(&c->space_lock); |
| 279 | exclude_index = (c->min_idx_lebs >= c->lst.idx_lebs); | 279 | exclude_index = (c->bi.min_idx_lebs >= c->lst.idx_lebs); |
| 280 | spin_unlock(&c->space_lock); | 280 | spin_unlock(&c->space_lock); |
| 281 | } | 281 | } |
| 282 | 282 | ||
| @@ -501,8 +501,8 @@ int ubifs_find_free_space(struct ubifs_info *c, int min_space, int *offs, | |||
| 501 | 501 | ||
| 502 | /* Check if there are enough empty LEBs for commit */ | 502 | /* Check if there are enough empty LEBs for commit */ |
| 503 | spin_lock(&c->space_lock); | 503 | spin_lock(&c->space_lock); |
| 504 | if (c->min_idx_lebs > c->lst.idx_lebs) | 504 | if (c->bi.min_idx_lebs > c->lst.idx_lebs) |
| 505 | rsvd_idx_lebs = c->min_idx_lebs - c->lst.idx_lebs; | 505 | rsvd_idx_lebs = c->bi.min_idx_lebs - c->lst.idx_lebs; |
| 506 | else | 506 | else |
| 507 | rsvd_idx_lebs = 0; | 507 | rsvd_idx_lebs = 0; |
| 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 508 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
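The arithmetic in these hunks is unchanged and merely moves into @c->bi: when the index is budgeted more LEBs than it currently occupies, the difference must be held back from allocation. As a plain function:

#include <stdio.h>

/* LEBs that must stay reserved for the index: the budgeted minimum
 * minus what the index already occupies, never negative. */
static int reserved_index_lebs(int min_idx_lebs, int idx_lebs)
{
	if (min_idx_lebs > idx_lebs)
		return min_idx_lebs - idx_lebs;
	return 0;
}

int main(void)
{
	printf("%d\n", reserved_index_lebs(5, 3));	/* 2 LEBs held back */
	printf("%d\n", reserved_index_lebs(3, 5));	/* index shrank: 0 */
	return 0;
}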
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 151f10882820..ded29f6224c2 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
| @@ -100,6 +100,10 @@ static int switch_gc_head(struct ubifs_info *c) | |||
| 100 | if (err) | 100 | if (err) |
| 101 | return err; | 101 | return err; |
| 102 | 102 | ||
| 103 | err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 104 | if (err) | ||
| 105 | return err; | ||
| 106 | |||
| 103 | err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); | 107 | err = ubifs_add_bud_to_log(c, GCHD, gc_lnum, 0); |
| 104 | if (err) | 108 | if (err) |
| 105 | return err; | 109 | return err; |
| @@ -118,7 +122,7 @@ static int switch_gc_head(struct ubifs_info *c) | |||
| 118 | * This function compares data nodes @a and @b. Returns %1 if @a has greater | 122 | * This function compares data nodes @a and @b. Returns %1 if @a has greater |
| 119 | * inode or block number, and %-1 otherwise. | 123 | * inode or block number, and %-1 otherwise. |
| 120 | */ | 124 | */ |
| 121 | int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | 125 | static int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) |
| 122 | { | 126 | { |
| 123 | ino_t inuma, inumb; | 127 | ino_t inuma, inumb; |
| 124 | struct ubifs_info *c = priv; | 128 | struct ubifs_info *c = priv; |
| @@ -161,7 +165,8 @@ int data_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
| 161 | * first and are sorted by length in descending order. Directory entry nodes go | 165 | * first and are sorted by length in descending order. Directory entry nodes go |
| 162 | * after inode nodes and are sorted in ascending hash value order. | 166 | * after inode nodes and are sorted in ascending hash value order. |
| 163 | */ | 167 | */ |
| 164 | int nondata_nodes_cmp(void *priv, struct list_head *a, struct list_head *b) | 168 | static int nondata_nodes_cmp(void *priv, struct list_head *a, |
| 169 | struct list_head *b) | ||
| 165 | { | 170 | { |
| 166 | ino_t inuma, inumb; | 171 | ino_t inuma, inumb; |
| 167 | struct ubifs_info *c = priv; | 172 | struct ubifs_info *c = priv; |
| @@ -473,6 +478,37 @@ int ubifs_garbage_collect_leb(struct ubifs_info *c, struct ubifs_lprops *lp) | |||
| 473 | ubifs_assert(c->gc_lnum != lnum); | 478 | ubifs_assert(c->gc_lnum != lnum); |
| 474 | ubifs_assert(wbuf->lnum != lnum); | 479 | ubifs_assert(wbuf->lnum != lnum); |
| 475 | 480 | ||
| 481 | if (lp->free + lp->dirty == c->leb_size) { | ||
| 482 | /* Special case - a free LEB */ | ||
| 483 | dbg_gc("LEB %d is free, return it", lp->lnum); | ||
| 484 | ubifs_assert(!(lp->flags & LPROPS_INDEX)); | ||
| 485 | |||
| 486 | if (lp->free != c->leb_size) { | ||
| 487 | /* | ||
| 488 | * Write buffers must be sync'd before unmapping | ||
| 489 | * freeable LEBs, because one of them may contain data | ||
| 490 | * which obsoletes something in 'lp->lnum'. | ||
| 491 | */ | ||
| 492 | err = gc_sync_wbufs(c); | ||
| 493 | if (err) | ||
| 494 | return err; | ||
| 495 | err = ubifs_change_one_lp(c, lp->lnum, c->leb_size, | ||
| 496 | 0, 0, 0, 0); | ||
| 497 | if (err) | ||
| 498 | return err; | ||
| 499 | } | ||
| 500 | err = ubifs_leb_unmap(c, lp->lnum); | ||
| 501 | if (err) | ||
| 502 | return err; | ||
| 503 | |||
| 504 | if (c->gc_lnum == -1) { | ||
| 505 | c->gc_lnum = lnum; | ||
| 506 | return LEB_RETAINED; | ||
| 507 | } | ||
| 508 | |||
| 509 | return LEB_FREED; | ||
| 510 | } | ||
| 511 | |||
| 476 | /* | 512 | /* |
| 477 | * We scan the entire LEB even though we only really need to scan up to | 513 | * We scan the entire LEB even though we only really need to scan up to |
| 478 | * (c->leb_size - lp->free). | 514 | * (c->leb_size - lp->free). |
| @@ -682,37 +718,6 @@ int ubifs_garbage_collect(struct ubifs_info *c, int anyway) | |||
| 682 | "(min. space %d)", lp.lnum, lp.free, lp.dirty, | 718 | "(min. space %d)", lp.lnum, lp.free, lp.dirty, |
| 683 | lp.free + lp.dirty, min_space); | 719 | lp.free + lp.dirty, min_space); |
| 684 | 720 | ||
| 685 | if (lp.free + lp.dirty == c->leb_size) { | ||
| 686 | /* An empty LEB was returned */ | ||
| 687 | dbg_gc("LEB %d is free, return it", lp.lnum); | ||
| 688 | /* | ||
| 689 | * ubifs_find_dirty_leb() doesn't return freeable index | ||
| 690 | * LEBs. | ||
| 691 | */ | ||
| 692 | ubifs_assert(!(lp.flags & LPROPS_INDEX)); | ||
| 693 | if (lp.free != c->leb_size) { | ||
| 694 | /* | ||
| 695 | * Write buffers must be sync'd before | ||
| 696 | * unmapping freeable LEBs, because one of them | ||
| 697 | * may contain data which obsoletes something | ||
| 698 | * in 'lp.pnum'. | ||
| 699 | */ | ||
| 700 | ret = gc_sync_wbufs(c); | ||
| 701 | if (ret) | ||
| 702 | goto out; | ||
| 703 | ret = ubifs_change_one_lp(c, lp.lnum, | ||
| 704 | c->leb_size, 0, 0, 0, | ||
| 705 | 0); | ||
| 706 | if (ret) | ||
| 707 | goto out; | ||
| 708 | } | ||
| 709 | ret = ubifs_leb_unmap(c, lp.lnum); | ||
| 710 | if (ret) | ||
| 711 | goto out; | ||
| 712 | ret = lp.lnum; | ||
| 713 | break; | ||
| 714 | } | ||
| 715 | |||
| 716 | space_before = c->leb_size - wbuf->offs - wbuf->used; | 721 | space_before = c->leb_size - wbuf->offs - wbuf->used; |
| 717 | if (wbuf->lnum == -1) | 722 | if (wbuf->lnum == -1) |
| 718 | space_before = 0; | 723 | space_before = 0; |
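Moving the free-LEB special case from ubifs_garbage_collect() into ubifs_garbage_collect_leb() makes every caller benefit from it. The decision logic, sketched in userspace with stubbed helpers; the return constants and sizes are illustrative:

#include <stdio.h>

enum { LEB_FREED = 1, LEB_RETAINED = 2 };

static int leb_size = 128 * 1024;
static int gc_lnum = -1;	/* spare LEB reserved for the GC head */

static int sync_wbufs(void) { return 0; }			/* stub */
static int unmap_leb(int lnum) { (void)lnum; return 0; }	/* stub */

/* Fast path: a LEB with no live data needs no node-by-node GC. */
static int gc_leb(int lnum, int free, int dirty)
{
	if (free + dirty != leb_size)
		return 0;	/* live data present: fall through to real GC */

	if (free != leb_size) {
		/* Dirty-but-freeable: a write-buffer may hold the data
		 * that obsoletes this LEB, so sync before unmapping. */
		if (sync_wbufs())
			return -1;
	}
	if (unmap_leb(lnum))
		return -1;

	if (gc_lnum == -1) {
		gc_lnum = lnum;	/* keep it as the spare GC LEB */
		return LEB_RETAINED;
	}
	return LEB_FREED;
}

int main(void)
{
	printf("%d\n", gc_leb(7, leb_size, 0));	/* fully free: retained */
	printf("%d\n", gc_leb(8, leb_size, 0));	/* next one: freed */
	return 0;
}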
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index dfd168b7807e..166951e0dcd3 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
| @@ -393,7 +393,7 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
| 393 | ubifs_assert(wbuf->size % c->min_io_size == 0); | 393 | ubifs_assert(wbuf->size % c->min_io_size == 0); |
| 394 | ubifs_assert(!c->ro_media && !c->ro_mount); | 394 | ubifs_assert(!c->ro_media && !c->ro_mount); |
| 395 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 395 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 396 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); | 396 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
| 397 | 397 | ||
| 398 | if (c->ro_error) | 398 | if (c->ro_error) |
| 399 | return -EROFS; | 399 | return -EROFS; |
| @@ -452,8 +452,8 @@ int ubifs_wbuf_sync_nolock(struct ubifs_wbuf *wbuf) | |||
| 452 | * @dtype: data type | 452 | * @dtype: data type |
| 453 | * | 453 | * |
| 454 | * This function targets the write-buffer to logical eraseblock @lnum:@offs. | 454 | * This function targets the write-buffer to logical eraseblock @lnum:@offs. |
| 455 | * The write-buffer is synchronized if it is not empty. Returns zero in case of | 455 | * The write-buffer has to be empty. Returns zero in case of success and a |
| 456 | * success and a negative error code in case of failure. | 456 | * negative error code in case of failure. |
| 457 | */ | 457 | */ |
| 458 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | 458 | int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, |
| 459 | int dtype) | 459 | int dtype) |
| @@ -465,13 +465,7 @@ int ubifs_wbuf_seek_nolock(struct ubifs_wbuf *wbuf, int lnum, int offs, | |||
| 465 | ubifs_assert(offs >= 0 && offs <= c->leb_size); | 465 | ubifs_assert(offs >= 0 && offs <= c->leb_size); |
| 466 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); | 466 | ubifs_assert(offs % c->min_io_size == 0 && !(offs & 7)); |
| 467 | ubifs_assert(lnum != wbuf->lnum); | 467 | ubifs_assert(lnum != wbuf->lnum); |
| 468 | 468 | ubifs_assert(wbuf->used == 0); | |
| 469 | if (wbuf->used > 0) { | ||
| 470 | int err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 471 | |||
| 472 | if (err) | ||
| 473 | return err; | ||
| 474 | } | ||
| 475 | 469 | ||
| 476 | spin_lock(&wbuf->lock); | 470 | spin_lock(&wbuf->lock); |
| 477 | wbuf->lnum = lnum; | 471 | wbuf->lnum = lnum; |
| @@ -573,7 +567,7 @@ out_timers: | |||
| 573 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | 567 | int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) |
| 574 | { | 568 | { |
| 575 | struct ubifs_info *c = wbuf->c; | 569 | struct ubifs_info *c = wbuf->c; |
| 576 | int err, written, n, aligned_len = ALIGN(len, 8), offs; | 570 | int err, written, n, aligned_len = ALIGN(len, 8); |
| 577 | 571 | ||
| 578 | dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, | 572 | dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, |
| 579 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), | 573 | dbg_ntype(((struct ubifs_ch *)buf)->node_type), |
| @@ -588,7 +582,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 588 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); | 582 | ubifs_assert(mutex_is_locked(&wbuf->io_mutex)); |
| 589 | ubifs_assert(!c->ro_media && !c->ro_mount); | 583 | ubifs_assert(!c->ro_media && !c->ro_mount); |
| 590 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 584 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 591 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size )); | 585 | ubifs_assert(!((wbuf->offs + wbuf->size) % c->max_write_size)); |
| 592 | 586 | ||
| 593 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { | 587 | if (c->leb_size - wbuf->offs - wbuf->used < aligned_len) { |
| 594 | err = -ENOSPC; | 588 | err = -ENOSPC; |
| @@ -636,7 +630,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 636 | goto exit; | 630 | goto exit; |
| 637 | } | 631 | } |
| 638 | 632 | ||
| 639 | offs = wbuf->offs; | ||
| 640 | written = 0; | 633 | written = 0; |
| 641 | 634 | ||
| 642 | if (wbuf->used) { | 635 | if (wbuf->used) { |
| @@ -653,7 +646,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 653 | if (err) | 646 | if (err) |
| 654 | goto out; | 647 | goto out; |
| 655 | 648 | ||
| 656 | offs += wbuf->size; | 649 | wbuf->offs += wbuf->size; |
| 657 | len -= wbuf->avail; | 650 | len -= wbuf->avail; |
| 658 | aligned_len -= wbuf->avail; | 651 | aligned_len -= wbuf->avail; |
| 659 | written += wbuf->avail; | 652 | written += wbuf->avail; |
| @@ -672,7 +665,7 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 672 | if (err) | 665 | if (err) |
| 673 | goto out; | 666 | goto out; |
| 674 | 667 | ||
| 675 | offs += wbuf->size; | 668 | wbuf->offs += wbuf->size; |
| 676 | len -= wbuf->size; | 669 | len -= wbuf->size; |
| 677 | aligned_len -= wbuf->size; | 670 | aligned_len -= wbuf->size; |
| 678 | written += wbuf->size; | 671 | written += wbuf->size; |
| @@ -687,12 +680,13 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 687 | n = aligned_len >> c->max_write_shift; | 680 | n = aligned_len >> c->max_write_shift; |
| 688 | if (n) { | 681 | if (n) { |
| 689 | n <<= c->max_write_shift; | 682 | n <<= c->max_write_shift; |
| 690 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, offs); | 683 | dbg_io("write %d bytes to LEB %d:%d", n, wbuf->lnum, |
| 691 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, offs, n, | 684 | wbuf->offs); |
| 692 | wbuf->dtype); | 685 | err = ubi_leb_write(c->ubi, wbuf->lnum, buf + written, |
| 686 | wbuf->offs, n, wbuf->dtype); | ||
| 693 | if (err) | 687 | if (err) |
| 694 | goto out; | 688 | goto out; |
| 695 | offs += n; | 689 | wbuf->offs += n; |
| 696 | aligned_len -= n; | 690 | aligned_len -= n; |
| 697 | len -= n; | 691 | len -= n; |
| 698 | written += n; | 692 | written += n; |
| @@ -707,7 +701,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) | |||
| 707 | */ | 701 | */ |
| 708 | memcpy(wbuf->buf, buf + written, len); | 702 | memcpy(wbuf->buf, buf + written, len); |
| 709 | 703 | ||
| 710 | wbuf->offs = offs; | ||
| 711 | if (c->leb_size - wbuf->offs >= c->max_write_size) | 704 | if (c->leb_size - wbuf->offs >= c->max_write_size) |
| 712 | wbuf->size = c->max_write_size; | 705 | wbuf->size = c->max_write_size; |
| 713 | else | 706 | else |
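ubifs_wbuf_write_nolock() now advances @wbuf->offs in place instead of mirroring it in a local variable that was written back only at the end, so anything that runs mid-write always sees the true media offset. A userspace model of the chunked write loop; buffer sizes are illustrative:

#include <stdio.h>
#include <string.h>

struct wbuf { int offs, used, avail, size; char buf[64]; };

/* Buffer 'len' bytes, flushing full buffers as we go; w->offs is
 * advanced immediately at each flush, not once at the end. */
static void wbuf_write(struct wbuf *w, const char *data, int len)
{
	while (len > 0) {
		int n = len < w->avail ? len : w->avail;

		memcpy(w->buf + w->used, data, n);
		w->used += n;
		w->avail -= n;
		data += n;
		len -= n;

		if (!w->avail) {
			/* ... write w->buf to the media at w->offs ... */
			w->offs += w->size;	/* updated in place */
			w->used = 0;
			w->avail = w->size;
		}
	}
}

int main(void)
{
	struct wbuf w = { .offs = 0, .used = 0, .avail = 16, .size = 16 };
	char data[40] = { 0 };

	wbuf_write(&w, data, sizeof(data));
	printf("offs=%d used=%d\n", w.offs, w.used);	/* offs=32 used=8 */
	return 0;
}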
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index aed25e864227..34b1679e6e3a 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c | |||
| @@ -141,14 +141,8 @@ again: | |||
| 141 | * LEB with some empty space. | 141 | * LEB with some empty space. |
| 142 | */ | 142 | */ |
| 143 | lnum = ubifs_find_free_space(c, len, &offs, squeeze); | 143 | lnum = ubifs_find_free_space(c, len, &offs, squeeze); |
| 144 | if (lnum >= 0) { | 144 | if (lnum >= 0) |
| 145 | /* Found an LEB, add it to the journal head */ | ||
| 146 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); | ||
| 147 | if (err) | ||
| 148 | goto out_return; | ||
| 149 | /* A new bud was successfully allocated and added to the log */ | ||
| 150 | goto out; | 145 | goto out; |
| 151 | } | ||
| 152 | 146 | ||
| 153 | err = lnum; | 147 | err = lnum; |
| 154 | if (err != -ENOSPC) | 148 | if (err != -ENOSPC) |
| @@ -203,12 +197,23 @@ again: | |||
| 203 | return 0; | 197 | return 0; |
| 204 | } | 198 | } |
| 205 | 199 | ||
| 206 | err = ubifs_add_bud_to_log(c, jhead, lnum, 0); | ||
| 207 | if (err) | ||
| 208 | goto out_return; | ||
| 209 | offs = 0; | 200 | offs = 0; |
| 210 | 201 | ||
| 211 | out: | 202 | out: |
| 203 | /* | ||
| 204 | * Make sure we synchronize the write-buffer before we add the new bud | ||
| 205 | * to the log. Otherwise we may have a power cut after the log | ||
| 206 | * reference node for the last bud (@lnum) is written but before the | ||
| 207 | * write-buffer data are written to the next-to-last bud | ||
| 208 | * (@wbuf->lnum). And the effect would be that the recovery would see | ||
| 209 | * that there is corruption in the next-to-last bud. | ||
| 210 | */ | ||
| 211 | err = ubifs_wbuf_sync_nolock(wbuf); | ||
| 212 | if (err) | ||
| 213 | goto out_return; | ||
| 214 | err = ubifs_add_bud_to_log(c, jhead, lnum, offs); | ||
| 215 | if (err) | ||
| 216 | goto out_return; | ||
| 212 | err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); | 217 | err = ubifs_wbuf_seek_nolock(wbuf, lnum, offs, wbuf->dtype); |
| 213 | if (err) | 218 | if (err) |
| 214 | goto out_unlock; | 219 | goto out_unlock; |
| @@ -380,10 +385,8 @@ out: | |||
| 380 | if (err == -ENOSPC) { | 385 | if (err == -ENOSPC) { |
| 381 | /* These are some budgeting problems, print useful information */ | 386 | /* These are some budgeting problems, print useful information */ |
| 382 | down_write(&c->commit_sem); | 387 | down_write(&c->commit_sem); |
| 383 | spin_lock(&c->space_lock); | ||
| 384 | dbg_dump_stack(); | 388 | dbg_dump_stack(); |
| 385 | dbg_dump_budg(c); | 389 | dbg_dump_budg(c, &c->bi); |
| 386 | spin_unlock(&c->space_lock); | ||
| 387 | dbg_dump_lprops(c); | 390 | dbg_dump_lprops(c); |
| 388 | cmt_retries = dbg_check_lprops(c); | 391 | cmt_retries = dbg_check_lprops(c); |
| 389 | up_write(&c->commit_sem); | 392 | up_write(&c->commit_sem); |
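
The journal hunks above are mostly about ordering: the write-buffer holding the tail of the previous bud must be synchronized before the log reference node for the new bud is written, otherwise a power cut between the two writes would make recovery see corruption in the next-to-last bud. A sketch of the required sequence; every function here is a stub standing in for the UBIFS internals, kept only to show the order of operations:

#include <stdio.h>

static int wbuf_sync(void)          { puts("1. sync wbuf (old bud)"); return 0; }
static int add_bud_to_log(int lnum) { printf("2. log ref node for LEB %d\n", lnum); return 0; }
static int wbuf_seek(int lnum, int offs) { printf("3. seek wbuf to %d:%d\n", lnum, offs); return 0; }

int main(void)
{
        int lnum = 17, offs = 0, err;

        err = wbuf_sync();                  /* a power cut here is safe... */
        if (!err)
                err = add_bud_to_log(lnum); /* ...and here: old bud is complete */
        if (!err)
                err = wbuf_seek(lnum, offs);
        return err;
}
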
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c index 40fa780ebea7..affea9494ae2 100644 --- a/fs/ubifs/log.c +++ b/fs/ubifs/log.c | |||
| @@ -100,20 +100,6 @@ struct ubifs_wbuf *ubifs_get_wbuf(struct ubifs_info *c, int lnum) | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | /** | 102 | /** |
| 103 | * next_log_lnum - switch to the next log LEB. | ||
| 104 | * @c: UBIFS file-system description object | ||
| 105 | * @lnum: current log LEB | ||
| 106 | */ | ||
| 107 | static inline int next_log_lnum(const struct ubifs_info *c, int lnum) | ||
| 108 | { | ||
| 109 | lnum += 1; | ||
| 110 | if (lnum > c->log_last) | ||
| 111 | lnum = UBIFS_LOG_LNUM; | ||
| 112 | |||
| 113 | return lnum; | ||
| 114 | } | ||
| 115 | |||
| 116 | /** | ||
| 117 | * empty_log_bytes - calculate amount of empty space in the log. | 103 | * empty_log_bytes - calculate amount of empty space in the log. |
| 118 | * @c: UBIFS file-system description object | 104 | * @c: UBIFS file-system description object |
| 119 | */ | 105 | */ |
| @@ -257,7 +243,7 @@ int ubifs_add_bud_to_log(struct ubifs_info *c, int jhead, int lnum, int offs) | |||
| 257 | ref->jhead = cpu_to_le32(jhead); | 243 | ref->jhead = cpu_to_le32(jhead); |
| 258 | 244 | ||
| 259 | if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { | 245 | if (c->lhead_offs > c->leb_size - c->ref_node_alsz) { |
| 260 | c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); | 246 | c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); |
| 261 | c->lhead_offs = 0; | 247 | c->lhead_offs = 0; |
| 262 | } | 248 | } |
| 263 | 249 | ||
| @@ -425,7 +411,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) | |||
| 425 | 411 | ||
| 426 | /* Switch to the next log LEB */ | 412 | /* Switch to the next log LEB */ |
| 427 | if (c->lhead_offs) { | 413 | if (c->lhead_offs) { |
| 428 | c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); | 414 | c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); |
| 429 | c->lhead_offs = 0; | 415 | c->lhead_offs = 0; |
| 430 | } | 416 | } |
| 431 | 417 | ||
| @@ -446,7 +432,7 @@ int ubifs_log_start_commit(struct ubifs_info *c, int *ltail_lnum) | |||
| 446 | 432 | ||
| 447 | c->lhead_offs += len; | 433 | c->lhead_offs += len; |
| 448 | if (c->lhead_offs == c->leb_size) { | 434 | if (c->lhead_offs == c->leb_size) { |
| 449 | c->lhead_lnum = next_log_lnum(c, c->lhead_lnum); | 435 | c->lhead_lnum = ubifs_next_log_lnum(c, c->lhead_lnum); |
| 450 | c->lhead_offs = 0; | 436 | c->lhead_offs = 0; |
| 451 | } | 437 | } |
| 452 | 438 | ||
| @@ -533,7 +519,7 @@ int ubifs_log_post_commit(struct ubifs_info *c, int old_ltail_lnum) | |||
| 533 | } | 519 | } |
| 534 | mutex_lock(&c->log_mutex); | 520 | mutex_lock(&c->log_mutex); |
| 535 | for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; | 521 | for (lnum = old_ltail_lnum; lnum != c->ltail_lnum; |
| 536 | lnum = next_log_lnum(c, lnum)) { | 522 | lnum = ubifs_next_log_lnum(c, lnum)) { |
| 537 | dbg_log("unmap log LEB %d", lnum); | 523 | dbg_log("unmap log LEB %d", lnum); |
| 538 | err = ubifs_leb_unmap(c, lnum); | 524 | err = ubifs_leb_unmap(c, lnum); |
| 539 | if (err) | 525 | if (err) |
| @@ -642,7 +628,7 @@ static int add_node(struct ubifs_info *c, void *buf, int *lnum, int *offs, | |||
| 642 | err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); | 628 | err = ubifs_leb_change(c, *lnum, buf, sz, UBI_SHORTTERM); |
| 643 | if (err) | 629 | if (err) |
| 644 | return err; | 630 | return err; |
| 645 | *lnum = next_log_lnum(c, *lnum); | 631 | *lnum = ubifs_next_log_lnum(c, *lnum); |
| 646 | *offs = 0; | 632 | *offs = 0; |
| 647 | } | 633 | } |
| 648 | memcpy(buf + *offs, node, len); | 634 | memcpy(buf + *offs, node, len); |
| @@ -712,7 +698,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) | |||
| 712 | ubifs_scan_destroy(sleb); | 698 | ubifs_scan_destroy(sleb); |
| 713 | if (lnum == c->lhead_lnum) | 699 | if (lnum == c->lhead_lnum) |
| 714 | break; | 700 | break; |
| 715 | lnum = next_log_lnum(c, lnum); | 701 | lnum = ubifs_next_log_lnum(c, lnum); |
| 716 | } | 702 | } |
| 717 | if (offs) { | 703 | if (offs) { |
| 718 | int sz = ALIGN(offs, c->min_io_size); | 704 | int sz = ALIGN(offs, c->min_io_size); |
| @@ -732,7 +718,7 @@ int ubifs_consolidate_log(struct ubifs_info *c) | |||
| 732 | /* Unmap remaining LEBs */ | 718 | /* Unmap remaining LEBs */ |
| 733 | lnum = write_lnum; | 719 | lnum = write_lnum; |
| 734 | do { | 720 | do { |
| 735 | lnum = next_log_lnum(c, lnum); | 721 | lnum = ubifs_next_log_lnum(c, lnum); |
| 736 | err = ubifs_leb_unmap(c, lnum); | 722 | err = ubifs_leb_unmap(c, lnum); |
| 737 | if (err) | 723 | if (err) |
| 738 | return err; | 724 | return err; |
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 0ee0847f2421..667884f4a615 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
| @@ -1007,21 +1007,11 @@ out: | |||
| 1007 | } | 1007 | } |
| 1008 | 1008 | ||
| 1009 | /** | 1009 | /** |
| 1010 | * struct scan_check_data - data provided to scan callback function. | ||
| 1011 | * @lst: LEB properties statistics | ||
| 1012 | * @err: error code | ||
| 1013 | */ | ||
| 1014 | struct scan_check_data { | ||
| 1015 | struct ubifs_lp_stats lst; | ||
| 1016 | int err; | ||
| 1017 | }; | ||
| 1018 | |||
| 1019 | /** | ||
| 1020 | * scan_check_cb - scan callback. | 1010 | * scan_check_cb - scan callback. |
| 1021 | * @c: the UBIFS file-system description object | 1011 | * @c: the UBIFS file-system description object |
| 1022 | * @lp: LEB properties to scan | 1012 | * @lp: LEB properties to scan |
| 1023 | * @in_tree: whether the LEB properties are in main memory | 1013 | * @in_tree: whether the LEB properties are in main memory |
| 1024 | * @data: information passed to and from the caller of the scan | 1014 | * @lst: lprops statistics to update |
| 1025 | * | 1015 | * |
| 1026 | * This function returns a code that indicates whether the scan should continue | 1016 | * This function returns a code that indicates whether the scan should continue |
| 1027 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree | 1017 | * (%LPT_SCAN_CONTINUE), whether the LEB properties should be added to the tree |
| @@ -1030,11 +1020,10 @@ struct scan_check_data { | |||
| 1030 | */ | 1020 | */ |
| 1031 | static int scan_check_cb(struct ubifs_info *c, | 1021 | static int scan_check_cb(struct ubifs_info *c, |
| 1032 | const struct ubifs_lprops *lp, int in_tree, | 1022 | const struct ubifs_lprops *lp, int in_tree, |
| 1033 | struct scan_check_data *data) | 1023 | struct ubifs_lp_stats *lst) |
| 1034 | { | 1024 | { |
| 1035 | struct ubifs_scan_leb *sleb; | 1025 | struct ubifs_scan_leb *sleb; |
| 1036 | struct ubifs_scan_node *snod; | 1026 | struct ubifs_scan_node *snod; |
| 1037 | struct ubifs_lp_stats *lst = &data->lst; | ||
| 1038 | int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; | 1027 | int cat, lnum = lp->lnum, is_idx = 0, used = 0, free, dirty, ret; |
| 1039 | void *buf = NULL; | 1028 | void *buf = NULL; |
| 1040 | 1029 | ||
| @@ -1044,7 +1033,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1044 | if (cat != (lp->flags & LPROPS_CAT_MASK)) { | 1033 | if (cat != (lp->flags & LPROPS_CAT_MASK)) { |
| 1045 | ubifs_err("bad LEB category %d expected %d", | 1034 | ubifs_err("bad LEB category %d expected %d", |
| 1046 | (lp->flags & LPROPS_CAT_MASK), cat); | 1035 | (lp->flags & LPROPS_CAT_MASK), cat); |
| 1047 | goto out; | 1036 | return -EINVAL; |
| 1048 | } | 1037 | } |
| 1049 | } | 1038 | } |
| 1050 | 1039 | ||
| @@ -1078,7 +1067,7 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1078 | } | 1067 | } |
| 1079 | if (!found) { | 1068 | if (!found) { |
| 1080 | ubifs_err("bad LPT list (category %d)", cat); | 1069 | ubifs_err("bad LPT list (category %d)", cat); |
| 1081 | goto out; | 1070 | return -EINVAL; |
| 1082 | } | 1071 | } |
| 1083 | } | 1072 | } |
| 1084 | } | 1073 | } |
| @@ -1090,45 +1079,40 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1090 | if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || | 1079 | if ((lp->hpos != -1 && heap->arr[lp->hpos]->lnum != lnum) || |
| 1091 | lp != heap->arr[lp->hpos]) { | 1080 | lp != heap->arr[lp->hpos]) { |
| 1092 | ubifs_err("bad LPT heap (category %d)", cat); | 1081 | ubifs_err("bad LPT heap (category %d)", cat); |
| 1093 | goto out; | 1082 | return -EINVAL; |
| 1094 | } | 1083 | } |
| 1095 | } | 1084 | } |
| 1096 | 1085 | ||
| 1097 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); | 1086 | buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); |
| 1098 | if (!buf) { | 1087 | if (!buf) |
| 1099 | ubifs_err("cannot allocate memory to scan LEB %d", lnum); | 1088 | return -ENOMEM; |
| 1100 | goto out; | 1089 | |
| 1090 | /* | ||
| 1091 | * After an unclean unmount, empty and freeable LEBs | ||
| 1092 | * may contain garbage - do not scan them. | ||
| 1093 | */ | ||
| 1094 | if (lp->free == c->leb_size) { | ||
| 1095 | lst->empty_lebs += 1; | ||
| 1096 | lst->total_free += c->leb_size; | ||
| 1097 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1098 | return LPT_SCAN_CONTINUE; | ||
| 1099 | } | ||
| 1100 | if (lp->free + lp->dirty == c->leb_size && | ||
| 1101 | !(lp->flags & LPROPS_INDEX)) { | ||
| 1102 | lst->total_free += lp->free; | ||
| 1103 | lst->total_dirty += lp->dirty; | ||
| 1104 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1105 | return LPT_SCAN_CONTINUE; | ||
| 1101 | } | 1106 | } |
| 1102 | 1107 | ||
| 1103 | sleb = ubifs_scan(c, lnum, 0, buf, 0); | 1108 | sleb = ubifs_scan(c, lnum, 0, buf, 0); |
| 1104 | if (IS_ERR(sleb)) { | 1109 | if (IS_ERR(sleb)) { |
| 1105 | /* | 1110 | ret = PTR_ERR(sleb); |
| 1106 | * After an unclean unmount, empty and freeable LEBs | 1111 | if (ret == -EUCLEAN) { |
| 1107 | * may contain garbage. | 1112 | dbg_dump_lprops(c); |
| 1108 | */ | 1113 | dbg_dump_budg(c, &c->bi); |
| 1109 | if (lp->free == c->leb_size) { | ||
| 1110 | ubifs_err("scan errors were in empty LEB " | ||
| 1111 | "- continuing checking"); | ||
| 1112 | lst->empty_lebs += 1; | ||
| 1113 | lst->total_free += c->leb_size; | ||
| 1114 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1115 | ret = LPT_SCAN_CONTINUE; | ||
| 1116 | goto exit; | ||
| 1117 | } | ||
| 1118 | |||
| 1119 | if (lp->free + lp->dirty == c->leb_size && | ||
| 1120 | !(lp->flags & LPROPS_INDEX)) { | ||
| 1121 | ubifs_err("scan errors were in freeable LEB " | ||
| 1122 | "- continuing checking"); | ||
| 1123 | lst->total_free += lp->free; | ||
| 1124 | lst->total_dirty += lp->dirty; | ||
| 1125 | lst->total_dark += ubifs_calc_dark(c, c->leb_size); | ||
| 1126 | ret = LPT_SCAN_CONTINUE; | ||
| 1127 | goto exit; | ||
| 1128 | } | 1114 | } |
| 1129 | data->err = PTR_ERR(sleb); | 1115 | goto out; |
| 1130 | ret = LPT_SCAN_STOP; | ||
| 1131 | goto exit; | ||
| 1132 | } | 1116 | } |
| 1133 | 1117 | ||
| 1134 | is_idx = -1; | 1118 | is_idx = -1; |
| @@ -1246,10 +1230,8 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1246 | } | 1230 | } |
| 1247 | 1231 | ||
| 1248 | ubifs_scan_destroy(sleb); | 1232 | ubifs_scan_destroy(sleb); |
| 1249 | ret = LPT_SCAN_CONTINUE; | ||
| 1250 | exit: | ||
| 1251 | vfree(buf); | 1233 | vfree(buf); |
| 1252 | return ret; | 1234 | return LPT_SCAN_CONTINUE; |
| 1253 | 1235 | ||
| 1254 | out_print: | 1236 | out_print: |
| 1255 | ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " | 1237 | ubifs_err("bad accounting of LEB %d: free %d, dirty %d flags %#x, " |
| @@ -1258,10 +1240,10 @@ out_print: | |||
| 1258 | dbg_dump_leb(c, lnum); | 1240 | dbg_dump_leb(c, lnum); |
| 1259 | out_destroy: | 1241 | out_destroy: |
| 1260 | ubifs_scan_destroy(sleb); | 1242 | ubifs_scan_destroy(sleb); |
| 1243 | ret = -EINVAL; | ||
| 1261 | out: | 1244 | out: |
| 1262 | vfree(buf); | 1245 | vfree(buf); |
| 1263 | data->err = -EINVAL; | 1246 | return ret; |
| 1264 | return LPT_SCAN_STOP; | ||
| 1265 | } | 1247 | } |
| 1266 | 1248 | ||
| 1267 | /** | 1249 | /** |
| @@ -1278,8 +1260,7 @@ out: | |||
| 1278 | int dbg_check_lprops(struct ubifs_info *c) | 1260 | int dbg_check_lprops(struct ubifs_info *c) |
| 1279 | { | 1261 | { |
| 1280 | int i, err; | 1262 | int i, err; |
| 1281 | struct scan_check_data data; | 1263 | struct ubifs_lp_stats lst; |
| 1282 | struct ubifs_lp_stats *lst = &data.lst; | ||
| 1283 | 1264 | ||
| 1284 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) | 1265 | if (!(ubifs_chk_flags & UBIFS_CHK_LPROPS)) |
| 1285 | return 0; | 1266 | return 0; |
| @@ -1294,29 +1275,23 @@ int dbg_check_lprops(struct ubifs_info *c) | |||
| 1294 | return err; | 1275 | return err; |
| 1295 | } | 1276 | } |
| 1296 | 1277 | ||
| 1297 | memset(lst, 0, sizeof(struct ubifs_lp_stats)); | 1278 | memset(&lst, 0, sizeof(struct ubifs_lp_stats)); |
| 1298 | |||
| 1299 | data.err = 0; | ||
| 1300 | err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, | 1279 | err = ubifs_lpt_scan_nolock(c, c->main_first, c->leb_cnt - 1, |
| 1301 | (ubifs_lpt_scan_callback)scan_check_cb, | 1280 | (ubifs_lpt_scan_callback)scan_check_cb, |
| 1302 | &data); | 1281 | &lst); |
| 1303 | if (err && err != -ENOSPC) | 1282 | if (err && err != -ENOSPC) |
| 1304 | goto out; | 1283 | goto out; |
| 1305 | if (data.err) { | ||
| 1306 | err = data.err; | ||
| 1307 | goto out; | ||
| 1308 | } | ||
| 1309 | 1284 | ||
| 1310 | if (lst->empty_lebs != c->lst.empty_lebs || | 1285 | if (lst.empty_lebs != c->lst.empty_lebs || |
| 1311 | lst->idx_lebs != c->lst.idx_lebs || | 1286 | lst.idx_lebs != c->lst.idx_lebs || |
| 1312 | lst->total_free != c->lst.total_free || | 1287 | lst.total_free != c->lst.total_free || |
| 1313 | lst->total_dirty != c->lst.total_dirty || | 1288 | lst.total_dirty != c->lst.total_dirty || |
| 1314 | lst->total_used != c->lst.total_used) { | 1289 | lst.total_used != c->lst.total_used) { |
| 1315 | ubifs_err("bad overall accounting"); | 1290 | ubifs_err("bad overall accounting"); |
| 1316 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " | 1291 | ubifs_err("calculated: empty_lebs %d, idx_lebs %d, " |
| 1317 | "total_free %lld, total_dirty %lld, total_used %lld", | 1292 | "total_free %lld, total_dirty %lld, total_used %lld", |
| 1318 | lst->empty_lebs, lst->idx_lebs, lst->total_free, | 1293 | lst.empty_lebs, lst.idx_lebs, lst.total_free, |
| 1319 | lst->total_dirty, lst->total_used); | 1294 | lst.total_dirty, lst.total_used); |
| 1320 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " | 1295 | ubifs_err("read from lprops: empty_lebs %d, idx_lebs %d, " |
| 1321 | "total_free %lld, total_dirty %lld, total_used %lld", | 1296 | "total_free %lld, total_dirty %lld, total_used %lld", |
| 1322 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, | 1297 | c->lst.empty_lebs, c->lst.idx_lebs, c->lst.total_free, |
| @@ -1325,11 +1300,11 @@ int dbg_check_lprops(struct ubifs_info *c) | |||
| 1325 | goto out; | 1300 | goto out; |
| 1326 | } | 1301 | } |
| 1327 | 1302 | ||
| 1328 | if (lst->total_dead != c->lst.total_dead || | 1303 | if (lst.total_dead != c->lst.total_dead || |
| 1329 | lst->total_dark != c->lst.total_dark) { | 1304 | lst.total_dark != c->lst.total_dark) { |
| 1330 | ubifs_err("bad dead/dark space accounting"); | 1305 | ubifs_err("bad dead/dark space accounting"); |
| 1331 | ubifs_err("calculated: total_dead %lld, total_dark %lld", | 1306 | ubifs_err("calculated: total_dead %lld, total_dark %lld", |
| 1332 | lst->total_dead, lst->total_dark); | 1307 | lst.total_dead, lst.total_dark); |
| 1333 | ubifs_err("read from lprops: total_dead %lld, total_dark %lld", | 1308 | ubifs_err("read from lprops: total_dead %lld, total_dark %lld", |
| 1334 | c->lst.total_dead, c->lst.total_dark); | 1309 | c->lst.total_dead, c->lst.total_dark); |
| 1335 | err = -EINVAL; | 1310 | err = -EINVAL; |
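
The lprops hunks above retire the scan_check_data wrapper: scan_check_cb() now updates a caller-owned ubifs_lp_stats structure and reports failures by returning a negative errno directly, instead of stashing it in data->err and returning LPT_SCAN_STOP. A user-space sketch of that simplified callback contract; the enum value, types, and numbers are local models, not the kernel definitions:

#include <errno.h>
#include <stdio.h>

enum { LPT_SCAN_CONTINUE = 1 }; /* stand-in scan code, non-negative */

struct lp_stats { int empty_lebs; };

static int check_cb(int lnum, int free, int leb_size, struct lp_stats *lst)
{
        if (free > leb_size)
                return -EINVAL;         /* propagated to the caller as-is */
        if (free == leb_size)
                lst->empty_lebs += 1;   /* caller-owned accumulator */
        return LPT_SCAN_CONTINUE;
}

int main(void)
{
        struct lp_stats lst = { 0 };
        int ret = check_cb(42, 128, 128, &lst);

        printf("ret %d, empty_lebs %d\n", ret, lst.empty_lebs);
        return ret < 0;
}
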
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 0c9c69bd983a..dfcb5748a7dc 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
| @@ -29,6 +29,12 @@ | |||
| 29 | #include <linux/slab.h> | 29 | #include <linux/slab.h> |
| 30 | #include "ubifs.h" | 30 | #include "ubifs.h" |
| 31 | 31 | ||
| 32 | #ifdef CONFIG_UBIFS_FS_DEBUG | ||
| 33 | static int dbg_populate_lsave(struct ubifs_info *c); | ||
| 34 | #else | ||
| 35 | #define dbg_populate_lsave(c) 0 | ||
| 36 | #endif | ||
| 37 | |||
| 32 | /** | 38 | /** |
| 33 | * first_dirty_cnode - find first dirty cnode. | 39 | * first_dirty_cnode - find first dirty cnode. |
| 34 | * @c: UBIFS file-system description object | 40 | * @c: UBIFS file-system description object |
| @@ -586,7 +592,7 @@ static struct ubifs_pnode *next_pnode_to_dirty(struct ubifs_info *c, | |||
| 586 | if (nnode->nbranch[iip].lnum) | 592 | if (nnode->nbranch[iip].lnum) |
| 587 | break; | 593 | break; |
| 588 | } | 594 | } |
| 589 | } while (iip >= UBIFS_LPT_FANOUT); | 595 | } while (iip >= UBIFS_LPT_FANOUT); |
| 590 | 596 | ||
| 591 | /* Go right */ | 597 | /* Go right */ |
| 592 | nnode = ubifs_get_nnode(c, nnode, iip); | 598 | nnode = ubifs_get_nnode(c, nnode, iip); |
| @@ -815,6 +821,10 @@ static void populate_lsave(struct ubifs_info *c) | |||
| 815 | c->lpt_drty_flgs |= LSAVE_DIRTY; | 821 | c->lpt_drty_flgs |= LSAVE_DIRTY; |
| 816 | ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); | 822 | ubifs_add_lpt_dirt(c, c->lsave_lnum, c->lsave_sz); |
| 817 | } | 823 | } |
| 824 | |||
| 825 | if (dbg_populate_lsave(c)) | ||
| 826 | return; | ||
| 827 | |||
| 818 | list_for_each_entry(lprops, &c->empty_list, list) { | 828 | list_for_each_entry(lprops, &c->empty_list, list) { |
| 819 | c->lsave[cnt++] = lprops->lnum; | 829 | c->lsave[cnt++] = lprops->lnum; |
| 820 | if (cnt >= c->lsave_cnt) | 830 | if (cnt >= c->lsave_cnt) |
| @@ -1994,4 +2004,47 @@ void dbg_dump_lpt_lebs(const struct ubifs_info *c) | |||
| 1994 | current->pid); | 2004 | current->pid); |
| 1995 | } | 2005 | } |
| 1996 | 2006 | ||
| 2007 | /** | ||
| 2008 | * dbg_populate_lsave - debugging version of 'populate_lsave()' | ||
| 2009 | * @c: UBIFS file-system description object | ||
| 2010 | * | ||
| 2011 | * This is a debugging version of 'populate_lsave()' which populates lsave | ||
| 2012 | * with random LEBs instead of useful LEBs, which is good for test coverage. | ||
| 2013 | * Returns zero if lsave has not been populated (this debugging feature is | ||
| 2014 | * disabled) and non-zero if lsave has been populated. | ||
| 2015 | */ | ||
| 2016 | static int dbg_populate_lsave(struct ubifs_info *c) | ||
| 2017 | { | ||
| 2018 | struct ubifs_lprops *lprops; | ||
| 2019 | struct ubifs_lpt_heap *heap; | ||
| 2020 | int i; | ||
| 2021 | |||
| 2022 | if (!(ubifs_chk_flags & UBIFS_CHK_GEN)) | ||
| 2023 | return 0; | ||
| 2024 | if (random32() & 3) | ||
| 2025 | return 0; | ||
| 2026 | |||
| 2027 | for (i = 0; i < c->lsave_cnt; i++) | ||
| 2028 | c->lsave[i] = c->main_first; | ||
| 2029 | |||
| 2030 | list_for_each_entry(lprops, &c->empty_list, list) | ||
| 2031 | c->lsave[random32() % c->lsave_cnt] = lprops->lnum; | ||
| 2032 | list_for_each_entry(lprops, &c->freeable_list, list) | ||
| 2033 | c->lsave[random32() % c->lsave_cnt] = lprops->lnum; | ||
| 2034 | list_for_each_entry(lprops, &c->frdi_idx_list, list) | ||
| 2035 | c->lsave[random32() % c->lsave_cnt] = lprops->lnum; | ||
| 2036 | |||
| 2037 | heap = &c->lpt_heap[LPROPS_DIRTY_IDX - 1]; | ||
| 2038 | for (i = 0; i < heap->cnt; i++) | ||
| 2039 | c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; | ||
| 2040 | heap = &c->lpt_heap[LPROPS_DIRTY - 1]; | ||
| 2041 | for (i = 0; i < heap->cnt; i++) | ||
| 2042 | c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; | ||
| 2043 | heap = &c->lpt_heap[LPROPS_FREE - 1]; | ||
| 2044 | for (i = 0; i < heap->cnt; i++) | ||
| 2045 | c->lsave[random32() % c->lsave_cnt] = heap->arr[i]->lnum; | ||
| 2046 | |||
| 2047 | return 1; | ||
| 2048 | } | ||
| 2049 | |||
| 1997 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | 2050 | #endif /* CONFIG_UBIFS_FS_DEBUG */ |
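
dbg_populate_lsave() above is gated twice: it does nothing unless the general debug checks are enabled, and even then it fires only when the low two bits of random32() are zero, roughly a quarter of the calls. A user-space sketch of that gating pattern, with rand() standing in for the kernel's random32() and a made-up flag constant:

#include <stdio.h>
#include <stdlib.h>

#define CHK_GEN 0x1 /* stand-in for UBIFS_CHK_GEN */

static int dbg_populate(unsigned int chk_flags)
{
        if (!(chk_flags & CHK_GEN))
                return 0;       /* debugging feature disabled */
        if (rand() & 3)
                return 0;       /* skip three times out of four */
        /* ...would fill lsave with random LEBs here... */
        return 1;
}

int main(void)
{
        int i, hits = 0;

        for (i = 0; i < 1000; i++)
                hits += dbg_populate(CHK_GEN);
        printf("populated %d/1000 times (~250 expected)\n", hits);
        return 0;
}
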
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 21f47afdacff..278c2382e8c2 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c | |||
| @@ -148,7 +148,7 @@ static int validate_master(const struct ubifs_info *c) | |||
| 148 | } | 148 | } |
| 149 | 149 | ||
| 150 | main_sz = (long long)c->main_lebs * c->leb_size; | 150 | main_sz = (long long)c->main_lebs * c->leb_size; |
| 151 | if (c->old_idx_sz & 7 || c->old_idx_sz >= main_sz) { | 151 | if (c->bi.old_idx_sz & 7 || c->bi.old_idx_sz >= main_sz) { |
| 152 | err = 9; | 152 | err = 9; |
| 153 | goto out; | 153 | goto out; |
| 154 | } | 154 | } |
| @@ -218,7 +218,7 @@ static int validate_master(const struct ubifs_info *c) | |||
| 218 | } | 218 | } |
| 219 | 219 | ||
| 220 | if (c->lst.total_dead + c->lst.total_dark + | 220 | if (c->lst.total_dead + c->lst.total_dark + |
| 221 | c->lst.total_used + c->old_idx_sz > main_sz) { | 221 | c->lst.total_used + c->bi.old_idx_sz > main_sz) { |
| 222 | err = 21; | 222 | err = 21; |
| 223 | goto out; | 223 | goto out; |
| 224 | } | 224 | } |
| @@ -286,7 +286,7 @@ int ubifs_read_master(struct ubifs_info *c) | |||
| 286 | c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); | 286 | c->gc_lnum = le32_to_cpu(c->mst_node->gc_lnum); |
| 287 | c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); | 287 | c->ihead_lnum = le32_to_cpu(c->mst_node->ihead_lnum); |
| 288 | c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); | 288 | c->ihead_offs = le32_to_cpu(c->mst_node->ihead_offs); |
| 289 | c->old_idx_sz = le64_to_cpu(c->mst_node->index_size); | 289 | c->bi.old_idx_sz = le64_to_cpu(c->mst_node->index_size); |
| 290 | c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); | 290 | c->lpt_lnum = le32_to_cpu(c->mst_node->lpt_lnum); |
| 291 | c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); | 291 | c->lpt_offs = le32_to_cpu(c->mst_node->lpt_offs); |
| 292 | c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); | 292 | c->nhead_lnum = le32_to_cpu(c->mst_node->nhead_lnum); |
| @@ -305,7 +305,7 @@ int ubifs_read_master(struct ubifs_info *c) | |||
| 305 | c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); | 305 | c->lst.total_dead = le64_to_cpu(c->mst_node->total_dead); |
| 306 | c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); | 306 | c->lst.total_dark = le64_to_cpu(c->mst_node->total_dark); |
| 307 | 307 | ||
| 308 | c->calc_idx_sz = c->old_idx_sz; | 308 | c->calc_idx_sz = c->bi.old_idx_sz; |
| 309 | 309 | ||
| 310 | if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) | 310 | if (c->mst_node->flags & cpu_to_le32(UBIFS_MST_NO_ORPHS)) |
| 311 | c->no_orphs = 1; | 311 | c->no_orphs = 1; |
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index c3de04dc952a..0b5296a9a4c5 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
| @@ -340,4 +340,21 @@ static inline void ubifs_release_lprops(struct ubifs_info *c) | |||
| 340 | mutex_unlock(&c->lp_mutex); | 340 | mutex_unlock(&c->lp_mutex); |
| 341 | } | 341 | } |
| 342 | 342 | ||
| 343 | /** | ||
| 344 | * ubifs_next_log_lnum - switch to the next log LEB. | ||
| 345 | * @c: UBIFS file-system description object | ||
| 346 | * @lnum: current log LEB | ||
| 347 | * | ||
| 348 | * This helper function returns the log LEB number which goes next after LEB | ||
| 349 | * 'lnum'. | ||
| 350 | */ | ||
| 351 | static inline int ubifs_next_log_lnum(const struct ubifs_info *c, int lnum) | ||
| 352 | { | ||
| 353 | lnum += 1; | ||
| 354 | if (lnum > c->log_last) | ||
| 355 | lnum = UBIFS_LOG_LNUM; | ||
| 356 | |||
| 357 | return lnum; | ||
| 358 | } | ||
| 359 | |||
| 343 | #endif /* __UBIFS_MISC_H__ */ | 360 | #endif /* __UBIFS_MISC_H__ */ |
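
A short usage sketch for the helper above: log LEBs occupy a fixed range starting at UBIFS_LOG_LNUM and ending at c->log_last, and the increment wraps back to the start of the range. The constants below are invented for the example; in UBIFS they come from the superblock:

#include <stdio.h>

#define LOG_LNUM 3 /* stand-in for UBIFS_LOG_LNUM */

static int next_log_lnum(int lnum, int log_last)
{
        lnum += 1;
        if (lnum > log_last)
                lnum = LOG_LNUM;
        return lnum;
}

int main(void)
{
        int lnum = LOG_LNUM, log_last = 6, i;

        for (i = 0; i < 8; i++) {
                printf("%d ", lnum);    /* prints: 3 4 5 6 3 4 5 6 */
                lnum = next_log_lnum(lnum, log_last);
        }
        printf("\n");
        return 0;
}
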
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 09df318e368f..bd644bf587a8 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c | |||
| @@ -673,7 +673,8 @@ static int kill_orphans(struct ubifs_info *c) | |||
| 673 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); | 673 | sleb = ubifs_scan(c, lnum, 0, c->sbuf, 1); |
| 674 | if (IS_ERR(sleb)) { | 674 | if (IS_ERR(sleb)) { |
| 675 | if (PTR_ERR(sleb) == -EUCLEAN) | 675 | if (PTR_ERR(sleb) == -EUCLEAN) |
| 676 | sleb = ubifs_recover_leb(c, lnum, 0, c->sbuf, 0); | 676 | sleb = ubifs_recover_leb(c, lnum, 0, |
| 677 | c->sbuf, 0); | ||
| 677 | if (IS_ERR(sleb)) { | 678 | if (IS_ERR(sleb)) { |
| 678 | err = PTR_ERR(sleb); | 679 | err = PTR_ERR(sleb); |
| 679 | break; | 680 | break; |
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index 3dbad6fbd1eb..731d9e2e7b50 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c | |||
| @@ -564,13 +564,16 @@ static int fix_unclean_leb(struct ubifs_info *c, struct ubifs_scan_leb *sleb, | |||
| 564 | } | 564 | } |
| 565 | 565 | ||
| 566 | /** | 566 | /** |
| 567 | * drop_incomplete_group - drop nodes from an incomplete group. | 567 | * drop_last_node - drop the last node or group of nodes. |
| 568 | * @sleb: scanned LEB information | 568 | * @sleb: scanned LEB information |
| 569 | * @offs: offset of dropped nodes is returned here | 569 | * @offs: offset of dropped nodes is returned here |
| 570 | * @grouped: non-zero if a whole group of nodes has to be dropped | ||
| 570 | * | 571 | * |
| 571 | * This function returns %1 if nodes are dropped and %0 otherwise. | 572 | * This is a helper function for 'ubifs_recover_leb()' which drops the last |
| 573 | * node of the scanned LEB or the last group of nodes if @grouped is not zero. | ||
| 574 | * This function returns %1 if a node was dropped and %0 otherwise. | ||
| 572 | */ | 575 | */ |
| 573 | static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | 576 | static int drop_last_node(struct ubifs_scan_leb *sleb, int *offs, int grouped) |
| 574 | { | 577 | { |
| 575 | int dropped = 0; | 578 | int dropped = 0; |
| 576 | 579 | ||
| @@ -589,6 +592,8 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | |||
| 589 | kfree(snod); | 592 | kfree(snod); |
| 590 | sleb->nodes_cnt -= 1; | 593 | sleb->nodes_cnt -= 1; |
| 591 | dropped = 1; | 594 | dropped = 1; |
| 595 | if (!grouped) | ||
| 596 | break; | ||
| 592 | } | 597 | } |
| 593 | return dropped; | 598 | return dropped; |
| 594 | } | 599 | } |
| @@ -609,8 +614,7 @@ static int drop_incomplete_group(struct ubifs_scan_leb *sleb, int *offs) | |||
| 609 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | 614 | struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, |
| 610 | int offs, void *sbuf, int grouped) | 615 | int offs, void *sbuf, int grouped) |
| 611 | { | 616 | { |
| 612 | int err, len = c->leb_size - offs, need_clean = 0, quiet = 1; | 617 | int ret = 0, err, len = c->leb_size - offs, start = offs, min_io_unit; |
| 613 | int empty_chkd = 0, start = offs; | ||
| 614 | struct ubifs_scan_leb *sleb; | 618 | struct ubifs_scan_leb *sleb; |
| 615 | void *buf = sbuf + offs; | 619 | void *buf = sbuf + offs; |
| 616 | 620 | ||
| @@ -620,12 +624,8 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 620 | if (IS_ERR(sleb)) | 624 | if (IS_ERR(sleb)) |
| 621 | return sleb; | 625 | return sleb; |
| 622 | 626 | ||
| 623 | if (sleb->ecc) | 627 | ubifs_assert(len >= 8); |
| 624 | need_clean = 1; | ||
| 625 | |||
| 626 | while (len >= 8) { | 628 | while (len >= 8) { |
| 627 | int ret; | ||
| 628 | |||
| 629 | dbg_scan("look at LEB %d:%d (%d bytes left)", | 629 | dbg_scan("look at LEB %d:%d (%d bytes left)", |
| 630 | lnum, offs, len); | 630 | lnum, offs, len); |
| 631 | 631 | ||
| @@ -635,8 +635,7 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 635 | * Scan quietly until there is an error from which we cannot | 635 | * Scan quietly until there is an error from which we cannot |
| 636 | * recover | 636 | * recover |
| 637 | */ | 637 | */ |
| 638 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, quiet); | 638 | ret = ubifs_scan_a_node(c, buf, len, lnum, offs, 0); |
| 639 | |||
| 640 | if (ret == SCANNED_A_NODE) { | 639 | if (ret == SCANNED_A_NODE) { |
| 641 | /* A valid node, and not a padding node */ | 640 | /* A valid node, and not a padding node */ |
| 642 | struct ubifs_ch *ch = buf; | 641 | struct ubifs_ch *ch = buf; |
| @@ -649,70 +648,32 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 649 | offs += node_len; | 648 | offs += node_len; |
| 650 | buf += node_len; | 649 | buf += node_len; |
| 651 | len -= node_len; | 650 | len -= node_len; |
| 652 | continue; | 651 | } else if (ret > 0) { |
| 653 | } | ||
| 654 | |||
| 655 | if (ret > 0) { | ||
| 656 | /* Padding bytes or a valid padding node */ | 652 | /* Padding bytes or a valid padding node */ |
| 657 | offs += ret; | 653 | offs += ret; |
| 658 | buf += ret; | 654 | buf += ret; |
| 659 | len -= ret; | 655 | len -= ret; |
| 660 | continue; | 656 | } else if (ret == SCANNED_EMPTY_SPACE || |
| 661 | } | 657 | ret == SCANNED_GARBAGE || |
| 662 | 658 | ret == SCANNED_A_BAD_PAD_NODE || | |
| 663 | if (ret == SCANNED_EMPTY_SPACE) { | 659 | ret == SCANNED_A_CORRUPT_NODE) { |
| 664 | if (!is_empty(buf, len)) { | 660 | dbg_rcvry("found corruption - %d", ret); |
| 665 | if (!is_last_write(c, buf, offs)) | ||
| 666 | break; | ||
| 667 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 668 | need_clean = 1; | ||
| 669 | } | ||
| 670 | empty_chkd = 1; | ||
| 671 | break; | 661 | break; |
| 672 | } | 662 | } else { |
| 673 | 663 | dbg_err("unexpected return value %d", ret); | |
| 674 | if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) | ||
| 675 | if (is_last_write(c, buf, offs)) { | ||
| 676 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 677 | need_clean = 1; | ||
| 678 | empty_chkd = 1; | ||
| 679 | break; | ||
| 680 | } | ||
| 681 | |||
| 682 | if (ret == SCANNED_A_CORRUPT_NODE) | ||
| 683 | if (no_more_nodes(c, buf, len, lnum, offs)) { | ||
| 684 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 685 | need_clean = 1; | ||
| 686 | empty_chkd = 1; | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | |||
| 690 | if (quiet) { | ||
| 691 | /* Redo the last scan but noisily */ | ||
| 692 | quiet = 0; | ||
| 693 | continue; | ||
| 694 | } | ||
| 695 | |||
| 696 | switch (ret) { | ||
| 697 | case SCANNED_GARBAGE: | ||
| 698 | dbg_err("garbage"); | ||
| 699 | goto corrupted; | ||
| 700 | case SCANNED_A_CORRUPT_NODE: | ||
| 701 | case SCANNED_A_BAD_PAD_NODE: | ||
| 702 | dbg_err("bad node"); | ||
| 703 | goto corrupted; | ||
| 704 | default: | ||
| 705 | dbg_err("unknown"); | ||
| 706 | err = -EINVAL; | 664 | err = -EINVAL; |
| 707 | goto error; | 665 | goto error; |
| 708 | } | 666 | } |
| 709 | } | 667 | } |
| 710 | 668 | ||
| 711 | if (!empty_chkd && !is_empty(buf, len)) { | 669 | if (ret == SCANNED_GARBAGE || ret == SCANNED_A_BAD_PAD_NODE) { |
| 712 | if (is_last_write(c, buf, offs)) { | 670 | if (!is_last_write(c, buf, offs)) |
| 713 | clean_buf(c, &buf, lnum, &offs, &len); | 671 | goto corrupted_rescan; |
| 714 | need_clean = 1; | 672 | } else if (ret == SCANNED_A_CORRUPT_NODE) { |
| 715 | } else { | 673 | if (!no_more_nodes(c, buf, len, lnum, offs)) |
| 674 | goto corrupted_rescan; | ||
| 675 | } else if (!is_empty(buf, len)) { | ||
| 676 | if (!is_last_write(c, buf, offs)) { | ||
| 716 | int corruption = first_non_ff(buf, len); | 677 | int corruption = first_non_ff(buf, len); |
| 717 | 678 | ||
| 718 | /* | 679 | /* |
| @@ -728,29 +689,82 @@ struct ubifs_scan_leb *ubifs_recover_leb(struct ubifs_info *c, int lnum, | |||
| 728 | } | 689 | } |
| 729 | } | 690 | } |
| 730 | 691 | ||
| 731 | /* Drop nodes from incomplete group */ | 692 | min_io_unit = round_down(offs, c->min_io_size); |
| 732 | if (grouped && drop_incomplete_group(sleb, &offs)) { | 693 | if (grouped) |
| 733 | buf = sbuf + offs; | 694 | /* |
| 734 | len = c->leb_size - offs; | 695 | * If nodes are grouped, always drop the incomplete group at |
| 735 | clean_buf(c, &buf, lnum, &offs, &len); | 696 | * the end. |
| 736 | need_clean = 1; | 697 | */ |
| 737 | } | 698 | drop_last_node(sleb, &offs, 1); |
| 738 | 699 | ||
| 739 | if (offs % c->min_io_size) { | 700 | /* |
| 740 | clean_buf(c, &buf, lnum, &offs, &len); | 701 | * While we are in the middle of the same min. I/O unit keep dropping |
| 741 | need_clean = 1; | 702 | * nodes. So basically, what we want is to make sure that the last min. |
| 742 | } | 703 | * I/O unit where we saw the corruption is dropped completely with all |
| 704 | * the uncorrupted nodes which may possibly sit there. | ||
| 705 | * | ||
| 706 | * In other words, let's name the min. I/O unit where the corruption | ||
| 707 | * starts B, and the previous min. I/O unit A. The below code tries to | ||
| 708 | * deal with a situation when half of B contains valid nodes or the end | ||
| 709 | * of a valid node, and the second half of B contains corrupted data or | ||
| 710 | * garbage. This means that UBIFS had been writing to B just before the | ||
| 711 | * power cut happened. I do not know how realistic this scenario is, | ||
| 712 | * where half of the min. I/O unit had been written successfully and the | ||
| 713 | * other half not, but this is possible in our 'failure mode emulation' | ||
| 714 | * infrastructure at least. | ||
| 715 | * | ||
| 716 | * So what is the problem, why do we need to drop those nodes? Why can't | ||
| 717 | * we just clean up the second half of B by putting a padding node | ||
| 718 | * there? We can, and this works fine with one exception which was | ||
| 719 | * reproduced with power cut emulation testing and happens extremely | ||
| 720 | * rarely. The description follows, but it is worth noting that this is | ||
| 721 | * only about the GC head, so we could do this trick only if the bud | ||
| 722 | * belongs to the GC head, but it does not seem to be worth an | ||
| 723 | * additional "if" statement. | ||
| 724 | * | ||
| 725 | * So, imagine the file-system is full, we run GC which is moving valid | ||
| 726 | * nodes from LEB X to LEB Y (obviously, LEB Y is the current GC head | ||
| 727 | * LEB). The @c->gc_lnum is -1, which means that GC will retain LEB X | ||
| 728 | * and will try to continue. Imagine that LEB X is currently the | ||
| 729 | * dirtiest LEB, and the amount of used space in LEB Y is exactly the | ||
| 730 | * same as the amount of free space in LEB X. | ||
| 731 | * | ||
| 732 | * And a power cut happens when nodes are moved from LEB X to LEB Y. We | ||
| 733 | * are here trying to recover LEB Y which is the GC head LEB. We find | ||
| 734 | * the min. I/O unit B as described above. Then we clean-up LEB Y by | ||
| 735 | * padding min. I/O unit. And later 'ubifs_rcvry_gc_commit()' function | ||
| 736 | * fails, because it cannot find a dirty LEB which could be GC'd into | ||
| 737 | * LEB Y! Even LEB X does not match because the amount of valid nodes | ||
| 738 | * there does not fit the free space in LEB Y any more! And this is | ||
| 739 | * because of the padding node which we added to LEB Y. The | ||
| 740 | * user-visible effect of this which I once observed and analysed is | ||
| 741 | * that we cannot mount the file-system with -ENOSPC error. | ||
| 742 | * | ||
| 743 | * So obviously, to make sure that situation does not happen we should | ||
| 744 | * free min. I/O unit B in LEB Y completely and the last used min. I/O | ||
| 745 | * unit in LEB Y should be A. This is basically what the below code | ||
| 746 | * tries to do. | ||
| 747 | */ | ||
| 748 | while (min_io_unit == round_down(offs, c->min_io_size) && | ||
| 749 | min_io_unit != offs && | ||
| 750 | drop_last_node(sleb, &offs, grouped)); | ||
| 751 | |||
| 752 | buf = sbuf + offs; | ||
| 753 | len = c->leb_size - offs; | ||
| 743 | 754 | ||
| 755 | clean_buf(c, &buf, lnum, &offs, &len); | ||
| 744 | ubifs_end_scan(c, sleb, lnum, offs); | 756 | ubifs_end_scan(c, sleb, lnum, offs); |
| 745 | 757 | ||
| 746 | if (need_clean) { | 758 | err = fix_unclean_leb(c, sleb, start); |
| 747 | err = fix_unclean_leb(c, sleb, start); | 759 | if (err) |
| 748 | if (err) | 760 | goto error; |
| 749 | goto error; | ||
| 750 | } | ||
| 751 | 761 | ||
| 752 | return sleb; | 762 | return sleb; |
| 753 | 763 | ||
| 764 | corrupted_rescan: | ||
| 765 | /* Re-scan the corrupted data with verbose messages */ | ||
| 766 | dbg_err("corruptio %d", ret); | ||
| 767 | ubifs_scan_a_node(c, buf, len, lnum, offs, 1); | ||
| 754 | corrupted: | 768 | corrupted: |
| 755 | ubifs_scanned_corruption(c, lnum, offs, buf); | 769 | ubifs_scanned_corruption(c, lnum, offs, buf); |
| 756 | err = -EUCLEAN; | 770 | err = -EUCLEAN; |
| @@ -1070,6 +1084,53 @@ int ubifs_clean_lebs(const struct ubifs_info *c, void *sbuf) | |||
| 1070 | } | 1084 | } |
| 1071 | 1085 | ||
| 1072 | /** | 1086 | /** |
| 1087 | * grab_empty_leb - grab an empty LEB to use as GC LEB and run commit. | ||
| 1088 | * @c: UBIFS file-system description object | ||
| 1089 | * | ||
| 1090 | * This is a helper function for 'ubifs_rcvry_gc_commit()' which grabs an empty | ||
| 1091 | * LEB to be used as GC LEB (@c->gc_lnum), and then runs the commit. Returns | ||
| 1092 | * zero in case of success and a negative error code in case of failure. | ||
| 1093 | */ | ||
| 1094 | static int grab_empty_leb(struct ubifs_info *c) | ||
| 1095 | { | ||
| 1096 | int lnum, err; | ||
| 1097 | |||
| 1098 | /* | ||
| 1099 | * Note, it is very important to first search for an empty LEB and then | ||
| 1100 | * run the commit, not vice-versa. The reason is that there might be | ||
| 1101 | * only one empty LEB at the moment, the one which has been the | ||
| 1102 | * @c->gc_lnum just before the power cut happened. During the regular | ||
| 1103 | * UBIFS operation (not now) @c->gc_lnum is marked as "taken", so no | ||
| 1104 | * one but GC can grab it. But at this moment this single empty LEB is | ||
| 1105 | * not marked as taken, so if we run commit - what happens? Right, the | ||
| 1106 | * commit will grab it and write the index there. Remember that the | ||
| 1107 | * index always expands as long as there is free space, and it only | ||
| 1108 | * starts consolidating when we run out of space. | ||
| 1109 | * | ||
| 1110 | * IOW, if we run commit now, we might not be able to find a free LEB | ||
| 1111 | * after this. | ||
| 1112 | */ | ||
| 1113 | lnum = ubifs_find_free_leb_for_idx(c); | ||
| 1114 | if (lnum < 0) { | ||
| 1115 | dbg_err("could not find an empty LEB"); | ||
| 1116 | dbg_dump_lprops(c); | ||
| 1117 | dbg_dump_budg(c, &c->bi); | ||
| 1118 | return lnum; | ||
| 1119 | } | ||
| 1120 | |||
| 1121 | /* Reset the index flag */ | ||
| 1122 | err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 1123 | LPROPS_INDEX, 0); | ||
| 1124 | if (err) | ||
| 1125 | return err; | ||
| 1126 | |||
| 1127 | c->gc_lnum = lnum; | ||
| 1128 | dbg_rcvry("found empty LEB %d, run commit", lnum); | ||
| 1129 | |||
| 1130 | return ubifs_run_commit(c); | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | /** | ||
| 1073 | * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. | 1134 | * ubifs_rcvry_gc_commit - recover the GC LEB number and run the commit. |
| 1074 | * @c: UBIFS file-system description object | 1135 | * @c: UBIFS file-system description object |
| 1075 | * | 1136 | * |
| @@ -1091,71 +1152,26 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) | |||
| 1091 | { | 1152 | { |
| 1092 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | 1153 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; |
| 1093 | struct ubifs_lprops lp; | 1154 | struct ubifs_lprops lp; |
| 1094 | int lnum, err; | 1155 | int err; |
| 1156 | |||
| 1157 | dbg_rcvry("GC head LEB %d, offs %d", wbuf->lnum, wbuf->offs); | ||
| 1095 | 1158 | ||
| 1096 | c->gc_lnum = -1; | 1159 | c->gc_lnum = -1; |
| 1097 | if (wbuf->lnum == -1) { | 1160 | if (wbuf->lnum == -1 || wbuf->offs == c->leb_size) |
| 1098 | dbg_rcvry("no GC head LEB"); | 1161 | return grab_empty_leb(c); |
| 1099 | goto find_free; | 1162 | |
| 1100 | } | ||
| 1101 | /* | ||
| 1102 | * See whether the used space in the dirtiest LEB fits in the GC head | ||
| 1103 | * LEB. | ||
| 1104 | */ | ||
| 1105 | if (wbuf->offs == c->leb_size) { | ||
| 1106 | dbg_rcvry("no room in GC head LEB"); | ||
| 1107 | goto find_free; | ||
| 1108 | } | ||
| 1109 | err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); | 1163 | err = ubifs_find_dirty_leb(c, &lp, wbuf->offs, 2); |
| 1110 | if (err) { | 1164 | if (err) { |
| 1111 | /* | 1165 | if (err != -ENOSPC) |
| 1112 | * There are no dirty or empty LEBs subject to here being | ||
| 1113 | * enough for the index. Try to use | ||
| 1114 | * 'ubifs_find_free_leb_for_idx()', which will return any empty | ||
| 1115 | * LEBs (ignoring index requirements). If the index then | ||
| 1116 | * doesn't have enough LEBs the recovery commit will fail - | ||
| 1117 | * which is the same result anyway i.e. recovery fails. So | ||
| 1118 | * there is no problem ignoring index requirements and just | ||
| 1119 | * grabbing a free LEB since we have already established there | ||
| 1120 | * is not a dirty LEB we could have used instead. | ||
| 1121 | */ | ||
| 1122 | if (err == -ENOSPC) { | ||
| 1123 | dbg_rcvry("could not find a dirty LEB"); | ||
| 1124 | goto find_free; | ||
| 1125 | } | ||
| 1126 | return err; | ||
| 1127 | } | ||
| 1128 | ubifs_assert(!(lp.flags & LPROPS_INDEX)); | ||
| 1129 | lnum = lp.lnum; | ||
| 1130 | if (lp.free + lp.dirty == c->leb_size) { | ||
| 1131 | /* An empty LEB was returned */ | ||
| 1132 | if (lp.free != c->leb_size) { | ||
| 1133 | err = ubifs_change_one_lp(c, lnum, c->leb_size, | ||
| 1134 | 0, 0, 0, 0); | ||
| 1135 | if (err) | ||
| 1136 | return err; | ||
| 1137 | } | ||
| 1138 | err = ubifs_leb_unmap(c, lnum); | ||
| 1139 | if (err) | ||
| 1140 | return err; | 1166 | return err; |
| 1141 | c->gc_lnum = lnum; | 1167 | |
| 1142 | dbg_rcvry("allocated LEB %d for GC", lnum); | 1168 | dbg_rcvry("could not find a dirty LEB"); |
| 1143 | /* Run the commit */ | 1169 | return grab_empty_leb(c); |
| 1144 | dbg_rcvry("committing"); | ||
| 1145 | return ubifs_run_commit(c); | ||
| 1146 | } | ||
| 1147 | /* | ||
| 1148 | * There was no empty LEB so the used space in the dirtiest LEB must fit | ||
| 1149 | * in the GC head LEB. | ||
| 1150 | */ | ||
| 1151 | if (lp.free + lp.dirty < wbuf->offs) { | ||
| 1152 | dbg_rcvry("LEB %d doesn't fit in GC head LEB %d:%d", | ||
| 1153 | lnum, wbuf->lnum, wbuf->offs); | ||
| 1154 | err = ubifs_return_leb(c, lnum); | ||
| 1155 | if (err) | ||
| 1156 | return err; | ||
| 1157 | goto find_free; | ||
| 1158 | } | 1170 | } |
| 1171 | |||
| 1172 | ubifs_assert(!(lp.flags & LPROPS_INDEX)); | ||
| 1173 | ubifs_assert(lp.free + lp.dirty >= wbuf->offs); | ||
| 1174 | |||
| 1159 | /* | 1175 | /* |
| 1160 | * We run the commit before garbage collection otherwise subsequent | 1176 | * We run the commit before garbage collection otherwise subsequent |
| 1161 | * mounts will see the GC and orphan deletion in a different order. | 1177 | * mounts will see the GC and orphan deletion in a different order. |
| @@ -1164,11 +1180,8 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) | |||
| 1164 | err = ubifs_run_commit(c); | 1180 | err = ubifs_run_commit(c); |
| 1165 | if (err) | 1181 | if (err) |
| 1166 | return err; | 1182 | return err; |
| 1167 | /* | 1183 | |
| 1168 | * The data in the dirtiest LEB fits in the GC head LEB, so do the GC | 1184 | dbg_rcvry("GC'ing LEB %d", lp.lnum); |
| 1169 | * - use locking to keep 'ubifs_assert()' happy. | ||
| 1170 | */ | ||
| 1171 | dbg_rcvry("GC'ing LEB %d", lnum); | ||
| 1172 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); | 1185 | mutex_lock_nested(&wbuf->io_mutex, wbuf->jhead); |
| 1173 | err = ubifs_garbage_collect_leb(c, &lp); | 1186 | err = ubifs_garbage_collect_leb(c, &lp); |
| 1174 | if (err >= 0) { | 1187 | if (err >= 0) { |
| @@ -1184,37 +1197,17 @@ int ubifs_rcvry_gc_commit(struct ubifs_info *c) | |||
| 1184 | err = -EINVAL; | 1197 | err = -EINVAL; |
| 1185 | return err; | 1198 | return err; |
| 1186 | } | 1199 | } |
| 1187 | if (err != LEB_RETAINED) { | 1200 | |
| 1188 | dbg_err("GC returned %d", err); | 1201 | ubifs_assert(err == LEB_RETAINED); |
| 1202 | if (err != LEB_RETAINED) | ||
| 1189 | return -EINVAL; | 1203 | return -EINVAL; |
| 1190 | } | 1204 | |
| 1191 | err = ubifs_leb_unmap(c, c->gc_lnum); | 1205 | err = ubifs_leb_unmap(c, c->gc_lnum); |
| 1192 | if (err) | 1206 | if (err) |
| 1193 | return err; | 1207 | return err; |
| 1194 | dbg_rcvry("allocated LEB %d for GC", lnum); | ||
| 1195 | return 0; | ||
| 1196 | 1208 | ||
| 1197 | find_free: | 1209 | dbg_rcvry("allocated LEB %d for GC", lp.lnum); |
| 1198 | /* | 1210 | return 0; |
| 1199 | * There is no GC head LEB or the free space in the GC head LEB is too | ||
| 1200 | * small, or there are not dirty LEBs. Allocate gc_lnum by calling | ||
| 1201 | * 'ubifs_find_free_leb_for_idx()' so GC is not run. | ||
| 1202 | */ | ||
| 1203 | lnum = ubifs_find_free_leb_for_idx(c); | ||
| 1204 | if (lnum < 0) { | ||
| 1205 | dbg_err("could not find an empty LEB"); | ||
| 1206 | return lnum; | ||
| 1207 | } | ||
| 1208 | /* And reset the index flag */ | ||
| 1209 | err = ubifs_change_one_lp(c, lnum, LPROPS_NC, LPROPS_NC, 0, | ||
| 1210 | LPROPS_INDEX, 0); | ||
| 1211 | if (err) | ||
| 1212 | return err; | ||
| 1213 | c->gc_lnum = lnum; | ||
| 1214 | dbg_rcvry("allocated LEB %d for GC", lnum); | ||
| 1215 | /* Run the commit */ | ||
| 1216 | dbg_rcvry("committing"); | ||
| 1217 | return ubifs_run_commit(c); | ||
| 1218 | } | 1211 | } |
| 1219 | 1212 | ||
| 1220 | /** | 1213 | /** |
| @@ -1456,7 +1449,7 @@ static int fix_size_in_place(struct ubifs_info *c, struct size_entry *e) | |||
| 1456 | err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); | 1449 | err = ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); |
| 1457 | if (err) | 1450 | if (err) |
| 1458 | goto out; | 1451 | goto out; |
| 1459 | dbg_rcvry("inode %lu at %d:%d size %lld -> %lld ", | 1452 | dbg_rcvry("inode %lu at %d:%d size %lld -> %lld", |
| 1460 | (unsigned long)e->inum, lnum, offs, i_size, e->d_size); | 1453 | (unsigned long)e->inum, lnum, offs, i_size, e->d_size); |
| 1461 | return 0; | 1454 | return 0; |
| 1462 | 1455 | ||
| @@ -1505,20 +1498,27 @@ int ubifs_recover_size(struct ubifs_info *c) | |||
| 1505 | e->i_size = le64_to_cpu(ino->size); | 1498 | e->i_size = le64_to_cpu(ino->size); |
| 1506 | } | 1499 | } |
| 1507 | } | 1500 | } |
| 1501 | |||
| 1508 | if (e->exists && e->i_size < e->d_size) { | 1502 | if (e->exists && e->i_size < e->d_size) { |
| 1509 | if (!e->inode && c->ro_mount) { | 1503 | if (c->ro_mount) { |
| 1510 | /* Fix the inode size and pin it in memory */ | 1504 | /* Fix the inode size and pin it in memory */ |
| 1511 | struct inode *inode; | 1505 | struct inode *inode; |
| 1506 | struct ubifs_inode *ui; | ||
| 1507 | |||
| 1508 | ubifs_assert(!e->inode); | ||
| 1512 | 1509 | ||
| 1513 | inode = ubifs_iget(c->vfs_sb, e->inum); | 1510 | inode = ubifs_iget(c->vfs_sb, e->inum); |
| 1514 | if (IS_ERR(inode)) | 1511 | if (IS_ERR(inode)) |
| 1515 | return PTR_ERR(inode); | 1512 | return PTR_ERR(inode); |
| 1513 | |||
| 1514 | ui = ubifs_inode(inode); | ||
| 1516 | if (inode->i_size < e->d_size) { | 1515 | if (inode->i_size < e->d_size) { |
| 1517 | dbg_rcvry("ino %lu size %lld -> %lld", | 1516 | dbg_rcvry("ino %lu size %lld -> %lld", |
| 1518 | (unsigned long)e->inum, | 1517 | (unsigned long)e->inum, |
| 1519 | e->d_size, inode->i_size); | 1518 | inode->i_size, e->d_size); |
| 1520 | inode->i_size = e->d_size; | 1519 | inode->i_size = e->d_size; |
| 1521 | ubifs_inode(inode)->ui_size = e->d_size; | 1520 | ui->ui_size = e->d_size; |
| 1521 | ui->synced_i_size = e->d_size; | ||
| 1522 | e->inode = inode; | 1522 | e->inode = inode; |
| 1523 | this = rb_next(this); | 1523 | this = rb_next(this); |
| 1524 | continue; | 1524 | continue; |
| @@ -1533,9 +1533,11 @@ int ubifs_recover_size(struct ubifs_info *c) | |||
| 1533 | iput(e->inode); | 1533 | iput(e->inode); |
| 1534 | } | 1534 | } |
| 1535 | } | 1535 | } |
| 1536 | |||
| 1536 | this = rb_next(this); | 1537 | this = rb_next(this); |
| 1537 | rb_erase(&e->rb, &c->size_tree); | 1538 | rb_erase(&e->rb, &c->size_tree); |
| 1538 | kfree(e); | 1539 | kfree(e); |
| 1539 | } | 1540 | } |
| 1541 | |||
| 1540 | return 0; | 1542 | return 0; |
| 1541 | } | 1543 | } |
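
The heart of the recovery rework above is the loop that keeps dropping trailing nodes while the write offset still falls inside the min. I/O unit where the corruption was seen, so that this unit ends up completely unused and the last used unit is the one before it. A user-space sketch of that loop under toy assumptions: drop_last_node() here is a stand-in that steps back over fixed-size 64-byte nodes, and the min. I/O size is 512 bytes:

#include <stdio.h>

#define MIN_IO 512

static int round_down(int x, int m) { return x - (x % m); }

static int drop_last_node(int *offs)
{
        if (*offs < 64)
                return 0;       /* nothing left to drop */
        *offs -= 64;            /* toy: every node is 64 bytes */
        return 1;
}

int main(void)
{
        int offs = 1088;        /* corruption seen in unit [1024, 1536) */
        int min_io_unit = round_down(offs, MIN_IO);

        while (min_io_unit == round_down(offs, MIN_IO) &&
               min_io_unit != offs &&
               drop_last_node(&offs))
                ;
        printf("recovery resumes at offset %d\n", offs); /* prints 1024 */
        return 0;
}
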
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index d3d6d365bfc1..6617280d1679 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c | |||
| @@ -33,44 +33,32 @@ | |||
| 33 | */ | 33 | */ |
| 34 | 34 | ||
| 35 | #include "ubifs.h" | 35 | #include "ubifs.h" |
| 36 | 36 | #include <linux/list_sort.h> | |
| 37 | /* | ||
| 38 | * Replay flags. | ||
| 39 | * | ||
| 40 | * REPLAY_DELETION: node was deleted | ||
| 41 | * REPLAY_REF: node is a reference node | ||
| 42 | */ | ||
| 43 | enum { | ||
| 44 | REPLAY_DELETION = 1, | ||
| 45 | REPLAY_REF = 2, | ||
| 46 | }; | ||
| 47 | 37 | ||
| 48 | /** | 38 | /** |
| 49 | * struct replay_entry - replay tree entry. | 39 | * struct replay_entry - replay list entry. |
| 50 | * @lnum: logical eraseblock number of the node | 40 | * @lnum: logical eraseblock number of the node |
| 51 | * @offs: node offset | 41 | * @offs: node offset |
| 52 | * @len: node length | 42 | * @len: node length |
| 43 | * @deletion: non-zero if this entry corresponds to a node deletion | ||
| 53 | * @sqnum: node sequence number | 44 | * @sqnum: node sequence number |
| 54 | * @flags: replay flags | 45 | * @list: links the replay list |
| 55 | * @rb: links the replay tree | ||
| 56 | * @key: node key | 46 | * @key: node key |
| 57 | * @nm: directory entry name | 47 | * @nm: directory entry name |
| 58 | * @old_size: truncation old size | 48 | * @old_size: truncation old size |
| 59 | * @new_size: truncation new size | 49 | * @new_size: truncation new size |
| 60 | * @free: amount of free space in a bud | ||
| 61 | * @dirty: amount of dirty space in a bud from padding and deletion nodes | ||
| 62 | * @jhead: journal head number of the bud | ||
| 63 | * | 50 | * |
| 64 | * UBIFS journal replay must compare node sequence numbers, which means it must | 51 | * The replay process first scans all buds and builds the replay list, then |
| 65 | * build a tree of node information to insert into the TNC. | 52 | * sorts the replay list in node sequence number order, and then inserts all |
| 53 | * the replay entries into the TNC. | ||
| 66 | */ | 54 | */ |
| 67 | struct replay_entry { | 55 | struct replay_entry { |
| 68 | int lnum; | 56 | int lnum; |
| 69 | int offs; | 57 | int offs; |
| 70 | int len; | 58 | int len; |
| 59 | unsigned int deletion:1; | ||
| 71 | unsigned long long sqnum; | 60 | unsigned long long sqnum; |
| 72 | int flags; | 61 | struct list_head list; |
| 73 | struct rb_node rb; | ||
| 74 | union ubifs_key key; | 62 | union ubifs_key key; |
| 75 | union { | 63 | union { |
| 76 | struct qstr nm; | 64 | struct qstr nm; |
| @@ -78,11 +66,6 @@ struct replay_entry { | |||
| 78 | loff_t old_size; | 66 | loff_t old_size; |
| 79 | loff_t new_size; | 67 | loff_t new_size; |
| 80 | }; | 68 | }; |
| 81 | struct { | ||
| 82 | int free; | ||
| 83 | int dirty; | ||
| 84 | int jhead; | ||
| 85 | }; | ||
| 86 | }; | 69 | }; |
| 87 | }; | 70 | }; |
| 88 | 71 | ||
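
The struct change above replaces the replay rb-tree with a plain list: entries are collected in scan order and sorted once by sequence number, using list_sort() from <linux/list_sort.h>, before being applied to the TNC. A user-space model of the comparator shape; it uses qsort() over an array because list_sort() is kernel-only, and sqnums are assumed unique as in UBIFS:

#include <stdio.h>
#include <stdlib.h>

struct replay_entry { unsigned long long sqnum; };

static int replay_cmp(const void *a, const void *b)
{
        const struct replay_entry *ra = a, *rb = b;

        if (ra->sqnum < rb->sqnum)
                return -1;
        return ra->sqnum > rb->sqnum;   /* 1 if greater, 0 if equal */
}

int main(void)
{
        struct replay_entry r[] = { { 9 }, { 2 }, { 5 } };
        int i;

        qsort(r, 3, sizeof(r[0]), replay_cmp);
        for (i = 0; i < 3; i++)
                printf("%llu ", r[i].sqnum);    /* prints: 2 5 9 */
        printf("\n");
        return 0;
}
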
| @@ -90,57 +73,64 @@ struct replay_entry { | |||
| 90 | * struct bud_entry - entry in the list of buds to replay. | 73 | * struct bud_entry - entry in the list of buds to replay. |
| 91 | * @list: next bud in the list | 74 | * @list: next bud in the list |
| 92 | * @bud: bud description object | 75 | * @bud: bud description object |
| 93 | * @free: free bytes in the bud | ||
| 94 | * @sqnum: reference node sequence number | 76 | * @sqnum: reference node sequence number |
| 77 | * @free: free bytes in the bud | ||
| 78 | * @dirty: dirty bytes in the bud | ||
| 95 | */ | 79 | */ |
| 96 | struct bud_entry { | 80 | struct bud_entry { |
| 97 | struct list_head list; | 81 | struct list_head list; |
| 98 | struct ubifs_bud *bud; | 82 | struct ubifs_bud *bud; |
| 99 | int free; | ||
| 100 | unsigned long long sqnum; | 83 | unsigned long long sqnum; |
| 84 | int free; | ||
| 85 | int dirty; | ||
| 101 | }; | 86 | }; |
| 102 | 87 | ||
| 103 | /** | 88 | /** |
| 104 | * set_bud_lprops - set free and dirty space used by a bud. | 89 | * set_bud_lprops - set free and dirty space used by a bud. |
| 105 | * @c: UBIFS file-system description object | 90 | * @c: UBIFS file-system description object |
| 106 | * @r: replay entry of bud | 91 | * @b: bud entry which describes the bud |
| 92 | * | ||
| 93 | * This function makes sure the LEB properties of bud @b are set correctly | ||
| 94 | * after the replay. Returns zero in case of success and a negative error code | ||
| 95 | * in case of failure. | ||
| 107 | */ | 96 | */ |
| 108 | static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | 97 | static int set_bud_lprops(struct ubifs_info *c, struct bud_entry *b) |
| 109 | { | 98 | { |
| 110 | const struct ubifs_lprops *lp; | 99 | const struct ubifs_lprops *lp; |
| 111 | int err = 0, dirty; | 100 | int err = 0, dirty; |
| 112 | 101 | ||
| 113 | ubifs_get_lprops(c); | 102 | ubifs_get_lprops(c); |
| 114 | 103 | ||
| 115 | lp = ubifs_lpt_lookup_dirty(c, r->lnum); | 104 | lp = ubifs_lpt_lookup_dirty(c, b->bud->lnum); |
| 116 | if (IS_ERR(lp)) { | 105 | if (IS_ERR(lp)) { |
| 117 | err = PTR_ERR(lp); | 106 | err = PTR_ERR(lp); |
| 118 | goto out; | 107 | goto out; |
| 119 | } | 108 | } |
| 120 | 109 | ||
| 121 | dirty = lp->dirty; | 110 | dirty = lp->dirty; |
| 122 | if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { | 111 | if (b->bud->start == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { |
| 123 | /* | 112 | /* |
| 124 | * The LEB was added to the journal with a starting offset of | 113 | * The LEB was added to the journal with a starting offset of |
| 125 | * zero which means the LEB must have been empty. The LEB | 114 | * zero which means the LEB must have been empty. The LEB |
| 126 | * property values should be lp->free == c->leb_size and | 115 | * property values should be @lp->free == @c->leb_size and |
| 127 | * lp->dirty == 0, but that is not the case. The reason is that | 116 | * @lp->dirty == 0, but that is not the case. The reason is that |
| 128 | * the LEB was garbage collected. The garbage collector resets | 117 | * the LEB had been garbage collected before it became the bud, |
| 129 | * the free and dirty space without recording it anywhere except | 118 | * and there was no commit in between. The garbage collector |
| 130 | * lprops, so if there is not a commit then lprops does not have | 119 | * resets the free and dirty space without recording it |
| 131 | * that information next time the file system is mounted. | 120 | * anywhere except lprops, so if there was no commit then |
| 121 | * lprops does not have that information. | ||
| 132 | * | 122 | * |
| 133 | * We do not need to adjust free space because the scan has told | 123 | * We do not need to adjust free space because the scan has told |
| 134 | * us the exact value which is recorded in the replay entry as | 124 | * us the exact value which is recorded in the replay entry as |
| 135 | * r->free. | 125 | * @b->free. |
| 136 | * | 126 | * |
| 137 | * However we do need to subtract from the dirty space the | 127 | * However we do need to subtract from the dirty space the |
| 138 | * amount of space that the garbage collector reclaimed, which | 128 | * amount of space that the garbage collector reclaimed, which |
| 139 | * is the whole LEB minus the amount of space that was free. | 129 | * is the whole LEB minus the amount of space that was free. |
| 140 | */ | 130 | */ |
| 141 | dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, | 131 | dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, |
| 142 | lp->free, lp->dirty); | 132 | lp->free, lp->dirty); |
| 143 | dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, | 133 | dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", b->bud->lnum, |
| 144 | lp->free, lp->dirty); | 134 | lp->free, lp->dirty); |
| 145 | dirty -= c->leb_size - lp->free; | 135 | dirty -= c->leb_size - lp->free; |
| 146 | /* | 136 | /* |
| @@ -152,10 +142,10 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | |||
| 152 | */ | 142 | */ |
| 153 | if (dirty != 0) | 143 | if (dirty != 0) |
| 154 | dbg_msg("LEB %d lp: %d free %d dirty " | 144 | dbg_msg("LEB %d lp: %d free %d dirty " |
| 155 | "replay: %d free %d dirty", r->lnum, lp->free, | 145 | "replay: %d free %d dirty", b->bud->lnum, |
| 156 | lp->dirty, r->free, r->dirty); | 146 | lp->free, lp->dirty, b->free, b->dirty); |
| 157 | } | 147 | } |
| 158 | lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, | 148 | lp = ubifs_change_lp(c, lp, b->free, dirty + b->dirty, |
| 159 | lp->flags | LPROPS_TAKEN, 0); | 149 | lp->flags | LPROPS_TAKEN, 0); |
| 160 | if (IS_ERR(lp)) { | 150 | if (IS_ERR(lp)) { |
| 161 | err = PTR_ERR(lp); | 151 | err = PTR_ERR(lp); |
| @@ -163,8 +153,9 @@ static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) | |||
| 163 | } | 153 | } |
| 164 | 154 | ||
| 165 | /* Make sure the journal head points to the latest bud */ | 155 | /* Make sure the journal head points to the latest bud */ |
| 166 | err = ubifs_wbuf_seek_nolock(&c->jheads[r->jhead].wbuf, r->lnum, | 156 | err = ubifs_wbuf_seek_nolock(&c->jheads[b->bud->jhead].wbuf, |
| 167 | c->leb_size - r->free, UBI_SHORTTERM); | 157 | b->bud->lnum, c->leb_size - b->free, |
| 158 | UBI_SHORTTERM); | ||
| 168 | 159 | ||
| 169 | out: | 160 | out: |
| 170 | ubifs_release_lprops(c); | 161 | ubifs_release_lprops(c); |
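
The dirty-space adjustment above is easy to check with concrete numbers. Below is a standalone C sketch of the same arithmetic; all values are made up for illustration (a hypothetical 128 KiB LEB) and none come from the patch itself.

#include <stdio.h>

int main(void)
{
    const int leb_size = 131072;    /* hypothetical 128 KiB LEB */

    /*
     * lprops as left by the garbage collector: 96 KiB free, with the
     * reclaimed space (leb_size - free = 32 KiB) counted as dirty.
     */
    int lp_free = 98304, lp_dirty = 32768;

    /* What the replay scan measured for this bud (b->free, b->dirty). */
    int b_free = 65536, b_dirty = 4096;

    /* The adjustment from set_bud_lprops(): drop the GC'd space. */
    int dirty = lp_dirty;
    dirty -= leb_size - lp_free;

    /* dirty should now be 0; anything else triggers the dbg_msg() above. */
    printf("adjusted dirty = %d; change_lp gets free=%d, dirty=%d\n",
           dirty, b_free, dirty + b_dirty);
    return 0;
}
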
| @@ -172,6 +163,27 @@ out: | |||
| 172 | } | 163 | } |
| 173 | 164 | ||
| 174 | /** | 165 | /** |
| 166 | * set_buds_lprops - set free and dirty space for all replayed buds. | ||
| 167 | * @c: UBIFS file-system description object | ||
| 168 | * | ||
| 169 | * This function sets LEB properties for all replayed buds. Returns zero in | ||
| 170 | * case of success and a negative error code in case of failure. | ||
| 171 | */ | ||
| 172 | static int set_buds_lprops(struct ubifs_info *c) | ||
| 173 | { | ||
| 174 | struct bud_entry *b; | ||
| 175 | int err; | ||
| 176 | |||
| 177 | list_for_each_entry(b, &c->replay_buds, list) { | ||
| 178 | err = set_bud_lprops(c, b); | ||
| 179 | if (err) | ||
| 180 | return err; | ||
| 181 | } | ||
| 182 | |||
| 183 | return 0; | ||
| 184 | } | ||
| 185 | |||
| 186 | /** | ||
| 175 | * trun_remove_range - apply a replay entry for a truncation to the TNC. | 187 | * trun_remove_range - apply a replay entry for a truncation to the TNC. |
| 176 | * @c: UBIFS file-system description object | 188 | * @c: UBIFS file-system description object |
| 177 | * @r: replay entry of truncation | 189 | * @r: replay entry of truncation |
| @@ -207,24 +219,22 @@ static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) | |||
| 207 | */ | 219 | */ |
| 208 | static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | 220 | static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) |
| 209 | { | 221 | { |
| 210 | int err, deletion = ((r->flags & REPLAY_DELETION) != 0); | 222 | int err; |
| 211 | 223 | ||
| 212 | dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, | 224 | dbg_mnt("LEB %d:%d len %d deletion %d sqnum %llu %s", r->lnum, |
| 213 | r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); | 225 | r->offs, r->len, r->deletion, r->sqnum, DBGKEY(&r->key)); |
| 214 | 226 | ||
| 215 | /* Set c->replay_sqnum to help deal with dangling branches. */ | 227 | /* Set c->replay_sqnum to help deal with dangling branches. */ |
| 216 | c->replay_sqnum = r->sqnum; | 228 | c->replay_sqnum = r->sqnum; |
| 217 | 229 | ||
| 218 | if (r->flags & REPLAY_REF) | 230 | if (is_hash_key(c, &r->key)) { |
| 219 | err = set_bud_lprops(c, r); | 231 | if (r->deletion) |
| 220 | else if (is_hash_key(c, &r->key)) { | ||
| 221 | if (deletion) | ||
| 222 | err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); | 232 | err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); |
| 223 | else | 233 | else |
| 224 | err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, | 234 | err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, |
| 225 | r->len, &r->nm); | 235 | r->len, &r->nm); |
| 226 | } else { | 236 | } else { |
| 227 | if (deletion) | 237 | if (r->deletion) |
| 228 | switch (key_type(c, &r->key)) { | 238 | switch (key_type(c, &r->key)) { |
| 229 | case UBIFS_INO_KEY: | 239 | case UBIFS_INO_KEY: |
| 230 | { | 240 | { |
| @@ -247,7 +257,7 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | |||
| 247 | return err; | 257 | return err; |
| 248 | 258 | ||
| 249 | if (c->need_recovery) | 259 | if (c->need_recovery) |
| 250 | err = ubifs_recover_size_accum(c, &r->key, deletion, | 260 | err = ubifs_recover_size_accum(c, &r->key, r->deletion, |
| 251 | r->new_size); | 261 | r->new_size); |
| 252 | } | 262 | } |
| 253 | 263 | ||
| @@ -255,68 +265,77 @@ static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) | |||
| 255 | } | 265 | } |
| 256 | 266 | ||
| 257 | /** | 267 | /** |
| 258 | * destroy_replay_tree - destroy the replay. | 268 | * replay_entries_cmp - compare 2 replay entries. |
| 259 | * @c: UBIFS file-system description object | 269 | * @priv: UBIFS file-system description object |
| 270 | * @a: first replay entry | ||
| 271 | * @b: second replay entry | ||
| 260 | * | 272 | * |
| 261 | * Destroy the replay tree. | 273 | * This is a comparison function for 'list_sort()' which compares 2 replay |
| 274 | * entries @a and @b by comparing their sequence number. Returns %1 if @a has a | ||
| 275 | * greater sequence number and %-1 otherwise. | ||
| 262 | */ | 276 | */ |
| 263 | static void destroy_replay_tree(struct ubifs_info *c) | 277 | static int replay_entries_cmp(void *priv, struct list_head *a, |
| 278 | struct list_head *b) | ||
| 264 | { | 279 | { |
| 265 | struct rb_node *this = c->replay_tree.rb_node; | 280 | struct replay_entry *ra, *rb; |
| 266 | struct replay_entry *r; | 281 | |
| 267 | 282 | cond_resched(); | |
| 268 | while (this) { | 283 | if (a == b) |
| 269 | if (this->rb_left) { | 284 | return 0; |
| 270 | this = this->rb_left; | 285 | |
| 271 | continue; | 286 | ra = list_entry(a, struct replay_entry, list); |
| 272 | } else if (this->rb_right) { | 287 | rb = list_entry(b, struct replay_entry, list); |
| 273 | this = this->rb_right; | 288 | ubifs_assert(ra->sqnum != rb->sqnum); |
| 274 | continue; | 289 | if (ra->sqnum > rb->sqnum) |
| 275 | } | 290 | return 1; |
| 276 | r = rb_entry(this, struct replay_entry, rb); | 291 | return -1; |
| 277 | this = rb_parent(this); | ||
| 278 | if (this) { | ||
| 279 | if (this->rb_left == &r->rb) | ||
| 280 | this->rb_left = NULL; | ||
| 281 | else | ||
| 282 | this->rb_right = NULL; | ||
| 283 | } | ||
| 284 | if (is_hash_key(c, &r->key)) | ||
| 285 | kfree(r->nm.name); | ||
| 286 | kfree(r); | ||
| 287 | } | ||
| 288 | c->replay_tree = RB_ROOT; | ||
| 289 | } | 292 | } |
| 290 | 293 | ||
| 291 | /** | 294 | /** |
| 292 | * apply_replay_tree - apply the replay tree to the TNC. | 295 | * apply_replay_list - apply the replay list to the TNC. |
| 293 | * @c: UBIFS file-system description object | 296 | * @c: UBIFS file-system description object |
| 294 | * | 297 | * |
| 295 | * Apply the replay tree. | 298 | * Apply all entries in the replay list to the TNC. Returns zero in case of |
| 296 | * Returns zero in case of success and a negative error code in case of | 299 | * success and a negative error code in case of failure. |
| 297 | * failure. | ||
| 298 | */ | 300 | */ |
| 299 | static int apply_replay_tree(struct ubifs_info *c) | 301 | static int apply_replay_list(struct ubifs_info *c) |
| 300 | { | 302 | { |
| 301 | struct rb_node *this = rb_first(&c->replay_tree); | 303 | struct replay_entry *r; |
| 304 | int err; | ||
| 302 | 305 | ||
| 303 | while (this) { | 306 | list_sort(c, &c->replay_list, &replay_entries_cmp); |
| 304 | struct replay_entry *r; | ||
| 305 | int err; | ||
| 306 | 307 | ||
| 308 | list_for_each_entry(r, &c->replay_list, list) { | ||
| 307 | cond_resched(); | 309 | cond_resched(); |
| 308 | 310 | ||
| 309 | r = rb_entry(this, struct replay_entry, rb); | ||
| 310 | err = apply_replay_entry(c, r); | 311 | err = apply_replay_entry(c, r); |
| 311 | if (err) | 312 | if (err) |
| 312 | return err; | 313 | return err; |
| 313 | this = rb_next(this); | ||
| 314 | } | 314 | } |
| 315 | |||
| 315 | return 0; | 316 | return 0; |
| 316 | } | 317 | } |
| 317 | 318 | ||
| 318 | /** | 319 | /** |
| 319 | * insert_node - insert a node to the replay tree. | 320 | * destroy_replay_list - destroy the replay. |
| 321 | * @c: UBIFS file-system description object | ||
| 322 | * | ||
| 323 | * Destroy the replay list. | ||
| 324 | */ | ||
| 325 | static void destroy_replay_list(struct ubifs_info *c) | ||
| 326 | { | ||
| 327 | struct replay_entry *r, *tmp; | ||
| 328 | |||
| 329 | list_for_each_entry_safe(r, tmp, &c->replay_list, list) { | ||
| 330 | if (is_hash_key(c, &r->key)) | ||
| 331 | kfree(r->nm.name); | ||
| 332 | list_del(&r->list); | ||
| 333 | kfree(r); | ||
| 334 | } | ||
| 335 | } | ||
| 336 | |||
| 337 | /** | ||
| 338 | * insert_node - insert a node to the replay list | ||
| 320 | * @c: UBIFS file-system description object | 339 | * @c: UBIFS file-system description object |
| 321 | * @lnum: node logical eraseblock number | 340 | * @lnum: node logical eraseblock number |
| 322 | * @offs: node offset | 341 | * @offs: node offset |
| @@ -328,39 +347,25 @@ static int apply_replay_tree(struct ubifs_info *c) | |||
| 328 | * @old_size: truncation old size | 347 | * @old_size: truncation old size |
| 329 | * @new_size: truncation new size | 348 | * @new_size: truncation new size |
| 330 | * | 349 | * |
| 331 | * This function inserts a scanned non-direntry node to the replay tree. The | 350 | * This function inserts a scanned non-direntry node to the replay list. The |
| 332 | * replay tree is an RB-tree containing @struct replay_entry elements which are | 351 | * replay list contains @struct replay_entry elements, and we sort this list in |
| 333 | * indexed by the sequence number. The replay tree is applied at the very end | 352 | * sequence number order before applying it. The replay list is applied at the |
| 334 | * of the replay process. Since the tree is sorted in sequence number order, | 353 | * very end of the replay process. Since the list is sorted in sequence number |
| 335 | * the older modifications are applied first. This function returns zero in | 354 | * order, the older modifications are applied first. This function returns zero |
| 336 | * case of success and a negative error code in case of failure. | 355 | * in case of success and a negative error code in case of failure. |
| 337 | */ | 356 | */ |
| 338 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | 357 | static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, |
| 339 | union ubifs_key *key, unsigned long long sqnum, | 358 | union ubifs_key *key, unsigned long long sqnum, |
| 340 | int deletion, int *used, loff_t old_size, | 359 | int deletion, int *used, loff_t old_size, |
| 341 | loff_t new_size) | 360 | loff_t new_size) |
| 342 | { | 361 | { |
| 343 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 344 | struct replay_entry *r; | 362 | struct replay_entry *r; |
| 345 | 363 | ||
| 364 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 365 | |||
| 346 | if (key_inum(c, key) >= c->highest_inum) | 366 | if (key_inum(c, key) >= c->highest_inum) |
| 347 | c->highest_inum = key_inum(c, key); | 367 | c->highest_inum = key_inum(c, key); |
| 348 | 368 | ||
| 349 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 350 | while (*p) { | ||
| 351 | parent = *p; | ||
| 352 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 353 | if (sqnum < r->sqnum) { | ||
| 354 | p = &(*p)->rb_left; | ||
| 355 | continue; | ||
| 356 | } else if (sqnum > r->sqnum) { | ||
| 357 | p = &(*p)->rb_right; | ||
| 358 | continue; | ||
| 359 | } | ||
| 360 | ubifs_err("duplicate sqnum in replay"); | ||
| 361 | return -EINVAL; | ||
| 362 | } | ||
| 363 | |||
| 364 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | 369 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); |
| 365 | if (!r) | 370 | if (!r) |
| 366 | return -ENOMEM; | 371 | return -ENOMEM; |
| @@ -370,19 +375,18 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
| 370 | r->lnum = lnum; | 375 | r->lnum = lnum; |
| 371 | r->offs = offs; | 376 | r->offs = offs; |
| 372 | r->len = len; | 377 | r->len = len; |
| 378 | r->deletion = !!deletion; | ||
| 373 | r->sqnum = sqnum; | 379 | r->sqnum = sqnum; |
| 374 | r->flags = (deletion ? REPLAY_DELETION : 0); | 380 | key_copy(c, key, &r->key); |
| 375 | r->old_size = old_size; | 381 | r->old_size = old_size; |
| 376 | r->new_size = new_size; | 382 | r->new_size = new_size; |
| 377 | key_copy(c, key, &r->key); | ||
| 378 | 383 | ||
| 379 | rb_link_node(&r->rb, parent, p); | 384 | list_add_tail(&r->list, &c->replay_list); |
| 380 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 381 | return 0; | 385 | return 0; |
| 382 | } | 386 | } |
| 383 | 387 | ||
| 384 | /** | 388 | /** |
| 385 | * insert_dent - insert a directory entry node into the replay tree. | 389 | * insert_dent - insert a directory entry node into the replay list. |
| 386 | * @c: UBIFS file-system description object | 390 | * @c: UBIFS file-system description object |
| 387 | * @lnum: node logical eraseblock number | 391 | * @lnum: node logical eraseblock number |
| 388 | * @offs: node offset | 392 | * @offs: node offset |
| @@ -394,43 +398,25 @@ static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, | |||
| 394 | * @deletion: non-zero if this is a deletion | 398 | * @deletion: non-zero if this is a deletion |
| 395 | * @used: number of bytes in use in a LEB | 399 | * @used: number of bytes in use in a LEB |
| 396 | * | 400 | * |
| 397 | * This function inserts a scanned directory entry node to the replay tree. | 401 | * This function inserts a scanned directory entry node or an extended |
| 398 | * Returns zero in case of success and a negative error code in case of | 402 | * attribute entry to the replay list. Returns zero in case of success and a |
| 399 | * failure. | 403 | * negative error code in case of failure. |
| 400 | * | ||
| 401 | * This function is also used for extended attribute entries because they are | ||
| 402 | * implemented as directory entry nodes. | ||
| 403 | */ | 404 | */ |
| 404 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | 405 | static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, |
| 405 | union ubifs_key *key, const char *name, int nlen, | 406 | union ubifs_key *key, const char *name, int nlen, |
| 406 | unsigned long long sqnum, int deletion, int *used) | 407 | unsigned long long sqnum, int deletion, int *used) |
| 407 | { | 408 | { |
| 408 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 409 | struct replay_entry *r; | 409 | struct replay_entry *r; |
| 410 | char *nbuf; | 410 | char *nbuf; |
| 411 | 411 | ||
| 412 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 412 | if (key_inum(c, key) >= c->highest_inum) | 413 | if (key_inum(c, key) >= c->highest_inum) |
| 413 | c->highest_inum = key_inum(c, key); | 414 | c->highest_inum = key_inum(c, key); |
| 414 | 415 | ||
| 415 | dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); | ||
| 416 | while (*p) { | ||
| 417 | parent = *p; | ||
| 418 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 419 | if (sqnum < r->sqnum) { | ||
| 420 | p = &(*p)->rb_left; | ||
| 421 | continue; | ||
| 422 | } | ||
| 423 | if (sqnum > r->sqnum) { | ||
| 424 | p = &(*p)->rb_right; | ||
| 425 | continue; | ||
| 426 | } | ||
| 427 | ubifs_err("duplicate sqnum in replay"); | ||
| 428 | return -EINVAL; | ||
| 429 | } | ||
| 430 | |||
| 431 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | 416 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); |
| 432 | if (!r) | 417 | if (!r) |
| 433 | return -ENOMEM; | 418 | return -ENOMEM; |
| 419 | |||
| 434 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); | 420 | nbuf = kmalloc(nlen + 1, GFP_KERNEL); |
| 435 | if (!nbuf) { | 421 | if (!nbuf) { |
| 436 | kfree(r); | 422 | kfree(r); |
| @@ -442,17 +428,15 @@ static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, | |||
| 442 | r->lnum = lnum; | 428 | r->lnum = lnum; |
| 443 | r->offs = offs; | 429 | r->offs = offs; |
| 444 | r->len = len; | 430 | r->len = len; |
| 431 | r->deletion = !!deletion; | ||
| 445 | r->sqnum = sqnum; | 432 | r->sqnum = sqnum; |
| 433 | key_copy(c, key, &r->key); | ||
| 446 | r->nm.len = nlen; | 434 | r->nm.len = nlen; |
| 447 | memcpy(nbuf, name, nlen); | 435 | memcpy(nbuf, name, nlen); |
| 448 | nbuf[nlen] = '\0'; | 436 | nbuf[nlen] = '\0'; |
| 449 | r->nm.name = nbuf; | 437 | r->nm.name = nbuf; |
| 450 | r->flags = (deletion ? REPLAY_DELETION : 0); | ||
| 451 | key_copy(c, key, &r->key); | ||
| 452 | 438 | ||
| 453 | ubifs_assert(!*p); | 439 | list_add_tail(&r->list, &c->replay_list); |
| 454 | rb_link_node(&r->rb, parent, p); | ||
| 455 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 456 | return 0; | 440 | return 0; |
| 457 | } | 441 | } |
| 458 | 442 | ||
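
A side note on the new 'r->deletion = !!deletion' assignments in both insert helpers: the double negation folds any non-zero value down to exactly 1 so it survives being stored in a one-bit flag. A trivial standalone demonstration (the bit-field below is illustrative, not the actual struct layout):

#include <stdio.h>

int main(void)
{
    struct { unsigned int deletion:1; } r;  /* 1-bit flag field */
    int deletion = 8;                       /* any non-zero "true" value */

    /* Assigning 8 to a 1-bit field directly would store 8 & 1 == 0. */
    r.deletion = !!deletion;
    printf("!!%d = %d, stored flag = %u\n", deletion, !!deletion, r.deletion);
    return 0;
}
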
| @@ -489,29 +473,92 @@ int ubifs_validate_entry(struct ubifs_info *c, | |||
| 489 | } | 473 | } |
| 490 | 474 | ||
| 491 | /** | 475 | /** |
| 476 | * is_last_bud - check if the bud is the last in the journal head. | ||
| 477 | * @c: UBIFS file-system description object | ||
| 478 | * @bud: bud description object | ||
| 479 | * | ||
| 480 | * This function checks if bud @bud is the last bud in its journal head. This | ||
| 481 | * information is then used by 'replay_bud()' to decide whether the bud can | ||
| 482 | * have corruptions or not. Indeed, only last buds can be corrupted by power | ||
| 483 | * cuts. Returns %1 if this is the last bud, and %0 if not. | ||
| 484 | */ | ||
| 485 | static int is_last_bud(struct ubifs_info *c, struct ubifs_bud *bud) | ||
| 486 | { | ||
| 487 | struct ubifs_jhead *jh = &c->jheads[bud->jhead]; | ||
| 488 | struct ubifs_bud *next; | ||
| 489 | uint32_t data; | ||
| 490 | int err; | ||
| 491 | |||
| 492 | if (list_is_last(&bud->list, &jh->buds_list)) | ||
| 493 | return 1; | ||
| 494 | |||
| 495 | /* | ||
| 496 | * The following is a quirk to make sure we work correctly with UBIFS | ||
| 497 | * images used with older UBIFS. | ||
| 498 | * | ||
| 499 | * Normally, the last bud will be the last in the journal head's list | ||
| 500 | * of buds. However, there is one exception if the UBIFS image belongs | ||
| 501 | * to an older UBIFS. This is fairly unlikely: one would need to use old | ||
| 502 | * UBIFS, then have a power cut exactly at the right point, and then | ||
| 503 | * try to mount this image with new UBIFS. | ||
| 504 | * | ||
| 505 | * The exception is: it is possible to have 2 buds A and B, A goes | ||
| 506 | * before B, and B is the last; bud B contains no data, and bud A is | ||
| 507 | * corrupted at the end. The reason is that in older versions when the | ||
| 508 | * journal code switched the next bud (from A to B), it first added a | ||
| 509 | * log reference node for the new bud (B), and only after this it | ||
| 510 | * synchronized the write-buffer of current bud (A). But later this was | ||
| 511 | * changed and UBIFS started to always synchronize the write-buffer of | ||
| 512 | * the bud (A) before writing the log reference for the new bud (B). | ||
| 513 | * | ||
| 514 | * But because older UBIFS always synchronized A's write-buffer before | ||
| 515 | * writing to B, we can recognize this exceptional situation but | ||
| 516 | * checking the contents of bud B - if it is empty, then A can be | ||
| 517 | * treated as the last and we can recover it. | ||
| 518 | * | ||
| 519 | * TODO: remove this piece of code in a couple of years (today it is | ||
| 520 | * 16.05.2011). | ||
| 521 | */ | ||
| 522 | next = list_entry(bud->list.next, struct ubifs_bud, list); | ||
| 523 | if (!list_is_last(&next->list, &jh->buds_list)) | ||
| 524 | return 0; | ||
| 525 | |||
| 526 | err = ubi_read(c->ubi, next->lnum, (char *)&data, | ||
| 527 | next->start, 4); | ||
| 528 | if (err) | ||
| 529 | return 0; | ||
| 530 | |||
| 531 | return data == 0xFFFFFFFF; | ||
| 532 | } | ||
| 533 | |||
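
The 4-byte probe at the end of is_last_bud() relies on erased flash reading back as all 0xff. A self-contained userspace sketch of the same check, with a fake two-LEB flash array standing in for ubi_read() (read_leb() and leb_looks_empty() are invented names, purely for illustration):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char flash[2][64];      /* fake flash: two tiny LEBs */

/* Stand-in for ubi_read(); returns 0 on success like the real call. */
static int read_leb(int lnum, void *buf, int offs, int len)
{
    memcpy(buf, &flash[lnum][offs], len);
    return 0;
}

/* Mirror of the probe: the first 4 bytes of an erased LEB are all 0xff. */
static int leb_looks_empty(int lnum, int start)
{
    uint32_t data;

    if (read_leb(lnum, &data, start, sizeof(data)))
        return 0;   /* on a read error, assume not empty */
    return data == 0xFFFFFFFF;
}

int main(void)
{
    memset(flash[0], 0xA5, sizeof(flash[0]));   /* written bud */
    memset(flash[1], 0xFF, sizeof(flash[1]));   /* erased bud */
    printf("LEB 0 empty: %d, LEB 1 empty: %d\n",
           leb_looks_empty(0, 0), leb_looks_empty(1, 0));
    return 0;
}
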
| 534 | /** | ||
| 492 | * replay_bud - replay a bud logical eraseblock. | 535 | * replay_bud - replay a bud logical eraseblock. |
| 493 | * @c: UBIFS file-system description object | 536 | * @c: UBIFS file-system description object |
| 494 | * @lnum: bud logical eraseblock number to replay | 537 | * @b: bud entry which describes the bud |
| 495 | * @offs: bud start offset | ||
| 496 | * @jhead: journal head to which this bud belongs | ||
| 497 | * @free: amount of free space in the bud is returned here | ||
| 498 | * @dirty: amount of dirty space from padding and deletion nodes is returned | ||
| 499 | * here | ||
| 500 | * | 538 | * |
| 501 | * This function returns zero in case of success and a negative error code in | 539 | * This function replays the bud described by @b, recovers it if needed, and adds all nodes |
| 502 | * case of failure. | 540 | * from this bud to the replay list. Returns zero in case of success and a |
| 541 | * negative error code in case of failure. | ||
| 503 | */ | 542 | */ |
| 504 | static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | 543 | static int replay_bud(struct ubifs_info *c, struct bud_entry *b) |
| 505 | int *free, int *dirty) | ||
| 506 | { | 544 | { |
| 507 | int err = 0, used = 0; | 545 | int is_last = is_last_bud(c, b->bud); |
| 546 | int err = 0, used = 0, lnum = b->bud->lnum, offs = b->bud->start; | ||
| 508 | struct ubifs_scan_leb *sleb; | 547 | struct ubifs_scan_leb *sleb; |
| 509 | struct ubifs_scan_node *snod; | 548 | struct ubifs_scan_node *snod; |
| 510 | struct ubifs_bud *bud; | ||
| 511 | 549 | ||
| 512 | dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); | 550 | dbg_mnt("replay bud LEB %d, head %d, offs %d, is_last %d", |
| 513 | if (c->need_recovery) | 551 | lnum, b->bud->jhead, offs, is_last); |
| 514 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); | 552 | |
| 553 | if (c->need_recovery && is_last) | ||
| 554 | /* | ||
| 555 | * Recover only last LEBs in the journal heads, because power | ||
| 556 | * cuts may cause corruptions only in these LEBs, because only | ||
| 557 | * these LEBs could possibly be written to at the power cut | ||
| 558 | * time. | ||
| 559 | */ | ||
| 560 | sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, | ||
| 561 | b->bud->jhead != GCHD); | ||
| 515 | else | 562 | else |
| 516 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); | 563 | sleb = ubifs_scan(c, lnum, offs, c->sbuf, 0); |
| 517 | if (IS_ERR(sleb)) | 564 | if (IS_ERR(sleb)) |
| @@ -627,15 +674,13 @@ static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, | |||
| 627 | goto out; | 674 | goto out; |
| 628 | } | 675 | } |
| 629 | 676 | ||
| 630 | bud = ubifs_search_bud(c, lnum); | 677 | ubifs_assert(ubifs_search_bud(c, lnum)); |
| 631 | if (!bud) | ||
| 632 | BUG(); | ||
| 633 | |||
| 634 | ubifs_assert(sleb->endpt - offs >= used); | 678 | ubifs_assert(sleb->endpt - offs >= used); |
| 635 | ubifs_assert(sleb->endpt % c->min_io_size == 0); | 679 | ubifs_assert(sleb->endpt % c->min_io_size == 0); |
| 636 | 680 | ||
| 637 | *dirty = sleb->endpt - offs - used; | 681 | b->dirty = sleb->endpt - offs - used; |
| 638 | *free = c->leb_size - sleb->endpt; | 682 | b->free = c->leb_size - sleb->endpt; |
| 683 | dbg_mnt("bud LEB %d replied: dirty %d, free %d", lnum, b->dirty, b->free); | ||
| 639 | 684 | ||
| 640 | out: | 685 | out: |
| 641 | ubifs_scan_destroy(sleb); | 686 | ubifs_scan_destroy(sleb); |
| @@ -649,58 +694,6 @@ out_dump: | |||
| 649 | } | 694 | } |
| 650 | 695 | ||
| 651 | /** | 696 | /** |
| 652 | * insert_ref_node - insert a reference node to the replay tree. | ||
| 653 | * @c: UBIFS file-system description object | ||
| 654 | * @lnum: node logical eraseblock number | ||
| 655 | * @offs: node offset | ||
| 656 | * @sqnum: sequence number | ||
| 657 | * @free: amount of free space in bud | ||
| 658 | * @dirty: amount of dirty space from padding and deletion nodes | ||
| 659 | * @jhead: journal head number for the bud | ||
| 660 | * | ||
| 661 | * This function inserts a reference node to the replay tree and returns zero | ||
| 662 | * in case of success or a negative error code in case of failure. | ||
| 663 | */ | ||
| 664 | static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, | ||
| 665 | unsigned long long sqnum, int free, int dirty, | ||
| 666 | int jhead) | ||
| 667 | { | ||
| 668 | struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; | ||
| 669 | struct replay_entry *r; | ||
| 670 | |||
| 671 | dbg_mnt("add ref LEB %d:%d", lnum, offs); | ||
| 672 | while (*p) { | ||
| 673 | parent = *p; | ||
| 674 | r = rb_entry(parent, struct replay_entry, rb); | ||
| 675 | if (sqnum < r->sqnum) { | ||
| 676 | p = &(*p)->rb_left; | ||
| 677 | continue; | ||
| 678 | } else if (sqnum > r->sqnum) { | ||
| 679 | p = &(*p)->rb_right; | ||
| 680 | continue; | ||
| 681 | } | ||
| 682 | ubifs_err("duplicate sqnum in replay tree"); | ||
| 683 | return -EINVAL; | ||
| 684 | } | ||
| 685 | |||
| 686 | r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); | ||
| 687 | if (!r) | ||
| 688 | return -ENOMEM; | ||
| 689 | |||
| 690 | r->lnum = lnum; | ||
| 691 | r->offs = offs; | ||
| 692 | r->sqnum = sqnum; | ||
| 693 | r->flags = REPLAY_REF; | ||
| 694 | r->free = free; | ||
| 695 | r->dirty = dirty; | ||
| 696 | r->jhead = jhead; | ||
| 697 | |||
| 698 | rb_link_node(&r->rb, parent, p); | ||
| 699 | rb_insert_color(&r->rb, &c->replay_tree); | ||
| 700 | return 0; | ||
| 701 | } | ||
| 702 | |||
| 703 | /** | ||
| 704 | * replay_buds - replay all buds. | 697 | * replay_buds - replay all buds. |
| 705 | * @c: UBIFS file-system description object | 698 | * @c: UBIFS file-system description object |
| 706 | * | 699 | * |
| @@ -710,17 +703,16 @@ static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, | |||
| 710 | static int replay_buds(struct ubifs_info *c) | 703 | static int replay_buds(struct ubifs_info *c) |
| 711 | { | 704 | { |
| 712 | struct bud_entry *b; | 705 | struct bud_entry *b; |
| 713 | int err, uninitialized_var(free), uninitialized_var(dirty); | 706 | int err; |
| 707 | unsigned long long prev_sqnum = 0; | ||
| 714 | 708 | ||
| 715 | list_for_each_entry(b, &c->replay_buds, list) { | 709 | list_for_each_entry(b, &c->replay_buds, list) { |
| 716 | err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, | 710 | err = replay_bud(c, b); |
| 717 | &free, &dirty); | ||
| 718 | if (err) | ||
| 719 | return err; | ||
| 720 | err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, | ||
| 721 | free, dirty, b->bud->jhead); | ||
| 722 | if (err) | 711 | if (err) |
| 723 | return err; | 712 | return err; |
| 713 | |||
| 714 | ubifs_assert(b->sqnum > prev_sqnum); | ||
| 715 | prev_sqnum = b->sqnum; | ||
| 724 | } | 716 | } |
| 725 | 717 | ||
| 726 | return 0; | 718 | return 0; |
| @@ -1060,25 +1052,29 @@ int ubifs_replay_journal(struct ubifs_info *c) | |||
| 1060 | if (err) | 1052 | if (err) |
| 1061 | goto out; | 1053 | goto out; |
| 1062 | 1054 | ||
| 1063 | err = apply_replay_tree(c); | 1055 | err = apply_replay_list(c); |
| 1056 | if (err) | ||
| 1057 | goto out; | ||
| 1058 | |||
| 1059 | err = set_buds_lprops(c); | ||
| 1064 | if (err) | 1060 | if (err) |
| 1065 | goto out; | 1061 | goto out; |
| 1066 | 1062 | ||
| 1067 | /* | 1063 | /* |
| 1068 | * UBIFS budgeting calculations use @c->budg_uncommitted_idx variable | 1064 | * UBIFS budgeting calculations use @c->bi.uncommitted_idx variable |
| 1069 | * to roughly estimate index growth. Things like @c->min_idx_lebs | 1065 | * to roughly estimate index growth. Things like @c->bi.min_idx_lebs |
| 1070 | * depend on it. This means we have to initialize it to make sure | 1066 | * depend on it. This means we have to initialize it to make sure |
| 1071 | * budgeting works properly. | 1067 | * budgeting works properly. |
| 1072 | */ | 1068 | */ |
| 1073 | c->budg_uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); | 1069 | c->bi.uncommitted_idx = atomic_long_read(&c->dirty_zn_cnt); |
| 1074 | c->budg_uncommitted_idx *= c->max_idx_node_sz; | 1070 | c->bi.uncommitted_idx *= c->max_idx_node_sz; |
| 1075 | 1071 | ||
| 1076 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); | 1072 | ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); |
| 1077 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " | 1073 | dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " |
| 1078 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, | 1074 | "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, |
| 1079 | (unsigned long)c->highest_inum); | 1075 | (unsigned long)c->highest_inum); |
| 1080 | out: | 1076 | out: |
| 1081 | destroy_replay_tree(c); | 1077 | destroy_replay_list(c); |
| 1082 | destroy_bud_list(c); | 1078 | destroy_bud_list(c); |
| 1083 | c->replaying = 0; | 1079 | c->replaying = 0; |
| 1084 | return err; | 1080 | return err; |
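
The heart of this replay.c rework is replacing the sequence-number RB-tree with a plain list that is appended to in scan order and sorted once, via list_sort(), just before being applied. Here is a userspace analogue of that ordering using qsort() on an array; the entry fields are simplified stand-ins, and the comparator never needs to return 0 because sequence numbers are unique (the same invariant the ubifs_assert() in replay_entries_cmp() relies on).

#include <stdio.h>
#include <stdlib.h>

/* Cut-down stand-in for struct replay_entry: only the sort key matters. */
struct entry {
    unsigned long long sqnum;
    const char *what;
};

/* Same contract as replay_entries_cmp(): older sequence numbers first. */
static int cmp_sqnum(const void *a, const void *b)
{
    const struct entry *ra = a, *rb = b;

    return ra->sqnum > rb->sqnum ? 1 : -1;
}

int main(void)
{
    /* Entries arrive in bud-scan order, not sequence-number order. */
    struct entry list[] = {
        { 42, "truncate" }, { 17, "create" }, { 30, "write" },
    };
    size_t i, n = sizeof(list) / sizeof(list[0]);

    /* One sort before applying, instead of keeping a tree balanced. */
    qsort(list, n, sizeof(list[0]), cmp_sqnum);

    for (i = 0; i < n; i++)     /* apply oldest first */
        printf("sqnum %llu: %s\n", list[i].sqnum, list[i].what);
    return 0;
}
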
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index bf31b4729e51..c606f010e8df 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c | |||
| @@ -475,7 +475,8 @@ failed: | |||
| 475 | * @c: UBIFS file-system description object | 475 | * @c: UBIFS file-system description object |
| 476 | * | 476 | * |
| 477 | * This function returns a pointer to the superblock node or a negative error | 477 | * This function returns a pointer to the superblock node or a negative error |
| 478 | * code. | 478 | * code. Note, the user of this function is responsible for kfree()'ing the |
| 479 | * returned superblock buffer. | ||
| 479 | */ | 480 | */ |
| 480 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) | 481 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c) |
| 481 | { | 482 | { |
| @@ -616,6 +617,7 @@ int ubifs_read_superblock(struct ubifs_info *c) | |||
| 616 | c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); | 617 | c->vfs_sb->s_time_gran = le32_to_cpu(sup->time_gran); |
| 617 | memcpy(&c->uuid, &sup->uuid, 16); | 618 | memcpy(&c->uuid, &sup->uuid, 16); |
| 618 | c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); | 619 | c->big_lpt = !!(sup_flags & UBIFS_FLG_BIGLPT); |
| 620 | c->space_fixup = !!(sup_flags & UBIFS_FLG_SPACE_FIXUP); | ||
| 619 | 621 | ||
| 620 | /* Automatically increase file system size to the maximum size */ | 622 | /* Automatically increase file system size to the maximum size */ |
| 621 | c->old_leb_cnt = c->leb_cnt; | 623 | c->old_leb_cnt = c->leb_cnt; |
| @@ -650,3 +652,152 @@ out: | |||
| 650 | kfree(sup); | 652 | kfree(sup); |
| 651 | return err; | 653 | return err; |
| 652 | } | 654 | } |
| 655 | |||
| 656 | /** | ||
| 657 | * fixup_leb - fixup/unmap an LEB containing free space. | ||
| 658 | * @c: UBIFS file-system description object | ||
| 659 | * @lnum: the LEB number to fix up | ||
| 660 | * @len: number of used bytes in LEB (starting at offset 0) | ||
| 661 | * | ||
| 662 | * This function reads the contents of the given LEB number @lnum, then fixes | ||
| 663 | * it up, so that empty min. I/O units at the end of the LEB are actually erased on | ||
| 664 | * flash (rather than being just all-0xff real data). If the LEB is completely | ||
| 665 | * empty, it is simply unmapped. | ||
| 666 | */ | ||
| 667 | static int fixup_leb(struct ubifs_info *c, int lnum, int len) | ||
| 668 | { | ||
| 669 | int err; | ||
| 670 | |||
| 671 | ubifs_assert(len >= 0); | ||
| 672 | ubifs_assert(len % c->min_io_size == 0); | ||
| 673 | ubifs_assert(len < c->leb_size); | ||
| 674 | |||
| 675 | if (len == 0) { | ||
| 676 | dbg_mnt("unmap empty LEB %d", lnum); | ||
| 677 | return ubi_leb_unmap(c->ubi, lnum); | ||
| 678 | } | ||
| 679 | |||
| 680 | dbg_mnt("fixup LEB %d, data len %d", lnum, len); | ||
| 681 | err = ubi_read(c->ubi, lnum, c->sbuf, 0, len); | ||
| 682 | if (err) | ||
| 683 | return err; | ||
| 684 | |||
| 685 | return ubi_leb_change(c->ubi, lnum, c->sbuf, len, UBI_UNKNOWN); | ||
| 686 | } | ||
| 687 | |||
| 688 | /** | ||
| 689 | * fixup_free_space - find & remap all LEBs containing free space. | ||
| 690 | * @c: UBIFS file-system description object | ||
| 691 | * | ||
| 692 | * This function walks through all LEBs in the filesystem and fixes up those | ||
| 693 | * containing free/empty space. | ||
| 694 | */ | ||
| 695 | static int fixup_free_space(struct ubifs_info *c) | ||
| 696 | { | ||
| 697 | int lnum, err = 0; | ||
| 698 | struct ubifs_lprops *lprops; | ||
| 699 | |||
| 700 | ubifs_get_lprops(c); | ||
| 701 | |||
| 702 | /* Fixup LEBs in the master area */ | ||
| 703 | for (lnum = UBIFS_MST_LNUM; lnum < UBIFS_LOG_LNUM; lnum++) { | ||
| 704 | err = fixup_leb(c, lnum, c->mst_offs + c->mst_node_alsz); | ||
| 705 | if (err) | ||
| 706 | goto out; | ||
| 707 | } | ||
| 708 | |||
| 709 | /* Unmap unused log LEBs */ | ||
| 710 | lnum = ubifs_next_log_lnum(c, c->lhead_lnum); | ||
| 711 | while (lnum != c->ltail_lnum) { | ||
| 712 | err = fixup_leb(c, lnum, 0); | ||
| 713 | if (err) | ||
| 714 | goto out; | ||
| 715 | lnum = ubifs_next_log_lnum(c, lnum); | ||
| 716 | } | ||
| 717 | |||
| 718 | /* Fixup the current log head */ | ||
| 719 | err = fixup_leb(c, c->lhead_lnum, c->lhead_offs); | ||
| 720 | if (err) | ||
| 721 | goto out; | ||
| 722 | |||
| 723 | /* Fixup LEBs in the LPT area */ | ||
| 724 | for (lnum = c->lpt_first; lnum <= c->lpt_last; lnum++) { | ||
| 725 | int free = c->ltab[lnum - c->lpt_first].free; | ||
| 726 | |||
| 727 | if (free > 0) { | ||
| 728 | err = fixup_leb(c, lnum, c->leb_size - free); | ||
| 729 | if (err) | ||
| 730 | goto out; | ||
| 731 | } | ||
| 732 | } | ||
| 733 | |||
| 734 | /* Unmap LEBs in the orphans area */ | ||
| 735 | for (lnum = c->orph_first; lnum <= c->orph_last; lnum++) { | ||
| 736 | err = fixup_leb(c, lnum, 0); | ||
| 737 | if (err) | ||
| 738 | goto out; | ||
| 739 | } | ||
| 740 | |||
| 741 | /* Fixup LEBs in the main area */ | ||
| 742 | for (lnum = c->main_first; lnum < c->leb_cnt; lnum++) { | ||
| 743 | lprops = ubifs_lpt_lookup(c, lnum); | ||
| 744 | if (IS_ERR(lprops)) { | ||
| 745 | err = PTR_ERR(lprops); | ||
| 746 | goto out; | ||
| 747 | } | ||
| 748 | |||
| 749 | if (lprops->free > 0) { | ||
| 750 | err = fixup_leb(c, lnum, c->leb_size - lprops->free); | ||
| 751 | if (err) | ||
| 752 | goto out; | ||
| 753 | } | ||
| 754 | } | ||
| 755 | |||
| 756 | out: | ||
| 757 | ubifs_release_lprops(c); | ||
| 758 | return err; | ||
| 759 | } | ||
| 760 | |||
| 761 | /** | ||
| 762 | * ubifs_fixup_free_space - find & fix all LEBs with free space. | ||
| 763 | * @c: UBIFS file-system description object | ||
| 764 | * | ||
| 765 | * This function fixes up LEBs containing free space on first mount, if the | ||
| 766 | * appropriate flag was set when the FS was created. Each LEB with one or more | ||
| 767 | * empty min. I/O units (i.e. free-space-count > 0) is re-written, to make sure | ||
| 768 | * the free space is actually erased. E.g., this is necessary for some NAND | ||
| 769 | * chips, since the free space may have been programmed like real "0xff" data | ||
| 770 | * (generating a non-0xff ECC), causing future writes to the not-really-erased | ||
| 771 | * NAND pages to behave badly. After the space is fixed up, the superblock flag | ||
| 772 | * is cleared, so that this is skipped for all future mounts. | ||
| 773 | */ | ||
| 774 | int ubifs_fixup_free_space(struct ubifs_info *c) | ||
| 775 | { | ||
| 776 | int err; | ||
| 777 | struct ubifs_sb_node *sup; | ||
| 778 | |||
| 779 | ubifs_assert(c->space_fixup); | ||
| 780 | ubifs_assert(!c->ro_mount); | ||
| 781 | |||
| 782 | ubifs_msg("start fixing up free space"); | ||
| 783 | |||
| 784 | err = fixup_free_space(c); | ||
| 785 | if (err) | ||
| 786 | return err; | ||
| 787 | |||
| 788 | sup = ubifs_read_sb_node(c); | ||
| 789 | if (IS_ERR(sup)) | ||
| 790 | return PTR_ERR(sup); | ||
| 791 | |||
| 792 | /* Free-space fixup is no longer required */ | ||
| 793 | c->space_fixup = 0; | ||
| 794 | sup->flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP); | ||
| 795 | |||
| 796 | err = ubifs_write_sb_node(c, sup); | ||
| 797 | kfree(sup); | ||
| 798 | if (err) | ||
| 799 | return err; | ||
| 800 | |||
| 801 | ubifs_msg("free space fixup complete"); | ||
| 802 | return err; | ||
| 803 | } | ||
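
Two details of the new fixup code are worth calling out: the buffer returned by ubifs_read_sb_node() is explicitly the caller's to kfree() (hence the kfree(sup) on both paths above), and the superblock flag is cleared with the mask itself run through cpu_to_le32(), so the AND happens on the on-media little-endian representation. A standalone sketch of the flag arithmetic, with an identity stand-in for cpu_to_le32() (i.e. assuming a little-endian host, purely for illustration):

#include <stdint.h>
#include <stdio.h>

#define UBIFS_FLG_BIGLPT        0x02
#define UBIFS_FLG_SPACE_FIXUP   0x04

/* Identity stand-in for the kernel's cpu_to_le32() on little-endian. */
static uint32_t cpu_to_le32(uint32_t x) { return x; }

int main(void)
{
    /* On-media superblock flags with both bits set at mkfs time. */
    uint32_t flags = cpu_to_le32(UBIFS_FLG_BIGLPT | UBIFS_FLG_SPACE_FIXUP);

    /* Converting the mask too keeps the clear endian-correct. */
    flags &= cpu_to_le32(~UBIFS_FLG_SPACE_FIXUP);

    printf("flags = 0x%02x (space-fixup cleared, big-LPT kept)\n", flags);
    return 0;
}
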
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 04ad07f4fcc3..6db0bdaa9f74 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -375,7 +375,7 @@ out: | |||
| 375 | ubifs_release_dirty_inode_budget(c, ui); | 375 | ubifs_release_dirty_inode_budget(c, ui); |
| 376 | else { | 376 | else { |
| 377 | /* We've deleted something - clean the "no space" flags */ | 377 | /* We've deleted something - clean the "no space" flags */ |
| 378 | c->nospace = c->nospace_rp = 0; | 378 | c->bi.nospace = c->bi.nospace_rp = 0; |
| 379 | smp_wmb(); | 379 | smp_wmb(); |
| 380 | } | 380 | } |
| 381 | done: | 381 | done: |
| @@ -694,11 +694,11 @@ static int init_constants_sb(struct ubifs_info *c) | |||
| 694 | * be compressed and direntries are of the maximum size. | 694 | * be compressed and direntries are of the maximum size. |
| 695 | * | 695 | * |
| 696 | * Note, data, which may be stored in inodes is budgeted separately, so | 696 | * Note, data, which may be stored in inodes is budgeted separately, so |
| 697 | * it is not included into 'c->inode_budget'. | 697 | * it is not included into 'c->bi.inode_budget'. |
| 698 | */ | 698 | */ |
| 699 | c->page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; | 699 | c->bi.page_budget = UBIFS_MAX_DATA_NODE_SZ * UBIFS_BLOCKS_PER_PAGE; |
| 700 | c->inode_budget = UBIFS_INO_NODE_SZ; | 700 | c->bi.inode_budget = UBIFS_INO_NODE_SZ; |
| 701 | c->dent_budget = UBIFS_MAX_DENT_NODE_SZ; | 701 | c->bi.dent_budget = UBIFS_MAX_DENT_NODE_SZ; |
| 702 | 702 | ||
| 703 | /* | 703 | /* |
| 704 | * When the amount of flash space used by buds becomes | 704 | * When the amount of flash space used by buds becomes |
| @@ -742,7 +742,7 @@ static void init_constants_master(struct ubifs_info *c) | |||
| 742 | { | 742 | { |
| 743 | long long tmp64; | 743 | long long tmp64; |
| 744 | 744 | ||
| 745 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 745 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 746 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); | 746 | c->report_rp_size = ubifs_reported_space(c, c->rp_size); |
| 747 | 747 | ||
| 748 | /* | 748 | /* |
| @@ -1144,8 +1144,8 @@ static int check_free_space(struct ubifs_info *c) | |||
| 1144 | { | 1144 | { |
| 1145 | ubifs_assert(c->dark_wm > 0); | 1145 | ubifs_assert(c->dark_wm > 0); |
| 1146 | if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { | 1146 | if (c->lst.total_free + c->lst.total_dirty < c->dark_wm) { |
| 1147 | ubifs_err("insufficient free space to mount in read/write mode"); | 1147 | ubifs_err("insufficient free space to mount in R/W mode"); |
| 1148 | dbg_dump_budg(c); | 1148 | dbg_dump_budg(c, &c->bi); |
| 1149 | dbg_dump_lprops(c); | 1149 | dbg_dump_lprops(c); |
| 1150 | return -ENOSPC; | 1150 | return -ENOSPC; |
| 1151 | } | 1151 | } |
| @@ -1304,7 +1304,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1304 | if (err) | 1304 | if (err) |
| 1305 | goto out_lpt; | 1305 | goto out_lpt; |
| 1306 | 1306 | ||
| 1307 | err = dbg_check_idx_size(c, c->old_idx_sz); | 1307 | err = dbg_check_idx_size(c, c->bi.old_idx_sz); |
| 1308 | if (err) | 1308 | if (err) |
| 1309 | goto out_lpt; | 1309 | goto out_lpt; |
| 1310 | 1310 | ||
| @@ -1313,7 +1313,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1313 | goto out_journal; | 1313 | goto out_journal; |
| 1314 | 1314 | ||
| 1315 | /* Calculate 'min_idx_lebs' after journal replay */ | 1315 | /* Calculate 'min_idx_lebs' after journal replay */ |
| 1316 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 1316 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 1317 | 1317 | ||
| 1318 | err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); | 1318 | err = ubifs_mount_orphans(c, c->need_recovery, c->ro_mount); |
| 1319 | if (err) | 1319 | if (err) |
| @@ -1396,6 +1396,12 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1396 | } else | 1396 | } else |
| 1397 | ubifs_assert(c->lst.taken_empty_lebs > 0); | 1397 | ubifs_assert(c->lst.taken_empty_lebs > 0); |
| 1398 | 1398 | ||
| 1399 | if (!c->ro_mount && c->space_fixup) { | ||
| 1400 | err = ubifs_fixup_free_space(c); | ||
| 1401 | if (err) | ||
| 1402 | goto out_infos; | ||
| 1403 | } | ||
| 1404 | |||
| 1399 | err = dbg_check_filesystem(c); | 1405 | err = dbg_check_filesystem(c); |
| 1400 | if (err) | 1406 | if (err) |
| 1401 | goto out_infos; | 1407 | goto out_infos; |
| @@ -1442,7 +1448,8 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1442 | c->main_lebs, c->main_first, c->leb_cnt - 1); | 1448 | c->main_lebs, c->main_first, c->leb_cnt - 1); |
| 1443 | dbg_msg("index LEBs: %d", c->lst.idx_lebs); | 1449 | dbg_msg("index LEBs: %d", c->lst.idx_lebs); |
| 1444 | dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", | 1450 | dbg_msg("total index bytes: %lld (%lld KiB, %lld MiB)", |
| 1445 | c->old_idx_sz, c->old_idx_sz >> 10, c->old_idx_sz >> 20); | 1451 | c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, |
| 1452 | c->bi.old_idx_sz >> 20); | ||
| 1446 | dbg_msg("key hash type: %d", c->key_hash_type); | 1453 | dbg_msg("key hash type: %d", c->key_hash_type); |
| 1447 | dbg_msg("tree fanout: %d", c->fanout); | 1454 | dbg_msg("tree fanout: %d", c->fanout); |
| 1448 | dbg_msg("reserved GC LEB: %d", c->gc_lnum); | 1455 | dbg_msg("reserved GC LEB: %d", c->gc_lnum); |
| @@ -1456,7 +1463,7 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1456 | dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", | 1463 | dbg_msg("node sizes: ref %zu, cmt. start %zu, orph %zu", |
| 1457 | UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); | 1464 | UBIFS_REF_NODE_SZ, UBIFS_CS_NODE_SZ, UBIFS_ORPH_NODE_SZ); |
| 1458 | dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", | 1465 | dbg_msg("max. node sizes: data %zu, inode %zu dentry %zu, idx %d", |
| 1459 | UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, | 1466 | UBIFS_MAX_DATA_NODE_SZ, UBIFS_MAX_INO_NODE_SZ, |
| 1460 | UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); | 1467 | UBIFS_MAX_DENT_NODE_SZ, ubifs_idx_node_sz(c, c->fanout)); |
| 1461 | dbg_msg("dead watermark: %d", c->dead_wm); | 1468 | dbg_msg("dead watermark: %d", c->dead_wm); |
| 1462 | dbg_msg("dark watermark: %d", c->dark_wm); | 1469 | dbg_msg("dark watermark: %d", c->dark_wm); |
| @@ -1584,6 +1591,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1584 | } | 1591 | } |
| 1585 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); | 1592 | sup->leb_cnt = cpu_to_le32(c->leb_cnt); |
| 1586 | err = ubifs_write_sb_node(c, sup); | 1593 | err = ubifs_write_sb_node(c, sup); |
| 1594 | kfree(sup); | ||
| 1587 | if (err) | 1595 | if (err) |
| 1588 | goto out; | 1596 | goto out; |
| 1589 | } | 1597 | } |
| @@ -1684,6 +1692,13 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1684 | */ | 1692 | */ |
| 1685 | err = dbg_check_space_info(c); | 1693 | err = dbg_check_space_info(c); |
| 1686 | } | 1694 | } |
| 1695 | |||
| 1696 | if (c->space_fixup) { | ||
| 1697 | err = ubifs_fixup_free_space(c); | ||
| 1698 | if (err) | ||
| 1699 | goto out; | ||
| 1700 | } | ||
| 1701 | |||
| 1687 | mutex_unlock(&c->umount_mutex); | 1702 | mutex_unlock(&c->umount_mutex); |
| 1688 | return err; | 1703 | return err; |
| 1689 | 1704 | ||
| @@ -1766,10 +1781,9 @@ static void ubifs_put_super(struct super_block *sb) | |||
| 1766 | * to write them back because of I/O errors. | 1781 | * to write them back because of I/O errors. |
| 1767 | */ | 1782 | */ |
| 1768 | if (!c->ro_error) { | 1783 | if (!c->ro_error) { |
| 1769 | ubifs_assert(atomic_long_read(&c->dirty_pg_cnt) == 0); | 1784 | ubifs_assert(c->bi.idx_growth == 0); |
| 1770 | ubifs_assert(c->budg_idx_growth == 0); | 1785 | ubifs_assert(c->bi.dd_growth == 0); |
| 1771 | ubifs_assert(c->budg_dd_growth == 0); | 1786 | ubifs_assert(c->bi.data_growth == 0); |
| 1772 | ubifs_assert(c->budg_data_growth == 0); | ||
| 1773 | } | 1787 | } |
| 1774 | 1788 | ||
| 1775 | /* | 1789 | /* |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index de485979ca39..8119b1fd8d94 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
| @@ -2557,11 +2557,11 @@ int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key, | |||
| 2557 | if (err) { | 2557 | if (err) { |
| 2558 | /* Ensure the znode is dirtied */ | 2558 | /* Ensure the znode is dirtied */ |
| 2559 | if (znode->cnext || !ubifs_zn_dirty(znode)) { | 2559 | if (znode->cnext || !ubifs_zn_dirty(znode)) { |
| 2560 | znode = dirty_cow_bottom_up(c, znode); | 2560 | znode = dirty_cow_bottom_up(c, znode); |
| 2561 | if (IS_ERR(znode)) { | 2561 | if (IS_ERR(znode)) { |
| 2562 | err = PTR_ERR(znode); | 2562 | err = PTR_ERR(znode); |
| 2563 | goto out_unlock; | 2563 | goto out_unlock; |
| 2564 | } | 2564 | } |
| 2565 | } | 2565 | } |
| 2566 | err = tnc_delete(c, znode, n); | 2566 | err = tnc_delete(c, znode, n); |
| 2567 | } | 2567 | } |
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c index 53288e5d604e..41920f357bbf 100644 --- a/fs/ubifs/tnc_commit.c +++ b/fs/ubifs/tnc_commit.c | |||
| @@ -377,15 +377,13 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt) | |||
| 377 | c->gap_lebs = NULL; | 377 | c->gap_lebs = NULL; |
| 378 | return err; | 378 | return err; |
| 379 | } | 379 | } |
| 380 | if (!dbg_force_in_the_gaps_enabled) { | 380 | if (dbg_force_in_the_gaps_enabled()) { |
| 381 | /* | 381 | /* |
| 382 | * Do not print scary warnings if the debugging | 382 | * Do not print scary warnings if the debugging |
| 383 | * option which forces in-the-gaps is enabled. | 383 | * option which forces in-the-gaps is enabled. |
| 384 | */ | 384 | */ |
| 385 | ubifs_err("out of space"); | 385 | ubifs_warn("out of space"); |
| 386 | spin_lock(&c->space_lock); | 386 | dbg_dump_budg(c, &c->bi); |
| 387 | dbg_dump_budg(c); | ||
| 388 | spin_unlock(&c->space_lock); | ||
| 389 | dbg_dump_lprops(c); | 387 | dbg_dump_lprops(c); |
| 390 | } | 388 | } |
| 391 | /* Try to commit anyway */ | 389 | /* Try to commit anyway */ |
| @@ -796,16 +794,16 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot) | |||
| 796 | spin_lock(&c->space_lock); | 794 | spin_lock(&c->space_lock); |
| 797 | /* | 795 | /* |
| 798 | * Although we have not finished committing yet, update size of the | 796 | * Although we have not finished committing yet, update size of the |
| 799 | * committed index ('c->old_idx_sz') and zero out the index growth | 797 | * committed index ('c->bi.old_idx_sz') and zero out the index growth |
| 800 | * budget. It is OK to do this now, because we've reserved all the | 798 | * budget. It is OK to do this now, because we've reserved all the |
| 801 | * space which is needed to commit the index, and it is safe for the | 799 | * space which is needed to commit the index, and it is safe for the |
| 802 | * budgeting subsystem to assume the index is already committed, | 800 | * budgeting subsystem to assume the index is already committed, |
| 803 | * even though it is not. | 801 | * even though it is not. |
| 804 | */ | 802 | */ |
| 805 | ubifs_assert(c->min_idx_lebs == ubifs_calc_min_idx_lebs(c)); | 803 | ubifs_assert(c->bi.min_idx_lebs == ubifs_calc_min_idx_lebs(c)); |
| 806 | c->old_idx_sz = c->calc_idx_sz; | 804 | c->bi.old_idx_sz = c->calc_idx_sz; |
| 807 | c->budg_uncommitted_idx = 0; | 805 | c->bi.uncommitted_idx = 0; |
| 808 | c->min_idx_lebs = ubifs_calc_min_idx_lebs(c); | 806 | c->bi.min_idx_lebs = ubifs_calc_min_idx_lebs(c); |
| 809 | spin_unlock(&c->space_lock); | 807 | spin_unlock(&c->space_lock); |
| 810 | mutex_unlock(&c->tnc_mutex); | 808 | mutex_unlock(&c->tnc_mutex); |
| 811 | 809 | ||
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index 191ca7863fe7..e24380cf46ed 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
| @@ -408,9 +408,11 @@ enum { | |||
| 408 | * Superblock flags. | 408 | * Superblock flags. |
| 409 | * | 409 | * |
| 410 | * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set | 410 | * UBIFS_FLG_BIGLPT: if "big" LPT model is used if set |
| 411 | * UBIFS_FLG_SPACE_FIXUP: first-mount "fixup" of free space within LEBs needed | ||
| 411 | */ | 412 | */ |
| 412 | enum { | 413 | enum { |
| 413 | UBIFS_FLG_BIGLPT = 0x02, | 414 | UBIFS_FLG_BIGLPT = 0x02, |
| 415 | UBIFS_FLG_SPACE_FIXUP = 0x04, | ||
| 414 | }; | 416 | }; |
| 415 | 417 | ||
| 416 | /** | 418 | /** |
| @@ -434,7 +436,7 @@ struct ubifs_ch { | |||
| 434 | __u8 node_type; | 436 | __u8 node_type; |
| 435 | __u8 group_type; | 437 | __u8 group_type; |
| 436 | __u8 padding[2]; | 438 | __u8 padding[2]; |
| 437 | } __attribute__ ((packed)); | 439 | } __packed; |
| 438 | 440 | ||
| 439 | /** | 441 | /** |
| 440 | * union ubifs_dev_desc - device node descriptor. | 442 | * union ubifs_dev_desc - device node descriptor. |
| @@ -448,7 +450,7 @@ struct ubifs_ch { | |||
| 448 | union ubifs_dev_desc { | 450 | union ubifs_dev_desc { |
| 449 | __le32 new; | 451 | __le32 new; |
| 450 | __le64 huge; | 452 | __le64 huge; |
| 451 | } __attribute__ ((packed)); | 453 | } __packed; |
| 452 | 454 | ||
| 453 | /** | 455 | /** |
| 454 | * struct ubifs_ino_node - inode node. | 456 | * struct ubifs_ino_node - inode node. |
| @@ -509,7 +511,7 @@ struct ubifs_ino_node { | |||
| 509 | __le16 compr_type; | 511 | __le16 compr_type; |
| 510 | __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ | 512 | __u8 padding2[26]; /* Watch 'zero_ino_node_unused()' if changing! */ |
| 511 | __u8 data[]; | 513 | __u8 data[]; |
| 512 | } __attribute__ ((packed)); | 514 | } __packed; |
| 513 | 515 | ||
| 514 | /** | 516 | /** |
| 515 | * struct ubifs_dent_node - directory entry node. | 517 | * struct ubifs_dent_node - directory entry node. |
| @@ -534,7 +536,7 @@ struct ubifs_dent_node { | |||
| 534 | __le16 nlen; | 536 | __le16 nlen; |
| 535 | __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ | 537 | __u8 padding2[4]; /* Watch 'zero_dent_node_unused()' if changing! */ |
| 536 | __u8 name[]; | 538 | __u8 name[]; |
| 537 | } __attribute__ ((packed)); | 539 | } __packed; |
| 538 | 540 | ||
| 539 | /** | 541 | /** |
| 540 | * struct ubifs_data_node - data node. | 542 | * struct ubifs_data_node - data node. |
| @@ -555,7 +557,7 @@ struct ubifs_data_node { | |||
| 555 | __le16 compr_type; | 557 | __le16 compr_type; |
| 556 | __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ | 558 | __u8 padding[2]; /* Watch 'zero_data_node_unused()' if changing! */ |
| 557 | __u8 data[]; | 559 | __u8 data[]; |
| 558 | } __attribute__ ((packed)); | 560 | } __packed; |
| 559 | 561 | ||
| 560 | /** | 562 | /** |
| 561 | * struct ubifs_trun_node - truncation node. | 563 | * struct ubifs_trun_node - truncation node. |
| @@ -575,7 +577,7 @@ struct ubifs_trun_node { | |||
| 575 | __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ | 577 | __u8 padding[12]; /* Watch 'zero_trun_node_unused()' if changing! */ |
| 576 | __le64 old_size; | 578 | __le64 old_size; |
| 577 | __le64 new_size; | 579 | __le64 new_size; |
| 578 | } __attribute__ ((packed)); | 580 | } __packed; |
| 579 | 581 | ||
| 580 | /** | 582 | /** |
| 581 | * struct ubifs_pad_node - padding node. | 583 | * struct ubifs_pad_node - padding node. |
| @@ -586,7 +588,7 @@ struct ubifs_trun_node { | |||
| 586 | struct ubifs_pad_node { | 588 | struct ubifs_pad_node { |
| 587 | struct ubifs_ch ch; | 589 | struct ubifs_ch ch; |
| 588 | __le32 pad_len; | 590 | __le32 pad_len; |
| 589 | } __attribute__ ((packed)); | 591 | } __packed; |
| 590 | 592 | ||
| 591 | /** | 593 | /** |
| 592 | * struct ubifs_sb_node - superblock node. | 594 | * struct ubifs_sb_node - superblock node. |
| @@ -644,7 +646,7 @@ struct ubifs_sb_node { | |||
| 644 | __u8 uuid[16]; | 646 | __u8 uuid[16]; |
| 645 | __le32 ro_compat_version; | 647 | __le32 ro_compat_version; |
| 646 | __u8 padding2[3968]; | 648 | __u8 padding2[3968]; |
| 647 | } __attribute__ ((packed)); | 649 | } __packed; |
| 648 | 650 | ||
| 649 | /** | 651 | /** |
| 650 | * struct ubifs_mst_node - master node. | 652 | * struct ubifs_mst_node - master node. |
| @@ -711,7 +713,7 @@ struct ubifs_mst_node { | |||
| 711 | __le32 idx_lebs; | 713 | __le32 idx_lebs; |
| 712 | __le32 leb_cnt; | 714 | __le32 leb_cnt; |
| 713 | __u8 padding[344]; | 715 | __u8 padding[344]; |
| 714 | } __attribute__ ((packed)); | 716 | } __packed; |
| 715 | 717 | ||
| 716 | /** | 718 | /** |
| 717 | * struct ubifs_ref_node - logical eraseblock reference node. | 719 | * struct ubifs_ref_node - logical eraseblock reference node. |
| @@ -727,7 +729,7 @@ struct ubifs_ref_node { | |||
| 727 | __le32 offs; | 729 | __le32 offs; |
| 728 | __le32 jhead; | 730 | __le32 jhead; |
| 729 | __u8 padding[28]; | 731 | __u8 padding[28]; |
| 730 | } __attribute__ ((packed)); | 732 | } __packed; |
| 731 | 733 | ||
| 732 | /** | 734 | /** |
| 733 | * struct ubifs_branch - key/reference/length branch | 735 | * struct ubifs_branch - key/reference/length branch |
| @@ -741,7 +743,7 @@ struct ubifs_branch { | |||
| 741 | __le32 offs; | 743 | __le32 offs; |
| 742 | __le32 len; | 744 | __le32 len; |
| 743 | __u8 key[]; | 745 | __u8 key[]; |
| 744 | } __attribute__ ((packed)); | 746 | } __packed; |
| 745 | 747 | ||
| 746 | /** | 748 | /** |
| 747 | * struct ubifs_idx_node - indexing node. | 749 | * struct ubifs_idx_node - indexing node. |
| @@ -755,7 +757,7 @@ struct ubifs_idx_node { | |||
| 755 | __le16 child_cnt; | 757 | __le16 child_cnt; |
| 756 | __le16 level; | 758 | __le16 level; |
| 757 | __u8 branches[]; | 759 | __u8 branches[]; |
| 758 | } __attribute__ ((packed)); | 760 | } __packed; |
| 759 | 761 | ||
| 760 | /** | 762 | /** |
| 761 | * struct ubifs_cs_node - commit start node. | 763 | * struct ubifs_cs_node - commit start node. |
| @@ -765,7 +767,7 @@ struct ubifs_idx_node { | |||
| 765 | struct ubifs_cs_node { | 767 | struct ubifs_cs_node { |
| 766 | struct ubifs_ch ch; | 768 | struct ubifs_ch ch; |
| 767 | __le64 cmt_no; | 769 | __le64 cmt_no; |
| 768 | } __attribute__ ((packed)); | 770 | } __packed; |
| 769 | 771 | ||
| 770 | /** | 772 | /** |
| 771 | * struct ubifs_orph_node - orphan node. | 773 | * struct ubifs_orph_node - orphan node. |
| @@ -777,6 +779,6 @@ struct ubifs_orph_node { | |||
| 777 | struct ubifs_ch ch; | 779 | struct ubifs_ch ch; |
| 778 | __le64 cmt_no; | 780 | __le64 cmt_no; |
| 779 | __le64 inos[]; | 781 | __le64 inos[]; |
| 780 | } __attribute__ ((packed)); | 782 | } __packed; |
| 781 | 783 | ||
| 782 | #endif /* __UBIFS_MEDIA_H__ */ | 784 | #endif /* __UBIFS_MEDIA_H__ */ |
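
The __attribute__ ((packed)) → __packed conversions above are purely cosmetic: __packed is the kernel's shorthand for the same GCC attribute (defined in include/linux/compiler-gcc.h). For reference, a standalone demonstration of what packing buys for on-media structures like these; the field layout is hypothetical and the attribute is GCC/Clang-specific:

#include <stdint.h>
#include <stdio.h>

#define __packed __attribute__((packed))

/* Without packing, the compiler may insert alignment padding. */
struct padded {
    uint8_t  node_type;
    uint32_t len;       /* typically preceded by 3 bytes of padding */
};

/* Packed: the in-memory layout matches the on-media byte layout. */
struct on_media {
    uint8_t  node_type;
    uint32_t len;
} __packed;

int main(void)
{
    printf("padded: %zu bytes, packed: %zu bytes\n",
           sizeof(struct padded), sizeof(struct on_media));
    return 0;
}
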
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 8c40ad3c6721..93d1412a06f0 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
| @@ -389,9 +389,9 @@ struct ubifs_gced_idx_leb { | |||
| 389 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses | 389 | * The @ui_size is a "shadow" variable for @inode->i_size and UBIFS uses |
| 390 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot | 390 | * @ui_size instead of @inode->i_size. The reason for this is that UBIFS cannot |
| 391 | * make sure @inode->i_size is always changed under @ui_mutex, because it | 391 | * make sure @inode->i_size is always changed under @ui_mutex, because it |
| 392 | * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would deadlock | 392 | * cannot call 'truncate_setsize()' with @ui_mutex locked, because it would |
| 393 | * with 'ubifs_writepage()' (see file.c). All the other inode fields are | 393 | * deadlock with 'ubifs_writepage()' (see file.c). All the other inode fields |
| 394 | * changed under @ui_mutex, so they do not need "shadow" fields. Note, one | 394 | * are changed under @ui_mutex, so they do not need "shadow" fields. Note, one |
| 395 | * could consider to rework locking and base it on "shadow" fields. | 395 | * could consider to rework locking and base it on "shadow" fields. |
| 396 | */ | 396 | */ |
| 397 | struct ubifs_inode { | 397 | struct ubifs_inode { |
| @@ -937,6 +937,40 @@ struct ubifs_mount_opts { | |||
| 937 | unsigned int compr_type:2; | 937 | unsigned int compr_type:2; |
| 938 | }; | 938 | }; |
| 939 | 939 | ||
| 940 | /** | ||
| 941 | * struct ubifs_budg_info - UBIFS budgeting information. | ||
| 942 | * @idx_growth: amount of bytes budgeted for index growth | ||
| 943 | * @data_growth: amount of bytes budgeted for cached data | ||
| 944 | * @dd_growth: amount of bytes budgeted for cached data that will make | ||
| 945 | * other data dirty | ||
| 946 | * @uncommitted_idx: amount of bytes that were budgeted for growth of the index, but | ||
| 947 | * which still have to be taken into account because the index | ||
| 948 | * has not been committed so far | ||
| 949 | * @old_idx_sz: size of index on flash | ||
| 950 | * @min_idx_lebs: minimum number of LEBs required for the index | ||
| 951 | * @nospace: non-zero if the file-system does not have flash space (used as an | ||
| 952 | * optimization) | ||
| 953 | * @nospace_rp: the same as @nospace, but additionally means that even the | ||
| 954 | * reserved pool is full | ||
| 955 | * @page_budget: budget for a page (constant, never changed after mount) | ||
| 956 | * @inode_budget: budget for an inode (constant, never changed after mount) | ||
| 957 | * @dent_budget: budget for a directory entry (constant, never changed after | ||
| 958 | * mount) | ||
| 959 | */ | ||
| 960 | struct ubifs_budg_info { | ||
| 961 | long long idx_growth; | ||
| 962 | long long data_growth; | ||
| 963 | long long dd_growth; | ||
| 964 | long long uncommitted_idx; | ||
| 965 | unsigned long long old_idx_sz; | ||
| 966 | int min_idx_lebs; | ||
| 967 | unsigned int nospace:1; | ||
| 968 | unsigned int nospace_rp:1; | ||
| 969 | int page_budget; | ||
| 970 | int inode_budget; | ||
| 971 | int dent_budget; | ||
| 972 | }; | ||
| 973 | |||
| 940 | struct ubifs_debug_info; | 974 | struct ubifs_debug_info; |
| 941 | 975 | ||
| 942 | /** | 976 | /** |
| @@ -980,6 +1014,7 @@ struct ubifs_debug_info; | |||
| 980 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running | 1014 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running |
| 981 | * | 1015 | * |
| 982 | * @big_lpt: flag that LPT is too big to write whole during commit | 1016 | * @big_lpt: flag that LPT is too big to write whole during commit |
| 1017 | * @space_fixup: flag indicating that free space in LEBs needs to be cleaned up | ||
| 983 | * @no_chk_data_crc: do not check CRCs when reading data nodes (except during | 1018 | * @no_chk_data_crc: do not check CRCs when reading data nodes (except during |
| 984 | * recovery) | 1019 | * recovery) |
| 985 | * @bulk_read: enable bulk-reads | 1020 | * @bulk_read: enable bulk-reads |
| @@ -1057,32 +1092,14 @@ struct ubifs_debug_info; | |||
| 1057 | * @dirty_zn_cnt: number of dirty znodes | 1092 | * @dirty_zn_cnt: number of dirty znodes |
| 1058 | * @clean_zn_cnt: number of clean znodes | 1093 | * @clean_zn_cnt: number of clean znodes |
| 1059 | * | 1094 | * |
| 1060 | * @budg_idx_growth: amount of bytes budgeted for index growth | 1095 | * @space_lock: protects @bi and @lst |
| 1061 | * @budg_data_growth: amount of bytes budgeted for cached data | 1096 | * @lst: lprops statistics |
| 1062 | * @budg_dd_growth: amount of bytes budgeted for cached data that will make | 1097 | * @bi: budgeting information |
| 1063 | * other data dirty | ||
| 1064 | * @budg_uncommitted_idx: amount of bytes were budgeted for growth of the index, | ||
| 1065 | * but which still have to be taken into account because | ||
| 1066 | * the index has not been committed so far | ||
| 1067 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, | ||
| 1068 | * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, | ||
| 1069 | * @nospace, and @nospace_rp; | ||
| 1070 | * @min_idx_lebs: minimum number of LEBs required for the index | ||
| 1071 | * @old_idx_sz: size of index on flash | ||
| 1072 | * @calc_idx_sz: temporary variable which is used to calculate new index size | 1098 | * @calc_idx_sz: temporary variable which is used to calculate new index size |
| 1073 | * (contains accurate new index size at end of TNC commit start) | 1099 | * (contains accurate new index size at end of TNC commit start) |
| 1074 | * @lst: lprops statistics | ||
| 1075 | * @nospace: non-zero if the file-system does not have flash space (used as | ||
| 1076 | * optimization) | ||
| 1077 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
| 1078 | * pool is full | ||
| 1079 | * | ||
| 1080 | * @page_budget: budget for a page | ||
| 1081 | * @inode_budget: budget for an inode | ||
| 1082 | * @dent_budget: budget for a directory entry | ||
| 1083 | * | 1100 | * |
| 1084 | * @ref_node_alsz: size of the LEB reference node aligned to the min. flash | 1101 | * @ref_node_alsz: size of the LEB reference node aligned to the min. flash |
| 1085 | * I/O unit | 1102 | * I/O unit |
| 1086 | * @mst_node_alsz: master node aligned size | 1103 | * @mst_node_alsz: master node aligned size |
| 1087 | * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary | 1104 | * @min_idx_node_sz: minimum indexing node aligned on 8-bytes boundary |
| 1088 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary | 1105 | * @max_idx_node_sz: maximum indexing node aligned on 8-bytes boundary |
| @@ -1189,7 +1206,6 @@ struct ubifs_debug_info; | |||
| 1189 | * @replaying: %1 during journal replay | 1206 | * @replaying: %1 during journal replay |
| 1190 | * @mounting: %1 while mounting | 1207 | * @mounting: %1 while mounting |
| 1191 | * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode | 1208 | * @remounting_rw: %1 while re-mounting from R/O mode to R/W mode |
| 1192 | * @replay_tree: temporary tree used during journal replay | ||
| 1193 | * @replay_list: temporary list used during journal replay | 1209 | * @replay_list: temporary list used during journal replay |
| 1194 | * @replay_buds: list of buds to replay | 1210 | * @replay_buds: list of buds to replay |
| 1195 | * @cs_sqnum: sequence number of first node in the log (commit start node) | 1211 | * @cs_sqnum: sequence number of first node in the log (commit start node) |
| @@ -1238,6 +1254,7 @@ struct ubifs_info { | |||
| 1238 | wait_queue_head_t cmt_wq; | 1254 | wait_queue_head_t cmt_wq; |
| 1239 | 1255 | ||
| 1240 | unsigned int big_lpt:1; | 1256 | unsigned int big_lpt:1; |
| 1257 | unsigned int space_fixup:1; | ||
| 1241 | unsigned int no_chk_data_crc:1; | 1258 | unsigned int no_chk_data_crc:1; |
| 1242 | unsigned int bulk_read:1; | 1259 | unsigned int bulk_read:1; |
| 1243 | unsigned int default_compr:2; | 1260 | unsigned int default_compr:2; |
| @@ -1308,21 +1325,10 @@ struct ubifs_info { | |||
| 1308 | atomic_long_t dirty_zn_cnt; | 1325 | atomic_long_t dirty_zn_cnt; |
| 1309 | atomic_long_t clean_zn_cnt; | 1326 | atomic_long_t clean_zn_cnt; |
| 1310 | 1327 | ||
| 1311 | long long budg_idx_growth; | ||
| 1312 | long long budg_data_growth; | ||
| 1313 | long long budg_dd_growth; | ||
| 1314 | long long budg_uncommitted_idx; | ||
| 1315 | spinlock_t space_lock; | 1328 | spinlock_t space_lock; |
| 1316 | int min_idx_lebs; | ||
| 1317 | unsigned long long old_idx_sz; | ||
| 1318 | unsigned long long calc_idx_sz; | ||
| 1319 | struct ubifs_lp_stats lst; | 1329 | struct ubifs_lp_stats lst; |
| 1320 | unsigned int nospace:1; | 1330 | struct ubifs_budg_info bi; |
| 1321 | unsigned int nospace_rp:1; | 1331 | unsigned long long calc_idx_sz; |
| 1322 | |||
| 1323 | int page_budget; | ||
| 1324 | int inode_budget; | ||
| 1325 | int dent_budget; | ||
| 1326 | 1332 | ||
| 1327 | int ref_node_alsz; | 1333 | int ref_node_alsz; |
| 1328 | int mst_node_alsz; | 1334 | int mst_node_alsz; |
| @@ -1430,7 +1436,6 @@ struct ubifs_info { | |||
| 1430 | unsigned int replaying:1; | 1436 | unsigned int replaying:1; |
| 1431 | unsigned int mounting:1; | 1437 | unsigned int mounting:1; |
| 1432 | unsigned int remounting_rw:1; | 1438 | unsigned int remounting_rw:1; |
| 1433 | struct rb_root replay_tree; | ||
| 1434 | struct list_head replay_list; | 1439 | struct list_head replay_list; |
| 1435 | struct list_head replay_buds; | 1440 | struct list_head replay_buds; |
| 1436 | unsigned long long cs_sqnum; | 1441 | unsigned long long cs_sqnum; |
| @@ -1628,6 +1633,7 @@ int ubifs_write_master(struct ubifs_info *c); | |||
| 1628 | int ubifs_read_superblock(struct ubifs_info *c); | 1633 | int ubifs_read_superblock(struct ubifs_info *c); |
| 1629 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); | 1634 | struct ubifs_sb_node *ubifs_read_sb_node(struct ubifs_info *c); |
| 1630 | int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); | 1635 | int ubifs_write_sb_node(struct ubifs_info *c, struct ubifs_sb_node *sup); |
| 1636 | int ubifs_fixup_free_space(struct ubifs_info *c); | ||
| 1631 | 1637 | ||
| 1632 | /* replay.c */ | 1638 | /* replay.c */ |
| 1633 | int ubifs_validate_entry(struct ubifs_info *c, | 1639 | int ubifs_validate_entry(struct ubifs_info *c, |
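The ubifs.h hunks above fold every budgeting field that @space_lock protects into a single struct ubifs_budg_info, embedded as c->bi, so a consistent view of the whole budget becomes one struct copy taken under the lock. Below is a minimal userspace sketch of that "one lock, one struct" pattern; the field set is abbreviated and a pthread mutex stands in for the kernel spinlock, so none of this is kernel code.

#include <pthread.h>
#include <stdio.h>

struct budg_info {			/* abbreviated stand-in for ubifs_budg_info */
	long long idx_growth;
	long long data_growth;
	long long dd_growth;
	int min_idx_lebs;
};

struct fs_info {			/* stand-in for ubifs_info */
	pthread_mutex_t space_lock;	/* protects bi, like c->space_lock */
	struct budg_info bi;
};

/* Snapshot all budgeting state in one short critical section. */
static struct budg_info snapshot_budget(struct fs_info *c)
{
	struct budg_info copy;

	pthread_mutex_lock(&c->space_lock);
	copy = c->bi;			/* one struct copy, no torn reads */
	pthread_mutex_unlock(&c->space_lock);
	return copy;
}

int main(void)
{
	struct fs_info c = {
		.space_lock = PTHREAD_MUTEX_INITIALIZER,
		.bi = { .idx_growth = 4096, .min_idx_lebs = 2 },
	};
	struct budg_info s = snapshot_budget(&c);

	printf("idx_growth=%lld min_idx_lebs=%d\n", s.idx_growth, s.min_idx_lebs);
	return 0;
}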
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 3299f469e712..16f19f55e63f 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
| @@ -80,8 +80,8 @@ enum { | |||
| 80 | SECURITY_XATTR, | 80 | SECURITY_XATTR, |
| 81 | }; | 81 | }; |
| 82 | 82 | ||
| 83 | static const struct inode_operations none_inode_operations; | 83 | static const struct inode_operations empty_iops; |
| 84 | static const struct file_operations none_file_operations; | 84 | static const struct file_operations empty_fops; |
| 85 | 85 | ||
| 86 | /** | 86 | /** |
| 87 | * create_xattr - create an extended attribute. | 87 | * create_xattr - create an extended attribute. |
| @@ -131,8 +131,8 @@ static int create_xattr(struct ubifs_info *c, struct inode *host, | |||
| 131 | 131 | ||
| 132 | /* Re-define all operations to be "nothing" */ | 132 | /* Re-define all operations to be "nothing" */ |
| 133 | inode->i_mapping->a_ops = &empty_aops; | 133 | inode->i_mapping->a_ops = &empty_aops; |
| 134 | inode->i_op = &none_inode_operations; | 134 | inode->i_op = &empty_iops; |
| 135 | inode->i_fop = &none_file_operations; | 135 | inode->i_fop = &empty_fops; |
| 136 | 136 | ||
| 137 | inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; | 137 | inode->i_flags |= S_SYNC | S_NOATIME | S_NOCMTIME | S_NOQUOTA; |
| 138 | ui = ubifs_inode(inode); | 138 | ui = ubifs_inode(inode); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 9ef9ed2cfe2e..5e68099db2a5 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -33,7 +33,6 @@ | |||
| 33 | #include <linux/migrate.h> | 33 | #include <linux/migrate.h> |
| 34 | #include <linux/backing-dev.h> | 34 | #include <linux/backing-dev.h> |
| 35 | #include <linux/freezer.h> | 35 | #include <linux/freezer.h> |
| 36 | #include <linux/list_sort.h> | ||
| 37 | 36 | ||
| 38 | #include "xfs_sb.h" | 37 | #include "xfs_sb.h" |
| 39 | #include "xfs_inum.h" | 38 | #include "xfs_inum.h" |
| @@ -709,6 +708,27 @@ xfs_buf_get_empty( | |||
| 709 | return bp; | 708 | return bp; |
| 710 | } | 709 | } |
| 711 | 710 | ||
| 711 | /* | ||
| 712 | * Return a buffer allocated as an empty buffer and associated with external | ||
| 713 | * memory via xfs_buf_associate_memory() back to its empty state. | ||
| 714 | */ | ||
| 715 | void | ||
| 716 | xfs_buf_set_empty( | ||
| 717 | struct xfs_buf *bp, | ||
| 718 | size_t len) | ||
| 719 | { | ||
| 720 | if (bp->b_pages) | ||
| 721 | _xfs_buf_free_pages(bp); | ||
| 722 | |||
| 723 | bp->b_pages = NULL; | ||
| 724 | bp->b_page_count = 0; | ||
| 725 | bp->b_addr = NULL; | ||
| 726 | bp->b_file_offset = 0; | ||
| 727 | bp->b_buffer_length = bp->b_count_desired = len; | ||
| 728 | bp->b_bn = XFS_BUF_DADDR_NULL; | ||
| 729 | bp->b_flags &= ~XBF_MAPPED; | ||
| 730 | } | ||
| 731 | |||
| 712 | static inline struct page * | 732 | static inline struct page * |
| 713 | mem_to_page( | 733 | mem_to_page( |
| 714 | void *addr) | 734 | void *addr) |
| @@ -1402,12 +1422,12 @@ restart: | |||
| 1402 | int | 1422 | int |
| 1403 | xfs_buftarg_shrink( | 1423 | xfs_buftarg_shrink( |
| 1404 | struct shrinker *shrink, | 1424 | struct shrinker *shrink, |
| 1405 | int nr_to_scan, | 1425 | struct shrink_control *sc) |
| 1406 | gfp_t mask) | ||
| 1407 | { | 1426 | { |
| 1408 | struct xfs_buftarg *btp = container_of(shrink, | 1427 | struct xfs_buftarg *btp = container_of(shrink, |
| 1409 | struct xfs_buftarg, bt_shrinker); | 1428 | struct xfs_buftarg, bt_shrinker); |
| 1410 | struct xfs_buf *bp; | 1429 | struct xfs_buf *bp; |
| 1430 | int nr_to_scan = sc->nr_to_scan; | ||
| 1411 | LIST_HEAD(dispose); | 1431 | LIST_HEAD(dispose); |
| 1412 | 1432 | ||
| 1413 | if (!nr_to_scan) | 1433 | if (!nr_to_scan) |
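The xfs_buftarg_shrink() hunk above follows the mm API change that folded the (nr_to_scan, gfp_mask) argument pair into a single struct shrink_control, so shrinker signatures stop churning when reclaim gains parameters. A standalone sketch of the new callback shape follows; shrink_control is mocked with only the two fields these hunks read, so treat it as an illustration rather than the kernel definition.

#include <stdio.h>

typedef unsigned int gfp_t;

struct shrink_control {			/* mock: just what the callbacks unpack */
	gfp_t gfp_mask;
	unsigned long nr_to_scan;
};

struct shrinker {
	int (*shrink)(struct shrinker *, struct shrink_control *);
};

/* New-style callback: reclaim parameters arrive bundled in *sc. */
static int demo_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	int nr_to_scan = sc->nr_to_scan;	/* unpacked exactly as the hunks do */

	if (!nr_to_scan)
		return 42;		/* nr_to_scan == 0 asks for the cache size */
	return 0;			/* pretend everything was reclaimed */
}

int main(void)
{
	struct shrinker s = { .shrink = demo_shrink };
	struct shrink_control sc = { .gfp_mask = 0, .nr_to_scan = 0 };

	printf("cache objects: %d\n", s.shrink(&s, &sc));
	return 0;
}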
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index a9a1c4512645..50a7d5fb3b73 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
| @@ -178,6 +178,7 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t, | |||
| 178 | xfs_buf_flags_t); | 178 | xfs_buf_flags_t); |
| 179 | 179 | ||
| 180 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); | 180 | extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *); |
| 181 | extern void xfs_buf_set_empty(struct xfs_buf *bp, size_t len); | ||
| 181 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); | 182 | extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int); |
| 182 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); | 183 | extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t); |
| 183 | extern void xfs_buf_hold(xfs_buf_t *); | 184 | extern void xfs_buf_hold(xfs_buf_t *); |
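The new xfs_buf_set_empty() undoes what xfs_buf_associate_memory() set up: it drops the page bookkeeping, clears the mapping, and keeps only the requested length. A reduced userspace mock of that reset follows; the field names mirror the hunk, but free() stands in for _xfs_buf_free_pages() and everything else is simplified.

#include <stdio.h>
#include <stdlib.h>

struct buf {				/* mock of the few xfs_buf fields reset here */
	char **b_pages;
	unsigned int b_page_count;
	void *b_addr;
	size_t b_buffer_length;
};

static void buf_set_empty(struct buf *bp, size_t len)
{
	free(bp->b_pages);		/* stands in for _xfs_buf_free_pages() */
	bp->b_pages = NULL;
	bp->b_page_count = 0;
	bp->b_addr = NULL;		/* no longer mapped to borrowed memory */
	bp->b_buffer_length = len;	/* only the requested length survives */
}

int main(void)
{
	struct buf b = {
		.b_pages = malloc(4 * sizeof(char *)),
		.b_page_count = 4,
		.b_buffer_length = 16384,
	};

	b.b_addr = &b;			/* pretend external memory was attached */
	buf_set_empty(&b, 4096);
	printf("pages=%u len=%zu addr=%p\n", b.b_page_count,
	       b.b_buffer_length, b.b_addr);
	return 0;
}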
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c index b3486dfa5520..54e623bfbb85 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.c +++ b/fs/xfs/linux-2.6/xfs_ioctl32.c | |||
| @@ -586,7 +586,8 @@ xfs_file_compat_ioctl( | |||
| 586 | case XFS_IOC_RESVSP_32: | 586 | case XFS_IOC_RESVSP_32: |
| 587 | case XFS_IOC_UNRESVSP_32: | 587 | case XFS_IOC_UNRESVSP_32: |
| 588 | case XFS_IOC_RESVSP64_32: | 588 | case XFS_IOC_RESVSP64_32: |
| 589 | case XFS_IOC_UNRESVSP64_32: { | 589 | case XFS_IOC_UNRESVSP64_32: |
| 590 | case XFS_IOC_ZERO_RANGE_32: { | ||
| 590 | struct xfs_flock64 bf; | 591 | struct xfs_flock64 bf; |
| 591 | 592 | ||
| 592 | if (xfs_compat_flock64_copyin(&bf, arg)) | 593 | if (xfs_compat_flock64_copyin(&bf, arg)) |
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h index 08b605792a99..80f4060e8970 100644 --- a/fs/xfs/linux-2.6/xfs_ioctl32.h +++ b/fs/xfs/linux-2.6/xfs_ioctl32.h | |||
| @@ -184,6 +184,7 @@ typedef struct compat_xfs_flock64 { | |||
| 184 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) | 184 | #define XFS_IOC_UNRESVSP_32 _IOW('X', 41, struct compat_xfs_flock64) |
| 185 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) | 185 | #define XFS_IOC_RESVSP64_32 _IOW('X', 42, struct compat_xfs_flock64) |
| 186 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) | 186 | #define XFS_IOC_UNRESVSP64_32 _IOW('X', 43, struct compat_xfs_flock64) |
| 187 | #define XFS_IOC_ZERO_RANGE_32 _IOW('X', 57, struct compat_xfs_flock64) | ||
| 187 | 188 | ||
| 188 | typedef struct compat_xfs_fsop_geom_v1 { | 189 | typedef struct compat_xfs_fsop_geom_v1 { |
| 189 | __u32 blocksize; /* filesystem (data) block size */ | 190 | __u32 blocksize; /* filesystem (data) block size */ |
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h index 244be9cbfe78..8633521b3b2e 100644 --- a/fs/xfs/linux-2.6/xfs_linux.h +++ b/fs/xfs/linux-2.6/xfs_linux.h | |||
| @@ -70,6 +70,7 @@ | |||
| 70 | #include <linux/ctype.h> | 70 | #include <linux/ctype.h> |
| 71 | #include <linux/writeback.h> | 71 | #include <linux/writeback.h> |
| 72 | #include <linux/capability.h> | 72 | #include <linux/capability.h> |
| 73 | #include <linux/list_sort.h> | ||
| 73 | 74 | ||
| 74 | #include <asm/page.h> | 75 | #include <asm/page.h> |
| 75 | #include <asm/div64.h> | 76 | #include <asm/div64.h> |
diff --git a/fs/xfs/linux-2.6/xfs_message.c b/fs/xfs/linux-2.6/xfs_message.c index 9f76cceb678d..bd672def95ac 100644 --- a/fs/xfs/linux-2.6/xfs_message.c +++ b/fs/xfs/linux-2.6/xfs_message.c | |||
| @@ -41,23 +41,6 @@ __xfs_printk( | |||
| 41 | printk("%sXFS: %pV\n", level, vaf); | 41 | printk("%sXFS: %pV\n", level, vaf); |
| 42 | } | 42 | } |
| 43 | 43 | ||
| 44 | void xfs_printk( | ||
| 45 | const char *level, | ||
| 46 | const struct xfs_mount *mp, | ||
| 47 | const char *fmt, ...) | ||
| 48 | { | ||
| 49 | struct va_format vaf; | ||
| 50 | va_list args; | ||
| 51 | |||
| 52 | va_start(args, fmt); | ||
| 53 | |||
| 54 | vaf.fmt = fmt; | ||
| 55 | vaf.va = &args; | ||
| 56 | |||
| 57 | __xfs_printk(level, mp, &vaf); | ||
| 58 | va_end(args); | ||
| 59 | } | ||
| 60 | |||
| 61 | #define define_xfs_printk_level(func, kern_level) \ | 44 | #define define_xfs_printk_level(func, kern_level) \ |
| 62 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ | 45 | void func(const struct xfs_mount *mp, const char *fmt, ...) \ |
| 63 | { \ | 46 | { \ |
| @@ -95,8 +78,7 @@ xfs_alert_tag( | |||
| 95 | int do_panic = 0; | 78 | int do_panic = 0; |
| 96 | 79 | ||
| 97 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { | 80 | if (xfs_panic_mask && (xfs_panic_mask & panic_tag)) { |
| 98 | xfs_printk(KERN_ALERT, mp, | 81 | xfs_alert(mp, "Transforming an alert into a BUG."); |
| 99 | "XFS: Transforming an alert into a BUG."); | ||
| 100 | do_panic = 1; | 82 | do_panic = 1; |
| 101 | } | 83 | } |
| 102 | 84 | ||
diff --git a/fs/xfs/linux-2.6/xfs_message.h b/fs/xfs/linux-2.6/xfs_message.h index f1b3fc1b6c4e..7fb7ea007672 100644 --- a/fs/xfs/linux-2.6/xfs_message.h +++ b/fs/xfs/linux-2.6/xfs_message.h | |||
| @@ -3,9 +3,6 @@ | |||
| 3 | 3 | ||
| 4 | struct xfs_mount; | 4 | struct xfs_mount; |
| 5 | 5 | ||
| 6 | extern void xfs_printk(const char *level, const struct xfs_mount *mp, | ||
| 7 | const char *fmt, ...) | ||
| 8 | __attribute__ ((format (printf, 3, 4))); | ||
| 9 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) | 6 | extern void xfs_emerg(const struct xfs_mount *mp, const char *fmt, ...) |
| 10 | __attribute__ ((format (printf, 2, 3))); | 7 | __attribute__ ((format (printf, 2, 3))); |
| 11 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) | 8 | extern void xfs_alert(const struct xfs_mount *mp, const char *fmt, ...) |
| @@ -28,7 +25,9 @@ extern void xfs_info(const struct xfs_mount *mp, const char *fmt, ...) | |||
| 28 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | 25 | extern void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) |
| 29 | __attribute__ ((format (printf, 2, 3))); | 26 | __attribute__ ((format (printf, 2, 3))); |
| 30 | #else | 27 | #else |
| 31 | static inline void xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | 28 | static inline void |
| 29 | __attribute__ ((format (printf, 2, 3))) | ||
| 30 | xfs_debug(const struct xfs_mount *mp, const char *fmt, ...) | ||
| 32 | { | 31 | { |
| 33 | } | 32 | } |
| 34 | #endif | 33 | #endif |
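The xfs_message.h hunk moves the printf format attribute onto the empty inline xfs_debug() stub, so -Wformat keeps checking callers' format strings even in builds where debug output is compiled away. A standalone sketch of the same idiom; my_debug() and struct mount are stand-ins, not XFS names.

#include <stdio.h>

struct mount;				/* opaque, playing the role of xfs_mount */

#ifdef DEBUG
void my_debug(const struct mount *mp, const char *fmt, ...)
	__attribute__ ((format (printf, 2, 3)));
#else
static inline void
__attribute__ ((format (printf, 2, 3)))
my_debug(const struct mount *mp, const char *fmt, ...)
{
	/* intentionally empty: the messages vanish, the format checks remain */
}
#endif

int main(void)
{
	my_debug(NULL, "count=%d\n", 3);	/* fine */
	/* my_debug(NULL, "count=%d\n", "x");	   would still warn: %d vs char * */
	return 0;
}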
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index b38e58d02299..b0aa59e51fd0 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -1787,10 +1787,6 @@ init_xfs_fs(void) | |||
| 1787 | if (error) | 1787 | if (error) |
| 1788 | goto out_cleanup_procfs; | 1788 | goto out_cleanup_procfs; |
| 1789 | 1789 | ||
| 1790 | error = xfs_init_workqueues(); | ||
| 1791 | if (error) | ||
| 1792 | goto out_sysctl_unregister; | ||
| 1793 | |||
| 1794 | vfs_initquota(); | 1790 | vfs_initquota(); |
| 1795 | 1791 | ||
| 1796 | error = register_filesystem(&xfs_fs_type); | 1792 | error = register_filesystem(&xfs_fs_type); |
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c index 3e898a48122d..8ecad5ff9f9b 100644 --- a/fs/xfs/linux-2.6/xfs_sync.c +++ b/fs/xfs/linux-2.6/xfs_sync.c | |||
| @@ -267,6 +267,16 @@ xfs_sync_inode_attr( | |||
| 267 | 267 | ||
| 268 | error = xfs_iflush(ip, flags); | 268 | error = xfs_iflush(ip, flags); |
| 269 | 269 | ||
| 270 | /* | ||
| 271 | * We don't want to try again on non-blocking flushes that can't run | ||
| 272 | * again immediately. If an inode really must be written, then that's | ||
| 273 | * what the SYNC_WAIT flag is for. | ||
| 274 | */ | ||
| 275 | if (error == EAGAIN) { | ||
| 276 | ASSERT(!(flags & SYNC_WAIT)); | ||
| 277 | error = 0; | ||
| 278 | } | ||
| 279 | |||
| 270 | out_unlock: | 280 | out_unlock: |
| 271 | xfs_iunlock(ip, XFS_ILOCK_SHARED); | 281 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
| 272 | return error; | 282 | return error; |
| @@ -1022,13 +1032,14 @@ xfs_reclaim_inodes( | |||
| 1022 | static int | 1032 | static int |
| 1023 | xfs_reclaim_inode_shrink( | 1033 | xfs_reclaim_inode_shrink( |
| 1024 | struct shrinker *shrink, | 1034 | struct shrinker *shrink, |
| 1025 | int nr_to_scan, | 1035 | struct shrink_control *sc) |
| 1026 | gfp_t gfp_mask) | ||
| 1027 | { | 1036 | { |
| 1028 | struct xfs_mount *mp; | 1037 | struct xfs_mount *mp; |
| 1029 | struct xfs_perag *pag; | 1038 | struct xfs_perag *pag; |
| 1030 | xfs_agnumber_t ag; | 1039 | xfs_agnumber_t ag; |
| 1031 | int reclaimable; | 1040 | int reclaimable; |
| 1041 | int nr_to_scan = sc->nr_to_scan; | ||
| 1042 | gfp_t gfp_mask = sc->gfp_mask; | ||
| 1032 | 1043 | ||
| 1033 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); | 1044 | mp = container_of(shrink, struct xfs_mount, m_inode_shrink); |
| 1034 | if (nr_to_scan) { | 1045 | if (nr_to_scan) { |
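The xfs_sync.c hunk above downgrades EAGAIN from a non-blocking xfs_iflush() to success: a flush that cannot run right now is expected there, and only SYNC_WAIT callers are entitled to insist. A standalone mock of that error filtering, with the flush reduced to a stub (the SYNC_WAIT value here mirrors the flag's role, not necessarily its kernel definition):

#include <assert.h>
#include <errno.h>
#include <stdio.h>

#define SYNC_WAIT	0x0001

static int flush(int flags)
{
	return (flags & SYNC_WAIT) ? 0 : EAGAIN;	/* pretend the inode is busy */
}

static int sync_inode_attr(int flags)
{
	int error = flush(flags);

	if (error == EAGAIN) {
		assert(!(flags & SYNC_WAIT));	/* blocking flushes must not punt */
		error = 0;		/* non-blocking: retry later, not a failure */
	}
	return error;
}

int main(void)
{
	printf("nonblocking=%d blocking=%d\n",
	       sync_inode_attr(0), sync_inode_attr(SYNC_WAIT));
	return 0;
}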
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index 2d0bcb479075..d48b7a579ae1 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h | |||
| @@ -1151,44 +1151,7 @@ TRACE_EVENT(xfs_bunmap, | |||
| 1151 | 1151 | ||
| 1152 | ); | 1152 | ); |
| 1153 | 1153 | ||
| 1154 | #define XFS_BUSY_SYNC \ | 1154 | DECLARE_EVENT_CLASS(xfs_busy_class, |
| 1155 | { 0, "async" }, \ | ||
| 1156 | { 1, "sync" } | ||
| 1157 | |||
| 1158 | TRACE_EVENT(xfs_alloc_busy, | ||
| 1159 | TP_PROTO(struct xfs_trans *trans, xfs_agnumber_t agno, | ||
| 1160 | xfs_agblock_t agbno, xfs_extlen_t len, int sync), | ||
| 1161 | TP_ARGS(trans, agno, agbno, len, sync), | ||
| 1162 | TP_STRUCT__entry( | ||
| 1163 | __field(dev_t, dev) | ||
| 1164 | __field(struct xfs_trans *, tp) | ||
| 1165 | __field(int, tid) | ||
| 1166 | __field(xfs_agnumber_t, agno) | ||
| 1167 | __field(xfs_agblock_t, agbno) | ||
| 1168 | __field(xfs_extlen_t, len) | ||
| 1169 | __field(int, sync) | ||
| 1170 | ), | ||
| 1171 | TP_fast_assign( | ||
| 1172 | __entry->dev = trans->t_mountp->m_super->s_dev; | ||
| 1173 | __entry->tp = trans; | ||
| 1174 | __entry->tid = trans->t_ticket->t_tid; | ||
| 1175 | __entry->agno = agno; | ||
| 1176 | __entry->agbno = agbno; | ||
| 1177 | __entry->len = len; | ||
| 1178 | __entry->sync = sync; | ||
| 1179 | ), | ||
| 1180 | TP_printk("dev %d:%d trans 0x%p tid 0x%x agno %u agbno %u len %u %s", | ||
| 1181 | MAJOR(__entry->dev), MINOR(__entry->dev), | ||
| 1182 | __entry->tp, | ||
| 1183 | __entry->tid, | ||
| 1184 | __entry->agno, | ||
| 1185 | __entry->agbno, | ||
| 1186 | __entry->len, | ||
| 1187 | __print_symbolic(__entry->sync, XFS_BUSY_SYNC)) | ||
| 1188 | |||
| 1189 | ); | ||
| 1190 | |||
| 1191 | TRACE_EVENT(xfs_alloc_unbusy, | ||
| 1192 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1155 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
| 1193 | xfs_agblock_t agbno, xfs_extlen_t len), | 1156 | xfs_agblock_t agbno, xfs_extlen_t len), |
| 1194 | TP_ARGS(mp, agno, agbno, len), | 1157 | TP_ARGS(mp, agno, agbno, len), |
| @@ -1210,35 +1173,45 @@ TRACE_EVENT(xfs_alloc_unbusy, | |||
| 1210 | __entry->agbno, | 1173 | __entry->agbno, |
| 1211 | __entry->len) | 1174 | __entry->len) |
| 1212 | ); | 1175 | ); |
| 1176 | #define DEFINE_BUSY_EVENT(name) \ | ||
| 1177 | DEFINE_EVENT(xfs_busy_class, name, \ | ||
| 1178 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ | ||
| 1179 | xfs_agblock_t agbno, xfs_extlen_t len), \ | ||
| 1180 | TP_ARGS(mp, agno, agbno, len)) | ||
| 1181 | DEFINE_BUSY_EVENT(xfs_alloc_busy); | ||
| 1182 | DEFINE_BUSY_EVENT(xfs_alloc_busy_enomem); | ||
| 1183 | DEFINE_BUSY_EVENT(xfs_alloc_busy_force); | ||
| 1184 | DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse); | ||
| 1185 | DEFINE_BUSY_EVENT(xfs_alloc_busy_clear); | ||
| 1213 | 1186 | ||
| 1214 | #define XFS_BUSY_STATES \ | 1187 | TRACE_EVENT(xfs_alloc_busy_trim, |
| 1215 | { 0, "missing" }, \ | ||
| 1216 | { 1, "found" } | ||
| 1217 | |||
| 1218 | TRACE_EVENT(xfs_alloc_busysearch, | ||
| 1219 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, | 1188 | TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, |
| 1220 | xfs_agblock_t agbno, xfs_extlen_t len, int found), | 1189 | xfs_agblock_t agbno, xfs_extlen_t len, |
| 1221 | TP_ARGS(mp, agno, agbno, len, found), | 1190 | xfs_agblock_t tbno, xfs_extlen_t tlen), |
| 1191 | TP_ARGS(mp, agno, agbno, len, tbno, tlen), | ||
| 1222 | TP_STRUCT__entry( | 1192 | TP_STRUCT__entry( |
| 1223 | __field(dev_t, dev) | 1193 | __field(dev_t, dev) |
| 1224 | __field(xfs_agnumber_t, agno) | 1194 | __field(xfs_agnumber_t, agno) |
| 1225 | __field(xfs_agblock_t, agbno) | 1195 | __field(xfs_agblock_t, agbno) |
| 1226 | __field(xfs_extlen_t, len) | 1196 | __field(xfs_extlen_t, len) |
| 1227 | __field(int, found) | 1197 | __field(xfs_agblock_t, tbno) |
| 1198 | __field(xfs_extlen_t, tlen) | ||
| 1228 | ), | 1199 | ), |
| 1229 | TP_fast_assign( | 1200 | TP_fast_assign( |
| 1230 | __entry->dev = mp->m_super->s_dev; | 1201 | __entry->dev = mp->m_super->s_dev; |
| 1231 | __entry->agno = agno; | 1202 | __entry->agno = agno; |
| 1232 | __entry->agbno = agbno; | 1203 | __entry->agbno = agbno; |
| 1233 | __entry->len = len; | 1204 | __entry->len = len; |
| 1234 | __entry->found = found; | 1205 | __entry->tbno = tbno; |
| 1206 | __entry->tlen = tlen; | ||
| 1235 | ), | 1207 | ), |
| 1236 | TP_printk("dev %d:%d agno %u agbno %u len %u %s", | 1208 | TP_printk("dev %d:%d agno %u agbno %u len %u tbno %u tlen %u", |
| 1237 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1209 | MAJOR(__entry->dev), MINOR(__entry->dev), |
| 1238 | __entry->agno, | 1210 | __entry->agno, |
| 1239 | __entry->agbno, | 1211 | __entry->agbno, |
| 1240 | __entry->len, | 1212 | __entry->len, |
| 1241 | __print_symbolic(__entry->found, XFS_BUSY_STATES)) | 1213 | __entry->tbno, |
| 1214 | __entry->tlen) | ||
| 1242 | ); | 1215 | ); |
| 1243 | 1216 | ||
| 1244 | TRACE_EVENT(xfs_trans_commit_lsn, | 1217 | TRACE_EVENT(xfs_trans_commit_lsn, |
| @@ -1418,7 +1391,7 @@ DECLARE_EVENT_CLASS(xfs_alloc_class, | |||
| 1418 | __entry->wasfromfl, | 1391 | __entry->wasfromfl, |
| 1419 | __entry->isfl, | 1392 | __entry->isfl, |
| 1420 | __entry->userdata, | 1393 | __entry->userdata, |
| 1421 | __entry->firstblock) | 1394 | (unsigned long long)__entry->firstblock) |
| 1422 | ) | 1395 | ) |
| 1423 | 1396 | ||
| 1424 | #define DEFINE_ALLOC_EVENT(name) \ | 1397 | #define DEFINE_ALLOC_EVENT(name) \ |
| @@ -1433,11 +1406,14 @@ DEFINE_ALLOC_EVENT(xfs_alloc_near_first); | |||
| 1433 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); | 1406 | DEFINE_ALLOC_EVENT(xfs_alloc_near_greater); |
| 1434 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); | 1407 | DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser); |
| 1435 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); | 1408 | DEFINE_ALLOC_EVENT(xfs_alloc_near_error); |
| 1409 | DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry); | ||
| 1410 | DEFINE_ALLOC_EVENT(xfs_alloc_near_busy); | ||
| 1436 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); | 1411 | DEFINE_ALLOC_EVENT(xfs_alloc_size_neither); |
| 1437 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); | 1412 | DEFINE_ALLOC_EVENT(xfs_alloc_size_noentry); |
| 1438 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); | 1413 | DEFINE_ALLOC_EVENT(xfs_alloc_size_nominleft); |
| 1439 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); | 1414 | DEFINE_ALLOC_EVENT(xfs_alloc_size_done); |
| 1440 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); | 1415 | DEFINE_ALLOC_EVENT(xfs_alloc_size_error); |
| 1416 | DEFINE_ALLOC_EVENT(xfs_alloc_size_busy); | ||
| 1441 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); | 1417 | DEFINE_ALLOC_EVENT(xfs_alloc_small_freelist); |
| 1442 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); | 1418 | DEFINE_ALLOC_EVENT(xfs_alloc_small_notenough); |
| 1443 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); | 1419 | DEFINE_ALLOC_EVENT(xfs_alloc_small_done); |
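The tracing hunks above replace a set of near-identical TRACE_EVENT() definitions with one DECLARE_EVENT_CLASS() body plus a one-line DEFINE_EVENT() stamp per event name. A toy sketch of that macro dedup using plain printf; the real tracepoint machinery (ring buffers, TP_STRUCT__entry, and so on) is intentionally absent.

#include <stdio.h>

/* One shared body for the whole event family... */
#define DECLARE_BUSY_CLASS(body) \
	static void busy_class_print(const char *name, unsigned int agno, \
				     unsigned int agbno, unsigned int len) body

/* ...and a one-liner to stamp out each named event. */
#define DEFINE_BUSY_EVENT(name) \
	static void trace_##name(unsigned int agno, unsigned int agbno, \
				 unsigned int len) \
	{ busy_class_print(#name, agno, agbno, len); }

DECLARE_BUSY_CLASS({
	printf("%s: agno %u agbno %u len %u\n", name, agno, agbno, len);
})
DEFINE_BUSY_EVENT(xfs_alloc_busy)
DEFINE_BUSY_EVENT(xfs_alloc_busy_reuse)

int main(void)
{
	trace_xfs_alloc_busy(1, 100, 8);
	trace_xfs_alloc_busy_reuse(1, 100, 8);
	return 0;
}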
diff --git a/fs/xfs/quota/xfs_qm.c b/fs/xfs/quota/xfs_qm.c index 69228aa8605a..b94dace4e785 100644 --- a/fs/xfs/quota/xfs_qm.c +++ b/fs/xfs/quota/xfs_qm.c | |||
| @@ -60,7 +60,7 @@ STATIC void xfs_qm_list_destroy(xfs_dqlist_t *); | |||
| 60 | 60 | ||
| 61 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); | 61 | STATIC int xfs_qm_init_quotainos(xfs_mount_t *); |
| 62 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); | 62 | STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); |
| 63 | STATIC int xfs_qm_shake(struct shrinker *, int, gfp_t); | 63 | STATIC int xfs_qm_shake(struct shrinker *, struct shrink_control *); |
| 64 | 64 | ||
| 65 | static struct shrinker xfs_qm_shaker = { | 65 | static struct shrinker xfs_qm_shaker = { |
| 66 | .shrink = xfs_qm_shake, | 66 | .shrink = xfs_qm_shake, |
| @@ -2009,10 +2009,10 @@ xfs_qm_shake_freelist( | |||
| 2009 | STATIC int | 2009 | STATIC int |
| 2010 | xfs_qm_shake( | 2010 | xfs_qm_shake( |
| 2011 | struct shrinker *shrink, | 2011 | struct shrinker *shrink, |
| 2012 | int nr_to_scan, | 2012 | struct shrink_control *sc) |
| 2013 | gfp_t gfp_mask) | ||
| 2014 | { | 2013 | { |
| 2015 | int ndqused, nfree, n; | 2014 | int ndqused, nfree, n; |
| 2015 | gfp_t gfp_mask = sc->gfp_mask; | ||
| 2016 | 2016 | ||
| 2017 | if (!kmem_shake_allow(gfp_mask)) | 2017 | if (!kmem_shake_allow(gfp_mask)) |
| 2018 | return 0; | 2018 | return 0; |
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 58632cc17f2d..da0a561ffba2 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
| @@ -187,7 +187,6 @@ struct xfs_busy_extent { | |||
| 187 | xfs_agnumber_t agno; | 187 | xfs_agnumber_t agno; |
| 188 | xfs_agblock_t bno; | 188 | xfs_agblock_t bno; |
| 189 | xfs_extlen_t length; | 189 | xfs_extlen_t length; |
| 190 | xlog_tid_t tid; /* transaction that created this */ | ||
| 191 | }; | 190 | }; |
| 192 | 191 | ||
| 193 | /* | 192 | /* |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 27d64d752eab..acdced86413c 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
| @@ -41,19 +41,13 @@ | |||
| 41 | #define XFSA_FIXUP_BNO_OK 1 | 41 | #define XFSA_FIXUP_BNO_OK 1 |
| 42 | #define XFSA_FIXUP_CNT_OK 2 | 42 | #define XFSA_FIXUP_CNT_OK 2 |
| 43 | 43 | ||
| 44 | /* | ||
| 45 | * Prototypes for per-ag allocation routines | ||
| 46 | */ | ||
| 47 | |||
| 48 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); | 44 | STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); |
| 49 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); | 45 | STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); |
| 50 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); | 46 | STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); |
| 51 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, | 47 | STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, |
| 52 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); | 48 | xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); |
| 53 | 49 | STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, | |
| 54 | /* | 50 | xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); |
| 55 | * Internal functions. | ||
| 56 | */ | ||
| 57 | 51 | ||
| 58 | /* | 52 | /* |
| 59 | * Lookup the record equal to [bno, len] in the btree given by cur. | 53 | * Lookup the record equal to [bno, len] in the btree given by cur. |
| @@ -154,19 +148,21 @@ xfs_alloc_compute_aligned( | |||
| 154 | xfs_extlen_t *reslen) /* result length */ | 148 | xfs_extlen_t *reslen) /* result length */ |
| 155 | { | 149 | { |
| 156 | xfs_agblock_t bno; | 150 | xfs_agblock_t bno; |
| 157 | xfs_extlen_t diff; | ||
| 158 | xfs_extlen_t len; | 151 | xfs_extlen_t len; |
| 159 | 152 | ||
| 160 | if (args->alignment > 1 && foundlen >= args->minlen) { | 153 | /* Trim busy sections out of found extent */ |
| 161 | bno = roundup(foundbno, args->alignment); | 154 | xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); |
| 162 | diff = bno - foundbno; | 155 | |
| 163 | len = diff >= foundlen ? 0 : foundlen - diff; | 156 | if (args->alignment > 1 && len >= args->minlen) { |
| 157 | xfs_agblock_t aligned_bno = roundup(bno, args->alignment); | ||
| 158 | xfs_extlen_t diff = aligned_bno - bno; | ||
| 159 | |||
| 160 | *resbno = aligned_bno; | ||
| 161 | *reslen = diff >= len ? 0 : len - diff; | ||
| 164 | } else { | 162 | } else { |
| 165 | bno = foundbno; | 163 | *resbno = bno; |
| 166 | len = foundlen; | 164 | *reslen = len; |
| 167 | } | 165 | } |
| 168 | *resbno = bno; | ||
| 169 | *reslen = len; | ||
| 170 | } | 166 | } |
| 171 | 167 | ||
| 172 | /* | 168 | /* |
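The reworked xfs_alloc_compute_aligned() above now trims the candidate extent first (the kernel cuts busy ranges out via xfs_alloc_busy_trim()), then rounds the start up to the requested alignment and shortens the length by the rounding distance. A standalone sketch of that arithmetic, with the busy trim reduced to a no-op stub:

#include <stdio.h>

typedef unsigned int agblock_t;
typedef unsigned int extlen_t;

#define ROUNDUP(x, a)	((((x) + (a) - 1) / (a)) * (a))

static void compute_aligned(agblock_t foundbno, extlen_t foundlen,
			    extlen_t alignment, extlen_t minlen,
			    agblock_t *resbno, extlen_t *reslen)
{
	agblock_t bno = foundbno;	/* the real code trims busy ranges here */
	extlen_t len = foundlen;

	if (alignment > 1 && len >= minlen) {
		agblock_t aligned_bno = ROUNDUP(bno, alignment);
		extlen_t diff = aligned_bno - bno;

		*resbno = aligned_bno;
		*reslen = diff >= len ? 0 : len - diff;
	} else {
		*resbno = bno;		/* too small to be worth aligning */
		*reslen = len;
	}
}

int main(void)
{
	agblock_t bno;
	extlen_t len;

	compute_aligned(10, 20, 8, 4, &bno, &len);
	printf("bno=%u len=%u\n", bno, len);	/* prints bno=16 len=14 */
	return 0;
}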
| @@ -280,7 +276,6 @@ xfs_alloc_fix_minleft( | |||
| 280 | return 1; | 276 | return 1; |
| 281 | agf = XFS_BUF_TO_AGF(args->agbp); | 277 | agf = XFS_BUF_TO_AGF(args->agbp); |
| 282 | diff = be32_to_cpu(agf->agf_freeblks) | 278 | diff = be32_to_cpu(agf->agf_freeblks) |
| 283 | + be32_to_cpu(agf->agf_flcount) | ||
| 284 | - args->len - args->minleft; | 279 | - args->len - args->minleft; |
| 285 | if (diff >= 0) | 280 | if (diff >= 0) |
| 286 | return 1; | 281 | return 1; |
| @@ -541,16 +536,8 @@ xfs_alloc_ag_vextent( | |||
| 541 | if (error) | 536 | if (error) |
| 542 | return error; | 537 | return error; |
| 543 | 538 | ||
| 544 | /* | 539 | ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, |
| 545 | * Search the busylist for these blocks and mark the | 540 | args->agbno, args->len)); |
| 546 | * transaction as synchronous if blocks are found. This | ||
| 547 | * avoids the need to block due to a synchronous log | ||
| 548 | * force to ensure correct ordering as the synchronous | ||
| 549 | * transaction will guarantee that for us. | ||
| 550 | */ | ||
| 551 | if (xfs_alloc_busy_search(args->mp, args->agno, | ||
| 552 | args->agbno, args->len)) | ||
| 553 | xfs_trans_set_sync(args->tp); | ||
| 554 | } | 541 | } |
| 555 | 542 | ||
| 556 | if (!args->isfl) { | 543 | if (!args->isfl) { |
| @@ -577,14 +564,14 @@ xfs_alloc_ag_vextent_exact( | |||
| 577 | { | 564 | { |
| 578 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ | 565 | xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ |
| 579 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ | 566 | xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ |
| 580 | xfs_agblock_t end; /* end of allocated extent */ | ||
| 581 | int error; | 567 | int error; |
| 582 | xfs_agblock_t fbno; /* start block of found extent */ | 568 | xfs_agblock_t fbno; /* start block of found extent */ |
| 583 | xfs_agblock_t fend; /* end block of found extent */ | ||
| 584 | xfs_extlen_t flen; /* length of found extent */ | 569 | xfs_extlen_t flen; /* length of found extent */ |
| 570 | xfs_agblock_t tbno; /* start block of trimmed extent */ | ||
| 571 | xfs_extlen_t tlen; /* length of trimmed extent */ | ||
| 572 | xfs_agblock_t tend; /* end block of trimmed extent */ | ||
| 573 | xfs_agblock_t end; /* end of allocated extent */ | ||
| 585 | int i; /* success/failure of operation */ | 574 | int i; /* success/failure of operation */ |
| 586 | xfs_agblock_t maxend; /* end of maximal extent */ | ||
| 587 | xfs_agblock_t minend; /* end of minimal extent */ | ||
| 588 | xfs_extlen_t rlen; /* length of returned extent */ | 575 | xfs_extlen_t rlen; /* length of returned extent */ |
| 589 | 576 | ||
| 590 | ASSERT(args->alignment == 1); | 577 | ASSERT(args->alignment == 1); |
| @@ -614,14 +601,22 @@ xfs_alloc_ag_vextent_exact( | |||
| 614 | goto error0; | 601 | goto error0; |
| 615 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 602 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 616 | ASSERT(fbno <= args->agbno); | 603 | ASSERT(fbno <= args->agbno); |
| 617 | minend = args->agbno + args->minlen; | ||
| 618 | maxend = args->agbno + args->maxlen; | ||
| 619 | fend = fbno + flen; | ||
| 620 | 604 | ||
| 621 | /* | 605 | /* |
| 622 | * Give up if the freespace isn't long enough for the minimum request. | 606 | * Check for overlapping busy extents. |
| 607 | */ | ||
| 608 | xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); | ||
| 609 | |||
| 610 | /* | ||
| 611 | * Give up if the start of the extent is busy, or the freespace isn't | ||
| 612 | * long enough for the minimum request. | ||
| 623 | */ | 613 | */ |
| 624 | if (fend < minend) | 614 | if (tbno > args->agbno) |
| 615 | goto not_found; | ||
| 616 | if (tlen < args->minlen) | ||
| 617 | goto not_found; | ||
| 618 | tend = tbno + tlen; | ||
| 619 | if (tend < args->agbno + args->minlen) | ||
| 625 | goto not_found; | 620 | goto not_found; |
| 626 | 621 | ||
| 627 | /* | 622 | /* |
| @@ -630,14 +625,14 @@ xfs_alloc_ag_vextent_exact( | |||
| 630 | * | 625 | * |
| 631 | * Fix the length according to mod and prod if given. | 626 | * Fix the length according to mod and prod if given. |
| 632 | */ | 627 | */ |
| 633 | end = XFS_AGBLOCK_MIN(fend, maxend); | 628 | end = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen); |
| 634 | args->len = end - args->agbno; | 629 | args->len = end - args->agbno; |
| 635 | xfs_alloc_fix_len(args); | 630 | xfs_alloc_fix_len(args); |
| 636 | if (!xfs_alloc_fix_minleft(args)) | 631 | if (!xfs_alloc_fix_minleft(args)) |
| 637 | goto not_found; | 632 | goto not_found; |
| 638 | 633 | ||
| 639 | rlen = args->len; | 634 | rlen = args->len; |
| 640 | ASSERT(args->agbno + rlen <= fend); | 635 | ASSERT(args->agbno + rlen <= tend); |
| 641 | end = args->agbno + rlen; | 636 | end = args->agbno + rlen; |
| 642 | 637 | ||
| 643 | /* | 638 | /* |
| @@ -686,11 +681,11 @@ xfs_alloc_find_best_extent( | |||
| 686 | struct xfs_btree_cur **scur, /* searching cursor */ | 681 | struct xfs_btree_cur **scur, /* searching cursor */ |
| 687 | xfs_agblock_t gdiff, /* difference for search comparison */ | 682 | xfs_agblock_t gdiff, /* difference for search comparison */ |
| 688 | xfs_agblock_t *sbno, /* extent found by search */ | 683 | xfs_agblock_t *sbno, /* extent found by search */ |
| 689 | xfs_extlen_t *slen, | 684 | xfs_extlen_t *slen, /* extent length */ |
| 690 | xfs_extlen_t *slena, /* aligned length */ | 685 | xfs_agblock_t *sbnoa, /* aligned extent found by search */ |
| 686 | xfs_extlen_t *slena, /* aligned extent length */ | ||
| 691 | int dir) /* 0 = search right, 1 = search left */ | 687 | int dir) /* 0 = search right, 1 = search left */ |
| 692 | { | 688 | { |
| 693 | xfs_agblock_t bno; | ||
| 694 | xfs_agblock_t new; | 689 | xfs_agblock_t new; |
| 695 | xfs_agblock_t sdiff; | 690 | xfs_agblock_t sdiff; |
| 696 | int error; | 691 | int error; |
| @@ -708,16 +703,16 @@ xfs_alloc_find_best_extent( | |||
| 708 | if (error) | 703 | if (error) |
| 709 | goto error0; | 704 | goto error0; |
| 710 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | 705 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); |
| 711 | xfs_alloc_compute_aligned(args, *sbno, *slen, &bno, slena); | 706 | xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); |
| 712 | 707 | ||
| 713 | /* | 708 | /* |
| 714 | * The good extent is closer than this one. | 709 | * The good extent is closer than this one. |
| 715 | */ | 710 | */ |
| 716 | if (!dir) { | 711 | if (!dir) { |
| 717 | if (bno >= args->agbno + gdiff) | 712 | if (*sbnoa >= args->agbno + gdiff) |
| 718 | goto out_use_good; | 713 | goto out_use_good; |
| 719 | } else { | 714 | } else { |
| 720 | if (bno <= args->agbno - gdiff) | 715 | if (*sbnoa <= args->agbno - gdiff) |
| 721 | goto out_use_good; | 716 | goto out_use_good; |
| 722 | } | 717 | } |
| 723 | 718 | ||
| @@ -729,8 +724,8 @@ xfs_alloc_find_best_extent( | |||
| 729 | xfs_alloc_fix_len(args); | 724 | xfs_alloc_fix_len(args); |
| 730 | 725 | ||
| 731 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 726 | sdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 732 | args->alignment, *sbno, | 727 | args->alignment, *sbnoa, |
| 733 | *slen, &new); | 728 | *slena, &new); |
| 734 | 729 | ||
| 735 | /* | 730 | /* |
| 736 | * Choose closer size and invalidate other cursor. | 731 | * Choose closer size and invalidate other cursor. |
| @@ -780,7 +775,7 @@ xfs_alloc_ag_vextent_near( | |||
| 780 | xfs_agblock_t gtbnoa; /* aligned ... */ | 775 | xfs_agblock_t gtbnoa; /* aligned ... */ |
| 781 | xfs_extlen_t gtdiff; /* difference to right side entry */ | 776 | xfs_extlen_t gtdiff; /* difference to right side entry */ |
| 782 | xfs_extlen_t gtlen; /* length of right side entry */ | 777 | xfs_extlen_t gtlen; /* length of right side entry */ |
| 783 | xfs_extlen_t gtlena = 0; /* aligned ... */ | 778 | xfs_extlen_t gtlena; /* aligned ... */ |
| 784 | xfs_agblock_t gtnew; /* useful start bno of right side */ | 779 | xfs_agblock_t gtnew; /* useful start bno of right side */ |
| 785 | int error; /* error code */ | 780 | int error; /* error code */ |
| 786 | int i; /* result code, temporary */ | 781 | int i; /* result code, temporary */ |
| @@ -789,9 +784,10 @@ xfs_alloc_ag_vextent_near( | |||
| 789 | xfs_agblock_t ltbnoa; /* aligned ... */ | 784 | xfs_agblock_t ltbnoa; /* aligned ... */ |
| 790 | xfs_extlen_t ltdiff; /* difference to left side entry */ | 785 | xfs_extlen_t ltdiff; /* difference to left side entry */ |
| 791 | xfs_extlen_t ltlen; /* length of left side entry */ | 786 | xfs_extlen_t ltlen; /* length of left side entry */ |
| 792 | xfs_extlen_t ltlena = 0; /* aligned ... */ | 787 | xfs_extlen_t ltlena; /* aligned ... */ |
| 793 | xfs_agblock_t ltnew; /* useful start bno of left side */ | 788 | xfs_agblock_t ltnew; /* useful start bno of left side */ |
| 794 | xfs_extlen_t rlen; /* length of returned extent */ | 789 | xfs_extlen_t rlen; /* length of returned extent */ |
| 790 | int forced = 0; | ||
| 795 | #if defined(DEBUG) && defined(__KERNEL__) | 791 | #if defined(DEBUG) && defined(__KERNEL__) |
| 796 | /* | 792 | /* |
| 797 | * Randomly don't execute the first algorithm. | 793 | * Randomly don't execute the first algorithm. |
| @@ -800,13 +796,20 @@ xfs_alloc_ag_vextent_near( | |||
| 800 | 796 | ||
| 801 | dofirst = random32() & 1; | 797 | dofirst = random32() & 1; |
| 802 | #endif | 798 | #endif |
| 799 | |||
| 800 | restart: | ||
| 801 | bno_cur_lt = NULL; | ||
| 802 | bno_cur_gt = NULL; | ||
| 803 | ltlen = 0; | ||
| 804 | gtlena = 0; | ||
| 805 | ltlena = 0; | ||
| 806 | |||
| 803 | /* | 807 | /* |
| 804 | * Get a cursor for the by-size btree. | 808 | * Get a cursor for the by-size btree. |
| 805 | */ | 809 | */ |
| 806 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 810 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
| 807 | args->agno, XFS_BTNUM_CNT); | 811 | args->agno, XFS_BTNUM_CNT); |
| 808 | ltlen = 0; | 812 | |
| 809 | bno_cur_lt = bno_cur_gt = NULL; | ||
| 810 | /* | 813 | /* |
| 811 | * See if there are any free extents as big as maxlen. | 814 | * See if there are any free extents as big as maxlen. |
| 812 | */ | 815 | */ |
| @@ -822,11 +825,13 @@ xfs_alloc_ag_vextent_near( | |||
| 822 | goto error0; | 825 | goto error0; |
| 823 | if (i == 0 || ltlen == 0) { | 826 | if (i == 0 || ltlen == 0) { |
| 824 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 827 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
| 828 | trace_xfs_alloc_near_noentry(args); | ||
| 825 | return 0; | 829 | return 0; |
| 826 | } | 830 | } |
| 827 | ASSERT(i == 1); | 831 | ASSERT(i == 1); |
| 828 | } | 832 | } |
| 829 | args->wasfromfl = 0; | 833 | args->wasfromfl = 0; |
| 834 | |||
| 830 | /* | 835 | /* |
| 831 | * First algorithm. | 836 | * First algorithm. |
| 832 | * If the requested extent is large wrt the freespaces available | 837 | * If the requested extent is large wrt the freespaces available |
| @@ -890,7 +895,7 @@ xfs_alloc_ag_vextent_near( | |||
| 890 | if (args->len < blen) | 895 | if (args->len < blen) |
| 891 | continue; | 896 | continue; |
| 892 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 897 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 893 | args->alignment, ltbno, ltlen, &ltnew); | 898 | args->alignment, ltbnoa, ltlena, &ltnew); |
| 894 | if (ltnew != NULLAGBLOCK && | 899 | if (ltnew != NULLAGBLOCK && |
| 895 | (args->len > blen || ltdiff < bdiff)) { | 900 | (args->len > blen || ltdiff < bdiff)) { |
| 896 | bdiff = ltdiff; | 901 | bdiff = ltdiff; |
| @@ -1042,11 +1047,12 @@ xfs_alloc_ag_vextent_near( | |||
| 1042 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); | 1047 | args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); |
| 1043 | xfs_alloc_fix_len(args); | 1048 | xfs_alloc_fix_len(args); |
| 1044 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1049 | ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 1045 | args->alignment, ltbno, ltlen, &ltnew); | 1050 | args->alignment, ltbnoa, ltlena, &ltnew); |
| 1046 | 1051 | ||
| 1047 | error = xfs_alloc_find_best_extent(args, | 1052 | error = xfs_alloc_find_best_extent(args, |
| 1048 | &bno_cur_lt, &bno_cur_gt, | 1053 | &bno_cur_lt, &bno_cur_gt, |
| 1049 | ltdiff, &gtbno, &gtlen, &gtlena, | 1054 | ltdiff, &gtbno, &gtlen, |
| 1055 | &gtbnoa, &gtlena, | ||
| 1050 | 0 /* search right */); | 1056 | 0 /* search right */); |
| 1051 | } else { | 1057 | } else { |
| 1052 | ASSERT(gtlena >= args->minlen); | 1058 | ASSERT(gtlena >= args->minlen); |
| @@ -1057,11 +1063,12 @@ xfs_alloc_ag_vextent_near( | |||
| 1057 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); | 1063 | args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); |
| 1058 | xfs_alloc_fix_len(args); | 1064 | xfs_alloc_fix_len(args); |
| 1059 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, | 1065 | gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, |
| 1060 | args->alignment, gtbno, gtlen, &gtnew); | 1066 | args->alignment, gtbnoa, gtlena, &gtnew); |
| 1061 | 1067 | ||
| 1062 | error = xfs_alloc_find_best_extent(args, | 1068 | error = xfs_alloc_find_best_extent(args, |
| 1063 | &bno_cur_gt, &bno_cur_lt, | 1069 | &bno_cur_gt, &bno_cur_lt, |
| 1064 | gtdiff, &ltbno, &ltlen, &ltlena, | 1070 | gtdiff, &ltbno, &ltlen, |
| 1071 | &ltbnoa, &ltlena, | ||
| 1065 | 1 /* search left */); | 1072 | 1 /* search left */); |
| 1066 | } | 1073 | } |
| 1067 | 1074 | ||
| @@ -1073,6 +1080,12 @@ xfs_alloc_ag_vextent_near( | |||
| 1073 | * If we couldn't get anything, give up. | 1080 | * If we couldn't get anything, give up. |
| 1074 | */ | 1081 | */ |
| 1075 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { | 1082 | if (bno_cur_lt == NULL && bno_cur_gt == NULL) { |
| 1083 | if (!forced++) { | ||
| 1084 | trace_xfs_alloc_near_busy(args); | ||
| 1085 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
| 1086 | goto restart; | ||
| 1087 | } | ||
| 1088 | |||
| 1076 | trace_xfs_alloc_size_neither(args); | 1089 | trace_xfs_alloc_size_neither(args); |
| 1077 | args->agbno = NULLAGBLOCK; | 1090 | args->agbno = NULLAGBLOCK; |
| 1078 | return 0; | 1091 | return 0; |
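The hunk above adds the retry idiom this patch uses in both allocation paths: when every candidate extent has been trimmed away as busy, force the log once (committing the freeing transactions is what clears busy extents) and restart the search; only a second failure reports no space. A standalone mock of that control flow, with the log force and the extent search reduced to toggling a flag:

#include <stdio.h>

static int all_busy = 1;		/* pretend every free extent starts busy */

static void log_force(void)
{
	all_busy = 0;			/* committed frees become allocatable */
}

static int find_extent(void)
{
	return all_busy ? -1 : 7;	/* -1: nothing usable; 7: a block number */
}

static int alloc_near(void)
{
	int forced = 0;
	int bno;

restart:
	bno = find_extent();
	if (bno < 0) {
		if (!forced++) {	/* exactly one retry after a log force */
			log_force();
			goto restart;
		}
		return -1;		/* still nothing: give up */
	}
	return bno;
}

int main(void)
{
	printf("allocated block %d\n", alloc_near());
	return 0;
}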
| @@ -1107,12 +1120,13 @@ xfs_alloc_ag_vextent_near( | |||
| 1107 | return 0; | 1120 | return 0; |
| 1108 | } | 1121 | } |
| 1109 | rlen = args->len; | 1122 | rlen = args->len; |
| 1110 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, | 1123 | (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, |
| 1111 | ltlen, &ltnew); | 1124 | ltbnoa, ltlena, &ltnew); |
| 1112 | ASSERT(ltnew >= ltbno); | 1125 | ASSERT(ltnew >= ltbno); |
| 1113 | ASSERT(ltnew + rlen <= ltbno + ltlen); | 1126 | ASSERT(ltnew + rlen <= ltbnoa + ltlena); |
| 1114 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); | 1127 | ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); |
| 1115 | args->agbno = ltnew; | 1128 | args->agbno = ltnew; |
| 1129 | |||
| 1116 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, | 1130 | if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, |
| 1117 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) | 1131 | ltnew, rlen, XFSA_FIXUP_BNO_OK))) |
| 1118 | goto error0; | 1132 | goto error0; |
| @@ -1155,26 +1169,35 @@ xfs_alloc_ag_vextent_size( | |||
| 1155 | int i; /* temp status variable */ | 1169 | int i; /* temp status variable */ |
| 1156 | xfs_agblock_t rbno; /* returned block number */ | 1170 | xfs_agblock_t rbno; /* returned block number */ |
| 1157 | xfs_extlen_t rlen; /* length of returned extent */ | 1171 | xfs_extlen_t rlen; /* length of returned extent */ |
| 1172 | int forced = 0; | ||
| 1158 | 1173 | ||
| 1174 | restart: | ||
| 1159 | /* | 1175 | /* |
| 1160 | * Allocate and initialize a cursor for the by-size btree. | 1176 | * Allocate and initialize a cursor for the by-size btree. |
| 1161 | */ | 1177 | */ |
| 1162 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, | 1178 | cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, |
| 1163 | args->agno, XFS_BTNUM_CNT); | 1179 | args->agno, XFS_BTNUM_CNT); |
| 1164 | bno_cur = NULL; | 1180 | bno_cur = NULL; |
| 1181 | |||
| 1165 | /* | 1182 | /* |
| 1166 | * Look for an entry >= maxlen+alignment-1 blocks. | 1183 | * Look for an entry >= maxlen+alignment-1 blocks. |
| 1167 | */ | 1184 | */ |
| 1168 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, | 1185 | if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, |
| 1169 | args->maxlen + args->alignment - 1, &i))) | 1186 | args->maxlen + args->alignment - 1, &i))) |
| 1170 | goto error0; | 1187 | goto error0; |
| 1188 | |||
| 1171 | /* | 1189 | /* |
| 1172 | * If none, then pick up the last entry in the tree unless the | 1190 | * If none or we have busy extents that we cannot allocate from, then |
| 1173 | * tree is empty. | 1191 | * we have to settle for a smaller extent. In the case that there are |
| 1192 | * no large extents, this will return the last entry in the tree unless | ||
| 1193 | * the tree is empty. In the case that there are only busy large | ||
| 1194 | * extents, this will return the largest small extent unless there | ||
| 1195 | * are no smaller extents available. | ||
| 1174 | */ | 1196 | */ |
| 1175 | if (!i) { | 1197 | if (!i || forced > 1) { |
| 1176 | if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, | 1198 | error = xfs_alloc_ag_vextent_small(args, cnt_cur, |
| 1177 | &flen, &i))) | 1199 | &fbno, &flen, &i); |
| 1200 | if (error) | ||
| 1178 | goto error0; | 1201 | goto error0; |
| 1179 | if (i == 0 || flen == 0) { | 1202 | if (i == 0 || flen == 0) { |
| 1180 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1203 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
| @@ -1182,22 +1205,56 @@ xfs_alloc_ag_vextent_size( | |||
| 1182 | return 0; | 1205 | return 0; |
| 1183 | } | 1206 | } |
| 1184 | ASSERT(i == 1); | 1207 | ASSERT(i == 1); |
| 1208 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
| 1209 | } else { | ||
| 1210 | /* | ||
| 1211 | * Search for a non-busy extent that is large enough. | ||
| 1212 | * If we are at low space, don't check, or if we fall off | ||
| 1213 | * the end of the btree, turn off the busy check and | ||
| 1214 | * restart. | ||
| 1215 | */ | ||
| 1216 | for (;;) { | ||
| 1217 | error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); | ||
| 1218 | if (error) | ||
| 1219 | goto error0; | ||
| 1220 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
| 1221 | |||
| 1222 | xfs_alloc_compute_aligned(args, fbno, flen, | ||
| 1223 | &rbno, &rlen); | ||
| 1224 | |||
| 1225 | if (rlen >= args->maxlen) | ||
| 1226 | break; | ||
| 1227 | |||
| 1228 | error = xfs_btree_increment(cnt_cur, 0, &i); | ||
| 1229 | if (error) | ||
| 1230 | goto error0; | ||
| 1231 | if (i == 0) { | ||
| 1232 | /* | ||
| 1233 | * Our only valid extents must have been busy. | ||
| 1234 | * Make it unbusy by forcing the log out and | ||
| 1235 | * retrying. If we've been here before, forcing | ||
| 1236 | * the log isn't making the extents available, | ||
| 1237 | * which means they have probably been freed in | ||
| 1238 | * this transaction. In that case, we have to | ||
| 1239 | * give up on them and we'll attempt a minlen | ||
| 1240 | * allocation the next time around. | ||
| 1241 | */ | ||
| 1242 | xfs_btree_del_cursor(cnt_cur, | ||
| 1243 | XFS_BTREE_NOERROR); | ||
| 1244 | trace_xfs_alloc_size_busy(args); | ||
| 1245 | if (!forced++) | ||
| 1246 | xfs_log_force(args->mp, XFS_LOG_SYNC); | ||
| 1247 | goto restart; | ||
| 1248 | } | ||
| 1249 | } | ||
| 1185 | } | 1250 | } |
| 1186 | /* | 1251 | |
| 1187 | * There's a freespace as big as maxlen+alignment-1, get it. | ||
| 1188 | */ | ||
| 1189 | else { | ||
| 1190 | if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) | ||
| 1191 | goto error0; | ||
| 1192 | XFS_WANT_CORRUPTED_GOTO(i == 1, error0); | ||
| 1193 | } | ||
| 1194 | /* | 1252 | /* |
| 1195 | * In the first case above, we got the last entry in the | 1253 | * In the first case above, we got the last entry in the |
| 1196 | * by-size btree. Now we check to see if the space hits maxlen | 1254 | * by-size btree. Now we check to see if the space hits maxlen |
| 1197 | * once aligned; if not, we search left for something better. | 1255 | * once aligned; if not, we search left for something better. |
| 1198 | * This can't happen in the second case above. | 1256 | * This can't happen in the second case above. |
| 1199 | */ | 1257 | */ |
| 1200 | xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); | ||
| 1201 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); | 1258 | rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); |
| 1202 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || | 1259 | XFS_WANT_CORRUPTED_GOTO(rlen == 0 || |
| 1203 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); | 1260 | (rlen <= flen && rbno + rlen <= fbno + flen), error0); |
| @@ -1251,13 +1308,19 @@ xfs_alloc_ag_vextent_size( | |||
| 1251 | * Fix up the length. | 1308 | * Fix up the length. |
| 1252 | */ | 1309 | */ |
| 1253 | args->len = rlen; | 1310 | args->len = rlen; |
| 1254 | xfs_alloc_fix_len(args); | 1311 | if (rlen < args->minlen) { |
| 1255 | if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { | 1312 | if (!forced++) { |
| 1256 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | 1313 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); |
| 1257 | trace_xfs_alloc_size_nominleft(args); | 1314 | trace_xfs_alloc_size_busy(args); |
| 1258 | args->agbno = NULLAGBLOCK; | 1315 | xfs_log_force(args->mp, XFS_LOG_SYNC); |
| 1259 | return 0; | 1316 | goto restart; |
| 1317 | } | ||
| 1318 | goto out_nominleft; | ||
| 1260 | } | 1319 | } |
| 1320 | xfs_alloc_fix_len(args); | ||
| 1321 | |||
| 1322 | if (!xfs_alloc_fix_minleft(args)) | ||
| 1323 | goto out_nominleft; | ||
| 1261 | rlen = args->len; | 1324 | rlen = args->len; |
| 1262 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); | 1325 | XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); |
| 1263 | /* | 1326 | /* |
| @@ -1287,6 +1350,12 @@ error0: | |||
| 1287 | if (bno_cur) | 1350 | if (bno_cur) |
| 1288 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); | 1351 | xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); |
| 1289 | return error; | 1352 | return error; |
| 1353 | |||
| 1354 | out_nominleft: | ||
| 1355 | xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); | ||
| 1356 | trace_xfs_alloc_size_nominleft(args); | ||
| 1357 | args->agbno = NULLAGBLOCK; | ||
| 1358 | return 0; | ||
| 1290 | } | 1359 | } |
| 1291 | 1360 | ||
| 1292 | /* | 1361 | /* |
| @@ -1326,6 +1395,9 @@ xfs_alloc_ag_vextent_small( | |||
| 1326 | if (error) | 1395 | if (error) |
| 1327 | goto error0; | 1396 | goto error0; |
| 1328 | if (fbno != NULLAGBLOCK) { | 1397 | if (fbno != NULLAGBLOCK) { |
| 1398 | xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, | ||
| 1399 | args->userdata); | ||
| 1400 | |||
| 1329 | if (args->userdata) { | 1401 | if (args->userdata) { |
| 1330 | xfs_buf_t *bp; | 1402 | xfs_buf_t *bp; |
| 1331 | 1403 | ||
| @@ -1617,18 +1689,6 @@ xfs_free_ag_extent( | |||
| 1617 | 1689 | ||
| 1618 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); | 1690 | trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); |
| 1619 | 1691 | ||
| 1620 | /* | ||
| 1621 | * Since blocks move to the free list without the coordination | ||
| 1622 | * used in xfs_bmap_finish, we can't allow block to be available | ||
| 1623 | * for reallocation and non-transaction writing (user data) | ||
| 1624 | * until we know that the transaction that moved it to the free | ||
| 1625 | * list is permanently on disk. We track the blocks by declaring | ||
| 1626 | * these blocks as "busy"; the busy list is maintained on a per-ag | ||
| 1627 | * basis and each transaction records which entries should be removed | ||
| 1628 | * when the iclog commits to disk. If a busy block is allocated, | ||
| 1629 | * the iclog is pushed up to the LSN that freed the block. | ||
| 1630 | */ | ||
| 1631 | xfs_alloc_busy_insert(tp, agno, bno, len); | ||
| 1632 | return 0; | 1692 | return 0; |
| 1633 | 1693 | ||
| 1634 | error0: | 1694 | error0: |
| @@ -1923,21 +1983,6 @@ xfs_alloc_get_freelist( | |||
| 1923 | xfs_alloc_log_agf(tp, agbp, logflags); | 1983 | xfs_alloc_log_agf(tp, agbp, logflags); |
| 1924 | *bnop = bno; | 1984 | *bnop = bno; |
| 1925 | 1985 | ||
| 1926 | /* | ||
| 1927 | * As blocks are freed, they are added to the per-ag busy list and | ||
| 1928 | * remain there until the freeing transaction is committed to disk. | ||
| 1929 | * Now that we have allocated blocks, this list must be searched to see | ||
| 1930 | * if a block is being reused. If one is, then the freeing transaction | ||
| 1931 | * must be pushed to disk before this transaction. | ||
| 1932 | * | ||
| 1933 | * We do this by setting the current transaction to a sync transaction | ||
| 1934 | * which guarantees that the freeing transaction is on disk before this | ||
| 1935 | * transaction. This is done instead of a synchronous log force here so | ||
| 1936 | * that we don't sit and wait with the AGF locked in the transaction | ||
| 1937 | * during the log force. | ||
| 1938 | */ | ||
| 1939 | if (xfs_alloc_busy_search(mp, be32_to_cpu(agf->agf_seqno), bno, 1)) | ||
| 1940 | xfs_trans_set_sync(tp); | ||
| 1941 | return 0; | 1986 | return 0; |
| 1942 | } | 1987 | } |
| 1943 | 1988 | ||
| @@ -2423,105 +2468,13 @@ xfs_free_extent( | |||
| 2423 | } | 2468 | } |
| 2424 | 2469 | ||
| 2425 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); | 2470 | error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); |
| 2471 | if (!error) | ||
| 2472 | xfs_alloc_busy_insert(tp, args.agno, args.agbno, len); | ||
| 2426 | error0: | 2473 | error0: |
| 2427 | xfs_perag_put(args.pag); | 2474 | xfs_perag_put(args.pag); |
| 2428 | return error; | 2475 | return error; |
| 2429 | } | 2476 | } |
| 2430 | 2477 | ||
| 2431 | |||
| 2432 | /* | ||
| 2433 | * AG Busy list management | ||
| 2434 | * The busy list contains block ranges that have been freed but whose | ||
| 2435 | * transactions have not yet hit disk. If any block listed in a busy | ||
| 2436 | * list is reused, the transaction that freed it must be forced to disk | ||
| 2437 | * before continuing to use the block. | ||
| 2438 | * | ||
| 2439 | * xfs_alloc_busy_insert - add to the per-ag busy list | ||
| 2440 | * xfs_alloc_busy_clear - remove an item from the per-ag busy list | ||
| 2441 | * xfs_alloc_busy_search - search for a busy extent | ||
| 2442 | */ | ||
| 2443 | |||
| 2444 | /* | ||
| 2445 | * Insert a new extent into the busy tree. | ||
| 2446 | * | ||
| 2447 | * The busy extent tree is indexed by the start block of the busy extent. | ||
| 2448 | * there can be multiple overlapping ranges in the busy extent tree but only | ||
| 2449 | * ever one entry at a given start block. The reason for this is that | ||
| 2450 | * multi-block extents can be freed, then smaller chunks of that extent | ||
| 2451 | * allocated and freed again before the first transaction commit is on disk. | ||
| 2452 | * If the exact same start block is freed a second time, we have to wait for | ||
| 2453 | * that busy extent to pass out of the tree before the new extent is inserted. | ||
| 2454 | * There are two main cases we have to handle here. | ||
| 2455 | * | ||
| 2456 | * The first case is a transaction that triggers a "free - allocate - free" | ||
| 2457 | * cycle. This can occur during btree manipulations as a btree block is freed | ||
| 2458 | * to the freelist, then allocated from the free list, then freed again. In | ||
| 2459 | * this case, the second extent free is what triggers the duplicate and as | ||
| 2460 | * such the transaction IDs should match. Because the extent was allocated in | ||
| 2461 | * this transaction, the transaction must be marked as synchronous. This is | ||
| 2462 | * true for all cases where the free/alloc/free occurs in the one transaction, | ||
| 2463 | * hence the addition of the ASSERT(tp->t_flags & XFS_TRANS_SYNC) to this case. | ||
| 2464 | * This serves to catch violations of the second case quite effectively. | ||
| 2465 | * | ||
| 2466 | * The second case is where the free/alloc/free occur in different | ||
| 2467 | * transactions. In this case, the thread freeing the extent the second time | ||
| 2468 | * can't mark the extent busy immediately because it is already tracked in a | ||
| 2469 | * transaction that may be committing. When the log commit for the existing | ||
| 2470 | * busy extent completes, the busy extent will be removed from the tree. If we | ||
| 2471 | * allow the second busy insert to continue using that busy extent structure, | ||
| 2472 | * it can be freed before this transaction is safely in the log. Hence our | ||
| 2473 | * only option in this case is to force the log to remove the existing busy | ||
| 2474 | * extent from the list before we insert the new one with the current | ||
| 2475 | * transaction ID. | ||
| 2476 | * | ||
| 2477 | * The problem we are trying to avoid in the free-alloc-free in separate | ||
| 2478 | * transactions is most easily described with a timeline: | ||
| 2479 | * | ||
| 2480 | * Thread 1 Thread 2 Thread 3 xfslogd | ||
| 2481 | * xact alloc | ||
| 2482 | * free X | ||
| 2483 | * mark busy | ||
| 2484 | * commit xact | ||
| 2485 | * free xact | ||
| 2486 | * xact alloc | ||
| 2487 | * alloc X | ||
| 2488 | * busy search | ||
| 2489 | * mark xact sync | ||
| 2490 | * commit xact | ||
| 2491 | * free xact | ||
| 2492 | * force log | ||
| 2493 | * checkpoint starts | ||
| 2494 | * .... | ||
| 2495 | * xact alloc | ||
| 2496 | * free X | ||
| 2497 | * mark busy | ||
| 2498 | * finds match | ||
| 2499 | * *** KABOOM! *** | ||
| 2500 | * .... | ||
| 2501 | * log IO completes | ||
| 2502 | * unbusy X | ||
| 2503 | * checkpoint completes | ||
| 2504 | * | ||
| 2505 | * By issuing a log force in thread 3 @ "KABOOM", the thread will block until | ||
| 2506 | * the checkpoint completes, and the busy extent it matched will have been | ||
| 2507 | * removed from the tree when it is woken. Hence it can then continue safely. | ||
| 2508 | * | ||
| 2509 | * However, to ensure this matching process is robust, we need to use the | ||
| 2510 | * transaction ID for identifying the transaction, as delayed logging results in | ||
| 2511 | * the busy extent and transaction lifecycles being different. i.e. the busy | ||
| 2512 | * extent is active for a lot longer than the transaction. Hence the | ||
| 2513 | * transaction structure can be freed and reallocated, then used to mark the same | ||
| 2514 | * extent busy again in the new transaction. In this case the new transaction | ||
| 2515 | * will have a different tid but can have the same address, and hence we need | ||
| 2516 | * to check against the tid. | ||
| 2517 | * | ||
| 2518 | * Future: for delayed logging, we could avoid the log force if the extent was | ||
| 2519 | * first freed in the current checkpoint sequence. This, however, requires the | ||
| 2520 | * ability to pin the current checkpoint in memory until this transaction | ||
| 2521 | * commits to ensure that both the original free and the current one combine | ||
| 2522 | * logically into the one checkpoint. If the checkpoint sequences are | ||
| 2523 | * different, however, we still need to wait on a log force. | ||
| 2524 | */ | ||
| 2525 | void | 2478 | void |
| 2526 | xfs_alloc_busy_insert( | 2479 | xfs_alloc_busy_insert( |
| 2527 | struct xfs_trans *tp, | 2480 | struct xfs_trans *tp, |
| @@ -2533,9 +2486,7 @@ xfs_alloc_busy_insert( | |||
| 2533 | struct xfs_busy_extent *busyp; | 2486 | struct xfs_busy_extent *busyp; |
| 2534 | struct xfs_perag *pag; | 2487 | struct xfs_perag *pag; |
| 2535 | struct rb_node **rbp; | 2488 | struct rb_node **rbp; |
| 2536 | struct rb_node *parent; | 2489 | struct rb_node *parent = NULL; |
| 2537 | int match; | ||
| 2538 | |||
| 2539 | 2490 | ||
| 2540 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); | 2491 | new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); |
| 2541 | if (!new) { | 2492 | if (!new) { |
| @@ -2544,7 +2495,7 @@ xfs_alloc_busy_insert( | |||
| 2544 | * block, make this a synchronous transaction to ensure that | 2495 | * block, make this a synchronous transaction to ensure that |
| 2545 | * the block is not reused before this transaction commits. | 2496 | * the block is not reused before this transaction commits. |
| 2546 | */ | 2497 | */ |
| 2547 | trace_xfs_alloc_busy(tp, agno, bno, len, 1); | 2498 | trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); |
| 2548 | xfs_trans_set_sync(tp); | 2499 | xfs_trans_set_sync(tp); |
| 2549 | return; | 2500 | return; |
| 2550 | } | 2501 | } |
| @@ -2552,66 +2503,28 @@ xfs_alloc_busy_insert( | |||
| 2552 | new->agno = agno; | 2503 | new->agno = agno; |
| 2553 | new->bno = bno; | 2504 | new->bno = bno; |
| 2554 | new->length = len; | 2505 | new->length = len; |
| 2555 | new->tid = xfs_log_get_trans_ident(tp); | ||
| 2556 | |||
| 2557 | INIT_LIST_HEAD(&new->list); | 2506 | INIT_LIST_HEAD(&new->list); |
| 2558 | 2507 | ||
| 2559 | /* trace before insert to be able to see failed inserts */ | 2508 | /* trace before insert to be able to see failed inserts */ |
| 2560 | trace_xfs_alloc_busy(tp, agno, bno, len, 0); | 2509 | trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); |
| 2561 | 2510 | ||
| 2562 | pag = xfs_perag_get(tp->t_mountp, new->agno); | 2511 | pag = xfs_perag_get(tp->t_mountp, new->agno); |
| 2563 | restart: | ||
| 2564 | spin_lock(&pag->pagb_lock); | 2512 | spin_lock(&pag->pagb_lock); |
| 2565 | rbp = &pag->pagb_tree.rb_node; | 2513 | rbp = &pag->pagb_tree.rb_node; |
| 2566 | parent = NULL; | 2514 | while (*rbp) { |
| 2567 | busyp = NULL; | ||
| 2568 | match = 0; | ||
| 2569 | while (*rbp && match >= 0) { | ||
| 2570 | parent = *rbp; | 2515 | parent = *rbp; |
| 2571 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); | 2516 | busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); |
| 2572 | 2517 | ||
| 2573 | if (new->bno < busyp->bno) { | 2518 | if (new->bno < busyp->bno) { |
| 2574 | /* may overlap, but exact start block is lower */ | ||
| 2575 | rbp = &(*rbp)->rb_left; | 2519 | rbp = &(*rbp)->rb_left; |
| 2576 | if (new->bno + new->length > busyp->bno) | 2520 | ASSERT(new->bno + new->length <= busyp->bno); |
| 2577 | match = busyp->tid == new->tid ? 1 : -1; | ||
| 2578 | } else if (new->bno > busyp->bno) { | 2521 | } else if (new->bno > busyp->bno) { |
| 2579 | /* may overlap, but exact start block is higher */ | ||
| 2580 | rbp = &(*rbp)->rb_right; | 2522 | rbp = &(*rbp)->rb_right; |
| 2581 | if (bno < busyp->bno + busyp->length) | 2523 | ASSERT(bno >= busyp->bno + busyp->length); |
| 2582 | match = busyp->tid == new->tid ? 1 : -1; | ||
| 2583 | } else { | 2524 | } else { |
| 2584 | match = busyp->tid == new->tid ? 1 : -1; | 2525 | ASSERT(0); |
| 2585 | break; | ||
| 2586 | } | 2526 | } |
| 2587 | } | 2527 | } |
| 2588 | if (match < 0) { | ||
| 2589 | /* overlap marked busy in different transaction */ | ||
| 2590 | spin_unlock(&pag->pagb_lock); | ||
| 2591 | xfs_log_force(tp->t_mountp, XFS_LOG_SYNC); | ||
| 2592 | goto restart; | ||
| 2593 | } | ||
| 2594 | if (match > 0) { | ||
| 2595 | /* | ||
| 2596 | * overlap marked busy in same transaction. Update if exact | ||
| 2597 | * start block match, otherwise combine the busy extents into | ||
| 2598 | * a single range. | ||
| 2599 | */ | ||
| 2600 | if (busyp->bno == new->bno) { | ||
| 2601 | busyp->length = max(busyp->length, new->length); | ||
| 2602 | spin_unlock(&pag->pagb_lock); | ||
| 2603 | ASSERT(tp->t_flags & XFS_TRANS_SYNC); | ||
| 2604 | xfs_perag_put(pag); | ||
| 2605 | kmem_free(new); | ||
| 2606 | return; | ||
| 2607 | } | ||
| 2608 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
| 2609 | new->length = max(busyp->bno + busyp->length, | ||
| 2610 | new->bno + new->length) - | ||
| 2611 | min(busyp->bno, new->bno); | ||
| 2612 | new->bno = min(busyp->bno, new->bno); | ||
| 2613 | } else | ||
| 2614 | busyp = NULL; | ||
| 2615 | 2528 | ||
| 2616 | rb_link_node(&new->rb_node, parent, rbp); | 2529 | rb_link_node(&new->rb_node, parent, rbp); |
| 2617 | rb_insert_color(&new->rb_node, &pag->pagb_tree); | 2530 | rb_insert_color(&new->rb_node, &pag->pagb_tree); |
| @@ -2619,7 +2532,6 @@ restart: | |||
| 2619 | list_add(&new->list, &tp->t_busy); | 2532 | list_add(&new->list, &tp->t_busy); |
| 2620 | spin_unlock(&pag->pagb_lock); | 2533 | spin_unlock(&pag->pagb_lock); |
| 2621 | xfs_perag_put(pag); | 2534 | xfs_perag_put(pag); |
| 2622 | kmem_free(busyp); | ||
| 2623 | } | 2535 | } |
| 2624 | 2536 | ||
| 2625 | /* | 2537 | /* |
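The rewritten insert loop above is the classic kernel rbtree descent: walk down while remembering the child link to patch, then splice the node in with rb_link_node()/rb_insert_color(). A hypothetical userspace sketch of the same pointer-to-link descent, shown on a plain unbalanced BST since the kernel rbtree helpers are not available outside the tree:

#include <assert.h>
#include <stdio.h>

struct node {
        unsigned long bno;      /* start block, the sort key */
        unsigned long length;
        struct node *left, *right;
};

/*
 * Same pointer-to-child-link descent as the new xfs_alloc_busy_insert()
 * loop: track the link to patch so the node can be hooked in without a
 * second walk.  Overlaps are asserted against, as in the patch.
 */
static void busy_insert(struct node **rootp, struct node *new)
{
        struct node **linkp = rootp;

        while (*linkp) {
                struct node *cur = *linkp;

                if (new->bno < cur->bno) {
                        assert(new->bno + new->length <= cur->bno);
                        linkp = &cur->left;
                } else {
                        assert(new->bno >= cur->bno + cur->length);
                        linkp = &cur->right;
                }
        }
        new->left = new->right = NULL;
        *linkp = new;   /* rb_link_node() + rb_insert_color() in the kernel */
}

int main(void)
{
        struct node a = { 100, 8 }, b = { 10, 4 }, c = { 200, 16 };
        struct node *root = NULL;

        busy_insert(&root, &a);
        busy_insert(&root, &b);
        busy_insert(&root, &c);
        printf("root starts at block %lu\n", root->bno);
        return 0;
}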
| @@ -2668,31 +2580,443 @@ xfs_alloc_busy_search( | |||
| 2668 | } | 2580 | } |
| 2669 | } | 2581 | } |
| 2670 | spin_unlock(&pag->pagb_lock); | 2582 | spin_unlock(&pag->pagb_lock); |
| 2671 | trace_xfs_alloc_busysearch(mp, agno, bno, len, !!match); | ||
| 2672 | xfs_perag_put(pag); | 2583 | xfs_perag_put(pag); |
| 2673 | return match; | 2584 | return match; |
| 2674 | } | 2585 | } |
| 2675 | 2586 | ||
| 2587 | /* | ||
| 2588 | * The found free extent [fbno, fend] overlaps part or all of the given busy | ||
| 2589 | * extent. If the overlap covers the beginning, the end, or all of the busy | ||
| 2590 | * extent, the overlapping portion can be made unbusy and used for the | ||
| 2591 | * allocation. We can't split a busy extent because we can't modify a | ||
| 2592 | * transaction/CIL context busy list, but we can update an entry's block | ||
| 2593 | * number or length. | ||
| 2594 | * | ||
| 2595 | * Returns true if the extent can safely be reused, or false if the search | ||
| 2596 | * needs to be restarted. | ||
| 2597 | */ | ||
| 2598 | STATIC bool | ||
| 2599 | xfs_alloc_busy_update_extent( | ||
| 2600 | struct xfs_mount *mp, | ||
| 2601 | struct xfs_perag *pag, | ||
| 2602 | struct xfs_busy_extent *busyp, | ||
| 2603 | xfs_agblock_t fbno, | ||
| 2604 | xfs_extlen_t flen, | ||
| 2605 | bool userdata) | ||
| 2606 | { | ||
| 2607 | xfs_agblock_t fend = fbno + flen; | ||
| 2608 | xfs_agblock_t bbno = busyp->bno; | ||
| 2609 | xfs_agblock_t bend = bbno + busyp->length; | ||
| 2610 | |||
| 2611 | /* | ||
| 2612 | * If there is a busy extent overlapping a user allocation, we have | ||
| 2613 | * no choice but to force the log and retry the search. | ||
| 2614 | * | ||
| 2615 | * Fortunately this does not happen during normal operation, but | ||
| 2616 | * only if the filesystem is very low on space and has to dip into | ||
| 2617 | * the AGFL for normal allocations. | ||
| 2618 | */ | ||
| 2619 | if (userdata) | ||
| 2620 | goto out_force_log; | ||
| 2621 | |||
| 2622 | if (bbno < fbno && bend > fend) { | ||
| 2623 | /* | ||
| 2624 | * Case 1: | ||
| 2625 | * bbno bend | ||
| 2626 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2627 | * +---------+ | ||
| 2628 | * fbno fend | ||
| 2629 | */ | ||
| 2630 | |||
| 2631 | /* | ||
| 2632 | * We would have to split the busy extent to be able to track | ||
| 2633 | * it correctly, which we cannot do because we would have to | ||
| 2634 | * modify the list of busy extents attached to the transaction | ||
| 2635 | * or CIL context, which is immutable. | ||
| 2636 | * | ||
| 2637 | * Force out the log to clear the busy extent and retry the | ||
| 2638 | * search. | ||
| 2639 | */ | ||
| 2640 | goto out_force_log; | ||
| 2641 | } else if (bbno >= fbno && bend <= fend) { | ||
| 2642 | /* | ||
| 2643 | * Case 2: | ||
| 2644 | * bbno bend | ||
| 2645 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2646 | * +-----------------+ | ||
| 2647 | * fbno fend | ||
| 2648 | * | ||
| 2649 | * Case 3: | ||
| 2650 | * bbno bend | ||
| 2651 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2652 | * +--------------------------+ | ||
| 2653 | * fbno fend | ||
| 2654 | * | ||
| 2655 | * Case 4: | ||
| 2656 | * bbno bend | ||
| 2657 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2658 | * +--------------------------+ | ||
| 2659 | * fbno fend | ||
| 2660 | * | ||
| 2661 | * Case 5: | ||
| 2662 | * bbno bend | ||
| 2663 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2664 | * +-----------------------------------+ | ||
| 2665 | * fbno fend | ||
| 2666 | * | ||
| 2667 | */ | ||
| 2668 | |||
| 2669 | /* | ||
| 2670 | * The busy extent is fully covered by the extent we are | ||
| 2671 | * allocating, and can simply be removed from the rbtree. | ||
| 2672 | * However we cannot remove it from the immutable list | ||
| 2673 | * tracking busy extents in the transaction or CIL context, | ||
| 2674 | * so set the length to zero to mark it invalid. | ||
| 2675 | * | ||
| 2676 | * We also need to restart the busy extent search from the | ||
| 2677 | * tree root, because erasing the node can rearrange the | ||
| 2678 | * tree topology. | ||
| 2679 | */ | ||
| 2680 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
| 2681 | busyp->length = 0; | ||
| 2682 | return false; | ||
| 2683 | } else if (fend < bend) { | ||
| 2684 | /* | ||
| 2685 | * Case 6: | ||
| 2686 | * bbno bend | ||
| 2687 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2688 | * +---------+ | ||
| 2689 | * fbno fend | ||
| 2690 | * | ||
| 2691 | * Case 7: | ||
| 2692 | * bbno bend | ||
| 2693 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2694 | * +------------------+ | ||
| 2695 | * fbno fend | ||
| 2696 | * | ||
| 2697 | */ | ||
| 2698 | busyp->bno = fend; | ||
| 2699 | } else if (bbno < fbno) { | ||
| 2700 | /* | ||
| 2701 | * Case 8: | ||
| 2702 | * bbno bend | ||
| 2703 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2704 | * +-------------+ | ||
| 2705 | * fbno fend | ||
| 2706 | * | ||
| 2707 | * Case 9: | ||
| 2708 | * bbno bend | ||
| 2709 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2710 | * +----------------------+ | ||
| 2711 | * fbno fend | ||
| 2712 | */ | ||
| 2713 | busyp->length = fbno - busyp->bno; | ||
| 2714 | } else { | ||
| 2715 | ASSERT(0); | ||
| 2716 | } | ||
| 2717 | |||
| 2718 | trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); | ||
| 2719 | return true; | ||
| 2720 | |||
| 2721 | out_force_log: | ||
| 2722 | spin_unlock(&pag->pagb_lock); | ||
| 2723 | xfs_log_force(mp, XFS_LOG_SYNC); | ||
| 2724 | trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); | ||
| 2725 | spin_lock(&pag->pagb_lock); | ||
| 2726 | return false; | ||
| 2727 | } | ||
| 2728 | |||
| 2729 | |||
| 2730 | /* | ||
| 2731 | * For a given extent [fbno, flen], make sure we can reuse it safely. | ||
| 2732 | */ | ||
| 2676 | void | 2733 | void |
| 2677 | xfs_alloc_busy_clear( | 2734 | xfs_alloc_busy_reuse( |
| 2678 | struct xfs_mount *mp, | 2735 | struct xfs_mount *mp, |
| 2679 | struct xfs_busy_extent *busyp) | 2736 | xfs_agnumber_t agno, |
| 2737 | xfs_agblock_t fbno, | ||
| 2738 | xfs_extlen_t flen, | ||
| 2739 | bool userdata) | ||
| 2680 | { | 2740 | { |
| 2681 | struct xfs_perag *pag; | 2741 | struct xfs_perag *pag; |
| 2742 | struct rb_node *rbp; | ||
| 2682 | 2743 | ||
| 2683 | trace_xfs_alloc_unbusy(mp, busyp->agno, busyp->bno, | 2744 | ASSERT(flen > 0); |
| 2684 | busyp->length); | ||
| 2685 | 2745 | ||
| 2686 | ASSERT(xfs_alloc_busy_search(mp, busyp->agno, busyp->bno, | 2746 | pag = xfs_perag_get(mp, agno); |
| 2687 | busyp->length) == 1); | 2747 | spin_lock(&pag->pagb_lock); |
| 2748 | restart: | ||
| 2749 | rbp = pag->pagb_tree.rb_node; | ||
| 2750 | while (rbp) { | ||
| 2751 | struct xfs_busy_extent *busyp = | ||
| 2752 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
| 2753 | xfs_agblock_t bbno = busyp->bno; | ||
| 2754 | xfs_agblock_t bend = bbno + busyp->length; | ||
| 2688 | 2755 | ||
| 2689 | list_del_init(&busyp->list); | 2756 | if (fbno + flen <= bbno) { |
| 2757 | rbp = rbp->rb_left; | ||
| 2758 | continue; | ||
| 2759 | } else if (fbno >= bend) { | ||
| 2760 | rbp = rbp->rb_right; | ||
| 2761 | continue; | ||
| 2762 | } | ||
| 2690 | 2763 | ||
| 2691 | pag = xfs_perag_get(mp, busyp->agno); | 2764 | if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, |
| 2692 | spin_lock(&pag->pagb_lock); | 2765 | userdata)) |
| 2693 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | 2766 | goto restart; |
| 2767 | } | ||
| 2694 | spin_unlock(&pag->pagb_lock); | 2768 | spin_unlock(&pag->pagb_lock); |
| 2695 | xfs_perag_put(pag); | 2769 | xfs_perag_put(pag); |
| 2770 | } | ||
| 2771 | |||
| 2772 | /* | ||
| 2773 | * For a given extent [fbno, flen], search the busy extent list to find a | ||
| 2774 | * subset of the extent that is not busy. If *rlen is smaller than | ||
| 2775 | * args->minlen no suitable extent could be found, and the higher level | ||
| 2776 | * code needs to force out the log and retry the allocation. | ||
| 2777 | */ | ||
| 2778 | STATIC void | ||
| 2779 | xfs_alloc_busy_trim( | ||
| 2780 | struct xfs_alloc_arg *args, | ||
| 2781 | xfs_agblock_t bno, | ||
| 2782 | xfs_extlen_t len, | ||
| 2783 | xfs_agblock_t *rbno, | ||
| 2784 | xfs_extlen_t *rlen) | ||
| 2785 | { | ||
| 2786 | xfs_agblock_t fbno; | ||
| 2787 | xfs_extlen_t flen; | ||
| 2788 | struct rb_node *rbp; | ||
| 2789 | |||
| 2790 | ASSERT(len > 0); | ||
| 2696 | 2791 | ||
| 2792 | spin_lock(&args->pag->pagb_lock); | ||
| 2793 | restart: | ||
| 2794 | fbno = bno; | ||
| 2795 | flen = len; | ||
| 2796 | rbp = args->pag->pagb_tree.rb_node; | ||
| 2797 | while (rbp && flen >= args->minlen) { | ||
| 2798 | struct xfs_busy_extent *busyp = | ||
| 2799 | rb_entry(rbp, struct xfs_busy_extent, rb_node); | ||
| 2800 | xfs_agblock_t fend = fbno + flen; | ||
| 2801 | xfs_agblock_t bbno = busyp->bno; | ||
| 2802 | xfs_agblock_t bend = bbno + busyp->length; | ||
| 2803 | |||
| 2804 | if (fend <= bbno) { | ||
| 2805 | rbp = rbp->rb_left; | ||
| 2806 | continue; | ||
| 2807 | } else if (fbno >= bend) { | ||
| 2808 | rbp = rbp->rb_right; | ||
| 2809 | continue; | ||
| 2810 | } | ||
| 2811 | |||
| 2812 | /* | ||
| 2813 | * If this is a metadata allocation, try to reuse the busy | ||
| 2814 | * extent instead of trimming the allocation. | ||
| 2815 | */ | ||
| 2816 | if (!args->userdata) { | ||
| 2817 | if (!xfs_alloc_busy_update_extent(args->mp, args->pag, | ||
| 2818 | busyp, fbno, flen, | ||
| 2819 | false)) | ||
| 2820 | goto restart; | ||
| 2821 | continue; | ||
| 2822 | } | ||
| 2823 | |||
| 2824 | if (bbno <= fbno) { | ||
| 2825 | /* start overlap */ | ||
| 2826 | |||
| 2827 | /* | ||
| 2828 | * Case 1: | ||
| 2829 | * bbno bend | ||
| 2830 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2831 | * +---------+ | ||
| 2832 | * fbno fend | ||
| 2833 | * | ||
| 2834 | * Case 2: | ||
| 2835 | * bbno bend | ||
| 2836 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2837 | * +-------------+ | ||
| 2838 | * fbno fend | ||
| 2839 | * | ||
| 2840 | * Case 3: | ||
| 2841 | * bbno bend | ||
| 2842 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2843 | * +-------------+ | ||
| 2844 | * fbno fend | ||
| 2845 | * | ||
| 2846 | * Case 4: | ||
| 2847 | * bbno bend | ||
| 2848 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2849 | * +-----------------+ | ||
| 2850 | * fbno fend | ||
| 2851 | * | ||
| 2852 | * No unbusy region in extent, return failure. | ||
| 2853 | */ | ||
| 2854 | if (fend <= bend) | ||
| 2855 | goto fail; | ||
| 2856 | |||
| 2857 | /* | ||
| 2858 | * Case 5: | ||
| 2859 | * bbno bend | ||
| 2860 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2861 | * +----------------------+ | ||
| 2862 | * fbno fend | ||
| 2863 | * | ||
| 2864 | * Case 6: | ||
| 2865 | * bbno bend | ||
| 2866 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2867 | * +--------------------------+ | ||
| 2868 | * fbno fend | ||
| 2869 | * | ||
| 2870 | * Needs to be trimmed to: | ||
| 2871 | * +-------+ | ||
| 2872 | * fbno fend | ||
| 2873 | */ | ||
| 2874 | fbno = bend; | ||
| 2875 | } else if (bend >= fend) { | ||
| 2876 | /* end overlap */ | ||
| 2877 | |||
| 2878 | /* | ||
| 2879 | * Case 7: | ||
| 2880 | * bbno bend | ||
| 2881 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2882 | * +------------------+ | ||
| 2883 | * fbno fend | ||
| 2884 | * | ||
| 2885 | * Case 8: | ||
| 2886 | * bbno bend | ||
| 2887 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2888 | * +--------------------------+ | ||
| 2889 | * fbno fend | ||
| 2890 | * | ||
| 2891 | * Needs to be trimmed to: | ||
| 2892 | * +-------+ | ||
| 2893 | * fbno fend | ||
| 2894 | */ | ||
| 2895 | fend = bbno; | ||
| 2896 | } else { | ||
| 2897 | /* middle overlap */ | ||
| 2898 | |||
| 2899 | /* | ||
| 2900 | * Case 9: | ||
| 2901 | * bbno bend | ||
| 2902 | * +BBBBBBBBBBBBBBBBB+ | ||
| 2903 | * +-----------------------------------+ | ||
| 2904 | * fbno fend | ||
| 2905 | * | ||
| 2906 | * Can be trimmed to: | ||
| 2907 | * +-------+ OR +-------+ | ||
| 2908 | * fbno fend fbno fend | ||
| 2909 | * | ||
| 2910 | * Backward allocation leads to significant | ||
| 2911 | * fragmentation of directories, which degrades | ||
| 2912 | * directory performance; we therefore always want to | ||
| 2913 | * choose the option that produces forward allocation | ||
| 2914 | * patterns. | ||
| 2915 | * Preferring the lower bno extent will make the next | ||
| 2916 | * request use "fend" as the start of the next | ||
| 2917 | * allocation; if the segment is no longer busy at | ||
| 2918 | * that point, we'll get a contiguous allocation, but | ||
| 2919 | * even if it is still busy, we will get a forward | ||
| 2920 | * allocation. | ||
| 2921 | * We try to avoid choosing the segment at "bend", | ||
| 2922 | * because that can lead to the next allocation | ||
| 2923 | * taking the segment at "fbno", which would be a | ||
| 2924 | * backward allocation. We only use the segment at | ||
| 2925 | * "fbno" if it is much larger than the current | ||
| 2926 | * requested size, because in that case there's a | ||
| 2927 | * good chance subsequent allocations will be | ||
| 2928 | * contiguous. | ||
| 2929 | */ | ||
| 2930 | if (bbno - fbno >= args->maxlen) { | ||
| 2931 | /* left candidate fits perfectly */ | ||
| 2932 | fend = bbno; | ||
| 2933 | } else if (fend - bend >= args->maxlen * 4) { | ||
| 2934 | /* right candidate has enough free space */ | ||
| 2935 | fbno = bend; | ||
| 2936 | } else if (bbno - fbno >= args->minlen) { | ||
| 2937 | /* left candidate fits minimum requirement */ | ||
| 2938 | fend = bbno; | ||
| 2939 | } else { | ||
| 2940 | goto fail; | ||
| 2941 | } | ||
| 2942 | } | ||
| 2943 | |||
| 2944 | flen = fend - fbno; | ||
| 2945 | } | ||
| 2946 | spin_unlock(&args->pag->pagb_lock); | ||
| 2947 | |||
| 2948 | if (fbno != bno || flen != len) { | ||
| 2949 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, | ||
| 2950 | fbno, flen); | ||
| 2951 | } | ||
| 2952 | *rbno = fbno; | ||
| 2953 | *rlen = flen; | ||
| 2954 | return; | ||
| 2955 | fail: | ||
| 2956 | /* | ||
| 2957 | * Return a zero extent length as a failure indication. All callers | ||
| 2958 | * re-check if the trimmed extent satisfies the minlen requirement. | ||
| 2959 | */ | ||
| 2960 | spin_unlock(&args->pag->pagb_lock); | ||
| 2961 | trace_xfs_alloc_busy_trim(args->mp, args->agno, bno, len, fbno, 0); | ||
| 2962 | *rbno = fbno; | ||
| 2963 | *rlen = 0; | ||
| 2964 | } | ||
| 2965 | |||
| 2966 | static void | ||
| 2967 | xfs_alloc_busy_clear_one( | ||
| 2968 | struct xfs_mount *mp, | ||
| 2969 | struct xfs_perag *pag, | ||
| 2970 | struct xfs_busy_extent *busyp) | ||
| 2971 | { | ||
| 2972 | if (busyp->length) { | ||
| 2973 | trace_xfs_alloc_busy_clear(mp, busyp->agno, busyp->bno, | ||
| 2974 | busyp->length); | ||
| 2975 | rb_erase(&busyp->rb_node, &pag->pagb_tree); | ||
| 2976 | } | ||
| 2977 | |||
| 2978 | list_del_init(&busyp->list); | ||
| 2697 | kmem_free(busyp); | 2979 | kmem_free(busyp); |
| 2698 | } | 2980 | } |
| 2981 | |||
| 2982 | void | ||
| 2983 | xfs_alloc_busy_clear( | ||
| 2984 | struct xfs_mount *mp, | ||
| 2985 | struct list_head *list) | ||
| 2986 | { | ||
| 2987 | struct xfs_busy_extent *busyp, *n; | ||
| 2988 | struct xfs_perag *pag = NULL; | ||
| 2989 | xfs_agnumber_t agno = NULLAGNUMBER; | ||
| 2990 | |||
| 2991 | list_for_each_entry_safe(busyp, n, list, list) { | ||
| 2992 | if (busyp->agno != agno) { | ||
| 2993 | if (pag) { | ||
| 2994 | spin_unlock(&pag->pagb_lock); | ||
| 2995 | xfs_perag_put(pag); | ||
| 2996 | } | ||
| 2997 | pag = xfs_perag_get(mp, busyp->agno); | ||
| 2998 | spin_lock(&pag->pagb_lock); | ||
| 2999 | agno = busyp->agno; | ||
| 3000 | } | ||
| 3001 | |||
| 3002 | xfs_alloc_busy_clear_one(mp, pag, busyp); | ||
| 3003 | } | ||
| 3004 | |||
| 3005 | if (pag) { | ||
| 3006 | spin_unlock(&pag->pagb_lock); | ||
| 3007 | xfs_perag_put(pag); | ||
| 3008 | } | ||
| 3009 | } | ||
| 3010 | |||
| 3011 | /* | ||
| 3012 | * Callback for list_sort to sort busy extents by the AG they reside in. | ||
| 3013 | */ | ||
| 3014 | int | ||
| 3015 | xfs_busy_extent_ag_cmp( | ||
| 3016 | void *priv, | ||
| 3017 | struct list_head *a, | ||
| 3018 | struct list_head *b) | ||
| 3019 | { | ||
| 3020 | return container_of(a, struct xfs_busy_extent, list)->agno - | ||
| 3021 | container_of(b, struct xfs_busy_extent, list)->agno; | ||
| 3022 | } | ||
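At its core, xfs_alloc_busy_update_extent() and xfs_alloc_busy_trim() do interval arithmetic on a free range [fbno, fend) against a busy range [bbno, bend). The sketch below condenses that case analysis, including the forward-allocation heuristic for the middle-overlap case, into one standalone function; it handles a single busy extent with no locking, so it is an illustration of the arithmetic rather than the kernel code:

#include <stdbool.h>
#include <stdio.h>

typedef unsigned long block_t;

/*
 * Trim the free range [*fbno, *fbno + *flen) against one busy range
 * [bbno, bbno + blen).  Returns false when no usable piece of at least
 * minlen remains; minlen/maxlen model args->minlen/args->maxlen.
 */
static bool trim_against_busy(block_t *fbno, block_t *flen,
                              block_t bbno, block_t blen,
                              block_t minlen, block_t maxlen)
{
        block_t fend = *fbno + *flen;
        block_t bend = bbno + blen;

        if (fend <= bbno || *fbno >= bend)
                return *flen >= minlen;         /* no overlap */

        if (bbno <= *fbno) {
                if (fend <= bend)
                        return false;           /* free range entirely busy */
                *fbno = bend;                   /* busy covers the start */
        } else if (bend >= fend) {
                fend = bbno;                    /* busy covers the end */
        } else {
                /*
                 * Busy range sits in the middle.  Prefer the lower piece
                 * so the next allocation moves forward, unless the upper
                 * piece is much larger (4 * maxlen, as in the patch).
                 */
                if (bbno - *fbno >= maxlen)
                        fend = bbno;            /* lower piece fits maxlen */
                else if (fend - bend >= maxlen * 4)
                        *fbno = bend;           /* upper piece is much larger */
                else if (bbno - *fbno >= minlen)
                        fend = bbno;            /* lower piece meets minlen */
                else
                        return false;
        }
        *flen = fend - *fbno;
        return *flen >= minlen;
}

int main(void)
{
        block_t fbno = 100, flen = 50;

        /* busy [120, 130) splits free [100, 150): expect the lower piece */
        if (trim_against_busy(&fbno, &flen, 120, 10, 8, 16))
                printf("usable: [%lu, %lu)\n", fbno, fbno + flen);
        return 0;
}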
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index d0b3bc72005b..240ad288f2f9 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h | |||
| @@ -140,11 +140,24 @@ xfs_alloc_busy_insert(struct xfs_trans *tp, xfs_agnumber_t agno, | |||
| 140 | xfs_agblock_t bno, xfs_extlen_t len); | 140 | xfs_agblock_t bno, xfs_extlen_t len); |
| 141 | 141 | ||
| 142 | void | 142 | void |
| 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct xfs_busy_extent *busyp); | 143 | xfs_alloc_busy_clear(struct xfs_mount *mp, struct list_head *list); |
| 144 | 144 | ||
| 145 | int | 145 | int |
| 146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, | 146 | xfs_alloc_busy_search(struct xfs_mount *mp, xfs_agnumber_t agno, |
| 147 | xfs_agblock_t bno, xfs_extlen_t len); | 147 | xfs_agblock_t bno, xfs_extlen_t len); |
| 148 | |||
| 149 | void | ||
| 150 | xfs_alloc_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno, | ||
| 151 | xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata); | ||
| 152 | |||
| 153 | int | ||
| 154 | xfs_busy_extent_ag_cmp(void *priv, struct list_head *a, struct list_head *b); | ||
| 155 | |||
| 156 | static inline void xfs_alloc_busy_sort(struct list_head *list) | ||
| 157 | { | ||
| 158 | list_sort(NULL, list, xfs_busy_extent_ag_cmp); | ||
| 159 | } | ||
| 160 | |||
| 148 | #endif /* __KERNEL__ */ | 161 | #endif /* __KERNEL__ */ |
| 149 | 162 | ||
| 150 | /* | 163 | /* |
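xfs_alloc_busy_sort() exists so that xfs_alloc_busy_clear() can take each per-AG pagb_lock once per run of same-AG extents instead of once per extent. A rough userspace illustration of the sort-then-group pattern, using qsort() over an array in place of the kernel's list_sort() and printf() stand-ins for the spin lock calls:

#include <stdio.h>
#include <stdlib.h>

struct busy { unsigned agno; unsigned bno; };

static int cmp_agno(const void *a, const void *b)
{
        const struct busy *ba = a, *bb = b;

        return (int)ba->agno - (int)bb->agno;
}

int main(void)
{
        struct busy list[] = { {2, 10}, {0, 5}, {2, 99}, {0, 7}, {1, 3} };
        size_t n = sizeof(list) / sizeof(list[0]);
        unsigned cur = (unsigned)-1;    /* NULLAGNUMBER stand-in */

        /* sort by AG first, as xfs_alloc_busy_sort() does */
        qsort(list, n, sizeof(list[0]), cmp_agno);

        for (size_t i = 0; i < n; i++) {
                if (list[i].agno != cur) {
                        if (cur != (unsigned)-1)
                                printf("unlock AG %u\n", cur);
                        cur = list[i].agno;
                        printf("lock AG %u\n", cur);
                }
                printf("  clear extent at %u\n", list[i].bno);
        }
        if (cur != (unsigned)-1)
                printf("unlock AG %u\n", cur);
        return 0;
}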
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 3916925e2584..8b469d53599f 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
| @@ -95,6 +95,8 @@ xfs_allocbt_alloc_block( | |||
| 95 | return 0; | 95 | return 0; |
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | xfs_alloc_busy_reuse(cur->bc_mp, cur->bc_private.a.agno, bno, 1, false); | ||
| 99 | |||
| 98 | xfs_trans_agbtree_delta(cur->bc_tp, 1); | 100 | xfs_trans_agbtree_delta(cur->bc_tp, 1); |
| 99 | new->s = cpu_to_be32(bno); | 101 | new->s = cpu_to_be32(bno); |
| 100 | 102 | ||
| @@ -118,17 +120,6 @@ xfs_allocbt_free_block( | |||
| 118 | if (error) | 120 | if (error) |
| 119 | return error; | 121 | return error; |
| 120 | 122 | ||
| 121 | /* | ||
| 122 | * Since blocks move to the free list without the coordination used in | ||
| 123 | * xfs_bmap_finish, we can't allow a block to be available for | ||
| 124 | * reallocation and non-transaction writing (user data) until we know | ||
| 125 | * that the transaction that moved it to the free list is permanently | ||
| 126 | * on disk. We track the blocks by declaring these blocks as "busy"; | ||
| 127 | * the busy list is maintained on a per-ag basis and each transaction | ||
| 128 | * records which entries should be removed when the iclog commits to | ||
| 129 | * disk. If a busy block is allocated, the iclog is pushed up to the | ||
| 130 | * LSN that freed the block. | ||
| 131 | */ | ||
| 132 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); | 123 | xfs_alloc_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1); |
| 133 | xfs_trans_agbtree_delta(cur->bc_tp, -1); | 124 | xfs_trans_agbtree_delta(cur->bc_tp, -1); |
| 134 | return 0; | 125 | return 0; |
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c index be628677c288..9a84a85c03b1 100644 --- a/fs/xfs/xfs_dfrag.c +++ b/fs/xfs/xfs_dfrag.c | |||
| @@ -202,7 +202,7 @@ xfs_swap_extents( | |||
| 202 | xfs_inode_t *tip, /* tmp inode */ | 202 | xfs_inode_t *tip, /* tmp inode */ |
| 203 | xfs_swapext_t *sxp) | 203 | xfs_swapext_t *sxp) |
| 204 | { | 204 | { |
| 205 | xfs_mount_t *mp; | 205 | xfs_mount_t *mp = ip->i_mount; |
| 206 | xfs_trans_t *tp; | 206 | xfs_trans_t *tp; |
| 207 | xfs_bstat_t *sbp = &sxp->sx_stat; | 207 | xfs_bstat_t *sbp = &sxp->sx_stat; |
| 208 | xfs_ifork_t *tempifp, *ifp, *tifp; | 208 | xfs_ifork_t *tempifp, *ifp, *tifp; |
| @@ -212,16 +212,12 @@ xfs_swap_extents( | |||
| 212 | int taforkblks = 0; | 212 | int taforkblks = 0; |
| 213 | __uint64_t tmp; | 213 | __uint64_t tmp; |
| 214 | 214 | ||
| 215 | mp = ip->i_mount; | ||
| 216 | |||
| 217 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); | 215 | tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL); |
| 218 | if (!tempifp) { | 216 | if (!tempifp) { |
| 219 | error = XFS_ERROR(ENOMEM); | 217 | error = XFS_ERROR(ENOMEM); |
| 220 | goto out; | 218 | goto out; |
| 221 | } | 219 | } |
| 222 | 220 | ||
| 223 | sbp = &sxp->sx_stat; | ||
| 224 | |||
| 225 | /* | 221 | /* |
| 226 | * we have to do two separate lock calls here to keep lockdep | 222 | * we have to do two separate lock calls here to keep lockdep |
| 227 | * happy. If we try to get all the locks in one call, lock will | 223 | * happy. If we try to get all the locks in one call, lock will |
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d11ce613d692..c8e3349c287c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
| @@ -1354,7 +1354,7 @@ xfs_itruncate_start( | |||
| 1354 | return 0; | 1354 | return 0; |
| 1355 | } | 1355 | } |
| 1356 | last_byte = xfs_file_last_byte(ip); | 1356 | last_byte = xfs_file_last_byte(ip); |
| 1357 | trace_xfs_itruncate_start(ip, flags, new_size, toss_start, last_byte); | 1357 | trace_xfs_itruncate_start(ip, new_size, flags, toss_start, last_byte); |
| 1358 | if (last_byte > toss_start) { | 1358 | if (last_byte > toss_start) { |
| 1359 | if (flags & XFS_ITRUNC_DEFINITE) { | 1359 | if (flags & XFS_ITRUNC_DEFINITE) { |
| 1360 | xfs_tosspages(ip, toss_start, | 1360 | xfs_tosspages(ip, toss_start, |
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 576fdfe81d60..09983a3344a5 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c | |||
| @@ -970,7 +970,6 @@ xfs_iflush_abort( | |||
| 970 | { | 970 | { |
| 971 | xfs_inode_log_item_t *iip = ip->i_itemp; | 971 | xfs_inode_log_item_t *iip = ip->i_itemp; |
| 972 | 972 | ||
| 973 | iip = ip->i_itemp; | ||
| 974 | if (iip) { | 973 | if (iip) { |
| 975 | struct xfs_ail *ailp = iip->ili_item.li_ailp; | 974 | struct xfs_ail *ailp = iip->ili_item.li_ailp; |
| 976 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { | 975 | if (iip->ili_item.li_flags & XFS_LI_IN_AIL) { |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index b612ce4520ae..211930246f20 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -1449,6 +1449,13 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1449 | 1449 | ||
| 1450 | xlog_cil_destroy(log); | 1450 | xlog_cil_destroy(log); |
| 1451 | 1451 | ||
| 1452 | /* | ||
| 1453 | * We always need to ensure that the extra buffer does not point to memory | ||
| 1454 | * owned by another log buffer before we free it. | ||
| 1455 | */ | ||
| 1456 | xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); | ||
| 1457 | xfs_buf_free(log->l_xbuf); | ||
| 1458 | |||
| 1452 | iclog = log->l_iclog; | 1459 | iclog = log->l_iclog; |
| 1453 | for (i=0; i<log->l_iclog_bufs; i++) { | 1460 | for (i=0; i<log->l_iclog_bufs; i++) { |
| 1454 | xfs_buf_free(iclog->ic_bp); | 1461 | xfs_buf_free(iclog->ic_bp); |
| @@ -1458,7 +1465,6 @@ xlog_dealloc_log(xlog_t *log) | |||
| 1458 | } | 1465 | } |
| 1459 | spinlock_destroy(&log->l_icloglock); | 1466 | spinlock_destroy(&log->l_icloglock); |
| 1460 | 1467 | ||
| 1461 | xfs_buf_free(log->l_xbuf); | ||
| 1462 | log->l_mp->m_log = NULL; | 1468 | log->l_mp->m_log = NULL; |
| 1463 | kmem_free(log); | 1469 | kmem_free(log); |
| 1464 | } /* xlog_dealloc_log */ | 1470 | } /* xlog_dealloc_log */ |
| @@ -3248,13 +3254,6 @@ xfs_log_ticket_get( | |||
| 3248 | return ticket; | 3254 | return ticket; |
| 3249 | } | 3255 | } |
| 3250 | 3256 | ||
| 3251 | xlog_tid_t | ||
| 3252 | xfs_log_get_trans_ident( | ||
| 3253 | struct xfs_trans *tp) | ||
| 3254 | { | ||
| 3255 | return tp->t_ticket->t_tid; | ||
| 3256 | } | ||
| 3257 | |||
| 3258 | /* | 3257 | /* |
| 3259 | * Allocate and initialise a new log ticket. | 3258 | * Allocate and initialise a new log ticket. |
| 3260 | */ | 3259 | */ |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 3bd3291ef8d2..78c9039994af 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
| @@ -189,8 +189,6 @@ void xlog_iodone(struct xfs_buf *); | |||
| 189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); | 189 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
| 190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 190 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
| 191 | 191 | ||
| 192 | xlog_tid_t xfs_log_get_trans_ident(struct xfs_trans *tp); | ||
| 193 | |||
| 194 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 192 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
| 195 | struct xfs_log_vec *log_vector, | 193 | struct xfs_log_vec *log_vector, |
| 196 | xfs_lsn_t *commit_lsn, int flags); | 194 | xfs_lsn_t *commit_lsn, int flags); |
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 9ca59be08977..7d56e88a3f0e 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
| @@ -361,13 +361,12 @@ xlog_cil_committed( | |||
| 361 | int abort) | 361 | int abort) |
| 362 | { | 362 | { |
| 363 | struct xfs_cil_ctx *ctx = args; | 363 | struct xfs_cil_ctx *ctx = args; |
| 364 | struct xfs_busy_extent *busyp, *n; | ||
| 365 | 364 | ||
| 366 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, | 365 | xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain, |
| 367 | ctx->start_lsn, abort); | 366 | ctx->start_lsn, abort); |
| 368 | 367 | ||
| 369 | list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list) | 368 | xfs_alloc_busy_sort(&ctx->busy_extents); |
| 370 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp); | 369 | xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, &ctx->busy_extents); |
| 371 | 370 | ||
| 372 | spin_lock(&ctx->cil->xc_cil_lock); | 371 | spin_lock(&ctx->cil->xc_cil_lock); |
| 373 | list_del(&ctx->committing); | 372 | list_del(&ctx->committing); |
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 5864850e9e34..2d3b6a498d63 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h | |||
| @@ -146,6 +146,8 @@ static inline uint xlog_get_client_id(__be32 i) | |||
| 146 | shutdown */ | 146 | shutdown */ |
| 147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ | 147 | #define XLOG_TAIL_WARN 0x10 /* log tail verify warning issued */ |
| 148 | 148 | ||
| 149 | typedef __uint32_t xlog_tid_t; | ||
| 150 | |||
| 149 | #ifdef __KERNEL__ | 151 | #ifdef __KERNEL__ |
| 150 | /* | 152 | /* |
| 151 | * Below are states for covering allocation transactions. | 153 | * Below are states for covering allocation transactions. |
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 5cc464a17c93..04142caedb2b 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c | |||
| @@ -205,6 +205,35 @@ xlog_bread( | |||
| 205 | } | 205 | } |
| 206 | 206 | ||
| 207 | /* | 207 | /* |
| 208 | * Read at an offset into the buffer. Returns with the buffer in its original | ||
| 209 | * state regardless of the result of the read. | ||
| 210 | */ | ||
| 211 | STATIC int | ||
| 212 | xlog_bread_offset( | ||
| 213 | xlog_t *log, | ||
| 214 | xfs_daddr_t blk_no, /* block to read from */ | ||
| 215 | int nbblks, /* blocks to read */ | ||
| 216 | xfs_buf_t *bp, | ||
| 217 | xfs_caddr_t offset) | ||
| 218 | { | ||
| 219 | xfs_caddr_t orig_offset = XFS_BUF_PTR(bp); | ||
| 220 | int orig_len = bp->b_buffer_length; | ||
| 221 | int error, error2; | ||
| 222 | |||
| 223 | error = XFS_BUF_SET_PTR(bp, offset, BBTOB(nbblks)); | ||
| 224 | if (error) | ||
| 225 | return error; | ||
| 226 | |||
| 227 | error = xlog_bread_noalign(log, blk_no, nbblks, bp); | ||
| 228 | |||
| 229 | /* must reset buffer pointer even on error */ | ||
| 230 | error2 = XFS_BUF_SET_PTR(bp, orig_offset, orig_len); | ||
| 231 | if (error) | ||
| 232 | return error; | ||
| 233 | return error2; | ||
| 234 | } | ||
| 235 | |||
| 236 | /* | ||
| 208 | * Write out the buffer at the given block for the given number of blocks. | 237 | * Write out the buffer at the given block for the given number of blocks. |
| 209 | * The buffer is kept locked across the write and is returned locked. | 238 | * The buffer is kept locked across the write and is returned locked. |
| 210 | * This can only be used for synchronous log writes. | 239 | * This can only be used for synchronous log writes. |
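xlog_bread_offset() is a save/repoint/restore wrapper: it redirects the buffer to the requested offset, performs the read, and unconditionally restores the original pointer, returning whichever error occurred first. A small sketch of that shape with invented buffer types (not the xfs_buf API):

#include <stdio.h>

struct buf {
        char *ptr;
        int   len;
};

static int do_read(struct buf *bp)
{
        (void)bp;
        return 0;       /* pretend the I/O succeeded */
}

static int set_ptr(struct buf *bp, char *p, int len)
{
        bp->ptr = p;
        bp->len = len;
        return 0;
}

static int read_at_offset(struct buf *bp, char *offset, int nbytes)
{
        char *orig_ptr = bp->ptr;
        int   orig_len = bp->len;
        int   error, error2;

        error = set_ptr(bp, offset, nbytes);
        if (error)
                return error;

        error = do_read(bp);

        /* must reset the buffer pointer even when the read failed */
        error2 = set_ptr(bp, orig_ptr, orig_len);
        if (error)
                return error;
        return error2;
}

int main(void)
{
        char backing[64];
        struct buf bp = { backing, (int)sizeof(backing) };

        printf("read_at_offset -> %d\n", read_at_offset(&bp, backing + 16, 32));
        return 0;
}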
| @@ -1229,20 +1258,12 @@ xlog_write_log_records( | |||
| 1229 | */ | 1258 | */ |
| 1230 | ealign = round_down(end_block, sectbb); | 1259 | ealign = round_down(end_block, sectbb); |
| 1231 | if (j == 0 && (start_block + endcount > ealign)) { | 1260 | if (j == 0 && (start_block + endcount > ealign)) { |
| 1232 | offset = XFS_BUF_PTR(bp); | 1261 | offset = XFS_BUF_PTR(bp) + BBTOB(ealign - start_block); |
| 1233 | balign = BBTOB(ealign - start_block); | 1262 | error = xlog_bread_offset(log, ealign, sectbb, |
| 1234 | error = XFS_BUF_SET_PTR(bp, offset + balign, | 1263 | bp, offset); |
| 1235 | BBTOB(sectbb)); | ||
| 1236 | if (error) | 1264 | if (error) |
| 1237 | break; | 1265 | break; |
| 1238 | 1266 | ||
| 1239 | error = xlog_bread_noalign(log, ealign, sectbb, bp); | ||
| 1240 | if (error) | ||
| 1241 | break; | ||
| 1242 | |||
| 1243 | error = XFS_BUF_SET_PTR(bp, offset, bufblks); | ||
| 1244 | if (error) | ||
| 1245 | break; | ||
| 1246 | } | 1267 | } |
| 1247 | 1268 | ||
| 1248 | offset = xlog_align(log, start_block, endcount, bp); | 1269 | offset = xlog_align(log, start_block, endcount, bp); |
| @@ -3448,19 +3469,9 @@ xlog_do_recovery_pass( | |||
| 3448 | * - order is important. | 3469 | * - order is important. |
| 3449 | */ | 3470 | */ |
| 3450 | wrapped_hblks = hblks - split_hblks; | 3471 | wrapped_hblks = hblks - split_hblks; |
| 3451 | error = XFS_BUF_SET_PTR(hbp, | 3472 | error = xlog_bread_offset(log, 0, |
| 3452 | offset + BBTOB(split_hblks), | 3473 | wrapped_hblks, hbp, |
| 3453 | BBTOB(hblks - split_hblks)); | 3474 | offset + BBTOB(split_hblks)); |
| 3454 | if (error) | ||
| 3455 | goto bread_err2; | ||
| 3456 | |||
| 3457 | error = xlog_bread_noalign(log, 0, | ||
| 3458 | wrapped_hblks, hbp); | ||
| 3459 | if (error) | ||
| 3460 | goto bread_err2; | ||
| 3461 | |||
| 3462 | error = XFS_BUF_SET_PTR(hbp, offset, | ||
| 3463 | BBTOB(hblks)); | ||
| 3464 | if (error) | 3475 | if (error) |
| 3465 | goto bread_err2; | 3476 | goto bread_err2; |
| 3466 | } | 3477 | } |
| @@ -3511,19 +3522,9 @@ xlog_do_recovery_pass( | |||
| 3511 | * _first_, then the log start (LR header end) | 3522 | * _first_, then the log start (LR header end) |
| 3512 | * - order is important. | 3523 | * - order is important. |
| 3513 | */ | 3524 | */ |
| 3514 | error = XFS_BUF_SET_PTR(dbp, | 3525 | error = xlog_bread_offset(log, 0, |
| 3515 | offset + BBTOB(split_bblks), | 3526 | bblks - split_bblks, dbp, |
| 3516 | BBTOB(bblks - split_bblks)); | 3527 | offset + BBTOB(split_bblks)); |
| 3517 | if (error) | ||
| 3518 | goto bread_err2; | ||
| 3519 | |||
| 3520 | error = xlog_bread_noalign(log, wrapped_hblks, | ||
| 3521 | bblks - split_bblks, | ||
| 3522 | dbp); | ||
| 3523 | if (error) | ||
| 3524 | goto bread_err2; | ||
| 3525 | |||
| 3526 | error = XFS_BUF_SET_PTR(dbp, offset, h_size); | ||
| 3527 | if (error) | 3528 | if (error) |
| 3528 | goto bread_err2; | 3529 | goto bread_err2; |
| 3529 | } | 3530 | } |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index bb3f9a7b24ed..b49b82363d20 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
| @@ -1900,7 +1900,7 @@ xfs_mod_incore_sb_batch( | |||
| 1900 | uint nmsb, | 1900 | uint nmsb, |
| 1901 | int rsvd) | 1901 | int rsvd) |
| 1902 | { | 1902 | { |
| 1903 | xfs_mod_sb_t *msbp = &msb[0]; | 1903 | xfs_mod_sb_t *msbp; |
| 1904 | int error = 0; | 1904 | int error = 0; |
| 1905 | 1905 | ||
| 1906 | /* | 1906 | /* |
| @@ -1910,7 +1910,7 @@ xfs_mod_incore_sb_batch( | |||
| 1910 | * changes will be atomic. | 1910 | * changes will be atomic. |
| 1911 | */ | 1911 | */ |
| 1912 | spin_lock(&mp->m_sb_lock); | 1912 | spin_lock(&mp->m_sb_lock); |
| 1913 | for (msbp = &msbp[0]; msbp < (msb + nmsb); msbp++) { | 1913 | for (msbp = msb; msbp < (msb + nmsb); msbp++) { |
| 1914 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || | 1914 | ASSERT(msbp->msb_field < XFS_SBS_ICOUNT || |
| 1915 | msbp->msb_field > XFS_SBS_FDBLOCKS); | 1915 | msbp->msb_field > XFS_SBS_FDBLOCKS); |
| 1916 | 1916 | ||
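The xfs_mod_incore_sb_batch() change replaces the self-referential "for (msbp = &msbp[0]; ...)", which only worked because &msbp[0] is msbp itself, with the idiomatic pointer-range walk over [msb, msb + nmsb). A tiny illustration of that loop shape:

#include <stdio.h>

struct mod { int field; long delta; };

int main(void)
{
        struct mod msb[] = { {1, 10}, {2, -5}, {3, 7} };
        unsigned nmsb = sizeof(msb) / sizeof(msb[0]);
        struct mod *msbp;

        /* walk [msb, msb + nmsb) directly, as the fixed loop does */
        for (msbp = msb; msbp < msb + nmsb; msbp++)
                printf("field %d delta %ld\n", msbp->field, msbp->delta);
        return 0;
}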
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 76922793f64f..d1f24858ccc4 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
| @@ -608,10 +608,8 @@ STATIC void | |||
| 608 | xfs_trans_free( | 608 | xfs_trans_free( |
| 609 | struct xfs_trans *tp) | 609 | struct xfs_trans *tp) |
| 610 | { | 610 | { |
| 611 | struct xfs_busy_extent *busyp, *n; | 611 | xfs_alloc_busy_sort(&tp->t_busy); |
| 612 | 612 | xfs_alloc_busy_clear(tp->t_mountp, &tp->t_busy); | |
| 613 | list_for_each_entry_safe(busyp, n, &tp->t_busy, list) | ||
| 614 | xfs_alloc_busy_clear(tp->t_mountp, busyp); | ||
| 615 | 613 | ||
| 616 | atomic_dec(&tp->t_mountp->m_active_trans); | 614 | atomic_dec(&tp->t_mountp->m_active_trans); |
| 617 | xfs_trans_free_dqinfo(tp); | 615 | xfs_trans_free_dqinfo(tp); |
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h index 26d1867d8156..65584b55607d 100644 --- a/fs/xfs/xfs_types.h +++ b/fs/xfs/xfs_types.h | |||
| @@ -73,8 +73,6 @@ typedef __int32_t xfs_tid_t; /* transaction identifier */ | |||
| 73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ | 73 | typedef __uint32_t xfs_dablk_t; /* dir/attr block number (in file) */ |
| 74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ | 74 | typedef __uint32_t xfs_dahash_t; /* dir/attr hash value */ |
| 75 | 75 | ||
| 76 | typedef __uint32_t xlog_tid_t; /* transaction ID type */ | ||
| 77 | |||
| 78 | /* | 76 | /* |
| 79 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. | 77 | * These types are 64 bits on disk but are either 32 or 64 bits in memory. |
| 80 | * Disk based types: | 78 | * Disk based types: |
