diff options
Diffstat (limited to 'fs')
398 files changed, 10504 insertions, 6166 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index a16b0ff497ca..d8223209d4b1 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c | |||
@@ -832,6 +832,7 @@ static void v9fs_mmap_vm_close(struct vm_area_struct *vma) | |||
832 | 832 | ||
833 | static const struct vm_operations_struct v9fs_file_vm_ops = { | 833 | static const struct vm_operations_struct v9fs_file_vm_ops = { |
834 | .fault = filemap_fault, | 834 | .fault = filemap_fault, |
835 | .map_pages = filemap_map_pages, | ||
835 | .page_mkwrite = v9fs_vm_page_mkwrite, | 836 | .page_mkwrite = v9fs_vm_page_mkwrite, |
836 | .remap_pages = generic_file_remap_pages, | 837 | .remap_pages = generic_file_remap_pages, |
837 | }; | 838 | }; |
@@ -839,6 +840,7 @@ static const struct vm_operations_struct v9fs_file_vm_ops = { | |||
839 | static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { | 840 | static const struct vm_operations_struct v9fs_mmap_file_vm_ops = { |
840 | .close = v9fs_mmap_vm_close, | 841 | .close = v9fs_mmap_vm_close, |
841 | .fault = filemap_fault, | 842 | .fault = filemap_fault, |
843 | .map_pages = filemap_map_pages, | ||
842 | .page_mkwrite = v9fs_vm_page_mkwrite, | 844 | .page_mkwrite = v9fs_vm_page_mkwrite, |
843 | .remap_pages = generic_file_remap_pages, | 845 | .remap_pages = generic_file_remap_pages, |
844 | }; | 846 | }; |
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index bb7991c7e5c7..53161ec058a7 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c | |||
@@ -451,7 +451,7 @@ void v9fs_evict_inode(struct inode *inode) | |||
451 | { | 451 | { |
452 | struct v9fs_inode *v9inode = V9FS_I(inode); | 452 | struct v9fs_inode *v9inode = V9FS_I(inode); |
453 | 453 | ||
454 | truncate_inode_pages(inode->i_mapping, 0); | 454 | truncate_inode_pages_final(inode->i_mapping); |
455 | clear_inode(inode); | 455 | clear_inode(inode); |
456 | filemap_fdatawrite(inode->i_mapping); | 456 | filemap_fdatawrite(inode->i_mapping); |
457 | 457 | ||
diff --git a/fs/Kconfig b/fs/Kconfig index 7385e54be4b9..312393f32948 100644 --- a/fs/Kconfig +++ b/fs/Kconfig | |||
@@ -96,6 +96,7 @@ endif # BLOCK | |||
96 | menu "Pseudo filesystems" | 96 | menu "Pseudo filesystems" |
97 | 97 | ||
98 | source "fs/proc/Kconfig" | 98 | source "fs/proc/Kconfig" |
99 | source "fs/kernfs/Kconfig" | ||
99 | source "fs/sysfs/Kconfig" | 100 | source "fs/sysfs/Kconfig" |
100 | 101 | ||
101 | config TMPFS | 102 | config TMPFS |
diff --git a/fs/Makefile b/fs/Makefile index 47ac07bb4acc..f9cb9876e466 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
@@ -52,7 +52,8 @@ obj-$(CONFIG_FHANDLE) += fhandle.o | |||
52 | obj-y += quota/ | 52 | obj-y += quota/ |
53 | 53 | ||
54 | obj-$(CONFIG_PROC_FS) += proc/ | 54 | obj-$(CONFIG_PROC_FS) += proc/ |
55 | obj-$(CONFIG_SYSFS) += sysfs/ kernfs/ | 55 | obj-$(CONFIG_KERNFS) += kernfs/ |
56 | obj-$(CONFIG_SYSFS) += sysfs/ | ||
56 | obj-$(CONFIG_CONFIGFS_FS) += configfs/ | 57 | obj-$(CONFIG_CONFIGFS_FS) += configfs/ |
57 | obj-y += devpts/ | 58 | obj-y += devpts/ |
58 | 59 | ||
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 7b3003cb6f1b..9852bdf34d76 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
@@ -212,6 +212,7 @@ static int parse_options(struct super_block *sb, char *options) | |||
212 | 212 | ||
213 | static int adfs_remount(struct super_block *sb, int *flags, char *data) | 213 | static int adfs_remount(struct super_block *sb, int *flags, char *data) |
214 | { | 214 | { |
215 | sync_filesystem(sb); | ||
215 | *flags |= MS_NODIRATIME; | 216 | *flags |= MS_NODIRATIME; |
216 | return parse_options(sb, data); | 217 | return parse_options(sb, data); |
217 | } | 218 | } |
@@ -265,7 +266,7 @@ static void init_once(void *foo) | |||
265 | inode_init_once(&ei->vfs_inode); | 266 | inode_init_once(&ei->vfs_inode); |
266 | } | 267 | } |
267 | 268 | ||
268 | static int init_inodecache(void) | 269 | static int __init init_inodecache(void) |
269 | { | 270 | { |
270 | adfs_inode_cachep = kmem_cache_create("adfs_inode_cache", | 271 | adfs_inode_cachep = kmem_cache_create("adfs_inode_cache", |
271 | sizeof(struct adfs_inode_info), | 272 | sizeof(struct adfs_inode_info), |
diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 3952121f2f28..25b23b1e7f22 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h | |||
@@ -5,14 +5,6 @@ | |||
5 | #include <linux/mutex.h> | 5 | #include <linux/mutex.h> |
6 | #include <linux/workqueue.h> | 6 | #include <linux/workqueue.h> |
7 | 7 | ||
8 | /* AmigaOS allows file names with up to 30 characters length. | ||
9 | * Names longer than that will be silently truncated. If you | ||
10 | * want to disallow this, comment out the following #define. | ||
11 | * Creating filesystem objects with longer names will then | ||
12 | * result in an error (ENAMETOOLONG). | ||
13 | */ | ||
14 | /*#define AFFS_NO_TRUNCATE */ | ||
15 | |||
16 | /* Ugly macros make the code more pretty. */ | 8 | /* Ugly macros make the code more pretty. */ |
17 | 9 | ||
18 | #define GET_END_PTR(st,p,sz) ((st *)((char *)(p)+((sz)-sizeof(st)))) | 10 | #define GET_END_PTR(st,p,sz) ((st *)((char *)(p)+((sz)-sizeof(st)))) |
@@ -28,7 +20,6 @@ | |||
28 | 20 | ||
29 | #define AFFS_CACHE_SIZE PAGE_SIZE | 21 | #define AFFS_CACHE_SIZE PAGE_SIZE |
30 | 22 | ||
31 | #define AFFS_MAX_PREALLOC 32 | ||
32 | #define AFFS_LC_SIZE (AFFS_CACHE_SIZE/sizeof(u32)/2) | 23 | #define AFFS_LC_SIZE (AFFS_CACHE_SIZE/sizeof(u32)/2) |
33 | #define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2) | 24 | #define AFFS_AC_SIZE (AFFS_CACHE_SIZE/sizeof(struct affs_ext_key)/2) |
34 | #define AFFS_AC_MASK (AFFS_AC_SIZE-1) | 25 | #define AFFS_AC_MASK (AFFS_AC_SIZE-1) |
@@ -118,6 +109,7 @@ struct affs_sb_info { | |||
118 | #define SF_OFS 0x0200 /* Old filesystem */ | 109 | #define SF_OFS 0x0200 /* Old filesystem */ |
119 | #define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */ | 110 | #define SF_PREFIX 0x0400 /* Buffer for prefix is allocated */ |
120 | #define SF_VERBOSE 0x0800 /* Talk about fs when mounting */ | 111 | #define SF_VERBOSE 0x0800 /* Talk about fs when mounting */ |
112 | #define SF_NO_TRUNCATE 0x1000 /* Don't truncate filenames */ | ||
121 | 113 | ||
122 | /* short cut to get to the affs specific sb data */ | 114 | /* short cut to get to the affs specific sb data */ |
123 | static inline struct affs_sb_info *AFFS_SB(struct super_block *sb) | 115 | static inline struct affs_sb_info *AFFS_SB(struct super_block *sb) |
@@ -137,9 +129,13 @@ extern void affs_fix_checksum(struct super_block *sb, struct buffer_head *bh); | |||
137 | extern void secs_to_datestamp(time_t secs, struct affs_date *ds); | 129 | extern void secs_to_datestamp(time_t secs, struct affs_date *ds); |
138 | extern umode_t prot_to_mode(u32 prot); | 130 | extern umode_t prot_to_mode(u32 prot); |
139 | extern void mode_to_prot(struct inode *inode); | 131 | extern void mode_to_prot(struct inode *inode); |
140 | extern void affs_error(struct super_block *sb, const char *function, const char *fmt, ...); | 132 | extern void affs_error(struct super_block *sb, const char *function, |
141 | extern void affs_warning(struct super_block *sb, const char *function, const char *fmt, ...); | 133 | const char *fmt, ...); |
142 | extern int affs_check_name(const unsigned char *name, int len); | 134 | extern void affs_warning(struct super_block *sb, const char *function, |
135 | const char *fmt, ...); | ||
136 | extern bool affs_nofilenametruncate(const struct dentry *dentry); | ||
137 | extern int affs_check_name(const unsigned char *name, int len, | ||
138 | bool notruncate); | ||
143 | extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry); | 139 | extern int affs_copy_name(unsigned char *bstr, struct dentry *dentry); |
144 | 140 | ||
145 | /* bitmap. c */ | 141 | /* bitmap. c */ |
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c index d9a43674cb94..533a322c41c0 100644 --- a/fs/affs/amigaffs.c +++ b/fs/affs/amigaffs.c | |||
@@ -471,20 +471,27 @@ affs_warning(struct super_block *sb, const char *function, const char *fmt, ...) | |||
471 | function,ErrorBuffer); | 471 | function,ErrorBuffer); |
472 | } | 472 | } |
473 | 473 | ||
474 | bool | ||
475 | affs_nofilenametruncate(const struct dentry *dentry) | ||
476 | { | ||
477 | struct inode *inode = dentry->d_inode; | ||
478 | return AFFS_SB(inode->i_sb)->s_flags & SF_NO_TRUNCATE; | ||
479 | |||
480 | } | ||
481 | |||
474 | /* Check if the name is valid for a affs object. */ | 482 | /* Check if the name is valid for a affs object. */ |
475 | 483 | ||
476 | int | 484 | int |
477 | affs_check_name(const unsigned char *name, int len) | 485 | affs_check_name(const unsigned char *name, int len, bool notruncate) |
478 | { | 486 | { |
479 | int i; | 487 | int i; |
480 | 488 | ||
481 | if (len > 30) | 489 | if (len > 30) { |
482 | #ifdef AFFS_NO_TRUNCATE | 490 | if (notruncate) |
483 | return -ENAMETOOLONG; | 491 | return -ENAMETOOLONG; |
484 | #else | 492 | else |
485 | len = 30; | 493 | len = 30; |
486 | #endif | 494 | } |
487 | |||
488 | for (i = 0; i < len; i++) { | 495 | for (i = 0; i < len; i++) { |
489 | if (name[i] < ' ' || name[i] == ':' | 496 | if (name[i] < ' ' || name[i] == ':' |
490 | || (name[i] > 0x7e && name[i] < 0xa0)) | 497 | || (name[i] > 0x7e && name[i] < 0xa0)) |
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index f1eba8c3644e..cbbda476a805 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
@@ -52,8 +52,10 @@ affs_readdir(struct file *file, struct dir_context *ctx) | |||
52 | int hash_pos; | 52 | int hash_pos; |
53 | int chain_pos; | 53 | int chain_pos; |
54 | u32 ino; | 54 | u32 ino; |
55 | int error = 0; | ||
55 | 56 | ||
56 | pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos); | 57 | pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n", |
58 | inode->i_ino, (unsigned long)ctx->pos); | ||
57 | 59 | ||
58 | if (ctx->pos < 2) { | 60 | if (ctx->pos < 2) { |
59 | file->private_data = (void *)0; | 61 | file->private_data = (void *)0; |
@@ -72,7 +74,7 @@ affs_readdir(struct file *file, struct dir_context *ctx) | |||
72 | } | 74 | } |
73 | dir_bh = affs_bread(sb, inode->i_ino); | 75 | dir_bh = affs_bread(sb, inode->i_ino); |
74 | if (!dir_bh) | 76 | if (!dir_bh) |
75 | goto readdir_out; | 77 | goto out_unlock_dir; |
76 | 78 | ||
77 | /* If the directory hasn't changed since the last call to readdir(), | 79 | /* If the directory hasn't changed since the last call to readdir(), |
78 | * we can jump directly to where we left off. | 80 | * we can jump directly to where we left off. |
@@ -88,7 +90,8 @@ affs_readdir(struct file *file, struct dir_context *ctx) | |||
88 | fh_bh = affs_bread(sb, ino); | 90 | fh_bh = affs_bread(sb, ino); |
89 | if (!fh_bh) { | 91 | if (!fh_bh) { |
90 | affs_error(sb, "readdir","Cannot read block %d", i); | 92 | affs_error(sb, "readdir","Cannot read block %d", i); |
91 | return -EIO; | 93 | error = -EIO; |
94 | goto out_brelse_dir; | ||
92 | } | 95 | } |
93 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); | 96 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); |
94 | affs_brelse(fh_bh); | 97 | affs_brelse(fh_bh); |
@@ -107,29 +110,34 @@ inside: | |||
107 | do { | 110 | do { |
108 | fh_bh = affs_bread(sb, ino); | 111 | fh_bh = affs_bread(sb, ino); |
109 | if (!fh_bh) { | 112 | if (!fh_bh) { |
110 | affs_error(sb, "readdir","Cannot read block %d", ino); | 113 | affs_error(sb, "readdir", |
114 | "Cannot read block %d", ino); | ||
111 | break; | 115 | break; |
112 | } | 116 | } |
113 | 117 | ||
114 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); | 118 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); |
115 | name = AFFS_TAIL(sb, fh_bh)->name + 1; | 119 | name = AFFS_TAIL(sb, fh_bh)->name + 1; |
116 | pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n", | 120 | pr_debug("AFFS: readdir(): dir_emit(\"%.*s\", " |
121 | "ino=%u), hash=%d, f_pos=%x\n", | ||
117 | namelen, name, ino, hash_pos, (u32)ctx->pos); | 122 | namelen, name, ino, hash_pos, (u32)ctx->pos); |
123 | |||
118 | if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) | 124 | if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) |
119 | goto readdir_done; | 125 | goto done; |
120 | ctx->pos++; | 126 | ctx->pos++; |
121 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); | 127 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); |
122 | affs_brelse(fh_bh); | 128 | affs_brelse(fh_bh); |
123 | fh_bh = NULL; | 129 | fh_bh = NULL; |
124 | } while (ino); | 130 | } while (ino); |
125 | } | 131 | } |
126 | readdir_done: | 132 | done: |
127 | file->f_version = inode->i_version; | 133 | file->f_version = inode->i_version; |
128 | file->private_data = (void *)(long)ino; | 134 | file->private_data = (void *)(long)ino; |
135 | affs_brelse(fh_bh); | ||
129 | 136 | ||
130 | readdir_out: | 137 | out_brelse_dir: |
131 | affs_brelse(dir_bh); | 138 | affs_brelse(dir_bh); |
132 | affs_brelse(fh_bh); | 139 | |
140 | out_unlock_dir: | ||
133 | affs_unlock_dir(inode); | 141 | affs_unlock_dir(inode); |
134 | return 0; | 142 | return error; |
135 | } | 143 | } |
diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 0e092d08680e..96df91e8c334 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c | |||
@@ -259,7 +259,7 @@ affs_evict_inode(struct inode *inode) | |||
259 | { | 259 | { |
260 | unsigned long cache_page; | 260 | unsigned long cache_page; |
261 | pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); | 261 | pr_debug("AFFS: evict_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink); |
262 | truncate_inode_pages(&inode->i_data, 0); | 262 | truncate_inode_pages_final(&inode->i_data); |
263 | 263 | ||
264 | if (!inode->i_nlink) { | 264 | if (!inode->i_nlink) { |
265 | inode->i_size = 0; | 265 | inode->i_size = 0; |
diff --git a/fs/affs/namei.c b/fs/affs/namei.c index c36cbb4537a2..6dae1ccd176d 100644 --- a/fs/affs/namei.c +++ b/fs/affs/namei.c | |||
@@ -60,13 +60,13 @@ affs_get_toupper(struct super_block *sb) | |||
60 | * Note: the dentry argument is the parent dentry. | 60 | * Note: the dentry argument is the parent dentry. |
61 | */ | 61 | */ |
62 | static inline int | 62 | static inline int |
63 | __affs_hash_dentry(struct qstr *qstr, toupper_t toupper) | 63 | __affs_hash_dentry(struct qstr *qstr, toupper_t toupper, bool notruncate) |
64 | { | 64 | { |
65 | const u8 *name = qstr->name; | 65 | const u8 *name = qstr->name; |
66 | unsigned long hash; | 66 | unsigned long hash; |
67 | int i; | 67 | int i; |
68 | 68 | ||
69 | i = affs_check_name(qstr->name, qstr->len); | 69 | i = affs_check_name(qstr->name, qstr->len, notruncate); |
70 | if (i) | 70 | if (i) |
71 | return i; | 71 | return i; |
72 | 72 | ||
@@ -82,16 +82,22 @@ __affs_hash_dentry(struct qstr *qstr, toupper_t toupper) | |||
82 | static int | 82 | static int |
83 | affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) | 83 | affs_hash_dentry(const struct dentry *dentry, struct qstr *qstr) |
84 | { | 84 | { |
85 | return __affs_hash_dentry(qstr, affs_toupper); | 85 | return __affs_hash_dentry(qstr, affs_toupper, |
86 | affs_nofilenametruncate(dentry)); | ||
87 | |||
86 | } | 88 | } |
89 | |||
87 | static int | 90 | static int |
88 | affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr) | 91 | affs_intl_hash_dentry(const struct dentry *dentry, struct qstr *qstr) |
89 | { | 92 | { |
90 | return __affs_hash_dentry(qstr, affs_intl_toupper); | 93 | return __affs_hash_dentry(qstr, affs_intl_toupper, |
94 | affs_nofilenametruncate(dentry)); | ||
95 | |||
91 | } | 96 | } |
92 | 97 | ||
93 | static inline int __affs_compare_dentry(unsigned int len, | 98 | static inline int __affs_compare_dentry(unsigned int len, |
94 | const char *str, const struct qstr *name, toupper_t toupper) | 99 | const char *str, const struct qstr *name, toupper_t toupper, |
100 | bool notruncate) | ||
95 | { | 101 | { |
96 | const u8 *aname = str; | 102 | const u8 *aname = str; |
97 | const u8 *bname = name->name; | 103 | const u8 *bname = name->name; |
@@ -101,7 +107,7 @@ static inline int __affs_compare_dentry(unsigned int len, | |||
101 | * must be valid. 'name' must be validated first. | 107 | * must be valid. 'name' must be validated first. |
102 | */ | 108 | */ |
103 | 109 | ||
104 | if (affs_check_name(name->name, name->len)) | 110 | if (affs_check_name(name->name, name->len, notruncate)) |
105 | return 1; | 111 | return 1; |
106 | 112 | ||
107 | /* | 113 | /* |
@@ -126,13 +132,18 @@ static int | |||
126 | affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, | 132 | affs_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
127 | unsigned int len, const char *str, const struct qstr *name) | 133 | unsigned int len, const char *str, const struct qstr *name) |
128 | { | 134 | { |
129 | return __affs_compare_dentry(len, str, name, affs_toupper); | 135 | |
136 | return __affs_compare_dentry(len, str, name, affs_toupper, | ||
137 | affs_nofilenametruncate(parent)); | ||
130 | } | 138 | } |
139 | |||
131 | static int | 140 | static int |
132 | affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, | 141 | affs_intl_compare_dentry(const struct dentry *parent, const struct dentry *dentry, |
133 | unsigned int len, const char *str, const struct qstr *name) | 142 | unsigned int len, const char *str, const struct qstr *name) |
134 | { | 143 | { |
135 | return __affs_compare_dentry(len, str, name, affs_intl_toupper); | 144 | return __affs_compare_dentry(len, str, name, affs_intl_toupper, |
145 | affs_nofilenametruncate(parent)); | ||
146 | |||
136 | } | 147 | } |
137 | 148 | ||
138 | /* | 149 | /* |
@@ -411,7 +422,10 @@ affs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
411 | (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, | 422 | (u32)old_dir->i_ino, (int)old_dentry->d_name.len, old_dentry->d_name.name, |
412 | (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); | 423 | (u32)new_dir->i_ino, (int)new_dentry->d_name.len, new_dentry->d_name.name); |
413 | 424 | ||
414 | retval = affs_check_name(new_dentry->d_name.name,new_dentry->d_name.len); | 425 | retval = affs_check_name(new_dentry->d_name.name, |
426 | new_dentry->d_name.len, | ||
427 | affs_nofilenametruncate(old_dentry)); | ||
428 | |||
415 | if (retval) | 429 | if (retval) |
416 | return retval; | 430 | return retval; |
417 | 431 | ||
diff --git a/fs/affs/super.c b/fs/affs/super.c index d098731b82ff..6d589f28bf9b 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
@@ -128,7 +128,7 @@ static void init_once(void *foo) | |||
128 | inode_init_once(&ei->vfs_inode); | 128 | inode_init_once(&ei->vfs_inode); |
129 | } | 129 | } |
130 | 130 | ||
131 | static int init_inodecache(void) | 131 | static int __init init_inodecache(void) |
132 | { | 132 | { |
133 | affs_inode_cachep = kmem_cache_create("affs_inode_cache", | 133 | affs_inode_cachep = kmem_cache_create("affs_inode_cache", |
134 | sizeof(struct affs_inode_info), | 134 | sizeof(struct affs_inode_info), |
@@ -163,7 +163,7 @@ static const struct super_operations affs_sops = { | |||
163 | }; | 163 | }; |
164 | 164 | ||
165 | enum { | 165 | enum { |
166 | Opt_bs, Opt_mode, Opt_mufs, Opt_prefix, Opt_protect, | 166 | Opt_bs, Opt_mode, Opt_mufs, Opt_notruncate, Opt_prefix, Opt_protect, |
167 | Opt_reserved, Opt_root, Opt_setgid, Opt_setuid, | 167 | Opt_reserved, Opt_root, Opt_setgid, Opt_setuid, |
168 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, | 168 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, |
169 | }; | 169 | }; |
@@ -172,6 +172,7 @@ static const match_table_t tokens = { | |||
172 | {Opt_bs, "bs=%u"}, | 172 | {Opt_bs, "bs=%u"}, |
173 | {Opt_mode, "mode=%o"}, | 173 | {Opt_mode, "mode=%o"}, |
174 | {Opt_mufs, "mufs"}, | 174 | {Opt_mufs, "mufs"}, |
175 | {Opt_notruncate, "nofilenametruncate"}, | ||
175 | {Opt_prefix, "prefix=%s"}, | 176 | {Opt_prefix, "prefix=%s"}, |
176 | {Opt_protect, "protect"}, | 177 | {Opt_protect, "protect"}, |
177 | {Opt_reserved, "reserved=%u"}, | 178 | {Opt_reserved, "reserved=%u"}, |
@@ -233,6 +234,9 @@ parse_options(char *options, kuid_t *uid, kgid_t *gid, int *mode, int *reserved, | |||
233 | case Opt_mufs: | 234 | case Opt_mufs: |
234 | *mount_opts |= SF_MUFS; | 235 | *mount_opts |= SF_MUFS; |
235 | break; | 236 | break; |
237 | case Opt_notruncate: | ||
238 | *mount_opts |= SF_NO_TRUNCATE; | ||
239 | break; | ||
236 | case Opt_prefix: | 240 | case Opt_prefix: |
237 | *prefix = match_strdup(&args[0]); | 241 | *prefix = match_strdup(&args[0]); |
238 | if (!*prefix) | 242 | if (!*prefix) |
@@ -530,6 +534,7 @@ affs_remount(struct super_block *sb, int *flags, char *data) | |||
530 | 534 | ||
531 | pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); | 535 | pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data); |
532 | 536 | ||
537 | sync_filesystem(sb); | ||
533 | *flags |= MS_NODIRATIME; | 538 | *flags |= MS_NODIRATIME; |
534 | 539 | ||
535 | memcpy(volume, sbi->s_volume, 32); | 540 | memcpy(volume, sbi->s_volume, 32); |
diff --git a/fs/afs/inode.c b/fs/afs/inode.c index ce25d755b7aa..294671288449 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c | |||
@@ -422,7 +422,7 @@ void afs_evict_inode(struct inode *inode) | |||
422 | 422 | ||
423 | ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); | 423 | ASSERTCMP(inode->i_ino, ==, vnode->fid.vnode); |
424 | 424 | ||
425 | truncate_inode_pages(&inode->i_data, 0); | 425 | truncate_inode_pages_final(&inode->i_data); |
426 | clear_inode(inode); | 426 | clear_inode(inode); |
427 | 427 | ||
428 | afs_give_up_callback(vnode); | 428 | afs_give_up_callback(vnode); |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 6621f8008122..be75b500005d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
@@ -75,6 +75,7 @@ struct afs_call { | |||
75 | const struct afs_call_type *type; /* type of call */ | 75 | const struct afs_call_type *type; /* type of call */ |
76 | const struct afs_wait_mode *wait_mode; /* completion wait mode */ | 76 | const struct afs_wait_mode *wait_mode; /* completion wait mode */ |
77 | wait_queue_head_t waitq; /* processes awaiting completion */ | 77 | wait_queue_head_t waitq; /* processes awaiting completion */ |
78 | work_func_t async_workfn; | ||
78 | struct work_struct async_work; /* asynchronous work processor */ | 79 | struct work_struct async_work; /* asynchronous work processor */ |
79 | struct work_struct work; /* actual work processor */ | 80 | struct work_struct work; /* actual work processor */ |
80 | struct sk_buff_head rx_queue; /* received packets */ | 81 | struct sk_buff_head rx_queue; /* received packets */ |
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 8ad8c2a0703a..ef943df73b8c 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c | |||
@@ -644,7 +644,7 @@ static void afs_process_async_call(struct work_struct *work) | |||
644 | 644 | ||
645 | /* we can't just delete the call because the work item may be | 645 | /* we can't just delete the call because the work item may be |
646 | * queued */ | 646 | * queued */ |
647 | PREPARE_WORK(&call->async_work, afs_delete_async_call); | 647 | call->async_workfn = afs_delete_async_call; |
648 | queue_work(afs_async_calls, &call->async_work); | 648 | queue_work(afs_async_calls, &call->async_work); |
649 | } | 649 | } |
650 | 650 | ||
@@ -663,6 +663,13 @@ void afs_transfer_reply(struct afs_call *call, struct sk_buff *skb) | |||
663 | call->reply_size += len; | 663 | call->reply_size += len; |
664 | } | 664 | } |
665 | 665 | ||
666 | static void afs_async_workfn(struct work_struct *work) | ||
667 | { | ||
668 | struct afs_call *call = container_of(work, struct afs_call, async_work); | ||
669 | |||
670 | call->async_workfn(work); | ||
671 | } | ||
672 | |||
666 | /* | 673 | /* |
667 | * accept the backlog of incoming calls | 674 | * accept the backlog of incoming calls |
668 | */ | 675 | */ |
@@ -685,7 +692,8 @@ static void afs_collect_incoming_call(struct work_struct *work) | |||
685 | return; | 692 | return; |
686 | } | 693 | } |
687 | 694 | ||
688 | INIT_WORK(&call->async_work, afs_process_async_call); | 695 | call->async_workfn = afs_process_async_call; |
696 | INIT_WORK(&call->async_work, afs_async_workfn); | ||
689 | call->wait_mode = &afs_async_incoming_call; | 697 | call->wait_mode = &afs_async_incoming_call; |
690 | call->type = &afs_RXCMxxxx; | 698 | call->type = &afs_RXCMxxxx; |
691 | init_waitqueue_head(&call->waitq); | 699 | init_waitqueue_head(&call->waitq); |
diff --git a/fs/befs/Makefile b/fs/befs/Makefile index 2f370bd7a50d..8b9f66642a83 100644 --- a/fs/befs/Makefile +++ b/fs/befs/Makefile | |||
@@ -3,5 +3,5 @@ | |||
3 | # | 3 | # |
4 | 4 | ||
5 | obj-$(CONFIG_BEFS_FS) += befs.o | 5 | obj-$(CONFIG_BEFS_FS) += befs.o |
6 | 6 | ccflags-$(CONFIG_BEFS_DEBUG) += -DDEBUG | |
7 | befs-objs := datastream.o btree.o super.o inode.o debug.o io.o linuxvfs.o | 7 | befs-objs := datastream.o btree.o super.o inode.o debug.o io.o linuxvfs.o |
diff --git a/fs/befs/befs.h b/fs/befs/befs.h index b26642839156..3a7813ab8c95 100644 --- a/fs/befs/befs.h +++ b/fs/befs/befs.h | |||
@@ -88,8 +88,11 @@ enum befs_err { | |||
88 | 88 | ||
89 | /****************************/ | 89 | /****************************/ |
90 | /* debug.c */ | 90 | /* debug.c */ |
91 | __printf(2, 3) | ||
91 | void befs_error(const struct super_block *sb, const char *fmt, ...); | 92 | void befs_error(const struct super_block *sb, const char *fmt, ...); |
93 | __printf(2, 3) | ||
92 | void befs_warning(const struct super_block *sb, const char *fmt, ...); | 94 | void befs_warning(const struct super_block *sb, const char *fmt, ...); |
95 | __printf(2, 3) | ||
93 | void befs_debug(const struct super_block *sb, const char *fmt, ...); | 96 | void befs_debug(const struct super_block *sb, const char *fmt, ...); |
94 | 97 | ||
95 | void befs_dump_super_block(const struct super_block *sb, befs_super_block *); | 98 | void befs_dump_super_block(const struct super_block *sb, befs_super_block *); |
diff --git a/fs/befs/btree.c b/fs/befs/btree.c index 74e397db0b8b..a2cd305a993a 100644 --- a/fs/befs/btree.c +++ b/fs/befs/btree.c | |||
@@ -137,7 +137,7 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, | |||
137 | struct buffer_head *bh = NULL; | 137 | struct buffer_head *bh = NULL; |
138 | befs_disk_btree_super *od_sup = NULL; | 138 | befs_disk_btree_super *od_sup = NULL; |
139 | 139 | ||
140 | befs_debug(sb, "---> befs_btree_read_super()"); | 140 | befs_debug(sb, "---> %s", __func__); |
141 | 141 | ||
142 | bh = befs_read_datastream(sb, ds, 0, NULL); | 142 | bh = befs_read_datastream(sb, ds, 0, NULL); |
143 | 143 | ||
@@ -162,11 +162,11 @@ befs_bt_read_super(struct super_block *sb, befs_data_stream * ds, | |||
162 | goto error; | 162 | goto error; |
163 | } | 163 | } |
164 | 164 | ||
165 | befs_debug(sb, "<--- befs_btree_read_super()"); | 165 | befs_debug(sb, "<--- %s", __func__); |
166 | return BEFS_OK; | 166 | return BEFS_OK; |
167 | 167 | ||
168 | error: | 168 | error: |
169 | befs_debug(sb, "<--- befs_btree_read_super() ERROR"); | 169 | befs_debug(sb, "<--- %s ERROR", __func__); |
170 | return BEFS_ERR; | 170 | return BEFS_ERR; |
171 | } | 171 | } |
172 | 172 | ||
@@ -195,16 +195,16 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, | |||
195 | { | 195 | { |
196 | uint off = 0; | 196 | uint off = 0; |
197 | 197 | ||
198 | befs_debug(sb, "---> befs_bt_read_node()"); | 198 | befs_debug(sb, "---> %s", __func__); |
199 | 199 | ||
200 | if (node->bh) | 200 | if (node->bh) |
201 | brelse(node->bh); | 201 | brelse(node->bh); |
202 | 202 | ||
203 | node->bh = befs_read_datastream(sb, ds, node_off, &off); | 203 | node->bh = befs_read_datastream(sb, ds, node_off, &off); |
204 | if (!node->bh) { | 204 | if (!node->bh) { |
205 | befs_error(sb, "befs_bt_read_node() failed to read " | 205 | befs_error(sb, "%s failed to read " |
206 | "node at %Lu", node_off); | 206 | "node at %llu", __func__, node_off); |
207 | befs_debug(sb, "<--- befs_bt_read_node() ERROR"); | 207 | befs_debug(sb, "<--- %s ERROR", __func__); |
208 | 208 | ||
209 | return BEFS_ERR; | 209 | return BEFS_ERR; |
210 | } | 210 | } |
@@ -221,7 +221,7 @@ befs_bt_read_node(struct super_block *sb, befs_data_stream * ds, | |||
221 | node->head.all_key_length = | 221 | node->head.all_key_length = |
222 | fs16_to_cpu(sb, node->od_node->all_key_length); | 222 | fs16_to_cpu(sb, node->od_node->all_key_length); |
223 | 223 | ||
224 | befs_debug(sb, "<--- befs_btree_read_node()"); | 224 | befs_debug(sb, "<--- %s", __func__); |
225 | return BEFS_OK; | 225 | return BEFS_OK; |
226 | } | 226 | } |
227 | 227 | ||
@@ -252,7 +252,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, | |||
252 | befs_off_t node_off; | 252 | befs_off_t node_off; |
253 | int res; | 253 | int res; |
254 | 254 | ||
255 | befs_debug(sb, "---> befs_btree_find() Key: %s", key); | 255 | befs_debug(sb, "---> %s Key: %s", __func__, key); |
256 | 256 | ||
257 | if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) { | 257 | if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) { |
258 | befs_error(sb, | 258 | befs_error(sb, |
@@ -263,7 +263,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, | |||
263 | this_node = kmalloc(sizeof (befs_btree_node), | 263 | this_node = kmalloc(sizeof (befs_btree_node), |
264 | GFP_NOFS); | 264 | GFP_NOFS); |
265 | if (!this_node) { | 265 | if (!this_node) { |
266 | befs_error(sb, "befs_btree_find() failed to allocate %u " | 266 | befs_error(sb, "befs_btree_find() failed to allocate %zu " |
267 | "bytes of memory", sizeof (befs_btree_node)); | 267 | "bytes of memory", sizeof (befs_btree_node)); |
268 | goto error; | 268 | goto error; |
269 | } | 269 | } |
@@ -274,7 +274,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, | |||
274 | node_off = bt_super.root_node_ptr; | 274 | node_off = bt_super.root_node_ptr; |
275 | if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) { | 275 | if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) { |
276 | befs_error(sb, "befs_btree_find() failed to read " | 276 | befs_error(sb, "befs_btree_find() failed to read " |
277 | "node at %Lu", node_off); | 277 | "node at %llu", node_off); |
278 | goto error_alloc; | 278 | goto error_alloc; |
279 | } | 279 | } |
280 | 280 | ||
@@ -285,7 +285,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, | |||
285 | /* if no match, go to overflow node */ | 285 | /* if no match, go to overflow node */ |
286 | if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) { | 286 | if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) { |
287 | befs_error(sb, "befs_btree_find() failed to read " | 287 | befs_error(sb, "befs_btree_find() failed to read " |
288 | "node at %Lu", node_off); | 288 | "node at %llu", node_off); |
289 | goto error_alloc; | 289 | goto error_alloc; |
290 | } | 290 | } |
291 | } | 291 | } |
@@ -298,11 +298,11 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, | |||
298 | kfree(this_node); | 298 | kfree(this_node); |
299 | 299 | ||
300 | if (res != BEFS_BT_MATCH) { | 300 | if (res != BEFS_BT_MATCH) { |
301 | befs_debug(sb, "<--- befs_btree_find() Key %s not found", key); | 301 | befs_debug(sb, "<--- %s Key %s not found", __func__, key); |
302 | *value = 0; | 302 | *value = 0; |
303 | return BEFS_BT_NOT_FOUND; | 303 | return BEFS_BT_NOT_FOUND; |
304 | } | 304 | } |
305 | befs_debug(sb, "<--- befs_btree_find() Found key %s, value %Lu", | 305 | befs_debug(sb, "<--- %s Found key %s, value %llu", __func__, |
306 | key, *value); | 306 | key, *value); |
307 | return BEFS_OK; | 307 | return BEFS_OK; |
308 | 308 | ||
@@ -310,7 +310,7 @@ befs_btree_find(struct super_block *sb, befs_data_stream * ds, | |||
310 | kfree(this_node); | 310 | kfree(this_node); |
311 | error: | 311 | error: |
312 | *value = 0; | 312 | *value = 0; |
313 | befs_debug(sb, "<--- befs_btree_find() ERROR"); | 313 | befs_debug(sb, "<--- %s ERROR", __func__); |
314 | return BEFS_ERR; | 314 | return BEFS_ERR; |
315 | } | 315 | } |
316 | 316 | ||
@@ -343,7 +343,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node, | |||
343 | char *thiskey; | 343 | char *thiskey; |
344 | fs64 *valarray; | 344 | fs64 *valarray; |
345 | 345 | ||
346 | befs_debug(sb, "---> befs_find_key() %s", findkey); | 346 | befs_debug(sb, "---> %s %s", __func__, findkey); |
347 | 347 | ||
348 | *value = 0; | 348 | *value = 0; |
349 | 349 | ||
@@ -355,7 +355,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node, | |||
355 | 355 | ||
356 | eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len); | 356 | eq = befs_compare_strings(thiskey, keylen, findkey, findkey_len); |
357 | if (eq < 0) { | 357 | if (eq < 0) { |
358 | befs_debug(sb, "<--- befs_find_key() %s not found", findkey); | 358 | befs_debug(sb, "<--- %s %s not found", __func__, findkey); |
359 | return BEFS_BT_NOT_FOUND; | 359 | return BEFS_BT_NOT_FOUND; |
360 | } | 360 | } |
361 | 361 | ||
@@ -373,8 +373,8 @@ befs_find_key(struct super_block *sb, befs_btree_node * node, | |||
373 | findkey_len); | 373 | findkey_len); |
374 | 374 | ||
375 | if (eq == 0) { | 375 | if (eq == 0) { |
376 | befs_debug(sb, "<--- befs_find_key() found %s at %d", | 376 | befs_debug(sb, "<--- %s found %s at %d", |
377 | thiskey, mid); | 377 | __func__, thiskey, mid); |
378 | 378 | ||
379 | *value = fs64_to_cpu(sb, valarray[mid]); | 379 | *value = fs64_to_cpu(sb, valarray[mid]); |
380 | return BEFS_BT_MATCH; | 380 | return BEFS_BT_MATCH; |
@@ -388,7 +388,7 @@ befs_find_key(struct super_block *sb, befs_btree_node * node, | |||
388 | *value = fs64_to_cpu(sb, valarray[mid + 1]); | 388 | *value = fs64_to_cpu(sb, valarray[mid + 1]); |
389 | else | 389 | else |
390 | *value = fs64_to_cpu(sb, valarray[mid]); | 390 | *value = fs64_to_cpu(sb, valarray[mid]); |
391 | befs_debug(sb, "<--- befs_find_key() found %s at %d", thiskey, mid); | 391 | befs_debug(sb, "<--- %s found %s at %d", __func__, thiskey, mid); |
392 | return BEFS_BT_PARMATCH; | 392 | return BEFS_BT_PARMATCH; |
393 | } | 393 | } |
394 | 394 | ||
@@ -428,7 +428,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
428 | 428 | ||
429 | uint key_sum = 0; | 429 | uint key_sum = 0; |
430 | 430 | ||
431 | befs_debug(sb, "---> befs_btree_read()"); | 431 | befs_debug(sb, "---> %s", __func__); |
432 | 432 | ||
433 | if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) { | 433 | if (befs_bt_read_super(sb, ds, &bt_super) != BEFS_OK) { |
434 | befs_error(sb, | 434 | befs_error(sb, |
@@ -437,7 +437,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
437 | } | 437 | } |
438 | 438 | ||
439 | if ((this_node = kmalloc(sizeof (befs_btree_node), GFP_NOFS)) == NULL) { | 439 | if ((this_node = kmalloc(sizeof (befs_btree_node), GFP_NOFS)) == NULL) { |
440 | befs_error(sb, "befs_btree_read() failed to allocate %u " | 440 | befs_error(sb, "befs_btree_read() failed to allocate %zu " |
441 | "bytes of memory", sizeof (befs_btree_node)); | 441 | "bytes of memory", sizeof (befs_btree_node)); |
442 | goto error; | 442 | goto error; |
443 | } | 443 | } |
@@ -452,7 +452,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
452 | kfree(this_node); | 452 | kfree(this_node); |
453 | *value = 0; | 453 | *value = 0; |
454 | *keysize = 0; | 454 | *keysize = 0; |
455 | befs_debug(sb, "<--- befs_btree_read() Tree is EMPTY"); | 455 | befs_debug(sb, "<--- %s Tree is EMPTY", __func__); |
456 | return BEFS_BT_EMPTY; | 456 | return BEFS_BT_EMPTY; |
457 | } else if (res == BEFS_ERR) { | 457 | } else if (res == BEFS_ERR) { |
458 | goto error_alloc; | 458 | goto error_alloc; |
@@ -467,7 +467,8 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
467 | *keysize = 0; | 467 | *keysize = 0; |
468 | *value = 0; | 468 | *value = 0; |
469 | befs_debug(sb, | 469 | befs_debug(sb, |
470 | "<--- befs_btree_read() END of keys at %Lu", | 470 | "<--- %s END of keys at %llu", __func__, |
471 | (unsigned long long) | ||
471 | key_sum + this_node->head.all_key_count); | 472 | key_sum + this_node->head.all_key_count); |
472 | brelse(this_node->bh); | 473 | brelse(this_node->bh); |
473 | kfree(this_node); | 474 | kfree(this_node); |
@@ -478,8 +479,8 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
478 | node_off = this_node->head.right; | 479 | node_off = this_node->head.right; |
479 | 480 | ||
480 | if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) { | 481 | if (befs_bt_read_node(sb, ds, this_node, node_off) != BEFS_OK) { |
481 | befs_error(sb, "befs_btree_read() failed to read " | 482 | befs_error(sb, "%s failed to read node at %llu", |
482 | "node at %Lu", node_off); | 483 | __func__, (unsigned long long)node_off); |
483 | goto error_alloc; | 484 | goto error_alloc; |
484 | } | 485 | } |
485 | } | 486 | } |
@@ -492,11 +493,13 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
492 | 493 | ||
493 | keystart = befs_bt_get_key(sb, this_node, cur_key, &keylen); | 494 | keystart = befs_bt_get_key(sb, this_node, cur_key, &keylen); |
494 | 495 | ||
495 | befs_debug(sb, "Read [%Lu,%d]: keysize %d", node_off, cur_key, keylen); | 496 | befs_debug(sb, "Read [%llu,%d]: keysize %d", |
497 | (long long unsigned int)node_off, (int)cur_key, | ||
498 | (int)keylen); | ||
496 | 499 | ||
497 | if (bufsize < keylen + 1) { | 500 | if (bufsize < keylen + 1) { |
498 | befs_error(sb, "befs_btree_read() keybuf too small (%u) " | 501 | befs_error(sb, "%s keybuf too small (%zu) " |
499 | "for key of size %d", bufsize, keylen); | 502 | "for key of size %d", __func__, bufsize, keylen); |
500 | brelse(this_node->bh); | 503 | brelse(this_node->bh); |
501 | goto error_alloc; | 504 | goto error_alloc; |
502 | }; | 505 | }; |
@@ -506,13 +509,13 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
506 | *keysize = keylen; | 509 | *keysize = keylen; |
507 | keybuf[keylen] = '\0'; | 510 | keybuf[keylen] = '\0'; |
508 | 511 | ||
509 | befs_debug(sb, "Read [%Lu,%d]: Key \"%.*s\", Value %Lu", node_off, | 512 | befs_debug(sb, "Read [%llu,%d]: Key \"%.*s\", Value %llu", node_off, |
510 | cur_key, keylen, keybuf, *value); | 513 | cur_key, keylen, keybuf, *value); |
511 | 514 | ||
512 | brelse(this_node->bh); | 515 | brelse(this_node->bh); |
513 | kfree(this_node); | 516 | kfree(this_node); |
514 | 517 | ||
515 | befs_debug(sb, "<--- befs_btree_read()"); | 518 | befs_debug(sb, "<--- %s", __func__); |
516 | 519 | ||
517 | return BEFS_OK; | 520 | return BEFS_OK; |
518 | 521 | ||
@@ -522,7 +525,7 @@ befs_btree_read(struct super_block *sb, befs_data_stream * ds, | |||
522 | error: | 525 | error: |
523 | *keysize = 0; | 526 | *keysize = 0; |
524 | *value = 0; | 527 | *value = 0; |
525 | befs_debug(sb, "<--- befs_btree_read() ERROR"); | 528 | befs_debug(sb, "<--- %s ERROR", __func__); |
526 | return BEFS_ERR; | 529 | return BEFS_ERR; |
527 | } | 530 | } |
528 | 531 | ||
@@ -547,26 +550,26 @@ befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds, | |||
547 | befs_off_t * node_off) | 550 | befs_off_t * node_off) |
548 | { | 551 | { |
549 | 552 | ||
550 | befs_debug(sb, "---> befs_btree_seekleaf()"); | 553 | befs_debug(sb, "---> %s", __func__); |
551 | 554 | ||
552 | if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) { | 555 | if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) { |
553 | befs_error(sb, "befs_btree_seekleaf() failed to read " | 556 | befs_error(sb, "%s failed to read " |
554 | "node at %Lu", *node_off); | 557 | "node at %llu", __func__, *node_off); |
555 | goto error; | 558 | goto error; |
556 | } | 559 | } |
557 | befs_debug(sb, "Seekleaf to root node %Lu", *node_off); | 560 | befs_debug(sb, "Seekleaf to root node %llu", *node_off); |
558 | 561 | ||
559 | if (this_node->head.all_key_count == 0 && befs_leafnode(this_node)) { | 562 | if (this_node->head.all_key_count == 0 && befs_leafnode(this_node)) { |
560 | befs_debug(sb, "<--- befs_btree_seekleaf() Tree is EMPTY"); | 563 | befs_debug(sb, "<--- %s Tree is EMPTY", __func__); |
561 | return BEFS_BT_EMPTY; | 564 | return BEFS_BT_EMPTY; |
562 | } | 565 | } |
563 | 566 | ||
564 | while (!befs_leafnode(this_node)) { | 567 | while (!befs_leafnode(this_node)) { |
565 | 568 | ||
566 | if (this_node->head.all_key_count == 0) { | 569 | if (this_node->head.all_key_count == 0) { |
567 | befs_debug(sb, "befs_btree_seekleaf() encountered " | 570 | befs_debug(sb, "%s encountered " |
568 | "an empty interior node: %Lu. Using Overflow " | 571 | "an empty interior node: %llu. Using Overflow " |
569 | "node: %Lu", *node_off, | 572 | "node: %llu", __func__, *node_off, |
570 | this_node->head.overflow); | 573 | this_node->head.overflow); |
571 | *node_off = this_node->head.overflow; | 574 | *node_off = this_node->head.overflow; |
572 | } else { | 575 | } else { |
@@ -574,19 +577,19 @@ befs_btree_seekleaf(struct super_block *sb, befs_data_stream * ds, | |||
574 | *node_off = fs64_to_cpu(sb, valarray[0]); | 577 | *node_off = fs64_to_cpu(sb, valarray[0]); |
575 | } | 578 | } |
576 | if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) { | 579 | if (befs_bt_read_node(sb, ds, this_node, *node_off) != BEFS_OK) { |
577 | befs_error(sb, "befs_btree_seekleaf() failed to read " | 580 | befs_error(sb, "%s failed to read " |
578 | "node at %Lu", *node_off); | 581 | "node at %llu", __func__, *node_off); |
579 | goto error; | 582 | goto error; |
580 | } | 583 | } |
581 | 584 | ||
582 | befs_debug(sb, "Seekleaf to child node %Lu", *node_off); | 585 | befs_debug(sb, "Seekleaf to child node %llu", *node_off); |
583 | } | 586 | } |
584 | befs_debug(sb, "Node %Lu is a leaf node", *node_off); | 587 | befs_debug(sb, "Node %llu is a leaf node", *node_off); |
585 | 588 | ||
586 | return BEFS_OK; | 589 | return BEFS_OK; |
587 | 590 | ||
588 | error: | 591 | error: |
589 | befs_debug(sb, "<--- befs_btree_seekleaf() ERROR"); | 592 | befs_debug(sb, "<--- %s ERROR", __func__); |
590 | return BEFS_ERR; | 593 | return BEFS_ERR; |
591 | } | 594 | } |
592 | 595 | ||
diff --git a/fs/befs/datastream.c b/fs/befs/datastream.c index 59096b5e0fc7..c467bebd50af 100644 --- a/fs/befs/datastream.c +++ b/fs/befs/datastream.c | |||
@@ -52,26 +52,25 @@ befs_read_datastream(struct super_block *sb, befs_data_stream * ds, | |||
52 | befs_block_run run; | 52 | befs_block_run run; |
53 | befs_blocknr_t block; /* block coresponding to pos */ | 53 | befs_blocknr_t block; /* block coresponding to pos */ |
54 | 54 | ||
55 | befs_debug(sb, "---> befs_read_datastream() %Lu", pos); | 55 | befs_debug(sb, "---> %s %llu", __func__, pos); |
56 | block = pos >> BEFS_SB(sb)->block_shift; | 56 | block = pos >> BEFS_SB(sb)->block_shift; |
57 | if (off) | 57 | if (off) |
58 | *off = pos - (block << BEFS_SB(sb)->block_shift); | 58 | *off = pos - (block << BEFS_SB(sb)->block_shift); |
59 | 59 | ||
60 | if (befs_fblock2brun(sb, ds, block, &run) != BEFS_OK) { | 60 | if (befs_fblock2brun(sb, ds, block, &run) != BEFS_OK) { |
61 | befs_error(sb, "BeFS: Error finding disk addr of block %lu", | 61 | befs_error(sb, "BeFS: Error finding disk addr of block %lu", |
62 | block); | 62 | (unsigned long)block); |
63 | befs_debug(sb, "<--- befs_read_datastream() ERROR"); | 63 | befs_debug(sb, "<--- %s ERROR", __func__); |
64 | return NULL; | 64 | return NULL; |
65 | } | 65 | } |
66 | bh = befs_bread_iaddr(sb, run); | 66 | bh = befs_bread_iaddr(sb, run); |
67 | if (!bh) { | 67 | if (!bh) { |
68 | befs_error(sb, "BeFS: Error reading block %lu from datastream", | 68 | befs_error(sb, "BeFS: Error reading block %lu from datastream", |
69 | block); | 69 | (unsigned long)block); |
70 | return NULL; | 70 | return NULL; |
71 | } | 71 | } |
72 | 72 | ||
73 | befs_debug(sb, "<--- befs_read_datastream() read data, starting at %Lu", | 73 | befs_debug(sb, "<--- %s read data, starting at %llu", __func__, pos); |
74 | pos); | ||
75 | 74 | ||
76 | return bh; | 75 | return bh; |
77 | } | 76 | } |
@@ -106,7 +105,8 @@ befs_fblock2brun(struct super_block *sb, befs_data_stream * data, | |||
106 | } else { | 105 | } else { |
107 | befs_error(sb, | 106 | befs_error(sb, |
108 | "befs_fblock2brun() was asked to find block %lu, " | 107 | "befs_fblock2brun() was asked to find block %lu, " |
109 | "which is not mapped by the datastream\n", fblock); | 108 | "which is not mapped by the datastream\n", |
109 | (unsigned long)fblock); | ||
110 | err = BEFS_ERR; | 110 | err = BEFS_ERR; |
111 | } | 111 | } |
112 | return err; | 112 | return err; |
@@ -128,14 +128,14 @@ befs_read_lsymlink(struct super_block * sb, befs_data_stream * ds, void *buff, | |||
128 | befs_off_t bytes_read = 0; /* bytes readed */ | 128 | befs_off_t bytes_read = 0; /* bytes readed */ |
129 | u16 plen; | 129 | u16 plen; |
130 | struct buffer_head *bh = NULL; | 130 | struct buffer_head *bh = NULL; |
131 | befs_debug(sb, "---> befs_read_lsymlink() length: %Lu", len); | 131 | befs_debug(sb, "---> %s length: %llu", __func__, len); |
132 | 132 | ||
133 | while (bytes_read < len) { | 133 | while (bytes_read < len) { |
134 | bh = befs_read_datastream(sb, ds, bytes_read, NULL); | 134 | bh = befs_read_datastream(sb, ds, bytes_read, NULL); |
135 | if (!bh) { | 135 | if (!bh) { |
136 | befs_error(sb, "BeFS: Error reading datastream block " | 136 | befs_error(sb, "BeFS: Error reading datastream block " |
137 | "starting from %Lu", bytes_read); | 137 | "starting from %llu", bytes_read); |
138 | befs_debug(sb, "<--- befs_read_lsymlink() ERROR"); | 138 | befs_debug(sb, "<--- %s ERROR", __func__); |
139 | return bytes_read; | 139 | return bytes_read; |
140 | 140 | ||
141 | } | 141 | } |
@@ -146,7 +146,8 @@ befs_read_lsymlink(struct super_block * sb, befs_data_stream * ds, void *buff, | |||
146 | bytes_read += plen; | 146 | bytes_read += plen; |
147 | } | 147 | } |
148 | 148 | ||
149 | befs_debug(sb, "<--- befs_read_lsymlink() read %u bytes", bytes_read); | 149 | befs_debug(sb, "<--- %s read %u bytes", __func__, (unsigned int) |
150 | bytes_read); | ||
150 | return bytes_read; | 151 | return bytes_read; |
151 | } | 152 | } |
152 | 153 | ||
@@ -169,7 +170,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds) | |||
169 | befs_blocknr_t metablocks; /* FS metadata blocks */ | 170 | befs_blocknr_t metablocks; /* FS metadata blocks */ |
170 | befs_sb_info *befs_sb = BEFS_SB(sb); | 171 | befs_sb_info *befs_sb = BEFS_SB(sb); |
171 | 172 | ||
172 | befs_debug(sb, "---> befs_count_blocks()"); | 173 | befs_debug(sb, "---> %s", __func__); |
173 | 174 | ||
174 | datablocks = ds->size >> befs_sb->block_shift; | 175 | datablocks = ds->size >> befs_sb->block_shift; |
175 | if (ds->size & (befs_sb->block_size - 1)) | 176 | if (ds->size & (befs_sb->block_size - 1)) |
@@ -206,7 +207,7 @@ befs_count_blocks(struct super_block * sb, befs_data_stream * ds) | |||
206 | } | 207 | } |
207 | 208 | ||
208 | blocks = datablocks + metablocks; | 209 | blocks = datablocks + metablocks; |
209 | befs_debug(sb, "<--- befs_count_blocks() %u blocks", blocks); | 210 | befs_debug(sb, "<--- %s %u blocks", __func__, (unsigned int)blocks); |
210 | 211 | ||
211 | return blocks; | 212 | return blocks; |
212 | } | 213 | } |
@@ -251,11 +252,11 @@ befs_find_brun_direct(struct super_block *sb, befs_data_stream * data, | |||
251 | befs_blocknr_t max_block = | 252 | befs_blocknr_t max_block = |
252 | data->max_direct_range >> BEFS_SB(sb)->block_shift; | 253 | data->max_direct_range >> BEFS_SB(sb)->block_shift; |
253 | 254 | ||
254 | befs_debug(sb, "---> befs_find_brun_direct(), find %lu", blockno); | 255 | befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno); |
255 | 256 | ||
256 | if (blockno > max_block) { | 257 | if (blockno > max_block) { |
257 | befs_error(sb, "befs_find_brun_direct() passed block outside of" | 258 | befs_error(sb, "%s passed block outside of direct region", |
258 | "direct region"); | 259 | __func__); |
259 | return BEFS_ERR; | 260 | return BEFS_ERR; |
260 | } | 261 | } |
261 | 262 | ||
@@ -267,13 +268,14 @@ befs_find_brun_direct(struct super_block *sb, befs_data_stream * data, | |||
267 | run->start = array[i].start + offset; | 268 | run->start = array[i].start + offset; |
268 | run->len = array[i].len - offset; | 269 | run->len = array[i].len - offset; |
269 | 270 | ||
270 | befs_debug(sb, "---> befs_find_brun_direct(), " | 271 | befs_debug(sb, "---> %s, " |
271 | "found %lu at direct[%d]", blockno, i); | 272 | "found %lu at direct[%d]", __func__, |
273 | (unsigned long)blockno, i); | ||
272 | return BEFS_OK; | 274 | return BEFS_OK; |
273 | } | 275 | } |
274 | } | 276 | } |
275 | 277 | ||
276 | befs_debug(sb, "---> befs_find_brun_direct() ERROR"); | 278 | befs_debug(sb, "---> %s ERROR", __func__); |
277 | return BEFS_ERR; | 279 | return BEFS_ERR; |
278 | } | 280 | } |
279 | 281 | ||
@@ -316,7 +318,7 @@ befs_find_brun_indirect(struct super_block *sb, | |||
316 | befs_blocknr_t indirblockno = iaddr2blockno(sb, &indirect); | 318 | befs_blocknr_t indirblockno = iaddr2blockno(sb, &indirect); |
317 | int arraylen = befs_iaddrs_per_block(sb); | 319 | int arraylen = befs_iaddrs_per_block(sb); |
318 | 320 | ||
319 | befs_debug(sb, "---> befs_find_brun_indirect(), find %lu", blockno); | 321 | befs_debug(sb, "---> %s, find %lu", __func__, (unsigned long)blockno); |
320 | 322 | ||
321 | indir_start_blk = data->max_direct_range >> BEFS_SB(sb)->block_shift; | 323 | indir_start_blk = data->max_direct_range >> BEFS_SB(sb)->block_shift; |
322 | search_blk = blockno - indir_start_blk; | 324 | search_blk = blockno - indir_start_blk; |
@@ -325,10 +327,9 @@ befs_find_brun_indirect(struct super_block *sb, | |||
325 | for (i = 0; i < indirect.len; i++) { | 327 | for (i = 0; i < indirect.len; i++) { |
326 | indirblock = befs_bread(sb, indirblockno + i); | 328 | indirblock = befs_bread(sb, indirblockno + i); |
327 | if (indirblock == NULL) { | 329 | if (indirblock == NULL) { |
328 | befs_debug(sb, | 330 | befs_debug(sb, "---> %s failed to read " |
329 | "---> befs_find_brun_indirect() failed to " | 331 | "disk block %lu from the indirect brun", |
330 | "read disk block %lu from the indirect brun", | 332 | __func__, (unsigned long)indirblockno + i); |
331 | indirblockno + i); | ||
332 | return BEFS_ERR; | 333 | return BEFS_ERR; |
333 | } | 334 | } |
334 | 335 | ||
@@ -348,9 +349,10 @@ befs_find_brun_indirect(struct super_block *sb, | |||
348 | 349 | ||
349 | brelse(indirblock); | 350 | brelse(indirblock); |
350 | befs_debug(sb, | 351 | befs_debug(sb, |
351 | "<--- befs_find_brun_indirect() found " | 352 | "<--- %s found file block " |
352 | "file block %lu at indirect[%d]", | 353 | "%lu at indirect[%d]", __func__, |
353 | blockno, j + (i * arraylen)); | 354 | (unsigned long)blockno, |
355 | j + (i * arraylen)); | ||
354 | return BEFS_OK; | 356 | return BEFS_OK; |
355 | } | 357 | } |
356 | sum += len; | 358 | sum += len; |
@@ -360,10 +362,10 @@ befs_find_brun_indirect(struct super_block *sb, | |||
360 | } | 362 | } |
361 | 363 | ||
362 | /* Only fallthrough is an error */ | 364 | /* Only fallthrough is an error */ |
363 | befs_error(sb, "BeFS: befs_find_brun_indirect() failed to find " | 365 | befs_error(sb, "BeFS: %s failed to find " |
364 | "file block %lu", blockno); | 366 | "file block %lu", __func__, (unsigned long)blockno); |
365 | 367 | ||
366 | befs_debug(sb, "<--- befs_find_brun_indirect() ERROR"); | 368 | befs_debug(sb, "<--- %s ERROR", __func__); |
367 | return BEFS_ERR; | 369 | return BEFS_ERR; |
368 | } | 370 | } |
369 | 371 | ||
@@ -444,7 +446,7 @@ befs_find_brun_dblindirect(struct super_block *sb, | |||
444 | size_t diblklen = iblklen * befs_iaddrs_per_block(sb) | 446 | size_t diblklen = iblklen * befs_iaddrs_per_block(sb) |
445 | * BEFS_DBLINDIR_BRUN_LEN; | 447 | * BEFS_DBLINDIR_BRUN_LEN; |
446 | 448 | ||
447 | befs_debug(sb, "---> befs_find_brun_dblindirect() find %lu", blockno); | 449 | befs_debug(sb, "---> %s find %lu", __func__, (unsigned long)blockno); |
448 | 450 | ||
449 | /* First, discover which of the double_indir->indir blocks | 451 | /* First, discover which of the double_indir->indir blocks |
450 | * contains pos. Then figure out how much of pos that | 452 | * contains pos. Then figure out how much of pos that |
@@ -460,8 +462,9 @@ befs_find_brun_dblindirect(struct super_block *sb, | |||
460 | dbl_which_block = dblindir_indx / befs_iaddrs_per_block(sb); | 462 | dbl_which_block = dblindir_indx / befs_iaddrs_per_block(sb); |
461 | if (dbl_which_block > data->double_indirect.len) { | 463 | if (dbl_which_block > data->double_indirect.len) { |
462 | befs_error(sb, "The double-indirect index calculated by " | 464 | befs_error(sb, "The double-indirect index calculated by " |
463 | "befs_read_brun_dblindirect(), %d, is outside the range " | 465 | "%s, %d, is outside the range " |
464 | "of the double-indirect block", dblindir_indx); | 466 | "of the double-indirect block", __func__, |
467 | dblindir_indx); | ||
465 | return BEFS_ERR; | 468 | return BEFS_ERR; |
466 | } | 469 | } |
467 | 470 | ||
@@ -469,10 +472,10 @@ befs_find_brun_dblindirect(struct super_block *sb, | |||
469 | befs_bread(sb, iaddr2blockno(sb, &data->double_indirect) + | 472 | befs_bread(sb, iaddr2blockno(sb, &data->double_indirect) + |
470 | dbl_which_block); | 473 | dbl_which_block); |
471 | if (dbl_indir_block == NULL) { | 474 | if (dbl_indir_block == NULL) { |
472 | befs_error(sb, "befs_read_brun_dblindirect() couldn't read the " | 475 | befs_error(sb, "%s couldn't read the " |
473 | "double-indirect block at blockno %lu", | 476 | "double-indirect block at blockno %lu", __func__, |
474 | iaddr2blockno(sb, | 477 | (unsigned long) |
475 | &data->double_indirect) + | 478 | iaddr2blockno(sb, &data->double_indirect) + |
476 | dbl_which_block); | 479 | dbl_which_block); |
477 | brelse(dbl_indir_block); | 480 | brelse(dbl_indir_block); |
478 | return BEFS_ERR; | 481 | return BEFS_ERR; |
@@ -489,16 +492,16 @@ befs_find_brun_dblindirect(struct super_block *sb, | |||
489 | which_block = indir_indx / befs_iaddrs_per_block(sb); | 492 | which_block = indir_indx / befs_iaddrs_per_block(sb); |
490 | if (which_block > indir_run.len) { | 493 | if (which_block > indir_run.len) { |
491 | befs_error(sb, "The indirect index calculated by " | 494 | befs_error(sb, "The indirect index calculated by " |
492 | "befs_read_brun_dblindirect(), %d, is outside the range " | 495 | "%s, %d, is outside the range " |
493 | "of the indirect block", indir_indx); | 496 | "of the indirect block", __func__, indir_indx); |
494 | return BEFS_ERR; | 497 | return BEFS_ERR; |
495 | } | 498 | } |
496 | 499 | ||
497 | indir_block = | 500 | indir_block = |
498 | befs_bread(sb, iaddr2blockno(sb, &indir_run) + which_block); | 501 | befs_bread(sb, iaddr2blockno(sb, &indir_run) + which_block); |
499 | if (indir_block == NULL) { | 502 | if (indir_block == NULL) { |
500 | befs_error(sb, "befs_read_brun_dblindirect() couldn't read the " | 503 | befs_error(sb, "%s couldn't read the indirect block " |
501 | "indirect block at blockno %lu", | 504 | "at blockno %lu", __func__, (unsigned long) |
502 | iaddr2blockno(sb, &indir_run) + which_block); | 505 | iaddr2blockno(sb, &indir_run) + which_block); |
503 | brelse(indir_block); | 506 | brelse(indir_block); |
504 | return BEFS_ERR; | 507 | return BEFS_ERR; |
@@ -519,7 +522,7 @@ befs_find_brun_dblindirect(struct super_block *sb, | |||
519 | run->len -= offset; | 522 | run->len -= offset; |
520 | 523 | ||
521 | befs_debug(sb, "Found file block %lu in double_indirect[%d][%d]," | 524 | befs_debug(sb, "Found file block %lu in double_indirect[%d][%d]," |
522 | " double_indirect_leftover = %lu", | 525 | " double_indirect_leftover = %lu", (unsigned long) |
523 | blockno, dblindir_indx, indir_indx, dblindir_leftover); | 526 | blockno, dblindir_indx, indir_indx, dblindir_leftover); |
524 | 527 | ||
525 | return BEFS_OK; | 528 | return BEFS_OK; |
diff --git a/fs/befs/debug.c b/fs/befs/debug.c index 622e73775c83..4de7cffcd662 100644 --- a/fs/befs/debug.c +++ b/fs/befs/debug.c | |||
@@ -10,6 +10,7 @@ | |||
10 | * debug functions | 10 | * debug functions |
11 | */ | 11 | */ |
12 | 12 | ||
13 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
13 | #ifdef __KERNEL__ | 14 | #ifdef __KERNEL__ |
14 | 15 | ||
15 | #include <stdarg.h> | 16 | #include <stdarg.h> |
@@ -23,43 +24,30 @@ | |||
23 | 24 | ||
24 | #include "befs.h" | 25 | #include "befs.h" |
25 | 26 | ||
26 | #define ERRBUFSIZE 1024 | ||
27 | |||
28 | void | 27 | void |
29 | befs_error(const struct super_block *sb, const char *fmt, ...) | 28 | befs_error(const struct super_block *sb, const char *fmt, ...) |
30 | { | 29 | { |
30 | struct va_format vaf; | ||
31 | va_list args; | 31 | va_list args; |
32 | char *err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL); | ||
33 | if (err_buf == NULL) { | ||
34 | printk(KERN_ERR "could not allocate %d bytes\n", ERRBUFSIZE); | ||
35 | return; | ||
36 | } | ||
37 | 32 | ||
38 | va_start(args, fmt); | 33 | va_start(args, fmt); |
39 | vsnprintf(err_buf, ERRBUFSIZE, fmt, args); | 34 | vaf.fmt = fmt; |
35 | vaf.va = &args; | ||
36 | pr_err("(%s): %pV\n", sb->s_id, &vaf); | ||
40 | va_end(args); | 37 | va_end(args); |
41 | |||
42 | printk(KERN_ERR "BeFS(%s): %s\n", sb->s_id, err_buf); | ||
43 | kfree(err_buf); | ||
44 | } | 38 | } |
45 | 39 | ||
46 | void | 40 | void |
47 | befs_warning(const struct super_block *sb, const char *fmt, ...) | 41 | befs_warning(const struct super_block *sb, const char *fmt, ...) |
48 | { | 42 | { |
43 | struct va_format vaf; | ||
49 | va_list args; | 44 | va_list args; |
50 | char *err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL); | ||
51 | if (err_buf == NULL) { | ||
52 | printk(KERN_ERR "could not allocate %d bytes\n", ERRBUFSIZE); | ||
53 | return; | ||
54 | } | ||
55 | 45 | ||
56 | va_start(args, fmt); | 46 | va_start(args, fmt); |
57 | vsnprintf(err_buf, ERRBUFSIZE, fmt, args); | 47 | vaf.fmt = fmt; |
48 | vaf.va = &args; | ||
49 | pr_warn("(%s): %pV\n", sb->s_id, &vaf); | ||
58 | va_end(args); | 50 | va_end(args); |
59 | |||
60 | printk(KERN_WARNING "BeFS(%s): %s\n", sb->s_id, err_buf); | ||
61 | |||
62 | kfree(err_buf); | ||
63 | } | 51 | } |
64 | 52 | ||
65 | void | 53 | void |
@@ -67,25 +55,13 @@ befs_debug(const struct super_block *sb, const char *fmt, ...) | |||
67 | { | 55 | { |
68 | #ifdef CONFIG_BEFS_DEBUG | 56 | #ifdef CONFIG_BEFS_DEBUG |
69 | 57 | ||
58 | struct va_format vaf; | ||
70 | va_list args; | 59 | va_list args; |
71 | char *err_buf = NULL; | 60 | va_start(args, fmt); |
72 | 61 | vaf.fmt = fmt; | |
73 | if (BEFS_SB(sb)->mount_opts.debug) { | 62 | vaf.va = &args; |
74 | err_buf = kmalloc(ERRBUFSIZE, GFP_KERNEL); | 63 | pr_debug("(%s): %pV\n", sb->s_id, &vaf); |
75 | if (err_buf == NULL) { | 64 | va_end(args); |
76 | printk(KERN_ERR "could not allocate %d bytes\n", | ||
77 | ERRBUFSIZE); | ||
78 | return; | ||
79 | } | ||
80 | |||
81 | va_start(args, fmt); | ||
82 | vsnprintf(err_buf, ERRBUFSIZE, fmt, args); | ||
83 | va_end(args); | ||
84 | |||
85 | printk(KERN_DEBUG "BeFS(%s): %s\n", sb->s_id, err_buf); | ||
86 | |||
87 | kfree(err_buf); | ||
88 | } | ||
89 | 65 | ||
90 | #endif //CONFIG_BEFS_DEBUG | 66 | #endif //CONFIG_BEFS_DEBUG |
91 | } | 67 | } |
@@ -109,9 +85,9 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode) | |||
109 | befs_debug(sb, " gid %u", fs32_to_cpu(sb, inode->gid)); | 85 | befs_debug(sb, " gid %u", fs32_to_cpu(sb, inode->gid)); |
110 | befs_debug(sb, " mode %08x", fs32_to_cpu(sb, inode->mode)); | 86 | befs_debug(sb, " mode %08x", fs32_to_cpu(sb, inode->mode)); |
111 | befs_debug(sb, " flags %08x", fs32_to_cpu(sb, inode->flags)); | 87 | befs_debug(sb, " flags %08x", fs32_to_cpu(sb, inode->flags)); |
112 | befs_debug(sb, " create_time %Lu", | 88 | befs_debug(sb, " create_time %llu", |
113 | fs64_to_cpu(sb, inode->create_time)); | 89 | fs64_to_cpu(sb, inode->create_time)); |
114 | befs_debug(sb, " last_modified_time %Lu", | 90 | befs_debug(sb, " last_modified_time %llu", |
115 | fs64_to_cpu(sb, inode->last_modified_time)); | 91 | fs64_to_cpu(sb, inode->last_modified_time)); |
116 | 92 | ||
117 | tmp_run = fsrun_to_cpu(sb, inode->parent); | 93 | tmp_run = fsrun_to_cpu(sb, inode->parent); |
@@ -137,7 +113,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode) | |||
137 | tmp_run.allocation_group, tmp_run.start, | 113 | tmp_run.allocation_group, tmp_run.start, |
138 | tmp_run.len); | 114 | tmp_run.len); |
139 | } | 115 | } |
140 | befs_debug(sb, " max_direct_range %Lu", | 116 | befs_debug(sb, " max_direct_range %llu", |
141 | fs64_to_cpu(sb, | 117 | fs64_to_cpu(sb, |
142 | inode->data.datastream. | 118 | inode->data.datastream. |
143 | max_direct_range)); | 119 | max_direct_range)); |
@@ -147,7 +123,7 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode) | |||
147 | tmp_run.allocation_group, | 123 | tmp_run.allocation_group, |
148 | tmp_run.start, tmp_run.len); | 124 | tmp_run.start, tmp_run.len); |
149 | 125 | ||
150 | befs_debug(sb, " max_indirect_range %Lu", | 126 | befs_debug(sb, " max_indirect_range %llu", |
151 | fs64_to_cpu(sb, | 127 | fs64_to_cpu(sb, |
152 | inode->data.datastream. | 128 | inode->data.datastream. |
153 | max_indirect_range)); | 129 | max_indirect_range)); |
@@ -158,12 +134,12 @@ befs_dump_inode(const struct super_block *sb, befs_inode * inode) | |||
158 | tmp_run.allocation_group, tmp_run.start, | 134 | tmp_run.allocation_group, tmp_run.start, |
159 | tmp_run.len); | 135 | tmp_run.len); |
160 | 136 | ||
161 | befs_debug(sb, " max_double_indirect_range %Lu", | 137 | befs_debug(sb, " max_double_indirect_range %llu", |
162 | fs64_to_cpu(sb, | 138 | fs64_to_cpu(sb, |
163 | inode->data.datastream. | 139 | inode->data.datastream. |
164 | max_double_indirect_range)); | 140 | max_double_indirect_range)); |
165 | 141 | ||
166 | befs_debug(sb, " size %Lu", | 142 | befs_debug(sb, " size %llu", |
167 | fs64_to_cpu(sb, inode->data.datastream.size)); | 143 | fs64_to_cpu(sb, inode->data.datastream.size)); |
168 | } | 144 | } |
169 | 145 | ||
@@ -191,8 +167,8 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup) | |||
191 | befs_debug(sb, " block_size %u", fs32_to_cpu(sb, sup->block_size)); | 167 | befs_debug(sb, " block_size %u", fs32_to_cpu(sb, sup->block_size)); |
192 | befs_debug(sb, " block_shift %u", fs32_to_cpu(sb, sup->block_shift)); | 168 | befs_debug(sb, " block_shift %u", fs32_to_cpu(sb, sup->block_shift)); |
193 | 169 | ||
194 | befs_debug(sb, " num_blocks %Lu", fs64_to_cpu(sb, sup->num_blocks)); | 170 | befs_debug(sb, " num_blocks %llu", fs64_to_cpu(sb, sup->num_blocks)); |
195 | befs_debug(sb, " used_blocks %Lu", fs64_to_cpu(sb, sup->used_blocks)); | 171 | befs_debug(sb, " used_blocks %llu", fs64_to_cpu(sb, sup->used_blocks)); |
196 | 172 | ||
197 | befs_debug(sb, " magic2 %08x", fs32_to_cpu(sb, sup->magic2)); | 173 | befs_debug(sb, " magic2 %08x", fs32_to_cpu(sb, sup->magic2)); |
198 | befs_debug(sb, " blocks_per_ag %u", | 174 | befs_debug(sb, " blocks_per_ag %u", |
@@ -206,8 +182,8 @@ befs_dump_super_block(const struct super_block *sb, befs_super_block * sup) | |||
206 | befs_debug(sb, " log_blocks %u, %hu, %hu", | 182 | befs_debug(sb, " log_blocks %u, %hu, %hu", |
207 | tmp_run.allocation_group, tmp_run.start, tmp_run.len); | 183 | tmp_run.allocation_group, tmp_run.start, tmp_run.len); |
208 | 184 | ||
209 | befs_debug(sb, " log_start %Ld", fs64_to_cpu(sb, sup->log_start)); | 185 | befs_debug(sb, " log_start %lld", fs64_to_cpu(sb, sup->log_start)); |
210 | befs_debug(sb, " log_end %Ld", fs64_to_cpu(sb, sup->log_end)); | 186 | befs_debug(sb, " log_end %lld", fs64_to_cpu(sb, sup->log_end)); |
211 | 187 | ||
212 | befs_debug(sb, " magic3 %08x", fs32_to_cpu(sb, sup->magic3)); | 188 | befs_debug(sb, " magic3 %08x", fs32_to_cpu(sb, sup->magic3)); |
213 | 189 | ||
diff --git a/fs/befs/inode.c b/fs/befs/inode.c index 94c17f9a9576..fa4b718de597 100644 --- a/fs/befs/inode.c +++ b/fs/befs/inode.c | |||
@@ -25,7 +25,8 @@ befs_check_inode(struct super_block *sb, befs_inode * raw_inode, | |||
25 | /* check magic header. */ | 25 | /* check magic header. */ |
26 | if (magic1 != BEFS_INODE_MAGIC1) { | 26 | if (magic1 != BEFS_INODE_MAGIC1) { |
27 | befs_error(sb, | 27 | befs_error(sb, |
28 | "Inode has a bad magic header - inode = %lu", inode); | 28 | "Inode has a bad magic header - inode = %lu", |
29 | (unsigned long)inode); | ||
29 | return BEFS_BAD_INODE; | 30 | return BEFS_BAD_INODE; |
30 | } | 31 | } |
31 | 32 | ||
@@ -34,8 +35,8 @@ befs_check_inode(struct super_block *sb, befs_inode * raw_inode, | |||
34 | */ | 35 | */ |
35 | if (inode != iaddr2blockno(sb, &ino_num)) { | 36 | if (inode != iaddr2blockno(sb, &ino_num)) { |
36 | befs_error(sb, "inode blocknr field disagrees with vfs " | 37 | befs_error(sb, "inode blocknr field disagrees with vfs " |
37 | "VFS: %lu, Inode %lu", | 38 | "VFS: %lu, Inode %lu", (unsigned long) |
38 | inode, iaddr2blockno(sb, &ino_num)); | 39 | inode, (unsigned long)iaddr2blockno(sb, &ino_num)); |
39 | return BEFS_BAD_INODE; | 40 | return BEFS_BAD_INODE; |
40 | } | 41 | } |
41 | 42 | ||
@@ -44,7 +45,8 @@ befs_check_inode(struct super_block *sb, befs_inode * raw_inode, | |||
44 | */ | 45 | */ |
45 | 46 | ||
46 | if (!(flags & BEFS_INODE_IN_USE)) { | 47 | if (!(flags & BEFS_INODE_IN_USE)) { |
47 | befs_error(sb, "inode is not used - inode = %lu", inode); | 48 | befs_error(sb, "inode is not used - inode = %lu", |
49 | (unsigned long)inode); | ||
48 | return BEFS_BAD_INODE; | 50 | return BEFS_BAD_INODE; |
49 | } | 51 | } |
50 | 52 | ||
diff --git a/fs/befs/io.c b/fs/befs/io.c index ddef98aa255d..0408a3d601d0 100644 --- a/fs/befs/io.c +++ b/fs/befs/io.c | |||
@@ -30,9 +30,9 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr) | |||
30 | befs_blocknr_t block = 0; | 30 | befs_blocknr_t block = 0; |
31 | befs_sb_info *befs_sb = BEFS_SB(sb); | 31 | befs_sb_info *befs_sb = BEFS_SB(sb); |
32 | 32 | ||
33 | befs_debug(sb, "---> Enter befs_read_iaddr() " | 33 | befs_debug(sb, "---> Enter %s " |
34 | "[%u, %hu, %hu]", | 34 | "[%u, %hu, %hu]", __func__, iaddr.allocation_group, |
35 | iaddr.allocation_group, iaddr.start, iaddr.len); | 35 | iaddr.start, iaddr.len); |
36 | 36 | ||
37 | if (iaddr.allocation_group > befs_sb->num_ags) { | 37 | if (iaddr.allocation_group > befs_sb->num_ags) { |
38 | befs_error(sb, "BEFS: Invalid allocation group %u, max is %u", | 38 | befs_error(sb, "BEFS: Invalid allocation group %u, max is %u", |
@@ -42,20 +42,21 @@ befs_bread_iaddr(struct super_block *sb, befs_inode_addr iaddr) | |||
42 | 42 | ||
43 | block = iaddr2blockno(sb, &iaddr); | 43 | block = iaddr2blockno(sb, &iaddr); |
44 | 44 | ||
45 | befs_debug(sb, "befs_read_iaddr: offset = %lu", block); | 45 | befs_debug(sb, "%s: offset = %lu", __func__, (unsigned long)block); |
46 | 46 | ||
47 | bh = sb_bread(sb, block); | 47 | bh = sb_bread(sb, block); |
48 | 48 | ||
49 | if (bh == NULL) { | 49 | if (bh == NULL) { |
50 | befs_error(sb, "Failed to read block %lu", block); | 50 | befs_error(sb, "Failed to read block %lu", |
51 | (unsigned long)block); | ||
51 | goto error; | 52 | goto error; |
52 | } | 53 | } |
53 | 54 | ||
54 | befs_debug(sb, "<--- befs_read_iaddr()"); | 55 | befs_debug(sb, "<--- %s", __func__); |
55 | return bh; | 56 | return bh; |
56 | 57 | ||
57 | error: | 58 | error: |
58 | befs_debug(sb, "<--- befs_read_iaddr() ERROR"); | 59 | befs_debug(sb, "<--- %s ERROR", __func__); |
59 | return NULL; | 60 | return NULL; |
60 | } | 61 | } |
61 | 62 | ||
@@ -64,20 +65,21 @@ befs_bread(struct super_block *sb, befs_blocknr_t block) | |||
64 | { | 65 | { |
65 | struct buffer_head *bh = NULL; | 66 | struct buffer_head *bh = NULL; |
66 | 67 | ||
67 | befs_debug(sb, "---> Enter befs_read() %Lu", block); | 68 | befs_debug(sb, "---> Enter %s %lu", __func__, (unsigned long)block); |
68 | 69 | ||
69 | bh = sb_bread(sb, block); | 70 | bh = sb_bread(sb, block); |
70 | 71 | ||
71 | if (bh == NULL) { | 72 | if (bh == NULL) { |
72 | befs_error(sb, "Failed to read block %lu", block); | 73 | befs_error(sb, "Failed to read block %lu", |
74 | (unsigned long)block); | ||
73 | goto error; | 75 | goto error; |
74 | } | 76 | } |
75 | 77 | ||
76 | befs_debug(sb, "<--- befs_read()"); | 78 | befs_debug(sb, "<--- %s", __func__); |
77 | 79 | ||
78 | return bh; | 80 | return bh; |
79 | 81 | ||
80 | error: | 82 | error: |
81 | befs_debug(sb, "<--- befs_read() ERROR"); | 83 | befs_debug(sb, "<--- %s ERROR", __func__); |
82 | return NULL; | 84 | return NULL; |
83 | } | 85 | } |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 845d2d690ce2..d626756ff721 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
@@ -5,6 +5,8 @@ | |||
5 | * | 5 | * |
6 | */ | 6 | */ |
7 | 7 | ||
8 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
9 | |||
8 | #include <linux/module.h> | 10 | #include <linux/module.h> |
9 | #include <linux/slab.h> | 11 | #include <linux/slab.h> |
10 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
@@ -39,7 +41,6 @@ static struct dentry *befs_lookup(struct inode *, struct dentry *, unsigned int) | |||
39 | static struct inode *befs_iget(struct super_block *, unsigned long); | 41 | static struct inode *befs_iget(struct super_block *, unsigned long); |
40 | static struct inode *befs_alloc_inode(struct super_block *sb); | 42 | static struct inode *befs_alloc_inode(struct super_block *sb); |
41 | static void befs_destroy_inode(struct inode *inode); | 43 | static void befs_destroy_inode(struct inode *inode); |
42 | static int befs_init_inodecache(void); | ||
43 | static void befs_destroy_inodecache(void); | 44 | static void befs_destroy_inodecache(void); |
44 | static void *befs_follow_link(struct dentry *, struct nameidata *); | 45 | static void *befs_follow_link(struct dentry *, struct nameidata *); |
45 | static void *befs_fast_follow_link(struct dentry *, struct nameidata *); | 46 | static void *befs_fast_follow_link(struct dentry *, struct nameidata *); |
@@ -131,26 +132,28 @@ befs_get_block(struct inode *inode, sector_t block, | |||
131 | ulong disk_off; | 132 | ulong disk_off; |
132 | 133 | ||
133 | befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld", | 134 | befs_debug(sb, "---> befs_get_block() for inode %lu, block %ld", |
134 | inode->i_ino, block); | 135 | (unsigned long)inode->i_ino, (long)block); |
135 | 136 | ||
136 | if (block < 0) { | 137 | if (block < 0) { |
137 | befs_error(sb, "befs_get_block() was asked for a block " | 138 | befs_error(sb, "befs_get_block() was asked for a block " |
138 | "number less than zero: block %ld in inode %lu", | 139 | "number less than zero: block %ld in inode %lu", |
139 | block, inode->i_ino); | 140 | (long)block, (unsigned long)inode->i_ino); |
140 | return -EIO; | 141 | return -EIO; |
141 | } | 142 | } |
142 | 143 | ||
143 | if (create) { | 144 | if (create) { |
144 | befs_error(sb, "befs_get_block() was asked to write to " | 145 | befs_error(sb, "befs_get_block() was asked to write to " |
145 | "block %ld in inode %lu", block, inode->i_ino); | 146 | "block %ld in inode %lu", (long)block, |
147 | (unsigned long)inode->i_ino); | ||
146 | return -EPERM; | 148 | return -EPERM; |
147 | } | 149 | } |
148 | 150 | ||
149 | res = befs_fblock2brun(sb, ds, block, &run); | 151 | res = befs_fblock2brun(sb, ds, block, &run); |
150 | if (res != BEFS_OK) { | 152 | if (res != BEFS_OK) { |
151 | befs_error(sb, | 153 | befs_error(sb, |
152 | "<--- befs_get_block() for inode %lu, block " | 154 | "<--- %s for inode %lu, block %ld ERROR", |
153 | "%ld ERROR", inode->i_ino, block); | 155 | __func__, (unsigned long)inode->i_ino, |
156 | (long)block); | ||
154 | return -EFBIG; | 157 | return -EFBIG; |
155 | } | 158 | } |
156 | 159 | ||
@@ -158,8 +161,9 @@ befs_get_block(struct inode *inode, sector_t block, | |||
158 | 161 | ||
159 | map_bh(bh_result, inode->i_sb, disk_off); | 162 | map_bh(bh_result, inode->i_sb, disk_off); |
160 | 163 | ||
161 | befs_debug(sb, "<--- befs_get_block() for inode %lu, block %ld, " | 164 | befs_debug(sb, "<--- %s for inode %lu, block %ld, disk address %lu", |
162 | "disk address %lu", inode->i_ino, block, disk_off); | 165 | __func__, (unsigned long)inode->i_ino, (long)block, |
166 | (unsigned long)disk_off); | ||
163 | 167 | ||
164 | return 0; | 168 | return 0; |
165 | } | 169 | } |
@@ -176,15 +180,15 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
176 | char *utfname; | 180 | char *utfname; |
177 | const char *name = dentry->d_name.name; | 181 | const char *name = dentry->d_name.name; |
178 | 182 | ||
179 | befs_debug(sb, "---> befs_lookup() " | 183 | befs_debug(sb, "---> %s name %s inode %ld", __func__, |
180 | "name %s inode %ld", dentry->d_name.name, dir->i_ino); | 184 | dentry->d_name.name, dir->i_ino); |
181 | 185 | ||
182 | /* Convert to UTF-8 */ | 186 | /* Convert to UTF-8 */ |
183 | if (BEFS_SB(sb)->nls) { | 187 | if (BEFS_SB(sb)->nls) { |
184 | ret = | 188 | ret = |
185 | befs_nls2utf(sb, name, strlen(name), &utfname, &utfnamelen); | 189 | befs_nls2utf(sb, name, strlen(name), &utfname, &utfnamelen); |
186 | if (ret < 0) { | 190 | if (ret < 0) { |
187 | befs_debug(sb, "<--- befs_lookup() ERROR"); | 191 | befs_debug(sb, "<--- %s ERROR", __func__); |
188 | return ERR_PTR(ret); | 192 | return ERR_PTR(ret); |
189 | } | 193 | } |
190 | ret = befs_btree_find(sb, ds, utfname, &offset); | 194 | ret = befs_btree_find(sb, ds, utfname, &offset); |
@@ -195,12 +199,12 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
195 | } | 199 | } |
196 | 200 | ||
197 | if (ret == BEFS_BT_NOT_FOUND) { | 201 | if (ret == BEFS_BT_NOT_FOUND) { |
198 | befs_debug(sb, "<--- befs_lookup() %s not found", | 202 | befs_debug(sb, "<--- %s %s not found", __func__, |
199 | dentry->d_name.name); | 203 | dentry->d_name.name); |
200 | return ERR_PTR(-ENOENT); | 204 | return ERR_PTR(-ENOENT); |
201 | 205 | ||
202 | } else if (ret != BEFS_OK || offset == 0) { | 206 | } else if (ret != BEFS_OK || offset == 0) { |
203 | befs_warning(sb, "<--- befs_lookup() Error"); | 207 | befs_warning(sb, "<--- %s Error", __func__); |
204 | return ERR_PTR(-ENODATA); | 208 | return ERR_PTR(-ENODATA); |
205 | } | 209 | } |
206 | 210 | ||
@@ -210,7 +214,7 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
210 | 214 | ||
211 | d_add(dentry, inode); | 215 | d_add(dentry, inode); |
212 | 216 | ||
213 | befs_debug(sb, "<--- befs_lookup()"); | 217 | befs_debug(sb, "<--- %s", __func__); |
214 | 218 | ||
215 | return NULL; | 219 | return NULL; |
216 | } | 220 | } |
@@ -228,26 +232,25 @@ befs_readdir(struct file *file, struct dir_context *ctx) | |||
228 | char keybuf[BEFS_NAME_LEN + 1]; | 232 | char keybuf[BEFS_NAME_LEN + 1]; |
229 | const char *dirname = file->f_path.dentry->d_name.name; | 233 | const char *dirname = file->f_path.dentry->d_name.name; |
230 | 234 | ||
231 | befs_debug(sb, "---> befs_readdir() " | 235 | befs_debug(sb, "---> %s name %s, inode %ld, ctx->pos %lld", |
232 | "name %s, inode %ld, ctx->pos %Ld", | 236 | __func__, dirname, inode->i_ino, ctx->pos); |
233 | dirname, inode->i_ino, ctx->pos); | ||
234 | 237 | ||
235 | more: | 238 | more: |
236 | result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, | 239 | result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, |
237 | keybuf, &keysize, &value); | 240 | keybuf, &keysize, &value); |
238 | 241 | ||
239 | if (result == BEFS_ERR) { | 242 | if (result == BEFS_ERR) { |
240 | befs_debug(sb, "<--- befs_readdir() ERROR"); | 243 | befs_debug(sb, "<--- %s ERROR", __func__); |
241 | befs_error(sb, "IO error reading %s (inode %lu)", | 244 | befs_error(sb, "IO error reading %s (inode %lu)", |
242 | dirname, inode->i_ino); | 245 | dirname, inode->i_ino); |
243 | return -EIO; | 246 | return -EIO; |
244 | 247 | ||
245 | } else if (result == BEFS_BT_END) { | 248 | } else if (result == BEFS_BT_END) { |
246 | befs_debug(sb, "<--- befs_readdir() END"); | 249 | befs_debug(sb, "<--- %s END", __func__); |
247 | return 0; | 250 | return 0; |
248 | 251 | ||
249 | } else if (result == BEFS_BT_EMPTY) { | 252 | } else if (result == BEFS_BT_EMPTY) { |
250 | befs_debug(sb, "<--- befs_readdir() Empty directory"); | 253 | befs_debug(sb, "<--- %s Empty directory", __func__); |
251 | return 0; | 254 | return 0; |
252 | } | 255 | } |
253 | 256 | ||
@@ -260,7 +263,7 @@ more: | |||
260 | result = | 263 | result = |
261 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); | 264 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); |
262 | if (result < 0) { | 265 | if (result < 0) { |
263 | befs_debug(sb, "<--- befs_readdir() ERROR"); | 266 | befs_debug(sb, "<--- %s ERROR", __func__); |
264 | return result; | 267 | return result; |
265 | } | 268 | } |
266 | if (!dir_emit(ctx, nlsname, nlsnamelen, | 269 | if (!dir_emit(ctx, nlsname, nlsnamelen, |
@@ -277,7 +280,7 @@ more: | |||
277 | ctx->pos++; | 280 | ctx->pos++; |
278 | goto more; | 281 | goto more; |
279 | 282 | ||
280 | befs_debug(sb, "<--- befs_readdir() pos %Ld", ctx->pos); | 283 | befs_debug(sb, "<--- %s pos %lld", __func__, ctx->pos); |
281 | 284 | ||
282 | return 0; | 285 | return 0; |
283 | } | 286 | } |
@@ -321,7 +324,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
321 | struct inode *inode; | 324 | struct inode *inode; |
322 | long ret = -EIO; | 325 | long ret = -EIO; |
323 | 326 | ||
324 | befs_debug(sb, "---> befs_read_inode() " "inode = %lu", ino); | 327 | befs_debug(sb, "---> %s inode = %lu", __func__, ino); |
325 | 328 | ||
326 | inode = iget_locked(sb, ino); | 329 | inode = iget_locked(sb, ino); |
327 | if (!inode) | 330 | if (!inode) |
@@ -428,7 +431,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
428 | } | 431 | } |
429 | 432 | ||
430 | brelse(bh); | 433 | brelse(bh); |
431 | befs_debug(sb, "<--- befs_read_inode()"); | 434 | befs_debug(sb, "<--- %s", __func__); |
432 | unlock_new_inode(inode); | 435 | unlock_new_inode(inode); |
433 | return inode; | 436 | return inode; |
434 | 437 | ||
@@ -437,7 +440,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
437 | 440 | ||
438 | unacquire_none: | 441 | unacquire_none: |
439 | iget_failed(inode); | 442 | iget_failed(inode); |
440 | befs_debug(sb, "<--- befs_read_inode() - Bad inode"); | 443 | befs_debug(sb, "<--- %s - Bad inode", __func__); |
441 | return ERR_PTR(ret); | 444 | return ERR_PTR(ret); |
442 | } | 445 | } |
443 | 446 | ||
@@ -445,7 +448,7 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) | |||
445 | * | 448 | * |
446 | * Taken from NFS implementation by Al Viro. | 449 | * Taken from NFS implementation by Al Viro. |
447 | */ | 450 | */ |
448 | static int | 451 | static int __init |
449 | befs_init_inodecache(void) | 452 | befs_init_inodecache(void) |
450 | { | 453 | { |
451 | befs_inode_cachep = kmem_cache_create("befs_inode_cache", | 454 | befs_inode_cachep = kmem_cache_create("befs_inode_cache", |
@@ -454,11 +457,9 @@ befs_init_inodecache(void) | |||
454 | SLAB_MEM_SPREAD), | 457 | SLAB_MEM_SPREAD), |
455 | init_once); | 458 | init_once); |
456 | if (befs_inode_cachep == NULL) { | 459 | if (befs_inode_cachep == NULL) { |
457 | printk(KERN_ERR "befs_init_inodecache: " | 460 | pr_err("%s: Couldn't initialize inode slabcache\n", __func__); |
458 | "Couldn't initialize inode slabcache\n"); | ||
459 | return -ENOMEM; | 461 | return -ENOMEM; |
460 | } | 462 | } |
461 | |||
462 | return 0; | 463 | return 0; |
463 | } | 464 | } |
464 | 465 | ||
@@ -544,16 +545,16 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
544 | */ | 545 | */ |
545 | int maxlen = in_len + 1; | 546 | int maxlen = in_len + 1; |
546 | 547 | ||
547 | befs_debug(sb, "---> utf2nls()"); | 548 | befs_debug(sb, "---> %s", __func__); |
548 | 549 | ||
549 | if (!nls) { | 550 | if (!nls) { |
550 | befs_error(sb, "befs_utf2nls called with no NLS table loaded"); | 551 | befs_error(sb, "%s called with no NLS table loaded", __func__); |
551 | return -EINVAL; | 552 | return -EINVAL; |
552 | } | 553 | } |
553 | 554 | ||
554 | *out = result = kmalloc(maxlen, GFP_NOFS); | 555 | *out = result = kmalloc(maxlen, GFP_NOFS); |
555 | if (!*out) { | 556 | if (!*out) { |
556 | befs_error(sb, "befs_utf2nls() cannot allocate memory"); | 557 | befs_error(sb, "%s cannot allocate memory", __func__); |
557 | *out_len = 0; | 558 | *out_len = 0; |
558 | return -ENOMEM; | 559 | return -ENOMEM; |
559 | } | 560 | } |
@@ -575,14 +576,14 @@ befs_utf2nls(struct super_block *sb, const char *in, | |||
575 | result[o] = '\0'; | 576 | result[o] = '\0'; |
576 | *out_len = o; | 577 | *out_len = o; |
577 | 578 | ||
578 | befs_debug(sb, "<--- utf2nls()"); | 579 | befs_debug(sb, "<--- %s", __func__); |
579 | 580 | ||
580 | return o; | 581 | return o; |
581 | 582 | ||
582 | conv_err: | 583 | conv_err: |
583 | befs_error(sb, "Name using character set %s contains a character that " | 584 | befs_error(sb, "Name using character set %s contains a character that " |
584 | "cannot be converted to unicode.", nls->charset); | 585 | "cannot be converted to unicode.", nls->charset); |
585 | befs_debug(sb, "<--- utf2nls()"); | 586 | befs_debug(sb, "<--- %s", __func__); |
586 | kfree(result); | 587 | kfree(result); |
587 | return -EILSEQ; | 588 | return -EILSEQ; |
588 | } | 589 | } |
@@ -623,16 +624,17 @@ befs_nls2utf(struct super_block *sb, const char *in, | |||
623 | * in special cases */ | 624 | * in special cases */ |
624 | int maxlen = (3 * in_len) + 1; | 625 | int maxlen = (3 * in_len) + 1; |
625 | 626 | ||
626 | befs_debug(sb, "---> nls2utf()\n"); | 627 | befs_debug(sb, "---> %s\n", __func__); |
627 | 628 | ||
628 | if (!nls) { | 629 | if (!nls) { |
629 | befs_error(sb, "befs_nls2utf called with no NLS table loaded."); | 630 | befs_error(sb, "%s called with no NLS table loaded.", |
631 | __func__); | ||
630 | return -EINVAL; | 632 | return -EINVAL; |
631 | } | 633 | } |
632 | 634 | ||
633 | *out = result = kmalloc(maxlen, GFP_NOFS); | 635 | *out = result = kmalloc(maxlen, GFP_NOFS); |
634 | if (!*out) { | 636 | if (!*out) { |
635 | befs_error(sb, "befs_nls2utf() cannot allocate memory"); | 637 | befs_error(sb, "%s cannot allocate memory", __func__); |
636 | *out_len = 0; | 638 | *out_len = 0; |
637 | return -ENOMEM; | 639 | return -ENOMEM; |
638 | } | 640 | } |
@@ -653,14 +655,14 @@ befs_nls2utf(struct super_block *sb, const char *in, | |||
653 | result[o] = '\0'; | 655 | result[o] = '\0'; |
654 | *out_len = o; | 656 | *out_len = o; |
655 | 657 | ||
656 | befs_debug(sb, "<--- nls2utf()"); | 658 | befs_debug(sb, "<--- %s", __func__); |
657 | 659 | ||
658 | return i; | 660 | return i; |
659 | 661 | ||
660 | conv_err: | 662 | conv_err: |
661 | befs_error(sb, "Name using charecter set %s contains a charecter that " | 663 | befs_error(sb, "Name using charecter set %s contains a charecter that " |
662 | "cannot be converted to unicode.", nls->charset); | 664 | "cannot be converted to unicode.", nls->charset); |
663 | befs_debug(sb, "<--- nls2utf()"); | 665 | befs_debug(sb, "<--- %s", __func__); |
664 | kfree(result); | 666 | kfree(result); |
665 | return -EILSEQ; | 667 | return -EILSEQ; |
666 | } | 668 | } |
@@ -715,8 +717,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
715 | if (option >= 0) | 717 | if (option >= 0) |
716 | uid = make_kuid(current_user_ns(), option); | 718 | uid = make_kuid(current_user_ns(), option); |
717 | if (!uid_valid(uid)) { | 719 | if (!uid_valid(uid)) { |
718 | printk(KERN_ERR "BeFS: Invalid uid %d, " | 720 | pr_err("Invalid uid %d, " |
719 | "using default\n", option); | 721 | "using default\n", option); |
720 | break; | 722 | break; |
721 | } | 723 | } |
722 | opts->uid = uid; | 724 | opts->uid = uid; |
@@ -729,8 +731,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
729 | if (option >= 0) | 731 | if (option >= 0) |
730 | gid = make_kgid(current_user_ns(), option); | 732 | gid = make_kgid(current_user_ns(), option); |
731 | if (!gid_valid(gid)) { | 733 | if (!gid_valid(gid)) { |
732 | printk(KERN_ERR "BeFS: Invalid gid %d, " | 734 | pr_err("Invalid gid %d, " |
733 | "using default\n", option); | 735 | "using default\n", option); |
734 | break; | 736 | break; |
735 | } | 737 | } |
736 | opts->gid = gid; | 738 | opts->gid = gid; |
@@ -740,8 +742,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
740 | kfree(opts->iocharset); | 742 | kfree(opts->iocharset); |
741 | opts->iocharset = match_strdup(&args[0]); | 743 | opts->iocharset = match_strdup(&args[0]); |
742 | if (!opts->iocharset) { | 744 | if (!opts->iocharset) { |
743 | printk(KERN_ERR "BeFS: allocation failure for " | 745 | pr_err("allocation failure for " |
744 | "iocharset string\n"); | 746 | "iocharset string\n"); |
745 | return 0; | 747 | return 0; |
746 | } | 748 | } |
747 | break; | 749 | break; |
@@ -749,8 +751,8 @@ parse_options(char *options, befs_mount_options * opts) | |||
749 | opts->debug = 1; | 751 | opts->debug = 1; |
750 | break; | 752 | break; |
751 | default: | 753 | default: |
752 | printk(KERN_ERR "BeFS: Unrecognized mount option \"%s\" " | 754 | pr_err("Unrecognized mount option \"%s\" " |
753 | "or missing value\n", p); | 755 | "or missing value\n", p); |
754 | return 0; | 756 | return 0; |
755 | } | 757 | } |
756 | } | 758 | } |
@@ -791,22 +793,20 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
791 | 793 | ||
792 | save_mount_options(sb, data); | 794 | save_mount_options(sb, data); |
793 | 795 | ||
794 | sb->s_fs_info = kmalloc(sizeof (*befs_sb), GFP_KERNEL); | 796 | sb->s_fs_info = kzalloc(sizeof(*befs_sb), GFP_KERNEL); |
795 | if (sb->s_fs_info == NULL) { | 797 | if (sb->s_fs_info == NULL) { |
796 | printk(KERN_ERR | 798 | pr_err("(%s): Unable to allocate memory for private " |
797 | "BeFS(%s): Unable to allocate memory for private " | ||
798 | "portion of superblock. Bailing.\n", sb->s_id); | 799 | "portion of superblock. Bailing.\n", sb->s_id); |
799 | goto unacquire_none; | 800 | goto unacquire_none; |
800 | } | 801 | } |
801 | befs_sb = BEFS_SB(sb); | 802 | befs_sb = BEFS_SB(sb); |
802 | memset(befs_sb, 0, sizeof(befs_sb_info)); | ||
803 | 803 | ||
804 | if (!parse_options((char *) data, &befs_sb->mount_opts)) { | 804 | if (!parse_options((char *) data, &befs_sb->mount_opts)) { |
805 | befs_error(sb, "cannot parse mount options"); | 805 | befs_error(sb, "cannot parse mount options"); |
806 | goto unacquire_priv_sbp; | 806 | goto unacquire_priv_sbp; |
807 | } | 807 | } |
808 | 808 | ||
809 | befs_debug(sb, "---> befs_fill_super()"); | 809 | befs_debug(sb, "---> %s", __func__); |
810 | 810 | ||
811 | #ifndef CONFIG_BEFS_RW | 811 | #ifndef CONFIG_BEFS_RW |
812 | if (!(sb->s_flags & MS_RDONLY)) { | 812 | if (!(sb->s_flags & MS_RDONLY)) { |
@@ -854,7 +854,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
854 | goto unacquire_priv_sbp; | 854 | goto unacquire_priv_sbp; |
855 | 855 | ||
856 | if( befs_sb->num_blocks > ~((sector_t)0) ) { | 856 | if( befs_sb->num_blocks > ~((sector_t)0) ) { |
857 | befs_error(sb, "blocks count: %Lu " | 857 | befs_error(sb, "blocks count: %llu " |
858 | "is larger than the host can use", | 858 | "is larger than the host can use", |
859 | befs_sb->num_blocks); | 859 | befs_sb->num_blocks); |
860 | goto unacquire_priv_sbp; | 860 | goto unacquire_priv_sbp; |
@@ -913,6 +913,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
913 | static int | 913 | static int |
914 | befs_remount(struct super_block *sb, int *flags, char *data) | 914 | befs_remount(struct super_block *sb, int *flags, char *data) |
915 | { | 915 | { |
916 | sync_filesystem(sb); | ||
916 | if (!(*flags & MS_RDONLY)) | 917 | if (!(*flags & MS_RDONLY)) |
917 | return -EINVAL; | 918 | return -EINVAL; |
918 | return 0; | 919 | return 0; |
@@ -924,7 +925,7 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
924 | struct super_block *sb = dentry->d_sb; | 925 | struct super_block *sb = dentry->d_sb; |
925 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 926 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
926 | 927 | ||
927 | befs_debug(sb, "---> befs_statfs()"); | 928 | befs_debug(sb, "---> %s", __func__); |
928 | 929 | ||
929 | buf->f_type = BEFS_SUPER_MAGIC; | 930 | buf->f_type = BEFS_SUPER_MAGIC; |
930 | buf->f_bsize = sb->s_blocksize; | 931 | buf->f_bsize = sb->s_blocksize; |
@@ -937,7 +938,7 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
937 | buf->f_fsid.val[1] = (u32)(id >> 32); | 938 | buf->f_fsid.val[1] = (u32)(id >> 32); |
938 | buf->f_namelen = BEFS_NAME_LEN; | 939 | buf->f_namelen = BEFS_NAME_LEN; |
939 | 940 | ||
940 | befs_debug(sb, "<--- befs_statfs()"); | 941 | befs_debug(sb, "<--- %s", __func__); |
941 | 942 | ||
942 | return 0; | 943 | return 0; |
943 | } | 944 | } |
@@ -963,7 +964,7 @@ init_befs_fs(void) | |||
963 | { | 964 | { |
964 | int err; | 965 | int err; |
965 | 966 | ||
966 | printk(KERN_INFO "BeFS version: %s\n", BEFS_VERSION); | 967 | pr_info("version: %s\n", BEFS_VERSION); |
967 | 968 | ||
968 | err = befs_init_inodecache(); | 969 | err = befs_init_inodecache(); |
969 | if (err) | 970 | if (err) |
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c index 8defc6b3f9a2..7041ac35ace8 100644 --- a/fs/bfs/inode.c +++ b/fs/bfs/inode.c | |||
@@ -172,7 +172,7 @@ static void bfs_evict_inode(struct inode *inode) | |||
172 | 172 | ||
173 | dprintf("ino=%08lx\n", ino); | 173 | dprintf("ino=%08lx\n", ino); |
174 | 174 | ||
175 | truncate_inode_pages(&inode->i_data, 0); | 175 | truncate_inode_pages_final(&inode->i_data); |
176 | invalidate_inode_buffers(inode); | 176 | invalidate_inode_buffers(inode); |
177 | clear_inode(inode); | 177 | clear_inode(inode); |
178 | 178 | ||
@@ -266,7 +266,7 @@ static void init_once(void *foo) | |||
266 | inode_init_once(&bi->vfs_inode); | 266 | inode_init_once(&bi->vfs_inode); |
267 | } | 267 | } |
268 | 268 | ||
269 | static int init_inodecache(void) | 269 | static int __init init_inodecache(void) |
270 | { | 270 | { |
271 | bfs_inode_cachep = kmem_cache_create("bfs_inode_cache", | 271 | bfs_inode_cachep = kmem_cache_create("bfs_inode_cache", |
272 | sizeof(struct bfs_inode_info), | 272 | sizeof(struct bfs_inode_info), |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 67be2951b98a..aa3cb626671e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
@@ -46,10 +46,15 @@ | |||
46 | #endif | 46 | #endif |
47 | 47 | ||
48 | static int load_elf_binary(struct linux_binprm *bprm); | 48 | static int load_elf_binary(struct linux_binprm *bprm); |
49 | static int load_elf_library(struct file *); | ||
50 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, | 49 | static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *, |
51 | int, int, unsigned long); | 50 | int, int, unsigned long); |
52 | 51 | ||
52 | #ifdef CONFIG_USELIB | ||
53 | static int load_elf_library(struct file *); | ||
54 | #else | ||
55 | #define load_elf_library NULL | ||
56 | #endif | ||
57 | |||
53 | /* | 58 | /* |
54 | * If we don't support core dumping, then supply a NULL so we | 59 | * If we don't support core dumping, then supply a NULL so we |
55 | * don't even try. | 60 | * don't even try. |
@@ -579,7 +584,6 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
579 | unsigned long start_code, end_code, start_data, end_data; | 584 | unsigned long start_code, end_code, start_data, end_data; |
580 | unsigned long reloc_func_desc __maybe_unused = 0; | 585 | unsigned long reloc_func_desc __maybe_unused = 0; |
581 | int executable_stack = EXSTACK_DEFAULT; | 586 | int executable_stack = EXSTACK_DEFAULT; |
582 | unsigned long def_flags = 0; | ||
583 | struct pt_regs *regs = current_pt_regs(); | 587 | struct pt_regs *regs = current_pt_regs(); |
584 | struct { | 588 | struct { |
585 | struct elfhdr elf_ex; | 589 | struct elfhdr elf_ex; |
@@ -719,9 +723,6 @@ static int load_elf_binary(struct linux_binprm *bprm) | |||
719 | if (retval) | 723 | if (retval) |
720 | goto out_free_dentry; | 724 | goto out_free_dentry; |
721 | 725 | ||
722 | /* OK, This is the point of no return */ | ||
723 | current->mm->def_flags = def_flags; | ||
724 | |||
725 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages | 726 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages |
726 | may depend on the personality. */ | 727 | may depend on the personality. */ |
727 | SET_PERSONALITY(loc->elf_ex); | 728 | SET_PERSONALITY(loc->elf_ex); |
@@ -1005,6 +1006,7 @@ out_free_ph: | |||
1005 | goto out; | 1006 | goto out; |
1006 | } | 1007 | } |
1007 | 1008 | ||
1009 | #ifdef CONFIG_USELIB | ||
1008 | /* This is really simpleminded and specialized - we are loading an | 1010 | /* This is really simpleminded and specialized - we are loading an |
1009 | a.out library that is given an ELF header. */ | 1011 | a.out library that is given an ELF header. */ |
1010 | static int load_elf_library(struct file *file) | 1012 | static int load_elf_library(struct file *file) |
@@ -1083,6 +1085,7 @@ out_free_ph: | |||
1083 | out: | 1085 | out: |
1084 | return error; | 1086 | return error; |
1085 | } | 1087 | } |
1088 | #endif /* #ifdef CONFIG_USELIB */ | ||
1086 | 1089 | ||
1087 | #ifdef CONFIG_ELF_CORE | 1090 | #ifdef CONFIG_ELF_CORE |
1088 | /* | 1091 | /* |
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 1c740e152f38..b60500300dd7 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
@@ -656,6 +656,7 @@ static ssize_t bm_status_write(struct file * file, const char __user * buffer, | |||
656 | 656 | ||
657 | mutex_unlock(&root->d_inode->i_mutex); | 657 | mutex_unlock(&root->d_inode->i_mutex); |
658 | dput(root); | 658 | dput(root); |
659 | break; | ||
659 | default: return res; | 660 | default: return res; |
660 | } | 661 | } |
661 | return count; | 662 | return count; |
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 4f70f383132c..29696b78d1f4 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
@@ -301,25 +301,25 @@ int bio_integrity_get_tag(struct bio *bio, void *tag_buf, unsigned int len) | |||
301 | EXPORT_SYMBOL(bio_integrity_get_tag); | 301 | EXPORT_SYMBOL(bio_integrity_get_tag); |
302 | 302 | ||
303 | /** | 303 | /** |
304 | * bio_integrity_generate - Generate integrity metadata for a bio | 304 | * bio_integrity_generate_verify - Generate/verify integrity metadata for a bio |
305 | * @bio: bio to generate integrity metadata for | 305 | * @bio: bio to generate/verify integrity metadata for |
306 | * | 306 | * @operate: operate number, 1 for generate, 0 for verify |
307 | * Description: Generates integrity metadata for a bio by calling the | ||
308 | * block device's generation callback function. The bio must have a | ||
309 | * bip attached with enough room to accommodate the generated | ||
310 | * integrity metadata. | ||
311 | */ | 307 | */ |
312 | static void bio_integrity_generate(struct bio *bio) | 308 | static int bio_integrity_generate_verify(struct bio *bio, int operate) |
313 | { | 309 | { |
314 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | 310 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); |
315 | struct blk_integrity_exchg bix; | 311 | struct blk_integrity_exchg bix; |
316 | struct bio_vec bv; | 312 | struct bio_vec bv; |
317 | struct bvec_iter iter; | 313 | struct bvec_iter iter; |
318 | sector_t sector = bio->bi_iter.bi_sector; | 314 | sector_t sector; |
319 | unsigned int sectors, total; | 315 | unsigned int sectors, ret = 0; |
320 | void *prot_buf = bio->bi_integrity->bip_buf; | 316 | void *prot_buf = bio->bi_integrity->bip_buf; |
321 | 317 | ||
322 | total = 0; | 318 | if (operate) |
319 | sector = bio->bi_iter.bi_sector; | ||
320 | else | ||
321 | sector = bio->bi_integrity->bip_iter.bi_sector; | ||
322 | |||
323 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | 323 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; |
324 | bix.sector_size = bi->sector_size; | 324 | bix.sector_size = bi->sector_size; |
325 | 325 | ||
@@ -330,16 +330,37 @@ static void bio_integrity_generate(struct bio *bio) | |||
330 | bix.prot_buf = prot_buf; | 330 | bix.prot_buf = prot_buf; |
331 | bix.sector = sector; | 331 | bix.sector = sector; |
332 | 332 | ||
333 | bi->generate_fn(&bix); | 333 | if (operate) { |
334 | bi->generate_fn(&bix); | ||
335 | } else { | ||
336 | ret = bi->verify_fn(&bix); | ||
337 | if (ret) { | ||
338 | kunmap_atomic(kaddr); | ||
339 | return ret; | ||
340 | } | ||
341 | } | ||
334 | 342 | ||
335 | sectors = bv.bv_len / bi->sector_size; | 343 | sectors = bv.bv_len / bi->sector_size; |
336 | sector += sectors; | 344 | sector += sectors; |
337 | prot_buf += sectors * bi->tuple_size; | 345 | prot_buf += sectors * bi->tuple_size; |
338 | total += sectors * bi->tuple_size; | ||
339 | BUG_ON(total > bio->bi_integrity->bip_iter.bi_size); | ||
340 | 346 | ||
341 | kunmap_atomic(kaddr); | 347 | kunmap_atomic(kaddr); |
342 | } | 348 | } |
349 | return ret; | ||
350 | } | ||
351 | |||
352 | /** | ||
353 | * bio_integrity_generate - Generate integrity metadata for a bio | ||
354 | * @bio: bio to generate integrity metadata for | ||
355 | * | ||
356 | * Description: Generates integrity metadata for a bio by calling the | ||
357 | * block device's generation callback function. The bio must have a | ||
358 | * bip attached with enough room to accommodate the generated | ||
359 | * integrity metadata. | ||
360 | */ | ||
361 | static void bio_integrity_generate(struct bio *bio) | ||
362 | { | ||
363 | bio_integrity_generate_verify(bio, 1); | ||
343 | } | 364 | } |
344 | 365 | ||
345 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | 366 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) |
@@ -454,40 +475,7 @@ EXPORT_SYMBOL(bio_integrity_prep); | |||
454 | */ | 475 | */ |
455 | static int bio_integrity_verify(struct bio *bio) | 476 | static int bio_integrity_verify(struct bio *bio) |
456 | { | 477 | { |
457 | struct blk_integrity *bi = bdev_get_integrity(bio->bi_bdev); | 478 | return bio_integrity_generate_verify(bio, 0); |
458 | struct blk_integrity_exchg bix; | ||
459 | struct bio_vec *bv; | ||
460 | sector_t sector = bio->bi_integrity->bip_iter.bi_sector; | ||
461 | unsigned int sectors, ret = 0; | ||
462 | void *prot_buf = bio->bi_integrity->bip_buf; | ||
463 | int i; | ||
464 | |||
465 | bix.disk_name = bio->bi_bdev->bd_disk->disk_name; | ||
466 | bix.sector_size = bi->sector_size; | ||
467 | |||
468 | bio_for_each_segment_all(bv, bio, i) { | ||
469 | void *kaddr = kmap_atomic(bv->bv_page); | ||
470 | |||
471 | bix.data_buf = kaddr + bv->bv_offset; | ||
472 | bix.data_size = bv->bv_len; | ||
473 | bix.prot_buf = prot_buf; | ||
474 | bix.sector = sector; | ||
475 | |||
476 | ret = bi->verify_fn(&bix); | ||
477 | |||
478 | if (ret) { | ||
479 | kunmap_atomic(kaddr); | ||
480 | return ret; | ||
481 | } | ||
482 | |||
483 | sectors = bv->bv_len / bi->sector_size; | ||
484 | sector += sectors; | ||
485 | prot_buf += sectors * bi->tuple_size; | ||
486 | |||
487 | kunmap_atomic(kaddr); | ||
488 | } | ||
489 | |||
490 | return ret; | ||
491 | } | 479 | } |
492 | 480 | ||
493 | /** | 481 | /** |
@@ -116,7 +116,6 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) | |||
116 | if (!slab) | 116 | if (!slab) |
117 | goto out_unlock; | 117 | goto out_unlock; |
118 | 118 | ||
119 | printk(KERN_INFO "bio: create slab <%s> at %d\n", bslab->name, entry); | ||
120 | bslab->slab = slab; | 119 | bslab->slab = slab; |
121 | bslab->slab_ref = 1; | 120 | bslab->slab_ref = 1; |
122 | bslab->slab_size = sz; | 121 | bslab->slab_size = sz; |
@@ -1970,7 +1969,7 @@ int bio_associate_current(struct bio *bio) | |||
1970 | 1969 | ||
1971 | /* associate blkcg if exists */ | 1970 | /* associate blkcg if exists */ |
1972 | rcu_read_lock(); | 1971 | rcu_read_lock(); |
1973 | css = task_css(current, blkio_subsys_id); | 1972 | css = task_css(current, blkio_cgrp_id); |
1974 | if (css && css_tryget(css)) | 1973 | if (css && css_tryget(css)) |
1975 | bio->bi_css = css; | 1974 | bio->bi_css = css; |
1976 | rcu_read_unlock(); | 1975 | rcu_read_unlock(); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index 1e86823a9cbd..ba0d2b05bb78 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
@@ -83,7 +83,7 @@ void kill_bdev(struct block_device *bdev) | |||
83 | { | 83 | { |
84 | struct address_space *mapping = bdev->bd_inode->i_mapping; | 84 | struct address_space *mapping = bdev->bd_inode->i_mapping; |
85 | 85 | ||
86 | if (mapping->nrpages == 0) | 86 | if (mapping->nrpages == 0 && mapping->nrshadows == 0) |
87 | return; | 87 | return; |
88 | 88 | ||
89 | invalidate_bh_lrus(); | 89 | invalidate_bh_lrus(); |
@@ -419,7 +419,7 @@ static void bdev_evict_inode(struct inode *inode) | |||
419 | { | 419 | { |
420 | struct block_device *bdev = &BDEV_I(inode)->bdev; | 420 | struct block_device *bdev = &BDEV_I(inode)->bdev; |
421 | struct list_head *p; | 421 | struct list_head *p; |
422 | truncate_inode_pages(&inode->i_data, 0); | 422 | truncate_inode_pages_final(&inode->i_data); |
423 | invalidate_inode_buffers(inode); /* is it needed here? */ | 423 | invalidate_inode_buffers(inode); /* is it needed here? */ |
424 | clear_inode(inode); | 424 | clear_inode(inode); |
425 | spin_lock(&bdev_lock); | 425 | spin_lock(&bdev_lock); |
@@ -1523,7 +1523,7 @@ ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1523 | ssize_t err; | 1523 | ssize_t err; |
1524 | 1524 | ||
1525 | err = generic_write_sync(file, pos, ret); | 1525 | err = generic_write_sync(file, pos, ret); |
1526 | if (err < 0 && ret > 0) | 1526 | if (err < 0) |
1527 | ret = err; | 1527 | ret = err; |
1528 | } | 1528 | } |
1529 | blk_finish_plug(&plug); | 1529 | blk_finish_plug(&plug); |
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index c1e0b0caf9cc..ecb5832c0967 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007 Oracle. All rights reserved. |
3 | * Copyright (C) 2014 Fujitsu. All rights reserved. | ||
3 | * | 4 | * |
4 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public | 6 | * modify it under the terms of the GNU General Public |
@@ -21,708 +22,313 @@ | |||
21 | #include <linux/list.h> | 22 | #include <linux/list.h> |
22 | #include <linux/spinlock.h> | 23 | #include <linux/spinlock.h> |
23 | #include <linux/freezer.h> | 24 | #include <linux/freezer.h> |
25 | #include <linux/workqueue.h> | ||
24 | #include "async-thread.h" | 26 | #include "async-thread.h" |
27 | #include "ctree.h" | ||
28 | |||
29 | #define WORK_DONE_BIT 0 | ||
30 | #define WORK_ORDER_DONE_BIT 1 | ||
31 | #define WORK_HIGH_PRIO_BIT 2 | ||
32 | |||
33 | #define NO_THRESHOLD (-1) | ||
34 | #define DFT_THRESHOLD (32) | ||
35 | |||
36 | struct __btrfs_workqueue { | ||
37 | struct workqueue_struct *normal_wq; | ||
38 | /* List head pointing to ordered work list */ | ||
39 | struct list_head ordered_list; | ||
40 | |||
41 | /* Spinlock for ordered_list */ | ||
42 | spinlock_t list_lock; | ||
43 | |||
44 | /* Thresholding related variants */ | ||
45 | atomic_t pending; | ||
46 | int max_active; | ||
47 | int current_max; | ||
48 | int thresh; | ||
49 | unsigned int count; | ||
50 | spinlock_t thres_lock; | ||
51 | }; | ||
25 | 52 | ||
26 | #define WORK_QUEUED_BIT 0 | 53 | struct btrfs_workqueue { |
27 | #define WORK_DONE_BIT 1 | 54 | struct __btrfs_workqueue *normal; |
28 | #define WORK_ORDER_DONE_BIT 2 | 55 | struct __btrfs_workqueue *high; |
29 | #define WORK_HIGH_PRIO_BIT 3 | 56 | }; |
30 | |||
31 | /* | ||
32 | * container for the kthread task pointer and the list of pending work | ||
33 | * One of these is allocated per thread. | ||
34 | */ | ||
35 | struct btrfs_worker_thread { | ||
36 | /* pool we belong to */ | ||
37 | struct btrfs_workers *workers; | ||
38 | |||
39 | /* list of struct btrfs_work that are waiting for service */ | ||
40 | struct list_head pending; | ||
41 | struct list_head prio_pending; | ||
42 | |||
43 | /* list of worker threads from struct btrfs_workers */ | ||
44 | struct list_head worker_list; | ||
45 | |||
46 | /* kthread */ | ||
47 | struct task_struct *task; | ||
48 | 57 | ||
49 | /* number of things on the pending list */ | 58 | static inline struct __btrfs_workqueue |
50 | atomic_t num_pending; | 59 | *__btrfs_alloc_workqueue(const char *name, int flags, int max_active, |
60 | int thresh) | ||
61 | { | ||
62 | struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | ||
51 | 63 | ||
52 | /* reference counter for this struct */ | 64 | if (unlikely(!ret)) |
53 | atomic_t refs; | 65 | return NULL; |
54 | 66 | ||
55 | unsigned long sequence; | 67 | ret->max_active = max_active; |
68 | atomic_set(&ret->pending, 0); | ||
69 | if (thresh == 0) | ||
70 | thresh = DFT_THRESHOLD; | ||
71 | /* For low threshold, disabling threshold is a better choice */ | ||
72 | if (thresh < DFT_THRESHOLD) { | ||
73 | ret->current_max = max_active; | ||
74 | ret->thresh = NO_THRESHOLD; | ||
75 | } else { | ||
76 | ret->current_max = 1; | ||
77 | ret->thresh = thresh; | ||
78 | } | ||
56 | 79 | ||
57 | /* protects the pending list. */ | 80 | if (flags & WQ_HIGHPRI) |
58 | spinlock_t lock; | 81 | ret->normal_wq = alloc_workqueue("%s-%s-high", flags, |
82 | ret->max_active, | ||
83 | "btrfs", name); | ||
84 | else | ||
85 | ret->normal_wq = alloc_workqueue("%s-%s", flags, | ||
86 | ret->max_active, "btrfs", | ||
87 | name); | ||
88 | if (unlikely(!ret->normal_wq)) { | ||
89 | kfree(ret); | ||
90 | return NULL; | ||
91 | } | ||
59 | 92 | ||
60 | /* set to non-zero when this thread is already awake and kicking */ | 93 | INIT_LIST_HEAD(&ret->ordered_list); |
61 | int working; | 94 | spin_lock_init(&ret->list_lock); |
95 | spin_lock_init(&ret->thres_lock); | ||
96 | trace_btrfs_workqueue_alloc(ret, name, flags & WQ_HIGHPRI); | ||
97 | return ret; | ||
98 | } | ||
62 | 99 | ||
63 | /* are we currently idle */ | 100 | static inline void |
64 | int idle; | 101 | __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq); |
65 | }; | ||
66 | 102 | ||
67 | static int __btrfs_start_workers(struct btrfs_workers *workers); | 103 | struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, |
104 | int flags, | ||
105 | int max_active, | ||
106 | int thresh) | ||
107 | { | ||
108 | struct btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); | ||
68 | 109 | ||
69 | /* | 110 | if (unlikely(!ret)) |
70 | * btrfs_start_workers uses kthread_run, which can block waiting for memory | 111 | return NULL; |
71 | * for a very long time. It will actually throttle on page writeback, | ||
72 | * and so it may not make progress until after our btrfs worker threads | ||
73 | * process all of the pending work structs in their queue | ||
74 | * | ||
75 | * This means we can't use btrfs_start_workers from inside a btrfs worker | ||
76 | * thread that is used as part of cleaning dirty memory, which pretty much | ||
77 | * involves all of the worker threads. | ||
78 | * | ||
79 | * Instead we have a helper queue who never has more than one thread | ||
80 | * where we scheduler thread start operations. This worker_start struct | ||
81 | * is used to contain the work and hold a pointer to the queue that needs | ||
82 | * another worker. | ||
83 | */ | ||
84 | struct worker_start { | ||
85 | struct btrfs_work work; | ||
86 | struct btrfs_workers *queue; | ||
87 | }; | ||
88 | 112 | ||
89 | static void start_new_worker_func(struct btrfs_work *work) | 113 | ret->normal = __btrfs_alloc_workqueue(name, flags & ~WQ_HIGHPRI, |
90 | { | 114 | max_active, thresh); |
91 | struct worker_start *start; | 115 | if (unlikely(!ret->normal)) { |
92 | start = container_of(work, struct worker_start, work); | 116 | kfree(ret); |
93 | __btrfs_start_workers(start->queue); | 117 | return NULL; |
94 | kfree(start); | 118 | } |
95 | } | ||
96 | 119 | ||
97 | /* | 120 | if (flags & WQ_HIGHPRI) { |
98 | * helper function to move a thread onto the idle list after it | 121 | ret->high = __btrfs_alloc_workqueue(name, flags, max_active, |
99 | * has finished some requests. | 122 | thresh); |
100 | */ | 123 | if (unlikely(!ret->high)) { |
101 | static void check_idle_worker(struct btrfs_worker_thread *worker) | 124 | __btrfs_destroy_workqueue(ret->normal); |
102 | { | 125 | kfree(ret); |
103 | if (!worker->idle && atomic_read(&worker->num_pending) < | 126 | return NULL; |
104 | worker->workers->idle_thresh / 2) { | ||
105 | unsigned long flags; | ||
106 | spin_lock_irqsave(&worker->workers->lock, flags); | ||
107 | worker->idle = 1; | ||
108 | |||
109 | /* the list may be empty if the worker is just starting */ | ||
110 | if (!list_empty(&worker->worker_list) && | ||
111 | !worker->workers->stopping) { | ||
112 | list_move(&worker->worker_list, | ||
113 | &worker->workers->idle_list); | ||
114 | } | 127 | } |
115 | spin_unlock_irqrestore(&worker->workers->lock, flags); | ||
116 | } | 128 | } |
129 | return ret; | ||
117 | } | 130 | } |
118 | 131 | ||
119 | /* | 132 | /* |
120 | * helper function to move a thread off the idle list after new | 133 | * Hook for threshold which will be called in btrfs_queue_work. |
121 | * pending work is added. | 134 | * This hook WILL be called in IRQ handler context, |
135 | * so workqueue_set_max_active MUST NOT be called in this hook | ||
122 | */ | 136 | */ |
123 | static void check_busy_worker(struct btrfs_worker_thread *worker) | 137 | static inline void thresh_queue_hook(struct __btrfs_workqueue *wq) |
124 | { | 138 | { |
125 | if (worker->idle && atomic_read(&worker->num_pending) >= | 139 | if (wq->thresh == NO_THRESHOLD) |
126 | worker->workers->idle_thresh) { | 140 | return; |
127 | unsigned long flags; | 141 | atomic_inc(&wq->pending); |
128 | spin_lock_irqsave(&worker->workers->lock, flags); | ||
129 | worker->idle = 0; | ||
130 | |||
131 | if (!list_empty(&worker->worker_list) && | ||
132 | !worker->workers->stopping) { | ||
133 | list_move_tail(&worker->worker_list, | ||
134 | &worker->workers->worker_list); | ||
135 | } | ||
136 | spin_unlock_irqrestore(&worker->workers->lock, flags); | ||
137 | } | ||
138 | } | 142 | } |
139 | 143 | ||
140 | static void check_pending_worker_creates(struct btrfs_worker_thread *worker) | 144 | /* |
145 | * Hook for threshold which will be called before executing the work, | ||
146 | * This hook is called in kthread content. | ||
147 | * So workqueue_set_max_active is called here. | ||
148 | */ | ||
149 | static inline void thresh_exec_hook(struct __btrfs_workqueue *wq) | ||
141 | { | 150 | { |
142 | struct btrfs_workers *workers = worker->workers; | 151 | int new_max_active; |
143 | struct worker_start *start; | 152 | long pending; |
144 | unsigned long flags; | 153 | int need_change = 0; |
145 | 154 | ||
146 | rmb(); | 155 | if (wq->thresh == NO_THRESHOLD) |
147 | if (!workers->atomic_start_pending) | ||
148 | return; | 156 | return; |
149 | 157 | ||
150 | start = kzalloc(sizeof(*start), GFP_NOFS); | 158 | atomic_dec(&wq->pending); |
151 | if (!start) | 159 | spin_lock(&wq->thres_lock); |
152 | return; | 160 | /* |
153 | 161 | * Use wq->count to limit the calling frequency of | |
154 | start->work.func = start_new_worker_func; | 162 | * workqueue_set_max_active. |
155 | start->queue = workers; | 163 | */ |
156 | 164 | wq->count++; | |
157 | spin_lock_irqsave(&workers->lock, flags); | 165 | wq->count %= (wq->thresh / 4); |
158 | if (!workers->atomic_start_pending) | 166 | if (!wq->count) |
159 | goto out; | 167 | goto out; |
160 | 168 | new_max_active = wq->current_max; | |
161 | workers->atomic_start_pending = 0; | ||
162 | if (workers->num_workers + workers->num_workers_starting >= | ||
163 | workers->max_workers) | ||
164 | goto out; | ||
165 | |||
166 | workers->num_workers_starting += 1; | ||
167 | spin_unlock_irqrestore(&workers->lock, flags); | ||
168 | btrfs_queue_worker(workers->atomic_worker_start, &start->work); | ||
169 | return; | ||
170 | 169 | ||
170 | /* | ||
171 | * pending may be changed later, but it's OK since we really | ||
172 | * don't need it so accurate to calculate new_max_active. | ||
173 | */ | ||
174 | pending = atomic_read(&wq->pending); | ||
175 | if (pending > wq->thresh) | ||
176 | new_max_active++; | ||
177 | if (pending < wq->thresh / 2) | ||
178 | new_max_active--; | ||
179 | new_max_active = clamp_val(new_max_active, 1, wq->max_active); | ||
180 | if (new_max_active != wq->current_max) { | ||
181 | need_change = 1; | ||
182 | wq->current_max = new_max_active; | ||
183 | } | ||
171 | out: | 184 | out: |
172 | kfree(start); | 185 | spin_unlock(&wq->thres_lock); |
173 | spin_unlock_irqrestore(&workers->lock, flags); | 186 | |
187 | if (need_change) { | ||
188 | workqueue_set_max_active(wq->normal_wq, wq->current_max); | ||
189 | } | ||
174 | } | 190 | } |
175 | 191 | ||
176 | static noinline void run_ordered_completions(struct btrfs_workers *workers, | 192 | static void run_ordered_work(struct __btrfs_workqueue *wq) |
177 | struct btrfs_work *work) | ||
178 | { | 193 | { |
179 | if (!workers->ordered) | 194 | struct list_head *list = &wq->ordered_list; |
180 | return; | 195 | struct btrfs_work *work; |
181 | 196 | spinlock_t *lock = &wq->list_lock; | |
182 | set_bit(WORK_DONE_BIT, &work->flags); | 197 | unsigned long flags; |
183 | |||
184 | spin_lock(&workers->order_lock); | ||
185 | 198 | ||
186 | while (1) { | 199 | while (1) { |
187 | if (!list_empty(&workers->prio_order_list)) { | 200 | spin_lock_irqsave(lock, flags); |
188 | work = list_entry(workers->prio_order_list.next, | 201 | if (list_empty(list)) |
189 | struct btrfs_work, order_list); | ||
190 | } else if (!list_empty(&workers->order_list)) { | ||
191 | work = list_entry(workers->order_list.next, | ||
192 | struct btrfs_work, order_list); | ||
193 | } else { | ||
194 | break; | 202 | break; |
195 | } | 203 | work = list_entry(list->next, struct btrfs_work, |
204 | ordered_list); | ||
196 | if (!test_bit(WORK_DONE_BIT, &work->flags)) | 205 | if (!test_bit(WORK_DONE_BIT, &work->flags)) |
197 | break; | 206 | break; |
198 | 207 | ||
199 | /* we are going to call the ordered done function, but | 208 | /* |
209 | * we are going to call the ordered done function, but | ||
200 | * we leave the work item on the list as a barrier so | 210 | * we leave the work item on the list as a barrier so |
201 | * that later work items that are done don't have their | 211 | * that later work items that are done don't have their |
202 | * functions called before this one returns | 212 | * functions called before this one returns |
203 | */ | 213 | */ |
204 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) | 214 | if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags)) |
205 | break; | 215 | break; |
206 | 216 | trace_btrfs_ordered_sched(work); | |
207 | spin_unlock(&workers->order_lock); | 217 | spin_unlock_irqrestore(lock, flags); |
208 | |||
209 | work->ordered_func(work); | 218 | work->ordered_func(work); |
210 | 219 | ||
211 | /* now take the lock again and drop our item from the list */ | 220 | /* now take the lock again and drop our item from the list */ |
212 | spin_lock(&workers->order_lock); | 221 | spin_lock_irqsave(lock, flags); |
213 | list_del(&work->order_list); | 222 | list_del(&work->ordered_list); |
214 | spin_unlock(&workers->order_lock); | 223 | spin_unlock_irqrestore(lock, flags); |
215 | 224 | ||
216 | /* | 225 | /* |
217 | * we don't want to call the ordered free functions | 226 | * we don't want to call the ordered free functions |
218 | * with the lock held though | 227 | * with the lock held though |
219 | */ | 228 | */ |
220 | work->ordered_free(work); | 229 | work->ordered_free(work); |
221 | spin_lock(&workers->order_lock); | 230 | trace_btrfs_all_work_done(work); |
222 | } | ||
223 | |||
224 | spin_unlock(&workers->order_lock); | ||
225 | } | ||
226 | |||
227 | static void put_worker(struct btrfs_worker_thread *worker) | ||
228 | { | ||
229 | if (atomic_dec_and_test(&worker->refs)) | ||
230 | kfree(worker); | ||
231 | } | ||
232 | |||
233 | static int try_worker_shutdown(struct btrfs_worker_thread *worker) | ||
234 | { | ||
235 | int freeit = 0; | ||
236 | |||
237 | spin_lock_irq(&worker->lock); | ||
238 | spin_lock(&worker->workers->lock); | ||
239 | if (worker->workers->num_workers > 1 && | ||
240 | worker->idle && | ||
241 | !worker->working && | ||
242 | !list_empty(&worker->worker_list) && | ||
243 | list_empty(&worker->prio_pending) && | ||
244 | list_empty(&worker->pending) && | ||
245 | atomic_read(&worker->num_pending) == 0) { | ||
246 | freeit = 1; | ||
247 | list_del_init(&worker->worker_list); | ||
248 | worker->workers->num_workers--; | ||
249 | } | 231 | } |
250 | spin_unlock(&worker->workers->lock); | 232 | spin_unlock_irqrestore(lock, flags); |
251 | spin_unlock_irq(&worker->lock); | ||
252 | |||
253 | if (freeit) | ||
254 | put_worker(worker); | ||
255 | return freeit; | ||
256 | } | 233 | } |
257 | 234 | ||
258 | static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker, | 235 | static void normal_work_helper(struct work_struct *arg) |
259 | struct list_head *prio_head, | ||
260 | struct list_head *head) | ||
261 | { | ||
262 | struct btrfs_work *work = NULL; | ||
263 | struct list_head *cur = NULL; | ||
264 | |||
265 | if (!list_empty(prio_head)) | ||
266 | cur = prio_head->next; | ||
267 | |||
268 | smp_mb(); | ||
269 | if (!list_empty(&worker->prio_pending)) | ||
270 | goto refill; | ||
271 | |||
272 | if (!list_empty(head)) | ||
273 | cur = head->next; | ||
274 | |||
275 | if (cur) | ||
276 | goto out; | ||
277 | |||
278 | refill: | ||
279 | spin_lock_irq(&worker->lock); | ||
280 | list_splice_tail_init(&worker->prio_pending, prio_head); | ||
281 | list_splice_tail_init(&worker->pending, head); | ||
282 | |||
283 | if (!list_empty(prio_head)) | ||
284 | cur = prio_head->next; | ||
285 | else if (!list_empty(head)) | ||
286 | cur = head->next; | ||
287 | spin_unlock_irq(&worker->lock); | ||
288 | |||
289 | if (!cur) | ||
290 | goto out_fail; | ||
291 | |||
292 | out: | ||
293 | work = list_entry(cur, struct btrfs_work, list); | ||
294 | |||
295 | out_fail: | ||
296 | return work; | ||
297 | } | ||
298 | |||
299 | /* | ||
300 | * main loop for servicing work items | ||
301 | */ | ||
302 | static int worker_loop(void *arg) | ||
303 | { | 236 | { |
304 | struct btrfs_worker_thread *worker = arg; | ||
305 | struct list_head head; | ||
306 | struct list_head prio_head; | ||
307 | struct btrfs_work *work; | 237 | struct btrfs_work *work; |
238 | struct __btrfs_workqueue *wq; | ||
239 | int need_order = 0; | ||
308 | 240 | ||
309 | INIT_LIST_HEAD(&head); | 241 | work = container_of(arg, struct btrfs_work, normal_work); |
310 | INIT_LIST_HEAD(&prio_head); | 242 | /* |
311 | 243 | * We should not touch things inside work in the following cases: | |
312 | do { | 244 | * 1) after work->func() if it has no ordered_free |
313 | again: | 245 | * Since the struct is freed in work->func(). |
314 | while (1) { | 246 | * 2) after setting WORK_DONE_BIT |
315 | 247 | * The work may be freed in other threads almost instantly. | |
316 | 248 | * So we save the needed things here. | |
317 | work = get_next_work(worker, &prio_head, &head); | 249 | */ |
318 | if (!work) | 250 | if (work->ordered_func) |
319 | break; | 251 | need_order = 1; |
320 | 252 | wq = work->wq; | |
321 | list_del(&work->list); | 253 | |
322 | clear_bit(WORK_QUEUED_BIT, &work->flags); | 254 | trace_btrfs_work_sched(work); |
323 | 255 | thresh_exec_hook(wq); | |
324 | work->worker = worker; | 256 | work->func(work); |
325 | 257 | if (need_order) { | |
326 | work->func(work); | 258 | set_bit(WORK_DONE_BIT, &work->flags); |
327 | 259 | run_ordered_work(wq); | |
328 | atomic_dec(&worker->num_pending); | ||
329 | /* | ||
330 | * unless this is an ordered work queue, | ||
331 | * 'work' was probably freed by func above. | ||
332 | */ | ||
333 | run_ordered_completions(worker->workers, work); | ||
334 | |||
335 | check_pending_worker_creates(worker); | ||
336 | cond_resched(); | ||
337 | } | ||
338 | |||
339 | spin_lock_irq(&worker->lock); | ||
340 | check_idle_worker(worker); | ||
341 | |||
342 | if (freezing(current)) { | ||
343 | worker->working = 0; | ||
344 | spin_unlock_irq(&worker->lock); | ||
345 | try_to_freeze(); | ||
346 | } else { | ||
347 | spin_unlock_irq(&worker->lock); | ||
348 | if (!kthread_should_stop()) { | ||
349 | cpu_relax(); | ||
350 | /* | ||
351 | * we've dropped the lock, did someone else | ||
352 | * jump_in? | ||
353 | */ | ||
354 | smp_mb(); | ||
355 | if (!list_empty(&worker->pending) || | ||
356 | !list_empty(&worker->prio_pending)) | ||
357 | continue; | ||
358 | |||
359 | /* | ||
360 | * this short schedule allows more work to | ||
361 | * come in without the queue functions | ||
362 | * needing to go through wake_up_process() | ||
363 | * | ||
364 | * worker->working is still 1, so nobody | ||
365 | * is going to try and wake us up | ||
366 | */ | ||
367 | schedule_timeout(1); | ||
368 | smp_mb(); | ||
369 | if (!list_empty(&worker->pending) || | ||
370 | !list_empty(&worker->prio_pending)) | ||
371 | continue; | ||
372 | |||
373 | if (kthread_should_stop()) | ||
374 | break; | ||
375 | |||
376 | /* still no more work?, sleep for real */ | ||
377 | spin_lock_irq(&worker->lock); | ||
378 | set_current_state(TASK_INTERRUPTIBLE); | ||
379 | if (!list_empty(&worker->pending) || | ||
380 | !list_empty(&worker->prio_pending)) { | ||
381 | spin_unlock_irq(&worker->lock); | ||
382 | set_current_state(TASK_RUNNING); | ||
383 | goto again; | ||
384 | } | ||
385 | |||
386 | /* | ||
387 | * this makes sure we get a wakeup when someone | ||
388 | * adds something new to the queue | ||
389 | */ | ||
390 | worker->working = 0; | ||
391 | spin_unlock_irq(&worker->lock); | ||
392 | |||
393 | if (!kthread_should_stop()) { | ||
394 | schedule_timeout(HZ * 120); | ||
395 | if (!worker->working && | ||
396 | try_worker_shutdown(worker)) { | ||
397 | return 0; | ||
398 | } | ||
399 | } | ||
400 | } | ||
401 | __set_current_state(TASK_RUNNING); | ||
402 | } | ||
403 | } while (!kthread_should_stop()); | ||
404 | return 0; | ||
405 | } | ||
406 | |||
407 | /* | ||
408 | * this will wait for all the worker threads to shutdown | ||
409 | */ | ||
410 | void btrfs_stop_workers(struct btrfs_workers *workers) | ||
411 | { | ||
412 | struct list_head *cur; | ||
413 | struct btrfs_worker_thread *worker; | ||
414 | int can_stop; | ||
415 | |||
416 | spin_lock_irq(&workers->lock); | ||
417 | workers->stopping = 1; | ||
418 | list_splice_init(&workers->idle_list, &workers->worker_list); | ||
419 | while (!list_empty(&workers->worker_list)) { | ||
420 | cur = workers->worker_list.next; | ||
421 | worker = list_entry(cur, struct btrfs_worker_thread, | ||
422 | worker_list); | ||
423 | |||
424 | atomic_inc(&worker->refs); | ||
425 | workers->num_workers -= 1; | ||
426 | if (!list_empty(&worker->worker_list)) { | ||
427 | list_del_init(&worker->worker_list); | ||
428 | put_worker(worker); | ||
429 | can_stop = 1; | ||
430 | } else | ||
431 | can_stop = 0; | ||
432 | spin_unlock_irq(&workers->lock); | ||
433 | if (can_stop) | ||
434 | kthread_stop(worker->task); | ||
435 | spin_lock_irq(&workers->lock); | ||
436 | put_worker(worker); | ||
437 | } | 260 | } |
438 | spin_unlock_irq(&workers->lock); | 261 | if (!need_order) |
262 | trace_btrfs_all_work_done(work); | ||
439 | } | 263 | } |
440 | 264 | ||
441 | /* | 265 | void btrfs_init_work(struct btrfs_work *work, |
442 | * simple init on struct btrfs_workers | 266 | btrfs_func_t func, |
443 | */ | 267 | btrfs_func_t ordered_func, |
444 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, | 268 | btrfs_func_t ordered_free) |
445 | struct btrfs_workers *async_helper) | ||
446 | { | 269 | { |
447 | workers->num_workers = 0; | 270 | work->func = func; |
448 | workers->num_workers_starting = 0; | 271 | work->ordered_func = ordered_func; |
449 | INIT_LIST_HEAD(&workers->worker_list); | 272 | work->ordered_free = ordered_free; |
450 | INIT_LIST_HEAD(&workers->idle_list); | 273 | INIT_WORK(&work->normal_work, normal_work_helper); |
451 | INIT_LIST_HEAD(&workers->order_list); | 274 | INIT_LIST_HEAD(&work->ordered_list); |
452 | INIT_LIST_HEAD(&workers->prio_order_list); | 275 | work->flags = 0; |
453 | spin_lock_init(&workers->lock); | ||
454 | spin_lock_init(&workers->order_lock); | ||
455 | workers->max_workers = max; | ||
456 | workers->idle_thresh = 32; | ||
457 | workers->name = name; | ||
458 | workers->ordered = 0; | ||
459 | workers->atomic_start_pending = 0; | ||
460 | workers->atomic_worker_start = async_helper; | ||
461 | workers->stopping = 0; | ||
462 | } | 276 | } |
463 | 277 | ||
464 | /* | 278 | static inline void __btrfs_queue_work(struct __btrfs_workqueue *wq, |
465 | * starts new worker threads. This does not enforce the max worker | 279 | struct btrfs_work *work) |
466 | * count in case you need to temporarily go past it. | ||
467 | */ | ||
468 | static int __btrfs_start_workers(struct btrfs_workers *workers) | ||
469 | { | 280 | { |
470 | struct btrfs_worker_thread *worker; | 281 | unsigned long flags; |
471 | int ret = 0; | ||
472 | |||
473 | worker = kzalloc(sizeof(*worker), GFP_NOFS); | ||
474 | if (!worker) { | ||
475 | ret = -ENOMEM; | ||
476 | goto fail; | ||
477 | } | ||
478 | |||
479 | INIT_LIST_HEAD(&worker->pending); | ||
480 | INIT_LIST_HEAD(&worker->prio_pending); | ||
481 | INIT_LIST_HEAD(&worker->worker_list); | ||
482 | spin_lock_init(&worker->lock); | ||
483 | |||
484 | atomic_set(&worker->num_pending, 0); | ||
485 | atomic_set(&worker->refs, 1); | ||
486 | worker->workers = workers; | ||
487 | worker->task = kthread_create(worker_loop, worker, | ||
488 | "btrfs-%s-%d", workers->name, | ||
489 | workers->num_workers + 1); | ||
490 | if (IS_ERR(worker->task)) { | ||
491 | ret = PTR_ERR(worker->task); | ||
492 | goto fail; | ||
493 | } | ||
494 | 282 | ||
495 | spin_lock_irq(&workers->lock); | 283 | work->wq = wq; |
496 | if (workers->stopping) { | 284 | thresh_queue_hook(wq); |
497 | spin_unlock_irq(&workers->lock); | 285 | if (work->ordered_func) { |
498 | ret = -EINVAL; | 286 | spin_lock_irqsave(&wq->list_lock, flags); |
499 | goto fail_kthread; | 287 | list_add_tail(&work->ordered_list, &wq->ordered_list); |
288 | spin_unlock_irqrestore(&wq->list_lock, flags); | ||
500 | } | 289 | } |
501 | list_add_tail(&worker->worker_list, &workers->idle_list); | 290 | queue_work(wq->normal_wq, &work->normal_work); |
502 | worker->idle = 1; | 291 | trace_btrfs_work_queued(work); |
503 | workers->num_workers++; | ||
504 | workers->num_workers_starting--; | ||
505 | WARN_ON(workers->num_workers_starting < 0); | ||
506 | spin_unlock_irq(&workers->lock); | ||
507 | |||
508 | wake_up_process(worker->task); | ||
509 | return 0; | ||
510 | |||
511 | fail_kthread: | ||
512 | kthread_stop(worker->task); | ||
513 | fail: | ||
514 | kfree(worker); | ||
515 | spin_lock_irq(&workers->lock); | ||
516 | workers->num_workers_starting--; | ||
517 | spin_unlock_irq(&workers->lock); | ||
518 | return ret; | ||
519 | } | 292 | } |
520 | 293 | ||
521 | int btrfs_start_workers(struct btrfs_workers *workers) | 294 | void btrfs_queue_work(struct btrfs_workqueue *wq, |
295 | struct btrfs_work *work) | ||
522 | { | 296 | { |
523 | spin_lock_irq(&workers->lock); | 297 | struct __btrfs_workqueue *dest_wq; |
524 | workers->num_workers_starting++; | ||
525 | spin_unlock_irq(&workers->lock); | ||
526 | return __btrfs_start_workers(workers); | ||
527 | } | ||
528 | |||
529 | /* | ||
530 | * run through the list and find a worker thread that doesn't have a lot | ||
531 | * to do right now. This can return null if we aren't yet at the thread | ||
532 | * count limit and all of the threads are busy. | ||
533 | */ | ||
534 | static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers) | ||
535 | { | ||
536 | struct btrfs_worker_thread *worker; | ||
537 | struct list_head *next; | ||
538 | int enforce_min; | ||
539 | |||
540 | enforce_min = (workers->num_workers + workers->num_workers_starting) < | ||
541 | workers->max_workers; | ||
542 | |||
543 | /* | ||
544 | * if we find an idle thread, don't move it to the end of the | ||
545 | * idle list. This improves the chance that the next submission | ||
546 | * will reuse the same thread, and maybe catch it while it is still | ||
547 | * working | ||
548 | */ | ||
549 | if (!list_empty(&workers->idle_list)) { | ||
550 | next = workers->idle_list.next; | ||
551 | worker = list_entry(next, struct btrfs_worker_thread, | ||
552 | worker_list); | ||
553 | return worker; | ||
554 | } | ||
555 | if (enforce_min || list_empty(&workers->worker_list)) | ||
556 | return NULL; | ||
557 | |||
558 | /* | ||
559 | * if we pick a busy task, move the task to the end of the list. | ||
560 | * hopefully this will keep things somewhat evenly balanced. | ||
561 | * Do the move in batches based on the sequence number. This groups | ||
562 | * requests submitted at roughly the same time onto the same worker. | ||
563 | */ | ||
564 | next = workers->worker_list.next; | ||
565 | worker = list_entry(next, struct btrfs_worker_thread, worker_list); | ||
566 | worker->sequence++; | ||
567 | 298 | ||
568 | if (worker->sequence % workers->idle_thresh == 0) | 299 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags) && wq->high) |
569 | list_move_tail(next, &workers->worker_list); | 300 | dest_wq = wq->high; |
570 | return worker; | 301 | else |
302 | dest_wq = wq->normal; | ||
303 | __btrfs_queue_work(dest_wq, work); | ||
571 | } | 304 | } |
572 | 305 | ||
573 | /* | 306 | static inline void |
574 | * selects a worker thread to take the next job. This will either find | 307 | __btrfs_destroy_workqueue(struct __btrfs_workqueue *wq) |
575 | * an idle worker, start a new worker up to the max count, or just return | ||
576 | * one of the existing busy workers. | ||
577 | */ | ||
578 | static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers) | ||
579 | { | 308 | { |
580 | struct btrfs_worker_thread *worker; | 309 | destroy_workqueue(wq->normal_wq); |
581 | unsigned long flags; | 310 | trace_btrfs_workqueue_destroy(wq); |
582 | struct list_head *fallback; | 311 | kfree(wq); |
583 | int ret; | ||
584 | |||
585 | spin_lock_irqsave(&workers->lock, flags); | ||
586 | again: | ||
587 | worker = next_worker(workers); | ||
588 | |||
589 | if (!worker) { | ||
590 | if (workers->num_workers + workers->num_workers_starting >= | ||
591 | workers->max_workers) { | ||
592 | goto fallback; | ||
593 | } else if (workers->atomic_worker_start) { | ||
594 | workers->atomic_start_pending = 1; | ||
595 | goto fallback; | ||
596 | } else { | ||
597 | workers->num_workers_starting++; | ||
598 | spin_unlock_irqrestore(&workers->lock, flags); | ||
599 | /* we're below the limit, start another worker */ | ||
600 | ret = __btrfs_start_workers(workers); | ||
601 | spin_lock_irqsave(&workers->lock, flags); | ||
602 | if (ret) | ||
603 | goto fallback; | ||
604 | goto again; | ||
605 | } | ||
606 | } | ||
607 | goto found; | ||
608 | |||
609 | fallback: | ||
610 | fallback = NULL; | ||
611 | /* | ||
612 | * we have failed to find any workers, just | ||
613 | * return the first one we can find. | ||
614 | */ | ||
615 | if (!list_empty(&workers->worker_list)) | ||
616 | fallback = workers->worker_list.next; | ||
617 | if (!list_empty(&workers->idle_list)) | ||
618 | fallback = workers->idle_list.next; | ||
619 | BUG_ON(!fallback); | ||
620 | worker = list_entry(fallback, | ||
621 | struct btrfs_worker_thread, worker_list); | ||
622 | found: | ||
623 | /* | ||
624 | * this makes sure the worker doesn't exit before it is placed | ||
625 | * onto a busy/idle list | ||
626 | */ | ||
627 | atomic_inc(&worker->num_pending); | ||
628 | spin_unlock_irqrestore(&workers->lock, flags); | ||
629 | return worker; | ||
630 | } | 312 | } |
631 | 313 | ||
632 | /* | 314 | void btrfs_destroy_workqueue(struct btrfs_workqueue *wq) |
633 | * btrfs_requeue_work just puts the work item back on the tail of the list | ||
634 | * it was taken from. It is intended for use with long running work functions | ||
635 | * that make some progress and want to give the cpu up for others. | ||
636 | */ | ||
637 | void btrfs_requeue_work(struct btrfs_work *work) | ||
638 | { | 315 | { |
639 | struct btrfs_worker_thread *worker = work->worker; | 316 | if (!wq) |
640 | unsigned long flags; | ||
641 | int wake = 0; | ||
642 | |||
643 | if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) | ||
644 | return; | 317 | return; |
645 | 318 | if (wq->high) | |
646 | spin_lock_irqsave(&worker->lock, flags); | 319 | __btrfs_destroy_workqueue(wq->high); |
647 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) | 320 | __btrfs_destroy_workqueue(wq->normal); |
648 | list_add_tail(&work->list, &worker->prio_pending); | 321 | kfree(wq); |
649 | else | ||
650 | list_add_tail(&work->list, &worker->pending); | ||
651 | atomic_inc(&worker->num_pending); | ||
652 | |||
653 | /* by definition we're busy, take ourselves off the idle | ||
654 | * list | ||
655 | */ | ||
656 | if (worker->idle) { | ||
657 | spin_lock(&worker->workers->lock); | ||
658 | worker->idle = 0; | ||
659 | list_move_tail(&worker->worker_list, | ||
660 | &worker->workers->worker_list); | ||
661 | spin_unlock(&worker->workers->lock); | ||
662 | } | ||
663 | if (!worker->working) { | ||
664 | wake = 1; | ||
665 | worker->working = 1; | ||
666 | } | ||
667 | |||
668 | if (wake) | ||
669 | wake_up_process(worker->task); | ||
670 | spin_unlock_irqrestore(&worker->lock, flags); | ||
671 | } | 322 | } |
672 | 323 | ||
673 | void btrfs_set_work_high_prio(struct btrfs_work *work) | 324 | void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max) |
674 | { | 325 | { |
675 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); | 326 | wq->normal->max_active = max; |
327 | if (wq->high) | ||
328 | wq->high->max_active = max; | ||
676 | } | 329 | } |
677 | 330 | ||
678 | /* | 331 | void btrfs_set_work_high_priority(struct btrfs_work *work) |
679 | * places a struct btrfs_work into the pending queue of one of the kthreads | ||
680 | */ | ||
681 | void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work) | ||
682 | { | 332 | { |
683 | struct btrfs_worker_thread *worker; | 333 | set_bit(WORK_HIGH_PRIO_BIT, &work->flags); |
684 | unsigned long flags; | ||
685 | int wake = 0; | ||
686 | |||
687 | /* don't requeue something already on a list */ | ||
688 | if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags)) | ||
689 | return; | ||
690 | |||
691 | worker = find_worker(workers); | ||
692 | if (workers->ordered) { | ||
693 | /* | ||
694 | * you're not allowed to do ordered queues from an | ||
695 | * interrupt handler | ||
696 | */ | ||
697 | spin_lock(&workers->order_lock); | ||
698 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) { | ||
699 | list_add_tail(&work->order_list, | ||
700 | &workers->prio_order_list); | ||
701 | } else { | ||
702 | list_add_tail(&work->order_list, &workers->order_list); | ||
703 | } | ||
704 | spin_unlock(&workers->order_lock); | ||
705 | } else { | ||
706 | INIT_LIST_HEAD(&work->order_list); | ||
707 | } | ||
708 | |||
709 | spin_lock_irqsave(&worker->lock, flags); | ||
710 | |||
711 | if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) | ||
712 | list_add_tail(&work->list, &worker->prio_pending); | ||
713 | else | ||
714 | list_add_tail(&work->list, &worker->pending); | ||
715 | check_busy_worker(worker); | ||
716 | |||
717 | /* | ||
718 | * avoid calling into wake_up_process if this thread has already | ||
719 | * been kicked | ||
720 | */ | ||
721 | if (!worker->working) | ||
722 | wake = 1; | ||
723 | worker->working = 1; | ||
724 | |||
725 | if (wake) | ||
726 | wake_up_process(worker->task); | ||
727 | spin_unlock_irqrestore(&worker->lock, flags); | ||
728 | } | 334 | } |
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h index 1f26792683ed..9c6b66d15fb0 100644 --- a/fs/btrfs/async-thread.h +++ b/fs/btrfs/async-thread.h | |||
@@ -1,5 +1,6 @@ | |||
1 | /* | 1 | /* |
2 | * Copyright (C) 2007 Oracle. All rights reserved. | 2 | * Copyright (C) 2007 Oracle. All rights reserved. |
3 | * Copyright (C) 2014 Fujitsu. All rights reserved. | ||
3 | * | 4 | * |
4 | * This program is free software; you can redistribute it and/or | 5 | * This program is free software; you can redistribute it and/or |
5 | * modify it under the terms of the GNU General Public | 6 | * modify it under the terms of the GNU General Public |
@@ -19,103 +20,35 @@ | |||
19 | #ifndef __BTRFS_ASYNC_THREAD_ | 20 | #ifndef __BTRFS_ASYNC_THREAD_ |
20 | #define __BTRFS_ASYNC_THREAD_ | 21 | #define __BTRFS_ASYNC_THREAD_ |
21 | 22 | ||
22 | struct btrfs_worker_thread; | 23 | struct btrfs_workqueue; |
24 | /* Internal use only */ | ||
25 | struct __btrfs_workqueue; | ||
26 | struct btrfs_work; | ||
27 | typedef void (*btrfs_func_t)(struct btrfs_work *arg); | ||
23 | 28 | ||
24 | /* | ||
25 | * This is similar to a workqueue, but it is meant to spread the operations | ||
26 | * across all available cpus instead of just the CPU that was used to | ||
27 | * queue the work. There is also some batching introduced to try and | ||
28 | * cut down on context switches. | ||
29 | * | ||
30 | * By default threads are added on demand up to 2 * the number of cpus. | ||
31 | * Changing struct btrfs_workers->max_workers is one way to prevent | ||
32 | * demand creation of kthreads. | ||
33 | * | ||
34 | * the basic model of these worker threads is to embed a btrfs_work | ||
35 | * structure in your own data struct, and use container_of in a | ||
36 | * work function to get back to your data struct. | ||
37 | */ | ||
38 | struct btrfs_work { | 29 | struct btrfs_work { |
39 | /* | 30 | btrfs_func_t func; |
40 | * func should be set to the function you want called | 31 | btrfs_func_t ordered_func; |
41 | * your work struct is passed as the only arg | 32 | btrfs_func_t ordered_free; |
42 | * | 33 | |
43 | * ordered_func must be set for work sent to an ordered work queue, | 34 | /* Don't touch things below */ |
44 | * and it is called to complete a given work item in the same | 35 | struct work_struct normal_work; |
45 | * order they were sent to the queue. | 36 | struct list_head ordered_list; |
46 | */ | 37 | struct __btrfs_workqueue *wq; |
47 | void (*func)(struct btrfs_work *work); | ||
48 | void (*ordered_func)(struct btrfs_work *work); | ||
49 | void (*ordered_free)(struct btrfs_work *work); | ||
50 | |||
51 | /* | ||
52 | * flags should be set to zero. It is used to make sure the | ||
53 | * struct is only inserted once into the list. | ||
54 | */ | ||
55 | unsigned long flags; | 38 | unsigned long flags; |
56 | |||
57 | /* don't touch these */ | ||
58 | struct btrfs_worker_thread *worker; | ||
59 | struct list_head list; | ||
60 | struct list_head order_list; | ||
61 | }; | ||
62 | |||
63 | struct btrfs_workers { | ||
64 | /* current number of running workers */ | ||
65 | int num_workers; | ||
66 | |||
67 | int num_workers_starting; | ||
68 | |||
69 | /* max number of workers allowed. changed by btrfs_start_workers */ | ||
70 | int max_workers; | ||
71 | |||
72 | /* once a worker has this many requests or fewer, it is idle */ | ||
73 | int idle_thresh; | ||
74 | |||
75 | /* force completions in the order they were queued */ | ||
76 | int ordered; | ||
77 | |||
78 | /* more workers required, but in an interrupt handler */ | ||
79 | int atomic_start_pending; | ||
80 | |||
81 | /* | ||
82 | * are we allowed to sleep while starting workers or are we required | ||
83 | * to start them at a later time? If we can't sleep, this indicates | ||
84 | * which queue we need to use to schedule thread creation. | ||
85 | */ | ||
86 | struct btrfs_workers *atomic_worker_start; | ||
87 | |||
88 | /* list with all the work threads. The workers on the idle thread | ||
89 | * may be actively servicing jobs, but they haven't yet hit the | ||
90 | * idle thresh limit above. | ||
91 | */ | ||
92 | struct list_head worker_list; | ||
93 | struct list_head idle_list; | ||
94 | |||
95 | /* | ||
96 | * when operating in ordered mode, this maintains the list | ||
97 | * of work items waiting for completion | ||
98 | */ | ||
99 | struct list_head order_list; | ||
100 | struct list_head prio_order_list; | ||
101 | |||
102 | /* lock for finding the next worker thread to queue on */ | ||
103 | spinlock_t lock; | ||
104 | |||
105 | /* lock for the ordered lists */ | ||
106 | spinlock_t order_lock; | ||
107 | |||
108 | /* extra name for this worker, used for current->name */ | ||
109 | char *name; | ||
110 | |||
111 | int stopping; | ||
112 | }; | 39 | }; |
113 | 40 | ||
114 | void btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work); | 41 | struct btrfs_workqueue *btrfs_alloc_workqueue(const char *name, |
115 | int btrfs_start_workers(struct btrfs_workers *workers); | 42 | int flags, |
116 | void btrfs_stop_workers(struct btrfs_workers *workers); | 43 | int max_active, |
117 | void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max, | 44 | int thresh); |
118 | struct btrfs_workers *async_starter); | 45 | void btrfs_init_work(struct btrfs_work *work, |
119 | void btrfs_requeue_work(struct btrfs_work *work); | 46 | btrfs_func_t func, |
120 | void btrfs_set_work_high_prio(struct btrfs_work *work); | 47 | btrfs_func_t ordered_func, |
48 | btrfs_func_t ordered_free); | ||
49 | void btrfs_queue_work(struct btrfs_workqueue *wq, | ||
50 | struct btrfs_work *work); | ||
51 | void btrfs_destroy_workqueue(struct btrfs_workqueue *wq); | ||
52 | void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max); | ||
53 | void btrfs_set_work_high_priority(struct btrfs_work *work); | ||
121 | #endif | 54 | #endif |
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index aded3ef3d3d4..aad7201ad11b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c | |||
@@ -220,7 +220,8 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, | |||
220 | 220 | ||
221 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | 221 | static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, |
222 | struct ulist *parents, struct __prelim_ref *ref, | 222 | struct ulist *parents, struct __prelim_ref *ref, |
223 | int level, u64 time_seq, const u64 *extent_item_pos) | 223 | int level, u64 time_seq, const u64 *extent_item_pos, |
224 | u64 total_refs) | ||
224 | { | 225 | { |
225 | int ret = 0; | 226 | int ret = 0; |
226 | int slot; | 227 | int slot; |
@@ -249,7 +250,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, | |||
249 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) | 250 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) |
250 | ret = btrfs_next_old_leaf(root, path, time_seq); | 251 | ret = btrfs_next_old_leaf(root, path, time_seq); |
251 | 252 | ||
252 | while (!ret && count < ref->count) { | 253 | while (!ret && count < total_refs) { |
253 | eb = path->nodes[0]; | 254 | eb = path->nodes[0]; |
254 | slot = path->slots[0]; | 255 | slot = path->slots[0]; |
255 | 256 | ||
@@ -306,7 +307,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
306 | struct btrfs_path *path, u64 time_seq, | 307 | struct btrfs_path *path, u64 time_seq, |
307 | struct __prelim_ref *ref, | 308 | struct __prelim_ref *ref, |
308 | struct ulist *parents, | 309 | struct ulist *parents, |
309 | const u64 *extent_item_pos) | 310 | const u64 *extent_item_pos, u64 total_refs) |
310 | { | 311 | { |
311 | struct btrfs_root *root; | 312 | struct btrfs_root *root; |
312 | struct btrfs_key root_key; | 313 | struct btrfs_key root_key; |
@@ -361,7 +362,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, | |||
361 | } | 362 | } |
362 | 363 | ||
363 | ret = add_all_parents(root, path, parents, ref, level, time_seq, | 364 | ret = add_all_parents(root, path, parents, ref, level, time_seq, |
364 | extent_item_pos); | 365 | extent_item_pos, total_refs); |
365 | out: | 366 | out: |
366 | path->lowest_level = 0; | 367 | path->lowest_level = 0; |
367 | btrfs_release_path(path); | 368 | btrfs_release_path(path); |
@@ -374,7 +375,7 @@ out: | |||
374 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | 375 | static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, |
375 | struct btrfs_path *path, u64 time_seq, | 376 | struct btrfs_path *path, u64 time_seq, |
376 | struct list_head *head, | 377 | struct list_head *head, |
377 | const u64 *extent_item_pos) | 378 | const u64 *extent_item_pos, u64 total_refs) |
378 | { | 379 | { |
379 | int err; | 380 | int err; |
380 | int ret = 0; | 381 | int ret = 0; |
@@ -400,7 +401,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, | |||
400 | if (ref->count == 0) | 401 | if (ref->count == 0) |
401 | continue; | 402 | continue; |
402 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, | 403 | err = __resolve_indirect_ref(fs_info, path, time_seq, ref, |
403 | parents, extent_item_pos); | 404 | parents, extent_item_pos, |
405 | total_refs); | ||
404 | /* | 406 | /* |
405 | * we can only tolerate ENOENT,otherwise,we should catch error | 407 | * we can only tolerate ENOENT,otherwise,we should catch error |
406 | * and return directly. | 408 | * and return directly. |
@@ -557,7 +559,7 @@ static void __merge_refs(struct list_head *head, int mode) | |||
557 | * smaller or equal that seq to the list | 559 | * smaller or equal that seq to the list |
558 | */ | 560 | */ |
559 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | 561 | static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, |
560 | struct list_head *prefs) | 562 | struct list_head *prefs, u64 *total_refs) |
561 | { | 563 | { |
562 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; | 564 | struct btrfs_delayed_extent_op *extent_op = head->extent_op; |
563 | struct rb_node *n = &head->node.rb_node; | 565 | struct rb_node *n = &head->node.rb_node; |
@@ -593,6 +595,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
593 | default: | 595 | default: |
594 | BUG_ON(1); | 596 | BUG_ON(1); |
595 | } | 597 | } |
598 | *total_refs += (node->ref_mod * sgn); | ||
596 | switch (node->type) { | 599 | switch (node->type) { |
597 | case BTRFS_TREE_BLOCK_REF_KEY: { | 600 | case BTRFS_TREE_BLOCK_REF_KEY: { |
598 | struct btrfs_delayed_tree_ref *ref; | 601 | struct btrfs_delayed_tree_ref *ref; |
@@ -653,7 +656,8 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, | |||
653 | */ | 656 | */ |
654 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, | 657 | static int __add_inline_refs(struct btrfs_fs_info *fs_info, |
655 | struct btrfs_path *path, u64 bytenr, | 658 | struct btrfs_path *path, u64 bytenr, |
656 | int *info_level, struct list_head *prefs) | 659 | int *info_level, struct list_head *prefs, |
660 | u64 *total_refs) | ||
657 | { | 661 | { |
658 | int ret = 0; | 662 | int ret = 0; |
659 | int slot; | 663 | int slot; |
@@ -677,6 +681,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, | |||
677 | 681 | ||
678 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); | 682 | ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); |
679 | flags = btrfs_extent_flags(leaf, ei); | 683 | flags = btrfs_extent_flags(leaf, ei); |
684 | *total_refs += btrfs_extent_refs(leaf, ei); | ||
680 | btrfs_item_key_to_cpu(leaf, &found_key, slot); | 685 | btrfs_item_key_to_cpu(leaf, &found_key, slot); |
681 | 686 | ||
682 | ptr = (unsigned long)(ei + 1); | 687 | ptr = (unsigned long)(ei + 1); |
@@ -859,6 +864,7 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, | |||
859 | struct list_head prefs; | 864 | struct list_head prefs; |
860 | struct __prelim_ref *ref; | 865 | struct __prelim_ref *ref; |
861 | struct extent_inode_elem *eie = NULL; | 866 | struct extent_inode_elem *eie = NULL; |
867 | u64 total_refs = 0; | ||
862 | 868 | ||
863 | INIT_LIST_HEAD(&prefs); | 869 | INIT_LIST_HEAD(&prefs); |
864 | INIT_LIST_HEAD(&prefs_delayed); | 870 | INIT_LIST_HEAD(&prefs_delayed); |
@@ -873,8 +879,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, | |||
873 | path = btrfs_alloc_path(); | 879 | path = btrfs_alloc_path(); |
874 | if (!path) | 880 | if (!path) |
875 | return -ENOMEM; | 881 | return -ENOMEM; |
876 | if (!trans) | 882 | if (!trans) { |
877 | path->search_commit_root = 1; | 883 | path->search_commit_root = 1; |
884 | path->skip_locking = 1; | ||
885 | } | ||
878 | 886 | ||
879 | /* | 887 | /* |
880 | * grab both a lock on the path and a lock on the delayed ref head. | 888 | * grab both a lock on the path and a lock on the delayed ref head. |
@@ -915,7 +923,7 @@ again: | |||
915 | } | 923 | } |
916 | spin_unlock(&delayed_refs->lock); | 924 | spin_unlock(&delayed_refs->lock); |
917 | ret = __add_delayed_refs(head, time_seq, | 925 | ret = __add_delayed_refs(head, time_seq, |
918 | &prefs_delayed); | 926 | &prefs_delayed, &total_refs); |
919 | mutex_unlock(&head->mutex); | 927 | mutex_unlock(&head->mutex); |
920 | if (ret) | 928 | if (ret) |
921 | goto out; | 929 | goto out; |
@@ -936,7 +944,8 @@ again: | |||
936 | (key.type == BTRFS_EXTENT_ITEM_KEY || | 944 | (key.type == BTRFS_EXTENT_ITEM_KEY || |
937 | key.type == BTRFS_METADATA_ITEM_KEY)) { | 945 | key.type == BTRFS_METADATA_ITEM_KEY)) { |
938 | ret = __add_inline_refs(fs_info, path, bytenr, | 946 | ret = __add_inline_refs(fs_info, path, bytenr, |
939 | &info_level, &prefs); | 947 | &info_level, &prefs, |
948 | &total_refs); | ||
940 | if (ret) | 949 | if (ret) |
941 | goto out; | 950 | goto out; |
942 | ret = __add_keyed_refs(fs_info, path, bytenr, | 951 | ret = __add_keyed_refs(fs_info, path, bytenr, |
@@ -956,7 +965,7 @@ again: | |||
956 | __merge_refs(&prefs, 1); | 965 | __merge_refs(&prefs, 1); |
957 | 966 | ||
958 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, | 967 | ret = __resolve_indirect_refs(fs_info, path, time_seq, &prefs, |
959 | extent_item_pos); | 968 | extent_item_pos, total_refs); |
960 | if (ret) | 969 | if (ret) |
961 | goto out; | 970 | goto out; |
962 | 971 | ||
@@ -965,7 +974,7 @@ again: | |||
965 | while (!list_empty(&prefs)) { | 974 | while (!list_empty(&prefs)) { |
966 | ref = list_first_entry(&prefs, struct __prelim_ref, list); | 975 | ref = list_first_entry(&prefs, struct __prelim_ref, list); |
967 | WARN_ON(ref->count < 0); | 976 | WARN_ON(ref->count < 0); |
968 | if (ref->count && ref->root_id && ref->parent == 0) { | 977 | if (roots && ref->count && ref->root_id && ref->parent == 0) { |
969 | /* no parent == root of tree */ | 978 | /* no parent == root of tree */ |
970 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); | 979 | ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); |
971 | if (ret < 0) | 980 | if (ret < 0) |
@@ -1061,22 +1070,14 @@ static int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, | |||
1061 | u64 time_seq, struct ulist **leafs, | 1070 | u64 time_seq, struct ulist **leafs, |
1062 | const u64 *extent_item_pos) | 1071 | const u64 *extent_item_pos) |
1063 | { | 1072 | { |
1064 | struct ulist *tmp; | ||
1065 | int ret; | 1073 | int ret; |
1066 | 1074 | ||
1067 | tmp = ulist_alloc(GFP_NOFS); | ||
1068 | if (!tmp) | ||
1069 | return -ENOMEM; | ||
1070 | *leafs = ulist_alloc(GFP_NOFS); | 1075 | *leafs = ulist_alloc(GFP_NOFS); |
1071 | if (!*leafs) { | 1076 | if (!*leafs) |
1072 | ulist_free(tmp); | ||
1073 | return -ENOMEM; | 1077 | return -ENOMEM; |
1074 | } | ||
1075 | 1078 | ||
1076 | ret = find_parent_nodes(trans, fs_info, bytenr, | 1079 | ret = find_parent_nodes(trans, fs_info, bytenr, |
1077 | time_seq, *leafs, tmp, extent_item_pos); | 1080 | time_seq, *leafs, NULL, extent_item_pos); |
1078 | ulist_free(tmp); | ||
1079 | |||
1080 | if (ret < 0 && ret != -ENOENT) { | 1081 | if (ret < 0 && ret != -ENOENT) { |
1081 | free_leaf_list(*leafs); | 1082 | free_leaf_list(*leafs); |
1082 | return ret; | 1083 | return ret; |
@@ -1333,38 +1334,13 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, | |||
1333 | if (ret < 0) | 1334 | if (ret < 0) |
1334 | return ret; | 1335 | return ret; |
1335 | 1336 | ||
1336 | while (1) { | 1337 | ret = btrfs_previous_extent_item(fs_info->extent_root, path, 0); |
1337 | u32 nritems; | 1338 | if (ret) { |
1338 | if (path->slots[0] == 0) { | 1339 | if (ret > 0) |
1339 | btrfs_set_path_blocking(path); | 1340 | ret = -ENOENT; |
1340 | ret = btrfs_prev_leaf(fs_info->extent_root, path); | 1341 | return ret; |
1341 | if (ret != 0) { | ||
1342 | if (ret > 0) { | ||
1343 | pr_debug("logical %llu is not within " | ||
1344 | "any extent\n", logical); | ||
1345 | ret = -ENOENT; | ||
1346 | } | ||
1347 | return ret; | ||
1348 | } | ||
1349 | } else { | ||
1350 | path->slots[0]--; | ||
1351 | } | ||
1352 | nritems = btrfs_header_nritems(path->nodes[0]); | ||
1353 | if (nritems == 0) { | ||
1354 | pr_debug("logical %llu is not within any extent\n", | ||
1355 | logical); | ||
1356 | return -ENOENT; | ||
1357 | } | ||
1358 | if (path->slots[0] == nritems) | ||
1359 | path->slots[0]--; | ||
1360 | |||
1361 | btrfs_item_key_to_cpu(path->nodes[0], found_key, | ||
1362 | path->slots[0]); | ||
1363 | if (found_key->type == BTRFS_EXTENT_ITEM_KEY || | ||
1364 | found_key->type == BTRFS_METADATA_ITEM_KEY) | ||
1365 | break; | ||
1366 | } | 1342 | } |
1367 | 1343 | btrfs_item_key_to_cpu(path->nodes[0], found_key, path->slots[0]); | |
1368 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) | 1344 | if (found_key->type == BTRFS_METADATA_ITEM_KEY) |
1369 | size = fs_info->extent_root->leafsize; | 1345 | size = fs_info->extent_root->leafsize; |
1370 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) | 1346 | else if (found_key->type == BTRFS_EXTENT_ITEM_KEY) |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 8fed2125689e..c9a24444ec9a 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -109,14 +109,17 @@ struct btrfs_inode { | |||
109 | u64 last_trans; | 109 | u64 last_trans; |
110 | 110 | ||
111 | /* | 111 | /* |
112 | * log transid when this inode was last modified | 112 | * transid that last logged this inode |
113 | */ | 113 | */ |
114 | u64 last_sub_trans; | 114 | u64 logged_trans; |
115 | 115 | ||
116 | /* | 116 | /* |
117 | * transid that last logged this inode | 117 | * log transid when this inode was last modified |
118 | */ | 118 | */ |
119 | u64 logged_trans; | 119 | int last_sub_trans; |
120 | |||
121 | /* a local copy of root's last_log_commit */ | ||
122 | int last_log_commit; | ||
120 | 123 | ||
121 | /* total number of bytes pending delalloc, used by stat to calc the | 124 | /* total number of bytes pending delalloc, used by stat to calc the |
122 | * real block usage of the file | 125 | * real block usage of the file |
@@ -155,9 +158,6 @@ struct btrfs_inode { | |||
155 | /* flags field from the on disk inode */ | 158 | /* flags field from the on disk inode */ |
156 | u32 flags; | 159 | u32 flags; |
157 | 160 | ||
158 | /* a local copy of root's last_log_commit */ | ||
159 | unsigned long last_log_commit; | ||
160 | |||
161 | /* | 161 | /* |
162 | * Counters to keep track of the number of extent item's we may use due | 162 | * Counters to keep track of the number of extent item's we may use due |
163 | * to delalloc and such. outstanding_extents is the number of extent | 163 | * to delalloc and such. outstanding_extents is the number of extent |
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index b01fb6c527e3..d43c544d3b68 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c | |||
@@ -472,7 +472,7 @@ static noinline int add_ra_bio_pages(struct inode *inode, | |||
472 | rcu_read_lock(); | 472 | rcu_read_lock(); |
473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); | 473 | page = radix_tree_lookup(&mapping->page_tree, pg_index); |
474 | rcu_read_unlock(); | 474 | rcu_read_unlock(); |
475 | if (page) { | 475 | if (page && !radix_tree_exceptional_entry(page)) { |
476 | misses++; | 476 | misses++; |
477 | if (misses > 4) | 477 | if (misses > 4) |
478 | break; | 478 | break; |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index cbd3a7d6fa68..88d1b1eedc9c 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -5376,6 +5376,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5376 | int advance_right; | 5376 | int advance_right; |
5377 | u64 left_blockptr; | 5377 | u64 left_blockptr; |
5378 | u64 right_blockptr; | 5378 | u64 right_blockptr; |
5379 | u64 left_gen; | ||
5380 | u64 right_gen; | ||
5379 | u64 left_start_ctransid; | 5381 | u64 left_start_ctransid; |
5380 | u64 right_start_ctransid; | 5382 | u64 right_start_ctransid; |
5381 | u64 ctransid; | 5383 | u64 ctransid; |
@@ -5640,7 +5642,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root, | |||
5640 | right_blockptr = btrfs_node_blockptr( | 5642 | right_blockptr = btrfs_node_blockptr( |
5641 | right_path->nodes[right_level], | 5643 | right_path->nodes[right_level], |
5642 | right_path->slots[right_level]); | 5644 | right_path->slots[right_level]); |
5643 | if (left_blockptr == right_blockptr) { | 5645 | left_gen = btrfs_node_ptr_generation( |
5646 | left_path->nodes[left_level], | ||
5647 | left_path->slots[left_level]); | ||
5648 | right_gen = btrfs_node_ptr_generation( | ||
5649 | right_path->nodes[right_level], | ||
5650 | right_path->slots[right_level]); | ||
5651 | if (left_blockptr == right_blockptr && | ||
5652 | left_gen == right_gen) { | ||
5644 | /* | 5653 | /* |
5645 | * As we're on a shared block, don't | 5654 | * As we're on a shared block, don't |
5646 | * allow to go deeper. | 5655 | * allow to go deeper. |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2c1a42ca519f..bc96c03dd259 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -351,6 +351,7 @@ static inline unsigned long btrfs_chunk_item_size(int num_stripes) | |||
351 | #define BTRFS_FS_STATE_ERROR 0 | 351 | #define BTRFS_FS_STATE_ERROR 0 |
352 | #define BTRFS_FS_STATE_REMOUNTING 1 | 352 | #define BTRFS_FS_STATE_REMOUNTING 1 |
353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 | 353 | #define BTRFS_FS_STATE_TRANS_ABORTED 2 |
354 | #define BTRFS_FS_STATE_DEV_REPLACING 3 | ||
354 | 355 | ||
355 | /* Super block flags */ | 356 | /* Super block flags */ |
356 | /* Errors detected */ | 357 | /* Errors detected */ |
@@ -1489,6 +1490,7 @@ struct btrfs_fs_info { | |||
1489 | */ | 1490 | */ |
1490 | struct list_head ordered_roots; | 1491 | struct list_head ordered_roots; |
1491 | 1492 | ||
1493 | struct mutex delalloc_root_mutex; | ||
1492 | spinlock_t delalloc_root_lock; | 1494 | spinlock_t delalloc_root_lock; |
1493 | /* all fs/file tree roots that have delalloc inodes. */ | 1495 | /* all fs/file tree roots that have delalloc inodes. */ |
1494 | struct list_head delalloc_roots; | 1496 | struct list_head delalloc_roots; |
@@ -1503,28 +1505,27 @@ struct btrfs_fs_info { | |||
1503 | * A third pool does submit_bio to avoid deadlocking with the other | 1505 | * A third pool does submit_bio to avoid deadlocking with the other |
1504 | * two | 1506 | * two |
1505 | */ | 1507 | */ |
1506 | struct btrfs_workers generic_worker; | 1508 | struct btrfs_workqueue *workers; |
1507 | struct btrfs_workers workers; | 1509 | struct btrfs_workqueue *delalloc_workers; |
1508 | struct btrfs_workers delalloc_workers; | 1510 | struct btrfs_workqueue *flush_workers; |
1509 | struct btrfs_workers flush_workers; | 1511 | struct btrfs_workqueue *endio_workers; |
1510 | struct btrfs_workers endio_workers; | 1512 | struct btrfs_workqueue *endio_meta_workers; |
1511 | struct btrfs_workers endio_meta_workers; | 1513 | struct btrfs_workqueue *endio_raid56_workers; |
1512 | struct btrfs_workers endio_raid56_workers; | 1514 | struct btrfs_workqueue *rmw_workers; |
1513 | struct btrfs_workers rmw_workers; | 1515 | struct btrfs_workqueue *endio_meta_write_workers; |
1514 | struct btrfs_workers endio_meta_write_workers; | 1516 | struct btrfs_workqueue *endio_write_workers; |
1515 | struct btrfs_workers endio_write_workers; | 1517 | struct btrfs_workqueue *endio_freespace_worker; |
1516 | struct btrfs_workers endio_freespace_worker; | 1518 | struct btrfs_workqueue *submit_workers; |
1517 | struct btrfs_workers submit_workers; | 1519 | struct btrfs_workqueue *caching_workers; |
1518 | struct btrfs_workers caching_workers; | 1520 | struct btrfs_workqueue *readahead_workers; |
1519 | struct btrfs_workers readahead_workers; | ||
1520 | 1521 | ||
1521 | /* | 1522 | /* |
1522 | * fixup workers take dirty pages that didn't properly go through | 1523 | * fixup workers take dirty pages that didn't properly go through |
1523 | * the cow mechanism and make them safe to write. It happens | 1524 | * the cow mechanism and make them safe to write. It happens |
1524 | * for the sys_munmap function call path | 1525 | * for the sys_munmap function call path |
1525 | */ | 1526 | */ |
1526 | struct btrfs_workers fixup_workers; | 1527 | struct btrfs_workqueue *fixup_workers; |
1527 | struct btrfs_workers delayed_workers; | 1528 | struct btrfs_workqueue *delayed_workers; |
1528 | struct task_struct *transaction_kthread; | 1529 | struct task_struct *transaction_kthread; |
1529 | struct task_struct *cleaner_kthread; | 1530 | struct task_struct *cleaner_kthread; |
1530 | int thread_pool_size; | 1531 | int thread_pool_size; |
@@ -1604,9 +1605,9 @@ struct btrfs_fs_info { | |||
1604 | atomic_t scrub_cancel_req; | 1605 | atomic_t scrub_cancel_req; |
1605 | wait_queue_head_t scrub_pause_wait; | 1606 | wait_queue_head_t scrub_pause_wait; |
1606 | int scrub_workers_refcnt; | 1607 | int scrub_workers_refcnt; |
1607 | struct btrfs_workers scrub_workers; | 1608 | struct btrfs_workqueue *scrub_workers; |
1608 | struct btrfs_workers scrub_wr_completion_workers; | 1609 | struct btrfs_workqueue *scrub_wr_completion_workers; |
1609 | struct btrfs_workers scrub_nocow_workers; | 1610 | struct btrfs_workqueue *scrub_nocow_workers; |
1610 | 1611 | ||
1611 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 1612 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
1612 | u32 check_integrity_print_mask; | 1613 | u32 check_integrity_print_mask; |
@@ -1647,7 +1648,7 @@ struct btrfs_fs_info { | |||
1647 | /* qgroup rescan items */ | 1648 | /* qgroup rescan items */ |
1648 | struct mutex qgroup_rescan_lock; /* protects the progress item */ | 1649 | struct mutex qgroup_rescan_lock; /* protects the progress item */ |
1649 | struct btrfs_key qgroup_rescan_progress; | 1650 | struct btrfs_key qgroup_rescan_progress; |
1650 | struct btrfs_workers qgroup_rescan_workers; | 1651 | struct btrfs_workqueue *qgroup_rescan_workers; |
1651 | struct completion qgroup_rescan_completion; | 1652 | struct completion qgroup_rescan_completion; |
1652 | struct btrfs_work qgroup_rescan_work; | 1653 | struct btrfs_work qgroup_rescan_work; |
1653 | 1654 | ||
@@ -1674,10 +1675,18 @@ struct btrfs_fs_info { | |||
1674 | 1675 | ||
1675 | atomic_t mutually_exclusive_operation_running; | 1676 | atomic_t mutually_exclusive_operation_running; |
1676 | 1677 | ||
1678 | struct percpu_counter bio_counter; | ||
1679 | wait_queue_head_t replace_wait; | ||
1680 | |||
1677 | struct semaphore uuid_tree_rescan_sem; | 1681 | struct semaphore uuid_tree_rescan_sem; |
1678 | unsigned int update_uuid_tree_gen:1; | 1682 | unsigned int update_uuid_tree_gen:1; |
1679 | }; | 1683 | }; |
1680 | 1684 | ||
1685 | struct btrfs_subvolume_writers { | ||
1686 | struct percpu_counter counter; | ||
1687 | wait_queue_head_t wait; | ||
1688 | }; | ||
1689 | |||
1681 | /* | 1690 | /* |
1682 | * in ram representation of the tree. extent_root is used for all allocations | 1691 | * in ram representation of the tree. extent_root is used for all allocations |
1683 | * and for the extent tree extent_root root. | 1692 | * and for the extent tree extent_root root. |
@@ -1714,11 +1723,15 @@ struct btrfs_root { | |||
1714 | struct mutex log_mutex; | 1723 | struct mutex log_mutex; |
1715 | wait_queue_head_t log_writer_wait; | 1724 | wait_queue_head_t log_writer_wait; |
1716 | wait_queue_head_t log_commit_wait[2]; | 1725 | wait_queue_head_t log_commit_wait[2]; |
1726 | struct list_head log_ctxs[2]; | ||
1717 | atomic_t log_writers; | 1727 | atomic_t log_writers; |
1718 | atomic_t log_commit[2]; | 1728 | atomic_t log_commit[2]; |
1719 | atomic_t log_batch; | 1729 | atomic_t log_batch; |
1720 | unsigned long log_transid; | 1730 | int log_transid; |
1721 | unsigned long last_log_commit; | 1731 | /* No matter the commit succeeds or not*/ |
1732 | int log_transid_committed; | ||
1733 | /* Just be updated when the commit succeeds. */ | ||
1734 | int last_log_commit; | ||
1722 | pid_t log_start_pid; | 1735 | pid_t log_start_pid; |
1723 | bool log_multiple_pids; | 1736 | bool log_multiple_pids; |
1724 | 1737 | ||
@@ -1793,6 +1806,7 @@ struct btrfs_root { | |||
1793 | spinlock_t root_item_lock; | 1806 | spinlock_t root_item_lock; |
1794 | atomic_t refs; | 1807 | atomic_t refs; |
1795 | 1808 | ||
1809 | struct mutex delalloc_mutex; | ||
1796 | spinlock_t delalloc_lock; | 1810 | spinlock_t delalloc_lock; |
1797 | /* | 1811 | /* |
1798 | * all of the inodes that have delalloc bytes. It is possible for | 1812 | * all of the inodes that have delalloc bytes. It is possible for |
@@ -1802,6 +1816,8 @@ struct btrfs_root { | |||
1802 | struct list_head delalloc_inodes; | 1816 | struct list_head delalloc_inodes; |
1803 | struct list_head delalloc_root; | 1817 | struct list_head delalloc_root; |
1804 | u64 nr_delalloc_inodes; | 1818 | u64 nr_delalloc_inodes; |
1819 | |||
1820 | struct mutex ordered_extent_mutex; | ||
1805 | /* | 1821 | /* |
1806 | * this is used by the balancing code to wait for all the pending | 1822 | * this is used by the balancing code to wait for all the pending |
1807 | * ordered extents | 1823 | * ordered extents |
@@ -1822,6 +1838,8 @@ struct btrfs_root { | |||
1822 | * manipulation with the read-only status via SUBVOL_SETFLAGS | 1838 | * manipulation with the read-only status via SUBVOL_SETFLAGS |
1823 | */ | 1839 | */ |
1824 | int send_in_progress; | 1840 | int send_in_progress; |
1841 | struct btrfs_subvolume_writers *subv_writers; | ||
1842 | atomic_t will_be_snapshoted; | ||
1825 | }; | 1843 | }; |
1826 | 1844 | ||
1827 | struct btrfs_ioctl_defrag_range_args { | 1845 | struct btrfs_ioctl_defrag_range_args { |
@@ -3346,6 +3364,9 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info); | |||
3346 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, | 3364 | int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans, |
3347 | struct btrfs_fs_info *fs_info); | 3365 | struct btrfs_fs_info *fs_info); |
3348 | int __get_raid_index(u64 flags); | 3366 | int __get_raid_index(u64 flags); |
3367 | |||
3368 | int btrfs_start_nocow_write(struct btrfs_root *root); | ||
3369 | void btrfs_end_nocow_write(struct btrfs_root *root); | ||
3349 | /* ctree.c */ | 3370 | /* ctree.c */ |
3350 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 3371 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
3351 | int level, int *slot); | 3372 | int level, int *slot); |
@@ -3723,7 +3744,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
3723 | u32 min_type); | 3744 | u32 min_type); |
3724 | 3745 | ||
3725 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 3746 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
3726 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput); | 3747 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
3748 | int nr); | ||
3727 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 3749 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
3728 | struct extent_state **cached_state); | 3750 | struct extent_state **cached_state); |
3729 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, | 3751 | int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, |
@@ -4005,6 +4027,11 @@ int btrfs_scrub_cancel_dev(struct btrfs_fs_info *info, | |||
4005 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | 4027 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, |
4006 | struct btrfs_scrub_progress *progress); | 4028 | struct btrfs_scrub_progress *progress); |
4007 | 4029 | ||
4030 | /* dev-replace.c */ | ||
4031 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info); | ||
4032 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info); | ||
4033 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info); | ||
4034 | |||
4008 | /* reada.c */ | 4035 | /* reada.c */ |
4009 | struct reada_control { | 4036 | struct reada_control { |
4010 | struct btrfs_root *root; /* tree to prefetch */ | 4037 | struct btrfs_root *root; /* tree to prefetch */ |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 451b00c86f6c..33e561a84013 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
@@ -1392,11 +1392,11 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root, | |||
1392 | return -ENOMEM; | 1392 | return -ENOMEM; |
1393 | 1393 | ||
1394 | async_work->delayed_root = delayed_root; | 1394 | async_work->delayed_root = delayed_root; |
1395 | async_work->work.func = btrfs_async_run_delayed_root; | 1395 | btrfs_init_work(&async_work->work, btrfs_async_run_delayed_root, |
1396 | async_work->work.flags = 0; | 1396 | NULL, NULL); |
1397 | async_work->nr = nr; | 1397 | async_work->nr = nr; |
1398 | 1398 | ||
1399 | btrfs_queue_worker(&root->fs_info->delayed_workers, &async_work->work); | 1399 | btrfs_queue_work(root->fs_info->delayed_workers, &async_work->work); |
1400 | return 0; | 1400 | return 0; |
1401 | } | 1401 | } |
1402 | 1402 | ||
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f3bff89eecf0..31299646024d 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -199,44 +199,31 @@ static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, | |||
199 | */ | 199 | */ |
200 | static struct btrfs_delayed_ref_head * | 200 | static struct btrfs_delayed_ref_head * |
201 | find_ref_head(struct rb_root *root, u64 bytenr, | 201 | find_ref_head(struct rb_root *root, u64 bytenr, |
202 | struct btrfs_delayed_ref_head **last, int return_bigger) | 202 | int return_bigger) |
203 | { | 203 | { |
204 | struct rb_node *n; | 204 | struct rb_node *n; |
205 | struct btrfs_delayed_ref_head *entry; | 205 | struct btrfs_delayed_ref_head *entry; |
206 | int cmp = 0; | ||
207 | 206 | ||
208 | again: | ||
209 | n = root->rb_node; | 207 | n = root->rb_node; |
210 | entry = NULL; | 208 | entry = NULL; |
211 | while (n) { | 209 | while (n) { |
212 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); | 210 | entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); |
213 | if (last) | ||
214 | *last = entry; | ||
215 | 211 | ||
216 | if (bytenr < entry->node.bytenr) | 212 | if (bytenr < entry->node.bytenr) |
217 | cmp = -1; | ||
218 | else if (bytenr > entry->node.bytenr) | ||
219 | cmp = 1; | ||
220 | else | ||
221 | cmp = 0; | ||
222 | |||
223 | if (cmp < 0) | ||
224 | n = n->rb_left; | 213 | n = n->rb_left; |
225 | else if (cmp > 0) | 214 | else if (bytenr > entry->node.bytenr) |
226 | n = n->rb_right; | 215 | n = n->rb_right; |
227 | else | 216 | else |
228 | return entry; | 217 | return entry; |
229 | } | 218 | } |
230 | if (entry && return_bigger) { | 219 | if (entry && return_bigger) { |
231 | if (cmp > 0) { | 220 | if (bytenr > entry->node.bytenr) { |
232 | n = rb_next(&entry->href_node); | 221 | n = rb_next(&entry->href_node); |
233 | if (!n) | 222 | if (!n) |
234 | n = rb_first(root); | 223 | n = rb_first(root); |
235 | entry = rb_entry(n, struct btrfs_delayed_ref_head, | 224 | entry = rb_entry(n, struct btrfs_delayed_ref_head, |
236 | href_node); | 225 | href_node); |
237 | bytenr = entry->node.bytenr; | 226 | return entry; |
238 | return_bigger = 0; | ||
239 | goto again; | ||
240 | } | 227 | } |
241 | return entry; | 228 | return entry; |
242 | } | 229 | } |
@@ -415,12 +402,12 @@ btrfs_select_ref_head(struct btrfs_trans_handle *trans) | |||
415 | 402 | ||
416 | again: | 403 | again: |
417 | start = delayed_refs->run_delayed_start; | 404 | start = delayed_refs->run_delayed_start; |
418 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 405 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
419 | if (!head && !loop) { | 406 | if (!head && !loop) { |
420 | delayed_refs->run_delayed_start = 0; | 407 | delayed_refs->run_delayed_start = 0; |
421 | start = 0; | 408 | start = 0; |
422 | loop = true; | 409 | loop = true; |
423 | head = find_ref_head(&delayed_refs->href_root, start, NULL, 1); | 410 | head = find_ref_head(&delayed_refs->href_root, start, 1); |
424 | if (!head) | 411 | if (!head) |
425 | return NULL; | 412 | return NULL; |
426 | } else if (!head && loop) { | 413 | } else if (!head && loop) { |
@@ -508,6 +495,7 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
508 | ref = btrfs_delayed_node_to_head(update); | 495 | ref = btrfs_delayed_node_to_head(update); |
509 | BUG_ON(existing_ref->is_data != ref->is_data); | 496 | BUG_ON(existing_ref->is_data != ref->is_data); |
510 | 497 | ||
498 | spin_lock(&existing_ref->lock); | ||
511 | if (ref->must_insert_reserved) { | 499 | if (ref->must_insert_reserved) { |
512 | /* if the extent was freed and then | 500 | /* if the extent was freed and then |
513 | * reallocated before the delayed ref | 501 | * reallocated before the delayed ref |
@@ -549,7 +537,6 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | |||
549 | * only need the lock for this case cause we could be processing it | 537 | * only need the lock for this case cause we could be processing it |
550 | * currently, for refs we just added we know we're a-ok. | 538 | * currently, for refs we just added we know we're a-ok. |
551 | */ | 539 | */ |
552 | spin_lock(&existing_ref->lock); | ||
553 | existing->ref_mod += update->ref_mod; | 540 | existing->ref_mod += update->ref_mod; |
554 | spin_unlock(&existing_ref->lock); | 541 | spin_unlock(&existing_ref->lock); |
555 | } | 542 | } |
@@ -898,7 +885,7 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | |||
898 | struct btrfs_delayed_ref_root *delayed_refs; | 885 | struct btrfs_delayed_ref_root *delayed_refs; |
899 | 886 | ||
900 | delayed_refs = &trans->transaction->delayed_refs; | 887 | delayed_refs = &trans->transaction->delayed_refs; |
901 | return find_ref_head(&delayed_refs->href_root, bytenr, NULL, 0); | 888 | return find_ref_head(&delayed_refs->href_root, bytenr, 0); |
902 | } | 889 | } |
903 | 890 | ||
904 | void btrfs_delayed_ref_exit(void) | 891 | void btrfs_delayed_ref_exit(void) |
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 564c92638b20..9f2290509aca 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c | |||
@@ -431,6 +431,35 @@ leave_no_lock: | |||
431 | return ret; | 431 | return ret; |
432 | } | 432 | } |
433 | 433 | ||
434 | /* | ||
435 | * blocked until all flighting bios are finished. | ||
436 | */ | ||
437 | static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) | ||
438 | { | ||
439 | s64 writers; | ||
440 | DEFINE_WAIT(wait); | ||
441 | |||
442 | set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
443 | do { | ||
444 | prepare_to_wait(&fs_info->replace_wait, &wait, | ||
445 | TASK_UNINTERRUPTIBLE); | ||
446 | writers = percpu_counter_sum(&fs_info->bio_counter); | ||
447 | if (writers) | ||
448 | schedule(); | ||
449 | finish_wait(&fs_info->replace_wait, &wait); | ||
450 | } while (writers); | ||
451 | } | ||
452 | |||
453 | /* | ||
454 | * we have removed target device, it is safe to allow new bios request. | ||
455 | */ | ||
456 | static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) | ||
457 | { | ||
458 | clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); | ||
459 | if (waitqueue_active(&fs_info->replace_wait)) | ||
460 | wake_up(&fs_info->replace_wait); | ||
461 | } | ||
462 | |||
434 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | 463 | static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, |
435 | int scrub_ret) | 464 | int scrub_ret) |
436 | { | 465 | { |
@@ -458,17 +487,11 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
458 | src_device = dev_replace->srcdev; | 487 | src_device = dev_replace->srcdev; |
459 | btrfs_dev_replace_unlock(dev_replace); | 488 | btrfs_dev_replace_unlock(dev_replace); |
460 | 489 | ||
461 | /* replace old device with new one in mapping tree */ | ||
462 | if (!scrub_ret) | ||
463 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
464 | src_device, | ||
465 | tgt_device); | ||
466 | |||
467 | /* | 490 | /* |
468 | * flush all outstanding I/O and inode extent mappings before the | 491 | * flush all outstanding I/O and inode extent mappings before the |
469 | * copy operation is declared as being finished | 492 | * copy operation is declared as being finished |
470 | */ | 493 | */ |
471 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 494 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
472 | if (ret) { | 495 | if (ret) { |
473 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 496 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
474 | return ret; | 497 | return ret; |
@@ -484,6 +507,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
484 | WARN_ON(ret); | 507 | WARN_ON(ret); |
485 | 508 | ||
486 | /* keep away write_all_supers() during the finishing procedure */ | 509 | /* keep away write_all_supers() during the finishing procedure */ |
510 | mutex_lock(&root->fs_info->chunk_mutex); | ||
487 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | 511 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); |
488 | btrfs_dev_replace_lock(dev_replace); | 512 | btrfs_dev_replace_lock(dev_replace); |
489 | dev_replace->replace_state = | 513 | dev_replace->replace_state = |
@@ -494,7 +518,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
494 | dev_replace->time_stopped = get_seconds(); | 518 | dev_replace->time_stopped = get_seconds(); |
495 | dev_replace->item_needs_writeback = 1; | 519 | dev_replace->item_needs_writeback = 1; |
496 | 520 | ||
497 | if (scrub_ret) { | 521 | /* replace old device with new one in mapping tree */ |
522 | if (!scrub_ret) { | ||
523 | btrfs_dev_replace_update_device_in_mapping_tree(fs_info, | ||
524 | src_device, | ||
525 | tgt_device); | ||
526 | } else { | ||
498 | printk_in_rcu(KERN_ERR | 527 | printk_in_rcu(KERN_ERR |
499 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", | 528 | "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", |
500 | src_device->missing ? "<missing disk>" : | 529 | src_device->missing ? "<missing disk>" : |
@@ -503,6 +532,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
503 | rcu_str_deref(tgt_device->name), scrub_ret); | 532 | rcu_str_deref(tgt_device->name), scrub_ret); |
504 | btrfs_dev_replace_unlock(dev_replace); | 533 | btrfs_dev_replace_unlock(dev_replace); |
505 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 534 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
535 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
506 | if (tgt_device) | 536 | if (tgt_device) |
507 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); | 537 | btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); |
508 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); | 538 | mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); |
@@ -532,8 +562,12 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
532 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; | 562 | fs_info->fs_devices->latest_bdev = tgt_device->bdev; |
533 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); | 563 | list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); |
534 | 564 | ||
565 | btrfs_rm_dev_replace_blocked(fs_info); | ||
566 | |||
535 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); | 567 | btrfs_rm_dev_replace_srcdev(fs_info, src_device); |
536 | 568 | ||
569 | btrfs_rm_dev_replace_unblocked(fs_info); | ||
570 | |||
537 | /* | 571 | /* |
538 | * this is again a consistent state where no dev_replace procedure | 572 | * this is again a consistent state where no dev_replace procedure |
539 | * is running, the target device is part of the filesystem, the | 573 | * is running, the target device is part of the filesystem, the |
@@ -543,6 +577,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, | |||
543 | */ | 577 | */ |
544 | btrfs_dev_replace_unlock(dev_replace); | 578 | btrfs_dev_replace_unlock(dev_replace); |
545 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 579 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
580 | mutex_unlock(&root->fs_info->chunk_mutex); | ||
546 | 581 | ||
547 | /* write back the superblocks */ | 582 | /* write back the superblocks */ |
548 | trans = btrfs_start_transaction(root, 0); | 583 | trans = btrfs_start_transaction(root, 0); |
@@ -862,3 +897,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) | |||
862 | mutex_unlock(&dev_replace->lock_management_lock); | 897 | mutex_unlock(&dev_replace->lock_management_lock); |
863 | } | 898 | } |
864 | } | 899 | } |
900 | |||
901 | void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) | ||
902 | { | ||
903 | percpu_counter_inc(&fs_info->bio_counter); | ||
904 | } | ||
905 | |||
906 | void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) | ||
907 | { | ||
908 | percpu_counter_dec(&fs_info->bio_counter); | ||
909 | |||
910 | if (waitqueue_active(&fs_info->replace_wait)) | ||
911 | wake_up(&fs_info->replace_wait); | ||
912 | } | ||
913 | |||
914 | void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) | ||
915 | { | ||
916 | DEFINE_WAIT(wait); | ||
917 | again: | ||
918 | percpu_counter_inc(&fs_info->bio_counter); | ||
919 | if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { | ||
920 | btrfs_bio_counter_dec(fs_info); | ||
921 | wait_event(fs_info->replace_wait, | ||
922 | !test_bit(BTRFS_FS_STATE_DEV_REPLACING, | ||
923 | &fs_info->fs_state)); | ||
924 | goto again; | ||
925 | } | ||
926 | |||
927 | } | ||
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 81ea55314b1f..bd0f752b797b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -678,32 +678,31 @@ static void end_workqueue_bio(struct bio *bio, int err) | |||
678 | 678 | ||
679 | fs_info = end_io_wq->info; | 679 | fs_info = end_io_wq->info; |
680 | end_io_wq->error = err; | 680 | end_io_wq->error = err; |
681 | end_io_wq->work.func = end_workqueue_fn; | 681 | btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL); |
682 | end_io_wq->work.flags = 0; | ||
683 | 682 | ||
684 | if (bio->bi_rw & REQ_WRITE) { | 683 | if (bio->bi_rw & REQ_WRITE) { |
685 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) | 684 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) |
686 | btrfs_queue_worker(&fs_info->endio_meta_write_workers, | 685 | btrfs_queue_work(fs_info->endio_meta_write_workers, |
687 | &end_io_wq->work); | 686 | &end_io_wq->work); |
688 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) | 687 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) |
689 | btrfs_queue_worker(&fs_info->endio_freespace_worker, | 688 | btrfs_queue_work(fs_info->endio_freespace_worker, |
690 | &end_io_wq->work); | 689 | &end_io_wq->work); |
691 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 690 | else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
692 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 691 | btrfs_queue_work(fs_info->endio_raid56_workers, |
693 | &end_io_wq->work); | 692 | &end_io_wq->work); |
694 | else | 693 | else |
695 | btrfs_queue_worker(&fs_info->endio_write_workers, | 694 | btrfs_queue_work(fs_info->endio_write_workers, |
696 | &end_io_wq->work); | 695 | &end_io_wq->work); |
697 | } else { | 696 | } else { |
698 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) | 697 | if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) |
699 | btrfs_queue_worker(&fs_info->endio_raid56_workers, | 698 | btrfs_queue_work(fs_info->endio_raid56_workers, |
700 | &end_io_wq->work); | 699 | &end_io_wq->work); |
701 | else if (end_io_wq->metadata) | 700 | else if (end_io_wq->metadata) |
702 | btrfs_queue_worker(&fs_info->endio_meta_workers, | 701 | btrfs_queue_work(fs_info->endio_meta_workers, |
703 | &end_io_wq->work); | 702 | &end_io_wq->work); |
704 | else | 703 | else |
705 | btrfs_queue_worker(&fs_info->endio_workers, | 704 | btrfs_queue_work(fs_info->endio_workers, |
706 | &end_io_wq->work); | 705 | &end_io_wq->work); |
707 | } | 706 | } |
708 | } | 707 | } |
709 | 708 | ||
@@ -738,7 +737,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
738 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | 737 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) |
739 | { | 738 | { |
740 | unsigned long limit = min_t(unsigned long, | 739 | unsigned long limit = min_t(unsigned long, |
741 | info->workers.max_workers, | 740 | info->thread_pool_size, |
742 | info->fs_devices->open_devices); | 741 | info->fs_devices->open_devices); |
743 | return 256 * limit; | 742 | return 256 * limit; |
744 | } | 743 | } |
@@ -811,11 +810,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
811 | async->submit_bio_start = submit_bio_start; | 810 | async->submit_bio_start = submit_bio_start; |
812 | async->submit_bio_done = submit_bio_done; | 811 | async->submit_bio_done = submit_bio_done; |
813 | 812 | ||
814 | async->work.func = run_one_async_start; | 813 | btrfs_init_work(&async->work, run_one_async_start, |
815 | async->work.ordered_func = run_one_async_done; | 814 | run_one_async_done, run_one_async_free); |
816 | async->work.ordered_free = run_one_async_free; | ||
817 | 815 | ||
818 | async->work.flags = 0; | ||
819 | async->bio_flags = bio_flags; | 816 | async->bio_flags = bio_flags; |
820 | async->bio_offset = bio_offset; | 817 | async->bio_offset = bio_offset; |
821 | 818 | ||
@@ -824,9 +821,9 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
824 | atomic_inc(&fs_info->nr_async_submits); | 821 | atomic_inc(&fs_info->nr_async_submits); |
825 | 822 | ||
826 | if (rw & REQ_SYNC) | 823 | if (rw & REQ_SYNC) |
827 | btrfs_set_work_high_prio(&async->work); | 824 | btrfs_set_work_high_priority(&async->work); |
828 | 825 | ||
829 | btrfs_queue_worker(&fs_info->workers, &async->work); | 826 | btrfs_queue_work(fs_info->workers, &async->work); |
830 | 827 | ||
831 | while (atomic_read(&fs_info->async_submit_draining) && | 828 | while (atomic_read(&fs_info->async_submit_draining) && |
832 | atomic_read(&fs_info->nr_async_submits)) { | 829 | atomic_read(&fs_info->nr_async_submits)) { |
@@ -1149,6 +1146,32 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1149 | } | 1146 | } |
1150 | } | 1147 | } |
1151 | 1148 | ||
1149 | static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) | ||
1150 | { | ||
1151 | struct btrfs_subvolume_writers *writers; | ||
1152 | int ret; | ||
1153 | |||
1154 | writers = kmalloc(sizeof(*writers), GFP_NOFS); | ||
1155 | if (!writers) | ||
1156 | return ERR_PTR(-ENOMEM); | ||
1157 | |||
1158 | ret = percpu_counter_init(&writers->counter, 0); | ||
1159 | if (ret < 0) { | ||
1160 | kfree(writers); | ||
1161 | return ERR_PTR(ret); | ||
1162 | } | ||
1163 | |||
1164 | init_waitqueue_head(&writers->wait); | ||
1165 | return writers; | ||
1166 | } | ||
1167 | |||
1168 | static void | ||
1169 | btrfs_free_subvolume_writers(struct btrfs_subvolume_writers *writers) | ||
1170 | { | ||
1171 | percpu_counter_destroy(&writers->counter); | ||
1172 | kfree(writers); | ||
1173 | } | ||
1174 | |||
1152 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1175 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
1153 | u32 stripesize, struct btrfs_root *root, | 1176 | u32 stripesize, struct btrfs_root *root, |
1154 | struct btrfs_fs_info *fs_info, | 1177 | struct btrfs_fs_info *fs_info, |
@@ -1194,16 +1217,22 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1194 | spin_lock_init(&root->log_extents_lock[1]); | 1217 | spin_lock_init(&root->log_extents_lock[1]); |
1195 | mutex_init(&root->objectid_mutex); | 1218 | mutex_init(&root->objectid_mutex); |
1196 | mutex_init(&root->log_mutex); | 1219 | mutex_init(&root->log_mutex); |
1220 | mutex_init(&root->ordered_extent_mutex); | ||
1221 | mutex_init(&root->delalloc_mutex); | ||
1197 | init_waitqueue_head(&root->log_writer_wait); | 1222 | init_waitqueue_head(&root->log_writer_wait); |
1198 | init_waitqueue_head(&root->log_commit_wait[0]); | 1223 | init_waitqueue_head(&root->log_commit_wait[0]); |
1199 | init_waitqueue_head(&root->log_commit_wait[1]); | 1224 | init_waitqueue_head(&root->log_commit_wait[1]); |
1225 | INIT_LIST_HEAD(&root->log_ctxs[0]); | ||
1226 | INIT_LIST_HEAD(&root->log_ctxs[1]); | ||
1200 | atomic_set(&root->log_commit[0], 0); | 1227 | atomic_set(&root->log_commit[0], 0); |
1201 | atomic_set(&root->log_commit[1], 0); | 1228 | atomic_set(&root->log_commit[1], 0); |
1202 | atomic_set(&root->log_writers, 0); | 1229 | atomic_set(&root->log_writers, 0); |
1203 | atomic_set(&root->log_batch, 0); | 1230 | atomic_set(&root->log_batch, 0); |
1204 | atomic_set(&root->orphan_inodes, 0); | 1231 | atomic_set(&root->orphan_inodes, 0); |
1205 | atomic_set(&root->refs, 1); | 1232 | atomic_set(&root->refs, 1); |
1233 | atomic_set(&root->will_be_snapshoted, 0); | ||
1206 | root->log_transid = 0; | 1234 | root->log_transid = 0; |
1235 | root->log_transid_committed = -1; | ||
1207 | root->last_log_commit = 0; | 1236 | root->last_log_commit = 0; |
1208 | if (fs_info) | 1237 | if (fs_info) |
1209 | extent_io_tree_init(&root->dirty_log_pages, | 1238 | extent_io_tree_init(&root->dirty_log_pages, |
@@ -1417,6 +1446,7 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | |||
1417 | WARN_ON(root->log_root); | 1446 | WARN_ON(root->log_root); |
1418 | root->log_root = log_root; | 1447 | root->log_root = log_root; |
1419 | root->log_transid = 0; | 1448 | root->log_transid = 0; |
1449 | root->log_transid_committed = -1; | ||
1420 | root->last_log_commit = 0; | 1450 | root->last_log_commit = 0; |
1421 | return 0; | 1451 | return 0; |
1422 | } | 1452 | } |
@@ -1498,6 +1528,7 @@ struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, | |||
1498 | int btrfs_init_fs_root(struct btrfs_root *root) | 1528 | int btrfs_init_fs_root(struct btrfs_root *root) |
1499 | { | 1529 | { |
1500 | int ret; | 1530 | int ret; |
1531 | struct btrfs_subvolume_writers *writers; | ||
1501 | 1532 | ||
1502 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); | 1533 | root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); |
1503 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), | 1534 | root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), |
@@ -1507,6 +1538,13 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
1507 | goto fail; | 1538 | goto fail; |
1508 | } | 1539 | } |
1509 | 1540 | ||
1541 | writers = btrfs_alloc_subvolume_writers(); | ||
1542 | if (IS_ERR(writers)) { | ||
1543 | ret = PTR_ERR(writers); | ||
1544 | goto fail; | ||
1545 | } | ||
1546 | root->subv_writers = writers; | ||
1547 | |||
1510 | btrfs_init_free_ino_ctl(root); | 1548 | btrfs_init_free_ino_ctl(root); |
1511 | mutex_init(&root->fs_commit_mutex); | 1549 | mutex_init(&root->fs_commit_mutex); |
1512 | spin_lock_init(&root->cache_lock); | 1550 | spin_lock_init(&root->cache_lock); |
@@ -1514,8 +1552,11 @@ int btrfs_init_fs_root(struct btrfs_root *root) | |||
1514 | 1552 | ||
1515 | ret = get_anon_bdev(&root->anon_dev); | 1553 | ret = get_anon_bdev(&root->anon_dev); |
1516 | if (ret) | 1554 | if (ret) |
1517 | goto fail; | 1555 | goto free_writers; |
1518 | return 0; | 1556 | return 0; |
1557 | |||
1558 | free_writers: | ||
1559 | btrfs_free_subvolume_writers(root->subv_writers); | ||
1519 | fail: | 1560 | fail: |
1520 | kfree(root->free_ino_ctl); | 1561 | kfree(root->free_ino_ctl); |
1521 | kfree(root->free_ino_pinned); | 1562 | kfree(root->free_ino_pinned); |
@@ -1990,23 +2031,22 @@ static noinline int next_root_backup(struct btrfs_fs_info *info, | |||
1990 | /* helper to cleanup workers */ | 2031 | /* helper to cleanup workers */ |
1991 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) | 2032 | static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) |
1992 | { | 2033 | { |
1993 | btrfs_stop_workers(&fs_info->generic_worker); | 2034 | btrfs_destroy_workqueue(fs_info->fixup_workers); |
1994 | btrfs_stop_workers(&fs_info->fixup_workers); | 2035 | btrfs_destroy_workqueue(fs_info->delalloc_workers); |
1995 | btrfs_stop_workers(&fs_info->delalloc_workers); | 2036 | btrfs_destroy_workqueue(fs_info->workers); |
1996 | btrfs_stop_workers(&fs_info->workers); | 2037 | btrfs_destroy_workqueue(fs_info->endio_workers); |
1997 | btrfs_stop_workers(&fs_info->endio_workers); | 2038 | btrfs_destroy_workqueue(fs_info->endio_meta_workers); |
1998 | btrfs_stop_workers(&fs_info->endio_meta_workers); | 2039 | btrfs_destroy_workqueue(fs_info->endio_raid56_workers); |
1999 | btrfs_stop_workers(&fs_info->endio_raid56_workers); | 2040 | btrfs_destroy_workqueue(fs_info->rmw_workers); |
2000 | btrfs_stop_workers(&fs_info->rmw_workers); | 2041 | btrfs_destroy_workqueue(fs_info->endio_meta_write_workers); |
2001 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2042 | btrfs_destroy_workqueue(fs_info->endio_write_workers); |
2002 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2043 | btrfs_destroy_workqueue(fs_info->endio_freespace_worker); |
2003 | btrfs_stop_workers(&fs_info->endio_freespace_worker); | 2044 | btrfs_destroy_workqueue(fs_info->submit_workers); |
2004 | btrfs_stop_workers(&fs_info->submit_workers); | 2045 | btrfs_destroy_workqueue(fs_info->delayed_workers); |
2005 | btrfs_stop_workers(&fs_info->delayed_workers); | 2046 | btrfs_destroy_workqueue(fs_info->caching_workers); |
2006 | btrfs_stop_workers(&fs_info->caching_workers); | 2047 | btrfs_destroy_workqueue(fs_info->readahead_workers); |
2007 | btrfs_stop_workers(&fs_info->readahead_workers); | 2048 | btrfs_destroy_workqueue(fs_info->flush_workers); |
2008 | btrfs_stop_workers(&fs_info->flush_workers); | 2049 | btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); |
2009 | btrfs_stop_workers(&fs_info->qgroup_rescan_workers); | ||
2010 | } | 2050 | } |
2011 | 2051 | ||
2012 | static void free_root_extent_buffers(struct btrfs_root *root) | 2052 | static void free_root_extent_buffers(struct btrfs_root *root) |
@@ -2097,6 +2137,8 @@ int open_ctree(struct super_block *sb, | |||
2097 | int err = -EINVAL; | 2137 | int err = -EINVAL; |
2098 | int num_backups_tried = 0; | 2138 | int num_backups_tried = 0; |
2099 | int backup_index = 0; | 2139 | int backup_index = 0; |
2140 | int max_active; | ||
2141 | int flags = WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND; | ||
2100 | bool create_uuid_tree; | 2142 | bool create_uuid_tree; |
2101 | bool check_uuid_tree; | 2143 | bool check_uuid_tree; |
2102 | 2144 | ||
@@ -2133,10 +2175,16 @@ int open_ctree(struct super_block *sb, | |||
2133 | goto fail_dirty_metadata_bytes; | 2175 | goto fail_dirty_metadata_bytes; |
2134 | } | 2176 | } |
2135 | 2177 | ||
2178 | ret = percpu_counter_init(&fs_info->bio_counter, 0); | ||
2179 | if (ret) { | ||
2180 | err = ret; | ||
2181 | goto fail_delalloc_bytes; | ||
2182 | } | ||
2183 | |||
2136 | fs_info->btree_inode = new_inode(sb); | 2184 | fs_info->btree_inode = new_inode(sb); |
2137 | if (!fs_info->btree_inode) { | 2185 | if (!fs_info->btree_inode) { |
2138 | err = -ENOMEM; | 2186 | err = -ENOMEM; |
2139 | goto fail_delalloc_bytes; | 2187 | goto fail_bio_counter; |
2140 | } | 2188 | } |
2141 | 2189 | ||
2142 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); | 2190 | mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); |
@@ -2159,6 +2207,7 @@ int open_ctree(struct super_block *sb, | |||
2159 | spin_lock_init(&fs_info->buffer_lock); | 2207 | spin_lock_init(&fs_info->buffer_lock); |
2160 | rwlock_init(&fs_info->tree_mod_log_lock); | 2208 | rwlock_init(&fs_info->tree_mod_log_lock); |
2161 | mutex_init(&fs_info->reloc_mutex); | 2209 | mutex_init(&fs_info->reloc_mutex); |
2210 | mutex_init(&fs_info->delalloc_root_mutex); | ||
2162 | seqlock_init(&fs_info->profiles_lock); | 2211 | seqlock_init(&fs_info->profiles_lock); |
2163 | 2212 | ||
2164 | init_completion(&fs_info->kobj_unregister); | 2213 | init_completion(&fs_info->kobj_unregister); |
@@ -2211,6 +2260,7 @@ int open_ctree(struct super_block *sb, | |||
2211 | atomic_set(&fs_info->scrub_pause_req, 0); | 2260 | atomic_set(&fs_info->scrub_pause_req, 0); |
2212 | atomic_set(&fs_info->scrubs_paused, 0); | 2261 | atomic_set(&fs_info->scrubs_paused, 0); |
2213 | atomic_set(&fs_info->scrub_cancel_req, 0); | 2262 | atomic_set(&fs_info->scrub_cancel_req, 0); |
2263 | init_waitqueue_head(&fs_info->replace_wait); | ||
2214 | init_waitqueue_head(&fs_info->scrub_pause_wait); | 2264 | init_waitqueue_head(&fs_info->scrub_pause_wait); |
2215 | fs_info->scrub_workers_refcnt = 0; | 2265 | fs_info->scrub_workers_refcnt = 0; |
2216 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY | 2266 | #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |
@@ -2458,104 +2508,68 @@ int open_ctree(struct super_block *sb, | |||
2458 | goto fail_alloc; | 2508 | goto fail_alloc; |
2459 | } | 2509 | } |
2460 | 2510 | ||
2461 | btrfs_init_workers(&fs_info->generic_worker, | 2511 | max_active = fs_info->thread_pool_size; |
2462 | "genwork", 1, NULL); | ||
2463 | |||
2464 | btrfs_init_workers(&fs_info->workers, "worker", | ||
2465 | fs_info->thread_pool_size, | ||
2466 | &fs_info->generic_worker); | ||
2467 | 2512 | ||
2468 | btrfs_init_workers(&fs_info->delalloc_workers, "delalloc", | 2513 | fs_info->workers = |
2469 | fs_info->thread_pool_size, NULL); | 2514 | btrfs_alloc_workqueue("worker", flags | WQ_HIGHPRI, |
2515 | max_active, 16); | ||
2470 | 2516 | ||
2471 | btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc", | 2517 | fs_info->delalloc_workers = |
2472 | fs_info->thread_pool_size, NULL); | 2518 | btrfs_alloc_workqueue("delalloc", flags, max_active, 2); |
2473 | 2519 | ||
2474 | btrfs_init_workers(&fs_info->submit_workers, "submit", | 2520 | fs_info->flush_workers = |
2475 | min_t(u64, fs_devices->num_devices, | 2521 | btrfs_alloc_workqueue("flush_delalloc", flags, max_active, 0); |
2476 | fs_info->thread_pool_size), NULL); | ||
2477 | 2522 | ||
2478 | btrfs_init_workers(&fs_info->caching_workers, "cache", | 2523 | fs_info->caching_workers = |
2479 | fs_info->thread_pool_size, NULL); | 2524 | btrfs_alloc_workqueue("cache", flags, max_active, 0); |
2480 | 2525 | ||
2481 | /* a higher idle thresh on the submit workers makes it much more | 2526 | /* |
2527 | * a higher idle thresh on the submit workers makes it much more | ||
2482 | * likely that bios will be send down in a sane order to the | 2528 | * likely that bios will be send down in a sane order to the |
2483 | * devices | 2529 | * devices |
2484 | */ | 2530 | */ |
2485 | fs_info->submit_workers.idle_thresh = 64; | 2531 | fs_info->submit_workers = |
2486 | 2532 | btrfs_alloc_workqueue("submit", flags, | |
2487 | fs_info->workers.idle_thresh = 16; | 2533 | min_t(u64, fs_devices->num_devices, |
2488 | fs_info->workers.ordered = 1; | 2534 | max_active), 64); |
2489 | 2535 | ||
2490 | fs_info->delalloc_workers.idle_thresh = 2; | 2536 | fs_info->fixup_workers = |
2491 | fs_info->delalloc_workers.ordered = 1; | 2537 | btrfs_alloc_workqueue("fixup", flags, 1, 0); |
2492 | |||
2493 | btrfs_init_workers(&fs_info->fixup_workers, "fixup", 1, | ||
2494 | &fs_info->generic_worker); | ||
2495 | btrfs_init_workers(&fs_info->endio_workers, "endio", | ||
2496 | fs_info->thread_pool_size, | ||
2497 | &fs_info->generic_worker); | ||
2498 | btrfs_init_workers(&fs_info->endio_meta_workers, "endio-meta", | ||
2499 | fs_info->thread_pool_size, | ||
2500 | &fs_info->generic_worker); | ||
2501 | btrfs_init_workers(&fs_info->endio_meta_write_workers, | ||
2502 | "endio-meta-write", fs_info->thread_pool_size, | ||
2503 | &fs_info->generic_worker); | ||
2504 | btrfs_init_workers(&fs_info->endio_raid56_workers, | ||
2505 | "endio-raid56", fs_info->thread_pool_size, | ||
2506 | &fs_info->generic_worker); | ||
2507 | btrfs_init_workers(&fs_info->rmw_workers, | ||
2508 | "rmw", fs_info->thread_pool_size, | ||
2509 | &fs_info->generic_worker); | ||
2510 | btrfs_init_workers(&fs_info->endio_write_workers, "endio-write", | ||
2511 | fs_info->thread_pool_size, | ||
2512 | &fs_info->generic_worker); | ||
2513 | btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write", | ||
2514 | 1, &fs_info->generic_worker); | ||
2515 | btrfs_init_workers(&fs_info->delayed_workers, "delayed-meta", | ||
2516 | fs_info->thread_pool_size, | ||
2517 | &fs_info->generic_worker); | ||
2518 | btrfs_init_workers(&fs_info->readahead_workers, "readahead", | ||
2519 | fs_info->thread_pool_size, | ||
2520 | &fs_info->generic_worker); | ||
2521 | btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1, | ||
2522 | &fs_info->generic_worker); | ||
2523 | 2538 | ||
2524 | /* | 2539 | /* |
2525 | * endios are largely parallel and should have a very | 2540 | * endios are largely parallel and should have a very |
2526 | * low idle thresh | 2541 | * low idle thresh |
2527 | */ | 2542 | */ |
2528 | fs_info->endio_workers.idle_thresh = 4; | 2543 | fs_info->endio_workers = |
2529 | fs_info->endio_meta_workers.idle_thresh = 4; | 2544 | btrfs_alloc_workqueue("endio", flags, max_active, 4); |
2530 | fs_info->endio_raid56_workers.idle_thresh = 4; | 2545 | fs_info->endio_meta_workers = |
2531 | fs_info->rmw_workers.idle_thresh = 2; | 2546 | btrfs_alloc_workqueue("endio-meta", flags, max_active, 4); |
2532 | 2547 | fs_info->endio_meta_write_workers = | |
2533 | fs_info->endio_write_workers.idle_thresh = 2; | 2548 | btrfs_alloc_workqueue("endio-meta-write", flags, max_active, 2); |
2534 | fs_info->endio_meta_write_workers.idle_thresh = 2; | 2549 | fs_info->endio_raid56_workers = |
2535 | fs_info->readahead_workers.idle_thresh = 2; | 2550 | btrfs_alloc_workqueue("endio-raid56", flags, max_active, 4); |
2536 | 2551 | fs_info->rmw_workers = | |
2537 | /* | 2552 | btrfs_alloc_workqueue("rmw", flags, max_active, 2); |
2538 | * btrfs_start_workers can really only fail because of ENOMEM so just | 2553 | fs_info->endio_write_workers = |
2539 | * return -ENOMEM if any of these fail. | 2554 | btrfs_alloc_workqueue("endio-write", flags, max_active, 2); |
2540 | */ | 2555 | fs_info->endio_freespace_worker = |
2541 | ret = btrfs_start_workers(&fs_info->workers); | 2556 | btrfs_alloc_workqueue("freespace-write", flags, max_active, 0); |
2542 | ret |= btrfs_start_workers(&fs_info->generic_worker); | 2557 | fs_info->delayed_workers = |
2543 | ret |= btrfs_start_workers(&fs_info->submit_workers); | 2558 | btrfs_alloc_workqueue("delayed-meta", flags, max_active, 0); |
2544 | ret |= btrfs_start_workers(&fs_info->delalloc_workers); | 2559 | fs_info->readahead_workers = |
2545 | ret |= btrfs_start_workers(&fs_info->fixup_workers); | 2560 | btrfs_alloc_workqueue("readahead", flags, max_active, 2); |
2546 | ret |= btrfs_start_workers(&fs_info->endio_workers); | 2561 | fs_info->qgroup_rescan_workers = |
2547 | ret |= btrfs_start_workers(&fs_info->endio_meta_workers); | 2562 | btrfs_alloc_workqueue("qgroup-rescan", flags, 1, 0); |
2548 | ret |= btrfs_start_workers(&fs_info->rmw_workers); | 2563 | |
2549 | ret |= btrfs_start_workers(&fs_info->endio_raid56_workers); | 2564 | if (!(fs_info->workers && fs_info->delalloc_workers && |
2550 | ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers); | 2565 | fs_info->submit_workers && fs_info->flush_workers && |
2551 | ret |= btrfs_start_workers(&fs_info->endio_write_workers); | 2566 | fs_info->endio_workers && fs_info->endio_meta_workers && |
2552 | ret |= btrfs_start_workers(&fs_info->endio_freespace_worker); | 2567 | fs_info->endio_meta_write_workers && |
2553 | ret |= btrfs_start_workers(&fs_info->delayed_workers); | 2568 | fs_info->endio_write_workers && fs_info->endio_raid56_workers && |
2554 | ret |= btrfs_start_workers(&fs_info->caching_workers); | 2569 | fs_info->endio_freespace_worker && fs_info->rmw_workers && |
2555 | ret |= btrfs_start_workers(&fs_info->readahead_workers); | 2570 | fs_info->caching_workers && fs_info->readahead_workers && |
2556 | ret |= btrfs_start_workers(&fs_info->flush_workers); | 2571 | fs_info->fixup_workers && fs_info->delayed_workers && |
2557 | ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers); | 2572 | fs_info->qgroup_rescan_workers)) { |
2558 | if (ret) { | ||
2559 | err = -ENOMEM; | 2573 | err = -ENOMEM; |
2560 | goto fail_sb_buffer; | 2574 | goto fail_sb_buffer; |
2561 | } | 2575 | } |
@@ -2963,6 +2977,8 @@ fail_iput: | |||
2963 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2977 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
2964 | 2978 | ||
2965 | iput(fs_info->btree_inode); | 2979 | iput(fs_info->btree_inode); |
2980 | fail_bio_counter: | ||
2981 | percpu_counter_destroy(&fs_info->bio_counter); | ||
2966 | fail_delalloc_bytes: | 2982 | fail_delalloc_bytes: |
2967 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 2983 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
2968 | fail_dirty_metadata_bytes: | 2984 | fail_dirty_metadata_bytes: |
@@ -3244,6 +3260,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3244 | /* send down all the barriers */ | 3260 | /* send down all the barriers */ |
3245 | head = &info->fs_devices->devices; | 3261 | head = &info->fs_devices->devices; |
3246 | list_for_each_entry_rcu(dev, head, dev_list) { | 3262 | list_for_each_entry_rcu(dev, head, dev_list) { |
3263 | if (dev->missing) | ||
3264 | continue; | ||
3247 | if (!dev->bdev) { | 3265 | if (!dev->bdev) { |
3248 | errors_send++; | 3266 | errors_send++; |
3249 | continue; | 3267 | continue; |
@@ -3258,6 +3276,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info) | |||
3258 | 3276 | ||
3259 | /* wait for all the barriers */ | 3277 | /* wait for all the barriers */ |
3260 | list_for_each_entry_rcu(dev, head, dev_list) { | 3278 | list_for_each_entry_rcu(dev, head, dev_list) { |
3279 | if (dev->missing) | ||
3280 | continue; | ||
3261 | if (!dev->bdev) { | 3281 | if (!dev->bdev) { |
3262 | errors_wait++; | 3282 | errors_wait++; |
3263 | continue; | 3283 | continue; |
@@ -3477,6 +3497,8 @@ static void free_fs_root(struct btrfs_root *root) | |||
3477 | root->orphan_block_rsv = NULL; | 3497 | root->orphan_block_rsv = NULL; |
3478 | if (root->anon_dev) | 3498 | if (root->anon_dev) |
3479 | free_anon_bdev(root->anon_dev); | 3499 | free_anon_bdev(root->anon_dev); |
3500 | if (root->subv_writers) | ||
3501 | btrfs_free_subvolume_writers(root->subv_writers); | ||
3480 | free_extent_buffer(root->node); | 3502 | free_extent_buffer(root->node); |
3481 | free_extent_buffer(root->commit_root); | 3503 | free_extent_buffer(root->commit_root); |
3482 | kfree(root->free_ino_ctl); | 3504 | kfree(root->free_ino_ctl); |
@@ -3610,6 +3632,7 @@ int close_ctree(struct btrfs_root *root) | |||
3610 | 3632 | ||
3611 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); | 3633 | percpu_counter_destroy(&fs_info->dirty_metadata_bytes); |
3612 | percpu_counter_destroy(&fs_info->delalloc_bytes); | 3634 | percpu_counter_destroy(&fs_info->delalloc_bytes); |
3635 | percpu_counter_destroy(&fs_info->bio_counter); | ||
3613 | bdi_destroy(&fs_info->bdi); | 3636 | bdi_destroy(&fs_info->bdi); |
3614 | cleanup_srcu_struct(&fs_info->subvol_srcu); | 3637 | cleanup_srcu_struct(&fs_info->subvol_srcu); |
3615 | 3638 | ||
@@ -3791,9 +3814,11 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) | |||
3791 | list_move_tail(&root->ordered_root, | 3814 | list_move_tail(&root->ordered_root, |
3792 | &fs_info->ordered_roots); | 3815 | &fs_info->ordered_roots); |
3793 | 3816 | ||
3817 | spin_unlock(&fs_info->ordered_root_lock); | ||
3794 | btrfs_destroy_ordered_extents(root); | 3818 | btrfs_destroy_ordered_extents(root); |
3795 | 3819 | ||
3796 | cond_resched_lock(&fs_info->ordered_root_lock); | 3820 | cond_resched(); |
3821 | spin_lock(&fs_info->ordered_root_lock); | ||
3797 | } | 3822 | } |
3798 | spin_unlock(&fs_info->ordered_root_lock); | 3823 | spin_unlock(&fs_info->ordered_root_lock); |
3799 | } | 3824 | } |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 32312e09f0f5..c6b6a6e3e735 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -549,7 +549,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
549 | caching_ctl->block_group = cache; | 549 | caching_ctl->block_group = cache; |
550 | caching_ctl->progress = cache->key.objectid; | 550 | caching_ctl->progress = cache->key.objectid; |
551 | atomic_set(&caching_ctl->count, 1); | 551 | atomic_set(&caching_ctl->count, 1); |
552 | caching_ctl->work.func = caching_thread; | 552 | btrfs_init_work(&caching_ctl->work, caching_thread, NULL, NULL); |
553 | 553 | ||
554 | spin_lock(&cache->lock); | 554 | spin_lock(&cache->lock); |
555 | /* | 555 | /* |
@@ -640,7 +640,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, | |||
640 | 640 | ||
641 | btrfs_get_block_group(cache); | 641 | btrfs_get_block_group(cache); |
642 | 642 | ||
643 | btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); | 643 | btrfs_queue_work(fs_info->caching_workers, &caching_ctl->work); |
644 | 644 | ||
645 | return ret; | 645 | return ret; |
646 | } | 646 | } |
@@ -3971,7 +3971,7 @@ static int can_overcommit(struct btrfs_root *root, | |||
3971 | } | 3971 | } |
3972 | 3972 | ||
3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | 3973 | static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, |
3974 | unsigned long nr_pages) | 3974 | unsigned long nr_pages, int nr_items) |
3975 | { | 3975 | { |
3976 | struct super_block *sb = root->fs_info->sb; | 3976 | struct super_block *sb = root->fs_info->sb; |
3977 | 3977 | ||
@@ -3986,9 +3986,9 @@ static void btrfs_writeback_inodes_sb_nr(struct btrfs_root *root, | |||
3986 | * the filesystem is readonly(all dirty pages are written to | 3986 | * the filesystem is readonly(all dirty pages are written to |
3987 | * the disk). | 3987 | * the disk). |
3988 | */ | 3988 | */ |
3989 | btrfs_start_delalloc_roots(root->fs_info, 0); | 3989 | btrfs_start_delalloc_roots(root->fs_info, 0, nr_items); |
3990 | if (!current->journal_info) | 3990 | if (!current->journal_info) |
3991 | btrfs_wait_ordered_roots(root->fs_info, -1); | 3991 | btrfs_wait_ordered_roots(root->fs_info, nr_items); |
3992 | } | 3992 | } |
3993 | } | 3993 | } |
3994 | 3994 | ||
@@ -4045,7 +4045,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig, | |||
4045 | while (delalloc_bytes && loops < 3) { | 4045 | while (delalloc_bytes && loops < 3) { |
4046 | max_reclaim = min(delalloc_bytes, to_reclaim); | 4046 | max_reclaim = min(delalloc_bytes, to_reclaim); |
4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; | 4047 | nr_pages = max_reclaim >> PAGE_CACHE_SHIFT; |
4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages); | 4048 | btrfs_writeback_inodes_sb_nr(root, nr_pages, items); |
4049 | /* | 4049 | /* |
4050 | * We need to wait for the async pages to actually start before | 4050 | * We need to wait for the async pages to actually start before |
4051 | * we do anything. | 4051 | * we do anything. |
@@ -4112,13 +4112,9 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4112 | goto commit; | 4112 | goto commit; |
4113 | 4113 | ||
4114 | /* See if there is enough pinned space to make this reservation */ | 4114 | /* See if there is enough pinned space to make this reservation */ |
4115 | spin_lock(&space_info->lock); | ||
4116 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4115 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4117 | bytes) >= 0) { | 4116 | bytes) >= 0) |
4118 | spin_unlock(&space_info->lock); | ||
4119 | goto commit; | 4117 | goto commit; |
4120 | } | ||
4121 | spin_unlock(&space_info->lock); | ||
4122 | 4118 | ||
4123 | /* | 4119 | /* |
4124 | * See if there is some space in the delayed insertion reservation for | 4120 | * See if there is some space in the delayed insertion reservation for |
@@ -4127,16 +4123,13 @@ static int may_commit_transaction(struct btrfs_root *root, | |||
4127 | if (space_info != delayed_rsv->space_info) | 4123 | if (space_info != delayed_rsv->space_info) |
4128 | return -ENOSPC; | 4124 | return -ENOSPC; |
4129 | 4125 | ||
4130 | spin_lock(&space_info->lock); | ||
4131 | spin_lock(&delayed_rsv->lock); | 4126 | spin_lock(&delayed_rsv->lock); |
4132 | if (percpu_counter_compare(&space_info->total_bytes_pinned, | 4127 | if (percpu_counter_compare(&space_info->total_bytes_pinned, |
4133 | bytes - delayed_rsv->size) >= 0) { | 4128 | bytes - delayed_rsv->size) >= 0) { |
4134 | spin_unlock(&delayed_rsv->lock); | 4129 | spin_unlock(&delayed_rsv->lock); |
4135 | spin_unlock(&space_info->lock); | ||
4136 | return -ENOSPC; | 4130 | return -ENOSPC; |
4137 | } | 4131 | } |
4138 | spin_unlock(&delayed_rsv->lock); | 4132 | spin_unlock(&delayed_rsv->lock); |
4139 | spin_unlock(&space_info->lock); | ||
4140 | 4133 | ||
4141 | commit: | 4134 | commit: |
4142 | trans = btrfs_join_transaction(root); | 4135 | trans = btrfs_join_transaction(root); |
@@ -4181,7 +4174,7 @@ static int flush_space(struct btrfs_root *root, | |||
4181 | break; | 4174 | break; |
4182 | case FLUSH_DELALLOC: | 4175 | case FLUSH_DELALLOC: |
4183 | case FLUSH_DELALLOC_WAIT: | 4176 | case FLUSH_DELALLOC_WAIT: |
4184 | shrink_delalloc(root, num_bytes, orig_bytes, | 4177 | shrink_delalloc(root, num_bytes * 2, orig_bytes, |
4185 | state == FLUSH_DELALLOC_WAIT); | 4178 | state == FLUSH_DELALLOC_WAIT); |
4186 | break; | 4179 | break; |
4187 | case ALLOC_CHUNK: | 4180 | case ALLOC_CHUNK: |
@@ -8938,3 +8931,38 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) | |||
8938 | range->len = trimmed; | 8931 | range->len = trimmed; |
8939 | return ret; | 8932 | return ret; |
8940 | } | 8933 | } |
8934 | |||
8935 | /* | ||
8936 | * btrfs_{start,end}_write() is similar to mnt_{want, drop}_write(), | ||
8937 | * they are used to prevent the some tasks writing data into the page cache | ||
8938 | * by nocow before the subvolume is snapshoted, but flush the data into | ||
8939 | * the disk after the snapshot creation. | ||
8940 | */ | ||
8941 | void btrfs_end_nocow_write(struct btrfs_root *root) | ||
8942 | { | ||
8943 | percpu_counter_dec(&root->subv_writers->counter); | ||
8944 | /* | ||
8945 | * Make sure counter is updated before we wake up | ||
8946 | * waiters. | ||
8947 | */ | ||
8948 | smp_mb(); | ||
8949 | if (waitqueue_active(&root->subv_writers->wait)) | ||
8950 | wake_up(&root->subv_writers->wait); | ||
8951 | } | ||
8952 | |||
8953 | int btrfs_start_nocow_write(struct btrfs_root *root) | ||
8954 | { | ||
8955 | if (unlikely(atomic_read(&root->will_be_snapshoted))) | ||
8956 | return 0; | ||
8957 | |||
8958 | percpu_counter_inc(&root->subv_writers->counter); | ||
8959 | /* | ||
8960 | * Make sure counter is updated before we check for snapshot creation. | ||
8961 | */ | ||
8962 | smp_mb(); | ||
8963 | if (unlikely(atomic_read(&root->will_be_snapshoted))) { | ||
8964 | btrfs_end_nocow_write(root); | ||
8965 | return 0; | ||
8966 | } | ||
8967 | return 1; | ||
8968 | } | ||
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 85bbd01f1271..ae69a00387e7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -229,12 +229,14 @@ void free_extent_state(struct extent_state *state) | |||
229 | } | 229 | } |
230 | } | 230 | } |
231 | 231 | ||
232 | static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | 232 | static struct rb_node *tree_insert(struct rb_root *root, |
233 | struct rb_node *search_start, | ||
234 | u64 offset, | ||
233 | struct rb_node *node, | 235 | struct rb_node *node, |
234 | struct rb_node ***p_in, | 236 | struct rb_node ***p_in, |
235 | struct rb_node **parent_in) | 237 | struct rb_node **parent_in) |
236 | { | 238 | { |
237 | struct rb_node **p = &root->rb_node; | 239 | struct rb_node **p; |
238 | struct rb_node *parent = NULL; | 240 | struct rb_node *parent = NULL; |
239 | struct tree_entry *entry; | 241 | struct tree_entry *entry; |
240 | 242 | ||
@@ -244,6 +246,7 @@ static struct rb_node *tree_insert(struct rb_root *root, u64 offset, | |||
244 | goto do_insert; | 246 | goto do_insert; |
245 | } | 247 | } |
246 | 248 | ||
249 | p = search_start ? &search_start : &root->rb_node; | ||
247 | while (*p) { | 250 | while (*p) { |
248 | parent = *p; | 251 | parent = *p; |
249 | entry = rb_entry(parent, struct tree_entry, rb_node); | 252 | entry = rb_entry(parent, struct tree_entry, rb_node); |
@@ -430,7 +433,7 @@ static int insert_state(struct extent_io_tree *tree, | |||
430 | 433 | ||
431 | set_state_bits(tree, state, bits); | 434 | set_state_bits(tree, state, bits); |
432 | 435 | ||
433 | node = tree_insert(&tree->state, end, &state->rb_node, p, parent); | 436 | node = tree_insert(&tree->state, NULL, end, &state->rb_node, p, parent); |
434 | if (node) { | 437 | if (node) { |
435 | struct extent_state *found; | 438 | struct extent_state *found; |
436 | found = rb_entry(node, struct extent_state, rb_node); | 439 | found = rb_entry(node, struct extent_state, rb_node); |
@@ -477,8 +480,8 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
477 | prealloc->state = orig->state; | 480 | prealloc->state = orig->state; |
478 | orig->start = split; | 481 | orig->start = split; |
479 | 482 | ||
480 | node = tree_insert(&tree->state, prealloc->end, &prealloc->rb_node, | 483 | node = tree_insert(&tree->state, &orig->rb_node, prealloc->end, |
481 | NULL, NULL); | 484 | &prealloc->rb_node, NULL, NULL); |
482 | if (node) { | 485 | if (node) { |
483 | free_extent_state(prealloc); | 486 | free_extent_state(prealloc); |
484 | return -EEXIST; | 487 | return -EEXIST; |
@@ -2757,7 +2760,7 @@ __get_extent_map(struct inode *inode, struct page *page, size_t pg_offset, | |||
2757 | 2760 | ||
2758 | if (em_cached && *em_cached) { | 2761 | if (em_cached && *em_cached) { |
2759 | em = *em_cached; | 2762 | em = *em_cached; |
2760 | if (em->in_tree && start >= em->start && | 2763 | if (extent_map_in_tree(em) && start >= em->start && |
2761 | start < extent_map_end(em)) { | 2764 | start < extent_map_end(em)) { |
2762 | atomic_inc(&em->refs); | 2765 | atomic_inc(&em->refs); |
2763 | return em; | 2766 | return em; |
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c index 996ad56b57db..1874aee69c86 100644 --- a/fs/btrfs/extent_map.c +++ b/fs/btrfs/extent_map.c | |||
@@ -51,7 +51,7 @@ struct extent_map *alloc_extent_map(void) | |||
51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); | 51 | em = kmem_cache_zalloc(extent_map_cache, GFP_NOFS); |
52 | if (!em) | 52 | if (!em) |
53 | return NULL; | 53 | return NULL; |
54 | em->in_tree = 0; | 54 | RB_CLEAR_NODE(&em->rb_node); |
55 | em->flags = 0; | 55 | em->flags = 0; |
56 | em->compress_type = BTRFS_COMPRESS_NONE; | 56 | em->compress_type = BTRFS_COMPRESS_NONE; |
57 | em->generation = 0; | 57 | em->generation = 0; |
@@ -73,7 +73,7 @@ void free_extent_map(struct extent_map *em) | |||
73 | return; | 73 | return; |
74 | WARN_ON(atomic_read(&em->refs) == 0); | 74 | WARN_ON(atomic_read(&em->refs) == 0); |
75 | if (atomic_dec_and_test(&em->refs)) { | 75 | if (atomic_dec_and_test(&em->refs)) { |
76 | WARN_ON(em->in_tree); | 76 | WARN_ON(extent_map_in_tree(em)); |
77 | WARN_ON(!list_empty(&em->list)); | 77 | WARN_ON(!list_empty(&em->list)); |
78 | kmem_cache_free(extent_map_cache, em); | 78 | kmem_cache_free(extent_map_cache, em); |
79 | } | 79 | } |
@@ -99,8 +99,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
99 | parent = *p; | 99 | parent = *p; |
100 | entry = rb_entry(parent, struct extent_map, rb_node); | 100 | entry = rb_entry(parent, struct extent_map, rb_node); |
101 | 101 | ||
102 | WARN_ON(!entry->in_tree); | ||
103 | |||
104 | if (em->start < entry->start) | 102 | if (em->start < entry->start) |
105 | p = &(*p)->rb_left; | 103 | p = &(*p)->rb_left; |
106 | else if (em->start >= extent_map_end(entry)) | 104 | else if (em->start >= extent_map_end(entry)) |
@@ -128,7 +126,6 @@ static int tree_insert(struct rb_root *root, struct extent_map *em) | |||
128 | if (end > entry->start && em->start < extent_map_end(entry)) | 126 | if (end > entry->start && em->start < extent_map_end(entry)) |
129 | return -EEXIST; | 127 | return -EEXIST; |
130 | 128 | ||
131 | em->in_tree = 1; | ||
132 | rb_link_node(&em->rb_node, orig_parent, p); | 129 | rb_link_node(&em->rb_node, orig_parent, p); |
133 | rb_insert_color(&em->rb_node, root); | 130 | rb_insert_color(&em->rb_node, root); |
134 | return 0; | 131 | return 0; |
@@ -153,8 +150,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset, | |||
153 | prev = n; | 150 | prev = n; |
154 | prev_entry = entry; | 151 | prev_entry = entry; |
155 | 152 | ||
156 | WARN_ON(!entry->in_tree); | ||
157 | |||
158 | if (offset < entry->start) | 153 | if (offset < entry->start) |
159 | n = n->rb_left; | 154 | n = n->rb_left; |
160 | else if (offset >= extent_map_end(entry)) | 155 | else if (offset >= extent_map_end(entry)) |
@@ -240,12 +235,12 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
240 | em->len += merge->len; | 235 | em->len += merge->len; |
241 | em->block_len += merge->block_len; | 236 | em->block_len += merge->block_len; |
242 | em->block_start = merge->block_start; | 237 | em->block_start = merge->block_start; |
243 | merge->in_tree = 0; | ||
244 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; | 238 | em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start; |
245 | em->mod_start = merge->mod_start; | 239 | em->mod_start = merge->mod_start; |
246 | em->generation = max(em->generation, merge->generation); | 240 | em->generation = max(em->generation, merge->generation); |
247 | 241 | ||
248 | rb_erase(&merge->rb_node, &tree->map); | 242 | rb_erase(&merge->rb_node, &tree->map); |
243 | RB_CLEAR_NODE(&merge->rb_node); | ||
249 | free_extent_map(merge); | 244 | free_extent_map(merge); |
250 | } | 245 | } |
251 | } | 246 | } |
@@ -257,7 +252,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em) | |||
257 | em->len += merge->len; | 252 | em->len += merge->len; |
258 | em->block_len += merge->block_len; | 253 | em->block_len += merge->block_len; |
259 | rb_erase(&merge->rb_node, &tree->map); | 254 | rb_erase(&merge->rb_node, &tree->map); |
260 | merge->in_tree = 0; | 255 | RB_CLEAR_NODE(&merge->rb_node); |
261 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; | 256 | em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start; |
262 | em->generation = max(em->generation, merge->generation); | 257 | em->generation = max(em->generation, merge->generation); |
263 | free_extent_map(merge); | 258 | free_extent_map(merge); |
@@ -319,7 +314,21 @@ out: | |||
319 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) | 314 | void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em) |
320 | { | 315 | { |
321 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); | 316 | clear_bit(EXTENT_FLAG_LOGGING, &em->flags); |
322 | if (em->in_tree) | 317 | if (extent_map_in_tree(em)) |
318 | try_merge_map(tree, em); | ||
319 | } | ||
320 | |||
321 | static inline void setup_extent_mapping(struct extent_map_tree *tree, | ||
322 | struct extent_map *em, | ||
323 | int modified) | ||
324 | { | ||
325 | atomic_inc(&em->refs); | ||
326 | em->mod_start = em->start; | ||
327 | em->mod_len = em->len; | ||
328 | |||
329 | if (modified) | ||
330 | list_move(&em->list, &tree->modified_extents); | ||
331 | else | ||
323 | try_merge_map(tree, em); | 332 | try_merge_map(tree, em); |
324 | } | 333 | } |
325 | 334 | ||
@@ -342,15 +351,7 @@ int add_extent_mapping(struct extent_map_tree *tree, | |||
342 | if (ret) | 351 | if (ret) |
343 | goto out; | 352 | goto out; |
344 | 353 | ||
345 | atomic_inc(&em->refs); | 354 | setup_extent_mapping(tree, em, modified); |
346 | |||
347 | em->mod_start = em->start; | ||
348 | em->mod_len = em->len; | ||
349 | |||
350 | if (modified) | ||
351 | list_move(&em->list, &tree->modified_extents); | ||
352 | else | ||
353 | try_merge_map(tree, em); | ||
354 | out: | 355 | out: |
355 | return ret; | 356 | return ret; |
356 | } | 357 | } |
@@ -434,6 +435,21 @@ int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em) | |||
434 | rb_erase(&em->rb_node, &tree->map); | 435 | rb_erase(&em->rb_node, &tree->map); |
435 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) | 436 | if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags)) |
436 | list_del_init(&em->list); | 437 | list_del_init(&em->list); |
437 | em->in_tree = 0; | 438 | RB_CLEAR_NODE(&em->rb_node); |
438 | return ret; | 439 | return ret; |
439 | } | 440 | } |
441 | |||
442 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
443 | struct extent_map *cur, | ||
444 | struct extent_map *new, | ||
445 | int modified) | ||
446 | { | ||
447 | WARN_ON(test_bit(EXTENT_FLAG_PINNED, &cur->flags)); | ||
448 | ASSERT(extent_map_in_tree(cur)); | ||
449 | if (!test_bit(EXTENT_FLAG_LOGGING, &cur->flags)) | ||
450 | list_del_init(&cur->list); | ||
451 | rb_replace_node(&cur->rb_node, &new->rb_node, &tree->map); | ||
452 | RB_CLEAR_NODE(&cur->rb_node); | ||
453 | |||
454 | setup_extent_mapping(tree, new, modified); | ||
455 | } | ||
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h index 93fba716d7f8..e7fd8a56a140 100644 --- a/fs/btrfs/extent_map.h +++ b/fs/btrfs/extent_map.h | |||
@@ -33,7 +33,6 @@ struct extent_map { | |||
33 | unsigned long flags; | 33 | unsigned long flags; |
34 | struct block_device *bdev; | 34 | struct block_device *bdev; |
35 | atomic_t refs; | 35 | atomic_t refs; |
36 | unsigned int in_tree; | ||
37 | unsigned int compress_type; | 36 | unsigned int compress_type; |
38 | struct list_head list; | 37 | struct list_head list; |
39 | }; | 38 | }; |
@@ -44,6 +43,11 @@ struct extent_map_tree { | |||
44 | rwlock_t lock; | 43 | rwlock_t lock; |
45 | }; | 44 | }; |
46 | 45 | ||
46 | static inline int extent_map_in_tree(const struct extent_map *em) | ||
47 | { | ||
48 | return !RB_EMPTY_NODE(&em->rb_node); | ||
49 | } | ||
50 | |||
47 | static inline u64 extent_map_end(struct extent_map *em) | 51 | static inline u64 extent_map_end(struct extent_map *em) |
48 | { | 52 | { |
49 | if (em->start + em->len < em->start) | 53 | if (em->start + em->len < em->start) |
@@ -64,6 +68,10 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree, | |||
64 | int add_extent_mapping(struct extent_map_tree *tree, | 68 | int add_extent_mapping(struct extent_map_tree *tree, |
65 | struct extent_map *em, int modified); | 69 | struct extent_map *em, int modified); |
66 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); | 70 | int remove_extent_mapping(struct extent_map_tree *tree, struct extent_map *em); |
71 | void replace_extent_mapping(struct extent_map_tree *tree, | ||
72 | struct extent_map *cur, | ||
73 | struct extent_map *new, | ||
74 | int modified); | ||
67 | 75 | ||
68 | struct extent_map *alloc_extent_map(void); | 76 | struct extent_map *alloc_extent_map(void); |
69 | void free_extent_map(struct extent_map *em); | 77 | void free_extent_map(struct extent_map *em); |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 0165b8672f09..c660527af838 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -591,7 +591,6 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); | 591 | clear_bit(EXTENT_FLAG_PINNED, &em->flags); |
592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); | 592 | clear_bit(EXTENT_FLAG_LOGGING, &flags); |
593 | modified = !list_empty(&em->list); | 593 | modified = !list_empty(&em->list); |
594 | remove_extent_mapping(em_tree, em); | ||
595 | if (no_splits) | 594 | if (no_splits) |
596 | goto next; | 595 | goto next; |
597 | 596 | ||
@@ -622,8 +621,7 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
622 | split->bdev = em->bdev; | 621 | split->bdev = em->bdev; |
623 | split->flags = flags; | 622 | split->flags = flags; |
624 | split->compress_type = em->compress_type; | 623 | split->compress_type = em->compress_type; |
625 | ret = add_extent_mapping(em_tree, split, modified); | 624 | replace_extent_mapping(em_tree, em, split, modified); |
626 | BUG_ON(ret); /* Logic error */ | ||
627 | free_extent_map(split); | 625 | free_extent_map(split); |
628 | split = split2; | 626 | split = split2; |
629 | split2 = NULL; | 627 | split2 = NULL; |
@@ -661,12 +659,20 @@ void btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, | |||
661 | split->orig_block_len = 0; | 659 | split->orig_block_len = 0; |
662 | } | 660 | } |
663 | 661 | ||
664 | ret = add_extent_mapping(em_tree, split, modified); | 662 | if (extent_map_in_tree(em)) { |
665 | BUG_ON(ret); /* Logic error */ | 663 | replace_extent_mapping(em_tree, em, split, |
664 | modified); | ||
665 | } else { | ||
666 | ret = add_extent_mapping(em_tree, split, | ||
667 | modified); | ||
668 | ASSERT(ret == 0); /* Logic error */ | ||
669 | } | ||
666 | free_extent_map(split); | 670 | free_extent_map(split); |
667 | split = NULL; | 671 | split = NULL; |
668 | } | 672 | } |
669 | next: | 673 | next: |
674 | if (extent_map_in_tree(em)) | ||
675 | remove_extent_mapping(em_tree, em); | ||
670 | write_unlock(&em_tree->lock); | 676 | write_unlock(&em_tree->lock); |
671 | 677 | ||
672 | /* once for us */ | 678 | /* once for us */ |
@@ -720,7 +726,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans, | |||
720 | if (drop_cache) | 726 | if (drop_cache) |
721 | btrfs_drop_extent_cache(inode, start, end - 1, 0); | 727 | btrfs_drop_extent_cache(inode, start, end - 1, 0); |
722 | 728 | ||
723 | if (start >= BTRFS_I(inode)->disk_i_size) | 729 | if (start >= BTRFS_I(inode)->disk_i_size && !replace_extent) |
724 | modify_tree = 0; | 730 | modify_tree = 0; |
725 | 731 | ||
726 | while (1) { | 732 | while (1) { |
@@ -798,7 +804,10 @@ next_slot: | |||
798 | */ | 804 | */ |
799 | if (start > key.offset && end < extent_end) { | 805 | if (start > key.offset && end < extent_end) { |
800 | BUG_ON(del_nr > 0); | 806 | BUG_ON(del_nr > 0); |
801 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 807 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
808 | ret = -EINVAL; | ||
809 | break; | ||
810 | } | ||
802 | 811 | ||
803 | memcpy(&new_key, &key, sizeof(new_key)); | 812 | memcpy(&new_key, &key, sizeof(new_key)); |
804 | new_key.offset = start; | 813 | new_key.offset = start; |
@@ -841,7 +850,10 @@ next_slot: | |||
841 | * | -------- extent -------- | | 850 | * | -------- extent -------- | |
842 | */ | 851 | */ |
843 | if (start <= key.offset && end < extent_end) { | 852 | if (start <= key.offset && end < extent_end) { |
844 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 853 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
854 | ret = -EINVAL; | ||
855 | break; | ||
856 | } | ||
845 | 857 | ||
846 | memcpy(&new_key, &key, sizeof(new_key)); | 858 | memcpy(&new_key, &key, sizeof(new_key)); |
847 | new_key.offset = end; | 859 | new_key.offset = end; |
@@ -864,7 +876,10 @@ next_slot: | |||
864 | */ | 876 | */ |
865 | if (start > key.offset && end >= extent_end) { | 877 | if (start > key.offset && end >= extent_end) { |
866 | BUG_ON(del_nr > 0); | 878 | BUG_ON(del_nr > 0); |
867 | BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE); | 879 | if (extent_type == BTRFS_FILE_EXTENT_INLINE) { |
880 | ret = -EINVAL; | ||
881 | break; | ||
882 | } | ||
868 | 883 | ||
869 | btrfs_set_file_extent_num_bytes(leaf, fi, | 884 | btrfs_set_file_extent_num_bytes(leaf, fi, |
870 | start - key.offset); | 885 | start - key.offset); |
@@ -938,34 +953,42 @@ next_slot: | |||
938 | * Set path->slots[0] to first slot, so that after the delete | 953 | * Set path->slots[0] to first slot, so that after the delete |
939 | * if items are move off from our leaf to its immediate left or | 954 | * if items are move off from our leaf to its immediate left or |
940 | * right neighbor leafs, we end up with a correct and adjusted | 955 | * right neighbor leafs, we end up with a correct and adjusted |
941 | * path->slots[0] for our insertion. | 956 | * path->slots[0] for our insertion (if replace_extent != 0). |
942 | */ | 957 | */ |
943 | path->slots[0] = del_slot; | 958 | path->slots[0] = del_slot; |
944 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); | 959 | ret = btrfs_del_items(trans, root, path, del_slot, del_nr); |
945 | if (ret) | 960 | if (ret) |
946 | btrfs_abort_transaction(trans, root, ret); | 961 | btrfs_abort_transaction(trans, root, ret); |
962 | } | ||
947 | 963 | ||
948 | leaf = path->nodes[0]; | 964 | leaf = path->nodes[0]; |
949 | /* | 965 | /* |
950 | * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that | 966 | * If btrfs_del_items() was called, it might have deleted a leaf, in |
951 | * is, its contents got pushed to its neighbors), in which case | 967 | * which case it unlocked our path, so check path->locks[0] matches a |
952 | * it means path->locks[0] == 0 | 968 | * write lock. |
953 | */ | 969 | */ |
954 | if (!ret && replace_extent && leafs_visited == 1 && | 970 | if (!ret && replace_extent && leafs_visited == 1 && |
955 | path->locks[0] && | 971 | (path->locks[0] == BTRFS_WRITE_LOCK_BLOCKING || |
956 | btrfs_leaf_free_space(root, leaf) >= | 972 | path->locks[0] == BTRFS_WRITE_LOCK) && |
957 | sizeof(struct btrfs_item) + extent_item_size) { | 973 | btrfs_leaf_free_space(root, leaf) >= |
958 | 974 | sizeof(struct btrfs_item) + extent_item_size) { | |
959 | key.objectid = ino; | 975 | |
960 | key.type = BTRFS_EXTENT_DATA_KEY; | 976 | key.objectid = ino; |
961 | key.offset = start; | 977 | key.type = BTRFS_EXTENT_DATA_KEY; |
962 | setup_items_for_insert(root, path, &key, | 978 | key.offset = start; |
963 | &extent_item_size, | 979 | if (!del_nr && path->slots[0] < btrfs_header_nritems(leaf)) { |
964 | extent_item_size, | 980 | struct btrfs_key slot_key; |
965 | sizeof(struct btrfs_item) + | 981 | |
966 | extent_item_size, 1); | 982 | btrfs_item_key_to_cpu(leaf, &slot_key, path->slots[0]); |
967 | *key_inserted = 1; | 983 | if (btrfs_comp_cpu_keys(&key, &slot_key) > 0) |
984 | path->slots[0]++; | ||
968 | } | 985 | } |
986 | setup_items_for_insert(root, path, &key, | ||
987 | &extent_item_size, | ||
988 | extent_item_size, | ||
989 | sizeof(struct btrfs_item) + | ||
990 | extent_item_size, 1); | ||
991 | *key_inserted = 1; | ||
969 | } | 992 | } |
970 | 993 | ||
971 | if (!replace_extent || !(*key_inserted)) | 994 | if (!replace_extent || !(*key_inserted)) |
@@ -1346,11 +1369,11 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
1346 | struct btrfs_ordered_extent *ordered; | 1369 | struct btrfs_ordered_extent *ordered; |
1347 | lock_extent_bits(&BTRFS_I(inode)->io_tree, | 1370 | lock_extent_bits(&BTRFS_I(inode)->io_tree, |
1348 | start_pos, last_pos, 0, cached_state); | 1371 | start_pos, last_pos, 0, cached_state); |
1349 | ordered = btrfs_lookup_first_ordered_extent(inode, last_pos); | 1372 | ordered = btrfs_lookup_ordered_range(inode, start_pos, |
1373 | last_pos - start_pos + 1); | ||
1350 | if (ordered && | 1374 | if (ordered && |
1351 | ordered->file_offset + ordered->len > start_pos && | 1375 | ordered->file_offset + ordered->len > start_pos && |
1352 | ordered->file_offset <= last_pos) { | 1376 | ordered->file_offset <= last_pos) { |
1353 | btrfs_put_ordered_extent(ordered); | ||
1354 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, | 1377 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, |
1355 | start_pos, last_pos, | 1378 | start_pos, last_pos, |
1356 | cached_state, GFP_NOFS); | 1379 | cached_state, GFP_NOFS); |
@@ -1358,12 +1381,9 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages, | |||
1358 | unlock_page(pages[i]); | 1381 | unlock_page(pages[i]); |
1359 | page_cache_release(pages[i]); | 1382 | page_cache_release(pages[i]); |
1360 | } | 1383 | } |
1361 | ret = btrfs_wait_ordered_range(inode, start_pos, | 1384 | btrfs_start_ordered_extent(inode, ordered, 1); |
1362 | last_pos - start_pos + 1); | 1385 | btrfs_put_ordered_extent(ordered); |
1363 | if (ret) | 1386 | return -EAGAIN; |
1364 | return ret; | ||
1365 | else | ||
1366 | return -EAGAIN; | ||
1367 | } | 1387 | } |
1368 | if (ordered) | 1388 | if (ordered) |
1369 | btrfs_put_ordered_extent(ordered); | 1389 | btrfs_put_ordered_extent(ordered); |
@@ -1396,8 +1416,12 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1396 | u64 num_bytes; | 1416 | u64 num_bytes; |
1397 | int ret; | 1417 | int ret; |
1398 | 1418 | ||
1419 | ret = btrfs_start_nocow_write(root); | ||
1420 | if (!ret) | ||
1421 | return -ENOSPC; | ||
1422 | |||
1399 | lockstart = round_down(pos, root->sectorsize); | 1423 | lockstart = round_down(pos, root->sectorsize); |
1400 | lockend = lockstart + round_up(*write_bytes, root->sectorsize) - 1; | 1424 | lockend = round_up(pos + *write_bytes, root->sectorsize) - 1; |
1401 | 1425 | ||
1402 | while (1) { | 1426 | while (1) { |
1403 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1427 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
@@ -1415,12 +1439,10 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos, | |||
1415 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); | 1439 | ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL); |
1416 | if (ret <= 0) { | 1440 | if (ret <= 0) { |
1417 | ret = 0; | 1441 | ret = 0; |
1442 | btrfs_end_nocow_write(root); | ||
1418 | } else { | 1443 | } else { |
1419 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 1444 | *write_bytes = min_t(size_t, *write_bytes , |
1420 | EXTENT_DIRTY | EXTENT_DELALLOC | | 1445 | num_bytes - pos + lockstart); |
1421 | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, | ||
1422 | NULL, GFP_NOFS); | ||
1423 | *write_bytes = min_t(size_t, *write_bytes, num_bytes); | ||
1424 | } | 1446 | } |
1425 | 1447 | ||
1426 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); | 1448 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend); |
@@ -1510,6 +1532,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, | |||
1510 | if (!only_release_metadata) | 1532 | if (!only_release_metadata) |
1511 | btrfs_free_reserved_data_space(inode, | 1533 | btrfs_free_reserved_data_space(inode, |
1512 | reserve_bytes); | 1534 | reserve_bytes); |
1535 | else | ||
1536 | btrfs_end_nocow_write(root); | ||
1513 | break; | 1537 | break; |
1514 | } | 1538 | } |
1515 | 1539 | ||
@@ -1598,6 +1622,9 @@ again: | |||
1598 | } | 1622 | } |
1599 | 1623 | ||
1600 | release_bytes = 0; | 1624 | release_bytes = 0; |
1625 | if (only_release_metadata) | ||
1626 | btrfs_end_nocow_write(root); | ||
1627 | |||
1601 | if (only_release_metadata && copied > 0) { | 1628 | if (only_release_metadata && copied > 0) { |
1602 | u64 lockstart = round_down(pos, root->sectorsize); | 1629 | u64 lockstart = round_down(pos, root->sectorsize); |
1603 | u64 lockend = lockstart + | 1630 | u64 lockend = lockstart + |
@@ -1624,10 +1651,12 @@ again: | |||
1624 | kfree(pages); | 1651 | kfree(pages); |
1625 | 1652 | ||
1626 | if (release_bytes) { | 1653 | if (release_bytes) { |
1627 | if (only_release_metadata) | 1654 | if (only_release_metadata) { |
1655 | btrfs_end_nocow_write(root); | ||
1628 | btrfs_delalloc_release_metadata(inode, release_bytes); | 1656 | btrfs_delalloc_release_metadata(inode, release_bytes); |
1629 | else | 1657 | } else { |
1630 | btrfs_delalloc_release_space(inode, release_bytes); | 1658 | btrfs_delalloc_release_space(inode, release_bytes); |
1659 | } | ||
1631 | } | 1660 | } |
1632 | 1661 | ||
1633 | return num_written ? num_written : ret; | 1662 | return num_written ? num_written : ret; |
@@ -1797,7 +1826,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
1797 | BTRFS_I(inode)->last_sub_trans = root->log_transid; | 1826 | BTRFS_I(inode)->last_sub_trans = root->log_transid; |
1798 | if (num_written > 0) { | 1827 | if (num_written > 0) { |
1799 | err = generic_write_sync(file, pos, num_written); | 1828 | err = generic_write_sync(file, pos, num_written); |
1800 | if (err < 0 && num_written > 0) | 1829 | if (err < 0) |
1801 | num_written = err; | 1830 | num_written = err; |
1802 | } | 1831 | } |
1803 | 1832 | ||
@@ -1856,8 +1885,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1856 | struct dentry *dentry = file->f_path.dentry; | 1885 | struct dentry *dentry = file->f_path.dentry; |
1857 | struct inode *inode = dentry->d_inode; | 1886 | struct inode *inode = dentry->d_inode; |
1858 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1887 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1859 | int ret = 0; | ||
1860 | struct btrfs_trans_handle *trans; | 1888 | struct btrfs_trans_handle *trans; |
1889 | struct btrfs_log_ctx ctx; | ||
1890 | int ret = 0; | ||
1861 | bool full_sync = 0; | 1891 | bool full_sync = 0; |
1862 | 1892 | ||
1863 | trace_btrfs_sync_file(file, datasync); | 1893 | trace_btrfs_sync_file(file, datasync); |
@@ -1951,7 +1981,9 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1951 | } | 1981 | } |
1952 | trans->sync = true; | 1982 | trans->sync = true; |
1953 | 1983 | ||
1954 | ret = btrfs_log_dentry_safe(trans, root, dentry); | 1984 | btrfs_init_log_ctx(&ctx); |
1985 | |||
1986 | ret = btrfs_log_dentry_safe(trans, root, dentry, &ctx); | ||
1955 | if (ret < 0) { | 1987 | if (ret < 0) { |
1956 | /* Fallthrough and commit/free transaction. */ | 1988 | /* Fallthrough and commit/free transaction. */ |
1957 | ret = 1; | 1989 | ret = 1; |
@@ -1971,7 +2003,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
1971 | 2003 | ||
1972 | if (ret != BTRFS_NO_LOG_SYNC) { | 2004 | if (ret != BTRFS_NO_LOG_SYNC) { |
1973 | if (!ret) { | 2005 | if (!ret) { |
1974 | ret = btrfs_sync_log(trans, root); | 2006 | ret = btrfs_sync_log(trans, root, &ctx); |
1975 | if (!ret) { | 2007 | if (!ret) { |
1976 | ret = btrfs_end_transaction(trans, root); | 2008 | ret = btrfs_end_transaction(trans, root); |
1977 | goto out; | 2009 | goto out; |
@@ -1993,6 +2025,7 @@ out: | |||
1993 | 2025 | ||
1994 | static const struct vm_operations_struct btrfs_file_vm_ops = { | 2026 | static const struct vm_operations_struct btrfs_file_vm_ops = { |
1995 | .fault = filemap_fault, | 2027 | .fault = filemap_fault, |
2028 | .map_pages = filemap_map_pages, | ||
1996 | .page_mkwrite = btrfs_page_mkwrite, | 2029 | .page_mkwrite = btrfs_page_mkwrite, |
1997 | .remap_pages = generic_file_remap_pages, | 2030 | .remap_pages = generic_file_remap_pages, |
1998 | }; | 2031 | }; |
@@ -2157,6 +2190,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2157 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == | 2190 | bool same_page = ((offset >> PAGE_CACHE_SHIFT) == |
2158 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); | 2191 | ((offset + len - 1) >> PAGE_CACHE_SHIFT)); |
2159 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); | 2192 | bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES); |
2193 | u64 ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE); | ||
2160 | 2194 | ||
2161 | ret = btrfs_wait_ordered_range(inode, offset, len); | 2195 | ret = btrfs_wait_ordered_range(inode, offset, len); |
2162 | if (ret) | 2196 | if (ret) |
@@ -2172,14 +2206,14 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2172 | * entire page. | 2206 | * entire page. |
2173 | */ | 2207 | */ |
2174 | if (same_page && len < PAGE_CACHE_SIZE) { | 2208 | if (same_page && len < PAGE_CACHE_SIZE) { |
2175 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) | 2209 | if (offset < ino_size) |
2176 | ret = btrfs_truncate_page(inode, offset, len, 0); | 2210 | ret = btrfs_truncate_page(inode, offset, len, 0); |
2177 | mutex_unlock(&inode->i_mutex); | 2211 | mutex_unlock(&inode->i_mutex); |
2178 | return ret; | 2212 | return ret; |
2179 | } | 2213 | } |
2180 | 2214 | ||
2181 | /* zero back part of the first page */ | 2215 | /* zero back part of the first page */ |
2182 | if (offset < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2216 | if (offset < ino_size) { |
2183 | ret = btrfs_truncate_page(inode, offset, 0, 0); | 2217 | ret = btrfs_truncate_page(inode, offset, 0, 0); |
2184 | if (ret) { | 2218 | if (ret) { |
2185 | mutex_unlock(&inode->i_mutex); | 2219 | mutex_unlock(&inode->i_mutex); |
@@ -2188,7 +2222,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2188 | } | 2222 | } |
2189 | 2223 | ||
2190 | /* zero the front end of the last page */ | 2224 | /* zero the front end of the last page */ |
2191 | if (offset + len < round_up(inode->i_size, PAGE_CACHE_SIZE)) { | 2225 | if (offset + len < ino_size) { |
2192 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); | 2226 | ret = btrfs_truncate_page(inode, offset + len, 0, 1); |
2193 | if (ret) { | 2227 | if (ret) { |
2194 | mutex_unlock(&inode->i_mutex); | 2228 | mutex_unlock(&inode->i_mutex); |
@@ -2277,10 +2311,13 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2277 | 2311 | ||
2278 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2312 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2279 | 2313 | ||
2280 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2314 | if (cur_offset < ino_size) { |
2281 | if (ret) { | 2315 | ret = fill_holes(trans, inode, path, cur_offset, |
2282 | err = ret; | 2316 | drop_end); |
2283 | break; | 2317 | if (ret) { |
2318 | err = ret; | ||
2319 | break; | ||
2320 | } | ||
2284 | } | 2321 | } |
2285 | 2322 | ||
2286 | cur_offset = drop_end; | 2323 | cur_offset = drop_end; |
@@ -2313,10 +2350,12 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) | |||
2313 | } | 2350 | } |
2314 | 2351 | ||
2315 | trans->block_rsv = &root->fs_info->trans_block_rsv; | 2352 | trans->block_rsv = &root->fs_info->trans_block_rsv; |
2316 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); | 2353 | if (cur_offset < ino_size) { |
2317 | if (ret) { | 2354 | ret = fill_holes(trans, inode, path, cur_offset, drop_end); |
2318 | err = ret; | 2355 | if (ret) { |
2319 | goto out_trans; | 2356 | err = ret; |
2357 | goto out_trans; | ||
2358 | } | ||
2320 | } | 2359 | } |
2321 | 2360 | ||
2322 | out_trans: | 2361 | out_trans: |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d3d44486290b..06e9a4152b14 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -864,7 +864,8 @@ static noinline int cow_file_range(struct inode *inode, | |||
864 | 864 | ||
865 | if (btrfs_is_free_space_inode(inode)) { | 865 | if (btrfs_is_free_space_inode(inode)) { |
866 | WARN_ON_ONCE(1); | 866 | WARN_ON_ONCE(1); |
867 | return -EINVAL; | 867 | ret = -EINVAL; |
868 | goto out_unlock; | ||
868 | } | 869 | } |
869 | 870 | ||
870 | num_bytes = ALIGN(end - start + 1, blocksize); | 871 | num_bytes = ALIGN(end - start + 1, blocksize); |
@@ -1075,17 +1076,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
1075 | async_cow->end = cur_end; | 1076 | async_cow->end = cur_end; |
1076 | INIT_LIST_HEAD(&async_cow->extents); | 1077 | INIT_LIST_HEAD(&async_cow->extents); |
1077 | 1078 | ||
1078 | async_cow->work.func = async_cow_start; | 1079 | btrfs_init_work(&async_cow->work, async_cow_start, |
1079 | async_cow->work.ordered_func = async_cow_submit; | 1080 | async_cow_submit, async_cow_free); |
1080 | async_cow->work.ordered_free = async_cow_free; | ||
1081 | async_cow->work.flags = 0; | ||
1082 | 1081 | ||
1083 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> | 1082 | nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >> |
1084 | PAGE_CACHE_SHIFT; | 1083 | PAGE_CACHE_SHIFT; |
1085 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); | 1084 | atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); |
1086 | 1085 | ||
1087 | btrfs_queue_worker(&root->fs_info->delalloc_workers, | 1086 | btrfs_queue_work(root->fs_info->delalloc_workers, |
1088 | &async_cow->work); | 1087 | &async_cow->work); |
1089 | 1088 | ||
1090 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { | 1089 | if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { |
1091 | wait_event(root->fs_info->async_submit_wait, | 1090 | wait_event(root->fs_info->async_submit_wait, |
@@ -1843,9 +1842,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) | |||
1843 | 1842 | ||
1844 | SetPageChecked(page); | 1843 | SetPageChecked(page); |
1845 | page_cache_get(page); | 1844 | page_cache_get(page); |
1846 | fixup->work.func = btrfs_writepage_fixup_worker; | 1845 | btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); |
1847 | fixup->page = page; | 1846 | fixup->page = page; |
1848 | btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); | 1847 | btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); |
1849 | return -EBUSY; | 1848 | return -EBUSY; |
1850 | } | 1849 | } |
1851 | 1850 | ||
@@ -2239,6 +2238,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path, | |||
2239 | return PTR_ERR(root); | 2238 | return PTR_ERR(root); |
2240 | } | 2239 | } |
2241 | 2240 | ||
2241 | if (btrfs_root_readonly(root)) { | ||
2242 | srcu_read_unlock(&fs_info->subvol_srcu, index); | ||
2243 | return 0; | ||
2244 | } | ||
2245 | |||
2242 | /* step 2: get inode */ | 2246 | /* step 2: get inode */ |
2243 | key.objectid = backref->inum; | 2247 | key.objectid = backref->inum; |
2244 | key.type = BTRFS_INODE_ITEM_KEY; | 2248 | key.type = BTRFS_INODE_ITEM_KEY; |
@@ -2759,7 +2763,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2759 | struct inode *inode = page->mapping->host; | 2763 | struct inode *inode = page->mapping->host; |
2760 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2764 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2761 | struct btrfs_ordered_extent *ordered_extent = NULL; | 2765 | struct btrfs_ordered_extent *ordered_extent = NULL; |
2762 | struct btrfs_workers *workers; | 2766 | struct btrfs_workqueue *workers; |
2763 | 2767 | ||
2764 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); | 2768 | trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); |
2765 | 2769 | ||
@@ -2768,14 +2772,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, | |||
2768 | end - start + 1, uptodate)) | 2772 | end - start + 1, uptodate)) |
2769 | return 0; | 2773 | return 0; |
2770 | 2774 | ||
2771 | ordered_extent->work.func = finish_ordered_fn; | 2775 | btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); |
2772 | ordered_extent->work.flags = 0; | ||
2773 | 2776 | ||
2774 | if (btrfs_is_free_space_inode(inode)) | 2777 | if (btrfs_is_free_space_inode(inode)) |
2775 | workers = &root->fs_info->endio_freespace_worker; | 2778 | workers = root->fs_info->endio_freespace_worker; |
2776 | else | 2779 | else |
2777 | workers = &root->fs_info->endio_write_workers; | 2780 | workers = root->fs_info->endio_write_workers; |
2778 | btrfs_queue_worker(workers, &ordered_extent->work); | 2781 | btrfs_queue_work(workers, &ordered_extent->work); |
2779 | 2782 | ||
2780 | return 0; | 2783 | return 0; |
2781 | } | 2784 | } |
@@ -4593,7 +4596,7 @@ static void evict_inode_truncate_pages(struct inode *inode) | |||
4593 | struct rb_node *node; | 4596 | struct rb_node *node; |
4594 | 4597 | ||
4595 | ASSERT(inode->i_state & I_FREEING); | 4598 | ASSERT(inode->i_state & I_FREEING); |
4596 | truncate_inode_pages(&inode->i_data, 0); | 4599 | truncate_inode_pages_final(&inode->i_data); |
4597 | 4600 | ||
4598 | write_lock(&map_tree->lock); | 4601 | write_lock(&map_tree->lock); |
4599 | while (!RB_EMPTY_ROOT(&map_tree->map)) { | 4602 | while (!RB_EMPTY_ROOT(&map_tree->map)) { |
@@ -4924,7 +4927,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) | |||
4924 | struct inode *inode; | 4927 | struct inode *inode; |
4925 | u64 objectid = 0; | 4928 | u64 objectid = 0; |
4926 | 4929 | ||
4927 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | 4930 | if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
4931 | WARN_ON(btrfs_root_refs(&root->root_item) != 0); | ||
4928 | 4932 | ||
4929 | spin_lock(&root->inode_lock); | 4933 | spin_lock(&root->inode_lock); |
4930 | again: | 4934 | again: |
@@ -5799,6 +5803,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
5799 | } | 5803 | } |
5800 | out_unlock: | 5804 | out_unlock: |
5801 | btrfs_end_transaction(trans, root); | 5805 | btrfs_end_transaction(trans, root); |
5806 | btrfs_balance_delayed_items(root); | ||
5802 | btrfs_btree_balance_dirty(root); | 5807 | btrfs_btree_balance_dirty(root); |
5803 | if (drop_inode) { | 5808 | if (drop_inode) { |
5804 | inode_dec_link_count(inode); | 5809 | inode_dec_link_count(inode); |
@@ -5872,6 +5877,7 @@ out_unlock: | |||
5872 | inode_dec_link_count(inode); | 5877 | inode_dec_link_count(inode); |
5873 | iput(inode); | 5878 | iput(inode); |
5874 | } | 5879 | } |
5880 | btrfs_balance_delayed_items(root); | ||
5875 | btrfs_btree_balance_dirty(root); | 5881 | btrfs_btree_balance_dirty(root); |
5876 | return err; | 5882 | return err; |
5877 | } | 5883 | } |
@@ -5930,6 +5936,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
5930 | } | 5936 | } |
5931 | 5937 | ||
5932 | btrfs_end_transaction(trans, root); | 5938 | btrfs_end_transaction(trans, root); |
5939 | btrfs_balance_delayed_items(root); | ||
5933 | fail: | 5940 | fail: |
5934 | if (drop_inode) { | 5941 | if (drop_inode) { |
5935 | inode_dec_link_count(inode); | 5942 | inode_dec_link_count(inode); |
@@ -5996,6 +6003,7 @@ out_fail: | |||
5996 | btrfs_end_transaction(trans, root); | 6003 | btrfs_end_transaction(trans, root); |
5997 | if (drop_on_err) | 6004 | if (drop_on_err) |
5998 | iput(inode); | 6005 | iput(inode); |
6006 | btrfs_balance_delayed_items(root); | ||
5999 | btrfs_btree_balance_dirty(root); | 6007 | btrfs_btree_balance_dirty(root); |
6000 | return err; | 6008 | return err; |
6001 | } | 6009 | } |
@@ -6550,6 +6558,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
6550 | int ret; | 6558 | int ret; |
6551 | struct extent_buffer *leaf; | 6559 | struct extent_buffer *leaf; |
6552 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6560 | struct btrfs_root *root = BTRFS_I(inode)->root; |
6561 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
6553 | struct btrfs_file_extent_item *fi; | 6562 | struct btrfs_file_extent_item *fi; |
6554 | struct btrfs_key key; | 6563 | struct btrfs_key key; |
6555 | u64 disk_bytenr; | 6564 | u64 disk_bytenr; |
@@ -6626,6 +6635,20 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
6626 | 6635 | ||
6627 | if (btrfs_extent_readonly(root, disk_bytenr)) | 6636 | if (btrfs_extent_readonly(root, disk_bytenr)) |
6628 | goto out; | 6637 | goto out; |
6638 | |||
6639 | num_bytes = min(offset + *len, extent_end) - offset; | ||
6640 | if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { | ||
6641 | u64 range_end; | ||
6642 | |||
6643 | range_end = round_up(offset + num_bytes, root->sectorsize) - 1; | ||
6644 | ret = test_range_bit(io_tree, offset, range_end, | ||
6645 | EXTENT_DELALLOC, 0, NULL); | ||
6646 | if (ret) { | ||
6647 | ret = -EAGAIN; | ||
6648 | goto out; | ||
6649 | } | ||
6650 | } | ||
6651 | |||
6629 | btrfs_release_path(path); | 6652 | btrfs_release_path(path); |
6630 | 6653 | ||
6631 | /* | 6654 | /* |
@@ -6654,7 +6677,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, | |||
6654 | */ | 6677 | */ |
6655 | disk_bytenr += backref_offset; | 6678 | disk_bytenr += backref_offset; |
6656 | disk_bytenr += offset - key.offset; | 6679 | disk_bytenr += offset - key.offset; |
6657 | num_bytes = min(offset + *len, extent_end) - offset; | ||
6658 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | 6680 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) |
6659 | goto out; | 6681 | goto out; |
6660 | /* | 6682 | /* |
@@ -7024,10 +7046,9 @@ again: | |||
7024 | if (!ret) | 7046 | if (!ret) |
7025 | goto out_test; | 7047 | goto out_test; |
7026 | 7048 | ||
7027 | ordered->work.func = finish_ordered_fn; | 7049 | btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); |
7028 | ordered->work.flags = 0; | 7050 | btrfs_queue_work(root->fs_info->endio_write_workers, |
7029 | btrfs_queue_worker(&root->fs_info->endio_write_workers, | 7051 | &ordered->work); |
7030 | &ordered->work); | ||
7031 | out_test: | 7052 | out_test: |
7032 | /* | 7053 | /* |
7033 | * our bio might span multiple ordered extents. If we haven't | 7054 | * our bio might span multiple ordered extents. If we haven't |
@@ -7404,15 +7425,15 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
7404 | smp_mb__after_atomic_inc(); | 7425 | smp_mb__after_atomic_inc(); |
7405 | 7426 | ||
7406 | /* | 7427 | /* |
7407 | * The generic stuff only does filemap_write_and_wait_range, which isn't | 7428 | * The generic stuff only does filemap_write_and_wait_range, which |
7408 | * enough if we've written compressed pages to this area, so we need to | 7429 | * isn't enough if we've written compressed pages to this area, so |
7409 | * call btrfs_wait_ordered_range to make absolutely sure that any | 7430 | * we need to flush the dirty pages again to make absolutely sure |
7410 | * outstanding dirty pages are on disk. | 7431 | * that any outstanding dirty pages are on disk. |
7411 | */ | 7432 | */ |
7412 | count = iov_length(iov, nr_segs); | 7433 | count = iov_length(iov, nr_segs); |
7413 | ret = btrfs_wait_ordered_range(inode, offset, count); | 7434 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, |
7414 | if (ret) | 7435 | &BTRFS_I(inode)->runtime_flags)) |
7415 | return ret; | 7436 | filemap_fdatawrite_range(inode->i_mapping, offset, count); |
7416 | 7437 | ||
7417 | if (rw & WRITE) { | 7438 | if (rw & WRITE) { |
7418 | /* | 7439 | /* |
@@ -8404,7 +8425,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, | |||
8404 | work->inode = inode; | 8425 | work->inode = inode; |
8405 | work->wait = wait; | 8426 | work->wait = wait; |
8406 | work->delay_iput = delay_iput; | 8427 | work->delay_iput = delay_iput; |
8407 | work->work.func = btrfs_run_delalloc_work; | 8428 | btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); |
8408 | 8429 | ||
8409 | return work; | 8430 | return work; |
8410 | } | 8431 | } |
@@ -8419,7 +8440,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) | |||
8419 | * some fairly slow code that needs optimization. This walks the list | 8440 | * some fairly slow code that needs optimization. This walks the list |
8420 | * of all the inodes with pending delalloc and forces them to disk. | 8441 | * of all the inodes with pending delalloc and forces them to disk. |
8421 | */ | 8442 | */ |
8422 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | 8443 | static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, |
8444 | int nr) | ||
8423 | { | 8445 | { |
8424 | struct btrfs_inode *binode; | 8446 | struct btrfs_inode *binode; |
8425 | struct inode *inode; | 8447 | struct inode *inode; |
@@ -8431,6 +8453,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8431 | INIT_LIST_HEAD(&works); | 8453 | INIT_LIST_HEAD(&works); |
8432 | INIT_LIST_HEAD(&splice); | 8454 | INIT_LIST_HEAD(&splice); |
8433 | 8455 | ||
8456 | mutex_lock(&root->delalloc_mutex); | ||
8434 | spin_lock(&root->delalloc_lock); | 8457 | spin_lock(&root->delalloc_lock); |
8435 | list_splice_init(&root->delalloc_inodes, &splice); | 8458 | list_splice_init(&root->delalloc_inodes, &splice); |
8436 | while (!list_empty(&splice)) { | 8459 | while (!list_empty(&splice)) { |
@@ -8453,12 +8476,14 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8453 | else | 8476 | else |
8454 | iput(inode); | 8477 | iput(inode); |
8455 | ret = -ENOMEM; | 8478 | ret = -ENOMEM; |
8456 | goto out; | 8479 | break; |
8457 | } | 8480 | } |
8458 | list_add_tail(&work->list, &works); | 8481 | list_add_tail(&work->list, &works); |
8459 | btrfs_queue_worker(&root->fs_info->flush_workers, | 8482 | btrfs_queue_work(root->fs_info->flush_workers, |
8460 | &work->work); | 8483 | &work->work); |
8461 | 8484 | ret++; | |
8485 | if (nr != -1 && ret >= nr) | ||
8486 | break; | ||
8462 | cond_resched(); | 8487 | cond_resched(); |
8463 | spin_lock(&root->delalloc_lock); | 8488 | spin_lock(&root->delalloc_lock); |
8464 | } | 8489 | } |
@@ -8468,18 +8493,13 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8468 | list_del_init(&work->list); | 8493 | list_del_init(&work->list); |
8469 | btrfs_wait_and_free_delalloc_work(work); | 8494 | btrfs_wait_and_free_delalloc_work(work); |
8470 | } | 8495 | } |
8471 | return 0; | ||
8472 | out: | ||
8473 | list_for_each_entry_safe(work, next, &works, list) { | ||
8474 | list_del_init(&work->list); | ||
8475 | btrfs_wait_and_free_delalloc_work(work); | ||
8476 | } | ||
8477 | 8496 | ||
8478 | if (!list_empty_careful(&splice)) { | 8497 | if (!list_empty_careful(&splice)) { |
8479 | spin_lock(&root->delalloc_lock); | 8498 | spin_lock(&root->delalloc_lock); |
8480 | list_splice_tail(&splice, &root->delalloc_inodes); | 8499 | list_splice_tail(&splice, &root->delalloc_inodes); |
8481 | spin_unlock(&root->delalloc_lock); | 8500 | spin_unlock(&root->delalloc_lock); |
8482 | } | 8501 | } |
8502 | mutex_unlock(&root->delalloc_mutex); | ||
8483 | return ret; | 8503 | return ret; |
8484 | } | 8504 | } |
8485 | 8505 | ||
@@ -8490,7 +8510,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8490 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) | 8510 | if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) |
8491 | return -EROFS; | 8511 | return -EROFS; |
8492 | 8512 | ||
8493 | ret = __start_delalloc_inodes(root, delay_iput); | 8513 | ret = __start_delalloc_inodes(root, delay_iput, -1); |
8514 | if (ret > 0) | ||
8515 | ret = 0; | ||
8494 | /* | 8516 | /* |
8495 | * the filemap_flush will queue IO into the worker threads, but | 8517 | * the filemap_flush will queue IO into the worker threads, but |
8496 | * we have to make sure the IO is actually started and that | 8518 | * we have to make sure the IO is actually started and that |
@@ -8507,7 +8529,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
8507 | return ret; | 8529 | return ret; |
8508 | } | 8530 | } |
8509 | 8531 | ||
8510 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | 8532 | int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, |
8533 | int nr) | ||
8511 | { | 8534 | { |
8512 | struct btrfs_root *root; | 8535 | struct btrfs_root *root; |
8513 | struct list_head splice; | 8536 | struct list_head splice; |
@@ -8518,9 +8541,10 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
8518 | 8541 | ||
8519 | INIT_LIST_HEAD(&splice); | 8542 | INIT_LIST_HEAD(&splice); |
8520 | 8543 | ||
8544 | mutex_lock(&fs_info->delalloc_root_mutex); | ||
8521 | spin_lock(&fs_info->delalloc_root_lock); | 8545 | spin_lock(&fs_info->delalloc_root_lock); |
8522 | list_splice_init(&fs_info->delalloc_roots, &splice); | 8546 | list_splice_init(&fs_info->delalloc_roots, &splice); |
8523 | while (!list_empty(&splice)) { | 8547 | while (!list_empty(&splice) && nr) { |
8524 | root = list_first_entry(&splice, struct btrfs_root, | 8548 | root = list_first_entry(&splice, struct btrfs_root, |
8525 | delalloc_root); | 8549 | delalloc_root); |
8526 | root = btrfs_grab_fs_root(root); | 8550 | root = btrfs_grab_fs_root(root); |
@@ -8529,15 +8553,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
8529 | &fs_info->delalloc_roots); | 8553 | &fs_info->delalloc_roots); |
8530 | spin_unlock(&fs_info->delalloc_root_lock); | 8554 | spin_unlock(&fs_info->delalloc_root_lock); |
8531 | 8555 | ||
8532 | ret = __start_delalloc_inodes(root, delay_iput); | 8556 | ret = __start_delalloc_inodes(root, delay_iput, nr); |
8533 | btrfs_put_fs_root(root); | 8557 | btrfs_put_fs_root(root); |
8534 | if (ret) | 8558 | if (ret < 0) |
8535 | goto out; | 8559 | goto out; |
8536 | 8560 | ||
8561 | if (nr != -1) { | ||
8562 | nr -= ret; | ||
8563 | WARN_ON(nr < 0); | ||
8564 | } | ||
8537 | spin_lock(&fs_info->delalloc_root_lock); | 8565 | spin_lock(&fs_info->delalloc_root_lock); |
8538 | } | 8566 | } |
8539 | spin_unlock(&fs_info->delalloc_root_lock); | 8567 | spin_unlock(&fs_info->delalloc_root_lock); |
8540 | 8568 | ||
8569 | ret = 0; | ||
8541 | atomic_inc(&fs_info->async_submit_draining); | 8570 | atomic_inc(&fs_info->async_submit_draining); |
8542 | while (atomic_read(&fs_info->nr_async_submits) || | 8571 | while (atomic_read(&fs_info->nr_async_submits) || |
8543 | atomic_read(&fs_info->async_delalloc_pages)) { | 8572 | atomic_read(&fs_info->async_delalloc_pages)) { |
@@ -8546,13 +8575,13 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) | |||
8546 | atomic_read(&fs_info->async_delalloc_pages) == 0)); | 8575 | atomic_read(&fs_info->async_delalloc_pages) == 0)); |
8547 | } | 8576 | } |
8548 | atomic_dec(&fs_info->async_submit_draining); | 8577 | atomic_dec(&fs_info->async_submit_draining); |
8549 | return 0; | ||
8550 | out: | 8578 | out: |
8551 | if (!list_empty_careful(&splice)) { | 8579 | if (!list_empty_careful(&splice)) { |
8552 | spin_lock(&fs_info->delalloc_root_lock); | 8580 | spin_lock(&fs_info->delalloc_root_lock); |
8553 | list_splice_tail(&splice, &fs_info->delalloc_roots); | 8581 | list_splice_tail(&splice, &fs_info->delalloc_roots); |
8554 | spin_unlock(&fs_info->delalloc_root_lock); | 8582 | spin_unlock(&fs_info->delalloc_root_lock); |
8555 | } | 8583 | } |
8584 | mutex_unlock(&fs_info->delalloc_root_mutex); | ||
8556 | return ret; | 8585 | return ret; |
8557 | } | 8586 | } |
8558 | 8587 | ||
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a6d8efa46bfe..0401397b5c92 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -59,6 +59,32 @@ | |||
59 | #include "props.h" | 59 | #include "props.h" |
60 | #include "sysfs.h" | 60 | #include "sysfs.h" |
61 | 61 | ||
62 | #ifdef CONFIG_64BIT | ||
63 | /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI | ||
64 | * structures are incorrect, as the timespec structure from userspace | ||
65 | * is 4 bytes too small. We define these alternatives here to teach | ||
66 | * the kernel about the 32-bit struct packing. | ||
67 | */ | ||
68 | struct btrfs_ioctl_timespec_32 { | ||
69 | __u64 sec; | ||
70 | __u32 nsec; | ||
71 | } __attribute__ ((__packed__)); | ||
72 | |||
73 | struct btrfs_ioctl_received_subvol_args_32 { | ||
74 | char uuid[BTRFS_UUID_SIZE]; /* in */ | ||
75 | __u64 stransid; /* in */ | ||
76 | __u64 rtransid; /* out */ | ||
77 | struct btrfs_ioctl_timespec_32 stime; /* in */ | ||
78 | struct btrfs_ioctl_timespec_32 rtime; /* out */ | ||
79 | __u64 flags; /* in */ | ||
80 | __u64 reserved[16]; /* in */ | ||
81 | } __attribute__ ((__packed__)); | ||
82 | |||
83 | #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ | ||
84 | struct btrfs_ioctl_received_subvol_args_32) | ||
85 | #endif | ||
86 | |||
87 | |||
62 | static int btrfs_clone(struct inode *src, struct inode *inode, | 88 | static int btrfs_clone(struct inode *src, struct inode *inode, |
63 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); | 89 | u64 off, u64 olen, u64 olen_aligned, u64 destoff); |
64 | 90 | ||
@@ -585,6 +611,23 @@ fail: | |||
585 | return ret; | 611 | return ret; |
586 | } | 612 | } |
587 | 613 | ||
614 | static void btrfs_wait_nocow_write(struct btrfs_root *root) | ||
615 | { | ||
616 | s64 writers; | ||
617 | DEFINE_WAIT(wait); | ||
618 | |||
619 | do { | ||
620 | prepare_to_wait(&root->subv_writers->wait, &wait, | ||
621 | TASK_UNINTERRUPTIBLE); | ||
622 | |||
623 | writers = percpu_counter_sum(&root->subv_writers->counter); | ||
624 | if (writers) | ||
625 | schedule(); | ||
626 | |||
627 | finish_wait(&root->subv_writers->wait, &wait); | ||
628 | } while (writers); | ||
629 | } | ||
630 | |||
588 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, | 631 | static int create_snapshot(struct btrfs_root *root, struct inode *dir, |
589 | struct dentry *dentry, char *name, int namelen, | 632 | struct dentry *dentry, char *name, int namelen, |
590 | u64 *async_transid, bool readonly, | 633 | u64 *async_transid, bool readonly, |
@@ -598,15 +641,21 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
598 | if (!root->ref_cows) | 641 | if (!root->ref_cows) |
599 | return -EINVAL; | 642 | return -EINVAL; |
600 | 643 | ||
644 | atomic_inc(&root->will_be_snapshoted); | ||
645 | smp_mb__after_atomic_inc(); | ||
646 | btrfs_wait_nocow_write(root); | ||
647 | |||
601 | ret = btrfs_start_delalloc_inodes(root, 0); | 648 | ret = btrfs_start_delalloc_inodes(root, 0); |
602 | if (ret) | 649 | if (ret) |
603 | return ret; | 650 | goto out; |
604 | 651 | ||
605 | btrfs_wait_ordered_extents(root, -1); | 652 | btrfs_wait_ordered_extents(root, -1); |
606 | 653 | ||
607 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 654 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
608 | if (!pending_snapshot) | 655 | if (!pending_snapshot) { |
609 | return -ENOMEM; | 656 | ret = -ENOMEM; |
657 | goto out; | ||
658 | } | ||
610 | 659 | ||
611 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, | 660 | btrfs_init_block_rsv(&pending_snapshot->block_rsv, |
612 | BTRFS_BLOCK_RSV_TEMP); | 661 | BTRFS_BLOCK_RSV_TEMP); |
@@ -623,7 +672,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir, | |||
623 | &pending_snapshot->qgroup_reserved, | 672 | &pending_snapshot->qgroup_reserved, |
624 | false); | 673 | false); |
625 | if (ret) | 674 | if (ret) |
626 | goto out; | 675 | goto free; |
627 | 676 | ||
628 | pending_snapshot->dentry = dentry; | 677 | pending_snapshot->dentry = dentry; |
629 | pending_snapshot->root = root; | 678 | pending_snapshot->root = root; |
@@ -674,8 +723,10 @@ fail: | |||
674 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, | 723 | btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, |
675 | &pending_snapshot->block_rsv, | 724 | &pending_snapshot->block_rsv, |
676 | pending_snapshot->qgroup_reserved); | 725 | pending_snapshot->qgroup_reserved); |
677 | out: | 726 | free: |
678 | kfree(pending_snapshot); | 727 | kfree(pending_snapshot); |
728 | out: | ||
729 | atomic_dec(&root->will_be_snapshoted); | ||
679 | return ret; | 730 | return ret; |
680 | } | 731 | } |
681 | 732 | ||
@@ -884,12 +935,14 @@ static int find_new_extents(struct btrfs_root *root, | |||
884 | min_key.type = BTRFS_EXTENT_DATA_KEY; | 935 | min_key.type = BTRFS_EXTENT_DATA_KEY; |
885 | min_key.offset = *off; | 936 | min_key.offset = *off; |
886 | 937 | ||
887 | path->keep_locks = 1; | ||
888 | |||
889 | while (1) { | 938 | while (1) { |
939 | path->keep_locks = 1; | ||
890 | ret = btrfs_search_forward(root, &min_key, path, newer_than); | 940 | ret = btrfs_search_forward(root, &min_key, path, newer_than); |
891 | if (ret != 0) | 941 | if (ret != 0) |
892 | goto none; | 942 | goto none; |
943 | path->keep_locks = 0; | ||
944 | btrfs_unlock_up_safe(path, 1); | ||
945 | process_slot: | ||
893 | if (min_key.objectid != ino) | 946 | if (min_key.objectid != ino) |
894 | goto none; | 947 | goto none; |
895 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) | 948 | if (min_key.type != BTRFS_EXTENT_DATA_KEY) |
@@ -908,6 +961,12 @@ static int find_new_extents(struct btrfs_root *root, | |||
908 | return 0; | 961 | return 0; |
909 | } | 962 | } |
910 | 963 | ||
964 | path->slots[0]++; | ||
965 | if (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
966 | btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); | ||
967 | goto process_slot; | ||
968 | } | ||
969 | |||
911 | if (min_key.offset == (u64)-1) | 970 | if (min_key.offset == (u64)-1) |
912 | goto none; | 971 | goto none; |
913 | 972 | ||
@@ -935,10 +994,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) | |||
935 | read_unlock(&em_tree->lock); | 994 | read_unlock(&em_tree->lock); |
936 | 995 | ||
937 | if (!em) { | 996 | if (!em) { |
997 | struct extent_state *cached = NULL; | ||
998 | u64 end = start + len - 1; | ||
999 | |||
938 | /* get the big lock and read metadata off disk */ | 1000 | /* get the big lock and read metadata off disk */ |
939 | lock_extent(io_tree, start, start + len - 1); | 1001 | lock_extent_bits(io_tree, start, end, 0, &cached); |
940 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | 1002 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); |
941 | unlock_extent(io_tree, start, start + len - 1); | 1003 | unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); |
942 | 1004 | ||
943 | if (IS_ERR(em)) | 1005 | if (IS_ERR(em)) |
944 | return NULL; | 1006 | return NULL; |
@@ -957,7 +1019,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) | |||
957 | return false; | 1019 | return false; |
958 | 1020 | ||
959 | next = defrag_lookup_extent(inode, em->start + em->len); | 1021 | next = defrag_lookup_extent(inode, em->start + em->len); |
960 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) | 1022 | if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || |
1023 | (em->block_start + em->block_len == next->block_start)) | ||
961 | ret = false; | 1024 | ret = false; |
962 | 1025 | ||
963 | free_extent_map(next); | 1026 | free_extent_map(next); |
@@ -1076,10 +1139,12 @@ again: | |||
1076 | page_start = page_offset(page); | 1139 | page_start = page_offset(page); |
1077 | page_end = page_start + PAGE_CACHE_SIZE - 1; | 1140 | page_end = page_start + PAGE_CACHE_SIZE - 1; |
1078 | while (1) { | 1141 | while (1) { |
1079 | lock_extent(tree, page_start, page_end); | 1142 | lock_extent_bits(tree, page_start, page_end, |
1143 | 0, &cached_state); | ||
1080 | ordered = btrfs_lookup_ordered_extent(inode, | 1144 | ordered = btrfs_lookup_ordered_extent(inode, |
1081 | page_start); | 1145 | page_start); |
1082 | unlock_extent(tree, page_start, page_end); | 1146 | unlock_extent_cached(tree, page_start, page_end, |
1147 | &cached_state, GFP_NOFS); | ||
1083 | if (!ordered) | 1148 | if (!ordered) |
1084 | break; | 1149 | break; |
1085 | 1150 | ||
@@ -1356,8 +1421,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, | |||
1356 | } | 1421 | } |
1357 | } | 1422 | } |
1358 | 1423 | ||
1359 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) | 1424 | if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { |
1360 | filemap_flush(inode->i_mapping); | 1425 | filemap_flush(inode->i_mapping); |
1426 | if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, | ||
1427 | &BTRFS_I(inode)->runtime_flags)) | ||
1428 | filemap_flush(inode->i_mapping); | ||
1429 | } | ||
1361 | 1430 | ||
1362 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { | 1431 | if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { |
1363 | /* the filemap_flush will queue IO into the worker threads, but | 1432 | /* the filemap_flush will queue IO into the worker threads, but |
@@ -1573,7 +1642,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, | |||
1573 | if (src_inode->i_sb != file_inode(file)->i_sb) { | 1642 | if (src_inode->i_sb != file_inode(file)->i_sb) { |
1574 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, | 1643 | btrfs_info(BTRFS_I(src_inode)->root->fs_info, |
1575 | "Snapshot src from another FS"); | 1644 | "Snapshot src from another FS"); |
1576 | ret = -EINVAL; | 1645 | ret = -EXDEV; |
1577 | } else if (!inode_owner_or_capable(src_inode)) { | 1646 | } else if (!inode_owner_or_capable(src_inode)) { |
1578 | /* | 1647 | /* |
1579 | * Subvolume creation is not restricted, but snapshots | 1648 | * Subvolume creation is not restricted, but snapshots |
@@ -1797,7 +1866,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root) | |||
1797 | if (di && !IS_ERR(di)) { | 1866 | if (di && !IS_ERR(di)) { |
1798 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); | 1867 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); |
1799 | if (key.objectid == root->root_key.objectid) { | 1868 | if (key.objectid == root->root_key.objectid) { |
1800 | ret = -ENOTEMPTY; | 1869 | ret = -EPERM; |
1870 | btrfs_err(root->fs_info, "deleting default subvolume " | ||
1871 | "%llu is not allowed", key.objectid); | ||
1801 | goto out; | 1872 | goto out; |
1802 | } | 1873 | } |
1803 | btrfs_release_path(path); | 1874 | btrfs_release_path(path); |
@@ -2994,8 +3065,9 @@ process_slot: | |||
2994 | new_key.offset + datal, | 3065 | new_key.offset + datal, |
2995 | 1); | 3066 | 1); |
2996 | if (ret) { | 3067 | if (ret) { |
2997 | btrfs_abort_transaction(trans, root, | 3068 | if (ret != -EINVAL) |
2998 | ret); | 3069 | btrfs_abort_transaction(trans, |
3070 | root, ret); | ||
2999 | btrfs_end_transaction(trans, root); | 3071 | btrfs_end_transaction(trans, root); |
3000 | goto out; | 3072 | goto out; |
3001 | } | 3073 | } |
@@ -3153,8 +3225,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
3153 | * decompress into destination's address_space (the file offset | 3225 | * decompress into destination's address_space (the file offset |
3154 | * may change, so source mapping won't do), then recompress (or | 3226 | * may change, so source mapping won't do), then recompress (or |
3155 | * otherwise reinsert) a subrange. | 3227 | * otherwise reinsert) a subrange. |
3156 | * - allow ranges within the same file to be cloned (provided | 3228 | * |
3157 | * they don't overlap)? | 3229 | * - split destination inode's inline extents. The inline extents can |
3230 | * be either compressed or non-compressed. | ||
3158 | */ | 3231 | */ |
3159 | 3232 | ||
3160 | /* the destination must be opened for writing */ | 3233 | /* the destination must be opened for writing */ |
@@ -4353,10 +4426,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) | |||
4353 | return btrfs_qgroup_wait_for_completion(root->fs_info); | 4426 | return btrfs_qgroup_wait_for_completion(root->fs_info); |
4354 | } | 4427 | } |
4355 | 4428 | ||
4356 | static long btrfs_ioctl_set_received_subvol(struct file *file, | 4429 | static long _btrfs_ioctl_set_received_subvol(struct file *file, |
4357 | void __user *arg) | 4430 | struct btrfs_ioctl_received_subvol_args *sa) |
4358 | { | 4431 | { |
4359 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
4360 | struct inode *inode = file_inode(file); | 4432 | struct inode *inode = file_inode(file); |
4361 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4433 | struct btrfs_root *root = BTRFS_I(inode)->root; |
4362 | struct btrfs_root_item *root_item = &root->root_item; | 4434 | struct btrfs_root_item *root_item = &root->root_item; |
@@ -4384,13 +4456,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
4384 | goto out; | 4456 | goto out; |
4385 | } | 4457 | } |
4386 | 4458 | ||
4387 | sa = memdup_user(arg, sizeof(*sa)); | ||
4388 | if (IS_ERR(sa)) { | ||
4389 | ret = PTR_ERR(sa); | ||
4390 | sa = NULL; | ||
4391 | goto out; | ||
4392 | } | ||
4393 | |||
4394 | /* | 4459 | /* |
4395 | * 1 - root item | 4460 | * 1 - root item |
4396 | * 2 - uuid items (received uuid + subvol uuid) | 4461 | * 2 - uuid items (received uuid + subvol uuid) |
@@ -4444,14 +4509,91 @@ static long btrfs_ioctl_set_received_subvol(struct file *file, | |||
4444 | goto out; | 4509 | goto out; |
4445 | } | 4510 | } |
4446 | 4511 | ||
4512 | out: | ||
4513 | up_write(&root->fs_info->subvol_sem); | ||
4514 | mnt_drop_write_file(file); | ||
4515 | return ret; | ||
4516 | } | ||
4517 | |||
4518 | #ifdef CONFIG_64BIT | ||
4519 | static long btrfs_ioctl_set_received_subvol_32(struct file *file, | ||
4520 | void __user *arg) | ||
4521 | { | ||
4522 | struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; | ||
4523 | struct btrfs_ioctl_received_subvol_args *args64 = NULL; | ||
4524 | int ret = 0; | ||
4525 | |||
4526 | args32 = memdup_user(arg, sizeof(*args32)); | ||
4527 | if (IS_ERR(args32)) { | ||
4528 | ret = PTR_ERR(args32); | ||
4529 | args32 = NULL; | ||
4530 | goto out; | ||
4531 | } | ||
4532 | |||
4533 | args64 = kmalloc(sizeof(*args64), GFP_NOFS); | ||
4534 | if (IS_ERR(args64)) { | ||
4535 | ret = PTR_ERR(args64); | ||
4536 | args64 = NULL; | ||
4537 | goto out; | ||
4538 | } | ||
4539 | |||
4540 | memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); | ||
4541 | args64->stransid = args32->stransid; | ||
4542 | args64->rtransid = args32->rtransid; | ||
4543 | args64->stime.sec = args32->stime.sec; | ||
4544 | args64->stime.nsec = args32->stime.nsec; | ||
4545 | args64->rtime.sec = args32->rtime.sec; | ||
4546 | args64->rtime.nsec = args32->rtime.nsec; | ||
4547 | args64->flags = args32->flags; | ||
4548 | |||
4549 | ret = _btrfs_ioctl_set_received_subvol(file, args64); | ||
4550 | if (ret) | ||
4551 | goto out; | ||
4552 | |||
4553 | memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); | ||
4554 | args32->stransid = args64->stransid; | ||
4555 | args32->rtransid = args64->rtransid; | ||
4556 | args32->stime.sec = args64->stime.sec; | ||
4557 | args32->stime.nsec = args64->stime.nsec; | ||
4558 | args32->rtime.sec = args64->rtime.sec; | ||
4559 | args32->rtime.nsec = args64->rtime.nsec; | ||
4560 | args32->flags = args64->flags; | ||
4561 | |||
4562 | ret = copy_to_user(arg, args32, sizeof(*args32)); | ||
4563 | if (ret) | ||
4564 | ret = -EFAULT; | ||
4565 | |||
4566 | out: | ||
4567 | kfree(args32); | ||
4568 | kfree(args64); | ||
4569 | return ret; | ||
4570 | } | ||
4571 | #endif | ||
4572 | |||
4573 | static long btrfs_ioctl_set_received_subvol(struct file *file, | ||
4574 | void __user *arg) | ||
4575 | { | ||
4576 | struct btrfs_ioctl_received_subvol_args *sa = NULL; | ||
4577 | int ret = 0; | ||
4578 | |||
4579 | sa = memdup_user(arg, sizeof(*sa)); | ||
4580 | if (IS_ERR(sa)) { | ||
4581 | ret = PTR_ERR(sa); | ||
4582 | sa = NULL; | ||
4583 | goto out; | ||
4584 | } | ||
4585 | |||
4586 | ret = _btrfs_ioctl_set_received_subvol(file, sa); | ||
4587 | |||
4588 | if (ret) | ||
4589 | goto out; | ||
4590 | |||
4447 | ret = copy_to_user(arg, sa, sizeof(*sa)); | 4591 | ret = copy_to_user(arg, sa, sizeof(*sa)); |
4448 | if (ret) | 4592 | if (ret) |
4449 | ret = -EFAULT; | 4593 | ret = -EFAULT; |
4450 | 4594 | ||
4451 | out: | 4595 | out: |
4452 | kfree(sa); | 4596 | kfree(sa); |
4453 | up_write(&root->fs_info->subvol_sem); | ||
4454 | mnt_drop_write_file(file); | ||
4455 | return ret; | 4597 | return ret; |
4456 | } | 4598 | } |
4457 | 4599 | ||
@@ -4746,7 +4888,7 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
4746 | case BTRFS_IOC_SYNC: { | 4888 | case BTRFS_IOC_SYNC: { |
4747 | int ret; | 4889 | int ret; |
4748 | 4890 | ||
4749 | ret = btrfs_start_delalloc_roots(root->fs_info, 0); | 4891 | ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); |
4750 | if (ret) | 4892 | if (ret) |
4751 | return ret; | 4893 | return ret; |
4752 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); | 4894 | ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); |
@@ -4770,6 +4912,10 @@ long btrfs_ioctl(struct file *file, unsigned int | |||
4770 | return btrfs_ioctl_balance_progress(root, argp); | 4912 | return btrfs_ioctl_balance_progress(root, argp); |
4771 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: | 4913 | case BTRFS_IOC_SET_RECEIVED_SUBVOL: |
4772 | return btrfs_ioctl_set_received_subvol(file, argp); | 4914 | return btrfs_ioctl_set_received_subvol(file, argp); |
4915 | #ifdef CONFIG_64BIT | ||
4916 | case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: | ||
4917 | return btrfs_ioctl_set_received_subvol_32(file, argp); | ||
4918 | #endif | ||
4773 | case BTRFS_IOC_SEND: | 4919 | case BTRFS_IOC_SEND: |
4774 | return btrfs_ioctl_send(file, argp); | 4920 | return btrfs_ioctl_send(file, argp); |
4775 | case BTRFS_IOC_GET_DEV_STATS: | 4921 | case BTRFS_IOC_GET_DEV_STATS: |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index b16450b840e7..a94b05f72869 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -349,10 +349,13 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode, | |||
349 | if (!uptodate) | 349 | if (!uptodate) |
350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 350 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
351 | 351 | ||
352 | if (entry->bytes_left == 0) | 352 | if (entry->bytes_left == 0) { |
353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 353 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
354 | else | 354 | if (waitqueue_active(&entry->wait)) |
355 | wake_up(&entry->wait); | ||
356 | } else { | ||
355 | ret = 1; | 357 | ret = 1; |
358 | } | ||
356 | out: | 359 | out: |
357 | if (!ret && cached && entry) { | 360 | if (!ret && cached && entry) { |
358 | *cached = entry; | 361 | *cached = entry; |
@@ -410,10 +413,13 @@ have_entry: | |||
410 | if (!uptodate) | 413 | if (!uptodate) |
411 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); | 414 | set_bit(BTRFS_ORDERED_IOERR, &entry->flags); |
412 | 415 | ||
413 | if (entry->bytes_left == 0) | 416 | if (entry->bytes_left == 0) { |
414 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); | 417 | ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags); |
415 | else | 418 | if (waitqueue_active(&entry->wait)) |
419 | wake_up(&entry->wait); | ||
420 | } else { | ||
416 | ret = 1; | 421 | ret = 1; |
422 | } | ||
417 | out: | 423 | out: |
418 | if (!ret && cached && entry) { | 424 | if (!ret && cached && entry) { |
419 | *cached = entry; | 425 | *cached = entry; |
@@ -424,27 +430,48 @@ out: | |||
424 | } | 430 | } |
425 | 431 | ||
426 | /* Needs to either be called under a log transaction or the log_mutex */ | 432 | /* Needs to either be called under a log transaction or the log_mutex */ |
427 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode) | 433 | void btrfs_get_logged_extents(struct inode *inode, |
434 | struct list_head *logged_list) | ||
428 | { | 435 | { |
429 | struct btrfs_ordered_inode_tree *tree; | 436 | struct btrfs_ordered_inode_tree *tree; |
430 | struct btrfs_ordered_extent *ordered; | 437 | struct btrfs_ordered_extent *ordered; |
431 | struct rb_node *n; | 438 | struct rb_node *n; |
432 | int index = log->log_transid % 2; | ||
433 | 439 | ||
434 | tree = &BTRFS_I(inode)->ordered_tree; | 440 | tree = &BTRFS_I(inode)->ordered_tree; |
435 | spin_lock_irq(&tree->lock); | 441 | spin_lock_irq(&tree->lock); |
436 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { | 442 | for (n = rb_first(&tree->tree); n; n = rb_next(n)) { |
437 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); | 443 | ordered = rb_entry(n, struct btrfs_ordered_extent, rb_node); |
438 | spin_lock(&log->log_extents_lock[index]); | 444 | if (!list_empty(&ordered->log_list)) |
439 | if (list_empty(&ordered->log_list)) { | 445 | continue; |
440 | list_add_tail(&ordered->log_list, &log->logged_list[index]); | 446 | list_add_tail(&ordered->log_list, logged_list); |
441 | atomic_inc(&ordered->refs); | 447 | atomic_inc(&ordered->refs); |
442 | } | ||
443 | spin_unlock(&log->log_extents_lock[index]); | ||
444 | } | 448 | } |
445 | spin_unlock_irq(&tree->lock); | 449 | spin_unlock_irq(&tree->lock); |
446 | } | 450 | } |
447 | 451 | ||
452 | void btrfs_put_logged_extents(struct list_head *logged_list) | ||
453 | { | ||
454 | struct btrfs_ordered_extent *ordered; | ||
455 | |||
456 | while (!list_empty(logged_list)) { | ||
457 | ordered = list_first_entry(logged_list, | ||
458 | struct btrfs_ordered_extent, | ||
459 | log_list); | ||
460 | list_del_init(&ordered->log_list); | ||
461 | btrfs_put_ordered_extent(ordered); | ||
462 | } | ||
463 | } | ||
464 | |||
465 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
466 | struct btrfs_root *log) | ||
467 | { | ||
468 | int index = log->log_transid % 2; | ||
469 | |||
470 | spin_lock_irq(&log->log_extents_lock[index]); | ||
471 | list_splice_tail(logged_list, &log->logged_list[index]); | ||
472 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
473 | } | ||
474 | |||
448 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) | 475 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid) |
449 | { | 476 | { |
450 | struct btrfs_ordered_extent *ordered; | 477 | struct btrfs_ordered_extent *ordered; |
@@ -577,7 +604,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
577 | INIT_LIST_HEAD(&splice); | 604 | INIT_LIST_HEAD(&splice); |
578 | INIT_LIST_HEAD(&works); | 605 | INIT_LIST_HEAD(&works); |
579 | 606 | ||
580 | mutex_lock(&root->fs_info->ordered_operations_mutex); | 607 | mutex_lock(&root->ordered_extent_mutex); |
581 | spin_lock(&root->ordered_extent_lock); | 608 | spin_lock(&root->ordered_extent_lock); |
582 | list_splice_init(&root->ordered_extents, &splice); | 609 | list_splice_init(&root->ordered_extents, &splice); |
583 | while (!list_empty(&splice) && nr) { | 610 | while (!list_empty(&splice) && nr) { |
@@ -588,10 +615,11 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
588 | atomic_inc(&ordered->refs); | 615 | atomic_inc(&ordered->refs); |
589 | spin_unlock(&root->ordered_extent_lock); | 616 | spin_unlock(&root->ordered_extent_lock); |
590 | 617 | ||
591 | ordered->flush_work.func = btrfs_run_ordered_extent_work; | 618 | btrfs_init_work(&ordered->flush_work, |
619 | btrfs_run_ordered_extent_work, NULL, NULL); | ||
592 | list_add_tail(&ordered->work_list, &works); | 620 | list_add_tail(&ordered->work_list, &works); |
593 | btrfs_queue_worker(&root->fs_info->flush_workers, | 621 | btrfs_queue_work(root->fs_info->flush_workers, |
594 | &ordered->flush_work); | 622 | &ordered->flush_work); |
595 | 623 | ||
596 | cond_resched(); | 624 | cond_resched(); |
597 | spin_lock(&root->ordered_extent_lock); | 625 | spin_lock(&root->ordered_extent_lock); |
@@ -608,7 +636,7 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr) | |||
608 | btrfs_put_ordered_extent(ordered); | 636 | btrfs_put_ordered_extent(ordered); |
609 | cond_resched(); | 637 | cond_resched(); |
610 | } | 638 | } |
611 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 639 | mutex_unlock(&root->ordered_extent_mutex); |
612 | 640 | ||
613 | return count; | 641 | return count; |
614 | } | 642 | } |
@@ -621,6 +649,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
621 | 649 | ||
622 | INIT_LIST_HEAD(&splice); | 650 | INIT_LIST_HEAD(&splice); |
623 | 651 | ||
652 | mutex_lock(&fs_info->ordered_operations_mutex); | ||
624 | spin_lock(&fs_info->ordered_root_lock); | 653 | spin_lock(&fs_info->ordered_root_lock); |
625 | list_splice_init(&fs_info->ordered_roots, &splice); | 654 | list_splice_init(&fs_info->ordered_roots, &splice); |
626 | while (!list_empty(&splice) && nr) { | 655 | while (!list_empty(&splice) && nr) { |
@@ -643,6 +672,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr) | |||
643 | } | 672 | } |
644 | list_splice_tail(&splice, &fs_info->ordered_roots); | 673 | list_splice_tail(&splice, &fs_info->ordered_roots); |
645 | spin_unlock(&fs_info->ordered_root_lock); | 674 | spin_unlock(&fs_info->ordered_root_lock); |
675 | mutex_unlock(&fs_info->ordered_operations_mutex); | ||
646 | } | 676 | } |
647 | 677 | ||
648 | /* | 678 | /* |
@@ -704,8 +734,8 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans, | |||
704 | goto out; | 734 | goto out; |
705 | } | 735 | } |
706 | list_add_tail(&work->list, &works); | 736 | list_add_tail(&work->list, &works); |
707 | btrfs_queue_worker(&root->fs_info->flush_workers, | 737 | btrfs_queue_work(root->fs_info->flush_workers, |
708 | &work->work); | 738 | &work->work); |
709 | 739 | ||
710 | cond_resched(); | 740 | cond_resched(); |
711 | spin_lock(&root->fs_info->ordered_root_lock); | 741 | spin_lock(&root->fs_info->ordered_root_lock); |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index 9b0450f7ac20..246897058efb 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -197,7 +197,11 @@ void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | |||
197 | struct inode *inode); | 197 | struct inode *inode); |
198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); | 198 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr); |
199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); | 199 | void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr); |
200 | void btrfs_get_logged_extents(struct btrfs_root *log, struct inode *inode); | 200 | void btrfs_get_logged_extents(struct inode *inode, |
201 | struct list_head *logged_list); | ||
202 | void btrfs_put_logged_extents(struct list_head *logged_list); | ||
203 | void btrfs_submit_logged_extents(struct list_head *logged_list, | ||
204 | struct btrfs_root *log); | ||
201 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); | 205 | void btrfs_wait_logged_extents(struct btrfs_root *log, u64 transid); |
202 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); | 206 | void btrfs_free_logged_extents(struct btrfs_root *log, u64 transid); |
203 | int __init ordered_data_init(void); | 207 | int __init ordered_data_init(void); |
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 472302a2d745..2cf905877aaf 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c | |||
@@ -1509,8 +1509,8 @@ int btrfs_run_qgroups(struct btrfs_trans_handle *trans, | |||
1509 | ret = qgroup_rescan_init(fs_info, 0, 1); | 1509 | ret = qgroup_rescan_init(fs_info, 0, 1); |
1510 | if (!ret) { | 1510 | if (!ret) { |
1511 | qgroup_rescan_zero_tracking(fs_info); | 1511 | qgroup_rescan_zero_tracking(fs_info); |
1512 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 1512 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
1513 | &fs_info->qgroup_rescan_work); | 1513 | &fs_info->qgroup_rescan_work); |
1514 | } | 1514 | } |
1515 | ret = 0; | 1515 | ret = 0; |
1516 | } | 1516 | } |
@@ -2095,7 +2095,8 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, | |||
2095 | 2095 | ||
2096 | memset(&fs_info->qgroup_rescan_work, 0, | 2096 | memset(&fs_info->qgroup_rescan_work, 0, |
2097 | sizeof(fs_info->qgroup_rescan_work)); | 2097 | sizeof(fs_info->qgroup_rescan_work)); |
2098 | fs_info->qgroup_rescan_work.func = btrfs_qgroup_rescan_worker; | 2098 | btrfs_init_work(&fs_info->qgroup_rescan_work, |
2099 | btrfs_qgroup_rescan_worker, NULL, NULL); | ||
2099 | 2100 | ||
2100 | if (ret) { | 2101 | if (ret) { |
2101 | err: | 2102 | err: |
@@ -2158,8 +2159,8 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info) | |||
2158 | 2159 | ||
2159 | qgroup_rescan_zero_tracking(fs_info); | 2160 | qgroup_rescan_zero_tracking(fs_info); |
2160 | 2161 | ||
2161 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2162 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
2162 | &fs_info->qgroup_rescan_work); | 2163 | &fs_info->qgroup_rescan_work); |
2163 | 2164 | ||
2164 | return 0; | 2165 | return 0; |
2165 | } | 2166 | } |
@@ -2190,6 +2191,6 @@ void | |||
2190 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) | 2191 | btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info) |
2191 | { | 2192 | { |
2192 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) | 2193 | if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) |
2193 | btrfs_queue_worker(&fs_info->qgroup_rescan_workers, | 2194 | btrfs_queue_work(fs_info->qgroup_rescan_workers, |
2194 | &fs_info->qgroup_rescan_work); | 2195 | &fs_info->qgroup_rescan_work); |
2195 | } | 2196 | } |
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 9af0b25d991a..4055291a523e 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c | |||
@@ -1416,20 +1416,18 @@ cleanup: | |||
1416 | 1416 | ||
1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) | 1417 | static void async_rmw_stripe(struct btrfs_raid_bio *rbio) |
1418 | { | 1418 | { |
1419 | rbio->work.flags = 0; | 1419 | btrfs_init_work(&rbio->work, rmw_work, NULL, NULL); |
1420 | rbio->work.func = rmw_work; | ||
1421 | 1420 | ||
1422 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1421 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
1423 | &rbio->work); | 1422 | &rbio->work); |
1424 | } | 1423 | } |
1425 | 1424 | ||
1426 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) | 1425 | static void async_read_rebuild(struct btrfs_raid_bio *rbio) |
1427 | { | 1426 | { |
1428 | rbio->work.flags = 0; | 1427 | btrfs_init_work(&rbio->work, read_rebuild_work, NULL, NULL); |
1429 | rbio->work.func = read_rebuild_work; | ||
1430 | 1428 | ||
1431 | btrfs_queue_worker(&rbio->fs_info->rmw_workers, | 1429 | btrfs_queue_work(rbio->fs_info->rmw_workers, |
1432 | &rbio->work); | 1430 | &rbio->work); |
1433 | } | 1431 | } |
1434 | 1432 | ||
1435 | /* | 1433 | /* |
@@ -1667,10 +1665,9 @@ static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) | |||
1667 | plug = container_of(cb, struct btrfs_plug_cb, cb); | 1665 | plug = container_of(cb, struct btrfs_plug_cb, cb); |
1668 | 1666 | ||
1669 | if (from_schedule) { | 1667 | if (from_schedule) { |
1670 | plug->work.flags = 0; | 1668 | btrfs_init_work(&plug->work, unplug_work, NULL, NULL); |
1671 | plug->work.func = unplug_work; | 1669 | btrfs_queue_work(plug->info->rmw_workers, |
1672 | btrfs_queue_worker(&plug->info->rmw_workers, | 1670 | &plug->work); |
1673 | &plug->work); | ||
1674 | return; | 1671 | return; |
1675 | } | 1672 | } |
1676 | run_plug(plug); | 1673 | run_plug(plug); |
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 31c797c48c3e..30947f923620 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c | |||
@@ -793,10 +793,10 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info) | |||
793 | /* FIXME we cannot handle this properly right now */ | 793 | /* FIXME we cannot handle this properly right now */ |
794 | BUG(); | 794 | BUG(); |
795 | } | 795 | } |
796 | rmw->work.func = reada_start_machine_worker; | 796 | btrfs_init_work(&rmw->work, reada_start_machine_worker, NULL, NULL); |
797 | rmw->fs_info = fs_info; | 797 | rmw->fs_info = fs_info; |
798 | 798 | ||
799 | btrfs_queue_worker(&fs_info->readahead_workers, &rmw->work); | 799 | btrfs_queue_work(fs_info->readahead_workers, &rmw->work); |
800 | } | 800 | } |
801 | 801 | ||
802 | #ifdef DEBUG | 802 | #ifdef DEBUG |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 07b3b36f40ee..def428a25b2a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -4248,7 +4248,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", | 4248 | btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", |
4249 | rc->block_group->key.objectid, rc->block_group->flags); | 4249 | rc->block_group->key.objectid, rc->block_group->flags); |
4250 | 4250 | ||
4251 | ret = btrfs_start_delalloc_roots(fs_info, 0); | 4251 | ret = btrfs_start_delalloc_roots(fs_info, 0, -1); |
4252 | if (ret < 0) { | 4252 | if (ret < 0) { |
4253 | err = ret; | 4253 | err = ret; |
4254 | goto out; | 4254 | goto out; |
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 1389b69059de..38bb47e7d6b1 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -16,6 +16,7 @@ | |||
16 | * Boston, MA 021110-1307, USA. | 16 | * Boston, MA 021110-1307, USA. |
17 | */ | 17 | */ |
18 | 18 | ||
19 | #include <linux/err.h> | ||
19 | #include <linux/uuid.h> | 20 | #include <linux/uuid.h> |
20 | #include "ctree.h" | 21 | #include "ctree.h" |
21 | #include "transaction.h" | 22 | #include "transaction.h" |
@@ -271,7 +272,7 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
271 | key.offset++; | 272 | key.offset++; |
272 | 273 | ||
273 | root = btrfs_read_fs_root(tree_root, &root_key); | 274 | root = btrfs_read_fs_root(tree_root, &root_key); |
274 | err = PTR_RET(root); | 275 | err = PTR_ERR_OR_ZERO(root); |
275 | if (err && err != -ENOENT) { | 276 | if (err && err != -ENOENT) { |
276 | break; | 277 | break; |
277 | } else if (err == -ENOENT) { | 278 | } else if (err == -ENOENT) { |
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index efba5d1282ee..93e6d7172844 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -315,6 +315,16 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) | |||
315 | atomic_inc(&fs_info->scrubs_running); | 315 | atomic_inc(&fs_info->scrubs_running); |
316 | atomic_inc(&fs_info->scrubs_paused); | 316 | atomic_inc(&fs_info->scrubs_paused); |
317 | mutex_unlock(&fs_info->scrub_lock); | 317 | mutex_unlock(&fs_info->scrub_lock); |
318 | |||
319 | /* | ||
320 | * check if @scrubs_running=@scrubs_paused condition | ||
321 | * inside wait_event() is not an atomic operation. | ||
322 | * which means we may inc/dec @scrub_running/paused | ||
323 | * at any time. Let's wake up @scrub_pause_wait as | ||
324 | * much as we can to let commit transaction blocked less. | ||
325 | */ | ||
326 | wake_up(&fs_info->scrub_pause_wait); | ||
327 | |||
318 | atomic_inc(&sctx->workers_pending); | 328 | atomic_inc(&sctx->workers_pending); |
319 | } | 329 | } |
320 | 330 | ||
@@ -418,7 +428,8 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
418 | sbio->index = i; | 428 | sbio->index = i; |
419 | sbio->sctx = sctx; | 429 | sbio->sctx = sctx; |
420 | sbio->page_count = 0; | 430 | sbio->page_count = 0; |
421 | sbio->work.func = scrub_bio_end_io_worker; | 431 | btrfs_init_work(&sbio->work, scrub_bio_end_io_worker, |
432 | NULL, NULL); | ||
422 | 433 | ||
423 | if (i != SCRUB_BIOS_PER_SCTX - 1) | 434 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
424 | sctx->bios[i]->next_free = i + 1; | 435 | sctx->bios[i]->next_free = i + 1; |
@@ -987,9 +998,10 @@ nodatasum_case: | |||
987 | fixup_nodatasum->root = fs_info->extent_root; | 998 | fixup_nodatasum->root = fs_info->extent_root; |
988 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; | 999 | fixup_nodatasum->mirror_num = failed_mirror_index + 1; |
989 | scrub_pending_trans_workers_inc(sctx); | 1000 | scrub_pending_trans_workers_inc(sctx); |
990 | fixup_nodatasum->work.func = scrub_fixup_nodatasum; | 1001 | btrfs_init_work(&fixup_nodatasum->work, scrub_fixup_nodatasum, |
991 | btrfs_queue_worker(&fs_info->scrub_workers, | 1002 | NULL, NULL); |
992 | &fixup_nodatasum->work); | 1003 | btrfs_queue_work(fs_info->scrub_workers, |
1004 | &fixup_nodatasum->work); | ||
993 | goto out; | 1005 | goto out; |
994 | } | 1006 | } |
995 | 1007 | ||
@@ -1603,8 +1615,8 @@ static void scrub_wr_bio_end_io(struct bio *bio, int err) | |||
1603 | sbio->err = err; | 1615 | sbio->err = err; |
1604 | sbio->bio = bio; | 1616 | sbio->bio = bio; |
1605 | 1617 | ||
1606 | sbio->work.func = scrub_wr_bio_end_io_worker; | 1618 | btrfs_init_work(&sbio->work, scrub_wr_bio_end_io_worker, NULL, NULL); |
1607 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | 1619 | btrfs_queue_work(fs_info->scrub_wr_completion_workers, &sbio->work); |
1608 | } | 1620 | } |
1609 | 1621 | ||
1610 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | 1622 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) |
@@ -2072,7 +2084,7 @@ static void scrub_bio_end_io(struct bio *bio, int err) | |||
2072 | sbio->err = err; | 2084 | sbio->err = err; |
2073 | sbio->bio = bio; | 2085 | sbio->bio = bio; |
2074 | 2086 | ||
2075 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | 2087 | btrfs_queue_work(fs_info->scrub_workers, &sbio->work); |
2076 | } | 2088 | } |
2077 | 2089 | ||
2078 | static void scrub_bio_end_io_worker(struct btrfs_work *work) | 2090 | static void scrub_bio_end_io_worker(struct btrfs_work *work) |
@@ -2686,10 +2698,23 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
2686 | 2698 | ||
2687 | wait_event(sctx->list_wait, | 2699 | wait_event(sctx->list_wait, |
2688 | atomic_read(&sctx->bios_in_flight) == 0); | 2700 | atomic_read(&sctx->bios_in_flight) == 0); |
2689 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | 2701 | atomic_inc(&fs_info->scrubs_paused); |
2702 | wake_up(&fs_info->scrub_pause_wait); | ||
2703 | |||
2704 | /* | ||
2705 | * must be called before we decrease @scrub_paused. | ||
2706 | * make sure we don't block transaction commit while | ||
2707 | * we are waiting pending workers finished. | ||
2708 | */ | ||
2690 | wait_event(sctx->list_wait, | 2709 | wait_event(sctx->list_wait, |
2691 | atomic_read(&sctx->workers_pending) == 0); | 2710 | atomic_read(&sctx->workers_pending) == 0); |
2692 | scrub_blocked_if_needed(fs_info); | 2711 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); |
2712 | |||
2713 | mutex_lock(&fs_info->scrub_lock); | ||
2714 | __scrub_blocked_if_needed(fs_info); | ||
2715 | atomic_dec(&fs_info->scrubs_paused); | ||
2716 | mutex_unlock(&fs_info->scrub_lock); | ||
2717 | wake_up(&fs_info->scrub_pause_wait); | ||
2693 | 2718 | ||
2694 | btrfs_put_block_group(cache); | 2719 | btrfs_put_block_group(cache); |
2695 | if (ret) | 2720 | if (ret) |
@@ -2757,33 +2782,35 @@ static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, | |||
2757 | int is_dev_replace) | 2782 | int is_dev_replace) |
2758 | { | 2783 | { |
2759 | int ret = 0; | 2784 | int ret = 0; |
2785 | int flags = WQ_FREEZABLE | WQ_UNBOUND; | ||
2786 | int max_active = fs_info->thread_pool_size; | ||
2760 | 2787 | ||
2761 | if (fs_info->scrub_workers_refcnt == 0) { | 2788 | if (fs_info->scrub_workers_refcnt == 0) { |
2762 | if (is_dev_replace) | 2789 | if (is_dev_replace) |
2763 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, | 2790 | fs_info->scrub_workers = |
2764 | &fs_info->generic_worker); | 2791 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
2792 | 1, 4); | ||
2765 | else | 2793 | else |
2766 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2794 | fs_info->scrub_workers = |
2767 | fs_info->thread_pool_size, | 2795 | btrfs_alloc_workqueue("btrfs-scrub", flags, |
2768 | &fs_info->generic_worker); | 2796 | max_active, 4); |
2769 | fs_info->scrub_workers.idle_thresh = 4; | 2797 | if (!fs_info->scrub_workers) { |
2770 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2798 | ret = -ENOMEM; |
2771 | if (ret) | ||
2772 | goto out; | 2799 | goto out; |
2773 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | 2800 | } |
2774 | "scrubwrc", | 2801 | fs_info->scrub_wr_completion_workers = |
2775 | fs_info->thread_pool_size, | 2802 | btrfs_alloc_workqueue("btrfs-scrubwrc", flags, |
2776 | &fs_info->generic_worker); | 2803 | max_active, 2); |
2777 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | 2804 | if (!fs_info->scrub_wr_completion_workers) { |
2778 | ret = btrfs_start_workers( | 2805 | ret = -ENOMEM; |
2779 | &fs_info->scrub_wr_completion_workers); | ||
2780 | if (ret) | ||
2781 | goto out; | 2806 | goto out; |
2782 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | 2807 | } |
2783 | &fs_info->generic_worker); | 2808 | fs_info->scrub_nocow_workers = |
2784 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | 2809 | btrfs_alloc_workqueue("btrfs-scrubnc", flags, 1, 0); |
2785 | if (ret) | 2810 | if (!fs_info->scrub_nocow_workers) { |
2811 | ret = -ENOMEM; | ||
2786 | goto out; | 2812 | goto out; |
2813 | } | ||
2787 | } | 2814 | } |
2788 | ++fs_info->scrub_workers_refcnt; | 2815 | ++fs_info->scrub_workers_refcnt; |
2789 | out: | 2816 | out: |
@@ -2793,9 +2820,9 @@ out: | |||
2793 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 2820 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
2794 | { | 2821 | { |
2795 | if (--fs_info->scrub_workers_refcnt == 0) { | 2822 | if (--fs_info->scrub_workers_refcnt == 0) { |
2796 | btrfs_stop_workers(&fs_info->scrub_workers); | 2823 | btrfs_destroy_workqueue(fs_info->scrub_workers); |
2797 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | 2824 | btrfs_destroy_workqueue(fs_info->scrub_wr_completion_workers); |
2798 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | 2825 | btrfs_destroy_workqueue(fs_info->scrub_nocow_workers); |
2799 | } | 2826 | } |
2800 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2827 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
2801 | } | 2828 | } |
@@ -3106,10 +3133,10 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
3106 | nocow_ctx->len = len; | 3133 | nocow_ctx->len = len; |
3107 | nocow_ctx->mirror_num = mirror_num; | 3134 | nocow_ctx->mirror_num = mirror_num; |
3108 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | 3135 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; |
3109 | nocow_ctx->work.func = copy_nocow_pages_worker; | 3136 | btrfs_init_work(&nocow_ctx->work, copy_nocow_pages_worker, NULL, NULL); |
3110 | INIT_LIST_HEAD(&nocow_ctx->inodes); | 3137 | INIT_LIST_HEAD(&nocow_ctx->inodes); |
3111 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | 3138 | btrfs_queue_work(fs_info->scrub_nocow_workers, |
3112 | &nocow_ctx->work); | 3139 | &nocow_ctx->work); |
3113 | 3140 | ||
3114 | return 0; | 3141 | return 0; |
3115 | } | 3142 | } |
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9dde9717c1b9..9b6da9d55f9a 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c | |||
@@ -51,15 +51,18 @@ struct fs_path { | |||
51 | struct { | 51 | struct { |
52 | char *start; | 52 | char *start; |
53 | char *end; | 53 | char *end; |
54 | char *prepared; | ||
55 | 54 | ||
56 | char *buf; | 55 | char *buf; |
57 | int buf_len; | 56 | unsigned short buf_len:15; |
58 | unsigned int reversed:1; | 57 | unsigned short reversed:1; |
59 | unsigned int virtual_mem:1; | ||
60 | char inline_buf[]; | 58 | char inline_buf[]; |
61 | }; | 59 | }; |
62 | char pad[PAGE_SIZE]; | 60 | /* |
61 | * Average path length does not exceed 200 bytes, we'll have | ||
62 | * better packing in the slab and higher chance to satisfy | ||
63 | * a allocation later during send. | ||
64 | */ | ||
65 | char pad[256]; | ||
63 | }; | 66 | }; |
64 | }; | 67 | }; |
65 | #define FS_PATH_INLINE_SIZE \ | 68 | #define FS_PATH_INLINE_SIZE \ |
@@ -109,6 +112,7 @@ struct send_ctx { | |||
109 | int cur_inode_deleted; | 112 | int cur_inode_deleted; |
110 | u64 cur_inode_size; | 113 | u64 cur_inode_size; |
111 | u64 cur_inode_mode; | 114 | u64 cur_inode_mode; |
115 | u64 cur_inode_rdev; | ||
112 | u64 cur_inode_last_extent; | 116 | u64 cur_inode_last_extent; |
113 | 117 | ||
114 | u64 send_progress; | 118 | u64 send_progress; |
@@ -120,6 +124,8 @@ struct send_ctx { | |||
120 | struct list_head name_cache_list; | 124 | struct list_head name_cache_list; |
121 | int name_cache_size; | 125 | int name_cache_size; |
122 | 126 | ||
127 | struct file_ra_state ra; | ||
128 | |||
123 | char *read_buf; | 129 | char *read_buf; |
124 | 130 | ||
125 | /* | 131 | /* |
@@ -175,6 +181,47 @@ struct send_ctx { | |||
175 | * own move/rename can be performed. | 181 | * own move/rename can be performed. |
176 | */ | 182 | */ |
177 | struct rb_root waiting_dir_moves; | 183 | struct rb_root waiting_dir_moves; |
184 | |||
185 | /* | ||
186 | * A directory that is going to be rm'ed might have a child directory | ||
187 | * which is in the pending directory moves index above. In this case, | ||
188 | * the directory can only be removed after the move/rename of its child | ||
189 | * is performed. Example: | ||
190 | * | ||
191 | * Parent snapshot: | ||
192 | * | ||
193 | * . (ino 256) | ||
194 | * |-- a/ (ino 257) | ||
195 | * |-- b/ (ino 258) | ||
196 | * |-- c/ (ino 259) | ||
197 | * | |-- x/ (ino 260) | ||
198 | * | | ||
199 | * |-- y/ (ino 261) | ||
200 | * | ||
201 | * Send snapshot: | ||
202 | * | ||
203 | * . (ino 256) | ||
204 | * |-- a/ (ino 257) | ||
205 | * |-- b/ (ino 258) | ||
206 | * |-- YY/ (ino 261) | ||
207 | * |-- x/ (ino 260) | ||
208 | * | ||
209 | * Sequence of steps that lead to the send snapshot: | ||
210 | * rm -f /a/b/c/foo.txt | ||
211 | * mv /a/b/y /a/b/YY | ||
212 | * mv /a/b/c/x /a/b/YY | ||
213 | * rmdir /a/b/c | ||
214 | * | ||
215 | * When the child is processed, its move/rename is delayed until its | ||
216 | * parent is processed (as explained above), but all other operations | ||
217 | * like update utimes, chown, chgrp, etc, are performed and the paths | ||
218 | * that it uses for those operations must use the orphanized name of | ||
219 | * its parent (the directory we're going to rm later), so we need to | ||
220 | * memorize that name. | ||
221 | * | ||
222 | * Indexed by the inode number of the directory to be deleted. | ||
223 | */ | ||
224 | struct rb_root orphan_dirs; | ||
178 | }; | 225 | }; |
179 | 226 | ||
180 | struct pending_dir_move { | 227 | struct pending_dir_move { |
@@ -189,6 +236,18 @@ struct pending_dir_move { | |||
189 | struct waiting_dir_move { | 236 | struct waiting_dir_move { |
190 | struct rb_node node; | 237 | struct rb_node node; |
191 | u64 ino; | 238 | u64 ino; |
239 | /* | ||
240 | * There might be some directory that could not be removed because it | ||
241 | * was waiting for this directory inode to be moved first. Therefore | ||
242 | * after this directory is moved, we can try to rmdir the ino rmdir_ino. | ||
243 | */ | ||
244 | u64 rmdir_ino; | ||
245 | }; | ||
246 | |||
247 | struct orphan_dir_info { | ||
248 | struct rb_node node; | ||
249 | u64 ino; | ||
250 | u64 gen; | ||
192 | }; | 251 | }; |
193 | 252 | ||
194 | struct name_cache_entry { | 253 | struct name_cache_entry { |
@@ -214,6 +273,11 @@ struct name_cache_entry { | |||
214 | 273 | ||
215 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); | 274 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino); |
216 | 275 | ||
276 | static struct waiting_dir_move * | ||
277 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino); | ||
278 | |||
279 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino); | ||
280 | |||
217 | static int need_send_hole(struct send_ctx *sctx) | 281 | static int need_send_hole(struct send_ctx *sctx) |
218 | { | 282 | { |
219 | return (sctx->parent_root && !sctx->cur_inode_new && | 283 | return (sctx->parent_root && !sctx->cur_inode_new && |
@@ -242,7 +306,6 @@ static struct fs_path *fs_path_alloc(void) | |||
242 | if (!p) | 306 | if (!p) |
243 | return NULL; | 307 | return NULL; |
244 | p->reversed = 0; | 308 | p->reversed = 0; |
245 | p->virtual_mem = 0; | ||
246 | p->buf = p->inline_buf; | 309 | p->buf = p->inline_buf; |
247 | p->buf_len = FS_PATH_INLINE_SIZE; | 310 | p->buf_len = FS_PATH_INLINE_SIZE; |
248 | fs_path_reset(p); | 311 | fs_path_reset(p); |
@@ -265,12 +328,8 @@ static void fs_path_free(struct fs_path *p) | |||
265 | { | 328 | { |
266 | if (!p) | 329 | if (!p) |
267 | return; | 330 | return; |
268 | if (p->buf != p->inline_buf) { | 331 | if (p->buf != p->inline_buf) |
269 | if (p->virtual_mem) | 332 | kfree(p->buf); |
270 | vfree(p->buf); | ||
271 | else | ||
272 | kfree(p->buf); | ||
273 | } | ||
274 | kfree(p); | 333 | kfree(p); |
275 | } | 334 | } |
276 | 335 | ||
@@ -292,40 +351,23 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
292 | 351 | ||
293 | path_len = p->end - p->start; | 352 | path_len = p->end - p->start; |
294 | old_buf_len = p->buf_len; | 353 | old_buf_len = p->buf_len; |
295 | len = PAGE_ALIGN(len); | 354 | |
296 | 355 | /* | |
297 | if (p->buf == p->inline_buf) { | 356 | * First time the inline_buf does not suffice |
298 | tmp_buf = kmalloc(len, GFP_NOFS | __GFP_NOWARN); | 357 | */ |
299 | if (!tmp_buf) { | 358 | if (p->buf == p->inline_buf) |
300 | tmp_buf = vmalloc(len); | 359 | tmp_buf = kmalloc(len, GFP_NOFS); |
301 | if (!tmp_buf) | 360 | else |
302 | return -ENOMEM; | 361 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); |
303 | p->virtual_mem = 1; | 362 | if (!tmp_buf) |
304 | } | 363 | return -ENOMEM; |
305 | memcpy(tmp_buf, p->buf, p->buf_len); | 364 | p->buf = tmp_buf; |
306 | p->buf = tmp_buf; | 365 | /* |
307 | p->buf_len = len; | 366 | * The real size of the buffer is bigger, this will let the fast path |
308 | } else { | 367 | * happen most of the time |
309 | if (p->virtual_mem) { | 368 | */ |
310 | tmp_buf = vmalloc(len); | 369 | p->buf_len = ksize(p->buf); |
311 | if (!tmp_buf) | 370 | |
312 | return -ENOMEM; | ||
313 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
314 | vfree(p->buf); | ||
315 | } else { | ||
316 | tmp_buf = krealloc(p->buf, len, GFP_NOFS); | ||
317 | if (!tmp_buf) { | ||
318 | tmp_buf = vmalloc(len); | ||
319 | if (!tmp_buf) | ||
320 | return -ENOMEM; | ||
321 | memcpy(tmp_buf, p->buf, p->buf_len); | ||
322 | kfree(p->buf); | ||
323 | p->virtual_mem = 1; | ||
324 | } | ||
325 | } | ||
326 | p->buf = tmp_buf; | ||
327 | p->buf_len = len; | ||
328 | } | ||
329 | if (p->reversed) { | 371 | if (p->reversed) { |
330 | tmp_buf = p->buf + old_buf_len - path_len - 1; | 372 | tmp_buf = p->buf + old_buf_len - path_len - 1; |
331 | p->end = p->buf + p->buf_len - 1; | 373 | p->end = p->buf + p->buf_len - 1; |
@@ -338,7 +380,8 @@ static int fs_path_ensure_buf(struct fs_path *p, int len) | |||
338 | return 0; | 380 | return 0; |
339 | } | 381 | } |
340 | 382 | ||
341 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | 383 | static int fs_path_prepare_for_add(struct fs_path *p, int name_len, |
384 | char **prepared) | ||
342 | { | 385 | { |
343 | int ret; | 386 | int ret; |
344 | int new_len; | 387 | int new_len; |
@@ -354,11 +397,11 @@ static int fs_path_prepare_for_add(struct fs_path *p, int name_len) | |||
354 | if (p->start != p->end) | 397 | if (p->start != p->end) |
355 | *--p->start = '/'; | 398 | *--p->start = '/'; |
356 | p->start -= name_len; | 399 | p->start -= name_len; |
357 | p->prepared = p->start; | 400 | *prepared = p->start; |
358 | } else { | 401 | } else { |
359 | if (p->start != p->end) | 402 | if (p->start != p->end) |
360 | *p->end++ = '/'; | 403 | *p->end++ = '/'; |
361 | p->prepared = p->end; | 404 | *prepared = p->end; |
362 | p->end += name_len; | 405 | p->end += name_len; |
363 | *p->end = 0; | 406 | *p->end = 0; |
364 | } | 407 | } |
@@ -370,12 +413,12 @@ out: | |||
370 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) | 413 | static int fs_path_add(struct fs_path *p, const char *name, int name_len) |
371 | { | 414 | { |
372 | int ret; | 415 | int ret; |
416 | char *prepared; | ||
373 | 417 | ||
374 | ret = fs_path_prepare_for_add(p, name_len); | 418 | ret = fs_path_prepare_for_add(p, name_len, &prepared); |
375 | if (ret < 0) | 419 | if (ret < 0) |
376 | goto out; | 420 | goto out; |
377 | memcpy(p->prepared, name, name_len); | 421 | memcpy(prepared, name, name_len); |
378 | p->prepared = NULL; | ||
379 | 422 | ||
380 | out: | 423 | out: |
381 | return ret; | 424 | return ret; |
@@ -384,12 +427,12 @@ out: | |||
384 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) | 427 | static int fs_path_add_path(struct fs_path *p, struct fs_path *p2) |
385 | { | 428 | { |
386 | int ret; | 429 | int ret; |
430 | char *prepared; | ||
387 | 431 | ||
388 | ret = fs_path_prepare_for_add(p, p2->end - p2->start); | 432 | ret = fs_path_prepare_for_add(p, p2->end - p2->start, &prepared); |
389 | if (ret < 0) | 433 | if (ret < 0) |
390 | goto out; | 434 | goto out; |
391 | memcpy(p->prepared, p2->start, p2->end - p2->start); | 435 | memcpy(prepared, p2->start, p2->end - p2->start); |
392 | p->prepared = NULL; | ||
393 | 436 | ||
394 | out: | 437 | out: |
395 | return ret; | 438 | return ret; |
@@ -400,13 +443,13 @@ static int fs_path_add_from_extent_buffer(struct fs_path *p, | |||
400 | unsigned long off, int len) | 443 | unsigned long off, int len) |
401 | { | 444 | { |
402 | int ret; | 445 | int ret; |
446 | char *prepared; | ||
403 | 447 | ||
404 | ret = fs_path_prepare_for_add(p, len); | 448 | ret = fs_path_prepare_for_add(p, len, &prepared); |
405 | if (ret < 0) | 449 | if (ret < 0) |
406 | goto out; | 450 | goto out; |
407 | 451 | ||
408 | read_extent_buffer(eb, p->prepared, off, len); | 452 | read_extent_buffer(eb, prepared, off, len); |
409 | p->prepared = NULL; | ||
410 | 453 | ||
411 | out: | 454 | out: |
412 | return ret; | 455 | return ret; |
@@ -915,9 +958,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
915 | struct btrfs_dir_item *di; | 958 | struct btrfs_dir_item *di; |
916 | struct btrfs_key di_key; | 959 | struct btrfs_key di_key; |
917 | char *buf = NULL; | 960 | char *buf = NULL; |
918 | char *buf2 = NULL; | 961 | const int buf_len = PATH_MAX; |
919 | int buf_len; | ||
920 | int buf_virtual = 0; | ||
921 | u32 name_len; | 962 | u32 name_len; |
922 | u32 data_len; | 963 | u32 data_len; |
923 | u32 cur; | 964 | u32 cur; |
@@ -927,7 +968,6 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
927 | int num; | 968 | int num; |
928 | u8 type; | 969 | u8 type; |
929 | 970 | ||
930 | buf_len = PAGE_SIZE; | ||
931 | buf = kmalloc(buf_len, GFP_NOFS); | 971 | buf = kmalloc(buf_len, GFP_NOFS); |
932 | if (!buf) { | 972 | if (!buf) { |
933 | ret = -ENOMEM; | 973 | ret = -ENOMEM; |
@@ -949,30 +989,12 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
949 | type = btrfs_dir_type(eb, di); | 989 | type = btrfs_dir_type(eb, di); |
950 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); | 990 | btrfs_dir_item_key_to_cpu(eb, di, &di_key); |
951 | 991 | ||
992 | /* | ||
993 | * Path too long | ||
994 | */ | ||
952 | if (name_len + data_len > buf_len) { | 995 | if (name_len + data_len > buf_len) { |
953 | buf_len = PAGE_ALIGN(name_len + data_len); | 996 | ret = -ENAMETOOLONG; |
954 | if (buf_virtual) { | 997 | goto out; |
955 | buf2 = vmalloc(buf_len); | ||
956 | if (!buf2) { | ||
957 | ret = -ENOMEM; | ||
958 | goto out; | ||
959 | } | ||
960 | vfree(buf); | ||
961 | } else { | ||
962 | buf2 = krealloc(buf, buf_len, GFP_NOFS); | ||
963 | if (!buf2) { | ||
964 | buf2 = vmalloc(buf_len); | ||
965 | if (!buf2) { | ||
966 | ret = -ENOMEM; | ||
967 | goto out; | ||
968 | } | ||
969 | kfree(buf); | ||
970 | buf_virtual = 1; | ||
971 | } | ||
972 | } | ||
973 | |||
974 | buf = buf2; | ||
975 | buf2 = NULL; | ||
976 | } | 998 | } |
977 | 999 | ||
978 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), | 1000 | read_extent_buffer(eb, buf, (unsigned long)(di + 1), |
@@ -995,10 +1017,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path, | |||
995 | } | 1017 | } |
996 | 1018 | ||
997 | out: | 1019 | out: |
998 | if (buf_virtual) | 1020 | kfree(buf); |
999 | vfree(buf); | ||
1000 | else | ||
1001 | kfree(buf); | ||
1002 | return ret; | 1021 | return ret; |
1003 | } | 1022 | } |
1004 | 1023 | ||
@@ -1292,8 +1311,6 @@ static int find_extent_clone(struct send_ctx *sctx, | |||
1292 | extent_item_pos = logical - found_key.objectid; | 1311 | extent_item_pos = logical - found_key.objectid; |
1293 | else | 1312 | else |
1294 | extent_item_pos = 0; | 1313 | extent_item_pos = 0; |
1295 | |||
1296 | extent_item_pos = logical - found_key.objectid; | ||
1297 | ret = iterate_extent_inodes(sctx->send_root->fs_info, | 1314 | ret = iterate_extent_inodes(sctx->send_root->fs_info, |
1298 | found_key.objectid, extent_item_pos, 1, | 1315 | found_key.objectid, extent_item_pos, 1, |
1299 | __iterate_backrefs, backref_ctx); | 1316 | __iterate_backrefs, backref_ctx); |
@@ -1418,11 +1435,7 @@ static int gen_unique_name(struct send_ctx *sctx, | |||
1418 | while (1) { | 1435 | while (1) { |
1419 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", | 1436 | len = snprintf(tmp, sizeof(tmp), "o%llu-%llu-%llu", |
1420 | ino, gen, idx); | 1437 | ino, gen, idx); |
1421 | if (len >= sizeof(tmp)) { | 1438 | ASSERT(len < sizeof(tmp)); |
1422 | /* should really not happen */ | ||
1423 | ret = -EOVERFLOW; | ||
1424 | goto out; | ||
1425 | } | ||
1426 | 1439 | ||
1427 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, | 1440 | di = btrfs_lookup_dir_item(NULL, sctx->send_root, |
1428 | path, BTRFS_FIRST_FREE_OBJECTID, | 1441 | path, BTRFS_FIRST_FREE_OBJECTID, |
@@ -1898,13 +1911,20 @@ static void name_cache_delete(struct send_ctx *sctx, | |||
1898 | 1911 | ||
1899 | nce_head = radix_tree_lookup(&sctx->name_cache, | 1912 | nce_head = radix_tree_lookup(&sctx->name_cache, |
1900 | (unsigned long)nce->ino); | 1913 | (unsigned long)nce->ino); |
1901 | BUG_ON(!nce_head); | 1914 | if (!nce_head) { |
1915 | btrfs_err(sctx->send_root->fs_info, | ||
1916 | "name_cache_delete lookup failed ino %llu cache size %d, leaking memory", | ||
1917 | nce->ino, sctx->name_cache_size); | ||
1918 | } | ||
1902 | 1919 | ||
1903 | list_del(&nce->radix_list); | 1920 | list_del(&nce->radix_list); |
1904 | list_del(&nce->list); | 1921 | list_del(&nce->list); |
1905 | sctx->name_cache_size--; | 1922 | sctx->name_cache_size--; |
1906 | 1923 | ||
1907 | if (list_empty(nce_head)) { | 1924 | /* |
1925 | * We may not get to the final release of nce_head if the lookup fails | ||
1926 | */ | ||
1927 | if (nce_head && list_empty(nce_head)) { | ||
1908 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); | 1928 | radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino); |
1909 | kfree(nce_head); | 1929 | kfree(nce_head); |
1910 | } | 1930 | } |
@@ -1977,7 +1997,6 @@ static void name_cache_free(struct send_ctx *sctx) | |||
1977 | */ | 1997 | */ |
1978 | static int __get_cur_name_and_parent(struct send_ctx *sctx, | 1998 | static int __get_cur_name_and_parent(struct send_ctx *sctx, |
1979 | u64 ino, u64 gen, | 1999 | u64 ino, u64 gen, |
1980 | int skip_name_cache, | ||
1981 | u64 *parent_ino, | 2000 | u64 *parent_ino, |
1982 | u64 *parent_gen, | 2001 | u64 *parent_gen, |
1983 | struct fs_path *dest) | 2002 | struct fs_path *dest) |
@@ -1987,8 +2006,6 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
1987 | struct btrfs_path *path = NULL; | 2006 | struct btrfs_path *path = NULL; |
1988 | struct name_cache_entry *nce = NULL; | 2007 | struct name_cache_entry *nce = NULL; |
1989 | 2008 | ||
1990 | if (skip_name_cache) | ||
1991 | goto get_ref; | ||
1992 | /* | 2009 | /* |
1993 | * First check if we already did a call to this function with the same | 2010 | * First check if we already did a call to this function with the same |
1994 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes | 2011 | * ino/gen. If yes, check if the cache entry is still up-to-date. If yes |
@@ -2033,12 +2050,11 @@ static int __get_cur_name_and_parent(struct send_ctx *sctx, | |||
2033 | goto out_cache; | 2050 | goto out_cache; |
2034 | } | 2051 | } |
2035 | 2052 | ||
2036 | get_ref: | ||
2037 | /* | 2053 | /* |
2038 | * Depending on whether the inode was already processed or not, use | 2054 | * Depending on whether the inode was already processed or not, use |
2039 | * send_root or parent_root for ref lookup. | 2055 | * send_root or parent_root for ref lookup. |
2040 | */ | 2056 | */ |
2041 | if (ino < sctx->send_progress && !skip_name_cache) | 2057 | if (ino < sctx->send_progress) |
2042 | ret = get_first_ref(sctx->send_root, ino, | 2058 | ret = get_first_ref(sctx->send_root, ino, |
2043 | parent_ino, parent_gen, dest); | 2059 | parent_ino, parent_gen, dest); |
2044 | else | 2060 | else |
@@ -2062,8 +2078,6 @@ get_ref: | |||
2062 | goto out; | 2078 | goto out; |
2063 | ret = 1; | 2079 | ret = 1; |
2064 | } | 2080 | } |
2065 | if (skip_name_cache) | ||
2066 | goto out; | ||
2067 | 2081 | ||
2068 | out_cache: | 2082 | out_cache: |
2069 | /* | 2083 | /* |
@@ -2131,9 +2145,6 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2131 | u64 parent_inode = 0; | 2145 | u64 parent_inode = 0; |
2132 | u64 parent_gen = 0; | 2146 | u64 parent_gen = 0; |
2133 | int stop = 0; | 2147 | int stop = 0; |
2134 | u64 start_ino = ino; | ||
2135 | u64 start_gen = gen; | ||
2136 | int skip_name_cache = 0; | ||
2137 | 2148 | ||
2138 | name = fs_path_alloc(); | 2149 | name = fs_path_alloc(); |
2139 | if (!name) { | 2150 | if (!name) { |
@@ -2141,31 +2152,33 @@ static int get_cur_path(struct send_ctx *sctx, u64 ino, u64 gen, | |||
2141 | goto out; | 2152 | goto out; |
2142 | } | 2153 | } |
2143 | 2154 | ||
2144 | if (is_waiting_for_move(sctx, ino)) | ||
2145 | skip_name_cache = 1; | ||
2146 | |||
2147 | again: | ||
2148 | dest->reversed = 1; | 2155 | dest->reversed = 1; |
2149 | fs_path_reset(dest); | 2156 | fs_path_reset(dest); |
2150 | 2157 | ||
2151 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { | 2158 | while (!stop && ino != BTRFS_FIRST_FREE_OBJECTID) { |
2152 | fs_path_reset(name); | 2159 | fs_path_reset(name); |
2153 | 2160 | ||
2154 | ret = __get_cur_name_and_parent(sctx, ino, gen, skip_name_cache, | 2161 | if (is_waiting_for_rm(sctx, ino)) { |
2155 | &parent_inode, &parent_gen, name); | 2162 | ret = gen_unique_name(sctx, ino, gen, name); |
2163 | if (ret < 0) | ||
2164 | goto out; | ||
2165 | ret = fs_path_add_path(dest, name); | ||
2166 | break; | ||
2167 | } | ||
2168 | |||
2169 | if (is_waiting_for_move(sctx, ino)) { | ||
2170 | ret = get_first_ref(sctx->parent_root, ino, | ||
2171 | &parent_inode, &parent_gen, name); | ||
2172 | } else { | ||
2173 | ret = __get_cur_name_and_parent(sctx, ino, gen, | ||
2174 | &parent_inode, | ||
2175 | &parent_gen, name); | ||
2176 | if (ret) | ||
2177 | stop = 1; | ||
2178 | } | ||
2179 | |||
2156 | if (ret < 0) | 2180 | if (ret < 0) |
2157 | goto out; | 2181 | goto out; |
2158 | if (ret) | ||
2159 | stop = 1; | ||
2160 | |||
2161 | if (!skip_name_cache && | ||
2162 | is_waiting_for_move(sctx, parent_inode)) { | ||
2163 | ino = start_ino; | ||
2164 | gen = start_gen; | ||
2165 | stop = 0; | ||
2166 | skip_name_cache = 1; | ||
2167 | goto again; | ||
2168 | } | ||
2169 | 2182 | ||
2170 | ret = fs_path_add_path(dest, name); | 2183 | ret = fs_path_add_path(dest, name); |
2171 | if (ret < 0) | 2184 | if (ret < 0) |
@@ -2429,10 +2442,16 @@ verbose_printk("btrfs: send_create_inode %llu\n", ino); | |||
2429 | if (!p) | 2442 | if (!p) |
2430 | return -ENOMEM; | 2443 | return -ENOMEM; |
2431 | 2444 | ||
2432 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, NULL, | 2445 | if (ino != sctx->cur_ino) { |
2433 | NULL, &rdev); | 2446 | ret = get_inode_info(sctx->send_root, ino, NULL, &gen, &mode, |
2434 | if (ret < 0) | 2447 | NULL, NULL, &rdev); |
2435 | goto out; | 2448 | if (ret < 0) |
2449 | goto out; | ||
2450 | } else { | ||
2451 | gen = sctx->cur_inode_gen; | ||
2452 | mode = sctx->cur_inode_mode; | ||
2453 | rdev = sctx->cur_inode_rdev; | ||
2454 | } | ||
2436 | 2455 | ||
2437 | if (S_ISREG(mode)) { | 2456 | if (S_ISREG(mode)) { |
2438 | cmd = BTRFS_SEND_C_MKFILE; | 2457 | cmd = BTRFS_SEND_C_MKFILE; |
@@ -2512,17 +2531,26 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
2512 | key.objectid = dir; | 2531 | key.objectid = dir; |
2513 | key.type = BTRFS_DIR_INDEX_KEY; | 2532 | key.type = BTRFS_DIR_INDEX_KEY; |
2514 | key.offset = 0; | 2533 | key.offset = 0; |
2534 | ret = btrfs_search_slot(NULL, sctx->send_root, &key, path, 0, 0); | ||
2535 | if (ret < 0) | ||
2536 | goto out; | ||
2537 | |||
2515 | while (1) { | 2538 | while (1) { |
2516 | ret = btrfs_search_slot_for_read(sctx->send_root, &key, path, | 2539 | eb = path->nodes[0]; |
2517 | 1, 0); | 2540 | slot = path->slots[0]; |
2518 | if (ret < 0) | 2541 | if (slot >= btrfs_header_nritems(eb)) { |
2519 | goto out; | 2542 | ret = btrfs_next_leaf(sctx->send_root, path); |
2520 | if (!ret) { | 2543 | if (ret < 0) { |
2521 | eb = path->nodes[0]; | 2544 | goto out; |
2522 | slot = path->slots[0]; | 2545 | } else if (ret > 0) { |
2523 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 2546 | ret = 0; |
2547 | break; | ||
2548 | } | ||
2549 | continue; | ||
2524 | } | 2550 | } |
2525 | if (ret || found_key.objectid != key.objectid || | 2551 | |
2552 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
2553 | if (found_key.objectid != key.objectid || | ||
2526 | found_key.type != key.type) { | 2554 | found_key.type != key.type) { |
2527 | ret = 0; | 2555 | ret = 0; |
2528 | goto out; | 2556 | goto out; |
@@ -2537,8 +2565,7 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) | |||
2537 | goto out; | 2565 | goto out; |
2538 | } | 2566 | } |
2539 | 2567 | ||
2540 | key.offset = found_key.offset + 1; | 2568 | path->slots[0]++; |
2541 | btrfs_release_path(path); | ||
2542 | } | 2569 | } |
2543 | 2570 | ||
2544 | out: | 2571 | out: |
@@ -2590,7 +2617,7 @@ struct recorded_ref { | |||
2590 | * everything mixed. So we first record all refs and later process them. | 2617 | * everything mixed. So we first record all refs and later process them. |
2591 | * This function is a helper to record one ref. | 2618 | * This function is a helper to record one ref. |
2592 | */ | 2619 | */ |
2593 | static int record_ref(struct list_head *head, u64 dir, | 2620 | static int __record_ref(struct list_head *head, u64 dir, |
2594 | u64 dir_gen, struct fs_path *path) | 2621 | u64 dir_gen, struct fs_path *path) |
2595 | { | 2622 | { |
2596 | struct recorded_ref *ref; | 2623 | struct recorded_ref *ref; |
@@ -2676,12 +2703,78 @@ out: | |||
2676 | return ret; | 2703 | return ret; |
2677 | } | 2704 | } |
2678 | 2705 | ||
2706 | static struct orphan_dir_info * | ||
2707 | add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
2708 | { | ||
2709 | struct rb_node **p = &sctx->orphan_dirs.rb_node; | ||
2710 | struct rb_node *parent = NULL; | ||
2711 | struct orphan_dir_info *entry, *odi; | ||
2712 | |||
2713 | odi = kmalloc(sizeof(*odi), GFP_NOFS); | ||
2714 | if (!odi) | ||
2715 | return ERR_PTR(-ENOMEM); | ||
2716 | odi->ino = dir_ino; | ||
2717 | odi->gen = 0; | ||
2718 | |||
2719 | while (*p) { | ||
2720 | parent = *p; | ||
2721 | entry = rb_entry(parent, struct orphan_dir_info, node); | ||
2722 | if (dir_ino < entry->ino) { | ||
2723 | p = &(*p)->rb_left; | ||
2724 | } else if (dir_ino > entry->ino) { | ||
2725 | p = &(*p)->rb_right; | ||
2726 | } else { | ||
2727 | kfree(odi); | ||
2728 | return entry; | ||
2729 | } | ||
2730 | } | ||
2731 | |||
2732 | rb_link_node(&odi->node, parent, p); | ||
2733 | rb_insert_color(&odi->node, &sctx->orphan_dirs); | ||
2734 | return odi; | ||
2735 | } | ||
2736 | |||
2737 | static struct orphan_dir_info * | ||
2738 | get_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino) | ||
2739 | { | ||
2740 | struct rb_node *n = sctx->orphan_dirs.rb_node; | ||
2741 | struct orphan_dir_info *entry; | ||
2742 | |||
2743 | while (n) { | ||
2744 | entry = rb_entry(n, struct orphan_dir_info, node); | ||
2745 | if (dir_ino < entry->ino) | ||
2746 | n = n->rb_left; | ||
2747 | else if (dir_ino > entry->ino) | ||
2748 | n = n->rb_right; | ||
2749 | else | ||
2750 | return entry; | ||
2751 | } | ||
2752 | return NULL; | ||
2753 | } | ||
2754 | |||
2755 | static int is_waiting_for_rm(struct send_ctx *sctx, u64 dir_ino) | ||
2756 | { | ||
2757 | struct orphan_dir_info *odi = get_orphan_dir_info(sctx, dir_ino); | ||
2758 | |||
2759 | return odi != NULL; | ||
2760 | } | ||
2761 | |||
2762 | static void free_orphan_dir_info(struct send_ctx *sctx, | ||
2763 | struct orphan_dir_info *odi) | ||
2764 | { | ||
2765 | if (!odi) | ||
2766 | return; | ||
2767 | rb_erase(&odi->node, &sctx->orphan_dirs); | ||
2768 | kfree(odi); | ||
2769 | } | ||
2770 | |||
2679 | /* | 2771 | /* |
2680 | * Returns 1 if a directory can be removed at this point in time. | 2772 | * Returns 1 if a directory can be removed at this point in time. |
2681 | * We check this by iterating all dir items and checking if the inode behind | 2773 | * We check this by iterating all dir items and checking if the inode behind |
2682 | * the dir item was already processed. | 2774 | * the dir item was already processed. |
2683 | */ | 2775 | */ |
2684 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | 2776 | static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 dir_gen, |
2777 | u64 send_progress) | ||
2685 | { | 2778 | { |
2686 | int ret = 0; | 2779 | int ret = 0; |
2687 | struct btrfs_root *root = sctx->parent_root; | 2780 | struct btrfs_root *root = sctx->parent_root; |
@@ -2704,31 +2797,52 @@ static int can_rmdir(struct send_ctx *sctx, u64 dir, u64 send_progress) | |||
2704 | key.objectid = dir; | 2797 | key.objectid = dir; |
2705 | key.type = BTRFS_DIR_INDEX_KEY; | 2798 | key.type = BTRFS_DIR_INDEX_KEY; |
2706 | key.offset = 0; | 2799 | key.offset = 0; |
2800 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
2801 | if (ret < 0) | ||
2802 | goto out; | ||
2707 | 2803 | ||
2708 | while (1) { | 2804 | while (1) { |
2709 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 2805 | struct waiting_dir_move *dm; |
2710 | if (ret < 0) | 2806 | |
2711 | goto out; | 2807 | if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { |
2712 | if (!ret) { | 2808 | ret = btrfs_next_leaf(root, path); |
2713 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | 2809 | if (ret < 0) |
2714 | path->slots[0]); | 2810 | goto out; |
2811 | else if (ret > 0) | ||
2812 | break; | ||
2813 | continue; | ||
2715 | } | 2814 | } |
2716 | if (ret || found_key.objectid != key.objectid || | 2815 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, |
2717 | found_key.type != key.type) { | 2816 | path->slots[0]); |
2817 | if (found_key.objectid != key.objectid || | ||
2818 | found_key.type != key.type) | ||
2718 | break; | 2819 | break; |
2719 | } | ||
2720 | 2820 | ||
2721 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2821 | di = btrfs_item_ptr(path->nodes[0], path->slots[0], |
2722 | struct btrfs_dir_item); | 2822 | struct btrfs_dir_item); |
2723 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); | 2823 | btrfs_dir_item_key_to_cpu(path->nodes[0], di, &loc); |
2724 | 2824 | ||
2825 | dm = get_waiting_dir_move(sctx, loc.objectid); | ||
2826 | if (dm) { | ||
2827 | struct orphan_dir_info *odi; | ||
2828 | |||
2829 | odi = add_orphan_dir_info(sctx, dir); | ||
2830 | if (IS_ERR(odi)) { | ||
2831 | ret = PTR_ERR(odi); | ||
2832 | goto out; | ||
2833 | } | ||
2834 | odi->gen = dir_gen; | ||
2835 | dm->rmdir_ino = dir; | ||
2836 | ret = 0; | ||
2837 | goto out; | ||
2838 | } | ||
2839 | |||
2725 | if (loc.objectid > send_progress) { | 2840 | if (loc.objectid > send_progress) { |
2726 | ret = 0; | 2841 | ret = 0; |
2727 | goto out; | 2842 | goto out; |
2728 | } | 2843 | } |
2729 | 2844 | ||
2730 | btrfs_release_path(path); | 2845 | path->slots[0]++; |
2731 | key.offset = found_key.offset + 1; | ||
2732 | } | 2846 | } |
2733 | 2847 | ||
2734 | ret = 1; | 2848 | ret = 1; |
@@ -2740,19 +2854,9 @@ out: | |||
2740 | 2854 | ||
2741 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) | 2855 | static int is_waiting_for_move(struct send_ctx *sctx, u64 ino) |
2742 | { | 2856 | { |
2743 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2857 | struct waiting_dir_move *entry = get_waiting_dir_move(sctx, ino); |
2744 | struct waiting_dir_move *entry; | ||
2745 | 2858 | ||
2746 | while (n) { | 2859 | return entry != NULL; |
2747 | entry = rb_entry(n, struct waiting_dir_move, node); | ||
2748 | if (ino < entry->ino) | ||
2749 | n = n->rb_left; | ||
2750 | else if (ino > entry->ino) | ||
2751 | n = n->rb_right; | ||
2752 | else | ||
2753 | return 1; | ||
2754 | } | ||
2755 | return 0; | ||
2756 | } | 2860 | } |
2757 | 2861 | ||
2758 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2862 | static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) |
@@ -2765,6 +2869,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
2765 | if (!dm) | 2869 | if (!dm) |
2766 | return -ENOMEM; | 2870 | return -ENOMEM; |
2767 | dm->ino = ino; | 2871 | dm->ino = ino; |
2872 | dm->rmdir_ino = 0; | ||
2768 | 2873 | ||
2769 | while (*p) { | 2874 | while (*p) { |
2770 | parent = *p; | 2875 | parent = *p; |
@@ -2784,31 +2889,41 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino) | |||
2784 | return 0; | 2889 | return 0; |
2785 | } | 2890 | } |
2786 | 2891 | ||
2787 | static int del_waiting_dir_move(struct send_ctx *sctx, u64 ino) | 2892 | static struct waiting_dir_move * |
2893 | get_waiting_dir_move(struct send_ctx *sctx, u64 ino) | ||
2788 | { | 2894 | { |
2789 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; | 2895 | struct rb_node *n = sctx->waiting_dir_moves.rb_node; |
2790 | struct waiting_dir_move *entry; | 2896 | struct waiting_dir_move *entry; |
2791 | 2897 | ||
2792 | while (n) { | 2898 | while (n) { |
2793 | entry = rb_entry(n, struct waiting_dir_move, node); | 2899 | entry = rb_entry(n, struct waiting_dir_move, node); |
2794 | if (ino < entry->ino) { | 2900 | if (ino < entry->ino) |
2795 | n = n->rb_left; | 2901 | n = n->rb_left; |
2796 | } else if (ino > entry->ino) { | 2902 | else if (ino > entry->ino) |
2797 | n = n->rb_right; | 2903 | n = n->rb_right; |
2798 | } else { | 2904 | else |
2799 | rb_erase(&entry->node, &sctx->waiting_dir_moves); | 2905 | return entry; |
2800 | kfree(entry); | ||
2801 | return 0; | ||
2802 | } | ||
2803 | } | 2906 | } |
2804 | return -ENOENT; | 2907 | return NULL; |
2908 | } | ||
2909 | |||
2910 | static void free_waiting_dir_move(struct send_ctx *sctx, | ||
2911 | struct waiting_dir_move *dm) | ||
2912 | { | ||
2913 | if (!dm) | ||
2914 | return; | ||
2915 | rb_erase(&dm->node, &sctx->waiting_dir_moves); | ||
2916 | kfree(dm); | ||
2805 | } | 2917 | } |
2806 | 2918 | ||
2807 | static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | 2919 | static int add_pending_dir_move(struct send_ctx *sctx, |
2920 | u64 ino, | ||
2921 | u64 ino_gen, | ||
2922 | u64 parent_ino) | ||
2808 | { | 2923 | { |
2809 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; | 2924 | struct rb_node **p = &sctx->pending_dir_moves.rb_node; |
2810 | struct rb_node *parent = NULL; | 2925 | struct rb_node *parent = NULL; |
2811 | struct pending_dir_move *entry, *pm; | 2926 | struct pending_dir_move *entry = NULL, *pm; |
2812 | struct recorded_ref *cur; | 2927 | struct recorded_ref *cur; |
2813 | int exists = 0; | 2928 | int exists = 0; |
2814 | int ret; | 2929 | int ret; |
@@ -2817,8 +2932,8 @@ static int add_pending_dir_move(struct send_ctx *sctx, u64 parent_ino) | |||
2817 | if (!pm) | 2932 | if (!pm) |
2818 | return -ENOMEM; | 2933 | return -ENOMEM; |
2819 | pm->parent_ino = parent_ino; | 2934 | pm->parent_ino = parent_ino; |
2820 | pm->ino = sctx->cur_ino; | 2935 | pm->ino = ino; |
2821 | pm->gen = sctx->cur_inode_gen; | 2936 | pm->gen = ino_gen; |
2822 | INIT_LIST_HEAD(&pm->list); | 2937 | INIT_LIST_HEAD(&pm->list); |
2823 | INIT_LIST_HEAD(&pm->update_refs); | 2938 | INIT_LIST_HEAD(&pm->update_refs); |
2824 | RB_CLEAR_NODE(&pm->node); | 2939 | RB_CLEAR_NODE(&pm->node); |
@@ -2888,19 +3003,52 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2888 | { | 3003 | { |
2889 | struct fs_path *from_path = NULL; | 3004 | struct fs_path *from_path = NULL; |
2890 | struct fs_path *to_path = NULL; | 3005 | struct fs_path *to_path = NULL; |
3006 | struct fs_path *name = NULL; | ||
2891 | u64 orig_progress = sctx->send_progress; | 3007 | u64 orig_progress = sctx->send_progress; |
2892 | struct recorded_ref *cur; | 3008 | struct recorded_ref *cur; |
3009 | u64 parent_ino, parent_gen; | ||
3010 | struct waiting_dir_move *dm = NULL; | ||
3011 | u64 rmdir_ino = 0; | ||
2893 | int ret; | 3012 | int ret; |
2894 | 3013 | ||
3014 | name = fs_path_alloc(); | ||
2895 | from_path = fs_path_alloc(); | 3015 | from_path = fs_path_alloc(); |
2896 | if (!from_path) | 3016 | if (!name || !from_path) { |
2897 | return -ENOMEM; | 3017 | ret = -ENOMEM; |
3018 | goto out; | ||
3019 | } | ||
2898 | 3020 | ||
2899 | sctx->send_progress = pm->ino; | 3021 | dm = get_waiting_dir_move(sctx, pm->ino); |
2900 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | 3022 | ASSERT(dm); |
3023 | rmdir_ino = dm->rmdir_ino; | ||
3024 | free_waiting_dir_move(sctx, dm); | ||
3025 | |||
3026 | ret = get_first_ref(sctx->parent_root, pm->ino, | ||
3027 | &parent_ino, &parent_gen, name); | ||
2901 | if (ret < 0) | 3028 | if (ret < 0) |
2902 | goto out; | 3029 | goto out; |
2903 | 3030 | ||
3031 | if (parent_ino == sctx->cur_ino) { | ||
3032 | /* child only renamed, not moved */ | ||
3033 | ASSERT(parent_gen == sctx->cur_inode_gen); | ||
3034 | ret = get_cur_path(sctx, sctx->cur_ino, sctx->cur_inode_gen, | ||
3035 | from_path); | ||
3036 | if (ret < 0) | ||
3037 | goto out; | ||
3038 | ret = fs_path_add_path(from_path, name); | ||
3039 | if (ret < 0) | ||
3040 | goto out; | ||
3041 | } else { | ||
3042 | /* child moved and maybe renamed too */ | ||
3043 | sctx->send_progress = pm->ino; | ||
3044 | ret = get_cur_path(sctx, pm->ino, pm->gen, from_path); | ||
3045 | if (ret < 0) | ||
3046 | goto out; | ||
3047 | } | ||
3048 | |||
3049 | fs_path_free(name); | ||
3050 | name = NULL; | ||
3051 | |||
2904 | to_path = fs_path_alloc(); | 3052 | to_path = fs_path_alloc(); |
2905 | if (!to_path) { | 3053 | if (!to_path) { |
2906 | ret = -ENOMEM; | 3054 | ret = -ENOMEM; |
@@ -2908,9 +3056,6 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2908 | } | 3056 | } |
2909 | 3057 | ||
2910 | sctx->send_progress = sctx->cur_ino + 1; | 3058 | sctx->send_progress = sctx->cur_ino + 1; |
2911 | ret = del_waiting_dir_move(sctx, pm->ino); | ||
2912 | ASSERT(ret == 0); | ||
2913 | |||
2914 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); | 3059 | ret = get_cur_path(sctx, pm->ino, pm->gen, to_path); |
2915 | if (ret < 0) | 3060 | if (ret < 0) |
2916 | goto out; | 3061 | goto out; |
@@ -2919,6 +3064,35 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2919 | if (ret < 0) | 3064 | if (ret < 0) |
2920 | goto out; | 3065 | goto out; |
2921 | 3066 | ||
3067 | if (rmdir_ino) { | ||
3068 | struct orphan_dir_info *odi; | ||
3069 | |||
3070 | odi = get_orphan_dir_info(sctx, rmdir_ino); | ||
3071 | if (!odi) { | ||
3072 | /* already deleted */ | ||
3073 | goto finish; | ||
3074 | } | ||
3075 | ret = can_rmdir(sctx, rmdir_ino, odi->gen, sctx->cur_ino + 1); | ||
3076 | if (ret < 0) | ||
3077 | goto out; | ||
3078 | if (!ret) | ||
3079 | goto finish; | ||
3080 | |||
3081 | name = fs_path_alloc(); | ||
3082 | if (!name) { | ||
3083 | ret = -ENOMEM; | ||
3084 | goto out; | ||
3085 | } | ||
3086 | ret = get_cur_path(sctx, rmdir_ino, odi->gen, name); | ||
3087 | if (ret < 0) | ||
3088 | goto out; | ||
3089 | ret = send_rmdir(sctx, name); | ||
3090 | if (ret < 0) | ||
3091 | goto out; | ||
3092 | free_orphan_dir_info(sctx, odi); | ||
3093 | } | ||
3094 | |||
3095 | finish: | ||
2922 | ret = send_utimes(sctx, pm->ino, pm->gen); | 3096 | ret = send_utimes(sctx, pm->ino, pm->gen); |
2923 | if (ret < 0) | 3097 | if (ret < 0) |
2924 | goto out; | 3098 | goto out; |
@@ -2928,12 +3102,15 @@ static int apply_dir_move(struct send_ctx *sctx, struct pending_dir_move *pm) | |||
2928 | * and old parent(s). | 3102 | * and old parent(s). |
2929 | */ | 3103 | */ |
2930 | list_for_each_entry(cur, &pm->update_refs, list) { | 3104 | list_for_each_entry(cur, &pm->update_refs, list) { |
3105 | if (cur->dir == rmdir_ino) | ||
3106 | continue; | ||
2931 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3107 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
2932 | if (ret < 0) | 3108 | if (ret < 0) |
2933 | goto out; | 3109 | goto out; |
2934 | } | 3110 | } |
2935 | 3111 | ||
2936 | out: | 3112 | out: |
3113 | fs_path_free(name); | ||
2937 | fs_path_free(from_path); | 3114 | fs_path_free(from_path); |
2938 | fs_path_free(to_path); | 3115 | fs_path_free(to_path); |
2939 | sctx->send_progress = orig_progress; | 3116 | sctx->send_progress = orig_progress; |
@@ -3005,17 +3182,19 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3005 | int ret; | 3182 | int ret; |
3006 | u64 ino = parent_ref->dir; | 3183 | u64 ino = parent_ref->dir; |
3007 | u64 parent_ino_before, parent_ino_after; | 3184 | u64 parent_ino_before, parent_ino_after; |
3008 | u64 new_gen, old_gen; | 3185 | u64 old_gen; |
3009 | struct fs_path *path_before = NULL; | 3186 | struct fs_path *path_before = NULL; |
3010 | struct fs_path *path_after = NULL; | 3187 | struct fs_path *path_after = NULL; |
3011 | int len1, len2; | 3188 | int len1, len2; |
3012 | 3189 | int register_upper_dirs; | |
3013 | if (parent_ref->dir <= sctx->cur_ino) | 3190 | u64 gen; |
3014 | return 0; | ||
3015 | 3191 | ||
3016 | if (is_waiting_for_move(sctx, ino)) | 3192 | if (is_waiting_for_move(sctx, ino)) |
3017 | return 1; | 3193 | return 1; |
3018 | 3194 | ||
3195 | if (parent_ref->dir <= sctx->cur_ino) | ||
3196 | return 0; | ||
3197 | |||
3019 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, | 3198 | ret = get_inode_info(sctx->parent_root, ino, NULL, &old_gen, |
3020 | NULL, NULL, NULL, NULL); | 3199 | NULL, NULL, NULL, NULL); |
3021 | if (ret == -ENOENT) | 3200 | if (ret == -ENOENT) |
@@ -3023,12 +3202,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3023 | else if (ret < 0) | 3202 | else if (ret < 0) |
3024 | return ret; | 3203 | return ret; |
3025 | 3204 | ||
3026 | ret = get_inode_info(sctx->send_root, ino, NULL, &new_gen, | 3205 | if (parent_ref->dir_gen != old_gen) |
3027 | NULL, NULL, NULL, NULL); | ||
3028 | if (ret < 0) | ||
3029 | return ret; | ||
3030 | |||
3031 | if (new_gen != old_gen) | ||
3032 | return 0; | 3206 | return 0; |
3033 | 3207 | ||
3034 | path_before = fs_path_alloc(); | 3208 | path_before = fs_path_alloc(); |
@@ -3051,7 +3225,7 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3051 | } | 3225 | } |
3052 | 3226 | ||
3053 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | 3227 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, |
3054 | NULL, path_after); | 3228 | &gen, path_after); |
3055 | if (ret == -ENOENT) { | 3229 | if (ret == -ENOENT) { |
3056 | ret = 0; | 3230 | ret = 0; |
3057 | goto out; | 3231 | goto out; |
@@ -3061,13 +3235,67 @@ static int wait_for_parent_move(struct send_ctx *sctx, | |||
3061 | 3235 | ||
3062 | len1 = fs_path_len(path_before); | 3236 | len1 = fs_path_len(path_before); |
3063 | len2 = fs_path_len(path_after); | 3237 | len2 = fs_path_len(path_after); |
3064 | if ((parent_ino_before != parent_ino_after) && (len1 != len2 || | 3238 | if (parent_ino_before != parent_ino_after || len1 != len2 || |
3065 | memcmp(path_before->start, path_after->start, len1))) { | 3239 | memcmp(path_before->start, path_after->start, len1)) { |
3066 | ret = 1; | 3240 | ret = 1; |
3067 | goto out; | 3241 | goto out; |
3068 | } | 3242 | } |
3069 | ret = 0; | 3243 | ret = 0; |
3070 | 3244 | ||
3245 | /* | ||
3246 | * Ok, our new most direct ancestor has a higher inode number but | ||
3247 | * wasn't moved/renamed. So maybe some of the new ancestors higher in | ||
3248 | * the hierarchy have an higher inode number too *and* were renamed | ||
3249 | * or moved - in this case we need to wait for the ancestor's rename | ||
3250 | * or move operation before we can do the move/rename for the current | ||
3251 | * inode. | ||
3252 | */ | ||
3253 | register_upper_dirs = 0; | ||
3254 | ino = parent_ino_after; | ||
3255 | again: | ||
3256 | while ((ret == 0 || register_upper_dirs) && ino > sctx->cur_ino) { | ||
3257 | u64 parent_gen; | ||
3258 | |||
3259 | fs_path_reset(path_before); | ||
3260 | fs_path_reset(path_after); | ||
3261 | |||
3262 | ret = get_first_ref(sctx->send_root, ino, &parent_ino_after, | ||
3263 | &parent_gen, path_after); | ||
3264 | if (ret < 0) | ||
3265 | goto out; | ||
3266 | ret = get_first_ref(sctx->parent_root, ino, &parent_ino_before, | ||
3267 | NULL, path_before); | ||
3268 | if (ret == -ENOENT) { | ||
3269 | ret = 0; | ||
3270 | break; | ||
3271 | } else if (ret < 0) { | ||
3272 | goto out; | ||
3273 | } | ||
3274 | |||
3275 | len1 = fs_path_len(path_before); | ||
3276 | len2 = fs_path_len(path_after); | ||
3277 | if (parent_ino_before != parent_ino_after || len1 != len2 || | ||
3278 | memcmp(path_before->start, path_after->start, len1)) { | ||
3279 | ret = 1; | ||
3280 | if (register_upper_dirs) { | ||
3281 | break; | ||
3282 | } else { | ||
3283 | register_upper_dirs = 1; | ||
3284 | ino = parent_ref->dir; | ||
3285 | gen = parent_ref->dir_gen; | ||
3286 | goto again; | ||
3287 | } | ||
3288 | } else if (register_upper_dirs) { | ||
3289 | ret = add_pending_dir_move(sctx, ino, gen, | ||
3290 | parent_ino_after); | ||
3291 | if (ret < 0 && ret != -EEXIST) | ||
3292 | goto out; | ||
3293 | } | ||
3294 | |||
3295 | ino = parent_ino_after; | ||
3296 | gen = parent_gen; | ||
3297 | } | ||
3298 | |||
3071 | out: | 3299 | out: |
3072 | fs_path_free(path_before); | 3300 | fs_path_free(path_before); |
3073 | fs_path_free(path_after); | 3301 | fs_path_free(path_after); |
@@ -3089,6 +3317,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move) | |||
3089 | u64 ow_gen; | 3317 | u64 ow_gen; |
3090 | int did_overwrite = 0; | 3318 | int did_overwrite = 0; |
3091 | int is_orphan = 0; | 3319 | int is_orphan = 0; |
3320 | u64 last_dir_ino_rm = 0; | ||
3092 | 3321 | ||
3093 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | 3322 | verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); |
3094 | 3323 | ||
@@ -3227,9 +3456,14 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3227 | * dirs, we always have one new and one deleted | 3456 | * dirs, we always have one new and one deleted |
3228 | * ref. The deleted ref is ignored later. | 3457 | * ref. The deleted ref is ignored later. |
3229 | */ | 3458 | */ |
3230 | if (wait_for_parent_move(sctx, cur)) { | 3459 | ret = wait_for_parent_move(sctx, cur); |
3460 | if (ret < 0) | ||
3461 | goto out; | ||
3462 | if (ret) { | ||
3231 | ret = add_pending_dir_move(sctx, | 3463 | ret = add_pending_dir_move(sctx, |
3232 | cur->dir); | 3464 | sctx->cur_ino, |
3465 | sctx->cur_inode_gen, | ||
3466 | cur->dir); | ||
3233 | *pending_move = 1; | 3467 | *pending_move = 1; |
3234 | } else { | 3468 | } else { |
3235 | ret = send_rename(sctx, valid_path, | 3469 | ret = send_rename(sctx, valid_path, |
@@ -3259,7 +3493,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3259 | * later, we do this check again and rmdir it then if possible. | 3493 | * later, we do this check again and rmdir it then if possible. |
3260 | * See the use of check_dirs for more details. | 3494 | * See the use of check_dirs for more details. |
3261 | */ | 3495 | */ |
3262 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_ino); | 3496 | ret = can_rmdir(sctx, sctx->cur_ino, sctx->cur_inode_gen, |
3497 | sctx->cur_ino); | ||
3263 | if (ret < 0) | 3498 | if (ret < 0) |
3264 | goto out; | 3499 | goto out; |
3265 | if (ret) { | 3500 | if (ret) { |
@@ -3350,8 +3585,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3350 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); | 3585 | ret = send_utimes(sctx, cur->dir, cur->dir_gen); |
3351 | if (ret < 0) | 3586 | if (ret < 0) |
3352 | goto out; | 3587 | goto out; |
3353 | } else if (ret == inode_state_did_delete) { | 3588 | } else if (ret == inode_state_did_delete && |
3354 | ret = can_rmdir(sctx, cur->dir, sctx->cur_ino); | 3589 | cur->dir != last_dir_ino_rm) { |
3590 | ret = can_rmdir(sctx, cur->dir, cur->dir_gen, | ||
3591 | sctx->cur_ino); | ||
3355 | if (ret < 0) | 3592 | if (ret < 0) |
3356 | goto out; | 3593 | goto out; |
3357 | if (ret) { | 3594 | if (ret) { |
@@ -3362,6 +3599,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino); | |||
3362 | ret = send_rmdir(sctx, valid_path); | 3599 | ret = send_rmdir(sctx, valid_path); |
3363 | if (ret < 0) | 3600 | if (ret < 0) |
3364 | goto out; | 3601 | goto out; |
3602 | last_dir_ino_rm = cur->dir; | ||
3365 | } | 3603 | } |
3366 | } | 3604 | } |
3367 | } | 3605 | } |
@@ -3375,9 +3613,8 @@ out: | |||
3375 | return ret; | 3613 | return ret; |
3376 | } | 3614 | } |
3377 | 3615 | ||
3378 | static int __record_new_ref(int num, u64 dir, int index, | 3616 | static int record_ref(struct btrfs_root *root, int num, u64 dir, int index, |
3379 | struct fs_path *name, | 3617 | struct fs_path *name, void *ctx, struct list_head *refs) |
3380 | void *ctx) | ||
3381 | { | 3618 | { |
3382 | int ret = 0; | 3619 | int ret = 0; |
3383 | struct send_ctx *sctx = ctx; | 3620 | struct send_ctx *sctx = ctx; |
@@ -3388,7 +3625,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
3388 | if (!p) | 3625 | if (!p) |
3389 | return -ENOMEM; | 3626 | return -ENOMEM; |
3390 | 3627 | ||
3391 | ret = get_inode_info(sctx->send_root, dir, NULL, &gen, NULL, NULL, | 3628 | ret = get_inode_info(root, dir, NULL, &gen, NULL, NULL, |
3392 | NULL, NULL); | 3629 | NULL, NULL); |
3393 | if (ret < 0) | 3630 | if (ret < 0) |
3394 | goto out; | 3631 | goto out; |
@@ -3400,7 +3637,7 @@ static int __record_new_ref(int num, u64 dir, int index, | |||
3400 | if (ret < 0) | 3637 | if (ret < 0) |
3401 | goto out; | 3638 | goto out; |
3402 | 3639 | ||
3403 | ret = record_ref(&sctx->new_refs, dir, gen, p); | 3640 | ret = __record_ref(refs, dir, gen, p); |
3404 | 3641 | ||
3405 | out: | 3642 | out: |
3406 | if (ret) | 3643 | if (ret) |
@@ -3408,37 +3645,23 @@ out: | |||
3408 | return ret; | 3645 | return ret; |
3409 | } | 3646 | } |
3410 | 3647 | ||
3648 | static int __record_new_ref(int num, u64 dir, int index, | ||
3649 | struct fs_path *name, | ||
3650 | void *ctx) | ||
3651 | { | ||
3652 | struct send_ctx *sctx = ctx; | ||
3653 | return record_ref(sctx->send_root, num, dir, index, name, | ||
3654 | ctx, &sctx->new_refs); | ||
3655 | } | ||
3656 | |||
3657 | |||
3411 | static int __record_deleted_ref(int num, u64 dir, int index, | 3658 | static int __record_deleted_ref(int num, u64 dir, int index, |
3412 | struct fs_path *name, | 3659 | struct fs_path *name, |
3413 | void *ctx) | 3660 | void *ctx) |
3414 | { | 3661 | { |
3415 | int ret = 0; | ||
3416 | struct send_ctx *sctx = ctx; | 3662 | struct send_ctx *sctx = ctx; |
3417 | struct fs_path *p; | 3663 | return record_ref(sctx->parent_root, num, dir, index, name, |
3418 | u64 gen; | 3664 | ctx, &sctx->deleted_refs); |
3419 | |||
3420 | p = fs_path_alloc(); | ||
3421 | if (!p) | ||
3422 | return -ENOMEM; | ||
3423 | |||
3424 | ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL, NULL, | ||
3425 | NULL, NULL); | ||
3426 | if (ret < 0) | ||
3427 | goto out; | ||
3428 | |||
3429 | ret = get_cur_path(sctx, dir, gen, p); | ||
3430 | if (ret < 0) | ||
3431 | goto out; | ||
3432 | ret = fs_path_add_path(p, name); | ||
3433 | if (ret < 0) | ||
3434 | goto out; | ||
3435 | |||
3436 | ret = record_ref(&sctx->deleted_refs, dir, gen, p); | ||
3437 | |||
3438 | out: | ||
3439 | if (ret) | ||
3440 | fs_path_free(p); | ||
3441 | return ret; | ||
3442 | } | 3665 | } |
3443 | 3666 | ||
3444 | static int record_new_ref(struct send_ctx *sctx) | 3667 | static int record_new_ref(struct send_ctx *sctx) |
@@ -3619,21 +3842,31 @@ static int process_all_refs(struct send_ctx *sctx, | |||
3619 | root = sctx->parent_root; | 3842 | root = sctx->parent_root; |
3620 | cb = __record_deleted_ref; | 3843 | cb = __record_deleted_ref; |
3621 | } else { | 3844 | } else { |
3622 | BUG(); | 3845 | btrfs_err(sctx->send_root->fs_info, |
3846 | "Wrong command %d in process_all_refs", cmd); | ||
3847 | ret = -EINVAL; | ||
3848 | goto out; | ||
3623 | } | 3849 | } |
3624 | 3850 | ||
3625 | key.objectid = sctx->cmp_key->objectid; | 3851 | key.objectid = sctx->cmp_key->objectid; |
3626 | key.type = BTRFS_INODE_REF_KEY; | 3852 | key.type = BTRFS_INODE_REF_KEY; |
3627 | key.offset = 0; | 3853 | key.offset = 0; |
3628 | while (1) { | 3854 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
3629 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 3855 | if (ret < 0) |
3630 | if (ret < 0) | 3856 | goto out; |
3631 | goto out; | ||
3632 | if (ret) | ||
3633 | break; | ||
3634 | 3857 | ||
3858 | while (1) { | ||
3635 | eb = path->nodes[0]; | 3859 | eb = path->nodes[0]; |
3636 | slot = path->slots[0]; | 3860 | slot = path->slots[0]; |
3861 | if (slot >= btrfs_header_nritems(eb)) { | ||
3862 | ret = btrfs_next_leaf(root, path); | ||
3863 | if (ret < 0) | ||
3864 | goto out; | ||
3865 | else if (ret > 0) | ||
3866 | break; | ||
3867 | continue; | ||
3868 | } | ||
3869 | |||
3637 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 3870 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
3638 | 3871 | ||
3639 | if (found_key.objectid != key.objectid || | 3872 | if (found_key.objectid != key.objectid || |
@@ -3642,11 +3875,10 @@ static int process_all_refs(struct send_ctx *sctx, | |||
3642 | break; | 3875 | break; |
3643 | 3876 | ||
3644 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); | 3877 | ret = iterate_inode_ref(root, path, &found_key, 0, cb, sctx); |
3645 | btrfs_release_path(path); | ||
3646 | if (ret < 0) | 3878 | if (ret < 0) |
3647 | goto out; | 3879 | goto out; |
3648 | 3880 | ||
3649 | key.offset = found_key.offset + 1; | 3881 | path->slots[0]++; |
3650 | } | 3882 | } |
3651 | btrfs_release_path(path); | 3883 | btrfs_release_path(path); |
3652 | 3884 | ||
@@ -3927,19 +4159,25 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
3927 | key.objectid = sctx->cmp_key->objectid; | 4159 | key.objectid = sctx->cmp_key->objectid; |
3928 | key.type = BTRFS_XATTR_ITEM_KEY; | 4160 | key.type = BTRFS_XATTR_ITEM_KEY; |
3929 | key.offset = 0; | 4161 | key.offset = 0; |
3930 | while (1) { | 4162 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
3931 | ret = btrfs_search_slot_for_read(root, &key, path, 1, 0); | 4163 | if (ret < 0) |
3932 | if (ret < 0) | 4164 | goto out; |
3933 | goto out; | ||
3934 | if (ret) { | ||
3935 | ret = 0; | ||
3936 | goto out; | ||
3937 | } | ||
3938 | 4165 | ||
4166 | while (1) { | ||
3939 | eb = path->nodes[0]; | 4167 | eb = path->nodes[0]; |
3940 | slot = path->slots[0]; | 4168 | slot = path->slots[0]; |
3941 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 4169 | if (slot >= btrfs_header_nritems(eb)) { |
4170 | ret = btrfs_next_leaf(root, path); | ||
4171 | if (ret < 0) { | ||
4172 | goto out; | ||
4173 | } else if (ret > 0) { | ||
4174 | ret = 0; | ||
4175 | break; | ||
4176 | } | ||
4177 | continue; | ||
4178 | } | ||
3942 | 4179 | ||
4180 | btrfs_item_key_to_cpu(eb, &found_key, slot); | ||
3943 | if (found_key.objectid != key.objectid || | 4181 | if (found_key.objectid != key.objectid || |
3944 | found_key.type != key.type) { | 4182 | found_key.type != key.type) { |
3945 | ret = 0; | 4183 | ret = 0; |
@@ -3951,8 +4189,7 @@ static int process_all_new_xattrs(struct send_ctx *sctx) | |||
3951 | if (ret < 0) | 4189 | if (ret < 0) |
3952 | goto out; | 4190 | goto out; |
3953 | 4191 | ||
3954 | btrfs_release_path(path); | 4192 | path->slots[0]++; |
3955 | key.offset = found_key.offset + 1; | ||
3956 | } | 4193 | } |
3957 | 4194 | ||
3958 | out: | 4195 | out: |
@@ -3991,6 +4228,13 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) | |||
3991 | goto out; | 4228 | goto out; |
3992 | 4229 | ||
3993 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; | 4230 | last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT; |
4231 | |||
4232 | /* initial readahead */ | ||
4233 | memset(&sctx->ra, 0, sizeof(struct file_ra_state)); | ||
4234 | file_ra_state_init(&sctx->ra, inode->i_mapping); | ||
4235 | btrfs_force_ra(inode->i_mapping, &sctx->ra, NULL, index, | ||
4236 | last_index - index + 1); | ||
4237 | |||
3994 | while (index <= last_index) { | 4238 | while (index <= last_index) { |
3995 | unsigned cur_len = min_t(unsigned, len, | 4239 | unsigned cur_len = min_t(unsigned, len, |
3996 | PAGE_CACHE_SIZE - pg_offset); | 4240 | PAGE_CACHE_SIZE - pg_offset); |
@@ -4763,18 +5007,19 @@ static int finish_inode_if_needed(struct send_ctx *sctx, int at_end) | |||
4763 | ret = apply_children_dir_moves(sctx); | 5007 | ret = apply_children_dir_moves(sctx); |
4764 | if (ret) | 5008 | if (ret) |
4765 | goto out; | 5009 | goto out; |
5010 | /* | ||
5011 | * Need to send that every time, no matter if it actually | ||
5012 | * changed between the two trees as we have done changes to | ||
5013 | * the inode before. If our inode is a directory and it's | ||
5014 | * waiting to be moved/renamed, we will send its utimes when | ||
5015 | * it's moved/renamed, therefore we don't need to do it here. | ||
5016 | */ | ||
5017 | sctx->send_progress = sctx->cur_ino + 1; | ||
5018 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
5019 | if (ret < 0) | ||
5020 | goto out; | ||
4766 | } | 5021 | } |
4767 | 5022 | ||
4768 | /* | ||
4769 | * Need to send that every time, no matter if it actually | ||
4770 | * changed between the two trees as we have done changes to | ||
4771 | * the inode before. | ||
4772 | */ | ||
4773 | sctx->send_progress = sctx->cur_ino + 1; | ||
4774 | ret = send_utimes(sctx, sctx->cur_ino, sctx->cur_inode_gen); | ||
4775 | if (ret < 0) | ||
4776 | goto out; | ||
4777 | |||
4778 | out: | 5023 | out: |
4779 | return ret; | 5024 | return ret; |
4780 | } | 5025 | } |
@@ -4840,6 +5085,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
4840 | sctx->left_path->nodes[0], left_ii); | 5085 | sctx->left_path->nodes[0], left_ii); |
4841 | sctx->cur_inode_mode = btrfs_inode_mode( | 5086 | sctx->cur_inode_mode = btrfs_inode_mode( |
4842 | sctx->left_path->nodes[0], left_ii); | 5087 | sctx->left_path->nodes[0], left_ii); |
5088 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
5089 | sctx->left_path->nodes[0], left_ii); | ||
4843 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) | 5090 | if (sctx->cur_ino != BTRFS_FIRST_FREE_OBJECTID) |
4844 | ret = send_create_inode_if_needed(sctx); | 5091 | ret = send_create_inode_if_needed(sctx); |
4845 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { | 5092 | } else if (result == BTRFS_COMPARE_TREE_DELETED) { |
@@ -4884,6 +5131,8 @@ static int changed_inode(struct send_ctx *sctx, | |||
4884 | sctx->left_path->nodes[0], left_ii); | 5131 | sctx->left_path->nodes[0], left_ii); |
4885 | sctx->cur_inode_mode = btrfs_inode_mode( | 5132 | sctx->cur_inode_mode = btrfs_inode_mode( |
4886 | sctx->left_path->nodes[0], left_ii); | 5133 | sctx->left_path->nodes[0], left_ii); |
5134 | sctx->cur_inode_rdev = btrfs_inode_rdev( | ||
5135 | sctx->left_path->nodes[0], left_ii); | ||
4887 | ret = send_create_inode_if_needed(sctx); | 5136 | ret = send_create_inode_if_needed(sctx); |
4888 | if (ret < 0) | 5137 | if (ret < 0) |
4889 | goto out; | 5138 | goto out; |
@@ -5118,6 +5367,7 @@ out: | |||
5118 | static int full_send_tree(struct send_ctx *sctx) | 5367 | static int full_send_tree(struct send_ctx *sctx) |
5119 | { | 5368 | { |
5120 | int ret; | 5369 | int ret; |
5370 | struct btrfs_trans_handle *trans = NULL; | ||
5121 | struct btrfs_root *send_root = sctx->send_root; | 5371 | struct btrfs_root *send_root = sctx->send_root; |
5122 | struct btrfs_key key; | 5372 | struct btrfs_key key; |
5123 | struct btrfs_key found_key; | 5373 | struct btrfs_key found_key; |
@@ -5139,6 +5389,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
5139 | key.type = BTRFS_INODE_ITEM_KEY; | 5389 | key.type = BTRFS_INODE_ITEM_KEY; |
5140 | key.offset = 0; | 5390 | key.offset = 0; |
5141 | 5391 | ||
5392 | join_trans: | ||
5393 | /* | ||
5394 | * We need to make sure the transaction does not get committed | ||
5395 | * while we do anything on commit roots. Join a transaction to prevent | ||
5396 | * this. | ||
5397 | */ | ||
5398 | trans = btrfs_join_transaction(send_root); | ||
5399 | if (IS_ERR(trans)) { | ||
5400 | ret = PTR_ERR(trans); | ||
5401 | trans = NULL; | ||
5402 | goto out; | ||
5403 | } | ||
5404 | |||
5142 | /* | 5405 | /* |
5143 | * Make sure the tree has not changed after re-joining. We detect this | 5406 | * Make sure the tree has not changed after re-joining. We detect this |
5144 | * by comparing start_ctransid and ctransid. They should always match. | 5407 | * by comparing start_ctransid and ctransid. They should always match. |
@@ -5162,6 +5425,19 @@ static int full_send_tree(struct send_ctx *sctx) | |||
5162 | goto out_finish; | 5425 | goto out_finish; |
5163 | 5426 | ||
5164 | while (1) { | 5427 | while (1) { |
5428 | /* | ||
5429 | * When someone want to commit while we iterate, end the | ||
5430 | * joined transaction and rejoin. | ||
5431 | */ | ||
5432 | if (btrfs_should_end_transaction(trans, send_root)) { | ||
5433 | ret = btrfs_end_transaction(trans, send_root); | ||
5434 | trans = NULL; | ||
5435 | if (ret < 0) | ||
5436 | goto out; | ||
5437 | btrfs_release_path(path); | ||
5438 | goto join_trans; | ||
5439 | } | ||
5440 | |||
5165 | eb = path->nodes[0]; | 5441 | eb = path->nodes[0]; |
5166 | slot = path->slots[0]; | 5442 | slot = path->slots[0]; |
5167 | btrfs_item_key_to_cpu(eb, &found_key, slot); | 5443 | btrfs_item_key_to_cpu(eb, &found_key, slot); |
@@ -5189,6 +5465,12 @@ out_finish: | |||
5189 | 5465 | ||
5190 | out: | 5466 | out: |
5191 | btrfs_free_path(path); | 5467 | btrfs_free_path(path); |
5468 | if (trans) { | ||
5469 | if (!ret) | ||
5470 | ret = btrfs_end_transaction(trans, send_root); | ||
5471 | else | ||
5472 | btrfs_end_transaction(trans, send_root); | ||
5473 | } | ||
5192 | return ret; | 5474 | return ret; |
5193 | } | 5475 | } |
5194 | 5476 | ||
@@ -5340,6 +5622,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) | |||
5340 | 5622 | ||
5341 | sctx->pending_dir_moves = RB_ROOT; | 5623 | sctx->pending_dir_moves = RB_ROOT; |
5342 | sctx->waiting_dir_moves = RB_ROOT; | 5624 | sctx->waiting_dir_moves = RB_ROOT; |
5625 | sctx->orphan_dirs = RB_ROOT; | ||
5343 | 5626 | ||
5344 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * | 5627 | sctx->clone_roots = vzalloc(sizeof(struct clone_root) * |
5345 | (arg->clone_sources_count + 1)); | 5628 | (arg->clone_sources_count + 1)); |
@@ -5477,6 +5760,16 @@ out: | |||
5477 | kfree(dm); | 5760 | kfree(dm); |
5478 | } | 5761 | } |
5479 | 5762 | ||
5763 | WARN_ON(sctx && !ret && !RB_EMPTY_ROOT(&sctx->orphan_dirs)); | ||
5764 | while (sctx && !RB_EMPTY_ROOT(&sctx->orphan_dirs)) { | ||
5765 | struct rb_node *n; | ||
5766 | struct orphan_dir_info *odi; | ||
5767 | |||
5768 | n = rb_first(&sctx->orphan_dirs); | ||
5769 | odi = rb_entry(n, struct orphan_dir_info, node); | ||
5770 | free_orphan_dir_info(sctx, odi); | ||
5771 | } | ||
5772 | |||
5480 | if (sort_clone_roots) { | 5773 | if (sort_clone_roots) { |
5481 | for (i = 0; i < sctx->clone_roots_cnt; i++) | 5774 | for (i = 0; i < sctx->clone_roots_cnt; i++) |
5482 | btrfs_root_dec_send_in_progress( | 5775 | btrfs_root_dec_send_in_progress( |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index d04db817be5c..9dbf42395153 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -1305,13 +1305,6 @@ error_fs_info: | |||
1305 | return ERR_PTR(error); | 1305 | return ERR_PTR(error); |
1306 | } | 1306 | } |
1307 | 1307 | ||
1308 | static void btrfs_set_max_workers(struct btrfs_workers *workers, int new_limit) | ||
1309 | { | ||
1310 | spin_lock_irq(&workers->lock); | ||
1311 | workers->max_workers = new_limit; | ||
1312 | spin_unlock_irq(&workers->lock); | ||
1313 | } | ||
1314 | |||
1315 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | 1308 | static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, |
1316 | int new_pool_size, int old_pool_size) | 1309 | int new_pool_size, int old_pool_size) |
1317 | { | 1310 | { |
@@ -1323,21 +1316,20 @@ static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, | |||
1323 | btrfs_info(fs_info, "resize thread pool %d -> %d", | 1316 | btrfs_info(fs_info, "resize thread pool %d -> %d", |
1324 | old_pool_size, new_pool_size); | 1317 | old_pool_size, new_pool_size); |
1325 | 1318 | ||
1326 | btrfs_set_max_workers(&fs_info->generic_worker, new_pool_size); | 1319 | btrfs_workqueue_set_max(fs_info->workers, new_pool_size); |
1327 | btrfs_set_max_workers(&fs_info->workers, new_pool_size); | 1320 | btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); |
1328 | btrfs_set_max_workers(&fs_info->delalloc_workers, new_pool_size); | 1321 | btrfs_workqueue_set_max(fs_info->submit_workers, new_pool_size); |
1329 | btrfs_set_max_workers(&fs_info->submit_workers, new_pool_size); | 1322 | btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); |
1330 | btrfs_set_max_workers(&fs_info->caching_workers, new_pool_size); | 1323 | btrfs_workqueue_set_max(fs_info->endio_workers, new_pool_size); |
1331 | btrfs_set_max_workers(&fs_info->fixup_workers, new_pool_size); | 1324 | btrfs_workqueue_set_max(fs_info->endio_meta_workers, new_pool_size); |
1332 | btrfs_set_max_workers(&fs_info->endio_workers, new_pool_size); | 1325 | btrfs_workqueue_set_max(fs_info->endio_meta_write_workers, |
1333 | btrfs_set_max_workers(&fs_info->endio_meta_workers, new_pool_size); | 1326 | new_pool_size); |
1334 | btrfs_set_max_workers(&fs_info->endio_meta_write_workers, new_pool_size); | 1327 | btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); |
1335 | btrfs_set_max_workers(&fs_info->endio_write_workers, new_pool_size); | 1328 | btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); |
1336 | btrfs_set_max_workers(&fs_info->endio_freespace_worker, new_pool_size); | 1329 | btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); |
1337 | btrfs_set_max_workers(&fs_info->delayed_workers, new_pool_size); | 1330 | btrfs_workqueue_set_max(fs_info->readahead_workers, new_pool_size); |
1338 | btrfs_set_max_workers(&fs_info->readahead_workers, new_pool_size); | 1331 | btrfs_workqueue_set_max(fs_info->scrub_wr_completion_workers, |
1339 | btrfs_set_max_workers(&fs_info->scrub_wr_completion_workers, | 1332 | new_pool_size); |
1340 | new_pool_size); | ||
1341 | } | 1333 | } |
1342 | 1334 | ||
1343 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) | 1335 | static inline void btrfs_remount_prepare(struct btrfs_fs_info *fs_info) |
@@ -1388,6 +1380,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1388 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; | 1380 | unsigned int old_metadata_ratio = fs_info->metadata_ratio; |
1389 | int ret; | 1381 | int ret; |
1390 | 1382 | ||
1383 | sync_filesystem(sb); | ||
1391 | btrfs_remount_prepare(fs_info); | 1384 | btrfs_remount_prepare(fs_info); |
1392 | 1385 | ||
1393 | ret = btrfs_parse_options(root, data); | 1386 | ret = btrfs_parse_options(root, data); |
@@ -1479,6 +1472,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
1479 | sb->s_flags &= ~MS_RDONLY; | 1472 | sb->s_flags &= ~MS_RDONLY; |
1480 | } | 1473 | } |
1481 | out: | 1474 | out: |
1475 | wake_up_process(fs_info->transaction_kthread); | ||
1482 | btrfs_remount_cleanup(fs_info, old_opts); | 1476 | btrfs_remount_cleanup(fs_info, old_opts); |
1483 | return 0; | 1477 | return 0; |
1484 | 1478 | ||
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 865f4cf9a769..c5eb2143dc66 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/kobject.h> | 24 | #include <linux/kobject.h> |
25 | #include <linux/bug.h> | 25 | #include <linux/bug.h> |
26 | #include <linux/genhd.h> | 26 | #include <linux/genhd.h> |
27 | #include <linux/debugfs.h> | ||
27 | 28 | ||
28 | #include "ctree.h" | 29 | #include "ctree.h" |
29 | #include "disk-io.h" | 30 | #include "disk-io.h" |
@@ -599,6 +600,12 @@ static int add_device_membership(struct btrfs_fs_info *fs_info) | |||
599 | /* /sys/fs/btrfs/ entry */ | 600 | /* /sys/fs/btrfs/ entry */ |
600 | static struct kset *btrfs_kset; | 601 | static struct kset *btrfs_kset; |
601 | 602 | ||
603 | /* /sys/kernel/debug/btrfs */ | ||
604 | static struct dentry *btrfs_debugfs_root_dentry; | ||
605 | |||
606 | /* Debugging tunables and exported data */ | ||
607 | u64 btrfs_debugfs_test; | ||
608 | |||
602 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) | 609 | int btrfs_sysfs_add_one(struct btrfs_fs_info *fs_info) |
603 | { | 610 | { |
604 | int error; | 611 | int error; |
@@ -642,27 +649,41 @@ failure: | |||
642 | return error; | 649 | return error; |
643 | } | 650 | } |
644 | 651 | ||
652 | static int btrfs_init_debugfs(void) | ||
653 | { | ||
654 | #ifdef CONFIG_DEBUG_FS | ||
655 | btrfs_debugfs_root_dentry = debugfs_create_dir("btrfs", NULL); | ||
656 | if (!btrfs_debugfs_root_dentry) | ||
657 | return -ENOMEM; | ||
658 | |||
659 | debugfs_create_u64("test", S_IRUGO | S_IWUGO, btrfs_debugfs_root_dentry, | ||
660 | &btrfs_debugfs_test); | ||
661 | #endif | ||
662 | return 0; | ||
663 | } | ||
664 | |||
645 | int btrfs_init_sysfs(void) | 665 | int btrfs_init_sysfs(void) |
646 | { | 666 | { |
647 | int ret; | 667 | int ret; |
668 | |||
648 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); | 669 | btrfs_kset = kset_create_and_add("btrfs", NULL, fs_kobj); |
649 | if (!btrfs_kset) | 670 | if (!btrfs_kset) |
650 | return -ENOMEM; | 671 | return -ENOMEM; |
651 | 672 | ||
652 | init_feature_attrs(); | 673 | ret = btrfs_init_debugfs(); |
674 | if (ret) | ||
675 | return ret; | ||
653 | 676 | ||
677 | init_feature_attrs(); | ||
654 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 678 | ret = sysfs_create_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
655 | if (ret) { | ||
656 | kset_unregister(btrfs_kset); | ||
657 | return ret; | ||
658 | } | ||
659 | 679 | ||
660 | return 0; | 680 | return ret; |
661 | } | 681 | } |
662 | 682 | ||
663 | void btrfs_exit_sysfs(void) | 683 | void btrfs_exit_sysfs(void) |
664 | { | 684 | { |
665 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); | 685 | sysfs_remove_group(&btrfs_kset->kobj, &btrfs_feature_attr_group); |
666 | kset_unregister(btrfs_kset); | 686 | kset_unregister(btrfs_kset); |
687 | debugfs_remove_recursive(btrfs_debugfs_root_dentry); | ||
667 | } | 688 | } |
668 | 689 | ||
diff --git a/fs/btrfs/sysfs.h b/fs/btrfs/sysfs.h index f3cea3710d44..9ab576318a84 100644 --- a/fs/btrfs/sysfs.h +++ b/fs/btrfs/sysfs.h | |||
@@ -1,6 +1,11 @@ | |||
1 | #ifndef _BTRFS_SYSFS_H_ | 1 | #ifndef _BTRFS_SYSFS_H_ |
2 | #define _BTRFS_SYSFS_H_ | 2 | #define _BTRFS_SYSFS_H_ |
3 | 3 | ||
4 | /* | ||
5 | * Data exported through sysfs | ||
6 | */ | ||
7 | extern u64 btrfs_debugfs_test; | ||
8 | |||
4 | enum btrfs_feature_set { | 9 | enum btrfs_feature_set { |
5 | FEAT_COMPAT, | 10 | FEAT_COMPAT, |
6 | FEAT_COMPAT_RO, | 11 | FEAT_COMPAT_RO, |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 34cd83184c4a..a04707f740d6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -683,7 +683,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); | 683 | int lock = (trans->type != TRANS_JOIN_NOLOCK); |
684 | int err = 0; | 684 | int err = 0; |
685 | 685 | ||
686 | if (--trans->use_count) { | 686 | if (trans->use_count > 1) { |
687 | trans->use_count--; | ||
687 | trans->block_rsv = trans->orig_rsv; | 688 | trans->block_rsv = trans->orig_rsv; |
688 | return 0; | 689 | return 0; |
689 | } | 690 | } |
@@ -731,17 +732,10 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
731 | } | 732 | } |
732 | 733 | ||
733 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { | 734 | if (lock && ACCESS_ONCE(cur_trans->state) == TRANS_STATE_BLOCKED) { |
734 | if (throttle) { | 735 | if (throttle) |
735 | /* | ||
736 | * We may race with somebody else here so end up having | ||
737 | * to call end_transaction on ourselves again, so inc | ||
738 | * our use_count. | ||
739 | */ | ||
740 | trans->use_count++; | ||
741 | return btrfs_commit_transaction(trans, root); | 736 | return btrfs_commit_transaction(trans, root); |
742 | } else { | 737 | else |
743 | wake_up_process(info->transaction_kthread); | 738 | wake_up_process(info->transaction_kthread); |
744 | } | ||
745 | } | 739 | } |
746 | 740 | ||
747 | if (trans->type & __TRANS_FREEZABLE) | 741 | if (trans->type & __TRANS_FREEZABLE) |
@@ -1578,10 +1572,9 @@ static void cleanup_transaction(struct btrfs_trans_handle *trans, | |||
1578 | 1572 | ||
1579 | trace_btrfs_transaction_commit(root); | 1573 | trace_btrfs_transaction_commit(root); |
1580 | 1574 | ||
1581 | btrfs_scrub_continue(root); | ||
1582 | |||
1583 | if (current->journal_info == trans) | 1575 | if (current->journal_info == trans) |
1584 | current->journal_info = NULL; | 1576 | current->journal_info = NULL; |
1577 | btrfs_scrub_cancel(root->fs_info); | ||
1585 | 1578 | ||
1586 | kmem_cache_free(btrfs_trans_handle_cachep, trans); | 1579 | kmem_cache_free(btrfs_trans_handle_cachep, trans); |
1587 | } | 1580 | } |
@@ -1621,7 +1614,7 @@ static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans, | |||
1621 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) | 1614 | static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info) |
1622 | { | 1615 | { |
1623 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) | 1616 | if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT)) |
1624 | return btrfs_start_delalloc_roots(fs_info, 1); | 1617 | return btrfs_start_delalloc_roots(fs_info, 1, -1); |
1625 | return 0; | 1618 | return 0; |
1626 | } | 1619 | } |
1627 | 1620 | ||
@@ -1754,7 +1747,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1754 | /* ->aborted might be set after the previous check, so check it */ | 1747 | /* ->aborted might be set after the previous check, so check it */ |
1755 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { | 1748 | if (unlikely(ACCESS_ONCE(cur_trans->aborted))) { |
1756 | ret = cur_trans->aborted; | 1749 | ret = cur_trans->aborted; |
1757 | goto cleanup_transaction; | 1750 | goto scrub_continue; |
1758 | } | 1751 | } |
1759 | /* | 1752 | /* |
1760 | * the reloc mutex makes sure that we stop | 1753 | * the reloc mutex makes sure that we stop |
@@ -1771,7 +1764,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1771 | ret = create_pending_snapshots(trans, root->fs_info); | 1764 | ret = create_pending_snapshots(trans, root->fs_info); |
1772 | if (ret) { | 1765 | if (ret) { |
1773 | mutex_unlock(&root->fs_info->reloc_mutex); | 1766 | mutex_unlock(&root->fs_info->reloc_mutex); |
1774 | goto cleanup_transaction; | 1767 | goto scrub_continue; |
1775 | } | 1768 | } |
1776 | 1769 | ||
1777 | /* | 1770 | /* |
@@ -1787,13 +1780,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1787 | ret = btrfs_run_delayed_items(trans, root); | 1780 | ret = btrfs_run_delayed_items(trans, root); |
1788 | if (ret) { | 1781 | if (ret) { |
1789 | mutex_unlock(&root->fs_info->reloc_mutex); | 1782 | mutex_unlock(&root->fs_info->reloc_mutex); |
1790 | goto cleanup_transaction; | 1783 | goto scrub_continue; |
1791 | } | 1784 | } |
1792 | 1785 | ||
1793 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | 1786 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
1794 | if (ret) { | 1787 | if (ret) { |
1795 | mutex_unlock(&root->fs_info->reloc_mutex); | 1788 | mutex_unlock(&root->fs_info->reloc_mutex); |
1796 | goto cleanup_transaction; | 1789 | goto scrub_continue; |
1797 | } | 1790 | } |
1798 | 1791 | ||
1799 | /* | 1792 | /* |
@@ -1823,7 +1816,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1823 | if (ret) { | 1816 | if (ret) { |
1824 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1817 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1825 | mutex_unlock(&root->fs_info->reloc_mutex); | 1818 | mutex_unlock(&root->fs_info->reloc_mutex); |
1826 | goto cleanup_transaction; | 1819 | goto scrub_continue; |
1827 | } | 1820 | } |
1828 | 1821 | ||
1829 | /* | 1822 | /* |
@@ -1844,7 +1837,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1844 | if (ret) { | 1837 | if (ret) { |
1845 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1838 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1846 | mutex_unlock(&root->fs_info->reloc_mutex); | 1839 | mutex_unlock(&root->fs_info->reloc_mutex); |
1847 | goto cleanup_transaction; | 1840 | goto scrub_continue; |
1848 | } | 1841 | } |
1849 | 1842 | ||
1850 | /* | 1843 | /* |
@@ -1855,7 +1848,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1855 | ret = cur_trans->aborted; | 1848 | ret = cur_trans->aborted; |
1856 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1849 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1857 | mutex_unlock(&root->fs_info->reloc_mutex); | 1850 | mutex_unlock(&root->fs_info->reloc_mutex); |
1858 | goto cleanup_transaction; | 1851 | goto scrub_continue; |
1859 | } | 1852 | } |
1860 | 1853 | ||
1861 | btrfs_prepare_extent_commit(trans, root); | 1854 | btrfs_prepare_extent_commit(trans, root); |
@@ -1891,13 +1884,13 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1891 | btrfs_error(root->fs_info, ret, | 1884 | btrfs_error(root->fs_info, ret, |
1892 | "Error while writing out transaction"); | 1885 | "Error while writing out transaction"); |
1893 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1886 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1894 | goto cleanup_transaction; | 1887 | goto scrub_continue; |
1895 | } | 1888 | } |
1896 | 1889 | ||
1897 | ret = write_ctree_super(trans, root, 0); | 1890 | ret = write_ctree_super(trans, root, 0); |
1898 | if (ret) { | 1891 | if (ret) { |
1899 | mutex_unlock(&root->fs_info->tree_log_mutex); | 1892 | mutex_unlock(&root->fs_info->tree_log_mutex); |
1900 | goto cleanup_transaction; | 1893 | goto scrub_continue; |
1901 | } | 1894 | } |
1902 | 1895 | ||
1903 | /* | 1896 | /* |
@@ -1940,6 +1933,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1940 | 1933 | ||
1941 | return ret; | 1934 | return ret; |
1942 | 1935 | ||
1936 | scrub_continue: | ||
1937 | btrfs_scrub_continue(root); | ||
1943 | cleanup_transaction: | 1938 | cleanup_transaction: |
1944 | btrfs_trans_release_metadata(trans, root); | 1939 | btrfs_trans_release_metadata(trans, root); |
1945 | trans->block_rsv = NULL; | 1940 | trans->block_rsv = NULL; |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 39d83da03e03..e2f45fc02610 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -136,13 +136,20 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
136 | * syncing the tree wait for us to finish | 136 | * syncing the tree wait for us to finish |
137 | */ | 137 | */ |
138 | static int start_log_trans(struct btrfs_trans_handle *trans, | 138 | static int start_log_trans(struct btrfs_trans_handle *trans, |
139 | struct btrfs_root *root) | 139 | struct btrfs_root *root, |
140 | struct btrfs_log_ctx *ctx) | ||
140 | { | 141 | { |
142 | int index; | ||
141 | int ret; | 143 | int ret; |
142 | int err = 0; | ||
143 | 144 | ||
144 | mutex_lock(&root->log_mutex); | 145 | mutex_lock(&root->log_mutex); |
145 | if (root->log_root) { | 146 | if (root->log_root) { |
147 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == | ||
148 | trans->transid) { | ||
149 | ret = -EAGAIN; | ||
150 | goto out; | ||
151 | } | ||
152 | |||
146 | if (!root->log_start_pid) { | 153 | if (!root->log_start_pid) { |
147 | root->log_start_pid = current->pid; | 154 | root->log_start_pid = current->pid; |
148 | root->log_multiple_pids = false; | 155 | root->log_multiple_pids = false; |
@@ -152,27 +159,40 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
152 | 159 | ||
153 | atomic_inc(&root->log_batch); | 160 | atomic_inc(&root->log_batch); |
154 | atomic_inc(&root->log_writers); | 161 | atomic_inc(&root->log_writers); |
162 | if (ctx) { | ||
163 | index = root->log_transid % 2; | ||
164 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
165 | ctx->log_transid = root->log_transid; | ||
166 | } | ||
155 | mutex_unlock(&root->log_mutex); | 167 | mutex_unlock(&root->log_mutex); |
156 | return 0; | 168 | return 0; |
157 | } | 169 | } |
158 | root->log_multiple_pids = false; | 170 | |
159 | root->log_start_pid = current->pid; | 171 | ret = 0; |
160 | mutex_lock(&root->fs_info->tree_log_mutex); | 172 | mutex_lock(&root->fs_info->tree_log_mutex); |
161 | if (!root->fs_info->log_root_tree) { | 173 | if (!root->fs_info->log_root_tree) |
162 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 174 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
163 | if (ret) | 175 | mutex_unlock(&root->fs_info->tree_log_mutex); |
164 | err = ret; | 176 | if (ret) |
165 | } | 177 | goto out; |
166 | if (err == 0 && !root->log_root) { | 178 | |
179 | if (!root->log_root) { | ||
167 | ret = btrfs_add_log_tree(trans, root); | 180 | ret = btrfs_add_log_tree(trans, root); |
168 | if (ret) | 181 | if (ret) |
169 | err = ret; | 182 | goto out; |
170 | } | 183 | } |
171 | mutex_unlock(&root->fs_info->tree_log_mutex); | 184 | root->log_multiple_pids = false; |
185 | root->log_start_pid = current->pid; | ||
172 | atomic_inc(&root->log_batch); | 186 | atomic_inc(&root->log_batch); |
173 | atomic_inc(&root->log_writers); | 187 | atomic_inc(&root->log_writers); |
188 | if (ctx) { | ||
189 | index = root->log_transid % 2; | ||
190 | list_add_tail(&ctx->list, &root->log_ctxs[index]); | ||
191 | ctx->log_transid = root->log_transid; | ||
192 | } | ||
193 | out: | ||
174 | mutex_unlock(&root->log_mutex); | 194 | mutex_unlock(&root->log_mutex); |
175 | return err; | 195 | return ret; |
176 | } | 196 | } |
177 | 197 | ||
178 | /* | 198 | /* |
@@ -2359,8 +2379,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
2359 | return ret; | 2379 | return ret; |
2360 | } | 2380 | } |
2361 | 2381 | ||
2362 | static int wait_log_commit(struct btrfs_trans_handle *trans, | 2382 | static void wait_log_commit(struct btrfs_trans_handle *trans, |
2363 | struct btrfs_root *root, unsigned long transid) | 2383 | struct btrfs_root *root, int transid) |
2364 | { | 2384 | { |
2365 | DEFINE_WAIT(wait); | 2385 | DEFINE_WAIT(wait); |
2366 | int index = transid % 2; | 2386 | int index = transid % 2; |
@@ -2375,36 +2395,63 @@ static int wait_log_commit(struct btrfs_trans_handle *trans, | |||
2375 | &wait, TASK_UNINTERRUPTIBLE); | 2395 | &wait, TASK_UNINTERRUPTIBLE); |
2376 | mutex_unlock(&root->log_mutex); | 2396 | mutex_unlock(&root->log_mutex); |
2377 | 2397 | ||
2378 | if (root->fs_info->last_trans_log_full_commit != | 2398 | if (root->log_transid_committed < transid && |
2379 | trans->transid && root->log_transid < transid + 2 && | ||
2380 | atomic_read(&root->log_commit[index])) | 2399 | atomic_read(&root->log_commit[index])) |
2381 | schedule(); | 2400 | schedule(); |
2382 | 2401 | ||
2383 | finish_wait(&root->log_commit_wait[index], &wait); | 2402 | finish_wait(&root->log_commit_wait[index], &wait); |
2384 | mutex_lock(&root->log_mutex); | 2403 | mutex_lock(&root->log_mutex); |
2385 | } while (root->fs_info->last_trans_log_full_commit != | 2404 | } while (root->log_transid_committed < transid && |
2386 | trans->transid && root->log_transid < transid + 2 && | ||
2387 | atomic_read(&root->log_commit[index])); | 2405 | atomic_read(&root->log_commit[index])); |
2388 | return 0; | ||
2389 | } | 2406 | } |
2390 | 2407 | ||
2391 | static void wait_for_writer(struct btrfs_trans_handle *trans, | 2408 | static void wait_for_writer(struct btrfs_trans_handle *trans, |
2392 | struct btrfs_root *root) | 2409 | struct btrfs_root *root) |
2393 | { | 2410 | { |
2394 | DEFINE_WAIT(wait); | 2411 | DEFINE_WAIT(wait); |
2395 | while (root->fs_info->last_trans_log_full_commit != | 2412 | |
2396 | trans->transid && atomic_read(&root->log_writers)) { | 2413 | while (atomic_read(&root->log_writers)) { |
2397 | prepare_to_wait(&root->log_writer_wait, | 2414 | prepare_to_wait(&root->log_writer_wait, |
2398 | &wait, TASK_UNINTERRUPTIBLE); | 2415 | &wait, TASK_UNINTERRUPTIBLE); |
2399 | mutex_unlock(&root->log_mutex); | 2416 | mutex_unlock(&root->log_mutex); |
2400 | if (root->fs_info->last_trans_log_full_commit != | 2417 | if (atomic_read(&root->log_writers)) |
2401 | trans->transid && atomic_read(&root->log_writers)) | ||
2402 | schedule(); | 2418 | schedule(); |
2403 | mutex_lock(&root->log_mutex); | 2419 | mutex_lock(&root->log_mutex); |
2404 | finish_wait(&root->log_writer_wait, &wait); | 2420 | finish_wait(&root->log_writer_wait, &wait); |
2405 | } | 2421 | } |
2406 | } | 2422 | } |
2407 | 2423 | ||
2424 | static inline void btrfs_remove_log_ctx(struct btrfs_root *root, | ||
2425 | struct btrfs_log_ctx *ctx) | ||
2426 | { | ||
2427 | if (!ctx) | ||
2428 | return; | ||
2429 | |||
2430 | mutex_lock(&root->log_mutex); | ||
2431 | list_del_init(&ctx->list); | ||
2432 | mutex_unlock(&root->log_mutex); | ||
2433 | } | ||
2434 | |||
2435 | /* | ||
2436 | * Invoked in log mutex context, or be sure there is no other task which | ||
2437 | * can access the list. | ||
2438 | */ | ||
2439 | static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root, | ||
2440 | int index, int error) | ||
2441 | { | ||
2442 | struct btrfs_log_ctx *ctx; | ||
2443 | |||
2444 | if (!error) { | ||
2445 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
2446 | return; | ||
2447 | } | ||
2448 | |||
2449 | list_for_each_entry(ctx, &root->log_ctxs[index], list) | ||
2450 | ctx->log_ret = error; | ||
2451 | |||
2452 | INIT_LIST_HEAD(&root->log_ctxs[index]); | ||
2453 | } | ||
2454 | |||
2408 | /* | 2455 | /* |
2409 | * btrfs_sync_log does sends a given tree log down to the disk and | 2456 | * btrfs_sync_log does sends a given tree log down to the disk and |
2410 | * updates the super blocks to record it. When this call is done, | 2457 | * updates the super blocks to record it. When this call is done, |
@@ -2418,7 +2465,7 @@ static void wait_for_writer(struct btrfs_trans_handle *trans, | |||
2418 | * that has happened. | 2465 | * that has happened. |
2419 | */ | 2466 | */ |
2420 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 2467 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
2421 | struct btrfs_root *root) | 2468 | struct btrfs_root *root, struct btrfs_log_ctx *ctx) |
2422 | { | 2469 | { |
2423 | int index1; | 2470 | int index1; |
2424 | int index2; | 2471 | int index2; |
@@ -2426,22 +2473,30 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2426 | int ret; | 2473 | int ret; |
2427 | struct btrfs_root *log = root->log_root; | 2474 | struct btrfs_root *log = root->log_root; |
2428 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; | 2475 | struct btrfs_root *log_root_tree = root->fs_info->log_root_tree; |
2429 | unsigned long log_transid = 0; | 2476 | int log_transid = 0; |
2477 | struct btrfs_log_ctx root_log_ctx; | ||
2430 | struct blk_plug plug; | 2478 | struct blk_plug plug; |
2431 | 2479 | ||
2432 | mutex_lock(&root->log_mutex); | 2480 | mutex_lock(&root->log_mutex); |
2433 | log_transid = root->log_transid; | 2481 | log_transid = ctx->log_transid; |
2434 | index1 = root->log_transid % 2; | 2482 | if (root->log_transid_committed >= log_transid) { |
2483 | mutex_unlock(&root->log_mutex); | ||
2484 | return ctx->log_ret; | ||
2485 | } | ||
2486 | |||
2487 | index1 = log_transid % 2; | ||
2435 | if (atomic_read(&root->log_commit[index1])) { | 2488 | if (atomic_read(&root->log_commit[index1])) { |
2436 | wait_log_commit(trans, root, root->log_transid); | 2489 | wait_log_commit(trans, root, log_transid); |
2437 | mutex_unlock(&root->log_mutex); | 2490 | mutex_unlock(&root->log_mutex); |
2438 | return 0; | 2491 | return ctx->log_ret; |
2439 | } | 2492 | } |
2493 | ASSERT(log_transid == root->log_transid); | ||
2440 | atomic_set(&root->log_commit[index1], 1); | 2494 | atomic_set(&root->log_commit[index1], 1); |
2441 | 2495 | ||
2442 | /* wait for previous tree log sync to complete */ | 2496 | /* wait for previous tree log sync to complete */ |
2443 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2497 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
2444 | wait_log_commit(trans, root, root->log_transid - 1); | 2498 | wait_log_commit(trans, root, log_transid - 1); |
2499 | |||
2445 | while (1) { | 2500 | while (1) { |
2446 | int batch = atomic_read(&root->log_batch); | 2501 | int batch = atomic_read(&root->log_batch); |
2447 | /* when we're on an ssd, just kick the log commit out */ | 2502 | /* when we're on an ssd, just kick the log commit out */ |
@@ -2456,7 +2511,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2456 | } | 2511 | } |
2457 | 2512 | ||
2458 | /* bail out if we need to do a full commit */ | 2513 | /* bail out if we need to do a full commit */ |
2459 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2514 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
2515 | trans->transid) { | ||
2460 | ret = -EAGAIN; | 2516 | ret = -EAGAIN; |
2461 | btrfs_free_logged_extents(log, log_transid); | 2517 | btrfs_free_logged_extents(log, log_transid); |
2462 | mutex_unlock(&root->log_mutex); | 2518 | mutex_unlock(&root->log_mutex); |
@@ -2477,6 +2533,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2477 | blk_finish_plug(&plug); | 2533 | blk_finish_plug(&plug); |
2478 | btrfs_abort_transaction(trans, root, ret); | 2534 | btrfs_abort_transaction(trans, root, ret); |
2479 | btrfs_free_logged_extents(log, log_transid); | 2535 | btrfs_free_logged_extents(log, log_transid); |
2536 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2537 | trans->transid; | ||
2480 | mutex_unlock(&root->log_mutex); | 2538 | mutex_unlock(&root->log_mutex); |
2481 | goto out; | 2539 | goto out; |
2482 | } | 2540 | } |
@@ -2486,7 +2544,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2486 | root->log_transid++; | 2544 | root->log_transid++; |
2487 | log->log_transid = root->log_transid; | 2545 | log->log_transid = root->log_transid; |
2488 | root->log_start_pid = 0; | 2546 | root->log_start_pid = 0; |
2489 | smp_mb(); | ||
2490 | /* | 2547 | /* |
2491 | * IO has been started, blocks of the log tree have WRITTEN flag set | 2548 | * IO has been started, blocks of the log tree have WRITTEN flag set |
2492 | * in their headers. new modifications of the log will be written to | 2549 | * in their headers. new modifications of the log will be written to |
@@ -2494,9 +2551,16 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2494 | */ | 2551 | */ |
2495 | mutex_unlock(&root->log_mutex); | 2552 | mutex_unlock(&root->log_mutex); |
2496 | 2553 | ||
2554 | btrfs_init_log_ctx(&root_log_ctx); | ||
2555 | |||
2497 | mutex_lock(&log_root_tree->log_mutex); | 2556 | mutex_lock(&log_root_tree->log_mutex); |
2498 | atomic_inc(&log_root_tree->log_batch); | 2557 | atomic_inc(&log_root_tree->log_batch); |
2499 | atomic_inc(&log_root_tree->log_writers); | 2558 | atomic_inc(&log_root_tree->log_writers); |
2559 | |||
2560 | index2 = log_root_tree->log_transid % 2; | ||
2561 | list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]); | ||
2562 | root_log_ctx.log_transid = log_root_tree->log_transid; | ||
2563 | |||
2500 | mutex_unlock(&log_root_tree->log_mutex); | 2564 | mutex_unlock(&log_root_tree->log_mutex); |
2501 | 2565 | ||
2502 | ret = update_log_root(trans, log); | 2566 | ret = update_log_root(trans, log); |
@@ -2509,13 +2573,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2509 | } | 2573 | } |
2510 | 2574 | ||
2511 | if (ret) { | 2575 | if (ret) { |
2576 | if (!list_empty(&root_log_ctx.list)) | ||
2577 | list_del_init(&root_log_ctx.list); | ||
2578 | |||
2512 | blk_finish_plug(&plug); | 2579 | blk_finish_plug(&plug); |
2580 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2581 | trans->transid; | ||
2513 | if (ret != -ENOSPC) { | 2582 | if (ret != -ENOSPC) { |
2514 | btrfs_abort_transaction(trans, root, ret); | 2583 | btrfs_abort_transaction(trans, root, ret); |
2515 | mutex_unlock(&log_root_tree->log_mutex); | 2584 | mutex_unlock(&log_root_tree->log_mutex); |
2516 | goto out; | 2585 | goto out; |
2517 | } | 2586 | } |
2518 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2519 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2587 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2520 | btrfs_free_logged_extents(log, log_transid); | 2588 | btrfs_free_logged_extents(log, log_transid); |
2521 | mutex_unlock(&log_root_tree->log_mutex); | 2589 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2523,22 +2591,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2523 | goto out; | 2591 | goto out; |
2524 | } | 2592 | } |
2525 | 2593 | ||
2526 | index2 = log_root_tree->log_transid % 2; | 2594 | if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) { |
2595 | mutex_unlock(&log_root_tree->log_mutex); | ||
2596 | ret = root_log_ctx.log_ret; | ||
2597 | goto out; | ||
2598 | } | ||
2599 | |||
2600 | index2 = root_log_ctx.log_transid % 2; | ||
2527 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2601 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2528 | blk_finish_plug(&plug); | 2602 | blk_finish_plug(&plug); |
2529 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2603 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2530 | wait_log_commit(trans, log_root_tree, | 2604 | wait_log_commit(trans, log_root_tree, |
2531 | log_root_tree->log_transid); | 2605 | root_log_ctx.log_transid); |
2532 | btrfs_free_logged_extents(log, log_transid); | 2606 | btrfs_free_logged_extents(log, log_transid); |
2533 | mutex_unlock(&log_root_tree->log_mutex); | 2607 | mutex_unlock(&log_root_tree->log_mutex); |
2534 | ret = 0; | 2608 | ret = root_log_ctx.log_ret; |
2535 | goto out; | 2609 | goto out; |
2536 | } | 2610 | } |
2611 | ASSERT(root_log_ctx.log_transid == log_root_tree->log_transid); | ||
2537 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2612 | atomic_set(&log_root_tree->log_commit[index2], 1); |
2538 | 2613 | ||
2539 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { | 2614 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
2540 | wait_log_commit(trans, log_root_tree, | 2615 | wait_log_commit(trans, log_root_tree, |
2541 | log_root_tree->log_transid - 1); | 2616 | root_log_ctx.log_transid - 1); |
2542 | } | 2617 | } |
2543 | 2618 | ||
2544 | wait_for_writer(trans, log_root_tree); | 2619 | wait_for_writer(trans, log_root_tree); |
@@ -2547,7 +2622,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2547 | * now that we've moved on to the tree of log tree roots, | 2622 | * now that we've moved on to the tree of log tree roots, |
2548 | * check the full commit flag again | 2623 | * check the full commit flag again |
2549 | */ | 2624 | */ |
2550 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | 2625 | if (ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) == |
2626 | trans->transid) { | ||
2551 | blk_finish_plug(&plug); | 2627 | blk_finish_plug(&plug); |
2552 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2628 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
2553 | btrfs_free_logged_extents(log, log_transid); | 2629 | btrfs_free_logged_extents(log, log_transid); |
@@ -2561,6 +2637,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2561 | EXTENT_DIRTY | EXTENT_NEW); | 2637 | EXTENT_DIRTY | EXTENT_NEW); |
2562 | blk_finish_plug(&plug); | 2638 | blk_finish_plug(&plug); |
2563 | if (ret) { | 2639 | if (ret) { |
2640 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2641 | trans->transid; | ||
2564 | btrfs_abort_transaction(trans, root, ret); | 2642 | btrfs_abort_transaction(trans, root, ret); |
2565 | btrfs_free_logged_extents(log, log_transid); | 2643 | btrfs_free_logged_extents(log, log_transid); |
2566 | mutex_unlock(&log_root_tree->log_mutex); | 2644 | mutex_unlock(&log_root_tree->log_mutex); |
@@ -2578,8 +2656,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2578 | btrfs_header_level(log_root_tree->node)); | 2656 | btrfs_header_level(log_root_tree->node)); |
2579 | 2657 | ||
2580 | log_root_tree->log_transid++; | 2658 | log_root_tree->log_transid++; |
2581 | smp_mb(); | ||
2582 | |||
2583 | mutex_unlock(&log_root_tree->log_mutex); | 2659 | mutex_unlock(&log_root_tree->log_mutex); |
2584 | 2660 | ||
2585 | /* | 2661 | /* |
@@ -2591,6 +2667,8 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2591 | */ | 2667 | */ |
2592 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); | 2668 | ret = write_ctree_super(trans, root->fs_info->tree_root, 1); |
2593 | if (ret) { | 2669 | if (ret) { |
2670 | ACCESS_ONCE(root->fs_info->last_trans_log_full_commit) = | ||
2671 | trans->transid; | ||
2594 | btrfs_abort_transaction(trans, root, ret); | 2672 | btrfs_abort_transaction(trans, root, ret); |
2595 | goto out_wake_log_root; | 2673 | goto out_wake_log_root; |
2596 | } | 2674 | } |
@@ -2601,13 +2679,28 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2601 | mutex_unlock(&root->log_mutex); | 2679 | mutex_unlock(&root->log_mutex); |
2602 | 2680 | ||
2603 | out_wake_log_root: | 2681 | out_wake_log_root: |
2682 | /* | ||
2683 | * We needn't get log_mutex here because we are sure all | ||
2684 | * the other tasks are blocked. | ||
2685 | */ | ||
2686 | btrfs_remove_all_log_ctxs(log_root_tree, index2, ret); | ||
2687 | |||
2688 | mutex_lock(&log_root_tree->log_mutex); | ||
2689 | log_root_tree->log_transid_committed++; | ||
2604 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2690 | atomic_set(&log_root_tree->log_commit[index2], 0); |
2605 | smp_mb(); | 2691 | mutex_unlock(&log_root_tree->log_mutex); |
2692 | |||
2606 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2693 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
2607 | wake_up(&log_root_tree->log_commit_wait[index2]); | 2694 | wake_up(&log_root_tree->log_commit_wait[index2]); |
2608 | out: | 2695 | out: |
2696 | /* See above. */ | ||
2697 | btrfs_remove_all_log_ctxs(root, index1, ret); | ||
2698 | |||
2699 | mutex_lock(&root->log_mutex); | ||
2700 | root->log_transid_committed++; | ||
2609 | atomic_set(&root->log_commit[index1], 0); | 2701 | atomic_set(&root->log_commit[index1], 0); |
2610 | smp_mb(); | 2702 | mutex_unlock(&root->log_mutex); |
2703 | |||
2611 | if (waitqueue_active(&root->log_commit_wait[index1])) | 2704 | if (waitqueue_active(&root->log_commit_wait[index1])) |
2612 | wake_up(&root->log_commit_wait[index1]); | 2705 | wake_up(&root->log_commit_wait[index1]); |
2613 | return ret; | 2706 | return ret; |
@@ -3479,7 +3572,8 @@ static int extent_cmp(void *priv, struct list_head *a, struct list_head *b) | |||
3479 | 3572 | ||
3480 | static int log_one_extent(struct btrfs_trans_handle *trans, | 3573 | static int log_one_extent(struct btrfs_trans_handle *trans, |
3481 | struct inode *inode, struct btrfs_root *root, | 3574 | struct inode *inode, struct btrfs_root *root, |
3482 | struct extent_map *em, struct btrfs_path *path) | 3575 | struct extent_map *em, struct btrfs_path *path, |
3576 | struct list_head *logged_list) | ||
3483 | { | 3577 | { |
3484 | struct btrfs_root *log = root->log_root; | 3578 | struct btrfs_root *log = root->log_root; |
3485 | struct btrfs_file_extent_item *fi; | 3579 | struct btrfs_file_extent_item *fi; |
@@ -3495,7 +3589,6 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3495 | u64 extent_offset = em->start - em->orig_start; | 3589 | u64 extent_offset = em->start - em->orig_start; |
3496 | u64 block_len; | 3590 | u64 block_len; |
3497 | int ret; | 3591 | int ret; |
3498 | int index = log->log_transid % 2; | ||
3499 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | 3592 | bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; |
3500 | int extent_inserted = 0; | 3593 | int extent_inserted = 0; |
3501 | 3594 | ||
@@ -3579,17 +3672,12 @@ static int log_one_extent(struct btrfs_trans_handle *trans, | |||
3579 | * First check and see if our csums are on our outstanding ordered | 3672 | * First check and see if our csums are on our outstanding ordered |
3580 | * extents. | 3673 | * extents. |
3581 | */ | 3674 | */ |
3582 | again: | 3675 | list_for_each_entry(ordered, logged_list, log_list) { |
3583 | spin_lock_irq(&log->log_extents_lock[index]); | ||
3584 | list_for_each_entry(ordered, &log->logged_list[index], log_list) { | ||
3585 | struct btrfs_ordered_sum *sum; | 3676 | struct btrfs_ordered_sum *sum; |
3586 | 3677 | ||
3587 | if (!mod_len) | 3678 | if (!mod_len) |
3588 | break; | 3679 | break; |
3589 | 3680 | ||
3590 | if (ordered->inode != inode) | ||
3591 | continue; | ||
3592 | |||
3593 | if (ordered->file_offset + ordered->len <= mod_start || | 3681 | if (ordered->file_offset + ordered->len <= mod_start || |
3594 | mod_start + mod_len <= ordered->file_offset) | 3682 | mod_start + mod_len <= ordered->file_offset) |
3595 | continue; | 3683 | continue; |
@@ -3632,12 +3720,6 @@ again: | |||
3632 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, | 3720 | if (test_and_set_bit(BTRFS_ORDERED_LOGGED_CSUM, |
3633 | &ordered->flags)) | 3721 | &ordered->flags)) |
3634 | continue; | 3722 | continue; |
3635 | atomic_inc(&ordered->refs); | ||
3636 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3637 | /* | ||
3638 | * we've dropped the lock, we must either break or | ||
3639 | * start over after this. | ||
3640 | */ | ||
3641 | 3723 | ||
3642 | if (ordered->csum_bytes_left) { | 3724 | if (ordered->csum_bytes_left) { |
3643 | btrfs_start_ordered_extent(inode, ordered, 0); | 3725 | btrfs_start_ordered_extent(inode, ordered, 0); |
@@ -3647,16 +3729,11 @@ again: | |||
3647 | 3729 | ||
3648 | list_for_each_entry(sum, &ordered->list, list) { | 3730 | list_for_each_entry(sum, &ordered->list, list) { |
3649 | ret = btrfs_csum_file_blocks(trans, log, sum); | 3731 | ret = btrfs_csum_file_blocks(trans, log, sum); |
3650 | if (ret) { | 3732 | if (ret) |
3651 | btrfs_put_ordered_extent(ordered); | ||
3652 | goto unlocked; | 3733 | goto unlocked; |
3653 | } | ||
3654 | } | 3734 | } |
3655 | btrfs_put_ordered_extent(ordered); | ||
3656 | goto again; | ||
3657 | 3735 | ||
3658 | } | 3736 | } |
3659 | spin_unlock_irq(&log->log_extents_lock[index]); | ||
3660 | unlocked: | 3737 | unlocked: |
3661 | 3738 | ||
3662 | if (!mod_len || ret) | 3739 | if (!mod_len || ret) |
@@ -3694,7 +3771,8 @@ unlocked: | |||
3694 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, | 3771 | static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, |
3695 | struct btrfs_root *root, | 3772 | struct btrfs_root *root, |
3696 | struct inode *inode, | 3773 | struct inode *inode, |
3697 | struct btrfs_path *path) | 3774 | struct btrfs_path *path, |
3775 | struct list_head *logged_list) | ||
3698 | { | 3776 | { |
3699 | struct extent_map *em, *n; | 3777 | struct extent_map *em, *n; |
3700 | struct list_head extents; | 3778 | struct list_head extents; |
@@ -3752,7 +3830,7 @@ process: | |||
3752 | 3830 | ||
3753 | write_unlock(&tree->lock); | 3831 | write_unlock(&tree->lock); |
3754 | 3832 | ||
3755 | ret = log_one_extent(trans, inode, root, em, path); | 3833 | ret = log_one_extent(trans, inode, root, em, path, logged_list); |
3756 | write_lock(&tree->lock); | 3834 | write_lock(&tree->lock); |
3757 | clear_em_logging(tree, em); | 3835 | clear_em_logging(tree, em); |
3758 | free_extent_map(em); | 3836 | free_extent_map(em); |
@@ -3788,6 +3866,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3788 | struct btrfs_key max_key; | 3866 | struct btrfs_key max_key; |
3789 | struct btrfs_root *log = root->log_root; | 3867 | struct btrfs_root *log = root->log_root; |
3790 | struct extent_buffer *src = NULL; | 3868 | struct extent_buffer *src = NULL; |
3869 | LIST_HEAD(logged_list); | ||
3791 | u64 last_extent = 0; | 3870 | u64 last_extent = 0; |
3792 | int err = 0; | 3871 | int err = 0; |
3793 | int ret; | 3872 | int ret; |
@@ -3836,7 +3915,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
3836 | 3915 | ||
3837 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 3916 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
3838 | 3917 | ||
3839 | btrfs_get_logged_extents(log, inode); | 3918 | btrfs_get_logged_extents(inode, &logged_list); |
3840 | 3919 | ||
3841 | /* | 3920 | /* |
3842 | * a brute force approach to making sure we get the most uptodate | 3921 | * a brute force approach to making sure we get the most uptodate |
@@ -3962,7 +4041,8 @@ log_extents: | |||
3962 | btrfs_release_path(path); | 4041 | btrfs_release_path(path); |
3963 | btrfs_release_path(dst_path); | 4042 | btrfs_release_path(dst_path); |
3964 | if (fast_search) { | 4043 | if (fast_search) { |
3965 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path); | 4044 | ret = btrfs_log_changed_extents(trans, root, inode, dst_path, |
4045 | &logged_list); | ||
3966 | if (ret) { | 4046 | if (ret) { |
3967 | err = ret; | 4047 | err = ret; |
3968 | goto out_unlock; | 4048 | goto out_unlock; |
@@ -3987,8 +4067,10 @@ log_extents: | |||
3987 | BTRFS_I(inode)->logged_trans = trans->transid; | 4067 | BTRFS_I(inode)->logged_trans = trans->transid; |
3988 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; | 4068 | BTRFS_I(inode)->last_log_commit = BTRFS_I(inode)->last_sub_trans; |
3989 | out_unlock: | 4069 | out_unlock: |
3990 | if (err) | 4070 | if (unlikely(err)) |
3991 | btrfs_free_logged_extents(log, log->log_transid); | 4071 | btrfs_put_logged_extents(&logged_list); |
4072 | else | ||
4073 | btrfs_submit_logged_extents(&logged_list, log); | ||
3992 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 4074 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
3993 | 4075 | ||
3994 | btrfs_free_path(path); | 4076 | btrfs_free_path(path); |
@@ -4079,7 +4161,8 @@ out: | |||
4079 | */ | 4161 | */ |
4080 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | 4162 | static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
4081 | struct btrfs_root *root, struct inode *inode, | 4163 | struct btrfs_root *root, struct inode *inode, |
4082 | struct dentry *parent, int exists_only) | 4164 | struct dentry *parent, int exists_only, |
4165 | struct btrfs_log_ctx *ctx) | ||
4083 | { | 4166 | { |
4084 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; | 4167 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
4085 | struct super_block *sb; | 4168 | struct super_block *sb; |
@@ -4116,9 +4199,9 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
4116 | goto end_no_trans; | 4199 | goto end_no_trans; |
4117 | } | 4200 | } |
4118 | 4201 | ||
4119 | ret = start_log_trans(trans, root); | 4202 | ret = start_log_trans(trans, root, ctx); |
4120 | if (ret) | 4203 | if (ret) |
4121 | goto end_trans; | 4204 | goto end_no_trans; |
4122 | 4205 | ||
4123 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 4206 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
4124 | if (ret) | 4207 | if (ret) |
@@ -4166,6 +4249,9 @@ end_trans: | |||
4166 | root->fs_info->last_trans_log_full_commit = trans->transid; | 4249 | root->fs_info->last_trans_log_full_commit = trans->transid; |
4167 | ret = 1; | 4250 | ret = 1; |
4168 | } | 4251 | } |
4252 | |||
4253 | if (ret) | ||
4254 | btrfs_remove_log_ctx(root, ctx); | ||
4169 | btrfs_end_log_trans(root); | 4255 | btrfs_end_log_trans(root); |
4170 | end_no_trans: | 4256 | end_no_trans: |
4171 | return ret; | 4257 | return ret; |
@@ -4178,12 +4264,14 @@ end_no_trans: | |||
4178 | * data on disk. | 4264 | * data on disk. |
4179 | */ | 4265 | */ |
4180 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 4266 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
4181 | struct btrfs_root *root, struct dentry *dentry) | 4267 | struct btrfs_root *root, struct dentry *dentry, |
4268 | struct btrfs_log_ctx *ctx) | ||
4182 | { | 4269 | { |
4183 | struct dentry *parent = dget_parent(dentry); | 4270 | struct dentry *parent = dget_parent(dentry); |
4184 | int ret; | 4271 | int ret; |
4185 | 4272 | ||
4186 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0); | 4273 | ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, |
4274 | 0, ctx); | ||
4187 | dput(parent); | 4275 | dput(parent); |
4188 | 4276 | ||
4189 | return ret; | 4277 | return ret; |
@@ -4420,6 +4508,6 @@ int btrfs_log_new_name(struct btrfs_trans_handle *trans, | |||
4420 | root->fs_info->last_trans_committed)) | 4508 | root->fs_info->last_trans_committed)) |
4421 | return 0; | 4509 | return 0; |
4422 | 4510 | ||
4423 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | 4511 | return btrfs_log_inode_parent(trans, root, inode, parent, 1, NULL); |
4424 | } | 4512 | } |
4425 | 4513 | ||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 1d4ae0d15a70..91b145fce333 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -22,14 +22,28 @@ | |||
22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ | 22 | /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */ |
23 | #define BTRFS_NO_LOG_SYNC 256 | 23 | #define BTRFS_NO_LOG_SYNC 256 |
24 | 24 | ||
25 | struct btrfs_log_ctx { | ||
26 | int log_ret; | ||
27 | int log_transid; | ||
28 | struct list_head list; | ||
29 | }; | ||
30 | |||
31 | static inline void btrfs_init_log_ctx(struct btrfs_log_ctx *ctx) | ||
32 | { | ||
33 | ctx->log_ret = 0; | ||
34 | ctx->log_transid = 0; | ||
35 | INIT_LIST_HEAD(&ctx->list); | ||
36 | } | ||
37 | |||
25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 38 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
26 | struct btrfs_root *root); | 39 | struct btrfs_root *root, struct btrfs_log_ctx *ctx); |
27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 40 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | 41 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, |
29 | struct btrfs_fs_info *fs_info); | 42 | struct btrfs_fs_info *fs_info); |
30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 43 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 44 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
32 | struct btrfs_root *root, struct dentry *dentry); | 45 | struct btrfs_root *root, struct dentry *dentry, |
46 | struct btrfs_log_ctx *ctx); | ||
33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 47 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
34 | struct btrfs_root *root, | 48 | struct btrfs_root *root, |
35 | const char *name, int name_len, | 49 | const char *name, int name_len, |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bab0b84d8f80..d241130a32fd 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -415,7 +415,8 @@ loop_lock: | |||
415 | device->running_pending = 1; | 415 | device->running_pending = 1; |
416 | 416 | ||
417 | spin_unlock(&device->io_lock); | 417 | spin_unlock(&device->io_lock); |
418 | btrfs_requeue_work(&device->work); | 418 | btrfs_queue_work(fs_info->submit_workers, |
419 | &device->work); | ||
419 | goto done; | 420 | goto done; |
420 | } | 421 | } |
421 | /* unplug every 64 requests just for good measure */ | 422 | /* unplug every 64 requests just for good measure */ |
@@ -5263,6 +5264,7 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, | |||
5263 | static void btrfs_end_bio(struct bio *bio, int err) | 5264 | static void btrfs_end_bio(struct bio *bio, int err) |
5264 | { | 5265 | { |
5265 | struct btrfs_bio *bbio = bio->bi_private; | 5266 | struct btrfs_bio *bbio = bio->bi_private; |
5267 | struct btrfs_device *dev = bbio->stripes[0].dev; | ||
5266 | int is_orig_bio = 0; | 5268 | int is_orig_bio = 0; |
5267 | 5269 | ||
5268 | if (err) { | 5270 | if (err) { |
@@ -5270,7 +5272,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5270 | if (err == -EIO || err == -EREMOTEIO) { | 5272 | if (err == -EIO || err == -EREMOTEIO) { |
5271 | unsigned int stripe_index = | 5273 | unsigned int stripe_index = |
5272 | btrfs_io_bio(bio)->stripe_index; | 5274 | btrfs_io_bio(bio)->stripe_index; |
5273 | struct btrfs_device *dev; | ||
5274 | 5275 | ||
5275 | BUG_ON(stripe_index >= bbio->num_stripes); | 5276 | BUG_ON(stripe_index >= bbio->num_stripes); |
5276 | dev = bbio->stripes[stripe_index].dev; | 5277 | dev = bbio->stripes[stripe_index].dev; |
@@ -5292,6 +5293,8 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5292 | if (bio == bbio->orig_bio) | 5293 | if (bio == bbio->orig_bio) |
5293 | is_orig_bio = 1; | 5294 | is_orig_bio = 1; |
5294 | 5295 | ||
5296 | btrfs_bio_counter_dec(bbio->fs_info); | ||
5297 | |||
5295 | if (atomic_dec_and_test(&bbio->stripes_pending)) { | 5298 | if (atomic_dec_and_test(&bbio->stripes_pending)) { |
5296 | if (!is_orig_bio) { | 5299 | if (!is_orig_bio) { |
5297 | bio_put(bio); | 5300 | bio_put(bio); |
@@ -5328,13 +5331,6 @@ static void btrfs_end_bio(struct bio *bio, int err) | |||
5328 | } | 5331 | } |
5329 | } | 5332 | } |
5330 | 5333 | ||
5331 | struct async_sched { | ||
5332 | struct bio *bio; | ||
5333 | int rw; | ||
5334 | struct btrfs_fs_info *info; | ||
5335 | struct btrfs_work work; | ||
5336 | }; | ||
5337 | |||
5338 | /* | 5334 | /* |
5339 | * see run_scheduled_bios for a description of why bios are collected for | 5335 | * see run_scheduled_bios for a description of why bios are collected for |
5340 | * async submit. | 5336 | * async submit. |
@@ -5391,8 +5387,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, | |||
5391 | spin_unlock(&device->io_lock); | 5387 | spin_unlock(&device->io_lock); |
5392 | 5388 | ||
5393 | if (should_queue) | 5389 | if (should_queue) |
5394 | btrfs_queue_worker(&root->fs_info->submit_workers, | 5390 | btrfs_queue_work(root->fs_info->submit_workers, |
5395 | &device->work); | 5391 | &device->work); |
5396 | } | 5392 | } |
5397 | 5393 | ||
5398 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, | 5394 | static int bio_size_ok(struct block_device *bdev, struct bio *bio, |
@@ -5447,6 +5443,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, | |||
5447 | } | 5443 | } |
5448 | #endif | 5444 | #endif |
5449 | bio->bi_bdev = dev->bdev; | 5445 | bio->bi_bdev = dev->bdev; |
5446 | |||
5447 | btrfs_bio_counter_inc_noblocked(root->fs_info); | ||
5448 | |||
5450 | if (async) | 5449 | if (async) |
5451 | btrfs_schedule_bio(root, dev, rw, bio); | 5450 | btrfs_schedule_bio(root, dev, rw, bio); |
5452 | else | 5451 | else |
@@ -5515,28 +5514,38 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5515 | length = bio->bi_iter.bi_size; | 5514 | length = bio->bi_iter.bi_size; |
5516 | map_length = length; | 5515 | map_length = length; |
5517 | 5516 | ||
5517 | btrfs_bio_counter_inc_blocked(root->fs_info); | ||
5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, | 5518 | ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, |
5519 | mirror_num, &raid_map); | 5519 | mirror_num, &raid_map); |
5520 | if (ret) /* -ENOMEM */ | 5520 | if (ret) { |
5521 | btrfs_bio_counter_dec(root->fs_info); | ||
5521 | return ret; | 5522 | return ret; |
5523 | } | ||
5522 | 5524 | ||
5523 | total_devs = bbio->num_stripes; | 5525 | total_devs = bbio->num_stripes; |
5524 | bbio->orig_bio = first_bio; | 5526 | bbio->orig_bio = first_bio; |
5525 | bbio->private = first_bio->bi_private; | 5527 | bbio->private = first_bio->bi_private; |
5526 | bbio->end_io = first_bio->bi_end_io; | 5528 | bbio->end_io = first_bio->bi_end_io; |
5529 | bbio->fs_info = root->fs_info; | ||
5527 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); | 5530 | atomic_set(&bbio->stripes_pending, bbio->num_stripes); |
5528 | 5531 | ||
5529 | if (raid_map) { | 5532 | if (raid_map) { |
5530 | /* In this case, map_length has been set to the length of | 5533 | /* In this case, map_length has been set to the length of |
5531 | a single stripe; not the whole write */ | 5534 | a single stripe; not the whole write */ |
5532 | if (rw & WRITE) { | 5535 | if (rw & WRITE) { |
5533 | return raid56_parity_write(root, bio, bbio, | 5536 | ret = raid56_parity_write(root, bio, bbio, |
5534 | raid_map, map_length); | 5537 | raid_map, map_length); |
5535 | } else { | 5538 | } else { |
5536 | return raid56_parity_recover(root, bio, bbio, | 5539 | ret = raid56_parity_recover(root, bio, bbio, |
5537 | raid_map, map_length, | 5540 | raid_map, map_length, |
5538 | mirror_num); | 5541 | mirror_num); |
5539 | } | 5542 | } |
5543 | /* | ||
5544 | * FIXME, replace dosen't support raid56 yet, please fix | ||
5545 | * it in the future. | ||
5546 | */ | ||
5547 | btrfs_bio_counter_dec(root->fs_info); | ||
5548 | return ret; | ||
5540 | } | 5549 | } |
5541 | 5550 | ||
5542 | if (map_length < length) { | 5551 | if (map_length < length) { |
@@ -5578,6 +5587,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, | |||
5578 | async_submit); | 5587 | async_submit); |
5579 | dev_nr++; | 5588 | dev_nr++; |
5580 | } | 5589 | } |
5590 | btrfs_bio_counter_dec(root->fs_info); | ||
5581 | return 0; | 5591 | return 0; |
5582 | } | 5592 | } |
5583 | 5593 | ||
@@ -5666,7 +5676,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, | |||
5666 | else | 5676 | else |
5667 | generate_random_uuid(dev->uuid); | 5677 | generate_random_uuid(dev->uuid); |
5668 | 5678 | ||
5669 | dev->work.func = pending_bios_fn; | 5679 | btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); |
5670 | 5680 | ||
5671 | return dev; | 5681 | return dev; |
5672 | } | 5682 | } |
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 8b3cd142b373..80754f9dd3df 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h | |||
@@ -192,6 +192,7 @@ typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); | |||
192 | 192 | ||
193 | struct btrfs_bio { | 193 | struct btrfs_bio { |
194 | atomic_t stripes_pending; | 194 | atomic_t stripes_pending; |
195 | struct btrfs_fs_info *fs_info; | ||
195 | bio_end_io_t *end_io; | 196 | bio_end_io_t *end_io; |
196 | struct bio *orig_bio; | 197 | struct bio *orig_bio; |
197 | void *private; | 198 | void *private; |
diff --git a/fs/buffer.c b/fs/buffer.c index 27265a8b43c1..8c53a2b15ecb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
@@ -3088,7 +3088,7 @@ EXPORT_SYMBOL(submit_bh); | |||
3088 | * until the buffer gets unlocked). | 3088 | * until the buffer gets unlocked). |
3089 | * | 3089 | * |
3090 | * ll_rw_block sets b_end_io to simple completion handler that marks | 3090 | * ll_rw_block sets b_end_io to simple completion handler that marks |
3091 | * the buffer up-to-date (if approriate), unlocks the buffer and wakes | 3091 | * the buffer up-to-date (if appropriate), unlocks the buffer and wakes |
3092 | * any waiters. | 3092 | * any waiters. |
3093 | * | 3093 | * |
3094 | * All of the buffers must be for the same device, and must also be a | 3094 | * All of the buffers must be for the same device, and must also be a |
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index ca65f39dc8dc..6494d9f673aa 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
@@ -391,12 +391,12 @@ try_again: | |||
391 | path.dentry = dir; | 391 | path.dentry = dir; |
392 | path_to_graveyard.mnt = cache->mnt; | 392 | path_to_graveyard.mnt = cache->mnt; |
393 | path_to_graveyard.dentry = cache->graveyard; | 393 | path_to_graveyard.dentry = cache->graveyard; |
394 | ret = security_path_rename(&path, rep, &path_to_graveyard, grave); | 394 | ret = security_path_rename(&path, rep, &path_to_graveyard, grave, 0); |
395 | if (ret < 0) { | 395 | if (ret < 0) { |
396 | cachefiles_io_error(cache, "Rename security error %d", ret); | 396 | cachefiles_io_error(cache, "Rename security error %d", ret); |
397 | } else { | 397 | } else { |
398 | ret = vfs_rename(dir->d_inode, rep, | 398 | ret = vfs_rename(dir->d_inode, rep, |
399 | cache->graveyard->d_inode, grave, NULL); | 399 | cache->graveyard->d_inode, grave, NULL, 0); |
400 | if (ret != 0 && ret != -ENOMEM) | 400 | if (ret != 0 && ret != -ENOMEM) |
401 | cachefiles_io_error(cache, | 401 | cachefiles_io_error(cache, |
402 | "Rename failed with error %d", ret); | 402 | "Rename failed with error %d", ret); |
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index ebaff368120d..4b1fb5ca65b8 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c | |||
@@ -265,24 +265,22 @@ static int cachefiles_read_backing_file_one(struct cachefiles_object *object, | |||
265 | goto nomem_monitor; | 265 | goto nomem_monitor; |
266 | } | 266 | } |
267 | 267 | ||
268 | ret = add_to_page_cache(newpage, bmapping, | 268 | ret = add_to_page_cache_lru(newpage, bmapping, |
269 | netpage->index, cachefiles_gfp); | 269 | netpage->index, cachefiles_gfp); |
270 | if (ret == 0) | 270 | if (ret == 0) |
271 | goto installed_new_backing_page; | 271 | goto installed_new_backing_page; |
272 | if (ret != -EEXIST) | 272 | if (ret != -EEXIST) |
273 | goto nomem_page; | 273 | goto nomem_page; |
274 | } | 274 | } |
275 | 275 | ||
276 | /* we've installed a new backing page, so now we need to add it | 276 | /* we've installed a new backing page, so now we need to start |
277 | * to the LRU list and start it reading */ | 277 | * it reading */ |
278 | installed_new_backing_page: | 278 | installed_new_backing_page: |
279 | _debug("- new %p", newpage); | 279 | _debug("- new %p", newpage); |
280 | 280 | ||
281 | backpage = newpage; | 281 | backpage = newpage; |
282 | newpage = NULL; | 282 | newpage = NULL; |
283 | 283 | ||
284 | lru_cache_add_file(backpage); | ||
285 | |||
286 | read_backing_page: | 284 | read_backing_page: |
287 | ret = bmapping->a_ops->readpage(NULL, backpage); | 285 | ret = bmapping->a_ops->readpage(NULL, backpage); |
288 | if (ret < 0) | 286 | if (ret < 0) |
@@ -510,24 +508,23 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
510 | goto nomem; | 508 | goto nomem; |
511 | } | 509 | } |
512 | 510 | ||
513 | ret = add_to_page_cache(newpage, bmapping, | 511 | ret = add_to_page_cache_lru(newpage, bmapping, |
514 | netpage->index, cachefiles_gfp); | 512 | netpage->index, |
513 | cachefiles_gfp); | ||
515 | if (ret == 0) | 514 | if (ret == 0) |
516 | goto installed_new_backing_page; | 515 | goto installed_new_backing_page; |
517 | if (ret != -EEXIST) | 516 | if (ret != -EEXIST) |
518 | goto nomem; | 517 | goto nomem; |
519 | } | 518 | } |
520 | 519 | ||
521 | /* we've installed a new backing page, so now we need to add it | 520 | /* we've installed a new backing page, so now we need |
522 | * to the LRU list and start it reading */ | 521 | * to start it reading */ |
523 | installed_new_backing_page: | 522 | installed_new_backing_page: |
524 | _debug("- new %p", newpage); | 523 | _debug("- new %p", newpage); |
525 | 524 | ||
526 | backpage = newpage; | 525 | backpage = newpage; |
527 | newpage = NULL; | 526 | newpage = NULL; |
528 | 527 | ||
529 | lru_cache_add_file(backpage); | ||
530 | |||
531 | reread_backing_page: | 528 | reread_backing_page: |
532 | ret = bmapping->a_ops->readpage(NULL, backpage); | 529 | ret = bmapping->a_ops->readpage(NULL, backpage); |
533 | if (ret < 0) | 530 | if (ret < 0) |
@@ -538,8 +535,8 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
538 | monitor_backing_page: | 535 | monitor_backing_page: |
539 | _debug("- monitor add"); | 536 | _debug("- monitor add"); |
540 | 537 | ||
541 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | 538 | ret = add_to_page_cache_lru(netpage, op->mapping, |
542 | cachefiles_gfp); | 539 | netpage->index, cachefiles_gfp); |
543 | if (ret < 0) { | 540 | if (ret < 0) { |
544 | if (ret == -EEXIST) { | 541 | if (ret == -EEXIST) { |
545 | page_cache_release(netpage); | 542 | page_cache_release(netpage); |
@@ -549,8 +546,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
549 | goto nomem; | 546 | goto nomem; |
550 | } | 547 | } |
551 | 548 | ||
552 | lru_cache_add_file(netpage); | ||
553 | |||
554 | /* install a monitor */ | 549 | /* install a monitor */ |
555 | page_cache_get(netpage); | 550 | page_cache_get(netpage); |
556 | monitor->netfs_page = netpage; | 551 | monitor->netfs_page = netpage; |
@@ -613,8 +608,8 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
613 | backing_page_already_uptodate: | 608 | backing_page_already_uptodate: |
614 | _debug("- uptodate"); | 609 | _debug("- uptodate"); |
615 | 610 | ||
616 | ret = add_to_page_cache(netpage, op->mapping, netpage->index, | 611 | ret = add_to_page_cache_lru(netpage, op->mapping, |
617 | cachefiles_gfp); | 612 | netpage->index, cachefiles_gfp); |
618 | if (ret < 0) { | 613 | if (ret < 0) { |
619 | if (ret == -EEXIST) { | 614 | if (ret == -EEXIST) { |
620 | page_cache_release(netpage); | 615 | page_cache_release(netpage); |
@@ -631,8 +626,6 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object, | |||
631 | 626 | ||
632 | fscache_mark_page_cached(op, netpage); | 627 | fscache_mark_page_cached(op, netpage); |
633 | 628 | ||
634 | lru_cache_add_file(netpage); | ||
635 | |||
636 | /* the netpage is unlocked and marked up to date here */ | 629 | /* the netpage is unlocked and marked up to date here */ |
637 | fscache_end_io(op, netpage, 0); | 630 | fscache_end_io(op, netpage, 0); |
638 | page_cache_release(netpage); | 631 | page_cache_release(netpage); |
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 8c44fdd4e1c3..834f9f3723fb 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c | |||
@@ -205,6 +205,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, | |||
205 | ci->fscache = fscache_acquire_cookie(fsc->fscache, | 205 | ci->fscache = fscache_acquire_cookie(fsc->fscache, |
206 | &ceph_fscache_inode_object_def, | 206 | &ceph_fscache_inode_object_def, |
207 | ci, true); | 207 | ci, true); |
208 | fscache_check_consistency(ci->fscache); | ||
208 | done: | 209 | done: |
209 | mutex_unlock(&inode->i_mutex); | 210 | mutex_unlock(&inode->i_mutex); |
210 | 211 | ||
diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index da95f61b7a09..5ac591bd012b 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h | |||
@@ -48,6 +48,12 @@ void ceph_readpage_to_fscache(struct inode *inode, struct page *page); | |||
48 | void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); | 48 | void ceph_invalidate_fscache_page(struct inode* inode, struct page *page); |
49 | void ceph_queue_revalidate(struct inode *inode); | 49 | void ceph_queue_revalidate(struct inode *inode); |
50 | 50 | ||
51 | static inline void ceph_fscache_update_objectsize(struct inode *inode) | ||
52 | { | ||
53 | struct ceph_inode_info *ci = ceph_inode(inode); | ||
54 | fscache_attr_changed(ci->fscache); | ||
55 | } | ||
56 | |||
51 | static inline void ceph_fscache_invalidate(struct inode *inode) | 57 | static inline void ceph_fscache_invalidate(struct inode *inode) |
52 | { | 58 | { |
53 | fscache_invalidate(ceph_inode(inode)->fscache); | 59 | fscache_invalidate(ceph_inode(inode)->fscache); |
@@ -135,6 +141,10 @@ static inline void ceph_readpage_to_fscache(struct inode *inode, | |||
135 | { | 141 | { |
136 | } | 142 | } |
137 | 143 | ||
144 | static inline void ceph_fscache_update_objectsize(struct inode *inode) | ||
145 | { | ||
146 | } | ||
147 | |||
138 | static inline void ceph_fscache_invalidate(struct inode *inode) | 148 | static inline void ceph_fscache_invalidate(struct inode *inode) |
139 | { | 149 | { |
140 | } | 150 | } |
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 17543383545c..2e5e648eb5c3 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c | |||
@@ -622,8 +622,10 @@ retry: | |||
622 | 622 | ||
623 | if (flags & CEPH_CAP_FLAG_AUTH) { | 623 | if (flags & CEPH_CAP_FLAG_AUTH) { |
624 | if (ci->i_auth_cap == NULL || | 624 | if (ci->i_auth_cap == NULL || |
625 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) | 625 | ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) { |
626 | ci->i_auth_cap = cap; | 626 | ci->i_auth_cap = cap; |
627 | cap->mds_wanted = wanted; | ||
628 | } | ||
627 | ci->i_cap_exporting_issued = 0; | 629 | ci->i_cap_exporting_issued = 0; |
628 | } else { | 630 | } else { |
629 | WARN_ON(ci->i_auth_cap == cap); | 631 | WARN_ON(ci->i_auth_cap == cap); |
@@ -885,7 +887,10 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) | |||
885 | cap = rb_entry(p, struct ceph_cap, ci_node); | 887 | cap = rb_entry(p, struct ceph_cap, ci_node); |
886 | if (!__cap_is_valid(cap)) | 888 | if (!__cap_is_valid(cap)) |
887 | continue; | 889 | continue; |
888 | mds_wanted |= cap->mds_wanted; | 890 | if (cap == ci->i_auth_cap) |
891 | mds_wanted |= cap->mds_wanted; | ||
892 | else | ||
893 | mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR); | ||
889 | } | 894 | } |
890 | return mds_wanted; | 895 | return mds_wanted; |
891 | } | 896 | } |
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 6d59006bfa27..16b54aa31f08 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c | |||
@@ -93,6 +93,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
93 | } else if (req->r_path1) { | 93 | } else if (req->r_path1) { |
94 | seq_printf(s, " #%llx/%s", req->r_ino1.ino, | 94 | seq_printf(s, " #%llx/%s", req->r_ino1.ino, |
95 | req->r_path1); | 95 | req->r_path1); |
96 | } else { | ||
97 | seq_printf(s, " #%llx", req->r_ino1.ino); | ||
96 | } | 98 | } |
97 | 99 | ||
98 | if (req->r_old_dentry) { | 100 | if (req->r_old_dentry) { |
@@ -102,7 +104,8 @@ static int mdsc_show(struct seq_file *s, void *p) | |||
102 | path = NULL; | 104 | path = NULL; |
103 | spin_lock(&req->r_old_dentry->d_lock); | 105 | spin_lock(&req->r_old_dentry->d_lock); |
104 | seq_printf(s, " #%llx/%.*s (%s)", | 106 | seq_printf(s, " #%llx/%.*s (%s)", |
105 | ceph_ino(req->r_old_dentry_dir), | 107 | req->r_old_dentry_dir ? |
108 | ceph_ino(req->r_old_dentry_dir) : 0, | ||
106 | req->r_old_dentry->d_name.len, | 109 | req->r_old_dentry->d_name.len, |
107 | req->r_old_dentry->d_name.name, | 110 | req->r_old_dentry->d_name.name, |
108 | path ? path : ""); | 111 | path ? path : ""); |
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 45eda6d7a40c..766410a12c2c 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
@@ -119,7 +119,8 @@ static int fpos_cmp(loff_t l, loff_t r) | |||
119 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by | 119 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by |
120 | * the MDS if/when the directory is modified). | 120 | * the MDS if/when the directory is modified). |
121 | */ | 121 | */ |
122 | static int __dcache_readdir(struct file *file, struct dir_context *ctx) | 122 | static int __dcache_readdir(struct file *file, struct dir_context *ctx, |
123 | u32 shared_gen) | ||
123 | { | 124 | { |
124 | struct ceph_file_info *fi = file->private_data; | 125 | struct ceph_file_info *fi = file->private_data; |
125 | struct dentry *parent = file->f_dentry; | 126 | struct dentry *parent = file->f_dentry; |
@@ -133,8 +134,8 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx) | |||
133 | last = fi->dentry; | 134 | last = fi->dentry; |
134 | fi->dentry = NULL; | 135 | fi->dentry = NULL; |
135 | 136 | ||
136 | dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos, | 137 | dout("__dcache_readdir %p v%u at %llu (last %p)\n", |
137 | last); | 138 | dir, shared_gen, ctx->pos, last); |
138 | 139 | ||
139 | spin_lock(&parent->d_lock); | 140 | spin_lock(&parent->d_lock); |
140 | 141 | ||
@@ -161,7 +162,8 @@ more: | |||
161 | goto out_unlock; | 162 | goto out_unlock; |
162 | } | 163 | } |
163 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); | 164 | spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); |
164 | if (!d_unhashed(dentry) && dentry->d_inode && | 165 | if (di->lease_shared_gen == shared_gen && |
166 | !d_unhashed(dentry) && dentry->d_inode && | ||
165 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && | 167 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && |
166 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && | 168 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && |
167 | fpos_cmp(ctx->pos, di->offset) <= 0) | 169 | fpos_cmp(ctx->pos, di->offset) <= 0) |
@@ -190,7 +192,7 @@ more: | |||
190 | if (last) { | 192 | if (last) { |
191 | /* remember our position */ | 193 | /* remember our position */ |
192 | fi->dentry = last; | 194 | fi->dentry = last; |
193 | fi->next_offset = di->offset; | 195 | fi->next_offset = fpos_off(di->offset); |
194 | } | 196 | } |
195 | dput(dentry); | 197 | dput(dentry); |
196 | return 0; | 198 | return 0; |
@@ -252,8 +254,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
252 | int err; | 254 | int err; |
253 | u32 ftype; | 255 | u32 ftype; |
254 | struct ceph_mds_reply_info_parsed *rinfo; | 256 | struct ceph_mds_reply_info_parsed *rinfo; |
255 | const int max_entries = fsc->mount_options->max_readdir; | ||
256 | const int max_bytes = fsc->mount_options->max_readdir_bytes; | ||
257 | 257 | ||
258 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); | 258 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); |
259 | if (fi->flags & CEPH_F_ATEND) | 259 | if (fi->flags & CEPH_F_ATEND) |
@@ -291,8 +291,9 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx) | |||
291 | ceph_snap(inode) != CEPH_SNAPDIR && | 291 | ceph_snap(inode) != CEPH_SNAPDIR && |
292 | __ceph_dir_is_complete(ci) && | 292 | __ceph_dir_is_complete(ci) && |
293 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 293 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
294 | u32 shared_gen = ci->i_shared_gen; | ||
294 | spin_unlock(&ci->i_ceph_lock); | 295 | spin_unlock(&ci->i_ceph_lock); |
295 | err = __dcache_readdir(file, ctx); | 296 | err = __dcache_readdir(file, ctx, shared_gen); |
296 | if (err != -EAGAIN) | 297 | if (err != -EAGAIN) |
297 | return err; | 298 | return err; |
298 | } else { | 299 | } else { |
@@ -322,14 +323,16 @@ more: | |||
322 | fi->last_readdir = NULL; | 323 | fi->last_readdir = NULL; |
323 | } | 324 | } |
324 | 325 | ||
325 | /* requery frag tree, as the frag topology may have changed */ | ||
326 | frag = ceph_choose_frag(ceph_inode(inode), frag, NULL, NULL); | ||
327 | |||
328 | dout("readdir fetching %llx.%llx frag %x offset '%s'\n", | 326 | dout("readdir fetching %llx.%llx frag %x offset '%s'\n", |
329 | ceph_vinop(inode), frag, fi->last_name); | 327 | ceph_vinop(inode), frag, fi->last_name); |
330 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); | 328 | req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); |
331 | if (IS_ERR(req)) | 329 | if (IS_ERR(req)) |
332 | return PTR_ERR(req); | 330 | return PTR_ERR(req); |
331 | err = ceph_alloc_readdir_reply_buffer(req, inode); | ||
332 | if (err) { | ||
333 | ceph_mdsc_put_request(req); | ||
334 | return err; | ||
335 | } | ||
333 | req->r_inode = inode; | 336 | req->r_inode = inode; |
334 | ihold(inode); | 337 | ihold(inode); |
335 | req->r_dentry = dget(file->f_dentry); | 338 | req->r_dentry = dget(file->f_dentry); |
@@ -340,9 +343,6 @@ more: | |||
340 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); | 343 | req->r_path2 = kstrdup(fi->last_name, GFP_NOFS); |
341 | req->r_readdir_offset = fi->next_offset; | 344 | req->r_readdir_offset = fi->next_offset; |
342 | req->r_args.readdir.frag = cpu_to_le32(frag); | 345 | req->r_args.readdir.frag = cpu_to_le32(frag); |
343 | req->r_args.readdir.max_entries = cpu_to_le32(max_entries); | ||
344 | req->r_args.readdir.max_bytes = cpu_to_le32(max_bytes); | ||
345 | req->r_num_caps = max_entries + 1; | ||
346 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 346 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
347 | if (err < 0) { | 347 | if (err < 0) { |
348 | ceph_mdsc_put_request(req); | 348 | ceph_mdsc_put_request(req); |
@@ -369,9 +369,9 @@ more: | |||
369 | fi->next_offset = 0; | 369 | fi->next_offset = 0; |
370 | off = fi->next_offset; | 370 | off = fi->next_offset; |
371 | } | 371 | } |
372 | fi->frag = frag; | ||
372 | fi->offset = fi->next_offset; | 373 | fi->offset = fi->next_offset; |
373 | fi->last_readdir = req; | 374 | fi->last_readdir = req; |
374 | fi->frag = frag; | ||
375 | 375 | ||
376 | if (req->r_reply_info.dir_end) { | 376 | if (req->r_reply_info.dir_end) { |
377 | kfree(fi->last_name); | 377 | kfree(fi->last_name); |
@@ -454,7 +454,7 @@ more: | |||
454 | return 0; | 454 | return 0; |
455 | } | 455 | } |
456 | 456 | ||
457 | static void reset_readdir(struct ceph_file_info *fi) | 457 | static void reset_readdir(struct ceph_file_info *fi, unsigned frag) |
458 | { | 458 | { |
459 | if (fi->last_readdir) { | 459 | if (fi->last_readdir) { |
460 | ceph_mdsc_put_request(fi->last_readdir); | 460 | ceph_mdsc_put_request(fi->last_readdir); |
@@ -462,7 +462,10 @@ static void reset_readdir(struct ceph_file_info *fi) | |||
462 | } | 462 | } |
463 | kfree(fi->last_name); | 463 | kfree(fi->last_name); |
464 | fi->last_name = NULL; | 464 | fi->last_name = NULL; |
465 | fi->next_offset = 2; /* compensate for . and .. */ | 465 | if (ceph_frag_is_leftmost(frag)) |
466 | fi->next_offset = 2; /* compensate for . and .. */ | ||
467 | else | ||
468 | fi->next_offset = 0; | ||
466 | if (fi->dentry) { | 469 | if (fi->dentry) { |
467 | dput(fi->dentry); | 470 | dput(fi->dentry); |
468 | fi->dentry = NULL; | 471 | fi->dentry = NULL; |
@@ -474,7 +477,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) | |||
474 | { | 477 | { |
475 | struct ceph_file_info *fi = file->private_data; | 478 | struct ceph_file_info *fi = file->private_data; |
476 | struct inode *inode = file->f_mapping->host; | 479 | struct inode *inode = file->f_mapping->host; |
477 | loff_t old_offset = offset; | 480 | loff_t old_offset = ceph_make_fpos(fi->frag, fi->next_offset); |
478 | loff_t retval; | 481 | loff_t retval; |
479 | 482 | ||
480 | mutex_lock(&inode->i_mutex); | 483 | mutex_lock(&inode->i_mutex); |
@@ -491,7 +494,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) | |||
491 | goto out; | 494 | goto out; |
492 | } | 495 | } |
493 | 496 | ||
494 | if (offset >= 0 && offset <= inode->i_sb->s_maxbytes) { | 497 | if (offset >= 0) { |
495 | if (offset != file->f_pos) { | 498 | if (offset != file->f_pos) { |
496 | file->f_pos = offset; | 499 | file->f_pos = offset; |
497 | file->f_version = 0; | 500 | file->f_version = 0; |
@@ -504,14 +507,14 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int whence) | |||
504 | * seek to new frag, or seek prior to current chunk. | 507 | * seek to new frag, or seek prior to current chunk. |
505 | */ | 508 | */ |
506 | if (offset == 0 || | 509 | if (offset == 0 || |
507 | fpos_frag(offset) != fpos_frag(old_offset) || | 510 | fpos_frag(offset) != fi->frag || |
508 | fpos_off(offset) < fi->offset) { | 511 | fpos_off(offset) < fi->offset) { |
509 | dout("dir_llseek dropping %p content\n", file); | 512 | dout("dir_llseek dropping %p content\n", file); |
510 | reset_readdir(fi); | 513 | reset_readdir(fi, fpos_frag(offset)); |
511 | } | 514 | } |
512 | 515 | ||
513 | /* bump dir_release_count if we did a forward seek */ | 516 | /* bump dir_release_count if we did a forward seek */ |
514 | if (offset > old_offset) | 517 | if (fpos_cmp(offset, old_offset) > 0) |
515 | fi->dir_release_count--; | 518 | fi->dir_release_count--; |
516 | } | 519 | } |
517 | out: | 520 | out: |
@@ -812,8 +815,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, | |||
812 | } | 815 | } |
813 | req->r_dentry = dget(dentry); | 816 | req->r_dentry = dget(dentry); |
814 | req->r_num_caps = 2; | 817 | req->r_num_caps = 2; |
815 | req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ | 818 | req->r_old_dentry = dget(old_dentry); |
816 | req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); | ||
817 | req->r_locked_dir = dir; | 819 | req->r_locked_dir = dir; |
818 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; | 820 | req->r_dentry_drop = CEPH_CAP_FILE_SHARED; |
819 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; | 821 | req->r_dentry_unless = CEPH_CAP_FILE_EXCL; |
@@ -911,10 +913,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
911 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); | 913 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RENAME, USE_AUTH_MDS); |
912 | if (IS_ERR(req)) | 914 | if (IS_ERR(req)) |
913 | return PTR_ERR(req); | 915 | return PTR_ERR(req); |
916 | ihold(old_dir); | ||
914 | req->r_dentry = dget(new_dentry); | 917 | req->r_dentry = dget(new_dentry); |
915 | req->r_num_caps = 2; | 918 | req->r_num_caps = 2; |
916 | req->r_old_dentry = dget(old_dentry); | 919 | req->r_old_dentry = dget(old_dentry); |
917 | req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); | 920 | req->r_old_dentry_dir = old_dir; |
918 | req->r_locked_dir = new_dir; | 921 | req->r_locked_dir = new_dir; |
919 | req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; | 922 | req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; |
920 | req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; | 923 | req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; |
diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 16796be53ca5..00d6af6a32ec 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c | |||
@@ -8,23 +8,6 @@ | |||
8 | #include "mds_client.h" | 8 | #include "mds_client.h" |
9 | 9 | ||
10 | /* | 10 | /* |
11 | * NFS export support | ||
12 | * | ||
13 | * NFS re-export of a ceph mount is, at present, only semireliable. | ||
14 | * The basic issue is that the Ceph architectures doesn't lend itself | ||
15 | * well to generating filehandles that will remain valid forever. | ||
16 | * | ||
17 | * So, we do our best. If you're lucky, your inode will be in the | ||
18 | * client's cache. If it's not, and you have a connectable fh, then | ||
19 | * the MDS server may be able to find it for you. Otherwise, you get | ||
20 | * ESTALE. | ||
21 | * | ||
22 | * There are ways to this more reliable, but in the non-connectable fh | ||
23 | * case, we won't every work perfectly, and in the connectable case, | ||
24 | * some changes are needed on the MDS side to work better. | ||
25 | */ | ||
26 | |||
27 | /* | ||
28 | * Basic fh | 11 | * Basic fh |
29 | */ | 12 | */ |
30 | struct ceph_nfs_fh { | 13 | struct ceph_nfs_fh { |
@@ -32,22 +15,12 @@ struct ceph_nfs_fh { | |||
32 | } __attribute__ ((packed)); | 15 | } __attribute__ ((packed)); |
33 | 16 | ||
34 | /* | 17 | /* |
35 | * Larger 'connectable' fh that includes parent ino and name hash. | 18 | * Larger fh that includes parent ino. |
36 | * Use this whenever possible, as it works more reliably. | ||
37 | */ | 19 | */ |
38 | struct ceph_nfs_confh { | 20 | struct ceph_nfs_confh { |
39 | u64 ino, parent_ino; | 21 | u64 ino, parent_ino; |
40 | u32 parent_name_hash; | ||
41 | } __attribute__ ((packed)); | 22 | } __attribute__ ((packed)); |
42 | 23 | ||
43 | /* | ||
44 | * The presence of @parent_inode here tells us whether NFS wants a | ||
45 | * connectable file handle. However, we want to make a connectionable | ||
46 | * file handle unconditionally so that the MDS gets as much of a hint | ||
47 | * as possible. That means we only use @parent_dentry to indicate | ||
48 | * whether nfsd wants a connectable fh, and whether we should indicate | ||
49 | * failure from a too-small @max_len. | ||
50 | */ | ||
51 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | 24 | static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, |
52 | struct inode *parent_inode) | 25 | struct inode *parent_inode) |
53 | { | 26 | { |
@@ -56,54 +29,36 @@ static int ceph_encode_fh(struct inode *inode, u32 *rawfh, int *max_len, | |||
56 | struct ceph_nfs_confh *cfh = (void *)rawfh; | 29 | struct ceph_nfs_confh *cfh = (void *)rawfh; |
57 | int connected_handle_length = sizeof(*cfh)/4; | 30 | int connected_handle_length = sizeof(*cfh)/4; |
58 | int handle_length = sizeof(*fh)/4; | 31 | int handle_length = sizeof(*fh)/4; |
59 | struct dentry *dentry; | ||
60 | struct dentry *parent; | ||
61 | 32 | ||
62 | /* don't re-export snaps */ | 33 | /* don't re-export snaps */ |
63 | if (ceph_snap(inode) != CEPH_NOSNAP) | 34 | if (ceph_snap(inode) != CEPH_NOSNAP) |
64 | return -EINVAL; | 35 | return -EINVAL; |
65 | 36 | ||
66 | dentry = d_find_alias(inode); | 37 | if (parent_inode && (*max_len < connected_handle_length)) { |
38 | *max_len = connected_handle_length; | ||
39 | return FILEID_INVALID; | ||
40 | } else if (*max_len < handle_length) { | ||
41 | *max_len = handle_length; | ||
42 | return FILEID_INVALID; | ||
43 | } | ||
67 | 44 | ||
68 | /* if we found an alias, generate a connectable fh */ | 45 | if (parent_inode) { |
69 | if (*max_len >= connected_handle_length && dentry) { | 46 | dout("encode_fh %llx with parent %llx\n", |
70 | dout("encode_fh %p connectable\n", dentry); | 47 | ceph_ino(inode), ceph_ino(parent_inode)); |
71 | spin_lock(&dentry->d_lock); | ||
72 | parent = dentry->d_parent; | ||
73 | cfh->ino = ceph_ino(inode); | 48 | cfh->ino = ceph_ino(inode); |
74 | cfh->parent_ino = ceph_ino(parent->d_inode); | 49 | cfh->parent_ino = ceph_ino(parent_inode); |
75 | cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, | ||
76 | dentry); | ||
77 | *max_len = connected_handle_length; | 50 | *max_len = connected_handle_length; |
78 | type = 2; | 51 | type = FILEID_INO32_GEN_PARENT; |
79 | spin_unlock(&dentry->d_lock); | ||
80 | } else if (*max_len >= handle_length) { | ||
81 | if (parent_inode) { | ||
82 | /* nfsd wants connectable */ | ||
83 | *max_len = connected_handle_length; | ||
84 | type = FILEID_INVALID; | ||
85 | } else { | ||
86 | dout("encode_fh %p\n", dentry); | ||
87 | fh->ino = ceph_ino(inode); | ||
88 | *max_len = handle_length; | ||
89 | type = 1; | ||
90 | } | ||
91 | } else { | 52 | } else { |
53 | dout("encode_fh %llx\n", ceph_ino(inode)); | ||
54 | fh->ino = ceph_ino(inode); | ||
92 | *max_len = handle_length; | 55 | *max_len = handle_length; |
93 | type = FILEID_INVALID; | 56 | type = FILEID_INO32_GEN; |
94 | } | 57 | } |
95 | if (dentry) | ||
96 | dput(dentry); | ||
97 | return type; | 58 | return type; |
98 | } | 59 | } |
99 | 60 | ||
100 | /* | 61 | static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino) |
101 | * convert regular fh to dentry | ||
102 | * | ||
103 | * FIXME: we should try harder by querying the mds for the ino. | ||
104 | */ | ||
105 | static struct dentry *__fh_to_dentry(struct super_block *sb, | ||
106 | struct ceph_nfs_fh *fh, int fh_len) | ||
107 | { | 62 | { |
108 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 63 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
109 | struct inode *inode; | 64 | struct inode *inode; |
@@ -111,11 +66,7 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
111 | struct ceph_vino vino; | 66 | struct ceph_vino vino; |
112 | int err; | 67 | int err; |
113 | 68 | ||
114 | if (fh_len < sizeof(*fh) / 4) | 69 | vino.ino = ino; |
115 | return ERR_PTR(-ESTALE); | ||
116 | |||
117 | dout("__fh_to_dentry %llx\n", fh->ino); | ||
118 | vino.ino = fh->ino; | ||
119 | vino.snap = CEPH_NOSNAP; | 70 | vino.snap = CEPH_NOSNAP; |
120 | inode = ceph_find_inode(sb, vino); | 71 | inode = ceph_find_inode(sb, vino); |
121 | if (!inode) { | 72 | if (!inode) { |
@@ -139,139 +90,161 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, | |||
139 | 90 | ||
140 | dentry = d_obtain_alias(inode); | 91 | dentry = d_obtain_alias(inode); |
141 | if (IS_ERR(dentry)) { | 92 | if (IS_ERR(dentry)) { |
142 | pr_err("fh_to_dentry %llx -- inode %p but ENOMEM\n", | ||
143 | fh->ino, inode); | ||
144 | iput(inode); | 93 | iput(inode); |
145 | return dentry; | 94 | return dentry; |
146 | } | 95 | } |
147 | err = ceph_init_dentry(dentry); | 96 | err = ceph_init_dentry(dentry); |
148 | if (err < 0) { | 97 | if (err < 0) { |
149 | iput(inode); | 98 | dput(dentry); |
150 | return ERR_PTR(err); | 99 | return ERR_PTR(err); |
151 | } | 100 | } |
152 | dout("__fh_to_dentry %llx %p dentry %p\n", fh->ino, inode, dentry); | 101 | dout("__fh_to_dentry %llx %p dentry %p\n", ino, inode, dentry); |
153 | return dentry; | 102 | return dentry; |
154 | } | 103 | } |
155 | 104 | ||
156 | /* | 105 | /* |
157 | * convert connectable fh to dentry | 106 | * convert regular fh to dentry |
158 | */ | 107 | */ |
159 | static struct dentry *__cfh_to_dentry(struct super_block *sb, | 108 | static struct dentry *ceph_fh_to_dentry(struct super_block *sb, |
160 | struct ceph_nfs_confh *cfh, int fh_len) | 109 | struct fid *fid, |
110 | int fh_len, int fh_type) | ||
111 | { | ||
112 | struct ceph_nfs_fh *fh = (void *)fid->raw; | ||
113 | |||
114 | if (fh_type != FILEID_INO32_GEN && | ||
115 | fh_type != FILEID_INO32_GEN_PARENT) | ||
116 | return NULL; | ||
117 | if (fh_len < sizeof(*fh) / 4) | ||
118 | return NULL; | ||
119 | |||
120 | dout("fh_to_dentry %llx\n", fh->ino); | ||
121 | return __fh_to_dentry(sb, fh->ino); | ||
122 | } | ||
123 | |||
124 | static struct dentry *__get_parent(struct super_block *sb, | ||
125 | struct dentry *child, u64 ino) | ||
161 | { | 126 | { |
162 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; | 127 | struct ceph_mds_client *mdsc = ceph_sb_to_client(sb)->mdsc; |
128 | struct ceph_mds_request *req; | ||
163 | struct inode *inode; | 129 | struct inode *inode; |
164 | struct dentry *dentry; | 130 | struct dentry *dentry; |
165 | struct ceph_vino vino; | ||
166 | int err; | 131 | int err; |
167 | 132 | ||
168 | if (fh_len < sizeof(*cfh) / 4) | 133 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT, |
169 | return ERR_PTR(-ESTALE); | 134 | USE_ANY_MDS); |
170 | 135 | if (IS_ERR(req)) | |
171 | dout("__cfh_to_dentry %llx (%llx/%x)\n", | 136 | return ERR_CAST(req); |
172 | cfh->ino, cfh->parent_ino, cfh->parent_name_hash); | ||
173 | |||
174 | vino.ino = cfh->ino; | ||
175 | vino.snap = CEPH_NOSNAP; | ||
176 | inode = ceph_find_inode(sb, vino); | ||
177 | if (!inode) { | ||
178 | struct ceph_mds_request *req; | ||
179 | |||
180 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPHASH, | ||
181 | USE_ANY_MDS); | ||
182 | if (IS_ERR(req)) | ||
183 | return ERR_CAST(req); | ||
184 | 137 | ||
185 | req->r_ino1 = vino; | 138 | if (child) { |
186 | req->r_ino2.ino = cfh->parent_ino; | 139 | req->r_inode = child->d_inode; |
187 | req->r_ino2.snap = CEPH_NOSNAP; | 140 | ihold(child->d_inode); |
188 | req->r_path2 = kmalloc(16, GFP_NOFS); | 141 | } else { |
189 | snprintf(req->r_path2, 16, "%d", cfh->parent_name_hash); | 142 | req->r_ino1 = (struct ceph_vino) { |
190 | req->r_num_caps = 1; | 143 | .ino = ino, |
191 | err = ceph_mdsc_do_request(mdsc, NULL, req); | 144 | .snap = CEPH_NOSNAP, |
192 | inode = req->r_target_inode; | 145 | }; |
193 | if (inode) | ||
194 | ihold(inode); | ||
195 | ceph_mdsc_put_request(req); | ||
196 | if (!inode) | ||
197 | return ERR_PTR(err ? err : -ESTALE); | ||
198 | } | 146 | } |
147 | req->r_num_caps = 1; | ||
148 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
149 | inode = req->r_target_inode; | ||
150 | if (inode) | ||
151 | ihold(inode); | ||
152 | ceph_mdsc_put_request(req); | ||
153 | if (!inode) | ||
154 | return ERR_PTR(-ENOENT); | ||
199 | 155 | ||
200 | dentry = d_obtain_alias(inode); | 156 | dentry = d_obtain_alias(inode); |
201 | if (IS_ERR(dentry)) { | 157 | if (IS_ERR(dentry)) { |
202 | pr_err("cfh_to_dentry %llx -- inode %p but ENOMEM\n", | ||
203 | cfh->ino, inode); | ||
204 | iput(inode); | 158 | iput(inode); |
205 | return dentry; | 159 | return dentry; |
206 | } | 160 | } |
207 | err = ceph_init_dentry(dentry); | 161 | err = ceph_init_dentry(dentry); |
208 | if (err < 0) { | 162 | if (err < 0) { |
209 | iput(inode); | 163 | dput(dentry); |
210 | return ERR_PTR(err); | 164 | return ERR_PTR(err); |
211 | } | 165 | } |
212 | dout("__cfh_to_dentry %llx %p dentry %p\n", cfh->ino, inode, dentry); | 166 | dout("__get_parent ino %llx parent %p ino %llx.%llx\n", |
167 | child ? ceph_ino(child->d_inode) : ino, | ||
168 | dentry, ceph_vinop(inode)); | ||
213 | return dentry; | 169 | return dentry; |
214 | } | 170 | } |
215 | 171 | ||
216 | static struct dentry *ceph_fh_to_dentry(struct super_block *sb, struct fid *fid, | 172 | struct dentry *ceph_get_parent(struct dentry *child) |
217 | int fh_len, int fh_type) | ||
218 | { | 173 | { |
219 | if (fh_type == 1) | 174 | /* don't re-export snaps */ |
220 | return __fh_to_dentry(sb, (struct ceph_nfs_fh *)fid->raw, | 175 | if (ceph_snap(child->d_inode) != CEPH_NOSNAP) |
221 | fh_len); | 176 | return ERR_PTR(-EINVAL); |
222 | else | 177 | |
223 | return __cfh_to_dentry(sb, (struct ceph_nfs_confh *)fid->raw, | 178 | dout("get_parent %p ino %llx.%llx\n", |
224 | fh_len); | 179 | child, ceph_vinop(child->d_inode)); |
180 | return __get_parent(child->d_sb, child, 0); | ||
225 | } | 181 | } |
226 | 182 | ||
227 | /* | 183 | /* |
228 | * get parent, if possible. | 184 | * convert regular fh to parent |
229 | * | ||
230 | * FIXME: we could do better by querying the mds to discover the | ||
231 | * parent. | ||
232 | */ | 185 | */ |
233 | static struct dentry *ceph_fh_to_parent(struct super_block *sb, | 186 | static struct dentry *ceph_fh_to_parent(struct super_block *sb, |
234 | struct fid *fid, | 187 | struct fid *fid, |
235 | int fh_len, int fh_type) | 188 | int fh_len, int fh_type) |
236 | { | 189 | { |
237 | struct ceph_nfs_confh *cfh = (void *)fid->raw; | 190 | struct ceph_nfs_confh *cfh = (void *)fid->raw; |
238 | struct ceph_vino vino; | ||
239 | struct inode *inode; | ||
240 | struct dentry *dentry; | 191 | struct dentry *dentry; |
241 | int err; | ||
242 | 192 | ||
243 | if (fh_type == 1) | 193 | if (fh_type != FILEID_INO32_GEN_PARENT) |
244 | return ERR_PTR(-ESTALE); | 194 | return NULL; |
245 | if (fh_len < sizeof(*cfh) / 4) | 195 | if (fh_len < sizeof(*cfh) / 4) |
246 | return ERR_PTR(-ESTALE); | 196 | return NULL; |
247 | 197 | ||
248 | pr_debug("fh_to_parent %llx/%d\n", cfh->parent_ino, | 198 | dout("fh_to_parent %llx\n", cfh->parent_ino); |
249 | cfh->parent_name_hash); | 199 | dentry = __get_parent(sb, NULL, cfh->ino); |
200 | if (IS_ERR(dentry) && PTR_ERR(dentry) == -ENOENT) | ||
201 | dentry = __fh_to_dentry(sb, cfh->parent_ino); | ||
202 | return dentry; | ||
203 | } | ||
250 | 204 | ||
251 | vino.ino = cfh->ino; | 205 | static int ceph_get_name(struct dentry *parent, char *name, |
252 | vino.snap = CEPH_NOSNAP; | 206 | struct dentry *child) |
253 | inode = ceph_find_inode(sb, vino); | 207 | { |
254 | if (!inode) | 208 | struct ceph_mds_client *mdsc; |
255 | return ERR_PTR(-ESTALE); | 209 | struct ceph_mds_request *req; |
210 | int err; | ||
256 | 211 | ||
257 | dentry = d_obtain_alias(inode); | 212 | mdsc = ceph_inode_to_client(child->d_inode)->mdsc; |
258 | if (IS_ERR(dentry)) { | 213 | req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPNAME, |
259 | pr_err("fh_to_parent %llx -- inode %p but ENOMEM\n", | 214 | USE_ANY_MDS); |
260 | cfh->ino, inode); | 215 | if (IS_ERR(req)) |
261 | iput(inode); | 216 | return PTR_ERR(req); |
262 | return dentry; | 217 | |
263 | } | 218 | mutex_lock(&parent->d_inode->i_mutex); |
264 | err = ceph_init_dentry(dentry); | 219 | |
265 | if (err < 0) { | 220 | req->r_inode = child->d_inode; |
266 | iput(inode); | 221 | ihold(child->d_inode); |
267 | return ERR_PTR(err); | 222 | req->r_ino2 = ceph_vino(parent->d_inode); |
223 | req->r_locked_dir = parent->d_inode; | ||
224 | req->r_num_caps = 2; | ||
225 | err = ceph_mdsc_do_request(mdsc, NULL, req); | ||
226 | |||
227 | mutex_unlock(&parent->d_inode->i_mutex); | ||
228 | |||
229 | if (!err) { | ||
230 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; | ||
231 | memcpy(name, rinfo->dname, rinfo->dname_len); | ||
232 | name[rinfo->dname_len] = 0; | ||
233 | dout("get_name %p ino %llx.%llx name %s\n", | ||
234 | child, ceph_vinop(child->d_inode), name); | ||
235 | } else { | ||
236 | dout("get_name %p ino %llx.%llx err %d\n", | ||
237 | child, ceph_vinop(child->d_inode), err); | ||
268 | } | 238 | } |
269 | dout("fh_to_parent %llx %p dentry %p\n", cfh->ino, inode, dentry); | 239 | |
270 | return dentry; | 240 | ceph_mdsc_put_request(req); |
241 | return err; | ||
271 | } | 242 | } |
272 | 243 | ||
273 | const struct export_operations ceph_export_ops = { | 244 | const struct export_operations ceph_export_ops = { |
274 | .encode_fh = ceph_encode_fh, | 245 | .encode_fh = ceph_encode_fh, |
275 | .fh_to_dentry = ceph_fh_to_dentry, | 246 | .fh_to_dentry = ceph_fh_to_dentry, |
276 | .fh_to_parent = ceph_fh_to_parent, | 247 | .fh_to_parent = ceph_fh_to_parent, |
248 | .get_parent = ceph_get_parent, | ||
249 | .get_name = ceph_get_name, | ||
277 | }; | 250 | }; |
diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 09c7afe32e49..66075a4ad979 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c | |||
@@ -210,7 +210,7 @@ int ceph_open(struct inode *inode, struct file *file) | |||
210 | ihold(inode); | 210 | ihold(inode); |
211 | 211 | ||
212 | req->r_num_caps = 1; | 212 | req->r_num_caps = 1; |
213 | if (flags & (O_CREAT|O_TRUNC)) | 213 | if (flags & O_CREAT) |
214 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); | 214 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); |
215 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | 215 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); |
216 | iput(parent_inode); | 216 | iput(parent_inode); |
@@ -291,8 +291,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, | |||
291 | } | 291 | } |
292 | err = finish_open(file, dentry, ceph_open, opened); | 292 | err = finish_open(file, dentry, ceph_open, opened); |
293 | } | 293 | } |
294 | |||
295 | out_err: | 294 | out_err: |
295 | if (!req->r_err && req->r_target_inode) | ||
296 | ceph_put_fmode(ceph_inode(req->r_target_inode), req->r_fmode); | ||
296 | ceph_mdsc_put_request(req); | 297 | ceph_mdsc_put_request(req); |
297 | dout("atomic_open result=%d\n", err); | 298 | dout("atomic_open result=%d\n", err); |
298 | return err; | 299 | return err; |
@@ -970,6 +971,7 @@ retry_snap: | |||
970 | goto retry_snap; | 971 | goto retry_snap; |
971 | } | 972 | } |
972 | } else { | 973 | } else { |
974 | loff_t old_size = inode->i_size; | ||
973 | /* | 975 | /* |
974 | * No need to acquire the i_truncate_mutex. Because | 976 | * No need to acquire the i_truncate_mutex. Because |
975 | * the MDS revokes Fwb caps before sending truncate | 977 | * the MDS revokes Fwb caps before sending truncate |
@@ -980,6 +982,8 @@ retry_snap: | |||
980 | written = generic_file_buffered_write(iocb, iov, nr_segs, | 982 | written = generic_file_buffered_write(iocb, iov, nr_segs, |
981 | pos, &iocb->ki_pos, | 983 | pos, &iocb->ki_pos, |
982 | count, 0); | 984 | count, 0); |
985 | if (inode->i_size > old_size) | ||
986 | ceph_fscache_update_objectsize(inode); | ||
983 | mutex_unlock(&inode->i_mutex); | 987 | mutex_unlock(&inode->i_mutex); |
984 | } | 988 | } |
985 | 989 | ||
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 32d519d8a2e2..0b0728e5be2d 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c | |||
@@ -659,14 +659,6 @@ static int fill_inode(struct inode *inode, | |||
659 | le32_to_cpu(info->time_warp_seq), | 659 | le32_to_cpu(info->time_warp_seq), |
660 | &ctime, &mtime, &atime); | 660 | &ctime, &mtime, &atime); |
661 | 661 | ||
662 | /* only update max_size on auth cap */ | ||
663 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
664 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
665 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
666 | le64_to_cpu(info->max_size)); | ||
667 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
668 | } | ||
669 | |||
670 | ci->i_layout = info->layout; | 662 | ci->i_layout = info->layout; |
671 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; | 663 | inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; |
672 | 664 | ||
@@ -755,6 +747,14 @@ static int fill_inode(struct inode *inode, | |||
755 | ci->i_max_offset = 2; | 747 | ci->i_max_offset = 2; |
756 | } | 748 | } |
757 | no_change: | 749 | no_change: |
750 | /* only update max_size on auth cap */ | ||
751 | if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && | ||
752 | ci->i_max_size != le64_to_cpu(info->max_size)) { | ||
753 | dout("max_size %lld -> %llu\n", ci->i_max_size, | ||
754 | le64_to_cpu(info->max_size)); | ||
755 | ci->i_max_size = le64_to_cpu(info->max_size); | ||
756 | } | ||
757 | |||
758 | spin_unlock(&ci->i_ceph_lock); | 758 | spin_unlock(&ci->i_ceph_lock); |
759 | 759 | ||
760 | /* queue truncate if we saw i_size decrease */ | 760 | /* queue truncate if we saw i_size decrease */ |
@@ -1044,10 +1044,59 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1044 | session, req->r_request_started, -1, | 1044 | session, req->r_request_started, -1, |
1045 | &req->r_caps_reservation); | 1045 | &req->r_caps_reservation); |
1046 | if (err < 0) | 1046 | if (err < 0) |
1047 | return err; | 1047 | goto done; |
1048 | } else { | 1048 | } else { |
1049 | WARN_ON_ONCE(1); | 1049 | WARN_ON_ONCE(1); |
1050 | } | 1050 | } |
1051 | |||
1052 | if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME) { | ||
1053 | struct qstr dname; | ||
1054 | struct dentry *dn, *parent; | ||
1055 | |||
1056 | BUG_ON(!rinfo->head->is_target); | ||
1057 | BUG_ON(req->r_dentry); | ||
1058 | |||
1059 | parent = d_find_any_alias(dir); | ||
1060 | BUG_ON(!parent); | ||
1061 | |||
1062 | dname.name = rinfo->dname; | ||
1063 | dname.len = rinfo->dname_len; | ||
1064 | dname.hash = full_name_hash(dname.name, dname.len); | ||
1065 | vino.ino = le64_to_cpu(rinfo->targeti.in->ino); | ||
1066 | vino.snap = le64_to_cpu(rinfo->targeti.in->snapid); | ||
1067 | retry_lookup: | ||
1068 | dn = d_lookup(parent, &dname); | ||
1069 | dout("d_lookup on parent=%p name=%.*s got %p\n", | ||
1070 | parent, dname.len, dname.name, dn); | ||
1071 | |||
1072 | if (!dn) { | ||
1073 | dn = d_alloc(parent, &dname); | ||
1074 | dout("d_alloc %p '%.*s' = %p\n", parent, | ||
1075 | dname.len, dname.name, dn); | ||
1076 | if (dn == NULL) { | ||
1077 | dput(parent); | ||
1078 | err = -ENOMEM; | ||
1079 | goto done; | ||
1080 | } | ||
1081 | err = ceph_init_dentry(dn); | ||
1082 | if (err < 0) { | ||
1083 | dput(dn); | ||
1084 | dput(parent); | ||
1085 | goto done; | ||
1086 | } | ||
1087 | } else if (dn->d_inode && | ||
1088 | (ceph_ino(dn->d_inode) != vino.ino || | ||
1089 | ceph_snap(dn->d_inode) != vino.snap)) { | ||
1090 | dout(" dn %p points to wrong inode %p\n", | ||
1091 | dn, dn->d_inode); | ||
1092 | d_delete(dn); | ||
1093 | dput(dn); | ||
1094 | goto retry_lookup; | ||
1095 | } | ||
1096 | |||
1097 | req->r_dentry = dn; | ||
1098 | dput(parent); | ||
1099 | } | ||
1051 | } | 1100 | } |
1052 | 1101 | ||
1053 | if (rinfo->head->is_target) { | 1102 | if (rinfo->head->is_target) { |
@@ -1063,7 +1112,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, | |||
1063 | 1112 | ||
1064 | err = fill_inode(in, &rinfo->targeti, NULL, | 1113 | err = fill_inode(in, &rinfo->targeti, NULL, |
1065 | session, req->r_request_started, | 1114 | session, req->r_request_started, |
1066 | (le32_to_cpu(rinfo->head->result) == 0) ? | 1115 | (!req->r_aborted && rinfo->head->result == 0) ? |
1067 | req->r_fmode : -1, | 1116 | req->r_fmode : -1, |
1068 | &req->r_caps_reservation); | 1117 | &req->r_caps_reservation); |
1069 | if (err < 0) { | 1118 | if (err < 0) { |
@@ -1616,8 +1665,6 @@ static const struct inode_operations ceph_symlink_iops = { | |||
1616 | .getxattr = ceph_getxattr, | 1665 | .getxattr = ceph_getxattr, |
1617 | .listxattr = ceph_listxattr, | 1666 | .listxattr = ceph_listxattr, |
1618 | .removexattr = ceph_removexattr, | 1667 | .removexattr = ceph_removexattr, |
1619 | .get_acl = ceph_get_acl, | ||
1620 | .set_acl = ceph_set_acl, | ||
1621 | }; | 1668 | }; |
1622 | 1669 | ||
1623 | /* | 1670 | /* |
@@ -1627,7 +1674,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1627 | { | 1674 | { |
1628 | struct inode *inode = dentry->d_inode; | 1675 | struct inode *inode = dentry->d_inode; |
1629 | struct ceph_inode_info *ci = ceph_inode(inode); | 1676 | struct ceph_inode_info *ci = ceph_inode(inode); |
1630 | struct inode *parent_inode; | ||
1631 | const unsigned int ia_valid = attr->ia_valid; | 1677 | const unsigned int ia_valid = attr->ia_valid; |
1632 | struct ceph_mds_request *req; | 1678 | struct ceph_mds_request *req; |
1633 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; | 1679 | struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; |
@@ -1819,9 +1865,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) | |||
1819 | req->r_inode_drop = release; | 1865 | req->r_inode_drop = release; |
1820 | req->r_args.setattr.mask = cpu_to_le32(mask); | 1866 | req->r_args.setattr.mask = cpu_to_le32(mask); |
1821 | req->r_num_caps = 1; | 1867 | req->r_num_caps = 1; |
1822 | parent_inode = ceph_get_dentry_parent_inode(dentry); | 1868 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
1823 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
1824 | iput(parent_inode); | ||
1825 | } | 1869 | } |
1826 | dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, | 1870 | dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, |
1827 | ceph_cap_string(dirtied), mask); | 1871 | ceph_cap_string(dirtied), mask); |
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index dc66c9e023e4..efbe08289292 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c | |||
@@ -64,7 +64,6 @@ static long __validate_layout(struct ceph_mds_client *mdsc, | |||
64 | static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | 64 | static long ceph_ioctl_set_layout(struct file *file, void __user *arg) |
65 | { | 65 | { |
66 | struct inode *inode = file_inode(file); | 66 | struct inode *inode = file_inode(file); |
67 | struct inode *parent_inode; | ||
68 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; | 67 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
69 | struct ceph_mds_request *req; | 68 | struct ceph_mds_request *req; |
70 | struct ceph_ioctl_layout l; | 69 | struct ceph_ioctl_layout l; |
@@ -121,9 +120,7 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) | |||
121 | cpu_to_le32(l.object_size); | 120 | cpu_to_le32(l.object_size); |
122 | req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); | 121 | req->r_args.setlayout.layout.fl_pg_pool = cpu_to_le32(l.data_pool); |
123 | 122 | ||
124 | parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); | 123 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
125 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
126 | iput(parent_inode); | ||
127 | ceph_mdsc_put_request(req); | 124 | ceph_mdsc_put_request(req); |
128 | return err; | 125 | return err; |
129 | } | 126 | } |
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c index ae6d14e82b0f..d94ba0df9f4d 100644 --- a/fs/ceph/locks.c +++ b/fs/ceph/locks.c | |||
@@ -2,11 +2,31 @@ | |||
2 | 2 | ||
3 | #include <linux/file.h> | 3 | #include <linux/file.h> |
4 | #include <linux/namei.h> | 4 | #include <linux/namei.h> |
5 | #include <linux/random.h> | ||
5 | 6 | ||
6 | #include "super.h" | 7 | #include "super.h" |
7 | #include "mds_client.h" | 8 | #include "mds_client.h" |
8 | #include <linux/ceph/pagelist.h> | 9 | #include <linux/ceph/pagelist.h> |
9 | 10 | ||
11 | static u64 lock_secret; | ||
12 | |||
13 | static inline u64 secure_addr(void *addr) | ||
14 | { | ||
15 | u64 v = lock_secret ^ (u64)(unsigned long)addr; | ||
16 | /* | ||
17 | * Set the most significant bit, so that MDS knows the 'owner' | ||
18 | * is sufficient to identify the owner of lock. (old code uses | ||
19 | * both 'owner' and 'pid') | ||
20 | */ | ||
21 | v |= (1ULL << 63); | ||
22 | return v; | ||
23 | } | ||
24 | |||
25 | void __init ceph_flock_init(void) | ||
26 | { | ||
27 | get_random_bytes(&lock_secret, sizeof(lock_secret)); | ||
28 | } | ||
29 | |||
10 | /** | 30 | /** |
11 | * Implement fcntl and flock locking functions. | 31 | * Implement fcntl and flock locking functions. |
12 | */ | 32 | */ |
@@ -14,11 +34,11 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
14 | int cmd, u8 wait, struct file_lock *fl) | 34 | int cmd, u8 wait, struct file_lock *fl) |
15 | { | 35 | { |
16 | struct inode *inode = file_inode(file); | 36 | struct inode *inode = file_inode(file); |
17 | struct ceph_mds_client *mdsc = | 37 | struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; |
18 | ceph_sb_to_client(inode->i_sb)->mdsc; | ||
19 | struct ceph_mds_request *req; | 38 | struct ceph_mds_request *req; |
20 | int err; | 39 | int err; |
21 | u64 length = 0; | 40 | u64 length = 0; |
41 | u64 owner; | ||
22 | 42 | ||
23 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); | 43 | req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS); |
24 | if (IS_ERR(req)) | 44 | if (IS_ERR(req)) |
@@ -32,25 +52,27 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file, | |||
32 | else | 52 | else |
33 | length = fl->fl_end - fl->fl_start + 1; | 53 | length = fl->fl_end - fl->fl_start + 1; |
34 | 54 | ||
35 | dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, " | 55 | if (lock_type == CEPH_LOCK_FCNTL) |
36 | "length: %llu, wait: %d, type: %d", (int)lock_type, | 56 | owner = secure_addr(fl->fl_owner); |
37 | (int)operation, (u64)fl->fl_pid, fl->fl_start, | 57 | else |
38 | length, wait, fl->fl_type); | 58 | owner = secure_addr(fl->fl_file); |
59 | |||
60 | dout("ceph_lock_message: rule: %d, op: %d, owner: %llx, pid: %llu, " | ||
61 | "start: %llu, length: %llu, wait: %d, type: %d", (int)lock_type, | ||
62 | (int)operation, owner, (u64)fl->fl_pid, fl->fl_start, length, | ||
63 | wait, fl->fl_type); | ||
39 | 64 | ||
40 | req->r_args.filelock_change.rule = lock_type; | 65 | req->r_args.filelock_change.rule = lock_type; |
41 | req->r_args.filelock_change.type = cmd; | 66 | req->r_args.filelock_change.type = cmd; |
67 | req->r_args.filelock_change.owner = cpu_to_le64(owner); | ||
42 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); | 68 | req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid); |
43 | /* This should be adjusted, but I'm not sure if | ||
44 | namespaces actually get id numbers*/ | ||
45 | req->r_args.filelock_change.pid_namespace = | ||
46 | cpu_to_le64((u64)(unsigned long)fl->fl_nspid); | ||
47 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); | 69 | req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start); |
48 | req->r_args.filelock_change.length = cpu_to_le64(length); | 70 | req->r_args.filelock_change.length = cpu_to_le64(length); |
49 | req->r_args.filelock_change.wait = wait; | 71 | req->r_args.filelock_change.wait = wait; |
50 | 72 | ||
51 | err = ceph_mdsc_do_request(mdsc, inode, req); | 73 | err = ceph_mdsc_do_request(mdsc, inode, req); |
52 | 74 | ||
53 | if ( operation == CEPH_MDS_OP_GETFILELOCK){ | 75 | if (operation == CEPH_MDS_OP_GETFILELOCK) { |
54 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); | 76 | fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid); |
55 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) | 77 | if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type) |
56 | fl->fl_type = F_RDLCK; | 78 | fl->fl_type = F_RDLCK; |
@@ -87,14 +109,19 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
87 | u8 wait = 0; | 109 | u8 wait = 0; |
88 | u16 op = CEPH_MDS_OP_SETFILELOCK; | 110 | u16 op = CEPH_MDS_OP_SETFILELOCK; |
89 | 111 | ||
90 | fl->fl_nspid = get_pid(task_tgid(current)); | 112 | if (!(fl->fl_flags & FL_POSIX)) |
91 | dout("ceph_lock, fl_pid:%d", fl->fl_pid); | 113 | return -ENOLCK; |
114 | /* No mandatory locks */ | ||
115 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) | ||
116 | return -ENOLCK; | ||
117 | |||
118 | dout("ceph_lock, fl_owner: %p", fl->fl_owner); | ||
92 | 119 | ||
93 | /* set wait bit as appropriate, then make command as Ceph expects it*/ | 120 | /* set wait bit as appropriate, then make command as Ceph expects it*/ |
94 | if (F_SETLKW == cmd) | 121 | if (IS_GETLK(cmd)) |
95 | wait = 1; | ||
96 | if (F_GETLK == cmd) | ||
97 | op = CEPH_MDS_OP_GETFILELOCK; | 122 | op = CEPH_MDS_OP_GETFILELOCK; |
123 | else if (IS_SETLKW(cmd)) | ||
124 | wait = 1; | ||
98 | 125 | ||
99 | if (F_RDLCK == fl->fl_type) | 126 | if (F_RDLCK == fl->fl_type) |
100 | lock_cmd = CEPH_LOCK_SHARED; | 127 | lock_cmd = CEPH_LOCK_SHARED; |
@@ -105,7 +132,7 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl) | |||
105 | 132 | ||
106 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); | 133 | err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl); |
107 | if (!err) { | 134 | if (!err) { |
108 | if ( op != CEPH_MDS_OP_GETFILELOCK ){ | 135 | if (op != CEPH_MDS_OP_GETFILELOCK) { |
109 | dout("mds locked, locking locally"); | 136 | dout("mds locked, locking locally"); |
110 | err = posix_lock_file(file, fl, NULL); | 137 | err = posix_lock_file(file, fl, NULL); |
111 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { | 138 | if (err && (CEPH_MDS_OP_SETFILELOCK == op)) { |
@@ -131,20 +158,22 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl) | |||
131 | { | 158 | { |
132 | u8 lock_cmd; | 159 | u8 lock_cmd; |
133 | int err; | 160 | int err; |
134 | u8 wait = 1; | 161 | u8 wait = 0; |
135 | 162 | ||
136 | fl->fl_nspid = get_pid(task_tgid(current)); | 163 | if (!(fl->fl_flags & FL_FLOCK)) |
137 | dout("ceph_flock, fl_pid:%d", fl->fl_pid); | 164 | return -ENOLCK; |
138 | 165 | /* No mandatory locks */ | |
139 | /* set wait bit, then clear it out of cmd*/ | 166 | if (__mandatory_lock(file->f_mapping->host) && fl->fl_type != F_UNLCK) |
140 | if (cmd & LOCK_NB) | 167 | return -ENOLCK; |
141 | wait = 0; | 168 | |
142 | cmd = cmd & (LOCK_SH | LOCK_EX | LOCK_UN); | 169 | dout("ceph_flock, fl_file: %p", fl->fl_file); |
143 | /* set command sequence that Ceph wants to see: | 170 | |
144 | shared lock, exclusive lock, or unlock */ | 171 | if (IS_SETLKW(cmd)) |
145 | if (LOCK_SH == cmd) | 172 | wait = 1; |
173 | |||
174 | if (F_RDLCK == fl->fl_type) | ||
146 | lock_cmd = CEPH_LOCK_SHARED; | 175 | lock_cmd = CEPH_LOCK_SHARED; |
147 | else if (LOCK_EX == cmd) | 176 | else if (F_WRLCK == fl->fl_type) |
148 | lock_cmd = CEPH_LOCK_EXCL; | 177 | lock_cmd = CEPH_LOCK_EXCL; |
149 | else | 178 | else |
150 | lock_cmd = CEPH_LOCK_UNLOCK; | 179 | lock_cmd = CEPH_LOCK_UNLOCK; |
@@ -280,13 +309,14 @@ int lock_to_ceph_filelock(struct file_lock *lock, | |||
280 | struct ceph_filelock *cephlock) | 309 | struct ceph_filelock *cephlock) |
281 | { | 310 | { |
282 | int err = 0; | 311 | int err = 0; |
283 | |||
284 | cephlock->start = cpu_to_le64(lock->fl_start); | 312 | cephlock->start = cpu_to_le64(lock->fl_start); |
285 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); | 313 | cephlock->length = cpu_to_le64(lock->fl_end - lock->fl_start + 1); |
286 | cephlock->client = cpu_to_le64(0); | 314 | cephlock->client = cpu_to_le64(0); |
287 | cephlock->pid = cpu_to_le64(lock->fl_pid); | 315 | cephlock->pid = cpu_to_le64((u64)lock->fl_pid); |
288 | cephlock->pid_namespace = | 316 | if (lock->fl_flags & FL_POSIX) |
289 | cpu_to_le64((u64)(unsigned long)lock->fl_nspid); | 317 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_owner)); |
318 | else | ||
319 | cephlock->owner = cpu_to_le64(secure_addr(lock->fl_file)); | ||
290 | 320 | ||
291 | switch (lock->fl_type) { | 321 | switch (lock->fl_type) { |
292 | case F_RDLCK: | 322 | case F_RDLCK: |
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index f4f050a69a48..2b4d093d0563 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/fs.h> | 3 | #include <linux/fs.h> |
4 | #include <linux/wait.h> | 4 | #include <linux/wait.h> |
5 | #include <linux/slab.h> | 5 | #include <linux/slab.h> |
6 | #include <linux/gfp.h> | ||
6 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
7 | #include <linux/debugfs.h> | 8 | #include <linux/debugfs.h> |
8 | #include <linux/seq_file.h> | 9 | #include <linux/seq_file.h> |
@@ -165,21 +166,18 @@ static int parse_reply_info_dir(void **p, void *end, | |||
165 | if (num == 0) | 166 | if (num == 0) |
166 | goto done; | 167 | goto done; |
167 | 168 | ||
168 | /* alloc large array */ | 169 | BUG_ON(!info->dir_in); |
169 | info->dir_nr = num; | ||
170 | info->dir_in = kcalloc(num, sizeof(*info->dir_in) + | ||
171 | sizeof(*info->dir_dname) + | ||
172 | sizeof(*info->dir_dname_len) + | ||
173 | sizeof(*info->dir_dlease), | ||
174 | GFP_NOFS); | ||
175 | if (info->dir_in == NULL) { | ||
176 | err = -ENOMEM; | ||
177 | goto out_bad; | ||
178 | } | ||
179 | info->dir_dname = (void *)(info->dir_in + num); | 170 | info->dir_dname = (void *)(info->dir_in + num); |
180 | info->dir_dname_len = (void *)(info->dir_dname + num); | 171 | info->dir_dname_len = (void *)(info->dir_dname + num); |
181 | info->dir_dlease = (void *)(info->dir_dname_len + num); | 172 | info->dir_dlease = (void *)(info->dir_dname_len + num); |
173 | if ((unsigned long)(info->dir_dlease + num) > | ||
174 | (unsigned long)info->dir_in + info->dir_buf_size) { | ||
175 | pr_err("dir contents are larger than expected\n"); | ||
176 | WARN_ON(1); | ||
177 | goto bad; | ||
178 | } | ||
182 | 179 | ||
180 | info->dir_nr = num; | ||
183 | while (num) { | 181 | while (num) { |
184 | /* dentry */ | 182 | /* dentry */ |
185 | ceph_decode_need(p, end, sizeof(u32)*2, bad); | 183 | ceph_decode_need(p, end, sizeof(u32)*2, bad); |
@@ -327,7 +325,9 @@ out_bad: | |||
327 | 325 | ||
328 | static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) | 326 | static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info) |
329 | { | 327 | { |
330 | kfree(info->dir_in); | 328 | if (!info->dir_in) |
329 | return; | ||
330 | free_pages((unsigned long)info->dir_in, get_order(info->dir_buf_size)); | ||
331 | } | 331 | } |
332 | 332 | ||
333 | 333 | ||
@@ -512,12 +512,11 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
512 | struct ceph_mds_request *req = container_of(kref, | 512 | struct ceph_mds_request *req = container_of(kref, |
513 | struct ceph_mds_request, | 513 | struct ceph_mds_request, |
514 | r_kref); | 514 | r_kref); |
515 | destroy_reply_info(&req->r_reply_info); | ||
515 | if (req->r_request) | 516 | if (req->r_request) |
516 | ceph_msg_put(req->r_request); | 517 | ceph_msg_put(req->r_request); |
517 | if (req->r_reply) { | 518 | if (req->r_reply) |
518 | ceph_msg_put(req->r_reply); | 519 | ceph_msg_put(req->r_reply); |
519 | destroy_reply_info(&req->r_reply_info); | ||
520 | } | ||
521 | if (req->r_inode) { | 520 | if (req->r_inode) { |
522 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 521 | ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
523 | iput(req->r_inode); | 522 | iput(req->r_inode); |
@@ -528,7 +527,9 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
528 | iput(req->r_target_inode); | 527 | iput(req->r_target_inode); |
529 | if (req->r_dentry) | 528 | if (req->r_dentry) |
530 | dput(req->r_dentry); | 529 | dput(req->r_dentry); |
531 | if (req->r_old_dentry) { | 530 | if (req->r_old_dentry) |
531 | dput(req->r_old_dentry); | ||
532 | if (req->r_old_dentry_dir) { | ||
532 | /* | 533 | /* |
533 | * track (and drop pins for) r_old_dentry_dir | 534 | * track (and drop pins for) r_old_dentry_dir |
534 | * separately, since r_old_dentry's d_parent may have | 535 | * separately, since r_old_dentry's d_parent may have |
@@ -537,7 +538,6 @@ void ceph_mdsc_release_request(struct kref *kref) | |||
537 | */ | 538 | */ |
538 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), | 539 | ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), |
539 | CEPH_CAP_PIN); | 540 | CEPH_CAP_PIN); |
540 | dput(req->r_old_dentry); | ||
541 | iput(req->r_old_dentry_dir); | 541 | iput(req->r_old_dentry_dir); |
542 | } | 542 | } |
543 | kfree(req->r_path1); | 543 | kfree(req->r_path1); |
@@ -1311,6 +1311,9 @@ static int trim_caps(struct ceph_mds_client *mdsc, | |||
1311 | trim_caps - session->s_trim_caps); | 1311 | trim_caps - session->s_trim_caps); |
1312 | session->s_trim_caps = 0; | 1312 | session->s_trim_caps = 0; |
1313 | } | 1313 | } |
1314 | |||
1315 | ceph_add_cap_releases(mdsc, session); | ||
1316 | ceph_send_cap_releases(mdsc, session); | ||
1314 | return 0; | 1317 | return 0; |
1315 | } | 1318 | } |
1316 | 1319 | ||
@@ -1461,15 +1464,18 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
1461 | 1464 | ||
1462 | dout("discard_cap_releases mds%d\n", session->s_mds); | 1465 | dout("discard_cap_releases mds%d\n", session->s_mds); |
1463 | 1466 | ||
1464 | /* zero out the in-progress message */ | 1467 | if (!list_empty(&session->s_cap_releases)) { |
1465 | msg = list_first_entry(&session->s_cap_releases, | 1468 | /* zero out the in-progress message */ |
1466 | struct ceph_msg, list_head); | 1469 | msg = list_first_entry(&session->s_cap_releases, |
1467 | head = msg->front.iov_base; | 1470 | struct ceph_msg, list_head); |
1468 | num = le32_to_cpu(head->num); | 1471 | head = msg->front.iov_base; |
1469 | dout("discard_cap_releases mds%d %p %u\n", session->s_mds, msg, num); | 1472 | num = le32_to_cpu(head->num); |
1470 | head->num = cpu_to_le32(0); | 1473 | dout("discard_cap_releases mds%d %p %u\n", |
1471 | msg->front.iov_len = sizeof(*head); | 1474 | session->s_mds, msg, num); |
1472 | session->s_num_cap_releases += num; | 1475 | head->num = cpu_to_le32(0); |
1476 | msg->front.iov_len = sizeof(*head); | ||
1477 | session->s_num_cap_releases += num; | ||
1478 | } | ||
1473 | 1479 | ||
1474 | /* requeue completed messages */ | 1480 | /* requeue completed messages */ |
1475 | while (!list_empty(&session->s_cap_releases_done)) { | 1481 | while (!list_empty(&session->s_cap_releases_done)) { |
@@ -1492,6 +1498,43 @@ static void discard_cap_releases(struct ceph_mds_client *mdsc, | |||
1492 | * requests | 1498 | * requests |
1493 | */ | 1499 | */ |
1494 | 1500 | ||
1501 | int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | ||
1502 | struct inode *dir) | ||
1503 | { | ||
1504 | struct ceph_inode_info *ci = ceph_inode(dir); | ||
1505 | struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info; | ||
1506 | struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options; | ||
1507 | size_t size = sizeof(*rinfo->dir_in) + sizeof(*rinfo->dir_dname_len) + | ||
1508 | sizeof(*rinfo->dir_dname) + sizeof(*rinfo->dir_dlease); | ||
1509 | int order, num_entries; | ||
1510 | |||
1511 | spin_lock(&ci->i_ceph_lock); | ||
1512 | num_entries = ci->i_files + ci->i_subdirs; | ||
1513 | spin_unlock(&ci->i_ceph_lock); | ||
1514 | num_entries = max(num_entries, 1); | ||
1515 | num_entries = min(num_entries, opt->max_readdir); | ||
1516 | |||
1517 | order = get_order(size * num_entries); | ||
1518 | while (order >= 0) { | ||
1519 | rinfo->dir_in = (void*)__get_free_pages(GFP_NOFS | __GFP_NOWARN, | ||
1520 | order); | ||
1521 | if (rinfo->dir_in) | ||
1522 | break; | ||
1523 | order--; | ||
1524 | } | ||
1525 | if (!rinfo->dir_in) | ||
1526 | return -ENOMEM; | ||
1527 | |||
1528 | num_entries = (PAGE_SIZE << order) / size; | ||
1529 | num_entries = min(num_entries, opt->max_readdir); | ||
1530 | |||
1531 | rinfo->dir_buf_size = PAGE_SIZE << order; | ||
1532 | req->r_num_caps = num_entries + 1; | ||
1533 | req->r_args.readdir.max_entries = cpu_to_le32(num_entries); | ||
1534 | req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes); | ||
1535 | return 0; | ||
1536 | } | ||
1537 | |||
1495 | /* | 1538 | /* |
1496 | * Create an mds request. | 1539 | * Create an mds request. |
1497 | */ | 1540 | */ |
@@ -2053,7 +2096,7 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, | |||
2053 | ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); | 2096 | ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); |
2054 | if (req->r_locked_dir) | 2097 | if (req->r_locked_dir) |
2055 | ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); | 2098 | ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); |
2056 | if (req->r_old_dentry) | 2099 | if (req->r_old_dentry_dir) |
2057 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), | 2100 | ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), |
2058 | CEPH_CAP_PIN); | 2101 | CEPH_CAP_PIN); |
2059 | 2102 | ||
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 68288917c737..e90cfccf93bd 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h | |||
@@ -67,6 +67,7 @@ struct ceph_mds_reply_info_parsed { | |||
67 | /* for readdir results */ | 67 | /* for readdir results */ |
68 | struct { | 68 | struct { |
69 | struct ceph_mds_reply_dirfrag *dir_dir; | 69 | struct ceph_mds_reply_dirfrag *dir_dir; |
70 | size_t dir_buf_size; | ||
70 | int dir_nr; | 71 | int dir_nr; |
71 | char **dir_dname; | 72 | char **dir_dname; |
72 | u32 *dir_dname_len; | 73 | u32 *dir_dname_len; |
@@ -346,7 +347,8 @@ extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, | |||
346 | struct dentry *dn); | 347 | struct dentry *dn); |
347 | 348 | ||
348 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); | 349 | extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); |
349 | 350 | extern int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req, | |
351 | struct inode *dir); | ||
350 | extern struct ceph_mds_request * | 352 | extern struct ceph_mds_request * |
351 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); | 353 | ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode); |
352 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, | 354 | extern void ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, |
diff --git a/fs/ceph/strings.c b/fs/ceph/strings.c index 4440f447fd3f..51cc23e48111 100644 --- a/fs/ceph/strings.c +++ b/fs/ceph/strings.c | |||
@@ -54,6 +54,7 @@ const char *ceph_mds_op_name(int op) | |||
54 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; | 54 | case CEPH_MDS_OP_LOOKUPHASH: return "lookuphash"; |
55 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; | 55 | case CEPH_MDS_OP_LOOKUPPARENT: return "lookupparent"; |
56 | case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; | 56 | case CEPH_MDS_OP_LOOKUPINO: return "lookupino"; |
57 | case CEPH_MDS_OP_LOOKUPNAME: return "lookupname"; | ||
57 | case CEPH_MDS_OP_GETATTR: return "getattr"; | 58 | case CEPH_MDS_OP_GETATTR: return "getattr"; |
58 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; | 59 | case CEPH_MDS_OP_SETXATTR: return "setxattr"; |
59 | case CEPH_MDS_OP_SETATTR: return "setattr"; | 60 | case CEPH_MDS_OP_SETATTR: return "setattr"; |
diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 10a4ccbf38da..06150fd745ac 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c | |||
@@ -1026,6 +1026,7 @@ static int __init init_ceph(void) | |||
1026 | if (ret) | 1026 | if (ret) |
1027 | goto out; | 1027 | goto out; |
1028 | 1028 | ||
1029 | ceph_flock_init(); | ||
1029 | ceph_xattr_init(); | 1030 | ceph_xattr_init(); |
1030 | ret = register_filesystem(&ceph_fs_type); | 1031 | ret = register_filesystem(&ceph_fs_type); |
1031 | if (ret) | 1032 | if (ret) |
diff --git a/fs/ceph/super.h b/fs/ceph/super.h index d8801a95b685..7866cd05a6bb 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h | |||
@@ -577,7 +577,7 @@ struct ceph_file_info { | |||
577 | 577 | ||
578 | /* readdir: position within a frag */ | 578 | /* readdir: position within a frag */ |
579 | unsigned offset; /* offset of last chunk, adjusted for . and .. */ | 579 | unsigned offset; /* offset of last chunk, adjusted for . and .. */ |
580 | u64 next_offset; /* offset of next chunk (last_name's + 1) */ | 580 | unsigned next_offset; /* offset of next chunk (last_name's + 1) */ |
581 | char *last_name; /* last entry in previous chunk */ | 581 | char *last_name; /* last entry in previous chunk */ |
582 | struct dentry *dentry; /* next dentry (for dcache readdir) */ | 582 | struct dentry *dentry; /* next dentry (for dcache readdir) */ |
583 | int dir_release_count; | 583 | int dir_release_count; |
@@ -871,6 +871,7 @@ extern long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg); | |||
871 | extern const struct export_operations ceph_export_ops; | 871 | extern const struct export_operations ceph_export_ops; |
872 | 872 | ||
873 | /* locks.c */ | 873 | /* locks.c */ |
874 | extern __init void ceph_flock_init(void); | ||
874 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); | 875 | extern int ceph_lock(struct file *file, int cmd, struct file_lock *fl); |
875 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); | 876 | extern int ceph_flock(struct file *file, int cmd, struct file_lock *fl); |
876 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); | 877 | extern void ceph_count_locks(struct inode *inode, int *p_num, int *f_num); |
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index a55ec37378c6..c9c2b887381e 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c | |||
@@ -64,32 +64,48 @@ static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci) | |||
64 | } | 64 | } |
65 | 65 | ||
66 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, | 66 | static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val, |
67 | size_t size) | 67 | size_t size) |
68 | { | 68 | { |
69 | int ret; | 69 | int ret; |
70 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); | 70 | struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb); |
71 | struct ceph_osd_client *osdc = &fsc->client->osdc; | 71 | struct ceph_osd_client *osdc = &fsc->client->osdc; |
72 | s64 pool = ceph_file_layout_pg_pool(ci->i_layout); | 72 | s64 pool = ceph_file_layout_pg_pool(ci->i_layout); |
73 | const char *pool_name; | 73 | const char *pool_name; |
74 | char buf[128]; | ||
74 | 75 | ||
75 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); | 76 | dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode); |
76 | down_read(&osdc->map_sem); | 77 | down_read(&osdc->map_sem); |
77 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); | 78 | pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool); |
78 | if (pool_name) | 79 | if (pool_name) { |
79 | ret = snprintf(val, size, | 80 | size_t len = strlen(pool_name); |
80 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s", | 81 | ret = snprintf(buf, sizeof(buf), |
82 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=", | ||
81 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | 83 | (unsigned long long)ceph_file_layout_su(ci->i_layout), |
82 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | 84 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), |
83 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), | 85 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout)); |
84 | pool_name); | 86 | if (!size) { |
85 | else | 87 | ret += len; |
86 | ret = snprintf(val, size, | 88 | } else if (ret + len > size) { |
89 | ret = -ERANGE; | ||
90 | } else { | ||
91 | memcpy(val, buf, ret); | ||
92 | memcpy(val + ret, pool_name, len); | ||
93 | ret += len; | ||
94 | } | ||
95 | } else { | ||
96 | ret = snprintf(buf, sizeof(buf), | ||
87 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", | 97 | "stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld", |
88 | (unsigned long long)ceph_file_layout_su(ci->i_layout), | 98 | (unsigned long long)ceph_file_layout_su(ci->i_layout), |
89 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), | 99 | (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout), |
90 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), | 100 | (unsigned long long)ceph_file_layout_object_size(ci->i_layout), |
91 | (unsigned long long)pool); | 101 | (unsigned long long)pool); |
92 | 102 | if (size) { | |
103 | if (ret <= size) | ||
104 | memcpy(val, buf, ret); | ||
105 | else | ||
106 | ret = -ERANGE; | ||
107 | } | ||
108 | } | ||
93 | up_read(&osdc->map_sem); | 109 | up_read(&osdc->map_sem); |
94 | return ret; | 110 | return ret; |
95 | } | 111 | } |
@@ -215,7 +231,7 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = { | |||
215 | .name_size = sizeof("ceph.dir.layout"), | 231 | .name_size = sizeof("ceph.dir.layout"), |
216 | .getxattr_cb = ceph_vxattrcb_layout, | 232 | .getxattr_cb = ceph_vxattrcb_layout, |
217 | .readonly = false, | 233 | .readonly = false, |
218 | .hidden = false, | 234 | .hidden = true, |
219 | .exists_cb = ceph_vxattrcb_layout_exists, | 235 | .exists_cb = ceph_vxattrcb_layout_exists, |
220 | }, | 236 | }, |
221 | XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), | 237 | XATTR_LAYOUT_FIELD(dir, layout, stripe_unit), |
@@ -242,7 +258,7 @@ static struct ceph_vxattr ceph_file_vxattrs[] = { | |||
242 | .name_size = sizeof("ceph.file.layout"), | 258 | .name_size = sizeof("ceph.file.layout"), |
243 | .getxattr_cb = ceph_vxattrcb_layout, | 259 | .getxattr_cb = ceph_vxattrcb_layout, |
244 | .readonly = false, | 260 | .readonly = false, |
245 | .hidden = false, | 261 | .hidden = true, |
246 | .exists_cb = ceph_vxattrcb_layout_exists, | 262 | .exists_cb = ceph_vxattrcb_layout_exists, |
247 | }, | 263 | }, |
248 | XATTR_LAYOUT_FIELD(file, layout, stripe_unit), | 264 | XATTR_LAYOUT_FIELD(file, layout, stripe_unit), |
@@ -842,7 +858,6 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
842 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 858 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
843 | struct inode *inode = dentry->d_inode; | 859 | struct inode *inode = dentry->d_inode; |
844 | struct ceph_inode_info *ci = ceph_inode(inode); | 860 | struct ceph_inode_info *ci = ceph_inode(inode); |
845 | struct inode *parent_inode; | ||
846 | struct ceph_mds_request *req; | 861 | struct ceph_mds_request *req; |
847 | struct ceph_mds_client *mdsc = fsc->mdsc; | 862 | struct ceph_mds_client *mdsc = fsc->mdsc; |
848 | int err; | 863 | int err; |
@@ -893,9 +908,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, | |||
893 | req->r_data_len = size; | 908 | req->r_data_len = size; |
894 | 909 | ||
895 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); | 910 | dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); |
896 | parent_inode = ceph_get_dentry_parent_inode(dentry); | 911 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
897 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
898 | iput(parent_inode); | ||
899 | ceph_mdsc_put_request(req); | 912 | ceph_mdsc_put_request(req); |
900 | dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); | 913 | dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); |
901 | 914 | ||
@@ -1019,7 +1032,6 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
1019 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); | 1032 | struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); |
1020 | struct ceph_mds_client *mdsc = fsc->mdsc; | 1033 | struct ceph_mds_client *mdsc = fsc->mdsc; |
1021 | struct inode *inode = dentry->d_inode; | 1034 | struct inode *inode = dentry->d_inode; |
1022 | struct inode *parent_inode; | ||
1023 | struct ceph_mds_request *req; | 1035 | struct ceph_mds_request *req; |
1024 | int err; | 1036 | int err; |
1025 | 1037 | ||
@@ -1033,9 +1045,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) | |||
1033 | req->r_num_caps = 1; | 1045 | req->r_num_caps = 1; |
1034 | req->r_path2 = kstrdup(name, GFP_NOFS); | 1046 | req->r_path2 = kstrdup(name, GFP_NOFS); |
1035 | 1047 | ||
1036 | parent_inode = ceph_get_dentry_parent_inode(dentry); | 1048 | err = ceph_mdsc_do_request(mdsc, NULL, req); |
1037 | err = ceph_mdsc_do_request(mdsc, parent_inode, req); | ||
1038 | iput(parent_inode); | ||
1039 | ceph_mdsc_put_request(req); | 1049 | ceph_mdsc_put_request(req); |
1040 | return err; | 1050 | return err; |
1041 | } | 1051 | } |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 849f6132b327..2c70cbe35d39 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
@@ -286,7 +286,7 @@ cifs_destroy_inode(struct inode *inode) | |||
286 | static void | 286 | static void |
287 | cifs_evict_inode(struct inode *inode) | 287 | cifs_evict_inode(struct inode *inode) |
288 | { | 288 | { |
289 | truncate_inode_pages(&inode->i_data, 0); | 289 | truncate_inode_pages_final(&inode->i_data); |
290 | clear_inode(inode); | 290 | clear_inode(inode); |
291 | cifs_fscache_release_inode_cookie(inode); | 291 | cifs_fscache_release_inode_cookie(inode); |
292 | } | 292 | } |
@@ -541,6 +541,7 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root) | |||
541 | 541 | ||
542 | static int cifs_remount(struct super_block *sb, int *flags, char *data) | 542 | static int cifs_remount(struct super_block *sb, int *flags, char *data) |
543 | { | 543 | { |
544 | sync_filesystem(sb); | ||
544 | *flags |= MS_NODIRATIME; | 545 | *flags |= MS_NODIRATIME; |
545 | return 0; | 546 | return 0; |
546 | } | 547 | } |
@@ -1005,7 +1006,7 @@ cifs_init_once(void *inode) | |||
1005 | init_rwsem(&cifsi->lock_sem); | 1006 | init_rwsem(&cifsi->lock_sem); |
1006 | } | 1007 | } |
1007 | 1008 | ||
1008 | static int | 1009 | static int __init |
1009 | cifs_init_inodecache(void) | 1010 | cifs_init_inodecache(void) |
1010 | { | 1011 | { |
1011 | cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", | 1012 | cifs_inode_cachep = kmem_cache_create("cifs_inode_cache", |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 834fce759d80..216d7e99f921 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
@@ -3113,6 +3113,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
3113 | 3113 | ||
3114 | static struct vm_operations_struct cifs_file_vm_ops = { | 3114 | static struct vm_operations_struct cifs_file_vm_ops = { |
3115 | .fault = filemap_fault, | 3115 | .fault = filemap_fault, |
3116 | .map_pages = filemap_map_pages, | ||
3116 | .page_mkwrite = cifs_page_mkwrite, | 3117 | .page_mkwrite = cifs_page_mkwrite, |
3117 | .remap_pages = generic_file_remap_pages, | 3118 | .remap_pages = generic_file_remap_pages, |
3118 | }; | 3119 | }; |
diff --git a/fs/coda/coda_int.h b/fs/coda/coda_int.h index b7143cf783ac..381c993b1427 100644 --- a/fs/coda/coda_int.h +++ b/fs/coda/coda_int.h | |||
@@ -10,7 +10,7 @@ extern int coda_hard; | |||
10 | extern int coda_fake_statfs; | 10 | extern int coda_fake_statfs; |
11 | 11 | ||
12 | void coda_destroy_inodecache(void); | 12 | void coda_destroy_inodecache(void); |
13 | int coda_init_inodecache(void); | 13 | int __init coda_init_inodecache(void); |
14 | int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync); | 14 | int coda_fsync(struct file *coda_file, loff_t start, loff_t end, int datasync); |
15 | void coda_sysctl_init(void); | 15 | void coda_sysctl_init(void); |
16 | void coda_sysctl_clean(void); | 16 | void coda_sysctl_clean(void); |
diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 506de34a4ef3..d9c7751f10ac 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c | |||
@@ -73,7 +73,7 @@ static void init_once(void *foo) | |||
73 | inode_init_once(&ei->vfs_inode); | 73 | inode_init_once(&ei->vfs_inode); |
74 | } | 74 | } |
75 | 75 | ||
76 | int coda_init_inodecache(void) | 76 | int __init coda_init_inodecache(void) |
77 | { | 77 | { |
78 | coda_inode_cachep = kmem_cache_create("coda_inode_cache", | 78 | coda_inode_cachep = kmem_cache_create("coda_inode_cache", |
79 | sizeof(struct coda_inode_info), | 79 | sizeof(struct coda_inode_info), |
@@ -96,6 +96,7 @@ void coda_destroy_inodecache(void) | |||
96 | 96 | ||
97 | static int coda_remount(struct super_block *sb, int *flags, char *data) | 97 | static int coda_remount(struct super_block *sb, int *flags, char *data) |
98 | { | 98 | { |
99 | sync_filesystem(sb); | ||
99 | *flags |= MS_NOATIME; | 100 | *flags |= MS_NOATIME; |
100 | return 0; | 101 | return 0; |
101 | } | 102 | } |
@@ -250,7 +251,7 @@ static void coda_put_super(struct super_block *sb) | |||
250 | 251 | ||
251 | static void coda_evict_inode(struct inode *inode) | 252 | static void coda_evict_inode(struct inode *inode) |
252 | { | 253 | { |
253 | truncate_inode_pages(&inode->i_data, 0); | 254 | truncate_inode_pages_final(&inode->i_data); |
254 | clear_inode(inode); | 255 | clear_inode(inode); |
255 | coda_cache_clear_inode(inode); | 256 | coda_cache_clear_inode(inode); |
256 | } | 257 | } |
diff --git a/fs/compat.c b/fs/compat.c index 6af20de2c1a3..ca926ad0430c 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
@@ -72,8 +72,8 @@ int compat_printk(const char *fmt, ...) | |||
72 | * Not all architectures have sys_utime, so implement this in terms | 72 | * Not all architectures have sys_utime, so implement this in terms |
73 | * of sys_utimes. | 73 | * of sys_utimes. |
74 | */ | 74 | */ |
75 | asmlinkage long compat_sys_utime(const char __user *filename, | 75 | COMPAT_SYSCALL_DEFINE2(utime, const char __user *, filename, |
76 | struct compat_utimbuf __user *t) | 76 | struct compat_utimbuf __user *, t) |
77 | { | 77 | { |
78 | struct timespec tv[2]; | 78 | struct timespec tv[2]; |
79 | 79 | ||
@@ -87,13 +87,13 @@ asmlinkage long compat_sys_utime(const char __user *filename, | |||
87 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); | 87 | return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0); |
88 | } | 88 | } |
89 | 89 | ||
90 | asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filename, struct compat_timespec __user *t, int flags) | 90 | COMPAT_SYSCALL_DEFINE4(utimensat, unsigned int, dfd, const char __user *, filename, struct compat_timespec __user *, t, int, flags) |
91 | { | 91 | { |
92 | struct timespec tv[2]; | 92 | struct timespec tv[2]; |
93 | 93 | ||
94 | if (t) { | 94 | if (t) { |
95 | if (get_compat_timespec(&tv[0], &t[0]) || | 95 | if (compat_get_timespec(&tv[0], &t[0]) || |
96 | get_compat_timespec(&tv[1], &t[1])) | 96 | compat_get_timespec(&tv[1], &t[1])) |
97 | return -EFAULT; | 97 | return -EFAULT; |
98 | 98 | ||
99 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) | 99 | if (tv[0].tv_nsec == UTIME_OMIT && tv[1].tv_nsec == UTIME_OMIT) |
@@ -102,7 +102,7 @@ asmlinkage long compat_sys_utimensat(unsigned int dfd, const char __user *filena | |||
102 | return do_utimes(dfd, filename, t ? tv : NULL, flags); | 102 | return do_utimes(dfd, filename, t ? tv : NULL, flags); |
103 | } | 103 | } |
104 | 104 | ||
105 | asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filename, struct compat_timeval __user *t) | 105 | COMPAT_SYSCALL_DEFINE3(futimesat, unsigned int, dfd, const char __user *, filename, struct compat_timeval __user *, t) |
106 | { | 106 | { |
107 | struct timespec tv[2]; | 107 | struct timespec tv[2]; |
108 | 108 | ||
@@ -121,7 +121,7 @@ asmlinkage long compat_sys_futimesat(unsigned int dfd, const char __user *filena | |||
121 | return do_utimes(dfd, filename, t ? tv : NULL, 0); | 121 | return do_utimes(dfd, filename, t ? tv : NULL, 0); |
122 | } | 122 | } |
123 | 123 | ||
124 | asmlinkage long compat_sys_utimes(const char __user *filename, struct compat_timeval __user *t) | 124 | COMPAT_SYSCALL_DEFINE2(utimes, const char __user *, filename, struct compat_timeval __user *, t) |
125 | { | 125 | { |
126 | return compat_sys_futimesat(AT_FDCWD, filename, t); | 126 | return compat_sys_futimesat(AT_FDCWD, filename, t); |
127 | } | 127 | } |
@@ -159,8 +159,8 @@ static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) | |||
159 | return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; | 159 | return copy_to_user(ubuf, &tmp, sizeof(tmp)) ? -EFAULT : 0; |
160 | } | 160 | } |
161 | 161 | ||
162 | asmlinkage long compat_sys_newstat(const char __user * filename, | 162 | COMPAT_SYSCALL_DEFINE2(newstat, const char __user *, filename, |
163 | struct compat_stat __user *statbuf) | 163 | struct compat_stat __user *, statbuf) |
164 | { | 164 | { |
165 | struct kstat stat; | 165 | struct kstat stat; |
166 | int error; | 166 | int error; |
@@ -171,8 +171,8 @@ asmlinkage long compat_sys_newstat(const char __user * filename, | |||
171 | return cp_compat_stat(&stat, statbuf); | 171 | return cp_compat_stat(&stat, statbuf); |
172 | } | 172 | } |
173 | 173 | ||
174 | asmlinkage long compat_sys_newlstat(const char __user * filename, | 174 | COMPAT_SYSCALL_DEFINE2(newlstat, const char __user *, filename, |
175 | struct compat_stat __user *statbuf) | 175 | struct compat_stat __user *, statbuf) |
176 | { | 176 | { |
177 | struct kstat stat; | 177 | struct kstat stat; |
178 | int error; | 178 | int error; |
@@ -184,9 +184,9 @@ asmlinkage long compat_sys_newlstat(const char __user * filename, | |||
184 | } | 184 | } |
185 | 185 | ||
186 | #ifndef __ARCH_WANT_STAT64 | 186 | #ifndef __ARCH_WANT_STAT64 |
187 | asmlinkage long compat_sys_newfstatat(unsigned int dfd, | 187 | COMPAT_SYSCALL_DEFINE4(newfstatat, unsigned int, dfd, |
188 | const char __user *filename, | 188 | const char __user *, filename, |
189 | struct compat_stat __user *statbuf, int flag) | 189 | struct compat_stat __user *, statbuf, int, flag) |
190 | { | 190 | { |
191 | struct kstat stat; | 191 | struct kstat stat; |
192 | int error; | 192 | int error; |
@@ -198,8 +198,8 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, | |||
198 | } | 198 | } |
199 | #endif | 199 | #endif |
200 | 200 | ||
201 | asmlinkage long compat_sys_newfstat(unsigned int fd, | 201 | COMPAT_SYSCALL_DEFINE2(newfstat, unsigned int, fd, |
202 | struct compat_stat __user * statbuf) | 202 | struct compat_stat __user *, statbuf) |
203 | { | 203 | { |
204 | struct kstat stat; | 204 | struct kstat stat; |
205 | int error = vfs_fstat(fd, &stat); | 205 | int error = vfs_fstat(fd, &stat); |
@@ -247,7 +247,7 @@ static int put_compat_statfs(struct compat_statfs __user *ubuf, struct kstatfs * | |||
247 | * The following statfs calls are copies of code from fs/statfs.c and | 247 | * The following statfs calls are copies of code from fs/statfs.c and |
248 | * should be checked against those from time to time | 248 | * should be checked against those from time to time |
249 | */ | 249 | */ |
250 | asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_statfs __user *buf) | 250 | COMPAT_SYSCALL_DEFINE2(statfs, const char __user *, pathname, struct compat_statfs __user *, buf) |
251 | { | 251 | { |
252 | struct kstatfs tmp; | 252 | struct kstatfs tmp; |
253 | int error = user_statfs(pathname, &tmp); | 253 | int error = user_statfs(pathname, &tmp); |
@@ -256,7 +256,7 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta | |||
256 | return error; | 256 | return error; |
257 | } | 257 | } |
258 | 258 | ||
259 | asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user *buf) | 259 | COMPAT_SYSCALL_DEFINE2(fstatfs, unsigned int, fd, struct compat_statfs __user *, buf) |
260 | { | 260 | { |
261 | struct kstatfs tmp; | 261 | struct kstatfs tmp; |
262 | int error = fd_statfs(fd, &tmp); | 262 | int error = fd_statfs(fd, &tmp); |
@@ -298,7 +298,7 @@ static int put_compat_statfs64(struct compat_statfs64 __user *ubuf, struct kstat | |||
298 | return 0; | 298 | return 0; |
299 | } | 299 | } |
300 | 300 | ||
301 | asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t sz, struct compat_statfs64 __user *buf) | 301 | COMPAT_SYSCALL_DEFINE3(statfs64, const char __user *, pathname, compat_size_t, sz, struct compat_statfs64 __user *, buf) |
302 | { | 302 | { |
303 | struct kstatfs tmp; | 303 | struct kstatfs tmp; |
304 | int error; | 304 | int error; |
@@ -312,7 +312,7 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s | |||
312 | return error; | 312 | return error; |
313 | } | 313 | } |
314 | 314 | ||
315 | asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct compat_statfs64 __user *buf) | 315 | COMPAT_SYSCALL_DEFINE3(fstatfs64, unsigned int, fd, compat_size_t, sz, struct compat_statfs64 __user *, buf) |
316 | { | 316 | { |
317 | struct kstatfs tmp; | 317 | struct kstatfs tmp; |
318 | int error; | 318 | int error; |
@@ -331,7 +331,7 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c | |||
331 | * Given how simple this syscall is that apporach is more maintainable | 331 | * Given how simple this syscall is that apporach is more maintainable |
332 | * than the various conversion hacks. | 332 | * than the various conversion hacks. |
333 | */ | 333 | */ |
334 | asmlinkage long compat_sys_ustat(unsigned dev, struct compat_ustat __user *u) | 334 | COMPAT_SYSCALL_DEFINE2(ustat, unsigned, dev, struct compat_ustat __user *, u) |
335 | { | 335 | { |
336 | struct compat_ustat tmp; | 336 | struct compat_ustat tmp; |
337 | struct kstatfs sbuf; | 337 | struct kstatfs sbuf; |
@@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u | |||
399 | } | 399 | } |
400 | #endif | 400 | #endif |
401 | 401 | ||
402 | asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, | 402 | static unsigned int |
403 | unsigned long arg) | 403 | convert_fcntl_cmd(unsigned int cmd) |
404 | { | ||
405 | switch (cmd) { | ||
406 | case F_GETLK64: | ||
407 | return F_GETLK; | ||
408 | case F_SETLK64: | ||
409 | return F_SETLK; | ||
410 | case F_SETLKW64: | ||
411 | return F_SETLKW; | ||
412 | } | ||
413 | |||
414 | return cmd; | ||
415 | } | ||
416 | |||
417 | COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | ||
418 | compat_ulong_t, arg) | ||
404 | { | 419 | { |
405 | mm_segment_t old_fs; | 420 | mm_segment_t old_fs; |
406 | struct flock f; | 421 | struct flock f; |
407 | long ret; | 422 | long ret; |
423 | unsigned int conv_cmd; | ||
408 | 424 | ||
409 | switch (cmd) { | 425 | switch (cmd) { |
410 | case F_GETLK: | 426 | case F_GETLK: |
@@ -441,16 +457,18 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, | |||
441 | case F_GETLK64: | 457 | case F_GETLK64: |
442 | case F_SETLK64: | 458 | case F_SETLK64: |
443 | case F_SETLKW64: | 459 | case F_SETLKW64: |
460 | case F_GETLKP: | ||
461 | case F_SETLKP: | ||
462 | case F_SETLKPW: | ||
444 | ret = get_compat_flock64(&f, compat_ptr(arg)); | 463 | ret = get_compat_flock64(&f, compat_ptr(arg)); |
445 | if (ret != 0) | 464 | if (ret != 0) |
446 | break; | 465 | break; |
447 | old_fs = get_fs(); | 466 | old_fs = get_fs(); |
448 | set_fs(KERNEL_DS); | 467 | set_fs(KERNEL_DS); |
449 | ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK : | 468 | conv_cmd = convert_fcntl_cmd(cmd); |
450 | ((cmd == F_SETLK64) ? F_SETLK : F_SETLKW), | 469 | ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); |
451 | (unsigned long)&f); | ||
452 | set_fs(old_fs); | 470 | set_fs(old_fs); |
453 | if (cmd == F_GETLK64 && ret == 0) { | 471 | if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { |
454 | /* need to return lock information - see above for commentary */ | 472 | /* need to return lock information - see above for commentary */ |
455 | if (f.l_start > COMPAT_LOFF_T_MAX) | 473 | if (f.l_start > COMPAT_LOFF_T_MAX) |
456 | ret = -EOVERFLOW; | 474 | ret = -EOVERFLOW; |
@@ -468,16 +486,22 @@ asmlinkage long compat_sys_fcntl64(unsigned int fd, unsigned int cmd, | |||
468 | return ret; | 486 | return ret; |
469 | } | 487 | } |
470 | 488 | ||
471 | asmlinkage long compat_sys_fcntl(unsigned int fd, unsigned int cmd, | 489 | COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, |
472 | unsigned long arg) | 490 | compat_ulong_t, arg) |
473 | { | 491 | { |
474 | if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) | 492 | switch (cmd) { |
493 | case F_GETLK64: | ||
494 | case F_SETLK64: | ||
495 | case F_SETLKW64: | ||
496 | case F_GETLKP: | ||
497 | case F_SETLKP: | ||
498 | case F_SETLKPW: | ||
475 | return -EINVAL; | 499 | return -EINVAL; |
500 | } | ||
476 | return compat_sys_fcntl64(fd, cmd, arg); | 501 | return compat_sys_fcntl64(fd, cmd, arg); |
477 | } | 502 | } |
478 | 503 | ||
479 | asmlinkage long | 504 | COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_reqs, u32 __user *, ctx32p) |
480 | compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) | ||
481 | { | 505 | { |
482 | long ret; | 506 | long ret; |
483 | aio_context_t ctx64; | 507 | aio_context_t ctx64; |
@@ -496,32 +520,24 @@ compat_sys_io_setup(unsigned nr_reqs, u32 __user *ctx32p) | |||
496 | return ret; | 520 | return ret; |
497 | } | 521 | } |
498 | 522 | ||
499 | asmlinkage long | 523 | COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id, |
500 | compat_sys_io_getevents(aio_context_t ctx_id, | 524 | compat_long_t, min_nr, |
501 | unsigned long min_nr, | 525 | compat_long_t, nr, |
502 | unsigned long nr, | 526 | struct io_event __user *, events, |
503 | struct io_event __user *events, | 527 | struct compat_timespec __user *, timeout) |
504 | struct compat_timespec __user *timeout) | ||
505 | { | 528 | { |
506 | long ret; | ||
507 | struct timespec t; | 529 | struct timespec t; |
508 | struct timespec __user *ut = NULL; | 530 | struct timespec __user *ut = NULL; |
509 | 531 | ||
510 | ret = -EFAULT; | ||
511 | if (unlikely(!access_ok(VERIFY_WRITE, events, | ||
512 | nr * sizeof(struct io_event)))) | ||
513 | goto out; | ||
514 | if (timeout) { | 532 | if (timeout) { |
515 | if (get_compat_timespec(&t, timeout)) | 533 | if (compat_get_timespec(&t, timeout)) |
516 | goto out; | 534 | return -EFAULT; |
517 | 535 | ||
518 | ut = compat_alloc_user_space(sizeof(*ut)); | 536 | ut = compat_alloc_user_space(sizeof(*ut)); |
519 | if (copy_to_user(ut, &t, sizeof(t)) ) | 537 | if (copy_to_user(ut, &t, sizeof(t)) ) |
520 | goto out; | 538 | return -EFAULT; |
521 | } | 539 | } |
522 | ret = sys_io_getevents(ctx_id, min_nr, nr, events, ut); | 540 | return sys_io_getevents(ctx_id, min_nr, nr, events, ut); |
523 | out: | ||
524 | return ret; | ||
525 | } | 541 | } |
526 | 542 | ||
527 | /* A write operation does a read from user space and vice versa */ | 543 | /* A write operation does a read from user space and vice versa */ |
@@ -617,8 +633,8 @@ copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64) | |||
617 | 633 | ||
618 | #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) | 634 | #define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *)) |
619 | 635 | ||
620 | asmlinkage long | 636 | COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id, |
621 | compat_sys_io_submit(aio_context_t ctx_id, int nr, u32 __user *iocb) | 637 | int, nr, u32 __user *, iocb) |
622 | { | 638 | { |
623 | struct iocb __user * __user *iocb64; | 639 | struct iocb __user * __user *iocb64; |
624 | long ret; | 640 | long ret; |
@@ -770,10 +786,10 @@ static int do_nfs4_super_data_conv(void *raw_data) | |||
770 | #define NCPFS_NAME "ncpfs" | 786 | #define NCPFS_NAME "ncpfs" |
771 | #define NFS4_NAME "nfs4" | 787 | #define NFS4_NAME "nfs4" |
772 | 788 | ||
773 | asmlinkage long compat_sys_mount(const char __user * dev_name, | 789 | COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name, |
774 | const char __user * dir_name, | 790 | const char __user *, dir_name, |
775 | const char __user * type, unsigned long flags, | 791 | const char __user *, type, compat_ulong_t, flags, |
776 | const void __user * data) | 792 | const void __user *, data) |
777 | { | 793 | { |
778 | char *kernel_type; | 794 | char *kernel_type; |
779 | unsigned long data_page; | 795 | unsigned long data_page; |
@@ -869,8 +885,8 @@ efault: | |||
869 | return -EFAULT; | 885 | return -EFAULT; |
870 | } | 886 | } |
871 | 887 | ||
872 | asmlinkage long compat_sys_old_readdir(unsigned int fd, | 888 | COMPAT_SYSCALL_DEFINE3(old_readdir, unsigned int, fd, |
873 | struct compat_old_linux_dirent __user *dirent, unsigned int count) | 889 | struct compat_old_linux_dirent __user *, dirent, unsigned int, count) |
874 | { | 890 | { |
875 | int error; | 891 | int error; |
876 | struct fd f = fdget(fd); | 892 | struct fd f = fdget(fd); |
@@ -948,8 +964,8 @@ efault: | |||
948 | return -EFAULT; | 964 | return -EFAULT; |
949 | } | 965 | } |
950 | 966 | ||
951 | asmlinkage long compat_sys_getdents(unsigned int fd, | 967 | COMPAT_SYSCALL_DEFINE3(getdents, unsigned int, fd, |
952 | struct compat_linux_dirent __user *dirent, unsigned int count) | 968 | struct compat_linux_dirent __user *, dirent, unsigned int, count) |
953 | { | 969 | { |
954 | struct fd f; | 970 | struct fd f; |
955 | struct compat_linux_dirent __user * lastdirent; | 971 | struct compat_linux_dirent __user * lastdirent; |
@@ -981,7 +997,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
981 | return error; | 997 | return error; |
982 | } | 998 | } |
983 | 999 | ||
984 | #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 | 1000 | #ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 |
985 | 1001 | ||
986 | struct compat_getdents_callback64 { | 1002 | struct compat_getdents_callback64 { |
987 | struct dir_context ctx; | 1003 | struct dir_context ctx; |
@@ -1033,8 +1049,8 @@ efault: | |||
1033 | return -EFAULT; | 1049 | return -EFAULT; |
1034 | } | 1050 | } |
1035 | 1051 | ||
1036 | asmlinkage long compat_sys_getdents64(unsigned int fd, | 1052 | COMPAT_SYSCALL_DEFINE3(getdents64, unsigned int, fd, |
1037 | struct linux_dirent64 __user * dirent, unsigned int count) | 1053 | struct linux_dirent64 __user *, dirent, unsigned int, count) |
1038 | { | 1054 | { |
1039 | struct fd f; | 1055 | struct fd f; |
1040 | struct linux_dirent64 __user * lastdirent; | 1056 | struct linux_dirent64 __user * lastdirent; |
@@ -1066,7 +1082,7 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
1066 | fdput(f); | 1082 | fdput(f); |
1067 | return error; | 1083 | return error; |
1068 | } | 1084 | } |
1069 | #endif /* ! __ARCH_OMIT_COMPAT_SYS_GETDENTS64 */ | 1085 | #endif /* __ARCH_WANT_COMPAT_SYS_GETDENTS64 */ |
1070 | 1086 | ||
1071 | /* | 1087 | /* |
1072 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the | 1088 | * Exactly like fs/open.c:sys_open(), except that it doesn't set the |
@@ -1287,9 +1303,9 @@ out_nofds: | |||
1287 | return ret; | 1303 | return ret; |
1288 | } | 1304 | } |
1289 | 1305 | ||
1290 | asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, | 1306 | COMPAT_SYSCALL_DEFINE5(select, int, n, compat_ulong_t __user *, inp, |
1291 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1307 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, |
1292 | struct compat_timeval __user *tvp) | 1308 | struct compat_timeval __user *, tvp) |
1293 | { | 1309 | { |
1294 | struct timespec end_time, *to = NULL; | 1310 | struct timespec end_time, *to = NULL; |
1295 | struct compat_timeval tv; | 1311 | struct compat_timeval tv; |
@@ -1320,7 +1336,7 @@ struct compat_sel_arg_struct { | |||
1320 | compat_uptr_t tvp; | 1336 | compat_uptr_t tvp; |
1321 | }; | 1337 | }; |
1322 | 1338 | ||
1323 | asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg) | 1339 | COMPAT_SYSCALL_DEFINE1(old_select, struct compat_sel_arg_struct __user *, arg) |
1324 | { | 1340 | { |
1325 | struct compat_sel_arg_struct a; | 1341 | struct compat_sel_arg_struct a; |
1326 | 1342 | ||
@@ -1381,9 +1397,9 @@ static long do_compat_pselect(int n, compat_ulong_t __user *inp, | |||
1381 | return ret; | 1397 | return ret; |
1382 | } | 1398 | } |
1383 | 1399 | ||
1384 | asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, | 1400 | COMPAT_SYSCALL_DEFINE6(pselect6, int, n, compat_ulong_t __user *, inp, |
1385 | compat_ulong_t __user *outp, compat_ulong_t __user *exp, | 1401 | compat_ulong_t __user *, outp, compat_ulong_t __user *, exp, |
1386 | struct compat_timespec __user *tsp, void __user *sig) | 1402 | struct compat_timespec __user *, tsp, void __user *, sig) |
1387 | { | 1403 | { |
1388 | compat_size_t sigsetsize = 0; | 1404 | compat_size_t sigsetsize = 0; |
1389 | compat_uptr_t up = 0; | 1405 | compat_uptr_t up = 0; |
@@ -1400,9 +1416,9 @@ asmlinkage long compat_sys_pselect6(int n, compat_ulong_t __user *inp, | |||
1400 | sigsetsize); | 1416 | sigsetsize); |
1401 | } | 1417 | } |
1402 | 1418 | ||
1403 | asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, | 1419 | COMPAT_SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, |
1404 | unsigned int nfds, struct compat_timespec __user *tsp, | 1420 | unsigned int, nfds, struct compat_timespec __user *, tsp, |
1405 | const compat_sigset_t __user *sigmask, compat_size_t sigsetsize) | 1421 | const compat_sigset_t __user *, sigmask, compat_size_t, sigsetsize) |
1406 | { | 1422 | { |
1407 | compat_sigset_t ss32; | 1423 | compat_sigset_t ss32; |
1408 | sigset_t ksigmask, sigsaved; | 1424 | sigset_t ksigmask, sigsaved; |
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c index a81147e2e4ef..4d24d17bcfc1 100644 --- a/fs/compat_binfmt_elf.c +++ b/fs/compat_binfmt_elf.c | |||
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime, | |||
88 | #define ELF_HWCAP COMPAT_ELF_HWCAP | 88 | #define ELF_HWCAP COMPAT_ELF_HWCAP |
89 | #endif | 89 | #endif |
90 | 90 | ||
91 | #ifdef COMPAT_ELF_HWCAP2 | ||
92 | #undef ELF_HWCAP2 | ||
93 | #define ELF_HWCAP2 COMPAT_ELF_HWCAP2 | ||
94 | #endif | ||
95 | |||
91 | #ifdef COMPAT_ARCH_DLINFO | 96 | #ifdef COMPAT_ARCH_DLINFO |
92 | #undef ARCH_DLINFO | 97 | #undef ARCH_DLINFO |
93 | #define ARCH_DLINFO COMPAT_ARCH_DLINFO | 98 | #define ARCH_DLINFO COMPAT_ARCH_DLINFO |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 3881610b6438..e82289047272 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
@@ -1538,9 +1538,10 @@ static int compat_ioctl_check_table(unsigned int xcmd) | |||
1538 | return ioctl_pointer[i] == xcmd; | 1538 | return ioctl_pointer[i] == xcmd; |
1539 | } | 1539 | } |
1540 | 1540 | ||
1541 | asmlinkage long compat_sys_ioctl(unsigned int fd, unsigned int cmd, | 1541 | COMPAT_SYSCALL_DEFINE3(ioctl, unsigned int, fd, unsigned int, cmd, |
1542 | unsigned long arg) | 1542 | compat_ulong_t, arg32) |
1543 | { | 1543 | { |
1544 | unsigned long arg = arg32; | ||
1544 | struct fd f = fdget(fd); | 1545 | struct fd f = fdget(fd); |
1545 | int error = -EBADF; | 1546 | int error = -EBADF; |
1546 | if (!f.file) | 1547 | if (!f.file) |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 06610cf94d57..ddcfe590b8a8 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
@@ -195,8 +195,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i | |||
195 | struct page *page = NULL; | 195 | struct page *page = NULL; |
196 | 196 | ||
197 | if (blocknr + i < devsize) { | 197 | if (blocknr + i < devsize) { |
198 | page = read_mapping_page_async(mapping, blocknr + i, | 198 | page = read_mapping_page(mapping, blocknr + i, NULL); |
199 | NULL); | ||
200 | /* synchronous error? */ | 199 | /* synchronous error? */ |
201 | if (IS_ERR(page)) | 200 | if (IS_ERR(page)) |
202 | page = NULL; | 201 | page = NULL; |
@@ -244,6 +243,7 @@ static void cramfs_kill_sb(struct super_block *sb) | |||
244 | 243 | ||
245 | static int cramfs_remount(struct super_block *sb, int *flags, char *data) | 244 | static int cramfs_remount(struct super_block *sb, int *flags, char *data) |
246 | { | 245 | { |
246 | sync_filesystem(sb); | ||
247 | *flags |= MS_RDONLY; | 247 | *flags |= MS_RDONLY; |
248 | return 0; | 248 | return 0; |
249 | } | 249 | } |
diff --git a/fs/dcache.c b/fs/dcache.c index 089f681ac952..40707d88a945 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
@@ -2483,12 +2483,14 @@ static void switch_names(struct dentry *dentry, struct dentry *target) | |||
2483 | dentry->d_name.name = dentry->d_iname; | 2483 | dentry->d_name.name = dentry->d_iname; |
2484 | } else { | 2484 | } else { |
2485 | /* | 2485 | /* |
2486 | * Both are internal. Just copy target to dentry | 2486 | * Both are internal. |
2487 | */ | 2487 | */ |
2488 | memcpy(dentry->d_iname, target->d_name.name, | 2488 | unsigned int i; |
2489 | target->d_name.len + 1); | 2489 | BUILD_BUG_ON(!IS_ALIGNED(DNAME_INLINE_LEN, sizeof(long))); |
2490 | dentry->d_name.len = target->d_name.len; | 2490 | for (i = 0; i < DNAME_INLINE_LEN / sizeof(long); i++) { |
2491 | return; | 2491 | swap(((long *) &dentry->d_iname)[i], |
2492 | ((long *) &target->d_iname)[i]); | ||
2493 | } | ||
2492 | } | 2494 | } |
2493 | } | 2495 | } |
2494 | swap(dentry->d_name.len, target->d_name.len); | 2496 | swap(dentry->d_name.len, target->d_name.len); |
@@ -2545,13 +2547,15 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, | |||
2545 | * __d_move - move a dentry | 2547 | * __d_move - move a dentry |
2546 | * @dentry: entry to move | 2548 | * @dentry: entry to move |
2547 | * @target: new dentry | 2549 | * @target: new dentry |
2550 | * @exchange: exchange the two dentries | ||
2548 | * | 2551 | * |
2549 | * Update the dcache to reflect the move of a file name. Negative | 2552 | * Update the dcache to reflect the move of a file name. Negative |
2550 | * dcache entries should not be moved in this way. Caller must hold | 2553 | * dcache entries should not be moved in this way. Caller must hold |
2551 | * rename_lock, the i_mutex of the source and target directories, | 2554 | * rename_lock, the i_mutex of the source and target directories, |
2552 | * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). | 2555 | * and the sb->s_vfs_rename_mutex if they differ. See lock_rename(). |
2553 | */ | 2556 | */ |
2554 | static void __d_move(struct dentry * dentry, struct dentry * target) | 2557 | static void __d_move(struct dentry *dentry, struct dentry *target, |
2558 | bool exchange) | ||
2555 | { | 2559 | { |
2556 | if (!dentry->d_inode) | 2560 | if (!dentry->d_inode) |
2557 | printk(KERN_WARNING "VFS: moving negative dcache entry\n"); | 2561 | printk(KERN_WARNING "VFS: moving negative dcache entry\n"); |
@@ -2573,8 +2577,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
2573 | __d_drop(dentry); | 2577 | __d_drop(dentry); |
2574 | __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); | 2578 | __d_rehash(dentry, d_hash(target->d_parent, target->d_name.hash)); |
2575 | 2579 | ||
2576 | /* Unhash the target: dput() will then get rid of it */ | 2580 | /* |
2581 | * Unhash the target (d_delete() is not usable here). If exchanging | ||
2582 | * the two dentries, then rehash onto the other's hash queue. | ||
2583 | */ | ||
2577 | __d_drop(target); | 2584 | __d_drop(target); |
2585 | if (exchange) { | ||
2586 | __d_rehash(target, | ||
2587 | d_hash(dentry->d_parent, dentry->d_name.hash)); | ||
2588 | } | ||
2578 | 2589 | ||
2579 | list_del(&dentry->d_u.d_child); | 2590 | list_del(&dentry->d_u.d_child); |
2580 | list_del(&target->d_u.d_child); | 2591 | list_del(&target->d_u.d_child); |
@@ -2601,6 +2612,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
2601 | write_seqcount_end(&dentry->d_seq); | 2612 | write_seqcount_end(&dentry->d_seq); |
2602 | 2613 | ||
2603 | dentry_unlock_parents_for_move(dentry, target); | 2614 | dentry_unlock_parents_for_move(dentry, target); |
2615 | if (exchange) | ||
2616 | fsnotify_d_move(target); | ||
2604 | spin_unlock(&target->d_lock); | 2617 | spin_unlock(&target->d_lock); |
2605 | fsnotify_d_move(dentry); | 2618 | fsnotify_d_move(dentry); |
2606 | spin_unlock(&dentry->d_lock); | 2619 | spin_unlock(&dentry->d_lock); |
@@ -2618,11 +2631,30 @@ static void __d_move(struct dentry * dentry, struct dentry * target) | |||
2618 | void d_move(struct dentry *dentry, struct dentry *target) | 2631 | void d_move(struct dentry *dentry, struct dentry *target) |
2619 | { | 2632 | { |
2620 | write_seqlock(&rename_lock); | 2633 | write_seqlock(&rename_lock); |
2621 | __d_move(dentry, target); | 2634 | __d_move(dentry, target, false); |
2622 | write_sequnlock(&rename_lock); | 2635 | write_sequnlock(&rename_lock); |
2623 | } | 2636 | } |
2624 | EXPORT_SYMBOL(d_move); | 2637 | EXPORT_SYMBOL(d_move); |
2625 | 2638 | ||
2639 | /* | ||
2640 | * d_exchange - exchange two dentries | ||
2641 | * @dentry1: first dentry | ||
2642 | * @dentry2: second dentry | ||
2643 | */ | ||
2644 | void d_exchange(struct dentry *dentry1, struct dentry *dentry2) | ||
2645 | { | ||
2646 | write_seqlock(&rename_lock); | ||
2647 | |||
2648 | WARN_ON(!dentry1->d_inode); | ||
2649 | WARN_ON(!dentry2->d_inode); | ||
2650 | WARN_ON(IS_ROOT(dentry1)); | ||
2651 | WARN_ON(IS_ROOT(dentry2)); | ||
2652 | |||
2653 | __d_move(dentry1, dentry2, true); | ||
2654 | |||
2655 | write_sequnlock(&rename_lock); | ||
2656 | } | ||
2657 | |||
2626 | /** | 2658 | /** |
2627 | * d_ancestor - search for an ancestor | 2659 | * d_ancestor - search for an ancestor |
2628 | * @p1: ancestor dentry | 2660 | * @p1: ancestor dentry |
@@ -2670,7 +2702,7 @@ static struct dentry *__d_unalias(struct inode *inode, | |||
2670 | m2 = &alias->d_parent->d_inode->i_mutex; | 2702 | m2 = &alias->d_parent->d_inode->i_mutex; |
2671 | out_unalias: | 2703 | out_unalias: |
2672 | if (likely(!d_mountpoint(alias))) { | 2704 | if (likely(!d_mountpoint(alias))) { |
2673 | __d_move(alias, dentry); | 2705 | __d_move(alias, dentry, false); |
2674 | ret = alias; | 2706 | ret = alias; |
2675 | } | 2707 | } |
2676 | out_err: | 2708 | out_err: |
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 9c0444cccbe1..8c41b52da358 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
@@ -218,6 +218,7 @@ static int debugfs_remount(struct super_block *sb, int *flags, char *data) | |||
218 | int err; | 218 | int err; |
219 | struct debugfs_fs_info *fsi = sb->s_fs_info; | 219 | struct debugfs_fs_info *fsi = sb->s_fs_info; |
220 | 220 | ||
221 | sync_filesystem(sb); | ||
221 | err = debugfs_parse_options(data, &fsi->mount_opts); | 222 | err = debugfs_parse_options(data, &fsi->mount_opts); |
222 | if (err) | 223 | if (err) |
223 | goto fail; | 224 | goto fail; |
@@ -358,7 +359,7 @@ exit: | |||
358 | * @name: a pointer to a string containing the name of the file to create. | 359 | * @name: a pointer to a string containing the name of the file to create. |
359 | * @mode: the permission that the file should have. | 360 | * @mode: the permission that the file should have. |
360 | * @parent: a pointer to the parent dentry for this file. This should be a | 361 | * @parent: a pointer to the parent dentry for this file. This should be a |
361 | * directory dentry if set. If this paramater is NULL, then the | 362 | * directory dentry if set. If this parameter is NULL, then the |
362 | * file will be created in the root of the debugfs filesystem. | 363 | * file will be created in the root of the debugfs filesystem. |
363 | * @data: a pointer to something that the caller will want to get to later | 364 | * @data: a pointer to something that the caller will want to get to later |
364 | * on. The inode.i_private pointer will point to this value on | 365 | * on. The inode.i_private pointer will point to this value on |
@@ -400,7 +401,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_file); | |||
400 | * @name: a pointer to a string containing the name of the directory to | 401 | * @name: a pointer to a string containing the name of the directory to |
401 | * create. | 402 | * create. |
402 | * @parent: a pointer to the parent dentry for this file. This should be a | 403 | * @parent: a pointer to the parent dentry for this file. This should be a |
403 | * directory dentry if set. If this paramater is NULL, then the | 404 | * directory dentry if set. If this parameter is NULL, then the |
404 | * directory will be created in the root of the debugfs filesystem. | 405 | * directory will be created in the root of the debugfs filesystem. |
405 | * | 406 | * |
406 | * This function creates a directory in debugfs with the given name. | 407 | * This function creates a directory in debugfs with the given name. |
@@ -425,7 +426,7 @@ EXPORT_SYMBOL_GPL(debugfs_create_dir); | |||
425 | * @name: a pointer to a string containing the name of the symbolic link to | 426 | * @name: a pointer to a string containing the name of the symbolic link to |
426 | * create. | 427 | * create. |
427 | * @parent: a pointer to the parent dentry for this symbolic link. This | 428 | * @parent: a pointer to the parent dentry for this symbolic link. This |
428 | * should be a directory dentry if set. If this paramater is NULL, | 429 | * should be a directory dentry if set. If this parameter is NULL, |
429 | * then the symbolic link will be created in the root of the debugfs | 430 | * then the symbolic link will be created in the root of the debugfs |
430 | * filesystem. | 431 | * filesystem. |
431 | * @target: a pointer to a string containing the path to the target of the | 432 | * @target: a pointer to a string containing the path to the target of the |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index a726b9f29cb7..c71038079b47 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
@@ -313,6 +313,7 @@ static int devpts_remount(struct super_block *sb, int *flags, char *data) | |||
313 | struct pts_fs_info *fsi = DEVPTS_SB(sb); | 313 | struct pts_fs_info *fsi = DEVPTS_SB(sb); |
314 | struct pts_mount_opts *opts = &fsi->mount_opts; | 314 | struct pts_mount_opts *opts = &fsi->mount_opts; |
315 | 315 | ||
316 | sync_filesystem(sb); | ||
316 | err = parse_mount_options(data, PARSE_REMOUNT, opts); | 317 | err = parse_mount_options(data, PARSE_REMOUNT, opts); |
317 | 318 | ||
318 | /* | 319 | /* |
diff --git a/fs/direct-io.c b/fs/direct-io.c index 160a5489a939..31ba0935e32e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -664,7 +664,6 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio, | |||
664 | goto out; | 664 | goto out; |
665 | sector = start_sector << (sdio->blkbits - 9); | 665 | sector = start_sector << (sdio->blkbits - 9); |
666 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); | 666 | nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev)); |
667 | nr_pages = min(nr_pages, BIO_MAX_PAGES); | ||
668 | BUG_ON(nr_pages <= 0); | 667 | BUG_ON(nr_pages <= 0); |
669 | dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); | 668 | dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages); |
670 | sdio->boundary = 0; | 669 | sdio->boundary = 0; |
@@ -1194,13 +1193,19 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1194 | } | 1193 | } |
1195 | 1194 | ||
1196 | /* | 1195 | /* |
1197 | * For file extending writes updating i_size before data | 1196 | * For file extending writes updating i_size before data writeouts |
1198 | * writeouts complete can expose uninitialized blocks. So | 1197 | * complete can expose uninitialized blocks in dumb filesystems. |
1199 | * even for AIO, we need to wait for i/o to complete before | 1198 | * In that case we need to wait for I/O completion even if asked |
1200 | * returning in this case. | 1199 | * for an asynchronous write. |
1201 | */ | 1200 | */ |
1202 | dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && | 1201 | if (is_sync_kiocb(iocb)) |
1203 | (end > i_size_read(inode))); | 1202 | dio->is_async = false; |
1203 | else if (!(dio->flags & DIO_ASYNC_EXTEND) && | ||
1204 | (rw & WRITE) && end > i_size_read(inode)) | ||
1205 | dio->is_async = false; | ||
1206 | else | ||
1207 | dio->is_async = true; | ||
1208 | |||
1204 | dio->inode = inode; | 1209 | dio->inode = inode; |
1205 | dio->rw = rw; | 1210 | dio->rw = rw; |
1206 | 1211 | ||
diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 0e90f0c91b93..dcea1e37a1b7 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include "dlm_internal.h" | 14 | #include "dlm_internal.h" |
15 | #include "lock.h" | 15 | #include "lock.h" |
16 | #include "user.h" | 16 | #include "user.h" |
17 | #include "ast.h" | ||
17 | 18 | ||
18 | static uint64_t dlm_cb_seq; | 19 | static uint64_t dlm_cb_seq; |
19 | static DEFINE_SPINLOCK(dlm_cb_seq_spin); | 20 | static DEFINE_SPINLOCK(dlm_cb_seq_spin); |
@@ -308,6 +309,6 @@ void dlm_callback_resume(struct dlm_ls *ls) | |||
308 | mutex_unlock(&ls->ls_cb_mutex); | 309 | mutex_unlock(&ls->ls_cb_mutex); |
309 | 310 | ||
310 | if (count) | 311 | if (count) |
311 | log_debug(ls, "dlm_callback_resume %d", count); | 312 | log_rinfo(ls, "dlm_callback_resume %d", count); |
312 | } | 313 | } |
313 | 314 | ||
diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c index 278a75cda446..d975851a7e1e 100644 --- a/fs/dlm/dir.c +++ b/fs/dlm/dir.c | |||
@@ -68,7 +68,7 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
68 | uint16_t namelen; | 68 | uint16_t namelen; |
69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; | 69 | unsigned int count = 0, count_match = 0, count_bad = 0, count_add = 0; |
70 | 70 | ||
71 | log_debug(ls, "dlm_recover_directory"); | 71 | log_rinfo(ls, "dlm_recover_directory"); |
72 | 72 | ||
73 | if (dlm_no_directory(ls)) | 73 | if (dlm_no_directory(ls)) |
74 | goto out_status; | 74 | goto out_status; |
@@ -189,7 +189,7 @@ int dlm_recover_directory(struct dlm_ls *ls) | |||
189 | error = 0; | 189 | error = 0; |
190 | dlm_set_recover_status(ls, DLM_RS_DIR); | 190 | dlm_set_recover_status(ls, DLM_RS_DIR); |
191 | 191 | ||
192 | log_debug(ls, "dlm_recover_directory %u in %u new", | 192 | log_rinfo(ls, "dlm_recover_directory %u in %u new", |
193 | count, count_add); | 193 | count, count_add); |
194 | out_free: | 194 | out_free: |
195 | kfree(last_name); | 195 | kfree(last_name); |
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index e7665c31f7b1..5eff6ea3e27f 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
@@ -65,6 +65,8 @@ struct dlm_mhandle; | |||
65 | printk(KERN_ERR "dlm: "fmt"\n" , ##args) | 65 | printk(KERN_ERR "dlm: "fmt"\n" , ##args) |
66 | #define log_error(ls, fmt, args...) \ | 66 | #define log_error(ls, fmt, args...) \ |
67 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) | 67 | printk(KERN_ERR "dlm: %s: " fmt "\n", (ls)->ls_name , ##args) |
68 | #define log_rinfo(ls, fmt, args...) \ | ||
69 | printk(KERN_INFO "dlm: %s: " fmt "\n", (ls)->ls_name , ##args); | ||
68 | 70 | ||
69 | #define log_debug(ls, fmt, args...) \ | 71 | #define log_debug(ls, fmt, args...) \ |
70 | do { \ | 72 | do { \ |
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index e223a911a834..83f3d5520307 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
@@ -687,6 +687,7 @@ static int find_rsb_dir(struct dlm_ls *ls, char *name, int len, | |||
687 | log_error(ls, "find_rsb new from_other %d dir %d our %d %s", | 687 | log_error(ls, "find_rsb new from_other %d dir %d our %d %s", |
688 | from_nodeid, dir_nodeid, our_nodeid, r->res_name); | 688 | from_nodeid, dir_nodeid, our_nodeid, r->res_name); |
689 | dlm_free_rsb(r); | 689 | dlm_free_rsb(r); |
690 | r = NULL; | ||
690 | error = -ENOTBLK; | 691 | error = -ENOTBLK; |
691 | goto out_unlock; | 692 | goto out_unlock; |
692 | } | 693 | } |
@@ -5462,7 +5463,7 @@ void dlm_recover_purge(struct dlm_ls *ls) | |||
5462 | up_write(&ls->ls_root_sem); | 5463 | up_write(&ls->ls_root_sem); |
5463 | 5464 | ||
5464 | if (lkb_count) | 5465 | if (lkb_count) |
5465 | log_debug(ls, "dlm_recover_purge %u locks for %u nodes", | 5466 | log_rinfo(ls, "dlm_recover_purge %u locks for %u nodes", |
5466 | lkb_count, nodes_count); | 5467 | lkb_count, nodes_count); |
5467 | } | 5468 | } |
5468 | 5469 | ||
@@ -5536,7 +5537,7 @@ void dlm_recover_grant(struct dlm_ls *ls) | |||
5536 | } | 5537 | } |
5537 | 5538 | ||
5538 | if (lkb_count) | 5539 | if (lkb_count) |
5539 | log_debug(ls, "dlm_recover_grant %u locks on %u resources", | 5540 | log_rinfo(ls, "dlm_recover_grant %u locks on %u resources", |
5540 | lkb_count, rsb_count); | 5541 | lkb_count, rsb_count); |
5541 | } | 5542 | } |
5542 | 5543 | ||
@@ -5695,7 +5696,7 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
5695 | put_rsb(r); | 5696 | put_rsb(r); |
5696 | out: | 5697 | out: |
5697 | if (error && error != -EEXIST) | 5698 | if (error && error != -EEXIST) |
5698 | log_debug(ls, "dlm_recover_master_copy remote %d %x error %d", | 5699 | log_rinfo(ls, "dlm_recover_master_copy remote %d %x error %d", |
5699 | from_nodeid, remid, error); | 5700 | from_nodeid, remid, error); |
5700 | rl->rl_result = cpu_to_le32(error); | 5701 | rl->rl_result = cpu_to_le32(error); |
5701 | return error; | 5702 | return error; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index d5abafd56a6d..04d6398c1f1c 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
@@ -190,7 +190,7 @@ static int do_uevent(struct dlm_ls *ls, int in) | |||
190 | else | 190 | else |
191 | kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); | 191 | kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE); |
192 | 192 | ||
193 | log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving"); | 193 | log_rinfo(ls, "%s the lockspace group...", in ? "joining" : "leaving"); |
194 | 194 | ||
195 | /* dlm_controld will see the uevent, do the necessary group management | 195 | /* dlm_controld will see the uevent, do the necessary group management |
196 | and then write to sysfs to wake us */ | 196 | and then write to sysfs to wake us */ |
@@ -198,7 +198,7 @@ static int do_uevent(struct dlm_ls *ls, int in) | |||
198 | error = wait_event_interruptible(ls->ls_uevent_wait, | 198 | error = wait_event_interruptible(ls->ls_uevent_wait, |
199 | test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); | 199 | test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags)); |
200 | 200 | ||
201 | log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result); | 201 | log_rinfo(ls, "group event done %d %d", error, ls->ls_uevent_result); |
202 | 202 | ||
203 | if (error) | 203 | if (error) |
204 | goto out; | 204 | goto out; |
@@ -640,7 +640,7 @@ static int new_lockspace(const char *name, const char *cluster, | |||
640 | 640 | ||
641 | dlm_create_debug_file(ls); | 641 | dlm_create_debug_file(ls); |
642 | 642 | ||
643 | log_debug(ls, "join complete"); | 643 | log_rinfo(ls, "join complete"); |
644 | *lockspace = ls; | 644 | *lockspace = ls; |
645 | return 0; | 645 | return 0; |
646 | 646 | ||
@@ -835,7 +835,7 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
835 | dlm_clear_members(ls); | 835 | dlm_clear_members(ls); |
836 | dlm_clear_members_gone(ls); | 836 | dlm_clear_members_gone(ls); |
837 | kfree(ls->ls_node_array); | 837 | kfree(ls->ls_node_array); |
838 | log_debug(ls, "release_lockspace final free"); | 838 | log_rinfo(ls, "release_lockspace final free"); |
839 | kobject_put(&ls->ls_kobj); | 839 | kobject_put(&ls->ls_kobj); |
840 | /* The ls structure will be freed when the kobject is done with */ | 840 | /* The ls structure will be freed when the kobject is done with */ |
841 | 841 | ||
diff --git a/fs/dlm/member.c b/fs/dlm/member.c index 476557b54921..9c47f1c14a8b 100644 --- a/fs/dlm/member.c +++ b/fs/dlm/member.c | |||
@@ -60,18 +60,15 @@ void dlm_slots_copy_out(struct dlm_ls *ls, struct dlm_rcom *rc) | |||
60 | 60 | ||
61 | #define SLOT_DEBUG_LINE 128 | 61 | #define SLOT_DEBUG_LINE 128 |
62 | 62 | ||
63 | static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, | 63 | static void log_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, |
64 | struct rcom_slot *ro0, struct dlm_slot *array, | 64 | struct rcom_slot *ro0, struct dlm_slot *array, |
65 | int array_size) | 65 | int array_size) |
66 | { | 66 | { |
67 | char line[SLOT_DEBUG_LINE]; | 67 | char line[SLOT_DEBUG_LINE]; |
68 | int len = SLOT_DEBUG_LINE - 1; | 68 | int len = SLOT_DEBUG_LINE - 1; |
69 | int pos = 0; | 69 | int pos = 0; |
70 | int ret, i; | 70 | int ret, i; |
71 | 71 | ||
72 | if (!dlm_config.ci_log_debug) | ||
73 | return; | ||
74 | |||
75 | memset(line, 0, sizeof(line)); | 72 | memset(line, 0, sizeof(line)); |
76 | 73 | ||
77 | if (array) { | 74 | if (array) { |
@@ -95,7 +92,7 @@ static void log_debug_slots(struct dlm_ls *ls, uint32_t gen, int num_slots, | |||
95 | } | 92 | } |
96 | } | 93 | } |
97 | 94 | ||
98 | log_debug(ls, "generation %u slots %d%s", gen, num_slots, line); | 95 | log_rinfo(ls, "generation %u slots %d%s", gen, num_slots, line); |
99 | } | 96 | } |
100 | 97 | ||
101 | int dlm_slots_copy_in(struct dlm_ls *ls) | 98 | int dlm_slots_copy_in(struct dlm_ls *ls) |
@@ -129,7 +126,7 @@ int dlm_slots_copy_in(struct dlm_ls *ls) | |||
129 | ro->ro_slot = le16_to_cpu(ro->ro_slot); | 126 | ro->ro_slot = le16_to_cpu(ro->ro_slot); |
130 | } | 127 | } |
131 | 128 | ||
132 | log_debug_slots(ls, gen, num_slots, ro0, NULL, 0); | 129 | log_slots(ls, gen, num_slots, ro0, NULL, 0); |
133 | 130 | ||
134 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 131 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
135 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { | 132 | for (i = 0, ro = ro0; i < num_slots; i++, ro++) { |
@@ -274,7 +271,7 @@ int dlm_slots_assign(struct dlm_ls *ls, int *num_slots, int *slots_size, | |||
274 | 271 | ||
275 | gen++; | 272 | gen++; |
276 | 273 | ||
277 | log_debug_slots(ls, gen, num, NULL, array, array_size); | 274 | log_slots(ls, gen, num, NULL, array, array_size); |
278 | 275 | ||
279 | max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - | 276 | max_slots = (dlm_config.ci_buffer_size - sizeof(struct dlm_rcom) - |
280 | sizeof(struct rcom_config)) / sizeof(struct rcom_slot); | 277 | sizeof(struct rcom_config)) / sizeof(struct rcom_slot); |
@@ -447,7 +444,7 @@ static int ping_members(struct dlm_ls *ls) | |||
447 | break; | 444 | break; |
448 | } | 445 | } |
449 | if (error) | 446 | if (error) |
450 | log_debug(ls, "ping_members aborted %d last nodeid %d", | 447 | log_rinfo(ls, "ping_members aborted %d last nodeid %d", |
451 | error, ls->ls_recover_nodeid); | 448 | error, ls->ls_recover_nodeid); |
452 | return error; | 449 | return error; |
453 | } | 450 | } |
@@ -539,7 +536,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
539 | count as a negative change so the "neg" recovery steps will happen */ | 536 | count as a negative change so the "neg" recovery steps will happen */ |
540 | 537 | ||
541 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { | 538 | list_for_each_entry(memb, &ls->ls_nodes_gone, list) { |
542 | log_debug(ls, "prev removed member %d", memb->nodeid); | 539 | log_rinfo(ls, "prev removed member %d", memb->nodeid); |
543 | neg++; | 540 | neg++; |
544 | } | 541 | } |
545 | 542 | ||
@@ -551,10 +548,10 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
551 | continue; | 548 | continue; |
552 | 549 | ||
553 | if (!node) { | 550 | if (!node) { |
554 | log_debug(ls, "remove member %d", memb->nodeid); | 551 | log_rinfo(ls, "remove member %d", memb->nodeid); |
555 | } else { | 552 | } else { |
556 | /* removed and re-added */ | 553 | /* removed and re-added */ |
557 | log_debug(ls, "remove member %d comm_seq %u %u", | 554 | log_rinfo(ls, "remove member %d comm_seq %u %u", |
558 | memb->nodeid, memb->comm_seq, node->comm_seq); | 555 | memb->nodeid, memb->comm_seq, node->comm_seq); |
559 | } | 556 | } |
560 | 557 | ||
@@ -571,7 +568,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
571 | if (dlm_is_member(ls, node->nodeid)) | 568 | if (dlm_is_member(ls, node->nodeid)) |
572 | continue; | 569 | continue; |
573 | dlm_add_member(ls, node); | 570 | dlm_add_member(ls, node); |
574 | log_debug(ls, "add member %d", node->nodeid); | 571 | log_rinfo(ls, "add member %d", node->nodeid); |
575 | } | 572 | } |
576 | 573 | ||
577 | list_for_each_entry(memb, &ls->ls_nodes, list) { | 574 | list_for_each_entry(memb, &ls->ls_nodes, list) { |
@@ -591,7 +588,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out) | |||
591 | complete(&ls->ls_members_done); | 588 | complete(&ls->ls_members_done); |
592 | } | 589 | } |
593 | 590 | ||
594 | log_debug(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); | 591 | log_rinfo(ls, "dlm_recover_members %d nodes", ls->ls_num_nodes); |
595 | return error; | 592 | return error; |
596 | } | 593 | } |
597 | 594 | ||
diff --git a/fs/dlm/recover.c b/fs/dlm/recover.c index a6bc63f6e31b..eaea789bf97d 100644 --- a/fs/dlm/recover.c +++ b/fs/dlm/recover.c | |||
@@ -526,7 +526,7 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
526 | int nodir = dlm_no_directory(ls); | 526 | int nodir = dlm_no_directory(ls); |
527 | int error; | 527 | int error; |
528 | 528 | ||
529 | log_debug(ls, "dlm_recover_masters"); | 529 | log_rinfo(ls, "dlm_recover_masters"); |
530 | 530 | ||
531 | down_read(&ls->ls_root_sem); | 531 | down_read(&ls->ls_root_sem); |
532 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { | 532 | list_for_each_entry(r, &ls->ls_root_list, res_root_list) { |
@@ -552,7 +552,7 @@ int dlm_recover_masters(struct dlm_ls *ls) | |||
552 | } | 552 | } |
553 | up_read(&ls->ls_root_sem); | 553 | up_read(&ls->ls_root_sem); |
554 | 554 | ||
555 | log_debug(ls, "dlm_recover_masters %u of %u", count, total); | 555 | log_rinfo(ls, "dlm_recover_masters %u of %u", count, total); |
556 | 556 | ||
557 | error = dlm_wait_function(ls, &recover_idr_empty); | 557 | error = dlm_wait_function(ls, &recover_idr_empty); |
558 | out: | 558 | out: |
@@ -685,7 +685,7 @@ int dlm_recover_locks(struct dlm_ls *ls) | |||
685 | } | 685 | } |
686 | up_read(&ls->ls_root_sem); | 686 | up_read(&ls->ls_root_sem); |
687 | 687 | ||
688 | log_debug(ls, "dlm_recover_locks %d out", count); | 688 | log_rinfo(ls, "dlm_recover_locks %d out", count); |
689 | 689 | ||
690 | error = dlm_wait_function(ls, &recover_list_empty); | 690 | error = dlm_wait_function(ls, &recover_list_empty); |
691 | out: | 691 | out: |
@@ -883,7 +883,7 @@ void dlm_recover_rsbs(struct dlm_ls *ls) | |||
883 | up_read(&ls->ls_root_sem); | 883 | up_read(&ls->ls_root_sem); |
884 | 884 | ||
885 | if (count) | 885 | if (count) |
886 | log_debug(ls, "dlm_recover_rsbs %d done", count); | 886 | log_rinfo(ls, "dlm_recover_rsbs %d done", count); |
887 | } | 887 | } |
888 | 888 | ||
889 | /* Create a single list of all root rsb's to be used during recovery */ | 889 | /* Create a single list of all root rsb's to be used during recovery */ |
@@ -950,6 +950,6 @@ void dlm_clear_toss(struct dlm_ls *ls) | |||
950 | } | 950 | } |
951 | 951 | ||
952 | if (count) | 952 | if (count) |
953 | log_debug(ls, "dlm_clear_toss %u done", count); | 953 | log_rinfo(ls, "dlm_clear_toss %u done", count); |
954 | } | 954 | } |
955 | 955 | ||
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c index 32f9f8926ec3..6859b4bf971e 100644 --- a/fs/dlm/recoverd.c +++ b/fs/dlm/recoverd.c | |||
@@ -55,7 +55,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
55 | unsigned long start; | 55 | unsigned long start; |
56 | int error, neg = 0; | 56 | int error, neg = 0; |
57 | 57 | ||
58 | log_debug(ls, "dlm_recover %llu", (unsigned long long)rv->seq); | 58 | log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq); |
59 | 59 | ||
60 | mutex_lock(&ls->ls_recoverd_active); | 60 | mutex_lock(&ls->ls_recoverd_active); |
61 | 61 | ||
@@ -76,7 +76,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
76 | 76 | ||
77 | error = dlm_recover_members(ls, rv, &neg); | 77 | error = dlm_recover_members(ls, rv, &neg); |
78 | if (error) { | 78 | if (error) { |
79 | log_debug(ls, "dlm_recover_members error %d", error); | 79 | log_rinfo(ls, "dlm_recover_members error %d", error); |
80 | goto fail; | 80 | goto fail; |
81 | } | 81 | } |
82 | 82 | ||
@@ -90,7 +90,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
90 | 90 | ||
91 | error = dlm_recover_members_wait(ls); | 91 | error = dlm_recover_members_wait(ls); |
92 | if (error) { | 92 | if (error) { |
93 | log_debug(ls, "dlm_recover_members_wait error %d", error); | 93 | log_rinfo(ls, "dlm_recover_members_wait error %d", error); |
94 | goto fail; | 94 | goto fail; |
95 | } | 95 | } |
96 | 96 | ||
@@ -103,7 +103,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
103 | 103 | ||
104 | error = dlm_recover_directory(ls); | 104 | error = dlm_recover_directory(ls); |
105 | if (error) { | 105 | if (error) { |
106 | log_debug(ls, "dlm_recover_directory error %d", error); | 106 | log_rinfo(ls, "dlm_recover_directory error %d", error); |
107 | goto fail; | 107 | goto fail; |
108 | } | 108 | } |
109 | 109 | ||
@@ -111,11 +111,11 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
111 | 111 | ||
112 | error = dlm_recover_directory_wait(ls); | 112 | error = dlm_recover_directory_wait(ls); |
113 | if (error) { | 113 | if (error) { |
114 | log_debug(ls, "dlm_recover_directory_wait error %d", error); | 114 | log_rinfo(ls, "dlm_recover_directory_wait error %d", error); |
115 | goto fail; | 115 | goto fail; |
116 | } | 116 | } |
117 | 117 | ||
118 | log_debug(ls, "dlm_recover_directory %u out %u messages", | 118 | log_rinfo(ls, "dlm_recover_directory %u out %u messages", |
119 | ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); | 119 | ls->ls_recover_dir_sent_res, ls->ls_recover_dir_sent_msg); |
120 | 120 | ||
121 | /* | 121 | /* |
@@ -144,7 +144,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
144 | 144 | ||
145 | error = dlm_recover_masters(ls); | 145 | error = dlm_recover_masters(ls); |
146 | if (error) { | 146 | if (error) { |
147 | log_debug(ls, "dlm_recover_masters error %d", error); | 147 | log_rinfo(ls, "dlm_recover_masters error %d", error); |
148 | goto fail; | 148 | goto fail; |
149 | } | 149 | } |
150 | 150 | ||
@@ -154,7 +154,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
154 | 154 | ||
155 | error = dlm_recover_locks(ls); | 155 | error = dlm_recover_locks(ls); |
156 | if (error) { | 156 | if (error) { |
157 | log_debug(ls, "dlm_recover_locks error %d", error); | 157 | log_rinfo(ls, "dlm_recover_locks error %d", error); |
158 | goto fail; | 158 | goto fail; |
159 | } | 159 | } |
160 | 160 | ||
@@ -162,11 +162,11 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
162 | 162 | ||
163 | error = dlm_recover_locks_wait(ls); | 163 | error = dlm_recover_locks_wait(ls); |
164 | if (error) { | 164 | if (error) { |
165 | log_debug(ls, "dlm_recover_locks_wait error %d", error); | 165 | log_rinfo(ls, "dlm_recover_locks_wait error %d", error); |
166 | goto fail; | 166 | goto fail; |
167 | } | 167 | } |
168 | 168 | ||
169 | log_debug(ls, "dlm_recover_locks %u in", | 169 | log_rinfo(ls, "dlm_recover_locks %u in", |
170 | ls->ls_recover_locks_in); | 170 | ls->ls_recover_locks_in); |
171 | 171 | ||
172 | /* | 172 | /* |
@@ -186,7 +186,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
186 | 186 | ||
187 | error = dlm_recover_locks_wait(ls); | 187 | error = dlm_recover_locks_wait(ls); |
188 | if (error) { | 188 | if (error) { |
189 | log_debug(ls, "dlm_recover_locks_wait error %d", error); | 189 | log_rinfo(ls, "dlm_recover_locks_wait error %d", error); |
190 | goto fail; | 190 | goto fail; |
191 | } | 191 | } |
192 | } | 192 | } |
@@ -205,7 +205,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
205 | 205 | ||
206 | error = dlm_recover_done_wait(ls); | 206 | error = dlm_recover_done_wait(ls); |
207 | if (error) { | 207 | if (error) { |
208 | log_debug(ls, "dlm_recover_done_wait error %d", error); | 208 | log_rinfo(ls, "dlm_recover_done_wait error %d", error); |
209 | goto fail; | 209 | goto fail; |
210 | } | 210 | } |
211 | 211 | ||
@@ -217,25 +217,25 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
217 | 217 | ||
218 | error = enable_locking(ls, rv->seq); | 218 | error = enable_locking(ls, rv->seq); |
219 | if (error) { | 219 | if (error) { |
220 | log_debug(ls, "enable_locking error %d", error); | 220 | log_rinfo(ls, "enable_locking error %d", error); |
221 | goto fail; | 221 | goto fail; |
222 | } | 222 | } |
223 | 223 | ||
224 | error = dlm_process_requestqueue(ls); | 224 | error = dlm_process_requestqueue(ls); |
225 | if (error) { | 225 | if (error) { |
226 | log_debug(ls, "dlm_process_requestqueue error %d", error); | 226 | log_rinfo(ls, "dlm_process_requestqueue error %d", error); |
227 | goto fail; | 227 | goto fail; |
228 | } | 228 | } |
229 | 229 | ||
230 | error = dlm_recover_waiters_post(ls); | 230 | error = dlm_recover_waiters_post(ls); |
231 | if (error) { | 231 | if (error) { |
232 | log_debug(ls, "dlm_recover_waiters_post error %d", error); | 232 | log_rinfo(ls, "dlm_recover_waiters_post error %d", error); |
233 | goto fail; | 233 | goto fail; |
234 | } | 234 | } |
235 | 235 | ||
236 | dlm_recover_grant(ls); | 236 | dlm_recover_grant(ls); |
237 | 237 | ||
238 | log_debug(ls, "dlm_recover %llu generation %u done: %u ms", | 238 | log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms", |
239 | (unsigned long long)rv->seq, ls->ls_generation, | 239 | (unsigned long long)rv->seq, ls->ls_generation, |
240 | jiffies_to_msecs(jiffies - start)); | 240 | jiffies_to_msecs(jiffies - start)); |
241 | mutex_unlock(&ls->ls_recoverd_active); | 241 | mutex_unlock(&ls->ls_recoverd_active); |
@@ -245,7 +245,7 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv) | |||
245 | 245 | ||
246 | fail: | 246 | fail: |
247 | dlm_release_root_list(ls); | 247 | dlm_release_root_list(ls); |
248 | log_debug(ls, "dlm_recover %llu error %d", | 248 | log_rinfo(ls, "dlm_recover %llu error %d", |
249 | (unsigned long long)rv->seq, error); | 249 | (unsigned long long)rv->seq, error); |
250 | mutex_unlock(&ls->ls_recoverd_active); | 250 | mutex_unlock(&ls->ls_recoverd_active); |
251 | return error; | 251 | return error; |
diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 9fd702f5bfb2..9280202e488c 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c | |||
@@ -59,10 +59,22 @@ int drop_caches_sysctl_handler(ctl_table *table, int write, | |||
59 | if (ret) | 59 | if (ret) |
60 | return ret; | 60 | return ret; |
61 | if (write) { | 61 | if (write) { |
62 | if (sysctl_drop_caches & 1) | 62 | static int stfu; |
63 | |||
64 | if (sysctl_drop_caches & 1) { | ||
63 | iterate_supers(drop_pagecache_sb, NULL); | 65 | iterate_supers(drop_pagecache_sb, NULL); |
64 | if (sysctl_drop_caches & 2) | 66 | count_vm_event(DROP_PAGECACHE); |
67 | } | ||
68 | if (sysctl_drop_caches & 2) { | ||
65 | drop_slab(); | 69 | drop_slab(); |
70 | count_vm_event(DROP_SLAB); | ||
71 | } | ||
72 | if (!stfu) { | ||
73 | pr_info("%s (%d): drop_caches: %d\n", | ||
74 | current->comm, task_pid_nr(current), | ||
75 | sysctl_drop_caches); | ||
76 | } | ||
77 | stfu |= sysctl_drop_caches & 4; | ||
66 | } | 78 | } |
67 | return 0; | 79 | return 0; |
68 | } | 80 | } |
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index b167ca48b8ee..d4a9431ec73c 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c | |||
@@ -641,7 +641,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
641 | } | 641 | } |
642 | rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, | 642 | rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry, |
643 | lower_new_dir_dentry->d_inode, lower_new_dentry, | 643 | lower_new_dir_dentry->d_inode, lower_new_dentry, |
644 | NULL); | 644 | NULL, 0); |
645 | if (rc) | 645 | if (rc) |
646 | goto out_lock; | 646 | goto out_lock; |
647 | if (target_inode) | 647 | if (target_inode) |
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c index e879cf8ff0b1..afa1b81c3418 100644 --- a/fs/ecryptfs/super.c +++ b/fs/ecryptfs/super.c | |||
@@ -132,7 +132,7 @@ static int ecryptfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
132 | */ | 132 | */ |
133 | static void ecryptfs_evict_inode(struct inode *inode) | 133 | static void ecryptfs_evict_inode(struct inode *inode) |
134 | { | 134 | { |
135 | truncate_inode_pages(&inode->i_data, 0); | 135 | truncate_inode_pages_final(&inode->i_data); |
136 | clear_inode(inode); | 136 | clear_inode(inode); |
137 | iput(ecryptfs_inode_to_lower(inode)); | 137 | iput(ecryptfs_inode_to_lower(inode)); |
138 | } | 138 | } |
diff --git a/fs/efivarfs/file.c b/fs/efivarfs/file.c index 8dd524f32284..cdb2971192a5 100644 --- a/fs/efivarfs/file.c +++ b/fs/efivarfs/file.c | |||
@@ -21,7 +21,7 @@ static ssize_t efivarfs_file_write(struct file *file, | |||
21 | u32 attributes; | 21 | u32 attributes; |
22 | struct inode *inode = file->f_mapping->host; | 22 | struct inode *inode = file->f_mapping->host; |
23 | unsigned long datasize = count - sizeof(attributes); | 23 | unsigned long datasize = count - sizeof(attributes); |
24 | ssize_t bytes = 0; | 24 | ssize_t bytes; |
25 | bool set = false; | 25 | bool set = false; |
26 | 26 | ||
27 | if (count < sizeof(attributes)) | 27 | if (count < sizeof(attributes)) |
@@ -33,14 +33,9 @@ static ssize_t efivarfs_file_write(struct file *file, | |||
33 | if (attributes & ~(EFI_VARIABLE_MASK)) | 33 | if (attributes & ~(EFI_VARIABLE_MASK)) |
34 | return -EINVAL; | 34 | return -EINVAL; |
35 | 35 | ||
36 | data = kmalloc(datasize, GFP_KERNEL); | 36 | data = memdup_user(userbuf + sizeof(attributes), datasize); |
37 | if (!data) | 37 | if (IS_ERR(data)) |
38 | return -ENOMEM; | 38 | return PTR_ERR(data); |
39 | |||
40 | if (copy_from_user(data, userbuf + sizeof(attributes), datasize)) { | ||
41 | bytes = -EFAULT; | ||
42 | goto out; | ||
43 | } | ||
44 | 39 | ||
45 | bytes = efivar_entry_set_get_size(var, attributes, &datasize, | 40 | bytes = efivar_entry_set_get_size(var, attributes, &datasize, |
46 | data, &set); | 41 | data, &set); |
diff --git a/fs/efs/super.c b/fs/efs/super.c index 50215bbd6463..3befcc9f5d63 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
@@ -91,7 +91,7 @@ static void init_once(void *foo) | |||
91 | inode_init_once(&ei->vfs_inode); | 91 | inode_init_once(&ei->vfs_inode); |
92 | } | 92 | } |
93 | 93 | ||
94 | static int init_inodecache(void) | 94 | static int __init init_inodecache(void) |
95 | { | 95 | { |
96 | efs_inode_cachep = kmem_cache_create("efs_inode_cache", | 96 | efs_inode_cachep = kmem_cache_create("efs_inode_cache", |
97 | sizeof(struct efs_inode_info), | 97 | sizeof(struct efs_inode_info), |
@@ -114,6 +114,7 @@ static void destroy_inodecache(void) | |||
114 | 114 | ||
115 | static int efs_remount(struct super_block *sb, int *flags, char *data) | 115 | static int efs_remount(struct super_block *sb, int *flags, char *data) |
116 | { | 116 | { |
117 | sync_filesystem(sb); | ||
117 | *flags |= MS_RDONLY; | 118 | *flags |= MS_RDONLY; |
118 | return 0; | 119 | return 0; |
119 | } | 120 | } |
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/file.h> | 26 | #include <linux/file.h> |
27 | #include <linux/fdtable.h> | 27 | #include <linux/fdtable.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/vmacache.h> | ||
29 | #include <linux/stat.h> | 30 | #include <linux/stat.h> |
30 | #include <linux/fcntl.h> | 31 | #include <linux/fcntl.h> |
31 | #include <linux/swap.h> | 32 | #include <linux/swap.h> |
@@ -97,6 +98,7 @@ static inline void put_binfmt(struct linux_binfmt * fmt) | |||
97 | module_put(fmt->module); | 98 | module_put(fmt->module); |
98 | } | 99 | } |
99 | 100 | ||
101 | #ifdef CONFIG_USELIB | ||
100 | /* | 102 | /* |
101 | * Note that a shared library must be both readable and executable due to | 103 | * Note that a shared library must be both readable and executable due to |
102 | * security reasons. | 104 | * security reasons. |
@@ -156,6 +158,7 @@ exit: | |||
156 | out: | 158 | out: |
157 | return error; | 159 | return error; |
158 | } | 160 | } |
161 | #endif /* #ifdef CONFIG_USELIB */ | ||
159 | 162 | ||
160 | #ifdef CONFIG_MMU | 163 | #ifdef CONFIG_MMU |
161 | /* | 164 | /* |
@@ -820,7 +823,7 @@ EXPORT_SYMBOL(read_code); | |||
820 | static int exec_mmap(struct mm_struct *mm) | 823 | static int exec_mmap(struct mm_struct *mm) |
821 | { | 824 | { |
822 | struct task_struct *tsk; | 825 | struct task_struct *tsk; |
823 | struct mm_struct * old_mm, *active_mm; | 826 | struct mm_struct *old_mm, *active_mm; |
824 | 827 | ||
825 | /* Notify parent that we're no longer interested in the old VM */ | 828 | /* Notify parent that we're no longer interested in the old VM */ |
826 | tsk = current; | 829 | tsk = current; |
@@ -846,6 +849,8 @@ static int exec_mmap(struct mm_struct *mm) | |||
846 | tsk->mm = mm; | 849 | tsk->mm = mm; |
847 | tsk->active_mm = mm; | 850 | tsk->active_mm = mm; |
848 | activate_mm(active_mm, mm); | 851 | activate_mm(active_mm, mm); |
852 | tsk->mm->vmacache_seqnum = 0; | ||
853 | vmacache_flush(tsk); | ||
849 | task_unlock(tsk); | 854 | task_unlock(tsk); |
850 | if (old_mm) { | 855 | if (old_mm) { |
851 | up_read(&old_mm->mmap_sem); | 856 | up_read(&old_mm->mmap_sem); |
@@ -1041,7 +1046,7 @@ EXPORT_SYMBOL_GPL(get_task_comm); | |||
1041 | * so that a new one can be started | 1046 | * so that a new one can be started |
1042 | */ | 1047 | */ |
1043 | 1048 | ||
1044 | void set_task_comm(struct task_struct *tsk, char *buf) | 1049 | void set_task_comm(struct task_struct *tsk, const char *buf) |
1045 | { | 1050 | { |
1046 | task_lock(tsk); | 1051 | task_lock(tsk); |
1047 | trace_task_rename(tsk, buf); | 1052 | trace_task_rename(tsk, buf); |
@@ -1050,21 +1055,6 @@ void set_task_comm(struct task_struct *tsk, char *buf) | |||
1050 | perf_event_comm(tsk); | 1055 | perf_event_comm(tsk); |
1051 | } | 1056 | } |
1052 | 1057 | ||
1053 | static void filename_to_taskname(char *tcomm, const char *fn, unsigned int len) | ||
1054 | { | ||
1055 | int i, ch; | ||
1056 | |||
1057 | /* Copies the binary name from after last slash */ | ||
1058 | for (i = 0; (ch = *(fn++)) != '\0';) { | ||
1059 | if (ch == '/') | ||
1060 | i = 0; /* overwrite what we wrote */ | ||
1061 | else | ||
1062 | if (i < len - 1) | ||
1063 | tcomm[i++] = ch; | ||
1064 | } | ||
1065 | tcomm[i] = '\0'; | ||
1066 | } | ||
1067 | |||
1068 | int flush_old_exec(struct linux_binprm * bprm) | 1058 | int flush_old_exec(struct linux_binprm * bprm) |
1069 | { | 1059 | { |
1070 | int retval; | 1060 | int retval; |
@@ -1078,8 +1068,6 @@ int flush_old_exec(struct linux_binprm * bprm) | |||
1078 | goto out; | 1068 | goto out; |
1079 | 1069 | ||
1080 | set_mm_exe_file(bprm->mm, bprm->file); | 1070 | set_mm_exe_file(bprm->mm, bprm->file); |
1081 | |||
1082 | filename_to_taskname(bprm->tcomm, bprm->filename, sizeof(bprm->tcomm)); | ||
1083 | /* | 1071 | /* |
1084 | * Release all of the old mmap stuff | 1072 | * Release all of the old mmap stuff |
1085 | */ | 1073 | */ |
@@ -1122,7 +1110,7 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
1122 | else | 1110 | else |
1123 | set_dumpable(current->mm, suid_dumpable); | 1111 | set_dumpable(current->mm, suid_dumpable); |
1124 | 1112 | ||
1125 | set_task_comm(current, bprm->tcomm); | 1113 | set_task_comm(current, kbasename(bprm->filename)); |
1126 | 1114 | ||
1127 | /* Set the new mm task size. We have to do that late because it may | 1115 | /* Set the new mm task size. We have to do that late because it may |
1128 | * depend on TIF_32BIT which is only updated in flush_thread() on | 1116 | * depend on TIF_32BIT which is only updated in flush_thread() on |
@@ -1619,9 +1607,9 @@ SYSCALL_DEFINE3(execve, | |||
1619 | return do_execve(getname(filename), argv, envp); | 1607 | return do_execve(getname(filename), argv, envp); |
1620 | } | 1608 | } |
1621 | #ifdef CONFIG_COMPAT | 1609 | #ifdef CONFIG_COMPAT |
1622 | asmlinkage long compat_sys_execve(const char __user * filename, | 1610 | COMPAT_SYSCALL_DEFINE3(execve, const char __user *, filename, |
1623 | const compat_uptr_t __user * argv, | 1611 | const compat_uptr_t __user *, argv, |
1624 | const compat_uptr_t __user * envp) | 1612 | const compat_uptr_t __user *, envp) |
1625 | { | 1613 | { |
1626 | return compat_do_execve(getname(filename), argv, envp); | 1614 | return compat_do_execve(getname(filename), argv, envp); |
1627 | } | 1615 | } |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index ee4317faccb1..d1c244d67667 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
@@ -1486,7 +1486,7 @@ void exofs_evict_inode(struct inode *inode) | |||
1486 | struct ore_io_state *ios; | 1486 | struct ore_io_state *ios; |
1487 | int ret; | 1487 | int ret; |
1488 | 1488 | ||
1489 | truncate_inode_pages(&inode->i_data, 0); | 1489 | truncate_inode_pages_final(&inode->i_data); |
1490 | 1490 | ||
1491 | /* TODO: should do better here */ | 1491 | /* TODO: should do better here */ |
1492 | if (inode->i_nlink || is_bad_inode(inode)) | 1492 | if (inode->i_nlink || is_bad_inode(inode)) |
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index 1b8001bbe947..27695e6f4e46 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c | |||
@@ -4,7 +4,6 @@ | |||
4 | * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> | 4 | * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> |
5 | */ | 5 | */ |
6 | 6 | ||
7 | #include <linux/capability.h> | ||
8 | #include <linux/init.h> | 7 | #include <linux/init.h> |
9 | #include <linux/sched.h> | 8 | #include <linux/sched.h> |
10 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c index 7cadd823bb31..7d66fb0e4cca 100644 --- a/fs/ext2/ialloc.c +++ b/fs/ext2/ialloc.c | |||
@@ -284,7 +284,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
284 | int best_ndir = inodes_per_group; | 284 | int best_ndir = inodes_per_group; |
285 | int best_group = -1; | 285 | int best_group = -1; |
286 | 286 | ||
287 | get_random_bytes(&group, sizeof(group)); | 287 | group = prandom_u32(); |
288 | parent_group = (unsigned)group % ngroups; | 288 | parent_group = (unsigned)group % ngroups; |
289 | for (i = 0; i < ngroups; i++) { | 289 | for (i = 0; i < ngroups; i++) { |
290 | group = (parent_group + i) % ngroups; | 290 | group = (parent_group + i) % ngroups; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 94ed36849b71..b1d2a4675d42 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
@@ -78,7 +78,7 @@ void ext2_evict_inode(struct inode * inode) | |||
78 | dquot_drop(inode); | 78 | dquot_drop(inode); |
79 | } | 79 | } |
80 | 80 | ||
81 | truncate_inode_pages(&inode->i_data, 0); | 81 | truncate_inode_pages_final(&inode->i_data); |
82 | 82 | ||
83 | if (want_delete) { | 83 | if (want_delete) { |
84 | sb_start_intwrite(inode->i_sb); | 84 | sb_start_intwrite(inode->i_sb); |
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 20d6697bd638..3750031cfa2f 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
@@ -192,7 +192,7 @@ static void init_once(void *foo) | |||
192 | inode_init_once(&ei->vfs_inode); | 192 | inode_init_once(&ei->vfs_inode); |
193 | } | 193 | } |
194 | 194 | ||
195 | static int init_inodecache(void) | 195 | static int __init init_inodecache(void) |
196 | { | 196 | { |
197 | ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", | 197 | ext2_inode_cachep = kmem_cache_create("ext2_inode_cache", |
198 | sizeof(struct ext2_inode_info), | 198 | sizeof(struct ext2_inode_info), |
@@ -1254,6 +1254,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data) | |||
1254 | unsigned long old_sb_flags; | 1254 | unsigned long old_sb_flags; |
1255 | int err; | 1255 | int err; |
1256 | 1256 | ||
1257 | sync_filesystem(sb); | ||
1257 | spin_lock(&sbi->s_lock); | 1258 | spin_lock(&sbi->s_lock); |
1258 | 1259 | ||
1259 | /* Store the old options */ | 1260 | /* Store the old options */ |
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c index cfedb2cb0d8c..c0ebc4db8849 100644 --- a/fs/ext2/xattr_security.c +++ b/fs/ext2/xattr_security.c | |||
@@ -42,8 +42,8 @@ ext2_xattr_security_set(struct dentry *dentry, const char *name, | |||
42 | value, size, flags); | 42 | value, size, flags); |
43 | } | 43 | } |
44 | 44 | ||
45 | int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, | 45 | static int ext2_initxattrs(struct inode *inode, const struct xattr *xattr_array, |
46 | void *fs_info) | 46 | void *fs_info) |
47 | { | 47 | { |
48 | const struct xattr *xattr; | 48 | const struct xattr *xattr; |
49 | int err = 0; | 49 | int err = 0; |
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 22548f56197b..158b5d4ce067 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
@@ -1727,10 +1727,7 @@ allocated: | |||
1727 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | 1727 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); |
1728 | 1728 | ||
1729 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | 1729 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); |
1730 | err = ext3_journal_dirty_metadata(handle, gdp_bh); | 1730 | fatal = ext3_journal_dirty_metadata(handle, gdp_bh); |
1731 | if (!fatal) | ||
1732 | fatal = err; | ||
1733 | |||
1734 | if (fatal) | 1731 | if (fatal) |
1735 | goto out; | 1732 | goto out; |
1736 | 1733 | ||
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index e66e4808719f..17742eed2c16 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
@@ -275,7 +275,7 @@ static inline loff_t ext3_get_htree_eof(struct file *filp) | |||
275 | * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) | 275 | * NOTE: offsets obtained *before* ext3_set_inode_flag(dir, EXT3_INODE_INDEX) |
276 | * will be invalid once the directory was converted into a dx directory | 276 | * will be invalid once the directory was converted into a dx directory |
277 | */ | 277 | */ |
278 | loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) | 278 | static loff_t ext3_dir_llseek(struct file *file, loff_t offset, int whence) |
279 | { | 279 | { |
280 | struct inode *inode = file->f_mapping->host; | 280 | struct inode *inode = file->f_mapping->host; |
281 | int dx_dir = is_dx_dir(inode); | 281 | int dx_dir = is_dx_dir(inode); |
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index 082afd78b107..a1b810230cc5 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c | |||
@@ -215,7 +215,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) | |||
215 | int best_ndir = inodes_per_group; | 215 | int best_ndir = inodes_per_group; |
216 | int best_group = -1; | 216 | int best_group = -1; |
217 | 217 | ||
218 | get_random_bytes(&group, sizeof(group)); | 218 | group = prandom_u32(); |
219 | parent_group = (unsigned)group % ngroups; | 219 | parent_group = (unsigned)group % ngroups; |
220 | for (i = 0; i < ngroups; i++) { | 220 | for (i = 0; i < ngroups; i++) { |
221 | group = (parent_group + i) % ngroups; | 221 | group = (parent_group + i) % ngroups; |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 384b6ebb655f..f5157d0d1b43 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
@@ -228,7 +228,7 @@ void ext3_evict_inode (struct inode *inode) | |||
228 | log_wait_commit(journal, commit_tid); | 228 | log_wait_commit(journal, commit_tid); |
229 | filemap_write_and_wait(&inode->i_data); | 229 | filemap_write_and_wait(&inode->i_data); |
230 | } | 230 | } |
231 | truncate_inode_pages(&inode->i_data, 0); | 231 | truncate_inode_pages_final(&inode->i_data); |
232 | 232 | ||
233 | ext3_discard_reservation(inode); | 233 | ext3_discard_reservation(inode); |
234 | rsv = ei->i_block_alloc_info; | 234 | rsv = ei->i_block_alloc_info; |
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh) | |||
1559 | } | 1559 | } |
1560 | 1560 | ||
1561 | /* | 1561 | /* |
1562 | * Note that we always start a transaction even if we're not journalling | 1562 | * Note that whenever we need to map blocks we start a transaction even if |
1563 | * data. This is to preserve ordering: any hole instantiation within | 1563 | * we're not journalling data. This is to preserve ordering: any hole |
1564 | * __block_write_full_page -> ext3_get_block() should be journalled | 1564 | * instantiation within __block_write_full_page -> ext3_get_block() should be |
1565 | * along with the data so we don't crash and then get metadata which | 1565 | * journalled along with the data so we don't crash and then get metadata which |
1566 | * refers to old data. | 1566 | * refers to old data. |
1567 | * | 1567 | * |
1568 | * In all journalling modes block_write_full_page() will start the I/O. | 1568 | * In all journalling modes block_write_full_page() will start the I/O. |
1569 | * | 1569 | * |
1570 | * Problem: | ||
1571 | * | ||
1572 | * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> | ||
1573 | * ext3_writepage() | ||
1574 | * | ||
1575 | * Similar for: | ||
1576 | * | ||
1577 | * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... | ||
1578 | * | ||
1579 | * Same applies to ext3_get_block(). We will deadlock on various things like | ||
1580 | * lock_journal and i_truncate_mutex. | ||
1581 | * | ||
1582 | * Setting PF_MEMALLOC here doesn't work - too many internal memory | ||
1583 | * allocations fail. | ||
1584 | * | ||
1585 | * 16May01: If we're reentered then journal_current_handle() will be | ||
1586 | * non-zero. We simply *return*. | ||
1587 | * | ||
1588 | * 1 July 2001: @@@ FIXME: | ||
1589 | * In journalled data mode, a data buffer may be metadata against the | ||
1590 | * current transaction. But the same file is part of a shared mapping | ||
1591 | * and someone does a writepage() on it. | ||
1592 | * | ||
1593 | * We will move the buffer onto the async_data list, but *after* it has | ||
1594 | * been dirtied. So there's a small window where we have dirty data on | ||
1595 | * BJ_Metadata. | ||
1596 | * | ||
1597 | * Note that this only applies to the last partial page in the file. The | ||
1598 | * bit which block_write_full_page() uses prepare/commit for. (That's | ||
1599 | * broken code anyway: it's wrong for msync()). | ||
1600 | * | ||
1601 | * It's a rare case: affects the final partial page, for journalled data | ||
1602 | * where the file is subject to bith write() and writepage() in the same | ||
1603 | * transction. To fix it we'll need a custom block_write_full_page(). | ||
1604 | * We'll probably need that anyway for journalling writepage() output. | ||
1605 | * | ||
1606 | * We don't honour synchronous mounts for writepage(). That would be | 1570 | * We don't honour synchronous mounts for writepage(). That would be |
1607 | * disastrous. Any write() or metadata operation will sync the fs for | 1571 | * disastrous. Any write() or metadata operation will sync the fs for |
1608 | * us. | 1572 | * us. |
1609 | * | ||
1610 | * AKPM2: if all the page's buffers are mapped to disk and !data=journal, | ||
1611 | * we don't need to open a transaction here. | ||
1612 | */ | 1573 | */ |
1613 | static int ext3_ordered_writepage(struct page *page, | 1574 | static int ext3_ordered_writepage(struct page *page, |
1614 | struct writeback_control *wbc) | 1575 | struct writeback_control *wbc) |
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page, | |||
1673 | * block_write_full_page() succeeded. Otherwise they are unmapped, | 1634 | * block_write_full_page() succeeded. Otherwise they are unmapped, |
1674 | * and generally junk. | 1635 | * and generally junk. |
1675 | */ | 1636 | */ |
1676 | if (ret == 0) { | 1637 | if (ret == 0) |
1677 | err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, | 1638 | ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, |
1678 | NULL, journal_dirty_data_fn); | 1639 | NULL, journal_dirty_data_fn); |
1679 | if (!ret) | ||
1680 | ret = err; | ||
1681 | } | ||
1682 | walk_page_buffers(handle, page_bufs, 0, | 1640 | walk_page_buffers(handle, page_bufs, 0, |
1683 | PAGE_CACHE_SIZE, NULL, bput_one); | 1641 | PAGE_CACHE_SIZE, NULL, bput_one); |
1684 | err = ext3_journal_stop(handle); | 1642 | err = ext3_journal_stop(handle); |
@@ -1925,6 +1883,8 @@ retry: | |||
1925 | * and pretend the write failed... */ | 1883 | * and pretend the write failed... */ |
1926 | ext3_truncate_failed_direct_write(inode); | 1884 | ext3_truncate_failed_direct_write(inode); |
1927 | ret = PTR_ERR(handle); | 1885 | ret = PTR_ERR(handle); |
1886 | if (inode->i_nlink) | ||
1887 | ext3_orphan_del(NULL, inode); | ||
1928 | goto out; | 1888 | goto out; |
1929 | } | 1889 | } |
1930 | if (inode->i_nlink) | 1890 | if (inode->i_nlink) |
@@ -3212,21 +3172,20 @@ out_brelse: | |||
3212 | * | 3172 | * |
3213 | * We are called from a few places: | 3173 | * We are called from a few places: |
3214 | * | 3174 | * |
3215 | * - Within generic_file_write() for O_SYNC files. | 3175 | * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files. |
3216 | * Here, there will be no transaction running. We wait for any running | 3176 | * Here, there will be no transaction running. We wait for any running |
3217 | * transaction to commit. | 3177 | * transaction to commit. |
3218 | * | 3178 | * |
3219 | * - Within sys_sync(), kupdate and such. | 3179 | * - Within flush work (for sys_sync(), kupdate and such). |
3220 | * We wait on commit, if tol to. | 3180 | * We wait on commit, if told to. |
3221 | * | 3181 | * |
3222 | * - Within prune_icache() (PF_MEMALLOC == true) | 3182 | * - Within iput_final() -> write_inode_now() |
3223 | * Here we simply return. We can't afford to block kswapd on the | 3183 | * We wait on commit, if told to. |
3224 | * journal commit. | ||
3225 | * | 3184 | * |
3226 | * In all cases it is actually safe for us to return without doing anything, | 3185 | * In all cases it is actually safe for us to return without doing anything, |
3227 | * because the inode has been copied into a raw inode buffer in | 3186 | * because the inode has been copied into a raw inode buffer in |
3228 | * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for | 3187 | * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL |
3229 | * knfsd. | 3188 | * writeback. |
3230 | * | 3189 | * |
3231 | * Note that we are absolutely dependent upon all inode dirtiers doing the | 3190 | * Note that we are absolutely dependent upon all inode dirtiers doing the |
3232 | * right thing: they *must* call mark_inode_dirty() after dirtying info in | 3191 | * right thing: they *must* call mark_inode_dirty() after dirtying info in |
@@ -3238,13 +3197,13 @@ out_brelse: | |||
3238 | * stuff(); | 3197 | * stuff(); |
3239 | * inode->i_size = expr; | 3198 | * inode->i_size = expr; |
3240 | * | 3199 | * |
3241 | * is in error because a kswapd-driven write_inode() could occur while | 3200 | * is in error because write_inode() could occur while `stuff()' is running, |
3242 | * `stuff()' is running, and the new i_size will be lost. Plus the inode | 3201 | * and the new i_size will be lost. Plus the inode will no longer be on the |
3243 | * will no longer be on the superblock's dirty inode list. | 3202 | * superblock's dirty inode list. |
3244 | */ | 3203 | */ |
3245 | int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) | 3204 | int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) |
3246 | { | 3205 | { |
3247 | if (current->flags & PF_MEMALLOC) | 3206 | if (WARN_ON_ONCE(current->flags & PF_MEMALLOC)) |
3248 | return 0; | 3207 | return 0; |
3249 | 3208 | ||
3250 | if (ext3_journal_current_handle()) { | 3209 | if (ext3_journal_current_handle()) { |
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
3253 | return -EIO; | 3212 | return -EIO; |
3254 | } | 3213 | } |
3255 | 3214 | ||
3256 | if (wbc->sync_mode != WB_SYNC_ALL) | 3215 | /* |
3216 | * No need to force transaction in WB_SYNC_NONE mode. Also | ||
3217 | * ext3_sync_fs() will force the commit after everything is | ||
3218 | * written. | ||
3219 | */ | ||
3220 | if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) | ||
3257 | return 0; | 3221 | return 0; |
3258 | 3222 | ||
3259 | return ext3_force_commit(inode->i_sb); | 3223 | return ext3_force_commit(inode->i_sb); |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 37fd31ed16e7..08cdfe5461e3 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
@@ -527,7 +527,7 @@ static void init_once(void *foo) | |||
527 | inode_init_once(&ei->vfs_inode); | 527 | inode_init_once(&ei->vfs_inode); |
528 | } | 528 | } |
529 | 529 | ||
530 | static int init_inodecache(void) | 530 | static int __init init_inodecache(void) |
531 | { | 531 | { |
532 | ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", | 532 | ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", |
533 | sizeof(struct ext3_inode_info), | 533 | sizeof(struct ext3_inode_info), |
@@ -2649,6 +2649,8 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) | |||
2649 | int i; | 2649 | int i; |
2650 | #endif | 2650 | #endif |
2651 | 2651 | ||
2652 | sync_filesystem(sb); | ||
2653 | |||
2652 | /* Store the original options */ | 2654 | /* Store the original options */ |
2653 | old_sb_flags = sb->s_flags; | 2655 | old_sb_flags = sb->s_flags; |
2654 | old_opts.s_mount_opt = sbi->s_mount_opt; | 2656 | old_opts.s_mount_opt = sbi->s_mount_opt; |
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c index 3387664ad70e..722c2bf9645d 100644 --- a/fs/ext3/xattr_security.c +++ b/fs/ext3/xattr_security.c | |||
@@ -43,8 +43,9 @@ ext3_xattr_security_set(struct dentry *dentry, const char *name, | |||
43 | name, value, size, flags); | 43 | name, value, size, flags); |
44 | } | 44 | } |
45 | 45 | ||
46 | int ext3_initxattrs(struct inode *inode, const struct xattr *xattr_array, | 46 | static int ext3_initxattrs(struct inode *inode, |
47 | void *fs_info) | 47 | const struct xattr *xattr_array, |
48 | void *fs_info) | ||
48 | { | 49 | { |
49 | const struct xattr *xattr; | 50 | const struct xattr *xattr; |
50 | handle_t *handle = fs_info; | 51 | handle_t *handle = fs_info; |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index d3a534fdc5ff..f1c65dc7cc0a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
@@ -31,6 +31,7 @@ | |||
31 | #include <linux/percpu_counter.h> | 31 | #include <linux/percpu_counter.h> |
32 | #include <linux/ratelimit.h> | 32 | #include <linux/ratelimit.h> |
33 | #include <crypto/hash.h> | 33 | #include <crypto/hash.h> |
34 | #include <linux/falloc.h> | ||
34 | #ifdef __KERNEL__ | 35 | #ifdef __KERNEL__ |
35 | #include <linux/compat.h> | 36 | #include <linux/compat.h> |
36 | #endif | 37 | #endif |
@@ -567,6 +568,8 @@ enum { | |||
567 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 | 568 | #define EXT4_GET_BLOCKS_NO_LOCK 0x0100 |
568 | /* Do not put hole in extent cache */ | 569 | /* Do not put hole in extent cache */ |
569 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 | 570 | #define EXT4_GET_BLOCKS_NO_PUT_HOLE 0x0200 |
571 | /* Convert written extents to unwritten */ | ||
572 | #define EXT4_GET_BLOCKS_CONVERT_UNWRITTEN 0x0400 | ||
570 | 573 | ||
571 | /* | 574 | /* |
572 | * The bit position of these flags must not overlap with any of the | 575 | * The bit position of these flags must not overlap with any of the |
@@ -998,6 +1001,8 @@ struct ext4_inode_info { | |||
998 | #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group | 1001 | #define EXT4_MOUNT2_STD_GROUP_SIZE 0x00000002 /* We have standard group |
999 | size of blocksize * 8 | 1002 | size of blocksize * 8 |
1000 | blocks */ | 1003 | blocks */ |
1004 | #define EXT4_MOUNT2_HURD_COMPAT 0x00000004 /* Support HURD-castrated | ||
1005 | file systems */ | ||
1001 | 1006 | ||
1002 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ | 1007 | #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ |
1003 | ~EXT4_MOUNT_##opt | 1008 | ~EXT4_MOUNT_##opt |
@@ -1326,6 +1331,7 @@ struct ext4_sb_info { | |||
1326 | struct list_head s_es_lru; | 1331 | struct list_head s_es_lru; |
1327 | unsigned long s_es_last_sorted; | 1332 | unsigned long s_es_last_sorted; |
1328 | struct percpu_counter s_extent_cache_cnt; | 1333 | struct percpu_counter s_extent_cache_cnt; |
1334 | struct mb_cache *s_mb_cache; | ||
1329 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1335 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; |
1330 | 1336 | ||
1331 | /* Ratelimit ext4 messages. */ | 1337 | /* Ratelimit ext4 messages. */ |
@@ -2133,8 +2139,6 @@ extern int ext4_writepage_trans_blocks(struct inode *); | |||
2133 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 2139 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
2134 | extern int ext4_block_truncate_page(handle_t *handle, | 2140 | extern int ext4_block_truncate_page(handle_t *handle, |
2135 | struct address_space *mapping, loff_t from); | 2141 | struct address_space *mapping, loff_t from); |
2136 | extern int ext4_block_zero_page_range(handle_t *handle, | ||
2137 | struct address_space *mapping, loff_t from, loff_t length); | ||
2138 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 2142 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
2139 | loff_t lstart, loff_t lend); | 2143 | loff_t lstart, loff_t lend); |
2140 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2144 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
@@ -2757,6 +2761,7 @@ extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); | |||
2757 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 2761 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
2758 | __u64 start, __u64 len); | 2762 | __u64 start, __u64 len); |
2759 | extern int ext4_ext_precache(struct inode *inode); | 2763 | extern int ext4_ext_precache(struct inode *inode); |
2764 | extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); | ||
2760 | 2765 | ||
2761 | /* move_extent.c */ | 2766 | /* move_extent.c */ |
2762 | extern void ext4_double_down_write_data_sem(struct inode *first, | 2767 | extern void ext4_double_down_write_data_sem(struct inode *first, |
@@ -2766,6 +2771,8 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode, | |||
2766 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | 2771 | extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, |
2767 | __u64 start_orig, __u64 start_donor, | 2772 | __u64 start_orig, __u64 start_donor, |
2768 | __u64 len, __u64 *moved_len); | 2773 | __u64 len, __u64 *moved_len); |
2774 | extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | ||
2775 | struct ext4_extent **extent); | ||
2769 | 2776 | ||
2770 | /* page-io.c */ | 2777 | /* page-io.c */ |
2771 | extern int __init ext4_init_pageio(void); | 2778 | extern int __init ext4_init_pageio(void); |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 3fe29de832c8..c3fb607413ed 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
@@ -259,6 +259,16 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, | |||
259 | if (WARN_ON_ONCE(err)) { | 259 | if (WARN_ON_ONCE(err)) { |
260 | ext4_journal_abort_handle(where, line, __func__, bh, | 260 | ext4_journal_abort_handle(where, line, __func__, bh, |
261 | handle, err); | 261 | handle, err); |
262 | if (inode == NULL) { | ||
263 | pr_err("EXT4: jbd2_journal_dirty_metadata " | ||
264 | "failed: handle type %u started at " | ||
265 | "line %u, credits %u/%u, errcode %d", | ||
266 | handle->h_type, | ||
267 | handle->h_line_no, | ||
268 | handle->h_requested_credits, | ||
269 | handle->h_buffer_credits, err); | ||
270 | return err; | ||
271 | } | ||
262 | ext4_error_inode(inode, where, line, | 272 | ext4_error_inode(inode, where, line, |
263 | bh->b_blocknr, | 273 | bh->b_blocknr, |
264 | "journal_dirty_metadata failed: " | 274 | "journal_dirty_metadata failed: " |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74bc2d549c58..82df3ce9874a 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include <linux/quotaops.h> | 37 | #include <linux/quotaops.h> |
38 | #include <linux/string.h> | 38 | #include <linux/string.h> |
39 | #include <linux/slab.h> | 39 | #include <linux/slab.h> |
40 | #include <linux/falloc.h> | ||
41 | #include <asm/uaccess.h> | 40 | #include <asm/uaccess.h> |
42 | #include <linux/fiemap.h> | 41 | #include <linux/fiemap.h> |
43 | #include "ext4_jbd2.h" | 42 | #include "ext4_jbd2.h" |
@@ -1691,7 +1690,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1691 | * the extent that was written properly split out and conversion to | 1690 | * the extent that was written properly split out and conversion to |
1692 | * initialized is trivial. | 1691 | * initialized is trivial. |
1693 | */ | 1692 | */ |
1694 | if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2)) | 1693 | if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2)) |
1695 | return 0; | 1694 | return 0; |
1696 | 1695 | ||
1697 | ext1_ee_len = ext4_ext_get_actual_len(ex1); | 1696 | ext1_ee_len = ext4_ext_get_actual_len(ex1); |
@@ -1708,6 +1707,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, | |||
1708 | */ | 1707 | */ |
1709 | if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) | 1708 | if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN) |
1710 | return 0; | 1709 | return 0; |
1710 | if (ext4_ext_is_uninitialized(ex1) && | ||
1711 | (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) || | ||
1712 | atomic_read(&EXT4_I(inode)->i_unwritten) || | ||
1713 | (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN))) | ||
1714 | return 0; | ||
1711 | #ifdef AGGRESSIVE_TEST | 1715 | #ifdef AGGRESSIVE_TEST |
1712 | if (ext1_ee_len >= 4) | 1716 | if (ext1_ee_len >= 4) |
1713 | return 0; | 1717 | return 0; |
@@ -1731,7 +1735,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1731 | { | 1735 | { |
1732 | struct ext4_extent_header *eh; | 1736 | struct ext4_extent_header *eh; |
1733 | unsigned int depth, len; | 1737 | unsigned int depth, len; |
1734 | int merge_done = 0; | 1738 | int merge_done = 0, uninit; |
1735 | 1739 | ||
1736 | depth = ext_depth(inode); | 1740 | depth = ext_depth(inode); |
1737 | BUG_ON(path[depth].p_hdr == NULL); | 1741 | BUG_ON(path[depth].p_hdr == NULL); |
@@ -1741,8 +1745,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode, | |||
1741 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) | 1745 | if (!ext4_can_extents_be_merged(inode, ex, ex + 1)) |
1742 | break; | 1746 | break; |
1743 | /* merge with next extent! */ | 1747 | /* merge with next extent! */ |
1748 | uninit = ext4_ext_is_uninitialized(ex); | ||
1744 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1749 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
1745 | + ext4_ext_get_actual_len(ex + 1)); | 1750 | + ext4_ext_get_actual_len(ex + 1)); |
1751 | if (uninit) | ||
1752 | ext4_ext_mark_uninitialized(ex); | ||
1746 | 1753 | ||
1747 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { | 1754 | if (ex + 1 < EXT_LAST_EXTENT(eh)) { |
1748 | len = (EXT_LAST_EXTENT(eh) - ex - 1) | 1755 | len = (EXT_LAST_EXTENT(eh) - ex - 1) |
@@ -1896,7 +1903,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1896 | struct ext4_ext_path *npath = NULL; | 1903 | struct ext4_ext_path *npath = NULL; |
1897 | int depth, len, err; | 1904 | int depth, len, err; |
1898 | ext4_lblk_t next; | 1905 | ext4_lblk_t next; |
1899 | int mb_flags = 0; | 1906 | int mb_flags = 0, uninit; |
1900 | 1907 | ||
1901 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { | 1908 | if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { |
1902 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); | 1909 | EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); |
@@ -1946,9 +1953,11 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
1946 | path + depth); | 1953 | path + depth); |
1947 | if (err) | 1954 | if (err) |
1948 | return err; | 1955 | return err; |
1949 | 1956 | uninit = ext4_ext_is_uninitialized(ex); | |
1950 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1957 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
1951 | + ext4_ext_get_actual_len(newext)); | 1958 | + ext4_ext_get_actual_len(newext)); |
1959 | if (uninit) | ||
1960 | ext4_ext_mark_uninitialized(ex); | ||
1952 | eh = path[depth].p_hdr; | 1961 | eh = path[depth].p_hdr; |
1953 | nearex = ex; | 1962 | nearex = ex; |
1954 | goto merge; | 1963 | goto merge; |
@@ -1971,10 +1980,13 @@ prepend: | |||
1971 | if (err) | 1980 | if (err) |
1972 | return err; | 1981 | return err; |
1973 | 1982 | ||
1983 | uninit = ext4_ext_is_uninitialized(ex); | ||
1974 | ex->ee_block = newext->ee_block; | 1984 | ex->ee_block = newext->ee_block; |
1975 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); | 1985 | ext4_ext_store_pblock(ex, ext4_ext_pblock(newext)); |
1976 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) | 1986 | ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex) |
1977 | + ext4_ext_get_actual_len(newext)); | 1987 | + ext4_ext_get_actual_len(newext)); |
1988 | if (uninit) | ||
1989 | ext4_ext_mark_uninitialized(ex); | ||
1978 | eh = path[depth].p_hdr; | 1990 | eh = path[depth].p_hdr; |
1979 | nearex = ex; | 1991 | nearex = ex; |
1980 | goto merge; | 1992 | goto merge; |
@@ -2585,6 +2597,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2585 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2597 | ex_ee_block = le32_to_cpu(ex->ee_block); |
2586 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2598 | ex_ee_len = ext4_ext_get_actual_len(ex); |
2587 | 2599 | ||
2600 | /* | ||
2601 | * If we're starting with an extent other than the last one in the | ||
2602 | * node, we need to see if it shares a cluster with the extent to | ||
2603 | * the right (towards the end of the file). If its leftmost cluster | ||
2604 | * is this extent's rightmost cluster and it is not cluster aligned, | ||
2605 | * we'll mark it as a partial that is not to be deallocated. | ||
2606 | */ | ||
2607 | |||
2608 | if (ex != EXT_LAST_EXTENT(eh)) { | ||
2609 | ext4_fsblk_t current_pblk, right_pblk; | ||
2610 | long long current_cluster, right_cluster; | ||
2611 | |||
2612 | current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1; | ||
2613 | current_cluster = (long long)EXT4_B2C(sbi, current_pblk); | ||
2614 | right_pblk = ext4_ext_pblock(ex + 1); | ||
2615 | right_cluster = (long long)EXT4_B2C(sbi, right_pblk); | ||
2616 | if (current_cluster == right_cluster && | ||
2617 | EXT4_PBLK_COFF(sbi, right_pblk)) | ||
2618 | *partial_cluster = -right_cluster; | ||
2619 | } | ||
2620 | |||
2588 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); | 2621 | trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster); |
2589 | 2622 | ||
2590 | while (ex >= EXT_FIRST_EXTENT(eh) && | 2623 | while (ex >= EXT_FIRST_EXTENT(eh) && |
@@ -2710,10 +2743,15 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
2710 | err = ext4_ext_correct_indexes(handle, inode, path); | 2743 | err = ext4_ext_correct_indexes(handle, inode, path); |
2711 | 2744 | ||
2712 | /* | 2745 | /* |
2713 | * Free the partial cluster only if the current extent does not | 2746 | * If there's a partial cluster and at least one extent remains in |
2714 | * reference it. Otherwise we might free used cluster. | 2747 | * the leaf, free the partial cluster if it isn't shared with the |
2748 | * current extent. If there's a partial cluster and no extents | ||
2749 | * remain in the leaf, it can't be freed here. It can only be | ||
2750 | * freed when it's possible to determine if it's not shared with | ||
2751 | * any other extent - when the next leaf is processed or when space | ||
2752 | * removal is complete. | ||
2715 | */ | 2753 | */ |
2716 | if (*partial_cluster > 0 && | 2754 | if (*partial_cluster > 0 && eh->eh_entries && |
2717 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2755 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != |
2718 | *partial_cluster)) { | 2756 | *partial_cluster)) { |
2719 | int flags = get_default_free_blocks_flags(inode); | 2757 | int flags = get_default_free_blocks_flags(inode); |
@@ -3569,6 +3607,8 @@ out: | |||
3569 | * b> Splits in two extents: Write is happening at either end of the extent | 3607 | * b> Splits in two extents: Write is happening at either end of the extent |
3570 | * c> Splits in three extents: Somone is writing in middle of the extent | 3608 | * c> Splits in three extents: Somone is writing in middle of the extent |
3571 | * | 3609 | * |
3610 | * This works the same way in the case of initialized -> unwritten conversion. | ||
3611 | * | ||
3572 | * One of more index blocks maybe needed if the extent tree grow after | 3612 | * One of more index blocks maybe needed if the extent tree grow after |
3573 | * the uninitialized extent split. To prevent ENOSPC occur at the IO | 3613 | * the uninitialized extent split. To prevent ENOSPC occur at the IO |
3574 | * complete, we need to split the uninitialized extent before DIO submit | 3614 | * complete, we need to split the uninitialized extent before DIO submit |
@@ -3579,7 +3619,7 @@ out: | |||
3579 | * | 3619 | * |
3580 | * Returns the size of uninitialized extent to be written on success. | 3620 | * Returns the size of uninitialized extent to be written on success. |
3581 | */ | 3621 | */ |
3582 | static int ext4_split_unwritten_extents(handle_t *handle, | 3622 | static int ext4_split_convert_extents(handle_t *handle, |
3583 | struct inode *inode, | 3623 | struct inode *inode, |
3584 | struct ext4_map_blocks *map, | 3624 | struct ext4_map_blocks *map, |
3585 | struct ext4_ext_path *path, | 3625 | struct ext4_ext_path *path, |
@@ -3591,9 +3631,9 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3591 | unsigned int ee_len; | 3631 | unsigned int ee_len; |
3592 | int split_flag = 0, depth; | 3632 | int split_flag = 0, depth; |
3593 | 3633 | ||
3594 | ext_debug("ext4_split_unwritten_extents: inode %lu, logical" | 3634 | ext_debug("%s: inode %lu, logical block %llu, max_blocks %u\n", |
3595 | "block %llu, max_blocks %u\n", inode->i_ino, | 3635 | __func__, inode->i_ino, |
3596 | (unsigned long long)map->m_lblk, map->m_len); | 3636 | (unsigned long long)map->m_lblk, map->m_len); |
3597 | 3637 | ||
3598 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> | 3638 | eof_block = (inode->i_size + inode->i_sb->s_blocksize - 1) >> |
3599 | inode->i_sb->s_blocksize_bits; | 3639 | inode->i_sb->s_blocksize_bits; |
@@ -3608,14 +3648,73 @@ static int ext4_split_unwritten_extents(handle_t *handle, | |||
3608 | ee_block = le32_to_cpu(ex->ee_block); | 3648 | ee_block = le32_to_cpu(ex->ee_block); |
3609 | ee_len = ext4_ext_get_actual_len(ex); | 3649 | ee_len = ext4_ext_get_actual_len(ex); |
3610 | 3650 | ||
3611 | split_flag |= ee_block + ee_len <= eof_block ? EXT4_EXT_MAY_ZEROOUT : 0; | 3651 | /* Convert to unwritten */ |
3612 | split_flag |= EXT4_EXT_MARK_UNINIT2; | 3652 | if (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN) { |
3613 | if (flags & EXT4_GET_BLOCKS_CONVERT) | 3653 | split_flag |= EXT4_EXT_DATA_VALID1; |
3614 | split_flag |= EXT4_EXT_DATA_VALID2; | 3654 | /* Convert to initialized */ |
3655 | } else if (flags & EXT4_GET_BLOCKS_CONVERT) { | ||
3656 | split_flag |= ee_block + ee_len <= eof_block ? | ||
3657 | EXT4_EXT_MAY_ZEROOUT : 0; | ||
3658 | split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); | ||
3659 | } | ||
3615 | flags |= EXT4_GET_BLOCKS_PRE_IO; | 3660 | flags |= EXT4_GET_BLOCKS_PRE_IO; |
3616 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); | 3661 | return ext4_split_extent(handle, inode, path, map, split_flag, flags); |
3617 | } | 3662 | } |
3618 | 3663 | ||
3664 | static int ext4_convert_initialized_extents(handle_t *handle, | ||
3665 | struct inode *inode, | ||
3666 | struct ext4_map_blocks *map, | ||
3667 | struct ext4_ext_path *path) | ||
3668 | { | ||
3669 | struct ext4_extent *ex; | ||
3670 | ext4_lblk_t ee_block; | ||
3671 | unsigned int ee_len; | ||
3672 | int depth; | ||
3673 | int err = 0; | ||
3674 | |||
3675 | depth = ext_depth(inode); | ||
3676 | ex = path[depth].p_ext; | ||
3677 | ee_block = le32_to_cpu(ex->ee_block); | ||
3678 | ee_len = ext4_ext_get_actual_len(ex); | ||
3679 | |||
3680 | ext_debug("%s: inode %lu, logical" | ||
3681 | "block %llu, max_blocks %u\n", __func__, inode->i_ino, | ||
3682 | (unsigned long long)ee_block, ee_len); | ||
3683 | |||
3684 | if (ee_block != map->m_lblk || ee_len > map->m_len) { | ||
3685 | err = ext4_split_convert_extents(handle, inode, map, path, | ||
3686 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); | ||
3687 | if (err < 0) | ||
3688 | goto out; | ||
3689 | ext4_ext_drop_refs(path); | ||
3690 | path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); | ||
3691 | if (IS_ERR(path)) { | ||
3692 | err = PTR_ERR(path); | ||
3693 | goto out; | ||
3694 | } | ||
3695 | depth = ext_depth(inode); | ||
3696 | ex = path[depth].p_ext; | ||
3697 | } | ||
3698 | |||
3699 | err = ext4_ext_get_access(handle, inode, path + depth); | ||
3700 | if (err) | ||
3701 | goto out; | ||
3702 | /* first mark the extent as uninitialized */ | ||
3703 | ext4_ext_mark_uninitialized(ex); | ||
3704 | |||
3705 | /* note: ext4_ext_correct_indexes() isn't needed here because | ||
3706 | * borders are not changed | ||
3707 | */ | ||
3708 | ext4_ext_try_to_merge(handle, inode, path, ex); | ||
3709 | |||
3710 | /* Mark modified extent as dirty */ | ||
3711 | err = ext4_ext_dirty(handle, inode, path + path->p_depth); | ||
3712 | out: | ||
3713 | ext4_ext_show_leaf(inode, path); | ||
3714 | return err; | ||
3715 | } | ||
3716 | |||
3717 | |||
3619 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, | 3718 | static int ext4_convert_unwritten_extents_endio(handle_t *handle, |
3620 | struct inode *inode, | 3719 | struct inode *inode, |
3621 | struct ext4_map_blocks *map, | 3720 | struct ext4_map_blocks *map, |
@@ -3649,8 +3748,8 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle, | |||
3649 | inode->i_ino, (unsigned long long)ee_block, ee_len, | 3748 | inode->i_ino, (unsigned long long)ee_block, ee_len, |
3650 | (unsigned long long)map->m_lblk, map->m_len); | 3749 | (unsigned long long)map->m_lblk, map->m_len); |
3651 | #endif | 3750 | #endif |
3652 | err = ext4_split_unwritten_extents(handle, inode, map, path, | 3751 | err = ext4_split_convert_extents(handle, inode, map, path, |
3653 | EXT4_GET_BLOCKS_CONVERT); | 3752 | EXT4_GET_BLOCKS_CONVERT); |
3654 | if (err < 0) | 3753 | if (err < 0) |
3655 | goto out; | 3754 | goto out; |
3656 | ext4_ext_drop_refs(path); | 3755 | ext4_ext_drop_refs(path); |
@@ -3851,6 +3950,38 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, | |||
3851 | } | 3950 | } |
3852 | 3951 | ||
3853 | static int | 3952 | static int |
3953 | ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, | ||
3954 | struct ext4_map_blocks *map, | ||
3955 | struct ext4_ext_path *path, int flags, | ||
3956 | unsigned int allocated, ext4_fsblk_t newblock) | ||
3957 | { | ||
3958 | int ret = 0; | ||
3959 | int err = 0; | ||
3960 | |||
3961 | /* | ||
3962 | * Make sure that the extent is no bigger than we support with | ||
3963 | * uninitialized extent | ||
3964 | */ | ||
3965 | if (map->m_len > EXT_UNINIT_MAX_LEN) | ||
3966 | map->m_len = EXT_UNINIT_MAX_LEN / 2; | ||
3967 | |||
3968 | ret = ext4_convert_initialized_extents(handle, inode, map, | ||
3969 | path); | ||
3970 | if (ret >= 0) { | ||
3971 | ext4_update_inode_fsync_trans(handle, inode, 1); | ||
3972 | err = check_eofblocks_fl(handle, inode, map->m_lblk, | ||
3973 | path, map->m_len); | ||
3974 | } else | ||
3975 | err = ret; | ||
3976 | map->m_flags |= EXT4_MAP_UNWRITTEN; | ||
3977 | if (allocated > map->m_len) | ||
3978 | allocated = map->m_len; | ||
3979 | map->m_len = allocated; | ||
3980 | |||
3981 | return err ? err : allocated; | ||
3982 | } | ||
3983 | |||
3984 | static int | ||
3854 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | 3985 | ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, |
3855 | struct ext4_map_blocks *map, | 3986 | struct ext4_map_blocks *map, |
3856 | struct ext4_ext_path *path, int flags, | 3987 | struct ext4_ext_path *path, int flags, |
@@ -3877,8 +4008,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, | |||
3877 | 4008 | ||
3878 | /* get_block() before submit the IO, split the extent */ | 4009 | /* get_block() before submit the IO, split the extent */ |
3879 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { | 4010 | if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { |
3880 | ret = ext4_split_unwritten_extents(handle, inode, map, | 4011 | ret = ext4_split_convert_extents(handle, inode, map, |
3881 | path, flags); | 4012 | path, flags | EXT4_GET_BLOCKS_CONVERT); |
3882 | if (ret <= 0) | 4013 | if (ret <= 0) |
3883 | goto out; | 4014 | goto out; |
3884 | /* | 4015 | /* |
@@ -3993,10 +4124,6 @@ out1: | |||
3993 | map->m_pblk = newblock; | 4124 | map->m_pblk = newblock; |
3994 | map->m_len = allocated; | 4125 | map->m_len = allocated; |
3995 | out2: | 4126 | out2: |
3996 | if (path) { | ||
3997 | ext4_ext_drop_refs(path); | ||
3998 | kfree(path); | ||
3999 | } | ||
4000 | return err ? err : allocated; | 4127 | return err ? err : allocated; |
4001 | } | 4128 | } |
4002 | 4129 | ||
@@ -4128,7 +4255,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4128 | struct ext4_extent newex, *ex, *ex2; | 4255 | struct ext4_extent newex, *ex, *ex2; |
4129 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 4256 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
4130 | ext4_fsblk_t newblock = 0; | 4257 | ext4_fsblk_t newblock = 0; |
4131 | int free_on_err = 0, err = 0, depth; | 4258 | int free_on_err = 0, err = 0, depth, ret; |
4132 | unsigned int allocated = 0, offset = 0; | 4259 | unsigned int allocated = 0, offset = 0; |
4133 | unsigned int allocated_clusters = 0; | 4260 | unsigned int allocated_clusters = 0; |
4134 | struct ext4_allocation_request ar; | 4261 | struct ext4_allocation_request ar; |
@@ -4170,6 +4297,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4170 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); | 4297 | ext4_fsblk_t ee_start = ext4_ext_pblock(ex); |
4171 | unsigned short ee_len; | 4298 | unsigned short ee_len; |
4172 | 4299 | ||
4300 | |||
4173 | /* | 4301 | /* |
4174 | * Uninitialized extents are treated as holes, except that | 4302 | * Uninitialized extents are treated as holes, except that |
4175 | * we split out initialized portions during a write. | 4303 | * we split out initialized portions during a write. |
@@ -4186,13 +4314,27 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | |||
4186 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, | 4314 | ext_debug("%u fit into %u:%d -> %llu\n", map->m_lblk, |
4187 | ee_block, ee_len, newblock); | 4315 | ee_block, ee_len, newblock); |
4188 | 4316 | ||
4189 | if (!ext4_ext_is_uninitialized(ex)) | 4317 | /* |
4318 | * If the extent is initialized check whether the | ||
4319 | * caller wants to convert it to unwritten. | ||
4320 | */ | ||
4321 | if ((!ext4_ext_is_uninitialized(ex)) && | ||
4322 | (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { | ||
4323 | allocated = ext4_ext_convert_initialized_extent( | ||
4324 | handle, inode, map, path, flags, | ||
4325 | allocated, newblock); | ||
4326 | goto out2; | ||
4327 | } else if (!ext4_ext_is_uninitialized(ex)) | ||
4190 | goto out; | 4328 | goto out; |
4191 | 4329 | ||
4192 | allocated = ext4_ext_handle_uninitialized_extents( | 4330 | ret = ext4_ext_handle_uninitialized_extents( |
4193 | handle, inode, map, path, flags, | 4331 | handle, inode, map, path, flags, |
4194 | allocated, newblock); | 4332 | allocated, newblock); |
4195 | goto out3; | 4333 | if (ret < 0) |
4334 | err = ret; | ||
4335 | else | ||
4336 | allocated = ret; | ||
4337 | goto out2; | ||
4196 | } | 4338 | } |
4197 | } | 4339 | } |
4198 | 4340 | ||
@@ -4473,7 +4615,6 @@ out2: | |||
4473 | kfree(path); | 4615 | kfree(path); |
4474 | } | 4616 | } |
4475 | 4617 | ||
4476 | out3: | ||
4477 | trace_ext4_ext_map_blocks_exit(inode, flags, map, | 4618 | trace_ext4_ext_map_blocks_exit(inode, flags, map, |
4478 | err ? err : allocated); | 4619 | err ? err : allocated); |
4479 | ext4_es_lru_add(inode); | 4620 | ext4_es_lru_add(inode); |
@@ -4514,34 +4655,200 @@ retry: | |||
4514 | ext4_std_error(inode->i_sb, err); | 4655 | ext4_std_error(inode->i_sb, err); |
4515 | } | 4656 | } |
4516 | 4657 | ||
4517 | static void ext4_falloc_update_inode(struct inode *inode, | 4658 | static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, |
4518 | int mode, loff_t new_size, int update_ctime) | 4659 | ext4_lblk_t len, int flags, int mode) |
4519 | { | 4660 | { |
4520 | struct timespec now; | 4661 | struct inode *inode = file_inode(file); |
4662 | handle_t *handle; | ||
4663 | int ret = 0; | ||
4664 | int ret2 = 0; | ||
4665 | int retries = 0; | ||
4666 | struct ext4_map_blocks map; | ||
4667 | unsigned int credits; | ||
4521 | 4668 | ||
4522 | if (update_ctime) { | 4669 | map.m_lblk = offset; |
4523 | now = current_fs_time(inode->i_sb); | 4670 | /* |
4524 | if (!timespec_equal(&inode->i_ctime, &now)) | 4671 | * Don't normalize the request if it can fit in one extent so |
4525 | inode->i_ctime = now; | 4672 | * that it doesn't get unnecessarily split into multiple |
4673 | * extents. | ||
4674 | */ | ||
4675 | if (len <= EXT_UNINIT_MAX_LEN) | ||
4676 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
4677 | |||
4678 | /* | ||
4679 | * credits to insert 1 extent into extent tree | ||
4680 | */ | ||
4681 | credits = ext4_chunk_trans_blocks(inode, len); | ||
4682 | |||
4683 | retry: | ||
4684 | while (ret >= 0 && ret < len) { | ||
4685 | map.m_lblk = map.m_lblk + ret; | ||
4686 | map.m_len = len = len - ret; | ||
4687 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | ||
4688 | credits); | ||
4689 | if (IS_ERR(handle)) { | ||
4690 | ret = PTR_ERR(handle); | ||
4691 | break; | ||
4692 | } | ||
4693 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
4694 | if (ret <= 0) { | ||
4695 | ext4_debug("inode #%lu: block %u: len %u: " | ||
4696 | "ext4_ext_map_blocks returned %d", | ||
4697 | inode->i_ino, map.m_lblk, | ||
4698 | map.m_len, ret); | ||
4699 | ext4_mark_inode_dirty(handle, inode); | ||
4700 | ret2 = ext4_journal_stop(handle); | ||
4701 | break; | ||
4702 | } | ||
4703 | ret2 = ext4_journal_stop(handle); | ||
4704 | if (ret2) | ||
4705 | break; | ||
4706 | } | ||
4707 | if (ret == -ENOSPC && | ||
4708 | ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
4709 | ret = 0; | ||
4710 | goto retry; | ||
4526 | } | 4711 | } |
4712 | |||
4713 | return ret > 0 ? ret2 : ret; | ||
4714 | } | ||
4715 | |||
4716 | static long ext4_zero_range(struct file *file, loff_t offset, | ||
4717 | loff_t len, int mode) | ||
4718 | { | ||
4719 | struct inode *inode = file_inode(file); | ||
4720 | handle_t *handle = NULL; | ||
4721 | unsigned int max_blocks; | ||
4722 | loff_t new_size = 0; | ||
4723 | int ret = 0; | ||
4724 | int flags; | ||
4725 | int partial; | ||
4726 | loff_t start, end; | ||
4727 | ext4_lblk_t lblk; | ||
4728 | struct address_space *mapping = inode->i_mapping; | ||
4729 | unsigned int blkbits = inode->i_blkbits; | ||
4730 | |||
4731 | trace_ext4_zero_range(inode, offset, len, mode); | ||
4732 | |||
4733 | /* | ||
4734 | * Write out all dirty pages to avoid race conditions | ||
4735 | * Then release them. | ||
4736 | */ | ||
4737 | if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { | ||
4738 | ret = filemap_write_and_wait_range(mapping, offset, | ||
4739 | offset + len - 1); | ||
4740 | if (ret) | ||
4741 | return ret; | ||
4742 | } | ||
4743 | |||
4527 | /* | 4744 | /* |
4528 | * Update only when preallocation was requested beyond | 4745 | * Round up offset. This is not fallocate, we neet to zero out |
4529 | * the file size. | 4746 | * blocks, so convert interior block aligned part of the range to |
4747 | * unwritten and possibly manually zero out unaligned parts of the | ||
4748 | * range. | ||
4530 | */ | 4749 | */ |
4531 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { | 4750 | start = round_up(offset, 1 << blkbits); |
4751 | end = round_down((offset + len), 1 << blkbits); | ||
4752 | |||
4753 | if (start < offset || end > offset + len) | ||
4754 | return -EINVAL; | ||
4755 | partial = (offset + len) & ((1 << blkbits) - 1); | ||
4756 | |||
4757 | lblk = start >> blkbits; | ||
4758 | max_blocks = (end >> blkbits); | ||
4759 | if (max_blocks < lblk) | ||
4760 | max_blocks = 0; | ||
4761 | else | ||
4762 | max_blocks -= lblk; | ||
4763 | |||
4764 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | | ||
4765 | EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; | ||
4766 | if (mode & FALLOC_FL_KEEP_SIZE) | ||
4767 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | ||
4768 | |||
4769 | mutex_lock(&inode->i_mutex); | ||
4770 | |||
4771 | /* | ||
4772 | * Indirect files do not support unwritten extnets | ||
4773 | */ | ||
4774 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { | ||
4775 | ret = -EOPNOTSUPP; | ||
4776 | goto out_mutex; | ||
4777 | } | ||
4778 | |||
4779 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | ||
4780 | offset + len > i_size_read(inode)) { | ||
4781 | new_size = offset + len; | ||
4782 | ret = inode_newsize_ok(inode, new_size); | ||
4783 | if (ret) | ||
4784 | goto out_mutex; | ||
4785 | /* | ||
4786 | * If we have a partial block after EOF we have to allocate | ||
4787 | * the entire block. | ||
4788 | */ | ||
4789 | if (partial) | ||
4790 | max_blocks += 1; | ||
4791 | } | ||
4792 | |||
4793 | if (max_blocks > 0) { | ||
4794 | |||
4795 | /* Now release the pages and zero block aligned part of pages*/ | ||
4796 | truncate_pagecache_range(inode, start, end - 1); | ||
4797 | |||
4798 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | ||
4799 | ext4_inode_block_unlocked_dio(inode); | ||
4800 | inode_dio_wait(inode); | ||
4801 | |||
4802 | /* | ||
4803 | * Remove entire range from the extent status tree. | ||
4804 | */ | ||
4805 | ret = ext4_es_remove_extent(inode, lblk, max_blocks); | ||
4806 | if (ret) | ||
4807 | goto out_dio; | ||
4808 | |||
4809 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, | ||
4810 | mode); | ||
4811 | if (ret) | ||
4812 | goto out_dio; | ||
4813 | } | ||
4814 | |||
4815 | handle = ext4_journal_start(inode, EXT4_HT_MISC, 4); | ||
4816 | if (IS_ERR(handle)) { | ||
4817 | ret = PTR_ERR(handle); | ||
4818 | ext4_std_error(inode->i_sb, ret); | ||
4819 | goto out_dio; | ||
4820 | } | ||
4821 | |||
4822 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
4823 | |||
4824 | if (new_size) { | ||
4532 | if (new_size > i_size_read(inode)) | 4825 | if (new_size > i_size_read(inode)) |
4533 | i_size_write(inode, new_size); | 4826 | i_size_write(inode, new_size); |
4534 | if (new_size > EXT4_I(inode)->i_disksize) | 4827 | if (new_size > EXT4_I(inode)->i_disksize) |
4535 | ext4_update_i_disksize(inode, new_size); | 4828 | ext4_update_i_disksize(inode, new_size); |
4536 | } else { | 4829 | } else { |
4537 | /* | 4830 | /* |
4538 | * Mark that we allocate beyond EOF so the subsequent truncate | 4831 | * Mark that we allocate beyond EOF so the subsequent truncate |
4539 | * can proceed even if the new size is the same as i_size. | 4832 | * can proceed even if the new size is the same as i_size. |
4540 | */ | 4833 | */ |
4541 | if (new_size > i_size_read(inode)) | 4834 | if ((offset + len) > i_size_read(inode)) |
4542 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | 4835 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); |
4543 | } | 4836 | } |
4544 | 4837 | ||
4838 | ext4_mark_inode_dirty(handle, inode); | ||
4839 | |||
4840 | /* Zero out partial block at the edges of the range */ | ||
4841 | ret = ext4_zero_partial_blocks(handle, inode, offset, len); | ||
4842 | |||
4843 | if (file->f_flags & O_SYNC) | ||
4844 | ext4_handle_sync(handle); | ||
4845 | |||
4846 | ext4_journal_stop(handle); | ||
4847 | out_dio: | ||
4848 | ext4_inode_resume_unlocked_dio(inode); | ||
4849 | out_mutex: | ||
4850 | mutex_unlock(&inode->i_mutex); | ||
4851 | return ret; | ||
4545 | } | 4852 | } |
4546 | 4853 | ||
4547 | /* | 4854 | /* |
@@ -4555,22 +4862,25 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4555 | { | 4862 | { |
4556 | struct inode *inode = file_inode(file); | 4863 | struct inode *inode = file_inode(file); |
4557 | handle_t *handle; | 4864 | handle_t *handle; |
4558 | loff_t new_size; | 4865 | loff_t new_size = 0; |
4559 | unsigned int max_blocks; | 4866 | unsigned int max_blocks; |
4560 | int ret = 0; | 4867 | int ret = 0; |
4561 | int ret2 = 0; | ||
4562 | int retries = 0; | ||
4563 | int flags; | 4868 | int flags; |
4564 | struct ext4_map_blocks map; | 4869 | ext4_lblk_t lblk; |
4565 | unsigned int credits, blkbits = inode->i_blkbits; | 4870 | struct timespec tv; |
4871 | unsigned int blkbits = inode->i_blkbits; | ||
4566 | 4872 | ||
4567 | /* Return error if mode is not supported */ | 4873 | /* Return error if mode is not supported */ |
4568 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 4874 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
4875 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
4569 | return -EOPNOTSUPP; | 4876 | return -EOPNOTSUPP; |
4570 | 4877 | ||
4571 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4878 | if (mode & FALLOC_FL_PUNCH_HOLE) |
4572 | return ext4_punch_hole(inode, offset, len); | 4879 | return ext4_punch_hole(inode, offset, len); |
4573 | 4880 | ||
4881 | if (mode & FALLOC_FL_COLLAPSE_RANGE) | ||
4882 | return ext4_collapse_range(inode, offset, len); | ||
4883 | |||
4574 | ret = ext4_convert_inline_data(inode); | 4884 | ret = ext4_convert_inline_data(inode); |
4575 | if (ret) | 4885 | if (ret) |
4576 | return ret; | 4886 | return ret; |
@@ -4582,83 +4892,66 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
4582 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4892 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
4583 | return -EOPNOTSUPP; | 4893 | return -EOPNOTSUPP; |
4584 | 4894 | ||
4895 | if (mode & FALLOC_FL_ZERO_RANGE) | ||
4896 | return ext4_zero_range(file, offset, len, mode); | ||
4897 | |||
4585 | trace_ext4_fallocate_enter(inode, offset, len, mode); | 4898 | trace_ext4_fallocate_enter(inode, offset, len, mode); |
4586 | map.m_lblk = offset >> blkbits; | 4899 | lblk = offset >> blkbits; |
4587 | /* | 4900 | /* |
4588 | * We can't just convert len to max_blocks because | 4901 | * We can't just convert len to max_blocks because |
4589 | * If blocksize = 4096 offset = 3072 and len = 2048 | 4902 | * If blocksize = 4096 offset = 3072 and len = 2048 |
4590 | */ | 4903 | */ |
4591 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) | 4904 | max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) |
4592 | - map.m_lblk; | 4905 | - lblk; |
4593 | /* | 4906 | |
4594 | * credits to insert 1 extent into extent tree | ||
4595 | */ | ||
4596 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
4597 | mutex_lock(&inode->i_mutex); | ||
4598 | ret = inode_newsize_ok(inode, (len + offset)); | ||
4599 | if (ret) { | ||
4600 | mutex_unlock(&inode->i_mutex); | ||
4601 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); | ||
4602 | return ret; | ||
4603 | } | ||
4604 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; | 4907 | flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; |
4605 | if (mode & FALLOC_FL_KEEP_SIZE) | 4908 | if (mode & FALLOC_FL_KEEP_SIZE) |
4606 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; | 4909 | flags |= EXT4_GET_BLOCKS_KEEP_SIZE; |
4607 | /* | ||
4608 | * Don't normalize the request if it can fit in one extent so | ||
4609 | * that it doesn't get unnecessarily split into multiple | ||
4610 | * extents. | ||
4611 | */ | ||
4612 | if (len <= EXT_UNINIT_MAX_LEN << blkbits) | ||
4613 | flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; | ||
4614 | 4910 | ||
4615 | retry: | 4911 | mutex_lock(&inode->i_mutex); |
4616 | while (ret >= 0 && ret < max_blocks) { | ||
4617 | map.m_lblk = map.m_lblk + ret; | ||
4618 | map.m_len = max_blocks = max_blocks - ret; | ||
4619 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, | ||
4620 | credits); | ||
4621 | if (IS_ERR(handle)) { | ||
4622 | ret = PTR_ERR(handle); | ||
4623 | break; | ||
4624 | } | ||
4625 | ret = ext4_map_blocks(handle, inode, &map, flags); | ||
4626 | if (ret <= 0) { | ||
4627 | #ifdef EXT4FS_DEBUG | ||
4628 | ext4_warning(inode->i_sb, | ||
4629 | "inode #%lu: block %u: len %u: " | ||
4630 | "ext4_ext_map_blocks returned %d", | ||
4631 | inode->i_ino, map.m_lblk, | ||
4632 | map.m_len, ret); | ||
4633 | #endif | ||
4634 | ext4_mark_inode_dirty(handle, inode); | ||
4635 | ret2 = ext4_journal_stop(handle); | ||
4636 | break; | ||
4637 | } | ||
4638 | if ((map.m_lblk + ret) >= (EXT4_BLOCK_ALIGN(offset + len, | ||
4639 | blkbits) >> blkbits)) | ||
4640 | new_size = offset + len; | ||
4641 | else | ||
4642 | new_size = ((loff_t) map.m_lblk + ret) << blkbits; | ||
4643 | 4912 | ||
4644 | ext4_falloc_update_inode(inode, mode, new_size, | 4913 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
4645 | (map.m_flags & EXT4_MAP_NEW)); | 4914 | offset + len > i_size_read(inode)) { |
4646 | ext4_mark_inode_dirty(handle, inode); | 4915 | new_size = offset + len; |
4647 | if ((file->f_flags & O_SYNC) && ret >= max_blocks) | 4916 | ret = inode_newsize_ok(inode, new_size); |
4648 | ext4_handle_sync(handle); | 4917 | if (ret) |
4649 | ret2 = ext4_journal_stop(handle); | 4918 | goto out; |
4650 | if (ret2) | ||
4651 | break; | ||
4652 | } | 4919 | } |
4653 | if (ret == -ENOSPC && | 4920 | |
4654 | ext4_should_retry_alloc(inode->i_sb, &retries)) { | 4921 | ret = ext4_alloc_file_blocks(file, lblk, max_blocks, flags, mode); |
4655 | ret = 0; | 4922 | if (ret) |
4656 | goto retry; | 4923 | goto out; |
4924 | |||
4925 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); | ||
4926 | if (IS_ERR(handle)) | ||
4927 | goto out; | ||
4928 | |||
4929 | tv = inode->i_ctime = ext4_current_time(inode); | ||
4930 | |||
4931 | if (new_size) { | ||
4932 | if (new_size > i_size_read(inode)) { | ||
4933 | i_size_write(inode, new_size); | ||
4934 | inode->i_mtime = tv; | ||
4935 | } | ||
4936 | if (new_size > EXT4_I(inode)->i_disksize) | ||
4937 | ext4_update_i_disksize(inode, new_size); | ||
4938 | } else { | ||
4939 | /* | ||
4940 | * Mark that we allocate beyond EOF so the subsequent truncate | ||
4941 | * can proceed even if the new size is the same as i_size. | ||
4942 | */ | ||
4943 | if ((offset + len) > i_size_read(inode)) | ||
4944 | ext4_set_inode_flag(inode, EXT4_INODE_EOFBLOCKS); | ||
4657 | } | 4945 | } |
4946 | ext4_mark_inode_dirty(handle, inode); | ||
4947 | if (file->f_flags & O_SYNC) | ||
4948 | ext4_handle_sync(handle); | ||
4949 | |||
4950 | ext4_journal_stop(handle); | ||
4951 | out: | ||
4658 | mutex_unlock(&inode->i_mutex); | 4952 | mutex_unlock(&inode->i_mutex); |
4659 | trace_ext4_fallocate_exit(inode, offset, max_blocks, | 4953 | trace_ext4_fallocate_exit(inode, offset, max_blocks, ret); |
4660 | ret > 0 ? ret2 : ret); | 4954 | return ret; |
4661 | return ret > 0 ? ret2 : ret; | ||
4662 | } | 4955 | } |
4663 | 4956 | ||
4664 | /* | 4957 | /* |
@@ -4869,3 +5162,304 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | |||
4869 | ext4_es_lru_add(inode); | 5162 | ext4_es_lru_add(inode); |
4870 | return error; | 5163 | return error; |
4871 | } | 5164 | } |
5165 | |||
5166 | /* | ||
5167 | * ext4_access_path: | ||
5168 | * Function to access the path buffer for marking it dirty. | ||
5169 | * It also checks if there are sufficient credits left in the journal handle | ||
5170 | * to update path. | ||
5171 | */ | ||
5172 | static int | ||
5173 | ext4_access_path(handle_t *handle, struct inode *inode, | ||
5174 | struct ext4_ext_path *path) | ||
5175 | { | ||
5176 | int credits, err; | ||
5177 | |||
5178 | if (!ext4_handle_valid(handle)) | ||
5179 | return 0; | ||
5180 | |||
5181 | /* | ||
5182 | * Check if need to extend journal credits | ||
5183 | * 3 for leaf, sb, and inode plus 2 (bmap and group | ||
5184 | * descriptor) for each block group; assume two block | ||
5185 | * groups | ||
5186 | */ | ||
5187 | if (handle->h_buffer_credits < 7) { | ||
5188 | credits = ext4_writepage_trans_blocks(inode); | ||
5189 | err = ext4_ext_truncate_extend_restart(handle, inode, credits); | ||
5190 | /* EAGAIN is success */ | ||
5191 | if (err && err != -EAGAIN) | ||
5192 | return err; | ||
5193 | } | ||
5194 | |||
5195 | err = ext4_ext_get_access(handle, inode, path); | ||
5196 | return err; | ||
5197 | } | ||
5198 | |||
5199 | /* | ||
5200 | * ext4_ext_shift_path_extents: | ||
5201 | * Shift the extents of a path structure lying between path[depth].p_ext | ||
5202 | * and EXT_LAST_EXTENT(path[depth].p_hdr) downwards, by subtracting shift | ||
5203 | * from starting block for each extent. | ||
5204 | */ | ||
5205 | static int | ||
5206 | ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift, | ||
5207 | struct inode *inode, handle_t *handle, | ||
5208 | ext4_lblk_t *start) | ||
5209 | { | ||
5210 | int depth, err = 0; | ||
5211 | struct ext4_extent *ex_start, *ex_last; | ||
5212 | bool update = 0; | ||
5213 | depth = path->p_depth; | ||
5214 | |||
5215 | while (depth >= 0) { | ||
5216 | if (depth == path->p_depth) { | ||
5217 | ex_start = path[depth].p_ext; | ||
5218 | if (!ex_start) | ||
5219 | return -EIO; | ||
5220 | |||
5221 | ex_last = EXT_LAST_EXTENT(path[depth].p_hdr); | ||
5222 | if (!ex_last) | ||
5223 | return -EIO; | ||
5224 | |||
5225 | err = ext4_access_path(handle, inode, path + depth); | ||
5226 | if (err) | ||
5227 | goto out; | ||
5228 | |||
5229 | if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) | ||
5230 | update = 1; | ||
5231 | |||
5232 | *start = ex_last->ee_block + | ||
5233 | ext4_ext_get_actual_len(ex_last); | ||
5234 | |||
5235 | while (ex_start <= ex_last) { | ||
5236 | ex_start->ee_block -= shift; | ||
5237 | if (ex_start > | ||
5238 | EXT_FIRST_EXTENT(path[depth].p_hdr)) { | ||
5239 | if (ext4_ext_try_to_merge_right(inode, | ||
5240 | path, ex_start - 1)) | ||
5241 | ex_last--; | ||
5242 | } | ||
5243 | ex_start++; | ||
5244 | } | ||
5245 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
5246 | if (err) | ||
5247 | goto out; | ||
5248 | |||
5249 | if (--depth < 0 || !update) | ||
5250 | break; | ||
5251 | } | ||
5252 | |||
5253 | /* Update index too */ | ||
5254 | err = ext4_access_path(handle, inode, path + depth); | ||
5255 | if (err) | ||
5256 | goto out; | ||
5257 | |||
5258 | path[depth].p_idx->ei_block -= shift; | ||
5259 | err = ext4_ext_dirty(handle, inode, path + depth); | ||
5260 | if (err) | ||
5261 | goto out; | ||
5262 | |||
5263 | /* we are done if current index is not a starting index */ | ||
5264 | if (path[depth].p_idx != EXT_FIRST_INDEX(path[depth].p_hdr)) | ||
5265 | break; | ||
5266 | |||
5267 | depth--; | ||
5268 | } | ||
5269 | |||
5270 | out: | ||
5271 | return err; | ||
5272 | } | ||
5273 | |||
5274 | /* | ||
5275 | * ext4_ext_shift_extents: | ||
5276 | * All the extents which lies in the range from start to the last allocated | ||
5277 | * block for the file are shifted downwards by shift blocks. | ||
5278 | * On success, 0 is returned, error otherwise. | ||
5279 | */ | ||
5280 | static int | ||
5281 | ext4_ext_shift_extents(struct inode *inode, handle_t *handle, | ||
5282 | ext4_lblk_t start, ext4_lblk_t shift) | ||
5283 | { | ||
5284 | struct ext4_ext_path *path; | ||
5285 | int ret = 0, depth; | ||
5286 | struct ext4_extent *extent; | ||
5287 | ext4_lblk_t stop_block, current_block; | ||
5288 | ext4_lblk_t ex_start, ex_end; | ||
5289 | |||
5290 | /* Let path point to the last extent */ | ||
5291 | path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); | ||
5292 | if (IS_ERR(path)) | ||
5293 | return PTR_ERR(path); | ||
5294 | |||
5295 | depth = path->p_depth; | ||
5296 | extent = path[depth].p_ext; | ||
5297 | if (!extent) { | ||
5298 | ext4_ext_drop_refs(path); | ||
5299 | kfree(path); | ||
5300 | return ret; | ||
5301 | } | ||
5302 | |||
5303 | stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
5304 | ext4_ext_drop_refs(path); | ||
5305 | kfree(path); | ||
5306 | |||
5307 | /* Nothing to shift, if hole is at the end of file */ | ||
5308 | if (start >= stop_block) | ||
5309 | return ret; | ||
5310 | |||
5311 | /* | ||
5312 | * Don't start shifting extents until we make sure the hole is big | ||
5313 | * enough to accomodate the shift. | ||
5314 | */ | ||
5315 | path = ext4_ext_find_extent(inode, start - 1, NULL, 0); | ||
5316 | depth = path->p_depth; | ||
5317 | extent = path[depth].p_ext; | ||
5318 | ex_start = extent->ee_block; | ||
5319 | ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); | ||
5320 | ext4_ext_drop_refs(path); | ||
5321 | kfree(path); | ||
5322 | |||
5323 | if ((start == ex_start && shift > ex_start) || | ||
5324 | (shift > start - ex_end)) | ||
5325 | return -EINVAL; | ||
5326 | |||
5327 | /* Its safe to start updating extents */ | ||
5328 | while (start < stop_block) { | ||
5329 | path = ext4_ext_find_extent(inode, start, NULL, 0); | ||
5330 | if (IS_ERR(path)) | ||
5331 | return PTR_ERR(path); | ||
5332 | depth = path->p_depth; | ||
5333 | extent = path[depth].p_ext; | ||
5334 | current_block = extent->ee_block; | ||
5335 | if (start > current_block) { | ||
5336 | /* Hole, move to the next extent */ | ||
5337 | ret = mext_next_extent(inode, path, &extent); | ||
5338 | if (ret != 0) { | ||
5339 | ext4_ext_drop_refs(path); | ||
5340 | kfree(path); | ||
5341 | if (ret == 1) | ||
5342 | ret = 0; | ||
5343 | break; | ||
5344 | } | ||
5345 | } | ||
5346 | ret = ext4_ext_shift_path_extents(path, shift, inode, | ||
5347 | handle, &start); | ||
5348 | ext4_ext_drop_refs(path); | ||
5349 | kfree(path); | ||
5350 | if (ret) | ||
5351 | break; | ||
5352 | } | ||
5353 | |||
5354 | return ret; | ||
5355 | } | ||
5356 | |||
5357 | /* | ||
5358 | * ext4_collapse_range: | ||
5359 | * This implements the fallocate's collapse range functionality for ext4 | ||
5360 | * Returns: 0 and non-zero on error. | ||
5361 | */ | ||
5362 | int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) | ||
5363 | { | ||
5364 | struct super_block *sb = inode->i_sb; | ||
5365 | ext4_lblk_t punch_start, punch_stop; | ||
5366 | handle_t *handle; | ||
5367 | unsigned int credits; | ||
5368 | loff_t new_size; | ||
5369 | int ret; | ||
5370 | |||
5371 | BUG_ON(offset + len > i_size_read(inode)); | ||
5372 | |||
5373 | /* Collapse range works only on fs block size aligned offsets. */ | ||
5374 | if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || | ||
5375 | len & (EXT4_BLOCK_SIZE(sb) - 1)) | ||
5376 | return -EINVAL; | ||
5377 | |||
5378 | if (!S_ISREG(inode->i_mode)) | ||
5379 | return -EOPNOTSUPP; | ||
5380 | |||
5381 | trace_ext4_collapse_range(inode, offset, len); | ||
5382 | |||
5383 | punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); | ||
5384 | punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); | ||
5385 | |||
5386 | /* Write out all dirty pages */ | ||
5387 | ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); | ||
5388 | if (ret) | ||
5389 | return ret; | ||
5390 | |||
5391 | /* Take mutex lock */ | ||
5392 | mutex_lock(&inode->i_mutex); | ||
5393 | |||
5394 | /* It's not possible punch hole on append only file */ | ||
5395 | if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { | ||
5396 | ret = -EPERM; | ||
5397 | goto out_mutex; | ||
5398 | } | ||
5399 | |||
5400 | if (IS_SWAPFILE(inode)) { | ||
5401 | ret = -ETXTBSY; | ||
5402 | goto out_mutex; | ||
5403 | } | ||
5404 | |||
5405 | /* Currently just for extent based files */ | ||
5406 | if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | ||
5407 | ret = -EOPNOTSUPP; | ||
5408 | goto out_mutex; | ||
5409 | } | ||
5410 | |||
5411 | truncate_pagecache_range(inode, offset, -1); | ||
5412 | |||
5413 | /* Wait for existing dio to complete */ | ||
5414 | ext4_inode_block_unlocked_dio(inode); | ||
5415 | inode_dio_wait(inode); | ||
5416 | |||
5417 | credits = ext4_writepage_trans_blocks(inode); | ||
5418 | handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); | ||
5419 | if (IS_ERR(handle)) { | ||
5420 | ret = PTR_ERR(handle); | ||
5421 | goto out_dio; | ||
5422 | } | ||
5423 | |||
5424 | down_write(&EXT4_I(inode)->i_data_sem); | ||
5425 | ext4_discard_preallocations(inode); | ||
5426 | |||
5427 | ret = ext4_es_remove_extent(inode, punch_start, | ||
5428 | EXT_MAX_BLOCKS - punch_start - 1); | ||
5429 | if (ret) { | ||
5430 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5431 | goto out_stop; | ||
5432 | } | ||
5433 | |||
5434 | ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1); | ||
5435 | if (ret) { | ||
5436 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5437 | goto out_stop; | ||
5438 | } | ||
5439 | |||
5440 | ret = ext4_ext_shift_extents(inode, handle, punch_stop, | ||
5441 | punch_stop - punch_start); | ||
5442 | if (ret) { | ||
5443 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5444 | goto out_stop; | ||
5445 | } | ||
5446 | |||
5447 | new_size = i_size_read(inode) - len; | ||
5448 | truncate_setsize(inode, new_size); | ||
5449 | EXT4_I(inode)->i_disksize = new_size; | ||
5450 | |||
5451 | ext4_discard_preallocations(inode); | ||
5452 | up_write(&EXT4_I(inode)->i_data_sem); | ||
5453 | if (IS_SYNC(inode)) | ||
5454 | ext4_handle_sync(handle); | ||
5455 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | ||
5456 | ext4_mark_inode_dirty(handle, inode); | ||
5457 | |||
5458 | out_stop: | ||
5459 | ext4_journal_stop(handle); | ||
5460 | out_dio: | ||
5461 | ext4_inode_resume_unlocked_dio(inode); | ||
5462 | out_mutex: | ||
5463 | mutex_unlock(&inode->i_mutex); | ||
5464 | return ret; | ||
5465 | } | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 3981ff783950..0a014a7194b2 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
@@ -184,7 +184,7 @@ static void ext4_es_print_tree(struct inode *inode) | |||
184 | while (node) { | 184 | while (node) { |
185 | struct extent_status *es; | 185 | struct extent_status *es; |
186 | es = rb_entry(node, struct extent_status, rb_node); | 186 | es = rb_entry(node, struct extent_status, rb_node); |
187 | printk(KERN_DEBUG " [%u/%u) %llu %llx", | 187 | printk(KERN_DEBUG " [%u/%u) %llu %x", |
188 | es->es_lblk, es->es_len, | 188 | es->es_lblk, es->es_len, |
189 | ext4_es_pblock(es), ext4_es_status(es)); | 189 | ext4_es_pblock(es), ext4_es_status(es)); |
190 | node = rb_next(node); | 190 | node = rb_next(node); |
@@ -445,8 +445,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
445 | pr_warn("ES insert assertion failed for " | 445 | pr_warn("ES insert assertion failed for " |
446 | "inode: %lu we can find an extent " | 446 | "inode: %lu we can find an extent " |
447 | "at block [%d/%d/%llu/%c], but we " | 447 | "at block [%d/%d/%llu/%c], but we " |
448 | "want to add an delayed/hole extent " | 448 | "want to add a delayed/hole extent " |
449 | "[%d/%d/%llu/%llx]\n", | 449 | "[%d/%d/%llu/%x]\n", |
450 | inode->i_ino, ee_block, ee_len, | 450 | inode->i_ino, ee_block, ee_len, |
451 | ee_start, ee_status ? 'u' : 'w', | 451 | ee_start, ee_status ? 'u' : 'w', |
452 | es->es_lblk, es->es_len, | 452 | es->es_lblk, es->es_len, |
@@ -486,8 +486,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode, | |||
486 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { | 486 | if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) { |
487 | pr_warn("ES insert assertion failed for inode: %lu " | 487 | pr_warn("ES insert assertion failed for inode: %lu " |
488 | "can't find an extent at block %d but we want " | 488 | "can't find an extent at block %d but we want " |
489 | "to add an written/unwritten extent " | 489 | "to add a written/unwritten extent " |
490 | "[%d/%d/%llu/%llx]\n", inode->i_ino, | 490 | "[%d/%d/%llu/%x]\n", inode->i_ino, |
491 | es->es_lblk, es->es_lblk, es->es_len, | 491 | es->es_lblk, es->es_lblk, es->es_len, |
492 | ext4_es_pblock(es), ext4_es_status(es)); | 492 | ext4_es_pblock(es), ext4_es_status(es)); |
493 | } | 493 | } |
@@ -524,7 +524,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
524 | */ | 524 | */ |
525 | pr_warn("ES insert assertion failed for inode: %lu " | 525 | pr_warn("ES insert assertion failed for inode: %lu " |
526 | "We can find blocks but we want to add a " | 526 | "We can find blocks but we want to add a " |
527 | "delayed/hole extent [%d/%d/%llu/%llx]\n", | 527 | "delayed/hole extent [%d/%d/%llu/%x]\n", |
528 | inode->i_ino, es->es_lblk, es->es_len, | 528 | inode->i_ino, es->es_lblk, es->es_len, |
529 | ext4_es_pblock(es), ext4_es_status(es)); | 529 | ext4_es_pblock(es), ext4_es_status(es)); |
530 | return; | 530 | return; |
@@ -554,7 +554,7 @@ static void ext4_es_insert_extent_ind_check(struct inode *inode, | |||
554 | if (ext4_es_is_written(es)) { | 554 | if (ext4_es_is_written(es)) { |
555 | pr_warn("ES insert assertion failed for inode: %lu " | 555 | pr_warn("ES insert assertion failed for inode: %lu " |
556 | "We can't find the block but we want to add " | 556 | "We can't find the block but we want to add " |
557 | "an written extent [%d/%d/%llu/%llx]\n", | 557 | "a written extent [%d/%d/%llu/%x]\n", |
558 | inode->i_ino, es->es_lblk, es->es_len, | 558 | inode->i_ino, es->es_lblk, es->es_len, |
559 | ext4_es_pblock(es), ext4_es_status(es)); | 559 | ext4_es_pblock(es), ext4_es_status(es)); |
560 | return; | 560 | return; |
@@ -658,8 +658,7 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
658 | 658 | ||
659 | newes.es_lblk = lblk; | 659 | newes.es_lblk = lblk; |
660 | newes.es_len = len; | 660 | newes.es_len = len; |
661 | ext4_es_store_pblock(&newes, pblk); | 661 | ext4_es_store_pblock_status(&newes, pblk, status); |
662 | ext4_es_store_status(&newes, status); | ||
663 | trace_ext4_es_insert_extent(inode, &newes); | 662 | trace_ext4_es_insert_extent(inode, &newes); |
664 | 663 | ||
665 | ext4_es_insert_extent_check(inode, &newes); | 664 | ext4_es_insert_extent_check(inode, &newes); |
@@ -699,8 +698,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk, | |||
699 | 698 | ||
700 | newes.es_lblk = lblk; | 699 | newes.es_lblk = lblk; |
701 | newes.es_len = len; | 700 | newes.es_len = len; |
702 | ext4_es_store_pblock(&newes, pblk); | 701 | ext4_es_store_pblock_status(&newes, pblk, status); |
703 | ext4_es_store_status(&newes, status); | ||
704 | trace_ext4_es_cache_extent(inode, &newes); | 702 | trace_ext4_es_cache_extent(inode, &newes); |
705 | 703 | ||
706 | if (!len) | 704 | if (!len) |
@@ -812,13 +810,13 @@ retry: | |||
812 | 810 | ||
813 | newes.es_lblk = end + 1; | 811 | newes.es_lblk = end + 1; |
814 | newes.es_len = len2; | 812 | newes.es_len = len2; |
813 | block = 0x7FDEADBEEF; | ||
815 | if (ext4_es_is_written(&orig_es) || | 814 | if (ext4_es_is_written(&orig_es) || |
816 | ext4_es_is_unwritten(&orig_es)) { | 815 | ext4_es_is_unwritten(&orig_es)) |
817 | block = ext4_es_pblock(&orig_es) + | 816 | block = ext4_es_pblock(&orig_es) + |
818 | orig_es.es_len - len2; | 817 | orig_es.es_len - len2; |
819 | ext4_es_store_pblock(&newes, block); | 818 | ext4_es_store_pblock_status(&newes, block, |
820 | } | 819 | ext4_es_status(&orig_es)); |
821 | ext4_es_store_status(&newes, ext4_es_status(&orig_es)); | ||
822 | err = __es_insert_extent(inode, &newes); | 820 | err = __es_insert_extent(inode, &newes); |
823 | if (err) { | 821 | if (err) { |
824 | es->es_lblk = orig_es.es_lblk; | 822 | es->es_lblk = orig_es.es_lblk; |
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index 167f4ab8ecc3..f1b62a419920 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
@@ -129,6 +129,15 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
129 | (es->es_pblk & ~ES_MASK)); | 129 | (es->es_pblk & ~ES_MASK)); |
130 | } | 130 | } |
131 | 131 | ||
132 | static inline void ext4_es_store_pblock_status(struct extent_status *es, | ||
133 | ext4_fsblk_t pb, | ||
134 | unsigned int status) | ||
135 | { | ||
136 | es->es_pblk = (((ext4_fsblk_t) | ||
137 | (status & EXTENT_STATUS_FLAGS) << ES_SHIFT) | | ||
138 | (pb & ~ES_MASK)); | ||
139 | } | ||
140 | |||
132 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); | 141 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
133 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); | 142 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
134 | extern void ext4_es_lru_add(struct inode *inode); | 143 | extern void ext4_es_lru_add(struct inode *inode); |
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 1a5073959f32..4e508fc83dcf 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
@@ -153,7 +153,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, | |||
153 | ssize_t err; | 153 | ssize_t err; |
154 | 154 | ||
155 | err = generic_write_sync(file, iocb->ki_pos - ret, ret); | 155 | err = generic_write_sync(file, iocb->ki_pos - ret, ret); |
156 | if (err < 0 && ret > 0) | 156 | if (err < 0) |
157 | ret = err; | 157 | ret = err; |
158 | } | 158 | } |
159 | blk_finish_plug(&plug); | 159 | blk_finish_plug(&plug); |
@@ -200,6 +200,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, | |||
200 | 200 | ||
201 | static const struct vm_operations_struct ext4_file_vm_ops = { | 201 | static const struct vm_operations_struct ext4_file_vm_ops = { |
202 | .fault = filemap_fault, | 202 | .fault = filemap_fault, |
203 | .map_pages = filemap_map_pages, | ||
203 | .page_mkwrite = ext4_page_mkwrite, | 204 | .page_mkwrite = ext4_page_mkwrite, |
204 | .remap_pages = generic_file_remap_pages, | 205 | .remap_pages = generic_file_remap_pages, |
205 | }; | 206 | }; |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 24bfd7ff3049..5b0d2c7d5408 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -215,7 +215,7 @@ void ext4_evict_inode(struct inode *inode) | |||
215 | jbd2_complete_transaction(journal, commit_tid); | 215 | jbd2_complete_transaction(journal, commit_tid); |
216 | filemap_write_and_wait(&inode->i_data); | 216 | filemap_write_and_wait(&inode->i_data); |
217 | } | 217 | } |
218 | truncate_inode_pages(&inode->i_data, 0); | 218 | truncate_inode_pages_final(&inode->i_data); |
219 | 219 | ||
220 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 220 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
221 | goto no_delete; | 221 | goto no_delete; |
@@ -226,7 +226,7 @@ void ext4_evict_inode(struct inode *inode) | |||
226 | 226 | ||
227 | if (ext4_should_order_data(inode)) | 227 | if (ext4_should_order_data(inode)) |
228 | ext4_begin_ordered_truncate(inode, 0); | 228 | ext4_begin_ordered_truncate(inode, 0); |
229 | truncate_inode_pages(&inode->i_data, 0); | 229 | truncate_inode_pages_final(&inode->i_data); |
230 | 230 | ||
231 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | 231 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); |
232 | if (is_bad_inode(inode)) | 232 | if (is_bad_inode(inode)) |
@@ -504,6 +504,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
504 | { | 504 | { |
505 | struct extent_status es; | 505 | struct extent_status es; |
506 | int retval; | 506 | int retval; |
507 | int ret = 0; | ||
507 | #ifdef ES_AGGRESSIVE_TEST | 508 | #ifdef ES_AGGRESSIVE_TEST |
508 | struct ext4_map_blocks orig_map; | 509 | struct ext4_map_blocks orig_map; |
509 | 510 | ||
@@ -515,6 +516,12 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
515 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | 516 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
516 | (unsigned long) map->m_lblk); | 517 | (unsigned long) map->m_lblk); |
517 | 518 | ||
519 | /* | ||
520 | * ext4_map_blocks returns an int, and m_len is an unsigned int | ||
521 | */ | ||
522 | if (unlikely(map->m_len > INT_MAX)) | ||
523 | map->m_len = INT_MAX; | ||
524 | |||
518 | /* Lookup extent status tree firstly */ | 525 | /* Lookup extent status tree firstly */ |
519 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 526 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
520 | ext4_es_lru_add(inode); | 527 | ext4_es_lru_add(inode); |
@@ -553,7 +560,6 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
553 | EXT4_GET_BLOCKS_KEEP_SIZE); | 560 | EXT4_GET_BLOCKS_KEEP_SIZE); |
554 | } | 561 | } |
555 | if (retval > 0) { | 562 | if (retval > 0) { |
556 | int ret; | ||
557 | unsigned int status; | 563 | unsigned int status; |
558 | 564 | ||
559 | if (unlikely(retval != map->m_len)) { | 565 | if (unlikely(retval != map->m_len)) { |
@@ -580,7 +586,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
580 | 586 | ||
581 | found: | 587 | found: |
582 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 588 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
583 | int ret = check_block_validity(inode, map); | 589 | ret = check_block_validity(inode, map); |
584 | if (ret != 0) | 590 | if (ret != 0) |
585 | return ret; | 591 | return ret; |
586 | } | 592 | } |
@@ -597,7 +603,13 @@ found: | |||
597 | * with buffer head unmapped. | 603 | * with buffer head unmapped. |
598 | */ | 604 | */ |
599 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) | 605 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) |
600 | return retval; | 606 | /* |
607 | * If we need to convert extent to unwritten | ||
608 | * we continue and do the actual work in | ||
609 | * ext4_ext_map_blocks() | ||
610 | */ | ||
611 | if (!(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) | ||
612 | return retval; | ||
601 | 613 | ||
602 | /* | 614 | /* |
603 | * Here we clear m_flags because after allocating an new extent, | 615 | * Here we clear m_flags because after allocating an new extent, |
@@ -653,7 +665,6 @@ found: | |||
653 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); | 665 | ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED); |
654 | 666 | ||
655 | if (retval > 0) { | 667 | if (retval > 0) { |
656 | int ret; | ||
657 | unsigned int status; | 668 | unsigned int status; |
658 | 669 | ||
659 | if (unlikely(retval != map->m_len)) { | 670 | if (unlikely(retval != map->m_len)) { |
@@ -688,7 +699,7 @@ found: | |||
688 | has_zeroout: | 699 | has_zeroout: |
689 | up_write((&EXT4_I(inode)->i_data_sem)); | 700 | up_write((&EXT4_I(inode)->i_data_sem)); |
690 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { | 701 | if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { |
691 | int ret = check_block_validity(inode, map); | 702 | ret = check_block_validity(inode, map); |
692 | if (ret != 0) | 703 | if (ret != 0) |
693 | return ret; | 704 | return ret; |
694 | } | 705 | } |
@@ -3313,33 +3324,13 @@ void ext4_set_aops(struct inode *inode) | |||
3313 | } | 3324 | } |
3314 | 3325 | ||
3315 | /* | 3326 | /* |
3316 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | ||
3317 | * up to the end of the block which corresponds to `from'. | ||
3318 | * This required during truncate. We need to physically zero the tail end | ||
3319 | * of that block so it doesn't yield old data if the file is later grown. | ||
3320 | */ | ||
3321 | int ext4_block_truncate_page(handle_t *handle, | ||
3322 | struct address_space *mapping, loff_t from) | ||
3323 | { | ||
3324 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3325 | unsigned length; | ||
3326 | unsigned blocksize; | ||
3327 | struct inode *inode = mapping->host; | ||
3328 | |||
3329 | blocksize = inode->i_sb->s_blocksize; | ||
3330 | length = blocksize - (offset & (blocksize - 1)); | ||
3331 | |||
3332 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3333 | } | ||
3334 | |||
3335 | /* | ||
3336 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' | 3327 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
3337 | * starting from file offset 'from'. The range to be zero'd must | 3328 | * starting from file offset 'from'. The range to be zero'd must |
3338 | * be contained with in one block. If the specified range exceeds | 3329 | * be contained with in one block. If the specified range exceeds |
3339 | * the end of the block it will be shortened to end of the block | 3330 | * the end of the block it will be shortened to end of the block |
3340 | * that cooresponds to 'from' | 3331 | * that cooresponds to 'from' |
3341 | */ | 3332 | */ |
3342 | int ext4_block_zero_page_range(handle_t *handle, | 3333 | static int ext4_block_zero_page_range(handle_t *handle, |
3343 | struct address_space *mapping, loff_t from, loff_t length) | 3334 | struct address_space *mapping, loff_t from, loff_t length) |
3344 | { | 3335 | { |
3345 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3336 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
@@ -3429,6 +3420,26 @@ unlock: | |||
3429 | return err; | 3420 | return err; |
3430 | } | 3421 | } |
3431 | 3422 | ||
3423 | /* | ||
3424 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' | ||
3425 | * up to the end of the block which corresponds to `from'. | ||
3426 | * This required during truncate. We need to physically zero the tail end | ||
3427 | * of that block so it doesn't yield old data if the file is later grown. | ||
3428 | */ | ||
3429 | int ext4_block_truncate_page(handle_t *handle, | ||
3430 | struct address_space *mapping, loff_t from) | ||
3431 | { | ||
3432 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
3433 | unsigned length; | ||
3434 | unsigned blocksize; | ||
3435 | struct inode *inode = mapping->host; | ||
3436 | |||
3437 | blocksize = inode->i_sb->s_blocksize; | ||
3438 | length = blocksize - (offset & (blocksize - 1)); | ||
3439 | |||
3440 | return ext4_block_zero_page_range(handle, mapping, from, length); | ||
3441 | } | ||
3442 | |||
3432 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | 3443 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
3433 | loff_t lstart, loff_t length) | 3444 | loff_t lstart, loff_t length) |
3434 | { | 3445 | { |
@@ -3502,7 +3513,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3502 | if (!S_ISREG(inode->i_mode)) | 3513 | if (!S_ISREG(inode->i_mode)) |
3503 | return -EOPNOTSUPP; | 3514 | return -EOPNOTSUPP; |
3504 | 3515 | ||
3505 | trace_ext4_punch_hole(inode, offset, length); | 3516 | trace_ext4_punch_hole(inode, offset, length, 0); |
3506 | 3517 | ||
3507 | /* | 3518 | /* |
3508 | * Write out all dirty pages to avoid race conditions | 3519 | * Write out all dirty pages to avoid race conditions |
@@ -3609,6 +3620,12 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) | |||
3609 | up_write(&EXT4_I(inode)->i_data_sem); | 3620 | up_write(&EXT4_I(inode)->i_data_sem); |
3610 | if (IS_SYNC(inode)) | 3621 | if (IS_SYNC(inode)) |
3611 | ext4_handle_sync(handle); | 3622 | ext4_handle_sync(handle); |
3623 | |||
3624 | /* Now release the pages again to reduce race window */ | ||
3625 | if (last_block_offset > first_block_offset) | ||
3626 | truncate_pagecache_range(inode, first_block_offset, | ||
3627 | last_block_offset); | ||
3628 | |||
3612 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 3629 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
3613 | ext4_mark_inode_dirty(handle, inode); | 3630 | ext4_mark_inode_dirty(handle, inode); |
3614 | out_stop: | 3631 | out_stop: |
@@ -3682,7 +3699,7 @@ void ext4_truncate(struct inode *inode) | |||
3682 | 3699 | ||
3683 | /* | 3700 | /* |
3684 | * There is a possibility that we're either freeing the inode | 3701 | * There is a possibility that we're either freeing the inode |
3685 | * or it completely new indode. In those cases we might not | 3702 | * or it's a completely new inode. In those cases we might not |
3686 | * have i_mutex locked because it's not necessary. | 3703 | * have i_mutex locked because it's not necessary. |
3687 | */ | 3704 | */ |
3688 | if (!(inode->i_state & (I_NEW|I_FREEING))) | 3705 | if (!(inode->i_state & (I_NEW|I_FREEING))) |
@@ -3934,8 +3951,8 @@ void ext4_set_inode_flags(struct inode *inode) | |||
3934 | new_fl |= S_NOATIME; | 3951 | new_fl |= S_NOATIME; |
3935 | if (flags & EXT4_DIRSYNC_FL) | 3952 | if (flags & EXT4_DIRSYNC_FL) |
3936 | new_fl |= S_DIRSYNC; | 3953 | new_fl |= S_DIRSYNC; |
3937 | set_mask_bits(&inode->i_flags, | 3954 | inode_set_flags(inode, new_fl, |
3938 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl); | 3955 | S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); |
3939 | } | 3956 | } |
3940 | 3957 | ||
3941 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ | 3958 | /* Propagate flags from i_flags to EXT4_I(inode)->i_flags */ |
@@ -4154,11 +4171,13 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4154 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); | 4171 | EXT4_INODE_GET_XTIME(i_atime, inode, raw_inode); |
4155 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); | 4172 | EXT4_EINODE_GET_XTIME(i_crtime, ei, raw_inode); |
4156 | 4173 | ||
4157 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); | 4174 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { |
4158 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | 4175 | inode->i_version = le32_to_cpu(raw_inode->i_disk_version); |
4159 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | 4176 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
4160 | inode->i_version |= | 4177 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) |
4161 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | 4178 | inode->i_version |= |
4179 | (__u64)(le32_to_cpu(raw_inode->i_version_hi)) << 32; | ||
4180 | } | ||
4162 | } | 4181 | } |
4163 | 4182 | ||
4164 | ret = 0; | 4183 | ret = 0; |
@@ -4328,8 +4347,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4328 | goto out_brelse; | 4347 | goto out_brelse; |
4329 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); | 4348 | raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); |
4330 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); | 4349 | raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); |
4331 | if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != | 4350 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) |
4332 | cpu_to_le32(EXT4_OS_HURD)) | ||
4333 | raw_inode->i_file_acl_high = | 4351 | raw_inode->i_file_acl_high = |
4334 | cpu_to_le16(ei->i_file_acl >> 32); | 4352 | cpu_to_le16(ei->i_file_acl >> 32); |
4335 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); | 4353 | raw_inode->i_file_acl_lo = cpu_to_le32(ei->i_file_acl); |
@@ -4374,12 +4392,15 @@ static int ext4_do_update_inode(handle_t *handle, | |||
4374 | raw_inode->i_block[block] = ei->i_data[block]; | 4392 | raw_inode->i_block[block] = ei->i_data[block]; |
4375 | } | 4393 | } |
4376 | 4394 | ||
4377 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); | 4395 | if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) { |
4378 | if (ei->i_extra_isize) { | 4396 | raw_inode->i_disk_version = cpu_to_le32(inode->i_version); |
4379 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) | 4397 | if (ei->i_extra_isize) { |
4380 | raw_inode->i_version_hi = | 4398 | if (EXT4_FITS_IN_INODE(raw_inode, ei, i_version_hi)) |
4381 | cpu_to_le32(inode->i_version >> 32); | 4399 | raw_inode->i_version_hi = |
4382 | raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); | 4400 | cpu_to_le32(inode->i_version >> 32); |
4401 | raw_inode->i_extra_isize = | ||
4402 | cpu_to_le16(ei->i_extra_isize); | ||
4403 | } | ||
4383 | } | 4404 | } |
4384 | 4405 | ||
4385 | ext4_inode_csum_set(inode, raw_inode, ei); | 4406 | ext4_inode_csum_set(inode, raw_inode, ei); |
@@ -4446,7 +4467,12 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4446 | return -EIO; | 4467 | return -EIO; |
4447 | } | 4468 | } |
4448 | 4469 | ||
4449 | if (wbc->sync_mode != WB_SYNC_ALL) | 4470 | /* |
4471 | * No need to force transaction in WB_SYNC_NONE mode. Also | ||
4472 | * ext4_sync_fs() will force the commit after everything is | ||
4473 | * written. | ||
4474 | */ | ||
4475 | if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync) | ||
4450 | return 0; | 4476 | return 0; |
4451 | 4477 | ||
4452 | err = ext4_force_commit(inode->i_sb); | 4478 | err = ext4_force_commit(inode->i_sb); |
@@ -4456,7 +4482,11 @@ int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
4456 | err = __ext4_get_inode_loc(inode, &iloc, 0); | 4482 | err = __ext4_get_inode_loc(inode, &iloc, 0); |
4457 | if (err) | 4483 | if (err) |
4458 | return err; | 4484 | return err; |
4459 | if (wbc->sync_mode == WB_SYNC_ALL) | 4485 | /* |
4486 | * sync(2) will flush the whole buffer cache. No need to do | ||
4487 | * it here separately for each inode. | ||
4488 | */ | ||
4489 | if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) | ||
4460 | sync_dirty_buffer(iloc.bh); | 4490 | sync_dirty_buffer(iloc.bh); |
4461 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { | 4491 | if (buffer_req(iloc.bh) && !buffer_uptodate(iloc.bh)) { |
4462 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, | 4492 | EXT4_ERROR_INODE_BLOCK(inode, iloc.bh->b_blocknr, |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a2a837f00407..0f2252ec274d 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -104,21 +104,15 @@ static long swap_inode_boot_loader(struct super_block *sb, | |||
104 | struct ext4_inode_info *ei_bl; | 104 | struct ext4_inode_info *ei_bl; |
105 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 105 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
106 | 106 | ||
107 | if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) { | 107 | if (inode->i_nlink != 1 || !S_ISREG(inode->i_mode)) |
108 | err = -EINVAL; | 108 | return -EINVAL; |
109 | goto swap_boot_out; | ||
110 | } | ||
111 | 109 | ||
112 | if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) { | 110 | if (!inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) |
113 | err = -EPERM; | 111 | return -EPERM; |
114 | goto swap_boot_out; | ||
115 | } | ||
116 | 112 | ||
117 | inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); | 113 | inode_bl = ext4_iget(sb, EXT4_BOOT_LOADER_INO); |
118 | if (IS_ERR(inode_bl)) { | 114 | if (IS_ERR(inode_bl)) |
119 | err = PTR_ERR(inode_bl); | 115 | return PTR_ERR(inode_bl); |
120 | goto swap_boot_out; | ||
121 | } | ||
122 | ei_bl = EXT4_I(inode_bl); | 116 | ei_bl = EXT4_I(inode_bl); |
123 | 117 | ||
124 | filemap_flush(inode->i_mapping); | 118 | filemap_flush(inode->i_mapping); |
@@ -193,20 +187,14 @@ static long swap_inode_boot_loader(struct super_block *sb, | |||
193 | ext4_mark_inode_dirty(handle, inode); | 187 | ext4_mark_inode_dirty(handle, inode); |
194 | } | 188 | } |
195 | } | 189 | } |
196 | |||
197 | ext4_journal_stop(handle); | 190 | ext4_journal_stop(handle); |
198 | |||
199 | ext4_double_up_write_data_sem(inode, inode_bl); | 191 | ext4_double_up_write_data_sem(inode, inode_bl); |
200 | 192 | ||
201 | journal_err_out: | 193 | journal_err_out: |
202 | ext4_inode_resume_unlocked_dio(inode); | 194 | ext4_inode_resume_unlocked_dio(inode); |
203 | ext4_inode_resume_unlocked_dio(inode_bl); | 195 | ext4_inode_resume_unlocked_dio(inode_bl); |
204 | |||
205 | unlock_two_nondirectories(inode, inode_bl); | 196 | unlock_two_nondirectories(inode, inode_bl); |
206 | |||
207 | iput(inode_bl); | 197 | iput(inode_bl); |
208 | |||
209 | swap_boot_out: | ||
210 | return err; | 198 | return err; |
211 | } | 199 | } |
212 | 200 | ||
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 04a5c7504be9..a888cac76e9c 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -1808,6 +1808,7 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac, | |||
1808 | ext4_lock_group(ac->ac_sb, group); | 1808 | ext4_lock_group(ac->ac_sb, group); |
1809 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, | 1809 | max = mb_find_extent(e4b, ac->ac_g_ex.fe_start, |
1810 | ac->ac_g_ex.fe_len, &ex); | 1810 | ac->ac_g_ex.fe_len, &ex); |
1811 | ex.fe_logical = 0xDEADFA11; /* debug value */ | ||
1811 | 1812 | ||
1812 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { | 1813 | if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) { |
1813 | ext4_fsblk_t start; | 1814 | ext4_fsblk_t start; |
@@ -1936,7 +1937,7 @@ void ext4_mb_complex_scan_group(struct ext4_allocation_context *ac, | |||
1936 | */ | 1937 | */ |
1937 | break; | 1938 | break; |
1938 | } | 1939 | } |
1939 | 1940 | ex.fe_logical = 0xDEADC0DE; /* debug value */ | |
1940 | ext4_mb_measure_extent(ac, &ex, e4b); | 1941 | ext4_mb_measure_extent(ac, &ex, e4b); |
1941 | 1942 | ||
1942 | i += ex.fe_len; | 1943 | i += ex.fe_len; |
@@ -1977,6 +1978,7 @@ void ext4_mb_scan_aligned(struct ext4_allocation_context *ac, | |||
1977 | max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); | 1978 | max = mb_find_extent(e4b, i, sbi->s_stripe, &ex); |
1978 | if (max >= sbi->s_stripe) { | 1979 | if (max >= sbi->s_stripe) { |
1979 | ac->ac_found++; | 1980 | ac->ac_found++; |
1981 | ex.fe_logical = 0xDEADF00D; /* debug value */ | ||
1980 | ac->ac_b_ex = ex; | 1982 | ac->ac_b_ex = ex; |
1981 | ext4_mb_use_best_found(ac, e4b); | 1983 | ext4_mb_use_best_found(ac, e4b); |
1982 | break; | 1984 | break; |
@@ -4006,8 +4008,7 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
4006 | (unsigned long)ac->ac_b_ex.fe_len, | 4008 | (unsigned long)ac->ac_b_ex.fe_len, |
4007 | (unsigned long)ac->ac_b_ex.fe_logical, | 4009 | (unsigned long)ac->ac_b_ex.fe_logical, |
4008 | (int)ac->ac_criteria); | 4010 | (int)ac->ac_criteria); |
4009 | ext4_msg(ac->ac_sb, KERN_ERR, "%lu scanned, %d found", | 4011 | ext4_msg(ac->ac_sb, KERN_ERR, "%d found", ac->ac_found); |
4010 | ac->ac_ex_scanned, ac->ac_found); | ||
4011 | ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); | 4012 | ext4_msg(ac->ac_sb, KERN_ERR, "groups: "); |
4012 | ngroups = ext4_get_groups_count(sb); | 4013 | ngroups = ext4_get_groups_count(sb); |
4013 | for (i = 0; i < ngroups; i++) { | 4014 | for (i = 0; i < ngroups; i++) { |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 08481ee84cd5..d634e183b4d4 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -48,7 +48,7 @@ extern ushort ext4_mballoc_debug; | |||
48 | } \ | 48 | } \ |
49 | } while (0) | 49 | } while (0) |
50 | #else | 50 | #else |
51 | #define mb_debug(n, fmt, a...) | 51 | #define mb_debug(n, fmt, a...) no_printk(fmt, ## a) |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ | 54 | #define EXT4_MB_HISTORY_ALLOC 1 /* allocation */ |
@@ -175,8 +175,6 @@ struct ext4_allocation_context { | |||
175 | /* copy of the best found extent taken before preallocation efforts */ | 175 | /* copy of the best found extent taken before preallocation efforts */ |
176 | struct ext4_free_extent ac_f_ex; | 176 | struct ext4_free_extent ac_f_ex; |
177 | 177 | ||
178 | /* number of iterations done. we have to track to limit searching */ | ||
179 | unsigned long ac_ex_scanned; | ||
180 | __u16 ac_groups_scanned; | 178 | __u16 ac_groups_scanned; |
181 | __u16 ac_found; | 179 | __u16 ac_found; |
182 | __u16 ac_tail; | 180 | __u16 ac_tail; |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 773b503bd18c..58ee7dc87669 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
@@ -76,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) | |||
76 | * ext4_ext_path structure refers to the last extent, or a negative error | 76 | * ext4_ext_path structure refers to the last extent, or a negative error |
77 | * value on failure. | 77 | * value on failure. |
78 | */ | 78 | */ |
79 | static int | 79 | int |
80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, | 80 | mext_next_extent(struct inode *inode, struct ext4_ext_path *path, |
81 | struct ext4_extent **extent) | 81 | struct ext4_extent **extent) |
82 | { | 82 | { |
@@ -861,8 +861,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) | |||
861 | } | 861 | } |
862 | if (!buffer_mapped(bh)) { | 862 | if (!buffer_mapped(bh)) { |
863 | zero_user(page, block_start, blocksize); | 863 | zero_user(page, block_start, blocksize); |
864 | if (!err) | 864 | set_buffer_uptodate(bh); |
865 | set_buffer_uptodate(bh); | ||
866 | continue; | 865 | continue; |
867 | } | 866 | } |
868 | } | 867 | } |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d050e043e884..1cb84f78909e 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -3000,6 +3000,154 @@ static struct buffer_head *ext4_get_first_dir_block(handle_t *handle, | |||
3000 | return ext4_get_first_inline_block(inode, parent_de, retval); | 3000 | return ext4_get_first_inline_block(inode, parent_de, retval); |
3001 | } | 3001 | } |
3002 | 3002 | ||
3003 | struct ext4_renament { | ||
3004 | struct inode *dir; | ||
3005 | struct dentry *dentry; | ||
3006 | struct inode *inode; | ||
3007 | bool is_dir; | ||
3008 | int dir_nlink_delta; | ||
3009 | |||
3010 | /* entry for "dentry" */ | ||
3011 | struct buffer_head *bh; | ||
3012 | struct ext4_dir_entry_2 *de; | ||
3013 | int inlined; | ||
3014 | |||
3015 | /* entry for ".." in inode if it's a directory */ | ||
3016 | struct buffer_head *dir_bh; | ||
3017 | struct ext4_dir_entry_2 *parent_de; | ||
3018 | int dir_inlined; | ||
3019 | }; | ||
3020 | |||
3021 | static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent) | ||
3022 | { | ||
3023 | int retval; | ||
3024 | |||
3025 | ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode, | ||
3026 | &retval, &ent->parent_de, | ||
3027 | &ent->dir_inlined); | ||
3028 | if (!ent->dir_bh) | ||
3029 | return retval; | ||
3030 | if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino) | ||
3031 | return -EIO; | ||
3032 | BUFFER_TRACE(ent->dir_bh, "get_write_access"); | ||
3033 | return ext4_journal_get_write_access(handle, ent->dir_bh); | ||
3034 | } | ||
3035 | |||
3036 | static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent, | ||
3037 | unsigned dir_ino) | ||
3038 | { | ||
3039 | int retval; | ||
3040 | |||
3041 | ent->parent_de->inode = cpu_to_le32(dir_ino); | ||
3042 | BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata"); | ||
3043 | if (!ent->dir_inlined) { | ||
3044 | if (is_dx(ent->inode)) { | ||
3045 | retval = ext4_handle_dirty_dx_node(handle, | ||
3046 | ent->inode, | ||
3047 | ent->dir_bh); | ||
3048 | } else { | ||
3049 | retval = ext4_handle_dirty_dirent_node(handle, | ||
3050 | ent->inode, | ||
3051 | ent->dir_bh); | ||
3052 | } | ||
3053 | } else { | ||
3054 | retval = ext4_mark_inode_dirty(handle, ent->inode); | ||
3055 | } | ||
3056 | if (retval) { | ||
3057 | ext4_std_error(ent->dir->i_sb, retval); | ||
3058 | return retval; | ||
3059 | } | ||
3060 | return 0; | ||
3061 | } | ||
3062 | |||
3063 | static int ext4_setent(handle_t *handle, struct ext4_renament *ent, | ||
3064 | unsigned ino, unsigned file_type) | ||
3065 | { | ||
3066 | int retval; | ||
3067 | |||
3068 | BUFFER_TRACE(ent->bh, "get write access"); | ||
3069 | retval = ext4_journal_get_write_access(handle, ent->bh); | ||
3070 | if (retval) | ||
3071 | return retval; | ||
3072 | ent->de->inode = cpu_to_le32(ino); | ||
3073 | if (EXT4_HAS_INCOMPAT_FEATURE(ent->dir->i_sb, | ||
3074 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | ||
3075 | ent->de->file_type = file_type; | ||
3076 | ent->dir->i_version++; | ||
3077 | ent->dir->i_ctime = ent->dir->i_mtime = | ||
3078 | ext4_current_time(ent->dir); | ||
3079 | ext4_mark_inode_dirty(handle, ent->dir); | ||
3080 | BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata"); | ||
3081 | if (!ent->inlined) { | ||
3082 | retval = ext4_handle_dirty_dirent_node(handle, | ||
3083 | ent->dir, ent->bh); | ||
3084 | if (unlikely(retval)) { | ||
3085 | ext4_std_error(ent->dir->i_sb, retval); | ||
3086 | return retval; | ||
3087 | } | ||
3088 | } | ||
3089 | brelse(ent->bh); | ||
3090 | ent->bh = NULL; | ||
3091 | |||
3092 | return 0; | ||
3093 | } | ||
3094 | |||
3095 | static int ext4_find_delete_entry(handle_t *handle, struct inode *dir, | ||
3096 | const struct qstr *d_name) | ||
3097 | { | ||
3098 | int retval = -ENOENT; | ||
3099 | struct buffer_head *bh; | ||
3100 | struct ext4_dir_entry_2 *de; | ||
3101 | |||
3102 | bh = ext4_find_entry(dir, d_name, &de, NULL); | ||
3103 | if (bh) { | ||
3104 | retval = ext4_delete_entry(handle, dir, de, bh); | ||
3105 | brelse(bh); | ||
3106 | } | ||
3107 | return retval; | ||
3108 | } | ||
3109 | |||
3110 | static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent) | ||
3111 | { | ||
3112 | int retval; | ||
3113 | /* | ||
3114 | * ent->de could have moved from under us during htree split, so make | ||
3115 | * sure that we are deleting the right entry. We might also be pointing | ||
3116 | * to a stale entry in the unused part of ent->bh so just checking inum | ||
3117 | * and the name isn't enough. | ||
3118 | */ | ||
3119 | if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino || | ||
3120 | ent->de->name_len != ent->dentry->d_name.len || | ||
3121 | strncmp(ent->de->name, ent->dentry->d_name.name, | ||
3122 | ent->de->name_len)) { | ||
3123 | retval = ext4_find_delete_entry(handle, ent->dir, | ||
3124 | &ent->dentry->d_name); | ||
3125 | } else { | ||
3126 | retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh); | ||
3127 | if (retval == -ENOENT) { | ||
3128 | retval = ext4_find_delete_entry(handle, ent->dir, | ||
3129 | &ent->dentry->d_name); | ||
3130 | } | ||
3131 | } | ||
3132 | |||
3133 | if (retval) { | ||
3134 | ext4_warning(ent->dir->i_sb, | ||
3135 | "Deleting old file (%lu), %d, error=%d", | ||
3136 | ent->dir->i_ino, ent->dir->i_nlink, retval); | ||
3137 | } | ||
3138 | } | ||
3139 | |||
3140 | static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent) | ||
3141 | { | ||
3142 | if (ent->dir_nlink_delta) { | ||
3143 | if (ent->dir_nlink_delta == -1) | ||
3144 | ext4_dec_count(handle, ent->dir); | ||
3145 | else | ||
3146 | ext4_inc_count(handle, ent->dir); | ||
3147 | ext4_mark_inode_dirty(handle, ent->dir); | ||
3148 | } | ||
3149 | } | ||
3150 | |||
3003 | /* | 3151 | /* |
3004 | * Anybody can rename anything with this: the permission checks are left to the | 3152 | * Anybody can rename anything with this: the permission checks are left to the |
3005 | * higher-level routines. | 3153 | * higher-level routines. |
@@ -3012,198 +3160,267 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
3012 | struct inode *new_dir, struct dentry *new_dentry) | 3160 | struct inode *new_dir, struct dentry *new_dentry) |
3013 | { | 3161 | { |
3014 | handle_t *handle = NULL; | 3162 | handle_t *handle = NULL; |
3015 | struct inode *old_inode, *new_inode; | 3163 | struct ext4_renament old = { |
3016 | struct buffer_head *old_bh, *new_bh, *dir_bh; | 3164 | .dir = old_dir, |
3017 | struct ext4_dir_entry_2 *old_de, *new_de; | 3165 | .dentry = old_dentry, |
3166 | .inode = old_dentry->d_inode, | ||
3167 | }; | ||
3168 | struct ext4_renament new = { | ||
3169 | .dir = new_dir, | ||
3170 | .dentry = new_dentry, | ||
3171 | .inode = new_dentry->d_inode, | ||
3172 | }; | ||
3018 | int retval; | 3173 | int retval; |
3019 | int inlined = 0, new_inlined = 0; | ||
3020 | struct ext4_dir_entry_2 *parent_de; | ||
3021 | 3174 | ||
3022 | dquot_initialize(old_dir); | 3175 | dquot_initialize(old.dir); |
3023 | dquot_initialize(new_dir); | 3176 | dquot_initialize(new.dir); |
3024 | |||
3025 | old_bh = new_bh = dir_bh = NULL; | ||
3026 | 3177 | ||
3027 | /* Initialize quotas before so that eventual writes go | 3178 | /* Initialize quotas before so that eventual writes go |
3028 | * in separate transaction */ | 3179 | * in separate transaction */ |
3029 | if (new_dentry->d_inode) | 3180 | if (new.inode) |
3030 | dquot_initialize(new_dentry->d_inode); | 3181 | dquot_initialize(new.inode); |
3031 | 3182 | ||
3032 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de, NULL); | 3183 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL); |
3033 | /* | 3184 | /* |
3034 | * Check for inode number is _not_ due to possible IO errors. | 3185 | * Check for inode number is _not_ due to possible IO errors. |
3035 | * We might rmdir the source, keep it as pwd of some process | 3186 | * We might rmdir the source, keep it as pwd of some process |
3036 | * and merrily kill the link to whatever was created under the | 3187 | * and merrily kill the link to whatever was created under the |
3037 | * same name. Goodbye sticky bit ;-< | 3188 | * same name. Goodbye sticky bit ;-< |
3038 | */ | 3189 | */ |
3039 | old_inode = old_dentry->d_inode; | ||
3040 | retval = -ENOENT; | 3190 | retval = -ENOENT; |
3041 | if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) | 3191 | if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) |
3042 | goto end_rename; | 3192 | goto end_rename; |
3043 | 3193 | ||
3044 | new_inode = new_dentry->d_inode; | 3194 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, |
3045 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, | 3195 | &new.de, &new.inlined); |
3046 | &new_de, &new_inlined); | 3196 | if (new.bh) { |
3047 | if (new_bh) { | 3197 | if (!new.inode) { |
3048 | if (!new_inode) { | 3198 | brelse(new.bh); |
3049 | brelse(new_bh); | 3199 | new.bh = NULL; |
3050 | new_bh = NULL; | ||
3051 | } | 3200 | } |
3052 | } | 3201 | } |
3053 | if (new_inode && !test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC)) | 3202 | if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) |
3054 | ext4_alloc_da_blocks(old_inode); | 3203 | ext4_alloc_da_blocks(old.inode); |
3055 | 3204 | ||
3056 | handle = ext4_journal_start(old_dir, EXT4_HT_DIR, | 3205 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, |
3057 | (2 * EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + | 3206 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + |
3058 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | 3207 | EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); |
3059 | if (IS_ERR(handle)) | 3208 | if (IS_ERR(handle)) |
3060 | return PTR_ERR(handle); | 3209 | return PTR_ERR(handle); |
3061 | 3210 | ||
3062 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 3211 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) |
3063 | ext4_handle_sync(handle); | 3212 | ext4_handle_sync(handle); |
3064 | 3213 | ||
3065 | if (S_ISDIR(old_inode->i_mode)) { | 3214 | if (S_ISDIR(old.inode->i_mode)) { |
3066 | if (new_inode) { | 3215 | if (new.inode) { |
3067 | retval = -ENOTEMPTY; | 3216 | retval = -ENOTEMPTY; |
3068 | if (!empty_dir(new_inode)) | 3217 | if (!empty_dir(new.inode)) |
3218 | goto end_rename; | ||
3219 | } else { | ||
3220 | retval = -EMLINK; | ||
3221 | if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir)) | ||
3069 | goto end_rename; | 3222 | goto end_rename; |
3070 | } | 3223 | } |
3071 | retval = -EIO; | 3224 | retval = ext4_rename_dir_prepare(handle, &old); |
3072 | dir_bh = ext4_get_first_dir_block(handle, old_inode, | ||
3073 | &retval, &parent_de, | ||
3074 | &inlined); | ||
3075 | if (!dir_bh) | ||
3076 | goto end_rename; | ||
3077 | if (le32_to_cpu(parent_de->inode) != old_dir->i_ino) | ||
3078 | goto end_rename; | ||
3079 | retval = -EMLINK; | ||
3080 | if (!new_inode && new_dir != old_dir && | ||
3081 | EXT4_DIR_LINK_MAX(new_dir)) | ||
3082 | goto end_rename; | ||
3083 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
3084 | retval = ext4_journal_get_write_access(handle, dir_bh); | ||
3085 | if (retval) | 3225 | if (retval) |
3086 | goto end_rename; | 3226 | goto end_rename; |
3087 | } | 3227 | } |
3088 | if (!new_bh) { | 3228 | if (!new.bh) { |
3089 | retval = ext4_add_entry(handle, new_dentry, old_inode); | 3229 | retval = ext4_add_entry(handle, new.dentry, old.inode); |
3090 | if (retval) | 3230 | if (retval) |
3091 | goto end_rename; | 3231 | goto end_rename; |
3092 | } else { | 3232 | } else { |
3093 | BUFFER_TRACE(new_bh, "get write access"); | 3233 | retval = ext4_setent(handle, &new, |
3094 | retval = ext4_journal_get_write_access(handle, new_bh); | 3234 | old.inode->i_ino, old.de->file_type); |
3095 | if (retval) | 3235 | if (retval) |
3096 | goto end_rename; | 3236 | goto end_rename; |
3097 | new_de->inode = cpu_to_le32(old_inode->i_ino); | ||
3098 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, | ||
3099 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | ||
3100 | new_de->file_type = old_de->file_type; | ||
3101 | new_dir->i_version++; | ||
3102 | new_dir->i_ctime = new_dir->i_mtime = | ||
3103 | ext4_current_time(new_dir); | ||
3104 | ext4_mark_inode_dirty(handle, new_dir); | ||
3105 | BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata"); | ||
3106 | if (!new_inlined) { | ||
3107 | retval = ext4_handle_dirty_dirent_node(handle, | ||
3108 | new_dir, new_bh); | ||
3109 | if (unlikely(retval)) { | ||
3110 | ext4_std_error(new_dir->i_sb, retval); | ||
3111 | goto end_rename; | ||
3112 | } | ||
3113 | } | ||
3114 | brelse(new_bh); | ||
3115 | new_bh = NULL; | ||
3116 | } | 3237 | } |
3117 | 3238 | ||
3118 | /* | 3239 | /* |
3119 | * Like most other Unix systems, set the ctime for inodes on a | 3240 | * Like most other Unix systems, set the ctime for inodes on a |
3120 | * rename. | 3241 | * rename. |
3121 | */ | 3242 | */ |
3122 | old_inode->i_ctime = ext4_current_time(old_inode); | 3243 | old.inode->i_ctime = ext4_current_time(old.inode); |
3123 | ext4_mark_inode_dirty(handle, old_inode); | 3244 | ext4_mark_inode_dirty(handle, old.inode); |
3124 | 3245 | ||
3125 | /* | 3246 | /* |
3126 | * ok, that's it | 3247 | * ok, that's it |
3127 | */ | 3248 | */ |
3128 | if (le32_to_cpu(old_de->inode) != old_inode->i_ino || | 3249 | ext4_rename_delete(handle, &old); |
3129 | old_de->name_len != old_dentry->d_name.len || | 3250 | |
3130 | strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || | 3251 | if (new.inode) { |
3131 | (retval = ext4_delete_entry(handle, old_dir, | 3252 | ext4_dec_count(handle, new.inode); |
3132 | old_de, old_bh)) == -ENOENT) { | 3253 | new.inode->i_ctime = ext4_current_time(new.inode); |
3133 | /* old_de could have moved from under us during htree split, so | ||
3134 | * make sure that we are deleting the right entry. We might | ||
3135 | * also be pointing to a stale entry in the unused part of | ||
3136 | * old_bh so just checking inum and the name isn't enough. */ | ||
3137 | struct buffer_head *old_bh2; | ||
3138 | struct ext4_dir_entry_2 *old_de2; | ||
3139 | |||
3140 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, | ||
3141 | &old_de2, NULL); | ||
3142 | if (old_bh2) { | ||
3143 | retval = ext4_delete_entry(handle, old_dir, | ||
3144 | old_de2, old_bh2); | ||
3145 | brelse(old_bh2); | ||
3146 | } | ||
3147 | } | 3254 | } |
3148 | if (retval) { | 3255 | old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir); |
3149 | ext4_warning(old_dir->i_sb, | 3256 | ext4_update_dx_flag(old.dir); |
3150 | "Deleting old file (%lu), %d, error=%d", | 3257 | if (old.dir_bh) { |
3151 | old_dir->i_ino, old_dir->i_nlink, retval); | 3258 | retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); |
3152 | } | 3259 | if (retval) |
3153 | |||
3154 | if (new_inode) { | ||
3155 | ext4_dec_count(handle, new_inode); | ||
3156 | new_inode->i_ctime = ext4_current_time(new_inode); | ||
3157 | } | ||
3158 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); | ||
3159 | ext4_update_dx_flag(old_dir); | ||
3160 | if (dir_bh) { | ||
3161 | parent_de->inode = cpu_to_le32(new_dir->i_ino); | ||
3162 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | ||
3163 | if (!inlined) { | ||
3164 | if (is_dx(old_inode)) { | ||
3165 | retval = ext4_handle_dirty_dx_node(handle, | ||
3166 | old_inode, | ||
3167 | dir_bh); | ||
3168 | } else { | ||
3169 | retval = ext4_handle_dirty_dirent_node(handle, | ||
3170 | old_inode, dir_bh); | ||
3171 | } | ||
3172 | } else { | ||
3173 | retval = ext4_mark_inode_dirty(handle, old_inode); | ||
3174 | } | ||
3175 | if (retval) { | ||
3176 | ext4_std_error(old_dir->i_sb, retval); | ||
3177 | goto end_rename; | 3260 | goto end_rename; |
3178 | } | 3261 | |
3179 | ext4_dec_count(handle, old_dir); | 3262 | ext4_dec_count(handle, old.dir); |
3180 | if (new_inode) { | 3263 | if (new.inode) { |
3181 | /* checked empty_dir above, can't have another parent, | 3264 | /* checked empty_dir above, can't have another parent, |
3182 | * ext4_dec_count() won't work for many-linked dirs */ | 3265 | * ext4_dec_count() won't work for many-linked dirs */ |
3183 | clear_nlink(new_inode); | 3266 | clear_nlink(new.inode); |
3184 | } else { | 3267 | } else { |
3185 | ext4_inc_count(handle, new_dir); | 3268 | ext4_inc_count(handle, new.dir); |
3186 | ext4_update_dx_flag(new_dir); | 3269 | ext4_update_dx_flag(new.dir); |
3187 | ext4_mark_inode_dirty(handle, new_dir); | 3270 | ext4_mark_inode_dirty(handle, new.dir); |
3188 | } | 3271 | } |
3189 | } | 3272 | } |
3190 | ext4_mark_inode_dirty(handle, old_dir); | 3273 | ext4_mark_inode_dirty(handle, old.dir); |
3191 | if (new_inode) { | 3274 | if (new.inode) { |
3192 | ext4_mark_inode_dirty(handle, new_inode); | 3275 | ext4_mark_inode_dirty(handle, new.inode); |
3193 | if (!new_inode->i_nlink) | 3276 | if (!new.inode->i_nlink) |
3194 | ext4_orphan_add(handle, new_inode); | 3277 | ext4_orphan_add(handle, new.inode); |
3195 | } | 3278 | } |
3196 | retval = 0; | 3279 | retval = 0; |
3197 | 3280 | ||
3198 | end_rename: | 3281 | end_rename: |
3199 | brelse(dir_bh); | 3282 | brelse(old.dir_bh); |
3200 | brelse(old_bh); | 3283 | brelse(old.bh); |
3201 | brelse(new_bh); | 3284 | brelse(new.bh); |
3202 | if (handle) | 3285 | if (handle) |
3203 | ext4_journal_stop(handle); | 3286 | ext4_journal_stop(handle); |
3204 | return retval; | 3287 | return retval; |
3205 | } | 3288 | } |
3206 | 3289 | ||
3290 | static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
3291 | struct inode *new_dir, struct dentry *new_dentry) | ||
3292 | { | ||
3293 | handle_t *handle = NULL; | ||
3294 | struct ext4_renament old = { | ||
3295 | .dir = old_dir, | ||
3296 | .dentry = old_dentry, | ||
3297 | .inode = old_dentry->d_inode, | ||
3298 | }; | ||
3299 | struct ext4_renament new = { | ||
3300 | .dir = new_dir, | ||
3301 | .dentry = new_dentry, | ||
3302 | .inode = new_dentry->d_inode, | ||
3303 | }; | ||
3304 | u8 new_file_type; | ||
3305 | int retval; | ||
3306 | |||
3307 | dquot_initialize(old.dir); | ||
3308 | dquot_initialize(new.dir); | ||
3309 | |||
3310 | old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, | ||
3311 | &old.de, &old.inlined); | ||
3312 | /* | ||
3313 | * Check for inode number is _not_ due to possible IO errors. | ||
3314 | * We might rmdir the source, keep it as pwd of some process | ||
3315 | * and merrily kill the link to whatever was created under the | ||
3316 | * same name. Goodbye sticky bit ;-< | ||
3317 | */ | ||
3318 | retval = -ENOENT; | ||
3319 | if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino) | ||
3320 | goto end_rename; | ||
3321 | |||
3322 | new.bh = ext4_find_entry(new.dir, &new.dentry->d_name, | ||
3323 | &new.de, &new.inlined); | ||
3324 | |||
3325 | /* RENAME_EXCHANGE case: old *and* new must both exist */ | ||
3326 | if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino) | ||
3327 | goto end_rename; | ||
3328 | |||
3329 | handle = ext4_journal_start(old.dir, EXT4_HT_DIR, | ||
3330 | (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + | ||
3331 | 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); | ||
3332 | if (IS_ERR(handle)) | ||
3333 | return PTR_ERR(handle); | ||
3334 | |||
3335 | if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) | ||
3336 | ext4_handle_sync(handle); | ||
3337 | |||
3338 | if (S_ISDIR(old.inode->i_mode)) { | ||
3339 | old.is_dir = true; | ||
3340 | retval = ext4_rename_dir_prepare(handle, &old); | ||
3341 | if (retval) | ||
3342 | goto end_rename; | ||
3343 | } | ||
3344 | if (S_ISDIR(new.inode->i_mode)) { | ||
3345 | new.is_dir = true; | ||
3346 | retval = ext4_rename_dir_prepare(handle, &new); | ||
3347 | if (retval) | ||
3348 | goto end_rename; | ||
3349 | } | ||
3350 | |||
3351 | /* | ||
3352 | * Other than the special case of overwriting a directory, parents' | ||
3353 | * nlink only needs to be modified if this is a cross directory rename. | ||
3354 | */ | ||
3355 | if (old.dir != new.dir && old.is_dir != new.is_dir) { | ||
3356 | old.dir_nlink_delta = old.is_dir ? -1 : 1; | ||
3357 | new.dir_nlink_delta = -old.dir_nlink_delta; | ||
3358 | retval = -EMLINK; | ||
3359 | if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) || | ||
3360 | (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir))) | ||
3361 | goto end_rename; | ||
3362 | } | ||
3363 | |||
3364 | new_file_type = new.de->file_type; | ||
3365 | retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type); | ||
3366 | if (retval) | ||
3367 | goto end_rename; | ||
3368 | |||
3369 | retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type); | ||
3370 | if (retval) | ||
3371 | goto end_rename; | ||
3372 | |||
3373 | /* | ||
3374 | * Like most other Unix systems, set the ctime for inodes on a | ||
3375 | * rename. | ||
3376 | */ | ||
3377 | old.inode->i_ctime = ext4_current_time(old.inode); | ||
3378 | new.inode->i_ctime = ext4_current_time(new.inode); | ||
3379 | ext4_mark_inode_dirty(handle, old.inode); | ||
3380 | ext4_mark_inode_dirty(handle, new.inode); | ||
3381 | |||
3382 | if (old.dir_bh) { | ||
3383 | retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); | ||
3384 | if (retval) | ||
3385 | goto end_rename; | ||
3386 | } | ||
3387 | if (new.dir_bh) { | ||
3388 | retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino); | ||
3389 | if (retval) | ||
3390 | goto end_rename; | ||
3391 | } | ||
3392 | ext4_update_dir_count(handle, &old); | ||
3393 | ext4_update_dir_count(handle, &new); | ||
3394 | retval = 0; | ||
3395 | |||
3396 | end_rename: | ||
3397 | brelse(old.dir_bh); | ||
3398 | brelse(new.dir_bh); | ||
3399 | brelse(old.bh); | ||
3400 | brelse(new.bh); | ||
3401 | if (handle) | ||
3402 | ext4_journal_stop(handle); | ||
3403 | return retval; | ||
3404 | } | ||
3405 | |||
3406 | static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry, | ||
3407 | struct inode *new_dir, struct dentry *new_dentry, | ||
3408 | unsigned int flags) | ||
3409 | { | ||
3410 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | ||
3411 | return -EINVAL; | ||
3412 | |||
3413 | if (flags & RENAME_EXCHANGE) { | ||
3414 | return ext4_cross_rename(old_dir, old_dentry, | ||
3415 | new_dir, new_dentry); | ||
3416 | } | ||
3417 | /* | ||
3418 | * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" | ||
3419 | * is equivalent to regular rename. | ||
3420 | */ | ||
3421 | return ext4_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
3422 | } | ||
3423 | |||
3207 | /* | 3424 | /* |
3208 | * directories can handle most operations... | 3425 | * directories can handle most operations... |
3209 | */ | 3426 | */ |
@@ -3218,6 +3435,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
3218 | .mknod = ext4_mknod, | 3435 | .mknod = ext4_mknod, |
3219 | .tmpfile = ext4_tmpfile, | 3436 | .tmpfile = ext4_tmpfile, |
3220 | .rename = ext4_rename, | 3437 | .rename = ext4_rename, |
3438 | .rename2 = ext4_rename2, | ||
3221 | .setattr = ext4_setattr, | 3439 | .setattr = ext4_setattr, |
3222 | .setxattr = generic_setxattr, | 3440 | .setxattr = generic_setxattr, |
3223 | .getxattr = generic_getxattr, | 3441 | .getxattr = generic_getxattr, |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 710fed2377d4..f3c667091618 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -59,6 +59,7 @@ static struct kset *ext4_kset; | |||
59 | static struct ext4_lazy_init *ext4_li_info; | 59 | static struct ext4_lazy_init *ext4_li_info; |
60 | static struct mutex ext4_li_mtx; | 60 | static struct mutex ext4_li_mtx; |
61 | static struct ext4_features *ext4_feat; | 61 | static struct ext4_features *ext4_feat; |
62 | static int ext4_mballoc_ready; | ||
62 | 63 | ||
63 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 64 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
64 | unsigned long journal_devnum); | 65 | unsigned long journal_devnum); |
@@ -845,6 +846,10 @@ static void ext4_put_super(struct super_block *sb) | |||
845 | invalidate_bdev(sbi->journal_bdev); | 846 | invalidate_bdev(sbi->journal_bdev); |
846 | ext4_blkdev_remove(sbi); | 847 | ext4_blkdev_remove(sbi); |
847 | } | 848 | } |
849 | if (sbi->s_mb_cache) { | ||
850 | ext4_xattr_destroy_cache(sbi->s_mb_cache); | ||
851 | sbi->s_mb_cache = NULL; | ||
852 | } | ||
848 | if (sbi->s_mmp_tsk) | 853 | if (sbi->s_mmp_tsk) |
849 | kthread_stop(sbi->s_mmp_tsk); | 854 | kthread_stop(sbi->s_mmp_tsk); |
850 | sb->s_fs_info = NULL; | 855 | sb->s_fs_info = NULL; |
@@ -940,7 +945,7 @@ static void init_once(void *foo) | |||
940 | inode_init_once(&ei->vfs_inode); | 945 | inode_init_once(&ei->vfs_inode); |
941 | } | 946 | } |
942 | 947 | ||
943 | static int init_inodecache(void) | 948 | static int __init init_inodecache(void) |
944 | { | 949 | { |
945 | ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", | 950 | ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", |
946 | sizeof(struct ext4_inode_info), | 951 | sizeof(struct ext4_inode_info), |
@@ -3575,6 +3580,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3575 | "feature flags set on rev 0 fs, " | 3580 | "feature flags set on rev 0 fs, " |
3576 | "running e2fsck is recommended"); | 3581 | "running e2fsck is recommended"); |
3577 | 3582 | ||
3583 | if (es->s_creator_os == cpu_to_le32(EXT4_OS_HURD)) { | ||
3584 | set_opt2(sb, HURD_COMPAT); | ||
3585 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, | ||
3586 | EXT4_FEATURE_INCOMPAT_64BIT)) { | ||
3587 | ext4_msg(sb, KERN_ERR, | ||
3588 | "The Hurd can't support 64-bit file systems"); | ||
3589 | goto failed_mount; | ||
3590 | } | ||
3591 | } | ||
3592 | |||
3578 | if (IS_EXT2_SB(sb)) { | 3593 | if (IS_EXT2_SB(sb)) { |
3579 | if (ext2_feature_set_ok(sb)) | 3594 | if (ext2_feature_set_ok(sb)) |
3580 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " | 3595 | ext4_msg(sb, KERN_INFO, "mounting ext2 file system " |
@@ -4010,6 +4025,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
4010 | percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); | 4025 | percpu_counter_set(&sbi->s_dirtyclusters_counter, 0); |
4011 | 4026 | ||
4012 | no_journal: | 4027 | no_journal: |
4028 | if (ext4_mballoc_ready) { | ||
4029 | sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id); | ||
4030 | if (!sbi->s_mb_cache) { | ||
4031 | ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache"); | ||
4032 | goto failed_mount_wq; | ||
4033 | } | ||
4034 | } | ||
4035 | |||
4013 | /* | 4036 | /* |
4014 | * Get the # of file system overhead blocks from the | 4037 | * Get the # of file system overhead blocks from the |
4015 | * superblock if present. | 4038 | * superblock if present. |
@@ -4835,6 +4858,9 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) | |||
4835 | } | 4858 | } |
4836 | 4859 | ||
4837 | if (*flags & MS_RDONLY) { | 4860 | if (*flags & MS_RDONLY) { |
4861 | err = sync_filesystem(sb); | ||
4862 | if (err < 0) | ||
4863 | goto restore_opts; | ||
4838 | err = dquot_suspend(sb, -1); | 4864 | err = dquot_suspend(sb, -1); |
4839 | if (err < 0) | 4865 | if (err < 0) |
4840 | goto restore_opts; | 4866 | goto restore_opts; |
@@ -5516,11 +5542,9 @@ static int __init ext4_init_fs(void) | |||
5516 | 5542 | ||
5517 | err = ext4_init_mballoc(); | 5543 | err = ext4_init_mballoc(); |
5518 | if (err) | 5544 | if (err) |
5519 | goto out3; | ||
5520 | |||
5521 | err = ext4_init_xattr(); | ||
5522 | if (err) | ||
5523 | goto out2; | 5545 | goto out2; |
5546 | else | ||
5547 | ext4_mballoc_ready = 1; | ||
5524 | err = init_inodecache(); | 5548 | err = init_inodecache(); |
5525 | if (err) | 5549 | if (err) |
5526 | goto out1; | 5550 | goto out1; |
@@ -5536,10 +5560,9 @@ out: | |||
5536 | unregister_as_ext3(); | 5560 | unregister_as_ext3(); |
5537 | destroy_inodecache(); | 5561 | destroy_inodecache(); |
5538 | out1: | 5562 | out1: |
5539 | ext4_exit_xattr(); | 5563 | ext4_mballoc_ready = 0; |
5540 | out2: | ||
5541 | ext4_exit_mballoc(); | 5564 | ext4_exit_mballoc(); |
5542 | out3: | 5565 | out2: |
5543 | ext4_exit_feat_adverts(); | 5566 | ext4_exit_feat_adverts(); |
5544 | out4: | 5567 | out4: |
5545 | if (ext4_proc_root) | 5568 | if (ext4_proc_root) |
@@ -5562,7 +5585,6 @@ static void __exit ext4_exit_fs(void) | |||
5562 | unregister_as_ext3(); | 5585 | unregister_as_ext3(); |
5563 | unregister_filesystem(&ext4_fs_type); | 5586 | unregister_filesystem(&ext4_fs_type); |
5564 | destroy_inodecache(); | 5587 | destroy_inodecache(); |
5565 | ext4_exit_xattr(); | ||
5566 | ext4_exit_mballoc(); | 5588 | ext4_exit_mballoc(); |
5567 | ext4_exit_feat_adverts(); | 5589 | ext4_exit_feat_adverts(); |
5568 | remove_proc_entry("fs/ext4", NULL); | 5590 | remove_proc_entry("fs/ext4", NULL); |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index e175e94116ac..1f5cf5880718 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -81,7 +81,7 @@ | |||
81 | # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) | 81 | # define ea_bdebug(bh, fmt, ...) no_printk(fmt, ##__VA_ARGS__) |
82 | #endif | 82 | #endif |
83 | 83 | ||
84 | static void ext4_xattr_cache_insert(struct buffer_head *); | 84 | static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *); |
85 | static struct buffer_head *ext4_xattr_cache_find(struct inode *, | 85 | static struct buffer_head *ext4_xattr_cache_find(struct inode *, |
86 | struct ext4_xattr_header *, | 86 | struct ext4_xattr_header *, |
87 | struct mb_cache_entry **); | 87 | struct mb_cache_entry **); |
@@ -90,8 +90,6 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *, | |||
90 | static int ext4_xattr_list(struct dentry *dentry, char *buffer, | 90 | static int ext4_xattr_list(struct dentry *dentry, char *buffer, |
91 | size_t buffer_size); | 91 | size_t buffer_size); |
92 | 92 | ||
93 | static struct mb_cache *ext4_xattr_cache; | ||
94 | |||
95 | static const struct xattr_handler *ext4_xattr_handler_map[] = { | 93 | static const struct xattr_handler *ext4_xattr_handler_map[] = { |
96 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 94 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
97 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 95 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
@@ -117,6 +115,9 @@ const struct xattr_handler *ext4_xattr_handlers[] = { | |||
117 | NULL | 115 | NULL |
118 | }; | 116 | }; |
119 | 117 | ||
118 | #define EXT4_GET_MB_CACHE(inode) (((struct ext4_sb_info *) \ | ||
119 | inode->i_sb->s_fs_info)->s_mb_cache) | ||
120 | |||
120 | static __le32 ext4_xattr_block_csum(struct inode *inode, | 121 | static __le32 ext4_xattr_block_csum(struct inode *inode, |
121 | sector_t block_nr, | 122 | sector_t block_nr, |
122 | struct ext4_xattr_header *hdr) | 123 | struct ext4_xattr_header *hdr) |
@@ -265,6 +266,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, | |||
265 | struct ext4_xattr_entry *entry; | 266 | struct ext4_xattr_entry *entry; |
266 | size_t size; | 267 | size_t size; |
267 | int error; | 268 | int error; |
269 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
268 | 270 | ||
269 | ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", | 271 | ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", |
270 | name_index, name, buffer, (long)buffer_size); | 272 | name_index, name, buffer, (long)buffer_size); |
@@ -286,7 +288,7 @@ bad_block: | |||
286 | error = -EIO; | 288 | error = -EIO; |
287 | goto cleanup; | 289 | goto cleanup; |
288 | } | 290 | } |
289 | ext4_xattr_cache_insert(bh); | 291 | ext4_xattr_cache_insert(ext4_mb_cache, bh); |
290 | entry = BFIRST(bh); | 292 | entry = BFIRST(bh); |
291 | error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); | 293 | error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); |
292 | if (error == -EIO) | 294 | if (error == -EIO) |
@@ -409,6 +411,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
409 | struct inode *inode = dentry->d_inode; | 411 | struct inode *inode = dentry->d_inode; |
410 | struct buffer_head *bh = NULL; | 412 | struct buffer_head *bh = NULL; |
411 | int error; | 413 | int error; |
414 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
412 | 415 | ||
413 | ea_idebug(inode, "buffer=%p, buffer_size=%ld", | 416 | ea_idebug(inode, "buffer=%p, buffer_size=%ld", |
414 | buffer, (long)buffer_size); | 417 | buffer, (long)buffer_size); |
@@ -430,7 +433,7 @@ ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
430 | error = -EIO; | 433 | error = -EIO; |
431 | goto cleanup; | 434 | goto cleanup; |
432 | } | 435 | } |
433 | ext4_xattr_cache_insert(bh); | 436 | ext4_xattr_cache_insert(ext4_mb_cache, bh); |
434 | error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); | 437 | error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size); |
435 | 438 | ||
436 | cleanup: | 439 | cleanup: |
@@ -526,8 +529,9 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode, | |||
526 | { | 529 | { |
527 | struct mb_cache_entry *ce = NULL; | 530 | struct mb_cache_entry *ce = NULL; |
528 | int error = 0; | 531 | int error = 0; |
532 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
529 | 533 | ||
530 | ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); | 534 | ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); |
531 | error = ext4_journal_get_write_access(handle, bh); | 535 | error = ext4_journal_get_write_access(handle, bh); |
532 | if (error) | 536 | if (error) |
533 | goto out; | 537 | goto out; |
@@ -567,12 +571,13 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last, | |||
567 | size_t *min_offs, void *base, int *total) | 571 | size_t *min_offs, void *base, int *total) |
568 | { | 572 | { |
569 | for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { | 573 | for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { |
570 | *total += EXT4_XATTR_LEN(last->e_name_len); | ||
571 | if (!last->e_value_block && last->e_value_size) { | 574 | if (!last->e_value_block && last->e_value_size) { |
572 | size_t offs = le16_to_cpu(last->e_value_offs); | 575 | size_t offs = le16_to_cpu(last->e_value_offs); |
573 | if (offs < *min_offs) | 576 | if (offs < *min_offs) |
574 | *min_offs = offs; | 577 | *min_offs = offs; |
575 | } | 578 | } |
579 | if (total) | ||
580 | *total += EXT4_XATTR_LEN(last->e_name_len); | ||
576 | } | 581 | } |
577 | return (*min_offs - ((void *)last - base) - sizeof(__u32)); | 582 | return (*min_offs - ((void *)last - base) - sizeof(__u32)); |
578 | } | 583 | } |
@@ -745,13 +750,14 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
745 | struct ext4_xattr_search *s = &bs->s; | 750 | struct ext4_xattr_search *s = &bs->s; |
746 | struct mb_cache_entry *ce = NULL; | 751 | struct mb_cache_entry *ce = NULL; |
747 | int error = 0; | 752 | int error = 0; |
753 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
748 | 754 | ||
749 | #define header(x) ((struct ext4_xattr_header *)(x)) | 755 | #define header(x) ((struct ext4_xattr_header *)(x)) |
750 | 756 | ||
751 | if (i->value && i->value_len > sb->s_blocksize) | 757 | if (i->value && i->value_len > sb->s_blocksize) |
752 | return -ENOSPC; | 758 | return -ENOSPC; |
753 | if (s->base) { | 759 | if (s->base) { |
754 | ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, | 760 | ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, |
755 | bs->bh->b_blocknr); | 761 | bs->bh->b_blocknr); |
756 | error = ext4_journal_get_write_access(handle, bs->bh); | 762 | error = ext4_journal_get_write_access(handle, bs->bh); |
757 | if (error) | 763 | if (error) |
@@ -769,7 +775,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
769 | if (!IS_LAST_ENTRY(s->first)) | 775 | if (!IS_LAST_ENTRY(s->first)) |
770 | ext4_xattr_rehash(header(s->base), | 776 | ext4_xattr_rehash(header(s->base), |
771 | s->here); | 777 | s->here); |
772 | ext4_xattr_cache_insert(bs->bh); | 778 | ext4_xattr_cache_insert(ext4_mb_cache, |
779 | bs->bh); | ||
773 | } | 780 | } |
774 | unlock_buffer(bs->bh); | 781 | unlock_buffer(bs->bh); |
775 | if (error == -EIO) | 782 | if (error == -EIO) |
@@ -905,7 +912,7 @@ getblk_failed: | |||
905 | memcpy(new_bh->b_data, s->base, new_bh->b_size); | 912 | memcpy(new_bh->b_data, s->base, new_bh->b_size); |
906 | set_buffer_uptodate(new_bh); | 913 | set_buffer_uptodate(new_bh); |
907 | unlock_buffer(new_bh); | 914 | unlock_buffer(new_bh); |
908 | ext4_xattr_cache_insert(new_bh); | 915 | ext4_xattr_cache_insert(ext4_mb_cache, new_bh); |
909 | error = ext4_handle_dirty_xattr_block(handle, | 916 | error = ext4_handle_dirty_xattr_block(handle, |
910 | inode, new_bh); | 917 | inode, new_bh); |
911 | if (error) | 918 | if (error) |
@@ -1228,7 +1235,7 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
1228 | struct ext4_xattr_block_find *bs = NULL; | 1235 | struct ext4_xattr_block_find *bs = NULL; |
1229 | char *buffer = NULL, *b_entry_name = NULL; | 1236 | char *buffer = NULL, *b_entry_name = NULL; |
1230 | size_t min_offs, free; | 1237 | size_t min_offs, free; |
1231 | int total_ino, total_blk; | 1238 | int total_ino; |
1232 | void *base, *start, *end; | 1239 | void *base, *start, *end; |
1233 | int extra_isize = 0, error = 0, tried_min_extra_isize = 0; | 1240 | int extra_isize = 0, error = 0, tried_min_extra_isize = 0; |
1234 | int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); | 1241 | int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize); |
@@ -1286,8 +1293,7 @@ retry: | |||
1286 | first = BFIRST(bh); | 1293 | first = BFIRST(bh); |
1287 | end = bh->b_data + bh->b_size; | 1294 | end = bh->b_data + bh->b_size; |
1288 | min_offs = end - base; | 1295 | min_offs = end - base; |
1289 | free = ext4_xattr_free_space(first, &min_offs, base, | 1296 | free = ext4_xattr_free_space(first, &min_offs, base, NULL); |
1290 | &total_blk); | ||
1291 | if (free < new_extra_isize) { | 1297 | if (free < new_extra_isize) { |
1292 | if (!tried_min_extra_isize && s_min_extra_isize) { | 1298 | if (!tried_min_extra_isize && s_min_extra_isize) { |
1293 | tried_min_extra_isize++; | 1299 | tried_min_extra_isize++; |
@@ -1495,13 +1501,13 @@ ext4_xattr_put_super(struct super_block *sb) | |||
1495 | * Returns 0, or a negative error number on failure. | 1501 | * Returns 0, or a negative error number on failure. |
1496 | */ | 1502 | */ |
1497 | static void | 1503 | static void |
1498 | ext4_xattr_cache_insert(struct buffer_head *bh) | 1504 | ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh) |
1499 | { | 1505 | { |
1500 | __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); | 1506 | __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); |
1501 | struct mb_cache_entry *ce; | 1507 | struct mb_cache_entry *ce; |
1502 | int error; | 1508 | int error; |
1503 | 1509 | ||
1504 | ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS); | 1510 | ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS); |
1505 | if (!ce) { | 1511 | if (!ce) { |
1506 | ea_bdebug(bh, "out of memory"); | 1512 | ea_bdebug(bh, "out of memory"); |
1507 | return; | 1513 | return; |
@@ -1573,12 +1579,13 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, | |||
1573 | { | 1579 | { |
1574 | __u32 hash = le32_to_cpu(header->h_hash); | 1580 | __u32 hash = le32_to_cpu(header->h_hash); |
1575 | struct mb_cache_entry *ce; | 1581 | struct mb_cache_entry *ce; |
1582 | struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); | ||
1576 | 1583 | ||
1577 | if (!header->h_hash) | 1584 | if (!header->h_hash) |
1578 | return NULL; /* never share */ | 1585 | return NULL; /* never share */ |
1579 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); | 1586 | ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); |
1580 | again: | 1587 | again: |
1581 | ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev, | 1588 | ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev, |
1582 | hash); | 1589 | hash); |
1583 | while (ce) { | 1590 | while (ce) { |
1584 | struct buffer_head *bh; | 1591 | struct buffer_head *bh; |
@@ -1676,19 +1683,17 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header, | |||
1676 | 1683 | ||
1677 | #undef BLOCK_HASH_SHIFT | 1684 | #undef BLOCK_HASH_SHIFT |
1678 | 1685 | ||
1679 | int __init | 1686 | #define HASH_BUCKET_BITS 10 |
1680 | ext4_init_xattr(void) | 1687 | |
1688 | struct mb_cache * | ||
1689 | ext4_xattr_create_cache(char *name) | ||
1681 | { | 1690 | { |
1682 | ext4_xattr_cache = mb_cache_create("ext4_xattr", 6); | 1691 | return mb_cache_create(name, HASH_BUCKET_BITS); |
1683 | if (!ext4_xattr_cache) | ||
1684 | return -ENOMEM; | ||
1685 | return 0; | ||
1686 | } | 1692 | } |
1687 | 1693 | ||
1688 | void | 1694 | void ext4_xattr_destroy_cache(struct mb_cache *cache) |
1689 | ext4_exit_xattr(void) | ||
1690 | { | 1695 | { |
1691 | if (ext4_xattr_cache) | 1696 | if (cache) |
1692 | mb_cache_destroy(ext4_xattr_cache); | 1697 | mb_cache_destroy(cache); |
1693 | ext4_xattr_cache = NULL; | ||
1694 | } | 1698 | } |
1699 | |||
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 819d6398833f..29bedf5589f6 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
@@ -110,9 +110,6 @@ extern void ext4_xattr_put_super(struct super_block *); | |||
110 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | 110 | extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, |
111 | struct ext4_inode *raw_inode, handle_t *handle); | 111 | struct ext4_inode *raw_inode, handle_t *handle); |
112 | 112 | ||
113 | extern int __init ext4_init_xattr(void); | ||
114 | extern void ext4_exit_xattr(void); | ||
115 | |||
116 | extern const struct xattr_handler *ext4_xattr_handlers[]; | 113 | extern const struct xattr_handler *ext4_xattr_handlers[]; |
117 | 114 | ||
118 | extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, | 115 | extern int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, |
@@ -124,6 +121,9 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode, | |||
124 | struct ext4_xattr_info *i, | 121 | struct ext4_xattr_info *i, |
125 | struct ext4_xattr_ibody_find *is); | 122 | struct ext4_xattr_ibody_find *is); |
126 | 123 | ||
124 | extern struct mb_cache *ext4_xattr_create_cache(char *name); | ||
125 | extern void ext4_xattr_destroy_cache(struct mb_cache *); | ||
126 | |||
127 | #ifdef CONFIG_EXT4_FS_SECURITY | 127 | #ifdef CONFIG_EXT4_FS_SECURITY |
128 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 128 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
129 | struct inode *dir, const struct qstr *qstr); | 129 | struct inode *dir, const struct qstr *qstr); |
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index fa8da4cb8c4b..e93e4ec7d165 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
@@ -174,7 +174,7 @@ struct posix_acl *f2fs_get_acl(struct inode *inode, int type) | |||
174 | 174 | ||
175 | retval = f2fs_getxattr(inode, name_index, "", NULL, 0); | 175 | retval = f2fs_getxattr(inode, name_index, "", NULL, 0); |
176 | if (retval > 0) { | 176 | if (retval > 0) { |
177 | value = kmalloc(retval, GFP_KERNEL); | 177 | value = kmalloc(retval, GFP_F2FS_ZERO); |
178 | if (!value) | 178 | if (!value) |
179 | return ERR_PTR(-ENOMEM); | 179 | return ERR_PTR(-ENOMEM); |
180 | retval = f2fs_getxattr(inode, name_index, "", value, retval); | 180 | retval = f2fs_getxattr(inode, name_index, "", value, retval); |
@@ -203,6 +203,12 @@ static int __f2fs_set_acl(struct inode *inode, int type, | |||
203 | size_t size = 0; | 203 | size_t size = 0; |
204 | int error; | 204 | int error; |
205 | 205 | ||
206 | if (acl) { | ||
207 | error = posix_acl_valid(acl); | ||
208 | if (error < 0) | ||
209 | return error; | ||
210 | } | ||
211 | |||
206 | switch (type) { | 212 | switch (type) { |
207 | case ACL_TYPE_ACCESS: | 213 | case ACL_TYPE_ACCESS: |
208 | name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; | 214 | name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS; |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 293d0486a40f..4aa521aa9bc3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
@@ -33,14 +33,12 @@ struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index) | |||
33 | struct address_space *mapping = META_MAPPING(sbi); | 33 | struct address_space *mapping = META_MAPPING(sbi); |
34 | struct page *page = NULL; | 34 | struct page *page = NULL; |
35 | repeat: | 35 | repeat: |
36 | page = grab_cache_page(mapping, index); | 36 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); |
37 | if (!page) { | 37 | if (!page) { |
38 | cond_resched(); | 38 | cond_resched(); |
39 | goto repeat; | 39 | goto repeat; |
40 | } | 40 | } |
41 | 41 | ||
42 | /* We wait writeback only inside grab_meta_page() */ | ||
43 | wait_on_page_writeback(page); | ||
44 | SetPageUptodate(page); | 42 | SetPageUptodate(page); |
45 | return page; | 43 | return page; |
46 | } | 44 | } |
@@ -75,23 +73,102 @@ out: | |||
75 | return page; | 73 | return page; |
76 | } | 74 | } |
77 | 75 | ||
76 | inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type) | ||
77 | { | ||
78 | switch (type) { | ||
79 | case META_NAT: | ||
80 | return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK; | ||
81 | case META_SIT: | ||
82 | return SIT_BLK_CNT(sbi); | ||
83 | case META_SSA: | ||
84 | case META_CP: | ||
85 | return 0; | ||
86 | default: | ||
87 | BUG(); | ||
88 | } | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * Readahead CP/NAT/SIT/SSA pages | ||
93 | */ | ||
94 | int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type) | ||
95 | { | ||
96 | block_t prev_blk_addr = 0; | ||
97 | struct page *page; | ||
98 | int blkno = start; | ||
99 | int max_blks = get_max_meta_blks(sbi, type); | ||
100 | |||
101 | struct f2fs_io_info fio = { | ||
102 | .type = META, | ||
103 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
104 | }; | ||
105 | |||
106 | for (; nrpages-- > 0; blkno++) { | ||
107 | block_t blk_addr; | ||
108 | |||
109 | switch (type) { | ||
110 | case META_NAT: | ||
111 | /* get nat block addr */ | ||
112 | if (unlikely(blkno >= max_blks)) | ||
113 | blkno = 0; | ||
114 | blk_addr = current_nat_addr(sbi, | ||
115 | blkno * NAT_ENTRY_PER_BLOCK); | ||
116 | break; | ||
117 | case META_SIT: | ||
118 | /* get sit block addr */ | ||
119 | if (unlikely(blkno >= max_blks)) | ||
120 | goto out; | ||
121 | blk_addr = current_sit_addr(sbi, | ||
122 | blkno * SIT_ENTRY_PER_BLOCK); | ||
123 | if (blkno != start && prev_blk_addr + 1 != blk_addr) | ||
124 | goto out; | ||
125 | prev_blk_addr = blk_addr; | ||
126 | break; | ||
127 | case META_SSA: | ||
128 | case META_CP: | ||
129 | /* get ssa/cp block addr */ | ||
130 | blk_addr = blkno; | ||
131 | break; | ||
132 | default: | ||
133 | BUG(); | ||
134 | } | ||
135 | |||
136 | page = grab_cache_page(META_MAPPING(sbi), blk_addr); | ||
137 | if (!page) | ||
138 | continue; | ||
139 | if (PageUptodate(page)) { | ||
140 | mark_page_accessed(page); | ||
141 | f2fs_put_page(page, 1); | ||
142 | continue; | ||
143 | } | ||
144 | |||
145 | f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); | ||
146 | mark_page_accessed(page); | ||
147 | f2fs_put_page(page, 0); | ||
148 | } | ||
149 | out: | ||
150 | f2fs_submit_merged_bio(sbi, META, READ); | ||
151 | return blkno - start; | ||
152 | } | ||
153 | |||
78 | static int f2fs_write_meta_page(struct page *page, | 154 | static int f2fs_write_meta_page(struct page *page, |
79 | struct writeback_control *wbc) | 155 | struct writeback_control *wbc) |
80 | { | 156 | { |
81 | struct inode *inode = page->mapping->host; | 157 | struct inode *inode = page->mapping->host; |
82 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 158 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
83 | 159 | ||
84 | /* Should not write any meta pages, if any IO error was occurred */ | 160 | if (unlikely(sbi->por_doing)) |
85 | if (unlikely(sbi->por_doing || | ||
86 | is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) | ||
87 | goto redirty_out; | 161 | goto redirty_out; |
88 | |||
89 | if (wbc->for_reclaim) | 162 | if (wbc->for_reclaim) |
90 | goto redirty_out; | 163 | goto redirty_out; |
91 | 164 | ||
92 | wait_on_page_writeback(page); | 165 | /* Should not write any meta pages, if any IO error was occurred */ |
166 | if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) | ||
167 | goto no_write; | ||
93 | 168 | ||
169 | f2fs_wait_on_page_writeback(page, META); | ||
94 | write_meta_page(sbi, page); | 170 | write_meta_page(sbi, page); |
171 | no_write: | ||
95 | dec_page_count(sbi, F2FS_DIRTY_META); | 172 | dec_page_count(sbi, F2FS_DIRTY_META); |
96 | unlock_page(page); | 173 | unlock_page(page); |
97 | return 0; | 174 | return 0; |
@@ -99,6 +176,7 @@ static int f2fs_write_meta_page(struct page *page, | |||
99 | redirty_out: | 176 | redirty_out: |
100 | dec_page_count(sbi, F2FS_DIRTY_META); | 177 | dec_page_count(sbi, F2FS_DIRTY_META); |
101 | wbc->pages_skipped++; | 178 | wbc->pages_skipped++; |
179 | account_page_redirty(page); | ||
102 | set_page_dirty(page); | 180 | set_page_dirty(page); |
103 | return AOP_WRITEPAGE_ACTIVATE; | 181 | return AOP_WRITEPAGE_ACTIVATE; |
104 | } | 182 | } |
@@ -107,21 +185,23 @@ static int f2fs_write_meta_pages(struct address_space *mapping, | |||
107 | struct writeback_control *wbc) | 185 | struct writeback_control *wbc) |
108 | { | 186 | { |
109 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | 187 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); |
110 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 188 | long diff, written; |
111 | long written; | ||
112 | |||
113 | if (wbc->for_kupdate) | ||
114 | return 0; | ||
115 | 189 | ||
116 | /* collect a number of dirty meta pages and write together */ | 190 | /* collect a number of dirty meta pages and write together */ |
117 | if (get_pages(sbi, F2FS_DIRTY_META) < nrpages) | 191 | if (wbc->for_kupdate || |
118 | return 0; | 192 | get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) |
193 | goto skip_write; | ||
119 | 194 | ||
120 | /* if mounting is failed, skip writing node pages */ | 195 | /* if mounting is failed, skip writing node pages */ |
121 | mutex_lock(&sbi->cp_mutex); | 196 | mutex_lock(&sbi->cp_mutex); |
122 | written = sync_meta_pages(sbi, META, nrpages); | 197 | diff = nr_pages_to_write(sbi, META, wbc); |
198 | written = sync_meta_pages(sbi, META, wbc->nr_to_write); | ||
123 | mutex_unlock(&sbi->cp_mutex); | 199 | mutex_unlock(&sbi->cp_mutex); |
124 | wbc->nr_to_write -= written; | 200 | wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff); |
201 | return 0; | ||
202 | |||
203 | skip_write: | ||
204 | wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META); | ||
125 | return 0; | 205 | return 0; |
126 | } | 206 | } |
127 | 207 | ||
@@ -148,10 +228,22 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type, | |||
148 | 228 | ||
149 | for (i = 0; i < nr_pages; i++) { | 229 | for (i = 0; i < nr_pages; i++) { |
150 | struct page *page = pvec.pages[i]; | 230 | struct page *page = pvec.pages[i]; |
231 | |||
151 | lock_page(page); | 232 | lock_page(page); |
152 | f2fs_bug_on(page->mapping != mapping); | 233 | |
153 | f2fs_bug_on(!PageDirty(page)); | 234 | if (unlikely(page->mapping != mapping)) { |
154 | clear_page_dirty_for_io(page); | 235 | continue_unlock: |
236 | unlock_page(page); | ||
237 | continue; | ||
238 | } | ||
239 | if (!PageDirty(page)) { | ||
240 | /* someone wrote it for us */ | ||
241 | goto continue_unlock; | ||
242 | } | ||
243 | |||
244 | if (!clear_page_dirty_for_io(page)) | ||
245 | goto continue_unlock; | ||
246 | |||
155 | if (f2fs_write_meta_page(page, &wbc)) { | 247 | if (f2fs_write_meta_page(page, &wbc)) { |
156 | unlock_page(page); | 248 | unlock_page(page); |
157 | break; | 249 | break; |
@@ -216,16 +308,15 @@ void release_orphan_inode(struct f2fs_sb_info *sbi) | |||
216 | 308 | ||
217 | void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | 309 | void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) |
218 | { | 310 | { |
219 | struct list_head *head, *this; | 311 | struct list_head *head; |
220 | struct orphan_inode_entry *new = NULL, *orphan = NULL; | 312 | struct orphan_inode_entry *new, *orphan; |
221 | 313 | ||
222 | new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); | 314 | new = f2fs_kmem_cache_alloc(orphan_entry_slab, GFP_ATOMIC); |
223 | new->ino = ino; | 315 | new->ino = ino; |
224 | 316 | ||
225 | spin_lock(&sbi->orphan_inode_lock); | 317 | spin_lock(&sbi->orphan_inode_lock); |
226 | head = &sbi->orphan_inode_list; | 318 | head = &sbi->orphan_inode_list; |
227 | list_for_each(this, head) { | 319 | list_for_each_entry(orphan, head, list) { |
228 | orphan = list_entry(this, struct orphan_inode_entry, list); | ||
229 | if (orphan->ino == ino) { | 320 | if (orphan->ino == ino) { |
230 | spin_unlock(&sbi->orphan_inode_lock); | 321 | spin_unlock(&sbi->orphan_inode_lock); |
231 | kmem_cache_free(orphan_entry_slab, new); | 322 | kmem_cache_free(orphan_entry_slab, new); |
@@ -234,14 +325,10 @@ void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
234 | 325 | ||
235 | if (orphan->ino > ino) | 326 | if (orphan->ino > ino) |
236 | break; | 327 | break; |
237 | orphan = NULL; | ||
238 | } | 328 | } |
239 | 329 | ||
240 | /* add new_oentry into list which is sorted by inode number */ | 330 | /* add new orphan entry into list which is sorted by inode number */ |
241 | if (orphan) | 331 | list_add_tail(&new->list, &orphan->list); |
242 | list_add(&new->list, this->prev); | ||
243 | else | ||
244 | list_add_tail(&new->list, head); | ||
245 | spin_unlock(&sbi->orphan_inode_lock); | 332 | spin_unlock(&sbi->orphan_inode_lock); |
246 | } | 333 | } |
247 | 334 | ||
@@ -255,10 +342,11 @@ void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) | |||
255 | list_for_each_entry(orphan, head, list) { | 342 | list_for_each_entry(orphan, head, list) { |
256 | if (orphan->ino == ino) { | 343 | if (orphan->ino == ino) { |
257 | list_del(&orphan->list); | 344 | list_del(&orphan->list); |
258 | kmem_cache_free(orphan_entry_slab, orphan); | ||
259 | f2fs_bug_on(sbi->n_orphans == 0); | 345 | f2fs_bug_on(sbi->n_orphans == 0); |
260 | sbi->n_orphans--; | 346 | sbi->n_orphans--; |
261 | break; | 347 | spin_unlock(&sbi->orphan_inode_lock); |
348 | kmem_cache_free(orphan_entry_slab, orphan); | ||
349 | return; | ||
262 | } | 350 | } |
263 | } | 351 | } |
264 | spin_unlock(&sbi->orphan_inode_lock); | 352 | spin_unlock(&sbi->orphan_inode_lock); |
@@ -285,6 +373,8 @@ void recover_orphan_inodes(struct f2fs_sb_info *sbi) | |||
285 | start_blk = __start_cp_addr(sbi) + 1; | 373 | start_blk = __start_cp_addr(sbi) + 1; |
286 | orphan_blkaddr = __start_sum_addr(sbi) - 1; | 374 | orphan_blkaddr = __start_sum_addr(sbi) - 1; |
287 | 375 | ||
376 | ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP); | ||
377 | |||
288 | for (i = 0; i < orphan_blkaddr; i++) { | 378 | for (i = 0; i < orphan_blkaddr; i++) { |
289 | struct page *page = get_meta_page(sbi, start_blk + i); | 379 | struct page *page = get_meta_page(sbi, start_blk + i); |
290 | struct f2fs_orphan_block *orphan_blk; | 380 | struct f2fs_orphan_block *orphan_blk; |
@@ -466,14 +556,12 @@ static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) | |||
466 | { | 556 | { |
467 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 557 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
468 | struct list_head *head = &sbi->dir_inode_list; | 558 | struct list_head *head = &sbi->dir_inode_list; |
469 | struct list_head *this; | 559 | struct dir_inode_entry *entry; |
470 | 560 | ||
471 | list_for_each(this, head) { | 561 | list_for_each_entry(entry, head, list) |
472 | struct dir_inode_entry *entry; | ||
473 | entry = list_entry(this, struct dir_inode_entry, list); | ||
474 | if (unlikely(entry->inode == inode)) | 562 | if (unlikely(entry->inode == inode)) |
475 | return -EEXIST; | 563 | return -EEXIST; |
476 | } | 564 | |
477 | list_add_tail(&new->list, head); | 565 | list_add_tail(&new->list, head); |
478 | stat_inc_dirty_dir(sbi); | 566 | stat_inc_dirty_dir(sbi); |
479 | return 0; | 567 | return 0; |
@@ -483,6 +571,7 @@ void set_dirty_dir_page(struct inode *inode, struct page *page) | |||
483 | { | 571 | { |
484 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 572 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
485 | struct dir_inode_entry *new; | 573 | struct dir_inode_entry *new; |
574 | int ret = 0; | ||
486 | 575 | ||
487 | if (!S_ISDIR(inode->i_mode)) | 576 | if (!S_ISDIR(inode->i_mode)) |
488 | return; | 577 | return; |
@@ -492,13 +581,13 @@ void set_dirty_dir_page(struct inode *inode, struct page *page) | |||
492 | INIT_LIST_HEAD(&new->list); | 581 | INIT_LIST_HEAD(&new->list); |
493 | 582 | ||
494 | spin_lock(&sbi->dir_inode_lock); | 583 | spin_lock(&sbi->dir_inode_lock); |
495 | if (__add_dirty_inode(inode, new)) | 584 | ret = __add_dirty_inode(inode, new); |
496 | kmem_cache_free(inode_entry_slab, new); | ||
497 | |||
498 | inc_page_count(sbi, F2FS_DIRTY_DENTS); | ||
499 | inode_inc_dirty_dents(inode); | 585 | inode_inc_dirty_dents(inode); |
500 | SetPagePrivate(page); | 586 | SetPagePrivate(page); |
501 | spin_unlock(&sbi->dir_inode_lock); | 587 | spin_unlock(&sbi->dir_inode_lock); |
588 | |||
589 | if (ret) | ||
590 | kmem_cache_free(inode_entry_slab, new); | ||
502 | } | 591 | } |
503 | 592 | ||
504 | void add_dirty_dir_inode(struct inode *inode) | 593 | void add_dirty_dir_inode(struct inode *inode) |
@@ -506,44 +595,47 @@ void add_dirty_dir_inode(struct inode *inode) | |||
506 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 595 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
507 | struct dir_inode_entry *new = | 596 | struct dir_inode_entry *new = |
508 | f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | 597 | f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS); |
598 | int ret = 0; | ||
509 | 599 | ||
510 | new->inode = inode; | 600 | new->inode = inode; |
511 | INIT_LIST_HEAD(&new->list); | 601 | INIT_LIST_HEAD(&new->list); |
512 | 602 | ||
513 | spin_lock(&sbi->dir_inode_lock); | 603 | spin_lock(&sbi->dir_inode_lock); |
514 | if (__add_dirty_inode(inode, new)) | 604 | ret = __add_dirty_inode(inode, new); |
515 | kmem_cache_free(inode_entry_slab, new); | ||
516 | spin_unlock(&sbi->dir_inode_lock); | 605 | spin_unlock(&sbi->dir_inode_lock); |
606 | |||
607 | if (ret) | ||
608 | kmem_cache_free(inode_entry_slab, new); | ||
517 | } | 609 | } |
518 | 610 | ||
519 | void remove_dirty_dir_inode(struct inode *inode) | 611 | void remove_dirty_dir_inode(struct inode *inode) |
520 | { | 612 | { |
521 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 613 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
522 | 614 | struct list_head *head; | |
523 | struct list_head *this, *head; | 615 | struct dir_inode_entry *entry; |
524 | 616 | ||
525 | if (!S_ISDIR(inode->i_mode)) | 617 | if (!S_ISDIR(inode->i_mode)) |
526 | return; | 618 | return; |
527 | 619 | ||
528 | spin_lock(&sbi->dir_inode_lock); | 620 | spin_lock(&sbi->dir_inode_lock); |
529 | if (atomic_read(&F2FS_I(inode)->dirty_dents)) { | 621 | if (get_dirty_dents(inode)) { |
530 | spin_unlock(&sbi->dir_inode_lock); | 622 | spin_unlock(&sbi->dir_inode_lock); |
531 | return; | 623 | return; |
532 | } | 624 | } |
533 | 625 | ||
534 | head = &sbi->dir_inode_list; | 626 | head = &sbi->dir_inode_list; |
535 | list_for_each(this, head) { | 627 | list_for_each_entry(entry, head, list) { |
536 | struct dir_inode_entry *entry; | ||
537 | entry = list_entry(this, struct dir_inode_entry, list); | ||
538 | if (entry->inode == inode) { | 628 | if (entry->inode == inode) { |
539 | list_del(&entry->list); | 629 | list_del(&entry->list); |
540 | kmem_cache_free(inode_entry_slab, entry); | ||
541 | stat_dec_dirty_dir(sbi); | 630 | stat_dec_dirty_dir(sbi); |
542 | break; | 631 | spin_unlock(&sbi->dir_inode_lock); |
632 | kmem_cache_free(inode_entry_slab, entry); | ||
633 | goto done; | ||
543 | } | 634 | } |
544 | } | 635 | } |
545 | spin_unlock(&sbi->dir_inode_lock); | 636 | spin_unlock(&sbi->dir_inode_lock); |
546 | 637 | ||
638 | done: | ||
547 | /* Only from the recovery routine */ | 639 | /* Only from the recovery routine */ |
548 | if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { | 640 | if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { |
549 | clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); | 641 | clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); |
@@ -554,15 +646,14 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
554 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) | 646 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) |
555 | { | 647 | { |
556 | 648 | ||
557 | struct list_head *this, *head; | 649 | struct list_head *head; |
558 | struct inode *inode = NULL; | 650 | struct inode *inode = NULL; |
651 | struct dir_inode_entry *entry; | ||
559 | 652 | ||
560 | spin_lock(&sbi->dir_inode_lock); | 653 | spin_lock(&sbi->dir_inode_lock); |
561 | 654 | ||
562 | head = &sbi->dir_inode_list; | 655 | head = &sbi->dir_inode_list; |
563 | list_for_each(this, head) { | 656 | list_for_each_entry(entry, head, list) { |
564 | struct dir_inode_entry *entry; | ||
565 | entry = list_entry(this, struct dir_inode_entry, list); | ||
566 | if (entry->inode->i_ino == ino) { | 657 | if (entry->inode->i_ino == ino) { |
567 | inode = entry->inode; | 658 | inode = entry->inode; |
568 | break; | 659 | break; |
@@ -589,7 +680,7 @@ retry: | |||
589 | inode = igrab(entry->inode); | 680 | inode = igrab(entry->inode); |
590 | spin_unlock(&sbi->dir_inode_lock); | 681 | spin_unlock(&sbi->dir_inode_lock); |
591 | if (inode) { | 682 | if (inode) { |
592 | filemap_flush(inode->i_mapping); | 683 | filemap_fdatawrite(inode->i_mapping); |
593 | iput(inode); | 684 | iput(inode); |
594 | } else { | 685 | } else { |
595 | /* | 686 | /* |
@@ -824,6 +915,7 @@ void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
824 | unblock_operations(sbi); | 915 | unblock_operations(sbi); |
825 | mutex_unlock(&sbi->cp_mutex); | 916 | mutex_unlock(&sbi->cp_mutex); |
826 | 917 | ||
918 | stat_inc_cp_count(sbi->stat_info); | ||
827 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); | 919 | trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint"); |
828 | } | 920 | } |
829 | 921 | ||
@@ -845,11 +937,11 @@ void init_orphan_info(struct f2fs_sb_info *sbi) | |||
845 | int __init create_checkpoint_caches(void) | 937 | int __init create_checkpoint_caches(void) |
846 | { | 938 | { |
847 | orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", | 939 | orphan_entry_slab = f2fs_kmem_cache_create("f2fs_orphan_entry", |
848 | sizeof(struct orphan_inode_entry), NULL); | 940 | sizeof(struct orphan_inode_entry)); |
849 | if (!orphan_entry_slab) | 941 | if (!orphan_entry_slab) |
850 | return -ENOMEM; | 942 | return -ENOMEM; |
851 | inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", | 943 | inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry", |
852 | sizeof(struct dir_inode_entry), NULL); | 944 | sizeof(struct dir_inode_entry)); |
853 | if (!inode_entry_slab) { | 945 | if (!inode_entry_slab) { |
854 | kmem_cache_destroy(orphan_entry_slab); | 946 | kmem_cache_destroy(orphan_entry_slab); |
855 | return -ENOMEM; | 947 | return -ENOMEM; |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2261ccdd0b5f..45abd60e2bff 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
@@ -45,7 +45,7 @@ static void f2fs_read_end_io(struct bio *bio, int err) | |||
45 | 45 | ||
46 | static void f2fs_write_end_io(struct bio *bio, int err) | 46 | static void f2fs_write_end_io(struct bio *bio, int err) |
47 | { | 47 | { |
48 | struct f2fs_sb_info *sbi = F2FS_SB(bio->bi_io_vec->bv_page->mapping->host->i_sb); | 48 | struct f2fs_sb_info *sbi = bio->bi_private; |
49 | struct bio_vec *bvec; | 49 | struct bio_vec *bvec; |
50 | int i; | 50 | int i; |
51 | 51 | ||
@@ -55,15 +55,16 @@ static void f2fs_write_end_io(struct bio *bio, int err) | |||
55 | if (unlikely(err)) { | 55 | if (unlikely(err)) { |
56 | SetPageError(page); | 56 | SetPageError(page); |
57 | set_bit(AS_EIO, &page->mapping->flags); | 57 | set_bit(AS_EIO, &page->mapping->flags); |
58 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | 58 | f2fs_stop_checkpoint(sbi); |
59 | sbi->sb->s_flags |= MS_RDONLY; | ||
60 | } | 59 | } |
61 | end_page_writeback(page); | 60 | end_page_writeback(page); |
62 | dec_page_count(sbi, F2FS_WRITEBACK); | 61 | dec_page_count(sbi, F2FS_WRITEBACK); |
63 | } | 62 | } |
64 | 63 | ||
65 | if (bio->bi_private) | 64 | if (sbi->wait_io) { |
66 | complete(bio->bi_private); | 65 | complete(sbi->wait_io); |
66 | sbi->wait_io = NULL; | ||
67 | } | ||
67 | 68 | ||
68 | if (!get_pages(sbi, F2FS_WRITEBACK) && | 69 | if (!get_pages(sbi, F2FS_WRITEBACK) && |
69 | !list_empty(&sbi->cp_wait.task_list)) | 70 | !list_empty(&sbi->cp_wait.task_list)) |
@@ -86,6 +87,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr, | |||
86 | bio->bi_bdev = sbi->sb->s_bdev; | 87 | bio->bi_bdev = sbi->sb->s_bdev; |
87 | bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); | 88 | bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(sbi, blk_addr); |
88 | bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; | 89 | bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io; |
90 | bio->bi_private = sbi; | ||
89 | 91 | ||
90 | return bio; | 92 | return bio; |
91 | } | 93 | } |
@@ -113,7 +115,7 @@ static void __submit_merged_bio(struct f2fs_bio_info *io) | |||
113 | */ | 115 | */ |
114 | if (fio->type == META_FLUSH) { | 116 | if (fio->type == META_FLUSH) { |
115 | DECLARE_COMPLETION_ONSTACK(wait); | 117 | DECLARE_COMPLETION_ONSTACK(wait); |
116 | io->bio->bi_private = &wait; | 118 | io->sbi->wait_io = &wait; |
117 | submit_bio(rw, io->bio); | 119 | submit_bio(rw, io->bio); |
118 | wait_for_completion(&wait); | 120 | wait_for_completion(&wait); |
119 | } else { | 121 | } else { |
@@ -132,7 +134,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, | |||
132 | 134 | ||
133 | io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; | 135 | io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype]; |
134 | 136 | ||
135 | mutex_lock(&io->io_mutex); | 137 | down_write(&io->io_rwsem); |
136 | 138 | ||
137 | /* change META to META_FLUSH in the checkpoint procedure */ | 139 | /* change META to META_FLUSH in the checkpoint procedure */ |
138 | if (type >= META_FLUSH) { | 140 | if (type >= META_FLUSH) { |
@@ -140,7 +142,7 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, | |||
140 | io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; | 142 | io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO; |
141 | } | 143 | } |
142 | __submit_merged_bio(io); | 144 | __submit_merged_bio(io); |
143 | mutex_unlock(&io->io_mutex); | 145 | up_write(&io->io_rwsem); |
144 | } | 146 | } |
145 | 147 | ||
146 | /* | 148 | /* |
@@ -178,7 +180,7 @@ void f2fs_submit_page_mbio(struct f2fs_sb_info *sbi, struct page *page, | |||
178 | 180 | ||
179 | verify_block_addr(sbi, blk_addr); | 181 | verify_block_addr(sbi, blk_addr); |
180 | 182 | ||
181 | mutex_lock(&io->io_mutex); | 183 | down_write(&io->io_rwsem); |
182 | 184 | ||
183 | if (!is_read) | 185 | if (!is_read) |
184 | inc_page_count(sbi, F2FS_WRITEBACK); | 186 | inc_page_count(sbi, F2FS_WRITEBACK); |
@@ -202,7 +204,7 @@ alloc_new: | |||
202 | 204 | ||
203 | io->last_block_in_bio = blk_addr; | 205 | io->last_block_in_bio = blk_addr; |
204 | 206 | ||
205 | mutex_unlock(&io->io_mutex); | 207 | up_write(&io->io_rwsem); |
206 | trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); | 208 | trace_f2fs_submit_page_mbio(page, fio->rw, fio->type, blk_addr); |
207 | } | 209 | } |
208 | 210 | ||
@@ -797,48 +799,36 @@ static int f2fs_write_data_page(struct page *page, | |||
797 | */ | 799 | */ |
798 | offset = i_size & (PAGE_CACHE_SIZE - 1); | 800 | offset = i_size & (PAGE_CACHE_SIZE - 1); |
799 | if ((page->index >= end_index + 1) || !offset) { | 801 | if ((page->index >= end_index + 1) || !offset) { |
800 | if (S_ISDIR(inode->i_mode)) { | 802 | inode_dec_dirty_dents(inode); |
801 | dec_page_count(sbi, F2FS_DIRTY_DENTS); | ||
802 | inode_dec_dirty_dents(inode); | ||
803 | } | ||
804 | goto out; | 803 | goto out; |
805 | } | 804 | } |
806 | 805 | ||
807 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); | 806 | zero_user_segment(page, offset, PAGE_CACHE_SIZE); |
808 | write: | 807 | write: |
809 | if (unlikely(sbi->por_doing)) { | 808 | if (unlikely(sbi->por_doing)) |
810 | err = AOP_WRITEPAGE_ACTIVATE; | ||
811 | goto redirty_out; | 809 | goto redirty_out; |
812 | } | ||
813 | 810 | ||
814 | /* Dentry blocks are controlled by checkpoint */ | 811 | /* Dentry blocks are controlled by checkpoint */ |
815 | if (S_ISDIR(inode->i_mode)) { | 812 | if (S_ISDIR(inode->i_mode)) { |
816 | dec_page_count(sbi, F2FS_DIRTY_DENTS); | ||
817 | inode_dec_dirty_dents(inode); | 813 | inode_dec_dirty_dents(inode); |
818 | err = do_write_data_page(page, &fio); | 814 | err = do_write_data_page(page, &fio); |
819 | } else { | 815 | goto done; |
820 | f2fs_lock_op(sbi); | 816 | } |
821 | |||
822 | if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) { | ||
823 | err = f2fs_write_inline_data(inode, page, offset); | ||
824 | f2fs_unlock_op(sbi); | ||
825 | goto out; | ||
826 | } else { | ||
827 | err = do_write_data_page(page, &fio); | ||
828 | } | ||
829 | 817 | ||
830 | f2fs_unlock_op(sbi); | 818 | if (!wbc->for_reclaim) |
831 | need_balance_fs = true; | 819 | need_balance_fs = true; |
832 | } | 820 | else if (has_not_enough_free_secs(sbi, 0)) |
833 | if (err == -ENOENT) | ||
834 | goto out; | ||
835 | else if (err) | ||
836 | goto redirty_out; | 821 | goto redirty_out; |
837 | 822 | ||
838 | if (wbc->for_reclaim) { | 823 | f2fs_lock_op(sbi); |
839 | f2fs_submit_merged_bio(sbi, DATA, WRITE); | 824 | if (f2fs_has_inline_data(inode) || f2fs_may_inline(inode)) |
840 | need_balance_fs = false; | 825 | err = f2fs_write_inline_data(inode, page, offset); |
841 | } | 826 | else |
827 | err = do_write_data_page(page, &fio); | ||
828 | f2fs_unlock_op(sbi); | ||
829 | done: | ||
830 | if (err && err != -ENOENT) | ||
831 | goto redirty_out; | ||
842 | 832 | ||
843 | clear_cold_data(page); | 833 | clear_cold_data(page); |
844 | out: | 834 | out: |
@@ -849,12 +839,11 @@ out: | |||
849 | 839 | ||
850 | redirty_out: | 840 | redirty_out: |
851 | wbc->pages_skipped++; | 841 | wbc->pages_skipped++; |
842 | account_page_redirty(page); | ||
852 | set_page_dirty(page); | 843 | set_page_dirty(page); |
853 | return err; | 844 | return AOP_WRITEPAGE_ACTIVATE; |
854 | } | 845 | } |
855 | 846 | ||
856 | #define MAX_DESIRED_PAGES_WP 4096 | ||
857 | |||
858 | static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, | 847 | static int __f2fs_writepage(struct page *page, struct writeback_control *wbc, |
859 | void *data) | 848 | void *data) |
860 | { | 849 | { |
@@ -871,17 +860,17 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
871 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 860 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
872 | bool locked = false; | 861 | bool locked = false; |
873 | int ret; | 862 | int ret; |
874 | long excess_nrtw = 0, desired_nrtw; | 863 | long diff; |
875 | 864 | ||
876 | /* deal with chardevs and other special file */ | 865 | /* deal with chardevs and other special file */ |
877 | if (!mapping->a_ops->writepage) | 866 | if (!mapping->a_ops->writepage) |
878 | return 0; | 867 | return 0; |
879 | 868 | ||
880 | if (wbc->nr_to_write < MAX_DESIRED_PAGES_WP) { | 869 | if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE && |
881 | desired_nrtw = MAX_DESIRED_PAGES_WP; | 870 | get_dirty_dents(inode) < nr_pages_to_skip(sbi, DATA)) |
882 | excess_nrtw = desired_nrtw - wbc->nr_to_write; | 871 | goto skip_write; |
883 | wbc->nr_to_write = desired_nrtw; | 872 | |
884 | } | 873 | diff = nr_pages_to_write(sbi, DATA, wbc); |
885 | 874 | ||
886 | if (!S_ISDIR(inode->i_mode)) { | 875 | if (!S_ISDIR(inode->i_mode)) { |
887 | mutex_lock(&sbi->writepages); | 876 | mutex_lock(&sbi->writepages); |
@@ -895,8 +884,12 @@ static int f2fs_write_data_pages(struct address_space *mapping, | |||
895 | 884 | ||
896 | remove_dirty_dir_inode(inode); | 885 | remove_dirty_dir_inode(inode); |
897 | 886 | ||
898 | wbc->nr_to_write -= excess_nrtw; | 887 | wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); |
899 | return ret; | 888 | return ret; |
889 | |||
890 | skip_write: | ||
891 | wbc->pages_skipped += get_dirty_dents(inode); | ||
892 | return 0; | ||
900 | } | 893 | } |
901 | 894 | ||
902 | static int f2fs_write_begin(struct file *file, struct address_space *mapping, | 895 | static int f2fs_write_begin(struct file *file, struct address_space *mapping, |
@@ -949,13 +942,19 @@ inline_data: | |||
949 | if (dn.data_blkaddr == NEW_ADDR) { | 942 | if (dn.data_blkaddr == NEW_ADDR) { |
950 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); | 943 | zero_user_segment(page, 0, PAGE_CACHE_SIZE); |
951 | } else { | 944 | } else { |
952 | if (f2fs_has_inline_data(inode)) | 945 | if (f2fs_has_inline_data(inode)) { |
953 | err = f2fs_read_inline_data(inode, page); | 946 | err = f2fs_read_inline_data(inode, page); |
954 | else | 947 | if (err) { |
948 | page_cache_release(page); | ||
949 | return err; | ||
950 | } | ||
951 | } else { | ||
955 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, | 952 | err = f2fs_submit_page_bio(sbi, page, dn.data_blkaddr, |
956 | READ_SYNC); | 953 | READ_SYNC); |
957 | if (err) | 954 | if (err) |
958 | return err; | 955 | return err; |
956 | } | ||
957 | |||
959 | lock_page(page); | 958 | lock_page(page); |
960 | if (unlikely(!PageUptodate(page))) { | 959 | if (unlikely(!PageUptodate(page))) { |
961 | f2fs_put_page(page, 1); | 960 | f2fs_put_page(page, 1); |
@@ -1031,11 +1030,8 @@ static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, | |||
1031 | unsigned int length) | 1030 | unsigned int length) |
1032 | { | 1031 | { |
1033 | struct inode *inode = page->mapping->host; | 1032 | struct inode *inode = page->mapping->host; |
1034 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 1033 | if (PageDirty(page)) |
1035 | if (S_ISDIR(inode->i_mode) && PageDirty(page)) { | ||
1036 | dec_page_count(sbi, F2FS_DIRTY_DENTS); | ||
1037 | inode_dec_dirty_dents(inode); | 1034 | inode_dec_dirty_dents(inode); |
1038 | } | ||
1039 | ClearPagePrivate(page); | 1035 | ClearPagePrivate(page); |
1040 | } | 1036 | } |
1041 | 1037 | ||
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 3de9d20d0c14..b52c12cf5873 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
@@ -86,7 +86,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) | |||
86 | { | 86 | { |
87 | struct f2fs_stat_info *si = F2FS_STAT(sbi); | 87 | struct f2fs_stat_info *si = F2FS_STAT(sbi); |
88 | unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; | 88 | unsigned int blks_per_sec, hblks_per_sec, total_vblocks, bimodal, dist; |
89 | struct sit_info *sit_i = SIT_I(sbi); | ||
90 | unsigned int segno, vblocks; | 89 | unsigned int segno, vblocks; |
91 | int ndirty = 0; | 90 | int ndirty = 0; |
92 | 91 | ||
@@ -94,7 +93,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) | |||
94 | total_vblocks = 0; | 93 | total_vblocks = 0; |
95 | blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); | 94 | blks_per_sec = sbi->segs_per_sec * (1 << sbi->log_blocks_per_seg); |
96 | hblks_per_sec = blks_per_sec / 2; | 95 | hblks_per_sec = blks_per_sec / 2; |
97 | mutex_lock(&sit_i->sentry_lock); | ||
98 | for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { | 96 | for (segno = 0; segno < TOTAL_SEGS(sbi); segno += sbi->segs_per_sec) { |
99 | vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); | 97 | vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); |
100 | dist = abs(vblocks - hblks_per_sec); | 98 | dist = abs(vblocks - hblks_per_sec); |
@@ -105,7 +103,6 @@ static void update_sit_info(struct f2fs_sb_info *sbi) | |||
105 | ndirty++; | 103 | ndirty++; |
106 | } | 104 | } |
107 | } | 105 | } |
108 | mutex_unlock(&sit_i->sentry_lock); | ||
109 | dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; | 106 | dist = TOTAL_SECS(sbi) * hblks_per_sec * hblks_per_sec / 100; |
110 | si->bimodal = bimodal / dist; | 107 | si->bimodal = bimodal / dist; |
111 | if (si->dirty_count) | 108 | if (si->dirty_count) |
@@ -236,6 +233,7 @@ static int stat_show(struct seq_file *s, void *v) | |||
236 | si->dirty_count); | 233 | si->dirty_count); |
237 | seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", | 234 | seq_printf(s, " - Prefree: %d\n - Free: %d (%d)\n\n", |
238 | si->prefree_count, si->free_segs, si->free_secs); | 235 | si->prefree_count, si->free_segs, si->free_secs); |
236 | seq_printf(s, "CP calls: %d\n", si->cp_count); | ||
239 | seq_printf(s, "GC calls: %d (BG: %d)\n", | 237 | seq_printf(s, "GC calls: %d (BG: %d)\n", |
240 | si->call_count, si->bg_gc); | 238 | si->call_count, si->bg_gc); |
241 | seq_printf(s, " - data segments : %d\n", si->data_segs); | 239 | seq_printf(s, " - data segments : %d\n", si->data_segs); |
@@ -252,10 +250,10 @@ static int stat_show(struct seq_file *s, void *v) | |||
252 | si->ndirty_dent, si->ndirty_dirs); | 250 | si->ndirty_dent, si->ndirty_dirs); |
253 | seq_printf(s, " - meta: %4d in %4d\n", | 251 | seq_printf(s, " - meta: %4d in %4d\n", |
254 | si->ndirty_meta, si->meta_pages); | 252 | si->ndirty_meta, si->meta_pages); |
255 | seq_printf(s, " - NATs: %5d > %lu\n", | 253 | seq_printf(s, " - NATs: %9d\n - SITs: %9d\n", |
256 | si->nats, NM_WOUT_THRESHOLD); | 254 | si->nats, si->sits); |
257 | seq_printf(s, " - SITs: %5d\n - free_nids: %5d\n", | 255 | seq_printf(s, " - free_nids: %9d\n", |
258 | si->sits, si->fnids); | 256 | si->fnids); |
259 | seq_puts(s, "\nDistribution of User Blocks:"); | 257 | seq_puts(s, "\nDistribution of User Blocks:"); |
260 | seq_puts(s, " [ valid | invalid | free ]\n"); | 258 | seq_puts(s, " [ valid | invalid | free ]\n"); |
261 | seq_puts(s, " ["); | 259 | seq_puts(s, " ["); |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 2b7c255bcbdf..972fd0ef230f 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
@@ -21,12 +21,12 @@ static unsigned long dir_blocks(struct inode *inode) | |||
21 | >> PAGE_CACHE_SHIFT; | 21 | >> PAGE_CACHE_SHIFT; |
22 | } | 22 | } |
23 | 23 | ||
24 | static unsigned int dir_buckets(unsigned int level) | 24 | static unsigned int dir_buckets(unsigned int level, int dir_level) |
25 | { | 25 | { |
26 | if (level < MAX_DIR_HASH_DEPTH / 2) | 26 | if (level < MAX_DIR_HASH_DEPTH / 2) |
27 | return 1 << level; | 27 | return 1 << (level + dir_level); |
28 | else | 28 | else |
29 | return 1 << ((MAX_DIR_HASH_DEPTH / 2) - 1); | 29 | return 1 << ((MAX_DIR_HASH_DEPTH / 2 + dir_level) - 1); |
30 | } | 30 | } |
31 | 31 | ||
32 | static unsigned int bucket_blocks(unsigned int level) | 32 | static unsigned int bucket_blocks(unsigned int level) |
@@ -65,13 +65,14 @@ static void set_de_type(struct f2fs_dir_entry *de, struct inode *inode) | |||
65 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; | 65 | de->file_type = f2fs_type_by_mode[(mode & S_IFMT) >> S_SHIFT]; |
66 | } | 66 | } |
67 | 67 | ||
68 | static unsigned long dir_block_index(unsigned int level, unsigned int idx) | 68 | static unsigned long dir_block_index(unsigned int level, |
69 | int dir_level, unsigned int idx) | ||
69 | { | 70 | { |
70 | unsigned long i; | 71 | unsigned long i; |
71 | unsigned long bidx = 0; | 72 | unsigned long bidx = 0; |
72 | 73 | ||
73 | for (i = 0; i < level; i++) | 74 | for (i = 0; i < level; i++) |
74 | bidx += dir_buckets(i) * bucket_blocks(i); | 75 | bidx += dir_buckets(i, dir_level) * bucket_blocks(i); |
75 | bidx += idx * bucket_blocks(level); | 76 | bidx += idx * bucket_blocks(level); |
76 | return bidx; | 77 | return bidx; |
77 | } | 78 | } |
@@ -93,16 +94,21 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, | |||
93 | f2fs_hash_t namehash, struct page **res_page) | 94 | f2fs_hash_t namehash, struct page **res_page) |
94 | { | 95 | { |
95 | struct f2fs_dir_entry *de; | 96 | struct f2fs_dir_entry *de; |
96 | unsigned long bit_pos, end_pos, next_pos; | 97 | unsigned long bit_pos = 0; |
97 | struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); | 98 | struct f2fs_dentry_block *dentry_blk = kmap(dentry_page); |
98 | int slots; | 99 | const void *dentry_bits = &dentry_blk->dentry_bitmap; |
100 | int max_len = 0; | ||
99 | 101 | ||
100 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | ||
101 | NR_DENTRY_IN_BLOCK, 0); | ||
102 | while (bit_pos < NR_DENTRY_IN_BLOCK) { | 102 | while (bit_pos < NR_DENTRY_IN_BLOCK) { |
103 | if (!test_bit_le(bit_pos, dentry_bits)) { | ||
104 | if (bit_pos == 0) | ||
105 | max_len = 1; | ||
106 | else if (!test_bit_le(bit_pos - 1, dentry_bits)) | ||
107 | max_len++; | ||
108 | bit_pos++; | ||
109 | continue; | ||
110 | } | ||
103 | de = &dentry_blk->dentry[bit_pos]; | 111 | de = &dentry_blk->dentry[bit_pos]; |
104 | slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | ||
105 | |||
106 | if (early_match_name(name, namelen, namehash, de)) { | 112 | if (early_match_name(name, namelen, namehash, de)) { |
107 | if (!memcmp(dentry_blk->filename[bit_pos], | 113 | if (!memcmp(dentry_blk->filename[bit_pos], |
108 | name, namelen)) { | 114 | name, namelen)) { |
@@ -110,20 +116,18 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, | |||
110 | goto found; | 116 | goto found; |
111 | } | 117 | } |
112 | } | 118 | } |
113 | next_pos = bit_pos + slots; | 119 | if (max_len > *max_slots) { |
114 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | 120 | *max_slots = max_len; |
115 | NR_DENTRY_IN_BLOCK, next_pos); | 121 | max_len = 0; |
116 | if (bit_pos >= NR_DENTRY_IN_BLOCK) | 122 | } |
117 | end_pos = NR_DENTRY_IN_BLOCK; | 123 | bit_pos += GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); |
118 | else | ||
119 | end_pos = bit_pos; | ||
120 | if (*max_slots < end_pos - next_pos) | ||
121 | *max_slots = end_pos - next_pos; | ||
122 | } | 124 | } |
123 | 125 | ||
124 | de = NULL; | 126 | de = NULL; |
125 | kunmap(dentry_page); | 127 | kunmap(dentry_page); |
126 | found: | 128 | found: |
129 | if (max_len > *max_slots) | ||
130 | *max_slots = max_len; | ||
127 | return de; | 131 | return de; |
128 | } | 132 | } |
129 | 133 | ||
@@ -141,10 +145,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, | |||
141 | 145 | ||
142 | f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); | 146 | f2fs_bug_on(level > MAX_DIR_HASH_DEPTH); |
143 | 147 | ||
144 | nbucket = dir_buckets(level); | 148 | nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); |
145 | nblock = bucket_blocks(level); | 149 | nblock = bucket_blocks(level); |
146 | 150 | ||
147 | bidx = dir_block_index(level, le32_to_cpu(namehash) % nbucket); | 151 | bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, |
152 | le32_to_cpu(namehash) % nbucket); | ||
148 | end_block = bidx + nblock; | 153 | end_block = bidx + nblock; |
149 | 154 | ||
150 | for (; bidx < end_block; bidx++) { | 155 | for (; bidx < end_block; bidx++) { |
@@ -248,7 +253,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |||
248 | struct page *page, struct inode *inode) | 253 | struct page *page, struct inode *inode) |
249 | { | 254 | { |
250 | lock_page(page); | 255 | lock_page(page); |
251 | wait_on_page_writeback(page); | 256 | f2fs_wait_on_page_writeback(page, DATA); |
252 | de->ino = cpu_to_le32(inode->i_ino); | 257 | de->ino = cpu_to_le32(inode->i_ino); |
253 | set_de_type(de, inode); | 258 | set_de_type(de, inode); |
254 | kunmap(page); | 259 | kunmap(page); |
@@ -347,14 +352,11 @@ static struct page *init_inode_metadata(struct inode *inode, | |||
347 | err = f2fs_init_security(inode, dir, name, page); | 352 | err = f2fs_init_security(inode, dir, name, page); |
348 | if (err) | 353 | if (err) |
349 | goto put_error; | 354 | goto put_error; |
350 | |||
351 | wait_on_page_writeback(page); | ||
352 | } else { | 355 | } else { |
353 | page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); | 356 | page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); |
354 | if (IS_ERR(page)) | 357 | if (IS_ERR(page)) |
355 | return page; | 358 | return page; |
356 | 359 | ||
357 | wait_on_page_writeback(page); | ||
358 | set_cold_node(inode, page); | 360 | set_cold_node(inode, page); |
359 | } | 361 | } |
360 | 362 | ||
@@ -372,6 +374,10 @@ static struct page *init_inode_metadata(struct inode *inode, | |||
372 | 374 | ||
373 | put_error: | 375 | put_error: |
374 | f2fs_put_page(page, 1); | 376 | f2fs_put_page(page, 1); |
377 | /* once the failed inode becomes a bad inode, i_mode is S_IFREG */ | ||
378 | truncate_inode_pages(&inode->i_data, 0); | ||
379 | truncate_blocks(inode, 0); | ||
380 | remove_dirty_dir_inode(inode); | ||
375 | error: | 381 | error: |
376 | remove_inode_page(inode); | 382 | remove_inode_page(inode); |
377 | return ERR_PTR(err); | 383 | return ERR_PTR(err); |
@@ -395,9 +401,6 @@ static void update_parent_metadata(struct inode *dir, struct inode *inode, | |||
395 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | 401 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
396 | } | 402 | } |
397 | 403 | ||
398 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) | ||
399 | update_inode_page(dir); | ||
400 | |||
401 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) | 404 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) |
402 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 405 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
403 | } | 406 | } |
@@ -464,10 +467,11 @@ start: | |||
464 | if (level == current_depth) | 467 | if (level == current_depth) |
465 | ++current_depth; | 468 | ++current_depth; |
466 | 469 | ||
467 | nbucket = dir_buckets(level); | 470 | nbucket = dir_buckets(level, F2FS_I(dir)->i_dir_level); |
468 | nblock = bucket_blocks(level); | 471 | nblock = bucket_blocks(level); |
469 | 472 | ||
470 | bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); | 473 | bidx = dir_block_index(level, F2FS_I(dir)->i_dir_level, |
474 | (le32_to_cpu(dentry_hash) % nbucket)); | ||
471 | 475 | ||
472 | for (block = bidx; block <= (bidx + nblock - 1); block++) { | 476 | for (block = bidx; block <= (bidx + nblock - 1); block++) { |
473 | dentry_page = get_new_data_page(dir, NULL, block, true); | 477 | dentry_page = get_new_data_page(dir, NULL, block, true); |
@@ -487,8 +491,9 @@ start: | |||
487 | ++level; | 491 | ++level; |
488 | goto start; | 492 | goto start; |
489 | add_dentry: | 493 | add_dentry: |
490 | wait_on_page_writeback(dentry_page); | 494 | f2fs_wait_on_page_writeback(dentry_page, DATA); |
491 | 495 | ||
496 | down_write(&F2FS_I(inode)->i_sem); | ||
492 | page = init_inode_metadata(inode, dir, name); | 497 | page = init_inode_metadata(inode, dir, name); |
493 | if (IS_ERR(page)) { | 498 | if (IS_ERR(page)) { |
494 | err = PTR_ERR(page); | 499 | err = PTR_ERR(page); |
@@ -511,7 +516,12 @@ add_dentry: | |||
511 | 516 | ||
512 | update_parent_metadata(dir, inode, current_depth); | 517 | update_parent_metadata(dir, inode, current_depth); |
513 | fail: | 518 | fail: |
514 | clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | 519 | up_write(&F2FS_I(inode)->i_sem); |
520 | |||
521 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) { | ||
522 | update_inode_page(dir); | ||
523 | clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | ||
524 | } | ||
515 | kunmap(dentry_page); | 525 | kunmap(dentry_page); |
516 | f2fs_put_page(dentry_page, 1); | 526 | f2fs_put_page(dentry_page, 1); |
517 | return err; | 527 | return err; |
@@ -528,13 +538,12 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
528 | unsigned int bit_pos; | 538 | unsigned int bit_pos; |
529 | struct address_space *mapping = page->mapping; | 539 | struct address_space *mapping = page->mapping; |
530 | struct inode *dir = mapping->host; | 540 | struct inode *dir = mapping->host; |
531 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | ||
532 | int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); | 541 | int slots = GET_DENTRY_SLOTS(le16_to_cpu(dentry->name_len)); |
533 | void *kaddr = page_address(page); | 542 | void *kaddr = page_address(page); |
534 | int i; | 543 | int i; |
535 | 544 | ||
536 | lock_page(page); | 545 | lock_page(page); |
537 | wait_on_page_writeback(page); | 546 | f2fs_wait_on_page_writeback(page, DATA); |
538 | 547 | ||
539 | dentry_blk = (struct f2fs_dentry_block *)kaddr; | 548 | dentry_blk = (struct f2fs_dentry_block *)kaddr; |
540 | bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; | 549 | bit_pos = dentry - (struct f2fs_dir_entry *)dentry_blk->dentry; |
@@ -551,6 +560,10 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
551 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; | 560 | dir->i_ctime = dir->i_mtime = CURRENT_TIME; |
552 | 561 | ||
553 | if (inode) { | 562 | if (inode) { |
563 | struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb); | ||
564 | |||
565 | down_write(&F2FS_I(inode)->i_sem); | ||
566 | |||
554 | if (S_ISDIR(inode->i_mode)) { | 567 | if (S_ISDIR(inode->i_mode)) { |
555 | drop_nlink(dir); | 568 | drop_nlink(dir); |
556 | update_inode_page(dir); | 569 | update_inode_page(dir); |
@@ -561,6 +574,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
561 | drop_nlink(inode); | 574 | drop_nlink(inode); |
562 | i_size_write(inode, 0); | 575 | i_size_write(inode, 0); |
563 | } | 576 | } |
577 | up_write(&F2FS_I(inode)->i_sem); | ||
564 | update_inode_page(inode); | 578 | update_inode_page(inode); |
565 | 579 | ||
566 | if (inode->i_nlink == 0) | 580 | if (inode->i_nlink == 0) |
@@ -573,7 +587,6 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page, | |||
573 | truncate_hole(dir, page->index, page->index + 1); | 587 | truncate_hole(dir, page->index, page->index + 1); |
574 | clear_page_dirty_for_io(page); | 588 | clear_page_dirty_for_io(page); |
575 | ClearPageUptodate(page); | 589 | ClearPageUptodate(page); |
576 | dec_page_count(sbi, F2FS_DIRTY_DENTS); | ||
577 | inode_dec_dirty_dents(dir); | 590 | inode_dec_dirty_dents(dir); |
578 | } | 591 | } |
579 | f2fs_put_page(page, 1); | 592 | f2fs_put_page(page, 1); |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fc3c558cb4f3..2ecac8312359 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
@@ -40,6 +40,7 @@ | |||
40 | #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 | 40 | #define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 |
41 | #define F2FS_MOUNT_INLINE_XATTR 0x00000080 | 41 | #define F2FS_MOUNT_INLINE_XATTR 0x00000080 |
42 | #define F2FS_MOUNT_INLINE_DATA 0x00000100 | 42 | #define F2FS_MOUNT_INLINE_DATA 0x00000100 |
43 | #define F2FS_MOUNT_FLUSH_MERGE 0x00000200 | ||
43 | 44 | ||
44 | #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) | 45 | #define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) |
45 | #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) | 46 | #define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) |
@@ -88,6 +89,16 @@ enum { | |||
88 | SIT_BITMAP | 89 | SIT_BITMAP |
89 | }; | 90 | }; |
90 | 91 | ||
92 | /* | ||
93 | * For CP/NAT/SIT/SSA readahead | ||
94 | */ | ||
95 | enum { | ||
96 | META_CP, | ||
97 | META_NAT, | ||
98 | META_SIT, | ||
99 | META_SSA | ||
100 | }; | ||
101 | |||
91 | /* for the list of orphan inodes */ | 102 | /* for the list of orphan inodes */ |
92 | struct orphan_inode_entry { | 103 | struct orphan_inode_entry { |
93 | struct list_head list; /* list head */ | 104 | struct list_head list; /* list head */ |
@@ -187,16 +198,20 @@ struct extent_info { | |||
187 | #define FADVISE_COLD_BIT 0x01 | 198 | #define FADVISE_COLD_BIT 0x01 |
188 | #define FADVISE_LOST_PINO_BIT 0x02 | 199 | #define FADVISE_LOST_PINO_BIT 0x02 |
189 | 200 | ||
201 | #define DEF_DIR_LEVEL 0 | ||
202 | |||
190 | struct f2fs_inode_info { | 203 | struct f2fs_inode_info { |
191 | struct inode vfs_inode; /* serve a vfs inode */ | 204 | struct inode vfs_inode; /* serve a vfs inode */ |
192 | unsigned long i_flags; /* keep an inode flags for ioctl */ | 205 | unsigned long i_flags; /* keep an inode flags for ioctl */ |
193 | unsigned char i_advise; /* use to give file attribute hints */ | 206 | unsigned char i_advise; /* use to give file attribute hints */ |
207 | unsigned char i_dir_level; /* use for dentry level for large dir */ | ||
194 | unsigned int i_current_depth; /* use only in directory structure */ | 208 | unsigned int i_current_depth; /* use only in directory structure */ |
195 | unsigned int i_pino; /* parent inode number */ | 209 | unsigned int i_pino; /* parent inode number */ |
196 | umode_t i_acl_mode; /* keep file acl mode temporarily */ | 210 | umode_t i_acl_mode; /* keep file acl mode temporarily */ |
197 | 211 | ||
198 | /* Use below internally in f2fs*/ | 212 | /* Use below internally in f2fs*/ |
199 | unsigned long flags; /* use to pass per-file flags */ | 213 | unsigned long flags; /* use to pass per-file flags */ |
214 | struct rw_semaphore i_sem; /* protect fi info */ | ||
200 | atomic_t dirty_dents; /* # of dirty dentry pages */ | 215 | atomic_t dirty_dents; /* # of dirty dentry pages */ |
201 | f2fs_hash_t chash; /* hash value of given file name */ | 216 | f2fs_hash_t chash; /* hash value of given file name */ |
202 | unsigned int clevel; /* maximum level of given file name */ | 217 | unsigned int clevel; /* maximum level of given file name */ |
@@ -229,6 +244,7 @@ struct f2fs_nm_info { | |||
229 | block_t nat_blkaddr; /* base disk address of NAT */ | 244 | block_t nat_blkaddr; /* base disk address of NAT */ |
230 | nid_t max_nid; /* maximum possible node ids */ | 245 | nid_t max_nid; /* maximum possible node ids */ |
231 | nid_t next_scan_nid; /* the next nid to be scanned */ | 246 | nid_t next_scan_nid; /* the next nid to be scanned */ |
247 | unsigned int ram_thresh; /* control the memory footprint */ | ||
232 | 248 | ||
233 | /* NAT cache management */ | 249 | /* NAT cache management */ |
234 | struct radix_tree_root nat_root;/* root of the nat entry cache */ | 250 | struct radix_tree_root nat_root;/* root of the nat entry cache */ |
@@ -238,6 +254,7 @@ struct f2fs_nm_info { | |||
238 | struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ | 254 | struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ |
239 | 255 | ||
240 | /* free node ids management */ | 256 | /* free node ids management */ |
257 | struct radix_tree_root free_nid_root;/* root of the free_nid cache */ | ||
241 | struct list_head free_nid_list; /* a list for free nids */ | 258 | struct list_head free_nid_list; /* a list for free nids */ |
242 | spinlock_t free_nid_list_lock; /* protect free nid list */ | 259 | spinlock_t free_nid_list_lock; /* protect free nid list */ |
243 | unsigned int fcnt; /* the number of free node id */ | 260 | unsigned int fcnt; /* the number of free node id */ |
@@ -300,6 +317,12 @@ enum { | |||
300 | NO_CHECK_TYPE | 317 | NO_CHECK_TYPE |
301 | }; | 318 | }; |
302 | 319 | ||
320 | struct flush_cmd { | ||
321 | struct flush_cmd *next; | ||
322 | struct completion wait; | ||
323 | int ret; | ||
324 | }; | ||
325 | |||
303 | struct f2fs_sm_info { | 326 | struct f2fs_sm_info { |
304 | struct sit_info *sit_info; /* whole segment information */ | 327 | struct sit_info *sit_info; /* whole segment information */ |
305 | struct free_segmap_info *free_info; /* free segment information */ | 328 | struct free_segmap_info *free_info; /* free segment information */ |
@@ -328,6 +351,14 @@ struct f2fs_sm_info { | |||
328 | 351 | ||
329 | unsigned int ipu_policy; /* in-place-update policy */ | 352 | unsigned int ipu_policy; /* in-place-update policy */ |
330 | unsigned int min_ipu_util; /* in-place-update threshold */ | 353 | unsigned int min_ipu_util; /* in-place-update threshold */ |
354 | |||
355 | /* for flush command control */ | ||
356 | struct task_struct *f2fs_issue_flush; /* flush thread */ | ||
357 | wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ | ||
358 | struct flush_cmd *issue_list; /* list for command issue */ | ||
359 | struct flush_cmd *dispatch_list; /* list for command dispatch */ | ||
360 | spinlock_t issue_lock; /* for issue list lock */ | ||
361 | struct flush_cmd *issue_tail; /* list tail of issue list */ | ||
331 | }; | 362 | }; |
332 | 363 | ||
333 | /* | 364 | /* |
@@ -378,7 +409,7 @@ struct f2fs_bio_info { | |||
378 | struct bio *bio; /* bios to merge */ | 409 | struct bio *bio; /* bios to merge */ |
379 | sector_t last_block_in_bio; /* last block number */ | 410 | sector_t last_block_in_bio; /* last block number */ |
380 | struct f2fs_io_info fio; /* store buffered io info. */ | 411 | struct f2fs_io_info fio; /* store buffered io info. */ |
381 | struct mutex io_mutex; /* mutex for bio */ | 412 | struct rw_semaphore io_rwsem; /* blocking op for bio */ |
382 | }; | 413 | }; |
383 | 414 | ||
384 | struct f2fs_sb_info { | 415 | struct f2fs_sb_info { |
@@ -398,6 +429,7 @@ struct f2fs_sb_info { | |||
398 | /* for bio operations */ | 429 | /* for bio operations */ |
399 | struct f2fs_bio_info read_io; /* for read bios */ | 430 | struct f2fs_bio_info read_io; /* for read bios */ |
400 | struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ | 431 | struct f2fs_bio_info write_io[NR_PAGE_TYPE]; /* for write bios */ |
432 | struct completion *wait_io; /* for completion bios */ | ||
401 | 433 | ||
402 | /* for checkpoint */ | 434 | /* for checkpoint */ |
403 | struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ | 435 | struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ |
@@ -407,7 +439,6 @@ struct f2fs_sb_info { | |||
407 | struct mutex node_write; /* locking node writes */ | 439 | struct mutex node_write; /* locking node writes */ |
408 | struct mutex writepages; /* mutex for writepages() */ | 440 | struct mutex writepages; /* mutex for writepages() */ |
409 | bool por_doing; /* recovery is doing or not */ | 441 | bool por_doing; /* recovery is doing or not */ |
410 | bool on_build_free_nids; /* build_free_nids is doing */ | ||
411 | wait_queue_head_t cp_wait; | 442 | wait_queue_head_t cp_wait; |
412 | 443 | ||
413 | /* for orphan inode management */ | 444 | /* for orphan inode management */ |
@@ -436,6 +467,7 @@ struct f2fs_sb_info { | |||
436 | unsigned int total_valid_node_count; /* valid node block count */ | 467 | unsigned int total_valid_node_count; /* valid node block count */ |
437 | unsigned int total_valid_inode_count; /* valid inode count */ | 468 | unsigned int total_valid_inode_count; /* valid inode count */ |
438 | int active_logs; /* # of active logs */ | 469 | int active_logs; /* # of active logs */ |
470 | int dir_level; /* directory level */ | ||
439 | 471 | ||
440 | block_t user_block_count; /* # of user blocks */ | 472 | block_t user_block_count; /* # of user blocks */ |
441 | block_t total_valid_block_count; /* # of valid blocks */ | 473 | block_t total_valid_block_count; /* # of valid blocks */ |
@@ -622,6 +654,11 @@ static inline int F2FS_HAS_BLOCKS(struct inode *inode) | |||
622 | return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; | 654 | return inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS; |
623 | } | 655 | } |
624 | 656 | ||
657 | static inline bool f2fs_has_xattr_block(unsigned int ofs) | ||
658 | { | ||
659 | return ofs == XATTR_NODE_OFFSET; | ||
660 | } | ||
661 | |||
625 | static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, | 662 | static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, |
626 | struct inode *inode, blkcnt_t count) | 663 | struct inode *inode, blkcnt_t count) |
627 | { | 664 | { |
@@ -661,6 +698,7 @@ static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
661 | 698 | ||
662 | static inline void inode_inc_dirty_dents(struct inode *inode) | 699 | static inline void inode_inc_dirty_dents(struct inode *inode) |
663 | { | 700 | { |
701 | inc_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); | ||
664 | atomic_inc(&F2FS_I(inode)->dirty_dents); | 702 | atomic_inc(&F2FS_I(inode)->dirty_dents); |
665 | } | 703 | } |
666 | 704 | ||
@@ -671,6 +709,10 @@ static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) | |||
671 | 709 | ||
672 | static inline void inode_dec_dirty_dents(struct inode *inode) | 710 | static inline void inode_dec_dirty_dents(struct inode *inode) |
673 | { | 711 | { |
712 | if (!S_ISDIR(inode->i_mode)) | ||
713 | return; | ||
714 | |||
715 | dec_page_count(F2FS_SB(inode->i_sb), F2FS_DIRTY_DENTS); | ||
674 | atomic_dec(&F2FS_I(inode)->dirty_dents); | 716 | atomic_dec(&F2FS_I(inode)->dirty_dents); |
675 | } | 717 | } |
676 | 718 | ||
@@ -679,6 +721,11 @@ static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) | |||
679 | return atomic_read(&sbi->nr_pages[count_type]); | 721 | return atomic_read(&sbi->nr_pages[count_type]); |
680 | } | 722 | } |
681 | 723 | ||
724 | static inline int get_dirty_dents(struct inode *inode) | ||
725 | { | ||
726 | return atomic_read(&F2FS_I(inode)->dirty_dents); | ||
727 | } | ||
728 | |||
682 | static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) | 729 | static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) |
683 | { | 730 | { |
684 | unsigned int pages_per_sec = sbi->segs_per_sec * | 731 | unsigned int pages_per_sec = sbi->segs_per_sec * |
@@ -689,11 +736,7 @@ static inline int get_blocktype_secs(struct f2fs_sb_info *sbi, int block_type) | |||
689 | 736 | ||
690 | static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) | 737 | static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) |
691 | { | 738 | { |
692 | block_t ret; | 739 | return sbi->total_valid_block_count; |
693 | spin_lock(&sbi->stat_lock); | ||
694 | ret = sbi->total_valid_block_count; | ||
695 | spin_unlock(&sbi->stat_lock); | ||
696 | return ret; | ||
697 | } | 740 | } |
698 | 741 | ||
699 | static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) | 742 | static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) |
@@ -789,11 +832,7 @@ static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, | |||
789 | 832 | ||
790 | static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) | 833 | static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) |
791 | { | 834 | { |
792 | unsigned int ret; | 835 | return sbi->total_valid_node_count; |
793 | spin_lock(&sbi->stat_lock); | ||
794 | ret = sbi->total_valid_node_count; | ||
795 | spin_unlock(&sbi->stat_lock); | ||
796 | return ret; | ||
797 | } | 836 | } |
798 | 837 | ||
799 | static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) | 838 | static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) |
@@ -814,11 +853,7 @@ static inline void dec_valid_inode_count(struct f2fs_sb_info *sbi) | |||
814 | 853 | ||
815 | static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) | 854 | static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) |
816 | { | 855 | { |
817 | unsigned int ret; | 856 | return sbi->total_valid_inode_count; |
818 | spin_lock(&sbi->stat_lock); | ||
819 | ret = sbi->total_valid_inode_count; | ||
820 | spin_unlock(&sbi->stat_lock); | ||
821 | return ret; | ||
822 | } | 857 | } |
823 | 858 | ||
824 | static inline void f2fs_put_page(struct page *page, int unlock) | 859 | static inline void f2fs_put_page(struct page *page, int unlock) |
@@ -844,9 +879,9 @@ static inline void f2fs_put_dnode(struct dnode_of_data *dn) | |||
844 | } | 879 | } |
845 | 880 | ||
846 | static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, | 881 | static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, |
847 | size_t size, void (*ctor)(void *)) | 882 | size_t size) |
848 | { | 883 | { |
849 | return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); | 884 | return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, NULL); |
850 | } | 885 | } |
851 | 886 | ||
852 | static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, | 887 | static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep, |
@@ -983,24 +1018,28 @@ static inline void set_raw_inline(struct f2fs_inode_info *fi, | |||
983 | ri->i_inline |= F2FS_INLINE_DATA; | 1018 | ri->i_inline |= F2FS_INLINE_DATA; |
984 | } | 1019 | } |
985 | 1020 | ||
1021 | static inline int f2fs_has_inline_xattr(struct inode *inode) | ||
1022 | { | ||
1023 | return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR); | ||
1024 | } | ||
1025 | |||
986 | static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) | 1026 | static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi) |
987 | { | 1027 | { |
988 | if (is_inode_flag_set(fi, FI_INLINE_XATTR)) | 1028 | if (f2fs_has_inline_xattr(&fi->vfs_inode)) |
989 | return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; | 1029 | return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS; |
990 | return DEF_ADDRS_PER_INODE; | 1030 | return DEF_ADDRS_PER_INODE; |
991 | } | 1031 | } |
992 | 1032 | ||
993 | static inline void *inline_xattr_addr(struct page *page) | 1033 | static inline void *inline_xattr_addr(struct page *page) |
994 | { | 1034 | { |
995 | struct f2fs_inode *ri; | 1035 | struct f2fs_inode *ri = F2FS_INODE(page); |
996 | ri = (struct f2fs_inode *)page_address(page); | ||
997 | return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - | 1036 | return (void *)&(ri->i_addr[DEF_ADDRS_PER_INODE - |
998 | F2FS_INLINE_XATTR_ADDRS]); | 1037 | F2FS_INLINE_XATTR_ADDRS]); |
999 | } | 1038 | } |
1000 | 1039 | ||
1001 | static inline int inline_xattr_size(struct inode *inode) | 1040 | static inline int inline_xattr_size(struct inode *inode) |
1002 | { | 1041 | { |
1003 | if (is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR)) | 1042 | if (f2fs_has_inline_xattr(inode)) |
1004 | return F2FS_INLINE_XATTR_ADDRS << 2; | 1043 | return F2FS_INLINE_XATTR_ADDRS << 2; |
1005 | else | 1044 | else |
1006 | return 0; | 1045 | return 0; |
@@ -1013,8 +1052,7 @@ static inline int f2fs_has_inline_data(struct inode *inode) | |||
1013 | 1052 | ||
1014 | static inline void *inline_data_addr(struct page *page) | 1053 | static inline void *inline_data_addr(struct page *page) |
1015 | { | 1054 | { |
1016 | struct f2fs_inode *ri; | 1055 | struct f2fs_inode *ri = F2FS_INODE(page); |
1017 | ri = (struct f2fs_inode *)page_address(page); | ||
1018 | return (void *)&(ri->i_addr[1]); | 1056 | return (void *)&(ri->i_addr[1]); |
1019 | } | 1057 | } |
1020 | 1058 | ||
@@ -1023,6 +1061,12 @@ static inline int f2fs_readonly(struct super_block *sb) | |||
1023 | return sb->s_flags & MS_RDONLY; | 1061 | return sb->s_flags & MS_RDONLY; |
1024 | } | 1062 | } |
1025 | 1063 | ||
1064 | static inline void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi) | ||
1065 | { | ||
1066 | set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG); | ||
1067 | sbi->sb->s_flags |= MS_RDONLY; | ||
1068 | } | ||
1069 | |||
1026 | #define get_inode_mode(i) \ | 1070 | #define get_inode_mode(i) \ |
1027 | ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ | 1071 | ((is_inode_flag_set(F2FS_I(i), FI_ACL_MODE)) ? \ |
1028 | (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) | 1072 | (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) |
@@ -1048,7 +1092,7 @@ void f2fs_set_inode_flags(struct inode *); | |||
1048 | struct inode *f2fs_iget(struct super_block *, unsigned long); | 1092 | struct inode *f2fs_iget(struct super_block *, unsigned long); |
1049 | int try_to_free_nats(struct f2fs_sb_info *, int); | 1093 | int try_to_free_nats(struct f2fs_sb_info *, int); |
1050 | void update_inode(struct inode *, struct page *); | 1094 | void update_inode(struct inode *, struct page *); |
1051 | int update_inode_page(struct inode *); | 1095 | void update_inode_page(struct inode *); |
1052 | int f2fs_write_inode(struct inode *, struct writeback_control *); | 1096 | int f2fs_write_inode(struct inode *, struct writeback_control *); |
1053 | void f2fs_evict_inode(struct inode *); | 1097 | void f2fs_evict_inode(struct inode *); |
1054 | 1098 | ||
@@ -1097,6 +1141,7 @@ struct dnode_of_data; | |||
1097 | struct node_info; | 1141 | struct node_info; |
1098 | 1142 | ||
1099 | int is_checkpointed_node(struct f2fs_sb_info *, nid_t); | 1143 | int is_checkpointed_node(struct f2fs_sb_info *, nid_t); |
1144 | bool fsync_mark_done(struct f2fs_sb_info *, nid_t); | ||
1100 | void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); | 1145 | void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); |
1101 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | 1146 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); |
1102 | int truncate_inode_blocks(struct inode *, pgoff_t); | 1147 | int truncate_inode_blocks(struct inode *, pgoff_t); |
@@ -1115,6 +1160,7 @@ void alloc_nid_done(struct f2fs_sb_info *, nid_t); | |||
1115 | void alloc_nid_failed(struct f2fs_sb_info *, nid_t); | 1160 | void alloc_nid_failed(struct f2fs_sb_info *, nid_t); |
1116 | void recover_node_page(struct f2fs_sb_info *, struct page *, | 1161 | void recover_node_page(struct f2fs_sb_info *, struct page *, |
1117 | struct f2fs_summary *, struct node_info *, block_t); | 1162 | struct f2fs_summary *, struct node_info *, block_t); |
1163 | bool recover_xattr_data(struct inode *, struct page *, block_t); | ||
1118 | int recover_inode_page(struct f2fs_sb_info *, struct page *); | 1164 | int recover_inode_page(struct f2fs_sb_info *, struct page *); |
1119 | int restore_node_summary(struct f2fs_sb_info *, unsigned int, | 1165 | int restore_node_summary(struct f2fs_sb_info *, unsigned int, |
1120 | struct f2fs_summary_block *); | 1166 | struct f2fs_summary_block *); |
@@ -1129,7 +1175,9 @@ void destroy_node_manager_caches(void); | |||
1129 | */ | 1175 | */ |
1130 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1176 | void f2fs_balance_fs(struct f2fs_sb_info *); |
1131 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); | 1177 | void f2fs_balance_fs_bg(struct f2fs_sb_info *); |
1178 | int f2fs_issue_flush(struct f2fs_sb_info *); | ||
1132 | void invalidate_blocks(struct f2fs_sb_info *, block_t); | 1179 | void invalidate_blocks(struct f2fs_sb_info *, block_t); |
1180 | void refresh_sit_entry(struct f2fs_sb_info *, block_t, block_t); | ||
1133 | void clear_prefree_segments(struct f2fs_sb_info *); | 1181 | void clear_prefree_segments(struct f2fs_sb_info *); |
1134 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1182 | int npages_for_summary_flush(struct f2fs_sb_info *); |
1135 | void allocate_new_segments(struct f2fs_sb_info *); | 1183 | void allocate_new_segments(struct f2fs_sb_info *); |
@@ -1162,6 +1210,7 @@ void destroy_segment_manager_caches(void); | |||
1162 | */ | 1210 | */ |
1163 | struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); | 1211 | struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); |
1164 | struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); | 1212 | struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); |
1213 | int ra_meta_pages(struct f2fs_sb_info *, int, int, int); | ||
1165 | long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); | 1214 | long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); |
1166 | int acquire_orphan_inode(struct f2fs_sb_info *); | 1215 | int acquire_orphan_inode(struct f2fs_sb_info *); |
1167 | void release_orphan_inode(struct f2fs_sb_info *); | 1216 | void release_orphan_inode(struct f2fs_sb_info *); |
@@ -1231,7 +1280,7 @@ struct f2fs_stat_info { | |||
1231 | int util_free, util_valid, util_invalid; | 1280 | int util_free, util_valid, util_invalid; |
1232 | int rsvd_segs, overp_segs; | 1281 | int rsvd_segs, overp_segs; |
1233 | int dirty_count, node_pages, meta_pages; | 1282 | int dirty_count, node_pages, meta_pages; |
1234 | int prefree_count, call_count; | 1283 | int prefree_count, call_count, cp_count; |
1235 | int tot_segs, node_segs, data_segs, free_segs, free_secs; | 1284 | int tot_segs, node_segs, data_segs, free_segs, free_secs; |
1236 | int tot_blks, data_blks, node_blks; | 1285 | int tot_blks, data_blks, node_blks; |
1237 | int curseg[NR_CURSEG_TYPE]; | 1286 | int curseg[NR_CURSEG_TYPE]; |
@@ -1248,6 +1297,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) | |||
1248 | return (struct f2fs_stat_info *)sbi->stat_info; | 1297 | return (struct f2fs_stat_info *)sbi->stat_info; |
1249 | } | 1298 | } |
1250 | 1299 | ||
1300 | #define stat_inc_cp_count(si) ((si)->cp_count++) | ||
1251 | #define stat_inc_call_count(si) ((si)->call_count++) | 1301 | #define stat_inc_call_count(si) ((si)->call_count++) |
1252 | #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) | 1302 | #define stat_inc_bggc_count(sbi) ((sbi)->bg_gc++) |
1253 | #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) | 1303 | #define stat_inc_dirty_dir(sbi) ((sbi)->n_dirty_dirs++) |
@@ -1302,6 +1352,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *); | |||
1302 | void __init f2fs_create_root_stats(void); | 1352 | void __init f2fs_create_root_stats(void); |
1303 | void f2fs_destroy_root_stats(void); | 1353 | void f2fs_destroy_root_stats(void); |
1304 | #else | 1354 | #else |
1355 | #define stat_inc_cp_count(si) | ||
1305 | #define stat_inc_call_count(si) | 1356 | #define stat_inc_call_count(si) |
1306 | #define stat_inc_bggc_count(si) | 1357 | #define stat_inc_bggc_count(si) |
1307 | #define stat_inc_dirty_dir(sbi) | 1358 | #define stat_inc_dirty_dir(sbi) |
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0dfcef53a6ed..60e7d5448a1d 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
@@ -76,7 +76,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
76 | trace_f2fs_vm_page_mkwrite(page, DATA); | 76 | trace_f2fs_vm_page_mkwrite(page, DATA); |
77 | mapped: | 77 | mapped: |
78 | /* fill the page */ | 78 | /* fill the page */ |
79 | wait_on_page_writeback(page); | 79 | f2fs_wait_on_page_writeback(page, DATA); |
80 | out: | 80 | out: |
81 | sb_end_pagefault(inode->i_sb); | 81 | sb_end_pagefault(inode->i_sb); |
82 | return block_page_mkwrite_return(err); | 82 | return block_page_mkwrite_return(err); |
@@ -84,6 +84,7 @@ out: | |||
84 | 84 | ||
85 | static const struct vm_operations_struct f2fs_file_vm_ops = { | 85 | static const struct vm_operations_struct f2fs_file_vm_ops = { |
86 | .fault = filemap_fault, | 86 | .fault = filemap_fault, |
87 | .map_pages = filemap_map_pages, | ||
87 | .page_mkwrite = f2fs_vm_page_mkwrite, | 88 | .page_mkwrite = f2fs_vm_page_mkwrite, |
88 | .remap_pages = generic_file_remap_pages, | 89 | .remap_pages = generic_file_remap_pages, |
89 | }; | 90 | }; |
@@ -111,11 +112,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) | |||
111 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | 112 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
112 | { | 113 | { |
113 | struct inode *inode = file->f_mapping->host; | 114 | struct inode *inode = file->f_mapping->host; |
115 | struct f2fs_inode_info *fi = F2FS_I(inode); | ||
114 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 116 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
115 | int ret = 0; | 117 | int ret = 0; |
116 | bool need_cp = false; | 118 | bool need_cp = false; |
117 | struct writeback_control wbc = { | 119 | struct writeback_control wbc = { |
118 | .sync_mode = WB_SYNC_NONE, | 120 | .sync_mode = WB_SYNC_ALL, |
119 | .nr_to_write = LONG_MAX, | 121 | .nr_to_write = LONG_MAX, |
120 | .for_reclaim = 0, | 122 | .for_reclaim = 0, |
121 | }; | 123 | }; |
@@ -133,7 +135,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
133 | /* guarantee free sections for fsync */ | 135 | /* guarantee free sections for fsync */ |
134 | f2fs_balance_fs(sbi); | 136 | f2fs_balance_fs(sbi); |
135 | 137 | ||
136 | mutex_lock(&inode->i_mutex); | 138 | down_read(&fi->i_sem); |
137 | 139 | ||
138 | /* | 140 | /* |
139 | * Both of fdatasync() and fsync() are able to be recovered from | 141 | * Both of fdatasync() and fsync() are able to be recovered from |
@@ -150,25 +152,33 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
150 | else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) | 152 | else if (F2FS_I(inode)->xattr_ver == cur_cp_version(F2FS_CKPT(sbi))) |
151 | need_cp = true; | 153 | need_cp = true; |
152 | 154 | ||
155 | up_read(&fi->i_sem); | ||
156 | |||
153 | if (need_cp) { | 157 | if (need_cp) { |
154 | nid_t pino; | 158 | nid_t pino; |
155 | 159 | ||
156 | F2FS_I(inode)->xattr_ver = 0; | ||
157 | |||
158 | /* all the dirty node pages should be flushed for POR */ | 160 | /* all the dirty node pages should be flushed for POR */ |
159 | ret = f2fs_sync_fs(inode->i_sb, 1); | 161 | ret = f2fs_sync_fs(inode->i_sb, 1); |
162 | |||
163 | down_write(&fi->i_sem); | ||
164 | F2FS_I(inode)->xattr_ver = 0; | ||
160 | if (file_wrong_pino(inode) && inode->i_nlink == 1 && | 165 | if (file_wrong_pino(inode) && inode->i_nlink == 1 && |
161 | get_parent_ino(inode, &pino)) { | 166 | get_parent_ino(inode, &pino)) { |
162 | F2FS_I(inode)->i_pino = pino; | 167 | F2FS_I(inode)->i_pino = pino; |
163 | file_got_pino(inode); | 168 | file_got_pino(inode); |
169 | up_write(&fi->i_sem); | ||
164 | mark_inode_dirty_sync(inode); | 170 | mark_inode_dirty_sync(inode); |
165 | ret = f2fs_write_inode(inode, NULL); | 171 | ret = f2fs_write_inode(inode, NULL); |
166 | if (ret) | 172 | if (ret) |
167 | goto out; | 173 | goto out; |
174 | } else { | ||
175 | up_write(&fi->i_sem); | ||
168 | } | 176 | } |
169 | } else { | 177 | } else { |
170 | /* if there is no written node page, write its inode page */ | 178 | /* if there is no written node page, write its inode page */ |
171 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { | 179 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { |
180 | if (fsync_mark_done(sbi, inode->i_ino)) | ||
181 | goto out; | ||
172 | mark_inode_dirty_sync(inode); | 182 | mark_inode_dirty_sync(inode); |
173 | ret = f2fs_write_inode(inode, NULL); | 183 | ret = f2fs_write_inode(inode, NULL); |
174 | if (ret) | 184 | if (ret) |
@@ -177,10 +187,9 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
177 | ret = wait_on_node_pages_writeback(sbi, inode->i_ino); | 187 | ret = wait_on_node_pages_writeback(sbi, inode->i_ino); |
178 | if (ret) | 188 | if (ret) |
179 | goto out; | 189 | goto out; |
180 | ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | 190 | ret = f2fs_issue_flush(F2FS_SB(inode->i_sb)); |
181 | } | 191 | } |
182 | out: | 192 | out: |
183 | mutex_unlock(&inode->i_mutex); | ||
184 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); | 193 | trace_f2fs_sync_file_exit(inode, need_cp, datasync, ret); |
185 | return ret; | 194 | return ret; |
186 | } | 195 | } |
@@ -245,7 +254,7 @@ static void truncate_partial_data_page(struct inode *inode, u64 from) | |||
245 | f2fs_put_page(page, 1); | 254 | f2fs_put_page(page, 1); |
246 | return; | 255 | return; |
247 | } | 256 | } |
248 | wait_on_page_writeback(page); | 257 | f2fs_wait_on_page_writeback(page, DATA); |
249 | zero_user(page, offset, PAGE_CACHE_SIZE - offset); | 258 | zero_user(page, offset, PAGE_CACHE_SIZE - offset); |
250 | set_page_dirty(page); | 259 | set_page_dirty(page); |
251 | f2fs_put_page(page, 1); | 260 | f2fs_put_page(page, 1); |
@@ -422,7 +431,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, | |||
422 | f2fs_unlock_op(sbi); | 431 | f2fs_unlock_op(sbi); |
423 | 432 | ||
424 | if (!IS_ERR(page)) { | 433 | if (!IS_ERR(page)) { |
425 | wait_on_page_writeback(page); | 434 | f2fs_wait_on_page_writeback(page, DATA); |
426 | zero_user(page, start, len); | 435 | zero_user(page, start, len); |
427 | set_page_dirty(page); | 436 | set_page_dirty(page); |
428 | f2fs_put_page(page, 1); | 437 | f2fs_put_page(page, 1); |
@@ -560,6 +569,8 @@ static long f2fs_fallocate(struct file *file, int mode, | |||
560 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 569 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) |
561 | return -EOPNOTSUPP; | 570 | return -EOPNOTSUPP; |
562 | 571 | ||
572 | mutex_lock(&inode->i_mutex); | ||
573 | |||
563 | if (mode & FALLOC_FL_PUNCH_HOLE) | 574 | if (mode & FALLOC_FL_PUNCH_HOLE) |
564 | ret = punch_hole(inode, offset, len); | 575 | ret = punch_hole(inode, offset, len); |
565 | else | 576 | else |
@@ -569,6 +580,9 @@ static long f2fs_fallocate(struct file *file, int mode, | |||
569 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 580 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
570 | mark_inode_dirty(inode); | 581 | mark_inode_dirty(inode); |
571 | } | 582 | } |
583 | |||
584 | mutex_unlock(&inode->i_mutex); | ||
585 | |||
572 | trace_f2fs_fallocate(inode, mode, offset, len, ret); | 586 | trace_f2fs_fallocate(inode, mode, offset, len, ret); |
573 | return ret; | 587 | return ret; |
574 | } | 588 | } |
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index ea0371e854b4..b90dbe55403a 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
@@ -531,15 +531,10 @@ static void move_data_page(struct inode *inode, struct page *page, int gc_type) | |||
531 | set_page_dirty(page); | 531 | set_page_dirty(page); |
532 | set_cold_data(page); | 532 | set_cold_data(page); |
533 | } else { | 533 | } else { |
534 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
535 | |||
536 | f2fs_wait_on_page_writeback(page, DATA); | 534 | f2fs_wait_on_page_writeback(page, DATA); |
537 | 535 | ||
538 | if (clear_page_dirty_for_io(page) && | 536 | if (clear_page_dirty_for_io(page)) |
539 | S_ISDIR(inode->i_mode)) { | ||
540 | dec_page_count(sbi, F2FS_DIRTY_DENTS); | ||
541 | inode_dec_dirty_dents(inode); | 537 | inode_dec_dirty_dents(inode); |
542 | } | ||
543 | set_cold_data(page); | 538 | set_cold_data(page); |
544 | do_write_data_page(page, &fio); | 539 | do_write_data_page(page, &fio); |
545 | clear_cold_data(page); | 540 | clear_cold_data(page); |
@@ -701,6 +696,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi) | |||
701 | gc_more: | 696 | gc_more: |
702 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) | 697 | if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE))) |
703 | goto stop; | 698 | goto stop; |
699 | if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG))) | ||
700 | goto stop; | ||
704 | 701 | ||
705 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { | 702 | if (gc_type == BG_GC && has_not_enough_free_secs(sbi, nfree)) { |
706 | gc_type = FG_GC; | 703 | gc_type = FG_GC; |
@@ -711,6 +708,11 @@ gc_more: | |||
711 | goto stop; | 708 | goto stop; |
712 | ret = 0; | 709 | ret = 0; |
713 | 710 | ||
711 | /* readahead multi ssa blocks those have contiguous address */ | ||
712 | if (sbi->segs_per_sec > 1) | ||
713 | ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec, | ||
714 | META_SSA); | ||
715 | |||
714 | for (i = 0; i < sbi->segs_per_sec; i++) | 716 | for (i = 0; i < sbi->segs_per_sec; i++) |
715 | do_garbage_collect(sbi, segno + i, &ilist, gc_type); | 717 | do_garbage_collect(sbi, segno + i, &ilist, gc_type); |
716 | 718 | ||
@@ -740,7 +742,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi) | |||
740 | int __init create_gc_caches(void) | 742 | int __init create_gc_caches(void) |
741 | { | 743 | { |
742 | winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", | 744 | winode_slab = f2fs_kmem_cache_create("f2fs_gc_inodes", |
743 | sizeof(struct inode_entry), NULL); | 745 | sizeof(struct inode_entry)); |
744 | if (!winode_slab) | 746 | if (!winode_slab) |
745 | return -ENOMEM; | 747 | return -ENOMEM; |
746 | return 0; | 748 | return 0; |
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 31ee5b164ff9..383db1fabcf4 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c | |||
@@ -45,8 +45,10 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page) | |||
45 | } | 45 | } |
46 | 46 | ||
47 | ipage = get_node_page(sbi, inode->i_ino); | 47 | ipage = get_node_page(sbi, inode->i_ino); |
48 | if (IS_ERR(ipage)) | 48 | if (IS_ERR(ipage)) { |
49 | unlock_page(page); | ||
49 | return PTR_ERR(ipage); | 50 | return PTR_ERR(ipage); |
51 | } | ||
50 | 52 | ||
51 | zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); | 53 | zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE); |
52 | 54 | ||
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 4d67ed736dca..ee829d360468 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
@@ -107,6 +107,7 @@ static int do_read_inode(struct inode *inode) | |||
107 | fi->flags = 0; | 107 | fi->flags = 0; |
108 | fi->i_advise = ri->i_advise; | 108 | fi->i_advise = ri->i_advise; |
109 | fi->i_pino = le32_to_cpu(ri->i_pino); | 109 | fi->i_pino = le32_to_cpu(ri->i_pino); |
110 | fi->i_dir_level = ri->i_dir_level; | ||
110 | 111 | ||
111 | get_extent_info(&fi->ext, ri->i_ext); | 112 | get_extent_info(&fi->ext, ri->i_ext); |
112 | get_inline_info(fi, ri); | 113 | get_inline_info(fi, ri); |
@@ -204,6 +205,7 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
204 | ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); | 205 | ri->i_flags = cpu_to_le32(F2FS_I(inode)->i_flags); |
205 | ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); | 206 | ri->i_pino = cpu_to_le32(F2FS_I(inode)->i_pino); |
206 | ri->i_generation = cpu_to_le32(inode->i_generation); | 207 | ri->i_generation = cpu_to_le32(inode->i_generation); |
208 | ri->i_dir_level = F2FS_I(inode)->i_dir_level; | ||
207 | 209 | ||
208 | __set_inode_rdev(inode, ri); | 210 | __set_inode_rdev(inode, ri); |
209 | set_cold_node(inode, node_page); | 211 | set_cold_node(inode, node_page); |
@@ -212,24 +214,29 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
212 | clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | 214 | clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); |
213 | } | 215 | } |
214 | 216 | ||
215 | int update_inode_page(struct inode *inode) | 217 | void update_inode_page(struct inode *inode) |
216 | { | 218 | { |
217 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 219 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
218 | struct page *node_page; | 220 | struct page *node_page; |
219 | 221 | retry: | |
220 | node_page = get_node_page(sbi, inode->i_ino); | 222 | node_page = get_node_page(sbi, inode->i_ino); |
221 | if (IS_ERR(node_page)) | 223 | if (IS_ERR(node_page)) { |
222 | return PTR_ERR(node_page); | 224 | int err = PTR_ERR(node_page); |
223 | 225 | if (err == -ENOMEM) { | |
226 | cond_resched(); | ||
227 | goto retry; | ||
228 | } else if (err != -ENOENT) { | ||
229 | f2fs_stop_checkpoint(sbi); | ||
230 | } | ||
231 | return; | ||
232 | } | ||
224 | update_inode(inode, node_page); | 233 | update_inode(inode, node_page); |
225 | f2fs_put_page(node_page, 1); | 234 | f2fs_put_page(node_page, 1); |
226 | return 0; | ||
227 | } | 235 | } |
228 | 236 | ||
229 | int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | 237 | int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) |
230 | { | 238 | { |
231 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 239 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
232 | int ret; | ||
233 | 240 | ||
234 | if (inode->i_ino == F2FS_NODE_INO(sbi) || | 241 | if (inode->i_ino == F2FS_NODE_INO(sbi) || |
235 | inode->i_ino == F2FS_META_INO(sbi)) | 242 | inode->i_ino == F2FS_META_INO(sbi)) |
@@ -243,13 +250,13 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
243 | * during the urgent cleaning time when runing out of free sections. | 250 | * during the urgent cleaning time when runing out of free sections. |
244 | */ | 251 | */ |
245 | f2fs_lock_op(sbi); | 252 | f2fs_lock_op(sbi); |
246 | ret = update_inode_page(inode); | 253 | update_inode_page(inode); |
247 | f2fs_unlock_op(sbi); | 254 | f2fs_unlock_op(sbi); |
248 | 255 | ||
249 | if (wbc) | 256 | if (wbc) |
250 | f2fs_balance_fs(sbi); | 257 | f2fs_balance_fs(sbi); |
251 | 258 | ||
252 | return ret; | 259 | return 0; |
253 | } | 260 | } |
254 | 261 | ||
255 | /* | 262 | /* |
@@ -260,13 +267,13 @@ void f2fs_evict_inode(struct inode *inode) | |||
260 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 267 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
261 | 268 | ||
262 | trace_f2fs_evict_inode(inode); | 269 | trace_f2fs_evict_inode(inode); |
263 | truncate_inode_pages(&inode->i_data, 0); | 270 | truncate_inode_pages_final(&inode->i_data); |
264 | 271 | ||
265 | if (inode->i_ino == F2FS_NODE_INO(sbi) || | 272 | if (inode->i_ino == F2FS_NODE_INO(sbi) || |
266 | inode->i_ino == F2FS_META_INO(sbi)) | 273 | inode->i_ino == F2FS_META_INO(sbi)) |
267 | goto no_delete; | 274 | goto no_delete; |
268 | 275 | ||
269 | f2fs_bug_on(atomic_read(&F2FS_I(inode)->dirty_dents)); | 276 | f2fs_bug_on(get_dirty_dents(inode)); |
270 | remove_dirty_dir_inode(inode); | 277 | remove_dirty_dir_inode(inode); |
271 | 278 | ||
272 | if (inode->i_nlink || is_bad_inode(inode)) | 279 | if (inode->i_nlink || is_bad_inode(inode)) |
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 397d459e97bf..a9409d19dfd4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
@@ -207,6 +207,8 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry, | |||
207 | inode = f2fs_iget(dir->i_sb, ino); | 207 | inode = f2fs_iget(dir->i_sb, ino); |
208 | if (IS_ERR(inode)) | 208 | if (IS_ERR(inode)) |
209 | return ERR_CAST(inode); | 209 | return ERR_CAST(inode); |
210 | |||
211 | stat_inc_inline_inode(inode); | ||
210 | } | 212 | } |
211 | 213 | ||
212 | return d_splice_alias(inode, dentry); | 214 | return d_splice_alias(inode, dentry); |
@@ -424,12 +426,17 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
424 | } | 426 | } |
425 | 427 | ||
426 | f2fs_set_link(new_dir, new_entry, new_page, old_inode); | 428 | f2fs_set_link(new_dir, new_entry, new_page, old_inode); |
429 | down_write(&F2FS_I(old_inode)->i_sem); | ||
427 | F2FS_I(old_inode)->i_pino = new_dir->i_ino; | 430 | F2FS_I(old_inode)->i_pino = new_dir->i_ino; |
431 | up_write(&F2FS_I(old_inode)->i_sem); | ||
428 | 432 | ||
429 | new_inode->i_ctime = CURRENT_TIME; | 433 | new_inode->i_ctime = CURRENT_TIME; |
434 | down_write(&F2FS_I(new_inode)->i_sem); | ||
430 | if (old_dir_entry) | 435 | if (old_dir_entry) |
431 | drop_nlink(new_inode); | 436 | drop_nlink(new_inode); |
432 | drop_nlink(new_inode); | 437 | drop_nlink(new_inode); |
438 | up_write(&F2FS_I(new_inode)->i_sem); | ||
439 | |||
433 | mark_inode_dirty(new_inode); | 440 | mark_inode_dirty(new_inode); |
434 | 441 | ||
435 | if (!new_inode->i_nlink) | 442 | if (!new_inode->i_nlink) |
@@ -459,7 +466,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
459 | if (old_dir != new_dir) { | 466 | if (old_dir != new_dir) { |
460 | f2fs_set_link(old_inode, old_dir_entry, | 467 | f2fs_set_link(old_inode, old_dir_entry, |
461 | old_dir_page, new_dir); | 468 | old_dir_page, new_dir); |
469 | down_write(&F2FS_I(old_inode)->i_sem); | ||
462 | F2FS_I(old_inode)->i_pino = new_dir->i_ino; | 470 | F2FS_I(old_inode)->i_pino = new_dir->i_ino; |
471 | up_write(&F2FS_I(old_inode)->i_sem); | ||
463 | update_inode_page(old_inode); | 472 | update_inode_page(old_inode); |
464 | } else { | 473 | } else { |
465 | kunmap(old_dir_page); | 474 | kunmap(old_dir_page); |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b0649b76eb4f..a161e955c4c8 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
@@ -21,9 +21,27 @@ | |||
21 | #include "segment.h" | 21 | #include "segment.h" |
22 | #include <trace/events/f2fs.h> | 22 | #include <trace/events/f2fs.h> |
23 | 23 | ||
24 | #define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) | ||
25 | |||
24 | static struct kmem_cache *nat_entry_slab; | 26 | static struct kmem_cache *nat_entry_slab; |
25 | static struct kmem_cache *free_nid_slab; | 27 | static struct kmem_cache *free_nid_slab; |
26 | 28 | ||
29 | static inline bool available_free_memory(struct f2fs_nm_info *nm_i, int type) | ||
30 | { | ||
31 | struct sysinfo val; | ||
32 | unsigned long mem_size = 0; | ||
33 | |||
34 | si_meminfo(&val); | ||
35 | if (type == FREE_NIDS) | ||
36 | mem_size = nm_i->fcnt * sizeof(struct free_nid); | ||
37 | else if (type == NAT_ENTRIES) | ||
38 | mem_size += nm_i->nat_cnt * sizeof(struct nat_entry); | ||
39 | mem_size >>= 12; | ||
40 | |||
41 | /* give 50:50 memory for free nids and nat caches respectively */ | ||
42 | return (mem_size < ((val.totalram * nm_i->ram_thresh) >> 11)); | ||
43 | } | ||
44 | |||
27 | static void clear_node_page_dirty(struct page *page) | 45 | static void clear_node_page_dirty(struct page *page) |
28 | { | 46 | { |
29 | struct address_space *mapping = page->mapping; | 47 | struct address_space *mapping = page->mapping; |
@@ -82,42 +100,6 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid) | |||
82 | return dst_page; | 100 | return dst_page; |
83 | } | 101 | } |
84 | 102 | ||
85 | /* | ||
86 | * Readahead NAT pages | ||
87 | */ | ||
88 | static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) | ||
89 | { | ||
90 | struct address_space *mapping = META_MAPPING(sbi); | ||
91 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
92 | struct page *page; | ||
93 | pgoff_t index; | ||
94 | int i; | ||
95 | struct f2fs_io_info fio = { | ||
96 | .type = META, | ||
97 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
98 | }; | ||
99 | |||
100 | |||
101 | for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { | ||
102 | if (unlikely(nid >= nm_i->max_nid)) | ||
103 | nid = 0; | ||
104 | index = current_nat_addr(sbi, nid); | ||
105 | |||
106 | page = grab_cache_page(mapping, index); | ||
107 | if (!page) | ||
108 | continue; | ||
109 | if (PageUptodate(page)) { | ||
110 | mark_page_accessed(page); | ||
111 | f2fs_put_page(page, 1); | ||
112 | continue; | ||
113 | } | ||
114 | f2fs_submit_page_mbio(sbi, page, index, &fio); | ||
115 | mark_page_accessed(page); | ||
116 | f2fs_put_page(page, 0); | ||
117 | } | ||
118 | f2fs_submit_merged_bio(sbi, META, READ); | ||
119 | } | ||
120 | |||
121 | static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) | 103 | static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) |
122 | { | 104 | { |
123 | return radix_tree_lookup(&nm_i->nat_root, n); | 105 | return radix_tree_lookup(&nm_i->nat_root, n); |
@@ -151,6 +133,20 @@ int is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid) | |||
151 | return is_cp; | 133 | return is_cp; |
152 | } | 134 | } |
153 | 135 | ||
136 | bool fsync_mark_done(struct f2fs_sb_info *sbi, nid_t nid) | ||
137 | { | ||
138 | struct f2fs_nm_info *nm_i = NM_I(sbi); | ||
139 | struct nat_entry *e; | ||
140 | bool fsync_done = false; | ||
141 | |||
142 | read_lock(&nm_i->nat_tree_lock); | ||
143 | e = __lookup_nat_cache(nm_i, nid); | ||
144 | if (e) | ||
145 | fsync_done = e->fsync_done; | ||
146 | read_unlock(&nm_i->nat_tree_lock); | ||
147 | return fsync_done; | ||
148 | } | ||
149 | |||
154 | static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | 150 | static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) |
155 | { | 151 | { |
156 | struct nat_entry *new; | 152 | struct nat_entry *new; |
@@ -164,6 +160,7 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid) | |||
164 | } | 160 | } |
165 | memset(new, 0, sizeof(struct nat_entry)); | 161 | memset(new, 0, sizeof(struct nat_entry)); |
166 | nat_set_nid(new, nid); | 162 | nat_set_nid(new, nid); |
163 | new->checkpointed = true; | ||
167 | list_add_tail(&new->list, &nm_i->nat_entries); | 164 | list_add_tail(&new->list, &nm_i->nat_entries); |
168 | nm_i->nat_cnt++; | 165 | nm_i->nat_cnt++; |
169 | return new; | 166 | return new; |
@@ -185,13 +182,12 @@ retry: | |||
185 | nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); | 182 | nat_set_blkaddr(e, le32_to_cpu(ne->block_addr)); |
186 | nat_set_ino(e, le32_to_cpu(ne->ino)); | 183 | nat_set_ino(e, le32_to_cpu(ne->ino)); |
187 | nat_set_version(e, ne->version); | 184 | nat_set_version(e, ne->version); |
188 | e->checkpointed = true; | ||
189 | } | 185 | } |
190 | write_unlock(&nm_i->nat_tree_lock); | 186 | write_unlock(&nm_i->nat_tree_lock); |
191 | } | 187 | } |
192 | 188 | ||
193 | static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, | 189 | static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni, |
194 | block_t new_blkaddr) | 190 | block_t new_blkaddr, bool fsync_done) |
195 | { | 191 | { |
196 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 192 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
197 | struct nat_entry *e; | 193 | struct nat_entry *e; |
@@ -205,7 +201,6 @@ retry: | |||
205 | goto retry; | 201 | goto retry; |
206 | } | 202 | } |
207 | e->ni = *ni; | 203 | e->ni = *ni; |
208 | e->checkpointed = true; | ||
209 | f2fs_bug_on(ni->blk_addr == NEW_ADDR); | 204 | f2fs_bug_on(ni->blk_addr == NEW_ADDR); |
210 | } else if (new_blkaddr == NEW_ADDR) { | 205 | } else if (new_blkaddr == NEW_ADDR) { |
211 | /* | 206 | /* |
@@ -217,9 +212,6 @@ retry: | |||
217 | f2fs_bug_on(ni->blk_addr != NULL_ADDR); | 212 | f2fs_bug_on(ni->blk_addr != NULL_ADDR); |
218 | } | 213 | } |
219 | 214 | ||
220 | if (new_blkaddr == NEW_ADDR) | ||
221 | e->checkpointed = false; | ||
222 | |||
223 | /* sanity check */ | 215 | /* sanity check */ |
224 | f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); | 216 | f2fs_bug_on(nat_get_blkaddr(e) != ni->blk_addr); |
225 | f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && | 217 | f2fs_bug_on(nat_get_blkaddr(e) == NULL_ADDR && |
@@ -239,6 +231,11 @@ retry: | |||
239 | /* change address */ | 231 | /* change address */ |
240 | nat_set_blkaddr(e, new_blkaddr); | 232 | nat_set_blkaddr(e, new_blkaddr); |
241 | __set_nat_cache_dirty(nm_i, e); | 233 | __set_nat_cache_dirty(nm_i, e); |
234 | |||
235 | /* update fsync_mark if its inode nat entry is still alive */ | ||
236 | e = __lookup_nat_cache(nm_i, ni->ino); | ||
237 | if (e) | ||
238 | e->fsync_done = fsync_done; | ||
242 | write_unlock(&nm_i->nat_tree_lock); | 239 | write_unlock(&nm_i->nat_tree_lock); |
243 | } | 240 | } |
244 | 241 | ||
@@ -246,7 +243,7 @@ int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) | |||
246 | { | 243 | { |
247 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 244 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
248 | 245 | ||
249 | if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) | 246 | if (available_free_memory(nm_i, NAT_ENTRIES)) |
250 | return 0; | 247 | return 0; |
251 | 248 | ||
252 | write_lock(&nm_i->nat_tree_lock); | 249 | write_lock(&nm_i->nat_tree_lock); |
@@ -505,7 +502,7 @@ static void truncate_node(struct dnode_of_data *dn) | |||
505 | /* Deallocate node address */ | 502 | /* Deallocate node address */ |
506 | invalidate_blocks(sbi, ni.blk_addr); | 503 | invalidate_blocks(sbi, ni.blk_addr); |
507 | dec_valid_node_count(sbi, dn->inode); | 504 | dec_valid_node_count(sbi, dn->inode); |
508 | set_node_addr(sbi, &ni, NULL_ADDR); | 505 | set_node_addr(sbi, &ni, NULL_ADDR, false); |
509 | 506 | ||
510 | if (dn->nid == dn->inode->i_ino) { | 507 | if (dn->nid == dn->inode->i_ino) { |
511 | remove_orphan_inode(sbi, dn->nid); | 508 | remove_orphan_inode(sbi, dn->nid); |
@@ -763,7 +760,7 @@ skip_partial: | |||
763 | f2fs_put_page(page, 1); | 760 | f2fs_put_page(page, 1); |
764 | goto restart; | 761 | goto restart; |
765 | } | 762 | } |
766 | wait_on_page_writeback(page); | 763 | f2fs_wait_on_page_writeback(page, NODE); |
767 | ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; | 764 | ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; |
768 | set_page_dirty(page); | 765 | set_page_dirty(page); |
769 | unlock_page(page); | 766 | unlock_page(page); |
@@ -852,7 +849,8 @@ struct page *new_node_page(struct dnode_of_data *dn, | |||
852 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) | 849 | if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC))) |
853 | return ERR_PTR(-EPERM); | 850 | return ERR_PTR(-EPERM); |
854 | 851 | ||
855 | page = grab_cache_page(NODE_MAPPING(sbi), dn->nid); | 852 | page = grab_cache_page_write_begin(NODE_MAPPING(sbi), |
853 | dn->nid, AOP_FLAG_NOFS); | ||
856 | if (!page) | 854 | if (!page) |
857 | return ERR_PTR(-ENOMEM); | 855 | return ERR_PTR(-ENOMEM); |
858 | 856 | ||
@@ -867,14 +865,14 @@ struct page *new_node_page(struct dnode_of_data *dn, | |||
867 | f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); | 865 | f2fs_bug_on(old_ni.blk_addr != NULL_ADDR); |
868 | new_ni = old_ni; | 866 | new_ni = old_ni; |
869 | new_ni.ino = dn->inode->i_ino; | 867 | new_ni.ino = dn->inode->i_ino; |
870 | set_node_addr(sbi, &new_ni, NEW_ADDR); | 868 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); |
871 | 869 | ||
872 | fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); | 870 | fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true); |
873 | set_cold_node(dn->inode, page); | 871 | set_cold_node(dn->inode, page); |
874 | SetPageUptodate(page); | 872 | SetPageUptodate(page); |
875 | set_page_dirty(page); | 873 | set_page_dirty(page); |
876 | 874 | ||
877 | if (ofs == XATTR_NODE_OFFSET) | 875 | if (f2fs_has_xattr_block(ofs)) |
878 | F2FS_I(dn->inode)->i_xattr_nid = dn->nid; | 876 | F2FS_I(dn->inode)->i_xattr_nid = dn->nid; |
879 | 877 | ||
880 | dn->node_page = page; | 878 | dn->node_page = page; |
@@ -948,7 +946,8 @@ struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) | |||
948 | struct page *page; | 946 | struct page *page; |
949 | int err; | 947 | int err; |
950 | repeat: | 948 | repeat: |
951 | page = grab_cache_page(NODE_MAPPING(sbi), nid); | 949 | page = grab_cache_page_write_begin(NODE_MAPPING(sbi), |
950 | nid, AOP_FLAG_NOFS); | ||
952 | if (!page) | 951 | if (!page) |
953 | return ERR_PTR(-ENOMEM); | 952 | return ERR_PTR(-ENOMEM); |
954 | 953 | ||
@@ -959,7 +958,7 @@ repeat: | |||
959 | goto got_it; | 958 | goto got_it; |
960 | 959 | ||
961 | lock_page(page); | 960 | lock_page(page); |
962 | if (unlikely(!PageUptodate(page))) { | 961 | if (unlikely(!PageUptodate(page) || nid != nid_of_node(page))) { |
963 | f2fs_put_page(page, 1); | 962 | f2fs_put_page(page, 1); |
964 | return ERR_PTR(-EIO); | 963 | return ERR_PTR(-EIO); |
965 | } | 964 | } |
@@ -968,7 +967,6 @@ repeat: | |||
968 | goto repeat; | 967 | goto repeat; |
969 | } | 968 | } |
970 | got_it: | 969 | got_it: |
971 | f2fs_bug_on(nid != nid_of_node(page)); | ||
972 | mark_page_accessed(page); | 970 | mark_page_accessed(page); |
973 | return page; | 971 | return page; |
974 | } | 972 | } |
@@ -1168,7 +1166,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino) | |||
1168 | continue; | 1166 | continue; |
1169 | 1167 | ||
1170 | if (ino && ino_of_node(page) == ino) { | 1168 | if (ino && ino_of_node(page) == ino) { |
1171 | wait_on_page_writeback(page); | 1169 | f2fs_wait_on_page_writeback(page, NODE); |
1172 | if (TestClearPageError(page)) | 1170 | if (TestClearPageError(page)) |
1173 | ret = -EIO; | 1171 | ret = -EIO; |
1174 | } | 1172 | } |
@@ -1201,7 +1199,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1201 | if (unlikely(sbi->por_doing)) | 1199 | if (unlikely(sbi->por_doing)) |
1202 | goto redirty_out; | 1200 | goto redirty_out; |
1203 | 1201 | ||
1204 | wait_on_page_writeback(page); | 1202 | f2fs_wait_on_page_writeback(page, NODE); |
1205 | 1203 | ||
1206 | /* get old block addr of this node page */ | 1204 | /* get old block addr of this node page */ |
1207 | nid = nid_of_node(page); | 1205 | nid = nid_of_node(page); |
@@ -1222,7 +1220,7 @@ static int f2fs_write_node_page(struct page *page, | |||
1222 | mutex_lock(&sbi->node_write); | 1220 | mutex_lock(&sbi->node_write); |
1223 | set_page_writeback(page); | 1221 | set_page_writeback(page); |
1224 | write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); | 1222 | write_node_page(sbi, page, &fio, nid, ni.blk_addr, &new_addr); |
1225 | set_node_addr(sbi, &ni, new_addr); | 1223 | set_node_addr(sbi, &ni, new_addr, is_fsync_dnode(page)); |
1226 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1224 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1227 | mutex_unlock(&sbi->node_write); | 1225 | mutex_unlock(&sbi->node_write); |
1228 | unlock_page(page); | 1226 | unlock_page(page); |
@@ -1231,35 +1229,32 @@ static int f2fs_write_node_page(struct page *page, | |||
1231 | redirty_out: | 1229 | redirty_out: |
1232 | dec_page_count(sbi, F2FS_DIRTY_NODES); | 1230 | dec_page_count(sbi, F2FS_DIRTY_NODES); |
1233 | wbc->pages_skipped++; | 1231 | wbc->pages_skipped++; |
1232 | account_page_redirty(page); | ||
1234 | set_page_dirty(page); | 1233 | set_page_dirty(page); |
1235 | return AOP_WRITEPAGE_ACTIVATE; | 1234 | return AOP_WRITEPAGE_ACTIVATE; |
1236 | } | 1235 | } |
1237 | 1236 | ||
1238 | /* | ||
1239 | * It is very important to gather dirty pages and write at once, so that we can | ||
1240 | * submit a big bio without interfering other data writes. | ||
1241 | * Be default, 512 pages (2MB) * 3 node types, is more reasonable. | ||
1242 | */ | ||
1243 | #define COLLECT_DIRTY_NODES 1536 | ||
1244 | static int f2fs_write_node_pages(struct address_space *mapping, | 1237 | static int f2fs_write_node_pages(struct address_space *mapping, |
1245 | struct writeback_control *wbc) | 1238 | struct writeback_control *wbc) |
1246 | { | 1239 | { |
1247 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); | 1240 | struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); |
1248 | long nr_to_write = wbc->nr_to_write; | 1241 | long diff; |
1249 | 1242 | ||
1250 | /* balancing f2fs's metadata in background */ | 1243 | /* balancing f2fs's metadata in background */ |
1251 | f2fs_balance_fs_bg(sbi); | 1244 | f2fs_balance_fs_bg(sbi); |
1252 | 1245 | ||
1253 | /* collect a number of dirty node pages and write together */ | 1246 | /* collect a number of dirty node pages and write together */ |
1254 | if (get_pages(sbi, F2FS_DIRTY_NODES) < COLLECT_DIRTY_NODES) | 1247 | if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE)) |
1255 | return 0; | 1248 | goto skip_write; |
1256 | 1249 | ||
1257 | /* if mounting is failed, skip writing node pages */ | 1250 | diff = nr_pages_to_write(sbi, NODE, wbc); |
1258 | wbc->nr_to_write = 3 * max_hw_blocks(sbi); | ||
1259 | wbc->sync_mode = WB_SYNC_NONE; | 1251 | wbc->sync_mode = WB_SYNC_NONE; |
1260 | sync_node_pages(sbi, 0, wbc); | 1252 | sync_node_pages(sbi, 0, wbc); |
1261 | wbc->nr_to_write = nr_to_write - (3 * max_hw_blocks(sbi) - | 1253 | wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); |
1262 | wbc->nr_to_write); | 1254 | return 0; |
1255 | |||
1256 | skip_write: | ||
1257 | wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES); | ||
1263 | return 0; | 1258 | return 0; |
1264 | } | 1259 | } |
1265 | 1260 | ||
@@ -1307,22 +1302,17 @@ const struct address_space_operations f2fs_node_aops = { | |||
1307 | .releasepage = f2fs_release_node_page, | 1302 | .releasepage = f2fs_release_node_page, |
1308 | }; | 1303 | }; |
1309 | 1304 | ||
1310 | static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) | 1305 | static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, |
1306 | nid_t n) | ||
1311 | { | 1307 | { |
1312 | struct list_head *this; | 1308 | return radix_tree_lookup(&nm_i->free_nid_root, n); |
1313 | struct free_nid *i; | ||
1314 | list_for_each(this, head) { | ||
1315 | i = list_entry(this, struct free_nid, list); | ||
1316 | if (i->nid == n) | ||
1317 | return i; | ||
1318 | } | ||
1319 | return NULL; | ||
1320 | } | 1309 | } |
1321 | 1310 | ||
1322 | static void __del_from_free_nid_list(struct free_nid *i) | 1311 | static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, |
1312 | struct free_nid *i) | ||
1323 | { | 1313 | { |
1324 | list_del(&i->list); | 1314 | list_del(&i->list); |
1325 | kmem_cache_free(free_nid_slab, i); | 1315 | radix_tree_delete(&nm_i->free_nid_root, i->nid); |
1326 | } | 1316 | } |
1327 | 1317 | ||
1328 | static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | 1318 | static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) |
@@ -1331,7 +1321,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | |||
1331 | struct nat_entry *ne; | 1321 | struct nat_entry *ne; |
1332 | bool allocated = false; | 1322 | bool allocated = false; |
1333 | 1323 | ||
1334 | if (nm_i->fcnt > 2 * MAX_FREE_NIDS) | 1324 | if (!available_free_memory(nm_i, FREE_NIDS)) |
1335 | return -1; | 1325 | return -1; |
1336 | 1326 | ||
1337 | /* 0 nid should not be used */ | 1327 | /* 0 nid should not be used */ |
@@ -1342,7 +1332,8 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | |||
1342 | /* do not add allocated nids */ | 1332 | /* do not add allocated nids */ |
1343 | read_lock(&nm_i->nat_tree_lock); | 1333 | read_lock(&nm_i->nat_tree_lock); |
1344 | ne = __lookup_nat_cache(nm_i, nid); | 1334 | ne = __lookup_nat_cache(nm_i, nid); |
1345 | if (ne && nat_get_blkaddr(ne) != NULL_ADDR) | 1335 | if (ne && |
1336 | (!ne->checkpointed || nat_get_blkaddr(ne) != NULL_ADDR)) | ||
1346 | allocated = true; | 1337 | allocated = true; |
1347 | read_unlock(&nm_i->nat_tree_lock); | 1338 | read_unlock(&nm_i->nat_tree_lock); |
1348 | if (allocated) | 1339 | if (allocated) |
@@ -1354,7 +1345,7 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | |||
1354 | i->state = NID_NEW; | 1345 | i->state = NID_NEW; |
1355 | 1346 | ||
1356 | spin_lock(&nm_i->free_nid_list_lock); | 1347 | spin_lock(&nm_i->free_nid_list_lock); |
1357 | if (__lookup_free_nid_list(nid, &nm_i->free_nid_list)) { | 1348 | if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { |
1358 | spin_unlock(&nm_i->free_nid_list_lock); | 1349 | spin_unlock(&nm_i->free_nid_list_lock); |
1359 | kmem_cache_free(free_nid_slab, i); | 1350 | kmem_cache_free(free_nid_slab, i); |
1360 | return 0; | 1351 | return 0; |
@@ -1368,13 +1359,19 @@ static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) | |||
1368 | static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) | 1359 | static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) |
1369 | { | 1360 | { |
1370 | struct free_nid *i; | 1361 | struct free_nid *i; |
1362 | bool need_free = false; | ||
1363 | |||
1371 | spin_lock(&nm_i->free_nid_list_lock); | 1364 | spin_lock(&nm_i->free_nid_list_lock); |
1372 | i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); | 1365 | i = __lookup_free_nid_list(nm_i, nid); |
1373 | if (i && i->state == NID_NEW) { | 1366 | if (i && i->state == NID_NEW) { |
1374 | __del_from_free_nid_list(i); | 1367 | __del_from_free_nid_list(nm_i, i); |
1375 | nm_i->fcnt--; | 1368 | nm_i->fcnt--; |
1369 | need_free = true; | ||
1376 | } | 1370 | } |
1377 | spin_unlock(&nm_i->free_nid_list_lock); | 1371 | spin_unlock(&nm_i->free_nid_list_lock); |
1372 | |||
1373 | if (need_free) | ||
1374 | kmem_cache_free(free_nid_slab, i); | ||
1378 | } | 1375 | } |
1379 | 1376 | ||
1380 | static void scan_nat_page(struct f2fs_nm_info *nm_i, | 1377 | static void scan_nat_page(struct f2fs_nm_info *nm_i, |
@@ -1413,7 +1410,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi) | |||
1413 | return; | 1410 | return; |
1414 | 1411 | ||
1415 | /* readahead nat pages to be scanned */ | 1412 | /* readahead nat pages to be scanned */ |
1416 | ra_nat_pages(sbi, nid); | 1413 | ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES, META_NAT); |
1417 | 1414 | ||
1418 | while (1) { | 1415 | while (1) { |
1419 | struct page *page = get_current_nat_page(sbi, nid); | 1416 | struct page *page = get_current_nat_page(sbi, nid); |
@@ -1454,7 +1451,6 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) | |||
1454 | { | 1451 | { |
1455 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1452 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
1456 | struct free_nid *i = NULL; | 1453 | struct free_nid *i = NULL; |
1457 | struct list_head *this; | ||
1458 | retry: | 1454 | retry: |
1459 | if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) | 1455 | if (unlikely(sbi->total_valid_node_count + 1 >= nm_i->max_nid)) |
1460 | return false; | 1456 | return false; |
@@ -1462,13 +1458,11 @@ retry: | |||
1462 | spin_lock(&nm_i->free_nid_list_lock); | 1458 | spin_lock(&nm_i->free_nid_list_lock); |
1463 | 1459 | ||
1464 | /* We should not use stale free nids created by build_free_nids */ | 1460 | /* We should not use stale free nids created by build_free_nids */ |
1465 | if (nm_i->fcnt && !sbi->on_build_free_nids) { | 1461 | if (nm_i->fcnt && !on_build_free_nids(nm_i)) { |
1466 | f2fs_bug_on(list_empty(&nm_i->free_nid_list)); | 1462 | f2fs_bug_on(list_empty(&nm_i->free_nid_list)); |
1467 | list_for_each(this, &nm_i->free_nid_list) { | 1463 | list_for_each_entry(i, &nm_i->free_nid_list, list) |
1468 | i = list_entry(this, struct free_nid, list); | ||
1469 | if (i->state == NID_NEW) | 1464 | if (i->state == NID_NEW) |
1470 | break; | 1465 | break; |
1471 | } | ||
1472 | 1466 | ||
1473 | f2fs_bug_on(i->state != NID_NEW); | 1467 | f2fs_bug_on(i->state != NID_NEW); |
1474 | *nid = i->nid; | 1468 | *nid = i->nid; |
@@ -1481,9 +1475,7 @@ retry: | |||
1481 | 1475 | ||
1482 | /* Let's scan nat pages and its caches to get free nids */ | 1476 | /* Let's scan nat pages and its caches to get free nids */ |
1483 | mutex_lock(&nm_i->build_lock); | 1477 | mutex_lock(&nm_i->build_lock); |
1484 | sbi->on_build_free_nids = true; | ||
1485 | build_free_nids(sbi); | 1478 | build_free_nids(sbi); |
1486 | sbi->on_build_free_nids = false; | ||
1487 | mutex_unlock(&nm_i->build_lock); | 1479 | mutex_unlock(&nm_i->build_lock); |
1488 | goto retry; | 1480 | goto retry; |
1489 | } | 1481 | } |
@@ -1497,10 +1489,12 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) | |||
1497 | struct free_nid *i; | 1489 | struct free_nid *i; |
1498 | 1490 | ||
1499 | spin_lock(&nm_i->free_nid_list_lock); | 1491 | spin_lock(&nm_i->free_nid_list_lock); |
1500 | i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); | 1492 | i = __lookup_free_nid_list(nm_i, nid); |
1501 | f2fs_bug_on(!i || i->state != NID_ALLOC); | 1493 | f2fs_bug_on(!i || i->state != NID_ALLOC); |
1502 | __del_from_free_nid_list(i); | 1494 | __del_from_free_nid_list(nm_i, i); |
1503 | spin_unlock(&nm_i->free_nid_list_lock); | 1495 | spin_unlock(&nm_i->free_nid_list_lock); |
1496 | |||
1497 | kmem_cache_free(free_nid_slab, i); | ||
1504 | } | 1498 | } |
1505 | 1499 | ||
1506 | /* | 1500 | /* |
@@ -1510,20 +1504,25 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) | |||
1510 | { | 1504 | { |
1511 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1505 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
1512 | struct free_nid *i; | 1506 | struct free_nid *i; |
1507 | bool need_free = false; | ||
1513 | 1508 | ||
1514 | if (!nid) | 1509 | if (!nid) |
1515 | return; | 1510 | return; |
1516 | 1511 | ||
1517 | spin_lock(&nm_i->free_nid_list_lock); | 1512 | spin_lock(&nm_i->free_nid_list_lock); |
1518 | i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); | 1513 | i = __lookup_free_nid_list(nm_i, nid); |
1519 | f2fs_bug_on(!i || i->state != NID_ALLOC); | 1514 | f2fs_bug_on(!i || i->state != NID_ALLOC); |
1520 | if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { | 1515 | if (!available_free_memory(nm_i, FREE_NIDS)) { |
1521 | __del_from_free_nid_list(i); | 1516 | __del_from_free_nid_list(nm_i, i); |
1517 | need_free = true; | ||
1522 | } else { | 1518 | } else { |
1523 | i->state = NID_NEW; | 1519 | i->state = NID_NEW; |
1524 | nm_i->fcnt++; | 1520 | nm_i->fcnt++; |
1525 | } | 1521 | } |
1526 | spin_unlock(&nm_i->free_nid_list_lock); | 1522 | spin_unlock(&nm_i->free_nid_list_lock); |
1523 | |||
1524 | if (need_free) | ||
1525 | kmem_cache_free(free_nid_slab, i); | ||
1527 | } | 1526 | } |
1528 | 1527 | ||
1529 | void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, | 1528 | void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, |
@@ -1531,10 +1530,83 @@ void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, | |||
1531 | block_t new_blkaddr) | 1530 | block_t new_blkaddr) |
1532 | { | 1531 | { |
1533 | rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); | 1532 | rewrite_node_page(sbi, page, sum, ni->blk_addr, new_blkaddr); |
1534 | set_node_addr(sbi, ni, new_blkaddr); | 1533 | set_node_addr(sbi, ni, new_blkaddr, false); |
1535 | clear_node_page_dirty(page); | 1534 | clear_node_page_dirty(page); |
1536 | } | 1535 | } |
1537 | 1536 | ||
1537 | void recover_inline_xattr(struct inode *inode, struct page *page) | ||
1538 | { | ||
1539 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1540 | void *src_addr, *dst_addr; | ||
1541 | size_t inline_size; | ||
1542 | struct page *ipage; | ||
1543 | struct f2fs_inode *ri; | ||
1544 | |||
1545 | if (!f2fs_has_inline_xattr(inode)) | ||
1546 | return; | ||
1547 | |||
1548 | if (!IS_INODE(page)) | ||
1549 | return; | ||
1550 | |||
1551 | ri = F2FS_INODE(page); | ||
1552 | if (!(ri->i_inline & F2FS_INLINE_XATTR)) | ||
1553 | return; | ||
1554 | |||
1555 | ipage = get_node_page(sbi, inode->i_ino); | ||
1556 | f2fs_bug_on(IS_ERR(ipage)); | ||
1557 | |||
1558 | dst_addr = inline_xattr_addr(ipage); | ||
1559 | src_addr = inline_xattr_addr(page); | ||
1560 | inline_size = inline_xattr_size(inode); | ||
1561 | |||
1562 | memcpy(dst_addr, src_addr, inline_size); | ||
1563 | |||
1564 | update_inode(inode, ipage); | ||
1565 | f2fs_put_page(ipage, 1); | ||
1566 | } | ||
1567 | |||
1568 | bool recover_xattr_data(struct inode *inode, struct page *page, block_t blkaddr) | ||
1569 | { | ||
1570 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
1571 | nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid; | ||
1572 | nid_t new_xnid = nid_of_node(page); | ||
1573 | struct node_info ni; | ||
1574 | |||
1575 | recover_inline_xattr(inode, page); | ||
1576 | |||
1577 | if (!f2fs_has_xattr_block(ofs_of_node(page))) | ||
1578 | return false; | ||
1579 | |||
1580 | /* 1: invalidate the previous xattr nid */ | ||
1581 | if (!prev_xnid) | ||
1582 | goto recover_xnid; | ||
1583 | |||
1584 | /* Deallocate node address */ | ||
1585 | get_node_info(sbi, prev_xnid, &ni); | ||
1586 | f2fs_bug_on(ni.blk_addr == NULL_ADDR); | ||
1587 | invalidate_blocks(sbi, ni.blk_addr); | ||
1588 | dec_valid_node_count(sbi, inode); | ||
1589 | set_node_addr(sbi, &ni, NULL_ADDR, false); | ||
1590 | |||
1591 | recover_xnid: | ||
1592 | /* 2: allocate new xattr nid */ | ||
1593 | if (unlikely(!inc_valid_node_count(sbi, inode))) | ||
1594 | f2fs_bug_on(1); | ||
1595 | |||
1596 | remove_free_nid(NM_I(sbi), new_xnid); | ||
1597 | get_node_info(sbi, new_xnid, &ni); | ||
1598 | ni.ino = inode->i_ino; | ||
1599 | set_node_addr(sbi, &ni, NEW_ADDR, false); | ||
1600 | F2FS_I(inode)->i_xattr_nid = new_xnid; | ||
1601 | |||
1602 | /* 3: update xattr blkaddr */ | ||
1603 | refresh_sit_entry(sbi, NEW_ADDR, blkaddr); | ||
1604 | set_node_addr(sbi, &ni, blkaddr, false); | ||
1605 | |||
1606 | update_inode_page(inode); | ||
1607 | return true; | ||
1608 | } | ||
1609 | |||
1538 | int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | 1610 | int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) |
1539 | { | 1611 | { |
1540 | struct f2fs_inode *src, *dst; | 1612 | struct f2fs_inode *src, *dst; |
@@ -1567,7 +1639,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
1567 | 1639 | ||
1568 | if (unlikely(!inc_valid_node_count(sbi, NULL))) | 1640 | if (unlikely(!inc_valid_node_count(sbi, NULL))) |
1569 | WARN_ON(1); | 1641 | WARN_ON(1); |
1570 | set_node_addr(sbi, &new_ni, NEW_ADDR); | 1642 | set_node_addr(sbi, &new_ni, NEW_ADDR, false); |
1571 | inc_valid_inode_count(sbi); | 1643 | inc_valid_inode_count(sbi); |
1572 | f2fs_put_page(ipage, 1); | 1644 | f2fs_put_page(ipage, 1); |
1573 | return 0; | 1645 | return 0; |
@@ -1590,15 +1662,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, | |||
1590 | for (; page_idx < start + nrpages; page_idx++) { | 1662 | for (; page_idx < start + nrpages; page_idx++) { |
1591 | /* alloc temporal page for read node summary info*/ | 1663 | /* alloc temporal page for read node summary info*/ |
1592 | page = alloc_page(GFP_F2FS_ZERO); | 1664 | page = alloc_page(GFP_F2FS_ZERO); |
1593 | if (!page) { | 1665 | if (!page) |
1594 | struct page *tmp; | 1666 | break; |
1595 | list_for_each_entry_safe(page, tmp, pages, lru) { | ||
1596 | list_del(&page->lru); | ||
1597 | unlock_page(page); | ||
1598 | __free_pages(page, 0); | ||
1599 | } | ||
1600 | return -ENOMEM; | ||
1601 | } | ||
1602 | 1667 | ||
1603 | lock_page(page); | 1668 | lock_page(page); |
1604 | page->index = page_idx; | 1669 | page->index = page_idx; |
@@ -1609,7 +1674,8 @@ static int ra_sum_pages(struct f2fs_sb_info *sbi, struct list_head *pages, | |||
1609 | f2fs_submit_page_mbio(sbi, page, page->index, &fio); | 1674 | f2fs_submit_page_mbio(sbi, page, page->index, &fio); |
1610 | 1675 | ||
1611 | f2fs_submit_merged_bio(sbi, META, READ); | 1676 | f2fs_submit_merged_bio(sbi, META, READ); |
1612 | return 0; | 1677 | |
1678 | return page_idx - start; | ||
1613 | } | 1679 | } |
1614 | 1680 | ||
1615 | int restore_node_summary(struct f2fs_sb_info *sbi, | 1681 | int restore_node_summary(struct f2fs_sb_info *sbi, |
@@ -1628,15 +1694,17 @@ int restore_node_summary(struct f2fs_sb_info *sbi, | |||
1628 | addr = START_BLOCK(sbi, segno); | 1694 | addr = START_BLOCK(sbi, segno); |
1629 | sum_entry = &sum->entries[0]; | 1695 | sum_entry = &sum->entries[0]; |
1630 | 1696 | ||
1631 | for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { | 1697 | for (i = 0; !err && i < last_offset; i += nrpages, addr += nrpages) { |
1632 | nrpages = min(last_offset - i, bio_blocks); | 1698 | nrpages = min(last_offset - i, bio_blocks); |
1633 | 1699 | ||
1634 | /* read ahead node pages */ | 1700 | /* read ahead node pages */ |
1635 | err = ra_sum_pages(sbi, &page_list, addr, nrpages); | 1701 | nrpages = ra_sum_pages(sbi, &page_list, addr, nrpages); |
1636 | if (err) | 1702 | if (!nrpages) |
1637 | return err; | 1703 | return -ENOMEM; |
1638 | 1704 | ||
1639 | list_for_each_entry_safe(page, tmp, &page_list, lru) { | 1705 | list_for_each_entry_safe(page, tmp, &page_list, lru) { |
1706 | if (err) | ||
1707 | goto skip; | ||
1640 | 1708 | ||
1641 | lock_page(page); | 1709 | lock_page(page); |
1642 | if (unlikely(!PageUptodate(page))) { | 1710 | if (unlikely(!PageUptodate(page))) { |
@@ -1648,9 +1716,9 @@ int restore_node_summary(struct f2fs_sb_info *sbi, | |||
1648 | sum_entry->ofs_in_node = 0; | 1716 | sum_entry->ofs_in_node = 0; |
1649 | sum_entry++; | 1717 | sum_entry++; |
1650 | } | 1718 | } |
1651 | |||
1652 | list_del(&page->lru); | ||
1653 | unlock_page(page); | 1719 | unlock_page(page); |
1720 | skip: | ||
1721 | list_del(&page->lru); | ||
1654 | __free_pages(page, 0); | 1722 | __free_pages(page, 0); |
1655 | } | 1723 | } |
1656 | } | 1724 | } |
@@ -1709,7 +1777,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
1709 | struct f2fs_nm_info *nm_i = NM_I(sbi); | 1777 | struct f2fs_nm_info *nm_i = NM_I(sbi); |
1710 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); | 1778 | struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); |
1711 | struct f2fs_summary_block *sum = curseg->sum_blk; | 1779 | struct f2fs_summary_block *sum = curseg->sum_blk; |
1712 | struct list_head *cur, *n; | 1780 | struct nat_entry *ne, *cur; |
1713 | struct page *page = NULL; | 1781 | struct page *page = NULL; |
1714 | struct f2fs_nat_block *nat_blk = NULL; | 1782 | struct f2fs_nat_block *nat_blk = NULL; |
1715 | nid_t start_nid = 0, end_nid = 0; | 1783 | nid_t start_nid = 0, end_nid = 0; |
@@ -1721,18 +1789,17 @@ void flush_nat_entries(struct f2fs_sb_info *sbi) | |||
1721 | mutex_lock(&curseg->curseg_mutex); | 1789 | mutex_lock(&curseg->curseg_mutex); |
1722 | 1790 | ||
1723 | /* 1) flush dirty nat caches */ | 1791 | /* 1) flush dirty nat caches */ |
1724 | list_for_each_safe(cur, n, &nm_i->dirty_nat_entries) { | 1792 | list_for_each_entry_safe(ne, cur, &nm_i->dirty_nat_entries, list) { |
1725 | struct nat_entry *ne; | ||
1726 | nid_t nid; | 1793 | nid_t nid; |
1727 | struct f2fs_nat_entry raw_ne; | 1794 | struct f2fs_nat_entry raw_ne; |
1728 | int offset = -1; | 1795 | int offset = -1; |
1729 | block_t new_blkaddr; | 1796 | block_t new_blkaddr; |
1730 | 1797 | ||
1731 | ne = list_entry(cur, struct nat_entry, list); | ||
1732 | nid = nat_get_nid(ne); | ||
1733 | |||
1734 | if (nat_get_blkaddr(ne) == NEW_ADDR) | 1798 | if (nat_get_blkaddr(ne) == NEW_ADDR) |
1735 | continue; | 1799 | continue; |
1800 | |||
1801 | nid = nat_get_nid(ne); | ||
1802 | |||
1736 | if (flushed) | 1803 | if (flushed) |
1737 | goto to_nat_page; | 1804 | goto to_nat_page; |
1738 | 1805 | ||
@@ -1783,16 +1850,12 @@ flush_now: | |||
1783 | } else { | 1850 | } else { |
1784 | write_lock(&nm_i->nat_tree_lock); | 1851 | write_lock(&nm_i->nat_tree_lock); |
1785 | __clear_nat_cache_dirty(nm_i, ne); | 1852 | __clear_nat_cache_dirty(nm_i, ne); |
1786 | ne->checkpointed = true; | ||
1787 | write_unlock(&nm_i->nat_tree_lock); | 1853 | write_unlock(&nm_i->nat_tree_lock); |
1788 | } | 1854 | } |
1789 | } | 1855 | } |
1790 | if (!flushed) | 1856 | if (!flushed) |
1791 | mutex_unlock(&curseg->curseg_mutex); | 1857 | mutex_unlock(&curseg->curseg_mutex); |
1792 | f2fs_put_page(page, 1); | 1858 | f2fs_put_page(page, 1); |
1793 | |||
1794 | /* 2) shrink nat caches if necessary */ | ||
1795 | try_to_free_nats(sbi, nm_i->nat_cnt - NM_WOUT_THRESHOLD); | ||
1796 | } | 1859 | } |
1797 | 1860 | ||
1798 | static int init_node_manager(struct f2fs_sb_info *sbi) | 1861 | static int init_node_manager(struct f2fs_sb_info *sbi) |
@@ -1807,10 +1870,14 @@ static int init_node_manager(struct f2fs_sb_info *sbi) | |||
1807 | /* segment_count_nat includes pair segment so divide to 2. */ | 1870 | /* segment_count_nat includes pair segment so divide to 2. */ |
1808 | nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; | 1871 | nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1; |
1809 | nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); | 1872 | nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg); |
1810 | nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; | 1873 | |
1874 | /* not used nids: 0, node, meta, (and root counted as valid node) */ | ||
1875 | nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks - 3; | ||
1811 | nm_i->fcnt = 0; | 1876 | nm_i->fcnt = 0; |
1812 | nm_i->nat_cnt = 0; | 1877 | nm_i->nat_cnt = 0; |
1878 | nm_i->ram_thresh = DEF_RAM_THRESHOLD; | ||
1813 | 1879 | ||
1880 | INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); | ||
1814 | INIT_LIST_HEAD(&nm_i->free_nid_list); | 1881 | INIT_LIST_HEAD(&nm_i->free_nid_list); |
1815 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); | 1882 | INIT_RADIX_TREE(&nm_i->nat_root, GFP_ATOMIC); |
1816 | INIT_LIST_HEAD(&nm_i->nat_entries); | 1883 | INIT_LIST_HEAD(&nm_i->nat_entries); |
@@ -1864,8 +1931,11 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
1864 | spin_lock(&nm_i->free_nid_list_lock); | 1931 | spin_lock(&nm_i->free_nid_list_lock); |
1865 | list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { | 1932 | list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { |
1866 | f2fs_bug_on(i->state == NID_ALLOC); | 1933 | f2fs_bug_on(i->state == NID_ALLOC); |
1867 | __del_from_free_nid_list(i); | 1934 | __del_from_free_nid_list(nm_i, i); |
1868 | nm_i->fcnt--; | 1935 | nm_i->fcnt--; |
1936 | spin_unlock(&nm_i->free_nid_list_lock); | ||
1937 | kmem_cache_free(free_nid_slab, i); | ||
1938 | spin_lock(&nm_i->free_nid_list_lock); | ||
1869 | } | 1939 | } |
1870 | f2fs_bug_on(nm_i->fcnt); | 1940 | f2fs_bug_on(nm_i->fcnt); |
1871 | spin_unlock(&nm_i->free_nid_list_lock); | 1941 | spin_unlock(&nm_i->free_nid_list_lock); |
@@ -1875,11 +1945,9 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
1875 | while ((found = __gang_lookup_nat_cache(nm_i, | 1945 | while ((found = __gang_lookup_nat_cache(nm_i, |
1876 | nid, NATVEC_SIZE, natvec))) { | 1946 | nid, NATVEC_SIZE, natvec))) { |
1877 | unsigned idx; | 1947 | unsigned idx; |
1878 | for (idx = 0; idx < found; idx++) { | 1948 | nid = nat_get_nid(natvec[found - 1]) + 1; |
1879 | struct nat_entry *e = natvec[idx]; | 1949 | for (idx = 0; idx < found; idx++) |
1880 | nid = nat_get_nid(e) + 1; | 1950 | __del_from_nat_cache(nm_i, natvec[idx]); |
1881 | __del_from_nat_cache(nm_i, e); | ||
1882 | } | ||
1883 | } | 1951 | } |
1884 | f2fs_bug_on(nm_i->nat_cnt); | 1952 | f2fs_bug_on(nm_i->nat_cnt); |
1885 | write_unlock(&nm_i->nat_tree_lock); | 1953 | write_unlock(&nm_i->nat_tree_lock); |
@@ -1892,12 +1960,12 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) | |||
1892 | int __init create_node_manager_caches(void) | 1960 | int __init create_node_manager_caches(void) |
1893 | { | 1961 | { |
1894 | nat_entry_slab = f2fs_kmem_cache_create("nat_entry", | 1962 | nat_entry_slab = f2fs_kmem_cache_create("nat_entry", |
1895 | sizeof(struct nat_entry), NULL); | 1963 | sizeof(struct nat_entry)); |
1896 | if (!nat_entry_slab) | 1964 | if (!nat_entry_slab) |
1897 | return -ENOMEM; | 1965 | return -ENOMEM; |
1898 | 1966 | ||
1899 | free_nid_slab = f2fs_kmem_cache_create("free_nid", | 1967 | free_nid_slab = f2fs_kmem_cache_create("free_nid", |
1900 | sizeof(struct free_nid), NULL); | 1968 | sizeof(struct free_nid)); |
1901 | if (!free_nid_slab) { | 1969 | if (!free_nid_slab) { |
1902 | kmem_cache_destroy(nat_entry_slab); | 1970 | kmem_cache_destroy(nat_entry_slab); |
1903 | return -ENOMEM; | 1971 | return -ENOMEM; |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index c4c79885c993..5decc1a375f0 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
@@ -17,14 +17,11 @@ | |||
17 | /* # of pages to perform readahead before building free nids */ | 17 | /* # of pages to perform readahead before building free nids */ |
18 | #define FREE_NID_PAGES 4 | 18 | #define FREE_NID_PAGES 4 |
19 | 19 | ||
20 | /* maximum # of free node ids to produce during build_free_nids */ | ||
21 | #define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) | ||
22 | |||
23 | /* maximum readahead size for node during getting data blocks */ | 20 | /* maximum readahead size for node during getting data blocks */ |
24 | #define MAX_RA_NODE 128 | 21 | #define MAX_RA_NODE 128 |
25 | 22 | ||
26 | /* maximum cached nat entries to manage memory footprint */ | 23 | /* control the memory footprint threshold (10MB per 1GB ram) */ |
27 | #define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) | 24 | #define DEF_RAM_THRESHOLD 10 |
28 | 25 | ||
29 | /* vector size for gang look-up from nat cache that consists of radix tree */ | 26 | /* vector size for gang look-up from nat cache that consists of radix tree */ |
30 | #define NATVEC_SIZE 64 | 27 | #define NATVEC_SIZE 64 |
@@ -45,6 +42,7 @@ struct node_info { | |||
45 | struct nat_entry { | 42 | struct nat_entry { |
46 | struct list_head list; /* for clean or dirty nat list */ | 43 | struct list_head list; /* for clean or dirty nat list */ |
47 | bool checkpointed; /* whether it is checkpointed or not */ | 44 | bool checkpointed; /* whether it is checkpointed or not */ |
45 | bool fsync_done; /* whether the latest node has fsync mark */ | ||
48 | struct node_info ni; /* in-memory node information */ | 46 | struct node_info ni; /* in-memory node information */ |
49 | }; | 47 | }; |
50 | 48 | ||
@@ -58,9 +56,15 @@ struct nat_entry { | |||
58 | #define nat_set_version(nat, v) (nat->ni.version = v) | 56 | #define nat_set_version(nat, v) (nat->ni.version = v) |
59 | 57 | ||
60 | #define __set_nat_cache_dirty(nm_i, ne) \ | 58 | #define __set_nat_cache_dirty(nm_i, ne) \ |
61 | list_move_tail(&ne->list, &nm_i->dirty_nat_entries); | 59 | do { \ |
60 | ne->checkpointed = false; \ | ||
61 | list_move_tail(&ne->list, &nm_i->dirty_nat_entries); \ | ||
62 | } while (0); | ||
62 | #define __clear_nat_cache_dirty(nm_i, ne) \ | 63 | #define __clear_nat_cache_dirty(nm_i, ne) \ |
63 | list_move_tail(&ne->list, &nm_i->nat_entries); | 64 | do { \ |
65 | ne->checkpointed = true; \ | ||
66 | list_move_tail(&ne->list, &nm_i->nat_entries); \ | ||
67 | } while (0); | ||
64 | #define inc_node_version(version) (++version) | 68 | #define inc_node_version(version) (++version) |
65 | 69 | ||
66 | static inline void node_info_from_raw_nat(struct node_info *ni, | 70 | static inline void node_info_from_raw_nat(struct node_info *ni, |
@@ -71,6 +75,11 @@ static inline void node_info_from_raw_nat(struct node_info *ni, | |||
71 | ni->version = raw_ne->version; | 75 | ni->version = raw_ne->version; |
72 | } | 76 | } |
73 | 77 | ||
78 | enum nid_type { | ||
79 | FREE_NIDS, /* indicates the free nid list */ | ||
80 | NAT_ENTRIES /* indicates the cached nat entry */ | ||
81 | }; | ||
82 | |||
74 | /* | 83 | /* |
75 | * For free nid mangement | 84 | * For free nid mangement |
76 | */ | 85 | */ |
@@ -236,7 +245,7 @@ static inline bool IS_DNODE(struct page *node_page) | |||
236 | { | 245 | { |
237 | unsigned int ofs = ofs_of_node(node_page); | 246 | unsigned int ofs = ofs_of_node(node_page); |
238 | 247 | ||
239 | if (ofs == XATTR_NODE_OFFSET) | 248 | if (f2fs_has_xattr_block(ofs)) |
240 | return false; | 249 | return false; |
241 | 250 | ||
242 | if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || | 251 | if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || |
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 976a7a934db5..b1ae89f0f44e 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
@@ -27,14 +27,12 @@ bool space_for_roll_forward(struct f2fs_sb_info *sbi) | |||
27 | static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | 27 | static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, |
28 | nid_t ino) | 28 | nid_t ino) |
29 | { | 29 | { |
30 | struct list_head *this; | ||
31 | struct fsync_inode_entry *entry; | 30 | struct fsync_inode_entry *entry; |
32 | 31 | ||
33 | list_for_each(this, head) { | 32 | list_for_each_entry(entry, head, list) |
34 | entry = list_entry(this, struct fsync_inode_entry, list); | ||
35 | if (entry->inode->i_ino == ino) | 33 | if (entry->inode->i_ino == ino) |
36 | return entry; | 34 | return entry; |
37 | } | 35 | |
38 | return NULL; | 36 | return NULL; |
39 | } | 37 | } |
40 | 38 | ||
@@ -136,7 +134,7 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
136 | 134 | ||
137 | /* get node pages in the current segment */ | 135 | /* get node pages in the current segment */ |
138 | curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); | 136 | curseg = CURSEG_I(sbi, CURSEG_WARM_NODE); |
139 | blkaddr = START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff; | 137 | blkaddr = NEXT_FREE_BLKADDR(sbi, curseg); |
140 | 138 | ||
141 | /* read node page */ | 139 | /* read node page */ |
142 | page = alloc_page(GFP_F2FS_ZERO); | 140 | page = alloc_page(GFP_F2FS_ZERO); |
@@ -218,13 +216,12 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
218 | { | 216 | { |
219 | struct seg_entry *sentry; | 217 | struct seg_entry *sentry; |
220 | unsigned int segno = GET_SEGNO(sbi, blkaddr); | 218 | unsigned int segno = GET_SEGNO(sbi, blkaddr); |
221 | unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & | 219 | unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); |
222 | (sbi->blocks_per_seg - 1); | 220 | struct f2fs_summary_block *sum_node; |
223 | struct f2fs_summary sum; | 221 | struct f2fs_summary sum; |
222 | struct page *sum_page, *node_page; | ||
224 | nid_t ino, nid; | 223 | nid_t ino, nid; |
225 | void *kaddr; | ||
226 | struct inode *inode; | 224 | struct inode *inode; |
227 | struct page *node_page; | ||
228 | unsigned int offset; | 225 | unsigned int offset; |
229 | block_t bidx; | 226 | block_t bidx; |
230 | int i; | 227 | int i; |
@@ -238,18 +235,15 @@ static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
238 | struct curseg_info *curseg = CURSEG_I(sbi, i); | 235 | struct curseg_info *curseg = CURSEG_I(sbi, i); |
239 | if (curseg->segno == segno) { | 236 | if (curseg->segno == segno) { |
240 | sum = curseg->sum_blk->entries[blkoff]; | 237 | sum = curseg->sum_blk->entries[blkoff]; |
241 | break; | 238 | goto got_it; |
242 | } | 239 | } |
243 | } | 240 | } |
244 | if (i > CURSEG_COLD_DATA) { | ||
245 | struct page *sum_page = get_sum_page(sbi, segno); | ||
246 | struct f2fs_summary_block *sum_node; | ||
247 | kaddr = page_address(sum_page); | ||
248 | sum_node = (struct f2fs_summary_block *)kaddr; | ||
249 | sum = sum_node->entries[blkoff]; | ||
250 | f2fs_put_page(sum_page, 1); | ||
251 | } | ||
252 | 241 | ||
242 | sum_page = get_sum_page(sbi, segno); | ||
243 | sum_node = (struct f2fs_summary_block *)page_address(sum_page); | ||
244 | sum = sum_node->entries[blkoff]; | ||
245 | f2fs_put_page(sum_page, 1); | ||
246 | got_it: | ||
253 | /* Use the locked dnode page and inode */ | 247 | /* Use the locked dnode page and inode */ |
254 | nid = le32_to_cpu(sum.nid); | 248 | nid = le32_to_cpu(sum.nid); |
255 | if (dn->inode->i_ino == nid) { | 249 | if (dn->inode->i_ino == nid) { |
@@ -301,6 +295,9 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
301 | if (recover_inline_data(inode, page)) | 295 | if (recover_inline_data(inode, page)) |
302 | goto out; | 296 | goto out; |
303 | 297 | ||
298 | if (recover_xattr_data(inode, page, blkaddr)) | ||
299 | goto out; | ||
300 | |||
304 | start = start_bidx_of_node(ofs_of_node(page), fi); | 301 | start = start_bidx_of_node(ofs_of_node(page), fi); |
305 | if (IS_INODE(page)) | 302 | if (IS_INODE(page)) |
306 | end = start + ADDRS_PER_INODE(fi); | 303 | end = start + ADDRS_PER_INODE(fi); |
@@ -317,7 +314,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
317 | goto out; | 314 | goto out; |
318 | } | 315 | } |
319 | 316 | ||
320 | wait_on_page_writeback(dn.node_page); | 317 | f2fs_wait_on_page_writeback(dn.node_page, NODE); |
321 | 318 | ||
322 | get_node_info(sbi, dn.nid, &ni); | 319 | get_node_info(sbi, dn.nid, &ni); |
323 | f2fs_bug_on(ni.ino != ino_of_node(page)); | 320 | f2fs_bug_on(ni.ino != ino_of_node(page)); |
@@ -437,7 +434,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
437 | bool need_writecp = false; | 434 | bool need_writecp = false; |
438 | 435 | ||
439 | fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", | 436 | fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry", |
440 | sizeof(struct fsync_inode_entry), NULL); | 437 | sizeof(struct fsync_inode_entry)); |
441 | if (!fsync_entry_slab) | 438 | if (!fsync_entry_slab) |
442 | return -ENOMEM; | 439 | return -ENOMEM; |
443 | 440 | ||
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7caac5f2ca9e..085f548be7a3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
@@ -13,6 +13,7 @@ | |||
13 | #include <linux/bio.h> | 13 | #include <linux/bio.h> |
14 | #include <linux/blkdev.h> | 14 | #include <linux/blkdev.h> |
15 | #include <linux/prefetch.h> | 15 | #include <linux/prefetch.h> |
16 | #include <linux/kthread.h> | ||
16 | #include <linux/vmalloc.h> | 17 | #include <linux/vmalloc.h> |
17 | #include <linux/swap.h> | 18 | #include <linux/swap.h> |
18 | 19 | ||
@@ -24,6 +25,7 @@ | |||
24 | #define __reverse_ffz(x) __reverse_ffs(~(x)) | 25 | #define __reverse_ffz(x) __reverse_ffs(~(x)) |
25 | 26 | ||
26 | static struct kmem_cache *discard_entry_slab; | 27 | static struct kmem_cache *discard_entry_slab; |
28 | static struct kmem_cache *flush_cmd_slab; | ||
27 | 29 | ||
28 | /* | 30 | /* |
29 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since | 31 | * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since |
@@ -195,6 +197,73 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) | |||
195 | f2fs_sync_fs(sbi->sb, true); | 197 | f2fs_sync_fs(sbi->sb, true); |
196 | } | 198 | } |
197 | 199 | ||
200 | static int issue_flush_thread(void *data) | ||
201 | { | ||
202 | struct f2fs_sb_info *sbi = data; | ||
203 | struct f2fs_sm_info *sm_i = SM_I(sbi); | ||
204 | wait_queue_head_t *q = &sm_i->flush_wait_queue; | ||
205 | repeat: | ||
206 | if (kthread_should_stop()) | ||
207 | return 0; | ||
208 | |||
209 | spin_lock(&sm_i->issue_lock); | ||
210 | if (sm_i->issue_list) { | ||
211 | sm_i->dispatch_list = sm_i->issue_list; | ||
212 | sm_i->issue_list = sm_i->issue_tail = NULL; | ||
213 | } | ||
214 | spin_unlock(&sm_i->issue_lock); | ||
215 | |||
216 | if (sm_i->dispatch_list) { | ||
217 | struct bio *bio = bio_alloc(GFP_NOIO, 0); | ||
218 | struct flush_cmd *cmd, *next; | ||
219 | int ret; | ||
220 | |||
221 | bio->bi_bdev = sbi->sb->s_bdev; | ||
222 | ret = submit_bio_wait(WRITE_FLUSH, bio); | ||
223 | |||
224 | for (cmd = sm_i->dispatch_list; cmd; cmd = next) { | ||
225 | cmd->ret = ret; | ||
226 | next = cmd->next; | ||
227 | complete(&cmd->wait); | ||
228 | } | ||
229 | sm_i->dispatch_list = NULL; | ||
230 | } | ||
231 | |||
232 | wait_event_interruptible(*q, kthread_should_stop() || sm_i->issue_list); | ||
233 | goto repeat; | ||
234 | } | ||
235 | |||
236 | int f2fs_issue_flush(struct f2fs_sb_info *sbi) | ||
237 | { | ||
238 | struct f2fs_sm_info *sm_i = SM_I(sbi); | ||
239 | struct flush_cmd *cmd; | ||
240 | int ret; | ||
241 | |||
242 | if (!test_opt(sbi, FLUSH_MERGE)) | ||
243 | return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL); | ||
244 | |||
245 | cmd = f2fs_kmem_cache_alloc(flush_cmd_slab, GFP_ATOMIC); | ||
246 | cmd->next = NULL; | ||
247 | cmd->ret = 0; | ||
248 | init_completion(&cmd->wait); | ||
249 | |||
250 | spin_lock(&sm_i->issue_lock); | ||
251 | if (sm_i->issue_list) | ||
252 | sm_i->issue_tail->next = cmd; | ||
253 | else | ||
254 | sm_i->issue_list = cmd; | ||
255 | sm_i->issue_tail = cmd; | ||
256 | spin_unlock(&sm_i->issue_lock); | ||
257 | |||
258 | if (!sm_i->dispatch_list) | ||
259 | wake_up(&sm_i->flush_wait_queue); | ||
260 | |||
261 | wait_for_completion(&cmd->wait); | ||
262 | ret = cmd->ret; | ||
263 | kmem_cache_free(flush_cmd_slab, cmd); | ||
264 | return ret; | ||
265 | } | ||
266 | |||
198 | static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | 267 | static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, |
199 | enum dirty_type dirty_type) | 268 | enum dirty_type dirty_type) |
200 | { | 269 | { |
@@ -340,8 +409,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |||
340 | void clear_prefree_segments(struct f2fs_sb_info *sbi) | 409 | void clear_prefree_segments(struct f2fs_sb_info *sbi) |
341 | { | 410 | { |
342 | struct list_head *head = &(SM_I(sbi)->discard_list); | 411 | struct list_head *head = &(SM_I(sbi)->discard_list); |
343 | struct list_head *this, *next; | 412 | struct discard_entry *entry, *this; |
344 | struct discard_entry *entry; | ||
345 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 413 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
346 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; | 414 | unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; |
347 | unsigned int total_segs = TOTAL_SEGS(sbi); | 415 | unsigned int total_segs = TOTAL_SEGS(sbi); |
@@ -370,8 +438,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi) | |||
370 | mutex_unlock(&dirty_i->seglist_lock); | 438 | mutex_unlock(&dirty_i->seglist_lock); |
371 | 439 | ||
372 | /* send small discards */ | 440 | /* send small discards */ |
373 | list_for_each_safe(this, next, head) { | 441 | list_for_each_entry_safe(entry, this, head, list) { |
374 | entry = list_entry(this, struct discard_entry, list); | ||
375 | f2fs_issue_discard(sbi, entry->blkaddr, entry->len); | 442 | f2fs_issue_discard(sbi, entry->blkaddr, entry->len); |
376 | list_del(&entry->list); | 443 | list_del(&entry->list); |
377 | SM_I(sbi)->nr_discards -= entry->len; | 444 | SM_I(sbi)->nr_discards -= entry->len; |
@@ -405,7 +472,7 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
405 | 472 | ||
406 | se = get_seg_entry(sbi, segno); | 473 | se = get_seg_entry(sbi, segno); |
407 | new_vblocks = se->valid_blocks + del; | 474 | new_vblocks = se->valid_blocks + del; |
408 | offset = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & (sbi->blocks_per_seg - 1); | 475 | offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr); |
409 | 476 | ||
410 | f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || | 477 | f2fs_bug_on((new_vblocks >> (sizeof(unsigned short) << 3) || |
411 | (new_vblocks > sbi->blocks_per_seg))); | 478 | (new_vblocks > sbi->blocks_per_seg))); |
@@ -434,12 +501,14 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del) | |||
434 | get_sec_entry(sbi, segno)->valid_blocks += del; | 501 | get_sec_entry(sbi, segno)->valid_blocks += del; |
435 | } | 502 | } |
436 | 503 | ||
437 | static void refresh_sit_entry(struct f2fs_sb_info *sbi, | 504 | void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new) |
438 | block_t old_blkaddr, block_t new_blkaddr) | ||
439 | { | 505 | { |
440 | update_sit_entry(sbi, new_blkaddr, 1); | 506 | update_sit_entry(sbi, new, 1); |
441 | if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) | 507 | if (GET_SEGNO(sbi, old) != NULL_SEGNO) |
442 | update_sit_entry(sbi, old_blkaddr, -1); | 508 | update_sit_entry(sbi, old, -1); |
509 | |||
510 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old)); | ||
511 | locate_dirty_segment(sbi, GET_SEGNO(sbi, new)); | ||
443 | } | 512 | } |
444 | 513 | ||
445 | void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) | 514 | void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) |
@@ -881,17 +950,15 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, | |||
881 | 950 | ||
882 | stat_inc_block_count(sbi, curseg); | 951 | stat_inc_block_count(sbi, curseg); |
883 | 952 | ||
953 | if (!__has_curseg_space(sbi, type)) | ||
954 | sit_i->s_ops->allocate_segment(sbi, type, false); | ||
884 | /* | 955 | /* |
885 | * SIT information should be updated before segment allocation, | 956 | * SIT information should be updated before segment allocation, |
886 | * since SSR needs latest valid block information. | 957 | * since SSR needs latest valid block information. |
887 | */ | 958 | */ |
888 | refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); | 959 | refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); |
889 | |||
890 | if (!__has_curseg_space(sbi, type)) | ||
891 | sit_i->s_ops->allocate_segment(sbi, type, false); | ||
892 | |||
893 | locate_dirty_segment(sbi, old_cursegno); | 960 | locate_dirty_segment(sbi, old_cursegno); |
894 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | 961 | |
895 | mutex_unlock(&sit_i->sentry_lock); | 962 | mutex_unlock(&sit_i->sentry_lock); |
896 | 963 | ||
897 | if (page && IS_NODESEG(type)) | 964 | if (page && IS_NODESEG(type)) |
@@ -987,14 +1054,11 @@ void recover_data_page(struct f2fs_sb_info *sbi, | |||
987 | change_curseg(sbi, type, true); | 1054 | change_curseg(sbi, type, true); |
988 | } | 1055 | } |
989 | 1056 | ||
990 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | 1057 | curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); |
991 | (sbi->blocks_per_seg - 1); | ||
992 | __add_sum_entry(sbi, type, sum); | 1058 | __add_sum_entry(sbi, type, sum); |
993 | 1059 | ||
994 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | 1060 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); |
995 | |||
996 | locate_dirty_segment(sbi, old_cursegno); | 1061 | locate_dirty_segment(sbi, old_cursegno); |
997 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | ||
998 | 1062 | ||
999 | mutex_unlock(&sit_i->sentry_lock); | 1063 | mutex_unlock(&sit_i->sentry_lock); |
1000 | mutex_unlock(&curseg->curseg_mutex); | 1064 | mutex_unlock(&curseg->curseg_mutex); |
@@ -1028,8 +1092,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
1028 | curseg->next_segno = segno; | 1092 | curseg->next_segno = segno; |
1029 | change_curseg(sbi, type, true); | 1093 | change_curseg(sbi, type, true); |
1030 | } | 1094 | } |
1031 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | 1095 | curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr); |
1032 | (sbi->blocks_per_seg - 1); | ||
1033 | __add_sum_entry(sbi, type, sum); | 1096 | __add_sum_entry(sbi, type, sum); |
1034 | 1097 | ||
1035 | /* change the current log to the next block addr in advance */ | 1098 | /* change the current log to the next block addr in advance */ |
@@ -1037,28 +1100,50 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
1037 | curseg->next_segno = next_segno; | 1100 | curseg->next_segno = next_segno; |
1038 | change_curseg(sbi, type, true); | 1101 | change_curseg(sbi, type, true); |
1039 | } | 1102 | } |
1040 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, next_blkaddr) & | 1103 | curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, next_blkaddr); |
1041 | (sbi->blocks_per_seg - 1); | ||
1042 | 1104 | ||
1043 | /* rewrite node page */ | 1105 | /* rewrite node page */ |
1044 | set_page_writeback(page); | 1106 | set_page_writeback(page); |
1045 | f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); | 1107 | f2fs_submit_page_mbio(sbi, page, new_blkaddr, &fio); |
1046 | f2fs_submit_merged_bio(sbi, NODE, WRITE); | 1108 | f2fs_submit_merged_bio(sbi, NODE, WRITE); |
1047 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | 1109 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); |
1048 | |||
1049 | locate_dirty_segment(sbi, old_cursegno); | 1110 | locate_dirty_segment(sbi, old_cursegno); |
1050 | locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr)); | ||
1051 | 1111 | ||
1052 | mutex_unlock(&sit_i->sentry_lock); | 1112 | mutex_unlock(&sit_i->sentry_lock); |
1053 | mutex_unlock(&curseg->curseg_mutex); | 1113 | mutex_unlock(&curseg->curseg_mutex); |
1054 | } | 1114 | } |
1055 | 1115 | ||
1116 | static inline bool is_merged_page(struct f2fs_sb_info *sbi, | ||
1117 | struct page *page, enum page_type type) | ||
1118 | { | ||
1119 | enum page_type btype = PAGE_TYPE_OF_BIO(type); | ||
1120 | struct f2fs_bio_info *io = &sbi->write_io[btype]; | ||
1121 | struct bio_vec *bvec; | ||
1122 | int i; | ||
1123 | |||
1124 | down_read(&io->io_rwsem); | ||
1125 | if (!io->bio) | ||
1126 | goto out; | ||
1127 | |||
1128 | bio_for_each_segment_all(bvec, io->bio, i) { | ||
1129 | if (page == bvec->bv_page) { | ||
1130 | up_read(&io->io_rwsem); | ||
1131 | return true; | ||
1132 | } | ||
1133 | } | ||
1134 | |||
1135 | out: | ||
1136 | up_read(&io->io_rwsem); | ||
1137 | return false; | ||
1138 | } | ||
1139 | |||
1056 | void f2fs_wait_on_page_writeback(struct page *page, | 1140 | void f2fs_wait_on_page_writeback(struct page *page, |
1057 | enum page_type type) | 1141 | enum page_type type) |
1058 | { | 1142 | { |
1059 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); | 1143 | struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); |
1060 | if (PageWriteback(page)) { | 1144 | if (PageWriteback(page)) { |
1061 | f2fs_submit_merged_bio(sbi, type, WRITE); | 1145 | if (is_merged_page(sbi, page, type)) |
1146 | f2fs_submit_merged_bio(sbi, type, WRITE); | ||
1062 | wait_on_page_writeback(page); | 1147 | wait_on_page_writeback(page); |
1063 | } | 1148 | } |
1064 | } | 1149 | } |
@@ -1167,9 +1252,12 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) | |||
1167 | ns->ofs_in_node = 0; | 1252 | ns->ofs_in_node = 0; |
1168 | } | 1253 | } |
1169 | } else { | 1254 | } else { |
1170 | if (restore_node_summary(sbi, segno, sum)) { | 1255 | int err; |
1256 | |||
1257 | err = restore_node_summary(sbi, segno, sum); | ||
1258 | if (err) { | ||
1171 | f2fs_put_page(new, 1); | 1259 | f2fs_put_page(new, 1); |
1172 | return -EINVAL; | 1260 | return err; |
1173 | } | 1261 | } |
1174 | } | 1262 | } |
1175 | } | 1263 | } |
@@ -1190,6 +1278,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type) | |||
1190 | static int restore_curseg_summaries(struct f2fs_sb_info *sbi) | 1278 | static int restore_curseg_summaries(struct f2fs_sb_info *sbi) |
1191 | { | 1279 | { |
1192 | int type = CURSEG_HOT_DATA; | 1280 | int type = CURSEG_HOT_DATA; |
1281 | int err; | ||
1193 | 1282 | ||
1194 | if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { | 1283 | if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) { |
1195 | /* restore for compacted data summary */ | 1284 | /* restore for compacted data summary */ |
@@ -1198,9 +1287,12 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi) | |||
1198 | type = CURSEG_HOT_NODE; | 1287 | type = CURSEG_HOT_NODE; |
1199 | } | 1288 | } |
1200 | 1289 | ||
1201 | for (; type <= CURSEG_COLD_NODE; type++) | 1290 | for (; type <= CURSEG_COLD_NODE; type++) { |
1202 | if (read_normal_summaries(sbi, type)) | 1291 | err = read_normal_summaries(sbi, type); |
1203 | return -EINVAL; | 1292 | if (err) |
1293 | return err; | ||
1294 | } | ||
1295 | |||
1204 | return 0; | 1296 | return 0; |
1205 | } | 1297 | } |
1206 | 1298 | ||
@@ -1583,47 +1675,6 @@ static int build_curseg(struct f2fs_sb_info *sbi) | |||
1583 | return restore_curseg_summaries(sbi); | 1675 | return restore_curseg_summaries(sbi); |
1584 | } | 1676 | } |
1585 | 1677 | ||
1586 | static int ra_sit_pages(struct f2fs_sb_info *sbi, int start, int nrpages) | ||
1587 | { | ||
1588 | struct address_space *mapping = META_MAPPING(sbi); | ||
1589 | struct page *page; | ||
1590 | block_t blk_addr, prev_blk_addr = 0; | ||
1591 | int sit_blk_cnt = SIT_BLK_CNT(sbi); | ||
1592 | int blkno = start; | ||
1593 | struct f2fs_io_info fio = { | ||
1594 | .type = META, | ||
1595 | .rw = READ_SYNC | REQ_META | REQ_PRIO | ||
1596 | }; | ||
1597 | |||
1598 | for (; blkno < start + nrpages && blkno < sit_blk_cnt; blkno++) { | ||
1599 | |||
1600 | blk_addr = current_sit_addr(sbi, blkno * SIT_ENTRY_PER_BLOCK); | ||
1601 | |||
1602 | if (blkno != start && prev_blk_addr + 1 != blk_addr) | ||
1603 | break; | ||
1604 | prev_blk_addr = blk_addr; | ||
1605 | repeat: | ||
1606 | page = grab_cache_page(mapping, blk_addr); | ||
1607 | if (!page) { | ||
1608 | cond_resched(); | ||
1609 | goto repeat; | ||
1610 | } | ||
1611 | if (PageUptodate(page)) { | ||
1612 | mark_page_accessed(page); | ||
1613 | f2fs_put_page(page, 1); | ||
1614 | continue; | ||
1615 | } | ||
1616 | |||
1617 | f2fs_submit_page_mbio(sbi, page, blk_addr, &fio); | ||
1618 | |||
1619 | mark_page_accessed(page); | ||
1620 | f2fs_put_page(page, 0); | ||
1621 | } | ||
1622 | |||
1623 | f2fs_submit_merged_bio(sbi, META, READ); | ||
1624 | return blkno - start; | ||
1625 | } | ||
1626 | |||
1627 | static void build_sit_entries(struct f2fs_sb_info *sbi) | 1678 | static void build_sit_entries(struct f2fs_sb_info *sbi) |
1628 | { | 1679 | { |
1629 | struct sit_info *sit_i = SIT_I(sbi); | 1680 | struct sit_info *sit_i = SIT_I(sbi); |
@@ -1635,7 +1686,7 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) | |||
1635 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | 1686 | int nrpages = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); |
1636 | 1687 | ||
1637 | do { | 1688 | do { |
1638 | readed = ra_sit_pages(sbi, start_blk, nrpages); | 1689 | readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT); |
1639 | 1690 | ||
1640 | start = start_blk * sit_i->sents_per_block; | 1691 | start = start_blk * sit_i->sents_per_block; |
1641 | end = (start_blk + readed) * sit_i->sents_per_block; | 1692 | end = (start_blk + readed) * sit_i->sents_per_block; |
@@ -1781,6 +1832,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
1781 | { | 1832 | { |
1782 | struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); | 1833 | struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi); |
1783 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); | 1834 | struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); |
1835 | dev_t dev = sbi->sb->s_bdev->bd_dev; | ||
1784 | struct f2fs_sm_info *sm_info; | 1836 | struct f2fs_sm_info *sm_info; |
1785 | int err; | 1837 | int err; |
1786 | 1838 | ||
@@ -1799,7 +1851,8 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
1799 | sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); | 1851 | sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count); |
1800 | sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); | 1852 | sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main); |
1801 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); | 1853 | sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr); |
1802 | sm_info->rec_prefree_segments = DEF_RECLAIM_PREFREE_SEGMENTS; | 1854 | sm_info->rec_prefree_segments = sm_info->main_segments * |
1855 | DEF_RECLAIM_PREFREE_SEGMENTS / 100; | ||
1803 | sm_info->ipu_policy = F2FS_IPU_DISABLE; | 1856 | sm_info->ipu_policy = F2FS_IPU_DISABLE; |
1804 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; | 1857 | sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; |
1805 | 1858 | ||
@@ -1807,6 +1860,16 @@ int build_segment_manager(struct f2fs_sb_info *sbi) | |||
1807 | sm_info->nr_discards = 0; | 1860 | sm_info->nr_discards = 0; |
1808 | sm_info->max_discards = 0; | 1861 | sm_info->max_discards = 0; |
1809 | 1862 | ||
1863 | if (test_opt(sbi, FLUSH_MERGE)) { | ||
1864 | spin_lock_init(&sm_info->issue_lock); | ||
1865 | init_waitqueue_head(&sm_info->flush_wait_queue); | ||
1866 | |||
1867 | sm_info->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi, | ||
1868 | "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev)); | ||
1869 | if (IS_ERR(sm_info->f2fs_issue_flush)) | ||
1870 | return PTR_ERR(sm_info->f2fs_issue_flush); | ||
1871 | } | ||
1872 | |||
1810 | err = build_sit_info(sbi); | 1873 | err = build_sit_info(sbi); |
1811 | if (err) | 1874 | if (err) |
1812 | return err; | 1875 | return err; |
@@ -1915,6 +1978,8 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) | |||
1915 | struct f2fs_sm_info *sm_info = SM_I(sbi); | 1978 | struct f2fs_sm_info *sm_info = SM_I(sbi); |
1916 | if (!sm_info) | 1979 | if (!sm_info) |
1917 | return; | 1980 | return; |
1981 | if (sm_info->f2fs_issue_flush) | ||
1982 | kthread_stop(sm_info->f2fs_issue_flush); | ||
1918 | destroy_dirty_segmap(sbi); | 1983 | destroy_dirty_segmap(sbi); |
1919 | destroy_curseg(sbi); | 1984 | destroy_curseg(sbi); |
1920 | destroy_free_segmap(sbi); | 1985 | destroy_free_segmap(sbi); |
@@ -1926,13 +1991,20 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) | |||
1926 | int __init create_segment_manager_caches(void) | 1991 | int __init create_segment_manager_caches(void) |
1927 | { | 1992 | { |
1928 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", | 1993 | discard_entry_slab = f2fs_kmem_cache_create("discard_entry", |
1929 | sizeof(struct discard_entry), NULL); | 1994 | sizeof(struct discard_entry)); |
1930 | if (!discard_entry_slab) | 1995 | if (!discard_entry_slab) |
1931 | return -ENOMEM; | 1996 | return -ENOMEM; |
1997 | flush_cmd_slab = f2fs_kmem_cache_create("flush_command", | ||
1998 | sizeof(struct flush_cmd)); | ||
1999 | if (!flush_cmd_slab) { | ||
2000 | kmem_cache_destroy(discard_entry_slab); | ||
2001 | return -ENOMEM; | ||
2002 | } | ||
1932 | return 0; | 2003 | return 0; |
1933 | } | 2004 | } |
1934 | 2005 | ||
1935 | void destroy_segment_manager_caches(void) | 2006 | void destroy_segment_manager_caches(void) |
1936 | { | 2007 | { |
1937 | kmem_cache_destroy(discard_entry_slab); | 2008 | kmem_cache_destroy(discard_entry_slab); |
2009 | kmem_cache_destroy(flush_cmd_slab); | ||
1938 | } | 2010 | } |
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5731682d7516..7091204680f4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h | |||
@@ -14,7 +14,7 @@ | |||
14 | #define NULL_SEGNO ((unsigned int)(~0)) | 14 | #define NULL_SEGNO ((unsigned int)(~0)) |
15 | #define NULL_SECNO ((unsigned int)(~0)) | 15 | #define NULL_SECNO ((unsigned int)(~0)) |
16 | 16 | ||
17 | #define DEF_RECLAIM_PREFREE_SEGMENTS 100 /* 200MB of prefree segments */ | 17 | #define DEF_RECLAIM_PREFREE_SEGMENTS 5 /* 5% over total segments */ |
18 | 18 | ||
19 | /* L: Logical segment # in volume, R: Relative segment # in main area */ | 19 | /* L: Logical segment # in volume, R: Relative segment # in main area */ |
20 | #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) | 20 | #define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) |
@@ -57,6 +57,9 @@ | |||
57 | ((blk_addr) - SM_I(sbi)->seg0_blkaddr) | 57 | ((blk_addr) - SM_I(sbi)->seg0_blkaddr) |
58 | #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ | 58 | #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ |
59 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) | 59 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) |
60 | #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ | ||
61 | (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) | ||
62 | |||
60 | #define GET_SEGNO(sbi, blk_addr) \ | 63 | #define GET_SEGNO(sbi, blk_addr) \ |
61 | (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ | 64 | (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ |
62 | NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ | 65 | NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ |
@@ -377,26 +380,12 @@ static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, | |||
377 | 380 | ||
378 | static inline block_t written_block_count(struct f2fs_sb_info *sbi) | 381 | static inline block_t written_block_count(struct f2fs_sb_info *sbi) |
379 | { | 382 | { |
380 | struct sit_info *sit_i = SIT_I(sbi); | 383 | return SIT_I(sbi)->written_valid_blocks; |
381 | block_t vblocks; | ||
382 | |||
383 | mutex_lock(&sit_i->sentry_lock); | ||
384 | vblocks = sit_i->written_valid_blocks; | ||
385 | mutex_unlock(&sit_i->sentry_lock); | ||
386 | |||
387 | return vblocks; | ||
388 | } | 384 | } |
389 | 385 | ||
390 | static inline unsigned int free_segments(struct f2fs_sb_info *sbi) | 386 | static inline unsigned int free_segments(struct f2fs_sb_info *sbi) |
391 | { | 387 | { |
392 | struct free_segmap_info *free_i = FREE_I(sbi); | 388 | return FREE_I(sbi)->free_segments; |
393 | unsigned int free_segs; | ||
394 | |||
395 | read_lock(&free_i->segmap_lock); | ||
396 | free_segs = free_i->free_segments; | ||
397 | read_unlock(&free_i->segmap_lock); | ||
398 | |||
399 | return free_segs; | ||
400 | } | 389 | } |
401 | 390 | ||
402 | static inline int reserved_segments(struct f2fs_sb_info *sbi) | 391 | static inline int reserved_segments(struct f2fs_sb_info *sbi) |
@@ -406,14 +395,7 @@ static inline int reserved_segments(struct f2fs_sb_info *sbi) | |||
406 | 395 | ||
407 | static inline unsigned int free_sections(struct f2fs_sb_info *sbi) | 396 | static inline unsigned int free_sections(struct f2fs_sb_info *sbi) |
408 | { | 397 | { |
409 | struct free_segmap_info *free_i = FREE_I(sbi); | 398 | return FREE_I(sbi)->free_sections; |
410 | unsigned int free_secs; | ||
411 | |||
412 | read_lock(&free_i->segmap_lock); | ||
413 | free_secs = free_i->free_sections; | ||
414 | read_unlock(&free_i->segmap_lock); | ||
415 | |||
416 | return free_secs; | ||
417 | } | 399 | } |
418 | 400 | ||
419 | static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) | 401 | static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) |
@@ -682,3 +664,46 @@ static inline unsigned int max_hw_blocks(struct f2fs_sb_info *sbi) | |||
682 | struct request_queue *q = bdev_get_queue(bdev); | 664 | struct request_queue *q = bdev_get_queue(bdev); |
683 | return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); | 665 | return SECTOR_TO_BLOCK(sbi, queue_max_sectors(q)); |
684 | } | 666 | } |
667 | |||
668 | /* | ||
669 | * It is very important to gather dirty pages and write at once, so that we can | ||
670 | * submit a big bio without interfering other data writes. | ||
671 | * By default, 512 pages for directory data, | ||
672 | * 512 pages (2MB) * 3 for three types of nodes, and | ||
673 | * max_bio_blocks for meta are set. | ||
674 | */ | ||
675 | static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type) | ||
676 | { | ||
677 | if (type == DATA) | ||
678 | return sbi->blocks_per_seg; | ||
679 | else if (type == NODE) | ||
680 | return 3 * sbi->blocks_per_seg; | ||
681 | else if (type == META) | ||
682 | return MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | ||
683 | else | ||
684 | return 0; | ||
685 | } | ||
686 | |||
687 | /* | ||
688 | * When writing pages, it'd better align nr_to_write for segment size. | ||
689 | */ | ||
690 | static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type, | ||
691 | struct writeback_control *wbc) | ||
692 | { | ||
693 | long nr_to_write, desired; | ||
694 | |||
695 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
696 | return 0; | ||
697 | |||
698 | nr_to_write = wbc->nr_to_write; | ||
699 | |||
700 | if (type == DATA) | ||
701 | desired = 4096; | ||
702 | else if (type == NODE) | ||
703 | desired = 3 * max_hw_blocks(sbi); | ||
704 | else | ||
705 | desired = MAX_BIO_BLOCKS(max_hw_blocks(sbi)); | ||
706 | |||
707 | wbc->nr_to_write = desired; | ||
708 | return desired - nr_to_write; | ||
709 | } | ||
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 1a85f83abd53..c756923a7302 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
@@ -51,6 +51,7 @@ enum { | |||
51 | Opt_disable_ext_identify, | 51 | Opt_disable_ext_identify, |
52 | Opt_inline_xattr, | 52 | Opt_inline_xattr, |
53 | Opt_inline_data, | 53 | Opt_inline_data, |
54 | Opt_flush_merge, | ||
54 | Opt_err, | 55 | Opt_err, |
55 | }; | 56 | }; |
56 | 57 | ||
@@ -67,6 +68,7 @@ static match_table_t f2fs_tokens = { | |||
67 | {Opt_disable_ext_identify, "disable_ext_identify"}, | 68 | {Opt_disable_ext_identify, "disable_ext_identify"}, |
68 | {Opt_inline_xattr, "inline_xattr"}, | 69 | {Opt_inline_xattr, "inline_xattr"}, |
69 | {Opt_inline_data, "inline_data"}, | 70 | {Opt_inline_data, "inline_data"}, |
71 | {Opt_flush_merge, "flush_merge"}, | ||
70 | {Opt_err, NULL}, | 72 | {Opt_err, NULL}, |
71 | }; | 73 | }; |
72 | 74 | ||
@@ -74,6 +76,7 @@ static match_table_t f2fs_tokens = { | |||
74 | enum { | 76 | enum { |
75 | GC_THREAD, /* struct f2fs_gc_thread */ | 77 | GC_THREAD, /* struct f2fs_gc_thread */ |
76 | SM_INFO, /* struct f2fs_sm_info */ | 78 | SM_INFO, /* struct f2fs_sm_info */ |
79 | NM_INFO, /* struct f2fs_nm_info */ | ||
77 | F2FS_SBI, /* struct f2fs_sb_info */ | 80 | F2FS_SBI, /* struct f2fs_sb_info */ |
78 | }; | 81 | }; |
79 | 82 | ||
@@ -92,6 +95,8 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type) | |||
92 | return (unsigned char *)sbi->gc_thread; | 95 | return (unsigned char *)sbi->gc_thread; |
93 | else if (struct_type == SM_INFO) | 96 | else if (struct_type == SM_INFO) |
94 | return (unsigned char *)SM_I(sbi); | 97 | return (unsigned char *)SM_I(sbi); |
98 | else if (struct_type == NM_INFO) | ||
99 | return (unsigned char *)NM_I(sbi); | ||
95 | else if (struct_type == F2FS_SBI) | 100 | else if (struct_type == F2FS_SBI) |
96 | return (unsigned char *)sbi; | 101 | return (unsigned char *)sbi; |
97 | return NULL; | 102 | return NULL; |
@@ -183,7 +188,9 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, reclaim_segments, rec_prefree_segments); | |||
183 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); | 188 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, max_small_discards, max_discards); |
184 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); | 189 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); |
185 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); | 190 | F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); |
191 | F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); | ||
186 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); | 192 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search); |
193 | F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level); | ||
187 | 194 | ||
188 | #define ATTR_LIST(name) (&f2fs_attr_##name.attr) | 195 | #define ATTR_LIST(name) (&f2fs_attr_##name.attr) |
189 | static struct attribute *f2fs_attrs[] = { | 196 | static struct attribute *f2fs_attrs[] = { |
@@ -196,6 +203,8 @@ static struct attribute *f2fs_attrs[] = { | |||
196 | ATTR_LIST(ipu_policy), | 203 | ATTR_LIST(ipu_policy), |
197 | ATTR_LIST(min_ipu_util), | 204 | ATTR_LIST(min_ipu_util), |
198 | ATTR_LIST(max_victim_search), | 205 | ATTR_LIST(max_victim_search), |
206 | ATTR_LIST(dir_level), | ||
207 | ATTR_LIST(ram_thresh), | ||
199 | NULL, | 208 | NULL, |
200 | }; | 209 | }; |
201 | 210 | ||
@@ -256,9 +265,9 @@ static int parse_options(struct super_block *sb, char *options) | |||
256 | 265 | ||
257 | if (!name) | 266 | if (!name) |
258 | return -ENOMEM; | 267 | return -ENOMEM; |
259 | if (!strncmp(name, "on", 2)) | 268 | if (strlen(name) == 2 && !strncmp(name, "on", 2)) |
260 | set_opt(sbi, BG_GC); | 269 | set_opt(sbi, BG_GC); |
261 | else if (!strncmp(name, "off", 3)) | 270 | else if (strlen(name) == 3 && !strncmp(name, "off", 3)) |
262 | clear_opt(sbi, BG_GC); | 271 | clear_opt(sbi, BG_GC); |
263 | else { | 272 | else { |
264 | kfree(name); | 273 | kfree(name); |
@@ -327,6 +336,9 @@ static int parse_options(struct super_block *sb, char *options) | |||
327 | case Opt_inline_data: | 336 | case Opt_inline_data: |
328 | set_opt(sbi, INLINE_DATA); | 337 | set_opt(sbi, INLINE_DATA); |
329 | break; | 338 | break; |
339 | case Opt_flush_merge: | ||
340 | set_opt(sbi, FLUSH_MERGE); | ||
341 | break; | ||
330 | default: | 342 | default: |
331 | f2fs_msg(sb, KERN_ERR, | 343 | f2fs_msg(sb, KERN_ERR, |
332 | "Unrecognized mount option \"%s\" or missing value", | 344 | "Unrecognized mount option \"%s\" or missing value", |
@@ -353,12 +365,16 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb) | |||
353 | fi->i_current_depth = 1; | 365 | fi->i_current_depth = 1; |
354 | fi->i_advise = 0; | 366 | fi->i_advise = 0; |
355 | rwlock_init(&fi->ext.ext_lock); | 367 | rwlock_init(&fi->ext.ext_lock); |
368 | init_rwsem(&fi->i_sem); | ||
356 | 369 | ||
357 | set_inode_flag(fi, FI_NEW_INODE); | 370 | set_inode_flag(fi, FI_NEW_INODE); |
358 | 371 | ||
359 | if (test_opt(F2FS_SB(sb), INLINE_XATTR)) | 372 | if (test_opt(F2FS_SB(sb), INLINE_XATTR)) |
360 | set_inode_flag(fi, FI_INLINE_XATTR); | 373 | set_inode_flag(fi, FI_INLINE_XATTR); |
361 | 374 | ||
375 | /* Will be used by directory only */ | ||
376 | fi->i_dir_level = F2FS_SB(sb)->dir_level; | ||
377 | |||
362 | return &fi->vfs_inode; | 378 | return &fi->vfs_inode; |
363 | } | 379 | } |
364 | 380 | ||
@@ -526,6 +542,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
526 | seq_puts(seq, ",disable_ext_identify"); | 542 | seq_puts(seq, ",disable_ext_identify"); |
527 | if (test_opt(sbi, INLINE_DATA)) | 543 | if (test_opt(sbi, INLINE_DATA)) |
528 | seq_puts(seq, ",inline_data"); | 544 | seq_puts(seq, ",inline_data"); |
545 | if (test_opt(sbi, FLUSH_MERGE)) | ||
546 | seq_puts(seq, ",flush_merge"); | ||
529 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); | 547 | seq_printf(seq, ",active_logs=%u", sbi->active_logs); |
530 | 548 | ||
531 | return 0; | 549 | return 0; |
@@ -539,13 +557,22 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) | |||
539 | le32_to_cpu(sbi->raw_super->segment_count_main); | 557 | le32_to_cpu(sbi->raw_super->segment_count_main); |
540 | int i; | 558 | int i; |
541 | 559 | ||
560 | seq_puts(seq, "format: segment_type|valid_blocks\n" | ||
561 | "segment_type(0:HD, 1:WD, 2:CD, 3:HN, 4:WN, 5:CN)\n"); | ||
562 | |||
542 | for (i = 0; i < total_segs; i++) { | 563 | for (i = 0; i < total_segs; i++) { |
543 | seq_printf(seq, "%u", get_valid_blocks(sbi, i, 1)); | 564 | struct seg_entry *se = get_seg_entry(sbi, i); |
544 | if (i != 0 && (i % 10) == 0) | 565 | |
545 | seq_puts(seq, "\n"); | 566 | if ((i % 10) == 0) |
567 | seq_printf(seq, "%-5d", i); | ||
568 | seq_printf(seq, "%d|%-3u", se->type, | ||
569 | get_valid_blocks(sbi, i, 1)); | ||
570 | if ((i % 10) == 9 || i == (total_segs - 1)) | ||
571 | seq_putc(seq, '\n'); | ||
546 | else | 572 | else |
547 | seq_puts(seq, " "); | 573 | seq_putc(seq, ' '); |
548 | } | 574 | } |
575 | |||
549 | return 0; | 576 | return 0; |
550 | } | 577 | } |
551 | 578 | ||
@@ -568,6 +595,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data) | |||
568 | struct f2fs_mount_info org_mount_opt; | 595 | struct f2fs_mount_info org_mount_opt; |
569 | int err, active_logs; | 596 | int err, active_logs; |
570 | 597 | ||
598 | sync_filesystem(sb); | ||
599 | |||
571 | /* | 600 | /* |
572 | * Save the old mount options in case we | 601 | * Save the old mount options in case we |
573 | * need to restore them. | 602 | * need to restore them. |
@@ -638,6 +667,8 @@ static struct inode *f2fs_nfs_get_inode(struct super_block *sb, | |||
638 | 667 | ||
639 | if (unlikely(ino < F2FS_ROOT_INO(sbi))) | 668 | if (unlikely(ino < F2FS_ROOT_INO(sbi))) |
640 | return ERR_PTR(-ESTALE); | 669 | return ERR_PTR(-ESTALE); |
670 | if (unlikely(ino >= NM_I(sbi)->max_nid)) | ||
671 | return ERR_PTR(-ESTALE); | ||
641 | 672 | ||
642 | /* | 673 | /* |
643 | * f2fs_iget isn't quite right if the inode is currently unallocated! | 674 | * f2fs_iget isn't quite right if the inode is currently unallocated! |
@@ -785,6 +816,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) | |||
785 | 816 | ||
786 | for (i = 0; i < NR_COUNT_TYPE; i++) | 817 | for (i = 0; i < NR_COUNT_TYPE; i++) |
787 | atomic_set(&sbi->nr_pages[i], 0); | 818 | atomic_set(&sbi->nr_pages[i], 0); |
819 | |||
820 | sbi->dir_level = DEF_DIR_LEVEL; | ||
788 | } | 821 | } |
789 | 822 | ||
790 | /* | 823 | /* |
@@ -896,11 +929,11 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
896 | sbi->por_doing = false; | 929 | sbi->por_doing = false; |
897 | spin_lock_init(&sbi->stat_lock); | 930 | spin_lock_init(&sbi->stat_lock); |
898 | 931 | ||
899 | mutex_init(&sbi->read_io.io_mutex); | 932 | init_rwsem(&sbi->read_io.io_rwsem); |
900 | sbi->read_io.sbi = sbi; | 933 | sbi->read_io.sbi = sbi; |
901 | sbi->read_io.bio = NULL; | 934 | sbi->read_io.bio = NULL; |
902 | for (i = 0; i < NR_PAGE_TYPE; i++) { | 935 | for (i = 0; i < NR_PAGE_TYPE; i++) { |
903 | mutex_init(&sbi->write_io[i].io_mutex); | 936 | init_rwsem(&sbi->write_io[i].io_rwsem); |
904 | sbi->write_io[i].sbi = sbi; | 937 | sbi->write_io[i].sbi = sbi; |
905 | sbi->write_io[i].bio = NULL; | 938 | sbi->write_io[i].bio = NULL; |
906 | } | 939 | } |
@@ -989,28 +1022,9 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
989 | goto free_root_inode; | 1022 | goto free_root_inode; |
990 | } | 1023 | } |
991 | 1024 | ||
992 | /* recover fsynced data */ | ||
993 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | ||
994 | err = recover_fsync_data(sbi); | ||
995 | if (err) | ||
996 | f2fs_msg(sb, KERN_ERR, | ||
997 | "Cannot recover all fsync data errno=%ld", err); | ||
998 | } | ||
999 | |||
1000 | /* | ||
1001 | * If filesystem is not mounted as read-only then | ||
1002 | * do start the gc_thread. | ||
1003 | */ | ||
1004 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1005 | /* After POR, we can run background GC thread.*/ | ||
1006 | err = start_gc_thread(sbi); | ||
1007 | if (err) | ||
1008 | goto free_gc; | ||
1009 | } | ||
1010 | |||
1011 | err = f2fs_build_stats(sbi); | 1025 | err = f2fs_build_stats(sbi); |
1012 | if (err) | 1026 | if (err) |
1013 | goto free_gc; | 1027 | goto free_root_inode; |
1014 | 1028 | ||
1015 | if (f2fs_proc_root) | 1029 | if (f2fs_proc_root) |
1016 | sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); | 1030 | sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); |
@@ -1032,17 +1046,36 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
1032 | err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, | 1046 | err = kobject_init_and_add(&sbi->s_kobj, &f2fs_ktype, NULL, |
1033 | "%s", sb->s_id); | 1047 | "%s", sb->s_id); |
1034 | if (err) | 1048 | if (err) |
1035 | goto fail; | 1049 | goto free_proc; |
1050 | |||
1051 | /* recover fsynced data */ | ||
1052 | if (!test_opt(sbi, DISABLE_ROLL_FORWARD)) { | ||
1053 | err = recover_fsync_data(sbi); | ||
1054 | if (err) | ||
1055 | f2fs_msg(sb, KERN_ERR, | ||
1056 | "Cannot recover all fsync data errno=%ld", err); | ||
1057 | } | ||
1036 | 1058 | ||
1059 | /* | ||
1060 | * If filesystem is not mounted as read-only then | ||
1061 | * do start the gc_thread. | ||
1062 | */ | ||
1063 | if (!(sb->s_flags & MS_RDONLY)) { | ||
1064 | /* After POR, we can run background GC thread.*/ | ||
1065 | err = start_gc_thread(sbi); | ||
1066 | if (err) | ||
1067 | goto free_kobj; | ||
1068 | } | ||
1037 | return 0; | 1069 | return 0; |
1038 | fail: | 1070 | |
1071 | free_kobj: | ||
1072 | kobject_del(&sbi->s_kobj); | ||
1073 | free_proc: | ||
1039 | if (sbi->s_proc) { | 1074 | if (sbi->s_proc) { |
1040 | remove_proc_entry("segment_info", sbi->s_proc); | 1075 | remove_proc_entry("segment_info", sbi->s_proc); |
1041 | remove_proc_entry(sb->s_id, f2fs_proc_root); | 1076 | remove_proc_entry(sb->s_id, f2fs_proc_root); |
1042 | } | 1077 | } |
1043 | f2fs_destroy_stats(sbi); | 1078 | f2fs_destroy_stats(sbi); |
1044 | free_gc: | ||
1045 | stop_gc_thread(sbi); | ||
1046 | free_root_inode: | 1079 | free_root_inode: |
1047 | dput(sb->s_root); | 1080 | dput(sb->s_root); |
1048 | sb->s_root = NULL; | 1081 | sb->s_root = NULL; |
@@ -1082,7 +1115,7 @@ MODULE_ALIAS_FS("f2fs"); | |||
1082 | static int __init init_inodecache(void) | 1115 | static int __init init_inodecache(void) |
1083 | { | 1116 | { |
1084 | f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", | 1117 | f2fs_inode_cachep = f2fs_kmem_cache_create("f2fs_inode_cache", |
1085 | sizeof(struct f2fs_inode_info), NULL); | 1118 | sizeof(struct f2fs_inode_info)); |
1086 | if (!f2fs_inode_cachep) | 1119 | if (!f2fs_inode_cachep) |
1087 | return -ENOMEM; | 1120 | return -ENOMEM; |
1088 | return 0; | 1121 | return 0; |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 89d0422a91a8..503c2451131e 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
@@ -275,7 +275,7 @@ static void *read_all_xattrs(struct inode *inode, struct page *ipage) | |||
275 | 275 | ||
276 | inline_size = inline_xattr_size(inode); | 276 | inline_size = inline_xattr_size(inode); |
277 | 277 | ||
278 | txattr_addr = kzalloc(inline_size + size, GFP_KERNEL); | 278 | txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO); |
279 | if (!txattr_addr) | 279 | if (!txattr_addr) |
280 | return NULL; | 280 | return NULL; |
281 | 281 | ||
@@ -407,6 +407,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, | |||
407 | if (name == NULL) | 407 | if (name == NULL) |
408 | return -EINVAL; | 408 | return -EINVAL; |
409 | name_len = strlen(name); | 409 | name_len = strlen(name); |
410 | if (name_len > F2FS_NAME_LEN) | ||
411 | return -ERANGE; | ||
410 | 412 | ||
411 | base_addr = read_all_xattrs(inode, NULL); | 413 | base_addr = read_all_xattrs(inode, NULL); |
412 | if (!base_addr) | 414 | if (!base_addr) |
@@ -590,7 +592,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | |||
590 | f2fs_balance_fs(sbi); | 592 | f2fs_balance_fs(sbi); |
591 | 593 | ||
592 | f2fs_lock_op(sbi); | 594 | f2fs_lock_op(sbi); |
595 | /* protect xattr_ver */ | ||
596 | down_write(&F2FS_I(inode)->i_sem); | ||
593 | err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); | 597 | err = __f2fs_setxattr(inode, name_index, name, value, value_len, ipage); |
598 | up_write(&F2FS_I(inode)->i_sem); | ||
594 | f2fs_unlock_op(sbi); | 599 | f2fs_unlock_op(sbi); |
595 | 600 | ||
596 | return err; | 601 | return err; |
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 854b578f6695..b3361fe2bcb5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(fat_build_inode); | |||
490 | 490 | ||
491 | static void fat_evict_inode(struct inode *inode) | 491 | static void fat_evict_inode(struct inode *inode) |
492 | { | 492 | { |
493 | truncate_inode_pages(&inode->i_data, 0); | 493 | truncate_inode_pages_final(&inode->i_data); |
494 | if (!inode->i_nlink) { | 494 | if (!inode->i_nlink) { |
495 | inode->i_size = 0; | 495 | inode->i_size = 0; |
496 | fat_truncate_blocks(inode, 0); | 496 | fat_truncate_blocks(inode, 0); |
@@ -635,6 +635,8 @@ static int fat_remount(struct super_block *sb, int *flags, char *data) | |||
635 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 635 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
636 | *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); | 636 | *flags |= MS_NODIRATIME | (sbi->options.isvfat ? 0 : MS_NOATIME); |
637 | 637 | ||
638 | sync_filesystem(sb); | ||
639 | |||
638 | /* make sure we update state on remount. */ | 640 | /* make sure we update state on remount. */ |
639 | new_rdonly = *flags & MS_RDONLY; | 641 | new_rdonly = *flags & MS_RDONLY; |
640 | if (new_rdonly != (sb->s_flags & MS_RDONLY)) { | 642 | if (new_rdonly != (sb->s_flags & MS_RDONLY)) { |
diff --git a/fs/fcntl.c b/fs/fcntl.c index ef6866592a0f..9ead1596399a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c | |||
@@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, | |||
272 | case F_SETFL: | 272 | case F_SETFL: |
273 | err = setfl(fd, filp, arg); | 273 | err = setfl(fd, filp, arg); |
274 | break; | 274 | break; |
275 | #if BITS_PER_LONG != 32 | ||
276 | /* 32-bit arches must use fcntl64() */ | ||
277 | case F_GETLKP: | ||
278 | #endif | ||
275 | case F_GETLK: | 279 | case F_GETLK: |
276 | err = fcntl_getlk(filp, (struct flock __user *) arg); | 280 | err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); |
277 | break; | 281 | break; |
282 | #if BITS_PER_LONG != 32 | ||
283 | /* 32-bit arches must use fcntl64() */ | ||
284 | case F_SETLKP: | ||
285 | case F_SETLKPW: | ||
286 | #endif | ||
287 | /* Fallthrough */ | ||
278 | case F_SETLK: | 288 | case F_SETLK: |
279 | case F_SETLKW: | 289 | case F_SETLKW: |
280 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); | 290 | err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); |
@@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, | |||
388 | goto out1; | 398 | goto out1; |
389 | 399 | ||
390 | switch (cmd) { | 400 | switch (cmd) { |
391 | case F_GETLK64: | 401 | case F_GETLK64: |
392 | err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); | 402 | case F_GETLKP: |
393 | break; | 403 | err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); |
394 | case F_SETLK64: | 404 | break; |
395 | case F_SETLKW64: | 405 | case F_SETLK64: |
396 | err = fcntl_setlk64(fd, f.file, cmd, | 406 | case F_SETLKW64: |
397 | (struct flock64 __user *) arg); | 407 | case F_SETLKP: |
398 | break; | 408 | case F_SETLKPW: |
399 | default: | 409 | err = fcntl_setlk64(fd, f.file, cmd, |
400 | err = do_fcntl(fd, cmd, arg, f.file); | 410 | (struct flock64 __user *) arg); |
401 | break; | 411 | break; |
412 | default: | ||
413 | err = do_fcntl(fd, cmd, arg, f.file); | ||
414 | break; | ||
402 | } | 415 | } |
403 | out1: | 416 | out1: |
404 | fdput(f); | 417 | fdput(f); |
@@ -497,7 +497,7 @@ repeat: | |||
497 | error = fd; | 497 | error = fd; |
498 | #if 1 | 498 | #if 1 |
499 | /* Sanity check */ | 499 | /* Sanity check */ |
500 | if (rcu_dereference_raw(fdt->fd[fd]) != NULL) { | 500 | if (rcu_access_pointer(fdt->fd[fd]) != NULL) { |
501 | printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); | 501 | printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd); |
502 | rcu_assign_pointer(fdt->fd[fd], NULL); | 502 | rcu_assign_pointer(fdt->fd[fd], NULL); |
503 | } | 503 | } |
diff --git a/fs/file_table.c b/fs/file_table.c index 5b24008ea4f6..01071c4d752e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c | |||
@@ -235,7 +235,7 @@ static void __fput(struct file *file) | |||
235 | * in the file cleanup chain. | 235 | * in the file cleanup chain. |
236 | */ | 236 | */ |
237 | eventpoll_release(file); | 237 | eventpoll_release(file); |
238 | locks_remove_flock(file); | 238 | locks_remove_file(file); |
239 | 239 | ||
240 | if (unlikely(file->f_flags & FASYNC)) { | 240 | if (unlikely(file->f_flags & FASYNC)) { |
241 | if (file->f_op->fasync) | 241 | if (file->f_op->fasync) |
diff --git a/fs/filesystems.c b/fs/filesystems.c index 92567d95ba6a..5797d45a78cb 100644 --- a/fs/filesystems.c +++ b/fs/filesystems.c | |||
@@ -121,6 +121,7 @@ int unregister_filesystem(struct file_system_type * fs) | |||
121 | 121 | ||
122 | EXPORT_SYMBOL(unregister_filesystem); | 122 | EXPORT_SYMBOL(unregister_filesystem); |
123 | 123 | ||
124 | #ifdef CONFIG_SYSFS_SYSCALL | ||
124 | static int fs_index(const char __user * __name) | 125 | static int fs_index(const char __user * __name) |
125 | { | 126 | { |
126 | struct file_system_type * tmp; | 127 | struct file_system_type * tmp; |
@@ -199,6 +200,7 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2) | |||
199 | } | 200 | } |
200 | return retval; | 201 | return retval; |
201 | } | 202 | } |
203 | #endif | ||
202 | 204 | ||
203 | int __init get_filesystem_list(char *buf) | 205 | int __init get_filesystem_list(char *buf) |
204 | { | 206 | { |
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index f47df72cef17..363e3ae25f6b 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c | |||
@@ -354,7 +354,7 @@ static void vxfs_i_callback(struct rcu_head *head) | |||
354 | void | 354 | void |
355 | vxfs_evict_inode(struct inode *ip) | 355 | vxfs_evict_inode(struct inode *ip) |
356 | { | 356 | { |
357 | truncate_inode_pages(&ip->i_data, 0); | 357 | truncate_inode_pages_final(&ip->i_data); |
358 | clear_inode(ip); | 358 | clear_inode(ip); |
359 | call_rcu(&ip->i_rcu, vxfs_i_callback); | 359 | call_rcu(&ip->i_rcu, vxfs_i_callback); |
360 | } | 360 | } |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 25d4099a4aea..99c7f0a37af4 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
@@ -192,7 +192,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp) | |||
192 | * vxfs_lookup - lookup pathname component | 192 | * vxfs_lookup - lookup pathname component |
193 | * @dip: dir in which we lookup | 193 | * @dip: dir in which we lookup |
194 | * @dp: dentry we lookup | 194 | * @dp: dentry we lookup |
195 | * @nd: lookup nameidata | 195 | * @flags: lookup flags |
196 | * | 196 | * |
197 | * Description: | 197 | * Description: |
198 | * vxfs_lookup tries to lookup the pathname component described | 198 | * vxfs_lookup tries to lookup the pathname component described |
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c index e37eb274e492..7ca8c75d50d3 100644 --- a/fs/freevxfs/vxfs_super.c +++ b/fs/freevxfs/vxfs_super.c | |||
@@ -124,6 +124,7 @@ vxfs_statfs(struct dentry *dentry, struct kstatfs *bufp) | |||
124 | 124 | ||
125 | static int vxfs_remount(struct super_block *sb, int *flags, char *data) | 125 | static int vxfs_remount(struct super_block *sb, int *flags, char *data) |
126 | { | 126 | { |
127 | sync_filesystem(sb); | ||
127 | *flags |= MS_RDONLY; | 128 | *flags |= MS_RDONLY; |
128 | return 0; | 129 | return 0; |
129 | } | 130 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d754e3cf99a8..be568b7311d6 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
@@ -89,16 +89,31 @@ static inline struct inode *wb_inode(struct list_head *head) | |||
89 | #define CREATE_TRACE_POINTS | 89 | #define CREATE_TRACE_POINTS |
90 | #include <trace/events/writeback.h> | 90 | #include <trace/events/writeback.h> |
91 | 91 | ||
92 | EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage); | ||
93 | |||
94 | static void bdi_wakeup_thread(struct backing_dev_info *bdi) | ||
95 | { | ||
96 | spin_lock_bh(&bdi->wb_lock); | ||
97 | if (test_bit(BDI_registered, &bdi->state)) | ||
98 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | ||
99 | spin_unlock_bh(&bdi->wb_lock); | ||
100 | } | ||
101 | |||
92 | static void bdi_queue_work(struct backing_dev_info *bdi, | 102 | static void bdi_queue_work(struct backing_dev_info *bdi, |
93 | struct wb_writeback_work *work) | 103 | struct wb_writeback_work *work) |
94 | { | 104 | { |
95 | trace_writeback_queue(bdi, work); | 105 | trace_writeback_queue(bdi, work); |
96 | 106 | ||
97 | spin_lock_bh(&bdi->wb_lock); | 107 | spin_lock_bh(&bdi->wb_lock); |
108 | if (!test_bit(BDI_registered, &bdi->state)) { | ||
109 | if (work->done) | ||
110 | complete(work->done); | ||
111 | goto out_unlock; | ||
112 | } | ||
98 | list_add_tail(&work->list, &bdi->work_list); | 113 | list_add_tail(&work->list, &bdi->work_list); |
99 | spin_unlock_bh(&bdi->wb_lock); | ||
100 | |||
101 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | 114 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); |
115 | out_unlock: | ||
116 | spin_unlock_bh(&bdi->wb_lock); | ||
102 | } | 117 | } |
103 | 118 | ||
104 | static void | 119 | static void |
@@ -114,7 +129,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages, | |||
114 | work = kzalloc(sizeof(*work), GFP_ATOMIC); | 129 | work = kzalloc(sizeof(*work), GFP_ATOMIC); |
115 | if (!work) { | 130 | if (!work) { |
116 | trace_writeback_nowork(bdi); | 131 | trace_writeback_nowork(bdi); |
117 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | 132 | bdi_wakeup_thread(bdi); |
118 | return; | 133 | return; |
119 | } | 134 | } |
120 | 135 | ||
@@ -161,7 +176,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) | |||
161 | * writeback as soon as there is no other work to do. | 176 | * writeback as soon as there is no other work to do. |
162 | */ | 177 | */ |
163 | trace_writeback_wake_background(bdi); | 178 | trace_writeback_wake_background(bdi); |
164 | mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0); | 179 | bdi_wakeup_thread(bdi); |
165 | } | 180 | } |
166 | 181 | ||
167 | /* | 182 | /* |
@@ -1017,7 +1032,7 @@ void bdi_writeback_workfn(struct work_struct *work) | |||
1017 | current->flags |= PF_SWAPWRITE; | 1032 | current->flags |= PF_SWAPWRITE; |
1018 | 1033 | ||
1019 | if (likely(!current_is_workqueue_rescuer() || | 1034 | if (likely(!current_is_workqueue_rescuer() || |
1020 | list_empty(&bdi->bdi_list))) { | 1035 | !test_bit(BDI_registered, &bdi->state))) { |
1021 | /* | 1036 | /* |
1022 | * The normal path. Keep writing back @bdi until its | 1037 | * The normal path. Keep writing back @bdi until its |
1023 | * work_list is empty. Note that this path is also taken | 1038 | * work_list is empty. Note that this path is also taken |
@@ -1039,10 +1054,10 @@ void bdi_writeback_workfn(struct work_struct *work) | |||
1039 | trace_writeback_pages_written(pages_written); | 1054 | trace_writeback_pages_written(pages_written); |
1040 | } | 1055 | } |
1041 | 1056 | ||
1042 | if (!list_empty(&bdi->work_list) || | 1057 | if (!list_empty(&bdi->work_list)) |
1043 | (wb_has_dirty_io(wb) && dirty_writeback_interval)) | 1058 | mod_delayed_work(bdi_wq, &wb->dwork, 0); |
1044 | queue_delayed_work(bdi_wq, &wb->dwork, | 1059 | else if (wb_has_dirty_io(wb) && dirty_writeback_interval) |
1045 | msecs_to_jiffies(dirty_writeback_interval * 10)); | 1060 | bdi_wakeup_thread_delayed(bdi); |
1046 | 1061 | ||
1047 | current->flags &= ~PF_SWAPWRITE; | 1062 | current->flags &= ~PF_SWAPWRITE; |
1048 | } | 1063 | } |
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index b96a49b37d66..13b691a8a7d2 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c | |||
@@ -95,7 +95,7 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, | |||
95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; | 95 | struct iovec iov = { .iov_base = buf, .iov_len = count }; |
96 | struct fuse_io_priv io = { .async = 0, .file = file }; | 96 | struct fuse_io_priv io = { .async = 0, .file = file }; |
97 | 97 | ||
98 | return fuse_direct_io(&io, &iov, 1, count, &pos, 0); | 98 | return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE); |
99 | } | 99 | } |
100 | 100 | ||
101 | static ssize_t cuse_write(struct file *file, const char __user *buf, | 101 | static ssize_t cuse_write(struct file *file, const char __user *buf, |
@@ -109,7 +109,8 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, | |||
109 | * No locking or generic_write_checks(), the server is | 109 | * No locking or generic_write_checks(), the server is |
110 | * responsible for locking and sanity checks. | 110 | * responsible for locking and sanity checks. |
111 | */ | 111 | */ |
112 | return fuse_direct_io(&io, &iov, 1, count, &pos, 1); | 112 | return fuse_direct_io(&io, &iov, 1, count, &pos, |
113 | FUSE_DIO_WRITE | FUSE_DIO_CUSE); | ||
113 | } | 114 | } |
114 | 115 | ||
115 | static int cuse_open(struct inode *inode, struct file *file) | 116 | static int cuse_open(struct inode *inode, struct file *file) |
@@ -568,7 +569,7 @@ static ssize_t cuse_class_waiting_show(struct device *dev, | |||
568 | 569 | ||
569 | return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); | 570 | return sprintf(buf, "%d\n", atomic_read(&cc->fc.num_waiting)); |
570 | } | 571 | } |
571 | static DEVICE_ATTR(waiting, S_IFREG | 0400, cuse_class_waiting_show, NULL); | 572 | static DEVICE_ATTR(waiting, 0400, cuse_class_waiting_show, NULL); |
572 | 573 | ||
573 | static ssize_t cuse_class_abort_store(struct device *dev, | 574 | static ssize_t cuse_class_abort_store(struct device *dev, |
574 | struct device_attribute *attr, | 575 | struct device_attribute *attr, |
@@ -579,7 +580,7 @@ static ssize_t cuse_class_abort_store(struct device *dev, | |||
579 | fuse_abort_conn(&cc->fc); | 580 | fuse_abort_conn(&cc->fc); |
580 | return count; | 581 | return count; |
581 | } | 582 | } |
582 | static DEVICE_ATTR(abort, S_IFREG | 0200, NULL, cuse_class_abort_store); | 583 | static DEVICE_ATTR(abort, 0200, NULL, cuse_class_abort_store); |
583 | 584 | ||
584 | static struct attribute *cuse_class_dev_attrs[] = { | 585 | static struct attribute *cuse_class_dev_attrs[] = { |
585 | &dev_attr_waiting.attr, | 586 | &dev_attr_waiting.attr, |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 1d1292c581c3..5b4e035b364c 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
@@ -839,6 +839,14 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, | |||
839 | struct kstat *stat) | 839 | struct kstat *stat) |
840 | { | 840 | { |
841 | unsigned int blkbits; | 841 | unsigned int blkbits; |
842 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
843 | |||
844 | /* see the comment in fuse_change_attributes() */ | ||
845 | if (fc->writeback_cache && S_ISREG(inode->i_mode)) { | ||
846 | attr->size = i_size_read(inode); | ||
847 | attr->mtime = inode->i_mtime.tv_sec; | ||
848 | attr->mtimensec = inode->i_mtime.tv_nsec; | ||
849 | } | ||
842 | 850 | ||
843 | stat->dev = inode->i_sb->s_dev; | 851 | stat->dev = inode->i_sb->s_dev; |
844 | stat->ino = attr->ino; | 852 | stat->ino = attr->ino; |
@@ -1477,12 +1485,16 @@ static long fuse_dir_compat_ioctl(struct file *file, unsigned int cmd, | |||
1477 | FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); | 1485 | FUSE_IOCTL_COMPAT | FUSE_IOCTL_DIR); |
1478 | } | 1486 | } |
1479 | 1487 | ||
1480 | static bool update_mtime(unsigned ivalid) | 1488 | static bool update_mtime(unsigned ivalid, bool trust_local_mtime) |
1481 | { | 1489 | { |
1482 | /* Always update if mtime is explicitly set */ | 1490 | /* Always update if mtime is explicitly set */ |
1483 | if (ivalid & ATTR_MTIME_SET) | 1491 | if (ivalid & ATTR_MTIME_SET) |
1484 | return true; | 1492 | return true; |
1485 | 1493 | ||
1494 | /* Or if kernel i_mtime is the official one */ | ||
1495 | if (trust_local_mtime) | ||
1496 | return true; | ||
1497 | |||
1486 | /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ | 1498 | /* If it's an open(O_TRUNC) or an ftruncate(), don't update */ |
1487 | if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) | 1499 | if ((ivalid & ATTR_SIZE) && (ivalid & (ATTR_OPEN | ATTR_FILE))) |
1488 | return false; | 1500 | return false; |
@@ -1491,7 +1503,8 @@ static bool update_mtime(unsigned ivalid) | |||
1491 | return true; | 1503 | return true; |
1492 | } | 1504 | } |
1493 | 1505 | ||
1494 | static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) | 1506 | static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg, |
1507 | bool trust_local_mtime) | ||
1495 | { | 1508 | { |
1496 | unsigned ivalid = iattr->ia_valid; | 1509 | unsigned ivalid = iattr->ia_valid; |
1497 | 1510 | ||
@@ -1510,11 +1523,11 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) | |||
1510 | if (!(ivalid & ATTR_ATIME_SET)) | 1523 | if (!(ivalid & ATTR_ATIME_SET)) |
1511 | arg->valid |= FATTR_ATIME_NOW; | 1524 | arg->valid |= FATTR_ATIME_NOW; |
1512 | } | 1525 | } |
1513 | if ((ivalid & ATTR_MTIME) && update_mtime(ivalid)) { | 1526 | if ((ivalid & ATTR_MTIME) && update_mtime(ivalid, trust_local_mtime)) { |
1514 | arg->valid |= FATTR_MTIME; | 1527 | arg->valid |= FATTR_MTIME; |
1515 | arg->mtime = iattr->ia_mtime.tv_sec; | 1528 | arg->mtime = iattr->ia_mtime.tv_sec; |
1516 | arg->mtimensec = iattr->ia_mtime.tv_nsec; | 1529 | arg->mtimensec = iattr->ia_mtime.tv_nsec; |
1517 | if (!(ivalid & ATTR_MTIME_SET)) | 1530 | if (!(ivalid & ATTR_MTIME_SET) && !trust_local_mtime) |
1518 | arg->valid |= FATTR_MTIME_NOW; | 1531 | arg->valid |= FATTR_MTIME_NOW; |
1519 | } | 1532 | } |
1520 | } | 1533 | } |
@@ -1563,6 +1576,63 @@ void fuse_release_nowrite(struct inode *inode) | |||
1563 | spin_unlock(&fc->lock); | 1576 | spin_unlock(&fc->lock); |
1564 | } | 1577 | } |
1565 | 1578 | ||
1579 | static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req, | ||
1580 | struct inode *inode, | ||
1581 | struct fuse_setattr_in *inarg_p, | ||
1582 | struct fuse_attr_out *outarg_p) | ||
1583 | { | ||
1584 | req->in.h.opcode = FUSE_SETATTR; | ||
1585 | req->in.h.nodeid = get_node_id(inode); | ||
1586 | req->in.numargs = 1; | ||
1587 | req->in.args[0].size = sizeof(*inarg_p); | ||
1588 | req->in.args[0].value = inarg_p; | ||
1589 | req->out.numargs = 1; | ||
1590 | if (fc->minor < 9) | ||
1591 | req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; | ||
1592 | else | ||
1593 | req->out.args[0].size = sizeof(*outarg_p); | ||
1594 | req->out.args[0].value = outarg_p; | ||
1595 | } | ||
1596 | |||
1597 | /* | ||
1598 | * Flush inode->i_mtime to the server | ||
1599 | */ | ||
1600 | int fuse_flush_mtime(struct file *file, bool nofail) | ||
1601 | { | ||
1602 | struct inode *inode = file->f_mapping->host; | ||
1603 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1604 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
1605 | struct fuse_req *req = NULL; | ||
1606 | struct fuse_setattr_in inarg; | ||
1607 | struct fuse_attr_out outarg; | ||
1608 | int err; | ||
1609 | |||
1610 | if (nofail) { | ||
1611 | req = fuse_get_req_nofail_nopages(fc, file); | ||
1612 | } else { | ||
1613 | req = fuse_get_req_nopages(fc); | ||
1614 | if (IS_ERR(req)) | ||
1615 | return PTR_ERR(req); | ||
1616 | } | ||
1617 | |||
1618 | memset(&inarg, 0, sizeof(inarg)); | ||
1619 | memset(&outarg, 0, sizeof(outarg)); | ||
1620 | |||
1621 | inarg.valid |= FATTR_MTIME; | ||
1622 | inarg.mtime = inode->i_mtime.tv_sec; | ||
1623 | inarg.mtimensec = inode->i_mtime.tv_nsec; | ||
1624 | |||
1625 | fuse_setattr_fill(fc, req, inode, &inarg, &outarg); | ||
1626 | fuse_request_send(fc, req); | ||
1627 | err = req->out.h.error; | ||
1628 | fuse_put_request(fc, req); | ||
1629 | |||
1630 | if (!err) | ||
1631 | clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); | ||
1632 | |||
1633 | return err; | ||
1634 | } | ||
1635 | |||
1566 | /* | 1636 | /* |
1567 | * Set attributes, and at the same time refresh them. | 1637 | * Set attributes, and at the same time refresh them. |
1568 | * | 1638 | * |
@@ -1580,8 +1650,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1580 | struct fuse_setattr_in inarg; | 1650 | struct fuse_setattr_in inarg; |
1581 | struct fuse_attr_out outarg; | 1651 | struct fuse_attr_out outarg; |
1582 | bool is_truncate = false; | 1652 | bool is_truncate = false; |
1653 | bool is_wb = fc->writeback_cache; | ||
1583 | loff_t oldsize; | 1654 | loff_t oldsize; |
1584 | int err; | 1655 | int err; |
1656 | bool trust_local_mtime = is_wb && S_ISREG(inode->i_mode); | ||
1585 | 1657 | ||
1586 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) | 1658 | if (!(fc->flags & FUSE_DEFAULT_PERMISSIONS)) |
1587 | attr->ia_valid |= ATTR_FORCE; | 1659 | attr->ia_valid |= ATTR_FORCE; |
@@ -1610,7 +1682,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1610 | 1682 | ||
1611 | memset(&inarg, 0, sizeof(inarg)); | 1683 | memset(&inarg, 0, sizeof(inarg)); |
1612 | memset(&outarg, 0, sizeof(outarg)); | 1684 | memset(&outarg, 0, sizeof(outarg)); |
1613 | iattr_to_fattr(attr, &inarg); | 1685 | iattr_to_fattr(attr, &inarg, trust_local_mtime); |
1614 | if (file) { | 1686 | if (file) { |
1615 | struct fuse_file *ff = file->private_data; | 1687 | struct fuse_file *ff = file->private_data; |
1616 | inarg.valid |= FATTR_FH; | 1688 | inarg.valid |= FATTR_FH; |
@@ -1621,17 +1693,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1621 | inarg.valid |= FATTR_LOCKOWNER; | 1693 | inarg.valid |= FATTR_LOCKOWNER; |
1622 | inarg.lock_owner = fuse_lock_owner_id(fc, current->files); | 1694 | inarg.lock_owner = fuse_lock_owner_id(fc, current->files); |
1623 | } | 1695 | } |
1624 | req->in.h.opcode = FUSE_SETATTR; | 1696 | fuse_setattr_fill(fc, req, inode, &inarg, &outarg); |
1625 | req->in.h.nodeid = get_node_id(inode); | ||
1626 | req->in.numargs = 1; | ||
1627 | req->in.args[0].size = sizeof(inarg); | ||
1628 | req->in.args[0].value = &inarg; | ||
1629 | req->out.numargs = 1; | ||
1630 | if (fc->minor < 9) | ||
1631 | req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE; | ||
1632 | else | ||
1633 | req->out.args[0].size = sizeof(outarg); | ||
1634 | req->out.args[0].value = &outarg; | ||
1635 | fuse_request_send(fc, req); | 1697 | fuse_request_send(fc, req); |
1636 | err = req->out.h.error; | 1698 | err = req->out.h.error; |
1637 | fuse_put_request(fc, req); | 1699 | fuse_put_request(fc, req); |
@@ -1648,10 +1710,18 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1648 | } | 1710 | } |
1649 | 1711 | ||
1650 | spin_lock(&fc->lock); | 1712 | spin_lock(&fc->lock); |
1713 | /* the kernel maintains i_mtime locally */ | ||
1714 | if (trust_local_mtime && (attr->ia_valid & ATTR_MTIME)) { | ||
1715 | inode->i_mtime = attr->ia_mtime; | ||
1716 | clear_bit(FUSE_I_MTIME_DIRTY, &fi->state); | ||
1717 | } | ||
1718 | |||
1651 | fuse_change_attributes_common(inode, &outarg.attr, | 1719 | fuse_change_attributes_common(inode, &outarg.attr, |
1652 | attr_timeout(&outarg)); | 1720 | attr_timeout(&outarg)); |
1653 | oldsize = inode->i_size; | 1721 | oldsize = inode->i_size; |
1654 | i_size_write(inode, outarg.attr.size); | 1722 | /* see the comment in fuse_change_attributes() */ |
1723 | if (!is_wb || is_truncate || !S_ISREG(inode->i_mode)) | ||
1724 | i_size_write(inode, outarg.attr.size); | ||
1655 | 1725 | ||
1656 | if (is_truncate) { | 1726 | if (is_truncate) { |
1657 | /* NOTE: this may release/reacquire fc->lock */ | 1727 | /* NOTE: this may release/reacquire fc->lock */ |
@@ -1663,7 +1733,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, | |||
1663 | * Only call invalidate_inode_pages2() after removing | 1733 | * Only call invalidate_inode_pages2() after removing |
1664 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. | 1734 | * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock. |
1665 | */ | 1735 | */ |
1666 | if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { | 1736 | if ((is_truncate || !is_wb) && |
1737 | S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) { | ||
1667 | truncate_pagecache(inode, outarg.attr.size); | 1738 | truncate_pagecache(inode, outarg.attr.size); |
1668 | invalidate_inode_pages2(inode->i_mapping); | 1739 | invalidate_inode_pages2(inode->i_mapping); |
1669 | } | 1740 | } |
@@ -1875,6 +1946,17 @@ static int fuse_removexattr(struct dentry *entry, const char *name) | |||
1875 | return err; | 1946 | return err; |
1876 | } | 1947 | } |
1877 | 1948 | ||
1949 | static int fuse_update_time(struct inode *inode, struct timespec *now, | ||
1950 | int flags) | ||
1951 | { | ||
1952 | if (flags & S_MTIME) { | ||
1953 | inode->i_mtime = *now; | ||
1954 | set_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state); | ||
1955 | BUG_ON(!S_ISREG(inode->i_mode)); | ||
1956 | } | ||
1957 | return 0; | ||
1958 | } | ||
1959 | |||
1878 | static const struct inode_operations fuse_dir_inode_operations = { | 1960 | static const struct inode_operations fuse_dir_inode_operations = { |
1879 | .lookup = fuse_lookup, | 1961 | .lookup = fuse_lookup, |
1880 | .mkdir = fuse_mkdir, | 1962 | .mkdir = fuse_mkdir, |
@@ -1914,6 +1996,7 @@ static const struct inode_operations fuse_common_inode_operations = { | |||
1914 | .getxattr = fuse_getxattr, | 1996 | .getxattr = fuse_getxattr, |
1915 | .listxattr = fuse_listxattr, | 1997 | .listxattr = fuse_listxattr, |
1916 | .removexattr = fuse_removexattr, | 1998 | .removexattr = fuse_removexattr, |
1999 | .update_time = fuse_update_time, | ||
1917 | }; | 2000 | }; |
1918 | 2001 | ||
1919 | static const struct inode_operations fuse_symlink_inode_operations = { | 2002 | static const struct inode_operations fuse_symlink_inode_operations = { |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 77bcc303c3ae..48992cac714b 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
@@ -188,6 +188,22 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | |||
188 | } | 188 | } |
189 | EXPORT_SYMBOL_GPL(fuse_do_open); | 189 | EXPORT_SYMBOL_GPL(fuse_do_open); |
190 | 190 | ||
191 | static void fuse_link_write_file(struct file *file) | ||
192 | { | ||
193 | struct inode *inode = file_inode(file); | ||
194 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
195 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
196 | struct fuse_file *ff = file->private_data; | ||
197 | /* | ||
198 | * file may be written through mmap, so chain it onto the | ||
199 | * inodes's write_file list | ||
200 | */ | ||
201 | spin_lock(&fc->lock); | ||
202 | if (list_empty(&ff->write_entry)) | ||
203 | list_add(&ff->write_entry, &fi->write_files); | ||
204 | spin_unlock(&fc->lock); | ||
205 | } | ||
206 | |||
191 | void fuse_finish_open(struct inode *inode, struct file *file) | 207 | void fuse_finish_open(struct inode *inode, struct file *file) |
192 | { | 208 | { |
193 | struct fuse_file *ff = file->private_data; | 209 | struct fuse_file *ff = file->private_data; |
@@ -208,6 +224,8 @@ void fuse_finish_open(struct inode *inode, struct file *file) | |||
208 | spin_unlock(&fc->lock); | 224 | spin_unlock(&fc->lock); |
209 | fuse_invalidate_attr(inode); | 225 | fuse_invalidate_attr(inode); |
210 | } | 226 | } |
227 | if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache) | ||
228 | fuse_link_write_file(file); | ||
211 | } | 229 | } |
212 | 230 | ||
213 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) | 231 | int fuse_open_common(struct inode *inode, struct file *file, bool isdir) |
@@ -292,6 +310,15 @@ static int fuse_open(struct inode *inode, struct file *file) | |||
292 | 310 | ||
293 | static int fuse_release(struct inode *inode, struct file *file) | 311 | static int fuse_release(struct inode *inode, struct file *file) |
294 | { | 312 | { |
313 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
314 | |||
315 | /* see fuse_vma_close() for !writeback_cache case */ | ||
316 | if (fc->writeback_cache) | ||
317 | filemap_write_and_wait(file->f_mapping); | ||
318 | |||
319 | if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) | ||
320 | fuse_flush_mtime(file, true); | ||
321 | |||
295 | fuse_release_common(file, FUSE_RELEASE); | 322 | fuse_release_common(file, FUSE_RELEASE); |
296 | 323 | ||
297 | /* return value is ignored by VFS */ | 324 | /* return value is ignored by VFS */ |
@@ -333,12 +360,13 @@ u64 fuse_lock_owner_id(struct fuse_conn *fc, fl_owner_t id) | |||
333 | } | 360 | } |
334 | 361 | ||
335 | /* | 362 | /* |
336 | * Check if page is under writeback | 363 | * Check if any page in a range is under writeback |
337 | * | 364 | * |
338 | * This is currently done by walking the list of writepage requests | 365 | * This is currently done by walking the list of writepage requests |
339 | * for the inode, which can be pretty inefficient. | 366 | * for the inode, which can be pretty inefficient. |
340 | */ | 367 | */ |
341 | static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | 368 | static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from, |
369 | pgoff_t idx_to) | ||
342 | { | 370 | { |
343 | struct fuse_conn *fc = get_fuse_conn(inode); | 371 | struct fuse_conn *fc = get_fuse_conn(inode); |
344 | struct fuse_inode *fi = get_fuse_inode(inode); | 372 | struct fuse_inode *fi = get_fuse_inode(inode); |
@@ -351,8 +379,8 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
351 | 379 | ||
352 | BUG_ON(req->inode != inode); | 380 | BUG_ON(req->inode != inode); |
353 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; | 381 | curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT; |
354 | if (curr_index <= index && | 382 | if (idx_from < curr_index + req->num_pages && |
355 | index < curr_index + req->num_pages) { | 383 | curr_index <= idx_to) { |
356 | found = true; | 384 | found = true; |
357 | break; | 385 | break; |
358 | } | 386 | } |
@@ -362,6 +390,11 @@ static bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | |||
362 | return found; | 390 | return found; |
363 | } | 391 | } |
364 | 392 | ||
393 | static inline bool fuse_page_is_writeback(struct inode *inode, pgoff_t index) | ||
394 | { | ||
395 | return fuse_range_is_writeback(inode, index, index); | ||
396 | } | ||
397 | |||
365 | /* | 398 | /* |
366 | * Wait for page writeback to be completed. | 399 | * Wait for page writeback to be completed. |
367 | * | 400 | * |
@@ -376,6 +409,21 @@ static int fuse_wait_on_page_writeback(struct inode *inode, pgoff_t index) | |||
376 | return 0; | 409 | return 0; |
377 | } | 410 | } |
378 | 411 | ||
412 | /* | ||
413 | * Wait for all pending writepages on the inode to finish. | ||
414 | * | ||
415 | * This is currently done by blocking further writes with FUSE_NOWRITE | ||
416 | * and waiting for all sent writes to complete. | ||
417 | * | ||
418 | * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage | ||
419 | * could conflict with truncation. | ||
420 | */ | ||
421 | static void fuse_sync_writes(struct inode *inode) | ||
422 | { | ||
423 | fuse_set_nowrite(inode); | ||
424 | fuse_release_nowrite(inode); | ||
425 | } | ||
426 | |||
379 | static int fuse_flush(struct file *file, fl_owner_t id) | 427 | static int fuse_flush(struct file *file, fl_owner_t id) |
380 | { | 428 | { |
381 | struct inode *inode = file_inode(file); | 429 | struct inode *inode = file_inode(file); |
@@ -391,6 +439,14 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
391 | if (fc->no_flush) | 439 | if (fc->no_flush) |
392 | return 0; | 440 | return 0; |
393 | 441 | ||
442 | err = filemap_write_and_wait(file->f_mapping); | ||
443 | if (err) | ||
444 | return err; | ||
445 | |||
446 | mutex_lock(&inode->i_mutex); | ||
447 | fuse_sync_writes(inode); | ||
448 | mutex_unlock(&inode->i_mutex); | ||
449 | |||
394 | req = fuse_get_req_nofail_nopages(fc, file); | 450 | req = fuse_get_req_nofail_nopages(fc, file); |
395 | memset(&inarg, 0, sizeof(inarg)); | 451 | memset(&inarg, 0, sizeof(inarg)); |
396 | inarg.fh = ff->fh; | 452 | inarg.fh = ff->fh; |
@@ -411,21 +467,6 @@ static int fuse_flush(struct file *file, fl_owner_t id) | |||
411 | return err; | 467 | return err; |
412 | } | 468 | } |
413 | 469 | ||
414 | /* | ||
415 | * Wait for all pending writepages on the inode to finish. | ||
416 | * | ||
417 | * This is currently done by blocking further writes with FUSE_NOWRITE | ||
418 | * and waiting for all sent writes to complete. | ||
419 | * | ||
420 | * This must be called under i_mutex, otherwise the FUSE_NOWRITE usage | ||
421 | * could conflict with truncation. | ||
422 | */ | ||
423 | static void fuse_sync_writes(struct inode *inode) | ||
424 | { | ||
425 | fuse_set_nowrite(inode); | ||
426 | fuse_release_nowrite(inode); | ||
427 | } | ||
428 | |||
429 | int fuse_fsync_common(struct file *file, loff_t start, loff_t end, | 470 | int fuse_fsync_common(struct file *file, loff_t start, loff_t end, |
430 | int datasync, int isdir) | 471 | int datasync, int isdir) |
431 | { | 472 | { |
@@ -459,6 +500,12 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end, | |||
459 | 500 | ||
460 | fuse_sync_writes(inode); | 501 | fuse_sync_writes(inode); |
461 | 502 | ||
503 | if (test_bit(FUSE_I_MTIME_DIRTY, &get_fuse_inode(inode)->state)) { | ||
504 | int err = fuse_flush_mtime(file, false); | ||
505 | if (err) | ||
506 | goto out; | ||
507 | } | ||
508 | |||
462 | req = fuse_get_req_nopages(fc); | 509 | req = fuse_get_req_nopages(fc); |
463 | if (IS_ERR(req)) { | 510 | if (IS_ERR(req)) { |
464 | err = PTR_ERR(req); | 511 | err = PTR_ERR(req); |
@@ -655,7 +702,33 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, | |||
655 | spin_unlock(&fc->lock); | 702 | spin_unlock(&fc->lock); |
656 | } | 703 | } |
657 | 704 | ||
658 | static int fuse_readpage(struct file *file, struct page *page) | 705 | static void fuse_short_read(struct fuse_req *req, struct inode *inode, |
706 | u64 attr_ver) | ||
707 | { | ||
708 | size_t num_read = req->out.args[0].size; | ||
709 | struct fuse_conn *fc = get_fuse_conn(inode); | ||
710 | |||
711 | if (fc->writeback_cache) { | ||
712 | /* | ||
713 | * A hole in a file. Some data after the hole are in page cache, | ||
714 | * but have not reached the client fs yet. So, the hole is not | ||
715 | * present there. | ||
716 | */ | ||
717 | int i; | ||
718 | int start_idx = num_read >> PAGE_CACHE_SHIFT; | ||
719 | size_t off = num_read & (PAGE_CACHE_SIZE - 1); | ||
720 | |||
721 | for (i = start_idx; i < req->num_pages; i++) { | ||
722 | zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE); | ||
723 | off = 0; | ||
724 | } | ||
725 | } else { | ||
726 | loff_t pos = page_offset(req->pages[0]) + num_read; | ||
727 | fuse_read_update_size(inode, pos, attr_ver); | ||
728 | } | ||
729 | } | ||
730 | |||
731 | static int fuse_do_readpage(struct file *file, struct page *page) | ||
659 | { | 732 | { |
660 | struct fuse_io_priv io = { .async = 0, .file = file }; | 733 | struct fuse_io_priv io = { .async = 0, .file = file }; |
661 | struct inode *inode = page->mapping->host; | 734 | struct inode *inode = page->mapping->host; |
@@ -667,10 +740,6 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
667 | u64 attr_ver; | 740 | u64 attr_ver; |
668 | int err; | 741 | int err; |
669 | 742 | ||
670 | err = -EIO; | ||
671 | if (is_bad_inode(inode)) | ||
672 | goto out; | ||
673 | |||
674 | /* | 743 | /* |
675 | * Page writeback can extend beyond the lifetime of the | 744 | * Page writeback can extend beyond the lifetime of the |
676 | * page-cache page, so make sure we read a properly synced | 745 | * page-cache page, so make sure we read a properly synced |
@@ -679,9 +748,8 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
679 | fuse_wait_on_page_writeback(inode, page->index); | 748 | fuse_wait_on_page_writeback(inode, page->index); |
680 | 749 | ||
681 | req = fuse_get_req(fc, 1); | 750 | req = fuse_get_req(fc, 1); |
682 | err = PTR_ERR(req); | ||
683 | if (IS_ERR(req)) | 751 | if (IS_ERR(req)) |
684 | goto out; | 752 | return PTR_ERR(req); |
685 | 753 | ||
686 | attr_ver = fuse_get_attr_version(fc); | 754 | attr_ver = fuse_get_attr_version(fc); |
687 | 755 | ||
@@ -692,18 +760,32 @@ static int fuse_readpage(struct file *file, struct page *page) | |||
692 | req->page_descs[0].length = count; | 760 | req->page_descs[0].length = count; |
693 | num_read = fuse_send_read(req, &io, pos, count, NULL); | 761 | num_read = fuse_send_read(req, &io, pos, count, NULL); |
694 | err = req->out.h.error; | 762 | err = req->out.h.error; |
695 | fuse_put_request(fc, req); | ||
696 | 763 | ||
697 | if (!err) { | 764 | if (!err) { |
698 | /* | 765 | /* |
699 | * Short read means EOF. If file size is larger, truncate it | 766 | * Short read means EOF. If file size is larger, truncate it |
700 | */ | 767 | */ |
701 | if (num_read < count) | 768 | if (num_read < count) |
702 | fuse_read_update_size(inode, pos + num_read, attr_ver); | 769 | fuse_short_read(req, inode, attr_ver); |
703 | 770 | ||
704 | SetPageUptodate(page); | 771 | SetPageUptodate(page); |
705 | } | 772 | } |
706 | 773 | ||
774 | fuse_put_request(fc, req); | ||
775 | |||
776 | return err; | ||
777 | } | ||
778 | |||
779 | static int fuse_readpage(struct file *file, struct page *page) | ||
780 | { | ||
781 | struct inode *inode = page->mapping->host; | ||
782 | int err; | ||
783 | |||
784 | err = -EIO; | ||
785 | if (is_bad_inode(inode)) | ||
786 | goto out; | ||
787 | |||
788 | err = fuse_do_readpage(file, page); | ||
707 | fuse_invalidate_atime(inode); | 789 | fuse_invalidate_atime(inode); |
708 | out: | 790 | out: |
709 | unlock_page(page); | 791 | unlock_page(page); |
@@ -726,13 +808,9 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req) | |||
726 | /* | 808 | /* |
727 | * Short read means EOF. If file size is larger, truncate it | 809 | * Short read means EOF. If file size is larger, truncate it |
728 | */ | 810 | */ |
729 | if (!req->out.h.error && num_read < count) { | 811 | if (!req->out.h.error && num_read < count) |
730 | loff_t pos; | 812 | fuse_short_read(req, inode, req->misc.read.attr_ver); |
731 | 813 | ||
732 | pos = page_offset(req->pages[0]) + num_read; | ||
733 | fuse_read_update_size(inode, pos, | ||
734 | req->misc.read.attr_ver); | ||
735 | } | ||
736 | fuse_invalidate_atime(inode); | 814 | fuse_invalidate_atime(inode); |
737 | } | 815 | } |
738 | 816 | ||
@@ -922,16 +1000,21 @@ static size_t fuse_send_write(struct fuse_req *req, struct fuse_io_priv *io, | |||
922 | return req->misc.write.out.size; | 1000 | return req->misc.write.out.size; |
923 | } | 1001 | } |
924 | 1002 | ||
925 | void fuse_write_update_size(struct inode *inode, loff_t pos) | 1003 | bool fuse_write_update_size(struct inode *inode, loff_t pos) |
926 | { | 1004 | { |
927 | struct fuse_conn *fc = get_fuse_conn(inode); | 1005 | struct fuse_conn *fc = get_fuse_conn(inode); |
928 | struct fuse_inode *fi = get_fuse_inode(inode); | 1006 | struct fuse_inode *fi = get_fuse_inode(inode); |
1007 | bool ret = false; | ||
929 | 1008 | ||
930 | spin_lock(&fc->lock); | 1009 | spin_lock(&fc->lock); |
931 | fi->attr_version = ++fc->attr_version; | 1010 | fi->attr_version = ++fc->attr_version; |
932 | if (pos > inode->i_size) | 1011 | if (pos > inode->i_size) { |
933 | i_size_write(inode, pos); | 1012 | i_size_write(inode, pos); |
1013 | ret = true; | ||
1014 | } | ||
934 | spin_unlock(&fc->lock); | 1015 | spin_unlock(&fc->lock); |
1016 | |||
1017 | return ret; | ||
935 | } | 1018 | } |
936 | 1019 | ||
937 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, | 1020 | static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file, |
@@ -1116,6 +1199,15 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
1116 | struct iov_iter i; | 1199 | struct iov_iter i; |
1117 | loff_t endbyte = 0; | 1200 | loff_t endbyte = 0; |
1118 | 1201 | ||
1202 | if (get_fuse_conn(inode)->writeback_cache) { | ||
1203 | /* Update size (EOF optimization) and mode (SUID clearing) */ | ||
1204 | err = fuse_update_attributes(mapping->host, NULL, file, NULL); | ||
1205 | if (err) | ||
1206 | return err; | ||
1207 | |||
1208 | return generic_file_aio_write(iocb, iov, nr_segs, pos); | ||
1209 | } | ||
1210 | |||
1119 | WARN_ON(iocb->ki_pos != pos); | 1211 | WARN_ON(iocb->ki_pos != pos); |
1120 | 1212 | ||
1121 | ocount = 0; | 1213 | ocount = 0; |
@@ -1289,13 +1381,18 @@ static inline int fuse_iter_npages(const struct iov_iter *ii_p) | |||
1289 | 1381 | ||
1290 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 1382 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, |
1291 | unsigned long nr_segs, size_t count, loff_t *ppos, | 1383 | unsigned long nr_segs, size_t count, loff_t *ppos, |
1292 | int write) | 1384 | int flags) |
1293 | { | 1385 | { |
1386 | int write = flags & FUSE_DIO_WRITE; | ||
1387 | int cuse = flags & FUSE_DIO_CUSE; | ||
1294 | struct file *file = io->file; | 1388 | struct file *file = io->file; |
1389 | struct inode *inode = file->f_mapping->host; | ||
1295 | struct fuse_file *ff = file->private_data; | 1390 | struct fuse_file *ff = file->private_data; |
1296 | struct fuse_conn *fc = ff->fc; | 1391 | struct fuse_conn *fc = ff->fc; |
1297 | size_t nmax = write ? fc->max_write : fc->max_read; | 1392 | size_t nmax = write ? fc->max_write : fc->max_read; |
1298 | loff_t pos = *ppos; | 1393 | loff_t pos = *ppos; |
1394 | pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; | ||
1395 | pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; | ||
1299 | ssize_t res = 0; | 1396 | ssize_t res = 0; |
1300 | struct fuse_req *req; | 1397 | struct fuse_req *req; |
1301 | struct iov_iter ii; | 1398 | struct iov_iter ii; |
@@ -1309,6 +1406,14 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | |||
1309 | if (IS_ERR(req)) | 1406 | if (IS_ERR(req)) |
1310 | return PTR_ERR(req); | 1407 | return PTR_ERR(req); |
1311 | 1408 | ||
1409 | if (!cuse && fuse_range_is_writeback(inode, idx_from, idx_to)) { | ||
1410 | if (!write) | ||
1411 | mutex_lock(&inode->i_mutex); | ||
1412 | fuse_sync_writes(inode); | ||
1413 | if (!write) | ||
1414 | mutex_unlock(&inode->i_mutex); | ||
1415 | } | ||
1416 | |||
1312 | while (count) { | 1417 | while (count) { |
1313 | size_t nres; | 1418 | size_t nres; |
1314 | fl_owner_t owner = current->files; | 1419 | fl_owner_t owner = current->files; |
@@ -1397,7 +1502,8 @@ static ssize_t __fuse_direct_write(struct fuse_io_priv *io, | |||
1397 | 1502 | ||
1398 | res = generic_write_checks(file, ppos, &count, 0); | 1503 | res = generic_write_checks(file, ppos, &count, 0); |
1399 | if (!res) | 1504 | if (!res) |
1400 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, 1); | 1505 | res = fuse_direct_io(io, iov, nr_segs, count, ppos, |
1506 | FUSE_DIO_WRITE); | ||
1401 | 1507 | ||
1402 | fuse_invalidate_attr(inode); | 1508 | fuse_invalidate_attr(inode); |
1403 | 1509 | ||
@@ -1885,6 +1991,77 @@ out: | |||
1885 | return err; | 1991 | return err; |
1886 | } | 1992 | } |
1887 | 1993 | ||
1994 | /* | ||
1995 | * It's worthy to make sure that space is reserved on disk for the write, | ||
1996 | * but how to implement it without killing performance need more thinking. | ||
1997 | */ | ||
1998 | static int fuse_write_begin(struct file *file, struct address_space *mapping, | ||
1999 | loff_t pos, unsigned len, unsigned flags, | ||
2000 | struct page **pagep, void **fsdata) | ||
2001 | { | ||
2002 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
2003 | struct fuse_conn *fc = get_fuse_conn(file->f_dentry->d_inode); | ||
2004 | struct page *page; | ||
2005 | loff_t fsize; | ||
2006 | int err = -ENOMEM; | ||
2007 | |||
2008 | WARN_ON(!fc->writeback_cache); | ||
2009 | |||
2010 | page = grab_cache_page_write_begin(mapping, index, flags); | ||
2011 | if (!page) | ||
2012 | goto error; | ||
2013 | |||
2014 | fuse_wait_on_page_writeback(mapping->host, page->index); | ||
2015 | |||
2016 | if (PageUptodate(page) || len == PAGE_CACHE_SIZE) | ||
2017 | goto success; | ||
2018 | /* | ||
2019 | * Check if the start this page comes after the end of file, in which | ||
2020 | * case the readpage can be optimized away. | ||
2021 | */ | ||
2022 | fsize = i_size_read(mapping->host); | ||
2023 | if (fsize <= (pos & PAGE_CACHE_MASK)) { | ||
2024 | size_t off = pos & ~PAGE_CACHE_MASK; | ||
2025 | if (off) | ||
2026 | zero_user_segment(page, 0, off); | ||
2027 | goto success; | ||
2028 | } | ||
2029 | err = fuse_do_readpage(file, page); | ||
2030 | if (err) | ||
2031 | goto cleanup; | ||
2032 | success: | ||
2033 | *pagep = page; | ||
2034 | return 0; | ||
2035 | |||
2036 | cleanup: | ||
2037 | unlock_page(page); | ||
2038 | page_cache_release(page); | ||
2039 | error: | ||
2040 | return err; | ||
2041 | } | ||
2042 | |||
2043 | static int fuse_write_end(struct file *file, struct address_space *mapping, | ||
2044 | loff_t pos, unsigned len, unsigned copied, | ||
2045 | struct page *page, void *fsdata) | ||
2046 | { | ||
2047 | struct inode *inode = page->mapping->host; | ||
2048 | |||
2049 | if (!PageUptodate(page)) { | ||
2050 | /* Zero any unwritten bytes at the end of the page */ | ||
2051 | size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK; | ||
2052 | if (endoff) | ||
2053 | zero_user_segment(page, endoff, PAGE_CACHE_SIZE); | ||
2054 | SetPageUptodate(page); | ||
2055 | } | ||
2056 | |||
2057 | fuse_write_update_size(inode, pos + copied); | ||
2058 | set_page_dirty(page); | ||
2059 | unlock_page(page); | ||
2060 | page_cache_release(page); | ||
2061 | |||
2062 | return copied; | ||
2063 | } | ||
2064 | |||
1888 | static int fuse_launder_page(struct page *page) | 2065 | static int fuse_launder_page(struct page *page) |
1889 | { | 2066 | { |
1890 | int err = 0; | 2067 | int err = 0; |
@@ -1940,26 +2117,16 @@ static int fuse_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1940 | static const struct vm_operations_struct fuse_file_vm_ops = { | 2117 | static const struct vm_operations_struct fuse_file_vm_ops = { |
1941 | .close = fuse_vma_close, | 2118 | .close = fuse_vma_close, |
1942 | .fault = filemap_fault, | 2119 | .fault = filemap_fault, |
2120 | .map_pages = filemap_map_pages, | ||
1943 | .page_mkwrite = fuse_page_mkwrite, | 2121 | .page_mkwrite = fuse_page_mkwrite, |
1944 | .remap_pages = generic_file_remap_pages, | 2122 | .remap_pages = generic_file_remap_pages, |
1945 | }; | 2123 | }; |
1946 | 2124 | ||
1947 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) | 2125 | static int fuse_file_mmap(struct file *file, struct vm_area_struct *vma) |
1948 | { | 2126 | { |
1949 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) { | 2127 | if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE)) |
1950 | struct inode *inode = file_inode(file); | 2128 | fuse_link_write_file(file); |
1951 | struct fuse_conn *fc = get_fuse_conn(inode); | 2129 | |
1952 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
1953 | struct fuse_file *ff = file->private_data; | ||
1954 | /* | ||
1955 | * file may be written through mmap, so chain it onto the | ||
1956 | * inodes's write_file list | ||
1957 | */ | ||
1958 | spin_lock(&fc->lock); | ||
1959 | if (list_empty(&ff->write_entry)) | ||
1960 | list_add(&ff->write_entry, &fi->write_files); | ||
1961 | spin_unlock(&fc->lock); | ||
1962 | } | ||
1963 | file_accessed(file); | 2130 | file_accessed(file); |
1964 | vma->vm_ops = &fuse_file_vm_ops; | 2131 | vma->vm_ops = &fuse_file_vm_ops; |
1965 | return 0; | 2132 | return 0; |
@@ -2606,7 +2773,7 @@ static void fuse_register_polled_file(struct fuse_conn *fc, | |||
2606 | { | 2773 | { |
2607 | spin_lock(&fc->lock); | 2774 | spin_lock(&fc->lock); |
2608 | if (RB_EMPTY_NODE(&ff->polled_node)) { | 2775 | if (RB_EMPTY_NODE(&ff->polled_node)) { |
2609 | struct rb_node **link, *parent; | 2776 | struct rb_node **link, *uninitialized_var(parent); |
2610 | 2777 | ||
2611 | link = fuse_find_polled_node(fc, ff->kh, &parent); | 2778 | link = fuse_find_polled_node(fc, ff->kh, &parent); |
2612 | BUG_ON(*link); | 2779 | BUG_ON(*link); |
@@ -2850,8 +3017,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, | |||
2850 | goto out; | 3017 | goto out; |
2851 | 3018 | ||
2852 | /* we could have extended the file */ | 3019 | /* we could have extended the file */ |
2853 | if (!(mode & FALLOC_FL_KEEP_SIZE)) | 3020 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
2854 | fuse_write_update_size(inode, offset + length); | 3021 | bool changed = fuse_write_update_size(inode, offset + length); |
3022 | |||
3023 | if (changed && fc->writeback_cache) { | ||
3024 | struct fuse_inode *fi = get_fuse_inode(inode); | ||
3025 | |||
3026 | inode->i_mtime = current_fs_time(inode->i_sb); | ||
3027 | set_bit(FUSE_I_MTIME_DIRTY, &fi->state); | ||
3028 | } | ||
3029 | } | ||
2855 | 3030 | ||
2856 | if (mode & FALLOC_FL_PUNCH_HOLE) | 3031 | if (mode & FALLOC_FL_PUNCH_HOLE) |
2857 | truncate_pagecache_range(inode, offset, offset + length - 1); | 3032 | truncate_pagecache_range(inode, offset, offset + length - 1); |
@@ -2915,6 +3090,8 @@ static const struct address_space_operations fuse_file_aops = { | |||
2915 | .set_page_dirty = __set_page_dirty_nobuffers, | 3090 | .set_page_dirty = __set_page_dirty_nobuffers, |
2916 | .bmap = fuse_bmap, | 3091 | .bmap = fuse_bmap, |
2917 | .direct_IO = fuse_direct_IO, | 3092 | .direct_IO = fuse_direct_IO, |
3093 | .write_begin = fuse_write_begin, | ||
3094 | .write_end = fuse_write_end, | ||
2918 | }; | 3095 | }; |
2919 | 3096 | ||
2920 | void fuse_init_file_inode(struct inode *inode) | 3097 | void fuse_init_file_inode(struct inode *inode) |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 2da5db2c8bdb..a257ed8ebee6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
@@ -119,6 +119,8 @@ enum { | |||
119 | FUSE_I_INIT_RDPLUS, | 119 | FUSE_I_INIT_RDPLUS, |
120 | /** An operation changing file size is in progress */ | 120 | /** An operation changing file size is in progress */ |
121 | FUSE_I_SIZE_UNSTABLE, | 121 | FUSE_I_SIZE_UNSTABLE, |
122 | /** i_mtime has been updated locally; a flush to userspace needed */ | ||
123 | FUSE_I_MTIME_DIRTY, | ||
122 | }; | 124 | }; |
123 | 125 | ||
124 | struct fuse_conn; | 126 | struct fuse_conn; |
@@ -480,6 +482,9 @@ struct fuse_conn { | |||
480 | /** Set if bdi is valid */ | 482 | /** Set if bdi is valid */ |
481 | unsigned bdi_initialized:1; | 483 | unsigned bdi_initialized:1; |
482 | 484 | ||
485 | /** write-back cache policy (default is write-through) */ | ||
486 | unsigned writeback_cache:1; | ||
487 | |||
483 | /* | 488 | /* |
484 | * The following bitfields are only for optimization purposes | 489 | * The following bitfields are only for optimization purposes |
485 | * and hence races in setting them will not cause malfunction | 490 | * and hence races in setting them will not cause malfunction |
@@ -863,9 +868,20 @@ int fuse_reverse_inval_entry(struct super_block *sb, u64 parent_nodeid, | |||
863 | 868 | ||
864 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, | 869 | int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, |
865 | bool isdir); | 870 | bool isdir); |
871 | |||
872 | /** | ||
873 | * fuse_direct_io() flags | ||
874 | */ | ||
875 | |||
876 | /** If set, it is WRITE; otherwise - READ */ | ||
877 | #define FUSE_DIO_WRITE (1 << 0) | ||
878 | |||
879 | /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ | ||
880 | #define FUSE_DIO_CUSE (1 << 1) | ||
881 | |||
866 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, | 882 | ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, |
867 | unsigned long nr_segs, size_t count, loff_t *ppos, | 883 | unsigned long nr_segs, size_t count, loff_t *ppos, |
868 | int write); | 884 | int flags); |
869 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, | 885 | long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, |
870 | unsigned int flags); | 886 | unsigned int flags); |
871 | long fuse_ioctl_common(struct file *file, unsigned int cmd, | 887 | long fuse_ioctl_common(struct file *file, unsigned int cmd, |
@@ -873,7 +889,9 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd, | |||
873 | unsigned fuse_file_poll(struct file *file, poll_table *wait); | 889 | unsigned fuse_file_poll(struct file *file, poll_table *wait); |
874 | int fuse_dev_release(struct inode *inode, struct file *file); | 890 | int fuse_dev_release(struct inode *inode, struct file *file); |
875 | 891 | ||
876 | void fuse_write_update_size(struct inode *inode, loff_t pos); | 892 | bool fuse_write_update_size(struct inode *inode, loff_t pos); |
893 | |||
894 | int fuse_flush_mtime(struct file *file, bool nofail); | ||
877 | 895 | ||
878 | int fuse_do_setattr(struct inode *inode, struct iattr *attr, | 896 | int fuse_do_setattr(struct inode *inode, struct iattr *attr, |
879 | struct file *file); | 897 | struct file *file); |
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d468643a68b2..8d611696fcad 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
@@ -123,7 +123,7 @@ static void fuse_destroy_inode(struct inode *inode) | |||
123 | 123 | ||
124 | static void fuse_evict_inode(struct inode *inode) | 124 | static void fuse_evict_inode(struct inode *inode) |
125 | { | 125 | { |
126 | truncate_inode_pages(&inode->i_data, 0); | 126 | truncate_inode_pages_final(&inode->i_data); |
127 | clear_inode(inode); | 127 | clear_inode(inode); |
128 | if (inode->i_sb->s_flags & MS_ACTIVE) { | 128 | if (inode->i_sb->s_flags & MS_ACTIVE) { |
129 | struct fuse_conn *fc = get_fuse_conn(inode); | 129 | struct fuse_conn *fc = get_fuse_conn(inode); |
@@ -135,6 +135,7 @@ static void fuse_evict_inode(struct inode *inode) | |||
135 | 135 | ||
136 | static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) | 136 | static int fuse_remount_fs(struct super_block *sb, int *flags, char *data) |
137 | { | 137 | { |
138 | sync_filesystem(sb); | ||
138 | if (*flags & MS_MANDLOCK) | 139 | if (*flags & MS_MANDLOCK) |
139 | return -EINVAL; | 140 | return -EINVAL; |
140 | 141 | ||
@@ -170,8 +171,11 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, | |||
170 | inode->i_blocks = attr->blocks; | 171 | inode->i_blocks = attr->blocks; |
171 | inode->i_atime.tv_sec = attr->atime; | 172 | inode->i_atime.tv_sec = attr->atime; |
172 | inode->i_atime.tv_nsec = attr->atimensec; | 173 | inode->i_atime.tv_nsec = attr->atimensec; |
173 | inode->i_mtime.tv_sec = attr->mtime; | 174 | /* mtime from server may be stale due to local buffered write */ |
174 | inode->i_mtime.tv_nsec = attr->mtimensec; | 175 | if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) { |
176 | inode->i_mtime.tv_sec = attr->mtime; | ||
177 | inode->i_mtime.tv_nsec = attr->mtimensec; | ||
178 | } | ||
175 | inode->i_ctime.tv_sec = attr->ctime; | 179 | inode->i_ctime.tv_sec = attr->ctime; |
176 | inode->i_ctime.tv_nsec = attr->ctimensec; | 180 | inode->i_ctime.tv_nsec = attr->ctimensec; |
177 | 181 | ||
@@ -197,6 +201,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
197 | { | 201 | { |
198 | struct fuse_conn *fc = get_fuse_conn(inode); | 202 | struct fuse_conn *fc = get_fuse_conn(inode); |
199 | struct fuse_inode *fi = get_fuse_inode(inode); | 203 | struct fuse_inode *fi = get_fuse_inode(inode); |
204 | bool is_wb = fc->writeback_cache; | ||
200 | loff_t oldsize; | 205 | loff_t oldsize; |
201 | struct timespec old_mtime; | 206 | struct timespec old_mtime; |
202 | 207 | ||
@@ -211,10 +216,16 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, | |||
211 | fuse_change_attributes_common(inode, attr, attr_valid); | 216 | fuse_change_attributes_common(inode, attr, attr_valid); |
212 | 217 | ||
213 | oldsize = inode->i_size; | 218 | oldsize = inode->i_size; |
214 | i_size_write(inode, attr->size); | 219 | /* |
220 | * In case of writeback_cache enabled, the cached writes beyond EOF | ||
221 | * extend local i_size without keeping userspace server in sync. So, | ||
222 | * attr->size coming from server can be stale. We cannot trust it. | ||
223 | */ | ||
224 | if (!is_wb || !S_ISREG(inode->i_mode)) | ||
225 | i_size_write(inode, attr->size); | ||
215 | spin_unlock(&fc->lock); | 226 | spin_unlock(&fc->lock); |
216 | 227 | ||
217 | if (S_ISREG(inode->i_mode)) { | 228 | if (!is_wb && S_ISREG(inode->i_mode)) { |
218 | bool inval = false; | 229 | bool inval = false; |
219 | 230 | ||
220 | if (oldsize != attr->size) { | 231 | if (oldsize != attr->size) { |
@@ -243,6 +254,8 @@ static void fuse_init_inode(struct inode *inode, struct fuse_attr *attr) | |||
243 | { | 254 | { |
244 | inode->i_mode = attr->mode & S_IFMT; | 255 | inode->i_mode = attr->mode & S_IFMT; |
245 | inode->i_size = attr->size; | 256 | inode->i_size = attr->size; |
257 | inode->i_mtime.tv_sec = attr->mtime; | ||
258 | inode->i_mtime.tv_nsec = attr->mtimensec; | ||
246 | if (S_ISREG(inode->i_mode)) { | 259 | if (S_ISREG(inode->i_mode)) { |
247 | fuse_init_common(inode); | 260 | fuse_init_common(inode); |
248 | fuse_init_file_inode(inode); | 261 | fuse_init_file_inode(inode); |
@@ -289,7 +302,9 @@ struct inode *fuse_iget(struct super_block *sb, u64 nodeid, | |||
289 | return NULL; | 302 | return NULL; |
290 | 303 | ||
291 | if ((inode->i_state & I_NEW)) { | 304 | if ((inode->i_state & I_NEW)) { |
292 | inode->i_flags |= S_NOATIME|S_NOCMTIME; | 305 | inode->i_flags |= S_NOATIME; |
306 | if (!fc->writeback_cache || !S_ISREG(inode->i_mode)) | ||
307 | inode->i_flags |= S_NOCMTIME; | ||
293 | inode->i_generation = generation; | 308 | inode->i_generation = generation; |
294 | inode->i_data.backing_dev_info = &fc->bdi; | 309 | inode->i_data.backing_dev_info = &fc->bdi; |
295 | fuse_init_inode(inode, attr); | 310 | fuse_init_inode(inode, attr); |
@@ -873,6 +888,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req) | |||
873 | } | 888 | } |
874 | if (arg->flags & FUSE_ASYNC_DIO) | 889 | if (arg->flags & FUSE_ASYNC_DIO) |
875 | fc->async_dio = 1; | 890 | fc->async_dio = 1; |
891 | if (arg->flags & FUSE_WRITEBACK_CACHE) | ||
892 | fc->writeback_cache = 1; | ||
876 | } else { | 893 | } else { |
877 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; | 894 | ra_pages = fc->max_read / PAGE_CACHE_SIZE; |
878 | fc->no_lock = 1; | 895 | fc->no_lock = 1; |
@@ -900,7 +917,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req) | |||
900 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | | 917 | FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK | |
901 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | | 918 | FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ | |
902 | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | | 919 | FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA | |
903 | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO; | 920 | FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO | |
921 | FUSE_WRITEBACK_CACHE; | ||
904 | req->in.h.opcode = FUSE_INIT; | 922 | req->in.h.opcode = FUSE_INIT; |
905 | req->in.numargs = 1; | 923 | req->in.numargs = 1; |
906 | req->in.args[0].size = sizeof(*arg); | 924 | req->in.args[0].size = sizeof(*arg); |
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index ba9456685f47..3088e2a38e30 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c | |||
@@ -64,18 +64,6 @@ struct posix_acl *gfs2_get_acl(struct inode *inode, int type) | |||
64 | return acl; | 64 | return acl; |
65 | } | 65 | } |
66 | 66 | ||
67 | static int gfs2_set_mode(struct inode *inode, umode_t mode) | ||
68 | { | ||
69 | int error = 0; | ||
70 | |||
71 | if (mode != inode->i_mode) { | ||
72 | inode->i_mode = mode; | ||
73 | mark_inode_dirty(inode); | ||
74 | } | ||
75 | |||
76 | return error; | ||
77 | } | ||
78 | |||
79 | int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | 67 | int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) |
80 | { | 68 | { |
81 | int error; | 69 | int error; |
@@ -85,8 +73,8 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
85 | 73 | ||
86 | BUG_ON(name == NULL); | 74 | BUG_ON(name == NULL); |
87 | 75 | ||
88 | if (acl->a_count > GFS2_ACL_MAX_ENTRIES) | 76 | if (acl->a_count > GFS2_ACL_MAX_ENTRIES(GFS2_SB(inode))) |
89 | return -EINVAL; | 77 | return -E2BIG; |
90 | 78 | ||
91 | if (type == ACL_TYPE_ACCESS) { | 79 | if (type == ACL_TYPE_ACCESS) { |
92 | umode_t mode = inode->i_mode; | 80 | umode_t mode = inode->i_mode; |
@@ -98,9 +86,10 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
98 | if (error == 0) | 86 | if (error == 0) |
99 | acl = NULL; | 87 | acl = NULL; |
100 | 88 | ||
101 | error = gfs2_set_mode(inode, mode); | 89 | if (mode != inode->i_mode) { |
102 | if (error) | 90 | inode->i_mode = mode; |
103 | return error; | 91 | mark_inode_dirty(inode); |
92 | } | ||
104 | } | 93 | } |
105 | 94 | ||
106 | if (acl) { | 95 | if (acl) { |
diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index 301260c999ba..2d65ec4cd4be 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h | |||
@@ -14,7 +14,7 @@ | |||
14 | 14 | ||
15 | #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" | 15 | #define GFS2_POSIX_ACL_ACCESS "posix_acl_access" |
16 | #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" | 16 | #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" |
17 | #define GFS2_ACL_MAX_ENTRIES 25 | 17 | #define GFS2_ACL_MAX_ENTRIES(sdp) ((300 << (sdp)->sd_sb.sb_bsize_shift) >> 12) |
18 | 18 | ||
19 | extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); | 19 | extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); |
20 | extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); | 20 | extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type); |
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 49436fa7cd4f..ce62dcac90b6 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
@@ -21,6 +21,7 @@ | |||
21 | #include <linux/gfs2_ondisk.h> | 21 | #include <linux/gfs2_ondisk.h> |
22 | #include <linux/backing-dev.h> | 22 | #include <linux/backing-dev.h> |
23 | #include <linux/aio.h> | 23 | #include <linux/aio.h> |
24 | #include <trace/events/writeback.h> | ||
24 | 25 | ||
25 | #include "gfs2.h" | 26 | #include "gfs2.h" |
26 | #include "incore.h" | 27 | #include "incore.h" |
@@ -230,13 +231,11 @@ static int gfs2_writepages(struct address_space *mapping, | |||
230 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, | 231 | static int gfs2_write_jdata_pagevec(struct address_space *mapping, |
231 | struct writeback_control *wbc, | 232 | struct writeback_control *wbc, |
232 | struct pagevec *pvec, | 233 | struct pagevec *pvec, |
233 | int nr_pages, pgoff_t end) | 234 | int nr_pages, pgoff_t end, |
235 | pgoff_t *done_index) | ||
234 | { | 236 | { |
235 | struct inode *inode = mapping->host; | 237 | struct inode *inode = mapping->host; |
236 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 238 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
237 | loff_t i_size = i_size_read(inode); | ||
238 | pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT; | ||
239 | unsigned offset = i_size & (PAGE_CACHE_SIZE-1); | ||
240 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); | 239 | unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize); |
241 | int i; | 240 | int i; |
242 | int ret; | 241 | int ret; |
@@ -248,40 +247,83 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
248 | for(i = 0; i < nr_pages; i++) { | 247 | for(i = 0; i < nr_pages; i++) { |
249 | struct page *page = pvec->pages[i]; | 248 | struct page *page = pvec->pages[i]; |
250 | 249 | ||
250 | /* | ||
251 | * At this point, the page may be truncated or | ||
252 | * invalidated (changing page->mapping to NULL), or | ||
253 | * even swizzled back from swapper_space to tmpfs file | ||
254 | * mapping. However, page->index will not change | ||
255 | * because we have a reference on the page. | ||
256 | */ | ||
257 | if (page->index > end) { | ||
258 | /* | ||
259 | * can't be range_cyclic (1st pass) because | ||
260 | * end == -1 in that case. | ||
261 | */ | ||
262 | ret = 1; | ||
263 | break; | ||
264 | } | ||
265 | |||
266 | *done_index = page->index; | ||
267 | |||
251 | lock_page(page); | 268 | lock_page(page); |
252 | 269 | ||
253 | if (unlikely(page->mapping != mapping)) { | 270 | if (unlikely(page->mapping != mapping)) { |
271 | continue_unlock: | ||
254 | unlock_page(page); | 272 | unlock_page(page); |
255 | continue; | 273 | continue; |
256 | } | 274 | } |
257 | 275 | ||
258 | if (!wbc->range_cyclic && page->index > end) { | 276 | if (!PageDirty(page)) { |
259 | ret = 1; | 277 | /* someone wrote it for us */ |
260 | unlock_page(page); | 278 | goto continue_unlock; |
261 | continue; | ||
262 | } | 279 | } |
263 | 280 | ||
264 | if (wbc->sync_mode != WB_SYNC_NONE) | 281 | if (PageWriteback(page)) { |
265 | wait_on_page_writeback(page); | 282 | if (wbc->sync_mode != WB_SYNC_NONE) |
266 | 283 | wait_on_page_writeback(page); | |
267 | if (PageWriteback(page) || | 284 | else |
268 | !clear_page_dirty_for_io(page)) { | 285 | goto continue_unlock; |
269 | unlock_page(page); | ||
270 | continue; | ||
271 | } | 286 | } |
272 | 287 | ||
273 | /* Is the page fully outside i_size? (truncate in progress) */ | 288 | BUG_ON(PageWriteback(page)); |
274 | if (page->index > end_index || (page->index == end_index && !offset)) { | 289 | if (!clear_page_dirty_for_io(page)) |
275 | page->mapping->a_ops->invalidatepage(page, 0, | 290 | goto continue_unlock; |
276 | PAGE_CACHE_SIZE); | 291 | |
277 | unlock_page(page); | 292 | trace_wbc_writepage(wbc, mapping->backing_dev_info); |
278 | continue; | ||
279 | } | ||
280 | 293 | ||
281 | ret = __gfs2_jdata_writepage(page, wbc); | 294 | ret = __gfs2_jdata_writepage(page, wbc); |
295 | if (unlikely(ret)) { | ||
296 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | ||
297 | unlock_page(page); | ||
298 | ret = 0; | ||
299 | } else { | ||
300 | |||
301 | /* | ||
302 | * done_index is set past this page, | ||
303 | * so media errors will not choke | ||
304 | * background writeout for the entire | ||
305 | * file. This has consequences for | ||
306 | * range_cyclic semantics (ie. it may | ||
307 | * not be suitable for data integrity | ||
308 | * writeout). | ||
309 | */ | ||
310 | *done_index = page->index + 1; | ||
311 | ret = 1; | ||
312 | break; | ||
313 | } | ||
314 | } | ||
282 | 315 | ||
283 | if (ret || (--(wbc->nr_to_write) <= 0)) | 316 | /* |
317 | * We stop writing back only if we are not doing | ||
318 | * integrity sync. In case of integrity sync we have to | ||
319 | * keep going until we have written all the pages | ||
320 | * we tagged for writeback prior to entering this loop. | ||
321 | */ | ||
322 | if (--wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE) { | ||
284 | ret = 1; | 323 | ret = 1; |
324 | break; | ||
325 | } | ||
326 | |||
285 | } | 327 | } |
286 | gfs2_trans_end(sdp); | 328 | gfs2_trans_end(sdp); |
287 | return ret; | 329 | return ret; |
@@ -306,51 +348,69 @@ static int gfs2_write_cache_jdata(struct address_space *mapping, | |||
306 | int done = 0; | 348 | int done = 0; |
307 | struct pagevec pvec; | 349 | struct pagevec pvec; |
308 | int nr_pages; | 350 | int nr_pages; |
351 | pgoff_t uninitialized_var(writeback_index); | ||
309 | pgoff_t index; | 352 | pgoff_t index; |
310 | pgoff_t end; | 353 | pgoff_t end; |
311 | int scanned = 0; | 354 | pgoff_t done_index; |
355 | int cycled; | ||
312 | int range_whole = 0; | 356 | int range_whole = 0; |
357 | int tag; | ||
313 | 358 | ||
314 | pagevec_init(&pvec, 0); | 359 | pagevec_init(&pvec, 0); |
315 | if (wbc->range_cyclic) { | 360 | if (wbc->range_cyclic) { |
316 | index = mapping->writeback_index; /* Start from prev offset */ | 361 | writeback_index = mapping->writeback_index; /* prev offset */ |
362 | index = writeback_index; | ||
363 | if (index == 0) | ||
364 | cycled = 1; | ||
365 | else | ||
366 | cycled = 0; | ||
317 | end = -1; | 367 | end = -1; |
318 | } else { | 368 | } else { |
319 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 369 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
320 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 370 | end = wbc->range_end >> PAGE_CACHE_SHIFT; |
321 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 371 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
322 | range_whole = 1; | 372 | range_whole = 1; |
323 | scanned = 1; | 373 | cycled = 1; /* ignore range_cyclic tests */ |
324 | } | 374 | } |
375 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | ||
376 | tag = PAGECACHE_TAG_TOWRITE; | ||
377 | else | ||
378 | tag = PAGECACHE_TAG_DIRTY; | ||
325 | 379 | ||
326 | retry: | 380 | retry: |
327 | while (!done && (index <= end) && | 381 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
328 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 382 | tag_pages_for_writeback(mapping, index, end); |
329 | PAGECACHE_TAG_DIRTY, | 383 | done_index = index; |
330 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { | 384 | while (!done && (index <= end)) { |
331 | scanned = 1; | 385 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
332 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end); | 386 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
387 | if (nr_pages == 0) | ||
388 | break; | ||
389 | |||
390 | ret = gfs2_write_jdata_pagevec(mapping, wbc, &pvec, nr_pages, end, &done_index); | ||
333 | if (ret) | 391 | if (ret) |
334 | done = 1; | 392 | done = 1; |
335 | if (ret > 0) | 393 | if (ret > 0) |
336 | ret = 0; | 394 | ret = 0; |
337 | |||
338 | pagevec_release(&pvec); | 395 | pagevec_release(&pvec); |
339 | cond_resched(); | 396 | cond_resched(); |
340 | } | 397 | } |
341 | 398 | ||
342 | if (!scanned && !done) { | 399 | if (!cycled && !done) { |
343 | /* | 400 | /* |
401 | * range_cyclic: | ||
344 | * We hit the last page and there is more work to be done: wrap | 402 | * We hit the last page and there is more work to be done: wrap |
345 | * back to the start of the file | 403 | * back to the start of the file |
346 | */ | 404 | */ |
347 | scanned = 1; | 405 | cycled = 1; |
348 | index = 0; | 406 | index = 0; |
407 | end = writeback_index - 1; | ||
349 | goto retry; | 408 | goto retry; |
350 | } | 409 | } |
351 | 410 | ||
352 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 411 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
353 | mapping->writeback_index = index; | 412 | mapping->writeback_index = done_index; |
413 | |||
354 | return ret; | 414 | return ret; |
355 | } | 415 | } |
356 | 416 | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index fe0500c0af7a..c62d4b9f51dc 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
@@ -1328,6 +1328,121 @@ int gfs2_file_dealloc(struct gfs2_inode *ip) | |||
1328 | } | 1328 | } |
1329 | 1329 | ||
1330 | /** | 1330 | /** |
1331 | * gfs2_free_journal_extents - Free cached journal bmap info | ||
1332 | * @jd: The journal | ||
1333 | * | ||
1334 | */ | ||
1335 | |||
1336 | void gfs2_free_journal_extents(struct gfs2_jdesc *jd) | ||
1337 | { | ||
1338 | struct gfs2_journal_extent *jext; | ||
1339 | |||
1340 | while(!list_empty(&jd->extent_list)) { | ||
1341 | jext = list_entry(jd->extent_list.next, struct gfs2_journal_extent, list); | ||
1342 | list_del(&jext->list); | ||
1343 | kfree(jext); | ||
1344 | } | ||
1345 | } | ||
1346 | |||
1347 | /** | ||
1348 | * gfs2_add_jextent - Add or merge a new extent to extent cache | ||
1349 | * @jd: The journal descriptor | ||
1350 | * @lblock: The logical block at start of new extent | ||
1351 | * @pblock: The physical block at start of new extent | ||
1352 | * @blocks: Size of extent in fs blocks | ||
1353 | * | ||
1354 | * Returns: 0 on success or -ENOMEM | ||
1355 | */ | ||
1356 | |||
1357 | static int gfs2_add_jextent(struct gfs2_jdesc *jd, u64 lblock, u64 dblock, u64 blocks) | ||
1358 | { | ||
1359 | struct gfs2_journal_extent *jext; | ||
1360 | |||
1361 | if (!list_empty(&jd->extent_list)) { | ||
1362 | jext = list_entry(jd->extent_list.prev, struct gfs2_journal_extent, list); | ||
1363 | if ((jext->dblock + jext->blocks) == dblock) { | ||
1364 | jext->blocks += blocks; | ||
1365 | return 0; | ||
1366 | } | ||
1367 | } | ||
1368 | |||
1369 | jext = kzalloc(sizeof(struct gfs2_journal_extent), GFP_NOFS); | ||
1370 | if (jext == NULL) | ||
1371 | return -ENOMEM; | ||
1372 | jext->dblock = dblock; | ||
1373 | jext->lblock = lblock; | ||
1374 | jext->blocks = blocks; | ||
1375 | list_add_tail(&jext->list, &jd->extent_list); | ||
1376 | jd->nr_extents++; | ||
1377 | return 0; | ||
1378 | } | ||
1379 | |||
1380 | /** | ||
1381 | * gfs2_map_journal_extents - Cache journal bmap info | ||
1382 | * @sdp: The super block | ||
1383 | * @jd: The journal to map | ||
1384 | * | ||
1385 | * Create a reusable "extent" mapping from all logical | ||
1386 | * blocks to all physical blocks for the given journal. This will save | ||
1387 | * us time when writing journal blocks. Most journals will have only one | ||
1388 | * extent that maps all their logical blocks. That's because gfs2.mkfs | ||
1389 | * arranges the journal blocks sequentially to maximize performance. | ||
1390 | * So the extent would map the first block for the entire file length. | ||
1391 | * However, gfs2_jadd can happen while file activity is happening, so | ||
1392 | * those journals may not be sequential. Less likely is the case where | ||
1393 | * the users created their own journals by mounting the metafs and | ||
1394 | * laying it out. But it's still possible. These journals might have | ||
1395 | * several extents. | ||
1396 | * | ||
1397 | * Returns: 0 on success, or error on failure | ||
1398 | */ | ||
1399 | |||
1400 | int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd) | ||
1401 | { | ||
1402 | u64 lblock = 0; | ||
1403 | u64 lblock_stop; | ||
1404 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
1405 | struct buffer_head bh; | ||
1406 | unsigned int shift = sdp->sd_sb.sb_bsize_shift; | ||
1407 | u64 size; | ||
1408 | int rc; | ||
1409 | |||
1410 | lblock_stop = i_size_read(jd->jd_inode) >> shift; | ||
1411 | size = (lblock_stop - lblock) << shift; | ||
1412 | jd->nr_extents = 0; | ||
1413 | WARN_ON(!list_empty(&jd->extent_list)); | ||
1414 | |||
1415 | do { | ||
1416 | bh.b_state = 0; | ||
1417 | bh.b_blocknr = 0; | ||
1418 | bh.b_size = size; | ||
1419 | rc = gfs2_block_map(jd->jd_inode, lblock, &bh, 0); | ||
1420 | if (rc || !buffer_mapped(&bh)) | ||
1421 | goto fail; | ||
1422 | rc = gfs2_add_jextent(jd, lblock, bh.b_blocknr, bh.b_size >> shift); | ||
1423 | if (rc) | ||
1424 | goto fail; | ||
1425 | size -= bh.b_size; | ||
1426 | lblock += (bh.b_size >> ip->i_inode.i_blkbits); | ||
1427 | } while(size > 0); | ||
1428 | |||
1429 | fs_info(sdp, "journal %d mapped with %u extents\n", jd->jd_jid, | ||
1430 | jd->nr_extents); | ||
1431 | return 0; | ||
1432 | |||
1433 | fail: | ||
1434 | fs_warn(sdp, "error %d mapping journal %u at offset %llu (extent %u)\n", | ||
1435 | rc, jd->jd_jid, | ||
1436 | (unsigned long long)(i_size_read(jd->jd_inode) - size), | ||
1437 | jd->nr_extents); | ||
1438 | fs_warn(sdp, "bmap=%d lblock=%llu block=%llu, state=0x%08lx, size=%llu\n", | ||
1439 | rc, (unsigned long long)lblock, (unsigned long long)bh.b_blocknr, | ||
1440 | bh.b_state, (unsigned long long)bh.b_size); | ||
1441 | gfs2_free_journal_extents(jd); | ||
1442 | return rc; | ||
1443 | } | ||
1444 | |||
1445 | /** | ||
1331 | * gfs2_write_alloc_required - figure out if a write will require an allocation | 1446 | * gfs2_write_alloc_required - figure out if a write will require an allocation |
1332 | * @ip: the file being written to | 1447 | * @ip: the file being written to |
1333 | * @offset: the offset to write to | 1448 | * @offset: the offset to write to |
diff --git a/fs/gfs2/bmap.h b/fs/gfs2/bmap.h index 42fea03e2bd9..81ded5e2aaa2 100644 --- a/fs/gfs2/bmap.h +++ b/fs/gfs2/bmap.h | |||
@@ -55,5 +55,7 @@ extern int gfs2_truncatei_resume(struct gfs2_inode *ip); | |||
55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); | 55 | extern int gfs2_file_dealloc(struct gfs2_inode *ip); |
56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, | 56 | extern int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset, |
57 | unsigned int len); | 57 | unsigned int len); |
58 | extern int gfs2_map_journal_extents(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd); | ||
59 | extern void gfs2_free_journal_extents(struct gfs2_jdesc *jd); | ||
58 | 60 | ||
59 | #endif /* __BMAP_DOT_H__ */ | 61 | #endif /* __BMAP_DOT_H__ */ |
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index fa32655449c8..1a349f9a9685 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
@@ -53,6 +53,8 @@ | |||
53 | * but never before the maximum hash table size has been reached. | 53 | * but never before the maximum hash table size has been reached. |
54 | */ | 54 | */ |
55 | 55 | ||
56 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
57 | |||
56 | #include <linux/slab.h> | 58 | #include <linux/slab.h> |
57 | #include <linux/spinlock.h> | 59 | #include <linux/spinlock.h> |
58 | #include <linux/buffer_head.h> | 60 | #include <linux/buffer_head.h> |
@@ -507,8 +509,8 @@ static int gfs2_check_dirent(struct gfs2_dirent *dent, unsigned int offset, | |||
507 | goto error; | 509 | goto error; |
508 | return 0; | 510 | return 0; |
509 | error: | 511 | error: |
510 | printk(KERN_WARNING "gfs2_check_dirent: %s (%s)\n", msg, | 512 | pr_warn("%s: %s (%s)\n", |
511 | first ? "first in block" : "not first in block"); | 513 | __func__, msg, first ? "first in block" : "not first in block"); |
512 | return -EIO; | 514 | return -EIO; |
513 | } | 515 | } |
514 | 516 | ||
@@ -531,8 +533,7 @@ static int gfs2_dirent_offset(const void *buf) | |||
531 | } | 533 | } |
532 | return offset; | 534 | return offset; |
533 | wrong_type: | 535 | wrong_type: |
534 | printk(KERN_WARNING "gfs2_scan_dirent: wrong block type %u\n", | 536 | pr_warn("%s: wrong block type %u\n", __func__, be32_to_cpu(h->mh_type)); |
535 | be32_to_cpu(h->mh_type)); | ||
536 | return -1; | 537 | return -1; |
537 | } | 538 | } |
538 | 539 | ||
@@ -728,7 +729,7 @@ static int get_leaf(struct gfs2_inode *dip, u64 leaf_no, | |||
728 | 729 | ||
729 | error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); | 730 | error = gfs2_meta_read(dip->i_gl, leaf_no, DIO_WAIT, bhp); |
730 | if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { | 731 | if (!error && gfs2_metatype_check(GFS2_SB(&dip->i_inode), *bhp, GFS2_METATYPE_LF)) { |
731 | /* printk(KERN_INFO "block num=%llu\n", leaf_no); */ | 732 | /* pr_info("block num=%llu\n", leaf_no); */ |
732 | error = -EIO; | 733 | error = -EIO; |
733 | } | 734 | } |
734 | 735 | ||
@@ -1006,7 +1007,8 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name) | |||
1006 | len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); | 1007 | len = 1 << (dip->i_depth - be16_to_cpu(oleaf->lf_depth)); |
1007 | half_len = len >> 1; | 1008 | half_len = len >> 1; |
1008 | if (!half_len) { | 1009 | if (!half_len) { |
1009 | printk(KERN_WARNING "i_depth %u lf_depth %u index %u\n", dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); | 1010 | pr_warn("i_depth %u lf_depth %u index %u\n", |
1011 | dip->i_depth, be16_to_cpu(oleaf->lf_depth), index); | ||
1010 | gfs2_consist_inode(dip); | 1012 | gfs2_consist_inode(dip); |
1011 | error = -EIO; | 1013 | error = -EIO; |
1012 | goto fail_brelse; | 1014 | goto fail_brelse; |
@@ -1684,6 +1686,14 @@ static int dir_new_leaf(struct inode *inode, const struct qstr *name) | |||
1684 | return 0; | 1686 | return 0; |
1685 | } | 1687 | } |
1686 | 1688 | ||
1689 | static u16 gfs2_inode_ra_len(const struct gfs2_inode *ip) | ||
1690 | { | ||
1691 | u64 where = ip->i_no_addr + 1; | ||
1692 | if (ip->i_eattr == where) | ||
1693 | return 1; | ||
1694 | return 0; | ||
1695 | } | ||
1696 | |||
1687 | /** | 1697 | /** |
1688 | * gfs2_dir_add - Add new filename into directory | 1698 | * gfs2_dir_add - Add new filename into directory |
1689 | * @inode: The directory inode | 1699 | * @inode: The directory inode |
@@ -1721,6 +1731,7 @@ int gfs2_dir_add(struct inode *inode, const struct qstr *name, | |||
1721 | dent = gfs2_init_dirent(inode, dent, name, bh); | 1731 | dent = gfs2_init_dirent(inode, dent, name, bh); |
1722 | gfs2_inum_out(nip, dent); | 1732 | gfs2_inum_out(nip, dent); |
1723 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); | 1733 | dent->de_type = cpu_to_be16(IF2DT(nip->i_inode.i_mode)); |
1734 | dent->de_rahead = cpu_to_be16(gfs2_inode_ra_len(nip)); | ||
1724 | tv = CURRENT_TIME; | 1735 | tv = CURRENT_TIME; |
1725 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { | 1736 | if (ip->i_diskflags & GFS2_DIF_EXHASH) { |
1726 | leaf = (struct gfs2_leaf *)bh->b_data; | 1737 | leaf = (struct gfs2_leaf *)bh->b_data; |
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index efc078f0ee4e..80d67253623c 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
@@ -494,6 +494,7 @@ out: | |||
494 | 494 | ||
495 | static const struct vm_operations_struct gfs2_vm_ops = { | 495 | static const struct vm_operations_struct gfs2_vm_ops = { |
496 | .fault = filemap_fault, | 496 | .fault = filemap_fault, |
497 | .map_pages = filemap_map_pages, | ||
497 | .page_mkwrite = gfs2_page_mkwrite, | 498 | .page_mkwrite = gfs2_page_mkwrite, |
498 | .remap_pages = generic_file_remap_pages, | 499 | .remap_pages = generic_file_remap_pages, |
499 | }; | 500 | }; |
@@ -811,6 +812,8 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
811 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); | 812 | loff_t bsize_mask = ~((loff_t)sdp->sd_sb.sb_bsize - 1); |
812 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; | 813 | loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift; |
813 | loff_t max_chunk_size = UINT_MAX & bsize_mask; | 814 | loff_t max_chunk_size = UINT_MAX & bsize_mask; |
815 | struct gfs2_holder gh; | ||
816 | |||
814 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; | 817 | next = (next + 1) << sdp->sd_sb.sb_bsize_shift; |
815 | 818 | ||
816 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ | 819 | /* We only support the FALLOC_FL_KEEP_SIZE mode */ |
@@ -831,8 +834,10 @@ static long gfs2_fallocate(struct file *file, int mode, loff_t offset, | |||
831 | if (error) | 834 | if (error) |
832 | return error; | 835 | return error; |
833 | 836 | ||
834 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); | 837 | mutex_lock(&inode->i_mutex); |
835 | error = gfs2_glock_nq(&ip->i_gh); | 838 | |
839 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
840 | error = gfs2_glock_nq(&gh); | ||
836 | if (unlikely(error)) | 841 | if (unlikely(error)) |
837 | goto out_uninit; | 842 | goto out_uninit; |
838 | 843 | ||
@@ -900,9 +905,10 @@ out_trans_fail: | |||
900 | out_qunlock: | 905 | out_qunlock: |
901 | gfs2_quota_unlock(ip); | 906 | gfs2_quota_unlock(ip); |
902 | out_unlock: | 907 | out_unlock: |
903 | gfs2_glock_dq(&ip->i_gh); | 908 | gfs2_glock_dq(&gh); |
904 | out_uninit: | 909 | out_uninit: |
905 | gfs2_holder_uninit(&ip->i_gh); | 910 | gfs2_holder_uninit(&gh); |
911 | mutex_unlock(&inode->i_mutex); | ||
906 | return error; | 912 | return error; |
907 | } | 913 | } |
908 | 914 | ||
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ca0be6c69a26..aec7f73832f0 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
@@ -468,7 +470,7 @@ retry: | |||
468 | do_xmote(gl, gh, LM_ST_UNLOCKED); | 470 | do_xmote(gl, gh, LM_ST_UNLOCKED); |
469 | break; | 471 | break; |
470 | default: /* Everything else */ | 472 | default: /* Everything else */ |
471 | printk(KERN_ERR "GFS2: wanted %u got %u\n", gl->gl_target, state); | 473 | pr_err("wanted %u got %u\n", gl->gl_target, state); |
472 | GLOCK_BUG_ON(gl, 1); | 474 | GLOCK_BUG_ON(gl, 1); |
473 | } | 475 | } |
474 | spin_unlock(&gl->gl_spin); | 476 | spin_unlock(&gl->gl_spin); |
@@ -542,7 +544,7 @@ __acquires(&gl->gl_spin) | |||
542 | /* lock_dlm */ | 544 | /* lock_dlm */ |
543 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); | 545 | ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); |
544 | if (ret) { | 546 | if (ret) { |
545 | printk(KERN_ERR "GFS2: lm_lock ret %d\n", ret); | 547 | pr_err("lm_lock ret %d\n", ret); |
546 | GLOCK_BUG_ON(gl, 1); | 548 | GLOCK_BUG_ON(gl, 1); |
547 | } | 549 | } |
548 | } else { /* lock_nolock */ | 550 | } else { /* lock_nolock */ |
@@ -935,7 +937,7 @@ void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) | |||
935 | vaf.fmt = fmt; | 937 | vaf.fmt = fmt; |
936 | vaf.va = &args; | 938 | vaf.va = &args; |
937 | 939 | ||
938 | printk(KERN_ERR " %pV", &vaf); | 940 | pr_err("%pV", &vaf); |
939 | } | 941 | } |
940 | 942 | ||
941 | va_end(args); | 943 | va_end(args); |
@@ -1010,13 +1012,13 @@ do_cancel: | |||
1010 | return; | 1012 | return; |
1011 | 1013 | ||
1012 | trap_recursive: | 1014 | trap_recursive: |
1013 | printk(KERN_ERR "original: %pSR\n", (void *)gh2->gh_ip); | 1015 | pr_err("original: %pSR\n", (void *)gh2->gh_ip); |
1014 | printk(KERN_ERR "pid: %d\n", pid_nr(gh2->gh_owner_pid)); | 1016 | pr_err("pid: %d\n", pid_nr(gh2->gh_owner_pid)); |
1015 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | 1017 | pr_err("lock type: %d req lock state : %d\n", |
1016 | gh2->gh_gl->gl_name.ln_type, gh2->gh_state); | 1018 | gh2->gh_gl->gl_name.ln_type, gh2->gh_state); |
1017 | printk(KERN_ERR "new: %pSR\n", (void *)gh->gh_ip); | 1019 | pr_err("new: %pSR\n", (void *)gh->gh_ip); |
1018 | printk(KERN_ERR "pid: %d\n", pid_nr(gh->gh_owner_pid)); | 1020 | pr_err("pid: %d\n", pid_nr(gh->gh_owner_pid)); |
1019 | printk(KERN_ERR "lock type: %d req lock state : %d\n", | 1021 | pr_err("lock type: %d req lock state : %d\n", |
1020 | gh->gh_gl->gl_name.ln_type, gh->gh_state); | 1022 | gh->gh_gl->gl_name.ln_type, gh->gh_state); |
1021 | gfs2_dump_glock(NULL, gl); | 1023 | gfs2_dump_glock(NULL, gl); |
1022 | BUG(); | 1024 | BUG(); |
@@ -1045,9 +1047,13 @@ int gfs2_glock_nq(struct gfs2_holder *gh) | |||
1045 | 1047 | ||
1046 | spin_lock(&gl->gl_spin); | 1048 | spin_lock(&gl->gl_spin); |
1047 | add_to_queue(gh); | 1049 | add_to_queue(gh); |
1048 | if ((LM_FLAG_NOEXP & gh->gh_flags) && | 1050 | if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) && |
1049 | test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) | 1051 | test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) { |
1050 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); | 1052 | set_bit(GLF_REPLY_PENDING, &gl->gl_flags); |
1053 | gl->gl_lockref.count++; | ||
1054 | if (queue_delayed_work(glock_workqueue, &gl->gl_work, 0) == 0) | ||
1055 | gl->gl_lockref.count--; | ||
1056 | } | ||
1051 | run_queue(gl, 1); | 1057 | run_queue(gl, 1); |
1052 | spin_unlock(&gl->gl_spin); | 1058 | spin_unlock(&gl->gl_spin); |
1053 | 1059 | ||
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 3bf0631b5d56..54b66809e818 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
@@ -82,6 +82,8 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) | |||
82 | struct gfs2_trans tr; | 82 | struct gfs2_trans tr; |
83 | 83 | ||
84 | memset(&tr, 0, sizeof(tr)); | 84 | memset(&tr, 0, sizeof(tr)); |
85 | INIT_LIST_HEAD(&tr.tr_buf); | ||
86 | INIT_LIST_HEAD(&tr.tr_databuf); | ||
85 | tr.tr_revokes = atomic_read(&gl->gl_ail_count); | 87 | tr.tr_revokes = atomic_read(&gl->gl_ail_count); |
86 | 88 | ||
87 | if (!tr.tr_revokes) | 89 | if (!tr.tr_revokes) |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index cf0e34400f71..bdf70c18610c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
@@ -52,7 +52,7 @@ struct gfs2_log_header_host { | |||
52 | */ | 52 | */ |
53 | 53 | ||
54 | struct gfs2_log_operations { | 54 | struct gfs2_log_operations { |
55 | void (*lo_before_commit) (struct gfs2_sbd *sdp); | 55 | void (*lo_before_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); |
56 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); | 56 | void (*lo_after_commit) (struct gfs2_sbd *sdp, struct gfs2_trans *tr); |
57 | void (*lo_before_scan) (struct gfs2_jdesc *jd, | 57 | void (*lo_before_scan) (struct gfs2_jdesc *jd, |
58 | struct gfs2_log_header_host *head, int pass); | 58 | struct gfs2_log_header_host *head, int pass); |
@@ -371,6 +371,7 @@ enum { | |||
371 | GIF_ALLOC_FAILED = 2, | 371 | GIF_ALLOC_FAILED = 2, |
372 | GIF_SW_PAGED = 3, | 372 | GIF_SW_PAGED = 3, |
373 | GIF_ORDERED = 4, | 373 | GIF_ORDERED = 4, |
374 | GIF_FREE_VFS_INODE = 5, | ||
374 | }; | 375 | }; |
375 | 376 | ||
376 | struct gfs2_inode { | 377 | struct gfs2_inode { |
@@ -462,11 +463,11 @@ struct gfs2_trans { | |||
462 | unsigned int tr_blocks; | 463 | unsigned int tr_blocks; |
463 | unsigned int tr_revokes; | 464 | unsigned int tr_revokes; |
464 | unsigned int tr_reserved; | 465 | unsigned int tr_reserved; |
466 | unsigned int tr_touched:1; | ||
467 | unsigned int tr_attached:1; | ||
465 | 468 | ||
466 | struct gfs2_holder tr_t_gh; | 469 | struct gfs2_holder tr_t_gh; |
467 | 470 | ||
468 | int tr_touched; | ||
469 | int tr_attached; | ||
470 | 471 | ||
471 | unsigned int tr_num_buf_new; | 472 | unsigned int tr_num_buf_new; |
472 | unsigned int tr_num_databuf_new; | 473 | unsigned int tr_num_databuf_new; |
@@ -476,6 +477,8 @@ struct gfs2_trans { | |||
476 | unsigned int tr_num_revoke_rm; | 477 | unsigned int tr_num_revoke_rm; |
477 | 478 | ||
478 | struct list_head tr_list; | 479 | struct list_head tr_list; |
480 | struct list_head tr_databuf; | ||
481 | struct list_head tr_buf; | ||
479 | 482 | ||
480 | unsigned int tr_first; | 483 | unsigned int tr_first; |
481 | struct list_head tr_ail1_list; | 484 | struct list_head tr_ail1_list; |
@@ -483,7 +486,7 @@ struct gfs2_trans { | |||
483 | }; | 486 | }; |
484 | 487 | ||
485 | struct gfs2_journal_extent { | 488 | struct gfs2_journal_extent { |
486 | struct list_head extent_list; | 489 | struct list_head list; |
487 | 490 | ||
488 | unsigned int lblock; /* First logical block */ | 491 | unsigned int lblock; /* First logical block */ |
489 | u64 dblock; /* First disk block */ | 492 | u64 dblock; /* First disk block */ |
@@ -493,6 +496,7 @@ struct gfs2_journal_extent { | |||
493 | struct gfs2_jdesc { | 496 | struct gfs2_jdesc { |
494 | struct list_head jd_list; | 497 | struct list_head jd_list; |
495 | struct list_head extent_list; | 498 | struct list_head extent_list; |
499 | unsigned int nr_extents; | ||
496 | struct work_struct jd_work; | 500 | struct work_struct jd_work; |
497 | struct inode *jd_inode; | 501 | struct inode *jd_inode; |
498 | unsigned long jd_flags; | 502 | unsigned long jd_flags; |
@@ -500,6 +504,15 @@ struct gfs2_jdesc { | |||
500 | unsigned int jd_jid; | 504 | unsigned int jd_jid; |
501 | unsigned int jd_blocks; | 505 | unsigned int jd_blocks; |
502 | int jd_recover_error; | 506 | int jd_recover_error; |
507 | /* Replay stuff */ | ||
508 | |||
509 | unsigned int jd_found_blocks; | ||
510 | unsigned int jd_found_revokes; | ||
511 | unsigned int jd_replayed_blocks; | ||
512 | |||
513 | struct list_head jd_revoke_list; | ||
514 | unsigned int jd_replay_tail; | ||
515 | |||
503 | }; | 516 | }; |
504 | 517 | ||
505 | struct gfs2_statfs_change_host { | 518 | struct gfs2_statfs_change_host { |
@@ -746,19 +759,12 @@ struct gfs2_sbd { | |||
746 | 759 | ||
747 | struct gfs2_trans *sd_log_tr; | 760 | struct gfs2_trans *sd_log_tr; |
748 | unsigned int sd_log_blks_reserved; | 761 | unsigned int sd_log_blks_reserved; |
749 | unsigned int sd_log_commited_buf; | ||
750 | unsigned int sd_log_commited_databuf; | ||
751 | int sd_log_commited_revoke; | 762 | int sd_log_commited_revoke; |
752 | 763 | ||
753 | atomic_t sd_log_pinned; | 764 | atomic_t sd_log_pinned; |
754 | unsigned int sd_log_num_buf; | ||
755 | unsigned int sd_log_num_revoke; | 765 | unsigned int sd_log_num_revoke; |
756 | unsigned int sd_log_num_rg; | ||
757 | unsigned int sd_log_num_databuf; | ||
758 | 766 | ||
759 | struct list_head sd_log_le_buf; | ||
760 | struct list_head sd_log_le_revoke; | 767 | struct list_head sd_log_le_revoke; |
761 | struct list_head sd_log_le_databuf; | ||
762 | struct list_head sd_log_le_ordered; | 768 | struct list_head sd_log_le_ordered; |
763 | spinlock_t sd_ordered_lock; | 769 | spinlock_t sd_ordered_lock; |
764 | 770 | ||
@@ -786,15 +792,6 @@ struct gfs2_sbd { | |||
786 | struct list_head sd_ail1_list; | 792 | struct list_head sd_ail1_list; |
787 | struct list_head sd_ail2_list; | 793 | struct list_head sd_ail2_list; |
788 | 794 | ||
789 | /* Replay stuff */ | ||
790 | |||
791 | struct list_head sd_revoke_list; | ||
792 | unsigned int sd_replay_tail; | ||
793 | |||
794 | unsigned int sd_found_blocks; | ||
795 | unsigned int sd_found_revokes; | ||
796 | unsigned int sd_replayed_blocks; | ||
797 | |||
798 | /* For quiescing the filesystem */ | 795 | /* For quiescing the filesystem */ |
799 | struct gfs2_holder sd_freeze_gh; | 796 | struct gfs2_holder sd_freeze_gh; |
800 | 797 | ||
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 5c524180c98e..28cc7bf6575a 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
@@ -376,12 +376,11 @@ static void munge_mode_uid_gid(const struct gfs2_inode *dip, | |||
376 | inode->i_gid = current_fsgid(); | 376 | inode->i_gid = current_fsgid(); |
377 | } | 377 | } |
378 | 378 | ||
379 | static int alloc_dinode(struct gfs2_inode *ip, u32 flags) | 379 | static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks) |
380 | { | 380 | { |
381 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 381 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
382 | struct gfs2_alloc_parms ap = { .target = RES_DINODE, .aflags = flags, }; | 382 | struct gfs2_alloc_parms ap = { .target = *dblocks, .aflags = flags, }; |
383 | int error; | 383 | int error; |
384 | int dblocks = 1; | ||
385 | 384 | ||
386 | error = gfs2_quota_lock_check(ip); | 385 | error = gfs2_quota_lock_check(ip); |
387 | if (error) | 386 | if (error) |
@@ -391,11 +390,11 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags) | |||
391 | if (error) | 390 | if (error) |
392 | goto out_quota; | 391 | goto out_quota; |
393 | 392 | ||
394 | error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_STATFS + RES_QUOTA, 0); | 393 | error = gfs2_trans_begin(sdp, (*dblocks * RES_RG_BIT) + RES_STATFS + RES_QUOTA, 0); |
395 | if (error) | 394 | if (error) |
396 | goto out_ipreserv; | 395 | goto out_ipreserv; |
397 | 396 | ||
398 | error = gfs2_alloc_blocks(ip, &ip->i_no_addr, &dblocks, 1, &ip->i_generation); | 397 | error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation); |
399 | ip->i_no_formal_ino = ip->i_generation; | 398 | ip->i_no_formal_ino = ip->i_generation; |
400 | ip->i_inode.i_ino = ip->i_no_addr; | 399 | ip->i_inode.i_ino = ip->i_no_addr; |
401 | ip->i_goal = ip->i_no_addr; | 400 | ip->i_goal = ip->i_no_addr; |
@@ -428,6 +427,33 @@ static void gfs2_init_dir(struct buffer_head *dibh, | |||
428 | } | 427 | } |
429 | 428 | ||
430 | /** | 429 | /** |
430 | * gfs2_init_xattr - Initialise an xattr block for a new inode | ||
431 | * @ip: The inode in question | ||
432 | * | ||
433 | * This sets up an empty xattr block for a new inode, ready to | ||
434 | * take any ACLs, LSM xattrs, etc. | ||
435 | */ | ||
436 | |||
437 | static void gfs2_init_xattr(struct gfs2_inode *ip) | ||
438 | { | ||
439 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | ||
440 | struct buffer_head *bh; | ||
441 | struct gfs2_ea_header *ea; | ||
442 | |||
443 | bh = gfs2_meta_new(ip->i_gl, ip->i_eattr); | ||
444 | gfs2_trans_add_meta(ip->i_gl, bh); | ||
445 | gfs2_metatype_set(bh, GFS2_METATYPE_EA, GFS2_FORMAT_EA); | ||
446 | gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); | ||
447 | |||
448 | ea = GFS2_EA_BH2FIRST(bh); | ||
449 | ea->ea_rec_len = cpu_to_be32(sdp->sd_jbsize); | ||
450 | ea->ea_type = GFS2_EATYPE_UNUSED; | ||
451 | ea->ea_flags = GFS2_EAFLAG_LAST; | ||
452 | |||
453 | brelse(bh); | ||
454 | } | ||
455 | |||
456 | /** | ||
431 | * init_dinode - Fill in a new dinode structure | 457 | * init_dinode - Fill in a new dinode structure |
432 | * @dip: The directory this inode is being created in | 458 | * @dip: The directory this inode is being created in |
433 | * @ip: The inode | 459 | * @ip: The inode |
@@ -545,13 +571,6 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array, | |||
545 | return err; | 571 | return err; |
546 | } | 572 | } |
547 | 573 | ||
548 | static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | ||
549 | const struct qstr *qstr) | ||
550 | { | ||
551 | return security_inode_init_security(&ip->i_inode, &dip->i_inode, qstr, | ||
552 | &gfs2_initxattrs, NULL); | ||
553 | } | ||
554 | |||
555 | /** | 574 | /** |
556 | * gfs2_create_inode - Create a new inode | 575 | * gfs2_create_inode - Create a new inode |
557 | * @dir: The parent directory | 576 | * @dir: The parent directory |
@@ -578,8 +597,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
578 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 597 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
579 | struct gfs2_glock *io_gl; | 598 | struct gfs2_glock *io_gl; |
580 | struct dentry *d; | 599 | struct dentry *d; |
581 | int error; | 600 | int error, free_vfs_inode = 0; |
582 | u32 aflags = 0; | 601 | u32 aflags = 0; |
602 | unsigned blocks = 1; | ||
583 | struct gfs2_diradd da = { .bh = NULL, }; | 603 | struct gfs2_diradd da = { .bh = NULL, }; |
584 | 604 | ||
585 | if (!name->len || name->len > GFS2_FNAMESIZE) | 605 | if (!name->len || name->len > GFS2_FNAMESIZE) |
@@ -676,10 +696,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
676 | (dip->i_diskflags & GFS2_DIF_TOPDIR)) | 696 | (dip->i_diskflags & GFS2_DIF_TOPDIR)) |
677 | aflags |= GFS2_AF_ORLOV; | 697 | aflags |= GFS2_AF_ORLOV; |
678 | 698 | ||
679 | error = alloc_dinode(ip, aflags); | 699 | if (default_acl || acl) |
700 | blocks++; | ||
701 | |||
702 | error = alloc_dinode(ip, aflags, &blocks); | ||
680 | if (error) | 703 | if (error) |
681 | goto fail_free_inode; | 704 | goto fail_free_inode; |
682 | 705 | ||
706 | gfs2_set_inode_blocks(inode, blocks); | ||
707 | |||
683 | error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); | 708 | error = gfs2_glock_get(sdp, ip->i_no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl); |
684 | if (error) | 709 | if (error) |
685 | goto fail_free_inode; | 710 | goto fail_free_inode; |
@@ -689,10 +714,14 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
689 | if (error) | 714 | if (error) |
690 | goto fail_free_inode; | 715 | goto fail_free_inode; |
691 | 716 | ||
692 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | 717 | error = gfs2_trans_begin(sdp, blocks, 0); |
693 | if (error) | 718 | if (error) |
694 | goto fail_gunlock2; | 719 | goto fail_gunlock2; |
695 | 720 | ||
721 | if (blocks > 1) { | ||
722 | ip->i_eattr = ip->i_no_addr + 1; | ||
723 | gfs2_init_xattr(ip); | ||
724 | } | ||
696 | init_dinode(dip, ip, symname); | 725 | init_dinode(dip, ip, symname); |
697 | gfs2_trans_end(sdp); | 726 | gfs2_trans_end(sdp); |
698 | 727 | ||
@@ -722,7 +751,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
722 | if (error) | 751 | if (error) |
723 | goto fail_gunlock3; | 752 | goto fail_gunlock3; |
724 | 753 | ||
725 | error = gfs2_security_init(dip, ip, name); | 754 | error = security_inode_init_security(&ip->i_inode, &dip->i_inode, name, |
755 | &gfs2_initxattrs, NULL); | ||
726 | if (error) | 756 | if (error) |
727 | goto fail_gunlock3; | 757 | goto fail_gunlock3; |
728 | 758 | ||
@@ -758,15 +788,16 @@ fail_free_acls: | |||
758 | if (acl) | 788 | if (acl) |
759 | posix_acl_release(acl); | 789 | posix_acl_release(acl); |
760 | fail_free_vfs_inode: | 790 | fail_free_vfs_inode: |
761 | free_inode_nonrcu(inode); | 791 | free_vfs_inode = 1; |
762 | inode = NULL; | ||
763 | fail_gunlock: | 792 | fail_gunlock: |
764 | gfs2_dir_no_add(&da); | 793 | gfs2_dir_no_add(&da); |
765 | gfs2_glock_dq_uninit(ghs); | 794 | gfs2_glock_dq_uninit(ghs); |
766 | if (inode && !IS_ERR(inode)) { | 795 | if (inode && !IS_ERR(inode)) { |
767 | clear_nlink(inode); | 796 | clear_nlink(inode); |
768 | mark_inode_dirty(inode); | 797 | if (!free_vfs_inode) |
769 | set_bit(GIF_ALLOC_FAILED, &GFS2_I(inode)->i_flags); | 798 | mark_inode_dirty(inode); |
799 | set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED, | ||
800 | &GFS2_I(inode)->i_flags); | ||
770 | iput(inode); | 801 | iput(inode); |
771 | } | 802 | } |
772 | fail: | 803 | fail: |
@@ -1263,6 +1294,10 @@ static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | |||
1263 | } | 1294 | } |
1264 | 1295 | ||
1265 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); | 1296 | tmp = gfs2_lookupi(dir, &gfs2_qdotdot, 1); |
1297 | if (!tmp) { | ||
1298 | error = -ENOENT; | ||
1299 | break; | ||
1300 | } | ||
1266 | if (IS_ERR(tmp)) { | 1301 | if (IS_ERR(tmp)) { |
1267 | error = PTR_ERR(tmp); | 1302 | error = PTR_ERR(tmp); |
1268 | break; | 1303 | break; |
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 2a6ba06bee6f..c1eb555dc588 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
11 | #include <linux/dlm.h> | 13 | #include <linux/dlm.h> |
12 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
@@ -176,7 +178,7 @@ static void gdlm_bast(void *arg, int mode) | |||
176 | gfs2_glock_cb(gl, LM_ST_SHARED); | 178 | gfs2_glock_cb(gl, LM_ST_SHARED); |
177 | break; | 179 | break; |
178 | default: | 180 | default: |
179 | printk(KERN_ERR "unknown bast mode %d", mode); | 181 | pr_err("unknown bast mode %d\n", mode); |
180 | BUG(); | 182 | BUG(); |
181 | } | 183 | } |
182 | } | 184 | } |
@@ -195,7 +197,7 @@ static int make_mode(const unsigned int lmstate) | |||
195 | case LM_ST_SHARED: | 197 | case LM_ST_SHARED: |
196 | return DLM_LOCK_PR; | 198 | return DLM_LOCK_PR; |
197 | } | 199 | } |
198 | printk(KERN_ERR "unknown LM state %d", lmstate); | 200 | pr_err("unknown LM state %d\n", lmstate); |
199 | BUG(); | 201 | BUG(); |
200 | return -1; | 202 | return -1; |
201 | } | 203 | } |
@@ -308,7 +310,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl) | |||
308 | error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, | 310 | error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, |
309 | NULL, gl); | 311 | NULL, gl); |
310 | if (error) { | 312 | if (error) { |
311 | printk(KERN_ERR "gdlm_unlock %x,%llx err=%d\n", | 313 | pr_err("gdlm_unlock %x,%llx err=%d\n", |
312 | gl->gl_name.ln_type, | 314 | gl->gl_name.ln_type, |
313 | (unsigned long long)gl->gl_name.ln_number, error); | 315 | (unsigned long long)gl->gl_name.ln_number, error); |
314 | return; | 316 | return; |
@@ -1102,7 +1104,7 @@ static void gdlm_recover_slot(void *arg, struct dlm_slot *slot) | |||
1102 | } | 1104 | } |
1103 | 1105 | ||
1104 | if (ls->ls_recover_submit[jid]) { | 1106 | if (ls->ls_recover_submit[jid]) { |
1105 | fs_info(sdp, "recover_slot jid %d gen %u prev %u", | 1107 | fs_info(sdp, "recover_slot jid %d gen %u prev %u\n", |
1106 | jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); | 1108 | jid, ls->ls_recover_block, ls->ls_recover_submit[jid]); |
1107 | } | 1109 | } |
1108 | ls->ls_recover_submit[jid] = ls->ls_recover_block; | 1110 | ls->ls_recover_submit[jid] = ls->ls_recover_block; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 9dcb9777a5f8..4a14d504ef83 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
@@ -18,6 +18,7 @@ | |||
18 | #include <linux/kthread.h> | 18 | #include <linux/kthread.h> |
19 | #include <linux/freezer.h> | 19 | #include <linux/freezer.h> |
20 | #include <linux/bio.h> | 20 | #include <linux/bio.h> |
21 | #include <linux/blkdev.h> | ||
21 | #include <linux/writeback.h> | 22 | #include <linux/writeback.h> |
22 | #include <linux/list_sort.h> | 23 | #include <linux/list_sort.h> |
23 | 24 | ||
@@ -145,8 +146,10 @@ void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) | |||
145 | { | 146 | { |
146 | struct list_head *head = &sdp->sd_ail1_list; | 147 | struct list_head *head = &sdp->sd_ail1_list; |
147 | struct gfs2_trans *tr; | 148 | struct gfs2_trans *tr; |
149 | struct blk_plug plug; | ||
148 | 150 | ||
149 | trace_gfs2_ail_flush(sdp, wbc, 1); | 151 | trace_gfs2_ail_flush(sdp, wbc, 1); |
152 | blk_start_plug(&plug); | ||
150 | spin_lock(&sdp->sd_ail_lock); | 153 | spin_lock(&sdp->sd_ail_lock); |
151 | restart: | 154 | restart: |
152 | list_for_each_entry_reverse(tr, head, tr_list) { | 155 | list_for_each_entry_reverse(tr, head, tr_list) { |
@@ -156,6 +159,7 @@ restart: | |||
156 | goto restart; | 159 | goto restart; |
157 | } | 160 | } |
158 | spin_unlock(&sdp->sd_ail_lock); | 161 | spin_unlock(&sdp->sd_ail_lock); |
162 | blk_finish_plug(&plug); | ||
159 | trace_gfs2_ail_flush(sdp, wbc, 0); | 163 | trace_gfs2_ail_flush(sdp, wbc, 0); |
160 | } | 164 | } |
161 | 165 | ||
@@ -410,24 +414,22 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer | |||
410 | static unsigned int calc_reserved(struct gfs2_sbd *sdp) | 414 | static unsigned int calc_reserved(struct gfs2_sbd *sdp) |
411 | { | 415 | { |
412 | unsigned int reserved = 0; | 416 | unsigned int reserved = 0; |
413 | unsigned int mbuf_limit, metabufhdrs_needed; | 417 | unsigned int mbuf; |
414 | unsigned int dbuf_limit, databufhdrs_needed; | 418 | unsigned int dbuf; |
415 | unsigned int revokes = 0; | 419 | struct gfs2_trans *tr = sdp->sd_log_tr; |
416 | 420 | ||
417 | mbuf_limit = buf_limit(sdp); | 421 | if (tr) { |
418 | metabufhdrs_needed = (sdp->sd_log_commited_buf + | 422 | mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; |
419 | (mbuf_limit - 1)) / mbuf_limit; | 423 | dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; |
420 | dbuf_limit = databuf_limit(sdp); | 424 | reserved = mbuf + dbuf; |
421 | databufhdrs_needed = (sdp->sd_log_commited_databuf + | 425 | /* Account for header blocks */ |
422 | (dbuf_limit - 1)) / dbuf_limit; | 426 | reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); |
427 | reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); | ||
428 | } | ||
423 | 429 | ||
424 | if (sdp->sd_log_commited_revoke > 0) | 430 | if (sdp->sd_log_commited_revoke > 0) |
425 | revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, | 431 | reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, |
426 | sizeof(u64)); | 432 | sizeof(u64)); |
427 | |||
428 | reserved = sdp->sd_log_commited_buf + metabufhdrs_needed + | ||
429 | sdp->sd_log_commited_databuf + databufhdrs_needed + | ||
430 | revokes; | ||
431 | /* One for the overall header */ | 433 | /* One for the overall header */ |
432 | if (reserved) | 434 | if (reserved) |
433 | reserved++; | 435 | reserved++; |
@@ -682,36 +684,25 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
682 | } | 684 | } |
683 | trace_gfs2_log_flush(sdp, 1); | 685 | trace_gfs2_log_flush(sdp, 1); |
684 | 686 | ||
687 | sdp->sd_log_flush_head = sdp->sd_log_head; | ||
688 | sdp->sd_log_flush_wrapped = 0; | ||
685 | tr = sdp->sd_log_tr; | 689 | tr = sdp->sd_log_tr; |
686 | if (tr) { | 690 | if (tr) { |
687 | sdp->sd_log_tr = NULL; | 691 | sdp->sd_log_tr = NULL; |
688 | INIT_LIST_HEAD(&tr->tr_ail1_list); | 692 | INIT_LIST_HEAD(&tr->tr_ail1_list); |
689 | INIT_LIST_HEAD(&tr->tr_ail2_list); | 693 | INIT_LIST_HEAD(&tr->tr_ail2_list); |
694 | tr->tr_first = sdp->sd_log_flush_head; | ||
690 | } | 695 | } |
691 | 696 | ||
692 | if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { | ||
693 | printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, | ||
694 | sdp->sd_log_commited_buf); | ||
695 | gfs2_assert_withdraw(sdp, 0); | ||
696 | } | ||
697 | if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { | ||
698 | printk(KERN_INFO "GFS2: log databuf %u %u\n", | ||
699 | sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); | ||
700 | gfs2_assert_withdraw(sdp, 0); | ||
701 | } | ||
702 | gfs2_assert_withdraw(sdp, | 697 | gfs2_assert_withdraw(sdp, |
703 | sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); | 698 | sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); |
704 | 699 | ||
705 | sdp->sd_log_flush_head = sdp->sd_log_head; | ||
706 | sdp->sd_log_flush_wrapped = 0; | ||
707 | if (tr) | ||
708 | tr->tr_first = sdp->sd_log_flush_head; | ||
709 | |||
710 | gfs2_ordered_write(sdp); | 700 | gfs2_ordered_write(sdp); |
711 | lops_before_commit(sdp); | 701 | lops_before_commit(sdp, tr); |
712 | gfs2_log_flush_bio(sdp, WRITE); | 702 | gfs2_log_flush_bio(sdp, WRITE); |
713 | 703 | ||
714 | if (sdp->sd_log_head != sdp->sd_log_flush_head) { | 704 | if (sdp->sd_log_head != sdp->sd_log_flush_head) { |
705 | log_flush_wait(sdp); | ||
715 | log_write_header(sdp, 0); | 706 | log_write_header(sdp, 0); |
716 | } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ | 707 | } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ |
717 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ | 708 | atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ |
@@ -723,8 +714,6 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
723 | gfs2_log_lock(sdp); | 714 | gfs2_log_lock(sdp); |
724 | sdp->sd_log_head = sdp->sd_log_flush_head; | 715 | sdp->sd_log_head = sdp->sd_log_flush_head; |
725 | sdp->sd_log_blks_reserved = 0; | 716 | sdp->sd_log_blks_reserved = 0; |
726 | sdp->sd_log_commited_buf = 0; | ||
727 | sdp->sd_log_commited_databuf = 0; | ||
728 | sdp->sd_log_commited_revoke = 0; | 717 | sdp->sd_log_commited_revoke = 0; |
729 | 718 | ||
730 | spin_lock(&sdp->sd_ail_lock); | 719 | spin_lock(&sdp->sd_ail_lock); |
@@ -740,34 +729,54 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) | |||
740 | kfree(tr); | 729 | kfree(tr); |
741 | } | 730 | } |
742 | 731 | ||
732 | /** | ||
733 | * gfs2_merge_trans - Merge a new transaction into a cached transaction | ||
734 | * @old: Original transaction to be expanded | ||
735 | * @new: New transaction to be merged | ||
736 | */ | ||
737 | |||
738 | static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) | ||
739 | { | ||
740 | WARN_ON_ONCE(old->tr_attached != 1); | ||
741 | |||
742 | old->tr_num_buf_new += new->tr_num_buf_new; | ||
743 | old->tr_num_databuf_new += new->tr_num_databuf_new; | ||
744 | old->tr_num_buf_rm += new->tr_num_buf_rm; | ||
745 | old->tr_num_databuf_rm += new->tr_num_databuf_rm; | ||
746 | old->tr_num_revoke += new->tr_num_revoke; | ||
747 | old->tr_num_revoke_rm += new->tr_num_revoke_rm; | ||
748 | |||
749 | list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); | ||
750 | list_splice_tail_init(&new->tr_buf, &old->tr_buf); | ||
751 | } | ||
752 | |||
743 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 753 | static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
744 | { | 754 | { |
745 | unsigned int reserved; | 755 | unsigned int reserved; |
746 | unsigned int unused; | 756 | unsigned int unused; |
757 | unsigned int maxres; | ||
747 | 758 | ||
748 | gfs2_log_lock(sdp); | 759 | gfs2_log_lock(sdp); |
749 | 760 | ||
750 | sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; | 761 | if (sdp->sd_log_tr) { |
751 | sdp->sd_log_commited_databuf += tr->tr_num_databuf_new - | 762 | gfs2_merge_trans(sdp->sd_log_tr, tr); |
752 | tr->tr_num_databuf_rm; | 763 | } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { |
753 | gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) || | 764 | gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl); |
754 | (((int)sdp->sd_log_commited_databuf) >= 0)); | 765 | sdp->sd_log_tr = tr; |
766 | tr->tr_attached = 1; | ||
767 | } | ||
768 | |||
755 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; | 769 | sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm; |
756 | reserved = calc_reserved(sdp); | 770 | reserved = calc_reserved(sdp); |
757 | gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); | 771 | maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; |
758 | unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; | 772 | gfs2_assert_withdraw(sdp, maxres >= reserved); |
773 | unused = maxres - reserved; | ||
759 | atomic_add(unused, &sdp->sd_log_blks_free); | 774 | atomic_add(unused, &sdp->sd_log_blks_free); |
760 | trace_gfs2_log_blocks(sdp, unused); | 775 | trace_gfs2_log_blocks(sdp, unused); |
761 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= | 776 | gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= |
762 | sdp->sd_jdesc->jd_blocks); | 777 | sdp->sd_jdesc->jd_blocks); |
763 | sdp->sd_log_blks_reserved = reserved; | 778 | sdp->sd_log_blks_reserved = reserved; |
764 | 779 | ||
765 | if (sdp->sd_log_tr == NULL && | ||
766 | (tr->tr_num_buf_new || tr->tr_num_databuf_new)) { | ||
767 | gfs2_assert_withdraw(sdp, tr->tr_t_gh.gh_gl); | ||
768 | sdp->sd_log_tr = tr; | ||
769 | tr->tr_attached = 1; | ||
770 | } | ||
771 | gfs2_log_unlock(sdp); | 780 | gfs2_log_unlock(sdp); |
772 | } | 781 | } |
773 | 782 | ||
@@ -807,10 +816,7 @@ void gfs2_log_shutdown(struct gfs2_sbd *sdp) | |||
807 | down_write(&sdp->sd_log_flush_lock); | 816 | down_write(&sdp->sd_log_flush_lock); |
808 | 817 | ||
809 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); | 818 | gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); |
810 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf); | ||
811 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); | 819 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); |
812 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); | ||
813 | gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf); | ||
814 | gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); | 820 | gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list)); |
815 | 821 | ||
816 | sdp->sd_log_flush_head = sdp->sd_log_head; | 822 | sdp->sd_log_flush_head = sdp->sd_log_head; |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 76693793cedd..a294d8d8bcd4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
@@ -146,8 +146,8 @@ static u64 gfs2_log_bmap(struct gfs2_sbd *sdp) | |||
146 | struct gfs2_journal_extent *je; | 146 | struct gfs2_journal_extent *je; |
147 | u64 block; | 147 | u64 block; |
148 | 148 | ||
149 | list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { | 149 | list_for_each_entry(je, &sdp->sd_jdesc->extent_list, list) { |
150 | if (lbn >= je->lblock && lbn < je->lblock + je->blocks) { | 150 | if ((lbn >= je->lblock) && (lbn < (je->lblock + je->blocks))) { |
151 | block = je->dblock + lbn - je->lblock; | 151 | block = je->dblock + lbn - je->lblock; |
152 | gfs2_log_incr_head(sdp); | 152 | gfs2_log_incr_head(sdp); |
153 | return block; | 153 | return block; |
@@ -491,44 +491,40 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, | |||
491 | gfs2_log_unlock(sdp); | 491 | gfs2_log_unlock(sdp); |
492 | } | 492 | } |
493 | 493 | ||
494 | static void buf_lo_before_commit(struct gfs2_sbd *sdp) | 494 | static void buf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
495 | { | 495 | { |
496 | unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ | 496 | unsigned int limit = buf_limit(sdp); /* 503 for 4k blocks */ |
497 | 497 | unsigned int nbuf; | |
498 | gfs2_before_commit(sdp, limit, sdp->sd_log_num_buf, | 498 | if (tr == NULL) |
499 | &sdp->sd_log_le_buf, 0); | 499 | return; |
500 | nbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; | ||
501 | gfs2_before_commit(sdp, limit, nbuf, &tr->tr_buf, 0); | ||
500 | } | 502 | } |
501 | 503 | ||
502 | static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 504 | static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
503 | { | 505 | { |
504 | struct list_head *head = &sdp->sd_log_le_buf; | 506 | struct list_head *head; |
505 | struct gfs2_bufdata *bd; | 507 | struct gfs2_bufdata *bd; |
506 | 508 | ||
507 | if (tr == NULL) { | 509 | if (tr == NULL) |
508 | gfs2_assert(sdp, list_empty(head)); | ||
509 | return; | 510 | return; |
510 | } | ||
511 | 511 | ||
512 | head = &tr->tr_buf; | ||
512 | while (!list_empty(head)) { | 513 | while (!list_empty(head)) { |
513 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); | 514 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); |
514 | list_del_init(&bd->bd_list); | 515 | list_del_init(&bd->bd_list); |
515 | sdp->sd_log_num_buf--; | ||
516 | |||
517 | gfs2_unpin(sdp, bd->bd_bh, tr); | 516 | gfs2_unpin(sdp, bd->bd_bh, tr); |
518 | } | 517 | } |
519 | gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); | ||
520 | } | 518 | } |
521 | 519 | ||
522 | static void buf_lo_before_scan(struct gfs2_jdesc *jd, | 520 | static void buf_lo_before_scan(struct gfs2_jdesc *jd, |
523 | struct gfs2_log_header_host *head, int pass) | 521 | struct gfs2_log_header_host *head, int pass) |
524 | { | 522 | { |
525 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
526 | |||
527 | if (pass != 0) | 523 | if (pass != 0) |
528 | return; | 524 | return; |
529 | 525 | ||
530 | sdp->sd_found_blocks = 0; | 526 | jd->jd_found_blocks = 0; |
531 | sdp->sd_replayed_blocks = 0; | 527 | jd->jd_replayed_blocks = 0; |
532 | } | 528 | } |
533 | 529 | ||
534 | static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 530 | static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
@@ -551,9 +547,9 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
551 | for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { | 547 | for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { |
552 | blkno = be64_to_cpu(*ptr++); | 548 | blkno = be64_to_cpu(*ptr++); |
553 | 549 | ||
554 | sdp->sd_found_blocks++; | 550 | jd->jd_found_blocks++; |
555 | 551 | ||
556 | if (gfs2_revoke_check(sdp, blkno, start)) | 552 | if (gfs2_revoke_check(jd, blkno, start)) |
557 | continue; | 553 | continue; |
558 | 554 | ||
559 | error = gfs2_replay_read_block(jd, start, &bh_log); | 555 | error = gfs2_replay_read_block(jd, start, &bh_log); |
@@ -574,7 +570,7 @@ static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
574 | if (error) | 570 | if (error) |
575 | break; | 571 | break; |
576 | 572 | ||
577 | sdp->sd_replayed_blocks++; | 573 | jd->jd_replayed_blocks++; |
578 | } | 574 | } |
579 | 575 | ||
580 | return error; | 576 | return error; |
@@ -617,10 +613,10 @@ static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
617 | gfs2_meta_sync(ip->i_gl); | 613 | gfs2_meta_sync(ip->i_gl); |
618 | 614 | ||
619 | fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", | 615 | fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", |
620 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | 616 | jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); |
621 | } | 617 | } |
622 | 618 | ||
623 | static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | 619 | static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
624 | { | 620 | { |
625 | struct gfs2_meta_header *mh; | 621 | struct gfs2_meta_header *mh; |
626 | unsigned int offset; | 622 | unsigned int offset; |
@@ -679,13 +675,11 @@ static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
679 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, | 675 | static void revoke_lo_before_scan(struct gfs2_jdesc *jd, |
680 | struct gfs2_log_header_host *head, int pass) | 676 | struct gfs2_log_header_host *head, int pass) |
681 | { | 677 | { |
682 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | ||
683 | |||
684 | if (pass != 0) | 678 | if (pass != 0) |
685 | return; | 679 | return; |
686 | 680 | ||
687 | sdp->sd_found_revokes = 0; | 681 | jd->jd_found_revokes = 0; |
688 | sdp->sd_replay_tail = head->lh_tail; | 682 | jd->jd_replay_tail = head->lh_tail; |
689 | } | 683 | } |
690 | 684 | ||
691 | static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 685 | static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
@@ -717,13 +711,13 @@ static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
717 | while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { | 711 | while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { |
718 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); | 712 | blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); |
719 | 713 | ||
720 | error = gfs2_revoke_add(sdp, blkno, start); | 714 | error = gfs2_revoke_add(jd, blkno, start); |
721 | if (error < 0) { | 715 | if (error < 0) { |
722 | brelse(bh); | 716 | brelse(bh); |
723 | return error; | 717 | return error; |
724 | } | 718 | } |
725 | else if (error) | 719 | else if (error) |
726 | sdp->sd_found_revokes++; | 720 | jd->jd_found_revokes++; |
727 | 721 | ||
728 | if (!--revokes) | 722 | if (!--revokes) |
729 | break; | 723 | break; |
@@ -743,16 +737,16 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
743 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); | 737 | struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); |
744 | 738 | ||
745 | if (error) { | 739 | if (error) { |
746 | gfs2_revoke_clean(sdp); | 740 | gfs2_revoke_clean(jd); |
747 | return; | 741 | return; |
748 | } | 742 | } |
749 | if (pass != 1) | 743 | if (pass != 1) |
750 | return; | 744 | return; |
751 | 745 | ||
752 | fs_info(sdp, "jid=%u: Found %u revoke tags\n", | 746 | fs_info(sdp, "jid=%u: Found %u revoke tags\n", |
753 | jd->jd_jid, sdp->sd_found_revokes); | 747 | jd->jd_jid, jd->jd_found_revokes); |
754 | 748 | ||
755 | gfs2_revoke_clean(sdp); | 749 | gfs2_revoke_clean(jd); |
756 | } | 750 | } |
757 | 751 | ||
758 | /** | 752 | /** |
@@ -760,12 +754,14 @@ static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
760 | * | 754 | * |
761 | */ | 755 | */ |
762 | 756 | ||
763 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp) | 757 | static void databuf_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
764 | { | 758 | { |
765 | unsigned int limit = buf_limit(sdp) / 2; | 759 | unsigned int limit = databuf_limit(sdp); |
766 | 760 | unsigned int nbuf; | |
767 | gfs2_before_commit(sdp, limit, sdp->sd_log_num_databuf, | 761 | if (tr == NULL) |
768 | &sdp->sd_log_le_databuf, 1); | 762 | return; |
763 | nbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; | ||
764 | gfs2_before_commit(sdp, limit, nbuf, &tr->tr_databuf, 1); | ||
769 | } | 765 | } |
770 | 766 | ||
771 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | 767 | static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, |
@@ -789,9 +785,9 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
789 | blkno = be64_to_cpu(*ptr++); | 785 | blkno = be64_to_cpu(*ptr++); |
790 | esc = be64_to_cpu(*ptr++); | 786 | esc = be64_to_cpu(*ptr++); |
791 | 787 | ||
792 | sdp->sd_found_blocks++; | 788 | jd->jd_found_blocks++; |
793 | 789 | ||
794 | if (gfs2_revoke_check(sdp, blkno, start)) | 790 | if (gfs2_revoke_check(jd, blkno, start)) |
795 | continue; | 791 | continue; |
796 | 792 | ||
797 | error = gfs2_replay_read_block(jd, start, &bh_log); | 793 | error = gfs2_replay_read_block(jd, start, &bh_log); |
@@ -811,7 +807,7 @@ static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, | |||
811 | brelse(bh_log); | 807 | brelse(bh_log); |
812 | brelse(bh_ip); | 808 | brelse(bh_ip); |
813 | 809 | ||
814 | sdp->sd_replayed_blocks++; | 810 | jd->jd_replayed_blocks++; |
815 | } | 811 | } |
816 | 812 | ||
817 | return error; | 813 | return error; |
@@ -835,26 +831,23 @@ static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) | |||
835 | gfs2_meta_sync(ip->i_gl); | 831 | gfs2_meta_sync(ip->i_gl); |
836 | 832 | ||
837 | fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", | 833 | fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", |
838 | jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); | 834 | jd->jd_jid, jd->jd_replayed_blocks, jd->jd_found_blocks); |
839 | } | 835 | } |
840 | 836 | ||
841 | static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | 837 | static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) |
842 | { | 838 | { |
843 | struct list_head *head = &sdp->sd_log_le_databuf; | 839 | struct list_head *head; |
844 | struct gfs2_bufdata *bd; | 840 | struct gfs2_bufdata *bd; |
845 | 841 | ||
846 | if (tr == NULL) { | 842 | if (tr == NULL) |
847 | gfs2_assert(sdp, list_empty(head)); | ||
848 | return; | 843 | return; |
849 | } | ||
850 | 844 | ||
845 | head = &tr->tr_databuf; | ||
851 | while (!list_empty(head)) { | 846 | while (!list_empty(head)) { |
852 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); | 847 | bd = list_entry(head->next, struct gfs2_bufdata, bd_list); |
853 | list_del_init(&bd->bd_list); | 848 | list_del_init(&bd->bd_list); |
854 | sdp->sd_log_num_databuf--; | ||
855 | gfs2_unpin(sdp, bd->bd_bh, tr); | 849 | gfs2_unpin(sdp, bd->bd_bh, tr); |
856 | } | 850 | } |
857 | gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); | ||
858 | } | 851 | } |
859 | 852 | ||
860 | 853 | ||
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 9ca2e6438419..a65a7ba32ffd 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h | |||
@@ -46,12 +46,13 @@ static inline unsigned int databuf_limit(struct gfs2_sbd *sdp) | |||
46 | return limit; | 46 | return limit; |
47 | } | 47 | } |
48 | 48 | ||
49 | static inline void lops_before_commit(struct gfs2_sbd *sdp) | 49 | static inline void lops_before_commit(struct gfs2_sbd *sdp, |
50 | struct gfs2_trans *tr) | ||
50 | { | 51 | { |
51 | int x; | 52 | int x; |
52 | for (x = 0; gfs2_log_ops[x]; x++) | 53 | for (x = 0; gfs2_log_ops[x]; x++) |
53 | if (gfs2_log_ops[x]->lo_before_commit) | 54 | if (gfs2_log_ops[x]->lo_before_commit) |
54 | gfs2_log_ops[x]->lo_before_commit(sdp); | 55 | gfs2_log_ops[x]->lo_before_commit(sdp, tr); |
55 | } | 56 | } |
56 | 57 | ||
57 | static inline void lops_after_commit(struct gfs2_sbd *sdp, | 58 | static inline void lops_after_commit(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index c272e73063de..82b6ac829656 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
11 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
12 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
@@ -165,7 +167,7 @@ static int __init init_gfs2_fs(void) | |||
165 | 167 | ||
166 | gfs2_register_debugfs(); | 168 | gfs2_register_debugfs(); |
167 | 169 | ||
168 | printk("GFS2 installed\n"); | 170 | pr_info("GFS2 installed\n"); |
169 | 171 | ||
170 | return 0; | 172 | return 0; |
171 | 173 | ||
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index c7f24690ed05..2cf09b63a6b4 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
@@ -97,6 +97,11 @@ const struct address_space_operations gfs2_meta_aops = { | |||
97 | .releasepage = gfs2_releasepage, | 97 | .releasepage = gfs2_releasepage, |
98 | }; | 98 | }; |
99 | 99 | ||
100 | const struct address_space_operations gfs2_rgrp_aops = { | ||
101 | .writepage = gfs2_aspace_writepage, | ||
102 | .releasepage = gfs2_releasepage, | ||
103 | }; | ||
104 | |||
100 | /** | 105 | /** |
101 | * gfs2_getbuf - Get a buffer with a given address space | 106 | * gfs2_getbuf - Get a buffer with a given address space |
102 | * @gl: the glock | 107 | * @gl: the glock |
@@ -267,15 +272,10 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
267 | trace_gfs2_pin(bd, 0); | 272 | trace_gfs2_pin(bd, 0); |
268 | atomic_dec(&sdp->sd_log_pinned); | 273 | atomic_dec(&sdp->sd_log_pinned); |
269 | list_del_init(&bd->bd_list); | 274 | list_del_init(&bd->bd_list); |
270 | if (meta) { | 275 | if (meta) |
271 | gfs2_assert_warn(sdp, sdp->sd_log_num_buf); | ||
272 | sdp->sd_log_num_buf--; | ||
273 | tr->tr_num_buf_rm++; | 276 | tr->tr_num_buf_rm++; |
274 | } else { | 277 | else |
275 | gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); | ||
276 | sdp->sd_log_num_databuf--; | ||
277 | tr->tr_num_databuf_rm++; | 278 | tr->tr_num_databuf_rm++; |
278 | } | ||
279 | tr->tr_touched = 1; | 279 | tr->tr_touched = 1; |
280 | was_pinned = 1; | 280 | was_pinned = 1; |
281 | brelse(bh); | 281 | brelse(bh); |
diff --git a/fs/gfs2/meta_io.h b/fs/gfs2/meta_io.h index 4823b934208a..ac5d8027d335 100644 --- a/fs/gfs2/meta_io.h +++ b/fs/gfs2/meta_io.h | |||
@@ -38,12 +38,15 @@ static inline void gfs2_buffer_copy_tail(struct buffer_head *to_bh, | |||
38 | } | 38 | } |
39 | 39 | ||
40 | extern const struct address_space_operations gfs2_meta_aops; | 40 | extern const struct address_space_operations gfs2_meta_aops; |
41 | extern const struct address_space_operations gfs2_rgrp_aops; | ||
41 | 42 | ||
42 | static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) | 43 | static inline struct gfs2_sbd *gfs2_mapping2sbd(struct address_space *mapping) |
43 | { | 44 | { |
44 | struct inode *inode = mapping->host; | 45 | struct inode *inode = mapping->host; |
45 | if (mapping->a_ops == &gfs2_meta_aops) | 46 | if (mapping->a_ops == &gfs2_meta_aops) |
46 | return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; | 47 | return (((struct gfs2_glock *)mapping) - 1)->gl_sbd; |
48 | else if (mapping->a_ops == &gfs2_rgrp_aops) | ||
49 | return container_of(mapping, struct gfs2_sbd, sd_aspace); | ||
47 | else | 50 | else |
48 | return inode->i_sb->s_fs_info; | 51 | return inode->i_sb->s_fs_info; |
49 | } | 52 | } |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index c6872d09561a..22f954051bb8 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
@@ -104,7 +106,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
104 | mapping = &sdp->sd_aspace; | 106 | mapping = &sdp->sd_aspace; |
105 | 107 | ||
106 | address_space_init_once(mapping); | 108 | address_space_init_once(mapping); |
107 | mapping->a_ops = &gfs2_meta_aops; | 109 | mapping->a_ops = &gfs2_rgrp_aops; |
108 | mapping->host = sb->s_bdev->bd_inode; | 110 | mapping->host = sb->s_bdev->bd_inode; |
109 | mapping->flags = 0; | 111 | mapping->flags = 0; |
110 | mapping_set_gfp_mask(mapping, GFP_NOFS); | 112 | mapping_set_gfp_mask(mapping, GFP_NOFS); |
@@ -114,9 +116,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
114 | 116 | ||
115 | spin_lock_init(&sdp->sd_log_lock); | 117 | spin_lock_init(&sdp->sd_log_lock); |
116 | atomic_set(&sdp->sd_log_pinned, 0); | 118 | atomic_set(&sdp->sd_log_pinned, 0); |
117 | INIT_LIST_HEAD(&sdp->sd_log_le_buf); | ||
118 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); | 119 | INIT_LIST_HEAD(&sdp->sd_log_le_revoke); |
119 | INIT_LIST_HEAD(&sdp->sd_log_le_databuf); | ||
120 | INIT_LIST_HEAD(&sdp->sd_log_le_ordered); | 120 | INIT_LIST_HEAD(&sdp->sd_log_le_ordered); |
121 | spin_lock_init(&sdp->sd_ordered_lock); | 121 | spin_lock_init(&sdp->sd_ordered_lock); |
122 | 122 | ||
@@ -130,8 +130,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
130 | atomic_set(&sdp->sd_log_in_flight, 0); | 130 | atomic_set(&sdp->sd_log_in_flight, 0); |
131 | init_waitqueue_head(&sdp->sd_log_flush_wait); | 131 | init_waitqueue_head(&sdp->sd_log_flush_wait); |
132 | 132 | ||
133 | INIT_LIST_HEAD(&sdp->sd_revoke_list); | ||
134 | |||
135 | return sdp; | 133 | return sdp; |
136 | } | 134 | } |
137 | 135 | ||
@@ -154,7 +152,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent) | |||
154 | if (sb->sb_magic != GFS2_MAGIC || | 152 | if (sb->sb_magic != GFS2_MAGIC || |
155 | sb->sb_type != GFS2_METATYPE_SB) { | 153 | sb->sb_type != GFS2_METATYPE_SB) { |
156 | if (!silent) | 154 | if (!silent) |
157 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | 155 | pr_warn("not a GFS2 filesystem\n"); |
158 | return -EINVAL; | 156 | return -EINVAL; |
159 | } | 157 | } |
160 | 158 | ||
@@ -176,7 +174,7 @@ static void end_bio_io_page(struct bio *bio, int error) | |||
176 | if (!error) | 174 | if (!error) |
177 | SetPageUptodate(page); | 175 | SetPageUptodate(page); |
178 | else | 176 | else |
179 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | 177 | pr_warn("error %d reading superblock\n", error); |
180 | unlock_page(page); | 178 | unlock_page(page); |
181 | } | 179 | } |
182 | 180 | ||
@@ -519,67 +517,6 @@ out: | |||
519 | return ret; | 517 | return ret; |
520 | } | 518 | } |
521 | 519 | ||
522 | /** | ||
523 | * map_journal_extents - create a reusable "extent" mapping from all logical | ||
524 | * blocks to all physical blocks for the given journal. This will save | ||
525 | * us time when writing journal blocks. Most journals will have only one | ||
526 | * extent that maps all their logical blocks. That's because gfs2.mkfs | ||
527 | * arranges the journal blocks sequentially to maximize performance. | ||
528 | * So the extent would map the first block for the entire file length. | ||
529 | * However, gfs2_jadd can happen while file activity is happening, so | ||
530 | * those journals may not be sequential. Less likely is the case where | ||
531 | * the users created their own journals by mounting the metafs and | ||
532 | * laying it out. But it's still possible. These journals might have | ||
533 | * several extents. | ||
534 | * | ||
535 | * TODO: This should be done in bigger chunks rather than one block at a time, | ||
536 | * but since it's only done at mount time, I'm not worried about the | ||
537 | * time it takes. | ||
538 | */ | ||
539 | static int map_journal_extents(struct gfs2_sbd *sdp) | ||
540 | { | ||
541 | struct gfs2_jdesc *jd = sdp->sd_jdesc; | ||
542 | unsigned int lb; | ||
543 | u64 db, prev_db; /* logical block, disk block, prev disk block */ | ||
544 | struct gfs2_inode *ip = GFS2_I(jd->jd_inode); | ||
545 | struct gfs2_journal_extent *jext = NULL; | ||
546 | struct buffer_head bh; | ||
547 | int rc = 0; | ||
548 | |||
549 | prev_db = 0; | ||
550 | |||
551 | for (lb = 0; lb < i_size_read(jd->jd_inode) >> sdp->sd_sb.sb_bsize_shift; lb++) { | ||
552 | bh.b_state = 0; | ||
553 | bh.b_blocknr = 0; | ||
554 | bh.b_size = 1 << ip->i_inode.i_blkbits; | ||
555 | rc = gfs2_block_map(jd->jd_inode, lb, &bh, 0); | ||
556 | db = bh.b_blocknr; | ||
557 | if (rc || !db) { | ||
558 | printk(KERN_INFO "GFS2 journal mapping error %d: lb=" | ||
559 | "%u db=%llu\n", rc, lb, (unsigned long long)db); | ||
560 | break; | ||
561 | } | ||
562 | if (!prev_db || db != prev_db + 1) { | ||
563 | jext = kzalloc(sizeof(struct gfs2_journal_extent), | ||
564 | GFP_KERNEL); | ||
565 | if (!jext) { | ||
566 | printk(KERN_INFO "GFS2 error: out of memory " | ||
567 | "mapping journal extents.\n"); | ||
568 | rc = -ENOMEM; | ||
569 | break; | ||
570 | } | ||
571 | jext->dblock = db; | ||
572 | jext->lblock = lb; | ||
573 | jext->blocks = 1; | ||
574 | list_add_tail(&jext->extent_list, &jd->extent_list); | ||
575 | } else { | ||
576 | jext->blocks++; | ||
577 | } | ||
578 | prev_db = db; | ||
579 | } | ||
580 | return rc; | ||
581 | } | ||
582 | |||
583 | static void gfs2_others_may_mount(struct gfs2_sbd *sdp) | 520 | static void gfs2_others_may_mount(struct gfs2_sbd *sdp) |
584 | { | 521 | { |
585 | char *message = "FIRSTMOUNT=Done"; | 522 | char *message = "FIRSTMOUNT=Done"; |
@@ -638,6 +575,8 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) | |||
638 | break; | 575 | break; |
639 | 576 | ||
640 | INIT_LIST_HEAD(&jd->extent_list); | 577 | INIT_LIST_HEAD(&jd->extent_list); |
578 | INIT_LIST_HEAD(&jd->jd_revoke_list); | ||
579 | |||
641 | INIT_WORK(&jd->jd_work, gfs2_recover_func); | 580 | INIT_WORK(&jd->jd_work, gfs2_recover_func); |
642 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); | 581 | jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); |
643 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { | 582 | if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { |
@@ -781,7 +720,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
781 | atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); | 720 | atomic_set(&sdp->sd_log_thresh2, 4*sdp->sd_jdesc->jd_blocks/5); |
782 | 721 | ||
783 | /* Map the extents for this journal's blocks */ | 722 | /* Map the extents for this journal's blocks */ |
784 | map_journal_extents(sdp); | 723 | gfs2_map_journal_extents(sdp, sdp->sd_jdesc); |
785 | } | 724 | } |
786 | trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); | 725 | trace_gfs2_log_blocks(sdp, atomic_read(&sdp->sd_log_blks_free)); |
787 | 726 | ||
@@ -1008,7 +947,7 @@ static int gfs2_lm_mount(struct gfs2_sbd *sdp, int silent) | |||
1008 | lm = &gfs2_dlm_ops; | 947 | lm = &gfs2_dlm_ops; |
1009 | #endif | 948 | #endif |
1010 | } else { | 949 | } else { |
1011 | printk(KERN_INFO "GFS2: can't find protocol %s\n", proto); | 950 | pr_info("can't find protocol %s\n", proto); |
1012 | return -ENOENT; | 951 | return -ENOENT; |
1013 | } | 952 | } |
1014 | 953 | ||
@@ -1115,7 +1054,7 @@ static int fill_super(struct super_block *sb, struct gfs2_args *args, int silent | |||
1115 | 1054 | ||
1116 | sdp = init_sbd(sb); | 1055 | sdp = init_sbd(sb); |
1117 | if (!sdp) { | 1056 | if (!sdp) { |
1118 | printk(KERN_WARNING "GFS2: can't alloc struct gfs2_sbd\n"); | 1057 | pr_warn("can't alloc struct gfs2_sbd\n"); |
1119 | return -ENOMEM; | 1058 | return -ENOMEM; |
1120 | } | 1059 | } |
1121 | sdp->sd_args = *args; | 1060 | sdp->sd_args = *args; |
@@ -1363,7 +1302,7 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, | |||
1363 | 1302 | ||
1364 | error = gfs2_mount_args(&args, data); | 1303 | error = gfs2_mount_args(&args, data); |
1365 | if (error) { | 1304 | if (error) { |
1366 | printk(KERN_WARNING "GFS2: can't parse mount arguments\n"); | 1305 | pr_warn("can't parse mount arguments\n"); |
1367 | goto error_super; | 1306 | goto error_super; |
1368 | } | 1307 | } |
1369 | 1308 | ||
@@ -1413,15 +1352,15 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type, | |||
1413 | 1352 | ||
1414 | error = kern_path(dev_name, LOOKUP_FOLLOW, &path); | 1353 | error = kern_path(dev_name, LOOKUP_FOLLOW, &path); |
1415 | if (error) { | 1354 | if (error) { |
1416 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", | 1355 | pr_warn("path_lookup on %s returned error %d\n", |
1417 | dev_name, error); | 1356 | dev_name, error); |
1418 | return ERR_PTR(error); | 1357 | return ERR_PTR(error); |
1419 | } | 1358 | } |
1420 | s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, | 1359 | s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super, flags, |
1421 | path.dentry->d_inode->i_sb->s_bdev); | 1360 | path.dentry->d_inode->i_sb->s_bdev); |
1422 | path_put(&path); | 1361 | path_put(&path); |
1423 | if (IS_ERR(s)) { | 1362 | if (IS_ERR(s)) { |
1424 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1363 | pr_warn("gfs2 mount does not exist\n"); |
1425 | return ERR_CAST(s); | 1364 | return ERR_CAST(s); |
1426 | } | 1365 | } |
1427 | if ((flags ^ s->s_flags) & MS_RDONLY) { | 1366 | if ((flags ^ s->s_flags) & MS_RDONLY) { |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8bec0e3192dd..c4effff7cf55 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
@@ -36,6 +36,8 @@ | |||
36 | * the quota file, so it is not being constantly read. | 36 | * the quota file, so it is not being constantly read. |
37 | */ | 37 | */ |
38 | 38 | ||
39 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
40 | |||
39 | #include <linux/sched.h> | 41 | #include <linux/sched.h> |
40 | #include <linux/slab.h> | 42 | #include <linux/slab.h> |
41 | #include <linux/mm.h> | 43 | #include <linux/mm.h> |
@@ -330,6 +332,7 @@ static int slot_get(struct gfs2_quota_data *qd) | |||
330 | if (bit < sdp->sd_quota_slots) { | 332 | if (bit < sdp->sd_quota_slots) { |
331 | set_bit(bit, sdp->sd_quota_bitmap); | 333 | set_bit(bit, sdp->sd_quota_bitmap); |
332 | qd->qd_slot = bit; | 334 | qd->qd_slot = bit; |
335 | error = 0; | ||
333 | out: | 336 | out: |
334 | qd->qd_slot_count++; | 337 | qd->qd_slot_count++; |
335 | } | 338 | } |
@@ -1081,10 +1084,10 @@ static int print_message(struct gfs2_quota_data *qd, char *type) | |||
1081 | { | 1084 | { |
1082 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; | 1085 | struct gfs2_sbd *sdp = qd->qd_gl->gl_sbd; |
1083 | 1086 | ||
1084 | printk(KERN_INFO "GFS2: fsid=%s: quota %s for %s %u\n", | 1087 | fs_info(sdp, "quota %s for %s %u\n", |
1085 | sdp->sd_fsname, type, | 1088 | type, |
1086 | (qd->qd_id.type == USRQUOTA) ? "user" : "group", | 1089 | (qd->qd_id.type == USRQUOTA) ? "user" : "group", |
1087 | from_kqid(&init_user_ns, qd->qd_id)); | 1090 | from_kqid(&init_user_ns, qd->qd_id)); |
1088 | 1091 | ||
1089 | return 0; | 1092 | return 0; |
1090 | } | 1093 | } |
@@ -1242,14 +1245,13 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) | |||
1242 | bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); | 1245 | bm_size = DIV_ROUND_UP(sdp->sd_quota_slots, 8 * sizeof(unsigned long)); |
1243 | bm_size *= sizeof(unsigned long); | 1246 | bm_size *= sizeof(unsigned long); |
1244 | error = -ENOMEM; | 1247 | error = -ENOMEM; |
1245 | sdp->sd_quota_bitmap = kmalloc(bm_size, GFP_NOFS|__GFP_NOWARN); | 1248 | sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN); |
1246 | if (sdp->sd_quota_bitmap == NULL) | 1249 | if (sdp->sd_quota_bitmap == NULL) |
1247 | sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS, PAGE_KERNEL); | 1250 | sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS | |
1251 | __GFP_ZERO, PAGE_KERNEL); | ||
1248 | if (!sdp->sd_quota_bitmap) | 1252 | if (!sdp->sd_quota_bitmap) |
1249 | return error; | 1253 | return error; |
1250 | 1254 | ||
1251 | memset(sdp->sd_quota_bitmap, 0, bm_size); | ||
1252 | |||
1253 | for (x = 0; x < blocks; x++) { | 1255 | for (x = 0; x < blocks; x++) { |
1254 | struct buffer_head *bh; | 1256 | struct buffer_head *bh; |
1255 | const struct gfs2_quota_change *qc; | 1257 | const struct gfs2_quota_change *qc; |
diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 963b2d75200c..7ad4094d68c0 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c | |||
@@ -52,9 +52,9 @@ int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | |||
52 | return error; | 52 | return error; |
53 | } | 53 | } |
54 | 54 | ||
55 | int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | 55 | int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) |
56 | { | 56 | { |
57 | struct list_head *head = &sdp->sd_revoke_list; | 57 | struct list_head *head = &jd->jd_revoke_list; |
58 | struct gfs2_revoke_replay *rr; | 58 | struct gfs2_revoke_replay *rr; |
59 | int found = 0; | 59 | int found = 0; |
60 | 60 | ||
@@ -81,13 +81,13 @@ int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | |||
81 | return 1; | 81 | return 1; |
82 | } | 82 | } |
83 | 83 | ||
84 | int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | 84 | int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where) |
85 | { | 85 | { |
86 | struct gfs2_revoke_replay *rr; | 86 | struct gfs2_revoke_replay *rr; |
87 | int wrap, a, b, revoke; | 87 | int wrap, a, b, revoke; |
88 | int found = 0; | 88 | int found = 0; |
89 | 89 | ||
90 | list_for_each_entry(rr, &sdp->sd_revoke_list, rr_list) { | 90 | list_for_each_entry(rr, &jd->jd_revoke_list, rr_list) { |
91 | if (rr->rr_blkno == blkno) { | 91 | if (rr->rr_blkno == blkno) { |
92 | found = 1; | 92 | found = 1; |
93 | break; | 93 | break; |
@@ -97,17 +97,17 @@ int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where) | |||
97 | if (!found) | 97 | if (!found) |
98 | return 0; | 98 | return 0; |
99 | 99 | ||
100 | wrap = (rr->rr_where < sdp->sd_replay_tail); | 100 | wrap = (rr->rr_where < jd->jd_replay_tail); |
101 | a = (sdp->sd_replay_tail < where); | 101 | a = (jd->jd_replay_tail < where); |
102 | b = (where < rr->rr_where); | 102 | b = (where < rr->rr_where); |
103 | revoke = (wrap) ? (a || b) : (a && b); | 103 | revoke = (wrap) ? (a || b) : (a && b); |
104 | 104 | ||
105 | return revoke; | 105 | return revoke; |
106 | } | 106 | } |
107 | 107 | ||
108 | void gfs2_revoke_clean(struct gfs2_sbd *sdp) | 108 | void gfs2_revoke_clean(struct gfs2_jdesc *jd) |
109 | { | 109 | { |
110 | struct list_head *head = &sdp->sd_revoke_list; | 110 | struct list_head *head = &jd->jd_revoke_list; |
111 | struct gfs2_revoke_replay *rr; | 111 | struct gfs2_revoke_replay *rr; |
112 | 112 | ||
113 | while (!list_empty(head)) { | 113 | while (!list_empty(head)) { |
diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index 2226136c7647..6142836cce96 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h | |||
@@ -23,9 +23,9 @@ static inline void gfs2_replay_incr_blk(struct gfs2_sbd *sdp, unsigned int *blk) | |||
23 | extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, | 23 | extern int gfs2_replay_read_block(struct gfs2_jdesc *jd, unsigned int blk, |
24 | struct buffer_head **bh); | 24 | struct buffer_head **bh); |
25 | 25 | ||
26 | extern int gfs2_revoke_add(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); | 26 | extern int gfs2_revoke_add(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); |
27 | extern int gfs2_revoke_check(struct gfs2_sbd *sdp, u64 blkno, unsigned int where); | 27 | extern int gfs2_revoke_check(struct gfs2_jdesc *jd, u64 blkno, unsigned int where); |
28 | extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); | 28 | extern void gfs2_revoke_clean(struct gfs2_jdesc *jd); |
29 | 29 | ||
30 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, | 30 | extern int gfs2_find_jhead(struct gfs2_jdesc *jd, |
31 | struct gfs2_log_header_host *head); | 31 | struct gfs2_log_header_host *head); |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index a1da21349235..281a7716e3f3 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
11 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
12 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
@@ -99,12 +101,12 @@ static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, | |||
99 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; | 101 | cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; |
100 | 102 | ||
101 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { | 103 | if (unlikely(!valid_change[new_state * 4 + cur_state])) { |
102 | printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " | 104 | pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n", |
103 | "new_state=%d\n", rbm->offset, cur_state, new_state); | 105 | rbm->offset, cur_state, new_state); |
104 | printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", | 106 | pr_warn("rgrp=0x%llx bi_start=0x%x\n", |
105 | (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); | 107 | (unsigned long long)rbm->rgd->rd_addr, bi->bi_start); |
106 | printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", | 108 | pr_warn("bi_offset=0x%x bi_len=0x%x\n", |
107 | bi->bi_offset, bi->bi_len); | 109 | bi->bi_offset, bi->bi_len); |
108 | dump_stack(); | 110 | dump_stack(); |
109 | gfs2_consist_rgrpd(rbm->rgd); | 111 | gfs2_consist_rgrpd(rbm->rgd); |
110 | return; | 112 | return; |
@@ -736,11 +738,11 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) | |||
736 | 738 | ||
737 | static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) | 739 | static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd) |
738 | { | 740 | { |
739 | printk(KERN_INFO " ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); | 741 | pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); |
740 | printk(KERN_INFO " ri_length = %u\n", rgd->rd_length); | 742 | pr_info("ri_length = %u\n", rgd->rd_length); |
741 | printk(KERN_INFO " ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); | 743 | pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); |
742 | printk(KERN_INFO " ri_data = %u\n", rgd->rd_data); | 744 | pr_info("ri_data = %u\n", rgd->rd_data); |
743 | printk(KERN_INFO " ri_bitbytes = %u\n", rgd->rd_bitbytes); | 745 | pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes); |
744 | } | 746 | } |
745 | 747 | ||
746 | /** | 748 | /** |
@@ -1102,7 +1104,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd) | |||
1102 | * Returns: errno | 1104 | * Returns: errno |
1103 | */ | 1105 | */ |
1104 | 1106 | ||
1105 | int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) | 1107 | static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) |
1106 | { | 1108 | { |
1107 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 1109 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
1108 | struct gfs2_glock *gl = rgd->rd_gl; | 1110 | struct gfs2_glock *gl = rgd->rd_gl; |
@@ -1169,7 +1171,7 @@ fail: | |||
1169 | return error; | 1171 | return error; |
1170 | } | 1172 | } |
1171 | 1173 | ||
1172 | int update_rgrp_lvb(struct gfs2_rgrpd *rgd) | 1174 | static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) |
1173 | { | 1175 | { |
1174 | u32 rl_flags; | 1176 | u32 rl_flags; |
1175 | 1177 | ||
@@ -2278,7 +2280,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2278 | } | 2280 | } |
2279 | } | 2281 | } |
2280 | if (rbm.rgd->rd_free < *nblocks) { | 2282 | if (rbm.rgd->rd_free < *nblocks) { |
2281 | printk(KERN_WARNING "nblocks=%u\n", *nblocks); | 2283 | pr_warn("nblocks=%u\n", *nblocks); |
2282 | goto rgrp_error; | 2284 | goto rgrp_error; |
2283 | } | 2285 | } |
2284 | 2286 | ||
@@ -2296,7 +2298,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks, | |||
2296 | 2298 | ||
2297 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); | 2299 | gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); |
2298 | if (dinode) | 2300 | if (dinode) |
2299 | gfs2_trans_add_unrevoke(sdp, block, 1); | 2301 | gfs2_trans_add_unrevoke(sdp, block, *nblocks); |
2300 | 2302 | ||
2301 | gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); | 2303 | gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); |
2302 | 2304 | ||
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 60f60f6181f3..de8afad89e51 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/bio.h> | 12 | #include <linux/bio.h> |
11 | #include <linux/sched.h> | 13 | #include <linux/sched.h> |
12 | #include <linux/slab.h> | 14 | #include <linux/slab.h> |
@@ -175,8 +177,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
175 | break; | 177 | break; |
176 | case Opt_debug: | 178 | case Opt_debug: |
177 | if (args->ar_errors == GFS2_ERRORS_PANIC) { | 179 | if (args->ar_errors == GFS2_ERRORS_PANIC) { |
178 | printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " | 180 | pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); |
179 | "are mutually exclusive.\n"); | ||
180 | return -EINVAL; | 181 | return -EINVAL; |
181 | } | 182 | } |
182 | args->ar_debug = 1; | 183 | args->ar_debug = 1; |
@@ -228,21 +229,21 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
228 | case Opt_commit: | 229 | case Opt_commit: |
229 | rv = match_int(&tmp[0], &args->ar_commit); | 230 | rv = match_int(&tmp[0], &args->ar_commit); |
230 | if (rv || args->ar_commit <= 0) { | 231 | if (rv || args->ar_commit <= 0) { |
231 | printk(KERN_WARNING "GFS2: commit mount option requires a positive numeric argument\n"); | 232 | pr_warn("commit mount option requires a positive numeric argument\n"); |
232 | return rv ? rv : -EINVAL; | 233 | return rv ? rv : -EINVAL; |
233 | } | 234 | } |
234 | break; | 235 | break; |
235 | case Opt_statfs_quantum: | 236 | case Opt_statfs_quantum: |
236 | rv = match_int(&tmp[0], &args->ar_statfs_quantum); | 237 | rv = match_int(&tmp[0], &args->ar_statfs_quantum); |
237 | if (rv || args->ar_statfs_quantum < 0) { | 238 | if (rv || args->ar_statfs_quantum < 0) { |
238 | printk(KERN_WARNING "GFS2: statfs_quantum mount option requires a non-negative numeric argument\n"); | 239 | pr_warn("statfs_quantum mount option requires a non-negative numeric argument\n"); |
239 | return rv ? rv : -EINVAL; | 240 | return rv ? rv : -EINVAL; |
240 | } | 241 | } |
241 | break; | 242 | break; |
242 | case Opt_quota_quantum: | 243 | case Opt_quota_quantum: |
243 | rv = match_int(&tmp[0], &args->ar_quota_quantum); | 244 | rv = match_int(&tmp[0], &args->ar_quota_quantum); |
244 | if (rv || args->ar_quota_quantum <= 0) { | 245 | if (rv || args->ar_quota_quantum <= 0) { |
245 | printk(KERN_WARNING "GFS2: quota_quantum mount option requires a positive numeric argument\n"); | 246 | pr_warn("quota_quantum mount option requires a positive numeric argument\n"); |
246 | return rv ? rv : -EINVAL; | 247 | return rv ? rv : -EINVAL; |
247 | } | 248 | } |
248 | break; | 249 | break; |
@@ -250,7 +251,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
250 | rv = match_int(&tmp[0], &args->ar_statfs_percent); | 251 | rv = match_int(&tmp[0], &args->ar_statfs_percent); |
251 | if (rv || args->ar_statfs_percent < 0 || | 252 | if (rv || args->ar_statfs_percent < 0 || |
252 | args->ar_statfs_percent > 100) { | 253 | args->ar_statfs_percent > 100) { |
253 | printk(KERN_WARNING "statfs_percent mount option requires a numeric argument between 0 and 100\n"); | 254 | pr_warn("statfs_percent mount option requires a numeric argument between 0 and 100\n"); |
254 | return rv ? rv : -EINVAL; | 255 | return rv ? rv : -EINVAL; |
255 | } | 256 | } |
256 | break; | 257 | break; |
@@ -259,8 +260,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
259 | break; | 260 | break; |
260 | case Opt_err_panic: | 261 | case Opt_err_panic: |
261 | if (args->ar_debug) { | 262 | if (args->ar_debug) { |
262 | printk(KERN_WARNING "GFS2: -o debug and -o errors=panic " | 263 | pr_warn("-o debug and -o errors=panic are mutually exclusive\n"); |
263 | "are mutually exclusive.\n"); | ||
264 | return -EINVAL; | 264 | return -EINVAL; |
265 | } | 265 | } |
266 | args->ar_errors = GFS2_ERRORS_PANIC; | 266 | args->ar_errors = GFS2_ERRORS_PANIC; |
@@ -279,7 +279,7 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
279 | break; | 279 | break; |
280 | case Opt_error: | 280 | case Opt_error: |
281 | default: | 281 | default: |
282 | printk(KERN_WARNING "GFS2: invalid mount option: %s\n", o); | 282 | pr_warn("invalid mount option: %s\n", o); |
283 | return -EINVAL; | 283 | return -EINVAL; |
284 | } | 284 | } |
285 | } | 285 | } |
@@ -295,9 +295,8 @@ int gfs2_mount_args(struct gfs2_args *args, char *options) | |||
295 | 295 | ||
296 | void gfs2_jindex_free(struct gfs2_sbd *sdp) | 296 | void gfs2_jindex_free(struct gfs2_sbd *sdp) |
297 | { | 297 | { |
298 | struct list_head list, *head; | 298 | struct list_head list; |
299 | struct gfs2_jdesc *jd; | 299 | struct gfs2_jdesc *jd; |
300 | struct gfs2_journal_extent *jext; | ||
301 | 300 | ||
302 | spin_lock(&sdp->sd_jindex_spin); | 301 | spin_lock(&sdp->sd_jindex_spin); |
303 | list_add(&list, &sdp->sd_jindex_list); | 302 | list_add(&list, &sdp->sd_jindex_list); |
@@ -307,14 +306,7 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp) | |||
307 | 306 | ||
308 | while (!list_empty(&list)) { | 307 | while (!list_empty(&list)) { |
309 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); | 308 | jd = list_entry(list.next, struct gfs2_jdesc, jd_list); |
310 | head = &jd->extent_list; | 309 | gfs2_free_journal_extents(jd); |
311 | while (!list_empty(head)) { | ||
312 | jext = list_entry(head->next, | ||
313 | struct gfs2_journal_extent, | ||
314 | extent_list); | ||
315 | list_del(&jext->extent_list); | ||
316 | kfree(jext); | ||
317 | } | ||
318 | list_del(&jd->jd_list); | 310 | list_del(&jd->jd_list); |
319 | iput(jd->jd_inode); | 311 | iput(jd->jd_inode); |
320 | kfree(jd); | 312 | kfree(jd); |
@@ -1175,6 +1167,8 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1175 | struct gfs2_tune *gt = &sdp->sd_tune; | 1167 | struct gfs2_tune *gt = &sdp->sd_tune; |
1176 | int error; | 1168 | int error; |
1177 | 1169 | ||
1170 | sync_filesystem(sb); | ||
1171 | |||
1178 | spin_lock(>->gt_spin); | 1172 | spin_lock(>->gt_spin); |
1179 | args.ar_commit = gt->gt_logd_secs; | 1173 | args.ar_commit = gt->gt_logd_secs; |
1180 | args.ar_quota_quantum = gt->gt_quota_quantum; | 1174 | args.ar_quota_quantum = gt->gt_quota_quantum; |
@@ -1256,7 +1250,7 @@ static int gfs2_drop_inode(struct inode *inode) | |||
1256 | { | 1250 | { |
1257 | struct gfs2_inode *ip = GFS2_I(inode); | 1251 | struct gfs2_inode *ip = GFS2_I(inode); |
1258 | 1252 | ||
1259 | if (inode->i_nlink) { | 1253 | if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) && inode->i_nlink) { |
1260 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; | 1254 | struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; |
1261 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) | 1255 | if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) |
1262 | clear_nlink(inode); | 1256 | clear_nlink(inode); |
@@ -1471,6 +1465,11 @@ static void gfs2_evict_inode(struct inode *inode) | |||
1471 | struct gfs2_holder gh; | 1465 | struct gfs2_holder gh; |
1472 | int error; | 1466 | int error; |
1473 | 1467 | ||
1468 | if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) { | ||
1469 | clear_inode(inode); | ||
1470 | return; | ||
1471 | } | ||
1472 | |||
1474 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) | 1473 | if (inode->i_nlink || (sb->s_flags & MS_RDONLY)) |
1475 | goto out; | 1474 | goto out; |
1476 | 1475 | ||
@@ -1558,7 +1557,7 @@ out_unlock: | |||
1558 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); | 1557 | fs_warn(sdp, "gfs2_evict_inode: %d\n", error); |
1559 | out: | 1558 | out: |
1560 | /* Case 3 starts here */ | 1559 | /* Case 3 starts here */ |
1561 | truncate_inode_pages(&inode->i_data, 0); | 1560 | truncate_inode_pages_final(&inode->i_data); |
1562 | gfs2_rs_delete(ip, NULL); | 1561 | gfs2_rs_delete(ip, NULL); |
1563 | gfs2_ordered_del_inode(ip); | 1562 | gfs2_ordered_del_inode(ip); |
1564 | clear_inode(inode); | 1563 | clear_inode(inode); |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d09f6edda0ff..de25d5577e5d 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
11 | #include <linux/spinlock.h> | 13 | #include <linux/spinlock.h> |
12 | #include <linux/completion.h> | 14 | #include <linux/completion.h> |
@@ -138,9 +140,8 @@ static ssize_t withdraw_store(struct gfs2_sbd *sdp, const char *buf, size_t len) | |||
138 | if (simple_strtol(buf, NULL, 0) != 1) | 140 | if (simple_strtol(buf, NULL, 0) != 1) |
139 | return -EINVAL; | 141 | return -EINVAL; |
140 | 142 | ||
141 | gfs2_lm_withdraw(sdp, | 143 | gfs2_lm_withdraw(sdp, "withdrawing from cluster at user's request\n"); |
142 | "GFS2: fsid=%s: withdrawing from cluster at user's request\n", | 144 | |
143 | sdp->sd_fsname); | ||
144 | return len; | 145 | return len; |
145 | } | 146 | } |
146 | 147 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 2b20d7046bf3..bead90d27bad 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/sched.h> | 12 | #include <linux/sched.h> |
11 | #include <linux/slab.h> | 13 | #include <linux/slab.h> |
12 | #include <linux/spinlock.h> | 14 | #include <linux/spinlock.h> |
@@ -51,6 +53,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, | |||
51 | if (revokes) | 53 | if (revokes) |
52 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, | 54 | tr->tr_reserved += gfs2_struct2blk(sdp, revokes, |
53 | sizeof(u64)); | 55 | sizeof(u64)); |
56 | INIT_LIST_HEAD(&tr->tr_databuf); | ||
57 | INIT_LIST_HEAD(&tr->tr_buf); | ||
58 | |||
54 | sb_start_intwrite(sdp->sd_vfs); | 59 | sb_start_intwrite(sdp->sd_vfs); |
55 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); | 60 | gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh); |
56 | 61 | ||
@@ -96,14 +101,13 @@ static void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) | |||
96 | 101 | ||
97 | static void gfs2_print_trans(const struct gfs2_trans *tr) | 102 | static void gfs2_print_trans(const struct gfs2_trans *tr) |
98 | { | 103 | { |
99 | printk(KERN_WARNING "GFS2: Transaction created at: %pSR\n", | 104 | pr_warn("Transaction created at: %pSR\n", (void *)tr->tr_ip); |
100 | (void *)tr->tr_ip); | 105 | pr_warn("blocks=%u revokes=%u reserved=%u touched=%u\n", |
101 | printk(KERN_WARNING "GFS2: blocks=%u revokes=%u reserved=%u touched=%d\n", | 106 | tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); |
102 | tr->tr_blocks, tr->tr_revokes, tr->tr_reserved, tr->tr_touched); | 107 | pr_warn("Buf %u/%u Databuf %u/%u Revoke %u/%u\n", |
103 | printk(KERN_WARNING "GFS2: Buf %u/%u Databuf %u/%u Revoke %u/%u\n", | 108 | tr->tr_num_buf_new, tr->tr_num_buf_rm, |
104 | tr->tr_num_buf_new, tr->tr_num_buf_rm, | 109 | tr->tr_num_databuf_new, tr->tr_num_databuf_rm, |
105 | tr->tr_num_databuf_new, tr->tr_num_databuf_rm, | 110 | tr->tr_num_revoke, tr->tr_num_revoke_rm); |
106 | tr->tr_num_revoke, tr->tr_num_revoke_rm); | ||
107 | } | 111 | } |
108 | 112 | ||
109 | void gfs2_trans_end(struct gfs2_sbd *sdp) | 113 | void gfs2_trans_end(struct gfs2_sbd *sdp) |
@@ -210,8 +214,7 @@ void gfs2_trans_add_data(struct gfs2_glock *gl, struct buffer_head *bh) | |||
210 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | 214 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); |
211 | gfs2_pin(sdp, bd->bd_bh); | 215 | gfs2_pin(sdp, bd->bd_bh); |
212 | tr->tr_num_databuf_new++; | 216 | tr->tr_num_databuf_new++; |
213 | sdp->sd_log_num_databuf++; | 217 | list_add_tail(&bd->bd_list, &tr->tr_databuf); |
214 | list_add_tail(&bd->bd_list, &sdp->sd_log_le_databuf); | ||
215 | } | 218 | } |
216 | gfs2_log_unlock(sdp); | 219 | gfs2_log_unlock(sdp); |
217 | unlock_buffer(bh); | 220 | unlock_buffer(bh); |
@@ -230,16 +233,14 @@ static void meta_lo_add(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | |||
230 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); | 233 | set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags); |
231 | mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; | 234 | mh = (struct gfs2_meta_header *)bd->bd_bh->b_data; |
232 | if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { | 235 | if (unlikely(mh->mh_magic != cpu_to_be32(GFS2_MAGIC))) { |
233 | printk(KERN_ERR | 236 | pr_err("Attempting to add uninitialised block to journal (inplace block=%lld)\n", |
234 | "Attempting to add uninitialised block to journal (inplace block=%lld)\n", | ||
235 | (unsigned long long)bd->bd_bh->b_blocknr); | 237 | (unsigned long long)bd->bd_bh->b_blocknr); |
236 | BUG(); | 238 | BUG(); |
237 | } | 239 | } |
238 | gfs2_pin(sdp, bd->bd_bh); | 240 | gfs2_pin(sdp, bd->bd_bh); |
239 | mh->__pad0 = cpu_to_be64(0); | 241 | mh->__pad0 = cpu_to_be64(0); |
240 | mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); | 242 | mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid); |
241 | sdp->sd_log_num_buf++; | 243 | list_add(&bd->bd_list, &tr->tr_buf); |
242 | list_add(&bd->bd_list, &sdp->sd_log_le_buf); | ||
243 | tr->tr_num_buf_new++; | 244 | tr->tr_num_buf_new++; |
244 | } | 245 | } |
245 | 246 | ||
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f7109f689e61..86d2035ac669 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c | |||
@@ -7,6 +7,8 @@ | |||
7 | * of the GNU General Public License version 2. | 7 | * of the GNU General Public License version 2. |
8 | */ | 8 | */ |
9 | 9 | ||
10 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
11 | |||
10 | #include <linux/spinlock.h> | 12 | #include <linux/spinlock.h> |
11 | #include <linux/completion.h> | 13 | #include <linux/completion.h> |
12 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
@@ -30,22 +32,27 @@ mempool_t *gfs2_page_pool __read_mostly; | |||
30 | 32 | ||
31 | void gfs2_assert_i(struct gfs2_sbd *sdp) | 33 | void gfs2_assert_i(struct gfs2_sbd *sdp) |
32 | { | 34 | { |
33 | printk(KERN_EMERG "GFS2: fsid=%s: fatal assertion failed\n", | 35 | fs_emerg(sdp, "fatal assertion failed\n"); |
34 | sdp->sd_fsname); | ||
35 | } | 36 | } |
36 | 37 | ||
37 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | 38 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...) |
38 | { | 39 | { |
39 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; | 40 | struct lm_lockstruct *ls = &sdp->sd_lockstruct; |
40 | const struct lm_lockops *lm = ls->ls_ops; | 41 | const struct lm_lockops *lm = ls->ls_ops; |
41 | va_list args; | 42 | va_list args; |
43 | struct va_format vaf; | ||
42 | 44 | ||
43 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && | 45 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW && |
44 | test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | 46 | test_and_set_bit(SDF_SHUTDOWN, &sdp->sd_flags)) |
45 | return 0; | 47 | return 0; |
46 | 48 | ||
47 | va_start(args, fmt); | 49 | va_start(args, fmt); |
48 | vprintk(fmt, args); | 50 | |
51 | vaf.fmt = fmt; | ||
52 | vaf.va = &args; | ||
53 | |||
54 | fs_err(sdp, "%pV", &vaf); | ||
55 | |||
49 | va_end(args); | 56 | va_end(args); |
50 | 57 | ||
51 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { | 58 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) { |
@@ -66,7 +73,7 @@ int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...) | |||
66 | } | 73 | } |
67 | 74 | ||
68 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) | 75 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_PANIC) |
69 | panic("GFS2: fsid=%s: panic requested.\n", sdp->sd_fsname); | 76 | panic("GFS2: fsid=%s: panic requested\n", sdp->sd_fsname); |
70 | 77 | ||
71 | return -1; | 78 | return -1; |
72 | } | 79 | } |
@@ -82,10 +89,9 @@ int gfs2_assert_withdraw_i(struct gfs2_sbd *sdp, char *assertion, | |||
82 | { | 89 | { |
83 | int me; | 90 | int me; |
84 | me = gfs2_lm_withdraw(sdp, | 91 | me = gfs2_lm_withdraw(sdp, |
85 | "GFS2: fsid=%s: fatal: assertion \"%s\" failed\n" | 92 | "fatal: assertion \"%s\" failed\n" |
86 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 93 | " function = %s, file = %s, line = %u\n", |
87 | sdp->sd_fsname, assertion, | 94 | assertion, function, file, line); |
88 | sdp->sd_fsname, function, file, line); | ||
89 | dump_stack(); | 95 | dump_stack(); |
90 | return (me) ? -1 : -2; | 96 | return (me) ? -1 : -2; |
91 | } | 97 | } |
@@ -105,11 +111,8 @@ int gfs2_assert_warn_i(struct gfs2_sbd *sdp, char *assertion, | |||
105 | return -2; | 111 | return -2; |
106 | 112 | ||
107 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) | 113 | if (sdp->sd_args.ar_errors == GFS2_ERRORS_WITHDRAW) |
108 | printk(KERN_WARNING | 114 | fs_warn(sdp, "warning: assertion \"%s\" failed at function = %s, file = %s, line = %u\n", |
109 | "GFS2: fsid=%s: warning: assertion \"%s\" failed\n" | 115 | assertion, function, file, line); |
110 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | ||
111 | sdp->sd_fsname, assertion, | ||
112 | sdp->sd_fsname, function, file, line); | ||
113 | 116 | ||
114 | if (sdp->sd_args.ar_debug) | 117 | if (sdp->sd_args.ar_debug) |
115 | BUG(); | 118 | BUG(); |
@@ -138,10 +141,8 @@ int gfs2_consist_i(struct gfs2_sbd *sdp, int cluster_wide, const char *function, | |||
138 | { | 141 | { |
139 | int rv; | 142 | int rv; |
140 | rv = gfs2_lm_withdraw(sdp, | 143 | rv = gfs2_lm_withdraw(sdp, |
141 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | 144 | "fatal: filesystem consistency error - function = %s, file = %s, line = %u\n", |
142 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 145 | function, file, line); |
143 | sdp->sd_fsname, | ||
144 | sdp->sd_fsname, function, file, line); | ||
145 | return rv; | 146 | return rv; |
146 | } | 147 | } |
147 | 148 | ||
@@ -157,13 +158,12 @@ int gfs2_consist_inode_i(struct gfs2_inode *ip, int cluster_wide, | |||
157 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); | 158 | struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); |
158 | int rv; | 159 | int rv; |
159 | rv = gfs2_lm_withdraw(sdp, | 160 | rv = gfs2_lm_withdraw(sdp, |
160 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | 161 | "fatal: filesystem consistency error\n" |
161 | "GFS2: fsid=%s: inode = %llu %llu\n" | 162 | " inode = %llu %llu\n" |
162 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 163 | " function = %s, file = %s, line = %u\n", |
163 | sdp->sd_fsname, | 164 | (unsigned long long)ip->i_no_formal_ino, |
164 | sdp->sd_fsname, (unsigned long long)ip->i_no_formal_ino, | 165 | (unsigned long long)ip->i_no_addr, |
165 | (unsigned long long)ip->i_no_addr, | 166 | function, file, line); |
166 | sdp->sd_fsname, function, file, line); | ||
167 | return rv; | 167 | return rv; |
168 | } | 168 | } |
169 | 169 | ||
@@ -179,12 +179,11 @@ int gfs2_consist_rgrpd_i(struct gfs2_rgrpd *rgd, int cluster_wide, | |||
179 | struct gfs2_sbd *sdp = rgd->rd_sbd; | 179 | struct gfs2_sbd *sdp = rgd->rd_sbd; |
180 | int rv; | 180 | int rv; |
181 | rv = gfs2_lm_withdraw(sdp, | 181 | rv = gfs2_lm_withdraw(sdp, |
182 | "GFS2: fsid=%s: fatal: filesystem consistency error\n" | 182 | "fatal: filesystem consistency error\n" |
183 | "GFS2: fsid=%s: RG = %llu\n" | 183 | " RG = %llu\n" |
184 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 184 | " function = %s, file = %s, line = %u\n", |
185 | sdp->sd_fsname, | 185 | (unsigned long long)rgd->rd_addr, |
186 | sdp->sd_fsname, (unsigned long long)rgd->rd_addr, | 186 | function, file, line); |
187 | sdp->sd_fsname, function, file, line); | ||
188 | return rv; | 187 | return rv; |
189 | } | 188 | } |
190 | 189 | ||
@@ -200,12 +199,11 @@ int gfs2_meta_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
200 | { | 199 | { |
201 | int me; | 200 | int me; |
202 | me = gfs2_lm_withdraw(sdp, | 201 | me = gfs2_lm_withdraw(sdp, |
203 | "GFS2: fsid=%s: fatal: invalid metadata block\n" | 202 | "fatal: invalid metadata block\n" |
204 | "GFS2: fsid=%s: bh = %llu (%s)\n" | 203 | " bh = %llu (%s)\n" |
205 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 204 | " function = %s, file = %s, line = %u\n", |
206 | sdp->sd_fsname, | 205 | (unsigned long long)bh->b_blocknr, type, |
207 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, | 206 | function, file, line); |
208 | sdp->sd_fsname, function, file, line); | ||
209 | return (me) ? -1 : -2; | 207 | return (me) ? -1 : -2; |
210 | } | 208 | } |
211 | 209 | ||
@@ -221,12 +219,11 @@ int gfs2_metatype_check_ii(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
221 | { | 219 | { |
222 | int me; | 220 | int me; |
223 | me = gfs2_lm_withdraw(sdp, | 221 | me = gfs2_lm_withdraw(sdp, |
224 | "GFS2: fsid=%s: fatal: invalid metadata block\n" | 222 | "fatal: invalid metadata block\n" |
225 | "GFS2: fsid=%s: bh = %llu (type: exp=%u, found=%u)\n" | 223 | " bh = %llu (type: exp=%u, found=%u)\n" |
226 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 224 | " function = %s, file = %s, line = %u\n", |
227 | sdp->sd_fsname, | 225 | (unsigned long long)bh->b_blocknr, type, t, |
228 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, type, t, | 226 | function, file, line); |
229 | sdp->sd_fsname, function, file, line); | ||
230 | return (me) ? -1 : -2; | 227 | return (me) ? -1 : -2; |
231 | } | 228 | } |
232 | 229 | ||
@@ -241,10 +238,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function, char *file, | |||
241 | { | 238 | { |
242 | int rv; | 239 | int rv; |
243 | rv = gfs2_lm_withdraw(sdp, | 240 | rv = gfs2_lm_withdraw(sdp, |
244 | "GFS2: fsid=%s: fatal: I/O error\n" | 241 | "fatal: I/O error\n" |
245 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 242 | " function = %s, file = %s, line = %u\n", |
246 | sdp->sd_fsname, | 243 | function, file, line); |
247 | sdp->sd_fsname, function, file, line); | ||
248 | return rv; | 244 | return rv; |
249 | } | 245 | } |
250 | 246 | ||
@@ -259,12 +255,11 @@ int gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, | |||
259 | { | 255 | { |
260 | int rv; | 256 | int rv; |
261 | rv = gfs2_lm_withdraw(sdp, | 257 | rv = gfs2_lm_withdraw(sdp, |
262 | "GFS2: fsid=%s: fatal: I/O error\n" | 258 | "fatal: I/O error\n" |
263 | "GFS2: fsid=%s: block = %llu\n" | 259 | " block = %llu\n" |
264 | "GFS2: fsid=%s: function = %s, file = %s, line = %u\n", | 260 | " function = %s, file = %s, line = %u\n", |
265 | sdp->sd_fsname, | 261 | (unsigned long long)bh->b_blocknr, |
266 | sdp->sd_fsname, (unsigned long long)bh->b_blocknr, | 262 | function, file, line); |
267 | sdp->sd_fsname, function, file, line); | ||
268 | return rv; | 263 | return rv; |
269 | } | 264 | } |
270 | 265 | ||
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index b7ffb09b99ea..cbdcbdf39614 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h | |||
@@ -10,22 +10,23 @@ | |||
10 | #ifndef __UTIL_DOT_H__ | 10 | #ifndef __UTIL_DOT_H__ |
11 | #define __UTIL_DOT_H__ | 11 | #define __UTIL_DOT_H__ |
12 | 12 | ||
13 | #ifdef pr_fmt | ||
14 | #undef pr_fmt | ||
15 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
16 | #endif | ||
17 | |||
13 | #include <linux/mempool.h> | 18 | #include <linux/mempool.h> |
14 | 19 | ||
15 | #include "incore.h" | 20 | #include "incore.h" |
16 | 21 | ||
17 | #define fs_printk(level, fs, fmt, arg...) \ | 22 | #define fs_emerg(fs, fmt, ...) \ |
18 | printk(level "GFS2: fsid=%s: " fmt , (fs)->sd_fsname , ## arg) | 23 | pr_emerg("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) |
19 | 24 | #define fs_warn(fs, fmt, ...) \ | |
20 | #define fs_info(fs, fmt, arg...) \ | 25 | pr_warn("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) |
21 | fs_printk(KERN_INFO , fs , fmt , ## arg) | 26 | #define fs_err(fs, fmt, ...) \ |
22 | 27 | pr_err("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) | |
23 | #define fs_warn(fs, fmt, arg...) \ | 28 | #define fs_info(fs, fmt, ...) \ |
24 | fs_printk(KERN_WARNING , fs , fmt , ## arg) | 29 | pr_info("fsid=%s: " fmt, (fs)->sd_fsname, ##__VA_ARGS__) |
25 | |||
26 | #define fs_err(fs, fmt, arg...) \ | ||
27 | fs_printk(KERN_ERR, fs , fmt , ## arg) | ||
28 | |||
29 | 30 | ||
30 | void gfs2_assert_i(struct gfs2_sbd *sdp); | 31 | void gfs2_assert_i(struct gfs2_sbd *sdp); |
31 | 32 | ||
@@ -85,7 +86,7 @@ static inline int gfs2_meta_check(struct gfs2_sbd *sdp, | |||
85 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; | 86 | struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; |
86 | u32 magic = be32_to_cpu(mh->mh_magic); | 87 | u32 magic = be32_to_cpu(mh->mh_magic); |
87 | if (unlikely(magic != GFS2_MAGIC)) { | 88 | if (unlikely(magic != GFS2_MAGIC)) { |
88 | printk(KERN_ERR "GFS2: Magic number missing at %llu\n", | 89 | pr_err("Magic number missing at %llu\n", |
89 | (unsigned long long)bh->b_blocknr); | 90 | (unsigned long long)bh->b_blocknr); |
90 | return -EIO; | 91 | return -EIO; |
91 | } | 92 | } |
@@ -164,7 +165,7 @@ static inline unsigned int gfs2_tune_get_i(struct gfs2_tune *gt, | |||
164 | #define gfs2_tune_get(sdp, field) \ | 165 | #define gfs2_tune_get(sdp, field) \ |
165 | gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) | 166 | gfs2_tune_get_i(&(sdp)->sd_tune, &(sdp)->sd_tune.field) |
166 | 167 | ||
167 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, char *fmt, ...); | 168 | __printf(2, 3) |
169 | int gfs2_lm_withdraw(struct gfs2_sbd *sdp, const char *fmt, ...); | ||
168 | 170 | ||
169 | #endif /* __UTIL_DOT_H__ */ | 171 | #endif /* __UTIL_DOT_H__ */ |
170 | |||
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 380ab31b5e0f..9e2fecd62f62 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c | |||
@@ -547,7 +547,7 @@ out: | |||
547 | 547 | ||
548 | void hfs_evict_inode(struct inode *inode) | 548 | void hfs_evict_inode(struct inode *inode) |
549 | { | 549 | { |
550 | truncate_inode_pages(&inode->i_data, 0); | 550 | truncate_inode_pages_final(&inode->i_data); |
551 | clear_inode(inode); | 551 | clear_inode(inode); |
552 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { | 552 | if (HFS_IS_RSRC(inode) && HFS_I(inode)->rsrc_inode) { |
553 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; | 553 | HFS_I(HFS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 2d2039e754cd..eee7206c38d1 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
@@ -112,6 +112,7 @@ static int hfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
112 | 112 | ||
113 | static int hfs_remount(struct super_block *sb, int *flags, char *data) | 113 | static int hfs_remount(struct super_block *sb, int *flags, char *data) |
114 | { | 114 | { |
115 | sync_filesystem(sb); | ||
115 | *flags |= MS_NODIRATIME; | 116 | *flags |= MS_NODIRATIME; |
116 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 117 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
117 | return 0; | 118 | return 0; |
diff --git a/fs/hfsplus/attributes.c b/fs/hfsplus/attributes.c index 0f47890299c4..caf89a7be0a1 100644 --- a/fs/hfsplus/attributes.c +++ b/fs/hfsplus/attributes.c | |||
@@ -11,7 +11,7 @@ | |||
11 | 11 | ||
12 | static struct kmem_cache *hfsplus_attr_tree_cachep; | 12 | static struct kmem_cache *hfsplus_attr_tree_cachep; |
13 | 13 | ||
14 | int hfsplus_create_attr_tree_cache(void) | 14 | int __init hfsplus_create_attr_tree_cache(void) |
15 | { | 15 | { |
16 | if (hfsplus_attr_tree_cachep) | 16 | if (hfsplus_attr_tree_cachep) |
17 | return -EEXIST; | 17 | return -EEXIST; |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fbb212fbb1ef..a7aafb35b624 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
@@ -227,10 +227,8 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
227 | u32 ablock, dblock, mask; | 227 | u32 ablock, dblock, mask; |
228 | sector_t sector; | 228 | sector_t sector; |
229 | int was_dirty = 0; | 229 | int was_dirty = 0; |
230 | int shift; | ||
231 | 230 | ||
232 | /* Convert inode block to disk allocation block */ | 231 | /* Convert inode block to disk allocation block */ |
233 | shift = sbi->alloc_blksz_shift - sb->s_blocksize_bits; | ||
234 | ablock = iblock >> sbi->fs_shift; | 232 | ablock = iblock >> sbi->fs_shift; |
235 | 233 | ||
236 | if (iblock >= hip->fs_blocks) { | 234 | if (iblock >= hip->fs_blocks) { |
@@ -498,11 +496,13 @@ int hfsplus_file_extend(struct inode *inode) | |||
498 | goto insert_extent; | 496 | goto insert_extent; |
499 | } | 497 | } |
500 | out: | 498 | out: |
501 | mutex_unlock(&hip->extents_lock); | ||
502 | if (!res) { | 499 | if (!res) { |
503 | hip->alloc_blocks += len; | 500 | hip->alloc_blocks += len; |
501 | mutex_unlock(&hip->extents_lock); | ||
504 | hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); | 502 | hfsplus_mark_inode_dirty(inode, HFSPLUS_I_ALLOC_DIRTY); |
503 | return 0; | ||
505 | } | 504 | } |
505 | mutex_unlock(&hip->extents_lock); | ||
506 | return res; | 506 | return res; |
507 | 507 | ||
508 | insert_extent: | 508 | insert_extent: |
@@ -556,11 +556,13 @@ void hfsplus_file_truncate(struct inode *inode) | |||
556 | 556 | ||
557 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> | 557 | blk_cnt = (inode->i_size + HFSPLUS_SB(sb)->alloc_blksz - 1) >> |
558 | HFSPLUS_SB(sb)->alloc_blksz_shift; | 558 | HFSPLUS_SB(sb)->alloc_blksz_shift; |
559 | |||
560 | mutex_lock(&hip->extents_lock); | ||
561 | |||
559 | alloc_cnt = hip->alloc_blocks; | 562 | alloc_cnt = hip->alloc_blocks; |
560 | if (blk_cnt == alloc_cnt) | 563 | if (blk_cnt == alloc_cnt) |
561 | goto out; | 564 | goto out_unlock; |
562 | 565 | ||
563 | mutex_lock(&hip->extents_lock); | ||
564 | res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); | 566 | res = hfs_find_init(HFSPLUS_SB(sb)->ext_tree, &fd); |
565 | if (res) { | 567 | if (res) { |
566 | mutex_unlock(&hip->extents_lock); | 568 | mutex_unlock(&hip->extents_lock); |
@@ -592,10 +594,10 @@ void hfsplus_file_truncate(struct inode *inode) | |||
592 | hfs_brec_remove(&fd); | 594 | hfs_brec_remove(&fd); |
593 | } | 595 | } |
594 | hfs_find_exit(&fd); | 596 | hfs_find_exit(&fd); |
595 | mutex_unlock(&hip->extents_lock); | ||
596 | 597 | ||
597 | hip->alloc_blocks = blk_cnt; | 598 | hip->alloc_blocks = blk_cnt; |
598 | out: | 599 | out_unlock: |
600 | mutex_unlock(&hip->extents_lock); | ||
599 | hip->phys_size = inode->i_size; | 601 | hip->phys_size = inode->i_size; |
600 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> | 602 | hip->fs_blocks = (inode->i_size + sb->s_blocksize - 1) >> |
601 | sb->s_blocksize_bits; | 603 | sb->s_blocksize_bits; |
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 62d571eb69ba..83dc29286b10 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h | |||
@@ -367,7 +367,7 @@ typedef int (*search_strategy_t)(struct hfs_bnode *, | |||
367 | */ | 367 | */ |
368 | 368 | ||
369 | /* attributes.c */ | 369 | /* attributes.c */ |
370 | int hfsplus_create_attr_tree_cache(void); | 370 | int __init hfsplus_create_attr_tree_cache(void); |
371 | void hfsplus_destroy_attr_tree_cache(void); | 371 | void hfsplus_destroy_attr_tree_cache(void); |
372 | hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); | 372 | hfsplus_attr_entry *hfsplus_alloc_attr_entry(void); |
373 | void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); | 373 | void hfsplus_destroy_attr_entry(hfsplus_attr_entry *entry_p); |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 80875aa640ef..a513d2d36be9 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
@@ -161,7 +161,7 @@ static int hfsplus_write_inode(struct inode *inode, | |||
161 | static void hfsplus_evict_inode(struct inode *inode) | 161 | static void hfsplus_evict_inode(struct inode *inode) |
162 | { | 162 | { |
163 | hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); | 163 | hfs_dbg(INODE, "hfsplus_evict_inode: %lu\n", inode->i_ino); |
164 | truncate_inode_pages(&inode->i_data, 0); | 164 | truncate_inode_pages_final(&inode->i_data); |
165 | clear_inode(inode); | 165 | clear_inode(inode); |
166 | if (HFSPLUS_IS_RSRC(inode)) { | 166 | if (HFSPLUS_IS_RSRC(inode)) { |
167 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; | 167 | HFSPLUS_I(HFSPLUS_I(inode)->rsrc_inode)->rsrc_inode = NULL; |
@@ -323,6 +323,7 @@ static int hfsplus_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
323 | 323 | ||
324 | static int hfsplus_remount(struct super_block *sb, int *flags, char *data) | 324 | static int hfsplus_remount(struct super_block *sb, int *flags, char *data) |
325 | { | 325 | { |
326 | sync_filesystem(sb); | ||
326 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 327 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
327 | return 0; | 328 | return 0; |
328 | if (!(*flags & MS_RDONLY)) { | 329 | if (!(*flags & MS_RDONLY)) { |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index fe649d325b1f..9c470fde9878 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
@@ -230,7 +230,7 @@ static struct inode *hostfs_alloc_inode(struct super_block *sb) | |||
230 | 230 | ||
231 | static void hostfs_evict_inode(struct inode *inode) | 231 | static void hostfs_evict_inode(struct inode *inode) |
232 | { | 232 | { |
233 | truncate_inode_pages(&inode->i_data, 0); | 233 | truncate_inode_pages_final(&inode->i_data); |
234 | clear_inode(inode); | 234 | clear_inode(inode); |
235 | if (HOSTFS_I(inode)->fd != -1) { | 235 | if (HOSTFS_I(inode)->fd != -1) { |
236 | close_file(&HOSTFS_I(inode)->fd); | 236 | close_file(&HOSTFS_I(inode)->fd); |
diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index 9edeeb0ea97e..50a427313835 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c | |||
@@ -304,7 +304,7 @@ void hpfs_write_if_changed(struct inode *inode) | |||
304 | 304 | ||
305 | void hpfs_evict_inode(struct inode *inode) | 305 | void hpfs_evict_inode(struct inode *inode) |
306 | { | 306 | { |
307 | truncate_inode_pages(&inode->i_data, 0); | 307 | truncate_inode_pages_final(&inode->i_data); |
308 | clear_inode(inode); | 308 | clear_inode(inode); |
309 | if (!inode->i_nlink) { | 309 | if (!inode->i_nlink) { |
310 | hpfs_lock(inode->i_sb); | 310 | hpfs_lock(inode->i_sb); |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index 4534ff688b76..fe3463a43236 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
@@ -421,6 +421,8 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data) | |||
421 | struct hpfs_sb_info *sbi = hpfs_sb(s); | 421 | struct hpfs_sb_info *sbi = hpfs_sb(s); |
422 | char *new_opts = kstrdup(data, GFP_KERNEL); | 422 | char *new_opts = kstrdup(data, GFP_KERNEL); |
423 | 423 | ||
424 | sync_filesystem(s); | ||
425 | |||
424 | *flags |= MS_NOATIME; | 426 | *flags |= MS_NOATIME; |
425 | 427 | ||
426 | hpfs_lock(s); | 428 | hpfs_lock(s); |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index d19b30ababf1..204027520937 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
@@ -366,7 +366,13 @@ static void truncate_hugepages(struct inode *inode, loff_t lstart) | |||
366 | 366 | ||
367 | static void hugetlbfs_evict_inode(struct inode *inode) | 367 | static void hugetlbfs_evict_inode(struct inode *inode) |
368 | { | 368 | { |
369 | struct resv_map *resv_map; | ||
370 | |||
369 | truncate_hugepages(inode, 0); | 371 | truncate_hugepages(inode, 0); |
372 | resv_map = (struct resv_map *)inode->i_mapping->private_data; | ||
373 | /* root inode doesn't have the resv_map, so we should check it */ | ||
374 | if (resv_map) | ||
375 | resv_map_release(&resv_map->refs); | ||
370 | clear_inode(inode); | 376 | clear_inode(inode); |
371 | } | 377 | } |
372 | 378 | ||
@@ -476,6 +482,11 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
476 | umode_t mode, dev_t dev) | 482 | umode_t mode, dev_t dev) |
477 | { | 483 | { |
478 | struct inode *inode; | 484 | struct inode *inode; |
485 | struct resv_map *resv_map; | ||
486 | |||
487 | resv_map = resv_map_alloc(); | ||
488 | if (!resv_map) | ||
489 | return NULL; | ||
479 | 490 | ||
480 | inode = new_inode(sb); | 491 | inode = new_inode(sb); |
481 | if (inode) { | 492 | if (inode) { |
@@ -487,7 +498,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
487 | inode->i_mapping->a_ops = &hugetlbfs_aops; | 498 | inode->i_mapping->a_ops = &hugetlbfs_aops; |
488 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; | 499 | inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; |
489 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 500 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
490 | INIT_LIST_HEAD(&inode->i_mapping->private_list); | 501 | inode->i_mapping->private_data = resv_map; |
491 | info = HUGETLBFS_I(inode); | 502 | info = HUGETLBFS_I(inode); |
492 | /* | 503 | /* |
493 | * The policy is initialized here even if we are creating a | 504 | * The policy is initialized here even if we are creating a |
@@ -517,7 +528,9 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, | |||
517 | break; | 528 | break; |
518 | } | 529 | } |
519 | lockdep_annotate_inode_mutex_key(inode); | 530 | lockdep_annotate_inode_mutex_key(inode); |
520 | } | 531 | } else |
532 | kref_put(&resv_map->refs, resv_map_release); | ||
533 | |||
521 | return inode; | 534 | return inode; |
522 | } | 535 | } |
523 | 536 | ||
diff --git a/fs/inode.c b/fs/inode.c index 4bcdad3c9361..f96d2a6f88cc 100644 --- a/fs/inode.c +++ b/fs/inode.c | |||
@@ -503,6 +503,7 @@ void clear_inode(struct inode *inode) | |||
503 | */ | 503 | */ |
504 | spin_lock_irq(&inode->i_data.tree_lock); | 504 | spin_lock_irq(&inode->i_data.tree_lock); |
505 | BUG_ON(inode->i_data.nrpages); | 505 | BUG_ON(inode->i_data.nrpages); |
506 | BUG_ON(inode->i_data.nrshadows); | ||
506 | spin_unlock_irq(&inode->i_data.tree_lock); | 507 | spin_unlock_irq(&inode->i_data.tree_lock); |
507 | BUG_ON(!list_empty(&inode->i_data.private_list)); | 508 | BUG_ON(!list_empty(&inode->i_data.private_list)); |
508 | BUG_ON(!(inode->i_state & I_FREEING)); | 509 | BUG_ON(!(inode->i_state & I_FREEING)); |
@@ -548,8 +549,7 @@ static void evict(struct inode *inode) | |||
548 | if (op->evict_inode) { | 549 | if (op->evict_inode) { |
549 | op->evict_inode(inode); | 550 | op->evict_inode(inode); |
550 | } else { | 551 | } else { |
551 | if (inode->i_data.nrpages) | 552 | truncate_inode_pages_final(&inode->i_data); |
552 | truncate_inode_pages(&inode->i_data, 0); | ||
553 | clear_inode(inode); | 553 | clear_inode(inode); |
554 | } | 554 | } |
555 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) | 555 | if (S_ISBLK(inode->i_mode) && inode->i_bdev) |
@@ -944,24 +944,22 @@ EXPORT_SYMBOL(unlock_new_inode); | |||
944 | 944 | ||
945 | /** | 945 | /** |
946 | * lock_two_nondirectories - take two i_mutexes on non-directory objects | 946 | * lock_two_nondirectories - take two i_mutexes on non-directory objects |
947 | * | ||
948 | * Lock any non-NULL argument that is not a directory. | ||
949 | * Zero, one or two objects may be locked by this function. | ||
950 | * | ||
947 | * @inode1: first inode to lock | 951 | * @inode1: first inode to lock |
948 | * @inode2: second inode to lock | 952 | * @inode2: second inode to lock |
949 | */ | 953 | */ |
950 | void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) | 954 | void lock_two_nondirectories(struct inode *inode1, struct inode *inode2) |
951 | { | 955 | { |
952 | WARN_ON_ONCE(S_ISDIR(inode1->i_mode)); | 956 | if (inode1 > inode2) |
953 | if (inode1 == inode2 || !inode2) { | 957 | swap(inode1, inode2); |
954 | mutex_lock(&inode1->i_mutex); | 958 | |
955 | return; | 959 | if (inode1 && !S_ISDIR(inode1->i_mode)) |
956 | } | ||
957 | WARN_ON_ONCE(S_ISDIR(inode2->i_mode)); | ||
958 | if (inode1 < inode2) { | ||
959 | mutex_lock(&inode1->i_mutex); | 960 | mutex_lock(&inode1->i_mutex); |
961 | if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1) | ||
960 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); | 962 | mutex_lock_nested(&inode2->i_mutex, I_MUTEX_NONDIR2); |
961 | } else { | ||
962 | mutex_lock(&inode2->i_mutex); | ||
963 | mutex_lock_nested(&inode1->i_mutex, I_MUTEX_NONDIR2); | ||
964 | } | ||
965 | } | 963 | } |
966 | EXPORT_SYMBOL(lock_two_nondirectories); | 964 | EXPORT_SYMBOL(lock_two_nondirectories); |
967 | 965 | ||
@@ -972,8 +970,9 @@ EXPORT_SYMBOL(lock_two_nondirectories); | |||
972 | */ | 970 | */ |
973 | void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) | 971 | void unlock_two_nondirectories(struct inode *inode1, struct inode *inode2) |
974 | { | 972 | { |
975 | mutex_unlock(&inode1->i_mutex); | 973 | if (inode1 && !S_ISDIR(inode1->i_mode)) |
976 | if (inode2 && inode2 != inode1) | 974 | mutex_unlock(&inode1->i_mutex); |
975 | if (inode2 && !S_ISDIR(inode2->i_mode) && inode2 != inode1) | ||
977 | mutex_unlock(&inode2->i_mutex); | 976 | mutex_unlock(&inode2->i_mutex); |
978 | } | 977 | } |
979 | EXPORT_SYMBOL(unlock_two_nondirectories); | 978 | EXPORT_SYMBOL(unlock_two_nondirectories); |
@@ -1899,3 +1898,34 @@ void inode_dio_done(struct inode *inode) | |||
1899 | wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); | 1898 | wake_up_bit(&inode->i_state, __I_DIO_WAKEUP); |
1900 | } | 1899 | } |
1901 | EXPORT_SYMBOL(inode_dio_done); | 1900 | EXPORT_SYMBOL(inode_dio_done); |
1901 | |||
1902 | /* | ||
1903 | * inode_set_flags - atomically set some inode flags | ||
1904 | * | ||
1905 | * Note: the caller should be holding i_mutex, or else be sure that | ||
1906 | * they have exclusive access to the inode structure (i.e., while the | ||
1907 | * inode is being instantiated). The reason for the cmpxchg() loop | ||
1908 | * --- which wouldn't be necessary if all code paths which modify | ||
1909 | * i_flags actually followed this rule, is that there is at least one | ||
1910 | * code path which doesn't today --- for example, | ||
1911 | * __generic_file_aio_write() calls file_remove_suid() without holding | ||
1912 | * i_mutex --- so we use cmpxchg() out of an abundance of caution. | ||
1913 | * | ||
1914 | * In the long run, i_mutex is overkill, and we should probably look | ||
1915 | * at using the i_lock spinlock to protect i_flags, and then make sure | ||
1916 | * it is so documented in include/linux/fs.h and that all code follows | ||
1917 | * the locking convention!! | ||
1918 | */ | ||
1919 | void inode_set_flags(struct inode *inode, unsigned int flags, | ||
1920 | unsigned int mask) | ||
1921 | { | ||
1922 | unsigned int old_flags, new_flags; | ||
1923 | |||
1924 | WARN_ON_ONCE(flags & ~mask); | ||
1925 | do { | ||
1926 | old_flags = ACCESS_ONCE(inode->i_flags); | ||
1927 | new_flags = (old_flags & ~mask) | flags; | ||
1928 | } while (unlikely(cmpxchg(&inode->i_flags, old_flags, | ||
1929 | new_flags) != old_flags)); | ||
1930 | } | ||
1931 | EXPORT_SYMBOL(inode_set_flags); | ||
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 4a9e10ea13f2..4556ce1af5b0 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
@@ -93,7 +93,7 @@ static void init_once(void *foo) | |||
93 | inode_init_once(&ei->vfs_inode); | 93 | inode_init_once(&ei->vfs_inode); |
94 | } | 94 | } |
95 | 95 | ||
96 | static int init_inodecache(void) | 96 | static int __init init_inodecache(void) |
97 | { | 97 | { |
98 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", | 98 | isofs_inode_cachep = kmem_cache_create("isofs_inode_cache", |
99 | sizeof(struct iso_inode_info), | 99 | sizeof(struct iso_inode_info), |
@@ -117,6 +117,7 @@ static void destroy_inodecache(void) | |||
117 | 117 | ||
118 | static int isofs_remount(struct super_block *sb, int *flags, char *data) | 118 | static int isofs_remount(struct super_block *sb, int *flags, char *data) |
119 | { | 119 | { |
120 | sync_filesystem(sb); | ||
120 | if (!(*flags & MS_RDONLY)) | 121 | if (!(*flags & MS_RDONLY)) |
121 | return -EROFS; | 122 | return -EROFS; |
122 | return 0; | 123 | return 0; |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index cf2fc0594063..5f26139a165a 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
@@ -555,7 +555,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
555 | blk_start_plug(&plug); | 555 | blk_start_plug(&plug); |
556 | jbd2_journal_write_revoke_records(journal, commit_transaction, | 556 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
557 | &log_bufs, WRITE_SYNC); | 557 | &log_bufs, WRITE_SYNC); |
558 | blk_finish_plug(&plug); | ||
559 | 558 | ||
560 | jbd_debug(3, "JBD2: commit phase 2b\n"); | 559 | jbd_debug(3, "JBD2: commit phase 2b\n"); |
561 | 560 | ||
@@ -582,7 +581,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
582 | err = 0; | 581 | err = 0; |
583 | bufs = 0; | 582 | bufs = 0; |
584 | descriptor = NULL; | 583 | descriptor = NULL; |
585 | blk_start_plug(&plug); | ||
586 | while (commit_transaction->t_buffers) { | 584 | while (commit_transaction->t_buffers) { |
587 | 585 | ||
588 | /* Find the next buffer to be journaled... */ | 586 | /* Find the next buffer to be journaled... */ |
@@ -1067,6 +1065,25 @@ restart_loop: | |||
1067 | goto restart_loop; | 1065 | goto restart_loop; |
1068 | } | 1066 | } |
1069 | 1067 | ||
1068 | /* Add the transaction to the checkpoint list | ||
1069 | * __journal_remove_checkpoint() can not destroy transaction | ||
1070 | * under us because it is not marked as T_FINISHED yet */ | ||
1071 | if (journal->j_checkpoint_transactions == NULL) { | ||
1072 | journal->j_checkpoint_transactions = commit_transaction; | ||
1073 | commit_transaction->t_cpnext = commit_transaction; | ||
1074 | commit_transaction->t_cpprev = commit_transaction; | ||
1075 | } else { | ||
1076 | commit_transaction->t_cpnext = | ||
1077 | journal->j_checkpoint_transactions; | ||
1078 | commit_transaction->t_cpprev = | ||
1079 | commit_transaction->t_cpnext->t_cpprev; | ||
1080 | commit_transaction->t_cpnext->t_cpprev = | ||
1081 | commit_transaction; | ||
1082 | commit_transaction->t_cpprev->t_cpnext = | ||
1083 | commit_transaction; | ||
1084 | } | ||
1085 | spin_unlock(&journal->j_list_lock); | ||
1086 | |||
1070 | /* Done with this transaction! */ | 1087 | /* Done with this transaction! */ |
1071 | 1088 | ||
1072 | jbd_debug(3, "JBD2: commit phase 7\n"); | 1089 | jbd_debug(3, "JBD2: commit phase 7\n"); |
@@ -1085,24 +1102,7 @@ restart_loop: | |||
1085 | atomic_read(&commit_transaction->t_handle_count); | 1102 | atomic_read(&commit_transaction->t_handle_count); |
1086 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, | 1103 | trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, |
1087 | commit_transaction->t_tid, &stats.run); | 1104 | commit_transaction->t_tid, &stats.run); |
1088 | 1105 | stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0; | |
1089 | /* | ||
1090 | * Calculate overall stats | ||
1091 | */ | ||
1092 | spin_lock(&journal->j_history_lock); | ||
1093 | journal->j_stats.ts_tid++; | ||
1094 | if (commit_transaction->t_requested) | ||
1095 | journal->j_stats.ts_requested++; | ||
1096 | journal->j_stats.run.rs_wait += stats.run.rs_wait; | ||
1097 | journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; | ||
1098 | journal->j_stats.run.rs_running += stats.run.rs_running; | ||
1099 | journal->j_stats.run.rs_locked += stats.run.rs_locked; | ||
1100 | journal->j_stats.run.rs_flushing += stats.run.rs_flushing; | ||
1101 | journal->j_stats.run.rs_logging += stats.run.rs_logging; | ||
1102 | journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; | ||
1103 | journal->j_stats.run.rs_blocks += stats.run.rs_blocks; | ||
1104 | journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; | ||
1105 | spin_unlock(&journal->j_history_lock); | ||
1106 | 1106 | ||
1107 | commit_transaction->t_state = T_COMMIT_CALLBACK; | 1107 | commit_transaction->t_state = T_COMMIT_CALLBACK; |
1108 | J_ASSERT(commit_transaction == journal->j_committing_transaction); | 1108 | J_ASSERT(commit_transaction == journal->j_committing_transaction); |
@@ -1122,24 +1122,6 @@ restart_loop: | |||
1122 | 1122 | ||
1123 | write_unlock(&journal->j_state_lock); | 1123 | write_unlock(&journal->j_state_lock); |
1124 | 1124 | ||
1125 | if (journal->j_checkpoint_transactions == NULL) { | ||
1126 | journal->j_checkpoint_transactions = commit_transaction; | ||
1127 | commit_transaction->t_cpnext = commit_transaction; | ||
1128 | commit_transaction->t_cpprev = commit_transaction; | ||
1129 | } else { | ||
1130 | commit_transaction->t_cpnext = | ||
1131 | journal->j_checkpoint_transactions; | ||
1132 | commit_transaction->t_cpprev = | ||
1133 | commit_transaction->t_cpnext->t_cpprev; | ||
1134 | commit_transaction->t_cpnext->t_cpprev = | ||
1135 | commit_transaction; | ||
1136 | commit_transaction->t_cpprev->t_cpnext = | ||
1137 | commit_transaction; | ||
1138 | } | ||
1139 | spin_unlock(&journal->j_list_lock); | ||
1140 | /* Drop all spin_locks because commit_callback may be block. | ||
1141 | * __journal_remove_checkpoint() can not destroy transaction | ||
1142 | * under us because it is not marked as T_FINISHED yet */ | ||
1143 | if (journal->j_commit_callback) | 1125 | if (journal->j_commit_callback) |
1144 | journal->j_commit_callback(journal, commit_transaction); | 1126 | journal->j_commit_callback(journal, commit_transaction); |
1145 | 1127 | ||
@@ -1150,7 +1132,7 @@ restart_loop: | |||
1150 | write_lock(&journal->j_state_lock); | 1132 | write_lock(&journal->j_state_lock); |
1151 | spin_lock(&journal->j_list_lock); | 1133 | spin_lock(&journal->j_list_lock); |
1152 | commit_transaction->t_state = T_FINISHED; | 1134 | commit_transaction->t_state = T_FINISHED; |
1153 | /* Recheck checkpoint lists after j_list_lock was dropped */ | 1135 | /* Check if the transaction can be dropped now that we are finished */ |
1154 | if (commit_transaction->t_checkpoint_list == NULL && | 1136 | if (commit_transaction->t_checkpoint_list == NULL && |
1155 | commit_transaction->t_checkpoint_io_list == NULL) { | 1137 | commit_transaction->t_checkpoint_io_list == NULL) { |
1156 | __jbd2_journal_drop_transaction(journal, commit_transaction); | 1138 | __jbd2_journal_drop_transaction(journal, commit_transaction); |
@@ -1159,4 +1141,21 @@ restart_loop: | |||
1159 | spin_unlock(&journal->j_list_lock); | 1141 | spin_unlock(&journal->j_list_lock); |
1160 | write_unlock(&journal->j_state_lock); | 1142 | write_unlock(&journal->j_state_lock); |
1161 | wake_up(&journal->j_wait_done_commit); | 1143 | wake_up(&journal->j_wait_done_commit); |
1144 | |||
1145 | /* | ||
1146 | * Calculate overall stats | ||
1147 | */ | ||
1148 | spin_lock(&journal->j_history_lock); | ||
1149 | journal->j_stats.ts_tid++; | ||
1150 | journal->j_stats.ts_requested += stats.ts_requested; | ||
1151 | journal->j_stats.run.rs_wait += stats.run.rs_wait; | ||
1152 | journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; | ||
1153 | journal->j_stats.run.rs_running += stats.run.rs_running; | ||
1154 | journal->j_stats.run.rs_locked += stats.run.rs_locked; | ||
1155 | journal->j_stats.run.rs_flushing += stats.run.rs_flushing; | ||
1156 | journal->j_stats.run.rs_logging += stats.run.rs_logging; | ||
1157 | journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; | ||
1158 | journal->j_stats.run.rs_blocks += stats.run.rs_blocks; | ||
1159 | journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; | ||
1160 | spin_unlock(&journal->j_history_lock); | ||
1162 | } | 1161 | } |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 5fa344afb49a..67b8e303946c 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
@@ -122,7 +122,7 @@ EXPORT_SYMBOL(__jbd2_debug); | |||
122 | #endif | 122 | #endif |
123 | 123 | ||
124 | /* Checksumming functions */ | 124 | /* Checksumming functions */ |
125 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | 125 | static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) |
126 | { | 126 | { |
127 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 127 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
128 | return 1; | 128 | return 1; |
@@ -143,7 +143,7 @@ static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) | |||
143 | return cpu_to_be32(csum); | 143 | return cpu_to_be32(csum); |
144 | } | 144 | } |
145 | 145 | ||
146 | int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | 146 | static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) |
147 | { | 147 | { |
148 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 148 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
149 | return 1; | 149 | return 1; |
@@ -151,7 +151,7 @@ int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) | |||
151 | return sb->s_checksum == jbd2_superblock_csum(j, sb); | 151 | return sb->s_checksum == jbd2_superblock_csum(j, sb); |
152 | } | 152 | } |
153 | 153 | ||
154 | void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) | 154 | static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) |
155 | { | 155 | { |
156 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 156 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
157 | return; | 157 | return; |
@@ -302,8 +302,8 @@ static void journal_kill_thread(journal_t *journal) | |||
302 | journal->j_flags |= JBD2_UNMOUNT; | 302 | journal->j_flags |= JBD2_UNMOUNT; |
303 | 303 | ||
304 | while (journal->j_task) { | 304 | while (journal->j_task) { |
305 | wake_up(&journal->j_wait_commit); | ||
306 | write_unlock(&journal->j_state_lock); | 305 | write_unlock(&journal->j_state_lock); |
306 | wake_up(&journal->j_wait_commit); | ||
307 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); | 307 | wait_event(journal->j_wait_done_commit, journal->j_task == NULL); |
308 | write_lock(&journal->j_state_lock); | 308 | write_lock(&journal->j_state_lock); |
309 | } | 309 | } |
@@ -710,8 +710,8 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) | |||
710 | while (tid_gt(tid, journal->j_commit_sequence)) { | 710 | while (tid_gt(tid, journal->j_commit_sequence)) { |
711 | jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", | 711 | jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", |
712 | tid, journal->j_commit_sequence); | 712 | tid, journal->j_commit_sequence); |
713 | wake_up(&journal->j_wait_commit); | ||
714 | read_unlock(&journal->j_state_lock); | 713 | read_unlock(&journal->j_state_lock); |
714 | wake_up(&journal->j_wait_commit); | ||
715 | wait_event(journal->j_wait_done_commit, | 715 | wait_event(journal->j_wait_done_commit, |
716 | !tid_gt(tid, journal->j_commit_sequence)); | 716 | !tid_gt(tid, journal->j_commit_sequence)); |
717 | read_lock(&journal->j_state_lock); | 717 | read_lock(&journal->j_state_lock); |
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 60bb365f54a5..38cfcf5f6fce 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
@@ -1073,7 +1073,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
1073 | * reused here. | 1073 | * reused here. |
1074 | */ | 1074 | */ |
1075 | jbd_lock_bh_state(bh); | 1075 | jbd_lock_bh_state(bh); |
1076 | spin_lock(&journal->j_list_lock); | ||
1077 | J_ASSERT_JH(jh, (jh->b_transaction == transaction || | 1076 | J_ASSERT_JH(jh, (jh->b_transaction == transaction || |
1078 | jh->b_transaction == NULL || | 1077 | jh->b_transaction == NULL || |
1079 | (jh->b_transaction == journal->j_committing_transaction && | 1078 | (jh->b_transaction == journal->j_committing_transaction && |
@@ -1096,12 +1095,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | |||
1096 | jh->b_modified = 0; | 1095 | jh->b_modified = 0; |
1097 | 1096 | ||
1098 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); | 1097 | JBUFFER_TRACE(jh, "file as BJ_Reserved"); |
1098 | spin_lock(&journal->j_list_lock); | ||
1099 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); | 1099 | __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); |
1100 | } else if (jh->b_transaction == journal->j_committing_transaction) { | 1100 | } else if (jh->b_transaction == journal->j_committing_transaction) { |
1101 | /* first access by this transaction */ | 1101 | /* first access by this transaction */ |
1102 | jh->b_modified = 0; | 1102 | jh->b_modified = 0; |
1103 | 1103 | ||
1104 | JBUFFER_TRACE(jh, "set next transaction"); | 1104 | JBUFFER_TRACE(jh, "set next transaction"); |
1105 | spin_lock(&journal->j_list_lock); | ||
1105 | jh->b_next_transaction = transaction; | 1106 | jh->b_next_transaction = transaction; |
1106 | } | 1107 | } |
1107 | spin_unlock(&journal->j_list_lock); | 1108 | spin_unlock(&journal->j_list_lock); |
@@ -1312,7 +1313,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1312 | journal->j_running_transaction)) { | 1313 | journal->j_running_transaction)) { |
1313 | printk(KERN_ERR "JBD2: %s: " | 1314 | printk(KERN_ERR "JBD2: %s: " |
1314 | "jh->b_transaction (%llu, %p, %u) != " | 1315 | "jh->b_transaction (%llu, %p, %u) != " |
1315 | "journal->j_running_transaction (%p, %u)", | 1316 | "journal->j_running_transaction (%p, %u)\n", |
1316 | journal->j_devname, | 1317 | journal->j_devname, |
1317 | (unsigned long long) bh->b_blocknr, | 1318 | (unsigned long long) bh->b_blocknr, |
1318 | jh->b_transaction, | 1319 | jh->b_transaction, |
@@ -1335,30 +1336,25 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
1335 | */ | 1336 | */ |
1336 | if (jh->b_transaction != transaction) { | 1337 | if (jh->b_transaction != transaction) { |
1337 | JBUFFER_TRACE(jh, "already on other transaction"); | 1338 | JBUFFER_TRACE(jh, "already on other transaction"); |
1338 | if (unlikely(jh->b_transaction != | 1339 | if (unlikely(((jh->b_transaction != |
1339 | journal->j_committing_transaction)) { | 1340 | journal->j_committing_transaction)) || |
1340 | printk(KERN_ERR "JBD2: %s: " | 1341 | (jh->b_next_transaction != transaction))) { |
1341 | "jh->b_transaction (%llu, %p, %u) != " | 1342 | printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: " |
1342 | "journal->j_committing_transaction (%p, %u)", | 1343 | "bad jh for block %llu: " |
1344 | "transaction (%p, %u), " | ||
1345 | "jh->b_transaction (%p, %u), " | ||
1346 | "jh->b_next_transaction (%p, %u), jlist %u\n", | ||
1343 | journal->j_devname, | 1347 | journal->j_devname, |
1344 | (unsigned long long) bh->b_blocknr, | 1348 | (unsigned long long) bh->b_blocknr, |
1349 | transaction, transaction->t_tid, | ||
1345 | jh->b_transaction, | 1350 | jh->b_transaction, |
1346 | jh->b_transaction ? jh->b_transaction->t_tid : 0, | 1351 | jh->b_transaction ? |
1347 | journal->j_committing_transaction, | 1352 | jh->b_transaction->t_tid : 0, |
1348 | journal->j_committing_transaction ? | ||
1349 | journal->j_committing_transaction->t_tid : 0); | ||
1350 | ret = -EINVAL; | ||
1351 | } | ||
1352 | if (unlikely(jh->b_next_transaction != transaction)) { | ||
1353 | printk(KERN_ERR "JBD2: %s: " | ||
1354 | "jh->b_next_transaction (%llu, %p, %u) != " | ||
1355 | "transaction (%p, %u)", | ||
1356 | journal->j_devname, | ||
1357 | (unsigned long long) bh->b_blocknr, | ||
1358 | jh->b_next_transaction, | 1353 | jh->b_next_transaction, |
1359 | jh->b_next_transaction ? | 1354 | jh->b_next_transaction ? |
1360 | jh->b_next_transaction->t_tid : 0, | 1355 | jh->b_next_transaction->t_tid : 0, |
1361 | transaction, transaction->t_tid); | 1356 | jh->b_jlist); |
1357 | WARN_ON(1); | ||
1362 | ret = -EINVAL; | 1358 | ret = -EINVAL; |
1363 | } | 1359 | } |
1364 | /* And this case is illegal: we can't reuse another | 1360 | /* And this case is illegal: we can't reuse another |
@@ -1415,7 +1411,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1415 | BUFFER_TRACE(bh, "entry"); | 1411 | BUFFER_TRACE(bh, "entry"); |
1416 | 1412 | ||
1417 | jbd_lock_bh_state(bh); | 1413 | jbd_lock_bh_state(bh); |
1418 | spin_lock(&journal->j_list_lock); | ||
1419 | 1414 | ||
1420 | if (!buffer_jbd(bh)) | 1415 | if (!buffer_jbd(bh)) |
1421 | goto not_jbd; | 1416 | goto not_jbd; |
@@ -1468,6 +1463,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1468 | * we know to remove the checkpoint after we commit. | 1463 | * we know to remove the checkpoint after we commit. |
1469 | */ | 1464 | */ |
1470 | 1465 | ||
1466 | spin_lock(&journal->j_list_lock); | ||
1471 | if (jh->b_cp_transaction) { | 1467 | if (jh->b_cp_transaction) { |
1472 | __jbd2_journal_temp_unlink_buffer(jh); | 1468 | __jbd2_journal_temp_unlink_buffer(jh); |
1473 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); | 1469 | __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); |
@@ -1480,6 +1476,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1480 | goto drop; | 1476 | goto drop; |
1481 | } | 1477 | } |
1482 | } | 1478 | } |
1479 | spin_unlock(&journal->j_list_lock); | ||
1483 | } else if (jh->b_transaction) { | 1480 | } else if (jh->b_transaction) { |
1484 | J_ASSERT_JH(jh, (jh->b_transaction == | 1481 | J_ASSERT_JH(jh, (jh->b_transaction == |
1485 | journal->j_committing_transaction)); | 1482 | journal->j_committing_transaction)); |
@@ -1491,7 +1488,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1491 | 1488 | ||
1492 | if (jh->b_next_transaction) { | 1489 | if (jh->b_next_transaction) { |
1493 | J_ASSERT(jh->b_next_transaction == transaction); | 1490 | J_ASSERT(jh->b_next_transaction == transaction); |
1491 | spin_lock(&journal->j_list_lock); | ||
1494 | jh->b_next_transaction = NULL; | 1492 | jh->b_next_transaction = NULL; |
1493 | spin_unlock(&journal->j_list_lock); | ||
1495 | 1494 | ||
1496 | /* | 1495 | /* |
1497 | * only drop a reference if this transaction modified | 1496 | * only drop a reference if this transaction modified |
@@ -1503,7 +1502,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
1503 | } | 1502 | } |
1504 | 1503 | ||
1505 | not_jbd: | 1504 | not_jbd: |
1506 | spin_unlock(&journal->j_list_lock); | ||
1507 | jbd_unlock_bh_state(bh); | 1505 | jbd_unlock_bh_state(bh); |
1508 | __brelse(bh); | 1506 | __brelse(bh); |
1509 | drop: | 1507 | drop: |
@@ -1821,11 +1819,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) | |||
1821 | if (buffer_locked(bh) || buffer_dirty(bh)) | 1819 | if (buffer_locked(bh) || buffer_dirty(bh)) |
1822 | goto out; | 1820 | goto out; |
1823 | 1821 | ||
1824 | if (jh->b_next_transaction != NULL) | 1822 | if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) |
1825 | goto out; | 1823 | goto out; |
1826 | 1824 | ||
1827 | spin_lock(&journal->j_list_lock); | 1825 | spin_lock(&journal->j_list_lock); |
1828 | if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { | 1826 | if (jh->b_cp_transaction != NULL) { |
1829 | /* written-back checkpointed metadata buffer */ | 1827 | /* written-back checkpointed metadata buffer */ |
1830 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 1828 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
1831 | __jbd2_journal_remove_checkpoint(jh); | 1829 | __jbd2_journal_remove_checkpoint(jh); |
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c index 16a5047903a6..406d9cc84ba8 100644 --- a/fs/jffs2/compr_rtime.c +++ b/fs/jffs2/compr_rtime.c | |||
@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in, | |||
33 | unsigned char *cpage_out, | 33 | unsigned char *cpage_out, |
34 | uint32_t *sourcelen, uint32_t *dstlen) | 34 | uint32_t *sourcelen, uint32_t *dstlen) |
35 | { | 35 | { |
36 | short positions[256]; | 36 | unsigned short positions[256]; |
37 | int outpos = 0; | 37 | int outpos = 0; |
38 | int pos=0; | 38 | int pos=0; |
39 | 39 | ||
@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in, | |||
74 | unsigned char *cpage_out, | 74 | unsigned char *cpage_out, |
75 | uint32_t srclen, uint32_t destlen) | 75 | uint32_t srclen, uint32_t destlen) |
76 | { | 76 | { |
77 | short positions[256]; | 77 | unsigned short positions[256]; |
78 | int outpos = 0; | 78 | int outpos = 0; |
79 | int pos=0; | 79 | int pos=0; |
80 | 80 | ||
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index a69e426435dd..601afd1afddf 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
@@ -242,7 +242,7 @@ void jffs2_evict_inode (struct inode *inode) | |||
242 | 242 | ||
243 | jffs2_dbg(1, "%s(): ino #%lu mode %o\n", | 243 | jffs2_dbg(1, "%s(): ino #%lu mode %o\n", |
244 | __func__, inode->i_ino, inode->i_mode); | 244 | __func__, inode->i_ino, inode->i_mode); |
245 | truncate_inode_pages(&inode->i_data, 0); | 245 | truncate_inode_pages_final(&inode->i_data); |
246 | clear_inode(inode); | 246 | clear_inode(inode); |
247 | jffs2_do_clear_inode(c, f); | 247 | jffs2_do_clear_inode(c, f); |
248 | } | 248 | } |
@@ -457,12 +457,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
457 | The umask is only applied if there's no default ACL */ | 457 | The umask is only applied if there's no default ACL */ |
458 | ret = jffs2_init_acl_pre(dir_i, inode, &mode); | 458 | ret = jffs2_init_acl_pre(dir_i, inode, &mode); |
459 | if (ret) { | 459 | if (ret) { |
460 | make_bad_inode(inode); | 460 | mutex_unlock(&f->sem); |
461 | iput(inode); | 461 | make_bad_inode(inode); |
462 | return ERR_PTR(ret); | 462 | iput(inode); |
463 | return ERR_PTR(ret); | ||
463 | } | 464 | } |
464 | ret = jffs2_do_new_inode (c, f, mode, ri); | 465 | ret = jffs2_do_new_inode (c, f, mode, ri); |
465 | if (ret) { | 466 | if (ret) { |
467 | mutex_unlock(&f->sem); | ||
466 | make_bad_inode(inode); | 468 | make_bad_inode(inode); |
467 | iput(inode); | 469 | iput(inode); |
468 | return ERR_PTR(ret); | 470 | return ERR_PTR(ret); |
@@ -479,6 +481,7 @@ struct inode *jffs2_new_inode (struct inode *dir_i, umode_t mode, struct jffs2_r | |||
479 | inode->i_size = 0; | 481 | inode->i_size = 0; |
480 | 482 | ||
481 | if (insert_inode_locked(inode) < 0) { | 483 | if (insert_inode_locked(inode) < 0) { |
484 | mutex_unlock(&f->sem); | ||
482 | make_bad_inode(inode); | 485 | make_bad_inode(inode); |
483 | iput(inode); | 486 | iput(inode); |
484 | return ERR_PTR(-EINVAL); | 487 | return ERR_PTR(-EINVAL); |
@@ -687,7 +690,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c, | |||
687 | struct inode *inode = OFNI_EDONI_2SFFJ(f); | 690 | struct inode *inode = OFNI_EDONI_2SFFJ(f); |
688 | struct page *pg; | 691 | struct page *pg; |
689 | 692 | ||
690 | pg = read_cache_page_async(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, | 693 | pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT, |
691 | (void *)jffs2_do_readpage_unlock, inode); | 694 | (void *)jffs2_do_readpage_unlock, inode); |
692 | if (IS_ERR(pg)) | 695 | if (IS_ERR(pg)) |
693 | return (void *)pg; | 696 | return (void *)pg; |
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index e4619b00f7c5..fa35ff79ab35 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h | |||
@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info | |||
231 | uint32_t version; | 231 | uint32_t version; |
232 | uint32_t data_crc; | 232 | uint32_t data_crc; |
233 | uint32_t partial_crc; | 233 | uint32_t partial_crc; |
234 | uint16_t csize; | 234 | uint32_t csize; |
235 | uint16_t overlapped; | 235 | uint16_t overlapped; |
236 | }; | 236 | }; |
237 | 237 | ||
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index 03310721712f..b6bd4affd9ad 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c | |||
@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize, | |||
179 | spin_unlock(&c->erase_completion_lock); | 179 | spin_unlock(&c->erase_completion_lock); |
180 | 180 | ||
181 | schedule(); | 181 | schedule(); |
182 | remove_wait_queue(&c->erase_wait, &wait); | ||
182 | } else | 183 | } else |
183 | spin_unlock(&c->erase_completion_lock); | 184 | spin_unlock(&c->erase_completion_lock); |
184 | } else if (ret) | 185 | } else if (ret) |
@@ -211,20 +212,25 @@ out: | |||
211 | int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, | 212 | int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize, |
212 | uint32_t *len, uint32_t sumsize) | 213 | uint32_t *len, uint32_t sumsize) |
213 | { | 214 | { |
214 | int ret = -EAGAIN; | 215 | int ret; |
215 | minsize = PAD(minsize); | 216 | minsize = PAD(minsize); |
216 | 217 | ||
217 | jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); | 218 | jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize); |
218 | 219 | ||
219 | spin_lock(&c->erase_completion_lock); | 220 | while (true) { |
220 | while(ret == -EAGAIN) { | 221 | spin_lock(&c->erase_completion_lock); |
221 | ret = jffs2_do_reserve_space(c, minsize, len, sumsize); | 222 | ret = jffs2_do_reserve_space(c, minsize, len, sumsize); |
222 | if (ret) { | 223 | if (ret) { |
223 | jffs2_dbg(1, "%s(): looping, ret is %d\n", | 224 | jffs2_dbg(1, "%s(): looping, ret is %d\n", |
224 | __func__, ret); | 225 | __func__, ret); |
225 | } | 226 | } |
227 | spin_unlock(&c->erase_completion_lock); | ||
228 | |||
229 | if (ret == -EAGAIN) | ||
230 | cond_resched(); | ||
231 | else | ||
232 | break; | ||
226 | } | 233 | } |
227 | spin_unlock(&c->erase_completion_lock); | ||
228 | if (!ret) | 234 | if (!ret) |
229 | ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); | 235 | ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); |
230 | 236 | ||
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 0defb1cc2a35..0918f0e2e266 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c | |||
@@ -243,6 +243,7 @@ static int jffs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
243 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); | 243 | struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); |
244 | int err; | 244 | int err; |
245 | 245 | ||
246 | sync_filesystem(sb); | ||
246 | err = jffs2_parse_options(c, data); | 247 | err = jffs2_parse_options(c, data); |
247 | if (err) | 248 | if (err) |
248 | return -EINVAL; | 249 | return -EINVAL; |
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index f4aab719add5..6f8fe72c2a7a 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c | |||
@@ -154,7 +154,7 @@ void jfs_evict_inode(struct inode *inode) | |||
154 | dquot_initialize(inode); | 154 | dquot_initialize(inode); |
155 | 155 | ||
156 | if (JFS_IP(inode)->fileset == FILESYSTEM_I) { | 156 | if (JFS_IP(inode)->fileset == FILESYSTEM_I) { |
157 | truncate_inode_pages(&inode->i_data, 0); | 157 | truncate_inode_pages_final(&inode->i_data); |
158 | 158 | ||
159 | if (test_cflag(COMMIT_Freewmap, inode)) | 159 | if (test_cflag(COMMIT_Freewmap, inode)) |
160 | jfs_free_zero_link(inode); | 160 | jfs_free_zero_link(inode); |
@@ -168,7 +168,7 @@ void jfs_evict_inode(struct inode *inode) | |||
168 | dquot_free_inode(inode); | 168 | dquot_free_inode(inode); |
169 | } | 169 | } |
170 | } else { | 170 | } else { |
171 | truncate_inode_pages(&inode->i_data, 0); | 171 | truncate_inode_pages_final(&inode->i_data); |
172 | } | 172 | } |
173 | clear_inode(inode); | 173 | clear_inode(inode); |
174 | dquot_drop(inode); | 174 | dquot_drop(inode); |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index e2b7483444fd..97f7fda51890 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
@@ -418,6 +418,7 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data) | |||
418 | int flag = JFS_SBI(sb)->flag; | 418 | int flag = JFS_SBI(sb)->flag; |
419 | int ret; | 419 | int ret; |
420 | 420 | ||
421 | sync_filesystem(sb); | ||
421 | if (!parse_options(data, sb, &newLVSize, &flag)) { | 422 | if (!parse_options(data, sb, &newLVSize, &flag)) { |
422 | return -EINVAL; | 423 | return -EINVAL; |
423 | } | 424 | } |
diff --git a/fs/kernfs/Kconfig b/fs/kernfs/Kconfig new file mode 100644 index 000000000000..397b5f7a7a16 --- /dev/null +++ b/fs/kernfs/Kconfig | |||
@@ -0,0 +1,7 @@ | |||
1 | # | ||
2 | # KERNFS should be selected by its users | ||
3 | # | ||
4 | |||
5 | config KERNFS | ||
6 | bool | ||
7 | default n | ||
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index bd6e18be6e1a..78f3403300af 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c | |||
@@ -8,6 +8,7 @@ | |||
8 | * This file is released under the GPLv2. | 8 | * This file is released under the GPLv2. |
9 | */ | 9 | */ |
10 | 10 | ||
11 | #include <linux/sched.h> | ||
11 | #include <linux/fs.h> | 12 | #include <linux/fs.h> |
12 | #include <linux/namei.h> | 13 | #include <linux/namei.h> |
13 | #include <linux/idr.h> | 14 | #include <linux/idr.h> |
@@ -18,9 +19,162 @@ | |||
18 | #include "kernfs-internal.h" | 19 | #include "kernfs-internal.h" |
19 | 20 | ||
20 | DEFINE_MUTEX(kernfs_mutex); | 21 | DEFINE_MUTEX(kernfs_mutex); |
22 | static DEFINE_SPINLOCK(kernfs_rename_lock); /* kn->parent and ->name */ | ||
23 | static char kernfs_pr_cont_buf[PATH_MAX]; /* protected by rename_lock */ | ||
21 | 24 | ||
22 | #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) | 25 | #define rb_to_kn(X) rb_entry((X), struct kernfs_node, rb) |
23 | 26 | ||
27 | static bool kernfs_active(struct kernfs_node *kn) | ||
28 | { | ||
29 | lockdep_assert_held(&kernfs_mutex); | ||
30 | return atomic_read(&kn->active) >= 0; | ||
31 | } | ||
32 | |||
33 | static bool kernfs_lockdep(struct kernfs_node *kn) | ||
34 | { | ||
35 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
36 | return kn->flags & KERNFS_LOCKDEP; | ||
37 | #else | ||
38 | return false; | ||
39 | #endif | ||
40 | } | ||
41 | |||
42 | static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen) | ||
43 | { | ||
44 | return strlcpy(buf, kn->parent ? kn->name : "/", buflen); | ||
45 | } | ||
46 | |||
47 | static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf, | ||
48 | size_t buflen) | ||
49 | { | ||
50 | char *p = buf + buflen; | ||
51 | int len; | ||
52 | |||
53 | *--p = '\0'; | ||
54 | |||
55 | do { | ||
56 | len = strlen(kn->name); | ||
57 | if (p - buf < len + 1) { | ||
58 | buf[0] = '\0'; | ||
59 | p = NULL; | ||
60 | break; | ||
61 | } | ||
62 | p -= len; | ||
63 | memcpy(p, kn->name, len); | ||
64 | *--p = '/'; | ||
65 | kn = kn->parent; | ||
66 | } while (kn && kn->parent); | ||
67 | |||
68 | return p; | ||
69 | } | ||
70 | |||
71 | /** | ||
72 | * kernfs_name - obtain the name of a given node | ||
73 | * @kn: kernfs_node of interest | ||
74 | * @buf: buffer to copy @kn's name into | ||
75 | * @buflen: size of @buf | ||
76 | * | ||
77 | * Copies the name of @kn into @buf of @buflen bytes. The behavior is | ||
78 | * similar to strlcpy(). It returns the length of @kn's name and if @buf | ||
79 | * isn't long enough, it's filled upto @buflen-1 and nul terminated. | ||
80 | * | ||
81 | * This function can be called from any context. | ||
82 | */ | ||
83 | int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen) | ||
84 | { | ||
85 | unsigned long flags; | ||
86 | int ret; | ||
87 | |||
88 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
89 | ret = kernfs_name_locked(kn, buf, buflen); | ||
90 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
91 | return ret; | ||
92 | } | ||
93 | |||
94 | /** | ||
95 | * kernfs_path - build full path of a given node | ||
96 | * @kn: kernfs_node of interest | ||
97 | * @buf: buffer to copy @kn's name into | ||
98 | * @buflen: size of @buf | ||
99 | * | ||
100 | * Builds and returns the full path of @kn in @buf of @buflen bytes. The | ||
101 | * path is built from the end of @buf so the returned pointer usually | ||
102 | * doesn't match @buf. If @buf isn't long enough, @buf is nul terminated | ||
103 | * and %NULL is returned. | ||
104 | */ | ||
105 | char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen) | ||
106 | { | ||
107 | unsigned long flags; | ||
108 | char *p; | ||
109 | |||
110 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
111 | p = kernfs_path_locked(kn, buf, buflen); | ||
112 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
113 | return p; | ||
114 | } | ||
115 | EXPORT_SYMBOL_GPL(kernfs_path); | ||
116 | |||
117 | /** | ||
118 | * pr_cont_kernfs_name - pr_cont name of a kernfs_node | ||
119 | * @kn: kernfs_node of interest | ||
120 | * | ||
121 | * This function can be called from any context. | ||
122 | */ | ||
123 | void pr_cont_kernfs_name(struct kernfs_node *kn) | ||
124 | { | ||
125 | unsigned long flags; | ||
126 | |||
127 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
128 | |||
129 | kernfs_name_locked(kn, kernfs_pr_cont_buf, sizeof(kernfs_pr_cont_buf)); | ||
130 | pr_cont("%s", kernfs_pr_cont_buf); | ||
131 | |||
132 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
133 | } | ||
134 | |||
135 | /** | ||
136 | * pr_cont_kernfs_path - pr_cont path of a kernfs_node | ||
137 | * @kn: kernfs_node of interest | ||
138 | * | ||
139 | * This function can be called from any context. | ||
140 | */ | ||
141 | void pr_cont_kernfs_path(struct kernfs_node *kn) | ||
142 | { | ||
143 | unsigned long flags; | ||
144 | char *p; | ||
145 | |||
146 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
147 | |||
148 | p = kernfs_path_locked(kn, kernfs_pr_cont_buf, | ||
149 | sizeof(kernfs_pr_cont_buf)); | ||
150 | if (p) | ||
151 | pr_cont("%s", p); | ||
152 | else | ||
153 | pr_cont("<name too long>"); | ||
154 | |||
155 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
156 | } | ||
157 | |||
158 | /** | ||
159 | * kernfs_get_parent - determine the parent node and pin it | ||
160 | * @kn: kernfs_node of interest | ||
161 | * | ||
162 | * Determines @kn's parent, pins and returns it. This function can be | ||
163 | * called from any context. | ||
164 | */ | ||
165 | struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn) | ||
166 | { | ||
167 | struct kernfs_node *parent; | ||
168 | unsigned long flags; | ||
169 | |||
170 | spin_lock_irqsave(&kernfs_rename_lock, flags); | ||
171 | parent = kn->parent; | ||
172 | kernfs_get(parent); | ||
173 | spin_unlock_irqrestore(&kernfs_rename_lock, flags); | ||
174 | |||
175 | return parent; | ||
176 | } | ||
177 | |||
24 | /** | 178 | /** |
25 | * kernfs_name_hash | 179 | * kernfs_name_hash |
26 | * @name: Null terminated string to hash | 180 | * @name: Null terminated string to hash |
@@ -37,7 +191,7 @@ static unsigned int kernfs_name_hash(const char *name, const void *ns) | |||
37 | hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); | 191 | hash = (end_name_hash(hash) ^ hash_ptr((void *)ns, 31)); |
38 | hash &= 0x7fffffffU; | 192 | hash &= 0x7fffffffU; |
39 | /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ | 193 | /* Reserve hash numbers 0, 1 and INT_MAX for magic directory entries */ |
40 | if (hash < 1) | 194 | if (hash < 2) |
41 | hash += 2; | 195 | hash += 2; |
42 | if (hash >= INT_MAX) | 196 | if (hash >= INT_MAX) |
43 | hash = INT_MAX - 1; | 197 | hash = INT_MAX - 1; |
@@ -105,18 +259,24 @@ static int kernfs_link_sibling(struct kernfs_node *kn) | |||
105 | * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree | 259 | * kernfs_unlink_sibling - unlink kernfs_node from sibling rbtree |
106 | * @kn: kernfs_node of interest | 260 | * @kn: kernfs_node of interest |
107 | * | 261 | * |
108 | * Unlink @kn from its sibling rbtree which starts from | 262 | * Try to unlink @kn from its sibling rbtree which starts from |
109 | * kn->parent->dir.children. | 263 | * kn->parent->dir.children. Returns %true if @kn was actually |
264 | * removed, %false if @kn wasn't on the rbtree. | ||
110 | * | 265 | * |
111 | * Locking: | 266 | * Locking: |
112 | * mutex_lock(kernfs_mutex) | 267 | * mutex_lock(kernfs_mutex) |
113 | */ | 268 | */ |
114 | static void kernfs_unlink_sibling(struct kernfs_node *kn) | 269 | static bool kernfs_unlink_sibling(struct kernfs_node *kn) |
115 | { | 270 | { |
271 | if (RB_EMPTY_NODE(&kn->rb)) | ||
272 | return false; | ||
273 | |||
116 | if (kernfs_type(kn) == KERNFS_DIR) | 274 | if (kernfs_type(kn) == KERNFS_DIR) |
117 | kn->parent->dir.subdirs--; | 275 | kn->parent->dir.subdirs--; |
118 | 276 | ||
119 | rb_erase(&kn->rb, &kn->parent->dir.children); | 277 | rb_erase(&kn->rb, &kn->parent->dir.children); |
278 | RB_CLEAR_NODE(&kn->rb); | ||
279 | return true; | ||
120 | } | 280 | } |
121 | 281 | ||
122 | /** | 282 | /** |
@@ -137,7 +297,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) | |||
137 | if (!atomic_inc_unless_negative(&kn->active)) | 297 | if (!atomic_inc_unless_negative(&kn->active)) |
138 | return NULL; | 298 | return NULL; |
139 | 299 | ||
140 | if (kn->flags & KERNFS_LOCKDEP) | 300 | if (kernfs_lockdep(kn)) |
141 | rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); | 301 | rwsem_acquire_read(&kn->dep_map, 0, 1, _RET_IP_); |
142 | return kn; | 302 | return kn; |
143 | } | 303 | } |
@@ -151,59 +311,57 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn) | |||
151 | */ | 311 | */ |
152 | void kernfs_put_active(struct kernfs_node *kn) | 312 | void kernfs_put_active(struct kernfs_node *kn) |
153 | { | 313 | { |
314 | struct kernfs_root *root = kernfs_root(kn); | ||
154 | int v; | 315 | int v; |
155 | 316 | ||
156 | if (unlikely(!kn)) | 317 | if (unlikely(!kn)) |
157 | return; | 318 | return; |
158 | 319 | ||
159 | if (kn->flags & KERNFS_LOCKDEP) | 320 | if (kernfs_lockdep(kn)) |
160 | rwsem_release(&kn->dep_map, 1, _RET_IP_); | 321 | rwsem_release(&kn->dep_map, 1, _RET_IP_); |
161 | v = atomic_dec_return(&kn->active); | 322 | v = atomic_dec_return(&kn->active); |
162 | if (likely(v != KN_DEACTIVATED_BIAS)) | 323 | if (likely(v != KN_DEACTIVATED_BIAS)) |
163 | return; | 324 | return; |
164 | 325 | ||
165 | /* | 326 | wake_up_all(&root->deactivate_waitq); |
166 | * atomic_dec_return() is a mb(), we'll always see the updated | ||
167 | * kn->u.completion. | ||
168 | */ | ||
169 | complete(kn->u.completion); | ||
170 | } | 327 | } |
171 | 328 | ||
172 | /** | 329 | /** |
173 | * kernfs_deactivate - deactivate kernfs_node | 330 | * kernfs_drain - drain kernfs_node |
174 | * @kn: kernfs_node to deactivate | 331 | * @kn: kernfs_node to drain |
175 | * | 332 | * |
176 | * Deny new active references and drain existing ones. | 333 | * Drain existing usages and nuke all existing mmaps of @kn. Mutiple |
334 | * removers may invoke this function concurrently on @kn and all will | ||
335 | * return after draining is complete. | ||
177 | */ | 336 | */ |
178 | static void kernfs_deactivate(struct kernfs_node *kn) | 337 | static void kernfs_drain(struct kernfs_node *kn) |
338 | __releases(&kernfs_mutex) __acquires(&kernfs_mutex) | ||
179 | { | 339 | { |
180 | DECLARE_COMPLETION_ONSTACK(wait); | 340 | struct kernfs_root *root = kernfs_root(kn); |
181 | int v; | ||
182 | 341 | ||
183 | BUG_ON(!(kn->flags & KERNFS_REMOVED)); | 342 | lockdep_assert_held(&kernfs_mutex); |
184 | 343 | WARN_ON_ONCE(kernfs_active(kn)); | |
185 | if (!(kernfs_type(kn) & KERNFS_ACTIVE_REF)) | ||
186 | return; | ||
187 | 344 | ||
188 | kn->u.completion = (void *)&wait; | 345 | mutex_unlock(&kernfs_mutex); |
189 | 346 | ||
190 | if (kn->flags & KERNFS_LOCKDEP) | 347 | if (kernfs_lockdep(kn)) { |
191 | rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); | 348 | rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); |
192 | /* atomic_add_return() is a mb(), put_active() will always see | 349 | if (atomic_read(&kn->active) != KN_DEACTIVATED_BIAS) |
193 | * the updated kn->u.completion. | ||
194 | */ | ||
195 | v = atomic_add_return(KN_DEACTIVATED_BIAS, &kn->active); | ||
196 | |||
197 | if (v != KN_DEACTIVATED_BIAS) { | ||
198 | if (kn->flags & KERNFS_LOCKDEP) | ||
199 | lock_contended(&kn->dep_map, _RET_IP_); | 350 | lock_contended(&kn->dep_map, _RET_IP_); |
200 | wait_for_completion(&wait); | ||
201 | } | 351 | } |
202 | 352 | ||
203 | if (kn->flags & KERNFS_LOCKDEP) { | 353 | /* but everyone should wait for draining */ |
354 | wait_event(root->deactivate_waitq, | ||
355 | atomic_read(&kn->active) == KN_DEACTIVATED_BIAS); | ||
356 | |||
357 | if (kernfs_lockdep(kn)) { | ||
204 | lock_acquired(&kn->dep_map, _RET_IP_); | 358 | lock_acquired(&kn->dep_map, _RET_IP_); |
205 | rwsem_release(&kn->dep_map, 1, _RET_IP_); | 359 | rwsem_release(&kn->dep_map, 1, _RET_IP_); |
206 | } | 360 | } |
361 | |||
362 | kernfs_unmap_bin_file(kn); | ||
363 | |||
364 | mutex_lock(&kernfs_mutex); | ||
207 | } | 365 | } |
208 | 366 | ||
209 | /** | 367 | /** |
@@ -234,13 +392,15 @@ void kernfs_put(struct kernfs_node *kn) | |||
234 | return; | 392 | return; |
235 | root = kernfs_root(kn); | 393 | root = kernfs_root(kn); |
236 | repeat: | 394 | repeat: |
237 | /* Moving/renaming is always done while holding reference. | 395 | /* |
396 | * Moving/renaming is always done while holding reference. | ||
238 | * kn->parent won't change beneath us. | 397 | * kn->parent won't change beneath us. |
239 | */ | 398 | */ |
240 | parent = kn->parent; | 399 | parent = kn->parent; |
241 | 400 | ||
242 | WARN(!(kn->flags & KERNFS_REMOVED), "kernfs: free using entry: %s/%s\n", | 401 | WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS, |
243 | parent ? parent->name : "", kn->name); | 402 | "kernfs_put: %s/%s: released with incorrect active_ref %d\n", |
403 | parent ? parent->name : "", kn->name, atomic_read(&kn->active)); | ||
244 | 404 | ||
245 | if (kernfs_type(kn) == KERNFS_LINK) | 405 | if (kernfs_type(kn) == KERNFS_LINK) |
246 | kernfs_put(kn->symlink.target_kn); | 406 | kernfs_put(kn->symlink.target_kn); |
@@ -282,8 +442,8 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags) | |||
282 | kn = dentry->d_fsdata; | 442 | kn = dentry->d_fsdata; |
283 | mutex_lock(&kernfs_mutex); | 443 | mutex_lock(&kernfs_mutex); |
284 | 444 | ||
285 | /* The kernfs node has been deleted */ | 445 | /* The kernfs node has been deactivated */ |
286 | if (kn->flags & KERNFS_REMOVED) | 446 | if (!kernfs_active(kn)) |
287 | goto out_bad; | 447 | goto out_bad; |
288 | 448 | ||
289 | /* The kernfs node has been moved? */ | 449 | /* The kernfs node has been moved? */ |
@@ -328,6 +488,24 @@ const struct dentry_operations kernfs_dops = { | |||
328 | .d_release = kernfs_dop_release, | 488 | .d_release = kernfs_dop_release, |
329 | }; | 489 | }; |
330 | 490 | ||
491 | /** | ||
492 | * kernfs_node_from_dentry - determine kernfs_node associated with a dentry | ||
493 | * @dentry: the dentry in question | ||
494 | * | ||
495 | * Return the kernfs_node associated with @dentry. If @dentry is not a | ||
496 | * kernfs one, %NULL is returned. | ||
497 | * | ||
498 | * While the returned kernfs_node will stay accessible as long as @dentry | ||
499 | * is accessible, the returned node can be in any state and the caller is | ||
500 | * fully responsible for determining what's accessible. | ||
501 | */ | ||
502 | struct kernfs_node *kernfs_node_from_dentry(struct dentry *dentry) | ||
503 | { | ||
504 | if (dentry->d_sb->s_op == &kernfs_sops) | ||
505 | return dentry->d_fsdata; | ||
506 | return NULL; | ||
507 | } | ||
508 | |||
331 | static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | 509 | static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, |
332 | const char *name, umode_t mode, | 510 | const char *name, umode_t mode, |
333 | unsigned flags) | 511 | unsigned flags) |
@@ -352,11 +530,12 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, | |||
352 | kn->ino = ret; | 530 | kn->ino = ret; |
353 | 531 | ||
354 | atomic_set(&kn->count, 1); | 532 | atomic_set(&kn->count, 1); |
355 | atomic_set(&kn->active, 0); | 533 | atomic_set(&kn->active, KN_DEACTIVATED_BIAS); |
534 | RB_CLEAR_NODE(&kn->rb); | ||
356 | 535 | ||
357 | kn->name = name; | 536 | kn->name = name; |
358 | kn->mode = mode; | 537 | kn->mode = mode; |
359 | kn->flags = flags | KERNFS_REMOVED; | 538 | kn->flags = flags; |
360 | 539 | ||
361 | return kn; | 540 | return kn; |
362 | 541 | ||
@@ -382,69 +561,44 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, | |||
382 | } | 561 | } |
383 | 562 | ||
384 | /** | 563 | /** |
385 | * kernfs_addrm_start - prepare for kernfs_node add/remove | ||
386 | * @acxt: pointer to kernfs_addrm_cxt to be used | ||
387 | * | ||
388 | * This function is called when the caller is about to add or remove | ||
389 | * kernfs_node. This function acquires kernfs_mutex. @acxt is used | ||
390 | * to keep and pass context to other addrm functions. | ||
391 | * | ||
392 | * LOCKING: | ||
393 | * Kernel thread context (may sleep). kernfs_mutex is locked on | ||
394 | * return. | ||
395 | */ | ||
396 | void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt) | ||
397 | __acquires(kernfs_mutex) | ||
398 | { | ||
399 | memset(acxt, 0, sizeof(*acxt)); | ||
400 | |||
401 | mutex_lock(&kernfs_mutex); | ||
402 | } | ||
403 | |||
404 | /** | ||
405 | * kernfs_add_one - add kernfs_node to parent without warning | 564 | * kernfs_add_one - add kernfs_node to parent without warning |
406 | * @acxt: addrm context to use | ||
407 | * @kn: kernfs_node to be added | 565 | * @kn: kernfs_node to be added |
408 | * | 566 | * |
409 | * The caller must already have initialized @kn->parent. This | 567 | * The caller must already have initialized @kn->parent. This |
410 | * function increments nlink of the parent's inode if @kn is a | 568 | * function increments nlink of the parent's inode if @kn is a |
411 | * directory and link into the children list of the parent. | 569 | * directory and link into the children list of the parent. |
412 | * | 570 | * |
413 | * This function should be called between calls to | ||
414 | * kernfs_addrm_start() and kernfs_addrm_finish() and should be passed | ||
415 | * the same @acxt as passed to kernfs_addrm_start(). | ||
416 | * | ||
417 | * LOCKING: | ||
418 | * Determined by kernfs_addrm_start(). | ||
419 | * | ||
420 | * RETURNS: | 571 | * RETURNS: |
421 | * 0 on success, -EEXIST if entry with the given name already | 572 | * 0 on success, -EEXIST if entry with the given name already |
422 | * exists. | 573 | * exists. |
423 | */ | 574 | */ |
424 | int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) | 575 | int kernfs_add_one(struct kernfs_node *kn) |
425 | { | 576 | { |
426 | struct kernfs_node *parent = kn->parent; | 577 | struct kernfs_node *parent = kn->parent; |
427 | bool has_ns = kernfs_ns_enabled(parent); | ||
428 | struct kernfs_iattrs *ps_iattr; | 578 | struct kernfs_iattrs *ps_iattr; |
579 | bool has_ns; | ||
429 | int ret; | 580 | int ret; |
430 | 581 | ||
431 | if (has_ns != (bool)kn->ns) { | 582 | mutex_lock(&kernfs_mutex); |
432 | WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", | 583 | |
433 | has_ns ? "required" : "invalid", parent->name, kn->name); | 584 | ret = -EINVAL; |
434 | return -EINVAL; | 585 | has_ns = kernfs_ns_enabled(parent); |
435 | } | 586 | if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", |
587 | has_ns ? "required" : "invalid", parent->name, kn->name)) | ||
588 | goto out_unlock; | ||
436 | 589 | ||
437 | if (kernfs_type(parent) != KERNFS_DIR) | 590 | if (kernfs_type(parent) != KERNFS_DIR) |
438 | return -EINVAL; | 591 | goto out_unlock; |
439 | 592 | ||
440 | if (parent->flags & KERNFS_REMOVED) | 593 | ret = -ENOENT; |
441 | return -ENOENT; | 594 | if ((parent->flags & KERNFS_ACTIVATED) && !kernfs_active(parent)) |
595 | goto out_unlock; | ||
442 | 596 | ||
443 | kn->hash = kernfs_name_hash(kn->name, kn->ns); | 597 | kn->hash = kernfs_name_hash(kn->name, kn->ns); |
444 | 598 | ||
445 | ret = kernfs_link_sibling(kn); | 599 | ret = kernfs_link_sibling(kn); |
446 | if (ret) | 600 | if (ret) |
447 | return ret; | 601 | goto out_unlock; |
448 | 602 | ||
449 | /* Update timestamps on the parent */ | 603 | /* Update timestamps on the parent */ |
450 | ps_iattr = parent->iattr; | 604 | ps_iattr = parent->iattr; |
@@ -453,82 +607,22 @@ int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn) | |||
453 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; | 607 | ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; |
454 | } | 608 | } |
455 | 609 | ||
456 | /* Mark the entry added into directory tree */ | 610 | mutex_unlock(&kernfs_mutex); |
457 | kn->flags &= ~KERNFS_REMOVED; | ||
458 | |||
459 | return 0; | ||
460 | } | ||
461 | |||
462 | /** | ||
463 | * kernfs_remove_one - remove kernfs_node from parent | ||
464 | * @acxt: addrm context to use | ||
465 | * @kn: kernfs_node to be removed | ||
466 | * | ||
467 | * Mark @kn removed and drop nlink of parent inode if @kn is a | ||
468 | * directory. @kn is unlinked from the children list. | ||
469 | * | ||
470 | * This function should be called between calls to | ||
471 | * kernfs_addrm_start() and kernfs_addrm_finish() and should be | ||
472 | * passed the same @acxt as passed to kernfs_addrm_start(). | ||
473 | * | ||
474 | * LOCKING: | ||
475 | * Determined by kernfs_addrm_start(). | ||
476 | */ | ||
477 | static void kernfs_remove_one(struct kernfs_addrm_cxt *acxt, | ||
478 | struct kernfs_node *kn) | ||
479 | { | ||
480 | struct kernfs_iattrs *ps_iattr; | ||
481 | 611 | ||
482 | /* | 612 | /* |
483 | * Removal can be called multiple times on the same node. Only the | 613 | * Activate the new node unless CREATE_DEACTIVATED is requested. |
484 | * first invocation is effective and puts the base ref. | 614 | * If not activated here, the kernfs user is responsible for |
615 | * activating the node with kernfs_activate(). A node which hasn't | ||
616 | * been activated is not visible to userland and its removal won't | ||
617 | * trigger deactivation. | ||
485 | */ | 618 | */ |
486 | if (kn->flags & KERNFS_REMOVED) | 619 | if (!(kernfs_root(kn)->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) |
487 | return; | 620 | kernfs_activate(kn); |
488 | 621 | return 0; | |
489 | if (kn->parent) { | ||
490 | kernfs_unlink_sibling(kn); | ||
491 | |||
492 | /* Update timestamps on the parent */ | ||
493 | ps_iattr = kn->parent->iattr; | ||
494 | if (ps_iattr) { | ||
495 | ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; | ||
496 | ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; | ||
497 | } | ||
498 | } | ||
499 | |||
500 | kn->flags |= KERNFS_REMOVED; | ||
501 | kn->u.removed_list = acxt->removed; | ||
502 | acxt->removed = kn; | ||
503 | } | ||
504 | 622 | ||
505 | /** | 623 | out_unlock: |
506 | * kernfs_addrm_finish - finish up kernfs_node add/remove | ||
507 | * @acxt: addrm context to finish up | ||
508 | * | ||
509 | * Finish up kernfs_node add/remove. Resources acquired by | ||
510 | * kernfs_addrm_start() are released and removed kernfs_nodes are | ||
511 | * cleaned up. | ||
512 | * | ||
513 | * LOCKING: | ||
514 | * kernfs_mutex is released. | ||
515 | */ | ||
516 | void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt) | ||
517 | __releases(kernfs_mutex) | ||
518 | { | ||
519 | /* release resources acquired by kernfs_addrm_start() */ | ||
520 | mutex_unlock(&kernfs_mutex); | 624 | mutex_unlock(&kernfs_mutex); |
521 | 625 | return ret; | |
522 | /* kill removed kernfs_nodes */ | ||
523 | while (acxt->removed) { | ||
524 | struct kernfs_node *kn = acxt->removed; | ||
525 | |||
526 | acxt->removed = kn->u.removed_list; | ||
527 | |||
528 | kernfs_deactivate(kn); | ||
529 | kernfs_unmap_bin_file(kn); | ||
530 | kernfs_put(kn); | ||
531 | } | ||
532 | } | 626 | } |
533 | 627 | ||
534 | /** | 628 | /** |
@@ -599,13 +693,15 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns); | |||
599 | 693 | ||
600 | /** | 694 | /** |
601 | * kernfs_create_root - create a new kernfs hierarchy | 695 | * kernfs_create_root - create a new kernfs hierarchy |
602 | * @kdops: optional directory syscall operations for the hierarchy | 696 | * @scops: optional syscall operations for the hierarchy |
697 | * @flags: KERNFS_ROOT_* flags | ||
603 | * @priv: opaque data associated with the new directory | 698 | * @priv: opaque data associated with the new directory |
604 | * | 699 | * |
605 | * Returns the root of the new hierarchy on success, ERR_PTR() value on | 700 | * Returns the root of the new hierarchy on success, ERR_PTR() value on |
606 | * failure. | 701 | * failure. |
607 | */ | 702 | */ |
608 | struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) | 703 | struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops, |
704 | unsigned int flags, void *priv) | ||
609 | { | 705 | { |
610 | struct kernfs_root *root; | 706 | struct kernfs_root *root; |
611 | struct kernfs_node *kn; | 707 | struct kernfs_node *kn; |
@@ -624,12 +720,16 @@ struct kernfs_root *kernfs_create_root(struct kernfs_dir_ops *kdops, void *priv) | |||
624 | return ERR_PTR(-ENOMEM); | 720 | return ERR_PTR(-ENOMEM); |
625 | } | 721 | } |
626 | 722 | ||
627 | kn->flags &= ~KERNFS_REMOVED; | ||
628 | kn->priv = priv; | 723 | kn->priv = priv; |
629 | kn->dir.root = root; | 724 | kn->dir.root = root; |
630 | 725 | ||
631 | root->dir_ops = kdops; | 726 | root->syscall_ops = scops; |
727 | root->flags = flags; | ||
632 | root->kn = kn; | 728 | root->kn = kn; |
729 | init_waitqueue_head(&root->deactivate_waitq); | ||
730 | |||
731 | if (!(root->flags & KERNFS_ROOT_CREATE_DEACTIVATED)) | ||
732 | kernfs_activate(kn); | ||
633 | 733 | ||
634 | return root; | 734 | return root; |
635 | } | 735 | } |
@@ -660,7 +760,6 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, | |||
660 | const char *name, umode_t mode, | 760 | const char *name, umode_t mode, |
661 | void *priv, const void *ns) | 761 | void *priv, const void *ns) |
662 | { | 762 | { |
663 | struct kernfs_addrm_cxt acxt; | ||
664 | struct kernfs_node *kn; | 763 | struct kernfs_node *kn; |
665 | int rc; | 764 | int rc; |
666 | 765 | ||
@@ -674,10 +773,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent, | |||
674 | kn->priv = priv; | 773 | kn->priv = priv; |
675 | 774 | ||
676 | /* link in */ | 775 | /* link in */ |
677 | kernfs_addrm_start(&acxt); | 776 | rc = kernfs_add_one(kn); |
678 | rc = kernfs_add_one(&acxt, kn); | ||
679 | kernfs_addrm_finish(&acxt); | ||
680 | |||
681 | if (!rc) | 777 | if (!rc) |
682 | return kn; | 778 | return kn; |
683 | 779 | ||
@@ -703,7 +799,7 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir, | |||
703 | kn = kernfs_find_ns(parent, dentry->d_name.name, ns); | 799 | kn = kernfs_find_ns(parent, dentry->d_name.name, ns); |
704 | 800 | ||
705 | /* no such entry */ | 801 | /* no such entry */ |
706 | if (!kn) { | 802 | if (!kn || !kernfs_active(kn)) { |
707 | ret = NULL; | 803 | ret = NULL; |
708 | goto out_unlock; | 804 | goto out_unlock; |
709 | } | 805 | } |
@@ -728,23 +824,37 @@ static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry, | |||
728 | umode_t mode) | 824 | umode_t mode) |
729 | { | 825 | { |
730 | struct kernfs_node *parent = dir->i_private; | 826 | struct kernfs_node *parent = dir->i_private; |
731 | struct kernfs_dir_ops *kdops = kernfs_root(parent)->dir_ops; | 827 | struct kernfs_syscall_ops *scops = kernfs_root(parent)->syscall_ops; |
828 | int ret; | ||
732 | 829 | ||
733 | if (!kdops || !kdops->mkdir) | 830 | if (!scops || !scops->mkdir) |
734 | return -EPERM; | 831 | return -EPERM; |
735 | 832 | ||
736 | return kdops->mkdir(parent, dentry->d_name.name, mode); | 833 | if (!kernfs_get_active(parent)) |
834 | return -ENODEV; | ||
835 | |||
836 | ret = scops->mkdir(parent, dentry->d_name.name, mode); | ||
837 | |||
838 | kernfs_put_active(parent); | ||
839 | return ret; | ||
737 | } | 840 | } |
738 | 841 | ||
739 | static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) | 842 | static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry) |
740 | { | 843 | { |
741 | struct kernfs_node *kn = dentry->d_fsdata; | 844 | struct kernfs_node *kn = dentry->d_fsdata; |
742 | struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; | 845 | struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; |
846 | int ret; | ||
743 | 847 | ||
744 | if (!kdops || !kdops->rmdir) | 848 | if (!scops || !scops->rmdir) |
745 | return -EPERM; | 849 | return -EPERM; |
746 | 850 | ||
747 | return kdops->rmdir(kn); | 851 | if (!kernfs_get_active(kn)) |
852 | return -ENODEV; | ||
853 | |||
854 | ret = scops->rmdir(kn); | ||
855 | |||
856 | kernfs_put_active(kn); | ||
857 | return ret; | ||
748 | } | 858 | } |
749 | 859 | ||
750 | static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, | 860 | static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, |
@@ -752,12 +862,25 @@ static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
752 | { | 862 | { |
753 | struct kernfs_node *kn = old_dentry->d_fsdata; | 863 | struct kernfs_node *kn = old_dentry->d_fsdata; |
754 | struct kernfs_node *new_parent = new_dir->i_private; | 864 | struct kernfs_node *new_parent = new_dir->i_private; |
755 | struct kernfs_dir_ops *kdops = kernfs_root(kn)->dir_ops; | 865 | struct kernfs_syscall_ops *scops = kernfs_root(kn)->syscall_ops; |
866 | int ret; | ||
756 | 867 | ||
757 | if (!kdops || !kdops->rename) | 868 | if (!scops || !scops->rename) |
758 | return -EPERM; | 869 | return -EPERM; |
759 | 870 | ||
760 | return kdops->rename(kn, new_parent, new_dentry->d_name.name); | 871 | if (!kernfs_get_active(kn)) |
872 | return -ENODEV; | ||
873 | |||
874 | if (!kernfs_get_active(new_parent)) { | ||
875 | kernfs_put_active(kn); | ||
876 | return -ENODEV; | ||
877 | } | ||
878 | |||
879 | ret = scops->rename(kn, new_parent, new_dentry->d_name.name); | ||
880 | |||
881 | kernfs_put_active(new_parent); | ||
882 | kernfs_put_active(kn); | ||
883 | return ret; | ||
761 | } | 884 | } |
762 | 885 | ||
763 | const struct inode_operations kernfs_dir_iops = { | 886 | const struct inode_operations kernfs_dir_iops = { |
@@ -830,23 +953,104 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, | |||
830 | return pos->parent; | 953 | return pos->parent; |
831 | } | 954 | } |
832 | 955 | ||
833 | static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, | 956 | /** |
834 | struct kernfs_node *kn) | 957 | * kernfs_activate - activate a node which started deactivated |
958 | * @kn: kernfs_node whose subtree is to be activated | ||
959 | * | ||
960 | * If the root has KERNFS_ROOT_CREATE_DEACTIVATED set, a newly created node | ||
961 | * needs to be explicitly activated. A node which hasn't been activated | ||
962 | * isn't visible to userland and deactivation is skipped during its | ||
963 | * removal. This is useful to construct atomic init sequences where | ||
964 | * creation of multiple nodes should either succeed or fail atomically. | ||
965 | * | ||
966 | * The caller is responsible for ensuring that this function is not called | ||
967 | * after kernfs_remove*() is invoked on @kn. | ||
968 | */ | ||
969 | void kernfs_activate(struct kernfs_node *kn) | ||
835 | { | 970 | { |
836 | struct kernfs_node *pos, *next; | 971 | struct kernfs_node *pos; |
837 | 972 | ||
838 | if (!kn) | 973 | mutex_lock(&kernfs_mutex); |
974 | |||
975 | pos = NULL; | ||
976 | while ((pos = kernfs_next_descendant_post(pos, kn))) { | ||
977 | if (!pos || (pos->flags & KERNFS_ACTIVATED)) | ||
978 | continue; | ||
979 | |||
980 | WARN_ON_ONCE(pos->parent && RB_EMPTY_NODE(&pos->rb)); | ||
981 | WARN_ON_ONCE(atomic_read(&pos->active) != KN_DEACTIVATED_BIAS); | ||
982 | |||
983 | atomic_sub(KN_DEACTIVATED_BIAS, &pos->active); | ||
984 | pos->flags |= KERNFS_ACTIVATED; | ||
985 | } | ||
986 | |||
987 | mutex_unlock(&kernfs_mutex); | ||
988 | } | ||
989 | |||
990 | static void __kernfs_remove(struct kernfs_node *kn) | ||
991 | { | ||
992 | struct kernfs_node *pos; | ||
993 | |||
994 | lockdep_assert_held(&kernfs_mutex); | ||
995 | |||
996 | /* | ||
997 | * Short-circuit if non-root @kn has already finished removal. | ||
998 | * This is for kernfs_remove_self() which plays with active ref | ||
999 | * after removal. | ||
1000 | */ | ||
1001 | if (!kn || (kn->parent && RB_EMPTY_NODE(&kn->rb))) | ||
839 | return; | 1002 | return; |
840 | 1003 | ||
841 | pr_debug("kernfs %s: removing\n", kn->name); | 1004 | pr_debug("kernfs %s: removing\n", kn->name); |
842 | 1005 | ||
843 | next = NULL; | 1006 | /* prevent any new usage under @kn by deactivating all nodes */ |
1007 | pos = NULL; | ||
1008 | while ((pos = kernfs_next_descendant_post(pos, kn))) | ||
1009 | if (kernfs_active(pos)) | ||
1010 | atomic_add(KN_DEACTIVATED_BIAS, &pos->active); | ||
1011 | |||
1012 | /* deactivate and unlink the subtree node-by-node */ | ||
844 | do { | 1013 | do { |
845 | pos = next; | 1014 | pos = kernfs_leftmost_descendant(kn); |
846 | next = kernfs_next_descendant_post(pos, kn); | 1015 | |
847 | if (pos) | 1016 | /* |
848 | kernfs_remove_one(acxt, pos); | 1017 | * kernfs_drain() drops kernfs_mutex temporarily and @pos's |
849 | } while (next); | 1018 | * base ref could have been put by someone else by the time |
1019 | * the function returns. Make sure it doesn't go away | ||
1020 | * underneath us. | ||
1021 | */ | ||
1022 | kernfs_get(pos); | ||
1023 | |||
1024 | /* | ||
1025 | * Drain iff @kn was activated. This avoids draining and | ||
1026 | * its lockdep annotations for nodes which have never been | ||
1027 | * activated and allows embedding kernfs_remove() in create | ||
1028 | * error paths without worrying about draining. | ||
1029 | */ | ||
1030 | if (kn->flags & KERNFS_ACTIVATED) | ||
1031 | kernfs_drain(pos); | ||
1032 | else | ||
1033 | WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS); | ||
1034 | |||
1035 | /* | ||
1036 | * kernfs_unlink_sibling() succeeds once per node. Use it | ||
1037 | * to decide who's responsible for cleanups. | ||
1038 | */ | ||
1039 | if (!pos->parent || kernfs_unlink_sibling(pos)) { | ||
1040 | struct kernfs_iattrs *ps_iattr = | ||
1041 | pos->parent ? pos->parent->iattr : NULL; | ||
1042 | |||
1043 | /* update timestamps on the parent */ | ||
1044 | if (ps_iattr) { | ||
1045 | ps_iattr->ia_iattr.ia_ctime = CURRENT_TIME; | ||
1046 | ps_iattr->ia_iattr.ia_mtime = CURRENT_TIME; | ||
1047 | } | ||
1048 | |||
1049 | kernfs_put(pos); | ||
1050 | } | ||
1051 | |||
1052 | kernfs_put(pos); | ||
1053 | } while (pos != kn); | ||
850 | } | 1054 | } |
851 | 1055 | ||
852 | /** | 1056 | /** |
@@ -857,11 +1061,140 @@ static void __kernfs_remove(struct kernfs_addrm_cxt *acxt, | |||
857 | */ | 1061 | */ |
858 | void kernfs_remove(struct kernfs_node *kn) | 1062 | void kernfs_remove(struct kernfs_node *kn) |
859 | { | 1063 | { |
860 | struct kernfs_addrm_cxt acxt; | 1064 | mutex_lock(&kernfs_mutex); |
1065 | __kernfs_remove(kn); | ||
1066 | mutex_unlock(&kernfs_mutex); | ||
1067 | } | ||
861 | 1068 | ||
862 | kernfs_addrm_start(&acxt); | 1069 | /** |
863 | __kernfs_remove(&acxt, kn); | 1070 | * kernfs_break_active_protection - break out of active protection |
864 | kernfs_addrm_finish(&acxt); | 1071 | * @kn: the self kernfs_node |
1072 | * | ||
1073 | * The caller must be running off of a kernfs operation which is invoked | ||
1074 | * with an active reference - e.g. one of kernfs_ops. Each invocation of | ||
1075 | * this function must also be matched with an invocation of | ||
1076 | * kernfs_unbreak_active_protection(). | ||
1077 | * | ||
1078 | * This function releases the active reference of @kn the caller is | ||
1079 | * holding. Once this function is called, @kn may be removed at any point | ||
1080 | * and the caller is solely responsible for ensuring that the objects it | ||
1081 | * dereferences are accessible. | ||
1082 | */ | ||
1083 | void kernfs_break_active_protection(struct kernfs_node *kn) | ||
1084 | { | ||
1085 | /* | ||
1086 | * Take out ourself out of the active ref dependency chain. If | ||
1087 | * we're called without an active ref, lockdep will complain. | ||
1088 | */ | ||
1089 | kernfs_put_active(kn); | ||
1090 | } | ||
1091 | |||
1092 | /** | ||
1093 | * kernfs_unbreak_active_protection - undo kernfs_break_active_protection() | ||
1094 | * @kn: the self kernfs_node | ||
1095 | * | ||
1096 | * If kernfs_break_active_protection() was called, this function must be | ||
1097 | * invoked before finishing the kernfs operation. Note that while this | ||
1098 | * function restores the active reference, it doesn't and can't actually | ||
1099 | * restore the active protection - @kn may already or be in the process of | ||
1100 | * being removed. Once kernfs_break_active_protection() is invoked, that | ||
1101 | * protection is irreversibly gone for the kernfs operation instance. | ||
1102 | * | ||
1103 | * While this function may be called at any point after | ||
1104 | * kernfs_break_active_protection() is invoked, its most useful location | ||
1105 | * would be right before the enclosing kernfs operation returns. | ||
1106 | */ | ||
1107 | void kernfs_unbreak_active_protection(struct kernfs_node *kn) | ||
1108 | { | ||
1109 | /* | ||
1110 | * @kn->active could be in any state; however, the increment we do | ||
1111 | * here will be undone as soon as the enclosing kernfs operation | ||
1112 | * finishes and this temporary bump can't break anything. If @kn | ||
1113 | * is alive, nothing changes. If @kn is being deactivated, the | ||
1114 | * soon-to-follow put will either finish deactivation or restore | ||
1115 | * deactivated state. If @kn is already removed, the temporary | ||
1116 | * bump is guaranteed to be gone before @kn is released. | ||
1117 | */ | ||
1118 | atomic_inc(&kn->active); | ||
1119 | if (kernfs_lockdep(kn)) | ||
1120 | rwsem_acquire(&kn->dep_map, 0, 1, _RET_IP_); | ||
1121 | } | ||
1122 | |||
1123 | /** | ||
1124 | * kernfs_remove_self - remove a kernfs_node from its own method | ||
1125 | * @kn: the self kernfs_node to remove | ||
1126 | * | ||
1127 | * The caller must be running off of a kernfs operation which is invoked | ||
1128 | * with an active reference - e.g. one of kernfs_ops. This can be used to | ||
1129 | * implement a file operation which deletes itself. | ||
1130 | * | ||
1131 | * For example, the "delete" file for a sysfs device directory can be | ||
1132 | * implemented by invoking kernfs_remove_self() on the "delete" file | ||
1133 | * itself. This function breaks the circular dependency of trying to | ||
1134 | * deactivate self while holding an active ref itself. It isn't necessary | ||
1135 | * to modify the usual removal path to use kernfs_remove_self(). The | ||
1136 | * "delete" implementation can simply invoke kernfs_remove_self() on self | ||
1137 | * before proceeding with the usual removal path. kernfs will ignore later | ||
1138 | * kernfs_remove() on self. | ||
1139 | * | ||
1140 | * kernfs_remove_self() can be called multiple times concurrently on the | ||
1141 | * same kernfs_node. Only the first one actually performs removal and | ||
1142 | * returns %true. All others will wait until the kernfs operation which | ||
1143 | * won self-removal finishes and return %false. Note that the losers wait | ||
1144 | * for the completion of not only the winning kernfs_remove_self() but also | ||
1145 | * the whole kernfs_ops which won the arbitration. This can be used to | ||
1146 | * guarantee, for example, all concurrent writes to a "delete" file to | ||
1147 | * finish only after the whole operation is complete. | ||
1148 | */ | ||
1149 | bool kernfs_remove_self(struct kernfs_node *kn) | ||
1150 | { | ||
1151 | bool ret; | ||
1152 | |||
1153 | mutex_lock(&kernfs_mutex); | ||
1154 | kernfs_break_active_protection(kn); | ||
1155 | |||
1156 | /* | ||
1157 | * SUICIDAL is used to arbitrate among competing invocations. Only | ||
1158 | * the first one will actually perform removal. When the removal | ||
1159 | * is complete, SUICIDED is set and the active ref is restored | ||
1160 | * while holding kernfs_mutex. The ones which lost arbitration | ||
1161 | * waits for SUICDED && drained which can happen only after the | ||
1162 | * enclosing kernfs operation which executed the winning instance | ||
1163 | * of kernfs_remove_self() finished. | ||
1164 | */ | ||
1165 | if (!(kn->flags & KERNFS_SUICIDAL)) { | ||
1166 | kn->flags |= KERNFS_SUICIDAL; | ||
1167 | __kernfs_remove(kn); | ||
1168 | kn->flags |= KERNFS_SUICIDED; | ||
1169 | ret = true; | ||
1170 | } else { | ||
1171 | wait_queue_head_t *waitq = &kernfs_root(kn)->deactivate_waitq; | ||
1172 | DEFINE_WAIT(wait); | ||
1173 | |||
1174 | while (true) { | ||
1175 | prepare_to_wait(waitq, &wait, TASK_UNINTERRUPTIBLE); | ||
1176 | |||
1177 | if ((kn->flags & KERNFS_SUICIDED) && | ||
1178 | atomic_read(&kn->active) == KN_DEACTIVATED_BIAS) | ||
1179 | break; | ||
1180 | |||
1181 | mutex_unlock(&kernfs_mutex); | ||
1182 | schedule(); | ||
1183 | mutex_lock(&kernfs_mutex); | ||
1184 | } | ||
1185 | finish_wait(waitq, &wait); | ||
1186 | WARN_ON_ONCE(!RB_EMPTY_NODE(&kn->rb)); | ||
1187 | ret = false; | ||
1188 | } | ||
1189 | |||
1190 | /* | ||
1191 | * This must be done while holding kernfs_mutex; otherwise, waiting | ||
1192 | * for SUICIDED && deactivated could finish prematurely. | ||
1193 | */ | ||
1194 | kernfs_unbreak_active_protection(kn); | ||
1195 | |||
1196 | mutex_unlock(&kernfs_mutex); | ||
1197 | return ret; | ||
865 | } | 1198 | } |
866 | 1199 | ||
867 | /** | 1200 | /** |
@@ -876,7 +1209,6 @@ void kernfs_remove(struct kernfs_node *kn) | |||
876 | int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | 1209 | int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, |
877 | const void *ns) | 1210 | const void *ns) |
878 | { | 1211 | { |
879 | struct kernfs_addrm_cxt acxt; | ||
880 | struct kernfs_node *kn; | 1212 | struct kernfs_node *kn; |
881 | 1213 | ||
882 | if (!parent) { | 1214 | if (!parent) { |
@@ -885,13 +1217,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | |||
885 | return -ENOENT; | 1217 | return -ENOENT; |
886 | } | 1218 | } |
887 | 1219 | ||
888 | kernfs_addrm_start(&acxt); | 1220 | mutex_lock(&kernfs_mutex); |
889 | 1221 | ||
890 | kn = kernfs_find_ns(parent, name, ns); | 1222 | kn = kernfs_find_ns(parent, name, ns); |
891 | if (kn) | 1223 | if (kn) |
892 | __kernfs_remove(&acxt, kn); | 1224 | __kernfs_remove(kn); |
893 | 1225 | ||
894 | kernfs_addrm_finish(&acxt); | 1226 | mutex_unlock(&kernfs_mutex); |
895 | 1227 | ||
896 | if (kn) | 1228 | if (kn) |
897 | return 0; | 1229 | return 0; |
@@ -909,12 +1241,18 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, | |||
909 | int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | 1241 | int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, |
910 | const char *new_name, const void *new_ns) | 1242 | const char *new_name, const void *new_ns) |
911 | { | 1243 | { |
1244 | struct kernfs_node *old_parent; | ||
1245 | const char *old_name = NULL; | ||
912 | int error; | 1246 | int error; |
913 | 1247 | ||
1248 | /* can't move or rename root */ | ||
1249 | if (!kn->parent) | ||
1250 | return -EINVAL; | ||
1251 | |||
914 | mutex_lock(&kernfs_mutex); | 1252 | mutex_lock(&kernfs_mutex); |
915 | 1253 | ||
916 | error = -ENOENT; | 1254 | error = -ENOENT; |
917 | if ((kn->flags | new_parent->flags) & KERNFS_REMOVED) | 1255 | if (!kernfs_active(kn) || !kernfs_active(new_parent)) |
918 | goto out; | 1256 | goto out; |
919 | 1257 | ||
920 | error = 0; | 1258 | error = 0; |
@@ -932,13 +1270,8 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
932 | new_name = kstrdup(new_name, GFP_KERNEL); | 1270 | new_name = kstrdup(new_name, GFP_KERNEL); |
933 | if (!new_name) | 1271 | if (!new_name) |
934 | goto out; | 1272 | goto out; |
935 | 1273 | } else { | |
936 | if (kn->flags & KERNFS_STATIC_NAME) | 1274 | new_name = NULL; |
937 | kn->flags &= ~KERNFS_STATIC_NAME; | ||
938 | else | ||
939 | kfree(kn->name); | ||
940 | |||
941 | kn->name = new_name; | ||
942 | } | 1275 | } |
943 | 1276 | ||
944 | /* | 1277 | /* |
@@ -946,12 +1279,29 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, | |||
946 | */ | 1279 | */ |
947 | kernfs_unlink_sibling(kn); | 1280 | kernfs_unlink_sibling(kn); |
948 | kernfs_get(new_parent); | 1281 | kernfs_get(new_parent); |
949 | kernfs_put(kn->parent); | 1282 | |
1283 | /* rename_lock protects ->parent and ->name accessors */ | ||
1284 | spin_lock_irq(&kernfs_rename_lock); | ||
1285 | |||
1286 | old_parent = kn->parent; | ||
1287 | kn->parent = new_parent; | ||
1288 | |||
950 | kn->ns = new_ns; | 1289 | kn->ns = new_ns; |
1290 | if (new_name) { | ||
1291 | if (!(kn->flags & KERNFS_STATIC_NAME)) | ||
1292 | old_name = kn->name; | ||
1293 | kn->flags &= ~KERNFS_STATIC_NAME; | ||
1294 | kn->name = new_name; | ||
1295 | } | ||
1296 | |||
1297 | spin_unlock_irq(&kernfs_rename_lock); | ||
1298 | |||
951 | kn->hash = kernfs_name_hash(kn->name, kn->ns); | 1299 | kn->hash = kernfs_name_hash(kn->name, kn->ns); |
952 | kn->parent = new_parent; | ||
953 | kernfs_link_sibling(kn); | 1300 | kernfs_link_sibling(kn); |
954 | 1301 | ||
1302 | kernfs_put(old_parent); | ||
1303 | kfree(old_name); | ||
1304 | |||
955 | error = 0; | 1305 | error = 0; |
956 | out: | 1306 | out: |
957 | mutex_unlock(&kernfs_mutex); | 1307 | mutex_unlock(&kernfs_mutex); |
@@ -974,7 +1324,7 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, | |||
974 | struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) | 1324 | struct kernfs_node *parent, loff_t hash, struct kernfs_node *pos) |
975 | { | 1325 | { |
976 | if (pos) { | 1326 | if (pos) { |
977 | int valid = !(pos->flags & KERNFS_REMOVED) && | 1327 | int valid = kernfs_active(pos) && |
978 | pos->parent == parent && hash == pos->hash; | 1328 | pos->parent == parent && hash == pos->hash; |
979 | kernfs_put(pos); | 1329 | kernfs_put(pos); |
980 | if (!valid) | 1330 | if (!valid) |
@@ -993,8 +1343,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns, | |||
993 | break; | 1343 | break; |
994 | } | 1344 | } |
995 | } | 1345 | } |
996 | /* Skip over entries in the wrong namespace */ | 1346 | /* Skip over entries which are dying/dead or in the wrong namespace */ |
997 | while (pos && pos->ns != ns) { | 1347 | while (pos && (!kernfs_active(pos) || pos->ns != ns)) { |
998 | struct rb_node *node = rb_next(&pos->rb); | 1348 | struct rb_node *node = rb_next(&pos->rb); |
999 | if (!node) | 1349 | if (!node) |
1000 | pos = NULL; | 1350 | pos = NULL; |
@@ -1008,14 +1358,15 @@ static struct kernfs_node *kernfs_dir_next_pos(const void *ns, | |||
1008 | struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) | 1358 | struct kernfs_node *parent, ino_t ino, struct kernfs_node *pos) |
1009 | { | 1359 | { |
1010 | pos = kernfs_dir_pos(ns, parent, ino, pos); | 1360 | pos = kernfs_dir_pos(ns, parent, ino, pos); |
1011 | if (pos) | 1361 | if (pos) { |
1012 | do { | 1362 | do { |
1013 | struct rb_node *node = rb_next(&pos->rb); | 1363 | struct rb_node *node = rb_next(&pos->rb); |
1014 | if (!node) | 1364 | if (!node) |
1015 | pos = NULL; | 1365 | pos = NULL; |
1016 | else | 1366 | else |
1017 | pos = rb_to_kn(node); | 1367 | pos = rb_to_kn(node); |
1018 | } while (pos && pos->ns != ns); | 1368 | } while (pos && (!kernfs_active(pos) || pos->ns != ns)); |
1369 | } | ||
1019 | return pos; | 1370 | return pos; |
1020 | } | 1371 | } |
1021 | 1372 | ||
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index dbf397bfdff2..8034706a7af8 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c | |||
@@ -252,10 +252,18 @@ static ssize_t kernfs_fop_write(struct file *file, const char __user *user_buf, | |||
252 | size_t count, loff_t *ppos) | 252 | size_t count, loff_t *ppos) |
253 | { | 253 | { |
254 | struct kernfs_open_file *of = kernfs_of(file); | 254 | struct kernfs_open_file *of = kernfs_of(file); |
255 | ssize_t len = min_t(size_t, count, PAGE_SIZE); | ||
256 | const struct kernfs_ops *ops; | 255 | const struct kernfs_ops *ops; |
256 | size_t len; | ||
257 | char *buf; | 257 | char *buf; |
258 | 258 | ||
259 | if (of->atomic_write_len) { | ||
260 | len = count; | ||
261 | if (len > of->atomic_write_len) | ||
262 | return -E2BIG; | ||
263 | } else { | ||
264 | len = min_t(size_t, count, PAGE_SIZE); | ||
265 | } | ||
266 | |||
259 | buf = kmalloc(len + 1, GFP_KERNEL); | 267 | buf = kmalloc(len + 1, GFP_KERNEL); |
260 | if (!buf) | 268 | if (!buf) |
261 | return -ENOMEM; | 269 | return -ENOMEM; |
@@ -653,6 +661,12 @@ static int kernfs_fop_open(struct inode *inode, struct file *file) | |||
653 | of->file = file; | 661 | of->file = file; |
654 | 662 | ||
655 | /* | 663 | /* |
664 | * Write path needs to atomic_write_len outside active reference. | ||
665 | * Cache it in open_file. See kernfs_fop_write() for details. | ||
666 | */ | ||
667 | of->atomic_write_len = ops->atomic_write_len; | ||
668 | |||
669 | /* | ||
656 | * Always instantiate seq_file even if read access doesn't use | 670 | * Always instantiate seq_file even if read access doesn't use |
657 | * seq_file or is not requested. This unifies private data access | 671 | * seq_file or is not requested. This unifies private data access |
658 | * and readable regular files are the vast majority anyway. | 672 | * and readable regular files are the vast majority anyway. |
@@ -820,7 +834,6 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, | |||
820 | bool name_is_static, | 834 | bool name_is_static, |
821 | struct lock_class_key *key) | 835 | struct lock_class_key *key) |
822 | { | 836 | { |
823 | struct kernfs_addrm_cxt acxt; | ||
824 | struct kernfs_node *kn; | 837 | struct kernfs_node *kn; |
825 | unsigned flags; | 838 | unsigned flags; |
826 | int rc; | 839 | int rc; |
@@ -855,10 +868,7 @@ struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent, | |||
855 | if (ops->mmap) | 868 | if (ops->mmap) |
856 | kn->flags |= KERNFS_HAS_MMAP; | 869 | kn->flags |= KERNFS_HAS_MMAP; |
857 | 870 | ||
858 | kernfs_addrm_start(&acxt); | 871 | rc = kernfs_add_one(kn); |
859 | rc = kernfs_add_one(&acxt, kn); | ||
860 | kernfs_addrm_finish(&acxt); | ||
861 | |||
862 | if (rc) { | 872 | if (rc) { |
863 | kernfs_put(kn); | 873 | kernfs_put(kn); |
864 | return ERR_PTR(rc); | 874 | return ERR_PTR(rc); |
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index e55126f85bd2..abb0f1f53d93 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c | |||
@@ -355,7 +355,7 @@ void kernfs_evict_inode(struct inode *inode) | |||
355 | { | 355 | { |
356 | struct kernfs_node *kn = inode->i_private; | 356 | struct kernfs_node *kn = inode->i_private; |
357 | 357 | ||
358 | truncate_inode_pages(&inode->i_data, 0); | 358 | truncate_inode_pages_final(&inode->i_data); |
359 | clear_inode(inode); | 359 | clear_inode(inode); |
360 | kernfs_put(kn); | 360 | kernfs_put(kn); |
361 | } | 361 | } |
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index eb536b76374a..8be13b2a079b 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h | |||
@@ -26,7 +26,8 @@ struct kernfs_iattrs { | |||
26 | struct simple_xattrs xattrs; | 26 | struct simple_xattrs xattrs; |
27 | }; | 27 | }; |
28 | 28 | ||
29 | #define KN_DEACTIVATED_BIAS INT_MIN | 29 | /* +1 to avoid triggering overflow warning when negating it */ |
30 | #define KN_DEACTIVATED_BIAS (INT_MIN + 1) | ||
30 | 31 | ||
31 | /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ | 32 | /* KERNFS_TYPE_MASK and types are defined in include/linux/kernfs.h */ |
32 | 33 | ||
@@ -45,13 +46,6 @@ static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn) | |||
45 | } | 46 | } |
46 | 47 | ||
47 | /* | 48 | /* |
48 | * Context structure to be used while adding/removing nodes. | ||
49 | */ | ||
50 | struct kernfs_addrm_cxt { | ||
51 | struct kernfs_node *removed; | ||
52 | }; | ||
53 | |||
54 | /* | ||
55 | * mount.c | 49 | * mount.c |
56 | */ | 50 | */ |
57 | struct kernfs_super_info { | 51 | struct kernfs_super_info { |
@@ -71,6 +65,7 @@ struct kernfs_super_info { | |||
71 | }; | 65 | }; |
72 | #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) | 66 | #define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info)) |
73 | 67 | ||
68 | extern const struct super_operations kernfs_sops; | ||
74 | extern struct kmem_cache *kernfs_node_cache; | 69 | extern struct kmem_cache *kernfs_node_cache; |
75 | 70 | ||
76 | /* | 71 | /* |
@@ -100,9 +95,7 @@ extern const struct inode_operations kernfs_dir_iops; | |||
100 | 95 | ||
101 | struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); | 96 | struct kernfs_node *kernfs_get_active(struct kernfs_node *kn); |
102 | void kernfs_put_active(struct kernfs_node *kn); | 97 | void kernfs_put_active(struct kernfs_node *kn); |
103 | void kernfs_addrm_start(struct kernfs_addrm_cxt *acxt); | 98 | int kernfs_add_one(struct kernfs_node *kn); |
104 | int kernfs_add_one(struct kernfs_addrm_cxt *acxt, struct kernfs_node *kn); | ||
105 | void kernfs_addrm_finish(struct kernfs_addrm_cxt *acxt); | ||
106 | struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, | 99 | struct kernfs_node *kernfs_new_node(struct kernfs_node *parent, |
107 | const char *name, umode_t mode, | 100 | const char *name, umode_t mode, |
108 | unsigned flags); | 101 | unsigned flags); |
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c index 0f4152defe7b..6a5f04ac8704 100644 --- a/fs/kernfs/mount.c +++ b/fs/kernfs/mount.c | |||
@@ -19,12 +19,49 @@ | |||
19 | 19 | ||
20 | struct kmem_cache *kernfs_node_cache; | 20 | struct kmem_cache *kernfs_node_cache; |
21 | 21 | ||
22 | static const struct super_operations kernfs_sops = { | 22 | static int kernfs_sop_remount_fs(struct super_block *sb, int *flags, char *data) |
23 | { | ||
24 | struct kernfs_root *root = kernfs_info(sb)->root; | ||
25 | struct kernfs_syscall_ops *scops = root->syscall_ops; | ||
26 | |||
27 | if (scops && scops->remount_fs) | ||
28 | return scops->remount_fs(root, flags, data); | ||
29 | return 0; | ||
30 | } | ||
31 | |||
32 | static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry) | ||
33 | { | ||
34 | struct kernfs_root *root = kernfs_root(dentry->d_fsdata); | ||
35 | struct kernfs_syscall_ops *scops = root->syscall_ops; | ||
36 | |||
37 | if (scops && scops->show_options) | ||
38 | return scops->show_options(sf, root); | ||
39 | return 0; | ||
40 | } | ||
41 | |||
42 | const struct super_operations kernfs_sops = { | ||
23 | .statfs = simple_statfs, | 43 | .statfs = simple_statfs, |
24 | .drop_inode = generic_delete_inode, | 44 | .drop_inode = generic_delete_inode, |
25 | .evict_inode = kernfs_evict_inode, | 45 | .evict_inode = kernfs_evict_inode, |
46 | |||
47 | .remount_fs = kernfs_sop_remount_fs, | ||
48 | .show_options = kernfs_sop_show_options, | ||
26 | }; | 49 | }; |
27 | 50 | ||
51 | /** | ||
52 | * kernfs_root_from_sb - determine kernfs_root associated with a super_block | ||
53 | * @sb: the super_block in question | ||
54 | * | ||
55 | * Return the kernfs_root associated with @sb. If @sb is not a kernfs one, | ||
56 | * %NULL is returned. | ||
57 | */ | ||
58 | struct kernfs_root *kernfs_root_from_sb(struct super_block *sb) | ||
59 | { | ||
60 | if (sb->s_op == &kernfs_sops) | ||
61 | return kernfs_info(sb)->root; | ||
62 | return NULL; | ||
63 | } | ||
64 | |||
28 | static int kernfs_fill_super(struct super_block *sb) | 65 | static int kernfs_fill_super(struct super_block *sb) |
29 | { | 66 | { |
30 | struct kernfs_super_info *info = kernfs_info(sb); | 67 | struct kernfs_super_info *info = kernfs_info(sb); |
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 4d457055acb9..8a198898e39a 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c | |||
@@ -27,7 +27,6 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, | |||
27 | struct kernfs_node *target) | 27 | struct kernfs_node *target) |
28 | { | 28 | { |
29 | struct kernfs_node *kn; | 29 | struct kernfs_node *kn; |
30 | struct kernfs_addrm_cxt acxt; | ||
31 | int error; | 30 | int error; |
32 | 31 | ||
33 | kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); | 32 | kn = kernfs_new_node(parent, name, S_IFLNK|S_IRWXUGO, KERNFS_LINK); |
@@ -39,10 +38,7 @@ struct kernfs_node *kernfs_create_link(struct kernfs_node *parent, | |||
39 | kn->symlink.target_kn = target; | 38 | kn->symlink.target_kn = target; |
40 | kernfs_get(target); /* ref owned by symlink */ | 39 | kernfs_get(target); /* ref owned by symlink */ |
41 | 40 | ||
42 | kernfs_addrm_start(&acxt); | 41 | error = kernfs_add_one(kn); |
43 | error = kernfs_add_one(&acxt, kn); | ||
44 | kernfs_addrm_finish(&acxt); | ||
45 | |||
46 | if (!error) | 42 | if (!error) |
47 | return kn; | 43 | return kn; |
48 | 44 | ||
diff --git a/fs/locks.c b/fs/locks.c index 92a0f0a52b06..13fc7a6d380a 100644 --- a/fs/locks.c +++ b/fs/locks.c | |||
@@ -135,6 +135,7 @@ | |||
135 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) | 135 | #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) |
136 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) | 136 | #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) |
137 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) | 137 | #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) |
138 | #define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) | ||
138 | 139 | ||
139 | static bool lease_breaking(struct file_lock *fl) | 140 | static bool lease_breaking(struct file_lock *fl) |
140 | { | 141 | { |
@@ -344,48 +345,43 @@ static int assign_type(struct file_lock *fl, long type) | |||
344 | return 0; | 345 | return 0; |
345 | } | 346 | } |
346 | 347 | ||
347 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX | 348 | static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, |
348 | * style lock. | 349 | struct flock64 *l) |
349 | */ | ||
350 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | ||
351 | struct flock *l) | ||
352 | { | 350 | { |
353 | off_t start, end; | ||
354 | |||
355 | switch (l->l_whence) { | 351 | switch (l->l_whence) { |
356 | case SEEK_SET: | 352 | case SEEK_SET: |
357 | start = 0; | 353 | fl->fl_start = 0; |
358 | break; | 354 | break; |
359 | case SEEK_CUR: | 355 | case SEEK_CUR: |
360 | start = filp->f_pos; | 356 | fl->fl_start = filp->f_pos; |
361 | break; | 357 | break; |
362 | case SEEK_END: | 358 | case SEEK_END: |
363 | start = i_size_read(file_inode(filp)); | 359 | fl->fl_start = i_size_read(file_inode(filp)); |
364 | break; | 360 | break; |
365 | default: | 361 | default: |
366 | return -EINVAL; | 362 | return -EINVAL; |
367 | } | 363 | } |
364 | if (l->l_start > OFFSET_MAX - fl->fl_start) | ||
365 | return -EOVERFLOW; | ||
366 | fl->fl_start += l->l_start; | ||
367 | if (fl->fl_start < 0) | ||
368 | return -EINVAL; | ||
368 | 369 | ||
369 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; | 370 | /* POSIX-1996 leaves the case l->l_len < 0 undefined; |
370 | POSIX-2001 defines it. */ | 371 | POSIX-2001 defines it. */ |
371 | start += l->l_start; | ||
372 | if (start < 0) | ||
373 | return -EINVAL; | ||
374 | fl->fl_end = OFFSET_MAX; | ||
375 | if (l->l_len > 0) { | 372 | if (l->l_len > 0) { |
376 | end = start + l->l_len - 1; | 373 | if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) |
377 | fl->fl_end = end; | 374 | return -EOVERFLOW; |
375 | fl->fl_end = fl->fl_start + l->l_len - 1; | ||
376 | |||
378 | } else if (l->l_len < 0) { | 377 | } else if (l->l_len < 0) { |
379 | end = start - 1; | 378 | if (fl->fl_start + l->l_len < 0) |
380 | fl->fl_end = end; | ||
381 | start += l->l_len; | ||
382 | if (start < 0) | ||
383 | return -EINVAL; | 379 | return -EINVAL; |
384 | } | 380 | fl->fl_end = fl->fl_start - 1; |
385 | fl->fl_start = start; /* we record the absolute position */ | 381 | fl->fl_start += l->l_len; |
386 | if (fl->fl_end < fl->fl_start) | 382 | } else |
387 | return -EOVERFLOW; | 383 | fl->fl_end = OFFSET_MAX; |
388 | 384 | ||
389 | fl->fl_owner = current->files; | 385 | fl->fl_owner = current->files; |
390 | fl->fl_pid = current->tgid; | 386 | fl->fl_pid = current->tgid; |
391 | fl->fl_file = filp; | 387 | fl->fl_file = filp; |
@@ -393,55 +389,36 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | |||
393 | fl->fl_ops = NULL; | 389 | fl->fl_ops = NULL; |
394 | fl->fl_lmops = NULL; | 390 | fl->fl_lmops = NULL; |
395 | 391 | ||
396 | return assign_type(fl, l->l_type); | 392 | /* Ensure that fl->fl_filp has compatible f_mode */ |
397 | } | 393 | switch (l->l_type) { |
398 | 394 | case F_RDLCK: | |
399 | #if BITS_PER_LONG == 32 | 395 | if (!(filp->f_mode & FMODE_READ)) |
400 | static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, | 396 | return -EBADF; |
401 | struct flock64 *l) | ||
402 | { | ||
403 | loff_t start; | ||
404 | |||
405 | switch (l->l_whence) { | ||
406 | case SEEK_SET: | ||
407 | start = 0; | ||
408 | break; | ||
409 | case SEEK_CUR: | ||
410 | start = filp->f_pos; | ||
411 | break; | 397 | break; |
412 | case SEEK_END: | 398 | case F_WRLCK: |
413 | start = i_size_read(file_inode(filp)); | 399 | if (!(filp->f_mode & FMODE_WRITE)) |
400 | return -EBADF; | ||
414 | break; | 401 | break; |
415 | default: | ||
416 | return -EINVAL; | ||
417 | } | 402 | } |
418 | 403 | ||
419 | start += l->l_start; | ||
420 | if (start < 0) | ||
421 | return -EINVAL; | ||
422 | fl->fl_end = OFFSET_MAX; | ||
423 | if (l->l_len > 0) { | ||
424 | fl->fl_end = start + l->l_len - 1; | ||
425 | } else if (l->l_len < 0) { | ||
426 | fl->fl_end = start - 1; | ||
427 | start += l->l_len; | ||
428 | if (start < 0) | ||
429 | return -EINVAL; | ||
430 | } | ||
431 | fl->fl_start = start; /* we record the absolute position */ | ||
432 | if (fl->fl_end < fl->fl_start) | ||
433 | return -EOVERFLOW; | ||
434 | |||
435 | fl->fl_owner = current->files; | ||
436 | fl->fl_pid = current->tgid; | ||
437 | fl->fl_file = filp; | ||
438 | fl->fl_flags = FL_POSIX; | ||
439 | fl->fl_ops = NULL; | ||
440 | fl->fl_lmops = NULL; | ||
441 | |||
442 | return assign_type(fl, l->l_type); | 404 | return assign_type(fl, l->l_type); |
443 | } | 405 | } |
444 | #endif | 406 | |
407 | /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX | ||
408 | * style lock. | ||
409 | */ | ||
410 | static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, | ||
411 | struct flock *l) | ||
412 | { | ||
413 | struct flock64 ll = { | ||
414 | .l_type = l->l_type, | ||
415 | .l_whence = l->l_whence, | ||
416 | .l_start = l->l_start, | ||
417 | .l_len = l->l_len, | ||
418 | }; | ||
419 | |||
420 | return flock64_to_posix_lock(filp, fl, &ll); | ||
421 | } | ||
445 | 422 | ||
446 | /* default lease lock manager operations */ | 423 | /* default lease lock manager operations */ |
447 | static void lease_break_callback(struct file_lock *fl) | 424 | static void lease_break_callback(struct file_lock *fl) |
@@ -511,8 +488,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) | |||
511 | } | 488 | } |
512 | 489 | ||
513 | /* Must be called with the i_lock held! */ | 490 | /* Must be called with the i_lock held! */ |
514 | static inline void | 491 | static void locks_insert_global_locks(struct file_lock *fl) |
515 | locks_insert_global_locks(struct file_lock *fl) | ||
516 | { | 492 | { |
517 | lg_local_lock(&file_lock_lglock); | 493 | lg_local_lock(&file_lock_lglock); |
518 | fl->fl_link_cpu = smp_processor_id(); | 494 | fl->fl_link_cpu = smp_processor_id(); |
@@ -521,8 +497,7 @@ locks_insert_global_locks(struct file_lock *fl) | |||
521 | } | 497 | } |
522 | 498 | ||
523 | /* Must be called with the i_lock held! */ | 499 | /* Must be called with the i_lock held! */ |
524 | static inline void | 500 | static void locks_delete_global_locks(struct file_lock *fl) |
525 | locks_delete_global_locks(struct file_lock *fl) | ||
526 | { | 501 | { |
527 | /* | 502 | /* |
528 | * Avoid taking lock if already unhashed. This is safe since this check | 503 | * Avoid taking lock if already unhashed. This is safe since this check |
@@ -544,14 +519,12 @@ posix_owner_key(struct file_lock *fl) | |||
544 | return (unsigned long)fl->fl_owner; | 519 | return (unsigned long)fl->fl_owner; |
545 | } | 520 | } |
546 | 521 | ||
547 | static inline void | 522 | static void locks_insert_global_blocked(struct file_lock *waiter) |
548 | locks_insert_global_blocked(struct file_lock *waiter) | ||
549 | { | 523 | { |
550 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); | 524 | hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); |
551 | } | 525 | } |
552 | 526 | ||
553 | static inline void | 527 | static void locks_delete_global_blocked(struct file_lock *waiter) |
554 | locks_delete_global_blocked(struct file_lock *waiter) | ||
555 | { | 528 | { |
556 | hash_del(&waiter->fl_link); | 529 | hash_del(&waiter->fl_link); |
557 | } | 530 | } |
@@ -581,7 +554,7 @@ static void locks_delete_block(struct file_lock *waiter) | |||
581 | * it seems like the reasonable thing to do. | 554 | * it seems like the reasonable thing to do. |
582 | * | 555 | * |
583 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block | 556 | * Must be called with both the i_lock and blocked_lock_lock held. The fl_block |
584 | * list itself is protected by the file_lock_list, but by ensuring that the | 557 | * list itself is protected by the blocked_lock_lock, but by ensuring that the |
585 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock | 558 | * i_lock is also held on insertions we can avoid taking the blocked_lock_lock |
586 | * in some cases when we see that the fl_block list is empty. | 559 | * in some cases when we see that the fl_block list is empty. |
587 | */ | 560 | */ |
@@ -591,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker, | |||
591 | BUG_ON(!list_empty(&waiter->fl_block)); | 564 | BUG_ON(!list_empty(&waiter->fl_block)); |
592 | waiter->fl_next = blocker; | 565 | waiter->fl_next = blocker; |
593 | list_add_tail(&waiter->fl_block, &blocker->fl_block); | 566 | list_add_tail(&waiter->fl_block, &blocker->fl_block); |
594 | if (IS_POSIX(blocker)) | 567 | if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) |
595 | locks_insert_global_blocked(waiter); | 568 | locks_insert_global_blocked(waiter); |
596 | } | 569 | } |
597 | 570 | ||
@@ -652,15 +625,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) | |||
652 | locks_insert_global_locks(fl); | 625 | locks_insert_global_locks(fl); |
653 | } | 626 | } |
654 | 627 | ||
655 | /* | 628 | /** |
656 | * Delete a lock and then free it. | 629 | * locks_delete_lock - Delete a lock and then free it. |
657 | * Wake up processes that are blocked waiting for this lock, | 630 | * @thisfl_p: pointer that points to the fl_next field of the previous |
658 | * notify the FS that the lock has been cleared and | 631 | * inode->i_flock list entry |
659 | * finally free the lock. | 632 | * |
633 | * Unlink a lock from all lists and free the namespace reference, but don't | ||
634 | * free it yet. Wake up processes that are blocked waiting for this lock and | ||
635 | * notify the FS that the lock has been cleared. | ||
660 | * | 636 | * |
661 | * Must be called with the i_lock held! | 637 | * Must be called with the i_lock held! |
662 | */ | 638 | */ |
663 | static void locks_delete_lock(struct file_lock **thisfl_p) | 639 | static void locks_unlink_lock(struct file_lock **thisfl_p) |
664 | { | 640 | { |
665 | struct file_lock *fl = *thisfl_p; | 641 | struct file_lock *fl = *thisfl_p; |
666 | 642 | ||
@@ -675,6 +651,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p) | |||
675 | } | 651 | } |
676 | 652 | ||
677 | locks_wake_up_blocks(fl); | 653 | locks_wake_up_blocks(fl); |
654 | } | ||
655 | |||
656 | /* | ||
657 | * Unlink a lock from all lists and free it. | ||
658 | * | ||
659 | * Must be called with i_lock held! | ||
660 | */ | ||
661 | static void locks_delete_lock(struct file_lock **thisfl_p) | ||
662 | { | ||
663 | struct file_lock *fl = *thisfl_p; | ||
664 | |||
665 | locks_unlink_lock(thisfl_p); | ||
678 | locks_free_lock(fl); | 666 | locks_free_lock(fl); |
679 | } | 667 | } |
680 | 668 | ||
@@ -769,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock); | |||
769 | * Note: the above assumption may not be true when handling lock | 757 | * Note: the above assumption may not be true when handling lock |
770 | * requests from a broken NFS client. It may also fail in the presence | 758 | * requests from a broken NFS client. It may also fail in the presence |
771 | * of tasks (such as posix threads) sharing the same open file table. | 759 | * of tasks (such as posix threads) sharing the same open file table. |
772 | * | ||
773 | * To handle those cases, we just bail out after a few iterations. | 760 | * To handle those cases, we just bail out after a few iterations. |
761 | * | ||
762 | * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. | ||
763 | * Because the owner is not even nominally tied to a thread of | ||
764 | * execution, the deadlock detection below can't reasonably work well. Just | ||
765 | * skip it for those. | ||
766 | * | ||
767 | * In principle, we could do a more limited deadlock detection on FL_FILE_PVT | ||
768 | * locks that just checks for the case where two tasks are attempting to | ||
769 | * upgrade from read to write locks on the same inode. | ||
774 | */ | 770 | */ |
775 | 771 | ||
776 | #define MAX_DEADLK_ITERATIONS 10 | 772 | #define MAX_DEADLK_ITERATIONS 10 |
@@ -793,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, | |||
793 | { | 789 | { |
794 | int i = 0; | 790 | int i = 0; |
795 | 791 | ||
792 | /* | ||
793 | * This deadlock detector can't reasonably detect deadlocks with | ||
794 | * FL_FILE_PVT locks, since they aren't owned by a process, per-se. | ||
795 | */ | ||
796 | if (IS_FILE_PVT(caller_fl)) | ||
797 | return 0; | ||
798 | |||
796 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { | 799 | while ((block_fl = what_owner_is_waiting_for(block_fl))) { |
797 | if (i++ > MAX_DEADLK_ITERATIONS) | 800 | if (i++ > MAX_DEADLK_ITERATIONS) |
798 | return 0; | 801 | return 0; |
@@ -1152,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait); | |||
1152 | 1155 | ||
1153 | /** | 1156 | /** |
1154 | * locks_mandatory_locked - Check for an active lock | 1157 | * locks_mandatory_locked - Check for an active lock |
1155 | * @inode: the file to check | 1158 | * @file: the file to check |
1156 | * | 1159 | * |
1157 | * Searches the inode's list of locks to find any POSIX locks which conflict. | 1160 | * Searches the inode's list of locks to find any POSIX locks which conflict. |
1158 | * This function is called from locks_verify_locked() only. | 1161 | * This function is called from locks_verify_locked() only. |
1159 | */ | 1162 | */ |
1160 | int locks_mandatory_locked(struct inode *inode) | 1163 | int locks_mandatory_locked(struct file *file) |
1161 | { | 1164 | { |
1165 | struct inode *inode = file_inode(file); | ||
1162 | fl_owner_t owner = current->files; | 1166 | fl_owner_t owner = current->files; |
1163 | struct file_lock *fl; | 1167 | struct file_lock *fl; |
1164 | 1168 | ||
@@ -1169,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode) | |||
1169 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { | 1173 | for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { |
1170 | if (!IS_POSIX(fl)) | 1174 | if (!IS_POSIX(fl)) |
1171 | continue; | 1175 | continue; |
1172 | if (fl->fl_owner != owner) | 1176 | if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) |
1173 | break; | 1177 | break; |
1174 | } | 1178 | } |
1175 | spin_unlock(&inode->i_lock); | 1179 | spin_unlock(&inode->i_lock); |
@@ -1195,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode, | |||
1195 | { | 1199 | { |
1196 | struct file_lock fl; | 1200 | struct file_lock fl; |
1197 | int error; | 1201 | int error; |
1202 | bool sleep = false; | ||
1198 | 1203 | ||
1199 | locks_init_lock(&fl); | 1204 | locks_init_lock(&fl); |
1200 | fl.fl_owner = current->files; | ||
1201 | fl.fl_pid = current->tgid; | 1205 | fl.fl_pid = current->tgid; |
1202 | fl.fl_file = filp; | 1206 | fl.fl_file = filp; |
1203 | fl.fl_flags = FL_POSIX | FL_ACCESS; | 1207 | fl.fl_flags = FL_POSIX | FL_ACCESS; |
1204 | if (filp && !(filp->f_flags & O_NONBLOCK)) | 1208 | if (filp && !(filp->f_flags & O_NONBLOCK)) |
1205 | fl.fl_flags |= FL_SLEEP; | 1209 | sleep = true; |
1206 | fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; | 1210 | fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; |
1207 | fl.fl_start = offset; | 1211 | fl.fl_start = offset; |
1208 | fl.fl_end = offset + count - 1; | 1212 | fl.fl_end = offset + count - 1; |
1209 | 1213 | ||
1210 | for (;;) { | 1214 | for (;;) { |
1215 | if (filp) { | ||
1216 | fl.fl_owner = (fl_owner_t)filp; | ||
1217 | fl.fl_flags &= ~FL_SLEEP; | ||
1218 | error = __posix_lock_file(inode, &fl, NULL); | ||
1219 | if (!error) | ||
1220 | break; | ||
1221 | } | ||
1222 | |||
1223 | if (sleep) | ||
1224 | fl.fl_flags |= FL_SLEEP; | ||
1225 | fl.fl_owner = current->files; | ||
1211 | error = __posix_lock_file(inode, &fl, NULL); | 1226 | error = __posix_lock_file(inode, &fl, NULL); |
1212 | if (error != FILE_LOCK_DEFERRED) | 1227 | if (error != FILE_LOCK_DEFERRED) |
1213 | break; | 1228 | break; |
@@ -1472,6 +1487,32 @@ int fcntl_getlease(struct file *filp) | |||
1472 | return type; | 1487 | return type; |
1473 | } | 1488 | } |
1474 | 1489 | ||
1490 | /** | ||
1491 | * check_conflicting_open - see if the given dentry points to a file that has | ||
1492 | * an existing open that would conflict with the | ||
1493 | * desired lease. | ||
1494 | * @dentry: dentry to check | ||
1495 | * @arg: type of lease that we're trying to acquire | ||
1496 | * | ||
1497 | * Check to see if there's an existing open fd on this file that would | ||
1498 | * conflict with the lease we're trying to set. | ||
1499 | */ | ||
1500 | static int | ||
1501 | check_conflicting_open(const struct dentry *dentry, const long arg) | ||
1502 | { | ||
1503 | int ret = 0; | ||
1504 | struct inode *inode = dentry->d_inode; | ||
1505 | |||
1506 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | ||
1507 | return -EAGAIN; | ||
1508 | |||
1509 | if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || | ||
1510 | (atomic_read(&inode->i_count) > 1))) | ||
1511 | ret = -EAGAIN; | ||
1512 | |||
1513 | return ret; | ||
1514 | } | ||
1515 | |||
1475 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) | 1516 | static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) |
1476 | { | 1517 | { |
1477 | struct file_lock *fl, **before, **my_before = NULL, *lease; | 1518 | struct file_lock *fl, **before, **my_before = NULL, *lease; |
@@ -1499,12 +1540,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp | |||
1499 | return -EINVAL; | 1540 | return -EINVAL; |
1500 | } | 1541 | } |
1501 | 1542 | ||
1502 | error = -EAGAIN; | 1543 | error = check_conflicting_open(dentry, arg); |
1503 | if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) | 1544 | if (error) |
1504 | goto out; | ||
1505 | if ((arg == F_WRLCK) | ||
1506 | && ((d_count(dentry) > 1) | ||
1507 | || (atomic_read(&inode->i_count) > 1))) | ||
1508 | goto out; | 1545 | goto out; |
1509 | 1546 | ||
1510 | /* | 1547 | /* |
@@ -1549,7 +1586,19 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp | |||
1549 | goto out; | 1586 | goto out; |
1550 | 1587 | ||
1551 | locks_insert_lock(before, lease); | 1588 | locks_insert_lock(before, lease); |
1552 | error = 0; | 1589 | /* |
1590 | * The check in break_lease() is lockless. It's possible for another | ||
1591 | * open to race in after we did the earlier check for a conflicting | ||
1592 | * open but before the lease was inserted. Check again for a | ||
1593 | * conflicting open and cancel the lease if there is one. | ||
1594 | * | ||
1595 | * We also add a barrier here to ensure that the insertion of the lock | ||
1596 | * precedes these checks. | ||
1597 | */ | ||
1598 | smp_mb(); | ||
1599 | error = check_conflicting_open(dentry, arg); | ||
1600 | if (error) | ||
1601 | locks_unlink_lock(flp); | ||
1553 | out: | 1602 | out: |
1554 | if (is_deleg) | 1603 | if (is_deleg) |
1555 | mutex_unlock(&inode->i_mutex); | 1604 | mutex_unlock(&inode->i_mutex); |
@@ -1842,7 +1891,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); | |||
1842 | 1891 | ||
1843 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) | 1892 | static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) |
1844 | { | 1893 | { |
1845 | flock->l_pid = fl->fl_pid; | 1894 | flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; |
1846 | #if BITS_PER_LONG == 32 | 1895 | #if BITS_PER_LONG == 32 |
1847 | /* | 1896 | /* |
1848 | * Make sure we can represent the posix lock via | 1897 | * Make sure we can represent the posix lock via |
@@ -1864,7 +1913,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) | |||
1864 | #if BITS_PER_LONG == 32 | 1913 | #if BITS_PER_LONG == 32 |
1865 | static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) | 1914 | static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) |
1866 | { | 1915 | { |
1867 | flock->l_pid = fl->fl_pid; | 1916 | flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; |
1868 | flock->l_start = fl->fl_start; | 1917 | flock->l_start = fl->fl_start; |
1869 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : | 1918 | flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : |
1870 | fl->fl_end - fl->fl_start + 1; | 1919 | fl->fl_end - fl->fl_start + 1; |
@@ -1876,7 +1925,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) | |||
1876 | /* Report the first existing lock that would conflict with l. | 1925 | /* Report the first existing lock that would conflict with l. |
1877 | * This implements the F_GETLK command of fcntl(). | 1926 | * This implements the F_GETLK command of fcntl(). |
1878 | */ | 1927 | */ |
1879 | int fcntl_getlk(struct file *filp, struct flock __user *l) | 1928 | int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) |
1880 | { | 1929 | { |
1881 | struct file_lock file_lock; | 1930 | struct file_lock file_lock; |
1882 | struct flock flock; | 1931 | struct flock flock; |
@@ -1893,6 +1942,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) | |||
1893 | if (error) | 1942 | if (error) |
1894 | goto out; | 1943 | goto out; |
1895 | 1944 | ||
1945 | if (cmd == F_GETLKP) { | ||
1946 | error = -EINVAL; | ||
1947 | if (flock.l_pid != 0) | ||
1948 | goto out; | ||
1949 | |||
1950 | cmd = F_GETLK; | ||
1951 | file_lock.fl_flags |= FL_FILE_PVT; | ||
1952 | file_lock.fl_owner = (fl_owner_t)filp; | ||
1953 | } | ||
1954 | |||
1896 | error = vfs_test_lock(filp, &file_lock); | 1955 | error = vfs_test_lock(filp, &file_lock); |
1897 | if (error) | 1956 | if (error) |
1898 | goto out; | 1957 | goto out; |
@@ -2012,25 +2071,32 @@ again: | |||
2012 | error = flock_to_posix_lock(filp, file_lock, &flock); | 2071 | error = flock_to_posix_lock(filp, file_lock, &flock); |
2013 | if (error) | 2072 | if (error) |
2014 | goto out; | 2073 | goto out; |
2015 | if (cmd == F_SETLKW) { | 2074 | |
2016 | file_lock->fl_flags |= FL_SLEEP; | 2075 | /* |
2017 | } | 2076 | * If the cmd is requesting file-private locks, then set the |
2018 | 2077 | * FL_FILE_PVT flag and override the owner. | |
2019 | error = -EBADF; | 2078 | */ |
2020 | switch (flock.l_type) { | 2079 | switch (cmd) { |
2021 | case F_RDLCK: | 2080 | case F_SETLKP: |
2022 | if (!(filp->f_mode & FMODE_READ)) | 2081 | error = -EINVAL; |
2023 | goto out; | 2082 | if (flock.l_pid != 0) |
2024 | break; | ||
2025 | case F_WRLCK: | ||
2026 | if (!(filp->f_mode & FMODE_WRITE)) | ||
2027 | goto out; | 2083 | goto out; |
2084 | |||
2085 | cmd = F_SETLK; | ||
2086 | file_lock->fl_flags |= FL_FILE_PVT; | ||
2087 | file_lock->fl_owner = (fl_owner_t)filp; | ||
2028 | break; | 2088 | break; |
2029 | case F_UNLCK: | 2089 | case F_SETLKPW: |
2030 | break; | ||
2031 | default: | ||
2032 | error = -EINVAL; | 2090 | error = -EINVAL; |
2033 | goto out; | 2091 | if (flock.l_pid != 0) |
2092 | goto out; | ||
2093 | |||
2094 | cmd = F_SETLKW; | ||
2095 | file_lock->fl_flags |= FL_FILE_PVT; | ||
2096 | file_lock->fl_owner = (fl_owner_t)filp; | ||
2097 | /* Fallthrough */ | ||
2098 | case F_SETLKW: | ||
2099 | file_lock->fl_flags |= FL_SLEEP; | ||
2034 | } | 2100 | } |
2035 | 2101 | ||
2036 | error = do_lock_file_wait(filp, cmd, file_lock); | 2102 | error = do_lock_file_wait(filp, cmd, file_lock); |
@@ -2061,7 +2127,7 @@ out: | |||
2061 | /* Report the first existing lock that would conflict with l. | 2127 | /* Report the first existing lock that would conflict with l. |
2062 | * This implements the F_GETLK command of fcntl(). | 2128 | * This implements the F_GETLK command of fcntl(). |
2063 | */ | 2129 | */ |
2064 | int fcntl_getlk64(struct file *filp, struct flock64 __user *l) | 2130 | int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) |
2065 | { | 2131 | { |
2066 | struct file_lock file_lock; | 2132 | struct file_lock file_lock; |
2067 | struct flock64 flock; | 2133 | struct flock64 flock; |
@@ -2078,6 +2144,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) | |||
2078 | if (error) | 2144 | if (error) |
2079 | goto out; | 2145 | goto out; |
2080 | 2146 | ||
2147 | if (cmd == F_GETLKP) { | ||
2148 | error = -EINVAL; | ||
2149 | if (flock.l_pid != 0) | ||
2150 | goto out; | ||
2151 | |||
2152 | cmd = F_GETLK64; | ||
2153 | file_lock.fl_flags |= FL_FILE_PVT; | ||
2154 | file_lock.fl_owner = (fl_owner_t)filp; | ||
2155 | } | ||
2156 | |||
2081 | error = vfs_test_lock(filp, &file_lock); | 2157 | error = vfs_test_lock(filp, &file_lock); |
2082 | if (error) | 2158 | if (error) |
2083 | goto out; | 2159 | goto out; |
@@ -2130,25 +2206,32 @@ again: | |||
2130 | error = flock64_to_posix_lock(filp, file_lock, &flock); | 2206 | error = flock64_to_posix_lock(filp, file_lock, &flock); |
2131 | if (error) | 2207 | if (error) |
2132 | goto out; | 2208 | goto out; |
2133 | if (cmd == F_SETLKW64) { | 2209 | |
2134 | file_lock->fl_flags |= FL_SLEEP; | 2210 | /* |
2135 | } | 2211 | * If the cmd is requesting file-private locks, then set the |
2136 | 2212 | * FL_FILE_PVT flag and override the owner. | |
2137 | error = -EBADF; | 2213 | */ |
2138 | switch (flock.l_type) { | 2214 | switch (cmd) { |
2139 | case F_RDLCK: | 2215 | case F_SETLKP: |
2140 | if (!(filp->f_mode & FMODE_READ)) | 2216 | error = -EINVAL; |
2141 | goto out; | 2217 | if (flock.l_pid != 0) |
2142 | break; | ||
2143 | case F_WRLCK: | ||
2144 | if (!(filp->f_mode & FMODE_WRITE)) | ||
2145 | goto out; | 2218 | goto out; |
2219 | |||
2220 | cmd = F_SETLK64; | ||
2221 | file_lock->fl_flags |= FL_FILE_PVT; | ||
2222 | file_lock->fl_owner = (fl_owner_t)filp; | ||
2146 | break; | 2223 | break; |
2147 | case F_UNLCK: | 2224 | case F_SETLKPW: |
2148 | break; | ||
2149 | default: | ||
2150 | error = -EINVAL; | 2225 | error = -EINVAL; |
2151 | goto out; | 2226 | if (flock.l_pid != 0) |
2227 | goto out; | ||
2228 | |||
2229 | cmd = F_SETLKW64; | ||
2230 | file_lock->fl_flags |= FL_FILE_PVT; | ||
2231 | file_lock->fl_owner = (fl_owner_t)filp; | ||
2232 | /* Fallthrough */ | ||
2233 | case F_SETLKW64: | ||
2234 | file_lock->fl_flags |= FL_SLEEP; | ||
2152 | } | 2235 | } |
2153 | 2236 | ||
2154 | error = do_lock_file_wait(filp, cmd, file_lock); | 2237 | error = do_lock_file_wait(filp, cmd, file_lock); |
@@ -2209,7 +2292,7 @@ EXPORT_SYMBOL(locks_remove_posix); | |||
2209 | /* | 2292 | /* |
2210 | * This function is called on the last close of an open file. | 2293 | * This function is called on the last close of an open file. |
2211 | */ | 2294 | */ |
2212 | void locks_remove_flock(struct file *filp) | 2295 | void locks_remove_file(struct file *filp) |
2213 | { | 2296 | { |
2214 | struct inode * inode = file_inode(filp); | 2297 | struct inode * inode = file_inode(filp); |
2215 | struct file_lock *fl; | 2298 | struct file_lock *fl; |
@@ -2218,6 +2301,8 @@ void locks_remove_flock(struct file *filp) | |||
2218 | if (!inode->i_flock) | 2301 | if (!inode->i_flock) |
2219 | return; | 2302 | return; |
2220 | 2303 | ||
2304 | locks_remove_posix(filp, (fl_owner_t)filp); | ||
2305 | |||
2221 | if (filp->f_op->flock) { | 2306 | if (filp->f_op->flock) { |
2222 | struct file_lock fl = { | 2307 | struct file_lock fl = { |
2223 | .fl_pid = current->tgid, | 2308 | .fl_pid = current->tgid, |
@@ -2236,16 +2321,28 @@ void locks_remove_flock(struct file *filp) | |||
2236 | 2321 | ||
2237 | while ((fl = *before) != NULL) { | 2322 | while ((fl = *before) != NULL) { |
2238 | if (fl->fl_file == filp) { | 2323 | if (fl->fl_file == filp) { |
2239 | if (IS_FLOCK(fl)) { | ||
2240 | locks_delete_lock(before); | ||
2241 | continue; | ||
2242 | } | ||
2243 | if (IS_LEASE(fl)) { | 2324 | if (IS_LEASE(fl)) { |
2244 | lease_modify(before, F_UNLCK); | 2325 | lease_modify(before, F_UNLCK); |
2245 | continue; | 2326 | continue; |
2246 | } | 2327 | } |
2247 | /* What? */ | 2328 | |
2248 | BUG(); | 2329 | /* |
2330 | * There's a leftover lock on the list of a type that | ||
2331 | * we didn't expect to see. Most likely a classic | ||
2332 | * POSIX lock that ended up not getting released | ||
2333 | * properly, or that raced onto the list somehow. Log | ||
2334 | * some info about it and then just remove it from | ||
2335 | * the list. | ||
2336 | */ | ||
2337 | WARN(!IS_FLOCK(fl), | ||
2338 | "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", | ||
2339 | MAJOR(inode->i_sb->s_dev), | ||
2340 | MINOR(inode->i_sb->s_dev), inode->i_ino, | ||
2341 | fl->fl_type, fl->fl_flags, | ||
2342 | fl->fl_start, fl->fl_end); | ||
2343 | |||
2344 | locks_delete_lock(before); | ||
2345 | continue; | ||
2249 | } | 2346 | } |
2250 | before = &fl->fl_next; | 2347 | before = &fl->fl_next; |
2251 | } | 2348 | } |
@@ -2314,8 +2411,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, | |||
2314 | 2411 | ||
2315 | seq_printf(f, "%lld:%s ", id, pfx); | 2412 | seq_printf(f, "%lld:%s ", id, pfx); |
2316 | if (IS_POSIX(fl)) { | 2413 | if (IS_POSIX(fl)) { |
2317 | seq_printf(f, "%6s %s ", | 2414 | if (fl->fl_flags & FL_ACCESS) |
2318 | (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", | 2415 | seq_printf(f, "ACCESS"); |
2416 | else if (IS_FILE_PVT(fl)) | ||
2417 | seq_printf(f, "FLPVT "); | ||
2418 | else | ||
2419 | seq_printf(f, "POSIX "); | ||
2420 | |||
2421 | seq_printf(f, " %s ", | ||
2319 | (inode == NULL) ? "*NOINODE*" : | 2422 | (inode == NULL) ? "*NOINODE*" : |
2320 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); | 2423 | mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); |
2321 | } else if (IS_FLOCK(fl)) { | 2424 | } else if (IS_FLOCK(fl)) { |
@@ -2385,6 +2488,7 @@ static int locks_show(struct seq_file *f, void *v) | |||
2385 | } | 2488 | } |
2386 | 2489 | ||
2387 | static void *locks_start(struct seq_file *f, loff_t *pos) | 2490 | static void *locks_start(struct seq_file *f, loff_t *pos) |
2491 | __acquires(&blocked_lock_lock) | ||
2388 | { | 2492 | { |
2389 | struct locks_iterator *iter = f->private; | 2493 | struct locks_iterator *iter = f->private; |
2390 | 2494 | ||
@@ -2403,6 +2507,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos) | |||
2403 | } | 2507 | } |
2404 | 2508 | ||
2405 | static void locks_stop(struct seq_file *f, void *v) | 2509 | static void locks_stop(struct seq_file *f, void *v) |
2510 | __releases(&blocked_lock_lock) | ||
2406 | { | 2511 | { |
2407 | spin_unlock(&blocked_lock_lock); | 2512 | spin_unlock(&blocked_lock_lock); |
2408 | lg_global_unlock(&file_lock_lglock); | 2513 | lg_global_unlock(&file_lock_lglock); |
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 9a59cbade2fb..48140315f627 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c | |||
@@ -2180,7 +2180,7 @@ void logfs_evict_inode(struct inode *inode) | |||
2180 | do_delete_inode(inode); | 2180 | do_delete_inode(inode); |
2181 | } | 2181 | } |
2182 | } | 2182 | } |
2183 | truncate_inode_pages(&inode->i_data, 0); | 2183 | truncate_inode_pages_final(&inode->i_data); |
2184 | clear_inode(inode); | 2184 | clear_inode(inode); |
2185 | 2185 | ||
2186 | /* Cheaper version of write_inode. All changes are concealed in | 2186 | /* Cheaper version of write_inode. All changes are concealed in |
diff --git a/fs/mbcache.c b/fs/mbcache.c index e519e45bf673..bf166e388f0d 100644 --- a/fs/mbcache.c +++ b/fs/mbcache.c | |||
@@ -26,6 +26,41 @@ | |||
26 | * back on the lru list. | 26 | * back on the lru list. |
27 | */ | 27 | */ |
28 | 28 | ||
29 | /* | ||
30 | * Lock descriptions and usage: | ||
31 | * | ||
32 | * Each hash chain of both the block and index hash tables now contains | ||
33 | * a built-in lock used to serialize accesses to the hash chain. | ||
34 | * | ||
35 | * Accesses to global data structures mb_cache_list and mb_cache_lru_list | ||
36 | * are serialized via the global spinlock mb_cache_spinlock. | ||
37 | * | ||
38 | * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize | ||
39 | * accesses to its local data, such as e_used and e_queued. | ||
40 | * | ||
41 | * Lock ordering: | ||
42 | * | ||
43 | * Each block hash chain's lock has the highest lock order, followed by an | ||
44 | * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's | ||
45 | * lock), and mb_cach_spinlock, with the lowest order. While holding | ||
46 | * either a block or index hash chain lock, a thread can acquire an | ||
47 | * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock. | ||
48 | * | ||
49 | * Synchronization: | ||
50 | * | ||
51 | * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and | ||
52 | * index hash chian, it needs to lock the corresponding hash chain. For each | ||
53 | * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to | ||
54 | * prevent either any simultaneous release or free on the entry and also | ||
55 | * to serialize accesses to either the e_used or e_queued member of the entry. | ||
56 | * | ||
57 | * To avoid having a dangling reference to an already freed | ||
58 | * mb_cache_entry, an mb_cache_entry is only freed when it is not on a | ||
59 | * block hash chain and also no longer being referenced, both e_used, | ||
60 | * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is | ||
61 | * first removed from a block hash chain. | ||
62 | */ | ||
63 | |||
29 | #include <linux/kernel.h> | 64 | #include <linux/kernel.h> |
30 | #include <linux/module.h> | 65 | #include <linux/module.h> |
31 | 66 | ||
@@ -34,9 +69,10 @@ | |||
34 | #include <linux/mm.h> | 69 | #include <linux/mm.h> |
35 | #include <linux/slab.h> | 70 | #include <linux/slab.h> |
36 | #include <linux/sched.h> | 71 | #include <linux/sched.h> |
37 | #include <linux/init.h> | 72 | #include <linux/list_bl.h> |
38 | #include <linux/mbcache.h> | 73 | #include <linux/mbcache.h> |
39 | 74 | #include <linux/init.h> | |
75 | #include <linux/blockgroup_lock.h> | ||
40 | 76 | ||
41 | #ifdef MB_CACHE_DEBUG | 77 | #ifdef MB_CACHE_DEBUG |
42 | # define mb_debug(f...) do { \ | 78 | # define mb_debug(f...) do { \ |
@@ -57,8 +93,14 @@ | |||
57 | 93 | ||
58 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) | 94 | #define MB_CACHE_WRITER ((unsigned short)~0U >> 1) |
59 | 95 | ||
96 | #define MB_CACHE_ENTRY_LOCK_BITS __builtin_log2(NR_BG_LOCKS) | ||
97 | #define MB_CACHE_ENTRY_LOCK_INDEX(ce) \ | ||
98 | (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS)) | ||
99 | |||
60 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); | 100 | static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue); |
61 | 101 | static struct blockgroup_lock *mb_cache_bg_lock; | |
102 | static struct kmem_cache *mb_cache_kmem_cache; | ||
103 | |||
62 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); | 104 | MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>"); |
63 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); | 105 | MODULE_DESCRIPTION("Meta block cache (for extended attributes)"); |
64 | MODULE_LICENSE("GPL"); | 106 | MODULE_LICENSE("GPL"); |
@@ -86,58 +128,110 @@ static LIST_HEAD(mb_cache_list); | |||
86 | static LIST_HEAD(mb_cache_lru_list); | 128 | static LIST_HEAD(mb_cache_lru_list); |
87 | static DEFINE_SPINLOCK(mb_cache_spinlock); | 129 | static DEFINE_SPINLOCK(mb_cache_spinlock); |
88 | 130 | ||
131 | static inline void | ||
132 | __spin_lock_mb_cache_entry(struct mb_cache_entry *ce) | ||
133 | { | ||
134 | spin_lock(bgl_lock_ptr(mb_cache_bg_lock, | ||
135 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
136 | } | ||
137 | |||
138 | static inline void | ||
139 | __spin_unlock_mb_cache_entry(struct mb_cache_entry *ce) | ||
140 | { | ||
141 | spin_unlock(bgl_lock_ptr(mb_cache_bg_lock, | ||
142 | MB_CACHE_ENTRY_LOCK_INDEX(ce))); | ||
143 | } | ||
144 | |||
89 | static inline int | 145 | static inline int |
90 | __mb_cache_entry_is_hashed(struct mb_cache_entry *ce) | 146 | __mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce) |
91 | { | 147 | { |
92 | return !list_empty(&ce->e_block_list); | 148 | return !hlist_bl_unhashed(&ce->e_block_list); |
93 | } | 149 | } |
94 | 150 | ||
95 | 151 | ||
96 | static void | 152 | static inline void |
97 | __mb_cache_entry_unhash(struct mb_cache_entry *ce) | 153 | __mb_cache_entry_unhash_block(struct mb_cache_entry *ce) |
98 | { | 154 | { |
99 | if (__mb_cache_entry_is_hashed(ce)) { | 155 | if (__mb_cache_entry_is_block_hashed(ce)) |
100 | list_del_init(&ce->e_block_list); | 156 | hlist_bl_del_init(&ce->e_block_list); |
101 | list_del(&ce->e_index.o_list); | ||
102 | } | ||
103 | } | 157 | } |
104 | 158 | ||
159 | static inline int | ||
160 | __mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce) | ||
161 | { | ||
162 | return !hlist_bl_unhashed(&ce->e_index.o_list); | ||
163 | } | ||
164 | |||
165 | static inline void | ||
166 | __mb_cache_entry_unhash_index(struct mb_cache_entry *ce) | ||
167 | { | ||
168 | if (__mb_cache_entry_is_index_hashed(ce)) | ||
169 | hlist_bl_del_init(&ce->e_index.o_list); | ||
170 | } | ||
171 | |||
172 | /* | ||
173 | * __mb_cache_entry_unhash_unlock() | ||
174 | * | ||
175 | * This function is called to unhash both the block and index hash | ||
176 | * chain. | ||
177 | * It assumes both the block and index hash chain is locked upon entry. | ||
178 | * It also unlock both hash chains both exit | ||
179 | */ | ||
180 | static inline void | ||
181 | __mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce) | ||
182 | { | ||
183 | __mb_cache_entry_unhash_index(ce); | ||
184 | hlist_bl_unlock(ce->e_index_hash_p); | ||
185 | __mb_cache_entry_unhash_block(ce); | ||
186 | hlist_bl_unlock(ce->e_block_hash_p); | ||
187 | } | ||
105 | 188 | ||
106 | static void | 189 | static void |
107 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) | 190 | __mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask) |
108 | { | 191 | { |
109 | struct mb_cache *cache = ce->e_cache; | 192 | struct mb_cache *cache = ce->e_cache; |
110 | 193 | ||
111 | mb_assert(!(ce->e_used || ce->e_queued)); | 194 | mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))); |
112 | kmem_cache_free(cache->c_entry_cache, ce); | 195 | kmem_cache_free(cache->c_entry_cache, ce); |
113 | atomic_dec(&cache->c_entry_count); | 196 | atomic_dec(&cache->c_entry_count); |
114 | } | 197 | } |
115 | 198 | ||
116 | |||
117 | static void | 199 | static void |
118 | __mb_cache_entry_release_unlock(struct mb_cache_entry *ce) | 200 | __mb_cache_entry_release(struct mb_cache_entry *ce) |
119 | __releases(mb_cache_spinlock) | ||
120 | { | 201 | { |
202 | /* First lock the entry to serialize access to its local data. */ | ||
203 | __spin_lock_mb_cache_entry(ce); | ||
121 | /* Wake up all processes queuing for this cache entry. */ | 204 | /* Wake up all processes queuing for this cache entry. */ |
122 | if (ce->e_queued) | 205 | if (ce->e_queued) |
123 | wake_up_all(&mb_cache_queue); | 206 | wake_up_all(&mb_cache_queue); |
124 | if (ce->e_used >= MB_CACHE_WRITER) | 207 | if (ce->e_used >= MB_CACHE_WRITER) |
125 | ce->e_used -= MB_CACHE_WRITER; | 208 | ce->e_used -= MB_CACHE_WRITER; |
209 | /* | ||
210 | * Make sure that all cache entries on lru_list have | ||
211 | * both e_used and e_qued of 0s. | ||
212 | */ | ||
126 | ce->e_used--; | 213 | ce->e_used--; |
127 | if (!(ce->e_used || ce->e_queued)) { | 214 | if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) { |
128 | if (!__mb_cache_entry_is_hashed(ce)) | 215 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
216 | __spin_unlock_mb_cache_entry(ce); | ||
129 | goto forget; | 217 | goto forget; |
130 | mb_assert(list_empty(&ce->e_lru_list)); | 218 | } |
131 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | 219 | /* |
220 | * Need access to lru list, first drop entry lock, | ||
221 | * then reacquire the lock in the proper order. | ||
222 | */ | ||
223 | spin_lock(&mb_cache_spinlock); | ||
224 | if (list_empty(&ce->e_lru_list)) | ||
225 | list_add_tail(&ce->e_lru_list, &mb_cache_lru_list); | ||
226 | spin_unlock(&mb_cache_spinlock); | ||
132 | } | 227 | } |
133 | spin_unlock(&mb_cache_spinlock); | 228 | __spin_unlock_mb_cache_entry(ce); |
134 | return; | 229 | return; |
135 | forget: | 230 | forget: |
136 | spin_unlock(&mb_cache_spinlock); | 231 | mb_assert(list_empty(&ce->e_lru_list)); |
137 | __mb_cache_entry_forget(ce, GFP_KERNEL); | 232 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
138 | } | 233 | } |
139 | 234 | ||
140 | |||
141 | /* | 235 | /* |
142 | * mb_cache_shrink_scan() memory pressure callback | 236 | * mb_cache_shrink_scan() memory pressure callback |
143 | * | 237 | * |
@@ -160,17 +254,34 @@ mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc) | |||
160 | 254 | ||
161 | mb_debug("trying to free %d entries", nr_to_scan); | 255 | mb_debug("trying to free %d entries", nr_to_scan); |
162 | spin_lock(&mb_cache_spinlock); | 256 | spin_lock(&mb_cache_spinlock); |
163 | while (nr_to_scan-- && !list_empty(&mb_cache_lru_list)) { | 257 | while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) { |
164 | struct mb_cache_entry *ce = | 258 | struct mb_cache_entry *ce = |
165 | list_entry(mb_cache_lru_list.next, | 259 | list_entry(mb_cache_lru_list.next, |
166 | struct mb_cache_entry, e_lru_list); | 260 | struct mb_cache_entry, e_lru_list); |
167 | list_move_tail(&ce->e_lru_list, &free_list); | 261 | list_del_init(&ce->e_lru_list); |
168 | __mb_cache_entry_unhash(ce); | 262 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)) |
169 | freed++; | 263 | continue; |
264 | spin_unlock(&mb_cache_spinlock); | ||
265 | /* Prevent any find or get operation on the entry */ | ||
266 | hlist_bl_lock(ce->e_block_hash_p); | ||
267 | hlist_bl_lock(ce->e_index_hash_p); | ||
268 | /* Ignore if it is touched by a find/get */ | ||
269 | if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) || | ||
270 | !list_empty(&ce->e_lru_list)) { | ||
271 | hlist_bl_unlock(ce->e_index_hash_p); | ||
272 | hlist_bl_unlock(ce->e_block_hash_p); | ||
273 | spin_lock(&mb_cache_spinlock); | ||
274 | continue; | ||
275 | } | ||
276 | __mb_cache_entry_unhash_unlock(ce); | ||
277 | list_add_tail(&ce->e_lru_list, &free_list); | ||
278 | spin_lock(&mb_cache_spinlock); | ||
170 | } | 279 | } |
171 | spin_unlock(&mb_cache_spinlock); | 280 | spin_unlock(&mb_cache_spinlock); |
281 | |||
172 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { | 282 | list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) { |
173 | __mb_cache_entry_forget(entry, gfp_mask); | 283 | __mb_cache_entry_forget(entry, gfp_mask); |
284 | freed++; | ||
174 | } | 285 | } |
175 | return freed; | 286 | return freed; |
176 | } | 287 | } |
@@ -215,29 +326,40 @@ mb_cache_create(const char *name, int bucket_bits) | |||
215 | int n, bucket_count = 1 << bucket_bits; | 326 | int n, bucket_count = 1 << bucket_bits; |
216 | struct mb_cache *cache = NULL; | 327 | struct mb_cache *cache = NULL; |
217 | 328 | ||
329 | if (!mb_cache_bg_lock) { | ||
330 | mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock), | ||
331 | GFP_KERNEL); | ||
332 | if (!mb_cache_bg_lock) | ||
333 | return NULL; | ||
334 | bgl_lock_init(mb_cache_bg_lock); | ||
335 | } | ||
336 | |||
218 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); | 337 | cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL); |
219 | if (!cache) | 338 | if (!cache) |
220 | return NULL; | 339 | return NULL; |
221 | cache->c_name = name; | 340 | cache->c_name = name; |
222 | atomic_set(&cache->c_entry_count, 0); | 341 | atomic_set(&cache->c_entry_count, 0); |
223 | cache->c_bucket_bits = bucket_bits; | 342 | cache->c_bucket_bits = bucket_bits; |
224 | cache->c_block_hash = kmalloc(bucket_count * sizeof(struct list_head), | 343 | cache->c_block_hash = kmalloc(bucket_count * |
225 | GFP_KERNEL); | 344 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
226 | if (!cache->c_block_hash) | 345 | if (!cache->c_block_hash) |
227 | goto fail; | 346 | goto fail; |
228 | for (n=0; n<bucket_count; n++) | 347 | for (n=0; n<bucket_count; n++) |
229 | INIT_LIST_HEAD(&cache->c_block_hash[n]); | 348 | INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]); |
230 | cache->c_index_hash = kmalloc(bucket_count * sizeof(struct list_head), | 349 | cache->c_index_hash = kmalloc(bucket_count * |
231 | GFP_KERNEL); | 350 | sizeof(struct hlist_bl_head), GFP_KERNEL); |
232 | if (!cache->c_index_hash) | 351 | if (!cache->c_index_hash) |
233 | goto fail; | 352 | goto fail; |
234 | for (n=0; n<bucket_count; n++) | 353 | for (n=0; n<bucket_count; n++) |
235 | INIT_LIST_HEAD(&cache->c_index_hash[n]); | 354 | INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]); |
236 | cache->c_entry_cache = kmem_cache_create(name, | 355 | if (!mb_cache_kmem_cache) { |
237 | sizeof(struct mb_cache_entry), 0, | 356 | mb_cache_kmem_cache = kmem_cache_create(name, |
238 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); | 357 | sizeof(struct mb_cache_entry), 0, |
239 | if (!cache->c_entry_cache) | 358 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); |
240 | goto fail2; | 359 | if (!mb_cache_kmem_cache) |
360 | goto fail2; | ||
361 | } | ||
362 | cache->c_entry_cache = mb_cache_kmem_cache; | ||
241 | 363 | ||
242 | /* | 364 | /* |
243 | * Set an upper limit on the number of cache entries so that the hash | 365 | * Set an upper limit on the number of cache entries so that the hash |
@@ -273,21 +395,47 @@ void | |||
273 | mb_cache_shrink(struct block_device *bdev) | 395 | mb_cache_shrink(struct block_device *bdev) |
274 | { | 396 | { |
275 | LIST_HEAD(free_list); | 397 | LIST_HEAD(free_list); |
276 | struct list_head *l, *ltmp; | 398 | struct list_head *l; |
399 | struct mb_cache_entry *ce, *tmp; | ||
277 | 400 | ||
401 | l = &mb_cache_lru_list; | ||
278 | spin_lock(&mb_cache_spinlock); | 402 | spin_lock(&mb_cache_spinlock); |
279 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 403 | while (!list_is_last(l, &mb_cache_lru_list)) { |
280 | struct mb_cache_entry *ce = | 404 | l = l->next; |
281 | list_entry(l, struct mb_cache_entry, e_lru_list); | 405 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
282 | if (ce->e_bdev == bdev) { | 406 | if (ce->e_bdev == bdev) { |
283 | list_move_tail(&ce->e_lru_list, &free_list); | 407 | list_del_init(&ce->e_lru_list); |
284 | __mb_cache_entry_unhash(ce); | 408 | if (ce->e_used || ce->e_queued || |
409 | atomic_read(&ce->e_refcnt)) | ||
410 | continue; | ||
411 | spin_unlock(&mb_cache_spinlock); | ||
412 | /* | ||
413 | * Prevent any find or get operation on the entry. | ||
414 | */ | ||
415 | hlist_bl_lock(ce->e_block_hash_p); | ||
416 | hlist_bl_lock(ce->e_index_hash_p); | ||
417 | /* Ignore if it is touched by a find/get */ | ||
418 | if (ce->e_used || ce->e_queued || | ||
419 | atomic_read(&ce->e_refcnt) || | ||
420 | !list_empty(&ce->e_lru_list)) { | ||
421 | hlist_bl_unlock(ce->e_index_hash_p); | ||
422 | hlist_bl_unlock(ce->e_block_hash_p); | ||
423 | l = &mb_cache_lru_list; | ||
424 | spin_lock(&mb_cache_spinlock); | ||
425 | continue; | ||
426 | } | ||
427 | __mb_cache_entry_unhash_unlock(ce); | ||
428 | mb_assert(!(ce->e_used || ce->e_queued || | ||
429 | atomic_read(&ce->e_refcnt))); | ||
430 | list_add_tail(&ce->e_lru_list, &free_list); | ||
431 | l = &mb_cache_lru_list; | ||
432 | spin_lock(&mb_cache_spinlock); | ||
285 | } | 433 | } |
286 | } | 434 | } |
287 | spin_unlock(&mb_cache_spinlock); | 435 | spin_unlock(&mb_cache_spinlock); |
288 | list_for_each_safe(l, ltmp, &free_list) { | 436 | |
289 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 437 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
290 | e_lru_list), GFP_KERNEL); | 438 | __mb_cache_entry_forget(ce, GFP_KERNEL); |
291 | } | 439 | } |
292 | } | 440 | } |
293 | 441 | ||
@@ -303,23 +451,27 @@ void | |||
303 | mb_cache_destroy(struct mb_cache *cache) | 451 | mb_cache_destroy(struct mb_cache *cache) |
304 | { | 452 | { |
305 | LIST_HEAD(free_list); | 453 | LIST_HEAD(free_list); |
306 | struct list_head *l, *ltmp; | 454 | struct mb_cache_entry *ce, *tmp; |
307 | 455 | ||
308 | spin_lock(&mb_cache_spinlock); | 456 | spin_lock(&mb_cache_spinlock); |
309 | list_for_each_safe(l, ltmp, &mb_cache_lru_list) { | 457 | list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) { |
310 | struct mb_cache_entry *ce = | 458 | if (ce->e_cache == cache) |
311 | list_entry(l, struct mb_cache_entry, e_lru_list); | ||
312 | if (ce->e_cache == cache) { | ||
313 | list_move_tail(&ce->e_lru_list, &free_list); | 459 | list_move_tail(&ce->e_lru_list, &free_list); |
314 | __mb_cache_entry_unhash(ce); | ||
315 | } | ||
316 | } | 460 | } |
317 | list_del(&cache->c_cache_list); | 461 | list_del(&cache->c_cache_list); |
318 | spin_unlock(&mb_cache_spinlock); | 462 | spin_unlock(&mb_cache_spinlock); |
319 | 463 | ||
320 | list_for_each_safe(l, ltmp, &free_list) { | 464 | list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) { |
321 | __mb_cache_entry_forget(list_entry(l, struct mb_cache_entry, | 465 | list_del_init(&ce->e_lru_list); |
322 | e_lru_list), GFP_KERNEL); | 466 | /* |
467 | * Prevent any find or get operation on the entry. | ||
468 | */ | ||
469 | hlist_bl_lock(ce->e_block_hash_p); | ||
470 | hlist_bl_lock(ce->e_index_hash_p); | ||
471 | mb_assert(!(ce->e_used || ce->e_queued || | ||
472 | atomic_read(&ce->e_refcnt))); | ||
473 | __mb_cache_entry_unhash_unlock(ce); | ||
474 | __mb_cache_entry_forget(ce, GFP_KERNEL); | ||
323 | } | 475 | } |
324 | 476 | ||
325 | if (atomic_read(&cache->c_entry_count) > 0) { | 477 | if (atomic_read(&cache->c_entry_count) > 0) { |
@@ -328,8 +480,10 @@ mb_cache_destroy(struct mb_cache *cache) | |||
328 | atomic_read(&cache->c_entry_count)); | 480 | atomic_read(&cache->c_entry_count)); |
329 | } | 481 | } |
330 | 482 | ||
331 | kmem_cache_destroy(cache->c_entry_cache); | 483 | if (list_empty(&mb_cache_list)) { |
332 | 484 | kmem_cache_destroy(mb_cache_kmem_cache); | |
485 | mb_cache_kmem_cache = NULL; | ||
486 | } | ||
333 | kfree(cache->c_index_hash); | 487 | kfree(cache->c_index_hash); |
334 | kfree(cache->c_block_hash); | 488 | kfree(cache->c_block_hash); |
335 | kfree(cache); | 489 | kfree(cache); |
@@ -346,28 +500,61 @@ mb_cache_destroy(struct mb_cache *cache) | |||
346 | struct mb_cache_entry * | 500 | struct mb_cache_entry * |
347 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) | 501 | mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags) |
348 | { | 502 | { |
349 | struct mb_cache_entry *ce = NULL; | 503 | struct mb_cache_entry *ce; |
350 | 504 | ||
351 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { | 505 | if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) { |
506 | struct list_head *l; | ||
507 | |||
508 | l = &mb_cache_lru_list; | ||
352 | spin_lock(&mb_cache_spinlock); | 509 | spin_lock(&mb_cache_spinlock); |
353 | if (!list_empty(&mb_cache_lru_list)) { | 510 | while (!list_is_last(l, &mb_cache_lru_list)) { |
354 | ce = list_entry(mb_cache_lru_list.next, | 511 | l = l->next; |
355 | struct mb_cache_entry, e_lru_list); | 512 | ce = list_entry(l, struct mb_cache_entry, e_lru_list); |
356 | list_del_init(&ce->e_lru_list); | 513 | if (ce->e_cache == cache) { |
357 | __mb_cache_entry_unhash(ce); | 514 | list_del_init(&ce->e_lru_list); |
515 | if (ce->e_used || ce->e_queued || | ||
516 | atomic_read(&ce->e_refcnt)) | ||
517 | continue; | ||
518 | spin_unlock(&mb_cache_spinlock); | ||
519 | /* | ||
520 | * Prevent any find or get operation on the | ||
521 | * entry. | ||
522 | */ | ||
523 | hlist_bl_lock(ce->e_block_hash_p); | ||
524 | hlist_bl_lock(ce->e_index_hash_p); | ||
525 | /* Ignore if it is touched by a find/get */ | ||
526 | if (ce->e_used || ce->e_queued || | ||
527 | atomic_read(&ce->e_refcnt) || | ||
528 | !list_empty(&ce->e_lru_list)) { | ||
529 | hlist_bl_unlock(ce->e_index_hash_p); | ||
530 | hlist_bl_unlock(ce->e_block_hash_p); | ||
531 | l = &mb_cache_lru_list; | ||
532 | spin_lock(&mb_cache_spinlock); | ||
533 | continue; | ||
534 | } | ||
535 | mb_assert(list_empty(&ce->e_lru_list)); | ||
536 | mb_assert(!(ce->e_used || ce->e_queued || | ||
537 | atomic_read(&ce->e_refcnt))); | ||
538 | __mb_cache_entry_unhash_unlock(ce); | ||
539 | goto found; | ||
540 | } | ||
358 | } | 541 | } |
359 | spin_unlock(&mb_cache_spinlock); | 542 | spin_unlock(&mb_cache_spinlock); |
360 | } | 543 | } |
361 | if (!ce) { | 544 | |
362 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); | 545 | ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags); |
363 | if (!ce) | 546 | if (!ce) |
364 | return NULL; | 547 | return NULL; |
365 | atomic_inc(&cache->c_entry_count); | 548 | atomic_inc(&cache->c_entry_count); |
366 | INIT_LIST_HEAD(&ce->e_lru_list); | 549 | INIT_LIST_HEAD(&ce->e_lru_list); |
367 | INIT_LIST_HEAD(&ce->e_block_list); | 550 | INIT_HLIST_BL_NODE(&ce->e_block_list); |
368 | ce->e_cache = cache; | 551 | INIT_HLIST_BL_NODE(&ce->e_index.o_list); |
369 | ce->e_queued = 0; | 552 | ce->e_cache = cache; |
370 | } | 553 | ce->e_queued = 0; |
554 | atomic_set(&ce->e_refcnt, 0); | ||
555 | found: | ||
556 | ce->e_block_hash_p = &cache->c_block_hash[0]; | ||
557 | ce->e_index_hash_p = &cache->c_index_hash[0]; | ||
371 | ce->e_used = 1 + MB_CACHE_WRITER; | 558 | ce->e_used = 1 + MB_CACHE_WRITER; |
372 | return ce; | 559 | return ce; |
373 | } | 560 | } |
@@ -393,29 +580,38 @@ mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev, | |||
393 | { | 580 | { |
394 | struct mb_cache *cache = ce->e_cache; | 581 | struct mb_cache *cache = ce->e_cache; |
395 | unsigned int bucket; | 582 | unsigned int bucket; |
396 | struct list_head *l; | 583 | struct hlist_bl_node *l; |
397 | int error = -EBUSY; | 584 | struct hlist_bl_head *block_hash_p; |
585 | struct hlist_bl_head *index_hash_p; | ||
586 | struct mb_cache_entry *lce; | ||
398 | 587 | ||
588 | mb_assert(ce); | ||
399 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 589 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
400 | cache->c_bucket_bits); | 590 | cache->c_bucket_bits); |
401 | spin_lock(&mb_cache_spinlock); | 591 | block_hash_p = &cache->c_block_hash[bucket]; |
402 | list_for_each_prev(l, &cache->c_block_hash[bucket]) { | 592 | hlist_bl_lock(block_hash_p); |
403 | struct mb_cache_entry *ce = | 593 | hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) { |
404 | list_entry(l, struct mb_cache_entry, e_block_list); | 594 | if (lce->e_bdev == bdev && lce->e_block == block) { |
405 | if (ce->e_bdev == bdev && ce->e_block == block) | 595 | hlist_bl_unlock(block_hash_p); |
406 | goto out; | 596 | return -EBUSY; |
597 | } | ||
407 | } | 598 | } |
408 | __mb_cache_entry_unhash(ce); | 599 | mb_assert(!__mb_cache_entry_is_block_hashed(ce)); |
600 | __mb_cache_entry_unhash_block(ce); | ||
601 | __mb_cache_entry_unhash_index(ce); | ||
409 | ce->e_bdev = bdev; | 602 | ce->e_bdev = bdev; |
410 | ce->e_block = block; | 603 | ce->e_block = block; |
411 | list_add(&ce->e_block_list, &cache->c_block_hash[bucket]); | 604 | ce->e_block_hash_p = block_hash_p; |
412 | ce->e_index.o_key = key; | 605 | ce->e_index.o_key = key; |
606 | hlist_bl_add_head(&ce->e_block_list, block_hash_p); | ||
607 | hlist_bl_unlock(block_hash_p); | ||
413 | bucket = hash_long(key, cache->c_bucket_bits); | 608 | bucket = hash_long(key, cache->c_bucket_bits); |
414 | list_add(&ce->e_index.o_list, &cache->c_index_hash[bucket]); | 609 | index_hash_p = &cache->c_index_hash[bucket]; |
415 | error = 0; | 610 | hlist_bl_lock(index_hash_p); |
416 | out: | 611 | ce->e_index_hash_p = index_hash_p; |
417 | spin_unlock(&mb_cache_spinlock); | 612 | hlist_bl_add_head(&ce->e_index.o_list, index_hash_p); |
418 | return error; | 613 | hlist_bl_unlock(index_hash_p); |
614 | return 0; | ||
419 | } | 615 | } |
420 | 616 | ||
421 | 617 | ||
@@ -429,24 +625,26 @@ out: | |||
429 | void | 625 | void |
430 | mb_cache_entry_release(struct mb_cache_entry *ce) | 626 | mb_cache_entry_release(struct mb_cache_entry *ce) |
431 | { | 627 | { |
432 | spin_lock(&mb_cache_spinlock); | 628 | __mb_cache_entry_release(ce); |
433 | __mb_cache_entry_release_unlock(ce); | ||
434 | } | 629 | } |
435 | 630 | ||
436 | 631 | ||
437 | /* | 632 | /* |
438 | * mb_cache_entry_free() | 633 | * mb_cache_entry_free() |
439 | * | 634 | * |
440 | * This is equivalent to the sequence mb_cache_entry_takeout() -- | ||
441 | * mb_cache_entry_release(). | ||
442 | */ | 635 | */ |
443 | void | 636 | void |
444 | mb_cache_entry_free(struct mb_cache_entry *ce) | 637 | mb_cache_entry_free(struct mb_cache_entry *ce) |
445 | { | 638 | { |
446 | spin_lock(&mb_cache_spinlock); | 639 | mb_assert(ce); |
447 | mb_assert(list_empty(&ce->e_lru_list)); | 640 | mb_assert(list_empty(&ce->e_lru_list)); |
448 | __mb_cache_entry_unhash(ce); | 641 | hlist_bl_lock(ce->e_index_hash_p); |
449 | __mb_cache_entry_release_unlock(ce); | 642 | __mb_cache_entry_unhash_index(ce); |
643 | hlist_bl_unlock(ce->e_index_hash_p); | ||
644 | hlist_bl_lock(ce->e_block_hash_p); | ||
645 | __mb_cache_entry_unhash_block(ce); | ||
646 | hlist_bl_unlock(ce->e_block_hash_p); | ||
647 | __mb_cache_entry_release(ce); | ||
450 | } | 648 | } |
451 | 649 | ||
452 | 650 | ||
@@ -463,84 +661,110 @@ mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev, | |||
463 | sector_t block) | 661 | sector_t block) |
464 | { | 662 | { |
465 | unsigned int bucket; | 663 | unsigned int bucket; |
466 | struct list_head *l; | 664 | struct hlist_bl_node *l; |
467 | struct mb_cache_entry *ce; | 665 | struct mb_cache_entry *ce; |
666 | struct hlist_bl_head *block_hash_p; | ||
468 | 667 | ||
469 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), | 668 | bucket = hash_long((unsigned long)bdev + (block & 0xffffffff), |
470 | cache->c_bucket_bits); | 669 | cache->c_bucket_bits); |
471 | spin_lock(&mb_cache_spinlock); | 670 | block_hash_p = &cache->c_block_hash[bucket]; |
472 | list_for_each(l, &cache->c_block_hash[bucket]) { | 671 | /* First serialize access to the block corresponding hash chain. */ |
473 | ce = list_entry(l, struct mb_cache_entry, e_block_list); | 672 | hlist_bl_lock(block_hash_p); |
673 | hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) { | ||
674 | mb_assert(ce->e_block_hash_p == block_hash_p); | ||
474 | if (ce->e_bdev == bdev && ce->e_block == block) { | 675 | if (ce->e_bdev == bdev && ce->e_block == block) { |
475 | DEFINE_WAIT(wait); | 676 | /* |
677 | * Prevent a free from removing the entry. | ||
678 | */ | ||
679 | atomic_inc(&ce->e_refcnt); | ||
680 | hlist_bl_unlock(block_hash_p); | ||
681 | __spin_lock_mb_cache_entry(ce); | ||
682 | atomic_dec(&ce->e_refcnt); | ||
683 | if (ce->e_used > 0) { | ||
684 | DEFINE_WAIT(wait); | ||
685 | while (ce->e_used > 0) { | ||
686 | ce->e_queued++; | ||
687 | prepare_to_wait(&mb_cache_queue, &wait, | ||
688 | TASK_UNINTERRUPTIBLE); | ||
689 | __spin_unlock_mb_cache_entry(ce); | ||
690 | schedule(); | ||
691 | __spin_lock_mb_cache_entry(ce); | ||
692 | ce->e_queued--; | ||
693 | } | ||
694 | finish_wait(&mb_cache_queue, &wait); | ||
695 | } | ||
696 | ce->e_used += 1 + MB_CACHE_WRITER; | ||
697 | __spin_unlock_mb_cache_entry(ce); | ||
476 | 698 | ||
477 | if (!list_empty(&ce->e_lru_list)) | 699 | if (!list_empty(&ce->e_lru_list)) { |
700 | spin_lock(&mb_cache_spinlock); | ||
478 | list_del_init(&ce->e_lru_list); | 701 | list_del_init(&ce->e_lru_list); |
479 | |||
480 | while (ce->e_used > 0) { | ||
481 | ce->e_queued++; | ||
482 | prepare_to_wait(&mb_cache_queue, &wait, | ||
483 | TASK_UNINTERRUPTIBLE); | ||
484 | spin_unlock(&mb_cache_spinlock); | 702 | spin_unlock(&mb_cache_spinlock); |
485 | schedule(); | ||
486 | spin_lock(&mb_cache_spinlock); | ||
487 | ce->e_queued--; | ||
488 | } | 703 | } |
489 | finish_wait(&mb_cache_queue, &wait); | 704 | if (!__mb_cache_entry_is_block_hashed(ce)) { |
490 | ce->e_used += 1 + MB_CACHE_WRITER; | 705 | __mb_cache_entry_release(ce); |
491 | |||
492 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
493 | __mb_cache_entry_release_unlock(ce); | ||
494 | return NULL; | 706 | return NULL; |
495 | } | 707 | } |
496 | goto cleanup; | 708 | return ce; |
497 | } | 709 | } |
498 | } | 710 | } |
499 | ce = NULL; | 711 | hlist_bl_unlock(block_hash_p); |
500 | 712 | return NULL; | |
501 | cleanup: | ||
502 | spin_unlock(&mb_cache_spinlock); | ||
503 | return ce; | ||
504 | } | 713 | } |
505 | 714 | ||
506 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) | 715 | #if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) |
507 | 716 | ||
508 | static struct mb_cache_entry * | 717 | static struct mb_cache_entry * |
509 | __mb_cache_entry_find(struct list_head *l, struct list_head *head, | 718 | __mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head, |
510 | struct block_device *bdev, unsigned int key) | 719 | struct block_device *bdev, unsigned int key) |
511 | { | 720 | { |
512 | while (l != head) { | 721 | |
722 | /* The index hash chain is alredy acquire by caller. */ | ||
723 | while (l != NULL) { | ||
513 | struct mb_cache_entry *ce = | 724 | struct mb_cache_entry *ce = |
514 | list_entry(l, struct mb_cache_entry, e_index.o_list); | 725 | hlist_bl_entry(l, struct mb_cache_entry, |
726 | e_index.o_list); | ||
727 | mb_assert(ce->e_index_hash_p == head); | ||
515 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { | 728 | if (ce->e_bdev == bdev && ce->e_index.o_key == key) { |
516 | DEFINE_WAIT(wait); | 729 | /* |
517 | 730 | * Prevent a free from removing the entry. | |
518 | if (!list_empty(&ce->e_lru_list)) | 731 | */ |
519 | list_del_init(&ce->e_lru_list); | 732 | atomic_inc(&ce->e_refcnt); |
520 | 733 | hlist_bl_unlock(head); | |
734 | __spin_lock_mb_cache_entry(ce); | ||
735 | atomic_dec(&ce->e_refcnt); | ||
736 | ce->e_used++; | ||
521 | /* Incrementing before holding the lock gives readers | 737 | /* Incrementing before holding the lock gives readers |
522 | priority over writers. */ | 738 | priority over writers. */ |
523 | ce->e_used++; | 739 | if (ce->e_used >= MB_CACHE_WRITER) { |
524 | while (ce->e_used >= MB_CACHE_WRITER) { | 740 | DEFINE_WAIT(wait); |
525 | ce->e_queued++; | 741 | |
526 | prepare_to_wait(&mb_cache_queue, &wait, | 742 | while (ce->e_used >= MB_CACHE_WRITER) { |
527 | TASK_UNINTERRUPTIBLE); | 743 | ce->e_queued++; |
528 | spin_unlock(&mb_cache_spinlock); | 744 | prepare_to_wait(&mb_cache_queue, &wait, |
529 | schedule(); | 745 | TASK_UNINTERRUPTIBLE); |
530 | spin_lock(&mb_cache_spinlock); | 746 | __spin_unlock_mb_cache_entry(ce); |
531 | ce->e_queued--; | 747 | schedule(); |
748 | __spin_lock_mb_cache_entry(ce); | ||
749 | ce->e_queued--; | ||
750 | } | ||
751 | finish_wait(&mb_cache_queue, &wait); | ||
532 | } | 752 | } |
533 | finish_wait(&mb_cache_queue, &wait); | 753 | __spin_unlock_mb_cache_entry(ce); |
534 | 754 | if (!list_empty(&ce->e_lru_list)) { | |
535 | if (!__mb_cache_entry_is_hashed(ce)) { | ||
536 | __mb_cache_entry_release_unlock(ce); | ||
537 | spin_lock(&mb_cache_spinlock); | 755 | spin_lock(&mb_cache_spinlock); |
756 | list_del_init(&ce->e_lru_list); | ||
757 | spin_unlock(&mb_cache_spinlock); | ||
758 | } | ||
759 | if (!__mb_cache_entry_is_block_hashed(ce)) { | ||
760 | __mb_cache_entry_release(ce); | ||
538 | return ERR_PTR(-EAGAIN); | 761 | return ERR_PTR(-EAGAIN); |
539 | } | 762 | } |
540 | return ce; | 763 | return ce; |
541 | } | 764 | } |
542 | l = l->next; | 765 | l = l->next; |
543 | } | 766 | } |
767 | hlist_bl_unlock(head); | ||
544 | return NULL; | 768 | return NULL; |
545 | } | 769 | } |
546 | 770 | ||
@@ -562,13 +786,17 @@ mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev, | |||
562 | unsigned int key) | 786 | unsigned int key) |
563 | { | 787 | { |
564 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 788 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
565 | struct list_head *l; | 789 | struct hlist_bl_node *l; |
566 | struct mb_cache_entry *ce; | 790 | struct mb_cache_entry *ce = NULL; |
567 | 791 | struct hlist_bl_head *index_hash_p; | |
568 | spin_lock(&mb_cache_spinlock); | 792 | |
569 | l = cache->c_index_hash[bucket].next; | 793 | index_hash_p = &cache->c_index_hash[bucket]; |
570 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 794 | hlist_bl_lock(index_hash_p); |
571 | spin_unlock(&mb_cache_spinlock); | 795 | if (!hlist_bl_empty(index_hash_p)) { |
796 | l = hlist_bl_first(index_hash_p); | ||
797 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); | ||
798 | } else | ||
799 | hlist_bl_unlock(index_hash_p); | ||
572 | return ce; | 800 | return ce; |
573 | } | 801 | } |
574 | 802 | ||
@@ -597,13 +825,17 @@ mb_cache_entry_find_next(struct mb_cache_entry *prev, | |||
597 | { | 825 | { |
598 | struct mb_cache *cache = prev->e_cache; | 826 | struct mb_cache *cache = prev->e_cache; |
599 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); | 827 | unsigned int bucket = hash_long(key, cache->c_bucket_bits); |
600 | struct list_head *l; | 828 | struct hlist_bl_node *l; |
601 | struct mb_cache_entry *ce; | 829 | struct mb_cache_entry *ce; |
830 | struct hlist_bl_head *index_hash_p; | ||
602 | 831 | ||
603 | spin_lock(&mb_cache_spinlock); | 832 | index_hash_p = &cache->c_index_hash[bucket]; |
833 | mb_assert(prev->e_index_hash_p == index_hash_p); | ||
834 | hlist_bl_lock(index_hash_p); | ||
835 | mb_assert(!hlist_bl_empty(index_hash_p)); | ||
604 | l = prev->e_index.o_list.next; | 836 | l = prev->e_index.o_list.next; |
605 | ce = __mb_cache_entry_find(l, &cache->c_index_hash[bucket], bdev, key); | 837 | ce = __mb_cache_entry_find(l, index_hash_p, bdev, key); |
606 | __mb_cache_entry_release_unlock(prev); | 838 | __mb_cache_entry_release(prev); |
607 | return ce; | 839 | return ce; |
608 | } | 840 | } |
609 | 841 | ||
diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 0332109162a5..f007a3355570 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c | |||
@@ -26,7 +26,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data); | |||
26 | 26 | ||
27 | static void minix_evict_inode(struct inode *inode) | 27 | static void minix_evict_inode(struct inode *inode) |
28 | { | 28 | { |
29 | truncate_inode_pages(&inode->i_data, 0); | 29 | truncate_inode_pages_final(&inode->i_data); |
30 | if (!inode->i_nlink) { | 30 | if (!inode->i_nlink) { |
31 | inode->i_size = 0; | 31 | inode->i_size = 0; |
32 | minix_truncate(inode); | 32 | minix_truncate(inode); |
@@ -86,7 +86,7 @@ static void init_once(void *foo) | |||
86 | inode_init_once(&ei->vfs_inode); | 86 | inode_init_once(&ei->vfs_inode); |
87 | } | 87 | } |
88 | 88 | ||
89 | static int init_inodecache(void) | 89 | static int __init init_inodecache(void) |
90 | { | 90 | { |
91 | minix_inode_cachep = kmem_cache_create("minix_inode_cache", | 91 | minix_inode_cachep = kmem_cache_create("minix_inode_cache", |
92 | sizeof(struct minix_inode_info), | 92 | sizeof(struct minix_inode_info), |
@@ -123,6 +123,7 @@ static int minix_remount (struct super_block * sb, int * flags, char * data) | |||
123 | struct minix_sb_info * sbi = minix_sb(sb); | 123 | struct minix_sb_info * sbi = minix_sb(sb); |
124 | struct minix_super_block * ms; | 124 | struct minix_super_block * ms; |
125 | 125 | ||
126 | sync_filesystem(sb); | ||
126 | ms = sbi->s_ms; | 127 | ms = sbi->s_ms; |
127 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) | 128 | if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) |
128 | return 0; | 129 | return 0; |
diff --git a/fs/namei.c b/fs/namei.c index 4b491b431990..88339f59efb5 100644 --- a/fs/namei.c +++ b/fs/namei.c | |||
@@ -1796,7 +1796,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) | |||
1796 | if (err) | 1796 | if (err) |
1797 | return err; | 1797 | return err; |
1798 | } | 1798 | } |
1799 | if (!d_is_directory(nd->path.dentry)) { | 1799 | if (!d_can_lookup(nd->path.dentry)) { |
1800 | err = -ENOTDIR; | 1800 | err = -ENOTDIR; |
1801 | break; | 1801 | break; |
1802 | } | 1802 | } |
@@ -1817,7 +1817,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1817 | struct dentry *root = nd->root.dentry; | 1817 | struct dentry *root = nd->root.dentry; |
1818 | struct inode *inode = root->d_inode; | 1818 | struct inode *inode = root->d_inode; |
1819 | if (*name) { | 1819 | if (*name) { |
1820 | if (!d_is_directory(root)) | 1820 | if (!d_can_lookup(root)) |
1821 | return -ENOTDIR; | 1821 | return -ENOTDIR; |
1822 | retval = inode_permission(inode, MAY_EXEC); | 1822 | retval = inode_permission(inode, MAY_EXEC); |
1823 | if (retval) | 1823 | if (retval) |
@@ -1873,7 +1873,7 @@ static int path_init(int dfd, const char *name, unsigned int flags, | |||
1873 | dentry = f.file->f_path.dentry; | 1873 | dentry = f.file->f_path.dentry; |
1874 | 1874 | ||
1875 | if (*name) { | 1875 | if (*name) { |
1876 | if (!d_is_directory(dentry)) { | 1876 | if (!d_can_lookup(dentry)) { |
1877 | fdput(f); | 1877 | fdput(f); |
1878 | return -ENOTDIR; | 1878 | return -ENOTDIR; |
1879 | } | 1879 | } |
@@ -1955,7 +1955,7 @@ static int path_lookupat(int dfd, const char *name, | |||
1955 | err = complete_walk(nd); | 1955 | err = complete_walk(nd); |
1956 | 1956 | ||
1957 | if (!err && nd->flags & LOOKUP_DIRECTORY) { | 1957 | if (!err && nd->flags & LOOKUP_DIRECTORY) { |
1958 | if (!d_is_directory(nd->path.dentry)) { | 1958 | if (!d_can_lookup(nd->path.dentry)) { |
1959 | path_put(&nd->path); | 1959 | path_put(&nd->path); |
1960 | err = -ENOTDIR; | 1960 | err = -ENOTDIR; |
1961 | } | 1961 | } |
@@ -2414,11 +2414,11 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) | |||
2414 | IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) | 2414 | IS_IMMUTABLE(inode) || IS_SWAPFILE(inode)) |
2415 | return -EPERM; | 2415 | return -EPERM; |
2416 | if (isdir) { | 2416 | if (isdir) { |
2417 | if (!d_is_directory(victim) && !d_is_autodir(victim)) | 2417 | if (!d_is_dir(victim)) |
2418 | return -ENOTDIR; | 2418 | return -ENOTDIR; |
2419 | if (IS_ROOT(victim)) | 2419 | if (IS_ROOT(victim)) |
2420 | return -EBUSY; | 2420 | return -EBUSY; |
2421 | } else if (d_is_directory(victim) || d_is_autodir(victim)) | 2421 | } else if (d_is_dir(victim)) |
2422 | return -EISDIR; | 2422 | return -EISDIR; |
2423 | if (IS_DEADDIR(dir)) | 2423 | if (IS_DEADDIR(dir)) |
2424 | return -ENOENT; | 2424 | return -ENOENT; |
@@ -2569,7 +2569,7 @@ static int handle_truncate(struct file *filp) | |||
2569 | /* | 2569 | /* |
2570 | * Refuse to truncate files with mandatory locks held on them. | 2570 | * Refuse to truncate files with mandatory locks held on them. |
2571 | */ | 2571 | */ |
2572 | error = locks_verify_locked(inode); | 2572 | error = locks_verify_locked(filp); |
2573 | if (!error) | 2573 | if (!error) |
2574 | error = security_path_truncate(path); | 2574 | error = security_path_truncate(path); |
2575 | if (!error) { | 2575 | if (!error) { |
@@ -3016,11 +3016,10 @@ finish_open: | |||
3016 | } | 3016 | } |
3017 | audit_inode(name, nd->path.dentry, 0); | 3017 | audit_inode(name, nd->path.dentry, 0); |
3018 | error = -EISDIR; | 3018 | error = -EISDIR; |
3019 | if ((open_flag & O_CREAT) && | 3019 | if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) |
3020 | (d_is_directory(nd->path.dentry) || d_is_autodir(nd->path.dentry))) | ||
3021 | goto out; | 3020 | goto out; |
3022 | error = -ENOTDIR; | 3021 | error = -ENOTDIR; |
3023 | if ((nd->flags & LOOKUP_DIRECTORY) && !d_is_directory(nd->path.dentry)) | 3022 | if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry)) |
3024 | goto out; | 3023 | goto out; |
3025 | if (!S_ISREG(nd->inode->i_mode)) | 3024 | if (!S_ISREG(nd->inode->i_mode)) |
3026 | will_truncate = false; | 3025 | will_truncate = false; |
@@ -3744,7 +3743,7 @@ exit1: | |||
3744 | slashes: | 3743 | slashes: |
3745 | if (d_is_negative(dentry)) | 3744 | if (d_is_negative(dentry)) |
3746 | error = -ENOENT; | 3745 | error = -ENOENT; |
3747 | else if (d_is_directory(dentry) || d_is_autodir(dentry)) | 3746 | else if (d_is_dir(dentry)) |
3748 | error = -EISDIR; | 3747 | error = -EISDIR; |
3749 | else | 3748 | else |
3750 | error = -ENOTDIR; | 3749 | error = -ENOTDIR; |
@@ -3974,7 +3973,28 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname | |||
3974 | return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); | 3973 | return sys_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
3975 | } | 3974 | } |
3976 | 3975 | ||
3977 | /* | 3976 | /** |
3977 | * vfs_rename - rename a filesystem object | ||
3978 | * @old_dir: parent of source | ||
3979 | * @old_dentry: source | ||
3980 | * @new_dir: parent of destination | ||
3981 | * @new_dentry: destination | ||
3982 | * @delegated_inode: returns an inode needing a delegation break | ||
3983 | * @flags: rename flags | ||
3984 | * | ||
3985 | * The caller must hold multiple mutexes--see lock_rename()). | ||
3986 | * | ||
3987 | * If vfs_rename discovers a delegation in need of breaking at either | ||
3988 | * the source or destination, it will return -EWOULDBLOCK and return a | ||
3989 | * reference to the inode in delegated_inode. The caller should then | ||
3990 | * break the delegation and retry. Because breaking a delegation may | ||
3991 | * take a long time, the caller should drop all locks before doing | ||
3992 | * so. | ||
3993 | * | ||
3994 | * Alternatively, a caller may pass NULL for delegated_inode. This may | ||
3995 | * be appropriate for callers that expect the underlying filesystem not | ||
3996 | * to be NFS exported. | ||
3997 | * | ||
3978 | * The worst of all namespace operations - renaming directory. "Perverted" | 3998 | * The worst of all namespace operations - renaming directory. "Perverted" |
3979 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... | 3999 | * doesn't even start to describe it. Somebody in UCB had a heck of a trip... |
3980 | * Problems: | 4000 | * Problems: |
@@ -4002,163 +4022,139 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname | |||
4002 | * ->i_mutex on parents, which works but leads to some truly excessive | 4022 | * ->i_mutex on parents, which works but leads to some truly excessive |
4003 | * locking]. | 4023 | * locking]. |
4004 | */ | 4024 | */ |
4005 | static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, | 4025 | int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
4006 | struct inode *new_dir, struct dentry *new_dentry) | 4026 | struct inode *new_dir, struct dentry *new_dentry, |
4027 | struct inode **delegated_inode, unsigned int flags) | ||
4007 | { | 4028 | { |
4008 | int error = 0; | 4029 | int error; |
4030 | bool is_dir = d_is_dir(old_dentry); | ||
4031 | const unsigned char *old_name; | ||
4032 | struct inode *source = old_dentry->d_inode; | ||
4009 | struct inode *target = new_dentry->d_inode; | 4033 | struct inode *target = new_dentry->d_inode; |
4034 | bool new_is_dir = false; | ||
4010 | unsigned max_links = new_dir->i_sb->s_max_links; | 4035 | unsigned max_links = new_dir->i_sb->s_max_links; |
4011 | 4036 | ||
4037 | if (source == target) | ||
4038 | return 0; | ||
4039 | |||
4040 | error = may_delete(old_dir, old_dentry, is_dir); | ||
4041 | if (error) | ||
4042 | return error; | ||
4043 | |||
4044 | if (!target) { | ||
4045 | error = may_create(new_dir, new_dentry); | ||
4046 | } else { | ||
4047 | new_is_dir = d_is_dir(new_dentry); | ||
4048 | |||
4049 | if (!(flags & RENAME_EXCHANGE)) | ||
4050 | error = may_delete(new_dir, new_dentry, is_dir); | ||
4051 | else | ||
4052 | error = may_delete(new_dir, new_dentry, new_is_dir); | ||
4053 | } | ||
4054 | if (error) | ||
4055 | return error; | ||
4056 | |||
4057 | if (!old_dir->i_op->rename) | ||
4058 | return -EPERM; | ||
4059 | |||
4060 | if (flags && !old_dir->i_op->rename2) | ||
4061 | return -EINVAL; | ||
4062 | |||
4012 | /* | 4063 | /* |
4013 | * If we are going to change the parent - check write permissions, | 4064 | * If we are going to change the parent - check write permissions, |
4014 | * we'll need to flip '..'. | 4065 | * we'll need to flip '..'. |
4015 | */ | 4066 | */ |
4016 | if (new_dir != old_dir) { | 4067 | if (new_dir != old_dir) { |
4017 | error = inode_permission(old_dentry->d_inode, MAY_WRITE); | 4068 | if (is_dir) { |
4018 | if (error) | 4069 | error = inode_permission(source, MAY_WRITE); |
4019 | return error; | 4070 | if (error) |
4071 | return error; | ||
4072 | } | ||
4073 | if ((flags & RENAME_EXCHANGE) && new_is_dir) { | ||
4074 | error = inode_permission(target, MAY_WRITE); | ||
4075 | if (error) | ||
4076 | return error; | ||
4077 | } | ||
4020 | } | 4078 | } |
4021 | 4079 | ||
4022 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | 4080 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry, |
4081 | flags); | ||
4023 | if (error) | 4082 | if (error) |
4024 | return error; | 4083 | return error; |
4025 | 4084 | ||
4085 | old_name = fsnotify_oldname_init(old_dentry->d_name.name); | ||
4026 | dget(new_dentry); | 4086 | dget(new_dentry); |
4027 | if (target) | 4087 | if (!is_dir || (flags & RENAME_EXCHANGE)) |
4088 | lock_two_nondirectories(source, target); | ||
4089 | else if (target) | ||
4028 | mutex_lock(&target->i_mutex); | 4090 | mutex_lock(&target->i_mutex); |
4029 | 4091 | ||
4030 | error = -EBUSY; | 4092 | error = -EBUSY; |
4031 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) | 4093 | if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) |
4032 | goto out; | 4094 | goto out; |
4033 | 4095 | ||
4034 | error = -EMLINK; | 4096 | if (max_links && new_dir != old_dir) { |
4035 | if (max_links && !target && new_dir != old_dir && | 4097 | error = -EMLINK; |
4036 | new_dir->i_nlink >= max_links) | 4098 | if (is_dir && !new_is_dir && new_dir->i_nlink >= max_links) |
4037 | goto out; | 4099 | goto out; |
4038 | 4100 | if ((flags & RENAME_EXCHANGE) && !is_dir && new_is_dir && | |
4039 | if (target) | 4101 | old_dir->i_nlink >= max_links) |
4102 | goto out; | ||
4103 | } | ||
4104 | if (is_dir && !(flags & RENAME_EXCHANGE) && target) | ||
4040 | shrink_dcache_parent(new_dentry); | 4105 | shrink_dcache_parent(new_dentry); |
4041 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 4106 | if (!is_dir) { |
4042 | if (error) | 4107 | error = try_break_deleg(source, delegated_inode); |
4043 | goto out; | 4108 | if (error) |
4044 | 4109 | goto out; | |
4045 | if (target) { | ||
4046 | target->i_flags |= S_DEAD; | ||
4047 | dont_mount(new_dentry); | ||
4048 | } | 4110 | } |
4049 | out: | 4111 | if (target && !new_is_dir) { |
4050 | if (target) | ||
4051 | mutex_unlock(&target->i_mutex); | ||
4052 | dput(new_dentry); | ||
4053 | if (!error) | ||
4054 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | ||
4055 | d_move(old_dentry,new_dentry); | ||
4056 | return error; | ||
4057 | } | ||
4058 | |||
4059 | static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, | ||
4060 | struct inode *new_dir, struct dentry *new_dentry, | ||
4061 | struct inode **delegated_inode) | ||
4062 | { | ||
4063 | struct inode *target = new_dentry->d_inode; | ||
4064 | struct inode *source = old_dentry->d_inode; | ||
4065 | int error; | ||
4066 | |||
4067 | error = security_inode_rename(old_dir, old_dentry, new_dir, new_dentry); | ||
4068 | if (error) | ||
4069 | return error; | ||
4070 | |||
4071 | dget(new_dentry); | ||
4072 | lock_two_nondirectories(source, target); | ||
4073 | |||
4074 | error = -EBUSY; | ||
4075 | if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry)) | ||
4076 | goto out; | ||
4077 | |||
4078 | error = try_break_deleg(source, delegated_inode); | ||
4079 | if (error) | ||
4080 | goto out; | ||
4081 | if (target) { | ||
4082 | error = try_break_deleg(target, delegated_inode); | 4112 | error = try_break_deleg(target, delegated_inode); |
4083 | if (error) | 4113 | if (error) |
4084 | goto out; | 4114 | goto out; |
4085 | } | 4115 | } |
4086 | error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); | 4116 | if (!flags) { |
4117 | error = old_dir->i_op->rename(old_dir, old_dentry, | ||
4118 | new_dir, new_dentry); | ||
4119 | } else { | ||
4120 | error = old_dir->i_op->rename2(old_dir, old_dentry, | ||
4121 | new_dir, new_dentry, flags); | ||
4122 | } | ||
4087 | if (error) | 4123 | if (error) |
4088 | goto out; | 4124 | goto out; |
4089 | 4125 | ||
4090 | if (target) | 4126 | if (!(flags & RENAME_EXCHANGE) && target) { |
4127 | if (is_dir) | ||
4128 | target->i_flags |= S_DEAD; | ||
4091 | dont_mount(new_dentry); | 4129 | dont_mount(new_dentry); |
4092 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) | 4130 | } |
4093 | d_move(old_dentry, new_dentry); | 4131 | if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE)) { |
4132 | if (!(flags & RENAME_EXCHANGE)) | ||
4133 | d_move(old_dentry, new_dentry); | ||
4134 | else | ||
4135 | d_exchange(old_dentry, new_dentry); | ||
4136 | } | ||
4094 | out: | 4137 | out: |
4095 | unlock_two_nondirectories(source, target); | 4138 | if (!is_dir || (flags & RENAME_EXCHANGE)) |
4139 | unlock_two_nondirectories(source, target); | ||
4140 | else if (target) | ||
4141 | mutex_unlock(&target->i_mutex); | ||
4096 | dput(new_dentry); | 4142 | dput(new_dentry); |
4097 | return error; | 4143 | if (!error) { |
4098 | } | ||
4099 | |||
4100 | /** | ||
4101 | * vfs_rename - rename a filesystem object | ||
4102 | * @old_dir: parent of source | ||
4103 | * @old_dentry: source | ||
4104 | * @new_dir: parent of destination | ||
4105 | * @new_dentry: destination | ||
4106 | * @delegated_inode: returns an inode needing a delegation break | ||
4107 | * | ||
4108 | * The caller must hold multiple mutexes--see lock_rename()). | ||
4109 | * | ||
4110 | * If vfs_rename discovers a delegation in need of breaking at either | ||
4111 | * the source or destination, it will return -EWOULDBLOCK and return a | ||
4112 | * reference to the inode in delegated_inode. The caller should then | ||
4113 | * break the delegation and retry. Because breaking a delegation may | ||
4114 | * take a long time, the caller should drop all locks before doing | ||
4115 | * so. | ||
4116 | * | ||
4117 | * Alternatively, a caller may pass NULL for delegated_inode. This may | ||
4118 | * be appropriate for callers that expect the underlying filesystem not | ||
4119 | * to be NFS exported. | ||
4120 | */ | ||
4121 | int vfs_rename(struct inode *old_dir, struct dentry *old_dentry, | ||
4122 | struct inode *new_dir, struct dentry *new_dentry, | ||
4123 | struct inode **delegated_inode) | ||
4124 | { | ||
4125 | int error; | ||
4126 | int is_dir = d_is_directory(old_dentry) || d_is_autodir(old_dentry); | ||
4127 | const unsigned char *old_name; | ||
4128 | |||
4129 | if (old_dentry->d_inode == new_dentry->d_inode) | ||
4130 | return 0; | ||
4131 | |||
4132 | error = may_delete(old_dir, old_dentry, is_dir); | ||
4133 | if (error) | ||
4134 | return error; | ||
4135 | |||
4136 | if (!new_dentry->d_inode) | ||
4137 | error = may_create(new_dir, new_dentry); | ||
4138 | else | ||
4139 | error = may_delete(new_dir, new_dentry, is_dir); | ||
4140 | if (error) | ||
4141 | return error; | ||
4142 | |||
4143 | if (!old_dir->i_op->rename) | ||
4144 | return -EPERM; | ||
4145 | |||
4146 | old_name = fsnotify_oldname_init(old_dentry->d_name.name); | ||
4147 | |||
4148 | if (is_dir) | ||
4149 | error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry); | ||
4150 | else | ||
4151 | error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry,delegated_inode); | ||
4152 | if (!error) | ||
4153 | fsnotify_move(old_dir, new_dir, old_name, is_dir, | 4144 | fsnotify_move(old_dir, new_dir, old_name, is_dir, |
4154 | new_dentry->d_inode, old_dentry); | 4145 | !(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry); |
4146 | if (flags & RENAME_EXCHANGE) { | ||
4147 | fsnotify_move(new_dir, old_dir, old_dentry->d_name.name, | ||
4148 | new_is_dir, NULL, new_dentry); | ||
4149 | } | ||
4150 | } | ||
4155 | fsnotify_oldname_free(old_name); | 4151 | fsnotify_oldname_free(old_name); |
4156 | 4152 | ||
4157 | return error; | 4153 | return error; |
4158 | } | 4154 | } |
4159 | 4155 | ||
4160 | SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | 4156 | SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, |
4161 | int, newdfd, const char __user *, newname) | 4157 | int, newdfd, const char __user *, newname, unsigned int, flags) |
4162 | { | 4158 | { |
4163 | struct dentry *old_dir, *new_dir; | 4159 | struct dentry *old_dir, *new_dir; |
4164 | struct dentry *old_dentry, *new_dentry; | 4160 | struct dentry *old_dentry, *new_dentry; |
@@ -4170,6 +4166,13 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | |||
4170 | unsigned int lookup_flags = 0; | 4166 | unsigned int lookup_flags = 0; |
4171 | bool should_retry = false; | 4167 | bool should_retry = false; |
4172 | int error; | 4168 | int error; |
4169 | |||
4170 | if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) | ||
4171 | return -EINVAL; | ||
4172 | |||
4173 | if ((flags & RENAME_NOREPLACE) && (flags & RENAME_EXCHANGE)) | ||
4174 | return -EINVAL; | ||
4175 | |||
4173 | retry: | 4176 | retry: |
4174 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); | 4177 | from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); |
4175 | if (IS_ERR(from)) { | 4178 | if (IS_ERR(from)) { |
@@ -4193,6 +4196,8 @@ retry: | |||
4193 | goto exit2; | 4196 | goto exit2; |
4194 | 4197 | ||
4195 | new_dir = newnd.path.dentry; | 4198 | new_dir = newnd.path.dentry; |
4199 | if (flags & RENAME_NOREPLACE) | ||
4200 | error = -EEXIST; | ||
4196 | if (newnd.last_type != LAST_NORM) | 4201 | if (newnd.last_type != LAST_NORM) |
4197 | goto exit2; | 4202 | goto exit2; |
4198 | 4203 | ||
@@ -4202,7 +4207,8 @@ retry: | |||
4202 | 4207 | ||
4203 | oldnd.flags &= ~LOOKUP_PARENT; | 4208 | oldnd.flags &= ~LOOKUP_PARENT; |
4204 | newnd.flags &= ~LOOKUP_PARENT; | 4209 | newnd.flags &= ~LOOKUP_PARENT; |
4205 | newnd.flags |= LOOKUP_RENAME_TARGET; | 4210 | if (!(flags & RENAME_EXCHANGE)) |
4211 | newnd.flags |= LOOKUP_RENAME_TARGET; | ||
4206 | 4212 | ||
4207 | retry_deleg: | 4213 | retry_deleg: |
4208 | trap = lock_rename(new_dir, old_dir); | 4214 | trap = lock_rename(new_dir, old_dir); |
@@ -4215,34 +4221,49 @@ retry_deleg: | |||
4215 | error = -ENOENT; | 4221 | error = -ENOENT; |
4216 | if (d_is_negative(old_dentry)) | 4222 | if (d_is_negative(old_dentry)) |
4217 | goto exit4; | 4223 | goto exit4; |
4224 | new_dentry = lookup_hash(&newnd); | ||
4225 | error = PTR_ERR(new_dentry); | ||
4226 | if (IS_ERR(new_dentry)) | ||
4227 | goto exit4; | ||
4228 | error = -EEXIST; | ||
4229 | if ((flags & RENAME_NOREPLACE) && d_is_positive(new_dentry)) | ||
4230 | goto exit5; | ||
4231 | if (flags & RENAME_EXCHANGE) { | ||
4232 | error = -ENOENT; | ||
4233 | if (d_is_negative(new_dentry)) | ||
4234 | goto exit5; | ||
4235 | |||
4236 | if (!d_is_dir(new_dentry)) { | ||
4237 | error = -ENOTDIR; | ||
4238 | if (newnd.last.name[newnd.last.len]) | ||
4239 | goto exit5; | ||
4240 | } | ||
4241 | } | ||
4218 | /* unless the source is a directory trailing slashes give -ENOTDIR */ | 4242 | /* unless the source is a directory trailing slashes give -ENOTDIR */ |
4219 | if (!d_is_directory(old_dentry) && !d_is_autodir(old_dentry)) { | 4243 | if (!d_is_dir(old_dentry)) { |
4220 | error = -ENOTDIR; | 4244 | error = -ENOTDIR; |
4221 | if (oldnd.last.name[oldnd.last.len]) | 4245 | if (oldnd.last.name[oldnd.last.len]) |
4222 | goto exit4; | 4246 | goto exit5; |
4223 | if (newnd.last.name[newnd.last.len]) | 4247 | if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) |
4224 | goto exit4; | 4248 | goto exit5; |
4225 | } | 4249 | } |
4226 | /* source should not be ancestor of target */ | 4250 | /* source should not be ancestor of target */ |
4227 | error = -EINVAL; | 4251 | error = -EINVAL; |
4228 | if (old_dentry == trap) | 4252 | if (old_dentry == trap) |
4229 | goto exit4; | 4253 | goto exit5; |
4230 | new_dentry = lookup_hash(&newnd); | ||
4231 | error = PTR_ERR(new_dentry); | ||
4232 | if (IS_ERR(new_dentry)) | ||
4233 | goto exit4; | ||
4234 | /* target should not be an ancestor of source */ | 4254 | /* target should not be an ancestor of source */ |
4235 | error = -ENOTEMPTY; | 4255 | if (!(flags & RENAME_EXCHANGE)) |
4256 | error = -ENOTEMPTY; | ||
4236 | if (new_dentry == trap) | 4257 | if (new_dentry == trap) |
4237 | goto exit5; | 4258 | goto exit5; |
4238 | 4259 | ||
4239 | error = security_path_rename(&oldnd.path, old_dentry, | 4260 | error = security_path_rename(&oldnd.path, old_dentry, |
4240 | &newnd.path, new_dentry); | 4261 | &newnd.path, new_dentry, flags); |
4241 | if (error) | 4262 | if (error) |
4242 | goto exit5; | 4263 | goto exit5; |
4243 | error = vfs_rename(old_dir->d_inode, old_dentry, | 4264 | error = vfs_rename(old_dir->d_inode, old_dentry, |
4244 | new_dir->d_inode, new_dentry, | 4265 | new_dir->d_inode, new_dentry, |
4245 | &delegated_inode); | 4266 | &delegated_inode, flags); |
4246 | exit5: | 4267 | exit5: |
4247 | dput(new_dentry); | 4268 | dput(new_dentry); |
4248 | exit4: | 4269 | exit4: |
@@ -4272,9 +4293,15 @@ exit: | |||
4272 | return error; | 4293 | return error; |
4273 | } | 4294 | } |
4274 | 4295 | ||
4296 | SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname, | ||
4297 | int, newdfd, const char __user *, newname) | ||
4298 | { | ||
4299 | return sys_renameat2(olddfd, oldname, newdfd, newname, 0); | ||
4300 | } | ||
4301 | |||
4275 | SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) | 4302 | SYSCALL_DEFINE2(rename, const char __user *, oldname, const char __user *, newname) |
4276 | { | 4303 | { |
4277 | return sys_renameat(AT_FDCWD, oldname, AT_FDCWD, newname); | 4304 | return sys_renameat2(AT_FDCWD, oldname, AT_FDCWD, newname, 0); |
4278 | } | 4305 | } |
4279 | 4306 | ||
4280 | int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) | 4307 | int vfs_readlink(struct dentry *dentry, char __user *buffer, int buflen, const char *link) |
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 2cf2ebecb55f..647d86d2db39 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c | |||
@@ -99,6 +99,7 @@ static void destroy_inodecache(void) | |||
99 | 99 | ||
100 | static int ncp_remount(struct super_block *sb, int *flags, char* data) | 100 | static int ncp_remount(struct super_block *sb, int *flags, char* data) |
101 | { | 101 | { |
102 | sync_filesystem(sb); | ||
102 | *flags |= MS_NODIRATIME; | 103 | *flags |= MS_NODIRATIME; |
103 | return 0; | 104 | return 0; |
104 | } | 105 | } |
@@ -296,7 +297,7 @@ ncp_iget(struct super_block *sb, struct ncp_entry_info *info) | |||
296 | static void | 297 | static void |
297 | ncp_evict_inode(struct inode *inode) | 298 | ncp_evict_inode(struct inode *inode) |
298 | { | 299 | { |
299 | truncate_inode_pages(&inode->i_data, 0); | 300 | truncate_inode_pages_final(&inode->i_data); |
300 | clear_inode(inode); | 301 | clear_inode(inode); |
301 | 302 | ||
302 | if (S_ISDIR(inode->i_mode)) { | 303 | if (S_ISDIR(inode->i_mode)) { |
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 56ff823ca82e..65d849bdf77a 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c | |||
@@ -1213,7 +1213,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx) | |||
1213 | end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); | 1213 | end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE); |
1214 | if (end != NFS_I(inode)->npages) { | 1214 | if (end != NFS_I(inode)->npages) { |
1215 | rcu_read_lock(); | 1215 | rcu_read_lock(); |
1216 | end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX); | 1216 | end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX); |
1217 | rcu_read_unlock(); | 1217 | rcu_read_unlock(); |
1218 | } | 1218 | } |
1219 | 1219 | ||
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index ae2e87b95453..41db5258e7a7 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c | |||
@@ -112,7 +112,8 @@ out: | |||
112 | * TODO: keep track of all layouts (and delegations) in a hash table | 112 | * TODO: keep track of all layouts (and delegations) in a hash table |
113 | * hashed by filehandle. | 113 | * hashed by filehandle. |
114 | */ | 114 | */ |
115 | static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, struct nfs_fh *fh) | 115 | static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, |
116 | struct nfs_fh *fh, nfs4_stateid *stateid) | ||
116 | { | 117 | { |
117 | struct nfs_server *server; | 118 | struct nfs_server *server; |
118 | struct inode *ino; | 119 | struct inode *ino; |
@@ -120,17 +121,19 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, | |||
120 | 121 | ||
121 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { | 122 | list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { |
122 | list_for_each_entry(lo, &server->layouts, plh_layouts) { | 123 | list_for_each_entry(lo, &server->layouts, plh_layouts) { |
124 | if (!nfs4_stateid_match_other(&lo->plh_stateid, stateid)) | ||
125 | continue; | ||
123 | if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) | 126 | if (nfs_compare_fh(fh, &NFS_I(lo->plh_inode)->fh)) |
124 | continue; | 127 | continue; |
125 | ino = igrab(lo->plh_inode); | 128 | ino = igrab(lo->plh_inode); |
126 | if (!ino) | 129 | if (!ino) |
127 | continue; | 130 | break; |
128 | spin_lock(&ino->i_lock); | 131 | spin_lock(&ino->i_lock); |
129 | /* Is this layout in the process of being freed? */ | 132 | /* Is this layout in the process of being freed? */ |
130 | if (NFS_I(ino)->layout != lo) { | 133 | if (NFS_I(ino)->layout != lo) { |
131 | spin_unlock(&ino->i_lock); | 134 | spin_unlock(&ino->i_lock); |
132 | iput(ino); | 135 | iput(ino); |
133 | continue; | 136 | break; |
134 | } | 137 | } |
135 | pnfs_get_layout_hdr(lo); | 138 | pnfs_get_layout_hdr(lo); |
136 | spin_unlock(&ino->i_lock); | 139 | spin_unlock(&ino->i_lock); |
@@ -141,13 +144,14 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp, | |||
141 | return NULL; | 144 | return NULL; |
142 | } | 145 | } |
143 | 146 | ||
144 | static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, struct nfs_fh *fh) | 147 | static struct pnfs_layout_hdr * get_layout_by_fh(struct nfs_client *clp, |
148 | struct nfs_fh *fh, nfs4_stateid *stateid) | ||
145 | { | 149 | { |
146 | struct pnfs_layout_hdr *lo; | 150 | struct pnfs_layout_hdr *lo; |
147 | 151 | ||
148 | spin_lock(&clp->cl_lock); | 152 | spin_lock(&clp->cl_lock); |
149 | rcu_read_lock(); | 153 | rcu_read_lock(); |
150 | lo = get_layout_by_fh_locked(clp, fh); | 154 | lo = get_layout_by_fh_locked(clp, fh, stateid); |
151 | rcu_read_unlock(); | 155 | rcu_read_unlock(); |
152 | spin_unlock(&clp->cl_lock); | 156 | spin_unlock(&clp->cl_lock); |
153 | 157 | ||
@@ -162,9 +166,9 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
162 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; | 166 | u32 rv = NFS4ERR_NOMATCHING_LAYOUT; |
163 | LIST_HEAD(free_me_list); | 167 | LIST_HEAD(free_me_list); |
164 | 168 | ||
165 | lo = get_layout_by_fh(clp, &args->cbl_fh); | 169 | lo = get_layout_by_fh(clp, &args->cbl_fh, &args->cbl_stateid); |
166 | if (!lo) | 170 | if (!lo) |
167 | return NFS4ERR_NOMATCHING_LAYOUT; | 171 | goto out; |
168 | 172 | ||
169 | ino = lo->plh_inode; | 173 | ino = lo->plh_inode; |
170 | spin_lock(&ino->i_lock); | 174 | spin_lock(&ino->i_lock); |
@@ -179,6 +183,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, | |||
179 | pnfs_free_lseg_list(&free_me_list); | 183 | pnfs_free_lseg_list(&free_me_list); |
180 | pnfs_put_layout_hdr(lo); | 184 | pnfs_put_layout_hdr(lo); |
181 | iput(ino); | 185 | iput(ino); |
186 | out: | ||
182 | return rv; | 187 | return rv; |
183 | } | 188 | } |
184 | 189 | ||
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4a48fe4b84b6..d9f3d067cd15 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
@@ -69,21 +69,28 @@ const struct address_space_operations nfs_dir_aops = { | |||
69 | 69 | ||
70 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) | 70 | static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred) |
71 | { | 71 | { |
72 | struct nfs_inode *nfsi = NFS_I(dir); | ||
72 | struct nfs_open_dir_context *ctx; | 73 | struct nfs_open_dir_context *ctx; |
73 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); | 74 | ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); |
74 | if (ctx != NULL) { | 75 | if (ctx != NULL) { |
75 | ctx->duped = 0; | 76 | ctx->duped = 0; |
76 | ctx->attr_gencount = NFS_I(dir)->attr_gencount; | 77 | ctx->attr_gencount = nfsi->attr_gencount; |
77 | ctx->dir_cookie = 0; | 78 | ctx->dir_cookie = 0; |
78 | ctx->dup_cookie = 0; | 79 | ctx->dup_cookie = 0; |
79 | ctx->cred = get_rpccred(cred); | 80 | ctx->cred = get_rpccred(cred); |
81 | spin_lock(&dir->i_lock); | ||
82 | list_add(&ctx->list, &nfsi->open_files); | ||
83 | spin_unlock(&dir->i_lock); | ||
80 | return ctx; | 84 | return ctx; |
81 | } | 85 | } |
82 | return ERR_PTR(-ENOMEM); | 86 | return ERR_PTR(-ENOMEM); |
83 | } | 87 | } |
84 | 88 | ||
85 | static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx) | 89 | static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx) |
86 | { | 90 | { |
91 | spin_lock(&dir->i_lock); | ||
92 | list_del(&ctx->list); | ||
93 | spin_unlock(&dir->i_lock); | ||
87 | put_rpccred(ctx->cred); | 94 | put_rpccred(ctx->cred); |
88 | kfree(ctx); | 95 | kfree(ctx); |
89 | } | 96 | } |
@@ -126,7 +133,7 @@ out: | |||
126 | static int | 133 | static int |
127 | nfs_closedir(struct inode *inode, struct file *filp) | 134 | nfs_closedir(struct inode *inode, struct file *filp) |
128 | { | 135 | { |
129 | put_nfs_open_dir_context(filp->private_data); | 136 | put_nfs_open_dir_context(filp->f_path.dentry->d_inode, filp->private_data); |
130 | return 0; | 137 | return 0; |
131 | } | 138 | } |
132 | 139 | ||
@@ -306,10 +313,9 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
306 | if (printk_ratelimit()) { | 313 | if (printk_ratelimit()) { |
307 | pr_notice("NFS: directory %pD2 contains a readdir loop." | 314 | pr_notice("NFS: directory %pD2 contains a readdir loop." |
308 | "Please contact your server vendor. " | 315 | "Please contact your server vendor. " |
309 | "The file: %s has duplicate cookie %llu\n", | 316 | "The file: %.*s has duplicate cookie %llu\n", |
310 | desc->file, | 317 | desc->file, array->array[i].string.len, |
311 | array->array[i].string.name, | 318 | array->array[i].string.name, *desc->dir_cookie); |
312 | *desc->dir_cookie); | ||
313 | } | 319 | } |
314 | status = -ELOOP; | 320 | status = -ELOOP; |
315 | goto out; | 321 | goto out; |
@@ -437,6 +443,22 @@ void nfs_advise_use_readdirplus(struct inode *dir) | |||
437 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); | 443 | set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags); |
438 | } | 444 | } |
439 | 445 | ||
446 | /* | ||
447 | * This function is mainly for use by nfs_getattr(). | ||
448 | * | ||
449 | * If this is an 'ls -l', we want to force use of readdirplus. | ||
450 | * Do this by checking if there is an active file descriptor | ||
451 | * and calling nfs_advise_use_readdirplus, then forcing a | ||
452 | * cache flush. | ||
453 | */ | ||
454 | void nfs_force_use_readdirplus(struct inode *dir) | ||
455 | { | ||
456 | if (!list_empty(&NFS_I(dir)->open_files)) { | ||
457 | nfs_advise_use_readdirplus(dir); | ||
458 | nfs_zap_mapping(dir, dir->i_mapping); | ||
459 | } | ||
460 | } | ||
461 | |||
440 | static | 462 | static |
441 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) | 463 | void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry) |
442 | { | 464 | { |
@@ -815,6 +837,17 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) | |||
815 | goto out; | 837 | goto out; |
816 | } | 838 | } |
817 | 839 | ||
840 | static bool nfs_dir_mapping_need_revalidate(struct inode *dir) | ||
841 | { | ||
842 | struct nfs_inode *nfsi = NFS_I(dir); | ||
843 | |||
844 | if (nfs_attribute_cache_expired(dir)) | ||
845 | return true; | ||
846 | if (nfsi->cache_validity & NFS_INO_INVALID_DATA) | ||
847 | return true; | ||
848 | return false; | ||
849 | } | ||
850 | |||
818 | /* The file offset position represents the dirent entry number. A | 851 | /* The file offset position represents the dirent entry number. A |
819 | last cookie cache takes care of the common case of reading the | 852 | last cookie cache takes care of the common case of reading the |
820 | whole directory. | 853 | whole directory. |
@@ -847,7 +880,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) | |||
847 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; | 880 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; |
848 | 881 | ||
849 | nfs_block_sillyrename(dentry); | 882 | nfs_block_sillyrename(dentry); |
850 | if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) | 883 | if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode)) |
851 | res = nfs_revalidate_mapping(inode, file->f_mapping); | 884 | res = nfs_revalidate_mapping(inode, file->f_mapping); |
852 | if (res < 0) | 885 | if (res < 0) |
853 | goto out; | 886 | goto out; |
@@ -1911,6 +1944,7 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1911 | struct inode *old_inode = old_dentry->d_inode; | 1944 | struct inode *old_inode = old_dentry->d_inode; |
1912 | struct inode *new_inode = new_dentry->d_inode; | 1945 | struct inode *new_inode = new_dentry->d_inode; |
1913 | struct dentry *dentry = NULL, *rehash = NULL; | 1946 | struct dentry *dentry = NULL, *rehash = NULL; |
1947 | struct rpc_task *task; | ||
1914 | int error = -EBUSY; | 1948 | int error = -EBUSY; |
1915 | 1949 | ||
1916 | dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", | 1950 | dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n", |
@@ -1958,8 +1992,16 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
1958 | if (new_inode != NULL) | 1992 | if (new_inode != NULL) |
1959 | NFS_PROTO(new_inode)->return_delegation(new_inode); | 1993 | NFS_PROTO(new_inode)->return_delegation(new_inode); |
1960 | 1994 | ||
1961 | error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, | 1995 | task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); |
1962 | new_dir, &new_dentry->d_name); | 1996 | if (IS_ERR(task)) { |
1997 | error = PTR_ERR(task); | ||
1998 | goto out; | ||
1999 | } | ||
2000 | |||
2001 | error = rpc_wait_for_completion_task(task); | ||
2002 | if (error == 0) | ||
2003 | error = task->tk_status; | ||
2004 | rpc_put_task(task); | ||
1963 | nfs_mark_for_revalidate(old_inode); | 2005 | nfs_mark_for_revalidate(old_inode); |
1964 | out: | 2006 | out: |
1965 | if (rehash) | 2007 | if (rehash) |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 5bb790a69c71..284ca901fe16 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
@@ -617,6 +617,7 @@ out: | |||
617 | 617 | ||
618 | static const struct vm_operations_struct nfs_file_vm_ops = { | 618 | static const struct vm_operations_struct nfs_file_vm_ops = { |
619 | .fault = filemap_fault, | 619 | .fault = filemap_fault, |
620 | .map_pages = filemap_map_pages, | ||
620 | .page_mkwrite = nfs_vm_page_mkwrite, | 621 | .page_mkwrite = nfs_vm_page_mkwrite, |
621 | .remap_pages = generic_file_remap_pages, | 622 | .remap_pages = generic_file_remap_pages, |
622 | }; | 623 | }; |
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 360114ae8b82..0c438973f3c8 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
@@ -128,7 +128,7 @@ EXPORT_SYMBOL_GPL(nfs_clear_inode); | |||
128 | 128 | ||
129 | void nfs_evict_inode(struct inode *inode) | 129 | void nfs_evict_inode(struct inode *inode) |
130 | { | 130 | { |
131 | truncate_inode_pages(&inode->i_data, 0); | 131 | truncate_inode_pages_final(&inode->i_data); |
132 | clear_inode(inode); | 132 | clear_inode(inode); |
133 | nfs_clear_inode(inode); | 133 | nfs_clear_inode(inode); |
134 | } | 134 | } |
@@ -588,6 +588,25 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
588 | } | 588 | } |
589 | EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); | 589 | EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); |
590 | 590 | ||
591 | static void nfs_request_parent_use_readdirplus(struct dentry *dentry) | ||
592 | { | ||
593 | struct dentry *parent; | ||
594 | |||
595 | parent = dget_parent(dentry); | ||
596 | nfs_force_use_readdirplus(parent->d_inode); | ||
597 | dput(parent); | ||
598 | } | ||
599 | |||
600 | static bool nfs_need_revalidate_inode(struct inode *inode) | ||
601 | { | ||
602 | if (NFS_I(inode)->cache_validity & | ||
603 | (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) | ||
604 | return true; | ||
605 | if (nfs_attribute_cache_expired(inode)) | ||
606 | return true; | ||
607 | return false; | ||
608 | } | ||
609 | |||
591 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 610 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
592 | { | 611 | { |
593 | struct inode *inode = dentry->d_inode; | 612 | struct inode *inode = dentry->d_inode; |
@@ -616,10 +635,13 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | |||
616 | ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) | 635 | ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) |
617 | need_atime = 0; | 636 | need_atime = 0; |
618 | 637 | ||
619 | if (need_atime) | 638 | if (need_atime || nfs_need_revalidate_inode(inode)) { |
620 | err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); | 639 | struct nfs_server *server = NFS_SERVER(inode); |
621 | else | 640 | |
622 | err = nfs_revalidate_inode(NFS_SERVER(inode), inode); | 641 | if (server->caps & NFS_CAP_READDIRPLUS) |
642 | nfs_request_parent_use_readdirplus(dentry); | ||
643 | err = __nfs_revalidate_inode(server, inode); | ||
644 | } | ||
623 | if (!err) { | 645 | if (!err) { |
624 | generic_fillattr(inode, stat); | 646 | generic_fillattr(inode, stat); |
625 | stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); | 647 | stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); |
@@ -961,9 +983,7 @@ int nfs_attribute_cache_expired(struct inode *inode) | |||
961 | */ | 983 | */ |
962 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | 984 | int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) |
963 | { | 985 | { |
964 | if (!(NFS_I(inode)->cache_validity & | 986 | if (!nfs_need_revalidate_inode(inode)) |
965 | (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) | ||
966 | && !nfs_attribute_cache_expired(inode)) | ||
967 | return NFS_STALE(inode) ? -ESTALE : 0; | 987 | return NFS_STALE(inode) ? -ESTALE : 0; |
968 | return __nfs_revalidate_inode(server, inode); | 988 | return __nfs_revalidate_inode(server, inode); |
969 | } | 989 | } |
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b46cf5a67329..dd8bfc2e2464 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
@@ -301,6 +301,7 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp, | |||
301 | const char *ip_addr); | 301 | const char *ip_addr); |
302 | 302 | ||
303 | /* dir.c */ | 303 | /* dir.c */ |
304 | extern void nfs_force_use_readdirplus(struct inode *dir); | ||
304 | extern unsigned long nfs_access_cache_count(struct shrinker *shrink, | 305 | extern unsigned long nfs_access_cache_count(struct shrinker *shrink, |
305 | struct shrink_control *sc); | 306 | struct shrink_control *sc); |
306 | extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, | 307 | extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, |
@@ -474,6 +475,13 @@ extern int nfs_migrate_page(struct address_space *, | |||
474 | #define nfs_migrate_page NULL | 475 | #define nfs_migrate_page NULL |
475 | #endif | 476 | #endif |
476 | 477 | ||
478 | /* unlink.c */ | ||
479 | extern struct rpc_task * | ||
480 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | ||
481 | struct dentry *old_dentry, struct dentry *new_dentry, | ||
482 | void (*complete)(struct rpc_task *, struct nfs_renamedata *)); | ||
483 | extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); | ||
484 | |||
477 | /* direct.c */ | 485 | /* direct.c */ |
478 | void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, | 486 | void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, |
479 | struct nfs_direct_req *dreq); | 487 | struct nfs_direct_req *dreq); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index a462ef0fb5d6..db60149c4579 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
@@ -479,41 +479,6 @@ nfs3_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
479 | } | 479 | } |
480 | 480 | ||
481 | static int | 481 | static int |
482 | nfs3_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
483 | struct inode *new_dir, struct qstr *new_name) | ||
484 | { | ||
485 | struct nfs_renameargs arg = { | ||
486 | .old_dir = NFS_FH(old_dir), | ||
487 | .old_name = old_name, | ||
488 | .new_dir = NFS_FH(new_dir), | ||
489 | .new_name = new_name, | ||
490 | }; | ||
491 | struct nfs_renameres res; | ||
492 | struct rpc_message msg = { | ||
493 | .rpc_proc = &nfs3_procedures[NFS3PROC_RENAME], | ||
494 | .rpc_argp = &arg, | ||
495 | .rpc_resp = &res, | ||
496 | }; | ||
497 | int status = -ENOMEM; | ||
498 | |||
499 | dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); | ||
500 | |||
501 | res.old_fattr = nfs_alloc_fattr(); | ||
502 | res.new_fattr = nfs_alloc_fattr(); | ||
503 | if (res.old_fattr == NULL || res.new_fattr == NULL) | ||
504 | goto out; | ||
505 | |||
506 | status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); | ||
507 | nfs_post_op_update_inode(old_dir, res.old_fattr); | ||
508 | nfs_post_op_update_inode(new_dir, res.new_fattr); | ||
509 | out: | ||
510 | nfs_free_fattr(res.old_fattr); | ||
511 | nfs_free_fattr(res.new_fattr); | ||
512 | dprintk("NFS reply rename: %d\n", status); | ||
513 | return status; | ||
514 | } | ||
515 | |||
516 | static int | ||
517 | nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) | 482 | nfs3_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) |
518 | { | 483 | { |
519 | struct nfs3_linkargs arg = { | 484 | struct nfs3_linkargs arg = { |
@@ -968,7 +933,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = { | |||
968 | .unlink_setup = nfs3_proc_unlink_setup, | 933 | .unlink_setup = nfs3_proc_unlink_setup, |
969 | .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, | 934 | .unlink_rpc_prepare = nfs3_proc_unlink_rpc_prepare, |
970 | .unlink_done = nfs3_proc_unlink_done, | 935 | .unlink_done = nfs3_proc_unlink_done, |
971 | .rename = nfs3_proc_rename, | ||
972 | .rename_setup = nfs3_proc_rename_setup, | 936 | .rename_setup = nfs3_proc_rename_setup, |
973 | .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, | 937 | .rename_rpc_prepare = nfs3_proc_rename_rpc_prepare, |
974 | .rename_done = nfs3_proc_rename_done, | 938 | .rename_done = nfs3_proc_rename_done, |
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index a5b27c2d9689..e1d1badbe53c 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h | |||
@@ -427,6 +427,7 @@ extern void nfs4_close_sync(struct nfs4_state *, fmode_t); | |||
427 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); | 427 | extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); |
428 | extern void nfs_inode_find_state_and_recover(struct inode *inode, | 428 | extern void nfs_inode_find_state_and_recover(struct inode *inode, |
429 | const nfs4_stateid *stateid); | 429 | const nfs4_stateid *stateid); |
430 | extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *, struct nfs4_state *); | ||
430 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); | 431 | extern void nfs4_schedule_lease_recovery(struct nfs_client *); |
431 | extern int nfs4_wait_clnt_recover(struct nfs_client *clp); | 432 | extern int nfs4_wait_clnt_recover(struct nfs_client *clp); |
432 | extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); | 433 | extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); |
@@ -500,6 +501,16 @@ static inline bool nfs4_stateid_match(const nfs4_stateid *dst, const nfs4_statei | |||
500 | return memcmp(dst, src, sizeof(*dst)) == 0; | 501 | return memcmp(dst, src, sizeof(*dst)) == 0; |
501 | } | 502 | } |
502 | 503 | ||
504 | static inline bool nfs4_stateid_match_other(const nfs4_stateid *dst, const nfs4_stateid *src) | ||
505 | { | ||
506 | return memcmp(dst->other, src->other, NFS4_STATEID_OTHER_SIZE) == 0; | ||
507 | } | ||
508 | |||
509 | static inline bool nfs4_stateid_is_newer(const nfs4_stateid *s1, const nfs4_stateid *s2) | ||
510 | { | ||
511 | return (s32)(be32_to_cpu(s1->seqid) - be32_to_cpu(s2->seqid)) > 0; | ||
512 | } | ||
513 | |||
503 | static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) | 514 | static inline bool nfs4_valid_open_stateid(const struct nfs4_state *state) |
504 | { | 515 | { |
505 | return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; | 516 | return test_bit(NFS_STATE_RECOVERY_FAILED, &state->flags) == 0; |
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 0e46d3d1b6cc..aa9ef4876046 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c | |||
@@ -531,6 +531,13 @@ int nfs40_walk_client_list(struct nfs_client *new, | |||
531 | *result = pos; | 531 | *result = pos; |
532 | dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", | 532 | dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", |
533 | __func__, pos, atomic_read(&pos->cl_count)); | 533 | __func__, pos, atomic_read(&pos->cl_count)); |
534 | goto out; | ||
535 | case -ERESTARTSYS: | ||
536 | case -ETIMEDOUT: | ||
537 | /* The callback path may have been inadvertently | ||
538 | * changed. Schedule recovery! | ||
539 | */ | ||
540 | nfs4_schedule_path_down_recovery(pos); | ||
534 | default: | 541 | default: |
535 | goto out; | 542 | goto out; |
536 | } | 543 | } |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 450bfedbe2f4..397be39c6dc8 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -1068,6 +1068,7 @@ static void nfs4_opendata_free(struct kref *kref) | |||
1068 | dput(p->dentry); | 1068 | dput(p->dentry); |
1069 | nfs_sb_deactive(sb); | 1069 | nfs_sb_deactive(sb); |
1070 | nfs_fattr_free_names(&p->f_attr); | 1070 | nfs_fattr_free_names(&p->f_attr); |
1071 | kfree(p->f_attr.mdsthreshold); | ||
1071 | kfree(p); | 1072 | kfree(p); |
1072 | } | 1073 | } |
1073 | 1074 | ||
@@ -1137,12 +1138,71 @@ static void update_open_stateflags(struct nfs4_state *state, fmode_t fmode) | |||
1137 | nfs4_state_set_mode_locked(state, state->state | fmode); | 1138 | nfs4_state_set_mode_locked(state, state->state | fmode); |
1138 | } | 1139 | } |
1139 | 1140 | ||
1140 | static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | 1141 | static void nfs_test_and_clear_all_open_stateid(struct nfs4_state *state) |
1142 | { | ||
1143 | struct nfs_client *clp = state->owner->so_server->nfs_client; | ||
1144 | bool need_recover = false; | ||
1145 | |||
1146 | if (test_and_clear_bit(NFS_O_RDONLY_STATE, &state->flags) && state->n_rdonly) | ||
1147 | need_recover = true; | ||
1148 | if (test_and_clear_bit(NFS_O_WRONLY_STATE, &state->flags) && state->n_wronly) | ||
1149 | need_recover = true; | ||
1150 | if (test_and_clear_bit(NFS_O_RDWR_STATE, &state->flags) && state->n_rdwr) | ||
1151 | need_recover = true; | ||
1152 | if (need_recover) | ||
1153 | nfs4_state_mark_reclaim_nograce(clp, state); | ||
1154 | } | ||
1155 | |||
1156 | static bool nfs_need_update_open_stateid(struct nfs4_state *state, | ||
1157 | nfs4_stateid *stateid) | ||
1158 | { | ||
1159 | if (test_and_set_bit(NFS_OPEN_STATE, &state->flags) == 0) | ||
1160 | return true; | ||
1161 | if (!nfs4_stateid_match_other(stateid, &state->open_stateid)) { | ||
1162 | nfs_test_and_clear_all_open_stateid(state); | ||
1163 | return true; | ||
1164 | } | ||
1165 | if (nfs4_stateid_is_newer(stateid, &state->open_stateid)) | ||
1166 | return true; | ||
1167 | return false; | ||
1168 | } | ||
1169 | |||
1170 | static void nfs_clear_open_stateid_locked(struct nfs4_state *state, | ||
1171 | nfs4_stateid *stateid, fmode_t fmode) | ||
1141 | { | 1172 | { |
1173 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
1174 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { | ||
1175 | case FMODE_WRITE: | ||
1176 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
1177 | break; | ||
1178 | case FMODE_READ: | ||
1179 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
1180 | break; | ||
1181 | case 0: | ||
1182 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
1183 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
1184 | clear_bit(NFS_OPEN_STATE, &state->flags); | ||
1185 | } | ||
1186 | if (stateid == NULL) | ||
1187 | return; | ||
1188 | if (!nfs_need_update_open_stateid(state, stateid)) | ||
1189 | return; | ||
1142 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) | 1190 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
1143 | nfs4_stateid_copy(&state->stateid, stateid); | 1191 | nfs4_stateid_copy(&state->stateid, stateid); |
1144 | nfs4_stateid_copy(&state->open_stateid, stateid); | 1192 | nfs4_stateid_copy(&state->open_stateid, stateid); |
1145 | set_bit(NFS_OPEN_STATE, &state->flags); | 1193 | } |
1194 | |||
1195 | static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | ||
1196 | { | ||
1197 | write_seqlock(&state->seqlock); | ||
1198 | nfs_clear_open_stateid_locked(state, stateid, fmode); | ||
1199 | write_sequnlock(&state->seqlock); | ||
1200 | if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) | ||
1201 | nfs4_schedule_state_manager(state->owner->so_server->nfs_client); | ||
1202 | } | ||
1203 | |||
1204 | static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | ||
1205 | { | ||
1146 | switch (fmode) { | 1206 | switch (fmode) { |
1147 | case FMODE_READ: | 1207 | case FMODE_READ: |
1148 | set_bit(NFS_O_RDONLY_STATE, &state->flags); | 1208 | set_bit(NFS_O_RDONLY_STATE, &state->flags); |
@@ -1153,13 +1213,11 @@ static void nfs_set_open_stateid_locked(struct nfs4_state *state, nfs4_stateid * | |||
1153 | case FMODE_READ|FMODE_WRITE: | 1213 | case FMODE_READ|FMODE_WRITE: |
1154 | set_bit(NFS_O_RDWR_STATE, &state->flags); | 1214 | set_bit(NFS_O_RDWR_STATE, &state->flags); |
1155 | } | 1215 | } |
1156 | } | 1216 | if (!nfs_need_update_open_stateid(state, stateid)) |
1157 | 1217 | return; | |
1158 | static void nfs_set_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode) | 1218 | if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) |
1159 | { | 1219 | nfs4_stateid_copy(&state->stateid, stateid); |
1160 | write_seqlock(&state->seqlock); | 1220 | nfs4_stateid_copy(&state->open_stateid, stateid); |
1161 | nfs_set_open_stateid_locked(state, stateid, fmode); | ||
1162 | write_sequnlock(&state->seqlock); | ||
1163 | } | 1221 | } |
1164 | 1222 | ||
1165 | static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) | 1223 | static void __update_open_stateid(struct nfs4_state *state, nfs4_stateid *open_stateid, const nfs4_stateid *deleg_stateid, fmode_t fmode) |
@@ -1217,6 +1275,8 @@ no_delegation: | |||
1217 | __update_open_stateid(state, open_stateid, NULL, fmode); | 1275 | __update_open_stateid(state, open_stateid, NULL, fmode); |
1218 | ret = 1; | 1276 | ret = 1; |
1219 | } | 1277 | } |
1278 | if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags)) | ||
1279 | nfs4_schedule_state_manager(state->owner->so_server->nfs_client); | ||
1220 | 1280 | ||
1221 | return ret; | 1281 | return ret; |
1222 | } | 1282 | } |
@@ -1450,12 +1510,15 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1450 | struct nfs4_state *newstate; | 1510 | struct nfs4_state *newstate; |
1451 | int ret; | 1511 | int ret; |
1452 | 1512 | ||
1513 | /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */ | ||
1514 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
1515 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
1516 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
1453 | /* memory barrier prior to reading state->n_* */ | 1517 | /* memory barrier prior to reading state->n_* */ |
1454 | clear_bit(NFS_DELEGATED_STATE, &state->flags); | 1518 | clear_bit(NFS_DELEGATED_STATE, &state->flags); |
1455 | clear_bit(NFS_OPEN_STATE, &state->flags); | 1519 | clear_bit(NFS_OPEN_STATE, &state->flags); |
1456 | smp_rmb(); | 1520 | smp_rmb(); |
1457 | if (state->n_rdwr != 0) { | 1521 | if (state->n_rdwr != 0) { |
1458 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
1459 | ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); | 1522 | ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE, &newstate); |
1460 | if (ret != 0) | 1523 | if (ret != 0) |
1461 | return ret; | 1524 | return ret; |
@@ -1463,7 +1526,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1463 | return -ESTALE; | 1526 | return -ESTALE; |
1464 | } | 1527 | } |
1465 | if (state->n_wronly != 0) { | 1528 | if (state->n_wronly != 0) { |
1466 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
1467 | ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); | 1529 | ret = nfs4_open_recover_helper(opendata, FMODE_WRITE, &newstate); |
1468 | if (ret != 0) | 1530 | if (ret != 0) |
1469 | return ret; | 1531 | return ret; |
@@ -1471,7 +1533,6 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state * | |||
1471 | return -ESTALE; | 1533 | return -ESTALE; |
1472 | } | 1534 | } |
1473 | if (state->n_rdonly != 0) { | 1535 | if (state->n_rdonly != 0) { |
1474 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
1475 | ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); | 1536 | ret = nfs4_open_recover_helper(opendata, FMODE_READ, &newstate); |
1476 | if (ret != 0) | 1537 | if (ret != 0) |
1477 | return ret; | 1538 | return ret; |
@@ -2244,10 +2305,12 @@ static int _nfs4_do_open(struct inode *dir, | |||
2244 | } | 2305 | } |
2245 | } | 2306 | } |
2246 | 2307 | ||
2247 | if (ctx_th && server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { | 2308 | if (server->attr_bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD) { |
2248 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); | 2309 | if (!opendata->f_attr.mdsthreshold) { |
2249 | if (!opendata->f_attr.mdsthreshold) | 2310 | opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc(); |
2250 | goto err_free_label; | 2311 | if (!opendata->f_attr.mdsthreshold) |
2312 | goto err_free_label; | ||
2313 | } | ||
2251 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; | 2314 | opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0]; |
2252 | } | 2315 | } |
2253 | if (dentry->d_inode != NULL) | 2316 | if (dentry->d_inode != NULL) |
@@ -2275,11 +2338,10 @@ static int _nfs4_do_open(struct inode *dir, | |||
2275 | if (opendata->file_created) | 2338 | if (opendata->file_created) |
2276 | *opened |= FILE_CREATED; | 2339 | *opened |= FILE_CREATED; |
2277 | 2340 | ||
2278 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) | 2341 | if (pnfs_use_threshold(ctx_th, opendata->f_attr.mdsthreshold, server)) { |
2279 | *ctx_th = opendata->f_attr.mdsthreshold; | 2342 | *ctx_th = opendata->f_attr.mdsthreshold; |
2280 | else | 2343 | opendata->f_attr.mdsthreshold = NULL; |
2281 | kfree(opendata->f_attr.mdsthreshold); | 2344 | } |
2282 | opendata->f_attr.mdsthreshold = NULL; | ||
2283 | 2345 | ||
2284 | nfs4_label_free(olabel); | 2346 | nfs4_label_free(olabel); |
2285 | 2347 | ||
@@ -2289,7 +2351,6 @@ static int _nfs4_do_open(struct inode *dir, | |||
2289 | err_free_label: | 2351 | err_free_label: |
2290 | nfs4_label_free(olabel); | 2352 | nfs4_label_free(olabel); |
2291 | err_opendata_put: | 2353 | err_opendata_put: |
2292 | kfree(opendata->f_attr.mdsthreshold); | ||
2293 | nfs4_opendata_put(opendata); | 2354 | nfs4_opendata_put(opendata); |
2294 | err_put_state_owner: | 2355 | err_put_state_owner: |
2295 | nfs4_put_state_owner(sp); | 2356 | nfs4_put_state_owner(sp); |
@@ -2479,26 +2540,6 @@ static void nfs4_free_closedata(void *data) | |||
2479 | kfree(calldata); | 2540 | kfree(calldata); |
2480 | } | 2541 | } |
2481 | 2542 | ||
2482 | static void nfs4_close_clear_stateid_flags(struct nfs4_state *state, | ||
2483 | fmode_t fmode) | ||
2484 | { | ||
2485 | spin_lock(&state->owner->so_lock); | ||
2486 | clear_bit(NFS_O_RDWR_STATE, &state->flags); | ||
2487 | switch (fmode & (FMODE_READ|FMODE_WRITE)) { | ||
2488 | case FMODE_WRITE: | ||
2489 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
2490 | break; | ||
2491 | case FMODE_READ: | ||
2492 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
2493 | break; | ||
2494 | case 0: | ||
2495 | clear_bit(NFS_O_RDONLY_STATE, &state->flags); | ||
2496 | clear_bit(NFS_O_WRONLY_STATE, &state->flags); | ||
2497 | clear_bit(NFS_OPEN_STATE, &state->flags); | ||
2498 | } | ||
2499 | spin_unlock(&state->owner->so_lock); | ||
2500 | } | ||
2501 | |||
2502 | static void nfs4_close_done(struct rpc_task *task, void *data) | 2543 | static void nfs4_close_done(struct rpc_task *task, void *data) |
2503 | { | 2544 | { |
2504 | struct nfs4_closedata *calldata = data; | 2545 | struct nfs4_closedata *calldata = data; |
@@ -2517,9 +2558,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2517 | if (calldata->roc) | 2558 | if (calldata->roc) |
2518 | pnfs_roc_set_barrier(state->inode, | 2559 | pnfs_roc_set_barrier(state->inode, |
2519 | calldata->roc_barrier); | 2560 | calldata->roc_barrier); |
2520 | nfs_set_open_stateid(state, &calldata->res.stateid, 0); | 2561 | nfs_clear_open_stateid(state, &calldata->res.stateid, 0); |
2521 | renew_lease(server, calldata->timestamp); | 2562 | renew_lease(server, calldata->timestamp); |
2522 | break; | 2563 | goto out_release; |
2523 | case -NFS4ERR_ADMIN_REVOKED: | 2564 | case -NFS4ERR_ADMIN_REVOKED: |
2524 | case -NFS4ERR_STALE_STATEID: | 2565 | case -NFS4ERR_STALE_STATEID: |
2525 | case -NFS4ERR_OLD_STATEID: | 2566 | case -NFS4ERR_OLD_STATEID: |
@@ -2533,7 +2574,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data) | |||
2533 | goto out_release; | 2574 | goto out_release; |
2534 | } | 2575 | } |
2535 | } | 2576 | } |
2536 | nfs4_close_clear_stateid_flags(state, calldata->arg.fmode); | 2577 | nfs_clear_open_stateid(state, NULL, calldata->arg.fmode); |
2537 | out_release: | 2578 | out_release: |
2538 | nfs_release_seqid(calldata->arg.seqid); | 2579 | nfs_release_seqid(calldata->arg.seqid); |
2539 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); | 2580 | nfs_refresh_inode(calldata->inode, calldata->res.fattr); |
@@ -3507,49 +3548,6 @@ static int nfs4_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
3507 | return 1; | 3548 | return 1; |
3508 | } | 3549 | } |
3509 | 3550 | ||
3510 | static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
3511 | struct inode *new_dir, struct qstr *new_name) | ||
3512 | { | ||
3513 | struct nfs_server *server = NFS_SERVER(old_dir); | ||
3514 | struct nfs_renameargs arg = { | ||
3515 | .old_dir = NFS_FH(old_dir), | ||
3516 | .new_dir = NFS_FH(new_dir), | ||
3517 | .old_name = old_name, | ||
3518 | .new_name = new_name, | ||
3519 | }; | ||
3520 | struct nfs_renameres res = { | ||
3521 | .server = server, | ||
3522 | }; | ||
3523 | struct rpc_message msg = { | ||
3524 | .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], | ||
3525 | .rpc_argp = &arg, | ||
3526 | .rpc_resp = &res, | ||
3527 | }; | ||
3528 | int status = -ENOMEM; | ||
3529 | |||
3530 | status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); | ||
3531 | if (!status) { | ||
3532 | update_changeattr(old_dir, &res.old_cinfo); | ||
3533 | update_changeattr(new_dir, &res.new_cinfo); | ||
3534 | } | ||
3535 | return status; | ||
3536 | } | ||
3537 | |||
3538 | static int nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
3539 | struct inode *new_dir, struct qstr *new_name) | ||
3540 | { | ||
3541 | struct nfs4_exception exception = { }; | ||
3542 | int err; | ||
3543 | do { | ||
3544 | err = _nfs4_proc_rename(old_dir, old_name, | ||
3545 | new_dir, new_name); | ||
3546 | trace_nfs4_rename(old_dir, old_name, new_dir, new_name, err); | ||
3547 | err = nfs4_handle_exception(NFS_SERVER(old_dir), err, | ||
3548 | &exception); | ||
3549 | } while (exception.retry); | ||
3550 | return err; | ||
3551 | } | ||
3552 | |||
3553 | static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) | 3551 | static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) |
3554 | { | 3552 | { |
3555 | struct nfs_server *server = NFS_SERVER(inode); | 3553 | struct nfs_server *server = NFS_SERVER(inode); |
@@ -4884,6 +4882,20 @@ nfs4_init_uniform_client_string(const struct nfs_client *clp, | |||
4884 | nodename); | 4882 | nodename); |
4885 | } | 4883 | } |
4886 | 4884 | ||
4885 | /* | ||
4886 | * nfs4_callback_up_net() starts only "tcp" and "tcp6" callback | ||
4887 | * services. Advertise one based on the address family of the | ||
4888 | * clientaddr. | ||
4889 | */ | ||
4890 | static unsigned int | ||
4891 | nfs4_init_callback_netid(const struct nfs_client *clp, char *buf, size_t len) | ||
4892 | { | ||
4893 | if (strchr(clp->cl_ipaddr, ':') != NULL) | ||
4894 | return scnprintf(buf, len, "tcp6"); | ||
4895 | else | ||
4896 | return scnprintf(buf, len, "tcp"); | ||
4897 | } | ||
4898 | |||
4887 | /** | 4899 | /** |
4888 | * nfs4_proc_setclientid - Negotiate client ID | 4900 | * nfs4_proc_setclientid - Negotiate client ID |
4889 | * @clp: state data structure | 4901 | * @clp: state data structure |
@@ -4925,12 +4937,10 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, | |||
4925 | setclientid.sc_name, | 4937 | setclientid.sc_name, |
4926 | sizeof(setclientid.sc_name)); | 4938 | sizeof(setclientid.sc_name)); |
4927 | /* cb_client4 */ | 4939 | /* cb_client4 */ |
4928 | rcu_read_lock(); | 4940 | setclientid.sc_netid_len = |
4929 | setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, | 4941 | nfs4_init_callback_netid(clp, |
4930 | sizeof(setclientid.sc_netid), "%s", | 4942 | setclientid.sc_netid, |
4931 | rpc_peeraddr2str(clp->cl_rpcclient, | 4943 | sizeof(setclientid.sc_netid)); |
4932 | RPC_DISPLAY_NETID)); | ||
4933 | rcu_read_unlock(); | ||
4934 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, | 4944 | setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr, |
4935 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", | 4945 | sizeof(setclientid.sc_uaddr), "%s.%u.%u", |
4936 | clp->cl_ipaddr, port >> 8, port & 255); | 4946 | clp->cl_ipaddr, port >> 8, port & 255); |
@@ -8408,7 +8418,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = { | |||
8408 | .unlink_setup = nfs4_proc_unlink_setup, | 8418 | .unlink_setup = nfs4_proc_unlink_setup, |
8409 | .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, | 8419 | .unlink_rpc_prepare = nfs4_proc_unlink_rpc_prepare, |
8410 | .unlink_done = nfs4_proc_unlink_done, | 8420 | .unlink_done = nfs4_proc_unlink_done, |
8411 | .rename = nfs4_proc_rename, | ||
8412 | .rename_setup = nfs4_proc_rename_setup, | 8421 | .rename_setup = nfs4_proc_rename_setup, |
8413 | .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, | 8422 | .rename_rpc_prepare = nfs4_proc_rename_rpc_prepare, |
8414 | .rename_done = nfs4_proc_rename_done, | 8423 | .rename_done = nfs4_proc_rename_done, |
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0deb32105ccf..2349518eef2c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c | |||
@@ -1316,7 +1316,7 @@ static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_st | |||
1316 | return 1; | 1316 | return 1; |
1317 | } | 1317 | } |
1318 | 1318 | ||
1319 | static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) | 1319 | int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) |
1320 | { | 1320 | { |
1321 | set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); | 1321 | set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); |
1322 | clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); | 1322 | clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); |
@@ -2075,8 +2075,10 @@ again: | |||
2075 | switch (status) { | 2075 | switch (status) { |
2076 | case 0: | 2076 | case 0: |
2077 | break; | 2077 | break; |
2078 | case -NFS4ERR_DELAY: | ||
2079 | case -ETIMEDOUT: | 2078 | case -ETIMEDOUT: |
2079 | if (clnt->cl_softrtry) | ||
2080 | break; | ||
2081 | case -NFS4ERR_DELAY: | ||
2080 | case -EAGAIN: | 2082 | case -EAGAIN: |
2081 | ssleep(1); | 2083 | ssleep(1); |
2082 | case -NFS4ERR_STALE_CLIENTID: | 2084 | case -NFS4ERR_STALE_CLIENTID: |
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 808f29574412..6f340f02f2ba 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c | |||
@@ -90,7 +90,7 @@ static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
90 | */ | 90 | */ |
91 | static void nfs4_evict_inode(struct inode *inode) | 91 | static void nfs4_evict_inode(struct inode *inode) |
92 | { | 92 | { |
93 | truncate_inode_pages(&inode->i_data, 0); | 93 | truncate_inode_pages_final(&inode->i_data); |
94 | clear_inode(inode); | 94 | clear_inode(inode); |
95 | pnfs_return_layout(inode); | 95 | pnfs_return_layout(inode); |
96 | pnfs_destroy_layout(NFS_I(inode)); | 96 | pnfs_destroy_layout(NFS_I(inode)); |
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 72f3bf1754ef..73ce8d4fe2c8 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c | |||
@@ -203,8 +203,7 @@ static int nfs4_stat_to_errno(int); | |||
203 | 2 + encode_verifier_maxsz + 5 + \ | 203 | 2 + encode_verifier_maxsz + 5 + \ |
204 | nfs4_label_maxsz) | 204 | nfs4_label_maxsz) |
205 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ | 205 | #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ |
206 | decode_verifier_maxsz + \ | 206 | decode_verifier_maxsz) |
207 | nfs4_label_maxsz + nfs4_fattr_maxsz) | ||
208 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) | 207 | #define encode_readlink_maxsz (op_encode_hdr_maxsz) |
209 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) | 208 | #define decode_readlink_maxsz (op_decode_hdr_maxsz + 1) |
210 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ | 209 | #define encode_write_maxsz (op_encode_hdr_maxsz + \ |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 4755858e37a0..cb53d450ae32 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -662,7 +662,18 @@ pnfs_destroy_all_layouts(struct nfs_client *clp) | |||
662 | */ | 662 | */ |
663 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) | 663 | static bool pnfs_seqid_is_newer(u32 s1, u32 s2) |
664 | { | 664 | { |
665 | return (s32)s1 - (s32)s2 > 0; | 665 | return (s32)(s1 - s2) > 0; |
666 | } | ||
667 | |||
668 | static void | ||
669 | pnfs_verify_layout_stateid(struct pnfs_layout_hdr *lo, | ||
670 | const nfs4_stateid *new, | ||
671 | struct list_head *free_me_list) | ||
672 | { | ||
673 | if (nfs4_stateid_match_other(&lo->plh_stateid, new)) | ||
674 | return; | ||
675 | /* Layout is new! Kill existing layout segments */ | ||
676 | pnfs_mark_matching_lsegs_invalid(lo, free_me_list, NULL); | ||
666 | } | 677 | } |
667 | 678 | ||
668 | /* update lo->plh_stateid with new if is more recent */ | 679 | /* update lo->plh_stateid with new if is more recent */ |
@@ -1315,6 +1326,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1315 | struct nfs4_layoutget_res *res = &lgp->res; | 1326 | struct nfs4_layoutget_res *res = &lgp->res; |
1316 | struct pnfs_layout_segment *lseg; | 1327 | struct pnfs_layout_segment *lseg; |
1317 | struct inode *ino = lo->plh_inode; | 1328 | struct inode *ino = lo->plh_inode; |
1329 | LIST_HEAD(free_me); | ||
1318 | int status = 0; | 1330 | int status = 0; |
1319 | 1331 | ||
1320 | /* Inject layout blob into I/O device driver */ | 1332 | /* Inject layout blob into I/O device driver */ |
@@ -1341,6 +1353,8 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1341 | goto out_forget_reply; | 1353 | goto out_forget_reply; |
1342 | } | 1354 | } |
1343 | 1355 | ||
1356 | /* Check that the new stateid matches the old stateid */ | ||
1357 | pnfs_verify_layout_stateid(lo, &res->stateid, &free_me); | ||
1344 | /* Done processing layoutget. Set the layout stateid */ | 1358 | /* Done processing layoutget. Set the layout stateid */ |
1345 | pnfs_set_layout_stateid(lo, &res->stateid, false); | 1359 | pnfs_set_layout_stateid(lo, &res->stateid, false); |
1346 | 1360 | ||
@@ -1355,6 +1369,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) | |||
1355 | } | 1369 | } |
1356 | 1370 | ||
1357 | spin_unlock(&ino->i_lock); | 1371 | spin_unlock(&ino->i_lock); |
1372 | pnfs_free_lseg_list(&free_me); | ||
1358 | return lseg; | 1373 | return lseg; |
1359 | out: | 1374 | out: |
1360 | return ERR_PTR(status); | 1375 | return ERR_PTR(status); |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index fddbba2d9eff..e55ce9e8b034 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
@@ -357,30 +357,6 @@ nfs_proc_rename_done(struct rpc_task *task, struct inode *old_dir, | |||
357 | } | 357 | } |
358 | 358 | ||
359 | static int | 359 | static int |
360 | nfs_proc_rename(struct inode *old_dir, struct qstr *old_name, | ||
361 | struct inode *new_dir, struct qstr *new_name) | ||
362 | { | ||
363 | struct nfs_renameargs arg = { | ||
364 | .old_dir = NFS_FH(old_dir), | ||
365 | .old_name = old_name, | ||
366 | .new_dir = NFS_FH(new_dir), | ||
367 | .new_name = new_name, | ||
368 | }; | ||
369 | struct rpc_message msg = { | ||
370 | .rpc_proc = &nfs_procedures[NFSPROC_RENAME], | ||
371 | .rpc_argp = &arg, | ||
372 | }; | ||
373 | int status; | ||
374 | |||
375 | dprintk("NFS call rename %s -> %s\n", old_name->name, new_name->name); | ||
376 | status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); | ||
377 | nfs_mark_for_revalidate(old_dir); | ||
378 | nfs_mark_for_revalidate(new_dir); | ||
379 | dprintk("NFS reply rename: %d\n", status); | ||
380 | return status; | ||
381 | } | ||
382 | |||
383 | static int | ||
384 | nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) | 360 | nfs_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) |
385 | { | 361 | { |
386 | struct nfs_linkargs arg = { | 362 | struct nfs_linkargs arg = { |
@@ -745,7 +721,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = { | |||
745 | .unlink_setup = nfs_proc_unlink_setup, | 721 | .unlink_setup = nfs_proc_unlink_setup, |
746 | .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, | 722 | .unlink_rpc_prepare = nfs_proc_unlink_rpc_prepare, |
747 | .unlink_done = nfs_proc_unlink_done, | 723 | .unlink_done = nfs_proc_unlink_done, |
748 | .rename = nfs_proc_rename, | ||
749 | .rename_setup = nfs_proc_rename_setup, | 724 | .rename_setup = nfs_proc_rename_setup, |
750 | .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, | 725 | .rename_rpc_prepare = nfs_proc_rename_rpc_prepare, |
751 | .rename_done = nfs_proc_rename_done, | 726 | .rename_done = nfs_proc_rename_done, |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 910ed906eb82..2cb56943e232 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
@@ -2215,6 +2215,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) | |||
2215 | struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; | 2215 | struct nfs4_mount_data *options4 = (struct nfs4_mount_data *)raw_data; |
2216 | u32 nfsvers = nfss->nfs_client->rpc_ops->version; | 2216 | u32 nfsvers = nfss->nfs_client->rpc_ops->version; |
2217 | 2217 | ||
2218 | sync_filesystem(sb); | ||
2219 | |||
2218 | /* | 2220 | /* |
2219 | * Userspace mount programs that send binary options generally send | 2221 | * Userspace mount programs that send binary options generally send |
2220 | * them populated with default values. We have no way to know which | 2222 | * them populated with default values. We have no way to know which |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 11d78944de79..de54129336c6 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
@@ -14,6 +14,7 @@ | |||
14 | #include <linux/sched.h> | 14 | #include <linux/sched.h> |
15 | #include <linux/wait.h> | 15 | #include <linux/wait.h> |
16 | #include <linux/namei.h> | 16 | #include <linux/namei.h> |
17 | #include <linux/fsnotify.h> | ||
17 | 18 | ||
18 | #include "internal.h" | 19 | #include "internal.h" |
19 | #include "nfs4_fs.h" | 20 | #include "nfs4_fs.h" |
@@ -353,8 +354,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) | |||
353 | return; | 354 | return; |
354 | } | 355 | } |
355 | 356 | ||
356 | if (task->tk_status != 0) | 357 | if (data->complete) |
357 | nfs_cancel_async_unlink(old_dentry); | 358 | data->complete(task, data); |
358 | } | 359 | } |
359 | 360 | ||
360 | /** | 361 | /** |
@@ -399,9 +400,10 @@ static const struct rpc_call_ops nfs_rename_ops = { | |||
399 | * | 400 | * |
400 | * It's expected that valid references to the dentries and inodes are held | 401 | * It's expected that valid references to the dentries and inodes are held |
401 | */ | 402 | */ |
402 | static struct rpc_task * | 403 | struct rpc_task * |
403 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | 404 | nfs_async_rename(struct inode *old_dir, struct inode *new_dir, |
404 | struct dentry *old_dentry, struct dentry *new_dentry) | 405 | struct dentry *old_dentry, struct dentry *new_dentry, |
406 | void (*complete)(struct rpc_task *, struct nfs_renamedata *)) | ||
405 | { | 407 | { |
406 | struct nfs_renamedata *data; | 408 | struct nfs_renamedata *data; |
407 | struct rpc_message msg = { }; | 409 | struct rpc_message msg = { }; |
@@ -438,6 +440,7 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | |||
438 | data->new_dentry = dget(new_dentry); | 440 | data->new_dentry = dget(new_dentry); |
439 | nfs_fattr_init(&data->old_fattr); | 441 | nfs_fattr_init(&data->old_fattr); |
440 | nfs_fattr_init(&data->new_fattr); | 442 | nfs_fattr_init(&data->new_fattr); |
443 | data->complete = complete; | ||
441 | 444 | ||
442 | /* set up nfs_renameargs */ | 445 | /* set up nfs_renameargs */ |
443 | data->args.old_dir = NFS_FH(old_dir); | 446 | data->args.old_dir = NFS_FH(old_dir); |
@@ -456,6 +459,27 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, | |||
456 | return rpc_run_task(&task_setup_data); | 459 | return rpc_run_task(&task_setup_data); |
457 | } | 460 | } |
458 | 461 | ||
462 | /* | ||
463 | * Perform tasks needed when a sillyrename is done such as cancelling the | ||
464 | * queued async unlink if it failed. | ||
465 | */ | ||
466 | static void | ||
467 | nfs_complete_sillyrename(struct rpc_task *task, struct nfs_renamedata *data) | ||
468 | { | ||
469 | struct dentry *dentry = data->old_dentry; | ||
470 | |||
471 | if (task->tk_status != 0) { | ||
472 | nfs_cancel_async_unlink(dentry); | ||
473 | return; | ||
474 | } | ||
475 | |||
476 | /* | ||
477 | * vfs_unlink and the like do not issue this when a file is | ||
478 | * sillyrenamed, so do it here. | ||
479 | */ | ||
480 | fsnotify_nameremove(dentry, 0); | ||
481 | } | ||
482 | |||
459 | #define SILLYNAME_PREFIX ".nfs" | 483 | #define SILLYNAME_PREFIX ".nfs" |
460 | #define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) | 484 | #define SILLYNAME_PREFIX_LEN ((unsigned)sizeof(SILLYNAME_PREFIX) - 1) |
461 | #define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) | 485 | #define SILLYNAME_FILEID_LEN ((unsigned)sizeof(u64) << 1) |
@@ -548,7 +572,8 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) | |||
548 | } | 572 | } |
549 | 573 | ||
550 | /* run the rename task, undo unlink if it fails */ | 574 | /* run the rename task, undo unlink if it fails */ |
551 | task = nfs_async_rename(dir, dir, dentry, sdentry); | 575 | task = nfs_async_rename(dir, dir, dentry, sdentry, |
576 | nfs_complete_sillyrename); | ||
552 | if (IS_ERR(task)) { | 577 | if (IS_ERR(task)) { |
553 | error = -EBUSY; | 578 | error = -EBUSY; |
554 | nfs_cancel_async_unlink(dentry); | 579 | nfs_cancel_async_unlink(dentry); |
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 06cddd572264..2645be435e75 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c | |||
@@ -71,10 +71,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
71 | if (gid_eq(new->fsgid, INVALID_GID)) | 71 | if (gid_eq(new->fsgid, INVALID_GID)) |
72 | new->fsgid = exp->ex_anon_gid; | 72 | new->fsgid = exp->ex_anon_gid; |
73 | 73 | ||
74 | ret = set_groups(new, gi); | 74 | set_groups(new, gi); |
75 | put_group_info(gi); | 75 | put_group_info(gi); |
76 | if (ret < 0) | ||
77 | goto error; | ||
78 | 76 | ||
79 | if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) | 77 | if (!uid_eq(new->fsuid, GLOBAL_ROOT_UID)) |
80 | new->cap_effective = cap_drop_nfsd_set(new->cap_effective); | 78 | new->cap_effective = cap_drop_nfsd_set(new->cap_effective); |
@@ -89,7 +87,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) | |||
89 | 87 | ||
90 | oom: | 88 | oom: |
91 | ret = -ENOMEM; | 89 | ret = -ENOMEM; |
92 | error: | ||
93 | abort_creds(new); | 90 | abort_creds(new); |
94 | return ret; | 91 | return ret; |
95 | } | 92 | } |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 6d7be3f80356..915808b36df7 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
@@ -1694,7 +1694,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, | |||
1694 | if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) | 1694 | if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry) |
1695 | goto out_dput_new; | 1695 | goto out_dput_new; |
1696 | 1696 | ||
1697 | host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL); | 1697 | host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0); |
1698 | if (!host_err) { | 1698 | if (!host_err) { |
1699 | host_err = commit_metadata(tfhp); | 1699 | host_err = commit_metadata(tfhp); |
1700 | if (!host_err) | 1700 | if (!host_err) |
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c index deaa3d33a0aa..0d58075f34e2 100644 --- a/fs/nilfs2/cpfile.c +++ b/fs/nilfs2/cpfile.c | |||
@@ -942,6 +942,18 @@ int nilfs_cpfile_read(struct super_block *sb, size_t cpsize, | |||
942 | struct inode *cpfile; | 942 | struct inode *cpfile; |
943 | int err; | 943 | int err; |
944 | 944 | ||
945 | if (cpsize > sb->s_blocksize) { | ||
946 | printk(KERN_ERR | ||
947 | "NILFS: too large checkpoint size: %zu bytes.\n", | ||
948 | cpsize); | ||
949 | return -EINVAL; | ||
950 | } else if (cpsize < NILFS_MIN_CHECKPOINT_SIZE) { | ||
951 | printk(KERN_ERR | ||
952 | "NILFS: too small checkpoint size: %zu bytes.\n", | ||
953 | cpsize); | ||
954 | return -EINVAL; | ||
955 | } | ||
956 | |||
945 | cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); | 957 | cpfile = nilfs_iget_locked(sb, NULL, NILFS_CPFILE_INO); |
946 | if (unlikely(!cpfile)) | 958 | if (unlikely(!cpfile)) |
947 | return -ENOMEM; | 959 | return -ENOMEM; |
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c index fa0f80308c2d..0d5fada91191 100644 --- a/fs/nilfs2/dat.c +++ b/fs/nilfs2/dat.c | |||
@@ -484,6 +484,18 @@ int nilfs_dat_read(struct super_block *sb, size_t entry_size, | |||
484 | struct nilfs_dat_info *di; | 484 | struct nilfs_dat_info *di; |
485 | int err; | 485 | int err; |
486 | 486 | ||
487 | if (entry_size > sb->s_blocksize) { | ||
488 | printk(KERN_ERR | ||
489 | "NILFS: too large DAT entry size: %zu bytes.\n", | ||
490 | entry_size); | ||
491 | return -EINVAL; | ||
492 | } else if (entry_size < NILFS_MIN_DAT_ENTRY_SIZE) { | ||
493 | printk(KERN_ERR | ||
494 | "NILFS: too small DAT entry size: %zu bytes.\n", | ||
495 | entry_size); | ||
496 | return -EINVAL; | ||
497 | } | ||
498 | |||
487 | dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); | 499 | dat = nilfs_iget_locked(sb, NULL, NILFS_DAT_INO); |
488 | if (unlikely(!dat)) | 500 | if (unlikely(!dat)) |
489 | return -ENOMEM; | 501 | return -ENOMEM; |
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index 08fdb77852ac..f3a82fbcae02 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c | |||
@@ -134,6 +134,7 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
134 | 134 | ||
135 | static const struct vm_operations_struct nilfs_file_vm_ops = { | 135 | static const struct vm_operations_struct nilfs_file_vm_ops = { |
136 | .fault = filemap_fault, | 136 | .fault = filemap_fault, |
137 | .map_pages = filemap_map_pages, | ||
137 | .page_mkwrite = nilfs_page_mkwrite, | 138 | .page_mkwrite = nilfs_page_mkwrite, |
138 | .remap_pages = generic_file_remap_pages, | 139 | .remap_pages = generic_file_remap_pages, |
139 | }; | 140 | }; |
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 7e350c562e0e..b9c5726120e3 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c | |||
@@ -783,16 +783,14 @@ void nilfs_evict_inode(struct inode *inode) | |||
783 | int ret; | 783 | int ret; |
784 | 784 | ||
785 | if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { | 785 | if (inode->i_nlink || !ii->i_root || unlikely(is_bad_inode(inode))) { |
786 | if (inode->i_data.nrpages) | 786 | truncate_inode_pages_final(&inode->i_data); |
787 | truncate_inode_pages(&inode->i_data, 0); | ||
788 | clear_inode(inode); | 787 | clear_inode(inode); |
789 | nilfs_clear_inode(inode); | 788 | nilfs_clear_inode(inode); |
790 | return; | 789 | return; |
791 | } | 790 | } |
792 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ | 791 | nilfs_transaction_begin(sb, &ti, 0); /* never fails */ |
793 | 792 | ||
794 | if (inode->i_data.nrpages) | 793 | truncate_inode_pages_final(&inode->i_data); |
795 | truncate_inode_pages(&inode->i_data, 0); | ||
796 | 794 | ||
797 | /* TODO: some of the following operations may fail. */ | 795 | /* TODO: some of the following operations may fail. */ |
798 | nilfs_truncate_bmap(ii, 0); | 796 | nilfs_truncate_bmap(ii, 0); |
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index 2b34021948e4..422fb54b7377 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c | |||
@@ -1072,6 +1072,48 @@ out: | |||
1072 | } | 1072 | } |
1073 | 1073 | ||
1074 | /** | 1074 | /** |
1075 | * nilfs_ioctl_trim_fs() - trim ioctl handle function | ||
1076 | * @inode: inode object | ||
1077 | * @argp: pointer on argument from userspace | ||
1078 | * | ||
1079 | * Decription: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It | ||
1080 | * checks the arguments from userspace and calls nilfs_sufile_trim_fs, which | ||
1081 | * performs the actual trim operation. | ||
1082 | * | ||
1083 | * Return Value: On success, 0 is returned or negative error code, otherwise. | ||
1084 | */ | ||
1085 | static int nilfs_ioctl_trim_fs(struct inode *inode, void __user *argp) | ||
1086 | { | ||
1087 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; | ||
1088 | struct request_queue *q = bdev_get_queue(nilfs->ns_bdev); | ||
1089 | struct fstrim_range range; | ||
1090 | int ret; | ||
1091 | |||
1092 | if (!capable(CAP_SYS_ADMIN)) | ||
1093 | return -EPERM; | ||
1094 | |||
1095 | if (!blk_queue_discard(q)) | ||
1096 | return -EOPNOTSUPP; | ||
1097 | |||
1098 | if (copy_from_user(&range, argp, sizeof(range))) | ||
1099 | return -EFAULT; | ||
1100 | |||
1101 | range.minlen = max_t(u64, range.minlen, q->limits.discard_granularity); | ||
1102 | |||
1103 | down_read(&nilfs->ns_segctor_sem); | ||
1104 | ret = nilfs_sufile_trim_fs(nilfs->ns_sufile, &range); | ||
1105 | up_read(&nilfs->ns_segctor_sem); | ||
1106 | |||
1107 | if (ret < 0) | ||
1108 | return ret; | ||
1109 | |||
1110 | if (copy_to_user(argp, &range, sizeof(range))) | ||
1111 | return -EFAULT; | ||
1112 | |||
1113 | return 0; | ||
1114 | } | ||
1115 | |||
1116 | /** | ||
1075 | * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated | 1117 | * nilfs_ioctl_set_alloc_range - limit range of segments to be allocated |
1076 | * @inode: inode object | 1118 | * @inode: inode object |
1077 | * @argp: pointer on argument from userspace | 1119 | * @argp: pointer on argument from userspace |
@@ -1163,6 +1205,95 @@ static int nilfs_ioctl_get_info(struct inode *inode, struct file *filp, | |||
1163 | return ret; | 1205 | return ret; |
1164 | } | 1206 | } |
1165 | 1207 | ||
1208 | /** | ||
1209 | * nilfs_ioctl_set_suinfo - set segment usage info | ||
1210 | * @inode: inode object | ||
1211 | * @filp: file object | ||
1212 | * @cmd: ioctl's request code | ||
1213 | * @argp: pointer on argument from userspace | ||
1214 | * | ||
1215 | * Description: Expects an array of nilfs_suinfo_update structures | ||
1216 | * encapsulated in nilfs_argv and updates the segment usage info | ||
1217 | * according to the flags in nilfs_suinfo_update. | ||
1218 | * | ||
1219 | * Return Value: On success, 0 is returned. On error, one of the | ||
1220 | * following negative error codes is returned. | ||
1221 | * | ||
1222 | * %-EPERM - Not enough permissions | ||
1223 | * | ||
1224 | * %-EFAULT - Error copying input data | ||
1225 | * | ||
1226 | * %-EIO - I/O error. | ||
1227 | * | ||
1228 | * %-ENOMEM - Insufficient amount of memory available. | ||
1229 | * | ||
1230 | * %-EINVAL - Invalid values in input (segment number, flags or nblocks) | ||
1231 | */ | ||
1232 | static int nilfs_ioctl_set_suinfo(struct inode *inode, struct file *filp, | ||
1233 | unsigned int cmd, void __user *argp) | ||
1234 | { | ||
1235 | struct the_nilfs *nilfs = inode->i_sb->s_fs_info; | ||
1236 | struct nilfs_transaction_info ti; | ||
1237 | struct nilfs_argv argv; | ||
1238 | size_t len; | ||
1239 | void __user *base; | ||
1240 | void *kbuf; | ||
1241 | int ret; | ||
1242 | |||
1243 | if (!capable(CAP_SYS_ADMIN)) | ||
1244 | return -EPERM; | ||
1245 | |||
1246 | ret = mnt_want_write_file(filp); | ||
1247 | if (ret) | ||
1248 | return ret; | ||
1249 | |||
1250 | ret = -EFAULT; | ||
1251 | if (copy_from_user(&argv, argp, sizeof(argv))) | ||
1252 | goto out; | ||
1253 | |||
1254 | ret = -EINVAL; | ||
1255 | if (argv.v_size < sizeof(struct nilfs_suinfo_update)) | ||
1256 | goto out; | ||
1257 | |||
1258 | if (argv.v_nmembs > nilfs->ns_nsegments) | ||
1259 | goto out; | ||
1260 | |||
1261 | if (argv.v_nmembs >= UINT_MAX / argv.v_size) | ||
1262 | goto out; | ||
1263 | |||
1264 | len = argv.v_size * argv.v_nmembs; | ||
1265 | if (!len) { | ||
1266 | ret = 0; | ||
1267 | goto out; | ||
1268 | } | ||
1269 | |||
1270 | base = (void __user *)(unsigned long)argv.v_base; | ||
1271 | kbuf = vmalloc(len); | ||
1272 | if (!kbuf) { | ||
1273 | ret = -ENOMEM; | ||
1274 | goto out; | ||
1275 | } | ||
1276 | |||
1277 | if (copy_from_user(kbuf, base, len)) { | ||
1278 | ret = -EFAULT; | ||
1279 | goto out_free; | ||
1280 | } | ||
1281 | |||
1282 | nilfs_transaction_begin(inode->i_sb, &ti, 0); | ||
1283 | ret = nilfs_sufile_set_suinfo(nilfs->ns_sufile, kbuf, argv.v_size, | ||
1284 | argv.v_nmembs); | ||
1285 | if (unlikely(ret < 0)) | ||
1286 | nilfs_transaction_abort(inode->i_sb); | ||
1287 | else | ||
1288 | nilfs_transaction_commit(inode->i_sb); /* never fails */ | ||
1289 | |||
1290 | out_free: | ||
1291 | vfree(kbuf); | ||
1292 | out: | ||
1293 | mnt_drop_write_file(filp); | ||
1294 | return ret; | ||
1295 | } | ||
1296 | |||
1166 | long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | 1297 | long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) |
1167 | { | 1298 | { |
1168 | struct inode *inode = file_inode(filp); | 1299 | struct inode *inode = file_inode(filp); |
@@ -1189,6 +1320,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1189 | return nilfs_ioctl_get_info(inode, filp, cmd, argp, | 1320 | return nilfs_ioctl_get_info(inode, filp, cmd, argp, |
1190 | sizeof(struct nilfs_suinfo), | 1321 | sizeof(struct nilfs_suinfo), |
1191 | nilfs_ioctl_do_get_suinfo); | 1322 | nilfs_ioctl_do_get_suinfo); |
1323 | case NILFS_IOCTL_SET_SUINFO: | ||
1324 | return nilfs_ioctl_set_suinfo(inode, filp, cmd, argp); | ||
1192 | case NILFS_IOCTL_GET_SUSTAT: | 1325 | case NILFS_IOCTL_GET_SUSTAT: |
1193 | return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); | 1326 | return nilfs_ioctl_get_sustat(inode, filp, cmd, argp); |
1194 | case NILFS_IOCTL_GET_VINFO: | 1327 | case NILFS_IOCTL_GET_VINFO: |
@@ -1205,6 +1338,8 @@ long nilfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1205 | return nilfs_ioctl_resize(inode, filp, argp); | 1338 | return nilfs_ioctl_resize(inode, filp, argp); |
1206 | case NILFS_IOCTL_SET_ALLOC_RANGE: | 1339 | case NILFS_IOCTL_SET_ALLOC_RANGE: |
1207 | return nilfs_ioctl_set_alloc_range(inode, argp); | 1340 | return nilfs_ioctl_set_alloc_range(inode, argp); |
1341 | case FITRIM: | ||
1342 | return nilfs_ioctl_trim_fs(inode, argp); | ||
1208 | default: | 1343 | default: |
1209 | return -ENOTTY; | 1344 | return -ENOTTY; |
1210 | } | 1345 | } |
@@ -1228,6 +1363,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1228 | case NILFS_IOCTL_GET_CPINFO: | 1363 | case NILFS_IOCTL_GET_CPINFO: |
1229 | case NILFS_IOCTL_GET_CPSTAT: | 1364 | case NILFS_IOCTL_GET_CPSTAT: |
1230 | case NILFS_IOCTL_GET_SUINFO: | 1365 | case NILFS_IOCTL_GET_SUINFO: |
1366 | case NILFS_IOCTL_SET_SUINFO: | ||
1231 | case NILFS_IOCTL_GET_SUSTAT: | 1367 | case NILFS_IOCTL_GET_SUSTAT: |
1232 | case NILFS_IOCTL_GET_VINFO: | 1368 | case NILFS_IOCTL_GET_VINFO: |
1233 | case NILFS_IOCTL_GET_BDESCS: | 1369 | case NILFS_IOCTL_GET_BDESCS: |
@@ -1235,6 +1371,7 @@ long nilfs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
1235 | case NILFS_IOCTL_SYNC: | 1371 | case NILFS_IOCTL_SYNC: |
1236 | case NILFS_IOCTL_RESIZE: | 1372 | case NILFS_IOCTL_RESIZE: |
1237 | case NILFS_IOCTL_SET_ALLOC_RANGE: | 1373 | case NILFS_IOCTL_SET_ALLOC_RANGE: |
1374 | case FITRIM: | ||
1238 | break; | 1375 | break; |
1239 | default: | 1376 | default: |
1240 | return -ENOIOCTLCMD; | 1377 | return -ENOIOCTLCMD; |
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c index 3127e9f438a7..2a869c35c362 100644 --- a/fs/nilfs2/sufile.c +++ b/fs/nilfs2/sufile.c | |||
@@ -870,6 +870,289 @@ ssize_t nilfs_sufile_get_suinfo(struct inode *sufile, __u64 segnum, void *buf, | |||
870 | } | 870 | } |
871 | 871 | ||
872 | /** | 872 | /** |
873 | * nilfs_sufile_set_suinfo - sets segment usage info | ||
874 | * @sufile: inode of segment usage file | ||
875 | * @buf: array of suinfo_update | ||
876 | * @supsz: byte size of suinfo_update | ||
877 | * @nsup: size of suinfo_update array | ||
878 | * | ||
879 | * Description: Takes an array of nilfs_suinfo_update structs and updates | ||
880 | * segment usage accordingly. Only the fields indicated by the sup_flags | ||
881 | * are updated. | ||
882 | * | ||
883 | * Return Value: On success, 0 is returned. On error, one of the | ||
884 | * following negative error codes is returned. | ||
885 | * | ||
886 | * %-EIO - I/O error. | ||
887 | * | ||
888 | * %-ENOMEM - Insufficient amount of memory available. | ||
889 | * | ||
890 | * %-EINVAL - Invalid values in input (segment number, flags or nblocks) | ||
891 | */ | ||
892 | ssize_t nilfs_sufile_set_suinfo(struct inode *sufile, void *buf, | ||
893 | unsigned supsz, size_t nsup) | ||
894 | { | ||
895 | struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; | ||
896 | struct buffer_head *header_bh, *bh; | ||
897 | struct nilfs_suinfo_update *sup, *supend = buf + supsz * nsup; | ||
898 | struct nilfs_segment_usage *su; | ||
899 | void *kaddr; | ||
900 | unsigned long blkoff, prev_blkoff; | ||
901 | int cleansi, cleansu, dirtysi, dirtysu; | ||
902 | long ncleaned = 0, ndirtied = 0; | ||
903 | int ret = 0; | ||
904 | |||
905 | if (unlikely(nsup == 0)) | ||
906 | return ret; | ||
907 | |||
908 | for (sup = buf; sup < supend; sup = (void *)sup + supsz) { | ||
909 | if (sup->sup_segnum >= nilfs->ns_nsegments | ||
910 | || (sup->sup_flags & | ||
911 | (~0UL << __NR_NILFS_SUINFO_UPDATE_FIELDS)) | ||
912 | || (nilfs_suinfo_update_nblocks(sup) && | ||
913 | sup->sup_sui.sui_nblocks > | ||
914 | nilfs->ns_blocks_per_segment)) | ||
915 | return -EINVAL; | ||
916 | } | ||
917 | |||
918 | down_write(&NILFS_MDT(sufile)->mi_sem); | ||
919 | |||
920 | ret = nilfs_sufile_get_header_block(sufile, &header_bh); | ||
921 | if (ret < 0) | ||
922 | goto out_sem; | ||
923 | |||
924 | sup = buf; | ||
925 | blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); | ||
926 | ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); | ||
927 | if (ret < 0) | ||
928 | goto out_header; | ||
929 | |||
930 | for (;;) { | ||
931 | kaddr = kmap_atomic(bh->b_page); | ||
932 | su = nilfs_sufile_block_get_segment_usage( | ||
933 | sufile, sup->sup_segnum, bh, kaddr); | ||
934 | |||
935 | if (nilfs_suinfo_update_lastmod(sup)) | ||
936 | su->su_lastmod = cpu_to_le64(sup->sup_sui.sui_lastmod); | ||
937 | |||
938 | if (nilfs_suinfo_update_nblocks(sup)) | ||
939 | su->su_nblocks = cpu_to_le32(sup->sup_sui.sui_nblocks); | ||
940 | |||
941 | if (nilfs_suinfo_update_flags(sup)) { | ||
942 | /* | ||
943 | * Active flag is a virtual flag projected by running | ||
944 | * nilfs kernel code - drop it not to write it to | ||
945 | * disk. | ||
946 | */ | ||
947 | sup->sup_sui.sui_flags &= | ||
948 | ~(1UL << NILFS_SEGMENT_USAGE_ACTIVE); | ||
949 | |||
950 | cleansi = nilfs_suinfo_clean(&sup->sup_sui); | ||
951 | cleansu = nilfs_segment_usage_clean(su); | ||
952 | dirtysi = nilfs_suinfo_dirty(&sup->sup_sui); | ||
953 | dirtysu = nilfs_segment_usage_dirty(su); | ||
954 | |||
955 | if (cleansi && !cleansu) | ||
956 | ++ncleaned; | ||
957 | else if (!cleansi && cleansu) | ||
958 | --ncleaned; | ||
959 | |||
960 | if (dirtysi && !dirtysu) | ||
961 | ++ndirtied; | ||
962 | else if (!dirtysi && dirtysu) | ||
963 | --ndirtied; | ||
964 | |||
965 | su->su_flags = cpu_to_le32(sup->sup_sui.sui_flags); | ||
966 | } | ||
967 | |||
968 | kunmap_atomic(kaddr); | ||
969 | |||
970 | sup = (void *)sup + supsz; | ||
971 | if (sup >= supend) | ||
972 | break; | ||
973 | |||
974 | prev_blkoff = blkoff; | ||
975 | blkoff = nilfs_sufile_get_blkoff(sufile, sup->sup_segnum); | ||
976 | if (blkoff == prev_blkoff) | ||
977 | continue; | ||
978 | |||
979 | /* get different block */ | ||
980 | mark_buffer_dirty(bh); | ||
981 | put_bh(bh); | ||
982 | ret = nilfs_mdt_get_block(sufile, blkoff, 1, NULL, &bh); | ||
983 | if (unlikely(ret < 0)) | ||
984 | goto out_mark; | ||
985 | } | ||
986 | mark_buffer_dirty(bh); | ||
987 | put_bh(bh); | ||
988 | |||
989 | out_mark: | ||
990 | if (ncleaned || ndirtied) { | ||
991 | nilfs_sufile_mod_counter(header_bh, (u64)ncleaned, | ||
992 | (u64)ndirtied); | ||
993 | NILFS_SUI(sufile)->ncleansegs += ncleaned; | ||
994 | } | ||
995 | nilfs_mdt_mark_dirty(sufile); | ||
996 | out_header: | ||
997 | put_bh(header_bh); | ||
998 | out_sem: | ||
999 | up_write(&NILFS_MDT(sufile)->mi_sem); | ||
1000 | return ret; | ||
1001 | } | ||
1002 | |||
1003 | /** | ||
1004 | * nilfs_sufile_trim_fs() - trim ioctl handle function | ||
1005 | * @sufile: inode of segment usage file | ||
1006 | * @range: fstrim_range structure | ||
1007 | * | ||
1008 | * start: first byte to trim | ||
1009 | * len: number of bytes to trim from start | ||
1010 | * minlen: minimum extent length in bytes | ||
1011 | * | ||
1012 | * Description: nilfs_sufile_trim_fs goes through all segments containing bytes | ||
1013 | * from start to start+len. start is rounded up to the next block boundary | ||
1014 | * and start+len is rounded down. For each clean segment blkdev_issue_discard | ||
1015 | * function is invoked. | ||
1016 | * | ||
1017 | * Return Value: On success, 0 is returned; otherwise, a negative error code is returned. | ||
1018 | */ | ||
1019 | int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range) | ||
1020 | { | ||
1021 | struct the_nilfs *nilfs = sufile->i_sb->s_fs_info; | ||
1022 | struct buffer_head *su_bh; | ||
1023 | struct nilfs_segment_usage *su; | ||
1024 | void *kaddr; | ||
1025 | size_t n, i, susz = NILFS_MDT(sufile)->mi_entry_size; | ||
1026 | sector_t seg_start, seg_end, start_block, end_block; | ||
1027 | sector_t start = 0, nblocks = 0; | ||
1028 | u64 segnum, segnum_end, minlen, len, max_blocks, ndiscarded = 0; | ||
1029 | int ret = 0; | ||
1030 | unsigned int sects_per_block; | ||
1031 | |||
1032 | sects_per_block = (1 << nilfs->ns_blocksize_bits) / | ||
1033 | bdev_logical_block_size(nilfs->ns_bdev); | ||
1034 | len = range->len >> nilfs->ns_blocksize_bits; | ||
1035 | minlen = range->minlen >> nilfs->ns_blocksize_bits; | ||
1036 | max_blocks = ((u64)nilfs->ns_nsegments * nilfs->ns_blocks_per_segment); | ||
1037 | |||
1038 | if (!len || range->start >= max_blocks << nilfs->ns_blocksize_bits) | ||
1039 | return -EINVAL; | ||
1040 | |||
1041 | start_block = (range->start + nilfs->ns_blocksize - 1) >> | ||
1042 | nilfs->ns_blocksize_bits; | ||
1043 | |||
1044 | /* | ||
1045 | * range->len can be very large (actually, it is set to | ||
1046 | * ULLONG_MAX by default) - truncate upper end of the range | ||
1047 | * carefully so as not to overflow. | ||
1048 | */ | ||
1049 | if (max_blocks - start_block < len) | ||
1050 | end_block = max_blocks - 1; | ||
1051 | else | ||
1052 | end_block = start_block + len - 1; | ||
1053 | |||
1054 | segnum = nilfs_get_segnum_of_block(nilfs, start_block); | ||
1055 | segnum_end = nilfs_get_segnum_of_block(nilfs, end_block); | ||
1056 | |||
1057 | down_read(&NILFS_MDT(sufile)->mi_sem); | ||
1058 | |||
1059 | while (segnum <= segnum_end) { | ||
1060 | n = nilfs_sufile_segment_usages_in_block(sufile, segnum, | ||
1061 | segnum_end); | ||
1062 | |||
1063 | ret = nilfs_sufile_get_segment_usage_block(sufile, segnum, 0, | ||
1064 | &su_bh); | ||
1065 | if (ret < 0) { | ||
1066 | if (ret != -ENOENT) | ||
1067 | goto out_sem; | ||
1068 | /* hole */ | ||
1069 | segnum += n; | ||
1070 | continue; | ||
1071 | } | ||
1072 | |||
1073 | kaddr = kmap_atomic(su_bh->b_page); | ||
1074 | su = nilfs_sufile_block_get_segment_usage(sufile, segnum, | ||
1075 | su_bh, kaddr); | ||
1076 | for (i = 0; i < n; ++i, ++segnum, su = (void *)su + susz) { | ||
1077 | if (!nilfs_segment_usage_clean(su)) | ||
1078 | continue; | ||
1079 | |||
1080 | nilfs_get_segment_range(nilfs, segnum, &seg_start, | ||
1081 | &seg_end); | ||
1082 | |||
1083 | if (!nblocks) { | ||
1084 | /* start new extent */ | ||
1085 | start = seg_start; | ||
1086 | nblocks = seg_end - seg_start + 1; | ||
1087 | continue; | ||
1088 | } | ||
1089 | |||
1090 | if (start + nblocks == seg_start) { | ||
1091 | /* add to previous extent */ | ||
1092 | nblocks += seg_end - seg_start + 1; | ||
1093 | continue; | ||
1094 | } | ||
1095 | |||
1096 | /* discard previous extent */ | ||
1097 | if (start < start_block) { | ||
1098 | nblocks -= start_block - start; | ||
1099 | start = start_block; | ||
1100 | } | ||
1101 | |||
1102 | if (nblocks >= minlen) { | ||
1103 | kunmap_atomic(kaddr); | ||
1104 | |||
1105 | ret = blkdev_issue_discard(nilfs->ns_bdev, | ||
1106 | start * sects_per_block, | ||
1107 | nblocks * sects_per_block, | ||
1108 | GFP_NOFS, 0); | ||
1109 | if (ret < 0) { | ||
1110 | put_bh(su_bh); | ||
1111 | goto out_sem; | ||
1112 | } | ||
1113 | |||
1114 | ndiscarded += nblocks; | ||
1115 | kaddr = kmap_atomic(su_bh->b_page); | ||
1116 | su = nilfs_sufile_block_get_segment_usage( | ||
1117 | sufile, segnum, su_bh, kaddr); | ||
1118 | } | ||
1119 | |||
1120 | /* start new extent */ | ||
1121 | start = seg_start; | ||
1122 | nblocks = seg_end - seg_start + 1; | ||
1123 | } | ||
1124 | kunmap_atomic(kaddr); | ||
1125 | put_bh(su_bh); | ||
1126 | } | ||
1127 | |||
1128 | |||
1129 | if (nblocks) { | ||
1130 | /* discard last extent */ | ||
1131 | if (start < start_block) { | ||
1132 | nblocks -= start_block - start; | ||
1133 | start = start_block; | ||
1134 | } | ||
1135 | if (start + nblocks > end_block + 1) | ||
1136 | nblocks = end_block - start + 1; | ||
1137 | |||
1138 | if (nblocks >= minlen) { | ||
1139 | ret = blkdev_issue_discard(nilfs->ns_bdev, | ||
1140 | start * sects_per_block, | ||
1141 | nblocks * sects_per_block, | ||
1142 | GFP_NOFS, 0); | ||
1143 | if (!ret) | ||
1144 | ndiscarded += nblocks; | ||
1145 | } | ||
1146 | } | ||
1147 | |||
1148 | out_sem: | ||
1149 | up_read(&NILFS_MDT(sufile)->mi_sem); | ||
1150 | |||
1151 | range->len = ndiscarded << nilfs->ns_blocksize_bits; | ||
1152 | return ret; | ||
1153 | } | ||
1154 | |||
1155 | /** | ||
873 | * nilfs_sufile_read - read or get sufile inode | 1156 | * nilfs_sufile_read - read or get sufile inode |
874 | * @sb: super block instance | 1157 | * @sb: super block instance |
875 | * @susize: size of a segment usage entry | 1158 | * @susize: size of a segment usage entry |
@@ -886,6 +1169,18 @@ int nilfs_sufile_read(struct super_block *sb, size_t susize, | |||
886 | void *kaddr; | 1169 | void *kaddr; |
887 | int err; | 1170 | int err; |
888 | 1171 | ||
1172 | if (susize > sb->s_blocksize) { | ||
1173 | printk(KERN_ERR | ||
1174 | "NILFS: too large segment usage size: %zu bytes.\n", | ||
1175 | susize); | ||
1176 | return -EINVAL; | ||
1177 | } else if (susize < NILFS_MIN_SEGMENT_USAGE_SIZE) { | ||
1178 | printk(KERN_ERR | ||
1179 | "NILFS: too small segment usage size: %zu bytes.\n", | ||
1180 | susize); | ||
1181 | return -EINVAL; | ||
1182 | } | ||
1183 | |||
889 | sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); | 1184 | sufile = nilfs_iget_locked(sb, NULL, NILFS_SUFILE_INO); |
890 | if (unlikely(!sufile)) | 1185 | if (unlikely(!sufile)) |
891 | return -ENOMEM; | 1186 | return -ENOMEM; |
diff --git a/fs/nilfs2/sufile.h b/fs/nilfs2/sufile.h index e84bc5b51fc1..b8afd72f2379 100644 --- a/fs/nilfs2/sufile.h +++ b/fs/nilfs2/sufile.h | |||
@@ -44,6 +44,7 @@ int nilfs_sufile_set_segment_usage(struct inode *sufile, __u64 segnum, | |||
44 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); | 44 | int nilfs_sufile_get_stat(struct inode *, struct nilfs_sustat *); |
45 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, | 45 | ssize_t nilfs_sufile_get_suinfo(struct inode *, __u64, void *, unsigned, |
46 | size_t); | 46 | size_t); |
47 | ssize_t nilfs_sufile_set_suinfo(struct inode *, void *, unsigned , size_t); | ||
47 | 48 | ||
48 | int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, | 49 | int nilfs_sufile_updatev(struct inode *, __u64 *, size_t, int, size_t *, |
49 | void (*dofunc)(struct inode *, __u64, | 50 | void (*dofunc)(struct inode *, __u64, |
@@ -65,6 +66,7 @@ void nilfs_sufile_do_set_error(struct inode *, __u64, struct buffer_head *, | |||
65 | int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); | 66 | int nilfs_sufile_resize(struct inode *sufile, __u64 newnsegs); |
66 | int nilfs_sufile_read(struct super_block *sb, size_t susize, | 67 | int nilfs_sufile_read(struct super_block *sb, size_t susize, |
67 | struct nilfs_inode *raw_inode, struct inode **inodep); | 68 | struct nilfs_inode *raw_inode, struct inode **inodep); |
69 | int nilfs_sufile_trim_fs(struct inode *sufile, struct fstrim_range *range); | ||
68 | 70 | ||
69 | /** | 71 | /** |
70 | * nilfs_sufile_scrap - make a segment garbage | 72 | * nilfs_sufile_scrap - make a segment garbage |
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 7ac2a122ca1d..8c532b2ca3ab 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c | |||
@@ -1129,6 +1129,7 @@ static int nilfs_remount(struct super_block *sb, int *flags, char *data) | |||
1129 | unsigned long old_mount_opt; | 1129 | unsigned long old_mount_opt; |
1130 | int err; | 1130 | int err; |
1131 | 1131 | ||
1132 | sync_filesystem(sb); | ||
1132 | old_sb_flags = sb->s_flags; | 1133 | old_sb_flags = sb->s_flags; |
1133 | old_mount_opt = nilfs->ns_mount_opt; | 1134 | old_mount_opt = nilfs->ns_mount_opt; |
1134 | 1135 | ||
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c index 94c451ce6d24..8ba8229ba076 100644 --- a/fs/nilfs2/the_nilfs.c +++ b/fs/nilfs2/the_nilfs.c | |||
@@ -399,6 +399,16 @@ static int nilfs_store_disk_layout(struct the_nilfs *nilfs, | |||
399 | return -EINVAL; | 399 | return -EINVAL; |
400 | 400 | ||
401 | nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); | 401 | nilfs->ns_inode_size = le16_to_cpu(sbp->s_inode_size); |
402 | if (nilfs->ns_inode_size > nilfs->ns_blocksize) { | ||
403 | printk(KERN_ERR "NILFS: too large inode size: %d bytes.\n", | ||
404 | nilfs->ns_inode_size); | ||
405 | return -EINVAL; | ||
406 | } else if (nilfs->ns_inode_size < NILFS_MIN_INODE_SIZE) { | ||
407 | printk(KERN_ERR "NILFS: too small inode size: %d bytes.\n", | ||
408 | nilfs->ns_inode_size); | ||
409 | return -EINVAL; | ||
410 | } | ||
411 | |||
402 | nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); | 412 | nilfs->ns_first_ino = le32_to_cpu(sbp->s_first_ino); |
403 | 413 | ||
404 | nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); | 414 | nilfs->ns_blocks_per_segment = le32_to_cpu(sbp->s_blocks_per_segment); |
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index dc638f786d5c..ee9cb3795c2b 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c | |||
@@ -60,8 +60,8 @@ static int fanotify_merge(struct list_head *list, struct fsnotify_event *event) | |||
60 | } | 60 | } |
61 | 61 | ||
62 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 62 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
63 | static int fanotify_get_response_from_access(struct fsnotify_group *group, | 63 | static int fanotify_get_response(struct fsnotify_group *group, |
64 | struct fanotify_event_info *event) | 64 | struct fanotify_perm_event_info *event) |
65 | { | 65 | { |
66 | int ret; | 66 | int ret; |
67 | 67 | ||
@@ -142,6 +142,40 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, | |||
142 | return false; | 142 | return false; |
143 | } | 143 | } |
144 | 144 | ||
145 | struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, | ||
146 | struct path *path) | ||
147 | { | ||
148 | struct fanotify_event_info *event; | ||
149 | |||
150 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
151 | if (mask & FAN_ALL_PERM_EVENTS) { | ||
152 | struct fanotify_perm_event_info *pevent; | ||
153 | |||
154 | pevent = kmem_cache_alloc(fanotify_perm_event_cachep, | ||
155 | GFP_KERNEL); | ||
156 | if (!pevent) | ||
157 | return NULL; | ||
158 | event = &pevent->fae; | ||
159 | pevent->response = 0; | ||
160 | goto init; | ||
161 | } | ||
162 | #endif | ||
163 | event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | ||
164 | if (!event) | ||
165 | return NULL; | ||
166 | init: __maybe_unused | ||
167 | fsnotify_init_event(&event->fse, inode, mask); | ||
168 | event->tgid = get_pid(task_tgid(current)); | ||
169 | if (path) { | ||
170 | event->path = *path; | ||
171 | path_get(&event->path); | ||
172 | } else { | ||
173 | event->path.mnt = NULL; | ||
174 | event->path.dentry = NULL; | ||
175 | } | ||
176 | return event; | ||
177 | } | ||
178 | |||
145 | static int fanotify_handle_event(struct fsnotify_group *group, | 179 | static int fanotify_handle_event(struct fsnotify_group *group, |
146 | struct inode *inode, | 180 | struct inode *inode, |
147 | struct fsnotify_mark *inode_mark, | 181 | struct fsnotify_mark *inode_mark, |
@@ -171,25 +205,11 @@ static int fanotify_handle_event(struct fsnotify_group *group, | |||
171 | pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, | 205 | pr_debug("%s: group=%p inode=%p mask=%x\n", __func__, group, inode, |
172 | mask); | 206 | mask); |
173 | 207 | ||
174 | event = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | 208 | event = fanotify_alloc_event(inode, mask, data); |
175 | if (unlikely(!event)) | 209 | if (unlikely(!event)) |
176 | return -ENOMEM; | 210 | return -ENOMEM; |
177 | 211 | ||
178 | fsn_event = &event->fse; | 212 | fsn_event = &event->fse; |
179 | fsnotify_init_event(fsn_event, inode, mask); | ||
180 | event->tgid = get_pid(task_tgid(current)); | ||
181 | if (data_type == FSNOTIFY_EVENT_PATH) { | ||
182 | struct path *path = data; | ||
183 | event->path = *path; | ||
184 | path_get(&event->path); | ||
185 | } else { | ||
186 | event->path.mnt = NULL; | ||
187 | event->path.dentry = NULL; | ||
188 | } | ||
189 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
190 | event->response = 0; | ||
191 | #endif | ||
192 | |||
193 | ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); | 213 | ret = fsnotify_add_notify_event(group, fsn_event, fanotify_merge); |
194 | if (ret) { | 214 | if (ret) { |
195 | /* Permission events shouldn't be merged */ | 215 | /* Permission events shouldn't be merged */ |
@@ -202,7 +222,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, | |||
202 | 222 | ||
203 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 223 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
204 | if (mask & FAN_ALL_PERM_EVENTS) { | 224 | if (mask & FAN_ALL_PERM_EVENTS) { |
205 | ret = fanotify_get_response_from_access(group, event); | 225 | ret = fanotify_get_response(group, FANOTIFY_PE(fsn_event)); |
206 | fsnotify_destroy_event(group, fsn_event); | 226 | fsnotify_destroy_event(group, fsn_event); |
207 | } | 227 | } |
208 | #endif | 228 | #endif |
@@ -225,6 +245,13 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event) | |||
225 | event = FANOTIFY_E(fsn_event); | 245 | event = FANOTIFY_E(fsn_event); |
226 | path_put(&event->path); | 246 | path_put(&event->path); |
227 | put_pid(event->tgid); | 247 | put_pid(event->tgid); |
248 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
249 | if (fsn_event->mask & FAN_ALL_PERM_EVENTS) { | ||
250 | kmem_cache_free(fanotify_perm_event_cachep, | ||
251 | FANOTIFY_PE(fsn_event)); | ||
252 | return; | ||
253 | } | ||
254 | #endif | ||
228 | kmem_cache_free(fanotify_event_cachep, event); | 255 | kmem_cache_free(fanotify_event_cachep, event); |
229 | } | 256 | } |
230 | 257 | ||
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index 32a2f034fb94..2a5fb14115df 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h | |||
@@ -3,13 +3,12 @@ | |||
3 | #include <linux/slab.h> | 3 | #include <linux/slab.h> |
4 | 4 | ||
5 | extern struct kmem_cache *fanotify_event_cachep; | 5 | extern struct kmem_cache *fanotify_event_cachep; |
6 | extern struct kmem_cache *fanotify_perm_event_cachep; | ||
6 | 7 | ||
7 | /* | 8 | /* |
8 | * Lifetime of the structure differs for normal and permission events. In both | 9 | * Structure for normal fanotify events. It gets allocated in |
9 | * cases the structure is allocated in fanotify_handle_event(). For normal | 10 | * fanotify_handle_event() and freed when the information is retrieved by |
10 | * events the structure is freed immediately after reporting it to userspace. | 11 | * userspace |
11 | * For permission events we free it only after we receive response from | ||
12 | * userspace. | ||
13 | */ | 12 | */ |
14 | struct fanotify_event_info { | 13 | struct fanotify_event_info { |
15 | struct fsnotify_event fse; | 14 | struct fsnotify_event fse; |
@@ -19,12 +18,33 @@ struct fanotify_event_info { | |||
19 | */ | 18 | */ |
20 | struct path path; | 19 | struct path path; |
21 | struct pid *tgid; | 20 | struct pid *tgid; |
21 | }; | ||
22 | |||
22 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 23 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
23 | u32 response; /* userspace answer to question */ | 24 | /* |
24 | #endif | 25 | * Structure for permission fanotify events. It gets allocated and freed in |
26 | * fanotify_handle_event() since we wait there for user response. When the | ||
27 | * information is retrieved by userspace the structure is moved from | ||
28 | * group->notification_list to group->fanotify_data.access_list to wait for | ||
29 | * user response. | ||
30 | */ | ||
31 | struct fanotify_perm_event_info { | ||
32 | struct fanotify_event_info fae; | ||
33 | int response; /* userspace answer to question */ | ||
34 | int fd; /* fd we passed to userspace for this event */ | ||
25 | }; | 35 | }; |
26 | 36 | ||
37 | static inline struct fanotify_perm_event_info * | ||
38 | FANOTIFY_PE(struct fsnotify_event *fse) | ||
39 | { | ||
40 | return container_of(fse, struct fanotify_perm_event_info, fae.fse); | ||
41 | } | ||
42 | #endif | ||
43 | |||
27 | static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) | 44 | static inline struct fanotify_event_info *FANOTIFY_E(struct fsnotify_event *fse) |
28 | { | 45 | { |
29 | return container_of(fse, struct fanotify_event_info, fse); | 46 | return container_of(fse, struct fanotify_event_info, fse); |
30 | } | 47 | } |
48 | |||
49 | struct fanotify_event_info *fanotify_alloc_event(struct inode *inode, u32 mask, | ||
50 | struct path *path); | ||
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 287a22c04149..4e565c814309 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c | |||
@@ -28,14 +28,8 @@ | |||
28 | extern const struct fsnotify_ops fanotify_fsnotify_ops; | 28 | extern const struct fsnotify_ops fanotify_fsnotify_ops; |
29 | 29 | ||
30 | static struct kmem_cache *fanotify_mark_cache __read_mostly; | 30 | static struct kmem_cache *fanotify_mark_cache __read_mostly; |
31 | static struct kmem_cache *fanotify_response_event_cache __read_mostly; | ||
32 | struct kmem_cache *fanotify_event_cachep __read_mostly; | 31 | struct kmem_cache *fanotify_event_cachep __read_mostly; |
33 | 32 | struct kmem_cache *fanotify_perm_event_cachep __read_mostly; | |
34 | struct fanotify_response_event { | ||
35 | struct list_head list; | ||
36 | __s32 fd; | ||
37 | struct fanotify_event_info *event; | ||
38 | }; | ||
39 | 33 | ||
40 | /* | 34 | /* |
41 | * Get an fsnotify notification event if one exists and is small | 35 | * Get an fsnotify notification event if one exists and is small |
@@ -135,33 +129,34 @@ static int fill_event_metadata(struct fsnotify_group *group, | |||
135 | } | 129 | } |
136 | 130 | ||
137 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 131 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
138 | static struct fanotify_response_event *dequeue_re(struct fsnotify_group *group, | 132 | static struct fanotify_perm_event_info *dequeue_event( |
139 | __s32 fd) | 133 | struct fsnotify_group *group, int fd) |
140 | { | 134 | { |
141 | struct fanotify_response_event *re, *return_re = NULL; | 135 | struct fanotify_perm_event_info *event, *return_e = NULL; |
142 | 136 | ||
143 | mutex_lock(&group->fanotify_data.access_mutex); | 137 | spin_lock(&group->fanotify_data.access_lock); |
144 | list_for_each_entry(re, &group->fanotify_data.access_list, list) { | 138 | list_for_each_entry(event, &group->fanotify_data.access_list, |
145 | if (re->fd != fd) | 139 | fae.fse.list) { |
140 | if (event->fd != fd) | ||
146 | continue; | 141 | continue; |
147 | 142 | ||
148 | list_del_init(&re->list); | 143 | list_del_init(&event->fae.fse.list); |
149 | return_re = re; | 144 | return_e = event; |
150 | break; | 145 | break; |
151 | } | 146 | } |
152 | mutex_unlock(&group->fanotify_data.access_mutex); | 147 | spin_unlock(&group->fanotify_data.access_lock); |
153 | 148 | ||
154 | pr_debug("%s: found return_re=%p\n", __func__, return_re); | 149 | pr_debug("%s: found return_re=%p\n", __func__, return_e); |
155 | 150 | ||
156 | return return_re; | 151 | return return_e; |
157 | } | 152 | } |
158 | 153 | ||
159 | static int process_access_response(struct fsnotify_group *group, | 154 | static int process_access_response(struct fsnotify_group *group, |
160 | struct fanotify_response *response_struct) | 155 | struct fanotify_response *response_struct) |
161 | { | 156 | { |
162 | struct fanotify_response_event *re; | 157 | struct fanotify_perm_event_info *event; |
163 | __s32 fd = response_struct->fd; | 158 | int fd = response_struct->fd; |
164 | __u32 response = response_struct->response; | 159 | int response = response_struct->response; |
165 | 160 | ||
166 | pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, | 161 | pr_debug("%s: group=%p fd=%d response=%d\n", __func__, group, |
167 | fd, response); | 162 | fd, response); |
@@ -181,58 +176,15 @@ static int process_access_response(struct fsnotify_group *group, | |||
181 | if (fd < 0) | 176 | if (fd < 0) |
182 | return -EINVAL; | 177 | return -EINVAL; |
183 | 178 | ||
184 | re = dequeue_re(group, fd); | 179 | event = dequeue_event(group, fd); |
185 | if (!re) | 180 | if (!event) |
186 | return -ENOENT; | 181 | return -ENOENT; |
187 | 182 | ||
188 | re->event->response = response; | 183 | event->response = response; |
189 | |||
190 | wake_up(&group->fanotify_data.access_waitq); | 184 | wake_up(&group->fanotify_data.access_waitq); |
191 | 185 | ||
192 | kmem_cache_free(fanotify_response_event_cache, re); | ||
193 | |||
194 | return 0; | ||
195 | } | ||
196 | |||
197 | static int prepare_for_access_response(struct fsnotify_group *group, | ||
198 | struct fsnotify_event *event, | ||
199 | __s32 fd) | ||
200 | { | ||
201 | struct fanotify_response_event *re; | ||
202 | |||
203 | if (!(event->mask & FAN_ALL_PERM_EVENTS)) | ||
204 | return 0; | ||
205 | |||
206 | re = kmem_cache_alloc(fanotify_response_event_cache, GFP_KERNEL); | ||
207 | if (!re) | ||
208 | return -ENOMEM; | ||
209 | |||
210 | re->event = FANOTIFY_E(event); | ||
211 | re->fd = fd; | ||
212 | |||
213 | mutex_lock(&group->fanotify_data.access_mutex); | ||
214 | |||
215 | if (atomic_read(&group->fanotify_data.bypass_perm)) { | ||
216 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
217 | kmem_cache_free(fanotify_response_event_cache, re); | ||
218 | FANOTIFY_E(event)->response = FAN_ALLOW; | ||
219 | return 0; | ||
220 | } | ||
221 | |||
222 | list_add_tail(&re->list, &group->fanotify_data.access_list); | ||
223 | mutex_unlock(&group->fanotify_data.access_mutex); | ||
224 | |||
225 | return 0; | ||
226 | } | ||
227 | |||
228 | #else | ||
229 | static int prepare_for_access_response(struct fsnotify_group *group, | ||
230 | struct fsnotify_event *event, | ||
231 | __s32 fd) | ||
232 | { | ||
233 | return 0; | 186 | return 0; |
234 | } | 187 | } |
235 | |||
236 | #endif | 188 | #endif |
237 | 189 | ||
238 | static ssize_t copy_event_to_user(struct fsnotify_group *group, | 190 | static ssize_t copy_event_to_user(struct fsnotify_group *group, |
@@ -247,7 +199,7 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
247 | 199 | ||
248 | ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); | 200 | ret = fill_event_metadata(group, &fanotify_event_metadata, event, &f); |
249 | if (ret < 0) | 201 | if (ret < 0) |
250 | goto out; | 202 | return ret; |
251 | 203 | ||
252 | fd = fanotify_event_metadata.fd; | 204 | fd = fanotify_event_metadata.fd; |
253 | ret = -EFAULT; | 205 | ret = -EFAULT; |
@@ -255,9 +207,10 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, | |||
255 | fanotify_event_metadata.event_len)) | 207 | fanotify_event_metadata.event_len)) |
256 | goto out_close_fd; | 208 | goto out_close_fd; |
257 | 209 | ||
258 | ret = prepare_for_access_response(group, event, fd); | 210 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
259 | if (ret) | 211 | if (event->mask & FAN_ALL_PERM_EVENTS) |
260 | goto out_close_fd; | 212 | FANOTIFY_PE(event)->fd = fd; |
213 | #endif | ||
261 | 214 | ||
262 | if (fd != FAN_NOFD) | 215 | if (fd != FAN_NOFD) |
263 | fd_install(fd, f); | 216 | fd_install(fd, f); |
@@ -268,13 +221,6 @@ out_close_fd: | |||
268 | put_unused_fd(fd); | 221 | put_unused_fd(fd); |
269 | fput(f); | 222 | fput(f); |
270 | } | 223 | } |
271 | out: | ||
272 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
273 | if (event->mask & FAN_ALL_PERM_EVENTS) { | ||
274 | FANOTIFY_E(event)->response = FAN_DENY; | ||
275 | wake_up(&group->fanotify_data.access_waitq); | ||
276 | } | ||
277 | #endif | ||
278 | return ret; | 224 | return ret; |
279 | } | 225 | } |
280 | 226 | ||
@@ -314,35 +260,50 @@ static ssize_t fanotify_read(struct file *file, char __user *buf, | |||
314 | kevent = get_one_event(group, count); | 260 | kevent = get_one_event(group, count); |
315 | mutex_unlock(&group->notification_mutex); | 261 | mutex_unlock(&group->notification_mutex); |
316 | 262 | ||
317 | if (kevent) { | 263 | if (IS_ERR(kevent)) { |
318 | ret = PTR_ERR(kevent); | 264 | ret = PTR_ERR(kevent); |
319 | if (IS_ERR(kevent)) | 265 | break; |
266 | } | ||
267 | |||
268 | if (!kevent) { | ||
269 | ret = -EAGAIN; | ||
270 | if (file->f_flags & O_NONBLOCK) | ||
320 | break; | 271 | break; |
321 | ret = copy_event_to_user(group, kevent, buf); | 272 | |
322 | /* | 273 | ret = -ERESTARTSYS; |
323 | * Permission events get destroyed after we | 274 | if (signal_pending(current)) |
324 | * receive response | 275 | break; |
325 | */ | 276 | |
326 | if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) | 277 | if (start != buf) |
327 | fsnotify_destroy_event(group, kevent); | ||
328 | if (ret < 0) | ||
329 | break; | 278 | break; |
330 | buf += ret; | 279 | schedule(); |
331 | count -= ret; | ||
332 | continue; | 280 | continue; |
333 | } | 281 | } |
334 | 282 | ||
335 | ret = -EAGAIN; | 283 | ret = copy_event_to_user(group, kevent, buf); |
336 | if (file->f_flags & O_NONBLOCK) | 284 | /* |
337 | break; | 285 | * Permission events get queued to wait for response. Other |
338 | ret = -ERESTARTSYS; | 286 | * events can be destroyed now. |
339 | if (signal_pending(current)) | 287 | */ |
340 | break; | 288 | if (!(kevent->mask & FAN_ALL_PERM_EVENTS)) { |
341 | 289 | fsnotify_destroy_event(group, kevent); | |
342 | if (start != buf) | 290 | if (ret < 0) |
343 | break; | 291 | break; |
344 | 292 | } else { | |
345 | schedule(); | 293 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
294 | if (ret < 0) { | ||
295 | FANOTIFY_PE(kevent)->response = FAN_DENY; | ||
296 | wake_up(&group->fanotify_data.access_waitq); | ||
297 | break; | ||
298 | } | ||
299 | spin_lock(&group->fanotify_data.access_lock); | ||
300 | list_add_tail(&kevent->list, | ||
301 | &group->fanotify_data.access_list); | ||
302 | spin_unlock(&group->fanotify_data.access_lock); | ||
303 | #endif | ||
304 | } | ||
305 | buf += ret; | ||
306 | count -= ret; | ||
346 | } | 307 | } |
347 | 308 | ||
348 | finish_wait(&group->notification_waitq, &wait); | 309 | finish_wait(&group->notification_waitq, &wait); |
@@ -383,22 +344,21 @@ static int fanotify_release(struct inode *ignored, struct file *file) | |||
383 | struct fsnotify_group *group = file->private_data; | 344 | struct fsnotify_group *group = file->private_data; |
384 | 345 | ||
385 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 346 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
386 | struct fanotify_response_event *re, *lre; | 347 | struct fanotify_perm_event_info *event, *next; |
387 | 348 | ||
388 | mutex_lock(&group->fanotify_data.access_mutex); | 349 | spin_lock(&group->fanotify_data.access_lock); |
389 | 350 | ||
390 | atomic_inc(&group->fanotify_data.bypass_perm); | 351 | atomic_inc(&group->fanotify_data.bypass_perm); |
391 | 352 | ||
392 | list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) { | 353 | list_for_each_entry_safe(event, next, &group->fanotify_data.access_list, |
393 | pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group, | 354 | fae.fse.list) { |
394 | re, re->event); | 355 | pr_debug("%s: found group=%p event=%p\n", __func__, group, |
356 | event); | ||
395 | 357 | ||
396 | list_del_init(&re->list); | 358 | list_del_init(&event->fae.fse.list); |
397 | re->event->response = FAN_ALLOW; | 359 | event->response = FAN_ALLOW; |
398 | |||
399 | kmem_cache_free(fanotify_response_event_cache, re); | ||
400 | } | 360 | } |
401 | mutex_unlock(&group->fanotify_data.access_mutex); | 361 | spin_unlock(&group->fanotify_data.access_lock); |
402 | 362 | ||
403 | wake_up(&group->fanotify_data.access_waitq); | 363 | wake_up(&group->fanotify_data.access_waitq); |
404 | #endif | 364 | #endif |
@@ -731,21 +691,16 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) | |||
731 | group->fanotify_data.user = user; | 691 | group->fanotify_data.user = user; |
732 | atomic_inc(&user->fanotify_listeners); | 692 | atomic_inc(&user->fanotify_listeners); |
733 | 693 | ||
734 | oevent = kmem_cache_alloc(fanotify_event_cachep, GFP_KERNEL); | 694 | oevent = fanotify_alloc_event(NULL, FS_Q_OVERFLOW, NULL); |
735 | if (unlikely(!oevent)) { | 695 | if (unlikely(!oevent)) { |
736 | fd = -ENOMEM; | 696 | fd = -ENOMEM; |
737 | goto out_destroy_group; | 697 | goto out_destroy_group; |
738 | } | 698 | } |
739 | group->overflow_event = &oevent->fse; | 699 | group->overflow_event = &oevent->fse; |
740 | fsnotify_init_event(group->overflow_event, NULL, FS_Q_OVERFLOW); | ||
741 | oevent->tgid = get_pid(task_tgid(current)); | ||
742 | oevent->path.mnt = NULL; | ||
743 | oevent->path.dentry = NULL; | ||
744 | 700 | ||
745 | group->fanotify_data.f_flags = event_f_flags; | 701 | group->fanotify_data.f_flags = event_f_flags; |
746 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | 702 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS |
747 | oevent->response = 0; | 703 | spin_lock_init(&group->fanotify_data.access_lock); |
748 | mutex_init(&group->fanotify_data.access_mutex); | ||
749 | init_waitqueue_head(&group->fanotify_data.access_waitq); | 704 | init_waitqueue_head(&group->fanotify_data.access_waitq); |
750 | INIT_LIST_HEAD(&group->fanotify_data.access_list); | 705 | INIT_LIST_HEAD(&group->fanotify_data.access_list); |
751 | atomic_set(&group->fanotify_data.bypass_perm, 0); | 706 | atomic_set(&group->fanotify_data.bypass_perm, 0); |
@@ -920,9 +875,11 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, | |||
920 | static int __init fanotify_user_setup(void) | 875 | static int __init fanotify_user_setup(void) |
921 | { | 876 | { |
922 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); | 877 | fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC); |
923 | fanotify_response_event_cache = KMEM_CACHE(fanotify_response_event, | ||
924 | SLAB_PANIC); | ||
925 | fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); | 878 | fanotify_event_cachep = KMEM_CACHE(fanotify_event_info, SLAB_PANIC); |
879 | #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS | ||
880 | fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event_info, | ||
881 | SLAB_PANIC); | ||
882 | #endif | ||
926 | 883 | ||
927 | return 0; | 884 | return 0; |
928 | } | 885 | } |
diff --git a/fs/ntfs/debug.c b/fs/ntfs/debug.c index 807150e2c2b9..dd6103cc93c1 100644 --- a/fs/ntfs/debug.c +++ b/fs/ntfs/debug.c | |||
@@ -18,16 +18,9 @@ | |||
18 | * distribution in the file COPYING); if not, write to the Free Software | 18 | * distribution in the file COPYING); if not, write to the Free Software |
19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 19 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
20 | */ | 20 | */ |
21 | 21 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | |
22 | #include "debug.h" | 22 | #include "debug.h" |
23 | 23 | ||
24 | /* | ||
25 | * A static buffer to hold the error string being displayed and a spinlock | ||
26 | * to protect concurrent accesses to it. | ||
27 | */ | ||
28 | static char err_buf[1024]; | ||
29 | static DEFINE_SPINLOCK(err_buf_lock); | ||
30 | |||
31 | /** | 24 | /** |
32 | * __ntfs_warning - output a warning to the syslog | 25 | * __ntfs_warning - output a warning to the syslog |
33 | * @function: name of function outputting the warning | 26 | * @function: name of function outputting the warning |
@@ -50,6 +43,7 @@ static DEFINE_SPINLOCK(err_buf_lock); | |||
50 | void __ntfs_warning(const char *function, const struct super_block *sb, | 43 | void __ntfs_warning(const char *function, const struct super_block *sb, |
51 | const char *fmt, ...) | 44 | const char *fmt, ...) |
52 | { | 45 | { |
46 | struct va_format vaf; | ||
53 | va_list args; | 47 | va_list args; |
54 | int flen = 0; | 48 | int flen = 0; |
55 | 49 | ||
@@ -59,17 +53,15 @@ void __ntfs_warning(const char *function, const struct super_block *sb, | |||
59 | #endif | 53 | #endif |
60 | if (function) | 54 | if (function) |
61 | flen = strlen(function); | 55 | flen = strlen(function); |
62 | spin_lock(&err_buf_lock); | ||
63 | va_start(args, fmt); | 56 | va_start(args, fmt); |
64 | vsnprintf(err_buf, sizeof(err_buf), fmt, args); | 57 | vaf.fmt = fmt; |
65 | va_end(args); | 58 | vaf.va = &args; |
66 | if (sb) | 59 | if (sb) |
67 | printk(KERN_ERR "NTFS-fs warning (device %s): %s(): %s\n", | 60 | pr_warn("(device %s): %s(): %pV\n", |
68 | sb->s_id, flen ? function : "", err_buf); | 61 | sb->s_id, flen ? function : "", &vaf); |
69 | else | 62 | else |
70 | printk(KERN_ERR "NTFS-fs warning: %s(): %s\n", | 63 | pr_warn("%s(): %pV\n", flen ? function : "", &vaf); |
71 | flen ? function : "", err_buf); | 64 | va_end(args); |
72 | spin_unlock(&err_buf_lock); | ||
73 | } | 65 | } |
74 | 66 | ||
75 | /** | 67 | /** |
@@ -94,6 +86,7 @@ void __ntfs_warning(const char *function, const struct super_block *sb, | |||
94 | void __ntfs_error(const char *function, const struct super_block *sb, | 86 | void __ntfs_error(const char *function, const struct super_block *sb, |
95 | const char *fmt, ...) | 87 | const char *fmt, ...) |
96 | { | 88 | { |
89 | struct va_format vaf; | ||
97 | va_list args; | 90 | va_list args; |
98 | int flen = 0; | 91 | int flen = 0; |
99 | 92 | ||
@@ -103,17 +96,15 @@ void __ntfs_error(const char *function, const struct super_block *sb, | |||
103 | #endif | 96 | #endif |
104 | if (function) | 97 | if (function) |
105 | flen = strlen(function); | 98 | flen = strlen(function); |
106 | spin_lock(&err_buf_lock); | ||
107 | va_start(args, fmt); | 99 | va_start(args, fmt); |
108 | vsnprintf(err_buf, sizeof(err_buf), fmt, args); | 100 | vaf.fmt = fmt; |
109 | va_end(args); | 101 | vaf.va = &args; |
110 | if (sb) | 102 | if (sb) |
111 | printk(KERN_ERR "NTFS-fs error (device %s): %s(): %s\n", | 103 | pr_err("(device %s): %s(): %pV\n", |
112 | sb->s_id, flen ? function : "", err_buf); | 104 | sb->s_id, flen ? function : "", &vaf); |
113 | else | 105 | else |
114 | printk(KERN_ERR "NTFS-fs error: %s(): %s\n", | 106 | pr_err("%s(): %pV\n", flen ? function : "", &vaf); |
115 | flen ? function : "", err_buf); | 107 | va_end(args); |
116 | spin_unlock(&err_buf_lock); | ||
117 | } | 108 | } |
118 | 109 | ||
119 | #ifdef DEBUG | 110 | #ifdef DEBUG |
@@ -124,6 +115,7 @@ int debug_msgs = 0; | |||
124 | void __ntfs_debug (const char *file, int line, const char *function, | 115 | void __ntfs_debug (const char *file, int line, const char *function, |
125 | const char *fmt, ...) | 116 | const char *fmt, ...) |
126 | { | 117 | { |
118 | struct va_format vaf; | ||
127 | va_list args; | 119 | va_list args; |
128 | int flen = 0; | 120 | int flen = 0; |
129 | 121 | ||
@@ -131,13 +123,11 @@ void __ntfs_debug (const char *file, int line, const char *function, | |||
131 | return; | 123 | return; |
132 | if (function) | 124 | if (function) |
133 | flen = strlen(function); | 125 | flen = strlen(function); |
134 | spin_lock(&err_buf_lock); | ||
135 | va_start(args, fmt); | 126 | va_start(args, fmt); |
136 | vsnprintf(err_buf, sizeof(err_buf), fmt, args); | 127 | vaf.fmt = fmt; |
128 | vaf.va = &args; | ||
129 | pr_debug("(%s, %d): %s(): %pV", file, line, flen ? function : "", &vaf); | ||
137 | va_end(args); | 130 | va_end(args); |
138 | printk(KERN_DEBUG "NTFS-fs DEBUG (%s, %d): %s(): %s\n", file, line, | ||
139 | flen ? function : "", err_buf); | ||
140 | spin_unlock(&err_buf_lock); | ||
141 | } | 131 | } |
142 | 132 | ||
143 | /* Dump a runlist. Caller has to provide synchronisation for @rl. */ | 133 | /* Dump a runlist. Caller has to provide synchronisation for @rl. */ |
@@ -149,12 +139,12 @@ void ntfs_debug_dump_runlist(const runlist_element *rl) | |||
149 | 139 | ||
150 | if (!debug_msgs) | 140 | if (!debug_msgs) |
151 | return; | 141 | return; |
152 | printk(KERN_DEBUG "NTFS-fs DEBUG: Dumping runlist (values in hex):\n"); | 142 | pr_debug("Dumping runlist (values in hex):\n"); |
153 | if (!rl) { | 143 | if (!rl) { |
154 | printk(KERN_DEBUG "Run list not present.\n"); | 144 | pr_debug("Run list not present.\n"); |
155 | return; | 145 | return; |
156 | } | 146 | } |
157 | printk(KERN_DEBUG "VCN LCN Run length\n"); | 147 | pr_debug("VCN LCN Run length\n"); |
158 | for (i = 0; ; i++) { | 148 | for (i = 0; ; i++) { |
159 | LCN lcn = (rl + i)->lcn; | 149 | LCN lcn = (rl + i)->lcn; |
160 | 150 | ||
@@ -163,13 +153,13 @@ void ntfs_debug_dump_runlist(const runlist_element *rl) | |||
163 | 153 | ||
164 | if (index > -LCN_ENOENT - 1) | 154 | if (index > -LCN_ENOENT - 1) |
165 | index = 3; | 155 | index = 3; |
166 | printk(KERN_DEBUG "%-16Lx %s %-16Lx%s\n", | 156 | pr_debug("%-16Lx %s %-16Lx%s\n", |
167 | (long long)(rl + i)->vcn, lcn_str[index], | 157 | (long long)(rl + i)->vcn, lcn_str[index], |
168 | (long long)(rl + i)->length, | 158 | (long long)(rl + i)->length, |
169 | (rl + i)->length ? "" : | 159 | (rl + i)->length ? "" : |
170 | " (runlist end)"); | 160 | " (runlist end)"); |
171 | } else | 161 | } else |
172 | printk(KERN_DEBUG "%-16Lx %-16Lx %-16Lx%s\n", | 162 | pr_debug("%-16Lx %-16Lx %-16Lx%s\n", |
173 | (long long)(rl + i)->vcn, | 163 | (long long)(rl + i)->vcn, |
174 | (long long)(rl + i)->lcn, | 164 | (long long)(rl + i)->lcn, |
175 | (long long)(rl + i)->length, | 165 | (long long)(rl + i)->length, |
diff --git a/fs/ntfs/debug.h b/fs/ntfs/debug.h index 53c27eaf2307..61bf091e32a8 100644 --- a/fs/ntfs/debug.h +++ b/fs/ntfs/debug.h | |||
@@ -48,7 +48,12 @@ extern void ntfs_debug_dump_runlist(const runlist_element *rl); | |||
48 | 48 | ||
49 | #else /* !DEBUG */ | 49 | #else /* !DEBUG */ |
50 | 50 | ||
51 | #define ntfs_debug(f, a...) do {} while (0) | 51 | #define ntfs_debug(fmt, ...) \ |
52 | do { \ | ||
53 | if (0) \ | ||
54 | no_printk(fmt, ##__VA_ARGS__); \ | ||
55 | } while (0) | ||
56 | |||
52 | #define ntfs_debug_dump_runlist(rl) do {} while (0) | 57 | #define ntfs_debug_dump_runlist(rl) do {} while (0) |
53 | 58 | ||
54 | #endif /* !DEBUG */ | 59 | #endif /* !DEBUG */ |
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c index ffb9b3675736..9d8153ebacfb 100644 --- a/fs/ntfs/inode.c +++ b/fs/ntfs/inode.c | |||
@@ -2259,7 +2259,7 @@ void ntfs_evict_big_inode(struct inode *vi) | |||
2259 | { | 2259 | { |
2260 | ntfs_inode *ni = NTFS_I(vi); | 2260 | ntfs_inode *ni = NTFS_I(vi); |
2261 | 2261 | ||
2262 | truncate_inode_pages(&vi->i_data, 0); | 2262 | truncate_inode_pages_final(&vi->i_data); |
2263 | clear_inode(vi); | 2263 | clear_inode(vi); |
2264 | 2264 | ||
2265 | #ifdef NTFS_RW | 2265 | #ifdef NTFS_RW |
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index 82650d52d916..9de2491f2926 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c | |||
@@ -19,6 +19,7 @@ | |||
19 | * distribution in the file COPYING); if not, write to the Free Software | 19 | * distribution in the file COPYING); if not, write to the Free Software |
20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 20 | * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 | */ | 21 | */ |
22 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
22 | 23 | ||
23 | #include <linux/stddef.h> | 24 | #include <linux/stddef.h> |
24 | #include <linux/init.h> | 25 | #include <linux/init.h> |
@@ -468,6 +469,8 @@ static int ntfs_remount(struct super_block *sb, int *flags, char *opt) | |||
468 | 469 | ||
469 | ntfs_debug("Entering with remount options string: %s", opt); | 470 | ntfs_debug("Entering with remount options string: %s", opt); |
470 | 471 | ||
472 | sync_filesystem(sb); | ||
473 | |||
471 | #ifndef NTFS_RW | 474 | #ifndef NTFS_RW |
472 | /* For read-only compiled driver, enforce read-only flag. */ | 475 | /* For read-only compiled driver, enforce read-only flag. */ |
473 | *flags |= MS_RDONLY; | 476 | *flags |= MS_RDONLY; |
@@ -1894,7 +1897,7 @@ get_ctx_vol_failed: | |||
1894 | vol->minor_ver = vi->minor_ver; | 1897 | vol->minor_ver = vi->minor_ver; |
1895 | ntfs_attr_put_search_ctx(ctx); | 1898 | ntfs_attr_put_search_ctx(ctx); |
1896 | unmap_mft_record(NTFS_I(vol->vol_ino)); | 1899 | unmap_mft_record(NTFS_I(vol->vol_ino)); |
1897 | printk(KERN_INFO "NTFS volume version %i.%i.\n", vol->major_ver, | 1900 | pr_info("volume version %i.%i.\n", vol->major_ver, |
1898 | vol->minor_ver); | 1901 | vol->minor_ver); |
1899 | if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { | 1902 | if (vol->major_ver < 3 && NVolSparseEnabled(vol)) { |
1900 | ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " | 1903 | ntfs_warning(vol->sb, "Disabling sparse support due to NTFS " |
@@ -3093,7 +3096,7 @@ static int __init init_ntfs_fs(void) | |||
3093 | int err = 0; | 3096 | int err = 0; |
3094 | 3097 | ||
3095 | /* This may be ugly but it results in pretty output so who cares. (-8 */ | 3098 | /* This may be ugly but it results in pretty output so who cares. (-8 */ |
3096 | printk(KERN_INFO "NTFS driver " NTFS_VERSION " [Flags: R/" | 3099 | pr_info("driver " NTFS_VERSION " [Flags: R/" |
3097 | #ifdef NTFS_RW | 3100 | #ifdef NTFS_RW |
3098 | "W" | 3101 | "W" |
3099 | #else | 3102 | #else |
@@ -3113,16 +3116,15 @@ static int __init init_ntfs_fs(void) | |||
3113 | sizeof(ntfs_index_context), 0 /* offset */, | 3116 | sizeof(ntfs_index_context), 0 /* offset */, |
3114 | SLAB_HWCACHE_ALIGN, NULL /* ctor */); | 3117 | SLAB_HWCACHE_ALIGN, NULL /* ctor */); |
3115 | if (!ntfs_index_ctx_cache) { | 3118 | if (!ntfs_index_ctx_cache) { |
3116 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | 3119 | pr_crit("Failed to create %s!\n", ntfs_index_ctx_cache_name); |
3117 | ntfs_index_ctx_cache_name); | ||
3118 | goto ictx_err_out; | 3120 | goto ictx_err_out; |
3119 | } | 3121 | } |
3120 | ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, | 3122 | ntfs_attr_ctx_cache = kmem_cache_create(ntfs_attr_ctx_cache_name, |
3121 | sizeof(ntfs_attr_search_ctx), 0 /* offset */, | 3123 | sizeof(ntfs_attr_search_ctx), 0 /* offset */, |
3122 | SLAB_HWCACHE_ALIGN, NULL /* ctor */); | 3124 | SLAB_HWCACHE_ALIGN, NULL /* ctor */); |
3123 | if (!ntfs_attr_ctx_cache) { | 3125 | if (!ntfs_attr_ctx_cache) { |
3124 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | 3126 | pr_crit("NTFS: Failed to create %s!\n", |
3125 | ntfs_attr_ctx_cache_name); | 3127 | ntfs_attr_ctx_cache_name); |
3126 | goto actx_err_out; | 3128 | goto actx_err_out; |
3127 | } | 3129 | } |
3128 | 3130 | ||
@@ -3130,8 +3132,7 @@ static int __init init_ntfs_fs(void) | |||
3130 | (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, | 3132 | (NTFS_MAX_NAME_LEN+1) * sizeof(ntfschar), 0, |
3131 | SLAB_HWCACHE_ALIGN, NULL); | 3133 | SLAB_HWCACHE_ALIGN, NULL); |
3132 | if (!ntfs_name_cache) { | 3134 | if (!ntfs_name_cache) { |
3133 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | 3135 | pr_crit("Failed to create %s!\n", ntfs_name_cache_name); |
3134 | ntfs_name_cache_name); | ||
3135 | goto name_err_out; | 3136 | goto name_err_out; |
3136 | } | 3137 | } |
3137 | 3138 | ||
@@ -3139,8 +3140,7 @@ static int __init init_ntfs_fs(void) | |||
3139 | sizeof(ntfs_inode), 0, | 3140 | sizeof(ntfs_inode), 0, |
3140 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); | 3141 | SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL); |
3141 | if (!ntfs_inode_cache) { | 3142 | if (!ntfs_inode_cache) { |
3142 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | 3143 | pr_crit("Failed to create %s!\n", ntfs_inode_cache_name); |
3143 | ntfs_inode_cache_name); | ||
3144 | goto inode_err_out; | 3144 | goto inode_err_out; |
3145 | } | 3145 | } |
3146 | 3146 | ||
@@ -3149,15 +3149,14 @@ static int __init init_ntfs_fs(void) | |||
3149 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, | 3149 | SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, |
3150 | ntfs_big_inode_init_once); | 3150 | ntfs_big_inode_init_once); |
3151 | if (!ntfs_big_inode_cache) { | 3151 | if (!ntfs_big_inode_cache) { |
3152 | printk(KERN_CRIT "NTFS: Failed to create %s!\n", | 3152 | pr_crit("Failed to create %s!\n", ntfs_big_inode_cache_name); |
3153 | ntfs_big_inode_cache_name); | ||
3154 | goto big_inode_err_out; | 3153 | goto big_inode_err_out; |
3155 | } | 3154 | } |
3156 | 3155 | ||
3157 | /* Register the ntfs sysctls. */ | 3156 | /* Register the ntfs sysctls. */ |
3158 | err = ntfs_sysctl(1); | 3157 | err = ntfs_sysctl(1); |
3159 | if (err) { | 3158 | if (err) { |
3160 | printk(KERN_CRIT "NTFS: Failed to register NTFS sysctls!\n"); | 3159 | pr_crit("Failed to register NTFS sysctls!\n"); |
3161 | goto sysctl_err_out; | 3160 | goto sysctl_err_out; |
3162 | } | 3161 | } |
3163 | 3162 | ||
@@ -3166,7 +3165,7 @@ static int __init init_ntfs_fs(void) | |||
3166 | ntfs_debug("NTFS driver registered successfully."); | 3165 | ntfs_debug("NTFS driver registered successfully."); |
3167 | return 0; /* Success! */ | 3166 | return 0; /* Success! */ |
3168 | } | 3167 | } |
3169 | printk(KERN_CRIT "NTFS: Failed to register NTFS filesystem driver!\n"); | 3168 | pr_crit("Failed to register NTFS filesystem driver!\n"); |
3170 | 3169 | ||
3171 | /* Unregister the ntfs sysctls. */ | 3170 | /* Unregister the ntfs sysctls. */ |
3172 | ntfs_sysctl(0); | 3171 | ntfs_sysctl(0); |
@@ -3182,8 +3181,7 @@ actx_err_out: | |||
3182 | kmem_cache_destroy(ntfs_index_ctx_cache); | 3181 | kmem_cache_destroy(ntfs_index_ctx_cache); |
3183 | ictx_err_out: | 3182 | ictx_err_out: |
3184 | if (!err) { | 3183 | if (!err) { |
3185 | printk(KERN_CRIT "NTFS: Aborting NTFS filesystem driver " | 3184 | pr_crit("Aborting NTFS filesystem driver registration...\n"); |
3186 | "registration...\n"); | ||
3187 | err = -ENOMEM; | 3185 | err = -ENOMEM; |
3188 | } | 3186 | } |
3189 | return err; | 3187 | return err; |
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 555f4cddefe3..7e8282dcea2a 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c | |||
@@ -205,6 +205,7 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, | |||
205 | di->i_mode = cpu_to_le16(inode->i_mode); | 205 | di->i_mode = cpu_to_le16(inode->i_mode); |
206 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 206 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
207 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 207 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
208 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
208 | 209 | ||
209 | ocfs2_journal_dirty(handle, di_bh); | 210 | ocfs2_journal_dirty(handle, di_bh); |
210 | 211 | ||
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index e2edff38be52..b4deb5f750d9 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
@@ -5728,6 +5728,7 @@ int ocfs2_remove_btree_range(struct inode *inode, | |||
5728 | } | 5728 | } |
5729 | 5729 | ||
5730 | ocfs2_et_update_clusters(et, -len); | 5730 | ocfs2_et_update_clusters(et, -len); |
5731 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
5731 | 5732 | ||
5732 | ocfs2_journal_dirty(handle, et->et_root_bh); | 5733 | ocfs2_journal_dirty(handle, et->et_root_bh); |
5733 | 5734 | ||
@@ -6932,6 +6933,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
6932 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | 6933 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); |
6933 | spin_unlock(&oi->ip_lock); | 6934 | spin_unlock(&oi->ip_lock); |
6934 | 6935 | ||
6936 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
6935 | ocfs2_dinode_new_extent_list(inode, di); | 6937 | ocfs2_dinode_new_extent_list(inode, di); |
6936 | 6938 | ||
6937 | ocfs2_journal_dirty(handle, di_bh); | 6939 | ocfs2_journal_dirty(handle, di_bh); |
@@ -7208,6 +7210,7 @@ int ocfs2_truncate_inline(struct inode *inode, struct buffer_head *di_bh, | |||
7208 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); | 7210 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); |
7209 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 7211 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
7210 | 7212 | ||
7213 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
7211 | ocfs2_journal_dirty(handle, di_bh); | 7214 | ocfs2_journal_dirty(handle, di_bh); |
7212 | 7215 | ||
7213 | out_commit: | 7216 | out_commit: |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index aeb44e879c51..d310d12a9adc 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
@@ -571,7 +571,6 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
571 | { | 571 | { |
572 | struct inode *inode = file_inode(iocb->ki_filp); | 572 | struct inode *inode = file_inode(iocb->ki_filp); |
573 | int level; | 573 | int level; |
574 | wait_queue_head_t *wq = ocfs2_ioend_wq(inode); | ||
575 | 574 | ||
576 | /* this io's submitter should not have unlocked this before we could */ | 575 | /* this io's submitter should not have unlocked this before we could */ |
577 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); | 576 | BUG_ON(!ocfs2_iocb_is_rw_locked(iocb)); |
@@ -582,10 +581,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
582 | if (ocfs2_iocb_is_unaligned_aio(iocb)) { | 581 | if (ocfs2_iocb_is_unaligned_aio(iocb)) { |
583 | ocfs2_iocb_clear_unaligned_aio(iocb); | 582 | ocfs2_iocb_clear_unaligned_aio(iocb); |
584 | 583 | ||
585 | if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) && | 584 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); |
586 | waitqueue_active(wq)) { | ||
587 | wake_up_all(wq); | ||
588 | } | ||
589 | } | 585 | } |
590 | 586 | ||
591 | ocfs2_iocb_clear_rw_locked(iocb); | 587 | ocfs2_iocb_clear_rw_locked(iocb); |
@@ -2043,6 +2039,7 @@ out_write_size: | |||
2043 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 2039 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
2044 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); | 2040 | di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec); |
2045 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 2041 | di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
2042 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
2046 | ocfs2_journal_dirty(handle, wc->w_di_bh); | 2043 | ocfs2_journal_dirty(handle, wc->w_di_bh); |
2047 | 2044 | ||
2048 | ocfs2_commit_trans(osb, handle); | 2045 | ocfs2_commit_trans(osb, handle); |
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index f671e49beb34..6cae155d54df 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h | |||
@@ -102,9 +102,4 @@ enum ocfs2_iocb_lock_bits { | |||
102 | #define ocfs2_iocb_is_unaligned_aio(iocb) \ | 102 | #define ocfs2_iocb_is_unaligned_aio(iocb) \ |
103 | test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) | 103 | test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private) |
104 | 104 | ||
105 | #define OCFS2_IOEND_WQ_HASH_SZ 37 | ||
106 | #define ocfs2_ioend_wq(v) (&ocfs2__ioend_wq[((unsigned long)(v)) %\ | ||
107 | OCFS2_IOEND_WQ_HASH_SZ]) | ||
108 | extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; | ||
109 | |||
110 | #endif /* OCFS2_FILE_H */ | 105 | #endif /* OCFS2_FILE_H */ |
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index 5b704c63a103..1edcb141f639 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
90 | * information for this bh as it's not marked locally | 90 | * information for this bh as it's not marked locally |
91 | * uptodate. */ | 91 | * uptodate. */ |
92 | ret = -EIO; | 92 | ret = -EIO; |
93 | put_bh(bh); | ||
94 | mlog_errno(ret); | 93 | mlog_errno(ret); |
95 | } | 94 | } |
96 | 95 | ||
@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | |||
420 | 419 | ||
421 | if (!buffer_uptodate(bh)) { | 420 | if (!buffer_uptodate(bh)) { |
422 | ret = -EIO; | 421 | ret = -EIO; |
423 | put_bh(bh); | ||
424 | mlog_errno(ret); | 422 | mlog_errno(ret); |
425 | } | 423 | } |
426 | 424 | ||
diff --git a/fs/ocfs2/cluster/sys.c b/fs/ocfs2/cluster/sys.c index a4b07730b2e1..b7f57271d49c 100644 --- a/fs/ocfs2/cluster/sys.c +++ b/fs/ocfs2/cluster/sys.c | |||
@@ -41,7 +41,7 @@ static ssize_t version_show(struct kobject *kobj, struct kobj_attribute *attr, | |||
41 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); | 41 | return snprintf(buf, PAGE_SIZE, "%u\n", O2NM_API_VERSION); |
42 | } | 42 | } |
43 | static struct kobj_attribute attr_version = | 43 | static struct kobj_attribute attr_version = |
44 | __ATTR(interface_revision, S_IFREG | S_IRUGO, version_show, NULL); | 44 | __ATTR(interface_revision, S_IRUGO, version_show, NULL); |
45 | 45 | ||
46 | static struct attribute *o2cb_attrs[] = { | 46 | static struct attribute *o2cb_attrs[] = { |
47 | &attr_version.attr, | 47 | &attr_version.attr, |
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 2cd2406b4140..eb649d23a4de 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c | |||
@@ -262,17 +262,17 @@ static void o2net_update_recv_stats(struct o2net_sock_container *sc) | |||
262 | 262 | ||
263 | #endif /* CONFIG_OCFS2_FS_STATS */ | 263 | #endif /* CONFIG_OCFS2_FS_STATS */ |
264 | 264 | ||
265 | static inline int o2net_reconnect_delay(void) | 265 | static inline unsigned int o2net_reconnect_delay(void) |
266 | { | 266 | { |
267 | return o2nm_single_cluster->cl_reconnect_delay_ms; | 267 | return o2nm_single_cluster->cl_reconnect_delay_ms; |
268 | } | 268 | } |
269 | 269 | ||
270 | static inline int o2net_keepalive_delay(void) | 270 | static inline unsigned int o2net_keepalive_delay(void) |
271 | { | 271 | { |
272 | return o2nm_single_cluster->cl_keepalive_delay_ms; | 272 | return o2nm_single_cluster->cl_keepalive_delay_ms; |
273 | } | 273 | } |
274 | 274 | ||
275 | static inline int o2net_idle_timeout(void) | 275 | static inline unsigned int o2net_idle_timeout(void) |
276 | { | 276 | { |
277 | return o2nm_single_cluster->cl_idle_timeout_ms; | 277 | return o2nm_single_cluster->cl_idle_timeout_ms; |
278 | } | 278 | } |
@@ -1964,18 +1964,30 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes) | |||
1964 | goto out; | 1964 | goto out; |
1965 | } | 1965 | } |
1966 | 1966 | ||
1967 | /* ->sk_data_ready is also called for a newly established child socket | 1967 | /* This callback may called twice when a new connection |
1968 | * before it has been accepted and the acceptor has set up their | 1968 | * is being established as a child socket inherits everything |
1969 | * data_ready.. we only want to queue listen work for our listening | 1969 | * from a parent LISTEN socket, including the data_ready cb of |
1970 | * socket */ | 1970 | * the parent. This leads to a hazard. In o2net_accept_one() |
1971 | * we are still initializing the child socket but have not | ||
1972 | * changed the inherited data_ready callback yet when | ||
1973 | * data starts arriving. | ||
1974 | * We avoid this hazard by checking the state. | ||
1975 | * For the listening socket, the state will be TCP_LISTEN; for the new | ||
1976 | * socket, will be TCP_ESTABLISHED. Also, in this case, | ||
1977 | * sk->sk_user_data is not a valid function pointer. | ||
1978 | */ | ||
1979 | |||
1971 | if (sk->sk_state == TCP_LISTEN) { | 1980 | if (sk->sk_state == TCP_LISTEN) { |
1972 | mlog(ML_TCP, "bytes: %d\n", bytes); | 1981 | mlog(ML_TCP, "bytes: %d\n", bytes); |
1973 | queue_work(o2net_wq, &o2net_listen_work); | 1982 | queue_work(o2net_wq, &o2net_listen_work); |
1983 | } else { | ||
1984 | ready = NULL; | ||
1974 | } | 1985 | } |
1975 | 1986 | ||
1976 | out: | 1987 | out: |
1977 | read_unlock(&sk->sk_callback_lock); | 1988 | read_unlock(&sk->sk_callback_lock); |
1978 | ready(sk, bytes); | 1989 | if (ready != NULL) |
1990 | ready(sk, bytes); | ||
1979 | } | 1991 | } |
1980 | 1992 | ||
1981 | static int o2net_open_listening_sock(__be32 addr, __be16 port) | 1993 | static int o2net_open_listening_sock(__be32 addr, __be16 port) |
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c index 0d3a97d2d5f6..e2e05a106beb 100644 --- a/fs/ocfs2/dcache.c +++ b/fs/ocfs2/dcache.c | |||
@@ -37,7 +37,6 @@ | |||
37 | #include "dlmglue.h" | 37 | #include "dlmglue.h" |
38 | #include "file.h" | 38 | #include "file.h" |
39 | #include "inode.h" | 39 | #include "inode.h" |
40 | #include "super.h" | ||
41 | #include "ocfs2_trace.h" | 40 | #include "ocfs2_trace.h" |
42 | 41 | ||
43 | void ocfs2_dentry_attach_gen(struct dentry *dentry) | 42 | void ocfs2_dentry_attach_gen(struct dentry *dentry) |
@@ -346,52 +345,6 @@ out_attach: | |||
346 | return ret; | 345 | return ret; |
347 | } | 346 | } |
348 | 347 | ||
349 | DEFINE_SPINLOCK(dentry_list_lock); | ||
350 | |||
351 | /* We limit the number of dentry locks to drop in one go. We have | ||
352 | * this limit so that we don't starve other users of ocfs2_wq. */ | ||
353 | #define DL_INODE_DROP_COUNT 64 | ||
354 | |||
355 | /* Drop inode references from dentry locks */ | ||
356 | static void __ocfs2_drop_dl_inodes(struct ocfs2_super *osb, int drop_count) | ||
357 | { | ||
358 | struct ocfs2_dentry_lock *dl; | ||
359 | |||
360 | spin_lock(&dentry_list_lock); | ||
361 | while (osb->dentry_lock_list && (drop_count < 0 || drop_count--)) { | ||
362 | dl = osb->dentry_lock_list; | ||
363 | osb->dentry_lock_list = dl->dl_next; | ||
364 | spin_unlock(&dentry_list_lock); | ||
365 | iput(dl->dl_inode); | ||
366 | kfree(dl); | ||
367 | spin_lock(&dentry_list_lock); | ||
368 | } | ||
369 | spin_unlock(&dentry_list_lock); | ||
370 | } | ||
371 | |||
372 | void ocfs2_drop_dl_inodes(struct work_struct *work) | ||
373 | { | ||
374 | struct ocfs2_super *osb = container_of(work, struct ocfs2_super, | ||
375 | dentry_lock_work); | ||
376 | |||
377 | __ocfs2_drop_dl_inodes(osb, DL_INODE_DROP_COUNT); | ||
378 | /* | ||
379 | * Don't queue dropping if umount is in progress. We flush the | ||
380 | * list in ocfs2_dismount_volume | ||
381 | */ | ||
382 | spin_lock(&dentry_list_lock); | ||
383 | if (osb->dentry_lock_list && | ||
384 | !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) | ||
385 | queue_work(ocfs2_wq, &osb->dentry_lock_work); | ||
386 | spin_unlock(&dentry_list_lock); | ||
387 | } | ||
388 | |||
389 | /* Flush the whole work queue */ | ||
390 | void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) | ||
391 | { | ||
392 | __ocfs2_drop_dl_inodes(osb, -1); | ||
393 | } | ||
394 | |||
395 | /* | 348 | /* |
396 | * ocfs2_dentry_iput() and friends. | 349 | * ocfs2_dentry_iput() and friends. |
397 | * | 350 | * |
@@ -416,24 +369,16 @@ void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb) | |||
416 | static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, | 369 | static void ocfs2_drop_dentry_lock(struct ocfs2_super *osb, |
417 | struct ocfs2_dentry_lock *dl) | 370 | struct ocfs2_dentry_lock *dl) |
418 | { | 371 | { |
372 | iput(dl->dl_inode); | ||
419 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); | 373 | ocfs2_simple_drop_lockres(osb, &dl->dl_lockres); |
420 | ocfs2_lock_res_free(&dl->dl_lockres); | 374 | ocfs2_lock_res_free(&dl->dl_lockres); |
421 | 375 | kfree(dl); | |
422 | /* We leave dropping of inode reference to ocfs2_wq as that can | ||
423 | * possibly lead to inode deletion which gets tricky */ | ||
424 | spin_lock(&dentry_list_lock); | ||
425 | if (!osb->dentry_lock_list && | ||
426 | !ocfs2_test_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED)) | ||
427 | queue_work(ocfs2_wq, &osb->dentry_lock_work); | ||
428 | dl->dl_next = osb->dentry_lock_list; | ||
429 | osb->dentry_lock_list = dl; | ||
430 | spin_unlock(&dentry_list_lock); | ||
431 | } | 376 | } |
432 | 377 | ||
433 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | 378 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, |
434 | struct ocfs2_dentry_lock *dl) | 379 | struct ocfs2_dentry_lock *dl) |
435 | { | 380 | { |
436 | int unlock; | 381 | int unlock = 0; |
437 | 382 | ||
438 | BUG_ON(dl->dl_count == 0); | 383 | BUG_ON(dl->dl_count == 0); |
439 | 384 | ||
diff --git a/fs/ocfs2/dcache.h b/fs/ocfs2/dcache.h index b79eff709958..55f58892b153 100644 --- a/fs/ocfs2/dcache.h +++ b/fs/ocfs2/dcache.h | |||
@@ -29,13 +29,8 @@ | |||
29 | extern const struct dentry_operations ocfs2_dentry_ops; | 29 | extern const struct dentry_operations ocfs2_dentry_ops; |
30 | 30 | ||
31 | struct ocfs2_dentry_lock { | 31 | struct ocfs2_dentry_lock { |
32 | /* Use count of dentry lock */ | ||
33 | unsigned int dl_count; | 32 | unsigned int dl_count; |
34 | union { | 33 | u64 dl_parent_blkno; |
35 | /* Linked list of dentry locks to release */ | ||
36 | struct ocfs2_dentry_lock *dl_next; | ||
37 | u64 dl_parent_blkno; | ||
38 | }; | ||
39 | 34 | ||
40 | /* | 35 | /* |
41 | * The ocfs2_dentry_lock keeps an inode reference until | 36 | * The ocfs2_dentry_lock keeps an inode reference until |
@@ -49,14 +44,9 @@ struct ocfs2_dentry_lock { | |||
49 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, | 44 | int ocfs2_dentry_attach_lock(struct dentry *dentry, struct inode *inode, |
50 | u64 parent_blkno); | 45 | u64 parent_blkno); |
51 | 46 | ||
52 | extern spinlock_t dentry_list_lock; | ||
53 | |||
54 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, | 47 | void ocfs2_dentry_lock_put(struct ocfs2_super *osb, |
55 | struct ocfs2_dentry_lock *dl); | 48 | struct ocfs2_dentry_lock *dl); |
56 | 49 | ||
57 | void ocfs2_drop_dl_inodes(struct work_struct *work); | ||
58 | void ocfs2_drop_all_dl_inodes(struct ocfs2_super *osb); | ||
59 | |||
60 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, | 50 | struct dentry *ocfs2_find_local_alias(struct inode *inode, u64 parent_blkno, |
61 | int skip_unhashed); | 51 | int skip_unhashed); |
62 | 52 | ||
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 91a7e85ac8fd..0717662b4aef 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
@@ -2957,6 +2957,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
2957 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); | 2957 | ocfs2_init_dir_trailer(dir, dirdata_bh, i); |
2958 | } | 2958 | } |
2959 | 2959 | ||
2960 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
2960 | ocfs2_journal_dirty(handle, dirdata_bh); | 2961 | ocfs2_journal_dirty(handle, dirdata_bh); |
2961 | 2962 | ||
2962 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { | 2963 | if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) { |
@@ -3005,6 +3006,7 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
3005 | di->i_size = cpu_to_le64(sb->s_blocksize); | 3006 | di->i_size = cpu_to_le64(sb->s_blocksize); |
3006 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); | 3007 | di->i_ctime = di->i_mtime = cpu_to_le64(dir->i_ctime.tv_sec); |
3007 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); | 3008 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(dir->i_ctime.tv_nsec); |
3009 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
3008 | 3010 | ||
3009 | /* | 3011 | /* |
3010 | * This should never fail as our extent list is empty and all | 3012 | * This should never fail as our extent list is empty and all |
@@ -3338,6 +3340,7 @@ do_extend: | |||
3338 | } else { | 3340 | } else { |
3339 | de->rec_len = cpu_to_le16(sb->s_blocksize); | 3341 | de->rec_len = cpu_to_le16(sb->s_blocksize); |
3340 | } | 3342 | } |
3343 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
3341 | ocfs2_journal_dirty(handle, new_bh); | 3344 | ocfs2_journal_dirty(handle, new_bh); |
3342 | 3345 | ||
3343 | dir_i_size += dir->i_sb->s_blocksize; | 3346 | dir_i_size += dir->i_sb->s_blocksize; |
@@ -3896,6 +3899,7 @@ out_commit: | |||
3896 | dquot_free_space_nodirty(dir, | 3899 | dquot_free_space_nodirty(dir, |
3897 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); | 3900 | ocfs2_clusters_to_bytes(dir->i_sb, 1)); |
3898 | 3901 | ||
3902 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
3899 | ocfs2_commit_trans(osb, handle); | 3903 | ocfs2_commit_trans(osb, handle); |
3900 | 3904 | ||
3901 | out: | 3905 | out: |
@@ -4134,6 +4138,7 @@ static int ocfs2_expand_inline_dx_root(struct inode *dir, | |||
4134 | mlog_errno(ret); | 4138 | mlog_errno(ret); |
4135 | did_quota = 0; | 4139 | did_quota = 0; |
4136 | 4140 | ||
4141 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
4137 | ocfs2_journal_dirty(handle, dx_root_bh); | 4142 | ocfs2_journal_dirty(handle, dx_root_bh); |
4138 | 4143 | ||
4139 | out_commit: | 4144 | out_commit: |
@@ -4401,6 +4406,7 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir, | |||
4401 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); | 4406 | di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features); |
4402 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 4407 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
4403 | di->i_dx_root = cpu_to_le64(0ULL); | 4408 | di->i_dx_root = cpu_to_le64(0ULL); |
4409 | ocfs2_update_inode_fsync_trans(handle, dir, 1); | ||
4404 | 4410 | ||
4405 | ocfs2_journal_dirty(handle, di_bh); | 4411 | ocfs2_journal_dirty(handle, di_bh); |
4406 | 4412 | ||
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 33660a4a52fa..c973690dc0bc 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c | |||
@@ -1123,7 +1123,6 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
1123 | struct dlm_ctxt *dlm = NULL; | 1123 | struct dlm_ctxt *dlm = NULL; |
1124 | char *local = NULL; | 1124 | char *local = NULL; |
1125 | int status = 0; | 1125 | int status = 0; |
1126 | int locked = 0; | ||
1127 | 1126 | ||
1128 | qr = (struct dlm_query_region *) msg->buf; | 1127 | qr = (struct dlm_query_region *) msg->buf; |
1129 | 1128 | ||
@@ -1132,10 +1131,8 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
1132 | 1131 | ||
1133 | /* buffer used in dlm_mast_regions() */ | 1132 | /* buffer used in dlm_mast_regions() */ |
1134 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); | 1133 | local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); |
1135 | if (!local) { | 1134 | if (!local) |
1136 | status = -ENOMEM; | 1135 | return -ENOMEM; |
1137 | goto bail; | ||
1138 | } | ||
1139 | 1136 | ||
1140 | status = -EINVAL; | 1137 | status = -EINVAL; |
1141 | 1138 | ||
@@ -1144,16 +1141,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
1144 | if (!dlm) { | 1141 | if (!dlm) { |
1145 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | 1142 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
1146 | "before join domain\n", qr->qr_node, qr->qr_domain); | 1143 | "before join domain\n", qr->qr_node, qr->qr_domain); |
1147 | goto bail; | 1144 | goto out_domain_lock; |
1148 | } | 1145 | } |
1149 | 1146 | ||
1150 | spin_lock(&dlm->spinlock); | 1147 | spin_lock(&dlm->spinlock); |
1151 | locked = 1; | ||
1152 | if (dlm->joining_node != qr->qr_node) { | 1148 | if (dlm->joining_node != qr->qr_node) { |
1153 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " | 1149 | mlog(ML_ERROR, "Node %d queried hb regions on domain %s " |
1154 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, | 1150 | "but joining node is %d\n", qr->qr_node, qr->qr_domain, |
1155 | dlm->joining_node); | 1151 | dlm->joining_node); |
1156 | goto bail; | 1152 | goto out_dlm_lock; |
1157 | } | 1153 | } |
1158 | 1154 | ||
1159 | /* Support for global heartbeat was added in 1.1 */ | 1155 | /* Support for global heartbeat was added in 1.1 */ |
@@ -1163,14 +1159,15 @@ static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, | |||
1163 | "but active dlm protocol is %d.%d\n", qr->qr_node, | 1159 | "but active dlm protocol is %d.%d\n", qr->qr_node, |
1164 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, | 1160 | qr->qr_domain, dlm->dlm_locking_proto.pv_major, |
1165 | dlm->dlm_locking_proto.pv_minor); | 1161 | dlm->dlm_locking_proto.pv_minor); |
1166 | goto bail; | 1162 | goto out_dlm_lock; |
1167 | } | 1163 | } |
1168 | 1164 | ||
1169 | status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); | 1165 | status = dlm_match_regions(dlm, qr, local, sizeof(qr->qr_regions)); |
1170 | 1166 | ||
1171 | bail: | 1167 | out_dlm_lock: |
1172 | if (locked) | 1168 | spin_unlock(&dlm->spinlock); |
1173 | spin_unlock(&dlm->spinlock); | 1169 | |
1170 | out_domain_lock: | ||
1174 | spin_unlock(&dlm_domain_lock); | 1171 | spin_unlock(&dlm_domain_lock); |
1175 | 1172 | ||
1176 | kfree(local); | 1173 | kfree(local); |
@@ -1877,19 +1874,19 @@ static int dlm_join_domain(struct dlm_ctxt *dlm) | |||
1877 | goto bail; | 1874 | goto bail; |
1878 | } | 1875 | } |
1879 | 1876 | ||
1880 | status = dlm_debug_init(dlm); | 1877 | status = dlm_launch_thread(dlm); |
1881 | if (status < 0) { | 1878 | if (status < 0) { |
1882 | mlog_errno(status); | 1879 | mlog_errno(status); |
1883 | goto bail; | 1880 | goto bail; |
1884 | } | 1881 | } |
1885 | 1882 | ||
1886 | status = dlm_launch_thread(dlm); | 1883 | status = dlm_launch_recovery_thread(dlm); |
1887 | if (status < 0) { | 1884 | if (status < 0) { |
1888 | mlog_errno(status); | 1885 | mlog_errno(status); |
1889 | goto bail; | 1886 | goto bail; |
1890 | } | 1887 | } |
1891 | 1888 | ||
1892 | status = dlm_launch_recovery_thread(dlm); | 1889 | status = dlm_debug_init(dlm); |
1893 | if (status < 0) { | 1890 | if (status < 0) { |
1894 | mlog_errno(status); | 1891 | mlog_errno(status); |
1895 | goto bail; | 1892 | goto bail; |
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 7035af09cc03..fe29f7978f81 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c | |||
@@ -537,7 +537,10 @@ master_here: | |||
537 | /* success! see if any other nodes need recovery */ | 537 | /* success! see if any other nodes need recovery */ |
538 | mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", | 538 | mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n", |
539 | dlm->name, dlm->reco.dead_node, dlm->node_num); | 539 | dlm->name, dlm->reco.dead_node, dlm->node_num); |
540 | dlm_reset_recovery(dlm); | 540 | spin_lock(&dlm->spinlock); |
541 | __dlm_reset_recovery(dlm); | ||
542 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; | ||
543 | spin_unlock(&dlm->spinlock); | ||
541 | } | 544 | } |
542 | dlm_end_recovery(dlm); | 545 | dlm_end_recovery(dlm); |
543 | 546 | ||
@@ -695,6 +698,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node) | |||
695 | if (all_nodes_done) { | 698 | if (all_nodes_done) { |
696 | int ret; | 699 | int ret; |
697 | 700 | ||
701 | /* Set this flag on recovery master to avoid | ||
702 | * a new recovery for another dead node start | ||
703 | * before the recovery is not done. That may | ||
704 | * cause recovery hung.*/ | ||
705 | spin_lock(&dlm->spinlock); | ||
706 | dlm->reco.state |= DLM_RECO_STATE_FINALIZE; | ||
707 | spin_unlock(&dlm->spinlock); | ||
708 | |||
698 | /* all nodes are now in DLM_RECO_NODE_DATA_DONE state | 709 | /* all nodes are now in DLM_RECO_NODE_DATA_DONE state |
699 | * just send a finalize message to everyone and | 710 | * just send a finalize message to everyone and |
700 | * clean up */ | 711 | * clean up */ |
@@ -1750,13 +1761,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1750 | struct dlm_migratable_lockres *mres) | 1761 | struct dlm_migratable_lockres *mres) |
1751 | { | 1762 | { |
1752 | struct dlm_migratable_lock *ml; | 1763 | struct dlm_migratable_lock *ml; |
1753 | struct list_head *queue; | 1764 | struct list_head *queue, *iter; |
1754 | struct list_head *tmpq = NULL; | 1765 | struct list_head *tmpq = NULL; |
1755 | struct dlm_lock *newlock = NULL; | 1766 | struct dlm_lock *newlock = NULL; |
1756 | struct dlm_lockstatus *lksb = NULL; | 1767 | struct dlm_lockstatus *lksb = NULL; |
1757 | int ret = 0; | 1768 | int ret = 0; |
1758 | int i, j, bad; | 1769 | int i, j, bad; |
1759 | struct dlm_lock *lock = NULL; | 1770 | struct dlm_lock *lock; |
1760 | u8 from = O2NM_MAX_NODES; | 1771 | u8 from = O2NM_MAX_NODES; |
1761 | unsigned int added = 0; | 1772 | unsigned int added = 0; |
1762 | __be64 c; | 1773 | __be64 c; |
@@ -1791,14 +1802,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm, | |||
1791 | /* MIGRATION ONLY! */ | 1802 | /* MIGRATION ONLY! */ |
1792 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); | 1803 | BUG_ON(!(mres->flags & DLM_MRES_MIGRATION)); |
1793 | 1804 | ||
1805 | lock = NULL; | ||
1794 | spin_lock(&res->spinlock); | 1806 | spin_lock(&res->spinlock); |
1795 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { | 1807 | for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) { |
1796 | tmpq = dlm_list_idx_to_ptr(res, j); | 1808 | tmpq = dlm_list_idx_to_ptr(res, j); |
1797 | list_for_each_entry(lock, tmpq, list) { | 1809 | list_for_each(iter, tmpq) { |
1798 | if (lock->ml.cookie != ml->cookie) | 1810 | lock = list_entry(iter, |
1799 | lock = NULL; | 1811 | struct dlm_lock, list); |
1800 | else | 1812 | if (lock->ml.cookie == ml->cookie) |
1801 | break; | 1813 | break; |
1814 | lock = NULL; | ||
1802 | } | 1815 | } |
1803 | if (lock) | 1816 | if (lock) |
1804 | break; | 1817 | break; |
@@ -2882,8 +2895,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data, | |||
2882 | BUG(); | 2895 | BUG(); |
2883 | } | 2896 | } |
2884 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; | 2897 | dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE; |
2898 | __dlm_reset_recovery(dlm); | ||
2885 | spin_unlock(&dlm->spinlock); | 2899 | spin_unlock(&dlm->spinlock); |
2886 | dlm_reset_recovery(dlm); | ||
2887 | dlm_kick_recovery_thread(dlm); | 2900 | dlm_kick_recovery_thread(dlm); |
2888 | break; | 2901 | break; |
2889 | default: | 2902 | default: |
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 19986959d149..6bd690b5a061 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
@@ -3144,22 +3144,60 @@ out: | |||
3144 | return 0; | 3144 | return 0; |
3145 | } | 3145 | } |
3146 | 3146 | ||
3147 | static void ocfs2_process_blocked_lock(struct ocfs2_super *osb, | ||
3148 | struct ocfs2_lock_res *lockres); | ||
3149 | |||
3147 | /* Mark the lockres as being dropped. It will no longer be | 3150 | /* Mark the lockres as being dropped. It will no longer be |
3148 | * queued if blocking, but we still may have to wait on it | 3151 | * queued if blocking, but we still may have to wait on it |
3149 | * being dequeued from the downconvert thread before we can consider | 3152 | * being dequeued from the downconvert thread before we can consider |
3150 | * it safe to drop. | 3153 | * it safe to drop. |
3151 | * | 3154 | * |
3152 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ | 3155 | * You can *not* attempt to call cluster_lock on this lockres anymore. */ |
3153 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres) | 3156 | void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, |
3157 | struct ocfs2_lock_res *lockres) | ||
3154 | { | 3158 | { |
3155 | int status; | 3159 | int status; |
3156 | struct ocfs2_mask_waiter mw; | 3160 | struct ocfs2_mask_waiter mw; |
3157 | unsigned long flags; | 3161 | unsigned long flags, flags2; |
3158 | 3162 | ||
3159 | ocfs2_init_mask_waiter(&mw); | 3163 | ocfs2_init_mask_waiter(&mw); |
3160 | 3164 | ||
3161 | spin_lock_irqsave(&lockres->l_lock, flags); | 3165 | spin_lock_irqsave(&lockres->l_lock, flags); |
3162 | lockres->l_flags |= OCFS2_LOCK_FREEING; | 3166 | lockres->l_flags |= OCFS2_LOCK_FREEING; |
3167 | if (lockres->l_flags & OCFS2_LOCK_QUEUED && current == osb->dc_task) { | ||
3168 | /* | ||
3169 | * We know the downconvert is queued but not in progress | ||
3170 | * because we are the downconvert thread and processing | ||
3171 | * different lock. So we can just remove the lock from the | ||
3172 | * queue. This is not only an optimization but also a way | ||
3173 | * to avoid the following deadlock: | ||
3174 | * ocfs2_dentry_post_unlock() | ||
3175 | * ocfs2_dentry_lock_put() | ||
3176 | * ocfs2_drop_dentry_lock() | ||
3177 | * iput() | ||
3178 | * ocfs2_evict_inode() | ||
3179 | * ocfs2_clear_inode() | ||
3180 | * ocfs2_mark_lockres_freeing() | ||
3181 | * ... blocks waiting for OCFS2_LOCK_QUEUED | ||
3182 | * since we are the downconvert thread which | ||
3183 | * should clear the flag. | ||
3184 | */ | ||
3185 | spin_unlock_irqrestore(&lockres->l_lock, flags); | ||
3186 | spin_lock_irqsave(&osb->dc_task_lock, flags2); | ||
3187 | list_del_init(&lockres->l_blocked_list); | ||
3188 | osb->blocked_lock_count--; | ||
3189 | spin_unlock_irqrestore(&osb->dc_task_lock, flags2); | ||
3190 | /* | ||
3191 | * Warn if we recurse into another post_unlock call. Strictly | ||
3192 | * speaking it isn't a problem but we need to be careful if | ||
3193 | * that happens (stack overflow, deadlocks, ...) so warn if | ||
3194 | * ocfs2 grows a path for which this can happen. | ||
3195 | */ | ||
3196 | WARN_ON_ONCE(lockres->l_ops->post_unlock); | ||
3197 | /* Since the lock is freeing we don't do much in the fn below */ | ||
3198 | ocfs2_process_blocked_lock(osb, lockres); | ||
3199 | return; | ||
3200 | } | ||
3163 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { | 3201 | while (lockres->l_flags & OCFS2_LOCK_QUEUED) { |
3164 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); | 3202 | lockres_add_mask_waiter(lockres, &mw, OCFS2_LOCK_QUEUED, 0); |
3165 | spin_unlock_irqrestore(&lockres->l_lock, flags); | 3203 | spin_unlock_irqrestore(&lockres->l_lock, flags); |
@@ -3180,7 +3218,7 @@ void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | |||
3180 | { | 3218 | { |
3181 | int ret; | 3219 | int ret; |
3182 | 3220 | ||
3183 | ocfs2_mark_lockres_freeing(lockres); | 3221 | ocfs2_mark_lockres_freeing(osb, lockres); |
3184 | ret = ocfs2_drop_lock(osb, lockres); | 3222 | ret = ocfs2_drop_lock(osb, lockres); |
3185 | if (ret) | 3223 | if (ret) |
3186 | mlog_errno(ret); | 3224 | mlog_errno(ret); |
diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index 1d596d8c4a4a..d293a22c32c5 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h | |||
@@ -157,7 +157,8 @@ int ocfs2_refcount_lock(struct ocfs2_refcount_tree *ref_tree, int ex); | |||
157 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); | 157 | void ocfs2_refcount_unlock(struct ocfs2_refcount_tree *ref_tree, int ex); |
158 | 158 | ||
159 | 159 | ||
160 | void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres); | 160 | void ocfs2_mark_lockres_freeing(struct ocfs2_super *osb, |
161 | struct ocfs2_lock_res *lockres); | ||
161 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, | 162 | void ocfs2_simple_drop_lockres(struct ocfs2_super *osb, |
162 | struct ocfs2_lock_res *lockres); | 163 | struct ocfs2_lock_res *lockres); |
163 | 164 | ||
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 51632c40e896..ff33c5ef87f2 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
@@ -175,9 +175,13 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, | |||
175 | int datasync) | 175 | int datasync) |
176 | { | 176 | { |
177 | int err = 0; | 177 | int err = 0; |
178 | journal_t *journal; | ||
179 | struct inode *inode = file->f_mapping->host; | 178 | struct inode *inode = file->f_mapping->host; |
180 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 179 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
180 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
181 | journal_t *journal = osb->journal->j_journal; | ||
182 | int ret; | ||
183 | tid_t commit_tid; | ||
184 | bool needs_barrier = false; | ||
181 | 185 | ||
182 | trace_ocfs2_sync_file(inode, file, file->f_path.dentry, | 186 | trace_ocfs2_sync_file(inode, file, file->f_path.dentry, |
183 | OCFS2_I(inode)->ip_blkno, | 187 | OCFS2_I(inode)->ip_blkno, |
@@ -192,29 +196,19 @@ static int ocfs2_sync_file(struct file *file, loff_t start, loff_t end, | |||
192 | if (err) | 196 | if (err) |
193 | return err; | 197 | return err; |
194 | 198 | ||
195 | /* | 199 | commit_tid = datasync ? oi->i_datasync_tid : oi->i_sync_tid; |
196 | * Probably don't need the i_mutex at all in here, just putting it here | 200 | if (journal->j_flags & JBD2_BARRIER && |
197 | * to be consistent with how fsync used to be called, someone more | 201 | !jbd2_trans_will_send_data_barrier(journal, commit_tid)) |
198 | * familiar with the fs could possibly remove it. | 202 | needs_barrier = true; |
199 | */ | 203 | err = jbd2_complete_transaction(journal, commit_tid); |
200 | mutex_lock(&inode->i_mutex); | 204 | if (needs_barrier) { |
201 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) { | 205 | ret = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); |
202 | /* | 206 | if (!err) |
203 | * We still have to flush drive's caches to get data to the | 207 | err = ret; |
204 | * platter | ||
205 | */ | ||
206 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | ||
207 | blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); | ||
208 | goto bail; | ||
209 | } | 208 | } |
210 | 209 | ||
211 | journal = osb->journal->j_journal; | ||
212 | err = jbd2_journal_force_commit(journal); | ||
213 | |||
214 | bail: | ||
215 | if (err) | 210 | if (err) |
216 | mlog_errno(err); | 211 | mlog_errno(err); |
217 | mutex_unlock(&inode->i_mutex); | ||
218 | 212 | ||
219 | return (err < 0) ? -EIO : 0; | 213 | return (err < 0) ? -EIO : 0; |
220 | } | 214 | } |
@@ -292,6 +286,7 @@ int ocfs2_update_inode_atime(struct inode *inode, | |||
292 | inode->i_atime = CURRENT_TIME; | 286 | inode->i_atime = CURRENT_TIME; |
293 | di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); | 287 | di->i_atime = cpu_to_le64(inode->i_atime.tv_sec); |
294 | di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); | 288 | di->i_atime_nsec = cpu_to_le32(inode->i_atime.tv_nsec); |
289 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
295 | ocfs2_journal_dirty(handle, bh); | 290 | ocfs2_journal_dirty(handle, bh); |
296 | 291 | ||
297 | out_commit: | 292 | out_commit: |
@@ -341,6 +336,7 @@ int ocfs2_simple_size_update(struct inode *inode, | |||
341 | if (ret < 0) | 336 | if (ret < 0) |
342 | mlog_errno(ret); | 337 | mlog_errno(ret); |
343 | 338 | ||
339 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
344 | ocfs2_commit_trans(osb, handle); | 340 | ocfs2_commit_trans(osb, handle); |
345 | out: | 341 | out: |
346 | return ret; | 342 | return ret; |
@@ -435,6 +431,7 @@ static int ocfs2_orphan_for_truncate(struct ocfs2_super *osb, | |||
435 | di->i_size = cpu_to_le64(new_i_size); | 431 | di->i_size = cpu_to_le64(new_i_size); |
436 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); | 432 | di->i_ctime = di->i_mtime = cpu_to_le64(inode->i_ctime.tv_sec); |
437 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 433 | di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
434 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
438 | 435 | ||
439 | ocfs2_journal_dirty(handle, fe_bh); | 436 | ocfs2_journal_dirty(handle, fe_bh); |
440 | 437 | ||
@@ -650,7 +647,7 @@ restarted_transaction: | |||
650 | mlog_errno(status); | 647 | mlog_errno(status); |
651 | goto leave; | 648 | goto leave; |
652 | } | 649 | } |
653 | 650 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | |
654 | ocfs2_journal_dirty(handle, bh); | 651 | ocfs2_journal_dirty(handle, bh); |
655 | 652 | ||
656 | spin_lock(&OCFS2_I(inode)->ip_lock); | 653 | spin_lock(&OCFS2_I(inode)->ip_lock); |
@@ -743,6 +740,7 @@ static handle_t *ocfs2_zero_start_ordered_transaction(struct inode *inode, | |||
743 | OCFS2_JOURNAL_ACCESS_WRITE); | 740 | OCFS2_JOURNAL_ACCESS_WRITE); |
744 | if (ret) | 741 | if (ret) |
745 | mlog_errno(ret); | 742 | mlog_errno(ret); |
743 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
746 | 744 | ||
747 | out: | 745 | out: |
748 | if (ret) { | 746 | if (ret) { |
@@ -840,6 +838,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, | |||
840 | di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 838 | di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
841 | di->i_mtime_nsec = di->i_ctime_nsec; | 839 | di->i_mtime_nsec = di->i_ctime_nsec; |
842 | ocfs2_journal_dirty(handle, di_bh); | 840 | ocfs2_journal_dirty(handle, di_bh); |
841 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
843 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | 842 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); |
844 | } | 843 | } |
845 | 844 | ||
@@ -1344,6 +1343,7 @@ static int __ocfs2_write_remove_suid(struct inode *inode, | |||
1344 | 1343 | ||
1345 | di = (struct ocfs2_dinode *) bh->b_data; | 1344 | di = (struct ocfs2_dinode *) bh->b_data; |
1346 | di->i_mode = cpu_to_le16(inode->i_mode); | 1345 | di->i_mode = cpu_to_le16(inode->i_mode); |
1346 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
1347 | 1347 | ||
1348 | ocfs2_journal_dirty(handle, bh); | 1348 | ocfs2_journal_dirty(handle, bh); |
1349 | 1349 | ||
@@ -1576,6 +1576,7 @@ static int ocfs2_zero_partial_clusters(struct inode *inode, | |||
1576 | if (ret) | 1576 | if (ret) |
1577 | mlog_errno(ret); | 1577 | mlog_errno(ret); |
1578 | } | 1578 | } |
1579 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
1579 | 1580 | ||
1580 | ocfs2_commit_trans(osb, handle); | 1581 | ocfs2_commit_trans(osb, handle); |
1581 | out: | 1582 | out: |
@@ -2061,13 +2062,6 @@ out: | |||
2061 | return ret; | 2062 | return ret; |
2062 | } | 2063 | } |
2063 | 2064 | ||
2064 | static void ocfs2_aiodio_wait(struct inode *inode) | ||
2065 | { | ||
2066 | wait_queue_head_t *wq = ocfs2_ioend_wq(inode); | ||
2067 | |||
2068 | wait_event(*wq, (atomic_read(&OCFS2_I(inode)->ip_unaligned_aio) == 0)); | ||
2069 | } | ||
2070 | |||
2071 | static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) | 2065 | static int ocfs2_is_io_unaligned(struct inode *inode, size_t count, loff_t pos) |
2072 | { | 2066 | { |
2073 | int blockmask = inode->i_sb->s_blocksize - 1; | 2067 | int blockmask = inode->i_sb->s_blocksize - 1; |
@@ -2345,10 +2339,8 @@ relock: | |||
2345 | * Wait on previous unaligned aio to complete before | 2339 | * Wait on previous unaligned aio to complete before |
2346 | * proceeding. | 2340 | * proceeding. |
2347 | */ | 2341 | */ |
2348 | ocfs2_aiodio_wait(inode); | 2342 | mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio); |
2349 | 2343 | /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */ | |
2350 | /* Mark the iocb as needing a decrement in ocfs2_dio_end_io */ | ||
2351 | atomic_inc(&OCFS2_I(inode)->ip_unaligned_aio); | ||
2352 | ocfs2_iocb_set_unaligned_aio(iocb); | 2344 | ocfs2_iocb_set_unaligned_aio(iocb); |
2353 | } | 2345 | } |
2354 | 2346 | ||
@@ -2428,7 +2420,7 @@ out_dio: | |||
2428 | 2420 | ||
2429 | if (unaligned_dio) { | 2421 | if (unaligned_dio) { |
2430 | ocfs2_iocb_clear_unaligned_aio(iocb); | 2422 | ocfs2_iocb_clear_unaligned_aio(iocb); |
2431 | atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); | 2423 | mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio); |
2432 | } | 2424 | } |
2433 | 2425 | ||
2434 | out: | 2426 | out: |
@@ -2645,7 +2637,16 @@ static loff_t ocfs2_file_llseek(struct file *file, loff_t offset, int whence) | |||
2645 | case SEEK_SET: | 2637 | case SEEK_SET: |
2646 | break; | 2638 | break; |
2647 | case SEEK_END: | 2639 | case SEEK_END: |
2648 | offset += inode->i_size; | 2640 | /* SEEK_END requires the OCFS2 inode lock for the file |
2641 | * because it references the file's size. | ||
2642 | */ | ||
2643 | ret = ocfs2_inode_lock(inode, NULL, 0); | ||
2644 | if (ret < 0) { | ||
2645 | mlog_errno(ret); | ||
2646 | goto out; | ||
2647 | } | ||
2648 | offset += i_size_read(inode); | ||
2649 | ocfs2_inode_unlock(inode, 0); | ||
2649 | break; | 2650 | break; |
2650 | case SEEK_CUR: | 2651 | case SEEK_CUR: |
2651 | if (offset == 0) { | 2652 | if (offset == 0) { |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index f29a90fde619..437de7f768c6 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
@@ -130,6 +130,7 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, | |||
130 | struct inode *inode = NULL; | 130 | struct inode *inode = NULL; |
131 | struct super_block *sb = osb->sb; | 131 | struct super_block *sb = osb->sb; |
132 | struct ocfs2_find_inode_args args; | 132 | struct ocfs2_find_inode_args args; |
133 | journal_t *journal = OCFS2_SB(sb)->journal->j_journal; | ||
133 | 134 | ||
134 | trace_ocfs2_iget_begin((unsigned long long)blkno, flags, | 135 | trace_ocfs2_iget_begin((unsigned long long)blkno, flags, |
135 | sysfile_type); | 136 | sysfile_type); |
@@ -169,6 +170,32 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags, | |||
169 | goto bail; | 170 | goto bail; |
170 | } | 171 | } |
171 | 172 | ||
173 | /* | ||
174 | * Set transaction id's of transactions that have to be committed | ||
175 | * to finish f[data]sync. We set them to currently running transaction | ||
176 | * as we cannot be sure that the inode or some of its metadata isn't | ||
177 | * part of the transaction - the inode could have been reclaimed and | ||
178 | * now it is reread from disk. | ||
179 | */ | ||
180 | if (journal) { | ||
181 | transaction_t *transaction; | ||
182 | tid_t tid; | ||
183 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
184 | |||
185 | read_lock(&journal->j_state_lock); | ||
186 | if (journal->j_running_transaction) | ||
187 | transaction = journal->j_running_transaction; | ||
188 | else | ||
189 | transaction = journal->j_committing_transaction; | ||
190 | if (transaction) | ||
191 | tid = transaction->t_tid; | ||
192 | else | ||
193 | tid = journal->j_commit_sequence; | ||
194 | read_unlock(&journal->j_state_lock); | ||
195 | oi->i_sync_tid = tid; | ||
196 | oi->i_datasync_tid = tid; | ||
197 | } | ||
198 | |||
172 | bail: | 199 | bail: |
173 | if (!IS_ERR(inode)) { | 200 | if (!IS_ERR(inode)) { |
174 | trace_ocfs2_iget_end(inode, | 201 | trace_ocfs2_iget_end(inode, |
@@ -804,11 +831,13 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
804 | goto bail; | 831 | goto bail; |
805 | } | 832 | } |
806 | 833 | ||
807 | /* If we're coming from downconvert_thread we can't go into our own | 834 | /* |
808 | * voting [hello, deadlock city!], so unforuntately we just | 835 | * If we're coming from downconvert_thread we can't go into our own |
809 | * have to skip deleting this guy. That's OK though because | 836 | * voting [hello, deadlock city!] so we cannot delete the inode. But |
810 | * the node who's doing the actual deleting should handle it | 837 | * since we dropped last inode ref when downconverting dentry lock, |
811 | * anyway. */ | 838 | * we cannot have the file open and thus the node doing unlink will |
839 | * take care of deleting the inode. | ||
840 | */ | ||
812 | if (current == osb->dc_task) | 841 | if (current == osb->dc_task) |
813 | goto bail; | 842 | goto bail; |
814 | 843 | ||
@@ -822,12 +851,6 @@ static int ocfs2_inode_is_valid_to_delete(struct inode *inode) | |||
822 | goto bail_unlock; | 851 | goto bail_unlock; |
823 | } | 852 | } |
824 | 853 | ||
825 | /* If we have allowd wipe of this inode for another node, it | ||
826 | * will be marked here so we can safely skip it. Recovery will | ||
827 | * cleanup any inodes we might inadvertently skip here. */ | ||
828 | if (oi->ip_flags & OCFS2_INODE_SKIP_DELETE) | ||
829 | goto bail_unlock; | ||
830 | |||
831 | ret = 1; | 854 | ret = 1; |
832 | bail_unlock: | 855 | bail_unlock: |
833 | spin_unlock(&oi->ip_lock); | 856 | spin_unlock(&oi->ip_lock); |
@@ -941,7 +964,7 @@ static void ocfs2_cleanup_delete_inode(struct inode *inode, | |||
941 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); | 964 | (unsigned long long)OCFS2_I(inode)->ip_blkno, sync_data); |
942 | if (sync_data) | 965 | if (sync_data) |
943 | filemap_write_and_wait(inode->i_mapping); | 966 | filemap_write_and_wait(inode->i_mapping); |
944 | truncate_inode_pages(&inode->i_data, 0); | 967 | truncate_inode_pages_final(&inode->i_data); |
945 | } | 968 | } |
946 | 969 | ||
947 | static void ocfs2_delete_inode(struct inode *inode) | 970 | static void ocfs2_delete_inode(struct inode *inode) |
@@ -960,8 +983,6 @@ static void ocfs2_delete_inode(struct inode *inode) | |||
960 | if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) | 983 | if (is_bad_inode(inode) || !OCFS2_I(inode)->ip_blkno) |
961 | goto bail; | 984 | goto bail; |
962 | 985 | ||
963 | dquot_initialize(inode); | ||
964 | |||
965 | if (!ocfs2_inode_is_valid_to_delete(inode)) { | 986 | if (!ocfs2_inode_is_valid_to_delete(inode)) { |
966 | /* It's probably not necessary to truncate_inode_pages | 987 | /* It's probably not necessary to truncate_inode_pages |
967 | * here but we do it for safety anyway (it will most | 988 | * here but we do it for safety anyway (it will most |
@@ -970,6 +991,8 @@ static void ocfs2_delete_inode(struct inode *inode) | |||
970 | goto bail; | 991 | goto bail; |
971 | } | 992 | } |
972 | 993 | ||
994 | dquot_initialize(inode); | ||
995 | |||
973 | /* We want to block signals in delete_inode as the lock and | 996 | /* We want to block signals in delete_inode as the lock and |
974 | * messaging paths may return us -ERESTARTSYS. Which would | 997 | * messaging paths may return us -ERESTARTSYS. Which would |
975 | * cause us to exit early, resulting in inodes being orphaned | 998 | * cause us to exit early, resulting in inodes being orphaned |
@@ -1057,6 +1080,7 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
1057 | { | 1080 | { |
1058 | int status; | 1081 | int status; |
1059 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1082 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
1083 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
1060 | 1084 | ||
1061 | clear_inode(inode); | 1085 | clear_inode(inode); |
1062 | trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, | 1086 | trace_ocfs2_clear_inode((unsigned long long)oi->ip_blkno, |
@@ -1073,9 +1097,9 @@ static void ocfs2_clear_inode(struct inode *inode) | |||
1073 | 1097 | ||
1074 | /* Do these before all the other work so that we don't bounce | 1098 | /* Do these before all the other work so that we don't bounce |
1075 | * the downconvert thread while waiting to destroy the locks. */ | 1099 | * the downconvert thread while waiting to destroy the locks. */ |
1076 | ocfs2_mark_lockres_freeing(&oi->ip_rw_lockres); | 1100 | ocfs2_mark_lockres_freeing(osb, &oi->ip_rw_lockres); |
1077 | ocfs2_mark_lockres_freeing(&oi->ip_inode_lockres); | 1101 | ocfs2_mark_lockres_freeing(osb, &oi->ip_inode_lockres); |
1078 | ocfs2_mark_lockres_freeing(&oi->ip_open_lockres); | 1102 | ocfs2_mark_lockres_freeing(osb, &oi->ip_open_lockres); |
1079 | 1103 | ||
1080 | ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, | 1104 | ocfs2_resv_discard(&OCFS2_SB(inode->i_sb)->osb_la_resmap, |
1081 | &oi->ip_la_data_resv); | 1105 | &oi->ip_la_data_resv); |
@@ -1157,7 +1181,7 @@ void ocfs2_evict_inode(struct inode *inode) | |||
1157 | (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { | 1181 | (OCFS2_I(inode)->ip_flags & OCFS2_INODE_MAYBE_ORPHANED)) { |
1158 | ocfs2_delete_inode(inode); | 1182 | ocfs2_delete_inode(inode); |
1159 | } else { | 1183 | } else { |
1160 | truncate_inode_pages(&inode->i_data, 0); | 1184 | truncate_inode_pages_final(&inode->i_data); |
1161 | } | 1185 | } |
1162 | ocfs2_clear_inode(inode); | 1186 | ocfs2_clear_inode(inode); |
1163 | } | 1187 | } |
@@ -1260,6 +1284,7 @@ int ocfs2_mark_inode_dirty(handle_t *handle, | |||
1260 | fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); | 1284 | fe->i_mtime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec); |
1261 | 1285 | ||
1262 | ocfs2_journal_dirty(handle, bh); | 1286 | ocfs2_journal_dirty(handle, bh); |
1287 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
1263 | leave: | 1288 | leave: |
1264 | return status; | 1289 | return status; |
1265 | } | 1290 | } |
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 621fc73bf23d..a6c991c0fc98 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
@@ -44,7 +44,7 @@ struct ocfs2_inode_info | |||
44 | struct rw_semaphore ip_xattr_sem; | 44 | struct rw_semaphore ip_xattr_sem; |
45 | 45 | ||
46 | /* Number of outstanding AIO's which are not page aligned */ | 46 | /* Number of outstanding AIO's which are not page aligned */ |
47 | atomic_t ip_unaligned_aio; | 47 | struct mutex ip_unaligned_aio; |
48 | 48 | ||
49 | /* These fields are protected by ip_lock */ | 49 | /* These fields are protected by ip_lock */ |
50 | spinlock_t ip_lock; | 50 | spinlock_t ip_lock; |
@@ -73,6 +73,13 @@ struct ocfs2_inode_info | |||
73 | u32 ip_dir_lock_gen; | 73 | u32 ip_dir_lock_gen; |
74 | 74 | ||
75 | struct ocfs2_alloc_reservation ip_la_data_resv; | 75 | struct ocfs2_alloc_reservation ip_la_data_resv; |
76 | |||
77 | /* | ||
78 | * Transactions that contain inode's metadata needed to complete | ||
79 | * fsync and fdatasync, respectively. | ||
80 | */ | ||
81 | tid_t i_sync_tid; | ||
82 | tid_t i_datasync_tid; | ||
76 | }; | 83 | }; |
77 | 84 | ||
78 | /* | 85 | /* |
@@ -84,8 +91,6 @@ struct ocfs2_inode_info | |||
84 | #define OCFS2_INODE_BITMAP 0x00000004 | 91 | #define OCFS2_INODE_BITMAP 0x00000004 |
85 | /* This inode has been wiped from disk */ | 92 | /* This inode has been wiped from disk */ |
86 | #define OCFS2_INODE_DELETED 0x00000008 | 93 | #define OCFS2_INODE_DELETED 0x00000008 |
87 | /* Another node is deleting, so our delete is a nop */ | ||
88 | #define OCFS2_INODE_SKIP_DELETE 0x00000010 | ||
89 | /* Has the inode been orphaned on another node? | 94 | /* Has the inode been orphaned on another node? |
90 | * | 95 | * |
91 | * This hints to ocfs2_drop_inode that it should clear i_nlink before | 96 | * This hints to ocfs2_drop_inode that it should clear i_nlink before |
@@ -100,11 +105,11 @@ struct ocfs2_inode_info | |||
100 | * rely on ocfs2_delete_inode to sort things out under the proper | 105 | * rely on ocfs2_delete_inode to sort things out under the proper |
101 | * cluster locks. | 106 | * cluster locks. |
102 | */ | 107 | */ |
103 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000020 | 108 | #define OCFS2_INODE_MAYBE_ORPHANED 0x00000010 |
104 | /* Does someone have the file open O_DIRECT */ | 109 | /* Does someone have the file open O_DIRECT */ |
105 | #define OCFS2_INODE_OPEN_DIRECT 0x00000040 | 110 | #define OCFS2_INODE_OPEN_DIRECT 0x00000020 |
106 | /* Tell the inode wipe code it's not in orphan dir */ | 111 | /* Tell the inode wipe code it's not in orphan dir */ |
107 | #define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000080 | 112 | #define OCFS2_INODE_SKIP_ORPHAN_DIR 0x00000040 |
108 | 113 | ||
109 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) | 114 | static inline struct ocfs2_inode_info *OCFS2_I(struct inode *inode) |
110 | { | 115 | { |
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 8ca3c29accbf..490229f43731 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
@@ -413,11 +413,12 @@ int ocfs2_info_handle_freeinode(struct inode *inode, | |||
413 | } | 413 | } |
414 | 414 | ||
415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); | 415 | status = ocfs2_info_scan_inode_alloc(osb, inode_alloc, blkno, oifi, i); |
416 | if (status < 0) | ||
417 | goto bail; | ||
418 | 416 | ||
419 | iput(inode_alloc); | 417 | iput(inode_alloc); |
420 | inode_alloc = NULL; | 418 | inode_alloc = NULL; |
419 | |||
420 | if (status < 0) | ||
421 | goto bail; | ||
421 | } | 422 | } |
422 | 423 | ||
423 | o2info_set_request_filled(&oifi->ifi_req); | 424 | o2info_set_request_filled(&oifi->ifi_req); |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 44fc3e530c3d..03ea9314fecd 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
@@ -2132,12 +2132,6 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, | |||
2132 | iter = oi->ip_next_orphan; | 2132 | iter = oi->ip_next_orphan; |
2133 | 2133 | ||
2134 | spin_lock(&oi->ip_lock); | 2134 | spin_lock(&oi->ip_lock); |
2135 | /* The remote delete code may have set these on the | ||
2136 | * assumption that the other node would wipe them | ||
2137 | * successfully. If they are still in the node's | ||
2138 | * orphan dir, we need to reset that state. */ | ||
2139 | oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); | ||
2140 | |||
2141 | /* Set the proper information to get us going into | 2135 | /* Set the proper information to get us going into |
2142 | * ocfs2_delete_inode. */ | 2136 | * ocfs2_delete_inode. */ |
2143 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; | 2137 | oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; |
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 9ff4e8cf9d97..7f8cde94abfe 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
@@ -626,4 +626,15 @@ static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | |||
626 | new_size); | 626 | new_size); |
627 | } | 627 | } |
628 | 628 | ||
629 | static inline void ocfs2_update_inode_fsync_trans(handle_t *handle, | ||
630 | struct inode *inode, | ||
631 | int datasync) | ||
632 | { | ||
633 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
634 | |||
635 | oi->i_sync_tid = handle->h_transaction->t_tid; | ||
636 | if (datasync) | ||
637 | oi->i_datasync_tid = handle->h_transaction->t_tid; | ||
638 | } | ||
639 | |||
629 | #endif /* OCFS2_JOURNAL_H */ | 640 | #endif /* OCFS2_JOURNAL_H */ |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index e57c804069ea..6b6d092b0998 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
@@ -82,6 +82,8 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, | |||
82 | } | 82 | } |
83 | 83 | ||
84 | ret = flock_lock_file_wait(file, fl); | 84 | ret = flock_lock_file_wait(file, fl); |
85 | if (ret) | ||
86 | ocfs2_file_unlock(file); | ||
85 | 87 | ||
86 | out: | 88 | out: |
87 | mutex_unlock(&fp->fp_mutex); | 89 | mutex_unlock(&fp->fp_mutex); |
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index 64c304d668f0..599eb4c4c8be 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c | |||
@@ -151,6 +151,7 @@ static int __ocfs2_move_extent(handle_t *handle, | |||
151 | old_blkno, len); | 151 | old_blkno, len); |
152 | } | 152 | } |
153 | 153 | ||
154 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
154 | out: | 155 | out: |
155 | ocfs2_free_path(path); | 156 | ocfs2_free_path(path); |
156 | return ret; | 157 | return ret; |
@@ -690,8 +691,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context, | |||
690 | 691 | ||
691 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, | 692 | ret = ocfs2_block_group_set_bits(handle, gb_inode, gd, gd_bh, |
692 | goal_bit, len); | 693 | goal_bit, len); |
693 | if (ret) | 694 | if (ret) { |
695 | ocfs2_rollback_alloc_dinode_counts(gb_inode, gb_bh, len, | ||
696 | le16_to_cpu(gd->bg_chain)); | ||
694 | mlog_errno(ret); | 697 | mlog_errno(ret); |
698 | } | ||
695 | 699 | ||
696 | /* | 700 | /* |
697 | * Here we should write the new page out first if we are | 701 | * Here we should write the new page out first if we are |
@@ -957,6 +961,7 @@ static int ocfs2_move_extents(struct ocfs2_move_extents_context *context) | |||
957 | inode->i_ctime = CURRENT_TIME; | 961 | inode->i_ctime = CURRENT_TIME; |
958 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | 962 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); |
959 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | 963 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); |
964 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
960 | 965 | ||
961 | ocfs2_journal_dirty(handle, di_bh); | 966 | ocfs2_journal_dirty(handle, di_bh); |
962 | 967 | ||
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3683643f3f0e..2060fc398445 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
@@ -450,7 +450,6 @@ leave: | |||
450 | 450 | ||
451 | brelse(new_fe_bh); | 451 | brelse(new_fe_bh); |
452 | brelse(parent_fe_bh); | 452 | brelse(parent_fe_bh); |
453 | kfree(si.name); | ||
454 | kfree(si.value); | 453 | kfree(si.value); |
455 | 454 | ||
456 | ocfs2_free_dir_lookup_result(&lookup); | 455 | ocfs2_free_dir_lookup_result(&lookup); |
@@ -495,6 +494,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
495 | struct ocfs2_dinode *fe = NULL; | 494 | struct ocfs2_dinode *fe = NULL; |
496 | struct ocfs2_extent_list *fel; | 495 | struct ocfs2_extent_list *fel; |
497 | u16 feat; | 496 | u16 feat; |
497 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
498 | 498 | ||
499 | *new_fe_bh = NULL; | 499 | *new_fe_bh = NULL; |
500 | 500 | ||
@@ -576,8 +576,8 @@ static int __ocfs2_mknod_locked(struct inode *dir, | |||
576 | mlog_errno(status); | 576 | mlog_errno(status); |
577 | } | 577 | } |
578 | 578 | ||
579 | status = 0; /* error in ocfs2_create_new_inode_locks is not | 579 | oi->i_sync_tid = handle->h_transaction->t_tid; |
580 | * critical */ | 580 | oi->i_datasync_tid = handle->h_transaction->t_tid; |
581 | 581 | ||
582 | leave: | 582 | leave: |
583 | if (status < 0) { | 583 | if (status < 0) { |
@@ -1855,7 +1855,6 @@ bail: | |||
1855 | 1855 | ||
1856 | brelse(new_fe_bh); | 1856 | brelse(new_fe_bh); |
1857 | brelse(parent_fe_bh); | 1857 | brelse(parent_fe_bh); |
1858 | kfree(si.name); | ||
1859 | kfree(si.value); | 1858 | kfree(si.value); |
1860 | ocfs2_free_dir_lookup_result(&lookup); | 1859 | ocfs2_free_dir_lookup_result(&lookup); |
1861 | if (inode_ac) | 1860 | if (inode_ac) |
@@ -2481,6 +2480,7 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir, | |||
2481 | di->i_orphaned_slot = 0; | 2480 | di->i_orphaned_slot = 0; |
2482 | set_nlink(inode, 1); | 2481 | set_nlink(inode, 1); |
2483 | ocfs2_set_links_count(di, inode->i_nlink); | 2482 | ocfs2_set_links_count(di, inode->i_nlink); |
2483 | ocfs2_update_inode_fsync_trans(handle, inode, 1); | ||
2484 | ocfs2_journal_dirty(handle, di_bh); | 2484 | ocfs2_journal_dirty(handle, di_bh); |
2485 | 2485 | ||
2486 | status = ocfs2_add_entry(handle, dentry, inode, | 2486 | status = ocfs2_add_entry(handle, dentry, inode, |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 553f53cc73ae..8d64a97a9d5e 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <linux/sched.h> | 30 | #include <linux/sched.h> |
31 | #include <linux/wait.h> | 31 | #include <linux/wait.h> |
32 | #include <linux/list.h> | 32 | #include <linux/list.h> |
33 | #include <linux/llist.h> | ||
33 | #include <linux/rbtree.h> | 34 | #include <linux/rbtree.h> |
34 | #include <linux/workqueue.h> | 35 | #include <linux/workqueue.h> |
35 | #include <linux/kref.h> | 36 | #include <linux/kref.h> |
@@ -274,19 +275,16 @@ enum ocfs2_mount_options | |||
274 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ | 275 | OCFS2_MOUNT_HB_GLOBAL = 1 << 14, /* Global heartbeat */ |
275 | }; | 276 | }; |
276 | 277 | ||
277 | #define OCFS2_OSB_SOFT_RO 0x0001 | 278 | #define OCFS2_OSB_SOFT_RO 0x0001 |
278 | #define OCFS2_OSB_HARD_RO 0x0002 | 279 | #define OCFS2_OSB_HARD_RO 0x0002 |
279 | #define OCFS2_OSB_ERROR_FS 0x0004 | 280 | #define OCFS2_OSB_ERROR_FS 0x0004 |
280 | #define OCFS2_OSB_DROP_DENTRY_LOCK_IMMED 0x0008 | 281 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 |
281 | |||
282 | #define OCFS2_DEFAULT_ATIME_QUANTUM 60 | ||
283 | 282 | ||
284 | struct ocfs2_journal; | 283 | struct ocfs2_journal; |
285 | struct ocfs2_slot_info; | 284 | struct ocfs2_slot_info; |
286 | struct ocfs2_recovery_map; | 285 | struct ocfs2_recovery_map; |
287 | struct ocfs2_replay_map; | 286 | struct ocfs2_replay_map; |
288 | struct ocfs2_quota_recovery; | 287 | struct ocfs2_quota_recovery; |
289 | struct ocfs2_dentry_lock; | ||
290 | struct ocfs2_super | 288 | struct ocfs2_super |
291 | { | 289 | { |
292 | struct task_struct *commit_task; | 290 | struct task_struct *commit_task; |
@@ -414,10 +412,9 @@ struct ocfs2_super | |||
414 | struct list_head blocked_lock_list; | 412 | struct list_head blocked_lock_list; |
415 | unsigned long blocked_lock_count; | 413 | unsigned long blocked_lock_count; |
416 | 414 | ||
417 | /* List of dentry locks to release. Anyone can add locks to | 415 | /* List of dquot structures to drop last reference to */ |
418 | * the list, ocfs2_wq processes the list */ | 416 | struct llist_head dquot_drop_list; |
419 | struct ocfs2_dentry_lock *dentry_lock_list; | 417 | struct work_struct dquot_drop_work; |
420 | struct work_struct dentry_lock_work; | ||
421 | 418 | ||
422 | wait_queue_head_t osb_mount_event; | 419 | wait_queue_head_t osb_mount_event; |
423 | 420 | ||
@@ -449,6 +446,8 @@ struct ocfs2_super | |||
449 | /* rb tree root for refcount lock. */ | 446 | /* rb tree root for refcount lock. */ |
450 | struct rb_root osb_rf_lock_tree; | 447 | struct rb_root osb_rf_lock_tree; |
451 | struct ocfs2_refcount_tree *osb_ref_tree_lru; | 448 | struct ocfs2_refcount_tree *osb_ref_tree_lru; |
449 | |||
450 | struct mutex system_file_mutex; | ||
452 | }; | 451 | }; |
453 | 452 | ||
454 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) | 453 | #define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info) |
@@ -579,18 +578,6 @@ static inline void ocfs2_set_osb_flag(struct ocfs2_super *osb, | |||
579 | spin_unlock(&osb->osb_lock); | 578 | spin_unlock(&osb->osb_lock); |
580 | } | 579 | } |
581 | 580 | ||
582 | |||
583 | static inline unsigned long ocfs2_test_osb_flag(struct ocfs2_super *osb, | ||
584 | unsigned long flag) | ||
585 | { | ||
586 | unsigned long ret; | ||
587 | |||
588 | spin_lock(&osb->osb_lock); | ||
589 | ret = osb->osb_flags & flag; | ||
590 | spin_unlock(&osb->osb_lock); | ||
591 | return ret; | ||
592 | } | ||
593 | |||
594 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, | 581 | static inline void ocfs2_set_ro_flag(struct ocfs2_super *osb, |
595 | int hard) | 582 | int hard) |
596 | { | 583 | { |
diff --git a/fs/ocfs2/quota.h b/fs/ocfs2/quota.h index d5ab56cbe5c5..f266d67df3c6 100644 --- a/fs/ocfs2/quota.h +++ b/fs/ocfs2/quota.h | |||
@@ -28,6 +28,7 @@ struct ocfs2_dquot { | |||
28 | unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ | 28 | unsigned int dq_use_count; /* Number of nodes having reference to this entry in global quota file */ |
29 | s64 dq_origspace; /* Last globally synced space usage */ | 29 | s64 dq_origspace; /* Last globally synced space usage */ |
30 | s64 dq_originodes; /* Last globally synced inode usage */ | 30 | s64 dq_originodes; /* Last globally synced inode usage */ |
31 | struct llist_node list; /* Member of list of dquots to drop */ | ||
31 | }; | 32 | }; |
32 | 33 | ||
33 | /* Description of one chunk to recover in memory */ | 34 | /* Description of one chunk to recover in memory */ |
@@ -110,6 +111,7 @@ int ocfs2_read_quota_phys_block(struct inode *inode, u64 p_block, | |||
110 | int ocfs2_create_local_dquot(struct dquot *dquot); | 111 | int ocfs2_create_local_dquot(struct dquot *dquot); |
111 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); | 112 | int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot); |
112 | int ocfs2_local_write_dquot(struct dquot *dquot); | 113 | int ocfs2_local_write_dquot(struct dquot *dquot); |
114 | void ocfs2_drop_dquot_refs(struct work_struct *work); | ||
113 | 115 | ||
114 | extern const struct dquot_operations ocfs2_quota_operations; | 116 | extern const struct dquot_operations ocfs2_quota_operations; |
115 | extern struct quota_format_type ocfs2_quota_format; | 117 | extern struct quota_format_type ocfs2_quota_format; |
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index d7b5108789e2..b990a62cff50 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c | |||
@@ -10,6 +10,7 @@ | |||
10 | #include <linux/jiffies.h> | 10 | #include <linux/jiffies.h> |
11 | #include <linux/writeback.h> | 11 | #include <linux/writeback.h> |
12 | #include <linux/workqueue.h> | 12 | #include <linux/workqueue.h> |
13 | #include <linux/llist.h> | ||
13 | 14 | ||
14 | #include <cluster/masklog.h> | 15 | #include <cluster/masklog.h> |
15 | 16 | ||
@@ -679,6 +680,27 @@ static int ocfs2_calc_qdel_credits(struct super_block *sb, int type) | |||
679 | OCFS2_INODE_UPDATE_CREDITS; | 680 | OCFS2_INODE_UPDATE_CREDITS; |
680 | } | 681 | } |
681 | 682 | ||
683 | void ocfs2_drop_dquot_refs(struct work_struct *work) | ||
684 | { | ||
685 | struct ocfs2_super *osb = container_of(work, struct ocfs2_super, | ||
686 | dquot_drop_work); | ||
687 | struct llist_node *list; | ||
688 | struct ocfs2_dquot *odquot, *next_odquot; | ||
689 | |||
690 | list = llist_del_all(&osb->dquot_drop_list); | ||
691 | llist_for_each_entry_safe(odquot, next_odquot, list, list) { | ||
692 | /* Drop the reference we acquired in ocfs2_dquot_release() */ | ||
693 | dqput(&odquot->dq_dquot); | ||
694 | } | ||
695 | } | ||
696 | |||
697 | /* | ||
698 | * Called when the last reference to dquot is dropped. If we are called from | ||
699 | * downconvert thread, we cannot do all the handling here because grabbing | ||
700 | * quota lock could deadlock (the node holding the quota lock could need some | ||
701 | * other cluster lock to proceed but with blocked downconvert thread we cannot | ||
702 | * release any lock). | ||
703 | */ | ||
682 | static int ocfs2_release_dquot(struct dquot *dquot) | 704 | static int ocfs2_release_dquot(struct dquot *dquot) |
683 | { | 705 | { |
684 | handle_t *handle; | 706 | handle_t *handle; |
@@ -694,6 +716,19 @@ static int ocfs2_release_dquot(struct dquot *dquot) | |||
694 | /* Check whether we are not racing with some other dqget() */ | 716 | /* Check whether we are not racing with some other dqget() */ |
695 | if (atomic_read(&dquot->dq_count) > 1) | 717 | if (atomic_read(&dquot->dq_count) > 1) |
696 | goto out; | 718 | goto out; |
719 | /* Running from downconvert thread? Postpone quota processing to wq */ | ||
720 | if (current == osb->dc_task) { | ||
721 | /* | ||
722 | * Grab our own reference to dquot and queue it for delayed | ||
723 | * dropping. Quota code rechecks after calling | ||
724 | * ->release_dquot() and won't free dquot structure. | ||
725 | */ | ||
726 | dqgrab(dquot); | ||
727 | /* First entry on list -> queue work */ | ||
728 | if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list)) | ||
729 | queue_work(ocfs2_wq, &osb->dquot_drop_work); | ||
730 | goto out; | ||
731 | } | ||
697 | status = ocfs2_lock_global_qf(oinfo, 1); | 732 | status = ocfs2_lock_global_qf(oinfo, 1); |
698 | if (status < 0) | 733 | if (status < 0) |
699 | goto out; | 734 | goto out; |
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index ca5ce14cbddc..83f1a665ae97 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
@@ -496,7 +496,7 @@ static ssize_t ocfs2_max_locking_protocol_show(struct kobject *kobj, | |||
496 | } | 496 | } |
497 | 497 | ||
498 | static struct kobj_attribute ocfs2_attr_max_locking_protocol = | 498 | static struct kobj_attribute ocfs2_attr_max_locking_protocol = |
499 | __ATTR(max_locking_protocol, S_IFREG | S_IRUGO, | 499 | __ATTR(max_locking_protocol, S_IRUGO, |
500 | ocfs2_max_locking_protocol_show, NULL); | 500 | ocfs2_max_locking_protocol_show, NULL); |
501 | 501 | ||
502 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | 502 | static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, |
@@ -528,7 +528,7 @@ static ssize_t ocfs2_loaded_cluster_plugins_show(struct kobject *kobj, | |||
528 | } | 528 | } |
529 | 529 | ||
530 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = | 530 | static struct kobj_attribute ocfs2_attr_loaded_cluster_plugins = |
531 | __ATTR(loaded_cluster_plugins, S_IFREG | S_IRUGO, | 531 | __ATTR(loaded_cluster_plugins, S_IRUGO, |
532 | ocfs2_loaded_cluster_plugins_show, NULL); | 532 | ocfs2_loaded_cluster_plugins_show, NULL); |
533 | 533 | ||
534 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | 534 | static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, |
@@ -550,7 +550,7 @@ static ssize_t ocfs2_active_cluster_plugin_show(struct kobject *kobj, | |||
550 | } | 550 | } |
551 | 551 | ||
552 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = | 552 | static struct kobj_attribute ocfs2_attr_active_cluster_plugin = |
553 | __ATTR(active_cluster_plugin, S_IFREG | S_IRUGO, | 553 | __ATTR(active_cluster_plugin, S_IRUGO, |
554 | ocfs2_active_cluster_plugin_show, NULL); | 554 | ocfs2_active_cluster_plugin_show, NULL); |
555 | 555 | ||
556 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, | 556 | static ssize_t ocfs2_cluster_stack_show(struct kobject *kobj, |
@@ -599,15 +599,29 @@ static ssize_t ocfs2_cluster_stack_store(struct kobject *kobj, | |||
599 | 599 | ||
600 | 600 | ||
601 | static struct kobj_attribute ocfs2_attr_cluster_stack = | 601 | static struct kobj_attribute ocfs2_attr_cluster_stack = |
602 | __ATTR(cluster_stack, S_IFREG | S_IRUGO | S_IWUSR, | 602 | __ATTR(cluster_stack, S_IRUGO | S_IWUSR, |
603 | ocfs2_cluster_stack_show, | 603 | ocfs2_cluster_stack_show, |
604 | ocfs2_cluster_stack_store); | 604 | ocfs2_cluster_stack_store); |
605 | 605 | ||
606 | |||
607 | |||
608 | static ssize_t ocfs2_dlm_recover_show(struct kobject *kobj, | ||
609 | struct kobj_attribute *attr, | ||
610 | char *buf) | ||
611 | { | ||
612 | return snprintf(buf, PAGE_SIZE, "1\n"); | ||
613 | } | ||
614 | |||
615 | static struct kobj_attribute ocfs2_attr_dlm_recover_support = | ||
616 | __ATTR(dlm_recover_callback_support, S_IRUGO, | ||
617 | ocfs2_dlm_recover_show, NULL); | ||
618 | |||
606 | static struct attribute *ocfs2_attrs[] = { | 619 | static struct attribute *ocfs2_attrs[] = { |
607 | &ocfs2_attr_max_locking_protocol.attr, | 620 | &ocfs2_attr_max_locking_protocol.attr, |
608 | &ocfs2_attr_loaded_cluster_plugins.attr, | 621 | &ocfs2_attr_loaded_cluster_plugins.attr, |
609 | &ocfs2_attr_active_cluster_plugin.attr, | 622 | &ocfs2_attr_active_cluster_plugin.attr, |
610 | &ocfs2_attr_cluster_stack.attr, | 623 | &ocfs2_attr_cluster_stack.attr, |
624 | &ocfs2_attr_dlm_recover_support.attr, | ||
611 | NULL, | 625 | NULL, |
612 | }; | 626 | }; |
613 | 627 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 47ae2663a6f5..0cb889a17ae1 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
@@ -771,6 +771,7 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
771 | spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); | 771 | spin_unlock(&OCFS2_I(alloc_inode)->ip_lock); |
772 | i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); | 772 | i_size_write(alloc_inode, le64_to_cpu(fe->i_size)); |
773 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); | 773 | alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode); |
774 | ocfs2_update_inode_fsync_trans(handle, alloc_inode, 0); | ||
774 | 775 | ||
775 | status = 0; | 776 | status = 0; |
776 | 777 | ||
@@ -1607,6 +1608,21 @@ out: | |||
1607 | return ret; | 1608 | return ret; |
1608 | } | 1609 | } |
1609 | 1610 | ||
1611 | void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, | ||
1612 | struct buffer_head *di_bh, | ||
1613 | u32 num_bits, | ||
1614 | u16 chain) | ||
1615 | { | ||
1616 | u32 tmp_used; | ||
1617 | struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data; | ||
1618 | struct ocfs2_chain_list *cl; | ||
1619 | |||
1620 | cl = (struct ocfs2_chain_list *)&di->id2.i_chain; | ||
1621 | tmp_used = le32_to_cpu(di->id1.bitmap1.i_used); | ||
1622 | di->id1.bitmap1.i_used = cpu_to_le32(tmp_used - num_bits); | ||
1623 | le32_add_cpu(&cl->cl_recs[chain].c_free, num_bits); | ||
1624 | } | ||
1625 | |||
1610 | static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, | 1626 | static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, |
1611 | struct ocfs2_extent_rec *rec, | 1627 | struct ocfs2_extent_rec *rec, |
1612 | struct ocfs2_chain_list *cl) | 1628 | struct ocfs2_chain_list *cl) |
@@ -1707,8 +1723,12 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
1707 | 1723 | ||
1708 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, | 1724 | ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh, |
1709 | res->sr_bit_offset, res->sr_bits); | 1725 | res->sr_bit_offset, res->sr_bits); |
1710 | if (ret < 0) | 1726 | if (ret < 0) { |
1727 | ocfs2_rollback_alloc_dinode_counts(alloc_inode, ac->ac_bh, | ||
1728 | res->sr_bits, | ||
1729 | le16_to_cpu(gd->bg_chain)); | ||
1711 | mlog_errno(ret); | 1730 | mlog_errno(ret); |
1731 | } | ||
1712 | 1732 | ||
1713 | out_loc_only: | 1733 | out_loc_only: |
1714 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); | 1734 | *bits_left = le16_to_cpu(gd->bg_free_bits_count); |
@@ -1838,6 +1858,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
1838 | res->sr_bit_offset, | 1858 | res->sr_bit_offset, |
1839 | res->sr_bits); | 1859 | res->sr_bits); |
1840 | if (status < 0) { | 1860 | if (status < 0) { |
1861 | ocfs2_rollback_alloc_dinode_counts(alloc_inode, | ||
1862 | ac->ac_bh, res->sr_bits, chain); | ||
1841 | mlog_errno(status); | 1863 | mlog_errno(status); |
1842 | goto bail; | 1864 | goto bail; |
1843 | } | 1865 | } |
@@ -2091,7 +2113,7 @@ int ocfs2_find_new_inode_loc(struct inode *dir, | |||
2091 | 2113 | ||
2092 | ac->ac_find_loc_priv = res; | 2114 | ac->ac_find_loc_priv = res; |
2093 | *fe_blkno = res->sr_blkno; | 2115 | *fe_blkno = res->sr_blkno; |
2094 | 2116 | ocfs2_update_inode_fsync_trans(handle, dir, 0); | |
2095 | out: | 2117 | out: |
2096 | if (handle) | 2118 | if (handle) |
2097 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); | 2119 | ocfs2_commit_trans(OCFS2_SB(dir->i_sb), handle); |
@@ -2149,6 +2171,8 @@ int ocfs2_claim_new_inode_at_loc(handle_t *handle, | |||
2149 | res->sr_bit_offset, | 2171 | res->sr_bit_offset, |
2150 | res->sr_bits); | 2172 | res->sr_bits); |
2151 | if (ret < 0) { | 2173 | if (ret < 0) { |
2174 | ocfs2_rollback_alloc_dinode_counts(ac->ac_inode, | ||
2175 | ac->ac_bh, res->sr_bits, chain); | ||
2152 | mlog_errno(ret); | 2176 | mlog_errno(ret); |
2153 | goto out; | 2177 | goto out; |
2154 | } | 2178 | } |
@@ -2870,6 +2894,7 @@ int ocfs2_test_inode_bit(struct ocfs2_super *osb, u64 blkno, int *res) | |||
2870 | status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); | 2894 | status = ocfs2_inode_lock(inode_alloc_inode, &alloc_bh, 0); |
2871 | if (status < 0) { | 2895 | if (status < 0) { |
2872 | mutex_unlock(&inode_alloc_inode->i_mutex); | 2896 | mutex_unlock(&inode_alloc_inode->i_mutex); |
2897 | iput(inode_alloc_inode); | ||
2873 | mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", | 2898 | mlog(ML_ERROR, "lock on alloc inode on slot %u failed %d\n", |
2874 | (u32)suballoc_slot, status); | 2899 | (u32)suballoc_slot, status); |
2875 | goto bail; | 2900 | goto bail; |
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 218d8036b3e7..2d2501767c0c 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
@@ -91,6 +91,10 @@ int ocfs2_alloc_dinode_update_counts(struct inode *inode, | |||
91 | struct buffer_head *di_bh, | 91 | struct buffer_head *di_bh, |
92 | u32 num_bits, | 92 | u32 num_bits, |
93 | u16 chain); | 93 | u16 chain); |
94 | void ocfs2_rollback_alloc_dinode_counts(struct inode *inode, | ||
95 | struct buffer_head *di_bh, | ||
96 | u32 num_bits, | ||
97 | u16 chain); | ||
94 | int ocfs2_block_group_set_bits(handle_t *handle, | 98 | int ocfs2_block_group_set_bits(handle_t *handle, |
95 | struct inode *alloc_inode, | 99 | struct inode *alloc_inode, |
96 | struct ocfs2_group_desc *bg, | 100 | struct ocfs2_group_desc *bg, |
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 49d84f80f36c..a7cdd56f4c79 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
@@ -561,6 +561,9 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) | |||
561 | if (!oi) | 561 | if (!oi) |
562 | return NULL; | 562 | return NULL; |
563 | 563 | ||
564 | oi->i_sync_tid = 0; | ||
565 | oi->i_datasync_tid = 0; | ||
566 | |||
564 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); | 567 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); |
565 | return &oi->vfs_inode; | 568 | return &oi->vfs_inode; |
566 | } | 569 | } |
@@ -631,6 +634,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
631 | struct ocfs2_super *osb = OCFS2_SB(sb); | 634 | struct ocfs2_super *osb = OCFS2_SB(sb); |
632 | u32 tmp; | 635 | u32 tmp; |
633 | 636 | ||
637 | sync_filesystem(sb); | ||
638 | |||
634 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || | 639 | if (!ocfs2_parse_options(sb, data, &parsed_options, 1) || |
635 | !ocfs2_check_set_options(sb, &parsed_options)) { | 640 | !ocfs2_check_set_options(sb, &parsed_options)) { |
636 | ret = -EINVAL; | 641 | ret = -EINVAL; |
@@ -1238,30 +1243,11 @@ static struct dentry *ocfs2_mount(struct file_system_type *fs_type, | |||
1238 | return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); | 1243 | return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super); |
1239 | } | 1244 | } |
1240 | 1245 | ||
1241 | static void ocfs2_kill_sb(struct super_block *sb) | ||
1242 | { | ||
1243 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
1244 | |||
1245 | /* Failed mount? */ | ||
1246 | if (!osb || atomic_read(&osb->vol_state) == VOLUME_DISABLED) | ||
1247 | goto out; | ||
1248 | |||
1249 | /* Prevent further queueing of inode drop events */ | ||
1250 | spin_lock(&dentry_list_lock); | ||
1251 | ocfs2_set_osb_flag(osb, OCFS2_OSB_DROP_DENTRY_LOCK_IMMED); | ||
1252 | spin_unlock(&dentry_list_lock); | ||
1253 | /* Wait for work to finish and/or remove it */ | ||
1254 | cancel_work_sync(&osb->dentry_lock_work); | ||
1255 | out: | ||
1256 | kill_block_super(sb); | ||
1257 | } | ||
1258 | |||
1259 | static struct file_system_type ocfs2_fs_type = { | 1246 | static struct file_system_type ocfs2_fs_type = { |
1260 | .owner = THIS_MODULE, | 1247 | .owner = THIS_MODULE, |
1261 | .name = "ocfs2", | 1248 | .name = "ocfs2", |
1262 | .mount = ocfs2_mount, | 1249 | .mount = ocfs2_mount, |
1263 | .kill_sb = ocfs2_kill_sb, | 1250 | .kill_sb = kill_block_super, |
1264 | |||
1265 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, | 1251 | .fs_flags = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE, |
1266 | .next = NULL | 1252 | .next = NULL |
1267 | }; | 1253 | }; |
@@ -1612,14 +1598,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) | |||
1612 | return 0; | 1598 | return 0; |
1613 | } | 1599 | } |
1614 | 1600 | ||
1615 | wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ]; | ||
1616 | |||
1617 | static int __init ocfs2_init(void) | 1601 | static int __init ocfs2_init(void) |
1618 | { | 1602 | { |
1619 | int status, i; | 1603 | int status; |
1620 | |||
1621 | for (i = 0; i < OCFS2_IOEND_WQ_HASH_SZ; i++) | ||
1622 | init_waitqueue_head(&ocfs2__ioend_wq[i]); | ||
1623 | 1604 | ||
1624 | status = init_ocfs2_uptodate_cache(); | 1605 | status = init_ocfs2_uptodate_cache(); |
1625 | if (status < 0) | 1606 | if (status < 0) |
@@ -1761,7 +1742,7 @@ static void ocfs2_inode_init_once(void *data) | |||
1761 | ocfs2_extent_map_init(&oi->vfs_inode); | 1742 | ocfs2_extent_map_init(&oi->vfs_inode); |
1762 | INIT_LIST_HEAD(&oi->ip_io_markers); | 1743 | INIT_LIST_HEAD(&oi->ip_io_markers); |
1763 | oi->ip_dir_start_lookup = 0; | 1744 | oi->ip_dir_start_lookup = 0; |
1764 | atomic_set(&oi->ip_unaligned_aio, 0); | 1745 | mutex_init(&oi->ip_unaligned_aio); |
1765 | init_rwsem(&oi->ip_alloc_sem); | 1746 | init_rwsem(&oi->ip_alloc_sem); |
1766 | init_rwsem(&oi->ip_xattr_sem); | 1747 | init_rwsem(&oi->ip_xattr_sem); |
1767 | mutex_init(&oi->ip_io_mutex); | 1748 | mutex_init(&oi->ip_io_mutex); |
@@ -1932,17 +1913,16 @@ static void ocfs2_dismount_volume(struct super_block *sb, int mnt_err) | |||
1932 | 1913 | ||
1933 | debugfs_remove(osb->osb_ctxt); | 1914 | debugfs_remove(osb->osb_ctxt); |
1934 | 1915 | ||
1935 | /* | ||
1936 | * Flush inode dropping work queue so that deletes are | ||
1937 | * performed while the filesystem is still working | ||
1938 | */ | ||
1939 | ocfs2_drop_all_dl_inodes(osb); | ||
1940 | |||
1941 | /* Orphan scan should be stopped as early as possible */ | 1916 | /* Orphan scan should be stopped as early as possible */ |
1942 | ocfs2_orphan_scan_stop(osb); | 1917 | ocfs2_orphan_scan_stop(osb); |
1943 | 1918 | ||
1944 | ocfs2_disable_quotas(osb); | 1919 | ocfs2_disable_quotas(osb); |
1945 | 1920 | ||
1921 | /* All dquots should be freed by now */ | ||
1922 | WARN_ON(!llist_empty(&osb->dquot_drop_list)); | ||
1923 | /* Wait for worker to be done with the work structure in osb */ | ||
1924 | cancel_work_sync(&osb->dquot_drop_work); | ||
1925 | |||
1946 | ocfs2_shutdown_local_alloc(osb); | 1926 | ocfs2_shutdown_local_alloc(osb); |
1947 | 1927 | ||
1948 | /* This will disable recovery and flush any recovery work. */ | 1928 | /* This will disable recovery and flush any recovery work. */ |
@@ -2077,7 +2057,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2077 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; | 2057 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; |
2078 | struct inode *inode = NULL; | 2058 | struct inode *inode = NULL; |
2079 | struct ocfs2_journal *journal; | 2059 | struct ocfs2_journal *journal; |
2080 | __le32 uuid_net_key; | ||
2081 | struct ocfs2_super *osb; | 2060 | struct ocfs2_super *osb; |
2082 | u64 total_blocks; | 2061 | u64 total_blocks; |
2083 | 2062 | ||
@@ -2123,6 +2102,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2123 | spin_lock_init(&osb->osb_xattr_lock); | 2102 | spin_lock_init(&osb->osb_xattr_lock); |
2124 | ocfs2_init_steal_slots(osb); | 2103 | ocfs2_init_steal_slots(osb); |
2125 | 2104 | ||
2105 | mutex_init(&osb->system_file_mutex); | ||
2106 | |||
2126 | atomic_set(&osb->alloc_stats.moves, 0); | 2107 | atomic_set(&osb->alloc_stats.moves, 0); |
2127 | atomic_set(&osb->alloc_stats.local_data, 0); | 2108 | atomic_set(&osb->alloc_stats.local_data, 0); |
2128 | atomic_set(&osb->alloc_stats.bitmap_data, 0); | 2109 | atomic_set(&osb->alloc_stats.bitmap_data, 0); |
@@ -2276,8 +2257,8 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2276 | INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); | 2257 | INIT_WORK(&journal->j_recovery_work, ocfs2_complete_recovery); |
2277 | journal->j_state = OCFS2_JOURNAL_FREE; | 2258 | journal->j_state = OCFS2_JOURNAL_FREE; |
2278 | 2259 | ||
2279 | INIT_WORK(&osb->dentry_lock_work, ocfs2_drop_dl_inodes); | 2260 | INIT_WORK(&osb->dquot_drop_work, ocfs2_drop_dquot_refs); |
2280 | osb->dentry_lock_list = NULL; | 2261 | init_llist_head(&osb->dquot_drop_list); |
2281 | 2262 | ||
2282 | /* get some pseudo constants for clustersize bits */ | 2263 | /* get some pseudo constants for clustersize bits */ |
2283 | osb->s_clustersize_bits = | 2264 | osb->s_clustersize_bits = |
@@ -2311,8 +2292,6 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
2311 | goto bail; | 2292 | goto bail; |
2312 | } | 2293 | } |
2313 | 2294 | ||
2314 | memcpy(&uuid_net_key, di->id2.i_super.s_uuid, sizeof(uuid_net_key)); | ||
2315 | |||
2316 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); | 2295 | strncpy(osb->vol_label, di->id2.i_super.s_label, 63); |
2317 | osb->vol_label[63] = '\0'; | 2296 | osb->vol_label[63] = '\0'; |
2318 | osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); | 2297 | osb->root_blkno = le64_to_cpu(di->id2.i_super.s_root_blkno); |
diff --git a/fs/ocfs2/sysfile.c b/fs/ocfs2/sysfile.c index f053688d22a3..af155c183123 100644 --- a/fs/ocfs2/sysfile.c +++ b/fs/ocfs2/sysfile.c | |||
@@ -113,9 +113,11 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
113 | } else | 113 | } else |
114 | arr = get_local_system_inode(osb, type, slot); | 114 | arr = get_local_system_inode(osb, type, slot); |
115 | 115 | ||
116 | mutex_lock(&osb->system_file_mutex); | ||
116 | if (arr && ((inode = *arr) != NULL)) { | 117 | if (arr && ((inode = *arr) != NULL)) { |
117 | /* get a ref in addition to the array ref */ | 118 | /* get a ref in addition to the array ref */ |
118 | inode = igrab(inode); | 119 | inode = igrab(inode); |
120 | mutex_unlock(&osb->system_file_mutex); | ||
119 | BUG_ON(!inode); | 121 | BUG_ON(!inode); |
120 | 122 | ||
121 | return inode; | 123 | return inode; |
@@ -129,6 +131,7 @@ struct inode *ocfs2_get_system_file_inode(struct ocfs2_super *osb, | |||
129 | *arr = igrab(inode); | 131 | *arr = igrab(inode); |
130 | BUG_ON(!*arr); | 132 | BUG_ON(!*arr); |
131 | } | 133 | } |
134 | mutex_unlock(&osb->system_file_mutex); | ||
132 | return inode; | 135 | return inode; |
133 | } | 136 | } |
134 | 137 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 185fa3b7f962..016f01df3825 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c | |||
@@ -369,7 +369,7 @@ static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket) | |||
369 | * them fully. | 369 | * them fully. |
370 | */ | 370 | */ |
371 | static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, | 371 | static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, |
372 | u64 xb_blkno) | 372 | u64 xb_blkno, int new) |
373 | { | 373 | { |
374 | int i, rc = 0; | 374 | int i, rc = 0; |
375 | 375 | ||
@@ -383,9 +383,16 @@ static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket, | |||
383 | } | 383 | } |
384 | 384 | ||
385 | if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), | 385 | if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode), |
386 | bucket->bu_bhs[i])) | 386 | bucket->bu_bhs[i])) { |
387 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), | 387 | if (new) |
388 | bucket->bu_bhs[i]); | 388 | ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode), |
389 | bucket->bu_bhs[i]); | ||
390 | else { | ||
391 | set_buffer_uptodate(bucket->bu_bhs[i]); | ||
392 | ocfs2_set_buffer_uptodate(INODE_CACHE(bucket->bu_inode), | ||
393 | bucket->bu_bhs[i]); | ||
394 | } | ||
395 | } | ||
389 | } | 396 | } |
390 | 397 | ||
391 | if (rc) | 398 | if (rc) |
@@ -2602,6 +2609,7 @@ int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | |||
2602 | oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); | 2609 | oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); |
2603 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | 2610 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); |
2604 | spin_unlock(&oi->ip_lock); | 2611 | spin_unlock(&oi->ip_lock); |
2612 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
2605 | 2613 | ||
2606 | ocfs2_journal_dirty(handle, di_bh); | 2614 | ocfs2_journal_dirty(handle, di_bh); |
2607 | out_commit: | 2615 | out_commit: |
@@ -3200,8 +3208,15 @@ meta_guess: | |||
3200 | clusters_add += 1; | 3208 | clusters_add += 1; |
3201 | } | 3209 | } |
3202 | } else { | 3210 | } else { |
3203 | meta_add += 1; | ||
3204 | credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; | 3211 | credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS; |
3212 | if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
3213 | struct ocfs2_extent_list *el = &def_xv.xv.xr_list; | ||
3214 | meta_add += ocfs2_extend_meta_needed(el); | ||
3215 | credits += ocfs2_calc_extend_credits(inode->i_sb, | ||
3216 | el); | ||
3217 | } else { | ||
3218 | meta_add += 1; | ||
3219 | } | ||
3205 | } | 3220 | } |
3206 | out: | 3221 | out: |
3207 | if (clusters_need) | 3222 | if (clusters_need) |
@@ -3614,6 +3629,7 @@ int ocfs2_xattr_set(struct inode *inode, | |||
3614 | } | 3629 | } |
3615 | 3630 | ||
3616 | ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); | 3631 | ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt); |
3632 | ocfs2_update_inode_fsync_trans(ctxt.handle, inode, 0); | ||
3617 | 3633 | ||
3618 | ocfs2_commit_trans(osb, ctxt.handle); | 3634 | ocfs2_commit_trans(osb, ctxt.handle); |
3619 | 3635 | ||
@@ -4294,7 +4310,7 @@ static int ocfs2_xattr_create_index_block(struct inode *inode, | |||
4294 | 4310 | ||
4295 | trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); | 4311 | trace_ocfs2_xattr_create_index_block((unsigned long long)blkno); |
4296 | 4312 | ||
4297 | ret = ocfs2_init_xattr_bucket(xs->bucket, blkno); | 4313 | ret = ocfs2_init_xattr_bucket(xs->bucket, blkno, 1); |
4298 | if (ret) { | 4314 | if (ret) { |
4299 | mlog_errno(ret); | 4315 | mlog_errno(ret); |
4300 | goto out; | 4316 | goto out; |
@@ -4638,7 +4654,7 @@ static int ocfs2_divide_xattr_bucket(struct inode *inode, | |||
4638 | * Even if !new_bucket_head, we're overwriting t_bucket. Thus, | 4654 | * Even if !new_bucket_head, we're overwriting t_bucket. Thus, |
4639 | * there's no need to read it. | 4655 | * there's no need to read it. |
4640 | */ | 4656 | */ |
4641 | ret = ocfs2_init_xattr_bucket(t_bucket, new_blk); | 4657 | ret = ocfs2_init_xattr_bucket(t_bucket, new_blk, new_bucket_head); |
4642 | if (ret) { | 4658 | if (ret) { |
4643 | mlog_errno(ret); | 4659 | mlog_errno(ret); |
4644 | goto out; | 4660 | goto out; |
@@ -4804,7 +4820,7 @@ static int ocfs2_cp_xattr_bucket(struct inode *inode, | |||
4804 | * Even if !t_is_new, we're overwriting t_bucket. Thus, | 4820 | * Even if !t_is_new, we're overwriting t_bucket. Thus, |
4805 | * there's no need to read it. | 4821 | * there's no need to read it. |
4806 | */ | 4822 | */ |
4807 | ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno); | 4823 | ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno, t_is_new); |
4808 | if (ret) | 4824 | if (ret) |
4809 | goto out; | 4825 | goto out; |
4810 | 4826 | ||
@@ -5476,6 +5492,7 @@ static int ocfs2_rm_xattr_cluster(struct inode *inode, | |||
5476 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); | 5492 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); |
5477 | if (ret) | 5493 | if (ret) |
5478 | mlog_errno(ret); | 5494 | mlog_errno(ret); |
5495 | ocfs2_update_inode_fsync_trans(handle, inode, 0); | ||
5479 | 5496 | ||
5480 | out_commit: | 5497 | out_commit: |
5481 | ocfs2_commit_trans(osb, handle); | 5498 | ocfs2_commit_trans(osb, handle); |
@@ -6830,7 +6847,7 @@ static int ocfs2_reflink_xattr_bucket(handle_t *handle, | |||
6830 | break; | 6847 | break; |
6831 | } | 6848 | } |
6832 | 6849 | ||
6833 | ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno); | 6850 | ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno, 1); |
6834 | if (ret) { | 6851 | if (ret) { |
6835 | mlog_errno(ret); | 6852 | mlog_errno(ret); |
6836 | break; | 6853 | break; |
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d8b0afde2179..ec58c7659183 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
@@ -183,7 +183,7 @@ int omfs_sync_inode(struct inode *inode) | |||
183 | */ | 183 | */ |
184 | static void omfs_evict_inode(struct inode *inode) | 184 | static void omfs_evict_inode(struct inode *inode) |
185 | { | 185 | { |
186 | truncate_inode_pages(&inode->i_data, 0); | 186 | truncate_inode_pages_final(&inode->i_data); |
187 | clear_inode(inode); | 187 | clear_inode(inode); |
188 | 188 | ||
189 | if (inode->i_nlink) | 189 | if (inode->i_nlink) |
@@ -231,7 +231,13 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
231 | return -EINVAL; | 231 | return -EINVAL; |
232 | 232 | ||
233 | /* Return error if mode is not supported */ | 233 | /* Return error if mode is not supported */ |
234 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 234 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
235 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
236 | return -EOPNOTSUPP; | ||
237 | |||
238 | /* Punch hole and zero range are mutually exclusive */ | ||
239 | if ((mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) == | ||
240 | (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) | ||
235 | return -EOPNOTSUPP; | 241 | return -EOPNOTSUPP; |
236 | 242 | ||
237 | /* Punch hole must have keep size set */ | 243 | /* Punch hole must have keep size set */ |
@@ -239,11 +245,20 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
239 | !(mode & FALLOC_FL_KEEP_SIZE)) | 245 | !(mode & FALLOC_FL_KEEP_SIZE)) |
240 | return -EOPNOTSUPP; | 246 | return -EOPNOTSUPP; |
241 | 247 | ||
248 | /* Collapse range should only be used exclusively. */ | ||
249 | if ((mode & FALLOC_FL_COLLAPSE_RANGE) && | ||
250 | (mode & ~FALLOC_FL_COLLAPSE_RANGE)) | ||
251 | return -EINVAL; | ||
252 | |||
242 | if (!(file->f_mode & FMODE_WRITE)) | 253 | if (!(file->f_mode & FMODE_WRITE)) |
243 | return -EBADF; | 254 | return -EBADF; |
244 | 255 | ||
245 | /* It's not possible punch hole on append only file */ | 256 | /* |
246 | if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode)) | 257 | * It's not possible to punch hole or perform collapse range |
258 | * on append only file | ||
259 | */ | ||
260 | if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE) | ||
261 | && IS_APPEND(inode)) | ||
247 | return -EPERM; | 262 | return -EPERM; |
248 | 263 | ||
249 | if (IS_IMMUTABLE(inode)) | 264 | if (IS_IMMUTABLE(inode)) |
@@ -271,6 +286,14 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
271 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) | 286 | if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) |
272 | return -EFBIG; | 287 | return -EFBIG; |
273 | 288 | ||
289 | /* | ||
290 | * There is no need to overlap collapse range with EOF, in which case | ||
291 | * it is effectively a truncate operation | ||
292 | */ | ||
293 | if ((mode & FALLOC_FL_COLLAPSE_RANGE) && | ||
294 | (offset + len >= i_size_read(inode))) | ||
295 | return -EINVAL; | ||
296 | |||
274 | if (!file->f_op->fallocate) | 297 | if (!file->f_op->fallocate) |
275 | return -EOPNOTSUPP; | 298 | return -EOPNOTSUPP; |
276 | 299 | ||
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 8c0ceb8dd1f7..15e4500cda3e 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
@@ -368,6 +368,7 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) | |||
368 | 368 | ||
369 | static int openprom_remount(struct super_block *sb, int *flags, char *data) | 369 | static int openprom_remount(struct super_block *sb, int *flags, char *data) |
370 | { | 370 | { |
371 | sync_filesystem(sb); | ||
371 | *flags |= MS_NOATIME; | 372 | *flags |= MS_NOATIME; |
372 | return 0; | 373 | return 0; |
373 | } | 374 | } |
diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 11c54fd51e16..9e363e41dacc 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c | |||
@@ -723,7 +723,7 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, | |||
723 | void *buffer, size_t size) | 723 | void *buffer, size_t size) |
724 | { | 724 | { |
725 | posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; | 725 | posix_acl_xattr_header *ext_acl = (posix_acl_xattr_header *)buffer; |
726 | posix_acl_xattr_entry *ext_entry = ext_acl->a_entries; | 726 | posix_acl_xattr_entry *ext_entry; |
727 | int real_size, n; | 727 | int real_size, n; |
728 | 728 | ||
729 | real_size = posix_acl_xattr_size(acl->a_count); | 729 | real_size = posix_acl_xattr_size(acl->a_count); |
@@ -731,7 +731,8 @@ posix_acl_to_xattr(struct user_namespace *user_ns, const struct posix_acl *acl, | |||
731 | return real_size; | 731 | return real_size; |
732 | if (real_size > size) | 732 | if (real_size > size) |
733 | return -ERANGE; | 733 | return -ERANGE; |
734 | 734 | ||
735 | ext_entry = ext_acl->a_entries; | ||
735 | ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); | 736 | ext_acl->a_version = cpu_to_le32(POSIX_ACL_XATTR_VERSION); |
736 | 737 | ||
737 | for (n=0; n < acl->a_count; n++, ext_entry++) { | 738 | for (n=0; n < acl->a_count; n++, ext_entry++) { |
diff --git a/fs/proc/Makefile b/fs/proc/Makefile index ab30716584f5..239493ec718e 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile | |||
@@ -27,6 +27,5 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o | |||
27 | proc-$(CONFIG_NET) += proc_net.o | 27 | proc-$(CONFIG_NET) += proc_net.o |
28 | proc-$(CONFIG_PROC_KCORE) += kcore.o | 28 | proc-$(CONFIG_PROC_KCORE) += kcore.o |
29 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o | 29 | proc-$(CONFIG_PROC_VMCORE) += vmcore.o |
30 | proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o | ||
31 | proc-$(CONFIG_PRINTK) += kmsg.o | 30 | proc-$(CONFIG_PRINTK) += kmsg.o |
32 | proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o | 31 | proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o |
diff --git a/fs/proc/array.c b/fs/proc/array.c index 656e401794de..64db2bceac59 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
@@ -138,8 +138,8 @@ static const char * const task_state_array[] = { | |||
138 | "D (disk sleep)", /* 2 */ | 138 | "D (disk sleep)", /* 2 */ |
139 | "T (stopped)", /* 4 */ | 139 | "T (stopped)", /* 4 */ |
140 | "t (tracing stop)", /* 8 */ | 140 | "t (tracing stop)", /* 8 */ |
141 | "Z (zombie)", /* 16 */ | 141 | "X (dead)", /* 16 */ |
142 | "X (dead)", /* 32 */ | 142 | "Z (zombie)", /* 32 */ |
143 | }; | 143 | }; |
144 | 144 | ||
145 | static inline const char *get_task_state(struct task_struct *tsk) | 145 | static inline const char *get_task_state(struct task_struct *tsk) |
diff --git a/fs/proc/base.c b/fs/proc/base.c index b9760628e1fd..6b7087e2e8fb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
@@ -1236,6 +1236,9 @@ static ssize_t proc_fault_inject_write(struct file * file, | |||
1236 | make_it_fail = simple_strtol(strstrip(buffer), &end, 0); | 1236 | make_it_fail = simple_strtol(strstrip(buffer), &end, 0); |
1237 | if (*end) | 1237 | if (*end) |
1238 | return -EINVAL; | 1238 | return -EINVAL; |
1239 | if (make_it_fail < 0 || make_it_fail > 1) | ||
1240 | return -EINVAL; | ||
1241 | |||
1239 | task = get_proc_task(file_inode(file)); | 1242 | task = get_proc_task(file_inode(file)); |
1240 | if (!task) | 1243 | if (!task) |
1241 | return -ESRCH; | 1244 | return -ESRCH; |
@@ -2588,7 +2591,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2588 | REG("environ", S_IRUSR, proc_environ_operations), | 2591 | REG("environ", S_IRUSR, proc_environ_operations), |
2589 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2592 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2590 | ONE("status", S_IRUGO, proc_pid_status), | 2593 | ONE("status", S_IRUGO, proc_pid_status), |
2591 | ONE("personality", S_IRUGO, proc_pid_personality), | 2594 | ONE("personality", S_IRUSR, proc_pid_personality), |
2592 | INF("limits", S_IRUGO, proc_pid_limits), | 2595 | INF("limits", S_IRUGO, proc_pid_limits), |
2593 | #ifdef CONFIG_SCHED_DEBUG | 2596 | #ifdef CONFIG_SCHED_DEBUG |
2594 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2597 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
@@ -2598,7 +2601,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2598 | #endif | 2601 | #endif |
2599 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 2602 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
2600 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 2603 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
2601 | INF("syscall", S_IRUGO, proc_pid_syscall), | 2604 | INF("syscall", S_IRUSR, proc_pid_syscall), |
2602 | #endif | 2605 | #endif |
2603 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 2606 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
2604 | ONE("stat", S_IRUGO, proc_tgid_stat), | 2607 | ONE("stat", S_IRUGO, proc_tgid_stat), |
@@ -2617,7 +2620,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2617 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2620 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2618 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 2621 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
2619 | REG("smaps", S_IRUGO, proc_pid_smaps_operations), | 2622 | REG("smaps", S_IRUGO, proc_pid_smaps_operations), |
2620 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 2623 | REG("pagemap", S_IRUSR, proc_pagemap_operations), |
2621 | #endif | 2624 | #endif |
2622 | #ifdef CONFIG_SECURITY | 2625 | #ifdef CONFIG_SECURITY |
2623 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), | 2626 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), |
@@ -2626,7 +2629,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
2626 | INF("wchan", S_IRUGO, proc_pid_wchan), | 2629 | INF("wchan", S_IRUGO, proc_pid_wchan), |
2627 | #endif | 2630 | #endif |
2628 | #ifdef CONFIG_STACKTRACE | 2631 | #ifdef CONFIG_STACKTRACE |
2629 | ONE("stack", S_IRUGO, proc_pid_stack), | 2632 | ONE("stack", S_IRUSR, proc_pid_stack), |
2630 | #endif | 2633 | #endif |
2631 | #ifdef CONFIG_SCHEDSTATS | 2634 | #ifdef CONFIG_SCHEDSTATS |
2632 | INF("schedstat", S_IRUGO, proc_pid_schedstat), | 2635 | INF("schedstat", S_IRUGO, proc_pid_schedstat), |
@@ -2927,14 +2930,14 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2927 | REG("environ", S_IRUSR, proc_environ_operations), | 2930 | REG("environ", S_IRUSR, proc_environ_operations), |
2928 | INF("auxv", S_IRUSR, proc_pid_auxv), | 2931 | INF("auxv", S_IRUSR, proc_pid_auxv), |
2929 | ONE("status", S_IRUGO, proc_pid_status), | 2932 | ONE("status", S_IRUGO, proc_pid_status), |
2930 | ONE("personality", S_IRUGO, proc_pid_personality), | 2933 | ONE("personality", S_IRUSR, proc_pid_personality), |
2931 | INF("limits", S_IRUGO, proc_pid_limits), | 2934 | INF("limits", S_IRUGO, proc_pid_limits), |
2932 | #ifdef CONFIG_SCHED_DEBUG | 2935 | #ifdef CONFIG_SCHED_DEBUG |
2933 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), | 2936 | REG("sched", S_IRUGO|S_IWUSR, proc_pid_sched_operations), |
2934 | #endif | 2937 | #endif |
2935 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), | 2938 | REG("comm", S_IRUGO|S_IWUSR, proc_pid_set_comm_operations), |
2936 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK | 2939 | #ifdef CONFIG_HAVE_ARCH_TRACEHOOK |
2937 | INF("syscall", S_IRUGO, proc_pid_syscall), | 2940 | INF("syscall", S_IRUSR, proc_pid_syscall), |
2938 | #endif | 2941 | #endif |
2939 | INF("cmdline", S_IRUGO, proc_pid_cmdline), | 2942 | INF("cmdline", S_IRUGO, proc_pid_cmdline), |
2940 | ONE("stat", S_IRUGO, proc_tid_stat), | 2943 | ONE("stat", S_IRUGO, proc_tid_stat), |
@@ -2955,7 +2958,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2955 | #ifdef CONFIG_PROC_PAGE_MONITOR | 2958 | #ifdef CONFIG_PROC_PAGE_MONITOR |
2956 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), | 2959 | REG("clear_refs", S_IWUSR, proc_clear_refs_operations), |
2957 | REG("smaps", S_IRUGO, proc_tid_smaps_operations), | 2960 | REG("smaps", S_IRUGO, proc_tid_smaps_operations), |
2958 | REG("pagemap", S_IRUGO, proc_pagemap_operations), | 2961 | REG("pagemap", S_IRUSR, proc_pagemap_operations), |
2959 | #endif | 2962 | #endif |
2960 | #ifdef CONFIG_SECURITY | 2963 | #ifdef CONFIG_SECURITY |
2961 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), | 2964 | DIR("attr", S_IRUGO|S_IXUGO, proc_attr_dir_inode_operations, proc_attr_dir_operations), |
@@ -2964,7 +2967,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
2964 | INF("wchan", S_IRUGO, proc_pid_wchan), | 2967 | INF("wchan", S_IRUGO, proc_pid_wchan), |
2965 | #endif | 2968 | #endif |
2966 | #ifdef CONFIG_STACKTRACE | 2969 | #ifdef CONFIG_STACKTRACE |
2967 | ONE("stack", S_IRUGO, proc_pid_stack), | 2970 | ONE("stack", S_IRUSR, proc_pid_stack), |
2968 | #endif | 2971 | #endif |
2969 | #ifdef CONFIG_SCHEDSTATS | 2972 | #ifdef CONFIG_SCHEDSTATS |
2970 | INF("schedstat", S_IRUGO, proc_pid_schedstat), | 2973 | INF("schedstat", S_IRUGO, proc_pid_schedstat), |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 985ea881b5bc..0788d093f5d8 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c | |||
@@ -11,6 +11,7 @@ | |||
11 | 11 | ||
12 | #include <linux/proc_fs.h> | 12 | #include <linux/proc_fs.h> |
13 | 13 | ||
14 | #include "../mount.h" | ||
14 | #include "internal.h" | 15 | #include "internal.h" |
15 | #include "fd.h" | 16 | #include "fd.h" |
16 | 17 | ||
@@ -48,8 +49,9 @@ static int seq_show(struct seq_file *m, void *v) | |||
48 | } | 49 | } |
49 | 50 | ||
50 | if (!ret) { | 51 | if (!ret) { |
51 | seq_printf(m, "pos:\t%lli\nflags:\t0%o\n", | 52 | seq_printf(m, "pos:\t%lli\nflags:\t0%o\nmnt_id:\t%i\n", |
52 | (long long)file->f_pos, f_flags); | 53 | (long long)file->f_pos, f_flags, |
54 | real_mount(file->f_path.mnt)->mnt_id); | ||
53 | if (file->f_op->show_fdinfo) | 55 | if (file->f_op->show_fdinfo) |
54 | ret = file->f_op->show_fdinfo(m, file); | 56 | ret = file->f_op->show_fdinfo(m, file); |
55 | fput(file); | 57 | fput(file); |
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 124fc43c7090..0adbc02d60e3 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
@@ -35,7 +35,7 @@ static void proc_evict_inode(struct inode *inode) | |||
35 | const struct proc_ns_operations *ns_ops; | 35 | const struct proc_ns_operations *ns_ops; |
36 | void *ns; | 36 | void *ns; |
37 | 37 | ||
38 | truncate_inode_pages(&inode->i_data, 0); | 38 | truncate_inode_pages_final(&inode->i_data); |
39 | clear_inode(inode); | 39 | clear_inode(inode); |
40 | 40 | ||
41 | /* Stop tracking associated processes */ | 41 | /* Stop tracking associated processes */ |
@@ -47,7 +47,7 @@ static void proc_evict_inode(struct inode *inode) | |||
47 | pde_put(de); | 47 | pde_put(de); |
48 | head = PROC_I(inode)->sysctl; | 48 | head = PROC_I(inode)->sysctl; |
49 | if (head) { | 49 | if (head) { |
50 | rcu_assign_pointer(PROC_I(inode)->sysctl, NULL); | 50 | RCU_INIT_POINTER(PROC_I(inode)->sysctl, NULL); |
51 | sysctl_head_put(head); | 51 | sysctl_head_put(head); |
52 | } | 52 | } |
53 | /* Release any associated namespace */ | 53 | /* Release any associated namespace */ |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 651d09a11dde..3ab6d14e71c5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
@@ -211,13 +211,6 @@ extern int proc_fill_super(struct super_block *); | |||
211 | extern void proc_entry_rundown(struct proc_dir_entry *); | 211 | extern void proc_entry_rundown(struct proc_dir_entry *); |
212 | 212 | ||
213 | /* | 213 | /* |
214 | * proc_devtree.c | ||
215 | */ | ||
216 | #ifdef CONFIG_PROC_DEVICETREE | ||
217 | extern void proc_device_tree_init(void); | ||
218 | #endif | ||
219 | |||
220 | /* | ||
221 | * proc_namespaces.c | 214 | * proc_namespaces.c |
222 | */ | 215 | */ |
223 | extern const struct inode_operations proc_ns_dir_inode_operations; | 216 | extern const struct inode_operations proc_ns_dir_inode_operations; |
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 136e548d9567..7445af0b1aa3 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c | |||
@@ -73,7 +73,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) | |||
73 | available += pagecache; | 73 | available += pagecache; |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * Part of the reclaimable swap consists of items that are in use, | 76 | * Part of the reclaimable slab consists of items that are in use, |
77 | * and cannot be freed. Cap this estimate at the low watermark. | 77 | * and cannot be freed. Cap this estimate at the low watermark. |
78 | */ | 78 | */ |
79 | available += global_page_state(NR_SLAB_RECLAIMABLE) - | 79 | available += global_page_state(NR_SLAB_RECLAIMABLE) - |
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c deleted file mode 100644 index c82dd5147845..000000000000 --- a/fs/proc/proc_devtree.c +++ /dev/null | |||
@@ -1,241 +0,0 @@ | |||
1 | /* | ||
2 | * proc_devtree.c - handles /proc/device-tree | ||
3 | * | ||
4 | * Copyright 1997 Paul Mackerras | ||
5 | */ | ||
6 | #include <linux/errno.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/time.h> | ||
9 | #include <linux/proc_fs.h> | ||
10 | #include <linux/seq_file.h> | ||
11 | #include <linux/printk.h> | ||
12 | #include <linux/stat.h> | ||
13 | #include <linux/string.h> | ||
14 | #include <linux/of.h> | ||
15 | #include <linux/export.h> | ||
16 | #include <linux/slab.h> | ||
17 | #include <asm/uaccess.h> | ||
18 | #include "internal.h" | ||
19 | |||
20 | static inline void set_node_proc_entry(struct device_node *np, | ||
21 | struct proc_dir_entry *de) | ||
22 | { | ||
23 | np->pde = de; | ||
24 | } | ||
25 | |||
26 | static struct proc_dir_entry *proc_device_tree; | ||
27 | |||
28 | /* | ||
29 | * Supply data on a read from /proc/device-tree/node/property. | ||
30 | */ | ||
31 | static int property_proc_show(struct seq_file *m, void *v) | ||
32 | { | ||
33 | struct property *pp = m->private; | ||
34 | |||
35 | seq_write(m, pp->value, pp->length); | ||
36 | return 0; | ||
37 | } | ||
38 | |||
39 | static int property_proc_open(struct inode *inode, struct file *file) | ||
40 | { | ||
41 | return single_open(file, property_proc_show, __PDE_DATA(inode)); | ||
42 | } | ||
43 | |||
44 | static const struct file_operations property_proc_fops = { | ||
45 | .owner = THIS_MODULE, | ||
46 | .open = property_proc_open, | ||
47 | .read = seq_read, | ||
48 | .llseek = seq_lseek, | ||
49 | .release = single_release, | ||
50 | }; | ||
51 | |||
52 | /* | ||
53 | * For a node with a name like "gc@10", we make symlinks called "gc" | ||
54 | * and "@10" to it. | ||
55 | */ | ||
56 | |||
57 | /* | ||
58 | * Add a property to a node | ||
59 | */ | ||
60 | static struct proc_dir_entry * | ||
61 | __proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp, | ||
62 | const char *name) | ||
63 | { | ||
64 | struct proc_dir_entry *ent; | ||
65 | |||
66 | /* | ||
67 | * Unfortunately proc_register puts each new entry | ||
68 | * at the beginning of the list. So we rearrange them. | ||
69 | */ | ||
70 | ent = proc_create_data(name, | ||
71 | strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR, | ||
72 | de, &property_proc_fops, pp); | ||
73 | if (ent == NULL) | ||
74 | return NULL; | ||
75 | |||
76 | if (!strncmp(name, "security-", 9)) | ||
77 | proc_set_size(ent, 0); /* don't leak number of password chars */ | ||
78 | else | ||
79 | proc_set_size(ent, pp->length); | ||
80 | |||
81 | return ent; | ||
82 | } | ||
83 | |||
84 | |||
85 | void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop) | ||
86 | { | ||
87 | __proc_device_tree_add_prop(pde, prop, prop->name); | ||
88 | } | ||
89 | |||
90 | void proc_device_tree_remove_prop(struct proc_dir_entry *pde, | ||
91 | struct property *prop) | ||
92 | { | ||
93 | remove_proc_entry(prop->name, pde); | ||
94 | } | ||
95 | |||
96 | void proc_device_tree_update_prop(struct proc_dir_entry *pde, | ||
97 | struct property *newprop, | ||
98 | struct property *oldprop) | ||
99 | { | ||
100 | struct proc_dir_entry *ent; | ||
101 | |||
102 | if (!oldprop) { | ||
103 | proc_device_tree_add_prop(pde, newprop); | ||
104 | return; | ||
105 | } | ||
106 | |||
107 | for (ent = pde->subdir; ent != NULL; ent = ent->next) | ||
108 | if (ent->data == oldprop) | ||
109 | break; | ||
110 | if (ent == NULL) { | ||
111 | pr_warn("device-tree: property \"%s\" does not exist\n", | ||
112 | oldprop->name); | ||
113 | } else { | ||
114 | ent->data = newprop; | ||
115 | ent->size = newprop->length; | ||
116 | } | ||
117 | } | ||
118 | |||
119 | /* | ||
120 | * Various dodgy firmware might give us nodes and/or properties with | ||
121 | * conflicting names. That's generally ok, except for exporting via /proc, | ||
122 | * so munge names here to ensure they're unique. | ||
123 | */ | ||
124 | |||
125 | static int duplicate_name(struct proc_dir_entry *de, const char *name) | ||
126 | { | ||
127 | struct proc_dir_entry *ent; | ||
128 | int found = 0; | ||
129 | |||
130 | spin_lock(&proc_subdir_lock); | ||
131 | |||
132 | for (ent = de->subdir; ent != NULL; ent = ent->next) { | ||
133 | if (strcmp(ent->name, name) == 0) { | ||
134 | found = 1; | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | |||
139 | spin_unlock(&proc_subdir_lock); | ||
140 | |||
141 | return found; | ||
142 | } | ||
143 | |||
144 | static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de, | ||
145 | const char *name) | ||
146 | { | ||
147 | char *fixed_name; | ||
148 | int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */ | ||
149 | int i = 1, size; | ||
150 | |||
151 | realloc: | ||
152 | fixed_name = kmalloc(fixup_len, GFP_KERNEL); | ||
153 | if (fixed_name == NULL) { | ||
154 | pr_err("device-tree: Out of memory trying to fixup " | ||
155 | "name \"%s\"\n", name); | ||
156 | return name; | ||
157 | } | ||
158 | |||
159 | retry: | ||
160 | size = snprintf(fixed_name, fixup_len, "%s#%d", name, i); | ||
161 | size++; /* account for NULL */ | ||
162 | |||
163 | if (size > fixup_len) { | ||
164 | /* We ran out of space, free and reallocate. */ | ||
165 | kfree(fixed_name); | ||
166 | fixup_len = size; | ||
167 | goto realloc; | ||
168 | } | ||
169 | |||
170 | if (duplicate_name(de, fixed_name)) { | ||
171 | /* Multiple duplicates. Retry with a different offset. */ | ||
172 | i++; | ||
173 | goto retry; | ||
174 | } | ||
175 | |||
176 | pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n", | ||
177 | np->full_name, fixed_name); | ||
178 | |||
179 | return fixed_name; | ||
180 | } | ||
181 | |||
182 | /* | ||
183 | * Process a node, adding entries for its children and its properties. | ||
184 | */ | ||
185 | void proc_device_tree_add_node(struct device_node *np, | ||
186 | struct proc_dir_entry *de) | ||
187 | { | ||
188 | struct property *pp; | ||
189 | struct proc_dir_entry *ent; | ||
190 | struct device_node *child; | ||
191 | const char *p; | ||
192 | |||
193 | set_node_proc_entry(np, de); | ||
194 | for (child = NULL; (child = of_get_next_child(np, child));) { | ||
195 | /* Use everything after the last slash, or the full name */ | ||
196 | p = kbasename(child->full_name); | ||
197 | |||
198 | if (duplicate_name(de, p)) | ||
199 | p = fixup_name(np, de, p); | ||
200 | |||
201 | ent = proc_mkdir(p, de); | ||
202 | if (ent == NULL) | ||
203 | break; | ||
204 | proc_device_tree_add_node(child, ent); | ||
205 | } | ||
206 | of_node_put(child); | ||
207 | |||
208 | for (pp = np->properties; pp != NULL; pp = pp->next) { | ||
209 | p = pp->name; | ||
210 | |||
211 | if (strchr(p, '/')) | ||
212 | continue; | ||
213 | |||
214 | if (duplicate_name(de, p)) | ||
215 | p = fixup_name(np, de, p); | ||
216 | |||
217 | ent = __proc_device_tree_add_prop(de, pp, p); | ||
218 | if (ent == NULL) | ||
219 | break; | ||
220 | } | ||
221 | } | ||
222 | |||
223 | /* | ||
224 | * Called on initialization to set up the /proc/device-tree subtree | ||
225 | */ | ||
226 | void __init proc_device_tree_init(void) | ||
227 | { | ||
228 | struct device_node *root; | ||
229 | |||
230 | proc_device_tree = proc_mkdir("device-tree", NULL); | ||
231 | if (proc_device_tree == NULL) | ||
232 | return; | ||
233 | root = of_find_node_by_path("/"); | ||
234 | if (root == NULL) { | ||
235 | remove_proc_entry("device-tree", NULL); | ||
236 | pr_debug("/proc/device-tree: can't find root\n"); | ||
237 | return; | ||
238 | } | ||
239 | proc_device_tree_add_node(root, proc_device_tree); | ||
240 | of_node_put(root); | ||
241 | } | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 87dbcbef7fe4..5dbadecb234d 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
@@ -92,6 +92,8 @@ static int proc_parse_options(char *options, struct pid_namespace *pid) | |||
92 | int proc_remount(struct super_block *sb, int *flags, char *data) | 92 | int proc_remount(struct super_block *sb, int *flags, char *data) |
93 | { | 93 | { |
94 | struct pid_namespace *pid = sb->s_fs_info; | 94 | struct pid_namespace *pid = sb->s_fs_info; |
95 | |||
96 | sync_filesystem(sb); | ||
95 | return !proc_parse_options(data, pid); | 97 | return !proc_parse_options(data, pid); |
96 | } | 98 | } |
97 | 99 | ||
@@ -183,9 +185,6 @@ void __init proc_root_init(void) | |||
183 | proc_mkdir("openprom", NULL); | 185 | proc_mkdir("openprom", NULL); |
184 | #endif | 186 | #endif |
185 | proc_tty_init(); | 187 | proc_tty_init(); |
186 | #ifdef CONFIG_PROC_DEVICETREE | ||
187 | proc_device_tree_init(); | ||
188 | #endif | ||
189 | proc_mkdir("bus", NULL); | 188 | proc_mkdir("bus", NULL); |
190 | proc_sys_init(); | 189 | proc_sys_init(); |
191 | } | 190 | } |
diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6f599c62f0cc..9d231e9e5f0e 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c | |||
@@ -9,7 +9,7 @@ | |||
9 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/time.h> | 10 | #include <linux/time.h> |
11 | #include <linux/irqnr.h> | 11 | #include <linux/irqnr.h> |
12 | #include <asm/cputime.h> | 12 | #include <linux/cputime.h> |
13 | #include <linux/tick.h> | 13 | #include <linux/tick.h> |
14 | 14 | ||
15 | #ifndef arch_irq_stat_cpu | 15 | #ifndef arch_irq_stat_cpu |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fb52b548080d..442177b1119a 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
@@ -1,4 +1,5 @@ | |||
1 | #include <linux/mm.h> | 1 | #include <linux/mm.h> |
2 | #include <linux/vmacache.h> | ||
2 | #include <linux/hugetlb.h> | 3 | #include <linux/hugetlb.h> |
3 | #include <linux/huge_mm.h> | 4 | #include <linux/huge_mm.h> |
4 | #include <linux/mount.h> | 5 | #include <linux/mount.h> |
@@ -152,7 +153,7 @@ static void *m_start(struct seq_file *m, loff_t *pos) | |||
152 | 153 | ||
153 | /* | 154 | /* |
154 | * We remember last_addr rather than next_addr to hit with | 155 | * We remember last_addr rather than next_addr to hit with |
155 | * mmap_cache most of the time. We have zero last_addr at | 156 | * vmacache most of the time. We have zero last_addr at |
156 | * the beginning and also after lseek. We will have -1 last_addr | 157 | * the beginning and also after lseek. We will have -1 last_addr |
157 | * after the end of the vmas. | 158 | * after the end of the vmas. |
158 | */ | 159 | */ |
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c index 7141b8d0ca9e..33de567c25af 100644 --- a/fs/proc/uptime.c +++ b/fs/proc/uptime.c | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <linux/seq_file.h> | 5 | #include <linux/seq_file.h> |
6 | #include <linux/time.h> | 6 | #include <linux/time.h> |
7 | #include <linux/kernel_stat.h> | 7 | #include <linux/kernel_stat.h> |
8 | #include <asm/cputime.h> | 8 | #include <linux/cputime.h> |
9 | 9 | ||
10 | static int uptime_proc_show(struct seq_file *m, void *v) | 10 | static int uptime_proc_show(struct seq_file *m, void *v) |
11 | { | 11 | { |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 88d4585b30f1..6a8e785b29da 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
@@ -484,7 +484,6 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr) | |||
484 | phdr_ptr->p_memsz = real_sz; | 484 | phdr_ptr->p_memsz = real_sz; |
485 | if (real_sz == 0) { | 485 | if (real_sz == 0) { |
486 | pr_warn("Warning: Zero PT_NOTE entries found\n"); | 486 | pr_warn("Warning: Zero PT_NOTE entries found\n"); |
487 | return -EINVAL; | ||
488 | } | 487 | } |
489 | } | 488 | } |
490 | 489 | ||
@@ -671,7 +670,6 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr) | |||
671 | phdr_ptr->p_memsz = real_sz; | 670 | phdr_ptr->p_memsz = real_sz; |
672 | if (real_sz == 0) { | 671 | if (real_sz == 0) { |
673 | pr_warn("Warning: Zero PT_NOTE entries found\n"); | 672 | pr_warn("Warning: Zero PT_NOTE entries found\n"); |
674 | return -EINVAL; | ||
675 | } | 673 | } |
676 | } | 674 | } |
677 | 675 | ||
@@ -1118,4 +1116,3 @@ void vmcore_cleanup(void) | |||
1118 | } | 1116 | } |
1119 | free_elfcorebuf(); | 1117 | free_elfcorebuf(); |
1120 | } | 1118 | } |
1121 | EXPORT_SYMBOL_GPL(vmcore_cleanup); | ||
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 12823845d324..192297b0090d 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c | |||
@@ -249,6 +249,7 @@ static void parse_options(char *options) | |||
249 | 249 | ||
250 | static int pstore_remount(struct super_block *sb, int *flags, char *data) | 250 | static int pstore_remount(struct super_block *sb, int *flags, char *data) |
251 | { | 251 | { |
252 | sync_filesystem(sb); | ||
252 | parse_options(data); | 253 | parse_options(data); |
253 | 254 | ||
254 | return 0; | 255 | return 0; |
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 78c3c2097787..46d269e38706 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c | |||
@@ -497,6 +497,7 @@ void pstore_get_records(int quiet) | |||
497 | big_oops_buf_sz); | 497 | big_oops_buf_sz); |
498 | 498 | ||
499 | if (unzipped_len > 0) { | 499 | if (unzipped_len > 0) { |
500 | kfree(buf); | ||
500 | buf = big_oops_buf; | 501 | buf = big_oops_buf; |
501 | size = unzipped_len; | 502 | size = unzipped_len; |
502 | compressed = false; | 503 | compressed = false; |
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index fa8cef2cca3a..3b5744306ed8 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c | |||
@@ -86,6 +86,7 @@ struct ramoops_context { | |||
86 | struct persistent_ram_ecc_info ecc_info; | 86 | struct persistent_ram_ecc_info ecc_info; |
87 | unsigned int max_dump_cnt; | 87 | unsigned int max_dump_cnt; |
88 | unsigned int dump_write_cnt; | 88 | unsigned int dump_write_cnt; |
89 | /* _read_cnt need clear on ramoops_pstore_open */ | ||
89 | unsigned int dump_read_cnt; | 90 | unsigned int dump_read_cnt; |
90 | unsigned int console_read_cnt; | 91 | unsigned int console_read_cnt; |
91 | unsigned int ftrace_read_cnt; | 92 | unsigned int ftrace_read_cnt; |
@@ -101,6 +102,7 @@ static int ramoops_pstore_open(struct pstore_info *psi) | |||
101 | 102 | ||
102 | cxt->dump_read_cnt = 0; | 103 | cxt->dump_read_cnt = 0; |
103 | cxt->console_read_cnt = 0; | 104 | cxt->console_read_cnt = 0; |
105 | cxt->ftrace_read_cnt = 0; | ||
104 | return 0; | 106 | return 0; |
105 | } | 107 | } |
106 | 108 | ||
@@ -117,13 +119,15 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max, | |||
117 | return NULL; | 119 | return NULL; |
118 | 120 | ||
119 | prz = przs[i]; | 121 | prz = przs[i]; |
122 | if (!prz) | ||
123 | return NULL; | ||
120 | 124 | ||
121 | if (update) { | 125 | /* Update old/shadowed buffer. */ |
122 | /* Update old/shadowed buffer. */ | 126 | if (update) |
123 | persistent_ram_save_old(prz); | 127 | persistent_ram_save_old(prz); |
124 | if (!persistent_ram_old_size(prz)) | 128 | |
125 | return NULL; | 129 | if (!persistent_ram_old_size(prz)) |
126 | } | 130 | return NULL; |
127 | 131 | ||
128 | *typep = type; | 132 | *typep = type; |
129 | *id = i; | 133 | *id = i; |
@@ -316,6 +320,7 @@ static void ramoops_free_przs(struct ramoops_context *cxt) | |||
316 | { | 320 | { |
317 | int i; | 321 | int i; |
318 | 322 | ||
323 | cxt->max_dump_cnt = 0; | ||
319 | if (!cxt->przs) | 324 | if (!cxt->przs) |
320 | return; | 325 | return; |
321 | 326 | ||
@@ -346,7 +351,7 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt, | |||
346 | GFP_KERNEL); | 351 | GFP_KERNEL); |
347 | if (!cxt->przs) { | 352 | if (!cxt->przs) { |
348 | dev_err(dev, "failed to initialize a prz array for dumps\n"); | 353 | dev_err(dev, "failed to initialize a prz array for dumps\n"); |
349 | return -ENOMEM; | 354 | goto fail_prz; |
350 | } | 355 | } |
351 | 356 | ||
352 | for (i = 0; i < cxt->max_dump_cnt; i++) { | 357 | for (i = 0; i < cxt->max_dump_cnt; i++) { |
@@ -428,7 +433,6 @@ static int ramoops_probe(struct platform_device *pdev) | |||
428 | if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) | 433 | if (pdata->ftrace_size && !is_power_of_2(pdata->ftrace_size)) |
429 | pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); | 434 | pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size); |
430 | 435 | ||
431 | cxt->dump_read_cnt = 0; | ||
432 | cxt->size = pdata->mem_size; | 436 | cxt->size = pdata->mem_size; |
433 | cxt->phys_addr = pdata->mem_address; | 437 | cxt->phys_addr = pdata->mem_address; |
434 | cxt->record_size = pdata->record_size; | 438 | cxt->record_size = pdata->record_size; |
@@ -505,7 +509,6 @@ fail_buf: | |||
505 | kfree(cxt->pstore.buf); | 509 | kfree(cxt->pstore.buf); |
506 | fail_clear: | 510 | fail_clear: |
507 | cxt->pstore.bufsize = 0; | 511 | cxt->pstore.bufsize = 0; |
508 | cxt->max_dump_cnt = 0; | ||
509 | fail_cnt: | 512 | fail_cnt: |
510 | kfree(cxt->fprz); | 513 | kfree(cxt->fprz); |
511 | fail_init_fprz: | 514 | fail_init_fprz: |
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c index de272d426763..ff7e3d4df5a1 100644 --- a/fs/pstore/ram_core.c +++ b/fs/pstore/ram_core.c | |||
@@ -54,7 +54,7 @@ static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a) | |||
54 | do { | 54 | do { |
55 | old = atomic_read(&prz->buffer->start); | 55 | old = atomic_read(&prz->buffer->start); |
56 | new = old + a; | 56 | new = old + a; |
57 | while (unlikely(new > prz->buffer_size)) | 57 | while (unlikely(new >= prz->buffer_size)) |
58 | new -= prz->buffer_size; | 58 | new -= prz->buffer_size; |
59 | } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old); | 59 | } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old); |
60 | 60 | ||
@@ -91,7 +91,7 @@ static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a) | |||
91 | 91 | ||
92 | old = atomic_read(&prz->buffer->start); | 92 | old = atomic_read(&prz->buffer->start); |
93 | new = old + a; | 93 | new = old + a; |
94 | while (unlikely(new > prz->buffer_size)) | 94 | while (unlikely(new >= prz->buffer_size)) |
95 | new -= prz->buffer_size; | 95 | new -= prz->buffer_size; |
96 | atomic_set(&prz->buffer->start, new); | 96 | atomic_set(&prz->buffer->start, new); |
97 | 97 | ||
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 89558810381c..c4bcb778886e 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c | |||
@@ -44,6 +44,7 @@ static int qnx4_remount(struct super_block *sb, int *flags, char *data) | |||
44 | { | 44 | { |
45 | struct qnx4_sb_info *qs; | 45 | struct qnx4_sb_info *qs; |
46 | 46 | ||
47 | sync_filesystem(sb); | ||
47 | qs = qnx4_sb(sb); | 48 | qs = qnx4_sb(sb); |
48 | qs->Version = QNX4_VERSION; | 49 | qs->Version = QNX4_VERSION; |
49 | *flags |= MS_RDONLY; | 50 | *flags |= MS_RDONLY; |
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 8d941edfefa1..65cdaab3ed49 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c | |||
@@ -55,6 +55,7 @@ static int qnx6_show_options(struct seq_file *seq, struct dentry *root) | |||
55 | 55 | ||
56 | static int qnx6_remount(struct super_block *sb, int *flags, char *data) | 56 | static int qnx6_remount(struct super_block *sb, int *flags, char *data) |
57 | { | 57 | { |
58 | sync_filesystem(sb); | ||
58 | *flags |= MS_RDONLY; | 59 | *flags |= MS_RDONLY; |
59 | return 0; | 60 | return 0; |
60 | } | 61 | } |
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig index 880fd9884366..c51df1dd237e 100644 --- a/fs/quota/Kconfig +++ b/fs/quota/Kconfig | |||
@@ -8,9 +8,10 @@ config QUOTA | |||
8 | help | 8 | help |
9 | If you say Y here, you will be able to set per user limits for disk | 9 | If you say Y here, you will be able to set per user limits for disk |
10 | usage (also called disk quotas). Currently, it works for the | 10 | usage (also called disk quotas). Currently, it works for the |
11 | ext2, ext3, and reiserfs file system. ext3 also supports journalled | 11 | ext2, ext3, ext4, jfs, ocfs2 and reiserfs file systems. |
12 | quotas for which you don't need to run quotacheck(8) after an unclean | 12 | Note that gfs2 and xfs use their own quota system. |
13 | shutdown. | 13 | Ext3, ext4 and reiserfs also support journaled quotas for which |
14 | you don't need to run quotacheck(8) after an unclean shutdown. | ||
14 | For further details, read the Quota mini-HOWTO, available from | 15 | For further details, read the Quota mini-HOWTO, available from |
15 | <http://www.tldp.org/docs.html#howto>, or the documentation provided | 16 | <http://www.tldp.org/docs.html#howto>, or the documentation provided |
16 | with the quota tools. Probably the quota support is only useful for | 17 | with the quota tools. Probably the quota support is only useful for |
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index cfc8dcc16043..9cd5f63715c0 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c | |||
@@ -528,7 +528,7 @@ restart: | |||
528 | if (atomic_read(&dquot->dq_count)) { | 528 | if (atomic_read(&dquot->dq_count)) { |
529 | DEFINE_WAIT(wait); | 529 | DEFINE_WAIT(wait); |
530 | 530 | ||
531 | atomic_inc(&dquot->dq_count); | 531 | dqgrab(dquot); |
532 | prepare_to_wait(&dquot->dq_wait_unused, &wait, | 532 | prepare_to_wait(&dquot->dq_wait_unused, &wait, |
533 | TASK_UNINTERRUPTIBLE); | 533 | TASK_UNINTERRUPTIBLE); |
534 | spin_unlock(&dq_list_lock); | 534 | spin_unlock(&dq_list_lock); |
@@ -632,7 +632,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type) | |||
632 | /* Now we have active dquot from which someone is | 632 | /* Now we have active dquot from which someone is |
633 | * holding reference so we can safely just increase | 633 | * holding reference so we can safely just increase |
634 | * use count */ | 634 | * use count */ |
635 | atomic_inc(&dquot->dq_count); | 635 | dqgrab(dquot); |
636 | spin_unlock(&dq_list_lock); | 636 | spin_unlock(&dq_list_lock); |
637 | dqstats_inc(DQST_LOOKUPS); | 637 | dqstats_inc(DQST_LOOKUPS); |
638 | err = sb->dq_op->write_dquot(dquot); | 638 | err = sb->dq_op->write_dquot(dquot); |
diff --git a/fs/read_write.c b/fs/read_write.c index 28cc9c810744..31c6efa43183 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
@@ -994,9 +994,9 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, | |||
994 | return ret; | 994 | return ret; |
995 | } | 995 | } |
996 | 996 | ||
997 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | 997 | static long __compat_sys_preadv64(unsigned long fd, |
998 | const struct compat_iovec __user *,vec, | 998 | const struct compat_iovec __user *vec, |
999 | unsigned long, vlen, loff_t, pos) | 999 | unsigned long vlen, loff_t pos) |
1000 | { | 1000 | { |
1001 | struct fd f; | 1001 | struct fd f; |
1002 | ssize_t ret; | 1002 | ssize_t ret; |
@@ -1013,12 +1013,22 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | |||
1013 | return ret; | 1013 | return ret; |
1014 | } | 1014 | } |
1015 | 1015 | ||
1016 | #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 | ||
1017 | COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, | ||
1018 | const struct compat_iovec __user *,vec, | ||
1019 | unsigned long, vlen, loff_t, pos) | ||
1020 | { | ||
1021 | return __compat_sys_preadv64(fd, vec, vlen, pos); | ||
1022 | } | ||
1023 | #endif | ||
1024 | |||
1016 | COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, | 1025 | COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, |
1017 | const struct compat_iovec __user *,vec, | 1026 | const struct compat_iovec __user *,vec, |
1018 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) | 1027 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
1019 | { | 1028 | { |
1020 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | 1029 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1021 | return compat_sys_preadv64(fd, vec, vlen, pos); | 1030 | |
1031 | return __compat_sys_preadv64(fd, vec, vlen, pos); | ||
1022 | } | 1032 | } |
1023 | 1033 | ||
1024 | static size_t compat_writev(struct file *file, | 1034 | static size_t compat_writev(struct file *file, |
@@ -1061,9 +1071,9 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, | |||
1061 | return ret; | 1071 | return ret; |
1062 | } | 1072 | } |
1063 | 1073 | ||
1064 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | 1074 | static long __compat_sys_pwritev64(unsigned long fd, |
1065 | const struct compat_iovec __user *,vec, | 1075 | const struct compat_iovec __user *vec, |
1066 | unsigned long, vlen, loff_t, pos) | 1076 | unsigned long vlen, loff_t pos) |
1067 | { | 1077 | { |
1068 | struct fd f; | 1078 | struct fd f; |
1069 | ssize_t ret; | 1079 | ssize_t ret; |
@@ -1080,12 +1090,22 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | |||
1080 | return ret; | 1090 | return ret; |
1081 | } | 1091 | } |
1082 | 1092 | ||
1093 | #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 | ||
1094 | COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, | ||
1095 | const struct compat_iovec __user *,vec, | ||
1096 | unsigned long, vlen, loff_t, pos) | ||
1097 | { | ||
1098 | return __compat_sys_pwritev64(fd, vec, vlen, pos); | ||
1099 | } | ||
1100 | #endif | ||
1101 | |||
1083 | COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, | 1102 | COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, |
1084 | const struct compat_iovec __user *,vec, | 1103 | const struct compat_iovec __user *,vec, |
1085 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) | 1104 | compat_ulong_t, vlen, u32, pos_low, u32, pos_high) |
1086 | { | 1105 | { |
1087 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; | 1106 | loff_t pos = ((loff_t)pos_high << 32) | pos_low; |
1088 | return compat_sys_pwritev64(fd, vec, vlen, pos); | 1107 | |
1108 | return __compat_sys_pwritev64(fd, vec, vlen, pos); | ||
1089 | } | 1109 | } |
1090 | #endif | 1110 | #endif |
1091 | 1111 | ||
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 1fd2051109a3..af677353a3f5 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
@@ -125,6 +125,7 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
125 | int d_reclen; | 125 | int d_reclen; |
126 | char *d_name; | 126 | char *d_name; |
127 | ino_t d_ino; | 127 | ino_t d_ino; |
128 | loff_t cur_pos = deh_offset(deh); | ||
128 | 129 | ||
129 | if (!de_visible(deh)) | 130 | if (!de_visible(deh)) |
130 | /* it is hidden entry */ | 131 | /* it is hidden entry */ |
@@ -196,8 +197,9 @@ int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) | |||
196 | if (local_buf != small_buf) { | 197 | if (local_buf != small_buf) { |
197 | kfree(local_buf); | 198 | kfree(local_buf); |
198 | } | 199 | } |
199 | // next entry should be looked for with such offset | 200 | |
200 | next_pos = deh_offset(deh) + 1; | 201 | /* deh_offset(deh) may be invalid now. */ |
202 | next_pos = cur_pos + 1; | ||
201 | 203 | ||
202 | if (item_moved(&tmp_ih, &path_to_entry)) { | 204 | if (item_moved(&tmp_ih, &path_to_entry)) { |
203 | set_cpu_key_k_offset(&pos_key, | 205 | set_cpu_key_k_offset(&pos_key, |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index ad62bdbb451e..bc8b8009897d 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
@@ -35,7 +35,7 @@ void reiserfs_evict_inode(struct inode *inode) | |||
35 | if (!inode->i_nlink && !is_bad_inode(inode)) | 35 | if (!inode->i_nlink && !is_bad_inode(inode)) |
36 | dquot_initialize(inode); | 36 | dquot_initialize(inode); |
37 | 37 | ||
38 | truncate_inode_pages(&inode->i_data, 0); | 38 | truncate_inode_pages_final(&inode->i_data); |
39 | if (inode->i_nlink) | 39 | if (inode->i_nlink) |
40 | goto no_delete; | 40 | goto no_delete; |
41 | 41 | ||
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 8d06adf89948..83d4eac8059a 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
@@ -2831,6 +2831,7 @@ void reiserfs_init_alloc_options(struct super_block *s); | |||
2831 | */ | 2831 | */ |
2832 | __le32 reiserfs_choose_packing(struct inode *dir); | 2832 | __le32 reiserfs_choose_packing(struct inode *dir); |
2833 | 2833 | ||
2834 | void show_alloc_options(struct seq_file *seq, struct super_block *s); | ||
2834 | int reiserfs_init_bitmap_cache(struct super_block *sb); | 2835 | int reiserfs_init_bitmap_cache(struct super_block *sb); |
2835 | void reiserfs_free_bitmap_cache(struct super_block *sb); | 2836 | void reiserfs_free_bitmap_cache(struct super_block *sb); |
2836 | void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); | 2837 | void reiserfs_cache_bitmap_metadata(struct super_block *sb, struct buffer_head *bh, struct reiserfs_bitmap_info *info); |
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index 2c803353f8ac..9fb20426005e 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c | |||
@@ -62,7 +62,6 @@ static int is_any_reiserfs_magic_string(struct reiserfs_super_block *rs) | |||
62 | 62 | ||
63 | static int reiserfs_remount(struct super_block *s, int *flags, char *data); | 63 | static int reiserfs_remount(struct super_block *s, int *flags, char *data); |
64 | static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); | 64 | static int reiserfs_statfs(struct dentry *dentry, struct kstatfs *buf); |
65 | void show_alloc_options(struct seq_file *seq, struct super_block *s); | ||
66 | 65 | ||
67 | static int reiserfs_sync_fs(struct super_block *s, int wait) | 66 | static int reiserfs_sync_fs(struct super_block *s, int wait) |
68 | { | 67 | { |
@@ -597,7 +596,7 @@ static void init_once(void *foo) | |||
597 | inode_init_once(&ei->vfs_inode); | 596 | inode_init_once(&ei->vfs_inode); |
598 | } | 597 | } |
599 | 598 | ||
600 | static int init_inodecache(void) | 599 | static int __init init_inodecache(void) |
601 | { | 600 | { |
602 | reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", | 601 | reiserfs_inode_cachep = kmem_cache_create("reiser_inode_cache", |
603 | sizeof(struct | 602 | sizeof(struct |
@@ -1319,6 +1318,7 @@ static int reiserfs_remount(struct super_block *s, int *mount_flags, char *arg) | |||
1319 | int i; | 1318 | int i; |
1320 | #endif | 1319 | #endif |
1321 | 1320 | ||
1321 | sync_filesystem(s); | ||
1322 | reiserfs_write_lock(s); | 1322 | reiserfs_write_lock(s); |
1323 | 1323 | ||
1324 | #ifdef CONFIG_QUOTA | 1324 | #ifdef CONFIG_QUOTA |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index d8418782862b..ef90e8bca95a 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
@@ -432,6 +432,7 @@ static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
432 | */ | 432 | */ |
433 | static int romfs_remount(struct super_block *sb, int *flags, char *data) | 433 | static int romfs_remount(struct super_block *sb, int *flags, char *data) |
434 | { | 434 | { |
435 | sync_filesystem(sb); | ||
435 | *flags |= MS_RDONLY; | 436 | *flags |= MS_RDONLY; |
436 | return 0; | 437 | return 0; |
437 | } | 438 | } |
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 202df6312d4e..031c8d67fd51 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c | |||
@@ -371,6 +371,7 @@ static int squashfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
371 | 371 | ||
372 | static int squashfs_remount(struct super_block *sb, int *flags, char *data) | 372 | static int squashfs_remount(struct super_block *sb, int *flags, char *data) |
373 | { | 373 | { |
374 | sync_filesystem(sb); | ||
374 | *flags |= MS_RDONLY; | 375 | *flags |= MS_RDONLY; |
375 | return 0; | 376 | return 0; |
376 | } | 377 | } |
diff --git a/fs/super.c b/fs/super.c index 80d5cf2ca765..e9dc3c3fe159 100644 --- a/fs/super.c +++ b/fs/super.c | |||
@@ -719,8 +719,6 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force) | |||
719 | } | 719 | } |
720 | } | 720 | } |
721 | 721 | ||
722 | sync_filesystem(sb); | ||
723 | |||
724 | if (sb->s_op->remount_fs) { | 722 | if (sb->s_op->remount_fs) { |
725 | retval = sb->s_op->remount_fs(sb, &flags, data); | 723 | retval = sb->s_op->remount_fs(sb, &flags, data); |
726 | if (retval) { | 724 | if (retval) { |
diff --git a/fs/sysfs/Kconfig b/fs/sysfs/Kconfig index 8c41feacbac5..b2756014508c 100644 --- a/fs/sysfs/Kconfig +++ b/fs/sysfs/Kconfig | |||
@@ -1,6 +1,7 @@ | |||
1 | config SYSFS | 1 | config SYSFS |
2 | bool "sysfs file system support" if EXPERT | 2 | bool "sysfs file system support" if EXPERT |
3 | default y | 3 | default y |
4 | select KERNFS | ||
4 | help | 5 | help |
5 | The sysfs filesystem is a virtual filesystem that the kernel uses to | 6 | The sysfs filesystem is a virtual filesystem that the kernel uses to |
6 | export internal kernel objects, their attributes, and their | 7 | export internal kernel objects, their attributes, and their |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index ee0d761c3179..0b45ff42f374 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
@@ -19,39 +19,18 @@ | |||
19 | 19 | ||
20 | DEFINE_SPINLOCK(sysfs_symlink_target_lock); | 20 | DEFINE_SPINLOCK(sysfs_symlink_target_lock); |
21 | 21 | ||
22 | /** | ||
23 | * sysfs_pathname - return full path to sysfs dirent | ||
24 | * @kn: kernfs_node whose path we want | ||
25 | * @path: caller allocated buffer of size PATH_MAX | ||
26 | * | ||
27 | * Gives the name "/" to the sysfs_root entry; any path returned | ||
28 | * is relative to wherever sysfs is mounted. | ||
29 | */ | ||
30 | static char *sysfs_pathname(struct kernfs_node *kn, char *path) | ||
31 | { | ||
32 | if (kn->parent) { | ||
33 | sysfs_pathname(kn->parent, path); | ||
34 | strlcat(path, "/", PATH_MAX); | ||
35 | } | ||
36 | strlcat(path, kn->name, PATH_MAX); | ||
37 | return path; | ||
38 | } | ||
39 | |||
40 | void sysfs_warn_dup(struct kernfs_node *parent, const char *name) | 22 | void sysfs_warn_dup(struct kernfs_node *parent, const char *name) |
41 | { | 23 | { |
42 | char *path; | 24 | char *buf, *path = NULL; |
43 | 25 | ||
44 | path = kzalloc(PATH_MAX, GFP_KERNEL); | 26 | buf = kzalloc(PATH_MAX, GFP_KERNEL); |
45 | if (path) { | 27 | if (buf) |
46 | sysfs_pathname(parent, path); | 28 | path = kernfs_path(parent, buf, PATH_MAX); |
47 | strlcat(path, "/", PATH_MAX); | ||
48 | strlcat(path, name, PATH_MAX); | ||
49 | } | ||
50 | 29 | ||
51 | WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", | 30 | WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s/%s'\n", |
52 | path ? path : name); | 31 | path, name); |
53 | 32 | ||
54 | kfree(path); | 33 | kfree(buf); |
55 | } | 34 | } |
56 | 35 | ||
57 | /** | 36 | /** |
@@ -122,9 +101,13 @@ void sysfs_remove_dir(struct kobject *kobj) | |||
122 | int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, | 101 | int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, |
123 | const void *new_ns) | 102 | const void *new_ns) |
124 | { | 103 | { |
125 | struct kernfs_node *parent = kobj->sd->parent; | 104 | struct kernfs_node *parent; |
105 | int ret; | ||
126 | 106 | ||
127 | return kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); | 107 | parent = kernfs_get_parent(kobj->sd); |
108 | ret = kernfs_rename_ns(kobj->sd, parent, new_name, new_ns); | ||
109 | kernfs_put(parent); | ||
110 | return ret; | ||
128 | } | 111 | } |
129 | 112 | ||
130 | int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, | 113 | int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, |
@@ -133,7 +116,6 @@ int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, | |||
133 | struct kernfs_node *kn = kobj->sd; | 116 | struct kernfs_node *kn = kobj->sd; |
134 | struct kernfs_node *new_parent; | 117 | struct kernfs_node *new_parent; |
135 | 118 | ||
136 | BUG_ON(!kn->parent); | ||
137 | new_parent = new_parent_kobj && new_parent_kobj->sd ? | 119 | new_parent = new_parent_kobj && new_parent_kobj->sd ? |
138 | new_parent_kobj->sd : sysfs_root_kn; | 120 | new_parent_kobj->sd : sysfs_root_kn; |
139 | 121 | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 810cf6e613e5..1b8b91b67fdb 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
@@ -372,6 +372,29 @@ void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, | |||
372 | } | 372 | } |
373 | EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); | 373 | EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); |
374 | 374 | ||
375 | /** | ||
376 | * sysfs_remove_file_self - remove an object attribute from its own method | ||
377 | * @kobj: object we're acting for | ||
378 | * @attr: attribute descriptor | ||
379 | * | ||
380 | * See kernfs_remove_self() for details. | ||
381 | */ | ||
382 | bool sysfs_remove_file_self(struct kobject *kobj, const struct attribute *attr) | ||
383 | { | ||
384 | struct kernfs_node *parent = kobj->sd; | ||
385 | struct kernfs_node *kn; | ||
386 | bool ret; | ||
387 | |||
388 | kn = kernfs_find_and_get(parent, attr->name); | ||
389 | if (WARN_ON_ONCE(!kn)) | ||
390 | return false; | ||
391 | |||
392 | ret = kernfs_remove_self(kn); | ||
393 | |||
394 | kernfs_put(kn); | ||
395 | return ret; | ||
396 | } | ||
397 | |||
375 | void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) | 398 | void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) |
376 | { | 399 | { |
377 | int i; | 400 | int i; |
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 6b579387c67a..aa0406895b53 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c | |||
@@ -70,8 +70,11 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, | |||
70 | if (grp->bin_attrs) { | 70 | if (grp->bin_attrs) { |
71 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { | 71 | for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) { |
72 | if (update) | 72 | if (update) |
73 | sysfs_remove_bin_file(kobj, *bin_attr); | 73 | kernfs_remove_by_name(parent, |
74 | error = sysfs_create_bin_file(kobj, *bin_attr); | 74 | (*bin_attr)->attr.name); |
75 | error = sysfs_add_file_mode_ns(parent, | ||
76 | &(*bin_attr)->attr, true, | ||
77 | (*bin_attr)->attr.mode, NULL); | ||
75 | if (error) | 78 | if (error) |
76 | break; | 79 | break; |
77 | } | 80 | } |
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 3eaf5c6622eb..a66ad6196f59 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
@@ -63,7 +63,7 @@ int __init sysfs_init(void) | |||
63 | { | 63 | { |
64 | int err; | 64 | int err; |
65 | 65 | ||
66 | sysfs_root = kernfs_create_root(NULL, NULL); | 66 | sysfs_root = kernfs_create_root(NULL, 0, NULL); |
67 | if (IS_ERR(sysfs_root)) | 67 | if (IS_ERR(sysfs_root)) |
68 | return PTR_ERR(sysfs_root); | 68 | return PTR_ERR(sysfs_root); |
69 | 69 | ||
diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index c327d4ee1235..88956309cc86 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c | |||
@@ -60,6 +60,7 @@ static int sysv_remount(struct super_block *sb, int *flags, char *data) | |||
60 | { | 60 | { |
61 | struct sysv_sb_info *sbi = SYSV_SB(sb); | 61 | struct sysv_sb_info *sbi = SYSV_SB(sb); |
62 | 62 | ||
63 | sync_filesystem(sb); | ||
63 | if (sbi->s_forced_ro) | 64 | if (sbi->s_forced_ro) |
64 | *flags |= MS_RDONLY; | 65 | *flags |= MS_RDONLY; |
65 | return 0; | 66 | return 0; |
@@ -295,7 +296,7 @@ int sysv_sync_inode(struct inode *inode) | |||
295 | 296 | ||
296 | static void sysv_evict_inode(struct inode *inode) | 297 | static void sysv_evict_inode(struct inode *inode) |
297 | { | 298 | { |
298 | truncate_inode_pages(&inode->i_data, 0); | 299 | truncate_inode_pages_final(&inode->i_data); |
299 | if (!inode->i_nlink) { | 300 | if (!inode->i_nlink) { |
300 | inode->i_size = 0; | 301 | inode->i_size = 0; |
301 | sysv_truncate(inode); | 302 | sysv_truncate(inode); |
diff --git a/fs/timerfd.c b/fs/timerfd.c index 929312180dd0..0013142c0475 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -317,6 +317,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) | |||
317 | (clockid != CLOCK_MONOTONIC && | 317 | (clockid != CLOCK_MONOTONIC && |
318 | clockid != CLOCK_REALTIME && | 318 | clockid != CLOCK_REALTIME && |
319 | clockid != CLOCK_REALTIME_ALARM && | 319 | clockid != CLOCK_REALTIME_ALARM && |
320 | clockid != CLOCK_BOOTTIME && | ||
320 | clockid != CLOCK_BOOTTIME_ALARM)) | 321 | clockid != CLOCK_BOOTTIME_ALARM)) |
321 | return -EINVAL; | 322 | return -EINVAL; |
322 | 323 | ||
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 123c79b7261e..4f34dbae823d 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
@@ -1538,6 +1538,7 @@ out_unlock: | |||
1538 | 1538 | ||
1539 | static const struct vm_operations_struct ubifs_file_vm_ops = { | 1539 | static const struct vm_operations_struct ubifs_file_vm_ops = { |
1540 | .fault = filemap_fault, | 1540 | .fault = filemap_fault, |
1541 | .map_pages = filemap_map_pages, | ||
1541 | .page_mkwrite = ubifs_vm_page_mkwrite, | 1542 | .page_mkwrite = ubifs_vm_page_mkwrite, |
1542 | .remap_pages = generic_file_remap_pages, | 1543 | .remap_pages = generic_file_remap_pages, |
1543 | }; | 1544 | }; |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 5ded8490c0c6..a1266089eca1 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
@@ -351,7 +351,7 @@ static void ubifs_evict_inode(struct inode *inode) | |||
351 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); | 351 | dbg_gen("inode %lu, mode %#x", inode->i_ino, (int)inode->i_mode); |
352 | ubifs_assert(!atomic_read(&inode->i_count)); | 352 | ubifs_assert(!atomic_read(&inode->i_count)); |
353 | 353 | ||
354 | truncate_inode_pages(&inode->i_data, 0); | 354 | truncate_inode_pages_final(&inode->i_data); |
355 | 355 | ||
356 | if (inode->i_nlink) | 356 | if (inode->i_nlink) |
357 | goto done; | 357 | goto done; |
@@ -1827,6 +1827,7 @@ static int ubifs_remount_fs(struct super_block *sb, int *flags, char *data) | |||
1827 | int err; | 1827 | int err; |
1828 | struct ubifs_info *c = sb->s_fs_info; | 1828 | struct ubifs_info *c = sb->s_fs_info; |
1829 | 1829 | ||
1830 | sync_filesystem(sb); | ||
1830 | dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); | 1831 | dbg_gen("old flags %#lx, new flags %#x", sb->s_flags, *flags); |
1831 | 1832 | ||
1832 | err = ubifs_parse_options(c, data, 1); | 1833 | err = ubifs_parse_options(c, data, 1); |
diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 982ce05c87ed..5d643706212f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c | |||
@@ -146,8 +146,8 @@ void udf_evict_inode(struct inode *inode) | |||
146 | want_delete = 1; | 146 | want_delete = 1; |
147 | udf_setsize(inode, 0); | 147 | udf_setsize(inode, 0); |
148 | udf_update_inode(inode, IS_SYNC(inode)); | 148 | udf_update_inode(inode, IS_SYNC(inode)); |
149 | } else | 149 | } |
150 | truncate_inode_pages(&inode->i_data, 0); | 150 | truncate_inode_pages_final(&inode->i_data); |
151 | invalidate_inode_buffers(inode); | 151 | invalidate_inode_buffers(inode); |
152 | clear_inode(inode); | 152 | clear_inode(inode); |
153 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && | 153 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 3306b9f69bed..3286db047a40 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
@@ -175,7 +175,7 @@ static void init_once(void *foo) | |||
175 | inode_init_once(&ei->vfs_inode); | 175 | inode_init_once(&ei->vfs_inode); |
176 | } | 176 | } |
177 | 177 | ||
178 | static int init_inodecache(void) | 178 | static int __init init_inodecache(void) |
179 | { | 179 | { |
180 | udf_inode_cachep = kmem_cache_create("udf_inode_cache", | 180 | udf_inode_cachep = kmem_cache_create("udf_inode_cache", |
181 | sizeof(struct udf_inode_info), | 181 | sizeof(struct udf_inode_info), |
@@ -505,6 +505,7 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
505 | while ((p = strsep(&options, ",")) != NULL) { | 505 | while ((p = strsep(&options, ",")) != NULL) { |
506 | substring_t args[MAX_OPT_ARGS]; | 506 | substring_t args[MAX_OPT_ARGS]; |
507 | int token; | 507 | int token; |
508 | unsigned n; | ||
508 | if (!*p) | 509 | if (!*p) |
509 | continue; | 510 | continue; |
510 | 511 | ||
@@ -516,7 +517,10 @@ static int udf_parse_options(char *options, struct udf_options *uopt, | |||
516 | case Opt_bs: | 517 | case Opt_bs: |
517 | if (match_int(&args[0], &option)) | 518 | if (match_int(&args[0], &option)) |
518 | return 0; | 519 | return 0; |
519 | uopt->blocksize = option; | 520 | n = option; |
521 | if (n != 512 && n != 1024 && n != 2048 && n != 4096) | ||
522 | return 0; | ||
523 | uopt->blocksize = n; | ||
520 | uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); | 524 | uopt->flags |= (1 << UDF_FLAG_BLOCKSIZE_SET); |
521 | break; | 525 | break; |
522 | case Opt_unhide: | 526 | case Opt_unhide: |
@@ -646,6 +650,7 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) | |||
646 | int error = 0; | 650 | int error = 0; |
647 | struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); | 651 | struct logicalVolIntegrityDescImpUse *lvidiu = udf_sb_lvidiu(sb); |
648 | 652 | ||
653 | sync_filesystem(sb); | ||
649 | if (lvidiu) { | 654 | if (lvidiu) { |
650 | int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); | 655 | int write_rev = le16_to_cpu(lvidiu->minUDFWriteRev); |
651 | if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) | 656 | if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) |
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index a7ea492ae660..0ab1de4b39a5 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c | |||
@@ -38,7 +38,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
38 | { | 38 | { |
39 | struct super_block * sb; | 39 | struct super_block * sb; |
40 | struct ufs_sb_private_info * uspi; | 40 | struct ufs_sb_private_info * uspi; |
41 | struct ufs_super_block_first * usb1; | ||
42 | struct ufs_cg_private_info * ucpi; | 41 | struct ufs_cg_private_info * ucpi; |
43 | struct ufs_cylinder_group * ucg; | 42 | struct ufs_cylinder_group * ucg; |
44 | unsigned cgno, bit, end_bit, bbase, blkmap, i; | 43 | unsigned cgno, bit, end_bit, bbase, blkmap, i; |
@@ -46,7 +45,6 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) | |||
46 | 45 | ||
47 | sb = inode->i_sb; | 46 | sb = inode->i_sb; |
48 | uspi = UFS_SB(sb)->s_uspi; | 47 | uspi = UFS_SB(sb)->s_uspi; |
49 | usb1 = ubh_get_usb_first(uspi); | ||
50 | 48 | ||
51 | UFSD("ENTER, fragment %llu, count %u\n", | 49 | UFSD("ENTER, fragment %llu, count %u\n", |
52 | (unsigned long long)fragment, count); | 50 | (unsigned long long)fragment, count); |
@@ -135,7 +133,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) | |||
135 | { | 133 | { |
136 | struct super_block * sb; | 134 | struct super_block * sb; |
137 | struct ufs_sb_private_info * uspi; | 135 | struct ufs_sb_private_info * uspi; |
138 | struct ufs_super_block_first * usb1; | ||
139 | struct ufs_cg_private_info * ucpi; | 136 | struct ufs_cg_private_info * ucpi; |
140 | struct ufs_cylinder_group * ucg; | 137 | struct ufs_cylinder_group * ucg; |
141 | unsigned overflow, cgno, bit, end_bit, i; | 138 | unsigned overflow, cgno, bit, end_bit, i; |
@@ -143,7 +140,6 @@ void ufs_free_blocks(struct inode *inode, u64 fragment, unsigned count) | |||
143 | 140 | ||
144 | sb = inode->i_sb; | 141 | sb = inode->i_sb; |
145 | uspi = UFS_SB(sb)->s_uspi; | 142 | uspi = UFS_SB(sb)->s_uspi; |
146 | usb1 = ubh_get_usb_first(uspi); | ||
147 | 143 | ||
148 | UFSD("ENTER, fragment %llu, count %u\n", | 144 | UFSD("ENTER, fragment %llu, count %u\n", |
149 | (unsigned long long)fragment, count); | 145 | (unsigned long long)fragment, count); |
@@ -499,7 +495,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, | |||
499 | { | 495 | { |
500 | struct super_block * sb; | 496 | struct super_block * sb; |
501 | struct ufs_sb_private_info * uspi; | 497 | struct ufs_sb_private_info * uspi; |
502 | struct ufs_super_block_first * usb1; | ||
503 | struct ufs_cg_private_info * ucpi; | 498 | struct ufs_cg_private_info * ucpi; |
504 | struct ufs_cylinder_group * ucg; | 499 | struct ufs_cylinder_group * ucg; |
505 | unsigned cgno, fragno, fragoff, count, fragsize, i; | 500 | unsigned cgno, fragno, fragoff, count, fragsize, i; |
@@ -509,7 +504,6 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, | |||
509 | 504 | ||
510 | sb = inode->i_sb; | 505 | sb = inode->i_sb; |
511 | uspi = UFS_SB(sb)->s_uspi; | 506 | uspi = UFS_SB(sb)->s_uspi; |
512 | usb1 = ubh_get_usb_first (uspi); | ||
513 | count = newcount - oldcount; | 507 | count = newcount - oldcount; |
514 | 508 | ||
515 | cgno = ufs_dtog(uspi, fragment); | 509 | cgno = ufs_dtog(uspi, fragment); |
@@ -577,7 +571,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, | |||
577 | { | 571 | { |
578 | struct super_block * sb; | 572 | struct super_block * sb; |
579 | struct ufs_sb_private_info * uspi; | 573 | struct ufs_sb_private_info * uspi; |
580 | struct ufs_super_block_first * usb1; | ||
581 | struct ufs_cg_private_info * ucpi; | 574 | struct ufs_cg_private_info * ucpi; |
582 | struct ufs_cylinder_group * ucg; | 575 | struct ufs_cylinder_group * ucg; |
583 | unsigned oldcg, i, j, k, allocsize; | 576 | unsigned oldcg, i, j, k, allocsize; |
@@ -588,7 +581,6 @@ static u64 ufs_alloc_fragments(struct inode *inode, unsigned cgno, | |||
588 | 581 | ||
589 | sb = inode->i_sb; | 582 | sb = inode->i_sb; |
590 | uspi = UFS_SB(sb)->s_uspi; | 583 | uspi = UFS_SB(sb)->s_uspi; |
591 | usb1 = ubh_get_usb_first(uspi); | ||
592 | oldcg = cgno; | 584 | oldcg = cgno; |
593 | 585 | ||
594 | /* | 586 | /* |
@@ -690,7 +682,6 @@ static u64 ufs_alloccg_block(struct inode *inode, | |||
690 | { | 682 | { |
691 | struct super_block * sb; | 683 | struct super_block * sb; |
692 | struct ufs_sb_private_info * uspi; | 684 | struct ufs_sb_private_info * uspi; |
693 | struct ufs_super_block_first * usb1; | ||
694 | struct ufs_cylinder_group * ucg; | 685 | struct ufs_cylinder_group * ucg; |
695 | u64 result, blkno; | 686 | u64 result, blkno; |
696 | 687 | ||
@@ -698,7 +689,6 @@ static u64 ufs_alloccg_block(struct inode *inode, | |||
698 | 689 | ||
699 | sb = inode->i_sb; | 690 | sb = inode->i_sb; |
700 | uspi = UFS_SB(sb)->s_uspi; | 691 | uspi = UFS_SB(sb)->s_uspi; |
701 | usb1 = ubh_get_usb_first(uspi); | ||
702 | ucg = ubh_get_ucg(UCPI_UBH(ucpi)); | 692 | ucg = ubh_get_ucg(UCPI_UBH(ucpi)); |
703 | 693 | ||
704 | if (goal == 0) { | 694 | if (goal == 0) { |
@@ -794,7 +784,6 @@ static u64 ufs_bitmap_search(struct super_block *sb, | |||
794 | 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe | 784 | 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe |
795 | }; | 785 | }; |
796 | struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; | 786 | struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi; |
797 | struct ufs_super_block_first *usb1; | ||
798 | struct ufs_cylinder_group *ucg; | 787 | struct ufs_cylinder_group *ucg; |
799 | unsigned start, length, loc; | 788 | unsigned start, length, loc; |
800 | unsigned pos, want, blockmap, mask, end; | 789 | unsigned pos, want, blockmap, mask, end; |
@@ -803,7 +792,6 @@ static u64 ufs_bitmap_search(struct super_block *sb, | |||
803 | UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx, | 792 | UFSD("ENTER, cg %u, goal %llu, count %u\n", ucpi->c_cgx, |
804 | (unsigned long long)goal, count); | 793 | (unsigned long long)goal, count); |
805 | 794 | ||
806 | usb1 = ubh_get_usb_first (uspi); | ||
807 | ucg = ubh_get_ucg(UCPI_UBH(ucpi)); | 795 | ucg = ubh_get_ucg(UCPI_UBH(ucpi)); |
808 | 796 | ||
809 | if (goal) | 797 | if (goal) |
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index d0426d74817b..98f7211599ff 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c | |||
@@ -57,7 +57,6 @@ void ufs_free_inode (struct inode * inode) | |||
57 | { | 57 | { |
58 | struct super_block * sb; | 58 | struct super_block * sb; |
59 | struct ufs_sb_private_info * uspi; | 59 | struct ufs_sb_private_info * uspi; |
60 | struct ufs_super_block_first * usb1; | ||
61 | struct ufs_cg_private_info * ucpi; | 60 | struct ufs_cg_private_info * ucpi; |
62 | struct ufs_cylinder_group * ucg; | 61 | struct ufs_cylinder_group * ucg; |
63 | int is_directory; | 62 | int is_directory; |
@@ -67,7 +66,6 @@ void ufs_free_inode (struct inode * inode) | |||
67 | 66 | ||
68 | sb = inode->i_sb; | 67 | sb = inode->i_sb; |
69 | uspi = UFS_SB(sb)->s_uspi; | 68 | uspi = UFS_SB(sb)->s_uspi; |
70 | usb1 = ubh_get_usb_first(uspi); | ||
71 | 69 | ||
72 | ino = inode->i_ino; | 70 | ino = inode->i_ino; |
73 | 71 | ||
@@ -175,7 +173,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) | |||
175 | struct super_block * sb; | 173 | struct super_block * sb; |
176 | struct ufs_sb_info * sbi; | 174 | struct ufs_sb_info * sbi; |
177 | struct ufs_sb_private_info * uspi; | 175 | struct ufs_sb_private_info * uspi; |
178 | struct ufs_super_block_first * usb1; | ||
179 | struct ufs_cg_private_info * ucpi; | 176 | struct ufs_cg_private_info * ucpi; |
180 | struct ufs_cylinder_group * ucg; | 177 | struct ufs_cylinder_group * ucg; |
181 | struct inode * inode; | 178 | struct inode * inode; |
@@ -195,7 +192,6 @@ struct inode *ufs_new_inode(struct inode *dir, umode_t mode) | |||
195 | ufsi = UFS_I(inode); | 192 | ufsi = UFS_I(inode); |
196 | sbi = UFS_SB(sb); | 193 | sbi = UFS_SB(sb); |
197 | uspi = sbi->s_uspi; | 194 | uspi = sbi->s_uspi; |
198 | usb1 = ubh_get_usb_first(uspi); | ||
199 | 195 | ||
200 | mutex_lock(&sbi->s_lock); | 196 | mutex_lock(&sbi->s_lock); |
201 | 197 | ||
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index c8ca96086784..61e8a9b021dd 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c | |||
@@ -885,7 +885,7 @@ void ufs_evict_inode(struct inode * inode) | |||
885 | if (!inode->i_nlink && !is_bad_inode(inode)) | 885 | if (!inode->i_nlink && !is_bad_inode(inode)) |
886 | want_delete = 1; | 886 | want_delete = 1; |
887 | 887 | ||
888 | truncate_inode_pages(&inode->i_data, 0); | 888 | truncate_inode_pages_final(&inode->i_data); |
889 | if (want_delete) { | 889 | if (want_delete) { |
890 | loff_t old_i_size; | 890 | loff_t old_i_size; |
891 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ | 891 | /*UFS_I(inode)->i_dtime = CURRENT_TIME;*/ |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 329f2f53b7ed..c1183f9f69dc 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
@@ -524,11 +524,9 @@ static int ufs_read_cylinder_structures(struct super_block *sb) | |||
524 | struct ufs_buffer_head * ubh; | 524 | struct ufs_buffer_head * ubh; |
525 | unsigned char * base, * space; | 525 | unsigned char * base, * space; |
526 | unsigned size, blks, i; | 526 | unsigned size, blks, i; |
527 | struct ufs_super_block_third *usb3; | ||
528 | 527 | ||
529 | UFSD("ENTER\n"); | 528 | UFSD("ENTER\n"); |
530 | 529 | ||
531 | usb3 = ubh_get_usb_third(uspi); | ||
532 | /* | 530 | /* |
533 | * Read cs structures from (usually) first data block | 531 | * Read cs structures from (usually) first data block |
534 | * on the device. | 532 | * on the device. |
@@ -1280,6 +1278,7 @@ static int ufs_remount (struct super_block *sb, int *mount_flags, char *data) | |||
1280 | unsigned new_mount_opt, ufstype; | 1278 | unsigned new_mount_opt, ufstype; |
1281 | unsigned flags; | 1279 | unsigned flags; |
1282 | 1280 | ||
1281 | sync_filesystem(sb); | ||
1283 | lock_ufs(sb); | 1282 | lock_ufs(sb); |
1284 | mutex_lock(&UFS_SB(sb)->s_lock); | 1283 | mutex_lock(&UFS_SB(sb)->s_lock); |
1285 | uspi = UFS_SB(sb)->s_uspi; | 1284 | uspi = UFS_SB(sb)->s_uspi; |
@@ -1389,15 +1388,11 @@ static int ufs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
1389 | struct super_block *sb = dentry->d_sb; | 1388 | struct super_block *sb = dentry->d_sb; |
1390 | struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; | 1389 | struct ufs_sb_private_info *uspi= UFS_SB(sb)->s_uspi; |
1391 | unsigned flags = UFS_SB(sb)->s_flags; | 1390 | unsigned flags = UFS_SB(sb)->s_flags; |
1392 | struct ufs_super_block_first *usb1; | ||
1393 | struct ufs_super_block_second *usb2; | ||
1394 | struct ufs_super_block_third *usb3; | 1391 | struct ufs_super_block_third *usb3; |
1395 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); | 1392 | u64 id = huge_encode_dev(sb->s_bdev->bd_dev); |
1396 | 1393 | ||
1397 | lock_ufs(sb); | 1394 | lock_ufs(sb); |
1398 | 1395 | ||
1399 | usb1 = ubh_get_usb_first(uspi); | ||
1400 | usb2 = ubh_get_usb_second(uspi); | ||
1401 | usb3 = ubh_get_usb_third(uspi); | 1396 | usb3 = ubh_get_usb_third(uspi); |
1402 | 1397 | ||
1403 | if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { | 1398 | if ((flags & UFS_TYPE_MASK) == UFS_TYPE_UFS2) { |
@@ -1453,7 +1448,7 @@ static void init_once(void *foo) | |||
1453 | inode_init_once(&ei->vfs_inode); | 1448 | inode_init_once(&ei->vfs_inode); |
1454 | } | 1449 | } |
1455 | 1450 | ||
1456 | static int init_inodecache(void) | 1451 | static int __init init_inodecache(void) |
1457 | { | 1452 | { |
1458 | ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", | 1453 | ufs_inode_cachep = kmem_cache_create("ufs_inode_cache", |
1459 | sizeof(struct ufs_inode_info), | 1454 | sizeof(struct ufs_inode_info), |
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 66a36befc5c0..844e288b9576 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c | |||
@@ -65,12 +65,31 @@ kmem_alloc(size_t size, xfs_km_flags_t flags) | |||
65 | void * | 65 | void * |
66 | kmem_zalloc_large(size_t size, xfs_km_flags_t flags) | 66 | kmem_zalloc_large(size_t size, xfs_km_flags_t flags) |
67 | { | 67 | { |
68 | unsigned noio_flag = 0; | ||
68 | void *ptr; | 69 | void *ptr; |
70 | gfp_t lflags; | ||
69 | 71 | ||
70 | ptr = kmem_zalloc(size, flags | KM_MAYFAIL); | 72 | ptr = kmem_zalloc(size, flags | KM_MAYFAIL); |
71 | if (ptr) | 73 | if (ptr) |
72 | return ptr; | 74 | return ptr; |
73 | return vzalloc(size); | 75 | |
76 | /* | ||
77 | * __vmalloc() will allocate data pages and auxillary structures (e.g. | ||
78 | * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context | ||
79 | * here. Hence we need to tell memory reclaim that we are in such a | ||
80 | * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering | ||
81 | * the filesystem here and potentially deadlocking. | ||
82 | */ | ||
83 | if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) | ||
84 | noio_flag = memalloc_noio_save(); | ||
85 | |||
86 | lflags = kmem_flags_convert(flags); | ||
87 | ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); | ||
88 | |||
89 | if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) | ||
90 | memalloc_noio_restore(noio_flag); | ||
91 | |||
92 | return ptr; | ||
74 | } | 93 | } |
75 | 94 | ||
76 | void | 95 | void |
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 0ecec1896f25..6888ad886ff6 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c | |||
@@ -281,7 +281,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) | |||
281 | if (!acl) | 281 | if (!acl) |
282 | goto set_acl; | 282 | goto set_acl; |
283 | 283 | ||
284 | error = -EINVAL; | 284 | error = -E2BIG; |
285 | if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) | 285 | if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) |
286 | return error; | 286 | return error; |
287 | 287 | ||
diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 3fc109819c34..0fdd4109c624 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h | |||
@@ -89,6 +89,8 @@ typedef struct xfs_agf { | |||
89 | /* structure must be padded to 64 bit alignment */ | 89 | /* structure must be padded to 64 bit alignment */ |
90 | } xfs_agf_t; | 90 | } xfs_agf_t; |
91 | 91 | ||
92 | #define XFS_AGF_CRC_OFF offsetof(struct xfs_agf, agf_crc) | ||
93 | |||
92 | #define XFS_AGF_MAGICNUM 0x00000001 | 94 | #define XFS_AGF_MAGICNUM 0x00000001 |
93 | #define XFS_AGF_VERSIONNUM 0x00000002 | 95 | #define XFS_AGF_VERSIONNUM 0x00000002 |
94 | #define XFS_AGF_SEQNO 0x00000004 | 96 | #define XFS_AGF_SEQNO 0x00000004 |
@@ -167,6 +169,8 @@ typedef struct xfs_agi { | |||
167 | /* structure must be padded to 64 bit alignment */ | 169 | /* structure must be padded to 64 bit alignment */ |
168 | } xfs_agi_t; | 170 | } xfs_agi_t; |
169 | 171 | ||
172 | #define XFS_AGI_CRC_OFF offsetof(struct xfs_agi, agi_crc) | ||
173 | |||
170 | #define XFS_AGI_MAGICNUM 0x00000001 | 174 | #define XFS_AGI_MAGICNUM 0x00000001 |
171 | #define XFS_AGI_VERSIONNUM 0x00000002 | 175 | #define XFS_AGI_VERSIONNUM 0x00000002 |
172 | #define XFS_AGI_SEQNO 0x00000004 | 176 | #define XFS_AGI_SEQNO 0x00000004 |
@@ -222,6 +226,8 @@ typedef struct xfs_agfl { | |||
222 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ | 226 | __be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */ |
223 | } xfs_agfl_t; | 227 | } xfs_agfl_t; |
224 | 228 | ||
229 | #define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc) | ||
230 | |||
225 | /* | 231 | /* |
226 | * tags for inode radix tree | 232 | * tags for inode radix tree |
227 | */ | 233 | */ |
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 9eab2dfdcbb5..c1cf6a336a72 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c | |||
@@ -474,7 +474,6 @@ xfs_agfl_read_verify( | |||
474 | struct xfs_buf *bp) | 474 | struct xfs_buf *bp) |
475 | { | 475 | { |
476 | struct xfs_mount *mp = bp->b_target->bt_mount; | 476 | struct xfs_mount *mp = bp->b_target->bt_mount; |
477 | int agfl_ok = 1; | ||
478 | 477 | ||
479 | /* | 478 | /* |
480 | * There is no verification of non-crc AGFLs because mkfs does not | 479 | * There is no verification of non-crc AGFLs because mkfs does not |
@@ -485,15 +484,13 @@ xfs_agfl_read_verify( | |||
485 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 484 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
486 | return; | 485 | return; |
487 | 486 | ||
488 | agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 487 | if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) |
489 | offsetof(struct xfs_agfl, agfl_crc)); | 488 | xfs_buf_ioerror(bp, EFSBADCRC); |
490 | 489 | else if (!xfs_agfl_verify(bp)) | |
491 | agfl_ok = agfl_ok && xfs_agfl_verify(bp); | ||
492 | |||
493 | if (!agfl_ok) { | ||
494 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
495 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 490 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
496 | } | 491 | |
492 | if (bp->b_error) | ||
493 | xfs_verifier_error(bp); | ||
497 | } | 494 | } |
498 | 495 | ||
499 | static void | 496 | static void |
@@ -508,16 +505,15 @@ xfs_agfl_write_verify( | |||
508 | return; | 505 | return; |
509 | 506 | ||
510 | if (!xfs_agfl_verify(bp)) { | 507 | if (!xfs_agfl_verify(bp)) { |
511 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
512 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 508 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
509 | xfs_verifier_error(bp); | ||
513 | return; | 510 | return; |
514 | } | 511 | } |
515 | 512 | ||
516 | if (bip) | 513 | if (bip) |
517 | XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 514 | XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
518 | 515 | ||
519 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 516 | xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); |
520 | offsetof(struct xfs_agfl, agfl_crc)); | ||
521 | } | 517 | } |
522 | 518 | ||
523 | const struct xfs_buf_ops xfs_agfl_buf_ops = { | 519 | const struct xfs_buf_ops xfs_agfl_buf_ops = { |
@@ -2238,19 +2234,17 @@ xfs_agf_read_verify( | |||
2238 | struct xfs_buf *bp) | 2234 | struct xfs_buf *bp) |
2239 | { | 2235 | { |
2240 | struct xfs_mount *mp = bp->b_target->bt_mount; | 2236 | struct xfs_mount *mp = bp->b_target->bt_mount; |
2241 | int agf_ok = 1; | ||
2242 | |||
2243 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
2244 | agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
2245 | offsetof(struct xfs_agf, agf_crc)); | ||
2246 | 2237 | ||
2247 | agf_ok = agf_ok && xfs_agf_verify(mp, bp); | 2238 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
2248 | 2239 | !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) | |
2249 | if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, | 2240 | xfs_buf_ioerror(bp, EFSBADCRC); |
2250 | XFS_RANDOM_ALLOC_READ_AGF))) { | 2241 | else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, |
2251 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 2242 | XFS_ERRTAG_ALLOC_READ_AGF, |
2243 | XFS_RANDOM_ALLOC_READ_AGF)) | ||
2252 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 2244 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
2253 | } | 2245 | |
2246 | if (bp->b_error) | ||
2247 | xfs_verifier_error(bp); | ||
2254 | } | 2248 | } |
2255 | 2249 | ||
2256 | static void | 2250 | static void |
@@ -2261,8 +2255,8 @@ xfs_agf_write_verify( | |||
2261 | struct xfs_buf_log_item *bip = bp->b_fspriv; | 2255 | struct xfs_buf_log_item *bip = bp->b_fspriv; |
2262 | 2256 | ||
2263 | if (!xfs_agf_verify(mp, bp)) { | 2257 | if (!xfs_agf_verify(mp, bp)) { |
2264 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
2265 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 2258 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
2259 | xfs_verifier_error(bp); | ||
2266 | return; | 2260 | return; |
2267 | } | 2261 | } |
2268 | 2262 | ||
@@ -2272,8 +2266,7 @@ xfs_agf_write_verify( | |||
2272 | if (bip) | 2266 | if (bip) |
2273 | XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 2267 | XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
2274 | 2268 | ||
2275 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 2269 | xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); |
2276 | offsetof(struct xfs_agf, agf_crc)); | ||
2277 | } | 2270 | } |
2278 | 2271 | ||
2279 | const struct xfs_buf_ops xfs_agf_buf_ops = { | 2272 | const struct xfs_buf_ops xfs_agf_buf_ops = { |
diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index 13085429e523..cc1eadcbb049 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c | |||
@@ -355,12 +355,14 @@ static void | |||
355 | xfs_allocbt_read_verify( | 355 | xfs_allocbt_read_verify( |
356 | struct xfs_buf *bp) | 356 | struct xfs_buf *bp) |
357 | { | 357 | { |
358 | if (!(xfs_btree_sblock_verify_crc(bp) && | 358 | if (!xfs_btree_sblock_verify_crc(bp)) |
359 | xfs_allocbt_verify(bp))) { | 359 | xfs_buf_ioerror(bp, EFSBADCRC); |
360 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 360 | else if (!xfs_allocbt_verify(bp)) |
361 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
362 | bp->b_target->bt_mount, bp->b_addr); | ||
363 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 361 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
362 | |||
363 | if (bp->b_error) { | ||
364 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
365 | xfs_verifier_error(bp); | ||
364 | } | 366 | } |
365 | } | 367 | } |
366 | 368 | ||
@@ -370,9 +372,9 @@ xfs_allocbt_write_verify( | |||
370 | { | 372 | { |
371 | if (!xfs_allocbt_verify(bp)) { | 373 | if (!xfs_allocbt_verify(bp)) { |
372 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 374 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
373 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
374 | bp->b_target->bt_mount, bp->b_addr); | ||
375 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 375 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
376 | xfs_verifier_error(bp); | ||
377 | return; | ||
376 | } | 378 | } |
377 | xfs_btree_sblock_calc_crc(bp); | 379 | xfs_btree_sblock_calc_crc(bp); |
378 | 380 | ||
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index db2cfb067d0b..75df77d09f75 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
@@ -632,38 +632,46 @@ xfs_map_at_offset( | |||
632 | } | 632 | } |
633 | 633 | ||
634 | /* | 634 | /* |
635 | * Test if a given page is suitable for writing as part of an unwritten | 635 | * Test if a given page contains at least one buffer of a given @type. |
636 | * or delayed allocate extent. | 636 | * If @check_all_buffers is true, then we walk all the buffers in the page to |
637 | * try to find one of the type passed in. If it is not set, then the caller only | ||
638 | * needs to check the first buffer on the page for a match. | ||
637 | */ | 639 | */ |
638 | STATIC int | 640 | STATIC bool |
639 | xfs_check_page_type( | 641 | xfs_check_page_type( |
640 | struct page *page, | 642 | struct page *page, |
641 | unsigned int type) | 643 | unsigned int type, |
644 | bool check_all_buffers) | ||
642 | { | 645 | { |
643 | if (PageWriteback(page)) | 646 | struct buffer_head *bh; |
644 | return 0; | 647 | struct buffer_head *head; |
645 | 648 | ||
646 | if (page->mapping && page_has_buffers(page)) { | 649 | if (PageWriteback(page)) |
647 | struct buffer_head *bh, *head; | 650 | return false; |
648 | int acceptable = 0; | 651 | if (!page->mapping) |
652 | return false; | ||
653 | if (!page_has_buffers(page)) | ||
654 | return false; | ||
649 | 655 | ||
650 | bh = head = page_buffers(page); | 656 | bh = head = page_buffers(page); |
651 | do { | 657 | do { |
652 | if (buffer_unwritten(bh)) | 658 | if (buffer_unwritten(bh)) { |
653 | acceptable += (type == XFS_IO_UNWRITTEN); | 659 | if (type == XFS_IO_UNWRITTEN) |
654 | else if (buffer_delay(bh)) | 660 | return true; |
655 | acceptable += (type == XFS_IO_DELALLOC); | 661 | } else if (buffer_delay(bh)) { |
656 | else if (buffer_dirty(bh) && buffer_mapped(bh)) | 662 | if (type == XFS_IO_DELALLOC) |
657 | acceptable += (type == XFS_IO_OVERWRITE); | 663 | return true; |
658 | else | 664 | } else if (buffer_dirty(bh) && buffer_mapped(bh)) { |
659 | break; | 665 | if (type == XFS_IO_OVERWRITE) |
660 | } while ((bh = bh->b_this_page) != head); | 666 | return true; |
667 | } | ||
661 | 668 | ||
662 | if (acceptable) | 669 | /* If we are only checking the first buffer, we are done now. */ |
663 | return 1; | 670 | if (!check_all_buffers) |
664 | } | 671 | break; |
672 | } while ((bh = bh->b_this_page) != head); | ||
665 | 673 | ||
666 | return 0; | 674 | return false; |
667 | } | 675 | } |
668 | 676 | ||
669 | /* | 677 | /* |
@@ -697,7 +705,7 @@ xfs_convert_page( | |||
697 | goto fail_unlock_page; | 705 | goto fail_unlock_page; |
698 | if (page->mapping != inode->i_mapping) | 706 | if (page->mapping != inode->i_mapping) |
699 | goto fail_unlock_page; | 707 | goto fail_unlock_page; |
700 | if (!xfs_check_page_type(page, (*ioendp)->io_type)) | 708 | if (!xfs_check_page_type(page, (*ioendp)->io_type, false)) |
701 | goto fail_unlock_page; | 709 | goto fail_unlock_page; |
702 | 710 | ||
703 | /* | 711 | /* |
@@ -742,6 +750,15 @@ xfs_convert_page( | |||
742 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; | 750 | p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; |
743 | page_dirty = p_offset / len; | 751 | page_dirty = p_offset / len; |
744 | 752 | ||
753 | /* | ||
754 | * The moment we find a buffer that doesn't match our current type | ||
755 | * specification or can't be written, abort the loop and start | ||
756 | * writeback. As per the above xfs_imap_valid() check, only | ||
757 | * xfs_vm_writepage() can handle partial page writeback fully - we are | ||
758 | * limited here to the buffers that are contiguous with the current | ||
759 | * ioend, and hence a buffer we can't write breaks that contiguity and | ||
760 | * we have to defer the rest of the IO to xfs_vm_writepage(). | ||
761 | */ | ||
745 | bh = head = page_buffers(page); | 762 | bh = head = page_buffers(page); |
746 | do { | 763 | do { |
747 | if (offset >= end_offset) | 764 | if (offset >= end_offset) |
@@ -750,7 +767,7 @@ xfs_convert_page( | |||
750 | uptodate = 0; | 767 | uptodate = 0; |
751 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { | 768 | if (!(PageUptodate(page) || buffer_uptodate(bh))) { |
752 | done = 1; | 769 | done = 1; |
753 | continue; | 770 | break; |
754 | } | 771 | } |
755 | 772 | ||
756 | if (buffer_unwritten(bh) || buffer_delay(bh) || | 773 | if (buffer_unwritten(bh) || buffer_delay(bh) || |
@@ -762,10 +779,11 @@ xfs_convert_page( | |||
762 | else | 779 | else |
763 | type = XFS_IO_OVERWRITE; | 780 | type = XFS_IO_OVERWRITE; |
764 | 781 | ||
765 | if (!xfs_imap_valid(inode, imap, offset)) { | 782 | /* |
766 | done = 1; | 783 | * imap should always be valid because of the above |
767 | continue; | 784 | * partial page end_offset check on the imap. |
768 | } | 785 | */ |
786 | ASSERT(xfs_imap_valid(inode, imap, offset)); | ||
769 | 787 | ||
770 | lock_buffer(bh); | 788 | lock_buffer(bh); |
771 | if (type != XFS_IO_OVERWRITE) | 789 | if (type != XFS_IO_OVERWRITE) |
@@ -777,6 +795,7 @@ xfs_convert_page( | |||
777 | count++; | 795 | count++; |
778 | } else { | 796 | } else { |
779 | done = 1; | 797 | done = 1; |
798 | break; | ||
780 | } | 799 | } |
781 | } while (offset += len, (bh = bh->b_this_page) != head); | 800 | } while (offset += len, (bh = bh->b_this_page) != head); |
782 | 801 | ||
@@ -868,7 +887,7 @@ xfs_aops_discard_page( | |||
868 | struct buffer_head *bh, *head; | 887 | struct buffer_head *bh, *head; |
869 | loff_t offset = page_offset(page); | 888 | loff_t offset = page_offset(page); |
870 | 889 | ||
871 | if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) | 890 | if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) |
872 | goto out_invalidate; | 891 | goto out_invalidate; |
873 | 892 | ||
874 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) | 893 | if (XFS_FORCED_SHUTDOWN(ip->i_mount)) |
@@ -1441,7 +1460,8 @@ xfs_vm_direct_IO( | |||
1441 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, | 1460 | ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, |
1442 | offset, nr_segs, | 1461 | offset, nr_segs, |
1443 | xfs_get_blocks_direct, | 1462 | xfs_get_blocks_direct, |
1444 | xfs_end_io_direct_write, NULL, 0); | 1463 | xfs_end_io_direct_write, NULL, |
1464 | DIO_ASYNC_EXTEND); | ||
1445 | if (ret != -EIOCBQUEUED && iocb->private) | 1465 | if (ret != -EIOCBQUEUED && iocb->private) |
1446 | goto out_destroy_ioend; | 1466 | goto out_destroy_ioend; |
1447 | } else { | 1467 | } else { |
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 7b126f46a2f9..fe9587fab17a 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c | |||
@@ -213,8 +213,8 @@ xfs_attr3_leaf_write_verify( | |||
213 | struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; | 213 | struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr; |
214 | 214 | ||
215 | if (!xfs_attr3_leaf_verify(bp)) { | 215 | if (!xfs_attr3_leaf_verify(bp)) { |
216 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
217 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 216 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
217 | xfs_verifier_error(bp); | ||
218 | return; | 218 | return; |
219 | } | 219 | } |
220 | 220 | ||
@@ -224,7 +224,7 @@ xfs_attr3_leaf_write_verify( | |||
224 | if (bip) | 224 | if (bip) |
225 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | 225 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
226 | 226 | ||
227 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); | 227 | xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF); |
228 | } | 228 | } |
229 | 229 | ||
230 | /* | 230 | /* |
@@ -239,13 +239,14 @@ xfs_attr3_leaf_read_verify( | |||
239 | { | 239 | { |
240 | struct xfs_mount *mp = bp->b_target->bt_mount; | 240 | struct xfs_mount *mp = bp->b_target->bt_mount; |
241 | 241 | ||
242 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 242 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
243 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 243 | !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) |
244 | XFS_ATTR3_LEAF_CRC_OFF)) || | 244 | xfs_buf_ioerror(bp, EFSBADCRC); |
245 | !xfs_attr3_leaf_verify(bp)) { | 245 | else if (!xfs_attr3_leaf_verify(bp)) |
246 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
247 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 246 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
248 | } | 247 | |
248 | if (bp->b_error) | ||
249 | xfs_verifier_error(bp); | ||
249 | } | 250 | } |
250 | 251 | ||
251 | const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { | 252 | const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { |
diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 5549d69ddb45..6e37823e2932 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c | |||
@@ -125,7 +125,6 @@ xfs_attr3_rmt_read_verify( | |||
125 | struct xfs_mount *mp = bp->b_target->bt_mount; | 125 | struct xfs_mount *mp = bp->b_target->bt_mount; |
126 | char *ptr; | 126 | char *ptr; |
127 | int len; | 127 | int len; |
128 | bool corrupt = false; | ||
129 | xfs_daddr_t bno; | 128 | xfs_daddr_t bno; |
130 | 129 | ||
131 | /* no verification of non-crc buffers */ | 130 | /* no verification of non-crc buffers */ |
@@ -140,11 +139,11 @@ xfs_attr3_rmt_read_verify( | |||
140 | while (len > 0) { | 139 | while (len > 0) { |
141 | if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), | 140 | if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), |
142 | XFS_ATTR3_RMT_CRC_OFF)) { | 141 | XFS_ATTR3_RMT_CRC_OFF)) { |
143 | corrupt = true; | 142 | xfs_buf_ioerror(bp, EFSBADCRC); |
144 | break; | 143 | break; |
145 | } | 144 | } |
146 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { | 145 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { |
147 | corrupt = true; | 146 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
148 | break; | 147 | break; |
149 | } | 148 | } |
150 | len -= XFS_LBSIZE(mp); | 149 | len -= XFS_LBSIZE(mp); |
@@ -152,10 +151,9 @@ xfs_attr3_rmt_read_verify( | |||
152 | bno += mp->m_bsize; | 151 | bno += mp->m_bsize; |
153 | } | 152 | } |
154 | 153 | ||
155 | if (corrupt) { | 154 | if (bp->b_error) |
156 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 155 | xfs_verifier_error(bp); |
157 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 156 | else |
158 | } else | ||
159 | ASSERT(len == 0); | 157 | ASSERT(len == 0); |
160 | } | 158 | } |
161 | 159 | ||
@@ -180,9 +178,8 @@ xfs_attr3_rmt_write_verify( | |||
180 | 178 | ||
181 | while (len > 0) { | 179 | while (len > 0) { |
182 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { | 180 | if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { |
183 | XFS_CORRUPTION_ERROR(__func__, | ||
184 | XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
185 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 181 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
182 | xfs_verifier_error(bp); | ||
186 | return; | 183 | return; |
187 | } | 184 | } |
188 | if (bip) { | 185 | if (bip) { |
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index 152543c4ca70..5b6092ef51ef 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c | |||
@@ -5378,3 +5378,196 @@ error0: | |||
5378 | } | 5378 | } |
5379 | return error; | 5379 | return error; |
5380 | } | 5380 | } |
5381 | |||
5382 | /* | ||
5383 | * Shift extent records to the left to cover a hole. | ||
5384 | * | ||
5385 | * The maximum number of extents to be shifted in a single operation | ||
5386 | * is @num_exts, and @current_ext keeps track of the current extent | ||
5387 | * index we have shifted. @offset_shift_fsb is the length by which each | ||
5388 | * extent is shifted. If there is no hole to shift the extents | ||
5389 | * into, this will be considered invalid operation and we abort immediately. | ||
5390 | */ | ||
5391 | int | ||
5392 | xfs_bmap_shift_extents( | ||
5393 | struct xfs_trans *tp, | ||
5394 | struct xfs_inode *ip, | ||
5395 | int *done, | ||
5396 | xfs_fileoff_t start_fsb, | ||
5397 | xfs_fileoff_t offset_shift_fsb, | ||
5398 | xfs_extnum_t *current_ext, | ||
5399 | xfs_fsblock_t *firstblock, | ||
5400 | struct xfs_bmap_free *flist, | ||
5401 | int num_exts) | ||
5402 | { | ||
5403 | struct xfs_btree_cur *cur; | ||
5404 | struct xfs_bmbt_rec_host *gotp; | ||
5405 | struct xfs_bmbt_irec got; | ||
5406 | struct xfs_bmbt_irec left; | ||
5407 | struct xfs_mount *mp = ip->i_mount; | ||
5408 | struct xfs_ifork *ifp; | ||
5409 | xfs_extnum_t nexts = 0; | ||
5410 | xfs_fileoff_t startoff; | ||
5411 | int error = 0; | ||
5412 | int i; | ||
5413 | int whichfork = XFS_DATA_FORK; | ||
5414 | int logflags; | ||
5415 | xfs_filblks_t blockcount = 0; | ||
5416 | |||
5417 | if (unlikely(XFS_TEST_ERROR( | ||
5418 | (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && | ||
5419 | XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), | ||
5420 | mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { | ||
5421 | XFS_ERROR_REPORT("xfs_bmap_shift_extents", | ||
5422 | XFS_ERRLEVEL_LOW, mp); | ||
5423 | return XFS_ERROR(EFSCORRUPTED); | ||
5424 | } | ||
5425 | |||
5426 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
5427 | return XFS_ERROR(EIO); | ||
5428 | |||
5429 | ASSERT(current_ext != NULL); | ||
5430 | |||
5431 | ifp = XFS_IFORK_PTR(ip, whichfork); | ||
5432 | |||
5433 | if (!(ifp->if_flags & XFS_IFEXTENTS)) { | ||
5434 | /* Read in all the extents */ | ||
5435 | error = xfs_iread_extents(tp, ip, whichfork); | ||
5436 | if (error) | ||
5437 | return error; | ||
5438 | } | ||
5439 | |||
5440 | /* | ||
5441 | * If *current_ext is 0, we would need to lookup the extent | ||
5442 | * from where we would start shifting and store it in gotp. | ||
5443 | */ | ||
5444 | if (!*current_ext) { | ||
5445 | gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); | ||
5446 | /* | ||
5447 | * gotp can be null in 2 cases: 1) if there are no extents | ||
5448 | * or 2) start_fsb lies in a hole beyond which there are | ||
5449 | * no extents. Either way, we are done. | ||
5450 | */ | ||
5451 | if (!gotp) { | ||
5452 | *done = 1; | ||
5453 | return 0; | ||
5454 | } | ||
5455 | } | ||
5456 | |||
5457 | /* We are going to change core inode */ | ||
5458 | logflags = XFS_ILOG_CORE; | ||
5459 | |||
5460 | if (ifp->if_flags & XFS_IFBROOT) { | ||
5461 | cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); | ||
5462 | cur->bc_private.b.firstblock = *firstblock; | ||
5463 | cur->bc_private.b.flist = flist; | ||
5464 | cur->bc_private.b.flags = 0; | ||
5465 | } else { | ||
5466 | cur = NULL; | ||
5467 | logflags |= XFS_ILOG_DEXT; | ||
5468 | } | ||
5469 | |||
5470 | while (nexts++ < num_exts && | ||
5471 | *current_ext < XFS_IFORK_NEXTENTS(ip, whichfork)) { | ||
5472 | |||
5473 | gotp = xfs_iext_get_ext(ifp, *current_ext); | ||
5474 | xfs_bmbt_get_all(gotp, &got); | ||
5475 | startoff = got.br_startoff - offset_shift_fsb; | ||
5476 | |||
5477 | /* | ||
5478 | * Before shifting extent into hole, make sure that the hole | ||
5479 | * is large enough to accomodate the shift. | ||
5480 | */ | ||
5481 | if (*current_ext) { | ||
5482 | xfs_bmbt_get_all(xfs_iext_get_ext(ifp, | ||
5483 | *current_ext - 1), &left); | ||
5484 | |||
5485 | if (startoff < left.br_startoff + left.br_blockcount) | ||
5486 | error = XFS_ERROR(EINVAL); | ||
5487 | } else if (offset_shift_fsb > got.br_startoff) { | ||
5488 | /* | ||
5489 | * When first extent is shifted, offset_shift_fsb | ||
5490 | * should be less than the stating offset of | ||
5491 | * the first extent. | ||
5492 | */ | ||
5493 | error = XFS_ERROR(EINVAL); | ||
5494 | } | ||
5495 | |||
5496 | if (error) | ||
5497 | goto del_cursor; | ||
5498 | |||
5499 | if (cur) { | ||
5500 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5501 | got.br_startblock, | ||
5502 | got.br_blockcount, | ||
5503 | &i); | ||
5504 | if (error) | ||
5505 | goto del_cursor; | ||
5506 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5507 | } | ||
5508 | |||
5509 | /* Check if we can merge 2 adjacent extents */ | ||
5510 | if (*current_ext && | ||
5511 | left.br_startoff + left.br_blockcount == startoff && | ||
5512 | left.br_startblock + left.br_blockcount == | ||
5513 | got.br_startblock && | ||
5514 | left.br_state == got.br_state && | ||
5515 | left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { | ||
5516 | blockcount = left.br_blockcount + | ||
5517 | got.br_blockcount; | ||
5518 | xfs_iext_remove(ip, *current_ext, 1, 0); | ||
5519 | if (cur) { | ||
5520 | error = xfs_btree_delete(cur, &i); | ||
5521 | if (error) | ||
5522 | goto del_cursor; | ||
5523 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5524 | } | ||
5525 | XFS_IFORK_NEXT_SET(ip, whichfork, | ||
5526 | XFS_IFORK_NEXTENTS(ip, whichfork) - 1); | ||
5527 | gotp = xfs_iext_get_ext(ifp, --*current_ext); | ||
5528 | xfs_bmbt_get_all(gotp, &got); | ||
5529 | |||
5530 | /* Make cursor point to the extent we will update */ | ||
5531 | if (cur) { | ||
5532 | error = xfs_bmbt_lookup_eq(cur, got.br_startoff, | ||
5533 | got.br_startblock, | ||
5534 | got.br_blockcount, | ||
5535 | &i); | ||
5536 | if (error) | ||
5537 | goto del_cursor; | ||
5538 | XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); | ||
5539 | } | ||
5540 | |||
5541 | xfs_bmbt_set_blockcount(gotp, blockcount); | ||
5542 | got.br_blockcount = blockcount; | ||
5543 | } else { | ||
5544 | /* We have to update the startoff */ | ||
5545 | xfs_bmbt_set_startoff(gotp, startoff); | ||
5546 | got.br_startoff = startoff; | ||
5547 | } | ||
5548 | |||
5549 | if (cur) { | ||
5550 | error = xfs_bmbt_update(cur, got.br_startoff, | ||
5551 | got.br_startblock, | ||
5552 | got.br_blockcount, | ||
5553 | got.br_state); | ||
5554 | if (error) | ||
5555 | goto del_cursor; | ||
5556 | } | ||
5557 | |||
5558 | (*current_ext)++; | ||
5559 | } | ||
5560 | |||
5561 | /* Check if we are done */ | ||
5562 | if (*current_ext == XFS_IFORK_NEXTENTS(ip, whichfork)) | ||
5563 | *done = 1; | ||
5564 | |||
5565 | del_cursor: | ||
5566 | if (cur) | ||
5567 | xfs_btree_del_cursor(cur, | ||
5568 | error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); | ||
5569 | |||
5570 | xfs_trans_log_inode(tp, ip, logflags); | ||
5571 | |||
5572 | return error; | ||
5573 | } | ||
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 33b41f351225..f84bd7af43be 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h | |||
@@ -127,6 +127,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp) | |||
127 | { BMAP_RIGHT_FILLING, "RF" }, \ | 127 | { BMAP_RIGHT_FILLING, "RF" }, \ |
128 | { BMAP_ATTRFORK, "ATTR" } | 128 | { BMAP_ATTRFORK, "ATTR" } |
129 | 129 | ||
130 | |||
131 | /* | ||
132 | * This macro is used to determine how many extents will be shifted | ||
133 | * in one write transaction. We could require two splits, | ||
134 | * an extent move on the first and an extent merge on the second, | ||
135 | * So it is proper that one extent is shifted inside write transaction | ||
136 | * at a time. | ||
137 | */ | ||
138 | #define XFS_BMAP_MAX_SHIFT_EXTENTS 1 | ||
139 | |||
130 | #ifdef DEBUG | 140 | #ifdef DEBUG |
131 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, | 141 | void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, |
132 | int whichfork, unsigned long caller_ip); | 142 | int whichfork, unsigned long caller_ip); |
@@ -169,5 +179,10 @@ int xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip, | |||
169 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, | 179 | int xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx, |
170 | xfs_extnum_t num); | 180 | xfs_extnum_t num); |
171 | uint xfs_default_attroffset(struct xfs_inode *ip); | 181 | uint xfs_default_attroffset(struct xfs_inode *ip); |
182 | int xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, | ||
183 | int *done, xfs_fileoff_t start_fsb, | ||
184 | xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, | ||
185 | xfs_fsblock_t *firstblock, struct xfs_bmap_free *flist, | ||
186 | int num_exts); | ||
172 | 187 | ||
173 | #endif /* __XFS_BMAP_H__ */ | 188 | #endif /* __XFS_BMAP_H__ */ |
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index 706bc3f777cb..818d546664e7 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c | |||
@@ -780,12 +780,14 @@ static void | |||
780 | xfs_bmbt_read_verify( | 780 | xfs_bmbt_read_verify( |
781 | struct xfs_buf *bp) | 781 | struct xfs_buf *bp) |
782 | { | 782 | { |
783 | if (!(xfs_btree_lblock_verify_crc(bp) && | 783 | if (!xfs_btree_lblock_verify_crc(bp)) |
784 | xfs_bmbt_verify(bp))) { | 784 | xfs_buf_ioerror(bp, EFSBADCRC); |
785 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 785 | else if (!xfs_bmbt_verify(bp)) |
786 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
787 | bp->b_target->bt_mount, bp->b_addr); | ||
788 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 786 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
787 | |||
788 | if (bp->b_error) { | ||
789 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
790 | xfs_verifier_error(bp); | ||
789 | } | 791 | } |
790 | } | 792 | } |
791 | 793 | ||
@@ -794,11 +796,9 @@ xfs_bmbt_write_verify( | |||
794 | struct xfs_buf *bp) | 796 | struct xfs_buf *bp) |
795 | { | 797 | { |
796 | if (!xfs_bmbt_verify(bp)) { | 798 | if (!xfs_bmbt_verify(bp)) { |
797 | xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn); | ||
798 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 799 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
799 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
800 | bp->b_target->bt_mount, bp->b_addr); | ||
801 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 800 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
801 | xfs_verifier_error(bp); | ||
802 | return; | 802 | return; |
803 | } | 803 | } |
804 | xfs_btree_lblock_calc_crc(bp); | 804 | xfs_btree_lblock_calc_crc(bp); |
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index f264616080ca..01f6a646caa1 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c | |||
@@ -1349,7 +1349,6 @@ xfs_free_file_space( | |||
1349 | * the freeing of the space succeeds at ENOSPC. | 1349 | * the freeing of the space succeeds at ENOSPC. |
1350 | */ | 1350 | */ |
1351 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | 1351 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); |
1352 | tp->t_flags |= XFS_TRANS_RESERVE; | ||
1353 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); | 1352 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0); |
1354 | 1353 | ||
1355 | /* | 1354 | /* |
@@ -1468,6 +1467,102 @@ out: | |||
1468 | } | 1467 | } |
1469 | 1468 | ||
1470 | /* | 1469 | /* |
1470 | * xfs_collapse_file_space() | ||
1471 | * This routine frees disk space and shift extent for the given file. | ||
1472 | * The first thing we do is to free data blocks in the specified range | ||
1473 | * by calling xfs_free_file_space(). It would also sync dirty data | ||
1474 | * and invalidate page cache over the region on which collapse range | ||
1475 | * is working. And Shift extent records to the left to cover a hole. | ||
1476 | * RETURNS: | ||
1477 | * 0 on success | ||
1478 | * errno on error | ||
1479 | * | ||
1480 | */ | ||
1481 | int | ||
1482 | xfs_collapse_file_space( | ||
1483 | struct xfs_inode *ip, | ||
1484 | xfs_off_t offset, | ||
1485 | xfs_off_t len) | ||
1486 | { | ||
1487 | int done = 0; | ||
1488 | struct xfs_mount *mp = ip->i_mount; | ||
1489 | struct xfs_trans *tp; | ||
1490 | int error; | ||
1491 | xfs_extnum_t current_ext = 0; | ||
1492 | struct xfs_bmap_free free_list; | ||
1493 | xfs_fsblock_t first_block; | ||
1494 | int committed; | ||
1495 | xfs_fileoff_t start_fsb; | ||
1496 | xfs_fileoff_t shift_fsb; | ||
1497 | |||
1498 | ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); | ||
1499 | |||
1500 | trace_xfs_collapse_file_space(ip); | ||
1501 | |||
1502 | start_fsb = XFS_B_TO_FSB(mp, offset + len); | ||
1503 | shift_fsb = XFS_B_TO_FSB(mp, len); | ||
1504 | |||
1505 | error = xfs_free_file_space(ip, offset, len); | ||
1506 | if (error) | ||
1507 | return error; | ||
1508 | |||
1509 | while (!error && !done) { | ||
1510 | tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); | ||
1511 | tp->t_flags |= XFS_TRANS_RESERVE; | ||
1512 | /* | ||
1513 | * We would need to reserve permanent block for transaction. | ||
1514 | * This will come into picture when after shifting extent into | ||
1515 | * hole we found that adjacent extents can be merged which | ||
1516 | * may lead to freeing of a block during record update. | ||
1517 | */ | ||
1518 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, | ||
1519 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); | ||
1520 | if (error) { | ||
1521 | ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp)); | ||
1522 | xfs_trans_cancel(tp, 0); | ||
1523 | break; | ||
1524 | } | ||
1525 | |||
1526 | xfs_ilock(ip, XFS_ILOCK_EXCL); | ||
1527 | error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, | ||
1528 | ip->i_gdquot, ip->i_pdquot, | ||
1529 | XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, | ||
1530 | XFS_QMOPT_RES_REGBLKS); | ||
1531 | if (error) | ||
1532 | goto out; | ||
1533 | |||
1534 | xfs_trans_ijoin(tp, ip, 0); | ||
1535 | |||
1536 | xfs_bmap_init(&free_list, &first_block); | ||
1537 | |||
1538 | /* | ||
1539 | * We are using the write transaction in which max 2 bmbt | ||
1540 | * updates are allowed | ||
1541 | */ | ||
1542 | error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, | ||
1543 | shift_fsb, ¤t_ext, | ||
1544 | &first_block, &free_list, | ||
1545 | XFS_BMAP_MAX_SHIFT_EXTENTS); | ||
1546 | if (error) | ||
1547 | goto out; | ||
1548 | |||
1549 | error = xfs_bmap_finish(&tp, &free_list, &committed); | ||
1550 | if (error) | ||
1551 | goto out; | ||
1552 | |||
1553 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1554 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1555 | } | ||
1556 | |||
1557 | return error; | ||
1558 | |||
1559 | out: | ||
1560 | xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); | ||
1561 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | ||
1562 | return error; | ||
1563 | } | ||
1564 | |||
1565 | /* | ||
1471 | * We need to check that the format of the data fork in the temporary inode is | 1566 | * We need to check that the format of the data fork in the temporary inode is |
1472 | * valid for the target inode before doing the swap. This is not a problem with | 1567 | * valid for the target inode before doing the swap. This is not a problem with |
1473 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized | 1568 | * attr1 because of the fixed fork offset, but attr2 has a dynamically sized |
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 900747b25772..935ed2b24edf 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h | |||
@@ -99,6 +99,8 @@ int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, | |||
99 | xfs_off_t len); | 99 | xfs_off_t len); |
100 | int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, | 100 | int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, |
101 | xfs_off_t len); | 101 | xfs_off_t len); |
102 | int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, | ||
103 | xfs_off_t len); | ||
102 | 104 | ||
103 | /* EOF block manipulation functions */ | 105 | /* EOF block manipulation functions */ |
104 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); | 106 | bool xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); |
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 9adaae4f3e2f..e80d59fdf89a 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c | |||
@@ -234,8 +234,7 @@ xfs_btree_lblock_calc_crc( | |||
234 | return; | 234 | return; |
235 | if (bip) | 235 | if (bip) |
236 | block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 236 | block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
237 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 237 | xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); |
238 | XFS_BTREE_LBLOCK_CRC_OFF); | ||
239 | } | 238 | } |
240 | 239 | ||
241 | bool | 240 | bool |
@@ -243,8 +242,8 @@ xfs_btree_lblock_verify_crc( | |||
243 | struct xfs_buf *bp) | 242 | struct xfs_buf *bp) |
244 | { | 243 | { |
245 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | 244 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) |
246 | return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 245 | return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); |
247 | XFS_BTREE_LBLOCK_CRC_OFF); | 246 | |
248 | return true; | 247 | return true; |
249 | } | 248 | } |
250 | 249 | ||
@@ -267,8 +266,7 @@ xfs_btree_sblock_calc_crc( | |||
267 | return; | 266 | return; |
268 | if (bip) | 267 | if (bip) |
269 | block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 268 | block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
270 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 269 | xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); |
271 | XFS_BTREE_SBLOCK_CRC_OFF); | ||
272 | } | 270 | } |
273 | 271 | ||
274 | bool | 272 | bool |
@@ -276,8 +274,8 @@ xfs_btree_sblock_verify_crc( | |||
276 | struct xfs_buf *bp) | 274 | struct xfs_buf *bp) |
277 | { | 275 | { |
278 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) | 276 | if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) |
279 | return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 277 | return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); |
280 | XFS_BTREE_SBLOCK_CRC_OFF); | 278 | |
281 | return true; | 279 | return true; |
282 | } | 280 | } |
283 | 281 | ||
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9c061ef2b0d9..107f2fdfe41f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c | |||
@@ -396,7 +396,17 @@ _xfs_buf_map_pages( | |||
396 | bp->b_addr = NULL; | 396 | bp->b_addr = NULL; |
397 | } else { | 397 | } else { |
398 | int retried = 0; | 398 | int retried = 0; |
399 | unsigned noio_flag; | ||
399 | 400 | ||
401 | /* | ||
402 | * vm_map_ram() will allocate auxillary structures (e.g. | ||
403 | * pagetables) with GFP_KERNEL, yet we are likely to be under | ||
404 | * GFP_NOFS context here. Hence we need to tell memory reclaim | ||
405 | * that we are in such a context via PF_MEMALLOC_NOIO to prevent | ||
406 | * memory reclaim re-entering the filesystem here and | ||
407 | * potentially deadlocking. | ||
408 | */ | ||
409 | noio_flag = memalloc_noio_save(); | ||
400 | do { | 410 | do { |
401 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, | 411 | bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, |
402 | -1, PAGE_KERNEL); | 412 | -1, PAGE_KERNEL); |
@@ -404,6 +414,7 @@ _xfs_buf_map_pages( | |||
404 | break; | 414 | break; |
405 | vm_unmap_aliases(); | 415 | vm_unmap_aliases(); |
406 | } while (retried++ <= 1); | 416 | } while (retried++ <= 1); |
417 | memalloc_noio_restore(noio_flag); | ||
407 | 418 | ||
408 | if (!bp->b_addr) | 419 | if (!bp->b_addr) |
409 | return -ENOMEM; | 420 | return -ENOMEM; |
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 995339534db6..b8a3abf6cf47 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h | |||
@@ -369,6 +369,20 @@ static inline void xfs_buf_relse(xfs_buf_t *bp) | |||
369 | xfs_buf_rele(bp); | 369 | xfs_buf_rele(bp); |
370 | } | 370 | } |
371 | 371 | ||
372 | static inline int | ||
373 | xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) | ||
374 | { | ||
375 | return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
376 | cksum_offset); | ||
377 | } | ||
378 | |||
379 | static inline void | ||
380 | xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) | ||
381 | { | ||
382 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
383 | cksum_offset); | ||
384 | } | ||
385 | |||
372 | /* | 386 | /* |
373 | * Handling of buftargs. | 387 | * Handling of buftargs. |
374 | */ | 388 | */ |
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 33149113e333..8752821443be 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c | |||
@@ -796,20 +796,6 @@ xfs_buf_item_init( | |||
796 | bip->bli_formats[i].blf_map_size = map_size; | 796 | bip->bli_formats[i].blf_map_size = map_size; |
797 | } | 797 | } |
798 | 798 | ||
799 | #ifdef XFS_TRANS_DEBUG | ||
800 | /* | ||
801 | * Allocate the arrays for tracking what needs to be logged | ||
802 | * and what our callers request to be logged. bli_orig | ||
803 | * holds a copy of the original, clean buffer for comparison | ||
804 | * against, and bli_logged keeps a 1 bit flag per byte in | ||
805 | * the buffer to indicate which bytes the callers have asked | ||
806 | * to have logged. | ||
807 | */ | ||
808 | bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); | ||
809 | memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); | ||
810 | bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); | ||
811 | #endif | ||
812 | |||
813 | /* | 799 | /* |
814 | * Put the buf item into the list of items attached to the | 800 | * Put the buf item into the list of items attached to the |
815 | * buffer at the front. | 801 | * buffer at the front. |
@@ -957,11 +943,6 @@ STATIC void | |||
957 | xfs_buf_item_free( | 943 | xfs_buf_item_free( |
958 | xfs_buf_log_item_t *bip) | 944 | xfs_buf_log_item_t *bip) |
959 | { | 945 | { |
960 | #ifdef XFS_TRANS_DEBUG | ||
961 | kmem_free(bip->bli_orig); | ||
962 | kmem_free(bip->bli_logged); | ||
963 | #endif /* XFS_TRANS_DEBUG */ | ||
964 | |||
965 | xfs_buf_item_free_format(bip); | 946 | xfs_buf_item_free_format(bip); |
966 | kmem_zone_free(xfs_buf_item_zone, bip); | 947 | kmem_zone_free(xfs_buf_item_zone, bip); |
967 | } | 948 | } |
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 796272a2e129..6cc5f6785a77 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c | |||
@@ -185,8 +185,8 @@ xfs_da3_node_write_verify( | |||
185 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; | 185 | struct xfs_da3_node_hdr *hdr3 = bp->b_addr; |
186 | 186 | ||
187 | if (!xfs_da3_node_verify(bp)) { | 187 | if (!xfs_da3_node_verify(bp)) { |
188 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
189 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 188 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
189 | xfs_verifier_error(bp); | ||
190 | return; | 190 | return; |
191 | } | 191 | } |
192 | 192 | ||
@@ -196,7 +196,7 @@ xfs_da3_node_write_verify( | |||
196 | if (bip) | 196 | if (bip) |
197 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | 197 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
198 | 198 | ||
199 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); | 199 | xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF); |
200 | } | 200 | } |
201 | 201 | ||
202 | /* | 202 | /* |
@@ -209,18 +209,20 @@ static void | |||
209 | xfs_da3_node_read_verify( | 209 | xfs_da3_node_read_verify( |
210 | struct xfs_buf *bp) | 210 | struct xfs_buf *bp) |
211 | { | 211 | { |
212 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
213 | struct xfs_da_blkinfo *info = bp->b_addr; | 212 | struct xfs_da_blkinfo *info = bp->b_addr; |
214 | 213 | ||
215 | switch (be16_to_cpu(info->magic)) { | 214 | switch (be16_to_cpu(info->magic)) { |
216 | case XFS_DA3_NODE_MAGIC: | 215 | case XFS_DA3_NODE_MAGIC: |
217 | if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 216 | if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { |
218 | XFS_DA3_NODE_CRC_OFF)) | 217 | xfs_buf_ioerror(bp, EFSBADCRC); |
219 | break; | 218 | break; |
219 | } | ||
220 | /* fall through */ | 220 | /* fall through */ |
221 | case XFS_DA_NODE_MAGIC: | 221 | case XFS_DA_NODE_MAGIC: |
222 | if (!xfs_da3_node_verify(bp)) | 222 | if (!xfs_da3_node_verify(bp)) { |
223 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
223 | break; | 224 | break; |
225 | } | ||
224 | return; | 226 | return; |
225 | case XFS_ATTR_LEAF_MAGIC: | 227 | case XFS_ATTR_LEAF_MAGIC: |
226 | case XFS_ATTR3_LEAF_MAGIC: | 228 | case XFS_ATTR3_LEAF_MAGIC: |
@@ -237,8 +239,7 @@ xfs_da3_node_read_verify( | |||
237 | } | 239 | } |
238 | 240 | ||
239 | /* corrupt block */ | 241 | /* corrupt block */ |
240 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 242 | xfs_verifier_error(bp); |
241 | xfs_buf_ioerror(bp, EFSCORRUPTED); | ||
242 | } | 243 | } |
243 | 244 | ||
244 | const struct xfs_buf_ops xfs_da3_node_buf_ops = { | 245 | const struct xfs_buf_ops xfs_da3_node_buf_ops = { |
@@ -1295,7 +1296,7 @@ xfs_da3_fixhashpath( | |||
1295 | node = blk->bp->b_addr; | 1296 | node = blk->bp->b_addr; |
1296 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); | 1297 | dp->d_ops->node_hdr_from_disk(&nodehdr, node); |
1297 | btree = dp->d_ops->node_tree_p(node); | 1298 | btree = dp->d_ops->node_tree_p(node); |
1298 | if (be32_to_cpu(btree->hashval) == lasthash) | 1299 | if (be32_to_cpu(btree[blk->index].hashval) == lasthash) |
1299 | break; | 1300 | break; |
1300 | blk->hashval = lasthash; | 1301 | blk->hashval = lasthash; |
1301 | btree[blk->index].hashval = cpu_to_be32(lasthash); | 1302 | btree[blk->index].hashval = cpu_to_be32(lasthash); |
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5869b50dc41..623bbe8fd921 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h | |||
@@ -89,6 +89,8 @@ typedef struct xfs_dinode { | |||
89 | /* structure must be padded to 64 bit alignment */ | 89 | /* structure must be padded to 64 bit alignment */ |
90 | } xfs_dinode_t; | 90 | } xfs_dinode_t; |
91 | 91 | ||
92 | #define XFS_DINODE_CRC_OFF offsetof(struct xfs_dinode, di_crc) | ||
93 | |||
92 | #define DI_MAX_FLUSH 0xffff | 94 | #define DI_MAX_FLUSH 0xffff |
93 | 95 | ||
94 | /* | 96 | /* |
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index ce16ef02997a..fda46253966a 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
@@ -180,16 +180,23 @@ xfs_dir_init( | |||
180 | xfs_inode_t *dp, | 180 | xfs_inode_t *dp, |
181 | xfs_inode_t *pdp) | 181 | xfs_inode_t *pdp) |
182 | { | 182 | { |
183 | xfs_da_args_t args; | 183 | struct xfs_da_args *args; |
184 | int error; | 184 | int error; |
185 | 185 | ||
186 | memset((char *)&args, 0, sizeof(args)); | ||
187 | args.dp = dp; | ||
188 | args.trans = tp; | ||
189 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 186 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
190 | if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) | 187 | error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); |
188 | if (error) | ||
191 | return error; | 189 | return error; |
192 | return xfs_dir2_sf_create(&args, pdp->i_ino); | 190 | |
191 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); | ||
192 | if (!args) | ||
193 | return ENOMEM; | ||
194 | |||
195 | args->dp = dp; | ||
196 | args->trans = tp; | ||
197 | error = xfs_dir2_sf_create(args, pdp->i_ino); | ||
198 | kmem_free(args); | ||
199 | return error; | ||
193 | } | 200 | } |
194 | 201 | ||
195 | /* | 202 | /* |
@@ -205,41 +212,56 @@ xfs_dir_createname( | |||
205 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 212 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
206 | xfs_extlen_t total) /* bmap's total block count */ | 213 | xfs_extlen_t total) /* bmap's total block count */ |
207 | { | 214 | { |
208 | xfs_da_args_t args; | 215 | struct xfs_da_args *args; |
209 | int rval; | 216 | int rval; |
210 | int v; /* type-checking value */ | 217 | int v; /* type-checking value */ |
211 | 218 | ||
212 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 219 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
213 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 220 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); |
221 | if (rval) | ||
214 | return rval; | 222 | return rval; |
215 | XFS_STATS_INC(xs_dir_create); | 223 | XFS_STATS_INC(xs_dir_create); |
216 | 224 | ||
217 | memset(&args, 0, sizeof(xfs_da_args_t)); | 225 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
218 | args.name = name->name; | 226 | if (!args) |
219 | args.namelen = name->len; | 227 | return ENOMEM; |
220 | args.filetype = name->type; | 228 | |
221 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 229 | args->name = name->name; |
222 | args.inumber = inum; | 230 | args->namelen = name->len; |
223 | args.dp = dp; | 231 | args->filetype = name->type; |
224 | args.firstblock = first; | 232 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
225 | args.flist = flist; | 233 | args->inumber = inum; |
226 | args.total = total; | 234 | args->dp = dp; |
227 | args.whichfork = XFS_DATA_FORK; | 235 | args->firstblock = first; |
228 | args.trans = tp; | 236 | args->flist = flist; |
229 | args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; | 237 | args->total = total; |
230 | 238 | args->whichfork = XFS_DATA_FORK; | |
231 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 239 | args->trans = tp; |
232 | rval = xfs_dir2_sf_addname(&args); | 240 | args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; |
233 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 241 | |
234 | return rval; | 242 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
235 | else if (v) | 243 | rval = xfs_dir2_sf_addname(args); |
236 | rval = xfs_dir2_block_addname(&args); | 244 | goto out_free; |
237 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 245 | } |
238 | return rval; | 246 | |
239 | else if (v) | 247 | rval = xfs_dir2_isblock(tp, dp, &v); |
240 | rval = xfs_dir2_leaf_addname(&args); | 248 | if (rval) |
249 | goto out_free; | ||
250 | if (v) { | ||
251 | rval = xfs_dir2_block_addname(args); | ||
252 | goto out_free; | ||
253 | } | ||
254 | |||
255 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
256 | if (rval) | ||
257 | goto out_free; | ||
258 | if (v) | ||
259 | rval = xfs_dir2_leaf_addname(args); | ||
241 | else | 260 | else |
242 | rval = xfs_dir2_node_addname(&args); | 261 | rval = xfs_dir2_node_addname(args); |
262 | |||
263 | out_free: | ||
264 | kmem_free(args); | ||
243 | return rval; | 265 | return rval; |
244 | } | 266 | } |
245 | 267 | ||
@@ -282,46 +304,66 @@ xfs_dir_lookup( | |||
282 | xfs_ino_t *inum, /* out: inode number */ | 304 | xfs_ino_t *inum, /* out: inode number */ |
283 | struct xfs_name *ci_name) /* out: actual name if CI match */ | 305 | struct xfs_name *ci_name) /* out: actual name if CI match */ |
284 | { | 306 | { |
285 | xfs_da_args_t args; | 307 | struct xfs_da_args *args; |
286 | int rval; | 308 | int rval; |
287 | int v; /* type-checking value */ | 309 | int v; /* type-checking value */ |
288 | 310 | ||
289 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 311 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
290 | XFS_STATS_INC(xs_dir_lookup); | 312 | XFS_STATS_INC(xs_dir_lookup); |
291 | 313 | ||
292 | memset(&args, 0, sizeof(xfs_da_args_t)); | 314 | /* |
293 | args.name = name->name; | 315 | * We need to use KM_NOFS here so that lockdep will not throw false |
294 | args.namelen = name->len; | 316 | * positive deadlock warnings on a non-transactional lookup path. It is |
295 | args.filetype = name->type; | 317 | * safe to recurse into inode recalim in that case, but lockdep can't |
296 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 318 | * easily be taught about it. Hence KM_NOFS avoids having to add more |
297 | args.dp = dp; | 319 | * lockdep Doing this avoids having to add a bunch of lockdep class |
298 | args.whichfork = XFS_DATA_FORK; | 320 | * annotations into the reclaim path for the ilock. |
299 | args.trans = tp; | 321 | */ |
300 | args.op_flags = XFS_DA_OP_OKNOENT; | 322 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
323 | args->name = name->name; | ||
324 | args->namelen = name->len; | ||
325 | args->filetype = name->type; | ||
326 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); | ||
327 | args->dp = dp; | ||
328 | args->whichfork = XFS_DATA_FORK; | ||
329 | args->trans = tp; | ||
330 | args->op_flags = XFS_DA_OP_OKNOENT; | ||
301 | if (ci_name) | 331 | if (ci_name) |
302 | args.op_flags |= XFS_DA_OP_CILOOKUP; | 332 | args->op_flags |= XFS_DA_OP_CILOOKUP; |
303 | 333 | ||
304 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 334 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
305 | rval = xfs_dir2_sf_lookup(&args); | 335 | rval = xfs_dir2_sf_lookup(args); |
306 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 336 | goto out_check_rval; |
307 | return rval; | 337 | } |
308 | else if (v) | 338 | |
309 | rval = xfs_dir2_block_lookup(&args); | 339 | rval = xfs_dir2_isblock(tp, dp, &v); |
310 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 340 | if (rval) |
311 | return rval; | 341 | goto out_free; |
312 | else if (v) | 342 | if (v) { |
313 | rval = xfs_dir2_leaf_lookup(&args); | 343 | rval = xfs_dir2_block_lookup(args); |
344 | goto out_check_rval; | ||
345 | } | ||
346 | |||
347 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
348 | if (rval) | ||
349 | goto out_free; | ||
350 | if (v) | ||
351 | rval = xfs_dir2_leaf_lookup(args); | ||
314 | else | 352 | else |
315 | rval = xfs_dir2_node_lookup(&args); | 353 | rval = xfs_dir2_node_lookup(args); |
354 | |||
355 | out_check_rval: | ||
316 | if (rval == EEXIST) | 356 | if (rval == EEXIST) |
317 | rval = 0; | 357 | rval = 0; |
318 | if (!rval) { | 358 | if (!rval) { |
319 | *inum = args.inumber; | 359 | *inum = args->inumber; |
320 | if (ci_name) { | 360 | if (ci_name) { |
321 | ci_name->name = args.value; | 361 | ci_name->name = args->value; |
322 | ci_name->len = args.valuelen; | 362 | ci_name->len = args->valuelen; |
323 | } | 363 | } |
324 | } | 364 | } |
365 | out_free: | ||
366 | kmem_free(args); | ||
325 | return rval; | 367 | return rval; |
326 | } | 368 | } |
327 | 369 | ||
@@ -338,38 +380,51 @@ xfs_dir_removename( | |||
338 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 380 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
339 | xfs_extlen_t total) /* bmap's total block count */ | 381 | xfs_extlen_t total) /* bmap's total block count */ |
340 | { | 382 | { |
341 | xfs_da_args_t args; | 383 | struct xfs_da_args *args; |
342 | int rval; | 384 | int rval; |
343 | int v; /* type-checking value */ | 385 | int v; /* type-checking value */ |
344 | 386 | ||
345 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 387 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
346 | XFS_STATS_INC(xs_dir_remove); | 388 | XFS_STATS_INC(xs_dir_remove); |
347 | 389 | ||
348 | memset(&args, 0, sizeof(xfs_da_args_t)); | 390 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
349 | args.name = name->name; | 391 | if (!args) |
350 | args.namelen = name->len; | 392 | return ENOMEM; |
351 | args.filetype = name->type; | 393 | |
352 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 394 | args->name = name->name; |
353 | args.inumber = ino; | 395 | args->namelen = name->len; |
354 | args.dp = dp; | 396 | args->filetype = name->type; |
355 | args.firstblock = first; | 397 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
356 | args.flist = flist; | 398 | args->inumber = ino; |
357 | args.total = total; | 399 | args->dp = dp; |
358 | args.whichfork = XFS_DATA_FORK; | 400 | args->firstblock = first; |
359 | args.trans = tp; | 401 | args->flist = flist; |
360 | 402 | args->total = total; | |
361 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 403 | args->whichfork = XFS_DATA_FORK; |
362 | rval = xfs_dir2_sf_removename(&args); | 404 | args->trans = tp; |
363 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 405 | |
364 | return rval; | 406 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
365 | else if (v) | 407 | rval = xfs_dir2_sf_removename(args); |
366 | rval = xfs_dir2_block_removename(&args); | 408 | goto out_free; |
367 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 409 | } |
368 | return rval; | 410 | |
369 | else if (v) | 411 | rval = xfs_dir2_isblock(tp, dp, &v); |
370 | rval = xfs_dir2_leaf_removename(&args); | 412 | if (rval) |
413 | goto out_free; | ||
414 | if (v) { | ||
415 | rval = xfs_dir2_block_removename(args); | ||
416 | goto out_free; | ||
417 | } | ||
418 | |||
419 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
420 | if (rval) | ||
421 | goto out_free; | ||
422 | if (v) | ||
423 | rval = xfs_dir2_leaf_removename(args); | ||
371 | else | 424 | else |
372 | rval = xfs_dir2_node_removename(&args); | 425 | rval = xfs_dir2_node_removename(args); |
426 | out_free: | ||
427 | kmem_free(args); | ||
373 | return rval; | 428 | return rval; |
374 | } | 429 | } |
375 | 430 | ||
@@ -386,40 +441,54 @@ xfs_dir_replace( | |||
386 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ | 441 | xfs_bmap_free_t *flist, /* bmap's freeblock list */ |
387 | xfs_extlen_t total) /* bmap's total block count */ | 442 | xfs_extlen_t total) /* bmap's total block count */ |
388 | { | 443 | { |
389 | xfs_da_args_t args; | 444 | struct xfs_da_args *args; |
390 | int rval; | 445 | int rval; |
391 | int v; /* type-checking value */ | 446 | int v; /* type-checking value */ |
392 | 447 | ||
393 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 448 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
394 | 449 | ||
395 | if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) | 450 | rval = xfs_dir_ino_validate(tp->t_mountp, inum); |
451 | if (rval) | ||
396 | return rval; | 452 | return rval; |
397 | 453 | ||
398 | memset(&args, 0, sizeof(xfs_da_args_t)); | 454 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
399 | args.name = name->name; | 455 | if (!args) |
400 | args.namelen = name->len; | 456 | return ENOMEM; |
401 | args.filetype = name->type; | 457 | |
402 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 458 | args->name = name->name; |
403 | args.inumber = inum; | 459 | args->namelen = name->len; |
404 | args.dp = dp; | 460 | args->filetype = name->type; |
405 | args.firstblock = first; | 461 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
406 | args.flist = flist; | 462 | args->inumber = inum; |
407 | args.total = total; | 463 | args->dp = dp; |
408 | args.whichfork = XFS_DATA_FORK; | 464 | args->firstblock = first; |
409 | args.trans = tp; | 465 | args->flist = flist; |
410 | 466 | args->total = total; | |
411 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 467 | args->whichfork = XFS_DATA_FORK; |
412 | rval = xfs_dir2_sf_replace(&args); | 468 | args->trans = tp; |
413 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 469 | |
414 | return rval; | 470 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
415 | else if (v) | 471 | rval = xfs_dir2_sf_replace(args); |
416 | rval = xfs_dir2_block_replace(&args); | 472 | goto out_free; |
417 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 473 | } |
418 | return rval; | 474 | |
419 | else if (v) | 475 | rval = xfs_dir2_isblock(tp, dp, &v); |
420 | rval = xfs_dir2_leaf_replace(&args); | 476 | if (rval) |
477 | goto out_free; | ||
478 | if (v) { | ||
479 | rval = xfs_dir2_block_replace(args); | ||
480 | goto out_free; | ||
481 | } | ||
482 | |||
483 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
484 | if (rval) | ||
485 | goto out_free; | ||
486 | if (v) | ||
487 | rval = xfs_dir2_leaf_replace(args); | ||
421 | else | 488 | else |
422 | rval = xfs_dir2_node_replace(&args); | 489 | rval = xfs_dir2_node_replace(args); |
490 | out_free: | ||
491 | kmem_free(args); | ||
423 | return rval; | 492 | return rval; |
424 | } | 493 | } |
425 | 494 | ||
@@ -434,7 +503,7 @@ xfs_dir_canenter( | |||
434 | struct xfs_name *name, /* name of entry to add */ | 503 | struct xfs_name *name, /* name of entry to add */ |
435 | uint resblks) | 504 | uint resblks) |
436 | { | 505 | { |
437 | xfs_da_args_t args; | 506 | struct xfs_da_args *args; |
438 | int rval; | 507 | int rval; |
439 | int v; /* type-checking value */ | 508 | int v; /* type-checking value */ |
440 | 509 | ||
@@ -443,29 +512,42 @@ xfs_dir_canenter( | |||
443 | 512 | ||
444 | ASSERT(S_ISDIR(dp->i_d.di_mode)); | 513 | ASSERT(S_ISDIR(dp->i_d.di_mode)); |
445 | 514 | ||
446 | memset(&args, 0, sizeof(xfs_da_args_t)); | 515 | args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); |
447 | args.name = name->name; | 516 | if (!args) |
448 | args.namelen = name->len; | 517 | return ENOMEM; |
449 | args.filetype = name->type; | 518 | |
450 | args.hashval = dp->i_mount->m_dirnameops->hashname(name); | 519 | args->name = name->name; |
451 | args.dp = dp; | 520 | args->namelen = name->len; |
452 | args.whichfork = XFS_DATA_FORK; | 521 | args->filetype = name->type; |
453 | args.trans = tp; | 522 | args->hashval = dp->i_mount->m_dirnameops->hashname(name); |
454 | args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | | 523 | args->dp = dp; |
524 | args->whichfork = XFS_DATA_FORK; | ||
525 | args->trans = tp; | ||
526 | args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | | ||
455 | XFS_DA_OP_OKNOENT; | 527 | XFS_DA_OP_OKNOENT; |
456 | 528 | ||
457 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 529 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { |
458 | rval = xfs_dir2_sf_addname(&args); | 530 | rval = xfs_dir2_sf_addname(args); |
459 | else if ((rval = xfs_dir2_isblock(tp, dp, &v))) | 531 | goto out_free; |
460 | return rval; | 532 | } |
461 | else if (v) | 533 | |
462 | rval = xfs_dir2_block_addname(&args); | 534 | rval = xfs_dir2_isblock(tp, dp, &v); |
463 | else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) | 535 | if (rval) |
464 | return rval; | 536 | goto out_free; |
465 | else if (v) | 537 | if (v) { |
466 | rval = xfs_dir2_leaf_addname(&args); | 538 | rval = xfs_dir2_block_addname(args); |
539 | goto out_free; | ||
540 | } | ||
541 | |||
542 | rval = xfs_dir2_isleaf(tp, dp, &v); | ||
543 | if (rval) | ||
544 | goto out_free; | ||
545 | if (v) | ||
546 | rval = xfs_dir2_leaf_addname(args); | ||
467 | else | 547 | else |
468 | rval = xfs_dir2_node_addname(&args); | 548 | rval = xfs_dir2_node_addname(args); |
549 | out_free: | ||
550 | kmem_free(args); | ||
469 | return rval; | 551 | return rval; |
470 | } | 552 | } |
471 | 553 | ||
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 90cdbf4b5f19..4f6a38cb83a4 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
@@ -89,13 +89,14 @@ xfs_dir3_block_read_verify( | |||
89 | { | 89 | { |
90 | struct xfs_mount *mp = bp->b_target->bt_mount; | 90 | struct xfs_mount *mp = bp->b_target->bt_mount; |
91 | 91 | ||
92 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 92 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
93 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 93 | !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) |
94 | XFS_DIR3_DATA_CRC_OFF)) || | 94 | xfs_buf_ioerror(bp, EFSBADCRC); |
95 | !xfs_dir3_block_verify(bp)) { | 95 | else if (!xfs_dir3_block_verify(bp)) |
96 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
97 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 96 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
98 | } | 97 | |
98 | if (bp->b_error) | ||
99 | xfs_verifier_error(bp); | ||
99 | } | 100 | } |
100 | 101 | ||
101 | static void | 102 | static void |
@@ -107,8 +108,8 @@ xfs_dir3_block_write_verify( | |||
107 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | 108 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; |
108 | 109 | ||
109 | if (!xfs_dir3_block_verify(bp)) { | 110 | if (!xfs_dir3_block_verify(bp)) { |
110 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
111 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 111 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
112 | xfs_verifier_error(bp); | ||
112 | return; | 113 | return; |
113 | } | 114 | } |
114 | 115 | ||
@@ -118,7 +119,7 @@ xfs_dir3_block_write_verify( | |||
118 | if (bip) | 119 | if (bip) |
119 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | 120 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); |
120 | 121 | ||
121 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); | 122 | xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); |
122 | } | 123 | } |
123 | 124 | ||
124 | const struct xfs_buf_ops xfs_dir3_block_buf_ops = { | 125 | const struct xfs_buf_ops xfs_dir3_block_buf_ops = { |
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 70acff4ee173..afa4ad523f3f 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c | |||
@@ -241,7 +241,6 @@ static void | |||
241 | xfs_dir3_data_reada_verify( | 241 | xfs_dir3_data_reada_verify( |
242 | struct xfs_buf *bp) | 242 | struct xfs_buf *bp) |
243 | { | 243 | { |
244 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
245 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; | 244 | struct xfs_dir2_data_hdr *hdr = bp->b_addr; |
246 | 245 | ||
247 | switch (hdr->magic) { | 246 | switch (hdr->magic) { |
@@ -255,8 +254,8 @@ xfs_dir3_data_reada_verify( | |||
255 | xfs_dir3_data_verify(bp); | 254 | xfs_dir3_data_verify(bp); |
256 | return; | 255 | return; |
257 | default: | 256 | default: |
258 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr); | ||
259 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 257 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
258 | xfs_verifier_error(bp); | ||
260 | break; | 259 | break; |
261 | } | 260 | } |
262 | } | 261 | } |
@@ -267,13 +266,14 @@ xfs_dir3_data_read_verify( | |||
267 | { | 266 | { |
268 | struct xfs_mount *mp = bp->b_target->bt_mount; | 267 | struct xfs_mount *mp = bp->b_target->bt_mount; |
269 | 268 | ||
270 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 269 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
271 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 270 | !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) |
272 | XFS_DIR3_DATA_CRC_OFF)) || | 271 | xfs_buf_ioerror(bp, EFSBADCRC); |
273 | !xfs_dir3_data_verify(bp)) { | 272 | else if (!xfs_dir3_data_verify(bp)) |
274 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
275 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 273 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
276 | } | 274 | |
275 | if (bp->b_error) | ||
276 | xfs_verifier_error(bp); | ||
277 | } | 277 | } |
278 | 278 | ||
279 | static void | 279 | static void |
@@ -285,8 +285,8 @@ xfs_dir3_data_write_verify( | |||
285 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | 285 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; |
286 | 286 | ||
287 | if (!xfs_dir3_data_verify(bp)) { | 287 | if (!xfs_dir3_data_verify(bp)) { |
288 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
289 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 288 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
289 | xfs_verifier_error(bp); | ||
290 | return; | 290 | return; |
291 | } | 291 | } |
292 | 292 | ||
@@ -296,7 +296,7 @@ xfs_dir3_data_write_verify( | |||
296 | if (bip) | 296 | if (bip) |
297 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | 297 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); |
298 | 298 | ||
299 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); | 299 | xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF); |
300 | } | 300 | } |
301 | 301 | ||
302 | const struct xfs_buf_ops xfs_dir3_data_buf_ops = { | 302 | const struct xfs_buf_ops xfs_dir3_data_buf_ops = { |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index ae47ec6e16c4..d36e97df1187 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
@@ -179,13 +179,14 @@ __read_verify( | |||
179 | { | 179 | { |
180 | struct xfs_mount *mp = bp->b_target->bt_mount; | 180 | struct xfs_mount *mp = bp->b_target->bt_mount; |
181 | 181 | ||
182 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 182 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
183 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 183 | !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) |
184 | XFS_DIR3_LEAF_CRC_OFF)) || | 184 | xfs_buf_ioerror(bp, EFSBADCRC); |
185 | !xfs_dir3_leaf_verify(bp, magic)) { | 185 | else if (!xfs_dir3_leaf_verify(bp, magic)) |
186 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
187 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 186 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
188 | } | 187 | |
188 | if (bp->b_error) | ||
189 | xfs_verifier_error(bp); | ||
189 | } | 190 | } |
190 | 191 | ||
191 | static void | 192 | static void |
@@ -198,8 +199,8 @@ __write_verify( | |||
198 | struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; | 199 | struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr; |
199 | 200 | ||
200 | if (!xfs_dir3_leaf_verify(bp, magic)) { | 201 | if (!xfs_dir3_leaf_verify(bp, magic)) { |
201 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
202 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 202 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
203 | xfs_verifier_error(bp); | ||
203 | return; | 204 | return; |
204 | } | 205 | } |
205 | 206 | ||
@@ -209,7 +210,7 @@ __write_verify( | |||
209 | if (bip) | 210 | if (bip) |
210 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); | 211 | hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); |
211 | 212 | ||
212 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); | 213 | xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF); |
213 | } | 214 | } |
214 | 215 | ||
215 | static void | 216 | static void |
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 48c7d18f68c3..cb434d732681 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c | |||
@@ -115,13 +115,14 @@ xfs_dir3_free_read_verify( | |||
115 | { | 115 | { |
116 | struct xfs_mount *mp = bp->b_target->bt_mount; | 116 | struct xfs_mount *mp = bp->b_target->bt_mount; |
117 | 117 | ||
118 | if ((xfs_sb_version_hascrc(&mp->m_sb) && | 118 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
119 | !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 119 | !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) |
120 | XFS_DIR3_FREE_CRC_OFF)) || | 120 | xfs_buf_ioerror(bp, EFSBADCRC); |
121 | !xfs_dir3_free_verify(bp)) { | 121 | else if (!xfs_dir3_free_verify(bp)) |
122 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
123 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 122 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
124 | } | 123 | |
124 | if (bp->b_error) | ||
125 | xfs_verifier_error(bp); | ||
125 | } | 126 | } |
126 | 127 | ||
127 | static void | 128 | static void |
@@ -133,8 +134,8 @@ xfs_dir3_free_write_verify( | |||
133 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; | 134 | struct xfs_dir3_blk_hdr *hdr3 = bp->b_addr; |
134 | 135 | ||
135 | if (!xfs_dir3_free_verify(bp)) { | 136 | if (!xfs_dir3_free_verify(bp)) { |
136 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
137 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 137 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
138 | xfs_verifier_error(bp); | ||
138 | return; | 139 | return; |
139 | } | 140 | } |
140 | 141 | ||
@@ -144,7 +145,7 @@ xfs_dir3_free_write_verify( | |||
144 | if (bip) | 145 | if (bip) |
145 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); | 146 | hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); |
146 | 147 | ||
147 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); | 148 | xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF); |
148 | } | 149 | } |
149 | 150 | ||
150 | const struct xfs_buf_ops xfs_dir3_free_buf_ops = { | 151 | const struct xfs_buf_ops xfs_dir3_free_buf_ops = { |
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 7aeb4c895b32..868b19f096bf 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c | |||
@@ -615,7 +615,7 @@ xfs_qm_dqread( | |||
615 | 615 | ||
616 | if (flags & XFS_QMOPT_DQALLOC) { | 616 | if (flags & XFS_QMOPT_DQALLOC) { |
617 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); | 617 | tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); |
618 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, | 618 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc, |
619 | XFS_QM_DQALLOC_SPACE_RES(mp), 0); | 619 | XFS_QM_DQALLOC_SPACE_RES(mp), 0); |
620 | if (error) | 620 | if (error) |
621 | goto error1; | 621 | goto error1; |
diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c index d401457d2f25..610da8177737 100644 --- a/fs/xfs/xfs_dquot_buf.c +++ b/fs/xfs/xfs_dquot_buf.c | |||
@@ -257,10 +257,13 @@ xfs_dquot_buf_read_verify( | |||
257 | { | 257 | { |
258 | struct xfs_mount *mp = bp->b_target->bt_mount; | 258 | struct xfs_mount *mp = bp->b_target->bt_mount; |
259 | 259 | ||
260 | if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { | 260 | if (!xfs_dquot_buf_verify_crc(mp, bp)) |
261 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 261 | xfs_buf_ioerror(bp, EFSBADCRC); |
262 | else if (!xfs_dquot_buf_verify(mp, bp)) | ||
262 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 263 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
263 | } | 264 | |
265 | if (bp->b_error) | ||
266 | xfs_verifier_error(bp); | ||
264 | } | 267 | } |
265 | 268 | ||
266 | /* | 269 | /* |
@@ -275,8 +278,8 @@ xfs_dquot_buf_write_verify( | |||
275 | struct xfs_mount *mp = bp->b_target->bt_mount; | 278 | struct xfs_mount *mp = bp->b_target->bt_mount; |
276 | 279 | ||
277 | if (!xfs_dquot_buf_verify(mp, bp)) { | 280 | if (!xfs_dquot_buf_verify(mp, bp)) { |
278 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
279 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 281 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
282 | xfs_verifier_error(bp); | ||
280 | return; | 283 | return; |
281 | } | 284 | } |
282 | } | 285 | } |
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 9995b807d627..edac5b057d28 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c | |||
@@ -156,7 +156,7 @@ xfs_error_report( | |||
156 | { | 156 | { |
157 | if (level <= xfs_error_level) { | 157 | if (level <= xfs_error_level) { |
158 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, | 158 | xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, |
159 | "Internal error %s at line %d of file %s. Caller 0x%p", | 159 | "Internal error %s at line %d of file %s. Caller %pF", |
160 | tag, linenum, filename, ra); | 160 | tag, linenum, filename, ra); |
161 | 161 | ||
162 | xfs_stack_trace(); | 162 | xfs_stack_trace(); |
@@ -178,3 +178,28 @@ xfs_corruption_error( | |||
178 | xfs_error_report(tag, level, mp, filename, linenum, ra); | 178 | xfs_error_report(tag, level, mp, filename, linenum, ra); |
179 | xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); | 179 | xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair"); |
180 | } | 180 | } |
181 | |||
182 | /* | ||
183 | * Warnings specifically for verifier errors. Differentiate CRC vs. invalid | ||
184 | * values, and omit the stack trace unless the error level is tuned high. | ||
185 | */ | ||
186 | void | ||
187 | xfs_verifier_error( | ||
188 | struct xfs_buf *bp) | ||
189 | { | ||
190 | struct xfs_mount *mp = bp->b_target->bt_mount; | ||
191 | |||
192 | xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", | ||
193 | bp->b_error == EFSBADCRC ? "CRC error" : "corruption", | ||
194 | __return_address, bp->b_bn); | ||
195 | |||
196 | xfs_alert(mp, "Unmount and run xfs_repair"); | ||
197 | |||
198 | if (xfs_error_level >= XFS_ERRLEVEL_LOW) { | ||
199 | xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); | ||
200 | xfs_hex_dump(xfs_buf_offset(bp, 0), 64); | ||
201 | } | ||
202 | |||
203 | if (xfs_error_level >= XFS_ERRLEVEL_HIGH) | ||
204 | xfs_stack_trace(); | ||
205 | } | ||
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 079a367f44ee..c1c57d4a4b5d 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h | |||
@@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp, | |||
34 | extern void xfs_corruption_error(const char *tag, int level, | 34 | extern void xfs_corruption_error(const char *tag, int level, |
35 | struct xfs_mount *mp, void *p, const char *filename, | 35 | struct xfs_mount *mp, void *p, const char *filename, |
36 | int linenum, inst_t *ra); | 36 | int linenum, inst_t *ra); |
37 | extern void xfs_verifier_error(struct xfs_buf *bp); | ||
37 | 38 | ||
38 | #define XFS_ERROR_REPORT(e, lvl, mp) \ | 39 | #define XFS_ERROR_REPORT(e, lvl, mp) \ |
39 | xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) | 40 | xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 64b48eade91d..003c0051b62f 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
@@ -823,7 +823,8 @@ xfs_file_fallocate( | |||
823 | 823 | ||
824 | if (!S_ISREG(inode->i_mode)) | 824 | if (!S_ISREG(inode->i_mode)) |
825 | return -EINVAL; | 825 | return -EINVAL; |
826 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) | 826 | if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | |
827 | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) | ||
827 | return -EOPNOTSUPP; | 828 | return -EOPNOTSUPP; |
828 | 829 | ||
829 | xfs_ilock(ip, XFS_IOLOCK_EXCL); | 830 | xfs_ilock(ip, XFS_IOLOCK_EXCL); |
@@ -831,6 +832,20 @@ xfs_file_fallocate( | |||
831 | error = xfs_free_file_space(ip, offset, len); | 832 | error = xfs_free_file_space(ip, offset, len); |
832 | if (error) | 833 | if (error) |
833 | goto out_unlock; | 834 | goto out_unlock; |
835 | } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { | ||
836 | unsigned blksize_mask = (1 << inode->i_blkbits) - 1; | ||
837 | |||
838 | if (offset & blksize_mask || len & blksize_mask) { | ||
839 | error = -EINVAL; | ||
840 | goto out_unlock; | ||
841 | } | ||
842 | |||
843 | ASSERT(offset + len < i_size_read(inode)); | ||
844 | new_size = i_size_read(inode) - len; | ||
845 | |||
846 | error = xfs_collapse_file_space(ip, offset, len); | ||
847 | if (error) | ||
848 | goto out_unlock; | ||
834 | } else { | 849 | } else { |
835 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 850 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
836 | offset + len > i_size_read(inode)) { | 851 | offset + len > i_size_read(inode)) { |
@@ -840,8 +855,11 @@ xfs_file_fallocate( | |||
840 | goto out_unlock; | 855 | goto out_unlock; |
841 | } | 856 | } |
842 | 857 | ||
843 | error = xfs_alloc_file_space(ip, offset, len, | 858 | if (mode & FALLOC_FL_ZERO_RANGE) |
844 | XFS_BMAPI_PREALLOC); | 859 | error = xfs_zero_file_space(ip, offset, len); |
860 | else | ||
861 | error = xfs_alloc_file_space(ip, offset, len, | ||
862 | XFS_BMAPI_PREALLOC); | ||
845 | if (error) | 863 | if (error) |
846 | goto out_unlock; | 864 | goto out_unlock; |
847 | } | 865 | } |
@@ -859,7 +877,7 @@ xfs_file_fallocate( | |||
859 | if (ip->i_d.di_mode & S_IXGRP) | 877 | if (ip->i_d.di_mode & S_IXGRP) |
860 | ip->i_d.di_mode &= ~S_ISGID; | 878 | ip->i_d.di_mode &= ~S_ISGID; |
861 | 879 | ||
862 | if (!(mode & FALLOC_FL_PUNCH_HOLE)) | 880 | if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) |
863 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; | 881 | ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; |
864 | 882 | ||
865 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); | 883 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); |
@@ -1465,6 +1483,7 @@ const struct file_operations xfs_dir_file_operations = { | |||
1465 | 1483 | ||
1466 | static const struct vm_operations_struct xfs_file_vm_ops = { | 1484 | static const struct vm_operations_struct xfs_file_vm_ops = { |
1467 | .fault = filemap_fault, | 1485 | .fault = filemap_fault, |
1486 | .map_pages = filemap_map_pages, | ||
1468 | .page_mkwrite = xfs_vm_page_mkwrite, | 1487 | .page_mkwrite = xfs_vm_page_mkwrite, |
1469 | .remap_pages = generic_file_remap_pages, | 1488 | .remap_pages = generic_file_remap_pages, |
1470 | }; | 1489 | }; |
diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index b6ab5a3cfa12..9898f31d05d8 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h | |||
@@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr { | |||
145 | __be64 sl_lsn; | 145 | __be64 sl_lsn; |
146 | }; | 146 | }; |
147 | 147 | ||
148 | #define XFS_SYMLINK_CRC_OFF offsetof(struct xfs_dsymlink_hdr, sl_crc) | ||
149 | |||
148 | /* | 150 | /* |
149 | * The maximum pathlen is 1024 bytes. Since the minimum file system | 151 | * The maximum pathlen is 1024 bytes. Since the minimum file system |
150 | * blocksize is 512 bytes, we can get a max of 3 extents back from | 152 | * blocksize is 512 bytes, we can get a max of 3 extents back from |
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 5d7f105a1c82..8f711db61a0c 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c | |||
@@ -363,6 +363,18 @@ xfs_ialloc_ag_alloc( | |||
363 | args.minleft = args.mp->m_in_maxlevels - 1; | 363 | args.minleft = args.mp->m_in_maxlevels - 1; |
364 | if ((error = xfs_alloc_vextent(&args))) | 364 | if ((error = xfs_alloc_vextent(&args))) |
365 | return error; | 365 | return error; |
366 | |||
367 | /* | ||
368 | * This request might have dirtied the transaction if the AG can | ||
369 | * satisfy the request, but the exact block was not available. | ||
370 | * If the allocation did fail, subsequent requests will relax | ||
371 | * the exact agbno requirement and increase the alignment | ||
372 | * instead. It is critical that the total size of the request | ||
373 | * (len + alignment + slop) does not increase from this point | ||
374 | * on, so reset minalignslop to ensure it is not included in | ||
375 | * subsequent requests. | ||
376 | */ | ||
377 | args.minalignslop = 0; | ||
366 | } else | 378 | } else |
367 | args.fsbno = NULLFSBLOCK; | 379 | args.fsbno = NULLFSBLOCK; |
368 | 380 | ||
@@ -1568,18 +1580,17 @@ xfs_agi_read_verify( | |||
1568 | struct xfs_buf *bp) | 1580 | struct xfs_buf *bp) |
1569 | { | 1581 | { |
1570 | struct xfs_mount *mp = bp->b_target->bt_mount; | 1582 | struct xfs_mount *mp = bp->b_target->bt_mount; |
1571 | int agi_ok = 1; | ||
1572 | |||
1573 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
1574 | agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | ||
1575 | offsetof(struct xfs_agi, agi_crc)); | ||
1576 | agi_ok = agi_ok && xfs_agi_verify(bp); | ||
1577 | 1583 | ||
1578 | if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, | 1584 | if (xfs_sb_version_hascrc(&mp->m_sb) && |
1579 | XFS_RANDOM_IALLOC_READ_AGI))) { | 1585 | !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) |
1580 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | 1586 | xfs_buf_ioerror(bp, EFSBADCRC); |
1587 | else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, | ||
1588 | XFS_ERRTAG_IALLOC_READ_AGI, | ||
1589 | XFS_RANDOM_IALLOC_READ_AGI)) | ||
1581 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 1590 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
1582 | } | 1591 | |
1592 | if (bp->b_error) | ||
1593 | xfs_verifier_error(bp); | ||
1583 | } | 1594 | } |
1584 | 1595 | ||
1585 | static void | 1596 | static void |
@@ -1590,8 +1601,8 @@ xfs_agi_write_verify( | |||
1590 | struct xfs_buf_log_item *bip = bp->b_fspriv; | 1601 | struct xfs_buf_log_item *bip = bp->b_fspriv; |
1591 | 1602 | ||
1592 | if (!xfs_agi_verify(bp)) { | 1603 | if (!xfs_agi_verify(bp)) { |
1593 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
1594 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 1604 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
1605 | xfs_verifier_error(bp); | ||
1595 | return; | 1606 | return; |
1596 | } | 1607 | } |
1597 | 1608 | ||
@@ -1600,8 +1611,7 @@ xfs_agi_write_verify( | |||
1600 | 1611 | ||
1601 | if (bip) | 1612 | if (bip) |
1602 | XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 1613 | XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
1603 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 1614 | xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF); |
1604 | offsetof(struct xfs_agi, agi_crc)); | ||
1605 | } | 1615 | } |
1606 | 1616 | ||
1607 | const struct xfs_buf_ops xfs_agi_buf_ops = { | 1617 | const struct xfs_buf_ops xfs_agi_buf_ops = { |
diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index c8fa5bbb36de..7e309b11e87d 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c | |||
@@ -243,12 +243,14 @@ static void | |||
243 | xfs_inobt_read_verify( | 243 | xfs_inobt_read_verify( |
244 | struct xfs_buf *bp) | 244 | struct xfs_buf *bp) |
245 | { | 245 | { |
246 | if (!(xfs_btree_sblock_verify_crc(bp) && | 246 | if (!xfs_btree_sblock_verify_crc(bp)) |
247 | xfs_inobt_verify(bp))) { | 247 | xfs_buf_ioerror(bp, EFSBADCRC); |
248 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 248 | else if (!xfs_inobt_verify(bp)) |
249 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
250 | bp->b_target->bt_mount, bp->b_addr); | ||
251 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 249 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
250 | |||
251 | if (bp->b_error) { | ||
252 | trace_xfs_btree_corrupt(bp, _RET_IP_); | ||
253 | xfs_verifier_error(bp); | ||
252 | } | 254 | } |
253 | } | 255 | } |
254 | 256 | ||
@@ -258,9 +260,9 @@ xfs_inobt_write_verify( | |||
258 | { | 260 | { |
259 | if (!xfs_inobt_verify(bp)) { | 261 | if (!xfs_inobt_verify(bp)) { |
260 | trace_xfs_btree_corrupt(bp, _RET_IP_); | 262 | trace_xfs_btree_corrupt(bp, _RET_IP_); |
261 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
262 | bp->b_target->bt_mount, bp->b_addr); | ||
263 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 263 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
264 | xfs_verifier_error(bp); | ||
265 | return; | ||
264 | } | 266 | } |
265 | xfs_btree_sblock_calc_crc(bp); | 267 | xfs_btree_sblock_calc_crc(bp); |
266 | 268 | ||
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 3a137e9f9a7d..5e7a38fa6ee6 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c | |||
@@ -42,7 +42,6 @@ | |||
42 | #include "xfs_bmap_util.h" | 42 | #include "xfs_bmap_util.h" |
43 | #include "xfs_error.h" | 43 | #include "xfs_error.h" |
44 | #include "xfs_quota.h" | 44 | #include "xfs_quota.h" |
45 | #include "xfs_dinode.h" | ||
46 | #include "xfs_filestream.h" | 45 | #include "xfs_filestream.h" |
47 | #include "xfs_cksum.h" | 46 | #include "xfs_cksum.h" |
48 | #include "xfs_trace.h" | 47 | #include "xfs_trace.h" |
@@ -62,6 +61,8 @@ kmem_zone_t *xfs_inode_zone; | |||
62 | 61 | ||
63 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); | 62 | STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); |
64 | 63 | ||
64 | STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *); | ||
65 | |||
65 | /* | 66 | /* |
66 | * helper function to extract extent size hint from inode | 67 | * helper function to extract extent size hint from inode |
67 | */ | 68 | */ |
@@ -1115,7 +1116,7 @@ xfs_bumplink( | |||
1115 | { | 1116 | { |
1116 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); | 1117 | xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); |
1117 | 1118 | ||
1118 | ASSERT(ip->i_d.di_nlink > 0); | 1119 | ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE)); |
1119 | ip->i_d.di_nlink++; | 1120 | ip->i_d.di_nlink++; |
1120 | inc_nlink(VFS_I(ip)); | 1121 | inc_nlink(VFS_I(ip)); |
1121 | if ((ip->i_d.di_version == 1) && | 1122 | if ((ip->i_d.di_version == 1) && |
@@ -1165,10 +1166,7 @@ xfs_create( | |||
1165 | if (XFS_FORCED_SHUTDOWN(mp)) | 1166 | if (XFS_FORCED_SHUTDOWN(mp)) |
1166 | return XFS_ERROR(EIO); | 1167 | return XFS_ERROR(EIO); |
1167 | 1168 | ||
1168 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 1169 | prid = xfs_get_initial_prid(dp); |
1169 | prid = xfs_get_projid(dp); | ||
1170 | else | ||
1171 | prid = XFS_PROJID_DEFAULT; | ||
1172 | 1170 | ||
1173 | /* | 1171 | /* |
1174 | * Make sure that we have allocated dquot(s) on disk. | 1172 | * Make sure that we have allocated dquot(s) on disk. |
@@ -1333,6 +1331,113 @@ xfs_create( | |||
1333 | } | 1331 | } |
1334 | 1332 | ||
1335 | int | 1333 | int |
1334 | xfs_create_tmpfile( | ||
1335 | struct xfs_inode *dp, | ||
1336 | struct dentry *dentry, | ||
1337 | umode_t mode) | ||
1338 | { | ||
1339 | struct xfs_mount *mp = dp->i_mount; | ||
1340 | struct xfs_inode *ip = NULL; | ||
1341 | struct xfs_trans *tp = NULL; | ||
1342 | int error; | ||
1343 | uint cancel_flags = XFS_TRANS_RELEASE_LOG_RES; | ||
1344 | prid_t prid; | ||
1345 | struct xfs_dquot *udqp = NULL; | ||
1346 | struct xfs_dquot *gdqp = NULL; | ||
1347 | struct xfs_dquot *pdqp = NULL; | ||
1348 | struct xfs_trans_res *tres; | ||
1349 | uint resblks; | ||
1350 | |||
1351 | if (XFS_FORCED_SHUTDOWN(mp)) | ||
1352 | return XFS_ERROR(EIO); | ||
1353 | |||
1354 | prid = xfs_get_initial_prid(dp); | ||
1355 | |||
1356 | /* | ||
1357 | * Make sure that we have allocated dquot(s) on disk. | ||
1358 | */ | ||
1359 | error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), | ||
1360 | xfs_kgid_to_gid(current_fsgid()), prid, | ||
1361 | XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, | ||
1362 | &udqp, &gdqp, &pdqp); | ||
1363 | if (error) | ||
1364 | return error; | ||
1365 | |||
1366 | resblks = XFS_IALLOC_SPACE_RES(mp); | ||
1367 | tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE); | ||
1368 | |||
1369 | tres = &M_RES(mp)->tr_create_tmpfile; | ||
1370 | error = xfs_trans_reserve(tp, tres, resblks, 0); | ||
1371 | if (error == ENOSPC) { | ||
1372 | /* No space at all so try a "no-allocation" reservation */ | ||
1373 | resblks = 0; | ||
1374 | error = xfs_trans_reserve(tp, tres, 0, 0); | ||
1375 | } | ||
1376 | if (error) { | ||
1377 | cancel_flags = 0; | ||
1378 | goto out_trans_cancel; | ||
1379 | } | ||
1380 | |||
1381 | error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, | ||
1382 | pdqp, resblks, 1, 0); | ||
1383 | if (error) | ||
1384 | goto out_trans_cancel; | ||
1385 | |||
1386 | error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, | ||
1387 | prid, resblks > 0, &ip, NULL); | ||
1388 | if (error) { | ||
1389 | if (error == ENOSPC) | ||
1390 | goto out_trans_cancel; | ||
1391 | goto out_trans_abort; | ||
1392 | } | ||
1393 | |||
1394 | if (mp->m_flags & XFS_MOUNT_WSYNC) | ||
1395 | xfs_trans_set_sync(tp); | ||
1396 | |||
1397 | /* | ||
1398 | * Attach the dquot(s) to the inodes and modify them incore. | ||
1399 | * These ids of the inode couldn't have changed since the new | ||
1400 | * inode has been locked ever since it was created. | ||
1401 | */ | ||
1402 | xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); | ||
1403 | |||
1404 | ip->i_d.di_nlink--; | ||
1405 | d_tmpfile(dentry, VFS_I(ip)); | ||
1406 | error = xfs_iunlink(tp, ip); | ||
1407 | if (error) | ||
1408 | goto out_trans_abort; | ||
1409 | |||
1410 | error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); | ||
1411 | if (error) | ||
1412 | goto out_release_inode; | ||
1413 | |||
1414 | xfs_qm_dqrele(udqp); | ||
1415 | xfs_qm_dqrele(gdqp); | ||
1416 | xfs_qm_dqrele(pdqp); | ||
1417 | |||
1418 | return 0; | ||
1419 | |||
1420 | out_trans_abort: | ||
1421 | cancel_flags |= XFS_TRANS_ABORT; | ||
1422 | out_trans_cancel: | ||
1423 | xfs_trans_cancel(tp, cancel_flags); | ||
1424 | out_release_inode: | ||
1425 | /* | ||
1426 | * Wait until after the current transaction is aborted to | ||
1427 | * release the inode. This prevents recursive transactions | ||
1428 | * and deadlocks from xfs_inactive. | ||
1429 | */ | ||
1430 | if (ip) | ||
1431 | IRELE(ip); | ||
1432 | |||
1433 | xfs_qm_dqrele(udqp); | ||
1434 | xfs_qm_dqrele(gdqp); | ||
1435 | xfs_qm_dqrele(pdqp); | ||
1436 | |||
1437 | return error; | ||
1438 | } | ||
1439 | |||
1440 | int | ||
1336 | xfs_link( | 1441 | xfs_link( |
1337 | xfs_inode_t *tdp, | 1442 | xfs_inode_t *tdp, |
1338 | xfs_inode_t *sip, | 1443 | xfs_inode_t *sip, |
@@ -1397,6 +1502,12 @@ xfs_link( | |||
1397 | 1502 | ||
1398 | xfs_bmap_init(&free_list, &first_block); | 1503 | xfs_bmap_init(&free_list, &first_block); |
1399 | 1504 | ||
1505 | if (sip->i_d.di_nlink == 0) { | ||
1506 | error = xfs_iunlink_remove(tp, sip); | ||
1507 | if (error) | ||
1508 | goto abort_return; | ||
1509 | } | ||
1510 | |||
1400 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, | 1511 | error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino, |
1401 | &first_block, &free_list, resblks); | 1512 | &first_block, &free_list, resblks); |
1402 | if (error) | 1513 | if (error) |
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 65e2350f449c..396cc1fafd0d 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h | |||
@@ -20,6 +20,7 @@ | |||
20 | 20 | ||
21 | #include "xfs_inode_buf.h" | 21 | #include "xfs_inode_buf.h" |
22 | #include "xfs_inode_fork.h" | 22 | #include "xfs_inode_fork.h" |
23 | #include "xfs_dinode.h" | ||
23 | 24 | ||
24 | /* | 25 | /* |
25 | * Kernel only inode definitions | 26 | * Kernel only inode definitions |
@@ -192,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip, | |||
192 | ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); | 193 | ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff); |
193 | } | 194 | } |
194 | 195 | ||
196 | static inline prid_t | ||
197 | xfs_get_initial_prid(struct xfs_inode *dp) | ||
198 | { | ||
199 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | ||
200 | return xfs_get_projid(dp); | ||
201 | |||
202 | return XFS_PROJID_DEFAULT; | ||
203 | } | ||
204 | |||
195 | /* | 205 | /* |
196 | * In-core inode flags. | 206 | * In-core inode flags. |
197 | */ | 207 | */ |
@@ -323,6 +333,8 @@ int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, | |||
323 | struct xfs_inode **ipp, struct xfs_name *ci_name); | 333 | struct xfs_inode **ipp, struct xfs_name *ci_name); |
324 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, | 334 | int xfs_create(struct xfs_inode *dp, struct xfs_name *name, |
325 | umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); | 335 | umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); |
336 | int xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, | ||
337 | umode_t mode); | ||
326 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | 338 | int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, |
327 | struct xfs_inode *ip); | 339 | struct xfs_inode *ip); |
328 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 340 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 4fc9f39dd89e..24e993996bdc 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c | |||
@@ -102,8 +102,7 @@ xfs_inode_buf_verify( | |||
102 | } | 102 | } |
103 | 103 | ||
104 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 104 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
105 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, | 105 | xfs_verifier_error(bp); |
106 | mp, dip); | ||
107 | #ifdef DEBUG | 106 | #ifdef DEBUG |
108 | xfs_alert(mp, | 107 | xfs_alert(mp, |
109 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", | 108 | "bad inode magic/vsn daddr %lld #%d (magic=%x)", |
@@ -306,7 +305,7 @@ xfs_dinode_verify( | |||
306 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 305 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
307 | return false; | 306 | return false; |
308 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, | 307 | if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, |
309 | offsetof(struct xfs_dinode, di_crc))) | 308 | XFS_DINODE_CRC_OFF)) |
310 | return false; | 309 | return false; |
311 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) | 310 | if (be64_to_cpu(dip->di_ino) != ip->i_ino) |
312 | return false; | 311 | return false; |
@@ -327,7 +326,7 @@ xfs_dinode_calc_crc( | |||
327 | 326 | ||
328 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); | 327 | ASSERT(xfs_sb_version_hascrc(&mp->m_sb)); |
329 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, | 328 | crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, |
330 | offsetof(struct xfs_dinode, di_crc)); | 329 | XFS_DINODE_CRC_OFF); |
331 | dip->di_crc = xfs_end_cksum(crc); | 330 | dip->di_crc = xfs_end_cksum(crc); |
332 | } | 331 | } |
333 | 332 | ||
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 22d1cbea283d..3b80ebae05f5 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c | |||
@@ -128,7 +128,6 @@ xfs_iomap_write_direct( | |||
128 | xfs_fsblock_t firstfsb; | 128 | xfs_fsblock_t firstfsb; |
129 | xfs_extlen_t extsz, temp; | 129 | xfs_extlen_t extsz, temp; |
130 | int nimaps; | 130 | int nimaps; |
131 | int bmapi_flag; | ||
132 | int quota_flag; | 131 | int quota_flag; |
133 | int rt; | 132 | int rt; |
134 | xfs_trans_t *tp; | 133 | xfs_trans_t *tp; |
@@ -200,18 +199,15 @@ xfs_iomap_write_direct( | |||
200 | 199 | ||
201 | xfs_trans_ijoin(tp, ip, 0); | 200 | xfs_trans_ijoin(tp, ip, 0); |
202 | 201 | ||
203 | bmapi_flag = 0; | ||
204 | if (offset < XFS_ISIZE(ip) || extsz) | ||
205 | bmapi_flag |= XFS_BMAPI_PREALLOC; | ||
206 | |||
207 | /* | 202 | /* |
208 | * From this point onwards we overwrite the imap pointer that the | 203 | * From this point onwards we overwrite the imap pointer that the |
209 | * caller gave to us. | 204 | * caller gave to us. |
210 | */ | 205 | */ |
211 | xfs_bmap_init(&free_list, &firstfsb); | 206 | xfs_bmap_init(&free_list, &firstfsb); |
212 | nimaps = 1; | 207 | nimaps = 1; |
213 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, | 208 | error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, |
214 | &firstfsb, 0, imap, &nimaps, &free_list); | 209 | XFS_BMAPI_PREALLOC, &firstfsb, 0, |
210 | imap, &nimaps, &free_list); | ||
215 | if (error) | 211 | if (error) |
216 | goto out_bmap_cancel; | 212 | goto out_bmap_cancel; |
217 | 213 | ||
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 9ddfb8190ca1..89b07e43ca28 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include "xfs_da_btree.h" | 39 | #include "xfs_da_btree.h" |
40 | #include "xfs_dir2_priv.h" | 40 | #include "xfs_dir2_priv.h" |
41 | #include "xfs_dinode.h" | 41 | #include "xfs_dinode.h" |
42 | #include "xfs_trans_space.h" | ||
42 | 43 | ||
43 | #include <linux/capability.h> | 44 | #include <linux/capability.h> |
44 | #include <linux/xattr.h> | 45 | #include <linux/xattr.h> |
@@ -48,6 +49,18 @@ | |||
48 | #include <linux/fiemap.h> | 49 | #include <linux/fiemap.h> |
49 | #include <linux/slab.h> | 50 | #include <linux/slab.h> |
50 | 51 | ||
52 | /* | ||
53 | * Directories have different lock order w.r.t. mmap_sem compared to regular | ||
54 | * files. This is due to readdir potentially triggering page faults on a user | ||
55 | * buffer inside filldir(), and this happens with the ilock on the directory | ||
56 | * held. For regular files, the lock order is the other way around - the | ||
57 | * mmap_sem is taken during the page fault, and then we lock the ilock to do | ||
58 | * block mapping. Hence we need a different class for the directory ilock so | ||
59 | * that lockdep can tell them apart. | ||
60 | */ | ||
61 | static struct lock_class_key xfs_nondir_ilock_class; | ||
62 | static struct lock_class_key xfs_dir_ilock_class; | ||
63 | |||
51 | static int | 64 | static int |
52 | xfs_initxattrs( | 65 | xfs_initxattrs( |
53 | struct inode *inode, | 66 | struct inode *inode, |
@@ -1034,6 +1047,19 @@ xfs_vn_fiemap( | |||
1034 | return 0; | 1047 | return 0; |
1035 | } | 1048 | } |
1036 | 1049 | ||
1050 | STATIC int | ||
1051 | xfs_vn_tmpfile( | ||
1052 | struct inode *dir, | ||
1053 | struct dentry *dentry, | ||
1054 | umode_t mode) | ||
1055 | { | ||
1056 | int error; | ||
1057 | |||
1058 | error = xfs_create_tmpfile(XFS_I(dir), dentry, mode); | ||
1059 | |||
1060 | return -error; | ||
1061 | } | ||
1062 | |||
1037 | static const struct inode_operations xfs_inode_operations = { | 1063 | static const struct inode_operations xfs_inode_operations = { |
1038 | .get_acl = xfs_get_acl, | 1064 | .get_acl = xfs_get_acl, |
1039 | .set_acl = xfs_set_acl, | 1065 | .set_acl = xfs_set_acl, |
@@ -1072,6 +1098,7 @@ static const struct inode_operations xfs_dir_inode_operations = { | |||
1072 | .removexattr = generic_removexattr, | 1098 | .removexattr = generic_removexattr, |
1073 | .listxattr = xfs_vn_listxattr, | 1099 | .listxattr = xfs_vn_listxattr, |
1074 | .update_time = xfs_vn_update_time, | 1100 | .update_time = xfs_vn_update_time, |
1101 | .tmpfile = xfs_vn_tmpfile, | ||
1075 | }; | 1102 | }; |
1076 | 1103 | ||
1077 | static const struct inode_operations xfs_dir_ci_inode_operations = { | 1104 | static const struct inode_operations xfs_dir_ci_inode_operations = { |
@@ -1099,6 +1126,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { | |||
1099 | .removexattr = generic_removexattr, | 1126 | .removexattr = generic_removexattr, |
1100 | .listxattr = xfs_vn_listxattr, | 1127 | .listxattr = xfs_vn_listxattr, |
1101 | .update_time = xfs_vn_update_time, | 1128 | .update_time = xfs_vn_update_time, |
1129 | .tmpfile = xfs_vn_tmpfile, | ||
1102 | }; | 1130 | }; |
1103 | 1131 | ||
1104 | static const struct inode_operations xfs_symlink_inode_operations = { | 1132 | static const struct inode_operations xfs_symlink_inode_operations = { |
@@ -1191,6 +1219,7 @@ xfs_setup_inode( | |||
1191 | xfs_diflags_to_iflags(inode, ip); | 1219 | xfs_diflags_to_iflags(inode, ip); |
1192 | 1220 | ||
1193 | ip->d_ops = ip->i_mount->m_nondir_inode_ops; | 1221 | ip->d_ops = ip->i_mount->m_nondir_inode_ops; |
1222 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class); | ||
1194 | switch (inode->i_mode & S_IFMT) { | 1223 | switch (inode->i_mode & S_IFMT) { |
1195 | case S_IFREG: | 1224 | case S_IFREG: |
1196 | inode->i_op = &xfs_inode_operations; | 1225 | inode->i_op = &xfs_inode_operations; |
@@ -1198,6 +1227,7 @@ xfs_setup_inode( | |||
1198 | inode->i_mapping->a_ops = &xfs_address_space_operations; | 1227 | inode->i_mapping->a_ops = &xfs_address_space_operations; |
1199 | break; | 1228 | break; |
1200 | case S_IFDIR: | 1229 | case S_IFDIR: |
1230 | lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class); | ||
1201 | if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) | 1231 | if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb)) |
1202 | inode->i_op = &xfs_dir_ci_inode_operations; | 1232 | inode->i_op = &xfs_dir_ci_inode_operations; |
1203 | else | 1233 | else |
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index f9bb590acc0e..825249d2dfc1 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h | |||
@@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t; | |||
119 | #include "xfs_iops.h" | 119 | #include "xfs_iops.h" |
120 | #include "xfs_aops.h" | 120 | #include "xfs_aops.h" |
121 | #include "xfs_super.h" | 121 | #include "xfs_super.h" |
122 | #include "xfs_cksum.h" | ||
122 | #include "xfs_buf.h" | 123 | #include "xfs_buf.h" |
123 | #include "xfs_message.h" | 124 | #include "xfs_message.h" |
124 | 125 | ||
@@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t; | |||
178 | #define ENOATTR ENODATA /* Attribute not found */ | 179 | #define ENOATTR ENODATA /* Attribute not found */ |
179 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ | 180 | #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ |
180 | #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ | 181 | #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ |
182 | #define EFSBADCRC EBADMSG /* Bad CRC detected */ | ||
181 | 183 | ||
182 | #define SYNCHRONIZE() barrier() | 184 | #define SYNCHRONIZE() barrier() |
183 | #define __return_address __builtin_return_address(0) | 185 | #define __return_address __builtin_return_address(0) |
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index b0f4ef77fa70..2c4004475e71 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h | |||
@@ -175,7 +175,7 @@ void xlog_iodone(struct xfs_buf *); | |||
175 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); | 175 | struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket); |
176 | void xfs_log_ticket_put(struct xlog_ticket *ticket); | 176 | void xfs_log_ticket_put(struct xlog_ticket *ticket); |
177 | 177 | ||
178 | int xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, | 178 | void xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, |
179 | xfs_lsn_t *commit_lsn, int flags); | 179 | xfs_lsn_t *commit_lsn, int flags); |
180 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); | 180 | bool xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); |
181 | 181 | ||
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index 4ef6fdbced78..7e5455391176 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c | |||
@@ -499,13 +499,6 @@ xlog_cil_push( | |||
499 | cil->xc_ctx = new_ctx; | 499 | cil->xc_ctx = new_ctx; |
500 | 500 | ||
501 | /* | 501 | /* |
502 | * mirror the new sequence into the cil structure so that we can do | ||
503 | * unlocked checks against the current sequence in log forces without | ||
504 | * risking deferencing a freed context pointer. | ||
505 | */ | ||
506 | cil->xc_current_sequence = new_ctx->sequence; | ||
507 | |||
508 | /* | ||
509 | * The switch is now done, so we can drop the context lock and move out | 502 | * The switch is now done, so we can drop the context lock and move out |
510 | * of a shared context. We can't just go straight to the commit record, | 503 | * of a shared context. We can't just go straight to the commit record, |
511 | * though - we need to synchronise with previous and future commits so | 504 | * though - we need to synchronise with previous and future commits so |
@@ -523,8 +516,15 @@ xlog_cil_push( | |||
523 | * Hence we need to add this context to the committing context list so | 516 | * Hence we need to add this context to the committing context list so |
524 | * that higher sequences will wait for us to write out a commit record | 517 | * that higher sequences will wait for us to write out a commit record |
525 | * before they do. | 518 | * before they do. |
519 | * | ||
520 | * xfs_log_force_lsn requires us to mirror the new sequence into the cil | ||
521 | * structure atomically with the addition of this sequence to the | ||
522 | * committing list. This also ensures that we can do unlocked checks | ||
523 | * against the current sequence in log forces without risking | ||
524 | * deferencing a freed context pointer. | ||
526 | */ | 525 | */ |
527 | spin_lock(&cil->xc_push_lock); | 526 | spin_lock(&cil->xc_push_lock); |
527 | cil->xc_current_sequence = new_ctx->sequence; | ||
528 | list_add(&ctx->committing, &cil->xc_committing); | 528 | list_add(&ctx->committing, &cil->xc_committing); |
529 | spin_unlock(&cil->xc_push_lock); | 529 | spin_unlock(&cil->xc_push_lock); |
530 | up_write(&cil->xc_ctx_lock); | 530 | up_write(&cil->xc_ctx_lock); |
@@ -662,8 +662,14 @@ xlog_cil_push_background( | |||
662 | 662 | ||
663 | } | 663 | } |
664 | 664 | ||
665 | /* | ||
666 | * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence | ||
667 | * number that is passed. When it returns, the work will be queued for | ||
668 | * @push_seq, but it won't be completed. The caller is expected to do any | ||
669 | * waiting for push_seq to complete if it is required. | ||
670 | */ | ||
665 | static void | 671 | static void |
666 | xlog_cil_push_foreground( | 672 | xlog_cil_push_now( |
667 | struct xlog *log, | 673 | struct xlog *log, |
668 | xfs_lsn_t push_seq) | 674 | xfs_lsn_t push_seq) |
669 | { | 675 | { |
@@ -688,10 +694,8 @@ xlog_cil_push_foreground( | |||
688 | } | 694 | } |
689 | 695 | ||
690 | cil->xc_push_seq = push_seq; | 696 | cil->xc_push_seq = push_seq; |
697 | queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); | ||
691 | spin_unlock(&cil->xc_push_lock); | 698 | spin_unlock(&cil->xc_push_lock); |
692 | |||
693 | /* do the push now */ | ||
694 | xlog_cil_push(log); | ||
695 | } | 699 | } |
696 | 700 | ||
697 | bool | 701 | bool |
@@ -721,7 +725,7 @@ xlog_cil_empty( | |||
721 | * background commit, returns without it held once background commits are | 725 | * background commit, returns without it held once background commits are |
722 | * allowed again. | 726 | * allowed again. |
723 | */ | 727 | */ |
724 | int | 728 | void |
725 | xfs_log_commit_cil( | 729 | xfs_log_commit_cil( |
726 | struct xfs_mount *mp, | 730 | struct xfs_mount *mp, |
727 | struct xfs_trans *tp, | 731 | struct xfs_trans *tp, |
@@ -767,7 +771,6 @@ xfs_log_commit_cil( | |||
767 | xlog_cil_push_background(log); | 771 | xlog_cil_push_background(log); |
768 | 772 | ||
769 | up_read(&cil->xc_ctx_lock); | 773 | up_read(&cil->xc_ctx_lock); |
770 | return 0; | ||
771 | } | 774 | } |
772 | 775 | ||
773 | /* | 776 | /* |
@@ -796,7 +799,8 @@ xlog_cil_force_lsn( | |||
796 | * xlog_cil_push() handles racing pushes for the same sequence, | 799 | * xlog_cil_push() handles racing pushes for the same sequence, |
797 | * so no need to deal with it here. | 800 | * so no need to deal with it here. |
798 | */ | 801 | */ |
799 | xlog_cil_push_foreground(log, sequence); | 802 | restart: |
803 | xlog_cil_push_now(log, sequence); | ||
800 | 804 | ||
801 | /* | 805 | /* |
802 | * See if we can find a previous sequence still committing. | 806 | * See if we can find a previous sequence still committing. |
@@ -804,7 +808,6 @@ xlog_cil_force_lsn( | |||
804 | * before allowing the force of push_seq to go ahead. Hence block | 808 | * before allowing the force of push_seq to go ahead. Hence block |
805 | * on commits for those as well. | 809 | * on commits for those as well. |
806 | */ | 810 | */ |
807 | restart: | ||
808 | spin_lock(&cil->xc_push_lock); | 811 | spin_lock(&cil->xc_push_lock); |
809 | list_for_each_entry(ctx, &cil->xc_committing, committing) { | 812 | list_for_each_entry(ctx, &cil->xc_committing, committing) { |
810 | if (ctx->sequence > sequence) | 813 | if (ctx->sequence > sequence) |
@@ -822,6 +825,28 @@ restart: | |||
822 | /* found it! */ | 825 | /* found it! */ |
823 | commit_lsn = ctx->commit_lsn; | 826 | commit_lsn = ctx->commit_lsn; |
824 | } | 827 | } |
828 | |||
829 | /* | ||
830 | * The call to xlog_cil_push_now() executes the push in the background. | ||
831 | * Hence by the time we have got here it our sequence may not have been | ||
832 | * pushed yet. This is true if the current sequence still matches the | ||
833 | * push sequence after the above wait loop and the CIL still contains | ||
834 | * dirty objects. | ||
835 | * | ||
836 | * When the push occurs, it will empty the CIL and | ||
837 | * atomically increment the currect sequence past the push sequence and | ||
838 | * move it into the committing list. Of course, if the CIL is clean at | ||
839 | * the time of the push, it won't have pushed the CIL at all, so in that | ||
840 | * case we should try the push for this sequence again from the start | ||
841 | * just in case. | ||
842 | */ | ||
843 | |||
844 | if (sequence == cil->xc_current_sequence && | ||
845 | !list_empty(&cil->xc_cil)) { | ||
846 | spin_unlock(&cil->xc_push_lock); | ||
847 | goto restart; | ||
848 | } | ||
849 | |||
825 | spin_unlock(&cil->xc_push_lock); | 850 | spin_unlock(&cil->xc_push_lock); |
826 | return commit_lsn; | 851 | return commit_lsn; |
827 | } | 852 | } |
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index f96c05669a9e..993cb19e7d39 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c | |||
@@ -314,6 +314,9 @@ reread: | |||
314 | error = bp->b_error; | 314 | error = bp->b_error; |
315 | if (loud) | 315 | if (loud) |
316 | xfs_warn(mp, "SB validate failed with error %d.", error); | 316 | xfs_warn(mp, "SB validate failed with error %d.", error); |
317 | /* bad CRC means corrupted metadata */ | ||
318 | if (error == EFSBADCRC) | ||
319 | error = EFSCORRUPTED; | ||
317 | goto release_buf; | 320 | goto release_buf; |
318 | } | 321 | } |
319 | 322 | ||
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index a6a76b2b6a85..ec5ca65c6211 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c | |||
@@ -842,7 +842,7 @@ xfs_growfs_rt_alloc( | |||
842 | /* | 842 | /* |
843 | * Reserve space & log for one extent added to the file. | 843 | * Reserve space & log for one extent added to the file. |
844 | */ | 844 | */ |
845 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, | 845 | error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, |
846 | resblks, 0); | 846 | resblks, 0); |
847 | if (error) | 847 | if (error) |
848 | goto error_cancel; | 848 | goto error_cancel; |
diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index 1e116794bb66..0c0e41bbe4e3 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c | |||
@@ -288,6 +288,7 @@ xfs_mount_validate_sb( | |||
288 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || | 288 | sbp->sb_inodelog < XFS_DINODE_MIN_LOG || |
289 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || | 289 | sbp->sb_inodelog > XFS_DINODE_MAX_LOG || |
290 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || | 290 | sbp->sb_inodesize != (1 << sbp->sb_inodelog) || |
291 | sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) || | ||
291 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || | 292 | (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog) || |
292 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || | 293 | (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE) || |
293 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || | 294 | (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE) || |
@@ -610,12 +611,11 @@ xfs_sb_read_verify( | |||
610 | XFS_SB_VERSION_5) || | 611 | XFS_SB_VERSION_5) || |
611 | dsb->sb_crc != 0)) { | 612 | dsb->sb_crc != 0)) { |
612 | 613 | ||
613 | if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 614 | if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { |
614 | offsetof(struct xfs_sb, sb_crc))) { | ||
615 | /* Only fail bad secondaries on a known V5 filesystem */ | 615 | /* Only fail bad secondaries on a known V5 filesystem */ |
616 | if (bp->b_bn == XFS_SB_DADDR || | 616 | if (bp->b_bn == XFS_SB_DADDR || |
617 | xfs_sb_version_hascrc(&mp->m_sb)) { | 617 | xfs_sb_version_hascrc(&mp->m_sb)) { |
618 | error = EFSCORRUPTED; | 618 | error = EFSBADCRC; |
619 | goto out_error; | 619 | goto out_error; |
620 | } | 620 | } |
621 | } | 621 | } |
@@ -624,10 +624,9 @@ xfs_sb_read_verify( | |||
624 | 624 | ||
625 | out_error: | 625 | out_error: |
626 | if (error) { | 626 | if (error) { |
627 | if (error == EFSCORRUPTED) | ||
628 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
629 | mp, bp->b_addr); | ||
630 | xfs_buf_ioerror(bp, error); | 627 | xfs_buf_ioerror(bp, error); |
628 | if (error == EFSCORRUPTED || error == EFSBADCRC) | ||
629 | xfs_verifier_error(bp); | ||
631 | } | 630 | } |
632 | } | 631 | } |
633 | 632 | ||
@@ -662,9 +661,8 @@ xfs_sb_write_verify( | |||
662 | 661 | ||
663 | error = xfs_sb_verify(bp, false); | 662 | error = xfs_sb_verify(bp, false); |
664 | if (error) { | 663 | if (error) { |
665 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, | ||
666 | mp, bp->b_addr); | ||
667 | xfs_buf_ioerror(bp, error); | 664 | xfs_buf_ioerror(bp, error); |
665 | xfs_verifier_error(bp); | ||
668 | return; | 666 | return; |
669 | } | 667 | } |
670 | 668 | ||
@@ -674,8 +672,7 @@ xfs_sb_write_verify( | |||
674 | if (bip) | 672 | if (bip) |
675 | XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 673 | XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
676 | 674 | ||
677 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 675 | xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF); |
678 | offsetof(struct xfs_sb, sb_crc)); | ||
679 | } | 676 | } |
680 | 677 | ||
681 | const struct xfs_buf_ops xfs_sb_buf_ops = { | 678 | const struct xfs_buf_ops xfs_sb_buf_ops = { |
diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 35061d4b614c..f7b2fe77c5a5 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h | |||
@@ -182,6 +182,8 @@ typedef struct xfs_sb { | |||
182 | /* must be padded to 64 bit alignment */ | 182 | /* must be padded to 64 bit alignment */ |
183 | } xfs_sb_t; | 183 | } xfs_sb_t; |
184 | 184 | ||
185 | #define XFS_SB_CRC_OFF offsetof(struct xfs_sb, sb_crc) | ||
186 | |||
185 | /* | 187 | /* |
186 | * Superblock - on disk version. Must match the in core version above. | 188 | * Superblock - on disk version. Must match the in core version above. |
187 | * Must be padded to 64 bit alignment. | 189 | * Must be padded to 64 bit alignment. |
diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h index 8c5035a13df1..4484e5151395 100644 --- a/fs/xfs/xfs_shared.h +++ b/fs/xfs/xfs_shared.h | |||
@@ -104,7 +104,8 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
104 | #define XFS_TRANS_SB_COUNT 41 | 104 | #define XFS_TRANS_SB_COUNT 41 |
105 | #define XFS_TRANS_CHECKPOINT 42 | 105 | #define XFS_TRANS_CHECKPOINT 42 |
106 | #define XFS_TRANS_ICREATE 43 | 106 | #define XFS_TRANS_ICREATE 43 |
107 | #define XFS_TRANS_TYPE_MAX 43 | 107 | #define XFS_TRANS_CREATE_TMPFILE 44 |
108 | #define XFS_TRANS_TYPE_MAX 44 | ||
108 | /* new transaction types need to be reflected in xfs_logprint(8) */ | 109 | /* new transaction types need to be reflected in xfs_logprint(8) */ |
109 | 110 | ||
110 | #define XFS_TRANS_TYPES \ | 111 | #define XFS_TRANS_TYPES \ |
@@ -112,6 +113,7 @@ extern const struct xfs_buf_ops xfs_symlink_buf_ops; | |||
112 | { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ | 113 | { XFS_TRANS_SETATTR_SIZE, "SETATTR_SIZE" }, \ |
113 | { XFS_TRANS_INACTIVE, "INACTIVE" }, \ | 114 | { XFS_TRANS_INACTIVE, "INACTIVE" }, \ |
114 | { XFS_TRANS_CREATE, "CREATE" }, \ | 115 | { XFS_TRANS_CREATE, "CREATE" }, \ |
116 | { XFS_TRANS_CREATE_TMPFILE, "CREATE_TMPFILE" }, \ | ||
115 | { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ | 117 | { XFS_TRANS_CREATE_TRUNC, "CREATE_TRUNC" }, \ |
116 | { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ | 118 | { XFS_TRANS_TRUNCATE_FILE, "TRUNCATE_FILE" }, \ |
117 | { XFS_TRANS_REMOVE, "REMOVE" }, \ | 119 | { XFS_TRANS_REMOVE, "REMOVE" }, \ |
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index d971f4932b5d..205376776377 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c | |||
@@ -996,7 +996,7 @@ xfs_fs_evict_inode( | |||
996 | 996 | ||
997 | trace_xfs_evict_inode(ip); | 997 | trace_xfs_evict_inode(ip); |
998 | 998 | ||
999 | truncate_inode_pages(&inode->i_data, 0); | 999 | truncate_inode_pages_final(&inode->i_data); |
1000 | clear_inode(inode); | 1000 | clear_inode(inode); |
1001 | XFS_STATS_INC(vn_rele); | 1001 | XFS_STATS_INC(vn_rele); |
1002 | XFS_STATS_INC(vn_remove); | 1002 | XFS_STATS_INC(vn_remove); |
@@ -1197,6 +1197,7 @@ xfs_fs_remount( | |||
1197 | char *p; | 1197 | char *p; |
1198 | int error; | 1198 | int error; |
1199 | 1199 | ||
1200 | sync_filesystem(sb); | ||
1200 | while ((p = strsep(&options, ",")) != NULL) { | 1201 | while ((p = strsep(&options, ",")) != NULL) { |
1201 | int token; | 1202 | int token; |
1202 | 1203 | ||
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 14e58f2c96bd..52979aa90986 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c | |||
@@ -80,6 +80,10 @@ xfs_readlink_bmap( | |||
80 | if (error) { | 80 | if (error) { |
81 | xfs_buf_ioerror_alert(bp, __func__); | 81 | xfs_buf_ioerror_alert(bp, __func__); |
82 | xfs_buf_relse(bp); | 82 | xfs_buf_relse(bp); |
83 | |||
84 | /* bad CRC means corrupted metadata */ | ||
85 | if (error == EFSBADCRC) | ||
86 | error = EFSCORRUPTED; | ||
83 | goto out; | 87 | goto out; |
84 | } | 88 | } |
85 | byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); | 89 | byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); |
@@ -208,10 +212,7 @@ xfs_symlink( | |||
208 | return XFS_ERROR(ENAMETOOLONG); | 212 | return XFS_ERROR(ENAMETOOLONG); |
209 | 213 | ||
210 | udqp = gdqp = NULL; | 214 | udqp = gdqp = NULL; |
211 | if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) | 215 | prid = xfs_get_initial_prid(dp); |
212 | prid = xfs_get_projid(dp); | ||
213 | else | ||
214 | prid = XFS_PROJID_DEFAULT; | ||
215 | 216 | ||
216 | /* | 217 | /* |
217 | * Make sure that we have allocated dquot(s) on disk. | 218 | * Make sure that we have allocated dquot(s) on disk. |
diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c index bf59a2b45f8c..9b32052ff65e 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/xfs_symlink_remote.c | |||
@@ -133,12 +133,13 @@ xfs_symlink_read_verify( | |||
133 | if (!xfs_sb_version_hascrc(&mp->m_sb)) | 133 | if (!xfs_sb_version_hascrc(&mp->m_sb)) |
134 | return; | 134 | return; |
135 | 135 | ||
136 | if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), | 136 | if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) |
137 | offsetof(struct xfs_dsymlink_hdr, sl_crc)) || | 137 | xfs_buf_ioerror(bp, EFSBADCRC); |
138 | !xfs_symlink_verify(bp)) { | 138 | else if (!xfs_symlink_verify(bp)) |
139 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
140 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 139 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
141 | } | 140 | |
141 | if (bp->b_error) | ||
142 | xfs_verifier_error(bp); | ||
142 | } | 143 | } |
143 | 144 | ||
144 | static void | 145 | static void |
@@ -153,8 +154,8 @@ xfs_symlink_write_verify( | |||
153 | return; | 154 | return; |
154 | 155 | ||
155 | if (!xfs_symlink_verify(bp)) { | 156 | if (!xfs_symlink_verify(bp)) { |
156 | XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); | ||
157 | xfs_buf_ioerror(bp, EFSCORRUPTED); | 157 | xfs_buf_ioerror(bp, EFSCORRUPTED); |
158 | xfs_verifier_error(bp); | ||
158 | return; | 159 | return; |
159 | } | 160 | } |
160 | 161 | ||
@@ -162,8 +163,7 @@ xfs_symlink_write_verify( | |||
162 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; | 163 | struct xfs_dsymlink_hdr *dsl = bp->b_addr; |
163 | dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); | 164 | dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn); |
164 | } | 165 | } |
165 | xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), | 166 | xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF); |
166 | offsetof(struct xfs_dsymlink_hdr, sl_crc)); | ||
167 | } | 167 | } |
168 | 168 | ||
169 | const struct xfs_buf_ops xfs_symlink_buf_ops = { | 169 | const struct xfs_buf_ops xfs_symlink_buf_ops = { |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 425dfa45b9a0..a4ae41c179a8 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
@@ -603,6 +603,7 @@ DEFINE_INODE_EVENT(xfs_readlink); | |||
603 | DEFINE_INODE_EVENT(xfs_inactive_symlink); | 603 | DEFINE_INODE_EVENT(xfs_inactive_symlink); |
604 | DEFINE_INODE_EVENT(xfs_alloc_file_space); | 604 | DEFINE_INODE_EVENT(xfs_alloc_file_space); |
605 | DEFINE_INODE_EVENT(xfs_free_file_space); | 605 | DEFINE_INODE_EVENT(xfs_free_file_space); |
606 | DEFINE_INODE_EVENT(xfs_collapse_file_space); | ||
606 | DEFINE_INODE_EVENT(xfs_readdir); | 607 | DEFINE_INODE_EVENT(xfs_readdir); |
607 | #ifdef CONFIG_XFS_POSIX_ACL | 608 | #ifdef CONFIG_XFS_POSIX_ACL |
608 | DEFINE_INODE_EVENT(xfs_get_acl); | 609 | DEFINE_INODE_EVENT(xfs_get_acl); |
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c812c5c060de..54a57326d85b 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c | |||
@@ -887,12 +887,7 @@ xfs_trans_commit( | |||
887 | xfs_trans_apply_sb_deltas(tp); | 887 | xfs_trans_apply_sb_deltas(tp); |
888 | xfs_trans_apply_dquot_deltas(tp); | 888 | xfs_trans_apply_dquot_deltas(tp); |
889 | 889 | ||
890 | error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); | 890 | xfs_log_commit_cil(mp, tp, &commit_lsn, flags); |
891 | if (error == ENOMEM) { | ||
892 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); | ||
893 | error = XFS_ERROR(EIO); | ||
894 | goto out_unreserve; | ||
895 | } | ||
896 | 891 | ||
897 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); | 892 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
898 | xfs_trans_free(tp); | 893 | xfs_trans_free(tp); |
@@ -902,10 +897,7 @@ xfs_trans_commit( | |||
902 | * log out now and wait for it. | 897 | * log out now and wait for it. |
903 | */ | 898 | */ |
904 | if (sync) { | 899 | if (sync) { |
905 | if (!error) { | 900 | error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL); |
906 | error = _xfs_log_force_lsn(mp, commit_lsn, | ||
907 | XFS_LOG_SYNC, NULL); | ||
908 | } | ||
909 | XFS_STATS_INC(xs_trans_sync); | 901 | XFS_STATS_INC(xs_trans_sync); |
910 | } else { | 902 | } else { |
911 | XFS_STATS_INC(xs_trans_async); | 903 | XFS_STATS_INC(xs_trans_async); |
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 647b6f1d8923..b8eef0549f3f 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c | |||
@@ -275,6 +275,10 @@ xfs_trans_read_buf_map( | |||
275 | XFS_BUF_UNDONE(bp); | 275 | XFS_BUF_UNDONE(bp); |
276 | xfs_buf_stale(bp); | 276 | xfs_buf_stale(bp); |
277 | xfs_buf_relse(bp); | 277 | xfs_buf_relse(bp); |
278 | |||
279 | /* bad CRC means corrupted metadata */ | ||
280 | if (error == EFSBADCRC) | ||
281 | error = EFSCORRUPTED; | ||
278 | return error; | 282 | return error; |
279 | } | 283 | } |
280 | #ifdef DEBUG | 284 | #ifdef DEBUG |
@@ -338,6 +342,9 @@ xfs_trans_read_buf_map( | |||
338 | if (tp->t_flags & XFS_TRANS_DIRTY) | 342 | if (tp->t_flags & XFS_TRANS_DIRTY) |
339 | xfs_force_shutdown(tp->t_mountp, | 343 | xfs_force_shutdown(tp->t_mountp, |
340 | SHUTDOWN_META_IO_ERROR); | 344 | SHUTDOWN_META_IO_ERROR); |
345 | /* bad CRC means corrupted metadata */ | ||
346 | if (error == EFSBADCRC) | ||
347 | error = EFSCORRUPTED; | ||
341 | return error; | 348 | return error; |
342 | } | 349 | } |
343 | } | 350 | } |
@@ -375,6 +382,10 @@ xfs_trans_read_buf_map( | |||
375 | if (tp->t_flags & XFS_TRANS_DIRTY) | 382 | if (tp->t_flags & XFS_TRANS_DIRTY) |
376 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); | 383 | xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR); |
377 | xfs_buf_relse(bp); | 384 | xfs_buf_relse(bp); |
385 | |||
386 | /* bad CRC means corrupted metadata */ | ||
387 | if (error == EFSBADCRC) | ||
388 | error = EFSCORRUPTED; | ||
378 | return error; | 389 | return error; |
379 | } | 390 | } |
380 | #ifdef DEBUG | 391 | #ifdef DEBUG |
diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index 2ffd3e331b49..ae368165244d 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c | |||
@@ -81,20 +81,28 @@ xfs_calc_buf_res( | |||
81 | * on disk. Hence we need an inode reservation function that calculates all this | 81 | * on disk. Hence we need an inode reservation function that calculates all this |
82 | * correctly. So, we log: | 82 | * correctly. So, we log: |
83 | * | 83 | * |
84 | * - log op headers for object | 84 | * - 4 log op headers for object |
85 | * - for the ilf, the inode core and 2 forks | ||
85 | * - inode log format object | 86 | * - inode log format object |
86 | * - the entire inode contents (core + 2 forks) | 87 | * - the inode core |
87 | * - two bmap btree block headers | 88 | * - two inode forks containing bmap btree root blocks. |
89 | * - the btree data contained by both forks will fit into the inode size, | ||
90 | * hence when combined with the inode core above, we have a total of the | ||
91 | * actual inode size. | ||
92 | * - the BMBT headers need to be accounted separately, as they are | ||
93 | * additional to the records and pointers that fit inside the inode | ||
94 | * forks. | ||
88 | */ | 95 | */ |
89 | STATIC uint | 96 | STATIC uint |
90 | xfs_calc_inode_res( | 97 | xfs_calc_inode_res( |
91 | struct xfs_mount *mp, | 98 | struct xfs_mount *mp, |
92 | uint ninodes) | 99 | uint ninodes) |
93 | { | 100 | { |
94 | return ninodes * (sizeof(struct xlog_op_header) + | 101 | return ninodes * |
95 | sizeof(struct xfs_inode_log_format) + | 102 | (4 * sizeof(struct xlog_op_header) + |
96 | mp->m_sb.sb_inodesize + | 103 | sizeof(struct xfs_inode_log_format) + |
97 | 2 * XFS_BMBT_BLOCK_LEN(mp)); | 104 | mp->m_sb.sb_inodesize + |
105 | 2 * XFS_BMBT_BLOCK_LEN(mp)); | ||
98 | } | 106 | } |
99 | 107 | ||
100 | /* | 108 | /* |
@@ -204,6 +212,19 @@ xfs_calc_rename_reservation( | |||
204 | } | 212 | } |
205 | 213 | ||
206 | /* | 214 | /* |
215 | * For removing an inode from unlinked list at first, we can modify: | ||
216 | * the agi hash list and counters: sector size | ||
217 | * the on disk inode before ours in the agi hash list: inode cluster size | ||
218 | */ | ||
219 | STATIC uint | ||
220 | xfs_calc_iunlink_remove_reservation( | ||
221 | struct xfs_mount *mp) | ||
222 | { | ||
223 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
224 | max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size); | ||
225 | } | ||
226 | |||
227 | /* | ||
207 | * For creating a link to an inode: | 228 | * For creating a link to an inode: |
208 | * the parent directory inode: inode size | 229 | * the parent directory inode: inode size |
209 | * the linked inode: inode size | 230 | * the linked inode: inode size |
@@ -220,6 +241,7 @@ xfs_calc_link_reservation( | |||
220 | struct xfs_mount *mp) | 241 | struct xfs_mount *mp) |
221 | { | 242 | { |
222 | return XFS_DQUOT_LOGRES(mp) + | 243 | return XFS_DQUOT_LOGRES(mp) + |
244 | xfs_calc_iunlink_remove_reservation(mp) + | ||
223 | MAX((xfs_calc_inode_res(mp, 2) + | 245 | MAX((xfs_calc_inode_res(mp, 2) + |
224 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | 246 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
225 | XFS_FSB_TO_B(mp, 1))), | 247 | XFS_FSB_TO_B(mp, 1))), |
@@ -229,6 +251,18 @@ xfs_calc_link_reservation( | |||
229 | } | 251 | } |
230 | 252 | ||
231 | /* | 253 | /* |
254 | * For adding an inode to unlinked list we can modify: | ||
255 | * the agi hash list: sector size | ||
256 | * the unlinked inode: inode size | ||
257 | */ | ||
258 | STATIC uint | ||
259 | xfs_calc_iunlink_add_reservation(xfs_mount_t *mp) | ||
260 | { | ||
261 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + | ||
262 | xfs_calc_inode_res(mp, 1); | ||
263 | } | ||
264 | |||
265 | /* | ||
232 | * For removing a directory entry we can modify: | 266 | * For removing a directory entry we can modify: |
233 | * the parent directory inode: inode size | 267 | * the parent directory inode: inode size |
234 | * the removed inode: inode size | 268 | * the removed inode: inode size |
@@ -245,10 +279,11 @@ xfs_calc_remove_reservation( | |||
245 | struct xfs_mount *mp) | 279 | struct xfs_mount *mp) |
246 | { | 280 | { |
247 | return XFS_DQUOT_LOGRES(mp) + | 281 | return XFS_DQUOT_LOGRES(mp) + |
248 | MAX((xfs_calc_inode_res(mp, 2) + | 282 | xfs_calc_iunlink_add_reservation(mp) + |
283 | MAX((xfs_calc_inode_res(mp, 1) + | ||
249 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), | 284 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
250 | XFS_FSB_TO_B(mp, 1))), | 285 | XFS_FSB_TO_B(mp, 1))), |
251 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + | 286 | (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) + |
252 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), | 287 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), |
253 | XFS_FSB_TO_B(mp, 1)))); | 288 | XFS_FSB_TO_B(mp, 1)))); |
254 | } | 289 | } |
@@ -343,6 +378,20 @@ xfs_calc_create_reservation( | |||
343 | 378 | ||
344 | } | 379 | } |
345 | 380 | ||
381 | STATIC uint | ||
382 | xfs_calc_create_tmpfile_reservation( | ||
383 | struct xfs_mount *mp) | ||
384 | { | ||
385 | uint res = XFS_DQUOT_LOGRES(mp); | ||
386 | |||
387 | if (xfs_sb_version_hascrc(&mp->m_sb)) | ||
388 | res += xfs_calc_icreate_resv_alloc(mp); | ||
389 | else | ||
390 | res += xfs_calc_create_resv_alloc(mp); | ||
391 | |||
392 | return res + xfs_calc_iunlink_add_reservation(mp); | ||
393 | } | ||
394 | |||
346 | /* | 395 | /* |
347 | * Making a new directory is the same as creating a new file. | 396 | * Making a new directory is the same as creating a new file. |
348 | */ | 397 | */ |
@@ -383,9 +432,9 @@ xfs_calc_ifree_reservation( | |||
383 | { | 432 | { |
384 | return XFS_DQUOT_LOGRES(mp) + | 433 | return XFS_DQUOT_LOGRES(mp) + |
385 | xfs_calc_inode_res(mp, 1) + | 434 | xfs_calc_inode_res(mp, 1) + |
386 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + | 435 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
387 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + | 436 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + |
388 | max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size) + | 437 | xfs_calc_iunlink_remove_reservation(mp) + |
389 | xfs_calc_buf_res(1, 0) + | 438 | xfs_calc_buf_res(1, 0) + |
390 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + | 439 | xfs_calc_buf_res(2 + mp->m_ialloc_blks + |
391 | mp->m_in_maxlevels, 0) + | 440 | mp->m_in_maxlevels, 0) + |
@@ -644,15 +693,14 @@ xfs_calc_qm_setqlim_reservation( | |||
644 | 693 | ||
645 | /* | 694 | /* |
646 | * Allocating quota on disk if needed. | 695 | * Allocating quota on disk if needed. |
647 | * the write transaction log space: M_RES(mp)->tr_write.tr_logres | 696 | * the write transaction log space for quota file extent allocation |
648 | * the unit of quota allocation: one system block size | 697 | * the unit of quota allocation: one system block size |
649 | */ | 698 | */ |
650 | STATIC uint | 699 | STATIC uint |
651 | xfs_calc_qm_dqalloc_reservation( | 700 | xfs_calc_qm_dqalloc_reservation( |
652 | struct xfs_mount *mp) | 701 | struct xfs_mount *mp) |
653 | { | 702 | { |
654 | ASSERT(M_RES(mp)->tr_write.tr_logres); | 703 | return xfs_calc_write_reservation(mp) + |
655 | return M_RES(mp)->tr_write.tr_logres + | ||
656 | xfs_calc_buf_res(1, | 704 | xfs_calc_buf_res(1, |
657 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); | 705 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); |
658 | } | 706 | } |
@@ -729,6 +777,11 @@ xfs_trans_resv_calc( | |||
729 | resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; | 777 | resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT; |
730 | resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | 778 | resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
731 | 779 | ||
780 | resp->tr_create_tmpfile.tr_logres = | ||
781 | xfs_calc_create_tmpfile_reservation(mp); | ||
782 | resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT; | ||
783 | resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | ||
784 | |||
732 | resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); | 785 | resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp); |
733 | resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; | 786 | resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT; |
734 | resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; | 787 | resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES; |
@@ -784,7 +837,6 @@ xfs_trans_resv_calc( | |||
784 | /* The following transaction are logged in logical format */ | 837 | /* The following transaction are logged in logical format */ |
785 | resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); | 838 | resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp); |
786 | resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); | 839 | resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp); |
787 | resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp); | ||
788 | resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); | 840 | resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp); |
789 | resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); | 841 | resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp); |
790 | resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); | 842 | resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp); |
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h index de7de9aaad8a..1097d14cd583 100644 --- a/fs/xfs/xfs_trans_resv.h +++ b/fs/xfs/xfs_trans_resv.h | |||
@@ -38,11 +38,11 @@ struct xfs_trans_resv { | |||
38 | struct xfs_trans_res tr_remove; /* unlink trans */ | 38 | struct xfs_trans_res tr_remove; /* unlink trans */ |
39 | struct xfs_trans_res tr_symlink; /* symlink trans */ | 39 | struct xfs_trans_res tr_symlink; /* symlink trans */ |
40 | struct xfs_trans_res tr_create; /* create trans */ | 40 | struct xfs_trans_res tr_create; /* create trans */ |
41 | struct xfs_trans_res tr_create_tmpfile; /* create O_TMPFILE trans */ | ||
41 | struct xfs_trans_res tr_mkdir; /* mkdir trans */ | 42 | struct xfs_trans_res tr_mkdir; /* mkdir trans */ |
42 | struct xfs_trans_res tr_ifree; /* inode free trans */ | 43 | struct xfs_trans_res tr_ifree; /* inode free trans */ |
43 | struct xfs_trans_res tr_ichange; /* inode update trans */ | 44 | struct xfs_trans_res tr_ichange; /* inode update trans */ |
44 | struct xfs_trans_res tr_growdata; /* fs data section grow trans */ | 45 | struct xfs_trans_res tr_growdata; /* fs data section grow trans */ |
45 | struct xfs_trans_res tr_swrite; /* sync write inode trans */ | ||
46 | struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ | 46 | struct xfs_trans_res tr_addafork; /* add inode attr fork trans */ |
47 | struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ | 47 | struct xfs_trans_res tr_writeid; /* write setuid/setgid file */ |
48 | struct xfs_trans_res tr_attrinval; /* attr fork buffer | 48 | struct xfs_trans_res tr_attrinval; /* attr fork buffer |
@@ -100,6 +100,7 @@ struct xfs_trans_resv { | |||
100 | #define XFS_ITRUNCATE_LOG_COUNT 2 | 100 | #define XFS_ITRUNCATE_LOG_COUNT 2 |
101 | #define XFS_INACTIVE_LOG_COUNT 2 | 101 | #define XFS_INACTIVE_LOG_COUNT 2 |
102 | #define XFS_CREATE_LOG_COUNT 2 | 102 | #define XFS_CREATE_LOG_COUNT 2 |
103 | #define XFS_CREATE_TMPFILE_LOG_COUNT 2 | ||
103 | #define XFS_MKDIR_LOG_COUNT 3 | 104 | #define XFS_MKDIR_LOG_COUNT 3 |
104 | #define XFS_SYMLINK_LOG_COUNT 3 | 105 | #define XFS_SYMLINK_LOG_COUNT 3 |
105 | #define XFS_REMOVE_LOG_COUNT 2 | 106 | #define XFS_REMOVE_LOG_COUNT 2 |