diff options
Diffstat (limited to 'fs')
194 files changed, 6118 insertions, 6000 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 055562c580b4..9ff073f4090a 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c | |||
| @@ -148,13 +148,14 @@ static int v9fs_release_page(struct page *page, gfp_t gfp) | |||
| 148 | * @offset: offset in the page | 148 | * @offset: offset in the page |
| 149 | */ | 149 | */ |
| 150 | 150 | ||
| 151 | static void v9fs_invalidate_page(struct page *page, unsigned long offset) | 151 | static void v9fs_invalidate_page(struct page *page, unsigned int offset, |
| 152 | unsigned int length) | ||
| 152 | { | 153 | { |
| 153 | /* | 154 | /* |
| 154 | * If called with zero offset, we should release | 155 | * If called with zero offset, we should release |
| 155 | * the private state assocated with the page | 156 | * the private state assocated with the page |
| 156 | */ | 157 | */ |
| 157 | if (offset == 0) | 158 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
| 158 | v9fs_fscache_invalidate_page(page); | 159 | v9fs_fscache_invalidate_page(page); |
| 159 | } | 160 | } |
| 160 | 161 | ||
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index be1e34adc3c6..4d0c2e0be7e5 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c | |||
| @@ -101,16 +101,15 @@ static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) | |||
| 101 | } | 101 | } |
| 102 | 102 | ||
| 103 | /** | 103 | /** |
| 104 | * v9fs_dir_readdir - read a directory | 104 | * v9fs_dir_readdir - iterate through a directory |
| 105 | * @filp: opened file structure | 105 | * @file: opened file structure |
| 106 | * @dirent: directory structure ??? | 106 | * @ctx: actor we feed the entries to |
| 107 | * @filldir: function to populate directory structure ??? | ||
| 108 | * | 107 | * |
| 109 | */ | 108 | */ |
| 110 | 109 | ||
| 111 | static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | 110 | static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) |
| 112 | { | 111 | { |
| 113 | int over; | 112 | bool over; |
| 114 | struct p9_wstat st; | 113 | struct p9_wstat st; |
| 115 | int err = 0; | 114 | int err = 0; |
| 116 | struct p9_fid *fid; | 115 | struct p9_fid *fid; |
| @@ -118,19 +117,19 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 118 | int reclen = 0; | 117 | int reclen = 0; |
| 119 | struct p9_rdir *rdir; | 118 | struct p9_rdir *rdir; |
| 120 | 119 | ||
| 121 | p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 120 | p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); |
| 122 | fid = filp->private_data; | 121 | fid = file->private_data; |
| 123 | 122 | ||
| 124 | buflen = fid->clnt->msize - P9_IOHDRSZ; | 123 | buflen = fid->clnt->msize - P9_IOHDRSZ; |
| 125 | 124 | ||
| 126 | rdir = v9fs_alloc_rdir_buf(filp, buflen); | 125 | rdir = v9fs_alloc_rdir_buf(file, buflen); |
| 127 | if (!rdir) | 126 | if (!rdir) |
| 128 | return -ENOMEM; | 127 | return -ENOMEM; |
| 129 | 128 | ||
| 130 | while (1) { | 129 | while (1) { |
| 131 | if (rdir->tail == rdir->head) { | 130 | if (rdir->tail == rdir->head) { |
| 132 | err = v9fs_file_readn(filp, rdir->buf, NULL, | 131 | err = v9fs_file_readn(file, rdir->buf, NULL, |
| 133 | buflen, filp->f_pos); | 132 | buflen, ctx->pos); |
| 134 | if (err <= 0) | 133 | if (err <= 0) |
| 135 | return err; | 134 | return err; |
| 136 | 135 | ||
| @@ -148,51 +147,45 @@ static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 148 | } | 147 | } |
| 149 | reclen = st.size+2; | 148 | reclen = st.size+2; |
| 150 | 149 | ||
| 151 | over = filldir(dirent, st.name, strlen(st.name), | 150 | over = !dir_emit(ctx, st.name, strlen(st.name), |
| 152 | filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st)); | 151 | v9fs_qid2ino(&st.qid), dt_type(&st)); |
| 153 | |||
| 154 | p9stat_free(&st); | 152 | p9stat_free(&st); |
| 155 | |||
| 156 | if (over) | 153 | if (over) |
| 157 | return 0; | 154 | return 0; |
| 158 | 155 | ||
| 159 | rdir->head += reclen; | 156 | rdir->head += reclen; |
| 160 | filp->f_pos += reclen; | 157 | ctx->pos += reclen; |
| 161 | } | 158 | } |
| 162 | } | 159 | } |
| 163 | } | 160 | } |
| 164 | 161 | ||
| 165 | /** | 162 | /** |
| 166 | * v9fs_dir_readdir_dotl - read a directory | 163 | * v9fs_dir_readdir_dotl - iterate through a directory |
| 167 | * @filp: opened file structure | 164 | * @file: opened file structure |
| 168 | * @dirent: buffer to fill dirent structures | 165 | * @ctx: actor we feed the entries to |
| 169 | * @filldir: function to populate dirent structures | ||
| 170 | * | 166 | * |
| 171 | */ | 167 | */ |
| 172 | static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, | 168 | static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) |
| 173 | filldir_t filldir) | ||
| 174 | { | 169 | { |
| 175 | int over; | ||
| 176 | int err = 0; | 170 | int err = 0; |
| 177 | struct p9_fid *fid; | 171 | struct p9_fid *fid; |
| 178 | int buflen; | 172 | int buflen; |
| 179 | struct p9_rdir *rdir; | 173 | struct p9_rdir *rdir; |
| 180 | struct p9_dirent curdirent; | 174 | struct p9_dirent curdirent; |
| 181 | u64 oldoffset = 0; | ||
| 182 | 175 | ||
| 183 | p9_debug(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name); | 176 | p9_debug(P9_DEBUG_VFS, "name %s\n", file->f_path.dentry->d_name.name); |
| 184 | fid = filp->private_data; | 177 | fid = file->private_data; |
| 185 | 178 | ||
| 186 | buflen = fid->clnt->msize - P9_READDIRHDRSZ; | 179 | buflen = fid->clnt->msize - P9_READDIRHDRSZ; |
| 187 | 180 | ||
| 188 | rdir = v9fs_alloc_rdir_buf(filp, buflen); | 181 | rdir = v9fs_alloc_rdir_buf(file, buflen); |
| 189 | if (!rdir) | 182 | if (!rdir) |
| 190 | return -ENOMEM; | 183 | return -ENOMEM; |
| 191 | 184 | ||
| 192 | while (1) { | 185 | while (1) { |
| 193 | if (rdir->tail == rdir->head) { | 186 | if (rdir->tail == rdir->head) { |
| 194 | err = p9_client_readdir(fid, rdir->buf, buflen, | 187 | err = p9_client_readdir(fid, rdir->buf, buflen, |
| 195 | filp->f_pos); | 188 | ctx->pos); |
| 196 | if (err <= 0) | 189 | if (err <= 0) |
| 197 | return err; | 190 | return err; |
| 198 | 191 | ||
| @@ -210,22 +203,13 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent, | |||
| 210 | return -EIO; | 203 | return -EIO; |
| 211 | } | 204 | } |
| 212 | 205 | ||
| 213 | /* d_off in dirent structure tracks the offset into | 206 | if (!dir_emit(ctx, curdirent.d_name, |
| 214 | * the next dirent in the dir. However, filldir() | 207 | strlen(curdirent.d_name), |
| 215 | * expects offset into the current dirent. Hence | 208 | v9fs_qid2ino(&curdirent.qid), |
| 216 | * while calling filldir send the offset from the | 209 | curdirent.d_type)) |
| 217 | * previous dirent structure. | ||
| 218 | */ | ||
| 219 | over = filldir(dirent, curdirent.d_name, | ||
| 220 | strlen(curdirent.d_name), | ||
| 221 | oldoffset, v9fs_qid2ino(&curdirent.qid), | ||
| 222 | curdirent.d_type); | ||
| 223 | oldoffset = curdirent.d_off; | ||
| 224 | |||
| 225 | if (over) | ||
| 226 | return 0; | 210 | return 0; |
| 227 | 211 | ||
| 228 | filp->f_pos = curdirent.d_off; | 212 | ctx->pos = curdirent.d_off; |
| 229 | rdir->head += err; | 213 | rdir->head += err; |
| 230 | } | 214 | } |
| 231 | } | 215 | } |
| @@ -254,7 +238,7 @@ int v9fs_dir_release(struct inode *inode, struct file *filp) | |||
| 254 | const struct file_operations v9fs_dir_operations = { | 238 | const struct file_operations v9fs_dir_operations = { |
| 255 | .read = generic_read_dir, | 239 | .read = generic_read_dir, |
| 256 | .llseek = generic_file_llseek, | 240 | .llseek = generic_file_llseek, |
| 257 | .readdir = v9fs_dir_readdir, | 241 | .iterate = v9fs_dir_readdir, |
| 258 | .open = v9fs_file_open, | 242 | .open = v9fs_file_open, |
| 259 | .release = v9fs_dir_release, | 243 | .release = v9fs_dir_release, |
| 260 | }; | 244 | }; |
| @@ -262,7 +246,7 @@ const struct file_operations v9fs_dir_operations = { | |||
| 262 | const struct file_operations v9fs_dir_operations_dotl = { | 246 | const struct file_operations v9fs_dir_operations_dotl = { |
| 263 | .read = generic_read_dir, | 247 | .read = generic_read_dir, |
| 264 | .llseek = generic_file_llseek, | 248 | .llseek = generic_file_llseek, |
| 265 | .readdir = v9fs_dir_readdir_dotl, | 249 | .iterate = v9fs_dir_readdir_dotl, |
| 266 | .open = v9fs_file_open, | 250 | .open = v9fs_file_open, |
| 267 | .release = v9fs_dir_release, | 251 | .release = v9fs_dir_release, |
| 268 | .fsync = v9fs_file_fsync_dotl, | 252 | .fsync = v9fs_file_fsync_dotl, |
diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 9cf874ce8336..ade28bb058e3 100644 --- a/fs/adfs/dir.c +++ b/fs/adfs/dir.c | |||
| @@ -17,47 +17,43 @@ | |||
| 17 | static DEFINE_RWLOCK(adfs_dir_lock); | 17 | static DEFINE_RWLOCK(adfs_dir_lock); |
| 18 | 18 | ||
| 19 | static int | 19 | static int |
| 20 | adfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 20 | adfs_readdir(struct file *file, struct dir_context *ctx) |
| 21 | { | 21 | { |
| 22 | struct inode *inode = file_inode(filp); | 22 | struct inode *inode = file_inode(file); |
| 23 | struct super_block *sb = inode->i_sb; | 23 | struct super_block *sb = inode->i_sb; |
| 24 | struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; | 24 | struct adfs_dir_ops *ops = ADFS_SB(sb)->s_dir; |
| 25 | struct object_info obj; | 25 | struct object_info obj; |
| 26 | struct adfs_dir dir; | 26 | struct adfs_dir dir; |
| 27 | int ret = 0; | 27 | int ret = 0; |
| 28 | 28 | ||
| 29 | if (filp->f_pos >> 32) | 29 | if (ctx->pos >> 32) |
| 30 | goto out; | 30 | return 0; |
| 31 | 31 | ||
| 32 | ret = ops->read(sb, inode->i_ino, inode->i_size, &dir); | 32 | ret = ops->read(sb, inode->i_ino, inode->i_size, &dir); |
| 33 | if (ret) | 33 | if (ret) |
| 34 | goto out; | 34 | return ret; |
| 35 | 35 | ||
| 36 | switch ((unsigned long)filp->f_pos) { | 36 | if (ctx->pos == 0) { |
| 37 | case 0: | 37 | if (!dir_emit_dot(file, ctx)) |
| 38 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
| 39 | goto free_out; | 38 | goto free_out; |
| 40 | filp->f_pos += 1; | 39 | ctx->pos = 1; |
| 41 | 40 | } | |
| 42 | case 1: | 41 | if (ctx->pos == 1) { |
| 43 | if (filldir(dirent, "..", 2, 1, dir.parent_id, DT_DIR) < 0) | 42 | if (!dir_emit(ctx, "..", 2, dir.parent_id, DT_DIR)) |
| 44 | goto free_out; | 43 | goto free_out; |
| 45 | filp->f_pos += 1; | 44 | ctx->pos = 2; |
| 46 | |||
| 47 | default: | ||
| 48 | break; | ||
| 49 | } | 45 | } |
| 50 | 46 | ||
| 51 | read_lock(&adfs_dir_lock); | 47 | read_lock(&adfs_dir_lock); |
| 52 | 48 | ||
| 53 | ret = ops->setpos(&dir, filp->f_pos - 2); | 49 | ret = ops->setpos(&dir, ctx->pos - 2); |
| 54 | if (ret) | 50 | if (ret) |
| 55 | goto unlock_out; | 51 | goto unlock_out; |
| 56 | while (ops->getnext(&dir, &obj) == 0) { | 52 | while (ops->getnext(&dir, &obj) == 0) { |
| 57 | if (filldir(dirent, obj.name, obj.name_len, | 53 | if (!dir_emit(ctx, obj.name, obj.name_len, |
| 58 | filp->f_pos, obj.file_id, DT_UNKNOWN) < 0) | 54 | obj.file_id, DT_UNKNOWN)) |
| 59 | goto unlock_out; | 55 | break; |
| 60 | filp->f_pos += 1; | 56 | ctx->pos++; |
| 61 | } | 57 | } |
| 62 | 58 | ||
| 63 | unlock_out: | 59 | unlock_out: |
| @@ -65,8 +61,6 @@ unlock_out: | |||
| 65 | 61 | ||
| 66 | free_out: | 62 | free_out: |
| 67 | ops->free(&dir); | 63 | ops->free(&dir); |
| 68 | |||
| 69 | out: | ||
| 70 | return ret; | 64 | return ret; |
| 71 | } | 65 | } |
| 72 | 66 | ||
| @@ -192,7 +186,7 @@ out: | |||
| 192 | const struct file_operations adfs_dir_operations = { | 186 | const struct file_operations adfs_dir_operations = { |
| 193 | .read = generic_read_dir, | 187 | .read = generic_read_dir, |
| 194 | .llseek = generic_file_llseek, | 188 | .llseek = generic_file_llseek, |
| 195 | .readdir = adfs_readdir, | 189 | .iterate = adfs_readdir, |
| 196 | .fsync = generic_file_fsync, | 190 | .fsync = generic_file_fsync, |
| 197 | }; | 191 | }; |
| 198 | 192 | ||
diff --git a/fs/affs/dir.c b/fs/affs/dir.c index fd11a6d608ee..f1eba8c3644e 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c | |||
| @@ -15,12 +15,12 @@ | |||
| 15 | 15 | ||
| 16 | #include "affs.h" | 16 | #include "affs.h" |
| 17 | 17 | ||
| 18 | static int affs_readdir(struct file *, void *, filldir_t); | 18 | static int affs_readdir(struct file *, struct dir_context *); |
| 19 | 19 | ||
| 20 | const struct file_operations affs_dir_operations = { | 20 | const struct file_operations affs_dir_operations = { |
| 21 | .read = generic_read_dir, | 21 | .read = generic_read_dir, |
| 22 | .llseek = generic_file_llseek, | 22 | .llseek = generic_file_llseek, |
| 23 | .readdir = affs_readdir, | 23 | .iterate = affs_readdir, |
| 24 | .fsync = affs_file_fsync, | 24 | .fsync = affs_file_fsync, |
| 25 | }; | 25 | }; |
| 26 | 26 | ||
| @@ -40,52 +40,35 @@ const struct inode_operations affs_dir_inode_operations = { | |||
| 40 | }; | 40 | }; |
| 41 | 41 | ||
| 42 | static int | 42 | static int |
| 43 | affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 43 | affs_readdir(struct file *file, struct dir_context *ctx) |
| 44 | { | 44 | { |
| 45 | struct inode *inode = file_inode(filp); | 45 | struct inode *inode = file_inode(file); |
| 46 | struct super_block *sb = inode->i_sb; | 46 | struct super_block *sb = inode->i_sb; |
| 47 | struct buffer_head *dir_bh; | 47 | struct buffer_head *dir_bh = NULL; |
| 48 | struct buffer_head *fh_bh; | 48 | struct buffer_head *fh_bh = NULL; |
| 49 | unsigned char *name; | 49 | unsigned char *name; |
| 50 | int namelen; | 50 | int namelen; |
| 51 | u32 i; | 51 | u32 i; |
| 52 | int hash_pos; | 52 | int hash_pos; |
| 53 | int chain_pos; | 53 | int chain_pos; |
| 54 | u32 f_pos; | ||
| 55 | u32 ino; | 54 | u32 ino; |
| 56 | int stored; | ||
| 57 | int res; | ||
| 58 | 55 | ||
| 59 | pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)filp->f_pos); | 56 | pr_debug("AFFS: readdir(ino=%lu,f_pos=%lx)\n",inode->i_ino,(unsigned long)ctx->pos); |
| 60 | 57 | ||
| 61 | stored = 0; | 58 | if (ctx->pos < 2) { |
| 62 | res = -EIO; | 59 | file->private_data = (void *)0; |
| 63 | dir_bh = NULL; | 60 | if (!dir_emit_dots(file, ctx)) |
| 64 | fh_bh = NULL; | ||
| 65 | f_pos = filp->f_pos; | ||
| 66 | |||
| 67 | if (f_pos == 0) { | ||
| 68 | filp->private_data = (void *)0; | ||
| 69 | if (filldir(dirent, ".", 1, f_pos, inode->i_ino, DT_DIR) < 0) | ||
| 70 | return 0; | 61 | return 0; |
| 71 | filp->f_pos = f_pos = 1; | ||
| 72 | stored++; | ||
| 73 | } | ||
| 74 | if (f_pos == 1) { | ||
| 75 | if (filldir(dirent, "..", 2, f_pos, parent_ino(filp->f_path.dentry), DT_DIR) < 0) | ||
| 76 | return stored; | ||
| 77 | filp->f_pos = f_pos = 2; | ||
| 78 | stored++; | ||
| 79 | } | 62 | } |
| 80 | 63 | ||
| 81 | affs_lock_dir(inode); | 64 | affs_lock_dir(inode); |
| 82 | chain_pos = (f_pos - 2) & 0xffff; | 65 | chain_pos = (ctx->pos - 2) & 0xffff; |
| 83 | hash_pos = (f_pos - 2) >> 16; | 66 | hash_pos = (ctx->pos - 2) >> 16; |
| 84 | if (chain_pos == 0xffff) { | 67 | if (chain_pos == 0xffff) { |
| 85 | affs_warning(sb, "readdir", "More than 65535 entries in chain"); | 68 | affs_warning(sb, "readdir", "More than 65535 entries in chain"); |
| 86 | chain_pos = 0; | 69 | chain_pos = 0; |
| 87 | hash_pos++; | 70 | hash_pos++; |
| 88 | filp->f_pos = ((hash_pos << 16) | chain_pos) + 2; | 71 | ctx->pos = ((hash_pos << 16) | chain_pos) + 2; |
| 89 | } | 72 | } |
| 90 | dir_bh = affs_bread(sb, inode->i_ino); | 73 | dir_bh = affs_bread(sb, inode->i_ino); |
| 91 | if (!dir_bh) | 74 | if (!dir_bh) |
| @@ -94,8 +77,8 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 94 | /* If the directory hasn't changed since the last call to readdir(), | 77 | /* If the directory hasn't changed since the last call to readdir(), |
| 95 | * we can jump directly to where we left off. | 78 | * we can jump directly to where we left off. |
| 96 | */ | 79 | */ |
| 97 | ino = (u32)(long)filp->private_data; | 80 | ino = (u32)(long)file->private_data; |
| 98 | if (ino && filp->f_version == inode->i_version) { | 81 | if (ino && file->f_version == inode->i_version) { |
| 99 | pr_debug("AFFS: readdir() left off=%d\n", ino); | 82 | pr_debug("AFFS: readdir() left off=%d\n", ino); |
| 100 | goto inside; | 83 | goto inside; |
| 101 | } | 84 | } |
| @@ -105,7 +88,7 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 105 | fh_bh = affs_bread(sb, ino); | 88 | fh_bh = affs_bread(sb, ino); |
| 106 | if (!fh_bh) { | 89 | if (!fh_bh) { |
| 107 | affs_error(sb, "readdir","Cannot read block %d", i); | 90 | affs_error(sb, "readdir","Cannot read block %d", i); |
| 108 | goto readdir_out; | 91 | return -EIO; |
| 109 | } | 92 | } |
| 110 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); | 93 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); |
| 111 | affs_brelse(fh_bh); | 94 | affs_brelse(fh_bh); |
| @@ -119,38 +102,34 @@ affs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 119 | ino = be32_to_cpu(AFFS_HEAD(dir_bh)->table[hash_pos]); | 102 | ino = be32_to_cpu(AFFS_HEAD(dir_bh)->table[hash_pos]); |
| 120 | if (!ino) | 103 | if (!ino) |
| 121 | continue; | 104 | continue; |
| 122 | f_pos = (hash_pos << 16) + 2; | 105 | ctx->pos = (hash_pos << 16) + 2; |
| 123 | inside: | 106 | inside: |
| 124 | do { | 107 | do { |
| 125 | fh_bh = affs_bread(sb, ino); | 108 | fh_bh = affs_bread(sb, ino); |
| 126 | if (!fh_bh) { | 109 | if (!fh_bh) { |
| 127 | affs_error(sb, "readdir","Cannot read block %d", ino); | 110 | affs_error(sb, "readdir","Cannot read block %d", ino); |
| 128 | goto readdir_done; | 111 | break; |
| 129 | } | 112 | } |
| 130 | 113 | ||
| 131 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); | 114 | namelen = min(AFFS_TAIL(sb, fh_bh)->name[0], (u8)30); |
| 132 | name = AFFS_TAIL(sb, fh_bh)->name + 1; | 115 | name = AFFS_TAIL(sb, fh_bh)->name + 1; |
| 133 | pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n", | 116 | pr_debug("AFFS: readdir(): filldir(\"%.*s\", ino=%u), hash=%d, f_pos=%x\n", |
| 134 | namelen, name, ino, hash_pos, f_pos); | 117 | namelen, name, ino, hash_pos, (u32)ctx->pos); |
| 135 | if (filldir(dirent, name, namelen, f_pos, ino, DT_UNKNOWN) < 0) | 118 | if (!dir_emit(ctx, name, namelen, ino, DT_UNKNOWN)) |
| 136 | goto readdir_done; | 119 | goto readdir_done; |
| 137 | stored++; | 120 | ctx->pos++; |
| 138 | f_pos++; | ||
| 139 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); | 121 | ino = be32_to_cpu(AFFS_TAIL(sb, fh_bh)->hash_chain); |
| 140 | affs_brelse(fh_bh); | 122 | affs_brelse(fh_bh); |
| 141 | fh_bh = NULL; | 123 | fh_bh = NULL; |
| 142 | } while (ino); | 124 | } while (ino); |
| 143 | } | 125 | } |
| 144 | readdir_done: | 126 | readdir_done: |
| 145 | filp->f_pos = f_pos; | 127 | file->f_version = inode->i_version; |
| 146 | filp->f_version = inode->i_version; | 128 | file->private_data = (void *)(long)ino; |
| 147 | filp->private_data = (void *)(long)ino; | ||
| 148 | res = stored; | ||
| 149 | 129 | ||
| 150 | readdir_out: | 130 | readdir_out: |
| 151 | affs_brelse(dir_bh); | 131 | affs_brelse(dir_bh); |
| 152 | affs_brelse(fh_bh); | 132 | affs_brelse(fh_bh); |
| 153 | affs_unlock_dir(inode); | 133 | affs_unlock_dir(inode); |
| 154 | pr_debug("AFFS: readdir()=%d\n", stored); | 134 | return 0; |
| 155 | return res; | ||
| 156 | } | 135 | } |
diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 7a465ed04444..34494fbead0a 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, | 22 | static struct dentry *afs_lookup(struct inode *dir, struct dentry *dentry, |
| 23 | unsigned int flags); | 23 | unsigned int flags); |
| 24 | static int afs_dir_open(struct inode *inode, struct file *file); | 24 | static int afs_dir_open(struct inode *inode, struct file *file); |
| 25 | static int afs_readdir(struct file *file, void *dirent, filldir_t filldir); | 25 | static int afs_readdir(struct file *file, struct dir_context *ctx); |
| 26 | static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); | 26 | static int afs_d_revalidate(struct dentry *dentry, unsigned int flags); |
| 27 | static int afs_d_delete(const struct dentry *dentry); | 27 | static int afs_d_delete(const struct dentry *dentry); |
| 28 | static void afs_d_release(struct dentry *dentry); | 28 | static void afs_d_release(struct dentry *dentry); |
| @@ -43,7 +43,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 43 | const struct file_operations afs_dir_file_operations = { | 43 | const struct file_operations afs_dir_file_operations = { |
| 44 | .open = afs_dir_open, | 44 | .open = afs_dir_open, |
| 45 | .release = afs_release, | 45 | .release = afs_release, |
| 46 | .readdir = afs_readdir, | 46 | .iterate = afs_readdir, |
| 47 | .lock = afs_lock, | 47 | .lock = afs_lock, |
| 48 | .llseek = generic_file_llseek, | 48 | .llseek = generic_file_llseek, |
| 49 | }; | 49 | }; |
| @@ -119,9 +119,9 @@ struct afs_dir_page { | |||
| 119 | }; | 119 | }; |
| 120 | 120 | ||
| 121 | struct afs_lookup_cookie { | 121 | struct afs_lookup_cookie { |
| 122 | struct dir_context ctx; | ||
| 122 | struct afs_fid fid; | 123 | struct afs_fid fid; |
| 123 | const char *name; | 124 | struct qstr name; |
| 124 | size_t nlen; | ||
| 125 | int found; | 125 | int found; |
| 126 | }; | 126 | }; |
| 127 | 127 | ||
| @@ -228,20 +228,18 @@ static int afs_dir_open(struct inode *inode, struct file *file) | |||
| 228 | /* | 228 | /* |
| 229 | * deal with one block in an AFS directory | 229 | * deal with one block in an AFS directory |
| 230 | */ | 230 | */ |
| 231 | static int afs_dir_iterate_block(unsigned *fpos, | 231 | static int afs_dir_iterate_block(struct dir_context *ctx, |
| 232 | union afs_dir_block *block, | 232 | union afs_dir_block *block, |
| 233 | unsigned blkoff, | 233 | unsigned blkoff) |
| 234 | void *cookie, | ||
| 235 | filldir_t filldir) | ||
| 236 | { | 234 | { |
| 237 | union afs_dirent *dire; | 235 | union afs_dirent *dire; |
| 238 | unsigned offset, next, curr; | 236 | unsigned offset, next, curr; |
| 239 | size_t nlen; | 237 | size_t nlen; |
| 240 | int tmp, ret; | 238 | int tmp; |
| 241 | 239 | ||
| 242 | _enter("%u,%x,%p,,",*fpos,blkoff,block); | 240 | _enter("%u,%x,%p,,",(unsigned)ctx->pos,blkoff,block); |
| 243 | 241 | ||
| 244 | curr = (*fpos - blkoff) / sizeof(union afs_dirent); | 242 | curr = (ctx->pos - blkoff) / sizeof(union afs_dirent); |
| 245 | 243 | ||
| 246 | /* walk through the block, an entry at a time */ | 244 | /* walk through the block, an entry at a time */ |
| 247 | for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries; | 245 | for (offset = AFS_DIRENT_PER_BLOCK - block->pagehdr.nentries; |
| @@ -256,7 +254,7 @@ static int afs_dir_iterate_block(unsigned *fpos, | |||
| 256 | _debug("ENT[%Zu.%u]: unused", | 254 | _debug("ENT[%Zu.%u]: unused", |
| 257 | blkoff / sizeof(union afs_dir_block), offset); | 255 | blkoff / sizeof(union afs_dir_block), offset); |
| 258 | if (offset >= curr) | 256 | if (offset >= curr) |
| 259 | *fpos = blkoff + | 257 | ctx->pos = blkoff + |
| 260 | next * sizeof(union afs_dirent); | 258 | next * sizeof(union afs_dirent); |
| 261 | continue; | 259 | continue; |
| 262 | } | 260 | } |
| @@ -302,19 +300,15 @@ static int afs_dir_iterate_block(unsigned *fpos, | |||
| 302 | continue; | 300 | continue; |
| 303 | 301 | ||
| 304 | /* found the next entry */ | 302 | /* found the next entry */ |
| 305 | ret = filldir(cookie, | 303 | if (!dir_emit(ctx, dire->u.name, nlen, |
| 306 | dire->u.name, | ||
| 307 | nlen, | ||
| 308 | blkoff + offset * sizeof(union afs_dirent), | ||
| 309 | ntohl(dire->u.vnode), | 304 | ntohl(dire->u.vnode), |
| 310 | filldir == afs_lookup_filldir ? | 305 | ctx->actor == afs_lookup_filldir ? |
| 311 | ntohl(dire->u.unique) : DT_UNKNOWN); | 306 | ntohl(dire->u.unique) : DT_UNKNOWN)) { |
| 312 | if (ret < 0) { | ||
| 313 | _leave(" = 0 [full]"); | 307 | _leave(" = 0 [full]"); |
| 314 | return 0; | 308 | return 0; |
| 315 | } | 309 | } |
| 316 | 310 | ||
| 317 | *fpos = blkoff + next * sizeof(union afs_dirent); | 311 | ctx->pos = blkoff + next * sizeof(union afs_dirent); |
| 318 | } | 312 | } |
| 319 | 313 | ||
| 320 | _leave(" = 1 [more]"); | 314 | _leave(" = 1 [more]"); |
| @@ -324,8 +318,8 @@ static int afs_dir_iterate_block(unsigned *fpos, | |||
| 324 | /* | 318 | /* |
| 325 | * iterate through the data blob that lists the contents of an AFS directory | 319 | * iterate through the data blob that lists the contents of an AFS directory |
| 326 | */ | 320 | */ |
| 327 | static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | 321 | static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, |
| 328 | filldir_t filldir, struct key *key) | 322 | struct key *key) |
| 329 | { | 323 | { |
| 330 | union afs_dir_block *dblock; | 324 | union afs_dir_block *dblock; |
| 331 | struct afs_dir_page *dbuf; | 325 | struct afs_dir_page *dbuf; |
| @@ -333,7 +327,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
| 333 | unsigned blkoff, limit; | 327 | unsigned blkoff, limit; |
| 334 | int ret; | 328 | int ret; |
| 335 | 329 | ||
| 336 | _enter("{%lu},%u,,", dir->i_ino, *fpos); | 330 | _enter("{%lu},%u,,", dir->i_ino, (unsigned)ctx->pos); |
| 337 | 331 | ||
| 338 | if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { | 332 | if (test_bit(AFS_VNODE_DELETED, &AFS_FS_I(dir)->flags)) { |
| 339 | _leave(" = -ESTALE"); | 333 | _leave(" = -ESTALE"); |
| @@ -341,13 +335,13 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
| 341 | } | 335 | } |
| 342 | 336 | ||
| 343 | /* round the file position up to the next entry boundary */ | 337 | /* round the file position up to the next entry boundary */ |
| 344 | *fpos += sizeof(union afs_dirent) - 1; | 338 | ctx->pos += sizeof(union afs_dirent) - 1; |
| 345 | *fpos &= ~(sizeof(union afs_dirent) - 1); | 339 | ctx->pos &= ~(sizeof(union afs_dirent) - 1); |
| 346 | 340 | ||
| 347 | /* walk through the blocks in sequence */ | 341 | /* walk through the blocks in sequence */ |
| 348 | ret = 0; | 342 | ret = 0; |
| 349 | while (*fpos < dir->i_size) { | 343 | while (ctx->pos < dir->i_size) { |
| 350 | blkoff = *fpos & ~(sizeof(union afs_dir_block) - 1); | 344 | blkoff = ctx->pos & ~(sizeof(union afs_dir_block) - 1); |
| 351 | 345 | ||
| 352 | /* fetch the appropriate page from the directory */ | 346 | /* fetch the appropriate page from the directory */ |
| 353 | page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); | 347 | page = afs_dir_get_page(dir, blkoff / PAGE_SIZE, key); |
| @@ -364,8 +358,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
| 364 | do { | 358 | do { |
| 365 | dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / | 359 | dblock = &dbuf->blocks[(blkoff % PAGE_SIZE) / |
| 366 | sizeof(union afs_dir_block)]; | 360 | sizeof(union afs_dir_block)]; |
| 367 | ret = afs_dir_iterate_block(fpos, dblock, blkoff, | 361 | ret = afs_dir_iterate_block(ctx, dblock, blkoff); |
| 368 | cookie, filldir); | ||
| 369 | if (ret != 1) { | 362 | if (ret != 1) { |
| 370 | afs_dir_put_page(page); | 363 | afs_dir_put_page(page); |
| 371 | goto out; | 364 | goto out; |
| @@ -373,7 +366,7 @@ static int afs_dir_iterate(struct inode *dir, unsigned *fpos, void *cookie, | |||
| 373 | 366 | ||
| 374 | blkoff += sizeof(union afs_dir_block); | 367 | blkoff += sizeof(union afs_dir_block); |
| 375 | 368 | ||
| 376 | } while (*fpos < dir->i_size && blkoff < limit); | 369 | } while (ctx->pos < dir->i_size && blkoff < limit); |
| 377 | 370 | ||
| 378 | afs_dir_put_page(page); | 371 | afs_dir_put_page(page); |
| 379 | ret = 0; | 372 | ret = 0; |
| @@ -387,23 +380,10 @@ out: | |||
| 387 | /* | 380 | /* |
| 388 | * read an AFS directory | 381 | * read an AFS directory |
| 389 | */ | 382 | */ |
| 390 | static int afs_readdir(struct file *file, void *cookie, filldir_t filldir) | 383 | static int afs_readdir(struct file *file, struct dir_context *ctx) |
| 391 | { | 384 | { |
| 392 | unsigned fpos; | 385 | return afs_dir_iterate(file_inode(file), |
| 393 | int ret; | 386 | ctx, file->private_data); |
| 394 | |||
| 395 | _enter("{%Ld,{%lu}}", | ||
| 396 | file->f_pos, file_inode(file)->i_ino); | ||
| 397 | |||
| 398 | ASSERT(file->private_data != NULL); | ||
| 399 | |||
| 400 | fpos = file->f_pos; | ||
| 401 | ret = afs_dir_iterate(file_inode(file), &fpos, | ||
| 402 | cookie, filldir, file->private_data); | ||
| 403 | file->f_pos = fpos; | ||
| 404 | |||
| 405 | _leave(" = %d", ret); | ||
| 406 | return ret; | ||
| 407 | } | 387 | } |
| 408 | 388 | ||
| 409 | /* | 389 | /* |
| @@ -416,15 +396,16 @@ static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, | |||
| 416 | { | 396 | { |
| 417 | struct afs_lookup_cookie *cookie = _cookie; | 397 | struct afs_lookup_cookie *cookie = _cookie; |
| 418 | 398 | ||
| 419 | _enter("{%s,%Zu},%s,%u,,%llu,%u", | 399 | _enter("{%s,%u},%s,%u,,%llu,%u", |
| 420 | cookie->name, cookie->nlen, name, nlen, | 400 | cookie->name.name, cookie->name.len, name, nlen, |
| 421 | (unsigned long long) ino, dtype); | 401 | (unsigned long long) ino, dtype); |
| 422 | 402 | ||
| 423 | /* insanity checks first */ | 403 | /* insanity checks first */ |
| 424 | BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); | 404 | BUILD_BUG_ON(sizeof(union afs_dir_block) != 2048); |
| 425 | BUILD_BUG_ON(sizeof(union afs_dirent) != 32); | 405 | BUILD_BUG_ON(sizeof(union afs_dirent) != 32); |
| 426 | 406 | ||
| 427 | if (cookie->nlen != nlen || memcmp(cookie->name, name, nlen) != 0) { | 407 | if (cookie->name.len != nlen || |
| 408 | memcmp(cookie->name.name, name, nlen) != 0) { | ||
| 428 | _leave(" = 0 [no]"); | 409 | _leave(" = 0 [no]"); |
| 429 | return 0; | 410 | return 0; |
| 430 | } | 411 | } |
| @@ -444,24 +425,18 @@ static int afs_lookup_filldir(void *_cookie, const char *name, int nlen, | |||
| 444 | static int afs_do_lookup(struct inode *dir, struct dentry *dentry, | 425 | static int afs_do_lookup(struct inode *dir, struct dentry *dentry, |
| 445 | struct afs_fid *fid, struct key *key) | 426 | struct afs_fid *fid, struct key *key) |
| 446 | { | 427 | { |
| 447 | struct afs_lookup_cookie cookie; | 428 | struct afs_super_info *as = dir->i_sb->s_fs_info; |
| 448 | struct afs_super_info *as; | 429 | struct afs_lookup_cookie cookie = { |
| 449 | unsigned fpos; | 430 | .ctx.actor = afs_lookup_filldir, |
| 431 | .name = dentry->d_name, | ||
| 432 | .fid.vid = as->volume->vid | ||
| 433 | }; | ||
| 450 | int ret; | 434 | int ret; |
| 451 | 435 | ||
| 452 | _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name); | 436 | _enter("{%lu},%p{%s},", dir->i_ino, dentry, dentry->d_name.name); |
| 453 | 437 | ||
| 454 | as = dir->i_sb->s_fs_info; | ||
| 455 | |||
| 456 | /* search the directory */ | 438 | /* search the directory */ |
| 457 | cookie.name = dentry->d_name.name; | 439 | ret = afs_dir_iterate(dir, &cookie.ctx, key); |
| 458 | cookie.nlen = dentry->d_name.len; | ||
| 459 | cookie.fid.vid = as->volume->vid; | ||
| 460 | cookie.found = 0; | ||
| 461 | |||
| 462 | fpos = 0; | ||
| 463 | ret = afs_dir_iterate(dir, &fpos, &cookie, afs_lookup_filldir, | ||
| 464 | key); | ||
| 465 | if (ret < 0) { | 440 | if (ret < 0) { |
| 466 | _leave(" = %d [iter]", ret); | 441 | _leave(" = %d [iter]", ret); |
| 467 | return ret; | 442 | return ret; |
diff --git a/fs/afs/file.c b/fs/afs/file.c index 8f6e9234d565..66d50fe2ee45 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
| @@ -19,7 +19,8 @@ | |||
| 19 | #include "internal.h" | 19 | #include "internal.h" |
| 20 | 20 | ||
| 21 | static int afs_readpage(struct file *file, struct page *page); | 21 | static int afs_readpage(struct file *file, struct page *page); |
| 22 | static void afs_invalidatepage(struct page *page, unsigned long offset); | 22 | static void afs_invalidatepage(struct page *page, unsigned int offset, |
| 23 | unsigned int length); | ||
| 23 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); | 24 | static int afs_releasepage(struct page *page, gfp_t gfp_flags); |
| 24 | static int afs_launder_page(struct page *page); | 25 | static int afs_launder_page(struct page *page); |
| 25 | 26 | ||
| @@ -310,16 +311,17 @@ static int afs_launder_page(struct page *page) | |||
| 310 | * - release a page and clean up its private data if offset is 0 (indicating | 311 | * - release a page and clean up its private data if offset is 0 (indicating |
| 311 | * the entire page) | 312 | * the entire page) |
| 312 | */ | 313 | */ |
| 313 | static void afs_invalidatepage(struct page *page, unsigned long offset) | 314 | static void afs_invalidatepage(struct page *page, unsigned int offset, |
| 315 | unsigned int length) | ||
| 314 | { | 316 | { |
| 315 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); | 317 | struct afs_writeback *wb = (struct afs_writeback *) page_private(page); |
| 316 | 318 | ||
| 317 | _enter("{%lu},%lu", page->index, offset); | 319 | _enter("{%lu},%u,%u", page->index, offset, length); |
| 318 | 320 | ||
| 319 | BUG_ON(!PageLocked(page)); | 321 | BUG_ON(!PageLocked(page)); |
| 320 | 322 | ||
| 321 | /* we clean up only if the entire page is being invalidated */ | 323 | /* we clean up only if the entire page is being invalidated */ |
| 322 | if (offset == 0) { | 324 | if (offset == 0 && length == PAGE_CACHE_SIZE) { |
| 323 | #ifdef CONFIG_AFS_FSCACHE | 325 | #ifdef CONFIG_AFS_FSCACHE |
| 324 | if (PageFsCache(page)) { | 326 | if (PageFsCache(page)) { |
| 325 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); | 327 | struct afs_vnode *vnode = AFS_FS_I(page->mapping->host); |
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 085da86e07c2..ca8e55548d98 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c | |||
| @@ -41,7 +41,7 @@ const struct file_operations autofs4_root_operations = { | |||
| 41 | .open = dcache_dir_open, | 41 | .open = dcache_dir_open, |
| 42 | .release = dcache_dir_close, | 42 | .release = dcache_dir_close, |
| 43 | .read = generic_read_dir, | 43 | .read = generic_read_dir, |
| 44 | .readdir = dcache_readdir, | 44 | .iterate = dcache_readdir, |
| 45 | .llseek = dcache_dir_lseek, | 45 | .llseek = dcache_dir_lseek, |
| 46 | .unlocked_ioctl = autofs4_root_ioctl, | 46 | .unlocked_ioctl = autofs4_root_ioctl, |
| 47 | #ifdef CONFIG_COMPAT | 47 | #ifdef CONFIG_COMPAT |
| @@ -53,7 +53,7 @@ const struct file_operations autofs4_dir_operations = { | |||
| 53 | .open = autofs4_dir_open, | 53 | .open = autofs4_dir_open, |
| 54 | .release = dcache_dir_close, | 54 | .release = dcache_dir_close, |
| 55 | .read = generic_read_dir, | 55 | .read = generic_read_dir, |
| 56 | .readdir = dcache_readdir, | 56 | .iterate = dcache_readdir, |
| 57 | .llseek = dcache_dir_lseek, | 57 | .llseek = dcache_dir_lseek, |
| 58 | }; | 58 | }; |
| 59 | 59 | ||
diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 922ad460bff9..7c93953030fb 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c | |||
| @@ -45,7 +45,7 @@ static ssize_t bad_file_aio_write(struct kiocb *iocb, const struct iovec *iov, | |||
| 45 | return -EIO; | 45 | return -EIO; |
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | static int bad_file_readdir(struct file *filp, void *dirent, filldir_t filldir) | 48 | static int bad_file_readdir(struct file *file, struct dir_context *ctx) |
| 49 | { | 49 | { |
| 50 | return -EIO; | 50 | return -EIO; |
| 51 | } | 51 | } |
| @@ -152,7 +152,7 @@ static const struct file_operations bad_file_ops = | |||
| 152 | .write = bad_file_write, | 152 | .write = bad_file_write, |
| 153 | .aio_read = bad_file_aio_read, | 153 | .aio_read = bad_file_aio_read, |
| 154 | .aio_write = bad_file_aio_write, | 154 | .aio_write = bad_file_aio_write, |
| 155 | .readdir = bad_file_readdir, | 155 | .iterate = bad_file_readdir, |
| 156 | .poll = bad_file_poll, | 156 | .poll = bad_file_poll, |
| 157 | .unlocked_ioctl = bad_file_unlocked_ioctl, | 157 | .unlocked_ioctl = bad_file_unlocked_ioctl, |
| 158 | .compat_ioctl = bad_file_compat_ioctl, | 158 | .compat_ioctl = bad_file_compat_ioctl, |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index f95dddced968..e9c75e20db32 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
| @@ -31,7 +31,7 @@ MODULE_LICENSE("GPL"); | |||
| 31 | /* The units the vfs expects inode->i_blocks to be in */ | 31 | /* The units the vfs expects inode->i_blocks to be in */ |
| 32 | #define VFS_BLOCK_SIZE 512 | 32 | #define VFS_BLOCK_SIZE 512 |
| 33 | 33 | ||
| 34 | static int befs_readdir(struct file *, void *, filldir_t); | 34 | static int befs_readdir(struct file *, struct dir_context *); |
| 35 | static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); | 35 | static int befs_get_block(struct inode *, sector_t, struct buffer_head *, int); |
| 36 | static int befs_readpage(struct file *file, struct page *page); | 36 | static int befs_readpage(struct file *file, struct page *page); |
| 37 | static sector_t befs_bmap(struct address_space *mapping, sector_t block); | 37 | static sector_t befs_bmap(struct address_space *mapping, sector_t block); |
| @@ -66,7 +66,7 @@ static struct kmem_cache *befs_inode_cachep; | |||
| 66 | 66 | ||
| 67 | static const struct file_operations befs_dir_operations = { | 67 | static const struct file_operations befs_dir_operations = { |
| 68 | .read = generic_read_dir, | 68 | .read = generic_read_dir, |
| 69 | .readdir = befs_readdir, | 69 | .iterate = befs_readdir, |
| 70 | .llseek = generic_file_llseek, | 70 | .llseek = generic_file_llseek, |
| 71 | }; | 71 | }; |
| 72 | 72 | ||
| @@ -211,9 +211,9 @@ befs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
| 211 | } | 211 | } |
| 212 | 212 | ||
| 213 | static int | 213 | static int |
| 214 | befs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 214 | befs_readdir(struct file *file, struct dir_context *ctx) |
| 215 | { | 215 | { |
| 216 | struct inode *inode = file_inode(filp); | 216 | struct inode *inode = file_inode(file); |
| 217 | struct super_block *sb = inode->i_sb; | 217 | struct super_block *sb = inode->i_sb; |
| 218 | befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; | 218 | befs_data_stream *ds = &BEFS_I(inode)->i_data.ds; |
| 219 | befs_off_t value; | 219 | befs_off_t value; |
| @@ -221,15 +221,14 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 221 | size_t keysize; | 221 | size_t keysize; |
| 222 | unsigned char d_type; | 222 | unsigned char d_type; |
| 223 | char keybuf[BEFS_NAME_LEN + 1]; | 223 | char keybuf[BEFS_NAME_LEN + 1]; |
| 224 | char *nlsname; | 224 | const char *dirname = file->f_path.dentry->d_name.name; |
| 225 | int nlsnamelen; | ||
| 226 | const char *dirname = filp->f_path.dentry->d_name.name; | ||
| 227 | 225 | ||
| 228 | befs_debug(sb, "---> befs_readdir() " | 226 | befs_debug(sb, "---> befs_readdir() " |
| 229 | "name %s, inode %ld, filp->f_pos %Ld", | 227 | "name %s, inode %ld, ctx->pos %Ld", |
| 230 | dirname, inode->i_ino, filp->f_pos); | 228 | dirname, inode->i_ino, ctx->pos); |
| 231 | 229 | ||
| 232 | result = befs_btree_read(sb, ds, filp->f_pos, BEFS_NAME_LEN + 1, | 230 | more: |
| 231 | result = befs_btree_read(sb, ds, ctx->pos, BEFS_NAME_LEN + 1, | ||
| 233 | keybuf, &keysize, &value); | 232 | keybuf, &keysize, &value); |
| 234 | 233 | ||
| 235 | if (result == BEFS_ERR) { | 234 | if (result == BEFS_ERR) { |
| @@ -251,24 +250,29 @@ befs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 251 | 250 | ||
| 252 | /* Convert to NLS */ | 251 | /* Convert to NLS */ |
| 253 | if (BEFS_SB(sb)->nls) { | 252 | if (BEFS_SB(sb)->nls) { |
| 253 | char *nlsname; | ||
| 254 | int nlsnamelen; | ||
| 254 | result = | 255 | result = |
| 255 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); | 256 | befs_utf2nls(sb, keybuf, keysize, &nlsname, &nlsnamelen); |
| 256 | if (result < 0) { | 257 | if (result < 0) { |
| 257 | befs_debug(sb, "<--- befs_readdir() ERROR"); | 258 | befs_debug(sb, "<--- befs_readdir() ERROR"); |
| 258 | return result; | 259 | return result; |
| 259 | } | 260 | } |
| 260 | result = filldir(dirent, nlsname, nlsnamelen, filp->f_pos, | 261 | if (!dir_emit(ctx, nlsname, nlsnamelen, |
| 261 | (ino_t) value, d_type); | 262 | (ino_t) value, d_type)) { |
| 263 | kfree(nlsname); | ||
| 264 | return 0; | ||
| 265 | } | ||
| 262 | kfree(nlsname); | 266 | kfree(nlsname); |
| 263 | |||
| 264 | } else { | 267 | } else { |
| 265 | result = filldir(dirent, keybuf, keysize, filp->f_pos, | 268 | if (!dir_emit(ctx, keybuf, keysize, |
| 266 | (ino_t) value, d_type); | 269 | (ino_t) value, d_type)) |
| 270 | return 0; | ||
| 267 | } | 271 | } |
| 268 | if (!result) | 272 | ctx->pos++; |
| 269 | filp->f_pos++; | 273 | goto more; |
| 270 | 274 | ||
| 271 | befs_debug(sb, "<--- befs_readdir() filp->f_pos %Ld", filp->f_pos); | 275 | befs_debug(sb, "<--- befs_readdir() pos %Ld", ctx->pos); |
| 272 | 276 | ||
| 273 | return 0; | 277 | return 0; |
| 274 | } | 278 | } |
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 3f422f6bb5ca..a399e6d9dc74 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c | |||
| @@ -26,58 +26,51 @@ static struct buffer_head *bfs_find_entry(struct inode *dir, | |||
| 26 | const unsigned char *name, int namelen, | 26 | const unsigned char *name, int namelen, |
| 27 | struct bfs_dirent **res_dir); | 27 | struct bfs_dirent **res_dir); |
| 28 | 28 | ||
| 29 | static int bfs_readdir(struct file *f, void *dirent, filldir_t filldir) | 29 | static int bfs_readdir(struct file *f, struct dir_context *ctx) |
| 30 | { | 30 | { |
| 31 | struct inode *dir = file_inode(f); | 31 | struct inode *dir = file_inode(f); |
| 32 | struct buffer_head *bh; | 32 | struct buffer_head *bh; |
| 33 | struct bfs_dirent *de; | 33 | struct bfs_dirent *de; |
| 34 | struct bfs_sb_info *info = BFS_SB(dir->i_sb); | ||
| 35 | unsigned int offset; | 34 | unsigned int offset; |
| 36 | int block; | 35 | int block; |
| 37 | 36 | ||
| 38 | mutex_lock(&info->bfs_lock); | 37 | if (ctx->pos & (BFS_DIRENT_SIZE - 1)) { |
| 39 | |||
| 40 | if (f->f_pos & (BFS_DIRENT_SIZE - 1)) { | ||
| 41 | printf("Bad f_pos=%08lx for %s:%08lx\n", | 38 | printf("Bad f_pos=%08lx for %s:%08lx\n", |
| 42 | (unsigned long)f->f_pos, | 39 | (unsigned long)ctx->pos, |
| 43 | dir->i_sb->s_id, dir->i_ino); | 40 | dir->i_sb->s_id, dir->i_ino); |
| 44 | mutex_unlock(&info->bfs_lock); | 41 | return -EINVAL; |
| 45 | return -EBADF; | ||
| 46 | } | 42 | } |
| 47 | 43 | ||
| 48 | while (f->f_pos < dir->i_size) { | 44 | while (ctx->pos < dir->i_size) { |
| 49 | offset = f->f_pos & (BFS_BSIZE - 1); | 45 | offset = ctx->pos & (BFS_BSIZE - 1); |
| 50 | block = BFS_I(dir)->i_sblock + (f->f_pos >> BFS_BSIZE_BITS); | 46 | block = BFS_I(dir)->i_sblock + (ctx->pos >> BFS_BSIZE_BITS); |
| 51 | bh = sb_bread(dir->i_sb, block); | 47 | bh = sb_bread(dir->i_sb, block); |
| 52 | if (!bh) { | 48 | if (!bh) { |
| 53 | f->f_pos += BFS_BSIZE - offset; | 49 | ctx->pos += BFS_BSIZE - offset; |
| 54 | continue; | 50 | continue; |
| 55 | } | 51 | } |
| 56 | do { | 52 | do { |
| 57 | de = (struct bfs_dirent *)(bh->b_data + offset); | 53 | de = (struct bfs_dirent *)(bh->b_data + offset); |
| 58 | if (de->ino) { | 54 | if (de->ino) { |
| 59 | int size = strnlen(de->name, BFS_NAMELEN); | 55 | int size = strnlen(de->name, BFS_NAMELEN); |
| 60 | if (filldir(dirent, de->name, size, f->f_pos, | 56 | if (!dir_emit(ctx, de->name, size, |
| 61 | le16_to_cpu(de->ino), | 57 | le16_to_cpu(de->ino), |
| 62 | DT_UNKNOWN) < 0) { | 58 | DT_UNKNOWN)) { |
| 63 | brelse(bh); | 59 | brelse(bh); |
| 64 | mutex_unlock(&info->bfs_lock); | ||
| 65 | return 0; | 60 | return 0; |
| 66 | } | 61 | } |
| 67 | } | 62 | } |
| 68 | offset += BFS_DIRENT_SIZE; | 63 | offset += BFS_DIRENT_SIZE; |
| 69 | f->f_pos += BFS_DIRENT_SIZE; | 64 | ctx->pos += BFS_DIRENT_SIZE; |
| 70 | } while ((offset < BFS_BSIZE) && (f->f_pos < dir->i_size)); | 65 | } while ((offset < BFS_BSIZE) && (ctx->pos < dir->i_size)); |
| 71 | brelse(bh); | 66 | brelse(bh); |
| 72 | } | 67 | } |
| 73 | 68 | return 0; | |
| 74 | mutex_unlock(&info->bfs_lock); | ||
| 75 | return 0; | ||
| 76 | } | 69 | } |
| 77 | 70 | ||
| 78 | const struct file_operations bfs_dir_operations = { | 71 | const struct file_operations bfs_dir_operations = { |
| 79 | .read = generic_read_dir, | 72 | .read = generic_read_dir, |
| 80 | .readdir = bfs_readdir, | 73 | .iterate = bfs_readdir, |
| 81 | .fsync = generic_file_fsync, | 74 | .fsync = generic_file_fsync, |
| 82 | .llseek = generic_file_llseek, | 75 | .llseek = generic_file_llseek, |
| 83 | }; | 76 | }; |
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index f26f38ccd194..eb34438ddedb 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c | |||
| @@ -1681,8 +1681,7 @@ int btrfs_should_delete_dir_index(struct list_head *del_list, | |||
| 1681 | * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree | 1681 | * btrfs_readdir_delayed_dir_index - read dir info stored in the delayed tree |
| 1682 | * | 1682 | * |
| 1683 | */ | 1683 | */ |
| 1684 | int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | 1684 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
| 1685 | filldir_t filldir, | ||
| 1686 | struct list_head *ins_list) | 1685 | struct list_head *ins_list) |
| 1687 | { | 1686 | { |
| 1688 | struct btrfs_dir_item *di; | 1687 | struct btrfs_dir_item *di; |
| @@ -1704,13 +1703,13 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | |||
| 1704 | list_for_each_entry_safe(curr, next, ins_list, readdir_list) { | 1703 | list_for_each_entry_safe(curr, next, ins_list, readdir_list) { |
| 1705 | list_del(&curr->readdir_list); | 1704 | list_del(&curr->readdir_list); |
| 1706 | 1705 | ||
| 1707 | if (curr->key.offset < filp->f_pos) { | 1706 | if (curr->key.offset < ctx->pos) { |
| 1708 | if (atomic_dec_and_test(&curr->refs)) | 1707 | if (atomic_dec_and_test(&curr->refs)) |
| 1709 | kfree(curr); | 1708 | kfree(curr); |
| 1710 | continue; | 1709 | continue; |
| 1711 | } | 1710 | } |
| 1712 | 1711 | ||
| 1713 | filp->f_pos = curr->key.offset; | 1712 | ctx->pos = curr->key.offset; |
| 1714 | 1713 | ||
| 1715 | di = (struct btrfs_dir_item *)curr->data; | 1714 | di = (struct btrfs_dir_item *)curr->data; |
| 1716 | name = (char *)(di + 1); | 1715 | name = (char *)(di + 1); |
| @@ -1719,7 +1718,7 @@ int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | |||
| 1719 | d_type = btrfs_filetype_table[di->type]; | 1718 | d_type = btrfs_filetype_table[di->type]; |
| 1720 | btrfs_disk_key_to_cpu(&location, &di->location); | 1719 | btrfs_disk_key_to_cpu(&location, &di->location); |
| 1721 | 1720 | ||
| 1722 | over = filldir(dirent, name, name_len, curr->key.offset, | 1721 | over = !dir_emit(ctx, name, name_len, |
| 1723 | location.objectid, d_type); | 1722 | location.objectid, d_type); |
| 1724 | 1723 | ||
| 1725 | if (atomic_dec_and_test(&curr->refs)) | 1724 | if (atomic_dec_and_test(&curr->refs)) |
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 1d5c5f7abe3e..a4b38f934d14 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h | |||
| @@ -139,8 +139,7 @@ void btrfs_put_delayed_items(struct list_head *ins_list, | |||
| 139 | struct list_head *del_list); | 139 | struct list_head *del_list); |
| 140 | int btrfs_should_delete_dir_index(struct list_head *del_list, | 140 | int btrfs_should_delete_dir_index(struct list_head *del_list, |
| 141 | u64 index); | 141 | u64 index); |
| 142 | int btrfs_readdir_delayed_dir_index(struct file *filp, void *dirent, | 142 | int btrfs_readdir_delayed_dir_index(struct dir_context *ctx, |
| 143 | filldir_t filldir, | ||
| 144 | struct list_head *ins_list); | 143 | struct list_head *ins_list); |
| 145 | 144 | ||
| 146 | /* for init */ | 145 | /* for init */ |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8b60b660c8f..b0292b3ead54 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -1013,7 +1013,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) | |||
| 1013 | return try_release_extent_buffer(page); | 1013 | return try_release_extent_buffer(page); |
| 1014 | } | 1014 | } |
| 1015 | 1015 | ||
| 1016 | static void btree_invalidatepage(struct page *page, unsigned long offset) | 1016 | static void btree_invalidatepage(struct page *page, unsigned int offset, |
| 1017 | unsigned int length) | ||
| 1017 | { | 1018 | { |
| 1018 | struct extent_io_tree *tree; | 1019 | struct extent_io_tree *tree; |
| 1019 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 1020 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e7e7afb4a872..6bca9472f313 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -2957,7 +2957,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, | |||
| 2957 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); | 2957 | pg_offset = i_size & (PAGE_CACHE_SIZE - 1); |
| 2958 | if (page->index > end_index || | 2958 | if (page->index > end_index || |
| 2959 | (page->index == end_index && !pg_offset)) { | 2959 | (page->index == end_index && !pg_offset)) { |
| 2960 | page->mapping->a_ops->invalidatepage(page, 0); | 2960 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 2961 | unlock_page(page); | 2961 | unlock_page(page); |
| 2962 | return 0; | 2962 | return 0; |
| 2963 | } | 2963 | } |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17f3064b4a3e..4f9d16b70d3d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -5137,10 +5137,9 @@ unsigned char btrfs_filetype_table[] = { | |||
| 5137 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 5137 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
| 5138 | }; | 5138 | }; |
| 5139 | 5139 | ||
| 5140 | static int btrfs_real_readdir(struct file *filp, void *dirent, | 5140 | static int btrfs_real_readdir(struct file *file, struct dir_context *ctx) |
| 5141 | filldir_t filldir) | ||
| 5142 | { | 5141 | { |
| 5143 | struct inode *inode = file_inode(filp); | 5142 | struct inode *inode = file_inode(file); |
| 5144 | struct btrfs_root *root = BTRFS_I(inode)->root; | 5143 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5145 | struct btrfs_item *item; | 5144 | struct btrfs_item *item; |
| 5146 | struct btrfs_dir_item *di; | 5145 | struct btrfs_dir_item *di; |
| @@ -5161,29 +5160,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
| 5161 | char tmp_name[32]; | 5160 | char tmp_name[32]; |
| 5162 | char *name_ptr; | 5161 | char *name_ptr; |
| 5163 | int name_len; | 5162 | int name_len; |
| 5164 | int is_curr = 0; /* filp->f_pos points to the current index? */ | 5163 | int is_curr = 0; /* ctx->pos points to the current index? */ |
| 5165 | 5164 | ||
| 5166 | /* FIXME, use a real flag for deciding about the key type */ | 5165 | /* FIXME, use a real flag for deciding about the key type */ |
| 5167 | if (root->fs_info->tree_root == root) | 5166 | if (root->fs_info->tree_root == root) |
| 5168 | key_type = BTRFS_DIR_ITEM_KEY; | 5167 | key_type = BTRFS_DIR_ITEM_KEY; |
| 5169 | 5168 | ||
| 5170 | /* special case for "." */ | 5169 | if (!dir_emit_dots(file, ctx)) |
| 5171 | if (filp->f_pos == 0) { | 5170 | return 0; |
| 5172 | over = filldir(dirent, ".", 1, | 5171 | |
| 5173 | filp->f_pos, btrfs_ino(inode), DT_DIR); | ||
| 5174 | if (over) | ||
| 5175 | return 0; | ||
| 5176 | filp->f_pos = 1; | ||
| 5177 | } | ||
| 5178 | /* special case for .., just use the back ref */ | ||
| 5179 | if (filp->f_pos == 1) { | ||
| 5180 | u64 pino = parent_ino(filp->f_path.dentry); | ||
| 5181 | over = filldir(dirent, "..", 2, | ||
| 5182 | filp->f_pos, pino, DT_DIR); | ||
| 5183 | if (over) | ||
| 5184 | return 0; | ||
| 5185 | filp->f_pos = 2; | ||
| 5186 | } | ||
| 5187 | path = btrfs_alloc_path(); | 5172 | path = btrfs_alloc_path(); |
| 5188 | if (!path) | 5173 | if (!path) |
| 5189 | return -ENOMEM; | 5174 | return -ENOMEM; |
| @@ -5197,7 +5182,7 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
| 5197 | } | 5182 | } |
| 5198 | 5183 | ||
| 5199 | btrfs_set_key_type(&key, key_type); | 5184 | btrfs_set_key_type(&key, key_type); |
| 5200 | key.offset = filp->f_pos; | 5185 | key.offset = ctx->pos; |
| 5201 | key.objectid = btrfs_ino(inode); | 5186 | key.objectid = btrfs_ino(inode); |
| 5202 | 5187 | ||
| 5203 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5188 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| @@ -5223,14 +5208,14 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
| 5223 | break; | 5208 | break; |
| 5224 | if (btrfs_key_type(&found_key) != key_type) | 5209 | if (btrfs_key_type(&found_key) != key_type) |
| 5225 | break; | 5210 | break; |
| 5226 | if (found_key.offset < filp->f_pos) | 5211 | if (found_key.offset < ctx->pos) |
| 5227 | goto next; | 5212 | goto next; |
| 5228 | if (key_type == BTRFS_DIR_INDEX_KEY && | 5213 | if (key_type == BTRFS_DIR_INDEX_KEY && |
| 5229 | btrfs_should_delete_dir_index(&del_list, | 5214 | btrfs_should_delete_dir_index(&del_list, |
| 5230 | found_key.offset)) | 5215 | found_key.offset)) |
| 5231 | goto next; | 5216 | goto next; |
| 5232 | 5217 | ||
| 5233 | filp->f_pos = found_key.offset; | 5218 | ctx->pos = found_key.offset; |
| 5234 | is_curr = 1; | 5219 | is_curr = 1; |
| 5235 | 5220 | ||
| 5236 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); | 5221 | di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item); |
| @@ -5274,9 +5259,8 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, | |||
| 5274 | over = 0; | 5259 | over = 0; |
| 5275 | goto skip; | 5260 | goto skip; |
| 5276 | } | 5261 | } |
| 5277 | over = filldir(dirent, name_ptr, name_len, | 5262 | over = !dir_emit(ctx, name_ptr, name_len, |
| 5278 | found_key.offset, location.objectid, | 5263 | location.objectid, d_type); |
| 5279 | d_type); | ||
| 5280 | 5264 | ||
| 5281 | skip: | 5265 | skip: |
| 5282 | if (name_ptr != tmp_name) | 5266 | if (name_ptr != tmp_name) |
| @@ -5295,9 +5279,8 @@ next: | |||
| 5295 | 5279 | ||
| 5296 | if (key_type == BTRFS_DIR_INDEX_KEY) { | 5280 | if (key_type == BTRFS_DIR_INDEX_KEY) { |
| 5297 | if (is_curr) | 5281 | if (is_curr) |
| 5298 | filp->f_pos++; | 5282 | ctx->pos++; |
| 5299 | ret = btrfs_readdir_delayed_dir_index(filp, dirent, filldir, | 5283 | ret = btrfs_readdir_delayed_dir_index(ctx, &ins_list); |
| 5300 | &ins_list); | ||
| 5301 | if (ret) | 5284 | if (ret) |
| 5302 | goto nopos; | 5285 | goto nopos; |
| 5303 | } | 5286 | } |
| @@ -5308,9 +5291,9 @@ next: | |||
| 5308 | * 32-bit glibc will use getdents64, but then strtol - | 5291 | * 32-bit glibc will use getdents64, but then strtol - |
| 5309 | * so the last number we can serve is this. | 5292 | * so the last number we can serve is this. |
| 5310 | */ | 5293 | */ |
| 5311 | filp->f_pos = 0x7fffffff; | 5294 | ctx->pos = 0x7fffffff; |
| 5312 | else | 5295 | else |
| 5313 | filp->f_pos++; | 5296 | ctx->pos++; |
| 5314 | nopos: | 5297 | nopos: |
| 5315 | ret = 0; | 5298 | ret = 0; |
| 5316 | err: | 5299 | err: |
| @@ -7510,7 +7493,8 @@ static int btrfs_releasepage(struct page *page, gfp_t gfp_flags) | |||
| 7510 | return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); | 7493 | return __btrfs_releasepage(page, gfp_flags & GFP_NOFS); |
| 7511 | } | 7494 | } |
| 7512 | 7495 | ||
| 7513 | static void btrfs_invalidatepage(struct page *page, unsigned long offset) | 7496 | static void btrfs_invalidatepage(struct page *page, unsigned int offset, |
| 7497 | unsigned int length) | ||
| 7514 | { | 7498 | { |
| 7515 | struct inode *inode = page->mapping->host; | 7499 | struct inode *inode = page->mapping->host; |
| 7516 | struct extent_io_tree *tree; | 7500 | struct extent_io_tree *tree; |
| @@ -8731,7 +8715,7 @@ static const struct inode_operations btrfs_dir_ro_inode_operations = { | |||
| 8731 | static const struct file_operations btrfs_dir_file_operations = { | 8715 | static const struct file_operations btrfs_dir_file_operations = { |
| 8732 | .llseek = generic_file_llseek, | 8716 | .llseek = generic_file_llseek, |
| 8733 | .read = generic_read_dir, | 8717 | .read = generic_read_dir, |
| 8734 | .readdir = btrfs_real_readdir, | 8718 | .iterate = btrfs_real_readdir, |
| 8735 | .unlocked_ioctl = btrfs_ioctl, | 8719 | .unlocked_ioctl = btrfs_ioctl, |
| 8736 | #ifdef CONFIG_COMPAT | 8720 | #ifdef CONFIG_COMPAT |
| 8737 | .compat_ioctl = btrfs_ioctl, | 8721 | .compat_ioctl = btrfs_ioctl, |
diff --git a/fs/buffer.c b/fs/buffer.c index d2a4d1bb2d57..f93392e2df12 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -1454,7 +1454,8 @@ static void discard_buffer(struct buffer_head * bh) | |||
| 1454 | * block_invalidatepage - invalidate part or all of a buffer-backed page | 1454 | * block_invalidatepage - invalidate part or all of a buffer-backed page |
| 1455 | * | 1455 | * |
| 1456 | * @page: the page which is affected | 1456 | * @page: the page which is affected |
| 1457 | * @offset: the index of the truncation point | 1457 | * @offset: start of the range to invalidate |
| 1458 | * @length: length of the range to invalidate | ||
| 1458 | * | 1459 | * |
| 1459 | * block_invalidatepage() is called when all or part of the page has become | 1460 | * block_invalidatepage() is called when all or part of the page has become |
| 1460 | * invalidated by a truncate operation. | 1461 | * invalidated by a truncate operation. |
| @@ -1465,15 +1466,22 @@ static void discard_buffer(struct buffer_head * bh) | |||
| 1465 | * point. Because the caller is about to free (and possibly reuse) those | 1466 | * point. Because the caller is about to free (and possibly reuse) those |
| 1466 | * blocks on-disk. | 1467 | * blocks on-disk. |
| 1467 | */ | 1468 | */ |
| 1468 | void block_invalidatepage(struct page *page, unsigned long offset) | 1469 | void block_invalidatepage(struct page *page, unsigned int offset, |
| 1470 | unsigned int length) | ||
| 1469 | { | 1471 | { |
| 1470 | struct buffer_head *head, *bh, *next; | 1472 | struct buffer_head *head, *bh, *next; |
| 1471 | unsigned int curr_off = 0; | 1473 | unsigned int curr_off = 0; |
| 1474 | unsigned int stop = length + offset; | ||
| 1472 | 1475 | ||
| 1473 | BUG_ON(!PageLocked(page)); | 1476 | BUG_ON(!PageLocked(page)); |
| 1474 | if (!page_has_buffers(page)) | 1477 | if (!page_has_buffers(page)) |
| 1475 | goto out; | 1478 | goto out; |
| 1476 | 1479 | ||
| 1480 | /* | ||
| 1481 | * Check for overflow | ||
| 1482 | */ | ||
| 1483 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
| 1484 | |||
| 1477 | head = page_buffers(page); | 1485 | head = page_buffers(page); |
| 1478 | bh = head; | 1486 | bh = head; |
| 1479 | do { | 1487 | do { |
| @@ -1481,6 +1489,12 @@ void block_invalidatepage(struct page *page, unsigned long offset) | |||
| 1481 | next = bh->b_this_page; | 1489 | next = bh->b_this_page; |
| 1482 | 1490 | ||
| 1483 | /* | 1491 | /* |
| 1492 | * Are we still fully in range ? | ||
| 1493 | */ | ||
| 1494 | if (next_off > stop) | ||
| 1495 | goto out; | ||
| 1496 | |||
| 1497 | /* | ||
| 1484 | * is this block fully invalidated? | 1498 | * is this block fully invalidated? |
| 1485 | */ | 1499 | */ |
| 1486 | if (offset <= curr_off) | 1500 | if (offset <= curr_off) |
| @@ -1501,6 +1515,7 @@ out: | |||
| 1501 | } | 1515 | } |
| 1502 | EXPORT_SYMBOL(block_invalidatepage); | 1516 | EXPORT_SYMBOL(block_invalidatepage); |
| 1503 | 1517 | ||
| 1518 | |||
| 1504 | /* | 1519 | /* |
| 1505 | * We attach and possibly dirty the buffers atomically wrt | 1520 | * We attach and possibly dirty the buffers atomically wrt |
| 1506 | * __set_page_dirty_buffers() via private_lock. try_to_free_buffers | 1521 | * __set_page_dirty_buffers() via private_lock. try_to_free_buffers |
| @@ -2841,7 +2856,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, | |||
| 2841 | * they may have been added in ext3_writepage(). Make them | 2856 | * they may have been added in ext3_writepage(). Make them |
| 2842 | * freeable here, so the page does not leak. | 2857 | * freeable here, so the page does not leak. |
| 2843 | */ | 2858 | */ |
| 2844 | do_invalidatepage(page, 0); | 2859 | do_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 2845 | unlock_page(page); | 2860 | unlock_page(page); |
| 2846 | return 0; /* don't care */ | 2861 | return 0; /* don't care */ |
| 2847 | } | 2862 | } |
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 746ce532e130..d4c1206af9fc 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c | |||
| @@ -13,8 +13,6 @@ | |||
| 13 | #include <linux/mount.h> | 13 | #include <linux/mount.h> |
| 14 | #include "internal.h" | 14 | #include "internal.h" |
| 15 | 15 | ||
| 16 | #define list_to_page(head) (list_entry((head)->prev, struct page, lru)) | ||
| 17 | |||
| 18 | struct cachefiles_lookup_data { | 16 | struct cachefiles_lookup_data { |
| 19 | struct cachefiles_xattr *auxdata; /* auxiliary data */ | 17 | struct cachefiles_xattr *auxdata; /* auxiliary data */ |
| 20 | char *key; /* key path */ | 18 | char *key; /* key path */ |
| @@ -212,20 +210,29 @@ static void cachefiles_update_object(struct fscache_object *_object) | |||
| 212 | object = container_of(_object, struct cachefiles_object, fscache); | 210 | object = container_of(_object, struct cachefiles_object, fscache); |
| 213 | cache = container_of(object->fscache.cache, struct cachefiles_cache, | 211 | cache = container_of(object->fscache.cache, struct cachefiles_cache, |
| 214 | cache); | 212 | cache); |
| 213 | |||
| 214 | if (!fscache_use_cookie(_object)) { | ||
| 215 | _leave(" [relinq]"); | ||
| 216 | return; | ||
| 217 | } | ||
| 218 | |||
| 215 | cookie = object->fscache.cookie; | 219 | cookie = object->fscache.cookie; |
| 216 | 220 | ||
| 217 | if (!cookie->def->get_aux) { | 221 | if (!cookie->def->get_aux) { |
| 222 | fscache_unuse_cookie(_object); | ||
| 218 | _leave(" [no aux]"); | 223 | _leave(" [no aux]"); |
| 219 | return; | 224 | return; |
| 220 | } | 225 | } |
| 221 | 226 | ||
| 222 | auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); | 227 | auxdata = kmalloc(2 + 512 + 3, cachefiles_gfp); |
| 223 | if (!auxdata) { | 228 | if (!auxdata) { |
| 229 | fscache_unuse_cookie(_object); | ||
| 224 | _leave(" [nomem]"); | 230 | _leave(" [nomem]"); |
| 225 | return; | 231 | return; |
| 226 | } | 232 | } |
| 227 | 233 | ||
| 228 | auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); | 234 | auxlen = cookie->def->get_aux(cookie->netfs_data, auxdata->data, 511); |
| 235 | fscache_unuse_cookie(_object); | ||
| 229 | ASSERTCMP(auxlen, <, 511); | 236 | ASSERTCMP(auxlen, <, 511); |
| 230 | 237 | ||
| 231 | auxdata->len = auxlen + 1; | 238 | auxdata->len = auxlen + 1; |
| @@ -263,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object) | |||
| 263 | #endif | 270 | #endif |
| 264 | 271 | ||
| 265 | /* delete retired objects */ | 272 | /* delete retired objects */ |
| 266 | if (object->fscache.state == FSCACHE_OBJECT_RECYCLING && | 273 | if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) && |
| 267 | _object != cache->cache.fsdef | 274 | _object != cache->cache.fsdef |
| 268 | ) { | 275 | ) { |
| 269 | _debug("- retire object OBJ%x", object->fscache.debug_id); | 276 | _debug("- retire object OBJ%x", object->fscache.debug_id); |
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c index 8c01c5fcdf75..25badd1aec5c 100644 --- a/fs/cachefiles/namei.c +++ b/fs/cachefiles/namei.c | |||
| @@ -38,7 +38,7 @@ void __cachefiles_printk_object(struct cachefiles_object *object, | |||
| 38 | printk(KERN_ERR "%sobject: OBJ%x\n", | 38 | printk(KERN_ERR "%sobject: OBJ%x\n", |
| 39 | prefix, object->fscache.debug_id); | 39 | prefix, object->fscache.debug_id); |
| 40 | printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", | 40 | printk(KERN_ERR "%sobjstate=%s fl=%lx wbusy=%x ev=%lx[%lx]\n", |
| 41 | prefix, fscache_object_states[object->fscache.state], | 41 | prefix, object->fscache.state->name, |
| 42 | object->fscache.flags, work_busy(&object->fscache.work), | 42 | object->fscache.flags, work_busy(&object->fscache.work), |
| 43 | object->fscache.events, object->fscache.event_mask); | 43 | object->fscache.events, object->fscache.event_mask); |
| 44 | printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", | 44 | printk(KERN_ERR "%sops=%u inp=%u exc=%u\n", |
| @@ -127,10 +127,10 @@ static void cachefiles_mark_object_buried(struct cachefiles_cache *cache, | |||
| 127 | found_dentry: | 127 | found_dentry: |
| 128 | kdebug("preemptive burial: OBJ%x [%s] %p", | 128 | kdebug("preemptive burial: OBJ%x [%s] %p", |
| 129 | object->fscache.debug_id, | 129 | object->fscache.debug_id, |
| 130 | fscache_object_states[object->fscache.state], | 130 | object->fscache.state->name, |
| 131 | dentry); | 131 | dentry); |
| 132 | 132 | ||
| 133 | if (object->fscache.state < FSCACHE_OBJECT_DYING) { | 133 | if (fscache_object_is_live(&object->fscache)) { |
| 134 | printk(KERN_ERR "\n"); | 134 | printk(KERN_ERR "\n"); |
| 135 | printk(KERN_ERR "CacheFiles: Error:" | 135 | printk(KERN_ERR "CacheFiles: Error:" |
| 136 | " Can't preemptively bury live object\n"); | 136 | " Can't preemptively bury live object\n"); |
| @@ -192,7 +192,7 @@ try_again: | |||
| 192 | /* an old object from a previous incarnation is hogging the slot - we | 192 | /* an old object from a previous incarnation is hogging the slot - we |
| 193 | * need to wait for it to be destroyed */ | 193 | * need to wait for it to be destroyed */ |
| 194 | wait_for_old_object: | 194 | wait_for_old_object: |
| 195 | if (xobject->fscache.state < FSCACHE_OBJECT_DYING) { | 195 | if (fscache_object_is_live(&object->fscache)) { |
| 196 | printk(KERN_ERR "\n"); | 196 | printk(KERN_ERR "\n"); |
| 197 | printk(KERN_ERR "CacheFiles: Error:" | 197 | printk(KERN_ERR "CacheFiles: Error:" |
| 198 | " Unexpected object collision\n"); | 198 | " Unexpected object collision\n"); |
| @@ -836,7 +836,7 @@ static struct dentry *cachefiles_check_active(struct cachefiles_cache *cache, | |||
| 836 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); | 836 | // dir->d_name.len, dir->d_name.len, dir->d_name.name, filename); |
| 837 | 837 | ||
| 838 | /* look up the victim */ | 838 | /* look up the victim */ |
| 839 | mutex_lock_nested(&dir->d_inode->i_mutex, 1); | 839 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
| 840 | 840 | ||
| 841 | start = jiffies; | 841 | start = jiffies; |
| 842 | victim = lookup_one_len(filename, dir, strlen(filename)); | 842 | victim = lookup_one_len(filename, dir, strlen(filename)); |
diff --git a/fs/cachefiles/xattr.c b/fs/cachefiles/xattr.c index 73b46288b54b..2476e5162609 100644 --- a/fs/cachefiles/xattr.c +++ b/fs/cachefiles/xattr.c | |||
| @@ -109,13 +109,12 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object, | |||
| 109 | struct dentry *dentry = object->dentry; | 109 | struct dentry *dentry = object->dentry; |
| 110 | int ret; | 110 | int ret; |
| 111 | 111 | ||
| 112 | ASSERT(object->fscache.cookie); | ||
| 113 | ASSERT(dentry); | 112 | ASSERT(dentry); |
| 114 | 113 | ||
| 115 | _enter("%p,#%d", object, auxdata->len); | 114 | _enter("%p,#%d", object, auxdata->len); |
| 116 | 115 | ||
| 117 | /* attempt to install the cache metadata directly */ | 116 | /* attempt to install the cache metadata directly */ |
| 118 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | 117 | _debug("SET #%u", auxdata->len); |
| 119 | 118 | ||
| 120 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | 119 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, |
| 121 | &auxdata->type, auxdata->len, | 120 | &auxdata->type, auxdata->len, |
| @@ -138,13 +137,12 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object, | |||
| 138 | struct dentry *dentry = object->dentry; | 137 | struct dentry *dentry = object->dentry; |
| 139 | int ret; | 138 | int ret; |
| 140 | 139 | ||
| 141 | ASSERT(object->fscache.cookie); | ||
| 142 | ASSERT(dentry); | 140 | ASSERT(dentry); |
| 143 | 141 | ||
| 144 | _enter("%p,#%d", object, auxdata->len); | 142 | _enter("%p,#%d", object, auxdata->len); |
| 145 | 143 | ||
| 146 | /* attempt to install the cache metadata directly */ | 144 | /* attempt to install the cache metadata directly */ |
| 147 | _debug("SET %s #%u", object->fscache.cookie->def->name, auxdata->len); | 145 | _debug("SET #%u", auxdata->len); |
| 148 | 146 | ||
| 149 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, | 147 | ret = vfs_setxattr(dentry, cachefiles_xattr_cache, |
| 150 | &auxdata->type, auxdata->len, | 148 | &auxdata->type, auxdata->len, |
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3e68ac101040..38b5c1bc6776 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c | |||
| @@ -143,7 +143,8 @@ static int ceph_set_page_dirty(struct page *page) | |||
| 143 | * dirty page counters appropriately. Only called if there is private | 143 | * dirty page counters appropriately. Only called if there is private |
| 144 | * data on the page. | 144 | * data on the page. |
| 145 | */ | 145 | */ |
| 146 | static void ceph_invalidatepage(struct page *page, unsigned long offset) | 146 | static void ceph_invalidatepage(struct page *page, unsigned int offset, |
| 147 | unsigned int length) | ||
| 147 | { | 148 | { |
| 148 | struct inode *inode; | 149 | struct inode *inode; |
| 149 | struct ceph_inode_info *ci; | 150 | struct ceph_inode_info *ci; |
| @@ -163,20 +164,20 @@ static void ceph_invalidatepage(struct page *page, unsigned long offset) | |||
| 163 | if (!PageDirty(page)) | 164 | if (!PageDirty(page)) |
| 164 | pr_err("%p invalidatepage %p page not dirty\n", inode, page); | 165 | pr_err("%p invalidatepage %p page not dirty\n", inode, page); |
| 165 | 166 | ||
| 166 | if (offset == 0) | 167 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
| 167 | ClearPageChecked(page); | 168 | ClearPageChecked(page); |
| 168 | 169 | ||
| 169 | ci = ceph_inode(inode); | 170 | ci = ceph_inode(inode); |
| 170 | if (offset == 0) { | 171 | if (offset == 0 && length == PAGE_CACHE_SIZE) { |
| 171 | dout("%p invalidatepage %p idx %lu full dirty page %lu\n", | 172 | dout("%p invalidatepage %p idx %lu full dirty page\n", |
| 172 | inode, page, page->index, offset); | 173 | inode, page, page->index); |
| 173 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); | 174 | ceph_put_wrbuffer_cap_refs(ci, 1, snapc); |
| 174 | ceph_put_snap_context(snapc); | 175 | ceph_put_snap_context(snapc); |
| 175 | page->private = 0; | 176 | page->private = 0; |
| 176 | ClearPagePrivate(page); | 177 | ClearPagePrivate(page); |
| 177 | } else { | 178 | } else { |
| 178 | dout("%p invalidatepage %p idx %lu partial dirty page\n", | 179 | dout("%p invalidatepage %p idx %lu partial dirty page %u(%u)\n", |
| 179 | inode, page, page->index); | 180 | inode, page, page->index, offset, length); |
| 180 | } | 181 | } |
| 181 | } | 182 | } |
| 182 | 183 | ||
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index f02d82b7933e..a40ceda47a32 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c | |||
| @@ -111,11 +111,10 @@ static unsigned fpos_off(loff_t p) | |||
| 111 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by | 111 | * defined IFF we hold CEPH_CAP_FILE_SHARED (which will be revoked by |
| 112 | * the MDS if/when the directory is modified). | 112 | * the MDS if/when the directory is modified). |
| 113 | */ | 113 | */ |
| 114 | static int __dcache_readdir(struct file *filp, | 114 | static int __dcache_readdir(struct file *file, struct dir_context *ctx) |
| 115 | void *dirent, filldir_t filldir) | ||
| 116 | { | 115 | { |
| 117 | struct ceph_file_info *fi = filp->private_data; | 116 | struct ceph_file_info *fi = file->private_data; |
| 118 | struct dentry *parent = filp->f_dentry; | 117 | struct dentry *parent = file->f_dentry; |
| 119 | struct inode *dir = parent->d_inode; | 118 | struct inode *dir = parent->d_inode; |
| 120 | struct list_head *p; | 119 | struct list_head *p; |
| 121 | struct dentry *dentry, *last; | 120 | struct dentry *dentry, *last; |
| @@ -126,14 +125,14 @@ static int __dcache_readdir(struct file *filp, | |||
| 126 | last = fi->dentry; | 125 | last = fi->dentry; |
| 127 | fi->dentry = NULL; | 126 | fi->dentry = NULL; |
| 128 | 127 | ||
| 129 | dout("__dcache_readdir %p at %llu (last %p)\n", dir, filp->f_pos, | 128 | dout("__dcache_readdir %p at %llu (last %p)\n", dir, ctx->pos, |
| 130 | last); | 129 | last); |
| 131 | 130 | ||
| 132 | spin_lock(&parent->d_lock); | 131 | spin_lock(&parent->d_lock); |
| 133 | 132 | ||
| 134 | /* start at beginning? */ | 133 | /* start at beginning? */ |
| 135 | if (filp->f_pos == 2 || last == NULL || | 134 | if (ctx->pos == 2 || last == NULL || |
| 136 | filp->f_pos < ceph_dentry(last)->offset) { | 135 | ctx->pos < ceph_dentry(last)->offset) { |
| 137 | if (list_empty(&parent->d_subdirs)) | 136 | if (list_empty(&parent->d_subdirs)) |
| 138 | goto out_unlock; | 137 | goto out_unlock; |
| 139 | p = parent->d_subdirs.prev; | 138 | p = parent->d_subdirs.prev; |
| @@ -157,11 +156,11 @@ more: | |||
| 157 | if (!d_unhashed(dentry) && dentry->d_inode && | 156 | if (!d_unhashed(dentry) && dentry->d_inode && |
| 158 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && | 157 | ceph_snap(dentry->d_inode) != CEPH_SNAPDIR && |
| 159 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && | 158 | ceph_ino(dentry->d_inode) != CEPH_INO_CEPH && |
| 160 | filp->f_pos <= di->offset) | 159 | ctx->pos <= di->offset) |
| 161 | break; | 160 | break; |
| 162 | dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, | 161 | dout(" skipping %p %.*s at %llu (%llu)%s%s\n", dentry, |
| 163 | dentry->d_name.len, dentry->d_name.name, di->offset, | 162 | dentry->d_name.len, dentry->d_name.name, di->offset, |
| 164 | filp->f_pos, d_unhashed(dentry) ? " unhashed" : "", | 163 | ctx->pos, d_unhashed(dentry) ? " unhashed" : "", |
| 165 | !dentry->d_inode ? " null" : ""); | 164 | !dentry->d_inode ? " null" : ""); |
| 166 | spin_unlock(&dentry->d_lock); | 165 | spin_unlock(&dentry->d_lock); |
| 167 | p = p->prev; | 166 | p = p->prev; |
| @@ -173,29 +172,27 @@ more: | |||
| 173 | spin_unlock(&dentry->d_lock); | 172 | spin_unlock(&dentry->d_lock); |
| 174 | spin_unlock(&parent->d_lock); | 173 | spin_unlock(&parent->d_lock); |
| 175 | 174 | ||
| 176 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, filp->f_pos, | 175 | dout(" %llu (%llu) dentry %p %.*s %p\n", di->offset, ctx->pos, |
| 177 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); | 176 | dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); |
| 178 | filp->f_pos = di->offset; | 177 | ctx->pos = di->offset; |
| 179 | err = filldir(dirent, dentry->d_name.name, | 178 | if (!dir_emit(ctx, dentry->d_name.name, |
| 180 | dentry->d_name.len, di->offset, | 179 | dentry->d_name.len, |
| 181 | ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), | 180 | ceph_translate_ino(dentry->d_sb, dentry->d_inode->i_ino), |
| 182 | dentry->d_inode->i_mode >> 12); | 181 | dentry->d_inode->i_mode >> 12)) { |
| 183 | 182 | if (last) { | |
| 184 | if (last) { | ||
| 185 | if (err < 0) { | ||
| 186 | /* remember our position */ | 183 | /* remember our position */ |
| 187 | fi->dentry = last; | 184 | fi->dentry = last; |
| 188 | fi->next_offset = di->offset; | 185 | fi->next_offset = di->offset; |
| 189 | } else { | ||
| 190 | dput(last); | ||
| 191 | } | 186 | } |
| 187 | dput(dentry); | ||
| 188 | return 0; | ||
| 192 | } | 189 | } |
| 193 | last = dentry; | ||
| 194 | 190 | ||
| 195 | if (err < 0) | 191 | if (last) |
| 196 | goto out; | 192 | dput(last); |
| 193 | last = dentry; | ||
| 197 | 194 | ||
| 198 | filp->f_pos++; | 195 | ctx->pos++; |
| 199 | 196 | ||
| 200 | /* make sure a dentry wasn't dropped while we didn't have parent lock */ | 197 | /* make sure a dentry wasn't dropped while we didn't have parent lock */ |
| 201 | if (!ceph_dir_is_complete(dir)) { | 198 | if (!ceph_dir_is_complete(dir)) { |
| @@ -235,59 +232,59 @@ static int note_last_dentry(struct ceph_file_info *fi, const char *name, | |||
| 235 | return 0; | 232 | return 0; |
| 236 | } | 233 | } |
| 237 | 234 | ||
| 238 | static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) | 235 | static int ceph_readdir(struct file *file, struct dir_context *ctx) |
| 239 | { | 236 | { |
| 240 | struct ceph_file_info *fi = filp->private_data; | 237 | struct ceph_file_info *fi = file->private_data; |
| 241 | struct inode *inode = file_inode(filp); | 238 | struct inode *inode = file_inode(file); |
| 242 | struct ceph_inode_info *ci = ceph_inode(inode); | 239 | struct ceph_inode_info *ci = ceph_inode(inode); |
| 243 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); | 240 | struct ceph_fs_client *fsc = ceph_inode_to_client(inode); |
| 244 | struct ceph_mds_client *mdsc = fsc->mdsc; | 241 | struct ceph_mds_client *mdsc = fsc->mdsc; |
| 245 | unsigned frag = fpos_frag(filp->f_pos); | 242 | unsigned frag = fpos_frag(ctx->pos); |
| 246 | int off = fpos_off(filp->f_pos); | 243 | int off = fpos_off(ctx->pos); |
| 247 | int err; | 244 | int err; |
| 248 | u32 ftype; | 245 | u32 ftype; |
| 249 | struct ceph_mds_reply_info_parsed *rinfo; | 246 | struct ceph_mds_reply_info_parsed *rinfo; |
| 250 | const int max_entries = fsc->mount_options->max_readdir; | 247 | const int max_entries = fsc->mount_options->max_readdir; |
| 251 | const int max_bytes = fsc->mount_options->max_readdir_bytes; | 248 | const int max_bytes = fsc->mount_options->max_readdir_bytes; |
| 252 | 249 | ||
| 253 | dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); | 250 | dout("readdir %p file %p frag %u off %u\n", inode, file, frag, off); |
| 254 | if (fi->flags & CEPH_F_ATEND) | 251 | if (fi->flags & CEPH_F_ATEND) |
| 255 | return 0; | 252 | return 0; |
| 256 | 253 | ||
| 257 | /* always start with . and .. */ | 254 | /* always start with . and .. */ |
| 258 | if (filp->f_pos == 0) { | 255 | if (ctx->pos == 0) { |
| 259 | /* note dir version at start of readdir so we can tell | 256 | /* note dir version at start of readdir so we can tell |
| 260 | * if any dentries get dropped */ | 257 | * if any dentries get dropped */ |
| 261 | fi->dir_release_count = atomic_read(&ci->i_release_count); | 258 | fi->dir_release_count = atomic_read(&ci->i_release_count); |
| 262 | 259 | ||
| 263 | dout("readdir off 0 -> '.'\n"); | 260 | dout("readdir off 0 -> '.'\n"); |
| 264 | if (filldir(dirent, ".", 1, ceph_make_fpos(0, 0), | 261 | if (!dir_emit(ctx, ".", 1, |
| 265 | ceph_translate_ino(inode->i_sb, inode->i_ino), | 262 | ceph_translate_ino(inode->i_sb, inode->i_ino), |
| 266 | inode->i_mode >> 12) < 0) | 263 | inode->i_mode >> 12)) |
| 267 | return 0; | 264 | return 0; |
| 268 | filp->f_pos = 1; | 265 | ctx->pos = 1; |
| 269 | off = 1; | 266 | off = 1; |
| 270 | } | 267 | } |
| 271 | if (filp->f_pos == 1) { | 268 | if (ctx->pos == 1) { |
| 272 | ino_t ino = parent_ino(filp->f_dentry); | 269 | ino_t ino = parent_ino(file->f_dentry); |
| 273 | dout("readdir off 1 -> '..'\n"); | 270 | dout("readdir off 1 -> '..'\n"); |
| 274 | if (filldir(dirent, "..", 2, ceph_make_fpos(0, 1), | 271 | if (!dir_emit(ctx, "..", 2, |
| 275 | ceph_translate_ino(inode->i_sb, ino), | 272 | ceph_translate_ino(inode->i_sb, ino), |
| 276 | inode->i_mode >> 12) < 0) | 273 | inode->i_mode >> 12)) |
| 277 | return 0; | 274 | return 0; |
| 278 | filp->f_pos = 2; | 275 | ctx->pos = 2; |
| 279 | off = 2; | 276 | off = 2; |
| 280 | } | 277 | } |
| 281 | 278 | ||
| 282 | /* can we use the dcache? */ | 279 | /* can we use the dcache? */ |
| 283 | spin_lock(&ci->i_ceph_lock); | 280 | spin_lock(&ci->i_ceph_lock); |
| 284 | if ((filp->f_pos == 2 || fi->dentry) && | 281 | if ((ctx->pos == 2 || fi->dentry) && |
| 285 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && | 282 | !ceph_test_mount_opt(fsc, NOASYNCREADDIR) && |
| 286 | ceph_snap(inode) != CEPH_SNAPDIR && | 283 | ceph_snap(inode) != CEPH_SNAPDIR && |
| 287 | __ceph_dir_is_complete(ci) && | 284 | __ceph_dir_is_complete(ci) && |
| 288 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { | 285 | __ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) { |
| 289 | spin_unlock(&ci->i_ceph_lock); | 286 | spin_unlock(&ci->i_ceph_lock); |
| 290 | err = __dcache_readdir(filp, dirent, filldir); | 287 | err = __dcache_readdir(file, ctx); |
| 291 | if (err != -EAGAIN) | 288 | if (err != -EAGAIN) |
| 292 | return err; | 289 | return err; |
| 293 | } else { | 290 | } else { |
| @@ -327,7 +324,7 @@ more: | |||
| 327 | return PTR_ERR(req); | 324 | return PTR_ERR(req); |
| 328 | req->r_inode = inode; | 325 | req->r_inode = inode; |
| 329 | ihold(inode); | 326 | ihold(inode); |
| 330 | req->r_dentry = dget(filp->f_dentry); | 327 | req->r_dentry = dget(file->f_dentry); |
| 331 | /* hints to request -> mds selection code */ | 328 | /* hints to request -> mds selection code */ |
| 332 | req->r_direct_mode = USE_AUTH_MDS; | 329 | req->r_direct_mode = USE_AUTH_MDS; |
| 333 | req->r_direct_hash = ceph_frag_value(frag); | 330 | req->r_direct_hash = ceph_frag_value(frag); |
| @@ -379,15 +376,16 @@ more: | |||
| 379 | rinfo = &fi->last_readdir->r_reply_info; | 376 | rinfo = &fi->last_readdir->r_reply_info; |
| 380 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, | 377 | dout("readdir frag %x num %d off %d chunkoff %d\n", frag, |
| 381 | rinfo->dir_nr, off, fi->offset); | 378 | rinfo->dir_nr, off, fi->offset); |
| 379 | |||
| 380 | ctx->pos = ceph_make_fpos(frag, off); | ||
| 382 | while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { | 381 | while (off >= fi->offset && off - fi->offset < rinfo->dir_nr) { |
| 383 | u64 pos = ceph_make_fpos(frag, off); | ||
| 384 | struct ceph_mds_reply_inode *in = | 382 | struct ceph_mds_reply_inode *in = |
| 385 | rinfo->dir_in[off - fi->offset].in; | 383 | rinfo->dir_in[off - fi->offset].in; |
| 386 | struct ceph_vino vino; | 384 | struct ceph_vino vino; |
| 387 | ino_t ino; | 385 | ino_t ino; |
| 388 | 386 | ||
| 389 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", | 387 | dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n", |
| 390 | off, off - fi->offset, rinfo->dir_nr, pos, | 388 | off, off - fi->offset, rinfo->dir_nr, ctx->pos, |
| 391 | rinfo->dir_dname_len[off - fi->offset], | 389 | rinfo->dir_dname_len[off - fi->offset], |
| 392 | rinfo->dir_dname[off - fi->offset], in); | 390 | rinfo->dir_dname[off - fi->offset], in); |
| 393 | BUG_ON(!in); | 391 | BUG_ON(!in); |
| @@ -395,16 +393,15 @@ more: | |||
| 395 | vino.ino = le64_to_cpu(in->ino); | 393 | vino.ino = le64_to_cpu(in->ino); |
| 396 | vino.snap = le64_to_cpu(in->snapid); | 394 | vino.snap = le64_to_cpu(in->snapid); |
| 397 | ino = ceph_vino_to_ino(vino); | 395 | ino = ceph_vino_to_ino(vino); |
| 398 | if (filldir(dirent, | 396 | if (!dir_emit(ctx, |
| 399 | rinfo->dir_dname[off - fi->offset], | 397 | rinfo->dir_dname[off - fi->offset], |
| 400 | rinfo->dir_dname_len[off - fi->offset], | 398 | rinfo->dir_dname_len[off - fi->offset], |
| 401 | pos, | 399 | ceph_translate_ino(inode->i_sb, ino), ftype)) { |
| 402 | ceph_translate_ino(inode->i_sb, ino), ftype) < 0) { | ||
| 403 | dout("filldir stopping us...\n"); | 400 | dout("filldir stopping us...\n"); |
| 404 | return 0; | 401 | return 0; |
| 405 | } | 402 | } |
| 406 | off++; | 403 | off++; |
| 407 | filp->f_pos = pos + 1; | 404 | ctx->pos++; |
| 408 | } | 405 | } |
| 409 | 406 | ||
| 410 | if (fi->last_name) { | 407 | if (fi->last_name) { |
| @@ -417,7 +414,7 @@ more: | |||
| 417 | if (!ceph_frag_is_rightmost(frag)) { | 414 | if (!ceph_frag_is_rightmost(frag)) { |
| 418 | frag = ceph_frag_next(frag); | 415 | frag = ceph_frag_next(frag); |
| 419 | off = 0; | 416 | off = 0; |
| 420 | filp->f_pos = ceph_make_fpos(frag, off); | 417 | ctx->pos = ceph_make_fpos(frag, off); |
| 421 | dout("readdir next frag is %x\n", frag); | 418 | dout("readdir next frag is %x\n", frag); |
| 422 | goto more; | 419 | goto more; |
| 423 | } | 420 | } |
| @@ -432,11 +429,11 @@ more: | |||
| 432 | if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { | 429 | if (atomic_read(&ci->i_release_count) == fi->dir_release_count) { |
| 433 | dout(" marking %p complete\n", inode); | 430 | dout(" marking %p complete\n", inode); |
| 434 | __ceph_dir_set_complete(ci, fi->dir_release_count); | 431 | __ceph_dir_set_complete(ci, fi->dir_release_count); |
| 435 | ci->i_max_offset = filp->f_pos; | 432 | ci->i_max_offset = ctx->pos; |
| 436 | } | 433 | } |
| 437 | spin_unlock(&ci->i_ceph_lock); | 434 | spin_unlock(&ci->i_ceph_lock); |
| 438 | 435 | ||
| 439 | dout("readdir %p filp %p done.\n", inode, filp); | 436 | dout("readdir %p file %p done.\n", inode, file); |
| 440 | return 0; | 437 | return 0; |
| 441 | } | 438 | } |
| 442 | 439 | ||
| @@ -1268,7 +1265,7 @@ unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) | |||
| 1268 | 1265 | ||
| 1269 | const struct file_operations ceph_dir_fops = { | 1266 | const struct file_operations ceph_dir_fops = { |
| 1270 | .read = ceph_read_dir, | 1267 | .read = ceph_read_dir, |
| 1271 | .readdir = ceph_readdir, | 1268 | .iterate = ceph_readdir, |
| 1272 | .llseek = ceph_dir_llseek, | 1269 | .llseek = ceph_dir_llseek, |
| 1273 | .open = ceph_open, | 1270 | .open = ceph_open, |
| 1274 | .release = ceph_release, | 1271 | .release = ceph_release, |
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3752b9f6d9e4..540c1ccfcdb2 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c | |||
| @@ -968,7 +968,7 @@ const struct file_operations cifs_file_direct_nobrl_ops = { | |||
| 968 | }; | 968 | }; |
| 969 | 969 | ||
| 970 | const struct file_operations cifs_dir_ops = { | 970 | const struct file_operations cifs_dir_ops = { |
| 971 | .readdir = cifs_readdir, | 971 | .iterate = cifs_readdir, |
| 972 | .release = cifs_closedir, | 972 | .release = cifs_closedir, |
| 973 | .read = generic_read_dir, | 973 | .read = generic_read_dir, |
| 974 | .unlocked_ioctl = cifs_ioctl, | 974 | .unlocked_ioctl = cifs_ioctl, |
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 0e32c3446ce9..d05b3028e3b9 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
| @@ -101,7 +101,7 @@ extern int cifs_file_mmap(struct file * , struct vm_area_struct *); | |||
| 101 | extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); | 101 | extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *); |
| 102 | extern const struct file_operations cifs_dir_ops; | 102 | extern const struct file_operations cifs_dir_ops; |
| 103 | extern int cifs_dir_open(struct inode *inode, struct file *file); | 103 | extern int cifs_dir_open(struct inode *inode, struct file *file); |
| 104 | extern int cifs_readdir(struct file *file, void *direntry, filldir_t filldir); | 104 | extern int cifs_readdir(struct file *file, struct dir_context *ctx); |
| 105 | 105 | ||
| 106 | /* Functions related to dir entries */ | 106 | /* Functions related to dir entries */ |
| 107 | extern const struct dentry_operations cifs_dentry_ops; | 107 | extern const struct dentry_operations cifs_dentry_ops; |
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 48b29d24c9f4..4d8ba8d491e5 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -3546,11 +3546,12 @@ static int cifs_release_page(struct page *page, gfp_t gfp) | |||
| 3546 | return cifs_fscache_release_page(page, gfp); | 3546 | return cifs_fscache_release_page(page, gfp); |
| 3547 | } | 3547 | } |
| 3548 | 3548 | ||
| 3549 | static void cifs_invalidate_page(struct page *page, unsigned long offset) | 3549 | static void cifs_invalidate_page(struct page *page, unsigned int offset, |
| 3550 | unsigned int length) | ||
| 3550 | { | 3551 | { |
| 3551 | struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); | 3552 | struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host); |
| 3552 | 3553 | ||
| 3553 | if (offset == 0) | 3554 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
| 3554 | cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); | 3555 | cifs_fscache_invalidate_page(page, &cifsi->vfs_inode); |
| 3555 | } | 3556 | } |
| 3556 | 3557 | ||
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 770d5a9781c1..f1213799de1a 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
| @@ -537,14 +537,14 @@ static int cifs_save_resume_key(const char *current_entry, | |||
| 537 | * every entry (do not increment for . or .. entry). | 537 | * every entry (do not increment for . or .. entry). |
| 538 | */ | 538 | */ |
| 539 | static int | 539 | static int |
| 540 | find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, | 540 | find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos, |
| 541 | struct file *file, char **current_entry, int *num_to_ret) | 541 | struct file *file, char **current_entry, int *num_to_ret) |
| 542 | { | 542 | { |
| 543 | __u16 search_flags; | 543 | __u16 search_flags; |
| 544 | int rc = 0; | 544 | int rc = 0; |
| 545 | int pos_in_buf = 0; | 545 | int pos_in_buf = 0; |
| 546 | loff_t first_entry_in_buffer; | 546 | loff_t first_entry_in_buffer; |
| 547 | loff_t index_to_find = file->f_pos; | 547 | loff_t index_to_find = pos; |
| 548 | struct cifsFileInfo *cfile = file->private_data; | 548 | struct cifsFileInfo *cfile = file->private_data; |
| 549 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); | 549 | struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); |
| 550 | struct TCP_Server_Info *server = tcon->ses->server; | 550 | struct TCP_Server_Info *server = tcon->ses->server; |
| @@ -659,8 +659,9 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, | |||
| 659 | return rc; | 659 | return rc; |
| 660 | } | 660 | } |
| 661 | 661 | ||
| 662 | static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | 662 | static int cifs_filldir(char *find_entry, struct file *file, |
| 663 | void *dirent, char *scratch_buf, unsigned int max_len) | 663 | struct dir_context *ctx, |
| 664 | char *scratch_buf, unsigned int max_len) | ||
| 664 | { | 665 | { |
| 665 | struct cifsFileInfo *file_info = file->private_data; | 666 | struct cifsFileInfo *file_info = file->private_data; |
| 666 | struct super_block *sb = file->f_path.dentry->d_sb; | 667 | struct super_block *sb = file->f_path.dentry->d_sb; |
| @@ -740,13 +741,11 @@ static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, | |||
| 740 | cifs_prime_dcache(file->f_dentry, &name, &fattr); | 741 | cifs_prime_dcache(file->f_dentry, &name, &fattr); |
| 741 | 742 | ||
| 742 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); | 743 | ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); |
| 743 | rc = filldir(dirent, name.name, name.len, file->f_pos, ino, | 744 | return !dir_emit(ctx, name.name, name.len, ino, fattr.cf_dtype); |
| 744 | fattr.cf_dtype); | ||
| 745 | return rc; | ||
| 746 | } | 745 | } |
| 747 | 746 | ||
| 748 | 747 | ||
| 749 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | 748 | int cifs_readdir(struct file *file, struct dir_context *ctx) |
| 750 | { | 749 | { |
| 751 | int rc = 0; | 750 | int rc = 0; |
| 752 | unsigned int xid; | 751 | unsigned int xid; |
| @@ -772,103 +771,86 @@ int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | |||
| 772 | goto rddir2_exit; | 771 | goto rddir2_exit; |
| 773 | } | 772 | } |
| 774 | 773 | ||
| 775 | switch ((int) file->f_pos) { | 774 | if (!dir_emit_dots(file, ctx)) |
| 776 | case 0: | 775 | goto rddir2_exit; |
| 777 | if (filldir(direntry, ".", 1, file->f_pos, | ||
| 778 | file_inode(file)->i_ino, DT_DIR) < 0) { | ||
| 779 | cifs_dbg(VFS, "Filldir for current dir failed\n"); | ||
| 780 | rc = -ENOMEM; | ||
| 781 | break; | ||
| 782 | } | ||
| 783 | file->f_pos++; | ||
| 784 | case 1: | ||
| 785 | if (filldir(direntry, "..", 2, file->f_pos, | ||
| 786 | parent_ino(file->f_path.dentry), DT_DIR) < 0) { | ||
| 787 | cifs_dbg(VFS, "Filldir for parent dir failed\n"); | ||
| 788 | rc = -ENOMEM; | ||
| 789 | break; | ||
| 790 | } | ||
| 791 | file->f_pos++; | ||
| 792 | default: | ||
| 793 | /* 1) If search is active, | ||
| 794 | is in current search buffer? | ||
| 795 | if it before then restart search | ||
| 796 | if after then keep searching till find it */ | ||
| 797 | |||
| 798 | if (file->private_data == NULL) { | ||
| 799 | rc = -EINVAL; | ||
| 800 | free_xid(xid); | ||
| 801 | return rc; | ||
| 802 | } | ||
| 803 | cifsFile = file->private_data; | ||
| 804 | if (cifsFile->srch_inf.endOfSearch) { | ||
| 805 | if (cifsFile->srch_inf.emptyDir) { | ||
| 806 | cifs_dbg(FYI, "End of search, empty dir\n"); | ||
| 807 | rc = 0; | ||
| 808 | break; | ||
| 809 | } | ||
| 810 | } /* else { | ||
| 811 | cifsFile->invalidHandle = true; | ||
| 812 | tcon->ses->server->close(xid, tcon, &cifsFile->fid); | ||
| 813 | } */ | ||
| 814 | 776 | ||
| 815 | tcon = tlink_tcon(cifsFile->tlink); | 777 | /* 1) If search is active, |
| 816 | rc = find_cifs_entry(xid, tcon, file, ¤t_entry, | 778 | is in current search buffer? |
| 817 | &num_to_fill); | 779 | if it before then restart search |
| 818 | if (rc) { | 780 | if after then keep searching till find it */ |
| 819 | cifs_dbg(FYI, "fce error %d\n", rc); | 781 | |
| 820 | goto rddir2_exit; | 782 | if (file->private_data == NULL) { |
| 821 | } else if (current_entry != NULL) { | 783 | rc = -EINVAL; |
| 822 | cifs_dbg(FYI, "entry %lld found\n", file->f_pos); | 784 | goto rddir2_exit; |
| 823 | } else { | 785 | } |
| 824 | cifs_dbg(FYI, "could not find entry\n"); | 786 | cifsFile = file->private_data; |
| 787 | if (cifsFile->srch_inf.endOfSearch) { | ||
| 788 | if (cifsFile->srch_inf.emptyDir) { | ||
| 789 | cifs_dbg(FYI, "End of search, empty dir\n"); | ||
| 790 | rc = 0; | ||
| 825 | goto rddir2_exit; | 791 | goto rddir2_exit; |
| 826 | } | 792 | } |
| 827 | cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n", | 793 | } /* else { |
| 828 | num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); | 794 | cifsFile->invalidHandle = true; |
| 829 | max_len = tcon->ses->server->ops->calc_smb_size( | 795 | tcon->ses->server->close(xid, tcon, &cifsFile->fid); |
| 830 | cifsFile->srch_inf.ntwrk_buf_start); | 796 | } */ |
| 831 | end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; | 797 | |
| 832 | 798 | tcon = tlink_tcon(cifsFile->tlink); | |
| 833 | tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); | 799 | rc = find_cifs_entry(xid, tcon, ctx->pos, file, ¤t_entry, |
| 834 | if (tmp_buf == NULL) { | 800 | &num_to_fill); |
| 835 | rc = -ENOMEM; | 801 | if (rc) { |
| 802 | cifs_dbg(FYI, "fce error %d\n", rc); | ||
| 803 | goto rddir2_exit; | ||
| 804 | } else if (current_entry != NULL) { | ||
| 805 | cifs_dbg(FYI, "entry %lld found\n", ctx->pos); | ||
| 806 | } else { | ||
| 807 | cifs_dbg(FYI, "could not find entry\n"); | ||
| 808 | goto rddir2_exit; | ||
| 809 | } | ||
| 810 | cifs_dbg(FYI, "loop through %d times filling dir for net buf %p\n", | ||
| 811 | num_to_fill, cifsFile->srch_inf.ntwrk_buf_start); | ||
| 812 | max_len = tcon->ses->server->ops->calc_smb_size( | ||
| 813 | cifsFile->srch_inf.ntwrk_buf_start); | ||
| 814 | end_of_smb = cifsFile->srch_inf.ntwrk_buf_start + max_len; | ||
| 815 | |||
| 816 | tmp_buf = kmalloc(UNICODE_NAME_MAX, GFP_KERNEL); | ||
| 817 | if (tmp_buf == NULL) { | ||
| 818 | rc = -ENOMEM; | ||
| 819 | goto rddir2_exit; | ||
| 820 | } | ||
| 821 | |||
| 822 | for (i = 0; i < num_to_fill; i++) { | ||
| 823 | if (current_entry == NULL) { | ||
| 824 | /* evaluate whether this case is an error */ | ||
| 825 | cifs_dbg(VFS, "past SMB end, num to fill %d i %d\n", | ||
| 826 | num_to_fill, i); | ||
| 836 | break; | 827 | break; |
| 837 | } | 828 | } |
| 838 | 829 | /* | |
| 839 | for (i = 0; (i < num_to_fill) && (rc == 0); i++) { | 830 | * if buggy server returns . and .. late do we want to |
| 840 | if (current_entry == NULL) { | 831 | * check for that here? |
| 841 | /* evaluate whether this case is an error */ | 832 | */ |
| 842 | cifs_dbg(VFS, "past SMB end, num to fill %d i %d\n", | 833 | rc = cifs_filldir(current_entry, file, ctx, |
| 843 | num_to_fill, i); | 834 | tmp_buf, max_len); |
| 844 | break; | 835 | if (rc) { |
| 845 | } | 836 | if (rc > 0) |
| 846 | /* | ||
| 847 | * if buggy server returns . and .. late do we want to | ||
| 848 | * check for that here? | ||
| 849 | */ | ||
| 850 | rc = cifs_filldir(current_entry, file, filldir, | ||
| 851 | direntry, tmp_buf, max_len); | ||
| 852 | if (rc == -EOVERFLOW) { | ||
| 853 | rc = 0; | 837 | rc = 0; |
| 854 | break; | 838 | break; |
| 855 | } | ||
| 856 | |||
| 857 | file->f_pos++; | ||
| 858 | if (file->f_pos == | ||
| 859 | cifsFile->srch_inf.index_of_last_entry) { | ||
| 860 | cifs_dbg(FYI, "last entry in buf at pos %lld %s\n", | ||
| 861 | file->f_pos, tmp_buf); | ||
| 862 | cifs_save_resume_key(current_entry, cifsFile); | ||
| 863 | break; | ||
| 864 | } else | ||
| 865 | current_entry = | ||
| 866 | nxt_dir_entry(current_entry, end_of_smb, | ||
| 867 | cifsFile->srch_inf.info_level); | ||
| 868 | } | 839 | } |
| 869 | kfree(tmp_buf); | 840 | |
| 870 | break; | 841 | ctx->pos++; |
| 871 | } /* end switch */ | 842 | if (ctx->pos == |
| 843 | cifsFile->srch_inf.index_of_last_entry) { | ||
| 844 | cifs_dbg(FYI, "last entry in buf at pos %lld %s\n", | ||
| 845 | ctx->pos, tmp_buf); | ||
| 846 | cifs_save_resume_key(current_entry, cifsFile); | ||
| 847 | break; | ||
| 848 | } else | ||
| 849 | current_entry = | ||
| 850 | nxt_dir_entry(current_entry, end_of_smb, | ||
| 851 | cifsFile->srch_inf.info_level); | ||
| 852 | } | ||
| 853 | kfree(tmp_buf); | ||
| 872 | 854 | ||
| 873 | rddir2_exit: | 855 | rddir2_exit: |
| 874 | free_xid(xid); | 856 | free_xid(xid); |
diff --git a/fs/coda/dir.c b/fs/coda/dir.c index b7d3a05c062c..87e0ee9f4465 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c | |||
| @@ -43,15 +43,14 @@ static int coda_rename(struct inode *old_inode, struct dentry *old_dentry, | |||
| 43 | struct inode *new_inode, struct dentry *new_dentry); | 43 | struct inode *new_inode, struct dentry *new_dentry); |
| 44 | 44 | ||
| 45 | /* dir file-ops */ | 45 | /* dir file-ops */ |
| 46 | static int coda_readdir(struct file *file, void *buf, filldir_t filldir); | 46 | static int coda_readdir(struct file *file, struct dir_context *ctx); |
| 47 | 47 | ||
| 48 | /* dentry ops */ | 48 | /* dentry ops */ |
| 49 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); | 49 | static int coda_dentry_revalidate(struct dentry *de, unsigned int flags); |
| 50 | static int coda_dentry_delete(const struct dentry *); | 50 | static int coda_dentry_delete(const struct dentry *); |
| 51 | 51 | ||
| 52 | /* support routines */ | 52 | /* support routines */ |
| 53 | static int coda_venus_readdir(struct file *coda_file, void *buf, | 53 | static int coda_venus_readdir(struct file *, struct dir_context *); |
| 54 | filldir_t filldir); | ||
| 55 | 54 | ||
| 56 | /* same as fs/bad_inode.c */ | 55 | /* same as fs/bad_inode.c */ |
| 57 | static int coda_return_EIO(void) | 56 | static int coda_return_EIO(void) |
| @@ -85,7 +84,7 @@ const struct inode_operations coda_dir_inode_operations = | |||
| 85 | const struct file_operations coda_dir_operations = { | 84 | const struct file_operations coda_dir_operations = { |
| 86 | .llseek = generic_file_llseek, | 85 | .llseek = generic_file_llseek, |
| 87 | .read = generic_read_dir, | 86 | .read = generic_read_dir, |
| 88 | .readdir = coda_readdir, | 87 | .iterate = coda_readdir, |
| 89 | .open = coda_open, | 88 | .open = coda_open, |
| 90 | .release = coda_release, | 89 | .release = coda_release, |
| 91 | .fsync = coda_fsync, | 90 | .fsync = coda_fsync, |
| @@ -378,7 +377,7 @@ static int coda_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 378 | 377 | ||
| 379 | 378 | ||
| 380 | /* file operations for directories */ | 379 | /* file operations for directories */ |
| 381 | static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) | 380 | static int coda_readdir(struct file *coda_file, struct dir_context *ctx) |
| 382 | { | 381 | { |
| 383 | struct coda_file_info *cfi; | 382 | struct coda_file_info *cfi; |
| 384 | struct file *host_file; | 383 | struct file *host_file; |
| @@ -391,30 +390,19 @@ static int coda_readdir(struct file *coda_file, void *buf, filldir_t filldir) | |||
| 391 | if (!host_file->f_op) | 390 | if (!host_file->f_op) |
| 392 | return -ENOTDIR; | 391 | return -ENOTDIR; |
| 393 | 392 | ||
| 394 | if (host_file->f_op->readdir) | 393 | if (host_file->f_op->iterate) { |
| 395 | { | ||
| 396 | /* potemkin case: we were handed a directory inode. | ||
| 397 | * We can't use vfs_readdir because we have to keep the file | ||
| 398 | * position in sync between the coda_file and the host_file. | ||
| 399 | * and as such we need grab the inode mutex. */ | ||
| 400 | struct inode *host_inode = file_inode(host_file); | 394 | struct inode *host_inode = file_inode(host_file); |
| 401 | |||
| 402 | mutex_lock(&host_inode->i_mutex); | 395 | mutex_lock(&host_inode->i_mutex); |
| 403 | host_file->f_pos = coda_file->f_pos; | ||
| 404 | |||
| 405 | ret = -ENOENT; | 396 | ret = -ENOENT; |
| 406 | if (!IS_DEADDIR(host_inode)) { | 397 | if (!IS_DEADDIR(host_inode)) { |
| 407 | ret = host_file->f_op->readdir(host_file, buf, filldir); | 398 | ret = host_file->f_op->iterate(host_file, ctx); |
| 408 | file_accessed(host_file); | 399 | file_accessed(host_file); |
| 409 | } | 400 | } |
| 410 | |||
| 411 | coda_file->f_pos = host_file->f_pos; | ||
| 412 | mutex_unlock(&host_inode->i_mutex); | 401 | mutex_unlock(&host_inode->i_mutex); |
| 402 | return ret; | ||
| 413 | } | 403 | } |
| 414 | else /* Venus: we must read Venus dirents from a file */ | 404 | /* Venus: we must read Venus dirents from a file */ |
| 415 | ret = coda_venus_readdir(coda_file, buf, filldir); | 405 | return coda_venus_readdir(coda_file, ctx); |
| 416 | |||
| 417 | return ret; | ||
| 418 | } | 406 | } |
| 419 | 407 | ||
| 420 | static inline unsigned int CDT2DT(unsigned char cdt) | 408 | static inline unsigned int CDT2DT(unsigned char cdt) |
| @@ -437,10 +425,8 @@ static inline unsigned int CDT2DT(unsigned char cdt) | |||
| 437 | } | 425 | } |
| 438 | 426 | ||
| 439 | /* support routines */ | 427 | /* support routines */ |
| 440 | static int coda_venus_readdir(struct file *coda_file, void *buf, | 428 | static int coda_venus_readdir(struct file *coda_file, struct dir_context *ctx) |
| 441 | filldir_t filldir) | ||
| 442 | { | 429 | { |
| 443 | int result = 0; /* # of entries returned */ | ||
| 444 | struct coda_file_info *cfi; | 430 | struct coda_file_info *cfi; |
| 445 | struct coda_inode_info *cii; | 431 | struct coda_inode_info *cii; |
| 446 | struct file *host_file; | 432 | struct file *host_file; |
| @@ -462,23 +448,12 @@ static int coda_venus_readdir(struct file *coda_file, void *buf, | |||
| 462 | vdir = kmalloc(sizeof(*vdir), GFP_KERNEL); | 448 | vdir = kmalloc(sizeof(*vdir), GFP_KERNEL); |
| 463 | if (!vdir) return -ENOMEM; | 449 | if (!vdir) return -ENOMEM; |
| 464 | 450 | ||
| 465 | if (coda_file->f_pos == 0) { | 451 | if (!dir_emit_dots(coda_file, ctx)) |
| 466 | ret = filldir(buf, ".", 1, 0, de->d_inode->i_ino, DT_DIR); | 452 | goto out; |
| 467 | if (ret < 0) | 453 | |
| 468 | goto out; | ||
| 469 | result++; | ||
| 470 | coda_file->f_pos++; | ||
| 471 | } | ||
| 472 | if (coda_file->f_pos == 1) { | ||
| 473 | ret = filldir(buf, "..", 2, 1, parent_ino(de), DT_DIR); | ||
| 474 | if (ret < 0) | ||
| 475 | goto out; | ||
| 476 | result++; | ||
| 477 | coda_file->f_pos++; | ||
| 478 | } | ||
| 479 | while (1) { | 454 | while (1) { |
| 480 | /* read entries from the directory file */ | 455 | /* read entries from the directory file */ |
| 481 | ret = kernel_read(host_file, coda_file->f_pos - 2, (char *)vdir, | 456 | ret = kernel_read(host_file, ctx->pos - 2, (char *)vdir, |
| 482 | sizeof(*vdir)); | 457 | sizeof(*vdir)); |
| 483 | if (ret < 0) { | 458 | if (ret < 0) { |
| 484 | printk(KERN_ERR "coda readdir: read dir %s failed %d\n", | 459 | printk(KERN_ERR "coda readdir: read dir %s failed %d\n", |
| @@ -507,7 +482,7 @@ static int coda_venus_readdir(struct file *coda_file, void *buf, | |||
| 507 | 482 | ||
| 508 | /* Make sure we skip '.' and '..', we already got those */ | 483 | /* Make sure we skip '.' and '..', we already got those */ |
| 509 | if (name.name[0] == '.' && (name.len == 1 || | 484 | if (name.name[0] == '.' && (name.len == 1 || |
| 510 | (vdir->d_name[1] == '.' && name.len == 2))) | 485 | (name.name[1] == '.' && name.len == 2))) |
| 511 | vdir->d_fileno = name.len = 0; | 486 | vdir->d_fileno = name.len = 0; |
| 512 | 487 | ||
| 513 | /* skip null entries */ | 488 | /* skip null entries */ |
| @@ -520,19 +495,16 @@ static int coda_venus_readdir(struct file *coda_file, void *buf, | |||
| 520 | if (!ino) ino = vdir->d_fileno; | 495 | if (!ino) ino = vdir->d_fileno; |
| 521 | 496 | ||
| 522 | type = CDT2DT(vdir->d_type); | 497 | type = CDT2DT(vdir->d_type); |
| 523 | ret = filldir(buf, name.name, name.len, | 498 | if (!dir_emit(ctx, name.name, name.len, ino, type)) |
| 524 | coda_file->f_pos, ino, type); | 499 | break; |
| 525 | /* failure means no space for filling in this round */ | ||
| 526 | if (ret < 0) break; | ||
| 527 | result++; | ||
| 528 | } | 500 | } |
| 529 | /* we'll always have progress because d_reclen is unsigned and | 501 | /* we'll always have progress because d_reclen is unsigned and |
| 530 | * we've already established it is non-zero. */ | 502 | * we've already established it is non-zero. */ |
| 531 | coda_file->f_pos += vdir->d_reclen; | 503 | ctx->pos += vdir->d_reclen; |
| 532 | } | 504 | } |
| 533 | out: | 505 | out: |
| 534 | kfree(vdir); | 506 | kfree(vdir); |
| 535 | return result ? result : ret; | 507 | return 0; |
| 536 | } | 508 | } |
| 537 | 509 | ||
| 538 | /* called when a cache lookup succeeds */ | 510 | /* called when a cache lookup succeeds */ |
diff --git a/fs/compat.c b/fs/compat.c index fc3b55dce184..6af20de2c1a3 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
| @@ -832,6 +832,7 @@ struct compat_old_linux_dirent { | |||
| 832 | }; | 832 | }; |
| 833 | 833 | ||
| 834 | struct compat_readdir_callback { | 834 | struct compat_readdir_callback { |
| 835 | struct dir_context ctx; | ||
| 835 | struct compat_old_linux_dirent __user *dirent; | 836 | struct compat_old_linux_dirent __user *dirent; |
| 836 | int result; | 837 | int result; |
| 837 | }; | 838 | }; |
| @@ -873,15 +874,15 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, | |||
| 873 | { | 874 | { |
| 874 | int error; | 875 | int error; |
| 875 | struct fd f = fdget(fd); | 876 | struct fd f = fdget(fd); |
| 876 | struct compat_readdir_callback buf; | 877 | struct compat_readdir_callback buf = { |
| 878 | .ctx.actor = compat_fillonedir, | ||
| 879 | .dirent = dirent | ||
| 880 | }; | ||
| 877 | 881 | ||
| 878 | if (!f.file) | 882 | if (!f.file) |
| 879 | return -EBADF; | 883 | return -EBADF; |
| 880 | 884 | ||
| 881 | buf.result = 0; | 885 | error = iterate_dir(f.file, &buf.ctx); |
| 882 | buf.dirent = dirent; | ||
| 883 | |||
| 884 | error = vfs_readdir(f.file, compat_fillonedir, &buf); | ||
| 885 | if (buf.result) | 886 | if (buf.result) |
| 886 | error = buf.result; | 887 | error = buf.result; |
| 887 | 888 | ||
| @@ -897,6 +898,7 @@ struct compat_linux_dirent { | |||
| 897 | }; | 898 | }; |
| 898 | 899 | ||
| 899 | struct compat_getdents_callback { | 900 | struct compat_getdents_callback { |
| 901 | struct dir_context ctx; | ||
| 900 | struct compat_linux_dirent __user *current_dir; | 902 | struct compat_linux_dirent __user *current_dir; |
| 901 | struct compat_linux_dirent __user *previous; | 903 | struct compat_linux_dirent __user *previous; |
| 902 | int count; | 904 | int count; |
| @@ -951,7 +953,11 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
| 951 | { | 953 | { |
| 952 | struct fd f; | 954 | struct fd f; |
| 953 | struct compat_linux_dirent __user * lastdirent; | 955 | struct compat_linux_dirent __user * lastdirent; |
| 954 | struct compat_getdents_callback buf; | 956 | struct compat_getdents_callback buf = { |
| 957 | .ctx.actor = compat_filldir, | ||
| 958 | .current_dir = dirent, | ||
| 959 | .count = count | ||
| 960 | }; | ||
| 955 | int error; | 961 | int error; |
| 956 | 962 | ||
| 957 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 963 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
| @@ -961,17 +967,12 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
| 961 | if (!f.file) | 967 | if (!f.file) |
| 962 | return -EBADF; | 968 | return -EBADF; |
| 963 | 969 | ||
| 964 | buf.current_dir = dirent; | 970 | error = iterate_dir(f.file, &buf.ctx); |
| 965 | buf.previous = NULL; | ||
| 966 | buf.count = count; | ||
| 967 | buf.error = 0; | ||
| 968 | |||
| 969 | error = vfs_readdir(f.file, compat_filldir, &buf); | ||
| 970 | if (error >= 0) | 971 | if (error >= 0) |
| 971 | error = buf.error; | 972 | error = buf.error; |
| 972 | lastdirent = buf.previous; | 973 | lastdirent = buf.previous; |
| 973 | if (lastdirent) { | 974 | if (lastdirent) { |
| 974 | if (put_user(f.file->f_pos, &lastdirent->d_off)) | 975 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
| 975 | error = -EFAULT; | 976 | error = -EFAULT; |
| 976 | else | 977 | else |
| 977 | error = count - buf.count; | 978 | error = count - buf.count; |
| @@ -983,6 +984,7 @@ asmlinkage long compat_sys_getdents(unsigned int fd, | |||
| 983 | #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 | 984 | #ifndef __ARCH_OMIT_COMPAT_SYS_GETDENTS64 |
| 984 | 985 | ||
| 985 | struct compat_getdents_callback64 { | 986 | struct compat_getdents_callback64 { |
| 987 | struct dir_context ctx; | ||
| 986 | struct linux_dirent64 __user *current_dir; | 988 | struct linux_dirent64 __user *current_dir; |
| 987 | struct linux_dirent64 __user *previous; | 989 | struct linux_dirent64 __user *previous; |
| 988 | int count; | 990 | int count; |
| @@ -1036,7 +1038,11 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
| 1036 | { | 1038 | { |
| 1037 | struct fd f; | 1039 | struct fd f; |
| 1038 | struct linux_dirent64 __user * lastdirent; | 1040 | struct linux_dirent64 __user * lastdirent; |
| 1039 | struct compat_getdents_callback64 buf; | 1041 | struct compat_getdents_callback64 buf = { |
| 1042 | .ctx.actor = compat_filldir64, | ||
| 1043 | .current_dir = dirent, | ||
| 1044 | .count = count | ||
| 1045 | }; | ||
| 1040 | int error; | 1046 | int error; |
| 1041 | 1047 | ||
| 1042 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 1048 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
| @@ -1046,17 +1052,12 @@ asmlinkage long compat_sys_getdents64(unsigned int fd, | |||
| 1046 | if (!f.file) | 1052 | if (!f.file) |
| 1047 | return -EBADF; | 1053 | return -EBADF; |
| 1048 | 1054 | ||
| 1049 | buf.current_dir = dirent; | 1055 | error = iterate_dir(f.file, &buf.ctx); |
| 1050 | buf.previous = NULL; | ||
| 1051 | buf.count = count; | ||
| 1052 | buf.error = 0; | ||
| 1053 | |||
| 1054 | error = vfs_readdir(f.file, compat_filldir64, &buf); | ||
| 1055 | if (error >= 0) | 1056 | if (error >= 0) |
| 1056 | error = buf.error; | 1057 | error = buf.error; |
| 1057 | lastdirent = buf.previous; | 1058 | lastdirent = buf.previous; |
| 1058 | if (lastdirent) { | 1059 | if (lastdirent) { |
| 1059 | typeof(lastdirent->d_off) d_off = f.file->f_pos; | 1060 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
| 1060 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) | 1061 | if (__put_user_unaligned(d_off, &lastdirent->d_off)) |
| 1061 | error = -EFAULT; | 1062 | error = -EFAULT; |
| 1062 | else | 1063 | else |
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 996cdc5abb85..5d19acfa7c6c 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c | |||
| @@ -66,7 +66,6 @@ | |||
| 66 | #include <linux/gigaset_dev.h> | 66 | #include <linux/gigaset_dev.h> |
| 67 | 67 | ||
| 68 | #ifdef CONFIG_BLOCK | 68 | #ifdef CONFIG_BLOCK |
| 69 | #include <linux/loop.h> | ||
| 70 | #include <linux/cdrom.h> | 69 | #include <linux/cdrom.h> |
| 71 | #include <linux/fd.h> | 70 | #include <linux/fd.h> |
| 72 | #include <scsi/scsi.h> | 71 | #include <scsi/scsi.h> |
| @@ -954,8 +953,6 @@ COMPATIBLE_IOCTL(MTIOCTOP) | |||
| 954 | /* Socket level stuff */ | 953 | /* Socket level stuff */ |
| 955 | COMPATIBLE_IOCTL(FIOQSIZE) | 954 | COMPATIBLE_IOCTL(FIOQSIZE) |
| 956 | #ifdef CONFIG_BLOCK | 955 | #ifdef CONFIG_BLOCK |
| 957 | /* loop */ | ||
| 958 | IGNORE_IOCTL(LOOP_CLR_FD) | ||
| 959 | /* md calls this on random blockdevs */ | 956 | /* md calls this on random blockdevs */ |
| 960 | IGNORE_IOCTL(RAID_VERSION) | 957 | IGNORE_IOCTL(RAID_VERSION) |
| 961 | /* qemu/qemu-img might call these two on plain files for probing */ | 958 | /* qemu/qemu-img might call these two on plain files for probing */ |
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7aabc6ad4e9b..64e5323cbbb0 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c | |||
| @@ -1532,84 +1532,66 @@ static inline unsigned char dt_type(struct configfs_dirent *sd) | |||
| 1532 | return (sd->s_mode >> 12) & 15; | 1532 | return (sd->s_mode >> 12) & 15; |
| 1533 | } | 1533 | } |
| 1534 | 1534 | ||
| 1535 | static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1535 | static int configfs_readdir(struct file *file, struct dir_context *ctx) |
| 1536 | { | 1536 | { |
| 1537 | struct dentry *dentry = filp->f_path.dentry; | 1537 | struct dentry *dentry = file->f_path.dentry; |
| 1538 | struct super_block *sb = dentry->d_sb; | 1538 | struct super_block *sb = dentry->d_sb; |
| 1539 | struct configfs_dirent * parent_sd = dentry->d_fsdata; | 1539 | struct configfs_dirent * parent_sd = dentry->d_fsdata; |
| 1540 | struct configfs_dirent *cursor = filp->private_data; | 1540 | struct configfs_dirent *cursor = file->private_data; |
| 1541 | struct list_head *p, *q = &cursor->s_sibling; | 1541 | struct list_head *p, *q = &cursor->s_sibling; |
| 1542 | ino_t ino = 0; | 1542 | ino_t ino = 0; |
| 1543 | int i = filp->f_pos; | ||
| 1544 | 1543 | ||
| 1545 | switch (i) { | 1544 | if (!dir_emit_dots(file, ctx)) |
| 1546 | case 0: | 1545 | return 0; |
| 1547 | ino = dentry->d_inode->i_ino; | 1546 | if (ctx->pos == 2) { |
| 1548 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 1547 | spin_lock(&configfs_dirent_lock); |
| 1549 | break; | 1548 | list_move(q, &parent_sd->s_children); |
| 1550 | filp->f_pos++; | 1549 | spin_unlock(&configfs_dirent_lock); |
| 1551 | i++; | 1550 | } |
| 1552 | /* fallthrough */ | 1551 | for (p = q->next; p != &parent_sd->s_children; p = p->next) { |
| 1553 | case 1: | 1552 | struct configfs_dirent *next; |
| 1554 | ino = parent_ino(dentry); | 1553 | const char *name; |
| 1555 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 1554 | int len; |
| 1556 | break; | 1555 | struct inode *inode = NULL; |
| 1557 | filp->f_pos++; | 1556 | |
| 1558 | i++; | 1557 | next = list_entry(p, struct configfs_dirent, s_sibling); |
| 1559 | /* fallthrough */ | 1558 | if (!next->s_element) |
| 1560 | default: | 1559 | continue; |
| 1561 | if (filp->f_pos == 2) { | ||
| 1562 | spin_lock(&configfs_dirent_lock); | ||
| 1563 | list_move(q, &parent_sd->s_children); | ||
| 1564 | spin_unlock(&configfs_dirent_lock); | ||
| 1565 | } | ||
| 1566 | for (p=q->next; p!= &parent_sd->s_children; p=p->next) { | ||
| 1567 | struct configfs_dirent *next; | ||
| 1568 | const char * name; | ||
| 1569 | int len; | ||
| 1570 | struct inode *inode = NULL; | ||
| 1571 | 1560 | ||
| 1572 | next = list_entry(p, struct configfs_dirent, | 1561 | name = configfs_get_name(next); |
| 1573 | s_sibling); | 1562 | len = strlen(name); |
| 1574 | if (!next->s_element) | 1563 | |
| 1575 | continue; | 1564 | /* |
| 1576 | 1565 | * We'll have a dentry and an inode for | |
| 1577 | name = configfs_get_name(next); | 1566 | * PINNED items and for open attribute |
| 1578 | len = strlen(name); | 1567 | * files. We lock here to prevent a race |
| 1579 | 1568 | * with configfs_d_iput() clearing | |
| 1580 | /* | 1569 | * s_dentry before calling iput(). |
| 1581 | * We'll have a dentry and an inode for | 1570 | * |
| 1582 | * PINNED items and for open attribute | 1571 | * Why do we go to the trouble? If |
| 1583 | * files. We lock here to prevent a race | 1572 | * someone has an attribute file open, |
| 1584 | * with configfs_d_iput() clearing | 1573 | * the inode number should match until |
| 1585 | * s_dentry before calling iput(). | 1574 | * they close it. Beyond that, we don't |
| 1586 | * | 1575 | * care. |
| 1587 | * Why do we go to the trouble? If | 1576 | */ |
| 1588 | * someone has an attribute file open, | 1577 | spin_lock(&configfs_dirent_lock); |
| 1589 | * the inode number should match until | 1578 | dentry = next->s_dentry; |
| 1590 | * they close it. Beyond that, we don't | 1579 | if (dentry) |
| 1591 | * care. | 1580 | inode = dentry->d_inode; |
| 1592 | */ | 1581 | if (inode) |
| 1593 | spin_lock(&configfs_dirent_lock); | 1582 | ino = inode->i_ino; |
| 1594 | dentry = next->s_dentry; | 1583 | spin_unlock(&configfs_dirent_lock); |
| 1595 | if (dentry) | 1584 | if (!inode) |
| 1596 | inode = dentry->d_inode; | 1585 | ino = iunique(sb, 2); |
| 1597 | if (inode) | ||
| 1598 | ino = inode->i_ino; | ||
| 1599 | spin_unlock(&configfs_dirent_lock); | ||
| 1600 | if (!inode) | ||
| 1601 | ino = iunique(sb, 2); | ||
| 1602 | 1586 | ||
| 1603 | if (filldir(dirent, name, len, filp->f_pos, ino, | 1587 | if (!dir_emit(ctx, name, len, ino, dt_type(next))) |
| 1604 | dt_type(next)) < 0) | 1588 | return 0; |
| 1605 | return 0; | ||
| 1606 | 1589 | ||
| 1607 | spin_lock(&configfs_dirent_lock); | 1590 | spin_lock(&configfs_dirent_lock); |
| 1608 | list_move(q, p); | 1591 | list_move(q, p); |
| 1609 | spin_unlock(&configfs_dirent_lock); | 1592 | spin_unlock(&configfs_dirent_lock); |
| 1610 | p = q; | 1593 | p = q; |
| 1611 | filp->f_pos++; | 1594 | ctx->pos++; |
| 1612 | } | ||
| 1613 | } | 1595 | } |
| 1614 | return 0; | 1596 | return 0; |
| 1615 | } | 1597 | } |
| @@ -1661,7 +1643,7 @@ const struct file_operations configfs_dir_operations = { | |||
| 1661 | .release = configfs_dir_close, | 1643 | .release = configfs_dir_close, |
| 1662 | .llseek = configfs_dir_lseek, | 1644 | .llseek = configfs_dir_lseek, |
| 1663 | .read = generic_read_dir, | 1645 | .read = generic_read_dir, |
| 1664 | .readdir = configfs_readdir, | 1646 | .iterate = configfs_readdir, |
| 1665 | }; | 1647 | }; |
| 1666 | 1648 | ||
| 1667 | int configfs_register_subsystem(struct configfs_subsystem *subsys) | 1649 | int configfs_register_subsystem(struct configfs_subsystem *subsys) |
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c index 35b1c7bd18b7..e501ac3a49ff 100644 --- a/fs/cramfs/inode.c +++ b/fs/cramfs/inode.c | |||
| @@ -349,18 +349,17 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 349 | /* | 349 | /* |
| 350 | * Read a cramfs directory entry. | 350 | * Read a cramfs directory entry. |
| 351 | */ | 351 | */ |
| 352 | static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 352 | static int cramfs_readdir(struct file *file, struct dir_context *ctx) |
| 353 | { | 353 | { |
| 354 | struct inode *inode = file_inode(filp); | 354 | struct inode *inode = file_inode(file); |
| 355 | struct super_block *sb = inode->i_sb; | 355 | struct super_block *sb = inode->i_sb; |
| 356 | char *buf; | 356 | char *buf; |
| 357 | unsigned int offset; | 357 | unsigned int offset; |
| 358 | int copied; | ||
| 359 | 358 | ||
| 360 | /* Offset within the thing. */ | 359 | /* Offset within the thing. */ |
| 361 | offset = filp->f_pos; | 360 | if (ctx->pos >= inode->i_size) |
| 362 | if (offset >= inode->i_size) | ||
| 363 | return 0; | 361 | return 0; |
| 362 | offset = ctx->pos; | ||
| 364 | /* Directory entries are always 4-byte aligned */ | 363 | /* Directory entries are always 4-byte aligned */ |
| 365 | if (offset & 3) | 364 | if (offset & 3) |
| 366 | return -EINVAL; | 365 | return -EINVAL; |
| @@ -369,14 +368,13 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 369 | if (!buf) | 368 | if (!buf) |
| 370 | return -ENOMEM; | 369 | return -ENOMEM; |
| 371 | 370 | ||
| 372 | copied = 0; | ||
| 373 | while (offset < inode->i_size) { | 371 | while (offset < inode->i_size) { |
| 374 | struct cramfs_inode *de; | 372 | struct cramfs_inode *de; |
| 375 | unsigned long nextoffset; | 373 | unsigned long nextoffset; |
| 376 | char *name; | 374 | char *name; |
| 377 | ino_t ino; | 375 | ino_t ino; |
| 378 | umode_t mode; | 376 | umode_t mode; |
| 379 | int namelen, error; | 377 | int namelen; |
| 380 | 378 | ||
| 381 | mutex_lock(&read_mutex); | 379 | mutex_lock(&read_mutex); |
| 382 | de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); | 380 | de = cramfs_read(sb, OFFSET(inode) + offset, sizeof(*de)+CRAMFS_MAXPATHLEN); |
| @@ -402,13 +400,10 @@ static int cramfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 402 | break; | 400 | break; |
| 403 | namelen--; | 401 | namelen--; |
| 404 | } | 402 | } |
| 405 | error = filldir(dirent, buf, namelen, offset, ino, mode >> 12); | 403 | if (!dir_emit(ctx, buf, namelen, ino, mode >> 12)) |
| 406 | if (error) | ||
| 407 | break; | 404 | break; |
| 408 | 405 | ||
| 409 | offset = nextoffset; | 406 | ctx->pos = offset = nextoffset; |
| 410 | filp->f_pos = offset; | ||
| 411 | copied++; | ||
| 412 | } | 407 | } |
| 413 | kfree(buf); | 408 | kfree(buf); |
| 414 | return 0; | 409 | return 0; |
| @@ -547,7 +542,7 @@ static const struct address_space_operations cramfs_aops = { | |||
| 547 | static const struct file_operations cramfs_directory_operations = { | 542 | static const struct file_operations cramfs_directory_operations = { |
| 548 | .llseek = generic_file_llseek, | 543 | .llseek = generic_file_llseek, |
| 549 | .read = generic_read_dir, | 544 | .read = generic_read_dir, |
| 550 | .readdir = cramfs_readdir, | 545 | .iterate = cramfs_readdir, |
| 551 | }; | 546 | }; |
| 552 | 547 | ||
| 553 | static const struct inode_operations cramfs_dir_inode_operations = { | 548 | static const struct inode_operations cramfs_dir_inode_operations = { |
diff --git a/fs/dcache.c b/fs/dcache.c index f09b9085f7d8..5a23073138df 100644 --- a/fs/dcache.c +++ b/fs/dcache.c | |||
| @@ -1612,6 +1612,10 @@ EXPORT_SYMBOL(d_obtain_alias); | |||
| 1612 | * If a dentry was found and moved, then it is returned. Otherwise NULL | 1612 | * If a dentry was found and moved, then it is returned. Otherwise NULL |
| 1613 | * is returned. This matches the expected return value of ->lookup. | 1613 | * is returned. This matches the expected return value of ->lookup. |
| 1614 | * | 1614 | * |
| 1615 | * Cluster filesystems may call this function with a negative, hashed dentry. | ||
| 1616 | * In that case, we know that the inode will be a regular file, and also this | ||
| 1617 | * will only occur during atomic_open. So we need to check for the dentry | ||
| 1618 | * being already hashed only in the final case. | ||
| 1615 | */ | 1619 | */ |
| 1616 | struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | 1620 | struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) |
| 1617 | { | 1621 | { |
| @@ -1636,8 +1640,11 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry) | |||
| 1636 | security_d_instantiate(dentry, inode); | 1640 | security_d_instantiate(dentry, inode); |
| 1637 | d_rehash(dentry); | 1641 | d_rehash(dentry); |
| 1638 | } | 1642 | } |
| 1639 | } else | 1643 | } else { |
| 1640 | d_add(dentry, inode); | 1644 | d_instantiate(dentry, inode); |
| 1645 | if (d_unhashed(dentry)) | ||
| 1646 | d_rehash(dentry); | ||
| 1647 | } | ||
| 1641 | return new; | 1648 | return new; |
| 1642 | } | 1649 | } |
| 1643 | EXPORT_SYMBOL(d_splice_alias); | 1650 | EXPORT_SYMBOL(d_splice_alias); |
diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index c5ca6ae5a30c..63146295153b 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c | |||
| @@ -21,6 +21,7 @@ | |||
| 21 | #include <linux/debugfs.h> | 21 | #include <linux/debugfs.h> |
| 22 | #include <linux/io.h> | 22 | #include <linux/io.h> |
| 23 | #include <linux/slab.h> | 23 | #include <linux/slab.h> |
| 24 | #include <linux/atomic.h> | ||
| 24 | 25 | ||
| 25 | static ssize_t default_read_file(struct file *file, char __user *buf, | 26 | static ssize_t default_read_file(struct file *file, char __user *buf, |
| 26 | size_t count, loff_t *ppos) | 27 | size_t count, loff_t *ppos) |
| @@ -403,6 +404,47 @@ struct dentry *debugfs_create_size_t(const char *name, umode_t mode, | |||
| 403 | } | 404 | } |
| 404 | EXPORT_SYMBOL_GPL(debugfs_create_size_t); | 405 | EXPORT_SYMBOL_GPL(debugfs_create_size_t); |
| 405 | 406 | ||
| 407 | static int debugfs_atomic_t_set(void *data, u64 val) | ||
| 408 | { | ||
| 409 | atomic_set((atomic_t *)data, val); | ||
| 410 | return 0; | ||
| 411 | } | ||
| 412 | static int debugfs_atomic_t_get(void *data, u64 *val) | ||
| 413 | { | ||
| 414 | *val = atomic_read((atomic_t *)data); | ||
| 415 | return 0; | ||
| 416 | } | ||
| 417 | DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t, debugfs_atomic_t_get, | ||
| 418 | debugfs_atomic_t_set, "%lld\n"); | ||
| 419 | DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_ro, debugfs_atomic_t_get, NULL, "%lld\n"); | ||
| 420 | DEFINE_SIMPLE_ATTRIBUTE(fops_atomic_t_wo, NULL, debugfs_atomic_t_set, "%lld\n"); | ||
| 421 | |||
| 422 | /** | ||
| 423 | * debugfs_create_atomic_t - create a debugfs file that is used to read and | ||
| 424 | * write an atomic_t value | ||
| 425 | * @name: a pointer to a string containing the name of the file to create. | ||
| 426 | * @mode: the permission that the file should have | ||
| 427 | * @parent: a pointer to the parent dentry for this file. This should be a | ||
| 428 | * directory dentry if set. If this parameter is %NULL, then the | ||
| 429 | * file will be created in the root of the debugfs filesystem. | ||
| 430 | * @value: a pointer to the variable that the file should read to and write | ||
| 431 | * from. | ||
| 432 | */ | ||
| 433 | struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, | ||
| 434 | struct dentry *parent, atomic_t *value) | ||
| 435 | { | ||
| 436 | /* if there are no write bits set, make read only */ | ||
| 437 | if (!(mode & S_IWUGO)) | ||
| 438 | return debugfs_create_file(name, mode, parent, value, | ||
| 439 | &fops_atomic_t_ro); | ||
| 440 | /* if there are no read bits set, make write only */ | ||
| 441 | if (!(mode & S_IRUGO)) | ||
| 442 | return debugfs_create_file(name, mode, parent, value, | ||
| 443 | &fops_atomic_t_wo); | ||
| 444 | |||
| 445 | return debugfs_create_file(name, mode, parent, value, &fops_atomic_t); | ||
| 446 | } | ||
| 447 | EXPORT_SYMBOL_GPL(debugfs_create_atomic_t); | ||
| 406 | 448 | ||
| 407 | static ssize_t read_file_bool(struct file *file, char __user *user_buf, | 449 | static ssize_t read_file_bool(struct file *file, char __user *user_buf, |
| 408 | size_t count, loff_t *ppos) | 450 | size_t count, loff_t *ppos) |
| @@ -431,6 +473,7 @@ static ssize_t write_file_bool(struct file *file, const char __user *user_buf, | |||
| 431 | if (copy_from_user(buf, user_buf, buf_size)) | 473 | if (copy_from_user(buf, user_buf, buf_size)) |
| 432 | return -EFAULT; | 474 | return -EFAULT; |
| 433 | 475 | ||
| 476 | buf[buf_size] = '\0'; | ||
| 434 | if (strtobool(buf, &bv) == 0) | 477 | if (strtobool(buf, &bv) == 0) |
| 435 | *val = bv; | 478 | *val = bv; |
| 436 | 479 | ||
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 7d58d5b112b5..76feb4b60fa6 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
| @@ -138,8 +138,9 @@ static ssize_t cluster_cluster_name_read(struct dlm_cluster *cl, char *buf) | |||
| 138 | static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, | 138 | static ssize_t cluster_cluster_name_write(struct dlm_cluster *cl, |
| 139 | const char *buf, size_t len) | 139 | const char *buf, size_t len) |
| 140 | { | 140 | { |
| 141 | strncpy(dlm_config.ci_cluster_name, buf, DLM_LOCKSPACE_LEN); | 141 | strlcpy(dlm_config.ci_cluster_name, buf, |
| 142 | strncpy(cl->cl_cluster_name, buf, DLM_LOCKSPACE_LEN); | 142 | sizeof(dlm_config.ci_cluster_name)); |
| 143 | strlcpy(cl->cl_cluster_name, buf, sizeof(cl->cl_cluster_name)); | ||
| 143 | return len; | 144 | return len; |
| 144 | } | 145 | } |
| 145 | 146 | ||
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index 1b1146670c4b..e223a911a834 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c | |||
| @@ -2038,8 +2038,8 @@ static void set_lvb_lock_pc(struct dlm_rsb *r, struct dlm_lkb *lkb, | |||
| 2038 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; | 2038 | b = dlm_lvb_operations[lkb->lkb_grmode + 1][lkb->lkb_rqmode + 1]; |
| 2039 | if (b == 1) { | 2039 | if (b == 1) { |
| 2040 | int len = receive_extralen(ms); | 2040 | int len = receive_extralen(ms); |
| 2041 | if (len > DLM_RESNAME_MAXLEN) | 2041 | if (len > r->res_ls->ls_lvblen) |
| 2042 | len = DLM_RESNAME_MAXLEN; | 2042 | len = r->res_ls->ls_lvblen; |
| 2043 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | 2043 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); |
| 2044 | lkb->lkb_lvbseq = ms->m_lvbseq; | 2044 | lkb->lkb_lvbseq = ms->m_lvbseq; |
| 2045 | } | 2045 | } |
| @@ -3893,8 +3893,8 @@ static int receive_lvb(struct dlm_ls *ls, struct dlm_lkb *lkb, | |||
| 3893 | if (!lkb->lkb_lvbptr) | 3893 | if (!lkb->lkb_lvbptr) |
| 3894 | return -ENOMEM; | 3894 | return -ENOMEM; |
| 3895 | len = receive_extralen(ms); | 3895 | len = receive_extralen(ms); |
| 3896 | if (len > DLM_RESNAME_MAXLEN) | 3896 | if (len > ls->ls_lvblen) |
| 3897 | len = DLM_RESNAME_MAXLEN; | 3897 | len = ls->ls_lvblen; |
| 3898 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); | 3898 | memcpy(lkb->lkb_lvbptr, ms->m_extra, len); |
| 3899 | } | 3899 | } |
| 3900 | return 0; | 3900 | return 0; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 3ca79d3253b9..88556dc0458e 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
| @@ -883,17 +883,24 @@ int dlm_release_lockspace(void *lockspace, int force) | |||
| 883 | void dlm_stop_lockspaces(void) | 883 | void dlm_stop_lockspaces(void) |
| 884 | { | 884 | { |
| 885 | struct dlm_ls *ls; | 885 | struct dlm_ls *ls; |
| 886 | int count; | ||
| 886 | 887 | ||
| 887 | restart: | 888 | restart: |
| 889 | count = 0; | ||
| 888 | spin_lock(&lslist_lock); | 890 | spin_lock(&lslist_lock); |
| 889 | list_for_each_entry(ls, &lslist, ls_list) { | 891 | list_for_each_entry(ls, &lslist, ls_list) { |
| 890 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) | 892 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) { |
| 893 | count++; | ||
| 891 | continue; | 894 | continue; |
| 895 | } | ||
| 892 | spin_unlock(&lslist_lock); | 896 | spin_unlock(&lslist_lock); |
| 893 | log_error(ls, "no userland control daemon, stopping lockspace"); | 897 | log_error(ls, "no userland control daemon, stopping lockspace"); |
| 894 | dlm_ls_stop(ls); | 898 | dlm_ls_stop(ls); |
| 895 | goto restart; | 899 | goto restart; |
| 896 | } | 900 | } |
| 897 | spin_unlock(&lslist_lock); | 901 | spin_unlock(&lslist_lock); |
| 902 | |||
| 903 | if (count) | ||
| 904 | log_print("dlm user daemon left %d lockspaces", count); | ||
| 898 | } | 905 | } |
| 899 | 906 | ||
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index d0ccd2fd79eb..d90909ec6aa6 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c | |||
| @@ -52,7 +52,6 @@ | |||
| 52 | #include <linux/mutex.h> | 52 | #include <linux/mutex.h> |
| 53 | #include <linux/sctp.h> | 53 | #include <linux/sctp.h> |
| 54 | #include <linux/slab.h> | 54 | #include <linux/slab.h> |
| 55 | #include <linux/sctp.h> | ||
| 56 | #include <net/sctp/sctp.h> | 55 | #include <net/sctp/sctp.h> |
| 57 | #include <net/ipv6.h> | 56 | #include <net/ipv6.h> |
| 58 | 57 | ||
| @@ -126,6 +125,7 @@ struct connection { | |||
| 126 | struct connection *othercon; | 125 | struct connection *othercon; |
| 127 | struct work_struct rwork; /* Receive workqueue */ | 126 | struct work_struct rwork; /* Receive workqueue */ |
| 128 | struct work_struct swork; /* Send workqueue */ | 127 | struct work_struct swork; /* Send workqueue */ |
| 128 | bool try_new_addr; | ||
| 129 | }; | 129 | }; |
| 130 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) | 130 | #define sock2con(x) ((struct connection *)(x)->sk_user_data) |
| 131 | 131 | ||
| @@ -144,6 +144,7 @@ struct dlm_node_addr { | |||
| 144 | struct list_head list; | 144 | struct list_head list; |
| 145 | int nodeid; | 145 | int nodeid; |
| 146 | int addr_count; | 146 | int addr_count; |
| 147 | int curr_addr_index; | ||
| 147 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; | 148 | struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; |
| 148 | }; | 149 | }; |
| 149 | 150 | ||
| @@ -310,7 +311,7 @@ static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | |||
| 310 | } | 311 | } |
| 311 | 312 | ||
| 312 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | 313 | static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, |
| 313 | struct sockaddr *sa_out) | 314 | struct sockaddr *sa_out, bool try_new_addr) |
| 314 | { | 315 | { |
| 315 | struct sockaddr_storage sas; | 316 | struct sockaddr_storage sas; |
| 316 | struct dlm_node_addr *na; | 317 | struct dlm_node_addr *na; |
| @@ -320,8 +321,16 @@ static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, | |||
| 320 | 321 | ||
| 321 | spin_lock(&dlm_node_addrs_spin); | 322 | spin_lock(&dlm_node_addrs_spin); |
| 322 | na = find_node_addr(nodeid); | 323 | na = find_node_addr(nodeid); |
| 323 | if (na && na->addr_count) | 324 | if (na && na->addr_count) { |
| 324 | memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); | 325 | if (try_new_addr) { |
| 326 | na->curr_addr_index++; | ||
| 327 | if (na->curr_addr_index == na->addr_count) | ||
| 328 | na->curr_addr_index = 0; | ||
| 329 | } | ||
| 330 | |||
| 331 | memcpy(&sas, na->addr[na->curr_addr_index ], | ||
| 332 | sizeof(struct sockaddr_storage)); | ||
| 333 | } | ||
| 325 | spin_unlock(&dlm_node_addrs_spin); | 334 | spin_unlock(&dlm_node_addrs_spin); |
| 326 | 335 | ||
| 327 | if (!na) | 336 | if (!na) |
| @@ -353,19 +362,22 @@ static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) | |||
| 353 | { | 362 | { |
| 354 | struct dlm_node_addr *na; | 363 | struct dlm_node_addr *na; |
| 355 | int rv = -EEXIST; | 364 | int rv = -EEXIST; |
| 365 | int addr_i; | ||
| 356 | 366 | ||
| 357 | spin_lock(&dlm_node_addrs_spin); | 367 | spin_lock(&dlm_node_addrs_spin); |
| 358 | list_for_each_entry(na, &dlm_node_addrs, list) { | 368 | list_for_each_entry(na, &dlm_node_addrs, list) { |
| 359 | if (!na->addr_count) | 369 | if (!na->addr_count) |
| 360 | continue; | 370 | continue; |
| 361 | 371 | ||
| 362 | if (!addr_compare(na->addr[0], addr)) | 372 | for (addr_i = 0; addr_i < na->addr_count; addr_i++) { |
| 363 | continue; | 373 | if (addr_compare(na->addr[addr_i], addr)) { |
| 364 | 374 | *nodeid = na->nodeid; | |
| 365 | *nodeid = na->nodeid; | 375 | rv = 0; |
| 366 | rv = 0; | 376 | goto unlock; |
| 367 | break; | 377 | } |
| 378 | } | ||
| 368 | } | 379 | } |
| 380 | unlock: | ||
| 369 | spin_unlock(&dlm_node_addrs_spin); | 381 | spin_unlock(&dlm_node_addrs_spin); |
| 370 | return rv; | 382 | return rv; |
| 371 | } | 383 | } |
| @@ -561,8 +573,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) | |||
| 561 | 573 | ||
| 562 | static void sctp_init_failed_foreach(struct connection *con) | 574 | static void sctp_init_failed_foreach(struct connection *con) |
| 563 | { | 575 | { |
| 576 | |||
| 577 | /* | ||
| 578 | * Don't try to recover base con and handle race where the | ||
| 579 | * other node's assoc init creates a assoc and we get that | ||
| 580 | * notification, then we get a notification that our attempt | ||
| 581 | * failed due. This happens when we are still trying the primary | ||
| 582 | * address, but the other node has already tried secondary addrs | ||
| 583 | * and found one that worked. | ||
| 584 | */ | ||
| 585 | if (!con->nodeid || con->sctp_assoc) | ||
| 586 | return; | ||
| 587 | |||
| 588 | log_print("Retrying SCTP association init for node %d\n", con->nodeid); | ||
| 589 | |||
| 590 | con->try_new_addr = true; | ||
| 564 | con->sctp_assoc = 0; | 591 | con->sctp_assoc = 0; |
| 565 | if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { | 592 | if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) { |
| 566 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | 593 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) |
| 567 | queue_work(send_workqueue, &con->swork); | 594 | queue_work(send_workqueue, &con->swork); |
| 568 | } | 595 | } |
| @@ -579,15 +606,56 @@ static void sctp_init_failed(void) | |||
| 579 | mutex_unlock(&connections_lock); | 606 | mutex_unlock(&connections_lock); |
| 580 | } | 607 | } |
| 581 | 608 | ||
| 609 | static void retry_failed_sctp_send(struct connection *recv_con, | ||
| 610 | struct sctp_send_failed *sn_send_failed, | ||
| 611 | char *buf) | ||
| 612 | { | ||
| 613 | int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed); | ||
| 614 | struct dlm_mhandle *mh; | ||
| 615 | struct connection *con; | ||
| 616 | char *retry_buf; | ||
| 617 | int nodeid = sn_send_failed->ssf_info.sinfo_ppid; | ||
| 618 | |||
| 619 | log_print("Retry sending %d bytes to node id %d", len, nodeid); | ||
| 620 | |||
| 621 | con = nodeid2con(nodeid, 0); | ||
| 622 | if (!con) { | ||
| 623 | log_print("Could not look up con for nodeid %d\n", | ||
| 624 | nodeid); | ||
| 625 | return; | ||
| 626 | } | ||
| 627 | |||
| 628 | mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf); | ||
| 629 | if (!mh) { | ||
| 630 | log_print("Could not allocate buf for retry."); | ||
| 631 | return; | ||
| 632 | } | ||
| 633 | memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len); | ||
| 634 | dlm_lowcomms_commit_buffer(mh); | ||
| 635 | |||
| 636 | /* | ||
| 637 | * If we got a assoc changed event before the send failed event then | ||
| 638 | * we only need to retry the send. | ||
| 639 | */ | ||
| 640 | if (con->sctp_assoc) { | ||
| 641 | if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) | ||
| 642 | queue_work(send_workqueue, &con->swork); | ||
| 643 | } else | ||
| 644 | sctp_init_failed_foreach(con); | ||
| 645 | } | ||
| 646 | |||
| 582 | /* Something happened to an association */ | 647 | /* Something happened to an association */ |
| 583 | static void process_sctp_notification(struct connection *con, | 648 | static void process_sctp_notification(struct connection *con, |
| 584 | struct msghdr *msg, char *buf) | 649 | struct msghdr *msg, char *buf) |
| 585 | { | 650 | { |
| 586 | union sctp_notification *sn = (union sctp_notification *)buf; | 651 | union sctp_notification *sn = (union sctp_notification *)buf; |
| 587 | 652 | ||
| 588 | if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { | 653 | switch (sn->sn_header.sn_type) { |
| 654 | case SCTP_SEND_FAILED: | ||
| 655 | retry_failed_sctp_send(con, &sn->sn_send_failed, buf); | ||
| 656 | break; | ||
| 657 | case SCTP_ASSOC_CHANGE: | ||
| 589 | switch (sn->sn_assoc_change.sac_state) { | 658 | switch (sn->sn_assoc_change.sac_state) { |
| 590 | |||
| 591 | case SCTP_COMM_UP: | 659 | case SCTP_COMM_UP: |
| 592 | case SCTP_RESTART: | 660 | case SCTP_RESTART: |
| 593 | { | 661 | { |
| @@ -662,9 +730,11 @@ static void process_sctp_notification(struct connection *con, | |||
| 662 | log_print("connecting to %d sctp association %d", | 730 | log_print("connecting to %d sctp association %d", |
| 663 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); | 731 | nodeid, (int)sn->sn_assoc_change.sac_assoc_id); |
| 664 | 732 | ||
| 733 | new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id; | ||
| 734 | new_con->try_new_addr = false; | ||
| 665 | /* Send any pending writes */ | 735 | /* Send any pending writes */ |
| 666 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); | 736 | clear_bit(CF_CONNECT_PENDING, &new_con->flags); |
| 667 | clear_bit(CF_INIT_PENDING, &con->flags); | 737 | clear_bit(CF_INIT_PENDING, &new_con->flags); |
| 668 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { | 738 | if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { |
| 669 | queue_work(send_workqueue, &new_con->swork); | 739 | queue_work(send_workqueue, &new_con->swork); |
| 670 | } | 740 | } |
| @@ -683,14 +753,10 @@ static void process_sctp_notification(struct connection *con, | |||
| 683 | } | 753 | } |
| 684 | break; | 754 | break; |
| 685 | 755 | ||
| 686 | /* We don't know which INIT failed, so clear the PENDING flags | ||
| 687 | * on them all. if assoc_id is zero then it will then try | ||
| 688 | * again */ | ||
| 689 | |||
| 690 | case SCTP_CANT_STR_ASSOC: | 756 | case SCTP_CANT_STR_ASSOC: |
| 691 | { | 757 | { |
| 758 | /* Will retry init when we get the send failed notification */ | ||
| 692 | log_print("Can't start SCTP association - retrying"); | 759 | log_print("Can't start SCTP association - retrying"); |
| 693 | sctp_init_failed(); | ||
| 694 | } | 760 | } |
| 695 | break; | 761 | break; |
| 696 | 762 | ||
| @@ -699,6 +765,8 @@ static void process_sctp_notification(struct connection *con, | |||
| 699 | (int)sn->sn_assoc_change.sac_assoc_id, | 765 | (int)sn->sn_assoc_change.sac_assoc_id, |
| 700 | sn->sn_assoc_change.sac_state); | 766 | sn->sn_assoc_change.sac_state); |
| 701 | } | 767 | } |
| 768 | default: | ||
| 769 | ; /* fall through */ | ||
| 702 | } | 770 | } |
| 703 | } | 771 | } |
| 704 | 772 | ||
| @@ -958,6 +1026,24 @@ static void free_entry(struct writequeue_entry *e) | |||
| 958 | kfree(e); | 1026 | kfree(e); |
| 959 | } | 1027 | } |
| 960 | 1028 | ||
| 1029 | /* | ||
| 1030 | * writequeue_entry_complete - try to delete and free write queue entry | ||
| 1031 | * @e: write queue entry to try to delete | ||
| 1032 | * @completed: bytes completed | ||
| 1033 | * | ||
| 1034 | * writequeue_lock must be held. | ||
| 1035 | */ | ||
| 1036 | static void writequeue_entry_complete(struct writequeue_entry *e, int completed) | ||
| 1037 | { | ||
| 1038 | e->offset += completed; | ||
| 1039 | e->len -= completed; | ||
| 1040 | |||
| 1041 | if (e->len == 0 && e->users == 0) { | ||
| 1042 | list_del(&e->list); | ||
| 1043 | free_entry(e); | ||
| 1044 | } | ||
| 1045 | } | ||
| 1046 | |||
| 961 | /* Initiate an SCTP association. | 1047 | /* Initiate an SCTP association. |
| 962 | This is a special case of send_to_sock() in that we don't yet have a | 1048 | This is a special case of send_to_sock() in that we don't yet have a |
| 963 | peeled-off socket for this association, so we use the listening socket | 1049 | peeled-off socket for this association, so we use the listening socket |
| @@ -977,15 +1063,14 @@ static void sctp_init_assoc(struct connection *con) | |||
| 977 | int addrlen; | 1063 | int addrlen; |
| 978 | struct kvec iov[1]; | 1064 | struct kvec iov[1]; |
| 979 | 1065 | ||
| 1066 | mutex_lock(&con->sock_mutex); | ||
| 980 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) | 1067 | if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) |
| 981 | return; | 1068 | goto unlock; |
| 982 | |||
| 983 | if (con->retries++ > MAX_CONNECT_RETRIES) | ||
| 984 | return; | ||
| 985 | 1069 | ||
| 986 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { | 1070 | if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr, |
| 1071 | con->try_new_addr)) { | ||
| 987 | log_print("no address for nodeid %d", con->nodeid); | 1072 | log_print("no address for nodeid %d", con->nodeid); |
| 988 | return; | 1073 | goto unlock; |
| 989 | } | 1074 | } |
| 990 | base_con = nodeid2con(0, 0); | 1075 | base_con = nodeid2con(0, 0); |
| 991 | BUG_ON(base_con == NULL); | 1076 | BUG_ON(base_con == NULL); |
| @@ -1003,17 +1088,25 @@ static void sctp_init_assoc(struct connection *con) | |||
| 1003 | if (list_empty(&con->writequeue)) { | 1088 | if (list_empty(&con->writequeue)) { |
| 1004 | spin_unlock(&con->writequeue_lock); | 1089 | spin_unlock(&con->writequeue_lock); |
| 1005 | log_print("writequeue empty for nodeid %d", con->nodeid); | 1090 | log_print("writequeue empty for nodeid %d", con->nodeid); |
| 1006 | return; | 1091 | goto unlock; |
| 1007 | } | 1092 | } |
| 1008 | 1093 | ||
| 1009 | e = list_first_entry(&con->writequeue, struct writequeue_entry, list); | 1094 | e = list_first_entry(&con->writequeue, struct writequeue_entry, list); |
| 1010 | len = e->len; | 1095 | len = e->len; |
| 1011 | offset = e->offset; | 1096 | offset = e->offset; |
| 1012 | spin_unlock(&con->writequeue_lock); | ||
| 1013 | 1097 | ||
| 1014 | /* Send the first block off the write queue */ | 1098 | /* Send the first block off the write queue */ |
| 1015 | iov[0].iov_base = page_address(e->page)+offset; | 1099 | iov[0].iov_base = page_address(e->page)+offset; |
| 1016 | iov[0].iov_len = len; | 1100 | iov[0].iov_len = len; |
| 1101 | spin_unlock(&con->writequeue_lock); | ||
| 1102 | |||
| 1103 | if (rem_addr.ss_family == AF_INET) { | ||
| 1104 | struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr; | ||
| 1105 | log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr); | ||
| 1106 | } else { | ||
| 1107 | struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr; | ||
| 1108 | log_print("Trying to connect to %pI6", &sin6->sin6_addr); | ||
| 1109 | } | ||
| 1017 | 1110 | ||
| 1018 | cmsg = CMSG_FIRSTHDR(&outmessage); | 1111 | cmsg = CMSG_FIRSTHDR(&outmessage); |
| 1019 | cmsg->cmsg_level = IPPROTO_SCTP; | 1112 | cmsg->cmsg_level = IPPROTO_SCTP; |
| @@ -1021,8 +1114,9 @@ static void sctp_init_assoc(struct connection *con) | |||
| 1021 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); | 1114 | cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); |
| 1022 | sinfo = CMSG_DATA(cmsg); | 1115 | sinfo = CMSG_DATA(cmsg); |
| 1023 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); | 1116 | memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); |
| 1024 | sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); | 1117 | sinfo->sinfo_ppid = cpu_to_le32(con->nodeid); |
| 1025 | outmessage.msg_controllen = cmsg->cmsg_len; | 1118 | outmessage.msg_controllen = cmsg->cmsg_len; |
| 1119 | sinfo->sinfo_flags |= SCTP_ADDR_OVER; | ||
| 1026 | 1120 | ||
| 1027 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); | 1121 | ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); |
| 1028 | if (ret < 0) { | 1122 | if (ret < 0) { |
| @@ -1035,15 +1129,12 @@ static void sctp_init_assoc(struct connection *con) | |||
| 1035 | } | 1129 | } |
| 1036 | else { | 1130 | else { |
| 1037 | spin_lock(&con->writequeue_lock); | 1131 | spin_lock(&con->writequeue_lock); |
| 1038 | e->offset += ret; | 1132 | writequeue_entry_complete(e, ret); |
| 1039 | e->len -= ret; | ||
| 1040 | |||
| 1041 | if (e->len == 0 && e->users == 0) { | ||
| 1042 | list_del(&e->list); | ||
| 1043 | free_entry(e); | ||
| 1044 | } | ||
| 1045 | spin_unlock(&con->writequeue_lock); | 1133 | spin_unlock(&con->writequeue_lock); |
| 1046 | } | 1134 | } |
| 1135 | |||
| 1136 | unlock: | ||
| 1137 | mutex_unlock(&con->sock_mutex); | ||
| 1047 | } | 1138 | } |
| 1048 | 1139 | ||
| 1049 | /* Connect a new socket to its peer */ | 1140 | /* Connect a new socket to its peer */ |
| @@ -1075,7 +1166,7 @@ static void tcp_connect_to_sock(struct connection *con) | |||
| 1075 | goto out_err; | 1166 | goto out_err; |
| 1076 | 1167 | ||
| 1077 | memset(&saddr, 0, sizeof(saddr)); | 1168 | memset(&saddr, 0, sizeof(saddr)); |
| 1078 | result = nodeid_to_addr(con->nodeid, &saddr, NULL); | 1169 | result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); |
| 1079 | if (result < 0) { | 1170 | if (result < 0) { |
| 1080 | log_print("no address for nodeid %d", con->nodeid); | 1171 | log_print("no address for nodeid %d", con->nodeid); |
| 1081 | goto out_err; | 1172 | goto out_err; |
| @@ -1254,6 +1345,7 @@ static int sctp_listen_for_all(void) | |||
| 1254 | int result = -EINVAL, num = 1, i, addr_len; | 1345 | int result = -EINVAL, num = 1, i, addr_len; |
| 1255 | struct connection *con = nodeid2con(0, GFP_NOFS); | 1346 | struct connection *con = nodeid2con(0, GFP_NOFS); |
| 1256 | int bufsize = NEEDED_RMEM; | 1347 | int bufsize = NEEDED_RMEM; |
| 1348 | int one = 1; | ||
| 1257 | 1349 | ||
| 1258 | if (!con) | 1350 | if (!con) |
| 1259 | return -ENOMEM; | 1351 | return -ENOMEM; |
| @@ -1288,6 +1380,11 @@ static int sctp_listen_for_all(void) | |||
| 1288 | goto create_delsock; | 1380 | goto create_delsock; |
| 1289 | } | 1381 | } |
| 1290 | 1382 | ||
| 1383 | result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, | ||
| 1384 | sizeof(one)); | ||
| 1385 | if (result < 0) | ||
| 1386 | log_print("Could not set SCTP NODELAY error %d\n", result); | ||
| 1387 | |||
| 1291 | /* Init con struct */ | 1388 | /* Init con struct */ |
| 1292 | sock->sk->sk_user_data = con; | 1389 | sock->sk->sk_user_data = con; |
| 1293 | con->sock = sock; | 1390 | con->sock = sock; |
| @@ -1493,13 +1590,7 @@ static void send_to_sock(struct connection *con) | |||
| 1493 | } | 1590 | } |
| 1494 | 1591 | ||
| 1495 | spin_lock(&con->writequeue_lock); | 1592 | spin_lock(&con->writequeue_lock); |
| 1496 | e->offset += ret; | 1593 | writequeue_entry_complete(e, ret); |
| 1497 | e->len -= ret; | ||
| 1498 | |||
| 1499 | if (e->len == 0 && e->users == 0) { | ||
| 1500 | list_del(&e->list); | ||
| 1501 | free_entry(e); | ||
| 1502 | } | ||
| 1503 | } | 1594 | } |
| 1504 | spin_unlock(&con->writequeue_lock); | 1595 | spin_unlock(&con->writequeue_lock); |
| 1505 | out: | 1596 | out: |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index a7abbea2c096..9aa05e08060b 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -68,9 +68,9 @@ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, | |||
| 68 | } | 68 | } |
| 69 | 69 | ||
| 70 | struct ecryptfs_getdents_callback { | 70 | struct ecryptfs_getdents_callback { |
| 71 | void *dirent; | 71 | struct dir_context ctx; |
| 72 | struct dir_context *caller; | ||
| 72 | struct dentry *dentry; | 73 | struct dentry *dentry; |
| 73 | filldir_t filldir; | ||
| 74 | int filldir_called; | 74 | int filldir_called; |
| 75 | int entries_written; | 75 | int entries_written; |
| 76 | }; | 76 | }; |
| @@ -96,9 +96,10 @@ ecryptfs_filldir(void *dirent, const char *lower_name, int lower_namelen, | |||
| 96 | rc); | 96 | rc); |
| 97 | goto out; | 97 | goto out; |
| 98 | } | 98 | } |
| 99 | rc = buf->filldir(buf->dirent, name, name_size, offset, ino, d_type); | 99 | buf->caller->pos = buf->ctx.pos; |
| 100 | rc = !dir_emit(buf->caller, name, name_size, ino, d_type); | ||
| 100 | kfree(name); | 101 | kfree(name); |
| 101 | if (rc >= 0) | 102 | if (!rc) |
| 102 | buf->entries_written++; | 103 | buf->entries_written++; |
| 103 | out: | 104 | out: |
| 104 | return rc; | 105 | return rc; |
| @@ -107,27 +108,23 @@ out: | |||
| 107 | /** | 108 | /** |
| 108 | * ecryptfs_readdir | 109 | * ecryptfs_readdir |
| 109 | * @file: The eCryptfs directory file | 110 | * @file: The eCryptfs directory file |
| 110 | * @dirent: Directory entry handle | 111 | * @ctx: The actor to feed the entries to |
| 111 | * @filldir: The filldir callback function | ||
| 112 | */ | 112 | */ |
| 113 | static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) | 113 | static int ecryptfs_readdir(struct file *file, struct dir_context *ctx) |
| 114 | { | 114 | { |
| 115 | int rc; | 115 | int rc; |
| 116 | struct file *lower_file; | 116 | struct file *lower_file; |
| 117 | struct inode *inode; | 117 | struct inode *inode; |
| 118 | struct ecryptfs_getdents_callback buf; | 118 | struct ecryptfs_getdents_callback buf = { |
| 119 | 119 | .ctx.actor = ecryptfs_filldir, | |
| 120 | .caller = ctx, | ||
| 121 | .dentry = file->f_path.dentry | ||
| 122 | }; | ||
| 120 | lower_file = ecryptfs_file_to_lower(file); | 123 | lower_file = ecryptfs_file_to_lower(file); |
| 121 | lower_file->f_pos = file->f_pos; | 124 | lower_file->f_pos = ctx->pos; |
| 122 | inode = file_inode(file); | 125 | inode = file_inode(file); |
| 123 | memset(&buf, 0, sizeof(buf)); | 126 | rc = iterate_dir(lower_file, &buf.ctx); |
| 124 | buf.dirent = dirent; | 127 | ctx->pos = buf.ctx.pos; |
| 125 | buf.dentry = file->f_path.dentry; | ||
| 126 | buf.filldir = filldir; | ||
| 127 | buf.filldir_called = 0; | ||
| 128 | buf.entries_written = 0; | ||
| 129 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); | ||
| 130 | file->f_pos = lower_file->f_pos; | ||
| 131 | if (rc < 0) | 128 | if (rc < 0) |
| 132 | goto out; | 129 | goto out; |
| 133 | if (buf.filldir_called && !buf.entries_written) | 130 | if (buf.filldir_called && !buf.entries_written) |
| @@ -344,7 +341,7 @@ ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
| 344 | #endif | 341 | #endif |
| 345 | 342 | ||
| 346 | const struct file_operations ecryptfs_dir_fops = { | 343 | const struct file_operations ecryptfs_dir_fops = { |
| 347 | .readdir = ecryptfs_readdir, | 344 | .iterate = ecryptfs_readdir, |
| 348 | .read = generic_read_dir, | 345 | .read = generic_read_dir, |
| 349 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, | 346 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
| 350 | #ifdef CONFIG_COMPAT | 347 | #ifdef CONFIG_COMPAT |
| @@ -365,7 +362,7 @@ const struct file_operations ecryptfs_main_fops = { | |||
| 365 | .aio_read = ecryptfs_read_update_atime, | 362 | .aio_read = ecryptfs_read_update_atime, |
| 366 | .write = do_sync_write, | 363 | .write = do_sync_write, |
| 367 | .aio_write = generic_file_aio_write, | 364 | .aio_write = generic_file_aio_write, |
| 368 | .readdir = ecryptfs_readdir, | 365 | .iterate = ecryptfs_readdir, |
| 369 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, | 366 | .unlocked_ioctl = ecryptfs_unlocked_ioctl, |
| 370 | #ifdef CONFIG_COMPAT | 367 | #ifdef CONFIG_COMPAT |
| 371 | .compat_ioctl = ecryptfs_compat_ioctl, | 368 | .compat_ioctl = ecryptfs_compat_ioctl, |
diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 055a9e9ca747..b72307ccdf7a 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c | |||
| @@ -7,40 +7,38 @@ | |||
| 7 | #include <linux/buffer_head.h> | 7 | #include <linux/buffer_head.h> |
| 8 | #include "efs.h" | 8 | #include "efs.h" |
| 9 | 9 | ||
| 10 | static int efs_readdir(struct file *, void *, filldir_t); | 10 | static int efs_readdir(struct file *, struct dir_context *); |
| 11 | 11 | ||
| 12 | const struct file_operations efs_dir_operations = { | 12 | const struct file_operations efs_dir_operations = { |
| 13 | .llseek = generic_file_llseek, | 13 | .llseek = generic_file_llseek, |
| 14 | .read = generic_read_dir, | 14 | .read = generic_read_dir, |
| 15 | .readdir = efs_readdir, | 15 | .iterate = efs_readdir, |
| 16 | }; | 16 | }; |
| 17 | 17 | ||
| 18 | const struct inode_operations efs_dir_inode_operations = { | 18 | const struct inode_operations efs_dir_inode_operations = { |
| 19 | .lookup = efs_lookup, | 19 | .lookup = efs_lookup, |
| 20 | }; | 20 | }; |
| 21 | 21 | ||
| 22 | static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { | 22 | static int efs_readdir(struct file *file, struct dir_context *ctx) |
| 23 | struct inode *inode = file_inode(filp); | 23 | { |
| 24 | struct buffer_head *bh; | 24 | struct inode *inode = file_inode(file); |
| 25 | |||
| 26 | struct efs_dir *dirblock; | ||
| 27 | struct efs_dentry *dirslot; | ||
| 28 | efs_ino_t inodenum; | ||
| 29 | efs_block_t block; | 25 | efs_block_t block; |
| 30 | int slot, namelen; | 26 | int slot; |
| 31 | char *nameptr; | ||
| 32 | 27 | ||
| 33 | if (inode->i_size & (EFS_DIRBSIZE-1)) | 28 | if (inode->i_size & (EFS_DIRBSIZE-1)) |
| 34 | printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); | 29 | printk(KERN_WARNING "EFS: WARNING: readdir(): directory size not a multiple of EFS_DIRBSIZE\n"); |
| 35 | 30 | ||
| 36 | /* work out where this entry can be found */ | 31 | /* work out where this entry can be found */ |
| 37 | block = filp->f_pos >> EFS_DIRBSIZE_BITS; | 32 | block = ctx->pos >> EFS_DIRBSIZE_BITS; |
| 38 | 33 | ||
| 39 | /* each block contains at most 256 slots */ | 34 | /* each block contains at most 256 slots */ |
| 40 | slot = filp->f_pos & 0xff; | 35 | slot = ctx->pos & 0xff; |
| 41 | 36 | ||
| 42 | /* look at all blocks */ | 37 | /* look at all blocks */ |
| 43 | while (block < inode->i_blocks) { | 38 | while (block < inode->i_blocks) { |
| 39 | struct efs_dir *dirblock; | ||
| 40 | struct buffer_head *bh; | ||
| 41 | |||
| 44 | /* read the dir block */ | 42 | /* read the dir block */ |
| 45 | bh = sb_bread(inode->i_sb, efs_bmap(inode, block)); | 43 | bh = sb_bread(inode->i_sb, efs_bmap(inode, block)); |
| 46 | 44 | ||
| @@ -57,11 +55,14 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { | |||
| 57 | break; | 55 | break; |
| 58 | } | 56 | } |
| 59 | 57 | ||
| 60 | while (slot < dirblock->slots) { | 58 | for (; slot < dirblock->slots; slot++) { |
| 61 | if (dirblock->space[slot] == 0) { | 59 | struct efs_dentry *dirslot; |
| 62 | slot++; | 60 | efs_ino_t inodenum; |
| 61 | const char *nameptr; | ||
| 62 | int namelen; | ||
| 63 | |||
| 64 | if (dirblock->space[slot] == 0) | ||
| 63 | continue; | 65 | continue; |
| 64 | } | ||
| 65 | 66 | ||
| 66 | dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot)); | 67 | dirslot = (struct efs_dentry *) (((char *) bh->b_data) + EFS_SLOTAT(dirblock, slot)); |
| 67 | 68 | ||
| @@ -72,39 +73,29 @@ static int efs_readdir(struct file *filp, void *dirent, filldir_t filldir) { | |||
| 72 | #ifdef DEBUG | 73 | #ifdef DEBUG |
| 73 | printk(KERN_DEBUG "EFS: readdir(): block %d slot %d/%d: inode %u, name \"%s\", namelen %u\n", block, slot, dirblock->slots-1, inodenum, nameptr, namelen); | 74 | printk(KERN_DEBUG "EFS: readdir(): block %d slot %d/%d: inode %u, name \"%s\", namelen %u\n", block, slot, dirblock->slots-1, inodenum, nameptr, namelen); |
| 74 | #endif | 75 | #endif |
| 75 | if (namelen > 0) { | 76 | if (!namelen) |
| 76 | /* found the next entry */ | 77 | continue; |
| 77 | filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; | 78 | /* found the next entry */ |
| 78 | 79 | ctx->pos = (block << EFS_DIRBSIZE_BITS) | slot; | |
| 79 | /* copy filename and data in dirslot */ | 80 | |
| 80 | filldir(dirent, nameptr, namelen, filp->f_pos, inodenum, DT_UNKNOWN); | 81 | /* sanity check */ |
| 81 | 82 | if (nameptr - (char *) dirblock + namelen > EFS_DIRBSIZE) { | |
| 82 | /* sanity check */ | 83 | printk(KERN_WARNING "EFS: directory entry %d exceeds directory block\n", slot); |
| 83 | if (nameptr - (char *) dirblock + namelen > EFS_DIRBSIZE) { | 84 | continue; |
| 84 | printk(KERN_WARNING "EFS: directory entry %d exceeds directory block\n", slot); | 85 | } |
| 85 | slot++; | 86 | |
| 86 | continue; | 87 | /* copy filename and data in dirslot */ |
| 87 | } | 88 | if (!dir_emit(ctx, nameptr, namelen, inodenum, DT_UNKNOWN)) { |
| 88 | |||
| 89 | /* store position of next slot */ | ||
| 90 | if (++slot == dirblock->slots) { | ||
| 91 | slot = 0; | ||
| 92 | block++; | ||
| 93 | } | ||
| 94 | brelse(bh); | 89 | brelse(bh); |
| 95 | filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; | 90 | return 0; |
| 96 | goto out; | ||
| 97 | } | 91 | } |
| 98 | slot++; | ||
| 99 | } | 92 | } |
| 100 | brelse(bh); | 93 | brelse(bh); |
| 101 | 94 | ||
| 102 | slot = 0; | 95 | slot = 0; |
| 103 | block++; | 96 | block++; |
| 104 | } | 97 | } |
| 105 | 98 | ctx->pos = (block << EFS_DIRBSIZE_BITS) | slot; | |
| 106 | filp->f_pos = (block << EFS_DIRBSIZE_BITS) | slot; | ||
| 107 | out: | ||
| 108 | return 0; | 99 | return 0; |
| 109 | } | 100 | } |
| 110 | 101 | ||
| @@ -1135,13 +1135,6 @@ void setup_new_exec(struct linux_binprm * bprm) | |||
| 1135 | set_dumpable(current->mm, suid_dumpable); | 1135 | set_dumpable(current->mm, suid_dumpable); |
| 1136 | } | 1136 | } |
| 1137 | 1137 | ||
| 1138 | /* | ||
| 1139 | * Flush performance counters when crossing a | ||
| 1140 | * security domain: | ||
| 1141 | */ | ||
| 1142 | if (!get_dumpable(current->mm)) | ||
| 1143 | perf_event_exit_task(current); | ||
| 1144 | |||
| 1145 | /* An exec changes our domain. We are no longer part of the thread | 1138 | /* An exec changes our domain. We are no longer part of the thread |
| 1146 | group */ | 1139 | group */ |
| 1147 | 1140 | ||
| @@ -1205,6 +1198,15 @@ void install_exec_creds(struct linux_binprm *bprm) | |||
| 1205 | 1198 | ||
| 1206 | commit_creds(bprm->cred); | 1199 | commit_creds(bprm->cred); |
| 1207 | bprm->cred = NULL; | 1200 | bprm->cred = NULL; |
| 1201 | |||
| 1202 | /* | ||
| 1203 | * Disable monitoring for regular users | ||
| 1204 | * when executing setuid binaries. Must | ||
| 1205 | * wait until new credentials are committed | ||
| 1206 | * by commit_creds() above | ||
| 1207 | */ | ||
| 1208 | if (get_dumpable(current->mm) != SUID_DUMP_USER) | ||
| 1209 | perf_event_exit_task(current); | ||
| 1208 | /* | 1210 | /* |
| 1209 | * cred_guard_mutex must be held at least to this point to prevent | 1211 | * cred_guard_mutex must be held at least to this point to prevent |
| 1210 | * ptrace_attach() from altering our determination of the task's | 1212 | * ptrace_attach() from altering our determination of the task's |
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c index 46375896cfc0..49f51ab4caac 100644 --- a/fs/exofs/dir.c +++ b/fs/exofs/dir.c | |||
| @@ -239,22 +239,19 @@ void exofs_set_de_type(struct exofs_dir_entry *de, struct inode *inode) | |||
| 239 | } | 239 | } |
| 240 | 240 | ||
| 241 | static int | 241 | static int |
| 242 | exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 242 | exofs_readdir(struct file *file, struct dir_context *ctx) |
| 243 | { | 243 | { |
| 244 | loff_t pos = filp->f_pos; | 244 | loff_t pos = ctx->pos; |
| 245 | struct inode *inode = file_inode(filp); | 245 | struct inode *inode = file_inode(file); |
| 246 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 246 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
| 247 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 247 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
| 248 | unsigned long npages = dir_pages(inode); | 248 | unsigned long npages = dir_pages(inode); |
| 249 | unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); | 249 | unsigned chunk_mask = ~(exofs_chunk_size(inode)-1); |
| 250 | unsigned char *types = NULL; | 250 | int need_revalidate = (file->f_version != inode->i_version); |
| 251 | int need_revalidate = (filp->f_version != inode->i_version); | ||
| 252 | 251 | ||
| 253 | if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) | 252 | if (pos > inode->i_size - EXOFS_DIR_REC_LEN(1)) |
| 254 | return 0; | 253 | return 0; |
| 255 | 254 | ||
| 256 | types = exofs_filetype_table; | ||
| 257 | |||
| 258 | for ( ; n < npages; n++, offset = 0) { | 255 | for ( ; n < npages; n++, offset = 0) { |
| 259 | char *kaddr, *limit; | 256 | char *kaddr, *limit; |
| 260 | struct exofs_dir_entry *de; | 257 | struct exofs_dir_entry *de; |
| @@ -263,7 +260,7 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 263 | if (IS_ERR(page)) { | 260 | if (IS_ERR(page)) { |
| 264 | EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n", | 261 | EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n", |
| 265 | inode->i_ino); | 262 | inode->i_ino); |
| 266 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 263 | ctx->pos += PAGE_CACHE_SIZE - offset; |
| 267 | return PTR_ERR(page); | 264 | return PTR_ERR(page); |
| 268 | } | 265 | } |
| 269 | kaddr = page_address(page); | 266 | kaddr = page_address(page); |
| @@ -271,9 +268,9 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 271 | if (offset) { | 268 | if (offset) { |
| 272 | offset = exofs_validate_entry(kaddr, offset, | 269 | offset = exofs_validate_entry(kaddr, offset, |
| 273 | chunk_mask); | 270 | chunk_mask); |
| 274 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | 271 | ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; |
| 275 | } | 272 | } |
| 276 | filp->f_version = inode->i_version; | 273 | file->f_version = inode->i_version; |
| 277 | need_revalidate = 0; | 274 | need_revalidate = 0; |
| 278 | } | 275 | } |
| 279 | de = (struct exofs_dir_entry *)(kaddr + offset); | 276 | de = (struct exofs_dir_entry *)(kaddr + offset); |
| @@ -288,27 +285,24 @@ exofs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 288 | return -EIO; | 285 | return -EIO; |
| 289 | } | 286 | } |
| 290 | if (de->inode_no) { | 287 | if (de->inode_no) { |
| 291 | int over; | 288 | unsigned char t; |
| 292 | unsigned char d_type = DT_UNKNOWN; | ||
| 293 | 289 | ||
| 294 | if (types && de->file_type < EXOFS_FT_MAX) | 290 | if (de->file_type < EXOFS_FT_MAX) |
| 295 | d_type = types[de->file_type]; | 291 | t = exofs_filetype_table[de->file_type]; |
| 292 | else | ||
| 293 | t = DT_UNKNOWN; | ||
| 296 | 294 | ||
| 297 | offset = (char *)de - kaddr; | 295 | if (!dir_emit(ctx, de->name, de->name_len, |
| 298 | over = filldir(dirent, de->name, de->name_len, | ||
| 299 | (n<<PAGE_CACHE_SHIFT) | offset, | ||
| 300 | le64_to_cpu(de->inode_no), | 296 | le64_to_cpu(de->inode_no), |
| 301 | d_type); | 297 | t)) { |
| 302 | if (over) { | ||
| 303 | exofs_put_page(page); | 298 | exofs_put_page(page); |
| 304 | return 0; | 299 | return 0; |
| 305 | } | 300 | } |
| 306 | } | 301 | } |
| 307 | filp->f_pos += le16_to_cpu(de->rec_len); | 302 | ctx->pos += le16_to_cpu(de->rec_len); |
| 308 | } | 303 | } |
| 309 | exofs_put_page(page); | 304 | exofs_put_page(page); |
| 310 | } | 305 | } |
| 311 | |||
| 312 | return 0; | 306 | return 0; |
| 313 | } | 307 | } |
| 314 | 308 | ||
| @@ -669,5 +663,5 @@ not_empty: | |||
| 669 | const struct file_operations exofs_dir_operations = { | 663 | const struct file_operations exofs_dir_operations = { |
| 670 | .llseek = generic_file_llseek, | 664 | .llseek = generic_file_llseek, |
| 671 | .read = generic_read_dir, | 665 | .read = generic_read_dir, |
| 672 | .readdir = exofs_readdir, | 666 | .iterate = exofs_readdir, |
| 673 | }; | 667 | }; |
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d1f80abd8828..2ec8eb1ab269 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c | |||
| @@ -953,9 +953,11 @@ static int exofs_releasepage(struct page *page, gfp_t gfp) | |||
| 953 | return 0; | 953 | return 0; |
| 954 | } | 954 | } |
| 955 | 955 | ||
| 956 | static void exofs_invalidatepage(struct page *page, unsigned long offset) | 956 | static void exofs_invalidatepage(struct page *page, unsigned int offset, |
| 957 | unsigned int length) | ||
| 957 | { | 958 | { |
| 958 | EXOFS_DBGMSG("page 0x%lx offset 0x%lx\n", page->index, offset); | 959 | EXOFS_DBGMSG("page 0x%lx offset 0x%x length 0x%x\n", |
| 960 | page->index, offset, length); | ||
| 959 | WARN_ON(1); | 961 | WARN_ON(1); |
| 960 | } | 962 | } |
| 961 | 963 | ||
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index 262fc9940982..293bc2e47a73 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c | |||
| @@ -212,6 +212,7 @@ reconnect_path(struct vfsmount *mnt, struct dentry *target_dir, char *nbuf) | |||
| 212 | } | 212 | } |
| 213 | 213 | ||
| 214 | struct getdents_callback { | 214 | struct getdents_callback { |
| 215 | struct dir_context ctx; | ||
| 215 | char *name; /* name that was found. It already points to a | 216 | char *name; /* name that was found. It already points to a |
| 216 | buffer NAME_MAX+1 is size */ | 217 | buffer NAME_MAX+1 is size */ |
| 217 | unsigned long ino; /* the inum we are looking for */ | 218 | unsigned long ino; /* the inum we are looking for */ |
| @@ -254,7 +255,11 @@ static int get_name(const struct path *path, char *name, struct dentry *child) | |||
| 254 | struct inode *dir = path->dentry->d_inode; | 255 | struct inode *dir = path->dentry->d_inode; |
| 255 | int error; | 256 | int error; |
| 256 | struct file *file; | 257 | struct file *file; |
| 257 | struct getdents_callback buffer; | 258 | struct getdents_callback buffer = { |
| 259 | .ctx.actor = filldir_one, | ||
| 260 | .name = name, | ||
| 261 | .ino = child->d_inode->i_ino | ||
| 262 | }; | ||
| 258 | 263 | ||
| 259 | error = -ENOTDIR; | 264 | error = -ENOTDIR; |
| 260 | if (!dir || !S_ISDIR(dir->i_mode)) | 265 | if (!dir || !S_ISDIR(dir->i_mode)) |
| @@ -271,17 +276,14 @@ static int get_name(const struct path *path, char *name, struct dentry *child) | |||
| 271 | goto out; | 276 | goto out; |
| 272 | 277 | ||
| 273 | error = -EINVAL; | 278 | error = -EINVAL; |
| 274 | if (!file->f_op->readdir) | 279 | if (!file->f_op->iterate) |
| 275 | goto out_close; | 280 | goto out_close; |
| 276 | 281 | ||
| 277 | buffer.name = name; | ||
| 278 | buffer.ino = child->d_inode->i_ino; | ||
| 279 | buffer.found = 0; | ||
| 280 | buffer.sequence = 0; | 282 | buffer.sequence = 0; |
| 281 | while (1) { | 283 | while (1) { |
| 282 | int old_seq = buffer.sequence; | 284 | int old_seq = buffer.sequence; |
| 283 | 285 | ||
| 284 | error = vfs_readdir(file, filldir_one, &buffer); | 286 | error = iterate_dir(file, &buffer.ctx); |
| 285 | if (buffer.found) { | 287 | if (buffer.found) { |
| 286 | error = 0; | 288 | error = 0; |
| 287 | break; | 289 | break; |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 4237722bfd27..6e1d4ab09d72 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
| @@ -287,17 +287,17 @@ static inline void ext2_set_de_type(ext2_dirent *de, struct inode *inode) | |||
| 287 | } | 287 | } |
| 288 | 288 | ||
| 289 | static int | 289 | static int |
| 290 | ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | 290 | ext2_readdir(struct file *file, struct dir_context *ctx) |
| 291 | { | 291 | { |
| 292 | loff_t pos = filp->f_pos; | 292 | loff_t pos = ctx->pos; |
| 293 | struct inode *inode = file_inode(filp); | 293 | struct inode *inode = file_inode(file); |
| 294 | struct super_block *sb = inode->i_sb; | 294 | struct super_block *sb = inode->i_sb; |
| 295 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 295 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
| 296 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 296 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
| 297 | unsigned long npages = dir_pages(inode); | 297 | unsigned long npages = dir_pages(inode); |
| 298 | unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); | 298 | unsigned chunk_mask = ~(ext2_chunk_size(inode)-1); |
| 299 | unsigned char *types = NULL; | 299 | unsigned char *types = NULL; |
| 300 | int need_revalidate = filp->f_version != inode->i_version; | 300 | int need_revalidate = file->f_version != inode->i_version; |
| 301 | 301 | ||
| 302 | if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) | 302 | if (pos > inode->i_size - EXT2_DIR_REC_LEN(1)) |
| 303 | return 0; | 303 | return 0; |
| @@ -314,16 +314,16 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
| 314 | ext2_error(sb, __func__, | 314 | ext2_error(sb, __func__, |
| 315 | "bad page in #%lu", | 315 | "bad page in #%lu", |
| 316 | inode->i_ino); | 316 | inode->i_ino); |
| 317 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 317 | ctx->pos += PAGE_CACHE_SIZE - offset; |
| 318 | return PTR_ERR(page); | 318 | return PTR_ERR(page); |
| 319 | } | 319 | } |
| 320 | kaddr = page_address(page); | 320 | kaddr = page_address(page); |
| 321 | if (unlikely(need_revalidate)) { | 321 | if (unlikely(need_revalidate)) { |
| 322 | if (offset) { | 322 | if (offset) { |
| 323 | offset = ext2_validate_entry(kaddr, offset, chunk_mask); | 323 | offset = ext2_validate_entry(kaddr, offset, chunk_mask); |
| 324 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | 324 | ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; |
| 325 | } | 325 | } |
| 326 | filp->f_version = inode->i_version; | 326 | file->f_version = inode->i_version; |
| 327 | need_revalidate = 0; | 327 | need_revalidate = 0; |
| 328 | } | 328 | } |
| 329 | de = (ext2_dirent *)(kaddr+offset); | 329 | de = (ext2_dirent *)(kaddr+offset); |
| @@ -336,22 +336,19 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
| 336 | return -EIO; | 336 | return -EIO; |
| 337 | } | 337 | } |
| 338 | if (de->inode) { | 338 | if (de->inode) { |
| 339 | int over; | ||
| 340 | unsigned char d_type = DT_UNKNOWN; | 339 | unsigned char d_type = DT_UNKNOWN; |
| 341 | 340 | ||
| 342 | if (types && de->file_type < EXT2_FT_MAX) | 341 | if (types && de->file_type < EXT2_FT_MAX) |
| 343 | d_type = types[de->file_type]; | 342 | d_type = types[de->file_type]; |
| 344 | 343 | ||
| 345 | offset = (char *)de - kaddr; | 344 | if (!dir_emit(ctx, de->name, de->name_len, |
| 346 | over = filldir(dirent, de->name, de->name_len, | 345 | le32_to_cpu(de->inode), |
| 347 | (n<<PAGE_CACHE_SHIFT) | offset, | 346 | d_type)) { |
| 348 | le32_to_cpu(de->inode), d_type); | ||
| 349 | if (over) { | ||
| 350 | ext2_put_page(page); | 347 | ext2_put_page(page); |
| 351 | return 0; | 348 | return 0; |
| 352 | } | 349 | } |
| 353 | } | 350 | } |
| 354 | filp->f_pos += ext2_rec_len_from_disk(de->rec_len); | 351 | ctx->pos += ext2_rec_len_from_disk(de->rec_len); |
| 355 | } | 352 | } |
| 356 | ext2_put_page(page); | 353 | ext2_put_page(page); |
| 357 | } | 354 | } |
| @@ -724,7 +721,7 @@ not_empty: | |||
| 724 | const struct file_operations ext2_dir_operations = { | 721 | const struct file_operations ext2_dir_operations = { |
| 725 | .llseek = generic_file_llseek, | 722 | .llseek = generic_file_llseek, |
| 726 | .read = generic_read_dir, | 723 | .read = generic_read_dir, |
| 727 | .readdir = ext2_readdir, | 724 | .iterate = ext2_readdir, |
| 728 | .unlocked_ioctl = ext2_ioctl, | 725 | .unlocked_ioctl = ext2_ioctl, |
| 729 | #ifdef CONFIG_COMPAT | 726 | #ifdef CONFIG_COMPAT |
| 730 | .compat_ioctl = ext2_compat_ioctl, | 727 | .compat_ioctl = ext2_compat_ioctl, |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 87eccbbca255..f522425aaa24 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
| @@ -28,8 +28,7 @@ static unsigned char ext3_filetype_table[] = { | |||
| 28 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK | 28 | DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK |
| 29 | }; | 29 | }; |
| 30 | 30 | ||
| 31 | static int ext3_dx_readdir(struct file * filp, | 31 | static int ext3_dx_readdir(struct file *, struct dir_context *); |
| 32 | void * dirent, filldir_t filldir); | ||
| 33 | 32 | ||
| 34 | static unsigned char get_dtype(struct super_block *sb, int filetype) | 33 | static unsigned char get_dtype(struct super_block *sb, int filetype) |
| 35 | { | 34 | { |
| @@ -91,36 +90,30 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, | |||
| 91 | return error_msg == NULL ? 1 : 0; | 90 | return error_msg == NULL ? 1 : 0; |
| 92 | } | 91 | } |
| 93 | 92 | ||
| 94 | static int ext3_readdir(struct file * filp, | 93 | static int ext3_readdir(struct file *file, struct dir_context *ctx) |
| 95 | void * dirent, filldir_t filldir) | ||
| 96 | { | 94 | { |
| 97 | int error = 0; | ||
| 98 | unsigned long offset; | 95 | unsigned long offset; |
| 99 | int i, stored; | 96 | int i; |
| 100 | struct ext3_dir_entry_2 *de; | 97 | struct ext3_dir_entry_2 *de; |
| 101 | int err; | 98 | int err; |
| 102 | struct inode *inode = file_inode(filp); | 99 | struct inode *inode = file_inode(file); |
| 103 | struct super_block *sb = inode->i_sb; | 100 | struct super_block *sb = inode->i_sb; |
| 104 | int ret = 0; | ||
| 105 | int dir_has_error = 0; | 101 | int dir_has_error = 0; |
| 106 | 102 | ||
| 107 | if (is_dx_dir(inode)) { | 103 | if (is_dx_dir(inode)) { |
| 108 | err = ext3_dx_readdir(filp, dirent, filldir); | 104 | err = ext3_dx_readdir(file, ctx); |
| 109 | if (err != ERR_BAD_DX_DIR) { | 105 | if (err != ERR_BAD_DX_DIR) |
| 110 | ret = err; | 106 | return err; |
| 111 | goto out; | ||
| 112 | } | ||
| 113 | /* | 107 | /* |
| 114 | * We don't set the inode dirty flag since it's not | 108 | * We don't set the inode dirty flag since it's not |
| 115 | * critical that it get flushed back to the disk. | 109 | * critical that it get flushed back to the disk. |
| 116 | */ | 110 | */ |
| 117 | EXT3_I(file_inode(filp))->i_flags &= ~EXT3_INDEX_FL; | 111 | EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; |
| 118 | } | 112 | } |
| 119 | stored = 0; | 113 | offset = ctx->pos & (sb->s_blocksize - 1); |
| 120 | offset = filp->f_pos & (sb->s_blocksize - 1); | ||
| 121 | 114 | ||
| 122 | while (!error && !stored && filp->f_pos < inode->i_size) { | 115 | while (ctx->pos < inode->i_size) { |
| 123 | unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb); | 116 | unsigned long blk = ctx->pos >> EXT3_BLOCK_SIZE_BITS(sb); |
| 124 | struct buffer_head map_bh; | 117 | struct buffer_head map_bh; |
| 125 | struct buffer_head *bh = NULL; | 118 | struct buffer_head *bh = NULL; |
| 126 | 119 | ||
| @@ -129,12 +122,12 @@ static int ext3_readdir(struct file * filp, | |||
| 129 | if (err > 0) { | 122 | if (err > 0) { |
| 130 | pgoff_t index = map_bh.b_blocknr >> | 123 | pgoff_t index = map_bh.b_blocknr >> |
| 131 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 124 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| 132 | if (!ra_has_index(&filp->f_ra, index)) | 125 | if (!ra_has_index(&file->f_ra, index)) |
| 133 | page_cache_sync_readahead( | 126 | page_cache_sync_readahead( |
| 134 | sb->s_bdev->bd_inode->i_mapping, | 127 | sb->s_bdev->bd_inode->i_mapping, |
| 135 | &filp->f_ra, filp, | 128 | &file->f_ra, file, |
| 136 | index, 1); | 129 | index, 1); |
| 137 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 130 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
| 138 | bh = ext3_bread(NULL, inode, blk, 0, &err); | 131 | bh = ext3_bread(NULL, inode, blk, 0, &err); |
| 139 | } | 132 | } |
| 140 | 133 | ||
| @@ -146,22 +139,21 @@ static int ext3_readdir(struct file * filp, | |||
| 146 | if (!dir_has_error) { | 139 | if (!dir_has_error) { |
| 147 | ext3_error(sb, __func__, "directory #%lu " | 140 | ext3_error(sb, __func__, "directory #%lu " |
| 148 | "contains a hole at offset %lld", | 141 | "contains a hole at offset %lld", |
| 149 | inode->i_ino, filp->f_pos); | 142 | inode->i_ino, ctx->pos); |
| 150 | dir_has_error = 1; | 143 | dir_has_error = 1; |
| 151 | } | 144 | } |
| 152 | /* corrupt size? Maybe no more blocks to read */ | 145 | /* corrupt size? Maybe no more blocks to read */ |
| 153 | if (filp->f_pos > inode->i_blocks << 9) | 146 | if (ctx->pos > inode->i_blocks << 9) |
| 154 | break; | 147 | break; |
| 155 | filp->f_pos += sb->s_blocksize - offset; | 148 | ctx->pos += sb->s_blocksize - offset; |
| 156 | continue; | 149 | continue; |
| 157 | } | 150 | } |
| 158 | 151 | ||
| 159 | revalidate: | ||
| 160 | /* If the dir block has changed since the last call to | 152 | /* If the dir block has changed since the last call to |
| 161 | * readdir(2), then we might be pointing to an invalid | 153 | * readdir(2), then we might be pointing to an invalid |
| 162 | * dirent right now. Scan from the start of the block | 154 | * dirent right now. Scan from the start of the block |
| 163 | * to make sure. */ | 155 | * to make sure. */ |
| 164 | if (filp->f_version != inode->i_version) { | 156 | if (offset && file->f_version != inode->i_version) { |
| 165 | for (i = 0; i < sb->s_blocksize && i < offset; ) { | 157 | for (i = 0; i < sb->s_blocksize && i < offset; ) { |
| 166 | de = (struct ext3_dir_entry_2 *) | 158 | de = (struct ext3_dir_entry_2 *) |
| 167 | (bh->b_data + i); | 159 | (bh->b_data + i); |
| @@ -177,53 +169,40 @@ revalidate: | |||
| 177 | i += ext3_rec_len_from_disk(de->rec_len); | 169 | i += ext3_rec_len_from_disk(de->rec_len); |
| 178 | } | 170 | } |
| 179 | offset = i; | 171 | offset = i; |
| 180 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | 172 | ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) |
| 181 | | offset; | 173 | | offset; |
| 182 | filp->f_version = inode->i_version; | 174 | file->f_version = inode->i_version; |
| 183 | } | 175 | } |
| 184 | 176 | ||
| 185 | while (!error && filp->f_pos < inode->i_size | 177 | while (ctx->pos < inode->i_size |
| 186 | && offset < sb->s_blocksize) { | 178 | && offset < sb->s_blocksize) { |
| 187 | de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); | 179 | de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); |
| 188 | if (!ext3_check_dir_entry ("ext3_readdir", inode, de, | 180 | if (!ext3_check_dir_entry ("ext3_readdir", inode, de, |
| 189 | bh, offset)) { | 181 | bh, offset)) { |
| 190 | /* On error, skip the f_pos to the | 182 | /* On error, skip the to the |
| 191 | next block. */ | 183 | next block. */ |
| 192 | filp->f_pos = (filp->f_pos | | 184 | ctx->pos = (ctx->pos | |
| 193 | (sb->s_blocksize - 1)) + 1; | 185 | (sb->s_blocksize - 1)) + 1; |
| 194 | brelse (bh); | 186 | break; |
| 195 | ret = stored; | ||
| 196 | goto out; | ||
| 197 | } | 187 | } |
| 198 | offset += ext3_rec_len_from_disk(de->rec_len); | 188 | offset += ext3_rec_len_from_disk(de->rec_len); |
| 199 | if (le32_to_cpu(de->inode)) { | 189 | if (le32_to_cpu(de->inode)) { |
| 200 | /* We might block in the next section | 190 | if (!dir_emit(ctx, de->name, de->name_len, |
| 201 | * if the data destination is | 191 | le32_to_cpu(de->inode), |
| 202 | * currently swapped out. So, use a | 192 | get_dtype(sb, de->file_type))) { |
| 203 | * version stamp to detect whether or | 193 | brelse(bh); |
| 204 | * not the directory has been modified | 194 | return 0; |
| 205 | * during the copy operation. | 195 | } |
| 206 | */ | ||
| 207 | u64 version = filp->f_version; | ||
| 208 | |||
| 209 | error = filldir(dirent, de->name, | ||
| 210 | de->name_len, | ||
| 211 | filp->f_pos, | ||
| 212 | le32_to_cpu(de->inode), | ||
| 213 | get_dtype(sb, de->file_type)); | ||
| 214 | if (error) | ||
| 215 | break; | ||
| 216 | if (version != filp->f_version) | ||
| 217 | goto revalidate; | ||
| 218 | stored ++; | ||
| 219 | } | 196 | } |
| 220 | filp->f_pos += ext3_rec_len_from_disk(de->rec_len); | 197 | ctx->pos += ext3_rec_len_from_disk(de->rec_len); |
| 221 | } | 198 | } |
| 222 | offset = 0; | 199 | offset = 0; |
| 223 | brelse (bh); | 200 | brelse (bh); |
| 201 | if (ctx->pos < inode->i_size) | ||
| 202 | if (!dir_relax(inode)) | ||
| 203 | return 0; | ||
| 224 | } | 204 | } |
| 225 | out: | 205 | return 0; |
| 226 | return ret; | ||
| 227 | } | 206 | } |
| 228 | 207 | ||
| 229 | static inline int is_32bit_api(void) | 208 | static inline int is_32bit_api(void) |
| @@ -452,62 +431,54 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
| 452 | * for all entres on the fname linked list. (Normally there is only | 431 | * for all entres on the fname linked list. (Normally there is only |
| 453 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 432 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
| 454 | */ | 433 | */ |
| 455 | static int call_filldir(struct file * filp, void * dirent, | 434 | static bool call_filldir(struct file *file, struct dir_context *ctx, |
| 456 | filldir_t filldir, struct fname *fname) | 435 | struct fname *fname) |
| 457 | { | 436 | { |
| 458 | struct dir_private_info *info = filp->private_data; | 437 | struct dir_private_info *info = file->private_data; |
| 459 | loff_t curr_pos; | 438 | struct inode *inode = file_inode(file); |
| 460 | struct inode *inode = file_inode(filp); | 439 | struct super_block *sb = inode->i_sb; |
| 461 | struct super_block * sb; | ||
| 462 | int error; | ||
| 463 | |||
| 464 | sb = inode->i_sb; | ||
| 465 | 440 | ||
| 466 | if (!fname) { | 441 | if (!fname) { |
| 467 | printk("call_filldir: called with null fname?!?\n"); | 442 | printk("call_filldir: called with null fname?!?\n"); |
| 468 | return 0; | 443 | return true; |
| 469 | } | 444 | } |
| 470 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); | 445 | ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); |
| 471 | while (fname) { | 446 | while (fname) { |
| 472 | error = filldir(dirent, fname->name, | 447 | if (!dir_emit(ctx, fname->name, fname->name_len, |
| 473 | fname->name_len, curr_pos, | ||
| 474 | fname->inode, | 448 | fname->inode, |
| 475 | get_dtype(sb, fname->file_type)); | 449 | get_dtype(sb, fname->file_type))) { |
| 476 | if (error) { | ||
| 477 | filp->f_pos = curr_pos; | ||
| 478 | info->extra_fname = fname; | 450 | info->extra_fname = fname; |
| 479 | return error; | 451 | return false; |
| 480 | } | 452 | } |
| 481 | fname = fname->next; | 453 | fname = fname->next; |
| 482 | } | 454 | } |
| 483 | return 0; | 455 | return true; |
| 484 | } | 456 | } |
| 485 | 457 | ||
| 486 | static int ext3_dx_readdir(struct file * filp, | 458 | static int ext3_dx_readdir(struct file *file, struct dir_context *ctx) |
| 487 | void * dirent, filldir_t filldir) | ||
| 488 | { | 459 | { |
| 489 | struct dir_private_info *info = filp->private_data; | 460 | struct dir_private_info *info = file->private_data; |
| 490 | struct inode *inode = file_inode(filp); | 461 | struct inode *inode = file_inode(file); |
| 491 | struct fname *fname; | 462 | struct fname *fname; |
| 492 | int ret; | 463 | int ret; |
| 493 | 464 | ||
| 494 | if (!info) { | 465 | if (!info) { |
| 495 | info = ext3_htree_create_dir_info(filp, filp->f_pos); | 466 | info = ext3_htree_create_dir_info(file, ctx->pos); |
| 496 | if (!info) | 467 | if (!info) |
| 497 | return -ENOMEM; | 468 | return -ENOMEM; |
| 498 | filp->private_data = info; | 469 | file->private_data = info; |
| 499 | } | 470 | } |
| 500 | 471 | ||
| 501 | if (filp->f_pos == ext3_get_htree_eof(filp)) | 472 | if (ctx->pos == ext3_get_htree_eof(file)) |
| 502 | return 0; /* EOF */ | 473 | return 0; /* EOF */ |
| 503 | 474 | ||
| 504 | /* Some one has messed with f_pos; reset the world */ | 475 | /* Some one has messed with f_pos; reset the world */ |
| 505 | if (info->last_pos != filp->f_pos) { | 476 | if (info->last_pos != ctx->pos) { |
| 506 | free_rb_tree_fname(&info->root); | 477 | free_rb_tree_fname(&info->root); |
| 507 | info->curr_node = NULL; | 478 | info->curr_node = NULL; |
| 508 | info->extra_fname = NULL; | 479 | info->extra_fname = NULL; |
| 509 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); | 480 | info->curr_hash = pos2maj_hash(file, ctx->pos); |
| 510 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); | 481 | info->curr_minor_hash = pos2min_hash(file, ctx->pos); |
| 511 | } | 482 | } |
| 512 | 483 | ||
| 513 | /* | 484 | /* |
| @@ -515,7 +486,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 515 | * chain, return them first. | 486 | * chain, return them first. |
| 516 | */ | 487 | */ |
| 517 | if (info->extra_fname) { | 488 | if (info->extra_fname) { |
| 518 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) | 489 | if (!call_filldir(file, ctx, info->extra_fname)) |
| 519 | goto finished; | 490 | goto finished; |
| 520 | info->extra_fname = NULL; | 491 | info->extra_fname = NULL; |
| 521 | goto next_node; | 492 | goto next_node; |
| @@ -529,17 +500,17 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 529 | * cached entries. | 500 | * cached entries. |
| 530 | */ | 501 | */ |
| 531 | if ((!info->curr_node) || | 502 | if ((!info->curr_node) || |
| 532 | (filp->f_version != inode->i_version)) { | 503 | (file->f_version != inode->i_version)) { |
| 533 | info->curr_node = NULL; | 504 | info->curr_node = NULL; |
| 534 | free_rb_tree_fname(&info->root); | 505 | free_rb_tree_fname(&info->root); |
| 535 | filp->f_version = inode->i_version; | 506 | file->f_version = inode->i_version; |
| 536 | ret = ext3_htree_fill_tree(filp, info->curr_hash, | 507 | ret = ext3_htree_fill_tree(file, info->curr_hash, |
| 537 | info->curr_minor_hash, | 508 | info->curr_minor_hash, |
| 538 | &info->next_hash); | 509 | &info->next_hash); |
| 539 | if (ret < 0) | 510 | if (ret < 0) |
| 540 | return ret; | 511 | return ret; |
| 541 | if (ret == 0) { | 512 | if (ret == 0) { |
| 542 | filp->f_pos = ext3_get_htree_eof(filp); | 513 | ctx->pos = ext3_get_htree_eof(file); |
| 543 | break; | 514 | break; |
| 544 | } | 515 | } |
| 545 | info->curr_node = rb_first(&info->root); | 516 | info->curr_node = rb_first(&info->root); |
| @@ -548,7 +519,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 548 | fname = rb_entry(info->curr_node, struct fname, rb_hash); | 519 | fname = rb_entry(info->curr_node, struct fname, rb_hash); |
| 549 | info->curr_hash = fname->hash; | 520 | info->curr_hash = fname->hash; |
| 550 | info->curr_minor_hash = fname->minor_hash; | 521 | info->curr_minor_hash = fname->minor_hash; |
| 551 | if (call_filldir(filp, dirent, filldir, fname)) | 522 | if (!call_filldir(file, ctx, fname)) |
| 552 | break; | 523 | break; |
| 553 | next_node: | 524 | next_node: |
| 554 | info->curr_node = rb_next(info->curr_node); | 525 | info->curr_node = rb_next(info->curr_node); |
| @@ -559,7 +530,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 559 | info->curr_minor_hash = fname->minor_hash; | 530 | info->curr_minor_hash = fname->minor_hash; |
| 560 | } else { | 531 | } else { |
| 561 | if (info->next_hash == ~0) { | 532 | if (info->next_hash == ~0) { |
| 562 | filp->f_pos = ext3_get_htree_eof(filp); | 533 | ctx->pos = ext3_get_htree_eof(file); |
| 563 | break; | 534 | break; |
| 564 | } | 535 | } |
| 565 | info->curr_hash = info->next_hash; | 536 | info->curr_hash = info->next_hash; |
| @@ -567,7 +538,7 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 567 | } | 538 | } |
| 568 | } | 539 | } |
| 569 | finished: | 540 | finished: |
| 570 | info->last_pos = filp->f_pos; | 541 | info->last_pos = ctx->pos; |
| 571 | return 0; | 542 | return 0; |
| 572 | } | 543 | } |
| 573 | 544 | ||
| @@ -582,7 +553,7 @@ static int ext3_release_dir (struct inode * inode, struct file * filp) | |||
| 582 | const struct file_operations ext3_dir_operations = { | 553 | const struct file_operations ext3_dir_operations = { |
| 583 | .llseek = ext3_dir_llseek, | 554 | .llseek = ext3_dir_llseek, |
| 584 | .read = generic_read_dir, | 555 | .read = generic_read_dir, |
| 585 | .readdir = ext3_readdir, | 556 | .iterate = ext3_readdir, |
| 586 | .unlocked_ioctl = ext3_ioctl, | 557 | .unlocked_ioctl = ext3_ioctl, |
| 587 | #ifdef CONFIG_COMPAT | 558 | #ifdef CONFIG_COMPAT |
| 588 | .compat_ioctl = ext3_compat_ioctl, | 559 | .compat_ioctl = ext3_compat_ioctl, |
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 23c712825640..f67668f724ba 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -1825,19 +1825,20 @@ ext3_readpages(struct file *file, struct address_space *mapping, | |||
| 1825 | return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); | 1825 | return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); |
| 1826 | } | 1826 | } |
| 1827 | 1827 | ||
| 1828 | static void ext3_invalidatepage(struct page *page, unsigned long offset) | 1828 | static void ext3_invalidatepage(struct page *page, unsigned int offset, |
| 1829 | unsigned int length) | ||
| 1829 | { | 1830 | { |
| 1830 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); | 1831 | journal_t *journal = EXT3_JOURNAL(page->mapping->host); |
| 1831 | 1832 | ||
| 1832 | trace_ext3_invalidatepage(page, offset); | 1833 | trace_ext3_invalidatepage(page, offset, length); |
| 1833 | 1834 | ||
| 1834 | /* | 1835 | /* |
| 1835 | * If it's a full truncate we just forget about the pending dirtying | 1836 | * If it's a full truncate we just forget about the pending dirtying |
| 1836 | */ | 1837 | */ |
| 1837 | if (offset == 0) | 1838 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
| 1838 | ClearPageChecked(page); | 1839 | ClearPageChecked(page); |
| 1839 | 1840 | ||
| 1840 | journal_invalidatepage(journal, page, offset); | 1841 | journal_invalidatepage(journal, page, offset, length); |
| 1841 | } | 1842 | } |
| 1842 | 1843 | ||
| 1843 | static int ext3_releasepage(struct page *page, gfp_t wait) | 1844 | static int ext3_releasepage(struct page *page, gfp_t wait) |
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 692de13e3596..cea8ecf3e76e 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c | |||
| @@ -576,11 +576,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
| 576 | if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, | 576 | if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, |
| 577 | (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) | 577 | (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) |
| 578 | +((char *)de - bh->b_data))) { | 578 | +((char *)de - bh->b_data))) { |
| 579 | /* On error, skip the f_pos to the next block. */ | 579 | /* silently ignore the rest of the block */ |
| 580 | dir_file->f_pos = (dir_file->f_pos | | 580 | break; |
| 581 | (dir->i_sb->s_blocksize - 1)) + 1; | ||
| 582 | brelse (bh); | ||
| 583 | return count; | ||
| 584 | } | 581 | } |
| 585 | ext3fs_dirhash(de->name, de->name_len, hinfo); | 582 | ext3fs_dirhash(de->name, de->name_len, hinfo); |
| 586 | if ((hinfo->hash < start_hash) || | 583 | if ((hinfo->hash < start_hash) || |
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d0f13eada0ed..58339393fa6e 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -682,11 +682,15 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb) | |||
| 682 | 682 | ||
| 683 | static inline int test_root(ext4_group_t a, int b) | 683 | static inline int test_root(ext4_group_t a, int b) |
| 684 | { | 684 | { |
| 685 | int num = b; | 685 | while (1) { |
| 686 | 686 | if (a < b) | |
| 687 | while (a > num) | 687 | return 0; |
| 688 | num *= b; | 688 | if (a == b) |
| 689 | return num == a; | 689 | return 1; |
| 690 | if ((a % b) != 0) | ||
| 691 | return 0; | ||
| 692 | a = a / b; | ||
| 693 | } | ||
| 690 | } | 694 | } |
| 691 | 695 | ||
| 692 | static int ext4_group_sparse(ext4_group_t group) | 696 | static int ext4_group_sparse(ext4_group_t group) |
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index f8d56e4254e0..3c7d288ae94c 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
| @@ -29,8 +29,7 @@ | |||
| 29 | #include "ext4.h" | 29 | #include "ext4.h" |
| 30 | #include "xattr.h" | 30 | #include "xattr.h" |
| 31 | 31 | ||
| 32 | static int ext4_dx_readdir(struct file *filp, | 32 | static int ext4_dx_readdir(struct file *, struct dir_context *); |
| 33 | void *dirent, filldir_t filldir); | ||
| 34 | 33 | ||
| 35 | /** | 34 | /** |
| 36 | * Check if the given dir-inode refers to an htree-indexed directory | 35 | * Check if the given dir-inode refers to an htree-indexed directory |
| @@ -103,60 +102,56 @@ int __ext4_check_dir_entry(const char *function, unsigned int line, | |||
| 103 | return 1; | 102 | return 1; |
| 104 | } | 103 | } |
| 105 | 104 | ||
| 106 | static int ext4_readdir(struct file *filp, | 105 | static int ext4_readdir(struct file *file, struct dir_context *ctx) |
| 107 | void *dirent, filldir_t filldir) | ||
| 108 | { | 106 | { |
| 109 | int error = 0; | ||
| 110 | unsigned int offset; | 107 | unsigned int offset; |
| 111 | int i, stored; | 108 | int i, stored; |
| 112 | struct ext4_dir_entry_2 *de; | 109 | struct ext4_dir_entry_2 *de; |
| 113 | int err; | 110 | int err; |
| 114 | struct inode *inode = file_inode(filp); | 111 | struct inode *inode = file_inode(file); |
| 115 | struct super_block *sb = inode->i_sb; | 112 | struct super_block *sb = inode->i_sb; |
| 116 | int ret = 0; | ||
| 117 | int dir_has_error = 0; | 113 | int dir_has_error = 0; |
| 118 | 114 | ||
| 119 | if (is_dx_dir(inode)) { | 115 | if (is_dx_dir(inode)) { |
| 120 | err = ext4_dx_readdir(filp, dirent, filldir); | 116 | err = ext4_dx_readdir(file, ctx); |
| 121 | if (err != ERR_BAD_DX_DIR) { | 117 | if (err != ERR_BAD_DX_DIR) { |
| 122 | ret = err; | 118 | return err; |
| 123 | goto out; | ||
| 124 | } | 119 | } |
| 125 | /* | 120 | /* |
| 126 | * We don't set the inode dirty flag since it's not | 121 | * We don't set the inode dirty flag since it's not |
| 127 | * critical that it get flushed back to the disk. | 122 | * critical that it get flushed back to the disk. |
| 128 | */ | 123 | */ |
| 129 | ext4_clear_inode_flag(file_inode(filp), | 124 | ext4_clear_inode_flag(file_inode(file), |
| 130 | EXT4_INODE_INDEX); | 125 | EXT4_INODE_INDEX); |
| 131 | } | 126 | } |
| 132 | 127 | ||
| 133 | if (ext4_has_inline_data(inode)) { | 128 | if (ext4_has_inline_data(inode)) { |
| 134 | int has_inline_data = 1; | 129 | int has_inline_data = 1; |
| 135 | ret = ext4_read_inline_dir(filp, dirent, filldir, | 130 | int ret = ext4_read_inline_dir(file, ctx, |
| 136 | &has_inline_data); | 131 | &has_inline_data); |
| 137 | if (has_inline_data) | 132 | if (has_inline_data) |
| 138 | return ret; | 133 | return ret; |
| 139 | } | 134 | } |
| 140 | 135 | ||
| 141 | stored = 0; | 136 | stored = 0; |
| 142 | offset = filp->f_pos & (sb->s_blocksize - 1); | 137 | offset = ctx->pos & (sb->s_blocksize - 1); |
| 143 | 138 | ||
| 144 | while (!error && !stored && filp->f_pos < inode->i_size) { | 139 | while (ctx->pos < inode->i_size) { |
| 145 | struct ext4_map_blocks map; | 140 | struct ext4_map_blocks map; |
| 146 | struct buffer_head *bh = NULL; | 141 | struct buffer_head *bh = NULL; |
| 147 | 142 | ||
| 148 | map.m_lblk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb); | 143 | map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb); |
| 149 | map.m_len = 1; | 144 | map.m_len = 1; |
| 150 | err = ext4_map_blocks(NULL, inode, &map, 0); | 145 | err = ext4_map_blocks(NULL, inode, &map, 0); |
| 151 | if (err > 0) { | 146 | if (err > 0) { |
| 152 | pgoff_t index = map.m_pblk >> | 147 | pgoff_t index = map.m_pblk >> |
| 153 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | 148 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| 154 | if (!ra_has_index(&filp->f_ra, index)) | 149 | if (!ra_has_index(&file->f_ra, index)) |
| 155 | page_cache_sync_readahead( | 150 | page_cache_sync_readahead( |
| 156 | sb->s_bdev->bd_inode->i_mapping, | 151 | sb->s_bdev->bd_inode->i_mapping, |
| 157 | &filp->f_ra, filp, | 152 | &file->f_ra, file, |
| 158 | index, 1); | 153 | index, 1); |
| 159 | filp->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; | 154 | file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; |
| 160 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); | 155 | bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); |
| 161 | } | 156 | } |
| 162 | 157 | ||
| @@ -166,16 +161,16 @@ static int ext4_readdir(struct file *filp, | |||
| 166 | */ | 161 | */ |
| 167 | if (!bh) { | 162 | if (!bh) { |
| 168 | if (!dir_has_error) { | 163 | if (!dir_has_error) { |
| 169 | EXT4_ERROR_FILE(filp, 0, | 164 | EXT4_ERROR_FILE(file, 0, |
| 170 | "directory contains a " | 165 | "directory contains a " |
| 171 | "hole at offset %llu", | 166 | "hole at offset %llu", |
| 172 | (unsigned long long) filp->f_pos); | 167 | (unsigned long long) ctx->pos); |
| 173 | dir_has_error = 1; | 168 | dir_has_error = 1; |
| 174 | } | 169 | } |
| 175 | /* corrupt size? Maybe no more blocks to read */ | 170 | /* corrupt size? Maybe no more blocks to read */ |
| 176 | if (filp->f_pos > inode->i_blocks << 9) | 171 | if (ctx->pos > inode->i_blocks << 9) |
| 177 | break; | 172 | break; |
| 178 | filp->f_pos += sb->s_blocksize - offset; | 173 | ctx->pos += sb->s_blocksize - offset; |
| 179 | continue; | 174 | continue; |
| 180 | } | 175 | } |
| 181 | 176 | ||
| @@ -183,21 +178,20 @@ static int ext4_readdir(struct file *filp, | |||
| 183 | if (!buffer_verified(bh) && | 178 | if (!buffer_verified(bh) && |
| 184 | !ext4_dirent_csum_verify(inode, | 179 | !ext4_dirent_csum_verify(inode, |
| 185 | (struct ext4_dir_entry *)bh->b_data)) { | 180 | (struct ext4_dir_entry *)bh->b_data)) { |
| 186 | EXT4_ERROR_FILE(filp, 0, "directory fails checksum " | 181 | EXT4_ERROR_FILE(file, 0, "directory fails checksum " |
| 187 | "at offset %llu", | 182 | "at offset %llu", |
| 188 | (unsigned long long)filp->f_pos); | 183 | (unsigned long long)ctx->pos); |
| 189 | filp->f_pos += sb->s_blocksize - offset; | 184 | ctx->pos += sb->s_blocksize - offset; |
| 190 | brelse(bh); | 185 | brelse(bh); |
| 191 | continue; | 186 | continue; |
| 192 | } | 187 | } |
| 193 | set_buffer_verified(bh); | 188 | set_buffer_verified(bh); |
| 194 | 189 | ||
| 195 | revalidate: | ||
| 196 | /* If the dir block has changed since the last call to | 190 | /* If the dir block has changed since the last call to |
| 197 | * readdir(2), then we might be pointing to an invalid | 191 | * readdir(2), then we might be pointing to an invalid |
| 198 | * dirent right now. Scan from the start of the block | 192 | * dirent right now. Scan from the start of the block |
| 199 | * to make sure. */ | 193 | * to make sure. */ |
| 200 | if (filp->f_version != inode->i_version) { | 194 | if (file->f_version != inode->i_version) { |
| 201 | for (i = 0; i < sb->s_blocksize && i < offset; ) { | 195 | for (i = 0; i < sb->s_blocksize && i < offset; ) { |
| 202 | de = (struct ext4_dir_entry_2 *) | 196 | de = (struct ext4_dir_entry_2 *) |
| 203 | (bh->b_data + i); | 197 | (bh->b_data + i); |
| @@ -214,57 +208,46 @@ revalidate: | |||
| 214 | sb->s_blocksize); | 208 | sb->s_blocksize); |
| 215 | } | 209 | } |
| 216 | offset = i; | 210 | offset = i; |
| 217 | filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1)) | 211 | ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) |
| 218 | | offset; | 212 | | offset; |
| 219 | filp->f_version = inode->i_version; | 213 | file->f_version = inode->i_version; |
| 220 | } | 214 | } |
| 221 | 215 | ||
| 222 | while (!error && filp->f_pos < inode->i_size | 216 | while (ctx->pos < inode->i_size |
| 223 | && offset < sb->s_blocksize) { | 217 | && offset < sb->s_blocksize) { |
| 224 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 218 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
| 225 | if (ext4_check_dir_entry(inode, filp, de, bh, | 219 | if (ext4_check_dir_entry(inode, file, de, bh, |
| 226 | bh->b_data, bh->b_size, | 220 | bh->b_data, bh->b_size, |
| 227 | offset)) { | 221 | offset)) { |
| 228 | /* | 222 | /* |
| 229 | * On error, skip the f_pos to the next block | 223 | * On error, skip to the next block |
| 230 | */ | 224 | */ |
| 231 | filp->f_pos = (filp->f_pos | | 225 | ctx->pos = (ctx->pos | |
| 232 | (sb->s_blocksize - 1)) + 1; | 226 | (sb->s_blocksize - 1)) + 1; |
| 233 | brelse(bh); | 227 | break; |
| 234 | ret = stored; | ||
| 235 | goto out; | ||
| 236 | } | 228 | } |
| 237 | offset += ext4_rec_len_from_disk(de->rec_len, | 229 | offset += ext4_rec_len_from_disk(de->rec_len, |
| 238 | sb->s_blocksize); | 230 | sb->s_blocksize); |
| 239 | if (le32_to_cpu(de->inode)) { | 231 | if (le32_to_cpu(de->inode)) { |
| 240 | /* We might block in the next section | 232 | if (!dir_emit(ctx, de->name, |
| 241 | * if the data destination is | ||
| 242 | * currently swapped out. So, use a | ||
| 243 | * version stamp to detect whether or | ||
| 244 | * not the directory has been modified | ||
| 245 | * during the copy operation. | ||
| 246 | */ | ||
| 247 | u64 version = filp->f_version; | ||
| 248 | |||
| 249 | error = filldir(dirent, de->name, | ||
| 250 | de->name_len, | 233 | de->name_len, |
| 251 | filp->f_pos, | ||
| 252 | le32_to_cpu(de->inode), | 234 | le32_to_cpu(de->inode), |
| 253 | get_dtype(sb, de->file_type)); | 235 | get_dtype(sb, de->file_type))) { |
| 254 | if (error) | 236 | brelse(bh); |
| 255 | break; | 237 | return 0; |
| 256 | if (version != filp->f_version) | 238 | } |
| 257 | goto revalidate; | ||
| 258 | stored++; | ||
| 259 | } | 239 | } |
| 260 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len, | 240 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, |
| 261 | sb->s_blocksize); | 241 | sb->s_blocksize); |
| 262 | } | 242 | } |
| 263 | offset = 0; | 243 | offset = 0; |
| 264 | brelse(bh); | 244 | brelse(bh); |
| 245 | if (ctx->pos < inode->i_size) { | ||
| 246 | if (!dir_relax(inode)) | ||
| 247 | return 0; | ||
| 248 | } | ||
| 265 | } | 249 | } |
| 266 | out: | 250 | return 0; |
| 267 | return ret; | ||
| 268 | } | 251 | } |
| 269 | 252 | ||
| 270 | static inline int is_32bit_api(void) | 253 | static inline int is_32bit_api(void) |
| @@ -492,16 +475,12 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
| 492 | * for all entres on the fname linked list. (Normally there is only | 475 | * for all entres on the fname linked list. (Normally there is only |
| 493 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 476 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
| 494 | */ | 477 | */ |
| 495 | static int call_filldir(struct file *filp, void *dirent, | 478 | static int call_filldir(struct file *file, struct dir_context *ctx, |
| 496 | filldir_t filldir, struct fname *fname) | 479 | struct fname *fname) |
| 497 | { | 480 | { |
| 498 | struct dir_private_info *info = filp->private_data; | 481 | struct dir_private_info *info = file->private_data; |
| 499 | loff_t curr_pos; | 482 | struct inode *inode = file_inode(file); |
| 500 | struct inode *inode = file_inode(filp); | 483 | struct super_block *sb = inode->i_sb; |
| 501 | struct super_block *sb; | ||
| 502 | int error; | ||
| 503 | |||
| 504 | sb = inode->i_sb; | ||
| 505 | 484 | ||
| 506 | if (!fname) { | 485 | if (!fname) { |
| 507 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " | 486 | ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: " |
| @@ -509,47 +488,44 @@ static int call_filldir(struct file *filp, void *dirent, | |||
| 509 | inode->i_ino, current->comm); | 488 | inode->i_ino, current->comm); |
| 510 | return 0; | 489 | return 0; |
| 511 | } | 490 | } |
| 512 | curr_pos = hash2pos(filp, fname->hash, fname->minor_hash); | 491 | ctx->pos = hash2pos(file, fname->hash, fname->minor_hash); |
| 513 | while (fname) { | 492 | while (fname) { |
| 514 | error = filldir(dirent, fname->name, | 493 | if (!dir_emit(ctx, fname->name, |
| 515 | fname->name_len, curr_pos, | 494 | fname->name_len, |
| 516 | fname->inode, | 495 | fname->inode, |
| 517 | get_dtype(sb, fname->file_type)); | 496 | get_dtype(sb, fname->file_type))) { |
| 518 | if (error) { | ||
| 519 | filp->f_pos = curr_pos; | ||
| 520 | info->extra_fname = fname; | 497 | info->extra_fname = fname; |
| 521 | return error; | 498 | return 1; |
| 522 | } | 499 | } |
| 523 | fname = fname->next; | 500 | fname = fname->next; |
| 524 | } | 501 | } |
| 525 | return 0; | 502 | return 0; |
| 526 | } | 503 | } |
| 527 | 504 | ||
| 528 | static int ext4_dx_readdir(struct file *filp, | 505 | static int ext4_dx_readdir(struct file *file, struct dir_context *ctx) |
| 529 | void *dirent, filldir_t filldir) | ||
| 530 | { | 506 | { |
| 531 | struct dir_private_info *info = filp->private_data; | 507 | struct dir_private_info *info = file->private_data; |
| 532 | struct inode *inode = file_inode(filp); | 508 | struct inode *inode = file_inode(file); |
| 533 | struct fname *fname; | 509 | struct fname *fname; |
| 534 | int ret; | 510 | int ret; |
| 535 | 511 | ||
| 536 | if (!info) { | 512 | if (!info) { |
| 537 | info = ext4_htree_create_dir_info(filp, filp->f_pos); | 513 | info = ext4_htree_create_dir_info(file, ctx->pos); |
| 538 | if (!info) | 514 | if (!info) |
| 539 | return -ENOMEM; | 515 | return -ENOMEM; |
| 540 | filp->private_data = info; | 516 | file->private_data = info; |
| 541 | } | 517 | } |
| 542 | 518 | ||
| 543 | if (filp->f_pos == ext4_get_htree_eof(filp)) | 519 | if (ctx->pos == ext4_get_htree_eof(file)) |
| 544 | return 0; /* EOF */ | 520 | return 0; /* EOF */ |
| 545 | 521 | ||
| 546 | /* Some one has messed with f_pos; reset the world */ | 522 | /* Some one has messed with f_pos; reset the world */ |
| 547 | if (info->last_pos != filp->f_pos) { | 523 | if (info->last_pos != ctx->pos) { |
| 548 | free_rb_tree_fname(&info->root); | 524 | free_rb_tree_fname(&info->root); |
| 549 | info->curr_node = NULL; | 525 | info->curr_node = NULL; |
| 550 | info->extra_fname = NULL; | 526 | info->extra_fname = NULL; |
| 551 | info->curr_hash = pos2maj_hash(filp, filp->f_pos); | 527 | info->curr_hash = pos2maj_hash(file, ctx->pos); |
| 552 | info->curr_minor_hash = pos2min_hash(filp, filp->f_pos); | 528 | info->curr_minor_hash = pos2min_hash(file, ctx->pos); |
| 553 | } | 529 | } |
| 554 | 530 | ||
| 555 | /* | 531 | /* |
| @@ -557,7 +533,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
| 557 | * chain, return them first. | 533 | * chain, return them first. |
| 558 | */ | 534 | */ |
| 559 | if (info->extra_fname) { | 535 | if (info->extra_fname) { |
| 560 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) | 536 | if (call_filldir(file, ctx, info->extra_fname)) |
| 561 | goto finished; | 537 | goto finished; |
| 562 | info->extra_fname = NULL; | 538 | info->extra_fname = NULL; |
| 563 | goto next_node; | 539 | goto next_node; |
| @@ -571,17 +547,17 @@ static int ext4_dx_readdir(struct file *filp, | |||
| 571 | * cached entries. | 547 | * cached entries. |
| 572 | */ | 548 | */ |
| 573 | if ((!info->curr_node) || | 549 | if ((!info->curr_node) || |
| 574 | (filp->f_version != inode->i_version)) { | 550 | (file->f_version != inode->i_version)) { |
| 575 | info->curr_node = NULL; | 551 | info->curr_node = NULL; |
| 576 | free_rb_tree_fname(&info->root); | 552 | free_rb_tree_fname(&info->root); |
| 577 | filp->f_version = inode->i_version; | 553 | file->f_version = inode->i_version; |
| 578 | ret = ext4_htree_fill_tree(filp, info->curr_hash, | 554 | ret = ext4_htree_fill_tree(file, info->curr_hash, |
| 579 | info->curr_minor_hash, | 555 | info->curr_minor_hash, |
| 580 | &info->next_hash); | 556 | &info->next_hash); |
| 581 | if (ret < 0) | 557 | if (ret < 0) |
| 582 | return ret; | 558 | return ret; |
| 583 | if (ret == 0) { | 559 | if (ret == 0) { |
| 584 | filp->f_pos = ext4_get_htree_eof(filp); | 560 | ctx->pos = ext4_get_htree_eof(file); |
| 585 | break; | 561 | break; |
| 586 | } | 562 | } |
| 587 | info->curr_node = rb_first(&info->root); | 563 | info->curr_node = rb_first(&info->root); |
| @@ -590,7 +566,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
| 590 | fname = rb_entry(info->curr_node, struct fname, rb_hash); | 566 | fname = rb_entry(info->curr_node, struct fname, rb_hash); |
| 591 | info->curr_hash = fname->hash; | 567 | info->curr_hash = fname->hash; |
| 592 | info->curr_minor_hash = fname->minor_hash; | 568 | info->curr_minor_hash = fname->minor_hash; |
| 593 | if (call_filldir(filp, dirent, filldir, fname)) | 569 | if (call_filldir(file, ctx, fname)) |
| 594 | break; | 570 | break; |
| 595 | next_node: | 571 | next_node: |
| 596 | info->curr_node = rb_next(info->curr_node); | 572 | info->curr_node = rb_next(info->curr_node); |
| @@ -601,7 +577,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
| 601 | info->curr_minor_hash = fname->minor_hash; | 577 | info->curr_minor_hash = fname->minor_hash; |
| 602 | } else { | 578 | } else { |
| 603 | if (info->next_hash == ~0) { | 579 | if (info->next_hash == ~0) { |
| 604 | filp->f_pos = ext4_get_htree_eof(filp); | 580 | ctx->pos = ext4_get_htree_eof(file); |
| 605 | break; | 581 | break; |
| 606 | } | 582 | } |
| 607 | info->curr_hash = info->next_hash; | 583 | info->curr_hash = info->next_hash; |
| @@ -609,7 +585,7 @@ static int ext4_dx_readdir(struct file *filp, | |||
| 609 | } | 585 | } |
| 610 | } | 586 | } |
| 611 | finished: | 587 | finished: |
| 612 | info->last_pos = filp->f_pos; | 588 | info->last_pos = ctx->pos; |
| 613 | return 0; | 589 | return 0; |
| 614 | } | 590 | } |
| 615 | 591 | ||
| @@ -624,7 +600,7 @@ static int ext4_release_dir(struct inode *inode, struct file *filp) | |||
| 624 | const struct file_operations ext4_dir_operations = { | 600 | const struct file_operations ext4_dir_operations = { |
| 625 | .llseek = ext4_dir_llseek, | 601 | .llseek = ext4_dir_llseek, |
| 626 | .read = generic_read_dir, | 602 | .read = generic_read_dir, |
| 627 | .readdir = ext4_readdir, | 603 | .iterate = ext4_readdir, |
| 628 | .unlocked_ioctl = ext4_ioctl, | 604 | .unlocked_ioctl = ext4_ioctl, |
| 629 | #ifdef CONFIG_COMPAT | 605 | #ifdef CONFIG_COMPAT |
| 630 | .compat_ioctl = ext4_compat_ioctl, | 606 | .compat_ioctl = ext4_compat_ioctl, |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5aae3d12d400..b577e45425b0 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -177,38 +177,28 @@ struct ext4_map_blocks { | |||
| 177 | }; | 177 | }; |
| 178 | 178 | ||
| 179 | /* | 179 | /* |
| 180 | * For delayed allocation tracking | ||
| 181 | */ | ||
| 182 | struct mpage_da_data { | ||
| 183 | struct inode *inode; | ||
| 184 | sector_t b_blocknr; /* start block number of extent */ | ||
| 185 | size_t b_size; /* size of extent */ | ||
| 186 | unsigned long b_state; /* state of the extent */ | ||
| 187 | unsigned long first_page, next_page; /* extent of pages */ | ||
| 188 | struct writeback_control *wbc; | ||
| 189 | int io_done; | ||
| 190 | int pages_written; | ||
| 191 | int retval; | ||
| 192 | }; | ||
| 193 | |||
| 194 | /* | ||
| 195 | * Flags for ext4_io_end->flags | 180 | * Flags for ext4_io_end->flags |
| 196 | */ | 181 | */ |
| 197 | #define EXT4_IO_END_UNWRITTEN 0x0001 | 182 | #define EXT4_IO_END_UNWRITTEN 0x0001 |
| 198 | #define EXT4_IO_END_ERROR 0x0002 | 183 | #define EXT4_IO_END_DIRECT 0x0002 |
| 199 | #define EXT4_IO_END_DIRECT 0x0004 | ||
| 200 | 184 | ||
| 201 | /* | 185 | /* |
| 202 | * For converting uninitialized extents on a work queue. | 186 | * For converting uninitialized extents on a work queue. 'handle' is used for |
| 187 | * buffered writeback. | ||
| 203 | */ | 188 | */ |
| 204 | typedef struct ext4_io_end { | 189 | typedef struct ext4_io_end { |
| 205 | struct list_head list; /* per-file finished IO list */ | 190 | struct list_head list; /* per-file finished IO list */ |
| 191 | handle_t *handle; /* handle reserved for extent | ||
| 192 | * conversion */ | ||
| 206 | struct inode *inode; /* file being written to */ | 193 | struct inode *inode; /* file being written to */ |
| 194 | struct bio *bio; /* Linked list of completed | ||
| 195 | * bios covering the extent */ | ||
| 207 | unsigned int flag; /* unwritten or not */ | 196 | unsigned int flag; /* unwritten or not */ |
| 208 | loff_t offset; /* offset in the file */ | 197 | loff_t offset; /* offset in the file */ |
| 209 | ssize_t size; /* size of the extent */ | 198 | ssize_t size; /* size of the extent */ |
| 210 | struct kiocb *iocb; /* iocb struct for AIO */ | 199 | struct kiocb *iocb; /* iocb struct for AIO */ |
| 211 | int result; /* error value for AIO */ | 200 | int result; /* error value for AIO */ |
| 201 | atomic_t count; /* reference counter */ | ||
| 212 | } ext4_io_end_t; | 202 | } ext4_io_end_t; |
| 213 | 203 | ||
| 214 | struct ext4_io_submit { | 204 | struct ext4_io_submit { |
| @@ -581,11 +571,6 @@ enum { | |||
| 581 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 | 571 | #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 |
| 582 | 572 | ||
| 583 | /* | 573 | /* |
| 584 | * Flags used by ext4_discard_partial_page_buffers | ||
| 585 | */ | ||
| 586 | #define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001 | ||
| 587 | |||
| 588 | /* | ||
| 589 | * ioctl commands | 574 | * ioctl commands |
| 590 | */ | 575 | */ |
| 591 | #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS | 576 | #define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS |
| @@ -879,6 +864,7 @@ struct ext4_inode_info { | |||
| 879 | rwlock_t i_es_lock; | 864 | rwlock_t i_es_lock; |
| 880 | struct list_head i_es_lru; | 865 | struct list_head i_es_lru; |
| 881 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ | 866 | unsigned int i_es_lru_nr; /* protected by i_es_lock */ |
| 867 | unsigned long i_touch_when; /* jiffies of last accessing */ | ||
| 882 | 868 | ||
| 883 | /* ialloc */ | 869 | /* ialloc */ |
| 884 | ext4_group_t i_last_alloc_group; | 870 | ext4_group_t i_last_alloc_group; |
| @@ -903,12 +889,22 @@ struct ext4_inode_info { | |||
| 903 | qsize_t i_reserved_quota; | 889 | qsize_t i_reserved_quota; |
| 904 | #endif | 890 | #endif |
| 905 | 891 | ||
| 906 | /* completed IOs that might need unwritten extents handling */ | 892 | /* Lock protecting lists below */ |
| 907 | struct list_head i_completed_io_list; | ||
| 908 | spinlock_t i_completed_io_lock; | 893 | spinlock_t i_completed_io_lock; |
| 894 | /* | ||
| 895 | * Completed IOs that need unwritten extents handling and have | ||
| 896 | * transaction reserved | ||
| 897 | */ | ||
| 898 | struct list_head i_rsv_conversion_list; | ||
| 899 | /* | ||
| 900 | * Completed IOs that need unwritten extents handling and don't have | ||
| 901 | * transaction reserved | ||
| 902 | */ | ||
| 903 | struct list_head i_unrsv_conversion_list; | ||
| 909 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ | 904 | atomic_t i_ioend_count; /* Number of outstanding io_end structs */ |
| 910 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ | 905 | atomic_t i_unwritten; /* Nr. of inflight conversions pending */ |
| 911 | struct work_struct i_unwritten_work; /* deferred extent conversion */ | 906 | struct work_struct i_rsv_conversion_work; |
| 907 | struct work_struct i_unrsv_conversion_work; | ||
| 912 | 908 | ||
| 913 | spinlock_t i_block_reservation_lock; | 909 | spinlock_t i_block_reservation_lock; |
| 914 | 910 | ||
| @@ -1245,7 +1241,6 @@ struct ext4_sb_info { | |||
| 1245 | unsigned int s_mb_stats; | 1241 | unsigned int s_mb_stats; |
| 1246 | unsigned int s_mb_order2_reqs; | 1242 | unsigned int s_mb_order2_reqs; |
| 1247 | unsigned int s_mb_group_prealloc; | 1243 | unsigned int s_mb_group_prealloc; |
| 1248 | unsigned int s_max_writeback_mb_bump; | ||
| 1249 | unsigned int s_max_dir_size_kb; | 1244 | unsigned int s_max_dir_size_kb; |
| 1250 | /* where last allocation was done - for stream allocation */ | 1245 | /* where last allocation was done - for stream allocation */ |
| 1251 | unsigned long s_mb_last_group; | 1246 | unsigned long s_mb_last_group; |
| @@ -1281,8 +1276,10 @@ struct ext4_sb_info { | |||
| 1281 | struct flex_groups *s_flex_groups; | 1276 | struct flex_groups *s_flex_groups; |
| 1282 | ext4_group_t s_flex_groups_allocated; | 1277 | ext4_group_t s_flex_groups_allocated; |
| 1283 | 1278 | ||
| 1284 | /* workqueue for dio unwritten */ | 1279 | /* workqueue for unreserved extent convertions (dio) */ |
| 1285 | struct workqueue_struct *dio_unwritten_wq; | 1280 | struct workqueue_struct *unrsv_conversion_wq; |
| 1281 | /* workqueue for reserved extent conversions (buffered io) */ | ||
| 1282 | struct workqueue_struct *rsv_conversion_wq; | ||
| 1286 | 1283 | ||
| 1287 | /* timer for periodic error stats printing */ | 1284 | /* timer for periodic error stats printing */ |
| 1288 | struct timer_list s_err_report; | 1285 | struct timer_list s_err_report; |
| @@ -1307,6 +1304,7 @@ struct ext4_sb_info { | |||
| 1307 | /* Reclaim extents from extent status tree */ | 1304 | /* Reclaim extents from extent status tree */ |
| 1308 | struct shrinker s_es_shrinker; | 1305 | struct shrinker s_es_shrinker; |
| 1309 | struct list_head s_es_lru; | 1306 | struct list_head s_es_lru; |
| 1307 | unsigned long s_es_last_sorted; | ||
| 1310 | struct percpu_counter s_extent_cache_cnt; | 1308 | struct percpu_counter s_extent_cache_cnt; |
| 1311 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; | 1309 | spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; |
| 1312 | }; | 1310 | }; |
| @@ -1342,6 +1340,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, | |||
| 1342 | struct ext4_io_end *io_end) | 1340 | struct ext4_io_end *io_end) |
| 1343 | { | 1341 | { |
| 1344 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | 1342 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { |
| 1343 | /* Writeback has to have coversion transaction reserved */ | ||
| 1344 | WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle && | ||
| 1345 | !(io_end->flag & EXT4_IO_END_DIRECT)); | ||
| 1345 | io_end->flag |= EXT4_IO_END_UNWRITTEN; | 1346 | io_end->flag |= EXT4_IO_END_UNWRITTEN; |
| 1346 | atomic_inc(&EXT4_I(inode)->i_unwritten); | 1347 | atomic_inc(&EXT4_I(inode)->i_unwritten); |
| 1347 | } | 1348 | } |
| @@ -1999,7 +2000,6 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype) | |||
| 1999 | 2000 | ||
| 2000 | /* fsync.c */ | 2001 | /* fsync.c */ |
| 2001 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); | 2002 | extern int ext4_sync_file(struct file *, loff_t, loff_t, int); |
| 2002 | extern int ext4_flush_unwritten_io(struct inode *); | ||
| 2003 | 2003 | ||
| 2004 | /* hash.c */ | 2004 | /* hash.c */ |
| 2005 | extern int ext4fs_dirhash(const char *name, int len, struct | 2005 | extern int ext4fs_dirhash(const char *name, int len, struct |
| @@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); | |||
| 2088 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 2088 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
| 2089 | extern int ext4_can_truncate(struct inode *inode); | 2089 | extern int ext4_can_truncate(struct inode *inode); |
| 2090 | extern void ext4_truncate(struct inode *); | 2090 | extern void ext4_truncate(struct inode *); |
| 2091 | extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); | 2091 | extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); |
| 2092 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); | 2092 | extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); |
| 2093 | extern void ext4_set_inode_flags(struct inode *); | 2093 | extern void ext4_set_inode_flags(struct inode *); |
| 2094 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 2094 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
| @@ -2096,9 +2096,12 @@ extern int ext4_alloc_da_blocks(struct inode *inode); | |||
| 2096 | extern void ext4_set_aops(struct inode *inode); | 2096 | extern void ext4_set_aops(struct inode *inode); |
| 2097 | extern int ext4_writepage_trans_blocks(struct inode *); | 2097 | extern int ext4_writepage_trans_blocks(struct inode *); |
| 2098 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); | 2098 | extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); |
| 2099 | extern int ext4_discard_partial_page_buffers(handle_t *handle, | 2099 | extern int ext4_block_truncate_page(handle_t *handle, |
| 2100 | struct address_space *mapping, loff_t from, | 2100 | struct address_space *mapping, loff_t from); |
| 2101 | loff_t length, int flags); | 2101 | extern int ext4_block_zero_page_range(handle_t *handle, |
| 2102 | struct address_space *mapping, loff_t from, loff_t length); | ||
| 2103 | extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, | ||
| 2104 | loff_t lstart, loff_t lend); | ||
| 2102 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); | 2105 | extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); |
| 2103 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); | 2106 | extern qsize_t *ext4_get_reserved_space(struct inode *inode); |
| 2104 | extern void ext4_da_update_reserve_space(struct inode *inode, | 2107 | extern void ext4_da_update_reserve_space(struct inode *inode, |
| @@ -2111,7 +2114,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
| 2111 | const struct iovec *iov, loff_t offset, | 2114 | const struct iovec *iov, loff_t offset, |
| 2112 | unsigned long nr_segs); | 2115 | unsigned long nr_segs); |
| 2113 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); | 2116 | extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); |
| 2114 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); | 2117 | extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); |
| 2115 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); | 2118 | extern void ext4_ind_truncate(handle_t *, struct inode *inode); |
| 2116 | extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, | 2119 | extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, |
| 2117 | ext4_lblk_t first, ext4_lblk_t stop); | 2120 | ext4_lblk_t first, ext4_lblk_t stop); |
| @@ -2166,42 +2169,96 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb, | |||
| 2166 | ext4_group_t ngroup); | 2169 | ext4_group_t ngroup); |
| 2167 | extern const char *ext4_decode_error(struct super_block *sb, int errno, | 2170 | extern const char *ext4_decode_error(struct super_block *sb, int errno, |
| 2168 | char nbuf[16]); | 2171 | char nbuf[16]); |
| 2172 | |||
| 2169 | extern __printf(4, 5) | 2173 | extern __printf(4, 5) |
| 2170 | void __ext4_error(struct super_block *, const char *, unsigned int, | 2174 | void __ext4_error(struct super_block *, const char *, unsigned int, |
| 2171 | const char *, ...); | 2175 | const char *, ...); |
| 2172 | #define ext4_error(sb, message...) __ext4_error(sb, __func__, \ | ||
| 2173 | __LINE__, ## message) | ||
| 2174 | extern __printf(5, 6) | 2176 | extern __printf(5, 6) |
| 2175 | void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, | 2177 | void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, |
| 2176 | const char *, ...); | 2178 | const char *, ...); |
| 2177 | extern __printf(5, 6) | 2179 | extern __printf(5, 6) |
| 2178 | void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, | 2180 | void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, |
| 2179 | const char *, ...); | 2181 | const char *, ...); |
| 2180 | extern void __ext4_std_error(struct super_block *, const char *, | 2182 | extern void __ext4_std_error(struct super_block *, const char *, |
| 2181 | unsigned int, int); | 2183 | unsigned int, int); |
| 2182 | extern __printf(4, 5) | 2184 | extern __printf(4, 5) |
| 2183 | void __ext4_abort(struct super_block *, const char *, unsigned int, | 2185 | void __ext4_abort(struct super_block *, const char *, unsigned int, |
| 2184 | const char *, ...); | 2186 | const char *, ...); |
| 2185 | #define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ | ||
| 2186 | __LINE__, ## message) | ||
| 2187 | extern __printf(4, 5) | 2187 | extern __printf(4, 5) |
| 2188 | void __ext4_warning(struct super_block *, const char *, unsigned int, | 2188 | void __ext4_warning(struct super_block *, const char *, unsigned int, |
| 2189 | const char *, ...); | 2189 | const char *, ...); |
| 2190 | #define ext4_warning(sb, message...) __ext4_warning(sb, __func__, \ | ||
| 2191 | __LINE__, ## message) | ||
| 2192 | extern __printf(3, 4) | 2190 | extern __printf(3, 4) |
| 2193 | void ext4_msg(struct super_block *, const char *, const char *, ...); | 2191 | void __ext4_msg(struct super_block *, const char *, const char *, ...); |
| 2194 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, | 2192 | extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, |
| 2195 | const char *, unsigned int, const char *); | 2193 | const char *, unsigned int, const char *); |
| 2196 | #define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ | ||
| 2197 | __LINE__, msg) | ||
| 2198 | extern __printf(7, 8) | 2194 | extern __printf(7, 8) |
| 2199 | void __ext4_grp_locked_error(const char *, unsigned int, | 2195 | void __ext4_grp_locked_error(const char *, unsigned int, |
| 2200 | struct super_block *, ext4_group_t, | 2196 | struct super_block *, ext4_group_t, |
| 2201 | unsigned long, ext4_fsblk_t, | 2197 | unsigned long, ext4_fsblk_t, |
| 2202 | const char *, ...); | 2198 | const char *, ...); |
| 2203 | #define ext4_grp_locked_error(sb, grp, message...) \ | 2199 | |
| 2204 | __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) | 2200 | #ifdef CONFIG_PRINTK |
| 2201 | |||
| 2202 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ | ||
| 2203 | __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__) | ||
| 2204 | #define ext4_error_file(file, func, line, block, fmt, ...) \ | ||
| 2205 | __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) | ||
| 2206 | #define ext4_error(sb, fmt, ...) \ | ||
| 2207 | __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | ||
| 2208 | #define ext4_abort(sb, fmt, ...) \ | ||
| 2209 | __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | ||
| 2210 | #define ext4_warning(sb, fmt, ...) \ | ||
| 2211 | __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) | ||
| 2212 | #define ext4_msg(sb, level, fmt, ...) \ | ||
| 2213 | __ext4_msg(sb, level, fmt, ##__VA_ARGS__) | ||
| 2214 | #define dump_mmp_msg(sb, mmp, msg) \ | ||
| 2215 | __dump_mmp_msg(sb, mmp, __func__, __LINE__, msg) | ||
| 2216 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ | ||
| 2217 | __ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \ | ||
| 2218 | fmt, ##__VA_ARGS__) | ||
| 2219 | |||
| 2220 | #else | ||
| 2221 | |||
| 2222 | #define ext4_error_inode(inode, func, line, block, fmt, ...) \ | ||
| 2223 | do { \ | ||
| 2224 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2225 | __ext4_error_inode(inode, "", 0, block, " "); \ | ||
| 2226 | } while (0) | ||
| 2227 | #define ext4_error_file(file, func, line, block, fmt, ...) \ | ||
| 2228 | do { \ | ||
| 2229 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2230 | __ext4_error_file(file, "", 0, block, " "); \ | ||
| 2231 | } while (0) | ||
| 2232 | #define ext4_error(sb, fmt, ...) \ | ||
| 2233 | do { \ | ||
| 2234 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2235 | __ext4_error(sb, "", 0, " "); \ | ||
| 2236 | } while (0) | ||
| 2237 | #define ext4_abort(sb, fmt, ...) \ | ||
| 2238 | do { \ | ||
| 2239 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2240 | __ext4_abort(sb, "", 0, " "); \ | ||
| 2241 | } while (0) | ||
| 2242 | #define ext4_warning(sb, fmt, ...) \ | ||
| 2243 | do { \ | ||
| 2244 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2245 | __ext4_warning(sb, "", 0, " "); \ | ||
| 2246 | } while (0) | ||
| 2247 | #define ext4_msg(sb, level, fmt, ...) \ | ||
| 2248 | do { \ | ||
| 2249 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2250 | __ext4_msg(sb, "", " "); \ | ||
| 2251 | } while (0) | ||
| 2252 | #define dump_mmp_msg(sb, mmp, msg) \ | ||
| 2253 | __dump_mmp_msg(sb, mmp, "", 0, "") | ||
| 2254 | #define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ | ||
| 2255 | do { \ | ||
| 2256 | no_printk(fmt, ##__VA_ARGS__); \ | ||
| 2257 | __ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \ | ||
| 2258 | } while (0) | ||
| 2259 | |||
| 2260 | #endif | ||
| 2261 | |||
| 2205 | extern void ext4_update_dynamic_rev(struct super_block *sb); | 2262 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
| 2206 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 2263 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
| 2207 | __u32 compat); | 2264 | __u32 compat); |
| @@ -2312,6 +2369,7 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | |||
| 2312 | { | 2369 | { |
| 2313 | struct ext4_group_info ***grp_info; | 2370 | struct ext4_group_info ***grp_info; |
| 2314 | long indexv, indexh; | 2371 | long indexv, indexh; |
| 2372 | BUG_ON(group >= EXT4_SB(sb)->s_groups_count); | ||
| 2315 | grp_info = EXT4_SB(sb)->s_group_info; | 2373 | grp_info = EXT4_SB(sb)->s_group_info; |
| 2316 | indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); | 2374 | indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); |
| 2317 | indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); | 2375 | indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); |
| @@ -2515,7 +2573,7 @@ extern int ext4_try_create_inline_dir(handle_t *handle, | |||
| 2515 | struct inode *parent, | 2573 | struct inode *parent, |
| 2516 | struct inode *inode); | 2574 | struct inode *inode); |
| 2517 | extern int ext4_read_inline_dir(struct file *filp, | 2575 | extern int ext4_read_inline_dir(struct file *filp, |
| 2518 | void *dirent, filldir_t filldir, | 2576 | struct dir_context *ctx, |
| 2519 | int *has_inline_data); | 2577 | int *has_inline_data); |
| 2520 | extern int htree_inlinedir_to_tree(struct file *dir_file, | 2578 | extern int htree_inlinedir_to_tree(struct file *dir_file, |
| 2521 | struct inode *dir, ext4_lblk_t block, | 2579 | struct inode *dir, ext4_lblk_t block, |
| @@ -2598,8 +2656,7 @@ struct ext4_extent; | |||
| 2598 | 2656 | ||
| 2599 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); | 2657 | extern int ext4_ext_tree_init(handle_t *handle, struct inode *); |
| 2600 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); | 2658 | extern int ext4_ext_writepage_trans_blocks(struct inode *, int); |
| 2601 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, | 2659 | extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); |
| 2602 | int chunk); | ||
| 2603 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, | 2660 | extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, |
| 2604 | struct ext4_map_blocks *map, int flags); | 2661 | struct ext4_map_blocks *map, int flags); |
| 2605 | extern void ext4_ext_truncate(handle_t *, struct inode *); | 2662 | extern void ext4_ext_truncate(handle_t *, struct inode *); |
| @@ -2609,8 +2666,8 @@ extern void ext4_ext_init(struct super_block *); | |||
| 2609 | extern void ext4_ext_release(struct super_block *); | 2666 | extern void ext4_ext_release(struct super_block *); |
| 2610 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, | 2667 | extern long ext4_fallocate(struct file *file, int mode, loff_t offset, |
| 2611 | loff_t len); | 2668 | loff_t len); |
| 2612 | extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 2669 | extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
| 2613 | ssize_t len); | 2670 | loff_t offset, ssize_t len); |
| 2614 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, | 2671 | extern int ext4_map_blocks(handle_t *handle, struct inode *inode, |
| 2615 | struct ext4_map_blocks *map, int flags); | 2672 | struct ext4_map_blocks *map, int flags); |
| 2616 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, | 2673 | extern int ext4_ext_calc_metadata_amount(struct inode *inode, |
| @@ -2650,12 +2707,15 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, | |||
| 2650 | 2707 | ||
| 2651 | /* page-io.c */ | 2708 | /* page-io.c */ |
| 2652 | extern int __init ext4_init_pageio(void); | 2709 | extern int __init ext4_init_pageio(void); |
| 2653 | extern void ext4_add_complete_io(ext4_io_end_t *io_end); | ||
| 2654 | extern void ext4_exit_pageio(void); | 2710 | extern void ext4_exit_pageio(void); |
| 2655 | extern void ext4_ioend_shutdown(struct inode *); | ||
| 2656 | extern void ext4_free_io_end(ext4_io_end_t *io); | ||
| 2657 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); | 2711 | extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); |
| 2658 | extern void ext4_end_io_work(struct work_struct *work); | 2712 | extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); |
| 2713 | extern int ext4_put_io_end(ext4_io_end_t *io_end); | ||
| 2714 | extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); | ||
| 2715 | extern void ext4_io_submit_init(struct ext4_io_submit *io, | ||
| 2716 | struct writeback_control *wbc); | ||
| 2717 | extern void ext4_end_io_rsv_work(struct work_struct *work); | ||
| 2718 | extern void ext4_end_io_unrsv_work(struct work_struct *work); | ||
| 2659 | extern void ext4_io_submit(struct ext4_io_submit *io); | 2719 | extern void ext4_io_submit(struct ext4_io_submit *io); |
| 2660 | extern int ext4_bio_write_page(struct ext4_io_submit *io, | 2720 | extern int ext4_bio_write_page(struct ext4_io_submit *io, |
| 2661 | struct page *page, | 2721 | struct page *page, |
| @@ -2668,20 +2728,17 @@ extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); | |||
| 2668 | extern int ext4_mmp_csum_verify(struct super_block *sb, | 2728 | extern int ext4_mmp_csum_verify(struct super_block *sb, |
| 2669 | struct mmp_struct *mmp); | 2729 | struct mmp_struct *mmp); |
| 2670 | 2730 | ||
| 2671 | /* BH_Uninit flag: blocks are allocated but uninitialized on disk */ | 2731 | /* |
| 2732 | * Note that these flags will never ever appear in a buffer_head's state flag. | ||
| 2733 | * See EXT4_MAP_... to see where this is used. | ||
| 2734 | */ | ||
| 2672 | enum ext4_state_bits { | 2735 | enum ext4_state_bits { |
| 2673 | BH_Uninit /* blocks are allocated but uninitialized on disk */ | 2736 | BH_Uninit /* blocks are allocated but uninitialized on disk */ |
| 2674 | = BH_JBDPrivateStart, | 2737 | = BH_JBDPrivateStart, |
| 2675 | BH_AllocFromCluster, /* allocated blocks were part of already | 2738 | BH_AllocFromCluster, /* allocated blocks were part of already |
| 2676 | * allocated cluster. Note that this flag will | 2739 | * allocated cluster. */ |
| 2677 | * never, ever appear in a buffer_head's state | ||
| 2678 | * flag. See EXT4_MAP_FROM_CLUSTER to see where | ||
| 2679 | * this is used. */ | ||
| 2680 | }; | 2740 | }; |
| 2681 | 2741 | ||
| 2682 | BUFFER_FNS(Uninit, uninit) | ||
| 2683 | TAS_BUFFER_FNS(Uninit, uninit) | ||
| 2684 | |||
| 2685 | /* | 2742 | /* |
| 2686 | * Add new method to test whether block and inode bitmaps are properly | 2743 | * Add new method to test whether block and inode bitmaps are properly |
| 2687 | * initialized. With uninit_bg reading the block from disk is not enough | 2744 | * initialized. With uninit_bg reading the block from disk is not enough |
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 451eb4045330..72a3600aedbd 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c | |||
| @@ -38,31 +38,43 @@ static void ext4_put_nojournal(handle_t *handle) | |||
| 38 | /* | 38 | /* |
| 39 | * Wrappers for jbd2_journal_start/end. | 39 | * Wrappers for jbd2_journal_start/end. |
| 40 | */ | 40 | */ |
| 41 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, | 41 | static int ext4_journal_check_start(struct super_block *sb) |
| 42 | int type, int nblocks) | ||
| 43 | { | 42 | { |
| 44 | journal_t *journal; | 43 | journal_t *journal; |
| 45 | 44 | ||
| 46 | might_sleep(); | 45 | might_sleep(); |
| 47 | |||
| 48 | trace_ext4_journal_start(sb, nblocks, _RET_IP_); | ||
| 49 | if (sb->s_flags & MS_RDONLY) | 46 | if (sb->s_flags & MS_RDONLY) |
| 50 | return ERR_PTR(-EROFS); | 47 | return -EROFS; |
| 51 | |||
| 52 | WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); | 48 | WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE); |
| 53 | journal = EXT4_SB(sb)->s_journal; | 49 | journal = EXT4_SB(sb)->s_journal; |
| 54 | if (!journal) | ||
| 55 | return ext4_get_nojournal(); | ||
| 56 | /* | 50 | /* |
| 57 | * Special case here: if the journal has aborted behind our | 51 | * Special case here: if the journal has aborted behind our |
| 58 | * backs (eg. EIO in the commit thread), then we still need to | 52 | * backs (eg. EIO in the commit thread), then we still need to |
| 59 | * take the FS itself readonly cleanly. | 53 | * take the FS itself readonly cleanly. |
| 60 | */ | 54 | */ |
| 61 | if (is_journal_aborted(journal)) { | 55 | if (journal && is_journal_aborted(journal)) { |
| 62 | ext4_abort(sb, "Detected aborted journal"); | 56 | ext4_abort(sb, "Detected aborted journal"); |
| 63 | return ERR_PTR(-EROFS); | 57 | return -EROFS; |
| 64 | } | 58 | } |
| 65 | return jbd2__journal_start(journal, nblocks, GFP_NOFS, type, line); | 59 | return 0; |
| 60 | } | ||
| 61 | |||
| 62 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, | ||
| 63 | int type, int blocks, int rsv_blocks) | ||
| 64 | { | ||
| 65 | journal_t *journal; | ||
| 66 | int err; | ||
| 67 | |||
| 68 | trace_ext4_journal_start(sb, blocks, rsv_blocks, _RET_IP_); | ||
| 69 | err = ext4_journal_check_start(sb); | ||
| 70 | if (err < 0) | ||
| 71 | return ERR_PTR(err); | ||
| 72 | |||
| 73 | journal = EXT4_SB(sb)->s_journal; | ||
| 74 | if (!journal) | ||
| 75 | return ext4_get_nojournal(); | ||
| 76 | return jbd2__journal_start(journal, blocks, rsv_blocks, GFP_NOFS, | ||
| 77 | type, line); | ||
| 66 | } | 78 | } |
| 67 | 79 | ||
| 68 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) | 80 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) |
| @@ -86,6 +98,30 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) | |||
| 86 | return err; | 98 | return err; |
| 87 | } | 99 | } |
| 88 | 100 | ||
| 101 | handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, | ||
| 102 | int type) | ||
| 103 | { | ||
| 104 | struct super_block *sb; | ||
| 105 | int err; | ||
| 106 | |||
| 107 | if (!ext4_handle_valid(handle)) | ||
| 108 | return ext4_get_nojournal(); | ||
| 109 | |||
| 110 | sb = handle->h_journal->j_private; | ||
| 111 | trace_ext4_journal_start_reserved(sb, handle->h_buffer_credits, | ||
| 112 | _RET_IP_); | ||
| 113 | err = ext4_journal_check_start(sb); | ||
| 114 | if (err < 0) { | ||
| 115 | jbd2_journal_free_reserved(handle); | ||
| 116 | return ERR_PTR(err); | ||
| 117 | } | ||
| 118 | |||
| 119 | err = jbd2_journal_start_reserved(handle, type, line); | ||
| 120 | if (err < 0) | ||
| 121 | return ERR_PTR(err); | ||
| 122 | return handle; | ||
| 123 | } | ||
| 124 | |||
| 89 | void ext4_journal_abort_handle(const char *caller, unsigned int line, | 125 | void ext4_journal_abort_handle(const char *caller, unsigned int line, |
| 90 | const char *err_fn, struct buffer_head *bh, | 126 | const char *err_fn, struct buffer_head *bh, |
| 91 | handle_t *handle, int err) | 127 | handle_t *handle, int err) |
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index c8c6885406db..2877258d9497 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h | |||
| @@ -134,7 +134,8 @@ static inline int ext4_jbd2_credits_xattr(struct inode *inode) | |||
| 134 | #define EXT4_HT_MIGRATE 8 | 134 | #define EXT4_HT_MIGRATE 8 |
| 135 | #define EXT4_HT_MOVE_EXTENTS 9 | 135 | #define EXT4_HT_MOVE_EXTENTS 9 |
| 136 | #define EXT4_HT_XATTR 10 | 136 | #define EXT4_HT_XATTR 10 |
| 137 | #define EXT4_HT_MAX 11 | 137 | #define EXT4_HT_EXT_CONVERT 11 |
| 138 | #define EXT4_HT_MAX 12 | ||
| 138 | 139 | ||
| 139 | /** | 140 | /** |
| 140 | * struct ext4_journal_cb_entry - Base structure for callback information. | 141 | * struct ext4_journal_cb_entry - Base structure for callback information. |
| @@ -265,7 +266,7 @@ int __ext4_handle_dirty_super(const char *where, unsigned int line, | |||
| 265 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) | 266 | __ext4_handle_dirty_super(__func__, __LINE__, (handle), (sb)) |
| 266 | 267 | ||
| 267 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, | 268 | handle_t *__ext4_journal_start_sb(struct super_block *sb, unsigned int line, |
| 268 | int type, int nblocks); | 269 | int type, int blocks, int rsv_blocks); |
| 269 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); | 270 | int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle); |
| 270 | 271 | ||
| 271 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) | 272 | #define EXT4_NOJOURNAL_MAX_REF_COUNT ((unsigned long) 4096) |
| @@ -300,21 +301,37 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed) | |||
| 300 | } | 301 | } |
| 301 | 302 | ||
| 302 | #define ext4_journal_start_sb(sb, type, nblocks) \ | 303 | #define ext4_journal_start_sb(sb, type, nblocks) \ |
| 303 | __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks)) | 304 | __ext4_journal_start_sb((sb), __LINE__, (type), (nblocks), 0) |
| 304 | 305 | ||
| 305 | #define ext4_journal_start(inode, type, nblocks) \ | 306 | #define ext4_journal_start(inode, type, nblocks) \ |
| 306 | __ext4_journal_start((inode), __LINE__, (type), (nblocks)) | 307 | __ext4_journal_start((inode), __LINE__, (type), (nblocks), 0) |
| 308 | |||
| 309 | #define ext4_journal_start_with_reserve(inode, type, blocks, rsv_blocks) \ | ||
| 310 | __ext4_journal_start((inode), __LINE__, (type), (blocks), (rsv_blocks)) | ||
| 307 | 311 | ||
| 308 | static inline handle_t *__ext4_journal_start(struct inode *inode, | 312 | static inline handle_t *__ext4_journal_start(struct inode *inode, |
| 309 | unsigned int line, int type, | 313 | unsigned int line, int type, |
| 310 | int nblocks) | 314 | int blocks, int rsv_blocks) |
| 311 | { | 315 | { |
| 312 | return __ext4_journal_start_sb(inode->i_sb, line, type, nblocks); | 316 | return __ext4_journal_start_sb(inode->i_sb, line, type, blocks, |
| 317 | rsv_blocks); | ||
| 313 | } | 318 | } |
| 314 | 319 | ||
| 315 | #define ext4_journal_stop(handle) \ | 320 | #define ext4_journal_stop(handle) \ |
| 316 | __ext4_journal_stop(__func__, __LINE__, (handle)) | 321 | __ext4_journal_stop(__func__, __LINE__, (handle)) |
| 317 | 322 | ||
| 323 | #define ext4_journal_start_reserved(handle, type) \ | ||
| 324 | __ext4_journal_start_reserved((handle), __LINE__, (type)) | ||
| 325 | |||
| 326 | handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line, | ||
| 327 | int type); | ||
| 328 | |||
| 329 | static inline void ext4_journal_free_reserved(handle_t *handle) | ||
| 330 | { | ||
| 331 | if (ext4_handle_valid(handle)) | ||
| 332 | jbd2_journal_free_reserved(handle); | ||
| 333 | } | ||
| 334 | |||
| 318 | static inline handle_t *ext4_journal_current_handle(void) | 335 | static inline handle_t *ext4_journal_current_handle(void) |
| 319 | { | 336 | { |
| 320 | return journal_current_handle(); | 337 | return journal_current_handle(); |
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc0f1910b9cf..7097b0f680e6 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -2125,7 +2125,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode, | |||
| 2125 | next_del = ext4_find_delayed_extent(inode, &es); | 2125 | next_del = ext4_find_delayed_extent(inode, &es); |
| 2126 | if (!exists && next_del) { | 2126 | if (!exists && next_del) { |
| 2127 | exists = 1; | 2127 | exists = 1; |
| 2128 | flags |= FIEMAP_EXTENT_DELALLOC; | 2128 | flags |= (FIEMAP_EXTENT_DELALLOC | |
| 2129 | FIEMAP_EXTENT_UNKNOWN); | ||
| 2129 | } | 2130 | } |
| 2130 | up_read(&EXT4_I(inode)->i_data_sem); | 2131 | up_read(&EXT4_I(inode)->i_data_sem); |
| 2131 | 2132 | ||
| @@ -2328,17 +2329,15 @@ int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int nrblocks, | |||
| 2328 | } | 2329 | } |
| 2329 | 2330 | ||
| 2330 | /* | 2331 | /* |
| 2331 | * How many index/leaf blocks need to change/allocate to modify nrblocks? | 2332 | * How many index/leaf blocks need to change/allocate to add @extents extents? |
| 2332 | * | 2333 | * |
| 2333 | * if nrblocks are fit in a single extent (chunk flag is 1), then | 2334 | * If we add a single extent, then in the worse case, each tree level |
| 2334 | * in the worse case, each tree level index/leaf need to be changed | 2335 | * index/leaf need to be changed in case of the tree split. |
| 2335 | * if the tree split due to insert a new extent, then the old tree | ||
| 2336 | * index/leaf need to be updated too | ||
| 2337 | * | 2336 | * |
| 2338 | * If the nrblocks are discontiguous, they could cause | 2337 | * If more extents are inserted, they could cause the whole tree split more |
| 2339 | * the whole tree split more than once, but this is really rare. | 2338 | * than once, but this is really rare. |
| 2340 | */ | 2339 | */ |
| 2341 | int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 2340 | int ext4_ext_index_trans_blocks(struct inode *inode, int extents) |
| 2342 | { | 2341 | { |
| 2343 | int index; | 2342 | int index; |
| 2344 | int depth; | 2343 | int depth; |
| @@ -2349,7 +2348,7 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
| 2349 | 2348 | ||
| 2350 | depth = ext_depth(inode); | 2349 | depth = ext_depth(inode); |
| 2351 | 2350 | ||
| 2352 | if (chunk) | 2351 | if (extents <= 1) |
| 2353 | index = depth * 2; | 2352 | index = depth * 2; |
| 2354 | else | 2353 | else |
| 2355 | index = depth * 3; | 2354 | index = depth * 3; |
| @@ -2357,20 +2356,24 @@ int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
| 2357 | return index; | 2356 | return index; |
| 2358 | } | 2357 | } |
| 2359 | 2358 | ||
| 2359 | static inline int get_default_free_blocks_flags(struct inode *inode) | ||
| 2360 | { | ||
| 2361 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
| 2362 | return EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; | ||
| 2363 | else if (ext4_should_journal_data(inode)) | ||
| 2364 | return EXT4_FREE_BLOCKS_FORGET; | ||
| 2365 | return 0; | ||
| 2366 | } | ||
| 2367 | |||
| 2360 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | 2368 | static int ext4_remove_blocks(handle_t *handle, struct inode *inode, |
| 2361 | struct ext4_extent *ex, | 2369 | struct ext4_extent *ex, |
| 2362 | ext4_fsblk_t *partial_cluster, | 2370 | long long *partial_cluster, |
| 2363 | ext4_lblk_t from, ext4_lblk_t to) | 2371 | ext4_lblk_t from, ext4_lblk_t to) |
| 2364 | { | 2372 | { |
| 2365 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2373 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 2366 | unsigned short ee_len = ext4_ext_get_actual_len(ex); | 2374 | unsigned short ee_len = ext4_ext_get_actual_len(ex); |
| 2367 | ext4_fsblk_t pblk; | 2375 | ext4_fsblk_t pblk; |
| 2368 | int flags = 0; | 2376 | int flags = get_default_free_blocks_flags(inode); |
| 2369 | |||
| 2370 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
| 2371 | flags |= EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET; | ||
| 2372 | else if (ext4_should_journal_data(inode)) | ||
| 2373 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
| 2374 | 2377 | ||
| 2375 | /* | 2378 | /* |
| 2376 | * For bigalloc file systems, we never free a partial cluster | 2379 | * For bigalloc file systems, we never free a partial cluster |
| @@ -2388,7 +2391,8 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
| 2388 | * partial cluster here. | 2391 | * partial cluster here. |
| 2389 | */ | 2392 | */ |
| 2390 | pblk = ext4_ext_pblock(ex) + ee_len - 1; | 2393 | pblk = ext4_ext_pblock(ex) + ee_len - 1; |
| 2391 | if (*partial_cluster && (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | 2394 | if ((*partial_cluster > 0) && |
| 2395 | (EXT4_B2C(sbi, pblk) != *partial_cluster)) { | ||
| 2392 | ext4_free_blocks(handle, inode, NULL, | 2396 | ext4_free_blocks(handle, inode, NULL, |
| 2393 | EXT4_C2B(sbi, *partial_cluster), | 2397 | EXT4_C2B(sbi, *partial_cluster), |
| 2394 | sbi->s_cluster_ratio, flags); | 2398 | sbi->s_cluster_ratio, flags); |
| @@ -2414,41 +2418,46 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
| 2414 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2418 | && to == le32_to_cpu(ex->ee_block) + ee_len - 1) { |
| 2415 | /* tail removal */ | 2419 | /* tail removal */ |
| 2416 | ext4_lblk_t num; | 2420 | ext4_lblk_t num; |
| 2421 | unsigned int unaligned; | ||
| 2417 | 2422 | ||
| 2418 | num = le32_to_cpu(ex->ee_block) + ee_len - from; | 2423 | num = le32_to_cpu(ex->ee_block) + ee_len - from; |
| 2419 | pblk = ext4_ext_pblock(ex) + ee_len - num; | 2424 | pblk = ext4_ext_pblock(ex) + ee_len - num; |
| 2420 | ext_debug("free last %u blocks starting %llu\n", num, pblk); | 2425 | /* |
| 2426 | * Usually we want to free partial cluster at the end of the | ||
| 2427 | * extent, except for the situation when the cluster is still | ||
| 2428 | * used by any other extent (partial_cluster is negative). | ||
| 2429 | */ | ||
| 2430 | if (*partial_cluster < 0 && | ||
| 2431 | -(*partial_cluster) == EXT4_B2C(sbi, pblk + num - 1)) | ||
| 2432 | flags |= EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER; | ||
| 2433 | |||
| 2434 | ext_debug("free last %u blocks starting %llu partial %lld\n", | ||
| 2435 | num, pblk, *partial_cluster); | ||
| 2421 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); | 2436 | ext4_free_blocks(handle, inode, NULL, pblk, num, flags); |
| 2422 | /* | 2437 | /* |
| 2423 | * If the block range to be freed didn't start at the | 2438 | * If the block range to be freed didn't start at the |
| 2424 | * beginning of a cluster, and we removed the entire | 2439 | * beginning of a cluster, and we removed the entire |
| 2425 | * extent, save the partial cluster here, since we | 2440 | * extent and the cluster is not used by any other extent, |
| 2426 | * might need to delete if we determine that the | 2441 | * save the partial cluster here, since we might need to |
| 2427 | * truncate operation has removed all of the blocks in | 2442 | * delete if we determine that the truncate operation has |
| 2428 | * the cluster. | 2443 | * removed all of the blocks in the cluster. |
| 2444 | * | ||
| 2445 | * On the other hand, if we did not manage to free the whole | ||
| 2446 | * extent, we have to mark the cluster as used (store negative | ||
| 2447 | * cluster number in partial_cluster). | ||
| 2429 | */ | 2448 | */ |
| 2430 | if (pblk & (sbi->s_cluster_ratio - 1) && | 2449 | unaligned = pblk & (sbi->s_cluster_ratio - 1); |
| 2431 | (ee_len == num)) | 2450 | if (unaligned && (ee_len == num) && |
| 2451 | (*partial_cluster != -((long long)EXT4_B2C(sbi, pblk)))) | ||
| 2432 | *partial_cluster = EXT4_B2C(sbi, pblk); | 2452 | *partial_cluster = EXT4_B2C(sbi, pblk); |
| 2433 | else | 2453 | else if (unaligned) |
| 2454 | *partial_cluster = -((long long)EXT4_B2C(sbi, pblk)); | ||
| 2455 | else if (*partial_cluster > 0) | ||
| 2434 | *partial_cluster = 0; | 2456 | *partial_cluster = 0; |
| 2435 | } else if (from == le32_to_cpu(ex->ee_block) | 2457 | } else |
| 2436 | && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { | 2458 | ext4_error(sbi->s_sb, "strange request: removal(2) " |
| 2437 | /* head removal */ | 2459 | "%u-%u from %u:%u\n", |
| 2438 | ext4_lblk_t num; | 2460 | from, to, le32_to_cpu(ex->ee_block), ee_len); |
| 2439 | ext4_fsblk_t start; | ||
| 2440 | |||
| 2441 | num = to - from; | ||
| 2442 | start = ext4_ext_pblock(ex); | ||
| 2443 | |||
| 2444 | ext_debug("free first %u blocks starting %llu\n", num, start); | ||
| 2445 | ext4_free_blocks(handle, inode, NULL, start, num, flags); | ||
| 2446 | |||
| 2447 | } else { | ||
| 2448 | printk(KERN_INFO "strange request: removal(2) " | ||
| 2449 | "%u-%u from %u:%u\n", | ||
| 2450 | from, to, le32_to_cpu(ex->ee_block), ee_len); | ||
| 2451 | } | ||
| 2452 | return 0; | 2461 | return 0; |
| 2453 | } | 2462 | } |
| 2454 | 2463 | ||
| @@ -2461,12 +2470,16 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, | |||
| 2461 | * @handle: The journal handle | 2470 | * @handle: The journal handle |
| 2462 | * @inode: The files inode | 2471 | * @inode: The files inode |
| 2463 | * @path: The path to the leaf | 2472 | * @path: The path to the leaf |
| 2473 | * @partial_cluster: The cluster which we'll have to free if all extents | ||
| 2474 | * has been released from it. It gets negative in case | ||
| 2475 | * that the cluster is still used. | ||
| 2464 | * @start: The first block to remove | 2476 | * @start: The first block to remove |
| 2465 | * @end: The last block to remove | 2477 | * @end: The last block to remove |
| 2466 | */ | 2478 | */ |
| 2467 | static int | 2479 | static int |
| 2468 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | 2480 | ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, |
| 2469 | struct ext4_ext_path *path, ext4_fsblk_t *partial_cluster, | 2481 | struct ext4_ext_path *path, |
| 2482 | long long *partial_cluster, | ||
| 2470 | ext4_lblk_t start, ext4_lblk_t end) | 2483 | ext4_lblk_t start, ext4_lblk_t end) |
| 2471 | { | 2484 | { |
| 2472 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 2485 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| @@ -2479,6 +2492,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2479 | unsigned short ex_ee_len; | 2492 | unsigned short ex_ee_len; |
| 2480 | unsigned uninitialized = 0; | 2493 | unsigned uninitialized = 0; |
| 2481 | struct ext4_extent *ex; | 2494 | struct ext4_extent *ex; |
| 2495 | ext4_fsblk_t pblk; | ||
| 2482 | 2496 | ||
| 2483 | /* the header must be checked already in ext4_ext_remove_space() */ | 2497 | /* the header must be checked already in ext4_ext_remove_space() */ |
| 2484 | ext_debug("truncate since %u in leaf to %u\n", start, end); | 2498 | ext_debug("truncate since %u in leaf to %u\n", start, end); |
| @@ -2490,7 +2504,9 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2490 | return -EIO; | 2504 | return -EIO; |
| 2491 | } | 2505 | } |
| 2492 | /* find where to start removing */ | 2506 | /* find where to start removing */ |
| 2493 | ex = EXT_LAST_EXTENT(eh); | 2507 | ex = path[depth].p_ext; |
| 2508 | if (!ex) | ||
| 2509 | ex = EXT_LAST_EXTENT(eh); | ||
| 2494 | 2510 | ||
| 2495 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2511 | ex_ee_block = le32_to_cpu(ex->ee_block); |
| 2496 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2512 | ex_ee_len = ext4_ext_get_actual_len(ex); |
| @@ -2517,6 +2533,16 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2517 | 2533 | ||
| 2518 | /* If this extent is beyond the end of the hole, skip it */ | 2534 | /* If this extent is beyond the end of the hole, skip it */ |
| 2519 | if (end < ex_ee_block) { | 2535 | if (end < ex_ee_block) { |
| 2536 | /* | ||
| 2537 | * We're going to skip this extent and move to another, | ||
| 2538 | * so if this extent is not cluster aligned we have | ||
| 2539 | * to mark the current cluster as used to avoid | ||
| 2540 | * accidentally freeing it later on | ||
| 2541 | */ | ||
| 2542 | pblk = ext4_ext_pblock(ex); | ||
| 2543 | if (pblk & (sbi->s_cluster_ratio - 1)) | ||
| 2544 | *partial_cluster = | ||
| 2545 | -((long long)EXT4_B2C(sbi, pblk)); | ||
| 2520 | ex--; | 2546 | ex--; |
| 2521 | ex_ee_block = le32_to_cpu(ex->ee_block); | 2547 | ex_ee_block = le32_to_cpu(ex->ee_block); |
| 2522 | ex_ee_len = ext4_ext_get_actual_len(ex); | 2548 | ex_ee_len = ext4_ext_get_actual_len(ex); |
| @@ -2592,7 +2618,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2592 | sizeof(struct ext4_extent)); | 2618 | sizeof(struct ext4_extent)); |
| 2593 | } | 2619 | } |
| 2594 | le16_add_cpu(&eh->eh_entries, -1); | 2620 | le16_add_cpu(&eh->eh_entries, -1); |
| 2595 | } else | 2621 | } else if (*partial_cluster > 0) |
| 2596 | *partial_cluster = 0; | 2622 | *partial_cluster = 0; |
| 2597 | 2623 | ||
| 2598 | err = ext4_ext_dirty(handle, inode, path + depth); | 2624 | err = ext4_ext_dirty(handle, inode, path + depth); |
| @@ -2610,17 +2636,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, | |||
| 2610 | err = ext4_ext_correct_indexes(handle, inode, path); | 2636 | err = ext4_ext_correct_indexes(handle, inode, path); |
| 2611 | 2637 | ||
| 2612 | /* | 2638 | /* |
| 2613 | * If there is still a entry in the leaf node, check to see if | 2639 | * Free the partial cluster only if the current extent does not |
| 2614 | * it references the partial cluster. This is the only place | 2640 | * reference it. Otherwise we might free used cluster. |
| 2615 | * where it could; if it doesn't, we can free the cluster. | ||
| 2616 | */ | 2641 | */ |
| 2617 | if (*partial_cluster && ex >= EXT_FIRST_EXTENT(eh) && | 2642 | if (*partial_cluster > 0 && |
| 2618 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != | 2643 | (EXT4_B2C(sbi, ext4_ext_pblock(ex) + ex_ee_len - 1) != |
| 2619 | *partial_cluster)) { | 2644 | *partial_cluster)) { |
| 2620 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2645 | int flags = get_default_free_blocks_flags(inode); |
| 2621 | |||
| 2622 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
| 2623 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
| 2624 | 2646 | ||
| 2625 | ext4_free_blocks(handle, inode, NULL, | 2647 | ext4_free_blocks(handle, inode, NULL, |
| 2626 | EXT4_C2B(sbi, *partial_cluster), | 2648 | EXT4_C2B(sbi, *partial_cluster), |
| @@ -2664,7 +2686,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
| 2664 | struct super_block *sb = inode->i_sb; | 2686 | struct super_block *sb = inode->i_sb; |
| 2665 | int depth = ext_depth(inode); | 2687 | int depth = ext_depth(inode); |
| 2666 | struct ext4_ext_path *path = NULL; | 2688 | struct ext4_ext_path *path = NULL; |
| 2667 | ext4_fsblk_t partial_cluster = 0; | 2689 | long long partial_cluster = 0; |
| 2668 | handle_t *handle; | 2690 | handle_t *handle; |
| 2669 | int i = 0, err = 0; | 2691 | int i = 0, err = 0; |
| 2670 | 2692 | ||
| @@ -2676,7 +2698,7 @@ int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start, | |||
| 2676 | return PTR_ERR(handle); | 2698 | return PTR_ERR(handle); |
| 2677 | 2699 | ||
| 2678 | again: | 2700 | again: |
| 2679 | trace_ext4_ext_remove_space(inode, start, depth); | 2701 | trace_ext4_ext_remove_space(inode, start, end, depth); |
| 2680 | 2702 | ||
| 2681 | /* | 2703 | /* |
| 2682 | * Check if we are removing extents inside the extent tree. If that | 2704 | * Check if we are removing extents inside the extent tree. If that |
| @@ -2844,17 +2866,14 @@ again: | |||
| 2844 | } | 2866 | } |
| 2845 | } | 2867 | } |
| 2846 | 2868 | ||
| 2847 | trace_ext4_ext_remove_space_done(inode, start, depth, partial_cluster, | 2869 | trace_ext4_ext_remove_space_done(inode, start, end, depth, |
| 2848 | path->p_hdr->eh_entries); | 2870 | partial_cluster, path->p_hdr->eh_entries); |
| 2849 | 2871 | ||
| 2850 | /* If we still have something in the partial cluster and we have removed | 2872 | /* If we still have something in the partial cluster and we have removed |
| 2851 | * even the first extent, then we should free the blocks in the partial | 2873 | * even the first extent, then we should free the blocks in the partial |
| 2852 | * cluster as well. */ | 2874 | * cluster as well. */ |
| 2853 | if (partial_cluster && path->p_hdr->eh_entries == 0) { | 2875 | if (partial_cluster > 0 && path->p_hdr->eh_entries == 0) { |
| 2854 | int flags = EXT4_FREE_BLOCKS_FORGET; | 2876 | int flags = get_default_free_blocks_flags(inode); |
| 2855 | |||
| 2856 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | ||
| 2857 | flags |= EXT4_FREE_BLOCKS_METADATA; | ||
| 2858 | 2877 | ||
| 2859 | ext4_free_blocks(handle, inode, NULL, | 2878 | ext4_free_blocks(handle, inode, NULL, |
| 2860 | EXT4_C2B(EXT4_SB(sb), partial_cluster), | 2879 | EXT4_C2B(EXT4_SB(sb), partial_cluster), |
| @@ -4363,7 +4382,7 @@ out2: | |||
| 4363 | } | 4382 | } |
| 4364 | 4383 | ||
| 4365 | out3: | 4384 | out3: |
| 4366 | trace_ext4_ext_map_blocks_exit(inode, map, err ? err : allocated); | 4385 | trace_ext4_ext_map_blocks_exit(inode, flags, map, err ? err : allocated); |
| 4367 | 4386 | ||
| 4368 | return err ? err : allocated; | 4387 | return err ? err : allocated; |
| 4369 | } | 4388 | } |
| @@ -4446,7 +4465,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len) | |||
| 4446 | return -EOPNOTSUPP; | 4465 | return -EOPNOTSUPP; |
| 4447 | 4466 | ||
| 4448 | if (mode & FALLOC_FL_PUNCH_HOLE) | 4467 | if (mode & FALLOC_FL_PUNCH_HOLE) |
| 4449 | return ext4_punch_hole(file, offset, len); | 4468 | return ext4_punch_hole(inode, offset, len); |
| 4450 | 4469 | ||
| 4451 | ret = ext4_convert_inline_data(inode); | 4470 | ret = ext4_convert_inline_data(inode); |
| 4452 | if (ret) | 4471 | if (ret) |
| @@ -4548,10 +4567,9 @@ retry: | |||
| 4548 | * function, to convert the fallocated extents after IO is completed. | 4567 | * function, to convert the fallocated extents after IO is completed. |
| 4549 | * Returns 0 on success. | 4568 | * Returns 0 on success. |
| 4550 | */ | 4569 | */ |
| 4551 | int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | 4570 | int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, |
| 4552 | ssize_t len) | 4571 | loff_t offset, ssize_t len) |
| 4553 | { | 4572 | { |
| 4554 | handle_t *handle; | ||
| 4555 | unsigned int max_blocks; | 4573 | unsigned int max_blocks; |
| 4556 | int ret = 0; | 4574 | int ret = 0; |
| 4557 | int ret2 = 0; | 4575 | int ret2 = 0; |
| @@ -4566,16 +4584,32 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
| 4566 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - | 4584 | max_blocks = ((EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) - |
| 4567 | map.m_lblk); | 4585 | map.m_lblk); |
| 4568 | /* | 4586 | /* |
| 4569 | * credits to insert 1 extent into extent tree | 4587 | * This is somewhat ugly but the idea is clear: When transaction is |
| 4588 | * reserved, everything goes into it. Otherwise we rather start several | ||
| 4589 | * smaller transactions for conversion of each extent separately. | ||
| 4570 | */ | 4590 | */ |
| 4571 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | 4591 | if (handle) { |
| 4592 | handle = ext4_journal_start_reserved(handle, | ||
| 4593 | EXT4_HT_EXT_CONVERT); | ||
| 4594 | if (IS_ERR(handle)) | ||
| 4595 | return PTR_ERR(handle); | ||
| 4596 | credits = 0; | ||
| 4597 | } else { | ||
| 4598 | /* | ||
| 4599 | * credits to insert 1 extent into extent tree | ||
| 4600 | */ | ||
| 4601 | credits = ext4_chunk_trans_blocks(inode, max_blocks); | ||
| 4602 | } | ||
| 4572 | while (ret >= 0 && ret < max_blocks) { | 4603 | while (ret >= 0 && ret < max_blocks) { |
| 4573 | map.m_lblk += ret; | 4604 | map.m_lblk += ret; |
| 4574 | map.m_len = (max_blocks -= ret); | 4605 | map.m_len = (max_blocks -= ret); |
| 4575 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, credits); | 4606 | if (credits) { |
| 4576 | if (IS_ERR(handle)) { | 4607 | handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, |
| 4577 | ret = PTR_ERR(handle); | 4608 | credits); |
| 4578 | break; | 4609 | if (IS_ERR(handle)) { |
| 4610 | ret = PTR_ERR(handle); | ||
| 4611 | break; | ||
| 4612 | } | ||
| 4579 | } | 4613 | } |
| 4580 | ret = ext4_map_blocks(handle, inode, &map, | 4614 | ret = ext4_map_blocks(handle, inode, &map, |
| 4581 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); | 4615 | EXT4_GET_BLOCKS_IO_CONVERT_EXT); |
| @@ -4586,10 +4620,13 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, | |||
| 4586 | inode->i_ino, map.m_lblk, | 4620 | inode->i_ino, map.m_lblk, |
| 4587 | map.m_len, ret); | 4621 | map.m_len, ret); |
| 4588 | ext4_mark_inode_dirty(handle, inode); | 4622 | ext4_mark_inode_dirty(handle, inode); |
| 4589 | ret2 = ext4_journal_stop(handle); | 4623 | if (credits) |
| 4590 | if (ret <= 0 || ret2 ) | 4624 | ret2 = ext4_journal_stop(handle); |
| 4625 | if (ret <= 0 || ret2) | ||
| 4591 | break; | 4626 | break; |
| 4592 | } | 4627 | } |
| 4628 | if (!credits) | ||
| 4629 | ret2 = ext4_journal_stop(handle); | ||
| 4593 | return ret > 0 ? ret2 : ret; | 4630 | return ret > 0 ? ret2 : ret; |
| 4594 | } | 4631 | } |
| 4595 | 4632 | ||
| @@ -4659,7 +4696,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
| 4659 | error = ext4_get_inode_loc(inode, &iloc); | 4696 | error = ext4_get_inode_loc(inode, &iloc); |
| 4660 | if (error) | 4697 | if (error) |
| 4661 | return error; | 4698 | return error; |
| 4662 | physical = iloc.bh->b_blocknr << blockbits; | 4699 | physical = (__u64)iloc.bh->b_blocknr << blockbits; |
| 4663 | offset = EXT4_GOOD_OLD_INODE_SIZE + | 4700 | offset = EXT4_GOOD_OLD_INODE_SIZE + |
| 4664 | EXT4_I(inode)->i_extra_isize; | 4701 | EXT4_I(inode)->i_extra_isize; |
| 4665 | physical += offset; | 4702 | physical += offset; |
| @@ -4667,7 +4704,7 @@ static int ext4_xattr_fiemap(struct inode *inode, | |||
| 4667 | flags |= FIEMAP_EXTENT_DATA_INLINE; | 4704 | flags |= FIEMAP_EXTENT_DATA_INLINE; |
| 4668 | brelse(iloc.bh); | 4705 | brelse(iloc.bh); |
| 4669 | } else { /* external block */ | 4706 | } else { /* external block */ |
| 4670 | physical = EXT4_I(inode)->i_file_acl << blockbits; | 4707 | physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; |
| 4671 | length = inode->i_sb->s_blocksize; | 4708 | length = inode->i_sb->s_blocksize; |
| 4672 | } | 4709 | } |
| 4673 | 4710 | ||
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index e6941e622d31..ee018d5f397e 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c | |||
| @@ -10,6 +10,7 @@ | |||
| 10 | * Ext4 extents status tree core functions. | 10 | * Ext4 extents status tree core functions. |
| 11 | */ | 11 | */ |
| 12 | #include <linux/rbtree.h> | 12 | #include <linux/rbtree.h> |
| 13 | #include <linux/list_sort.h> | ||
| 13 | #include "ext4.h" | 14 | #include "ext4.h" |
| 14 | #include "extents_status.h" | 15 | #include "extents_status.h" |
| 15 | #include "ext4_extents.h" | 16 | #include "ext4_extents.h" |
| @@ -291,7 +292,6 @@ out: | |||
| 291 | 292 | ||
| 292 | read_unlock(&EXT4_I(inode)->i_es_lock); | 293 | read_unlock(&EXT4_I(inode)->i_es_lock); |
| 293 | 294 | ||
| 294 | ext4_es_lru_add(inode); | ||
| 295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); | 295 | trace_ext4_es_find_delayed_extent_range_exit(inode, es); |
| 296 | } | 296 | } |
| 297 | 297 | ||
| @@ -672,7 +672,6 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, | |||
| 672 | error: | 672 | error: |
| 673 | write_unlock(&EXT4_I(inode)->i_es_lock); | 673 | write_unlock(&EXT4_I(inode)->i_es_lock); |
| 674 | 674 | ||
| 675 | ext4_es_lru_add(inode); | ||
| 676 | ext4_es_print_tree(inode); | 675 | ext4_es_print_tree(inode); |
| 677 | 676 | ||
| 678 | return err; | 677 | return err; |
| @@ -734,7 +733,6 @@ out: | |||
| 734 | 733 | ||
| 735 | read_unlock(&EXT4_I(inode)->i_es_lock); | 734 | read_unlock(&EXT4_I(inode)->i_es_lock); |
| 736 | 735 | ||
| 737 | ext4_es_lru_add(inode); | ||
| 738 | trace_ext4_es_lookup_extent_exit(inode, es, found); | 736 | trace_ext4_es_lookup_extent_exit(inode, es, found); |
| 739 | return found; | 737 | return found; |
| 740 | } | 738 | } |
| @@ -878,12 +876,28 @@ int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex) | |||
| 878 | EXTENT_STATUS_WRITTEN); | 876 | EXTENT_STATUS_WRITTEN); |
| 879 | } | 877 | } |
| 880 | 878 | ||
| 879 | static int ext4_inode_touch_time_cmp(void *priv, struct list_head *a, | ||
| 880 | struct list_head *b) | ||
| 881 | { | ||
| 882 | struct ext4_inode_info *eia, *eib; | ||
| 883 | eia = list_entry(a, struct ext4_inode_info, i_es_lru); | ||
| 884 | eib = list_entry(b, struct ext4_inode_info, i_es_lru); | ||
| 885 | |||
| 886 | if (eia->i_touch_when == eib->i_touch_when) | ||
| 887 | return 0; | ||
| 888 | if (time_after(eia->i_touch_when, eib->i_touch_when)) | ||
| 889 | return 1; | ||
| 890 | else | ||
| 891 | return -1; | ||
| 892 | } | ||
| 893 | |||
| 881 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | 894 | static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) |
| 882 | { | 895 | { |
| 883 | struct ext4_sb_info *sbi = container_of(shrink, | 896 | struct ext4_sb_info *sbi = container_of(shrink, |
| 884 | struct ext4_sb_info, s_es_shrinker); | 897 | struct ext4_sb_info, s_es_shrinker); |
| 885 | struct ext4_inode_info *ei; | 898 | struct ext4_inode_info *ei; |
| 886 | struct list_head *cur, *tmp, scanned; | 899 | struct list_head *cur, *tmp; |
| 900 | LIST_HEAD(skiped); | ||
| 887 | int nr_to_scan = sc->nr_to_scan; | 901 | int nr_to_scan = sc->nr_to_scan; |
| 888 | int ret, nr_shrunk = 0; | 902 | int ret, nr_shrunk = 0; |
| 889 | 903 | ||
| @@ -893,23 +907,41 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 893 | if (!nr_to_scan) | 907 | if (!nr_to_scan) |
| 894 | return ret; | 908 | return ret; |
| 895 | 909 | ||
| 896 | INIT_LIST_HEAD(&scanned); | ||
| 897 | |||
| 898 | spin_lock(&sbi->s_es_lru_lock); | 910 | spin_lock(&sbi->s_es_lru_lock); |
| 911 | |||
| 912 | /* | ||
| 913 | * If the inode that is at the head of LRU list is newer than | ||
| 914 | * last_sorted time, that means that we need to sort this list. | ||
| 915 | */ | ||
| 916 | ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, i_es_lru); | ||
| 917 | if (sbi->s_es_last_sorted < ei->i_touch_when) { | ||
| 918 | list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); | ||
| 919 | sbi->s_es_last_sorted = jiffies; | ||
| 920 | } | ||
| 921 | |||
| 899 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { | 922 | list_for_each_safe(cur, tmp, &sbi->s_es_lru) { |
| 900 | list_move_tail(cur, &scanned); | 923 | /* |
| 924 | * If we have already reclaimed all extents from extent | ||
| 925 | * status tree, just stop the loop immediately. | ||
| 926 | */ | ||
| 927 | if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) | ||
| 928 | break; | ||
| 901 | 929 | ||
| 902 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); | 930 | ei = list_entry(cur, struct ext4_inode_info, i_es_lru); |
| 903 | 931 | ||
| 904 | read_lock(&ei->i_es_lock); | 932 | /* Skip the inode that is newer than the last_sorted time */ |
| 905 | if (ei->i_es_lru_nr == 0) { | 933 | if (sbi->s_es_last_sorted < ei->i_touch_when) { |
| 906 | read_unlock(&ei->i_es_lock); | 934 | list_move_tail(cur, &skiped); |
| 907 | continue; | 935 | continue; |
| 908 | } | 936 | } |
| 909 | read_unlock(&ei->i_es_lock); | 937 | |
| 938 | if (ei->i_es_lru_nr == 0) | ||
| 939 | continue; | ||
| 910 | 940 | ||
| 911 | write_lock(&ei->i_es_lock); | 941 | write_lock(&ei->i_es_lock); |
| 912 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); | 942 | ret = __es_try_to_reclaim_extents(ei, nr_to_scan); |
| 943 | if (ei->i_es_lru_nr == 0) | ||
| 944 | list_del_init(&ei->i_es_lru); | ||
| 913 | write_unlock(&ei->i_es_lock); | 945 | write_unlock(&ei->i_es_lock); |
| 914 | 946 | ||
| 915 | nr_shrunk += ret; | 947 | nr_shrunk += ret; |
| @@ -917,7 +949,9 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 917 | if (nr_to_scan == 0) | 949 | if (nr_to_scan == 0) |
| 918 | break; | 950 | break; |
| 919 | } | 951 | } |
| 920 | list_splice_tail(&scanned, &sbi->s_es_lru); | 952 | |
| 953 | /* Move the newer inodes into the tail of the LRU list. */ | ||
| 954 | list_splice_tail(&skiped, &sbi->s_es_lru); | ||
| 921 | spin_unlock(&sbi->s_es_lru_lock); | 955 | spin_unlock(&sbi->s_es_lru_lock); |
| 922 | 956 | ||
| 923 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); | 957 | ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); |
| @@ -925,21 +959,19 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) | |||
| 925 | return ret; | 959 | return ret; |
| 926 | } | 960 | } |
| 927 | 961 | ||
| 928 | void ext4_es_register_shrinker(struct super_block *sb) | 962 | void ext4_es_register_shrinker(struct ext4_sb_info *sbi) |
| 929 | { | 963 | { |
| 930 | struct ext4_sb_info *sbi; | ||
| 931 | |||
| 932 | sbi = EXT4_SB(sb); | ||
| 933 | INIT_LIST_HEAD(&sbi->s_es_lru); | 964 | INIT_LIST_HEAD(&sbi->s_es_lru); |
| 934 | spin_lock_init(&sbi->s_es_lru_lock); | 965 | spin_lock_init(&sbi->s_es_lru_lock); |
| 966 | sbi->s_es_last_sorted = 0; | ||
| 935 | sbi->s_es_shrinker.shrink = ext4_es_shrink; | 967 | sbi->s_es_shrinker.shrink = ext4_es_shrink; |
| 936 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; | 968 | sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; |
| 937 | register_shrinker(&sbi->s_es_shrinker); | 969 | register_shrinker(&sbi->s_es_shrinker); |
| 938 | } | 970 | } |
| 939 | 971 | ||
| 940 | void ext4_es_unregister_shrinker(struct super_block *sb) | 972 | void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) |
| 941 | { | 973 | { |
| 942 | unregister_shrinker(&EXT4_SB(sb)->s_es_shrinker); | 974 | unregister_shrinker(&sbi->s_es_shrinker); |
| 943 | } | 975 | } |
| 944 | 976 | ||
| 945 | void ext4_es_lru_add(struct inode *inode) | 977 | void ext4_es_lru_add(struct inode *inode) |
| @@ -947,11 +979,14 @@ void ext4_es_lru_add(struct inode *inode) | |||
| 947 | struct ext4_inode_info *ei = EXT4_I(inode); | 979 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 948 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 980 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 949 | 981 | ||
| 982 | ei->i_touch_when = jiffies; | ||
| 983 | |||
| 984 | if (!list_empty(&ei->i_es_lru)) | ||
| 985 | return; | ||
| 986 | |||
| 950 | spin_lock(&sbi->s_es_lru_lock); | 987 | spin_lock(&sbi->s_es_lru_lock); |
| 951 | if (list_empty(&ei->i_es_lru)) | 988 | if (list_empty(&ei->i_es_lru)) |
| 952 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); | 989 | list_add_tail(&ei->i_es_lru, &sbi->s_es_lru); |
| 953 | else | ||
| 954 | list_move_tail(&ei->i_es_lru, &sbi->s_es_lru); | ||
| 955 | spin_unlock(&sbi->s_es_lru_lock); | 990 | spin_unlock(&sbi->s_es_lru_lock); |
| 956 | } | 991 | } |
| 957 | 992 | ||
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h index f740eb03b707..e936730cc5b0 100644 --- a/fs/ext4/extents_status.h +++ b/fs/ext4/extents_status.h | |||
| @@ -39,6 +39,7 @@ | |||
| 39 | EXTENT_STATUS_DELAYED | \ | 39 | EXTENT_STATUS_DELAYED | \ |
| 40 | EXTENT_STATUS_HOLE) | 40 | EXTENT_STATUS_HOLE) |
| 41 | 41 | ||
| 42 | struct ext4_sb_info; | ||
| 42 | struct ext4_extent; | 43 | struct ext4_extent; |
| 43 | 44 | ||
| 44 | struct extent_status { | 45 | struct extent_status { |
| @@ -119,8 +120,8 @@ static inline void ext4_es_store_status(struct extent_status *es, | |||
| 119 | es->es_pblk = block; | 120 | es->es_pblk = block; |
| 120 | } | 121 | } |
| 121 | 122 | ||
| 122 | extern void ext4_es_register_shrinker(struct super_block *sb); | 123 | extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); |
| 123 | extern void ext4_es_unregister_shrinker(struct super_block *sb); | 124 | extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); |
| 124 | extern void ext4_es_lru_add(struct inode *inode); | 125 | extern void ext4_es_lru_add(struct inode *inode); |
| 125 | extern void ext4_es_lru_del(struct inode *inode); | 126 | extern void ext4_es_lru_del(struct inode *inode); |
| 126 | 127 | ||
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b1b4d51b5d86..b19f0a457f32 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, | |||
| 312 | blkbits = inode->i_sb->s_blocksize_bits; | 312 | blkbits = inode->i_sb->s_blocksize_bits; |
| 313 | startoff = *offset; | 313 | startoff = *offset; |
| 314 | lastoff = startoff; | 314 | lastoff = startoff; |
| 315 | endoff = (map->m_lblk + map->m_len) << blkbits; | 315 | endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; |
| 316 | 316 | ||
| 317 | index = startoff >> PAGE_CACHE_SHIFT; | 317 | index = startoff >> PAGE_CACHE_SHIFT; |
| 318 | end = endoff >> PAGE_CACHE_SHIFT; | 318 | end = endoff >> PAGE_CACHE_SHIFT; |
| @@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
| 457 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 457 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
| 458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 458 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
| 459 | if (last != start) | 459 | if (last != start) |
| 460 | dataoff = last << blkbits; | 460 | dataoff = (loff_t)last << blkbits; |
| 461 | break; | 461 | break; |
| 462 | } | 462 | } |
| 463 | 463 | ||
| @@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
| 468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 468 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
| 469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 469 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
| 470 | if (last != start) | 470 | if (last != start) |
| 471 | dataoff = last << blkbits; | 471 | dataoff = (loff_t)last << blkbits; |
| 472 | break; | 472 | break; |
| 473 | } | 473 | } |
| 474 | 474 | ||
| @@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) | |||
| 486 | } | 486 | } |
| 487 | 487 | ||
| 488 | last++; | 488 | last++; |
| 489 | dataoff = last << blkbits; | 489 | dataoff = (loff_t)last << blkbits; |
| 490 | } while (last <= end); | 490 | } while (last <= end); |
| 491 | 491 | ||
| 492 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
| @@ -540,7 +540,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
| 540 | ret = ext4_map_blocks(NULL, inode, &map, 0); | 540 | ret = ext4_map_blocks(NULL, inode, &map, 0); |
| 541 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { | 541 | if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { |
| 542 | last += ret; | 542 | last += ret; |
| 543 | holeoff = last << blkbits; | 543 | holeoff = (loff_t)last << blkbits; |
| 544 | continue; | 544 | continue; |
| 545 | } | 545 | } |
| 546 | 546 | ||
| @@ -551,7 +551,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
| 551 | ext4_es_find_delayed_extent_range(inode, last, last, &es); | 551 | ext4_es_find_delayed_extent_range(inode, last, last, &es); |
| 552 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { | 552 | if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { |
| 553 | last = es.es_lblk + es.es_len; | 553 | last = es.es_lblk + es.es_len; |
| 554 | holeoff = last << blkbits; | 554 | holeoff = (loff_t)last << blkbits; |
| 555 | continue; | 555 | continue; |
| 556 | } | 556 | } |
| 557 | 557 | ||
| @@ -566,7 +566,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) | |||
| 566 | &map, &holeoff); | 566 | &map, &holeoff); |
| 567 | if (!unwritten) { | 567 | if (!unwritten) { |
| 568 | last += ret; | 568 | last += ret; |
| 569 | holeoff = last << blkbits; | 569 | holeoff = (loff_t)last << blkbits; |
| 570 | continue; | 570 | continue; |
| 571 | } | 571 | } |
| 572 | } | 572 | } |
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index e0ba8a408def..a8bc47f75fa0 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
| @@ -73,32 +73,6 @@ static int ext4_sync_parent(struct inode *inode) | |||
| 73 | return ret; | 73 | return ret; |
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | /** | ||
| 77 | * __sync_file - generic_file_fsync without the locking and filemap_write | ||
| 78 | * @inode: inode to sync | ||
| 79 | * @datasync: only sync essential metadata if true | ||
| 80 | * | ||
| 81 | * This is just generic_file_fsync without the locking. This is needed for | ||
| 82 | * nojournal mode to make sure this inodes data/metadata makes it to disk | ||
| 83 | * properly. The i_mutex should be held already. | ||
| 84 | */ | ||
| 85 | static int __sync_inode(struct inode *inode, int datasync) | ||
| 86 | { | ||
| 87 | int err; | ||
| 88 | int ret; | ||
| 89 | |||
| 90 | ret = sync_mapping_buffers(inode->i_mapping); | ||
| 91 | if (!(inode->i_state & I_DIRTY)) | ||
| 92 | return ret; | ||
| 93 | if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) | ||
| 94 | return ret; | ||
| 95 | |||
| 96 | err = sync_inode_metadata(inode, 1); | ||
| 97 | if (ret == 0) | ||
| 98 | ret = err; | ||
| 99 | return ret; | ||
| 100 | } | ||
| 101 | |||
| 102 | /* | 76 | /* |
| 103 | * akpm: A new design for ext4_sync_file(). | 77 | * akpm: A new design for ext4_sync_file(). |
| 104 | * | 78 | * |
| @@ -116,7 +90,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 116 | struct inode *inode = file->f_mapping->host; | 90 | struct inode *inode = file->f_mapping->host; |
| 117 | struct ext4_inode_info *ei = EXT4_I(inode); | 91 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 118 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 92 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
| 119 | int ret, err; | 93 | int ret = 0, err; |
| 120 | tid_t commit_tid; | 94 | tid_t commit_tid; |
| 121 | bool needs_barrier = false; | 95 | bool needs_barrier = false; |
| 122 | 96 | ||
| @@ -124,25 +98,24 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 124 | 98 | ||
| 125 | trace_ext4_sync_file_enter(file, datasync); | 99 | trace_ext4_sync_file_enter(file, datasync); |
| 126 | 100 | ||
| 127 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | 101 | if (inode->i_sb->s_flags & MS_RDONLY) { |
| 128 | if (ret) | 102 | /* Make sure that we read updated s_mount_flags value */ |
| 129 | return ret; | 103 | smp_rmb(); |
| 130 | mutex_lock(&inode->i_mutex); | 104 | if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) |
| 131 | 105 | ret = -EROFS; | |
| 132 | if (inode->i_sb->s_flags & MS_RDONLY) | ||
| 133 | goto out; | ||
| 134 | |||
| 135 | ret = ext4_flush_unwritten_io(inode); | ||
| 136 | if (ret < 0) | ||
| 137 | goto out; | 106 | goto out; |
| 107 | } | ||
| 138 | 108 | ||
| 139 | if (!journal) { | 109 | if (!journal) { |
| 140 | ret = __sync_inode(inode, datasync); | 110 | ret = generic_file_fsync(file, start, end, datasync); |
| 141 | if (!ret && !hlist_empty(&inode->i_dentry)) | 111 | if (!ret && !hlist_empty(&inode->i_dentry)) |
| 142 | ret = ext4_sync_parent(inode); | 112 | ret = ext4_sync_parent(inode); |
| 143 | goto out; | 113 | goto out; |
| 144 | } | 114 | } |
| 145 | 115 | ||
| 116 | ret = filemap_write_and_wait_range(inode->i_mapping, start, end); | ||
| 117 | if (ret) | ||
| 118 | return ret; | ||
| 146 | /* | 119 | /* |
| 147 | * data=writeback,ordered: | 120 | * data=writeback,ordered: |
| 148 | * The caller's filemap_fdatawrite()/wait will sync the data. | 121 | * The caller's filemap_fdatawrite()/wait will sync the data. |
| @@ -172,8 +145,7 @@ int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 172 | if (!ret) | 145 | if (!ret) |
| 173 | ret = err; | 146 | ret = err; |
| 174 | } | 147 | } |
| 175 | out: | 148 | out: |
| 176 | mutex_unlock(&inode->i_mutex); | ||
| 177 | trace_ext4_sync_file_exit(inode, ret); | 149 | trace_ext4_sync_file_exit(inode, ret); |
| 178 | return ret; | 150 | return ret; |
| 179 | } | 151 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00a818d67b54..f03598c6ffd3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -747,7 +747,8 @@ repeat_in_this_group: | |||
| 747 | if (!handle) { | 747 | if (!handle) { |
| 748 | BUG_ON(nblocks <= 0); | 748 | BUG_ON(nblocks <= 0); |
| 749 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, | 749 | handle = __ext4_journal_start_sb(dir->i_sb, line_no, |
| 750 | handle_type, nblocks); | 750 | handle_type, nblocks, |
| 751 | 0); | ||
| 751 | if (IS_ERR(handle)) { | 752 | if (IS_ERR(handle)) { |
| 752 | err = PTR_ERR(handle); | 753 | err = PTR_ERR(handle); |
| 753 | ext4_std_error(sb, err); | 754 | ext4_std_error(sb, err); |
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index b8d5d351e24f..87b30cd357e7 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c | |||
| @@ -624,7 +624,7 @@ cleanup: | |||
| 624 | partial--; | 624 | partial--; |
| 625 | } | 625 | } |
| 626 | out: | 626 | out: |
| 627 | trace_ext4_ind_map_blocks_exit(inode, map, err); | 627 | trace_ext4_ind_map_blocks_exit(inode, flags, map, err); |
| 628 | return err; | 628 | return err; |
| 629 | } | 629 | } |
| 630 | 630 | ||
| @@ -675,11 +675,6 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, | |||
| 675 | 675 | ||
| 676 | retry: | 676 | retry: |
| 677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { | 677 | if (rw == READ && ext4_should_dioread_nolock(inode)) { |
| 678 | if (unlikely(atomic_read(&EXT4_I(inode)->i_unwritten))) { | ||
| 679 | mutex_lock(&inode->i_mutex); | ||
| 680 | ext4_flush_unwritten_io(inode); | ||
| 681 | mutex_unlock(&inode->i_mutex); | ||
| 682 | } | ||
| 683 | /* | 678 | /* |
| 684 | * Nolock dioread optimization may be dynamically disabled | 679 | * Nolock dioread optimization may be dynamically disabled |
| 685 | * via ext4_inode_block_unlocked_dio(). Check inode's state | 680 | * via ext4_inode_block_unlocked_dio(). Check inode's state |
| @@ -779,27 +774,18 @@ int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock) | |||
| 779 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; | 774 | return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1; |
| 780 | } | 775 | } |
| 781 | 776 | ||
| 782 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 777 | /* |
| 778 | * Calculate number of indirect blocks touched by mapping @nrblocks logically | ||
| 779 | * contiguous blocks | ||
| 780 | */ | ||
| 781 | int ext4_ind_trans_blocks(struct inode *inode, int nrblocks) | ||
| 783 | { | 782 | { |
| 784 | int indirects; | ||
| 785 | |||
| 786 | /* if nrblocks are contiguous */ | ||
| 787 | if (chunk) { | ||
| 788 | /* | ||
| 789 | * With N contiguous data blocks, we need at most | ||
| 790 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, | ||
| 791 | * 2 dindirect blocks, and 1 tindirect block | ||
| 792 | */ | ||
| 793 | return DIV_ROUND_UP(nrblocks, | ||
| 794 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; | ||
| 795 | } | ||
| 796 | /* | 783 | /* |
| 797 | * if nrblocks are not contiguous, worse case, each block touch | 784 | * With N contiguous data blocks, we need at most |
| 798 | * a indirect block, and each indirect block touch a double indirect | 785 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
| 799 | * block, plus a triple indirect block | 786 | * 2 dindirect blocks, and 1 tindirect block |
| 800 | */ | 787 | */ |
| 801 | indirects = nrblocks * 2 + 1; | 788 | return DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
| 802 | return indirects; | ||
| 803 | } | 789 | } |
| 804 | 790 | ||
| 805 | /* | 791 | /* |
| @@ -940,11 +926,13 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
| 940 | __le32 *last) | 926 | __le32 *last) |
| 941 | { | 927 | { |
| 942 | __le32 *p; | 928 | __le32 *p; |
| 943 | int flags = EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_VALIDATED; | 929 | int flags = EXT4_FREE_BLOCKS_VALIDATED; |
| 944 | int err; | 930 | int err; |
| 945 | 931 | ||
| 946 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) | 932 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
| 947 | flags |= EXT4_FREE_BLOCKS_METADATA; | 933 | flags |= EXT4_FREE_BLOCKS_FORGET | EXT4_FREE_BLOCKS_METADATA; |
| 934 | else if (ext4_should_journal_data(inode)) | ||
| 935 | flags |= EXT4_FREE_BLOCKS_FORGET; | ||
| 948 | 936 | ||
| 949 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, | 937 | if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, |
| 950 | count)) { | 938 | count)) { |
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3e2bf873e8a8..d9ecbf1113a7 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c | |||
| @@ -72,7 +72,7 @@ static int get_max_inline_xattr_value_size(struct inode *inode, | |||
| 72 | entry = (struct ext4_xattr_entry *) | 72 | entry = (struct ext4_xattr_entry *) |
| 73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); | 73 | ((void *)raw_inode + EXT4_I(inode)->i_inline_off); |
| 74 | 74 | ||
| 75 | free += le32_to_cpu(entry->e_value_size); | 75 | free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); |
| 76 | goto out; | 76 | goto out; |
| 77 | } | 77 | } |
| 78 | 78 | ||
| @@ -1404,16 +1404,15 @@ out: | |||
| 1404 | * offset as if '.' and '..' really take place. | 1404 | * offset as if '.' and '..' really take place. |
| 1405 | * | 1405 | * |
| 1406 | */ | 1406 | */ |
| 1407 | int ext4_read_inline_dir(struct file *filp, | 1407 | int ext4_read_inline_dir(struct file *file, |
| 1408 | void *dirent, filldir_t filldir, | 1408 | struct dir_context *ctx, |
| 1409 | int *has_inline_data) | 1409 | int *has_inline_data) |
| 1410 | { | 1410 | { |
| 1411 | int error = 0; | ||
| 1412 | unsigned int offset, parent_ino; | 1411 | unsigned int offset, parent_ino; |
| 1413 | int i, stored; | 1412 | int i; |
| 1414 | struct ext4_dir_entry_2 *de; | 1413 | struct ext4_dir_entry_2 *de; |
| 1415 | struct super_block *sb; | 1414 | struct super_block *sb; |
| 1416 | struct inode *inode = file_inode(filp); | 1415 | struct inode *inode = file_inode(file); |
| 1417 | int ret, inline_size = 0; | 1416 | int ret, inline_size = 0; |
| 1418 | struct ext4_iloc iloc; | 1417 | struct ext4_iloc iloc; |
| 1419 | void *dir_buf = NULL; | 1418 | void *dir_buf = NULL; |
| @@ -1444,9 +1443,8 @@ int ext4_read_inline_dir(struct file *filp, | |||
| 1444 | goto out; | 1443 | goto out; |
| 1445 | 1444 | ||
| 1446 | sb = inode->i_sb; | 1445 | sb = inode->i_sb; |
| 1447 | stored = 0; | ||
| 1448 | parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); | 1446 | parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); |
| 1449 | offset = filp->f_pos; | 1447 | offset = ctx->pos; |
| 1450 | 1448 | ||
| 1451 | /* | 1449 | /* |
| 1452 | * dotdot_offset and dotdot_size is the real offset and | 1450 | * dotdot_offset and dotdot_size is the real offset and |
| @@ -1460,104 +1458,74 @@ int ext4_read_inline_dir(struct file *filp, | |||
| 1460 | extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; | 1458 | extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; |
| 1461 | extra_size = extra_offset + inline_size; | 1459 | extra_size = extra_offset + inline_size; |
| 1462 | 1460 | ||
| 1463 | while (!error && !stored && filp->f_pos < extra_size) { | 1461 | /* |
| 1464 | revalidate: | 1462 | * If the version has changed since the last call to |
| 1465 | /* | 1463 | * readdir(2), then we might be pointing to an invalid |
| 1466 | * If the version has changed since the last call to | 1464 | * dirent right now. Scan from the start of the inline |
| 1467 | * readdir(2), then we might be pointing to an invalid | 1465 | * dir to make sure. |
| 1468 | * dirent right now. Scan from the start of the inline | 1466 | */ |
| 1469 | * dir to make sure. | 1467 | if (file->f_version != inode->i_version) { |
| 1470 | */ | 1468 | for (i = 0; i < extra_size && i < offset;) { |
| 1471 | if (filp->f_version != inode->i_version) { | 1469 | /* |
| 1472 | for (i = 0; i < extra_size && i < offset;) { | 1470 | * "." is with offset 0 and |
| 1473 | /* | 1471 | * ".." is dotdot_offset. |
| 1474 | * "." is with offset 0 and | 1472 | */ |
| 1475 | * ".." is dotdot_offset. | 1473 | if (!i) { |
| 1476 | */ | 1474 | i = dotdot_offset; |
| 1477 | if (!i) { | 1475 | continue; |
| 1478 | i = dotdot_offset; | 1476 | } else if (i == dotdot_offset) { |
| 1479 | continue; | 1477 | i = dotdot_size; |
| 1480 | } else if (i == dotdot_offset) { | ||
| 1481 | i = dotdot_size; | ||
| 1482 | continue; | ||
| 1483 | } | ||
| 1484 | /* for other entry, the real offset in | ||
| 1485 | * the buf has to be tuned accordingly. | ||
| 1486 | */ | ||
| 1487 | de = (struct ext4_dir_entry_2 *) | ||
| 1488 | (dir_buf + i - extra_offset); | ||
| 1489 | /* It's too expensive to do a full | ||
| 1490 | * dirent test each time round this | ||
| 1491 | * loop, but we do have to test at | ||
| 1492 | * least that it is non-zero. A | ||
| 1493 | * failure will be detected in the | ||
| 1494 | * dirent test below. */ | ||
| 1495 | if (ext4_rec_len_from_disk(de->rec_len, | ||
| 1496 | extra_size) < EXT4_DIR_REC_LEN(1)) | ||
| 1497 | break; | ||
| 1498 | i += ext4_rec_len_from_disk(de->rec_len, | ||
| 1499 | extra_size); | ||
| 1500 | } | ||
| 1501 | offset = i; | ||
| 1502 | filp->f_pos = offset; | ||
| 1503 | filp->f_version = inode->i_version; | ||
| 1504 | } | ||
| 1505 | |||
| 1506 | while (!error && filp->f_pos < extra_size) { | ||
| 1507 | if (filp->f_pos == 0) { | ||
| 1508 | error = filldir(dirent, ".", 1, 0, inode->i_ino, | ||
| 1509 | DT_DIR); | ||
| 1510 | if (error) | ||
| 1511 | break; | ||
| 1512 | stored++; | ||
| 1513 | filp->f_pos = dotdot_offset; | ||
| 1514 | continue; | 1478 | continue; |
| 1515 | } | 1479 | } |
| 1480 | /* for other entry, the real offset in | ||
| 1481 | * the buf has to be tuned accordingly. | ||
| 1482 | */ | ||
| 1483 | de = (struct ext4_dir_entry_2 *) | ||
| 1484 | (dir_buf + i - extra_offset); | ||
| 1485 | /* It's too expensive to do a full | ||
| 1486 | * dirent test each time round this | ||
| 1487 | * loop, but we do have to test at | ||
| 1488 | * least that it is non-zero. A | ||
| 1489 | * failure will be detected in the | ||
| 1490 | * dirent test below. */ | ||
| 1491 | if (ext4_rec_len_from_disk(de->rec_len, extra_size) | ||
| 1492 | < EXT4_DIR_REC_LEN(1)) | ||
| 1493 | break; | ||
| 1494 | i += ext4_rec_len_from_disk(de->rec_len, | ||
| 1495 | extra_size); | ||
| 1496 | } | ||
| 1497 | offset = i; | ||
| 1498 | ctx->pos = offset; | ||
| 1499 | file->f_version = inode->i_version; | ||
| 1500 | } | ||
| 1516 | 1501 | ||
| 1517 | if (filp->f_pos == dotdot_offset) { | 1502 | while (ctx->pos < extra_size) { |
| 1518 | error = filldir(dirent, "..", 2, | 1503 | if (ctx->pos == 0) { |
| 1519 | dotdot_offset, | 1504 | if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) |
| 1520 | parent_ino, DT_DIR); | 1505 | goto out; |
| 1521 | if (error) | 1506 | ctx->pos = dotdot_offset; |
| 1522 | break; | 1507 | continue; |
| 1523 | stored++; | 1508 | } |
| 1524 | 1509 | ||
| 1525 | filp->f_pos = dotdot_size; | 1510 | if (ctx->pos == dotdot_offset) { |
| 1526 | continue; | 1511 | if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) |
| 1527 | } | 1512 | goto out; |
| 1513 | ctx->pos = dotdot_size; | ||
| 1514 | continue; | ||
| 1515 | } | ||
| 1528 | 1516 | ||
| 1529 | de = (struct ext4_dir_entry_2 *) | 1517 | de = (struct ext4_dir_entry_2 *) |
| 1530 | (dir_buf + filp->f_pos - extra_offset); | 1518 | (dir_buf + ctx->pos - extra_offset); |
| 1531 | if (ext4_check_dir_entry(inode, filp, de, | 1519 | if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, |
| 1532 | iloc.bh, dir_buf, | 1520 | extra_size, ctx->pos)) |
| 1533 | extra_size, filp->f_pos)) { | 1521 | goto out; |
| 1534 | ret = stored; | 1522 | if (le32_to_cpu(de->inode)) { |
| 1523 | if (!dir_emit(ctx, de->name, de->name_len, | ||
| 1524 | le32_to_cpu(de->inode), | ||
| 1525 | get_dtype(sb, de->file_type))) | ||
| 1535 | goto out; | 1526 | goto out; |
| 1536 | } | ||
| 1537 | if (le32_to_cpu(de->inode)) { | ||
| 1538 | /* We might block in the next section | ||
| 1539 | * if the data destination is | ||
| 1540 | * currently swapped out. So, use a | ||
| 1541 | * version stamp to detect whether or | ||
| 1542 | * not the directory has been modified | ||
| 1543 | * during the copy operation. | ||
| 1544 | */ | ||
| 1545 | u64 version = filp->f_version; | ||
| 1546 | |||
| 1547 | error = filldir(dirent, de->name, | ||
| 1548 | de->name_len, | ||
| 1549 | filp->f_pos, | ||
| 1550 | le32_to_cpu(de->inode), | ||
| 1551 | get_dtype(sb, de->file_type)); | ||
| 1552 | if (error) | ||
| 1553 | break; | ||
| 1554 | if (version != filp->f_version) | ||
| 1555 | goto revalidate; | ||
| 1556 | stored++; | ||
| 1557 | } | ||
| 1558 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len, | ||
| 1559 | extra_size); | ||
| 1560 | } | 1527 | } |
| 1528 | ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); | ||
| 1561 | } | 1529 | } |
| 1562 | out: | 1530 | out: |
| 1563 | kfree(dir_buf); | 1531 | kfree(dir_buf); |
| @@ -1842,7 +1810,7 @@ int ext4_inline_data_fiemap(struct inode *inode, | |||
| 1842 | if (error) | 1810 | if (error) |
| 1843 | goto out; | 1811 | goto out; |
| 1844 | 1812 | ||
| 1845 | physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; | 1813 | physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; |
| 1846 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; | 1814 | physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; |
| 1847 | physical += offsetof(struct ext4_inode, i_block); | 1815 | physical += offsetof(struct ext4_inode, i_block); |
| 1848 | length = i_size_read(inode); | 1816 | length = i_size_read(inode); |
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..0188e65e1f58 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -132,12 +132,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, | |||
| 132 | new_size); | 132 | new_size); |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | static void ext4_invalidatepage(struct page *page, unsigned long offset); | 135 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
| 136 | unsigned int length); | ||
| 136 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); | 137 | static int __ext4_journalled_writepage(struct page *page, unsigned int len); |
| 137 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); | 138 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh); |
| 138 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 139 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
| 139 | struct inode *inode, struct page *page, loff_t from, | 140 | int pextents); |
| 140 | loff_t length, int flags); | ||
| 141 | 141 | ||
| 142 | /* | 142 | /* |
| 143 | * Test whether an inode is a fast symlink. | 143 | * Test whether an inode is a fast symlink. |
| @@ -215,7 +215,8 @@ void ext4_evict_inode(struct inode *inode) | |||
| 215 | filemap_write_and_wait(&inode->i_data); | 215 | filemap_write_and_wait(&inode->i_data); |
| 216 | } | 216 | } |
| 217 | truncate_inode_pages(&inode->i_data, 0); | 217 | truncate_inode_pages(&inode->i_data, 0); |
| 218 | ext4_ioend_shutdown(inode); | 218 | |
| 219 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
| 219 | goto no_delete; | 220 | goto no_delete; |
| 220 | } | 221 | } |
| 221 | 222 | ||
| @@ -225,8 +226,8 @@ void ext4_evict_inode(struct inode *inode) | |||
| 225 | if (ext4_should_order_data(inode)) | 226 | if (ext4_should_order_data(inode)) |
| 226 | ext4_begin_ordered_truncate(inode, 0); | 227 | ext4_begin_ordered_truncate(inode, 0); |
| 227 | truncate_inode_pages(&inode->i_data, 0); | 228 | truncate_inode_pages(&inode->i_data, 0); |
| 228 | ext4_ioend_shutdown(inode); | ||
| 229 | 229 | ||
| 230 | WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); | ||
| 230 | if (is_bad_inode(inode)) | 231 | if (is_bad_inode(inode)) |
| 231 | goto no_delete; | 232 | goto no_delete; |
| 232 | 233 | ||
| @@ -423,66 +424,6 @@ static int __check_block_validity(struct inode *inode, const char *func, | |||
| 423 | #define check_block_validity(inode, map) \ | 424 | #define check_block_validity(inode, map) \ |
| 424 | __check_block_validity((inode), __func__, __LINE__, (map)) | 425 | __check_block_validity((inode), __func__, __LINE__, (map)) |
| 425 | 426 | ||
| 426 | /* | ||
| 427 | * Return the number of contiguous dirty pages in a given inode | ||
| 428 | * starting at page frame idx. | ||
| 429 | */ | ||
| 430 | static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, | ||
| 431 | unsigned int max_pages) | ||
| 432 | { | ||
| 433 | struct address_space *mapping = inode->i_mapping; | ||
| 434 | pgoff_t index; | ||
| 435 | struct pagevec pvec; | ||
| 436 | pgoff_t num = 0; | ||
| 437 | int i, nr_pages, done = 0; | ||
| 438 | |||
| 439 | if (max_pages == 0) | ||
| 440 | return 0; | ||
| 441 | pagevec_init(&pvec, 0); | ||
| 442 | while (!done) { | ||
| 443 | index = idx; | ||
| 444 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 445 | PAGECACHE_TAG_DIRTY, | ||
| 446 | (pgoff_t)PAGEVEC_SIZE); | ||
| 447 | if (nr_pages == 0) | ||
| 448 | break; | ||
| 449 | for (i = 0; i < nr_pages; i++) { | ||
| 450 | struct page *page = pvec.pages[i]; | ||
| 451 | struct buffer_head *bh, *head; | ||
| 452 | |||
| 453 | lock_page(page); | ||
| 454 | if (unlikely(page->mapping != mapping) || | ||
| 455 | !PageDirty(page) || | ||
| 456 | PageWriteback(page) || | ||
| 457 | page->index != idx) { | ||
| 458 | done = 1; | ||
| 459 | unlock_page(page); | ||
| 460 | break; | ||
| 461 | } | ||
| 462 | if (page_has_buffers(page)) { | ||
| 463 | bh = head = page_buffers(page); | ||
| 464 | do { | ||
| 465 | if (!buffer_delay(bh) && | ||
| 466 | !buffer_unwritten(bh)) | ||
| 467 | done = 1; | ||
| 468 | bh = bh->b_this_page; | ||
| 469 | } while (!done && (bh != head)); | ||
| 470 | } | ||
| 471 | unlock_page(page); | ||
| 472 | if (done) | ||
| 473 | break; | ||
| 474 | idx++; | ||
| 475 | num++; | ||
| 476 | if (num >= max_pages) { | ||
| 477 | done = 1; | ||
| 478 | break; | ||
| 479 | } | ||
| 480 | } | ||
| 481 | pagevec_release(&pvec); | ||
| 482 | } | ||
| 483 | return num; | ||
| 484 | } | ||
| 485 | |||
| 486 | #ifdef ES_AGGRESSIVE_TEST | 427 | #ifdef ES_AGGRESSIVE_TEST |
| 487 | static void ext4_map_blocks_es_recheck(handle_t *handle, | 428 | static void ext4_map_blocks_es_recheck(handle_t *handle, |
| 488 | struct inode *inode, | 429 | struct inode *inode, |
| @@ -573,6 +514,8 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode, | |||
| 573 | "logical block %lu\n", inode->i_ino, flags, map->m_len, | 514 | "logical block %lu\n", inode->i_ino, flags, map->m_len, |
| 574 | (unsigned long) map->m_lblk); | 515 | (unsigned long) map->m_lblk); |
| 575 | 516 | ||
| 517 | ext4_es_lru_add(inode); | ||
| 518 | |||
| 576 | /* Lookup extent status tree firstly */ | 519 | /* Lookup extent status tree firstly */ |
| 577 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { | 520 | if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { |
| 578 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { | 521 | if (ext4_es_is_written(&es) || ext4_es_is_unwritten(&es)) { |
| @@ -1118,10 +1061,13 @@ static int ext4_write_end(struct file *file, | |||
| 1118 | } | 1061 | } |
| 1119 | } | 1062 | } |
| 1120 | 1063 | ||
| 1121 | if (ext4_has_inline_data(inode)) | 1064 | if (ext4_has_inline_data(inode)) { |
| 1122 | copied = ext4_write_inline_data_end(inode, pos, len, | 1065 | ret = ext4_write_inline_data_end(inode, pos, len, |
| 1123 | copied, page); | 1066 | copied, page); |
| 1124 | else | 1067 | if (ret < 0) |
| 1068 | goto errout; | ||
| 1069 | copied = ret; | ||
| 1070 | } else | ||
| 1125 | copied = block_write_end(file, mapping, pos, | 1071 | copied = block_write_end(file, mapping, pos, |
| 1126 | len, copied, page, fsdata); | 1072 | len, copied, page, fsdata); |
| 1127 | 1073 | ||
| @@ -1157,8 +1103,6 @@ static int ext4_write_end(struct file *file, | |||
| 1157 | if (i_size_changed) | 1103 | if (i_size_changed) |
| 1158 | ext4_mark_inode_dirty(handle, inode); | 1104 | ext4_mark_inode_dirty(handle, inode); |
| 1159 | 1105 | ||
| 1160 | if (copied < 0) | ||
| 1161 | ret = copied; | ||
| 1162 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) | 1106 | if (pos + len > inode->i_size && ext4_can_truncate(inode)) |
| 1163 | /* if we have allocated more blocks and copied | 1107 | /* if we have allocated more blocks and copied |
| 1164 | * less. We will have blocks allocated outside | 1108 | * less. We will have blocks allocated outside |
| @@ -1415,21 +1359,28 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
| 1415 | } | 1359 | } |
| 1416 | 1360 | ||
| 1417 | static void ext4_da_page_release_reservation(struct page *page, | 1361 | static void ext4_da_page_release_reservation(struct page *page, |
| 1418 | unsigned long offset) | 1362 | unsigned int offset, |
| 1363 | unsigned int length) | ||
| 1419 | { | 1364 | { |
| 1420 | int to_release = 0; | 1365 | int to_release = 0; |
| 1421 | struct buffer_head *head, *bh; | 1366 | struct buffer_head *head, *bh; |
| 1422 | unsigned int curr_off = 0; | 1367 | unsigned int curr_off = 0; |
| 1423 | struct inode *inode = page->mapping->host; | 1368 | struct inode *inode = page->mapping->host; |
| 1424 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1369 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1370 | unsigned int stop = offset + length; | ||
| 1425 | int num_clusters; | 1371 | int num_clusters; |
| 1426 | ext4_fsblk_t lblk; | 1372 | ext4_fsblk_t lblk; |
| 1427 | 1373 | ||
| 1374 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
| 1375 | |||
| 1428 | head = page_buffers(page); | 1376 | head = page_buffers(page); |
| 1429 | bh = head; | 1377 | bh = head; |
| 1430 | do { | 1378 | do { |
| 1431 | unsigned int next_off = curr_off + bh->b_size; | 1379 | unsigned int next_off = curr_off + bh->b_size; |
| 1432 | 1380 | ||
| 1381 | if (next_off > stop) | ||
| 1382 | break; | ||
| 1383 | |||
| 1433 | if ((offset <= curr_off) && (buffer_delay(bh))) { | 1384 | if ((offset <= curr_off) && (buffer_delay(bh))) { |
| 1434 | to_release++; | 1385 | to_release++; |
| 1435 | clear_buffer_delay(bh); | 1386 | clear_buffer_delay(bh); |
| @@ -1460,140 +1411,43 @@ static void ext4_da_page_release_reservation(struct page *page, | |||
| 1460 | * Delayed allocation stuff | 1411 | * Delayed allocation stuff |
| 1461 | */ | 1412 | */ |
| 1462 | 1413 | ||
| 1463 | /* | 1414 | struct mpage_da_data { |
| 1464 | * mpage_da_submit_io - walks through extent of pages and try to write | 1415 | struct inode *inode; |
| 1465 | * them with writepage() call back | 1416 | struct writeback_control *wbc; |
| 1466 | * | ||
| 1467 | * @mpd->inode: inode | ||
| 1468 | * @mpd->first_page: first page of the extent | ||
| 1469 | * @mpd->next_page: page after the last page of the extent | ||
| 1470 | * | ||
| 1471 | * By the time mpage_da_submit_io() is called we expect all blocks | ||
| 1472 | * to be allocated. this may be wrong if allocation failed. | ||
| 1473 | * | ||
| 1474 | * As pages are already locked by write_cache_pages(), we can't use it | ||
| 1475 | */ | ||
| 1476 | static int mpage_da_submit_io(struct mpage_da_data *mpd, | ||
| 1477 | struct ext4_map_blocks *map) | ||
| 1478 | { | ||
| 1479 | struct pagevec pvec; | ||
| 1480 | unsigned long index, end; | ||
| 1481 | int ret = 0, err, nr_pages, i; | ||
| 1482 | struct inode *inode = mpd->inode; | ||
| 1483 | struct address_space *mapping = inode->i_mapping; | ||
| 1484 | loff_t size = i_size_read(inode); | ||
| 1485 | unsigned int len, block_start; | ||
| 1486 | struct buffer_head *bh, *page_bufs = NULL; | ||
| 1487 | sector_t pblock = 0, cur_logical = 0; | ||
| 1488 | struct ext4_io_submit io_submit; | ||
| 1489 | 1417 | ||
| 1490 | BUG_ON(mpd->next_page <= mpd->first_page); | 1418 | pgoff_t first_page; /* The first page to write */ |
| 1491 | memset(&io_submit, 0, sizeof(io_submit)); | 1419 | pgoff_t next_page; /* Current page to examine */ |
| 1420 | pgoff_t last_page; /* Last page to examine */ | ||
| 1492 | /* | 1421 | /* |
| 1493 | * We need to start from the first_page to the next_page - 1 | 1422 | * Extent to map - this can be after first_page because that can be |
| 1494 | * to make sure we also write the mapped dirty buffer_heads. | 1423 | * fully mapped. We somewhat abuse m_flags to store whether the extent |
| 1495 | * If we look at mpd->b_blocknr we would only be looking | 1424 | * is delalloc or unwritten. |
| 1496 | * at the currently mapped buffer_heads. | ||
| 1497 | */ | 1425 | */ |
| 1498 | index = mpd->first_page; | 1426 | struct ext4_map_blocks map; |
| 1499 | end = mpd->next_page - 1; | 1427 | struct ext4_io_submit io_submit; /* IO submission data */ |
| 1500 | 1428 | }; | |
| 1501 | pagevec_init(&pvec, 0); | ||
| 1502 | while (index <= end) { | ||
| 1503 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
| 1504 | if (nr_pages == 0) | ||
| 1505 | break; | ||
| 1506 | for (i = 0; i < nr_pages; i++) { | ||
| 1507 | int skip_page = 0; | ||
| 1508 | struct page *page = pvec.pages[i]; | ||
| 1509 | |||
| 1510 | index = page->index; | ||
| 1511 | if (index > end) | ||
| 1512 | break; | ||
| 1513 | |||
| 1514 | if (index == size >> PAGE_CACHE_SHIFT) | ||
| 1515 | len = size & ~PAGE_CACHE_MASK; | ||
| 1516 | else | ||
| 1517 | len = PAGE_CACHE_SIZE; | ||
| 1518 | if (map) { | ||
| 1519 | cur_logical = index << (PAGE_CACHE_SHIFT - | ||
| 1520 | inode->i_blkbits); | ||
| 1521 | pblock = map->m_pblk + (cur_logical - | ||
| 1522 | map->m_lblk); | ||
| 1523 | } | ||
| 1524 | index++; | ||
| 1525 | |||
| 1526 | BUG_ON(!PageLocked(page)); | ||
| 1527 | BUG_ON(PageWriteback(page)); | ||
| 1528 | |||
| 1529 | bh = page_bufs = page_buffers(page); | ||
| 1530 | block_start = 0; | ||
| 1531 | do { | ||
| 1532 | if (map && (cur_logical >= map->m_lblk) && | ||
| 1533 | (cur_logical <= (map->m_lblk + | ||
| 1534 | (map->m_len - 1)))) { | ||
| 1535 | if (buffer_delay(bh)) { | ||
| 1536 | clear_buffer_delay(bh); | ||
| 1537 | bh->b_blocknr = pblock; | ||
| 1538 | } | ||
| 1539 | if (buffer_unwritten(bh) || | ||
| 1540 | buffer_mapped(bh)) | ||
| 1541 | BUG_ON(bh->b_blocknr != pblock); | ||
| 1542 | if (map->m_flags & EXT4_MAP_UNINIT) | ||
| 1543 | set_buffer_uninit(bh); | ||
| 1544 | clear_buffer_unwritten(bh); | ||
| 1545 | } | ||
| 1546 | |||
| 1547 | /* | ||
| 1548 | * skip page if block allocation undone and | ||
| 1549 | * block is dirty | ||
| 1550 | */ | ||
| 1551 | if (ext4_bh_delay_or_unwritten(NULL, bh)) | ||
| 1552 | skip_page = 1; | ||
| 1553 | bh = bh->b_this_page; | ||
| 1554 | block_start += bh->b_size; | ||
| 1555 | cur_logical++; | ||
| 1556 | pblock++; | ||
| 1557 | } while (bh != page_bufs); | ||
| 1558 | |||
| 1559 | if (skip_page) { | ||
| 1560 | unlock_page(page); | ||
| 1561 | continue; | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | clear_page_dirty_for_io(page); | ||
| 1565 | err = ext4_bio_write_page(&io_submit, page, len, | ||
| 1566 | mpd->wbc); | ||
| 1567 | if (!err) | ||
| 1568 | mpd->pages_written++; | ||
| 1569 | /* | ||
| 1570 | * In error case, we have to continue because | ||
| 1571 | * remaining pages are still locked | ||
| 1572 | */ | ||
| 1573 | if (ret == 0) | ||
| 1574 | ret = err; | ||
| 1575 | } | ||
| 1576 | pagevec_release(&pvec); | ||
| 1577 | } | ||
| 1578 | ext4_io_submit(&io_submit); | ||
| 1579 | return ret; | ||
| 1580 | } | ||
| 1581 | 1429 | ||
| 1582 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | 1430 | static void mpage_release_unused_pages(struct mpage_da_data *mpd, |
| 1431 | bool invalidate) | ||
| 1583 | { | 1432 | { |
| 1584 | int nr_pages, i; | 1433 | int nr_pages, i; |
| 1585 | pgoff_t index, end; | 1434 | pgoff_t index, end; |
| 1586 | struct pagevec pvec; | 1435 | struct pagevec pvec; |
| 1587 | struct inode *inode = mpd->inode; | 1436 | struct inode *inode = mpd->inode; |
| 1588 | struct address_space *mapping = inode->i_mapping; | 1437 | struct address_space *mapping = inode->i_mapping; |
| 1589 | ext4_lblk_t start, last; | 1438 | |
| 1439 | /* This is necessary when next_page == 0. */ | ||
| 1440 | if (mpd->first_page >= mpd->next_page) | ||
| 1441 | return; | ||
| 1590 | 1442 | ||
| 1591 | index = mpd->first_page; | 1443 | index = mpd->first_page; |
| 1592 | end = mpd->next_page - 1; | 1444 | end = mpd->next_page - 1; |
| 1593 | 1445 | if (invalidate) { | |
| 1594 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1446 | ext4_lblk_t start, last; |
| 1595 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); | 1447 | start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| 1596 | ext4_es_remove_extent(inode, start, last - start + 1); | 1448 | last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits); |
| 1449 | ext4_es_remove_extent(inode, start, last - start + 1); | ||
| 1450 | } | ||
| 1597 | 1451 | ||
| 1598 | pagevec_init(&pvec, 0); | 1452 | pagevec_init(&pvec, 0); |
| 1599 | while (index <= end) { | 1453 | while (index <= end) { |
| @@ -1606,14 +1460,15 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd) | |||
| 1606 | break; | 1460 | break; |
| 1607 | BUG_ON(!PageLocked(page)); | 1461 | BUG_ON(!PageLocked(page)); |
| 1608 | BUG_ON(PageWriteback(page)); | 1462 | BUG_ON(PageWriteback(page)); |
| 1609 | block_invalidatepage(page, 0); | 1463 | if (invalidate) { |
| 1610 | ClearPageUptodate(page); | 1464 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 1465 | ClearPageUptodate(page); | ||
| 1466 | } | ||
| 1611 | unlock_page(page); | 1467 | unlock_page(page); |
| 1612 | } | 1468 | } |
| 1613 | index = pvec.pages[nr_pages - 1]->index + 1; | 1469 | index = pvec.pages[nr_pages - 1]->index + 1; |
| 1614 | pagevec_release(&pvec); | 1470 | pagevec_release(&pvec); |
| 1615 | } | 1471 | } |
| 1616 | return; | ||
| 1617 | } | 1472 | } |
| 1618 | 1473 | ||
| 1619 | static void ext4_print_free_blocks(struct inode *inode) | 1474 | static void ext4_print_free_blocks(struct inode *inode) |
| @@ -1642,215 +1497,6 @@ static void ext4_print_free_blocks(struct inode *inode) | |||
| 1642 | return; | 1497 | return; |
| 1643 | } | 1498 | } |
| 1644 | 1499 | ||
| 1645 | /* | ||
| 1646 | * mpage_da_map_and_submit - go through given space, map them | ||
| 1647 | * if necessary, and then submit them for I/O | ||
| 1648 | * | ||
| 1649 | * @mpd - bh describing space | ||
| 1650 | * | ||
| 1651 | * The function skips space we know is already mapped to disk blocks. | ||
| 1652 | * | ||
| 1653 | */ | ||
| 1654 | static void mpage_da_map_and_submit(struct mpage_da_data *mpd) | ||
| 1655 | { | ||
| 1656 | int err, blks, get_blocks_flags; | ||
| 1657 | struct ext4_map_blocks map, *mapp = NULL; | ||
| 1658 | sector_t next = mpd->b_blocknr; | ||
| 1659 | unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits; | ||
| 1660 | loff_t disksize = EXT4_I(mpd->inode)->i_disksize; | ||
| 1661 | handle_t *handle = NULL; | ||
| 1662 | |||
| 1663 | /* | ||
| 1664 | * If the blocks are mapped already, or we couldn't accumulate | ||
| 1665 | * any blocks, then proceed immediately to the submission stage. | ||
| 1666 | */ | ||
| 1667 | if ((mpd->b_size == 0) || | ||
| 1668 | ((mpd->b_state & (1 << BH_Mapped)) && | ||
| 1669 | !(mpd->b_state & (1 << BH_Delay)) && | ||
| 1670 | !(mpd->b_state & (1 << BH_Unwritten)))) | ||
| 1671 | goto submit_io; | ||
| 1672 | |||
| 1673 | handle = ext4_journal_current_handle(); | ||
| 1674 | BUG_ON(!handle); | ||
| 1675 | |||
| 1676 | /* | ||
| 1677 | * Call ext4_map_blocks() to allocate any delayed allocation | ||
| 1678 | * blocks, or to convert an uninitialized extent to be | ||
| 1679 | * initialized (in the case where we have written into | ||
| 1680 | * one or more preallocated blocks). | ||
| 1681 | * | ||
| 1682 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE to | ||
| 1683 | * indicate that we are on the delayed allocation path. This | ||
| 1684 | * affects functions in many different parts of the allocation | ||
| 1685 | * call path. This flag exists primarily because we don't | ||
| 1686 | * want to change *many* call functions, so ext4_map_blocks() | ||
| 1687 | * will set the EXT4_STATE_DELALLOC_RESERVED flag once the | ||
| 1688 | * inode's allocation semaphore is taken. | ||
| 1689 | * | ||
| 1690 | * If the blocks in questions were delalloc blocks, set | ||
| 1691 | * EXT4_GET_BLOCKS_DELALLOC_RESERVE so the delalloc accounting | ||
| 1692 | * variables are updated after the blocks have been allocated. | ||
| 1693 | */ | ||
| 1694 | map.m_lblk = next; | ||
| 1695 | map.m_len = max_blocks; | ||
| 1696 | /* | ||
| 1697 | * We're in delalloc path and it is possible that we're going to | ||
| 1698 | * need more metadata blocks than previously reserved. However | ||
| 1699 | * we must not fail because we're in writeback and there is | ||
| 1700 | * nothing we can do about it so it might result in data loss. | ||
| 1701 | * So use reserved blocks to allocate metadata if possible. | ||
| 1702 | */ | ||
| 1703 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | | ||
| 1704 | EXT4_GET_BLOCKS_METADATA_NOFAIL; | ||
| 1705 | if (ext4_should_dioread_nolock(mpd->inode)) | ||
| 1706 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
| 1707 | if (mpd->b_state & (1 << BH_Delay)) | ||
| 1708 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
| 1709 | |||
| 1710 | |||
| 1711 | blks = ext4_map_blocks(handle, mpd->inode, &map, get_blocks_flags); | ||
| 1712 | if (blks < 0) { | ||
| 1713 | struct super_block *sb = mpd->inode->i_sb; | ||
| 1714 | |||
| 1715 | err = blks; | ||
| 1716 | /* | ||
| 1717 | * If get block returns EAGAIN or ENOSPC and there | ||
| 1718 | * appears to be free blocks we will just let | ||
| 1719 | * mpage_da_submit_io() unlock all of the pages. | ||
| 1720 | */ | ||
| 1721 | if (err == -EAGAIN) | ||
| 1722 | goto submit_io; | ||
| 1723 | |||
| 1724 | if (err == -ENOSPC && ext4_count_free_clusters(sb)) { | ||
| 1725 | mpd->retval = err; | ||
| 1726 | goto submit_io; | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | /* | ||
| 1730 | * get block failure will cause us to loop in | ||
| 1731 | * writepages, because a_ops->writepage won't be able | ||
| 1732 | * to make progress. The page will be redirtied by | ||
| 1733 | * writepage and writepages will again try to write | ||
| 1734 | * the same. | ||
| 1735 | */ | ||
| 1736 | if (!(EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) { | ||
| 1737 | ext4_msg(sb, KERN_CRIT, | ||
| 1738 | "delayed block allocation failed for inode %lu " | ||
| 1739 | "at logical offset %llu with max blocks %zd " | ||
| 1740 | "with error %d", mpd->inode->i_ino, | ||
| 1741 | (unsigned long long) next, | ||
| 1742 | mpd->b_size >> mpd->inode->i_blkbits, err); | ||
| 1743 | ext4_msg(sb, KERN_CRIT, | ||
| 1744 | "This should not happen!! Data will be lost"); | ||
| 1745 | if (err == -ENOSPC) | ||
| 1746 | ext4_print_free_blocks(mpd->inode); | ||
| 1747 | } | ||
| 1748 | /* invalidate all the pages */ | ||
| 1749 | ext4_da_block_invalidatepages(mpd); | ||
| 1750 | |||
| 1751 | /* Mark this page range as having been completed */ | ||
| 1752 | mpd->io_done = 1; | ||
| 1753 | return; | ||
| 1754 | } | ||
| 1755 | BUG_ON(blks == 0); | ||
| 1756 | |||
| 1757 | mapp = ↦ | ||
| 1758 | if (map.m_flags & EXT4_MAP_NEW) { | ||
| 1759 | struct block_device *bdev = mpd->inode->i_sb->s_bdev; | ||
| 1760 | int i; | ||
| 1761 | |||
| 1762 | for (i = 0; i < map.m_len; i++) | ||
| 1763 | unmap_underlying_metadata(bdev, map.m_pblk + i); | ||
| 1764 | } | ||
| 1765 | |||
| 1766 | /* | ||
| 1767 | * Update on-disk size along with block allocation. | ||
| 1768 | */ | ||
| 1769 | disksize = ((loff_t) next + blks) << mpd->inode->i_blkbits; | ||
| 1770 | if (disksize > i_size_read(mpd->inode)) | ||
| 1771 | disksize = i_size_read(mpd->inode); | ||
| 1772 | if (disksize > EXT4_I(mpd->inode)->i_disksize) { | ||
| 1773 | ext4_update_i_disksize(mpd->inode, disksize); | ||
| 1774 | err = ext4_mark_inode_dirty(handle, mpd->inode); | ||
| 1775 | if (err) | ||
| 1776 | ext4_error(mpd->inode->i_sb, | ||
| 1777 | "Failed to mark inode %lu dirty", | ||
| 1778 | mpd->inode->i_ino); | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | submit_io: | ||
| 1782 | mpage_da_submit_io(mpd, mapp); | ||
| 1783 | mpd->io_done = 1; | ||
| 1784 | } | ||
| 1785 | |||
| 1786 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | ||
| 1787 | (1 << BH_Delay) | (1 << BH_Unwritten)) | ||
| 1788 | |||
| 1789 | /* | ||
| 1790 | * mpage_add_bh_to_extent - try to add one more block to extent of blocks | ||
| 1791 | * | ||
| 1792 | * @mpd->lbh - extent of blocks | ||
| 1793 | * @logical - logical number of the block in the file | ||
| 1794 | * @b_state - b_state of the buffer head added | ||
| 1795 | * | ||
| 1796 | * the function is used to collect contig. blocks in same state | ||
| 1797 | */ | ||
| 1798 | static void mpage_add_bh_to_extent(struct mpage_da_data *mpd, sector_t logical, | ||
| 1799 | unsigned long b_state) | ||
| 1800 | { | ||
| 1801 | sector_t next; | ||
| 1802 | int blkbits = mpd->inode->i_blkbits; | ||
| 1803 | int nrblocks = mpd->b_size >> blkbits; | ||
| 1804 | |||
| 1805 | /* | ||
| 1806 | * XXX Don't go larger than mballoc is willing to allocate | ||
| 1807 | * This is a stopgap solution. We eventually need to fold | ||
| 1808 | * mpage_da_submit_io() into this function and then call | ||
| 1809 | * ext4_map_blocks() multiple times in a loop | ||
| 1810 | */ | ||
| 1811 | if (nrblocks >= (8*1024*1024 >> blkbits)) | ||
| 1812 | goto flush_it; | ||
| 1813 | |||
| 1814 | /* check if the reserved journal credits might overflow */ | ||
| 1815 | if (!ext4_test_inode_flag(mpd->inode, EXT4_INODE_EXTENTS)) { | ||
| 1816 | if (nrblocks >= EXT4_MAX_TRANS_DATA) { | ||
| 1817 | /* | ||
| 1818 | * With non-extent format we are limited by the journal | ||
| 1819 | * credit available. Total credit needed to insert | ||
| 1820 | * nrblocks contiguous blocks is dependent on the | ||
| 1821 | * nrblocks. So limit nrblocks. | ||
| 1822 | */ | ||
| 1823 | goto flush_it; | ||
| 1824 | } | ||
| 1825 | } | ||
| 1826 | /* | ||
| 1827 | * First block in the extent | ||
| 1828 | */ | ||
| 1829 | if (mpd->b_size == 0) { | ||
| 1830 | mpd->b_blocknr = logical; | ||
| 1831 | mpd->b_size = 1 << blkbits; | ||
| 1832 | mpd->b_state = b_state & BH_FLAGS; | ||
| 1833 | return; | ||
| 1834 | } | ||
| 1835 | |||
| 1836 | next = mpd->b_blocknr + nrblocks; | ||
| 1837 | /* | ||
| 1838 | * Can we merge the block to our big extent? | ||
| 1839 | */ | ||
| 1840 | if (logical == next && (b_state & BH_FLAGS) == mpd->b_state) { | ||
| 1841 | mpd->b_size += 1 << blkbits; | ||
| 1842 | return; | ||
| 1843 | } | ||
| 1844 | |||
| 1845 | flush_it: | ||
| 1846 | /* | ||
| 1847 | * We couldn't merge the block to our extent, so we | ||
| 1848 | * need to flush current extent and start new one | ||
| 1849 | */ | ||
| 1850 | mpage_da_map_and_submit(mpd); | ||
| 1851 | return; | ||
| 1852 | } | ||
| 1853 | |||
| 1854 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) | 1500 | static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh) |
| 1855 | { | 1501 | { |
| 1856 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); | 1502 | return (buffer_delay(bh) || buffer_unwritten(bh)) && buffer_dirty(bh); |
| @@ -1883,6 +1529,8 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock, | |||
| 1883 | "logical block %lu\n", inode->i_ino, map->m_len, | 1529 | "logical block %lu\n", inode->i_ino, map->m_len, |
| 1884 | (unsigned long) map->m_lblk); | 1530 | (unsigned long) map->m_lblk); |
| 1885 | 1531 | ||
| 1532 | ext4_es_lru_add(inode); | ||
| 1533 | |||
| 1886 | /* Lookup extent status tree firstly */ | 1534 | /* Lookup extent status tree firstly */ |
| 1887 | if (ext4_es_lookup_extent(inode, iblock, &es)) { | 1535 | if (ext4_es_lookup_extent(inode, iblock, &es)) { |
| 1888 | 1536 | ||
| @@ -2156,7 +1804,7 @@ out: | |||
| 2156 | * lock so we have to do some magic. | 1804 | * lock so we have to do some magic. |
| 2157 | * | 1805 | * |
| 2158 | * This function can get called via... | 1806 | * This function can get called via... |
| 2159 | * - ext4_da_writepages after taking page lock (have journal handle) | 1807 | * - ext4_writepages after taking page lock (have journal handle) |
| 2160 | * - journal_submit_inode_data_buffers (no journal handle) | 1808 | * - journal_submit_inode_data_buffers (no journal handle) |
| 2161 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) | 1809 | * - shrink_page_list via the kswapd/direct reclaim (no journal handle) |
| 2162 | * - grab_page_cache when doing write_begin (have journal handle) | 1810 | * - grab_page_cache when doing write_begin (have journal handle) |
| @@ -2234,76 +1882,405 @@ static int ext4_writepage(struct page *page, | |||
| 2234 | */ | 1882 | */ |
| 2235 | return __ext4_journalled_writepage(page, len); | 1883 | return __ext4_journalled_writepage(page, len); |
| 2236 | 1884 | ||
| 2237 | memset(&io_submit, 0, sizeof(io_submit)); | 1885 | ext4_io_submit_init(&io_submit, wbc); |
| 1886 | io_submit.io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
| 1887 | if (!io_submit.io_end) { | ||
| 1888 | redirty_page_for_writepage(wbc, page); | ||
| 1889 | unlock_page(page); | ||
| 1890 | return -ENOMEM; | ||
| 1891 | } | ||
| 2238 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); | 1892 | ret = ext4_bio_write_page(&io_submit, page, len, wbc); |
| 2239 | ext4_io_submit(&io_submit); | 1893 | ext4_io_submit(&io_submit); |
| 1894 | /* Drop io_end reference we got from init */ | ||
| 1895 | ext4_put_io_end_defer(io_submit.io_end); | ||
| 2240 | return ret; | 1896 | return ret; |
| 2241 | } | 1897 | } |
| 2242 | 1898 | ||
| 1899 | #define BH_FLAGS ((1 << BH_Unwritten) | (1 << BH_Delay)) | ||
| 1900 | |||
| 2243 | /* | 1901 | /* |
| 2244 | * This is called via ext4_da_writepages() to | 1902 | * mballoc gives us at most this number of blocks... |
| 2245 | * calculate the total number of credits to reserve to fit | 1903 | * XXX: That seems to be only a limitation of ext4_mb_normalize_request(). |
| 2246 | * a single extent allocation into a single transaction, | 1904 | * The rest of mballoc seems to handle chunks upto full group size. |
| 2247 | * ext4_da_writpeages() will loop calling this before | ||
| 2248 | * the block allocation. | ||
| 2249 | */ | 1905 | */ |
| 1906 | #define MAX_WRITEPAGES_EXTENT_LEN 2048 | ||
| 2250 | 1907 | ||
| 2251 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | 1908 | /* |
| 1909 | * mpage_add_bh_to_extent - try to add bh to extent of blocks to map | ||
| 1910 | * | ||
| 1911 | * @mpd - extent of blocks | ||
| 1912 | * @lblk - logical number of the block in the file | ||
| 1913 | * @b_state - b_state of the buffer head added | ||
| 1914 | * | ||
| 1915 | * the function is used to collect contig. blocks in same state | ||
| 1916 | */ | ||
| 1917 | static int mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk, | ||
| 1918 | unsigned long b_state) | ||
| 1919 | { | ||
| 1920 | struct ext4_map_blocks *map = &mpd->map; | ||
| 1921 | |||
| 1922 | /* Don't go larger than mballoc is willing to allocate */ | ||
| 1923 | if (map->m_len >= MAX_WRITEPAGES_EXTENT_LEN) | ||
| 1924 | return 0; | ||
| 1925 | |||
| 1926 | /* First block in the extent? */ | ||
| 1927 | if (map->m_len == 0) { | ||
| 1928 | map->m_lblk = lblk; | ||
| 1929 | map->m_len = 1; | ||
| 1930 | map->m_flags = b_state & BH_FLAGS; | ||
| 1931 | return 1; | ||
| 1932 | } | ||
| 1933 | |||
| 1934 | /* Can we merge the block to our big extent? */ | ||
| 1935 | if (lblk == map->m_lblk + map->m_len && | ||
| 1936 | (b_state & BH_FLAGS) == map->m_flags) { | ||
| 1937 | map->m_len++; | ||
| 1938 | return 1; | ||
| 1939 | } | ||
| 1940 | return 0; | ||
| 1941 | } | ||
| 1942 | |||
| 1943 | static bool add_page_bufs_to_extent(struct mpage_da_data *mpd, | ||
| 1944 | struct buffer_head *head, | ||
| 1945 | struct buffer_head *bh, | ||
| 1946 | ext4_lblk_t lblk) | ||
| 1947 | { | ||
| 1948 | struct inode *inode = mpd->inode; | ||
| 1949 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
| 1950 | >> inode->i_blkbits; | ||
| 1951 | |||
| 1952 | do { | ||
| 1953 | BUG_ON(buffer_locked(bh)); | ||
| 1954 | |||
| 1955 | if (!buffer_dirty(bh) || !buffer_mapped(bh) || | ||
| 1956 | (!buffer_delay(bh) && !buffer_unwritten(bh)) || | ||
| 1957 | lblk >= blocks) { | ||
| 1958 | /* Found extent to map? */ | ||
| 1959 | if (mpd->map.m_len) | ||
| 1960 | return false; | ||
| 1961 | if (lblk >= blocks) | ||
| 1962 | return true; | ||
| 1963 | continue; | ||
| 1964 | } | ||
| 1965 | if (!mpage_add_bh_to_extent(mpd, lblk, bh->b_state)) | ||
| 1966 | return false; | ||
| 1967 | } while (lblk++, (bh = bh->b_this_page) != head); | ||
| 1968 | return true; | ||
| 1969 | } | ||
| 1970 | |||
| 1971 | static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page) | ||
| 2252 | { | 1972 | { |
| 2253 | int max_blocks = EXT4_I(inode)->i_reserved_data_blocks; | 1973 | int len; |
| 1974 | loff_t size = i_size_read(mpd->inode); | ||
| 1975 | int err; | ||
| 1976 | |||
| 1977 | BUG_ON(page->index != mpd->first_page); | ||
| 1978 | if (page->index == size >> PAGE_CACHE_SHIFT) | ||
| 1979 | len = size & ~PAGE_CACHE_MASK; | ||
| 1980 | else | ||
| 1981 | len = PAGE_CACHE_SIZE; | ||
| 1982 | clear_page_dirty_for_io(page); | ||
| 1983 | err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); | ||
| 1984 | if (!err) | ||
| 1985 | mpd->wbc->nr_to_write--; | ||
| 1986 | mpd->first_page++; | ||
| 2254 | 1987 | ||
| 1988 | return err; | ||
| 1989 | } | ||
| 1990 | |||
| 1991 | /* | ||
| 1992 | * mpage_map_buffers - update buffers corresponding to changed extent and | ||
| 1993 | * submit fully mapped pages for IO | ||
| 1994 | * | ||
| 1995 | * @mpd - description of extent to map, on return next extent to map | ||
| 1996 | * | ||
| 1997 | * Scan buffers corresponding to changed extent (we expect corresponding pages | ||
| 1998 | * to be already locked) and update buffer state according to new extent state. | ||
| 1999 | * We map delalloc buffers to their physical location, clear unwritten bits, | ||
| 2000 | * and mark buffers as uninit when we perform writes to uninitialized extents | ||
| 2001 | * and do extent conversion after IO is finished. If the last page is not fully | ||
| 2002 | * mapped, we update @map to the next extent in the last page that needs | ||
| 2003 | * mapping. Otherwise we submit the page for IO. | ||
| 2004 | */ | ||
| 2005 | static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd) | ||
| 2006 | { | ||
| 2007 | struct pagevec pvec; | ||
| 2008 | int nr_pages, i; | ||
| 2009 | struct inode *inode = mpd->inode; | ||
| 2010 | struct buffer_head *head, *bh; | ||
| 2011 | int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits; | ||
| 2012 | ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1) | ||
| 2013 | >> inode->i_blkbits; | ||
| 2014 | pgoff_t start, end; | ||
| 2015 | ext4_lblk_t lblk; | ||
| 2016 | sector_t pblock; | ||
| 2017 | int err; | ||
| 2018 | |||
| 2019 | start = mpd->map.m_lblk >> bpp_bits; | ||
| 2020 | end = (mpd->map.m_lblk + mpd->map.m_len - 1) >> bpp_bits; | ||
| 2021 | lblk = start << bpp_bits; | ||
| 2022 | pblock = mpd->map.m_pblk; | ||
| 2023 | |||
| 2024 | pagevec_init(&pvec, 0); | ||
| 2025 | while (start <= end) { | ||
| 2026 | nr_pages = pagevec_lookup(&pvec, inode->i_mapping, start, | ||
| 2027 | PAGEVEC_SIZE); | ||
| 2028 | if (nr_pages == 0) | ||
| 2029 | break; | ||
| 2030 | for (i = 0; i < nr_pages; i++) { | ||
| 2031 | struct page *page = pvec.pages[i]; | ||
| 2032 | |||
| 2033 | if (page->index > end) | ||
| 2034 | break; | ||
| 2035 | /* Upto 'end' pages must be contiguous */ | ||
| 2036 | BUG_ON(page->index != start); | ||
| 2037 | bh = head = page_buffers(page); | ||
| 2038 | do { | ||
| 2039 | if (lblk < mpd->map.m_lblk) | ||
| 2040 | continue; | ||
| 2041 | if (lblk >= mpd->map.m_lblk + mpd->map.m_len) { | ||
| 2042 | /* | ||
| 2043 | * Buffer after end of mapped extent. | ||
| 2044 | * Find next buffer in the page to map. | ||
| 2045 | */ | ||
| 2046 | mpd->map.m_len = 0; | ||
| 2047 | mpd->map.m_flags = 0; | ||
| 2048 | add_page_bufs_to_extent(mpd, head, bh, | ||
| 2049 | lblk); | ||
| 2050 | pagevec_release(&pvec); | ||
| 2051 | return 0; | ||
| 2052 | } | ||
| 2053 | if (buffer_delay(bh)) { | ||
| 2054 | clear_buffer_delay(bh); | ||
| 2055 | bh->b_blocknr = pblock++; | ||
| 2056 | } | ||
| 2057 | clear_buffer_unwritten(bh); | ||
| 2058 | } while (++lblk < blocks && | ||
| 2059 | (bh = bh->b_this_page) != head); | ||
| 2060 | |||
| 2061 | /* | ||
| 2062 | * FIXME: This is going to break if dioread_nolock | ||
| 2063 | * supports blocksize < pagesize as we will try to | ||
| 2064 | * convert potentially unmapped parts of inode. | ||
| 2065 | */ | ||
| 2066 | mpd->io_submit.io_end->size += PAGE_CACHE_SIZE; | ||
| 2067 | /* Page fully mapped - let IO run! */ | ||
| 2068 | err = mpage_submit_page(mpd, page); | ||
| 2069 | if (err < 0) { | ||
| 2070 | pagevec_release(&pvec); | ||
| 2071 | return err; | ||
| 2072 | } | ||
| 2073 | start++; | ||
| 2074 | } | ||
| 2075 | pagevec_release(&pvec); | ||
| 2076 | } | ||
| 2077 | /* Extent fully mapped and matches with page boundary. We are done. */ | ||
| 2078 | mpd->map.m_len = 0; | ||
| 2079 | mpd->map.m_flags = 0; | ||
| 2080 | return 0; | ||
| 2081 | } | ||
| 2082 | |||
| 2083 | static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd) | ||
| 2084 | { | ||
| 2085 | struct inode *inode = mpd->inode; | ||
| 2086 | struct ext4_map_blocks *map = &mpd->map; | ||
| 2087 | int get_blocks_flags; | ||
| 2088 | int err; | ||
| 2089 | |||
| 2090 | trace_ext4_da_write_pages_extent(inode, map); | ||
| 2255 | /* | 2091 | /* |
| 2256 | * With non-extent format the journal credit needed to | 2092 | * Call ext4_map_blocks() to allocate any delayed allocation blocks, or |
| 2257 | * insert nrblocks contiguous block is dependent on | 2093 | * to convert an uninitialized extent to be initialized (in the case |
| 2258 | * number of contiguous block. So we will limit | 2094 | * where we have written into one or more preallocated blocks). It is |
| 2259 | * number of contiguous block to a sane value | 2095 | * possible that we're going to need more metadata blocks than |
| 2096 | * previously reserved. However we must not fail because we're in | ||
| 2097 | * writeback and there is nothing we can do about it so it might result | ||
| 2098 | * in data loss. So use reserved blocks to allocate metadata if | ||
| 2099 | * possible. | ||
| 2100 | * | ||
| 2101 | * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks | ||
| 2102 | * in question are delalloc blocks. This affects functions in many | ||
| 2103 | * different parts of the allocation call path. This flag exists | ||
| 2104 | * primarily because we don't want to change *many* call functions, so | ||
| 2105 | * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag | ||
| 2106 | * once the inode's allocation semaphore is taken. | ||
| 2260 | */ | 2107 | */ |
| 2261 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) && | 2108 | get_blocks_flags = EXT4_GET_BLOCKS_CREATE | |
| 2262 | (max_blocks > EXT4_MAX_TRANS_DATA)) | 2109 | EXT4_GET_BLOCKS_METADATA_NOFAIL; |
| 2263 | max_blocks = EXT4_MAX_TRANS_DATA; | 2110 | if (ext4_should_dioread_nolock(inode)) |
| 2111 | get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; | ||
| 2112 | if (map->m_flags & (1 << BH_Delay)) | ||
| 2113 | get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; | ||
| 2264 | 2114 | ||
| 2265 | return ext4_chunk_trans_blocks(inode, max_blocks); | 2115 | err = ext4_map_blocks(handle, inode, map, get_blocks_flags); |
| 2116 | if (err < 0) | ||
| 2117 | return err; | ||
| 2118 | if (map->m_flags & EXT4_MAP_UNINIT) { | ||
| 2119 | if (!mpd->io_submit.io_end->handle && | ||
| 2120 | ext4_handle_valid(handle)) { | ||
| 2121 | mpd->io_submit.io_end->handle = handle->h_rsv_handle; | ||
| 2122 | handle->h_rsv_handle = NULL; | ||
| 2123 | } | ||
| 2124 | ext4_set_io_unwritten_flag(inode, mpd->io_submit.io_end); | ||
| 2125 | } | ||
| 2126 | |||
| 2127 | BUG_ON(map->m_len == 0); | ||
| 2128 | if (map->m_flags & EXT4_MAP_NEW) { | ||
| 2129 | struct block_device *bdev = inode->i_sb->s_bdev; | ||
| 2130 | int i; | ||
| 2131 | |||
| 2132 | for (i = 0; i < map->m_len; i++) | ||
| 2133 | unmap_underlying_metadata(bdev, map->m_pblk + i); | ||
| 2134 | } | ||
| 2135 | return 0; | ||
| 2266 | } | 2136 | } |
| 2267 | 2137 | ||
| 2268 | /* | 2138 | /* |
| 2269 | * write_cache_pages_da - walk the list of dirty pages of the given | 2139 | * mpage_map_and_submit_extent - map extent starting at mpd->lblk of length |
| 2270 | * address space and accumulate pages that need writing, and call | 2140 | * mpd->len and submit pages underlying it for IO |
| 2271 | * mpage_da_map_and_submit to map a single contiguous memory region | 2141 | * |
| 2272 | * and then write them. | 2142 | * @handle - handle for journal operations |
| 2143 | * @mpd - extent to map | ||
| 2144 | * | ||
| 2145 | * The function maps extent starting at mpd->lblk of length mpd->len. If it is | ||
| 2146 | * delayed, blocks are allocated, if it is unwritten, we may need to convert | ||
| 2147 | * them to initialized or split the described range from larger unwritten | ||
| 2148 | * extent. Note that we need not map all the described range since allocation | ||
| 2149 | * can return less blocks or the range is covered by more unwritten extents. We | ||
| 2150 | * cannot map more because we are limited by reserved transaction credits. On | ||
| 2151 | * the other hand we always make sure that the last touched page is fully | ||
| 2152 | * mapped so that it can be written out (and thus forward progress is | ||
| 2153 | * guaranteed). After mapping we submit all mapped pages for IO. | ||
| 2273 | */ | 2154 | */ |
| 2274 | static int write_cache_pages_da(handle_t *handle, | 2155 | static int mpage_map_and_submit_extent(handle_t *handle, |
| 2275 | struct address_space *mapping, | 2156 | struct mpage_da_data *mpd, |
| 2276 | struct writeback_control *wbc, | 2157 | bool *give_up_on_write) |
| 2277 | struct mpage_da_data *mpd, | ||
| 2278 | pgoff_t *done_index) | ||
| 2279 | { | 2158 | { |
| 2280 | struct buffer_head *bh, *head; | 2159 | struct inode *inode = mpd->inode; |
| 2281 | struct inode *inode = mapping->host; | 2160 | struct ext4_map_blocks *map = &mpd->map; |
| 2282 | struct pagevec pvec; | 2161 | int err; |
| 2283 | unsigned int nr_pages; | 2162 | loff_t disksize; |
| 2284 | sector_t logical; | ||
| 2285 | pgoff_t index, end; | ||
| 2286 | long nr_to_write = wbc->nr_to_write; | ||
| 2287 | int i, tag, ret = 0; | ||
| 2288 | |||
| 2289 | memset(mpd, 0, sizeof(struct mpage_da_data)); | ||
| 2290 | mpd->wbc = wbc; | ||
| 2291 | mpd->inode = inode; | ||
| 2292 | pagevec_init(&pvec, 0); | ||
| 2293 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | ||
| 2294 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | ||
| 2295 | 2163 | ||
| 2296 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2164 | mpd->io_submit.io_end->offset = |
| 2165 | ((loff_t)map->m_lblk) << inode->i_blkbits; | ||
| 2166 | while (map->m_len) { | ||
| 2167 | err = mpage_map_one_extent(handle, mpd); | ||
| 2168 | if (err < 0) { | ||
| 2169 | struct super_block *sb = inode->i_sb; | ||
| 2170 | |||
| 2171 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | ||
| 2172 | goto invalidate_dirty_pages; | ||
| 2173 | /* | ||
| 2174 | * Let the uper layers retry transient errors. | ||
| 2175 | * In the case of ENOSPC, if ext4_count_free_blocks() | ||
| 2176 | * is non-zero, a commit should free up blocks. | ||
| 2177 | */ | ||
| 2178 | if ((err == -ENOMEM) || | ||
| 2179 | (err == -ENOSPC && ext4_count_free_clusters(sb))) | ||
| 2180 | return err; | ||
| 2181 | ext4_msg(sb, KERN_CRIT, | ||
| 2182 | "Delayed block allocation failed for " | ||
| 2183 | "inode %lu at logical offset %llu with" | ||
| 2184 | " max blocks %u with error %d", | ||
| 2185 | inode->i_ino, | ||
| 2186 | (unsigned long long)map->m_lblk, | ||
| 2187 | (unsigned)map->m_len, -err); | ||
| 2188 | ext4_msg(sb, KERN_CRIT, | ||
| 2189 | "This should not happen!! Data will " | ||
| 2190 | "be lost\n"); | ||
| 2191 | if (err == -ENOSPC) | ||
| 2192 | ext4_print_free_blocks(inode); | ||
| 2193 | invalidate_dirty_pages: | ||
| 2194 | *give_up_on_write = true; | ||
| 2195 | return err; | ||
| 2196 | } | ||
| 2197 | /* | ||
| 2198 | * Update buffer state, submit mapped pages, and get us new | ||
| 2199 | * extent to map | ||
| 2200 | */ | ||
| 2201 | err = mpage_map_and_submit_buffers(mpd); | ||
| 2202 | if (err < 0) | ||
| 2203 | return err; | ||
| 2204 | } | ||
| 2205 | |||
| 2206 | /* Update on-disk size after IO is submitted */ | ||
| 2207 | disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; | ||
| 2208 | if (disksize > i_size_read(inode)) | ||
| 2209 | disksize = i_size_read(inode); | ||
| 2210 | if (disksize > EXT4_I(inode)->i_disksize) { | ||
| 2211 | int err2; | ||
| 2212 | |||
| 2213 | ext4_update_i_disksize(inode, disksize); | ||
| 2214 | err2 = ext4_mark_inode_dirty(handle, inode); | ||
| 2215 | if (err2) | ||
| 2216 | ext4_error(inode->i_sb, | ||
| 2217 | "Failed to mark inode %lu dirty", | ||
| 2218 | inode->i_ino); | ||
| 2219 | if (!err) | ||
| 2220 | err = err2; | ||
| 2221 | } | ||
| 2222 | return err; | ||
| 2223 | } | ||
| 2224 | |||
| 2225 | /* | ||
| 2226 | * Calculate the total number of credits to reserve for one writepages | ||
| 2227 | * iteration. This is called from ext4_writepages(). We map an extent of | ||
| 2228 | * upto MAX_WRITEPAGES_EXTENT_LEN blocks and then we go on and finish mapping | ||
| 2229 | * the last partial page. So in total we can map MAX_WRITEPAGES_EXTENT_LEN + | ||
| 2230 | * bpp - 1 blocks in bpp different extents. | ||
| 2231 | */ | ||
| 2232 | static int ext4_da_writepages_trans_blocks(struct inode *inode) | ||
| 2233 | { | ||
| 2234 | int bpp = ext4_journal_blocks_per_page(inode); | ||
| 2235 | |||
| 2236 | return ext4_meta_trans_blocks(inode, | ||
| 2237 | MAX_WRITEPAGES_EXTENT_LEN + bpp - 1, bpp); | ||
| 2238 | } | ||
| 2239 | |||
| 2240 | /* | ||
| 2241 | * mpage_prepare_extent_to_map - find & lock contiguous range of dirty pages | ||
| 2242 | * and underlying extent to map | ||
| 2243 | * | ||
| 2244 | * @mpd - where to look for pages | ||
| 2245 | * | ||
| 2246 | * Walk dirty pages in the mapping. If they are fully mapped, submit them for | ||
| 2247 | * IO immediately. When we find a page which isn't mapped we start accumulating | ||
| 2248 | * extent of buffers underlying these pages that needs mapping (formed by | ||
| 2249 | * either delayed or unwritten buffers). We also lock the pages containing | ||
| 2250 | * these buffers. The extent found is returned in @mpd structure (starting at | ||
| 2251 | * mpd->lblk with length mpd->len blocks). | ||
| 2252 | * | ||
| 2253 | * Note that this function can attach bios to one io_end structure which are | ||
| 2254 | * neither logically nor physically contiguous. Although it may seem as an | ||
| 2255 | * unnecessary complication, it is actually inevitable in blocksize < pagesize | ||
| 2256 | * case as we need to track IO to all buffers underlying a page in one io_end. | ||
| 2257 | */ | ||
| 2258 | static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) | ||
| 2259 | { | ||
| 2260 | struct address_space *mapping = mpd->inode->i_mapping; | ||
| 2261 | struct pagevec pvec; | ||
| 2262 | unsigned int nr_pages; | ||
| 2263 | pgoff_t index = mpd->first_page; | ||
| 2264 | pgoff_t end = mpd->last_page; | ||
| 2265 | int tag; | ||
| 2266 | int i, err = 0; | ||
| 2267 | int blkbits = mpd->inode->i_blkbits; | ||
| 2268 | ext4_lblk_t lblk; | ||
| 2269 | struct buffer_head *head; | ||
| 2270 | |||
| 2271 | if (mpd->wbc->sync_mode == WB_SYNC_ALL || mpd->wbc->tagged_writepages) | ||
| 2297 | tag = PAGECACHE_TAG_TOWRITE; | 2272 | tag = PAGECACHE_TAG_TOWRITE; |
| 2298 | else | 2273 | else |
| 2299 | tag = PAGECACHE_TAG_DIRTY; | 2274 | tag = PAGECACHE_TAG_DIRTY; |
| 2300 | 2275 | ||
| 2301 | *done_index = index; | 2276 | pagevec_init(&pvec, 0); |
| 2277 | mpd->map.m_len = 0; | ||
| 2278 | mpd->next_page = index; | ||
| 2302 | while (index <= end) { | 2279 | while (index <= end) { |
| 2303 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, | 2280 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, |
| 2304 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); | 2281 | min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1); |
| 2305 | if (nr_pages == 0) | 2282 | if (nr_pages == 0) |
| 2306 | return 0; | 2283 | goto out; |
| 2307 | 2284 | ||
| 2308 | for (i = 0; i < nr_pages; i++) { | 2285 | for (i = 0; i < nr_pages; i++) { |
| 2309 | struct page *page = pvec.pages[i]; | 2286 | struct page *page = pvec.pages[i]; |
| @@ -2318,31 +2295,21 @@ static int write_cache_pages_da(handle_t *handle, | |||
| 2318 | if (page->index > end) | 2295 | if (page->index > end) |
| 2319 | goto out; | 2296 | goto out; |
| 2320 | 2297 | ||
| 2321 | *done_index = page->index + 1; | 2298 | /* If we can't merge this page, we are done. */ |
| 2322 | 2299 | if (mpd->map.m_len > 0 && mpd->next_page != page->index) | |
| 2323 | /* | 2300 | goto out; |
| 2324 | * If we can't merge this page, and we have | ||
| 2325 | * accumulated an contiguous region, write it | ||
| 2326 | */ | ||
| 2327 | if ((mpd->next_page != page->index) && | ||
| 2328 | (mpd->next_page != mpd->first_page)) { | ||
| 2329 | mpage_da_map_and_submit(mpd); | ||
| 2330 | goto ret_extent_tail; | ||
| 2331 | } | ||
| 2332 | 2301 | ||
| 2333 | lock_page(page); | 2302 | lock_page(page); |
| 2334 | |||
| 2335 | /* | 2303 | /* |
| 2336 | * If the page is no longer dirty, or its | 2304 | * If the page is no longer dirty, or its mapping no |
| 2337 | * mapping no longer corresponds to inode we | 2305 | * longer corresponds to inode we are writing (which |
| 2338 | * are writing (which means it has been | 2306 | * means it has been truncated or invalidated), or the |
| 2339 | * truncated or invalidated), or the page is | 2307 | * page is already under writeback and we are not doing |
| 2340 | * already under writeback and we are not | 2308 | * a data integrity writeback, skip the page |
| 2341 | * doing a data integrity writeback, skip the page | ||
| 2342 | */ | 2309 | */ |
| 2343 | if (!PageDirty(page) || | 2310 | if (!PageDirty(page) || |
| 2344 | (PageWriteback(page) && | 2311 | (PageWriteback(page) && |
| 2345 | (wbc->sync_mode == WB_SYNC_NONE)) || | 2312 | (mpd->wbc->sync_mode == WB_SYNC_NONE)) || |
| 2346 | unlikely(page->mapping != mapping)) { | 2313 | unlikely(page->mapping != mapping)) { |
| 2347 | unlock_page(page); | 2314 | unlock_page(page); |
| 2348 | continue; | 2315 | continue; |
| @@ -2351,106 +2318,70 @@ static int write_cache_pages_da(handle_t *handle, | |||
| 2351 | wait_on_page_writeback(page); | 2318 | wait_on_page_writeback(page); |
| 2352 | BUG_ON(PageWriteback(page)); | 2319 | BUG_ON(PageWriteback(page)); |
| 2353 | 2320 | ||
| 2354 | /* | 2321 | if (mpd->map.m_len == 0) |
| 2355 | * If we have inline data and arrive here, it means that | ||
| 2356 | * we will soon create the block for the 1st page, so | ||
| 2357 | * we'd better clear the inline data here. | ||
| 2358 | */ | ||
| 2359 | if (ext4_has_inline_data(inode)) { | ||
| 2360 | BUG_ON(ext4_test_inode_state(inode, | ||
| 2361 | EXT4_STATE_MAY_INLINE_DATA)); | ||
| 2362 | ext4_destroy_inline_data(handle, inode); | ||
| 2363 | } | ||
| 2364 | |||
| 2365 | if (mpd->next_page != page->index) | ||
| 2366 | mpd->first_page = page->index; | 2322 | mpd->first_page = page->index; |
| 2367 | mpd->next_page = page->index + 1; | 2323 | mpd->next_page = page->index + 1; |
| 2368 | logical = (sector_t) page->index << | ||
| 2369 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 2370 | |||
| 2371 | /* Add all dirty buffers to mpd */ | 2324 | /* Add all dirty buffers to mpd */ |
| 2325 | lblk = ((ext4_lblk_t)page->index) << | ||
| 2326 | (PAGE_CACHE_SHIFT - blkbits); | ||
| 2372 | head = page_buffers(page); | 2327 | head = page_buffers(page); |
| 2373 | bh = head; | 2328 | if (!add_page_bufs_to_extent(mpd, head, head, lblk)) |
| 2374 | do { | 2329 | goto out; |
| 2375 | BUG_ON(buffer_locked(bh)); | 2330 | /* So far everything mapped? Submit the page for IO. */ |
| 2376 | /* | 2331 | if (mpd->map.m_len == 0) { |
| 2377 | * We need to try to allocate unmapped blocks | 2332 | err = mpage_submit_page(mpd, page); |
| 2378 | * in the same page. Otherwise we won't make | 2333 | if (err < 0) |
| 2379 | * progress with the page in ext4_writepage | ||
| 2380 | */ | ||
| 2381 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
| 2382 | mpage_add_bh_to_extent(mpd, logical, | ||
| 2383 | bh->b_state); | ||
| 2384 | if (mpd->io_done) | ||
| 2385 | goto ret_extent_tail; | ||
| 2386 | } else if (buffer_dirty(bh) && | ||
| 2387 | buffer_mapped(bh)) { | ||
| 2388 | /* | ||
| 2389 | * mapped dirty buffer. We need to | ||
| 2390 | * update the b_state because we look | ||
| 2391 | * at b_state in mpage_da_map_blocks. | ||
| 2392 | * We don't update b_size because if we | ||
| 2393 | * find an unmapped buffer_head later | ||
| 2394 | * we need to use the b_state flag of | ||
| 2395 | * that buffer_head. | ||
| 2396 | */ | ||
| 2397 | if (mpd->b_size == 0) | ||
| 2398 | mpd->b_state = | ||
| 2399 | bh->b_state & BH_FLAGS; | ||
| 2400 | } | ||
| 2401 | logical++; | ||
| 2402 | } while ((bh = bh->b_this_page) != head); | ||
| 2403 | |||
| 2404 | if (nr_to_write > 0) { | ||
| 2405 | nr_to_write--; | ||
| 2406 | if (nr_to_write == 0 && | ||
| 2407 | wbc->sync_mode == WB_SYNC_NONE) | ||
| 2408 | /* | ||
| 2409 | * We stop writing back only if we are | ||
| 2410 | * not doing integrity sync. In case of | ||
| 2411 | * integrity sync we have to keep going | ||
| 2412 | * because someone may be concurrently | ||
| 2413 | * dirtying pages, and we might have | ||
| 2414 | * synced a lot of newly appeared dirty | ||
| 2415 | * pages, but have not synced all of the | ||
| 2416 | * old dirty pages. | ||
| 2417 | */ | ||
| 2418 | goto out; | 2334 | goto out; |
| 2419 | } | 2335 | } |
| 2336 | |||
| 2337 | /* | ||
| 2338 | * Accumulated enough dirty pages? This doesn't apply | ||
| 2339 | * to WB_SYNC_ALL mode. For integrity sync we have to | ||
| 2340 | * keep going because someone may be concurrently | ||
| 2341 | * dirtying pages, and we might have synced a lot of | ||
| 2342 | * newly appeared dirty pages, but have not synced all | ||
| 2343 | * of the old dirty pages. | ||
| 2344 | */ | ||
| 2345 | if (mpd->wbc->sync_mode == WB_SYNC_NONE && | ||
| 2346 | mpd->next_page - mpd->first_page >= | ||
| 2347 | mpd->wbc->nr_to_write) | ||
| 2348 | goto out; | ||
| 2420 | } | 2349 | } |
| 2421 | pagevec_release(&pvec); | 2350 | pagevec_release(&pvec); |
| 2422 | cond_resched(); | 2351 | cond_resched(); |
| 2423 | } | 2352 | } |
| 2424 | return 0; | 2353 | return 0; |
| 2425 | ret_extent_tail: | ||
| 2426 | ret = MPAGE_DA_EXTENT_TAIL; | ||
| 2427 | out: | 2354 | out: |
| 2428 | pagevec_release(&pvec); | 2355 | pagevec_release(&pvec); |
| 2429 | cond_resched(); | 2356 | return err; |
| 2430 | return ret; | ||
| 2431 | } | 2357 | } |
| 2432 | 2358 | ||
| 2359 | static int __writepage(struct page *page, struct writeback_control *wbc, | ||
| 2360 | void *data) | ||
| 2361 | { | ||
| 2362 | struct address_space *mapping = data; | ||
| 2363 | int ret = ext4_writepage(page, wbc); | ||
| 2364 | mapping_set_error(mapping, ret); | ||
| 2365 | return ret; | ||
| 2366 | } | ||
| 2433 | 2367 | ||
| 2434 | static int ext4_da_writepages(struct address_space *mapping, | 2368 | static int ext4_writepages(struct address_space *mapping, |
| 2435 | struct writeback_control *wbc) | 2369 | struct writeback_control *wbc) |
| 2436 | { | 2370 | { |
| 2437 | pgoff_t index; | 2371 | pgoff_t writeback_index = 0; |
| 2372 | long nr_to_write = wbc->nr_to_write; | ||
| 2438 | int range_whole = 0; | 2373 | int range_whole = 0; |
| 2374 | int cycled = 1; | ||
| 2439 | handle_t *handle = NULL; | 2375 | handle_t *handle = NULL; |
| 2440 | struct mpage_da_data mpd; | 2376 | struct mpage_da_data mpd; |
| 2441 | struct inode *inode = mapping->host; | 2377 | struct inode *inode = mapping->host; |
| 2442 | int pages_written = 0; | 2378 | int needed_blocks, rsv_blocks = 0, ret = 0; |
| 2443 | unsigned int max_pages; | ||
| 2444 | int range_cyclic, cycled = 1, io_done = 0; | ||
| 2445 | int needed_blocks, ret = 0; | ||
| 2446 | long desired_nr_to_write, nr_to_writebump = 0; | ||
| 2447 | loff_t range_start = wbc->range_start; | ||
| 2448 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2379 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
| 2449 | pgoff_t done_index = 0; | 2380 | bool done; |
| 2450 | pgoff_t end; | ||
| 2451 | struct blk_plug plug; | 2381 | struct blk_plug plug; |
| 2382 | bool give_up_on_write = false; | ||
| 2452 | 2383 | ||
| 2453 | trace_ext4_da_writepages(inode, wbc); | 2384 | trace_ext4_writepages(inode, wbc); |
| 2454 | 2385 | ||
| 2455 | /* | 2386 | /* |
| 2456 | * No pages to write? This is mainly a kludge to avoid starting | 2387 | * No pages to write? This is mainly a kludge to avoid starting |
| @@ -2460,164 +2391,165 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2460 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) | 2391 | if (!mapping->nrpages || !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) |
| 2461 | return 0; | 2392 | return 0; |
| 2462 | 2393 | ||
| 2394 | if (ext4_should_journal_data(inode)) { | ||
| 2395 | struct blk_plug plug; | ||
| 2396 | int ret; | ||
| 2397 | |||
| 2398 | blk_start_plug(&plug); | ||
| 2399 | ret = write_cache_pages(mapping, wbc, __writepage, mapping); | ||
| 2400 | blk_finish_plug(&plug); | ||
| 2401 | return ret; | ||
| 2402 | } | ||
| 2403 | |||
| 2463 | /* | 2404 | /* |
| 2464 | * If the filesystem has aborted, it is read-only, so return | 2405 | * If the filesystem has aborted, it is read-only, so return |
| 2465 | * right away instead of dumping stack traces later on that | 2406 | * right away instead of dumping stack traces later on that |
| 2466 | * will obscure the real source of the problem. We test | 2407 | * will obscure the real source of the problem. We test |
| 2467 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because | 2408 | * EXT4_MF_FS_ABORTED instead of sb->s_flag's MS_RDONLY because |
| 2468 | * the latter could be true if the filesystem is mounted | 2409 | * the latter could be true if the filesystem is mounted |
| 2469 | * read-only, and in that case, ext4_da_writepages should | 2410 | * read-only, and in that case, ext4_writepages should |
| 2470 | * *never* be called, so if that ever happens, we would want | 2411 | * *never* be called, so if that ever happens, we would want |
| 2471 | * the stack trace. | 2412 | * the stack trace. |
| 2472 | */ | 2413 | */ |
| 2473 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) | 2414 | if (unlikely(sbi->s_mount_flags & EXT4_MF_FS_ABORTED)) |
| 2474 | return -EROFS; | 2415 | return -EROFS; |
| 2475 | 2416 | ||
| 2417 | if (ext4_should_dioread_nolock(inode)) { | ||
| 2418 | /* | ||
| 2419 | * We may need to convert upto one extent per block in | ||
| 2420 | * the page and we may dirty the inode. | ||
| 2421 | */ | ||
| 2422 | rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits); | ||
| 2423 | } | ||
| 2424 | |||
| 2425 | /* | ||
| 2426 | * If we have inline data and arrive here, it means that | ||
| 2427 | * we will soon create the block for the 1st page, so | ||
| 2428 | * we'd better clear the inline data here. | ||
| 2429 | */ | ||
| 2430 | if (ext4_has_inline_data(inode)) { | ||
| 2431 | /* Just inode will be modified... */ | ||
| 2432 | handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); | ||
| 2433 | if (IS_ERR(handle)) { | ||
| 2434 | ret = PTR_ERR(handle); | ||
| 2435 | goto out_writepages; | ||
| 2436 | } | ||
| 2437 | BUG_ON(ext4_test_inode_state(inode, | ||
| 2438 | EXT4_STATE_MAY_INLINE_DATA)); | ||
| 2439 | ext4_destroy_inline_data(handle, inode); | ||
| 2440 | ext4_journal_stop(handle); | ||
| 2441 | } | ||
| 2442 | |||
| 2476 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | 2443 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) |
| 2477 | range_whole = 1; | 2444 | range_whole = 1; |
| 2478 | 2445 | ||
| 2479 | range_cyclic = wbc->range_cyclic; | ||
| 2480 | if (wbc->range_cyclic) { | 2446 | if (wbc->range_cyclic) { |
| 2481 | index = mapping->writeback_index; | 2447 | writeback_index = mapping->writeback_index; |
| 2482 | if (index) | 2448 | if (writeback_index) |
| 2483 | cycled = 0; | 2449 | cycled = 0; |
| 2484 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2450 | mpd.first_page = writeback_index; |
| 2485 | wbc->range_end = LLONG_MAX; | 2451 | mpd.last_page = -1; |
| 2486 | wbc->range_cyclic = 0; | ||
| 2487 | end = -1; | ||
| 2488 | } else { | 2452 | } else { |
| 2489 | index = wbc->range_start >> PAGE_CACHE_SHIFT; | 2453 | mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT; |
| 2490 | end = wbc->range_end >> PAGE_CACHE_SHIFT; | 2454 | mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT; |
| 2491 | } | ||
| 2492 | |||
| 2493 | /* | ||
| 2494 | * This works around two forms of stupidity. The first is in | ||
| 2495 | * the writeback code, which caps the maximum number of pages | ||
| 2496 | * written to be 1024 pages. This is wrong on multiple | ||
| 2497 | * levels; different architectues have a different page size, | ||
| 2498 | * which changes the maximum amount of data which gets | ||
| 2499 | * written. Secondly, 4 megabytes is way too small. XFS | ||
| 2500 | * forces this value to be 16 megabytes by multiplying | ||
| 2501 | * nr_to_write parameter by four, and then relies on its | ||
| 2502 | * allocator to allocate larger extents to make them | ||
| 2503 | * contiguous. Unfortunately this brings us to the second | ||
| 2504 | * stupidity, which is that ext4's mballoc code only allocates | ||
| 2505 | * at most 2048 blocks. So we force contiguous writes up to | ||
| 2506 | * the number of dirty blocks in the inode, or | ||
| 2507 | * sbi->max_writeback_mb_bump whichever is smaller. | ||
| 2508 | */ | ||
| 2509 | max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT); | ||
| 2510 | if (!range_cyclic && range_whole) { | ||
| 2511 | if (wbc->nr_to_write == LONG_MAX) | ||
| 2512 | desired_nr_to_write = wbc->nr_to_write; | ||
| 2513 | else | ||
| 2514 | desired_nr_to_write = wbc->nr_to_write * 8; | ||
| 2515 | } else | ||
| 2516 | desired_nr_to_write = ext4_num_dirty_pages(inode, index, | ||
| 2517 | max_pages); | ||
| 2518 | if (desired_nr_to_write > max_pages) | ||
| 2519 | desired_nr_to_write = max_pages; | ||
| 2520 | |||
| 2521 | if (wbc->nr_to_write < desired_nr_to_write) { | ||
| 2522 | nr_to_writebump = desired_nr_to_write - wbc->nr_to_write; | ||
| 2523 | wbc->nr_to_write = desired_nr_to_write; | ||
| 2524 | } | 2455 | } |
| 2525 | 2456 | ||
| 2457 | mpd.inode = inode; | ||
| 2458 | mpd.wbc = wbc; | ||
| 2459 | ext4_io_submit_init(&mpd.io_submit, wbc); | ||
| 2526 | retry: | 2460 | retry: |
| 2527 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) | 2461 | if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) |
| 2528 | tag_pages_for_writeback(mapping, index, end); | 2462 | tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page); |
| 2529 | 2463 | done = false; | |
| 2530 | blk_start_plug(&plug); | 2464 | blk_start_plug(&plug); |
| 2531 | while (!ret && wbc->nr_to_write > 0) { | 2465 | while (!done && mpd.first_page <= mpd.last_page) { |
| 2466 | /* For each extent of pages we use new io_end */ | ||
| 2467 | mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL); | ||
| 2468 | if (!mpd.io_submit.io_end) { | ||
| 2469 | ret = -ENOMEM; | ||
| 2470 | break; | ||
| 2471 | } | ||
| 2532 | 2472 | ||
| 2533 | /* | 2473 | /* |
| 2534 | * we insert one extent at a time. So we need | 2474 | * We have two constraints: We find one extent to map and we |
| 2535 | * credit needed for single extent allocation. | 2475 | * must always write out whole page (makes a difference when |
| 2536 | * journalled mode is currently not supported | 2476 | * blocksize < pagesize) so that we don't block on IO when we |
| 2537 | * by delalloc | 2477 | * try to write out the rest of the page. Journalled mode is |
| 2478 | * not supported by delalloc. | ||
| 2538 | */ | 2479 | */ |
| 2539 | BUG_ON(ext4_should_journal_data(inode)); | 2480 | BUG_ON(ext4_should_journal_data(inode)); |
| 2540 | needed_blocks = ext4_da_writepages_trans_blocks(inode); | 2481 | needed_blocks = ext4_da_writepages_trans_blocks(inode); |
| 2541 | 2482 | ||
| 2542 | /* start a new transaction*/ | 2483 | /* start a new transaction */ |
| 2543 | handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, | 2484 | handle = ext4_journal_start_with_reserve(inode, |
| 2544 | needed_blocks); | 2485 | EXT4_HT_WRITE_PAGE, needed_blocks, rsv_blocks); |
| 2545 | if (IS_ERR(handle)) { | 2486 | if (IS_ERR(handle)) { |
| 2546 | ret = PTR_ERR(handle); | 2487 | ret = PTR_ERR(handle); |
| 2547 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " | 2488 | ext4_msg(inode->i_sb, KERN_CRIT, "%s: jbd2_start: " |
| 2548 | "%ld pages, ino %lu; err %d", __func__, | 2489 | "%ld pages, ino %lu; err %d", __func__, |
| 2549 | wbc->nr_to_write, inode->i_ino, ret); | 2490 | wbc->nr_to_write, inode->i_ino, ret); |
| 2550 | blk_finish_plug(&plug); | 2491 | /* Release allocated io_end */ |
| 2551 | goto out_writepages; | 2492 | ext4_put_io_end(mpd.io_submit.io_end); |
| 2493 | break; | ||
| 2552 | } | 2494 | } |
| 2553 | 2495 | ||
| 2554 | /* | 2496 | trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc); |
| 2555 | * Now call write_cache_pages_da() to find the next | 2497 | ret = mpage_prepare_extent_to_map(&mpd); |
| 2556 | * contiguous region of logical blocks that need | 2498 | if (!ret) { |
| 2557 | * blocks to be allocated by ext4 and submit them. | 2499 | if (mpd.map.m_len) |
| 2558 | */ | 2500 | ret = mpage_map_and_submit_extent(handle, &mpd, |
| 2559 | ret = write_cache_pages_da(handle, mapping, | 2501 | &give_up_on_write); |
| 2560 | wbc, &mpd, &done_index); | 2502 | else { |
| 2561 | /* | 2503 | /* |
| 2562 | * If we have a contiguous extent of pages and we | 2504 | * We scanned the whole range (or exhausted |
| 2563 | * haven't done the I/O yet, map the blocks and submit | 2505 | * nr_to_write), submitted what was mapped and |
| 2564 | * them for I/O. | 2506 | * didn't find anything needing mapping. We are |
| 2565 | */ | 2507 | * done. |
| 2566 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2508 | */ |
| 2567 | mpage_da_map_and_submit(&mpd); | 2509 | done = true; |
| 2568 | ret = MPAGE_DA_EXTENT_TAIL; | 2510 | } |
| 2569 | } | 2511 | } |
| 2570 | trace_ext4_da_write_pages(inode, &mpd); | ||
| 2571 | wbc->nr_to_write -= mpd.pages_written; | ||
| 2572 | |||
| 2573 | ext4_journal_stop(handle); | 2512 | ext4_journal_stop(handle); |
| 2574 | 2513 | /* Submit prepared bio */ | |
| 2575 | if ((mpd.retval == -ENOSPC) && sbi->s_journal) { | 2514 | ext4_io_submit(&mpd.io_submit); |
| 2576 | /* commit the transaction which would | 2515 | /* Unlock pages we didn't use */ |
| 2516 | mpage_release_unused_pages(&mpd, give_up_on_write); | ||
| 2517 | /* Drop our io_end reference we got from init */ | ||
| 2518 | ext4_put_io_end(mpd.io_submit.io_end); | ||
| 2519 | |||
| 2520 | if (ret == -ENOSPC && sbi->s_journal) { | ||
| 2521 | /* | ||
| 2522 | * Commit the transaction which would | ||
| 2577 | * free blocks released in the transaction | 2523 | * free blocks released in the transaction |
| 2578 | * and try again | 2524 | * and try again |
| 2579 | */ | 2525 | */ |
| 2580 | jbd2_journal_force_commit_nested(sbi->s_journal); | 2526 | jbd2_journal_force_commit_nested(sbi->s_journal); |
| 2581 | ret = 0; | 2527 | ret = 0; |
| 2582 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 2528 | continue; |
| 2583 | /* | 2529 | } |
| 2584 | * Got one extent now try with rest of the pages. | 2530 | /* Fatal error - ENOMEM, EIO... */ |
| 2585 | * If mpd.retval is set -EIO, journal is aborted. | 2531 | if (ret) |
| 2586 | * So we don't need to write any more. | ||
| 2587 | */ | ||
| 2588 | pages_written += mpd.pages_written; | ||
| 2589 | ret = mpd.retval; | ||
| 2590 | io_done = 1; | ||
| 2591 | } else if (wbc->nr_to_write) | ||
| 2592 | /* | ||
| 2593 | * There is no more writeout needed | ||
| 2594 | * or we requested for a noblocking writeout | ||
| 2595 | * and we found the device congested | ||
| 2596 | */ | ||
| 2597 | break; | 2532 | break; |
| 2598 | } | 2533 | } |
| 2599 | blk_finish_plug(&plug); | 2534 | blk_finish_plug(&plug); |
| 2600 | if (!io_done && !cycled) { | 2535 | if (!ret && !cycled) { |
| 2601 | cycled = 1; | 2536 | cycled = 1; |
| 2602 | index = 0; | 2537 | mpd.last_page = writeback_index - 1; |
| 2603 | wbc->range_start = index << PAGE_CACHE_SHIFT; | 2538 | mpd.first_page = 0; |
| 2604 | wbc->range_end = mapping->writeback_index - 1; | ||
| 2605 | goto retry; | 2539 | goto retry; |
| 2606 | } | 2540 | } |
| 2607 | 2541 | ||
| 2608 | /* Update index */ | 2542 | /* Update index */ |
| 2609 | wbc->range_cyclic = range_cyclic; | ||
| 2610 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | 2543 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) |
| 2611 | /* | 2544 | /* |
| 2612 | * set the writeback_index so that range_cyclic | 2545 | * Set the writeback_index so that range_cyclic |
| 2613 | * mode will write it back later | 2546 | * mode will write it back later |
| 2614 | */ | 2547 | */ |
| 2615 | mapping->writeback_index = done_index; | 2548 | mapping->writeback_index = mpd.first_page; |
| 2616 | 2549 | ||
| 2617 | out_writepages: | 2550 | out_writepages: |
| 2618 | wbc->nr_to_write -= nr_to_writebump; | 2551 | trace_ext4_writepages_result(inode, wbc, ret, |
| 2619 | wbc->range_start = range_start; | 2552 | nr_to_write - wbc->nr_to_write); |
| 2620 | trace_ext4_da_writepages_result(inode, wbc, ret, pages_written); | ||
| 2621 | return ret; | 2553 | return ret; |
| 2622 | } | 2554 | } |
| 2623 | 2555 | ||
| @@ -2829,7 +2761,8 @@ static int ext4_da_write_end(struct file *file, | |||
| 2829 | return ret ? ret : copied; | 2761 | return ret ? ret : copied; |
| 2830 | } | 2762 | } |
| 2831 | 2763 | ||
| 2832 | static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | 2764 | static void ext4_da_invalidatepage(struct page *page, unsigned int offset, |
| 2765 | unsigned int length) | ||
| 2833 | { | 2766 | { |
| 2834 | /* | 2767 | /* |
| 2835 | * Drop reserved blocks | 2768 | * Drop reserved blocks |
| @@ -2838,10 +2771,10 @@ static void ext4_da_invalidatepage(struct page *page, unsigned long offset) | |||
| 2838 | if (!page_has_buffers(page)) | 2771 | if (!page_has_buffers(page)) |
| 2839 | goto out; | 2772 | goto out; |
| 2840 | 2773 | ||
| 2841 | ext4_da_page_release_reservation(page, offset); | 2774 | ext4_da_page_release_reservation(page, offset, length); |
| 2842 | 2775 | ||
| 2843 | out: | 2776 | out: |
| 2844 | ext4_invalidatepage(page, offset); | 2777 | ext4_invalidatepage(page, offset, length); |
| 2845 | 2778 | ||
| 2846 | return; | 2779 | return; |
| 2847 | } | 2780 | } |
| @@ -2864,7 +2797,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
| 2864 | * laptop_mode, not even desirable). However, to do otherwise | 2797 | * laptop_mode, not even desirable). However, to do otherwise |
| 2865 | * would require replicating code paths in: | 2798 | * would require replicating code paths in: |
| 2866 | * | 2799 | * |
| 2867 | * ext4_da_writepages() -> | 2800 | * ext4_writepages() -> |
| 2868 | * write_cache_pages() ---> (via passed in callback function) | 2801 | * write_cache_pages() ---> (via passed in callback function) |
| 2869 | * __mpage_da_writepage() --> | 2802 | * __mpage_da_writepage() --> |
| 2870 | * mpage_add_bh_to_extent() | 2803 | * mpage_add_bh_to_extent() |
| @@ -2989,37 +2922,40 @@ ext4_readpages(struct file *file, struct address_space *mapping, | |||
| 2989 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); | 2922 | return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); |
| 2990 | } | 2923 | } |
| 2991 | 2924 | ||
| 2992 | static void ext4_invalidatepage(struct page *page, unsigned long offset) | 2925 | static void ext4_invalidatepage(struct page *page, unsigned int offset, |
| 2926 | unsigned int length) | ||
| 2993 | { | 2927 | { |
| 2994 | trace_ext4_invalidatepage(page, offset); | 2928 | trace_ext4_invalidatepage(page, offset, length); |
| 2995 | 2929 | ||
| 2996 | /* No journalling happens on data buffers when this function is used */ | 2930 | /* No journalling happens on data buffers when this function is used */ |
| 2997 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); | 2931 | WARN_ON(page_has_buffers(page) && buffer_jbd(page_buffers(page))); |
| 2998 | 2932 | ||
| 2999 | block_invalidatepage(page, offset); | 2933 | block_invalidatepage(page, offset, length); |
| 3000 | } | 2934 | } |
| 3001 | 2935 | ||
| 3002 | static int __ext4_journalled_invalidatepage(struct page *page, | 2936 | static int __ext4_journalled_invalidatepage(struct page *page, |
| 3003 | unsigned long offset) | 2937 | unsigned int offset, |
| 2938 | unsigned int length) | ||
| 3004 | { | 2939 | { |
| 3005 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 2940 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
| 3006 | 2941 | ||
| 3007 | trace_ext4_journalled_invalidatepage(page, offset); | 2942 | trace_ext4_journalled_invalidatepage(page, offset, length); |
| 3008 | 2943 | ||
| 3009 | /* | 2944 | /* |
| 3010 | * If it's a full truncate we just forget about the pending dirtying | 2945 | * If it's a full truncate we just forget about the pending dirtying |
| 3011 | */ | 2946 | */ |
| 3012 | if (offset == 0) | 2947 | if (offset == 0 && length == PAGE_CACHE_SIZE) |
| 3013 | ClearPageChecked(page); | 2948 | ClearPageChecked(page); |
| 3014 | 2949 | ||
| 3015 | return jbd2_journal_invalidatepage(journal, page, offset); | 2950 | return jbd2_journal_invalidatepage(journal, page, offset, length); |
| 3016 | } | 2951 | } |
| 3017 | 2952 | ||
| 3018 | /* Wrapper for aops... */ | 2953 | /* Wrapper for aops... */ |
| 3019 | static void ext4_journalled_invalidatepage(struct page *page, | 2954 | static void ext4_journalled_invalidatepage(struct page *page, |
| 3020 | unsigned long offset) | 2955 | unsigned int offset, |
| 2956 | unsigned int length) | ||
| 3021 | { | 2957 | { |
| 3022 | WARN_ON(__ext4_journalled_invalidatepage(page, offset) < 0); | 2958 | WARN_ON(__ext4_journalled_invalidatepage(page, offset, length) < 0); |
| 3023 | } | 2959 | } |
| 3024 | 2960 | ||
| 3025 | static int ext4_releasepage(struct page *page, gfp_t wait) | 2961 | static int ext4_releasepage(struct page *page, gfp_t wait) |
| @@ -3067,9 +3003,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3067 | struct inode *inode = file_inode(iocb->ki_filp); | 3003 | struct inode *inode = file_inode(iocb->ki_filp); |
| 3068 | ext4_io_end_t *io_end = iocb->private; | 3004 | ext4_io_end_t *io_end = iocb->private; |
| 3069 | 3005 | ||
| 3070 | /* if not async direct IO or dio with 0 bytes write, just return */ | 3006 | /* if not async direct IO just return */ |
| 3071 | if (!io_end || !size) | 3007 | if (!io_end) { |
| 3072 | goto out; | 3008 | inode_dio_done(inode); |
| 3009 | if (is_async) | ||
| 3010 | aio_complete(iocb, ret, 0); | ||
| 3011 | return; | ||
| 3012 | } | ||
| 3073 | 3013 | ||
| 3074 | ext_debug("ext4_end_io_dio(): io_end 0x%p " | 3014 | ext_debug("ext4_end_io_dio(): io_end 0x%p " |
| 3075 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", | 3015 | "for inode %lu, iocb 0x%p, offset %llu, size %zd\n", |
| @@ -3077,25 +3017,13 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, | |||
| 3077 | size); | 3017 | size); |
| 3078 | 3018 | ||
| 3079 | iocb->private = NULL; | 3019 | iocb->private = NULL; |
| 3080 | |||
| 3081 | /* if not aio dio with unwritten extents, just free io and return */ | ||
| 3082 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
| 3083 | ext4_free_io_end(io_end); | ||
| 3084 | out: | ||
| 3085 | inode_dio_done(inode); | ||
| 3086 | if (is_async) | ||
| 3087 | aio_complete(iocb, ret, 0); | ||
| 3088 | return; | ||
| 3089 | } | ||
| 3090 | |||
| 3091 | io_end->offset = offset; | 3020 | io_end->offset = offset; |
| 3092 | io_end->size = size; | 3021 | io_end->size = size; |
| 3093 | if (is_async) { | 3022 | if (is_async) { |
| 3094 | io_end->iocb = iocb; | 3023 | io_end->iocb = iocb; |
| 3095 | io_end->result = ret; | 3024 | io_end->result = ret; |
| 3096 | } | 3025 | } |
| 3097 | 3026 | ext4_put_io_end_defer(io_end); | |
| 3098 | ext4_add_complete_io(io_end); | ||
| 3099 | } | 3027 | } |
| 3100 | 3028 | ||
| 3101 | /* | 3029 | /* |
| @@ -3129,6 +3057,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3129 | get_block_t *get_block_func = NULL; | 3057 | get_block_t *get_block_func = NULL; |
| 3130 | int dio_flags = 0; | 3058 | int dio_flags = 0; |
| 3131 | loff_t final_size = offset + count; | 3059 | loff_t final_size = offset + count; |
| 3060 | ext4_io_end_t *io_end = NULL; | ||
| 3132 | 3061 | ||
| 3133 | /* Use the old path for reads and writes beyond i_size. */ | 3062 | /* Use the old path for reads and writes beyond i_size. */ |
| 3134 | if (rw != WRITE || final_size > inode->i_size) | 3063 | if (rw != WRITE || final_size > inode->i_size) |
| @@ -3136,11 +3065,18 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3136 | 3065 | ||
| 3137 | BUG_ON(iocb->private == NULL); | 3066 | BUG_ON(iocb->private == NULL); |
| 3138 | 3067 | ||
| 3068 | /* | ||
| 3069 | * Make all waiters for direct IO properly wait also for extent | ||
| 3070 | * conversion. This also disallows race between truncate() and | ||
| 3071 | * overwrite DIO as i_dio_count needs to be incremented under i_mutex. | ||
| 3072 | */ | ||
| 3073 | if (rw == WRITE) | ||
| 3074 | atomic_inc(&inode->i_dio_count); | ||
| 3075 | |||
| 3139 | /* If we do a overwrite dio, i_mutex locking can be released */ | 3076 | /* If we do a overwrite dio, i_mutex locking can be released */ |
| 3140 | overwrite = *((int *)iocb->private); | 3077 | overwrite = *((int *)iocb->private); |
| 3141 | 3078 | ||
| 3142 | if (overwrite) { | 3079 | if (overwrite) { |
| 3143 | atomic_inc(&inode->i_dio_count); | ||
| 3144 | down_read(&EXT4_I(inode)->i_data_sem); | 3080 | down_read(&EXT4_I(inode)->i_data_sem); |
| 3145 | mutex_unlock(&inode->i_mutex); | 3081 | mutex_unlock(&inode->i_mutex); |
| 3146 | } | 3082 | } |
| @@ -3167,13 +3103,16 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3167 | iocb->private = NULL; | 3103 | iocb->private = NULL; |
| 3168 | ext4_inode_aio_set(inode, NULL); | 3104 | ext4_inode_aio_set(inode, NULL); |
| 3169 | if (!is_sync_kiocb(iocb)) { | 3105 | if (!is_sync_kiocb(iocb)) { |
| 3170 | ext4_io_end_t *io_end = ext4_init_io_end(inode, GFP_NOFS); | 3106 | io_end = ext4_init_io_end(inode, GFP_NOFS); |
| 3171 | if (!io_end) { | 3107 | if (!io_end) { |
| 3172 | ret = -ENOMEM; | 3108 | ret = -ENOMEM; |
| 3173 | goto retake_lock; | 3109 | goto retake_lock; |
| 3174 | } | 3110 | } |
| 3175 | io_end->flag |= EXT4_IO_END_DIRECT; | 3111 | io_end->flag |= EXT4_IO_END_DIRECT; |
| 3176 | iocb->private = io_end; | 3112 | /* |
| 3113 | * Grab reference for DIO. Will be dropped in ext4_end_io_dio() | ||
| 3114 | */ | ||
| 3115 | iocb->private = ext4_get_io_end(io_end); | ||
| 3177 | /* | 3116 | /* |
| 3178 | * we save the io structure for current async direct | 3117 | * we save the io structure for current async direct |
| 3179 | * IO, so that later ext4_map_blocks() could flag the | 3118 | * IO, so that later ext4_map_blocks() could flag the |
| @@ -3197,33 +3136,42 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3197 | NULL, | 3136 | NULL, |
| 3198 | dio_flags); | 3137 | dio_flags); |
| 3199 | 3138 | ||
| 3200 | if (iocb->private) | ||
| 3201 | ext4_inode_aio_set(inode, NULL); | ||
| 3202 | /* | 3139 | /* |
| 3203 | * The io_end structure takes a reference to the inode, that | 3140 | * Put our reference to io_end. This can free the io_end structure e.g. |
| 3204 | * structure needs to be destroyed and the reference to the | 3141 | * in sync IO case or in case of error. It can even perform extent |
| 3205 | * inode need to be dropped, when IO is complete, even with 0 | 3142 | * conversion if all bios we submitted finished before we got here. |
| 3206 | * byte write, or failed. | 3143 | * Note that in that case iocb->private can be already set to NULL |
| 3207 | * | 3144 | * here. |
| 3208 | * In the successful AIO DIO case, the io_end structure will | ||
| 3209 | * be destroyed and the reference to the inode will be dropped | ||
| 3210 | * after the end_io call back function is called. | ||
| 3211 | * | ||
| 3212 | * In the case there is 0 byte write, or error case, since VFS | ||
| 3213 | * direct IO won't invoke the end_io call back function, we | ||
| 3214 | * need to free the end_io structure here. | ||
| 3215 | */ | 3145 | */ |
| 3216 | if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { | 3146 | if (io_end) { |
| 3217 | ext4_free_io_end(iocb->private); | 3147 | ext4_inode_aio_set(inode, NULL); |
| 3218 | iocb->private = NULL; | 3148 | ext4_put_io_end(io_end); |
| 3219 | } else if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | 3149 | /* |
| 3150 | * When no IO was submitted ext4_end_io_dio() was not | ||
| 3151 | * called so we have to put iocb's reference. | ||
| 3152 | */ | ||
| 3153 | if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) { | ||
| 3154 | WARN_ON(iocb->private != io_end); | ||
| 3155 | WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
| 3156 | WARN_ON(io_end->iocb); | ||
| 3157 | /* | ||
| 3158 | * Generic code already did inode_dio_done() so we | ||
| 3159 | * have to clear EXT4_IO_END_DIRECT to not do it for | ||
| 3160 | * the second time. | ||
| 3161 | */ | ||
| 3162 | io_end->flag = 0; | ||
| 3163 | ext4_put_io_end(io_end); | ||
| 3164 | iocb->private = NULL; | ||
| 3165 | } | ||
| 3166 | } | ||
| 3167 | if (ret > 0 && !overwrite && ext4_test_inode_state(inode, | ||
| 3220 | EXT4_STATE_DIO_UNWRITTEN)) { | 3168 | EXT4_STATE_DIO_UNWRITTEN)) { |
| 3221 | int err; | 3169 | int err; |
| 3222 | /* | 3170 | /* |
| 3223 | * for non AIO case, since the IO is already | 3171 | * for non AIO case, since the IO is already |
| 3224 | * completed, we could do the conversion right here | 3172 | * completed, we could do the conversion right here |
| 3225 | */ | 3173 | */ |
| 3226 | err = ext4_convert_unwritten_extents(inode, | 3174 | err = ext4_convert_unwritten_extents(NULL, inode, |
| 3227 | offset, ret); | 3175 | offset, ret); |
| 3228 | if (err < 0) | 3176 | if (err < 0) |
| 3229 | ret = err; | 3177 | ret = err; |
| @@ -3231,9 +3179,10 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
| 3231 | } | 3179 | } |
| 3232 | 3180 | ||
| 3233 | retake_lock: | 3181 | retake_lock: |
| 3182 | if (rw == WRITE) | ||
| 3183 | inode_dio_done(inode); | ||
| 3234 | /* take i_mutex locking again if we do a ovewrite dio */ | 3184 | /* take i_mutex locking again if we do a ovewrite dio */ |
| 3235 | if (overwrite) { | 3185 | if (overwrite) { |
| 3236 | inode_dio_done(inode); | ||
| 3237 | up_read(&EXT4_I(inode)->i_data_sem); | 3186 | up_read(&EXT4_I(inode)->i_data_sem); |
| 3238 | mutex_lock(&inode->i_mutex); | 3187 | mutex_lock(&inode->i_mutex); |
| 3239 | } | 3188 | } |
| @@ -3292,6 +3241,7 @@ static const struct address_space_operations ext4_aops = { | |||
| 3292 | .readpage = ext4_readpage, | 3241 | .readpage = ext4_readpage, |
| 3293 | .readpages = ext4_readpages, | 3242 | .readpages = ext4_readpages, |
| 3294 | .writepage = ext4_writepage, | 3243 | .writepage = ext4_writepage, |
| 3244 | .writepages = ext4_writepages, | ||
| 3295 | .write_begin = ext4_write_begin, | 3245 | .write_begin = ext4_write_begin, |
| 3296 | .write_end = ext4_write_end, | 3246 | .write_end = ext4_write_end, |
| 3297 | .bmap = ext4_bmap, | 3247 | .bmap = ext4_bmap, |
| @@ -3307,6 +3257,7 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
| 3307 | .readpage = ext4_readpage, | 3257 | .readpage = ext4_readpage, |
| 3308 | .readpages = ext4_readpages, | 3258 | .readpages = ext4_readpages, |
| 3309 | .writepage = ext4_writepage, | 3259 | .writepage = ext4_writepage, |
| 3260 | .writepages = ext4_writepages, | ||
| 3310 | .write_begin = ext4_write_begin, | 3261 | .write_begin = ext4_write_begin, |
| 3311 | .write_end = ext4_journalled_write_end, | 3262 | .write_end = ext4_journalled_write_end, |
| 3312 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3263 | .set_page_dirty = ext4_journalled_set_page_dirty, |
| @@ -3322,7 +3273,7 @@ static const struct address_space_operations ext4_da_aops = { | |||
| 3322 | .readpage = ext4_readpage, | 3273 | .readpage = ext4_readpage, |
| 3323 | .readpages = ext4_readpages, | 3274 | .readpages = ext4_readpages, |
| 3324 | .writepage = ext4_writepage, | 3275 | .writepage = ext4_writepage, |
| 3325 | .writepages = ext4_da_writepages, | 3276 | .writepages = ext4_writepages, |
| 3326 | .write_begin = ext4_da_write_begin, | 3277 | .write_begin = ext4_da_write_begin, |
| 3327 | .write_end = ext4_da_write_end, | 3278 | .write_end = ext4_da_write_end, |
| 3328 | .bmap = ext4_bmap, | 3279 | .bmap = ext4_bmap, |
| @@ -3355,89 +3306,56 @@ void ext4_set_aops(struct inode *inode) | |||
| 3355 | inode->i_mapping->a_ops = &ext4_aops; | 3306 | inode->i_mapping->a_ops = &ext4_aops; |
| 3356 | } | 3307 | } |
| 3357 | 3308 | ||
| 3358 | |||
| 3359 | /* | 3309 | /* |
| 3360 | * ext4_discard_partial_page_buffers() | 3310 | * ext4_block_truncate_page() zeroes out a mapping from file offset `from' |
| 3361 | * Wrapper function for ext4_discard_partial_page_buffers_no_lock. | 3311 | * up to the end of the block which corresponds to `from'. |
| 3362 | * This function finds and locks the page containing the offset | 3312 | * This required during truncate. We need to physically zero the tail end |
| 3363 | * "from" and passes it to ext4_discard_partial_page_buffers_no_lock. | 3313 | * of that block so it doesn't yield old data if the file is later grown. |
| 3364 | * Calling functions that already have the page locked should call | ||
| 3365 | * ext4_discard_partial_page_buffers_no_lock directly. | ||
| 3366 | */ | 3314 | */ |
| 3367 | int ext4_discard_partial_page_buffers(handle_t *handle, | 3315 | int ext4_block_truncate_page(handle_t *handle, |
| 3368 | struct address_space *mapping, loff_t from, | 3316 | struct address_space *mapping, loff_t from) |
| 3369 | loff_t length, int flags) | ||
| 3370 | { | 3317 | { |
| 3318 | unsigned offset = from & (PAGE_CACHE_SIZE-1); | ||
| 3319 | unsigned length; | ||
| 3320 | unsigned blocksize; | ||
| 3371 | struct inode *inode = mapping->host; | 3321 | struct inode *inode = mapping->host; |
| 3372 | struct page *page; | ||
| 3373 | int err = 0; | ||
| 3374 | 3322 | ||
| 3375 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, | 3323 | blocksize = inode->i_sb->s_blocksize; |
| 3376 | mapping_gfp_mask(mapping) & ~__GFP_FS); | 3324 | length = blocksize - (offset & (blocksize - 1)); |
| 3377 | if (!page) | ||
| 3378 | return -ENOMEM; | ||
| 3379 | |||
| 3380 | err = ext4_discard_partial_page_buffers_no_lock(handle, inode, page, | ||
| 3381 | from, length, flags); | ||
| 3382 | 3325 | ||
| 3383 | unlock_page(page); | 3326 | return ext4_block_zero_page_range(handle, mapping, from, length); |
| 3384 | page_cache_release(page); | ||
| 3385 | return err; | ||
| 3386 | } | 3327 | } |
| 3387 | 3328 | ||
| 3388 | /* | 3329 | /* |
| 3389 | * ext4_discard_partial_page_buffers_no_lock() | 3330 | * ext4_block_zero_page_range() zeros out a mapping of length 'length' |
| 3390 | * Zeros a page range of length 'length' starting from offset 'from'. | 3331 | * starting from file offset 'from'. The range to be zero'd must |
| 3391 | * Buffer heads that correspond to the block aligned regions of the | 3332 | * be contained with in one block. If the specified range exceeds |
| 3392 | * zeroed range will be unmapped. Unblock aligned regions | 3333 | * the end of the block it will be shortened to end of the block |
| 3393 | * will have the corresponding buffer head mapped if needed so that | 3334 | * that cooresponds to 'from' |
| 3394 | * that region of the page can be updated with the partial zero out. | ||
| 3395 | * | ||
| 3396 | * This function assumes that the page has already been locked. The | ||
| 3397 | * The range to be discarded must be contained with in the given page. | ||
| 3398 | * If the specified range exceeds the end of the page it will be shortened | ||
| 3399 | * to the end of the page that corresponds to 'from'. This function is | ||
| 3400 | * appropriate for updating a page and it buffer heads to be unmapped and | ||
| 3401 | * zeroed for blocks that have been either released, or are going to be | ||
| 3402 | * released. | ||
| 3403 | * | ||
| 3404 | * handle: The journal handle | ||
| 3405 | * inode: The files inode | ||
| 3406 | * page: A locked page that contains the offset "from" | ||
| 3407 | * from: The starting byte offset (from the beginning of the file) | ||
| 3408 | * to begin discarding | ||
| 3409 | * len: The length of bytes to discard | ||
| 3410 | * flags: Optional flags that may be used: | ||
| 3411 | * | ||
| 3412 | * EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED | ||
| 3413 | * Only zero the regions of the page whose buffer heads | ||
| 3414 | * have already been unmapped. This flag is appropriate | ||
| 3415 | * for updating the contents of a page whose blocks may | ||
| 3416 | * have already been released, and we only want to zero | ||
| 3417 | * out the regions that correspond to those released blocks. | ||
| 3418 | * | ||
| 3419 | * Returns zero on success or negative on failure. | ||
| 3420 | */ | 3335 | */ |
| 3421 | static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | 3336 | int ext4_block_zero_page_range(handle_t *handle, |
| 3422 | struct inode *inode, struct page *page, loff_t from, | 3337 | struct address_space *mapping, loff_t from, loff_t length) |
| 3423 | loff_t length, int flags) | ||
| 3424 | { | 3338 | { |
| 3425 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; | 3339 | ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; |
| 3426 | unsigned int offset = from & (PAGE_CACHE_SIZE-1); | 3340 | unsigned offset = from & (PAGE_CACHE_SIZE-1); |
| 3427 | unsigned int blocksize, max, pos; | 3341 | unsigned blocksize, max, pos; |
| 3428 | ext4_lblk_t iblock; | 3342 | ext4_lblk_t iblock; |
| 3343 | struct inode *inode = mapping->host; | ||
| 3429 | struct buffer_head *bh; | 3344 | struct buffer_head *bh; |
| 3345 | struct page *page; | ||
| 3430 | int err = 0; | 3346 | int err = 0; |
| 3431 | 3347 | ||
| 3432 | blocksize = inode->i_sb->s_blocksize; | 3348 | page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT, |
| 3433 | max = PAGE_CACHE_SIZE - offset; | 3349 | mapping_gfp_mask(mapping) & ~__GFP_FS); |
| 3350 | if (!page) | ||
| 3351 | return -ENOMEM; | ||
| 3434 | 3352 | ||
| 3435 | if (index != page->index) | 3353 | blocksize = inode->i_sb->s_blocksize; |
| 3436 | return -EINVAL; | 3354 | max = blocksize - (offset & (blocksize - 1)); |
| 3437 | 3355 | ||
| 3438 | /* | 3356 | /* |
| 3439 | * correct length if it does not fall between | 3357 | * correct length if it does not fall between |
| 3440 | * 'from' and the end of the page | 3358 | * 'from' and the end of the block |
| 3441 | */ | 3359 | */ |
| 3442 | if (length > max || length < 0) | 3360 | if (length > max || length < 0) |
| 3443 | length = max; | 3361 | length = max; |
| @@ -3455,106 +3373,91 @@ static int ext4_discard_partial_page_buffers_no_lock(handle_t *handle, | |||
| 3455 | iblock++; | 3373 | iblock++; |
| 3456 | pos += blocksize; | 3374 | pos += blocksize; |
| 3457 | } | 3375 | } |
| 3458 | 3376 | if (buffer_freed(bh)) { | |
| 3459 | pos = offset; | 3377 | BUFFER_TRACE(bh, "freed: skip"); |
| 3460 | while (pos < offset + length) { | 3378 | goto unlock; |
| 3461 | unsigned int end_of_block, range_to_discard; | 3379 | } |
| 3462 | 3380 | if (!buffer_mapped(bh)) { | |
| 3463 | err = 0; | 3381 | BUFFER_TRACE(bh, "unmapped"); |
| 3464 | 3382 | ext4_get_block(inode, iblock, bh, 0); | |
| 3465 | /* The length of space left to zero and unmap */ | 3383 | /* unmapped? It's a hole - nothing to do */ |
| 3466 | range_to_discard = offset + length - pos; | ||
| 3467 | |||
| 3468 | /* The length of space until the end of the block */ | ||
| 3469 | end_of_block = blocksize - (pos & (blocksize-1)); | ||
| 3470 | |||
| 3471 | /* | ||
| 3472 | * Do not unmap or zero past end of block | ||
| 3473 | * for this buffer head | ||
| 3474 | */ | ||
| 3475 | if (range_to_discard > end_of_block) | ||
| 3476 | range_to_discard = end_of_block; | ||
| 3477 | |||
| 3478 | |||
| 3479 | /* | ||
| 3480 | * Skip this buffer head if we are only zeroing unampped | ||
| 3481 | * regions of the page | ||
| 3482 | */ | ||
| 3483 | if (flags & EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED && | ||
| 3484 | buffer_mapped(bh)) | ||
| 3485 | goto next; | ||
| 3486 | |||
| 3487 | /* If the range is block aligned, unmap */ | ||
| 3488 | if (range_to_discard == blocksize) { | ||
| 3489 | clear_buffer_dirty(bh); | ||
| 3490 | bh->b_bdev = NULL; | ||
| 3491 | clear_buffer_mapped(bh); | ||
| 3492 | clear_buffer_req(bh); | ||
| 3493 | clear_buffer_new(bh); | ||
| 3494 | clear_buffer_delay(bh); | ||
| 3495 | clear_buffer_unwritten(bh); | ||
| 3496 | clear_buffer_uptodate(bh); | ||
| 3497 | zero_user(page, pos, range_to_discard); | ||
| 3498 | BUFFER_TRACE(bh, "Buffer discarded"); | ||
| 3499 | goto next; | ||
| 3500 | } | ||
| 3501 | |||
| 3502 | /* | ||
| 3503 | * If this block is not completely contained in the range | ||
| 3504 | * to be discarded, then it is not going to be released. Because | ||
| 3505 | * we need to keep this block, we need to make sure this part | ||
| 3506 | * of the page is uptodate before we modify it by writeing | ||
| 3507 | * partial zeros on it. | ||
| 3508 | */ | ||
| 3509 | if (!buffer_mapped(bh)) { | 3384 | if (!buffer_mapped(bh)) { |
| 3510 | /* | 3385 | BUFFER_TRACE(bh, "still unmapped"); |
| 3511 | * Buffer head must be mapped before we can read | 3386 | goto unlock; |
| 3512 | * from the block | ||
| 3513 | */ | ||
| 3514 | BUFFER_TRACE(bh, "unmapped"); | ||
| 3515 | ext4_get_block(inode, iblock, bh, 0); | ||
| 3516 | /* unmapped? It's a hole - nothing to do */ | ||
| 3517 | if (!buffer_mapped(bh)) { | ||
| 3518 | BUFFER_TRACE(bh, "still unmapped"); | ||
| 3519 | goto next; | ||
| 3520 | } | ||
| 3521 | } | 3387 | } |
| 3388 | } | ||
| 3522 | 3389 | ||
| 3523 | /* Ok, it's mapped. Make sure it's up-to-date */ | 3390 | /* Ok, it's mapped. Make sure it's up-to-date */ |
| 3524 | if (PageUptodate(page)) | 3391 | if (PageUptodate(page)) |
| 3525 | set_buffer_uptodate(bh); | 3392 | set_buffer_uptodate(bh); |
| 3526 | 3393 | ||
| 3527 | if (!buffer_uptodate(bh)) { | 3394 | if (!buffer_uptodate(bh)) { |
| 3528 | err = -EIO; | 3395 | err = -EIO; |
| 3529 | ll_rw_block(READ, 1, &bh); | 3396 | ll_rw_block(READ, 1, &bh); |
| 3530 | wait_on_buffer(bh); | 3397 | wait_on_buffer(bh); |
| 3531 | /* Uhhuh. Read error. Complain and punt.*/ | 3398 | /* Uhhuh. Read error. Complain and punt. */ |
| 3532 | if (!buffer_uptodate(bh)) | 3399 | if (!buffer_uptodate(bh)) |
| 3533 | goto next; | 3400 | goto unlock; |
| 3534 | } | 3401 | } |
| 3402 | if (ext4_should_journal_data(inode)) { | ||
| 3403 | BUFFER_TRACE(bh, "get write access"); | ||
| 3404 | err = ext4_journal_get_write_access(handle, bh); | ||
| 3405 | if (err) | ||
| 3406 | goto unlock; | ||
| 3407 | } | ||
| 3408 | zero_user(page, offset, length); | ||
| 3409 | BUFFER_TRACE(bh, "zeroed end of block"); | ||
| 3535 | 3410 | ||
| 3536 | if (ext4_should_journal_data(inode)) { | 3411 | if (ext4_should_journal_data(inode)) { |
| 3537 | BUFFER_TRACE(bh, "get write access"); | 3412 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
| 3538 | err = ext4_journal_get_write_access(handle, bh); | 3413 | } else { |
| 3539 | if (err) | 3414 | err = 0; |
| 3540 | goto next; | 3415 | mark_buffer_dirty(bh); |
| 3541 | } | 3416 | if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) |
| 3417 | err = ext4_jbd2_file_inode(handle, inode); | ||
| 3418 | } | ||
| 3419 | |||
| 3420 | unlock: | ||
| 3421 | unlock_page(page); | ||
| 3422 | page_cache_release(page); | ||
| 3423 | return err; | ||
| 3424 | } | ||
| 3542 | 3425 | ||
| 3543 | zero_user(page, pos, range_to_discard); | 3426 | int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, |
| 3427 | loff_t lstart, loff_t length) | ||
| 3428 | { | ||
| 3429 | struct super_block *sb = inode->i_sb; | ||
| 3430 | struct address_space *mapping = inode->i_mapping; | ||
| 3431 | unsigned partial_start, partial_end; | ||
| 3432 | ext4_fsblk_t start, end; | ||
| 3433 | loff_t byte_end = (lstart + length - 1); | ||
| 3434 | int err = 0; | ||
| 3544 | 3435 | ||
| 3545 | err = 0; | 3436 | partial_start = lstart & (sb->s_blocksize - 1); |
| 3546 | if (ext4_should_journal_data(inode)) { | 3437 | partial_end = byte_end & (sb->s_blocksize - 1); |
| 3547 | err = ext4_handle_dirty_metadata(handle, inode, bh); | ||
| 3548 | } else | ||
| 3549 | mark_buffer_dirty(bh); | ||
| 3550 | 3438 | ||
| 3551 | BUFFER_TRACE(bh, "Partial buffer zeroed"); | 3439 | start = lstart >> sb->s_blocksize_bits; |
| 3552 | next: | 3440 | end = byte_end >> sb->s_blocksize_bits; |
| 3553 | bh = bh->b_this_page; | ||
| 3554 | iblock++; | ||
| 3555 | pos += range_to_discard; | ||
| 3556 | } | ||
| 3557 | 3441 | ||
| 3442 | /* Handle partial zero within the single block */ | ||
| 3443 | if (start == end && | ||
| 3444 | (partial_start || (partial_end != sb->s_blocksize - 1))) { | ||
| 3445 | err = ext4_block_zero_page_range(handle, mapping, | ||
| 3446 | lstart, length); | ||
| 3447 | return err; | ||
| 3448 | } | ||
| 3449 | /* Handle partial zero out on the start of the range */ | ||
| 3450 | if (partial_start) { | ||
| 3451 | err = ext4_block_zero_page_range(handle, mapping, | ||
| 3452 | lstart, sb->s_blocksize); | ||
| 3453 | if (err) | ||
| 3454 | return err; | ||
| 3455 | } | ||
| 3456 | /* Handle partial zero out on the end of the range */ | ||
| 3457 | if (partial_end != sb->s_blocksize - 1) | ||
| 3458 | err = ext4_block_zero_page_range(handle, mapping, | ||
| 3459 | byte_end - partial_end, | ||
| 3460 | partial_end + 1); | ||
| 3558 | return err; | 3461 | return err; |
| 3559 | } | 3462 | } |
| 3560 | 3463 | ||
| @@ -3580,14 +3483,12 @@ int ext4_can_truncate(struct inode *inode) | |||
| 3580 | * Returns: 0 on success or negative on failure | 3483 | * Returns: 0 on success or negative on failure |
| 3581 | */ | 3484 | */ |
| 3582 | 3485 | ||
| 3583 | int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | 3486 | int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length) |
| 3584 | { | 3487 | { |
| 3585 | struct inode *inode = file_inode(file); | ||
| 3586 | struct super_block *sb = inode->i_sb; | 3488 | struct super_block *sb = inode->i_sb; |
| 3587 | ext4_lblk_t first_block, stop_block; | 3489 | ext4_lblk_t first_block, stop_block; |
| 3588 | struct address_space *mapping = inode->i_mapping; | 3490 | struct address_space *mapping = inode->i_mapping; |
| 3589 | loff_t first_page, last_page, page_len; | 3491 | loff_t first_block_offset, last_block_offset; |
| 3590 | loff_t first_page_offset, last_page_offset; | ||
| 3591 | handle_t *handle; | 3492 | handle_t *handle; |
| 3592 | unsigned int credits; | 3493 | unsigned int credits; |
| 3593 | int ret = 0; | 3494 | int ret = 0; |
| @@ -3638,23 +3539,16 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
| 3638 | offset; | 3539 | offset; |
| 3639 | } | 3540 | } |
| 3640 | 3541 | ||
| 3641 | first_page = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; | 3542 | first_block_offset = round_up(offset, sb->s_blocksize); |
| 3642 | last_page = (offset + length) >> PAGE_CACHE_SHIFT; | 3543 | last_block_offset = round_down((offset + length), sb->s_blocksize) - 1; |
| 3643 | 3544 | ||
| 3644 | first_page_offset = first_page << PAGE_CACHE_SHIFT; | 3545 | /* Now release the pages and zero block aligned part of pages*/ |
| 3645 | last_page_offset = last_page << PAGE_CACHE_SHIFT; | 3546 | if (last_block_offset > first_block_offset) |
| 3646 | 3547 | truncate_pagecache_range(inode, first_block_offset, | |
| 3647 | /* Now release the pages */ | 3548 | last_block_offset); |
| 3648 | if (last_page_offset > first_page_offset) { | ||
| 3649 | truncate_pagecache_range(inode, first_page_offset, | ||
| 3650 | last_page_offset - 1); | ||
| 3651 | } | ||
| 3652 | 3549 | ||
| 3653 | /* Wait all existing dio workers, newcomers will block on i_mutex */ | 3550 | /* Wait all existing dio workers, newcomers will block on i_mutex */ |
| 3654 | ext4_inode_block_unlocked_dio(inode); | 3551 | ext4_inode_block_unlocked_dio(inode); |
| 3655 | ret = ext4_flush_unwritten_io(inode); | ||
| 3656 | if (ret) | ||
| 3657 | goto out_dio; | ||
| 3658 | inode_dio_wait(inode); | 3552 | inode_dio_wait(inode); |
| 3659 | 3553 | ||
| 3660 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3554 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
| @@ -3668,66 +3562,10 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length) | |||
| 3668 | goto out_dio; | 3562 | goto out_dio; |
| 3669 | } | 3563 | } |
| 3670 | 3564 | ||
| 3671 | /* | 3565 | ret = ext4_zero_partial_blocks(handle, inode, offset, |
| 3672 | * Now we need to zero out the non-page-aligned data in the | 3566 | length); |
| 3673 | * pages at the start and tail of the hole, and unmap the | 3567 | if (ret) |
| 3674 | * buffer heads for the block aligned regions of the page that | 3568 | goto out_stop; |
| 3675 | * were completely zeroed. | ||
| 3676 | */ | ||
| 3677 | if (first_page > last_page) { | ||
| 3678 | /* | ||
| 3679 | * If the file space being truncated is contained | ||
| 3680 | * within a page just zero out and unmap the middle of | ||
| 3681 | * that page | ||
| 3682 | */ | ||
| 3683 | ret = ext4_discard_partial_page_buffers(handle, | ||
| 3684 | mapping, offset, length, 0); | ||
| 3685 | |||
| 3686 | if (ret) | ||
| 3687 | goto out_stop; | ||
| 3688 | } else { | ||
| 3689 | /* | ||
| 3690 | * zero out and unmap the partial page that contains | ||
| 3691 | * the start of the hole | ||
| 3692 | */ | ||
| 3693 | page_len = first_page_offset - offset; | ||
| 3694 | if (page_len > 0) { | ||
| 3695 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
| 3696 | offset, page_len, 0); | ||
| 3697 | if (ret) | ||
| 3698 | goto out_stop; | ||
| 3699 | } | ||
| 3700 | |||
| 3701 | /* | ||
| 3702 | * zero out and unmap the partial page that contains | ||
| 3703 | * the end of the hole | ||
| 3704 | */ | ||
| 3705 | page_len = offset + length - last_page_offset; | ||
| 3706 | if (page_len > 0) { | ||
| 3707 | ret = ext4_discard_partial_page_buffers(handle, mapping, | ||
| 3708 | last_page_offset, page_len, 0); | ||
| 3709 | if (ret) | ||
| 3710 | goto out_stop; | ||
| 3711 | } | ||
| 3712 | } | ||
| 3713 | |||
| 3714 | /* | ||
| 3715 | * If i_size is contained in the last page, we need to | ||
| 3716 | * unmap and zero the partial page after i_size | ||
| 3717 | */ | ||
| 3718 | if (inode->i_size >> PAGE_CACHE_SHIFT == last_page && | ||
| 3719 | inode->i_size % PAGE_CACHE_SIZE != 0) { | ||
| 3720 | page_len = PAGE_CACHE_SIZE - | ||
| 3721 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
| 3722 | |||
| 3723 | if (page_len > 0) { | ||
| 3724 | ret = ext4_discard_partial_page_buffers(handle, | ||
| 3725 | mapping, inode->i_size, page_len, 0); | ||
| 3726 | |||
| 3727 | if (ret) | ||
| 3728 | goto out_stop; | ||
| 3729 | } | ||
| 3730 | } | ||
| 3731 | 3569 | ||
| 3732 | first_block = (offset + sb->s_blocksize - 1) >> | 3570 | first_block = (offset + sb->s_blocksize - 1) >> |
| 3733 | EXT4_BLOCK_SIZE_BITS(sb); | 3571 | EXT4_BLOCK_SIZE_BITS(sb); |
| @@ -3803,7 +3641,6 @@ void ext4_truncate(struct inode *inode) | |||
| 3803 | unsigned int credits; | 3641 | unsigned int credits; |
| 3804 | handle_t *handle; | 3642 | handle_t *handle; |
| 3805 | struct address_space *mapping = inode->i_mapping; | 3643 | struct address_space *mapping = inode->i_mapping; |
| 3806 | loff_t page_len; | ||
| 3807 | 3644 | ||
| 3808 | /* | 3645 | /* |
| 3809 | * There is a possibility that we're either freeing the inode | 3646 | * There is a possibility that we're either freeing the inode |
| @@ -3830,12 +3667,6 @@ void ext4_truncate(struct inode *inode) | |||
| 3830 | return; | 3667 | return; |
| 3831 | } | 3668 | } |
| 3832 | 3669 | ||
| 3833 | /* | ||
| 3834 | * finish any pending end_io work so we won't run the risk of | ||
| 3835 | * converting any truncated blocks to initialized later | ||
| 3836 | */ | ||
| 3837 | ext4_flush_unwritten_io(inode); | ||
| 3838 | |||
| 3839 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3670 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
| 3840 | credits = ext4_writepage_trans_blocks(inode); | 3671 | credits = ext4_writepage_trans_blocks(inode); |
| 3841 | else | 3672 | else |
| @@ -3847,14 +3678,8 @@ void ext4_truncate(struct inode *inode) | |||
| 3847 | return; | 3678 | return; |
| 3848 | } | 3679 | } |
| 3849 | 3680 | ||
| 3850 | if (inode->i_size % PAGE_CACHE_SIZE != 0) { | 3681 | if (inode->i_size & (inode->i_sb->s_blocksize - 1)) |
| 3851 | page_len = PAGE_CACHE_SIZE - | 3682 | ext4_block_truncate_page(handle, mapping, inode->i_size); |
| 3852 | (inode->i_size & (PAGE_CACHE_SIZE - 1)); | ||
| 3853 | |||
| 3854 | if (ext4_discard_partial_page_buffers(handle, | ||
| 3855 | mapping, inode->i_size, page_len, 0)) | ||
| 3856 | goto out_stop; | ||
| 3857 | } | ||
| 3858 | 3683 | ||
| 3859 | /* | 3684 | /* |
| 3860 | * We add the inode to the orphan list, so that if this | 3685 | * We add the inode to the orphan list, so that if this |
| @@ -4623,7 +4448,8 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode) | |||
| 4623 | inode->i_size >> PAGE_CACHE_SHIFT); | 4448 | inode->i_size >> PAGE_CACHE_SHIFT); |
| 4624 | if (!page) | 4449 | if (!page) |
| 4625 | return; | 4450 | return; |
| 4626 | ret = __ext4_journalled_invalidatepage(page, offset); | 4451 | ret = __ext4_journalled_invalidatepage(page, offset, |
| 4452 | PAGE_CACHE_SIZE - offset); | ||
| 4627 | unlock_page(page); | 4453 | unlock_page(page); |
| 4628 | page_cache_release(page); | 4454 | page_cache_release(page); |
| 4629 | if (ret != -EBUSY) | 4455 | if (ret != -EBUSY) |
| @@ -4805,7 +4631,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
| 4805 | struct kstat *stat) | 4631 | struct kstat *stat) |
| 4806 | { | 4632 | { |
| 4807 | struct inode *inode; | 4633 | struct inode *inode; |
| 4808 | unsigned long delalloc_blocks; | 4634 | unsigned long long delalloc_blocks; |
| 4809 | 4635 | ||
| 4810 | inode = dentry->d_inode; | 4636 | inode = dentry->d_inode; |
| 4811 | generic_fillattr(inode, stat); | 4637 | generic_fillattr(inode, stat); |
| @@ -4823,15 +4649,16 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | |||
| 4823 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), | 4649 | delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), |
| 4824 | EXT4_I(inode)->i_reserved_data_blocks); | 4650 | EXT4_I(inode)->i_reserved_data_blocks); |
| 4825 | 4651 | ||
| 4826 | stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; | 4652 | stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); |
| 4827 | return 0; | 4653 | return 0; |
| 4828 | } | 4654 | } |
| 4829 | 4655 | ||
| 4830 | static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4656 | static int ext4_index_trans_blocks(struct inode *inode, int lblocks, |
| 4657 | int pextents) | ||
| 4831 | { | 4658 | { |
| 4832 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) | 4659 | if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) |
| 4833 | return ext4_ind_trans_blocks(inode, nrblocks, chunk); | 4660 | return ext4_ind_trans_blocks(inode, lblocks); |
| 4834 | return ext4_ext_index_trans_blocks(inode, nrblocks, chunk); | 4661 | return ext4_ext_index_trans_blocks(inode, pextents); |
| 4835 | } | 4662 | } |
| 4836 | 4663 | ||
| 4837 | /* | 4664 | /* |
| @@ -4845,7 +4672,8 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
| 4845 | * | 4672 | * |
| 4846 | * Also account for superblock, inode, quota and xattr blocks | 4673 | * Also account for superblock, inode, quota and xattr blocks |
| 4847 | */ | 4674 | */ |
| 4848 | static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | 4675 | static int ext4_meta_trans_blocks(struct inode *inode, int lblocks, |
| 4676 | int pextents) | ||
| 4849 | { | 4677 | { |
| 4850 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); | 4678 | ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb); |
| 4851 | int gdpblocks; | 4679 | int gdpblocks; |
| @@ -4853,14 +4681,10 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
| 4853 | int ret = 0; | 4681 | int ret = 0; |
| 4854 | 4682 | ||
| 4855 | /* | 4683 | /* |
| 4856 | * How many index blocks need to touch to modify nrblocks? | 4684 | * How many index blocks need to touch to map @lblocks logical blocks |
| 4857 | * The "Chunk" flag indicating whether the nrblocks is | 4685 | * to @pextents physical extents? |
| 4858 | * physically contiguous on disk | ||
| 4859 | * | ||
| 4860 | * For Direct IO and fallocate, they calls get_block to allocate | ||
| 4861 | * one single extent at a time, so they could set the "Chunk" flag | ||
| 4862 | */ | 4686 | */ |
| 4863 | idxblocks = ext4_index_trans_blocks(inode, nrblocks, chunk); | 4687 | idxblocks = ext4_index_trans_blocks(inode, lblocks, pextents); |
| 4864 | 4688 | ||
| 4865 | ret = idxblocks; | 4689 | ret = idxblocks; |
| 4866 | 4690 | ||
| @@ -4868,12 +4692,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
| 4868 | * Now let's see how many group bitmaps and group descriptors need | 4692 | * Now let's see how many group bitmaps and group descriptors need |
| 4869 | * to account | 4693 | * to account |
| 4870 | */ | 4694 | */ |
| 4871 | groups = idxblocks; | 4695 | groups = idxblocks + pextents; |
| 4872 | if (chunk) | ||
| 4873 | groups += 1; | ||
| 4874 | else | ||
| 4875 | groups += nrblocks; | ||
| 4876 | |||
| 4877 | gdpblocks = groups; | 4696 | gdpblocks = groups; |
| 4878 | if (groups > ngroups) | 4697 | if (groups > ngroups) |
| 4879 | groups = ngroups; | 4698 | groups = ngroups; |
| @@ -4904,7 +4723,7 @@ int ext4_writepage_trans_blocks(struct inode *inode) | |||
| 4904 | int bpp = ext4_journal_blocks_per_page(inode); | 4723 | int bpp = ext4_journal_blocks_per_page(inode); |
| 4905 | int ret; | 4724 | int ret; |
| 4906 | 4725 | ||
| 4907 | ret = ext4_meta_trans_blocks(inode, bpp, 0); | 4726 | ret = ext4_meta_trans_blocks(inode, bpp, bpp); |
| 4908 | 4727 | ||
| 4909 | /* Account for data blocks for journalled mode */ | 4728 | /* Account for data blocks for journalled mode */ |
| 4910 | if (ext4_should_journal_data(inode)) | 4729 | if (ext4_should_journal_data(inode)) |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..a9ff5e5137ca 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -2105,6 +2105,7 @@ repeat: | |||
| 2105 | group = ac->ac_g_ex.fe_group; | 2105 | group = ac->ac_g_ex.fe_group; |
| 2106 | 2106 | ||
| 2107 | for (i = 0; i < ngroups; group++, i++) { | 2107 | for (i = 0; i < ngroups; group++, i++) { |
| 2108 | cond_resched(); | ||
| 2108 | /* | 2109 | /* |
| 2109 | * Artificially restricted ngroups for non-extent | 2110 | * Artificially restricted ngroups for non-extent |
| 2110 | * files makes group > ngroups possible on first loop. | 2111 | * files makes group > ngroups possible on first loop. |
| @@ -4405,17 +4406,20 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4405 | repeat: | 4406 | repeat: |
| 4406 | /* allocate space in core */ | 4407 | /* allocate space in core */ |
| 4407 | *errp = ext4_mb_regular_allocator(ac); | 4408 | *errp = ext4_mb_regular_allocator(ac); |
| 4408 | if (*errp) { | 4409 | if (*errp) |
| 4409 | ext4_discard_allocated_blocks(ac); | 4410 | goto discard_and_exit; |
| 4410 | goto errout; | ||
| 4411 | } | ||
| 4412 | 4411 | ||
| 4413 | /* as we've just preallocated more space than | 4412 | /* as we've just preallocated more space than |
| 4414 | * user requested orinally, we store allocated | 4413 | * user requested originally, we store allocated |
| 4415 | * space in a special descriptor */ | 4414 | * space in a special descriptor */ |
| 4416 | if (ac->ac_status == AC_STATUS_FOUND && | 4415 | if (ac->ac_status == AC_STATUS_FOUND && |
| 4417 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) | 4416 | ac->ac_o_ex.fe_len < ac->ac_b_ex.fe_len) |
| 4418 | ext4_mb_new_preallocation(ac); | 4417 | *errp = ext4_mb_new_preallocation(ac); |
| 4418 | if (*errp) { | ||
| 4419 | discard_and_exit: | ||
| 4420 | ext4_discard_allocated_blocks(ac); | ||
| 4421 | goto errout; | ||
| 4422 | } | ||
| 4419 | } | 4423 | } |
| 4420 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4424 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
| 4421 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); | 4425 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs); |
| @@ -4612,10 +4616,11 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
| 4612 | BUG_ON(bh && (count > 1)); | 4616 | BUG_ON(bh && (count > 1)); |
| 4613 | 4617 | ||
| 4614 | for (i = 0; i < count; i++) { | 4618 | for (i = 0; i < count; i++) { |
| 4619 | cond_resched(); | ||
| 4615 | if (!bh) | 4620 | if (!bh) |
| 4616 | tbh = sb_find_get_block(inode->i_sb, | 4621 | tbh = sb_find_get_block(inode->i_sb, |
| 4617 | block + i); | 4622 | block + i); |
| 4618 | if (unlikely(!tbh)) | 4623 | if (!tbh) |
| 4619 | continue; | 4624 | continue; |
| 4620 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, | 4625 | ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA, |
| 4621 | inode, tbh, block + i); | 4626 | inode, tbh, block + i); |
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 3dcbf364022f..e86dddbd8296 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c | |||
| @@ -912,7 +912,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, | |||
| 912 | struct page *pagep[2] = {NULL, NULL}; | 912 | struct page *pagep[2] = {NULL, NULL}; |
| 913 | handle_t *handle; | 913 | handle_t *handle; |
| 914 | ext4_lblk_t orig_blk_offset; | 914 | ext4_lblk_t orig_blk_offset; |
| 915 | long long offs = orig_page_offset << PAGE_CACHE_SHIFT; | ||
| 916 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; | 915 | unsigned long blocksize = orig_inode->i_sb->s_blocksize; |
| 917 | unsigned int w_flags = 0; | 916 | unsigned int w_flags = 0; |
| 918 | unsigned int tmp_data_size, data_size, replaced_size; | 917 | unsigned int tmp_data_size, data_size, replaced_size; |
| @@ -940,8 +939,6 @@ again: | |||
| 940 | orig_blk_offset = orig_page_offset * blocks_per_page + | 939 | orig_blk_offset = orig_page_offset * blocks_per_page + |
| 941 | data_offset_in_page; | 940 | data_offset_in_page; |
| 942 | 941 | ||
| 943 | offs = (long long)orig_blk_offset << orig_inode->i_blkbits; | ||
| 944 | |||
| 945 | /* Calculate data_size */ | 942 | /* Calculate data_size */ |
| 946 | if ((orig_blk_offset + block_len_in_page - 1) == | 943 | if ((orig_blk_offset + block_len_in_page - 1) == |
| 947 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { | 944 | ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6653fc35ecb7..ab2f6dc44b3a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
| 918 | bh->b_data, bh->b_size, | 918 | bh->b_data, bh->b_size, |
| 919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) | 919 | (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) |
| 920 | + ((char *)de - bh->b_data))) { | 920 | + ((char *)de - bh->b_data))) { |
| 921 | /* On error, skip the f_pos to the next block. */ | 921 | /* silently ignore the rest of the block */ |
| 922 | dir_file->f_pos = (dir_file->f_pos | | 922 | break; |
| 923 | (dir->i_sb->s_blocksize - 1)) + 1; | ||
| 924 | brelse(bh); | ||
| 925 | return count; | ||
| 926 | } | 923 | } |
| 927 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 924 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
| 928 | if ((hinfo->hash < start_hash) || | 925 | if ((hinfo->hash < start_hash) || |
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 4acf1f78881b..48786cdb5e6c 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
| @@ -46,46 +46,121 @@ void ext4_exit_pageio(void) | |||
| 46 | } | 46 | } |
| 47 | 47 | ||
| 48 | /* | 48 | /* |
| 49 | * This function is called by ext4_evict_inode() to make sure there is | 49 | * Print an buffer I/O error compatible with the fs/buffer.c. This |
| 50 | * no more pending I/O completion work left to do. | 50 | * provides compatibility with dmesg scrapers that look for a specific |
| 51 | * buffer I/O error message. We really need a unified error reporting | ||
| 52 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
| 53 | * probably not going to happen in my lifetime, due to LKML politics... | ||
| 51 | */ | 54 | */ |
| 52 | void ext4_ioend_shutdown(struct inode *inode) | 55 | static void buffer_io_error(struct buffer_head *bh) |
| 53 | { | 56 | { |
| 54 | wait_queue_head_t *wq = ext4_ioend_wq(inode); | 57 | char b[BDEVNAME_SIZE]; |
| 58 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | ||
| 59 | bdevname(bh->b_bdev, b), | ||
| 60 | (unsigned long long)bh->b_blocknr); | ||
| 61 | } | ||
| 55 | 62 | ||
| 56 | wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); | 63 | static void ext4_finish_bio(struct bio *bio) |
| 57 | /* | 64 | { |
| 58 | * We need to make sure the work structure is finished being | 65 | int i; |
| 59 | * used before we let the inode get destroyed. | 66 | int error = !test_bit(BIO_UPTODATE, &bio->bi_flags); |
| 60 | */ | 67 | |
| 61 | if (work_pending(&EXT4_I(inode)->i_unwritten_work)) | 68 | for (i = 0; i < bio->bi_vcnt; i++) { |
| 62 | cancel_work_sync(&EXT4_I(inode)->i_unwritten_work); | 69 | struct bio_vec *bvec = &bio->bi_io_vec[i]; |
| 70 | struct page *page = bvec->bv_page; | ||
| 71 | struct buffer_head *bh, *head; | ||
| 72 | unsigned bio_start = bvec->bv_offset; | ||
| 73 | unsigned bio_end = bio_start + bvec->bv_len; | ||
| 74 | unsigned under_io = 0; | ||
| 75 | unsigned long flags; | ||
| 76 | |||
| 77 | if (!page) | ||
| 78 | continue; | ||
| 79 | |||
| 80 | if (error) { | ||
| 81 | SetPageError(page); | ||
| 82 | set_bit(AS_EIO, &page->mapping->flags); | ||
| 83 | } | ||
| 84 | bh = head = page_buffers(page); | ||
| 85 | /* | ||
| 86 | * We check all buffers in the page under BH_Uptodate_Lock | ||
| 87 | * to avoid races with other end io clearing async_write flags | ||
| 88 | */ | ||
| 89 | local_irq_save(flags); | ||
| 90 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | ||
| 91 | do { | ||
| 92 | if (bh_offset(bh) < bio_start || | ||
| 93 | bh_offset(bh) + bh->b_size > bio_end) { | ||
| 94 | if (buffer_async_write(bh)) | ||
| 95 | under_io++; | ||
| 96 | continue; | ||
| 97 | } | ||
| 98 | clear_buffer_async_write(bh); | ||
| 99 | if (error) | ||
| 100 | buffer_io_error(bh); | ||
| 101 | } while ((bh = bh->b_this_page) != head); | ||
| 102 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
| 103 | local_irq_restore(flags); | ||
| 104 | if (!under_io) | ||
| 105 | end_page_writeback(page); | ||
| 106 | } | ||
| 63 | } | 107 | } |
| 64 | 108 | ||
| 65 | void ext4_free_io_end(ext4_io_end_t *io) | 109 | static void ext4_release_io_end(ext4_io_end_t *io_end) |
| 66 | { | 110 | { |
| 67 | BUG_ON(!io); | 111 | struct bio *bio, *next_bio; |
| 68 | BUG_ON(!list_empty(&io->list)); | 112 | |
| 69 | BUG_ON(io->flag & EXT4_IO_END_UNWRITTEN); | 113 | BUG_ON(!list_empty(&io_end->list)); |
| 114 | BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN); | ||
| 115 | WARN_ON(io_end->handle); | ||
| 70 | 116 | ||
| 71 | if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count)) | 117 | if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count)) |
| 72 | wake_up_all(ext4_ioend_wq(io->inode)); | 118 | wake_up_all(ext4_ioend_wq(io_end->inode)); |
| 73 | kmem_cache_free(io_end_cachep, io); | 119 | |
| 120 | for (bio = io_end->bio; bio; bio = next_bio) { | ||
| 121 | next_bio = bio->bi_private; | ||
| 122 | ext4_finish_bio(bio); | ||
| 123 | bio_put(bio); | ||
| 124 | } | ||
| 125 | if (io_end->flag & EXT4_IO_END_DIRECT) | ||
| 126 | inode_dio_done(io_end->inode); | ||
| 127 | if (io_end->iocb) | ||
| 128 | aio_complete(io_end->iocb, io_end->result, 0); | ||
| 129 | kmem_cache_free(io_end_cachep, io_end); | ||
| 74 | } | 130 | } |
| 75 | 131 | ||
| 76 | /* check a range of space and convert unwritten extents to written. */ | 132 | static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end) |
| 133 | { | ||
| 134 | struct inode *inode = io_end->inode; | ||
| 135 | |||
| 136 | io_end->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
| 137 | /* Wake up anyone waiting on unwritten extent conversion */ | ||
| 138 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | ||
| 139 | wake_up_all(ext4_ioend_wq(inode)); | ||
| 140 | } | ||
| 141 | |||
| 142 | /* | ||
| 143 | * Check a range of space and convert unwritten extents to written. Note that | ||
| 144 | * we are protected from truncate touching same part of extent tree by the | ||
| 145 | * fact that truncate code waits for all DIO to finish (thus exclusion from | ||
| 146 | * direct IO is achieved) and also waits for PageWriteback bits. Thus we | ||
| 147 | * cannot get to ext4_ext_truncate() before all IOs overlapping that range are | ||
| 148 | * completed (happens from ext4_free_ioend()). | ||
| 149 | */ | ||
| 77 | static int ext4_end_io(ext4_io_end_t *io) | 150 | static int ext4_end_io(ext4_io_end_t *io) |
| 78 | { | 151 | { |
| 79 | struct inode *inode = io->inode; | 152 | struct inode *inode = io->inode; |
| 80 | loff_t offset = io->offset; | 153 | loff_t offset = io->offset; |
| 81 | ssize_t size = io->size; | 154 | ssize_t size = io->size; |
| 155 | handle_t *handle = io->handle; | ||
| 82 | int ret = 0; | 156 | int ret = 0; |
| 83 | 157 | ||
| 84 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," | 158 | ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p," |
| 85 | "list->prev 0x%p\n", | 159 | "list->prev 0x%p\n", |
| 86 | io, inode->i_ino, io->list.next, io->list.prev); | 160 | io, inode->i_ino, io->list.next, io->list.prev); |
| 87 | 161 | ||
| 88 | ret = ext4_convert_unwritten_extents(inode, offset, size); | 162 | io->handle = NULL; /* Following call will use up the handle */ |
| 163 | ret = ext4_convert_unwritten_extents(handle, inode, offset, size); | ||
| 89 | if (ret < 0) { | 164 | if (ret < 0) { |
| 90 | ext4_msg(inode->i_sb, KERN_EMERG, | 165 | ext4_msg(inode->i_sb, KERN_EMERG, |
| 91 | "failed to convert unwritten extents to written " | 166 | "failed to convert unwritten extents to written " |
| @@ -93,30 +168,22 @@ static int ext4_end_io(ext4_io_end_t *io) | |||
| 93 | "(inode %lu, offset %llu, size %zd, error %d)", | 168 | "(inode %lu, offset %llu, size %zd, error %d)", |
| 94 | inode->i_ino, offset, size, ret); | 169 | inode->i_ino, offset, size, ret); |
| 95 | } | 170 | } |
| 96 | /* Wake up anyone waiting on unwritten extent conversion */ | 171 | ext4_clear_io_unwritten_flag(io); |
| 97 | if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten)) | 172 | ext4_release_io_end(io); |
| 98 | wake_up_all(ext4_ioend_wq(inode)); | ||
| 99 | if (io->flag & EXT4_IO_END_DIRECT) | ||
| 100 | inode_dio_done(inode); | ||
| 101 | if (io->iocb) | ||
| 102 | aio_complete(io->iocb, io->result, 0); | ||
| 103 | return ret; | 173 | return ret; |
| 104 | } | 174 | } |
| 105 | 175 | ||
| 106 | static void dump_completed_IO(struct inode *inode) | 176 | static void dump_completed_IO(struct inode *inode, struct list_head *head) |
| 107 | { | 177 | { |
| 108 | #ifdef EXT4FS_DEBUG | 178 | #ifdef EXT4FS_DEBUG |
| 109 | struct list_head *cur, *before, *after; | 179 | struct list_head *cur, *before, *after; |
| 110 | ext4_io_end_t *io, *io0, *io1; | 180 | ext4_io_end_t *io, *io0, *io1; |
| 111 | 181 | ||
| 112 | if (list_empty(&EXT4_I(inode)->i_completed_io_list)) { | 182 | if (list_empty(head)) |
| 113 | ext4_debug("inode %lu completed_io list is empty\n", | ||
| 114 | inode->i_ino); | ||
| 115 | return; | 183 | return; |
| 116 | } | ||
| 117 | 184 | ||
| 118 | ext4_debug("Dump inode %lu completed_io list\n", inode->i_ino); | 185 | ext4_debug("Dump inode %lu completed io list\n", inode->i_ino); |
| 119 | list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list) { | 186 | list_for_each_entry(io, head, list) { |
| 120 | cur = &io->list; | 187 | cur = &io->list; |
| 121 | before = cur->prev; | 188 | before = cur->prev; |
| 122 | io0 = container_of(before, ext4_io_end_t, list); | 189 | io0 = container_of(before, ext4_io_end_t, list); |
| @@ -130,23 +197,30 @@ static void dump_completed_IO(struct inode *inode) | |||
| 130 | } | 197 | } |
| 131 | 198 | ||
| 132 | /* Add the io_end to per-inode completed end_io list. */ | 199 | /* Add the io_end to per-inode completed end_io list. */ |
| 133 | void ext4_add_complete_io(ext4_io_end_t *io_end) | 200 | static void ext4_add_complete_io(ext4_io_end_t *io_end) |
| 134 | { | 201 | { |
| 135 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); | 202 | struct ext4_inode_info *ei = EXT4_I(io_end->inode); |
| 136 | struct workqueue_struct *wq; | 203 | struct workqueue_struct *wq; |
| 137 | unsigned long flags; | 204 | unsigned long flags; |
| 138 | 205 | ||
| 139 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); | 206 | BUG_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN)); |
| 140 | wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq; | ||
| 141 | |||
| 142 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 207 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
| 143 | if (list_empty(&ei->i_completed_io_list)) | 208 | if (io_end->handle) { |
| 144 | queue_work(wq, &ei->i_unwritten_work); | 209 | wq = EXT4_SB(io_end->inode->i_sb)->rsv_conversion_wq; |
| 145 | list_add_tail(&io_end->list, &ei->i_completed_io_list); | 210 | if (list_empty(&ei->i_rsv_conversion_list)) |
| 211 | queue_work(wq, &ei->i_rsv_conversion_work); | ||
| 212 | list_add_tail(&io_end->list, &ei->i_rsv_conversion_list); | ||
| 213 | } else { | ||
| 214 | wq = EXT4_SB(io_end->inode->i_sb)->unrsv_conversion_wq; | ||
| 215 | if (list_empty(&ei->i_unrsv_conversion_list)) | ||
| 216 | queue_work(wq, &ei->i_unrsv_conversion_work); | ||
| 217 | list_add_tail(&io_end->list, &ei->i_unrsv_conversion_list); | ||
| 218 | } | ||
| 146 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 219 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
| 147 | } | 220 | } |
| 148 | 221 | ||
| 149 | static int ext4_do_flush_completed_IO(struct inode *inode) | 222 | static int ext4_do_flush_completed_IO(struct inode *inode, |
| 223 | struct list_head *head) | ||
| 150 | { | 224 | { |
| 151 | ext4_io_end_t *io; | 225 | ext4_io_end_t *io; |
| 152 | struct list_head unwritten; | 226 | struct list_head unwritten; |
| @@ -155,8 +229,8 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
| 155 | int err, ret = 0; | 229 | int err, ret = 0; |
| 156 | 230 | ||
| 157 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); | 231 | spin_lock_irqsave(&ei->i_completed_io_lock, flags); |
| 158 | dump_completed_IO(inode); | 232 | dump_completed_IO(inode, head); |
| 159 | list_replace_init(&ei->i_completed_io_list, &unwritten); | 233 | list_replace_init(head, &unwritten); |
| 160 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); | 234 | spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); |
| 161 | 235 | ||
| 162 | while (!list_empty(&unwritten)) { | 236 | while (!list_empty(&unwritten)) { |
| @@ -167,30 +241,25 @@ static int ext4_do_flush_completed_IO(struct inode *inode) | |||
| 167 | err = ext4_end_io(io); | 241 | err = ext4_end_io(io); |
| 168 | if (unlikely(!ret && err)) | 242 | if (unlikely(!ret && err)) |
| 169 | ret = err; | 243 | ret = err; |
| 170 | io->flag &= ~EXT4_IO_END_UNWRITTEN; | ||
| 171 | ext4_free_io_end(io); | ||
| 172 | } | 244 | } |
| 173 | return ret; | 245 | return ret; |
| 174 | } | 246 | } |
| 175 | 247 | ||
| 176 | /* | 248 | /* |
| 177 | * work on completed aio dio IO, to convert unwritten extents to extents | 249 | * work on completed IO, to convert unwritten extents to extents |
| 178 | */ | 250 | */ |
| 179 | void ext4_end_io_work(struct work_struct *work) | 251 | void ext4_end_io_rsv_work(struct work_struct *work) |
| 180 | { | 252 | { |
| 181 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, | 253 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
| 182 | i_unwritten_work); | 254 | i_rsv_conversion_work); |
| 183 | ext4_do_flush_completed_IO(&ei->vfs_inode); | 255 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list); |
| 184 | } | 256 | } |
| 185 | 257 | ||
| 186 | int ext4_flush_unwritten_io(struct inode *inode) | 258 | void ext4_end_io_unrsv_work(struct work_struct *work) |
| 187 | { | 259 | { |
| 188 | int ret; | 260 | struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info, |
| 189 | WARN_ON_ONCE(!mutex_is_locked(&inode->i_mutex) && | 261 | i_unrsv_conversion_work); |
| 190 | !(inode->i_state & I_FREEING)); | 262 | ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_unrsv_conversion_list); |
| 191 | ret = ext4_do_flush_completed_IO(inode); | ||
| 192 | ext4_unwritten_wait(inode); | ||
| 193 | return ret; | ||
| 194 | } | 263 | } |
| 195 | 264 | ||
| 196 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | 265 | ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) |
| @@ -200,83 +269,70 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags) | |||
| 200 | atomic_inc(&EXT4_I(inode)->i_ioend_count); | 269 | atomic_inc(&EXT4_I(inode)->i_ioend_count); |
| 201 | io->inode = inode; | 270 | io->inode = inode; |
| 202 | INIT_LIST_HEAD(&io->list); | 271 | INIT_LIST_HEAD(&io->list); |
| 272 | atomic_set(&io->count, 1); | ||
| 203 | } | 273 | } |
| 204 | return io; | 274 | return io; |
| 205 | } | 275 | } |
| 206 | 276 | ||
| 207 | /* | 277 | void ext4_put_io_end_defer(ext4_io_end_t *io_end) |
| 208 | * Print an buffer I/O error compatible with the fs/buffer.c. This | ||
| 209 | * provides compatibility with dmesg scrapers that look for a specific | ||
| 210 | * buffer I/O error message. We really need a unified error reporting | ||
| 211 | * structure to userspace ala Digital Unix's uerf system, but it's | ||
| 212 | * probably not going to happen in my lifetime, due to LKML politics... | ||
| 213 | */ | ||
| 214 | static void buffer_io_error(struct buffer_head *bh) | ||
| 215 | { | 278 | { |
| 216 | char b[BDEVNAME_SIZE]; | 279 | if (atomic_dec_and_test(&io_end->count)) { |
| 217 | printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n", | 280 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) { |
| 218 | bdevname(bh->b_bdev, b), | 281 | ext4_release_io_end(io_end); |
| 219 | (unsigned long long)bh->b_blocknr); | 282 | return; |
| 283 | } | ||
| 284 | ext4_add_complete_io(io_end); | ||
| 285 | } | ||
| 286 | } | ||
| 287 | |||
| 288 | int ext4_put_io_end(ext4_io_end_t *io_end) | ||
| 289 | { | ||
| 290 | int err = 0; | ||
| 291 | |||
| 292 | if (atomic_dec_and_test(&io_end->count)) { | ||
| 293 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { | ||
| 294 | err = ext4_convert_unwritten_extents(io_end->handle, | ||
| 295 | io_end->inode, io_end->offset, | ||
| 296 | io_end->size); | ||
| 297 | io_end->handle = NULL; | ||
| 298 | ext4_clear_io_unwritten_flag(io_end); | ||
| 299 | } | ||
| 300 | ext4_release_io_end(io_end); | ||
| 301 | } | ||
| 302 | return err; | ||
| 303 | } | ||
| 304 | |||
| 305 | ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end) | ||
| 306 | { | ||
| 307 | atomic_inc(&io_end->count); | ||
| 308 | return io_end; | ||
| 220 | } | 309 | } |
| 221 | 310 | ||
| 222 | static void ext4_end_bio(struct bio *bio, int error) | 311 | static void ext4_end_bio(struct bio *bio, int error) |
| 223 | { | 312 | { |
| 224 | ext4_io_end_t *io_end = bio->bi_private; | 313 | ext4_io_end_t *io_end = bio->bi_private; |
| 225 | struct inode *inode; | ||
| 226 | int i; | ||
| 227 | int blocksize; | ||
| 228 | sector_t bi_sector = bio->bi_sector; | 314 | sector_t bi_sector = bio->bi_sector; |
| 229 | 315 | ||
| 230 | BUG_ON(!io_end); | 316 | BUG_ON(!io_end); |
| 231 | inode = io_end->inode; | ||
| 232 | blocksize = 1 << inode->i_blkbits; | ||
| 233 | bio->bi_private = NULL; | ||
| 234 | bio->bi_end_io = NULL; | 317 | bio->bi_end_io = NULL; |
| 235 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) | 318 | if (test_bit(BIO_UPTODATE, &bio->bi_flags)) |
| 236 | error = 0; | 319 | error = 0; |
| 237 | for (i = 0; i < bio->bi_vcnt; i++) { | ||
| 238 | struct bio_vec *bvec = &bio->bi_io_vec[i]; | ||
| 239 | struct page *page = bvec->bv_page; | ||
| 240 | struct buffer_head *bh, *head; | ||
| 241 | unsigned bio_start = bvec->bv_offset; | ||
| 242 | unsigned bio_end = bio_start + bvec->bv_len; | ||
| 243 | unsigned under_io = 0; | ||
| 244 | unsigned long flags; | ||
| 245 | 320 | ||
| 246 | if (!page) | 321 | if (io_end->flag & EXT4_IO_END_UNWRITTEN) { |
| 247 | continue; | ||
| 248 | |||
| 249 | if (error) { | ||
| 250 | SetPageError(page); | ||
| 251 | set_bit(AS_EIO, &page->mapping->flags); | ||
| 252 | } | ||
| 253 | bh = head = page_buffers(page); | ||
| 254 | /* | 322 | /* |
| 255 | * We check all buffers in the page under BH_Uptodate_Lock | 323 | * Link bio into list hanging from io_end. We have to do it |
| 256 | * to avoid races with other end io clearing async_write flags | 324 | * atomically as bio completions can be racing against each |
| 325 | * other. | ||
| 257 | */ | 326 | */ |
| 258 | local_irq_save(flags); | 327 | bio->bi_private = xchg(&io_end->bio, bio); |
| 259 | bit_spin_lock(BH_Uptodate_Lock, &head->b_state); | 328 | } else { |
| 260 | do { | 329 | ext4_finish_bio(bio); |
| 261 | if (bh_offset(bh) < bio_start || | 330 | bio_put(bio); |
| 262 | bh_offset(bh) + blocksize > bio_end) { | ||
| 263 | if (buffer_async_write(bh)) | ||
| 264 | under_io++; | ||
| 265 | continue; | ||
| 266 | } | ||
| 267 | clear_buffer_async_write(bh); | ||
| 268 | if (error) | ||
| 269 | buffer_io_error(bh); | ||
| 270 | } while ((bh = bh->b_this_page) != head); | ||
| 271 | bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); | ||
| 272 | local_irq_restore(flags); | ||
| 273 | if (!under_io) | ||
| 274 | end_page_writeback(page); | ||
| 275 | } | 331 | } |
| 276 | bio_put(bio); | ||
| 277 | 332 | ||
| 278 | if (error) { | 333 | if (error) { |
| 279 | io_end->flag |= EXT4_IO_END_ERROR; | 334 | struct inode *inode = io_end->inode; |
| 335 | |||
| 280 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " | 336 | ext4_warning(inode->i_sb, "I/O error writing to inode %lu " |
| 281 | "(offset %llu size %ld starting block %llu)", | 337 | "(offset %llu size %ld starting block %llu)", |
| 282 | inode->i_ino, | 338 | inode->i_ino, |
| @@ -285,13 +341,7 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
| 285 | (unsigned long long) | 341 | (unsigned long long) |
| 286 | bi_sector >> (inode->i_blkbits - 9)); | 342 | bi_sector >> (inode->i_blkbits - 9)); |
| 287 | } | 343 | } |
| 288 | 344 | ext4_put_io_end_defer(io_end); | |
| 289 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
| 290 | ext4_free_io_end(io_end); | ||
| 291 | return; | ||
| 292 | } | ||
| 293 | |||
| 294 | ext4_add_complete_io(io_end); | ||
| 295 | } | 345 | } |
| 296 | 346 | ||
| 297 | void ext4_io_submit(struct ext4_io_submit *io) | 347 | void ext4_io_submit(struct ext4_io_submit *io) |
| @@ -305,43 +355,38 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
| 305 | bio_put(io->io_bio); | 355 | bio_put(io->io_bio); |
| 306 | } | 356 | } |
| 307 | io->io_bio = NULL; | 357 | io->io_bio = NULL; |
| 308 | io->io_op = 0; | 358 | } |
| 359 | |||
| 360 | void ext4_io_submit_init(struct ext4_io_submit *io, | ||
| 361 | struct writeback_control *wbc) | ||
| 362 | { | ||
| 363 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
| 364 | io->io_bio = NULL; | ||
| 309 | io->io_end = NULL; | 365 | io->io_end = NULL; |
| 310 | } | 366 | } |
| 311 | 367 | ||
| 312 | static int io_submit_init(struct ext4_io_submit *io, | 368 | static int io_submit_init_bio(struct ext4_io_submit *io, |
| 313 | struct inode *inode, | 369 | struct buffer_head *bh) |
| 314 | struct writeback_control *wbc, | ||
| 315 | struct buffer_head *bh) | ||
| 316 | { | 370 | { |
| 317 | ext4_io_end_t *io_end; | ||
| 318 | struct page *page = bh->b_page; | ||
| 319 | int nvecs = bio_get_nr_vecs(bh->b_bdev); | 371 | int nvecs = bio_get_nr_vecs(bh->b_bdev); |
| 320 | struct bio *bio; | 372 | struct bio *bio; |
| 321 | 373 | ||
| 322 | io_end = ext4_init_io_end(inode, GFP_NOFS); | ||
| 323 | if (!io_end) | ||
| 324 | return -ENOMEM; | ||
| 325 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); | 374 | bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES)); |
| 375 | if (!bio) | ||
| 376 | return -ENOMEM; | ||
| 326 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); | 377 | bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); |
| 327 | bio->bi_bdev = bh->b_bdev; | 378 | bio->bi_bdev = bh->b_bdev; |
| 328 | bio->bi_private = io->io_end = io_end; | ||
| 329 | bio->bi_end_io = ext4_end_bio; | 379 | bio->bi_end_io = ext4_end_bio; |
| 330 | 380 | bio->bi_private = ext4_get_io_end(io->io_end); | |
| 331 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | ||
| 332 | |||
| 333 | io->io_bio = bio; | 381 | io->io_bio = bio; |
| 334 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); | ||
| 335 | io->io_next_block = bh->b_blocknr; | 382 | io->io_next_block = bh->b_blocknr; |
| 336 | return 0; | 383 | return 0; |
| 337 | } | 384 | } |
| 338 | 385 | ||
| 339 | static int io_submit_add_bh(struct ext4_io_submit *io, | 386 | static int io_submit_add_bh(struct ext4_io_submit *io, |
| 340 | struct inode *inode, | 387 | struct inode *inode, |
| 341 | struct writeback_control *wbc, | ||
| 342 | struct buffer_head *bh) | 388 | struct buffer_head *bh) |
| 343 | { | 389 | { |
| 344 | ext4_io_end_t *io_end; | ||
| 345 | int ret; | 390 | int ret; |
| 346 | 391 | ||
| 347 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { | 392 | if (io->io_bio && bh->b_blocknr != io->io_next_block) { |
| @@ -349,18 +394,14 @@ submit_and_retry: | |||
| 349 | ext4_io_submit(io); | 394 | ext4_io_submit(io); |
| 350 | } | 395 | } |
| 351 | if (io->io_bio == NULL) { | 396 | if (io->io_bio == NULL) { |
| 352 | ret = io_submit_init(io, inode, wbc, bh); | 397 | ret = io_submit_init_bio(io, bh); |
| 353 | if (ret) | 398 | if (ret) |
| 354 | return ret; | 399 | return ret; |
| 355 | } | 400 | } |
| 356 | io_end = io->io_end; | ||
| 357 | if (test_clear_buffer_uninit(bh)) | ||
| 358 | ext4_set_io_unwritten_flag(inode, io_end); | ||
| 359 | io->io_end->size += bh->b_size; | ||
| 360 | io->io_next_block++; | ||
| 361 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); | 401 | ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh)); |
| 362 | if (ret != bh->b_size) | 402 | if (ret != bh->b_size) |
| 363 | goto submit_and_retry; | 403 | goto submit_and_retry; |
| 404 | io->io_next_block++; | ||
| 364 | return 0; | 405 | return 0; |
| 365 | } | 406 | } |
| 366 | 407 | ||
| @@ -432,7 +473,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
| 432 | do { | 473 | do { |
| 433 | if (!buffer_async_write(bh)) | 474 | if (!buffer_async_write(bh)) |
| 434 | continue; | 475 | continue; |
| 435 | ret = io_submit_add_bh(io, inode, wbc, bh); | 476 | ret = io_submit_add_bh(io, inode, bh); |
| 436 | if (ret) { | 477 | if (ret) { |
| 437 | /* | 478 | /* |
| 438 | * We only get here on ENOMEM. Not much else | 479 | * We only get here on ENOMEM. Not much else |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b27c96d01965..c5adbb318a90 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -79,12 +79,20 @@ static int verify_group_input(struct super_block *sb, | |||
| 79 | ext4_fsblk_t end = start + input->blocks_count; | 79 | ext4_fsblk_t end = start + input->blocks_count; |
| 80 | ext4_group_t group = input->group; | 80 | ext4_group_t group = input->group; |
| 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; | 81 | ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; |
| 82 | unsigned overhead = ext4_group_overhead_blocks(sb, group); | 82 | unsigned overhead; |
| 83 | ext4_fsblk_t metaend = start + overhead; | 83 | ext4_fsblk_t metaend; |
| 84 | struct buffer_head *bh = NULL; | 84 | struct buffer_head *bh = NULL; |
| 85 | ext4_grpblk_t free_blocks_count, offset; | 85 | ext4_grpblk_t free_blocks_count, offset; |
| 86 | int err = -EINVAL; | 86 | int err = -EINVAL; |
| 87 | 87 | ||
| 88 | if (group != sbi->s_groups_count) { | ||
| 89 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
| 90 | input->group, sbi->s_groups_count); | ||
| 91 | return -EINVAL; | ||
| 92 | } | ||
| 93 | |||
| 94 | overhead = ext4_group_overhead_blocks(sb, group); | ||
| 95 | metaend = start + overhead; | ||
| 88 | input->free_blocks_count = free_blocks_count = | 96 | input->free_blocks_count = free_blocks_count = |
| 89 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; | 97 | input->blocks_count - 2 - overhead - sbi->s_itb_per_group; |
| 90 | 98 | ||
| @@ -96,10 +104,7 @@ static int verify_group_input(struct super_block *sb, | |||
| 96 | free_blocks_count, input->reserved_blocks); | 104 | free_blocks_count, input->reserved_blocks); |
| 97 | 105 | ||
| 98 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); | 106 | ext4_get_group_no_and_offset(sb, start, NULL, &offset); |
| 99 | if (group != sbi->s_groups_count) | 107 | if (offset != 0) |
| 100 | ext4_warning(sb, "Cannot add at group %u (only %u groups)", | ||
| 101 | input->group, sbi->s_groups_count); | ||
| 102 | else if (offset != 0) | ||
| 103 | ext4_warning(sb, "Last group not full"); | 108 | ext4_warning(sb, "Last group not full"); |
| 104 | else if (input->reserved_blocks > input->blocks_count / 5) | 109 | else if (input->reserved_blocks > input->blocks_count / 5) |
| 105 | ext4_warning(sb, "Reserved blocks too high (%u)", | 110 | ext4_warning(sb, "Reserved blocks too high (%u)", |
| @@ -1551,11 +1556,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 1551 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? | 1556 | int reserved_gdb = ext4_bg_has_super(sb, input->group) ? |
| 1552 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; | 1557 | le16_to_cpu(es->s_reserved_gdt_blocks) : 0; |
| 1553 | struct inode *inode = NULL; | 1558 | struct inode *inode = NULL; |
| 1554 | int gdb_off, gdb_num; | 1559 | int gdb_off; |
| 1555 | int err; | 1560 | int err; |
| 1556 | __u16 bg_flags = 0; | 1561 | __u16 bg_flags = 0; |
| 1557 | 1562 | ||
| 1558 | gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); | ||
| 1559 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); | 1563 | gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); |
| 1560 | 1564 | ||
| 1561 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, | 1565 | if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, |
| @@ -1656,12 +1660,10 @@ errout: | |||
| 1656 | err = err2; | 1660 | err = err2; |
| 1657 | 1661 | ||
| 1658 | if (!err) { | 1662 | if (!err) { |
| 1659 | ext4_fsblk_t first_block; | ||
| 1660 | first_block = ext4_group_first_block_no(sb, 0); | ||
| 1661 | if (test_opt(sb, DEBUG)) | 1663 | if (test_opt(sb, DEBUG)) |
| 1662 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " | 1664 | printk(KERN_DEBUG "EXT4-fs: extended group to %llu " |
| 1663 | "blocks\n", ext4_blocks_count(es)); | 1665 | "blocks\n", ext4_blocks_count(es)); |
| 1664 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, | 1666 | update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, |
| 1665 | (char *)es, sizeof(struct ext4_super_block), 0); | 1667 | (char *)es, sizeof(struct ext4_super_block), 0); |
| 1666 | } | 1668 | } |
| 1667 | return err; | 1669 | return err; |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..85b3dd60169b 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -69,6 +69,7 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
| 69 | static void ext4_clear_journal_err(struct super_block *sb, | 69 | static void ext4_clear_journal_err(struct super_block *sb, |
| 70 | struct ext4_super_block *es); | 70 | struct ext4_super_block *es); |
| 71 | static int ext4_sync_fs(struct super_block *sb, int wait); | 71 | static int ext4_sync_fs(struct super_block *sb, int wait); |
| 72 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait); | ||
| 72 | static int ext4_remount(struct super_block *sb, int *flags, char *data); | 73 | static int ext4_remount(struct super_block *sb, int *flags, char *data); |
| 73 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); | 74 | static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); |
| 74 | static int ext4_unfreeze(struct super_block *sb); | 75 | static int ext4_unfreeze(struct super_block *sb); |
| @@ -398,6 +399,11 @@ static void ext4_handle_error(struct super_block *sb) | |||
| 398 | } | 399 | } |
| 399 | if (test_opt(sb, ERRORS_RO)) { | 400 | if (test_opt(sb, ERRORS_RO)) { |
| 400 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 401 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
| 402 | /* | ||
| 403 | * Make sure updated value of ->s_mount_flags will be visible | ||
| 404 | * before ->s_flags update | ||
| 405 | */ | ||
| 406 | smp_wmb(); | ||
| 401 | sb->s_flags |= MS_RDONLY; | 407 | sb->s_flags |= MS_RDONLY; |
| 402 | } | 408 | } |
| 403 | if (test_opt(sb, ERRORS_PANIC)) | 409 | if (test_opt(sb, ERRORS_PANIC)) |
| @@ -422,9 +428,9 @@ void __ext4_error(struct super_block *sb, const char *function, | |||
| 422 | ext4_handle_error(sb); | 428 | ext4_handle_error(sb); |
| 423 | } | 429 | } |
| 424 | 430 | ||
| 425 | void ext4_error_inode(struct inode *inode, const char *function, | 431 | void __ext4_error_inode(struct inode *inode, const char *function, |
| 426 | unsigned int line, ext4_fsblk_t block, | 432 | unsigned int line, ext4_fsblk_t block, |
| 427 | const char *fmt, ...) | 433 | const char *fmt, ...) |
| 428 | { | 434 | { |
| 429 | va_list args; | 435 | va_list args; |
| 430 | struct va_format vaf; | 436 | struct va_format vaf; |
| @@ -451,9 +457,9 @@ void ext4_error_inode(struct inode *inode, const char *function, | |||
| 451 | ext4_handle_error(inode->i_sb); | 457 | ext4_handle_error(inode->i_sb); |
| 452 | } | 458 | } |
| 453 | 459 | ||
| 454 | void ext4_error_file(struct file *file, const char *function, | 460 | void __ext4_error_file(struct file *file, const char *function, |
| 455 | unsigned int line, ext4_fsblk_t block, | 461 | unsigned int line, ext4_fsblk_t block, |
| 456 | const char *fmt, ...) | 462 | const char *fmt, ...) |
| 457 | { | 463 | { |
| 458 | va_list args; | 464 | va_list args; |
| 459 | struct va_format vaf; | 465 | struct va_format vaf; |
| @@ -570,8 +576,13 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
| 570 | 576 | ||
| 571 | if ((sb->s_flags & MS_RDONLY) == 0) { | 577 | if ((sb->s_flags & MS_RDONLY) == 0) { |
| 572 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); | 578 | ext4_msg(sb, KERN_CRIT, "Remounting filesystem read-only"); |
| 573 | sb->s_flags |= MS_RDONLY; | ||
| 574 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; | 579 | EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FS_ABORTED; |
| 580 | /* | ||
| 581 | * Make sure updated value of ->s_mount_flags will be visible | ||
| 582 | * before ->s_flags update | ||
| 583 | */ | ||
| 584 | smp_wmb(); | ||
| 585 | sb->s_flags |= MS_RDONLY; | ||
| 575 | if (EXT4_SB(sb)->s_journal) | 586 | if (EXT4_SB(sb)->s_journal) |
| 576 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); | 587 | jbd2_journal_abort(EXT4_SB(sb)->s_journal, -EIO); |
| 577 | save_error_info(sb, function, line); | 588 | save_error_info(sb, function, line); |
| @@ -580,7 +591,8 @@ void __ext4_abort(struct super_block *sb, const char *function, | |||
| 580 | panic("EXT4-fs panic from previous error\n"); | 591 | panic("EXT4-fs panic from previous error\n"); |
| 581 | } | 592 | } |
| 582 | 593 | ||
| 583 | void ext4_msg(struct super_block *sb, const char *prefix, const char *fmt, ...) | 594 | void __ext4_msg(struct super_block *sb, |
| 595 | const char *prefix, const char *fmt, ...) | ||
| 584 | { | 596 | { |
| 585 | struct va_format vaf; | 597 | struct va_format vaf; |
| 586 | va_list args; | 598 | va_list args; |
| @@ -750,8 +762,10 @@ static void ext4_put_super(struct super_block *sb) | |||
| 750 | ext4_unregister_li_request(sb); | 762 | ext4_unregister_li_request(sb); |
| 751 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); | 763 | dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED); |
| 752 | 764 | ||
| 753 | flush_workqueue(sbi->dio_unwritten_wq); | 765 | flush_workqueue(sbi->unrsv_conversion_wq); |
| 754 | destroy_workqueue(sbi->dio_unwritten_wq); | 766 | flush_workqueue(sbi->rsv_conversion_wq); |
| 767 | destroy_workqueue(sbi->unrsv_conversion_wq); | ||
| 768 | destroy_workqueue(sbi->rsv_conversion_wq); | ||
| 755 | 769 | ||
| 756 | if (sbi->s_journal) { | 770 | if (sbi->s_journal) { |
| 757 | err = jbd2_journal_destroy(sbi->s_journal); | 771 | err = jbd2_journal_destroy(sbi->s_journal); |
| @@ -760,7 +774,7 @@ static void ext4_put_super(struct super_block *sb) | |||
| 760 | ext4_abort(sb, "Couldn't clean up the journal"); | 774 | ext4_abort(sb, "Couldn't clean up the journal"); |
| 761 | } | 775 | } |
| 762 | 776 | ||
| 763 | ext4_es_unregister_shrinker(sb); | 777 | ext4_es_unregister_shrinker(sbi); |
| 764 | del_timer(&sbi->s_err_report); | 778 | del_timer(&sbi->s_err_report); |
| 765 | ext4_release_system_zone(sb); | 779 | ext4_release_system_zone(sb); |
| 766 | ext4_mb_release(sb); | 780 | ext4_mb_release(sb); |
| @@ -849,6 +863,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 849 | rwlock_init(&ei->i_es_lock); | 863 | rwlock_init(&ei->i_es_lock); |
| 850 | INIT_LIST_HEAD(&ei->i_es_lru); | 864 | INIT_LIST_HEAD(&ei->i_es_lru); |
| 851 | ei->i_es_lru_nr = 0; | 865 | ei->i_es_lru_nr = 0; |
| 866 | ei->i_touch_when = 0; | ||
| 852 | ei->i_reserved_data_blocks = 0; | 867 | ei->i_reserved_data_blocks = 0; |
| 853 | ei->i_reserved_meta_blocks = 0; | 868 | ei->i_reserved_meta_blocks = 0; |
| 854 | ei->i_allocated_meta_blocks = 0; | 869 | ei->i_allocated_meta_blocks = 0; |
| @@ -859,13 +874,15 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 859 | ei->i_reserved_quota = 0; | 874 | ei->i_reserved_quota = 0; |
| 860 | #endif | 875 | #endif |
| 861 | ei->jinode = NULL; | 876 | ei->jinode = NULL; |
| 862 | INIT_LIST_HEAD(&ei->i_completed_io_list); | 877 | INIT_LIST_HEAD(&ei->i_rsv_conversion_list); |
| 878 | INIT_LIST_HEAD(&ei->i_unrsv_conversion_list); | ||
| 863 | spin_lock_init(&ei->i_completed_io_lock); | 879 | spin_lock_init(&ei->i_completed_io_lock); |
| 864 | ei->i_sync_tid = 0; | 880 | ei->i_sync_tid = 0; |
| 865 | ei->i_datasync_tid = 0; | 881 | ei->i_datasync_tid = 0; |
| 866 | atomic_set(&ei->i_ioend_count, 0); | 882 | atomic_set(&ei->i_ioend_count, 0); |
| 867 | atomic_set(&ei->i_unwritten, 0); | 883 | atomic_set(&ei->i_unwritten, 0); |
| 868 | INIT_WORK(&ei->i_unwritten_work, ext4_end_io_work); | 884 | INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work); |
| 885 | INIT_WORK(&ei->i_unrsv_conversion_work, ext4_end_io_unrsv_work); | ||
| 869 | 886 | ||
| 870 | return &ei->vfs_inode; | 887 | return &ei->vfs_inode; |
| 871 | } | 888 | } |
| @@ -1093,6 +1110,7 @@ static const struct super_operations ext4_nojournal_sops = { | |||
| 1093 | .dirty_inode = ext4_dirty_inode, | 1110 | .dirty_inode = ext4_dirty_inode, |
| 1094 | .drop_inode = ext4_drop_inode, | 1111 | .drop_inode = ext4_drop_inode, |
| 1095 | .evict_inode = ext4_evict_inode, | 1112 | .evict_inode = ext4_evict_inode, |
| 1113 | .sync_fs = ext4_sync_fs_nojournal, | ||
| 1096 | .put_super = ext4_put_super, | 1114 | .put_super = ext4_put_super, |
| 1097 | .statfs = ext4_statfs, | 1115 | .statfs = ext4_statfs, |
| 1098 | .remount_fs = ext4_remount, | 1116 | .remount_fs = ext4_remount, |
| @@ -1908,7 +1926,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
| 1908 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1926 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 1909 | struct ext4_group_desc *gdp = NULL; | 1927 | struct ext4_group_desc *gdp = NULL; |
| 1910 | ext4_group_t flex_group; | 1928 | ext4_group_t flex_group; |
| 1911 | unsigned int groups_per_flex = 0; | ||
| 1912 | int i, err; | 1929 | int i, err; |
| 1913 | 1930 | ||
| 1914 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1931 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
| @@ -1916,7 +1933,6 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
| 1916 | sbi->s_log_groups_per_flex = 0; | 1933 | sbi->s_log_groups_per_flex = 0; |
| 1917 | return 1; | 1934 | return 1; |
| 1918 | } | 1935 | } |
| 1919 | groups_per_flex = 1U << sbi->s_log_groups_per_flex; | ||
| 1920 | 1936 | ||
| 1921 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); | 1937 | err = ext4_alloc_flex_bg_array(sb, sbi->s_groups_count); |
| 1922 | if (err) | 1938 | if (err) |
| @@ -2164,19 +2180,22 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
| 2164 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); | 2180 | list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); |
| 2165 | dquot_initialize(inode); | 2181 | dquot_initialize(inode); |
| 2166 | if (inode->i_nlink) { | 2182 | if (inode->i_nlink) { |
| 2167 | ext4_msg(sb, KERN_DEBUG, | 2183 | if (test_opt(sb, DEBUG)) |
| 2168 | "%s: truncating inode %lu to %lld bytes", | 2184 | ext4_msg(sb, KERN_DEBUG, |
| 2169 | __func__, inode->i_ino, inode->i_size); | 2185 | "%s: truncating inode %lu to %lld bytes", |
| 2186 | __func__, inode->i_ino, inode->i_size); | ||
| 2170 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", | 2187 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
| 2171 | inode->i_ino, inode->i_size); | 2188 | inode->i_ino, inode->i_size); |
| 2172 | mutex_lock(&inode->i_mutex); | 2189 | mutex_lock(&inode->i_mutex); |
| 2190 | truncate_inode_pages(inode->i_mapping, inode->i_size); | ||
| 2173 | ext4_truncate(inode); | 2191 | ext4_truncate(inode); |
| 2174 | mutex_unlock(&inode->i_mutex); | 2192 | mutex_unlock(&inode->i_mutex); |
| 2175 | nr_truncates++; | 2193 | nr_truncates++; |
| 2176 | } else { | 2194 | } else { |
| 2177 | ext4_msg(sb, KERN_DEBUG, | 2195 | if (test_opt(sb, DEBUG)) |
| 2178 | "%s: deleting unreferenced inode %lu", | 2196 | ext4_msg(sb, KERN_DEBUG, |
| 2179 | __func__, inode->i_ino); | 2197 | "%s: deleting unreferenced inode %lu", |
| 2198 | __func__, inode->i_ino); | ||
| 2180 | jbd_debug(2, "deleting unreferenced inode %lu\n", | 2199 | jbd_debug(2, "deleting unreferenced inode %lu\n", |
| 2181 | inode->i_ino); | 2200 | inode->i_ino); |
| 2182 | nr_orphans++; | 2201 | nr_orphans++; |
| @@ -2377,7 +2396,10 @@ struct ext4_attr { | |||
| 2377 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); | 2396 | ssize_t (*show)(struct ext4_attr *, struct ext4_sb_info *, char *); |
| 2378 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, | 2397 | ssize_t (*store)(struct ext4_attr *, struct ext4_sb_info *, |
| 2379 | const char *, size_t); | 2398 | const char *, size_t); |
| 2380 | int offset; | 2399 | union { |
| 2400 | int offset; | ||
| 2401 | int deprecated_val; | ||
| 2402 | } u; | ||
| 2381 | }; | 2403 | }; |
| 2382 | 2404 | ||
| 2383 | static int parse_strtoull(const char *buf, | 2405 | static int parse_strtoull(const char *buf, |
| @@ -2446,7 +2468,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
| 2446 | static ssize_t sbi_ui_show(struct ext4_attr *a, | 2468 | static ssize_t sbi_ui_show(struct ext4_attr *a, |
| 2447 | struct ext4_sb_info *sbi, char *buf) | 2469 | struct ext4_sb_info *sbi, char *buf) |
| 2448 | { | 2470 | { |
| 2449 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2471 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
| 2450 | 2472 | ||
| 2451 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); | 2473 | return snprintf(buf, PAGE_SIZE, "%u\n", *ui); |
| 2452 | } | 2474 | } |
| @@ -2455,7 +2477,7 @@ static ssize_t sbi_ui_store(struct ext4_attr *a, | |||
| 2455 | struct ext4_sb_info *sbi, | 2477 | struct ext4_sb_info *sbi, |
| 2456 | const char *buf, size_t count) | 2478 | const char *buf, size_t count) |
| 2457 | { | 2479 | { |
| 2458 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->offset); | 2480 | unsigned int *ui = (unsigned int *) (((char *) sbi) + a->u.offset); |
| 2459 | unsigned long t; | 2481 | unsigned long t; |
| 2460 | int ret; | 2482 | int ret; |
| 2461 | 2483 | ||
| @@ -2504,12 +2526,20 @@ static ssize_t trigger_test_error(struct ext4_attr *a, | |||
| 2504 | return count; | 2526 | return count; |
| 2505 | } | 2527 | } |
| 2506 | 2528 | ||
| 2529 | static ssize_t sbi_deprecated_show(struct ext4_attr *a, | ||
| 2530 | struct ext4_sb_info *sbi, char *buf) | ||
| 2531 | { | ||
| 2532 | return snprintf(buf, PAGE_SIZE, "%d\n", a->u.deprecated_val); | ||
| 2533 | } | ||
| 2534 | |||
| 2507 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ | 2535 | #define EXT4_ATTR_OFFSET(_name,_mode,_show,_store,_elname) \ |
| 2508 | static struct ext4_attr ext4_attr_##_name = { \ | 2536 | static struct ext4_attr ext4_attr_##_name = { \ |
| 2509 | .attr = {.name = __stringify(_name), .mode = _mode }, \ | 2537 | .attr = {.name = __stringify(_name), .mode = _mode }, \ |
| 2510 | .show = _show, \ | 2538 | .show = _show, \ |
| 2511 | .store = _store, \ | 2539 | .store = _store, \ |
| 2512 | .offset = offsetof(struct ext4_sb_info, _elname), \ | 2540 | .u = { \ |
| 2541 | .offset = offsetof(struct ext4_sb_info, _elname),\ | ||
| 2542 | }, \ | ||
| 2513 | } | 2543 | } |
| 2514 | #define EXT4_ATTR(name, mode, show, store) \ | 2544 | #define EXT4_ATTR(name, mode, show, store) \ |
| 2515 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | 2545 | static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) |
| @@ -2520,6 +2550,14 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) | |||
| 2520 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ | 2550 | #define EXT4_RW_ATTR_SBI_UI(name, elname) \ |
| 2521 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) | 2551 | EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) |
| 2522 | #define ATTR_LIST(name) &ext4_attr_##name.attr | 2552 | #define ATTR_LIST(name) &ext4_attr_##name.attr |
| 2553 | #define EXT4_DEPRECATED_ATTR(_name, _val) \ | ||
| 2554 | static struct ext4_attr ext4_attr_##_name = { \ | ||
| 2555 | .attr = {.name = __stringify(_name), .mode = 0444 }, \ | ||
| 2556 | .show = sbi_deprecated_show, \ | ||
| 2557 | .u = { \ | ||
| 2558 | .deprecated_val = _val, \ | ||
| 2559 | }, \ | ||
| 2560 | } | ||
| 2523 | 2561 | ||
| 2524 | EXT4_RO_ATTR(delayed_allocation_blocks); | 2562 | EXT4_RO_ATTR(delayed_allocation_blocks); |
| 2525 | EXT4_RO_ATTR(session_write_kbytes); | 2563 | EXT4_RO_ATTR(session_write_kbytes); |
| @@ -2534,7 +2572,7 @@ EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); | |||
| 2534 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); | 2572 | EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); |
| 2535 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); | 2573 | EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); |
| 2536 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); | 2574 | EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); |
| 2537 | EXT4_RW_ATTR_SBI_UI(max_writeback_mb_bump, s_max_writeback_mb_bump); | 2575 | EXT4_DEPRECATED_ATTR(max_writeback_mb_bump, 128); |
| 2538 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); | 2576 | EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); |
| 2539 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); | 2577 | EXT4_ATTR(trigger_fs_error, 0200, NULL, trigger_test_error); |
| 2540 | 2578 | ||
| @@ -3763,7 +3801,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3763 | sbi->s_err_report.data = (unsigned long) sb; | 3801 | sbi->s_err_report.data = (unsigned long) sb; |
| 3764 | 3802 | ||
| 3765 | /* Register extent status tree shrinker */ | 3803 | /* Register extent status tree shrinker */ |
| 3766 | ext4_es_register_shrinker(sb); | 3804 | ext4_es_register_shrinker(sbi); |
| 3767 | 3805 | ||
| 3768 | err = percpu_counter_init(&sbi->s_freeclusters_counter, | 3806 | err = percpu_counter_init(&sbi->s_freeclusters_counter, |
| 3769 | ext4_count_free_clusters(sb)); | 3807 | ext4_count_free_clusters(sb)); |
| @@ -3787,7 +3825,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 3787 | } | 3825 | } |
| 3788 | 3826 | ||
| 3789 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 3827 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
| 3790 | sbi->s_max_writeback_mb_bump = 128; | ||
| 3791 | sbi->s_extent_max_zeroout_kb = 32; | 3828 | sbi->s_extent_max_zeroout_kb = 32; |
| 3792 | 3829 | ||
| 3793 | /* | 3830 | /* |
| @@ -3915,12 +3952,20 @@ no_journal: | |||
| 3915 | * The maximum number of concurrent works can be high and | 3952 | * The maximum number of concurrent works can be high and |
| 3916 | * concurrency isn't really necessary. Limit it to 1. | 3953 | * concurrency isn't really necessary. Limit it to 1. |
| 3917 | */ | 3954 | */ |
| 3918 | EXT4_SB(sb)->dio_unwritten_wq = | 3955 | EXT4_SB(sb)->rsv_conversion_wq = |
| 3919 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | 3956 | alloc_workqueue("ext4-rsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
| 3920 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3957 | if (!EXT4_SB(sb)->rsv_conversion_wq) { |
| 3921 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3958 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); |
| 3922 | ret = -ENOMEM; | 3959 | ret = -ENOMEM; |
| 3923 | goto failed_mount_wq; | 3960 | goto failed_mount4; |
| 3961 | } | ||
| 3962 | |||
| 3963 | EXT4_SB(sb)->unrsv_conversion_wq = | ||
| 3964 | alloc_workqueue("ext4-unrsv-conversion", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); | ||
| 3965 | if (!EXT4_SB(sb)->unrsv_conversion_wq) { | ||
| 3966 | printk(KERN_ERR "EXT4-fs: failed to create workqueue\n"); | ||
| 3967 | ret = -ENOMEM; | ||
| 3968 | goto failed_mount4; | ||
| 3924 | } | 3969 | } |
| 3925 | 3970 | ||
| 3926 | /* | 3971 | /* |
| @@ -4074,14 +4119,17 @@ failed_mount4a: | |||
| 4074 | sb->s_root = NULL; | 4119 | sb->s_root = NULL; |
| 4075 | failed_mount4: | 4120 | failed_mount4: |
| 4076 | ext4_msg(sb, KERN_ERR, "mount failed"); | 4121 | ext4_msg(sb, KERN_ERR, "mount failed"); |
| 4077 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 4122 | if (EXT4_SB(sb)->rsv_conversion_wq) |
| 4123 | destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
| 4124 | if (EXT4_SB(sb)->unrsv_conversion_wq) | ||
| 4125 | destroy_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); | ||
| 4078 | failed_mount_wq: | 4126 | failed_mount_wq: |
| 4079 | if (sbi->s_journal) { | 4127 | if (sbi->s_journal) { |
| 4080 | jbd2_journal_destroy(sbi->s_journal); | 4128 | jbd2_journal_destroy(sbi->s_journal); |
| 4081 | sbi->s_journal = NULL; | 4129 | sbi->s_journal = NULL; |
| 4082 | } | 4130 | } |
| 4083 | failed_mount3: | 4131 | failed_mount3: |
| 4084 | ext4_es_unregister_shrinker(sb); | 4132 | ext4_es_unregister_shrinker(sbi); |
| 4085 | del_timer(&sbi->s_err_report); | 4133 | del_timer(&sbi->s_err_report); |
| 4086 | if (sbi->s_flex_groups) | 4134 | if (sbi->s_flex_groups) |
| 4087 | ext4_kvfree(sbi->s_flex_groups); | 4135 | ext4_kvfree(sbi->s_flex_groups); |
| @@ -4517,19 +4565,52 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
| 4517 | { | 4565 | { |
| 4518 | int ret = 0; | 4566 | int ret = 0; |
| 4519 | tid_t target; | 4567 | tid_t target; |
| 4568 | bool needs_barrier = false; | ||
| 4520 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4569 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 4521 | 4570 | ||
| 4522 | trace_ext4_sync_fs(sb, wait); | 4571 | trace_ext4_sync_fs(sb, wait); |
| 4523 | flush_workqueue(sbi->dio_unwritten_wq); | 4572 | flush_workqueue(sbi->rsv_conversion_wq); |
| 4573 | flush_workqueue(sbi->unrsv_conversion_wq); | ||
| 4524 | /* | 4574 | /* |
| 4525 | * Writeback quota in non-journalled quota case - journalled quota has | 4575 | * Writeback quota in non-journalled quota case - journalled quota has |
| 4526 | * no dirty dquots | 4576 | * no dirty dquots |
| 4527 | */ | 4577 | */ |
| 4528 | dquot_writeback_dquots(sb, -1); | 4578 | dquot_writeback_dquots(sb, -1); |
| 4579 | /* | ||
| 4580 | * Data writeback is possible w/o journal transaction, so barrier must | ||
| 4581 | * being sent at the end of the function. But we can skip it if | ||
| 4582 | * transaction_commit will do it for us. | ||
| 4583 | */ | ||
| 4584 | target = jbd2_get_latest_transaction(sbi->s_journal); | ||
| 4585 | if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && | ||
| 4586 | !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) | ||
| 4587 | needs_barrier = true; | ||
| 4588 | |||
| 4529 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { | 4589 | if (jbd2_journal_start_commit(sbi->s_journal, &target)) { |
| 4530 | if (wait) | 4590 | if (wait) |
| 4531 | jbd2_log_wait_commit(sbi->s_journal, target); | 4591 | ret = jbd2_log_wait_commit(sbi->s_journal, target); |
| 4592 | } | ||
| 4593 | if (needs_barrier) { | ||
| 4594 | int err; | ||
| 4595 | err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
| 4596 | if (!ret) | ||
| 4597 | ret = err; | ||
| 4532 | } | 4598 | } |
| 4599 | |||
| 4600 | return ret; | ||
| 4601 | } | ||
| 4602 | |||
| 4603 | static int ext4_sync_fs_nojournal(struct super_block *sb, int wait) | ||
| 4604 | { | ||
| 4605 | int ret = 0; | ||
| 4606 | |||
| 4607 | trace_ext4_sync_fs(sb, wait); | ||
| 4608 | flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq); | ||
| 4609 | flush_workqueue(EXT4_SB(sb)->unrsv_conversion_wq); | ||
| 4610 | dquot_writeback_dquots(sb, -1); | ||
| 4611 | if (wait && test_opt(sb, BARRIER)) | ||
| 4612 | ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); | ||
| 4613 | |||
| 4533 | return ret; | 4614 | return ret; |
| 4534 | } | 4615 | } |
| 4535 | 4616 | ||
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index fd27e7e6326e..e06e0995e00f 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig | |||
| @@ -51,3 +51,15 @@ config F2FS_FS_POSIX_ACL | |||
| 51 | Linux website <http://acl.bestbits.at/>. | 51 | Linux website <http://acl.bestbits.at/>. |
| 52 | 52 | ||
| 53 | If you don't know what Access Control Lists are, say N | 53 | If you don't know what Access Control Lists are, say N |
| 54 | |||
| 55 | config F2FS_FS_SECURITY | ||
| 56 | bool "F2FS Security Labels" | ||
| 57 | depends on F2FS_FS_XATTR | ||
| 58 | help | ||
| 59 | Security labels provide an access control facility to support Linux | ||
| 60 | Security Models (LSMs) accepted by AppArmor, SELinux, Smack and TOMOYO | ||
| 61 | Linux. This option enables an extended attribute handler for file | ||
| 62 | security labels in the f2fs filesystem, so that it requires enabling | ||
| 63 | the extended attribute support in advance. | ||
| 64 | |||
| 65 | If you are not using a security module, say N. | ||
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c index 44abc2f286e0..b7826ec1b470 100644 --- a/fs/f2fs/acl.c +++ b/fs/f2fs/acl.c | |||
| @@ -250,7 +250,7 @@ static int f2fs_set_acl(struct inode *inode, int type, struct posix_acl *acl) | |||
| 250 | } | 250 | } |
| 251 | } | 251 | } |
| 252 | 252 | ||
| 253 | error = f2fs_setxattr(inode, name_index, "", value, size); | 253 | error = f2fs_setxattr(inode, name_index, "", value, size, NULL); |
| 254 | 254 | ||
| 255 | kfree(value); | 255 | kfree(value); |
| 256 | if (!error) | 256 | if (!error) |
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b1de01da1a40..66a6b85a51d8 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c | |||
| @@ -357,8 +357,8 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
| 357 | unsigned long blk_size = sbi->blocksize; | 357 | unsigned long blk_size = sbi->blocksize; |
| 358 | struct f2fs_checkpoint *cp_block; | 358 | struct f2fs_checkpoint *cp_block; |
| 359 | unsigned long long cur_version = 0, pre_version = 0; | 359 | unsigned long long cur_version = 0, pre_version = 0; |
| 360 | unsigned int crc = 0; | ||
| 361 | size_t crc_offset; | 360 | size_t crc_offset; |
| 361 | __u32 crc = 0; | ||
| 362 | 362 | ||
| 363 | /* Read the 1st cp block in this CP pack */ | 363 | /* Read the 1st cp block in this CP pack */ |
| 364 | cp_page_1 = get_meta_page(sbi, cp_addr); | 364 | cp_page_1 = get_meta_page(sbi, cp_addr); |
| @@ -369,7 +369,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
| 369 | if (crc_offset >= blk_size) | 369 | if (crc_offset >= blk_size) |
| 370 | goto invalid_cp1; | 370 | goto invalid_cp1; |
| 371 | 371 | ||
| 372 | crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); | 372 | crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); |
| 373 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | 373 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) |
| 374 | goto invalid_cp1; | 374 | goto invalid_cp1; |
| 375 | 375 | ||
| @@ -384,7 +384,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi, | |||
| 384 | if (crc_offset >= blk_size) | 384 | if (crc_offset >= blk_size) |
| 385 | goto invalid_cp2; | 385 | goto invalid_cp2; |
| 386 | 386 | ||
| 387 | crc = *(unsigned int *)((unsigned char *)cp_block + crc_offset); | 387 | crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset))); |
| 388 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) | 388 | if (!f2fs_crc_valid(crc, cp_block, crc_offset)) |
| 389 | goto invalid_cp2; | 389 | goto invalid_cp2; |
| 390 | 390 | ||
| @@ -450,13 +450,30 @@ fail_no_cp: | |||
| 450 | return -EINVAL; | 450 | return -EINVAL; |
| 451 | } | 451 | } |
| 452 | 452 | ||
| 453 | void set_dirty_dir_page(struct inode *inode, struct page *page) | 453 | static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new) |
| 454 | { | 454 | { |
| 455 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 455 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| 456 | struct list_head *head = &sbi->dir_inode_list; | 456 | struct list_head *head = &sbi->dir_inode_list; |
| 457 | struct dir_inode_entry *new; | ||
| 458 | struct list_head *this; | 457 | struct list_head *this; |
| 459 | 458 | ||
| 459 | list_for_each(this, head) { | ||
| 460 | struct dir_inode_entry *entry; | ||
| 461 | entry = list_entry(this, struct dir_inode_entry, list); | ||
| 462 | if (entry->inode == inode) | ||
| 463 | return -EEXIST; | ||
| 464 | } | ||
| 465 | list_add_tail(&new->list, head); | ||
| 466 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 467 | sbi->n_dirty_dirs++; | ||
| 468 | #endif | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | |||
| 472 | void set_dirty_dir_page(struct inode *inode, struct page *page) | ||
| 473 | { | ||
| 474 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
| 475 | struct dir_inode_entry *new; | ||
| 476 | |||
| 460 | if (!S_ISDIR(inode->i_mode)) | 477 | if (!S_ISDIR(inode->i_mode)) |
| 461 | return; | 478 | return; |
| 462 | retry: | 479 | retry: |
| @@ -469,23 +486,31 @@ retry: | |||
| 469 | INIT_LIST_HEAD(&new->list); | 486 | INIT_LIST_HEAD(&new->list); |
| 470 | 487 | ||
| 471 | spin_lock(&sbi->dir_inode_lock); | 488 | spin_lock(&sbi->dir_inode_lock); |
| 472 | list_for_each(this, head) { | 489 | if (__add_dirty_inode(inode, new)) |
| 473 | struct dir_inode_entry *entry; | 490 | kmem_cache_free(inode_entry_slab, new); |
| 474 | entry = list_entry(this, struct dir_inode_entry, list); | ||
| 475 | if (entry->inode == inode) { | ||
| 476 | kmem_cache_free(inode_entry_slab, new); | ||
| 477 | goto out; | ||
| 478 | } | ||
| 479 | } | ||
| 480 | list_add_tail(&new->list, head); | ||
| 481 | sbi->n_dirty_dirs++; | ||
| 482 | 491 | ||
| 483 | BUG_ON(!S_ISDIR(inode->i_mode)); | ||
| 484 | out: | ||
| 485 | inc_page_count(sbi, F2FS_DIRTY_DENTS); | 492 | inc_page_count(sbi, F2FS_DIRTY_DENTS); |
| 486 | inode_inc_dirty_dents(inode); | 493 | inode_inc_dirty_dents(inode); |
| 487 | SetPagePrivate(page); | 494 | SetPagePrivate(page); |
| 495 | spin_unlock(&sbi->dir_inode_lock); | ||
| 496 | } | ||
| 488 | 497 | ||
| 498 | void add_dirty_dir_inode(struct inode *inode) | ||
| 499 | { | ||
| 500 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | ||
| 501 | struct dir_inode_entry *new; | ||
| 502 | retry: | ||
| 503 | new = kmem_cache_alloc(inode_entry_slab, GFP_NOFS); | ||
| 504 | if (!new) { | ||
| 505 | cond_resched(); | ||
| 506 | goto retry; | ||
| 507 | } | ||
| 508 | new->inode = inode; | ||
| 509 | INIT_LIST_HEAD(&new->list); | ||
| 510 | |||
| 511 | spin_lock(&sbi->dir_inode_lock); | ||
| 512 | if (__add_dirty_inode(inode, new)) | ||
| 513 | kmem_cache_free(inode_entry_slab, new); | ||
| 489 | spin_unlock(&sbi->dir_inode_lock); | 514 | spin_unlock(&sbi->dir_inode_lock); |
| 490 | } | 515 | } |
| 491 | 516 | ||
| @@ -499,8 +524,10 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
| 499 | return; | 524 | return; |
| 500 | 525 | ||
| 501 | spin_lock(&sbi->dir_inode_lock); | 526 | spin_lock(&sbi->dir_inode_lock); |
| 502 | if (atomic_read(&F2FS_I(inode)->dirty_dents)) | 527 | if (atomic_read(&F2FS_I(inode)->dirty_dents)) { |
| 503 | goto out; | 528 | spin_unlock(&sbi->dir_inode_lock); |
| 529 | return; | ||
| 530 | } | ||
| 504 | 531 | ||
| 505 | list_for_each(this, head) { | 532 | list_for_each(this, head) { |
| 506 | struct dir_inode_entry *entry; | 533 | struct dir_inode_entry *entry; |
| @@ -508,12 +535,38 @@ void remove_dirty_dir_inode(struct inode *inode) | |||
| 508 | if (entry->inode == inode) { | 535 | if (entry->inode == inode) { |
| 509 | list_del(&entry->list); | 536 | list_del(&entry->list); |
| 510 | kmem_cache_free(inode_entry_slab, entry); | 537 | kmem_cache_free(inode_entry_slab, entry); |
| 538 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 511 | sbi->n_dirty_dirs--; | 539 | sbi->n_dirty_dirs--; |
| 540 | #endif | ||
| 541 | break; | ||
| 542 | } | ||
| 543 | } | ||
| 544 | spin_unlock(&sbi->dir_inode_lock); | ||
| 545 | |||
| 546 | /* Only from the recovery routine */ | ||
| 547 | if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) { | ||
| 548 | clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT); | ||
| 549 | iput(inode); | ||
| 550 | } | ||
| 551 | } | ||
| 552 | |||
| 553 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *sbi, nid_t ino) | ||
| 554 | { | ||
| 555 | struct list_head *head = &sbi->dir_inode_list; | ||
| 556 | struct list_head *this; | ||
| 557 | struct inode *inode = NULL; | ||
| 558 | |||
| 559 | spin_lock(&sbi->dir_inode_lock); | ||
| 560 | list_for_each(this, head) { | ||
| 561 | struct dir_inode_entry *entry; | ||
| 562 | entry = list_entry(this, struct dir_inode_entry, list); | ||
| 563 | if (entry->inode->i_ino == ino) { | ||
| 564 | inode = entry->inode; | ||
| 512 | break; | 565 | break; |
| 513 | } | 566 | } |
| 514 | } | 567 | } |
| 515 | out: | ||
| 516 | spin_unlock(&sbi->dir_inode_lock); | 568 | spin_unlock(&sbi->dir_inode_lock); |
| 569 | return inode; | ||
| 517 | } | 570 | } |
| 518 | 571 | ||
| 519 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) | 572 | void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi) |
| @@ -595,7 +648,7 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
| 595 | block_t start_blk; | 648 | block_t start_blk; |
| 596 | struct page *cp_page; | 649 | struct page *cp_page; |
| 597 | unsigned int data_sum_blocks, orphan_blocks; | 650 | unsigned int data_sum_blocks, orphan_blocks; |
| 598 | unsigned int crc32 = 0; | 651 | __u32 crc32 = 0; |
| 599 | void *kaddr; | 652 | void *kaddr; |
| 600 | int i; | 653 | int i; |
| 601 | 654 | ||
| @@ -664,8 +717,8 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) | |||
| 664 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); | 717 | get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); |
| 665 | 718 | ||
| 666 | crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); | 719 | crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset)); |
| 667 | *(__le32 *)((unsigned char *)ckpt + | 720 | *((__le32 *)((unsigned char *)ckpt + |
| 668 | le32_to_cpu(ckpt->checksum_offset)) | 721 | le32_to_cpu(ckpt->checksum_offset))) |
| 669 | = cpu_to_le32(crc32); | 722 | = cpu_to_le32(crc32); |
| 670 | 723 | ||
| 671 | start_blk = __start_cp_addr(sbi); | 724 | start_blk = __start_cp_addr(sbi); |
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 91ff93b0b0f4..035f9a345cdf 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c | |||
| @@ -68,7 +68,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
| 68 | struct buffer_head *bh_result) | 68 | struct buffer_head *bh_result) |
| 69 | { | 69 | { |
| 70 | struct f2fs_inode_info *fi = F2FS_I(inode); | 70 | struct f2fs_inode_info *fi = F2FS_I(inode); |
| 71 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 71 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 72 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| 73 | #endif | ||
| 72 | pgoff_t start_fofs, end_fofs; | 74 | pgoff_t start_fofs, end_fofs; |
| 73 | block_t start_blkaddr; | 75 | block_t start_blkaddr; |
| 74 | 76 | ||
| @@ -78,7 +80,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
| 78 | return 0; | 80 | return 0; |
| 79 | } | 81 | } |
| 80 | 82 | ||
| 83 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 81 | sbi->total_hit_ext++; | 84 | sbi->total_hit_ext++; |
| 85 | #endif | ||
| 82 | start_fofs = fi->ext.fofs; | 86 | start_fofs = fi->ext.fofs; |
| 83 | end_fofs = fi->ext.fofs + fi->ext.len - 1; | 87 | end_fofs = fi->ext.fofs + fi->ext.len - 1; |
| 84 | start_blkaddr = fi->ext.blk_addr; | 88 | start_blkaddr = fi->ext.blk_addr; |
| @@ -96,7 +100,9 @@ static int check_extent_cache(struct inode *inode, pgoff_t pgofs, | |||
| 96 | else | 100 | else |
| 97 | bh_result->b_size = UINT_MAX; | 101 | bh_result->b_size = UINT_MAX; |
| 98 | 102 | ||
| 103 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 99 | sbi->read_hit_ext++; | 104 | sbi->read_hit_ext++; |
| 105 | #endif | ||
| 100 | read_unlock(&fi->ext.ext_lock); | 106 | read_unlock(&fi->ext.ext_lock); |
| 101 | return 1; | 107 | return 1; |
| 102 | } | 108 | } |
| @@ -199,7 +205,7 @@ struct page *find_data_page(struct inode *inode, pgoff_t index, bool sync) | |||
| 199 | if (dn.data_blkaddr == NEW_ADDR) | 205 | if (dn.data_blkaddr == NEW_ADDR) |
| 200 | return ERR_PTR(-EINVAL); | 206 | return ERR_PTR(-EINVAL); |
| 201 | 207 | ||
| 202 | page = grab_cache_page(mapping, index); | 208 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); |
| 203 | if (!page) | 209 | if (!page) |
| 204 | return ERR_PTR(-ENOMEM); | 210 | return ERR_PTR(-ENOMEM); |
| 205 | 211 | ||
| @@ -233,18 +239,23 @@ struct page *get_lock_data_page(struct inode *inode, pgoff_t index) | |||
| 233 | struct page *page; | 239 | struct page *page; |
| 234 | int err; | 240 | int err; |
| 235 | 241 | ||
| 242 | repeat: | ||
| 243 | page = grab_cache_page_write_begin(mapping, index, AOP_FLAG_NOFS); | ||
| 244 | if (!page) | ||
| 245 | return ERR_PTR(-ENOMEM); | ||
| 246 | |||
| 236 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 247 | set_new_dnode(&dn, inode, NULL, NULL, 0); |
| 237 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); | 248 | err = get_dnode_of_data(&dn, index, LOOKUP_NODE); |
| 238 | if (err) | 249 | if (err) { |
| 250 | f2fs_put_page(page, 1); | ||
| 239 | return ERR_PTR(err); | 251 | return ERR_PTR(err); |
| 252 | } | ||
| 240 | f2fs_put_dnode(&dn); | 253 | f2fs_put_dnode(&dn); |
| 241 | 254 | ||
| 242 | if (dn.data_blkaddr == NULL_ADDR) | 255 | if (dn.data_blkaddr == NULL_ADDR) { |
| 256 | f2fs_put_page(page, 1); | ||
| 243 | return ERR_PTR(-ENOENT); | 257 | return ERR_PTR(-ENOENT); |
| 244 | repeat: | 258 | } |
| 245 | page = grab_cache_page(mapping, index); | ||
| 246 | if (!page) | ||
| 247 | return ERR_PTR(-ENOMEM); | ||
| 248 | 259 | ||
| 249 | if (PageUptodate(page)) | 260 | if (PageUptodate(page)) |
| 250 | return page; | 261 | return page; |
| @@ -274,9 +285,10 @@ repeat: | |||
| 274 | * | 285 | * |
| 275 | * Also, caller should grab and release a mutex by calling mutex_lock_op() and | 286 | * Also, caller should grab and release a mutex by calling mutex_lock_op() and |
| 276 | * mutex_unlock_op(). | 287 | * mutex_unlock_op(). |
| 288 | * Note that, npage is set only by make_empty_dir. | ||
| 277 | */ | 289 | */ |
| 278 | struct page *get_new_data_page(struct inode *inode, pgoff_t index, | 290 | struct page *get_new_data_page(struct inode *inode, |
| 279 | bool new_i_size) | 291 | struct page *npage, pgoff_t index, bool new_i_size) |
| 280 | { | 292 | { |
| 281 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 293 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| 282 | struct address_space *mapping = inode->i_mapping; | 294 | struct address_space *mapping = inode->i_mapping; |
| @@ -284,18 +296,20 @@ struct page *get_new_data_page(struct inode *inode, pgoff_t index, | |||
| 284 | struct dnode_of_data dn; | 296 | struct dnode_of_data dn; |
| 285 | int err; | 297 | int err; |
| 286 | 298 | ||
| 287 | set_new_dnode(&dn, inode, NULL, NULL, 0); | 299 | set_new_dnode(&dn, inode, npage, npage, 0); |
| 288 | err = get_dnode_of_data(&dn, index, ALLOC_NODE); | 300 | err = get_dnode_of_data(&dn, index, ALLOC_NODE); |
| 289 | if (err) | 301 | if (err) |
| 290 | return ERR_PTR(err); | 302 | return ERR_PTR(err); |
| 291 | 303 | ||
| 292 | if (dn.data_blkaddr == NULL_ADDR) { | 304 | if (dn.data_blkaddr == NULL_ADDR) { |
| 293 | if (reserve_new_block(&dn)) { | 305 | if (reserve_new_block(&dn)) { |
| 294 | f2fs_put_dnode(&dn); | 306 | if (!npage) |
| 307 | f2fs_put_dnode(&dn); | ||
| 295 | return ERR_PTR(-ENOSPC); | 308 | return ERR_PTR(-ENOSPC); |
| 296 | } | 309 | } |
| 297 | } | 310 | } |
| 298 | f2fs_put_dnode(&dn); | 311 | if (!npage) |
| 312 | f2fs_put_dnode(&dn); | ||
| 299 | repeat: | 313 | repeat: |
| 300 | page = grab_cache_page(mapping, index); | 314 | page = grab_cache_page(mapping, index); |
| 301 | if (!page) | 315 | if (!page) |
| @@ -325,6 +339,8 @@ repeat: | |||
| 325 | if (new_i_size && | 339 | if (new_i_size && |
| 326 | i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { | 340 | i_size_read(inode) < ((index + 1) << PAGE_CACHE_SHIFT)) { |
| 327 | i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); | 341 | i_size_write(inode, ((index + 1) << PAGE_CACHE_SHIFT)); |
| 342 | /* Only the directory inode sets new_i_size */ | ||
| 343 | set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR); | ||
| 328 | mark_inode_dirty_sync(inode); | 344 | mark_inode_dirty_sync(inode); |
| 329 | } | 345 | } |
| 330 | return page; | 346 | return page; |
| @@ -481,8 +497,9 @@ int do_write_data_page(struct page *page) | |||
| 481 | * If current allocation needs SSR, | 497 | * If current allocation needs SSR, |
| 482 | * it had better in-place writes for updated data. | 498 | * it had better in-place writes for updated data. |
| 483 | */ | 499 | */ |
| 484 | if (old_blk_addr != NEW_ADDR && !is_cold_data(page) && | 500 | if (unlikely(old_blk_addr != NEW_ADDR && |
| 485 | need_inplace_update(inode)) { | 501 | !is_cold_data(page) && |
| 502 | need_inplace_update(inode))) { | ||
| 486 | rewrite_data_page(F2FS_SB(inode->i_sb), page, | 503 | rewrite_data_page(F2FS_SB(inode->i_sb), page, |
| 487 | old_blk_addr); | 504 | old_blk_addr); |
| 488 | } else { | 505 | } else { |
| @@ -684,6 +701,27 @@ err: | |||
| 684 | return err; | 701 | return err; |
| 685 | } | 702 | } |
| 686 | 703 | ||
| 704 | static int f2fs_write_end(struct file *file, | ||
| 705 | struct address_space *mapping, | ||
| 706 | loff_t pos, unsigned len, unsigned copied, | ||
| 707 | struct page *page, void *fsdata) | ||
| 708 | { | ||
| 709 | struct inode *inode = page->mapping->host; | ||
| 710 | |||
| 711 | SetPageUptodate(page); | ||
| 712 | set_page_dirty(page); | ||
| 713 | |||
| 714 | if (pos + copied > i_size_read(inode)) { | ||
| 715 | i_size_write(inode, pos + copied); | ||
| 716 | mark_inode_dirty(inode); | ||
| 717 | update_inode_page(inode); | ||
| 718 | } | ||
| 719 | |||
| 720 | unlock_page(page); | ||
| 721 | page_cache_release(page); | ||
| 722 | return copied; | ||
| 723 | } | ||
| 724 | |||
| 687 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | 725 | static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, |
| 688 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) | 726 | const struct iovec *iov, loff_t offset, unsigned long nr_segs) |
| 689 | { | 727 | { |
| @@ -698,7 +736,8 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, | |||
| 698 | get_data_block_ro); | 736 | get_data_block_ro); |
| 699 | } | 737 | } |
| 700 | 738 | ||
| 701 | static void f2fs_invalidate_data_page(struct page *page, unsigned long offset) | 739 | static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, |
| 740 | unsigned int length) | ||
| 702 | { | 741 | { |
| 703 | struct inode *inode = page->mapping->host; | 742 | struct inode *inode = page->mapping->host; |
| 704 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 743 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| @@ -740,7 +779,7 @@ const struct address_space_operations f2fs_dblock_aops = { | |||
| 740 | .writepage = f2fs_write_data_page, | 779 | .writepage = f2fs_write_data_page, |
| 741 | .writepages = f2fs_write_data_pages, | 780 | .writepages = f2fs_write_data_pages, |
| 742 | .write_begin = f2fs_write_begin, | 781 | .write_begin = f2fs_write_begin, |
| 743 | .write_end = nobh_write_end, | 782 | .write_end = f2fs_write_end, |
| 744 | .set_page_dirty = f2fs_set_data_page_dirty, | 783 | .set_page_dirty = f2fs_set_data_page_dirty, |
| 745 | .invalidatepage = f2fs_invalidate_data_page, | 784 | .invalidatepage = f2fs_invalidate_data_page, |
| 746 | .releasepage = f2fs_release_data_page, | 785 | .releasepage = f2fs_release_data_page, |
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 8d9943786c31..0d6c6aafb235 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c | |||
| @@ -175,12 +175,12 @@ get_cache: | |||
| 175 | 175 | ||
| 176 | static int stat_show(struct seq_file *s, void *v) | 176 | static int stat_show(struct seq_file *s, void *v) |
| 177 | { | 177 | { |
| 178 | struct f2fs_stat_info *si, *next; | 178 | struct f2fs_stat_info *si; |
| 179 | int i = 0; | 179 | int i = 0; |
| 180 | int j; | 180 | int j; |
| 181 | 181 | ||
| 182 | mutex_lock(&f2fs_stat_mutex); | 182 | mutex_lock(&f2fs_stat_mutex); |
| 183 | list_for_each_entry_safe(si, next, &f2fs_stat_list, stat_list) { | 183 | list_for_each_entry(si, &f2fs_stat_list, stat_list) { |
| 184 | char devname[BDEVNAME_SIZE]; | 184 | char devname[BDEVNAME_SIZE]; |
| 185 | 185 | ||
| 186 | update_general_status(si->sbi); | 186 | update_general_status(si->sbi); |
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 1ac6b93036b7..9d1cd423450d 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c | |||
| @@ -13,6 +13,7 @@ | |||
| 13 | #include "f2fs.h" | 13 | #include "f2fs.h" |
| 14 | #include "node.h" | 14 | #include "node.h" |
| 15 | #include "acl.h" | 15 | #include "acl.h" |
| 16 | #include "xattr.h" | ||
| 16 | 17 | ||
| 17 | static unsigned long dir_blocks(struct inode *inode) | 18 | static unsigned long dir_blocks(struct inode *inode) |
| 18 | { | 19 | { |
| @@ -215,9 +216,9 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir, | |||
| 215 | 216 | ||
| 216 | struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) | 217 | struct f2fs_dir_entry *f2fs_parent_dir(struct inode *dir, struct page **p) |
| 217 | { | 218 | { |
| 218 | struct page *page = NULL; | 219 | struct page *page; |
| 219 | struct f2fs_dir_entry *de = NULL; | 220 | struct f2fs_dir_entry *de; |
| 220 | struct f2fs_dentry_block *dentry_blk = NULL; | 221 | struct f2fs_dentry_block *dentry_blk; |
| 221 | 222 | ||
| 222 | page = get_lock_data_page(dir, 0); | 223 | page = get_lock_data_page(dir, 0); |
| 223 | if (IS_ERR(page)) | 224 | if (IS_ERR(page)) |
| @@ -264,15 +265,10 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, | |||
| 264 | f2fs_put_page(page, 1); | 265 | f2fs_put_page(page, 1); |
| 265 | } | 266 | } |
| 266 | 267 | ||
| 267 | void init_dent_inode(const struct qstr *name, struct page *ipage) | 268 | static void init_dent_inode(const struct qstr *name, struct page *ipage) |
| 268 | { | 269 | { |
| 269 | struct f2fs_node *rn; | 270 | struct f2fs_node *rn; |
| 270 | 271 | ||
| 271 | if (IS_ERR(ipage)) | ||
| 272 | return; | ||
| 273 | |||
| 274 | wait_on_page_writeback(ipage); | ||
| 275 | |||
| 276 | /* copy name info. to this inode page */ | 272 | /* copy name info. to this inode page */ |
| 277 | rn = (struct f2fs_node *)page_address(ipage); | 273 | rn = (struct f2fs_node *)page_address(ipage); |
| 278 | rn->i.i_namelen = cpu_to_le32(name->len); | 274 | rn->i.i_namelen = cpu_to_le32(name->len); |
| @@ -280,14 +276,15 @@ void init_dent_inode(const struct qstr *name, struct page *ipage) | |||
| 280 | set_page_dirty(ipage); | 276 | set_page_dirty(ipage); |
| 281 | } | 277 | } |
| 282 | 278 | ||
| 283 | static int make_empty_dir(struct inode *inode, struct inode *parent) | 279 | static int make_empty_dir(struct inode *inode, |
| 280 | struct inode *parent, struct page *page) | ||
| 284 | { | 281 | { |
| 285 | struct page *dentry_page; | 282 | struct page *dentry_page; |
| 286 | struct f2fs_dentry_block *dentry_blk; | 283 | struct f2fs_dentry_block *dentry_blk; |
| 287 | struct f2fs_dir_entry *de; | 284 | struct f2fs_dir_entry *de; |
| 288 | void *kaddr; | 285 | void *kaddr; |
| 289 | 286 | ||
| 290 | dentry_page = get_new_data_page(inode, 0, true); | 287 | dentry_page = get_new_data_page(inode, page, 0, true); |
| 291 | if (IS_ERR(dentry_page)) | 288 | if (IS_ERR(dentry_page)) |
| 292 | return PTR_ERR(dentry_page); | 289 | return PTR_ERR(dentry_page); |
| 293 | 290 | ||
| @@ -317,63 +314,76 @@ static int make_empty_dir(struct inode *inode, struct inode *parent) | |||
| 317 | return 0; | 314 | return 0; |
| 318 | } | 315 | } |
| 319 | 316 | ||
| 320 | static int init_inode_metadata(struct inode *inode, | 317 | static struct page *init_inode_metadata(struct inode *inode, |
| 321 | struct inode *dir, const struct qstr *name) | 318 | struct inode *dir, const struct qstr *name) |
| 322 | { | 319 | { |
| 320 | struct page *page; | ||
| 321 | int err; | ||
| 322 | |||
| 323 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | 323 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { |
| 324 | int err; | 324 | page = new_inode_page(inode, name); |
| 325 | err = new_inode_page(inode, name); | 325 | if (IS_ERR(page)) |
| 326 | if (err) | 326 | return page; |
| 327 | return err; | ||
| 328 | 327 | ||
| 329 | if (S_ISDIR(inode->i_mode)) { | 328 | if (S_ISDIR(inode->i_mode)) { |
| 330 | err = make_empty_dir(inode, dir); | 329 | err = make_empty_dir(inode, dir, page); |
| 331 | if (err) { | 330 | if (err) |
| 332 | remove_inode_page(inode); | 331 | goto error; |
| 333 | return err; | ||
| 334 | } | ||
| 335 | } | 332 | } |
| 336 | 333 | ||
| 337 | err = f2fs_init_acl(inode, dir); | 334 | err = f2fs_init_acl(inode, dir); |
| 338 | if (err) { | 335 | if (err) |
| 339 | remove_inode_page(inode); | 336 | goto error; |
| 340 | return err; | 337 | |
| 341 | } | 338 | err = f2fs_init_security(inode, dir, name, page); |
| 339 | if (err) | ||
| 340 | goto error; | ||
| 341 | |||
| 342 | wait_on_page_writeback(page); | ||
| 342 | } else { | 343 | } else { |
| 343 | struct page *ipage; | 344 | page = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); |
| 344 | ipage = get_node_page(F2FS_SB(dir->i_sb), inode->i_ino); | 345 | if (IS_ERR(page)) |
| 345 | if (IS_ERR(ipage)) | 346 | return page; |
| 346 | return PTR_ERR(ipage); | 347 | |
| 347 | set_cold_node(inode, ipage); | 348 | wait_on_page_writeback(page); |
| 348 | init_dent_inode(name, ipage); | 349 | set_cold_node(inode, page); |
| 349 | f2fs_put_page(ipage, 1); | ||
| 350 | } | 350 | } |
| 351 | |||
| 352 | init_dent_inode(name, page); | ||
| 353 | |||
| 354 | /* | ||
| 355 | * This file should be checkpointed during fsync. | ||
| 356 | * We lost i_pino from now on. | ||
| 357 | */ | ||
| 351 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { | 358 | if (is_inode_flag_set(F2FS_I(inode), FI_INC_LINK)) { |
| 359 | file_lost_pino(inode); | ||
| 352 | inc_nlink(inode); | 360 | inc_nlink(inode); |
| 353 | update_inode_page(inode); | ||
| 354 | } | 361 | } |
| 355 | return 0; | 362 | return page; |
| 363 | |||
| 364 | error: | ||
| 365 | f2fs_put_page(page, 1); | ||
| 366 | remove_inode_page(inode); | ||
| 367 | return ERR_PTR(err); | ||
| 356 | } | 368 | } |
| 357 | 369 | ||
| 358 | static void update_parent_metadata(struct inode *dir, struct inode *inode, | 370 | static void update_parent_metadata(struct inode *dir, struct inode *inode, |
| 359 | unsigned int current_depth) | 371 | unsigned int current_depth) |
| 360 | { | 372 | { |
| 361 | bool need_dir_update = false; | ||
| 362 | |||
| 363 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { | 373 | if (is_inode_flag_set(F2FS_I(inode), FI_NEW_INODE)) { |
| 364 | if (S_ISDIR(inode->i_mode)) { | 374 | if (S_ISDIR(inode->i_mode)) { |
| 365 | inc_nlink(dir); | 375 | inc_nlink(dir); |
| 366 | need_dir_update = true; | 376 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
| 367 | } | 377 | } |
| 368 | clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); | 378 | clear_inode_flag(F2FS_I(inode), FI_NEW_INODE); |
| 369 | } | 379 | } |
| 370 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; | 380 | dir->i_mtime = dir->i_ctime = CURRENT_TIME; |
| 371 | if (F2FS_I(dir)->i_current_depth != current_depth) { | 381 | if (F2FS_I(dir)->i_current_depth != current_depth) { |
| 372 | F2FS_I(dir)->i_current_depth = current_depth; | 382 | F2FS_I(dir)->i_current_depth = current_depth; |
| 373 | need_dir_update = true; | 383 | set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); |
| 374 | } | 384 | } |
| 375 | 385 | ||
| 376 | if (need_dir_update) | 386 | if (is_inode_flag_set(F2FS_I(dir), FI_UPDATE_DIR)) |
| 377 | update_inode_page(dir); | 387 | update_inode_page(dir); |
| 378 | else | 388 | else |
| 379 | mark_inode_dirty(dir); | 389 | mark_inode_dirty(dir); |
| @@ -423,6 +433,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name, struct inode *in | |||
| 423 | struct page *dentry_page = NULL; | 433 | struct page *dentry_page = NULL; |
| 424 | struct f2fs_dentry_block *dentry_blk = NULL; | 434 | struct f2fs_dentry_block *dentry_blk = NULL; |
| 425 | int slots = GET_DENTRY_SLOTS(namelen); | 435 | int slots = GET_DENTRY_SLOTS(namelen); |
| 436 | struct page *page; | ||
| 426 | int err = 0; | 437 | int err = 0; |
| 427 | int i; | 438 | int i; |
| 428 | 439 | ||
| @@ -448,7 +459,7 @@ start: | |||
| 448 | bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); | 459 | bidx = dir_block_index(level, (le32_to_cpu(dentry_hash) % nbucket)); |
| 449 | 460 | ||
| 450 | for (block = bidx; block <= (bidx + nblock - 1); block++) { | 461 | for (block = bidx; block <= (bidx + nblock - 1); block++) { |
| 451 | dentry_page = get_new_data_page(dir, block, true); | 462 | dentry_page = get_new_data_page(dir, NULL, block, true); |
| 452 | if (IS_ERR(dentry_page)) | 463 | if (IS_ERR(dentry_page)) |
| 453 | return PTR_ERR(dentry_page); | 464 | return PTR_ERR(dentry_page); |
| 454 | 465 | ||
| @@ -465,12 +476,13 @@ start: | |||
| 465 | ++level; | 476 | ++level; |
| 466 | goto start; | 477 | goto start; |
| 467 | add_dentry: | 478 | add_dentry: |
| 468 | err = init_inode_metadata(inode, dir, name); | ||
| 469 | if (err) | ||
| 470 | goto fail; | ||
| 471 | |||
| 472 | wait_on_page_writeback(dentry_page); | 479 | wait_on_page_writeback(dentry_page); |
| 473 | 480 | ||
| 481 | page = init_inode_metadata(inode, dir, name); | ||
| 482 | if (IS_ERR(page)) { | ||
| 483 | err = PTR_ERR(page); | ||
| 484 | goto fail; | ||
| 485 | } | ||
| 474 | de = &dentry_blk->dentry[bit_pos]; | 486 | de = &dentry_blk->dentry[bit_pos]; |
| 475 | de->hash_code = dentry_hash; | 487 | de->hash_code = dentry_hash; |
| 476 | de->name_len = cpu_to_le16(namelen); | 488 | de->name_len = cpu_to_le16(namelen); |
| @@ -481,11 +493,14 @@ add_dentry: | |||
| 481 | test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); | 493 | test_and_set_bit_le(bit_pos + i, &dentry_blk->dentry_bitmap); |
| 482 | set_page_dirty(dentry_page); | 494 | set_page_dirty(dentry_page); |
| 483 | 495 | ||
| 484 | update_parent_metadata(dir, inode, current_depth); | 496 | /* we don't need to mark_inode_dirty now */ |
| 485 | |||
| 486 | /* update parent inode number before releasing dentry page */ | ||
| 487 | F2FS_I(inode)->i_pino = dir->i_ino; | 497 | F2FS_I(inode)->i_pino = dir->i_ino; |
| 498 | update_inode(inode, page); | ||
| 499 | f2fs_put_page(page, 1); | ||
| 500 | |||
| 501 | update_parent_metadata(dir, inode, current_depth); | ||
| 488 | fail: | 502 | fail: |
| 503 | clear_inode_flag(F2FS_I(dir), FI_UPDATE_DIR); | ||
| 489 | kunmap(dentry_page); | 504 | kunmap(dentry_page); |
| 490 | f2fs_put_page(dentry_page, 1); | 505 | f2fs_put_page(dentry_page, 1); |
| 491 | return err; | 506 | return err; |
| @@ -591,24 +606,19 @@ bool f2fs_empty_dir(struct inode *dir) | |||
| 591 | return true; | 606 | return true; |
| 592 | } | 607 | } |
| 593 | 608 | ||
| 594 | static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) | 609 | static int f2fs_readdir(struct file *file, struct dir_context *ctx) |
| 595 | { | 610 | { |
| 596 | unsigned long pos = file->f_pos; | ||
| 597 | struct inode *inode = file_inode(file); | 611 | struct inode *inode = file_inode(file); |
| 598 | unsigned long npages = dir_blocks(inode); | 612 | unsigned long npages = dir_blocks(inode); |
| 599 | unsigned char *types = NULL; | ||
| 600 | unsigned int bit_pos = 0, start_bit_pos = 0; | 613 | unsigned int bit_pos = 0, start_bit_pos = 0; |
| 601 | int over = 0; | ||
| 602 | struct f2fs_dentry_block *dentry_blk = NULL; | 614 | struct f2fs_dentry_block *dentry_blk = NULL; |
| 603 | struct f2fs_dir_entry *de = NULL; | 615 | struct f2fs_dir_entry *de = NULL; |
| 604 | struct page *dentry_page = NULL; | 616 | struct page *dentry_page = NULL; |
| 605 | unsigned int n = 0; | 617 | unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK); |
| 606 | unsigned char d_type = DT_UNKNOWN; | 618 | unsigned char d_type = DT_UNKNOWN; |
| 607 | int slots; | 619 | int slots; |
| 608 | 620 | ||
| 609 | types = f2fs_filetype_table; | 621 | bit_pos = ((unsigned long)ctx->pos % NR_DENTRY_IN_BLOCK); |
| 610 | bit_pos = (pos % NR_DENTRY_IN_BLOCK); | ||
| 611 | n = (pos / NR_DENTRY_IN_BLOCK); | ||
| 612 | 622 | ||
| 613 | for ( ; n < npages; n++) { | 623 | for ( ; n < npages; n++) { |
| 614 | dentry_page = get_lock_data_page(inode, n); | 624 | dentry_page = get_lock_data_page(inode, n); |
| @@ -618,31 +628,28 @@ static int f2fs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 618 | start_bit_pos = bit_pos; | 628 | start_bit_pos = bit_pos; |
| 619 | dentry_blk = kmap(dentry_page); | 629 | dentry_blk = kmap(dentry_page); |
| 620 | while (bit_pos < NR_DENTRY_IN_BLOCK) { | 630 | while (bit_pos < NR_DENTRY_IN_BLOCK) { |
| 621 | d_type = DT_UNKNOWN; | ||
| 622 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, | 631 | bit_pos = find_next_bit_le(&dentry_blk->dentry_bitmap, |
| 623 | NR_DENTRY_IN_BLOCK, | 632 | NR_DENTRY_IN_BLOCK, |
| 624 | bit_pos); | 633 | bit_pos); |
| 625 | if (bit_pos >= NR_DENTRY_IN_BLOCK) | 634 | if (bit_pos >= NR_DENTRY_IN_BLOCK) |
| 626 | break; | 635 | break; |
| 627 | 636 | ||
| 637 | ctx->pos += bit_pos - start_bit_pos; | ||
| 628 | de = &dentry_blk->dentry[bit_pos]; | 638 | de = &dentry_blk->dentry[bit_pos]; |
| 629 | if (types && de->file_type < F2FS_FT_MAX) | 639 | if (de->file_type < F2FS_FT_MAX) |
| 630 | d_type = types[de->file_type]; | 640 | d_type = f2fs_filetype_table[de->file_type]; |
| 631 | 641 | else | |
| 632 | over = filldir(dirent, | 642 | d_type = DT_UNKNOWN; |
| 633 | dentry_blk->filename[bit_pos], | 643 | if (!dir_emit(ctx, |
| 634 | le16_to_cpu(de->name_len), | 644 | dentry_blk->filename[bit_pos], |
| 635 | (n * NR_DENTRY_IN_BLOCK) + bit_pos, | 645 | le16_to_cpu(de->name_len), |
| 636 | le32_to_cpu(de->ino), d_type); | 646 | le32_to_cpu(de->ino), d_type)) |
| 637 | if (over) { | ||
| 638 | file->f_pos += bit_pos - start_bit_pos; | ||
| 639 | goto success; | 647 | goto success; |
| 640 | } | ||
| 641 | slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); | 648 | slots = GET_DENTRY_SLOTS(le16_to_cpu(de->name_len)); |
| 642 | bit_pos += slots; | 649 | bit_pos += slots; |
| 643 | } | 650 | } |
| 644 | bit_pos = 0; | 651 | bit_pos = 0; |
| 645 | file->f_pos = (n + 1) * NR_DENTRY_IN_BLOCK; | 652 | ctx->pos = (n + 1) * NR_DENTRY_IN_BLOCK; |
| 646 | kunmap(dentry_page); | 653 | kunmap(dentry_page); |
| 647 | f2fs_put_page(dentry_page, 1); | 654 | f2fs_put_page(dentry_page, 1); |
| 648 | dentry_page = NULL; | 655 | dentry_page = NULL; |
| @@ -659,7 +666,7 @@ success: | |||
| 659 | const struct file_operations f2fs_dir_operations = { | 666 | const struct file_operations f2fs_dir_operations = { |
| 660 | .llseek = generic_file_llseek, | 667 | .llseek = generic_file_llseek, |
| 661 | .read = generic_read_dir, | 668 | .read = generic_read_dir, |
| 662 | .readdir = f2fs_readdir, | 669 | .iterate = f2fs_readdir, |
| 663 | .fsync = f2fs_sync_file, | 670 | .fsync = f2fs_sync_file, |
| 664 | .unlocked_ioctl = f2fs_ioctl, | 671 | .unlocked_ioctl = f2fs_ioctl, |
| 665 | }; | 672 | }; |
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20aab02f2a42..467d42d65c48 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h | |||
| @@ -37,21 +37,35 @@ | |||
| 37 | typecheck(unsigned long long, b) && \ | 37 | typecheck(unsigned long long, b) && \ |
| 38 | ((long long)((a) - (b)) > 0)) | 38 | ((long long)((a) - (b)) > 0)) |
| 39 | 39 | ||
| 40 | typedef u64 block_t; | 40 | typedef u32 block_t; /* |
| 41 | * should not change u32, since it is the on-disk block | ||
| 42 | * address format, __le32. | ||
| 43 | */ | ||
| 41 | typedef u32 nid_t; | 44 | typedef u32 nid_t; |
| 42 | 45 | ||
| 43 | struct f2fs_mount_info { | 46 | struct f2fs_mount_info { |
| 44 | unsigned int opt; | 47 | unsigned int opt; |
| 45 | }; | 48 | }; |
| 46 | 49 | ||
| 47 | static inline __u32 f2fs_crc32(void *buff, size_t len) | 50 | #define CRCPOLY_LE 0xedb88320 |
| 51 | |||
| 52 | static inline __u32 f2fs_crc32(void *buf, size_t len) | ||
| 48 | { | 53 | { |
| 49 | return crc32_le(F2FS_SUPER_MAGIC, buff, len); | 54 | unsigned char *p = (unsigned char *)buf; |
| 55 | __u32 crc = F2FS_SUPER_MAGIC; | ||
| 56 | int i; | ||
| 57 | |||
| 58 | while (len--) { | ||
| 59 | crc ^= *p++; | ||
| 60 | for (i = 0; i < 8; i++) | ||
| 61 | crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0); | ||
| 62 | } | ||
| 63 | return crc; | ||
| 50 | } | 64 | } |
| 51 | 65 | ||
| 52 | static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) | 66 | static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size) |
| 53 | { | 67 | { |
| 54 | return f2fs_crc32(buff, buff_size) == blk_crc; | 68 | return f2fs_crc32(buf, buf_size) == blk_crc; |
| 55 | } | 69 | } |
| 56 | 70 | ||
| 57 | /* | 71 | /* |
| @@ -148,7 +162,7 @@ struct extent_info { | |||
| 148 | * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. | 162 | * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. |
| 149 | */ | 163 | */ |
| 150 | #define FADVISE_COLD_BIT 0x01 | 164 | #define FADVISE_COLD_BIT 0x01 |
| 151 | #define FADVISE_CP_BIT 0x02 | 165 | #define FADVISE_LOST_PINO_BIT 0x02 |
| 152 | 166 | ||
| 153 | struct f2fs_inode_info { | 167 | struct f2fs_inode_info { |
| 154 | struct inode vfs_inode; /* serve a vfs inode */ | 168 | struct inode vfs_inode; /* serve a vfs inode */ |
| @@ -369,7 +383,6 @@ struct f2fs_sb_info { | |||
| 369 | /* for directory inode management */ | 383 | /* for directory inode management */ |
| 370 | struct list_head dir_inode_list; /* dir inode list */ | 384 | struct list_head dir_inode_list; /* dir inode list */ |
| 371 | spinlock_t dir_inode_lock; /* for dir inode list lock */ | 385 | spinlock_t dir_inode_lock; /* for dir inode list lock */ |
| 372 | unsigned int n_dirty_dirs; /* # of dir inodes */ | ||
| 373 | 386 | ||
| 374 | /* basic file system units */ | 387 | /* basic file system units */ |
| 375 | unsigned int log_sectors_per_block; /* log2 sectors per block */ | 388 | unsigned int log_sectors_per_block; /* log2 sectors per block */ |
| @@ -406,12 +419,15 @@ struct f2fs_sb_info { | |||
| 406 | * for stat information. | 419 | * for stat information. |
| 407 | * one is for the LFS mode, and the other is for the SSR mode. | 420 | * one is for the LFS mode, and the other is for the SSR mode. |
| 408 | */ | 421 | */ |
| 422 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 409 | struct f2fs_stat_info *stat_info; /* FS status information */ | 423 | struct f2fs_stat_info *stat_info; /* FS status information */ |
| 410 | unsigned int segment_count[2]; /* # of allocated segments */ | 424 | unsigned int segment_count[2]; /* # of allocated segments */ |
| 411 | unsigned int block_count[2]; /* # of allocated blocks */ | 425 | unsigned int block_count[2]; /* # of allocated blocks */ |
| 412 | unsigned int last_victim[2]; /* last victim segment # */ | ||
| 413 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ | 426 | int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ |
| 414 | int bg_gc; /* background gc calls */ | 427 | int bg_gc; /* background gc calls */ |
| 428 | unsigned int n_dirty_dirs; /* # of dir inodes */ | ||
| 429 | #endif | ||
| 430 | unsigned int last_victim[2]; /* last victim segment # */ | ||
| 415 | spinlock_t stat_lock; /* lock for stat operations */ | 431 | spinlock_t stat_lock; /* lock for stat operations */ |
| 416 | }; | 432 | }; |
| 417 | 433 | ||
| @@ -495,9 +511,17 @@ static inline void clear_ckpt_flags(struct f2fs_checkpoint *cp, unsigned int f) | |||
| 495 | 511 | ||
| 496 | static inline void mutex_lock_all(struct f2fs_sb_info *sbi) | 512 | static inline void mutex_lock_all(struct f2fs_sb_info *sbi) |
| 497 | { | 513 | { |
| 498 | int i = 0; | 514 | int i; |
| 499 | for (; i < NR_GLOBAL_LOCKS; i++) | 515 | |
| 500 | mutex_lock(&sbi->fs_lock[i]); | 516 | for (i = 0; i < NR_GLOBAL_LOCKS; i++) { |
| 517 | /* | ||
| 518 | * This is the only time we take multiple fs_lock[] | ||
| 519 | * instances; the order is immaterial since we | ||
| 520 | * always hold cp_mutex, which serializes multiple | ||
| 521 | * such operations. | ||
| 522 | */ | ||
| 523 | mutex_lock_nest_lock(&sbi->fs_lock[i], &sbi->cp_mutex); | ||
| 524 | } | ||
| 501 | } | 525 | } |
| 502 | 526 | ||
| 503 | static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) | 527 | static inline void mutex_unlock_all(struct f2fs_sb_info *sbi) |
| @@ -843,9 +867,12 @@ static inline int f2fs_clear_bit(unsigned int nr, char *addr) | |||
| 843 | /* used for f2fs_inode_info->flags */ | 867 | /* used for f2fs_inode_info->flags */ |
| 844 | enum { | 868 | enum { |
| 845 | FI_NEW_INODE, /* indicate newly allocated inode */ | 869 | FI_NEW_INODE, /* indicate newly allocated inode */ |
| 870 | FI_DIRTY_INODE, /* indicate inode is dirty or not */ | ||
| 846 | FI_INC_LINK, /* need to increment i_nlink */ | 871 | FI_INC_LINK, /* need to increment i_nlink */ |
| 847 | FI_ACL_MODE, /* indicate acl mode */ | 872 | FI_ACL_MODE, /* indicate acl mode */ |
| 848 | FI_NO_ALLOC, /* should not allocate any blocks */ | 873 | FI_NO_ALLOC, /* should not allocate any blocks */ |
| 874 | FI_UPDATE_DIR, /* should update inode block for consistency */ | ||
| 875 | FI_DELAY_IPUT, /* used for the recovery */ | ||
| 849 | }; | 876 | }; |
| 850 | 877 | ||
| 851 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) | 878 | static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) |
| @@ -878,14 +905,21 @@ static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) | |||
| 878 | return 0; | 905 | return 0; |
| 879 | } | 906 | } |
| 880 | 907 | ||
| 908 | static inline int f2fs_readonly(struct super_block *sb) | ||
| 909 | { | ||
| 910 | return sb->s_flags & MS_RDONLY; | ||
| 911 | } | ||
| 912 | |||
| 881 | /* | 913 | /* |
| 882 | * file.c | 914 | * file.c |
| 883 | */ | 915 | */ |
| 884 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); | 916 | int f2fs_sync_file(struct file *, loff_t, loff_t, int); |
| 885 | void truncate_data_blocks(struct dnode_of_data *); | 917 | void truncate_data_blocks(struct dnode_of_data *); |
| 886 | void f2fs_truncate(struct inode *); | 918 | void f2fs_truncate(struct inode *); |
| 919 | int f2fs_getattr(struct vfsmount *, struct dentry *, struct kstat *); | ||
| 887 | int f2fs_setattr(struct dentry *, struct iattr *); | 920 | int f2fs_setattr(struct dentry *, struct iattr *); |
| 888 | int truncate_hole(struct inode *, pgoff_t, pgoff_t); | 921 | int truncate_hole(struct inode *, pgoff_t, pgoff_t); |
| 922 | int truncate_data_blocks_range(struct dnode_of_data *, int); | ||
| 889 | long f2fs_ioctl(struct file *, unsigned int, unsigned long); | 923 | long f2fs_ioctl(struct file *, unsigned int, unsigned long); |
| 890 | long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); | 924 | long f2fs_compat_ioctl(struct file *, unsigned int, unsigned long); |
| 891 | 925 | ||
| @@ -913,7 +947,6 @@ struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); | |||
| 913 | ino_t f2fs_inode_by_name(struct inode *, struct qstr *); | 947 | ino_t f2fs_inode_by_name(struct inode *, struct qstr *); |
| 914 | void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, | 948 | void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, |
| 915 | struct page *, struct inode *); | 949 | struct page *, struct inode *); |
| 916 | void init_dent_inode(const struct qstr *, struct page *); | ||
| 917 | int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); | 950 | int __f2fs_add_link(struct inode *, const struct qstr *, struct inode *); |
| 918 | void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); | 951 | void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); |
| 919 | int f2fs_make_empty(struct inode *, struct inode *); | 952 | int f2fs_make_empty(struct inode *, struct inode *); |
| @@ -948,8 +981,8 @@ void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); | |||
| 948 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); | 981 | int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); |
| 949 | int truncate_inode_blocks(struct inode *, pgoff_t); | 982 | int truncate_inode_blocks(struct inode *, pgoff_t); |
| 950 | int remove_inode_page(struct inode *); | 983 | int remove_inode_page(struct inode *); |
| 951 | int new_inode_page(struct inode *, const struct qstr *); | 984 | struct page *new_inode_page(struct inode *, const struct qstr *); |
| 952 | struct page *new_node_page(struct dnode_of_data *, unsigned int); | 985 | struct page *new_node_page(struct dnode_of_data *, unsigned int, struct page *); |
| 953 | void ra_node_page(struct f2fs_sb_info *, nid_t); | 986 | void ra_node_page(struct f2fs_sb_info *, nid_t); |
| 954 | struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); | 987 | struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); |
| 955 | struct page *get_node_page_ra(struct page *, int); | 988 | struct page *get_node_page_ra(struct page *, int); |
| @@ -974,7 +1007,6 @@ void destroy_node_manager_caches(void); | |||
| 974 | */ | 1007 | */ |
| 975 | void f2fs_balance_fs(struct f2fs_sb_info *); | 1008 | void f2fs_balance_fs(struct f2fs_sb_info *); |
| 976 | void invalidate_blocks(struct f2fs_sb_info *, block_t); | 1009 | void invalidate_blocks(struct f2fs_sb_info *, block_t); |
| 977 | void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); | ||
| 978 | void clear_prefree_segments(struct f2fs_sb_info *); | 1010 | void clear_prefree_segments(struct f2fs_sb_info *); |
| 979 | int npages_for_summary_flush(struct f2fs_sb_info *); | 1011 | int npages_for_summary_flush(struct f2fs_sb_info *); |
| 980 | void allocate_new_segments(struct f2fs_sb_info *); | 1012 | void allocate_new_segments(struct f2fs_sb_info *); |
| @@ -1011,7 +1043,9 @@ void remove_orphan_inode(struct f2fs_sb_info *, nid_t); | |||
| 1011 | int recover_orphan_inodes(struct f2fs_sb_info *); | 1043 | int recover_orphan_inodes(struct f2fs_sb_info *); |
| 1012 | int get_valid_checkpoint(struct f2fs_sb_info *); | 1044 | int get_valid_checkpoint(struct f2fs_sb_info *); |
| 1013 | void set_dirty_dir_page(struct inode *, struct page *); | 1045 | void set_dirty_dir_page(struct inode *, struct page *); |
| 1046 | void add_dirty_dir_inode(struct inode *); | ||
| 1014 | void remove_dirty_dir_inode(struct inode *); | 1047 | void remove_dirty_dir_inode(struct inode *); |
| 1048 | struct inode *check_dirty_dir_inode(struct f2fs_sb_info *, nid_t); | ||
| 1015 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); | 1049 | void sync_dirty_dir_inodes(struct f2fs_sb_info *); |
| 1016 | void write_checkpoint(struct f2fs_sb_info *, bool); | 1050 | void write_checkpoint(struct f2fs_sb_info *, bool); |
| 1017 | void init_orphan_info(struct f2fs_sb_info *); | 1051 | void init_orphan_info(struct f2fs_sb_info *); |
| @@ -1025,7 +1059,7 @@ int reserve_new_block(struct dnode_of_data *); | |||
| 1025 | void update_extent_cache(block_t, struct dnode_of_data *); | 1059 | void update_extent_cache(block_t, struct dnode_of_data *); |
| 1026 | struct page *find_data_page(struct inode *, pgoff_t, bool); | 1060 | struct page *find_data_page(struct inode *, pgoff_t, bool); |
| 1027 | struct page *get_lock_data_page(struct inode *, pgoff_t); | 1061 | struct page *get_lock_data_page(struct inode *, pgoff_t); |
| 1028 | struct page *get_new_data_page(struct inode *, pgoff_t, bool); | 1062 | struct page *get_new_data_page(struct inode *, struct page *, pgoff_t, bool); |
| 1029 | int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); | 1063 | int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); |
| 1030 | int do_write_data_page(struct page *); | 1064 | int do_write_data_page(struct page *); |
| 1031 | 1065 | ||
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1cae864f8dfc..d2d2b7dbdcc1 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c | |||
| @@ -63,9 +63,10 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
| 63 | f2fs_put_dnode(&dn); | 63 | f2fs_put_dnode(&dn); |
| 64 | mutex_unlock_op(sbi, ilock); | 64 | mutex_unlock_op(sbi, ilock); |
| 65 | 65 | ||
| 66 | file_update_time(vma->vm_file); | ||
| 66 | lock_page(page); | 67 | lock_page(page); |
| 67 | if (page->mapping != inode->i_mapping || | 68 | if (page->mapping != inode->i_mapping || |
| 68 | page_offset(page) >= i_size_read(inode) || | 69 | page_offset(page) > i_size_read(inode) || |
| 69 | !PageUptodate(page)) { | 70 | !PageUptodate(page)) { |
| 70 | unlock_page(page); | 71 | unlock_page(page); |
| 71 | err = -EFAULT; | 72 | err = -EFAULT; |
| @@ -76,10 +77,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
| 76 | * check to see if the page is mapped already (no holes) | 77 | * check to see if the page is mapped already (no holes) |
| 77 | */ | 78 | */ |
| 78 | if (PageMappedToDisk(page)) | 79 | if (PageMappedToDisk(page)) |
| 79 | goto out; | 80 | goto mapped; |
| 80 | |||
| 81 | /* fill the page */ | ||
| 82 | wait_on_page_writeback(page); | ||
| 83 | 81 | ||
| 84 | /* page is wholly or partially inside EOF */ | 82 | /* page is wholly or partially inside EOF */ |
| 85 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { | 83 | if (((page->index + 1) << PAGE_CACHE_SHIFT) > i_size_read(inode)) { |
| @@ -90,7 +88,9 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma, | |||
| 90 | set_page_dirty(page); | 88 | set_page_dirty(page); |
| 91 | SetPageUptodate(page); | 89 | SetPageUptodate(page); |
| 92 | 90 | ||
| 93 | file_update_time(vma->vm_file); | 91 | mapped: |
| 92 | /* fill the page */ | ||
| 93 | wait_on_page_writeback(page); | ||
| 94 | out: | 94 | out: |
| 95 | sb_end_pagefault(inode->i_sb); | 95 | sb_end_pagefault(inode->i_sb); |
| 96 | return block_page_mkwrite_return(err); | 96 | return block_page_mkwrite_return(err); |
| @@ -102,6 +102,24 @@ static const struct vm_operations_struct f2fs_file_vm_ops = { | |||
| 102 | .remap_pages = generic_file_remap_pages, | 102 | .remap_pages = generic_file_remap_pages, |
| 103 | }; | 103 | }; |
| 104 | 104 | ||
| 105 | static int get_parent_ino(struct inode *inode, nid_t *pino) | ||
| 106 | { | ||
| 107 | struct dentry *dentry; | ||
| 108 | |||
| 109 | inode = igrab(inode); | ||
| 110 | dentry = d_find_any_alias(inode); | ||
| 111 | iput(inode); | ||
| 112 | if (!dentry) | ||
| 113 | return 0; | ||
| 114 | |||
| 115 | inode = igrab(dentry->d_parent->d_inode); | ||
| 116 | dput(dentry); | ||
| 117 | |||
| 118 | *pino = inode->i_ino; | ||
| 119 | iput(inode); | ||
| 120 | return 1; | ||
| 121 | } | ||
| 122 | |||
| 105 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | 123 | int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) |
| 106 | { | 124 | { |
| 107 | struct inode *inode = file->f_mapping->host; | 125 | struct inode *inode = file->f_mapping->host; |
| @@ -114,7 +132,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 114 | .for_reclaim = 0, | 132 | .for_reclaim = 0, |
| 115 | }; | 133 | }; |
| 116 | 134 | ||
| 117 | if (inode->i_sb->s_flags & MS_RDONLY) | 135 | if (f2fs_readonly(inode->i_sb)) |
| 118 | return 0; | 136 | return 0; |
| 119 | 137 | ||
| 120 | trace_f2fs_sync_file_enter(inode); | 138 | trace_f2fs_sync_file_enter(inode); |
| @@ -134,7 +152,7 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 134 | 152 | ||
| 135 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) | 153 | if (!S_ISREG(inode->i_mode) || inode->i_nlink != 1) |
| 136 | need_cp = true; | 154 | need_cp = true; |
| 137 | else if (is_cp_file(inode)) | 155 | else if (file_wrong_pino(inode)) |
| 138 | need_cp = true; | 156 | need_cp = true; |
| 139 | else if (!space_for_roll_forward(sbi)) | 157 | else if (!space_for_roll_forward(sbi)) |
| 140 | need_cp = true; | 158 | need_cp = true; |
| @@ -142,11 +160,23 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) | |||
| 142 | need_cp = true; | 160 | need_cp = true; |
| 143 | 161 | ||
| 144 | if (need_cp) { | 162 | if (need_cp) { |
| 163 | nid_t pino; | ||
| 164 | |||
| 145 | /* all the dirty node pages should be flushed for POR */ | 165 | /* all the dirty node pages should be flushed for POR */ |
| 146 | ret = f2fs_sync_fs(inode->i_sb, 1); | 166 | ret = f2fs_sync_fs(inode->i_sb, 1); |
| 167 | if (file_wrong_pino(inode) && inode->i_nlink == 1 && | ||
| 168 | get_parent_ino(inode, &pino)) { | ||
| 169 | F2FS_I(inode)->i_pino = pino; | ||
| 170 | file_got_pino(inode); | ||
| 171 | mark_inode_dirty_sync(inode); | ||
| 172 | ret = f2fs_write_inode(inode, NULL); | ||
| 173 | if (ret) | ||
| 174 | goto out; | ||
| 175 | } | ||
| 147 | } else { | 176 | } else { |
| 148 | /* if there is no written node page, write its inode page */ | 177 | /* if there is no written node page, write its inode page */ |
| 149 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { | 178 | while (!sync_node_pages(sbi, inode->i_ino, &wbc)) { |
| 179 | mark_inode_dirty_sync(inode); | ||
| 150 | ret = f2fs_write_inode(inode, NULL); | 180 | ret = f2fs_write_inode(inode, NULL); |
| 151 | if (ret) | 181 | if (ret) |
| 152 | goto out; | 182 | goto out; |
| @@ -168,7 +198,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 168 | return 0; | 198 | return 0; |
| 169 | } | 199 | } |
| 170 | 200 | ||
| 171 | static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | 201 | int truncate_data_blocks_range(struct dnode_of_data *dn, int count) |
| 172 | { | 202 | { |
| 173 | int nr_free = 0, ofs = dn->ofs_in_node; | 203 | int nr_free = 0, ofs = dn->ofs_in_node; |
| 174 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 204 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
| @@ -185,10 +215,10 @@ static int truncate_data_blocks_range(struct dnode_of_data *dn, int count) | |||
| 185 | 215 | ||
| 186 | update_extent_cache(NULL_ADDR, dn); | 216 | update_extent_cache(NULL_ADDR, dn); |
| 187 | invalidate_blocks(sbi, blkaddr); | 217 | invalidate_blocks(sbi, blkaddr); |
| 188 | dec_valid_block_count(sbi, dn->inode, 1); | ||
| 189 | nr_free++; | 218 | nr_free++; |
| 190 | } | 219 | } |
| 191 | if (nr_free) { | 220 | if (nr_free) { |
| 221 | dec_valid_block_count(sbi, dn->inode, nr_free); | ||
| 192 | set_page_dirty(dn->node_page); | 222 | set_page_dirty(dn->node_page); |
| 193 | sync_inode_page(dn); | 223 | sync_inode_page(dn); |
| 194 | } | 224 | } |
| @@ -291,7 +321,7 @@ void f2fs_truncate(struct inode *inode) | |||
| 291 | } | 321 | } |
| 292 | } | 322 | } |
| 293 | 323 | ||
| 294 | static int f2fs_getattr(struct vfsmount *mnt, | 324 | int f2fs_getattr(struct vfsmount *mnt, |
| 295 | struct dentry *dentry, struct kstat *stat) | 325 | struct dentry *dentry, struct kstat *stat) |
| 296 | { | 326 | { |
| 297 | struct inode *inode = dentry->d_inode; | 327 | struct inode *inode = dentry->d_inode; |
| @@ -387,7 +417,7 @@ static void fill_zero(struct inode *inode, pgoff_t index, | |||
| 387 | f2fs_balance_fs(sbi); | 417 | f2fs_balance_fs(sbi); |
| 388 | 418 | ||
| 389 | ilock = mutex_lock_op(sbi); | 419 | ilock = mutex_lock_op(sbi); |
| 390 | page = get_new_data_page(inode, index, false); | 420 | page = get_new_data_page(inode, NULL, index, false); |
| 391 | mutex_unlock_op(sbi, ilock); | 421 | mutex_unlock_op(sbi, ilock); |
| 392 | 422 | ||
| 393 | if (!IS_ERR(page)) { | 423 | if (!IS_ERR(page)) { |
| @@ -575,10 +605,10 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 575 | int ret; | 605 | int ret; |
| 576 | 606 | ||
| 577 | switch (cmd) { | 607 | switch (cmd) { |
| 578 | case FS_IOC_GETFLAGS: | 608 | case F2FS_IOC_GETFLAGS: |
| 579 | flags = fi->i_flags & FS_FL_USER_VISIBLE; | 609 | flags = fi->i_flags & FS_FL_USER_VISIBLE; |
| 580 | return put_user(flags, (int __user *) arg); | 610 | return put_user(flags, (int __user *) arg); |
| 581 | case FS_IOC_SETFLAGS: | 611 | case F2FS_IOC_SETFLAGS: |
| 582 | { | 612 | { |
| 583 | unsigned int oldflags; | 613 | unsigned int oldflags; |
| 584 | 614 | ||
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 14961593e93c..35f9b1a196aa 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c | |||
| @@ -76,7 +76,9 @@ static int gc_thread_func(void *data) | |||
| 76 | else | 76 | else |
| 77 | wait_ms = increase_sleep_time(wait_ms); | 77 | wait_ms = increase_sleep_time(wait_ms); |
| 78 | 78 | ||
| 79 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 79 | sbi->bg_gc++; | 80 | sbi->bg_gc++; |
| 81 | #endif | ||
| 80 | 82 | ||
| 81 | /* if return value is not zero, no victim was selected */ | 83 | /* if return value is not zero, no victim was selected */ |
| 82 | if (f2fs_gc(sbi)) | 84 | if (f2fs_gc(sbi)) |
| @@ -89,23 +91,28 @@ int start_gc_thread(struct f2fs_sb_info *sbi) | |||
| 89 | { | 91 | { |
| 90 | struct f2fs_gc_kthread *gc_th; | 92 | struct f2fs_gc_kthread *gc_th; |
| 91 | dev_t dev = sbi->sb->s_bdev->bd_dev; | 93 | dev_t dev = sbi->sb->s_bdev->bd_dev; |
| 94 | int err = 0; | ||
| 92 | 95 | ||
| 93 | if (!test_opt(sbi, BG_GC)) | 96 | if (!test_opt(sbi, BG_GC)) |
| 94 | return 0; | 97 | goto out; |
| 95 | gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); | 98 | gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL); |
| 96 | if (!gc_th) | 99 | if (!gc_th) { |
| 97 | return -ENOMEM; | 100 | err = -ENOMEM; |
| 101 | goto out; | ||
| 102 | } | ||
| 98 | 103 | ||
| 99 | sbi->gc_thread = gc_th; | 104 | sbi->gc_thread = gc_th; |
| 100 | init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); | 105 | init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head); |
| 101 | sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, | 106 | sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi, |
| 102 | "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); | 107 | "f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev)); |
| 103 | if (IS_ERR(gc_th->f2fs_gc_task)) { | 108 | if (IS_ERR(gc_th->f2fs_gc_task)) { |
| 109 | err = PTR_ERR(gc_th->f2fs_gc_task); | ||
| 104 | kfree(gc_th); | 110 | kfree(gc_th); |
| 105 | sbi->gc_thread = NULL; | 111 | sbi->gc_thread = NULL; |
| 106 | return -ENOMEM; | ||
| 107 | } | 112 | } |
| 108 | return 0; | 113 | |
| 114 | out: | ||
| 115 | return err; | ||
| 109 | } | 116 | } |
| 110 | 117 | ||
| 111 | void stop_gc_thread(struct f2fs_sb_info *sbi) | 118 | void stop_gc_thread(struct f2fs_sb_info *sbi) |
| @@ -234,14 +241,14 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
| 234 | { | 241 | { |
| 235 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 242 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
| 236 | struct victim_sel_policy p; | 243 | struct victim_sel_policy p; |
| 237 | unsigned int secno; | 244 | unsigned int secno, max_cost; |
| 238 | int nsearched = 0; | 245 | int nsearched = 0; |
| 239 | 246 | ||
| 240 | p.alloc_mode = alloc_mode; | 247 | p.alloc_mode = alloc_mode; |
| 241 | select_policy(sbi, gc_type, type, &p); | 248 | select_policy(sbi, gc_type, type, &p); |
| 242 | 249 | ||
| 243 | p.min_segno = NULL_SEGNO; | 250 | p.min_segno = NULL_SEGNO; |
| 244 | p.min_cost = get_max_cost(sbi, &p); | 251 | p.min_cost = max_cost = get_max_cost(sbi, &p); |
| 245 | 252 | ||
| 246 | mutex_lock(&dirty_i->seglist_lock); | 253 | mutex_lock(&dirty_i->seglist_lock); |
| 247 | 254 | ||
| @@ -280,7 +287,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
| 280 | p.min_cost = cost; | 287 | p.min_cost = cost; |
| 281 | } | 288 | } |
| 282 | 289 | ||
| 283 | if (cost == get_max_cost(sbi, &p)) | 290 | if (cost == max_cost) |
| 284 | continue; | 291 | continue; |
| 285 | 292 | ||
| 286 | if (nsearched++ >= MAX_VICTIM_SEARCH) { | 293 | if (nsearched++ >= MAX_VICTIM_SEARCH) { |
| @@ -288,8 +295,8 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, | |||
| 288 | break; | 295 | break; |
| 289 | } | 296 | } |
| 290 | } | 297 | } |
| 291 | got_it: | ||
| 292 | if (p.min_segno != NULL_SEGNO) { | 298 | if (p.min_segno != NULL_SEGNO) { |
| 299 | got_it: | ||
| 293 | if (p.alloc_mode == LFS) { | 300 | if (p.alloc_mode == LFS) { |
| 294 | secno = GET_SECNO(sbi, p.min_segno); | 301 | secno = GET_SECNO(sbi, p.min_segno); |
| 295 | if (gc_type == FG_GC) | 302 | if (gc_type == FG_GC) |
| @@ -314,28 +321,21 @@ static const struct victim_selection default_v_ops = { | |||
| 314 | 321 | ||
| 315 | static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) | 322 | static struct inode *find_gc_inode(nid_t ino, struct list_head *ilist) |
| 316 | { | 323 | { |
| 317 | struct list_head *this; | ||
| 318 | struct inode_entry *ie; | 324 | struct inode_entry *ie; |
| 319 | 325 | ||
| 320 | list_for_each(this, ilist) { | 326 | list_for_each_entry(ie, ilist, list) |
| 321 | ie = list_entry(this, struct inode_entry, list); | ||
| 322 | if (ie->inode->i_ino == ino) | 327 | if (ie->inode->i_ino == ino) |
| 323 | return ie->inode; | 328 | return ie->inode; |
| 324 | } | ||
| 325 | return NULL; | 329 | return NULL; |
| 326 | } | 330 | } |
| 327 | 331 | ||
| 328 | static void add_gc_inode(struct inode *inode, struct list_head *ilist) | 332 | static void add_gc_inode(struct inode *inode, struct list_head *ilist) |
| 329 | { | 333 | { |
| 330 | struct list_head *this; | 334 | struct inode_entry *new_ie; |
| 331 | struct inode_entry *new_ie, *ie; | ||
| 332 | 335 | ||
| 333 | list_for_each(this, ilist) { | 336 | if (inode == find_gc_inode(inode->i_ino, ilist)) { |
| 334 | ie = list_entry(this, struct inode_entry, list); | 337 | iput(inode); |
| 335 | if (ie->inode == inode) { | 338 | return; |
| 336 | iput(inode); | ||
| 337 | return; | ||
| 338 | } | ||
| 339 | } | 339 | } |
| 340 | repeat: | 340 | repeat: |
| 341 | new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); | 341 | new_ie = kmem_cache_alloc(winode_slab, GFP_NOFS); |
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 91ac7f9d88ee..2b2d45d19e3e 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c | |||
| @@ -109,12 +109,6 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned long ino) | |||
| 109 | ret = do_read_inode(inode); | 109 | ret = do_read_inode(inode); |
| 110 | if (ret) | 110 | if (ret) |
| 111 | goto bad_inode; | 111 | goto bad_inode; |
| 112 | |||
| 113 | if (!sbi->por_doing && inode->i_nlink == 0) { | ||
| 114 | ret = -ENOENT; | ||
| 115 | goto bad_inode; | ||
| 116 | } | ||
| 117 | |||
| 118 | make_now: | 112 | make_now: |
| 119 | if (ino == F2FS_NODE_INO(sbi)) { | 113 | if (ino == F2FS_NODE_INO(sbi)) { |
| 120 | inode->i_mapping->a_ops = &f2fs_node_aops; | 114 | inode->i_mapping->a_ops = &f2fs_node_aops; |
| @@ -130,8 +124,7 @@ make_now: | |||
| 130 | inode->i_op = &f2fs_dir_inode_operations; | 124 | inode->i_op = &f2fs_dir_inode_operations; |
| 131 | inode->i_fop = &f2fs_dir_operations; | 125 | inode->i_fop = &f2fs_dir_operations; |
| 132 | inode->i_mapping->a_ops = &f2fs_dblock_aops; | 126 | inode->i_mapping->a_ops = &f2fs_dblock_aops; |
| 133 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER_MOVABLE | | 127 | mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO); |
| 134 | __GFP_ZERO); | ||
| 135 | } else if (S_ISLNK(inode->i_mode)) { | 128 | } else if (S_ISLNK(inode->i_mode)) { |
| 136 | inode->i_op = &f2fs_symlink_inode_operations; | 129 | inode->i_op = &f2fs_symlink_inode_operations; |
| 137 | inode->i_mapping->a_ops = &f2fs_dblock_aops; | 130 | inode->i_mapping->a_ops = &f2fs_dblock_aops; |
| @@ -199,6 +192,7 @@ void update_inode(struct inode *inode, struct page *node_page) | |||
| 199 | 192 | ||
| 200 | set_cold_node(inode, node_page); | 193 | set_cold_node(inode, node_page); |
| 201 | set_page_dirty(node_page); | 194 | set_page_dirty(node_page); |
| 195 | clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | ||
| 202 | } | 196 | } |
| 203 | 197 | ||
| 204 | int update_inode_page(struct inode *inode) | 198 | int update_inode_page(struct inode *inode) |
| @@ -224,6 +218,9 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 224 | inode->i_ino == F2FS_META_INO(sbi)) | 218 | inode->i_ino == F2FS_META_INO(sbi)) |
| 225 | return 0; | 219 | return 0; |
| 226 | 220 | ||
| 221 | if (!is_inode_flag_set(F2FS_I(inode), FI_DIRTY_INODE)) | ||
| 222 | return 0; | ||
| 223 | |||
| 227 | if (wbc) | 224 | if (wbc) |
| 228 | f2fs_balance_fs(sbi); | 225 | f2fs_balance_fs(sbi); |
| 229 | 226 | ||
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 47abc9722b17..64c07169df05 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c | |||
| @@ -112,7 +112,7 @@ static inline void set_cold_files(struct f2fs_sb_info *sbi, struct inode *inode, | |||
| 112 | int count = le32_to_cpu(sbi->raw_super->extension_count); | 112 | int count = le32_to_cpu(sbi->raw_super->extension_count); |
| 113 | for (i = 0; i < count; i++) { | 113 | for (i = 0; i < count; i++) { |
| 114 | if (is_multimedia_file(name, extlist[i])) { | 114 | if (is_multimedia_file(name, extlist[i])) { |
| 115 | set_cold_file(inode); | 115 | file_set_cold(inode); |
| 116 | break; | 116 | break; |
| 117 | } | 117 | } |
| 118 | } | 118 | } |
| @@ -149,8 +149,7 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, | |||
| 149 | 149 | ||
| 150 | alloc_nid_done(sbi, ino); | 150 | alloc_nid_done(sbi, ino); |
| 151 | 151 | ||
| 152 | if (!sbi->por_doing) | 152 | d_instantiate(dentry, inode); |
| 153 | d_instantiate(dentry, inode); | ||
| 154 | unlock_new_inode(inode); | 153 | unlock_new_inode(inode); |
| 155 | return 0; | 154 | return 0; |
| 156 | out: | 155 | out: |
| @@ -173,7 +172,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 173 | f2fs_balance_fs(sbi); | 172 | f2fs_balance_fs(sbi); |
| 174 | 173 | ||
| 175 | inode->i_ctime = CURRENT_TIME; | 174 | inode->i_ctime = CURRENT_TIME; |
| 176 | atomic_inc(&inode->i_count); | 175 | ihold(inode); |
| 177 | 176 | ||
| 178 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); | 177 | set_inode_flag(F2FS_I(inode), FI_INC_LINK); |
| 179 | ilock = mutex_lock_op(sbi); | 178 | ilock = mutex_lock_op(sbi); |
| @@ -182,17 +181,10 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 182 | if (err) | 181 | if (err) |
| 183 | goto out; | 182 | goto out; |
| 184 | 183 | ||
| 185 | /* | ||
| 186 | * This file should be checkpointed during fsync. | ||
| 187 | * We lost i_pino from now on. | ||
| 188 | */ | ||
| 189 | set_cp_file(inode); | ||
| 190 | |||
| 191 | d_instantiate(dentry, inode); | 184 | d_instantiate(dentry, inode); |
| 192 | return 0; | 185 | return 0; |
| 193 | out: | 186 | out: |
| 194 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); | 187 | clear_inode_flag(F2FS_I(inode), FI_INC_LINK); |
| 195 | make_bad_inode(inode); | ||
| 196 | iput(inode); | 188 | iput(inode); |
| 197 | return err; | 189 | return err; |
| 198 | } | 190 | } |
| @@ -498,6 +490,7 @@ const struct inode_operations f2fs_dir_inode_operations = { | |||
| 498 | .rmdir = f2fs_rmdir, | 490 | .rmdir = f2fs_rmdir, |
| 499 | .mknod = f2fs_mknod, | 491 | .mknod = f2fs_mknod, |
| 500 | .rename = f2fs_rename, | 492 | .rename = f2fs_rename, |
| 493 | .getattr = f2fs_getattr, | ||
| 501 | .setattr = f2fs_setattr, | 494 | .setattr = f2fs_setattr, |
| 502 | .get_acl = f2fs_get_acl, | 495 | .get_acl = f2fs_get_acl, |
| 503 | #ifdef CONFIG_F2FS_FS_XATTR | 496 | #ifdef CONFIG_F2FS_FS_XATTR |
| @@ -512,6 +505,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { | |||
| 512 | .readlink = generic_readlink, | 505 | .readlink = generic_readlink, |
| 513 | .follow_link = page_follow_link_light, | 506 | .follow_link = page_follow_link_light, |
| 514 | .put_link = page_put_link, | 507 | .put_link = page_put_link, |
| 508 | .getattr = f2fs_getattr, | ||
| 515 | .setattr = f2fs_setattr, | 509 | .setattr = f2fs_setattr, |
| 516 | #ifdef CONFIG_F2FS_FS_XATTR | 510 | #ifdef CONFIG_F2FS_FS_XATTR |
| 517 | .setxattr = generic_setxattr, | 511 | .setxattr = generic_setxattr, |
| @@ -522,6 +516,7 @@ const struct inode_operations f2fs_symlink_inode_operations = { | |||
| 522 | }; | 516 | }; |
| 523 | 517 | ||
| 524 | const struct inode_operations f2fs_special_inode_operations = { | 518 | const struct inode_operations f2fs_special_inode_operations = { |
| 519 | .getattr = f2fs_getattr, | ||
| 525 | .setattr = f2fs_setattr, | 520 | .setattr = f2fs_setattr, |
| 526 | .get_acl = f2fs_get_acl, | 521 | .get_acl = f2fs_get_acl, |
| 527 | #ifdef CONFIG_F2FS_FS_XATTR | 522 | #ifdef CONFIG_F2FS_FS_XATTR |
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3df43b4efd89..b418aee09573 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c | |||
| @@ -408,10 +408,13 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
| 408 | level = get_node_path(index, offset, noffset); | 408 | level = get_node_path(index, offset, noffset); |
| 409 | 409 | ||
| 410 | nids[0] = dn->inode->i_ino; | 410 | nids[0] = dn->inode->i_ino; |
| 411 | npage[0] = get_node_page(sbi, nids[0]); | 411 | npage[0] = dn->inode_page; |
| 412 | if (IS_ERR(npage[0])) | ||
| 413 | return PTR_ERR(npage[0]); | ||
| 414 | 412 | ||
| 413 | if (!npage[0]) { | ||
| 414 | npage[0] = get_node_page(sbi, nids[0]); | ||
| 415 | if (IS_ERR(npage[0])) | ||
| 416 | return PTR_ERR(npage[0]); | ||
| 417 | } | ||
| 415 | parent = npage[0]; | 418 | parent = npage[0]; |
| 416 | if (level != 0) | 419 | if (level != 0) |
| 417 | nids[1] = get_nid(parent, offset[0], true); | 420 | nids[1] = get_nid(parent, offset[0], true); |
| @@ -430,7 +433,7 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) | |||
| 430 | } | 433 | } |
| 431 | 434 | ||
| 432 | dn->nid = nids[i]; | 435 | dn->nid = nids[i]; |
| 433 | npage[i] = new_node_page(dn, noffset[i]); | 436 | npage[i] = new_node_page(dn, noffset[i], NULL); |
| 434 | if (IS_ERR(npage[i])) { | 437 | if (IS_ERR(npage[i])) { |
| 435 | alloc_nid_failed(sbi, nids[i]); | 438 | alloc_nid_failed(sbi, nids[i]); |
| 436 | err = PTR_ERR(npage[i]); | 439 | err = PTR_ERR(npage[i]); |
| @@ -803,22 +806,19 @@ int remove_inode_page(struct inode *inode) | |||
| 803 | return 0; | 806 | return 0; |
| 804 | } | 807 | } |
| 805 | 808 | ||
| 806 | int new_inode_page(struct inode *inode, const struct qstr *name) | 809 | struct page *new_inode_page(struct inode *inode, const struct qstr *name) |
| 807 | { | 810 | { |
| 808 | struct page *page; | ||
| 809 | struct dnode_of_data dn; | 811 | struct dnode_of_data dn; |
| 810 | 812 | ||
| 811 | /* allocate inode page for new inode */ | 813 | /* allocate inode page for new inode */ |
| 812 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); | 814 | set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); |
| 813 | page = new_node_page(&dn, 0); | 815 | |
| 814 | init_dent_inode(name, page); | 816 | /* caller should f2fs_put_page(page, 1); */ |
| 815 | if (IS_ERR(page)) | 817 | return new_node_page(&dn, 0, NULL); |
| 816 | return PTR_ERR(page); | ||
| 817 | f2fs_put_page(page, 1); | ||
| 818 | return 0; | ||
| 819 | } | 818 | } |
| 820 | 819 | ||
| 821 | struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) | 820 | struct page *new_node_page(struct dnode_of_data *dn, |
| 821 | unsigned int ofs, struct page *ipage) | ||
| 822 | { | 822 | { |
| 823 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); | 823 | struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); |
| 824 | struct address_space *mapping = sbi->node_inode->i_mapping; | 824 | struct address_space *mapping = sbi->node_inode->i_mapping; |
| @@ -851,7 +851,10 @@ struct page *new_node_page(struct dnode_of_data *dn, unsigned int ofs) | |||
| 851 | set_cold_node(dn->inode, page); | 851 | set_cold_node(dn->inode, page); |
| 852 | 852 | ||
| 853 | dn->node_page = page; | 853 | dn->node_page = page; |
| 854 | sync_inode_page(dn); | 854 | if (ipage) |
| 855 | update_inode(dn->inode, ipage); | ||
| 856 | else | ||
| 857 | sync_inode_page(dn); | ||
| 855 | set_page_dirty(page); | 858 | set_page_dirty(page); |
| 856 | if (ofs == 0) | 859 | if (ofs == 0) |
| 857 | inc_valid_inode_count(sbi); | 860 | inc_valid_inode_count(sbi); |
| @@ -1205,7 +1208,8 @@ static int f2fs_set_node_page_dirty(struct page *page) | |||
| 1205 | return 0; | 1208 | return 0; |
| 1206 | } | 1209 | } |
| 1207 | 1210 | ||
| 1208 | static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) | 1211 | static void f2fs_invalidate_node_page(struct page *page, unsigned int offset, |
| 1212 | unsigned int length) | ||
| 1209 | { | 1213 | { |
| 1210 | struct inode *inode = page->mapping->host; | 1214 | struct inode *inode = page->mapping->host; |
| 1211 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 1215 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| @@ -1492,9 +1496,10 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct page *page) | |||
| 1492 | new_ni = old_ni; | 1496 | new_ni = old_ni; |
| 1493 | new_ni.ino = ino; | 1497 | new_ni.ino = ino; |
| 1494 | 1498 | ||
| 1499 | if (!inc_valid_node_count(sbi, NULL, 1)) | ||
| 1500 | WARN_ON(1); | ||
| 1495 | set_node_addr(sbi, &new_ni, NEW_ADDR); | 1501 | set_node_addr(sbi, &new_ni, NEW_ADDR); |
| 1496 | inc_valid_inode_count(sbi); | 1502 | inc_valid_inode_count(sbi); |
| 1497 | |||
| 1498 | f2fs_put_page(ipage, 1); | 1503 | f2fs_put_page(ipage, 1); |
| 1499 | return 0; | 1504 | return 0; |
| 1500 | } | 1505 | } |
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 0a2d72f0024d..c65fb4f4230f 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h | |||
| @@ -275,25 +275,27 @@ static inline nid_t get_nid(struct page *p, int off, bool i) | |||
| 275 | * - Mark cold node blocks in their node footer | 275 | * - Mark cold node blocks in their node footer |
| 276 | * - Mark cold data pages in page cache | 276 | * - Mark cold data pages in page cache |
| 277 | */ | 277 | */ |
| 278 | static inline int is_cold_file(struct inode *inode) | 278 | static inline int is_file(struct inode *inode, int type) |
| 279 | { | 279 | { |
| 280 | return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; | 280 | return F2FS_I(inode)->i_advise & type; |
| 281 | } | 281 | } |
| 282 | 282 | ||
| 283 | static inline void set_cold_file(struct inode *inode) | 283 | static inline void set_file(struct inode *inode, int type) |
| 284 | { | 284 | { |
| 285 | F2FS_I(inode)->i_advise |= FADVISE_COLD_BIT; | 285 | F2FS_I(inode)->i_advise |= type; |
| 286 | } | 286 | } |
| 287 | 287 | ||
| 288 | static inline int is_cp_file(struct inode *inode) | 288 | static inline void clear_file(struct inode *inode, int type) |
| 289 | { | 289 | { |
| 290 | return F2FS_I(inode)->i_advise & FADVISE_CP_BIT; | 290 | F2FS_I(inode)->i_advise &= ~type; |
| 291 | } | 291 | } |
| 292 | 292 | ||
| 293 | static inline void set_cp_file(struct inode *inode) | 293 | #define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT) |
| 294 | { | 294 | #define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT) |
| 295 | F2FS_I(inode)->i_advise |= FADVISE_CP_BIT; | 295 | #define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT) |
| 296 | } | 296 | #define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT) |
| 297 | #define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT) | ||
| 298 | #define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT) | ||
| 297 | 299 | ||
| 298 | static inline int is_cold_data(struct page *page) | 300 | static inline int is_cold_data(struct page *page) |
| 299 | { | 301 | { |
| @@ -310,29 +312,16 @@ static inline void clear_cold_data(struct page *page) | |||
| 310 | ClearPageChecked(page); | 312 | ClearPageChecked(page); |
| 311 | } | 313 | } |
| 312 | 314 | ||
| 313 | static inline int is_cold_node(struct page *page) | 315 | static inline int is_node(struct page *page, int type) |
| 314 | { | 316 | { |
| 315 | void *kaddr = page_address(page); | 317 | void *kaddr = page_address(page); |
| 316 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | 318 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; |
| 317 | unsigned int flag = le32_to_cpu(rn->footer.flag); | 319 | return le32_to_cpu(rn->footer.flag) & (1 << type); |
| 318 | return flag & (0x1 << COLD_BIT_SHIFT); | ||
| 319 | } | 320 | } |
| 320 | 321 | ||
| 321 | static inline unsigned char is_fsync_dnode(struct page *page) | 322 | #define is_cold_node(page) is_node(page, COLD_BIT_SHIFT) |
| 322 | { | 323 | #define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT) |
| 323 | void *kaddr = page_address(page); | 324 | #define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT) |
| 324 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
| 325 | unsigned int flag = le32_to_cpu(rn->footer.flag); | ||
| 326 | return flag & (0x1 << FSYNC_BIT_SHIFT); | ||
| 327 | } | ||
| 328 | |||
| 329 | static inline unsigned char is_dent_dnode(struct page *page) | ||
| 330 | { | ||
| 331 | void *kaddr = page_address(page); | ||
| 332 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
| 333 | unsigned int flag = le32_to_cpu(rn->footer.flag); | ||
| 334 | return flag & (0x1 << DENT_BIT_SHIFT); | ||
| 335 | } | ||
| 336 | 325 | ||
| 337 | static inline void set_cold_node(struct inode *inode, struct page *page) | 326 | static inline void set_cold_node(struct inode *inode, struct page *page) |
| 338 | { | 327 | { |
| @@ -346,26 +335,15 @@ static inline void set_cold_node(struct inode *inode, struct page *page) | |||
| 346 | rn->footer.flag = cpu_to_le32(flag); | 335 | rn->footer.flag = cpu_to_le32(flag); |
| 347 | } | 336 | } |
| 348 | 337 | ||
| 349 | static inline void set_fsync_mark(struct page *page, int mark) | 338 | static inline void set_mark(struct page *page, int mark, int type) |
| 350 | { | 339 | { |
| 351 | void *kaddr = page_address(page); | 340 | struct f2fs_node *rn = (struct f2fs_node *)page_address(page); |
| 352 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
| 353 | unsigned int flag = le32_to_cpu(rn->footer.flag); | ||
| 354 | if (mark) | ||
| 355 | flag |= (0x1 << FSYNC_BIT_SHIFT); | ||
| 356 | else | ||
| 357 | flag &= ~(0x1 << FSYNC_BIT_SHIFT); | ||
| 358 | rn->footer.flag = cpu_to_le32(flag); | ||
| 359 | } | ||
| 360 | |||
| 361 | static inline void set_dentry_mark(struct page *page, int mark) | ||
| 362 | { | ||
| 363 | void *kaddr = page_address(page); | ||
| 364 | struct f2fs_node *rn = (struct f2fs_node *)kaddr; | ||
| 365 | unsigned int flag = le32_to_cpu(rn->footer.flag); | 341 | unsigned int flag = le32_to_cpu(rn->footer.flag); |
| 366 | if (mark) | 342 | if (mark) |
| 367 | flag |= (0x1 << DENT_BIT_SHIFT); | 343 | flag |= (0x1 << type); |
| 368 | else | 344 | else |
| 369 | flag &= ~(0x1 << DENT_BIT_SHIFT); | 345 | flag &= ~(0x1 << type); |
| 370 | rn->footer.flag = cpu_to_le32(flag); | 346 | rn->footer.flag = cpu_to_le32(flag); |
| 371 | } | 347 | } |
| 348 | #define set_dentry_mark(page, mark) set_mark(page, mark, DENT_BIT_SHIFT) | ||
| 349 | #define set_fsync_mark(page, mark) set_mark(page, mark, FSYNC_BIT_SHIFT) | ||
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index 60c8a5097058..d56d951c2253 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c | |||
| @@ -40,36 +40,54 @@ static struct fsync_inode_entry *get_fsync_inode(struct list_head *head, | |||
| 40 | 40 | ||
| 41 | static int recover_dentry(struct page *ipage, struct inode *inode) | 41 | static int recover_dentry(struct page *ipage, struct inode *inode) |
| 42 | { | 42 | { |
| 43 | struct f2fs_node *raw_node = (struct f2fs_node *)kmap(ipage); | 43 | void *kaddr = page_address(ipage); |
| 44 | struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; | ||
| 44 | struct f2fs_inode *raw_inode = &(raw_node->i); | 45 | struct f2fs_inode *raw_inode = &(raw_node->i); |
| 45 | struct qstr name; | 46 | nid_t pino = le32_to_cpu(raw_inode->i_pino); |
| 46 | struct f2fs_dir_entry *de; | 47 | struct f2fs_dir_entry *de; |
| 48 | struct qstr name; | ||
| 47 | struct page *page; | 49 | struct page *page; |
| 48 | struct inode *dir; | 50 | struct inode *dir, *einode; |
| 49 | int err = 0; | 51 | int err = 0; |
| 50 | 52 | ||
| 51 | if (!is_dent_dnode(ipage)) | 53 | dir = check_dirty_dir_inode(F2FS_SB(inode->i_sb), pino); |
| 52 | goto out; | 54 | if (!dir) { |
| 53 | 55 | dir = f2fs_iget(inode->i_sb, pino); | |
| 54 | dir = f2fs_iget(inode->i_sb, le32_to_cpu(raw_inode->i_pino)); | 56 | if (IS_ERR(dir)) { |
| 55 | if (IS_ERR(dir)) { | 57 | err = PTR_ERR(dir); |
| 56 | err = PTR_ERR(dir); | 58 | goto out; |
| 57 | goto out; | 59 | } |
| 60 | set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT); | ||
| 61 | add_dirty_dir_inode(dir); | ||
| 58 | } | 62 | } |
| 59 | 63 | ||
| 60 | name.len = le32_to_cpu(raw_inode->i_namelen); | 64 | name.len = le32_to_cpu(raw_inode->i_namelen); |
| 61 | name.name = raw_inode->i_name; | 65 | name.name = raw_inode->i_name; |
| 62 | 66 | retry: | |
| 63 | de = f2fs_find_entry(dir, &name, &page); | 67 | de = f2fs_find_entry(dir, &name, &page); |
| 64 | if (de) { | 68 | if (de && inode->i_ino == le32_to_cpu(de->ino)) { |
| 65 | kunmap(page); | 69 | kunmap(page); |
| 66 | f2fs_put_page(page, 0); | 70 | f2fs_put_page(page, 0); |
| 67 | } else { | 71 | goto out; |
| 68 | err = __f2fs_add_link(dir, &name, inode); | 72 | } |
| 73 | if (de) { | ||
| 74 | einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino)); | ||
| 75 | if (IS_ERR(einode)) { | ||
| 76 | WARN_ON(1); | ||
| 77 | if (PTR_ERR(einode) == -ENOENT) | ||
| 78 | err = -EEXIST; | ||
| 79 | goto out; | ||
| 80 | } | ||
| 81 | f2fs_delete_entry(de, page, einode); | ||
| 82 | iput(einode); | ||
| 83 | goto retry; | ||
| 69 | } | 84 | } |
| 70 | iput(dir); | 85 | err = __f2fs_add_link(dir, &name, inode); |
| 71 | out: | 86 | out: |
| 72 | kunmap(ipage); | 87 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode and its dentry: " |
| 88 | "ino = %x, name = %s, dir = %lx, err = %d", | ||
| 89 | ino_of_node(ipage), raw_inode->i_name, | ||
| 90 | IS_ERR(dir) ? 0 : dir->i_ino, err); | ||
| 73 | return err; | 91 | return err; |
| 74 | } | 92 | } |
| 75 | 93 | ||
| @@ -79,6 +97,9 @@ static int recover_inode(struct inode *inode, struct page *node_page) | |||
| 79 | struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; | 97 | struct f2fs_node *raw_node = (struct f2fs_node *)kaddr; |
| 80 | struct f2fs_inode *raw_inode = &(raw_node->i); | 98 | struct f2fs_inode *raw_inode = &(raw_node->i); |
| 81 | 99 | ||
| 100 | if (!IS_INODE(node_page)) | ||
| 101 | return 0; | ||
| 102 | |||
| 82 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 103 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
| 83 | i_size_write(inode, le64_to_cpu(raw_inode->i_size)); | 104 | i_size_write(inode, le64_to_cpu(raw_inode->i_size)); |
| 84 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); | 105 | inode->i_atime.tv_sec = le64_to_cpu(raw_inode->i_mtime); |
| @@ -88,7 +109,12 @@ static int recover_inode(struct inode *inode, struct page *node_page) | |||
| 88 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); | 109 | inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec); |
| 89 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); | 110 | inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec); |
| 90 | 111 | ||
| 91 | return recover_dentry(node_page, inode); | 112 | if (is_dent_dnode(node_page)) |
| 113 | return recover_dentry(node_page, inode); | ||
| 114 | |||
| 115 | f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s", | ||
| 116 | ino_of_node(node_page), raw_inode->i_name); | ||
| 117 | return 0; | ||
| 92 | } | 118 | } |
| 93 | 119 | ||
| 94 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | 120 | static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) |
| @@ -119,14 +145,13 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
| 119 | lock_page(page); | 145 | lock_page(page); |
| 120 | 146 | ||
| 121 | if (cp_ver != cpver_of_node(page)) | 147 | if (cp_ver != cpver_of_node(page)) |
| 122 | goto unlock_out; | 148 | break; |
| 123 | 149 | ||
| 124 | if (!is_fsync_dnode(page)) | 150 | if (!is_fsync_dnode(page)) |
| 125 | goto next; | 151 | goto next; |
| 126 | 152 | ||
| 127 | entry = get_fsync_inode(head, ino_of_node(page)); | 153 | entry = get_fsync_inode(head, ino_of_node(page)); |
| 128 | if (entry) { | 154 | if (entry) { |
| 129 | entry->blkaddr = blkaddr; | ||
| 130 | if (IS_INODE(page) && is_dent_dnode(page)) | 155 | if (IS_INODE(page) && is_dent_dnode(page)) |
| 131 | set_inode_flag(F2FS_I(entry->inode), | 156 | set_inode_flag(F2FS_I(entry->inode), |
| 132 | FI_INC_LINK); | 157 | FI_INC_LINK); |
| @@ -134,48 +159,40 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) | |||
| 134 | if (IS_INODE(page) && is_dent_dnode(page)) { | 159 | if (IS_INODE(page) && is_dent_dnode(page)) { |
| 135 | err = recover_inode_page(sbi, page); | 160 | err = recover_inode_page(sbi, page); |
| 136 | if (err) | 161 | if (err) |
| 137 | goto unlock_out; | 162 | break; |
| 138 | } | 163 | } |
| 139 | 164 | ||
| 140 | /* add this fsync inode to the list */ | 165 | /* add this fsync inode to the list */ |
| 141 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); | 166 | entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS); |
| 142 | if (!entry) { | 167 | if (!entry) { |
| 143 | err = -ENOMEM; | 168 | err = -ENOMEM; |
| 144 | goto unlock_out; | 169 | break; |
| 145 | } | 170 | } |
| 146 | 171 | ||
| 147 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); | 172 | entry->inode = f2fs_iget(sbi->sb, ino_of_node(page)); |
| 148 | if (IS_ERR(entry->inode)) { | 173 | if (IS_ERR(entry->inode)) { |
| 149 | err = PTR_ERR(entry->inode); | 174 | err = PTR_ERR(entry->inode); |
| 150 | kmem_cache_free(fsync_entry_slab, entry); | 175 | kmem_cache_free(fsync_entry_slab, entry); |
| 151 | goto unlock_out; | 176 | break; |
| 152 | } | 177 | } |
| 153 | |||
| 154 | list_add_tail(&entry->list, head); | 178 | list_add_tail(&entry->list, head); |
| 155 | entry->blkaddr = blkaddr; | ||
| 156 | } | ||
| 157 | if (IS_INODE(page)) { | ||
| 158 | err = recover_inode(entry->inode, page); | ||
| 159 | if (err == -ENOENT) { | ||
| 160 | goto next; | ||
| 161 | } else if (err) { | ||
| 162 | err = -EINVAL; | ||
| 163 | goto unlock_out; | ||
| 164 | } | ||
| 165 | } | 179 | } |
| 180 | entry->blkaddr = blkaddr; | ||
| 181 | |||
| 182 | err = recover_inode(entry->inode, page); | ||
| 183 | if (err && err != -ENOENT) | ||
| 184 | break; | ||
| 166 | next: | 185 | next: |
| 167 | /* check next segment */ | 186 | /* check next segment */ |
| 168 | blkaddr = next_blkaddr_of_node(page); | 187 | blkaddr = next_blkaddr_of_node(page); |
| 169 | } | 188 | } |
| 170 | unlock_out: | ||
| 171 | unlock_page(page); | 189 | unlock_page(page); |
| 172 | out: | 190 | out: |
| 173 | __free_pages(page, 0); | 191 | __free_pages(page, 0); |
| 174 | return err; | 192 | return err; |
| 175 | } | 193 | } |
| 176 | 194 | ||
| 177 | static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, | 195 | static void destroy_fsync_dnodes(struct list_head *head) |
| 178 | struct list_head *head) | ||
| 179 | { | 196 | { |
| 180 | struct fsync_inode_entry *entry, *tmp; | 197 | struct fsync_inode_entry *entry, *tmp; |
| 181 | 198 | ||
| @@ -186,15 +203,15 @@ static void destroy_fsync_dnodes(struct f2fs_sb_info *sbi, | |||
| 186 | } | 203 | } |
| 187 | } | 204 | } |
| 188 | 205 | ||
| 189 | static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | 206 | static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi, |
| 190 | block_t blkaddr) | 207 | block_t blkaddr, struct dnode_of_data *dn) |
| 191 | { | 208 | { |
| 192 | struct seg_entry *sentry; | 209 | struct seg_entry *sentry; |
| 193 | unsigned int segno = GET_SEGNO(sbi, blkaddr); | 210 | unsigned int segno = GET_SEGNO(sbi, blkaddr); |
| 194 | unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & | 211 | unsigned short blkoff = GET_SEGOFF_FROM_SEG0(sbi, blkaddr) & |
| 195 | (sbi->blocks_per_seg - 1); | 212 | (sbi->blocks_per_seg - 1); |
| 196 | struct f2fs_summary sum; | 213 | struct f2fs_summary sum; |
| 197 | nid_t ino; | 214 | nid_t ino, nid; |
| 198 | void *kaddr; | 215 | void *kaddr; |
| 199 | struct inode *inode; | 216 | struct inode *inode; |
| 200 | struct page *node_page; | 217 | struct page *node_page; |
| @@ -203,7 +220,7 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
| 203 | 220 | ||
| 204 | sentry = get_seg_entry(sbi, segno); | 221 | sentry = get_seg_entry(sbi, segno); |
| 205 | if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) | 222 | if (!f2fs_test_bit(blkoff, sentry->cur_valid_map)) |
| 206 | return; | 223 | return 0; |
| 207 | 224 | ||
| 208 | /* Get the previous summary */ | 225 | /* Get the previous summary */ |
| 209 | for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { | 226 | for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) { |
| @@ -222,20 +239,39 @@ static void check_index_in_prev_nodes(struct f2fs_sb_info *sbi, | |||
| 222 | f2fs_put_page(sum_page, 1); | 239 | f2fs_put_page(sum_page, 1); |
| 223 | } | 240 | } |
| 224 | 241 | ||
| 242 | /* Use the locked dnode page and inode */ | ||
| 243 | nid = le32_to_cpu(sum.nid); | ||
| 244 | if (dn->inode->i_ino == nid) { | ||
| 245 | struct dnode_of_data tdn = *dn; | ||
| 246 | tdn.nid = nid; | ||
| 247 | tdn.node_page = dn->inode_page; | ||
| 248 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | ||
| 249 | truncate_data_blocks_range(&tdn, 1); | ||
| 250 | return 0; | ||
| 251 | } else if (dn->nid == nid) { | ||
| 252 | struct dnode_of_data tdn = *dn; | ||
| 253 | tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node); | ||
| 254 | truncate_data_blocks_range(&tdn, 1); | ||
| 255 | return 0; | ||
| 256 | } | ||
| 257 | |||
| 225 | /* Get the node page */ | 258 | /* Get the node page */ |
| 226 | node_page = get_node_page(sbi, le32_to_cpu(sum.nid)); | 259 | node_page = get_node_page(sbi, nid); |
| 260 | if (IS_ERR(node_page)) | ||
| 261 | return PTR_ERR(node_page); | ||
| 227 | bidx = start_bidx_of_node(ofs_of_node(node_page)) + | 262 | bidx = start_bidx_of_node(ofs_of_node(node_page)) + |
| 228 | le16_to_cpu(sum.ofs_in_node); | 263 | le16_to_cpu(sum.ofs_in_node); |
| 229 | ino = ino_of_node(node_page); | 264 | ino = ino_of_node(node_page); |
| 230 | f2fs_put_page(node_page, 1); | 265 | f2fs_put_page(node_page, 1); |
| 231 | 266 | ||
| 232 | /* Deallocate previous index in the node page */ | 267 | /* Deallocate previous index in the node page */ |
| 233 | inode = f2fs_iget(sbi->sb, ino); | 268 | inode = f2fs_iget(sbi->sb, ino); |
| 234 | if (IS_ERR(inode)) | 269 | if (IS_ERR(inode)) |
| 235 | return; | 270 | return PTR_ERR(inode); |
| 236 | 271 | ||
| 237 | truncate_hole(inode, bidx, bidx + 1); | 272 | truncate_hole(inode, bidx, bidx + 1); |
| 238 | iput(inode); | 273 | iput(inode); |
| 274 | return 0; | ||
| 239 | } | 275 | } |
| 240 | 276 | ||
| 241 | static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | 277 | static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, |
| @@ -245,7 +281,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
| 245 | struct dnode_of_data dn; | 281 | struct dnode_of_data dn; |
| 246 | struct f2fs_summary sum; | 282 | struct f2fs_summary sum; |
| 247 | struct node_info ni; | 283 | struct node_info ni; |
| 248 | int err = 0; | 284 | int err = 0, recovered = 0; |
| 249 | int ilock; | 285 | int ilock; |
| 250 | 286 | ||
| 251 | start = start_bidx_of_node(ofs_of_node(page)); | 287 | start = start_bidx_of_node(ofs_of_node(page)); |
| @@ -283,13 +319,16 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
| 283 | } | 319 | } |
| 284 | 320 | ||
| 285 | /* Check the previous node page having this index */ | 321 | /* Check the previous node page having this index */ |
| 286 | check_index_in_prev_nodes(sbi, dest); | 322 | err = check_index_in_prev_nodes(sbi, dest, &dn); |
| 323 | if (err) | ||
| 324 | goto err; | ||
| 287 | 325 | ||
| 288 | set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); | 326 | set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version); |
| 289 | 327 | ||
| 290 | /* write dummy data page */ | 328 | /* write dummy data page */ |
| 291 | recover_data_page(sbi, NULL, &sum, src, dest); | 329 | recover_data_page(sbi, NULL, &sum, src, dest); |
| 292 | update_extent_cache(dest, &dn); | 330 | update_extent_cache(dest, &dn); |
| 331 | recovered++; | ||
| 293 | } | 332 | } |
| 294 | dn.ofs_in_node++; | 333 | dn.ofs_in_node++; |
| 295 | } | 334 | } |
| @@ -305,9 +344,14 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode, | |||
| 305 | set_page_dirty(dn.node_page); | 344 | set_page_dirty(dn.node_page); |
| 306 | 345 | ||
| 307 | recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); | 346 | recover_node_page(sbi, dn.node_page, &sum, &ni, blkaddr); |
| 347 | err: | ||
| 308 | f2fs_put_dnode(&dn); | 348 | f2fs_put_dnode(&dn); |
| 309 | mutex_unlock_op(sbi, ilock); | 349 | mutex_unlock_op(sbi, ilock); |
| 310 | return 0; | 350 | |
| 351 | f2fs_msg(sbi->sb, KERN_NOTICE, "recover_data: ino = %lx, " | ||
| 352 | "recovered_data = %d blocks, err = %d", | ||
| 353 | inode->i_ino, recovered, err); | ||
| 354 | return err; | ||
| 311 | } | 355 | } |
| 312 | 356 | ||
| 313 | static int recover_data(struct f2fs_sb_info *sbi, | 357 | static int recover_data(struct f2fs_sb_info *sbi, |
| @@ -340,7 +384,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
| 340 | lock_page(page); | 384 | lock_page(page); |
| 341 | 385 | ||
| 342 | if (cp_ver != cpver_of_node(page)) | 386 | if (cp_ver != cpver_of_node(page)) |
| 343 | goto unlock_out; | 387 | break; |
| 344 | 388 | ||
| 345 | entry = get_fsync_inode(head, ino_of_node(page)); | 389 | entry = get_fsync_inode(head, ino_of_node(page)); |
| 346 | if (!entry) | 390 | if (!entry) |
| @@ -348,7 +392,7 @@ static int recover_data(struct f2fs_sb_info *sbi, | |||
| 348 | 392 | ||
| 349 | err = do_recover_data(sbi, entry->inode, page, blkaddr); | 393 | err = do_recover_data(sbi, entry->inode, page, blkaddr); |
| 350 | if (err) | 394 | if (err) |
| 351 | goto out; | 395 | break; |
| 352 | 396 | ||
| 353 | if (entry->blkaddr == blkaddr) { | 397 | if (entry->blkaddr == blkaddr) { |
| 354 | iput(entry->inode); | 398 | iput(entry->inode); |
| @@ -359,7 +403,6 @@ next: | |||
| 359 | /* check next segment */ | 403 | /* check next segment */ |
| 360 | blkaddr = next_blkaddr_of_node(page); | 404 | blkaddr = next_blkaddr_of_node(page); |
| 361 | } | 405 | } |
| 362 | unlock_out: | ||
| 363 | unlock_page(page); | 406 | unlock_page(page); |
| 364 | out: | 407 | out: |
| 365 | __free_pages(page, 0); | 408 | __free_pages(page, 0); |
| @@ -382,6 +425,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
| 382 | INIT_LIST_HEAD(&inode_list); | 425 | INIT_LIST_HEAD(&inode_list); |
| 383 | 426 | ||
| 384 | /* step #1: find fsynced inode numbers */ | 427 | /* step #1: find fsynced inode numbers */ |
| 428 | sbi->por_doing = 1; | ||
| 385 | err = find_fsync_dnodes(sbi, &inode_list); | 429 | err = find_fsync_dnodes(sbi, &inode_list); |
| 386 | if (err) | 430 | if (err) |
| 387 | goto out; | 431 | goto out; |
| @@ -390,13 +434,13 @@ int recover_fsync_data(struct f2fs_sb_info *sbi) | |||
| 390 | goto out; | 434 | goto out; |
| 391 | 435 | ||
| 392 | /* step #2: recover data */ | 436 | /* step #2: recover data */ |
| 393 | sbi->por_doing = 1; | ||
| 394 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); | 437 | err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE); |
| 395 | sbi->por_doing = 0; | ||
| 396 | BUG_ON(!list_empty(&inode_list)); | 438 | BUG_ON(!list_empty(&inode_list)); |
| 397 | out: | 439 | out: |
| 398 | destroy_fsync_dnodes(sbi, &inode_list); | 440 | destroy_fsync_dnodes(&inode_list); |
| 399 | kmem_cache_destroy(fsync_entry_slab); | 441 | kmem_cache_destroy(fsync_entry_slab); |
| 400 | write_checkpoint(sbi, false); | 442 | sbi->por_doing = 0; |
| 443 | if (!err) | ||
| 444 | write_checkpoint(sbi, false); | ||
| 401 | return err; | 445 | return err; |
| 402 | } | 446 | } |
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d8e84e49a5c3..a86d125a9885 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c | |||
| @@ -94,7 +94,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, | |||
| 94 | * Adding dirty entry into seglist is not critical operation. | 94 | * Adding dirty entry into seglist is not critical operation. |
| 95 | * If a given segment is one of current working segments, it won't be added. | 95 | * If a given segment is one of current working segments, it won't be added. |
| 96 | */ | 96 | */ |
| 97 | void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | 97 | static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) |
| 98 | { | 98 | { |
| 99 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 99 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
| 100 | unsigned short valid_blocks; | 100 | unsigned short valid_blocks; |
| @@ -126,17 +126,16 @@ void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) | |||
| 126 | static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | 126 | static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) |
| 127 | { | 127 | { |
| 128 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 128 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
| 129 | unsigned int segno, offset = 0; | 129 | unsigned int segno = -1; |
| 130 | unsigned int total_segs = TOTAL_SEGS(sbi); | 130 | unsigned int total_segs = TOTAL_SEGS(sbi); |
| 131 | 131 | ||
| 132 | mutex_lock(&dirty_i->seglist_lock); | 132 | mutex_lock(&dirty_i->seglist_lock); |
| 133 | while (1) { | 133 | while (1) { |
| 134 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, | 134 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, |
| 135 | offset); | 135 | segno + 1); |
| 136 | if (segno >= total_segs) | 136 | if (segno >= total_segs) |
| 137 | break; | 137 | break; |
| 138 | __set_test_and_free(sbi, segno); | 138 | __set_test_and_free(sbi, segno); |
| 139 | offset = segno + 1; | ||
| 140 | } | 139 | } |
| 141 | mutex_unlock(&dirty_i->seglist_lock); | 140 | mutex_unlock(&dirty_i->seglist_lock); |
| 142 | } | 141 | } |
| @@ -144,17 +143,16 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) | |||
| 144 | void clear_prefree_segments(struct f2fs_sb_info *sbi) | 143 | void clear_prefree_segments(struct f2fs_sb_info *sbi) |
| 145 | { | 144 | { |
| 146 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 145 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
| 147 | unsigned int segno, offset = 0; | 146 | unsigned int segno = -1; |
| 148 | unsigned int total_segs = TOTAL_SEGS(sbi); | 147 | unsigned int total_segs = TOTAL_SEGS(sbi); |
| 149 | 148 | ||
| 150 | mutex_lock(&dirty_i->seglist_lock); | 149 | mutex_lock(&dirty_i->seglist_lock); |
| 151 | while (1) { | 150 | while (1) { |
| 152 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, | 151 | segno = find_next_bit(dirty_i->dirty_segmap[PRE], total_segs, |
| 153 | offset); | 152 | segno + 1); |
| 154 | if (segno >= total_segs) | 153 | if (segno >= total_segs) |
| 155 | break; | 154 | break; |
| 156 | 155 | ||
| 157 | offset = segno + 1; | ||
| 158 | if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) | 156 | if (test_and_clear_bit(segno, dirty_i->dirty_segmap[PRE])) |
| 159 | dirty_i->nr_dirty[PRE]--; | 157 | dirty_i->nr_dirty[PRE]--; |
| 160 | 158 | ||
| @@ -257,11 +255,11 @@ void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr) | |||
| 257 | * This function should be resided under the curseg_mutex lock | 255 | * This function should be resided under the curseg_mutex lock |
| 258 | */ | 256 | */ |
| 259 | static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, | 257 | static void __add_sum_entry(struct f2fs_sb_info *sbi, int type, |
| 260 | struct f2fs_summary *sum, unsigned short offset) | 258 | struct f2fs_summary *sum) |
| 261 | { | 259 | { |
| 262 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 260 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
| 263 | void *addr = curseg->sum_blk; | 261 | void *addr = curseg->sum_blk; |
| 264 | addr += offset * sizeof(struct f2fs_summary); | 262 | addr += curseg->next_blkoff * sizeof(struct f2fs_summary); |
| 265 | memcpy(addr, sum, sizeof(struct f2fs_summary)); | 263 | memcpy(addr, sum, sizeof(struct f2fs_summary)); |
| 266 | return; | 264 | return; |
| 267 | } | 265 | } |
| @@ -311,64 +309,14 @@ static void write_sum_page(struct f2fs_sb_info *sbi, | |||
| 311 | f2fs_put_page(page, 1); | 309 | f2fs_put_page(page, 1); |
| 312 | } | 310 | } |
| 313 | 311 | ||
| 314 | static unsigned int check_prefree_segments(struct f2fs_sb_info *sbi, int type) | ||
| 315 | { | ||
| 316 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | ||
| 317 | unsigned long *prefree_segmap = dirty_i->dirty_segmap[PRE]; | ||
| 318 | unsigned int segno; | ||
| 319 | unsigned int ofs = 0; | ||
| 320 | |||
| 321 | /* | ||
| 322 | * If there is not enough reserved sections, | ||
| 323 | * we should not reuse prefree segments. | ||
| 324 | */ | ||
| 325 | if (has_not_enough_free_secs(sbi, 0)) | ||
| 326 | return NULL_SEGNO; | ||
| 327 | |||
| 328 | /* | ||
| 329 | * NODE page should not reuse prefree segment, | ||
| 330 | * since those information is used for SPOR. | ||
| 331 | */ | ||
| 332 | if (IS_NODESEG(type)) | ||
| 333 | return NULL_SEGNO; | ||
| 334 | next: | ||
| 335 | segno = find_next_bit(prefree_segmap, TOTAL_SEGS(sbi), ofs); | ||
| 336 | ofs += sbi->segs_per_sec; | ||
| 337 | |||
| 338 | if (segno < TOTAL_SEGS(sbi)) { | ||
| 339 | int i; | ||
| 340 | |||
| 341 | /* skip intermediate segments in a section */ | ||
| 342 | if (segno % sbi->segs_per_sec) | ||
| 343 | goto next; | ||
| 344 | |||
| 345 | /* skip if the section is currently used */ | ||
| 346 | if (sec_usage_check(sbi, GET_SECNO(sbi, segno))) | ||
| 347 | goto next; | ||
| 348 | |||
| 349 | /* skip if whole section is not prefree */ | ||
| 350 | for (i = 1; i < sbi->segs_per_sec; i++) | ||
| 351 | if (!test_bit(segno + i, prefree_segmap)) | ||
| 352 | goto next; | ||
| 353 | |||
| 354 | /* skip if whole section was not free at the last checkpoint */ | ||
| 355 | for (i = 0; i < sbi->segs_per_sec; i++) | ||
| 356 | if (get_seg_entry(sbi, segno + i)->ckpt_valid_blocks) | ||
| 357 | goto next; | ||
| 358 | |||
| 359 | return segno; | ||
| 360 | } | ||
| 361 | return NULL_SEGNO; | ||
| 362 | } | ||
| 363 | |||
| 364 | static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) | 312 | static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) |
| 365 | { | 313 | { |
| 366 | struct curseg_info *curseg = CURSEG_I(sbi, type); | 314 | struct curseg_info *curseg = CURSEG_I(sbi, type); |
| 367 | unsigned int segno = curseg->segno; | 315 | unsigned int segno = curseg->segno + 1; |
| 368 | struct free_segmap_info *free_i = FREE_I(sbi); | 316 | struct free_segmap_info *free_i = FREE_I(sbi); |
| 369 | 317 | ||
| 370 | if (segno + 1 < TOTAL_SEGS(sbi) && (segno + 1) % sbi->segs_per_sec) | 318 | if (segno < TOTAL_SEGS(sbi) && segno % sbi->segs_per_sec) |
| 371 | return !test_bit(segno + 1, free_i->free_segmap); | 319 | return !test_bit(segno, free_i->free_segmap); |
| 372 | return 0; | 320 | return 0; |
| 373 | } | 321 | } |
| 374 | 322 | ||
| @@ -495,7 +443,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) | |||
| 495 | int dir = ALLOC_LEFT; | 443 | int dir = ALLOC_LEFT; |
| 496 | 444 | ||
| 497 | write_sum_page(sbi, curseg->sum_blk, | 445 | write_sum_page(sbi, curseg->sum_blk, |
| 498 | GET_SUM_BLOCK(sbi, curseg->segno)); | 446 | GET_SUM_BLOCK(sbi, segno)); |
| 499 | if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) | 447 | if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA) |
| 500 | dir = ALLOC_RIGHT; | 448 | dir = ALLOC_RIGHT; |
| 501 | 449 | ||
| @@ -599,11 +547,7 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, | |||
| 599 | goto out; | 547 | goto out; |
| 600 | } | 548 | } |
| 601 | 549 | ||
| 602 | curseg->next_segno = check_prefree_segments(sbi, type); | 550 | if (type == CURSEG_WARM_NODE) |
| 603 | |||
| 604 | if (curseg->next_segno != NULL_SEGNO) | ||
| 605 | change_curseg(sbi, type, false); | ||
| 606 | else if (type == CURSEG_WARM_NODE) | ||
| 607 | new_curseg(sbi, type, false); | 551 | new_curseg(sbi, type, false); |
| 608 | else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) | 552 | else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) |
| 609 | new_curseg(sbi, type, false); | 553 | new_curseg(sbi, type, false); |
| @@ -612,7 +556,10 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi, | |||
| 612 | else | 556 | else |
| 613 | new_curseg(sbi, type, false); | 557 | new_curseg(sbi, type, false); |
| 614 | out: | 558 | out: |
| 559 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 615 | sbi->segment_count[curseg->alloc_type]++; | 560 | sbi->segment_count[curseg->alloc_type]++; |
| 561 | #endif | ||
| 562 | return; | ||
| 616 | } | 563 | } |
| 617 | 564 | ||
| 618 | void allocate_new_segments(struct f2fs_sb_info *sbi) | 565 | void allocate_new_segments(struct f2fs_sb_info *sbi) |
| @@ -795,7 +742,7 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) | |||
| 795 | 742 | ||
| 796 | if (S_ISDIR(inode->i_mode)) | 743 | if (S_ISDIR(inode->i_mode)) |
| 797 | return CURSEG_HOT_DATA; | 744 | return CURSEG_HOT_DATA; |
| 798 | else if (is_cold_data(page) || is_cold_file(inode)) | 745 | else if (is_cold_data(page) || file_is_cold(inode)) |
| 799 | return CURSEG_COLD_DATA; | 746 | return CURSEG_COLD_DATA; |
| 800 | else | 747 | else |
| 801 | return CURSEG_WARM_DATA; | 748 | return CURSEG_WARM_DATA; |
| @@ -844,11 +791,13 @@ static void do_write_page(struct f2fs_sb_info *sbi, struct page *page, | |||
| 844 | * because, this function updates a summary entry in the | 791 | * because, this function updates a summary entry in the |
| 845 | * current summary block. | 792 | * current summary block. |
| 846 | */ | 793 | */ |
| 847 | __add_sum_entry(sbi, type, sum, curseg->next_blkoff); | 794 | __add_sum_entry(sbi, type, sum); |
| 848 | 795 | ||
| 849 | mutex_lock(&sit_i->sentry_lock); | 796 | mutex_lock(&sit_i->sentry_lock); |
| 850 | __refresh_next_blkoff(sbi, curseg); | 797 | __refresh_next_blkoff(sbi, curseg); |
| 798 | #ifdef CONFIG_F2FS_STAT_FS | ||
| 851 | sbi->block_count[curseg->alloc_type]++; | 799 | sbi->block_count[curseg->alloc_type]++; |
| 800 | #endif | ||
| 852 | 801 | ||
| 853 | /* | 802 | /* |
| 854 | * SIT information should be updated before segment allocation, | 803 | * SIT information should be updated before segment allocation, |
| @@ -943,7 +892,7 @@ void recover_data_page(struct f2fs_sb_info *sbi, | |||
| 943 | 892 | ||
| 944 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | 893 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & |
| 945 | (sbi->blocks_per_seg - 1); | 894 | (sbi->blocks_per_seg - 1); |
| 946 | __add_sum_entry(sbi, type, sum, curseg->next_blkoff); | 895 | __add_sum_entry(sbi, type, sum); |
| 947 | 896 | ||
| 948 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); | 897 | refresh_sit_entry(sbi, old_blkaddr, new_blkaddr); |
| 949 | 898 | ||
| @@ -980,7 +929,7 @@ void rewrite_node_page(struct f2fs_sb_info *sbi, | |||
| 980 | } | 929 | } |
| 981 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & | 930 | curseg->next_blkoff = GET_SEGOFF_FROM_SEG0(sbi, new_blkaddr) & |
| 982 | (sbi->blocks_per_seg - 1); | 931 | (sbi->blocks_per_seg - 1); |
| 983 | __add_sum_entry(sbi, type, sum, curseg->next_blkoff); | 932 | __add_sum_entry(sbi, type, sum); |
| 984 | 933 | ||
| 985 | /* change the current log to the next block addr in advance */ | 934 | /* change the current log to the next block addr in advance */ |
| 986 | if (next_segno != segno) { | 935 | if (next_segno != segno) { |
| @@ -1579,13 +1528,13 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) | |||
| 1579 | { | 1528 | { |
| 1580 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); | 1529 | struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); |
| 1581 | struct free_segmap_info *free_i = FREE_I(sbi); | 1530 | struct free_segmap_info *free_i = FREE_I(sbi); |
| 1582 | unsigned int segno = 0, offset = 0; | 1531 | unsigned int segno = 0, offset = 0, total_segs = TOTAL_SEGS(sbi); |
| 1583 | unsigned short valid_blocks; | 1532 | unsigned short valid_blocks; |
| 1584 | 1533 | ||
| 1585 | while (segno < TOTAL_SEGS(sbi)) { | 1534 | while (1) { |
| 1586 | /* find dirty segment based on free segmap */ | 1535 | /* find dirty segment based on free segmap */ |
| 1587 | segno = find_next_inuse(free_i, TOTAL_SEGS(sbi), offset); | 1536 | segno = find_next_inuse(free_i, total_segs, offset); |
| 1588 | if (segno >= TOTAL_SEGS(sbi)) | 1537 | if (segno >= total_segs) |
| 1589 | break; | 1538 | break; |
| 1590 | offset = segno + 1; | 1539 | offset = segno + 1; |
| 1591 | valid_blocks = get_valid_blocks(sbi, segno, 0); | 1540 | valid_blocks = get_valid_blocks(sbi, segno, 0); |
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 8555f7df82c7..75c7dc363e92 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c | |||
| @@ -34,7 +34,7 @@ | |||
| 34 | static struct kmem_cache *f2fs_inode_cachep; | 34 | static struct kmem_cache *f2fs_inode_cachep; |
| 35 | 35 | ||
| 36 | enum { | 36 | enum { |
| 37 | Opt_gc_background_off, | 37 | Opt_gc_background, |
| 38 | Opt_disable_roll_forward, | 38 | Opt_disable_roll_forward, |
| 39 | Opt_discard, | 39 | Opt_discard, |
| 40 | Opt_noheap, | 40 | Opt_noheap, |
| @@ -46,7 +46,7 @@ enum { | |||
| 46 | }; | 46 | }; |
| 47 | 47 | ||
| 48 | static match_table_t f2fs_tokens = { | 48 | static match_table_t f2fs_tokens = { |
| 49 | {Opt_gc_background_off, "background_gc_off"}, | 49 | {Opt_gc_background, "background_gc=%s"}, |
| 50 | {Opt_disable_roll_forward, "disable_roll_forward"}, | 50 | {Opt_disable_roll_forward, "disable_roll_forward"}, |
| 51 | {Opt_discard, "discard"}, | 51 | {Opt_discard, "discard"}, |
| 52 | {Opt_noheap, "no_heap"}, | 52 | {Opt_noheap, "no_heap"}, |
| @@ -76,6 +76,91 @@ static void init_once(void *foo) | |||
| 76 | inode_init_once(&fi->vfs_inode); | 76 | inode_init_once(&fi->vfs_inode); |
| 77 | } | 77 | } |
| 78 | 78 | ||
| 79 | static int parse_options(struct super_block *sb, char *options) | ||
| 80 | { | ||
| 81 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | ||
| 82 | substring_t args[MAX_OPT_ARGS]; | ||
| 83 | char *p, *name; | ||
| 84 | int arg = 0; | ||
| 85 | |||
| 86 | if (!options) | ||
| 87 | return 0; | ||
| 88 | |||
| 89 | while ((p = strsep(&options, ",")) != NULL) { | ||
| 90 | int token; | ||
| 91 | if (!*p) | ||
| 92 | continue; | ||
| 93 | /* | ||
| 94 | * Initialize args struct so we know whether arg was | ||
| 95 | * found; some options take optional arguments. | ||
| 96 | */ | ||
| 97 | args[0].to = args[0].from = NULL; | ||
| 98 | token = match_token(p, f2fs_tokens, args); | ||
| 99 | |||
| 100 | switch (token) { | ||
| 101 | case Opt_gc_background: | ||
| 102 | name = match_strdup(&args[0]); | ||
| 103 | |||
| 104 | if (!name) | ||
| 105 | return -ENOMEM; | ||
| 106 | if (!strncmp(name, "on", 2)) | ||
| 107 | set_opt(sbi, BG_GC); | ||
| 108 | else if (!strncmp(name, "off", 3)) | ||
| 109 | clear_opt(sbi, BG_GC); | ||
| 110 | else { | ||
| 111 | kfree(name); | ||
| 112 | return -EINVAL; | ||
| 113 | } | ||
| 114 | kfree(name); | ||
| 115 | break; | ||
| 116 | case Opt_disable_roll_forward: | ||
| 117 | set_opt(sbi, DISABLE_ROLL_FORWARD); | ||
| 118 | break; | ||
| 119 | case Opt_discard: | ||
| 120 | set_opt(sbi, DISCARD); | ||
| 121 | break; | ||
| 122 | case Opt_noheap: | ||
| 123 | set_opt(sbi, NOHEAP); | ||
| 124 | break; | ||
| 125 | #ifdef CONFIG_F2FS_FS_XATTR | ||
| 126 | case Opt_nouser_xattr: | ||
| 127 | clear_opt(sbi, XATTR_USER); | ||
| 128 | break; | ||
| 129 | #else | ||
| 130 | case Opt_nouser_xattr: | ||
| 131 | f2fs_msg(sb, KERN_INFO, | ||
| 132 | "nouser_xattr options not supported"); | ||
| 133 | break; | ||
| 134 | #endif | ||
| 135 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | ||
| 136 | case Opt_noacl: | ||
| 137 | clear_opt(sbi, POSIX_ACL); | ||
| 138 | break; | ||
| 139 | #else | ||
| 140 | case Opt_noacl: | ||
| 141 | f2fs_msg(sb, KERN_INFO, "noacl options not supported"); | ||
| 142 | break; | ||
| 143 | #endif | ||
| 144 | case Opt_active_logs: | ||
| 145 | if (args->from && match_int(args, &arg)) | ||
| 146 | return -EINVAL; | ||
| 147 | if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) | ||
| 148 | return -EINVAL; | ||
| 149 | sbi->active_logs = arg; | ||
| 150 | break; | ||
| 151 | case Opt_disable_ext_identify: | ||
| 152 | set_opt(sbi, DISABLE_EXT_IDENTIFY); | ||
| 153 | break; | ||
| 154 | default: | ||
| 155 | f2fs_msg(sb, KERN_ERR, | ||
| 156 | "Unrecognized mount option \"%s\" or missing value", | ||
| 157 | p); | ||
| 158 | return -EINVAL; | ||
| 159 | } | ||
| 160 | } | ||
| 161 | return 0; | ||
| 162 | } | ||
| 163 | |||
| 79 | static struct inode *f2fs_alloc_inode(struct super_block *sb) | 164 | static struct inode *f2fs_alloc_inode(struct super_block *sb) |
| 80 | { | 165 | { |
| 81 | struct f2fs_inode_info *fi; | 166 | struct f2fs_inode_info *fi; |
| @@ -112,6 +197,17 @@ static int f2fs_drop_inode(struct inode *inode) | |||
| 112 | return generic_drop_inode(inode); | 197 | return generic_drop_inode(inode); |
| 113 | } | 198 | } |
| 114 | 199 | ||
| 200 | /* | ||
| 201 | * f2fs_dirty_inode() is called from __mark_inode_dirty() | ||
| 202 | * | ||
| 203 | * We should call set_dirty_inode to write the dirty inode through write_inode. | ||
| 204 | */ | ||
| 205 | static void f2fs_dirty_inode(struct inode *inode, int flags) | ||
| 206 | { | ||
| 207 | set_inode_flag(F2FS_I(inode), FI_DIRTY_INODE); | ||
| 208 | return; | ||
| 209 | } | ||
| 210 | |||
| 115 | static void f2fs_i_callback(struct rcu_head *head) | 211 | static void f2fs_i_callback(struct rcu_head *head) |
| 116 | { | 212 | { |
| 117 | struct inode *inode = container_of(head, struct inode, i_rcu); | 213 | struct inode *inode = container_of(head, struct inode, i_rcu); |
| @@ -170,7 +266,7 @@ static int f2fs_freeze(struct super_block *sb) | |||
| 170 | { | 266 | { |
| 171 | int err; | 267 | int err; |
| 172 | 268 | ||
| 173 | if (sb->s_flags & MS_RDONLY) | 269 | if (f2fs_readonly(sb)) |
| 174 | return 0; | 270 | return 0; |
| 175 | 271 | ||
| 176 | err = f2fs_sync_fs(sb, 1); | 272 | err = f2fs_sync_fs(sb, 1); |
| @@ -214,10 +310,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
| 214 | { | 310 | { |
| 215 | struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); | 311 | struct f2fs_sb_info *sbi = F2FS_SB(root->d_sb); |
| 216 | 312 | ||
| 217 | if (test_opt(sbi, BG_GC)) | 313 | if (!(root->d_sb->s_flags & MS_RDONLY) && test_opt(sbi, BG_GC)) |
| 218 | seq_puts(seq, ",background_gc_on"); | 314 | seq_printf(seq, ",background_gc=%s", "on"); |
| 219 | else | 315 | else |
| 220 | seq_puts(seq, ",background_gc_off"); | 316 | seq_printf(seq, ",background_gc=%s", "off"); |
| 221 | if (test_opt(sbi, DISABLE_ROLL_FORWARD)) | 317 | if (test_opt(sbi, DISABLE_ROLL_FORWARD)) |
| 222 | seq_puts(seq, ",disable_roll_forward"); | 318 | seq_puts(seq, ",disable_roll_forward"); |
| 223 | if (test_opt(sbi, DISCARD)) | 319 | if (test_opt(sbi, DISCARD)) |
| @@ -244,11 +340,64 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) | |||
| 244 | return 0; | 340 | return 0; |
| 245 | } | 341 | } |
| 246 | 342 | ||
| 343 | static int f2fs_remount(struct super_block *sb, int *flags, char *data) | ||
| 344 | { | ||
| 345 | struct f2fs_sb_info *sbi = F2FS_SB(sb); | ||
| 346 | struct f2fs_mount_info org_mount_opt; | ||
| 347 | int err, active_logs; | ||
| 348 | |||
| 349 | /* | ||
| 350 | * Save the old mount options in case we | ||
| 351 | * need to restore them. | ||
| 352 | */ | ||
| 353 | org_mount_opt = sbi->mount_opt; | ||
| 354 | active_logs = sbi->active_logs; | ||
| 355 | |||
| 356 | /* parse mount options */ | ||
| 357 | err = parse_options(sb, data); | ||
| 358 | if (err) | ||
| 359 | goto restore_opts; | ||
| 360 | |||
| 361 | /* | ||
| 362 | * Previous and new state of filesystem is RO, | ||
| 363 | * so no point in checking GC conditions. | ||
| 364 | */ | ||
| 365 | if ((sb->s_flags & MS_RDONLY) && (*flags & MS_RDONLY)) | ||
| 366 | goto skip; | ||
| 367 | |||
| 368 | /* | ||
| 369 | * We stop the GC thread if FS is mounted as RO | ||
| 370 | * or if background_gc = off is passed in mount | ||
| 371 | * option. Also sync the filesystem. | ||
| 372 | */ | ||
| 373 | if ((*flags & MS_RDONLY) || !test_opt(sbi, BG_GC)) { | ||
| 374 | if (sbi->gc_thread) { | ||
| 375 | stop_gc_thread(sbi); | ||
| 376 | f2fs_sync_fs(sb, 1); | ||
| 377 | } | ||
| 378 | } else if (test_opt(sbi, BG_GC) && !sbi->gc_thread) { | ||
| 379 | err = start_gc_thread(sbi); | ||
| 380 | if (err) | ||
| 381 | goto restore_opts; | ||
| 382 | } | ||
| 383 | skip: | ||
| 384 | /* Update the POSIXACL Flag */ | ||
| 385 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | ||
| 386 | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); | ||
| 387 | return 0; | ||
| 388 | |||
| 389 | restore_opts: | ||
| 390 | sbi->mount_opt = org_mount_opt; | ||
| 391 | sbi->active_logs = active_logs; | ||
| 392 | return err; | ||
| 393 | } | ||
| 394 | |||
| 247 | static struct super_operations f2fs_sops = { | 395 | static struct super_operations f2fs_sops = { |
| 248 | .alloc_inode = f2fs_alloc_inode, | 396 | .alloc_inode = f2fs_alloc_inode, |
| 249 | .drop_inode = f2fs_drop_inode, | 397 | .drop_inode = f2fs_drop_inode, |
| 250 | .destroy_inode = f2fs_destroy_inode, | 398 | .destroy_inode = f2fs_destroy_inode, |
| 251 | .write_inode = f2fs_write_inode, | 399 | .write_inode = f2fs_write_inode, |
| 400 | .dirty_inode = f2fs_dirty_inode, | ||
| 252 | .show_options = f2fs_show_options, | 401 | .show_options = f2fs_show_options, |
| 253 | .evict_inode = f2fs_evict_inode, | 402 | .evict_inode = f2fs_evict_inode, |
| 254 | .put_super = f2fs_put_super, | 403 | .put_super = f2fs_put_super, |
| @@ -256,6 +405,7 @@ static struct super_operations f2fs_sops = { | |||
| 256 | .freeze_fs = f2fs_freeze, | 405 | .freeze_fs = f2fs_freeze, |
| 257 | .unfreeze_fs = f2fs_unfreeze, | 406 | .unfreeze_fs = f2fs_unfreeze, |
| 258 | .statfs = f2fs_statfs, | 407 | .statfs = f2fs_statfs, |
| 408 | .remount_fs = f2fs_remount, | ||
| 259 | }; | 409 | }; |
| 260 | 410 | ||
| 261 | static struct inode *f2fs_nfs_get_inode(struct super_block *sb, | 411 | static struct inode *f2fs_nfs_get_inode(struct super_block *sb, |
| @@ -303,79 +453,6 @@ static const struct export_operations f2fs_export_ops = { | |||
| 303 | .get_parent = f2fs_get_parent, | 453 | .get_parent = f2fs_get_parent, |
| 304 | }; | 454 | }; |
| 305 | 455 | ||
| 306 | static int parse_options(struct super_block *sb, struct f2fs_sb_info *sbi, | ||
| 307 | char *options) | ||
| 308 | { | ||
| 309 | substring_t args[MAX_OPT_ARGS]; | ||
| 310 | char *p; | ||
| 311 | int arg = 0; | ||
| 312 | |||
| 313 | if (!options) | ||
| 314 | return 0; | ||
| 315 | |||
| 316 | while ((p = strsep(&options, ",")) != NULL) { | ||
| 317 | int token; | ||
| 318 | if (!*p) | ||
| 319 | continue; | ||
| 320 | /* | ||
| 321 | * Initialize args struct so we know whether arg was | ||
| 322 | * found; some options take optional arguments. | ||
| 323 | */ | ||
| 324 | args[0].to = args[0].from = NULL; | ||
| 325 | token = match_token(p, f2fs_tokens, args); | ||
| 326 | |||
| 327 | switch (token) { | ||
| 328 | case Opt_gc_background_off: | ||
| 329 | clear_opt(sbi, BG_GC); | ||
| 330 | break; | ||
| 331 | case Opt_disable_roll_forward: | ||
| 332 | set_opt(sbi, DISABLE_ROLL_FORWARD); | ||
| 333 | break; | ||
| 334 | case Opt_discard: | ||
| 335 | set_opt(sbi, DISCARD); | ||
| 336 | break; | ||
| 337 | case Opt_noheap: | ||
| 338 | set_opt(sbi, NOHEAP); | ||
| 339 | break; | ||
| 340 | #ifdef CONFIG_F2FS_FS_XATTR | ||
| 341 | case Opt_nouser_xattr: | ||
| 342 | clear_opt(sbi, XATTR_USER); | ||
| 343 | break; | ||
| 344 | #else | ||
| 345 | case Opt_nouser_xattr: | ||
| 346 | f2fs_msg(sb, KERN_INFO, | ||
| 347 | "nouser_xattr options not supported"); | ||
| 348 | break; | ||
| 349 | #endif | ||
| 350 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | ||
| 351 | case Opt_noacl: | ||
| 352 | clear_opt(sbi, POSIX_ACL); | ||
| 353 | break; | ||
| 354 | #else | ||
| 355 | case Opt_noacl: | ||
| 356 | f2fs_msg(sb, KERN_INFO, "noacl options not supported"); | ||
| 357 | break; | ||
| 358 | #endif | ||
| 359 | case Opt_active_logs: | ||
| 360 | if (args->from && match_int(args, &arg)) | ||
| 361 | return -EINVAL; | ||
| 362 | if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE) | ||
| 363 | return -EINVAL; | ||
| 364 | sbi->active_logs = arg; | ||
| 365 | break; | ||
| 366 | case Opt_disable_ext_identify: | ||
| 367 | set_opt(sbi, DISABLE_EXT_IDENTIFY); | ||
| 368 | break; | ||
| 369 | default: | ||
| 370 | f2fs_msg(sb, KERN_ERR, | ||
| 371 | "Unrecognized mount option \"%s\" or missing value", | ||
| 372 | p); | ||
| 373 | return -EINVAL; | ||
| 374 | } | ||
| 375 | } | ||
| 376 | return 0; | ||
| 377 | } | ||
| 378 | |||
| 379 | static loff_t max_file_size(unsigned bits) | 456 | static loff_t max_file_size(unsigned bits) |
| 380 | { | 457 | { |
| 381 | loff_t result = ADDRS_PER_INODE; | 458 | loff_t result = ADDRS_PER_INODE; |
| @@ -541,6 +618,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 541 | if (err) | 618 | if (err) |
| 542 | goto free_sb_buf; | 619 | goto free_sb_buf; |
| 543 | } | 620 | } |
| 621 | sb->s_fs_info = sbi; | ||
| 544 | /* init some FS parameters */ | 622 | /* init some FS parameters */ |
| 545 | sbi->active_logs = NR_CURSEG_TYPE; | 623 | sbi->active_logs = NR_CURSEG_TYPE; |
| 546 | 624 | ||
| @@ -553,7 +631,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 553 | set_opt(sbi, POSIX_ACL); | 631 | set_opt(sbi, POSIX_ACL); |
| 554 | #endif | 632 | #endif |
| 555 | /* parse mount options */ | 633 | /* parse mount options */ |
| 556 | err = parse_options(sb, sbi, (char *)data); | 634 | err = parse_options(sb, (char *)data); |
| 557 | if (err) | 635 | if (err) |
| 558 | goto free_sb_buf; | 636 | goto free_sb_buf; |
| 559 | 637 | ||
| @@ -565,7 +643,6 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 565 | sb->s_xattr = f2fs_xattr_handlers; | 643 | sb->s_xattr = f2fs_xattr_handlers; |
| 566 | sb->s_export_op = &f2fs_export_ops; | 644 | sb->s_export_op = &f2fs_export_ops; |
| 567 | sb->s_magic = F2FS_SUPER_MAGIC; | 645 | sb->s_magic = F2FS_SUPER_MAGIC; |
| 568 | sb->s_fs_info = sbi; | ||
| 569 | sb->s_time_gran = 1; | 646 | sb->s_time_gran = 1; |
| 570 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | | 647 | sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | |
| 571 | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); | 648 | (test_opt(sbi, POSIX_ACL) ? MS_POSIXACL : 0); |
| @@ -674,10 +751,16 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 674 | "Cannot recover all fsync data errno=%ld", err); | 751 | "Cannot recover all fsync data errno=%ld", err); |
| 675 | } | 752 | } |
| 676 | 753 | ||
| 677 | /* After POR, we can run background GC thread */ | 754 | /* |
| 678 | err = start_gc_thread(sbi); | 755 | * If filesystem is not mounted as read-only then |
| 679 | if (err) | 756 | * do start the gc_thread. |
| 680 | goto fail; | 757 | */ |
| 758 | if (!(sb->s_flags & MS_RDONLY)) { | ||
| 759 | /* After POR, we can run background GC thread.*/ | ||
| 760 | err = start_gc_thread(sbi); | ||
| 761 | if (err) | ||
| 762 | goto fail; | ||
| 763 | } | ||
| 681 | 764 | ||
| 682 | err = f2fs_build_stats(sbi); | 765 | err = f2fs_build_stats(sbi); |
| 683 | if (err) | 766 | if (err) |
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 0b02dce31356..3ab07ecd86ca 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | */ | 20 | */ |
| 21 | #include <linux/rwsem.h> | 21 | #include <linux/rwsem.h> |
| 22 | #include <linux/f2fs_fs.h> | 22 | #include <linux/f2fs_fs.h> |
| 23 | #include <linux/security.h> | ||
| 23 | #include "f2fs.h" | 24 | #include "f2fs.h" |
| 24 | #include "xattr.h" | 25 | #include "xattr.h" |
| 25 | 26 | ||
| @@ -43,6 +44,10 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, | |||
| 43 | prefix = XATTR_TRUSTED_PREFIX; | 44 | prefix = XATTR_TRUSTED_PREFIX; |
| 44 | prefix_len = XATTR_TRUSTED_PREFIX_LEN; | 45 | prefix_len = XATTR_TRUSTED_PREFIX_LEN; |
| 45 | break; | 46 | break; |
| 47 | case F2FS_XATTR_INDEX_SECURITY: | ||
| 48 | prefix = XATTR_SECURITY_PREFIX; | ||
| 49 | prefix_len = XATTR_SECURITY_PREFIX_LEN; | ||
| 50 | break; | ||
| 46 | default: | 51 | default: |
| 47 | return -EINVAL; | 52 | return -EINVAL; |
| 48 | } | 53 | } |
| @@ -50,7 +55,7 @@ static size_t f2fs_xattr_generic_list(struct dentry *dentry, char *list, | |||
| 50 | total_len = prefix_len + name_len + 1; | 55 | total_len = prefix_len + name_len + 1; |
| 51 | if (list && total_len <= list_size) { | 56 | if (list && total_len <= list_size) { |
| 52 | memcpy(list, prefix, prefix_len); | 57 | memcpy(list, prefix, prefix_len); |
| 53 | memcpy(list+prefix_len, name, name_len); | 58 | memcpy(list + prefix_len, name, name_len); |
| 54 | list[prefix_len + name_len] = '\0'; | 59 | list[prefix_len + name_len] = '\0'; |
| 55 | } | 60 | } |
| 56 | return total_len; | 61 | return total_len; |
| @@ -70,13 +75,14 @@ static int f2fs_xattr_generic_get(struct dentry *dentry, const char *name, | |||
| 70 | if (!capable(CAP_SYS_ADMIN)) | 75 | if (!capable(CAP_SYS_ADMIN)) |
| 71 | return -EPERM; | 76 | return -EPERM; |
| 72 | break; | 77 | break; |
| 78 | case F2FS_XATTR_INDEX_SECURITY: | ||
| 79 | break; | ||
| 73 | default: | 80 | default: |
| 74 | return -EINVAL; | 81 | return -EINVAL; |
| 75 | } | 82 | } |
| 76 | if (strcmp(name, "") == 0) | 83 | if (strcmp(name, "") == 0) |
| 77 | return -EINVAL; | 84 | return -EINVAL; |
| 78 | return f2fs_getxattr(dentry->d_inode, type, name, | 85 | return f2fs_getxattr(dentry->d_inode, type, name, buffer, size); |
| 79 | buffer, size); | ||
| 80 | } | 86 | } |
| 81 | 87 | ||
| 82 | static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, | 88 | static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, |
| @@ -93,13 +99,15 @@ static int f2fs_xattr_generic_set(struct dentry *dentry, const char *name, | |||
| 93 | if (!capable(CAP_SYS_ADMIN)) | 99 | if (!capable(CAP_SYS_ADMIN)) |
| 94 | return -EPERM; | 100 | return -EPERM; |
| 95 | break; | 101 | break; |
| 102 | case F2FS_XATTR_INDEX_SECURITY: | ||
| 103 | break; | ||
| 96 | default: | 104 | default: |
| 97 | return -EINVAL; | 105 | return -EINVAL; |
| 98 | } | 106 | } |
| 99 | if (strcmp(name, "") == 0) | 107 | if (strcmp(name, "") == 0) |
| 100 | return -EINVAL; | 108 | return -EINVAL; |
| 101 | 109 | ||
| 102 | return f2fs_setxattr(dentry->d_inode, type, name, value, size); | 110 | return f2fs_setxattr(dentry->d_inode, type, name, value, size, NULL); |
| 103 | } | 111 | } |
| 104 | 112 | ||
| 105 | static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, | 113 | static size_t f2fs_xattr_advise_list(struct dentry *dentry, char *list, |
| @@ -145,6 +153,31 @@ static int f2fs_xattr_advise_set(struct dentry *dentry, const char *name, | |||
| 145 | return 0; | 153 | return 0; |
| 146 | } | 154 | } |
| 147 | 155 | ||
| 156 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
| 157 | static int f2fs_initxattrs(struct inode *inode, const struct xattr *xattr_array, | ||
| 158 | void *page) | ||
| 159 | { | ||
| 160 | const struct xattr *xattr; | ||
| 161 | int err = 0; | ||
| 162 | |||
| 163 | for (xattr = xattr_array; xattr->name != NULL; xattr++) { | ||
| 164 | err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_SECURITY, | ||
| 165 | xattr->name, xattr->value, | ||
| 166 | xattr->value_len, (struct page *)page); | ||
| 167 | if (err < 0) | ||
| 168 | break; | ||
| 169 | } | ||
| 170 | return err; | ||
| 171 | } | ||
| 172 | |||
| 173 | int f2fs_init_security(struct inode *inode, struct inode *dir, | ||
| 174 | const struct qstr *qstr, struct page *ipage) | ||
| 175 | { | ||
| 176 | return security_inode_init_security(inode, dir, qstr, | ||
| 177 | &f2fs_initxattrs, ipage); | ||
| 178 | } | ||
| 179 | #endif | ||
| 180 | |||
| 148 | const struct xattr_handler f2fs_xattr_user_handler = { | 181 | const struct xattr_handler f2fs_xattr_user_handler = { |
| 149 | .prefix = XATTR_USER_PREFIX, | 182 | .prefix = XATTR_USER_PREFIX, |
| 150 | .flags = F2FS_XATTR_INDEX_USER, | 183 | .flags = F2FS_XATTR_INDEX_USER, |
| @@ -169,6 +202,14 @@ const struct xattr_handler f2fs_xattr_advise_handler = { | |||
| 169 | .set = f2fs_xattr_advise_set, | 202 | .set = f2fs_xattr_advise_set, |
| 170 | }; | 203 | }; |
| 171 | 204 | ||
| 205 | const struct xattr_handler f2fs_xattr_security_handler = { | ||
| 206 | .prefix = XATTR_SECURITY_PREFIX, | ||
| 207 | .flags = F2FS_XATTR_INDEX_SECURITY, | ||
| 208 | .list = f2fs_xattr_generic_list, | ||
| 209 | .get = f2fs_xattr_generic_get, | ||
| 210 | .set = f2fs_xattr_generic_set, | ||
| 211 | }; | ||
| 212 | |||
| 172 | static const struct xattr_handler *f2fs_xattr_handler_map[] = { | 213 | static const struct xattr_handler *f2fs_xattr_handler_map[] = { |
| 173 | [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, | 214 | [F2FS_XATTR_INDEX_USER] = &f2fs_xattr_user_handler, |
| 174 | #ifdef CONFIG_F2FS_FS_POSIX_ACL | 215 | #ifdef CONFIG_F2FS_FS_POSIX_ACL |
| @@ -176,6 +217,9 @@ static const struct xattr_handler *f2fs_xattr_handler_map[] = { | |||
| 176 | [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, | 217 | [F2FS_XATTR_INDEX_POSIX_ACL_DEFAULT] = &f2fs_xattr_acl_default_handler, |
| 177 | #endif | 218 | #endif |
| 178 | [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, | 219 | [F2FS_XATTR_INDEX_TRUSTED] = &f2fs_xattr_trusted_handler, |
| 220 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
| 221 | [F2FS_XATTR_INDEX_SECURITY] = &f2fs_xattr_security_handler, | ||
| 222 | #endif | ||
| 179 | [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, | 223 | [F2FS_XATTR_INDEX_ADVISE] = &f2fs_xattr_advise_handler, |
| 180 | }; | 224 | }; |
| 181 | 225 | ||
| @@ -186,6 +230,9 @@ const struct xattr_handler *f2fs_xattr_handlers[] = { | |||
| 186 | &f2fs_xattr_acl_default_handler, | 230 | &f2fs_xattr_acl_default_handler, |
| 187 | #endif | 231 | #endif |
| 188 | &f2fs_xattr_trusted_handler, | 232 | &f2fs_xattr_trusted_handler, |
| 233 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
| 234 | &f2fs_xattr_security_handler, | ||
| 235 | #endif | ||
| 189 | &f2fs_xattr_advise_handler, | 236 | &f2fs_xattr_advise_handler, |
| 190 | NULL, | 237 | NULL, |
| 191 | }; | 238 | }; |
| @@ -218,6 +265,8 @@ int f2fs_getxattr(struct inode *inode, int name_index, const char *name, | |||
| 218 | return -ENODATA; | 265 | return -ENODATA; |
| 219 | 266 | ||
| 220 | page = get_node_page(sbi, fi->i_xattr_nid); | 267 | page = get_node_page(sbi, fi->i_xattr_nid); |
| 268 | if (IS_ERR(page)) | ||
| 269 | return PTR_ERR(page); | ||
| 221 | base_addr = page_address(page); | 270 | base_addr = page_address(page); |
| 222 | 271 | ||
| 223 | list_for_each_xattr(entry, base_addr) { | 272 | list_for_each_xattr(entry, base_addr) { |
| @@ -268,6 +317,8 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size) | |||
| 268 | return 0; | 317 | return 0; |
| 269 | 318 | ||
| 270 | page = get_node_page(sbi, fi->i_xattr_nid); | 319 | page = get_node_page(sbi, fi->i_xattr_nid); |
| 320 | if (IS_ERR(page)) | ||
| 321 | return PTR_ERR(page); | ||
| 271 | base_addr = page_address(page); | 322 | base_addr = page_address(page); |
| 272 | 323 | ||
| 273 | list_for_each_xattr(entry, base_addr) { | 324 | list_for_each_xattr(entry, base_addr) { |
| @@ -296,7 +347,7 @@ cleanup: | |||
| 296 | } | 347 | } |
| 297 | 348 | ||
| 298 | int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | 349 | int f2fs_setxattr(struct inode *inode, int name_index, const char *name, |
| 299 | const void *value, size_t value_len) | 350 | const void *value, size_t value_len, struct page *ipage) |
| 300 | { | 351 | { |
| 301 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); | 352 | struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); |
| 302 | struct f2fs_inode_info *fi = F2FS_I(inode); | 353 | struct f2fs_inode_info *fi = F2FS_I(inode); |
| @@ -335,7 +386,7 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | |||
| 335 | set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); | 386 | set_new_dnode(&dn, inode, NULL, NULL, fi->i_xattr_nid); |
| 336 | mark_inode_dirty(inode); | 387 | mark_inode_dirty(inode); |
| 337 | 388 | ||
| 338 | page = new_node_page(&dn, XATTR_NODE_OFFSET); | 389 | page = new_node_page(&dn, XATTR_NODE_OFFSET, ipage); |
| 339 | if (IS_ERR(page)) { | 390 | if (IS_ERR(page)) { |
| 340 | alloc_nid_failed(sbi, fi->i_xattr_nid); | 391 | alloc_nid_failed(sbi, fi->i_xattr_nid); |
| 341 | fi->i_xattr_nid = 0; | 392 | fi->i_xattr_nid = 0; |
| @@ -435,7 +486,10 @@ int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | |||
| 435 | inode->i_ctime = CURRENT_TIME; | 486 | inode->i_ctime = CURRENT_TIME; |
| 436 | clear_inode_flag(fi, FI_ACL_MODE); | 487 | clear_inode_flag(fi, FI_ACL_MODE); |
| 437 | } | 488 | } |
| 438 | update_inode_page(inode); | 489 | if (ipage) |
| 490 | update_inode(inode, ipage); | ||
| 491 | else | ||
| 492 | update_inode_page(inode); | ||
| 439 | mutex_unlock_op(sbi, ilock); | 493 | mutex_unlock_op(sbi, ilock); |
| 440 | 494 | ||
| 441 | return 0; | 495 | return 0; |
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 49c9558305e3..3c0817bef25d 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h | |||
| @@ -112,21 +112,19 @@ extern const struct xattr_handler f2fs_xattr_trusted_handler; | |||
| 112 | extern const struct xattr_handler f2fs_xattr_acl_access_handler; | 112 | extern const struct xattr_handler f2fs_xattr_acl_access_handler; |
| 113 | extern const struct xattr_handler f2fs_xattr_acl_default_handler; | 113 | extern const struct xattr_handler f2fs_xattr_acl_default_handler; |
| 114 | extern const struct xattr_handler f2fs_xattr_advise_handler; | 114 | extern const struct xattr_handler f2fs_xattr_advise_handler; |
| 115 | extern const struct xattr_handler f2fs_xattr_security_handler; | ||
| 115 | 116 | ||
| 116 | extern const struct xattr_handler *f2fs_xattr_handlers[]; | 117 | extern const struct xattr_handler *f2fs_xattr_handlers[]; |
| 117 | 118 | ||
| 118 | extern int f2fs_setxattr(struct inode *inode, int name_index, const char *name, | 119 | extern int f2fs_setxattr(struct inode *, int, const char *, |
| 119 | const void *value, size_t value_len); | 120 | const void *, size_t, struct page *); |
| 120 | extern int f2fs_getxattr(struct inode *inode, int name_index, const char *name, | 121 | extern int f2fs_getxattr(struct inode *, int, const char *, void *, size_t); |
| 121 | void *buffer, size_t buffer_size); | 122 | extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t); |
| 122 | extern ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, | ||
| 123 | size_t buffer_size); | ||
| 124 | |||
| 125 | #else | 123 | #else |
| 126 | 124 | ||
| 127 | #define f2fs_xattr_handlers NULL | 125 | #define f2fs_xattr_handlers NULL |
| 128 | static inline int f2fs_setxattr(struct inode *inode, int name_index, | 126 | static inline int f2fs_setxattr(struct inode *inode, int name_index, |
| 129 | const char *name, const void *value, size_t value_len) | 127 | const char *name, const void *value, size_t value_len) |
| 130 | { | 128 | { |
| 131 | return -EOPNOTSUPP; | 129 | return -EOPNOTSUPP; |
| 132 | } | 130 | } |
| @@ -142,4 +140,14 @@ static inline ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, | |||
| 142 | } | 140 | } |
| 143 | #endif | 141 | #endif |
| 144 | 142 | ||
| 143 | #ifdef CONFIG_F2FS_FS_SECURITY | ||
| 144 | extern int f2fs_init_security(struct inode *, struct inode *, | ||
| 145 | const struct qstr *, struct page *); | ||
| 146 | #else | ||
| 147 | static inline int f2fs_init_security(struct inode *inode, struct inode *dir, | ||
| 148 | const struct qstr *qstr, struct page *ipage) | ||
| 149 | { | ||
| 150 | return 0; | ||
| 151 | } | ||
| 152 | #endif | ||
| 145 | #endif /* __F2FS_XATTR_H__ */ | 153 | #endif /* __F2FS_XATTR_H__ */ |
diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 7a6f02caf286..3963ede84eb0 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c | |||
| @@ -543,6 +543,7 @@ end_of_dir: | |||
| 543 | EXPORT_SYMBOL_GPL(fat_search_long); | 543 | EXPORT_SYMBOL_GPL(fat_search_long); |
| 544 | 544 | ||
| 545 | struct fat_ioctl_filldir_callback { | 545 | struct fat_ioctl_filldir_callback { |
| 546 | struct dir_context ctx; | ||
| 546 | void __user *dirent; | 547 | void __user *dirent; |
| 547 | int result; | 548 | int result; |
| 548 | /* for dir ioctl */ | 549 | /* for dir ioctl */ |
| @@ -552,8 +553,9 @@ struct fat_ioctl_filldir_callback { | |||
| 552 | int short_len; | 553 | int short_len; |
| 553 | }; | 554 | }; |
| 554 | 555 | ||
| 555 | static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | 556 | static int __fat_readdir(struct inode *inode, struct file *file, |
| 556 | filldir_t filldir, int short_only, int both) | 557 | struct dir_context *ctx, int short_only, |
| 558 | struct fat_ioctl_filldir_callback *both) | ||
| 557 | { | 559 | { |
| 558 | struct super_block *sb = inode->i_sb; | 560 | struct super_block *sb = inode->i_sb; |
| 559 | struct msdos_sb_info *sbi = MSDOS_SB(sb); | 561 | struct msdos_sb_info *sbi = MSDOS_SB(sb); |
| @@ -564,27 +566,20 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent, | |||
| 564 | unsigned char bufname[FAT_MAX_SHORT_SIZE]; | 566 | unsigned char bufname[FAT_MAX_SHORT_SIZE]; |
| 565 | int isvfat = sbi->options.isvfat; | 567 | int isvfat = sbi->options.isvfat; |
| 566 | const char *fill_name = NULL; | 568 | const char *fill_name = NULL; |
| 567 | unsigned long inum; | 569 | int fake_offset = 0; |
| 568 | unsigned long lpos, dummy, *furrfu = &lpos; | ||
| 569 | loff_t cpos; | 570 | loff_t cpos; |
| 570 | int short_len = 0, fill_len = 0; | 571 | int short_len = 0, fill_len = 0; |
| 571 | int ret = 0; | 572 | int ret = 0; |
| 572 | 573 | ||
| 573 | mutex_lock(&sbi->s_lock); | 574 | mutex_lock(&sbi->s_lock); |
| 574 | 575 | ||
| 575 | cpos = filp->f_pos; | 576 | cpos = ctx->pos; |
| 576 | /* Fake . and .. for the root directory. */ | 577 | /* Fake . and .. for the root directory. */ |
| 577 | if (inode->i_ino == MSDOS_ROOT_INO) { | 578 | if (inode->i_ino == MSDOS_ROOT_INO) { |
| 578 | while (cpos < 2) { | 579 | if (!dir_emit_dots(file, ctx)) |
| 579 | if (filldir(dirent, "..", cpos+1, cpos, | 580 | goto out; |
| 580 | MSDOS_ROOT_INO, DT_DIR) < 0) | 581 | if (ctx->pos == 2) { |
| 581 | goto out; | 582 | fake_offset = 1; |
| 582 | cpos++; | ||
| 583 | filp->f_pos++; | ||
| 584 | } | ||
| 585 | if (cpos == 2) { | ||
| 586 | dummy = 2; | ||
| 587 | furrfu = &dummy; | ||
| 588 | cpos = 0; | 583 | cpos = 0; |
| 589 | } | 584 | } |
| 590 | } | 585 | } |
| @@ -619,7 +614,7 @@ parse_record: | |||
| 619 | int status = fat_parse_long(inode, &cpos, &bh, &de, | 614 | int status = fat_parse_long(inode, &cpos, &bh, &de, |
| 620 | &unicode, &nr_slots); | 615 | &unicode, &nr_slots); |
| 621 | if (status < 0) { | 616 | if (status < 0) { |
| 622 | filp->f_pos = cpos; | 617 | ctx->pos = cpos; |
| 623 | ret = status; | 618 | ret = status; |
| 624 | goto out; | 619 | goto out; |
| 625 | } else if (status == PARSE_INVALID) | 620 | } else if (status == PARSE_INVALID) |
| @@ -639,6 +634,19 @@ parse_record: | |||
| 639 | /* !both && !short_only, so we don't need shortname. */ | 634 | /* !both && !short_only, so we don't need shortname. */ |
| 640 | if (!both) | 635 | if (!both) |
| 641 | goto start_filldir; | 636 | goto start_filldir; |
| 637 | |||
| 638 | short_len = fat_parse_short(sb, de, bufname, | ||
| 639 | sbi->options.dotsOK); | ||
| 640 | if (short_len == 0) | ||
| 641 | goto record_end; | ||
| 642 | /* hack for fat_ioctl_filldir() */ | ||
| 643 | both->longname = fill_name; | ||
| 644 | both->long_len = fill_len; | ||
| 645 | both->shortname = bufname; | ||
| 646 | both->short_len = short_len; | ||
| 647 | fill_name = NULL; | ||
| 648 | fill_len = 0; | ||
| 649 | goto start_filldir; | ||
| 642 | } | 650 | } |
| 643 | } | 651 | } |
| 644 | 652 | ||
| @@ -646,28 +654,21 @@ parse_record: | |||
| 646 | if (short_len == 0) | 654 | if (short_len == 0) |
| 647 | goto record_end; | 655 | goto record_end; |
| 648 | 656 | ||
| 649 | if (nr_slots) { | 657 | fill_name = bufname; |
| 650 | /* hack for fat_ioctl_filldir() */ | 658 | fill_len = short_len; |
| 651 | struct fat_ioctl_filldir_callback *p = dirent; | ||
| 652 | |||
| 653 | p->longname = fill_name; | ||
| 654 | p->long_len = fill_len; | ||
| 655 | p->shortname = bufname; | ||
| 656 | p->short_len = short_len; | ||
| 657 | fill_name = NULL; | ||
| 658 | fill_len = 0; | ||
| 659 | } else { | ||
| 660 | fill_name = bufname; | ||
| 661 | fill_len = short_len; | ||
| 662 | } | ||
| 663 | 659 | ||
| 664 | start_filldir: | 660 | start_filldir: |
| 665 | lpos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); | 661 | if (!fake_offset) |
| 666 | if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) | 662 | ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); |
| 667 | inum = inode->i_ino; | 663 | |
| 668 | else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { | 664 | if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { |
| 669 | inum = parent_ino(filp->f_path.dentry); | 665 | if (!dir_emit_dot(file, ctx)) |
| 666 | goto fill_failed; | ||
| 667 | } else if (!memcmp(de->name, MSDOS_DOTDOT, MSDOS_NAME)) { | ||
| 668 | if (!dir_emit_dotdot(file, ctx)) | ||
| 669 | goto fill_failed; | ||
| 670 | } else { | 670 | } else { |
| 671 | unsigned long inum; | ||
| 671 | loff_t i_pos = fat_make_i_pos(sb, bh, de); | 672 | loff_t i_pos = fat_make_i_pos(sb, bh, de); |
| 672 | struct inode *tmp = fat_iget(sb, i_pos); | 673 | struct inode *tmp = fat_iget(sb, i_pos); |
| 673 | if (tmp) { | 674 | if (tmp) { |
| @@ -675,18 +676,17 @@ start_filldir: | |||
| 675 | iput(tmp); | 676 | iput(tmp); |
| 676 | } else | 677 | } else |
| 677 | inum = iunique(sb, MSDOS_ROOT_INO); | 678 | inum = iunique(sb, MSDOS_ROOT_INO); |
| 679 | if (!dir_emit(ctx, fill_name, fill_len, inum, | ||
| 680 | (de->attr & ATTR_DIR) ? DT_DIR : DT_REG)) | ||
| 681 | goto fill_failed; | ||
| 678 | } | 682 | } |
| 679 | 683 | ||
| 680 | if (filldir(dirent, fill_name, fill_len, *furrfu, inum, | ||
| 681 | (de->attr & ATTR_DIR) ? DT_DIR : DT_REG) < 0) | ||
| 682 | goto fill_failed; | ||
| 683 | |||
| 684 | record_end: | 684 | record_end: |
| 685 | furrfu = &lpos; | 685 | fake_offset = 0; |
| 686 | filp->f_pos = cpos; | 686 | ctx->pos = cpos; |
| 687 | goto get_new; | 687 | goto get_new; |
| 688 | end_of_dir: | 688 | end_of_dir: |
| 689 | filp->f_pos = cpos; | 689 | ctx->pos = cpos; |
| 690 | fill_failed: | 690 | fill_failed: |
| 691 | brelse(bh); | 691 | brelse(bh); |
| 692 | if (unicode) | 692 | if (unicode) |
| @@ -696,10 +696,9 @@ out: | |||
| 696 | return ret; | 696 | return ret; |
| 697 | } | 697 | } |
| 698 | 698 | ||
| 699 | static int fat_readdir(struct file *filp, void *dirent, filldir_t filldir) | 699 | static int fat_readdir(struct file *file, struct dir_context *ctx) |
| 700 | { | 700 | { |
| 701 | struct inode *inode = file_inode(filp); | 701 | return __fat_readdir(file_inode(file), file, ctx, 0, NULL); |
| 702 | return __fat_readdir(inode, filp, dirent, filldir, 0, 0); | ||
| 703 | } | 702 | } |
| 704 | 703 | ||
| 705 | #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ | 704 | #define FAT_IOCTL_FILLDIR_FUNC(func, dirent_type) \ |
| @@ -755,20 +754,25 @@ efault: \ | |||
| 755 | 754 | ||
| 756 | FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) | 755 | FAT_IOCTL_FILLDIR_FUNC(fat_ioctl_filldir, __fat_dirent) |
| 757 | 756 | ||
| 758 | static int fat_ioctl_readdir(struct inode *inode, struct file *filp, | 757 | static int fat_ioctl_readdir(struct inode *inode, struct file *file, |
| 759 | void __user *dirent, filldir_t filldir, | 758 | void __user *dirent, filldir_t filldir, |
| 760 | int short_only, int both) | 759 | int short_only, int both) |
| 761 | { | 760 | { |
| 762 | struct fat_ioctl_filldir_callback buf; | 761 | struct fat_ioctl_filldir_callback buf = { |
| 762 | .ctx.actor = filldir, | ||
| 763 | .dirent = dirent | ||
| 764 | }; | ||
| 763 | int ret; | 765 | int ret; |
| 764 | 766 | ||
| 765 | buf.dirent = dirent; | 767 | buf.dirent = dirent; |
| 766 | buf.result = 0; | 768 | buf.result = 0; |
| 767 | mutex_lock(&inode->i_mutex); | 769 | mutex_lock(&inode->i_mutex); |
| 770 | buf.ctx.pos = file->f_pos; | ||
| 768 | ret = -ENOENT; | 771 | ret = -ENOENT; |
| 769 | if (!IS_DEADDIR(inode)) { | 772 | if (!IS_DEADDIR(inode)) { |
| 770 | ret = __fat_readdir(inode, filp, &buf, filldir, | 773 | ret = __fat_readdir(inode, file, &buf.ctx, |
| 771 | short_only, both); | 774 | short_only, both ? &buf : NULL); |
| 775 | file->f_pos = buf.ctx.pos; | ||
| 772 | } | 776 | } |
| 773 | mutex_unlock(&inode->i_mutex); | 777 | mutex_unlock(&inode->i_mutex); |
| 774 | if (ret >= 0) | 778 | if (ret >= 0) |
| @@ -854,7 +858,7 @@ static long fat_compat_dir_ioctl(struct file *filp, unsigned cmd, | |||
| 854 | const struct file_operations fat_dir_operations = { | 858 | const struct file_operations fat_dir_operations = { |
| 855 | .llseek = generic_file_llseek, | 859 | .llseek = generic_file_llseek, |
| 856 | .read = generic_read_dir, | 860 | .read = generic_read_dir, |
| 857 | .readdir = fat_readdir, | 861 | .iterate = fat_readdir, |
| 858 | .unlocked_ioctl = fat_dir_ioctl, | 862 | .unlocked_ioctl = fat_dir_ioctl, |
| 859 | #ifdef CONFIG_COMPAT | 863 | #ifdef CONFIG_COMPAT |
| 860 | .compat_ioctl = fat_compat_dir_ioctl, | 864 | .compat_ioctl = fat_compat_dir_ioctl, |
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c index 664b07a53870..25d4099a4aea 100644 --- a/fs/freevxfs/vxfs_lookup.c +++ b/fs/freevxfs/vxfs_lookup.c | |||
| @@ -49,7 +49,7 @@ | |||
| 49 | 49 | ||
| 50 | 50 | ||
| 51 | static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); | 51 | static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int); |
| 52 | static int vxfs_readdir(struct file *, void *, filldir_t); | 52 | static int vxfs_readdir(struct file *, struct dir_context *); |
| 53 | 53 | ||
| 54 | const struct inode_operations vxfs_dir_inode_ops = { | 54 | const struct inode_operations vxfs_dir_inode_ops = { |
| 55 | .lookup = vxfs_lookup, | 55 | .lookup = vxfs_lookup, |
| @@ -58,7 +58,7 @@ const struct inode_operations vxfs_dir_inode_ops = { | |||
| 58 | const struct file_operations vxfs_dir_operations = { | 58 | const struct file_operations vxfs_dir_operations = { |
| 59 | .llseek = generic_file_llseek, | 59 | .llseek = generic_file_llseek, |
| 60 | .read = generic_read_dir, | 60 | .read = generic_read_dir, |
| 61 | .readdir = vxfs_readdir, | 61 | .iterate = vxfs_readdir, |
| 62 | }; | 62 | }; |
| 63 | 63 | ||
| 64 | 64 | ||
| @@ -235,7 +235,7 @@ vxfs_lookup(struct inode *dip, struct dentry *dp, unsigned int flags) | |||
| 235 | * Zero. | 235 | * Zero. |
| 236 | */ | 236 | */ |
| 237 | static int | 237 | static int |
| 238 | vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | 238 | vxfs_readdir(struct file *fp, struct dir_context *ctx) |
| 239 | { | 239 | { |
| 240 | struct inode *ip = file_inode(fp); | 240 | struct inode *ip = file_inode(fp); |
| 241 | struct super_block *sbp = ip->i_sb; | 241 | struct super_block *sbp = ip->i_sb; |
| @@ -243,20 +243,17 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
| 243 | u_long page, npages, block, pblocks, nblocks, offset; | 243 | u_long page, npages, block, pblocks, nblocks, offset; |
| 244 | loff_t pos; | 244 | loff_t pos; |
| 245 | 245 | ||
| 246 | switch ((long)fp->f_pos) { | 246 | if (ctx->pos == 0) { |
| 247 | case 0: | 247 | if (!dir_emit_dot(fp, ctx)) |
| 248 | if (filler(retp, ".", 1, fp->f_pos, ip->i_ino, DT_DIR) < 0) | 248 | return 0; |
| 249 | goto out; | 249 | ctx->pos = 1; |
| 250 | fp->f_pos++; | ||
| 251 | /* fallthrough */ | ||
| 252 | case 1: | ||
| 253 | if (filler(retp, "..", 2, fp->f_pos, VXFS_INO(ip)->vii_dotdot, DT_DIR) < 0) | ||
| 254 | goto out; | ||
| 255 | fp->f_pos++; | ||
| 256 | /* fallthrough */ | ||
| 257 | } | 250 | } |
| 258 | 251 | if (ctx->pos == 1) { | |
| 259 | pos = fp->f_pos - 2; | 252 | if (!dir_emit(ctx, "..", 2, VXFS_INO(ip)->vii_dotdot, DT_DIR)) |
| 253 | return 0; | ||
| 254 | ctx->pos = 2; | ||
| 255 | } | ||
| 256 | pos = ctx->pos - 2; | ||
| 260 | 257 | ||
| 261 | if (pos > VXFS_DIRROUND(ip->i_size)) | 258 | if (pos > VXFS_DIRROUND(ip->i_size)) |
| 262 | return 0; | 259 | return 0; |
| @@ -270,16 +267,16 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
| 270 | block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks; | 267 | block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks; |
| 271 | 268 | ||
| 272 | for (; page < npages; page++, block = 0) { | 269 | for (; page < npages; page++, block = 0) { |
| 273 | caddr_t kaddr; | 270 | char *kaddr; |
| 274 | struct page *pp; | 271 | struct page *pp; |
| 275 | 272 | ||
| 276 | pp = vxfs_get_page(ip->i_mapping, page); | 273 | pp = vxfs_get_page(ip->i_mapping, page); |
| 277 | if (IS_ERR(pp)) | 274 | if (IS_ERR(pp)) |
| 278 | continue; | 275 | continue; |
| 279 | kaddr = (caddr_t)page_address(pp); | 276 | kaddr = (char *)page_address(pp); |
| 280 | 277 | ||
| 281 | for (; block <= nblocks && block <= pblocks; block++) { | 278 | for (; block <= nblocks && block <= pblocks; block++) { |
| 282 | caddr_t baddr, limit; | 279 | char *baddr, *limit; |
| 283 | struct vxfs_dirblk *dbp; | 280 | struct vxfs_dirblk *dbp; |
| 284 | struct vxfs_direct *de; | 281 | struct vxfs_direct *de; |
| 285 | 282 | ||
| @@ -292,21 +289,18 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
| 292 | (kaddr + offset) : | 289 | (kaddr + offset) : |
| 293 | (baddr + VXFS_DIRBLKOV(dbp))); | 290 | (baddr + VXFS_DIRBLKOV(dbp))); |
| 294 | 291 | ||
| 295 | for (; (caddr_t)de <= limit; de = vxfs_next_entry(de)) { | 292 | for (; (char *)de <= limit; de = vxfs_next_entry(de)) { |
| 296 | int over; | ||
| 297 | |||
| 298 | if (!de->d_reclen) | 293 | if (!de->d_reclen) |
| 299 | break; | 294 | break; |
| 300 | if (!de->d_ino) | 295 | if (!de->d_ino) |
| 301 | continue; | 296 | continue; |
| 302 | 297 | ||
| 303 | offset = (caddr_t)de - kaddr; | 298 | offset = (char *)de - kaddr; |
| 304 | over = filler(retp, de->d_name, de->d_namelen, | 299 | ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; |
| 305 | ((page << PAGE_CACHE_SHIFT) | offset) + 2, | 300 | if (!dir_emit(ctx, de->d_name, de->d_namelen, |
| 306 | de->d_ino, DT_UNKNOWN); | 301 | de->d_ino, DT_UNKNOWN)) { |
| 307 | if (over) { | ||
| 308 | vxfs_put_page(pp); | 302 | vxfs_put_page(pp); |
| 309 | goto done; | 303 | return 0; |
| 310 | } | 304 | } |
| 311 | } | 305 | } |
| 312 | offset = 0; | 306 | offset = 0; |
| @@ -314,9 +308,6 @@ vxfs_readdir(struct file *fp, void *retp, filldir_t filler) | |||
| 314 | vxfs_put_page(pp); | 308 | vxfs_put_page(pp); |
| 315 | offset = 0; | 309 | offset = 0; |
| 316 | } | 310 | } |
| 317 | 311 | ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; | |
| 318 | done: | ||
| 319 | fp->f_pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2; | ||
| 320 | out: | ||
| 321 | return 0; | 312 | return 0; |
| 322 | } | 313 | } |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3be57189efd5..a85ac4e33436 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -45,6 +45,7 @@ struct wb_writeback_work { | |||
| 45 | unsigned int for_kupdate:1; | 45 | unsigned int for_kupdate:1; |
| 46 | unsigned int range_cyclic:1; | 46 | unsigned int range_cyclic:1; |
| 47 | unsigned int for_background:1; | 47 | unsigned int for_background:1; |
| 48 | unsigned int for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ | ||
| 48 | enum wb_reason reason; /* why was writeback initiated? */ | 49 | enum wb_reason reason; /* why was writeback initiated? */ |
| 49 | 50 | ||
| 50 | struct list_head list; /* pending work list */ | 51 | struct list_head list; /* pending work list */ |
| @@ -443,9 +444,11 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 443 | /* | 444 | /* |
| 444 | * Make sure to wait on the data before writing out the metadata. | 445 | * Make sure to wait on the data before writing out the metadata. |
| 445 | * This is important for filesystems that modify metadata on data | 446 | * This is important for filesystems that modify metadata on data |
| 446 | * I/O completion. | 447 | * I/O completion. We don't do it for sync(2) writeback because it has a |
| 448 | * separate, external IO completion path and ->sync_fs for guaranteeing | ||
| 449 | * inode metadata is written back correctly. | ||
| 447 | */ | 450 | */ |
| 448 | if (wbc->sync_mode == WB_SYNC_ALL) { | 451 | if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) { |
| 449 | int err = filemap_fdatawait(mapping); | 452 | int err = filemap_fdatawait(mapping); |
| 450 | if (ret == 0) | 453 | if (ret == 0) |
| 451 | ret = err; | 454 | ret = err; |
| @@ -578,6 +581,7 @@ static long writeback_sb_inodes(struct super_block *sb, | |||
| 578 | .tagged_writepages = work->tagged_writepages, | 581 | .tagged_writepages = work->tagged_writepages, |
| 579 | .for_kupdate = work->for_kupdate, | 582 | .for_kupdate = work->for_kupdate, |
| 580 | .for_background = work->for_background, | 583 | .for_background = work->for_background, |
| 584 | .for_sync = work->for_sync, | ||
| 581 | .range_cyclic = work->range_cyclic, | 585 | .range_cyclic = work->range_cyclic, |
| 582 | .range_start = 0, | 586 | .range_start = 0, |
| 583 | .range_end = LLONG_MAX, | 587 | .range_end = LLONG_MAX, |
| @@ -1362,6 +1366,7 @@ void sync_inodes_sb(struct super_block *sb) | |||
| 1362 | .range_cyclic = 0, | 1366 | .range_cyclic = 0, |
| 1363 | .done = &done, | 1367 | .done = &done, |
| 1364 | .reason = WB_REASON_SYNC, | 1368 | .reason = WB_REASON_SYNC, |
| 1369 | .for_sync = 1, | ||
| 1365 | }; | 1370 | }; |
| 1366 | 1371 | ||
| 1367 | /* Nothing to do? */ | 1372 | /* Nothing to do? */ |
diff --git a/fs/fscache/cache.c b/fs/fscache/cache.c index b52aed1dca97..f7cff367db7f 100644 --- a/fs/fscache/cache.c +++ b/fs/fscache/cache.c | |||
| @@ -115,7 +115,7 @@ struct fscache_cache *fscache_select_cache_for_object( | |||
| 115 | struct fscache_object, cookie_link); | 115 | struct fscache_object, cookie_link); |
| 116 | 116 | ||
| 117 | cache = object->cache; | 117 | cache = object->cache; |
| 118 | if (object->state >= FSCACHE_OBJECT_DYING || | 118 | if (fscache_object_is_dying(object) || |
| 119 | test_bit(FSCACHE_IOERROR, &cache->flags)) | 119 | test_bit(FSCACHE_IOERROR, &cache->flags)) |
| 120 | cache = NULL; | 120 | cache = NULL; |
| 121 | 121 | ||
| @@ -224,8 +224,10 @@ int fscache_add_cache(struct fscache_cache *cache, | |||
| 224 | BUG_ON(!ifsdef); | 224 | BUG_ON(!ifsdef); |
| 225 | 225 | ||
| 226 | cache->flags = 0; | 226 | cache->flags = 0; |
| 227 | ifsdef->event_mask = ULONG_MAX & ~(1 << FSCACHE_OBJECT_EV_CLEARED); | 227 | ifsdef->event_mask = |
| 228 | ifsdef->state = FSCACHE_OBJECT_ACTIVE; | 228 | ((1 << NR_FSCACHE_OBJECT_EVENTS) - 1) & |
| 229 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
| 230 | __set_bit(FSCACHE_OBJECT_IS_AVAILABLE, &ifsdef->flags); | ||
| 229 | 231 | ||
| 230 | if (!tagname) | 232 | if (!tagname) |
| 231 | tagname = cache->identifier; | 233 | tagname = cache->identifier; |
| @@ -330,25 +332,25 @@ static void fscache_withdraw_all_objects(struct fscache_cache *cache, | |||
| 330 | { | 332 | { |
| 331 | struct fscache_object *object; | 333 | struct fscache_object *object; |
| 332 | 334 | ||
| 333 | spin_lock(&cache->object_list_lock); | ||
| 334 | |||
| 335 | while (!list_empty(&cache->object_list)) { | 335 | while (!list_empty(&cache->object_list)) { |
| 336 | object = list_entry(cache->object_list.next, | 336 | spin_lock(&cache->object_list_lock); |
| 337 | struct fscache_object, cache_link); | ||
| 338 | list_move_tail(&object->cache_link, dying_objects); | ||
| 339 | 337 | ||
| 340 | _debug("withdraw %p", object->cookie); | 338 | if (!list_empty(&cache->object_list)) { |
| 339 | object = list_entry(cache->object_list.next, | ||
| 340 | struct fscache_object, cache_link); | ||
| 341 | list_move_tail(&object->cache_link, dying_objects); | ||
| 341 | 342 | ||
| 342 | spin_lock(&object->lock); | 343 | _debug("withdraw %p", object->cookie); |
| 343 | spin_unlock(&cache->object_list_lock); | 344 | |
| 344 | fscache_raise_event(object, FSCACHE_OBJECT_EV_WITHDRAW); | 345 | /* This must be done under object_list_lock to prevent |
| 345 | spin_unlock(&object->lock); | 346 | * a race with fscache_drop_object(). |
| 347 | */ | ||
| 348 | fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); | ||
| 349 | } | ||
| 346 | 350 | ||
| 351 | spin_unlock(&cache->object_list_lock); | ||
| 347 | cond_resched(); | 352 | cond_resched(); |
| 348 | spin_lock(&cache->object_list_lock); | ||
| 349 | } | 353 | } |
| 350 | |||
| 351 | spin_unlock(&cache->object_list_lock); | ||
| 352 | } | 354 | } |
| 353 | 355 | ||
| 354 | /** | 356 | /** |
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index e2cba1f60c21..0e91a3c9fdb2 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c | |||
| @@ -95,6 +95,11 @@ struct fscache_cookie *__fscache_acquire_cookie( | |||
| 95 | atomic_set(&cookie->usage, 1); | 95 | atomic_set(&cookie->usage, 1); |
| 96 | atomic_set(&cookie->n_children, 0); | 96 | atomic_set(&cookie->n_children, 0); |
| 97 | 97 | ||
| 98 | /* We keep the active count elevated until relinquishment to prevent an | ||
| 99 | * attempt to wake up every time the object operations queue quiesces. | ||
| 100 | */ | ||
| 101 | atomic_set(&cookie->n_active, 1); | ||
| 102 | |||
| 98 | atomic_inc(&parent->usage); | 103 | atomic_inc(&parent->usage); |
| 99 | atomic_inc(&parent->n_children); | 104 | atomic_inc(&parent->n_children); |
| 100 | 105 | ||
| @@ -177,7 +182,6 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) | |||
| 177 | 182 | ||
| 178 | cookie->flags = | 183 | cookie->flags = |
| 179 | (1 << FSCACHE_COOKIE_LOOKING_UP) | | 184 | (1 << FSCACHE_COOKIE_LOOKING_UP) | |
| 180 | (1 << FSCACHE_COOKIE_CREATING) | | ||
| 181 | (1 << FSCACHE_COOKIE_NO_DATA_YET); | 185 | (1 << FSCACHE_COOKIE_NO_DATA_YET); |
| 182 | 186 | ||
| 183 | /* ask the cache to allocate objects for this cookie and its parent | 187 | /* ask the cache to allocate objects for this cookie and its parent |
| @@ -205,7 +209,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) | |||
| 205 | 209 | ||
| 206 | /* initiate the process of looking up all the objects in the chain | 210 | /* initiate the process of looking up all the objects in the chain |
| 207 | * (done by fscache_initialise_object()) */ | 211 | * (done by fscache_initialise_object()) */ |
| 208 | fscache_enqueue_object(object); | 212 | fscache_raise_event(object, FSCACHE_OBJECT_EV_NEW_CHILD); |
| 209 | 213 | ||
| 210 | spin_unlock(&cookie->lock); | 214 | spin_unlock(&cookie->lock); |
| 211 | 215 | ||
| @@ -285,7 +289,7 @@ static int fscache_alloc_object(struct fscache_cache *cache, | |||
| 285 | 289 | ||
| 286 | object_already_extant: | 290 | object_already_extant: |
| 287 | ret = -ENOBUFS; | 291 | ret = -ENOBUFS; |
| 288 | if (object->state >= FSCACHE_OBJECT_DYING) { | 292 | if (fscache_object_is_dead(object)) { |
| 289 | spin_unlock(&cookie->lock); | 293 | spin_unlock(&cookie->lock); |
| 290 | goto error; | 294 | goto error; |
| 291 | } | 295 | } |
| @@ -321,7 +325,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, | |||
| 321 | ret = -EEXIST; | 325 | ret = -EEXIST; |
| 322 | hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { | 326 | hlist_for_each_entry(p, &cookie->backing_objects, cookie_link) { |
| 323 | if (p->cache == object->cache) { | 327 | if (p->cache == object->cache) { |
| 324 | if (p->state >= FSCACHE_OBJECT_DYING) | 328 | if (fscache_object_is_dying(p)) |
| 325 | ret = -ENOBUFS; | 329 | ret = -ENOBUFS; |
| 326 | goto cant_attach_object; | 330 | goto cant_attach_object; |
| 327 | } | 331 | } |
| @@ -332,7 +336,7 @@ static int fscache_attach_object(struct fscache_cookie *cookie, | |||
| 332 | hlist_for_each_entry(p, &cookie->parent->backing_objects, | 336 | hlist_for_each_entry(p, &cookie->parent->backing_objects, |
| 333 | cookie_link) { | 337 | cookie_link) { |
| 334 | if (p->cache == object->cache) { | 338 | if (p->cache == object->cache) { |
| 335 | if (p->state >= FSCACHE_OBJECT_DYING) { | 339 | if (fscache_object_is_dying(p)) { |
| 336 | ret = -ENOBUFS; | 340 | ret = -ENOBUFS; |
| 337 | spin_unlock(&cookie->parent->lock); | 341 | spin_unlock(&cookie->parent->lock); |
| 338 | goto cant_attach_object; | 342 | goto cant_attach_object; |
| @@ -400,7 +404,7 @@ void __fscache_invalidate(struct fscache_cookie *cookie) | |||
| 400 | object = hlist_entry(cookie->backing_objects.first, | 404 | object = hlist_entry(cookie->backing_objects.first, |
| 401 | struct fscache_object, | 405 | struct fscache_object, |
| 402 | cookie_link); | 406 | cookie_link); |
| 403 | if (object->state < FSCACHE_OBJECT_DYING) | 407 | if (fscache_object_is_live(object)) |
| 404 | fscache_raise_event( | 408 | fscache_raise_event( |
| 405 | object, FSCACHE_OBJECT_EV_INVALIDATE); | 409 | object, FSCACHE_OBJECT_EV_INVALIDATE); |
| 406 | } | 410 | } |
| @@ -467,9 +471,7 @@ EXPORT_SYMBOL(__fscache_update_cookie); | |||
| 467 | */ | 471 | */ |
| 468 | void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | 472 | void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) |
| 469 | { | 473 | { |
| 470 | struct fscache_cache *cache; | ||
| 471 | struct fscache_object *object; | 474 | struct fscache_object *object; |
| 472 | unsigned long event; | ||
| 473 | 475 | ||
| 474 | fscache_stat(&fscache_n_relinquishes); | 476 | fscache_stat(&fscache_n_relinquishes); |
| 475 | if (retire) | 477 | if (retire) |
| @@ -481,8 +483,11 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | |||
| 481 | return; | 483 | return; |
| 482 | } | 484 | } |
| 483 | 485 | ||
| 484 | _enter("%p{%s,%p},%d", | 486 | _enter("%p{%s,%p,%d},%d", |
| 485 | cookie, cookie->def->name, cookie->netfs_data, retire); | 487 | cookie, cookie->def->name, cookie->netfs_data, |
| 488 | atomic_read(&cookie->n_active), retire); | ||
| 489 | |||
| 490 | ASSERTCMP(atomic_read(&cookie->n_active), >, 0); | ||
| 486 | 491 | ||
| 487 | if (atomic_read(&cookie->n_children) != 0) { | 492 | if (atomic_read(&cookie->n_children) != 0) { |
| 488 | printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", | 493 | printk(KERN_ERR "FS-Cache: Cookie '%s' still has children\n", |
| @@ -490,62 +495,28 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) | |||
| 490 | BUG(); | 495 | BUG(); |
| 491 | } | 496 | } |
| 492 | 497 | ||
| 493 | /* wait for the cookie to finish being instantiated (or to fail) */ | 498 | /* No further netfs-accessing operations on this cookie permitted */ |
| 494 | if (test_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) { | 499 | set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags); |
| 495 | fscache_stat(&fscache_n_relinquishes_waitcrt); | 500 | if (retire) |
| 496 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_CREATING, | 501 | set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); |
| 497 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
| 498 | } | ||
| 499 | |||
| 500 | event = retire ? FSCACHE_OBJECT_EV_RETIRE : FSCACHE_OBJECT_EV_RELEASE; | ||
| 501 | 502 | ||
| 502 | try_again: | ||
| 503 | spin_lock(&cookie->lock); | 503 | spin_lock(&cookie->lock); |
| 504 | 504 | hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { | |
| 505 | /* break links with all the active objects */ | 505 | fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); |
| 506 | while (!hlist_empty(&cookie->backing_objects)) { | ||
| 507 | int n_reads; | ||
| 508 | object = hlist_entry(cookie->backing_objects.first, | ||
| 509 | struct fscache_object, | ||
| 510 | cookie_link); | ||
| 511 | |||
| 512 | _debug("RELEASE OBJ%x", object->debug_id); | ||
| 513 | |||
| 514 | set_bit(FSCACHE_COOKIE_WAITING_ON_READS, &cookie->flags); | ||
| 515 | n_reads = atomic_read(&object->n_reads); | ||
| 516 | if (n_reads) { | ||
| 517 | int n_ops = object->n_ops; | ||
| 518 | int n_in_progress = object->n_in_progress; | ||
| 519 | spin_unlock(&cookie->lock); | ||
| 520 | printk(KERN_ERR "FS-Cache:" | ||
| 521 | " Cookie '%s' still has %d outstanding reads (%d,%d)\n", | ||
| 522 | cookie->def->name, | ||
| 523 | n_reads, n_ops, n_in_progress); | ||
| 524 | wait_on_bit(&cookie->flags, FSCACHE_COOKIE_WAITING_ON_READS, | ||
| 525 | fscache_wait_bit, TASK_UNINTERRUPTIBLE); | ||
| 526 | printk("Wait finished\n"); | ||
| 527 | goto try_again; | ||
| 528 | } | ||
| 529 | |||
| 530 | /* detach each cache object from the object cookie */ | ||
| 531 | spin_lock(&object->lock); | ||
| 532 | hlist_del_init(&object->cookie_link); | ||
| 533 | |||
| 534 | cache = object->cache; | ||
| 535 | object->cookie = NULL; | ||
| 536 | fscache_raise_event(object, event); | ||
| 537 | spin_unlock(&object->lock); | ||
| 538 | |||
| 539 | if (atomic_dec_and_test(&cookie->usage)) | ||
| 540 | /* the cookie refcount shouldn't be reduced to 0 yet */ | ||
| 541 | BUG(); | ||
| 542 | } | 506 | } |
| 507 | spin_unlock(&cookie->lock); | ||
| 543 | 508 | ||
| 544 | /* detach pointers back to the netfs */ | 509 | /* Wait for cessation of activity requiring access to the netfs (when |
| 510 | * n_active reaches 0). | ||
| 511 | */ | ||
| 512 | if (!atomic_dec_and_test(&cookie->n_active)) | ||
| 513 | wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, | ||
| 514 | TASK_UNINTERRUPTIBLE); | ||
| 515 | |||
| 516 | /* Clear pointers back to the netfs */ | ||
| 545 | cookie->netfs_data = NULL; | 517 | cookie->netfs_data = NULL; |
| 546 | cookie->def = NULL; | 518 | cookie->def = NULL; |
| 547 | 519 | BUG_ON(cookie->stores.rnode); | |
| 548 | spin_unlock(&cookie->lock); | ||
| 549 | 520 | ||
| 550 | if (cookie->parent) { | 521 | if (cookie->parent) { |
| 551 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); | 522 | ASSERTCMP(atomic_read(&cookie->parent->usage), >, 0); |
| @@ -553,7 +524,7 @@ try_again: | |||
| 553 | atomic_dec(&cookie->parent->n_children); | 524 | atomic_dec(&cookie->parent->n_children); |
| 554 | } | 525 | } |
| 555 | 526 | ||
| 556 | /* finally dispose of the cookie */ | 527 | /* Dispose of the netfs's link to the cookie */ |
| 557 | ASSERTCMP(atomic_read(&cookie->usage), >, 0); | 528 | ASSERTCMP(atomic_read(&cookie->usage), >, 0); |
| 558 | fscache_cookie_put(cookie); | 529 | fscache_cookie_put(cookie); |
| 559 | 530 | ||
diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index f5b4baee7352..10a2ade0bdf8 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c | |||
| @@ -55,6 +55,7 @@ static struct fscache_cookie_def fscache_fsdef_index_def = { | |||
| 55 | 55 | ||
| 56 | struct fscache_cookie fscache_fsdef_index = { | 56 | struct fscache_cookie fscache_fsdef_index = { |
| 57 | .usage = ATOMIC_INIT(1), | 57 | .usage = ATOMIC_INIT(1), |
| 58 | .n_active = ATOMIC_INIT(1), | ||
| 58 | .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), | 59 | .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), |
| 59 | .backing_objects = HLIST_HEAD_INIT, | 60 | .backing_objects = HLIST_HEAD_INIT, |
| 60 | .def = &fscache_fsdef_index_def, | 61 | .def = &fscache_fsdef_index_def, |
diff --git a/fs/fscache/internal.h b/fs/fscache/internal.h index ee38fef4be51..12d505bedb5c 100644 --- a/fs/fscache/internal.h +++ b/fs/fscache/internal.h | |||
| @@ -93,14 +93,11 @@ static inline bool fscache_object_congested(void) | |||
| 93 | 93 | ||
| 94 | extern int fscache_wait_bit(void *); | 94 | extern int fscache_wait_bit(void *); |
| 95 | extern int fscache_wait_bit_interruptible(void *); | 95 | extern int fscache_wait_bit_interruptible(void *); |
| 96 | extern int fscache_wait_atomic_t(atomic_t *); | ||
| 96 | 97 | ||
| 97 | /* | 98 | /* |
| 98 | * object.c | 99 | * object.c |
| 99 | */ | 100 | */ |
| 100 | extern const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5]; | ||
| 101 | |||
| 102 | extern void fscache_withdrawing_object(struct fscache_cache *, | ||
| 103 | struct fscache_object *); | ||
| 104 | extern void fscache_enqueue_object(struct fscache_object *); | 101 | extern void fscache_enqueue_object(struct fscache_object *); |
| 105 | 102 | ||
| 106 | /* | 103 | /* |
| @@ -110,8 +107,10 @@ extern void fscache_enqueue_object(struct fscache_object *); | |||
| 110 | extern const struct file_operations fscache_objlist_fops; | 107 | extern const struct file_operations fscache_objlist_fops; |
| 111 | 108 | ||
| 112 | extern void fscache_objlist_add(struct fscache_object *); | 109 | extern void fscache_objlist_add(struct fscache_object *); |
| 110 | extern void fscache_objlist_remove(struct fscache_object *); | ||
| 113 | #else | 111 | #else |
| 114 | #define fscache_objlist_add(object) do {} while(0) | 112 | #define fscache_objlist_add(object) do {} while(0) |
| 113 | #define fscache_objlist_remove(object) do {} while(0) | ||
| 115 | #endif | 114 | #endif |
| 116 | 115 | ||
| 117 | /* | 116 | /* |
| @@ -291,6 +290,10 @@ static inline void fscache_raise_event(struct fscache_object *object, | |||
| 291 | unsigned event) | 290 | unsigned event) |
| 292 | { | 291 | { |
| 293 | BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); | 292 | BUG_ON(event >= NR_FSCACHE_OBJECT_EVENTS); |
| 293 | #if 0 | ||
| 294 | printk("*** fscache_raise_event(OBJ%d{%lx},%x)\n", | ||
| 295 | object->debug_id, object->event_mask, (1 << event)); | ||
| 296 | #endif | ||
| 294 | if (!test_and_set_bit(event, &object->events) && | 297 | if (!test_and_set_bit(event, &object->events) && |
| 295 | test_bit(event, &object->event_mask)) | 298 | test_bit(event, &object->event_mask)) |
| 296 | fscache_enqueue_object(object); | 299 | fscache_enqueue_object(object); |
diff --git a/fs/fscache/main.c b/fs/fscache/main.c index f9d856773f79..7c27907e650c 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c | |||
| @@ -205,7 +205,6 @@ int fscache_wait_bit(void *flags) | |||
| 205 | schedule(); | 205 | schedule(); |
| 206 | return 0; | 206 | return 0; |
| 207 | } | 207 | } |
| 208 | EXPORT_SYMBOL(fscache_wait_bit); | ||
| 209 | 208 | ||
| 210 | /* | 209 | /* |
| 211 | * wait_on_bit() sleep function for interruptible waiting | 210 | * wait_on_bit() sleep function for interruptible waiting |
| @@ -215,4 +214,12 @@ int fscache_wait_bit_interruptible(void *flags) | |||
| 215 | schedule(); | 214 | schedule(); |
| 216 | return signal_pending(current); | 215 | return signal_pending(current); |
| 217 | } | 216 | } |
| 218 | EXPORT_SYMBOL(fscache_wait_bit_interruptible); | 217 | |
| 218 | /* | ||
| 219 | * wait_on_atomic_t() sleep function for uninterruptible waiting | ||
| 220 | */ | ||
| 221 | int fscache_wait_atomic_t(atomic_t *p) | ||
| 222 | { | ||
| 223 | schedule(); | ||
| 224 | return 0; | ||
| 225 | } | ||
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index e028b8eb1c40..b1bb6117473a 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c | |||
| @@ -40,6 +40,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) | |||
| 40 | /* initialise the primary index cookie */ | 40 | /* initialise the primary index cookie */ |
| 41 | atomic_set(&netfs->primary_index->usage, 1); | 41 | atomic_set(&netfs->primary_index->usage, 1); |
| 42 | atomic_set(&netfs->primary_index->n_children, 0); | 42 | atomic_set(&netfs->primary_index->n_children, 0); |
| 43 | atomic_set(&netfs->primary_index->n_active, 1); | ||
| 43 | 44 | ||
| 44 | netfs->primary_index->def = &fscache_fsdef_netfs_def; | 45 | netfs->primary_index->def = &fscache_fsdef_netfs_def; |
| 45 | netfs->primary_index->parent = &fscache_fsdef_index; | 46 | netfs->primary_index->parent = &fscache_fsdef_index; |
diff --git a/fs/fscache/object-list.c b/fs/fscache/object-list.c index f27c89d17885..e1959efad64f 100644 --- a/fs/fscache/object-list.c +++ b/fs/fscache/object-list.c | |||
| @@ -70,13 +70,10 @@ void fscache_objlist_add(struct fscache_object *obj) | |||
| 70 | write_unlock(&fscache_object_list_lock); | 70 | write_unlock(&fscache_object_list_lock); |
| 71 | } | 71 | } |
| 72 | 72 | ||
| 73 | /** | 73 | /* |
| 74 | * fscache_object_destroy - Note that a cache object is about to be destroyed | 74 | * Remove an object from the object list. |
| 75 | * @object: The object to be destroyed | ||
| 76 | * | ||
| 77 | * Note the imminent destruction and deallocation of a cache object record. | ||
| 78 | */ | 75 | */ |
| 79 | void fscache_object_destroy(struct fscache_object *obj) | 76 | void fscache_objlist_remove(struct fscache_object *obj) |
| 80 | { | 77 | { |
| 81 | write_lock(&fscache_object_list_lock); | 78 | write_lock(&fscache_object_list_lock); |
| 82 | 79 | ||
| @@ -85,7 +82,6 @@ void fscache_object_destroy(struct fscache_object *obj) | |||
| 85 | 82 | ||
| 86 | write_unlock(&fscache_object_list_lock); | 83 | write_unlock(&fscache_object_list_lock); |
| 87 | } | 84 | } |
| 88 | EXPORT_SYMBOL(fscache_object_destroy); | ||
| 89 | 85 | ||
| 90 | /* | 86 | /* |
| 91 | * find the object in the tree on or after the specified index | 87 | * find the object in the tree on or after the specified index |
| @@ -166,15 +162,14 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
| 166 | { | 162 | { |
| 167 | struct fscache_objlist_data *data = m->private; | 163 | struct fscache_objlist_data *data = m->private; |
| 168 | struct fscache_object *obj = v; | 164 | struct fscache_object *obj = v; |
| 165 | struct fscache_cookie *cookie; | ||
| 169 | unsigned long config = data->config; | 166 | unsigned long config = data->config; |
| 170 | uint16_t keylen, auxlen; | ||
| 171 | char _type[3], *type; | 167 | char _type[3], *type; |
| 172 | bool no_cookie; | ||
| 173 | u8 *buf = data->buf, *p; | 168 | u8 *buf = data->buf, *p; |
| 174 | 169 | ||
| 175 | if ((unsigned long) v == 1) { | 170 | if ((unsigned long) v == 1) { |
| 176 | seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" | 171 | seq_puts(m, "OBJECT PARENT STAT CHLDN OPS OOP IPR EX READS" |
| 177 | " EM EV F S" | 172 | " EM EV FL S" |
| 178 | " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); | 173 | " | NETFS_COOKIE_DEF TY FL NETFS_DATA"); |
| 179 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | | 174 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | |
| 180 | FSCACHE_OBJLIST_CONFIG_AUX)) | 175 | FSCACHE_OBJLIST_CONFIG_AUX)) |
| @@ -193,7 +188,7 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
| 193 | 188 | ||
| 194 | if ((unsigned long) v == 2) { | 189 | if ((unsigned long) v == 2) { |
| 195 | seq_puts(m, "======== ======== ==== ===== === === === == =====" | 190 | seq_puts(m, "======== ======== ==== ===== === === === == =====" |
| 196 | " == == = =" | 191 | " == == == =" |
| 197 | " | ================ == == ================"); | 192 | " | ================ == == ================"); |
| 198 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | | 193 | if (config & (FSCACHE_OBJLIST_CONFIG_KEY | |
| 199 | FSCACHE_OBJLIST_CONFIG_AUX)) | 194 | FSCACHE_OBJLIST_CONFIG_AUX)) |
| @@ -216,10 +211,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
| 216 | } \ | 211 | } \ |
| 217 | } while(0) | 212 | } while(0) |
| 218 | 213 | ||
| 214 | cookie = obj->cookie; | ||
| 219 | if (~config) { | 215 | if (~config) { |
| 220 | FILTER(obj->cookie, | 216 | FILTER(cookie->def, |
| 221 | COOKIE, NOCOOKIE); | 217 | COOKIE, NOCOOKIE); |
| 222 | FILTER(obj->state != FSCACHE_OBJECT_ACTIVE || | 218 | FILTER(fscache_object_is_active(obj) || |
| 223 | obj->n_ops != 0 || | 219 | obj->n_ops != 0 || |
| 224 | obj->n_obj_ops != 0 || | 220 | obj->n_obj_ops != 0 || |
| 225 | obj->flags || | 221 | obj->flags || |
| @@ -235,10 +231,10 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
| 235 | } | 231 | } |
| 236 | 232 | ||
| 237 | seq_printf(m, | 233 | seq_printf(m, |
| 238 | "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %1lx %1x | ", | 234 | "%8x %8x %s %5u %3u %3u %3u %2u %5u %2lx %2lx %2lx %1x | ", |
| 239 | obj->debug_id, | 235 | obj->debug_id, |
| 240 | obj->parent ? obj->parent->debug_id : -1, | 236 | obj->parent ? obj->parent->debug_id : -1, |
| 241 | fscache_object_states_short[obj->state], | 237 | obj->state->short_name, |
| 242 | obj->n_children, | 238 | obj->n_children, |
| 243 | obj->n_ops, | 239 | obj->n_ops, |
| 244 | obj->n_obj_ops, | 240 | obj->n_obj_ops, |
| @@ -250,48 +246,40 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
| 250 | obj->flags, | 246 | obj->flags, |
| 251 | work_busy(&obj->work)); | 247 | work_busy(&obj->work)); |
| 252 | 248 | ||
| 253 | no_cookie = true; | 249 | if (fscache_use_cookie(obj)) { |
| 254 | keylen = auxlen = 0; | 250 | uint16_t keylen = 0, auxlen = 0; |
| 255 | if (obj->cookie) { | ||
| 256 | spin_lock(&obj->lock); | ||
| 257 | if (obj->cookie) { | ||
| 258 | switch (obj->cookie->def->type) { | ||
| 259 | case 0: | ||
| 260 | type = "IX"; | ||
| 261 | break; | ||
| 262 | case 1: | ||
| 263 | type = "DT"; | ||
| 264 | break; | ||
| 265 | default: | ||
| 266 | sprintf(_type, "%02u", | ||
| 267 | obj->cookie->def->type); | ||
| 268 | type = _type; | ||
| 269 | break; | ||
| 270 | } | ||
| 271 | 251 | ||
| 272 | seq_printf(m, "%-16s %s %2lx %16p", | 252 | switch (cookie->def->type) { |
| 273 | obj->cookie->def->name, | 253 | case 0: |
| 274 | type, | 254 | type = "IX"; |
| 275 | obj->cookie->flags, | 255 | break; |
| 276 | obj->cookie->netfs_data); | 256 | case 1: |
| 277 | 257 | type = "DT"; | |
| 278 | if (obj->cookie->def->get_key && | 258 | break; |
| 279 | config & FSCACHE_OBJLIST_CONFIG_KEY) | 259 | default: |
| 280 | keylen = obj->cookie->def->get_key( | 260 | sprintf(_type, "%02u", cookie->def->type); |
| 281 | obj->cookie->netfs_data, | 261 | type = _type; |
| 282 | buf, 400); | 262 | break; |
| 283 | |||
| 284 | if (obj->cookie->def->get_aux && | ||
| 285 | config & FSCACHE_OBJLIST_CONFIG_AUX) | ||
| 286 | auxlen = obj->cookie->def->get_aux( | ||
| 287 | obj->cookie->netfs_data, | ||
| 288 | buf + keylen, 512 - keylen); | ||
| 289 | |||
| 290 | no_cookie = false; | ||
| 291 | } | 263 | } |
| 292 | spin_unlock(&obj->lock); | ||
| 293 | 264 | ||
| 294 | if (!no_cookie && (keylen > 0 || auxlen > 0)) { | 265 | seq_printf(m, "%-16s %s %2lx %16p", |
| 266 | cookie->def->name, | ||
| 267 | type, | ||
| 268 | cookie->flags, | ||
| 269 | cookie->netfs_data); | ||
| 270 | |||
| 271 | if (cookie->def->get_key && | ||
| 272 | config & FSCACHE_OBJLIST_CONFIG_KEY) | ||
| 273 | keylen = cookie->def->get_key(cookie->netfs_data, | ||
| 274 | buf, 400); | ||
| 275 | |||
| 276 | if (cookie->def->get_aux && | ||
| 277 | config & FSCACHE_OBJLIST_CONFIG_AUX) | ||
| 278 | auxlen = cookie->def->get_aux(cookie->netfs_data, | ||
| 279 | buf + keylen, 512 - keylen); | ||
| 280 | fscache_unuse_cookie(obj); | ||
| 281 | |||
| 282 | if (keylen > 0 || auxlen > 0) { | ||
| 295 | seq_printf(m, " "); | 283 | seq_printf(m, " "); |
| 296 | for (p = buf; keylen > 0; keylen--) | 284 | for (p = buf; keylen > 0; keylen--) |
| 297 | seq_printf(m, "%02x", *p++); | 285 | seq_printf(m, "%02x", *p++); |
| @@ -302,12 +290,11 @@ static int fscache_objlist_show(struct seq_file *m, void *v) | |||
| 302 | seq_printf(m, "%02x", *p++); | 290 | seq_printf(m, "%02x", *p++); |
| 303 | } | 291 | } |
| 304 | } | 292 | } |
| 305 | } | ||
| 306 | 293 | ||
| 307 | if (no_cookie) | ||
| 308 | seq_printf(m, "<no_cookie>\n"); | ||
| 309 | else | ||
| 310 | seq_printf(m, "\n"); | 294 | seq_printf(m, "\n"); |
| 295 | } else { | ||
| 296 | seq_printf(m, "<no_netfs>\n"); | ||
| 297 | } | ||
| 311 | return 0; | 298 | return 0; |
| 312 | } | 299 | } |
| 313 | 300 | ||
diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 50d41c180211..86d75a60b20c 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c | |||
| @@ -15,52 +15,131 @@ | |||
| 15 | #define FSCACHE_DEBUG_LEVEL COOKIE | 15 | #define FSCACHE_DEBUG_LEVEL COOKIE |
| 16 | #include <linux/module.h> | 16 | #include <linux/module.h> |
| 17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
| 18 | #include <linux/prefetch.h> | ||
| 18 | #include "internal.h" | 19 | #include "internal.h" |
| 19 | 20 | ||
| 20 | const char *fscache_object_states[FSCACHE_OBJECT__NSTATES] = { | 21 | static const struct fscache_state *fscache_abort_initialisation(struct fscache_object *, int); |
| 21 | [FSCACHE_OBJECT_INIT] = "OBJECT_INIT", | 22 | static const struct fscache_state *fscache_kill_dependents(struct fscache_object *, int); |
| 22 | [FSCACHE_OBJECT_LOOKING_UP] = "OBJECT_LOOKING_UP", | 23 | static const struct fscache_state *fscache_drop_object(struct fscache_object *, int); |
| 23 | [FSCACHE_OBJECT_CREATING] = "OBJECT_CREATING", | 24 | static const struct fscache_state *fscache_initialise_object(struct fscache_object *, int); |
| 24 | [FSCACHE_OBJECT_AVAILABLE] = "OBJECT_AVAILABLE", | 25 | static const struct fscache_state *fscache_invalidate_object(struct fscache_object *, int); |
| 25 | [FSCACHE_OBJECT_ACTIVE] = "OBJECT_ACTIVE", | 26 | static const struct fscache_state *fscache_jumpstart_dependents(struct fscache_object *, int); |
| 26 | [FSCACHE_OBJECT_INVALIDATING] = "OBJECT_INVALIDATING", | 27 | static const struct fscache_state *fscache_kill_object(struct fscache_object *, int); |
| 27 | [FSCACHE_OBJECT_UPDATING] = "OBJECT_UPDATING", | 28 | static const struct fscache_state *fscache_lookup_failure(struct fscache_object *, int); |
| 28 | [FSCACHE_OBJECT_DYING] = "OBJECT_DYING", | 29 | static const struct fscache_state *fscache_look_up_object(struct fscache_object *, int); |
| 29 | [FSCACHE_OBJECT_LC_DYING] = "OBJECT_LC_DYING", | 30 | static const struct fscache_state *fscache_object_available(struct fscache_object *, int); |
| 30 | [FSCACHE_OBJECT_ABORT_INIT] = "OBJECT_ABORT_INIT", | 31 | static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int); |
| 31 | [FSCACHE_OBJECT_RELEASING] = "OBJECT_RELEASING", | 32 | static const struct fscache_state *fscache_update_object(struct fscache_object *, int); |
| 32 | [FSCACHE_OBJECT_RECYCLING] = "OBJECT_RECYCLING", | 33 | |
| 33 | [FSCACHE_OBJECT_WITHDRAWING] = "OBJECT_WITHDRAWING", | 34 | #define __STATE_NAME(n) fscache_osm_##n |
| 34 | [FSCACHE_OBJECT_DEAD] = "OBJECT_DEAD", | 35 | #define STATE(n) (&__STATE_NAME(n)) |
| 36 | |||
| 37 | /* | ||
| 38 | * Define a work state. Work states are execution states. No event processing | ||
| 39 | * is performed by them. The function attached to a work state returns a | ||
| 40 | * pointer indicating the next state to which the state machine should | ||
| 41 | * transition. Returning NO_TRANSIT repeats the current state, but goes back | ||
| 42 | * to the scheduler first. | ||
| 43 | */ | ||
| 44 | #define WORK_STATE(n, sn, f) \ | ||
| 45 | const struct fscache_state __STATE_NAME(n) = { \ | ||
| 46 | .name = #n, \ | ||
| 47 | .short_name = sn, \ | ||
| 48 | .work = f \ | ||
| 49 | } | ||
| 50 | |||
| 51 | /* | ||
| 52 | * Returns from work states. | ||
| 53 | */ | ||
| 54 | #define transit_to(state) ({ prefetch(&STATE(state)->work); STATE(state); }) | ||
| 55 | |||
| 56 | #define NO_TRANSIT ((struct fscache_state *)NULL) | ||
| 57 | |||
| 58 | /* | ||
| 59 | * Define a wait state. Wait states are event processing states. No execution | ||
| 60 | * is performed by them. Wait states are just tables of "if event X occurs, | ||
| 61 | * clear it and transition to state Y". The dispatcher returns to the | ||
| 62 | * scheduler if none of the events in which the wait state has an interest are | ||
| 63 | * currently pending. | ||
| 64 | */ | ||
| 65 | #define WAIT_STATE(n, sn, ...) \ | ||
| 66 | const struct fscache_state __STATE_NAME(n) = { \ | ||
| 67 | .name = #n, \ | ||
| 68 | .short_name = sn, \ | ||
| 69 | .work = NULL, \ | ||
| 70 | .transitions = { __VA_ARGS__, { 0, NULL } } \ | ||
| 71 | } | ||
| 72 | |||
| 73 | #define TRANSIT_TO(state, emask) \ | ||
| 74 | { .events = (emask), .transit_to = STATE(state) } | ||
| 75 | |||
| 76 | /* | ||
| 77 | * The object state machine. | ||
| 78 | */ | ||
| 79 | static WORK_STATE(INIT_OBJECT, "INIT", fscache_initialise_object); | ||
| 80 | static WORK_STATE(PARENT_READY, "PRDY", fscache_parent_ready); | ||
| 81 | static WORK_STATE(ABORT_INIT, "ABRT", fscache_abort_initialisation); | ||
| 82 | static WORK_STATE(LOOK_UP_OBJECT, "LOOK", fscache_look_up_object); | ||
| 83 | static WORK_STATE(CREATE_OBJECT, "CRTO", fscache_look_up_object); | ||
| 84 | static WORK_STATE(OBJECT_AVAILABLE, "AVBL", fscache_object_available); | ||
| 85 | static WORK_STATE(JUMPSTART_DEPS, "JUMP", fscache_jumpstart_dependents); | ||
| 86 | |||
| 87 | static WORK_STATE(INVALIDATE_OBJECT, "INVL", fscache_invalidate_object); | ||
| 88 | static WORK_STATE(UPDATE_OBJECT, "UPDT", fscache_update_object); | ||
| 89 | |||
| 90 | static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure); | ||
| 91 | static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object); | ||
| 92 | static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents); | ||
| 93 | static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object); | ||
| 94 | static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL); | ||
| 95 | |||
| 96 | static WAIT_STATE(WAIT_FOR_INIT, "?INI", | ||
| 97 | TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); | ||
| 98 | |||
| 99 | static WAIT_STATE(WAIT_FOR_PARENT, "?PRN", | ||
| 100 | TRANSIT_TO(PARENT_READY, 1 << FSCACHE_OBJECT_EV_PARENT_READY)); | ||
| 101 | |||
| 102 | static WAIT_STATE(WAIT_FOR_CMD, "?CMD", | ||
| 103 | TRANSIT_TO(INVALIDATE_OBJECT, 1 << FSCACHE_OBJECT_EV_INVALIDATE), | ||
| 104 | TRANSIT_TO(UPDATE_OBJECT, 1 << FSCACHE_OBJECT_EV_UPDATE), | ||
| 105 | TRANSIT_TO(JUMPSTART_DEPS, 1 << FSCACHE_OBJECT_EV_NEW_CHILD)); | ||
| 106 | |||
| 107 | static WAIT_STATE(WAIT_FOR_CLEARANCE, "?CLR", | ||
| 108 | TRANSIT_TO(KILL_OBJECT, 1 << FSCACHE_OBJECT_EV_CLEARED)); | ||
| 109 | |||
| 110 | /* | ||
| 111 | * Out-of-band event transition tables. These are for handling unexpected | ||
| 112 | * events, such as an I/O error. If an OOB event occurs, the state machine | ||
| 113 | * clears and disables the event and forces a transition to the nominated work | ||
| 114 | * state (acurrently executing work states will complete first). | ||
| 115 | * | ||
| 116 | * In such a situation, object->state remembers the state the machine should | ||
| 117 | * have been in/gone to and returning NO_TRANSIT returns to that. | ||
| 118 | */ | ||
| 119 | static const struct fscache_transition fscache_osm_init_oob[] = { | ||
| 120 | TRANSIT_TO(ABORT_INIT, | ||
| 121 | (1 << FSCACHE_OBJECT_EV_ERROR) | | ||
| 122 | (1 << FSCACHE_OBJECT_EV_KILL)), | ||
| 123 | { 0, NULL } | ||
| 124 | }; | ||
| 125 | |||
| 126 | static const struct fscache_transition fscache_osm_lookup_oob[] = { | ||
| 127 | TRANSIT_TO(LOOKUP_FAILURE, | ||
| 128 | (1 << FSCACHE_OBJECT_EV_ERROR) | | ||
| 129 | (1 << FSCACHE_OBJECT_EV_KILL)), | ||
| 130 | { 0, NULL } | ||
| 35 | }; | 131 | }; |
| 36 | EXPORT_SYMBOL(fscache_object_states); | 132 | |
| 37 | 133 | static const struct fscache_transition fscache_osm_run_oob[] = { | |
| 38 | const char fscache_object_states_short[FSCACHE_OBJECT__NSTATES][5] = { | 134 | TRANSIT_TO(KILL_OBJECT, |
| 39 | [FSCACHE_OBJECT_INIT] = "INIT", | 135 | (1 << FSCACHE_OBJECT_EV_ERROR) | |
| 40 | [FSCACHE_OBJECT_LOOKING_UP] = "LOOK", | 136 | (1 << FSCACHE_OBJECT_EV_KILL)), |
| 41 | [FSCACHE_OBJECT_CREATING] = "CRTN", | 137 | { 0, NULL } |
| 42 | [FSCACHE_OBJECT_AVAILABLE] = "AVBL", | ||
| 43 | [FSCACHE_OBJECT_ACTIVE] = "ACTV", | ||
| 44 | [FSCACHE_OBJECT_INVALIDATING] = "INVL", | ||
| 45 | [FSCACHE_OBJECT_UPDATING] = "UPDT", | ||
| 46 | [FSCACHE_OBJECT_DYING] = "DYNG", | ||
| 47 | [FSCACHE_OBJECT_LC_DYING] = "LCDY", | ||
| 48 | [FSCACHE_OBJECT_ABORT_INIT] = "ABTI", | ||
| 49 | [FSCACHE_OBJECT_RELEASING] = "RELS", | ||
| 50 | [FSCACHE_OBJECT_RECYCLING] = "RCYC", | ||
| 51 | [FSCACHE_OBJECT_WITHDRAWING] = "WTHD", | ||
| 52 | [FSCACHE_OBJECT_DEAD] = "DEAD", | ||
| 53 | }; | 138 | }; |
| 54 | 139 | ||
| 55 | static int fscache_get_object(struct fscache_object *); | 140 | static int fscache_get_object(struct fscache_object *); |
| 56 | static void fscache_put_object(struct fscache_object *); | 141 | static void fscache_put_object(struct fscache_object *); |
| 57 | static void fscache_initialise_object(struct fscache_object *); | 142 | static bool fscache_enqueue_dependents(struct fscache_object *, int); |
| 58 | static void fscache_lookup_object(struct fscache_object *); | ||
| 59 | static void fscache_object_available(struct fscache_object *); | ||
| 60 | static void fscache_invalidate_object(struct fscache_object *); | ||
| 61 | static void fscache_release_object(struct fscache_object *); | ||
| 62 | static void fscache_withdraw_object(struct fscache_object *); | ||
| 63 | static void fscache_enqueue_dependents(struct fscache_object *); | ||
| 64 | static void fscache_dequeue_object(struct fscache_object *); | 143 | static void fscache_dequeue_object(struct fscache_object *); |
| 65 | 144 | ||
| 66 | /* | 145 | /* |
| @@ -75,295 +154,116 @@ static inline void fscache_done_parent_op(struct fscache_object *object) | |||
| 75 | object->debug_id, parent->debug_id, parent->n_ops); | 154 | object->debug_id, parent->debug_id, parent->n_ops); |
| 76 | 155 | ||
| 77 | spin_lock_nested(&parent->lock, 1); | 156 | spin_lock_nested(&parent->lock, 1); |
| 78 | parent->n_ops--; | ||
| 79 | parent->n_obj_ops--; | 157 | parent->n_obj_ops--; |
| 158 | parent->n_ops--; | ||
| 80 | if (parent->n_ops == 0) | 159 | if (parent->n_ops == 0) |
| 81 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); | 160 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); |
| 82 | spin_unlock(&parent->lock); | 161 | spin_unlock(&parent->lock); |
| 83 | } | 162 | } |
| 84 | 163 | ||
| 85 | /* | 164 | /* |
| 86 | * Notify netfs of invalidation completion. | 165 | * Object state machine dispatcher. |
| 87 | */ | 166 | */ |
| 88 | static inline void fscache_invalidation_complete(struct fscache_cookie *cookie) | 167 | static void fscache_object_sm_dispatcher(struct fscache_object *object) |
| 89 | { | 168 | { |
| 90 | if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) | 169 | const struct fscache_transition *t; |
| 91 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); | 170 | const struct fscache_state *state, *new_state; |
| 92 | } | 171 | unsigned long events, event_mask; |
| 93 | 172 | int event = -1; | |
| 94 | /* | ||
| 95 | * process events that have been sent to an object's state machine | ||
| 96 | * - initiates parent lookup | ||
| 97 | * - does object lookup | ||
| 98 | * - does object creation | ||
| 99 | * - does object recycling and retirement | ||
| 100 | * - does object withdrawal | ||
| 101 | */ | ||
| 102 | static void fscache_object_state_machine(struct fscache_object *object) | ||
| 103 | { | ||
| 104 | enum fscache_object_state new_state; | ||
| 105 | struct fscache_cookie *cookie; | ||
| 106 | int event; | ||
| 107 | 173 | ||
| 108 | ASSERT(object != NULL); | 174 | ASSERT(object != NULL); |
| 109 | 175 | ||
| 110 | _enter("{OBJ%x,%s,%lx}", | 176 | _enter("{OBJ%x,%s,%lx}", |
| 111 | object->debug_id, fscache_object_states[object->state], | 177 | object->debug_id, object->state->name, object->events); |
| 112 | object->events); | 178 | |
| 113 | 179 | event_mask = object->event_mask; | |
| 114 | switch (object->state) { | 180 | restart: |
| 115 | /* wait for the parent object to become ready */ | 181 | object->event_mask = 0; /* Mask normal event handling */ |
| 116 | case FSCACHE_OBJECT_INIT: | 182 | state = object->state; |
| 117 | object->event_mask = | 183 | restart_masked: |
| 118 | FSCACHE_OBJECT_EVENTS_MASK & | 184 | events = object->events; |
| 119 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | 185 | |
| 120 | fscache_initialise_object(object); | 186 | /* Handle any out-of-band events (typically an error) */ |
| 121 | goto done; | 187 | if (events & object->oob_event_mask) { |
| 122 | 188 | _debug("{OBJ%x} oob %lx", | |
| 123 | /* look up the object metadata on disk */ | 189 | object->debug_id, events & object->oob_event_mask); |
| 124 | case FSCACHE_OBJECT_LOOKING_UP: | 190 | for (t = object->oob_table; t->events; t++) { |
| 125 | fscache_lookup_object(object); | 191 | if (events & t->events) { |
| 126 | goto lookup_transit; | 192 | state = t->transit_to; |
| 127 | 193 | ASSERT(state->work != NULL); | |
| 128 | /* create the object metadata on disk */ | 194 | event = fls(events & t->events) - 1; |
| 129 | case FSCACHE_OBJECT_CREATING: | 195 | __clear_bit(event, &object->oob_event_mask); |
| 130 | fscache_lookup_object(object); | 196 | clear_bit(event, &object->events); |
| 131 | goto lookup_transit; | 197 | goto execute_work_state; |
| 132 | 198 | } | |
| 133 | /* handle an object becoming available; start pending | ||
| 134 | * operations and queue dependent operations for processing */ | ||
| 135 | case FSCACHE_OBJECT_AVAILABLE: | ||
| 136 | fscache_object_available(object); | ||
| 137 | goto active_transit; | ||
| 138 | |||
| 139 | /* normal running state */ | ||
| 140 | case FSCACHE_OBJECT_ACTIVE: | ||
| 141 | goto active_transit; | ||
| 142 | |||
| 143 | /* Invalidate an object on disk */ | ||
| 144 | case FSCACHE_OBJECT_INVALIDATING: | ||
| 145 | clear_bit(FSCACHE_OBJECT_EV_INVALIDATE, &object->events); | ||
| 146 | fscache_stat(&fscache_n_invalidates_run); | ||
| 147 | fscache_stat(&fscache_n_cop_invalidate_object); | ||
| 148 | fscache_invalidate_object(object); | ||
| 149 | fscache_stat_d(&fscache_n_cop_invalidate_object); | ||
| 150 | fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); | ||
| 151 | goto active_transit; | ||
| 152 | |||
| 153 | /* update the object metadata on disk */ | ||
| 154 | case FSCACHE_OBJECT_UPDATING: | ||
| 155 | clear_bit(FSCACHE_OBJECT_EV_UPDATE, &object->events); | ||
| 156 | fscache_stat(&fscache_n_updates_run); | ||
| 157 | fscache_stat(&fscache_n_cop_update_object); | ||
| 158 | object->cache->ops->update_object(object); | ||
| 159 | fscache_stat_d(&fscache_n_cop_update_object); | ||
| 160 | goto active_transit; | ||
| 161 | |||
| 162 | /* handle an object dying during lookup or creation */ | ||
| 163 | case FSCACHE_OBJECT_LC_DYING: | ||
| 164 | object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); | ||
| 165 | fscache_stat(&fscache_n_cop_lookup_complete); | ||
| 166 | object->cache->ops->lookup_complete(object); | ||
| 167 | fscache_stat_d(&fscache_n_cop_lookup_complete); | ||
| 168 | |||
| 169 | spin_lock(&object->lock); | ||
| 170 | object->state = FSCACHE_OBJECT_DYING; | ||
| 171 | cookie = object->cookie; | ||
| 172 | if (cookie) { | ||
| 173 | if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, | ||
| 174 | &cookie->flags)) | ||
| 175 | wake_up_bit(&cookie->flags, | ||
| 176 | FSCACHE_COOKIE_LOOKING_UP); | ||
| 177 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, | ||
| 178 | &cookie->flags)) | ||
| 179 | wake_up_bit(&cookie->flags, | ||
| 180 | FSCACHE_COOKIE_CREATING); | ||
| 181 | } | 199 | } |
| 182 | spin_unlock(&object->lock); | 200 | } |
| 183 | 201 | ||
| 184 | fscache_done_parent_op(object); | 202 | /* Wait states are just transition tables */ |
| 203 | if (!state->work) { | ||
| 204 | if (events & event_mask) { | ||
| 205 | for (t = state->transitions; t->events; t++) { | ||
| 206 | if (events & t->events) { | ||
| 207 | new_state = t->transit_to; | ||
| 208 | event = fls(events & t->events) - 1; | ||
| 209 | clear_bit(event, &object->events); | ||
| 210 | _debug("{OBJ%x} ev %d: %s -> %s", | ||
| 211 | object->debug_id, event, | ||
| 212 | state->name, new_state->name); | ||
| 213 | object->state = state = new_state; | ||
| 214 | goto execute_work_state; | ||
| 215 | } | ||
| 216 | } | ||
| 185 | 217 | ||
| 186 | /* wait for completion of all active operations on this object | 218 | /* The event mask didn't include all the tabled bits */ |
| 187 | * and the death of all child objects of this object */ | 219 | BUG(); |
| 188 | case FSCACHE_OBJECT_DYING: | ||
| 189 | dying: | ||
| 190 | clear_bit(FSCACHE_OBJECT_EV_CLEARED, &object->events); | ||
| 191 | spin_lock(&object->lock); | ||
| 192 | _debug("dying OBJ%x {%d,%d}", | ||
| 193 | object->debug_id, object->n_ops, object->n_children); | ||
| 194 | if (object->n_ops == 0 && object->n_children == 0) { | ||
| 195 | object->event_mask &= | ||
| 196 | ~(1 << FSCACHE_OBJECT_EV_CLEARED); | ||
| 197 | object->event_mask |= | ||
| 198 | (1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
| 199 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
| 200 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
| 201 | (1 << FSCACHE_OBJECT_EV_ERROR); | ||
| 202 | } else { | ||
| 203 | object->event_mask &= | ||
| 204 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
| 205 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
| 206 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
| 207 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
| 208 | object->event_mask |= | ||
| 209 | 1 << FSCACHE_OBJECT_EV_CLEARED; | ||
| 210 | } | 220 | } |
| 211 | spin_unlock(&object->lock); | 221 | /* Randomly woke up */ |
| 212 | fscache_enqueue_dependents(object); | 222 | goto unmask_events; |
| 213 | fscache_start_operations(object); | ||
| 214 | goto terminal_transit; | ||
| 215 | |||
| 216 | /* handle an abort during initialisation */ | ||
| 217 | case FSCACHE_OBJECT_ABORT_INIT: | ||
| 218 | _debug("handle abort init %lx", object->events); | ||
| 219 | object->event_mask &= ~(1 << FSCACHE_OBJECT_EV_UPDATE); | ||
| 220 | |||
| 221 | spin_lock(&object->lock); | ||
| 222 | fscache_dequeue_object(object); | ||
| 223 | |||
| 224 | object->state = FSCACHE_OBJECT_DYING; | ||
| 225 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, | ||
| 226 | &object->cookie->flags)) | ||
| 227 | wake_up_bit(&object->cookie->flags, | ||
| 228 | FSCACHE_COOKIE_CREATING); | ||
| 229 | spin_unlock(&object->lock); | ||
| 230 | goto dying; | ||
| 231 | |||
| 232 | /* handle the netfs releasing an object and possibly marking it | ||
| 233 | * obsolete too */ | ||
| 234 | case FSCACHE_OBJECT_RELEASING: | ||
| 235 | case FSCACHE_OBJECT_RECYCLING: | ||
| 236 | object->event_mask &= | ||
| 237 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
| 238 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
| 239 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
| 240 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
| 241 | fscache_release_object(object); | ||
| 242 | spin_lock(&object->lock); | ||
| 243 | object->state = FSCACHE_OBJECT_DEAD; | ||
| 244 | spin_unlock(&object->lock); | ||
| 245 | fscache_stat(&fscache_n_object_dead); | ||
| 246 | goto terminal_transit; | ||
| 247 | |||
| 248 | /* handle the parent cache of this object being withdrawn from | ||
| 249 | * active service */ | ||
| 250 | case FSCACHE_OBJECT_WITHDRAWING: | ||
| 251 | object->event_mask &= | ||
| 252 | ~((1 << FSCACHE_OBJECT_EV_WITHDRAW) | | ||
| 253 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
| 254 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
| 255 | (1 << FSCACHE_OBJECT_EV_ERROR)); | ||
| 256 | fscache_withdraw_object(object); | ||
| 257 | spin_lock(&object->lock); | ||
| 258 | object->state = FSCACHE_OBJECT_DEAD; | ||
| 259 | spin_unlock(&object->lock); | ||
| 260 | fscache_stat(&fscache_n_object_dead); | ||
| 261 | goto terminal_transit; | ||
| 262 | |||
| 263 | /* complain about the object being woken up once it is | ||
| 264 | * deceased */ | ||
| 265 | case FSCACHE_OBJECT_DEAD: | ||
| 266 | printk(KERN_ERR "FS-Cache:" | ||
| 267 | " Unexpected event in dead state %lx\n", | ||
| 268 | object->events & object->event_mask); | ||
| 269 | BUG(); | ||
| 270 | |||
| 271 | default: | ||
| 272 | printk(KERN_ERR "FS-Cache: Unknown object state %u\n", | ||
| 273 | object->state); | ||
| 274 | BUG(); | ||
| 275 | } | ||
| 276 | |||
| 277 | /* determine the transition from a lookup state */ | ||
| 278 | lookup_transit: | ||
| 279 | event = fls(object->events & object->event_mask) - 1; | ||
| 280 | switch (event) { | ||
| 281 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
| 282 | case FSCACHE_OBJECT_EV_RETIRE: | ||
| 283 | case FSCACHE_OBJECT_EV_RELEASE: | ||
| 284 | case FSCACHE_OBJECT_EV_ERROR: | ||
| 285 | new_state = FSCACHE_OBJECT_LC_DYING; | ||
| 286 | goto change_state; | ||
| 287 | case FSCACHE_OBJECT_EV_INVALIDATE: | ||
| 288 | new_state = FSCACHE_OBJECT_INVALIDATING; | ||
| 289 | goto change_state; | ||
| 290 | case FSCACHE_OBJECT_EV_REQUEUE: | ||
| 291 | goto done; | ||
| 292 | case -1: | ||
| 293 | goto done; /* sleep until event */ | ||
| 294 | default: | ||
| 295 | goto unsupported_event; | ||
| 296 | } | 223 | } |
| 297 | 224 | ||
| 298 | /* determine the transition from an active state */ | 225 | execute_work_state: |
| 299 | active_transit: | 226 | _debug("{OBJ%x} exec %s", object->debug_id, state->name); |
| 300 | event = fls(object->events & object->event_mask) - 1; | ||
| 301 | switch (event) { | ||
| 302 | case FSCACHE_OBJECT_EV_WITHDRAW: | ||
| 303 | case FSCACHE_OBJECT_EV_RETIRE: | ||
| 304 | case FSCACHE_OBJECT_EV_RELEASE: | ||
| 305 | case FSCACHE_OBJECT_EV_ERROR: | ||
| 306 | new_state = FSCACHE_OBJECT_DYING; | ||
| 307 | goto change_state; | ||
| 308 | case FSCACHE_OBJECT_EV_INVALIDATE: | ||
| 309 | new_state = FSCACHE_OBJECT_INVALIDATING; | ||
| 310 | goto change_state; | ||
| 311 | case FSCACHE_OBJECT_EV_UPDATE: | ||
| 312 | new_state = FSCACHE_OBJECT_UPDATING; | ||
| 313 | goto change_state; | ||
| 314 | case -1: | ||
| 315 | new_state = FSCACHE_OBJECT_ACTIVE; | ||
| 316 | goto change_state; /* sleep until event */ | ||
| 317 | default: | ||
| 318 | goto unsupported_event; | ||
| 319 | } | ||
| 320 | 227 | ||
| 321 | /* determine the transition from a terminal state */ | 228 | new_state = state->work(object, event); |
| 322 | terminal_transit: | 229 | event = -1; |
| 323 | event = fls(object->events & object->event_mask) - 1; | 230 | if (new_state == NO_TRANSIT) { |
| 324 | switch (event) { | 231 | _debug("{OBJ%x} %s notrans", object->debug_id, state->name); |
| 325 | case FSCACHE_OBJECT_EV_WITHDRAW: | 232 | fscache_enqueue_object(object); |
| 326 | new_state = FSCACHE_OBJECT_WITHDRAWING; | 233 | event_mask = object->oob_event_mask; |
| 327 | goto change_state; | 234 | goto unmask_events; |
| 328 | case FSCACHE_OBJECT_EV_RETIRE: | ||
| 329 | new_state = FSCACHE_OBJECT_RECYCLING; | ||
| 330 | goto change_state; | ||
| 331 | case FSCACHE_OBJECT_EV_RELEASE: | ||
| 332 | new_state = FSCACHE_OBJECT_RELEASING; | ||
| 333 | goto change_state; | ||
| 334 | case FSCACHE_OBJECT_EV_ERROR: | ||
| 335 | new_state = FSCACHE_OBJECT_WITHDRAWING; | ||
| 336 | goto change_state; | ||
| 337 | case FSCACHE_OBJECT_EV_CLEARED: | ||
| 338 | new_state = FSCACHE_OBJECT_DYING; | ||
| 339 | goto change_state; | ||
| 340 | case -1: | ||
| 341 | goto done; /* sleep until event */ | ||
| 342 | default: | ||
| 343 | goto unsupported_event; | ||
| 344 | } | 235 | } |
| 345 | 236 | ||
| 346 | change_state: | 237 | _debug("{OBJ%x} %s -> %s", |
| 347 | spin_lock(&object->lock); | 238 | object->debug_id, state->name, new_state->name); |
| 348 | object->state = new_state; | 239 | object->state = state = new_state; |
| 349 | spin_unlock(&object->lock); | ||
| 350 | 240 | ||
| 351 | done: | 241 | if (state->work) { |
| 352 | _leave(" [->%s]", fscache_object_states[object->state]); | 242 | if (unlikely(state->work == ((void *)2UL))) { |
| 353 | return; | 243 | _leave(" [dead]"); |
| 244 | return; | ||
| 245 | } | ||
| 246 | goto restart_masked; | ||
| 247 | } | ||
| 354 | 248 | ||
| 355 | unsupported_event: | 249 | /* Transited to wait state */ |
| 356 | printk(KERN_ERR "FS-Cache:" | 250 | event_mask = object->oob_event_mask; |
| 357 | " Unsupported event %d [%lx/%lx] in state %s\n", | 251 | for (t = state->transitions; t->events; t++) |
| 358 | event, object->events, object->event_mask, | 252 | event_mask |= t->events; |
| 359 | fscache_object_states[object->state]); | 253 | |
| 360 | BUG(); | 254 | unmask_events: |
| 255 | object->event_mask = event_mask; | ||
| 256 | smp_mb(); | ||
| 257 | events = object->events; | ||
| 258 | if (events & event_mask) | ||
| 259 | goto restart; | ||
| 260 | _leave(" [msk %lx]", event_mask); | ||
| 361 | } | 261 | } |
| 362 | 262 | ||
| 363 | /* | 263 | /* |
| 364 | * execute an object | 264 | * execute an object |
| 365 | */ | 265 | */ |
| 366 | void fscache_object_work_func(struct work_struct *work) | 266 | static void fscache_object_work_func(struct work_struct *work) |
| 367 | { | 267 | { |
| 368 | struct fscache_object *object = | 268 | struct fscache_object *object = |
| 369 | container_of(work, struct fscache_object, work); | 269 | container_of(work, struct fscache_object, work); |
| @@ -372,14 +272,70 @@ void fscache_object_work_func(struct work_struct *work) | |||
| 372 | _enter("{OBJ%x}", object->debug_id); | 272 | _enter("{OBJ%x}", object->debug_id); |
| 373 | 273 | ||
| 374 | start = jiffies; | 274 | start = jiffies; |
| 375 | fscache_object_state_machine(object); | 275 | fscache_object_sm_dispatcher(object); |
| 376 | fscache_hist(fscache_objs_histogram, start); | 276 | fscache_hist(fscache_objs_histogram, start); |
| 377 | if (object->events & object->event_mask) | ||
| 378 | fscache_enqueue_object(object); | ||
| 379 | clear_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
| 380 | fscache_put_object(object); | 277 | fscache_put_object(object); |
| 381 | } | 278 | } |
| 382 | EXPORT_SYMBOL(fscache_object_work_func); | 279 | |
| 280 | /** | ||
| 281 | * fscache_object_init - Initialise a cache object description | ||
| 282 | * @object: Object description | ||
| 283 | * @cookie: Cookie object will be attached to | ||
| 284 | * @cache: Cache in which backing object will be found | ||
| 285 | * | ||
| 286 | * Initialise a cache object description to its basic values. | ||
| 287 | * | ||
| 288 | * See Documentation/filesystems/caching/backend-api.txt for a complete | ||
| 289 | * description. | ||
| 290 | */ | ||
| 291 | void fscache_object_init(struct fscache_object *object, | ||
| 292 | struct fscache_cookie *cookie, | ||
| 293 | struct fscache_cache *cache) | ||
| 294 | { | ||
| 295 | const struct fscache_transition *t; | ||
| 296 | |||
| 297 | atomic_inc(&cache->object_count); | ||
| 298 | |||
| 299 | object->state = STATE(WAIT_FOR_INIT); | ||
| 300 | object->oob_table = fscache_osm_init_oob; | ||
| 301 | object->flags = 1 << FSCACHE_OBJECT_IS_LIVE; | ||
| 302 | spin_lock_init(&object->lock); | ||
| 303 | INIT_LIST_HEAD(&object->cache_link); | ||
| 304 | INIT_HLIST_NODE(&object->cookie_link); | ||
| 305 | INIT_WORK(&object->work, fscache_object_work_func); | ||
| 306 | INIT_LIST_HEAD(&object->dependents); | ||
| 307 | INIT_LIST_HEAD(&object->dep_link); | ||
| 308 | INIT_LIST_HEAD(&object->pending_ops); | ||
| 309 | object->n_children = 0; | ||
| 310 | object->n_ops = object->n_in_progress = object->n_exclusive = 0; | ||
| 311 | object->events = 0; | ||
| 312 | object->store_limit = 0; | ||
| 313 | object->store_limit_l = 0; | ||
| 314 | object->cache = cache; | ||
| 315 | object->cookie = cookie; | ||
| 316 | object->parent = NULL; | ||
| 317 | |||
| 318 | object->oob_event_mask = 0; | ||
| 319 | for (t = object->oob_table; t->events; t++) | ||
| 320 | object->oob_event_mask |= t->events; | ||
| 321 | object->event_mask = object->oob_event_mask; | ||
| 322 | for (t = object->state->transitions; t->events; t++) | ||
| 323 | object->event_mask |= t->events; | ||
| 324 | } | ||
| 325 | EXPORT_SYMBOL(fscache_object_init); | ||
| 326 | |||
| 327 | /* | ||
| 328 | * Abort object initialisation before we start it. | ||
| 329 | */ | ||
| 330 | static const struct fscache_state *fscache_abort_initialisation(struct fscache_object *object, | ||
| 331 | int event) | ||
| 332 | { | ||
| 333 | _enter("{OBJ%x},%d", object->debug_id, event); | ||
| 334 | |||
| 335 | object->oob_event_mask = 0; | ||
| 336 | fscache_dequeue_object(object); | ||
| 337 | return transit_to(KILL_OBJECT); | ||
| 338 | } | ||
| 383 | 339 | ||
| 384 | /* | 340 | /* |
| 385 | * initialise an object | 341 | * initialise an object |
| @@ -387,130 +343,136 @@ EXPORT_SYMBOL(fscache_object_work_func); | |||
| 387 | * immediately to do a creation | 343 | * immediately to do a creation |
| 388 | * - we may need to start the process of creating a parent and we need to wait | 344 | * - we may need to start the process of creating a parent and we need to wait |
| 389 | * for the parent's lookup and creation to complete if it's not there yet | 345 | * for the parent's lookup and creation to complete if it's not there yet |
| 390 | * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the | ||
| 391 | * leaf-most cookies of the object and all its children | ||
| 392 | */ | 346 | */ |
| 393 | static void fscache_initialise_object(struct fscache_object *object) | 347 | static const struct fscache_state *fscache_initialise_object(struct fscache_object *object, |
| 348 | int event) | ||
| 394 | { | 349 | { |
| 395 | struct fscache_object *parent; | 350 | struct fscache_object *parent; |
| 351 | bool success; | ||
| 396 | 352 | ||
| 397 | _enter(""); | 353 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 398 | ASSERT(object->cookie != NULL); | ||
| 399 | ASSERT(object->cookie->parent != NULL); | ||
| 400 | |||
| 401 | if (object->events & ((1 << FSCACHE_OBJECT_EV_ERROR) | | ||
| 402 | (1 << FSCACHE_OBJECT_EV_RELEASE) | | ||
| 403 | (1 << FSCACHE_OBJECT_EV_RETIRE) | | ||
| 404 | (1 << FSCACHE_OBJECT_EV_WITHDRAW))) { | ||
| 405 | _debug("abort init %lx", object->events); | ||
| 406 | spin_lock(&object->lock); | ||
| 407 | object->state = FSCACHE_OBJECT_ABORT_INIT; | ||
| 408 | spin_unlock(&object->lock); | ||
| 409 | return; | ||
| 410 | } | ||
| 411 | 354 | ||
| 412 | spin_lock(&object->cookie->lock); | 355 | ASSERT(list_empty(&object->dep_link)); |
| 413 | spin_lock_nested(&object->cookie->parent->lock, 1); | ||
| 414 | 356 | ||
| 415 | parent = object->parent; | 357 | parent = object->parent; |
| 416 | if (!parent) { | 358 | if (!parent) { |
| 417 | _debug("no parent"); | 359 | _leave(" [no parent]"); |
| 418 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | 360 | return transit_to(DROP_OBJECT); |
| 419 | } else { | 361 | } |
| 420 | spin_lock(&object->lock); | ||
| 421 | spin_lock_nested(&parent->lock, 1); | ||
| 422 | _debug("parent %s", fscache_object_states[parent->state]); | ||
| 423 | |||
| 424 | if (parent->state >= FSCACHE_OBJECT_DYING) { | ||
| 425 | _debug("bad parent"); | ||
| 426 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | ||
| 427 | } else if (parent->state < FSCACHE_OBJECT_AVAILABLE) { | ||
| 428 | _debug("wait"); | ||
| 429 | |||
| 430 | /* we may get woken up in this state by child objects | ||
| 431 | * binding on to us, so we need to make sure we don't | ||
| 432 | * add ourself to the list multiple times */ | ||
| 433 | if (list_empty(&object->dep_link)) { | ||
| 434 | fscache_stat(&fscache_n_cop_grab_object); | ||
| 435 | object->cache->ops->grab_object(object); | ||
| 436 | fscache_stat_d(&fscache_n_cop_grab_object); | ||
| 437 | list_add(&object->dep_link, | ||
| 438 | &parent->dependents); | ||
| 439 | |||
| 440 | /* fscache_acquire_non_index_cookie() uses this | ||
| 441 | * to wake the chain up */ | ||
| 442 | if (parent->state == FSCACHE_OBJECT_INIT) | ||
| 443 | fscache_enqueue_object(parent); | ||
| 444 | } | ||
| 445 | } else { | ||
| 446 | _debug("go"); | ||
| 447 | parent->n_ops++; | ||
| 448 | parent->n_obj_ops++; | ||
| 449 | object->lookup_jif = jiffies; | ||
| 450 | object->state = FSCACHE_OBJECT_LOOKING_UP; | ||
| 451 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
| 452 | } | ||
| 453 | 362 | ||
| 454 | spin_unlock(&parent->lock); | 363 | _debug("parent: %s of:%lx", parent->state->name, parent->flags); |
| 455 | spin_unlock(&object->lock); | 364 | |
| 365 | if (fscache_object_is_dying(parent)) { | ||
| 366 | _leave(" [bad parent]"); | ||
| 367 | return transit_to(DROP_OBJECT); | ||
| 456 | } | 368 | } |
| 457 | 369 | ||
| 458 | spin_unlock(&object->cookie->parent->lock); | 370 | if (fscache_object_is_available(parent)) { |
| 459 | spin_unlock(&object->cookie->lock); | 371 | _leave(" [ready]"); |
| 372 | return transit_to(PARENT_READY); | ||
| 373 | } | ||
| 374 | |||
| 375 | _debug("wait"); | ||
| 376 | |||
| 377 | spin_lock(&parent->lock); | ||
| 378 | fscache_stat(&fscache_n_cop_grab_object); | ||
| 379 | success = false; | ||
| 380 | if (fscache_object_is_live(parent) && | ||
| 381 | object->cache->ops->grab_object(object)) { | ||
| 382 | list_add(&object->dep_link, &parent->dependents); | ||
| 383 | success = true; | ||
| 384 | } | ||
| 385 | fscache_stat_d(&fscache_n_cop_grab_object); | ||
| 386 | spin_unlock(&parent->lock); | ||
| 387 | if (!success) { | ||
| 388 | _leave(" [grab failed]"); | ||
| 389 | return transit_to(DROP_OBJECT); | ||
| 390 | } | ||
| 391 | |||
| 392 | /* fscache_acquire_non_index_cookie() uses this | ||
| 393 | * to wake the chain up */ | ||
| 394 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_NEW_CHILD); | ||
| 395 | _leave(" [wait]"); | ||
| 396 | return transit_to(WAIT_FOR_PARENT); | ||
| 397 | } | ||
| 398 | |||
| 399 | /* | ||
| 400 | * Once the parent object is ready, we should kick off our lookup op. | ||
| 401 | */ | ||
| 402 | static const struct fscache_state *fscache_parent_ready(struct fscache_object *object, | ||
| 403 | int event) | ||
| 404 | { | ||
| 405 | struct fscache_object *parent = object->parent; | ||
| 406 | |||
| 407 | _enter("{OBJ%x},%d", object->debug_id, event); | ||
| 408 | |||
| 409 | ASSERT(parent != NULL); | ||
| 410 | |||
| 411 | spin_lock(&parent->lock); | ||
| 412 | parent->n_ops++; | ||
| 413 | parent->n_obj_ops++; | ||
| 414 | object->lookup_jif = jiffies; | ||
| 415 | spin_unlock(&parent->lock); | ||
| 416 | |||
| 460 | _leave(""); | 417 | _leave(""); |
| 418 | return transit_to(LOOK_UP_OBJECT); | ||
| 461 | } | 419 | } |
| 462 | 420 | ||
| 463 | /* | 421 | /* |
| 464 | * look an object up in the cache from which it was allocated | 422 | * look an object up in the cache from which it was allocated |
| 465 | * - we hold an "access lock" on the parent object, so the parent object cannot | 423 | * - we hold an "access lock" on the parent object, so the parent object cannot |
| 466 | * be withdrawn by either party till we've finished | 424 | * be withdrawn by either party till we've finished |
| 467 | * - an object's cookie is pinned until we clear FSCACHE_COOKIE_CREATING on the | ||
| 468 | * leaf-most cookies of the object and all its children | ||
| 469 | */ | 425 | */ |
| 470 | static void fscache_lookup_object(struct fscache_object *object) | 426 | static const struct fscache_state *fscache_look_up_object(struct fscache_object *object, |
| 427 | int event) | ||
| 471 | { | 428 | { |
| 472 | struct fscache_cookie *cookie = object->cookie; | 429 | struct fscache_cookie *cookie = object->cookie; |
| 473 | struct fscache_object *parent; | 430 | struct fscache_object *parent = object->parent; |
| 474 | int ret; | 431 | int ret; |
| 475 | 432 | ||
| 476 | _enter(""); | 433 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 434 | |||
| 435 | object->oob_table = fscache_osm_lookup_oob; | ||
| 477 | 436 | ||
| 478 | parent = object->parent; | ||
| 479 | ASSERT(parent != NULL); | 437 | ASSERT(parent != NULL); |
| 480 | ASSERTCMP(parent->n_ops, >, 0); | 438 | ASSERTCMP(parent->n_ops, >, 0); |
| 481 | ASSERTCMP(parent->n_obj_ops, >, 0); | 439 | ASSERTCMP(parent->n_obj_ops, >, 0); |
| 482 | 440 | ||
| 483 | /* make sure the parent is still available */ | 441 | /* make sure the parent is still available */ |
| 484 | ASSERTCMP(parent->state, >=, FSCACHE_OBJECT_AVAILABLE); | 442 | ASSERT(fscache_object_is_available(parent)); |
| 485 | 443 | ||
| 486 | if (parent->state >= FSCACHE_OBJECT_DYING || | 444 | if (fscache_object_is_dying(parent) || |
| 487 | test_bit(FSCACHE_IOERROR, &object->cache->flags)) { | 445 | test_bit(FSCACHE_IOERROR, &object->cache->flags) || |
| 488 | _debug("unavailable"); | 446 | !fscache_use_cookie(object)) { |
| 489 | set_bit(FSCACHE_OBJECT_EV_WITHDRAW, &object->events); | 447 | _leave(" [unavailable]"); |
| 490 | _leave(""); | 448 | return transit_to(LOOKUP_FAILURE); |
| 491 | return; | ||
| 492 | } | 449 | } |
| 493 | 450 | ||
| 494 | _debug("LOOKUP \"%s/%s\" in \"%s\"", | 451 | _debug("LOOKUP \"%s\" in \"%s\"", |
| 495 | parent->cookie->def->name, cookie->def->name, | 452 | cookie->def->name, object->cache->tag->name); |
| 496 | object->cache->tag->name); | ||
| 497 | 453 | ||
| 498 | fscache_stat(&fscache_n_object_lookups); | 454 | fscache_stat(&fscache_n_object_lookups); |
| 499 | fscache_stat(&fscache_n_cop_lookup_object); | 455 | fscache_stat(&fscache_n_cop_lookup_object); |
| 500 | ret = object->cache->ops->lookup_object(object); | 456 | ret = object->cache->ops->lookup_object(object); |
| 501 | fscache_stat_d(&fscache_n_cop_lookup_object); | 457 | fscache_stat_d(&fscache_n_cop_lookup_object); |
| 502 | 458 | ||
| 503 | if (test_bit(FSCACHE_OBJECT_EV_ERROR, &object->events)) | 459 | fscache_unuse_cookie(object); |
| 504 | set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); | ||
| 505 | 460 | ||
| 506 | if (ret == -ETIMEDOUT) { | 461 | if (ret == -ETIMEDOUT) { |
| 507 | /* probably stuck behind another object, so move this one to | 462 | /* probably stuck behind another object, so move this one to |
| 508 | * the back of the queue */ | 463 | * the back of the queue */ |
| 509 | fscache_stat(&fscache_n_object_lookups_timed_out); | 464 | fscache_stat(&fscache_n_object_lookups_timed_out); |
| 510 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | 465 | _leave(" [timeout]"); |
| 466 | return NO_TRANSIT; | ||
| 511 | } | 467 | } |
| 512 | 468 | ||
| 513 | _leave(""); | 469 | if (ret < 0) { |
| 470 | _leave(" [error]"); | ||
| 471 | return transit_to(LOOKUP_FAILURE); | ||
| 472 | } | ||
| 473 | |||
| 474 | _leave(" [ok]"); | ||
| 475 | return transit_to(OBJECT_AVAILABLE); | ||
| 514 | } | 476 | } |
| 515 | 477 | ||
| 516 | /** | 478 | /** |
| @@ -524,32 +486,20 @@ void fscache_object_lookup_negative(struct fscache_object *object) | |||
| 524 | { | 486 | { |
| 525 | struct fscache_cookie *cookie = object->cookie; | 487 | struct fscache_cookie *cookie = object->cookie; |
| 526 | 488 | ||
| 527 | _enter("{OBJ%x,%s}", | 489 | _enter("{OBJ%x,%s}", object->debug_id, object->state->name); |
| 528 | object->debug_id, fscache_object_states[object->state]); | ||
| 529 | 490 | ||
| 530 | spin_lock(&object->lock); | 491 | if (!test_and_set_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
| 531 | if (object->state == FSCACHE_OBJECT_LOOKING_UP) { | ||
| 532 | fscache_stat(&fscache_n_object_lookups_negative); | 492 | fscache_stat(&fscache_n_object_lookups_negative); |
| 533 | 493 | ||
| 534 | /* transit here to allow write requests to begin stacking up | 494 | /* Allow write requests to begin stacking up and read requests to begin |
| 535 | * and read requests to begin returning ENODATA */ | 495 | * returning ENODATA. |
| 536 | object->state = FSCACHE_OBJECT_CREATING; | 496 | */ |
| 537 | spin_unlock(&object->lock); | ||
| 538 | |||
| 539 | set_bit(FSCACHE_COOKIE_PENDING_FILL, &cookie->flags); | ||
| 540 | set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | 497 | set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); |
| 541 | 498 | ||
| 542 | _debug("wake up lookup %p", &cookie->flags); | 499 | _debug("wake up lookup %p", &cookie->flags); |
| 543 | smp_mb__before_clear_bit(); | 500 | clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); |
| 544 | clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); | ||
| 545 | smp_mb__after_clear_bit(); | ||
| 546 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); | 501 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); |
| 547 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
| 548 | } else { | ||
| 549 | ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); | ||
| 550 | spin_unlock(&object->lock); | ||
| 551 | } | 502 | } |
| 552 | |||
| 553 | _leave(""); | 503 | _leave(""); |
| 554 | } | 504 | } |
| 555 | EXPORT_SYMBOL(fscache_object_lookup_negative); | 505 | EXPORT_SYMBOL(fscache_object_lookup_negative); |
| @@ -568,38 +518,26 @@ void fscache_obtained_object(struct fscache_object *object) | |||
| 568 | { | 518 | { |
| 569 | struct fscache_cookie *cookie = object->cookie; | 519 | struct fscache_cookie *cookie = object->cookie; |
| 570 | 520 | ||
| 571 | _enter("{OBJ%x,%s}", | 521 | _enter("{OBJ%x,%s}", object->debug_id, object->state->name); |
| 572 | object->debug_id, fscache_object_states[object->state]); | ||
| 573 | 522 | ||
| 574 | /* if we were still looking up, then we must have a positive lookup | 523 | /* if we were still looking up, then we must have a positive lookup |
| 575 | * result, in which case there may be data available */ | 524 | * result, in which case there may be data available */ |
| 576 | spin_lock(&object->lock); | 525 | if (!test_and_set_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
| 577 | if (object->state == FSCACHE_OBJECT_LOOKING_UP) { | ||
| 578 | fscache_stat(&fscache_n_object_lookups_positive); | 526 | fscache_stat(&fscache_n_object_lookups_positive); |
| 579 | 527 | ||
| 580 | clear_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | 528 | /* We do (presumably) have data */ |
| 529 | clear_bit_unlock(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); | ||
| 581 | 530 | ||
| 582 | object->state = FSCACHE_OBJECT_AVAILABLE; | 531 | /* Allow write requests to begin stacking up and read requests |
| 583 | spin_unlock(&object->lock); | 532 | * to begin shovelling data. |
| 584 | 533 | */ | |
| 585 | smp_mb__before_clear_bit(); | 534 | clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); |
| 586 | clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); | ||
| 587 | smp_mb__after_clear_bit(); | ||
| 588 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); | 535 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); |
| 589 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
| 590 | } else { | 536 | } else { |
| 591 | ASSERTCMP(object->state, ==, FSCACHE_OBJECT_CREATING); | ||
| 592 | fscache_stat(&fscache_n_object_created); | 537 | fscache_stat(&fscache_n_object_created); |
| 593 | |||
| 594 | object->state = FSCACHE_OBJECT_AVAILABLE; | ||
| 595 | spin_unlock(&object->lock); | ||
| 596 | set_bit(FSCACHE_OBJECT_EV_REQUEUE, &object->events); | ||
| 597 | smp_wmb(); | ||
| 598 | } | 538 | } |
| 599 | 539 | ||
| 600 | if (test_and_clear_bit(FSCACHE_COOKIE_CREATING, &cookie->flags)) | 540 | set_bit(FSCACHE_OBJECT_IS_AVAILABLE, &object->flags); |
| 601 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_CREATING); | ||
| 602 | |||
| 603 | _leave(""); | 541 | _leave(""); |
| 604 | } | 542 | } |
| 605 | EXPORT_SYMBOL(fscache_obtained_object); | 543 | EXPORT_SYMBOL(fscache_obtained_object); |
| @@ -607,15 +545,14 @@ EXPORT_SYMBOL(fscache_obtained_object); | |||
| 607 | /* | 545 | /* |
| 608 | * handle an object that has just become available | 546 | * handle an object that has just become available |
| 609 | */ | 547 | */ |
| 610 | static void fscache_object_available(struct fscache_object *object) | 548 | static const struct fscache_state *fscache_object_available(struct fscache_object *object, |
| 549 | int event) | ||
| 611 | { | 550 | { |
| 612 | _enter("{OBJ%x}", object->debug_id); | 551 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 613 | 552 | ||
| 614 | spin_lock(&object->lock); | 553 | object->oob_table = fscache_osm_run_oob; |
| 615 | 554 | ||
| 616 | if (object->cookie && | 555 | spin_lock(&object->lock); |
| 617 | test_and_clear_bit(FSCACHE_COOKIE_CREATING, &object->cookie->flags)) | ||
| 618 | wake_up_bit(&object->cookie->flags, FSCACHE_COOKIE_CREATING); | ||
| 619 | 556 | ||
| 620 | fscache_done_parent_op(object); | 557 | fscache_done_parent_op(object); |
| 621 | if (object->n_in_progress == 0) { | 558 | if (object->n_in_progress == 0) { |
| @@ -631,130 +568,158 @@ static void fscache_object_available(struct fscache_object *object) | |||
| 631 | fscache_stat(&fscache_n_cop_lookup_complete); | 568 | fscache_stat(&fscache_n_cop_lookup_complete); |
| 632 | object->cache->ops->lookup_complete(object); | 569 | object->cache->ops->lookup_complete(object); |
| 633 | fscache_stat_d(&fscache_n_cop_lookup_complete); | 570 | fscache_stat_d(&fscache_n_cop_lookup_complete); |
| 634 | fscache_enqueue_dependents(object); | ||
| 635 | 571 | ||
| 636 | fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); | 572 | fscache_hist(fscache_obj_instantiate_histogram, object->lookup_jif); |
| 637 | fscache_stat(&fscache_n_object_avail); | 573 | fscache_stat(&fscache_n_object_avail); |
| 638 | 574 | ||
| 639 | _leave(""); | 575 | _leave(""); |
| 576 | return transit_to(JUMPSTART_DEPS); | ||
| 640 | } | 577 | } |
| 641 | 578 | ||
| 642 | /* | 579 | /* |
| 643 | * drop an object's attachments | 580 | * Wake up this object's dependent objects now that we've become available. |
| 644 | */ | 581 | */ |
| 645 | static void fscache_drop_object(struct fscache_object *object) | 582 | static const struct fscache_state *fscache_jumpstart_dependents(struct fscache_object *object, |
| 583 | int event) | ||
| 646 | { | 584 | { |
| 647 | struct fscache_object *parent = object->parent; | 585 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 648 | struct fscache_cache *cache = object->cache; | ||
| 649 | 586 | ||
| 650 | _enter("{OBJ%x,%d}", object->debug_id, object->n_children); | 587 | if (!fscache_enqueue_dependents(object, FSCACHE_OBJECT_EV_PARENT_READY)) |
| 588 | return NO_TRANSIT; /* Not finished; requeue */ | ||
| 589 | return transit_to(WAIT_FOR_CMD); | ||
| 590 | } | ||
| 651 | 591 | ||
| 652 | ASSERTCMP(object->cookie, ==, NULL); | 592 | /* |
| 653 | ASSERT(hlist_unhashed(&object->cookie_link)); | 593 | * Handle lookup or creation failure. |
| 594 | */ | ||
| 595 | static const struct fscache_state *fscache_lookup_failure(struct fscache_object *object, | ||
| 596 | int event) | ||
| 597 | { | ||
| 598 | struct fscache_cookie *cookie; | ||
| 654 | 599 | ||
| 655 | spin_lock(&cache->object_list_lock); | 600 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 656 | list_del_init(&object->cache_link); | ||
| 657 | spin_unlock(&cache->object_list_lock); | ||
| 658 | 601 | ||
| 659 | fscache_stat(&fscache_n_cop_drop_object); | 602 | object->oob_event_mask = 0; |
| 660 | cache->ops->drop_object(object); | ||
| 661 | fscache_stat_d(&fscache_n_cop_drop_object); | ||
| 662 | 603 | ||
| 663 | if (parent) { | 604 | fscache_stat(&fscache_n_cop_lookup_complete); |
| 664 | _debug("release parent OBJ%x {%d}", | 605 | object->cache->ops->lookup_complete(object); |
| 665 | parent->debug_id, parent->n_children); | 606 | fscache_stat_d(&fscache_n_cop_lookup_complete); |
| 666 | 607 | ||
| 667 | spin_lock(&parent->lock); | 608 | cookie = object->cookie; |
| 668 | parent->n_children--; | 609 | set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); |
| 669 | if (parent->n_children == 0) | 610 | if (test_and_clear_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags)) |
| 670 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); | 611 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_LOOKING_UP); |
| 671 | spin_unlock(&parent->lock); | 612 | |
| 672 | object->parent = NULL; | 613 | fscache_done_parent_op(object); |
| 614 | return transit_to(KILL_OBJECT); | ||
| 615 | } | ||
| 616 | |||
| 617 | /* | ||
| 618 | * Wait for completion of all active operations on this object and the death of | ||
| 619 | * all child objects of this object. | ||
| 620 | */ | ||
| 621 | static const struct fscache_state *fscache_kill_object(struct fscache_object *object, | ||
| 622 | int event) | ||
| 623 | { | ||
| 624 | _enter("{OBJ%x,%d,%d},%d", | ||
| 625 | object->debug_id, object->n_ops, object->n_children, event); | ||
| 626 | |||
| 627 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | ||
| 628 | object->oob_event_mask = 0; | ||
| 629 | |||
| 630 | if (list_empty(&object->dependents) && | ||
| 631 | object->n_ops == 0 && | ||
| 632 | object->n_children == 0) | ||
| 633 | return transit_to(DROP_OBJECT); | ||
| 634 | |||
| 635 | if (object->n_in_progress == 0) { | ||
| 636 | spin_lock(&object->lock); | ||
| 637 | if (object->n_ops > 0 && object->n_in_progress == 0) | ||
| 638 | fscache_start_operations(object); | ||
| 639 | spin_unlock(&object->lock); | ||
| 673 | } | 640 | } |
| 674 | 641 | ||
| 675 | /* this just shifts the object release to the work processor */ | 642 | if (!list_empty(&object->dependents)) |
| 676 | fscache_put_object(object); | 643 | return transit_to(KILL_DEPENDENTS); |
| 677 | 644 | ||
| 678 | _leave(""); | 645 | return transit_to(WAIT_FOR_CLEARANCE); |
| 679 | } | 646 | } |
| 680 | 647 | ||
| 681 | /* | 648 | /* |
| 682 | * release or recycle an object that the netfs has discarded | 649 | * Kill dependent objects. |
| 683 | */ | 650 | */ |
| 684 | static void fscache_release_object(struct fscache_object *object) | 651 | static const struct fscache_state *fscache_kill_dependents(struct fscache_object *object, |
| 652 | int event) | ||
| 685 | { | 653 | { |
| 686 | _enter(""); | 654 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 687 | 655 | ||
| 688 | fscache_drop_object(object); | 656 | if (!fscache_enqueue_dependents(object, FSCACHE_OBJECT_EV_KILL)) |
| 657 | return NO_TRANSIT; /* Not finished */ | ||
| 658 | return transit_to(WAIT_FOR_CLEARANCE); | ||
| 689 | } | 659 | } |
| 690 | 660 | ||
| 691 | /* | 661 | /* |
| 692 | * withdraw an object from active service | 662 | * Drop an object's attachments |
| 693 | */ | 663 | */ |
| 694 | static void fscache_withdraw_object(struct fscache_object *object) | 664 | static const struct fscache_state *fscache_drop_object(struct fscache_object *object, |
| 665 | int event) | ||
| 695 | { | 666 | { |
| 696 | struct fscache_cookie *cookie; | 667 | struct fscache_object *parent = object->parent; |
| 697 | bool detached; | 668 | struct fscache_cookie *cookie = object->cookie; |
| 669 | struct fscache_cache *cache = object->cache; | ||
| 670 | bool awaken = false; | ||
| 698 | 671 | ||
| 699 | _enter(""); | 672 | _enter("{OBJ%x,%d},%d", object->debug_id, object->n_children, event); |
| 700 | 673 | ||
| 701 | spin_lock(&object->lock); | 674 | ASSERT(cookie != NULL); |
| 702 | cookie = object->cookie; | 675 | ASSERT(!hlist_unhashed(&object->cookie_link)); |
| 703 | if (cookie) { | ||
| 704 | /* need to get the cookie lock before the object lock, starting | ||
| 705 | * from the object pointer */ | ||
| 706 | atomic_inc(&cookie->usage); | ||
| 707 | spin_unlock(&object->lock); | ||
| 708 | 676 | ||
| 709 | detached = false; | 677 | /* Make sure the cookie no longer points here and that the netfs isn't |
| 710 | spin_lock(&cookie->lock); | 678 | * waiting for us. |
| 711 | spin_lock(&object->lock); | 679 | */ |
| 680 | spin_lock(&cookie->lock); | ||
| 681 | hlist_del_init(&object->cookie_link); | ||
| 682 | if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) | ||
| 683 | awaken = true; | ||
| 684 | spin_unlock(&cookie->lock); | ||
| 712 | 685 | ||
| 713 | if (object->cookie == cookie) { | 686 | if (awaken) |
| 714 | hlist_del_init(&object->cookie_link); | 687 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); |
| 715 | object->cookie = NULL; | ||
| 716 | fscache_invalidation_complete(cookie); | ||
| 717 | detached = true; | ||
| 718 | } | ||
| 719 | spin_unlock(&cookie->lock); | ||
| 720 | fscache_cookie_put(cookie); | ||
| 721 | if (detached) | ||
| 722 | fscache_cookie_put(cookie); | ||
| 723 | } | ||
| 724 | 688 | ||
| 689 | /* Prevent a race with our last child, which has to signal EV_CLEARED | ||
| 690 | * before dropping our spinlock. | ||
| 691 | */ | ||
| 692 | spin_lock(&object->lock); | ||
| 725 | spin_unlock(&object->lock); | 693 | spin_unlock(&object->lock); |
| 726 | 694 | ||
| 727 | fscache_drop_object(object); | 695 | /* Discard from the cache's collection of objects */ |
| 728 | } | 696 | spin_lock(&cache->object_list_lock); |
| 697 | list_del_init(&object->cache_link); | ||
| 698 | spin_unlock(&cache->object_list_lock); | ||
| 729 | 699 | ||
| 730 | /* | 700 | fscache_stat(&fscache_n_cop_drop_object); |
| 731 | * withdraw an object from active service at the behest of the cache | 701 | cache->ops->drop_object(object); |
| 732 | * - need break the links to a cached object cookie | 702 | fscache_stat_d(&fscache_n_cop_drop_object); |
| 733 | * - called under two situations: | ||
| 734 | * (1) recycler decides to reclaim an in-use object | ||
| 735 | * (2) a cache is unmounted | ||
| 736 | * - have to take care as the cookie can be being relinquished by the netfs | ||
| 737 | * simultaneously | ||
| 738 | * - the object is pinned by the caller holding a refcount on it | ||
| 739 | */ | ||
| 740 | void fscache_withdrawing_object(struct fscache_cache *cache, | ||
| 741 | struct fscache_object *object) | ||
| 742 | { | ||
| 743 | bool enqueue = false; | ||
| 744 | 703 | ||
| 745 | _enter(",OBJ%x", object->debug_id); | 704 | /* The parent object wants to know when all its dependents have gone */ |
| 705 | if (parent) { | ||
| 706 | _debug("release parent OBJ%x {%d}", | ||
| 707 | parent->debug_id, parent->n_children); | ||
| 746 | 708 | ||
| 747 | spin_lock(&object->lock); | 709 | spin_lock(&parent->lock); |
| 748 | if (object->state < FSCACHE_OBJECT_WITHDRAWING) { | 710 | parent->n_children--; |
| 749 | object->state = FSCACHE_OBJECT_WITHDRAWING; | 711 | if (parent->n_children == 0) |
| 750 | enqueue = true; | 712 | fscache_raise_event(parent, FSCACHE_OBJECT_EV_CLEARED); |
| 713 | spin_unlock(&parent->lock); | ||
| 714 | object->parent = NULL; | ||
| 751 | } | 715 | } |
| 752 | spin_unlock(&object->lock); | ||
| 753 | 716 | ||
| 754 | if (enqueue) | 717 | /* this just shifts the object release to the work processor */ |
| 755 | fscache_enqueue_object(object); | 718 | fscache_put_object(object); |
| 719 | fscache_stat(&fscache_n_object_dead); | ||
| 756 | 720 | ||
| 757 | _leave(""); | 721 | _leave(""); |
| 722 | return transit_to(OBJECT_DEAD); | ||
| 758 | } | 723 | } |
| 759 | 724 | ||
| 760 | /* | 725 | /* |
| @@ -771,7 +736,7 @@ static int fscache_get_object(struct fscache_object *object) | |||
| 771 | } | 736 | } |
| 772 | 737 | ||
| 773 | /* | 738 | /* |
| 774 | * discard a ref on a work item | 739 | * Discard a ref on an object |
| 775 | */ | 740 | */ |
| 776 | static void fscache_put_object(struct fscache_object *object) | 741 | static void fscache_put_object(struct fscache_object *object) |
| 777 | { | 742 | { |
| @@ -780,6 +745,22 @@ static void fscache_put_object(struct fscache_object *object) | |||
| 780 | fscache_stat_d(&fscache_n_cop_put_object); | 745 | fscache_stat_d(&fscache_n_cop_put_object); |
| 781 | } | 746 | } |
| 782 | 747 | ||
| 748 | /** | ||
| 749 | * fscache_object_destroy - Note that a cache object is about to be destroyed | ||
| 750 | * @object: The object to be destroyed | ||
| 751 | * | ||
| 752 | * Note the imminent destruction and deallocation of a cache object record. | ||
| 753 | */ | ||
| 754 | void fscache_object_destroy(struct fscache_object *object) | ||
| 755 | { | ||
| 756 | fscache_objlist_remove(object); | ||
| 757 | |||
| 758 | /* We can get rid of the cookie now */ | ||
| 759 | fscache_cookie_put(object->cookie); | ||
| 760 | object->cookie = NULL; | ||
| 761 | } | ||
| 762 | EXPORT_SYMBOL(fscache_object_destroy); | ||
| 763 | |||
| 783 | /* | 764 | /* |
| 784 | * enqueue an object for metadata-type processing | 765 | * enqueue an object for metadata-type processing |
| 785 | */ | 766 | */ |
| @@ -803,7 +784,7 @@ void fscache_enqueue_object(struct fscache_object *object) | |||
| 803 | 784 | ||
| 804 | /** | 785 | /** |
| 805 | * fscache_object_sleep_till_congested - Sleep until object wq is congested | 786 | * fscache_object_sleep_till_congested - Sleep until object wq is congested |
| 806 | * @timoutp: Scheduler sleep timeout | 787 | * @timeoutp: Scheduler sleep timeout |
| 807 | * | 788 | * |
| 808 | * Allow an object handler to sleep until the object workqueue is congested. | 789 | * Allow an object handler to sleep until the object workqueue is congested. |
| 809 | * | 790 | * |
| @@ -831,18 +812,21 @@ bool fscache_object_sleep_till_congested(signed long *timeoutp) | |||
| 831 | EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested); | 812 | EXPORT_SYMBOL_GPL(fscache_object_sleep_till_congested); |
| 832 | 813 | ||
| 833 | /* | 814 | /* |
| 834 | * enqueue the dependents of an object for metadata-type processing | 815 | * Enqueue the dependents of an object for metadata-type processing. |
| 835 | * - the caller must hold the object's lock | 816 | * |
| 836 | * - this may cause an already locked object to wind up being processed again | 817 | * If we don't manage to finish the list before the scheduler wants to run |
| 818 | * again then return false immediately. We return true if the list was | ||
| 819 | * cleared. | ||
| 837 | */ | 820 | */ |
| 838 | static void fscache_enqueue_dependents(struct fscache_object *object) | 821 | static bool fscache_enqueue_dependents(struct fscache_object *object, int event) |
| 839 | { | 822 | { |
| 840 | struct fscache_object *dep; | 823 | struct fscache_object *dep; |
| 824 | bool ret = true; | ||
| 841 | 825 | ||
| 842 | _enter("{OBJ%x}", object->debug_id); | 826 | _enter("{OBJ%x}", object->debug_id); |
| 843 | 827 | ||
| 844 | if (list_empty(&object->dependents)) | 828 | if (list_empty(&object->dependents)) |
| 845 | return; | 829 | return true; |
| 846 | 830 | ||
| 847 | spin_lock(&object->lock); | 831 | spin_lock(&object->lock); |
| 848 | 832 | ||
| @@ -851,23 +835,23 @@ static void fscache_enqueue_dependents(struct fscache_object *object) | |||
| 851 | struct fscache_object, dep_link); | 835 | struct fscache_object, dep_link); |
| 852 | list_del_init(&dep->dep_link); | 836 | list_del_init(&dep->dep_link); |
| 853 | 837 | ||
| 854 | 838 | fscache_raise_event(dep, event); | |
| 855 | /* sort onto appropriate lists */ | ||
| 856 | fscache_enqueue_object(dep); | ||
| 857 | fscache_put_object(dep); | 839 | fscache_put_object(dep); |
| 858 | 840 | ||
| 859 | if (!list_empty(&object->dependents)) | 841 | if (!list_empty(&object->dependents) && need_resched()) { |
| 860 | cond_resched_lock(&object->lock); | 842 | ret = false; |
| 843 | break; | ||
| 844 | } | ||
| 861 | } | 845 | } |
| 862 | 846 | ||
| 863 | spin_unlock(&object->lock); | 847 | spin_unlock(&object->lock); |
| 848 | return ret; | ||
| 864 | } | 849 | } |
| 865 | 850 | ||
| 866 | /* | 851 | /* |
| 867 | * remove an object from whatever queue it's waiting on | 852 | * remove an object from whatever queue it's waiting on |
| 868 | * - the caller must hold object->lock | ||
| 869 | */ | 853 | */ |
| 870 | void fscache_dequeue_object(struct fscache_object *object) | 854 | static void fscache_dequeue_object(struct fscache_object *object) |
| 871 | { | 855 | { |
| 872 | _enter("{OBJ%x}", object->debug_id); | 856 | _enter("{OBJ%x}", object->debug_id); |
| 873 | 857 | ||
| @@ -886,7 +870,10 @@ void fscache_dequeue_object(struct fscache_object *object) | |||
| 886 | * @data: The auxiliary data for the object | 870 | * @data: The auxiliary data for the object |
| 887 | * @datalen: The size of the auxiliary data | 871 | * @datalen: The size of the auxiliary data |
| 888 | * | 872 | * |
| 889 | * This function consults the netfs about the coherency state of an object | 873 | * This function consults the netfs about the coherency state of an object. |
| 874 | * The caller must be holding a ref on cookie->n_active (held by | ||
| 875 | * fscache_look_up_object() on behalf of the cache backend during object lookup | ||
| 876 | * and creation). | ||
| 890 | */ | 877 | */ |
| 891 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, | 878 | enum fscache_checkaux fscache_check_aux(struct fscache_object *object, |
| 892 | const void *data, uint16_t datalen) | 879 | const void *data, uint16_t datalen) |
| @@ -927,12 +914,23 @@ EXPORT_SYMBOL(fscache_check_aux); | |||
| 927 | /* | 914 | /* |
| 928 | * Asynchronously invalidate an object. | 915 | * Asynchronously invalidate an object. |
| 929 | */ | 916 | */ |
| 930 | static void fscache_invalidate_object(struct fscache_object *object) | 917 | static const struct fscache_state *_fscache_invalidate_object(struct fscache_object *object, |
| 918 | int event) | ||
| 931 | { | 919 | { |
| 932 | struct fscache_operation *op; | 920 | struct fscache_operation *op; |
| 933 | struct fscache_cookie *cookie = object->cookie; | 921 | struct fscache_cookie *cookie = object->cookie; |
| 934 | 922 | ||
| 935 | _enter("{OBJ%x}", object->debug_id); | 923 | _enter("{OBJ%x},%d", object->debug_id, event); |
| 924 | |||
| 925 | /* We're going to need the cookie. If the cookie is not available then | ||
| 926 | * retire the object instead. | ||
| 927 | */ | ||
| 928 | if (!fscache_use_cookie(object)) { | ||
| 929 | ASSERT(object->cookie->stores.rnode == NULL); | ||
| 930 | set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); | ||
| 931 | _leave(" [no cookie]"); | ||
| 932 | return transit_to(KILL_OBJECT); | ||
| 933 | } | ||
| 936 | 934 | ||
| 937 | /* Reject any new read/write ops and abort any that are pending. */ | 935 | /* Reject any new read/write ops and abort any that are pending. */ |
| 938 | fscache_invalidate_writes(cookie); | 936 | fscache_invalidate_writes(cookie); |
| @@ -941,14 +939,13 @@ static void fscache_invalidate_object(struct fscache_object *object) | |||
| 941 | 939 | ||
| 942 | /* Now we have to wait for in-progress reads and writes */ | 940 | /* Now we have to wait for in-progress reads and writes */ |
| 943 | op = kzalloc(sizeof(*op), GFP_KERNEL); | 941 | op = kzalloc(sizeof(*op), GFP_KERNEL); |
| 944 | if (!op) { | 942 | if (!op) |
| 945 | fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); | 943 | goto nomem; |
| 946 | _leave(" [ENOMEM]"); | ||
| 947 | return; | ||
| 948 | } | ||
| 949 | 944 | ||
| 950 | fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); | 945 | fscache_operation_init(op, object->cache->ops->invalidate_object, NULL); |
| 951 | op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); | 946 | op->flags = FSCACHE_OP_ASYNC | |
| 947 | (1 << FSCACHE_OP_EXCLUSIVE) | | ||
| 948 | (1 << FSCACHE_OP_UNUSE_COOKIE); | ||
| 952 | 949 | ||
| 953 | spin_lock(&cookie->lock); | 950 | spin_lock(&cookie->lock); |
| 954 | if (fscache_submit_exclusive_op(object, op) < 0) | 951 | if (fscache_submit_exclusive_op(object, op) < 0) |
| @@ -965,13 +962,50 @@ static void fscache_invalidate_object(struct fscache_object *object) | |||
| 965 | /* We can allow read and write requests to come in once again. They'll | 962 | /* We can allow read and write requests to come in once again. They'll |
| 966 | * queue up behind our exclusive invalidation operation. | 963 | * queue up behind our exclusive invalidation operation. |
| 967 | */ | 964 | */ |
| 968 | fscache_invalidation_complete(cookie); | 965 | if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) |
| 969 | _leave(""); | 966 | wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); |
| 970 | return; | 967 | _leave(" [ok]"); |
| 968 | return transit_to(UPDATE_OBJECT); | ||
| 969 | |||
| 970 | nomem: | ||
| 971 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | ||
| 972 | fscache_unuse_cookie(object); | ||
| 973 | _leave(" [ENOMEM]"); | ||
| 974 | return transit_to(KILL_OBJECT); | ||
| 971 | 975 | ||
| 972 | submit_op_failed: | 976 | submit_op_failed: |
| 977 | clear_bit(FSCACHE_OBJECT_IS_LIVE, &object->flags); | ||
| 973 | spin_unlock(&cookie->lock); | 978 | spin_unlock(&cookie->lock); |
| 974 | kfree(op); | 979 | kfree(op); |
| 975 | fscache_raise_event(object, FSCACHE_OBJECT_EV_ERROR); | ||
| 976 | _leave(" [EIO]"); | 980 | _leave(" [EIO]"); |
| 981 | return transit_to(KILL_OBJECT); | ||
| 982 | } | ||
| 983 | |||
| 984 | static const struct fscache_state *fscache_invalidate_object(struct fscache_object *object, | ||
| 985 | int event) | ||
| 986 | { | ||
| 987 | const struct fscache_state *s; | ||
| 988 | |||
| 989 | fscache_stat(&fscache_n_invalidates_run); | ||
| 990 | fscache_stat(&fscache_n_cop_invalidate_object); | ||
| 991 | s = _fscache_invalidate_object(object, event); | ||
| 992 | fscache_stat_d(&fscache_n_cop_invalidate_object); | ||
| 993 | return s; | ||
| 994 | } | ||
| 995 | |||
| 996 | /* | ||
| 997 | * Asynchronously update an object. | ||
| 998 | */ | ||
| 999 | static const struct fscache_state *fscache_update_object(struct fscache_object *object, | ||
| 1000 | int event) | ||
| 1001 | { | ||
| 1002 | _enter("{OBJ%x},%d", object->debug_id, event); | ||
| 1003 | |||
| 1004 | fscache_stat(&fscache_n_updates_run); | ||
| 1005 | fscache_stat(&fscache_n_cop_update_object); | ||
| 1006 | object->cache->ops->update_object(object); | ||
| 1007 | fscache_stat_d(&fscache_n_cop_update_object); | ||
| 1008 | |||
| 1009 | _leave(""); | ||
| 1010 | return transit_to(WAIT_FOR_CMD); | ||
| 977 | } | 1011 | } |
diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c index 762a9ec4ffa4..318071aca217 100644 --- a/fs/fscache/operation.c +++ b/fs/fscache/operation.c | |||
| @@ -35,7 +35,7 @@ void fscache_enqueue_operation(struct fscache_operation *op) | |||
| 35 | 35 | ||
| 36 | ASSERT(list_empty(&op->pend_link)); | 36 | ASSERT(list_empty(&op->pend_link)); |
| 37 | ASSERT(op->processor != NULL); | 37 | ASSERT(op->processor != NULL); |
| 38 | ASSERTCMP(op->object->state, >=, FSCACHE_OBJECT_AVAILABLE); | 38 | ASSERT(fscache_object_is_available(op->object)); |
| 39 | ASSERTCMP(atomic_read(&op->usage), >, 0); | 39 | ASSERTCMP(atomic_read(&op->usage), >, 0); |
| 40 | ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); | 40 | ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); |
| 41 | 41 | ||
| @@ -119,7 +119,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
| 119 | /* need to issue a new write op after this */ | 119 | /* need to issue a new write op after this */ |
| 120 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); | 120 | clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags); |
| 121 | ret = 0; | 121 | ret = 0; |
| 122 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | 122 | } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
| 123 | op->object = object; | 123 | op->object = object; |
| 124 | object->n_ops++; | 124 | object->n_ops++; |
| 125 | object->n_exclusive++; /* reads and writes must wait */ | 125 | object->n_exclusive++; /* reads and writes must wait */ |
| @@ -144,7 +144,7 @@ int fscache_submit_exclusive_op(struct fscache_object *object, | |||
| 144 | */ | 144 | */ |
| 145 | static void fscache_report_unexpected_submission(struct fscache_object *object, | 145 | static void fscache_report_unexpected_submission(struct fscache_object *object, |
| 146 | struct fscache_operation *op, | 146 | struct fscache_operation *op, |
| 147 | unsigned long ostate) | 147 | const struct fscache_state *ostate) |
| 148 | { | 148 | { |
| 149 | static bool once_only; | 149 | static bool once_only; |
| 150 | struct fscache_operation *p; | 150 | struct fscache_operation *p; |
| @@ -155,11 +155,8 @@ static void fscache_report_unexpected_submission(struct fscache_object *object, | |||
| 155 | once_only = true; | 155 | once_only = true; |
| 156 | 156 | ||
| 157 | kdebug("unexpected submission OP%x [OBJ%x %s]", | 157 | kdebug("unexpected submission OP%x [OBJ%x %s]", |
| 158 | op->debug_id, object->debug_id, | 158 | op->debug_id, object->debug_id, object->state->name); |
| 159 | fscache_object_states[object->state]); | 159 | kdebug("objstate=%s [%s]", object->state->name, ostate->name); |
| 160 | kdebug("objstate=%s [%s]", | ||
| 161 | fscache_object_states[object->state], | ||
| 162 | fscache_object_states[ostate]); | ||
| 163 | kdebug("objflags=%lx", object->flags); | 160 | kdebug("objflags=%lx", object->flags); |
| 164 | kdebug("objevent=%lx [%lx]", object->events, object->event_mask); | 161 | kdebug("objevent=%lx [%lx]", object->events, object->event_mask); |
| 165 | kdebug("ops=%u inp=%u exc=%u", | 162 | kdebug("ops=%u inp=%u exc=%u", |
| @@ -190,7 +187,7 @@ static void fscache_report_unexpected_submission(struct fscache_object *object, | |||
| 190 | int fscache_submit_op(struct fscache_object *object, | 187 | int fscache_submit_op(struct fscache_object *object, |
| 191 | struct fscache_operation *op) | 188 | struct fscache_operation *op) |
| 192 | { | 189 | { |
| 193 | unsigned long ostate; | 190 | const struct fscache_state *ostate; |
| 194 | int ret; | 191 | int ret; |
| 195 | 192 | ||
| 196 | _enter("{OBJ%x OP%x},{%u}", | 193 | _enter("{OBJ%x OP%x},{%u}", |
| @@ -226,16 +223,14 @@ int fscache_submit_op(struct fscache_object *object, | |||
| 226 | fscache_run_op(object, op); | 223 | fscache_run_op(object, op); |
| 227 | } | 224 | } |
| 228 | ret = 0; | 225 | ret = 0; |
| 229 | } else if (object->state == FSCACHE_OBJECT_CREATING) { | 226 | } else if (test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)) { |
| 230 | op->object = object; | 227 | op->object = object; |
| 231 | object->n_ops++; | 228 | object->n_ops++; |
| 232 | atomic_inc(&op->usage); | 229 | atomic_inc(&op->usage); |
| 233 | list_add_tail(&op->pend_link, &object->pending_ops); | 230 | list_add_tail(&op->pend_link, &object->pending_ops); |
| 234 | fscache_stat(&fscache_n_op_pend); | 231 | fscache_stat(&fscache_n_op_pend); |
| 235 | ret = 0; | 232 | ret = 0; |
| 236 | } else if (object->state == FSCACHE_OBJECT_DYING || | 233 | } else if (fscache_object_is_dying(object)) { |
| 237 | object->state == FSCACHE_OBJECT_LC_DYING || | ||
| 238 | object->state == FSCACHE_OBJECT_WITHDRAWING) { | ||
| 239 | fscache_stat(&fscache_n_op_rejected); | 234 | fscache_stat(&fscache_n_op_rejected); |
| 240 | op->state = FSCACHE_OP_ST_CANCELLED; | 235 | op->state = FSCACHE_OP_ST_CANCELLED; |
| 241 | ret = -ENOBUFS; | 236 | ret = -ENOBUFS; |
| @@ -265,8 +260,8 @@ void fscache_abort_object(struct fscache_object *object) | |||
| 265 | } | 260 | } |
| 266 | 261 | ||
| 267 | /* | 262 | /* |
| 268 | * jump start the operation processing on an object | 263 | * Jump start the operation processing on an object. The caller must hold |
| 269 | * - caller must hold object->lock | 264 | * object->lock. |
| 270 | */ | 265 | */ |
| 271 | void fscache_start_operations(struct fscache_object *object) | 266 | void fscache_start_operations(struct fscache_object *object) |
| 272 | { | 267 | { |
| @@ -428,14 +423,10 @@ void fscache_put_operation(struct fscache_operation *op) | |||
| 428 | 423 | ||
| 429 | object = op->object; | 424 | object = op->object; |
| 430 | 425 | ||
| 431 | if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) { | 426 | if (test_bit(FSCACHE_OP_DEC_READ_CNT, &op->flags)) |
| 432 | if (atomic_dec_and_test(&object->n_reads)) { | 427 | atomic_dec(&object->n_reads); |
| 433 | clear_bit(FSCACHE_COOKIE_WAITING_ON_READS, | 428 | if (test_bit(FSCACHE_OP_UNUSE_COOKIE, &op->flags)) |
| 434 | &object->cookie->flags); | 429 | fscache_unuse_cookie(object); |
| 435 | wake_up_bit(&object->cookie->flags, | ||
| 436 | FSCACHE_COOKIE_WAITING_ON_READS); | ||
| 437 | } | ||
| 438 | } | ||
| 439 | 430 | ||
| 440 | /* now... we may get called with the object spinlock held, so we | 431 | /* now... we may get called with the object spinlock held, so we |
| 441 | * complete the cleanup here only if we can immediately acquire the | 432 | * complete the cleanup here only if we can immediately acquire the |
diff --git a/fs/fscache/page.c b/fs/fscache/page.c index ff000e52072d..d479ab3c63e4 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c | |||
| @@ -109,7 +109,7 @@ page_busy: | |||
| 109 | * allocator as the work threads writing to the cache may all end up | 109 | * allocator as the work threads writing to the cache may all end up |
| 110 | * sleeping on memory allocation, so we may need to impose a timeout | 110 | * sleeping on memory allocation, so we may need to impose a timeout |
| 111 | * too. */ | 111 | * too. */ |
| 112 | if (!(gfp & __GFP_WAIT)) { | 112 | if (!(gfp & __GFP_WAIT) || !(gfp & __GFP_FS)) { |
| 113 | fscache_stat(&fscache_n_store_vmscan_busy); | 113 | fscache_stat(&fscache_n_store_vmscan_busy); |
| 114 | return false; | 114 | return false; |
| 115 | } | 115 | } |
| @@ -163,10 +163,12 @@ static void fscache_attr_changed_op(struct fscache_operation *op) | |||
| 163 | 163 | ||
| 164 | fscache_stat(&fscache_n_attr_changed_calls); | 164 | fscache_stat(&fscache_n_attr_changed_calls); |
| 165 | 165 | ||
| 166 | if (fscache_object_is_active(object)) { | 166 | if (fscache_object_is_active(object) && |
| 167 | fscache_use_cookie(object)) { | ||
| 167 | fscache_stat(&fscache_n_cop_attr_changed); | 168 | fscache_stat(&fscache_n_cop_attr_changed); |
| 168 | ret = object->cache->ops->attr_changed(object); | 169 | ret = object->cache->ops->attr_changed(object); |
| 169 | fscache_stat_d(&fscache_n_cop_attr_changed); | 170 | fscache_stat_d(&fscache_n_cop_attr_changed); |
| 171 | fscache_unuse_cookie(object); | ||
| 170 | if (ret < 0) | 172 | if (ret < 0) |
| 171 | fscache_abort_object(object); | 173 | fscache_abort_object(object); |
| 172 | } | 174 | } |
| @@ -233,7 +235,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) | |||
| 233 | 235 | ||
| 234 | _enter("{OP%x}", op->op.debug_id); | 236 | _enter("{OP%x}", op->op.debug_id); |
| 235 | 237 | ||
| 236 | ASSERTCMP(op->n_pages, ==, 0); | 238 | ASSERTCMP(atomic_read(&op->n_pages), ==, 0); |
| 237 | 239 | ||
| 238 | fscache_hist(fscache_retrieval_histogram, op->start_time); | 240 | fscache_hist(fscache_retrieval_histogram, op->start_time); |
| 239 | if (op->context) | 241 | if (op->context) |
| @@ -246,6 +248,7 @@ static void fscache_release_retrieval_op(struct fscache_operation *_op) | |||
| 246 | * allocate a retrieval op | 248 | * allocate a retrieval op |
| 247 | */ | 249 | */ |
| 248 | static struct fscache_retrieval *fscache_alloc_retrieval( | 250 | static struct fscache_retrieval *fscache_alloc_retrieval( |
| 251 | struct fscache_cookie *cookie, | ||
| 249 | struct address_space *mapping, | 252 | struct address_space *mapping, |
| 250 | fscache_rw_complete_t end_io_func, | 253 | fscache_rw_complete_t end_io_func, |
| 251 | void *context) | 254 | void *context) |
| @@ -260,7 +263,10 @@ static struct fscache_retrieval *fscache_alloc_retrieval( | |||
| 260 | } | 263 | } |
| 261 | 264 | ||
| 262 | fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); | 265 | fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); |
| 263 | op->op.flags = FSCACHE_OP_MYTHREAD | (1 << FSCACHE_OP_WAITING); | 266 | atomic_inc(&cookie->n_active); |
| 267 | op->op.flags = FSCACHE_OP_MYTHREAD | | ||
| 268 | (1UL << FSCACHE_OP_WAITING) | | ||
| 269 | (1UL << FSCACHE_OP_UNUSE_COOKIE); | ||
| 264 | op->mapping = mapping; | 270 | op->mapping = mapping; |
| 265 | op->end_io_func = end_io_func; | 271 | op->end_io_func = end_io_func; |
| 266 | op->context = context; | 272 | op->context = context; |
| @@ -310,7 +316,7 @@ static void fscache_do_cancel_retrieval(struct fscache_operation *_op) | |||
| 310 | struct fscache_retrieval *op = | 316 | struct fscache_retrieval *op = |
| 311 | container_of(_op, struct fscache_retrieval, op); | 317 | container_of(_op, struct fscache_retrieval, op); |
| 312 | 318 | ||
| 313 | op->n_pages = 0; | 319 | atomic_set(&op->n_pages, 0); |
| 314 | } | 320 | } |
| 315 | 321 | ||
| 316 | /* | 322 | /* |
| @@ -394,12 +400,13 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | |||
| 394 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | 400 | if (fscache_wait_for_deferred_lookup(cookie) < 0) |
| 395 | return -ERESTARTSYS; | 401 | return -ERESTARTSYS; |
| 396 | 402 | ||
| 397 | op = fscache_alloc_retrieval(page->mapping, end_io_func, context); | 403 | op = fscache_alloc_retrieval(cookie, page->mapping, |
| 404 | end_io_func,context); | ||
| 398 | if (!op) { | 405 | if (!op) { |
| 399 | _leave(" = -ENOMEM"); | 406 | _leave(" = -ENOMEM"); |
| 400 | return -ENOMEM; | 407 | return -ENOMEM; |
| 401 | } | 408 | } |
| 402 | op->n_pages = 1; | 409 | atomic_set(&op->n_pages, 1); |
| 403 | 410 | ||
| 404 | spin_lock(&cookie->lock); | 411 | spin_lock(&cookie->lock); |
| 405 | 412 | ||
| @@ -408,7 +415,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, | |||
| 408 | object = hlist_entry(cookie->backing_objects.first, | 415 | object = hlist_entry(cookie->backing_objects.first, |
| 409 | struct fscache_object, cookie_link); | 416 | struct fscache_object, cookie_link); |
| 410 | 417 | ||
| 411 | ASSERTCMP(object->state, >, FSCACHE_OBJECT_LOOKING_UP); | 418 | ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)); |
| 412 | 419 | ||
| 413 | atomic_inc(&object->n_reads); | 420 | atomic_inc(&object->n_reads); |
| 414 | __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); | 421 | __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); |
| @@ -465,6 +472,7 @@ nobufs_unlock_dec: | |||
| 465 | atomic_dec(&object->n_reads); | 472 | atomic_dec(&object->n_reads); |
| 466 | nobufs_unlock: | 473 | nobufs_unlock: |
| 467 | spin_unlock(&cookie->lock); | 474 | spin_unlock(&cookie->lock); |
| 475 | atomic_dec(&cookie->n_active); | ||
| 468 | kfree(op); | 476 | kfree(op); |
| 469 | nobufs: | 477 | nobufs: |
| 470 | fscache_stat(&fscache_n_retrievals_nobufs); | 478 | fscache_stat(&fscache_n_retrievals_nobufs); |
| @@ -522,10 +530,10 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, | |||
| 522 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | 530 | if (fscache_wait_for_deferred_lookup(cookie) < 0) |
| 523 | return -ERESTARTSYS; | 531 | return -ERESTARTSYS; |
| 524 | 532 | ||
| 525 | op = fscache_alloc_retrieval(mapping, end_io_func, context); | 533 | op = fscache_alloc_retrieval(cookie, mapping, end_io_func, context); |
| 526 | if (!op) | 534 | if (!op) |
| 527 | return -ENOMEM; | 535 | return -ENOMEM; |
| 528 | op->n_pages = *nr_pages; | 536 | atomic_set(&op->n_pages, *nr_pages); |
| 529 | 537 | ||
| 530 | spin_lock(&cookie->lock); | 538 | spin_lock(&cookie->lock); |
| 531 | 539 | ||
| @@ -589,6 +597,7 @@ nobufs_unlock_dec: | |||
| 589 | atomic_dec(&object->n_reads); | 597 | atomic_dec(&object->n_reads); |
| 590 | nobufs_unlock: | 598 | nobufs_unlock: |
| 591 | spin_unlock(&cookie->lock); | 599 | spin_unlock(&cookie->lock); |
| 600 | atomic_dec(&cookie->n_active); | ||
| 592 | kfree(op); | 601 | kfree(op); |
| 593 | nobufs: | 602 | nobufs: |
| 594 | fscache_stat(&fscache_n_retrievals_nobufs); | 603 | fscache_stat(&fscache_n_retrievals_nobufs); |
| @@ -631,10 +640,10 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, | |||
| 631 | if (fscache_wait_for_deferred_lookup(cookie) < 0) | 640 | if (fscache_wait_for_deferred_lookup(cookie) < 0) |
| 632 | return -ERESTARTSYS; | 641 | return -ERESTARTSYS; |
| 633 | 642 | ||
| 634 | op = fscache_alloc_retrieval(page->mapping, NULL, NULL); | 643 | op = fscache_alloc_retrieval(cookie, page->mapping, NULL, NULL); |
| 635 | if (!op) | 644 | if (!op) |
| 636 | return -ENOMEM; | 645 | return -ENOMEM; |
| 637 | op->n_pages = 1; | 646 | atomic_set(&op->n_pages, 1); |
| 638 | 647 | ||
| 639 | spin_lock(&cookie->lock); | 648 | spin_lock(&cookie->lock); |
| 640 | 649 | ||
| @@ -675,6 +684,7 @@ error: | |||
| 675 | 684 | ||
| 676 | nobufs_unlock: | 685 | nobufs_unlock: |
| 677 | spin_unlock(&cookie->lock); | 686 | spin_unlock(&cookie->lock); |
| 687 | atomic_dec(&cookie->n_active); | ||
| 678 | kfree(op); | 688 | kfree(op); |
| 679 | nobufs: | 689 | nobufs: |
| 680 | fscache_stat(&fscache_n_allocs_nobufs); | 690 | fscache_stat(&fscache_n_allocs_nobufs); |
| @@ -729,8 +739,9 @@ static void fscache_write_op(struct fscache_operation *_op) | |||
| 729 | */ | 739 | */ |
| 730 | spin_unlock(&object->lock); | 740 | spin_unlock(&object->lock); |
| 731 | fscache_op_complete(&op->op, false); | 741 | fscache_op_complete(&op->op, false); |
| 732 | _leave(" [cancel] op{f=%lx s=%u} obj{s=%u f=%lx}", | 742 | _leave(" [cancel] op{f=%lx s=%u} obj{s=%s f=%lx}", |
| 733 | _op->flags, _op->state, object->state, object->flags); | 743 | _op->flags, _op->state, object->state->short_name, |
| 744 | object->flags); | ||
| 734 | return; | 745 | return; |
| 735 | } | 746 | } |
| 736 | 747 | ||
| @@ -796,11 +807,16 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) | |||
| 796 | 807 | ||
| 797 | _enter(""); | 808 | _enter(""); |
| 798 | 809 | ||
| 799 | while (spin_lock(&cookie->stores_lock), | 810 | for (;;) { |
| 800 | n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, | 811 | spin_lock(&cookie->stores_lock); |
| 801 | ARRAY_SIZE(results), | 812 | n = radix_tree_gang_lookup_tag(&cookie->stores, results, 0, |
| 802 | FSCACHE_COOKIE_PENDING_TAG), | 813 | ARRAY_SIZE(results), |
| 803 | n > 0) { | 814 | FSCACHE_COOKIE_PENDING_TAG); |
| 815 | if (n == 0) { | ||
| 816 | spin_unlock(&cookie->stores_lock); | ||
| 817 | break; | ||
| 818 | } | ||
| 819 | |||
| 804 | for (i = n - 1; i >= 0; i--) { | 820 | for (i = n - 1; i >= 0; i--) { |
| 805 | page = results[i]; | 821 | page = results[i]; |
| 806 | radix_tree_delete(&cookie->stores, page->index); | 822 | radix_tree_delete(&cookie->stores, page->index); |
| @@ -812,7 +828,6 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) | |||
| 812 | page_cache_release(results[i]); | 828 | page_cache_release(results[i]); |
| 813 | } | 829 | } |
| 814 | 830 | ||
| 815 | spin_unlock(&cookie->stores_lock); | ||
| 816 | _leave(""); | 831 | _leave(""); |
| 817 | } | 832 | } |
| 818 | 833 | ||
| @@ -829,14 +844,12 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie) | |||
| 829 | * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is | 844 | * (1) negative lookup, object not yet created (FSCACHE_COOKIE_CREATING is |
| 830 | * set) | 845 | * set) |
| 831 | * | 846 | * |
| 832 | * (a) no writes yet (set FSCACHE_COOKIE_PENDING_FILL and queue deferred | 847 | * (a) no writes yet |
| 833 | * fill op) | ||
| 834 | * | 848 | * |
| 835 | * (b) writes deferred till post-creation (mark page for writing and | 849 | * (b) writes deferred till post-creation (mark page for writing and |
| 836 | * return immediately) | 850 | * return immediately) |
| 837 | * | 851 | * |
| 838 | * (2) negative lookup, object created, initial fill being made from netfs | 852 | * (2) negative lookup, object created, initial fill being made from netfs |
| 839 | * (FSCACHE_COOKIE_INITIAL_FILL is set) | ||
| 840 | * | 853 | * |
| 841 | * (a) fill point not yet reached this page (mark page for writing and | 854 | * (a) fill point not yet reached this page (mark page for writing and |
| 842 | * return) | 855 | * return) |
| @@ -873,7 +886,9 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
| 873 | 886 | ||
| 874 | fscache_operation_init(&op->op, fscache_write_op, | 887 | fscache_operation_init(&op->op, fscache_write_op, |
| 875 | fscache_release_write_op); | 888 | fscache_release_write_op); |
| 876 | op->op.flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_WAITING); | 889 | op->op.flags = FSCACHE_OP_ASYNC | |
| 890 | (1 << FSCACHE_OP_WAITING) | | ||
| 891 | (1 << FSCACHE_OP_UNUSE_COOKIE); | ||
| 877 | 892 | ||
| 878 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); | 893 | ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM); |
| 879 | if (ret < 0) | 894 | if (ret < 0) |
| @@ -919,6 +934,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, | |||
| 919 | op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); | 934 | op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); |
| 920 | op->store_limit = object->store_limit; | 935 | op->store_limit = object->store_limit; |
| 921 | 936 | ||
| 937 | atomic_inc(&cookie->n_active); | ||
| 922 | if (fscache_submit_op(object, &op->op) < 0) | 938 | if (fscache_submit_op(object, &op->op) < 0) |
| 923 | goto submit_failed; | 939 | goto submit_failed; |
| 924 | 940 | ||
| @@ -945,6 +961,7 @@ already_pending: | |||
| 945 | return 0; | 961 | return 0; |
| 946 | 962 | ||
| 947 | submit_failed: | 963 | submit_failed: |
| 964 | atomic_dec(&cookie->n_active); | ||
| 948 | spin_lock(&cookie->stores_lock); | 965 | spin_lock(&cookie->stores_lock); |
| 949 | radix_tree_delete(&cookie->stores, page->index); | 966 | radix_tree_delete(&cookie->stores, page->index); |
| 950 | spin_unlock(&cookie->stores_lock); | 967 | spin_unlock(&cookie->stores_lock); |
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f3f783dc4f75..0eda52738ec4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c | |||
| @@ -14,7 +14,7 @@ | |||
| 14 | #include <linux/namei.h> | 14 | #include <linux/namei.h> |
| 15 | #include <linux/slab.h> | 15 | #include <linux/slab.h> |
| 16 | 16 | ||
| 17 | static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) | 17 | static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx) |
| 18 | { | 18 | { |
| 19 | struct fuse_conn *fc = get_fuse_conn(dir); | 19 | struct fuse_conn *fc = get_fuse_conn(dir); |
| 20 | struct fuse_inode *fi = get_fuse_inode(dir); | 20 | struct fuse_inode *fi = get_fuse_inode(dir); |
| @@ -25,7 +25,7 @@ static bool fuse_use_readdirplus(struct inode *dir, struct file *filp) | |||
| 25 | return true; | 25 | return true; |
| 26 | if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) | 26 | if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state)) |
| 27 | return true; | 27 | return true; |
| 28 | if (filp->f_pos == 0) | 28 | if (ctx->pos == 0) |
| 29 | return true; | 29 | return true; |
| 30 | return false; | 30 | return false; |
| 31 | } | 31 | } |
| @@ -1165,25 +1165,23 @@ static int fuse_permission(struct inode *inode, int mask) | |||
| 1165 | } | 1165 | } |
| 1166 | 1166 | ||
| 1167 | static int parse_dirfile(char *buf, size_t nbytes, struct file *file, | 1167 | static int parse_dirfile(char *buf, size_t nbytes, struct file *file, |
| 1168 | void *dstbuf, filldir_t filldir) | 1168 | struct dir_context *ctx) |
| 1169 | { | 1169 | { |
| 1170 | while (nbytes >= FUSE_NAME_OFFSET) { | 1170 | while (nbytes >= FUSE_NAME_OFFSET) { |
| 1171 | struct fuse_dirent *dirent = (struct fuse_dirent *) buf; | 1171 | struct fuse_dirent *dirent = (struct fuse_dirent *) buf; |
| 1172 | size_t reclen = FUSE_DIRENT_SIZE(dirent); | 1172 | size_t reclen = FUSE_DIRENT_SIZE(dirent); |
| 1173 | int over; | ||
| 1174 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) | 1173 | if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) |
| 1175 | return -EIO; | 1174 | return -EIO; |
| 1176 | if (reclen > nbytes) | 1175 | if (reclen > nbytes) |
| 1177 | break; | 1176 | break; |
| 1178 | 1177 | ||
| 1179 | over = filldir(dstbuf, dirent->name, dirent->namelen, | 1178 | if (!dir_emit(ctx, dirent->name, dirent->namelen, |
| 1180 | file->f_pos, dirent->ino, dirent->type); | 1179 | dirent->ino, dirent->type)) |
| 1181 | if (over) | ||
| 1182 | break; | 1180 | break; |
| 1183 | 1181 | ||
| 1184 | buf += reclen; | 1182 | buf += reclen; |
| 1185 | nbytes -= reclen; | 1183 | nbytes -= reclen; |
| 1186 | file->f_pos = dirent->off; | 1184 | ctx->pos = dirent->off; |
| 1187 | } | 1185 | } |
| 1188 | 1186 | ||
| 1189 | return 0; | 1187 | return 0; |
| @@ -1284,7 +1282,7 @@ out: | |||
| 1284 | } | 1282 | } |
| 1285 | 1283 | ||
| 1286 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | 1284 | static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, |
| 1287 | void *dstbuf, filldir_t filldir, u64 attr_version) | 1285 | struct dir_context *ctx, u64 attr_version) |
| 1288 | { | 1286 | { |
| 1289 | struct fuse_direntplus *direntplus; | 1287 | struct fuse_direntplus *direntplus; |
| 1290 | struct fuse_dirent *dirent; | 1288 | struct fuse_dirent *dirent; |
| @@ -1309,10 +1307,9 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | |||
| 1309 | we need to send a FORGET for each of those | 1307 | we need to send a FORGET for each of those |
| 1310 | which we did not link. | 1308 | which we did not link. |
| 1311 | */ | 1309 | */ |
| 1312 | over = filldir(dstbuf, dirent->name, dirent->namelen, | 1310 | over = !dir_emit(ctx, dirent->name, dirent->namelen, |
| 1313 | file->f_pos, dirent->ino, | 1311 | dirent->ino, dirent->type); |
| 1314 | dirent->type); | 1312 | ctx->pos = dirent->off; |
| 1315 | file->f_pos = dirent->off; | ||
| 1316 | } | 1313 | } |
| 1317 | 1314 | ||
| 1318 | buf += reclen; | 1315 | buf += reclen; |
| @@ -1326,7 +1323,7 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, | |||
| 1326 | return 0; | 1323 | return 0; |
| 1327 | } | 1324 | } |
| 1328 | 1325 | ||
| 1329 | static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | 1326 | static int fuse_readdir(struct file *file, struct dir_context *ctx) |
| 1330 | { | 1327 | { |
| 1331 | int plus, err; | 1328 | int plus, err; |
| 1332 | size_t nbytes; | 1329 | size_t nbytes; |
| @@ -1349,17 +1346,17 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
| 1349 | return -ENOMEM; | 1346 | return -ENOMEM; |
| 1350 | } | 1347 | } |
| 1351 | 1348 | ||
| 1352 | plus = fuse_use_readdirplus(inode, file); | 1349 | plus = fuse_use_readdirplus(inode, ctx); |
| 1353 | req->out.argpages = 1; | 1350 | req->out.argpages = 1; |
| 1354 | req->num_pages = 1; | 1351 | req->num_pages = 1; |
| 1355 | req->pages[0] = page; | 1352 | req->pages[0] = page; |
| 1356 | req->page_descs[0].length = PAGE_SIZE; | 1353 | req->page_descs[0].length = PAGE_SIZE; |
| 1357 | if (plus) { | 1354 | if (plus) { |
| 1358 | attr_version = fuse_get_attr_version(fc); | 1355 | attr_version = fuse_get_attr_version(fc); |
| 1359 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, | 1356 | fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, |
| 1360 | FUSE_READDIRPLUS); | 1357 | FUSE_READDIRPLUS); |
| 1361 | } else { | 1358 | } else { |
| 1362 | fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, | 1359 | fuse_read_fill(req, file, ctx->pos, PAGE_SIZE, |
| 1363 | FUSE_READDIR); | 1360 | FUSE_READDIR); |
| 1364 | } | 1361 | } |
| 1365 | fuse_request_send(fc, req); | 1362 | fuse_request_send(fc, req); |
| @@ -1369,11 +1366,11 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) | |||
| 1369 | if (!err) { | 1366 | if (!err) { |
| 1370 | if (plus) { | 1367 | if (plus) { |
| 1371 | err = parse_dirplusfile(page_address(page), nbytes, | 1368 | err = parse_dirplusfile(page_address(page), nbytes, |
| 1372 | file, dstbuf, filldir, | 1369 | file, ctx, |
| 1373 | attr_version); | 1370 | attr_version); |
| 1374 | } else { | 1371 | } else { |
| 1375 | err = parse_dirfile(page_address(page), nbytes, file, | 1372 | err = parse_dirfile(page_address(page), nbytes, file, |
| 1376 | dstbuf, filldir); | 1373 | ctx); |
| 1377 | } | 1374 | } |
| 1378 | } | 1375 | } |
| 1379 | 1376 | ||
| @@ -1886,7 +1883,7 @@ static const struct inode_operations fuse_dir_inode_operations = { | |||
| 1886 | static const struct file_operations fuse_dir_operations = { | 1883 | static const struct file_operations fuse_dir_operations = { |
| 1887 | .llseek = generic_file_llseek, | 1884 | .llseek = generic_file_llseek, |
| 1888 | .read = generic_read_dir, | 1885 | .read = generic_read_dir, |
| 1889 | .readdir = fuse_readdir, | 1886 | .iterate = fuse_readdir, |
| 1890 | .open = fuse_dir_open, | 1887 | .open = fuse_dir_open, |
| 1891 | .release = fuse_dir_release, | 1888 | .release = fuse_dir_release, |
| 1892 | .fsync = fuse_dir_fsync, | 1889 | .fsync = fuse_dir_fsync, |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e570081f9f76..35f281033142 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
| @@ -2470,13 +2470,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, | |||
| 2470 | .mode = mode | 2470 | .mode = mode |
| 2471 | }; | 2471 | }; |
| 2472 | int err; | 2472 | int err; |
| 2473 | bool lock_inode = !(mode & FALLOC_FL_KEEP_SIZE) || | ||
| 2474 | (mode & FALLOC_FL_PUNCH_HOLE); | ||
| 2473 | 2475 | ||
| 2474 | if (fc->no_fallocate) | 2476 | if (fc->no_fallocate) |
| 2475 | return -EOPNOTSUPP; | 2477 | return -EOPNOTSUPP; |
| 2476 | 2478 | ||
| 2477 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 2479 | if (lock_inode) { |
| 2478 | mutex_lock(&inode->i_mutex); | 2480 | mutex_lock(&inode->i_mutex); |
| 2479 | fuse_set_nowrite(inode); | 2481 | if (mode & FALLOC_FL_PUNCH_HOLE) |
| 2482 | fuse_set_nowrite(inode); | ||
| 2480 | } | 2483 | } |
| 2481 | 2484 | ||
| 2482 | req = fuse_get_req_nopages(fc); | 2485 | req = fuse_get_req_nopages(fc); |
| @@ -2511,8 +2514,9 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, | |||
| 2511 | fuse_invalidate_attr(inode); | 2514 | fuse_invalidate_attr(inode); |
| 2512 | 2515 | ||
| 2513 | out: | 2516 | out: |
| 2514 | if (mode & FALLOC_FL_PUNCH_HOLE) { | 2517 | if (lock_inode) { |
| 2515 | fuse_release_nowrite(inode); | 2518 | if (mode & FALLOC_FL_PUNCH_HOLE) |
| 2519 | fuse_release_nowrite(inode); | ||
| 2516 | mutex_unlock(&inode->i_mutex); | 2520 | mutex_unlock(&inode->i_mutex); |
| 2517 | } | 2521 | } |
| 2518 | 2522 | ||
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 5a376ab81feb..90c6a8faaecb 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig | |||
| @@ -20,13 +20,12 @@ config GFS2_FS | |||
| 20 | be found here: http://sources.redhat.com/cluster | 20 | be found here: http://sources.redhat.com/cluster |
| 21 | 21 | ||
| 22 | The "nolock" lock module is now built in to GFS2 by default. If | 22 | The "nolock" lock module is now built in to GFS2 by default. If |
| 23 | you want to use the DLM, be sure to enable HOTPLUG and IPv4/6 | 23 | you want to use the DLM, be sure to enable IPv4/6 networking. |
| 24 | networking. | ||
| 25 | 24 | ||
| 26 | config GFS2_FS_LOCKING_DLM | 25 | config GFS2_FS_LOCKING_DLM |
| 27 | bool "GFS2 DLM locking" | 26 | bool "GFS2 DLM locking" |
| 28 | depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ | 27 | depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ |
| 29 | HOTPLUG && CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS) | 28 | CONFIGFS_FS && SYSFS && (DLM=y || DLM=GFS2_FS) |
| 30 | help | 29 | help |
| 31 | Multiple node locking module for GFS2 | 30 | Multiple node locking module for GFS2 |
| 32 | 31 | ||
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 0bad69ed6336..ee48ad37d9c0 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c | |||
| @@ -110,7 +110,7 @@ static int gfs2_writepage_common(struct page *page, | |||
| 110 | /* Is the page fully outside i_size? (truncate in progress) */ | 110 | /* Is the page fully outside i_size? (truncate in progress) */ |
| 111 | offset = i_size & (PAGE_CACHE_SIZE-1); | 111 | offset = i_size & (PAGE_CACHE_SIZE-1); |
| 112 | if (page->index > end_index || (page->index == end_index && !offset)) { | 112 | if (page->index > end_index || (page->index == end_index && !offset)) { |
| 113 | page->mapping->a_ops->invalidatepage(page, 0); | 113 | page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 114 | goto out; | 114 | goto out; |
| 115 | } | 115 | } |
| 116 | return 1; | 116 | return 1; |
| @@ -299,7 +299,8 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping, | |||
| 299 | 299 | ||
| 300 | /* Is the page fully outside i_size? (truncate in progress) */ | 300 | /* Is the page fully outside i_size? (truncate in progress) */ |
| 301 | if (page->index > end_index || (page->index == end_index && !offset)) { | 301 | if (page->index > end_index || (page->index == end_index && !offset)) { |
| 302 | page->mapping->a_ops->invalidatepage(page, 0); | 302 | page->mapping->a_ops->invalidatepage(page, 0, |
| 303 | PAGE_CACHE_SIZE); | ||
| 303 | unlock_page(page); | 304 | unlock_page(page); |
| 304 | continue; | 305 | continue; |
| 305 | } | 306 | } |
| @@ -943,27 +944,33 @@ static void gfs2_discard(struct gfs2_sbd *sdp, struct buffer_head *bh) | |||
| 943 | unlock_buffer(bh); | 944 | unlock_buffer(bh); |
| 944 | } | 945 | } |
| 945 | 946 | ||
| 946 | static void gfs2_invalidatepage(struct page *page, unsigned long offset) | 947 | static void gfs2_invalidatepage(struct page *page, unsigned int offset, |
| 948 | unsigned int length) | ||
| 947 | { | 949 | { |
| 948 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); | 950 | struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host); |
| 951 | unsigned int stop = offset + length; | ||
| 952 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
| 949 | struct buffer_head *bh, *head; | 953 | struct buffer_head *bh, *head; |
| 950 | unsigned long pos = 0; | 954 | unsigned long pos = 0; |
| 951 | 955 | ||
| 952 | BUG_ON(!PageLocked(page)); | 956 | BUG_ON(!PageLocked(page)); |
| 953 | if (offset == 0) | 957 | if (!partial_page) |
| 954 | ClearPageChecked(page); | 958 | ClearPageChecked(page); |
| 955 | if (!page_has_buffers(page)) | 959 | if (!page_has_buffers(page)) |
| 956 | goto out; | 960 | goto out; |
| 957 | 961 | ||
| 958 | bh = head = page_buffers(page); | 962 | bh = head = page_buffers(page); |
| 959 | do { | 963 | do { |
| 964 | if (pos + bh->b_size > stop) | ||
| 965 | return; | ||
| 966 | |||
| 960 | if (offset <= pos) | 967 | if (offset <= pos) |
| 961 | gfs2_discard(sdp, bh); | 968 | gfs2_discard(sdp, bh); |
| 962 | pos += bh->b_size; | 969 | pos += bh->b_size; |
| 963 | bh = bh->b_this_page; | 970 | bh = bh->b_this_page; |
| 964 | } while (bh != head); | 971 | } while (bh != head); |
| 965 | out: | 972 | out: |
| 966 | if (offset == 0) | 973 | if (!partial_page) |
| 967 | try_to_release_page(page, 0); | 974 | try_to_release_page(page, 0); |
| 968 | } | 975 | } |
| 969 | 976 | ||
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 93b5809c20bb..5e2f56fccf6b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c | |||
| @@ -1232,7 +1232,9 @@ static int do_grow(struct inode *inode, u64 size) | |||
| 1232 | unstuff = 1; | 1232 | unstuff = 1; |
| 1233 | } | 1233 | } |
| 1234 | 1234 | ||
| 1235 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT, 0); | 1235 | error = gfs2_trans_begin(sdp, RES_DINODE + RES_STATFS + RES_RG_BIT + |
| 1236 | (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF ? | ||
| 1237 | 0 : RES_QUOTA), 0); | ||
| 1236 | if (error) | 1238 | if (error) |
| 1237 | goto do_grow_release; | 1239 | goto do_grow_release; |
| 1238 | 1240 | ||
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index b631c9043460..0cb4c1557f20 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c | |||
| @@ -1125,13 +1125,14 @@ static int dir_double_exhash(struct gfs2_inode *dip) | |||
| 1125 | if (IS_ERR(hc)) | 1125 | if (IS_ERR(hc)) |
| 1126 | return PTR_ERR(hc); | 1126 | return PTR_ERR(hc); |
| 1127 | 1127 | ||
| 1128 | h = hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); | 1128 | hc2 = kmalloc(hsize_bytes * 2, GFP_NOFS | __GFP_NOWARN); |
| 1129 | if (hc2 == NULL) | 1129 | if (hc2 == NULL) |
| 1130 | hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); | 1130 | hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); |
| 1131 | 1131 | ||
| 1132 | if (!hc2) | 1132 | if (!hc2) |
| 1133 | return -ENOMEM; | 1133 | return -ENOMEM; |
| 1134 | 1134 | ||
| 1135 | h = hc2; | ||
| 1135 | error = gfs2_meta_inode_buffer(dip, &dibh); | 1136 | error = gfs2_meta_inode_buffer(dip, &dibh); |
| 1136 | if (error) | 1137 | if (error) |
| 1137 | goto out_kfree; | 1138 | goto out_kfree; |
| @@ -1212,9 +1213,7 @@ static int compare_dents(const void *a, const void *b) | |||
| 1212 | /** | 1213 | /** |
| 1213 | * do_filldir_main - read out directory entries | 1214 | * do_filldir_main - read out directory entries |
| 1214 | * @dip: The GFS2 inode | 1215 | * @dip: The GFS2 inode |
| 1215 | * @offset: The offset in the file to read from | 1216 | * @ctx: what to feed the entries to |
| 1216 | * @opaque: opaque data to pass to filldir | ||
| 1217 | * @filldir: The function to pass entries to | ||
| 1218 | * @darr: an array of struct gfs2_dirent pointers to read | 1217 | * @darr: an array of struct gfs2_dirent pointers to read |
| 1219 | * @entries: the number of entries in darr | 1218 | * @entries: the number of entries in darr |
| 1220 | * @copied: pointer to int that's non-zero if a entry has been copied out | 1219 | * @copied: pointer to int that's non-zero if a entry has been copied out |
| @@ -1224,11 +1223,10 @@ static int compare_dents(const void *a, const void *b) | |||
| 1224 | * the possibility that they will fall into different readdir buffers or | 1223 | * the possibility that they will fall into different readdir buffers or |
| 1225 | * that someone will want to seek to that location. | 1224 | * that someone will want to seek to that location. |
| 1226 | * | 1225 | * |
| 1227 | * Returns: errno, >0 on exception from filldir | 1226 | * Returns: errno, >0 if the actor tells you to stop |
| 1228 | */ | 1227 | */ |
| 1229 | 1228 | ||
| 1230 | static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | 1229 | static int do_filldir_main(struct gfs2_inode *dip, struct dir_context *ctx, |
| 1231 | void *opaque, filldir_t filldir, | ||
| 1232 | const struct gfs2_dirent **darr, u32 entries, | 1230 | const struct gfs2_dirent **darr, u32 entries, |
| 1233 | int *copied) | 1231 | int *copied) |
| 1234 | { | 1232 | { |
| @@ -1236,7 +1234,6 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
| 1236 | u64 off, off_next; | 1234 | u64 off, off_next; |
| 1237 | unsigned int x, y; | 1235 | unsigned int x, y; |
| 1238 | int run = 0; | 1236 | int run = 0; |
| 1239 | int error = 0; | ||
| 1240 | 1237 | ||
| 1241 | sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); | 1238 | sort(darr, entries, sizeof(struct gfs2_dirent *), compare_dents, NULL); |
| 1242 | 1239 | ||
| @@ -1253,9 +1250,9 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
| 1253 | off_next = be32_to_cpu(dent_next->de_hash); | 1250 | off_next = be32_to_cpu(dent_next->de_hash); |
| 1254 | off_next = gfs2_disk_hash2offset(off_next); | 1251 | off_next = gfs2_disk_hash2offset(off_next); |
| 1255 | 1252 | ||
| 1256 | if (off < *offset) | 1253 | if (off < ctx->pos) |
| 1257 | continue; | 1254 | continue; |
| 1258 | *offset = off; | 1255 | ctx->pos = off; |
| 1259 | 1256 | ||
| 1260 | if (off_next == off) { | 1257 | if (off_next == off) { |
| 1261 | if (*copied && !run) | 1258 | if (*copied && !run) |
| @@ -1264,26 +1261,25 @@ static int do_filldir_main(struct gfs2_inode *dip, u64 *offset, | |||
| 1264 | } else | 1261 | } else |
| 1265 | run = 0; | 1262 | run = 0; |
| 1266 | } else { | 1263 | } else { |
| 1267 | if (off < *offset) | 1264 | if (off < ctx->pos) |
| 1268 | continue; | 1265 | continue; |
| 1269 | *offset = off; | 1266 | ctx->pos = off; |
| 1270 | } | 1267 | } |
| 1271 | 1268 | ||
| 1272 | error = filldir(opaque, (const char *)(dent + 1), | 1269 | if (!dir_emit(ctx, (const char *)(dent + 1), |
| 1273 | be16_to_cpu(dent->de_name_len), | 1270 | be16_to_cpu(dent->de_name_len), |
| 1274 | off, be64_to_cpu(dent->de_inum.no_addr), | 1271 | be64_to_cpu(dent->de_inum.no_addr), |
| 1275 | be16_to_cpu(dent->de_type)); | 1272 | be16_to_cpu(dent->de_type))) |
| 1276 | if (error) | ||
| 1277 | return 1; | 1273 | return 1; |
| 1278 | 1274 | ||
| 1279 | *copied = 1; | 1275 | *copied = 1; |
| 1280 | } | 1276 | } |
| 1281 | 1277 | ||
| 1282 | /* Increment the *offset by one, so the next time we come into the | 1278 | /* Increment the ctx->pos by one, so the next time we come into the |
| 1283 | do_filldir fxn, we get the next entry instead of the last one in the | 1279 | do_filldir fxn, we get the next entry instead of the last one in the |
| 1284 | current leaf */ | 1280 | current leaf */ |
| 1285 | 1281 | ||
| 1286 | (*offset)++; | 1282 | ctx->pos++; |
| 1287 | 1283 | ||
| 1288 | return 0; | 1284 | return 0; |
| 1289 | } | 1285 | } |
| @@ -1307,8 +1303,8 @@ static void gfs2_free_sort_buffer(void *ptr) | |||
| 1307 | kfree(ptr); | 1303 | kfree(ptr); |
| 1308 | } | 1304 | } |
| 1309 | 1305 | ||
| 1310 | static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | 1306 | static int gfs2_dir_read_leaf(struct inode *inode, struct dir_context *ctx, |
| 1311 | filldir_t filldir, int *copied, unsigned *depth, | 1307 | int *copied, unsigned *depth, |
| 1312 | u64 leaf_no) | 1308 | u64 leaf_no) |
| 1313 | { | 1309 | { |
| 1314 | struct gfs2_inode *ip = GFS2_I(inode); | 1310 | struct gfs2_inode *ip = GFS2_I(inode); |
| @@ -1386,8 +1382,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, | |||
| 1386 | } while(lfn); | 1382 | } while(lfn); |
| 1387 | 1383 | ||
| 1388 | BUG_ON(entries2 != entries); | 1384 | BUG_ON(entries2 != entries); |
| 1389 | error = do_filldir_main(ip, offset, opaque, filldir, darr, | 1385 | error = do_filldir_main(ip, ctx, darr, entries, copied); |
| 1390 | entries, copied); | ||
| 1391 | out_free: | 1386 | out_free: |
| 1392 | for(i = 0; i < leaf; i++) | 1387 | for(i = 0; i < leaf; i++) |
| 1393 | brelse(larr[i]); | 1388 | brelse(larr[i]); |
| @@ -1446,15 +1441,13 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, | |||
| 1446 | /** | 1441 | /** |
| 1447 | * dir_e_read - Reads the entries from a directory into a filldir buffer | 1442 | * dir_e_read - Reads the entries from a directory into a filldir buffer |
| 1448 | * @dip: dinode pointer | 1443 | * @dip: dinode pointer |
| 1449 | * @offset: the hash of the last entry read shifted to the right once | 1444 | * @ctx: actor to feed the entries to |
| 1450 | * @opaque: buffer for the filldir function to fill | ||
| 1451 | * @filldir: points to the filldir function to use | ||
| 1452 | * | 1445 | * |
| 1453 | * Returns: errno | 1446 | * Returns: errno |
| 1454 | */ | 1447 | */ |
| 1455 | 1448 | ||
| 1456 | static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | 1449 | static int dir_e_read(struct inode *inode, struct dir_context *ctx, |
| 1457 | filldir_t filldir, struct file_ra_state *f_ra) | 1450 | struct file_ra_state *f_ra) |
| 1458 | { | 1451 | { |
| 1459 | struct gfs2_inode *dip = GFS2_I(inode); | 1452 | struct gfs2_inode *dip = GFS2_I(inode); |
| 1460 | u32 hsize, len = 0; | 1453 | u32 hsize, len = 0; |
| @@ -1465,7 +1458,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1465 | unsigned depth = 0; | 1458 | unsigned depth = 0; |
| 1466 | 1459 | ||
| 1467 | hsize = 1 << dip->i_depth; | 1460 | hsize = 1 << dip->i_depth; |
| 1468 | hash = gfs2_dir_offset2hash(*offset); | 1461 | hash = gfs2_dir_offset2hash(ctx->pos); |
| 1469 | index = hash >> (32 - dip->i_depth); | 1462 | index = hash >> (32 - dip->i_depth); |
| 1470 | 1463 | ||
| 1471 | if (dip->i_hash_cache == NULL) | 1464 | if (dip->i_hash_cache == NULL) |
| @@ -1477,7 +1470,7 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1477 | gfs2_dir_readahead(inode, hsize, index, f_ra); | 1470 | gfs2_dir_readahead(inode, hsize, index, f_ra); |
| 1478 | 1471 | ||
| 1479 | while (index < hsize) { | 1472 | while (index < hsize) { |
| 1480 | error = gfs2_dir_read_leaf(inode, offset, opaque, filldir, | 1473 | error = gfs2_dir_read_leaf(inode, ctx, |
| 1481 | &copied, &depth, | 1474 | &copied, &depth, |
| 1482 | be64_to_cpu(lp[index])); | 1475 | be64_to_cpu(lp[index])); |
| 1483 | if (error) | 1476 | if (error) |
| @@ -1492,8 +1485,8 @@ static int dir_e_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1492 | return error; | 1485 | return error; |
| 1493 | } | 1486 | } |
| 1494 | 1487 | ||
| 1495 | int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 1488 | int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, |
| 1496 | filldir_t filldir, struct file_ra_state *f_ra) | 1489 | struct file_ra_state *f_ra) |
| 1497 | { | 1490 | { |
| 1498 | struct gfs2_inode *dip = GFS2_I(inode); | 1491 | struct gfs2_inode *dip = GFS2_I(inode); |
| 1499 | struct gfs2_sbd *sdp = GFS2_SB(inode); | 1492 | struct gfs2_sbd *sdp = GFS2_SB(inode); |
| @@ -1507,7 +1500,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1507 | return 0; | 1500 | return 0; |
| 1508 | 1501 | ||
| 1509 | if (dip->i_diskflags & GFS2_DIF_EXHASH) | 1502 | if (dip->i_diskflags & GFS2_DIF_EXHASH) |
| 1510 | return dir_e_read(inode, offset, opaque, filldir, f_ra); | 1503 | return dir_e_read(inode, ctx, f_ra); |
| 1511 | 1504 | ||
| 1512 | if (!gfs2_is_stuffed(dip)) { | 1505 | if (!gfs2_is_stuffed(dip)) { |
| 1513 | gfs2_consist_inode(dip); | 1506 | gfs2_consist_inode(dip); |
| @@ -1539,7 +1532,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | |||
| 1539 | error = -EIO; | 1532 | error = -EIO; |
| 1540 | goto out; | 1533 | goto out; |
| 1541 | } | 1534 | } |
| 1542 | error = do_filldir_main(dip, offset, opaque, filldir, darr, | 1535 | error = do_filldir_main(dip, ctx, darr, |
| 1543 | dip->i_entries, &copied); | 1536 | dip->i_entries, &copied); |
| 1544 | out: | 1537 | out: |
| 1545 | kfree(darr); | 1538 | kfree(darr); |
| @@ -1555,9 +1548,9 @@ out: | |||
| 1555 | 1548 | ||
| 1556 | /** | 1549 | /** |
| 1557 | * gfs2_dir_search - Search a directory | 1550 | * gfs2_dir_search - Search a directory |
| 1558 | * @dip: The GFS2 inode | 1551 | * @dip: The GFS2 dir inode |
| 1559 | * @filename: | 1552 | * @name: The name we are looking up |
| 1560 | * @inode: | 1553 | * @fail_on_exist: Fail if the name exists rather than looking it up |
| 1561 | * | 1554 | * |
| 1562 | * This routine searches a directory for a file or another directory. | 1555 | * This routine searches a directory for a file or another directory. |
| 1563 | * Assumes a glock is held on dip. | 1556 | * Assumes a glock is held on dip. |
| @@ -1565,22 +1558,25 @@ out: | |||
| 1565 | * Returns: errno | 1558 | * Returns: errno |
| 1566 | */ | 1559 | */ |
| 1567 | 1560 | ||
| 1568 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name) | 1561 | struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name, |
| 1562 | bool fail_on_exist) | ||
| 1569 | { | 1563 | { |
| 1570 | struct buffer_head *bh; | 1564 | struct buffer_head *bh; |
| 1571 | struct gfs2_dirent *dent; | 1565 | struct gfs2_dirent *dent; |
| 1572 | struct inode *inode; | 1566 | u64 addr, formal_ino; |
| 1567 | u16 dtype; | ||
| 1573 | 1568 | ||
| 1574 | dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); | 1569 | dent = gfs2_dirent_search(dir, name, gfs2_dirent_find, &bh); |
| 1575 | if (dent) { | 1570 | if (dent) { |
| 1576 | if (IS_ERR(dent)) | 1571 | if (IS_ERR(dent)) |
| 1577 | return ERR_CAST(dent); | 1572 | return ERR_CAST(dent); |
| 1578 | inode = gfs2_inode_lookup(dir->i_sb, | 1573 | dtype = be16_to_cpu(dent->de_type); |
| 1579 | be16_to_cpu(dent->de_type), | 1574 | addr = be64_to_cpu(dent->de_inum.no_addr); |
| 1580 | be64_to_cpu(dent->de_inum.no_addr), | 1575 | formal_ino = be64_to_cpu(dent->de_inum.no_formal_ino); |
| 1581 | be64_to_cpu(dent->de_inum.no_formal_ino), 0); | ||
| 1582 | brelse(bh); | 1576 | brelse(bh); |
| 1583 | return inode; | 1577 | if (fail_on_exist) |
| 1578 | return ERR_PTR(-EEXIST); | ||
| 1579 | return gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0); | ||
| 1584 | } | 1580 | } |
| 1585 | return ERR_PTR(-ENOENT); | 1581 | return ERR_PTR(-ENOENT); |
| 1586 | } | 1582 | } |
diff --git a/fs/gfs2/dir.h b/fs/gfs2/dir.h index 98c960beab35..4f03bbd1873f 100644 --- a/fs/gfs2/dir.h +++ b/fs/gfs2/dir.h | |||
| @@ -18,14 +18,15 @@ struct gfs2_inode; | |||
| 18 | struct gfs2_inum; | 18 | struct gfs2_inum; |
| 19 | 19 | ||
| 20 | extern struct inode *gfs2_dir_search(struct inode *dir, | 20 | extern struct inode *gfs2_dir_search(struct inode *dir, |
| 21 | const struct qstr *filename); | 21 | const struct qstr *filename, |
| 22 | bool fail_on_exist); | ||
| 22 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, | 23 | extern int gfs2_dir_check(struct inode *dir, const struct qstr *filename, |
| 23 | const struct gfs2_inode *ip); | 24 | const struct gfs2_inode *ip); |
| 24 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, | 25 | extern int gfs2_dir_add(struct inode *inode, const struct qstr *filename, |
| 25 | const struct gfs2_inode *ip); | 26 | const struct gfs2_inode *ip); |
| 26 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); | 27 | extern int gfs2_dir_del(struct gfs2_inode *dip, const struct dentry *dentry); |
| 27 | extern int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, | 28 | extern int gfs2_dir_read(struct inode *inode, struct dir_context *ctx, |
| 28 | filldir_t filldir, struct file_ra_state *f_ra); | 29 | struct file_ra_state *f_ra); |
| 29 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, | 30 | extern int gfs2_dir_mvino(struct gfs2_inode *dip, const struct qstr *filename, |
| 30 | const struct gfs2_inode *nip, unsigned int new_type); | 31 | const struct gfs2_inode *nip, unsigned int new_type); |
| 31 | 32 | ||
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c index 9973df4ff565..8b9b3775e2e7 100644 --- a/fs/gfs2/export.c +++ b/fs/gfs2/export.c | |||
| @@ -64,6 +64,7 @@ static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len, | |||
| 64 | } | 64 | } |
| 65 | 65 | ||
| 66 | struct get_name_filldir { | 66 | struct get_name_filldir { |
| 67 | struct dir_context ctx; | ||
| 67 | struct gfs2_inum_host inum; | 68 | struct gfs2_inum_host inum; |
| 68 | char *name; | 69 | char *name; |
| 69 | }; | 70 | }; |
| @@ -88,9 +89,11 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
| 88 | struct inode *dir = parent->d_inode; | 89 | struct inode *dir = parent->d_inode; |
| 89 | struct inode *inode = child->d_inode; | 90 | struct inode *inode = child->d_inode; |
| 90 | struct gfs2_inode *dip, *ip; | 91 | struct gfs2_inode *dip, *ip; |
| 91 | struct get_name_filldir gnfd; | 92 | struct get_name_filldir gnfd = { |
| 93 | .ctx.actor = get_name_filldir, | ||
| 94 | .name = name | ||
| 95 | }; | ||
| 92 | struct gfs2_holder gh; | 96 | struct gfs2_holder gh; |
| 93 | u64 offset = 0; | ||
| 94 | int error; | 97 | int error; |
| 95 | struct file_ra_state f_ra = { .start = 0 }; | 98 | struct file_ra_state f_ra = { .start = 0 }; |
| 96 | 99 | ||
| @@ -106,13 +109,12 @@ static int gfs2_get_name(struct dentry *parent, char *name, | |||
| 106 | *name = 0; | 109 | *name = 0; |
| 107 | gnfd.inum.no_addr = ip->i_no_addr; | 110 | gnfd.inum.no_addr = ip->i_no_addr; |
| 108 | gnfd.inum.no_formal_ino = ip->i_no_formal_ino; | 111 | gnfd.inum.no_formal_ino = ip->i_no_formal_ino; |
| 109 | gnfd.name = name; | ||
| 110 | 112 | ||
| 111 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); | 113 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &gh); |
| 112 | if (error) | 114 | if (error) |
| 113 | return error; | 115 | return error; |
| 114 | 116 | ||
| 115 | error = gfs2_dir_read(dir, &offset, &gnfd, get_name_filldir, &f_ra); | 117 | error = gfs2_dir_read(dir, &gnfd.ctx, &f_ra); |
| 116 | 118 | ||
| 117 | gfs2_glock_dq_uninit(&gh); | 119 | gfs2_glock_dq_uninit(&gh); |
| 118 | 120 | ||
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ad0dc38d87ab..f99f9e8a325f 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c | |||
| @@ -82,35 +82,28 @@ static loff_t gfs2_llseek(struct file *file, loff_t offset, int whence) | |||
| 82 | } | 82 | } |
| 83 | 83 | ||
| 84 | /** | 84 | /** |
| 85 | * gfs2_readdir - Read directory entries from a directory | 85 | * gfs2_readdir - Iterator for a directory |
| 86 | * @file: The directory to read from | 86 | * @file: The directory to read from |
| 87 | * @dirent: Buffer for dirents | 87 | * @ctx: What to feed directory entries to |
| 88 | * @filldir: Function used to do the copying | ||
| 89 | * | 88 | * |
| 90 | * Returns: errno | 89 | * Returns: errno |
| 91 | */ | 90 | */ |
| 92 | 91 | ||
| 93 | static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | 92 | static int gfs2_readdir(struct file *file, struct dir_context *ctx) |
| 94 | { | 93 | { |
| 95 | struct inode *dir = file->f_mapping->host; | 94 | struct inode *dir = file->f_mapping->host; |
| 96 | struct gfs2_inode *dip = GFS2_I(dir); | 95 | struct gfs2_inode *dip = GFS2_I(dir); |
| 97 | struct gfs2_holder d_gh; | 96 | struct gfs2_holder d_gh; |
| 98 | u64 offset = file->f_pos; | ||
| 99 | int error; | 97 | int error; |
| 100 | 98 | ||
| 101 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); | 99 | error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
| 102 | error = gfs2_glock_nq(&d_gh); | 100 | if (error) |
| 103 | if (error) { | ||
| 104 | gfs2_holder_uninit(&d_gh); | ||
| 105 | return error; | 101 | return error; |
| 106 | } | ||
| 107 | 102 | ||
| 108 | error = gfs2_dir_read(dir, &offset, dirent, filldir, &file->f_ra); | 103 | error = gfs2_dir_read(dir, ctx, &file->f_ra); |
| 109 | 104 | ||
| 110 | gfs2_glock_dq_uninit(&d_gh); | 105 | gfs2_glock_dq_uninit(&d_gh); |
| 111 | 106 | ||
| 112 | file->f_pos = offset; | ||
| 113 | |||
| 114 | return error; | 107 | return error; |
| 115 | } | 108 | } |
| 116 | 109 | ||
| @@ -538,21 +531,30 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 538 | } | 531 | } |
| 539 | 532 | ||
| 540 | /** | 533 | /** |
| 541 | * gfs2_open - open a file | 534 | * gfs2_open_common - This is common to open and atomic_open |
| 542 | * @inode: the inode to open | 535 | * @inode: The inode being opened |
| 543 | * @file: the struct file for this opening | 536 | * @file: The file being opened |
| 544 | * | 537 | * |
| 545 | * Returns: errno | 538 | * This maybe called under a glock or not depending upon how it has |
| 539 | * been called. We must always be called under a glock for regular | ||
| 540 | * files, however. For other file types, it does not matter whether | ||
| 541 | * we hold the glock or not. | ||
| 542 | * | ||
| 543 | * Returns: Error code or 0 for success | ||
| 546 | */ | 544 | */ |
| 547 | 545 | ||
| 548 | static int gfs2_open(struct inode *inode, struct file *file) | 546 | int gfs2_open_common(struct inode *inode, struct file *file) |
| 549 | { | 547 | { |
| 550 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 551 | struct gfs2_holder i_gh; | ||
| 552 | struct gfs2_file *fp; | 548 | struct gfs2_file *fp; |
| 553 | int error; | 549 | int ret; |
| 550 | |||
| 551 | if (S_ISREG(inode->i_mode)) { | ||
| 552 | ret = generic_file_open(inode, file); | ||
| 553 | if (ret) | ||
| 554 | return ret; | ||
| 555 | } | ||
| 554 | 556 | ||
| 555 | fp = kzalloc(sizeof(struct gfs2_file), GFP_KERNEL); | 557 | fp = kzalloc(sizeof(struct gfs2_file), GFP_NOFS); |
| 556 | if (!fp) | 558 | if (!fp) |
| 557 | return -ENOMEM; | 559 | return -ENOMEM; |
| 558 | 560 | ||
| @@ -560,29 +562,43 @@ static int gfs2_open(struct inode *inode, struct file *file) | |||
| 560 | 562 | ||
| 561 | gfs2_assert_warn(GFS2_SB(inode), !file->private_data); | 563 | gfs2_assert_warn(GFS2_SB(inode), !file->private_data); |
| 562 | file->private_data = fp; | 564 | file->private_data = fp; |
| 565 | return 0; | ||
| 566 | } | ||
| 567 | |||
| 568 | /** | ||
| 569 | * gfs2_open - open a file | ||
| 570 | * @inode: the inode to open | ||
| 571 | * @file: the struct file for this opening | ||
| 572 | * | ||
| 573 | * After atomic_open, this function is only used for opening files | ||
| 574 | * which are already cached. We must still get the glock for regular | ||
| 575 | * files to ensure that we have the file size uptodate for the large | ||
| 576 | * file check which is in the common code. That is only an issue for | ||
| 577 | * regular files though. | ||
| 578 | * | ||
| 579 | * Returns: errno | ||
| 580 | */ | ||
| 581 | |||
| 582 | static int gfs2_open(struct inode *inode, struct file *file) | ||
| 583 | { | ||
| 584 | struct gfs2_inode *ip = GFS2_I(inode); | ||
| 585 | struct gfs2_holder i_gh; | ||
| 586 | int error; | ||
| 587 | bool need_unlock = false; | ||
| 563 | 588 | ||
| 564 | if (S_ISREG(ip->i_inode.i_mode)) { | 589 | if (S_ISREG(ip->i_inode.i_mode)) { |
| 565 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, | 590 | error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, |
| 566 | &i_gh); | 591 | &i_gh); |
| 567 | if (error) | 592 | if (error) |
| 568 | goto fail; | 593 | return error; |
| 594 | need_unlock = true; | ||
| 595 | } | ||
| 569 | 596 | ||
| 570 | if (!(file->f_flags & O_LARGEFILE) && | 597 | error = gfs2_open_common(inode, file); |
| 571 | i_size_read(inode) > MAX_NON_LFS) { | ||
| 572 | error = -EOVERFLOW; | ||
| 573 | goto fail_gunlock; | ||
| 574 | } | ||
| 575 | 598 | ||
| 599 | if (need_unlock) | ||
| 576 | gfs2_glock_dq_uninit(&i_gh); | 600 | gfs2_glock_dq_uninit(&i_gh); |
| 577 | } | ||
| 578 | 601 | ||
| 579 | return 0; | ||
| 580 | |||
| 581 | fail_gunlock: | ||
| 582 | gfs2_glock_dq_uninit(&i_gh); | ||
| 583 | fail: | ||
| 584 | file->private_data = NULL; | ||
| 585 | kfree(fp); | ||
| 586 | return error; | 602 | return error; |
| 587 | } | 603 | } |
| 588 | 604 | ||
| @@ -1048,7 +1064,7 @@ const struct file_operations gfs2_file_fops = { | |||
| 1048 | }; | 1064 | }; |
| 1049 | 1065 | ||
| 1050 | const struct file_operations gfs2_dir_fops = { | 1066 | const struct file_operations gfs2_dir_fops = { |
| 1051 | .readdir = gfs2_readdir, | 1067 | .iterate = gfs2_readdir, |
| 1052 | .unlocked_ioctl = gfs2_ioctl, | 1068 | .unlocked_ioctl = gfs2_ioctl, |
| 1053 | .open = gfs2_open, | 1069 | .open = gfs2_open, |
| 1054 | .release = gfs2_release, | 1070 | .release = gfs2_release, |
| @@ -1078,7 +1094,7 @@ const struct file_operations gfs2_file_fops_nolock = { | |||
| 1078 | }; | 1094 | }; |
| 1079 | 1095 | ||
| 1080 | const struct file_operations gfs2_dir_fops_nolock = { | 1096 | const struct file_operations gfs2_dir_fops_nolock = { |
| 1081 | .readdir = gfs2_readdir, | 1097 | .iterate = gfs2_readdir, |
| 1082 | .unlocked_ioctl = gfs2_ioctl, | 1098 | .unlocked_ioctl = gfs2_ioctl, |
| 1083 | .open = gfs2_open, | 1099 | .open = gfs2_open, |
| 1084 | .release = gfs2_release, | 1100 | .release = gfs2_release, |
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index c66e99c97571..5f2e5224c51c 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c | |||
| @@ -54,7 +54,6 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
| 54 | struct gfs2_bufdata *bd, *tmp; | 54 | struct gfs2_bufdata *bd, *tmp; |
| 55 | struct buffer_head *bh; | 55 | struct buffer_head *bh; |
| 56 | const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); | 56 | const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock); |
| 57 | sector_t blocknr; | ||
| 58 | 57 | ||
| 59 | gfs2_log_lock(sdp); | 58 | gfs2_log_lock(sdp); |
| 60 | spin_lock(&sdp->sd_ail_lock); | 59 | spin_lock(&sdp->sd_ail_lock); |
| @@ -65,13 +64,6 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync) | |||
| 65 | continue; | 64 | continue; |
| 66 | gfs2_ail_error(gl, bh); | 65 | gfs2_ail_error(gl, bh); |
| 67 | } | 66 | } |
| 68 | blocknr = bh->b_blocknr; | ||
| 69 | bh->b_private = NULL; | ||
| 70 | gfs2_remove_from_ail(bd); /* drops ref on bh */ | ||
| 71 | |||
| 72 | bd->bd_bh = NULL; | ||
| 73 | bd->bd_blkno = blocknr; | ||
| 74 | |||
| 75 | gfs2_trans_add_revoke(sdp, bd); | 67 | gfs2_trans_add_revoke(sdp, bd); |
| 76 | } | 68 | } |
| 77 | GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); | 69 | GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 62b484e4a9e4..bbb2715171cd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -313,7 +313,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name, | |||
| 313 | goto out; | 313 | goto out; |
| 314 | } | 314 | } |
| 315 | 315 | ||
| 316 | inode = gfs2_dir_search(dir, name); | 316 | inode = gfs2_dir_search(dir, name, false); |
| 317 | if (IS_ERR(inode)) | 317 | if (IS_ERR(inode)) |
| 318 | error = PTR_ERR(inode); | 318 | error = PTR_ERR(inode); |
| 319 | out: | 319 | out: |
| @@ -346,17 +346,6 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
| 346 | if (!dip->i_inode.i_nlink) | 346 | if (!dip->i_inode.i_nlink) |
| 347 | return -ENOENT; | 347 | return -ENOENT; |
| 348 | 348 | ||
| 349 | error = gfs2_dir_check(&dip->i_inode, name, NULL); | ||
| 350 | switch (error) { | ||
| 351 | case -ENOENT: | ||
| 352 | error = 0; | ||
| 353 | break; | ||
| 354 | case 0: | ||
| 355 | return -EEXIST; | ||
| 356 | default: | ||
| 357 | return error; | ||
| 358 | } | ||
| 359 | |||
| 360 | if (dip->i_entries == (u32)-1) | 349 | if (dip->i_entries == (u32)-1) |
| 361 | return -EFBIG; | 350 | return -EFBIG; |
| 362 | if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) | 351 | if (S_ISDIR(mode) && dip->i_inode.i_nlink == (u32)-1) |
| @@ -546,6 +535,7 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
| 546 | * gfs2_create_inode - Create a new inode | 535 | * gfs2_create_inode - Create a new inode |
| 547 | * @dir: The parent directory | 536 | * @dir: The parent directory |
| 548 | * @dentry: The new dentry | 537 | * @dentry: The new dentry |
| 538 | * @file: If non-NULL, the file which is being opened | ||
| 549 | * @mode: The permissions on the new inode | 539 | * @mode: The permissions on the new inode |
| 550 | * @dev: For device nodes, this is the device number | 540 | * @dev: For device nodes, this is the device number |
| 551 | * @symname: For symlinks, this is the link destination | 541 | * @symname: For symlinks, this is the link destination |
| @@ -555,8 +545,9 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip, | |||
| 555 | */ | 545 | */ |
| 556 | 546 | ||
| 557 | static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | 547 | static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, |
| 548 | struct file *file, | ||
| 558 | umode_t mode, dev_t dev, const char *symname, | 549 | umode_t mode, dev_t dev, const char *symname, |
| 559 | unsigned int size, int excl) | 550 | unsigned int size, int excl, int *opened) |
| 560 | { | 551 | { |
| 561 | const struct qstr *name = &dentry->d_name; | 552 | const struct qstr *name = &dentry->d_name; |
| 562 | struct gfs2_holder ghs[2]; | 553 | struct gfs2_holder ghs[2]; |
| @@ -564,6 +555,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 564 | struct gfs2_inode *dip = GFS2_I(dir), *ip; | 555 | struct gfs2_inode *dip = GFS2_I(dir), *ip; |
| 565 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); | 556 | struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); |
| 566 | struct gfs2_glock *io_gl; | 557 | struct gfs2_glock *io_gl; |
| 558 | struct dentry *d; | ||
| 567 | int error; | 559 | int error; |
| 568 | u32 aflags = 0; | 560 | u32 aflags = 0; |
| 569 | int arq; | 561 | int arq; |
| @@ -584,15 +576,30 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 584 | goto fail; | 576 | goto fail; |
| 585 | 577 | ||
| 586 | error = create_ok(dip, name, mode); | 578 | error = create_ok(dip, name, mode); |
| 587 | if ((error == -EEXIST) && S_ISREG(mode) && !excl) { | ||
| 588 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); | ||
| 589 | gfs2_glock_dq_uninit(ghs); | ||
| 590 | d_instantiate(dentry, inode); | ||
| 591 | return IS_ERR(inode) ? PTR_ERR(inode) : 0; | ||
| 592 | } | ||
| 593 | if (error) | 579 | if (error) |
| 594 | goto fail_gunlock; | 580 | goto fail_gunlock; |
| 595 | 581 | ||
| 582 | inode = gfs2_dir_search(dir, &dentry->d_name, !S_ISREG(mode) || excl); | ||
| 583 | error = PTR_ERR(inode); | ||
| 584 | if (!IS_ERR(inode)) { | ||
| 585 | d = d_splice_alias(inode, dentry); | ||
| 586 | error = 0; | ||
| 587 | if (file && !IS_ERR(d)) { | ||
| 588 | if (d == NULL) | ||
| 589 | d = dentry; | ||
| 590 | if (S_ISREG(inode->i_mode)) | ||
| 591 | error = finish_open(file, d, gfs2_open_common, opened); | ||
| 592 | else | ||
| 593 | error = finish_no_open(file, d); | ||
| 594 | } | ||
| 595 | gfs2_glock_dq_uninit(ghs); | ||
| 596 | if (IS_ERR(d)) | ||
| 597 | return PTR_RET(d); | ||
| 598 | return error; | ||
| 599 | } else if (error != -ENOENT) { | ||
| 600 | goto fail_gunlock; | ||
| 601 | } | ||
| 602 | |||
| 596 | arq = error = gfs2_diradd_alloc_required(dir, name); | 603 | arq = error = gfs2_diradd_alloc_required(dir, name); |
| 597 | if (error < 0) | 604 | if (error < 0) |
| 598 | goto fail_gunlock; | 605 | goto fail_gunlock; |
| @@ -686,10 +693,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, | |||
| 686 | goto fail_gunlock3; | 693 | goto fail_gunlock3; |
| 687 | 694 | ||
| 688 | mark_inode_dirty(inode); | 695 | mark_inode_dirty(inode); |
| 696 | d_instantiate(dentry, inode); | ||
| 697 | if (file) | ||
| 698 | error = finish_open(file, dentry, gfs2_open_common, opened); | ||
| 689 | gfs2_glock_dq_uninit(ghs); | 699 | gfs2_glock_dq_uninit(ghs); |
| 690 | gfs2_glock_dq_uninit(ghs + 1); | 700 | gfs2_glock_dq_uninit(ghs + 1); |
| 691 | d_instantiate(dentry, inode); | 701 | return error; |
| 692 | return 0; | ||
| 693 | 702 | ||
| 694 | fail_gunlock3: | 703 | fail_gunlock3: |
| 695 | gfs2_glock_dq_uninit(ghs + 1); | 704 | gfs2_glock_dq_uninit(ghs + 1); |
| @@ -729,36 +738,56 @@ fail: | |||
| 729 | static int gfs2_create(struct inode *dir, struct dentry *dentry, | 738 | static int gfs2_create(struct inode *dir, struct dentry *dentry, |
| 730 | umode_t mode, bool excl) | 739 | umode_t mode, bool excl) |
| 731 | { | 740 | { |
| 732 | return gfs2_create_inode(dir, dentry, S_IFREG | mode, 0, NULL, 0, excl); | 741 | return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl, NULL); |
| 733 | } | 742 | } |
| 734 | 743 | ||
| 735 | /** | 744 | /** |
| 736 | * gfs2_lookup - Look up a filename in a directory and return its inode | 745 | * __gfs2_lookup - Look up a filename in a directory and return its inode |
| 737 | * @dir: The directory inode | 746 | * @dir: The directory inode |
| 738 | * @dentry: The dentry of the new inode | 747 | * @dentry: The dentry of the new inode |
| 739 | * @nd: passed from Linux VFS, ignored by us | 748 | * @file: File to be opened |
| 749 | * @opened: atomic_open flags | ||
| 740 | * | 750 | * |
| 741 | * Called by the VFS layer. Lock dir and call gfs2_lookupi() | ||
| 742 | * | 751 | * |
| 743 | * Returns: errno | 752 | * Returns: errno |
| 744 | */ | 753 | */ |
| 745 | 754 | ||
| 746 | static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | 755 | static struct dentry *__gfs2_lookup(struct inode *dir, struct dentry *dentry, |
| 747 | unsigned int flags) | 756 | struct file *file, int *opened) |
| 748 | { | 757 | { |
| 749 | struct inode *inode = gfs2_lookupi(dir, &dentry->d_name, 0); | 758 | struct inode *inode; |
| 750 | if (inode && !IS_ERR(inode)) { | 759 | struct dentry *d; |
| 751 | struct gfs2_glock *gl = GFS2_I(inode)->i_gl; | 760 | struct gfs2_holder gh; |
| 752 | struct gfs2_holder gh; | 761 | struct gfs2_glock *gl; |
| 753 | int error; | 762 | int error; |
| 754 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | 763 | |
| 755 | if (error) { | 764 | inode = gfs2_lookupi(dir, &dentry->d_name, 0); |
| 756 | iput(inode); | 765 | if (!inode) |
| 757 | return ERR_PTR(error); | 766 | return NULL; |
| 758 | } | 767 | if (IS_ERR(inode)) |
| 759 | gfs2_glock_dq_uninit(&gh); | 768 | return ERR_CAST(inode); |
| 769 | |||
| 770 | gl = GFS2_I(inode)->i_gl; | ||
| 771 | error = gfs2_glock_nq_init(gl, LM_ST_SHARED, LM_FLAG_ANY, &gh); | ||
| 772 | if (error) { | ||
| 773 | iput(inode); | ||
| 774 | return ERR_PTR(error); | ||
| 760 | } | 775 | } |
| 761 | return d_splice_alias(inode, dentry); | 776 | |
| 777 | d = d_splice_alias(inode, dentry); | ||
| 778 | if (file && S_ISREG(inode->i_mode)) | ||
| 779 | error = finish_open(file, dentry, gfs2_open_common, opened); | ||
| 780 | |||
| 781 | gfs2_glock_dq_uninit(&gh); | ||
| 782 | if (error) | ||
| 783 | return ERR_PTR(error); | ||
| 784 | return d; | ||
| 785 | } | ||
| 786 | |||
| 787 | static struct dentry *gfs2_lookup(struct inode *dir, struct dentry *dentry, | ||
| 788 | unsigned flags) | ||
| 789 | { | ||
| 790 | return __gfs2_lookup(dir, dentry, NULL, NULL); | ||
| 762 | } | 791 | } |
| 763 | 792 | ||
| 764 | /** | 793 | /** |
| @@ -1076,7 +1105,7 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry, | |||
| 1076 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) | 1105 | if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode) - 1) |
| 1077 | return -ENAMETOOLONG; | 1106 | return -ENAMETOOLONG; |
| 1078 | 1107 | ||
| 1079 | return gfs2_create_inode(dir, dentry, S_IFLNK | S_IRWXUGO, 0, symname, size, 0); | 1108 | return gfs2_create_inode(dir, dentry, NULL, S_IFLNK | S_IRWXUGO, 0, symname, size, 0, NULL); |
| 1080 | } | 1109 | } |
| 1081 | 1110 | ||
| 1082 | /** | 1111 | /** |
| @@ -1092,7 +1121,7 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 1092 | { | 1121 | { |
| 1093 | struct gfs2_sbd *sdp = GFS2_SB(dir); | 1122 | struct gfs2_sbd *sdp = GFS2_SB(dir); |
| 1094 | unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); | 1123 | unsigned dsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode); |
| 1095 | return gfs2_create_inode(dir, dentry, S_IFDIR | mode, 0, NULL, dsize, 0); | 1124 | return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0, NULL); |
| 1096 | } | 1125 | } |
| 1097 | 1126 | ||
| 1098 | /** | 1127 | /** |
| @@ -1107,7 +1136,43 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) | |||
| 1107 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, | 1136 | static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, |
| 1108 | dev_t dev) | 1137 | dev_t dev) |
| 1109 | { | 1138 | { |
| 1110 | return gfs2_create_inode(dir, dentry, mode, dev, NULL, 0, 0); | 1139 | return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0, NULL); |
| 1140 | } | ||
| 1141 | |||
| 1142 | /** | ||
| 1143 | * gfs2_atomic_open - Atomically open a file | ||
| 1144 | * @dir: The directory | ||
| 1145 | * @dentry: The proposed new entry | ||
| 1146 | * @file: The proposed new struct file | ||
| 1147 | * @flags: open flags | ||
| 1148 | * @mode: File mode | ||
| 1149 | * @opened: Flag to say whether the file has been opened or not | ||
| 1150 | * | ||
| 1151 | * Returns: error code or 0 for success | ||
| 1152 | */ | ||
| 1153 | |||
| 1154 | static int gfs2_atomic_open(struct inode *dir, struct dentry *dentry, | ||
| 1155 | struct file *file, unsigned flags, | ||
| 1156 | umode_t mode, int *opened) | ||
| 1157 | { | ||
| 1158 | struct dentry *d; | ||
| 1159 | bool excl = !!(flags & O_EXCL); | ||
| 1160 | |||
| 1161 | d = __gfs2_lookup(dir, dentry, file, opened); | ||
| 1162 | if (IS_ERR(d)) | ||
| 1163 | return PTR_ERR(d); | ||
| 1164 | if (d == NULL) | ||
| 1165 | d = dentry; | ||
| 1166 | if (d->d_inode) { | ||
| 1167 | if (!(*opened & FILE_OPENED)) | ||
| 1168 | return finish_no_open(file, d); | ||
| 1169 | return 0; | ||
| 1170 | } | ||
| 1171 | |||
| 1172 | if (!(flags & O_CREAT)) | ||
| 1173 | return -ENOENT; | ||
| 1174 | |||
| 1175 | return gfs2_create_inode(dir, dentry, file, S_IFREG | mode, 0, NULL, 0, excl, opened); | ||
| 1111 | } | 1176 | } |
| 1112 | 1177 | ||
| 1113 | /* | 1178 | /* |
| @@ -1787,6 +1852,7 @@ const struct inode_operations gfs2_dir_iops = { | |||
| 1787 | .removexattr = gfs2_removexattr, | 1852 | .removexattr = gfs2_removexattr, |
| 1788 | .fiemap = gfs2_fiemap, | 1853 | .fiemap = gfs2_fiemap, |
| 1789 | .get_acl = gfs2_get_acl, | 1854 | .get_acl = gfs2_get_acl, |
| 1855 | .atomic_open = gfs2_atomic_open, | ||
| 1790 | }; | 1856 | }; |
| 1791 | 1857 | ||
| 1792 | const struct inode_operations gfs2_symlink_iops = { | 1858 | const struct inode_operations gfs2_symlink_iops = { |
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index c53c7477f6da..ba4d9492d422 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -109,6 +109,7 @@ extern int gfs2_permission(struct inode *inode, int mask); | |||
| 109 | extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr); | 109 | extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr); |
| 110 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 110 | extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
| 111 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 111 | extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
| 112 | extern int gfs2_open_common(struct inode *inode, struct file *file); | ||
| 112 | 113 | ||
| 113 | extern const struct inode_operations gfs2_file_iops; | 114 | extern const struct inode_operations gfs2_file_iops; |
| 114 | extern const struct inode_operations gfs2_dir_iops; | 115 | extern const struct inode_operations gfs2_dir_iops; |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index b404f4853034..610613fb65b5 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
| @@ -211,15 +211,16 @@ static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr) | |||
| 211 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp) | 211 | static int gfs2_ail1_empty(struct gfs2_sbd *sdp) |
| 212 | { | 212 | { |
| 213 | struct gfs2_trans *tr, *s; | 213 | struct gfs2_trans *tr, *s; |
| 214 | int oldest_tr = 1; | ||
| 214 | int ret; | 215 | int ret; |
| 215 | 216 | ||
| 216 | spin_lock(&sdp->sd_ail_lock); | 217 | spin_lock(&sdp->sd_ail_lock); |
| 217 | list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { | 218 | list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { |
| 218 | gfs2_ail1_empty_one(sdp, tr); | 219 | gfs2_ail1_empty_one(sdp, tr); |
| 219 | if (list_empty(&tr->tr_ail1_list)) | 220 | if (list_empty(&tr->tr_ail1_list) && oldest_tr) |
| 220 | list_move(&tr->tr_list, &sdp->sd_ail2_list); | 221 | list_move(&tr->tr_list, &sdp->sd_ail2_list); |
| 221 | else | 222 | else |
| 222 | break; | 223 | oldest_tr = 0; |
| 223 | } | 224 | } |
| 224 | ret = list_empty(&sdp->sd_ail1_list); | 225 | ret = list_empty(&sdp->sd_ail1_list); |
| 225 | spin_unlock(&sdp->sd_ail_lock); | 226 | spin_unlock(&sdp->sd_ail_lock); |
| @@ -317,7 +318,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail) | |||
| 317 | 318 | ||
| 318 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) | 319 | int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks) |
| 319 | { | 320 | { |
| 320 | unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); | 321 | unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize); |
| 321 | unsigned wanted = blks + reserved_blks; | 322 | unsigned wanted = blks + reserved_blks; |
| 322 | DEFINE_WAIT(wait); | 323 | DEFINE_WAIT(wait); |
| 323 | int did_wait = 0; | 324 | int did_wait = 0; |
| @@ -545,6 +546,76 @@ void gfs2_ordered_del_inode(struct gfs2_inode *ip) | |||
| 545 | spin_unlock(&sdp->sd_ordered_lock); | 546 | spin_unlock(&sdp->sd_ordered_lock); |
| 546 | } | 547 | } |
| 547 | 548 | ||
| 549 | void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | ||
| 550 | { | ||
| 551 | struct buffer_head *bh = bd->bd_bh; | ||
| 552 | struct gfs2_glock *gl = bd->bd_gl; | ||
| 553 | |||
| 554 | gfs2_remove_from_ail(bd); | ||
| 555 | bd->bd_bh = NULL; | ||
| 556 | bh->b_private = NULL; | ||
| 557 | bd->bd_blkno = bh->b_blocknr; | ||
| 558 | bd->bd_ops = &gfs2_revoke_lops; | ||
| 559 | sdp->sd_log_num_revoke++; | ||
| 560 | atomic_inc(&gl->gl_revokes); | ||
| 561 | set_bit(GLF_LFLUSH, &gl->gl_flags); | ||
| 562 | list_add(&bd->bd_list, &sdp->sd_log_le_revoke); | ||
| 563 | } | ||
| 564 | |||
| 565 | void gfs2_write_revokes(struct gfs2_sbd *sdp) | ||
| 566 | { | ||
| 567 | struct gfs2_trans *tr; | ||
| 568 | struct gfs2_bufdata *bd, *tmp; | ||
| 569 | int have_revokes = 0; | ||
| 570 | int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); | ||
| 571 | |||
| 572 | gfs2_ail1_empty(sdp); | ||
| 573 | spin_lock(&sdp->sd_ail_lock); | ||
| 574 | list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { | ||
| 575 | list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) { | ||
| 576 | if (list_empty(&bd->bd_list)) { | ||
| 577 | have_revokes = 1; | ||
| 578 | goto done; | ||
| 579 | } | ||
| 580 | } | ||
| 581 | } | ||
| 582 | done: | ||
| 583 | spin_unlock(&sdp->sd_ail_lock); | ||
| 584 | if (have_revokes == 0) | ||
| 585 | return; | ||
| 586 | while (sdp->sd_log_num_revoke > max_revokes) | ||
| 587 | max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
| 588 | max_revokes -= sdp->sd_log_num_revoke; | ||
| 589 | if (!sdp->sd_log_num_revoke) { | ||
| 590 | atomic_dec(&sdp->sd_log_blks_free); | ||
| 591 | /* If no blocks have been reserved, we need to also | ||
| 592 | * reserve a block for the header */ | ||
| 593 | if (!sdp->sd_log_blks_reserved) | ||
| 594 | atomic_dec(&sdp->sd_log_blks_free); | ||
| 595 | } | ||
| 596 | gfs2_log_lock(sdp); | ||
| 597 | spin_lock(&sdp->sd_ail_lock); | ||
| 598 | list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { | ||
| 599 | list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) { | ||
| 600 | if (max_revokes == 0) | ||
| 601 | goto out_of_blocks; | ||
| 602 | if (!list_empty(&bd->bd_list)) | ||
| 603 | continue; | ||
| 604 | gfs2_add_revoke(sdp, bd); | ||
| 605 | max_revokes--; | ||
| 606 | } | ||
| 607 | } | ||
| 608 | out_of_blocks: | ||
| 609 | spin_unlock(&sdp->sd_ail_lock); | ||
| 610 | gfs2_log_unlock(sdp); | ||
| 611 | |||
| 612 | if (!sdp->sd_log_num_revoke) { | ||
| 613 | atomic_inc(&sdp->sd_log_blks_free); | ||
| 614 | if (!sdp->sd_log_blks_reserved) | ||
| 615 | atomic_inc(&sdp->sd_log_blks_free); | ||
| 616 | } | ||
| 617 | } | ||
| 618 | |||
| 548 | /** | 619 | /** |
| 549 | * log_write_header - Get and initialize a journal header buffer | 620 | * log_write_header - Get and initialize a journal header buffer |
| 550 | * @sdp: The GFS2 superblock | 621 | * @sdp: The GFS2 superblock |
| @@ -562,7 +633,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags) | |||
| 562 | lh = page_address(page); | 633 | lh = page_address(page); |
| 563 | clear_page(lh); | 634 | clear_page(lh); |
| 564 | 635 | ||
| 565 | gfs2_ail1_empty(sdp); | ||
| 566 | tail = current_tail(sdp); | 636 | tail = current_tail(sdp); |
| 567 | 637 | ||
| 568 | lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); | 638 | lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC); |
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h index 3566f35915e0..37216634f0aa 100644 --- a/fs/gfs2/log.h +++ b/fs/gfs2/log.h | |||
| @@ -72,5 +72,7 @@ extern void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) | |||
| 72 | extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); | 72 | extern void gfs2_log_shutdown(struct gfs2_sbd *sdp); |
| 73 | extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); | 73 | extern void gfs2_meta_syncfs(struct gfs2_sbd *sdp); |
| 74 | extern int gfs2_logd(void *data); | 74 | extern int gfs2_logd(void *data); |
| 75 | extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd); | ||
| 76 | extern void gfs2_write_revokes(struct gfs2_sbd *sdp); | ||
| 75 | 77 | ||
| 76 | #endif /* __LOG_DOT_H__ */ | 78 | #endif /* __LOG_DOT_H__ */ |
diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 6c33d7b6e0c4..17c5b5d7dc88 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/gfs2_ondisk.h> | 16 | #include <linux/gfs2_ondisk.h> |
| 17 | #include <linux/bio.h> | 17 | #include <linux/bio.h> |
| 18 | #include <linux/fs.h> | 18 | #include <linux/fs.h> |
| 19 | #include <linux/list_sort.h> | ||
| 19 | 20 | ||
| 20 | #include "gfs2.h" | 21 | #include "gfs2.h" |
| 21 | #include "incore.h" | 22 | #include "incore.h" |
| @@ -401,6 +402,20 @@ static void gfs2_check_magic(struct buffer_head *bh) | |||
| 401 | kunmap_atomic(kaddr); | 402 | kunmap_atomic(kaddr); |
| 402 | } | 403 | } |
| 403 | 404 | ||
| 405 | static int blocknr_cmp(void *priv, struct list_head *a, struct list_head *b) | ||
| 406 | { | ||
| 407 | struct gfs2_bufdata *bda, *bdb; | ||
| 408 | |||
| 409 | bda = list_entry(a, struct gfs2_bufdata, bd_list); | ||
| 410 | bdb = list_entry(b, struct gfs2_bufdata, bd_list); | ||
| 411 | |||
| 412 | if (bda->bd_bh->b_blocknr < bdb->bd_bh->b_blocknr) | ||
| 413 | return -1; | ||
| 414 | if (bda->bd_bh->b_blocknr > bdb->bd_bh->b_blocknr) | ||
| 415 | return 1; | ||
| 416 | return 0; | ||
| 417 | } | ||
| 418 | |||
| 404 | static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, | 419 | static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, |
| 405 | unsigned int total, struct list_head *blist, | 420 | unsigned int total, struct list_head *blist, |
| 406 | bool is_databuf) | 421 | bool is_databuf) |
| @@ -413,6 +428,7 @@ static void gfs2_before_commit(struct gfs2_sbd *sdp, unsigned int limit, | |||
| 413 | __be64 *ptr; | 428 | __be64 *ptr; |
| 414 | 429 | ||
| 415 | gfs2_log_lock(sdp); | 430 | gfs2_log_lock(sdp); |
| 431 | list_sort(NULL, blist, blocknr_cmp); | ||
| 416 | bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); | 432 | bd1 = bd2 = list_prepare_entry(bd1, blist, bd_list); |
| 417 | while(total) { | 433 | while(total) { |
| 418 | num = total; | 434 | num = total; |
| @@ -590,6 +606,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) | |||
| 590 | struct page *page; | 606 | struct page *page; |
| 591 | unsigned int length; | 607 | unsigned int length; |
| 592 | 608 | ||
| 609 | gfs2_write_revokes(sdp); | ||
| 593 | if (!sdp->sd_log_num_revoke) | 610 | if (!sdp->sd_log_num_revoke) |
| 594 | return; | 611 | return; |
| 595 | 612 | ||
| @@ -836,10 +853,6 @@ const struct gfs2_log_operations gfs2_revoke_lops = { | |||
| 836 | .lo_name = "revoke", | 853 | .lo_name = "revoke", |
| 837 | }; | 854 | }; |
| 838 | 855 | ||
| 839 | const struct gfs2_log_operations gfs2_rg_lops = { | ||
| 840 | .lo_name = "rg", | ||
| 841 | }; | ||
| 842 | |||
| 843 | const struct gfs2_log_operations gfs2_databuf_lops = { | 856 | const struct gfs2_log_operations gfs2_databuf_lops = { |
| 844 | .lo_before_commit = databuf_lo_before_commit, | 857 | .lo_before_commit = databuf_lo_before_commit, |
| 845 | .lo_after_commit = databuf_lo_after_commit, | 858 | .lo_after_commit = databuf_lo_after_commit, |
| @@ -851,7 +864,6 @@ const struct gfs2_log_operations gfs2_databuf_lops = { | |||
| 851 | const struct gfs2_log_operations *gfs2_log_ops[] = { | 864 | const struct gfs2_log_operations *gfs2_log_ops[] = { |
| 852 | &gfs2_databuf_lops, | 865 | &gfs2_databuf_lops, |
| 853 | &gfs2_buf_lops, | 866 | &gfs2_buf_lops, |
| 854 | &gfs2_rg_lops, | ||
| 855 | &gfs2_revoke_lops, | 867 | &gfs2_revoke_lops, |
| 856 | NULL, | 868 | NULL, |
| 857 | }; | 869 | }; |
diff --git a/fs/gfs2/lops.h b/fs/gfs2/lops.h index 87e062e05c92..9ca2e6438419 100644 --- a/fs/gfs2/lops.h +++ b/fs/gfs2/lops.h | |||
| @@ -23,7 +23,6 @@ | |||
| 23 | extern const struct gfs2_log_operations gfs2_glock_lops; | 23 | extern const struct gfs2_log_operations gfs2_glock_lops; |
| 24 | extern const struct gfs2_log_operations gfs2_buf_lops; | 24 | extern const struct gfs2_log_operations gfs2_buf_lops; |
| 25 | extern const struct gfs2_log_operations gfs2_revoke_lops; | 25 | extern const struct gfs2_log_operations gfs2_revoke_lops; |
| 26 | extern const struct gfs2_log_operations gfs2_rg_lops; | ||
| 27 | extern const struct gfs2_log_operations gfs2_databuf_lops; | 26 | extern const struct gfs2_log_operations gfs2_databuf_lops; |
| 28 | 27 | ||
| 29 | extern const struct gfs2_log_operations *gfs2_log_ops[]; | 28 | extern const struct gfs2_log_operations *gfs2_log_ops[]; |
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 1a89afb68472..0da390686c08 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c | |||
| @@ -296,10 +296,6 @@ void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int | |||
| 296 | if (bd) { | 296 | if (bd) { |
| 297 | spin_lock(&sdp->sd_ail_lock); | 297 | spin_lock(&sdp->sd_ail_lock); |
| 298 | if (bd->bd_tr) { | 298 | if (bd->bd_tr) { |
| 299 | gfs2_remove_from_ail(bd); | ||
| 300 | bh->b_private = NULL; | ||
| 301 | bd->bd_bh = NULL; | ||
| 302 | bd->bd_blkno = bh->b_blocknr; | ||
| 303 | gfs2_trans_add_revoke(sdp, bd); | 299 | gfs2_trans_add_revoke(sdp, bd); |
| 304 | } | 300 | } |
| 305 | spin_unlock(&sdp->sd_ail_lock); | 301 | spin_unlock(&sdp->sd_ail_lock); |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 60ede2a0f43f..0262c190b6f9 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -916,16 +916,16 @@ static int init_threads(struct gfs2_sbd *sdp, int undo) | |||
| 916 | goto fail_quotad; | 916 | goto fail_quotad; |
| 917 | 917 | ||
| 918 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); | 918 | p = kthread_run(gfs2_logd, sdp, "gfs2_logd"); |
| 919 | error = IS_ERR(p); | 919 | if (IS_ERR(p)) { |
| 920 | if (error) { | 920 | error = PTR_ERR(p); |
| 921 | fs_err(sdp, "can't start logd thread: %d\n", error); | 921 | fs_err(sdp, "can't start logd thread: %d\n", error); |
| 922 | return error; | 922 | return error; |
| 923 | } | 923 | } |
| 924 | sdp->sd_logd_process = p; | 924 | sdp->sd_logd_process = p; |
| 925 | 925 | ||
| 926 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); | 926 | p = kthread_run(gfs2_quotad, sdp, "gfs2_quotad"); |
| 927 | error = IS_ERR(p); | 927 | if (IS_ERR(p)) { |
| 928 | if (error) { | 928 | error = PTR_ERR(p); |
| 929 | fs_err(sdp, "can't start quotad thread: %d\n", error); | 929 | fs_err(sdp, "can't start quotad thread: %d\n", error); |
| 930 | goto fail; | 930 | goto fail; |
| 931 | } | 931 | } |
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index c253b13722e8..3768c2f40e43 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c | |||
| @@ -1154,11 +1154,6 @@ int gfs2_quota_sync(struct super_block *sb, int type) | |||
| 1154 | return error; | 1154 | return error; |
| 1155 | } | 1155 | } |
| 1156 | 1156 | ||
| 1157 | static int gfs2_quota_sync_timeo(struct super_block *sb, int type) | ||
| 1158 | { | ||
| 1159 | return gfs2_quota_sync(sb, type); | ||
| 1160 | } | ||
| 1161 | |||
| 1162 | int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) | 1157 | int gfs2_quota_refresh(struct gfs2_sbd *sdp, struct kqid qid) |
| 1163 | { | 1158 | { |
| 1164 | struct gfs2_quota_data *qd; | 1159 | struct gfs2_quota_data *qd; |
| @@ -1414,7 +1409,7 @@ int gfs2_quotad(void *data) | |||
| 1414 | &tune->gt_statfs_quantum); | 1409 | &tune->gt_statfs_quantum); |
| 1415 | 1410 | ||
| 1416 | /* Update quota file */ | 1411 | /* Update quota file */ |
| 1417 | quotad_check_timeo(sdp, "sync", gfs2_quota_sync_timeo, t, | 1412 | quotad_check_timeo(sdp, "sync", gfs2_quota_sync, t, |
| 1418 | "ad_timeo, &tune->gt_quota_quantum); | 1413 | "ad_timeo, &tune->gt_quota_quantum); |
| 1419 | 1414 | ||
| 1420 | /* Check for & recover partially truncated inodes */ | 1415 | /* Check for & recover partially truncated inodes */ |
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 9809156e3d04..69317435faa7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c | |||
| @@ -1288,13 +1288,15 @@ int gfs2_fitrim(struct file *filp, void __user *argp) | |||
| 1288 | minlen = max_t(u64, r.minlen, | 1288 | minlen = max_t(u64, r.minlen, |
| 1289 | q->limits.discard_granularity) >> bs_shift; | 1289 | q->limits.discard_granularity) >> bs_shift; |
| 1290 | 1290 | ||
| 1291 | if (end <= start || minlen > sdp->sd_max_rg_data) | ||
| 1292 | return -EINVAL; | ||
| 1293 | |||
| 1291 | rgd = gfs2_blk2rgrpd(sdp, start, 0); | 1294 | rgd = gfs2_blk2rgrpd(sdp, start, 0); |
| 1292 | rgd_end = gfs2_blk2rgrpd(sdp, end - 1, 0); | 1295 | rgd_end = gfs2_blk2rgrpd(sdp, end, 0); |
| 1293 | 1296 | ||
| 1294 | if (end <= start || | 1297 | if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end)) |
| 1295 | minlen > sdp->sd_max_rg_data || | 1298 | && (start > rgd_end->rd_data0 + rgd_end->rd_data)) |
| 1296 | start > rgd_end->rd_data0 + rgd_end->rd_data) | 1299 | return -EINVAL; /* start is beyond the end of the fs */ |
| 1297 | return -EINVAL; | ||
| 1298 | 1300 | ||
| 1299 | while (1) { | 1301 | while (1) { |
| 1300 | 1302 | ||
| @@ -1336,7 +1338,7 @@ int gfs2_fitrim(struct file *filp, void __user *argp) | |||
| 1336 | } | 1338 | } |
| 1337 | 1339 | ||
| 1338 | out: | 1340 | out: |
| 1339 | r.len = trimmed << 9; | 1341 | r.len = trimmed << bs_shift; |
| 1340 | if (copy_to_user(argp, &r, sizeof(r))) | 1342 | if (copy_to_user(argp, &r, sizeof(r))) |
| 1341 | return -EFAULT; | 1343 | return -EFAULT; |
| 1342 | 1344 | ||
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 7374907742a8..2b20d7046bf3 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c | |||
| @@ -270,19 +270,12 @@ void gfs2_trans_add_meta(struct gfs2_glock *gl, struct buffer_head *bh) | |||
| 270 | 270 | ||
| 271 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) | 271 | void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) |
| 272 | { | 272 | { |
| 273 | struct gfs2_glock *gl = bd->bd_gl; | ||
| 274 | struct gfs2_trans *tr = current->journal_info; | 273 | struct gfs2_trans *tr = current->journal_info; |
| 275 | 274 | ||
| 276 | BUG_ON(!list_empty(&bd->bd_list)); | 275 | BUG_ON(!list_empty(&bd->bd_list)); |
| 277 | BUG_ON(!list_empty(&bd->bd_ail_st_list)); | 276 | gfs2_add_revoke(sdp, bd); |
| 278 | BUG_ON(!list_empty(&bd->bd_ail_gl_list)); | ||
| 279 | bd->bd_ops = &gfs2_revoke_lops; | ||
| 280 | tr->tr_touched = 1; | 277 | tr->tr_touched = 1; |
| 281 | tr->tr_num_revoke++; | 278 | tr->tr_num_revoke++; |
| 282 | sdp->sd_log_num_revoke++; | ||
| 283 | atomic_inc(&gl->gl_revokes); | ||
| 284 | set_bit(GLF_LFLUSH, &gl->gl_flags); | ||
| 285 | list_add(&bd->bd_list, &sdp->sd_log_le_revoke); | ||
| 286 | } | 279 | } |
| 287 | 280 | ||
| 288 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) | 281 | void gfs2_trans_add_unrevoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len) |
diff --git a/fs/hfs/dir.c b/fs/hfs/dir.c index e0101b6fb0d7..145566851e7a 100644 --- a/fs/hfs/dir.c +++ b/fs/hfs/dir.c | |||
| @@ -51,9 +51,9 @@ done: | |||
| 51 | /* | 51 | /* |
| 52 | * hfs_readdir | 52 | * hfs_readdir |
| 53 | */ | 53 | */ |
| 54 | static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 54 | static int hfs_readdir(struct file *file, struct dir_context *ctx) |
| 55 | { | 55 | { |
| 56 | struct inode *inode = file_inode(filp); | 56 | struct inode *inode = file_inode(file); |
| 57 | struct super_block *sb = inode->i_sb; | 57 | struct super_block *sb = inode->i_sb; |
| 58 | int len, err; | 58 | int len, err; |
| 59 | char strbuf[HFS_MAX_NAMELEN]; | 59 | char strbuf[HFS_MAX_NAMELEN]; |
| @@ -62,7 +62,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 62 | struct hfs_readdir_data *rd; | 62 | struct hfs_readdir_data *rd; |
| 63 | u16 type; | 63 | u16 type; |
| 64 | 64 | ||
| 65 | if (filp->f_pos >= inode->i_size) | 65 | if (ctx->pos >= inode->i_size) |
| 66 | return 0; | 66 | return 0; |
| 67 | 67 | ||
| 68 | err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); | 68 | err = hfs_find_init(HFS_SB(sb)->cat_tree, &fd); |
| @@ -73,14 +73,13 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 73 | if (err) | 73 | if (err) |
| 74 | goto out; | 74 | goto out; |
| 75 | 75 | ||
| 76 | switch ((u32)filp->f_pos) { | 76 | if (ctx->pos == 0) { |
| 77 | case 0: | ||
| 78 | /* This is completely artificial... */ | 77 | /* This is completely artificial... */ |
| 79 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR)) | 78 | if (!dir_emit_dot(file, ctx)) |
| 80 | goto out; | 79 | goto out; |
| 81 | filp->f_pos++; | 80 | ctx->pos = 1; |
| 82 | /* fall through */ | 81 | } |
| 83 | case 1: | 82 | if (ctx->pos == 1) { |
| 84 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { | 83 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { |
| 85 | err = -EIO; | 84 | err = -EIO; |
| 86 | goto out; | 85 | goto out; |
| @@ -97,18 +96,16 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 97 | // err = -EIO; | 96 | // err = -EIO; |
| 98 | // goto out; | 97 | // goto out; |
| 99 | //} | 98 | //} |
| 100 | if (filldir(dirent, "..", 2, 1, | 99 | if (!dir_emit(ctx, "..", 2, |
| 101 | be32_to_cpu(entry.thread.ParID), DT_DIR)) | 100 | be32_to_cpu(entry.thread.ParID), DT_DIR)) |
| 102 | goto out; | 101 | goto out; |
| 103 | filp->f_pos++; | 102 | ctx->pos = 2; |
| 104 | /* fall through */ | ||
| 105 | default: | ||
| 106 | if (filp->f_pos >= inode->i_size) | ||
| 107 | goto out; | ||
| 108 | err = hfs_brec_goto(&fd, filp->f_pos - 1); | ||
| 109 | if (err) | ||
| 110 | goto out; | ||
| 111 | } | 103 | } |
| 104 | if (ctx->pos >= inode->i_size) | ||
| 105 | goto out; | ||
| 106 | err = hfs_brec_goto(&fd, ctx->pos - 1); | ||
| 107 | if (err) | ||
| 108 | goto out; | ||
| 112 | 109 | ||
| 113 | for (;;) { | 110 | for (;;) { |
| 114 | if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { | 111 | if (be32_to_cpu(fd.key->cat.ParID) != inode->i_ino) { |
| @@ -131,7 +128,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 131 | err = -EIO; | 128 | err = -EIO; |
| 132 | goto out; | 129 | goto out; |
| 133 | } | 130 | } |
| 134 | if (filldir(dirent, strbuf, len, filp->f_pos, | 131 | if (!dir_emit(ctx, strbuf, len, |
| 135 | be32_to_cpu(entry.dir.DirID), DT_DIR)) | 132 | be32_to_cpu(entry.dir.DirID), DT_DIR)) |
| 136 | break; | 133 | break; |
| 137 | } else if (type == HFS_CDR_FIL) { | 134 | } else if (type == HFS_CDR_FIL) { |
| @@ -140,7 +137,7 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 140 | err = -EIO; | 137 | err = -EIO; |
| 141 | goto out; | 138 | goto out; |
| 142 | } | 139 | } |
| 143 | if (filldir(dirent, strbuf, len, filp->f_pos, | 140 | if (!dir_emit(ctx, strbuf, len, |
| 144 | be32_to_cpu(entry.file.FlNum), DT_REG)) | 141 | be32_to_cpu(entry.file.FlNum), DT_REG)) |
| 145 | break; | 142 | break; |
| 146 | } else { | 143 | } else { |
| @@ -148,22 +145,22 @@ static int hfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 148 | err = -EIO; | 145 | err = -EIO; |
| 149 | goto out; | 146 | goto out; |
| 150 | } | 147 | } |
| 151 | filp->f_pos++; | 148 | ctx->pos++; |
| 152 | if (filp->f_pos >= inode->i_size) | 149 | if (ctx->pos >= inode->i_size) |
| 153 | goto out; | 150 | goto out; |
| 154 | err = hfs_brec_goto(&fd, 1); | 151 | err = hfs_brec_goto(&fd, 1); |
| 155 | if (err) | 152 | if (err) |
| 156 | goto out; | 153 | goto out; |
| 157 | } | 154 | } |
| 158 | rd = filp->private_data; | 155 | rd = file->private_data; |
| 159 | if (!rd) { | 156 | if (!rd) { |
| 160 | rd = kmalloc(sizeof(struct hfs_readdir_data), GFP_KERNEL); | 157 | rd = kmalloc(sizeof(struct hfs_readdir_data), GFP_KERNEL); |
| 161 | if (!rd) { | 158 | if (!rd) { |
| 162 | err = -ENOMEM; | 159 | err = -ENOMEM; |
| 163 | goto out; | 160 | goto out; |
| 164 | } | 161 | } |
| 165 | filp->private_data = rd; | 162 | file->private_data = rd; |
| 166 | rd->file = filp; | 163 | rd->file = file; |
| 167 | list_add(&rd->list, &HFS_I(inode)->open_dir_list); | 164 | list_add(&rd->list, &HFS_I(inode)->open_dir_list); |
| 168 | } | 165 | } |
| 169 | memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); | 166 | memcpy(&rd->key, &fd.key, sizeof(struct hfs_cat_key)); |
| @@ -306,7 +303,7 @@ static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 306 | 303 | ||
| 307 | const struct file_operations hfs_dir_operations = { | 304 | const struct file_operations hfs_dir_operations = { |
| 308 | .read = generic_read_dir, | 305 | .read = generic_read_dir, |
| 309 | .readdir = hfs_readdir, | 306 | .iterate = hfs_readdir, |
| 310 | .llseek = generic_file_llseek, | 307 | .llseek = generic_file_llseek, |
| 311 | .release = hfs_dir_release, | 308 | .release = hfs_dir_release, |
| 312 | }; | 309 | }; |
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index a37ac934732f..d8ce4bd17fc5 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c | |||
| @@ -121,9 +121,9 @@ fail: | |||
| 121 | return ERR_PTR(err); | 121 | return ERR_PTR(err); |
| 122 | } | 122 | } |
| 123 | 123 | ||
| 124 | static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | 124 | static int hfsplus_readdir(struct file *file, struct dir_context *ctx) |
| 125 | { | 125 | { |
| 126 | struct inode *inode = file_inode(filp); | 126 | struct inode *inode = file_inode(file); |
| 127 | struct super_block *sb = inode->i_sb; | 127 | struct super_block *sb = inode->i_sb; |
| 128 | int len, err; | 128 | int len, err; |
| 129 | char strbuf[HFSPLUS_MAX_STRLEN + 1]; | 129 | char strbuf[HFSPLUS_MAX_STRLEN + 1]; |
| @@ -132,7 +132,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 132 | struct hfsplus_readdir_data *rd; | 132 | struct hfsplus_readdir_data *rd; |
| 133 | u16 type; | 133 | u16 type; |
| 134 | 134 | ||
| 135 | if (filp->f_pos >= inode->i_size) | 135 | if (file->f_pos >= inode->i_size) |
| 136 | return 0; | 136 | return 0; |
| 137 | 137 | ||
| 138 | err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); | 138 | err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); |
| @@ -143,14 +143,13 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 143 | if (err) | 143 | if (err) |
| 144 | goto out; | 144 | goto out; |
| 145 | 145 | ||
| 146 | switch ((u32)filp->f_pos) { | 146 | if (ctx->pos == 0) { |
| 147 | case 0: | ||
| 148 | /* This is completely artificial... */ | 147 | /* This is completely artificial... */ |
| 149 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR)) | 148 | if (!dir_emit_dot(file, ctx)) |
| 150 | goto out; | 149 | goto out; |
| 151 | filp->f_pos++; | 150 | ctx->pos = 1; |
| 152 | /* fall through */ | 151 | } |
| 153 | case 1: | 152 | if (ctx->pos == 1) { |
| 154 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { | 153 | if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { |
| 155 | err = -EIO; | 154 | err = -EIO; |
| 156 | goto out; | 155 | goto out; |
| @@ -168,19 +167,16 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 168 | err = -EIO; | 167 | err = -EIO; |
| 169 | goto out; | 168 | goto out; |
| 170 | } | 169 | } |
| 171 | if (filldir(dirent, "..", 2, 1, | 170 | if (!dir_emit(ctx, "..", 2, |
| 172 | be32_to_cpu(entry.thread.parentID), DT_DIR)) | 171 | be32_to_cpu(entry.thread.parentID), DT_DIR)) |
| 173 | goto out; | 172 | goto out; |
| 174 | filp->f_pos++; | 173 | ctx->pos = 2; |
| 175 | /* fall through */ | ||
| 176 | default: | ||
| 177 | if (filp->f_pos >= inode->i_size) | ||
| 178 | goto out; | ||
| 179 | err = hfs_brec_goto(&fd, filp->f_pos - 1); | ||
| 180 | if (err) | ||
| 181 | goto out; | ||
| 182 | } | 174 | } |
| 183 | 175 | if (ctx->pos >= inode->i_size) | |
| 176 | goto out; | ||
| 177 | err = hfs_brec_goto(&fd, ctx->pos - 1); | ||
| 178 | if (err) | ||
| 179 | goto out; | ||
| 184 | for (;;) { | 180 | for (;;) { |
| 185 | if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { | 181 | if (be32_to_cpu(fd.key->cat.parent) != inode->i_ino) { |
| 186 | pr_err("walked past end of dir\n"); | 182 | pr_err("walked past end of dir\n"); |
| @@ -211,7 +207,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 211 | HFSPLUS_SB(sb)->hidden_dir->i_ino == | 207 | HFSPLUS_SB(sb)->hidden_dir->i_ino == |
| 212 | be32_to_cpu(entry.folder.id)) | 208 | be32_to_cpu(entry.folder.id)) |
| 213 | goto next; | 209 | goto next; |
| 214 | if (filldir(dirent, strbuf, len, filp->f_pos, | 210 | if (!dir_emit(ctx, strbuf, len, |
| 215 | be32_to_cpu(entry.folder.id), DT_DIR)) | 211 | be32_to_cpu(entry.folder.id), DT_DIR)) |
| 216 | break; | 212 | break; |
| 217 | } else if (type == HFSPLUS_FILE) { | 213 | } else if (type == HFSPLUS_FILE) { |
| @@ -220,7 +216,7 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 220 | err = -EIO; | 216 | err = -EIO; |
| 221 | goto out; | 217 | goto out; |
| 222 | } | 218 | } |
| 223 | if (filldir(dirent, strbuf, len, filp->f_pos, | 219 | if (!dir_emit(ctx, strbuf, len, |
| 224 | be32_to_cpu(entry.file.id), DT_REG)) | 220 | be32_to_cpu(entry.file.id), DT_REG)) |
| 225 | break; | 221 | break; |
| 226 | } else { | 222 | } else { |
| @@ -229,22 +225,22 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 229 | goto out; | 225 | goto out; |
| 230 | } | 226 | } |
| 231 | next: | 227 | next: |
| 232 | filp->f_pos++; | 228 | ctx->pos++; |
| 233 | if (filp->f_pos >= inode->i_size) | 229 | if (ctx->pos >= inode->i_size) |
| 234 | goto out; | 230 | goto out; |
| 235 | err = hfs_brec_goto(&fd, 1); | 231 | err = hfs_brec_goto(&fd, 1); |
| 236 | if (err) | 232 | if (err) |
| 237 | goto out; | 233 | goto out; |
| 238 | } | 234 | } |
| 239 | rd = filp->private_data; | 235 | rd = file->private_data; |
| 240 | if (!rd) { | 236 | if (!rd) { |
| 241 | rd = kmalloc(sizeof(struct hfsplus_readdir_data), GFP_KERNEL); | 237 | rd = kmalloc(sizeof(struct hfsplus_readdir_data), GFP_KERNEL); |
| 242 | if (!rd) { | 238 | if (!rd) { |
| 243 | err = -ENOMEM; | 239 | err = -ENOMEM; |
| 244 | goto out; | 240 | goto out; |
| 245 | } | 241 | } |
| 246 | filp->private_data = rd; | 242 | file->private_data = rd; |
| 247 | rd->file = filp; | 243 | rd->file = file; |
| 248 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); | 244 | list_add(&rd->list, &HFSPLUS_I(inode)->open_dir_list); |
| 249 | } | 245 | } |
| 250 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); | 246 | memcpy(&rd->key, fd.key, sizeof(struct hfsplus_cat_key)); |
| @@ -538,7 +534,7 @@ const struct inode_operations hfsplus_dir_inode_operations = { | |||
| 538 | const struct file_operations hfsplus_dir_operations = { | 534 | const struct file_operations hfsplus_dir_operations = { |
| 539 | .fsync = hfsplus_file_fsync, | 535 | .fsync = hfsplus_file_fsync, |
| 540 | .read = generic_read_dir, | 536 | .read = generic_read_dir, |
| 541 | .readdir = hfsplus_readdir, | 537 | .iterate = hfsplus_readdir, |
| 542 | .unlocked_ioctl = hfsplus_ioctl, | 538 | .unlocked_ioctl = hfsplus_ioctl, |
| 543 | .llseek = generic_file_llseek, | 539 | .llseek = generic_file_llseek, |
| 544 | .release = hfsplus_dir_release, | 540 | .release = hfsplus_dir_release, |
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 32f35f187989..cddb05217512 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c | |||
| @@ -277,7 +277,7 @@ static const struct super_operations hostfs_sbops = { | |||
| 277 | .show_options = hostfs_show_options, | 277 | .show_options = hostfs_show_options, |
| 278 | }; | 278 | }; |
| 279 | 279 | ||
| 280 | int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | 280 | int hostfs_readdir(struct file *file, struct dir_context *ctx) |
| 281 | { | 281 | { |
| 282 | void *dir; | 282 | void *dir; |
| 283 | char *name; | 283 | char *name; |
| @@ -292,12 +292,11 @@ int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) | |||
| 292 | __putname(name); | 292 | __putname(name); |
| 293 | if (dir == NULL) | 293 | if (dir == NULL) |
| 294 | return -error; | 294 | return -error; |
| 295 | next = file->f_pos; | 295 | next = ctx->pos; |
| 296 | while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { | 296 | while ((name = read_dir(dir, &next, &ino, &len, &type)) != NULL) { |
| 297 | error = (*filldir)(ent, name, len, file->f_pos, | 297 | if (!dir_emit(ctx, name, len, ino, type)) |
| 298 | ino, type); | 298 | break; |
| 299 | if (error) break; | 299 | ctx->pos = next; |
| 300 | file->f_pos = next; | ||
| 301 | } | 300 | } |
| 302 | close_dir(dir); | 301 | close_dir(dir); |
| 303 | return 0; | 302 | return 0; |
| @@ -393,7 +392,7 @@ static const struct file_operations hostfs_file_fops = { | |||
| 393 | 392 | ||
| 394 | static const struct file_operations hostfs_dir_fops = { | 393 | static const struct file_operations hostfs_dir_fops = { |
| 395 | .llseek = generic_file_llseek, | 394 | .llseek = generic_file_llseek, |
| 396 | .readdir = hostfs_readdir, | 395 | .iterate = hostfs_readdir, |
| 397 | .read = generic_read_dir, | 396 | .read = generic_read_dir, |
| 398 | }; | 397 | }; |
| 399 | 398 | ||
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c index 834ac13c04b7..292b1acb9b81 100644 --- a/fs/hpfs/dir.c +++ b/fs/hpfs/dir.c | |||
| @@ -57,14 +57,14 @@ fail: | |||
| 57 | return -ESPIPE; | 57 | return -ESPIPE; |
| 58 | } | 58 | } |
| 59 | 59 | ||
| 60 | static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 60 | static int hpfs_readdir(struct file *file, struct dir_context *ctx) |
| 61 | { | 61 | { |
| 62 | struct inode *inode = file_inode(filp); | 62 | struct inode *inode = file_inode(file); |
| 63 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); | 63 | struct hpfs_inode_info *hpfs_inode = hpfs_i(inode); |
| 64 | struct quad_buffer_head qbh; | 64 | struct quad_buffer_head qbh; |
| 65 | struct hpfs_dirent *de; | 65 | struct hpfs_dirent *de; |
| 66 | int lc; | 66 | int lc; |
| 67 | long old_pos; | 67 | loff_t next_pos; |
| 68 | unsigned char *tempname; | 68 | unsigned char *tempname; |
| 69 | int c1, c2 = 0; | 69 | int c1, c2 = 0; |
| 70 | int ret = 0; | 70 | int ret = 0; |
| @@ -105,11 +105,11 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 105 | } | 105 | } |
| 106 | } | 106 | } |
| 107 | lc = hpfs_sb(inode->i_sb)->sb_lowercase; | 107 | lc = hpfs_sb(inode->i_sb)->sb_lowercase; |
| 108 | if (filp->f_pos == 12) { /* diff -r requires this (note, that diff -r */ | 108 | if (ctx->pos == 12) { /* diff -r requires this (note, that diff -r */ |
| 109 | filp->f_pos = 13; /* also fails on msdos filesystem in 2.0) */ | 109 | ctx->pos = 13; /* also fails on msdos filesystem in 2.0) */ |
| 110 | goto out; | 110 | goto out; |
| 111 | } | 111 | } |
| 112 | if (filp->f_pos == 13) { | 112 | if (ctx->pos == 13) { |
| 113 | ret = -ENOENT; | 113 | ret = -ENOENT; |
| 114 | goto out; | 114 | goto out; |
| 115 | } | 115 | } |
| @@ -120,33 +120,34 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 120 | accepted by filldir, but what can I do? | 120 | accepted by filldir, but what can I do? |
| 121 | maybe killall -9 ls helps */ | 121 | maybe killall -9 ls helps */ |
| 122 | if (hpfs_sb(inode->i_sb)->sb_chk) | 122 | if (hpfs_sb(inode->i_sb)->sb_chk) |
| 123 | if (hpfs_stop_cycles(inode->i_sb, filp->f_pos, &c1, &c2, "hpfs_readdir")) { | 123 | if (hpfs_stop_cycles(inode->i_sb, ctx->pos, &c1, &c2, "hpfs_readdir")) { |
| 124 | ret = -EFSERROR; | 124 | ret = -EFSERROR; |
| 125 | goto out; | 125 | goto out; |
| 126 | } | 126 | } |
| 127 | if (filp->f_pos == 12) | 127 | if (ctx->pos == 12) |
| 128 | goto out; | 128 | goto out; |
| 129 | if (filp->f_pos == 3 || filp->f_pos == 4 || filp->f_pos == 5) { | 129 | if (ctx->pos == 3 || ctx->pos == 4 || ctx->pos == 5) { |
| 130 | printk("HPFS: warning: pos==%d\n",(int)filp->f_pos); | 130 | printk("HPFS: warning: pos==%d\n",(int)ctx->pos); |
| 131 | goto out; | 131 | goto out; |
| 132 | } | 132 | } |
| 133 | if (filp->f_pos == 0) { | 133 | if (ctx->pos == 0) { |
| 134 | if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) | 134 | if (!dir_emit_dot(file, ctx)) |
| 135 | goto out; | 135 | goto out; |
| 136 | filp->f_pos = 11; | 136 | ctx->pos = 11; |
| 137 | } | 137 | } |
| 138 | if (filp->f_pos == 11) { | 138 | if (ctx->pos == 11) { |
| 139 | if (filldir(dirent, "..", 2, filp->f_pos, hpfs_inode->i_parent_dir, DT_DIR) < 0) | 139 | if (!dir_emit(ctx, "..", 2, hpfs_inode->i_parent_dir, DT_DIR)) |
| 140 | goto out; | 140 | goto out; |
| 141 | filp->f_pos = 1; | 141 | ctx->pos = 1; |
| 142 | } | 142 | } |
| 143 | if (filp->f_pos == 1) { | 143 | if (ctx->pos == 1) { |
| 144 | filp->f_pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; | 144 | ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; |
| 145 | hpfs_add_pos(inode, &filp->f_pos); | 145 | hpfs_add_pos(inode, &file->f_pos); |
| 146 | filp->f_version = inode->i_version; | 146 | file->f_version = inode->i_version; |
| 147 | } | 147 | } |
| 148 | old_pos = filp->f_pos; | 148 | next_pos = ctx->pos; |
| 149 | if (!(de = map_pos_dirent(inode, &filp->f_pos, &qbh))) { | 149 | if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) { |
| 150 | ctx->pos = next_pos; | ||
| 150 | ret = -EIOERROR; | 151 | ret = -EIOERROR; |
| 151 | goto out; | 152 | goto out; |
| 152 | } | 153 | } |
| @@ -154,20 +155,21 @@ static int hpfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 154 | if (hpfs_sb(inode->i_sb)->sb_chk) { | 155 | if (hpfs_sb(inode->i_sb)->sb_chk) { |
| 155 | if (de->first && !de->last && (de->namelen != 2 | 156 | if (de->first && !de->last && (de->namelen != 2 |
| 156 | || de ->name[0] != 1 || de->name[1] != 1)) | 157 | || de ->name[0] != 1 || de->name[1] != 1)) |
| 157 | hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", old_pos); | 158 | hpfs_error(inode->i_sb, "hpfs_readdir: bad ^A^A entry; pos = %08lx", (unsigned long)ctx->pos); |
| 158 | if (de->last && (de->namelen != 1 || de ->name[0] != 255)) | 159 | if (de->last && (de->namelen != 1 || de ->name[0] != 255)) |
| 159 | hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", old_pos); | 160 | hpfs_error(inode->i_sb, "hpfs_readdir: bad \\377 entry; pos = %08lx", (unsigned long)ctx->pos); |
| 160 | } | 161 | } |
| 161 | hpfs_brelse4(&qbh); | 162 | hpfs_brelse4(&qbh); |
| 163 | ctx->pos = next_pos; | ||
| 162 | goto again; | 164 | goto again; |
| 163 | } | 165 | } |
| 164 | tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); | 166 | tempname = hpfs_translate_name(inode->i_sb, de->name, de->namelen, lc, de->not_8x3); |
| 165 | if (filldir(dirent, tempname, de->namelen, old_pos, le32_to_cpu(de->fnode), DT_UNKNOWN) < 0) { | 167 | if (!dir_emit(ctx, tempname, de->namelen, le32_to_cpu(de->fnode), DT_UNKNOWN)) { |
| 166 | filp->f_pos = old_pos; | ||
| 167 | if (tempname != de->name) kfree(tempname); | 168 | if (tempname != de->name) kfree(tempname); |
| 168 | hpfs_brelse4(&qbh); | 169 | hpfs_brelse4(&qbh); |
| 169 | goto out; | 170 | goto out; |
| 170 | } | 171 | } |
| 172 | ctx->pos = next_pos; | ||
| 171 | if (tempname != de->name) kfree(tempname); | 173 | if (tempname != de->name) kfree(tempname); |
| 172 | hpfs_brelse4(&qbh); | 174 | hpfs_brelse4(&qbh); |
| 173 | } | 175 | } |
| @@ -322,7 +324,7 @@ const struct file_operations hpfs_dir_ops = | |||
| 322 | { | 324 | { |
| 323 | .llseek = hpfs_dir_lseek, | 325 | .llseek = hpfs_dir_lseek, |
| 324 | .read = generic_read_dir, | 326 | .read = generic_read_dir, |
| 325 | .readdir = hpfs_readdir, | 327 | .iterate = hpfs_readdir, |
| 326 | .release = hpfs_dir_release, | 328 | .release = hpfs_dir_release, |
| 327 | .fsync = hpfs_file_fsync, | 329 | .fsync = hpfs_file_fsync, |
| 328 | }; | 330 | }; |
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index cd3e38972c86..fc90ab11c340 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c | |||
| @@ -542,8 +542,8 @@ static const struct file_operations hppfs_file_fops = { | |||
| 542 | }; | 542 | }; |
| 543 | 543 | ||
| 544 | struct hppfs_dirent { | 544 | struct hppfs_dirent { |
| 545 | void *vfs_dirent; | 545 | struct dir_context ctx; |
| 546 | filldir_t filldir; | 546 | struct dir_context *caller; |
| 547 | struct dentry *dentry; | 547 | struct dentry *dentry; |
| 548 | }; | 548 | }; |
| 549 | 549 | ||
| @@ -555,34 +555,29 @@ static int hppfs_filldir(void *d, const char *name, int size, | |||
| 555 | if (file_removed(dirent->dentry, name)) | 555 | if (file_removed(dirent->dentry, name)) |
| 556 | return 0; | 556 | return 0; |
| 557 | 557 | ||
| 558 | return (*dirent->filldir)(dirent->vfs_dirent, name, size, offset, | 558 | dirent->caller->pos = dirent->ctx.pos; |
| 559 | inode, type); | 559 | return !dir_emit(dirent->caller, name, size, inode, type); |
| 560 | } | 560 | } |
| 561 | 561 | ||
| 562 | static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) | 562 | static int hppfs_readdir(struct file *file, struct dir_context *ctx) |
| 563 | { | 563 | { |
| 564 | struct hppfs_private *data = file->private_data; | 564 | struct hppfs_private *data = file->private_data; |
| 565 | struct file *proc_file = data->proc_file; | 565 | struct file *proc_file = data->proc_file; |
| 566 | int (*readdir)(struct file *, void *, filldir_t); | 566 | struct hppfs_dirent d = { |
| 567 | struct hppfs_dirent dirent = ((struct hppfs_dirent) | 567 | .ctx.actor = hppfs_filldir, |
| 568 | { .vfs_dirent = ent, | 568 | .caller = ctx, |
| 569 | .filldir = filldir, | 569 | .dentry = file->f_path.dentry |
| 570 | .dentry = file->f_path.dentry | 570 | }; |
| 571 | }); | ||
| 572 | int err; | 571 | int err; |
| 573 | 572 | proc_file->f_pos = ctx->pos; | |
| 574 | readdir = file_inode(proc_file)->i_fop->readdir; | 573 | err = iterate_dir(proc_file, &d.ctx); |
| 575 | 574 | ctx->pos = d.ctx.pos; | |
| 576 | proc_file->f_pos = file->f_pos; | ||
| 577 | err = (*readdir)(proc_file, &dirent, hppfs_filldir); | ||
| 578 | file->f_pos = proc_file->f_pos; | ||
| 579 | |||
| 580 | return err; | 575 | return err; |
| 581 | } | 576 | } |
| 582 | 577 | ||
| 583 | static const struct file_operations hppfs_dir_fops = { | 578 | static const struct file_operations hppfs_dir_fops = { |
| 584 | .owner = NULL, | 579 | .owner = NULL, |
| 585 | .readdir = hppfs_readdir, | 580 | .iterate = hppfs_readdir, |
| 586 | .open = hppfs_dir_open, | 581 | .open = hppfs_dir_open, |
| 587 | .llseek = default_llseek, | 582 | .llseek = default_llseek, |
| 588 | .release = hppfs_release, | 583 | .release = hppfs_release, |
diff --git a/fs/internal.h b/fs/internal.h index eaa75f75b625..68121584ae37 100644 --- a/fs/internal.h +++ b/fs/internal.h | |||
| @@ -132,6 +132,12 @@ extern struct dentry *__d_alloc(struct super_block *, const struct qstr *); | |||
| 132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); | 132 | extern ssize_t __kernel_write(struct file *, const char *, size_t, loff_t *); |
| 133 | 133 | ||
| 134 | /* | 134 | /* |
| 135 | * splice.c | ||
| 136 | */ | ||
| 137 | extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | ||
| 138 | loff_t *opos, size_t len, unsigned int flags); | ||
| 139 | |||
| 140 | /* | ||
| 135 | * pipe.c | 141 | * pipe.c |
| 136 | */ | 142 | */ |
| 137 | extern const struct file_operations pipefifo_fops; | 143 | extern const struct file_operations pipefifo_fops; |
diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index a7d5c3c3d4e6..b943cbd963bb 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c | |||
| @@ -78,8 +78,8 @@ int get_acorn_filename(struct iso_directory_record *de, | |||
| 78 | /* | 78 | /* |
| 79 | * This should _really_ be cleaned up some day.. | 79 | * This should _really_ be cleaned up some day.. |
| 80 | */ | 80 | */ |
| 81 | static int do_isofs_readdir(struct inode *inode, struct file *filp, | 81 | static int do_isofs_readdir(struct inode *inode, struct file *file, |
| 82 | void *dirent, filldir_t filldir, | 82 | struct dir_context *ctx, |
| 83 | char *tmpname, struct iso_directory_record *tmpde) | 83 | char *tmpname, struct iso_directory_record *tmpde) |
| 84 | { | 84 | { |
| 85 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); | 85 | unsigned long bufsize = ISOFS_BUFFER_SIZE(inode); |
| @@ -94,10 +94,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 94 | struct iso_directory_record *de; | 94 | struct iso_directory_record *de; |
| 95 | struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); | 95 | struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb); |
| 96 | 96 | ||
| 97 | offset = filp->f_pos & (bufsize - 1); | 97 | offset = ctx->pos & (bufsize - 1); |
| 98 | block = filp->f_pos >> bufbits; | 98 | block = ctx->pos >> bufbits; |
| 99 | 99 | ||
| 100 | while (filp->f_pos < inode->i_size) { | 100 | while (ctx->pos < inode->i_size) { |
| 101 | int de_len; | 101 | int de_len; |
| 102 | 102 | ||
| 103 | if (!bh) { | 103 | if (!bh) { |
| @@ -108,7 +108,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 108 | 108 | ||
| 109 | de = (struct iso_directory_record *) (bh->b_data + offset); | 109 | de = (struct iso_directory_record *) (bh->b_data + offset); |
| 110 | 110 | ||
| 111 | de_len = *(unsigned char *) de; | 111 | de_len = *(unsigned char *)de; |
| 112 | 112 | ||
| 113 | /* | 113 | /* |
| 114 | * If the length byte is zero, we should move on to the next | 114 | * If the length byte is zero, we should move on to the next |
| @@ -119,8 +119,8 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 119 | if (de_len == 0) { | 119 | if (de_len == 0) { |
| 120 | brelse(bh); | 120 | brelse(bh); |
| 121 | bh = NULL; | 121 | bh = NULL; |
| 122 | filp->f_pos = (filp->f_pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); | 122 | ctx->pos = (ctx->pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1); |
| 123 | block = filp->f_pos >> bufbits; | 123 | block = ctx->pos >> bufbits; |
| 124 | offset = 0; | 124 | offset = 0; |
| 125 | continue; | 125 | continue; |
| 126 | } | 126 | } |
| @@ -164,16 +164,16 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 164 | 164 | ||
| 165 | if (de->flags[-sbi->s_high_sierra] & 0x80) { | 165 | if (de->flags[-sbi->s_high_sierra] & 0x80) { |
| 166 | first_de = 0; | 166 | first_de = 0; |
| 167 | filp->f_pos += de_len; | 167 | ctx->pos += de_len; |
| 168 | continue; | 168 | continue; |
| 169 | } | 169 | } |
| 170 | first_de = 1; | 170 | first_de = 1; |
| 171 | 171 | ||
| 172 | /* Handle the case of the '.' directory */ | 172 | /* Handle the case of the '.' directory */ |
| 173 | if (de->name_len[0] == 1 && de->name[0] == 0) { | 173 | if (de->name_len[0] == 1 && de->name[0] == 0) { |
| 174 | if (filldir(dirent, ".", 1, filp->f_pos, inode->i_ino, DT_DIR) < 0) | 174 | if (!dir_emit_dot(file, ctx)) |
| 175 | break; | 175 | break; |
| 176 | filp->f_pos += de_len; | 176 | ctx->pos += de_len; |
| 177 | continue; | 177 | continue; |
| 178 | } | 178 | } |
| 179 | 179 | ||
| @@ -181,10 +181,9 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 181 | 181 | ||
| 182 | /* Handle the case of the '..' directory */ | 182 | /* Handle the case of the '..' directory */ |
| 183 | if (de->name_len[0] == 1 && de->name[0] == 1) { | 183 | if (de->name_len[0] == 1 && de->name[0] == 1) { |
| 184 | inode_number = parent_ino(filp->f_path.dentry); | 184 | if (!dir_emit_dotdot(file, ctx)) |
| 185 | if (filldir(dirent, "..", 2, filp->f_pos, inode_number, DT_DIR) < 0) | ||
| 186 | break; | 185 | break; |
| 187 | filp->f_pos += de_len; | 186 | ctx->pos += de_len; |
| 188 | continue; | 187 | continue; |
| 189 | } | 188 | } |
| 190 | 189 | ||
| @@ -198,7 +197,7 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 198 | if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) || | 197 | if ((sbi->s_hide && (de->flags[-sbi->s_high_sierra] & 1)) || |
| 199 | (!sbi->s_showassoc && | 198 | (!sbi->s_showassoc && |
| 200 | (de->flags[-sbi->s_high_sierra] & 4))) { | 199 | (de->flags[-sbi->s_high_sierra] & 4))) { |
| 201 | filp->f_pos += de_len; | 200 | ctx->pos += de_len; |
| 202 | continue; | 201 | continue; |
| 203 | } | 202 | } |
| 204 | 203 | ||
| @@ -230,10 +229,10 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 230 | } | 229 | } |
| 231 | } | 230 | } |
| 232 | if (len > 0) { | 231 | if (len > 0) { |
| 233 | if (filldir(dirent, p, len, filp->f_pos, inode_number, DT_UNKNOWN) < 0) | 232 | if (!dir_emit(ctx, p, len, inode_number, DT_UNKNOWN)) |
| 234 | break; | 233 | break; |
| 235 | } | 234 | } |
| 236 | filp->f_pos += de_len; | 235 | ctx->pos += de_len; |
| 237 | 236 | ||
| 238 | continue; | 237 | continue; |
| 239 | } | 238 | } |
| @@ -247,13 +246,12 @@ static int do_isofs_readdir(struct inode *inode, struct file *filp, | |||
| 247 | * handling split directory entries.. The real work is done by | 246 | * handling split directory entries.. The real work is done by |
| 248 | * "do_isofs_readdir()". | 247 | * "do_isofs_readdir()". |
| 249 | */ | 248 | */ |
| 250 | static int isofs_readdir(struct file *filp, | 249 | static int isofs_readdir(struct file *file, struct dir_context *ctx) |
| 251 | void *dirent, filldir_t filldir) | ||
| 252 | { | 250 | { |
| 253 | int result; | 251 | int result; |
| 254 | char *tmpname; | 252 | char *tmpname; |
| 255 | struct iso_directory_record *tmpde; | 253 | struct iso_directory_record *tmpde; |
| 256 | struct inode *inode = file_inode(filp); | 254 | struct inode *inode = file_inode(file); |
| 257 | 255 | ||
| 258 | tmpname = (char *)__get_free_page(GFP_KERNEL); | 256 | tmpname = (char *)__get_free_page(GFP_KERNEL); |
| 259 | if (tmpname == NULL) | 257 | if (tmpname == NULL) |
| @@ -261,7 +259,7 @@ static int isofs_readdir(struct file *filp, | |||
| 261 | 259 | ||
| 262 | tmpde = (struct iso_directory_record *) (tmpname+1024); | 260 | tmpde = (struct iso_directory_record *) (tmpname+1024); |
| 263 | 261 | ||
| 264 | result = do_isofs_readdir(inode, filp, dirent, filldir, tmpname, tmpde); | 262 | result = do_isofs_readdir(inode, file, ctx, tmpname, tmpde); |
| 265 | 263 | ||
| 266 | free_page((unsigned long) tmpname); | 264 | free_page((unsigned long) tmpname); |
| 267 | return result; | 265 | return result; |
| @@ -271,7 +269,7 @@ const struct file_operations isofs_dir_operations = | |||
| 271 | { | 269 | { |
| 272 | .llseek = generic_file_llseek, | 270 | .llseek = generic_file_llseek, |
| 273 | .read = generic_read_dir, | 271 | .read = generic_read_dir, |
| 274 | .readdir = isofs_readdir, | 272 | .iterate = isofs_readdir, |
| 275 | }; | 273 | }; |
| 276 | 274 | ||
| 277 | /* | 275 | /* |
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index e3e255c0a509..be0c39b66fe0 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
| @@ -2019,16 +2019,20 @@ zap_buffer_unlocked: | |||
| 2019 | * void journal_invalidatepage() - invalidate a journal page | 2019 | * void journal_invalidatepage() - invalidate a journal page |
| 2020 | * @journal: journal to use for flush | 2020 | * @journal: journal to use for flush |
| 2021 | * @page: page to flush | 2021 | * @page: page to flush |
| 2022 | * @offset: length of page to invalidate. | 2022 | * @offset: offset of the range to invalidate |
| 2023 | * @length: length of the range to invalidate | ||
| 2023 | * | 2024 | * |
| 2024 | * Reap page buffers containing data after offset in page. | 2025 | * Reap page buffers containing data in specified range in page. |
| 2025 | */ | 2026 | */ |
| 2026 | void journal_invalidatepage(journal_t *journal, | 2027 | void journal_invalidatepage(journal_t *journal, |
| 2027 | struct page *page, | 2028 | struct page *page, |
| 2028 | unsigned long offset) | 2029 | unsigned int offset, |
| 2030 | unsigned int length) | ||
| 2029 | { | 2031 | { |
| 2030 | struct buffer_head *head, *bh, *next; | 2032 | struct buffer_head *head, *bh, *next; |
| 2033 | unsigned int stop = offset + length; | ||
| 2031 | unsigned int curr_off = 0; | 2034 | unsigned int curr_off = 0; |
| 2035 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
| 2032 | int may_free = 1; | 2036 | int may_free = 1; |
| 2033 | 2037 | ||
| 2034 | if (!PageLocked(page)) | 2038 | if (!PageLocked(page)) |
| @@ -2036,6 +2040,8 @@ void journal_invalidatepage(journal_t *journal, | |||
| 2036 | if (!page_has_buffers(page)) | 2040 | if (!page_has_buffers(page)) |
| 2037 | return; | 2041 | return; |
| 2038 | 2042 | ||
| 2043 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
| 2044 | |||
| 2039 | /* We will potentially be playing with lists other than just the | 2045 | /* We will potentially be playing with lists other than just the |
| 2040 | * data lists (especially for journaled data mode), so be | 2046 | * data lists (especially for journaled data mode), so be |
| 2041 | * cautious in our locking. */ | 2047 | * cautious in our locking. */ |
| @@ -2045,11 +2051,14 @@ void journal_invalidatepage(journal_t *journal, | |||
| 2045 | unsigned int next_off = curr_off + bh->b_size; | 2051 | unsigned int next_off = curr_off + bh->b_size; |
| 2046 | next = bh->b_this_page; | 2052 | next = bh->b_this_page; |
| 2047 | 2053 | ||
| 2054 | if (next_off > stop) | ||
| 2055 | return; | ||
| 2056 | |||
| 2048 | if (offset <= curr_off) { | 2057 | if (offset <= curr_off) { |
| 2049 | /* This block is wholly outside the truncation point */ | 2058 | /* This block is wholly outside the truncation point */ |
| 2050 | lock_buffer(bh); | 2059 | lock_buffer(bh); |
| 2051 | may_free &= journal_unmap_buffer(journal, bh, | 2060 | may_free &= journal_unmap_buffer(journal, bh, |
| 2052 | offset > 0); | 2061 | partial_page); |
| 2053 | unlock_buffer(bh); | 2062 | unlock_buffer(bh); |
| 2054 | } | 2063 | } |
| 2055 | curr_off = next_off; | 2064 | curr_off = next_off; |
| @@ -2057,7 +2066,7 @@ void journal_invalidatepage(journal_t *journal, | |||
| 2057 | 2066 | ||
| 2058 | } while (bh != head); | 2067 | } while (bh != head); |
| 2059 | 2068 | ||
| 2060 | if (!offset) { | 2069 | if (!partial_page) { |
| 2061 | if (may_free && try_to_free_buffers(page)) | 2070 | if (may_free && try_to_free_buffers(page)) |
| 2062 | J_ASSERT(!page_has_buffers(page)); | 2071 | J_ASSERT(!page_has_buffers(page)); |
| 2063 | } | 2072 | } |
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 69a48c2944da..5a9f5534d57b 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig | |||
| @@ -20,7 +20,7 @@ config JBD2 | |||
| 20 | 20 | ||
| 21 | config JBD2_DEBUG | 21 | config JBD2_DEBUG |
| 22 | bool "JBD2 (ext4) debugging support" | 22 | bool "JBD2 (ext4) debugging support" |
| 23 | depends on JBD2 && DEBUG_FS | 23 | depends on JBD2 |
| 24 | help | 24 | help |
| 25 | If you are using the ext4 journaled file system (or | 25 | If you are using the ext4 journaled file system (or |
| 26 | potentially any other filesystem/device using JBD2), this option | 26 | potentially any other filesystem/device using JBD2), this option |
| @@ -29,7 +29,7 @@ config JBD2_DEBUG | |||
| 29 | By default, the debugging output will be turned off. | 29 | By default, the debugging output will be turned off. |
| 30 | 30 | ||
| 31 | If you select Y here, then you will be able to turn on debugging | 31 | If you select Y here, then you will be able to turn on debugging |
| 32 | with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a | 32 | with "echo N > /sys/module/jbd2/parameters/jbd2_debug", where N is a |
| 33 | number between 1 and 5. The higher the number, the more debugging | 33 | number between 1 and 5. The higher the number, the more debugging |
| 34 | output is generated. To turn debugging off again, do | 34 | output is generated. To turn debugging off again, do |
| 35 | "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". | 35 | "echo 0 > /sys/module/jbd2/parameters/jbd2_debug". |
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81cf..7f34f4716165 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 120 | int nblocks, space_left; | 120 | int nblocks, space_left; |
| 121 | /* assert_spin_locked(&journal->j_state_lock); */ | 121 | /* assert_spin_locked(&journal->j_state_lock); */ |
| 122 | 122 | ||
| 123 | nblocks = jbd_space_needed(journal); | 123 | nblocks = jbd2_space_needed(journal); |
| 124 | while (__jbd2_log_space_left(journal) < nblocks) { | 124 | while (jbd2_log_space_left(journal) < nblocks) { |
| 125 | if (journal->j_flags & JBD2_ABORT) | 125 | if (journal->j_flags & JBD2_ABORT) |
| 126 | return; | 126 | return; |
| 127 | write_unlock(&journal->j_state_lock); | 127 | write_unlock(&journal->j_state_lock); |
| @@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 140 | */ | 140 | */ |
| 141 | write_lock(&journal->j_state_lock); | 141 | write_lock(&journal->j_state_lock); |
| 142 | spin_lock(&journal->j_list_lock); | 142 | spin_lock(&journal->j_list_lock); |
| 143 | nblocks = jbd_space_needed(journal); | 143 | nblocks = jbd2_space_needed(journal); |
| 144 | space_left = __jbd2_log_space_left(journal); | 144 | space_left = jbd2_log_space_left(journal); |
| 145 | if (space_left < nblocks) { | 145 | if (space_left < nblocks) { |
| 146 | int chkpt = journal->j_checkpoint_transactions != NULL; | 146 | int chkpt = journal->j_checkpoint_transactions != NULL; |
| 147 | tid_t tid = 0; | 147 | tid_t tid = 0; |
| @@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 156 | /* We were able to recover space; yay! */ | 156 | /* We were able to recover space; yay! */ |
| 157 | ; | 157 | ; |
| 158 | } else if (tid) { | 158 | } else if (tid) { |
| 159 | /* | ||
| 160 | * jbd2_journal_commit_transaction() may want | ||
| 161 | * to take the checkpoint_mutex if JBD2_FLUSHED | ||
| 162 | * is set. So we need to temporarily drop it. | ||
| 163 | */ | ||
| 164 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
| 159 | jbd2_log_wait_commit(journal, tid); | 165 | jbd2_log_wait_commit(journal, tid); |
| 166 | write_lock(&journal->j_state_lock); | ||
| 167 | continue; | ||
| 160 | } else { | 168 | } else { |
| 161 | printk(KERN_ERR "%s: needed %d blocks and " | 169 | printk(KERN_ERR "%s: needed %d blocks and " |
| 162 | "only had %d space available\n", | 170 | "only had %d space available\n", |
| @@ -625,10 +633,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) | |||
| 625 | 633 | ||
| 626 | __jbd2_journal_drop_transaction(journal, transaction); | 634 | __jbd2_journal_drop_transaction(journal, transaction); |
| 627 | jbd2_journal_free_transaction(transaction); | 635 | jbd2_journal_free_transaction(transaction); |
| 628 | |||
| 629 | /* Just in case anybody was waiting for more transactions to be | ||
| 630 | checkpointed... */ | ||
| 631 | wake_up(&journal->j_wait_logspace); | ||
| 632 | ret = 1; | 636 | ret = 1; |
| 633 | out: | 637 | out: |
| 634 | return ret; | 638 | return ret; |
| @@ -690,9 +694,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact | |||
| 690 | J_ASSERT(transaction->t_state == T_FINISHED); | 694 | J_ASSERT(transaction->t_state == T_FINISHED); |
| 691 | J_ASSERT(transaction->t_buffers == NULL); | 695 | J_ASSERT(transaction->t_buffers == NULL); |
| 692 | J_ASSERT(transaction->t_forget == NULL); | 696 | J_ASSERT(transaction->t_forget == NULL); |
| 693 | J_ASSERT(transaction->t_iobuf_list == NULL); | ||
| 694 | J_ASSERT(transaction->t_shadow_list == NULL); | 697 | J_ASSERT(transaction->t_shadow_list == NULL); |
| 695 | J_ASSERT(transaction->t_log_list == NULL); | ||
| 696 | J_ASSERT(transaction->t_checkpoint_list == NULL); | 698 | J_ASSERT(transaction->t_checkpoint_list == NULL); |
| 697 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); | 699 | J_ASSERT(transaction->t_checkpoint_io_list == NULL); |
| 698 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); | 700 | J_ASSERT(atomic_read(&transaction->t_updates) == 0); |
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 0f53946f13c1..559bec1a37b4 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -30,15 +30,22 @@ | |||
| 30 | #include <trace/events/jbd2.h> | 30 | #include <trace/events/jbd2.h> |
| 31 | 31 | ||
| 32 | /* | 32 | /* |
| 33 | * Default IO end handler for temporary BJ_IO buffer_heads. | 33 | * IO end handler for temporary buffer_heads handling writes to the journal. |
| 34 | */ | 34 | */ |
| 35 | static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) | 35 | static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) |
| 36 | { | 36 | { |
| 37 | struct buffer_head *orig_bh = bh->b_private; | ||
| 38 | |||
| 37 | BUFFER_TRACE(bh, ""); | 39 | BUFFER_TRACE(bh, ""); |
| 38 | if (uptodate) | 40 | if (uptodate) |
| 39 | set_buffer_uptodate(bh); | 41 | set_buffer_uptodate(bh); |
| 40 | else | 42 | else |
| 41 | clear_buffer_uptodate(bh); | 43 | clear_buffer_uptodate(bh); |
| 44 | if (orig_bh) { | ||
| 45 | clear_bit_unlock(BH_Shadow, &orig_bh->b_state); | ||
| 46 | smp_mb__after_clear_bit(); | ||
| 47 | wake_up_bit(&orig_bh->b_state, BH_Shadow); | ||
| 48 | } | ||
| 42 | unlock_buffer(bh); | 49 | unlock_buffer(bh); |
| 43 | } | 50 | } |
| 44 | 51 | ||
| @@ -85,8 +92,7 @@ nope: | |||
| 85 | __brelse(bh); | 92 | __brelse(bh); |
| 86 | } | 93 | } |
| 87 | 94 | ||
| 88 | static void jbd2_commit_block_csum_set(journal_t *j, | 95 | static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) |
| 89 | struct journal_head *descriptor) | ||
| 90 | { | 96 | { |
| 91 | struct commit_header *h; | 97 | struct commit_header *h; |
| 92 | __u32 csum; | 98 | __u32 csum; |
| @@ -94,12 +100,11 @@ static void jbd2_commit_block_csum_set(journal_t *j, | |||
| 94 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 100 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 95 | return; | 101 | return; |
| 96 | 102 | ||
| 97 | h = (struct commit_header *)(jh2bh(descriptor)->b_data); | 103 | h = (struct commit_header *)(bh->b_data); |
| 98 | h->h_chksum_type = 0; | 104 | h->h_chksum_type = 0; |
| 99 | h->h_chksum_size = 0; | 105 | h->h_chksum_size = 0; |
| 100 | h->h_chksum[0] = 0; | 106 | h->h_chksum[0] = 0; |
| 101 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 107 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
| 102 | j->j_blocksize); | ||
| 103 | h->h_chksum[0] = cpu_to_be32(csum); | 108 | h->h_chksum[0] = cpu_to_be32(csum); |
| 104 | } | 109 | } |
| 105 | 110 | ||
| @@ -116,7 +121,6 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 116 | struct buffer_head **cbh, | 121 | struct buffer_head **cbh, |
| 117 | __u32 crc32_sum) | 122 | __u32 crc32_sum) |
| 118 | { | 123 | { |
| 119 | struct journal_head *descriptor; | ||
| 120 | struct commit_header *tmp; | 124 | struct commit_header *tmp; |
| 121 | struct buffer_head *bh; | 125 | struct buffer_head *bh; |
| 122 | int ret; | 126 | int ret; |
| @@ -127,12 +131,10 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 127 | if (is_journal_aborted(journal)) | 131 | if (is_journal_aborted(journal)) |
| 128 | return 0; | 132 | return 0; |
| 129 | 133 | ||
| 130 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 134 | bh = jbd2_journal_get_descriptor_buffer(journal); |
| 131 | if (!descriptor) | 135 | if (!bh) |
| 132 | return 1; | 136 | return 1; |
| 133 | 137 | ||
| 134 | bh = jh2bh(descriptor); | ||
| 135 | |||
| 136 | tmp = (struct commit_header *)bh->b_data; | 138 | tmp = (struct commit_header *)bh->b_data; |
| 137 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 139 | tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| 138 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); | 140 | tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); |
| @@ -146,9 +148,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 146 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; | 148 | tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; |
| 147 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); | 149 | tmp->h_chksum[0] = cpu_to_be32(crc32_sum); |
| 148 | } | 150 | } |
| 149 | jbd2_commit_block_csum_set(journal, descriptor); | 151 | jbd2_commit_block_csum_set(journal, bh); |
| 150 | 152 | ||
| 151 | JBUFFER_TRACE(descriptor, "submit commit block"); | 153 | BUFFER_TRACE(bh, "submit commit block"); |
| 152 | lock_buffer(bh); | 154 | lock_buffer(bh); |
| 153 | clear_buffer_dirty(bh); | 155 | clear_buffer_dirty(bh); |
| 154 | set_buffer_uptodate(bh); | 156 | set_buffer_uptodate(bh); |
| @@ -180,7 +182,6 @@ static int journal_wait_on_commit_record(journal_t *journal, | |||
| 180 | if (unlikely(!buffer_uptodate(bh))) | 182 | if (unlikely(!buffer_uptodate(bh))) |
| 181 | ret = -EIO; | 183 | ret = -EIO; |
| 182 | put_bh(bh); /* One for getblk() */ | 184 | put_bh(bh); /* One for getblk() */ |
| 183 | jbd2_journal_put_journal_head(bh2jh(bh)); | ||
| 184 | 185 | ||
| 185 | return ret; | 186 | return ret; |
| 186 | } | 187 | } |
| @@ -321,7 +322,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, | |||
| 321 | } | 322 | } |
| 322 | 323 | ||
| 323 | static void jbd2_descr_block_csum_set(journal_t *j, | 324 | static void jbd2_descr_block_csum_set(journal_t *j, |
| 324 | struct journal_head *descriptor) | 325 | struct buffer_head *bh) |
| 325 | { | 326 | { |
| 326 | struct jbd2_journal_block_tail *tail; | 327 | struct jbd2_journal_block_tail *tail; |
| 327 | __u32 csum; | 328 | __u32 csum; |
| @@ -329,12 +330,10 @@ static void jbd2_descr_block_csum_set(journal_t *j, | |||
| 329 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 330 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 330 | return; | 331 | return; |
| 331 | 332 | ||
| 332 | tail = (struct jbd2_journal_block_tail *) | 333 | tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - |
| 333 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
| 334 | sizeof(struct jbd2_journal_block_tail)); | 334 | sizeof(struct jbd2_journal_block_tail)); |
| 335 | tail->t_checksum = 0; | 335 | tail->t_checksum = 0; |
| 336 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 336 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
| 337 | j->j_blocksize); | ||
| 338 | tail->t_checksum = cpu_to_be32(csum); | 337 | tail->t_checksum = cpu_to_be32(csum); |
| 339 | } | 338 | } |
| 340 | 339 | ||
| @@ -343,20 +342,21 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, | |||
| 343 | { | 342 | { |
| 344 | struct page *page = bh->b_page; | 343 | struct page *page = bh->b_page; |
| 345 | __u8 *addr; | 344 | __u8 *addr; |
| 346 | __u32 csum; | 345 | __u32 csum32; |
| 347 | 346 | ||
| 348 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 347 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 349 | return; | 348 | return; |
| 350 | 349 | ||
| 351 | sequence = cpu_to_be32(sequence); | 350 | sequence = cpu_to_be32(sequence); |
| 352 | addr = kmap_atomic(page); | 351 | addr = kmap_atomic(page); |
| 353 | csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 352 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, |
| 354 | sizeof(sequence)); | 353 | sizeof(sequence)); |
| 355 | csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), | 354 | csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), |
| 356 | bh->b_size); | 355 | bh->b_size); |
| 357 | kunmap_atomic(addr); | 356 | kunmap_atomic(addr); |
| 358 | 357 | ||
| 359 | tag->t_checksum = cpu_to_be32(csum); | 358 | /* We only have space to store the lower 16 bits of the crc32c. */ |
| 359 | tag->t_checksum = cpu_to_be16(csum32); | ||
| 360 | } | 360 | } |
| 361 | /* | 361 | /* |
| 362 | * jbd2_journal_commit_transaction | 362 | * jbd2_journal_commit_transaction |
| @@ -368,7 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 368 | { | 368 | { |
| 369 | struct transaction_stats_s stats; | 369 | struct transaction_stats_s stats; |
| 370 | transaction_t *commit_transaction; | 370 | transaction_t *commit_transaction; |
| 371 | struct journal_head *jh, *new_jh, *descriptor; | 371 | struct journal_head *jh; |
| 372 | struct buffer_head *descriptor; | ||
| 372 | struct buffer_head **wbuf = journal->j_wbuf; | 373 | struct buffer_head **wbuf = journal->j_wbuf; |
| 373 | int bufs; | 374 | int bufs; |
| 374 | int flags; | 375 | int flags; |
| @@ -392,6 +393,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 392 | tid_t first_tid; | 393 | tid_t first_tid; |
| 393 | int update_tail; | 394 | int update_tail; |
| 394 | int csum_size = 0; | 395 | int csum_size = 0; |
| 396 | LIST_HEAD(io_bufs); | ||
| 397 | LIST_HEAD(log_bufs); | ||
| 395 | 398 | ||
| 396 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 399 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 397 | csum_size = sizeof(struct jbd2_journal_block_tail); | 400 | csum_size = sizeof(struct jbd2_journal_block_tail); |
| @@ -424,13 +427,13 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 424 | J_ASSERT(journal->j_committing_transaction == NULL); | 427 | J_ASSERT(journal->j_committing_transaction == NULL); |
| 425 | 428 | ||
| 426 | commit_transaction = journal->j_running_transaction; | 429 | commit_transaction = journal->j_running_transaction; |
| 427 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | ||
| 428 | 430 | ||
| 429 | trace_jbd2_start_commit(journal, commit_transaction); | 431 | trace_jbd2_start_commit(journal, commit_transaction); |
| 430 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", | 432 | jbd_debug(1, "JBD2: starting commit of transaction %d\n", |
| 431 | commit_transaction->t_tid); | 433 | commit_transaction->t_tid); |
| 432 | 434 | ||
| 433 | write_lock(&journal->j_state_lock); | 435 | write_lock(&journal->j_state_lock); |
| 436 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | ||
| 434 | commit_transaction->t_state = T_LOCKED; | 437 | commit_transaction->t_state = T_LOCKED; |
| 435 | 438 | ||
| 436 | trace_jbd2_commit_locking(journal, commit_transaction); | 439 | trace_jbd2_commit_locking(journal, commit_transaction); |
| @@ -520,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 520 | */ | 523 | */ |
| 521 | jbd2_journal_switch_revoke_table(journal); | 524 | jbd2_journal_switch_revoke_table(journal); |
| 522 | 525 | ||
| 526 | /* | ||
| 527 | * Reserved credits cannot be claimed anymore, free them | ||
| 528 | */ | ||
| 529 | atomic_sub(atomic_read(&journal->j_reserved_credits), | ||
| 530 | &commit_transaction->t_outstanding_credits); | ||
| 531 | |||
| 523 | trace_jbd2_commit_flushing(journal, commit_transaction); | 532 | trace_jbd2_commit_flushing(journal, commit_transaction); |
| 524 | stats.run.rs_flushing = jiffies; | 533 | stats.run.rs_flushing = jiffies; |
| 525 | stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, | 534 | stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, |
| @@ -533,7 +542,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 533 | wake_up(&journal->j_wait_transaction_locked); | 542 | wake_up(&journal->j_wait_transaction_locked); |
| 534 | write_unlock(&journal->j_state_lock); | 543 | write_unlock(&journal->j_state_lock); |
| 535 | 544 | ||
| 536 | jbd_debug(3, "JBD2: commit phase 2\n"); | 545 | jbd_debug(3, "JBD2: commit phase 2a\n"); |
| 537 | 546 | ||
| 538 | /* | 547 | /* |
| 539 | * Now start flushing things to disk, in the order they appear | 548 | * Now start flushing things to disk, in the order they appear |
| @@ -545,10 +554,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 545 | 554 | ||
| 546 | blk_start_plug(&plug); | 555 | blk_start_plug(&plug); |
| 547 | jbd2_journal_write_revoke_records(journal, commit_transaction, | 556 | jbd2_journal_write_revoke_records(journal, commit_transaction, |
| 548 | WRITE_SYNC); | 557 | &log_bufs, WRITE_SYNC); |
| 549 | blk_finish_plug(&plug); | 558 | blk_finish_plug(&plug); |
| 550 | 559 | ||
| 551 | jbd_debug(3, "JBD2: commit phase 2\n"); | 560 | jbd_debug(3, "JBD2: commit phase 2b\n"); |
| 552 | 561 | ||
| 553 | /* | 562 | /* |
| 554 | * Way to go: we have now written out all of the data for a | 563 | * Way to go: we have now written out all of the data for a |
| @@ -571,8 +580,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 571 | atomic_read(&commit_transaction->t_outstanding_credits)); | 580 | atomic_read(&commit_transaction->t_outstanding_credits)); |
| 572 | 581 | ||
| 573 | err = 0; | 582 | err = 0; |
| 574 | descriptor = NULL; | ||
| 575 | bufs = 0; | 583 | bufs = 0; |
| 584 | descriptor = NULL; | ||
| 576 | blk_start_plug(&plug); | 585 | blk_start_plug(&plug); |
| 577 | while (commit_transaction->t_buffers) { | 586 | while (commit_transaction->t_buffers) { |
| 578 | 587 | ||
| @@ -604,8 +613,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 604 | record the metadata buffer. */ | 613 | record the metadata buffer. */ |
| 605 | 614 | ||
| 606 | if (!descriptor) { | 615 | if (!descriptor) { |
| 607 | struct buffer_head *bh; | ||
| 608 | |||
| 609 | J_ASSERT (bufs == 0); | 616 | J_ASSERT (bufs == 0); |
| 610 | 617 | ||
| 611 | jbd_debug(4, "JBD2: get descriptor\n"); | 618 | jbd_debug(4, "JBD2: get descriptor\n"); |
| @@ -616,26 +623,26 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 616 | continue; | 623 | continue; |
| 617 | } | 624 | } |
| 618 | 625 | ||
| 619 | bh = jh2bh(descriptor); | ||
| 620 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", | 626 | jbd_debug(4, "JBD2: got buffer %llu (%p)\n", |
| 621 | (unsigned long long)bh->b_blocknr, bh->b_data); | 627 | (unsigned long long)descriptor->b_blocknr, |
| 622 | header = (journal_header_t *)&bh->b_data[0]; | 628 | descriptor->b_data); |
| 629 | header = (journal_header_t *)descriptor->b_data; | ||
| 623 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 630 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| 624 | header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); | 631 | header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); |
| 625 | header->h_sequence = cpu_to_be32(commit_transaction->t_tid); | 632 | header->h_sequence = cpu_to_be32(commit_transaction->t_tid); |
| 626 | 633 | ||
| 627 | tagp = &bh->b_data[sizeof(journal_header_t)]; | 634 | tagp = &descriptor->b_data[sizeof(journal_header_t)]; |
| 628 | space_left = bh->b_size - sizeof(journal_header_t); | 635 | space_left = descriptor->b_size - |
| 636 | sizeof(journal_header_t); | ||
| 629 | first_tag = 1; | 637 | first_tag = 1; |
| 630 | set_buffer_jwrite(bh); | 638 | set_buffer_jwrite(descriptor); |
| 631 | set_buffer_dirty(bh); | 639 | set_buffer_dirty(descriptor); |
| 632 | wbuf[bufs++] = bh; | 640 | wbuf[bufs++] = descriptor; |
| 633 | 641 | ||
| 634 | /* Record it so that we can wait for IO | 642 | /* Record it so that we can wait for IO |
| 635 | completion later */ | 643 | completion later */ |
| 636 | BUFFER_TRACE(bh, "ph3: file as descriptor"); | 644 | BUFFER_TRACE(descriptor, "ph3: file as descriptor"); |
| 637 | jbd2_journal_file_buffer(descriptor, commit_transaction, | 645 | jbd2_file_log_bh(&log_bufs, descriptor); |
| 638 | BJ_LogCtl); | ||
| 639 | } | 646 | } |
| 640 | 647 | ||
| 641 | /* Where is the buffer to be written? */ | 648 | /* Where is the buffer to be written? */ |
| @@ -658,29 +665,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 658 | 665 | ||
| 659 | /* Bump b_count to prevent truncate from stumbling over | 666 | /* Bump b_count to prevent truncate from stumbling over |
| 660 | the shadowed buffer! @@@ This can go if we ever get | 667 | the shadowed buffer! @@@ This can go if we ever get |
| 661 | rid of the BJ_IO/BJ_Shadow pairing of buffers. */ | 668 | rid of the shadow pairing of buffers. */ |
| 662 | atomic_inc(&jh2bh(jh)->b_count); | 669 | atomic_inc(&jh2bh(jh)->b_count); |
| 663 | 670 | ||
| 664 | /* Make a temporary IO buffer with which to write it out | ||
| 665 | (this will requeue both the metadata buffer and the | ||
| 666 | temporary IO buffer). new_bh goes on BJ_IO*/ | ||
| 667 | |||
| 668 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
| 669 | /* | 671 | /* |
| 670 | * akpm: jbd2_journal_write_metadata_buffer() sets | 672 | * Make a temporary IO buffer with which to write it out |
| 671 | * new_bh->b_transaction to commit_transaction. | 673 | * (this will requeue the metadata buffer to BJ_Shadow). |
| 672 | * We need to clean this up before we release new_bh | ||
| 673 | * (which is of type BJ_IO) | ||
| 674 | */ | 674 | */ |
| 675 | set_bit(BH_JWrite, &jh2bh(jh)->b_state); | ||
| 675 | JBUFFER_TRACE(jh, "ph3: write metadata"); | 676 | JBUFFER_TRACE(jh, "ph3: write metadata"); |
| 676 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, | 677 | flags = jbd2_journal_write_metadata_buffer(commit_transaction, |
| 677 | jh, &new_jh, blocknr); | 678 | jh, &wbuf[bufs], blocknr); |
| 678 | if (flags < 0) { | 679 | if (flags < 0) { |
| 679 | jbd2_journal_abort(journal, flags); | 680 | jbd2_journal_abort(journal, flags); |
| 680 | continue; | 681 | continue; |
| 681 | } | 682 | } |
| 682 | set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); | 683 | jbd2_file_log_bh(&io_bufs, wbuf[bufs]); |
| 683 | wbuf[bufs++] = jh2bh(new_jh); | ||
| 684 | 684 | ||
| 685 | /* Record the new block's tag in the current descriptor | 685 | /* Record the new block's tag in the current descriptor |
| 686 | buffer */ | 686 | buffer */ |
| @@ -694,10 +694,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 694 | tag = (journal_block_tag_t *) tagp; | 694 | tag = (journal_block_tag_t *) tagp; |
| 695 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); | 695 | write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); |
| 696 | tag->t_flags = cpu_to_be16(tag_flag); | 696 | tag->t_flags = cpu_to_be16(tag_flag); |
| 697 | jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), | 697 | jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], |
| 698 | commit_transaction->t_tid); | 698 | commit_transaction->t_tid); |
| 699 | tagp += tag_bytes; | 699 | tagp += tag_bytes; |
| 700 | space_left -= tag_bytes; | 700 | space_left -= tag_bytes; |
| 701 | bufs++; | ||
| 701 | 702 | ||
| 702 | if (first_tag) { | 703 | if (first_tag) { |
| 703 | memcpy (tagp, journal->j_uuid, 16); | 704 | memcpy (tagp, journal->j_uuid, 16); |
| @@ -809,7 +810,7 @@ start_journal_io: | |||
| 809 | the log. Before we can commit it, wait for the IO so far to | 810 | the log. Before we can commit it, wait for the IO so far to |
| 810 | complete. Control buffers being written are on the | 811 | complete. Control buffers being written are on the |
| 811 | transaction's t_log_list queue, and metadata buffers are on | 812 | transaction's t_log_list queue, and metadata buffers are on |
| 812 | the t_iobuf_list queue. | 813 | the io_bufs list. |
| 813 | 814 | ||
| 814 | Wait for the buffers in reverse order. That way we are | 815 | Wait for the buffers in reverse order. That way we are |
| 815 | less likely to be woken up until all IOs have completed, and | 816 | less likely to be woken up until all IOs have completed, and |
| @@ -818,47 +819,33 @@ start_journal_io: | |||
| 818 | 819 | ||
| 819 | jbd_debug(3, "JBD2: commit phase 3\n"); | 820 | jbd_debug(3, "JBD2: commit phase 3\n"); |
| 820 | 821 | ||
| 821 | /* | 822 | while (!list_empty(&io_bufs)) { |
| 822 | * akpm: these are BJ_IO, and j_list_lock is not needed. | 823 | struct buffer_head *bh = list_entry(io_bufs.prev, |
| 823 | * See __journal_try_to_free_buffer. | 824 | struct buffer_head, |
| 824 | */ | 825 | b_assoc_buffers); |
| 825 | wait_for_iobuf: | ||
| 826 | while (commit_transaction->t_iobuf_list != NULL) { | ||
| 827 | struct buffer_head *bh; | ||
| 828 | 826 | ||
| 829 | jh = commit_transaction->t_iobuf_list->b_tprev; | 827 | wait_on_buffer(bh); |
| 830 | bh = jh2bh(jh); | 828 | cond_resched(); |
| 831 | if (buffer_locked(bh)) { | ||
| 832 | wait_on_buffer(bh); | ||
| 833 | goto wait_for_iobuf; | ||
| 834 | } | ||
| 835 | if (cond_resched()) | ||
| 836 | goto wait_for_iobuf; | ||
| 837 | 829 | ||
| 838 | if (unlikely(!buffer_uptodate(bh))) | 830 | if (unlikely(!buffer_uptodate(bh))) |
| 839 | err = -EIO; | 831 | err = -EIO; |
| 840 | 832 | jbd2_unfile_log_bh(bh); | |
| 841 | clear_buffer_jwrite(bh); | ||
| 842 | |||
| 843 | JBUFFER_TRACE(jh, "ph4: unfile after journal write"); | ||
| 844 | jbd2_journal_unfile_buffer(journal, jh); | ||
| 845 | 833 | ||
| 846 | /* | 834 | /* |
| 847 | * ->t_iobuf_list should contain only dummy buffer_heads | 835 | * The list contains temporary buffer heads created by |
| 848 | * which were created by jbd2_journal_write_metadata_buffer(). | 836 | * jbd2_journal_write_metadata_buffer(). |
| 849 | */ | 837 | */ |
| 850 | BUFFER_TRACE(bh, "dumping temporary bh"); | 838 | BUFFER_TRACE(bh, "dumping temporary bh"); |
| 851 | jbd2_journal_put_journal_head(jh); | ||
| 852 | __brelse(bh); | 839 | __brelse(bh); |
| 853 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); | 840 | J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); |
| 854 | free_buffer_head(bh); | 841 | free_buffer_head(bh); |
| 855 | 842 | ||
| 856 | /* We also have to unlock and free the corresponding | 843 | /* We also have to refile the corresponding shadowed buffer */ |
| 857 | shadowed buffer */ | ||
| 858 | jh = commit_transaction->t_shadow_list->b_tprev; | 844 | jh = commit_transaction->t_shadow_list->b_tprev; |
| 859 | bh = jh2bh(jh); | 845 | bh = jh2bh(jh); |
| 860 | clear_bit(BH_JWrite, &bh->b_state); | 846 | clear_buffer_jwrite(bh); |
| 861 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); | 847 | J_ASSERT_BH(bh, buffer_jbddirty(bh)); |
| 848 | J_ASSERT_BH(bh, !buffer_shadow(bh)); | ||
| 862 | 849 | ||
| 863 | /* The metadata is now released for reuse, but we need | 850 | /* The metadata is now released for reuse, but we need |
| 864 | to remember it against this transaction so that when | 851 | to remember it against this transaction so that when |
| @@ -866,14 +853,6 @@ wait_for_iobuf: | |||
| 866 | required. */ | 853 | required. */ |
| 867 | JBUFFER_TRACE(jh, "file as BJ_Forget"); | 854 | JBUFFER_TRACE(jh, "file as BJ_Forget"); |
| 868 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); | 855 | jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); |
| 869 | /* | ||
| 870 | * Wake up any transactions which were waiting for this IO to | ||
| 871 | * complete. The barrier must be here so that changes by | ||
| 872 | * jbd2_journal_file_buffer() take effect before wake_up_bit() | ||
| 873 | * does the waitqueue check. | ||
| 874 | */ | ||
| 875 | smp_mb(); | ||
| 876 | wake_up_bit(&bh->b_state, BH_Unshadow); | ||
| 877 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); | 856 | JBUFFER_TRACE(jh, "brelse shadowed buffer"); |
| 878 | __brelse(bh); | 857 | __brelse(bh); |
| 879 | } | 858 | } |
| @@ -883,26 +862,19 @@ wait_for_iobuf: | |||
| 883 | jbd_debug(3, "JBD2: commit phase 4\n"); | 862 | jbd_debug(3, "JBD2: commit phase 4\n"); |
| 884 | 863 | ||
| 885 | /* Here we wait for the revoke record and descriptor record buffers */ | 864 | /* Here we wait for the revoke record and descriptor record buffers */ |
| 886 | wait_for_ctlbuf: | 865 | while (!list_empty(&log_bufs)) { |
| 887 | while (commit_transaction->t_log_list != NULL) { | ||
| 888 | struct buffer_head *bh; | 866 | struct buffer_head *bh; |
| 889 | 867 | ||
| 890 | jh = commit_transaction->t_log_list->b_tprev; | 868 | bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); |
| 891 | bh = jh2bh(jh); | 869 | wait_on_buffer(bh); |
| 892 | if (buffer_locked(bh)) { | 870 | cond_resched(); |
| 893 | wait_on_buffer(bh); | ||
| 894 | goto wait_for_ctlbuf; | ||
| 895 | } | ||
| 896 | if (cond_resched()) | ||
| 897 | goto wait_for_ctlbuf; | ||
| 898 | 871 | ||
| 899 | if (unlikely(!buffer_uptodate(bh))) | 872 | if (unlikely(!buffer_uptodate(bh))) |
| 900 | err = -EIO; | 873 | err = -EIO; |
| 901 | 874 | ||
| 902 | BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); | 875 | BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); |
| 903 | clear_buffer_jwrite(bh); | 876 | clear_buffer_jwrite(bh); |
| 904 | jbd2_journal_unfile_buffer(journal, jh); | 877 | jbd2_unfile_log_bh(bh); |
| 905 | jbd2_journal_put_journal_head(jh); | ||
| 906 | __brelse(bh); /* One for getblk */ | 878 | __brelse(bh); /* One for getblk */ |
| 907 | /* AKPM: bforget here */ | 879 | /* AKPM: bforget here */ |
| 908 | } | 880 | } |
| @@ -952,9 +924,7 @@ wait_for_iobuf: | |||
| 952 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); | 924 | J_ASSERT(list_empty(&commit_transaction->t_inode_list)); |
| 953 | J_ASSERT(commit_transaction->t_buffers == NULL); | 925 | J_ASSERT(commit_transaction->t_buffers == NULL); |
| 954 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); | 926 | J_ASSERT(commit_transaction->t_checkpoint_list == NULL); |
| 955 | J_ASSERT(commit_transaction->t_iobuf_list == NULL); | ||
| 956 | J_ASSERT(commit_transaction->t_shadow_list == NULL); | 927 | J_ASSERT(commit_transaction->t_shadow_list == NULL); |
| 957 | J_ASSERT(commit_transaction->t_log_list == NULL); | ||
| 958 | 928 | ||
| 959 | restart_loop: | 929 | restart_loop: |
| 960 | /* | 930 | /* |
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 95457576e434..02c7ad9d7a41 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -103,6 +103,24 @@ EXPORT_SYMBOL(jbd2_inode_cache); | |||
| 103 | static void __journal_abort_soft (journal_t *journal, int errno); | 103 | static void __journal_abort_soft (journal_t *journal, int errno); |
| 104 | static int jbd2_journal_create_slab(size_t slab_size); | 104 | static int jbd2_journal_create_slab(size_t slab_size); |
| 105 | 105 | ||
| 106 | #ifdef CONFIG_JBD2_DEBUG | ||
| 107 | void __jbd2_debug(int level, const char *file, const char *func, | ||
| 108 | unsigned int line, const char *fmt, ...) | ||
| 109 | { | ||
| 110 | struct va_format vaf; | ||
| 111 | va_list args; | ||
| 112 | |||
| 113 | if (level > jbd2_journal_enable_debug) | ||
| 114 | return; | ||
| 115 | va_start(args, fmt); | ||
| 116 | vaf.fmt = fmt; | ||
| 117 | vaf.va = &args; | ||
| 118 | printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); | ||
| 119 | va_end(args); | ||
| 120 | } | ||
| 121 | EXPORT_SYMBOL(__jbd2_debug); | ||
| 122 | #endif | ||
| 123 | |||
| 106 | /* Checksumming functions */ | 124 | /* Checksumming functions */ |
| 107 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) | 125 | int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) |
| 108 | { | 126 | { |
| @@ -310,14 +328,12 @@ static void journal_kill_thread(journal_t *journal) | |||
| 310 | * | 328 | * |
| 311 | * If the source buffer has already been modified by a new transaction | 329 | * If the source buffer has already been modified by a new transaction |
| 312 | * since we took the last commit snapshot, we use the frozen copy of | 330 | * since we took the last commit snapshot, we use the frozen copy of |
| 313 | * that data for IO. If we end up using the existing buffer_head's data | 331 | * that data for IO. If we end up using the existing buffer_head's data |
| 314 | * for the write, then we *have* to lock the buffer to prevent anyone | 332 | * for the write, then we have to make sure nobody modifies it while the |
| 315 | * else from using and possibly modifying it while the IO is in | 333 | * IO is in progress. do_get_write_access() handles this. |
| 316 | * progress. | ||
| 317 | * | 334 | * |
| 318 | * The function returns a pointer to the buffer_heads to be used for IO. | 335 | * The function returns a pointer to the buffer_head to be used for IO. |
| 319 | * | 336 | * |
| 320 | * We assume that the journal has already been locked in this function. | ||
| 321 | * | 337 | * |
| 322 | * Return value: | 338 | * Return value: |
| 323 | * <0: Error | 339 | * <0: Error |
| @@ -330,15 +346,14 @@ static void journal_kill_thread(journal_t *journal) | |||
| 330 | 346 | ||
| 331 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, | 347 | int jbd2_journal_write_metadata_buffer(transaction_t *transaction, |
| 332 | struct journal_head *jh_in, | 348 | struct journal_head *jh_in, |
| 333 | struct journal_head **jh_out, | 349 | struct buffer_head **bh_out, |
| 334 | unsigned long long blocknr) | 350 | sector_t blocknr) |
| 335 | { | 351 | { |
| 336 | int need_copy_out = 0; | 352 | int need_copy_out = 0; |
| 337 | int done_copy_out = 0; | 353 | int done_copy_out = 0; |
| 338 | int do_escape = 0; | 354 | int do_escape = 0; |
| 339 | char *mapped_data; | 355 | char *mapped_data; |
| 340 | struct buffer_head *new_bh; | 356 | struct buffer_head *new_bh; |
| 341 | struct journal_head *new_jh; | ||
| 342 | struct page *new_page; | 357 | struct page *new_page; |
| 343 | unsigned int new_offset; | 358 | unsigned int new_offset; |
| 344 | struct buffer_head *bh_in = jh2bh(jh_in); | 359 | struct buffer_head *bh_in = jh2bh(jh_in); |
| @@ -368,14 +383,13 @@ retry_alloc: | |||
| 368 | 383 | ||
| 369 | /* keep subsequent assertions sane */ | 384 | /* keep subsequent assertions sane */ |
| 370 | atomic_set(&new_bh->b_count, 1); | 385 | atomic_set(&new_bh->b_count, 1); |
| 371 | new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ | ||
| 372 | 386 | ||
| 387 | jbd_lock_bh_state(bh_in); | ||
| 388 | repeat: | ||
| 373 | /* | 389 | /* |
| 374 | * If a new transaction has already done a buffer copy-out, then | 390 | * If a new transaction has already done a buffer copy-out, then |
| 375 | * we use that version of the data for the commit. | 391 | * we use that version of the data for the commit. |
| 376 | */ | 392 | */ |
| 377 | jbd_lock_bh_state(bh_in); | ||
| 378 | repeat: | ||
| 379 | if (jh_in->b_frozen_data) { | 393 | if (jh_in->b_frozen_data) { |
| 380 | done_copy_out = 1; | 394 | done_copy_out = 1; |
| 381 | new_page = virt_to_page(jh_in->b_frozen_data); | 395 | new_page = virt_to_page(jh_in->b_frozen_data); |
| @@ -415,7 +429,7 @@ repeat: | |||
| 415 | jbd_unlock_bh_state(bh_in); | 429 | jbd_unlock_bh_state(bh_in); |
| 416 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); | 430 | tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); |
| 417 | if (!tmp) { | 431 | if (!tmp) { |
| 418 | jbd2_journal_put_journal_head(new_jh); | 432 | brelse(new_bh); |
| 419 | return -ENOMEM; | 433 | return -ENOMEM; |
| 420 | } | 434 | } |
| 421 | jbd_lock_bh_state(bh_in); | 435 | jbd_lock_bh_state(bh_in); |
| @@ -426,7 +440,7 @@ repeat: | |||
| 426 | 440 | ||
| 427 | jh_in->b_frozen_data = tmp; | 441 | jh_in->b_frozen_data = tmp; |
| 428 | mapped_data = kmap_atomic(new_page); | 442 | mapped_data = kmap_atomic(new_page); |
| 429 | memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); | 443 | memcpy(tmp, mapped_data + new_offset, bh_in->b_size); |
| 430 | kunmap_atomic(mapped_data); | 444 | kunmap_atomic(mapped_data); |
| 431 | 445 | ||
| 432 | new_page = virt_to_page(tmp); | 446 | new_page = virt_to_page(tmp); |
| @@ -452,14 +466,14 @@ repeat: | |||
| 452 | } | 466 | } |
| 453 | 467 | ||
| 454 | set_bh_page(new_bh, new_page, new_offset); | 468 | set_bh_page(new_bh, new_page, new_offset); |
| 455 | new_jh->b_transaction = NULL; | 469 | new_bh->b_size = bh_in->b_size; |
| 456 | new_bh->b_size = jh2bh(jh_in)->b_size; | 470 | new_bh->b_bdev = journal->j_dev; |
| 457 | new_bh->b_bdev = transaction->t_journal->j_dev; | ||
| 458 | new_bh->b_blocknr = blocknr; | 471 | new_bh->b_blocknr = blocknr; |
| 472 | new_bh->b_private = bh_in; | ||
| 459 | set_buffer_mapped(new_bh); | 473 | set_buffer_mapped(new_bh); |
| 460 | set_buffer_dirty(new_bh); | 474 | set_buffer_dirty(new_bh); |
| 461 | 475 | ||
| 462 | *jh_out = new_jh; | 476 | *bh_out = new_bh; |
| 463 | 477 | ||
| 464 | /* | 478 | /* |
| 465 | * The to-be-written buffer needs to get moved to the io queue, | 479 | * The to-be-written buffer needs to get moved to the io queue, |
| @@ -470,11 +484,9 @@ repeat: | |||
| 470 | spin_lock(&journal->j_list_lock); | 484 | spin_lock(&journal->j_list_lock); |
| 471 | __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); | 485 | __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); |
| 472 | spin_unlock(&journal->j_list_lock); | 486 | spin_unlock(&journal->j_list_lock); |
| 487 | set_buffer_shadow(bh_in); | ||
| 473 | jbd_unlock_bh_state(bh_in); | 488 | jbd_unlock_bh_state(bh_in); |
| 474 | 489 | ||
| 475 | JBUFFER_TRACE(new_jh, "file as BJ_IO"); | ||
| 476 | jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); | ||
| 477 | |||
| 478 | return do_escape | (done_copy_out << 1); | 490 | return do_escape | (done_copy_out << 1); |
| 479 | } | 491 | } |
| 480 | 492 | ||
| @@ -484,35 +496,6 @@ repeat: | |||
| 484 | */ | 496 | */ |
| 485 | 497 | ||
| 486 | /* | 498 | /* |
| 487 | * __jbd2_log_space_left: Return the number of free blocks left in the journal. | ||
| 488 | * | ||
| 489 | * Called with the journal already locked. | ||
| 490 | * | ||
| 491 | * Called under j_state_lock | ||
| 492 | */ | ||
| 493 | |||
| 494 | int __jbd2_log_space_left(journal_t *journal) | ||
| 495 | { | ||
| 496 | int left = journal->j_free; | ||
| 497 | |||
| 498 | /* assert_spin_locked(&journal->j_state_lock); */ | ||
| 499 | |||
| 500 | /* | ||
| 501 | * Be pessimistic here about the number of those free blocks which | ||
| 502 | * might be required for log descriptor control blocks. | ||
| 503 | */ | ||
| 504 | |||
| 505 | #define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ | ||
| 506 | |||
| 507 | left -= MIN_LOG_RESERVED_BLOCKS; | ||
| 508 | |||
| 509 | if (left <= 0) | ||
| 510 | return 0; | ||
| 511 | left -= (left >> 3); | ||
| 512 | return left; | ||
| 513 | } | ||
| 514 | |||
| 515 | /* | ||
| 516 | * Called with j_state_lock locked for writing. | 499 | * Called with j_state_lock locked for writing. |
| 517 | * Returns true if a transaction commit was started. | 500 | * Returns true if a transaction commit was started. |
| 518 | */ | 501 | */ |
| @@ -564,20 +547,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) | |||
| 564 | } | 547 | } |
| 565 | 548 | ||
| 566 | /* | 549 | /* |
| 567 | * Force and wait upon a commit if the calling process is not within | 550 | * Force and wait any uncommitted transactions. We can only force the running |
| 568 | * transaction. This is used for forcing out undo-protected data which contains | 551 | * transaction if we don't have an active handle, otherwise, we will deadlock. |
| 569 | * bitmaps, when the fs is running out of space. | 552 | * Returns: <0 in case of error, |
| 570 | * | 553 | * 0 if nothing to commit, |
| 571 | * We can only force the running transaction if we don't have an active handle; | 554 | * 1 if transaction was successfully committed. |
| 572 | * otherwise, we will deadlock. | ||
| 573 | * | ||
| 574 | * Returns true if a transaction was started. | ||
| 575 | */ | 555 | */ |
| 576 | int jbd2_journal_force_commit_nested(journal_t *journal) | 556 | static int __jbd2_journal_force_commit(journal_t *journal) |
| 577 | { | 557 | { |
| 578 | transaction_t *transaction = NULL; | 558 | transaction_t *transaction = NULL; |
| 579 | tid_t tid; | 559 | tid_t tid; |
| 580 | int need_to_start = 0; | 560 | int need_to_start = 0, ret = 0; |
| 581 | 561 | ||
| 582 | read_lock(&journal->j_state_lock); | 562 | read_lock(&journal->j_state_lock); |
| 583 | if (journal->j_running_transaction && !current->journal_info) { | 563 | if (journal->j_running_transaction && !current->journal_info) { |
| @@ -588,16 +568,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal) | |||
| 588 | transaction = journal->j_committing_transaction; | 568 | transaction = journal->j_committing_transaction; |
| 589 | 569 | ||
| 590 | if (!transaction) { | 570 | if (!transaction) { |
| 571 | /* Nothing to commit */ | ||
| 591 | read_unlock(&journal->j_state_lock); | 572 | read_unlock(&journal->j_state_lock); |
| 592 | return 0; /* Nothing to retry */ | 573 | return 0; |
| 593 | } | 574 | } |
| 594 | |||
| 595 | tid = transaction->t_tid; | 575 | tid = transaction->t_tid; |
| 596 | read_unlock(&journal->j_state_lock); | 576 | read_unlock(&journal->j_state_lock); |
| 597 | if (need_to_start) | 577 | if (need_to_start) |
| 598 | jbd2_log_start_commit(journal, tid); | 578 | jbd2_log_start_commit(journal, tid); |
| 599 | jbd2_log_wait_commit(journal, tid); | 579 | ret = jbd2_log_wait_commit(journal, tid); |
| 600 | return 1; | 580 | if (!ret) |
| 581 | ret = 1; | ||
| 582 | |||
| 583 | return ret; | ||
| 584 | } | ||
| 585 | |||
| 586 | /** | ||
| 587 | * Force and wait upon a commit if the calling process is not within | ||
| 588 | * transaction. This is used for forcing out undo-protected data which contains | ||
| 589 | * bitmaps, when the fs is running out of space. | ||
| 590 | * | ||
| 591 | * @journal: journal to force | ||
| 592 | * Returns true if progress was made. | ||
| 593 | */ | ||
| 594 | int jbd2_journal_force_commit_nested(journal_t *journal) | ||
| 595 | { | ||
| 596 | int ret; | ||
| 597 | |||
| 598 | ret = __jbd2_journal_force_commit(journal); | ||
| 599 | return ret > 0; | ||
| 600 | } | ||
| 601 | |||
| 602 | /** | ||
| 603 | * int journal_force_commit() - force any uncommitted transactions | ||
| 604 | * @journal: journal to force | ||
| 605 | * | ||
| 606 | * Caller want unconditional commit. We can only force the running transaction | ||
| 607 | * if we don't have an active handle, otherwise, we will deadlock. | ||
| 608 | */ | ||
| 609 | int jbd2_journal_force_commit(journal_t *journal) | ||
| 610 | { | ||
| 611 | int ret; | ||
| 612 | |||
| 613 | J_ASSERT(!current->journal_info); | ||
| 614 | ret = __jbd2_journal_force_commit(journal); | ||
| 615 | if (ret > 0) | ||
| 616 | ret = 0; | ||
| 617 | return ret; | ||
| 601 | } | 618 | } |
| 602 | 619 | ||
| 603 | /* | 620 | /* |
| @@ -798,7 +815,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
| 798 | * But we don't bother doing that, so there will be coherency problems with | 815 | * But we don't bother doing that, so there will be coherency problems with |
| 799 | * mmaps of blockdevs which hold live JBD-controlled filesystems. | 816 | * mmaps of blockdevs which hold live JBD-controlled filesystems. |
| 800 | */ | 817 | */ |
| 801 | struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | 818 | struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) |
| 802 | { | 819 | { |
| 803 | struct buffer_head *bh; | 820 | struct buffer_head *bh; |
| 804 | unsigned long long blocknr; | 821 | unsigned long long blocknr; |
| @@ -817,7 +834,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) | |||
| 817 | set_buffer_uptodate(bh); | 834 | set_buffer_uptodate(bh); |
| 818 | unlock_buffer(bh); | 835 | unlock_buffer(bh); |
| 819 | BUFFER_TRACE(bh, "return this buffer"); | 836 | BUFFER_TRACE(bh, "return this buffer"); |
| 820 | return jbd2_journal_add_journal_head(bh); | 837 | return bh; |
| 821 | } | 838 | } |
| 822 | 839 | ||
| 823 | /* | 840 | /* |
| @@ -1062,11 +1079,10 @@ static journal_t * journal_init_common (void) | |||
| 1062 | return NULL; | 1079 | return NULL; |
| 1063 | 1080 | ||
| 1064 | init_waitqueue_head(&journal->j_wait_transaction_locked); | 1081 | init_waitqueue_head(&journal->j_wait_transaction_locked); |
| 1065 | init_waitqueue_head(&journal->j_wait_logspace); | ||
| 1066 | init_waitqueue_head(&journal->j_wait_done_commit); | 1082 | init_waitqueue_head(&journal->j_wait_done_commit); |
| 1067 | init_waitqueue_head(&journal->j_wait_checkpoint); | ||
| 1068 | init_waitqueue_head(&journal->j_wait_commit); | 1083 | init_waitqueue_head(&journal->j_wait_commit); |
| 1069 | init_waitqueue_head(&journal->j_wait_updates); | 1084 | init_waitqueue_head(&journal->j_wait_updates); |
| 1085 | init_waitqueue_head(&journal->j_wait_reserved); | ||
| 1070 | mutex_init(&journal->j_barrier); | 1086 | mutex_init(&journal->j_barrier); |
| 1071 | mutex_init(&journal->j_checkpoint_mutex); | 1087 | mutex_init(&journal->j_checkpoint_mutex); |
| 1072 | spin_lock_init(&journal->j_revoke_lock); | 1088 | spin_lock_init(&journal->j_revoke_lock); |
| @@ -1076,6 +1092,7 @@ static journal_t * journal_init_common (void) | |||
| 1076 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); | 1092 | journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); |
| 1077 | journal->j_min_batch_time = 0; | 1093 | journal->j_min_batch_time = 0; |
| 1078 | journal->j_max_batch_time = 15000; /* 15ms */ | 1094 | journal->j_max_batch_time = 15000; /* 15ms */ |
| 1095 | atomic_set(&journal->j_reserved_credits, 0); | ||
| 1079 | 1096 | ||
| 1080 | /* The journal is marked for error until we succeed with recovery! */ | 1097 | /* The journal is marked for error until we succeed with recovery! */ |
| 1081 | journal->j_flags = JBD2_ABORT; | 1098 | journal->j_flags = JBD2_ABORT; |
| @@ -1318,6 +1335,7 @@ static int journal_reset(journal_t *journal) | |||
| 1318 | static void jbd2_write_superblock(journal_t *journal, int write_op) | 1335 | static void jbd2_write_superblock(journal_t *journal, int write_op) |
| 1319 | { | 1336 | { |
| 1320 | struct buffer_head *bh = journal->j_sb_buffer; | 1337 | struct buffer_head *bh = journal->j_sb_buffer; |
| 1338 | journal_superblock_t *sb = journal->j_superblock; | ||
| 1321 | int ret; | 1339 | int ret; |
| 1322 | 1340 | ||
| 1323 | trace_jbd2_write_superblock(journal, write_op); | 1341 | trace_jbd2_write_superblock(journal, write_op); |
| @@ -1339,6 +1357,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) | |||
| 1339 | clear_buffer_write_io_error(bh); | 1357 | clear_buffer_write_io_error(bh); |
| 1340 | set_buffer_uptodate(bh); | 1358 | set_buffer_uptodate(bh); |
| 1341 | } | 1359 | } |
| 1360 | jbd2_superblock_csum_set(journal, sb); | ||
| 1342 | get_bh(bh); | 1361 | get_bh(bh); |
| 1343 | bh->b_end_io = end_buffer_write_sync; | 1362 | bh->b_end_io = end_buffer_write_sync; |
| 1344 | ret = submit_bh(write_op, bh); | 1363 | ret = submit_bh(write_op, bh); |
| @@ -1435,7 +1454,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) | |||
| 1435 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", | 1454 | jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", |
| 1436 | journal->j_errno); | 1455 | journal->j_errno); |
| 1437 | sb->s_errno = cpu_to_be32(journal->j_errno); | 1456 | sb->s_errno = cpu_to_be32(journal->j_errno); |
| 1438 | jbd2_superblock_csum_set(journal, sb); | ||
| 1439 | read_unlock(&journal->j_state_lock); | 1457 | read_unlock(&journal->j_state_lock); |
| 1440 | 1458 | ||
| 1441 | jbd2_write_superblock(journal, WRITE_SYNC); | 1459 | jbd2_write_superblock(journal, WRITE_SYNC); |
| @@ -2325,13 +2343,13 @@ static struct journal_head *journal_alloc_journal_head(void) | |||
| 2325 | #ifdef CONFIG_JBD2_DEBUG | 2343 | #ifdef CONFIG_JBD2_DEBUG |
| 2326 | atomic_inc(&nr_journal_heads); | 2344 | atomic_inc(&nr_journal_heads); |
| 2327 | #endif | 2345 | #endif |
| 2328 | ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); | 2346 | ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); |
| 2329 | if (!ret) { | 2347 | if (!ret) { |
| 2330 | jbd_debug(1, "out of memory for journal_head\n"); | 2348 | jbd_debug(1, "out of memory for journal_head\n"); |
| 2331 | pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); | 2349 | pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); |
| 2332 | while (!ret) { | 2350 | while (!ret) { |
| 2333 | yield(); | 2351 | yield(); |
| 2334 | ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); | 2352 | ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); |
| 2335 | } | 2353 | } |
| 2336 | } | 2354 | } |
| 2337 | return ret; | 2355 | return ret; |
| @@ -2393,10 +2411,8 @@ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) | |||
| 2393 | struct journal_head *new_jh = NULL; | 2411 | struct journal_head *new_jh = NULL; |
| 2394 | 2412 | ||
| 2395 | repeat: | 2413 | repeat: |
| 2396 | if (!buffer_jbd(bh)) { | 2414 | if (!buffer_jbd(bh)) |
| 2397 | new_jh = journal_alloc_journal_head(); | 2415 | new_jh = journal_alloc_journal_head(); |
| 2398 | memset(new_jh, 0, sizeof(*new_jh)); | ||
| 2399 | } | ||
| 2400 | 2416 | ||
| 2401 | jbd_lock_bh_journal_head(bh); | 2417 | jbd_lock_bh_journal_head(bh); |
| 2402 | if (buffer_jbd(bh)) { | 2418 | if (buffer_jbd(bh)) { |
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 626846bac32f..d4851464b57e 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
| @@ -399,18 +399,17 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) | |||
| 399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, | 399 | static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, |
| 400 | void *buf, __u32 sequence) | 400 | void *buf, __u32 sequence) |
| 401 | { | 401 | { |
| 402 | __u32 provided, calculated; | 402 | __u32 csum32; |
| 403 | 403 | ||
| 404 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 404 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 405 | return 1; | 405 | return 1; |
| 406 | 406 | ||
| 407 | sequence = cpu_to_be32(sequence); | 407 | sequence = cpu_to_be32(sequence); |
| 408 | calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, | 408 | csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, |
| 409 | sizeof(sequence)); | 409 | sizeof(sequence)); |
| 410 | calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); | 410 | csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); |
| 411 | provided = be32_to_cpu(tag->t_checksum); | ||
| 412 | 411 | ||
| 413 | return provided == cpu_to_be32(calculated); | 412 | return tag->t_checksum == cpu_to_be16(csum32); |
| 414 | } | 413 | } |
| 415 | 414 | ||
| 416 | static int do_one_pass(journal_t *journal, | 415 | static int do_one_pass(journal_t *journal, |
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f30b80b4ce8b..198c9c10276d 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c | |||
| @@ -122,9 +122,10 @@ struct jbd2_revoke_table_s | |||
| 122 | 122 | ||
| 123 | #ifdef __KERNEL__ | 123 | #ifdef __KERNEL__ |
| 124 | static void write_one_revoke_record(journal_t *, transaction_t *, | 124 | static void write_one_revoke_record(journal_t *, transaction_t *, |
| 125 | struct journal_head **, int *, | 125 | struct list_head *, |
| 126 | struct buffer_head **, int *, | ||
| 126 | struct jbd2_revoke_record_s *, int); | 127 | struct jbd2_revoke_record_s *, int); |
| 127 | static void flush_descriptor(journal_t *, struct journal_head *, int, int); | 128 | static void flush_descriptor(journal_t *, struct buffer_head *, int, int); |
| 128 | #endif | 129 | #endif |
| 129 | 130 | ||
| 130 | /* Utility functions to maintain the revoke table */ | 131 | /* Utility functions to maintain the revoke table */ |
| @@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) | |||
| 531 | */ | 532 | */ |
| 532 | void jbd2_journal_write_revoke_records(journal_t *journal, | 533 | void jbd2_journal_write_revoke_records(journal_t *journal, |
| 533 | transaction_t *transaction, | 534 | transaction_t *transaction, |
| 535 | struct list_head *log_bufs, | ||
| 534 | int write_op) | 536 | int write_op) |
| 535 | { | 537 | { |
| 536 | struct journal_head *descriptor; | 538 | struct buffer_head *descriptor; |
| 537 | struct jbd2_revoke_record_s *record; | 539 | struct jbd2_revoke_record_s *record; |
| 538 | struct jbd2_revoke_table_s *revoke; | 540 | struct jbd2_revoke_table_s *revoke; |
| 539 | struct list_head *hash_list; | 541 | struct list_head *hash_list; |
| @@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
| 553 | while (!list_empty(hash_list)) { | 555 | while (!list_empty(hash_list)) { |
| 554 | record = (struct jbd2_revoke_record_s *) | 556 | record = (struct jbd2_revoke_record_s *) |
| 555 | hash_list->next; | 557 | hash_list->next; |
| 556 | write_one_revoke_record(journal, transaction, | 558 | write_one_revoke_record(journal, transaction, log_bufs, |
| 557 | &descriptor, &offset, | 559 | &descriptor, &offset, |
| 558 | record, write_op); | 560 | record, write_op); |
| 559 | count++; | 561 | count++; |
| @@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, | |||
| 573 | 575 | ||
| 574 | static void write_one_revoke_record(journal_t *journal, | 576 | static void write_one_revoke_record(journal_t *journal, |
| 575 | transaction_t *transaction, | 577 | transaction_t *transaction, |
| 576 | struct journal_head **descriptorp, | 578 | struct list_head *log_bufs, |
| 579 | struct buffer_head **descriptorp, | ||
| 577 | int *offsetp, | 580 | int *offsetp, |
| 578 | struct jbd2_revoke_record_s *record, | 581 | struct jbd2_revoke_record_s *record, |
| 579 | int write_op) | 582 | int write_op) |
| 580 | { | 583 | { |
| 581 | int csum_size = 0; | 584 | int csum_size = 0; |
| 582 | struct journal_head *descriptor; | 585 | struct buffer_head *descriptor; |
| 583 | int offset; | 586 | int offset; |
| 584 | journal_header_t *header; | 587 | journal_header_t *header; |
| 585 | 588 | ||
| @@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, | |||
| 609 | descriptor = jbd2_journal_get_descriptor_buffer(journal); | 612 | descriptor = jbd2_journal_get_descriptor_buffer(journal); |
| 610 | if (!descriptor) | 613 | if (!descriptor) |
| 611 | return; | 614 | return; |
| 612 | header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; | 615 | header = (journal_header_t *)descriptor->b_data; |
| 613 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); | 616 | header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); |
| 614 | header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); | 617 | header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); |
| 615 | header->h_sequence = cpu_to_be32(transaction->t_tid); | 618 | header->h_sequence = cpu_to_be32(transaction->t_tid); |
| 616 | 619 | ||
| 617 | /* Record it so that we can wait for IO completion later */ | 620 | /* Record it so that we can wait for IO completion later */ |
| 618 | JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); | 621 | BUFFER_TRACE(descriptor, "file in log_bufs"); |
| 619 | jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); | 622 | jbd2_file_log_bh(log_bufs, descriptor); |
| 620 | 623 | ||
| 621 | offset = sizeof(jbd2_journal_revoke_header_t); | 624 | offset = sizeof(jbd2_journal_revoke_header_t); |
| 622 | *descriptorp = descriptor; | 625 | *descriptorp = descriptor; |
| 623 | } | 626 | } |
| 624 | 627 | ||
| 625 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { | 628 | if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { |
| 626 | * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = | 629 | * ((__be64 *)(&descriptor->b_data[offset])) = |
| 627 | cpu_to_be64(record->blocknr); | 630 | cpu_to_be64(record->blocknr); |
| 628 | offset += 8; | 631 | offset += 8; |
| 629 | 632 | ||
| 630 | } else { | 633 | } else { |
| 631 | * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = | 634 | * ((__be32 *)(&descriptor->b_data[offset])) = |
| 632 | cpu_to_be32(record->blocknr); | 635 | cpu_to_be32(record->blocknr); |
| 633 | offset += 4; | 636 | offset += 4; |
| 634 | } | 637 | } |
| @@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal, | |||
| 636 | *offsetp = offset; | 639 | *offsetp = offset; |
| 637 | } | 640 | } |
| 638 | 641 | ||
| 639 | static void jbd2_revoke_csum_set(journal_t *j, | 642 | static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) |
| 640 | struct journal_head *descriptor) | ||
| 641 | { | 643 | { |
| 642 | struct jbd2_journal_revoke_tail *tail; | 644 | struct jbd2_journal_revoke_tail *tail; |
| 643 | __u32 csum; | 645 | __u32 csum; |
| @@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j, | |||
| 645 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) | 647 | if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) |
| 646 | return; | 648 | return; |
| 647 | 649 | ||
| 648 | tail = (struct jbd2_journal_revoke_tail *) | 650 | tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - |
| 649 | (jh2bh(descriptor)->b_data + j->j_blocksize - | ||
| 650 | sizeof(struct jbd2_journal_revoke_tail)); | 651 | sizeof(struct jbd2_journal_revoke_tail)); |
| 651 | tail->r_checksum = 0; | 652 | tail->r_checksum = 0; |
| 652 | csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, | 653 | csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); |
| 653 | j->j_blocksize); | ||
| 654 | tail->r_checksum = cpu_to_be32(csum); | 654 | tail->r_checksum = cpu_to_be32(csum); |
| 655 | } | 655 | } |
| 656 | 656 | ||
| @@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j, | |||
| 662 | */ | 662 | */ |
| 663 | 663 | ||
| 664 | static void flush_descriptor(journal_t *journal, | 664 | static void flush_descriptor(journal_t *journal, |
| 665 | struct journal_head *descriptor, | 665 | struct buffer_head *descriptor, |
| 666 | int offset, int write_op) | 666 | int offset, int write_op) |
| 667 | { | 667 | { |
| 668 | jbd2_journal_revoke_header_t *header; | 668 | jbd2_journal_revoke_header_t *header; |
| 669 | struct buffer_head *bh = jh2bh(descriptor); | ||
| 670 | 669 | ||
| 671 | if (is_journal_aborted(journal)) { | 670 | if (is_journal_aborted(journal)) { |
| 672 | put_bh(bh); | 671 | put_bh(descriptor); |
| 673 | return; | 672 | return; |
| 674 | } | 673 | } |
| 675 | 674 | ||
| 676 | header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; | 675 | header = (jbd2_journal_revoke_header_t *)descriptor->b_data; |
| 677 | header->r_count = cpu_to_be32(offset); | 676 | header->r_count = cpu_to_be32(offset); |
| 678 | jbd2_revoke_csum_set(journal, descriptor); | 677 | jbd2_revoke_csum_set(journal, descriptor); |
| 679 | 678 | ||
| 680 | set_buffer_jwrite(bh); | 679 | set_buffer_jwrite(descriptor); |
| 681 | BUFFER_TRACE(bh, "write"); | 680 | BUFFER_TRACE(descriptor, "write"); |
| 682 | set_buffer_dirty(bh); | 681 | set_buffer_dirty(descriptor); |
| 683 | write_dirty_buffer(bh, write_op); | 682 | write_dirty_buffer(descriptor, write_op); |
| 684 | } | 683 | } |
| 685 | #endif | 684 | #endif |
| 686 | 685 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 10f524c59ea8..7aa9a32573bb 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -89,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
| 89 | transaction->t_expires = jiffies + journal->j_commit_interval; | 89 | transaction->t_expires = jiffies + journal->j_commit_interval; |
| 90 | spin_lock_init(&transaction->t_handle_lock); | 90 | spin_lock_init(&transaction->t_handle_lock); |
| 91 | atomic_set(&transaction->t_updates, 0); | 91 | atomic_set(&transaction->t_updates, 0); |
| 92 | atomic_set(&transaction->t_outstanding_credits, 0); | 92 | atomic_set(&transaction->t_outstanding_credits, |
| 93 | atomic_read(&journal->j_reserved_credits)); | ||
| 93 | atomic_set(&transaction->t_handle_count, 0); | 94 | atomic_set(&transaction->t_handle_count, 0); |
| 94 | INIT_LIST_HEAD(&transaction->t_inode_list); | 95 | INIT_LIST_HEAD(&transaction->t_inode_list); |
| 95 | INIT_LIST_HEAD(&transaction->t_private_list); | 96 | INIT_LIST_HEAD(&transaction->t_private_list); |
| @@ -141,6 +142,112 @@ static inline void update_t_max_wait(transaction_t *transaction, | |||
| 141 | } | 142 | } |
| 142 | 143 | ||
| 143 | /* | 144 | /* |
| 145 | * Wait until running transaction passes T_LOCKED state. Also starts the commit | ||
| 146 | * if needed. The function expects running transaction to exist and releases | ||
| 147 | * j_state_lock. | ||
| 148 | */ | ||
| 149 | static void wait_transaction_locked(journal_t *journal) | ||
| 150 | __releases(journal->j_state_lock) | ||
| 151 | { | ||
| 152 | DEFINE_WAIT(wait); | ||
| 153 | int need_to_start; | ||
| 154 | tid_t tid = journal->j_running_transaction->t_tid; | ||
| 155 | |||
| 156 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | ||
| 157 | TASK_UNINTERRUPTIBLE); | ||
| 158 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
| 159 | read_unlock(&journal->j_state_lock); | ||
| 160 | if (need_to_start) | ||
| 161 | jbd2_log_start_commit(journal, tid); | ||
| 162 | schedule(); | ||
| 163 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
| 164 | } | ||
| 165 | |||
| 166 | static void sub_reserved_credits(journal_t *journal, int blocks) | ||
| 167 | { | ||
| 168 | atomic_sub(blocks, &journal->j_reserved_credits); | ||
| 169 | wake_up(&journal->j_wait_reserved); | ||
| 170 | } | ||
| 171 | |||
| 172 | /* | ||
| 173 | * Wait until we can add credits for handle to the running transaction. Called | ||
| 174 | * with j_state_lock held for reading. Returns 0 if handle joined the running | ||
| 175 | * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and | ||
| 176 | * caller must retry. | ||
| 177 | */ | ||
| 178 | static int add_transaction_credits(journal_t *journal, int blocks, | ||
| 179 | int rsv_blocks) | ||
| 180 | { | ||
| 181 | transaction_t *t = journal->j_running_transaction; | ||
| 182 | int needed; | ||
| 183 | int total = blocks + rsv_blocks; | ||
| 184 | |||
| 185 | /* | ||
| 186 | * If the current transaction is locked down for commit, wait | ||
| 187 | * for the lock to be released. | ||
| 188 | */ | ||
| 189 | if (t->t_state == T_LOCKED) { | ||
| 190 | wait_transaction_locked(journal); | ||
| 191 | return 1; | ||
| 192 | } | ||
| 193 | |||
| 194 | /* | ||
| 195 | * If there is not enough space left in the log to write all | ||
| 196 | * potential buffers requested by this operation, we need to | ||
| 197 | * stall pending a log checkpoint to free some more log space. | ||
| 198 | */ | ||
| 199 | needed = atomic_add_return(total, &t->t_outstanding_credits); | ||
| 200 | if (needed > journal->j_max_transaction_buffers) { | ||
| 201 | /* | ||
| 202 | * If the current transaction is already too large, | ||
| 203 | * then start to commit it: we can then go back and | ||
| 204 | * attach this handle to a new transaction. | ||
| 205 | */ | ||
| 206 | atomic_sub(total, &t->t_outstanding_credits); | ||
| 207 | wait_transaction_locked(journal); | ||
| 208 | return 1; | ||
| 209 | } | ||
| 210 | |||
| 211 | /* | ||
| 212 | * The commit code assumes that it can get enough log space | ||
| 213 | * without forcing a checkpoint. This is *critical* for | ||
| 214 | * correctness: a checkpoint of a buffer which is also | ||
| 215 | * associated with a committing transaction creates a deadlock, | ||
| 216 | * so commit simply cannot force through checkpoints. | ||
| 217 | * | ||
| 218 | * We must therefore ensure the necessary space in the journal | ||
| 219 | * *before* starting to dirty potentially checkpointed buffers | ||
| 220 | * in the new transaction. | ||
| 221 | */ | ||
| 222 | if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { | ||
| 223 | atomic_sub(total, &t->t_outstanding_credits); | ||
| 224 | read_unlock(&journal->j_state_lock); | ||
| 225 | write_lock(&journal->j_state_lock); | ||
| 226 | if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) | ||
| 227 | __jbd2_log_wait_for_space(journal); | ||
| 228 | write_unlock(&journal->j_state_lock); | ||
| 229 | return 1; | ||
| 230 | } | ||
| 231 | |||
| 232 | /* No reservation? We are done... */ | ||
| 233 | if (!rsv_blocks) | ||
| 234 | return 0; | ||
| 235 | |||
| 236 | needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); | ||
| 237 | /* We allow at most half of a transaction to be reserved */ | ||
| 238 | if (needed > journal->j_max_transaction_buffers / 2) { | ||
| 239 | sub_reserved_credits(journal, rsv_blocks); | ||
| 240 | atomic_sub(total, &t->t_outstanding_credits); | ||
| 241 | read_unlock(&journal->j_state_lock); | ||
| 242 | wait_event(journal->j_wait_reserved, | ||
| 243 | atomic_read(&journal->j_reserved_credits) + rsv_blocks | ||
| 244 | <= journal->j_max_transaction_buffers / 2); | ||
| 245 | return 1; | ||
| 246 | } | ||
| 247 | return 0; | ||
| 248 | } | ||
| 249 | |||
| 250 | /* | ||
| 144 | * start_this_handle: Given a handle, deal with any locking or stalling | 251 | * start_this_handle: Given a handle, deal with any locking or stalling |
| 145 | * needed to make sure that there is enough journal space for the handle | 252 | * needed to make sure that there is enough journal space for the handle |
| 146 | * to begin. Attach the handle to a transaction and set up the | 253 | * to begin. Attach the handle to a transaction and set up the |
| @@ -151,18 +258,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle, | |||
| 151 | gfp_t gfp_mask) | 258 | gfp_t gfp_mask) |
| 152 | { | 259 | { |
| 153 | transaction_t *transaction, *new_transaction = NULL; | 260 | transaction_t *transaction, *new_transaction = NULL; |
| 154 | tid_t tid; | 261 | int blocks = handle->h_buffer_credits; |
| 155 | int needed, need_to_start; | 262 | int rsv_blocks = 0; |
| 156 | int nblocks = handle->h_buffer_credits; | ||
| 157 | unsigned long ts = jiffies; | 263 | unsigned long ts = jiffies; |
| 158 | 264 | ||
| 159 | if (nblocks > journal->j_max_transaction_buffers) { | 265 | /* |
| 266 | * 1/2 of transaction can be reserved so we can practically handle | ||
| 267 | * only 1/2 of maximum transaction size per operation | ||
| 268 | */ | ||
| 269 | if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) { | ||
| 160 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", | 270 | printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", |
| 161 | current->comm, nblocks, | 271 | current->comm, blocks, |
| 162 | journal->j_max_transaction_buffers); | 272 | journal->j_max_transaction_buffers / 2); |
| 163 | return -ENOSPC; | 273 | return -ENOSPC; |
| 164 | } | 274 | } |
| 165 | 275 | ||
| 276 | if (handle->h_rsv_handle) | ||
| 277 | rsv_blocks = handle->h_rsv_handle->h_buffer_credits; | ||
| 278 | |||
| 166 | alloc_transaction: | 279 | alloc_transaction: |
| 167 | if (!journal->j_running_transaction) { | 280 | if (!journal->j_running_transaction) { |
| 168 | new_transaction = kmem_cache_zalloc(transaction_cache, | 281 | new_transaction = kmem_cache_zalloc(transaction_cache, |
| @@ -199,8 +312,12 @@ repeat: | |||
| 199 | return -EROFS; | 312 | return -EROFS; |
| 200 | } | 313 | } |
| 201 | 314 | ||
| 202 | /* Wait on the journal's transaction barrier if necessary */ | 315 | /* |
| 203 | if (journal->j_barrier_count) { | 316 | * Wait on the journal's transaction barrier if necessary. Specifically |
| 317 | * we allow reserved handles to proceed because otherwise commit could | ||
| 318 | * deadlock on page writeback not being able to complete. | ||
| 319 | */ | ||
| 320 | if (!handle->h_reserved && journal->j_barrier_count) { | ||
| 204 | read_unlock(&journal->j_state_lock); | 321 | read_unlock(&journal->j_state_lock); |
| 205 | wait_event(journal->j_wait_transaction_locked, | 322 | wait_event(journal->j_wait_transaction_locked, |
| 206 | journal->j_barrier_count == 0); | 323 | journal->j_barrier_count == 0); |
| @@ -213,7 +330,7 @@ repeat: | |||
| 213 | goto alloc_transaction; | 330 | goto alloc_transaction; |
| 214 | write_lock(&journal->j_state_lock); | 331 | write_lock(&journal->j_state_lock); |
| 215 | if (!journal->j_running_transaction && | 332 | if (!journal->j_running_transaction && |
| 216 | !journal->j_barrier_count) { | 333 | (handle->h_reserved || !journal->j_barrier_count)) { |
| 217 | jbd2_get_transaction(journal, new_transaction); | 334 | jbd2_get_transaction(journal, new_transaction); |
| 218 | new_transaction = NULL; | 335 | new_transaction = NULL; |
| 219 | } | 336 | } |
| @@ -223,85 +340,18 @@ repeat: | |||
| 223 | 340 | ||
| 224 | transaction = journal->j_running_transaction; | 341 | transaction = journal->j_running_transaction; |
| 225 | 342 | ||
| 226 | /* | 343 | if (!handle->h_reserved) { |
| 227 | * If the current transaction is locked down for commit, wait for the | 344 | /* We may have dropped j_state_lock - restart in that case */ |
| 228 | * lock to be released. | 345 | if (add_transaction_credits(journal, blocks, rsv_blocks)) |
| 229 | */ | 346 | goto repeat; |
| 230 | if (transaction->t_state == T_LOCKED) { | 347 | } else { |
| 231 | DEFINE_WAIT(wait); | ||
| 232 | |||
| 233 | prepare_to_wait(&journal->j_wait_transaction_locked, | ||
| 234 | &wait, TASK_UNINTERRUPTIBLE); | ||
| 235 | read_unlock(&journal->j_state_lock); | ||
| 236 | schedule(); | ||
| 237 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
| 238 | goto repeat; | ||
| 239 | } | ||
| 240 | |||
| 241 | /* | ||
| 242 | * If there is not enough space left in the log to write all potential | ||
| 243 | * buffers requested by this operation, we need to stall pending a log | ||
| 244 | * checkpoint to free some more log space. | ||
| 245 | */ | ||
| 246 | needed = atomic_add_return(nblocks, | ||
| 247 | &transaction->t_outstanding_credits); | ||
| 248 | |||
| 249 | if (needed > journal->j_max_transaction_buffers) { | ||
| 250 | /* | 348 | /* |
| 251 | * If the current transaction is already too large, then start | 349 | * We have handle reserved so we are allowed to join T_LOCKED |
| 252 | * to commit it: we can then go back and attach this handle to | 350 | * transaction and we don't have to check for transaction size |
| 253 | * a new transaction. | 351 | * and journal space. |
| 254 | */ | 352 | */ |
| 255 | DEFINE_WAIT(wait); | 353 | sub_reserved_credits(journal, blocks); |
| 256 | 354 | handle->h_reserved = 0; | |
| 257 | jbd_debug(2, "Handle %p starting new commit...\n", handle); | ||
| 258 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
| 259 | prepare_to_wait(&journal->j_wait_transaction_locked, &wait, | ||
| 260 | TASK_UNINTERRUPTIBLE); | ||
| 261 | tid = transaction->t_tid; | ||
| 262 | need_to_start = !tid_geq(journal->j_commit_request, tid); | ||
| 263 | read_unlock(&journal->j_state_lock); | ||
| 264 | if (need_to_start) | ||
| 265 | jbd2_log_start_commit(journal, tid); | ||
| 266 | schedule(); | ||
| 267 | finish_wait(&journal->j_wait_transaction_locked, &wait); | ||
| 268 | goto repeat; | ||
| 269 | } | ||
| 270 | |||
| 271 | /* | ||
| 272 | * The commit code assumes that it can get enough log space | ||
| 273 | * without forcing a checkpoint. This is *critical* for | ||
| 274 | * correctness: a checkpoint of a buffer which is also | ||
| 275 | * associated with a committing transaction creates a deadlock, | ||
| 276 | * so commit simply cannot force through checkpoints. | ||
| 277 | * | ||
| 278 | * We must therefore ensure the necessary space in the journal | ||
| 279 | * *before* starting to dirty potentially checkpointed buffers | ||
| 280 | * in the new transaction. | ||
| 281 | * | ||
| 282 | * The worst part is, any transaction currently committing can | ||
| 283 | * reduce the free space arbitrarily. Be careful to account for | ||
| 284 | * those buffers when checkpointing. | ||
| 285 | */ | ||
| 286 | |||
| 287 | /* | ||
| 288 | * @@@ AKPM: This seems rather over-defensive. We're giving commit | ||
| 289 | * a _lot_ of headroom: 1/4 of the journal plus the size of | ||
| 290 | * the committing transaction. Really, we only need to give it | ||
| 291 | * committing_transaction->t_outstanding_credits plus "enough" for | ||
| 292 | * the log control blocks. | ||
| 293 | * Also, this test is inconsistent with the matching one in | ||
| 294 | * jbd2_journal_extend(). | ||
| 295 | */ | ||
| 296 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { | ||
| 297 | jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); | ||
| 298 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
| 299 | read_unlock(&journal->j_state_lock); | ||
| 300 | write_lock(&journal->j_state_lock); | ||
| 301 | if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) | ||
| 302 | __jbd2_log_wait_for_space(journal); | ||
| 303 | write_unlock(&journal->j_state_lock); | ||
| 304 | goto repeat; | ||
| 305 | } | 355 | } |
| 306 | 356 | ||
| 307 | /* OK, account for the buffers that this operation expects to | 357 | /* OK, account for the buffers that this operation expects to |
| @@ -309,15 +359,16 @@ repeat: | |||
| 309 | */ | 359 | */ |
| 310 | update_t_max_wait(transaction, ts); | 360 | update_t_max_wait(transaction, ts); |
| 311 | handle->h_transaction = transaction; | 361 | handle->h_transaction = transaction; |
| 312 | handle->h_requested_credits = nblocks; | 362 | handle->h_requested_credits = blocks; |
| 313 | handle->h_start_jiffies = jiffies; | 363 | handle->h_start_jiffies = jiffies; |
| 314 | atomic_inc(&transaction->t_updates); | 364 | atomic_inc(&transaction->t_updates); |
| 315 | atomic_inc(&transaction->t_handle_count); | 365 | atomic_inc(&transaction->t_handle_count); |
| 316 | jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", | 366 | jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n", |
| 317 | handle, nblocks, | 367 | handle, blocks, |
| 318 | atomic_read(&transaction->t_outstanding_credits), | 368 | atomic_read(&transaction->t_outstanding_credits), |
| 319 | __jbd2_log_space_left(journal)); | 369 | jbd2_log_space_left(journal)); |
| 320 | read_unlock(&journal->j_state_lock); | 370 | read_unlock(&journal->j_state_lock); |
| 371 | current->journal_info = handle; | ||
| 321 | 372 | ||
| 322 | lock_map_acquire(&handle->h_lockdep_map); | 373 | lock_map_acquire(&handle->h_lockdep_map); |
| 323 | jbd2_journal_free_transaction(new_transaction); | 374 | jbd2_journal_free_transaction(new_transaction); |
| @@ -348,16 +399,21 @@ static handle_t *new_handle(int nblocks) | |||
| 348 | * | 399 | * |
| 349 | * We make sure that the transaction can guarantee at least nblocks of | 400 | * We make sure that the transaction can guarantee at least nblocks of |
| 350 | * modified buffers in the log. We block until the log can guarantee | 401 | * modified buffers in the log. We block until the log can guarantee |
| 351 | * that much space. | 402 | * that much space. Additionally, if rsv_blocks > 0, we also create another |
| 352 | * | 403 | * handle with rsv_blocks reserved blocks in the journal. This handle is |
| 353 | * This function is visible to journal users (like ext3fs), so is not | 404 | * is stored in h_rsv_handle. It is not attached to any particular transaction |
| 354 | * called with the journal already locked. | 405 | * and thus doesn't block transaction commit. If the caller uses this reserved |
| 406 | * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() | ||
| 407 | * on the parent handle will dispose the reserved one. Reserved handle has to | ||
| 408 | * be converted to a normal handle using jbd2_journal_start_reserved() before | ||
| 409 | * it can be used. | ||
| 355 | * | 410 | * |
| 356 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value | 411 | * Return a pointer to a newly allocated handle, or an ERR_PTR() value |
| 357 | * on failure. | 412 | * on failure. |
| 358 | */ | 413 | */ |
| 359 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, | 414 | handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, |
| 360 | unsigned int type, unsigned int line_no) | 415 | gfp_t gfp_mask, unsigned int type, |
| 416 | unsigned int line_no) | ||
| 361 | { | 417 | { |
| 362 | handle_t *handle = journal_current_handle(); | 418 | handle_t *handle = journal_current_handle(); |
| 363 | int err; | 419 | int err; |
| @@ -374,13 +430,24 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask, | |||
| 374 | handle = new_handle(nblocks); | 430 | handle = new_handle(nblocks); |
| 375 | if (!handle) | 431 | if (!handle) |
| 376 | return ERR_PTR(-ENOMEM); | 432 | return ERR_PTR(-ENOMEM); |
| 433 | if (rsv_blocks) { | ||
| 434 | handle_t *rsv_handle; | ||
| 377 | 435 | ||
| 378 | current->journal_info = handle; | 436 | rsv_handle = new_handle(rsv_blocks); |
| 437 | if (!rsv_handle) { | ||
| 438 | jbd2_free_handle(handle); | ||
| 439 | return ERR_PTR(-ENOMEM); | ||
| 440 | } | ||
| 441 | rsv_handle->h_reserved = 1; | ||
| 442 | rsv_handle->h_journal = journal; | ||
| 443 | handle->h_rsv_handle = rsv_handle; | ||
| 444 | } | ||
| 379 | 445 | ||
| 380 | err = start_this_handle(journal, handle, gfp_mask); | 446 | err = start_this_handle(journal, handle, gfp_mask); |
| 381 | if (err < 0) { | 447 | if (err < 0) { |
| 448 | if (handle->h_rsv_handle) | ||
| 449 | jbd2_free_handle(handle->h_rsv_handle); | ||
| 382 | jbd2_free_handle(handle); | 450 | jbd2_free_handle(handle); |
| 383 | current->journal_info = NULL; | ||
| 384 | return ERR_PTR(err); | 451 | return ERR_PTR(err); |
| 385 | } | 452 | } |
| 386 | handle->h_type = type; | 453 | handle->h_type = type; |
| @@ -395,10 +462,65 @@ EXPORT_SYMBOL(jbd2__journal_start); | |||
| 395 | 462 | ||
| 396 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) | 463 | handle_t *jbd2_journal_start(journal_t *journal, int nblocks) |
| 397 | { | 464 | { |
| 398 | return jbd2__journal_start(journal, nblocks, GFP_NOFS, 0, 0); | 465 | return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); |
| 399 | } | 466 | } |
| 400 | EXPORT_SYMBOL(jbd2_journal_start); | 467 | EXPORT_SYMBOL(jbd2_journal_start); |
| 401 | 468 | ||
| 469 | void jbd2_journal_free_reserved(handle_t *handle) | ||
| 470 | { | ||
| 471 | journal_t *journal = handle->h_journal; | ||
| 472 | |||
| 473 | WARN_ON(!handle->h_reserved); | ||
| 474 | sub_reserved_credits(journal, handle->h_buffer_credits); | ||
| 475 | jbd2_free_handle(handle); | ||
| 476 | } | ||
| 477 | EXPORT_SYMBOL(jbd2_journal_free_reserved); | ||
| 478 | |||
| 479 | /** | ||
| 480 | * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle | ||
| 481 | * @handle: handle to start | ||
| 482 | * | ||
| 483 | * Start handle that has been previously reserved with jbd2_journal_reserve(). | ||
| 484 | * This attaches @handle to the running transaction (or creates one if there's | ||
| 485 | * not transaction running). Unlike jbd2_journal_start() this function cannot | ||
| 486 | * block on journal commit, checkpointing, or similar stuff. It can block on | ||
| 487 | * memory allocation or frozen journal though. | ||
| 488 | * | ||
| 489 | * Return 0 on success, non-zero on error - handle is freed in that case. | ||
| 490 | */ | ||
| 491 | int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, | ||
| 492 | unsigned int line_no) | ||
| 493 | { | ||
| 494 | journal_t *journal = handle->h_journal; | ||
| 495 | int ret = -EIO; | ||
| 496 | |||
| 497 | if (WARN_ON(!handle->h_reserved)) { | ||
| 498 | /* Someone passed in normal handle? Just stop it. */ | ||
| 499 | jbd2_journal_stop(handle); | ||
| 500 | return ret; | ||
| 501 | } | ||
| 502 | /* | ||
| 503 | * Usefulness of mixing of reserved and unreserved handles is | ||
| 504 | * questionable. So far nobody seems to need it so just error out. | ||
| 505 | */ | ||
| 506 | if (WARN_ON(current->journal_info)) { | ||
| 507 | jbd2_journal_free_reserved(handle); | ||
| 508 | return ret; | ||
| 509 | } | ||
| 510 | |||
| 511 | handle->h_journal = NULL; | ||
| 512 | /* | ||
| 513 | * GFP_NOFS is here because callers are likely from writeback or | ||
| 514 | * similarly constrained call sites | ||
| 515 | */ | ||
| 516 | ret = start_this_handle(journal, handle, GFP_NOFS); | ||
| 517 | if (ret < 0) | ||
| 518 | jbd2_journal_free_reserved(handle); | ||
| 519 | handle->h_type = type; | ||
| 520 | handle->h_line_no = line_no; | ||
| 521 | return ret; | ||
| 522 | } | ||
| 523 | EXPORT_SYMBOL(jbd2_journal_start_reserved); | ||
| 402 | 524 | ||
| 403 | /** | 525 | /** |
| 404 | * int jbd2_journal_extend() - extend buffer credits. | 526 | * int jbd2_journal_extend() - extend buffer credits. |
| @@ -423,49 +545,53 @@ EXPORT_SYMBOL(jbd2_journal_start); | |||
| 423 | int jbd2_journal_extend(handle_t *handle, int nblocks) | 545 | int jbd2_journal_extend(handle_t *handle, int nblocks) |
| 424 | { | 546 | { |
| 425 | transaction_t *transaction = handle->h_transaction; | 547 | transaction_t *transaction = handle->h_transaction; |
| 426 | journal_t *journal = transaction->t_journal; | 548 | journal_t *journal; |
| 427 | int result; | 549 | int result; |
| 428 | int wanted; | 550 | int wanted; |
| 429 | 551 | ||
| 430 | result = -EIO; | 552 | WARN_ON(!transaction); |
| 431 | if (is_handle_aborted(handle)) | 553 | if (is_handle_aborted(handle)) |
| 432 | goto out; | 554 | return -EROFS; |
| 555 | journal = transaction->t_journal; | ||
| 433 | 556 | ||
| 434 | result = 1; | 557 | result = 1; |
| 435 | 558 | ||
| 436 | read_lock(&journal->j_state_lock); | 559 | read_lock(&journal->j_state_lock); |
| 437 | 560 | ||
| 438 | /* Don't extend a locked-down transaction! */ | 561 | /* Don't extend a locked-down transaction! */ |
| 439 | if (handle->h_transaction->t_state != T_RUNNING) { | 562 | if (transaction->t_state != T_RUNNING) { |
| 440 | jbd_debug(3, "denied handle %p %d blocks: " | 563 | jbd_debug(3, "denied handle %p %d blocks: " |
| 441 | "transaction not running\n", handle, nblocks); | 564 | "transaction not running\n", handle, nblocks); |
| 442 | goto error_out; | 565 | goto error_out; |
| 443 | } | 566 | } |
| 444 | 567 | ||
| 445 | spin_lock(&transaction->t_handle_lock); | 568 | spin_lock(&transaction->t_handle_lock); |
| 446 | wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks; | 569 | wanted = atomic_add_return(nblocks, |
| 570 | &transaction->t_outstanding_credits); | ||
| 447 | 571 | ||
| 448 | if (wanted > journal->j_max_transaction_buffers) { | 572 | if (wanted > journal->j_max_transaction_buffers) { |
| 449 | jbd_debug(3, "denied handle %p %d blocks: " | 573 | jbd_debug(3, "denied handle %p %d blocks: " |
| 450 | "transaction too large\n", handle, nblocks); | 574 | "transaction too large\n", handle, nblocks); |
| 575 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
| 451 | goto unlock; | 576 | goto unlock; |
| 452 | } | 577 | } |
| 453 | 578 | ||
| 454 | if (wanted > __jbd2_log_space_left(journal)) { | 579 | if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) > |
| 580 | jbd2_log_space_left(journal)) { | ||
| 455 | jbd_debug(3, "denied handle %p %d blocks: " | 581 | jbd_debug(3, "denied handle %p %d blocks: " |
| 456 | "insufficient log space\n", handle, nblocks); | 582 | "insufficient log space\n", handle, nblocks); |
| 583 | atomic_sub(nblocks, &transaction->t_outstanding_credits); | ||
| 457 | goto unlock; | 584 | goto unlock; |
| 458 | } | 585 | } |
| 459 | 586 | ||
| 460 | trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, | 587 | trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, |
| 461 | handle->h_transaction->t_tid, | 588 | transaction->t_tid, |
| 462 | handle->h_type, handle->h_line_no, | 589 | handle->h_type, handle->h_line_no, |
| 463 | handle->h_buffer_credits, | 590 | handle->h_buffer_credits, |
| 464 | nblocks); | 591 | nblocks); |
| 465 | 592 | ||
| 466 | handle->h_buffer_credits += nblocks; | 593 | handle->h_buffer_credits += nblocks; |
| 467 | handle->h_requested_credits += nblocks; | 594 | handle->h_requested_credits += nblocks; |
| 468 | atomic_add(nblocks, &transaction->t_outstanding_credits); | ||
| 469 | result = 0; | 595 | result = 0; |
| 470 | 596 | ||
| 471 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); | 597 | jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); |
| @@ -473,7 +599,6 @@ unlock: | |||
| 473 | spin_unlock(&transaction->t_handle_lock); | 599 | spin_unlock(&transaction->t_handle_lock); |
| 474 | error_out: | 600 | error_out: |
| 475 | read_unlock(&journal->j_state_lock); | 601 | read_unlock(&journal->j_state_lock); |
| 476 | out: | ||
| 477 | return result; | 602 | return result; |
| 478 | } | 603 | } |
| 479 | 604 | ||
| @@ -490,19 +615,22 @@ out: | |||
| 490 | * to a running handle, a call to jbd2_journal_restart will commit the | 615 | * to a running handle, a call to jbd2_journal_restart will commit the |
| 491 | * handle's transaction so far and reattach the handle to a new | 616 | * handle's transaction so far and reattach the handle to a new |
| 492 | * transaction capabable of guaranteeing the requested number of | 617 | * transaction capabable of guaranteeing the requested number of |
| 493 | * credits. | 618 | * credits. We preserve reserved handle if there's any attached to the |
| 619 | * passed in handle. | ||
| 494 | */ | 620 | */ |
| 495 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) | 621 | int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) |
| 496 | { | 622 | { |
| 497 | transaction_t *transaction = handle->h_transaction; | 623 | transaction_t *transaction = handle->h_transaction; |
| 498 | journal_t *journal = transaction->t_journal; | 624 | journal_t *journal; |
| 499 | tid_t tid; | 625 | tid_t tid; |
| 500 | int need_to_start, ret; | 626 | int need_to_start, ret; |
| 501 | 627 | ||
| 628 | WARN_ON(!transaction); | ||
| 502 | /* If we've had an abort of any type, don't even think about | 629 | /* If we've had an abort of any type, don't even think about |
| 503 | * actually doing the restart! */ | 630 | * actually doing the restart! */ |
| 504 | if (is_handle_aborted(handle)) | 631 | if (is_handle_aborted(handle)) |
| 505 | return 0; | 632 | return 0; |
| 633 | journal = transaction->t_journal; | ||
| 506 | 634 | ||
| 507 | /* | 635 | /* |
| 508 | * First unlink the handle from its current transaction, and start the | 636 | * First unlink the handle from its current transaction, and start the |
| @@ -515,12 +643,18 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) | |||
| 515 | spin_lock(&transaction->t_handle_lock); | 643 | spin_lock(&transaction->t_handle_lock); |
| 516 | atomic_sub(handle->h_buffer_credits, | 644 | atomic_sub(handle->h_buffer_credits, |
| 517 | &transaction->t_outstanding_credits); | 645 | &transaction->t_outstanding_credits); |
| 646 | if (handle->h_rsv_handle) { | ||
| 647 | sub_reserved_credits(journal, | ||
| 648 | handle->h_rsv_handle->h_buffer_credits); | ||
| 649 | } | ||
| 518 | if (atomic_dec_and_test(&transaction->t_updates)) | 650 | if (atomic_dec_and_test(&transaction->t_updates)) |
| 519 | wake_up(&journal->j_wait_updates); | 651 | wake_up(&journal->j_wait_updates); |
| 652 | tid = transaction->t_tid; | ||
| 520 | spin_unlock(&transaction->t_handle_lock); | 653 | spin_unlock(&transaction->t_handle_lock); |
| 654 | handle->h_transaction = NULL; | ||
| 655 | current->journal_info = NULL; | ||
| 521 | 656 | ||
| 522 | jbd_debug(2, "restarting handle %p\n", handle); | 657 | jbd_debug(2, "restarting handle %p\n", handle); |
| 523 | tid = transaction->t_tid; | ||
| 524 | need_to_start = !tid_geq(journal->j_commit_request, tid); | 658 | need_to_start = !tid_geq(journal->j_commit_request, tid); |
| 525 | read_unlock(&journal->j_state_lock); | 659 | read_unlock(&journal->j_state_lock); |
| 526 | if (need_to_start) | 660 | if (need_to_start) |
| @@ -557,6 +691,14 @@ void jbd2_journal_lock_updates(journal_t *journal) | |||
| 557 | write_lock(&journal->j_state_lock); | 691 | write_lock(&journal->j_state_lock); |
| 558 | ++journal->j_barrier_count; | 692 | ++journal->j_barrier_count; |
| 559 | 693 | ||
| 694 | /* Wait until there are no reserved handles */ | ||
| 695 | if (atomic_read(&journal->j_reserved_credits)) { | ||
| 696 | write_unlock(&journal->j_state_lock); | ||
| 697 | wait_event(journal->j_wait_reserved, | ||
| 698 | atomic_read(&journal->j_reserved_credits) == 0); | ||
| 699 | write_lock(&journal->j_state_lock); | ||
| 700 | } | ||
| 701 | |||
| 560 | /* Wait until there are no running updates */ | 702 | /* Wait until there are no running updates */ |
| 561 | while (1) { | 703 | while (1) { |
| 562 | transaction_t *transaction = journal->j_running_transaction; | 704 | transaction_t *transaction = journal->j_running_transaction; |
| @@ -619,6 +761,12 @@ static void warn_dirty_buffer(struct buffer_head *bh) | |||
| 619 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); | 761 | bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); |
| 620 | } | 762 | } |
| 621 | 763 | ||
| 764 | static int sleep_on_shadow_bh(void *word) | ||
| 765 | { | ||
| 766 | io_schedule(); | ||
| 767 | return 0; | ||
| 768 | } | ||
| 769 | |||
| 622 | /* | 770 | /* |
| 623 | * If the buffer is already part of the current transaction, then there | 771 | * If the buffer is already part of the current transaction, then there |
| 624 | * is nothing we need to do. If it is already part of a prior | 772 | * is nothing we need to do. If it is already part of a prior |
| @@ -634,17 +782,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, | |||
| 634 | int force_copy) | 782 | int force_copy) |
| 635 | { | 783 | { |
| 636 | struct buffer_head *bh; | 784 | struct buffer_head *bh; |
| 637 | transaction_t *transaction; | 785 | transaction_t *transaction = handle->h_transaction; |
| 638 | journal_t *journal; | 786 | journal_t *journal; |
| 639 | int error; | 787 | int error; |
| 640 | char *frozen_buffer = NULL; | 788 | char *frozen_buffer = NULL; |
| 641 | int need_copy = 0; | 789 | int need_copy = 0; |
| 642 | unsigned long start_lock, time_lock; | 790 | unsigned long start_lock, time_lock; |
| 643 | 791 | ||
| 792 | WARN_ON(!transaction); | ||
| 644 | if (is_handle_aborted(handle)) | 793 | if (is_handle_aborted(handle)) |
| 645 | return -EROFS; | 794 | return -EROFS; |
| 646 | |||
| 647 | transaction = handle->h_transaction; | ||
| 648 | journal = transaction->t_journal; | 795 | journal = transaction->t_journal; |
| 649 | 796 | ||
| 650 | jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); | 797 | jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); |
| @@ -754,41 +901,29 @@ repeat: | |||
| 754 | * journaled. If the primary copy is already going to | 901 | * journaled. If the primary copy is already going to |
| 755 | * disk then we cannot do copy-out here. */ | 902 | * disk then we cannot do copy-out here. */ |
| 756 | 903 | ||
| 757 | if (jh->b_jlist == BJ_Shadow) { | 904 | if (buffer_shadow(bh)) { |
| 758 | DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); | ||
| 759 | wait_queue_head_t *wqh; | ||
| 760 | |||
| 761 | wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); | ||
| 762 | |||
| 763 | JBUFFER_TRACE(jh, "on shadow: sleep"); | 905 | JBUFFER_TRACE(jh, "on shadow: sleep"); |
| 764 | jbd_unlock_bh_state(bh); | 906 | jbd_unlock_bh_state(bh); |
| 765 | /* commit wakes up all shadow buffers after IO */ | 907 | wait_on_bit(&bh->b_state, BH_Shadow, |
| 766 | for ( ; ; ) { | 908 | sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); |
| 767 | prepare_to_wait(wqh, &wait.wait, | ||
| 768 | TASK_UNINTERRUPTIBLE); | ||
| 769 | if (jh->b_jlist != BJ_Shadow) | ||
| 770 | break; | ||
| 771 | schedule(); | ||
| 772 | } | ||
| 773 | finish_wait(wqh, &wait.wait); | ||
| 774 | goto repeat; | 909 | goto repeat; |
| 775 | } | 910 | } |
| 776 | 911 | ||
| 777 | /* Only do the copy if the currently-owning transaction | 912 | /* |
| 778 | * still needs it. If it is on the Forget list, the | 913 | * Only do the copy if the currently-owning transaction still |
| 779 | * committing transaction is past that stage. The | 914 | * needs it. If buffer isn't on BJ_Metadata list, the |
| 780 | * buffer had better remain locked during the kmalloc, | 915 | * committing transaction is past that stage (here we use the |
| 781 | * but that should be true --- we hold the journal lock | 916 | * fact that BH_Shadow is set under bh_state lock together with |
| 782 | * still and the buffer is already on the BUF_JOURNAL | 917 | * refiling to BJ_Shadow list and at this point we know the |
| 783 | * list so won't be flushed. | 918 | * buffer doesn't have BH_Shadow set). |
| 784 | * | 919 | * |
| 785 | * Subtle point, though: if this is a get_undo_access, | 920 | * Subtle point, though: if this is a get_undo_access, |
| 786 | * then we will be relying on the frozen_data to contain | 921 | * then we will be relying on the frozen_data to contain |
| 787 | * the new value of the committed_data record after the | 922 | * the new value of the committed_data record after the |
| 788 | * transaction, so we HAVE to force the frozen_data copy | 923 | * transaction, so we HAVE to force the frozen_data copy |
| 789 | * in that case. */ | 924 | * in that case. |
| 790 | 925 | */ | |
| 791 | if (jh->b_jlist != BJ_Forget || force_copy) { | 926 | if (jh->b_jlist == BJ_Metadata || force_copy) { |
| 792 | JBUFFER_TRACE(jh, "generate frozen data"); | 927 | JBUFFER_TRACE(jh, "generate frozen data"); |
| 793 | if (!frozen_buffer) { | 928 | if (!frozen_buffer) { |
| 794 | JBUFFER_TRACE(jh, "allocate memory for buffer"); | 929 | JBUFFER_TRACE(jh, "allocate memory for buffer"); |
| @@ -915,14 +1050,16 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) | |||
| 915 | int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) | 1050 | int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) |
| 916 | { | 1051 | { |
| 917 | transaction_t *transaction = handle->h_transaction; | 1052 | transaction_t *transaction = handle->h_transaction; |
| 918 | journal_t *journal = transaction->t_journal; | 1053 | journal_t *journal; |
| 919 | struct journal_head *jh = jbd2_journal_add_journal_head(bh); | 1054 | struct journal_head *jh = jbd2_journal_add_journal_head(bh); |
| 920 | int err; | 1055 | int err; |
| 921 | 1056 | ||
| 922 | jbd_debug(5, "journal_head %p\n", jh); | 1057 | jbd_debug(5, "journal_head %p\n", jh); |
| 1058 | WARN_ON(!transaction); | ||
| 923 | err = -EROFS; | 1059 | err = -EROFS; |
| 924 | if (is_handle_aborted(handle)) | 1060 | if (is_handle_aborted(handle)) |
| 925 | goto out; | 1061 | goto out; |
| 1062 | journal = transaction->t_journal; | ||
| 926 | err = 0; | 1063 | err = 0; |
| 927 | 1064 | ||
| 928 | JBUFFER_TRACE(jh, "entry"); | 1065 | JBUFFER_TRACE(jh, "entry"); |
| @@ -1128,12 +1265,14 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, | |||
| 1128 | int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | 1265 | int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) |
| 1129 | { | 1266 | { |
| 1130 | transaction_t *transaction = handle->h_transaction; | 1267 | transaction_t *transaction = handle->h_transaction; |
| 1131 | journal_t *journal = transaction->t_journal; | 1268 | journal_t *journal; |
| 1132 | struct journal_head *jh; | 1269 | struct journal_head *jh; |
| 1133 | int ret = 0; | 1270 | int ret = 0; |
| 1134 | 1271 | ||
| 1272 | WARN_ON(!transaction); | ||
| 1135 | if (is_handle_aborted(handle)) | 1273 | if (is_handle_aborted(handle)) |
| 1136 | goto out; | 1274 | return -EROFS; |
| 1275 | journal = transaction->t_journal; | ||
| 1137 | jh = jbd2_journal_grab_journal_head(bh); | 1276 | jh = jbd2_journal_grab_journal_head(bh); |
| 1138 | if (!jh) { | 1277 | if (!jh) { |
| 1139 | ret = -EUCLEAN; | 1278 | ret = -EUCLEAN; |
| @@ -1227,7 +1366,7 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) | |||
| 1227 | 1366 | ||
| 1228 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); | 1367 | JBUFFER_TRACE(jh, "file as BJ_Metadata"); |
| 1229 | spin_lock(&journal->j_list_lock); | 1368 | spin_lock(&journal->j_list_lock); |
| 1230 | __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); | 1369 | __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); |
| 1231 | spin_unlock(&journal->j_list_lock); | 1370 | spin_unlock(&journal->j_list_lock); |
| 1232 | out_unlock_bh: | 1371 | out_unlock_bh: |
| 1233 | jbd_unlock_bh_state(bh); | 1372 | jbd_unlock_bh_state(bh); |
| @@ -1258,12 +1397,17 @@ out: | |||
| 1258 | int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | 1397 | int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) |
| 1259 | { | 1398 | { |
| 1260 | transaction_t *transaction = handle->h_transaction; | 1399 | transaction_t *transaction = handle->h_transaction; |
| 1261 | journal_t *journal = transaction->t_journal; | 1400 | journal_t *journal; |
| 1262 | struct journal_head *jh; | 1401 | struct journal_head *jh; |
| 1263 | int drop_reserve = 0; | 1402 | int drop_reserve = 0; |
| 1264 | int err = 0; | 1403 | int err = 0; |
| 1265 | int was_modified = 0; | 1404 | int was_modified = 0; |
| 1266 | 1405 | ||
| 1406 | WARN_ON(!transaction); | ||
| 1407 | if (is_handle_aborted(handle)) | ||
| 1408 | return -EROFS; | ||
| 1409 | journal = transaction->t_journal; | ||
| 1410 | |||
| 1267 | BUFFER_TRACE(bh, "entry"); | 1411 | BUFFER_TRACE(bh, "entry"); |
| 1268 | 1412 | ||
| 1269 | jbd_lock_bh_state(bh); | 1413 | jbd_lock_bh_state(bh); |
| @@ -1290,7 +1434,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) | |||
| 1290 | */ | 1434 | */ |
| 1291 | jh->b_modified = 0; | 1435 | jh->b_modified = 0; |
| 1292 | 1436 | ||
| 1293 | if (jh->b_transaction == handle->h_transaction) { | 1437 | if (jh->b_transaction == transaction) { |
| 1294 | J_ASSERT_JH(jh, !jh->b_frozen_data); | 1438 | J_ASSERT_JH(jh, !jh->b_frozen_data); |
| 1295 | 1439 | ||
| 1296 | /* If we are forgetting a buffer which is already part | 1440 | /* If we are forgetting a buffer which is already part |
| @@ -1385,19 +1529,21 @@ drop: | |||
| 1385 | int jbd2_journal_stop(handle_t *handle) | 1529 | int jbd2_journal_stop(handle_t *handle) |
| 1386 | { | 1530 | { |
| 1387 | transaction_t *transaction = handle->h_transaction; | 1531 | transaction_t *transaction = handle->h_transaction; |
| 1388 | journal_t *journal = transaction->t_journal; | 1532 | journal_t *journal; |
| 1389 | int err, wait_for_commit = 0; | 1533 | int err = 0, wait_for_commit = 0; |
| 1390 | tid_t tid; | 1534 | tid_t tid; |
| 1391 | pid_t pid; | 1535 | pid_t pid; |
| 1392 | 1536 | ||
| 1537 | if (!transaction) | ||
| 1538 | goto free_and_exit; | ||
| 1539 | journal = transaction->t_journal; | ||
| 1540 | |||
| 1393 | J_ASSERT(journal_current_handle() == handle); | 1541 | J_ASSERT(journal_current_handle() == handle); |
| 1394 | 1542 | ||
| 1395 | if (is_handle_aborted(handle)) | 1543 | if (is_handle_aborted(handle)) |
| 1396 | err = -EIO; | 1544 | err = -EIO; |
| 1397 | else { | 1545 | else |
| 1398 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); | 1546 | J_ASSERT(atomic_read(&transaction->t_updates) > 0); |
| 1399 | err = 0; | ||
| 1400 | } | ||
| 1401 | 1547 | ||
| 1402 | if (--handle->h_ref > 0) { | 1548 | if (--handle->h_ref > 0) { |
| 1403 | jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, | 1549 | jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, |
| @@ -1407,7 +1553,7 @@ int jbd2_journal_stop(handle_t *handle) | |||
| 1407 | 1553 | ||
| 1408 | jbd_debug(4, "Handle %p going down\n", handle); | 1554 | jbd_debug(4, "Handle %p going down\n", handle); |
| 1409 | trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, | 1555 | trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, |
| 1410 | handle->h_transaction->t_tid, | 1556 | transaction->t_tid, |
| 1411 | handle->h_type, handle->h_line_no, | 1557 | handle->h_type, handle->h_line_no, |
| 1412 | jiffies - handle->h_start_jiffies, | 1558 | jiffies - handle->h_start_jiffies, |
| 1413 | handle->h_sync, handle->h_requested_credits, | 1559 | handle->h_sync, handle->h_requested_credits, |
| @@ -1518,33 +1664,13 @@ int jbd2_journal_stop(handle_t *handle) | |||
| 1518 | 1664 | ||
| 1519 | lock_map_release(&handle->h_lockdep_map); | 1665 | lock_map_release(&handle->h_lockdep_map); |
| 1520 | 1666 | ||
| 1667 | if (handle->h_rsv_handle) | ||
| 1668 | jbd2_journal_free_reserved(handle->h_rsv_handle); | ||
| 1669 | free_and_exit: | ||
| 1521 | jbd2_free_handle(handle); | 1670 | jbd2_free_handle(handle); |
| 1522 | return err; | 1671 | return err; |
| 1523 | } | 1672 | } |
| 1524 | 1673 | ||
| 1525 | /** | ||
| 1526 | * int jbd2_journal_force_commit() - force any uncommitted transactions | ||
| 1527 | * @journal: journal to force | ||
| 1528 | * | ||
| 1529 | * For synchronous operations: force any uncommitted transactions | ||
| 1530 | * to disk. May seem kludgy, but it reuses all the handle batching | ||
| 1531 | * code in a very simple manner. | ||
| 1532 | */ | ||
| 1533 | int jbd2_journal_force_commit(journal_t *journal) | ||
| 1534 | { | ||
| 1535 | handle_t *handle; | ||
| 1536 | int ret; | ||
| 1537 | |||
| 1538 | handle = jbd2_journal_start(journal, 1); | ||
| 1539 | if (IS_ERR(handle)) { | ||
| 1540 | ret = PTR_ERR(handle); | ||
| 1541 | } else { | ||
| 1542 | handle->h_sync = 1; | ||
| 1543 | ret = jbd2_journal_stop(handle); | ||
| 1544 | } | ||
| 1545 | return ret; | ||
| 1546 | } | ||
| 1547 | |||
| 1548 | /* | 1674 | /* |
| 1549 | * | 1675 | * |
| 1550 | * List management code snippets: various functions for manipulating the | 1676 | * List management code snippets: various functions for manipulating the |
| @@ -1601,10 +1727,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) | |||
| 1601 | * Remove a buffer from the appropriate transaction list. | 1727 | * Remove a buffer from the appropriate transaction list. |
| 1602 | * | 1728 | * |
| 1603 | * Note that this function can *change* the value of | 1729 | * Note that this function can *change* the value of |
| 1604 | * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, | 1730 | * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or |
| 1605 | * t_log_list or t_reserved_list. If the caller is holding onto a copy of one | 1731 | * t_reserved_list. If the caller is holding onto a copy of one of these |
| 1606 | * of these pointers, it could go bad. Generally the caller needs to re-read | 1732 | * pointers, it could go bad. Generally the caller needs to re-read the |
| 1607 | * the pointer from the transaction_t. | 1733 | * pointer from the transaction_t. |
| 1608 | * | 1734 | * |
| 1609 | * Called under j_list_lock. | 1735 | * Called under j_list_lock. |
| 1610 | */ | 1736 | */ |
| @@ -1634,15 +1760,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) | |||
| 1634 | case BJ_Forget: | 1760 | case BJ_Forget: |
| 1635 | list = &transaction->t_forget; | 1761 | list = &transaction->t_forget; |
| 1636 | break; | 1762 | break; |
| 1637 | case BJ_IO: | ||
| 1638 | list = &transaction->t_iobuf_list; | ||
| 1639 | break; | ||
| 1640 | case BJ_Shadow: | 1763 | case BJ_Shadow: |
| 1641 | list = &transaction->t_shadow_list; | 1764 | list = &transaction->t_shadow_list; |
| 1642 | break; | 1765 | break; |
| 1643 | case BJ_LogCtl: | ||
| 1644 | list = &transaction->t_log_list; | ||
| 1645 | break; | ||
| 1646 | case BJ_Reserved: | 1766 | case BJ_Reserved: |
| 1647 | list = &transaction->t_reserved_list; | 1767 | list = &transaction->t_reserved_list; |
| 1648 | break; | 1768 | break; |
| @@ -2034,18 +2154,23 @@ zap_buffer_unlocked: | |||
| 2034 | * void jbd2_journal_invalidatepage() | 2154 | * void jbd2_journal_invalidatepage() |
| 2035 | * @journal: journal to use for flush... | 2155 | * @journal: journal to use for flush... |
| 2036 | * @page: page to flush | 2156 | * @page: page to flush |
| 2037 | * @offset: length of page to invalidate. | 2157 | * @offset: start of the range to invalidate |
| 2158 | * @length: length of the range to invalidate | ||
| 2038 | * | 2159 | * |
| 2039 | * Reap page buffers containing data after offset in page. Can return -EBUSY | 2160 | * Reap page buffers containing data after in the specified range in page. |
| 2040 | * if buffers are part of the committing transaction and the page is straddling | 2161 | * Can return -EBUSY if buffers are part of the committing transaction and |
| 2041 | * i_size. Caller then has to wait for current commit and try again. | 2162 | * the page is straddling i_size. Caller then has to wait for current commit |
| 2163 | * and try again. | ||
| 2042 | */ | 2164 | */ |
| 2043 | int jbd2_journal_invalidatepage(journal_t *journal, | 2165 | int jbd2_journal_invalidatepage(journal_t *journal, |
| 2044 | struct page *page, | 2166 | struct page *page, |
| 2045 | unsigned long offset) | 2167 | unsigned int offset, |
| 2168 | unsigned int length) | ||
| 2046 | { | 2169 | { |
| 2047 | struct buffer_head *head, *bh, *next; | 2170 | struct buffer_head *head, *bh, *next; |
| 2171 | unsigned int stop = offset + length; | ||
| 2048 | unsigned int curr_off = 0; | 2172 | unsigned int curr_off = 0; |
| 2173 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
| 2049 | int may_free = 1; | 2174 | int may_free = 1; |
| 2050 | int ret = 0; | 2175 | int ret = 0; |
| 2051 | 2176 | ||
| @@ -2054,6 +2179,8 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
| 2054 | if (!page_has_buffers(page)) | 2179 | if (!page_has_buffers(page)) |
| 2055 | return 0; | 2180 | return 0; |
| 2056 | 2181 | ||
| 2182 | BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); | ||
| 2183 | |||
| 2057 | /* We will potentially be playing with lists other than just the | 2184 | /* We will potentially be playing with lists other than just the |
| 2058 | * data lists (especially for journaled data mode), so be | 2185 | * data lists (especially for journaled data mode), so be |
| 2059 | * cautious in our locking. */ | 2186 | * cautious in our locking. */ |
| @@ -2063,10 +2190,13 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
| 2063 | unsigned int next_off = curr_off + bh->b_size; | 2190 | unsigned int next_off = curr_off + bh->b_size; |
| 2064 | next = bh->b_this_page; | 2191 | next = bh->b_this_page; |
| 2065 | 2192 | ||
| 2193 | if (next_off > stop) | ||
| 2194 | return 0; | ||
| 2195 | |||
| 2066 | if (offset <= curr_off) { | 2196 | if (offset <= curr_off) { |
| 2067 | /* This block is wholly outside the truncation point */ | 2197 | /* This block is wholly outside the truncation point */ |
| 2068 | lock_buffer(bh); | 2198 | lock_buffer(bh); |
| 2069 | ret = journal_unmap_buffer(journal, bh, offset > 0); | 2199 | ret = journal_unmap_buffer(journal, bh, partial_page); |
| 2070 | unlock_buffer(bh); | 2200 | unlock_buffer(bh); |
| 2071 | if (ret < 0) | 2201 | if (ret < 0) |
| 2072 | return ret; | 2202 | return ret; |
| @@ -2077,7 +2207,7 @@ int jbd2_journal_invalidatepage(journal_t *journal, | |||
| 2077 | 2207 | ||
| 2078 | } while (bh != head); | 2208 | } while (bh != head); |
| 2079 | 2209 | ||
| 2080 | if (!offset) { | 2210 | if (!partial_page) { |
| 2081 | if (may_free && try_to_free_buffers(page)) | 2211 | if (may_free && try_to_free_buffers(page)) |
| 2082 | J_ASSERT(!page_has_buffers(page)); | 2212 | J_ASSERT(!page_has_buffers(page)); |
| 2083 | } | 2213 | } |
| @@ -2138,15 +2268,9 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, | |||
| 2138 | case BJ_Forget: | 2268 | case BJ_Forget: |
| 2139 | list = &transaction->t_forget; | 2269 | list = &transaction->t_forget; |
| 2140 | break; | 2270 | break; |
| 2141 | case BJ_IO: | ||
| 2142 | list = &transaction->t_iobuf_list; | ||
| 2143 | break; | ||
| 2144 | case BJ_Shadow: | 2271 | case BJ_Shadow: |
| 2145 | list = &transaction->t_shadow_list; | 2272 | list = &transaction->t_shadow_list; |
| 2146 | break; | 2273 | break; |
| 2147 | case BJ_LogCtl: | ||
| 2148 | list = &transaction->t_log_list; | ||
| 2149 | break; | ||
| 2150 | case BJ_Reserved: | 2274 | case BJ_Reserved: |
| 2151 | list = &transaction->t_reserved_list; | 2275 | list = &transaction->t_reserved_list; |
| 2152 | break; | 2276 | break; |
| @@ -2248,10 +2372,12 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) | |||
| 2248 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) | 2372 | int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) |
| 2249 | { | 2373 | { |
| 2250 | transaction_t *transaction = handle->h_transaction; | 2374 | transaction_t *transaction = handle->h_transaction; |
| 2251 | journal_t *journal = transaction->t_journal; | 2375 | journal_t *journal; |
| 2252 | 2376 | ||
| 2377 | WARN_ON(!transaction); | ||
| 2253 | if (is_handle_aborted(handle)) | 2378 | if (is_handle_aborted(handle)) |
| 2254 | return -EIO; | 2379 | return -EROFS; |
| 2380 | journal = transaction->t_journal; | ||
| 2255 | 2381 | ||
| 2256 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, | 2382 | jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, |
| 2257 | transaction->t_tid); | 2383 | transaction->t_tid); |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index acd46a4160cb..e3aac222472e 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
| @@ -22,7 +22,7 @@ | |||
| 22 | #include <linux/time.h> | 22 | #include <linux/time.h> |
| 23 | #include "nodelist.h" | 23 | #include "nodelist.h" |
| 24 | 24 | ||
| 25 | static int jffs2_readdir (struct file *, void *, filldir_t); | 25 | static int jffs2_readdir (struct file *, struct dir_context *); |
| 26 | 26 | ||
| 27 | static int jffs2_create (struct inode *,struct dentry *,umode_t, | 27 | static int jffs2_create (struct inode *,struct dentry *,umode_t, |
| 28 | bool); | 28 | bool); |
| @@ -40,7 +40,7 @@ static int jffs2_rename (struct inode *, struct dentry *, | |||
| 40 | const struct file_operations jffs2_dir_operations = | 40 | const struct file_operations jffs2_dir_operations = |
| 41 | { | 41 | { |
| 42 | .read = generic_read_dir, | 42 | .read = generic_read_dir, |
| 43 | .readdir = jffs2_readdir, | 43 | .iterate = jffs2_readdir, |
| 44 | .unlocked_ioctl=jffs2_ioctl, | 44 | .unlocked_ioctl=jffs2_ioctl, |
| 45 | .fsync = jffs2_fsync, | 45 | .fsync = jffs2_fsync, |
| 46 | .llseek = generic_file_llseek, | 46 | .llseek = generic_file_llseek, |
| @@ -114,60 +114,40 @@ static struct dentry *jffs2_lookup(struct inode *dir_i, struct dentry *target, | |||
| 114 | /***********************************************************************/ | 114 | /***********************************************************************/ |
| 115 | 115 | ||
| 116 | 116 | ||
| 117 | static int jffs2_readdir(struct file *filp, void *dirent, filldir_t filldir) | 117 | static int jffs2_readdir(struct file *file, struct dir_context *ctx) |
| 118 | { | 118 | { |
| 119 | struct jffs2_inode_info *f; | 119 | struct inode *inode = file_inode(file); |
| 120 | struct inode *inode = file_inode(filp); | 120 | struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); |
| 121 | struct jffs2_full_dirent *fd; | 121 | struct jffs2_full_dirent *fd; |
| 122 | unsigned long offset, curofs; | 122 | unsigned long curofs = 1; |
| 123 | 123 | ||
| 124 | jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", | 124 | jffs2_dbg(1, "jffs2_readdir() for dir_i #%lu\n", inode->i_ino); |
| 125 | file_inode(filp)->i_ino); | ||
| 126 | 125 | ||
| 127 | f = JFFS2_INODE_INFO(inode); | 126 | if (!dir_emit_dots(file, ctx)) |
| 128 | 127 | return 0; | |
| 129 | offset = filp->f_pos; | ||
| 130 | |||
| 131 | if (offset == 0) { | ||
| 132 | jffs2_dbg(1, "Dirent 0: \".\", ino #%lu\n", inode->i_ino); | ||
| 133 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
| 134 | goto out; | ||
| 135 | offset++; | ||
| 136 | } | ||
| 137 | if (offset == 1) { | ||
| 138 | unsigned long pino = parent_ino(filp->f_path.dentry); | ||
| 139 | jffs2_dbg(1, "Dirent 1: \"..\", ino #%lu\n", pino); | ||
| 140 | if (filldir(dirent, "..", 2, 1, pino, DT_DIR) < 0) | ||
| 141 | goto out; | ||
| 142 | offset++; | ||
| 143 | } | ||
| 144 | 128 | ||
| 145 | curofs=1; | ||
| 146 | mutex_lock(&f->sem); | 129 | mutex_lock(&f->sem); |
| 147 | for (fd = f->dents; fd; fd = fd->next) { | 130 | for (fd = f->dents; fd; fd = fd->next) { |
| 148 | |||
| 149 | curofs++; | 131 | curofs++; |
| 150 | /* First loop: curofs = 2; offset = 2 */ | 132 | /* First loop: curofs = 2; pos = 2 */ |
| 151 | if (curofs < offset) { | 133 | if (curofs < ctx->pos) { |
| 152 | jffs2_dbg(2, "Skipping dirent: \"%s\", ino #%u, type %d, because curofs %ld < offset %ld\n", | 134 | jffs2_dbg(2, "Skipping dirent: \"%s\", ino #%u, type %d, because curofs %ld < offset %ld\n", |
| 153 | fd->name, fd->ino, fd->type, curofs, offset); | 135 | fd->name, fd->ino, fd->type, curofs, (unsigned long)ctx->pos); |
| 154 | continue; | 136 | continue; |
| 155 | } | 137 | } |
| 156 | if (!fd->ino) { | 138 | if (!fd->ino) { |
| 157 | jffs2_dbg(2, "Skipping deletion dirent \"%s\"\n", | 139 | jffs2_dbg(2, "Skipping deletion dirent \"%s\"\n", |
| 158 | fd->name); | 140 | fd->name); |
| 159 | offset++; | 141 | ctx->pos++; |
| 160 | continue; | 142 | continue; |
| 161 | } | 143 | } |
| 162 | jffs2_dbg(2, "Dirent %ld: \"%s\", ino #%u, type %d\n", | 144 | jffs2_dbg(2, "Dirent %ld: \"%s\", ino #%u, type %d\n", |
| 163 | offset, fd->name, fd->ino, fd->type); | 145 | (unsigned long)ctx->pos, fd->name, fd->ino, fd->type); |
| 164 | if (filldir(dirent, fd->name, strlen(fd->name), offset, fd->ino, fd->type) < 0) | 146 | if (!dir_emit(ctx, fd->name, strlen(fd->name), fd->ino, fd->type)) |
| 165 | break; | 147 | break; |
| 166 | offset++; | 148 | ctx->pos++; |
| 167 | } | 149 | } |
| 168 | mutex_unlock(&f->sem); | 150 | mutex_unlock(&f->sem); |
| 169 | out: | ||
| 170 | filp->f_pos = offset; | ||
| 171 | return 0; | 151 | return 0; |
| 172 | } | 152 | } |
| 173 | 153 | ||
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 0ddbeceafc62..9f4ed13d9f15 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c | |||
| @@ -3002,9 +3002,9 @@ static inline struct jfs_dirent *next_jfs_dirent(struct jfs_dirent *dirent) | |||
| 3002 | * return: offset = (pn, index) of start entry | 3002 | * return: offset = (pn, index) of start entry |
| 3003 | * of next jfs_readdir()/dtRead() | 3003 | * of next jfs_readdir()/dtRead() |
| 3004 | */ | 3004 | */ |
| 3005 | int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 3005 | int jfs_readdir(struct file *file, struct dir_context *ctx) |
| 3006 | { | 3006 | { |
| 3007 | struct inode *ip = file_inode(filp); | 3007 | struct inode *ip = file_inode(file); |
| 3008 | struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; | 3008 | struct nls_table *codepage = JFS_SBI(ip->i_sb)->nls_tab; |
| 3009 | int rc = 0; | 3009 | int rc = 0; |
| 3010 | loff_t dtpos; /* legacy OS/2 style position */ | 3010 | loff_t dtpos; /* legacy OS/2 style position */ |
| @@ -3033,7 +3033,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3033 | int overflow, fix_page, page_fixed = 0; | 3033 | int overflow, fix_page, page_fixed = 0; |
| 3034 | static int unique_pos = 2; /* If we can't fix broken index */ | 3034 | static int unique_pos = 2; /* If we can't fix broken index */ |
| 3035 | 3035 | ||
| 3036 | if (filp->f_pos == DIREND) | 3036 | if (ctx->pos == DIREND) |
| 3037 | return 0; | 3037 | return 0; |
| 3038 | 3038 | ||
| 3039 | if (DO_INDEX(ip)) { | 3039 | if (DO_INDEX(ip)) { |
| @@ -3045,7 +3045,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3045 | */ | 3045 | */ |
| 3046 | do_index = 1; | 3046 | do_index = 1; |
| 3047 | 3047 | ||
| 3048 | dir_index = (u32) filp->f_pos; | 3048 | dir_index = (u32) ctx->pos; |
| 3049 | 3049 | ||
| 3050 | if (dir_index > 1) { | 3050 | if (dir_index > 1) { |
| 3051 | struct dir_table_slot dirtab_slot; | 3051 | struct dir_table_slot dirtab_slot; |
| @@ -3053,25 +3053,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3053 | if (dtEmpty(ip) || | 3053 | if (dtEmpty(ip) || |
| 3054 | (dir_index >= JFS_IP(ip)->next_index)) { | 3054 | (dir_index >= JFS_IP(ip)->next_index)) { |
| 3055 | /* Stale position. Directory has shrunk */ | 3055 | /* Stale position. Directory has shrunk */ |
| 3056 | filp->f_pos = DIREND; | 3056 | ctx->pos = DIREND; |
| 3057 | return 0; | 3057 | return 0; |
| 3058 | } | 3058 | } |
| 3059 | repeat: | 3059 | repeat: |
| 3060 | rc = read_index(ip, dir_index, &dirtab_slot); | 3060 | rc = read_index(ip, dir_index, &dirtab_slot); |
| 3061 | if (rc) { | 3061 | if (rc) { |
| 3062 | filp->f_pos = DIREND; | 3062 | ctx->pos = DIREND; |
| 3063 | return rc; | 3063 | return rc; |
| 3064 | } | 3064 | } |
| 3065 | if (dirtab_slot.flag == DIR_INDEX_FREE) { | 3065 | if (dirtab_slot.flag == DIR_INDEX_FREE) { |
| 3066 | if (loop_count++ > JFS_IP(ip)->next_index) { | 3066 | if (loop_count++ > JFS_IP(ip)->next_index) { |
| 3067 | jfs_err("jfs_readdir detected " | 3067 | jfs_err("jfs_readdir detected " |
| 3068 | "infinite loop!"); | 3068 | "infinite loop!"); |
| 3069 | filp->f_pos = DIREND; | 3069 | ctx->pos = DIREND; |
| 3070 | return 0; | 3070 | return 0; |
| 3071 | } | 3071 | } |
| 3072 | dir_index = le32_to_cpu(dirtab_slot.addr2); | 3072 | dir_index = le32_to_cpu(dirtab_slot.addr2); |
| 3073 | if (dir_index == -1) { | 3073 | if (dir_index == -1) { |
| 3074 | filp->f_pos = DIREND; | 3074 | ctx->pos = DIREND; |
| 3075 | return 0; | 3075 | return 0; |
| 3076 | } | 3076 | } |
| 3077 | goto repeat; | 3077 | goto repeat; |
| @@ -3080,13 +3080,13 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3080 | index = dirtab_slot.slot; | 3080 | index = dirtab_slot.slot; |
| 3081 | DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); | 3081 | DT_GETPAGE(ip, bn, mp, PSIZE, p, rc); |
| 3082 | if (rc) { | 3082 | if (rc) { |
| 3083 | filp->f_pos = DIREND; | 3083 | ctx->pos = DIREND; |
| 3084 | return 0; | 3084 | return 0; |
| 3085 | } | 3085 | } |
| 3086 | if (p->header.flag & BT_INTERNAL) { | 3086 | if (p->header.flag & BT_INTERNAL) { |
| 3087 | jfs_err("jfs_readdir: bad index table"); | 3087 | jfs_err("jfs_readdir: bad index table"); |
| 3088 | DT_PUTPAGE(mp); | 3088 | DT_PUTPAGE(mp); |
| 3089 | filp->f_pos = -1; | 3089 | ctx->pos = -1; |
| 3090 | return 0; | 3090 | return 0; |
| 3091 | } | 3091 | } |
| 3092 | } else { | 3092 | } else { |
| @@ -3094,23 +3094,22 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3094 | /* | 3094 | /* |
| 3095 | * self "." | 3095 | * self "." |
| 3096 | */ | 3096 | */ |
| 3097 | filp->f_pos = 0; | 3097 | ctx->pos = 0; |
| 3098 | if (filldir(dirent, ".", 1, 0, ip->i_ino, | 3098 | if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) |
| 3099 | DT_DIR)) | ||
| 3100 | return 0; | 3099 | return 0; |
| 3101 | } | 3100 | } |
| 3102 | /* | 3101 | /* |
| 3103 | * parent ".." | 3102 | * parent ".." |
| 3104 | */ | 3103 | */ |
| 3105 | filp->f_pos = 1; | 3104 | ctx->pos = 1; |
| 3106 | if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) | 3105 | if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) |
| 3107 | return 0; | 3106 | return 0; |
| 3108 | 3107 | ||
| 3109 | /* | 3108 | /* |
| 3110 | * Find first entry of left-most leaf | 3109 | * Find first entry of left-most leaf |
| 3111 | */ | 3110 | */ |
| 3112 | if (dtEmpty(ip)) { | 3111 | if (dtEmpty(ip)) { |
| 3113 | filp->f_pos = DIREND; | 3112 | ctx->pos = DIREND; |
| 3114 | return 0; | 3113 | return 0; |
| 3115 | } | 3114 | } |
| 3116 | 3115 | ||
| @@ -3128,23 +3127,19 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3128 | * pn > 0: Real entries, pn=1 -> leftmost page | 3127 | * pn > 0: Real entries, pn=1 -> leftmost page |
| 3129 | * pn = index = -1: No more entries | 3128 | * pn = index = -1: No more entries |
| 3130 | */ | 3129 | */ |
| 3131 | dtpos = filp->f_pos; | 3130 | dtpos = ctx->pos; |
| 3132 | if (dtpos == 0) { | 3131 | if (dtpos == 0) { |
| 3133 | /* build "." entry */ | 3132 | /* build "." entry */ |
| 3134 | 3133 | if (!dir_emit(ctx, ".", 1, ip->i_ino, DT_DIR)) | |
| 3135 | if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, | ||
| 3136 | DT_DIR)) | ||
| 3137 | return 0; | 3134 | return 0; |
| 3138 | dtoffset->index = 1; | 3135 | dtoffset->index = 1; |
| 3139 | filp->f_pos = dtpos; | 3136 | ctx->pos = dtpos; |
| 3140 | } | 3137 | } |
| 3141 | 3138 | ||
| 3142 | if (dtoffset->pn == 0) { | 3139 | if (dtoffset->pn == 0) { |
| 3143 | if (dtoffset->index == 1) { | 3140 | if (dtoffset->index == 1) { |
| 3144 | /* build ".." entry */ | 3141 | /* build ".." entry */ |
| 3145 | 3142 | if (!dir_emit(ctx, "..", 2, PARENT(ip), DT_DIR)) | |
| 3146 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
| 3147 | PARENT(ip), DT_DIR)) | ||
| 3148 | return 0; | 3143 | return 0; |
| 3149 | } else { | 3144 | } else { |
| 3150 | jfs_err("jfs_readdir called with " | 3145 | jfs_err("jfs_readdir called with " |
| @@ -3152,18 +3147,18 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3152 | } | 3147 | } |
| 3153 | dtoffset->pn = 1; | 3148 | dtoffset->pn = 1; |
| 3154 | dtoffset->index = 0; | 3149 | dtoffset->index = 0; |
| 3155 | filp->f_pos = dtpos; | 3150 | ctx->pos = dtpos; |
| 3156 | } | 3151 | } |
| 3157 | 3152 | ||
| 3158 | if (dtEmpty(ip)) { | 3153 | if (dtEmpty(ip)) { |
| 3159 | filp->f_pos = DIREND; | 3154 | ctx->pos = DIREND; |
| 3160 | return 0; | 3155 | return 0; |
| 3161 | } | 3156 | } |
| 3162 | 3157 | ||
| 3163 | if ((rc = dtReadNext(ip, &filp->f_pos, &btstack))) { | 3158 | if ((rc = dtReadNext(ip, &ctx->pos, &btstack))) { |
| 3164 | jfs_err("jfs_readdir: unexpected rc = %d " | 3159 | jfs_err("jfs_readdir: unexpected rc = %d " |
| 3165 | "from dtReadNext", rc); | 3160 | "from dtReadNext", rc); |
| 3166 | filp->f_pos = DIREND; | 3161 | ctx->pos = DIREND; |
| 3167 | return 0; | 3162 | return 0; |
| 3168 | } | 3163 | } |
| 3169 | /* get start leaf page and index */ | 3164 | /* get start leaf page and index */ |
| @@ -3171,7 +3166,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3171 | 3166 | ||
| 3172 | /* offset beyond directory eof ? */ | 3167 | /* offset beyond directory eof ? */ |
| 3173 | if (bn < 0) { | 3168 | if (bn < 0) { |
| 3174 | filp->f_pos = DIREND; | 3169 | ctx->pos = DIREND; |
| 3175 | return 0; | 3170 | return 0; |
| 3176 | } | 3171 | } |
| 3177 | } | 3172 | } |
| @@ -3180,7 +3175,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 3180 | if (dirent_buf == 0) { | 3175 | if (dirent_buf == 0) { |
| 3181 | DT_PUTPAGE(mp); | 3176 | DT_PUTPAGE(mp); |
| 3182 | jfs_warn("jfs_readdir: __get_free_page failed!"); | 3177 | jfs_warn("jfs_readdir: __get_free_page failed!"); |
| 3183 | filp->f_pos = DIREND; | 3178 | ctx->pos = DIREND; |
| 3184 | return -ENOMEM; | 3179 | return -ENOMEM; |
| 3185 | } | 3180 | } |
| 3186 | 3181 | ||
| @@ -3295,9 +3290,9 @@ skip_one: | |||
| 3295 | 3290 | ||
| 3296 | jfs_dirent = (struct jfs_dirent *) dirent_buf; | 3291 | jfs_dirent = (struct jfs_dirent *) dirent_buf; |
| 3297 | while (jfs_dirents--) { | 3292 | while (jfs_dirents--) { |
| 3298 | filp->f_pos = jfs_dirent->position; | 3293 | ctx->pos = jfs_dirent->position; |
| 3299 | if (filldir(dirent, jfs_dirent->name, | 3294 | if (!dir_emit(ctx, jfs_dirent->name, |
| 3300 | jfs_dirent->name_len, filp->f_pos, | 3295 | jfs_dirent->name_len, |
| 3301 | jfs_dirent->ino, DT_UNKNOWN)) | 3296 | jfs_dirent->ino, DT_UNKNOWN)) |
| 3302 | goto out; | 3297 | goto out; |
| 3303 | jfs_dirent = next_jfs_dirent(jfs_dirent); | 3298 | jfs_dirent = next_jfs_dirent(jfs_dirent); |
| @@ -3309,7 +3304,7 @@ skip_one: | |||
| 3309 | } | 3304 | } |
| 3310 | 3305 | ||
| 3311 | if (!overflow && (bn == 0)) { | 3306 | if (!overflow && (bn == 0)) { |
| 3312 | filp->f_pos = DIREND; | 3307 | ctx->pos = DIREND; |
| 3313 | break; | 3308 | break; |
| 3314 | } | 3309 | } |
| 3315 | 3310 | ||
diff --git a/fs/jfs/jfs_dtree.h b/fs/jfs/jfs_dtree.h index 2545bb317235..fd4169e6e698 100644 --- a/fs/jfs/jfs_dtree.h +++ b/fs/jfs/jfs_dtree.h | |||
| @@ -265,5 +265,5 @@ extern int dtDelete(tid_t tid, struct inode *ip, struct component_name * key, | |||
| 265 | extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key, | 265 | extern int dtModify(tid_t tid, struct inode *ip, struct component_name * key, |
| 266 | ino_t * orig_ino, ino_t new_ino, int flag); | 266 | ino_t * orig_ino, ino_t new_ino, int flag); |
| 267 | 267 | ||
| 268 | extern int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir); | 268 | extern int jfs_readdir(struct file *file, struct dir_context *ctx); |
| 269 | #endif /* !_H_JFS_DTREE */ | 269 | #endif /* !_H_JFS_DTREE */ |
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 6740d34cd82b..9e3aaff11f89 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c | |||
| @@ -571,9 +571,10 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask) | |||
| 571 | return ret; | 571 | return ret; |
| 572 | } | 572 | } |
| 573 | 573 | ||
| 574 | static void metapage_invalidatepage(struct page *page, unsigned long offset) | 574 | static void metapage_invalidatepage(struct page *page, unsigned int offset, |
| 575 | unsigned int length) | ||
| 575 | { | 576 | { |
| 576 | BUG_ON(offset); | 577 | BUG_ON(offset || length < PAGE_CACHE_SIZE); |
| 577 | 578 | ||
| 578 | BUG_ON(PageWriteback(page)); | 579 | BUG_ON(PageWriteback(page)); |
| 579 | 580 | ||
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 3b91a7ad6086..89186b7b9002 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c | |||
| @@ -1529,7 +1529,7 @@ const struct inode_operations jfs_dir_inode_operations = { | |||
| 1529 | 1529 | ||
| 1530 | const struct file_operations jfs_dir_operations = { | 1530 | const struct file_operations jfs_dir_operations = { |
| 1531 | .read = generic_read_dir, | 1531 | .read = generic_read_dir, |
| 1532 | .readdir = jfs_readdir, | 1532 | .iterate = jfs_readdir, |
| 1533 | .fsync = jfs_fsync, | 1533 | .fsync = jfs_fsync, |
| 1534 | .unlocked_ioctl = jfs_ioctl, | 1534 | .unlocked_ioctl = jfs_ioctl, |
| 1535 | #ifdef CONFIG_COMPAT | 1535 | #ifdef CONFIG_COMPAT |
diff --git a/fs/libfs.c b/fs/libfs.c index 916da8c4158b..c3a0837fb861 100644 --- a/fs/libfs.c +++ b/fs/libfs.c | |||
| @@ -135,60 +135,40 @@ static inline unsigned char dt_type(struct inode *inode) | |||
| 135 | * both impossible due to the lock on directory. | 135 | * both impossible due to the lock on directory. |
| 136 | */ | 136 | */ |
| 137 | 137 | ||
| 138 | int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir) | 138 | int dcache_readdir(struct file *file, struct dir_context *ctx) |
| 139 | { | 139 | { |
| 140 | struct dentry *dentry = filp->f_path.dentry; | 140 | struct dentry *dentry = file->f_path.dentry; |
| 141 | struct dentry *cursor = filp->private_data; | 141 | struct dentry *cursor = file->private_data; |
| 142 | struct list_head *p, *q = &cursor->d_u.d_child; | 142 | struct list_head *p, *q = &cursor->d_u.d_child; |
| 143 | ino_t ino; | ||
| 144 | int i = filp->f_pos; | ||
| 145 | 143 | ||
| 146 | switch (i) { | 144 | if (!dir_emit_dots(file, ctx)) |
| 147 | case 0: | 145 | return 0; |
| 148 | ino = dentry->d_inode->i_ino; | 146 | spin_lock(&dentry->d_lock); |
| 149 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 147 | if (ctx->pos == 2) |
| 150 | break; | 148 | list_move(q, &dentry->d_subdirs); |
| 151 | filp->f_pos++; | 149 | |
| 152 | i++; | 150 | for (p = q->next; p != &dentry->d_subdirs; p = p->next) { |
| 153 | /* fallthrough */ | 151 | struct dentry *next = list_entry(p, struct dentry, d_u.d_child); |
| 154 | case 1: | 152 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); |
| 155 | ino = parent_ino(dentry); | 153 | if (!simple_positive(next)) { |
| 156 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 154 | spin_unlock(&next->d_lock); |
| 157 | break; | 155 | continue; |
| 158 | filp->f_pos++; | 156 | } |
| 159 | i++; | ||
| 160 | /* fallthrough */ | ||
| 161 | default: | ||
| 162 | spin_lock(&dentry->d_lock); | ||
| 163 | if (filp->f_pos == 2) | ||
| 164 | list_move(q, &dentry->d_subdirs); | ||
| 165 | |||
| 166 | for (p=q->next; p != &dentry->d_subdirs; p=p->next) { | ||
| 167 | struct dentry *next; | ||
| 168 | next = list_entry(p, struct dentry, d_u.d_child); | ||
| 169 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); | ||
| 170 | if (!simple_positive(next)) { | ||
| 171 | spin_unlock(&next->d_lock); | ||
| 172 | continue; | ||
| 173 | } | ||
| 174 | 157 | ||
| 175 | spin_unlock(&next->d_lock); | 158 | spin_unlock(&next->d_lock); |
| 176 | spin_unlock(&dentry->d_lock); | 159 | spin_unlock(&dentry->d_lock); |
| 177 | if (filldir(dirent, next->d_name.name, | 160 | if (!dir_emit(ctx, next->d_name.name, next->d_name.len, |
| 178 | next->d_name.len, filp->f_pos, | 161 | next->d_inode->i_ino, dt_type(next->d_inode))) |
| 179 | next->d_inode->i_ino, | 162 | return 0; |
| 180 | dt_type(next->d_inode)) < 0) | 163 | spin_lock(&dentry->d_lock); |
| 181 | return 0; | 164 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); |
| 182 | spin_lock(&dentry->d_lock); | 165 | /* next is still alive */ |
| 183 | spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED); | 166 | list_move(q, p); |
| 184 | /* next is still alive */ | 167 | spin_unlock(&next->d_lock); |
| 185 | list_move(q, p); | 168 | p = q; |
| 186 | spin_unlock(&next->d_lock); | 169 | ctx->pos++; |
| 187 | p = q; | ||
| 188 | filp->f_pos++; | ||
| 189 | } | ||
| 190 | spin_unlock(&dentry->d_lock); | ||
| 191 | } | 170 | } |
| 171 | spin_unlock(&dentry->d_lock); | ||
| 192 | return 0; | 172 | return 0; |
| 193 | } | 173 | } |
| 194 | 174 | ||
| @@ -202,7 +182,7 @@ const struct file_operations simple_dir_operations = { | |||
| 202 | .release = dcache_dir_close, | 182 | .release = dcache_dir_close, |
| 203 | .llseek = dcache_dir_lseek, | 183 | .llseek = dcache_dir_lseek, |
| 204 | .read = generic_read_dir, | 184 | .read = generic_read_dir, |
| 205 | .readdir = dcache_readdir, | 185 | .iterate = dcache_readdir, |
| 206 | .fsync = noop_fsync, | 186 | .fsync = noop_fsync, |
| 207 | }; | 187 | }; |
| 208 | 188 | ||
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index b82751082112..6bdc347008f5 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c | |||
| @@ -281,17 +281,23 @@ static int logfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 281 | 281 | ||
| 282 | /* FIXME: readdir currently has it's own dir_walk code. I don't see a good | 282 | /* FIXME: readdir currently has it's own dir_walk code. I don't see a good |
| 283 | * way to combine the two copies */ | 283 | * way to combine the two copies */ |
| 284 | #define IMPLICIT_NODES 2 | 284 | static int logfs_readdir(struct file *file, struct dir_context *ctx) |
| 285 | static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) | ||
| 286 | { | 285 | { |
| 287 | struct inode *dir = file_inode(file); | 286 | struct inode *dir = file_inode(file); |
| 288 | loff_t pos = file->f_pos - IMPLICIT_NODES; | 287 | loff_t pos; |
| 289 | struct page *page; | 288 | struct page *page; |
| 290 | struct logfs_disk_dentry *dd; | 289 | struct logfs_disk_dentry *dd; |
| 291 | int full; | ||
| 292 | 290 | ||
| 291 | if (ctx->pos < 0) | ||
| 292 | return -EINVAL; | ||
| 293 | |||
| 294 | if (!dir_emit_dots(file, ctx)) | ||
| 295 | return 0; | ||
| 296 | |||
| 297 | pos = ctx->pos - 2; | ||
| 293 | BUG_ON(pos < 0); | 298 | BUG_ON(pos < 0); |
| 294 | for (;; pos++) { | 299 | for (;; pos++, ctx->pos++) { |
| 300 | bool full; | ||
| 295 | if (beyond_eof(dir, pos)) | 301 | if (beyond_eof(dir, pos)) |
| 296 | break; | 302 | break; |
| 297 | if (!logfs_exist_block(dir, pos)) { | 303 | if (!logfs_exist_block(dir, pos)) { |
| @@ -306,42 +312,17 @@ static int __logfs_readdir(struct file *file, void *buf, filldir_t filldir) | |||
| 306 | dd = kmap(page); | 312 | dd = kmap(page); |
| 307 | BUG_ON(dd->namelen == 0); | 313 | BUG_ON(dd->namelen == 0); |
| 308 | 314 | ||
| 309 | full = filldir(buf, (char *)dd->name, be16_to_cpu(dd->namelen), | 315 | full = !dir_emit(ctx, (char *)dd->name, |
| 310 | pos, be64_to_cpu(dd->ino), dd->type); | 316 | be16_to_cpu(dd->namelen), |
| 317 | be64_to_cpu(dd->ino), dd->type); | ||
| 311 | kunmap(page); | 318 | kunmap(page); |
| 312 | page_cache_release(page); | 319 | page_cache_release(page); |
| 313 | if (full) | 320 | if (full) |
| 314 | break; | 321 | break; |
| 315 | } | 322 | } |
| 316 | |||
| 317 | file->f_pos = pos + IMPLICIT_NODES; | ||
| 318 | return 0; | 323 | return 0; |
| 319 | } | 324 | } |
| 320 | 325 | ||
| 321 | static int logfs_readdir(struct file *file, void *buf, filldir_t filldir) | ||
| 322 | { | ||
| 323 | struct inode *inode = file_inode(file); | ||
| 324 | ino_t pino = parent_ino(file->f_dentry); | ||
| 325 | int err; | ||
| 326 | |||
| 327 | if (file->f_pos < 0) | ||
| 328 | return -EINVAL; | ||
| 329 | |||
| 330 | if (file->f_pos == 0) { | ||
| 331 | if (filldir(buf, ".", 1, 1, inode->i_ino, DT_DIR) < 0) | ||
| 332 | return 0; | ||
| 333 | file->f_pos++; | ||
| 334 | } | ||
| 335 | if (file->f_pos == 1) { | ||
| 336 | if (filldir(buf, "..", 2, 2, pino, DT_DIR) < 0) | ||
| 337 | return 0; | ||
| 338 | file->f_pos++; | ||
| 339 | } | ||
| 340 | |||
| 341 | err = __logfs_readdir(file, buf, filldir); | ||
| 342 | return err; | ||
| 343 | } | ||
| 344 | |||
| 345 | static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) | 326 | static void logfs_set_name(struct logfs_disk_dentry *dd, struct qstr *name) |
| 346 | { | 327 | { |
| 347 | dd->namelen = cpu_to_be16(name->len); | 328 | dd->namelen = cpu_to_be16(name->len); |
| @@ -814,7 +795,7 @@ const struct inode_operations logfs_dir_iops = { | |||
| 814 | const struct file_operations logfs_dir_fops = { | 795 | const struct file_operations logfs_dir_fops = { |
| 815 | .fsync = logfs_fsync, | 796 | .fsync = logfs_fsync, |
| 816 | .unlocked_ioctl = logfs_ioctl, | 797 | .unlocked_ioctl = logfs_ioctl, |
| 817 | .readdir = logfs_readdir, | 798 | .iterate = logfs_readdir, |
| 818 | .read = generic_read_dir, | 799 | .read = generic_read_dir, |
| 819 | .llseek = default_llseek, | 800 | .llseek = default_llseek, |
| 820 | }; | 801 | }; |
diff --git a/fs/logfs/file.c b/fs/logfs/file.c index c2219a6dd3c8..57914fc32b62 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c | |||
| @@ -159,7 +159,8 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc) | |||
| 159 | return __logfs_writepage(page); | 159 | return __logfs_writepage(page); |
| 160 | } | 160 | } |
| 161 | 161 | ||
| 162 | static void logfs_invalidatepage(struct page *page, unsigned long offset) | 162 | static void logfs_invalidatepage(struct page *page, unsigned int offset, |
| 163 | unsigned int length) | ||
| 163 | { | 164 | { |
| 164 | struct logfs_block *block = logfs_block(page); | 165 | struct logfs_block *block = logfs_block(page); |
| 165 | 166 | ||
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c index 038da0991794..d448a777166b 100644 --- a/fs/logfs/segment.c +++ b/fs/logfs/segment.c | |||
| @@ -884,7 +884,8 @@ static struct logfs_area *alloc_area(struct super_block *sb) | |||
| 884 | return area; | 884 | return area; |
| 885 | } | 885 | } |
| 886 | 886 | ||
| 887 | static void map_invalidatepage(struct page *page, unsigned long l) | 887 | static void map_invalidatepage(struct page *page, unsigned int o, |
| 888 | unsigned int l) | ||
| 888 | { | 889 | { |
| 889 | return; | 890 | return; |
| 890 | } | 891 | } |
diff --git a/fs/minix/dir.c b/fs/minix/dir.c index a9ed6f36e6ea..08c442902fcd 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c | |||
| @@ -16,12 +16,12 @@ | |||
| 16 | typedef struct minix_dir_entry minix_dirent; | 16 | typedef struct minix_dir_entry minix_dirent; |
| 17 | typedef struct minix3_dir_entry minix3_dirent; | 17 | typedef struct minix3_dir_entry minix3_dirent; |
| 18 | 18 | ||
| 19 | static int minix_readdir(struct file *, void *, filldir_t); | 19 | static int minix_readdir(struct file *, struct dir_context *); |
| 20 | 20 | ||
| 21 | const struct file_operations minix_dir_operations = { | 21 | const struct file_operations minix_dir_operations = { |
| 22 | .llseek = generic_file_llseek, | 22 | .llseek = generic_file_llseek, |
| 23 | .read = generic_read_dir, | 23 | .read = generic_read_dir, |
| 24 | .readdir = minix_readdir, | 24 | .iterate = minix_readdir, |
| 25 | .fsync = generic_file_fsync, | 25 | .fsync = generic_file_fsync, |
| 26 | }; | 26 | }; |
| 27 | 27 | ||
| @@ -82,22 +82,23 @@ static inline void *minix_next_entry(void *de, struct minix_sb_info *sbi) | |||
| 82 | return (void*)((char*)de + sbi->s_dirsize); | 82 | return (void*)((char*)de + sbi->s_dirsize); |
| 83 | } | 83 | } |
| 84 | 84 | ||
| 85 | static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) | 85 | static int minix_readdir(struct file *file, struct dir_context *ctx) |
| 86 | { | 86 | { |
| 87 | unsigned long pos = filp->f_pos; | 87 | struct inode *inode = file_inode(file); |
| 88 | struct inode *inode = file_inode(filp); | ||
| 89 | struct super_block *sb = inode->i_sb; | 88 | struct super_block *sb = inode->i_sb; |
| 90 | unsigned offset = pos & ~PAGE_CACHE_MASK; | ||
| 91 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | ||
| 92 | unsigned long npages = dir_pages(inode); | ||
| 93 | struct minix_sb_info *sbi = minix_sb(sb); | 89 | struct minix_sb_info *sbi = minix_sb(sb); |
| 94 | unsigned chunk_size = sbi->s_dirsize; | 90 | unsigned chunk_size = sbi->s_dirsize; |
| 95 | char *name; | 91 | unsigned long npages = dir_pages(inode); |
| 96 | __u32 inumber; | 92 | unsigned long pos = ctx->pos; |
| 93 | unsigned offset; | ||
| 94 | unsigned long n; | ||
| 97 | 95 | ||
| 98 | pos = (pos + chunk_size-1) & ~(chunk_size-1); | 96 | ctx->pos = pos = (pos + chunk_size-1) & ~(chunk_size-1); |
| 99 | if (pos >= inode->i_size) | 97 | if (pos >= inode->i_size) |
| 100 | goto done; | 98 | return 0; |
| 99 | |||
| 100 | offset = pos & ~PAGE_CACHE_MASK; | ||
| 101 | n = pos >> PAGE_CACHE_SHIFT; | ||
| 101 | 102 | ||
| 102 | for ( ; n < npages; n++, offset = 0) { | 103 | for ( ; n < npages; n++, offset = 0) { |
| 103 | char *p, *kaddr, *limit; | 104 | char *p, *kaddr, *limit; |
| @@ -109,6 +110,8 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 109 | p = kaddr+offset; | 110 | p = kaddr+offset; |
| 110 | limit = kaddr + minix_last_byte(inode, n) - chunk_size; | 111 | limit = kaddr + minix_last_byte(inode, n) - chunk_size; |
| 111 | for ( ; p <= limit; p = minix_next_entry(p, sbi)) { | 112 | for ( ; p <= limit; p = minix_next_entry(p, sbi)) { |
| 113 | const char *name; | ||
| 114 | __u32 inumber; | ||
| 112 | if (sbi->s_version == MINIX_V3) { | 115 | if (sbi->s_version == MINIX_V3) { |
| 113 | minix3_dirent *de3 = (minix3_dirent *)p; | 116 | minix3_dirent *de3 = (minix3_dirent *)p; |
| 114 | name = de3->name; | 117 | name = de3->name; |
| @@ -119,24 +122,17 @@ static int minix_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 119 | inumber = de->inode; | 122 | inumber = de->inode; |
| 120 | } | 123 | } |
| 121 | if (inumber) { | 124 | if (inumber) { |
| 122 | int over; | ||
| 123 | |||
| 124 | unsigned l = strnlen(name, sbi->s_namelen); | 125 | unsigned l = strnlen(name, sbi->s_namelen); |
| 125 | offset = p - kaddr; | 126 | if (!dir_emit(ctx, name, l, |
| 126 | over = filldir(dirent, name, l, | 127 | inumber, DT_UNKNOWN)) { |
| 127 | (n << PAGE_CACHE_SHIFT) | offset, | ||
| 128 | inumber, DT_UNKNOWN); | ||
| 129 | if (over) { | ||
| 130 | dir_put_page(page); | 128 | dir_put_page(page); |
| 131 | goto done; | 129 | return 0; |
| 132 | } | 130 | } |
| 133 | } | 131 | } |
| 132 | ctx->pos += chunk_size; | ||
| 134 | } | 133 | } |
| 135 | dir_put_page(page); | 134 | dir_put_page(page); |
| 136 | } | 135 | } |
| 137 | |||
| 138 | done: | ||
| 139 | filp->f_pos = (n << PAGE_CACHE_SHIFT) | offset; | ||
| 140 | return 0; | 136 | return 0; |
| 141 | } | 137 | } |
| 142 | 138 | ||
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 6792ce11f2bf..0e7f00298213 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c | |||
| @@ -23,12 +23,12 @@ | |||
| 23 | 23 | ||
| 24 | #include "ncp_fs.h" | 24 | #include "ncp_fs.h" |
| 25 | 25 | ||
| 26 | static void ncp_read_volume_list(struct file *, void *, filldir_t, | 26 | static void ncp_read_volume_list(struct file *, struct dir_context *, |
| 27 | struct ncp_cache_control *); | 27 | struct ncp_cache_control *); |
| 28 | static void ncp_do_readdir(struct file *, void *, filldir_t, | 28 | static void ncp_do_readdir(struct file *, struct dir_context *, |
| 29 | struct ncp_cache_control *); | 29 | struct ncp_cache_control *); |
| 30 | 30 | ||
| 31 | static int ncp_readdir(struct file *, void *, filldir_t); | 31 | static int ncp_readdir(struct file *, struct dir_context *); |
| 32 | 32 | ||
| 33 | static int ncp_create(struct inode *, struct dentry *, umode_t, bool); | 33 | static int ncp_create(struct inode *, struct dentry *, umode_t, bool); |
| 34 | static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); | 34 | static struct dentry *ncp_lookup(struct inode *, struct dentry *, unsigned int); |
| @@ -49,7 +49,7 @@ const struct file_operations ncp_dir_operations = | |||
| 49 | { | 49 | { |
| 50 | .llseek = generic_file_llseek, | 50 | .llseek = generic_file_llseek, |
| 51 | .read = generic_read_dir, | 51 | .read = generic_read_dir, |
| 52 | .readdir = ncp_readdir, | 52 | .iterate = ncp_readdir, |
| 53 | .unlocked_ioctl = ncp_ioctl, | 53 | .unlocked_ioctl = ncp_ioctl, |
| 54 | #ifdef CONFIG_COMPAT | 54 | #ifdef CONFIG_COMPAT |
| 55 | .compat_ioctl = ncp_compat_ioctl, | 55 | .compat_ioctl = ncp_compat_ioctl, |
| @@ -424,9 +424,9 @@ static time_t ncp_obtain_mtime(struct dentry *dentry) | |||
| 424 | return ncp_date_dos2unix(i.modifyTime, i.modifyDate); | 424 | return ncp_date_dos2unix(i.modifyTime, i.modifyDate); |
| 425 | } | 425 | } |
| 426 | 426 | ||
| 427 | static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | 427 | static int ncp_readdir(struct file *file, struct dir_context *ctx) |
| 428 | { | 428 | { |
| 429 | struct dentry *dentry = filp->f_path.dentry; | 429 | struct dentry *dentry = file->f_path.dentry; |
| 430 | struct inode *inode = dentry->d_inode; | 430 | struct inode *inode = dentry->d_inode; |
| 431 | struct page *page = NULL; | 431 | struct page *page = NULL; |
| 432 | struct ncp_server *server = NCP_SERVER(inode); | 432 | struct ncp_server *server = NCP_SERVER(inode); |
| @@ -440,7 +440,7 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 440 | 440 | ||
| 441 | DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", | 441 | DDPRINTK("ncp_readdir: reading %s/%s, pos=%d\n", |
| 442 | dentry->d_parent->d_name.name, dentry->d_name.name, | 442 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 443 | (int) filp->f_pos); | 443 | (int) ctx->pos); |
| 444 | 444 | ||
| 445 | result = -EIO; | 445 | result = -EIO; |
| 446 | /* Do not generate '.' and '..' when server is dead. */ | 446 | /* Do not generate '.' and '..' when server is dead. */ |
| @@ -448,16 +448,8 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 448 | goto out; | 448 | goto out; |
| 449 | 449 | ||
| 450 | result = 0; | 450 | result = 0; |
| 451 | if (filp->f_pos == 0) { | 451 | if (!dir_emit_dots(file, ctx)) |
| 452 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR)) | 452 | goto out; |
| 453 | goto out; | ||
| 454 | filp->f_pos = 1; | ||
| 455 | } | ||
| 456 | if (filp->f_pos == 1) { | ||
| 457 | if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR)) | ||
| 458 | goto out; | ||
| 459 | filp->f_pos = 2; | ||
| 460 | } | ||
| 461 | 453 | ||
| 462 | page = grab_cache_page(&inode->i_data, 0); | 454 | page = grab_cache_page(&inode->i_data, 0); |
| 463 | if (!page) | 455 | if (!page) |
| @@ -469,7 +461,7 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 469 | if (!PageUptodate(page) || !ctl.head.eof) | 461 | if (!PageUptodate(page) || !ctl.head.eof) |
| 470 | goto init_cache; | 462 | goto init_cache; |
| 471 | 463 | ||
| 472 | if (filp->f_pos == 2) { | 464 | if (ctx->pos == 2) { |
| 473 | if (jiffies - ctl.head.time >= NCP_MAX_AGE(server)) | 465 | if (jiffies - ctl.head.time >= NCP_MAX_AGE(server)) |
| 474 | goto init_cache; | 466 | goto init_cache; |
| 475 | 467 | ||
| @@ -479,10 +471,10 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 479 | goto init_cache; | 471 | goto init_cache; |
| 480 | } | 472 | } |
| 481 | 473 | ||
| 482 | if (filp->f_pos > ctl.head.end) | 474 | if (ctx->pos > ctl.head.end) |
| 483 | goto finished; | 475 | goto finished; |
| 484 | 476 | ||
| 485 | ctl.fpos = filp->f_pos + (NCP_DIRCACHE_START - 2); | 477 | ctl.fpos = ctx->pos + (NCP_DIRCACHE_START - 2); |
| 486 | ctl.ofs = ctl.fpos / NCP_DIRCACHE_SIZE; | 478 | ctl.ofs = ctl.fpos / NCP_DIRCACHE_SIZE; |
| 487 | ctl.idx = ctl.fpos % NCP_DIRCACHE_SIZE; | 479 | ctl.idx = ctl.fpos % NCP_DIRCACHE_SIZE; |
| 488 | 480 | ||
| @@ -497,21 +489,21 @@ static int ncp_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 497 | } | 489 | } |
| 498 | while (ctl.idx < NCP_DIRCACHE_SIZE) { | 490 | while (ctl.idx < NCP_DIRCACHE_SIZE) { |
| 499 | struct dentry *dent; | 491 | struct dentry *dent; |
| 500 | int res; | 492 | bool over; |
| 501 | 493 | ||
| 502 | dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], | 494 | dent = ncp_dget_fpos(ctl.cache->dentry[ctl.idx], |
| 503 | dentry, filp->f_pos); | 495 | dentry, ctx->pos); |
| 504 | if (!dent) | 496 | if (!dent) |
| 505 | goto invalid_cache; | 497 | goto invalid_cache; |
| 506 | res = filldir(dirent, dent->d_name.name, | 498 | over = !dir_emit(ctx, dent->d_name.name, |
| 507 | dent->d_name.len, filp->f_pos, | 499 | dent->d_name.len, |
| 508 | dent->d_inode->i_ino, DT_UNKNOWN); | 500 | dent->d_inode->i_ino, DT_UNKNOWN); |
| 509 | dput(dent); | 501 | dput(dent); |
| 510 | if (res) | 502 | if (over) |
| 511 | goto finished; | 503 | goto finished; |
| 512 | filp->f_pos += 1; | 504 | ctx->pos += 1; |
| 513 | ctl.idx += 1; | 505 | ctl.idx += 1; |
| 514 | if (filp->f_pos > ctl.head.end) | 506 | if (ctx->pos > ctl.head.end) |
| 515 | goto finished; | 507 | goto finished; |
| 516 | } | 508 | } |
| 517 | if (ctl.page) { | 509 | if (ctl.page) { |
| @@ -548,9 +540,9 @@ init_cache: | |||
| 548 | ctl.valid = 1; | 540 | ctl.valid = 1; |
| 549 | read_really: | 541 | read_really: |
| 550 | if (ncp_is_server_root(inode)) { | 542 | if (ncp_is_server_root(inode)) { |
| 551 | ncp_read_volume_list(filp, dirent, filldir, &ctl); | 543 | ncp_read_volume_list(file, ctx, &ctl); |
| 552 | } else { | 544 | } else { |
| 553 | ncp_do_readdir(filp, dirent, filldir, &ctl); | 545 | ncp_do_readdir(file, ctx, &ctl); |
| 554 | } | 546 | } |
| 555 | ctl.head.end = ctl.fpos - 1; | 547 | ctl.head.end = ctl.fpos - 1; |
| 556 | ctl.head.eof = ctl.valid; | 548 | ctl.head.eof = ctl.valid; |
| @@ -573,11 +565,11 @@ out: | |||
| 573 | } | 565 | } |
| 574 | 566 | ||
| 575 | static int | 567 | static int |
| 576 | ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 568 | ncp_fill_cache(struct file *file, struct dir_context *ctx, |
| 577 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, | 569 | struct ncp_cache_control *ctrl, struct ncp_entry_info *entry, |
| 578 | int inval_childs) | 570 | int inval_childs) |
| 579 | { | 571 | { |
| 580 | struct dentry *newdent, *dentry = filp->f_path.dentry; | 572 | struct dentry *newdent, *dentry = file->f_path.dentry; |
| 581 | struct inode *dir = dentry->d_inode; | 573 | struct inode *dir = dentry->d_inode; |
| 582 | struct ncp_cache_control ctl = *ctrl; | 574 | struct ncp_cache_control ctl = *ctrl; |
| 583 | struct qstr qname; | 575 | struct qstr qname; |
| @@ -666,15 +658,15 @@ ncp_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | |||
| 666 | end_advance: | 658 | end_advance: |
| 667 | if (!valid) | 659 | if (!valid) |
| 668 | ctl.valid = 0; | 660 | ctl.valid = 0; |
| 669 | if (!ctl.filled && (ctl.fpos == filp->f_pos)) { | 661 | if (!ctl.filled && (ctl.fpos == ctx->pos)) { |
| 670 | if (!ino) | 662 | if (!ino) |
| 671 | ino = find_inode_number(dentry, &qname); | 663 | ino = find_inode_number(dentry, &qname); |
| 672 | if (!ino) | 664 | if (!ino) |
| 673 | ino = iunique(dir->i_sb, 2); | 665 | ino = iunique(dir->i_sb, 2); |
| 674 | ctl.filled = filldir(dirent, qname.name, qname.len, | 666 | ctl.filled = !dir_emit(ctx, qname.name, qname.len, |
| 675 | filp->f_pos, ino, DT_UNKNOWN); | 667 | ino, DT_UNKNOWN); |
| 676 | if (!ctl.filled) | 668 | if (!ctl.filled) |
| 677 | filp->f_pos += 1; | 669 | ctx->pos += 1; |
| 678 | } | 670 | } |
| 679 | ctl.fpos += 1; | 671 | ctl.fpos += 1; |
| 680 | ctl.idx += 1; | 672 | ctl.idx += 1; |
| @@ -683,10 +675,10 @@ end_advance: | |||
| 683 | } | 675 | } |
| 684 | 676 | ||
| 685 | static void | 677 | static void |
| 686 | ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | 678 | ncp_read_volume_list(struct file *file, struct dir_context *ctx, |
| 687 | struct ncp_cache_control *ctl) | 679 | struct ncp_cache_control *ctl) |
| 688 | { | 680 | { |
| 689 | struct dentry *dentry = filp->f_path.dentry; | 681 | struct dentry *dentry = file->f_path.dentry; |
| 690 | struct inode *inode = dentry->d_inode; | 682 | struct inode *inode = dentry->d_inode; |
| 691 | struct ncp_server *server = NCP_SERVER(inode); | 683 | struct ncp_server *server = NCP_SERVER(inode); |
| 692 | struct ncp_volume_info info; | 684 | struct ncp_volume_info info; |
| @@ -694,7 +686,7 @@ ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | |||
| 694 | int i; | 686 | int i; |
| 695 | 687 | ||
| 696 | DPRINTK("ncp_read_volume_list: pos=%ld\n", | 688 | DPRINTK("ncp_read_volume_list: pos=%ld\n", |
| 697 | (unsigned long) filp->f_pos); | 689 | (unsigned long) ctx->pos); |
| 698 | 690 | ||
| 699 | for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { | 691 | for (i = 0; i < NCP_NUMBER_OF_VOLUMES; i++) { |
| 700 | int inval_dentry; | 692 | int inval_dentry; |
| @@ -715,16 +707,16 @@ ncp_read_volume_list(struct file *filp, void *dirent, filldir_t filldir, | |||
| 715 | } | 707 | } |
| 716 | inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL); | 708 | inval_dentry = ncp_update_known_namespace(server, entry.i.volNumber, NULL); |
| 717 | entry.volume = entry.i.volNumber; | 709 | entry.volume = entry.i.volNumber; |
| 718 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry, inval_dentry)) | 710 | if (!ncp_fill_cache(file, ctx, ctl, &entry, inval_dentry)) |
| 719 | return; | 711 | return; |
| 720 | } | 712 | } |
| 721 | } | 713 | } |
| 722 | 714 | ||
| 723 | static void | 715 | static void |
| 724 | ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | 716 | ncp_do_readdir(struct file *file, struct dir_context *ctx, |
| 725 | struct ncp_cache_control *ctl) | 717 | struct ncp_cache_control *ctl) |
| 726 | { | 718 | { |
| 727 | struct dentry *dentry = filp->f_path.dentry; | 719 | struct dentry *dentry = file->f_path.dentry; |
| 728 | struct inode *dir = dentry->d_inode; | 720 | struct inode *dir = dentry->d_inode; |
| 729 | struct ncp_server *server = NCP_SERVER(dir); | 721 | struct ncp_server *server = NCP_SERVER(dir); |
| 730 | struct nw_search_sequence seq; | 722 | struct nw_search_sequence seq; |
| @@ -736,7 +728,7 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | |||
| 736 | 728 | ||
| 737 | DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", | 729 | DPRINTK("ncp_do_readdir: %s/%s, fpos=%ld\n", |
| 738 | dentry->d_parent->d_name.name, dentry->d_name.name, | 730 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 739 | (unsigned long) filp->f_pos); | 731 | (unsigned long) ctx->pos); |
| 740 | PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", | 732 | PPRINTK("ncp_do_readdir: init %s, volnum=%d, dirent=%u\n", |
| 741 | dentry->d_name.name, NCP_FINFO(dir)->volNumber, | 733 | dentry->d_name.name, NCP_FINFO(dir)->volNumber, |
| 742 | NCP_FINFO(dir)->dirEntNum); | 734 | NCP_FINFO(dir)->dirEntNum); |
| @@ -778,7 +770,7 @@ ncp_do_readdir(struct file *filp, void *dirent, filldir_t filldir, | |||
| 778 | rpl += onerpl; | 770 | rpl += onerpl; |
| 779 | rpls -= onerpl; | 771 | rpls -= onerpl; |
| 780 | entry.volume = entry.i.volNumber; | 772 | entry.volume = entry.i.volNumber; |
| 781 | if (!ncp_fill_cache(filp, dirent, filldir, ctl, &entry, 0)) | 773 | if (!ncp_fill_cache(file, ctx, ctl, &entry, 0)) |
| 782 | break; | 774 | break; |
| 783 | } | 775 | } |
| 784 | } while (more); | 776 | } while (more); |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e093e73178b7..5d051419527b 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -46,7 +46,7 @@ | |||
| 46 | 46 | ||
| 47 | static int nfs_opendir(struct inode *, struct file *); | 47 | static int nfs_opendir(struct inode *, struct file *); |
| 48 | static int nfs_closedir(struct inode *, struct file *); | 48 | static int nfs_closedir(struct inode *, struct file *); |
| 49 | static int nfs_readdir(struct file *, void *, filldir_t); | 49 | static int nfs_readdir(struct file *, struct dir_context *); |
| 50 | static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); | 50 | static int nfs_fsync_dir(struct file *, loff_t, loff_t, int); |
| 51 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); | 51 | static loff_t nfs_llseek_dir(struct file *, loff_t, int); |
| 52 | static void nfs_readdir_clear_array(struct page*); | 52 | static void nfs_readdir_clear_array(struct page*); |
| @@ -54,7 +54,7 @@ static void nfs_readdir_clear_array(struct page*); | |||
| 54 | const struct file_operations nfs_dir_operations = { | 54 | const struct file_operations nfs_dir_operations = { |
| 55 | .llseek = nfs_llseek_dir, | 55 | .llseek = nfs_llseek_dir, |
| 56 | .read = generic_read_dir, | 56 | .read = generic_read_dir, |
| 57 | .readdir = nfs_readdir, | 57 | .iterate = nfs_readdir, |
| 58 | .open = nfs_opendir, | 58 | .open = nfs_opendir, |
| 59 | .release = nfs_closedir, | 59 | .release = nfs_closedir, |
| 60 | .fsync = nfs_fsync_dir, | 60 | .fsync = nfs_fsync_dir, |
| @@ -147,6 +147,7 @@ typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, int); | |||
| 147 | typedef struct { | 147 | typedef struct { |
| 148 | struct file *file; | 148 | struct file *file; |
| 149 | struct page *page; | 149 | struct page *page; |
| 150 | struct dir_context *ctx; | ||
| 150 | unsigned long page_index; | 151 | unsigned long page_index; |
| 151 | u64 *dir_cookie; | 152 | u64 *dir_cookie; |
| 152 | u64 last_cookie; | 153 | u64 last_cookie; |
| @@ -252,7 +253,7 @@ out: | |||
| 252 | static | 253 | static |
| 253 | int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) | 254 | int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) |
| 254 | { | 255 | { |
| 255 | loff_t diff = desc->file->f_pos - desc->current_index; | 256 | loff_t diff = desc->ctx->pos - desc->current_index; |
| 256 | unsigned int index; | 257 | unsigned int index; |
| 257 | 258 | ||
| 258 | if (diff < 0) | 259 | if (diff < 0) |
| @@ -289,7 +290,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
| 289 | || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { | 290 | || (nfsi->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))) { |
| 290 | ctx->duped = 0; | 291 | ctx->duped = 0; |
| 291 | ctx->attr_gencount = nfsi->attr_gencount; | 292 | ctx->attr_gencount = nfsi->attr_gencount; |
| 292 | } else if (new_pos < desc->file->f_pos) { | 293 | } else if (new_pos < desc->ctx->pos) { |
| 293 | if (ctx->duped > 0 | 294 | if (ctx->duped > 0 |
| 294 | && ctx->dup_cookie == *desc->dir_cookie) { | 295 | && ctx->dup_cookie == *desc->dir_cookie) { |
| 295 | if (printk_ratelimit()) { | 296 | if (printk_ratelimit()) { |
| @@ -307,7 +308,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des | |||
| 307 | ctx->dup_cookie = *desc->dir_cookie; | 308 | ctx->dup_cookie = *desc->dir_cookie; |
| 308 | ctx->duped = -1; | 309 | ctx->duped = -1; |
| 309 | } | 310 | } |
| 310 | desc->file->f_pos = new_pos; | 311 | desc->ctx->pos = new_pos; |
| 311 | desc->cache_entry_index = i; | 312 | desc->cache_entry_index = i; |
| 312 | return 0; | 313 | return 0; |
| 313 | } | 314 | } |
| @@ -405,13 +406,13 @@ different: | |||
| 405 | } | 406 | } |
| 406 | 407 | ||
| 407 | static | 408 | static |
| 408 | bool nfs_use_readdirplus(struct inode *dir, struct file *filp) | 409 | bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) |
| 409 | { | 410 | { |
| 410 | if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) | 411 | if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) |
| 411 | return false; | 412 | return false; |
| 412 | if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) | 413 | if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)->flags)) |
| 413 | return true; | 414 | return true; |
| 414 | if (filp->f_pos == 0) | 415 | if (ctx->pos == 0) |
| 415 | return true; | 416 | return true; |
| 416 | return false; | 417 | return false; |
| 417 | } | 418 | } |
| @@ -702,8 +703,7 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) | |||
| 702 | * Once we've found the start of the dirent within a page: fill 'er up... | 703 | * Once we've found the start of the dirent within a page: fill 'er up... |
| 703 | */ | 704 | */ |
| 704 | static | 705 | static |
| 705 | int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | 706 | int nfs_do_filldir(nfs_readdir_descriptor_t *desc) |
| 706 | filldir_t filldir) | ||
| 707 | { | 707 | { |
| 708 | struct file *file = desc->file; | 708 | struct file *file = desc->file; |
| 709 | int i = 0; | 709 | int i = 0; |
| @@ -721,13 +721,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 721 | struct nfs_cache_array_entry *ent; | 721 | struct nfs_cache_array_entry *ent; |
| 722 | 722 | ||
| 723 | ent = &array->array[i]; | 723 | ent = &array->array[i]; |
| 724 | if (filldir(dirent, ent->string.name, ent->string.len, | 724 | if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, |
| 725 | file->f_pos, nfs_compat_user_ino64(ent->ino), | 725 | nfs_compat_user_ino64(ent->ino), ent->d_type)) { |
| 726 | ent->d_type) < 0) { | ||
| 727 | desc->eof = 1; | 726 | desc->eof = 1; |
| 728 | break; | 727 | break; |
| 729 | } | 728 | } |
| 730 | file->f_pos++; | 729 | desc->ctx->pos++; |
| 731 | if (i < (array->size-1)) | 730 | if (i < (array->size-1)) |
| 732 | *desc->dir_cookie = array->array[i+1].cookie; | 731 | *desc->dir_cookie = array->array[i+1].cookie; |
| 733 | else | 732 | else |
| @@ -759,8 +758,7 @@ out: | |||
| 759 | * directory in the page cache by the time we get here. | 758 | * directory in the page cache by the time we get here. |
| 760 | */ | 759 | */ |
| 761 | static inline | 760 | static inline |
| 762 | int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | 761 | int uncached_readdir(nfs_readdir_descriptor_t *desc) |
| 763 | filldir_t filldir) | ||
| 764 | { | 762 | { |
| 765 | struct page *page = NULL; | 763 | struct page *page = NULL; |
| 766 | int status; | 764 | int status; |
| @@ -785,7 +783,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 785 | if (status < 0) | 783 | if (status < 0) |
| 786 | goto out_release; | 784 | goto out_release; |
| 787 | 785 | ||
| 788 | status = nfs_do_filldir(desc, dirent, filldir); | 786 | status = nfs_do_filldir(desc); |
| 789 | 787 | ||
| 790 | out: | 788 | out: |
| 791 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", | 789 | dfprintk(DIRCACHE, "NFS: %s: returns %d\n", |
| @@ -800,35 +798,36 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 800 | last cookie cache takes care of the common case of reading the | 798 | last cookie cache takes care of the common case of reading the |
| 801 | whole directory. | 799 | whole directory. |
| 802 | */ | 800 | */ |
| 803 | static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 801 | static int nfs_readdir(struct file *file, struct dir_context *ctx) |
| 804 | { | 802 | { |
| 805 | struct dentry *dentry = filp->f_path.dentry; | 803 | struct dentry *dentry = file->f_path.dentry; |
| 806 | struct inode *inode = dentry->d_inode; | 804 | struct inode *inode = dentry->d_inode; |
| 807 | nfs_readdir_descriptor_t my_desc, | 805 | nfs_readdir_descriptor_t my_desc, |
| 808 | *desc = &my_desc; | 806 | *desc = &my_desc; |
| 809 | struct nfs_open_dir_context *dir_ctx = filp->private_data; | 807 | struct nfs_open_dir_context *dir_ctx = file->private_data; |
| 810 | int res; | 808 | int res; |
| 811 | 809 | ||
| 812 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", | 810 | dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", |
| 813 | dentry->d_parent->d_name.name, dentry->d_name.name, | 811 | dentry->d_parent->d_name.name, dentry->d_name.name, |
| 814 | (long long)filp->f_pos); | 812 | (long long)ctx->pos); |
| 815 | nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); | 813 | nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); |
| 816 | 814 | ||
| 817 | /* | 815 | /* |
| 818 | * filp->f_pos points to the dirent entry number. | 816 | * ctx->pos points to the dirent entry number. |
| 819 | * *desc->dir_cookie has the cookie for the next entry. We have | 817 | * *desc->dir_cookie has the cookie for the next entry. We have |
| 820 | * to either find the entry with the appropriate number or | 818 | * to either find the entry with the appropriate number or |
| 821 | * revalidate the cookie. | 819 | * revalidate the cookie. |
| 822 | */ | 820 | */ |
| 823 | memset(desc, 0, sizeof(*desc)); | 821 | memset(desc, 0, sizeof(*desc)); |
| 824 | 822 | ||
| 825 | desc->file = filp; | 823 | desc->file = file; |
| 824 | desc->ctx = ctx; | ||
| 826 | desc->dir_cookie = &dir_ctx->dir_cookie; | 825 | desc->dir_cookie = &dir_ctx->dir_cookie; |
| 827 | desc->decode = NFS_PROTO(inode)->decode_dirent; | 826 | desc->decode = NFS_PROTO(inode)->decode_dirent; |
| 828 | desc->plus = nfs_use_readdirplus(inode, filp) ? 1 : 0; | 827 | desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0; |
| 829 | 828 | ||
| 830 | nfs_block_sillyrename(dentry); | 829 | nfs_block_sillyrename(dentry); |
| 831 | res = nfs_revalidate_mapping(inode, filp->f_mapping); | 830 | res = nfs_revalidate_mapping(inode, file->f_mapping); |
| 832 | if (res < 0) | 831 | if (res < 0) |
| 833 | goto out; | 832 | goto out; |
| 834 | 833 | ||
| @@ -840,7 +839,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 840 | /* This means either end of directory */ | 839 | /* This means either end of directory */ |
| 841 | if (*desc->dir_cookie && desc->eof == 0) { | 840 | if (*desc->dir_cookie && desc->eof == 0) { |
| 842 | /* Or that the server has 'lost' a cookie */ | 841 | /* Or that the server has 'lost' a cookie */ |
| 843 | res = uncached_readdir(desc, dirent, filldir); | 842 | res = uncached_readdir(desc); |
| 844 | if (res == 0) | 843 | if (res == 0) |
| 845 | continue; | 844 | continue; |
| 846 | } | 845 | } |
| @@ -857,7 +856,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 857 | if (res < 0) | 856 | if (res < 0) |
| 858 | break; | 857 | break; |
| 859 | 858 | ||
| 860 | res = nfs_do_filldir(desc, dirent, filldir); | 859 | res = nfs_do_filldir(desc); |
| 861 | if (res < 0) | 860 | if (res < 0) |
| 862 | break; | 861 | break; |
| 863 | } while (!desc->eof); | 862 | } while (!desc->eof); |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index a87a44f84113..6b4a79f4ad1d 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -451,11 +451,13 @@ static int nfs_write_end(struct file *file, struct address_space *mapping, | |||
| 451 | * - Called if either PG_private or PG_fscache is set on the page | 451 | * - Called if either PG_private or PG_fscache is set on the page |
| 452 | * - Caller holds page lock | 452 | * - Caller holds page lock |
| 453 | */ | 453 | */ |
| 454 | static void nfs_invalidate_page(struct page *page, unsigned long offset) | 454 | static void nfs_invalidate_page(struct page *page, unsigned int offset, |
| 455 | unsigned int length) | ||
| 455 | { | 456 | { |
| 456 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %lu)\n", page, offset); | 457 | dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n", |
| 458 | page, offset, length); | ||
| 457 | 459 | ||
| 458 | if (offset != 0) | 460 | if (offset != 0 || length < PAGE_CACHE_SIZE) |
| 459 | return; | 461 | return; |
| 460 | /* Cancel any unstarted writes on this page */ | 462 | /* Cancel any unstarted writes on this page */ |
| 461 | nfs_wb_page_cancel(page_file_mapping(page)->host, page); | 463 | nfs_wb_page_cancel(page_file_mapping(page)->host, page); |
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4e9a21db867a..105a3b080d12 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c | |||
| @@ -240,11 +240,16 @@ struct name_list { | |||
| 240 | struct list_head list; | 240 | struct list_head list; |
| 241 | }; | 241 | }; |
| 242 | 242 | ||
| 243 | struct nfs4_dir_ctx { | ||
| 244 | struct dir_context ctx; | ||
| 245 | struct list_head names; | ||
| 246 | }; | ||
| 247 | |||
| 243 | static int | 248 | static int |
| 244 | nfsd4_build_namelist(void *arg, const char *name, int namlen, | 249 | nfsd4_build_namelist(void *arg, const char *name, int namlen, |
| 245 | loff_t offset, u64 ino, unsigned int d_type) | 250 | loff_t offset, u64 ino, unsigned int d_type) |
| 246 | { | 251 | { |
| 247 | struct list_head *names = arg; | 252 | struct nfs4_dir_ctx *ctx = arg; |
| 248 | struct name_list *entry; | 253 | struct name_list *entry; |
| 249 | 254 | ||
| 250 | if (namlen != HEXDIR_LEN - 1) | 255 | if (namlen != HEXDIR_LEN - 1) |
| @@ -254,7 +259,7 @@ nfsd4_build_namelist(void *arg, const char *name, int namlen, | |||
| 254 | return -ENOMEM; | 259 | return -ENOMEM; |
| 255 | memcpy(entry->name, name, HEXDIR_LEN - 1); | 260 | memcpy(entry->name, name, HEXDIR_LEN - 1); |
| 256 | entry->name[HEXDIR_LEN - 1] = '\0'; | 261 | entry->name[HEXDIR_LEN - 1] = '\0'; |
| 257 | list_add(&entry->list, names); | 262 | list_add(&entry->list, &ctx->names); |
| 258 | return 0; | 263 | return 0; |
| 259 | } | 264 | } |
| 260 | 265 | ||
| @@ -263,7 +268,10 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) | |||
| 263 | { | 268 | { |
| 264 | const struct cred *original_cred; | 269 | const struct cred *original_cred; |
| 265 | struct dentry *dir = nn->rec_file->f_path.dentry; | 270 | struct dentry *dir = nn->rec_file->f_path.dentry; |
| 266 | LIST_HEAD(names); | 271 | struct nfs4_dir_ctx ctx = { |
| 272 | .ctx.actor = nfsd4_build_namelist, | ||
| 273 | .names = LIST_HEAD_INIT(ctx.names) | ||
| 274 | }; | ||
| 267 | int status; | 275 | int status; |
| 268 | 276 | ||
| 269 | status = nfs4_save_creds(&original_cred); | 277 | status = nfs4_save_creds(&original_cred); |
| @@ -276,11 +284,11 @@ nfsd4_list_rec_dir(recdir_func *f, struct nfsd_net *nn) | |||
| 276 | return status; | 284 | return status; |
| 277 | } | 285 | } |
| 278 | 286 | ||
| 279 | status = vfs_readdir(nn->rec_file, nfsd4_build_namelist, &names); | 287 | status = iterate_dir(nn->rec_file, &ctx.ctx); |
| 280 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); | 288 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT); |
| 281 | while (!list_empty(&names)) { | 289 | while (!list_empty(&ctx.names)) { |
| 282 | struct name_list *entry; | 290 | struct name_list *entry; |
| 283 | entry = list_entry(names.next, struct name_list, list); | 291 | entry = list_entry(ctx.names.next, struct name_list, list); |
| 284 | if (!status) { | 292 | if (!status) { |
| 285 | struct dentry *dentry; | 293 | struct dentry *dentry; |
| 286 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); | 294 | dentry = lookup_one_len(entry->name, dir, HEXDIR_LEN-1); |
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601d8063..a6bc8a7423db 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -1912,6 +1912,7 @@ struct buffered_dirent { | |||
| 1912 | }; | 1912 | }; |
| 1913 | 1913 | ||
| 1914 | struct readdir_data { | 1914 | struct readdir_data { |
| 1915 | struct dir_context ctx; | ||
| 1915 | char *dirent; | 1916 | char *dirent; |
| 1916 | size_t used; | 1917 | size_t used; |
| 1917 | int full; | 1918 | int full; |
| @@ -1943,13 +1944,15 @@ static int nfsd_buffered_filldir(void *__buf, const char *name, int namlen, | |||
| 1943 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, | 1944 | static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, |
| 1944 | struct readdir_cd *cdp, loff_t *offsetp) | 1945 | struct readdir_cd *cdp, loff_t *offsetp) |
| 1945 | { | 1946 | { |
| 1946 | struct readdir_data buf; | ||
| 1947 | struct buffered_dirent *de; | 1947 | struct buffered_dirent *de; |
| 1948 | int host_err; | 1948 | int host_err; |
| 1949 | int size; | 1949 | int size; |
| 1950 | loff_t offset; | 1950 | loff_t offset; |
| 1951 | struct readdir_data buf = { | ||
| 1952 | .ctx.actor = nfsd_buffered_filldir, | ||
| 1953 | .dirent = (void *)__get_free_page(GFP_KERNEL) | ||
| 1954 | }; | ||
| 1951 | 1955 | ||
| 1952 | buf.dirent = (void *)__get_free_page(GFP_KERNEL); | ||
| 1953 | if (!buf.dirent) | 1956 | if (!buf.dirent) |
| 1954 | return nfserrno(-ENOMEM); | 1957 | return nfserrno(-ENOMEM); |
| 1955 | 1958 | ||
| @@ -1963,7 +1966,7 @@ static __be32 nfsd_buffered_readdir(struct file *file, filldir_t func, | |||
| 1963 | buf.used = 0; | 1966 | buf.used = 0; |
| 1964 | buf.full = 0; | 1967 | buf.full = 0; |
| 1965 | 1968 | ||
| 1966 | host_err = vfs_readdir(file, nfsd_buffered_filldir, &buf); | 1969 | host_err = iterate_dir(file, &buf.ctx); |
| 1967 | if (buf.full) | 1970 | if (buf.full) |
| 1968 | host_err = 0; | 1971 | host_err = 0; |
| 1969 | 1972 | ||
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index f30b017740a7..197a63e9d102 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c | |||
| @@ -256,22 +256,18 @@ static void nilfs_set_de_type(struct nilfs_dir_entry *de, struct inode *inode) | |||
| 256 | de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; | 256 | de->file_type = nilfs_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; |
| 257 | } | 257 | } |
| 258 | 258 | ||
| 259 | static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 259 | static int nilfs_readdir(struct file *file, struct dir_context *ctx) |
| 260 | { | 260 | { |
| 261 | loff_t pos = filp->f_pos; | 261 | loff_t pos = ctx->pos; |
| 262 | struct inode *inode = file_inode(filp); | 262 | struct inode *inode = file_inode(file); |
| 263 | struct super_block *sb = inode->i_sb; | 263 | struct super_block *sb = inode->i_sb; |
| 264 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 264 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
| 265 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 265 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
| 266 | unsigned long npages = dir_pages(inode); | 266 | unsigned long npages = dir_pages(inode); |
| 267 | /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ | 267 | /* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */ |
| 268 | unsigned char *types = NULL; | ||
| 269 | int ret; | ||
| 270 | 268 | ||
| 271 | if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) | 269 | if (pos > inode->i_size - NILFS_DIR_REC_LEN(1)) |
| 272 | goto success; | 270 | return 0; |
| 273 | |||
| 274 | types = nilfs_filetype_table; | ||
| 275 | 271 | ||
| 276 | for ( ; n < npages; n++, offset = 0) { | 272 | for ( ; n < npages; n++, offset = 0) { |
| 277 | char *kaddr, *limit; | 273 | char *kaddr, *limit; |
| @@ -281,9 +277,8 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 281 | if (IS_ERR(page)) { | 277 | if (IS_ERR(page)) { |
| 282 | nilfs_error(sb, __func__, "bad page in #%lu", | 278 | nilfs_error(sb, __func__, "bad page in #%lu", |
| 283 | inode->i_ino); | 279 | inode->i_ino); |
| 284 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 280 | ctx->pos += PAGE_CACHE_SIZE - offset; |
| 285 | ret = -EIO; | 281 | return -EIO; |
| 286 | goto done; | ||
| 287 | } | 282 | } |
| 288 | kaddr = page_address(page); | 283 | kaddr = page_address(page); |
| 289 | de = (struct nilfs_dir_entry *)(kaddr + offset); | 284 | de = (struct nilfs_dir_entry *)(kaddr + offset); |
| @@ -293,35 +288,28 @@ static int nilfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 293 | if (de->rec_len == 0) { | 288 | if (de->rec_len == 0) { |
| 294 | nilfs_error(sb, __func__, | 289 | nilfs_error(sb, __func__, |
| 295 | "zero-length directory entry"); | 290 | "zero-length directory entry"); |
| 296 | ret = -EIO; | ||
| 297 | nilfs_put_page(page); | 291 | nilfs_put_page(page); |
| 298 | goto done; | 292 | return -EIO; |
| 299 | } | 293 | } |
| 300 | if (de->inode) { | 294 | if (de->inode) { |
| 301 | int over; | 295 | unsigned char t; |
| 302 | unsigned char d_type = DT_UNKNOWN; | ||
| 303 | 296 | ||
| 304 | if (types && de->file_type < NILFS_FT_MAX) | 297 | if (de->file_type < NILFS_FT_MAX) |
| 305 | d_type = types[de->file_type]; | 298 | t = nilfs_filetype_table[de->file_type]; |
| 299 | else | ||
| 300 | t = DT_UNKNOWN; | ||
| 306 | 301 | ||
| 307 | offset = (char *)de - kaddr; | 302 | if (!dir_emit(ctx, de->name, de->name_len, |
| 308 | over = filldir(dirent, de->name, de->name_len, | 303 | le64_to_cpu(de->inode), t)) { |
| 309 | (n<<PAGE_CACHE_SHIFT) | offset, | ||
| 310 | le64_to_cpu(de->inode), d_type); | ||
| 311 | if (over) { | ||
| 312 | nilfs_put_page(page); | 304 | nilfs_put_page(page); |
| 313 | goto success; | 305 | return 0; |
| 314 | } | 306 | } |
| 315 | } | 307 | } |
| 316 | filp->f_pos += nilfs_rec_len_from_disk(de->rec_len); | 308 | ctx->pos += nilfs_rec_len_from_disk(de->rec_len); |
| 317 | } | 309 | } |
| 318 | nilfs_put_page(page); | 310 | nilfs_put_page(page); |
| 319 | } | 311 | } |
| 320 | 312 | return 0; | |
| 321 | success: | ||
| 322 | ret = 0; | ||
| 323 | done: | ||
| 324 | return ret; | ||
| 325 | } | 313 | } |
| 326 | 314 | ||
| 327 | /* | 315 | /* |
| @@ -678,7 +666,7 @@ not_empty: | |||
| 678 | const struct file_operations nilfs_dir_operations = { | 666 | const struct file_operations nilfs_dir_operations = { |
| 679 | .llseek = generic_file_llseek, | 667 | .llseek = generic_file_llseek, |
| 680 | .read = generic_read_dir, | 668 | .read = generic_read_dir, |
| 681 | .readdir = nilfs_readdir, | 669 | .iterate = nilfs_readdir, |
| 682 | .unlocked_ioctl = nilfs_ioctl, | 670 | .unlocked_ioctl = nilfs_ioctl, |
| 683 | #ifdef CONFIG_COMPAT | 671 | #ifdef CONFIG_COMPAT |
| 684 | .compat_ioctl = nilfs_compat_ioctl, | 672 | .compat_ioctl = nilfs_compat_ioctl, |
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index fa9c05f97af4..d267ea6aa1a0 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c | |||
| @@ -1372,7 +1372,7 @@ retry_writepage: | |||
| 1372 | * The page may have dirty, unmapped buffers. Make them | 1372 | * The page may have dirty, unmapped buffers. Make them |
| 1373 | * freeable here, so the page does not leak. | 1373 | * freeable here, so the page does not leak. |
| 1374 | */ | 1374 | */ |
| 1375 | block_invalidatepage(page, 0); | 1375 | block_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 1376 | unlock_page(page); | 1376 | unlock_page(page); |
| 1377 | ntfs_debug("Write outside i_size - truncated?"); | 1377 | ntfs_debug("Write outside i_size - truncated?"); |
| 1378 | return 0; | 1378 | return 0; |
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index aa411c3f20e9..9e38dafa3bc7 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c | |||
| @@ -1004,13 +1004,11 @@ dir_err_out: | |||
| 1004 | /** | 1004 | /** |
| 1005 | * ntfs_filldir - ntfs specific filldir method | 1005 | * ntfs_filldir - ntfs specific filldir method |
| 1006 | * @vol: current ntfs volume | 1006 | * @vol: current ntfs volume |
| 1007 | * @fpos: position in the directory | ||
| 1008 | * @ndir: ntfs inode of current directory | 1007 | * @ndir: ntfs inode of current directory |
| 1009 | * @ia_page: page in which the index allocation buffer @ie is in resides | 1008 | * @ia_page: page in which the index allocation buffer @ie is in resides |
| 1010 | * @ie: current index entry | 1009 | * @ie: current index entry |
| 1011 | * @name: buffer to use for the converted name | 1010 | * @name: buffer to use for the converted name |
| 1012 | * @dirent: vfs filldir callback context | 1011 | * @actor: what to feed the entries to |
| 1013 | * @filldir: vfs filldir callback | ||
| 1014 | * | 1012 | * |
| 1015 | * Convert the Unicode @name to the loaded NLS and pass it to the @filldir | 1013 | * Convert the Unicode @name to the loaded NLS and pass it to the @filldir |
| 1016 | * callback. | 1014 | * callback. |
| @@ -1024,12 +1022,12 @@ dir_err_out: | |||
| 1024 | * retake the lock if we are returning a non-zero value as ntfs_readdir() | 1022 | * retake the lock if we are returning a non-zero value as ntfs_readdir() |
| 1025 | * would need to drop the lock immediately anyway. | 1023 | * would need to drop the lock immediately anyway. |
| 1026 | */ | 1024 | */ |
| 1027 | static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | 1025 | static inline int ntfs_filldir(ntfs_volume *vol, |
| 1028 | ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie, | 1026 | ntfs_inode *ndir, struct page *ia_page, INDEX_ENTRY *ie, |
| 1029 | u8 *name, void *dirent, filldir_t filldir) | 1027 | u8 *name, struct dir_context *actor) |
| 1030 | { | 1028 | { |
| 1031 | unsigned long mref; | 1029 | unsigned long mref; |
| 1032 | int name_len, rc; | 1030 | int name_len; |
| 1033 | unsigned dt_type; | 1031 | unsigned dt_type; |
| 1034 | FILE_NAME_TYPE_FLAGS name_type; | 1032 | FILE_NAME_TYPE_FLAGS name_type; |
| 1035 | 1033 | ||
| @@ -1068,13 +1066,14 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | |||
| 1068 | if (ia_page) | 1066 | if (ia_page) |
| 1069 | unlock_page(ia_page); | 1067 | unlock_page(ia_page); |
| 1070 | ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode " | 1068 | ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode " |
| 1071 | "0x%lx, DT_%s.", name, name_len, fpos, mref, | 1069 | "0x%lx, DT_%s.", name, name_len, actor->pos, mref, |
| 1072 | dt_type == DT_DIR ? "DIR" : "REG"); | 1070 | dt_type == DT_DIR ? "DIR" : "REG"); |
| 1073 | rc = filldir(dirent, name, name_len, fpos, mref, dt_type); | 1071 | if (!dir_emit(actor, name, name_len, mref, dt_type)) |
| 1072 | return 1; | ||
| 1074 | /* Relock the page but not if we are aborting ->readdir. */ | 1073 | /* Relock the page but not if we are aborting ->readdir. */ |
| 1075 | if (!rc && ia_page) | 1074 | if (ia_page) |
| 1076 | lock_page(ia_page); | 1075 | lock_page(ia_page); |
| 1077 | return rc; | 1076 | return 0; |
| 1078 | } | 1077 | } |
| 1079 | 1078 | ||
| 1080 | /* | 1079 | /* |
| @@ -1097,11 +1096,11 @@ static inline int ntfs_filldir(ntfs_volume *vol, loff_t fpos, | |||
| 1097 | * removes them again after the write is complete after which it | 1096 | * removes them again after the write is complete after which it |
| 1098 | * unlocks the page. | 1097 | * unlocks the page. |
| 1099 | */ | 1098 | */ |
| 1100 | static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 1099 | static int ntfs_readdir(struct file *file, struct dir_context *actor) |
| 1101 | { | 1100 | { |
| 1102 | s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; | 1101 | s64 ia_pos, ia_start, prev_ia_pos, bmp_pos; |
| 1103 | loff_t fpos, i_size; | 1102 | loff_t i_size; |
| 1104 | struct inode *bmp_vi, *vdir = file_inode(filp); | 1103 | struct inode *bmp_vi, *vdir = file_inode(file); |
| 1105 | struct super_block *sb = vdir->i_sb; | 1104 | struct super_block *sb = vdir->i_sb; |
| 1106 | ntfs_inode *ndir = NTFS_I(vdir); | 1105 | ntfs_inode *ndir = NTFS_I(vdir); |
| 1107 | ntfs_volume *vol = NTFS_SB(sb); | 1106 | ntfs_volume *vol = NTFS_SB(sb); |
| @@ -1116,33 +1115,16 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1116 | u8 *kaddr, *bmp, *index_end; | 1115 | u8 *kaddr, *bmp, *index_end; |
| 1117 | ntfs_attr_search_ctx *ctx; | 1116 | ntfs_attr_search_ctx *ctx; |
| 1118 | 1117 | ||
| 1119 | fpos = filp->f_pos; | ||
| 1120 | ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.", | 1118 | ntfs_debug("Entering for inode 0x%lx, fpos 0x%llx.", |
| 1121 | vdir->i_ino, fpos); | 1119 | vdir->i_ino, actor->pos); |
| 1122 | rc = err = 0; | 1120 | rc = err = 0; |
| 1123 | /* Are we at end of dir yet? */ | 1121 | /* Are we at end of dir yet? */ |
| 1124 | i_size = i_size_read(vdir); | 1122 | i_size = i_size_read(vdir); |
| 1125 | if (fpos >= i_size + vol->mft_record_size) | 1123 | if (actor->pos >= i_size + vol->mft_record_size) |
| 1126 | goto done; | 1124 | return 0; |
| 1127 | /* Emulate . and .. for all directories. */ | 1125 | /* Emulate . and .. for all directories. */ |
| 1128 | if (!fpos) { | 1126 | if (!dir_emit_dots(file, actor)) |
| 1129 | ntfs_debug("Calling filldir for . with len 1, fpos 0x0, " | 1127 | return 0; |
| 1130 | "inode 0x%lx, DT_DIR.", vdir->i_ino); | ||
| 1131 | rc = filldir(dirent, ".", 1, fpos, vdir->i_ino, DT_DIR); | ||
| 1132 | if (rc) | ||
| 1133 | goto done; | ||
| 1134 | fpos++; | ||
| 1135 | } | ||
| 1136 | if (fpos == 1) { | ||
| 1137 | ntfs_debug("Calling filldir for .. with len 2, fpos 0x1, " | ||
| 1138 | "inode 0x%lx, DT_DIR.", | ||
| 1139 | (unsigned long)parent_ino(filp->f_path.dentry)); | ||
| 1140 | rc = filldir(dirent, "..", 2, fpos, | ||
| 1141 | parent_ino(filp->f_path.dentry), DT_DIR); | ||
| 1142 | if (rc) | ||
| 1143 | goto done; | ||
| 1144 | fpos++; | ||
| 1145 | } | ||
| 1146 | m = NULL; | 1128 | m = NULL; |
| 1147 | ctx = NULL; | 1129 | ctx = NULL; |
| 1148 | /* | 1130 | /* |
| @@ -1155,7 +1137,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1155 | goto err_out; | 1137 | goto err_out; |
| 1156 | } | 1138 | } |
| 1157 | /* Are we jumping straight into the index allocation attribute? */ | 1139 | /* Are we jumping straight into the index allocation attribute? */ |
| 1158 | if (fpos >= vol->mft_record_size) | 1140 | if (actor->pos >= vol->mft_record_size) |
| 1159 | goto skip_index_root; | 1141 | goto skip_index_root; |
| 1160 | /* Get hold of the mft record for the directory. */ | 1142 | /* Get hold of the mft record for the directory. */ |
| 1161 | m = map_mft_record(ndir); | 1143 | m = map_mft_record(ndir); |
| @@ -1170,7 +1152,7 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1170 | goto err_out; | 1152 | goto err_out; |
| 1171 | } | 1153 | } |
| 1172 | /* Get the offset into the index root attribute. */ | 1154 | /* Get the offset into the index root attribute. */ |
| 1173 | ir_pos = (s64)fpos; | 1155 | ir_pos = (s64)actor->pos; |
| 1174 | /* Find the index root attribute in the mft record. */ | 1156 | /* Find the index root attribute in the mft record. */ |
| 1175 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, | 1157 | err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, |
| 1176 | 0, ctx); | 1158 | 0, ctx); |
| @@ -1226,10 +1208,9 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1226 | if (ir_pos > (u8*)ie - (u8*)ir) | 1208 | if (ir_pos > (u8*)ie - (u8*)ir) |
| 1227 | continue; | 1209 | continue; |
| 1228 | /* Advance the position even if going to skip the entry. */ | 1210 | /* Advance the position even if going to skip the entry. */ |
| 1229 | fpos = (u8*)ie - (u8*)ir; | 1211 | actor->pos = (u8*)ie - (u8*)ir; |
| 1230 | /* Submit the name to the filldir callback. */ | 1212 | /* Submit the name to the filldir callback. */ |
| 1231 | rc = ntfs_filldir(vol, fpos, ndir, NULL, ie, name, dirent, | 1213 | rc = ntfs_filldir(vol, ndir, NULL, ie, name, actor); |
| 1232 | filldir); | ||
| 1233 | if (rc) { | 1214 | if (rc) { |
| 1234 | kfree(ir); | 1215 | kfree(ir); |
| 1235 | goto abort; | 1216 | goto abort; |
| @@ -1242,12 +1223,12 @@ static int ntfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1242 | if (!NInoIndexAllocPresent(ndir)) | 1223 | if (!NInoIndexAllocPresent(ndir)) |
| 1243 | goto EOD; | 1224 | goto EOD; |
| 1244 | /* Advance fpos to the beginning of the index allocation. */ | 1225 | /* Advance fpos to the beginning of the index allocation. */ |
| 1245 | fpos = vol->mft_record_size; | 1226 | actor->pos = vol->mft_record_size; |
| 1246 | skip_index_root: | 1227 | skip_index_root: |
| 1247 | kaddr = NULL; | 1228 | kaddr = NULL; |
| 1248 | prev_ia_pos = -1LL; | 1229 | prev_ia_pos = -1LL; |
| 1249 | /* Get the offset into the index allocation attribute. */ | 1230 | /* Get the offset into the index allocation attribute. */ |
| 1250 | ia_pos = (s64)fpos - vol->mft_record_size; | 1231 | ia_pos = (s64)actor->pos - vol->mft_record_size; |
| 1251 | ia_mapping = vdir->i_mapping; | 1232 | ia_mapping = vdir->i_mapping; |
| 1252 | ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino); | 1233 | ntfs_debug("Inode 0x%lx, getting index bitmap.", vdir->i_ino); |
| 1253 | bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); | 1234 | bmp_vi = ntfs_attr_iget(vdir, AT_BITMAP, I30, 4); |
| @@ -1409,7 +1390,7 @@ find_next_index_buffer: | |||
| 1409 | if (ia_pos - ia_start > (u8*)ie - (u8*)ia) | 1390 | if (ia_pos - ia_start > (u8*)ie - (u8*)ia) |
| 1410 | continue; | 1391 | continue; |
| 1411 | /* Advance the position even if going to skip the entry. */ | 1392 | /* Advance the position even if going to skip the entry. */ |
| 1412 | fpos = (u8*)ie - (u8*)ia + | 1393 | actor->pos = (u8*)ie - (u8*)ia + |
| 1413 | (sle64_to_cpu(ia->index_block_vcn) << | 1394 | (sle64_to_cpu(ia->index_block_vcn) << |
| 1414 | ndir->itype.index.vcn_size_bits) + | 1395 | ndir->itype.index.vcn_size_bits) + |
| 1415 | vol->mft_record_size; | 1396 | vol->mft_record_size; |
| @@ -1419,8 +1400,7 @@ find_next_index_buffer: | |||
| 1419 | * before returning, unless a non-zero value is returned in | 1400 | * before returning, unless a non-zero value is returned in |
| 1420 | * which case the page is left unlocked. | 1401 | * which case the page is left unlocked. |
| 1421 | */ | 1402 | */ |
| 1422 | rc = ntfs_filldir(vol, fpos, ndir, ia_page, ie, name, dirent, | 1403 | rc = ntfs_filldir(vol, ndir, ia_page, ie, name, actor); |
| 1423 | filldir); | ||
| 1424 | if (rc) { | 1404 | if (rc) { |
| 1425 | /* @ia_page is already unlocked in this case. */ | 1405 | /* @ia_page is already unlocked in this case. */ |
| 1426 | ntfs_unmap_page(ia_page); | 1406 | ntfs_unmap_page(ia_page); |
| @@ -1439,18 +1419,9 @@ unm_EOD: | |||
| 1439 | iput(bmp_vi); | 1419 | iput(bmp_vi); |
| 1440 | EOD: | 1420 | EOD: |
| 1441 | /* We are finished, set fpos to EOD. */ | 1421 | /* We are finished, set fpos to EOD. */ |
| 1442 | fpos = i_size + vol->mft_record_size; | 1422 | actor->pos = i_size + vol->mft_record_size; |
| 1443 | abort: | 1423 | abort: |
| 1444 | kfree(name); | 1424 | kfree(name); |
| 1445 | done: | ||
| 1446 | #ifdef DEBUG | ||
| 1447 | if (!rc) | ||
| 1448 | ntfs_debug("EOD, fpos 0x%llx, returning 0.", fpos); | ||
| 1449 | else | ||
| 1450 | ntfs_debug("filldir returned %i, fpos 0x%llx, returning 0.", | ||
| 1451 | rc, fpos); | ||
| 1452 | #endif | ||
| 1453 | filp->f_pos = fpos; | ||
| 1454 | return 0; | 1425 | return 0; |
| 1455 | err_out: | 1426 | err_out: |
| 1456 | if (bmp_page) { | 1427 | if (bmp_page) { |
| @@ -1471,7 +1442,6 @@ iput_err_out: | |||
| 1471 | if (!err) | 1442 | if (!err) |
| 1472 | err = -EIO; | 1443 | err = -EIO; |
| 1473 | ntfs_debug("Failed. Returning error code %i.", -err); | 1444 | ntfs_debug("Failed. Returning error code %i.", -err); |
| 1474 | filp->f_pos = fpos; | ||
| 1475 | return err; | 1445 | return err; |
| 1476 | } | 1446 | } |
| 1477 | 1447 | ||
| @@ -1571,7 +1541,7 @@ static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, | |||
| 1571 | const struct file_operations ntfs_dir_ops = { | 1541 | const struct file_operations ntfs_dir_ops = { |
| 1572 | .llseek = generic_file_llseek, /* Seek inside directory. */ | 1542 | .llseek = generic_file_llseek, /* Seek inside directory. */ |
| 1573 | .read = generic_read_dir, /* Return -EISDIR. */ | 1543 | .read = generic_read_dir, /* Return -EISDIR. */ |
| 1574 | .readdir = ntfs_readdir, /* Read directory contents. */ | 1544 | .iterate = ntfs_readdir, /* Read directory contents. */ |
| 1575 | #ifdef NTFS_RW | 1545 | #ifdef NTFS_RW |
| 1576 | .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ | 1546 | .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ |
| 1577 | /*.aio_fsync = ,*/ /* Sync all outstanding async | 1547 | /*.aio_fsync = ,*/ /* Sync all outstanding async |
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 20dfec72e903..79736a28d84f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -603,11 +603,12 @@ static void ocfs2_dio_end_io(struct kiocb *iocb, | |||
| 603 | * from ext3. PageChecked() bits have been removed as OCFS2 does not | 603 | * from ext3. PageChecked() bits have been removed as OCFS2 does not |
| 604 | * do journalled data. | 604 | * do journalled data. |
| 605 | */ | 605 | */ |
| 606 | static void ocfs2_invalidatepage(struct page *page, unsigned long offset) | 606 | static void ocfs2_invalidatepage(struct page *page, unsigned int offset, |
| 607 | unsigned int length) | ||
| 607 | { | 608 | { |
| 608 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 609 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
| 609 | 610 | ||
| 610 | jbd2_journal_invalidatepage(journal, page, offset); | 611 | jbd2_journal_invalidatepage(journal, page, offset, length); |
| 611 | } | 612 | } |
| 612 | 613 | ||
| 613 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 614 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f1e1aed8f638..eb760d8acd50 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -1761,11 +1761,10 @@ bail: | |||
| 1761 | 1761 | ||
| 1762 | static int ocfs2_dir_foreach_blk_id(struct inode *inode, | 1762 | static int ocfs2_dir_foreach_blk_id(struct inode *inode, |
| 1763 | u64 *f_version, | 1763 | u64 *f_version, |
| 1764 | loff_t *f_pos, void *priv, | 1764 | struct dir_context *ctx) |
| 1765 | filldir_t filldir, int *filldir_err) | ||
| 1766 | { | 1765 | { |
| 1767 | int ret, i, filldir_ret; | 1766 | int ret, i; |
| 1768 | unsigned long offset = *f_pos; | 1767 | unsigned long offset = ctx->pos; |
| 1769 | struct buffer_head *di_bh = NULL; | 1768 | struct buffer_head *di_bh = NULL; |
| 1770 | struct ocfs2_dinode *di; | 1769 | struct ocfs2_dinode *di; |
| 1771 | struct ocfs2_inline_data *data; | 1770 | struct ocfs2_inline_data *data; |
| @@ -1781,8 +1780,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, | |||
| 1781 | di = (struct ocfs2_dinode *)di_bh->b_data; | 1780 | di = (struct ocfs2_dinode *)di_bh->b_data; |
| 1782 | data = &di->id2.i_data; | 1781 | data = &di->id2.i_data; |
| 1783 | 1782 | ||
| 1784 | while (*f_pos < i_size_read(inode)) { | 1783 | while (ctx->pos < i_size_read(inode)) { |
| 1785 | revalidate: | ||
| 1786 | /* If the dir block has changed since the last call to | 1784 | /* If the dir block has changed since the last call to |
| 1787 | * readdir(2), then we might be pointing to an invalid | 1785 | * readdir(2), then we might be pointing to an invalid |
| 1788 | * dirent right now. Scan from the start of the block | 1786 | * dirent right now. Scan from the start of the block |
| @@ -1802,50 +1800,31 @@ revalidate: | |||
| 1802 | break; | 1800 | break; |
| 1803 | i += le16_to_cpu(de->rec_len); | 1801 | i += le16_to_cpu(de->rec_len); |
| 1804 | } | 1802 | } |
| 1805 | *f_pos = offset = i; | 1803 | ctx->pos = offset = i; |
| 1806 | *f_version = inode->i_version; | 1804 | *f_version = inode->i_version; |
| 1807 | } | 1805 | } |
| 1808 | 1806 | ||
| 1809 | de = (struct ocfs2_dir_entry *) (data->id_data + *f_pos); | 1807 | de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos); |
| 1810 | if (!ocfs2_check_dir_entry(inode, de, di_bh, *f_pos)) { | 1808 | if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) { |
| 1811 | /* On error, skip the f_pos to the end. */ | 1809 | /* On error, skip the f_pos to the end. */ |
| 1812 | *f_pos = i_size_read(inode); | 1810 | ctx->pos = i_size_read(inode); |
| 1813 | goto out; | 1811 | break; |
| 1814 | } | 1812 | } |
| 1815 | offset += le16_to_cpu(de->rec_len); | 1813 | offset += le16_to_cpu(de->rec_len); |
| 1816 | if (le64_to_cpu(de->inode)) { | 1814 | if (le64_to_cpu(de->inode)) { |
| 1817 | /* We might block in the next section | ||
| 1818 | * if the data destination is | ||
| 1819 | * currently swapped out. So, use a | ||
| 1820 | * version stamp to detect whether or | ||
| 1821 | * not the directory has been modified | ||
| 1822 | * during the copy operation. | ||
| 1823 | */ | ||
| 1824 | u64 version = *f_version; | ||
| 1825 | unsigned char d_type = DT_UNKNOWN; | 1815 | unsigned char d_type = DT_UNKNOWN; |
| 1826 | 1816 | ||
| 1827 | if (de->file_type < OCFS2_FT_MAX) | 1817 | if (de->file_type < OCFS2_FT_MAX) |
| 1828 | d_type = ocfs2_filetype_table[de->file_type]; | 1818 | d_type = ocfs2_filetype_table[de->file_type]; |
| 1829 | 1819 | ||
| 1830 | filldir_ret = filldir(priv, de->name, | 1820 | if (!dir_emit(ctx, de->name, de->name_len, |
| 1831 | de->name_len, | 1821 | le64_to_cpu(de->inode), d_type)) |
| 1832 | *f_pos, | 1822 | goto out; |
| 1833 | le64_to_cpu(de->inode), | ||
| 1834 | d_type); | ||
| 1835 | if (filldir_ret) { | ||
| 1836 | if (filldir_err) | ||
| 1837 | *filldir_err = filldir_ret; | ||
| 1838 | break; | ||
| 1839 | } | ||
| 1840 | if (version != *f_version) | ||
| 1841 | goto revalidate; | ||
| 1842 | } | 1823 | } |
| 1843 | *f_pos += le16_to_cpu(de->rec_len); | 1824 | ctx->pos += le16_to_cpu(de->rec_len); |
| 1844 | } | 1825 | } |
| 1845 | |||
| 1846 | out: | 1826 | out: |
| 1847 | brelse(di_bh); | 1827 | brelse(di_bh); |
| 1848 | |||
| 1849 | return 0; | 1828 | return 0; |
| 1850 | } | 1829 | } |
| 1851 | 1830 | ||
| @@ -1855,27 +1834,26 @@ out: | |||
| 1855 | */ | 1834 | */ |
| 1856 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, | 1835 | static int ocfs2_dir_foreach_blk_el(struct inode *inode, |
| 1857 | u64 *f_version, | 1836 | u64 *f_version, |
| 1858 | loff_t *f_pos, void *priv, | 1837 | struct dir_context *ctx, |
| 1859 | filldir_t filldir, int *filldir_err) | 1838 | bool persist) |
| 1860 | { | 1839 | { |
| 1861 | int error = 0; | ||
| 1862 | unsigned long offset, blk, last_ra_blk = 0; | 1840 | unsigned long offset, blk, last_ra_blk = 0; |
| 1863 | int i, stored; | 1841 | int i; |
| 1864 | struct buffer_head * bh, * tmp; | 1842 | struct buffer_head * bh, * tmp; |
| 1865 | struct ocfs2_dir_entry * de; | 1843 | struct ocfs2_dir_entry * de; |
| 1866 | struct super_block * sb = inode->i_sb; | 1844 | struct super_block * sb = inode->i_sb; |
| 1867 | unsigned int ra_sectors = 16; | 1845 | unsigned int ra_sectors = 16; |
| 1846 | int stored = 0; | ||
| 1868 | 1847 | ||
| 1869 | stored = 0; | ||
| 1870 | bh = NULL; | 1848 | bh = NULL; |
| 1871 | 1849 | ||
| 1872 | offset = (*f_pos) & (sb->s_blocksize - 1); | 1850 | offset = ctx->pos & (sb->s_blocksize - 1); |
| 1873 | 1851 | ||
| 1874 | while (!error && !stored && *f_pos < i_size_read(inode)) { | 1852 | while (ctx->pos < i_size_read(inode)) { |
| 1875 | blk = (*f_pos) >> sb->s_blocksize_bits; | 1853 | blk = ctx->pos >> sb->s_blocksize_bits; |
| 1876 | if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { | 1854 | if (ocfs2_read_dir_block(inode, blk, &bh, 0)) { |
| 1877 | /* Skip the corrupt dirblock and keep trying */ | 1855 | /* Skip the corrupt dirblock and keep trying */ |
| 1878 | *f_pos += sb->s_blocksize - offset; | 1856 | ctx->pos += sb->s_blocksize - offset; |
| 1879 | continue; | 1857 | continue; |
| 1880 | } | 1858 | } |
| 1881 | 1859 | ||
| @@ -1897,7 +1875,6 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, | |||
| 1897 | ra_sectors = 8; | 1875 | ra_sectors = 8; |
| 1898 | } | 1876 | } |
| 1899 | 1877 | ||
| 1900 | revalidate: | ||
| 1901 | /* If the dir block has changed since the last call to | 1878 | /* If the dir block has changed since the last call to |
| 1902 | * readdir(2), then we might be pointing to an invalid | 1879 | * readdir(2), then we might be pointing to an invalid |
| 1903 | * dirent right now. Scan from the start of the block | 1880 | * dirent right now. Scan from the start of the block |
| @@ -1917,93 +1894,64 @@ revalidate: | |||
| 1917 | i += le16_to_cpu(de->rec_len); | 1894 | i += le16_to_cpu(de->rec_len); |
| 1918 | } | 1895 | } |
| 1919 | offset = i; | 1896 | offset = i; |
| 1920 | *f_pos = ((*f_pos) & ~(sb->s_blocksize - 1)) | 1897 | ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1)) |
| 1921 | | offset; | 1898 | | offset; |
| 1922 | *f_version = inode->i_version; | 1899 | *f_version = inode->i_version; |
| 1923 | } | 1900 | } |
| 1924 | 1901 | ||
| 1925 | while (!error && *f_pos < i_size_read(inode) | 1902 | while (ctx->pos < i_size_read(inode) |
| 1926 | && offset < sb->s_blocksize) { | 1903 | && offset < sb->s_blocksize) { |
| 1927 | de = (struct ocfs2_dir_entry *) (bh->b_data + offset); | 1904 | de = (struct ocfs2_dir_entry *) (bh->b_data + offset); |
| 1928 | if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { | 1905 | if (!ocfs2_check_dir_entry(inode, de, bh, offset)) { |
| 1929 | /* On error, skip the f_pos to the | 1906 | /* On error, skip the f_pos to the |
| 1930 | next block. */ | 1907 | next block. */ |
| 1931 | *f_pos = ((*f_pos) | (sb->s_blocksize - 1)) + 1; | 1908 | ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1; |
| 1932 | brelse(bh); | 1909 | brelse(bh); |
| 1933 | goto out; | 1910 | continue; |
| 1934 | } | 1911 | } |
| 1935 | offset += le16_to_cpu(de->rec_len); | ||
| 1936 | if (le64_to_cpu(de->inode)) { | 1912 | if (le64_to_cpu(de->inode)) { |
| 1937 | /* We might block in the next section | ||
| 1938 | * if the data destination is | ||
| 1939 | * currently swapped out. So, use a | ||
| 1940 | * version stamp to detect whether or | ||
| 1941 | * not the directory has been modified | ||
| 1942 | * during the copy operation. | ||
| 1943 | */ | ||
| 1944 | unsigned long version = *f_version; | ||
| 1945 | unsigned char d_type = DT_UNKNOWN; | 1913 | unsigned char d_type = DT_UNKNOWN; |
| 1946 | 1914 | ||
| 1947 | if (de->file_type < OCFS2_FT_MAX) | 1915 | if (de->file_type < OCFS2_FT_MAX) |
| 1948 | d_type = ocfs2_filetype_table[de->file_type]; | 1916 | d_type = ocfs2_filetype_table[de->file_type]; |
| 1949 | error = filldir(priv, de->name, | 1917 | if (!dir_emit(ctx, de->name, |
| 1950 | de->name_len, | 1918 | de->name_len, |
| 1951 | *f_pos, | ||
| 1952 | le64_to_cpu(de->inode), | 1919 | le64_to_cpu(de->inode), |
| 1953 | d_type); | 1920 | d_type)) { |
| 1954 | if (error) { | 1921 | brelse(bh); |
| 1955 | if (filldir_err) | 1922 | return 0; |
| 1956 | *filldir_err = error; | ||
| 1957 | break; | ||
| 1958 | } | 1923 | } |
| 1959 | if (version != *f_version) | 1924 | stored++; |
| 1960 | goto revalidate; | ||
| 1961 | stored ++; | ||
| 1962 | } | 1925 | } |
| 1963 | *f_pos += le16_to_cpu(de->rec_len); | 1926 | offset += le16_to_cpu(de->rec_len); |
| 1927 | ctx->pos += le16_to_cpu(de->rec_len); | ||
| 1964 | } | 1928 | } |
| 1965 | offset = 0; | 1929 | offset = 0; |
| 1966 | brelse(bh); | 1930 | brelse(bh); |
| 1967 | bh = NULL; | 1931 | bh = NULL; |
| 1932 | if (!persist && stored) | ||
| 1933 | break; | ||
| 1968 | } | 1934 | } |
| 1969 | 1935 | return 0; | |
| 1970 | stored = 0; | ||
| 1971 | out: | ||
| 1972 | return stored; | ||
| 1973 | } | 1936 | } |
| 1974 | 1937 | ||
| 1975 | static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, | 1938 | static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version, |
| 1976 | loff_t *f_pos, void *priv, filldir_t filldir, | 1939 | struct dir_context *ctx, |
| 1977 | int *filldir_err) | 1940 | bool persist) |
| 1978 | { | 1941 | { |
| 1979 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) | 1942 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) |
| 1980 | return ocfs2_dir_foreach_blk_id(inode, f_version, f_pos, priv, | 1943 | return ocfs2_dir_foreach_blk_id(inode, f_version, ctx); |
| 1981 | filldir, filldir_err); | 1944 | return ocfs2_dir_foreach_blk_el(inode, f_version, ctx, persist); |
| 1982 | |||
| 1983 | return ocfs2_dir_foreach_blk_el(inode, f_version, f_pos, priv, filldir, | ||
| 1984 | filldir_err); | ||
| 1985 | } | 1945 | } |
| 1986 | 1946 | ||
| 1987 | /* | 1947 | /* |
| 1988 | * This is intended to be called from inside other kernel functions, | 1948 | * This is intended to be called from inside other kernel functions, |
| 1989 | * so we fake some arguments. | 1949 | * so we fake some arguments. |
| 1990 | */ | 1950 | */ |
| 1991 | int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, | 1951 | int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx) |
| 1992 | filldir_t filldir) | ||
| 1993 | { | 1952 | { |
| 1994 | int ret = 0, filldir_err = 0; | ||
| 1995 | u64 version = inode->i_version; | 1953 | u64 version = inode->i_version; |
| 1996 | 1954 | ocfs2_dir_foreach_blk(inode, &version, ctx, true); | |
| 1997 | while (*f_pos < i_size_read(inode)) { | ||
| 1998 | ret = ocfs2_dir_foreach_blk(inode, &version, f_pos, priv, | ||
| 1999 | filldir, &filldir_err); | ||
| 2000 | if (ret || filldir_err) | ||
| 2001 | break; | ||
| 2002 | } | ||
| 2003 | |||
| 2004 | if (ret > 0) | ||
| 2005 | ret = -EIO; | ||
| 2006 | |||
| 2007 | return 0; | 1955 | return 0; |
| 2008 | } | 1956 | } |
| 2009 | 1957 | ||
| @@ -2011,15 +1959,15 @@ int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, | |||
| 2011 | * ocfs2_readdir() | 1959 | * ocfs2_readdir() |
| 2012 | * | 1960 | * |
| 2013 | */ | 1961 | */ |
| 2014 | int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1962 | int ocfs2_readdir(struct file *file, struct dir_context *ctx) |
| 2015 | { | 1963 | { |
| 2016 | int error = 0; | 1964 | int error = 0; |
| 2017 | struct inode *inode = file_inode(filp); | 1965 | struct inode *inode = file_inode(file); |
| 2018 | int lock_level = 0; | 1966 | int lock_level = 0; |
| 2019 | 1967 | ||
| 2020 | trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); | 1968 | trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 2021 | 1969 | ||
| 2022 | error = ocfs2_inode_lock_atime(inode, filp->f_path.mnt, &lock_level); | 1970 | error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level); |
| 2023 | if (lock_level && error >= 0) { | 1971 | if (lock_level && error >= 0) { |
| 2024 | /* We release EX lock which used to update atime | 1972 | /* We release EX lock which used to update atime |
| 2025 | * and get PR lock again to reduce contention | 1973 | * and get PR lock again to reduce contention |
| @@ -2035,8 +1983,7 @@ int ocfs2_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 2035 | goto bail_nolock; | 1983 | goto bail_nolock; |
| 2036 | } | 1984 | } |
| 2037 | 1985 | ||
| 2038 | error = ocfs2_dir_foreach_blk(inode, &filp->f_version, &filp->f_pos, | 1986 | error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false); |
| 2039 | dirent, filldir, NULL); | ||
| 2040 | 1987 | ||
| 2041 | ocfs2_inode_unlock(inode, lock_level); | 1988 | ocfs2_inode_unlock(inode, lock_level); |
| 2042 | if (error) | 1989 | if (error) |
| @@ -2120,6 +2067,7 @@ bail: | |||
| 2120 | } | 2067 | } |
| 2121 | 2068 | ||
| 2122 | struct ocfs2_empty_dir_priv { | 2069 | struct ocfs2_empty_dir_priv { |
| 2070 | struct dir_context ctx; | ||
| 2123 | unsigned seen_dot; | 2071 | unsigned seen_dot; |
| 2124 | unsigned seen_dot_dot; | 2072 | unsigned seen_dot_dot; |
| 2125 | unsigned seen_other; | 2073 | unsigned seen_other; |
| @@ -2204,8 +2152,9 @@ out: | |||
| 2204 | int ocfs2_empty_dir(struct inode *inode) | 2152 | int ocfs2_empty_dir(struct inode *inode) |
| 2205 | { | 2153 | { |
| 2206 | int ret; | 2154 | int ret; |
| 2207 | loff_t start = 0; | 2155 | struct ocfs2_empty_dir_priv priv = { |
| 2208 | struct ocfs2_empty_dir_priv priv; | 2156 | .ctx.actor = ocfs2_empty_dir_filldir |
| 2157 | }; | ||
| 2209 | 2158 | ||
| 2210 | memset(&priv, 0, sizeof(priv)); | 2159 | memset(&priv, 0, sizeof(priv)); |
| 2211 | 2160 | ||
| @@ -2219,7 +2168,7 @@ int ocfs2_empty_dir(struct inode *inode) | |||
| 2219 | */ | 2168 | */ |
| 2220 | } | 2169 | } |
| 2221 | 2170 | ||
| 2222 | ret = ocfs2_dir_foreach(inode, &start, &priv, ocfs2_empty_dir_filldir); | 2171 | ret = ocfs2_dir_foreach(inode, &priv.ctx); |
| 2223 | if (ret) | 2172 | if (ret) |
| 2224 | mlog_errno(ret); | 2173 | mlog_errno(ret); |
| 2225 | 2174 | ||
diff --git a/fs/ocfs2/dir.h b/fs/ocfs2/dir.h index e683f3deb645..f0344b75b14d 100644 --- a/fs/ocfs2/dir.h +++ b/fs/ocfs2/dir.h | |||
| @@ -92,9 +92,8 @@ int ocfs2_find_files_on_disk(const char *name, | |||
| 92 | struct ocfs2_dir_lookup_result *res); | 92 | struct ocfs2_dir_lookup_result *res); |
| 93 | int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, | 93 | int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name, |
| 94 | int namelen, u64 *blkno); | 94 | int namelen, u64 *blkno); |
| 95 | int ocfs2_readdir(struct file *filp, void *dirent, filldir_t filldir); | 95 | int ocfs2_readdir(struct file *file, struct dir_context *ctx); |
| 96 | int ocfs2_dir_foreach(struct inode *inode, loff_t *f_pos, void *priv, | 96 | int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx); |
| 97 | filldir_t filldir); | ||
| 98 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | 97 | int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, |
| 99 | struct inode *dir, | 98 | struct inode *dir, |
| 100 | struct buffer_head *parent_fe_bh, | 99 | struct buffer_head *parent_fe_bh, |
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ff54014a24ec..8a38714f1d92 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -2712,7 +2712,7 @@ const struct file_operations ocfs2_fops = { | |||
| 2712 | const struct file_operations ocfs2_dops = { | 2712 | const struct file_operations ocfs2_dops = { |
| 2713 | .llseek = generic_file_llseek, | 2713 | .llseek = generic_file_llseek, |
| 2714 | .read = generic_read_dir, | 2714 | .read = generic_read_dir, |
| 2715 | .readdir = ocfs2_readdir, | 2715 | .iterate = ocfs2_readdir, |
| 2716 | .fsync = ocfs2_sync_file, | 2716 | .fsync = ocfs2_sync_file, |
| 2717 | .release = ocfs2_dir_release, | 2717 | .release = ocfs2_dir_release, |
| 2718 | .open = ocfs2_dir_open, | 2718 | .open = ocfs2_dir_open, |
| @@ -2759,7 +2759,7 @@ const struct file_operations ocfs2_fops_no_plocks = { | |||
| 2759 | const struct file_operations ocfs2_dops_no_plocks = { | 2759 | const struct file_operations ocfs2_dops_no_plocks = { |
| 2760 | .llseek = generic_file_llseek, | 2760 | .llseek = generic_file_llseek, |
| 2761 | .read = generic_read_dir, | 2761 | .read = generic_read_dir, |
| 2762 | .readdir = ocfs2_readdir, | 2762 | .iterate = ocfs2_readdir, |
| 2763 | .fsync = ocfs2_sync_file, | 2763 | .fsync = ocfs2_sync_file, |
| 2764 | .release = ocfs2_dir_release, | 2764 | .release = ocfs2_dir_release, |
| 2765 | .open = ocfs2_dir_open, | 2765 | .open = ocfs2_dir_open, |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 8eccfabcd12e..242170d83971 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -1941,6 +1941,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) | |||
| 1941 | } | 1941 | } |
| 1942 | 1942 | ||
| 1943 | struct ocfs2_orphan_filldir_priv { | 1943 | struct ocfs2_orphan_filldir_priv { |
| 1944 | struct dir_context ctx; | ||
| 1944 | struct inode *head; | 1945 | struct inode *head; |
| 1945 | struct ocfs2_super *osb; | 1946 | struct ocfs2_super *osb; |
| 1946 | }; | 1947 | }; |
| @@ -1977,11 +1978,11 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
| 1977 | { | 1978 | { |
| 1978 | int status; | 1979 | int status; |
| 1979 | struct inode *orphan_dir_inode = NULL; | 1980 | struct inode *orphan_dir_inode = NULL; |
| 1980 | struct ocfs2_orphan_filldir_priv priv; | 1981 | struct ocfs2_orphan_filldir_priv priv = { |
| 1981 | loff_t pos = 0; | 1982 | .ctx.actor = ocfs2_orphan_filldir, |
| 1982 | 1983 | .osb = osb, | |
| 1983 | priv.osb = osb; | 1984 | .head = *head |
| 1984 | priv.head = *head; | 1985 | }; |
| 1985 | 1986 | ||
| 1986 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, | 1987 | orphan_dir_inode = ocfs2_get_system_file_inode(osb, |
| 1987 | ORPHAN_DIR_SYSTEM_INODE, | 1988 | ORPHAN_DIR_SYSTEM_INODE, |
| @@ -1999,8 +2000,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, | |||
| 1999 | goto out; | 2000 | goto out; |
| 2000 | } | 2001 | } |
| 2001 | 2002 | ||
| 2002 | status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv, | 2003 | status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx); |
| 2003 | ocfs2_orphan_filldir); | ||
| 2004 | if (status) { | 2004 | if (status) { |
| 2005 | mlog_errno(status); | 2005 | mlog_errno(status); |
| 2006 | goto out_cluster; | 2006 | goto out_cluster; |
diff --git a/fs/omfs/dir.c b/fs/omfs/dir.c index acbaebcad3a8..1b8e9e8405b2 100644 --- a/fs/omfs/dir.c +++ b/fs/omfs/dir.c | |||
| @@ -327,26 +327,23 @@ int omfs_is_bad(struct omfs_sb_info *sbi, struct omfs_header *header, | |||
| 327 | return is_bad; | 327 | return is_bad; |
| 328 | } | 328 | } |
| 329 | 329 | ||
| 330 | static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, | 330 | static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx, |
| 331 | u64 fsblock, int hindex) | 331 | u64 fsblock, int hindex) |
| 332 | { | 332 | { |
| 333 | struct inode *dir = file_inode(filp); | ||
| 334 | struct buffer_head *bh; | ||
| 335 | struct omfs_inode *oi; | ||
| 336 | u64 self; | ||
| 337 | int res = 0; | ||
| 338 | unsigned char d_type; | ||
| 339 | |||
| 340 | /* follow chain in this bucket */ | 333 | /* follow chain in this bucket */ |
| 341 | while (fsblock != ~0) { | 334 | while (fsblock != ~0) { |
| 342 | bh = omfs_bread(dir->i_sb, fsblock); | 335 | struct buffer_head *bh = omfs_bread(dir->i_sb, fsblock); |
| 336 | struct omfs_inode *oi; | ||
| 337 | u64 self; | ||
| 338 | unsigned char d_type; | ||
| 339 | |||
| 343 | if (!bh) | 340 | if (!bh) |
| 344 | goto out; | 341 | return true; |
| 345 | 342 | ||
| 346 | oi = (struct omfs_inode *) bh->b_data; | 343 | oi = (struct omfs_inode *) bh->b_data; |
| 347 | if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) { | 344 | if (omfs_is_bad(OMFS_SB(dir->i_sb), &oi->i_head, fsblock)) { |
| 348 | brelse(bh); | 345 | brelse(bh); |
| 349 | goto out; | 346 | return true; |
| 350 | } | 347 | } |
| 351 | 348 | ||
| 352 | self = fsblock; | 349 | self = fsblock; |
| @@ -361,15 +358,16 @@ static int omfs_fill_chain(struct file *filp, void *dirent, filldir_t filldir, | |||
| 361 | 358 | ||
| 362 | d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG; | 359 | d_type = (oi->i_type == OMFS_DIR) ? DT_DIR : DT_REG; |
| 363 | 360 | ||
| 364 | res = filldir(dirent, oi->i_name, strnlen(oi->i_name, | 361 | if (!dir_emit(ctx, oi->i_name, |
| 365 | OMFS_NAMELEN), filp->f_pos, self, d_type); | 362 | strnlen(oi->i_name, OMFS_NAMELEN), |
| 363 | self, d_type)) { | ||
| 364 | brelse(bh); | ||
| 365 | return false; | ||
| 366 | } | ||
| 366 | brelse(bh); | 367 | brelse(bh); |
| 367 | if (res < 0) | 368 | ctx->pos++; |
| 368 | break; | ||
| 369 | filp->f_pos++; | ||
| 370 | } | 369 | } |
| 371 | out: | 370 | return true; |
| 372 | return res; | ||
| 373 | } | 371 | } |
| 374 | 372 | ||
| 375 | static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, | 373 | static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry, |
| @@ -403,60 +401,44 @@ out: | |||
| 403 | return err; | 401 | return err; |
| 404 | } | 402 | } |
| 405 | 403 | ||
| 406 | static int omfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 404 | static int omfs_readdir(struct file *file, struct dir_context *ctx) |
| 407 | { | 405 | { |
| 408 | struct inode *dir = file_inode(filp); | 406 | struct inode *dir = file_inode(file); |
| 409 | struct buffer_head *bh; | 407 | struct buffer_head *bh; |
| 410 | loff_t offset, res; | 408 | __be64 *p; |
| 411 | unsigned int hchain, hindex; | 409 | unsigned int hchain, hindex; |
| 412 | int nbuckets; | 410 | int nbuckets; |
| 413 | u64 fsblock; | 411 | |
| 414 | int ret = -EINVAL; | 412 | if (ctx->pos >> 32) |
| 415 | 413 | return -EINVAL; | |
| 416 | if (filp->f_pos >> 32) | 414 | |
| 417 | goto success; | 415 | if (ctx->pos < 1 << 20) { |
| 418 | 416 | if (!dir_emit_dots(file, ctx)) | |
| 419 | switch ((unsigned long) filp->f_pos) { | 417 | return 0; |
| 420 | case 0: | 418 | ctx->pos = 1 << 20; |
| 421 | if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0) | ||
| 422 | goto success; | ||
| 423 | filp->f_pos++; | ||
| 424 | /* fall through */ | ||
| 425 | case 1: | ||
| 426 | if (filldir(dirent, "..", 2, 1, | ||
| 427 | parent_ino(filp->f_dentry), DT_DIR) < 0) | ||
| 428 | goto success; | ||
| 429 | filp->f_pos = 1 << 20; | ||
| 430 | /* fall through */ | ||
| 431 | } | 419 | } |
| 432 | 420 | ||
| 433 | nbuckets = (dir->i_size - OMFS_DIR_START) / 8; | 421 | nbuckets = (dir->i_size - OMFS_DIR_START) / 8; |
| 434 | 422 | ||
| 435 | /* high 12 bits store bucket + 1 and low 20 bits store hash index */ | 423 | /* high 12 bits store bucket + 1 and low 20 bits store hash index */ |
| 436 | hchain = (filp->f_pos >> 20) - 1; | 424 | hchain = (ctx->pos >> 20) - 1; |
| 437 | hindex = filp->f_pos & 0xfffff; | 425 | hindex = ctx->pos & 0xfffff; |
| 438 | 426 | ||
| 439 | bh = omfs_bread(dir->i_sb, dir->i_ino); | 427 | bh = omfs_bread(dir->i_sb, dir->i_ino); |
| 440 | if (!bh) | 428 | if (!bh) |
| 441 | goto out; | 429 | return -EINVAL; |
| 442 | 430 | ||
| 443 | offset = OMFS_DIR_START + hchain * 8; | 431 | p = (__be64 *)(bh->b_data + OMFS_DIR_START) + hchain; |
| 444 | 432 | ||
| 445 | for (; hchain < nbuckets; hchain++, offset += 8) { | 433 | for (; hchain < nbuckets; hchain++) { |
| 446 | fsblock = be64_to_cpu(*((__be64 *) &bh->b_data[offset])); | 434 | __u64 fsblock = be64_to_cpu(*p++); |
| 447 | 435 | if (!omfs_fill_chain(dir, ctx, fsblock, hindex)) | |
| 448 | res = omfs_fill_chain(filp, dirent, filldir, fsblock, hindex); | ||
| 449 | hindex = 0; | ||
| 450 | if (res < 0) | ||
| 451 | break; | 436 | break; |
| 452 | 437 | hindex = 0; | |
| 453 | filp->f_pos = (hchain+2) << 20; | 438 | ctx->pos = (hchain+2) << 20; |
| 454 | } | 439 | } |
| 455 | brelse(bh); | 440 | brelse(bh); |
| 456 | success: | 441 | return 0; |
| 457 | ret = 0; | ||
| 458 | out: | ||
| 459 | return ret; | ||
| 460 | } | 442 | } |
| 461 | 443 | ||
| 462 | const struct inode_operations omfs_dir_inops = { | 444 | const struct inode_operations omfs_dir_inops = { |
| @@ -470,6 +452,6 @@ const struct inode_operations omfs_dir_inops = { | |||
| 470 | 452 | ||
| 471 | const struct file_operations omfs_dir_operations = { | 453 | const struct file_operations omfs_dir_operations = { |
| 472 | .read = generic_read_dir, | 454 | .read = generic_read_dir, |
| 473 | .readdir = omfs_readdir, | 455 | .iterate = omfs_readdir, |
| 474 | .llseek = generic_file_llseek, | 456 | .llseek = generic_file_llseek, |
| 475 | }; | 457 | }; |
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 75885ffde44e..8c0ceb8dd1f7 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c | |||
| @@ -162,11 +162,11 @@ static const struct file_operations openpromfs_prop_ops = { | |||
| 162 | .release = seq_release, | 162 | .release = seq_release, |
| 163 | }; | 163 | }; |
| 164 | 164 | ||
| 165 | static int openpromfs_readdir(struct file *, void *, filldir_t); | 165 | static int openpromfs_readdir(struct file *, struct dir_context *); |
| 166 | 166 | ||
| 167 | static const struct file_operations openprom_operations = { | 167 | static const struct file_operations openprom_operations = { |
| 168 | .read = generic_read_dir, | 168 | .read = generic_read_dir, |
| 169 | .readdir = openpromfs_readdir, | 169 | .iterate = openpromfs_readdir, |
| 170 | .llseek = generic_file_llseek, | 170 | .llseek = generic_file_llseek, |
| 171 | }; | 171 | }; |
| 172 | 172 | ||
| @@ -260,71 +260,64 @@ found: | |||
| 260 | return NULL; | 260 | return NULL; |
| 261 | } | 261 | } |
| 262 | 262 | ||
| 263 | static int openpromfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | 263 | static int openpromfs_readdir(struct file *file, struct dir_context *ctx) |
| 264 | { | 264 | { |
| 265 | struct inode *inode = file_inode(filp); | 265 | struct inode *inode = file_inode(file); |
| 266 | struct op_inode_info *oi = OP_I(inode); | 266 | struct op_inode_info *oi = OP_I(inode); |
| 267 | struct device_node *dp = oi->u.node; | 267 | struct device_node *dp = oi->u.node; |
| 268 | struct device_node *child; | 268 | struct device_node *child; |
| 269 | struct property *prop; | 269 | struct property *prop; |
| 270 | unsigned int ino; | ||
| 271 | int i; | 270 | int i; |
| 272 | 271 | ||
| 273 | mutex_lock(&op_mutex); | 272 | mutex_lock(&op_mutex); |
| 274 | 273 | ||
| 275 | ino = inode->i_ino; | 274 | if (ctx->pos == 0) { |
| 276 | i = filp->f_pos; | 275 | if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) |
| 277 | switch (i) { | ||
| 278 | case 0: | ||
| 279 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
| 280 | goto out; | 276 | goto out; |
| 281 | i++; | 277 | ctx->pos = 1; |
| 282 | filp->f_pos++; | 278 | } |
| 283 | /* fall thru */ | 279 | if (ctx->pos == 1) { |
| 284 | case 1: | 280 | if (!dir_emit(ctx, "..", 2, |
| 285 | if (filldir(dirent, "..", 2, i, | ||
| 286 | (dp->parent == NULL ? | 281 | (dp->parent == NULL ? |
| 287 | OPENPROM_ROOT_INO : | 282 | OPENPROM_ROOT_INO : |
| 288 | dp->parent->unique_id), DT_DIR) < 0) | 283 | dp->parent->unique_id), DT_DIR)) |
| 289 | goto out; | 284 | goto out; |
| 290 | i++; | 285 | ctx->pos = 2; |
| 291 | filp->f_pos++; | 286 | } |
| 292 | /* fall thru */ | 287 | i = ctx->pos - 2; |
| 293 | default: | ||
| 294 | i -= 2; | ||
| 295 | |||
| 296 | /* First, the children nodes as directories. */ | ||
| 297 | child = dp->child; | ||
| 298 | while (i && child) { | ||
| 299 | child = child->sibling; | ||
| 300 | i--; | ||
| 301 | } | ||
| 302 | while (child) { | ||
| 303 | if (filldir(dirent, | ||
| 304 | child->path_component_name, | ||
| 305 | strlen(child->path_component_name), | ||
| 306 | filp->f_pos, child->unique_id, DT_DIR) < 0) | ||
| 307 | goto out; | ||
| 308 | |||
| 309 | filp->f_pos++; | ||
| 310 | child = child->sibling; | ||
| 311 | } | ||
| 312 | 288 | ||
| 313 | /* Next, the properties as files. */ | 289 | /* First, the children nodes as directories. */ |
| 314 | prop = dp->properties; | 290 | child = dp->child; |
| 315 | while (i && prop) { | 291 | while (i && child) { |
| 316 | prop = prop->next; | 292 | child = child->sibling; |
| 317 | i--; | 293 | i--; |
| 318 | } | 294 | } |
| 319 | while (prop) { | 295 | while (child) { |
| 320 | if (filldir(dirent, prop->name, strlen(prop->name), | 296 | if (!dir_emit(ctx, |
| 321 | filp->f_pos, prop->unique_id, DT_REG) < 0) | 297 | child->path_component_name, |
| 322 | goto out; | 298 | strlen(child->path_component_name), |
| 299 | child->unique_id, DT_DIR)) | ||
| 300 | goto out; | ||
| 323 | 301 | ||
| 324 | filp->f_pos++; | 302 | ctx->pos++; |
| 325 | prop = prop->next; | 303 | child = child->sibling; |
| 326 | } | 304 | } |
| 305 | |||
| 306 | /* Next, the properties as files. */ | ||
| 307 | prop = dp->properties; | ||
| 308 | while (i && prop) { | ||
| 309 | prop = prop->next; | ||
| 310 | i--; | ||
| 327 | } | 311 | } |
| 312 | while (prop) { | ||
| 313 | if (!dir_emit(ctx, prop->name, strlen(prop->name), | ||
| 314 | prop->unique_id, DT_REG)) | ||
| 315 | goto out; | ||
| 316 | |||
| 317 | ctx->pos++; | ||
| 318 | prop = prop->next; | ||
| 319 | } | ||
| 320 | |||
| 328 | out: | 321 | out: |
| 329 | mutex_unlock(&op_mutex); | 322 | mutex_unlock(&op_mutex); |
| 330 | return 0; | 323 | return 0; |
diff --git a/fs/proc/base.c b/fs/proc/base.c index c3834dad09b3..0016350ad95e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -1681,11 +1681,11 @@ const struct dentry_operations pid_dentry_operations = | |||
| 1681 | * reported by readdir in sync with the inode numbers reported | 1681 | * reported by readdir in sync with the inode numbers reported |
| 1682 | * by stat. | 1682 | * by stat. |
| 1683 | */ | 1683 | */ |
| 1684 | int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | 1684 | bool proc_fill_cache(struct file *file, struct dir_context *ctx, |
| 1685 | const char *name, int len, | 1685 | const char *name, int len, |
| 1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) | 1686 | instantiate_t instantiate, struct task_struct *task, const void *ptr) |
| 1687 | { | 1687 | { |
| 1688 | struct dentry *child, *dir = filp->f_path.dentry; | 1688 | struct dentry *child, *dir = file->f_path.dentry; |
| 1689 | struct inode *inode; | 1689 | struct inode *inode; |
| 1690 | struct qstr qname; | 1690 | struct qstr qname; |
| 1691 | ino_t ino = 0; | 1691 | ino_t ino = 0; |
| @@ -1720,7 +1720,7 @@ end_instantiate: | |||
| 1720 | ino = find_inode_number(dir, &qname); | 1720 | ino = find_inode_number(dir, &qname); |
| 1721 | if (!ino) | 1721 | if (!ino) |
| 1722 | ino = 1; | 1722 | ino = 1; |
| 1723 | return filldir(dirent, name, len, filp->f_pos, ino, type); | 1723 | return dir_emit(ctx, name, len, ino, type); |
| 1724 | } | 1724 | } |
| 1725 | 1725 | ||
| 1726 | #ifdef CONFIG_CHECKPOINT_RESTORE | 1726 | #ifdef CONFIG_CHECKPOINT_RESTORE |
| @@ -1931,14 +1931,15 @@ static const struct inode_operations proc_map_files_inode_operations = { | |||
| 1931 | }; | 1931 | }; |
| 1932 | 1932 | ||
| 1933 | static int | 1933 | static int |
| 1934 | proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | 1934 | proc_map_files_readdir(struct file *file, struct dir_context *ctx) |
| 1935 | { | 1935 | { |
| 1936 | struct dentry *dentry = filp->f_path.dentry; | ||
| 1937 | struct inode *inode = dentry->d_inode; | ||
| 1938 | struct vm_area_struct *vma; | 1936 | struct vm_area_struct *vma; |
| 1939 | struct task_struct *task; | 1937 | struct task_struct *task; |
| 1940 | struct mm_struct *mm; | 1938 | struct mm_struct *mm; |
| 1941 | ino_t ino; | 1939 | unsigned long nr_files, pos, i; |
| 1940 | struct flex_array *fa = NULL; | ||
| 1941 | struct map_files_info info; | ||
| 1942 | struct map_files_info *p; | ||
| 1942 | int ret; | 1943 | int ret; |
| 1943 | 1944 | ||
| 1944 | ret = -EPERM; | 1945 | ret = -EPERM; |
| @@ -1946,7 +1947,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1946 | goto out; | 1947 | goto out; |
| 1947 | 1948 | ||
| 1948 | ret = -ENOENT; | 1949 | ret = -ENOENT; |
| 1949 | task = get_proc_task(inode); | 1950 | task = get_proc_task(file_inode(file)); |
| 1950 | if (!task) | 1951 | if (!task) |
| 1951 | goto out; | 1952 | goto out; |
| 1952 | 1953 | ||
| @@ -1955,91 +1956,73 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 1955 | goto out_put_task; | 1956 | goto out_put_task; |
| 1956 | 1957 | ||
| 1957 | ret = 0; | 1958 | ret = 0; |
| 1958 | switch (filp->f_pos) { | 1959 | if (!dir_emit_dots(file, ctx)) |
| 1959 | case 0: | 1960 | goto out_put_task; |
| 1960 | ino = inode->i_ino; | ||
| 1961 | if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) | ||
| 1962 | goto out_put_task; | ||
| 1963 | filp->f_pos++; | ||
| 1964 | case 1: | ||
| 1965 | ino = parent_ino(dentry); | ||
| 1966 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
| 1967 | goto out_put_task; | ||
| 1968 | filp->f_pos++; | ||
| 1969 | default: | ||
| 1970 | { | ||
| 1971 | unsigned long nr_files, pos, i; | ||
| 1972 | struct flex_array *fa = NULL; | ||
| 1973 | struct map_files_info info; | ||
| 1974 | struct map_files_info *p; | ||
| 1975 | |||
| 1976 | mm = get_task_mm(task); | ||
| 1977 | if (!mm) | ||
| 1978 | goto out_put_task; | ||
| 1979 | down_read(&mm->mmap_sem); | ||
| 1980 | 1961 | ||
| 1981 | nr_files = 0; | 1962 | mm = get_task_mm(task); |
| 1963 | if (!mm) | ||
| 1964 | goto out_put_task; | ||
| 1965 | down_read(&mm->mmap_sem); | ||
| 1982 | 1966 | ||
| 1983 | /* | 1967 | nr_files = 0; |
| 1984 | * We need two passes here: | ||
| 1985 | * | ||
| 1986 | * 1) Collect vmas of mapped files with mmap_sem taken | ||
| 1987 | * 2) Release mmap_sem and instantiate entries | ||
| 1988 | * | ||
| 1989 | * otherwise we get lockdep complained, since filldir() | ||
| 1990 | * routine might require mmap_sem taken in might_fault(). | ||
| 1991 | */ | ||
| 1992 | 1968 | ||
| 1993 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { | 1969 | /* |
| 1994 | if (vma->vm_file && ++pos > filp->f_pos) | 1970 | * We need two passes here: |
| 1995 | nr_files++; | 1971 | * |
| 1996 | } | 1972 | * 1) Collect vmas of mapped files with mmap_sem taken |
| 1973 | * 2) Release mmap_sem and instantiate entries | ||
| 1974 | * | ||
| 1975 | * otherwise we get lockdep complained, since filldir() | ||
| 1976 | * routine might require mmap_sem taken in might_fault(). | ||
| 1977 | */ | ||
| 1997 | 1978 | ||
| 1998 | if (nr_files) { | 1979 | for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { |
| 1999 | fa = flex_array_alloc(sizeof(info), nr_files, | 1980 | if (vma->vm_file && ++pos > ctx->pos) |
| 2000 | GFP_KERNEL); | 1981 | nr_files++; |
| 2001 | if (!fa || flex_array_prealloc(fa, 0, nr_files, | 1982 | } |
| 2002 | GFP_KERNEL)) { | 1983 | |
| 2003 | ret = -ENOMEM; | 1984 | if (nr_files) { |
| 2004 | if (fa) | 1985 | fa = flex_array_alloc(sizeof(info), nr_files, |
| 2005 | flex_array_free(fa); | 1986 | GFP_KERNEL); |
| 2006 | up_read(&mm->mmap_sem); | 1987 | if (!fa || flex_array_prealloc(fa, 0, nr_files, |
| 2007 | mmput(mm); | 1988 | GFP_KERNEL)) { |
| 2008 | goto out_put_task; | 1989 | ret = -ENOMEM; |
| 2009 | } | 1990 | if (fa) |
| 2010 | for (i = 0, vma = mm->mmap, pos = 2; vma; | 1991 | flex_array_free(fa); |
| 2011 | vma = vma->vm_next) { | 1992 | up_read(&mm->mmap_sem); |
| 2012 | if (!vma->vm_file) | 1993 | mmput(mm); |
| 2013 | continue; | 1994 | goto out_put_task; |
| 2014 | if (++pos <= filp->f_pos) | ||
| 2015 | continue; | ||
| 2016 | |||
| 2017 | info.mode = vma->vm_file->f_mode; | ||
| 2018 | info.len = snprintf(info.name, | ||
| 2019 | sizeof(info.name), "%lx-%lx", | ||
| 2020 | vma->vm_start, vma->vm_end); | ||
| 2021 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) | ||
| 2022 | BUG(); | ||
| 2023 | } | ||
| 2024 | } | 1995 | } |
| 2025 | up_read(&mm->mmap_sem); | 1996 | for (i = 0, vma = mm->mmap, pos = 2; vma; |
| 2026 | 1997 | vma = vma->vm_next) { | |
| 2027 | for (i = 0; i < nr_files; i++) { | 1998 | if (!vma->vm_file) |
| 2028 | p = flex_array_get(fa, i); | 1999 | continue; |
| 2029 | ret = proc_fill_cache(filp, dirent, filldir, | 2000 | if (++pos <= ctx->pos) |
| 2030 | p->name, p->len, | 2001 | continue; |
| 2031 | proc_map_files_instantiate, | 2002 | |
| 2032 | task, | 2003 | info.mode = vma->vm_file->f_mode; |
| 2033 | (void *)(unsigned long)p->mode); | 2004 | info.len = snprintf(info.name, |
| 2034 | if (ret) | 2005 | sizeof(info.name), "%lx-%lx", |
| 2035 | break; | 2006 | vma->vm_start, vma->vm_end); |
| 2036 | filp->f_pos++; | 2007 | if (flex_array_put(fa, i++, &info, GFP_KERNEL)) |
| 2008 | BUG(); | ||
| 2037 | } | 2009 | } |
| 2038 | if (fa) | ||
| 2039 | flex_array_free(fa); | ||
| 2040 | mmput(mm); | ||
| 2041 | } | 2010 | } |
| 2011 | up_read(&mm->mmap_sem); | ||
| 2012 | |||
| 2013 | for (i = 0; i < nr_files; i++) { | ||
| 2014 | p = flex_array_get(fa, i); | ||
| 2015 | if (!proc_fill_cache(file, ctx, | ||
| 2016 | p->name, p->len, | ||
| 2017 | proc_map_files_instantiate, | ||
| 2018 | task, | ||
| 2019 | (void *)(unsigned long)p->mode)) | ||
| 2020 | break; | ||
| 2021 | ctx->pos++; | ||
| 2042 | } | 2022 | } |
| 2023 | if (fa) | ||
| 2024 | flex_array_free(fa); | ||
| 2025 | mmput(mm); | ||
| 2043 | 2026 | ||
| 2044 | out_put_task: | 2027 | out_put_task: |
| 2045 | put_task_struct(task); | 2028 | put_task_struct(task); |
| @@ -2049,7 +2032,7 @@ out: | |||
| 2049 | 2032 | ||
| 2050 | static const struct file_operations proc_map_files_operations = { | 2033 | static const struct file_operations proc_map_files_operations = { |
| 2051 | .read = generic_read_dir, | 2034 | .read = generic_read_dir, |
| 2052 | .readdir = proc_map_files_readdir, | 2035 | .iterate = proc_map_files_readdir, |
| 2053 | .llseek = default_llseek, | 2036 | .llseek = default_llseek, |
| 2054 | }; | 2037 | }; |
| 2055 | 2038 | ||
| @@ -2217,67 +2200,30 @@ out_no_task: | |||
| 2217 | return error; | 2200 | return error; |
| 2218 | } | 2201 | } |
| 2219 | 2202 | ||
| 2220 | static int proc_pident_fill_cache(struct file *filp, void *dirent, | 2203 | static int proc_pident_readdir(struct file *file, struct dir_context *ctx, |
| 2221 | filldir_t filldir, struct task_struct *task, const struct pid_entry *p) | ||
| 2222 | { | ||
| 2223 | return proc_fill_cache(filp, dirent, filldir, p->name, p->len, | ||
| 2224 | proc_pident_instantiate, task, p); | ||
| 2225 | } | ||
| 2226 | |||
| 2227 | static int proc_pident_readdir(struct file *filp, | ||
| 2228 | void *dirent, filldir_t filldir, | ||
| 2229 | const struct pid_entry *ents, unsigned int nents) | 2204 | const struct pid_entry *ents, unsigned int nents) |
| 2230 | { | 2205 | { |
| 2231 | int i; | 2206 | struct task_struct *task = get_proc_task(file_inode(file)); |
| 2232 | struct dentry *dentry = filp->f_path.dentry; | 2207 | const struct pid_entry *p; |
| 2233 | struct inode *inode = dentry->d_inode; | ||
| 2234 | struct task_struct *task = get_proc_task(inode); | ||
| 2235 | const struct pid_entry *p, *last; | ||
| 2236 | ino_t ino; | ||
| 2237 | int ret; | ||
| 2238 | 2208 | ||
| 2239 | ret = -ENOENT; | ||
| 2240 | if (!task) | 2209 | if (!task) |
| 2241 | goto out_no_task; | 2210 | return -ENOENT; |
| 2242 | 2211 | ||
| 2243 | ret = 0; | 2212 | if (!dir_emit_dots(file, ctx)) |
| 2244 | i = filp->f_pos; | 2213 | goto out; |
| 2245 | switch (i) { | 2214 | |
| 2246 | case 0: | 2215 | if (ctx->pos >= nents + 2) |
| 2247 | ino = inode->i_ino; | 2216 | goto out; |
| 2248 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
| 2249 | goto out; | ||
| 2250 | i++; | ||
| 2251 | filp->f_pos++; | ||
| 2252 | /* fall through */ | ||
| 2253 | case 1: | ||
| 2254 | ino = parent_ino(dentry); | ||
| 2255 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | ||
| 2256 | goto out; | ||
| 2257 | i++; | ||
| 2258 | filp->f_pos++; | ||
| 2259 | /* fall through */ | ||
| 2260 | default: | ||
| 2261 | i -= 2; | ||
| 2262 | if (i >= nents) { | ||
| 2263 | ret = 1; | ||
| 2264 | goto out; | ||
| 2265 | } | ||
| 2266 | p = ents + i; | ||
| 2267 | last = &ents[nents - 1]; | ||
| 2268 | while (p <= last) { | ||
| 2269 | if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0) | ||
| 2270 | goto out; | ||
| 2271 | filp->f_pos++; | ||
| 2272 | p++; | ||
| 2273 | } | ||
| 2274 | } | ||
| 2275 | 2217 | ||
| 2276 | ret = 1; | 2218 | for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) { |
| 2219 | if (!proc_fill_cache(file, ctx, p->name, p->len, | ||
| 2220 | proc_pident_instantiate, task, p)) | ||
| 2221 | break; | ||
| 2222 | ctx->pos++; | ||
| 2223 | } | ||
| 2277 | out: | 2224 | out: |
| 2278 | put_task_struct(task); | 2225 | put_task_struct(task); |
| 2279 | out_no_task: | 2226 | return 0; |
| 2280 | return ret; | ||
| 2281 | } | 2227 | } |
| 2282 | 2228 | ||
| 2283 | #ifdef CONFIG_SECURITY | 2229 | #ifdef CONFIG_SECURITY |
| @@ -2362,16 +2308,15 @@ static const struct pid_entry attr_dir_stuff[] = { | |||
| 2362 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), | 2308 | REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations), |
| 2363 | }; | 2309 | }; |
| 2364 | 2310 | ||
| 2365 | static int proc_attr_dir_readdir(struct file * filp, | 2311 | static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx) |
| 2366 | void * dirent, filldir_t filldir) | ||
| 2367 | { | 2312 | { |
| 2368 | return proc_pident_readdir(filp,dirent,filldir, | 2313 | return proc_pident_readdir(file, ctx, |
| 2369 | attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff)); | 2314 | attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff)); |
| 2370 | } | 2315 | } |
| 2371 | 2316 | ||
| 2372 | static const struct file_operations proc_attr_dir_operations = { | 2317 | static const struct file_operations proc_attr_dir_operations = { |
| 2373 | .read = generic_read_dir, | 2318 | .read = generic_read_dir, |
| 2374 | .readdir = proc_attr_dir_readdir, | 2319 | .iterate = proc_attr_dir_readdir, |
| 2375 | .llseek = default_llseek, | 2320 | .llseek = default_llseek, |
| 2376 | }; | 2321 | }; |
| 2377 | 2322 | ||
| @@ -2725,16 +2670,15 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 2725 | #endif | 2670 | #endif |
| 2726 | }; | 2671 | }; |
| 2727 | 2672 | ||
| 2728 | static int proc_tgid_base_readdir(struct file * filp, | 2673 | static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx) |
| 2729 | void * dirent, filldir_t filldir) | ||
| 2730 | { | 2674 | { |
| 2731 | return proc_pident_readdir(filp,dirent,filldir, | 2675 | return proc_pident_readdir(file, ctx, |
| 2732 | tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff)); | 2676 | tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff)); |
| 2733 | } | 2677 | } |
| 2734 | 2678 | ||
| 2735 | static const struct file_operations proc_tgid_base_operations = { | 2679 | static const struct file_operations proc_tgid_base_operations = { |
| 2736 | .read = generic_read_dir, | 2680 | .read = generic_read_dir, |
| 2737 | .readdir = proc_tgid_base_readdir, | 2681 | .iterate = proc_tgid_base_readdir, |
| 2738 | .llseek = default_llseek, | 2682 | .llseek = default_llseek, |
| 2739 | }; | 2683 | }; |
| 2740 | 2684 | ||
| @@ -2936,58 +2880,42 @@ retry: | |||
| 2936 | 2880 | ||
| 2937 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) | 2881 | #define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1) |
| 2938 | 2882 | ||
| 2939 | static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
| 2940 | struct tgid_iter iter) | ||
| 2941 | { | ||
| 2942 | char name[PROC_NUMBUF]; | ||
| 2943 | int len = snprintf(name, sizeof(name), "%d", iter.tgid); | ||
| 2944 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
| 2945 | proc_pid_instantiate, iter.task, NULL); | ||
| 2946 | } | ||
| 2947 | |||
| 2948 | static int fake_filldir(void *buf, const char *name, int namelen, | ||
| 2949 | loff_t offset, u64 ino, unsigned d_type) | ||
| 2950 | { | ||
| 2951 | return 0; | ||
| 2952 | } | ||
| 2953 | |||
| 2954 | /* for the /proc/ directory itself, after non-process stuff has been done */ | 2883 | /* for the /proc/ directory itself, after non-process stuff has been done */ |
| 2955 | int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) | 2884 | int proc_pid_readdir(struct file *file, struct dir_context *ctx) |
| 2956 | { | 2885 | { |
| 2957 | struct tgid_iter iter; | 2886 | struct tgid_iter iter; |
| 2958 | struct pid_namespace *ns; | 2887 | struct pid_namespace *ns; |
| 2959 | filldir_t __filldir; | 2888 | loff_t pos = ctx->pos; |
| 2960 | loff_t pos = filp->f_pos; | ||
| 2961 | 2889 | ||
| 2962 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) | 2890 | if (pos >= PID_MAX_LIMIT + TGID_OFFSET) |
| 2963 | goto out; | 2891 | return 0; |
| 2964 | 2892 | ||
| 2965 | if (pos == TGID_OFFSET - 1) { | 2893 | if (pos == TGID_OFFSET - 1) { |
| 2966 | if (proc_fill_cache(filp, dirent, filldir, "self", 4, | 2894 | if (!proc_fill_cache(file, ctx, "self", 4, NULL, NULL, NULL)) |
| 2967 | NULL, NULL, NULL) < 0) | 2895 | return 0; |
| 2968 | goto out; | ||
| 2969 | iter.tgid = 0; | 2896 | iter.tgid = 0; |
| 2970 | } else { | 2897 | } else { |
| 2971 | iter.tgid = pos - TGID_OFFSET; | 2898 | iter.tgid = pos - TGID_OFFSET; |
| 2972 | } | 2899 | } |
| 2973 | iter.task = NULL; | 2900 | iter.task = NULL; |
| 2974 | ns = filp->f_dentry->d_sb->s_fs_info; | 2901 | ns = file->f_dentry->d_sb->s_fs_info; |
| 2975 | for (iter = next_tgid(ns, iter); | 2902 | for (iter = next_tgid(ns, iter); |
| 2976 | iter.task; | 2903 | iter.task; |
| 2977 | iter.tgid += 1, iter = next_tgid(ns, iter)) { | 2904 | iter.tgid += 1, iter = next_tgid(ns, iter)) { |
| 2978 | if (has_pid_permissions(ns, iter.task, 2)) | 2905 | char name[PROC_NUMBUF]; |
| 2979 | __filldir = filldir; | 2906 | int len; |
| 2980 | else | 2907 | if (!has_pid_permissions(ns, iter.task, 2)) |
| 2981 | __filldir = fake_filldir; | 2908 | continue; |
| 2982 | 2909 | ||
| 2983 | filp->f_pos = iter.tgid + TGID_OFFSET; | 2910 | len = snprintf(name, sizeof(name), "%d", iter.tgid); |
| 2984 | if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) { | 2911 | ctx->pos = iter.tgid + TGID_OFFSET; |
| 2912 | if (!proc_fill_cache(file, ctx, name, len, | ||
| 2913 | proc_pid_instantiate, iter.task, NULL)) { | ||
| 2985 | put_task_struct(iter.task); | 2914 | put_task_struct(iter.task); |
| 2986 | goto out; | 2915 | return 0; |
| 2987 | } | 2916 | } |
| 2988 | } | 2917 | } |
| 2989 | filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; | 2918 | ctx->pos = PID_MAX_LIMIT + TGID_OFFSET; |
| 2990 | out: | ||
| 2991 | return 0; | 2919 | return 0; |
| 2992 | } | 2920 | } |
| 2993 | 2921 | ||
| @@ -3075,11 +3003,10 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 3075 | #endif | 3003 | #endif |
| 3076 | }; | 3004 | }; |
| 3077 | 3005 | ||
| 3078 | static int proc_tid_base_readdir(struct file * filp, | 3006 | static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx) |
| 3079 | void * dirent, filldir_t filldir) | ||
| 3080 | { | 3007 | { |
| 3081 | return proc_pident_readdir(filp,dirent,filldir, | 3008 | return proc_pident_readdir(file, ctx, |
| 3082 | tid_base_stuff,ARRAY_SIZE(tid_base_stuff)); | 3009 | tid_base_stuff, ARRAY_SIZE(tid_base_stuff)); |
| 3083 | } | 3010 | } |
| 3084 | 3011 | ||
| 3085 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) | 3012 | static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) |
| @@ -3090,7 +3017,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den | |||
| 3090 | 3017 | ||
| 3091 | static const struct file_operations proc_tid_base_operations = { | 3018 | static const struct file_operations proc_tid_base_operations = { |
| 3092 | .read = generic_read_dir, | 3019 | .read = generic_read_dir, |
| 3093 | .readdir = proc_tid_base_readdir, | 3020 | .iterate = proc_tid_base_readdir, |
| 3094 | .llseek = default_llseek, | 3021 | .llseek = default_llseek, |
| 3095 | }; | 3022 | }; |
| 3096 | 3023 | ||
| @@ -3231,30 +3158,16 @@ static struct task_struct *next_tid(struct task_struct *start) | |||
| 3231 | return pos; | 3158 | return pos; |
| 3232 | } | 3159 | } |
| 3233 | 3160 | ||
| 3234 | static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir, | ||
| 3235 | struct task_struct *task, int tid) | ||
| 3236 | { | ||
| 3237 | char name[PROC_NUMBUF]; | ||
| 3238 | int len = snprintf(name, sizeof(name), "%d", tid); | ||
| 3239 | return proc_fill_cache(filp, dirent, filldir, name, len, | ||
| 3240 | proc_task_instantiate, task, NULL); | ||
| 3241 | } | ||
| 3242 | |||
| 3243 | /* for the /proc/TGID/task/ directories */ | 3161 | /* for the /proc/TGID/task/ directories */ |
| 3244 | static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir) | 3162 | static int proc_task_readdir(struct file *file, struct dir_context *ctx) |
| 3245 | { | 3163 | { |
| 3246 | struct dentry *dentry = filp->f_path.dentry; | ||
| 3247 | struct inode *inode = dentry->d_inode; | ||
| 3248 | struct task_struct *leader = NULL; | 3164 | struct task_struct *leader = NULL; |
| 3249 | struct task_struct *task; | 3165 | struct task_struct *task = get_proc_task(file_inode(file)); |
| 3250 | int retval = -ENOENT; | ||
| 3251 | ino_t ino; | ||
| 3252 | int tid; | ||
| 3253 | struct pid_namespace *ns; | 3166 | struct pid_namespace *ns; |
| 3167 | int tid; | ||
| 3254 | 3168 | ||
| 3255 | task = get_proc_task(inode); | ||
| 3256 | if (!task) | 3169 | if (!task) |
| 3257 | goto out_no_task; | 3170 | return -ENOENT; |
| 3258 | rcu_read_lock(); | 3171 | rcu_read_lock(); |
| 3259 | if (pid_alive(task)) { | 3172 | if (pid_alive(task)) { |
| 3260 | leader = task->group_leader; | 3173 | leader = task->group_leader; |
| @@ -3263,46 +3176,36 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi | |||
| 3263 | rcu_read_unlock(); | 3176 | rcu_read_unlock(); |
| 3264 | put_task_struct(task); | 3177 | put_task_struct(task); |
| 3265 | if (!leader) | 3178 | if (!leader) |
| 3266 | goto out_no_task; | 3179 | return -ENOENT; |
| 3267 | retval = 0; | ||
| 3268 | 3180 | ||
| 3269 | switch ((unsigned long)filp->f_pos) { | 3181 | if (!dir_emit_dots(file, ctx)) |
| 3270 | case 0: | 3182 | goto out; |
| 3271 | ino = inode->i_ino; | ||
| 3272 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0) | ||
| 3273 | goto out; | ||
| 3274 | filp->f_pos++; | ||
| 3275 | /* fall through */ | ||
| 3276 | case 1: | ||
| 3277 | ino = parent_ino(dentry); | ||
| 3278 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0) | ||
| 3279 | goto out; | ||
| 3280 | filp->f_pos++; | ||
| 3281 | /* fall through */ | ||
| 3282 | } | ||
| 3283 | 3183 | ||
| 3284 | /* f_version caches the tgid value that the last readdir call couldn't | 3184 | /* f_version caches the tgid value that the last readdir call couldn't |
| 3285 | * return. lseek aka telldir automagically resets f_version to 0. | 3185 | * return. lseek aka telldir automagically resets f_version to 0. |
| 3286 | */ | 3186 | */ |
| 3287 | ns = filp->f_dentry->d_sb->s_fs_info; | 3187 | ns = file->f_dentry->d_sb->s_fs_info; |
| 3288 | tid = (int)filp->f_version; | 3188 | tid = (int)file->f_version; |
| 3289 | filp->f_version = 0; | 3189 | file->f_version = 0; |
| 3290 | for (task = first_tid(leader, tid, filp->f_pos - 2, ns); | 3190 | for (task = first_tid(leader, tid, ctx->pos - 2, ns); |
| 3291 | task; | 3191 | task; |
| 3292 | task = next_tid(task), filp->f_pos++) { | 3192 | task = next_tid(task), ctx->pos++) { |
| 3193 | char name[PROC_NUMBUF]; | ||
| 3194 | int len; | ||
| 3293 | tid = task_pid_nr_ns(task, ns); | 3195 | tid = task_pid_nr_ns(task, ns); |
| 3294 | if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { | 3196 | len = snprintf(name, sizeof(name), "%d", tid); |
| 3197 | if (!proc_fill_cache(file, ctx, name, len, | ||
| 3198 | proc_task_instantiate, task, NULL)) { | ||
| 3295 | /* returning this tgid failed, save it as the first | 3199 | /* returning this tgid failed, save it as the first |
| 3296 | * pid for the next readir call */ | 3200 | * pid for the next readir call */ |
| 3297 | filp->f_version = (u64)tid; | 3201 | file->f_version = (u64)tid; |
| 3298 | put_task_struct(task); | 3202 | put_task_struct(task); |
| 3299 | break; | 3203 | break; |
| 3300 | } | 3204 | } |
| 3301 | } | 3205 | } |
| 3302 | out: | 3206 | out: |
| 3303 | put_task_struct(leader); | 3207 | put_task_struct(leader); |
| 3304 | out_no_task: | 3208 | return 0; |
| 3305 | return retval; | ||
| 3306 | } | 3209 | } |
| 3307 | 3210 | ||
| 3308 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 3211 | static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
| @@ -3328,6 +3231,6 @@ static const struct inode_operations proc_task_inode_operations = { | |||
| 3328 | 3231 | ||
| 3329 | static const struct file_operations proc_task_operations = { | 3232 | static const struct file_operations proc_task_operations = { |
| 3330 | .read = generic_read_dir, | 3233 | .read = generic_read_dir, |
| 3331 | .readdir = proc_task_readdir, | 3234 | .iterate = proc_task_readdir, |
| 3332 | .llseek = default_llseek, | 3235 | .llseek = default_llseek, |
| 3333 | }; | 3236 | }; |
diff --git a/fs/proc/fd.c b/fs/proc/fd.c index d7a4a28ef630..1441f143c43b 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c | |||
| @@ -219,74 +219,58 @@ out_no_task: | |||
| 219 | return result; | 219 | return result; |
| 220 | } | 220 | } |
| 221 | 221 | ||
| 222 | static int proc_readfd_common(struct file * filp, void * dirent, | 222 | static int proc_readfd_common(struct file *file, struct dir_context *ctx, |
| 223 | filldir_t filldir, instantiate_t instantiate) | 223 | instantiate_t instantiate) |
| 224 | { | 224 | { |
| 225 | struct dentry *dentry = filp->f_path.dentry; | 225 | struct task_struct *p = get_proc_task(file_inode(file)); |
| 226 | struct inode *inode = dentry->d_inode; | ||
| 227 | struct task_struct *p = get_proc_task(inode); | ||
| 228 | struct files_struct *files; | 226 | struct files_struct *files; |
| 229 | unsigned int fd, ino; | 227 | unsigned int fd; |
| 230 | int retval; | ||
| 231 | 228 | ||
| 232 | retval = -ENOENT; | ||
| 233 | if (!p) | 229 | if (!p) |
| 234 | goto out_no_task; | 230 | return -ENOENT; |
| 235 | retval = 0; | ||
| 236 | |||
| 237 | fd = filp->f_pos; | ||
| 238 | switch (fd) { | ||
| 239 | case 0: | ||
| 240 | if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0) | ||
| 241 | goto out; | ||
| 242 | filp->f_pos++; | ||
| 243 | case 1: | ||
| 244 | ino = parent_ino(dentry); | ||
| 245 | if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) | ||
| 246 | goto out; | ||
| 247 | filp->f_pos++; | ||
| 248 | default: | ||
| 249 | files = get_files_struct(p); | ||
| 250 | if (!files) | ||
| 251 | goto out; | ||
| 252 | rcu_read_lock(); | ||
| 253 | for (fd = filp->f_pos - 2; | ||
| 254 | fd < files_fdtable(files)->max_fds; | ||
| 255 | fd++, filp->f_pos++) { | ||
| 256 | char name[PROC_NUMBUF]; | ||
| 257 | int len; | ||
| 258 | int rv; | ||
| 259 | |||
| 260 | if (!fcheck_files(files, fd)) | ||
| 261 | continue; | ||
| 262 | rcu_read_unlock(); | ||
| 263 | 231 | ||
| 264 | len = snprintf(name, sizeof(name), "%d", fd); | 232 | if (!dir_emit_dots(file, ctx)) |
| 265 | rv = proc_fill_cache(filp, dirent, filldir, | 233 | goto out; |
| 266 | name, len, instantiate, p, | 234 | if (!dir_emit_dots(file, ctx)) |
| 267 | (void *)(unsigned long)fd); | 235 | goto out; |
| 268 | if (rv < 0) | 236 | files = get_files_struct(p); |
| 269 | goto out_fd_loop; | 237 | if (!files) |
| 270 | rcu_read_lock(); | 238 | goto out; |
| 271 | } | 239 | |
| 272 | rcu_read_unlock(); | 240 | rcu_read_lock(); |
| 273 | out_fd_loop: | 241 | for (fd = ctx->pos - 2; |
| 274 | put_files_struct(files); | 242 | fd < files_fdtable(files)->max_fds; |
| 243 | fd++, ctx->pos++) { | ||
| 244 | char name[PROC_NUMBUF]; | ||
| 245 | int len; | ||
| 246 | |||
| 247 | if (!fcheck_files(files, fd)) | ||
| 248 | continue; | ||
| 249 | rcu_read_unlock(); | ||
| 250 | |||
| 251 | len = snprintf(name, sizeof(name), "%d", fd); | ||
| 252 | if (!proc_fill_cache(file, ctx, | ||
| 253 | name, len, instantiate, p, | ||
| 254 | (void *)(unsigned long)fd)) | ||
| 255 | goto out_fd_loop; | ||
| 256 | rcu_read_lock(); | ||
| 275 | } | 257 | } |
| 258 | rcu_read_unlock(); | ||
| 259 | out_fd_loop: | ||
| 260 | put_files_struct(files); | ||
| 276 | out: | 261 | out: |
| 277 | put_task_struct(p); | 262 | put_task_struct(p); |
| 278 | out_no_task: | 263 | return 0; |
| 279 | return retval; | ||
| 280 | } | 264 | } |
| 281 | 265 | ||
| 282 | static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir) | 266 | static int proc_readfd(struct file *file, struct dir_context *ctx) |
| 283 | { | 267 | { |
| 284 | return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate); | 268 | return proc_readfd_common(file, ctx, proc_fd_instantiate); |
| 285 | } | 269 | } |
| 286 | 270 | ||
| 287 | const struct file_operations proc_fd_operations = { | 271 | const struct file_operations proc_fd_operations = { |
| 288 | .read = generic_read_dir, | 272 | .read = generic_read_dir, |
| 289 | .readdir = proc_readfd, | 273 | .iterate = proc_readfd, |
| 290 | .llseek = default_llseek, | 274 | .llseek = default_llseek, |
| 291 | }; | 275 | }; |
| 292 | 276 | ||
| @@ -351,9 +335,9 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags) | |||
| 351 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); | 335 | return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate); |
| 352 | } | 336 | } |
| 353 | 337 | ||
| 354 | static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir) | 338 | static int proc_readfdinfo(struct file *file, struct dir_context *ctx) |
| 355 | { | 339 | { |
| 356 | return proc_readfd_common(filp, dirent, filldir, | 340 | return proc_readfd_common(file, ctx, |
| 357 | proc_fdinfo_instantiate); | 341 | proc_fdinfo_instantiate); |
| 358 | } | 342 | } |
| 359 | 343 | ||
| @@ -364,6 +348,6 @@ const struct inode_operations proc_fdinfo_inode_operations = { | |||
| 364 | 348 | ||
| 365 | const struct file_operations proc_fdinfo_operations = { | 349 | const struct file_operations proc_fdinfo_operations = { |
| 366 | .read = generic_read_dir, | 350 | .read = generic_read_dir, |
| 367 | .readdir = proc_readfdinfo, | 351 | .iterate = proc_readfdinfo, |
| 368 | .llseek = default_llseek, | 352 | .llseek = default_llseek, |
| 369 | }; | 353 | }; |
diff --git a/fs/proc/generic.c b/fs/proc/generic.c index a2596afffae6..94441a407337 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c | |||
| @@ -233,76 +233,52 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, | |||
| 233 | * value of the readdir() call, as long as it's non-negative | 233 | * value of the readdir() call, as long as it's non-negative |
| 234 | * for success.. | 234 | * for success.. |
| 235 | */ | 235 | */ |
| 236 | int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent, | 236 | int proc_readdir_de(struct proc_dir_entry *de, struct file *file, |
| 237 | filldir_t filldir) | 237 | struct dir_context *ctx) |
| 238 | { | 238 | { |
| 239 | unsigned int ino; | ||
| 240 | int i; | 239 | int i; |
| 241 | struct inode *inode = file_inode(filp); | ||
| 242 | int ret = 0; | ||
| 243 | |||
| 244 | ino = inode->i_ino; | ||
| 245 | i = filp->f_pos; | ||
| 246 | switch (i) { | ||
| 247 | case 0: | ||
| 248 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | ||
| 249 | goto out; | ||
| 250 | i++; | ||
| 251 | filp->f_pos++; | ||
| 252 | /* fall through */ | ||
| 253 | case 1: | ||
| 254 | if (filldir(dirent, "..", 2, i, | ||
| 255 | parent_ino(filp->f_path.dentry), | ||
| 256 | DT_DIR) < 0) | ||
| 257 | goto out; | ||
| 258 | i++; | ||
| 259 | filp->f_pos++; | ||
| 260 | /* fall through */ | ||
| 261 | default: | ||
| 262 | spin_lock(&proc_subdir_lock); | ||
| 263 | de = de->subdir; | ||
| 264 | i -= 2; | ||
| 265 | for (;;) { | ||
| 266 | if (!de) { | ||
| 267 | ret = 1; | ||
| 268 | spin_unlock(&proc_subdir_lock); | ||
| 269 | goto out; | ||
| 270 | } | ||
| 271 | if (!i) | ||
| 272 | break; | ||
| 273 | de = de->next; | ||
| 274 | i--; | ||
| 275 | } | ||
| 276 | 240 | ||
| 277 | do { | 241 | if (!dir_emit_dots(file, ctx)) |
| 278 | struct proc_dir_entry *next; | 242 | return 0; |
| 279 | 243 | ||
| 280 | /* filldir passes info to user space */ | 244 | spin_lock(&proc_subdir_lock); |
| 281 | pde_get(de); | 245 | de = de->subdir; |
| 282 | spin_unlock(&proc_subdir_lock); | 246 | i = ctx->pos - 2; |
| 283 | if (filldir(dirent, de->name, de->namelen, filp->f_pos, | 247 | for (;;) { |
| 284 | de->low_ino, de->mode >> 12) < 0) { | 248 | if (!de) { |
| 285 | pde_put(de); | ||
| 286 | goto out; | ||
| 287 | } | ||
| 288 | spin_lock(&proc_subdir_lock); | ||
| 289 | filp->f_pos++; | ||
| 290 | next = de->next; | ||
| 291 | pde_put(de); | ||
| 292 | de = next; | ||
| 293 | } while (de); | ||
| 294 | spin_unlock(&proc_subdir_lock); | 249 | spin_unlock(&proc_subdir_lock); |
| 250 | return 0; | ||
| 251 | } | ||
| 252 | if (!i) | ||
| 253 | break; | ||
| 254 | de = de->next; | ||
| 255 | i--; | ||
| 295 | } | 256 | } |
| 296 | ret = 1; | 257 | |
| 297 | out: | 258 | do { |
| 298 | return ret; | 259 | struct proc_dir_entry *next; |
| 260 | pde_get(de); | ||
| 261 | spin_unlock(&proc_subdir_lock); | ||
| 262 | if (!dir_emit(ctx, de->name, de->namelen, | ||
| 263 | de->low_ino, de->mode >> 12)) { | ||
| 264 | pde_put(de); | ||
| 265 | return 0; | ||
| 266 | } | ||
| 267 | spin_lock(&proc_subdir_lock); | ||
| 268 | ctx->pos++; | ||
| 269 | next = de->next; | ||
| 270 | pde_put(de); | ||
| 271 | de = next; | ||
| 272 | } while (de); | ||
| 273 | spin_unlock(&proc_subdir_lock); | ||
| 274 | return 0; | ||
| 299 | } | 275 | } |
| 300 | 276 | ||
| 301 | int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | 277 | int proc_readdir(struct file *file, struct dir_context *ctx) |
| 302 | { | 278 | { |
| 303 | struct inode *inode = file_inode(filp); | 279 | struct inode *inode = file_inode(file); |
| 304 | 280 | ||
| 305 | return proc_readdir_de(PDE(inode), filp, dirent, filldir); | 281 | return proc_readdir_de(PDE(inode), file, ctx); |
| 306 | } | 282 | } |
| 307 | 283 | ||
| 308 | /* | 284 | /* |
| @@ -313,7 +289,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 313 | static const struct file_operations proc_dir_operations = { | 289 | static const struct file_operations proc_dir_operations = { |
| 314 | .llseek = generic_file_llseek, | 290 | .llseek = generic_file_llseek, |
| 315 | .read = generic_read_dir, | 291 | .read = generic_read_dir, |
| 316 | .readdir = proc_readdir, | 292 | .iterate = proc_readdir, |
| 317 | }; | 293 | }; |
| 318 | 294 | ||
| 319 | /* | 295 | /* |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index d600fb098b6a..4eae2e149f31 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -165,14 +165,14 @@ extern int proc_setattr(struct dentry *, struct iattr *); | |||
| 165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); | 165 | extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *); |
| 166 | extern int pid_revalidate(struct dentry *, unsigned int); | 166 | extern int pid_revalidate(struct dentry *, unsigned int); |
| 167 | extern int pid_delete_dentry(const struct dentry *); | 167 | extern int pid_delete_dentry(const struct dentry *); |
| 168 | extern int proc_pid_readdir(struct file *, void *, filldir_t); | 168 | extern int proc_pid_readdir(struct file *, struct dir_context *); |
| 169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); | 169 | extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int); |
| 170 | extern loff_t mem_lseek(struct file *, loff_t, int); | 170 | extern loff_t mem_lseek(struct file *, loff_t, int); |
| 171 | 171 | ||
| 172 | /* Lookups */ | 172 | /* Lookups */ |
| 173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, | 173 | typedef struct dentry *instantiate_t(struct inode *, struct dentry *, |
| 174 | struct task_struct *, const void *); | 174 | struct task_struct *, const void *); |
| 175 | extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int, | 175 | extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int, |
| 176 | instantiate_t, struct task_struct *, const void *); | 176 | instantiate_t, struct task_struct *, const void *); |
| 177 | 177 | ||
| 178 | /* | 178 | /* |
| @@ -183,8 +183,8 @@ extern spinlock_t proc_subdir_lock; | |||
| 183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); | 183 | extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); |
| 184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, | 184 | extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, |
| 185 | struct dentry *); | 185 | struct dentry *); |
| 186 | extern int proc_readdir(struct file *, void *, filldir_t); | 186 | extern int proc_readdir(struct file *, struct dir_context *); |
| 187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t); | 187 | extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *); |
| 188 | 188 | ||
| 189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) | 189 | static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) |
| 190 | { | 190 | { |
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index 54bdc6701e9f..f6abbbbfad8a 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c | |||
| @@ -213,74 +213,36 @@ out: | |||
| 213 | return error; | 213 | return error; |
| 214 | } | 214 | } |
| 215 | 215 | ||
| 216 | static int proc_ns_fill_cache(struct file *filp, void *dirent, | 216 | static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx) |
| 217 | filldir_t filldir, struct task_struct *task, | ||
| 218 | const struct proc_ns_operations *ops) | ||
| 219 | { | 217 | { |
| 220 | return proc_fill_cache(filp, dirent, filldir, | 218 | struct task_struct *task = get_proc_task(file_inode(file)); |
| 221 | ops->name, strlen(ops->name), | ||
| 222 | proc_ns_instantiate, task, ops); | ||
| 223 | } | ||
| 224 | |||
| 225 | static int proc_ns_dir_readdir(struct file *filp, void *dirent, | ||
| 226 | filldir_t filldir) | ||
| 227 | { | ||
| 228 | int i; | ||
| 229 | struct dentry *dentry = filp->f_path.dentry; | ||
| 230 | struct inode *inode = dentry->d_inode; | ||
| 231 | struct task_struct *task = get_proc_task(inode); | ||
| 232 | const struct proc_ns_operations **entry, **last; | 219 | const struct proc_ns_operations **entry, **last; |
| 233 | ino_t ino; | ||
| 234 | int ret; | ||
| 235 | 220 | ||
| 236 | ret = -ENOENT; | ||
| 237 | if (!task) | 221 | if (!task) |
| 238 | goto out_no_task; | 222 | return -ENOENT; |
| 239 | 223 | ||
| 240 | ret = 0; | 224 | if (!dir_emit_dots(file, ctx)) |
| 241 | i = filp->f_pos; | 225 | goto out; |
| 242 | switch (i) { | 226 | if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries)) |
| 243 | case 0: | 227 | goto out; |
| 244 | ino = inode->i_ino; | 228 | entry = ns_entries + (ctx->pos - 2); |
| 245 | if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0) | 229 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; |
| 246 | goto out; | 230 | while (entry <= last) { |
| 247 | i++; | 231 | const struct proc_ns_operations *ops = *entry; |
| 248 | filp->f_pos++; | 232 | if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name), |
| 249 | /* fall through */ | 233 | proc_ns_instantiate, task, ops)) |
| 250 | case 1: | 234 | break; |
| 251 | ino = parent_ino(dentry); | 235 | ctx->pos++; |
| 252 | if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0) | 236 | entry++; |
| 253 | goto out; | ||
| 254 | i++; | ||
| 255 | filp->f_pos++; | ||
| 256 | /* fall through */ | ||
| 257 | default: | ||
| 258 | i -= 2; | ||
| 259 | if (i >= ARRAY_SIZE(ns_entries)) { | ||
| 260 | ret = 1; | ||
| 261 | goto out; | ||
| 262 | } | ||
| 263 | entry = ns_entries + i; | ||
| 264 | last = &ns_entries[ARRAY_SIZE(ns_entries) - 1]; | ||
| 265 | while (entry <= last) { | ||
| 266 | if (proc_ns_fill_cache(filp, dirent, filldir, | ||
| 267 | task, *entry) < 0) | ||
| 268 | goto out; | ||
| 269 | filp->f_pos++; | ||
| 270 | entry++; | ||
| 271 | } | ||
| 272 | } | 237 | } |
| 273 | |||
| 274 | ret = 1; | ||
| 275 | out: | 238 | out: |
| 276 | put_task_struct(task); | 239 | put_task_struct(task); |
| 277 | out_no_task: | 240 | return 0; |
| 278 | return ret; | ||
| 279 | } | 241 | } |
| 280 | 242 | ||
| 281 | const struct file_operations proc_ns_dir_operations = { | 243 | const struct file_operations proc_ns_dir_operations = { |
| 282 | .read = generic_read_dir, | 244 | .read = generic_read_dir, |
| 283 | .readdir = proc_ns_dir_readdir, | 245 | .iterate = proc_ns_dir_readdir, |
| 284 | }; | 246 | }; |
| 285 | 247 | ||
| 286 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, | 248 | static struct dentry *proc_ns_dir_lookup(struct inode *dir, |
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 986e83220d56..4677bb7dc7c2 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c | |||
| @@ -160,16 +160,15 @@ const struct inode_operations proc_net_inode_operations = { | |||
| 160 | .getattr = proc_tgid_net_getattr, | 160 | .getattr = proc_tgid_net_getattr, |
| 161 | }; | 161 | }; |
| 162 | 162 | ||
| 163 | static int proc_tgid_net_readdir(struct file *filp, void *dirent, | 163 | static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) |
| 164 | filldir_t filldir) | ||
| 165 | { | 164 | { |
| 166 | int ret; | 165 | int ret; |
| 167 | struct net *net; | 166 | struct net *net; |
| 168 | 167 | ||
| 169 | ret = -EINVAL; | 168 | ret = -EINVAL; |
| 170 | net = get_proc_task_net(file_inode(filp)); | 169 | net = get_proc_task_net(file_inode(file)); |
| 171 | if (net != NULL) { | 170 | if (net != NULL) { |
| 172 | ret = proc_readdir_de(net->proc_net, filp, dirent, filldir); | 171 | ret = proc_readdir_de(net->proc_net, file, ctx); |
| 173 | put_net(net); | 172 | put_net(net); |
| 174 | } | 173 | } |
| 175 | return ret; | 174 | return ret; |
| @@ -178,7 +177,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent, | |||
| 178 | const struct file_operations proc_net_operations = { | 177 | const struct file_operations proc_net_operations = { |
| 179 | .llseek = generic_file_llseek, | 178 | .llseek = generic_file_llseek, |
| 180 | .read = generic_read_dir, | 179 | .read = generic_read_dir, |
| 181 | .readdir = proc_tgid_net_readdir, | 180 | .iterate = proc_tgid_net_readdir, |
| 182 | }; | 181 | }; |
| 183 | 182 | ||
| 184 | static __net_init int proc_net_ns_init(struct net *net) | 183 | static __net_init int proc_net_ns_init(struct net *net) |
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index ac05f33a0dde..f3a570e7c257 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
| @@ -573,12 +573,12 @@ out: | |||
| 573 | return ret; | 573 | return ret; |
| 574 | } | 574 | } |
| 575 | 575 | ||
| 576 | static int proc_sys_fill_cache(struct file *filp, void *dirent, | 576 | static bool proc_sys_fill_cache(struct file *file, |
| 577 | filldir_t filldir, | 577 | struct dir_context *ctx, |
| 578 | struct ctl_table_header *head, | 578 | struct ctl_table_header *head, |
| 579 | struct ctl_table *table) | 579 | struct ctl_table *table) |
| 580 | { | 580 | { |
| 581 | struct dentry *child, *dir = filp->f_path.dentry; | 581 | struct dentry *child, *dir = file->f_path.dentry; |
| 582 | struct inode *inode; | 582 | struct inode *inode; |
| 583 | struct qstr qname; | 583 | struct qstr qname; |
| 584 | ino_t ino = 0; | 584 | ino_t ino = 0; |
| @@ -595,38 +595,38 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent, | |||
| 595 | inode = proc_sys_make_inode(dir->d_sb, head, table); | 595 | inode = proc_sys_make_inode(dir->d_sb, head, table); |
| 596 | if (!inode) { | 596 | if (!inode) { |
| 597 | dput(child); | 597 | dput(child); |
| 598 | return -ENOMEM; | 598 | return false; |
| 599 | } else { | 599 | } else { |
| 600 | d_set_d_op(child, &proc_sys_dentry_operations); | 600 | d_set_d_op(child, &proc_sys_dentry_operations); |
| 601 | d_add(child, inode); | 601 | d_add(child, inode); |
| 602 | } | 602 | } |
| 603 | } else { | 603 | } else { |
| 604 | return -ENOMEM; | 604 | return false; |
| 605 | } | 605 | } |
| 606 | } | 606 | } |
| 607 | inode = child->d_inode; | 607 | inode = child->d_inode; |
| 608 | ino = inode->i_ino; | 608 | ino = inode->i_ino; |
| 609 | type = inode->i_mode >> 12; | 609 | type = inode->i_mode >> 12; |
| 610 | dput(child); | 610 | dput(child); |
| 611 | return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type); | 611 | return dir_emit(ctx, qname.name, qname.len, ino, type); |
| 612 | } | 612 | } |
| 613 | 613 | ||
| 614 | static int proc_sys_link_fill_cache(struct file *filp, void *dirent, | 614 | static bool proc_sys_link_fill_cache(struct file *file, |
| 615 | filldir_t filldir, | 615 | struct dir_context *ctx, |
| 616 | struct ctl_table_header *head, | 616 | struct ctl_table_header *head, |
| 617 | struct ctl_table *table) | 617 | struct ctl_table *table) |
| 618 | { | 618 | { |
| 619 | int err, ret = 0; | 619 | bool ret = true; |
| 620 | head = sysctl_head_grab(head); | 620 | head = sysctl_head_grab(head); |
| 621 | 621 | ||
| 622 | if (S_ISLNK(table->mode)) { | 622 | if (S_ISLNK(table->mode)) { |
| 623 | /* It is not an error if we can not follow the link ignore it */ | 623 | /* It is not an error if we can not follow the link ignore it */ |
| 624 | err = sysctl_follow_link(&head, &table, current->nsproxy); | 624 | int err = sysctl_follow_link(&head, &table, current->nsproxy); |
| 625 | if (err) | 625 | if (err) |
| 626 | goto out; | 626 | goto out; |
| 627 | } | 627 | } |
| 628 | 628 | ||
| 629 | ret = proc_sys_fill_cache(filp, dirent, filldir, head, table); | 629 | ret = proc_sys_fill_cache(file, ctx, head, table); |
| 630 | out: | 630 | out: |
| 631 | sysctl_head_finish(head); | 631 | sysctl_head_finish(head); |
| 632 | return ret; | 632 | return ret; |
| @@ -634,67 +634,50 @@ out: | |||
| 634 | 634 | ||
| 635 | static int scan(struct ctl_table_header *head, ctl_table *table, | 635 | static int scan(struct ctl_table_header *head, ctl_table *table, |
| 636 | unsigned long *pos, struct file *file, | 636 | unsigned long *pos, struct file *file, |
| 637 | void *dirent, filldir_t filldir) | 637 | struct dir_context *ctx) |
| 638 | { | 638 | { |
| 639 | int res; | 639 | bool res; |
| 640 | 640 | ||
| 641 | if ((*pos)++ < file->f_pos) | 641 | if ((*pos)++ < ctx->pos) |
| 642 | return 0; | 642 | return true; |
| 643 | 643 | ||
| 644 | if (unlikely(S_ISLNK(table->mode))) | 644 | if (unlikely(S_ISLNK(table->mode))) |
| 645 | res = proc_sys_link_fill_cache(file, dirent, filldir, head, table); | 645 | res = proc_sys_link_fill_cache(file, ctx, head, table); |
| 646 | else | 646 | else |
| 647 | res = proc_sys_fill_cache(file, dirent, filldir, head, table); | 647 | res = proc_sys_fill_cache(file, ctx, head, table); |
| 648 | 648 | ||
| 649 | if (res == 0) | 649 | if (res) |
| 650 | file->f_pos = *pos; | 650 | ctx->pos = *pos; |
| 651 | 651 | ||
| 652 | return res; | 652 | return res; |
| 653 | } | 653 | } |
| 654 | 654 | ||
| 655 | static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir) | 655 | static int proc_sys_readdir(struct file *file, struct dir_context *ctx) |
| 656 | { | 656 | { |
| 657 | struct dentry *dentry = filp->f_path.dentry; | 657 | struct ctl_table_header *head = grab_header(file_inode(file)); |
| 658 | struct inode *inode = dentry->d_inode; | ||
| 659 | struct ctl_table_header *head = grab_header(inode); | ||
| 660 | struct ctl_table_header *h = NULL; | 658 | struct ctl_table_header *h = NULL; |
| 661 | struct ctl_table *entry; | 659 | struct ctl_table *entry; |
| 662 | struct ctl_dir *ctl_dir; | 660 | struct ctl_dir *ctl_dir; |
| 663 | unsigned long pos; | 661 | unsigned long pos; |
| 664 | int ret = -EINVAL; | ||
| 665 | 662 | ||
| 666 | if (IS_ERR(head)) | 663 | if (IS_ERR(head)) |
| 667 | return PTR_ERR(head); | 664 | return PTR_ERR(head); |
| 668 | 665 | ||
| 669 | ctl_dir = container_of(head, struct ctl_dir, header); | 666 | ctl_dir = container_of(head, struct ctl_dir, header); |
| 670 | 667 | ||
| 671 | ret = 0; | 668 | if (!dir_emit_dots(file, ctx)) |
| 672 | /* Avoid a switch here: arm builds fail with missing __cmpdi2 */ | 669 | return 0; |
| 673 | if (filp->f_pos == 0) { | 670 | |
| 674 | if (filldir(dirent, ".", 1, filp->f_pos, | ||
| 675 | inode->i_ino, DT_DIR) < 0) | ||
| 676 | goto out; | ||
| 677 | filp->f_pos++; | ||
| 678 | } | ||
| 679 | if (filp->f_pos == 1) { | ||
| 680 | if (filldir(dirent, "..", 2, filp->f_pos, | ||
| 681 | parent_ino(dentry), DT_DIR) < 0) | ||
| 682 | goto out; | ||
| 683 | filp->f_pos++; | ||
| 684 | } | ||
| 685 | pos = 2; | 671 | pos = 2; |
| 686 | 672 | ||
| 687 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { | 673 | for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) { |
| 688 | ret = scan(h, entry, &pos, filp, dirent, filldir); | 674 | if (!scan(h, entry, &pos, file, ctx)) { |
| 689 | if (ret) { | ||
| 690 | sysctl_head_finish(h); | 675 | sysctl_head_finish(h); |
| 691 | break; | 676 | break; |
| 692 | } | 677 | } |
| 693 | } | 678 | } |
| 694 | ret = 1; | ||
| 695 | out: | ||
| 696 | sysctl_head_finish(head); | 679 | sysctl_head_finish(head); |
| 697 | return ret; | 680 | return 0; |
| 698 | } | 681 | } |
| 699 | 682 | ||
| 700 | static int proc_sys_permission(struct inode *inode, int mask) | 683 | static int proc_sys_permission(struct inode *inode, int mask) |
| @@ -769,7 +752,7 @@ static const struct file_operations proc_sys_file_operations = { | |||
| 769 | 752 | ||
| 770 | static const struct file_operations proc_sys_dir_file_operations = { | 753 | static const struct file_operations proc_sys_dir_file_operations = { |
| 771 | .read = generic_read_dir, | 754 | .read = generic_read_dir, |
| 772 | .readdir = proc_sys_readdir, | 755 | .iterate = proc_sys_readdir, |
| 773 | .llseek = generic_file_llseek, | 756 | .llseek = generic_file_llseek, |
| 774 | }; | 757 | }; |
| 775 | 758 | ||
diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..229e366598da 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c | |||
| @@ -202,21 +202,14 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr | |||
| 202 | return proc_pid_lookup(dir, dentry, flags); | 202 | return proc_pid_lookup(dir, dentry, flags); |
| 203 | } | 203 | } |
| 204 | 204 | ||
| 205 | static int proc_root_readdir(struct file * filp, | 205 | static int proc_root_readdir(struct file *file, struct dir_context *ctx) |
| 206 | void * dirent, filldir_t filldir) | ||
| 207 | { | 206 | { |
| 208 | unsigned int nr = filp->f_pos; | 207 | if (ctx->pos < FIRST_PROCESS_ENTRY) { |
| 209 | int ret; | 208 | proc_readdir(file, ctx); |
| 210 | 209 | ctx->pos = FIRST_PROCESS_ENTRY; | |
| 211 | if (nr < FIRST_PROCESS_ENTRY) { | ||
| 212 | int error = proc_readdir(filp, dirent, filldir); | ||
| 213 | if (error <= 0) | ||
| 214 | return error; | ||
| 215 | filp->f_pos = FIRST_PROCESS_ENTRY; | ||
| 216 | } | 210 | } |
| 217 | 211 | ||
| 218 | ret = proc_pid_readdir(filp, dirent, filldir); | 212 | return proc_pid_readdir(file, ctx); |
| 219 | return ret; | ||
| 220 | } | 213 | } |
| 221 | 214 | ||
| 222 | /* | 215 | /* |
| @@ -226,7 +219,7 @@ static int proc_root_readdir(struct file * filp, | |||
| 226 | */ | 219 | */ |
| 227 | static const struct file_operations proc_root_operations = { | 220 | static const struct file_operations proc_root_operations = { |
| 228 | .read = generic_read_dir, | 221 | .read = generic_read_dir, |
| 229 | .readdir = proc_root_readdir, | 222 | .iterate = proc_root_readdir, |
| 230 | .llseek = default_llseek, | 223 | .llseek = default_llseek, |
| 231 | }; | 224 | }; |
| 232 | 225 | ||
diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 28ce014b3cef..b218f965817b 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c | |||
| @@ -14,9 +14,9 @@ | |||
| 14 | #include <linux/buffer_head.h> | 14 | #include <linux/buffer_head.h> |
| 15 | #include "qnx4.h" | 15 | #include "qnx4.h" |
| 16 | 16 | ||
| 17 | static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | 17 | static int qnx4_readdir(struct file *file, struct dir_context *ctx) |
| 18 | { | 18 | { |
| 19 | struct inode *inode = file_inode(filp); | 19 | struct inode *inode = file_inode(file); |
| 20 | unsigned int offset; | 20 | unsigned int offset; |
| 21 | struct buffer_head *bh; | 21 | struct buffer_head *bh; |
| 22 | struct qnx4_inode_entry *de; | 22 | struct qnx4_inode_entry *de; |
| @@ -26,48 +26,44 @@ static int qnx4_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 26 | int size; | 26 | int size; |
| 27 | 27 | ||
| 28 | QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); | 28 | QNX4DEBUG((KERN_INFO "qnx4_readdir:i_size = %ld\n", (long) inode->i_size)); |
| 29 | QNX4DEBUG((KERN_INFO "filp->f_pos = %ld\n", (long) filp->f_pos)); | 29 | QNX4DEBUG((KERN_INFO "pos = %ld\n", (long) ctx->pos)); |
| 30 | 30 | ||
| 31 | while (filp->f_pos < inode->i_size) { | 31 | while (ctx->pos < inode->i_size) { |
| 32 | blknum = qnx4_block_map( inode, filp->f_pos >> QNX4_BLOCK_SIZE_BITS ); | 32 | blknum = qnx4_block_map(inode, ctx->pos >> QNX4_BLOCK_SIZE_BITS); |
| 33 | bh = sb_bread(inode->i_sb, blknum); | 33 | bh = sb_bread(inode->i_sb, blknum); |
| 34 | if(bh==NULL) { | 34 | if (bh == NULL) { |
| 35 | printk(KERN_ERR "qnx4_readdir: bread failed (%ld)\n", blknum); | 35 | printk(KERN_ERR "qnx4_readdir: bread failed (%ld)\n", blknum); |
| 36 | break; | 36 | return 0; |
| 37 | } | 37 | } |
| 38 | ix = (int)(filp->f_pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; | 38 | ix = (ctx->pos >> QNX4_DIR_ENTRY_SIZE_BITS) % QNX4_INODES_PER_BLOCK; |
| 39 | while (ix < QNX4_INODES_PER_BLOCK) { | 39 | for (; ix < QNX4_INODES_PER_BLOCK; ix++, ctx->pos += QNX4_DIR_ENTRY_SIZE) { |
| 40 | offset = ix * QNX4_DIR_ENTRY_SIZE; | 40 | offset = ix * QNX4_DIR_ENTRY_SIZE; |
| 41 | de = (struct qnx4_inode_entry *) (bh->b_data + offset); | 41 | de = (struct qnx4_inode_entry *) (bh->b_data + offset); |
| 42 | size = strlen(de->di_fname); | 42 | if (!de->di_fname[0]) |
| 43 | if (size) { | 43 | continue; |
| 44 | if ( !( de->di_status & QNX4_FILE_LINK ) && size > QNX4_SHORT_NAME_MAX ) | 44 | if (!(de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK))) |
| 45 | size = QNX4_SHORT_NAME_MAX; | 45 | continue; |
| 46 | else if ( size > QNX4_NAME_MAX ) | 46 | if (!(de->di_status & QNX4_FILE_LINK)) |
| 47 | size = QNX4_NAME_MAX; | 47 | size = QNX4_SHORT_NAME_MAX; |
| 48 | 48 | else | |
| 49 | if ( ( de->di_status & (QNX4_FILE_USED|QNX4_FILE_LINK) ) != 0 ) { | 49 | size = QNX4_NAME_MAX; |
| 50 | QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); | 50 | size = strnlen(de->di_fname, size); |
| 51 | if ( ( de->di_status & QNX4_FILE_LINK ) == 0 ) | 51 | QNX4DEBUG((KERN_INFO "qnx4_readdir:%.*s\n", size, de->di_fname)); |
| 52 | ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; | 52 | if (!(de->di_status & QNX4_FILE_LINK)) |
| 53 | else { | 53 | ino = blknum * QNX4_INODES_PER_BLOCK + ix - 1; |
| 54 | le = (struct qnx4_link_info*)de; | 54 | else { |
| 55 | ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * | 55 | le = (struct qnx4_link_info*)de; |
| 56 | QNX4_INODES_PER_BLOCK + | 56 | ino = ( le32_to_cpu(le->dl_inode_blk) - 1 ) * |
| 57 | le->dl_inode_ndx; | 57 | QNX4_INODES_PER_BLOCK + |
| 58 | } | 58 | le->dl_inode_ndx; |
| 59 | if (filldir(dirent, de->di_fname, size, filp->f_pos, ino, DT_UNKNOWN) < 0) { | 59 | } |
| 60 | brelse(bh); | 60 | if (!dir_emit(ctx, de->di_fname, size, ino, DT_UNKNOWN)) { |
| 61 | goto out; | 61 | brelse(bh); |
| 62 | } | 62 | return 0; |
| 63 | } | ||
| 64 | } | 63 | } |
| 65 | ix++; | ||
| 66 | filp->f_pos += QNX4_DIR_ENTRY_SIZE; | ||
| 67 | } | 64 | } |
| 68 | brelse(bh); | 65 | brelse(bh); |
| 69 | } | 66 | } |
| 70 | out: | ||
| 71 | return 0; | 67 | return 0; |
| 72 | } | 68 | } |
| 73 | 69 | ||
| @@ -75,7 +71,7 @@ const struct file_operations qnx4_dir_operations = | |||
| 75 | { | 71 | { |
| 76 | .llseek = generic_file_llseek, | 72 | .llseek = generic_file_llseek, |
| 77 | .read = generic_read_dir, | 73 | .read = generic_read_dir, |
| 78 | .readdir = qnx4_readdir, | 74 | .iterate = qnx4_readdir, |
| 79 | .fsync = generic_file_fsync, | 75 | .fsync = generic_file_fsync, |
| 80 | }; | 76 | }; |
| 81 | 77 | ||
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c index afa6be6fc397..15b7d92ed60d 100644 --- a/fs/qnx6/dir.c +++ b/fs/qnx6/dir.c | |||
| @@ -65,8 +65,8 @@ static struct qnx6_long_filename *qnx6_longname(struct super_block *sb, | |||
| 65 | 65 | ||
| 66 | static int qnx6_dir_longfilename(struct inode *inode, | 66 | static int qnx6_dir_longfilename(struct inode *inode, |
| 67 | struct qnx6_long_dir_entry *de, | 67 | struct qnx6_long_dir_entry *de, |
| 68 | void *dirent, loff_t pos, | 68 | struct dir_context *ctx, |
| 69 | unsigned de_inode, filldir_t filldir) | 69 | unsigned de_inode) |
| 70 | { | 70 | { |
| 71 | struct qnx6_long_filename *lf; | 71 | struct qnx6_long_filename *lf; |
| 72 | struct super_block *s = inode->i_sb; | 72 | struct super_block *s = inode->i_sb; |
| @@ -104,8 +104,7 @@ static int qnx6_dir_longfilename(struct inode *inode, | |||
| 104 | 104 | ||
| 105 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", | 105 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s inode:%u\n", |
| 106 | lf_size, lf->lf_fname, de_inode)); | 106 | lf_size, lf->lf_fname, de_inode)); |
| 107 | if (filldir(dirent, lf->lf_fname, lf_size, pos, de_inode, | 107 | if (!dir_emit(ctx, lf->lf_fname, lf_size, de_inode, DT_UNKNOWN)) { |
| 108 | DT_UNKNOWN) < 0) { | ||
| 109 | qnx6_put_page(page); | 108 | qnx6_put_page(page); |
| 110 | return 0; | 109 | return 0; |
| 111 | } | 110 | } |
| @@ -115,18 +114,19 @@ static int qnx6_dir_longfilename(struct inode *inode, | |||
| 115 | return 1; | 114 | return 1; |
| 116 | } | 115 | } |
| 117 | 116 | ||
| 118 | static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | 117 | static int qnx6_readdir(struct file *file, struct dir_context *ctx) |
| 119 | { | 118 | { |
| 120 | struct inode *inode = file_inode(filp); | 119 | struct inode *inode = file_inode(file); |
| 121 | struct super_block *s = inode->i_sb; | 120 | struct super_block *s = inode->i_sb; |
| 122 | struct qnx6_sb_info *sbi = QNX6_SB(s); | 121 | struct qnx6_sb_info *sbi = QNX6_SB(s); |
| 123 | loff_t pos = filp->f_pos & ~(QNX6_DIR_ENTRY_SIZE - 1); | 122 | loff_t pos = ctx->pos & ~(QNX6_DIR_ENTRY_SIZE - 1); |
| 124 | unsigned long npages = dir_pages(inode); | 123 | unsigned long npages = dir_pages(inode); |
| 125 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 124 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
| 126 | unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; | 125 | unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE; |
| 127 | bool done = false; | 126 | bool done = false; |
| 128 | 127 | ||
| 129 | if (filp->f_pos >= inode->i_size) | 128 | ctx->pos = pos; |
| 129 | if (ctx->pos >= inode->i_size) | ||
| 130 | return 0; | 130 | return 0; |
| 131 | 131 | ||
| 132 | for ( ; !done && n < npages; n++, start = 0) { | 132 | for ( ; !done && n < npages; n++, start = 0) { |
| @@ -137,11 +137,11 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 137 | 137 | ||
| 138 | if (IS_ERR(page)) { | 138 | if (IS_ERR(page)) { |
| 139 | printk(KERN_ERR "qnx6_readdir: read failed\n"); | 139 | printk(KERN_ERR "qnx6_readdir: read failed\n"); |
| 140 | filp->f_pos = (n + 1) << PAGE_CACHE_SHIFT; | 140 | ctx->pos = (n + 1) << PAGE_CACHE_SHIFT; |
| 141 | return PTR_ERR(page); | 141 | return PTR_ERR(page); |
| 142 | } | 142 | } |
| 143 | de = ((struct qnx6_dir_entry *)page_address(page)) + start; | 143 | de = ((struct qnx6_dir_entry *)page_address(page)) + start; |
| 144 | for (; i < limit; i++, de++, pos += QNX6_DIR_ENTRY_SIZE) { | 144 | for (; i < limit; i++, de++, ctx->pos += QNX6_DIR_ENTRY_SIZE) { |
| 145 | int size = de->de_size; | 145 | int size = de->de_size; |
| 146 | u32 no_inode = fs32_to_cpu(sbi, de->de_inode); | 146 | u32 no_inode = fs32_to_cpu(sbi, de->de_inode); |
| 147 | 147 | ||
| @@ -154,8 +154,7 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 154 | structure / block */ | 154 | structure / block */ |
| 155 | if (!qnx6_dir_longfilename(inode, | 155 | if (!qnx6_dir_longfilename(inode, |
| 156 | (struct qnx6_long_dir_entry *)de, | 156 | (struct qnx6_long_dir_entry *)de, |
| 157 | dirent, pos, no_inode, | 157 | ctx, no_inode)) { |
| 158 | filldir)) { | ||
| 159 | done = true; | 158 | done = true; |
| 160 | break; | 159 | break; |
| 161 | } | 160 | } |
| @@ -163,9 +162,8 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 163 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" | 162 | QNX6DEBUG((KERN_INFO "qnx6_readdir:%.*s" |
| 164 | " inode:%u\n", size, de->de_fname, | 163 | " inode:%u\n", size, de->de_fname, |
| 165 | no_inode)); | 164 | no_inode)); |
| 166 | if (filldir(dirent, de->de_fname, size, | 165 | if (!dir_emit(ctx, de->de_fname, size, |
| 167 | pos, no_inode, DT_UNKNOWN) | 166 | no_inode, DT_UNKNOWN)) { |
| 168 | < 0) { | ||
| 169 | done = true; | 167 | done = true; |
| 170 | break; | 168 | break; |
| 171 | } | 169 | } |
| @@ -173,7 +171,6 @@ static int qnx6_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 173 | } | 171 | } |
| 174 | qnx6_put_page(page); | 172 | qnx6_put_page(page); |
| 175 | } | 173 | } |
| 176 | filp->f_pos = pos; | ||
| 177 | return 0; | 174 | return 0; |
| 178 | } | 175 | } |
| 179 | 176 | ||
| @@ -282,7 +279,7 @@ found: | |||
| 282 | const struct file_operations qnx6_dir_operations = { | 279 | const struct file_operations qnx6_dir_operations = { |
| 283 | .llseek = generic_file_llseek, | 280 | .llseek = generic_file_llseek, |
| 284 | .read = generic_read_dir, | 281 | .read = generic_read_dir, |
| 285 | .readdir = qnx6_readdir, | 282 | .iterate = qnx6_readdir, |
| 286 | .fsync = generic_file_fsync, | 283 | .fsync = generic_file_fsync, |
| 287 | }; | 284 | }; |
| 288 | 285 | ||
diff --git a/fs/read_write.c b/fs/read_write.c index 03430008704e..2cefa417be34 100644 --- a/fs/read_write.c +++ b/fs/read_write.c | |||
| @@ -1064,6 +1064,7 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
| 1064 | struct fd in, out; | 1064 | struct fd in, out; |
| 1065 | struct inode *in_inode, *out_inode; | 1065 | struct inode *in_inode, *out_inode; |
| 1066 | loff_t pos; | 1066 | loff_t pos; |
| 1067 | loff_t out_pos; | ||
| 1067 | ssize_t retval; | 1068 | ssize_t retval; |
| 1068 | int fl; | 1069 | int fl; |
| 1069 | 1070 | ||
| @@ -1077,12 +1078,14 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
| 1077 | if (!(in.file->f_mode & FMODE_READ)) | 1078 | if (!(in.file->f_mode & FMODE_READ)) |
| 1078 | goto fput_in; | 1079 | goto fput_in; |
| 1079 | retval = -ESPIPE; | 1080 | retval = -ESPIPE; |
| 1080 | if (!ppos) | 1081 | if (!ppos) { |
| 1081 | ppos = &in.file->f_pos; | 1082 | pos = in.file->f_pos; |
| 1082 | else | 1083 | } else { |
| 1084 | pos = *ppos; | ||
| 1083 | if (!(in.file->f_mode & FMODE_PREAD)) | 1085 | if (!(in.file->f_mode & FMODE_PREAD)) |
| 1084 | goto fput_in; | 1086 | goto fput_in; |
| 1085 | retval = rw_verify_area(READ, in.file, ppos, count); | 1087 | } |
| 1088 | retval = rw_verify_area(READ, in.file, &pos, count); | ||
| 1086 | if (retval < 0) | 1089 | if (retval < 0) |
| 1087 | goto fput_in; | 1090 | goto fput_in; |
| 1088 | count = retval; | 1091 | count = retval; |
| @@ -1099,7 +1102,8 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
| 1099 | retval = -EINVAL; | 1102 | retval = -EINVAL; |
| 1100 | in_inode = file_inode(in.file); | 1103 | in_inode = file_inode(in.file); |
| 1101 | out_inode = file_inode(out.file); | 1104 | out_inode = file_inode(out.file); |
| 1102 | retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); | 1105 | out_pos = out.file->f_pos; |
| 1106 | retval = rw_verify_area(WRITE, out.file, &out_pos, count); | ||
| 1103 | if (retval < 0) | 1107 | if (retval < 0) |
| 1104 | goto fput_out; | 1108 | goto fput_out; |
| 1105 | count = retval; | 1109 | count = retval; |
| @@ -1107,7 +1111,6 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
| 1107 | if (!max) | 1111 | if (!max) |
| 1108 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); | 1112 | max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); |
| 1109 | 1113 | ||
| 1110 | pos = *ppos; | ||
| 1111 | if (unlikely(pos + count > max)) { | 1114 | if (unlikely(pos + count > max)) { |
| 1112 | retval = -EOVERFLOW; | 1115 | retval = -EOVERFLOW; |
| 1113 | if (pos >= max) | 1116 | if (pos >= max) |
| @@ -1126,18 +1129,23 @@ static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, | |||
| 1126 | if (in.file->f_flags & O_NONBLOCK) | 1129 | if (in.file->f_flags & O_NONBLOCK) |
| 1127 | fl = SPLICE_F_NONBLOCK; | 1130 | fl = SPLICE_F_NONBLOCK; |
| 1128 | #endif | 1131 | #endif |
| 1129 | retval = do_splice_direct(in.file, ppos, out.file, count, fl); | 1132 | retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); |
| 1130 | 1133 | ||
| 1131 | if (retval > 0) { | 1134 | if (retval > 0) { |
| 1132 | add_rchar(current, retval); | 1135 | add_rchar(current, retval); |
| 1133 | add_wchar(current, retval); | 1136 | add_wchar(current, retval); |
| 1134 | fsnotify_access(in.file); | 1137 | fsnotify_access(in.file); |
| 1135 | fsnotify_modify(out.file); | 1138 | fsnotify_modify(out.file); |
| 1139 | out.file->f_pos = out_pos; | ||
| 1140 | if (ppos) | ||
| 1141 | *ppos = pos; | ||
| 1142 | else | ||
| 1143 | in.file->f_pos = pos; | ||
| 1136 | } | 1144 | } |
| 1137 | 1145 | ||
| 1138 | inc_syscr(current); | 1146 | inc_syscr(current); |
| 1139 | inc_syscw(current); | 1147 | inc_syscw(current); |
| 1140 | if (*ppos > max) | 1148 | if (pos > max) |
| 1141 | retval = -EOVERFLOW; | 1149 | retval = -EOVERFLOW; |
| 1142 | 1150 | ||
| 1143 | fput_out: | 1151 | fput_out: |
diff --git a/fs/readdir.c b/fs/readdir.c index fee38e04fae4..93d71e574310 100644 --- a/fs/readdir.c +++ b/fs/readdir.c | |||
| @@ -20,11 +20,11 @@ | |||
| 20 | 20 | ||
| 21 | #include <asm/uaccess.h> | 21 | #include <asm/uaccess.h> |
| 22 | 22 | ||
| 23 | int vfs_readdir(struct file *file, filldir_t filler, void *buf) | 23 | int iterate_dir(struct file *file, struct dir_context *ctx) |
| 24 | { | 24 | { |
| 25 | struct inode *inode = file_inode(file); | 25 | struct inode *inode = file_inode(file); |
| 26 | int res = -ENOTDIR; | 26 | int res = -ENOTDIR; |
| 27 | if (!file->f_op || !file->f_op->readdir) | 27 | if (!file->f_op || !file->f_op->iterate) |
| 28 | goto out; | 28 | goto out; |
| 29 | 29 | ||
| 30 | res = security_file_permission(file, MAY_READ); | 30 | res = security_file_permission(file, MAY_READ); |
| @@ -37,15 +37,16 @@ int vfs_readdir(struct file *file, filldir_t filler, void *buf) | |||
| 37 | 37 | ||
| 38 | res = -ENOENT; | 38 | res = -ENOENT; |
| 39 | if (!IS_DEADDIR(inode)) { | 39 | if (!IS_DEADDIR(inode)) { |
| 40 | res = file->f_op->readdir(file, buf, filler); | 40 | ctx->pos = file->f_pos; |
| 41 | res = file->f_op->iterate(file, ctx); | ||
| 42 | file->f_pos = ctx->pos; | ||
| 41 | file_accessed(file); | 43 | file_accessed(file); |
| 42 | } | 44 | } |
| 43 | mutex_unlock(&inode->i_mutex); | 45 | mutex_unlock(&inode->i_mutex); |
| 44 | out: | 46 | out: |
| 45 | return res; | 47 | return res; |
| 46 | } | 48 | } |
| 47 | 49 | EXPORT_SYMBOL(iterate_dir); | |
| 48 | EXPORT_SYMBOL(vfs_readdir); | ||
| 49 | 50 | ||
| 50 | /* | 51 | /* |
| 51 | * Traditional linux readdir() handling.. | 52 | * Traditional linux readdir() handling.. |
| @@ -66,6 +67,7 @@ struct old_linux_dirent { | |||
| 66 | }; | 67 | }; |
| 67 | 68 | ||
| 68 | struct readdir_callback { | 69 | struct readdir_callback { |
| 70 | struct dir_context ctx; | ||
| 69 | struct old_linux_dirent __user * dirent; | 71 | struct old_linux_dirent __user * dirent; |
| 70 | int result; | 72 | int result; |
| 71 | }; | 73 | }; |
| @@ -73,7 +75,7 @@ struct readdir_callback { | |||
| 73 | static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, | 75 | static int fillonedir(void * __buf, const char * name, int namlen, loff_t offset, |
| 74 | u64 ino, unsigned int d_type) | 76 | u64 ino, unsigned int d_type) |
| 75 | { | 77 | { |
| 76 | struct readdir_callback * buf = (struct readdir_callback *) __buf; | 78 | struct readdir_callback *buf = (struct readdir_callback *) __buf; |
| 77 | struct old_linux_dirent __user * dirent; | 79 | struct old_linux_dirent __user * dirent; |
| 78 | unsigned long d_ino; | 80 | unsigned long d_ino; |
| 79 | 81 | ||
| @@ -107,15 +109,15 @@ SYSCALL_DEFINE3(old_readdir, unsigned int, fd, | |||
| 107 | { | 109 | { |
| 108 | int error; | 110 | int error; |
| 109 | struct fd f = fdget(fd); | 111 | struct fd f = fdget(fd); |
| 110 | struct readdir_callback buf; | 112 | struct readdir_callback buf = { |
| 113 | .ctx.actor = fillonedir, | ||
| 114 | .dirent = dirent | ||
| 115 | }; | ||
| 111 | 116 | ||
| 112 | if (!f.file) | 117 | if (!f.file) |
| 113 | return -EBADF; | 118 | return -EBADF; |
| 114 | 119 | ||
| 115 | buf.result = 0; | 120 | error = iterate_dir(f.file, &buf.ctx); |
| 116 | buf.dirent = dirent; | ||
| 117 | |||
| 118 | error = vfs_readdir(f.file, fillonedir, &buf); | ||
| 119 | if (buf.result) | 121 | if (buf.result) |
| 120 | error = buf.result; | 122 | error = buf.result; |
| 121 | 123 | ||
| @@ -137,6 +139,7 @@ struct linux_dirent { | |||
| 137 | }; | 139 | }; |
| 138 | 140 | ||
| 139 | struct getdents_callback { | 141 | struct getdents_callback { |
| 142 | struct dir_context ctx; | ||
| 140 | struct linux_dirent __user * current_dir; | 143 | struct linux_dirent __user * current_dir; |
| 141 | struct linux_dirent __user * previous; | 144 | struct linux_dirent __user * previous; |
| 142 | int count; | 145 | int count; |
| @@ -191,7 +194,11 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
| 191 | { | 194 | { |
| 192 | struct fd f; | 195 | struct fd f; |
| 193 | struct linux_dirent __user * lastdirent; | 196 | struct linux_dirent __user * lastdirent; |
| 194 | struct getdents_callback buf; | 197 | struct getdents_callback buf = { |
| 198 | .ctx.actor = filldir, | ||
| 199 | .count = count, | ||
| 200 | .current_dir = dirent | ||
| 201 | }; | ||
| 195 | int error; | 202 | int error; |
| 196 | 203 | ||
| 197 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 204 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
| @@ -201,17 +208,12 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
| 201 | if (!f.file) | 208 | if (!f.file) |
| 202 | return -EBADF; | 209 | return -EBADF; |
| 203 | 210 | ||
| 204 | buf.current_dir = dirent; | 211 | error = iterate_dir(f.file, &buf.ctx); |
| 205 | buf.previous = NULL; | ||
| 206 | buf.count = count; | ||
| 207 | buf.error = 0; | ||
| 208 | |||
| 209 | error = vfs_readdir(f.file, filldir, &buf); | ||
| 210 | if (error >= 0) | 212 | if (error >= 0) |
| 211 | error = buf.error; | 213 | error = buf.error; |
| 212 | lastdirent = buf.previous; | 214 | lastdirent = buf.previous; |
| 213 | if (lastdirent) { | 215 | if (lastdirent) { |
| 214 | if (put_user(f.file->f_pos, &lastdirent->d_off)) | 216 | if (put_user(buf.ctx.pos, &lastdirent->d_off)) |
| 215 | error = -EFAULT; | 217 | error = -EFAULT; |
| 216 | else | 218 | else |
| 217 | error = count - buf.count; | 219 | error = count - buf.count; |
| @@ -221,6 +223,7 @@ SYSCALL_DEFINE3(getdents, unsigned int, fd, | |||
| 221 | } | 223 | } |
| 222 | 224 | ||
| 223 | struct getdents_callback64 { | 225 | struct getdents_callback64 { |
| 226 | struct dir_context ctx; | ||
| 224 | struct linux_dirent64 __user * current_dir; | 227 | struct linux_dirent64 __user * current_dir; |
| 225 | struct linux_dirent64 __user * previous; | 228 | struct linux_dirent64 __user * previous; |
| 226 | int count; | 229 | int count; |
| @@ -271,7 +274,11 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
| 271 | { | 274 | { |
| 272 | struct fd f; | 275 | struct fd f; |
| 273 | struct linux_dirent64 __user * lastdirent; | 276 | struct linux_dirent64 __user * lastdirent; |
| 274 | struct getdents_callback64 buf; | 277 | struct getdents_callback64 buf = { |
| 278 | .ctx.actor = filldir64, | ||
| 279 | .count = count, | ||
| 280 | .current_dir = dirent | ||
| 281 | }; | ||
| 275 | int error; | 282 | int error; |
| 276 | 283 | ||
| 277 | if (!access_ok(VERIFY_WRITE, dirent, count)) | 284 | if (!access_ok(VERIFY_WRITE, dirent, count)) |
| @@ -281,17 +288,12 @@ SYSCALL_DEFINE3(getdents64, unsigned int, fd, | |||
| 281 | if (!f.file) | 288 | if (!f.file) |
| 282 | return -EBADF; | 289 | return -EBADF; |
| 283 | 290 | ||
| 284 | buf.current_dir = dirent; | 291 | error = iterate_dir(f.file, &buf.ctx); |
| 285 | buf.previous = NULL; | ||
| 286 | buf.count = count; | ||
| 287 | buf.error = 0; | ||
| 288 | |||
| 289 | error = vfs_readdir(f.file, filldir64, &buf); | ||
| 290 | if (error >= 0) | 292 | if (error >= 0) |
| 291 | error = buf.error; | 293 | error = buf.error; |
| 292 | lastdirent = buf.previous; | 294 | lastdirent = buf.previous; |
| 293 | if (lastdirent) { | 295 | if (lastdirent) { |
| 294 | typeof(lastdirent->d_off) d_off = f.file->f_pos; | 296 | typeof(lastdirent->d_off) d_off = buf.ctx.pos; |
| 295 | if (__put_user(d_off, &lastdirent->d_off)) | 297 | if (__put_user(d_off, &lastdirent->d_off)) |
| 296 | error = -EFAULT; | 298 | error = -EFAULT; |
| 297 | else | 299 | else |
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c index 6c2d136561cb..03e4ca5624d6 100644 --- a/fs/reiserfs/dir.c +++ b/fs/reiserfs/dir.c | |||
| @@ -13,14 +13,14 @@ | |||
| 13 | 13 | ||
| 14 | extern const struct reiserfs_key MIN_KEY; | 14 | extern const struct reiserfs_key MIN_KEY; |
| 15 | 15 | ||
| 16 | static int reiserfs_readdir(struct file *, void *, filldir_t); | 16 | static int reiserfs_readdir(struct file *, struct dir_context *); |
| 17 | static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, | 17 | static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, |
| 18 | int datasync); | 18 | int datasync); |
| 19 | 19 | ||
| 20 | const struct file_operations reiserfs_dir_operations = { | 20 | const struct file_operations reiserfs_dir_operations = { |
| 21 | .llseek = generic_file_llseek, | 21 | .llseek = generic_file_llseek, |
| 22 | .read = generic_read_dir, | 22 | .read = generic_read_dir, |
| 23 | .readdir = reiserfs_readdir, | 23 | .iterate = reiserfs_readdir, |
| 24 | .fsync = reiserfs_dir_fsync, | 24 | .fsync = reiserfs_dir_fsync, |
| 25 | .unlocked_ioctl = reiserfs_ioctl, | 25 | .unlocked_ioctl = reiserfs_ioctl, |
| 26 | #ifdef CONFIG_COMPAT | 26 | #ifdef CONFIG_COMPAT |
| @@ -50,18 +50,15 @@ static int reiserfs_dir_fsync(struct file *filp, loff_t start, loff_t end, | |||
| 50 | 50 | ||
| 51 | #define store_ih(where,what) copy_item_head (where, what) | 51 | #define store_ih(where,what) copy_item_head (where, what) |
| 52 | 52 | ||
| 53 | static inline bool is_privroot_deh(struct dentry *dir, | 53 | static inline bool is_privroot_deh(struct inode *dir, struct reiserfs_de_head *deh) |
| 54 | struct reiserfs_de_head *deh) | ||
| 55 | { | 54 | { |
| 56 | struct dentry *privroot = REISERFS_SB(dir->d_sb)->priv_root; | 55 | struct dentry *privroot = REISERFS_SB(dir->i_sb)->priv_root; |
| 57 | return (dir == dir->d_parent && privroot->d_inode && | 56 | return (privroot->d_inode && |
| 58 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); | 57 | deh->deh_objectid == INODE_PKEY(privroot->d_inode)->k_objectid); |
| 59 | } | 58 | } |
| 60 | 59 | ||
| 61 | int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | 60 | int reiserfs_readdir_inode(struct inode *inode, struct dir_context *ctx) |
| 62 | filldir_t filldir, loff_t *pos) | ||
| 63 | { | 61 | { |
| 64 | struct inode *inode = dentry->d_inode; | ||
| 65 | struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ | 62 | struct cpu_key pos_key; /* key of current position in the directory (key of directory entry) */ |
| 66 | INITIALIZE_PATH(path_to_entry); | 63 | INITIALIZE_PATH(path_to_entry); |
| 67 | struct buffer_head *bh; | 64 | struct buffer_head *bh; |
| @@ -81,7 +78,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
| 81 | 78 | ||
| 82 | /* form key for search the next directory entry using f_pos field of | 79 | /* form key for search the next directory entry using f_pos field of |
| 83 | file structure */ | 80 | file structure */ |
| 84 | make_cpu_key(&pos_key, inode, *pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); | 81 | make_cpu_key(&pos_key, inode, ctx->pos ?: DOT_OFFSET, TYPE_DIRENTRY, 3); |
| 85 | next_pos = cpu_key_k_offset(&pos_key); | 82 | next_pos = cpu_key_k_offset(&pos_key); |
| 86 | 83 | ||
| 87 | path_to_entry.reada = PATH_READA; | 84 | path_to_entry.reada = PATH_READA; |
| @@ -126,7 +123,6 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
| 126 | entry_num++, deh++) { | 123 | entry_num++, deh++) { |
| 127 | int d_reclen; | 124 | int d_reclen; |
| 128 | char *d_name; | 125 | char *d_name; |
| 129 | off_t d_off; | ||
| 130 | ino_t d_ino; | 126 | ino_t d_ino; |
| 131 | 127 | ||
| 132 | if (!de_visible(deh)) | 128 | if (!de_visible(deh)) |
| @@ -155,11 +151,10 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
| 155 | } | 151 | } |
| 156 | 152 | ||
| 157 | /* Ignore the .reiserfs_priv entry */ | 153 | /* Ignore the .reiserfs_priv entry */ |
| 158 | if (is_privroot_deh(dentry, deh)) | 154 | if (is_privroot_deh(inode, deh)) |
| 159 | continue; | 155 | continue; |
| 160 | 156 | ||
| 161 | d_off = deh_offset(deh); | 157 | ctx->pos = deh_offset(deh); |
| 162 | *pos = d_off; | ||
| 163 | d_ino = deh_objectid(deh); | 158 | d_ino = deh_objectid(deh); |
| 164 | if (d_reclen <= 32) { | 159 | if (d_reclen <= 32) { |
| 165 | local_buf = small_buf; | 160 | local_buf = small_buf; |
| @@ -187,9 +182,9 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
| 187 | * the write lock here for other waiters | 182 | * the write lock here for other waiters |
| 188 | */ | 183 | */ |
| 189 | reiserfs_write_unlock(inode->i_sb); | 184 | reiserfs_write_unlock(inode->i_sb); |
| 190 | if (filldir | 185 | if (!dir_emit |
| 191 | (dirent, local_buf, d_reclen, d_off, d_ino, | 186 | (ctx, local_buf, d_reclen, d_ino, |
| 192 | DT_UNKNOWN) < 0) { | 187 | DT_UNKNOWN)) { |
| 193 | reiserfs_write_lock(inode->i_sb); | 188 | reiserfs_write_lock(inode->i_sb); |
| 194 | if (local_buf != small_buf) { | 189 | if (local_buf != small_buf) { |
| 195 | kfree(local_buf); | 190 | kfree(local_buf); |
| @@ -237,7 +232,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent, | |||
| 237 | } /* while */ | 232 | } /* while */ |
| 238 | 233 | ||
| 239 | end: | 234 | end: |
| 240 | *pos = next_pos; | 235 | ctx->pos = next_pos; |
| 241 | pathrelse(&path_to_entry); | 236 | pathrelse(&path_to_entry); |
| 242 | reiserfs_check_path(&path_to_entry); | 237 | reiserfs_check_path(&path_to_entry); |
| 243 | out: | 238 | out: |
| @@ -245,10 +240,9 @@ out: | |||
| 245 | return ret; | 240 | return ret; |
| 246 | } | 241 | } |
| 247 | 242 | ||
| 248 | static int reiserfs_readdir(struct file *file, void *dirent, filldir_t filldir) | 243 | static int reiserfs_readdir(struct file *file, struct dir_context *ctx) |
| 249 | { | 244 | { |
| 250 | struct dentry *dentry = file->f_path.dentry; | 245 | return reiserfs_readdir_inode(file_inode(file), ctx); |
| 251 | return reiserfs_readdir_dentry(dentry, dirent, filldir, &file->f_pos); | ||
| 252 | } | 246 | } |
| 253 | 247 | ||
| 254 | /* compose directory item containing "." and ".." entries (entries are | 248 | /* compose directory item containing "." and ".." entries (entries are |
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index f844533792ee..0048cc16a6a8 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c | |||
| @@ -2975,16 +2975,19 @@ static int invalidatepage_can_drop(struct inode *inode, struct buffer_head *bh) | |||
| 2975 | } | 2975 | } |
| 2976 | 2976 | ||
| 2977 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ | 2977 | /* clm -- taken from fs/buffer.c:block_invalidate_page */ |
| 2978 | static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | 2978 | static void reiserfs_invalidatepage(struct page *page, unsigned int offset, |
| 2979 | unsigned int length) | ||
| 2979 | { | 2980 | { |
| 2980 | struct buffer_head *head, *bh, *next; | 2981 | struct buffer_head *head, *bh, *next; |
| 2981 | struct inode *inode = page->mapping->host; | 2982 | struct inode *inode = page->mapping->host; |
| 2982 | unsigned int curr_off = 0; | 2983 | unsigned int curr_off = 0; |
| 2984 | unsigned int stop = offset + length; | ||
| 2985 | int partial_page = (offset || length < PAGE_CACHE_SIZE); | ||
| 2983 | int ret = 1; | 2986 | int ret = 1; |
| 2984 | 2987 | ||
| 2985 | BUG_ON(!PageLocked(page)); | 2988 | BUG_ON(!PageLocked(page)); |
| 2986 | 2989 | ||
| 2987 | if (offset == 0) | 2990 | if (!partial_page) |
| 2988 | ClearPageChecked(page); | 2991 | ClearPageChecked(page); |
| 2989 | 2992 | ||
| 2990 | if (!page_has_buffers(page)) | 2993 | if (!page_has_buffers(page)) |
| @@ -2996,6 +2999,9 @@ static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 2996 | unsigned int next_off = curr_off + bh->b_size; | 2999 | unsigned int next_off = curr_off + bh->b_size; |
| 2997 | next = bh->b_this_page; | 3000 | next = bh->b_this_page; |
| 2998 | 3001 | ||
| 3002 | if (next_off > stop) | ||
| 3003 | goto out; | ||
| 3004 | |||
| 2999 | /* | 3005 | /* |
| 3000 | * is this block fully invalidated? | 3006 | * is this block fully invalidated? |
| 3001 | */ | 3007 | */ |
| @@ -3014,7 +3020,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned long offset) | |||
| 3014 | * The get_block cached value has been unconditionally invalidated, | 3020 | * The get_block cached value has been unconditionally invalidated, |
| 3015 | * so real IO is not possible anymore. | 3021 | * so real IO is not possible anymore. |
| 3016 | */ | 3022 | */ |
| 3017 | if (!offset && ret) { | 3023 | if (!partial_page && ret) { |
| 3018 | ret = try_to_release_page(page, 0); | 3024 | ret = try_to_release_page(page, 0); |
| 3019 | /* maybe should BUG_ON(!ret); - neilb */ | 3025 | /* maybe should BUG_ON(!ret); - neilb */ |
| 3020 | } | 3026 | } |
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 157e474ab303..3df5ce6c724d 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h | |||
| @@ -2709,7 +2709,7 @@ extern const struct inode_operations reiserfs_dir_inode_operations; | |||
| 2709 | extern const struct inode_operations reiserfs_symlink_inode_operations; | 2709 | extern const struct inode_operations reiserfs_symlink_inode_operations; |
| 2710 | extern const struct inode_operations reiserfs_special_inode_operations; | 2710 | extern const struct inode_operations reiserfs_special_inode_operations; |
| 2711 | extern const struct file_operations reiserfs_dir_operations; | 2711 | extern const struct file_operations reiserfs_dir_operations; |
| 2712 | int reiserfs_readdir_dentry(struct dentry *, void *, filldir_t, loff_t *); | 2712 | int reiserfs_readdir_inode(struct inode *, struct dir_context *); |
| 2713 | 2713 | ||
| 2714 | /* tail_conversion.c */ | 2714 | /* tail_conversion.c */ |
| 2715 | int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, | 2715 | int direct2indirect(struct reiserfs_transaction_handle *, struct inode *, |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 821bcf70e467..c69cdd749f09 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
| @@ -171,6 +171,7 @@ static struct dentry *open_xa_dir(const struct inode *inode, int flags) | |||
| 171 | * modifying extended attributes. This includes operations such as permissions | 171 | * modifying extended attributes. This includes operations such as permissions |
| 172 | * or ownership changes, object deletions, etc. */ | 172 | * or ownership changes, object deletions, etc. */ |
| 173 | struct reiserfs_dentry_buf { | 173 | struct reiserfs_dentry_buf { |
| 174 | struct dir_context ctx; | ||
| 174 | struct dentry *xadir; | 175 | struct dentry *xadir; |
| 175 | int count; | 176 | int count; |
| 176 | struct dentry *dentries[8]; | 177 | struct dentry *dentries[8]; |
| @@ -223,9 +224,8 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
| 223 | { | 224 | { |
| 224 | struct dentry *dir; | 225 | struct dentry *dir; |
| 225 | int i, err = 0; | 226 | int i, err = 0; |
| 226 | loff_t pos = 0; | ||
| 227 | struct reiserfs_dentry_buf buf = { | 227 | struct reiserfs_dentry_buf buf = { |
| 228 | .count = 0, | 228 | .ctx.actor = fill_with_dentries, |
| 229 | }; | 229 | }; |
| 230 | 230 | ||
| 231 | /* Skip out, an xattr has no xattrs associated with it */ | 231 | /* Skip out, an xattr has no xattrs associated with it */ |
| @@ -249,29 +249,27 @@ static int reiserfs_for_each_xattr(struct inode *inode, | |||
| 249 | reiserfs_write_lock(inode->i_sb); | 249 | reiserfs_write_lock(inode->i_sb); |
| 250 | 250 | ||
| 251 | buf.xadir = dir; | 251 | buf.xadir = dir; |
| 252 | err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos); | 252 | while (1) { |
| 253 | while ((err == 0 || err == -ENOSPC) && buf.count) { | 253 | err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx); |
| 254 | err = 0; | 254 | if (err) |
| 255 | 255 | break; | |
| 256 | for (i = 0; i < buf.count && buf.dentries[i]; i++) { | 256 | if (!buf.count) |
| 257 | int lerr = 0; | 257 | break; |
| 258 | for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) { | ||
| 258 | struct dentry *dentry = buf.dentries[i]; | 259 | struct dentry *dentry = buf.dentries[i]; |
| 259 | 260 | ||
| 260 | if (err == 0 && !S_ISDIR(dentry->d_inode->i_mode)) | 261 | if (!S_ISDIR(dentry->d_inode->i_mode)) |
| 261 | lerr = action(dentry, data); | 262 | err = action(dentry, data); |
| 262 | 263 | ||
| 263 | dput(dentry); | 264 | dput(dentry); |
| 264 | buf.dentries[i] = NULL; | 265 | buf.dentries[i] = NULL; |
| 265 | err = lerr ?: err; | ||
| 266 | } | 266 | } |
| 267 | if (err) | ||
| 268 | break; | ||
| 267 | buf.count = 0; | 269 | buf.count = 0; |
| 268 | if (!err) | ||
| 269 | err = reiserfs_readdir_dentry(dir, &buf, | ||
| 270 | fill_with_dentries, &pos); | ||
| 271 | } | 270 | } |
| 272 | mutex_unlock(&dir->d_inode->i_mutex); | 271 | mutex_unlock(&dir->d_inode->i_mutex); |
| 273 | 272 | ||
| 274 | /* Clean up after a failed readdir */ | ||
| 275 | cleanup_dentry_buf(&buf); | 273 | cleanup_dentry_buf(&buf); |
| 276 | 274 | ||
| 277 | if (!err) { | 275 | if (!err) { |
| @@ -800,6 +798,7 @@ int reiserfs_removexattr(struct dentry *dentry, const char *name) | |||
| 800 | } | 798 | } |
| 801 | 799 | ||
| 802 | struct listxattr_buf { | 800 | struct listxattr_buf { |
| 801 | struct dir_context ctx; | ||
| 803 | size_t size; | 802 | size_t size; |
| 804 | size_t pos; | 803 | size_t pos; |
| 805 | char *buf; | 804 | char *buf; |
| @@ -845,8 +844,8 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) | |||
| 845 | { | 844 | { |
| 846 | struct dentry *dir; | 845 | struct dentry *dir; |
| 847 | int err = 0; | 846 | int err = 0; |
| 848 | loff_t pos = 0; | ||
| 849 | struct listxattr_buf buf = { | 847 | struct listxattr_buf buf = { |
| 848 | .ctx.actor = listxattr_filler, | ||
| 850 | .dentry = dentry, | 849 | .dentry = dentry, |
| 851 | .buf = buffer, | 850 | .buf = buffer, |
| 852 | .size = buffer ? size : 0, | 851 | .size = buffer ? size : 0, |
| @@ -868,7 +867,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size) | |||
| 868 | } | 867 | } |
| 869 | 868 | ||
| 870 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); | 869 | mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR); |
| 871 | err = reiserfs_readdir_dentry(dir, &buf, listxattr_filler, &pos); | 870 | err = reiserfs_readdir_inode(dir->d_inode, &buf.ctx); |
| 872 | mutex_unlock(&dir->d_inode->i_mutex); | 871 | mutex_unlock(&dir->d_inode->i_mutex); |
| 873 | 872 | ||
| 874 | if (!err) | 873 | if (!err) |
diff --git a/fs/romfs/super.c b/fs/romfs/super.c index 15cbc41ee365..ff1d3d42e72a 100644 --- a/fs/romfs/super.c +++ b/fs/romfs/super.c | |||
| @@ -145,19 +145,18 @@ static const struct address_space_operations romfs_aops = { | |||
| 145 | /* | 145 | /* |
| 146 | * read the entries from a directory | 146 | * read the entries from a directory |
| 147 | */ | 147 | */ |
| 148 | static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 148 | static int romfs_readdir(struct file *file, struct dir_context *ctx) |
| 149 | { | 149 | { |
| 150 | struct inode *i = file_inode(filp); | 150 | struct inode *i = file_inode(file); |
| 151 | struct romfs_inode ri; | 151 | struct romfs_inode ri; |
| 152 | unsigned long offset, maxoff; | 152 | unsigned long offset, maxoff; |
| 153 | int j, ino, nextfh; | 153 | int j, ino, nextfh; |
| 154 | int stored = 0; | ||
| 155 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ | 154 | char fsname[ROMFS_MAXFN]; /* XXX dynamic? */ |
| 156 | int ret; | 155 | int ret; |
| 157 | 156 | ||
| 158 | maxoff = romfs_maxsize(i->i_sb); | 157 | maxoff = romfs_maxsize(i->i_sb); |
| 159 | 158 | ||
| 160 | offset = filp->f_pos; | 159 | offset = ctx->pos; |
| 161 | if (!offset) { | 160 | if (!offset) { |
| 162 | offset = i->i_ino & ROMFH_MASK; | 161 | offset = i->i_ino & ROMFH_MASK; |
| 163 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); | 162 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); |
| @@ -170,10 +169,10 @@ static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 170 | for (;;) { | 169 | for (;;) { |
| 171 | if (!offset || offset >= maxoff) { | 170 | if (!offset || offset >= maxoff) { |
| 172 | offset = maxoff; | 171 | offset = maxoff; |
| 173 | filp->f_pos = offset; | 172 | ctx->pos = offset; |
| 174 | goto out; | 173 | goto out; |
| 175 | } | 174 | } |
| 176 | filp->f_pos = offset; | 175 | ctx->pos = offset; |
| 177 | 176 | ||
| 178 | /* Fetch inode info */ | 177 | /* Fetch inode info */ |
| 179 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); | 178 | ret = romfs_dev_read(i->i_sb, offset, &ri, ROMFH_SIZE); |
| @@ -194,16 +193,14 @@ static int romfs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 194 | nextfh = be32_to_cpu(ri.next); | 193 | nextfh = be32_to_cpu(ri.next); |
| 195 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) | 194 | if ((nextfh & ROMFH_TYPE) == ROMFH_HRD) |
| 196 | ino = be32_to_cpu(ri.spec); | 195 | ino = be32_to_cpu(ri.spec); |
| 197 | if (filldir(dirent, fsname, j, offset, ino, | 196 | if (!dir_emit(ctx, fsname, j, ino, |
| 198 | romfs_dtype_table[nextfh & ROMFH_TYPE]) < 0) | 197 | romfs_dtype_table[nextfh & ROMFH_TYPE])) |
| 199 | goto out; | 198 | goto out; |
| 200 | 199 | ||
| 201 | stored++; | ||
| 202 | offset = nextfh & ROMFH_MASK; | 200 | offset = nextfh & ROMFH_MASK; |
| 203 | } | 201 | } |
| 204 | |||
| 205 | out: | 202 | out: |
| 206 | return stored; | 203 | return 0; |
| 207 | } | 204 | } |
| 208 | 205 | ||
| 209 | /* | 206 | /* |
| @@ -281,7 +278,7 @@ error: | |||
| 281 | 278 | ||
| 282 | static const struct file_operations romfs_dir_operations = { | 279 | static const struct file_operations romfs_dir_operations = { |
| 283 | .read = generic_read_dir, | 280 | .read = generic_read_dir, |
| 284 | .readdir = romfs_readdir, | 281 | .iterate = romfs_readdir, |
| 285 | .llseek = default_llseek, | 282 | .llseek = default_llseek, |
| 286 | }; | 283 | }; |
| 287 | 284 | ||
diff --git a/fs/splice.c b/fs/splice.c index e6b25598c8c4..d37431dd60a1 100644 --- a/fs/splice.c +++ b/fs/splice.c | |||
| @@ -1274,7 +1274,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, | |||
| 1274 | { | 1274 | { |
| 1275 | struct file *file = sd->u.file; | 1275 | struct file *file = sd->u.file; |
| 1276 | 1276 | ||
| 1277 | return do_splice_from(pipe, file, &file->f_pos, sd->total_len, | 1277 | return do_splice_from(pipe, file, sd->opos, sd->total_len, |
| 1278 | sd->flags); | 1278 | sd->flags); |
| 1279 | } | 1279 | } |
| 1280 | 1280 | ||
| @@ -1283,6 +1283,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, | |||
| 1283 | * @in: file to splice from | 1283 | * @in: file to splice from |
| 1284 | * @ppos: input file offset | 1284 | * @ppos: input file offset |
| 1285 | * @out: file to splice to | 1285 | * @out: file to splice to |
| 1286 | * @opos: output file offset | ||
| 1286 | * @len: number of bytes to splice | 1287 | * @len: number of bytes to splice |
| 1287 | * @flags: splice modifier flags | 1288 | * @flags: splice modifier flags |
| 1288 | * | 1289 | * |
| @@ -1294,7 +1295,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, | |||
| 1294 | * | 1295 | * |
| 1295 | */ | 1296 | */ |
| 1296 | long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | 1297 | long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, |
| 1297 | size_t len, unsigned int flags) | 1298 | loff_t *opos, size_t len, unsigned int flags) |
| 1298 | { | 1299 | { |
| 1299 | struct splice_desc sd = { | 1300 | struct splice_desc sd = { |
| 1300 | .len = len, | 1301 | .len = len, |
| @@ -1302,6 +1303,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, | |||
| 1302 | .flags = flags, | 1303 | .flags = flags, |
| 1303 | .pos = *ppos, | 1304 | .pos = *ppos, |
| 1304 | .u.file = out, | 1305 | .u.file = out, |
| 1306 | .opos = opos, | ||
| 1305 | }; | 1307 | }; |
| 1306 | long ret; | 1308 | long ret; |
| 1307 | 1309 | ||
| @@ -1325,7 +1327,7 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
| 1325 | { | 1327 | { |
| 1326 | struct pipe_inode_info *ipipe; | 1328 | struct pipe_inode_info *ipipe; |
| 1327 | struct pipe_inode_info *opipe; | 1329 | struct pipe_inode_info *opipe; |
| 1328 | loff_t offset, *off; | 1330 | loff_t offset; |
| 1329 | long ret; | 1331 | long ret; |
| 1330 | 1332 | ||
| 1331 | ipipe = get_pipe_info(in); | 1333 | ipipe = get_pipe_info(in); |
| @@ -1356,13 +1358,15 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
| 1356 | return -EINVAL; | 1358 | return -EINVAL; |
| 1357 | if (copy_from_user(&offset, off_out, sizeof(loff_t))) | 1359 | if (copy_from_user(&offset, off_out, sizeof(loff_t))) |
| 1358 | return -EFAULT; | 1360 | return -EFAULT; |
| 1359 | off = &offset; | 1361 | } else { |
| 1360 | } else | 1362 | offset = out->f_pos; |
| 1361 | off = &out->f_pos; | 1363 | } |
| 1362 | 1364 | ||
| 1363 | ret = do_splice_from(ipipe, out, off, len, flags); | 1365 | ret = do_splice_from(ipipe, out, &offset, len, flags); |
| 1364 | 1366 | ||
| 1365 | if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) | 1367 | if (!off_out) |
| 1368 | out->f_pos = offset; | ||
| 1369 | else if (copy_to_user(off_out, &offset, sizeof(loff_t))) | ||
| 1366 | ret = -EFAULT; | 1370 | ret = -EFAULT; |
| 1367 | 1371 | ||
| 1368 | return ret; | 1372 | return ret; |
| @@ -1376,13 +1380,15 @@ static long do_splice(struct file *in, loff_t __user *off_in, | |||
| 1376 | return -EINVAL; | 1380 | return -EINVAL; |
| 1377 | if (copy_from_user(&offset, off_in, sizeof(loff_t))) | 1381 | if (copy_from_user(&offset, off_in, sizeof(loff_t))) |
| 1378 | return -EFAULT; | 1382 | return -EFAULT; |
| 1379 | off = &offset; | 1383 | } else { |
| 1380 | } else | 1384 | offset = in->f_pos; |
| 1381 | off = &in->f_pos; | 1385 | } |
| 1382 | 1386 | ||
| 1383 | ret = do_splice_to(in, off, opipe, len, flags); | 1387 | ret = do_splice_to(in, &offset, opipe, len, flags); |
| 1384 | 1388 | ||
| 1385 | if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) | 1389 | if (!off_in) |
| 1390 | in->f_pos = offset; | ||
| 1391 | else if (copy_to_user(off_in, &offset, sizeof(loff_t))) | ||
| 1386 | ret = -EFAULT; | 1392 | ret = -EFAULT; |
| 1387 | 1393 | ||
| 1388 | return ret; | 1394 | return ret; |
diff --git a/fs/squashfs/dir.c b/fs/squashfs/dir.c index 57dc70ebbb19..f7f527bf8c10 100644 --- a/fs/squashfs/dir.c +++ b/fs/squashfs/dir.c | |||
| @@ -100,7 +100,7 @@ static int get_dir_index_using_offset(struct super_block *sb, | |||
| 100 | } | 100 | } |
| 101 | 101 | ||
| 102 | 102 | ||
| 103 | static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | 103 | static int squashfs_readdir(struct file *file, struct dir_context *ctx) |
| 104 | { | 104 | { |
| 105 | struct inode *inode = file_inode(file); | 105 | struct inode *inode = file_inode(file); |
| 106 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; | 106 | struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info; |
| @@ -127,11 +127,11 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 127 | * It also means that the external f_pos is offset by 3 from the | 127 | * It also means that the external f_pos is offset by 3 from the |
| 128 | * on-disk directory f_pos. | 128 | * on-disk directory f_pos. |
| 129 | */ | 129 | */ |
| 130 | while (file->f_pos < 3) { | 130 | while (ctx->pos < 3) { |
| 131 | char *name; | 131 | char *name; |
| 132 | int i_ino; | 132 | int i_ino; |
| 133 | 133 | ||
| 134 | if (file->f_pos == 0) { | 134 | if (ctx->pos == 0) { |
| 135 | name = "."; | 135 | name = "."; |
| 136 | size = 1; | 136 | size = 1; |
| 137 | i_ino = inode->i_ino; | 137 | i_ino = inode->i_ino; |
| @@ -141,24 +141,18 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 141 | i_ino = squashfs_i(inode)->parent; | 141 | i_ino = squashfs_i(inode)->parent; |
| 142 | } | 142 | } |
| 143 | 143 | ||
| 144 | TRACE("Calling filldir(%p, %s, %d, %lld, %d, %d)\n", | 144 | if (!dir_emit(ctx, name, size, i_ino, |
| 145 | dirent, name, size, file->f_pos, i_ino, | 145 | squashfs_filetype_table[1])) |
| 146 | squashfs_filetype_table[1]); | ||
| 147 | |||
| 148 | if (filldir(dirent, name, size, file->f_pos, i_ino, | ||
| 149 | squashfs_filetype_table[1]) < 0) { | ||
| 150 | TRACE("Filldir returned less than 0\n"); | ||
| 151 | goto finish; | 146 | goto finish; |
| 152 | } | ||
| 153 | 147 | ||
| 154 | file->f_pos += size; | 148 | ctx->pos += size; |
| 155 | } | 149 | } |
| 156 | 150 | ||
| 157 | length = get_dir_index_using_offset(inode->i_sb, &block, &offset, | 151 | length = get_dir_index_using_offset(inode->i_sb, &block, &offset, |
| 158 | squashfs_i(inode)->dir_idx_start, | 152 | squashfs_i(inode)->dir_idx_start, |
| 159 | squashfs_i(inode)->dir_idx_offset, | 153 | squashfs_i(inode)->dir_idx_offset, |
| 160 | squashfs_i(inode)->dir_idx_cnt, | 154 | squashfs_i(inode)->dir_idx_cnt, |
| 161 | file->f_pos); | 155 | ctx->pos); |
| 162 | 156 | ||
| 163 | while (length < i_size_read(inode)) { | 157 | while (length < i_size_read(inode)) { |
| 164 | /* | 158 | /* |
| @@ -198,7 +192,7 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 198 | 192 | ||
| 199 | length += sizeof(*dire) + size; | 193 | length += sizeof(*dire) + size; |
| 200 | 194 | ||
| 201 | if (file->f_pos >= length) | 195 | if (ctx->pos >= length) |
| 202 | continue; | 196 | continue; |
| 203 | 197 | ||
| 204 | dire->name[size] = '\0'; | 198 | dire->name[size] = '\0'; |
| @@ -206,22 +200,12 @@ static int squashfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 206 | ((short) le16_to_cpu(dire->inode_number)); | 200 | ((short) le16_to_cpu(dire->inode_number)); |
| 207 | type = le16_to_cpu(dire->type); | 201 | type = le16_to_cpu(dire->type); |
| 208 | 202 | ||
| 209 | TRACE("Calling filldir(%p, %s, %d, %lld, %x:%x, %d, %d)" | 203 | if (!dir_emit(ctx, dire->name, size, |
| 210 | "\n", dirent, dire->name, size, | ||
| 211 | file->f_pos, | ||
| 212 | le32_to_cpu(dirh.start_block), | ||
| 213 | le16_to_cpu(dire->offset), | ||
| 214 | inode_number, | ||
| 215 | squashfs_filetype_table[type]); | ||
| 216 | |||
| 217 | if (filldir(dirent, dire->name, size, file->f_pos, | ||
| 218 | inode_number, | 204 | inode_number, |
| 219 | squashfs_filetype_table[type]) < 0) { | 205 | squashfs_filetype_table[type])) |
| 220 | TRACE("Filldir returned less than 0\n"); | ||
| 221 | goto finish; | 206 | goto finish; |
| 222 | } | ||
| 223 | 207 | ||
| 224 | file->f_pos = length; | 208 | ctx->pos = length; |
| 225 | } | 209 | } |
| 226 | } | 210 | } |
| 227 | 211 | ||
| @@ -238,6 +222,6 @@ failed_read: | |||
| 238 | 222 | ||
| 239 | const struct file_operations squashfs_dir_ops = { | 223 | const struct file_operations squashfs_dir_ops = { |
| 240 | .read = generic_read_dir, | 224 | .read = generic_read_dir, |
| 241 | .readdir = squashfs_readdir, | 225 | .iterate = squashfs_readdir, |
| 242 | .llseek = default_llseek, | 226 | .llseek = default_llseek, |
| 243 | }; | 227 | }; |
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e8e0e71b29d5..e068e744dbdd 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
| @@ -74,7 +74,7 @@ static int sysfs_sd_compare(const struct sysfs_dirent *left, | |||
| 74 | } | 74 | } |
| 75 | 75 | ||
| 76 | /** | 76 | /** |
| 77 | * sysfs_link_subling - link sysfs_dirent into sibling rbtree | 77 | * sysfs_link_sibling - link sysfs_dirent into sibling rbtree |
| 78 | * @sd: sysfs_dirent of interest | 78 | * @sd: sysfs_dirent of interest |
| 79 | * | 79 | * |
| 80 | * Link @sd into its sibling rbtree which starts from | 80 | * Link @sd into its sibling rbtree which starts from |
| @@ -998,68 +998,38 @@ static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns, | |||
| 998 | return pos; | 998 | return pos; |
| 999 | } | 999 | } |
| 1000 | 1000 | ||
| 1001 | static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir) | 1001 | static int sysfs_readdir(struct file *file, struct dir_context *ctx) |
| 1002 | { | 1002 | { |
| 1003 | struct dentry *dentry = filp->f_path.dentry; | 1003 | struct dentry *dentry = file->f_path.dentry; |
| 1004 | struct sysfs_dirent * parent_sd = dentry->d_fsdata; | 1004 | struct sysfs_dirent * parent_sd = dentry->d_fsdata; |
| 1005 | struct sysfs_dirent *pos = filp->private_data; | 1005 | struct sysfs_dirent *pos = file->private_data; |
| 1006 | enum kobj_ns_type type; | 1006 | enum kobj_ns_type type; |
| 1007 | const void *ns; | 1007 | const void *ns; |
| 1008 | ino_t ino; | ||
| 1009 | loff_t off; | ||
| 1010 | 1008 | ||
| 1011 | type = sysfs_ns_type(parent_sd); | 1009 | type = sysfs_ns_type(parent_sd); |
| 1012 | ns = sysfs_info(dentry->d_sb)->ns[type]; | 1010 | ns = sysfs_info(dentry->d_sb)->ns[type]; |
| 1013 | 1011 | ||
| 1014 | if (filp->f_pos == 0) { | 1012 | if (!dir_emit_dots(file, ctx)) |
| 1015 | ino = parent_sd->s_ino; | 1013 | return 0; |
| 1016 | if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0) | ||
| 1017 | filp->f_pos++; | ||
| 1018 | else | ||
| 1019 | return 0; | ||
| 1020 | } | ||
| 1021 | if (filp->f_pos == 1) { | ||
| 1022 | if (parent_sd->s_parent) | ||
| 1023 | ino = parent_sd->s_parent->s_ino; | ||
| 1024 | else | ||
| 1025 | ino = parent_sd->s_ino; | ||
| 1026 | if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) == 0) | ||
| 1027 | filp->f_pos++; | ||
| 1028 | else | ||
| 1029 | return 0; | ||
| 1030 | } | ||
| 1031 | mutex_lock(&sysfs_mutex); | 1014 | mutex_lock(&sysfs_mutex); |
| 1032 | off = filp->f_pos; | 1015 | for (pos = sysfs_dir_pos(ns, parent_sd, ctx->pos, pos); |
| 1033 | for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos); | ||
| 1034 | pos; | 1016 | pos; |
| 1035 | pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) { | 1017 | pos = sysfs_dir_next_pos(ns, parent_sd, ctx->pos, pos)) { |
| 1036 | const char * name; | 1018 | const char *name = pos->s_name; |
| 1037 | unsigned int type; | 1019 | unsigned int type = dt_type(pos); |
| 1038 | int len, ret; | 1020 | int len = strlen(name); |
| 1039 | 1021 | ino_t ino = pos->s_ino; | |
| 1040 | name = pos->s_name; | 1022 | ctx->pos = pos->s_hash; |
| 1041 | len = strlen(name); | 1023 | file->private_data = sysfs_get(pos); |
| 1042 | ino = pos->s_ino; | ||
| 1043 | type = dt_type(pos); | ||
| 1044 | off = filp->f_pos = pos->s_hash; | ||
| 1045 | filp->private_data = sysfs_get(pos); | ||
| 1046 | 1024 | ||
| 1047 | mutex_unlock(&sysfs_mutex); | 1025 | mutex_unlock(&sysfs_mutex); |
| 1048 | ret = filldir(dirent, name, len, off, ino, type); | 1026 | if (!dir_emit(ctx, name, len, ino, type)) |
| 1027 | return 0; | ||
| 1049 | mutex_lock(&sysfs_mutex); | 1028 | mutex_lock(&sysfs_mutex); |
| 1050 | if (ret < 0) | ||
| 1051 | break; | ||
| 1052 | } | 1029 | } |
| 1053 | mutex_unlock(&sysfs_mutex); | 1030 | mutex_unlock(&sysfs_mutex); |
| 1054 | 1031 | file->private_data = NULL; | |
| 1055 | /* don't reference last entry if its refcount is dropped */ | 1032 | ctx->pos = INT_MAX; |
| 1056 | if (!pos) { | ||
| 1057 | filp->private_data = NULL; | ||
| 1058 | |||
| 1059 | /* EOF and not changed as 0 or 1 in read/write path */ | ||
| 1060 | if (off == filp->f_pos && off > 1) | ||
| 1061 | filp->f_pos = INT_MAX; | ||
| 1062 | } | ||
| 1063 | return 0; | 1033 | return 0; |
| 1064 | } | 1034 | } |
| 1065 | 1035 | ||
| @@ -1077,7 +1047,7 @@ static loff_t sysfs_dir_llseek(struct file *file, loff_t offset, int whence) | |||
| 1077 | 1047 | ||
| 1078 | const struct file_operations sysfs_dir_operations = { | 1048 | const struct file_operations sysfs_dir_operations = { |
| 1079 | .read = generic_read_dir, | 1049 | .read = generic_read_dir, |
| 1080 | .readdir = sysfs_readdir, | 1050 | .iterate = sysfs_readdir, |
| 1081 | .release = sysfs_dir_release, | 1051 | .release = sysfs_dir_release, |
| 1082 | .llseek = sysfs_dir_llseek, | 1052 | .llseek = sysfs_dir_llseek, |
| 1083 | }; | 1053 | }; |
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 602f56db0442..d2bb7ed8fa74 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
| @@ -449,10 +449,12 @@ void sysfs_notify_dirent(struct sysfs_dirent *sd) | |||
| 449 | 449 | ||
| 450 | spin_lock_irqsave(&sysfs_open_dirent_lock, flags); | 450 | spin_lock_irqsave(&sysfs_open_dirent_lock, flags); |
| 451 | 451 | ||
| 452 | od = sd->s_attr.open; | 452 | if (!WARN_ON(sysfs_type(sd) != SYSFS_KOBJ_ATTR)) { |
| 453 | if (od) { | 453 | od = sd->s_attr.open; |
| 454 | atomic_inc(&od->event); | 454 | if (od) { |
| 455 | wake_up_interruptible(&od->poll); | 455 | atomic_inc(&od->event); |
| 456 | wake_up_interruptible(&od->poll); | ||
| 457 | } | ||
| 456 | } | 458 | } |
| 457 | 459 | ||
| 458 | spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); | 460 | spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); |
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 0ce3ccf7f401..3e2837a633ed 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c | |||
| @@ -24,8 +24,6 @@ | |||
| 24 | #include <linux/security.h> | 24 | #include <linux/security.h> |
| 25 | #include "sysfs.h" | 25 | #include "sysfs.h" |
| 26 | 26 | ||
| 27 | extern struct super_block * sysfs_sb; | ||
| 28 | |||
| 29 | static const struct address_space_operations sysfs_aops = { | 27 | static const struct address_space_operations sysfs_aops = { |
| 30 | .readpage = simple_readpage, | 28 | .readpage = simple_readpage, |
| 31 | .write_begin = simple_write_begin, | 29 | .write_begin = simple_write_begin, |
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c index 3799e8dac3eb..d42291d08215 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c | |||
| @@ -18,12 +18,12 @@ | |||
| 18 | #include <linux/swap.h> | 18 | #include <linux/swap.h> |
| 19 | #include "sysv.h" | 19 | #include "sysv.h" |
| 20 | 20 | ||
| 21 | static int sysv_readdir(struct file *, void *, filldir_t); | 21 | static int sysv_readdir(struct file *, struct dir_context *); |
| 22 | 22 | ||
| 23 | const struct file_operations sysv_dir_operations = { | 23 | const struct file_operations sysv_dir_operations = { |
| 24 | .llseek = generic_file_llseek, | 24 | .llseek = generic_file_llseek, |
| 25 | .read = generic_read_dir, | 25 | .read = generic_read_dir, |
| 26 | .readdir = sysv_readdir, | 26 | .iterate = sysv_readdir, |
| 27 | .fsync = generic_file_fsync, | 27 | .fsync = generic_file_fsync, |
| 28 | }; | 28 | }; |
| 29 | 29 | ||
| @@ -65,18 +65,21 @@ static struct page * dir_get_page(struct inode *dir, unsigned long n) | |||
| 65 | return page; | 65 | return page; |
| 66 | } | 66 | } |
| 67 | 67 | ||
| 68 | static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) | 68 | static int sysv_readdir(struct file *file, struct dir_context *ctx) |
| 69 | { | 69 | { |
| 70 | unsigned long pos = filp->f_pos; | 70 | unsigned long pos = ctx->pos; |
| 71 | struct inode *inode = file_inode(filp); | 71 | struct inode *inode = file_inode(file); |
| 72 | struct super_block *sb = inode->i_sb; | 72 | struct super_block *sb = inode->i_sb; |
| 73 | unsigned offset = pos & ~PAGE_CACHE_MASK; | ||
| 74 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | ||
| 75 | unsigned long npages = dir_pages(inode); | 73 | unsigned long npages = dir_pages(inode); |
| 74 | unsigned offset; | ||
| 75 | unsigned long n; | ||
| 76 | 76 | ||
| 77 | pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); | 77 | ctx->pos = pos = (pos + SYSV_DIRSIZE-1) & ~(SYSV_DIRSIZE-1); |
| 78 | if (pos >= inode->i_size) | 78 | if (pos >= inode->i_size) |
| 79 | goto done; | 79 | return 0; |
| 80 | |||
| 81 | offset = pos & ~PAGE_CACHE_MASK; | ||
| 82 | n = pos >> PAGE_CACHE_SHIFT; | ||
| 80 | 83 | ||
| 81 | for ( ; n < npages; n++, offset = 0) { | 84 | for ( ; n < npages; n++, offset = 0) { |
| 82 | char *kaddr, *limit; | 85 | char *kaddr, *limit; |
| @@ -88,29 +91,21 @@ static int sysv_readdir(struct file * filp, void * dirent, filldir_t filldir) | |||
| 88 | kaddr = (char *)page_address(page); | 91 | kaddr = (char *)page_address(page); |
| 89 | de = (struct sysv_dir_entry *)(kaddr+offset); | 92 | de = (struct sysv_dir_entry *)(kaddr+offset); |
| 90 | limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE; | 93 | limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE; |
| 91 | for ( ;(char*)de <= limit; de++) { | 94 | for ( ;(char*)de <= limit; de++, ctx->pos += sizeof(*de)) { |
| 92 | char *name = de->name; | 95 | char *name = de->name; |
| 93 | int over; | ||
| 94 | 96 | ||
| 95 | if (!de->inode) | 97 | if (!de->inode) |
| 96 | continue; | 98 | continue; |
| 97 | 99 | ||
| 98 | offset = (char *)de - kaddr; | 100 | if (!dir_emit(ctx, name, strnlen(name,SYSV_NAMELEN), |
| 99 | |||
| 100 | over = filldir(dirent, name, strnlen(name,SYSV_NAMELEN), | ||
| 101 | ((loff_t)n<<PAGE_CACHE_SHIFT) | offset, | ||
| 102 | fs16_to_cpu(SYSV_SB(sb), de->inode), | 101 | fs16_to_cpu(SYSV_SB(sb), de->inode), |
| 103 | DT_UNKNOWN); | 102 | DT_UNKNOWN)) { |
| 104 | if (over) { | ||
| 105 | dir_put_page(page); | 103 | dir_put_page(page); |
| 106 | goto done; | 104 | return 0; |
| 107 | } | 105 | } |
| 108 | } | 106 | } |
| 109 | dir_put_page(page); | 107 | dir_put_page(page); |
| 110 | } | 108 | } |
| 111 | |||
| 112 | done: | ||
| 113 | filp->f_pos = ((loff_t)n << PAGE_CACHE_SHIFT) | offset; | ||
| 114 | return 0; | 109 | return 0; |
| 115 | } | 110 | } |
| 116 | 111 | ||
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index de08c92f2e23..6b4947f75af7 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c | |||
| @@ -346,38 +346,46 @@ static unsigned int vfs_dent_type(uint8_t type) | |||
| 346 | * This means that UBIFS cannot support NFS which requires full | 346 | * This means that UBIFS cannot support NFS which requires full |
| 347 | * 'seekdir()'/'telldir()' support. | 347 | * 'seekdir()'/'telldir()' support. |
| 348 | */ | 348 | */ |
| 349 | static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | 349 | static int ubifs_readdir(struct file *file, struct dir_context *ctx) |
| 350 | { | 350 | { |
| 351 | int err, over = 0; | 351 | int err; |
| 352 | struct qstr nm; | 352 | struct qstr nm; |
| 353 | union ubifs_key key; | 353 | union ubifs_key key; |
| 354 | struct ubifs_dent_node *dent; | 354 | struct ubifs_dent_node *dent; |
| 355 | struct inode *dir = file_inode(file); | 355 | struct inode *dir = file_inode(file); |
| 356 | struct ubifs_info *c = dir->i_sb->s_fs_info; | 356 | struct ubifs_info *c = dir->i_sb->s_fs_info; |
| 357 | 357 | ||
| 358 | dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, file->f_pos); | 358 | dbg_gen("dir ino %lu, f_pos %#llx", dir->i_ino, ctx->pos); |
| 359 | 359 | ||
| 360 | if (file->f_pos > UBIFS_S_KEY_HASH_MASK || file->f_pos == 2) | 360 | if (ctx->pos > UBIFS_S_KEY_HASH_MASK || ctx->pos == 2) |
| 361 | /* | 361 | /* |
| 362 | * The directory was seek'ed to a senseless position or there | 362 | * The directory was seek'ed to a senseless position or there |
| 363 | * are no more entries. | 363 | * are no more entries. |
| 364 | */ | 364 | */ |
| 365 | return 0; | 365 | return 0; |
| 366 | 366 | ||
| 367 | /* File positions 0 and 1 correspond to "." and ".." */ | 367 | if (file->f_version == 0) { |
| 368 | if (file->f_pos == 0) { | 368 | /* |
| 369 | ubifs_assert(!file->private_data); | 369 | * The file was seek'ed, which means that @file->private_data |
| 370 | over = filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR); | 370 | * is now invalid. This may also be just the first |
| 371 | if (over) | 371 | * 'ubifs_readdir()' invocation, in which case |
| 372 | return 0; | 372 | * @file->private_data is NULL, and the below code is |
| 373 | file->f_pos = 1; | 373 | * basically a no-op. |
| 374 | */ | ||
| 375 | kfree(file->private_data); | ||
| 376 | file->private_data = NULL; | ||
| 374 | } | 377 | } |
| 375 | 378 | ||
| 376 | if (file->f_pos == 1) { | 379 | /* |
| 380 | * 'generic_file_llseek()' unconditionally sets @file->f_version to | ||
| 381 | * zero, and we use this for detecting whether the file was seek'ed. | ||
| 382 | */ | ||
| 383 | file->f_version = 1; | ||
| 384 | |||
| 385 | /* File positions 0 and 1 correspond to "." and ".." */ | ||
| 386 | if (ctx->pos < 2) { | ||
| 377 | ubifs_assert(!file->private_data); | 387 | ubifs_assert(!file->private_data); |
| 378 | over = filldir(dirent, "..", 2, 1, | 388 | if (!dir_emit_dots(file, ctx)) |
| 379 | parent_ino(file->f_path.dentry), DT_DIR); | ||
| 380 | if (over) | ||
| 381 | return 0; | 389 | return 0; |
| 382 | 390 | ||
| 383 | /* Find the first entry in TNC and save it */ | 391 | /* Find the first entry in TNC and save it */ |
| @@ -389,7 +397,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 389 | goto out; | 397 | goto out; |
| 390 | } | 398 | } |
| 391 | 399 | ||
| 392 | file->f_pos = key_hash_flash(c, &dent->key); | 400 | ctx->pos = key_hash_flash(c, &dent->key); |
| 393 | file->private_data = dent; | 401 | file->private_data = dent; |
| 394 | } | 402 | } |
| 395 | 403 | ||
| @@ -397,17 +405,16 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 397 | if (!dent) { | 405 | if (!dent) { |
| 398 | /* | 406 | /* |
| 399 | * The directory was seek'ed to and is now readdir'ed. | 407 | * The directory was seek'ed to and is now readdir'ed. |
| 400 | * Find the entry corresponding to @file->f_pos or the | 408 | * Find the entry corresponding to @ctx->pos or the closest one. |
| 401 | * closest one. | ||
| 402 | */ | 409 | */ |
| 403 | dent_key_init_hash(c, &key, dir->i_ino, file->f_pos); | 410 | dent_key_init_hash(c, &key, dir->i_ino, ctx->pos); |
| 404 | nm.name = NULL; | 411 | nm.name = NULL; |
| 405 | dent = ubifs_tnc_next_ent(c, &key, &nm); | 412 | dent = ubifs_tnc_next_ent(c, &key, &nm); |
| 406 | if (IS_ERR(dent)) { | 413 | if (IS_ERR(dent)) { |
| 407 | err = PTR_ERR(dent); | 414 | err = PTR_ERR(dent); |
| 408 | goto out; | 415 | goto out; |
| 409 | } | 416 | } |
| 410 | file->f_pos = key_hash_flash(c, &dent->key); | 417 | ctx->pos = key_hash_flash(c, &dent->key); |
| 411 | file->private_data = dent; | 418 | file->private_data = dent; |
| 412 | } | 419 | } |
| 413 | 420 | ||
| @@ -419,10 +426,9 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 419 | ubifs_inode(dir)->creat_sqnum); | 426 | ubifs_inode(dir)->creat_sqnum); |
| 420 | 427 | ||
| 421 | nm.len = le16_to_cpu(dent->nlen); | 428 | nm.len = le16_to_cpu(dent->nlen); |
| 422 | over = filldir(dirent, dent->name, nm.len, file->f_pos, | 429 | if (!dir_emit(ctx, dent->name, nm.len, |
| 423 | le64_to_cpu(dent->inum), | 430 | le64_to_cpu(dent->inum), |
| 424 | vfs_dent_type(dent->type)); | 431 | vfs_dent_type(dent->type))) |
| 425 | if (over) | ||
| 426 | return 0; | 432 | return 0; |
| 427 | 433 | ||
| 428 | /* Switch to the next entry */ | 434 | /* Switch to the next entry */ |
| @@ -435,7 +441,7 @@ static int ubifs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 435 | } | 441 | } |
| 436 | 442 | ||
| 437 | kfree(file->private_data); | 443 | kfree(file->private_data); |
| 438 | file->f_pos = key_hash_flash(c, &dent->key); | 444 | ctx->pos = key_hash_flash(c, &dent->key); |
| 439 | file->private_data = dent; | 445 | file->private_data = dent; |
| 440 | cond_resched(); | 446 | cond_resched(); |
| 441 | } | 447 | } |
| @@ -448,18 +454,11 @@ out: | |||
| 448 | 454 | ||
| 449 | kfree(file->private_data); | 455 | kfree(file->private_data); |
| 450 | file->private_data = NULL; | 456 | file->private_data = NULL; |
| 451 | file->f_pos = 2; | 457 | /* 2 is a special value indicating that there are no more direntries */ |
| 458 | ctx->pos = 2; | ||
| 452 | return 0; | 459 | return 0; |
| 453 | } | 460 | } |
| 454 | 461 | ||
| 455 | /* If a directory is seeked, we have to free saved readdir() state */ | ||
| 456 | static loff_t ubifs_dir_llseek(struct file *file, loff_t offset, int whence) | ||
| 457 | { | ||
| 458 | kfree(file->private_data); | ||
| 459 | file->private_data = NULL; | ||
| 460 | return generic_file_llseek(file, offset, whence); | ||
| 461 | } | ||
| 462 | |||
| 463 | /* Free saved readdir() state when the directory is closed */ | 462 | /* Free saved readdir() state when the directory is closed */ |
| 464 | static int ubifs_dir_release(struct inode *dir, struct file *file) | 463 | static int ubifs_dir_release(struct inode *dir, struct file *file) |
| 465 | { | 464 | { |
| @@ -1177,10 +1176,10 @@ const struct inode_operations ubifs_dir_inode_operations = { | |||
| 1177 | }; | 1176 | }; |
| 1178 | 1177 | ||
| 1179 | const struct file_operations ubifs_dir_operations = { | 1178 | const struct file_operations ubifs_dir_operations = { |
| 1180 | .llseek = ubifs_dir_llseek, | 1179 | .llseek = generic_file_llseek, |
| 1181 | .release = ubifs_dir_release, | 1180 | .release = ubifs_dir_release, |
| 1182 | .read = generic_read_dir, | 1181 | .read = generic_read_dir, |
| 1183 | .readdir = ubifs_readdir, | 1182 | .iterate = ubifs_readdir, |
| 1184 | .fsync = ubifs_fsync, | 1183 | .fsync = ubifs_fsync, |
| 1185 | .unlocked_ioctl = ubifs_ioctl, | 1184 | .unlocked_ioctl = ubifs_ioctl, |
| 1186 | #ifdef CONFIG_COMPAT | 1185 | #ifdef CONFIG_COMPAT |
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 14374530784c..123c79b7261e 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -1277,13 +1277,14 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1277 | return err; | 1277 | return err; |
| 1278 | } | 1278 | } |
| 1279 | 1279 | ||
| 1280 | static void ubifs_invalidatepage(struct page *page, unsigned long offset) | 1280 | static void ubifs_invalidatepage(struct page *page, unsigned int offset, |
| 1281 | unsigned int length) | ||
| 1281 | { | 1282 | { |
| 1282 | struct inode *inode = page->mapping->host; | 1283 | struct inode *inode = page->mapping->host; |
| 1283 | struct ubifs_info *c = inode->i_sb->s_fs_info; | 1284 | struct ubifs_info *c = inode->i_sb->s_fs_info; |
| 1284 | 1285 | ||
| 1285 | ubifs_assert(PagePrivate(page)); | 1286 | ubifs_assert(PagePrivate(page)); |
| 1286 | if (offset) | 1287 | if (offset || length < PAGE_CACHE_SIZE) |
| 1287 | /* Partial page remains dirty */ | 1288 | /* Partial page remains dirty */ |
| 1288 | return; | 1289 | return; |
| 1289 | 1290 | ||
diff --git a/fs/udf/dir.c b/fs/udf/dir.c index b3e93f5e17c3..a012c51caffd 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c | |||
| @@ -35,14 +35,16 @@ | |||
| 35 | #include "udf_i.h" | 35 | #include "udf_i.h" |
| 36 | #include "udf_sb.h" | 36 | #include "udf_sb.h" |
| 37 | 37 | ||
| 38 | static int do_udf_readdir(struct inode *dir, struct file *filp, | 38 | |
| 39 | filldir_t filldir, void *dirent) | 39 | static int udf_readdir(struct file *file, struct dir_context *ctx) |
| 40 | { | 40 | { |
| 41 | struct inode *dir = file_inode(file); | ||
| 42 | struct udf_inode_info *iinfo = UDF_I(dir); | ||
| 41 | struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL}; | 43 | struct udf_fileident_bh fibh = { .sbh = NULL, .ebh = NULL}; |
| 42 | struct fileIdentDesc *fi = NULL; | 44 | struct fileIdentDesc *fi = NULL; |
| 43 | struct fileIdentDesc cfi; | 45 | struct fileIdentDesc cfi; |
| 44 | int block, iblock; | 46 | int block, iblock; |
| 45 | loff_t nf_pos = (filp->f_pos - 1) << 2; | 47 | loff_t nf_pos; |
| 46 | int flen; | 48 | int flen; |
| 47 | unsigned char *fname = NULL; | 49 | unsigned char *fname = NULL; |
| 48 | unsigned char *nameptr; | 50 | unsigned char *nameptr; |
| @@ -54,10 +56,14 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
| 54 | uint32_t elen; | 56 | uint32_t elen; |
| 55 | sector_t offset; | 57 | sector_t offset; |
| 56 | int i, num, ret = 0; | 58 | int i, num, ret = 0; |
| 57 | unsigned int dt_type; | ||
| 58 | struct extent_position epos = { NULL, 0, {0, 0} }; | 59 | struct extent_position epos = { NULL, 0, {0, 0} }; |
| 59 | struct udf_inode_info *iinfo; | ||
| 60 | 60 | ||
| 61 | if (ctx->pos == 0) { | ||
| 62 | if (!dir_emit_dot(file, ctx)) | ||
| 63 | return 0; | ||
| 64 | ctx->pos = 1; | ||
| 65 | } | ||
| 66 | nf_pos = (ctx->pos - 1) << 2; | ||
| 61 | if (nf_pos >= size) | 67 | if (nf_pos >= size) |
| 62 | goto out; | 68 | goto out; |
| 63 | 69 | ||
| @@ -71,7 +77,6 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
| 71 | nf_pos = udf_ext0_offset(dir); | 77 | nf_pos = udf_ext0_offset(dir); |
| 72 | 78 | ||
| 73 | fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); | 79 | fibh.soffset = fibh.eoffset = nf_pos & (dir->i_sb->s_blocksize - 1); |
| 74 | iinfo = UDF_I(dir); | ||
| 75 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { | 80 | if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) { |
| 76 | if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, | 81 | if (inode_bmap(dir, nf_pos >> dir->i_sb->s_blocksize_bits, |
| 77 | &epos, &eloc, &elen, &offset) | 82 | &epos, &eloc, &elen, &offset) |
| @@ -116,7 +121,9 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
| 116 | } | 121 | } |
| 117 | 122 | ||
| 118 | while (nf_pos < size) { | 123 | while (nf_pos < size) { |
| 119 | filp->f_pos = (nf_pos >> 2) + 1; | 124 | struct kernel_lb_addr tloc; |
| 125 | |||
| 126 | ctx->pos = (nf_pos >> 2) + 1; | ||
| 120 | 127 | ||
| 121 | fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, | 128 | fi = udf_fileident_read(dir, &nf_pos, &fibh, &cfi, &epos, &eloc, |
| 122 | &elen, &offset); | 129 | &elen, &offset); |
| @@ -155,24 +162,22 @@ static int do_udf_readdir(struct inode *dir, struct file *filp, | |||
| 155 | } | 162 | } |
| 156 | 163 | ||
| 157 | if (cfi.fileCharacteristics & FID_FILE_CHAR_PARENT) { | 164 | if (cfi.fileCharacteristics & FID_FILE_CHAR_PARENT) { |
| 158 | iblock = parent_ino(filp->f_path.dentry); | 165 | if (!dir_emit_dotdot(file, ctx)) |
| 159 | flen = 2; | 166 | goto out; |
| 160 | memcpy(fname, "..", flen); | 167 | continue; |
| 161 | dt_type = DT_DIR; | ||
| 162 | } else { | ||
| 163 | struct kernel_lb_addr tloc = lelb_to_cpu(cfi.icb.extLocation); | ||
| 164 | |||
| 165 | iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0); | ||
| 166 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); | ||
| 167 | dt_type = DT_UNKNOWN; | ||
| 168 | } | 168 | } |
| 169 | 169 | ||
| 170 | if (flen && filldir(dirent, fname, flen, filp->f_pos, | 170 | flen = udf_get_filename(dir->i_sb, nameptr, fname, lfi); |
| 171 | iblock, dt_type) < 0) | 171 | if (!flen) |
| 172 | continue; | ||
| 173 | |||
| 174 | tloc = lelb_to_cpu(cfi.icb.extLocation); | ||
| 175 | iblock = udf_get_lb_pblock(dir->i_sb, &tloc, 0); | ||
| 176 | if (!dir_emit(ctx, fname, flen, iblock, DT_UNKNOWN)) | ||
| 172 | goto out; | 177 | goto out; |
| 173 | } /* end while */ | 178 | } /* end while */ |
| 174 | 179 | ||
| 175 | filp->f_pos = (nf_pos >> 2) + 1; | 180 | ctx->pos = (nf_pos >> 2) + 1; |
| 176 | 181 | ||
| 177 | out: | 182 | out: |
| 178 | if (fibh.sbh != fibh.ebh) | 183 | if (fibh.sbh != fibh.ebh) |
| @@ -184,27 +189,11 @@ out: | |||
| 184 | return ret; | 189 | return ret; |
| 185 | } | 190 | } |
| 186 | 191 | ||
| 187 | static int udf_readdir(struct file *filp, void *dirent, filldir_t filldir) | ||
| 188 | { | ||
| 189 | struct inode *dir = file_inode(filp); | ||
| 190 | int result; | ||
| 191 | |||
| 192 | if (filp->f_pos == 0) { | ||
| 193 | if (filldir(dirent, ".", 1, filp->f_pos, dir->i_ino, DT_DIR) < 0) { | ||
| 194 | return 0; | ||
| 195 | } | ||
| 196 | filp->f_pos++; | ||
| 197 | } | ||
| 198 | |||
| 199 | result = do_udf_readdir(dir, filp, filldir, dirent); | ||
| 200 | return result; | ||
| 201 | } | ||
| 202 | |||
| 203 | /* readdir and lookup functions */ | 192 | /* readdir and lookup functions */ |
| 204 | const struct file_operations udf_dir_operations = { | 193 | const struct file_operations udf_dir_operations = { |
| 205 | .llseek = generic_file_llseek, | 194 | .llseek = generic_file_llseek, |
| 206 | .read = generic_read_dir, | 195 | .read = generic_read_dir, |
| 207 | .readdir = udf_readdir, | 196 | .iterate = udf_readdir, |
| 208 | .unlocked_ioctl = udf_ioctl, | 197 | .unlocked_ioctl = udf_ioctl, |
| 209 | .fsync = generic_file_fsync, | 198 | .fsync = generic_file_fsync, |
| 210 | }; | 199 | }; |
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 3a75ca09c506..0ecc2cebed8f 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c | |||
| @@ -430,16 +430,16 @@ ufs_validate_entry(struct super_block *sb, char *base, | |||
| 430 | * This is blatantly stolen from ext2fs | 430 | * This is blatantly stolen from ext2fs |
| 431 | */ | 431 | */ |
| 432 | static int | 432 | static int |
| 433 | ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | 433 | ufs_readdir(struct file *file, struct dir_context *ctx) |
| 434 | { | 434 | { |
| 435 | loff_t pos = filp->f_pos; | 435 | loff_t pos = ctx->pos; |
| 436 | struct inode *inode = file_inode(filp); | 436 | struct inode *inode = file_inode(file); |
| 437 | struct super_block *sb = inode->i_sb; | 437 | struct super_block *sb = inode->i_sb; |
| 438 | unsigned int offset = pos & ~PAGE_CACHE_MASK; | 438 | unsigned int offset = pos & ~PAGE_CACHE_MASK; |
| 439 | unsigned long n = pos >> PAGE_CACHE_SHIFT; | 439 | unsigned long n = pos >> PAGE_CACHE_SHIFT; |
| 440 | unsigned long npages = ufs_dir_pages(inode); | 440 | unsigned long npages = ufs_dir_pages(inode); |
| 441 | unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); | 441 | unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1); |
| 442 | int need_revalidate = filp->f_version != inode->i_version; | 442 | int need_revalidate = file->f_version != inode->i_version; |
| 443 | unsigned flags = UFS_SB(sb)->s_flags; | 443 | unsigned flags = UFS_SB(sb)->s_flags; |
| 444 | 444 | ||
| 445 | UFSD("BEGIN\n"); | 445 | UFSD("BEGIN\n"); |
| @@ -457,16 +457,16 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 457 | ufs_error(sb, __func__, | 457 | ufs_error(sb, __func__, |
| 458 | "bad page in #%lu", | 458 | "bad page in #%lu", |
| 459 | inode->i_ino); | 459 | inode->i_ino); |
| 460 | filp->f_pos += PAGE_CACHE_SIZE - offset; | 460 | ctx->pos += PAGE_CACHE_SIZE - offset; |
| 461 | return -EIO; | 461 | return -EIO; |
| 462 | } | 462 | } |
| 463 | kaddr = page_address(page); | 463 | kaddr = page_address(page); |
| 464 | if (unlikely(need_revalidate)) { | 464 | if (unlikely(need_revalidate)) { |
| 465 | if (offset) { | 465 | if (offset) { |
| 466 | offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); | 466 | offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask); |
| 467 | filp->f_pos = (n<<PAGE_CACHE_SHIFT) + offset; | 467 | ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset; |
| 468 | } | 468 | } |
| 469 | filp->f_version = inode->i_version; | 469 | file->f_version = inode->i_version; |
| 470 | need_revalidate = 0; | 470 | need_revalidate = 0; |
| 471 | } | 471 | } |
| 472 | de = (struct ufs_dir_entry *)(kaddr+offset); | 472 | de = (struct ufs_dir_entry *)(kaddr+offset); |
| @@ -479,11 +479,8 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 479 | return -EIO; | 479 | return -EIO; |
| 480 | } | 480 | } |
| 481 | if (de->d_ino) { | 481 | if (de->d_ino) { |
| 482 | int over; | ||
| 483 | unsigned char d_type = DT_UNKNOWN; | 482 | unsigned char d_type = DT_UNKNOWN; |
| 484 | 483 | ||
| 485 | offset = (char *)de - kaddr; | ||
| 486 | |||
| 487 | UFSD("filldir(%s,%u)\n", de->d_name, | 484 | UFSD("filldir(%s,%u)\n", de->d_name, |
| 488 | fs32_to_cpu(sb, de->d_ino)); | 485 | fs32_to_cpu(sb, de->d_ino)); |
| 489 | UFSD("namlen %u\n", ufs_get_de_namlen(sb, de)); | 486 | UFSD("namlen %u\n", ufs_get_de_namlen(sb, de)); |
| @@ -491,16 +488,15 @@ ufs_readdir(struct file *filp, void *dirent, filldir_t filldir) | |||
| 491 | if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) | 488 | if ((flags & UFS_DE_MASK) == UFS_DE_44BSD) |
| 492 | d_type = de->d_u.d_44.d_type; | 489 | d_type = de->d_u.d_44.d_type; |
| 493 | 490 | ||
| 494 | over = filldir(dirent, de->d_name, | 491 | if (!dir_emit(ctx, de->d_name, |
| 495 | ufs_get_de_namlen(sb, de), | 492 | ufs_get_de_namlen(sb, de), |
| 496 | (n<<PAGE_CACHE_SHIFT) | offset, | 493 | fs32_to_cpu(sb, de->d_ino), |
| 497 | fs32_to_cpu(sb, de->d_ino), d_type); | 494 | d_type)) { |
| 498 | if (over) { | ||
| 499 | ufs_put_page(page); | 495 | ufs_put_page(page); |
| 500 | return 0; | 496 | return 0; |
| 501 | } | 497 | } |
| 502 | } | 498 | } |
| 503 | filp->f_pos += fs16_to_cpu(sb, de->d_reclen); | 499 | ctx->pos += fs16_to_cpu(sb, de->d_reclen); |
| 504 | } | 500 | } |
| 505 | ufs_put_page(page); | 501 | ufs_put_page(page); |
| 506 | } | 502 | } |
| @@ -660,7 +656,7 @@ not_empty: | |||
| 660 | 656 | ||
| 661 | const struct file_operations ufs_dir_operations = { | 657 | const struct file_operations ufs_dir_operations = { |
| 662 | .read = generic_read_dir, | 658 | .read = generic_read_dir, |
| 663 | .readdir = ufs_readdir, | 659 | .iterate = ufs_readdir, |
| 664 | .fsync = generic_file_fsync, | 660 | .fsync = generic_file_fsync, |
| 665 | .llseek = generic_file_llseek, | 661 | .llseek = generic_file_llseek, |
| 666 | }; | 662 | }; |
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 41a695048be7..596ec71da00e 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c | |||
| @@ -843,10 +843,12 @@ xfs_cluster_write( | |||
| 843 | STATIC void | 843 | STATIC void |
| 844 | xfs_vm_invalidatepage( | 844 | xfs_vm_invalidatepage( |
| 845 | struct page *page, | 845 | struct page *page, |
| 846 | unsigned long offset) | 846 | unsigned int offset, |
| 847 | unsigned int length) | ||
| 847 | { | 848 | { |
| 848 | trace_xfs_invalidatepage(page->mapping->host, page, offset); | 849 | trace_xfs_invalidatepage(page->mapping->host, page, offset, |
| 849 | block_invalidatepage(page, offset); | 850 | length); |
| 851 | block_invalidatepage(page, offset, length); | ||
| 850 | } | 852 | } |
| 851 | 853 | ||
| 852 | /* | 854 | /* |
| @@ -910,7 +912,7 @@ next_buffer: | |||
| 910 | 912 | ||
| 911 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | 913 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
| 912 | out_invalidate: | 914 | out_invalidate: |
| 913 | xfs_vm_invalidatepage(page, 0); | 915 | xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE); |
| 914 | return; | 916 | return; |
| 915 | } | 917 | } |
| 916 | 918 | ||
| @@ -940,7 +942,7 @@ xfs_vm_writepage( | |||
| 940 | int count = 0; | 942 | int count = 0; |
| 941 | int nonblocking = 0; | 943 | int nonblocking = 0; |
| 942 | 944 | ||
| 943 | trace_xfs_writepage(inode, page, 0); | 945 | trace_xfs_writepage(inode, page, 0, 0); |
| 944 | 946 | ||
| 945 | ASSERT(page_has_buffers(page)); | 947 | ASSERT(page_has_buffers(page)); |
| 946 | 948 | ||
| @@ -1171,7 +1173,7 @@ xfs_vm_releasepage( | |||
| 1171 | { | 1173 | { |
| 1172 | int delalloc, unwritten; | 1174 | int delalloc, unwritten; |
| 1173 | 1175 | ||
| 1174 | trace_xfs_releasepage(page->mapping->host, page, 0); | 1176 | trace_xfs_releasepage(page->mapping->host, page, 0, 0); |
| 1175 | 1177 | ||
| 1176 | xfs_count_page_state(page, &delalloc, &unwritten); | 1178 | xfs_count_page_state(page, &delalloc, &unwritten); |
| 1177 | 1179 | ||
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index b26a50f9921d..8f023dee404d 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c | |||
| @@ -368,10 +368,8 @@ xfs_dir_removename( | |||
| 368 | int | 368 | int |
| 369 | xfs_readdir( | 369 | xfs_readdir( |
| 370 | xfs_inode_t *dp, | 370 | xfs_inode_t *dp, |
| 371 | void *dirent, | 371 | struct dir_context *ctx, |
| 372 | size_t bufsize, | 372 | size_t bufsize) |
| 373 | xfs_off_t *offset, | ||
| 374 | filldir_t filldir) | ||
| 375 | { | 373 | { |
| 376 | int rval; /* return value */ | 374 | int rval; /* return value */ |
| 377 | int v; /* type-checking value */ | 375 | int v; /* type-checking value */ |
| @@ -385,14 +383,13 @@ xfs_readdir( | |||
| 385 | XFS_STATS_INC(xs_dir_getdents); | 383 | XFS_STATS_INC(xs_dir_getdents); |
| 386 | 384 | ||
| 387 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) | 385 | if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) |
| 388 | rval = xfs_dir2_sf_getdents(dp, dirent, offset, filldir); | 386 | rval = xfs_dir2_sf_getdents(dp, ctx); |
| 389 | else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) | 387 | else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) |
| 390 | ; | 388 | ; |
| 391 | else if (v) | 389 | else if (v) |
| 392 | rval = xfs_dir2_block_getdents(dp, dirent, offset, filldir); | 390 | rval = xfs_dir2_block_getdents(dp, ctx); |
| 393 | else | 391 | else |
| 394 | rval = xfs_dir2_leaf_getdents(dp, dirent, bufsize, offset, | 392 | rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); |
| 395 | filldir); | ||
| 396 | return rval; | 393 | return rval; |
| 397 | } | 394 | } |
| 398 | 395 | ||
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index e59f5fc816fe..09aea0247d96 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c | |||
| @@ -569,9 +569,7 @@ xfs_dir2_block_addname( | |||
| 569 | int /* error */ | 569 | int /* error */ |
| 570 | xfs_dir2_block_getdents( | 570 | xfs_dir2_block_getdents( |
| 571 | xfs_inode_t *dp, /* incore inode */ | 571 | xfs_inode_t *dp, /* incore inode */ |
| 572 | void *dirent, | 572 | struct dir_context *ctx) |
| 573 | xfs_off_t *offset, | ||
| 574 | filldir_t filldir) | ||
| 575 | { | 573 | { |
| 576 | xfs_dir2_data_hdr_t *hdr; /* block header */ | 574 | xfs_dir2_data_hdr_t *hdr; /* block header */ |
| 577 | struct xfs_buf *bp; /* buffer for block */ | 575 | struct xfs_buf *bp; /* buffer for block */ |
| @@ -589,7 +587,7 @@ xfs_dir2_block_getdents( | |||
| 589 | /* | 587 | /* |
| 590 | * If the block number in the offset is out of range, we're done. | 588 | * If the block number in the offset is out of range, we're done. |
| 591 | */ | 589 | */ |
| 592 | if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) | 590 | if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) |
| 593 | return 0; | 591 | return 0; |
| 594 | 592 | ||
| 595 | error = xfs_dir3_block_read(NULL, dp, &bp); | 593 | error = xfs_dir3_block_read(NULL, dp, &bp); |
| @@ -600,7 +598,7 @@ xfs_dir2_block_getdents( | |||
| 600 | * Extract the byte offset we start at from the seek pointer. | 598 | * Extract the byte offset we start at from the seek pointer. |
| 601 | * We'll skip entries before this. | 599 | * We'll skip entries before this. |
| 602 | */ | 600 | */ |
| 603 | wantoff = xfs_dir2_dataptr_to_off(mp, *offset); | 601 | wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos); |
| 604 | hdr = bp->b_addr; | 602 | hdr = bp->b_addr; |
| 605 | xfs_dir3_data_check(dp, bp); | 603 | xfs_dir3_data_check(dp, bp); |
| 606 | /* | 604 | /* |
| @@ -639,13 +637,12 @@ xfs_dir2_block_getdents( | |||
| 639 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | 637 | cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, |
| 640 | (char *)dep - (char *)hdr); | 638 | (char *)dep - (char *)hdr); |
| 641 | 639 | ||
| 640 | ctx->pos = cook & 0x7fffffff; | ||
| 642 | /* | 641 | /* |
| 643 | * If it didn't fit, set the final offset to here & return. | 642 | * If it didn't fit, set the final offset to here & return. |
| 644 | */ | 643 | */ |
| 645 | if (filldir(dirent, (char *)dep->name, dep->namelen, | 644 | if (!dir_emit(ctx, (char *)dep->name, dep->namelen, |
| 646 | cook & 0x7fffffff, be64_to_cpu(dep->inumber), | 645 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) { |
| 647 | DT_UNKNOWN)) { | ||
| 648 | *offset = cook & 0x7fffffff; | ||
| 649 | xfs_trans_brelse(NULL, bp); | 646 | xfs_trans_brelse(NULL, bp); |
| 650 | return 0; | 647 | return 0; |
| 651 | } | 648 | } |
| @@ -655,7 +652,7 @@ xfs_dir2_block_getdents( | |||
| 655 | * Reached the end of the block. | 652 | * Reached the end of the block. |
| 656 | * Set the offset to a non-existent block 1 and return. | 653 | * Set the offset to a non-existent block 1 and return. |
| 657 | */ | 654 | */ |
| 658 | *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & | 655 | ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & |
| 659 | 0x7fffffff; | 656 | 0x7fffffff; |
| 660 | xfs_trans_brelse(NULL, bp); | 657 | xfs_trans_brelse(NULL, bp); |
| 661 | return 0; | 658 | return 0; |
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index da71a1819d78..e0cc1243a8aa 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c | |||
| @@ -1300,10 +1300,8 @@ out: | |||
| 1300 | int /* error */ | 1300 | int /* error */ |
| 1301 | xfs_dir2_leaf_getdents( | 1301 | xfs_dir2_leaf_getdents( |
| 1302 | xfs_inode_t *dp, /* incore directory inode */ | 1302 | xfs_inode_t *dp, /* incore directory inode */ |
| 1303 | void *dirent, | 1303 | struct dir_context *ctx, |
| 1304 | size_t bufsize, | 1304 | size_t bufsize) |
| 1305 | xfs_off_t *offset, | ||
| 1306 | filldir_t filldir) | ||
| 1307 | { | 1305 | { |
| 1308 | struct xfs_buf *bp = NULL; /* data block buffer */ | 1306 | struct xfs_buf *bp = NULL; /* data block buffer */ |
| 1309 | xfs_dir2_data_hdr_t *hdr; /* data block header */ | 1307 | xfs_dir2_data_hdr_t *hdr; /* data block header */ |
| @@ -1322,7 +1320,7 @@ xfs_dir2_leaf_getdents( | |||
| 1322 | * If the offset is at or past the largest allowed value, | 1320 | * If the offset is at or past the largest allowed value, |
| 1323 | * give up right away. | 1321 | * give up right away. |
| 1324 | */ | 1322 | */ |
| 1325 | if (*offset >= XFS_DIR2_MAX_DATAPTR) | 1323 | if (ctx->pos >= XFS_DIR2_MAX_DATAPTR) |
| 1326 | return 0; | 1324 | return 0; |
| 1327 | 1325 | ||
| 1328 | mp = dp->i_mount; | 1326 | mp = dp->i_mount; |
| @@ -1343,7 +1341,7 @@ xfs_dir2_leaf_getdents( | |||
| 1343 | * Inside the loop we keep the main offset value as a byte offset | 1341 | * Inside the loop we keep the main offset value as a byte offset |
| 1344 | * in the directory file. | 1342 | * in the directory file. |
| 1345 | */ | 1343 | */ |
| 1346 | curoff = xfs_dir2_dataptr_to_byte(mp, *offset); | 1344 | curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos); |
| 1347 | 1345 | ||
| 1348 | /* | 1346 | /* |
| 1349 | * Force this conversion through db so we truncate the offset | 1347 | * Force this conversion through db so we truncate the offset |
| @@ -1444,8 +1442,8 @@ xfs_dir2_leaf_getdents( | |||
| 1444 | dep = (xfs_dir2_data_entry_t *)ptr; | 1442 | dep = (xfs_dir2_data_entry_t *)ptr; |
| 1445 | length = xfs_dir2_data_entsize(dep->namelen); | 1443 | length = xfs_dir2_data_entsize(dep->namelen); |
| 1446 | 1444 | ||
| 1447 | if (filldir(dirent, (char *)dep->name, dep->namelen, | 1445 | ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; |
| 1448 | xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff, | 1446 | if (!dir_emit(ctx, (char *)dep->name, dep->namelen, |
| 1449 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) | 1447 | be64_to_cpu(dep->inumber), DT_UNKNOWN)) |
| 1450 | break; | 1448 | break; |
| 1451 | 1449 | ||
| @@ -1462,9 +1460,9 @@ xfs_dir2_leaf_getdents( | |||
| 1462 | * All done. Set output offset value to current offset. | 1460 | * All done. Set output offset value to current offset. |
| 1463 | */ | 1461 | */ |
| 1464 | if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) | 1462 | if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) |
| 1465 | *offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; | 1463 | ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff; |
| 1466 | else | 1464 | else |
| 1467 | *offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; | 1465 | ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; |
| 1468 | kmem_free(map_info); | 1466 | kmem_free(map_info); |
| 1469 | if (bp) | 1467 | if (bp) |
| 1470 | xfs_trans_brelse(NULL, bp); | 1468 | xfs_trans_brelse(NULL, bp); |
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 7cf573c88aad..0511cda4a712 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h | |||
| @@ -33,8 +33,8 @@ extern int xfs_dir_cilookup_result(struct xfs_da_args *args, | |||
| 33 | extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; | 33 | extern const struct xfs_buf_ops xfs_dir3_block_buf_ops; |
| 34 | 34 | ||
| 35 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); | 35 | extern int xfs_dir2_block_addname(struct xfs_da_args *args); |
| 36 | extern int xfs_dir2_block_getdents(struct xfs_inode *dp, void *dirent, | 36 | extern int xfs_dir2_block_getdents(struct xfs_inode *dp, |
| 37 | xfs_off_t *offset, filldir_t filldir); | 37 | struct dir_context *ctx); |
| 38 | extern int xfs_dir2_block_lookup(struct xfs_da_args *args); | 38 | extern int xfs_dir2_block_lookup(struct xfs_da_args *args); |
| 39 | extern int xfs_dir2_block_removename(struct xfs_da_args *args); | 39 | extern int xfs_dir2_block_removename(struct xfs_da_args *args); |
| 40 | extern int xfs_dir2_block_replace(struct xfs_da_args *args); | 40 | extern int xfs_dir2_block_replace(struct xfs_da_args *args); |
| @@ -91,8 +91,8 @@ extern void xfs_dir3_leaf_compact(struct xfs_da_args *args, | |||
| 91 | extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, | 91 | extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr, |
| 92 | struct xfs_dir2_leaf_entry *ents, int *indexp, | 92 | struct xfs_dir2_leaf_entry *ents, int *indexp, |
| 93 | int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); | 93 | int *lowstalep, int *highstalep, int *lowlogp, int *highlogp); |
| 94 | extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent, | 94 | extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, struct dir_context *ctx, |
| 95 | size_t bufsize, xfs_off_t *offset, filldir_t filldir); | 95 | size_t bufsize); |
| 96 | extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, | 96 | extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno, |
| 97 | struct xfs_buf **bpp, __uint16_t magic); | 97 | struct xfs_buf **bpp, __uint16_t magic); |
| 98 | extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, | 98 | extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, |
| @@ -153,8 +153,7 @@ extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp, | |||
| 153 | int size, xfs_dir2_sf_hdr_t *sfhp); | 153 | int size, xfs_dir2_sf_hdr_t *sfhp); |
| 154 | extern int xfs_dir2_sf_addname(struct xfs_da_args *args); | 154 | extern int xfs_dir2_sf_addname(struct xfs_da_args *args); |
| 155 | extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); | 155 | extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino); |
| 156 | extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, void *dirent, | 156 | extern int xfs_dir2_sf_getdents(struct xfs_inode *dp, struct dir_context *ctx); |
| 157 | xfs_off_t *offset, filldir_t filldir); | ||
| 158 | extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); | 157 | extern int xfs_dir2_sf_lookup(struct xfs_da_args *args); |
| 159 | extern int xfs_dir2_sf_removename(struct xfs_da_args *args); | 158 | extern int xfs_dir2_sf_removename(struct xfs_da_args *args); |
| 160 | extern int xfs_dir2_sf_replace(struct xfs_da_args *args); | 159 | extern int xfs_dir2_sf_replace(struct xfs_da_args *args); |
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index 6157424dbf8f..97676a347da1 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c | |||
| @@ -768,9 +768,7 @@ xfs_dir2_sf_create( | |||
| 768 | int /* error */ | 768 | int /* error */ |
| 769 | xfs_dir2_sf_getdents( | 769 | xfs_dir2_sf_getdents( |
| 770 | xfs_inode_t *dp, /* incore directory inode */ | 770 | xfs_inode_t *dp, /* incore directory inode */ |
| 771 | void *dirent, | 771 | struct dir_context *ctx) |
| 772 | xfs_off_t *offset, | ||
| 773 | filldir_t filldir) | ||
| 774 | { | 772 | { |
| 775 | int i; /* shortform entry number */ | 773 | int i; /* shortform entry number */ |
| 776 | xfs_mount_t *mp; /* filesystem mount point */ | 774 | xfs_mount_t *mp; /* filesystem mount point */ |
| @@ -802,7 +800,7 @@ xfs_dir2_sf_getdents( | |||
| 802 | /* | 800 | /* |
| 803 | * If the block number in the offset is out of range, we're done. | 801 | * If the block number in the offset is out of range, we're done. |
| 804 | */ | 802 | */ |
| 805 | if (xfs_dir2_dataptr_to_db(mp, *offset) > mp->m_dirdatablk) | 803 | if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) |
| 806 | return 0; | 804 | return 0; |
| 807 | 805 | ||
| 808 | /* | 806 | /* |
| @@ -819,22 +817,20 @@ xfs_dir2_sf_getdents( | |||
| 819 | /* | 817 | /* |
| 820 | * Put . entry unless we're starting past it. | 818 | * Put . entry unless we're starting past it. |
| 821 | */ | 819 | */ |
| 822 | if (*offset <= dot_offset) { | 820 | if (ctx->pos <= dot_offset) { |
| 823 | if (filldir(dirent, ".", 1, dot_offset & 0x7fffffff, dp->i_ino, DT_DIR)) { | 821 | ctx->pos = dot_offset & 0x7fffffff; |
| 824 | *offset = dot_offset & 0x7fffffff; | 822 | if (!dir_emit(ctx, ".", 1, dp->i_ino, DT_DIR)) |
| 825 | return 0; | 823 | return 0; |
| 826 | } | ||
| 827 | } | 824 | } |
| 828 | 825 | ||
| 829 | /* | 826 | /* |
| 830 | * Put .. entry unless we're starting past it. | 827 | * Put .. entry unless we're starting past it. |
| 831 | */ | 828 | */ |
| 832 | if (*offset <= dotdot_offset) { | 829 | if (ctx->pos <= dotdot_offset) { |
| 833 | ino = xfs_dir2_sf_get_parent_ino(sfp); | 830 | ino = xfs_dir2_sf_get_parent_ino(sfp); |
| 834 | if (filldir(dirent, "..", 2, dotdot_offset & 0x7fffffff, ino, DT_DIR)) { | 831 | ctx->pos = dotdot_offset & 0x7fffffff; |
| 835 | *offset = dotdot_offset & 0x7fffffff; | 832 | if (!dir_emit(ctx, "..", 2, ino, DT_DIR)) |
| 836 | return 0; | 833 | return 0; |
| 837 | } | ||
| 838 | } | 834 | } |
| 839 | 835 | ||
| 840 | /* | 836 | /* |
| @@ -845,21 +841,20 @@ xfs_dir2_sf_getdents( | |||
| 845 | off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, | 841 | off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, |
| 846 | xfs_dir2_sf_get_offset(sfep)); | 842 | xfs_dir2_sf_get_offset(sfep)); |
| 847 | 843 | ||
| 848 | if (*offset > off) { | 844 | if (ctx->pos > off) { |
| 849 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); | 845 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); |
| 850 | continue; | 846 | continue; |
| 851 | } | 847 | } |
| 852 | 848 | ||
| 853 | ino = xfs_dir2_sfe_get_ino(sfp, sfep); | 849 | ino = xfs_dir2_sfe_get_ino(sfp, sfep); |
| 854 | if (filldir(dirent, (char *)sfep->name, sfep->namelen, | 850 | ctx->pos = off & 0x7fffffff; |
| 855 | off & 0x7fffffff, ino, DT_UNKNOWN)) { | 851 | if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, |
| 856 | *offset = off & 0x7fffffff; | 852 | ino, DT_UNKNOWN)) |
| 857 | return 0; | 853 | return 0; |
| 858 | } | ||
| 859 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); | 854 | sfep = xfs_dir2_sf_nextentry(sfp, sfep); |
| 860 | } | 855 | } |
| 861 | 856 | ||
| 862 | *offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & | 857 | ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & |
| 863 | 0x7fffffff; | 858 | 0x7fffffff; |
| 864 | return 0; | 859 | return 0; |
| 865 | } | 860 | } |
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index a5f2042aec8b..0ad2b95fca12 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c | |||
| @@ -906,11 +906,10 @@ xfs_file_release( | |||
| 906 | 906 | ||
| 907 | STATIC int | 907 | STATIC int |
| 908 | xfs_file_readdir( | 908 | xfs_file_readdir( |
| 909 | struct file *filp, | 909 | struct file *file, |
| 910 | void *dirent, | 910 | struct dir_context *ctx) |
| 911 | filldir_t filldir) | ||
| 912 | { | 911 | { |
| 913 | struct inode *inode = file_inode(filp); | 912 | struct inode *inode = file_inode(file); |
| 914 | xfs_inode_t *ip = XFS_I(inode); | 913 | xfs_inode_t *ip = XFS_I(inode); |
| 915 | int error; | 914 | int error; |
| 916 | size_t bufsize; | 915 | size_t bufsize; |
| @@ -929,8 +928,7 @@ xfs_file_readdir( | |||
| 929 | */ | 928 | */ |
| 930 | bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); | 929 | bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); |
| 931 | 930 | ||
| 932 | error = xfs_readdir(ip, dirent, bufsize, | 931 | error = xfs_readdir(ip, ctx, bufsize); |
| 933 | (xfs_off_t *)&filp->f_pos, filldir); | ||
| 934 | if (error) | 932 | if (error) |
| 935 | return -error; | 933 | return -error; |
| 936 | return 0; | 934 | return 0; |
| @@ -1432,7 +1430,7 @@ const struct file_operations xfs_file_operations = { | |||
| 1432 | const struct file_operations xfs_dir_file_operations = { | 1430 | const struct file_operations xfs_dir_file_operations = { |
| 1433 | .open = xfs_dir_open, | 1431 | .open = xfs_dir_open, |
| 1434 | .read = generic_read_dir, | 1432 | .read = generic_read_dir, |
| 1435 | .readdir = xfs_file_readdir, | 1433 | .iterate = xfs_file_readdir, |
| 1436 | .llseek = generic_file_llseek, | 1434 | .llseek = generic_file_llseek, |
| 1437 | .unlocked_ioctl = xfs_file_ioctl, | 1435 | .unlocked_ioctl = xfs_file_ioctl, |
| 1438 | #ifdef CONFIG_COMPAT | 1436 | #ifdef CONFIG_COMPAT |
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index aa4db3307d36..a04701de6bbd 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h | |||
| @@ -974,14 +974,16 @@ DEFINE_RW_EVENT(xfs_file_splice_read); | |||
| 974 | DEFINE_RW_EVENT(xfs_file_splice_write); | 974 | DEFINE_RW_EVENT(xfs_file_splice_write); |
| 975 | 975 | ||
| 976 | DECLARE_EVENT_CLASS(xfs_page_class, | 976 | DECLARE_EVENT_CLASS(xfs_page_class, |
| 977 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off), | 977 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, |
| 978 | TP_ARGS(inode, page, off), | 978 | unsigned int len), |
| 979 | TP_ARGS(inode, page, off, len), | ||
| 979 | TP_STRUCT__entry( | 980 | TP_STRUCT__entry( |
| 980 | __field(dev_t, dev) | 981 | __field(dev_t, dev) |
| 981 | __field(xfs_ino_t, ino) | 982 | __field(xfs_ino_t, ino) |
| 982 | __field(pgoff_t, pgoff) | 983 | __field(pgoff_t, pgoff) |
| 983 | __field(loff_t, size) | 984 | __field(loff_t, size) |
| 984 | __field(unsigned long, offset) | 985 | __field(unsigned long, offset) |
| 986 | __field(unsigned int, length) | ||
| 985 | __field(int, delalloc) | 987 | __field(int, delalloc) |
| 986 | __field(int, unwritten) | 988 | __field(int, unwritten) |
| 987 | ), | 989 | ), |
| @@ -995,24 +997,27 @@ DECLARE_EVENT_CLASS(xfs_page_class, | |||
| 995 | __entry->pgoff = page_offset(page); | 997 | __entry->pgoff = page_offset(page); |
| 996 | __entry->size = i_size_read(inode); | 998 | __entry->size = i_size_read(inode); |
| 997 | __entry->offset = off; | 999 | __entry->offset = off; |
| 1000 | __entry->length = len; | ||
| 998 | __entry->delalloc = delalloc; | 1001 | __entry->delalloc = delalloc; |
| 999 | __entry->unwritten = unwritten; | 1002 | __entry->unwritten = unwritten; |
| 1000 | ), | 1003 | ), |
| 1001 | TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " | 1004 | TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx " |
| 1002 | "delalloc %d unwritten %d", | 1005 | "length %x delalloc %d unwritten %d", |
| 1003 | MAJOR(__entry->dev), MINOR(__entry->dev), | 1006 | MAJOR(__entry->dev), MINOR(__entry->dev), |
| 1004 | __entry->ino, | 1007 | __entry->ino, |
| 1005 | __entry->pgoff, | 1008 | __entry->pgoff, |
| 1006 | __entry->size, | 1009 | __entry->size, |
| 1007 | __entry->offset, | 1010 | __entry->offset, |
| 1011 | __entry->length, | ||
| 1008 | __entry->delalloc, | 1012 | __entry->delalloc, |
| 1009 | __entry->unwritten) | 1013 | __entry->unwritten) |
| 1010 | ) | 1014 | ) |
| 1011 | 1015 | ||
| 1012 | #define DEFINE_PAGE_EVENT(name) \ | 1016 | #define DEFINE_PAGE_EVENT(name) \ |
| 1013 | DEFINE_EVENT(xfs_page_class, name, \ | 1017 | DEFINE_EVENT(xfs_page_class, name, \ |
| 1014 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off), \ | 1018 | TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \ |
| 1015 | TP_ARGS(inode, page, off)) | 1019 | unsigned int len), \ |
| 1020 | TP_ARGS(inode, page, off, len)) | ||
| 1016 | DEFINE_PAGE_EVENT(xfs_writepage); | 1021 | DEFINE_PAGE_EVENT(xfs_writepage); |
| 1017 | DEFINE_PAGE_EVENT(xfs_releasepage); | 1022 | DEFINE_PAGE_EVENT(xfs_releasepage); |
| 1018 | DEFINE_PAGE_EVENT(xfs_invalidatepage); | 1023 | DEFINE_PAGE_EVENT(xfs_invalidatepage); |
diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index 5163022d9808..38c67c34d73f 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h | |||
| @@ -31,8 +31,7 @@ int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, | |||
| 31 | struct xfs_inode *ip); | 31 | struct xfs_inode *ip); |
| 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, | 32 | int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, |
| 33 | struct xfs_name *target_name); | 33 | struct xfs_name *target_name); |
| 34 | int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, | 34 | int xfs_readdir(struct xfs_inode *dp, struct dir_context *ctx, size_t bufsize); |
| 35 | xfs_off_t *offset, filldir_t filldir); | ||
| 36 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, | 35 | int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, |
| 37 | const char *target_path, umode_t mode, struct xfs_inode **ipp); | 36 | const char *target_path, umode_t mode, struct xfs_inode **ipp); |
| 38 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); | 37 | int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); |
