Diffstat (limited to 'fs')
300 files changed, 16655 insertions, 7772 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 047c791427aa..24eb01087b6d 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -30,8 +30,8 @@
 #include <linux/parser.h>
 #include <linux/idr.h>
 #include <net/9p/9p.h>
-#include <net/9p/transport.h>
 #include <net/9p/client.h>
+#include <net/9p/transport.h>
 #include "v9fs.h"
 #include "v9fs_vfs.h"
 
@@ -55,7 +55,7 @@ enum {
 	Opt_err
 };
 
-static match_table_t tokens = {
+static const match_table_t tokens = {
 	{Opt_debug, "debug=%x"},
 	{Opt_dfltuid, "dfltuid=%u"},
 	{Opt_dfltgid, "dfltgid=%u"},
@@ -234,7 +234,7 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	if (!v9ses->clnt->dotu)
 		v9ses->flags &= ~V9FS_EXTENDED;
 
-	v9ses->maxdata = v9ses->clnt->msize;
+	v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
 
 	/* for legacy mode, fall back to V9FS_ACCESS_ANY */
 	if (!v9fs_extended(v9ses) &&
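A note on the maxdata change above: the session's maximum payload is now the negotiated msize minus the fixed 9P read/write message header, so a single request always fits in one protocol message. A rough sketch of the arithmetic, with the header size treated as an assumption for illustration rather than a value taken from this diff:

/* Illustrative only: the 24-byte header overhead is an assumption here. */
#define EXAMPLE_MSIZE	8192	/* hypothetical negotiated message size */
#define EXAMPLE_IOHDRSZ	24	/* assumed Twrite/Rread header overhead */

static unsigned int example_maxdata(void)
{
	/* largest read/write payload that still fits in one 9P message */
	return EXAMPLE_MSIZE - EXAMPLE_IOHDRSZ;	/* 8168 bytes */
}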
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 57997fa14e69..c295ba786edd 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -46,9 +46,11 @@ extern struct dentry_operations v9fs_cached_dentry_operations;
 
 struct inode *v9fs_get_inode(struct super_block *sb, int mode);
 ino_t v9fs_qid2ino(struct p9_qid *qid);
-void v9fs_stat2inode(struct p9_stat *, struct inode *, struct super_block *);
+void v9fs_stat2inode(struct p9_wstat *, struct inode *, struct super_block *);
 int v9fs_dir_release(struct inode *inode, struct file *filp);
 int v9fs_file_open(struct inode *inode, struct file *file);
-void v9fs_inode2stat(struct inode *inode, struct p9_stat *stat);
+void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
 void v9fs_dentry_release(struct dentry *);
 int v9fs_uflags2omode(int uflags, int extended);
+
+ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 97d3aed57983..6fcb1e7095cf 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -38,7 +38,6 @@
 
 #include "v9fs.h"
 #include "v9fs_vfs.h"
-#include "fid.h"
 
 /**
  * v9fs_vfs_readpage - read an entire page in from 9P
@@ -53,14 +52,12 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
 	int retval;
 	loff_t offset;
 	char *buffer;
-	struct p9_fid *fid;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "\n");
-	fid = filp->private_data;
 	buffer = kmap(page);
 	offset = page_offset(page);
 
-	retval = p9_client_readn(fid, buffer, offset, PAGE_CACHE_SIZE);
+	retval = v9fs_file_readn(filp, buffer, NULL, offset, PAGE_CACHE_SIZE);
 	if (retval < 0)
 		goto done;
 
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index e298fe194093..873cd31baa47 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -45,7 +45,7 @@
  *
  */
 
-static inline int dt_type(struct p9_stat *mistat)
+static inline int dt_type(struct p9_wstat *mistat)
 {
 	unsigned long perm = mistat->mode;
 	int rettype = DT_REG;
@@ -69,32 +69,58 @@ static inline int dt_type(struct p9_stat *mistat)
 static int v9fs_dir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 {
 	int over;
+	struct p9_wstat st;
+	int err;
 	struct p9_fid *fid;
-	struct v9fs_session_info *v9ses;
-	struct inode *inode;
-	struct p9_stat *st;
+	int buflen;
+	char *statbuf;
+	int n, i = 0;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", filp->f_path.dentry->d_name.name);
-	inode = filp->f_path.dentry->d_inode;
-	v9ses = v9fs_inode2v9ses(inode);
 	fid = filp->private_data;
-	while ((st = p9_client_dirread(fid, filp->f_pos)) != NULL) {
-		if (IS_ERR(st))
-			return PTR_ERR(st);
 
-		over = filldir(dirent, st->name.str, st->name.len, filp->f_pos,
-			v9fs_qid2ino(&st->qid), dt_type(st));
+	buflen = fid->clnt->msize - P9_IOHDRSZ;
+	statbuf = kmalloc(buflen, GFP_KERNEL);
+	if (!statbuf)
+		return -ENOMEM;
 
-		if (over)
+	while (1) {
+		err = v9fs_file_readn(filp, statbuf, NULL, buflen,
+			fid->rdir_fpos);
+		if (err <= 0)
 			break;
 
-		filp->f_pos += st->size;
-		kfree(st);
-		st = NULL;
+		n = err;
+		while (i < n) {
+			err = p9stat_read(statbuf + i, buflen-i, &st,
+				fid->clnt->dotu);
+			if (err) {
+				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
+				err = -EIO;
+				p9stat_free(&st);
+				goto free_and_exit;
+			}
+
+			i += st.size+2;
+			fid->rdir_fpos += st.size+2;
+
+			over = filldir(dirent, st.name, strlen(st.name),
+				filp->f_pos, v9fs_qid2ino(&st.qid), dt_type(&st));
+
+			filp->f_pos += st.size+2;
+
+			p9stat_free(&st);
+
+			if (over) {
+				err = 0;
+				goto free_and_exit;
+			}
+		}
 	}
 
-	kfree(st);
-	return 0;
+free_and_exit:
+	kfree(statbuf);
+	return err;
 }
 
 
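The reworked readdir above no longer asks the client layer for one parsed dirent at a time; it pulls a raw chunk of directory data with v9fs_file_readn() and then walks the packed stat records with p9stat_read(), advancing by st.size + 2 because each record is preceded by a two-byte size field that does not count itself. A minimal user-space sketch of that walk, with a hypothetical parse_stat() decoder standing in for the kernel helper:

#include <stdint.h>
#include <stdio.h>

struct example_stat {
	uint16_t size;	/* bytes that follow the 2-byte size field */
};

/* Hypothetical stand-in for p9stat_read(): returns 0 on success. */
static int parse_stat(const unsigned char *buf, int len, struct example_stat *st)
{
	if (len < 2)
		return -1;
	st->size = buf[0] | (buf[1] << 8);	/* 9P integers are little-endian */
	if (st->size + 2 > len)
		return -1;
	return 0;
}

static void walk_dir_buffer(const unsigned char *buf, int n)
{
	struct example_stat st;
	int i = 0;

	while (i < n && parse_stat(buf + i, n - i, &st) == 0) {
		printf("record of %u bytes at offset %d\n", (unsigned)st.size, i);
		i += st.size + 2;	/* skip the size field plus the record */
	}
}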
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 52944d2249a4..041c52692284 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -120,23 +120,72 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 }
 
 /**
- * v9fs_file_read - read from a file
+ * v9fs_file_readn - read from a file
  * @filp: file pointer to read
  * @data: data buffer to read data into
+ * @udata: user data buffer to read data into
  * @count: size of buffer
  * @offset: offset at which to read data
  *
 */
+
+ssize_t
+v9fs_file_readn(struct file *filp, char *data, char __user *udata, u32 count,
+	       u64 offset)
+{
+	int n, total;
+	struct p9_fid *fid = filp->private_data;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "fid %d offset %llu count %d\n", fid->fid,
+		(long long unsigned) offset, count);
+
+	n = 0;
+	total = 0;
+	do {
+		n = p9_client_read(fid, data, udata, offset, count);
+		if (n <= 0)
+			break;
+
+		if (data)
+			data += n;
+		if (udata)
+			udata += n;
+
+		offset += n;
+		count -= n;
+		total += n;
+	} while (count > 0 && n == (fid->clnt->msize - P9_IOHDRSZ));
+
+	if (n < 0)
+		total = n;
+
+	return total;
+}
+
+/**
+ * v9fs_file_read - read from a file
+ * @filp: file pointer to read
+ * @udata: user data buffer to read data into
+ * @count: size of buffer
+ * @offset: offset at which to read data
+ *
+ */
+
 static ssize_t
-v9fs_file_read(struct file *filp, char __user * data, size_t count,
+v9fs_file_read(struct file *filp, char __user *udata, size_t count,
 	       loff_t * offset)
 {
 	int ret;
 	struct p9_fid *fid;
 
-	P9_DPRINTK(P9_DEBUG_VFS, "\n");
+	P9_DPRINTK(P9_DEBUG_VFS, "count %d offset %lld\n", count, *offset);
 	fid = filp->private_data;
-	ret = p9_client_uread(fid, data, *offset, count);
+
+	if (count > (fid->clnt->msize - P9_IOHDRSZ))
+		ret = v9fs_file_readn(filp, NULL, udata, count, *offset);
+	else
+		ret = p9_client_read(fid, NULL, udata, *offset, count);
+
 	if (ret > 0)
 		*offset += ret;
 
@@ -156,19 +205,38 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
 		size_t count, loff_t * offset)
 {
-	int ret;
+	int n, rsize, total = 0;
 	struct p9_fid *fid;
+	struct p9_client *clnt;
 	struct inode *inode = filp->f_path.dentry->d_inode;
+	int origin = *offset;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "data %p count %d offset %x\n", data,
 		(int)count, (int)*offset);
 
 	fid = filp->private_data;
-	ret = p9_client_uwrite(fid, data, *offset, count);
-	if (ret > 0) {
-		invalidate_inode_pages2_range(inode->i_mapping, *offset,
-			*offset+ret);
-		*offset += ret;
+	clnt = fid->clnt;
+
+	rsize = fid->iounit;
+	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ)
+		rsize = clnt->msize - P9_IOHDRSZ;
+
+	do {
+		if (count < rsize)
+			rsize = count;
+
+		n = p9_client_write(fid, NULL, data+total, *offset+total,
+			rsize);
+		if (n <= 0)
+			break;
+		count -= n;
+		total += n;
+	} while (count > 0);
+
+	if (total > 0) {
+		invalidate_inode_pages2_range(inode->i_mapping, origin,
+			origin+total);
+		*offset += total;
 	}
 
 	if (*offset > inode->i_size) {
@@ -176,7 +244,10 @@ v9fs_file_write(struct file *filp, const char __user * data,
 		inode->i_blocks = (inode->i_size + 512 - 1) >> 9;
 	}
 
-	return ret;
+	if (n < 0)
+		return n;
+
+	return total;
 }
 
 static const struct file_operations v9fs_cached_file_operations = {
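Both the new v9fs_file_readn() helper and the reworked write path above follow the same pattern: split the request into chunks no larger than the per-message limit (fid->iounit, or msize - P9_IOHDRSZ when that is unset or too large) and loop until the request is satisfied or a transfer comes up short. A stripped-down user-space model of that loop, with transfer_chunk() standing in for p9_client_read()/p9_client_write() rather than being a real API:

#include <stddef.h>

/* Hypothetical single-message transfer: returns bytes moved, 0 on EOF,
 * negative on error. */
typedef int (*chunk_fn)(char *buf, size_t count, long long offset);

long long transfer_all(chunk_fn transfer_chunk, char *buf, size_t count,
		       long long offset, size_t limit)
{
	long long total = 0;
	int n = 0;

	do {
		size_t chunk = count < limit ? count : limit;

		n = transfer_chunk(buf + total, chunk, offset + total);
		if (n <= 0)
			break;
		count -= (size_t)n;
		total += n;
	} while (count > 0);

	/* like the kernel loops above, an error is reported even after a
	 * partial transfer */
	return n < 0 ? n : total;
}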
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index e83aa5ebe861..8314d3f43b71 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -334,7 +334,7 @@ v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 {
 	int err, umode;
 	struct inode *ret;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	ret = NULL;
 	st = p9_client_stat(fid);
@@ -417,6 +417,8 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	struct p9_fid *dfid, *ofid, *fid;
 	struct inode *inode;
 
+	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
+
 	err = 0;
 	ofid = NULL;
 	fid = NULL;
@@ -424,6 +426,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	dfid = v9fs_fid_clone(dentry->d_parent);
 	if (IS_ERR(dfid)) {
 		err = PTR_ERR(dfid);
+		P9_DPRINTK(P9_DEBUG_VFS, "fid clone failed %d\n", err);
 		dfid = NULL;
 		goto error;
 	}
@@ -432,18 +435,22 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	ofid = p9_client_walk(dfid, 0, NULL, 1);
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		ofid = NULL;
 		goto error;
 	}
 
 	err = p9_client_fcreate(ofid, name, perm, mode, extension);
-	if (err < 0)
+	if (err < 0) {
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
 		goto error;
+	}
 
 	/* now walk from the parent so we can get unopened fid */
 	fid = p9_client_walk(dfid, 1, &name, 0);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
+		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
 		fid = NULL;
 		goto error;
 	} else
@@ -453,6 +460,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
 	if (IS_ERR(inode)) {
 		err = PTR_ERR(inode);
+		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
 		goto error;
 	}
 
@@ -734,7 +742,7 @@ v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	int err;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "dentry: %p\n", dentry);
 	err = -EPERM;
@@ -815,10 +823,9 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  */
 
 void
-v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
+v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
 	struct super_block *sb)
 {
-	int n;
 	char ext[32];
 	struct v9fs_session_info *v9ses = sb->s_fs_info;
 
@@ -842,11 +849,7 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 		int major = -1;
 		int minor = -1;
 
-		n = stat->extension.len;
-		if (n > sizeof(ext)-1)
-			n = sizeof(ext)-1;
-		memmove(ext, stat->extension.str, n);
-		ext[n] = 0;
+		strncpy(ext, stat->extension, sizeof(ext));
 		sscanf(ext, "%c %u %u", &type, &major, &minor);
 		switch (type) {
 		case 'c':
@@ -857,10 +860,11 @@ v9fs_stat2inode(struct p9_stat *stat, struct inode *inode,
 			break;
 		default:
 			P9_DPRINTK(P9_DEBUG_ERROR,
-				"Unknown special type %c (%.*s)\n", type,
-				stat->extension.len, stat->extension.str);
+				"Unknown special type %c %s\n", type,
+				stat->extension);
 		};
 		inode->i_rdev = MKDEV(major, minor);
+		init_special_inode(inode, inode->i_mode, inode->i_rdev);
 	} else
 		inode->i_rdev = 0;
 
@@ -904,7 +908,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid;
-	struct p9_stat *st;
+	struct p9_wstat *st;
 
 	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
 	retval = -EPERM;
@@ -926,15 +930,10 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	}
 
 	/* copy extension buffer into buffer */
-	if (st->extension.len < buflen)
-		buflen = st->extension.len + 1;
-
-	memmove(buffer, st->extension.str, buflen - 1);
-	buffer[buflen-1] = 0;
+	strncpy(buffer, st->extension, buflen);
 
 	P9_DPRINTK(P9_DEBUG_VFS,
-		"%s -> %.*s (%s)\n", dentry->d_name.name, st->extension.len,
-		st->extension.str, buffer);
+		"%s -> %s (%s)\n", dentry->d_name.name, st->extension, buffer);
 
 	retval = buflen;
 
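On the vfs_inode.c side, p9_wstat carries the extension field as a plain NUL-terminated string, which is why v9fs_stat2inode() can now strncpy() and sscanf() it directly. For special files the string encodes a type character plus major and minor numbers; a small user-space illustration of that format (the example string and device numbers below are made up):

#include <stdio.h>

int main(void)
{
	const char ext[] = "b 8 1";	/* hypothetical: block device 8:1 */
	char type;
	unsigned int major, minor;

	if (sscanf(ext, "%c %u %u", &type, &major, &minor) == 3)
		printf("type=%c major=%u minor=%u\n", type, major, minor);
	return 0;
}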
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf59c3960494..d6cb1a0ca724 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -111,7 +111,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	struct inode *inode = NULL;
 	struct dentry *root = NULL;
 	struct v9fs_session_info *v9ses = NULL;
-	struct p9_stat *st = NULL;
+	struct p9_wstat *st = NULL;
 	int mode = S_IRWXUGO | S_ISVTX;
 	uid_t uid = current->fsuid;
 	gid_t gid = current->fsgid;
@@ -161,10 +161,14 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	sb->s_root = root;
 	root->d_inode->i_ino = v9fs_qid2ino(&st->qid);
+
 	v9fs_stat2inode(st, root->d_inode, sb);
+
 	v9fs_fid_add(root, fid);
+	p9stat_free(st);
 	kfree(st);
 
+	P9_DPRINTK(P9_DEBUG_VFS, " return simple set mount\n");
 	return simple_set_mnt(mnt, sb);
 
 release_sb:
diff --git a/fs/Kconfig b/fs/Kconfig
index abccb5dab9a8..e46297f020c1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,61 +6,9 @@ menu "File systems"
 
 if BLOCK
 
-config EXT2_FS
-	tristate "Second extended fs support"
-	help
-	  Ext2 is a standard Linux file system for hard disks.
-
-	  To compile this file system support as a module, choose M here: the
-	  module will be called ext2.
-
-	  If unsure, say Y.
-
-config EXT2_FS_XATTR
-	bool "Ext2 extended attributes"
-	depends on EXT2_FS
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config EXT2_FS_POSIX_ACL
-	bool "Ext2 POSIX Access Control Lists"
-	depends on EXT2_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT2_FS_SECURITY
-	bool "Ext2 Security Labels"
-	depends on EXT2_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext2 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config EXT2_FS_XIP
-	bool "Ext2 execute in place support"
-	depends on EXT2_FS && MMU
-	help
-	  Execute in place can be used on memory-backed block devices. If you
-	  enable this option, you can select to mount block devices which are
-	  capable of this feature without using the page cache.
-
-	  If you do not use a block device that is capable of using this,
-	  or if unsure, say N.
+source "fs/ext2/Kconfig"
+source "fs/ext3/Kconfig"
+source "fs/ext4/Kconfig"
 
 config FS_XIP
 # execute in place
@@ -68,211 +16,15 @@ config FS_XIP
 	depends on EXT2_FS_XIP
 	default y
 
-config EXT3_FS
-	tristate "Ext3 journalling file system support"
-	select JBD
-	help
-	  This is the journalling version of the Second extended file system
-	  (often called ext3), the de facto standard Linux file system
-	  (method to organize files on a storage device) for hard disks.
-
-	  The journalling code included in this driver means you do not have
-	  to run e2fsck (file system checker) on your file systems after a
-	  crash.  The journal keeps track of any changes that were being made
-	  at the time the system crashed, and can ensure that your file system
-	  is consistent without the need for a lengthy check.
-
-	  Other than adding the journal to the file system, the on-disk format
-	  of ext3 is identical to ext2.  It is possible to freely switch
-	  between using the ext3 driver and the ext2 driver, as long as the
-	  file system has been cleanly unmounted, or e2fsck is run on the file
-	  system.
-
-	  To add a journal on an existing ext2 file system or change the
-	  behavior of ext3 file systems, you can use the tune2fs utility ("man
-	  tune2fs").  To modify attributes of files and directories on ext3
-	  file systems, use chattr ("man chattr").  You need to be using
-	  e2fsprogs version 1.20 or later in order to create ext3 journals
-	  (available at <http://sourceforge.net/projects/e2fsprogs/>).
-
-	  To compile this file system support as a module, choose M here: the
-	  module will be called ext3.
-
-config EXT3_FS_XATTR
-	bool "Ext3 extended attributes"
-	depends on EXT3_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext3.
-
-config EXT3_FS_POSIX_ACL
-	bool "Ext3 POSIX Access Control Lists"
-	depends on EXT3_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT3_FS_SECURITY
-	bool "Ext3 Security Labels"
-	depends on EXT3_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext3 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config EXT4DEV_FS
-	tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)"
-	depends on EXPERIMENTAL
-	select JBD2
-	select CRC16
-	help
-	  Ext4dev is a predecessor filesystem of the next generation
-	  extended fs ext4, based on ext3 filesystem code. It will be
-	  renamed ext4 fs later, once ext4dev is mature and stabilized.
-
-	  Unlike the change from ext2 filesystem to ext3 filesystem,
-	  the on-disk format of ext4dev is not the same as ext3 any more:
-	  it is based on extent maps and it supports 48-bit physical block
-	  numbers. These combined on-disk format changes will allow
-	  ext4dev/ext4 to handle more than 16 TB filesystem volumes --
-	  a hard limit that ext3 cannot overcome without changing the
-	  on-disk format.
-
-	  Other than extent maps and 48-bit block numbers, ext4dev also is
-	  likely to have other new features such as persistent preallocation,
-	  high resolution time stamps, and larger file support etc.  These
-	  features will be added to ext4dev gradually.
-
-	  To compile this file system support as a module, choose M here. The
-	  module will be called ext4dev.
-
-	  If unsure, say N.
-
-config EXT4DEV_FS_XATTR
-	bool "Ext4dev extended attributes"
-	depends on EXT4DEV_FS
-	default y
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-	  You need this for POSIX ACL support on ext4dev/ext4.
-
-config EXT4DEV_FS_POSIX_ACL
-	bool "Ext4dev POSIX Access Control Lists"
-	depends on EXT4DEV_FS_XATTR
-	select FS_POSIX_ACL
-	help
-	  POSIX Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the POSIX ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config EXT4DEV_FS_SECURITY
-	bool "Ext4dev Security Labels"
-	depends on EXT4DEV_FS_XATTR
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the ext4dev/ext4 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config JBD
-	tristate
-	help
-	  This is a generic journalling layer for block devices.  It is
-	  currently used by the ext3 and OCFS2 file systems, but it could
-	  also be used to add journal support to other file systems or block
-	  devices such as RAID or LVM.
-
-	  If you are using the ext3 or OCFS2 file systems, you need to
-	  say Y here.  If you are not using ext3 OCFS2 then you will probably
-	  want to say N.
-
-	  To compile this device as a module, choose M here: the module will be
-	  called jbd.  If you are compiling ext3 or OCFS2 into the kernel,
-	  you cannot compile this code as a module.
-
-config JBD_DEBUG
-	bool "JBD (ext3) debugging support"
-	depends on JBD && DEBUG_FS
-	help
-	  If you are using the ext3 journaled file system (or potentially any
-	  other file system/device using JBD), this option allows you to
-	  enable debugging output while the system is running, in order to
-	  help track down any problems you are having.  By default the
-	  debugging output will be turned off.
-
-	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a
-	  number between 1 and 5, the higher the number, the more debugging
-	  output is generated.  To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd/jbd-debug".
-
-config JBD2
-	tristate
-	select CRC32
-	help
-	  This is a generic journaling layer for block devices that support
-	  both 32-bit and 64-bit block numbers.  It is currently used by
-	  the ext4dev/ext4 filesystem, but it could also be used to add
-	  journal support to other file systems or block devices such
-	  as RAID or LVM.
-
-	  If you are using ext4dev/ext4, you need to say Y here. If you are not
-	  using ext4dev/ext4 then you will probably want to say N.
-
-	  To compile this device as a module, choose M here. The module will be
-	  called jbd2.  If you are compiling ext4dev/ext4 into the kernel,
-	  you cannot compile this code as a module.
-
-config JBD2_DEBUG
-	bool "JBD2 (ext4dev/ext4) debugging support"
-	depends on JBD2 && DEBUG_FS
-	help
-	  If you are using the ext4dev/ext4 journaled file system (or
-	  potentially any other filesystem/device using JBD2), this option
-	  allows you to enable debugging output while the system is running,
-	  in order to help track down any problems you are having.
-	  By default, the debugging output will be turned off.
-
-	  If you select Y here, then you will be able to turn on debugging
-	  with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a
-	  number between 1 and 5.  The higher the number, the more debugging
-	  output is generated.  To turn debugging off again, do
-	  "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug".
+source "fs/jbd/Kconfig"
+source "fs/jbd2/Kconfig"
 
 config FS_MBCACHE
 # Meta block cache for Extended Attributes (ext2/ext3/ext4)
 	tristate
-	depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR
-	default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y
-	default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m
+	depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4_FS_XATTR
+	default y if EXT2_FS=y || EXT3_FS=y || EXT4_FS=y
+	default m if EXT2_FS=m || EXT3_FS=m || EXT4_FS=m
 
 config REISERFS_FS
 	tristate "Reiserfs support"
@@ -419,6 +171,14 @@ config FS_POSIX_ACL
 	bool
 	default n
 
+config FILE_LOCKING
+	bool "Enable POSIX file locking API" if EMBEDDED
+	default y
+	help
+	  This option enables standard file locking support, required
+	  for filesystems like NFS and for the flock() system
+	  call. Disabling this option saves about 11k.
+
 source "fs/xfs/Kconfig"
 source "fs/gfs2/Kconfig"
 
@@ -426,7 +186,7 @@ config OCFS2_FS
 	tristate "OCFS2 file system support"
 	depends on NET && SYSFS
 	select CONFIGFS_FS
-	select JBD
+	select JBD2
 	select CRC32
 	help
 	  OCFS2 is a general purpose extent based shared disk cluster file
@@ -497,6 +257,16 @@ config OCFS2_DEBUG_FS
 	  this option for debugging only as it is likely to decrease
 	  performance of the filesystem.
 
+config OCFS2_COMPAT_JBD
+	bool "Use JBD for compatibility"
+	depends on OCFS2_FS
+	default n
+	select JBD
+	help
+	  The ocfs2 filesystem now uses JBD2 for its journalling.  JBD2
+	  is backwards compatible with JBD.  It is safe to say N here.
+	  However, if you really want to use the original JBD, say Y here.
+
 endif # BLOCK
 
 config DNOTIFY
@@ -633,7 +403,7 @@ config AUTOFS4_FS
 	  N here.
 
 config FUSE_FS
-	tristate "Filesystem in Userspace support"
+	tristate "FUSE (Filesystem in Userspace) support"
 	help
 	  With FUSE it is possible to implement a fully functional filesystem
 	  in a userspace program.
@@ -1136,195 +906,7 @@ config EFS_FS
 	  To compile the EFS file system support as a module, choose M here: the
 	  module will be called efs.
 
-config JFFS2_FS
-	tristate "Journalling Flash File System v2 (JFFS2) support"
-	select CRC32
-	depends on MTD
-	help
-	  JFFS2 is the second generation of the Journalling Flash File System
-	  for use on diskless embedded devices.  It provides improved wear
-	  levelling, compression and support for hard links. You cannot use
-	  this on normal block devices, only on 'MTD' devices.
-
-	  Further information on the design and implementation of JFFS2 is
-	  available at <http://sources.redhat.com/jffs2/>.
-
-config JFFS2_FS_DEBUG
-	int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)"
-	depends on JFFS2_FS
-	default "0"
-	help
-	  This controls the amount of debugging messages produced by the JFFS2
-	  code.  Set it to zero for use in production systems.  For evaluation,
-	  testing and debugging, it's advisable to set it to one.  This will
-	  enable a few assertions and will print debugging messages at the
-	  KERN_DEBUG loglevel, where they won't normally be visible.  Level 2
-	  is unlikely to be useful - it enables extra debugging in certain
-	  areas which at one point needed debugging, but when the bugs were
-	  located and fixed, the detailed messages were relegated to level 2.
-
-	  If reporting bugs, please try to have available a full dump of the
-	  messages at debug level 1 while the misbehaviour was occurring.
-
-config JFFS2_FS_WRITEBUFFER
-	bool "JFFS2 write-buffering support"
-	depends on JFFS2_FS
-	default y
-	help
-	  This enables the write-buffering support in JFFS2.
-
-	  This functionality is required to support JFFS2 on the following
-	  types of flash devices:
-	    - NAND flash
-	    - NOR flash with transparent ECC
-	    - DataFlash
-
-config JFFS2_FS_WBUF_VERIFY
-	bool "Verify JFFS2 write-buffer reads"
-	depends on JFFS2_FS_WRITEBUFFER
-	default n
-	help
-	  This causes JFFS2 to read back every page written through the
-	  write-buffer, and check for errors.
-
-config JFFS2_SUMMARY
-	bool "JFFS2 summary support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL
-	default n
-	help
-	  This feature makes it possible to use summary information
-	  for faster filesystem mount.
-
-	  The summary information can be inserted into a filesystem image
-	  by the utility 'sumtool'.
-
-	  If unsure, say 'N'.
-
-config JFFS2_FS_XATTR
-	bool "JFFS2 XATTR support (EXPERIMENTAL)"
-	depends on JFFS2_FS && EXPERIMENTAL
-	default n
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).
-
-	  If unsure, say N.
-
-config JFFS2_FS_POSIX_ACL
-	bool "JFFS2 POSIX Access Control Lists"
-	depends on JFFS2_FS_XATTR
-	default y
-	select FS_POSIX_ACL
-	help
-	  Posix Access Control Lists (ACLs) support permissions for users and
-	  groups beyond the owner/group/world scheme.
-
-	  To learn more about Access Control Lists, visit the Posix ACLs for
-	  Linux website <http://acl.bestbits.at/>.
-
-	  If you don't know what Access Control Lists are, say N
-
-config JFFS2_FS_SECURITY
-	bool "JFFS2 Security Labels"
-	depends on JFFS2_FS_XATTR
-	default y
-	help
-	  Security labels support alternative access control models
-	  implemented by security modules like SELinux.  This option
-	  enables an extended attribute handler for file security
-	  labels in the jffs2 filesystem.
-
-	  If you are not using a security module that requires using
-	  extended attributes for file security labels, say N.
-
-config JFFS2_COMPRESSION_OPTIONS
-	bool "Advanced compression options for JFFS2"
-	depends on JFFS2_FS
-	default n
-	help
-	  Enabling this option allows you to explicitly choose which
-	  compression modules, if any, are enabled in JFFS2. Removing
-	  compressors can mean you cannot read existing file systems,
-	  and enabling experimental compressors can mean that you
-	  write a file system which cannot be read by a standard kernel.
-
-	  If unsure, you should _definitely_ say 'N'.
-
-config JFFS2_ZLIB
-	bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS
-	select ZLIB_INFLATE
-	select ZLIB_DEFLATE
-	depends on JFFS2_FS
-	default y
-	help
-	  Zlib is designed to be a free, general-purpose, legally unencumbered,
-	  lossless data-compression library for use on virtually any computer
-	  hardware and operating system. See <http://www.gzip.org/zlib/> for
-	  further information.
-
-	  Say 'Y' if unsure.
-
-config JFFS2_LZO
-	bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS
-	select LZO_COMPRESS
-	select LZO_DECOMPRESS
-	depends on JFFS2_FS
-	default n
-	help
-	  minilzo-based compression. Generally works better than Zlib.
-
-	  This feature was added in July, 2007. Say 'N' if you need
-	  compatibility with older bootloaders or kernels.
-
-config JFFS2_RTIME
-	bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS
-	depends on JFFS2_FS
-	default y
-	help
-	  Rtime does manage to recompress already-compressed data. Say 'Y' if unsure.
-
-config JFFS2_RUBIN
-	bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS
-	depends on JFFS2_FS
-	default n
-	help
-	  RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure.
-
-choice
-	prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS
-	default JFFS2_CMODE_PRIORITY
-	depends on JFFS2_FS
-	help
-	  You can set here the default compression mode of JFFS2 from
-	  the available compression modes. Don't touch if unsure.
-
-config JFFS2_CMODE_NONE
-	bool "no compression"
-	help
-	  Uses no compression.
-
-config JFFS2_CMODE_PRIORITY
-	bool "priority"
-	help
-	  Tries the compressors in a predefined order and chooses the first
-	  successful one.
-
-config JFFS2_CMODE_SIZE
-	bool "size (EXPERIMENTAL)"
-	help
-	  Tries all compressors and chooses the one which has the smallest
-	  result.
-
-config JFFS2_CMODE_FAVOURLZO
-	bool "Favour LZO"
-	help
-	  Tries all compressors and chooses the one which has the smallest
-	  result but gives some preference to LZO (which has faster
-	  decompression) at the expense of size.
-
-endchoice
-
+source "fs/jffs2/Kconfig"
 # UBIFS File system configuration
 source "fs/ubifs/Kconfig"
 
@@ -1765,6 +1347,28 @@ config SUNRPC_XPRT_RDMA
 
 	  If unsure, say N.
 
+config SUNRPC_REGISTER_V4
+	bool "Register local RPC services via rpcbind v4 (EXPERIMENTAL)"
+	depends on SUNRPC && EXPERIMENTAL
+	default n
+	help
+	  Sun added support for registering RPC services at an IPv6
+	  address by creating two new versions of the rpcbind protocol
+	  (RFC 1833).
+
+	  This option enables support in the kernel RPC server for
+	  registering kernel RPC services via version 4 of the rpcbind
+	  protocol.  If you enable this option, you must run a portmapper
+	  daemon that supports rpcbind protocol version 4.
+
+	  Serving NFS over IPv6 from knfsd (the kernel's NFS server)
+	  requires that you enable this option and use a portmapper that
+	  supports rpcbind version 4.
+
+	  If unsure, say N to get traditional behavior (register kernel
+	  RPC services using only rpcbind version 2).  Distributions
+	  using the legacy Linux portmapper daemon must say N here.
+
 config RPCSEC_GSS_KRB5
 	tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
 	depends on SUNRPC && EXPERIMENTAL
@@ -1859,148 +1463,7 @@ config SMB_NLS_REMOTE
 
 	  smbmount from samba 2.2.0 or later supports this.
 
-config CIFS
-	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
-	depends on INET
-	select NLS
-	help
-	  This is the client VFS module for the Common Internet File System
-	  (CIFS) protocol which is the successor to the Server Message Block
-	  (SMB) protocol, the native file sharing mechanism for most early
-	  PC operating systems.  The CIFS protocol is fully supported by
-	  file servers such as Windows 2000 (including Windows 2003, NT 4
-	  and Windows XP) as well by Samba (which provides excellent CIFS
-	  server support for Linux and many other operating systems). Limited
-	  support for OS/2 and Windows ME and similar servers is provided as
-	  well.
-
-	  The cifs module provides an advanced network file system
-	  client for mounting to CIFS compliant servers.  It includes
-	  support for DFS (hierarchical name space), secure per-user
-	  session establishment via Kerberos or NTLM or NTLMv2,
-	  safe distributed caching (oplock), optional packet
-	  signing, Unicode and other internationalization improvements.
-	  If you need to mount to Samba or Windows from this machine, say Y.
-
-config CIFS_STATS
-	bool "CIFS statistics"
-	depends on CIFS
-	help
-	  Enabling this option will cause statistics for each server share
-	  mounted by the cifs client to be displayed in /proc/fs/cifs/Stats
-
-config CIFS_STATS2
-	bool "Extended statistics"
-	depends on CIFS_STATS
-	help
-	  Enabling this option will allow more detailed statistics on SMB
-	  request timing to be displayed in /proc/fs/cifs/DebugData and also
-	  allow optional logging of slow responses to dmesg (depending on the
-	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
-	  These additional statistics may have a minor effect on performance
-	  and memory utilization.
-
-	  Unless you are a developer or are doing network performance analysis
-	  or tuning, say N.
-
-config CIFS_WEAK_PW_HASH
-	bool "Support legacy servers which use weaker LANMAN security"
-	depends on CIFS
-	help
-	  Modern CIFS servers including Samba and most Windows versions
-	  (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos)
-	  security mechanisms. These hash the password more securely
-	  than the mechanisms used in the older LANMAN version of the
-	  SMB protocol but LANMAN based authentication is needed to
-	  establish sessions with some old SMB servers.
-
-	  Enabling this option allows the cifs module to mount to older
-	  LANMAN based servers such as OS/2 and Windows 95, but such
-	  mounts may be less secure than mounts using NTLM or more recent
-	  security mechanisms if you are on a public network.  Unless you
-	  have a need to access old SMB servers (and are on a private
-	  network) you probably want to say N.  Even if this support
-	  is enabled in the kernel build, LANMAN authentication will not be
-	  used automatically. At runtime LANMAN mounts are disabled but
-	  can be set to required (or optional) either in
-	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
-	  option on the mount command. This support is disabled by
-	  default in order to reduce the possibility of a downgrade
-	  attack.
-
-	  If unsure, say N.
-
-config CIFS_UPCALL
-	bool "Kerberos/SPNEGO advanced session setup"
-	depends on CIFS && KEYS
-	help
-	  Enables an upcall mechanism for CIFS which accesses
-	  userspace helper utilities to provide SPNEGO packaged (RFC 4178)
-	  Kerberos tickets which are needed to mount to certain secure servers
-	  (for which more secure Kerberos authentication is required). If
-	  unsure, say N.
-
-config CIFS_XATTR
-	bool "CIFS extended attributes"
-	depends on CIFS
-	help
-	  Extended attributes are name:value pairs associated with inodes by
-	  the kernel or by users (see the attr(5) manual page, or visit
-	  <http://acl.bestbits.at/> for details).  CIFS maps the name of
-	  extended attributes beginning with the user namespace prefix
-	  to SMB/CIFS EAs. EAs are stored on Windows servers without the
-	  user namespace prefix, but their names are seen by Linux cifs clients
-	  prefaced by the user namespace prefix. The system namespace
-	  (used by some filesystems to store ACLs) is not supported at
-	  this time.
-
-	  If unsure, say N.
-
-config CIFS_POSIX
-	bool "CIFS POSIX Extensions"
-	depends on CIFS_XATTR
-	help
-	  Enabling this option will cause the cifs client to attempt to
-	  negotiate a newer dialect with servers, such as Samba 3.0.5
-	  or later, that optionally can handle more POSIX like (rather
-	  than Windows like) file behavior.  It also enables
-	  support for POSIX ACLs (getfacl and setfacl) to servers
-	  (such as Samba 3.10 and later) which can negotiate
-	  CIFS POSIX ACL support.  If unsure, say N.
-
-config CIFS_DEBUG2
-	bool "Enable additional CIFS debugging routines"
-	depends on CIFS
-	help
-	  Enabling this option adds a few more debugging routines
-	  to the cifs code which slightly increases the size of
-	  the cifs module and can cause additional logging of debug
-	  messages in some error paths, slowing performance. This
-	  option can be turned off unless you are debugging
-	  cifs problems.  If unsure, say N.
-
-config CIFS_EXPERIMENTAL
-	bool "CIFS Experimental Features (EXPERIMENTAL)"
-	depends on CIFS && EXPERIMENTAL
-	help
-	  Enables cifs features under testing. These features are
-	  experimental and currently include DFS support and directory
-	  change notification ie fcntl(F_DNOTIFY), as well as the upcall
-	  mechanism which will be used for Kerberos session negotiation
-	  and uid remapping.  Some of these features also may depend on
-	  setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental
-	  (which is disabled by default). See the file fs/cifs/README
-	  for more details.  If unsure, say N.
-
-config CIFS_DFS_UPCALL
-	bool "DFS feature support (EXPERIMENTAL)"
-	depends on CIFS_EXPERIMENTAL
-	depends on KEYS
-	help
-	  Enables an upcall mechanism for CIFS which contacts userspace
-	  helper utilities to provide server name resolution (host names to
-	  IP addresses) which is needed for implicit mounts of DFS junction
-	  points. If unsure, say N.
+source "fs/cifs/Kconfig"
 
 config NCP_FS
 	tristate "NCP file system support (to mount NetWare volumes)"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt index 4a551af6f3fc..ce9fb3fbfae4 100644 --- a/fs/Kconfig.binfmt +++ b/fs/Kconfig.binfmt | |||
| @@ -25,7 +25,7 @@ config BINFMT_ELF | |||
| 25 | 25 | ||
| 26 | config COMPAT_BINFMT_ELF | 26 | config COMPAT_BINFMT_ELF |
| 27 | bool | 27 | bool |
| 28 | depends on COMPAT && MMU | 28 | depends on COMPAT && BINFMT_ELF |
| 29 | 29 | ||
| 30 | config BINFMT_ELF_FDPIC | 30 | config BINFMT_ELF_FDPIC |
| 31 | bool "Kernel support for FDPIC ELF binaries" | 31 | bool "Kernel support for FDPIC ELF binaries" |
| @@ -40,6 +40,28 @@ config BINFMT_ELF_FDPIC | |||
| 40 | 40 | ||
| 41 | It is also possible to run FDPIC ELF binaries on MMU linux also. | 41 | It is also possible to run FDPIC ELF binaries on MMU linux also. |
| 42 | 42 | ||
| 43 | config CORE_DUMP_DEFAULT_ELF_HEADERS | ||
| 44 | bool "Write ELF core dumps with partial segments" | ||
| 45 | default n | ||
| 46 | depends on BINFMT_ELF | ||
| 47 | help | ||
| 48 | ELF core dump files describe each memory mapping of the crashed | ||
| 49 | process, and can contain or omit the memory contents of each one. | ||
| 50 | The contents of an unmodified text mapping are omitted by default. | ||
| 51 | |||
| 52 | For an unmodified text mapping of an ELF object, including just | ||
| 53 | the first page of the file in a core dump makes it possible to | ||
| 54 | identify the build ID bits in the file, without paying the i/o | ||
| 55 | cost and disk space to dump all the text. However, versions of | ||
| 56 | GDB before 6.7 are confused by ELF core dump files in this format. | ||
| 57 | |||
| 58 | The core dump behavior can be controlled per process using | ||
| 59 | the /proc/PID/coredump_filter pseudo-file; this setting is | ||
| 60 | inherited. See Documentation/filesystems/proc.txt for details. | ||
| 61 | |||
| 62 | This config option changes the default setting of coredump_filter | ||
| 63 | seen at boot time. If unsure, say N. | ||
| 64 | |||
| 43 | config BINFMT_FLAT | 65 | config BINFMT_FLAT |
| 44 | bool "Kernel support for flat binaries" | 66 | bool "Kernel support for flat binaries" |
| 45 | depends on !MMU && (!FRV || BROKEN) | 67 | depends on !MMU && (!FRV || BROKEN) |
| @@ -59,10 +81,12 @@ config BINFMT_SHARED_FLAT | |||
| 59 | help | 81 | help |
| 60 | Support FLAT shared libraries | 82 | Support FLAT shared libraries |
| 61 | 83 | ||
| 84 | config HAVE_AOUT | ||
| 85 | def_bool n | ||
| 86 | |||
| 62 | config BINFMT_AOUT | 87 | config BINFMT_AOUT |
| 63 | tristate "Kernel support for a.out and ECOFF binaries" | 88 | tristate "Kernel support for a.out and ECOFF binaries" |
| 64 | depends on ARCH_SUPPORTS_AOUT && \ | 89 | depends on HAVE_AOUT |
| 65 | (X86_32 || ALPHA || ARM || M68K) | ||
| 66 | ---help--- | 90 | ---help--- |
| 67 | A.out (Assembler.OUTput) is a set of formats for libraries and | 91 | A.out (Assembler.OUTput) is a set of formats for libraries and |
| 68 | executables used in the earliest versions of UNIX. Linux used | 92 | executables used in the earliest versions of UNIX. Linux used |
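Editor's note on the CORE_DUMP_DEFAULT_ELF_HEADERS hunk above: the config option only changes the boot-time default, while the per-process knob the help text mentions is /proc/PID/coredump_filter. The following is a minimal userspace sketch of flipping the new behavior at run time; it assumes, per Documentation/filesystems/proc.txt, that bit 4 of the mask is the "dump ELF headers" flag — nothing in the sketch is taken from the patch itself.

/* Sketch: read this process's coredump_filter mask and set the
 * ELF-headers bit (bit 4 is an assumption taken from proc.txt). */
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/self/coredump_filter", "r+");
	unsigned int mask;

	if (!f || fscanf(f, "%x", &mask) != 1) {
		perror("coredump_filter");
		return 1;
	}
	rewind(f);
	fprintf(f, "%x", mask | (1U << 4));	/* children inherit this setting */
	fclose(f);
	return 0;
}

The same effect can be had from a shell by writing a hex mask into the file before starting the program whose dumps are of interest.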
diff --git a/fs/Makefile b/fs/Makefile index a1482a5eff15..2168c902d5ca 100644 --- a/fs/Makefile +++ b/fs/Makefile | |||
| @@ -7,8 +7,8 @@ | |||
| 7 | 7 | ||
| 8 | obj-y := open.o read_write.o file_table.o super.o \ | 8 | obj-y := open.o read_write.o file_table.o super.o \ |
| 9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ | 9 | char_dev.o stat.o exec.o pipe.o namei.o fcntl.o \ |
| 10 | ioctl.o readdir.o select.o fifo.o locks.o dcache.o inode.o \ | 10 | ioctl.o readdir.o select.o fifo.o dcache.o inode.o \ |
| 11 | attr.o bad_inode.o file.o filesystems.o namespace.o aio.o \ | 11 | attr.o bad_inode.o file.o filesystems.o namespace.o \ |
| 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ | 12 | seq_file.o xattr.o libfs.o fs-writeback.o \ |
| 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ | 13 | pnode.o drop_caches.o splice.o sync.o utimes.o \ |
| 14 | stack.o | 14 | stack.o |
| @@ -27,6 +27,8 @@ obj-$(CONFIG_ANON_INODES) += anon_inodes.o | |||
| 27 | obj-$(CONFIG_SIGNALFD) += signalfd.o | 27 | obj-$(CONFIG_SIGNALFD) += signalfd.o |
| 28 | obj-$(CONFIG_TIMERFD) += timerfd.o | 28 | obj-$(CONFIG_TIMERFD) += timerfd.o |
| 29 | obj-$(CONFIG_EVENTFD) += eventfd.o | 29 | obj-$(CONFIG_EVENTFD) += eventfd.o |
| 30 | obj-$(CONFIG_AIO) += aio.o | ||
| 31 | obj-$(CONFIG_FILE_LOCKING) += locks.o | ||
| 30 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o | 32 | obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o |
| 31 | 33 | ||
| 32 | nfsd-$(CONFIG_NFSD) := nfsctl.o | 34 | nfsd-$(CONFIG_NFSD) := nfsctl.o |
| @@ -69,7 +71,7 @@ obj-$(CONFIG_DLM) += dlm/ | |||
| 69 | # Do not add any filesystems before this line | 71 | # Do not add any filesystems before this line |
| 70 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ | 72 | obj-$(CONFIG_REISERFS_FS) += reiserfs/ |
| 71 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 | 73 | obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 |
| 72 | obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev | 74 | obj-$(CONFIG_EXT4_FS) += ext4/ # Before ext2 so root fs can be ext4 |
| 73 | obj-$(CONFIG_JBD) += jbd/ | 75 | obj-$(CONFIG_JBD) += jbd/ |
| 74 | obj-$(CONFIG_JBD2) += jbd2/ | 76 | obj-$(CONFIG_JBD2) += jbd2/ |
| 75 | obj-$(CONFIG_EXT2_FS) += ext2/ | 77 | obj-$(CONFIG_EXT2_FS) += ext2/ |
diff --git a/fs/adfs/super.c b/fs/adfs/super.c index 26f3b43726bb..7f83a46f2b7e 100644 --- a/fs/adfs/super.c +++ b/fs/adfs/super.c | |||
| @@ -157,7 +157,7 @@ static int adfs_show_options(struct seq_file *seq, struct vfsmount *mnt) | |||
| 157 | 157 | ||
| 158 | enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; | 158 | enum {Opt_uid, Opt_gid, Opt_ownmask, Opt_othmask, Opt_err}; |
| 159 | 159 | ||
| 160 | static match_table_t tokens = { | 160 | static const match_table_t tokens = { |
| 161 | {Opt_uid, "uid=%u"}, | 161 | {Opt_uid, "uid=%u"}, |
| 162 | {Opt_gid, "gid=%u"}, | 162 | {Opt_gid, "gid=%u"}, |
| 163 | {Opt_ownmask, "ownmask=%o"}, | 163 | {Opt_ownmask, "ownmask=%o"}, |
diff --git a/fs/affs/super.c b/fs/affs/super.c index 3a89094f93d0..8989c93193ed 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c | |||
| @@ -135,7 +135,7 @@ enum { | |||
| 135 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, | 135 | Opt_verbose, Opt_volume, Opt_ignore, Opt_err, |
| 136 | }; | 136 | }; |
| 137 | 137 | ||
| 138 | static match_table_t tokens = { | 138 | static const match_table_t tokens = { |
| 139 | {Opt_bs, "bs=%u"}, | 139 | {Opt_bs, "bs=%u"}, |
| 140 | {Opt_mode, "mode=%o"}, | 140 | {Opt_mode, "mode=%o"}, |
| 141 | {Opt_mufs, "mufs"}, | 141 | {Opt_mufs, "mufs"}, |
diff --git a/fs/afs/file.c b/fs/afs/file.c index 525f7c56e068..a3901769a96c 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c | |||
| @@ -50,8 +50,8 @@ const struct address_space_operations afs_fs_aops = { | |||
| 50 | .launder_page = afs_launder_page, | 50 | .launder_page = afs_launder_page, |
| 51 | .releasepage = afs_releasepage, | 51 | .releasepage = afs_releasepage, |
| 52 | .invalidatepage = afs_invalidatepage, | 52 | .invalidatepage = afs_invalidatepage, |
| 53 | .prepare_write = afs_prepare_write, | 53 | .write_begin = afs_write_begin, |
| 54 | .commit_write = afs_commit_write, | 54 | .write_end = afs_write_end, |
| 55 | .writepage = afs_writepage, | 55 | .writepage = afs_writepage, |
| 56 | .writepages = afs_writepages, | 56 | .writepages = afs_writepages, |
| 57 | }; | 57 | }; |
diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 3cb6920ff30b..67f259d99cd6 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h | |||
| @@ -728,8 +728,12 @@ extern int afs_volume_release_fileserver(struct afs_vnode *, | |||
| 728 | */ | 728 | */ |
| 729 | extern int afs_set_page_dirty(struct page *); | 729 | extern int afs_set_page_dirty(struct page *); |
| 730 | extern void afs_put_writeback(struct afs_writeback *); | 730 | extern void afs_put_writeback(struct afs_writeback *); |
| 731 | extern int afs_prepare_write(struct file *, struct page *, unsigned, unsigned); | 731 | extern int afs_write_begin(struct file *file, struct address_space *mapping, |
| 732 | extern int afs_commit_write(struct file *, struct page *, unsigned, unsigned); | 732 | loff_t pos, unsigned len, unsigned flags, |
| 733 | struct page **pagep, void **fsdata); | ||
| 734 | extern int afs_write_end(struct file *file, struct address_space *mapping, | ||
| 735 | loff_t pos, unsigned len, unsigned copied, | ||
| 736 | struct page *page, void *fsdata); | ||
| 733 | extern int afs_writepage(struct page *, struct writeback_control *); | 737 | extern int afs_writepage(struct page *, struct writeback_control *); |
| 734 | extern int afs_writepages(struct address_space *, struct writeback_control *); | 738 | extern int afs_writepages(struct address_space *, struct writeback_control *); |
| 735 | extern int afs_write_inode(struct inode *, int); | 739 | extern int afs_write_inode(struct inode *, int); |
diff --git a/fs/afs/super.c b/fs/afs/super.c index 250d8c4d66e4..aee239a048cb 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c | |||
| @@ -64,7 +64,7 @@ enum { | |||
| 64 | afs_opt_vol, | 64 | afs_opt_vol, |
| 65 | }; | 65 | }; |
| 66 | 66 | ||
| 67 | static match_table_t afs_options_list = { | 67 | static const match_table_t afs_options_list = { |
| 68 | { afs_opt_cell, "cell=%s" }, | 68 | { afs_opt_cell, "cell=%s" }, |
| 69 | { afs_opt_rwpath, "rwpath" }, | 69 | { afs_opt_rwpath, "rwpath" }, |
| 70 | { afs_opt_vol, "vol=%s" }, | 70 | { afs_opt_vol, "vol=%s" }, |
diff --git a/fs/afs/write.c b/fs/afs/write.c index 065b4e10681a..d6b85dab35fc 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c | |||
| @@ -84,15 +84,23 @@ void afs_put_writeback(struct afs_writeback *wb) | |||
| 84 | * partly or wholly fill a page that's under preparation for writing | 84 | * partly or wholly fill a page that's under preparation for writing |
| 85 | */ | 85 | */ |
| 86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, | 86 | static int afs_fill_page(struct afs_vnode *vnode, struct key *key, |
| 87 | unsigned start, unsigned len, struct page *page) | 87 | loff_t pos, unsigned len, struct page *page) |
| 88 | { | 88 | { |
| 89 | loff_t i_size; | ||
| 90 | unsigned eof; | ||
| 89 | int ret; | 91 | int ret; |
| 90 | 92 | ||
| 91 | _enter(",,%u,%u", start, len); | 93 | _enter(",,%llu,%u", (unsigned long long)pos, len); |
| 92 | 94 | ||
| 93 | ASSERTCMP(start + len, <=, PAGE_SIZE); | 95 | ASSERTCMP(len, <=, PAGE_CACHE_SIZE); |
| 94 | 96 | ||
| 95 | ret = afs_vnode_fetch_data(vnode, key, start, len, page); | 97 | i_size = i_size_read(&vnode->vfs_inode); |
| 98 | if (pos + len > i_size) | ||
| 99 | eof = i_size; | ||
| 100 | else | ||
| 101 | eof = PAGE_CACHE_SIZE; | ||
| 102 | |||
| 103 | ret = afs_vnode_fetch_data(vnode, key, 0, eof, page); | ||
| 96 | if (ret < 0) { | 104 | if (ret < 0) { |
| 97 | if (ret == -ENOENT) { | 105 | if (ret == -ENOENT) { |
| 98 | _debug("got NOENT from server" | 106 | _debug("got NOENT from server" |
| @@ -107,109 +115,55 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key, | |||
| 107 | } | 115 | } |
| 108 | 116 | ||
| 109 | /* | 117 | /* |
| 110 | * prepare a page for being written to | ||
| 111 | */ | ||
| 112 | static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, | ||
| 113 | struct key *key, unsigned offset, unsigned to) | ||
| 114 | { | ||
| 115 | unsigned eof, tail, start, stop, len; | ||
| 116 | loff_t i_size, pos; | ||
| 117 | void *p; | ||
| 118 | int ret; | ||
| 119 | |||
| 120 | _enter(""); | ||
| 121 | |||
| 122 | if (offset == 0 && to == PAGE_SIZE) | ||
| 123 | return 0; | ||
| 124 | |||
| 125 | p = kmap_atomic(page, KM_USER0); | ||
| 126 | |||
| 127 | i_size = i_size_read(&vnode->vfs_inode); | ||
| 128 | pos = (loff_t) page->index << PAGE_SHIFT; | ||
| 129 | if (pos >= i_size) { | ||
| 130 | /* partial write, page beyond EOF */ | ||
| 131 | _debug("beyond"); | ||
| 132 | if (offset > 0) | ||
| 133 | memset(p, 0, offset); | ||
| 134 | if (to < PAGE_SIZE) | ||
| 135 | memset(p + to, 0, PAGE_SIZE - to); | ||
| 136 | kunmap_atomic(p, KM_USER0); | ||
| 137 | return 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | if (i_size - pos >= PAGE_SIZE) { | ||
| 141 | /* partial write, page entirely before EOF */ | ||
| 142 | _debug("before"); | ||
| 143 | tail = eof = PAGE_SIZE; | ||
| 144 | } else { | ||
| 145 | /* partial write, page overlaps EOF */ | ||
| 146 | eof = i_size - pos; | ||
| 147 | _debug("overlap %u", eof); | ||
| 148 | tail = max(eof, to); | ||
| 149 | if (tail < PAGE_SIZE) | ||
| 150 | memset(p + tail, 0, PAGE_SIZE - tail); | ||
| 151 | if (offset > eof) | ||
| 152 | memset(p + eof, 0, PAGE_SIZE - eof); | ||
| 153 | } | ||
| 154 | |||
| 155 | kunmap_atomic(p, KM_USER0); | ||
| 156 | |||
| 157 | ret = 0; | ||
| 158 | if (offset > 0 || eof > to) { | ||
| 159 | /* need to fill one or two bits that aren't going to be written | ||
| 160 | * (cover both fillers in one read if there are two) */ | ||
| 161 | start = (offset > 0) ? 0 : to; | ||
| 162 | stop = (eof > to) ? eof : offset; | ||
| 163 | len = stop - start; | ||
| 164 | _debug("wr=%u-%u av=0-%u rd=%u@%u", | ||
| 165 | offset, to, eof, start, len); | ||
| 166 | ret = afs_fill_page(vnode, key, start, len, page); | ||
| 167 | } | ||
| 168 | |||
| 169 | _leave(" = %d", ret); | ||
| 170 | return ret; | ||
| 171 | } | ||
| 172 | |||
| 173 | /* | ||
| 174 | * prepare to perform part of a write to a page | 118 | * prepare to perform part of a write to a page |
| 175 | * - the caller holds the page locked, preventing it from being written out or | ||
| 176 | * modified by anyone else | ||
| 177 | */ | 119 | */ |
| 178 | int afs_prepare_write(struct file *file, struct page *page, | 120 | int afs_write_begin(struct file *file, struct address_space *mapping, |
| 179 | unsigned offset, unsigned to) | 121 | loff_t pos, unsigned len, unsigned flags, |
| 122 | struct page **pagep, void **fsdata) | ||
| 180 | { | 123 | { |
| 181 | struct afs_writeback *candidate, *wb; | 124 | struct afs_writeback *candidate, *wb; |
| 182 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | 125 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); |
| 126 | struct page *page; | ||
| 183 | struct key *key = file->private_data; | 127 | struct key *key = file->private_data; |
| 184 | pgoff_t index; | 128 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
| 129 | unsigned to = from + len; | ||
| 130 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
| 185 | int ret; | 131 | int ret; |
| 186 | 132 | ||
| 187 | _enter("{%x:%u},{%lx},%u,%u", | 133 | _enter("{%x:%u},{%lx},%u,%u", |
| 188 | vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); | 134 | vnode->fid.vid, vnode->fid.vnode, index, from, to); |
| 189 | 135 | ||
| 190 | candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); | 136 | candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); |
| 191 | if (!candidate) | 137 | if (!candidate) |
| 192 | return -ENOMEM; | 138 | return -ENOMEM; |
| 193 | candidate->vnode = vnode; | 139 | candidate->vnode = vnode; |
| 194 | candidate->first = candidate->last = page->index; | 140 | candidate->first = candidate->last = index; |
| 195 | candidate->offset_first = offset; | 141 | candidate->offset_first = from; |
| 196 | candidate->to_last = to; | 142 | candidate->to_last = to; |
| 197 | candidate->usage = 1; | 143 | candidate->usage = 1; |
| 198 | candidate->state = AFS_WBACK_PENDING; | 144 | candidate->state = AFS_WBACK_PENDING; |
| 199 | init_waitqueue_head(&candidate->waitq); | 145 | init_waitqueue_head(&candidate->waitq); |
| 200 | 146 | ||
| 147 | page = __grab_cache_page(mapping, index); | ||
| 148 | if (!page) { | ||
| 149 | kfree(candidate); | ||
| 150 | return -ENOMEM; | ||
| 151 | } | ||
| 152 | *pagep = page; | ||
| 153 | /* page won't leak in error case: it eventually gets cleaned off LRU */ | ||
| 154 | |||
| 201 | if (!PageUptodate(page)) { | 155 | if (!PageUptodate(page)) { |
| 202 | _debug("not up to date"); | 156 | _debug("not up to date"); |
| 203 | ret = afs_prepare_page(vnode, page, key, offset, to); | 157 | ret = afs_fill_page(vnode, key, pos, len, page); |
| 204 | if (ret < 0) { | 158 | if (ret < 0) { |
| 205 | kfree(candidate); | 159 | kfree(candidate); |
| 206 | _leave(" = %d [prep]", ret); | 160 | _leave(" = %d [prep]", ret); |
| 207 | return ret; | 161 | return ret; |
| 208 | } | 162 | } |
| 163 | SetPageUptodate(page); | ||
| 209 | } | 164 | } |
| 210 | 165 | ||
| 211 | try_again: | 166 | try_again: |
| 212 | index = page->index; | ||
| 213 | spin_lock(&vnode->writeback_lock); | 167 | spin_lock(&vnode->writeback_lock); |
| 214 | 168 | ||
| 215 | /* see if this page is already pending a writeback under a suitable key | 169 | /* see if this page is already pending a writeback under a suitable key |
| @@ -242,8 +196,8 @@ try_again: | |||
| 242 | subsume_in_current_wb: | 196 | subsume_in_current_wb: |
| 243 | _debug("subsume"); | 197 | _debug("subsume"); |
| 244 | ASSERTRANGE(wb->first, <=, index, <=, wb->last); | 198 | ASSERTRANGE(wb->first, <=, index, <=, wb->last); |
| 245 | if (index == wb->first && offset < wb->offset_first) | 199 | if (index == wb->first && from < wb->offset_first) |
| 246 | wb->offset_first = offset; | 200 | wb->offset_first = from; |
| 247 | if (index == wb->last && to > wb->to_last) | 201 | if (index == wb->last && to > wb->to_last) |
| 248 | wb->to_last = to; | 202 | wb->to_last = to; |
| 249 | spin_unlock(&vnode->writeback_lock); | 203 | spin_unlock(&vnode->writeback_lock); |
| @@ -289,17 +243,17 @@ flush_conflicting_wb: | |||
| 289 | /* | 243 | /* |
| 290 | * finalise part of a write to a page | 244 | * finalise part of a write to a page |
| 291 | */ | 245 | */ |
| 292 | int afs_commit_write(struct file *file, struct page *page, | 246 | int afs_write_end(struct file *file, struct address_space *mapping, |
| 293 | unsigned offset, unsigned to) | 247 | loff_t pos, unsigned len, unsigned copied, |
| 248 | struct page *page, void *fsdata) | ||
| 294 | { | 249 | { |
| 295 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); | 250 | struct afs_vnode *vnode = AFS_FS_I(file->f_dentry->d_inode); |
| 296 | loff_t i_size, maybe_i_size; | 251 | loff_t i_size, maybe_i_size; |
| 297 | 252 | ||
| 298 | _enter("{%x:%u},{%lx},%u,%u", | 253 | _enter("{%x:%u},{%lx}", |
| 299 | vnode->fid.vid, vnode->fid.vnode, page->index, offset, to); | 254 | vnode->fid.vid, vnode->fid.vnode, page->index); |
| 300 | 255 | ||
| 301 | maybe_i_size = (loff_t) page->index << PAGE_SHIFT; | 256 | maybe_i_size = pos + copied; |
| 302 | maybe_i_size += to; | ||
| 303 | 257 | ||
| 304 | i_size = i_size_read(&vnode->vfs_inode); | 258 | i_size = i_size_read(&vnode->vfs_inode); |
| 305 | if (maybe_i_size > i_size) { | 259 | if (maybe_i_size > i_size) { |
| @@ -310,12 +264,13 @@ int afs_commit_write(struct file *file, struct page *page, | |||
| 310 | spin_unlock(&vnode->writeback_lock); | 264 | spin_unlock(&vnode->writeback_lock); |
| 311 | } | 265 | } |
| 312 | 266 | ||
| 313 | SetPageUptodate(page); | ||
| 314 | set_page_dirty(page); | 267 | set_page_dirty(page); |
| 315 | if (PageDirty(page)) | 268 | if (PageDirty(page)) |
| 316 | _debug("dirtied"); | 269 | _debug("dirtied"); |
| 270 | unlock_page(page); | ||
| 271 | page_cache_release(page); | ||
| 317 | 272 | ||
| 318 | return 0; | 273 | return copied; |
| 319 | } | 274 | } |
| 320 | 275 | ||
| 321 | /* | 276 | /* |
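Editor's note on the afs_write_begin()/afs_write_end() conversion above: with the write_begin/write_end address_space operations the VFS, not the filesystem, drives the copy-in loop. The sketch below is a simplified, hand-written illustration of that calling pattern — it is not the kernel's generic_perform_write() and elides segment iteration, short-copy retries and most error handling — included only to show why afs_write_begin() must hand back a locked page and why afs_write_end() returns the byte count it accepted.

/* Illustrative only: one page-sized step of a buffered write driven
 * through the ->write_begin/->write_end hooks. Assumes pos..pos+len
 * does not cross a page boundary. */
#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/uaccess.h>

static ssize_t one_write_step(struct file *file, const char __user *buf,
			      size_t len, loff_t pos)
{
	struct address_space *mapping = file->f_mapping;
	const struct address_space_operations *a_ops = mapping->a_ops;
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	struct page *page;
	void *fsdata, *kaddr;
	unsigned long left;
	int ret;

	/* the filesystem locks the page and brings it uptodate as needed */
	ret = a_ops->write_begin(file, mapping, pos, len, 0, &page, &fsdata);
	if (ret < 0)
		return ret;

	kaddr = kmap_atomic(page, KM_USER0);
	pagefault_disable();
	left = __copy_from_user_inatomic(kaddr + offset, buf, len);
	pagefault_enable();
	kunmap_atomic(kaddr, KM_USER0);
	flush_dcache_page(page);

	/* the filesystem dirties, unlocks and releases the page and may
	 * extend i_size; the return value is the number of bytes accepted */
	return a_ops->write_end(file, mapping, pos, len, len - left,
				page, fsdata);
}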
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index dda510d31f84..b70eea1e8c59 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c | |||
| @@ -59,7 +59,7 @@ static const struct super_operations autofs_sops = { | |||
| 59 | 59 | ||
| 60 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; | 60 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto}; |
| 61 | 61 | ||
| 62 | static match_table_t autofs_tokens = { | 62 | static const match_table_t autofs_tokens = { |
| 63 | {Opt_fd, "fd=%u"}, | 63 | {Opt_fd, "fd=%u"}, |
| 64 | {Opt_uid, "uid=%u"}, | 64 | {Opt_uid, "uid=%u"}, |
| 65 | {Opt_gid, "gid=%u"}, | 65 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/autofs4/Makefile b/fs/autofs4/Makefile index f2c3b79e94d2..a811c1f7d9ab 100644 --- a/fs/autofs4/Makefile +++ b/fs/autofs4/Makefile | |||
| @@ -4,4 +4,4 @@ | |||
| 4 | 4 | ||
| 5 | obj-$(CONFIG_AUTOFS4_FS) += autofs4.o | 5 | obj-$(CONFIG_AUTOFS4_FS) += autofs4.o |
| 6 | 6 | ||
| 7 | autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o | 7 | autofs4-objs := init.o inode.o root.o symlink.o waitq.o expire.o dev-ioctl.o |
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 69a2f5c92319..e0f16da00e54 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | /* Internal header file for autofs */ | 14 | /* Internal header file for autofs */ |
| 15 | 15 | ||
| 16 | #include <linux/auto_fs4.h> | 16 | #include <linux/auto_fs4.h> |
| 17 | #include <linux/auto_dev-ioctl.h> | ||
| 17 | #include <linux/mutex.h> | 18 | #include <linux/mutex.h> |
| 18 | #include <linux/list.h> | 19 | #include <linux/list.h> |
| 19 | 20 | ||
| @@ -21,6 +22,11 @@ | |||
| 21 | #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY | 22 | #define AUTOFS_IOC_FIRST AUTOFS_IOC_READY |
| 22 | #define AUTOFS_IOC_COUNT 32 | 23 | #define AUTOFS_IOC_COUNT 32 |
| 23 | 24 | ||
| 25 | #define AUTOFS_DEV_IOCTL_IOC_FIRST (AUTOFS_DEV_IOCTL_VERSION) | ||
| 26 | #define AUTOFS_DEV_IOCTL_IOC_COUNT (AUTOFS_IOC_COUNT - 11) | ||
| 27 | |||
| 28 | #define AUTOFS_TYPE_TRIGGER (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET) | ||
| 29 | |||
| 24 | #include <linux/kernel.h> | 30 | #include <linux/kernel.h> |
| 25 | #include <linux/slab.h> | 31 | #include <linux/slab.h> |
| 26 | #include <linux/time.h> | 32 | #include <linux/time.h> |
| @@ -35,11 +41,27 @@ | |||
| 35 | /* #define DEBUG */ | 41 | /* #define DEBUG */ |
| 36 | 42 | ||
| 37 | #ifdef DEBUG | 43 | #ifdef DEBUG |
| 38 | #define DPRINTK(fmt,args...) do { printk(KERN_DEBUG "pid %d: %s: " fmt "\n" , current->pid , __func__ , ##args); } while(0) | 44 | #define DPRINTK(fmt, args...) \ |
| 45 | do { \ | ||
| 46 | printk(KERN_DEBUG "pid %d: %s: " fmt "\n", \ | ||
| 47 | current->pid, __func__, ##args); \ | ||
| 48 | } while (0) | ||
| 39 | #else | 49 | #else |
| 40 | #define DPRINTK(fmt,args...) do {} while(0) | 50 | #define DPRINTK(fmt, args...) do {} while (0) |
| 41 | #endif | 51 | #endif |
| 42 | 52 | ||
| 53 | #define AUTOFS_WARN(fmt, args...) \ | ||
| 54 | do { \ | ||
| 55 | printk(KERN_WARNING "pid %d: %s: " fmt "\n", \ | ||
| 56 | current->pid, __func__, ##args); \ | ||
| 57 | } while (0) | ||
| 58 | |||
| 59 | #define AUTOFS_ERROR(fmt, args...) \ | ||
| 60 | do { \ | ||
| 61 | printk(KERN_ERR "pid %d: %s: " fmt "\n", \ | ||
| 62 | current->pid, __func__, ##args); \ | ||
| 63 | } while (0) | ||
| 64 | |||
| 43 | /* Unified info structure. This is pointed to by both the dentry and | 65 | /* Unified info structure. This is pointed to by both the dentry and |
| 44 | inode structures. Each file in the filesystem has an instance of this | 66 | inode structures. Each file in the filesystem has an instance of this |
| 45 | structure. It holds a reference to the dentry, so dentries are never | 67 | structure. It holds a reference to the dentry, so dentries are never |
| @@ -61,6 +83,9 @@ struct autofs_info { | |||
| 61 | unsigned long last_used; | 83 | unsigned long last_used; |
| 62 | atomic_t count; | 84 | atomic_t count; |
| 63 | 85 | ||
| 86 | uid_t uid; | ||
| 87 | gid_t gid; | ||
| 88 | |||
| 64 | mode_t mode; | 89 | mode_t mode; |
| 65 | size_t size; | 90 | size_t size; |
| 66 | 91 | ||
| @@ -92,10 +117,6 @@ struct autofs_wait_queue { | |||
| 92 | 117 | ||
| 93 | #define AUTOFS_SBI_MAGIC 0x6d4a556d | 118 | #define AUTOFS_SBI_MAGIC 0x6d4a556d |
| 94 | 119 | ||
| 95 | #define AUTOFS_TYPE_INDIRECT 0x0001 | ||
| 96 | #define AUTOFS_TYPE_DIRECT 0x0002 | ||
| 97 | #define AUTOFS_TYPE_OFFSET 0x0004 | ||
| 98 | |||
| 99 | struct autofs_sb_info { | 120 | struct autofs_sb_info { |
| 100 | u32 magic; | 121 | u32 magic; |
| 101 | int pipefd; | 122 | int pipefd; |
| @@ -169,6 +190,17 @@ int autofs4_expire_run(struct super_block *, struct vfsmount *, | |||
| 169 | struct autofs_packet_expire __user *); | 190 | struct autofs_packet_expire __user *); |
| 170 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, | 191 | int autofs4_expire_multi(struct super_block *, struct vfsmount *, |
| 171 | struct autofs_sb_info *, int __user *); | 192 | struct autofs_sb_info *, int __user *); |
| 193 | struct dentry *autofs4_expire_direct(struct super_block *sb, | ||
| 194 | struct vfsmount *mnt, | ||
| 195 | struct autofs_sb_info *sbi, int how); | ||
| 196 | struct dentry *autofs4_expire_indirect(struct super_block *sb, | ||
| 197 | struct vfsmount *mnt, | ||
| 198 | struct autofs_sb_info *sbi, int how); | ||
| 199 | |||
| 200 | /* Device node initialization */ | ||
| 201 | |||
| 202 | int autofs_dev_ioctl_init(void); | ||
| 203 | void autofs_dev_ioctl_exit(void); | ||
| 172 | 204 | ||
| 173 | /* Operations structures */ | 205 | /* Operations structures */ |
| 174 | 206 | ||
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c new file mode 100644 index 000000000000..625abf5422e2 --- /dev/null +++ b/fs/autofs4/dev-ioctl.c | |||
| @@ -0,0 +1,863 @@ | |||
| 1 | /* | ||
| 2 | * Copyright 2008 Red Hat, Inc. All rights reserved. | ||
| 3 | * Copyright 2008 Ian Kent <raven@themaw.net> | ||
| 4 | * | ||
| 5 | * This file is part of the Linux kernel and is made available under | ||
| 6 | * the terms of the GNU General Public License, version 2, or at your | ||
| 7 | * option, any later version, incorporated herein by reference. | ||
| 8 | */ | ||
| 9 | |||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/vmalloc.h> | ||
| 12 | #include <linux/miscdevice.h> | ||
| 13 | #include <linux/init.h> | ||
| 14 | #include <linux/wait.h> | ||
| 15 | #include <linux/namei.h> | ||
| 16 | #include <linux/fcntl.h> | ||
| 17 | #include <linux/file.h> | ||
| 18 | #include <linux/fdtable.h> | ||
| 19 | #include <linux/sched.h> | ||
| 20 | #include <linux/compat.h> | ||
| 21 | #include <linux/syscalls.h> | ||
| 22 | #include <linux/smp_lock.h> | ||
| 23 | #include <linux/magic.h> | ||
| 24 | #include <linux/dcache.h> | ||
| 25 | #include <linux/uaccess.h> | ||
| 26 | |||
| 27 | #include "autofs_i.h" | ||
| 28 | |||
| 29 | /* | ||
| 30 | * This module implements an interface for routing autofs ioctl control | ||
| 31 | * commands via a miscellaneous device file. | ||
| 32 | * | ||
| 33 | * The alternate interface is needed because we need to be able to open | ||
| 34 | * an ioctl file descriptor on an autofs mount that may be covered by | ||
| 35 | * another mount. This situation arises when starting automount(8) | ||
| 36 | * or other user space daemon which uses direct mounts or offset | ||
| 37 | * mounts (used for autofs lazy mount/umount of nested mount trees), | ||
| 38 | * which have been left busy at at service shutdown. | ||
| 39 | */ | ||
| 40 | |||
| 41 | #define AUTOFS_DEV_IOCTL_SIZE sizeof(struct autofs_dev_ioctl) | ||
| 42 | |||
| 43 | typedef int (*ioctl_fn)(struct file *, struct autofs_sb_info *, | ||
| 44 | struct autofs_dev_ioctl *); | ||
| 45 | |||
| 46 | static int check_name(const char *name) | ||
| 47 | { | ||
| 48 | if (!strchr(name, '/')) | ||
| 49 | return -EINVAL; | ||
| 50 | return 0; | ||
| 51 | } | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Check that a string doesn't overrun the chunk of | ||
| 55 | * memory we copied from userspace. | ||
| 56 | */ | ||
| 57 | static int invalid_str(char *str, void *end) | ||
| 58 | { | ||
| 59 | while ((void *) str <= end) | ||
| 60 | if (!*str++) | ||
| 61 | return 0; | ||
| 62 | return -EINVAL; | ||
| 63 | } | ||
| 64 | |||
| 65 | /* | ||
| 66 | * Check that the user compiled against the correct version of autofs | ||
| 67 | * misc device code. | ||
| 68 | * | ||
| 69 | * As well as checking the version compatibility this always copies | ||
| 70 | * the kernel interface version out. | ||
| 71 | */ | ||
| 72 | static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param) | ||
| 73 | { | ||
| 74 | int err = 0; | ||
| 75 | |||
| 76 | if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) || | ||
| 77 | (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) { | ||
| 78 | AUTOFS_WARN("ioctl control interface version mismatch: " | ||
| 79 | "kernel(%u.%u), user(%u.%u), cmd(%d)", | ||
| 80 | AUTOFS_DEV_IOCTL_VERSION_MAJOR, | ||
| 81 | AUTOFS_DEV_IOCTL_VERSION_MINOR, | ||
| 82 | param->ver_major, param->ver_minor, cmd); | ||
| 83 | err = -EINVAL; | ||
| 84 | } | ||
| 85 | |||
| 86 | /* Fill in the kernel version. */ | ||
| 87 | param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR; | ||
| 88 | param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR; | ||
| 89 | |||
| 90 | return err; | ||
| 91 | } | ||
| 92 | |||
| 93 | /* | ||
| 94 | * Copy parameter control struct, including a possible path allocated | ||
| 95 | * at the end of the struct. | ||
| 96 | */ | ||
| 97 | static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in) | ||
| 98 | { | ||
| 99 | struct autofs_dev_ioctl tmp, *ads; | ||
| 100 | |||
| 101 | if (copy_from_user(&tmp, in, sizeof(tmp))) | ||
| 102 | return ERR_PTR(-EFAULT); | ||
| 103 | |||
| 104 | if (tmp.size < sizeof(tmp)) | ||
| 105 | return ERR_PTR(-EINVAL); | ||
| 106 | |||
| 107 | ads = kmalloc(tmp.size, GFP_KERNEL); | ||
| 108 | if (!ads) | ||
| 109 | return ERR_PTR(-ENOMEM); | ||
| 110 | |||
| 111 | if (copy_from_user(ads, in, tmp.size)) { | ||
| 112 | kfree(ads); | ||
| 113 | return ERR_PTR(-EFAULT); | ||
| 114 | } | ||
| 115 | |||
| 116 | return ads; | ||
| 117 | } | ||
| 118 | |||
| 119 | static inline void free_dev_ioctl(struct autofs_dev_ioctl *param) | ||
| 120 | { | ||
| 121 | kfree(param); | ||
| 122 | return; | ||
| 123 | } | ||
| 124 | |||
| 125 | /* | ||
| 126 | * Check sanity of parameter control fields and if a path is present | ||
| 127 | * check that it has a "/" and is terminated. | ||
| 128 | */ | ||
| 129 | static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) | ||
| 130 | { | ||
| 131 | int err = -EINVAL; | ||
| 132 | |||
| 133 | if (check_dev_ioctl_version(cmd, param)) { | ||
| 134 | AUTOFS_WARN("invalid device control module version " | ||
| 135 | "supplied for cmd(0x%08x)", cmd); | ||
| 136 | goto out; | ||
| 137 | } | ||
| 138 | |||
| 139 | if (param->size > sizeof(*param)) { | ||
| 140 | err = check_name(param->path); | ||
| 141 | if (err) { | ||
| 142 | AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", | ||
| 143 | cmd); | ||
| 144 | goto out; | ||
| 145 | } | ||
| 146 | |||
| 147 | err = invalid_str(param->path, | ||
| 148 | (void *) ((size_t) param + param->size)); | ||
| 149 | if (err) { | ||
| 150 | AUTOFS_WARN("invalid path supplied for cmd(0x%08x)", | ||
| 151 | cmd); | ||
| 152 | goto out; | ||
| 153 | } | ||
| 154 | } | ||
| 155 | |||
| 156 | err = 0; | ||
| 157 | out: | ||
| 158 | return err; | ||
| 159 | } | ||
| 160 | |||
| 161 | /* | ||
| 162 | * Get the autofs super block info struct from the file opened on | ||
| 163 | * the autofs mount point. | ||
| 164 | */ | ||
| 165 | static struct autofs_sb_info *autofs_dev_ioctl_sbi(struct file *f) | ||
| 166 | { | ||
| 167 | struct autofs_sb_info *sbi = NULL; | ||
| 168 | struct inode *inode; | ||
| 169 | |||
| 170 | if (f) { | ||
| 171 | inode = f->f_path.dentry->d_inode; | ||
| 172 | sbi = autofs4_sbi(inode->i_sb); | ||
| 173 | } | ||
| 174 | return sbi; | ||
| 175 | } | ||
| 176 | |||
| 177 | /* Return autofs module protocol version */ | ||
| 178 | static int autofs_dev_ioctl_protover(struct file *fp, | ||
| 179 | struct autofs_sb_info *sbi, | ||
| 180 | struct autofs_dev_ioctl *param) | ||
| 181 | { | ||
| 182 | param->arg1 = sbi->version; | ||
| 183 | return 0; | ||
| 184 | } | ||
| 185 | |||
| 186 | /* Return autofs module protocol sub version */ | ||
| 187 | static int autofs_dev_ioctl_protosubver(struct file *fp, | ||
| 188 | struct autofs_sb_info *sbi, | ||
| 189 | struct autofs_dev_ioctl *param) | ||
| 190 | { | ||
| 191 | param->arg1 = sbi->sub_version; | ||
| 192 | return 0; | ||
| 193 | } | ||
| 194 | |||
| 195 | /* | ||
| 196 | * Walk down the mount stack looking for an autofs mount that | ||
| 197 | * has the requested device number (aka. new_encode_dev(sb->s_dev)). | ||
| 198 | */ | ||
| 199 | static int autofs_dev_ioctl_find_super(struct nameidata *nd, dev_t devno) | ||
| 200 | { | ||
| 201 | struct dentry *dentry; | ||
| 202 | struct inode *inode; | ||
| 203 | struct super_block *sb; | ||
| 204 | dev_t s_dev; | ||
| 205 | unsigned int err; | ||
| 206 | |||
| 207 | err = -ENOENT; | ||
| 208 | |||
| 209 | /* Lookup the dentry name at the base of our mount point */ | ||
| 210 | dentry = d_lookup(nd->path.dentry, &nd->last); | ||
| 211 | if (!dentry) | ||
| 212 | goto out; | ||
| 213 | |||
| 214 | dput(nd->path.dentry); | ||
| 215 | nd->path.dentry = dentry; | ||
| 216 | |||
| 217 | /* And follow the mount stack looking for our autofs mount */ | ||
| 218 | while (follow_down(&nd->path.mnt, &nd->path.dentry)) { | ||
| 219 | inode = nd->path.dentry->d_inode; | ||
| 220 | if (!inode) | ||
| 221 | break; | ||
| 222 | |||
| 223 | sb = inode->i_sb; | ||
| 224 | s_dev = new_encode_dev(sb->s_dev); | ||
| 225 | if (devno == s_dev) { | ||
| 226 | if (sb->s_magic == AUTOFS_SUPER_MAGIC) { | ||
| 227 | err = 0; | ||
| 228 | break; | ||
| 229 | } | ||
| 230 | } | ||
| 231 | } | ||
| 232 | out: | ||
| 233 | return err; | ||
| 234 | } | ||
| 235 | |||
| 236 | /* | ||
| 237 | * Walk down the mount stack looking for an autofs mount that | ||
| 238 | * has the requested mount type (i.e. indirect, direct or offset). | ||
| 239 | */ | ||
| 240 | static int autofs_dev_ioctl_find_sbi_type(struct nameidata *nd, unsigned int type) | ||
| 241 | { | ||
| 242 | struct dentry *dentry; | ||
| 243 | struct autofs_info *ino; | ||
| 244 | unsigned int err; | ||
| 245 | |||
| 246 | err = -ENOENT; | ||
| 247 | |||
| 248 | /* Lookup the dentry name at the base of our mount point */ | ||
| 249 | dentry = d_lookup(nd->path.dentry, &nd->last); | ||
| 250 | if (!dentry) | ||
| 251 | goto out; | ||
| 252 | |||
| 253 | dput(nd->path.dentry); | ||
| 254 | nd->path.dentry = dentry; | ||
| 255 | |||
| 256 | /* And follow the mount stack looking for our autofs mount */ | ||
| 257 | while (follow_down(&nd->path.mnt, &nd->path.dentry)) { | ||
| 258 | ino = autofs4_dentry_ino(nd->path.dentry); | ||
| 259 | if (ino && ino->sbi->type & type) { | ||
| 260 | err = 0; | ||
| 261 | break; | ||
| 262 | } | ||
| 263 | } | ||
| 264 | out: | ||
| 265 | return err; | ||
| 266 | } | ||
| 267 | |||
| 268 | static void autofs_dev_ioctl_fd_install(unsigned int fd, struct file *file) | ||
| 269 | { | ||
| 270 | struct files_struct *files = current->files; | ||
| 271 | struct fdtable *fdt; | ||
| 272 | |||
| 273 | spin_lock(&files->file_lock); | ||
| 274 | fdt = files_fdtable(files); | ||
| 275 | BUG_ON(fdt->fd[fd] != NULL); | ||
| 276 | rcu_assign_pointer(fdt->fd[fd], file); | ||
| 277 | FD_SET(fd, fdt->close_on_exec); | ||
| 278 | spin_unlock(&files->file_lock); | ||
| 279 | } | ||
| 280 | |||
| 281 | |||
| 282 | /* | ||
| 283 | * Open a file descriptor on the autofs mount point corresponding | ||
| 284 | * to the given path and device number (aka. new_encode_dev(sb->s_dev)). | ||
| 285 | */ | ||
| 286 | static int autofs_dev_ioctl_open_mountpoint(const char *path, dev_t devid) | ||
| 287 | { | ||
| 288 | struct file *filp; | ||
| 289 | struct nameidata nd; | ||
| 290 | int err, fd; | ||
| 291 | |||
| 292 | fd = get_unused_fd(); | ||
| 293 | if (likely(fd >= 0)) { | ||
| 294 | /* Get nameidata of the parent directory */ | ||
| 295 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
| 296 | if (err) | ||
| 297 | goto out; | ||
| 298 | |||
| 299 | /* | ||
| 300 | * Search down, within the parent, looking for an | ||
| 301 | * autofs super block that has the device number | ||
| 302 | * corresponding to the autofs fs we want to open. | ||
| 303 | */ | ||
| 304 | err = autofs_dev_ioctl_find_super(&nd, devid); | ||
| 305 | if (err) { | ||
| 306 | path_put(&nd.path); | ||
| 307 | goto out; | ||
| 308 | } | ||
| 309 | |||
| 310 | filp = dentry_open(nd.path.dentry, nd.path.mnt, O_RDONLY); | ||
| 311 | if (IS_ERR(filp)) { | ||
| 312 | err = PTR_ERR(filp); | ||
| 313 | goto out; | ||
| 314 | } | ||
| 315 | |||
| 316 | autofs_dev_ioctl_fd_install(fd, filp); | ||
| 317 | } | ||
| 318 | |||
| 319 | return fd; | ||
| 320 | |||
| 321 | out: | ||
| 322 | put_unused_fd(fd); | ||
| 323 | return err; | ||
| 324 | } | ||
| 325 | |||
| 326 | /* Open a file descriptor on an autofs mount point */ | ||
| 327 | static int autofs_dev_ioctl_openmount(struct file *fp, | ||
| 328 | struct autofs_sb_info *sbi, | ||
| 329 | struct autofs_dev_ioctl *param) | ||
| 330 | { | ||
| 331 | const char *path; | ||
| 332 | dev_t devid; | ||
| 333 | int err, fd; | ||
| 334 | |||
| 335 | /* param->path has already been checked */ | ||
| 336 | if (!param->arg1) | ||
| 337 | return -EINVAL; | ||
| 338 | |||
| 339 | param->ioctlfd = -1; | ||
| 340 | |||
| 341 | path = param->path; | ||
| 342 | devid = param->arg1; | ||
| 343 | |||
| 344 | err = 0; | ||
| 345 | fd = autofs_dev_ioctl_open_mountpoint(path, devid); | ||
| 346 | if (unlikely(fd < 0)) { | ||
| 347 | err = fd; | ||
| 348 | goto out; | ||
| 349 | } | ||
| 350 | |||
| 351 | param->ioctlfd = fd; | ||
| 352 | out: | ||
| 353 | return err; | ||
| 354 | } | ||
| 355 | |||
| 356 | /* Close file descriptor allocated above (user can also use close(2)). */ | ||
| 357 | static int autofs_dev_ioctl_closemount(struct file *fp, | ||
| 358 | struct autofs_sb_info *sbi, | ||
| 359 | struct autofs_dev_ioctl *param) | ||
| 360 | { | ||
| 361 | return sys_close(param->ioctlfd); | ||
| 362 | } | ||
| 363 | |||
| 364 | /* | ||
| 365 | * Send "ready" status for an existing wait (either a mount or an expire | ||
| 366 | * request). | ||
| 367 | */ | ||
| 368 | static int autofs_dev_ioctl_ready(struct file *fp, | ||
| 369 | struct autofs_sb_info *sbi, | ||
| 370 | struct autofs_dev_ioctl *param) | ||
| 371 | { | ||
| 372 | autofs_wqt_t token; | ||
| 373 | |||
| 374 | token = (autofs_wqt_t) param->arg1; | ||
| 375 | return autofs4_wait_release(sbi, token, 0); | ||
| 376 | } | ||
| 377 | |||
| 378 | /* | ||
| 379 | * Send "fail" status for an existing wait (either a mount or an expire | ||
| 380 | * request). | ||
| 381 | */ | ||
| 382 | static int autofs_dev_ioctl_fail(struct file *fp, | ||
| 383 | struct autofs_sb_info *sbi, | ||
| 384 | struct autofs_dev_ioctl *param) | ||
| 385 | { | ||
| 386 | autofs_wqt_t token; | ||
| 387 | int status; | ||
| 388 | |||
| 389 | token = (autofs_wqt_t) param->arg1; | ||
| 390 | status = param->arg2 ? param->arg2 : -ENOENT; | ||
| 391 | return autofs4_wait_release(sbi, token, status); | ||
| 392 | } | ||
| 393 | |||
| 394 | /* | ||
| 395 | * Set the pipe fd for kernel communication to the daemon. | ||
| 396 | * | ||
| 397 | * Normally this is set at mount using an option but if we | ||
| 398 | * are reconnecting to a busy mount then we need to use this | ||
| 399 | * to tell the autofs mount about the new kernel pipe fd. In | ||
| 400 | * order to protect mounts against incorrectly setting the | ||
| 401 | * pipefd we also require that the autofs mount be catatonic. | ||
| 402 | * | ||
| 403 | * This also sets the process group id used to identify the | ||
| 404 | * controlling process (e.g. the owning automount(8) daemon). | ||
| 405 | */ | ||
| 406 | static int autofs_dev_ioctl_setpipefd(struct file *fp, | ||
| 407 | struct autofs_sb_info *sbi, | ||
| 408 | struct autofs_dev_ioctl *param) | ||
| 409 | { | ||
| 410 | int pipefd; | ||
| 411 | int err = 0; | ||
| 412 | |||
| 413 | if (param->arg1 == -1) | ||
| 414 | return -EINVAL; | ||
| 415 | |||
| 416 | pipefd = param->arg1; | ||
| 417 | |||
| 418 | mutex_lock(&sbi->wq_mutex); | ||
| 419 | if (!sbi->catatonic) { | ||
| 420 | mutex_unlock(&sbi->wq_mutex); | ||
| 421 | return -EBUSY; | ||
| 422 | } else { | ||
| 423 | struct file *pipe = fget(pipefd); | ||
| 424 | if (!pipe->f_op || !pipe->f_op->write) { | ||
| 425 | err = -EPIPE; | ||
| 426 | fput(pipe); | ||
| 427 | goto out; | ||
| 428 | } | ||
| 429 | sbi->oz_pgrp = task_pgrp_nr(current); | ||
| 430 | sbi->pipefd = pipefd; | ||
| 431 | sbi->pipe = pipe; | ||
| 432 | sbi->catatonic = 0; | ||
| 433 | } | ||
| 434 | out: | ||
| 435 | mutex_unlock(&sbi->wq_mutex); | ||
| 436 | return err; | ||
| 437 | } | ||
| 438 | |||
| 439 | /* | ||
| 440 | * Make the autofs mount point catatonic, no longer responsive to | ||
| 441 | * mount requests. Also closes the kernel pipe file descriptor. | ||
| 442 | */ | ||
| 443 | static int autofs_dev_ioctl_catatonic(struct file *fp, | ||
| 444 | struct autofs_sb_info *sbi, | ||
| 445 | struct autofs_dev_ioctl *param) | ||
| 446 | { | ||
| 447 | autofs4_catatonic_mode(sbi); | ||
| 448 | return 0; | ||
| 449 | } | ||
| 450 | |||
| 451 | /* Set the autofs mount timeout */ | ||
| 452 | static int autofs_dev_ioctl_timeout(struct file *fp, | ||
| 453 | struct autofs_sb_info *sbi, | ||
| 454 | struct autofs_dev_ioctl *param) | ||
| 455 | { | ||
| 456 | unsigned long timeout; | ||
| 457 | |||
| 458 | timeout = param->arg1; | ||
| 459 | param->arg1 = sbi->exp_timeout / HZ; | ||
| 460 | sbi->exp_timeout = timeout * HZ; | ||
| 461 | return 0; | ||
| 462 | } | ||
| 463 | |||
| 464 | /* | ||
| 465 | * Return the uid and gid of the last request for the mount | ||
| 466 | * | ||
| 467 | * When reconstructing an autofs mount tree with active mounts | ||
| 468 | * we need to re-connect to mounts that may have used the original | ||
| 469 | * process uid and gid (or string variations of them) for mount | ||
| 470 | * lookups within the map entry. | ||
| 471 | */ | ||
| 472 | static int autofs_dev_ioctl_requester(struct file *fp, | ||
| 473 | struct autofs_sb_info *sbi, | ||
| 474 | struct autofs_dev_ioctl *param) | ||
| 475 | { | ||
| 476 | struct autofs_info *ino; | ||
| 477 | struct nameidata nd; | ||
| 478 | const char *path; | ||
| 479 | dev_t devid; | ||
| 480 | int err = -ENOENT; | ||
| 481 | |||
| 482 | if (param->size <= sizeof(*param)) { | ||
| 483 | err = -EINVAL; | ||
| 484 | goto out; | ||
| 485 | } | ||
| 486 | |||
| 487 | path = param->path; | ||
| 488 | devid = sbi->sb->s_dev; | ||
| 489 | |||
| 490 | param->arg1 = param->arg2 = -1; | ||
| 491 | |||
| 492 | /* Get nameidata of the parent directory */ | ||
| 493 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
| 494 | if (err) | ||
| 495 | goto out; | ||
| 496 | |||
| 497 | err = autofs_dev_ioctl_find_super(&nd, devid); | ||
| 498 | if (err) | ||
| 499 | goto out_release; | ||
| 500 | |||
| 501 | ino = autofs4_dentry_ino(nd.path.dentry); | ||
| 502 | if (ino) { | ||
| 503 | err = 0; | ||
| 504 | autofs4_expire_wait(nd.path.dentry); | ||
| 505 | spin_lock(&sbi->fs_lock); | ||
| 506 | param->arg1 = ino->uid; | ||
| 507 | param->arg2 = ino->gid; | ||
| 508 | spin_unlock(&sbi->fs_lock); | ||
| 509 | } | ||
| 510 | |||
| 511 | out_release: | ||
| 512 | path_put(&nd.path); | ||
| 513 | out: | ||
| 514 | return err; | ||
| 515 | } | ||
| 516 | |||
| 517 | /* | ||
| 518 | * Call repeatedly until it returns -EAGAIN, meaning there's nothing | ||
| 519 | * more that can be done. | ||
| 520 | */ | ||
| 521 | static int autofs_dev_ioctl_expire(struct file *fp, | ||
| 522 | struct autofs_sb_info *sbi, | ||
| 523 | struct autofs_dev_ioctl *param) | ||
| 524 | { | ||
| 525 | struct dentry *dentry; | ||
| 526 | struct vfsmount *mnt; | ||
| 527 | int err = -EAGAIN; | ||
| 528 | int how; | ||
| 529 | |||
| 530 | how = param->arg1; | ||
| 531 | mnt = fp->f_path.mnt; | ||
| 532 | |||
| 533 | if (sbi->type & AUTOFS_TYPE_TRIGGER) | ||
| 534 | dentry = autofs4_expire_direct(sbi->sb, mnt, sbi, how); | ||
| 535 | else | ||
| 536 | dentry = autofs4_expire_indirect(sbi->sb, mnt, sbi, how); | ||
| 537 | |||
| 538 | if (dentry) { | ||
| 539 | struct autofs_info *ino = autofs4_dentry_ino(dentry); | ||
| 540 | |||
| 541 | /* | ||
| 542 | * This is synchronous because it makes things a little | ||
| 543 | * easier for the daemon | ||
| 544 | */ | ||
| 545 | err = autofs4_wait(sbi, dentry, NFY_EXPIRE); | ||
| 546 | |||
| 547 | spin_lock(&sbi->fs_lock); | ||
| 548 | if (ino->flags & AUTOFS_INF_MOUNTPOINT) { | ||
| 549 | ino->flags &= ~AUTOFS_INF_MOUNTPOINT; | ||
| 550 | sbi->sb->s_root->d_mounted++; | ||
| 551 | } | ||
| 552 | ino->flags &= ~AUTOFS_INF_EXPIRING; | ||
| 553 | complete_all(&ino->expire_complete); | ||
| 554 | spin_unlock(&sbi->fs_lock); | ||
| 555 | dput(dentry); | ||
| 556 | } | ||
| 557 | |||
| 558 | return err; | ||
| 559 | } | ||
| 560 | |||
| 561 | /* Check if autofs mount point is in use */ | ||
| 562 | static int autofs_dev_ioctl_askumount(struct file *fp, | ||
| 563 | struct autofs_sb_info *sbi, | ||
| 564 | struct autofs_dev_ioctl *param) | ||
| 565 | { | ||
| 566 | param->arg1 = 0; | ||
| 567 | if (may_umount(fp->f_path.mnt)) | ||
| 568 | param->arg1 = 1; | ||
| 569 | return 0; | ||
| 570 | } | ||
| 571 | |||
| 572 | /* | ||
| 573 | * Check if the given path is a mountpoint. | ||
| 574 | * | ||
| 575 | * If we are supplied with the file descriptor of an autofs | ||
| 576 | * mount we're looking for a specific mount. In this case | ||
| 577 | * the path is considered a mountpoint if it is itself a | ||
| 578 | * mountpoint or contains a mount, such as a multi-mount | ||
| 579 | * without a root mount. In this case we return 1 if the | ||
| 580 | * path is a mount point and the super magic of the covering | ||
| 581 | * mount if there is one, or 0 if it isn't a mountpoint. | ||
| 582 | * | ||
| 583 | * If we aren't supplied with a file descriptor then we | ||
| 584 | * lookup the nameidata of the path and check if it is the | ||
| 585 | * root of a mount. If a type is given we are looking for | ||
| 586 | * a particular autofs mount and if we don't find a match | ||
| 587 | * we return fail. If the located nameidata path is the | ||
| 588 | * root of a mount we return 1 along with the super magic | ||
| 589 | * of the mount or 0 otherwise. | ||
| 590 | * | ||
| 591 | * In both cases the device number (as returned by | ||
| 592 | * new_encode_dev()) is also returned. | ||
| 593 | */ | ||
| 594 | static int autofs_dev_ioctl_ismountpoint(struct file *fp, | ||
| 595 | struct autofs_sb_info *sbi, | ||
| 596 | struct autofs_dev_ioctl *param) | ||
| 597 | { | ||
| 598 | struct nameidata nd; | ||
| 599 | const char *path; | ||
| 600 | unsigned int type; | ||
| 601 | int err = -ENOENT; | ||
| 602 | |||
| 603 | if (param->size <= sizeof(*param)) { | ||
| 604 | err = -EINVAL; | ||
| 605 | goto out; | ||
| 606 | } | ||
| 607 | |||
| 608 | path = param->path; | ||
| 609 | type = param->arg1; | ||
| 610 | |||
| 611 | param->arg1 = 0; | ||
| 612 | param->arg2 = 0; | ||
| 613 | |||
| 614 | if (!fp || param->ioctlfd == -1) { | ||
| 615 | if (type == AUTOFS_TYPE_ANY) { | ||
| 616 | struct super_block *sb; | ||
| 617 | |||
| 618 | err = path_lookup(path, LOOKUP_FOLLOW, &nd); | ||
| 619 | if (err) | ||
| 620 | goto out; | ||
| 621 | |||
| 622 | sb = nd.path.dentry->d_sb; | ||
| 623 | param->arg1 = new_encode_dev(sb->s_dev); | ||
| 624 | } else { | ||
| 625 | struct autofs_info *ino; | ||
| 626 | |||
| 627 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
| 628 | if (err) | ||
| 629 | goto out; | ||
| 630 | |||
| 631 | err = autofs_dev_ioctl_find_sbi_type(&nd, type); | ||
| 632 | if (err) | ||
| 633 | goto out_release; | ||
| 634 | |||
| 635 | ino = autofs4_dentry_ino(nd.path.dentry); | ||
| 636 | param->arg1 = autofs4_get_dev(ino->sbi); | ||
| 637 | } | ||
| 638 | |||
| 639 | err = 0; | ||
| 640 | if (nd.path.dentry->d_inode && | ||
| 641 | nd.path.mnt->mnt_root == nd.path.dentry) { | ||
| 642 | err = 1; | ||
| 643 | param->arg2 = nd.path.dentry->d_inode->i_sb->s_magic; | ||
| 644 | } | ||
| 645 | } else { | ||
| 646 | dev_t devid = new_encode_dev(sbi->sb->s_dev); | ||
| 647 | |||
| 648 | err = path_lookup(path, LOOKUP_PARENT, &nd); | ||
| 649 | if (err) | ||
| 650 | goto out; | ||
| 651 | |||
| 652 | err = autofs_dev_ioctl_find_super(&nd, devid); | ||
| 653 | if (err) | ||
| 654 | goto out_release; | ||
| 655 | |||
| 656 | param->arg1 = autofs4_get_dev(sbi); | ||
| 657 | |||
| 658 | err = have_submounts(nd.path.dentry); | ||
| 659 | |||
| 660 | if (nd.path.mnt->mnt_mountpoint != nd.path.mnt->mnt_root) { | ||
| 661 | if (follow_down(&nd.path.mnt, &nd.path.dentry)) { | ||
| 662 | struct inode *inode = nd.path.dentry->d_inode; | ||
| 663 | param->arg2 = inode->i_sb->s_magic; | ||
| 664 | } | ||
| 665 | } | ||
| 666 | } | ||
| 667 | |||
| 668 | out_release: | ||
| 669 | path_put(&nd.path); | ||
| 670 | out: | ||
| 671 | return err; | ||
| 672 | } | ||
| 673 | |||
| 674 | /* | ||
| 675 | * Our range of ioctl numbers isn't 0-based so we need to shift | ||
| 676 | * the array index by _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST) for the table | ||
| 677 | * lookup. | ||
| 678 | */ | ||
| 679 | #define cmd_idx(cmd) (cmd - _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST)) | ||
| 680 | |||
| 681 | static ioctl_fn lookup_dev_ioctl(unsigned int cmd) | ||
| 682 | { | ||
| 683 | static struct { | ||
| 684 | int cmd; | ||
| 685 | ioctl_fn fn; | ||
| 686 | } _ioctls[] = { | ||
| 687 | {cmd_idx(AUTOFS_DEV_IOCTL_VERSION_CMD), NULL}, | ||
| 688 | {cmd_idx(AUTOFS_DEV_IOCTL_PROTOVER_CMD), | ||
| 689 | autofs_dev_ioctl_protover}, | ||
| 690 | {cmd_idx(AUTOFS_DEV_IOCTL_PROTOSUBVER_CMD), | ||
| 691 | autofs_dev_ioctl_protosubver}, | ||
| 692 | {cmd_idx(AUTOFS_DEV_IOCTL_OPENMOUNT_CMD), | ||
| 693 | autofs_dev_ioctl_openmount}, | ||
| 694 | {cmd_idx(AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD), | ||
| 695 | autofs_dev_ioctl_closemount}, | ||
| 696 | {cmd_idx(AUTOFS_DEV_IOCTL_READY_CMD), | ||
| 697 | autofs_dev_ioctl_ready}, | ||
| 698 | {cmd_idx(AUTOFS_DEV_IOCTL_FAIL_CMD), | ||
| 699 | autofs_dev_ioctl_fail}, | ||
| 700 | {cmd_idx(AUTOFS_DEV_IOCTL_SETPIPEFD_CMD), | ||
| 701 | autofs_dev_ioctl_setpipefd}, | ||
| 702 | {cmd_idx(AUTOFS_DEV_IOCTL_CATATONIC_CMD), | ||
| 703 | autofs_dev_ioctl_catatonic}, | ||
| 704 | {cmd_idx(AUTOFS_DEV_IOCTL_TIMEOUT_CMD), | ||
| 705 | autofs_dev_ioctl_timeout}, | ||
| 706 | {cmd_idx(AUTOFS_DEV_IOCTL_REQUESTER_CMD), | ||
| 707 | autofs_dev_ioctl_requester}, | ||
| 708 | {cmd_idx(AUTOFS_DEV_IOCTL_EXPIRE_CMD), | ||
| 709 | autofs_dev_ioctl_expire}, | ||
| 710 | {cmd_idx(AUTOFS_DEV_IOCTL_ASKUMOUNT_CMD), | ||
| 711 | autofs_dev_ioctl_askumount}, | ||
| 712 | {cmd_idx(AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD), | ||
| 713 | autofs_dev_ioctl_ismountpoint} | ||
| 714 | }; | ||
| 715 | unsigned int idx = cmd_idx(cmd); | ||
| 716 | |||
| 717 | return (idx >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[idx].fn; | ||
| 718 | } | ||
| 719 | |||
| 720 | /* ioctl dispatcher */ | ||
| 721 | static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user) | ||
| 722 | { | ||
| 723 | struct autofs_dev_ioctl *param; | ||
| 724 | struct file *fp; | ||
| 725 | struct autofs_sb_info *sbi; | ||
| 726 | unsigned int cmd_first, cmd; | ||
| 727 | ioctl_fn fn = NULL; | ||
| 728 | int err = 0; | ||
| 729 | |||
| 730 | /* only root can play with this */ | ||
| 731 | if (!capable(CAP_SYS_ADMIN)) | ||
| 732 | return -EPERM; | ||
| 733 | |||
| 734 | cmd_first = _IOC_NR(AUTOFS_DEV_IOCTL_IOC_FIRST); | ||
| 735 | cmd = _IOC_NR(command); | ||
| 736 | |||
| 737 | if (_IOC_TYPE(command) != _IOC_TYPE(AUTOFS_DEV_IOCTL_IOC_FIRST) || | ||
| 738 | cmd - cmd_first >= AUTOFS_DEV_IOCTL_IOC_COUNT) { | ||
| 739 | return -ENOTTY; | ||
| 740 | } | ||
| 741 | |||
| 742 | /* Copy the parameters into kernel space. */ | ||
| 743 | param = copy_dev_ioctl(user); | ||
| 744 | if (IS_ERR(param)) | ||
| 745 | return PTR_ERR(param); | ||
| 746 | |||
| 747 | err = validate_dev_ioctl(command, param); | ||
| 748 | if (err) | ||
| 749 | goto out; | ||
| 750 | |||
| 751 | /* The validate routine above always sets the version */ | ||
| 752 | if (cmd == AUTOFS_DEV_IOCTL_VERSION_CMD) | ||
| 753 | goto done; | ||
| 754 | |||
| 755 | fn = lookup_dev_ioctl(cmd); | ||
| 756 | if (!fn) { | ||
| 757 | AUTOFS_WARN("unknown command 0x%08x", command); | ||
| 758 | return -ENOTTY; | ||
| 759 | } | ||
| 760 | |||
| 761 | fp = NULL; | ||
| 762 | sbi = NULL; | ||
| 763 | |||
| 764 | /* | ||
| 765 | * For obvious reasons the openmount can't have a file | ||
| 766 | * descriptor yet. We don't take a reference to the | ||
| 767 | * file during close to allow for immediate release. | ||
| 768 | */ | ||
| 769 | if (cmd != AUTOFS_DEV_IOCTL_OPENMOUNT_CMD && | ||
| 770 | cmd != AUTOFS_DEV_IOCTL_CLOSEMOUNT_CMD) { | ||
| 771 | fp = fget(param->ioctlfd); | ||
| 772 | if (!fp) { | ||
| 773 | if (cmd == AUTOFS_DEV_IOCTL_ISMOUNTPOINT_CMD) | ||
| 774 | goto cont; | ||
| 775 | err = -EBADF; | ||
| 776 | goto out; | ||
| 777 | } | ||
| 778 | |||
| 779 | if (!fp->f_op) { | ||
| 780 | err = -ENOTTY; | ||
| 781 | fput(fp); | ||
| 782 | goto out; | ||
| 783 | } | ||
| 784 | |||
| 785 | sbi = autofs_dev_ioctl_sbi(fp); | ||
| 786 | if (!sbi || sbi->magic != AUTOFS_SBI_MAGIC) { | ||
| 787 | err = -EINVAL; | ||
| 788 | fput(fp); | ||
| 789 | goto out; | ||
| 790 | } | ||
| 791 | |||
| 792 | /* | ||
| 793 | * Admin needs to be able to set the mount catatonic in | ||
| 794 | * order to be able to perform the re-open. | ||
| 795 | */ | ||
| 796 | if (!autofs4_oz_mode(sbi) && | ||
| 797 | cmd != AUTOFS_DEV_IOCTL_CATATONIC_CMD) { | ||
| 798 | err = -EACCES; | ||
| 799 | fput(fp); | ||
| 800 | goto out; | ||
| 801 | } | ||
| 802 | } | ||
| 803 | cont: | ||
| 804 | err = fn(fp, sbi, param); | ||
| 805 | |||
| 806 | if (fp) | ||
| 807 | fput(fp); | ||
| 808 | done: | ||
| 809 | if (err >= 0 && copy_to_user(user, param, AUTOFS_DEV_IOCTL_SIZE)) | ||
| 810 | err = -EFAULT; | ||
| 811 | out: | ||
| 812 | free_dev_ioctl(param); | ||
| 813 | return err; | ||
| 814 | } | ||
| 815 | |||
| 816 | static long autofs_dev_ioctl(struct file *file, uint command, ulong u) | ||
| 817 | { | ||
| 818 | int err; | ||
| 819 | err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u); | ||
| 820 | return (long) err; | ||
| 821 | } | ||
| 822 | |||
| 823 | #ifdef CONFIG_COMPAT | ||
| 824 | static long autofs_dev_ioctl_compat(struct file *file, uint command, ulong u) | ||
| 825 | { | ||
| 826 | return (long) autofs_dev_ioctl(file, command, (ulong) compat_ptr(u)); | ||
| 827 | } | ||
| 828 | #else | ||
| 829 | #define autofs_dev_ioctl_compat NULL | ||
| 830 | #endif | ||
| 831 | |||
| 832 | static const struct file_operations _dev_ioctl_fops = { | ||
| 833 | .unlocked_ioctl = autofs_dev_ioctl, | ||
| 834 | .compat_ioctl = autofs_dev_ioctl_compat, | ||
| 835 | .owner = THIS_MODULE, | ||
| 836 | }; | ||
| 837 | |||
| 838 | static struct miscdevice _autofs_dev_ioctl_misc = { | ||
| 839 | .minor = MISC_DYNAMIC_MINOR, | ||
| 840 | .name = AUTOFS_DEVICE_NAME, | ||
| 841 | .fops = &_dev_ioctl_fops | ||
| 842 | }; | ||
| 843 | |||
| 844 | /* Register/deregister misc character device */ | ||
| 845 | int autofs_dev_ioctl_init(void) | ||
| 846 | { | ||
| 847 | int r; | ||
| 848 | |||
| 849 | r = misc_register(&_autofs_dev_ioctl_misc); | ||
| 850 | if (r) { | ||
| 851 | AUTOFS_ERROR("misc_register failed for control device"); | ||
| 852 | return r; | ||
| 853 | } | ||
| 854 | |||
| 855 | return 0; | ||
| 856 | } | ||
| 857 | |||
| 858 | void autofs_dev_ioctl_exit(void) | ||
| 859 | { | ||
| 860 | misc_deregister(&_autofs_dev_ioctl_misc); | ||
| 861 | return; | ||
| 862 | } | ||
| 863 | |||
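Editor's note on the new fs/autofs4/dev-ioctl.c above: from userspace the control interface is reached through the misc device registered at the end of the file (AUTOFS_DEVICE_NAME, so normally /dev/autofs once the node is created), and the caller must have CAP_SYS_ADMIN. The sketch below is a guess at minimal usage based only on the fields the handlers above read (ver_major, ver_minor, size, ioctlfd, arg1, arg2 and the trailing path); the node path /dev/autofs and the full request code AUTOFS_DEV_IOCTL_VERSION are assumptions, since the *_CMD values used in the dispatcher are only the _IOC_NR parts.

/* Hypothetical userspace sketch: open the autofs control device and do
 * the version handshake. Run as root. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/auto_dev-ioctl.h>

int main(void)
{
	struct autofs_dev_ioctl *param;
	int devfd, ret = 1;

	param = calloc(1, sizeof(*param));
	devfd = open("/dev/autofs", O_RDONLY);
	if (!param || devfd < 0) {
		perror("open /dev/autofs");
		goto out;
	}

	param->ver_major = AUTOFS_DEV_IOCTL_VERSION_MAJOR;
	param->ver_minor = AUTOFS_DEV_IOCTL_VERSION_MINOR;
	param->size = sizeof(*param);
	param->ioctlfd = -1;

	/* validate_dev_ioctl() fills in the kernel's version on the way out */
	if (ioctl(devfd, AUTOFS_DEV_IOCTL_VERSION, param) == -1) {
		perror("AUTOFS_DEV_IOCTL_VERSION");
		goto out;
	}
	printf("kernel ioctl interface %u.%u\n",
	       param->ver_major, param->ver_minor);

	/* AUTOFS_DEV_IOCTL_OPENMOUNT_CMD would additionally need the target
	 * mount's device number in arg1 and the mount path appended after
	 * the struct, with param->size grown to match. */
	ret = 0;
out:
	if (devfd >= 0)
		close(devfd);
	free(param);
	return ret;
}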
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c index cdabb796ff01..cde2f8e8935a 100644 --- a/fs/autofs4/expire.c +++ b/fs/autofs4/expire.c | |||
| @@ -244,10 +244,10 @@ cont: | |||
| 244 | } | 244 | } |
| 245 | 245 | ||
| 246 | /* Check if we can expire a direct mount (possibly a tree) */ | 246 | /* Check if we can expire a direct mount (possibly a tree) */ |
| 247 | static struct dentry *autofs4_expire_direct(struct super_block *sb, | 247 | struct dentry *autofs4_expire_direct(struct super_block *sb, |
| 248 | struct vfsmount *mnt, | 248 | struct vfsmount *mnt, |
| 249 | struct autofs_sb_info *sbi, | 249 | struct autofs_sb_info *sbi, |
| 250 | int how) | 250 | int how) |
| 251 | { | 251 | { |
| 252 | unsigned long timeout; | 252 | unsigned long timeout; |
| 253 | struct dentry *root = dget(sb->s_root); | 253 | struct dentry *root = dget(sb->s_root); |
| @@ -283,10 +283,10 @@ static struct dentry *autofs4_expire_direct(struct super_block *sb, | |||
| 283 | * - it is unused by any user process | 283 | * - it is unused by any user process |
| 284 | * - it has been unused for exp_timeout time | 284 | * - it has been unused for exp_timeout time |
| 285 | */ | 285 | */ |
| 286 | static struct dentry *autofs4_expire_indirect(struct super_block *sb, | 286 | struct dentry *autofs4_expire_indirect(struct super_block *sb, |
| 287 | struct vfsmount *mnt, | 287 | struct vfsmount *mnt, |
| 288 | struct autofs_sb_info *sbi, | 288 | struct autofs_sb_info *sbi, |
| 289 | int how) | 289 | int how) |
| 290 | { | 290 | { |
| 291 | unsigned long timeout; | 291 | unsigned long timeout; |
| 292 | struct dentry *root = sb->s_root; | 292 | struct dentry *root = sb->s_root; |
| @@ -479,7 +479,7 @@ int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt, | |||
| 479 | if (arg && get_user(do_now, arg)) | 479 | if (arg && get_user(do_now, arg)) |
| 480 | return -EFAULT; | 480 | return -EFAULT; |
| 481 | 481 | ||
| 482 | if (sbi->type & AUTOFS_TYPE_DIRECT) | 482 | if (sbi->type & AUTOFS_TYPE_TRIGGER) |
| 483 | dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); | 483 | dentry = autofs4_expire_direct(sb, mnt, sbi, do_now); |
| 484 | else | 484 | else |
| 485 | dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); | 485 | dentry = autofs4_expire_indirect(sb, mnt, sbi, do_now); |
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c index 723a1c5e361b..9722e4bd8957 100644 --- a/fs/autofs4/init.c +++ b/fs/autofs4/init.c | |||
| @@ -29,11 +29,20 @@ static struct file_system_type autofs_fs_type = { | |||
| 29 | 29 | ||
| 30 | static int __init init_autofs4_fs(void) | 30 | static int __init init_autofs4_fs(void) |
| 31 | { | 31 | { |
| 32 | return register_filesystem(&autofs_fs_type); | 32 | int err; |
| 33 | |||
| 34 | err = register_filesystem(&autofs_fs_type); | ||
| 35 | if (err) | ||
| 36 | return err; | ||
| 37 | |||
| 38 | autofs_dev_ioctl_init(); | ||
| 39 | |||
| 40 | return err; | ||
| 33 | } | 41 | } |
| 34 | 42 | ||
| 35 | static void __exit exit_autofs4_fs(void) | 43 | static void __exit exit_autofs4_fs(void) |
| 36 | { | 44 | { |
| 45 | autofs_dev_ioctl_exit(); | ||
| 37 | unregister_filesystem(&autofs_fs_type); | 46 | unregister_filesystem(&autofs_fs_type); |
| 38 | } | 47 | } |
| 39 | 48 | ||
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 7bb3e5ba0537..c7e65bb30ba0 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c | |||
| @@ -53,6 +53,8 @@ struct autofs_info *autofs4_init_ino(struct autofs_info *ino, | |||
| 53 | atomic_set(&ino->count, 0); | 53 | atomic_set(&ino->count, 0); |
| 54 | } | 54 | } |
| 55 | 55 | ||
| 56 | ino->uid = 0; | ||
| 57 | ino->gid = 0; | ||
| 56 | ino->mode = mode; | 58 | ino->mode = mode; |
| 57 | ino->last_used = jiffies; | 59 | ino->last_used = jiffies; |
| 58 | 60 | ||
| @@ -213,7 +215,7 @@ static const struct super_operations autofs4_sops = { | |||
| 213 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, | 215 | enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto, |
| 214 | Opt_indirect, Opt_direct, Opt_offset}; | 216 | Opt_indirect, Opt_direct, Opt_offset}; |
| 215 | 217 | ||
| 216 | static match_table_t tokens = { | 218 | static const match_table_t tokens = { |
| 217 | {Opt_fd, "fd=%u"}, | 219 | {Opt_fd, "fd=%u"}, |
| 218 | {Opt_uid, "uid=%u"}, | 220 | {Opt_uid, "uid=%u"}, |
| 219 | {Opt_gid, "gid=%u"}, | 221 | {Opt_gid, "gid=%u"}, |
| @@ -288,7 +290,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, | |||
| 288 | *type = AUTOFS_TYPE_DIRECT; | 290 | *type = AUTOFS_TYPE_DIRECT; |
| 289 | break; | 291 | break; |
| 290 | case Opt_offset: | 292 | case Opt_offset: |
| 291 | *type = AUTOFS_TYPE_DIRECT | AUTOFS_TYPE_OFFSET; | 293 | *type = AUTOFS_TYPE_OFFSET; |
| 292 | break; | 294 | break; |
| 293 | default: | 295 | default: |
| 294 | return 1; | 296 | return 1; |
| @@ -336,7 +338,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 336 | sbi->sb = s; | 338 | sbi->sb = s; |
| 337 | sbi->version = 0; | 339 | sbi->version = 0; |
| 338 | sbi->sub_version = 0; | 340 | sbi->sub_version = 0; |
| 339 | sbi->type = 0; | 341 | sbi->type = AUTOFS_TYPE_INDIRECT; |
| 340 | sbi->min_proto = 0; | 342 | sbi->min_proto = 0; |
| 341 | sbi->max_proto = 0; | 343 | sbi->max_proto = 0; |
| 342 | mutex_init(&sbi->wq_mutex); | 344 | mutex_init(&sbi->wq_mutex); |
| @@ -378,7 +380,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) | |||
| 378 | } | 380 | } |
| 379 | 381 | ||
| 380 | root_inode->i_fop = &autofs4_root_operations; | 382 | root_inode->i_fop = &autofs4_root_operations; |
| 381 | root_inode->i_op = sbi->type & AUTOFS_TYPE_DIRECT ? | 383 | root_inode->i_op = sbi->type & AUTOFS_TYPE_TRIGGER ? |
| 382 | &autofs4_direct_root_inode_operations : | 384 | &autofs4_direct_root_inode_operations : |
| 383 | &autofs4_indirect_root_inode_operations; | 385 | &autofs4_indirect_root_inode_operations; |
| 384 | 386 | ||
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 35216d18d8b5..4b67c2a2d77c 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c | |||
| @@ -337,7 +337,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 337 | * is very similar for indirect mounts except only dentrys | 337 | * is very similar for indirect mounts except only dentrys |
| 338 | * in the root of the autofs file system may be negative. | 338 | * in the root of the autofs file system may be negative. |
| 339 | */ | 339 | */ |
| 340 | if (sbi->type & (AUTOFS_TYPE_DIRECT|AUTOFS_TYPE_OFFSET)) | 340 | if (sbi->type & AUTOFS_TYPE_TRIGGER) |
| 341 | return -ENOENT; | 341 | return -ENOENT; |
| 342 | else if (!IS_ROOT(dentry->d_parent)) | 342 | else if (!IS_ROOT(dentry->d_parent)) |
| 343 | return -ENOENT; | 343 | return -ENOENT; |
| @@ -348,7 +348,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 348 | return -ENOMEM; | 348 | return -ENOMEM; |
| 349 | 349 | ||
| 350 | /* If this is a direct mount request create a dummy name */ | 350 | /* If this is a direct mount request create a dummy name */ |
| 351 | if (IS_ROOT(dentry) && (sbi->type & AUTOFS_TYPE_DIRECT)) | 351 | if (IS_ROOT(dentry) && sbi->type & AUTOFS_TYPE_TRIGGER) |
| 352 | qstr.len = sprintf(name, "%p", dentry); | 352 | qstr.len = sprintf(name, "%p", dentry); |
| 353 | else { | 353 | else { |
| 354 | qstr.len = autofs4_getpath(sbi, dentry, &name); | 354 | qstr.len = autofs4_getpath(sbi, dentry, &name); |
| @@ -406,11 +406,11 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 406 | type = autofs_ptype_expire_multi; | 406 | type = autofs_ptype_expire_multi; |
| 407 | } else { | 407 | } else { |
| 408 | if (notify == NFY_MOUNT) | 408 | if (notify == NFY_MOUNT) |
| 409 | type = (sbi->type & AUTOFS_TYPE_DIRECT) ? | 409 | type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? |
| 410 | autofs_ptype_missing_direct : | 410 | autofs_ptype_missing_direct : |
| 411 | autofs_ptype_missing_indirect; | 411 | autofs_ptype_missing_indirect; |
| 412 | else | 412 | else |
| 413 | type = (sbi->type & AUTOFS_TYPE_DIRECT) ? | 413 | type = (sbi->type & AUTOFS_TYPE_TRIGGER) ? |
| 414 | autofs_ptype_expire_direct : | 414 | autofs_ptype_expire_direct : |
| 415 | autofs_ptype_expire_indirect; | 415 | autofs_ptype_expire_indirect; |
| 416 | } | 416 | } |
| @@ -457,6 +457,40 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry, | |||
| 457 | 457 | ||
| 458 | status = wq->status; | 458 | status = wq->status; |
| 459 | 459 | ||
| 460 | /* | ||
| 461 | * For direct and offset mounts we need to track the requester's | ||
| 462 | * uid and gid in the dentry info struct. This is so it can be | ||
| 463 | * supplied, on request, by the misc device ioctl interface. | ||
| 464 | * This is needed during daemon restart when reconnecting | ||
| 465 | * to existing, active, autofs mounts. The uid and gid (and | ||
| 466 | * related string values) may be used for macro substitution | ||
| 467 | * in autofs mount maps. | ||
| 468 | */ | ||
| 469 | if (!status) { | ||
| 470 | struct autofs_info *ino; | ||
| 471 | struct dentry *de = NULL; | ||
| 472 | |||
| 473 | /* direct mount or browsable map */ | ||
| 474 | ino = autofs4_dentry_ino(dentry); | ||
| 475 | if (!ino) { | ||
| 476 | /* If not lookup actual dentry used */ | ||
| 477 | de = d_lookup(dentry->d_parent, &dentry->d_name); | ||
| 478 | if (de) | ||
| 479 | ino = autofs4_dentry_ino(de); | ||
| 480 | } | ||
| 481 | |||
| 482 | /* Set mount requester */ | ||
| 483 | if (ino) { | ||
| 484 | spin_lock(&sbi->fs_lock); | ||
| 485 | ino->uid = wq->uid; | ||
| 486 | ino->gid = wq->gid; | ||
| 487 | spin_unlock(&sbi->fs_lock); | ||
| 488 | } | ||
| 489 | |||
| 490 | if (de) | ||
| 491 | dput(de); | ||
| 492 | } | ||
| 493 | |||
| 460 | /* Are we the last process to need status? */ | 494 | /* Are we the last process to need status? */ |
| 461 | mutex_lock(&sbi->wq_mutex); | 495 | mutex_lock(&sbi->wq_mutex); |
| 462 | if (!--wq->wait_ctr) | 496 | if (!--wq->wait_ctr) |
diff --git a/fs/befs/befs_fs_types.h b/fs/befs/befs_fs_types.h index e2595c2c403a..7893eaa1e58c 100644 --- a/fs/befs/befs_fs_types.h +++ b/fs/befs/befs_fs_types.h | |||
| @@ -55,8 +55,12 @@ enum super_flags { | |||
| 55 | }; | 55 | }; |
| 56 | 56 | ||
| 57 | #define BEFS_BYTEORDER_NATIVE 0x42494745 | 57 | #define BEFS_BYTEORDER_NATIVE 0x42494745 |
| 58 | #define BEFS_BYTEORDER_NATIVE_LE (__force fs32)cpu_to_le32(BEFS_BYTEORDER_NATIVE) | ||
| 59 | #define BEFS_BYTEORDER_NATIVE_BE (__force fs32)cpu_to_be32(BEFS_BYTEORDER_NATIVE) | ||
| 58 | 60 | ||
| 59 | #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 | 61 | #define BEFS_SUPER_MAGIC BEFS_SUPER_MAGIC1 |
| 62 | #define BEFS_SUPER_MAGIC1_LE (__force fs32)cpu_to_le32(BEFS_SUPER_MAGIC1) | ||
| 63 | #define BEFS_SUPER_MAGIC1_BE (__force fs32)cpu_to_be32(BEFS_SUPER_MAGIC1) | ||
| 60 | 64 | ||
| 61 | /* | 65 | /* |
| 62 | * Flags of inode | 66 | * Flags of inode |
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 740f53672a8a..b6dfee37c7b7 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c | |||
| @@ -650,7 +650,7 @@ enum { | |||
| 650 | Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, | 650 | Opt_uid, Opt_gid, Opt_charset, Opt_debug, Opt_err, |
| 651 | }; | 651 | }; |
| 652 | 652 | ||
| 653 | static match_table_t befs_tokens = { | 653 | static const match_table_t befs_tokens = { |
| 654 | {Opt_uid, "uid=%d"}, | 654 | {Opt_uid, "uid=%d"}, |
| 655 | {Opt_gid, "gid=%d"}, | 655 | {Opt_gid, "gid=%d"}, |
| 656 | {Opt_charset, "iocharset=%s"}, | 656 | {Opt_charset, "iocharset=%s"}, |
| @@ -809,8 +809,8 @@ befs_fill_super(struct super_block *sb, void *data, int silent) | |||
| 809 | 809 | ||
| 810 | /* account for offset of super block on x86 */ | 810 | /* account for offset of super block on x86 */ |
| 811 | disk_sb = (befs_super_block *) bh->b_data; | 811 | disk_sb = (befs_super_block *) bh->b_data; |
| 812 | if ((le32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1) || | 812 | if ((disk_sb->magic1 == BEFS_SUPER_MAGIC1_LE) || |
| 813 | (be32_to_cpu(disk_sb->magic1) == BEFS_SUPER_MAGIC1)) { | 813 | (disk_sb->magic1 == BEFS_SUPER_MAGIC1_BE)) { |
| 814 | befs_debug(sb, "Using PPC superblock location"); | 814 | befs_debug(sb, "Using PPC superblock location"); |
| 815 | } else { | 815 | } else { |
| 816 | befs_debug(sb, "Using x86 superblock location"); | 816 | befs_debug(sb, "Using x86 superblock location"); |
diff --git a/fs/befs/super.c b/fs/befs/super.c index 8c3401ff6d6a..41f2b4d0093e 100644 --- a/fs/befs/super.c +++ b/fs/befs/super.c | |||
| @@ -26,10 +26,10 @@ befs_load_sb(struct super_block *sb, befs_super_block * disk_sb) | |||
| 26 | befs_sb_info *befs_sb = BEFS_SB(sb); | 26 | befs_sb_info *befs_sb = BEFS_SB(sb); |
| 27 | 27 | ||
| 28 | /* Check the byte order of the filesystem */ | 28 | /* Check the byte order of the filesystem */ |
| 29 | if (le32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) | 29 | if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_LE) |
| 30 | befs_sb->byte_order = BEFS_BYTESEX_LE; | 30 | befs_sb->byte_order = BEFS_BYTESEX_LE; |
| 31 | else if (be32_to_cpu(disk_sb->fs_byte_order) == BEFS_BYTEORDER_NATIVE) | 31 | else if (disk_sb->fs_byte_order == BEFS_BYTEORDER_NATIVE_BE) |
| 32 | befs_sb->byte_order = BEFS_BYTESEX_BE; | 32 | befs_sb->byte_order = BEFS_BYTESEX_BE; |
| 33 | 33 | ||
| 34 | befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1); | 34 | befs_sb->magic1 = fs32_to_cpu(sb, disk_sb->magic1); |
| 35 | befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2); | 35 | befs_sb->magic2 = fs32_to_cpu(sb, disk_sb->magic2); |
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 655ed8d30a86..8fcfa398d350 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c | |||
| @@ -683,7 +683,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 683 | * switch really is going to happen - do this in | 683 | * switch really is going to happen - do this in |
| 684 | * flush_thread(). - akpm | 684 | * flush_thread(). - akpm |
| 685 | */ | 685 | */ |
| 686 | SET_PERSONALITY(loc->elf_ex, 0); | 686 | SET_PERSONALITY(loc->elf_ex); |
| 687 | 687 | ||
| 688 | interpreter = open_exec(elf_interpreter); | 688 | interpreter = open_exec(elf_interpreter); |
| 689 | retval = PTR_ERR(interpreter); | 689 | retval = PTR_ERR(interpreter); |
| @@ -734,7 +734,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 734 | goto out_free_dentry; | 734 | goto out_free_dentry; |
| 735 | } else { | 735 | } else { |
| 736 | /* Executables without an interpreter also need a personality */ | 736 | /* Executables without an interpreter also need a personality */ |
| 737 | SET_PERSONALITY(loc->elf_ex, 0); | 737 | SET_PERSONALITY(loc->elf_ex); |
| 738 | } | 738 | } |
| 739 | 739 | ||
| 740 | /* Flush all traces of the currently running executable */ | 740 | /* Flush all traces of the currently running executable */ |
| @@ -748,7 +748,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 748 | 748 | ||
| 749 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages | 749 | /* Do this immediately, since STACK_TOP as used in setup_arg_pages |
| 750 | may depend on the personality. */ | 750 | may depend on the personality. */ |
| 751 | SET_PERSONALITY(loc->elf_ex, 0); | 751 | SET_PERSONALITY(loc->elf_ex); |
| 752 | if (elf_read_implies_exec(loc->elf_ex, executable_stack)) | 752 | if (elf_read_implies_exec(loc->elf_ex, executable_stack)) |
| 753 | current->personality |= READ_IMPLIES_EXEC; | 753 | current->personality |= READ_IMPLIES_EXEC; |
| 754 | 754 | ||
| @@ -1156,16 +1156,24 @@ static int dump_seek(struct file *file, loff_t off) | |||
| 1156 | static unsigned long vma_dump_size(struct vm_area_struct *vma, | 1156 | static unsigned long vma_dump_size(struct vm_area_struct *vma, |
| 1157 | unsigned long mm_flags) | 1157 | unsigned long mm_flags) |
| 1158 | { | 1158 | { |
| 1159 | #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) | ||
| 1160 | |||
| 1159 | /* The vma can be set up to tell us the answer directly. */ | 1161 | /* The vma can be set up to tell us the answer directly. */ |
| 1160 | if (vma->vm_flags & VM_ALWAYSDUMP) | 1162 | if (vma->vm_flags & VM_ALWAYSDUMP) |
| 1161 | goto whole; | 1163 | goto whole; |
| 1162 | 1164 | ||
| 1165 | /* Hugetlb memory check */ | ||
| 1166 | if (vma->vm_flags & VM_HUGETLB) { | ||
| 1167 | if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED)) | ||
| 1168 | goto whole; | ||
| 1169 | if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE)) | ||
| 1170 | goto whole; | ||
| 1171 | } | ||
| 1172 | |||
| 1163 | /* Do not dump I/O mapped devices or special mappings */ | 1173 | /* Do not dump I/O mapped devices or special mappings */ |
| 1164 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) | 1174 | if (vma->vm_flags & (VM_IO | VM_RESERVED)) |
| 1165 | return 0; | 1175 | return 0; |
| 1166 | 1176 | ||
| 1167 | #define FILTER(type) (mm_flags & (1UL << MMF_DUMP_##type)) | ||
| 1168 | |||
| 1169 | /* By default, dump shared memory if mapped from an anonymous file. */ | 1177 | /* By default, dump shared memory if mapped from an anonymous file. */ |
| 1170 | if (vma->vm_flags & VM_SHARED) { | 1178 | if (vma->vm_flags & VM_SHARED) { |
| 1171 | if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? | 1179 | if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ? |
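The hugetlb check added above is driven by two per-process dump-filter bits. A hedged userspace sketch, assuming the conventional /proc/<pid>/coredump_filter layout in which bit 5 selects private and bit 6 shared hugetlb mappings:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/self/coredump_filter", "w");

	if (!f) {
		perror("coredump_filter");
		return 1;
	}
	/*
	 * Keep the default anonymous mappings (bits 0-1) and add hugetlb
	 * private (bit 5) and shared (bit 6) mappings to the dump.
	 * Bit positions assumed from Documentation/filesystems/proc.txt.
	 */
	fprintf(f, "0x63\n");
	fclose(f);
	return 0;
}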
| @@ -1333,20 +1341,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
| 1333 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1341 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
| 1334 | prstatus->pr_sid = task_session_vnr(p); | 1342 | prstatus->pr_sid = task_session_vnr(p); |
| 1335 | if (thread_group_leader(p)) { | 1343 | if (thread_group_leader(p)) { |
| 1344 | struct task_cputime cputime; | ||
| 1345 | |||
| 1336 | /* | 1346 | /* |
| 1337 | * This is the record for the group leader. Add in the | 1347 | * This is the record for the group leader. It shows the |
| 1338 | * cumulative times of previous dead threads. This total | 1348 | * group-wide total, not its individual thread total. |
| 1339 | * won't include the time of each live thread whose state | ||
| 1340 | * is included in the core dump. The final total reported | ||
| 1341 | * to our parent process when it calls wait4 will include | ||
| 1342 | * those sums as well as the little bit more time it takes | ||
| 1343 | * this and each other thread to finish dying after the | ||
| 1344 | * core dump synchronization phase. | ||
| 1345 | */ | 1349 | */ |
| 1346 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | 1350 | thread_group_cputime(p, &cputime); |
| 1347 | &prstatus->pr_utime); | 1351 | cputime_to_timeval(cputime.utime, &prstatus->pr_utime); |
| 1348 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | 1352 | cputime_to_timeval(cputime.stime, &prstatus->pr_stime); |
| 1349 | &prstatus->pr_stime); | ||
| 1350 | } else { | 1353 | } else { |
| 1351 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | 1354 | cputime_to_timeval(p->utime, &prstatus->pr_utime); |
| 1352 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | 1355 | cputime_to_timeval(p->stime, &prstatus->pr_stime); |
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 80c1f952ef78..5b5424cb3391 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/fcntl.h> | 25 | #include <linux/fcntl.h> |
| 26 | #include <linux/slab.h> | 26 | #include <linux/slab.h> |
| 27 | #include <linux/pagemap.h> | 27 | #include <linux/pagemap.h> |
| 28 | #include <linux/security.h> | ||
| 28 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
| 29 | #include <linux/highuid.h> | 30 | #include <linux/highuid.h> |
| 30 | #include <linux/personality.h> | 31 | #include <linux/personality.h> |
| @@ -455,8 +456,19 @@ error_kill: | |||
| 455 | } | 456 | } |
| 456 | 457 | ||
| 457 | /*****************************************************************************/ | 458 | /*****************************************************************************/ |
| 459 | |||
| 460 | #ifndef ELF_BASE_PLATFORM | ||
| 461 | /* | ||
| 462 | * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture. | ||
| 463 | * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value | ||
| 464 | * will be copied to the user stack in the same manner as AT_PLATFORM. | ||
| 465 | */ | ||
| 466 | #define ELF_BASE_PLATFORM NULL | ||
| 467 | #endif | ||
| 468 | |||
| 458 | /* | 469 | /* |
| 459 | * present useful information to the program | 470 | * present useful information to the program by shovelling it onto the new |
| 471 | * process's stack | ||
| 460 | */ | 472 | */ |
| 461 | static int create_elf_fdpic_tables(struct linux_binprm *bprm, | 473 | static int create_elf_fdpic_tables(struct linux_binprm *bprm, |
| 462 | struct mm_struct *mm, | 474 | struct mm_struct *mm, |
| @@ -466,15 +478,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
| 466 | unsigned long sp, csp, nitems; | 478 | unsigned long sp, csp, nitems; |
| 467 | elf_caddr_t __user *argv, *envp; | 479 | elf_caddr_t __user *argv, *envp; |
| 468 | size_t platform_len = 0, len; | 480 | size_t platform_len = 0, len; |
| 469 | char *k_platform; | 481 | char *k_platform, *k_base_platform; |
| 470 | char __user *u_platform, *p; | 482 | char __user *u_platform, *u_base_platform, *p; |
| 471 | long hwcap; | 483 | long hwcap; |
| 472 | int loop; | 484 | int loop; |
| 473 | int nr; /* reset for each csp adjustment */ | 485 | int nr; /* reset for each csp adjustment */ |
| 474 | 486 | ||
| 475 | /* we're going to shovel a whole load of stuff onto the stack */ | ||
| 476 | #ifdef CONFIG_MMU | 487 | #ifdef CONFIG_MMU |
| 477 | sp = bprm->p; | 488 | /* In some cases (e.g. Hyper-Threading), we want to avoid L1 evictions |
| 489 | * by the processes running on the same package. One thing we can do is | ||
| 490 | * to shuffle the initial stack for them, so we give the architecture | ||
| 491 | * an opportunity to do so here. | ||
| 492 | */ | ||
| 493 | sp = arch_align_stack(bprm->p); | ||
| 478 | #else | 494 | #else |
| 479 | sp = mm->start_stack; | 495 | sp = mm->start_stack; |
| 480 | 496 | ||
| @@ -483,11 +499,14 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
| 483 | return -EFAULT; | 499 | return -EFAULT; |
| 484 | #endif | 500 | #endif |
| 485 | 501 | ||
| 486 | /* get hold of platform and hardware capabilities masks for the machine | ||
| 487 | * we are running on. In some cases (Sparc), this info is impossible | ||
| 488 | * to get, in others (i386) it is merely difficult. | ||
| 489 | */ | ||
| 490 | hwcap = ELF_HWCAP; | 502 | hwcap = ELF_HWCAP; |
| 503 | |||
| 504 | /* | ||
| 505 | * If this architecture has a platform capability string, copy it | ||
| 506 | * to userspace. In some cases (Sparc), this info is impossible | ||
| 507 | * for userspace to get any other way, in others (i386) it is | ||
| 508 | * merely difficult. | ||
| 509 | */ | ||
| 491 | k_platform = ELF_PLATFORM; | 510 | k_platform = ELF_PLATFORM; |
| 492 | u_platform = NULL; | 511 | u_platform = NULL; |
| 493 | 512 | ||
| @@ -499,19 +518,20 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
| 499 | return -EFAULT; | 518 | return -EFAULT; |
| 500 | } | 519 | } |
| 501 | 520 | ||
| 502 | #if defined(__i386__) && defined(CONFIG_SMP) | 521 | /* |
| 503 | /* in some cases (e.g. Hyper-Threading), we want to avoid L1 evictions | 522 | * If this architecture has a "base" platform capability |
| 504 | * by the processes running on the same package. One thing we can do is | 523 | * string, copy it to userspace. |
| 505 | * to shuffle the initial stack for them. | ||
| 506 | * | ||
| 507 | * the conditionals here are unneeded, but kept in to make the code | ||
| 508 | * behaviour the same as pre change unless we have hyperthreaded | ||
| 509 | * processors. This keeps Mr Marcelo Person happier but should be | ||
| 510 | * removed for 2.5 | ||
| 511 | */ | 524 | */ |
| 512 | if (smp_num_siblings > 1) | 525 | k_base_platform = ELF_BASE_PLATFORM; |
| 513 | sp = sp - ((current->pid % 64) << 7); | 526 | u_base_platform = NULL; |
| 514 | #endif | 527 | |
| 528 | if (k_base_platform) { | ||
| 529 | platform_len = strlen(k_base_platform) + 1; | ||
| 530 | sp -= platform_len; | ||
| 531 | u_base_platform = (char __user *) sp; | ||
| 532 | if (__copy_to_user(u_base_platform, k_base_platform, platform_len) != 0) | ||
| 533 | return -EFAULT; | ||
| 534 | } | ||
| 515 | 535 | ||
| 516 | sp &= ~7UL; | 536 | sp &= ~7UL; |
| 517 | 537 | ||
| @@ -541,9 +561,13 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
| 541 | } | 561 | } |
| 542 | 562 | ||
| 543 | /* force 16 byte _final_ alignment here for generality */ | 563 | /* force 16 byte _final_ alignment here for generality */ |
| 544 | #define DLINFO_ITEMS 13 | 564 | #define DLINFO_ITEMS 15 |
| 545 | 565 | ||
| 546 | nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; | 566 | nitems = 1 + DLINFO_ITEMS + (k_platform ? 1 : 0) + |
| 567 | (k_base_platform ? 1 : 0) + AT_VECTOR_SIZE_ARCH; | ||
| 568 | |||
| 569 | if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) | ||
| 570 | nitems++; | ||
| 547 | 571 | ||
| 548 | csp = sp; | 572 | csp = sp; |
| 549 | sp -= nitems * 2 * sizeof(unsigned long); | 573 | sp -= nitems * 2 * sizeof(unsigned long); |
| @@ -575,6 +599,19 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
| 575 | (elf_addr_t) (unsigned long) u_platform); | 599 | (elf_addr_t) (unsigned long) u_platform); |
| 576 | } | 600 | } |
| 577 | 601 | ||
| 602 | if (k_base_platform) { | ||
| 603 | nr = 0; | ||
| 604 | csp -= 2 * sizeof(unsigned long); | ||
| 605 | NEW_AUX_ENT(AT_BASE_PLATFORM, | ||
| 606 | (elf_addr_t) (unsigned long) u_base_platform); | ||
| 607 | } | ||
| 608 | |||
| 609 | if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) { | ||
| 610 | nr = 0; | ||
| 611 | csp -= 2 * sizeof(unsigned long); | ||
| 612 | NEW_AUX_ENT(AT_EXECFD, bprm->interp_data); | ||
| 613 | } | ||
| 614 | |||
| 578 | nr = 0; | 615 | nr = 0; |
| 579 | csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); | 616 | csp -= DLINFO_ITEMS * 2 * sizeof(unsigned long); |
| 580 | NEW_AUX_ENT(AT_HWCAP, hwcap); | 617 | NEW_AUX_ENT(AT_HWCAP, hwcap); |
| @@ -590,6 +627,8 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, | |||
| 590 | NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); | 627 | NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->euid); |
| 591 | NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); | 628 | NEW_AUX_ENT(AT_GID, (elf_addr_t) current->gid); |
| 592 | NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); | 629 | NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->egid); |
| 630 | NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); | ||
| 631 | NEW_AUX_ENT(AT_EXECFN, bprm->exec); | ||
| 593 | 632 | ||
| 594 | #ifdef ARCH_DLINFO | 633 | #ifdef ARCH_DLINFO |
| 595 | nr = 0; | 634 | nr = 0; |
| @@ -1351,20 +1390,15 @@ static void fill_prstatus(struct elf_prstatus *prstatus, | |||
| 1351 | prstatus->pr_pgrp = task_pgrp_vnr(p); | 1390 | prstatus->pr_pgrp = task_pgrp_vnr(p); |
| 1352 | prstatus->pr_sid = task_session_vnr(p); | 1391 | prstatus->pr_sid = task_session_vnr(p); |
| 1353 | if (thread_group_leader(p)) { | 1392 | if (thread_group_leader(p)) { |
| 1393 | struct task_cputime cputime; | ||
| 1394 | |||
| 1354 | /* | 1395 | /* |
| 1355 | * This is the record for the group leader. Add in the | 1396 | * This is the record for the group leader. It shows the |
| 1356 | * cumulative times of previous dead threads. This total | 1397 | * group-wide total, not its individual thread total. |
| 1357 | * won't include the time of each live thread whose state | ||
| 1358 | * is included in the core dump. The final total reported | ||
| 1359 | * to our parent process when it calls wait4 will include | ||
| 1360 | * those sums as well as the little bit more time it takes | ||
| 1361 | * this and each other thread to finish dying after the | ||
| 1362 | * core dump synchronization phase. | ||
| 1363 | */ | 1398 | */ |
| 1364 | cputime_to_timeval(cputime_add(p->utime, p->signal->utime), | 1399 | thread_group_cputime(p, &cputime); |
| 1365 | &prstatus->pr_utime); | 1400 | cputime_to_timeval(cputime.utime, &prstatus->pr_utime); |
| 1366 | cputime_to_timeval(cputime_add(p->stime, p->signal->stime), | 1401 | cputime_to_timeval(cputime.stime, &prstatus->pr_stime); |
| 1367 | &prstatus->pr_stime); | ||
| 1368 | } else { | 1402 | } else { |
| 1369 | cputime_to_timeval(p->utime, &prstatus->pr_utime); | 1403 | cputime_to_timeval(p->utime, &prstatus->pr_utime); |
| 1370 | cputime_to_timeval(p->stime, &prstatus->pr_stime); | 1404 | cputime_to_timeval(p->stime, &prstatus->pr_stime); |
diff --git a/fs/binfmt_em86.c b/fs/binfmt_em86.c index f9c88d0c8ced..32fb00b52cd0 100644 --- a/fs/binfmt_em86.c +++ b/fs/binfmt_em86.c | |||
| @@ -43,7 +43,7 @@ static int load_em86(struct linux_binprm *bprm,struct pt_regs *regs) | |||
| 43 | return -ENOEXEC; | 43 | return -ENOEXEC; |
| 44 | } | 44 | } |
| 45 | 45 | ||
| 46 | bprm->sh_bang = 1; /* Well, the bang-shell is implicit... */ | 46 | bprm->recursion_depth++; /* Well, the bang-shell is implicit... */ |
| 47 | allow_write_access(bprm->file); | 47 | allow_write_access(bprm->file); |
| 48 | fput(bprm->file); | 48 | fput(bprm->file); |
| 49 | bprm->file = NULL; | 49 | bprm->file = NULL; |
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index dfc0197905ca..ccb781a6a804 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c | |||
| @@ -229,13 +229,13 @@ static int decompress_exec( | |||
| 229 | ret = 10; | 229 | ret = 10; |
| 230 | if (buf[3] & EXTRA_FIELD) { | 230 | if (buf[3] & EXTRA_FIELD) { |
| 231 | ret += 2 + buf[10] + (buf[11] << 8); | 231 | ret += 2 + buf[10] + (buf[11] << 8); |
| 232 | if (unlikely(LBUFSIZE == ret)) { | 232 | if (unlikely(LBUFSIZE <= ret)) { |
| 233 | DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); | 233 | DBG_FLT("binfmt_flat: buffer overflow (EXTRA)?\n"); |
| 234 | goto out_free_buf; | 234 | goto out_free_buf; |
| 235 | } | 235 | } |
| 236 | } | 236 | } |
| 237 | if (buf[3] & ORIG_NAME) { | 237 | if (buf[3] & ORIG_NAME) { |
| 238 | for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) | 238 | while (ret < LBUFSIZE && buf[ret++] != 0) |
| 239 | ; | 239 | ; |
| 240 | if (unlikely(LBUFSIZE == ret)) { | 240 | if (unlikely(LBUFSIZE == ret)) { |
| 241 | DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); | 241 | DBG_FLT("binfmt_flat: buffer overflow (ORIG_NAME)?\n"); |
| @@ -243,7 +243,7 @@ static int decompress_exec( | |||
| 243 | } | 243 | } |
| 244 | } | 244 | } |
| 245 | if (buf[3] & COMMENT) { | 245 | if (buf[3] & COMMENT) { |
| 246 | for (; ret < LBUFSIZE && (buf[ret] != 0); ret++) | 246 | while (ret < LBUFSIZE && buf[ret++] != 0) |
| 247 | ; | 247 | ; |
| 248 | if (unlikely(LBUFSIZE == ret)) { | 248 | if (unlikely(LBUFSIZE == ret)) { |
| 249 | DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); | 249 | DBG_FLT("binfmt_flat: buffer overflow (COMMENT)?\n"); |
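The loops above skip the optional gzip header fields before handing the payload to the inflater. A hedged standalone helper showing the same RFC 1952 layout (the constants and bounds handling here are illustrative, not copied from the driver):

/*
 * Sketch: find the offset of the deflate payload in a gzip stream,
 * mirroring the header-skipping logic fixed above (RFC 1952 layout).
 * Returns a negative value if the header does not fit in 'len' bytes.
 */
#define GZ_EXTRA_FIELD	0x04
#define GZ_ORIG_NAME	0x08
#define GZ_COMMENT	0x10

static int gzip_payload_offset(const unsigned char *buf, int len)
{
	int ret = 10;				/* fixed 10-byte header */

	if (len < 12)
		return -1;
	if (buf[3] & GZ_EXTRA_FIELD) {		/* 2-byte XLEN, little endian */
		ret += 2 + buf[10] + (buf[11] << 8);
		if (ret >= len)
			return -1;
	}
	if (buf[3] & GZ_ORIG_NAME)		/* NUL-terminated file name */
		while (ret < len && buf[ret++] != 0)
			;
	if (buf[3] & GZ_COMMENT)		/* NUL-terminated comment */
		while (ret < len && buf[ret++] != 0)
			;
	return ret < len ? ret : -1;
}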
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 8d7e88e02e0f..f2744ab4e5b3 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c | |||
| @@ -117,7 +117,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 117 | goto _ret; | 117 | goto _ret; |
| 118 | 118 | ||
| 119 | retval = -ENOEXEC; | 119 | retval = -ENOEXEC; |
| 120 | if (bprm->misc_bang) | 120 | if (bprm->recursion_depth > BINPRM_MAX_RECURSION) |
| 121 | goto _ret; | 121 | goto _ret; |
| 122 | 122 | ||
| 123 | /* to keep locking time low, we copy the interpreter string */ | 123 | /* to keep locking time low, we copy the interpreter string */ |
| @@ -197,7 +197,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs) | |||
| 197 | if (retval < 0) | 197 | if (retval < 0) |
| 198 | goto _error; | 198 | goto _error; |
| 199 | 199 | ||
| 200 | bprm->misc_bang = 1; | 200 | bprm->recursion_depth++; |
| 201 | 201 | ||
| 202 | retval = search_binary_handler (bprm, regs); | 202 | retval = search_binary_handler (bprm, regs); |
| 203 | if (retval < 0) | 203 | if (retval < 0) |
diff --git a/fs/binfmt_script.c b/fs/binfmt_script.c index 9e3963f7ebf1..08343505e184 100644 --- a/fs/binfmt_script.c +++ b/fs/binfmt_script.c | |||
| @@ -22,14 +22,15 @@ static int load_script(struct linux_binprm *bprm,struct pt_regs *regs) | |||
| 22 | char interp[BINPRM_BUF_SIZE]; | 22 | char interp[BINPRM_BUF_SIZE]; |
| 23 | int retval; | 23 | int retval; |
| 24 | 24 | ||
| 25 | if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || (bprm->sh_bang)) | 25 | if ((bprm->buf[0] != '#') || (bprm->buf[1] != '!') || |
| 26 | (bprm->recursion_depth > BINPRM_MAX_RECURSION)) | ||
| 26 | return -ENOEXEC; | 27 | return -ENOEXEC; |
| 27 | /* | 28 | /* |
| 28 | * This section does the #! interpretation. | 29 | * This section does the #! interpretation. |
| 29 | * Sorta complicated, but hopefully it will work. -TYT | 30 | * Sorta complicated, but hopefully it will work. -TYT |
| 30 | */ | 31 | */ |
| 31 | 32 | ||
| 32 | bprm->sh_bang = 1; | 33 | bprm->recursion_depth++; |
| 33 | allow_write_access(bprm->file); | 34 | allow_write_access(bprm->file); |
| 34 | fput(bprm->file); | 35 | fput(bprm->file); |
| 35 | bprm->file = NULL; | 36 | bprm->file = NULL; |
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index 68be580ba289..74e587a52796 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c | |||
| @@ -306,3 +306,5 @@ static void __exit exit_som_binfmt(void) | |||
| 306 | 306 | ||
| 307 | core_initcall(init_som_binfmt); | 307 | core_initcall(init_som_binfmt); |
| 308 | module_exit(exit_som_binfmt); | 308 | module_exit(exit_som_binfmt); |
| 309 | |||
| 310 | MODULE_LICENSE("GPL"); | ||
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index c3e174b35fe6..19caf7c962ac 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c | |||
| @@ -107,7 +107,8 @@ void bio_integrity_free(struct bio *bio, struct bio_set *bs) | |||
| 107 | BUG_ON(bip == NULL); | 107 | BUG_ON(bip == NULL); |
| 108 | 108 | ||
| 109 | /* A cloned bio doesn't own the integrity metadata */ | 109 | /* A cloned bio doesn't own the integrity metadata */ |
| 110 | if (!bio_flagged(bio, BIO_CLONED) && bip->bip_buf != NULL) | 110 | if (!bio_flagged(bio, BIO_CLONED) && !bio_flagged(bio, BIO_FS_INTEGRITY) |
| 111 | && bip->bip_buf != NULL) | ||
| 111 | kfree(bip->bip_buf); | 112 | kfree(bip->bip_buf); |
| 112 | 113 | ||
| 113 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); | 114 | mempool_free(bip->bip_vec, bs->bvec_pools[bip->bip_pool]); |
| @@ -150,6 +151,24 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, | |||
| 150 | } | 151 | } |
| 151 | EXPORT_SYMBOL(bio_integrity_add_page); | 152 | EXPORT_SYMBOL(bio_integrity_add_page); |
| 152 | 153 | ||
| 154 | static int bdev_integrity_enabled(struct block_device *bdev, int rw) | ||
| 155 | { | ||
| 156 | struct blk_integrity *bi = bdev_get_integrity(bdev); | ||
| 157 | |||
| 158 | if (bi == NULL) | ||
| 159 | return 0; | ||
| 160 | |||
| 161 | if (rw == READ && bi->verify_fn != NULL && | ||
| 162 | (bi->flags & INTEGRITY_FLAG_READ)) | ||
| 163 | return 1; | ||
| 164 | |||
| 165 | if (rw == WRITE && bi->generate_fn != NULL && | ||
| 166 | (bi->flags & INTEGRITY_FLAG_WRITE)) | ||
| 167 | return 1; | ||
| 168 | |||
| 169 | return 0; | ||
| 170 | } | ||
| 171 | |||
| 153 | /** | 172 | /** |
| 154 | * bio_integrity_enabled - Check whether integrity can be passed | 173 | * bio_integrity_enabled - Check whether integrity can be passed |
| 155 | * @bio: bio to check | 174 | * @bio: bio to check |
| @@ -313,6 +332,14 @@ static void bio_integrity_generate(struct bio *bio) | |||
| 313 | } | 332 | } |
| 314 | } | 333 | } |
| 315 | 334 | ||
| 335 | static inline unsigned short blk_integrity_tuple_size(struct blk_integrity *bi) | ||
| 336 | { | ||
| 337 | if (bi) | ||
| 338 | return bi->tuple_size; | ||
| 339 | |||
| 340 | return 0; | ||
| 341 | } | ||
| 342 | |||
| 316 | /** | 343 | /** |
| 317 | * bio_integrity_prep - Prepare bio for integrity I/O | 344 | * bio_integrity_prep - Prepare bio for integrity I/O |
| 318 | * @bio: bio to prepare | 345 | * @bio: bio to prepare |
| @@ -30,7 +30,7 @@ | |||
| 30 | 30 | ||
| 31 | static struct kmem_cache *bio_slab __read_mostly; | 31 | static struct kmem_cache *bio_slab __read_mostly; |
| 32 | 32 | ||
| 33 | mempool_t *bio_split_pool __read_mostly; | 33 | static mempool_t *bio_split_pool __read_mostly; |
| 34 | 34 | ||
| 35 | /* | 35 | /* |
| 36 | * if you change this list, also change bvec_alloc or things will | 36 | * if you change this list, also change bvec_alloc or things will |
| @@ -60,25 +60,46 @@ struct bio_vec *bvec_alloc_bs(gfp_t gfp_mask, int nr, unsigned long *idx, struct | |||
| 60 | struct bio_vec *bvl; | 60 | struct bio_vec *bvl; |
| 61 | 61 | ||
| 62 | /* | 62 | /* |
| 63 | * see comment near bvec_array define! | 63 | * If 'bs' is given, lookup the pool and do the mempool alloc. |
| 64 | * If not, this is a bio_kmalloc() allocation and just do a | ||
| 65 | * kzalloc() for the exact number of vecs right away. | ||
| 64 | */ | 66 | */ |
| 65 | switch (nr) { | 67 | if (bs) { |
| 66 | case 1 : *idx = 0; break; | 68 | /* |
| 67 | case 2 ... 4: *idx = 1; break; | 69 | * see comment near bvec_array define! |
| 68 | case 5 ... 16: *idx = 2; break; | 70 | */ |
| 69 | case 17 ... 64: *idx = 3; break; | 71 | switch (nr) { |
| 70 | case 65 ... 128: *idx = 4; break; | 72 | case 1: |
| 71 | case 129 ... BIO_MAX_PAGES: *idx = 5; break; | 73 | *idx = 0; |
| 74 | break; | ||
| 75 | case 2 ... 4: | ||
| 76 | *idx = 1; | ||
| 77 | break; | ||
| 78 | case 5 ... 16: | ||
| 79 | *idx = 2; | ||
| 80 | break; | ||
| 81 | case 17 ... 64: | ||
| 82 | *idx = 3; | ||
| 83 | break; | ||
| 84 | case 65 ... 128: | ||
| 85 | *idx = 4; | ||
| 86 | break; | ||
| 87 | case 129 ... BIO_MAX_PAGES: | ||
| 88 | *idx = 5; | ||
| 89 | break; | ||
| 72 | default: | 90 | default: |
| 73 | return NULL; | 91 | return NULL; |
| 74 | } | 92 | } |
| 75 | /* | ||
| 76 | * idx now points to the pool we want to allocate from | ||
| 77 | */ | ||
| 78 | 93 | ||
| 79 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | 94 | /* |
| 80 | if (bvl) | 95 | * idx now points to the pool we want to allocate from |
| 81 | memset(bvl, 0, bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | 96 | */ |
| 97 | bvl = mempool_alloc(bs->bvec_pools[*idx], gfp_mask); | ||
| 98 | if (bvl) | ||
| 99 | memset(bvl, 0, | ||
| 100 | bvec_nr_vecs(*idx) * sizeof(struct bio_vec)); | ||
| 101 | } else | ||
| 102 | bvl = kzalloc(nr * sizeof(struct bio_vec), gfp_mask); | ||
| 82 | 103 | ||
| 83 | return bvl; | 104 | return bvl; |
| 84 | } | 105 | } |
| @@ -107,10 +128,17 @@ static void bio_fs_destructor(struct bio *bio) | |||
| 107 | bio_free(bio, fs_bio_set); | 128 | bio_free(bio, fs_bio_set); |
| 108 | } | 129 | } |
| 109 | 130 | ||
| 131 | static void bio_kmalloc_destructor(struct bio *bio) | ||
| 132 | { | ||
| 133 | kfree(bio->bi_io_vec); | ||
| 134 | kfree(bio); | ||
| 135 | } | ||
| 136 | |||
| 110 | void bio_init(struct bio *bio) | 137 | void bio_init(struct bio *bio) |
| 111 | { | 138 | { |
| 112 | memset(bio, 0, sizeof(*bio)); | 139 | memset(bio, 0, sizeof(*bio)); |
| 113 | bio->bi_flags = 1 << BIO_UPTODATE; | 140 | bio->bi_flags = 1 << BIO_UPTODATE; |
| 141 | bio->bi_comp_cpu = -1; | ||
| 114 | atomic_set(&bio->bi_cnt, 1); | 142 | atomic_set(&bio->bi_cnt, 1); |
| 115 | } | 143 | } |
| 116 | 144 | ||
| @@ -118,19 +146,25 @@ void bio_init(struct bio *bio) | |||
| 118 | * bio_alloc_bioset - allocate a bio for I/O | 146 | * bio_alloc_bioset - allocate a bio for I/O |
| 119 | * @gfp_mask: the GFP_ mask given to the slab allocator | 147 | * @gfp_mask: the GFP_ mask given to the slab allocator |
| 120 | * @nr_iovecs: number of iovecs to pre-allocate | 148 | * @nr_iovecs: number of iovecs to pre-allocate |
| 121 | * @bs: the bio_set to allocate from | 149 | * @bs: the bio_set to allocate from. If %NULL, just use kmalloc |
| 122 | * | 150 | * |
| 123 | * Description: | 151 | * Description: |
| 124 | * bio_alloc_bioset will first try it's on mempool to satisfy the allocation. | 152 | * bio_alloc_bioset will first try its own mempool to satisfy the allocation. |
| 125 | * If %__GFP_WAIT is set then we will block on the internal pool waiting | 153 | * If %__GFP_WAIT is set then we will block on the internal pool waiting |
| 126 | * for a &struct bio to become free. | 154 | * for a &struct bio to become free. If a %NULL @bs is passed in, we will |
| 155 | * fall back to just using @kmalloc to allocate the required memory. | ||
| 127 | * | 156 | * |
| 128 | * allocate bio and iovecs from the memory pools specified by the | 157 | * allocate bio and iovecs from the memory pools specified by the |
| 129 | * bio_set structure. | 158 | * bio_set structure, or @kmalloc if none given. |
| 130 | **/ | 159 | **/ |
| 131 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | 160 | struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) |
| 132 | { | 161 | { |
| 133 | struct bio *bio = mempool_alloc(bs->bio_pool, gfp_mask); | 162 | struct bio *bio; |
| 163 | |||
| 164 | if (bs) | ||
| 165 | bio = mempool_alloc(bs->bio_pool, gfp_mask); | ||
| 166 | else | ||
| 167 | bio = kmalloc(sizeof(*bio), gfp_mask); | ||
| 134 | 168 | ||
| 135 | if (likely(bio)) { | 169 | if (likely(bio)) { |
| 136 | struct bio_vec *bvl = NULL; | 170 | struct bio_vec *bvl = NULL; |
| @@ -141,7 +175,10 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs) | |||
| 141 | 175 | ||
| 142 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); | 176 | bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs); |
| 143 | if (unlikely(!bvl)) { | 177 | if (unlikely(!bvl)) { |
| 144 | mempool_free(bio, bs->bio_pool); | 178 | if (bs) |
| 179 | mempool_free(bio, bs->bio_pool); | ||
| 180 | else | ||
| 181 | kfree(bio); | ||
| 145 | bio = NULL; | 182 | bio = NULL; |
| 146 | goto out; | 183 | goto out; |
| 147 | } | 184 | } |
| @@ -164,6 +201,23 @@ struct bio *bio_alloc(gfp_t gfp_mask, int nr_iovecs) | |||
| 164 | return bio; | 201 | return bio; |
| 165 | } | 202 | } |
| 166 | 203 | ||
| 204 | /* | ||
| 205 | * Like bio_alloc(), but doesn't use a mempool backing. This means that | ||
| 206 | * it CAN fail, but while bio_alloc() can only be used for allocations | ||
| 207 | * that have a short (finite) life span, bio_kmalloc() should be used | ||
| 208 | * for more permanent bio allocations (like allocating some bio's for | ||
| 209 | * initialization or setup purposes). | ||
| 210 | */ | ||
| 211 | struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs) | ||
| 212 | { | ||
| 213 | struct bio *bio = bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); | ||
| 214 | |||
| 215 | if (bio) | ||
| 216 | bio->bi_destructor = bio_kmalloc_destructor; | ||
| 217 | |||
| 218 | return bio; | ||
| 219 | } | ||
| 220 | |||
| 167 | void zero_fill_bio(struct bio *bio) | 221 | void zero_fill_bio(struct bio *bio) |
| 168 | { | 222 | { |
| 169 | unsigned long flags; | 223 | unsigned long flags; |
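A minimal sketch of the kmalloc-backed path described above, assuming a kernel context with <linux/bio.h> available; only the allocation and release are shown, the actual I/O setup is left to the caller:

#include <linux/bio.h>
#include <linux/errno.h>

static int example_alloc_bio(void)
{
	struct bio *bio;

	/*
	 * Unlike bio_alloc(), bio_kmalloc() has no mempool behind it and
	 * can fail even with __GFP_WAIT, so the NULL check is mandatory.
	 */
	bio = bio_kmalloc(GFP_KERNEL, 4);	/* room for up to 4 bio_vecs */
	if (!bio)
		return -ENOMEM;

	/*
	 * A real caller would fill in bi_bdev, bi_sector, bi_end_io and
	 * the io_vec before submitting. Dropping the last reference frees
	 * the bio and its vector through bio_kmalloc_destructor().
	 */
	bio_put(bio);
	return 0;
}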
| @@ -208,14 +262,6 @@ inline int bio_phys_segments(struct request_queue *q, struct bio *bio) | |||
| 208 | return bio->bi_phys_segments; | 262 | return bio->bi_phys_segments; |
| 209 | } | 263 | } |
| 210 | 264 | ||
| 211 | inline int bio_hw_segments(struct request_queue *q, struct bio *bio) | ||
| 212 | { | ||
| 213 | if (unlikely(!bio_flagged(bio, BIO_SEG_VALID))) | ||
| 214 | blk_recount_segments(q, bio); | ||
| 215 | |||
| 216 | return bio->bi_hw_segments; | ||
| 217 | } | ||
| 218 | |||
| 219 | /** | 265 | /** |
| 220 | * __bio_clone - clone a bio | 266 | * __bio_clone - clone a bio |
| 221 | * @bio: destination bio | 267 | * @bio: destination bio |
| @@ -350,8 +396,7 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
| 350 | */ | 396 | */ |
| 351 | 397 | ||
| 352 | while (bio->bi_phys_segments >= q->max_phys_segments | 398 | while (bio->bi_phys_segments >= q->max_phys_segments |
| 353 | || bio->bi_hw_segments >= q->max_hw_segments | 399 | || bio->bi_phys_segments >= q->max_hw_segments) { |
| 354 | || BIOVEC_VIRT_OVERSIZE(bio->bi_size)) { | ||
| 355 | 400 | ||
| 356 | if (retried_segments) | 401 | if (retried_segments) |
| 357 | return 0; | 402 | return 0; |
| @@ -395,13 +440,11 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page | |||
| 395 | } | 440 | } |
| 396 | 441 | ||
| 397 | /* If we may be able to merge these biovecs, force a recount */ | 442 | /* If we may be able to merge these biovecs, force a recount */ |
| 398 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec) || | 443 | if (bio->bi_vcnt && (BIOVEC_PHYS_MERGEABLE(bvec-1, bvec))) |
| 399 | BIOVEC_VIRT_MERGEABLE(bvec-1, bvec))) | ||
| 400 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); | 444 | bio->bi_flags &= ~(1 << BIO_SEG_VALID); |
| 401 | 445 | ||
| 402 | bio->bi_vcnt++; | 446 | bio->bi_vcnt++; |
| 403 | bio->bi_phys_segments++; | 447 | bio->bi_phys_segments++; |
| 404 | bio->bi_hw_segments++; | ||
| 405 | done: | 448 | done: |
| 406 | bio->bi_size += len; | 449 | bio->bi_size += len; |
| 407 | return len; | 450 | return len; |
| @@ -449,16 +492,19 @@ int bio_add_page(struct bio *bio, struct page *page, unsigned int len, | |||
| 449 | 492 | ||
| 450 | struct bio_map_data { | 493 | struct bio_map_data { |
| 451 | struct bio_vec *iovecs; | 494 | struct bio_vec *iovecs; |
| 452 | int nr_sgvecs; | ||
| 453 | struct sg_iovec *sgvecs; | 495 | struct sg_iovec *sgvecs; |
| 496 | int nr_sgvecs; | ||
| 497 | int is_our_pages; | ||
| 454 | }; | 498 | }; |
| 455 | 499 | ||
| 456 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, | 500 | static void bio_set_map_data(struct bio_map_data *bmd, struct bio *bio, |
| 457 | struct sg_iovec *iov, int iov_count) | 501 | struct sg_iovec *iov, int iov_count, |
| 502 | int is_our_pages) | ||
| 458 | { | 503 | { |
| 459 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); | 504 | memcpy(bmd->iovecs, bio->bi_io_vec, sizeof(struct bio_vec) * bio->bi_vcnt); |
| 460 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); | 505 | memcpy(bmd->sgvecs, iov, sizeof(struct sg_iovec) * iov_count); |
| 461 | bmd->nr_sgvecs = iov_count; | 506 | bmd->nr_sgvecs = iov_count; |
| 507 | bmd->is_our_pages = is_our_pages; | ||
| 462 | bio->bi_private = bmd; | 508 | bio->bi_private = bmd; |
| 463 | } | 509 | } |
| 464 | 510 | ||
| @@ -493,7 +539,8 @@ static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count, | |||
| 493 | } | 539 | } |
| 494 | 540 | ||
| 495 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | 541 | static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, |
| 496 | struct sg_iovec *iov, int iov_count, int uncopy) | 542 | struct sg_iovec *iov, int iov_count, int uncopy, |
| 543 | int do_free_page) | ||
| 497 | { | 544 | { |
| 498 | int ret = 0, i; | 545 | int ret = 0, i; |
| 499 | struct bio_vec *bvec; | 546 | struct bio_vec *bvec; |
| @@ -536,7 +583,7 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
| 536 | } | 583 | } |
| 537 | } | 584 | } |
| 538 | 585 | ||
| 539 | if (uncopy) | 586 | if (do_free_page) |
| 540 | __free_page(bvec->bv_page); | 587 | __free_page(bvec->bv_page); |
| 541 | } | 588 | } |
| 542 | 589 | ||
| @@ -553,10 +600,11 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, | |||
| 553 | int bio_uncopy_user(struct bio *bio) | 600 | int bio_uncopy_user(struct bio *bio) |
| 554 | { | 601 | { |
| 555 | struct bio_map_data *bmd = bio->bi_private; | 602 | struct bio_map_data *bmd = bio->bi_private; |
| 556 | int ret; | 603 | int ret = 0; |
| 557 | |||
| 558 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, bmd->nr_sgvecs, 1); | ||
| 559 | 604 | ||
| 605 | if (!bio_flagged(bio, BIO_NULL_MAPPED)) | ||
| 606 | ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, | ||
| 607 | bmd->nr_sgvecs, 1, bmd->is_our_pages); | ||
| 560 | bio_free_map_data(bmd); | 608 | bio_free_map_data(bmd); |
| 561 | bio_put(bio); | 609 | bio_put(bio); |
| 562 | return ret; | 610 | return ret; |
| @@ -565,16 +613,20 @@ int bio_uncopy_user(struct bio *bio) | |||
| 565 | /** | 613 | /** |
| 566 | * bio_copy_user_iov - copy user data to bio | 614 | * bio_copy_user_iov - copy user data to bio |
| 567 | * @q: destination block queue | 615 | * @q: destination block queue |
| 616 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
| 568 | * @iov: the iovec. | 617 | * @iov: the iovec. |
| 569 | * @iov_count: number of elements in the iovec | 618 | * @iov_count: number of elements in the iovec |
| 570 | * @write_to_vm: bool indicating writing to pages or not | 619 | * @write_to_vm: bool indicating writing to pages or not |
| 620 | * @gfp_mask: memory allocation flags | ||
| 571 | * | 621 | * |
| 572 | * Prepares and returns a bio for indirect user io, bouncing data | 622 | * Prepares and returns a bio for indirect user io, bouncing data |
| 573 | * to/from kernel pages as necessary. Must be paired with | 623 | * to/from kernel pages as necessary. Must be paired with |
| 574 | * call bio_uncopy_user() on io completion. | 624 | * call bio_uncopy_user() on io completion. |
| 575 | */ | 625 | */ |
| 576 | struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | 626 | struct bio *bio_copy_user_iov(struct request_queue *q, |
| 577 | int iov_count, int write_to_vm) | 627 | struct rq_map_data *map_data, |
| 628 | struct sg_iovec *iov, int iov_count, | ||
| 629 | int write_to_vm, gfp_t gfp_mask) | ||
| 578 | { | 630 | { |
| 579 | struct bio_map_data *bmd; | 631 | struct bio_map_data *bmd; |
| 580 | struct bio_vec *bvec; | 632 | struct bio_vec *bvec; |
| @@ -597,25 +649,38 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
| 597 | len += iov[i].iov_len; | 649 | len += iov[i].iov_len; |
| 598 | } | 650 | } |
| 599 | 651 | ||
| 600 | bmd = bio_alloc_map_data(nr_pages, iov_count, GFP_KERNEL); | 652 | bmd = bio_alloc_map_data(nr_pages, iov_count, gfp_mask); |
| 601 | if (!bmd) | 653 | if (!bmd) |
| 602 | return ERR_PTR(-ENOMEM); | 654 | return ERR_PTR(-ENOMEM); |
| 603 | 655 | ||
| 604 | ret = -ENOMEM; | 656 | ret = -ENOMEM; |
| 605 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 657 | bio = bio_alloc(gfp_mask, nr_pages); |
| 606 | if (!bio) | 658 | if (!bio) |
| 607 | goto out_bmd; | 659 | goto out_bmd; |
| 608 | 660 | ||
| 609 | bio->bi_rw |= (!write_to_vm << BIO_RW); | 661 | bio->bi_rw |= (!write_to_vm << BIO_RW); |
| 610 | 662 | ||
| 611 | ret = 0; | 663 | ret = 0; |
| 664 | i = 0; | ||
| 612 | while (len) { | 665 | while (len) { |
| 613 | unsigned int bytes = PAGE_SIZE; | 666 | unsigned int bytes; |
| 667 | |||
| 668 | if (map_data) | ||
| 669 | bytes = 1U << (PAGE_SHIFT + map_data->page_order); | ||
| 670 | else | ||
| 671 | bytes = PAGE_SIZE; | ||
| 614 | 672 | ||
| 615 | if (bytes > len) | 673 | if (bytes > len) |
| 616 | bytes = len; | 674 | bytes = len; |
| 617 | 675 | ||
| 618 | page = alloc_page(q->bounce_gfp | GFP_KERNEL); | 676 | if (map_data) { |
| 677 | if (i == map_data->nr_entries) { | ||
| 678 | ret = -ENOMEM; | ||
| 679 | break; | ||
| 680 | } | ||
| 681 | page = map_data->pages[i++]; | ||
| 682 | } else | ||
| 683 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
| 619 | if (!page) { | 684 | if (!page) { |
| 620 | ret = -ENOMEM; | 685 | ret = -ENOMEM; |
| 621 | break; | 686 | break; |
| @@ -634,16 +699,17 @@ struct bio *bio_copy_user_iov(struct request_queue *q, struct sg_iovec *iov, | |||
| 634 | * success | 699 | * success |
| 635 | */ | 700 | */ |
| 636 | if (!write_to_vm) { | 701 | if (!write_to_vm) { |
| 637 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0); | 702 | ret = __bio_copy_iov(bio, bio->bi_io_vec, iov, iov_count, 0, 0); |
| 638 | if (ret) | 703 | if (ret) |
| 639 | goto cleanup; | 704 | goto cleanup; |
| 640 | } | 705 | } |
| 641 | 706 | ||
| 642 | bio_set_map_data(bmd, bio, iov, iov_count); | 707 | bio_set_map_data(bmd, bio, iov, iov_count, map_data ? 0 : 1); |
| 643 | return bio; | 708 | return bio; |
| 644 | cleanup: | 709 | cleanup: |
| 645 | bio_for_each_segment(bvec, bio, i) | 710 | if (!map_data) |
| 646 | __free_page(bvec->bv_page); | 711 | bio_for_each_segment(bvec, bio, i) |
| 712 | __free_page(bvec->bv_page); | ||
| 647 | 713 | ||
| 648 | bio_put(bio); | 714 | bio_put(bio); |
| 649 | out_bmd: | 715 | out_bmd: |
| @@ -654,29 +720,32 @@ out_bmd: | |||
| 654 | /** | 720 | /** |
| 655 | * bio_copy_user - copy user data to bio | 721 | * bio_copy_user - copy user data to bio |
| 656 | * @q: destination block queue | 722 | * @q: destination block queue |
| 723 | * @map_data: pointer to the rq_map_data holding pages (if necessary) | ||
| 657 | * @uaddr: start of user address | 724 | * @uaddr: start of user address |
| 658 | * @len: length in bytes | 725 | * @len: length in bytes |
| 659 | * @write_to_vm: bool indicating writing to pages or not | 726 | * @write_to_vm: bool indicating writing to pages or not |
| 727 | * @gfp_mask: memory allocation flags | ||
| 660 | * | 728 | * |
| 661 | * Prepares and returns a bio for indirect user io, bouncing data | 729 | * Prepares and returns a bio for indirect user io, bouncing data |
| 662 | * to/from kernel pages as necessary. Must be paired with | 730 | * to/from kernel pages as necessary. Must be paired with |
| 663 | * call bio_uncopy_user() on io completion. | 731 | * call bio_uncopy_user() on io completion. |
| 664 | */ | 732 | */ |
| 665 | struct bio *bio_copy_user(struct request_queue *q, unsigned long uaddr, | 733 | struct bio *bio_copy_user(struct request_queue *q, struct rq_map_data *map_data, |
| 666 | unsigned int len, int write_to_vm) | 734 | unsigned long uaddr, unsigned int len, |
| 735 | int write_to_vm, gfp_t gfp_mask) | ||
| 667 | { | 736 | { |
| 668 | struct sg_iovec iov; | 737 | struct sg_iovec iov; |
| 669 | 738 | ||
| 670 | iov.iov_base = (void __user *)uaddr; | 739 | iov.iov_base = (void __user *)uaddr; |
| 671 | iov.iov_len = len; | 740 | iov.iov_len = len; |
| 672 | 741 | ||
| 673 | return bio_copy_user_iov(q, &iov, 1, write_to_vm); | 742 | return bio_copy_user_iov(q, map_data, &iov, 1, write_to_vm, gfp_mask); |
| 674 | } | 743 | } |
| 675 | 744 | ||
| 676 | static struct bio *__bio_map_user_iov(struct request_queue *q, | 745 | static struct bio *__bio_map_user_iov(struct request_queue *q, |
| 677 | struct block_device *bdev, | 746 | struct block_device *bdev, |
| 678 | struct sg_iovec *iov, int iov_count, | 747 | struct sg_iovec *iov, int iov_count, |
| 679 | int write_to_vm) | 748 | int write_to_vm, gfp_t gfp_mask) |
| 680 | { | 749 | { |
| 681 | int i, j; | 750 | int i, j; |
| 682 | int nr_pages = 0; | 751 | int nr_pages = 0; |
| @@ -702,12 +771,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
| 702 | if (!nr_pages) | 771 | if (!nr_pages) |
| 703 | return ERR_PTR(-EINVAL); | 772 | return ERR_PTR(-EINVAL); |
| 704 | 773 | ||
| 705 | bio = bio_alloc(GFP_KERNEL, nr_pages); | 774 | bio = bio_alloc(gfp_mask, nr_pages); |
| 706 | if (!bio) | 775 | if (!bio) |
| 707 | return ERR_PTR(-ENOMEM); | 776 | return ERR_PTR(-ENOMEM); |
| 708 | 777 | ||
| 709 | ret = -ENOMEM; | 778 | ret = -ENOMEM; |
| 710 | pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL); | 779 | pages = kcalloc(nr_pages, sizeof(struct page *), gfp_mask); |
| 711 | if (!pages) | 780 | if (!pages) |
| 712 | goto out; | 781 | goto out; |
| 713 | 782 | ||
| @@ -786,19 +855,21 @@ static struct bio *__bio_map_user_iov(struct request_queue *q, | |||
| 786 | * @uaddr: start of user address | 855 | * @uaddr: start of user address |
| 787 | * @len: length in bytes | 856 | * @len: length in bytes |
| 788 | * @write_to_vm: bool indicating writing to pages or not | 857 | * @write_to_vm: bool indicating writing to pages or not |
| 858 | * @gfp_mask: memory allocation flags | ||
| 789 | * | 859 | * |
| 790 | * Map the user space address into a bio suitable for io to a block | 860 | * Map the user space address into a bio suitable for io to a block |
| 791 | * device. Returns an error pointer in case of error. | 861 | * device. Returns an error pointer in case of error. |
| 792 | */ | 862 | */ |
| 793 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | 863 | struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, |
| 794 | unsigned long uaddr, unsigned int len, int write_to_vm) | 864 | unsigned long uaddr, unsigned int len, int write_to_vm, |
| 865 | gfp_t gfp_mask) | ||
| 795 | { | 866 | { |
| 796 | struct sg_iovec iov; | 867 | struct sg_iovec iov; |
| 797 | 868 | ||
| 798 | iov.iov_base = (void __user *)uaddr; | 869 | iov.iov_base = (void __user *)uaddr; |
| 799 | iov.iov_len = len; | 870 | iov.iov_len = len; |
| 800 | 871 | ||
| 801 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm); | 872 | return bio_map_user_iov(q, bdev, &iov, 1, write_to_vm, gfp_mask); |
| 802 | } | 873 | } |
| 803 | 874 | ||
| 804 | /** | 875 | /** |
| @@ -808,18 +879,19 @@ struct bio *bio_map_user(struct request_queue *q, struct block_device *bdev, | |||
| 808 | * @iov: the iovec. | 879 | * @iov: the iovec. |
| 809 | * @iov_count: number of elements in the iovec | 880 | * @iov_count: number of elements in the iovec |
| 810 | * @write_to_vm: bool indicating writing to pages or not | 881 | * @write_to_vm: bool indicating writing to pages or not |
| 882 | * @gfp_mask: memory allocation flags | ||
| 811 | * | 883 | * |
| 812 | * Map the user space address into a bio suitable for io to a block | 884 | * Map the user space address into a bio suitable for io to a block |
| 813 | * device. Returns an error pointer in case of error. | 885 | * device. Returns an error pointer in case of error. |
| 814 | */ | 886 | */ |
| 815 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, | 887 | struct bio *bio_map_user_iov(struct request_queue *q, struct block_device *bdev, |
| 816 | struct sg_iovec *iov, int iov_count, | 888 | struct sg_iovec *iov, int iov_count, |
| 817 | int write_to_vm) | 889 | int write_to_vm, gfp_t gfp_mask) |
| 818 | { | 890 | { |
| 819 | struct bio *bio; | 891 | struct bio *bio; |
| 820 | 892 | ||
| 821 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm); | 893 | bio = __bio_map_user_iov(q, bdev, iov, iov_count, write_to_vm, |
| 822 | 894 | gfp_mask); | |
| 823 | if (IS_ERR(bio)) | 895 | if (IS_ERR(bio)) |
| 824 | return bio; | 896 | return bio; |
| 825 | 897 | ||
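With the new gfp_mask argument the mapping helpers can be driven from stricter allocation contexts. A hedged sketch of the usual map/submit/unmap pattern, where the queue, block device and user address are placeholders supplied by the surrounding driver:

#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/err.h>

/* 'q', 'bdev', 'uaddr' and 'len' come from the surrounding driver code. */
static int read_into_user_buffer(struct request_queue *q,
				 struct block_device *bdev,
				 unsigned long uaddr, unsigned int len)
{
	struct bio *bio;

	bio = bio_map_user(q, bdev, uaddr, len, 1 /* write_to_vm */,
			   GFP_KERNEL);
	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* ... submit the bio and wait for completion ... */

	bio_unmap_user(bio);		/* releases the pinned user pages */
	return 0;
}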
| @@ -976,48 +1048,13 @@ static void bio_copy_kern_endio(struct bio *bio, int err) | |||
| 976 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | 1048 | struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, |
| 977 | gfp_t gfp_mask, int reading) | 1049 | gfp_t gfp_mask, int reading) |
| 978 | { | 1050 | { |
| 979 | unsigned long kaddr = (unsigned long)data; | ||
| 980 | unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
| 981 | unsigned long start = kaddr >> PAGE_SHIFT; | ||
| 982 | const int nr_pages = end - start; | ||
| 983 | struct bio *bio; | 1051 | struct bio *bio; |
| 984 | struct bio_vec *bvec; | 1052 | struct bio_vec *bvec; |
| 985 | struct bio_map_data *bmd; | 1053 | int i; |
| 986 | int i, ret; | ||
| 987 | struct sg_iovec iov; | ||
| 988 | |||
| 989 | iov.iov_base = data; | ||
| 990 | iov.iov_len = len; | ||
| 991 | |||
| 992 | bmd = bio_alloc_map_data(nr_pages, 1, gfp_mask); | ||
| 993 | if (!bmd) | ||
| 994 | return ERR_PTR(-ENOMEM); | ||
| 995 | |||
| 996 | ret = -ENOMEM; | ||
| 997 | bio = bio_alloc(gfp_mask, nr_pages); | ||
| 998 | if (!bio) | ||
| 999 | goto out_bmd; | ||
| 1000 | |||
| 1001 | while (len) { | ||
| 1002 | struct page *page; | ||
| 1003 | unsigned int bytes = PAGE_SIZE; | ||
| 1004 | |||
| 1005 | if (bytes > len) | ||
| 1006 | bytes = len; | ||
| 1007 | |||
| 1008 | page = alloc_page(q->bounce_gfp | gfp_mask); | ||
| 1009 | if (!page) { | ||
| 1010 | ret = -ENOMEM; | ||
| 1011 | goto cleanup; | ||
| 1012 | } | ||
| 1013 | |||
| 1014 | if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) { | ||
| 1015 | ret = -EINVAL; | ||
| 1016 | goto cleanup; | ||
| 1017 | } | ||
| 1018 | 1054 | ||
| 1019 | len -= bytes; | 1055 | bio = bio_copy_user(q, NULL, (unsigned long)data, len, 1, gfp_mask); |
| 1020 | } | 1056 | if (IS_ERR(bio)) |
| 1057 | return bio; | ||
| 1021 | 1058 | ||
| 1022 | if (!reading) { | 1059 | if (!reading) { |
| 1023 | void *p = data; | 1060 | void *p = data; |
| @@ -1030,20 +1067,9 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, | |||
| 1030 | } | 1067 | } |
| 1031 | } | 1068 | } |
| 1032 | 1069 | ||
| 1033 | bio->bi_private = bmd; | ||
| 1034 | bio->bi_end_io = bio_copy_kern_endio; | 1070 | bio->bi_end_io = bio_copy_kern_endio; |
| 1035 | 1071 | ||
| 1036 | bio_set_map_data(bmd, bio, &iov, 1); | ||
| 1037 | return bio; | 1072 | return bio; |
| 1038 | cleanup: | ||
| 1039 | bio_for_each_segment(bvec, bio, i) | ||
| 1040 | __free_page(bvec->bv_page); | ||
| 1041 | |||
| 1042 | bio_put(bio); | ||
| 1043 | out_bmd: | ||
| 1044 | bio_free_map_data(bmd); | ||
| 1045 | |||
| 1046 | return ERR_PTR(ret); | ||
| 1047 | } | 1073 | } |
| 1048 | 1074 | ||
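bio_copy_kern() is now a thin wrapper: bio_copy_user() allocates and assembles the bounce pages, and only the copy-in of the kernel buffer (for writes) and the endio hook remain here. A caller sketch, with an illustrative helper name:

        #include <linux/bio.h>
        #include <linux/err.h>
        #include <linux/gfp.h>

        /* Build a bio that carries a bounced copy of a kernel buffer. */
        static struct bio *copy_kern_buf(struct request_queue *q, void *buf,
                                         unsigned int len, int reading)
        {
                struct bio *bio = bio_copy_kern(q, buf, len, GFP_KERNEL, reading);

                if (IS_ERR(bio))
                        return bio;             /* e.g. ERR_PTR(-ENOMEM) */
                /* bio_copy_kern_endio() frees the bounce pages on completion. */
                return bio;
        }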
| 1049 | /* | 1075 | /* |
| @@ -1230,9 +1256,9 @@ static void bio_pair_end_2(struct bio *bi, int err) | |||
| 1230 | * split a bio - only worry about a bio with a single page | 1256 | * split a bio - only worry about a bio with a single page |
| 1231 | * in it's iovec | 1257 | * in it's iovec |
| 1232 | */ | 1258 | */ |
| 1233 | struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | 1259 | struct bio_pair *bio_split(struct bio *bi, int first_sectors) |
| 1234 | { | 1260 | { |
| 1235 | struct bio_pair *bp = mempool_alloc(pool, GFP_NOIO); | 1261 | struct bio_pair *bp = mempool_alloc(bio_split_pool, GFP_NOIO); |
| 1236 | 1262 | ||
| 1237 | if (!bp) | 1263 | if (!bp) |
| 1238 | return bp; | 1264 | return bp; |
| @@ -1266,7 +1292,7 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
| 1266 | bp->bio2.bi_end_io = bio_pair_end_2; | 1292 | bp->bio2.bi_end_io = bio_pair_end_2; |
| 1267 | 1293 | ||
| 1268 | bp->bio1.bi_private = bi; | 1294 | bp->bio1.bi_private = bi; |
| 1269 | bp->bio2.bi_private = pool; | 1295 | bp->bio2.bi_private = bio_split_pool; |
| 1270 | 1296 | ||
| 1271 | if (bio_integrity(bi)) | 1297 | if (bio_integrity(bi)) |
| 1272 | bio_integrity_split(bi, bp, first_sectors); | 1298 | bio_integrity_split(bi, bp, first_sectors); |
| @@ -1274,6 +1300,42 @@ struct bio_pair *bio_split(struct bio *bi, mempool_t *pool, int first_sectors) | |||
| 1274 | return bp; | 1300 | return bp; |
| 1275 | } | 1301 | } |
| 1276 | 1302 | ||
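Since callers no longer supply a mempool, a splitting driver just asks for the cut point and submits both halves; the bio_pair comes from the global bio_split_pool. A sketch of the usual pattern (illustrative, error handling trimmed):

        #include <linux/bio.h>
        #include <linux/blkdev.h>

        /* Split @bio after @first_sectors and submit both fragments. */
        static int submit_split(struct bio *bio, int first_sectors)
        {
                struct bio_pair *bp = bio_split(bio, first_sectors);

                if (!bp)
                        return -ENOMEM;
                generic_make_request(&bp->bio1);
                generic_make_request(&bp->bio2);
                bio_pair_release(bp);   /* release the pair once both halves are in flight */
                return 0;
        }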
| 1303 | /** | ||
| 1304 | * bio_sector_offset - Find hardware sector offset in bio | ||
| 1305 | * @bio: bio to inspect | ||
| 1306 | * @index: bio_vec index | ||
| 1307 | * @offset: offset in bv_page | ||
| 1308 | * | ||
| 1309 | * Return the number of hardware sectors between beginning of bio | ||
| 1310 | * and an end point indicated by a bio_vec index and an offset | ||
| 1311 | * within that vector's page. | ||
| 1312 | */ | ||
| 1313 | sector_t bio_sector_offset(struct bio *bio, unsigned short index, | ||
| 1314 | unsigned int offset) | ||
| 1315 | { | ||
| 1316 | unsigned int sector_sz = queue_hardsect_size(bio->bi_bdev->bd_disk->queue); | ||
| 1317 | struct bio_vec *bv; | ||
| 1318 | sector_t sectors; | ||
| 1319 | int i; | ||
| 1320 | |||
| 1321 | sectors = 0; | ||
| 1322 | |||
| 1323 | if (index >= bio->bi_idx) | ||
| 1324 | index = bio->bi_vcnt - 1; | ||
| 1325 | |||
| 1326 | __bio_for_each_segment(bv, bio, i, 0) { | ||
| 1327 | if (i == index) { | ||
| 1328 | if (offset > bv->bv_offset) | ||
| 1329 | sectors += (offset - bv->bv_offset) / sector_sz; | ||
| 1330 | break; | ||
| 1331 | } | ||
| 1332 | |||
| 1333 | sectors += bv->bv_len / sector_sz; | ||
| 1334 | } | ||
| 1335 | |||
| 1336 | return sectors; | ||
| 1337 | } | ||
| 1338 | EXPORT_SYMBOL(bio_sector_offset); | ||
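bio_sector_offset() turns a (bio_vec index, byte offset) position into hardware sectors from the start of the bio, which is exactly what bio_split() wants as its cut point. A worked example, assuming 512-byte hardware sectors and a two-segment bio whose first bio_vec maps a full 4096-byte page at bv_offset 0:

        #include <linux/bio.h>

        /* bvec 0 contributes 4096/512 = 8 sectors, plus 1024/512 = 2 into bvec 1,
         * so bio_sector_offset(bio, 1, 1024) evaluates to 10 here. */
        static struct bio_pair *split_before_second_kib(struct bio *bio)
        {
                sector_t first = bio_sector_offset(bio, 1, 1024);

                return bio_split(bio, (int)first);
        }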
| 1277 | 1339 | ||
| 1278 | /* | 1340 | /* |
| 1279 | * create memory pools for biovec's in a bio_set. | 1341 | * create memory pools for biovec's in a bio_set. |
| @@ -1376,6 +1438,7 @@ static int __init init_bio(void) | |||
| 1376 | subsys_initcall(init_bio); | 1438 | subsys_initcall(init_bio); |
| 1377 | 1439 | ||
| 1378 | EXPORT_SYMBOL(bio_alloc); | 1440 | EXPORT_SYMBOL(bio_alloc); |
| 1441 | EXPORT_SYMBOL(bio_kmalloc); | ||
| 1379 | EXPORT_SYMBOL(bio_put); | 1442 | EXPORT_SYMBOL(bio_put); |
| 1380 | EXPORT_SYMBOL(bio_free); | 1443 | EXPORT_SYMBOL(bio_free); |
| 1381 | EXPORT_SYMBOL(bio_endio); | 1444 | EXPORT_SYMBOL(bio_endio); |
| @@ -1383,7 +1446,6 @@ EXPORT_SYMBOL(bio_init); | |||
| 1383 | EXPORT_SYMBOL(__bio_clone); | 1446 | EXPORT_SYMBOL(__bio_clone); |
| 1384 | EXPORT_SYMBOL(bio_clone); | 1447 | EXPORT_SYMBOL(bio_clone); |
| 1385 | EXPORT_SYMBOL(bio_phys_segments); | 1448 | EXPORT_SYMBOL(bio_phys_segments); |
| 1386 | EXPORT_SYMBOL(bio_hw_segments); | ||
| 1387 | EXPORT_SYMBOL(bio_add_page); | 1449 | EXPORT_SYMBOL(bio_add_page); |
| 1388 | EXPORT_SYMBOL(bio_add_pc_page); | 1450 | EXPORT_SYMBOL(bio_add_pc_page); |
| 1389 | EXPORT_SYMBOL(bio_get_nr_vecs); | 1451 | EXPORT_SYMBOL(bio_get_nr_vecs); |
| @@ -1393,7 +1455,6 @@ EXPORT_SYMBOL(bio_map_kern); | |||
| 1393 | EXPORT_SYMBOL(bio_copy_kern); | 1455 | EXPORT_SYMBOL(bio_copy_kern); |
| 1394 | EXPORT_SYMBOL(bio_pair_release); | 1456 | EXPORT_SYMBOL(bio_pair_release); |
| 1395 | EXPORT_SYMBOL(bio_split); | 1457 | EXPORT_SYMBOL(bio_split); |
| 1396 | EXPORT_SYMBOL(bio_split_pool); | ||
| 1397 | EXPORT_SYMBOL(bio_copy_user); | 1458 | EXPORT_SYMBOL(bio_copy_user); |
| 1398 | EXPORT_SYMBOL(bio_uncopy_user); | 1459 | EXPORT_SYMBOL(bio_uncopy_user); |
| 1399 | EXPORT_SYMBOL(bioset_create); | 1460 | EXPORT_SYMBOL(bioset_create); |
diff --git a/fs/block_dev.c b/fs/block_dev.c index aff54219e049..218408eed1bb 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c | |||
| @@ -540,22 +540,6 @@ EXPORT_SYMBOL(bd_release); | |||
| 540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 | 540 | * /sys/block/sda/holders/dm-0 --> /sys/block/dm-0 |
| 541 | */ | 541 | */ |
| 542 | 542 | ||
| 543 | static struct kobject *bdev_get_kobj(struct block_device *bdev) | ||
| 544 | { | ||
| 545 | if (bdev->bd_contains != bdev) | ||
| 546 | return kobject_get(&bdev->bd_part->dev.kobj); | ||
| 547 | else | ||
| 548 | return kobject_get(&bdev->bd_disk->dev.kobj); | ||
| 549 | } | ||
| 550 | |||
| 551 | static struct kobject *bdev_get_holder(struct block_device *bdev) | ||
| 552 | { | ||
| 553 | if (bdev->bd_contains != bdev) | ||
| 554 | return kobject_get(bdev->bd_part->holder_dir); | ||
| 555 | else | ||
| 556 | return kobject_get(bdev->bd_disk->holder_dir); | ||
| 557 | } | ||
| 558 | |||
| 559 | static int add_symlink(struct kobject *from, struct kobject *to) | 543 | static int add_symlink(struct kobject *from, struct kobject *to) |
| 560 | { | 544 | { |
| 561 | if (!from || !to) | 545 | if (!from || !to) |
| @@ -604,11 +588,11 @@ static int bd_holder_grab_dirs(struct block_device *bdev, | |||
| 604 | if (!bo->hdev) | 588 | if (!bo->hdev) |
| 605 | goto fail_put_sdir; | 589 | goto fail_put_sdir; |
| 606 | 590 | ||
| 607 | bo->sdev = bdev_get_kobj(bdev); | 591 | bo->sdev = kobject_get(&part_to_dev(bdev->bd_part)->kobj); |
| 608 | if (!bo->sdev) | 592 | if (!bo->sdev) |
| 609 | goto fail_put_hdev; | 593 | goto fail_put_hdev; |
| 610 | 594 | ||
| 611 | bo->hdir = bdev_get_holder(bdev); | 595 | bo->hdir = kobject_get(bdev->bd_part->holder_dir); |
| 612 | if (!bo->hdir) | 596 | if (!bo->hdir) |
| 613 | goto fail_put_sdev; | 597 | goto fail_put_sdev; |
| 614 | 598 | ||
| @@ -868,6 +852,87 @@ struct block_device *open_by_devnum(dev_t dev, unsigned mode) | |||
| 868 | 852 | ||
| 869 | EXPORT_SYMBOL(open_by_devnum); | 853 | EXPORT_SYMBOL(open_by_devnum); |
| 870 | 854 | ||
| 855 | /** | ||
| 856 | * flush_disk - invalidates all buffer-cache entries on a disk | ||
| 857 | * | ||
| 858 | * @bdev: struct block device to be flushed | ||
| 859 | * | ||
| 860 | * Invalidates all buffer-cache entries on a disk. It should be called | ||
| 861 | * when a disk has been changed -- either by a media change or online | ||
| 862 | * resize. | ||
| 863 | */ | ||
| 864 | static void flush_disk(struct block_device *bdev) | ||
| 865 | { | ||
| 866 | if (__invalidate_device(bdev)) { | ||
| 867 | char name[BDEVNAME_SIZE] = ""; | ||
| 868 | |||
| 869 | if (bdev->bd_disk) | ||
| 870 | disk_name(bdev->bd_disk, 0, name); | ||
| 871 | printk(KERN_WARNING "VFS: busy inodes on changed media or " | ||
| 872 | "resized disk %s\n", name); | ||
| 873 | } | ||
| 874 | |||
| 875 | if (!bdev->bd_disk) | ||
| 876 | return; | ||
| 877 | if (disk_partitionable(bdev->bd_disk)) | ||
| 878 | bdev->bd_invalidated = 1; | ||
| 879 | } | ||
| 880 | |||
| 881 | /** | ||
| 882 | * check_disk_size_change - checks for disk size change and adjusts bdev size. | ||
| 883 | * @disk: struct gendisk to check | ||
| 884 | * @bdev: struct bdev to adjust. | ||
| 885 | * | ||
| 886 | * This routine checks to see if the bdev size does not match the disk size | ||
| 887 | * and adjusts it if it differs. | ||
| 888 | */ | ||
| 889 | void check_disk_size_change(struct gendisk *disk, struct block_device *bdev) | ||
| 890 | { | ||
| 891 | loff_t disk_size, bdev_size; | ||
| 892 | |||
| 893 | disk_size = (loff_t)get_capacity(disk) << 9; | ||
| 894 | bdev_size = i_size_read(bdev->bd_inode); | ||
| 895 | if (disk_size != bdev_size) { | ||
| 896 | char name[BDEVNAME_SIZE]; | ||
| 897 | |||
| 898 | disk_name(disk, 0, name); | ||
| 899 | printk(KERN_INFO | ||
| 900 | "%s: detected capacity change from %lld to %lld\n", | ||
| 901 | name, bdev_size, disk_size); | ||
| 902 | i_size_write(bdev->bd_inode, disk_size); | ||
| 903 | flush_disk(bdev); | ||
| 904 | } | ||
| 905 | } | ||
| 906 | EXPORT_SYMBOL(check_disk_size_change); | ||
| 907 | |||
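get_capacity() reports 512-byte sectors, so the shift by 9 puts both sides of the comparison in bytes before i_size_write() adjusts the bdev inode. With made-up numbers:

        sector_t capacity  = 2097152;                   /* from get_capacity() */
        loff_t   disk_size = (loff_t)capacity << 9;     /* 1073741824 bytes = 1 GiB */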
| 908 | /** | ||
| 909 | * revalidate_disk - wrapper for lower-level driver's revalidate_disk call-back | ||
| 910 | * @disk: struct gendisk to be revalidated | ||
| 911 | * | ||
| 912 | * This routine is a wrapper for lower-level driver's revalidate_disk | ||
| 913 | * call-backs. It is used to do common pre and post operations needed | ||
| 914 | * for all revalidate_disk operations. | ||
| 915 | */ | ||
| 916 | int revalidate_disk(struct gendisk *disk) | ||
| 917 | { | ||
| 918 | struct block_device *bdev; | ||
| 919 | int ret = 0; | ||
| 920 | |||
| 921 | if (disk->fops->revalidate_disk) | ||
| 922 | ret = disk->fops->revalidate_disk(disk); | ||
| 923 | |||
| 924 | bdev = bdget_disk(disk, 0); | ||
| 925 | if (!bdev) | ||
| 926 | return ret; | ||
| 927 | |||
| 928 | mutex_lock(&bdev->bd_mutex); | ||
| 929 | check_disk_size_change(disk, bdev); | ||
| 930 | mutex_unlock(&bdev->bd_mutex); | ||
| 931 | bdput(bdev); | ||
| 932 | return ret; | ||
| 933 | } | ||
| 934 | EXPORT_SYMBOL(revalidate_disk); | ||
| 935 | |||
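revalidate_disk() gives drivers a single call to make after they learn of a capacity change: it runs the driver's own ->revalidate_disk() and then resizes the whole-disk bdev under bd_mutex. A hypothetical driver hook (struct mydrv_dev and the function name are stand-ins):

        #include <linux/genhd.h>
        #include <linux/fs.h>

        struct mydrv_dev {                      /* hypothetical driver state */
                struct gendisk *disk;
        };

        /* Backend reported a new size; propagate it to the block layer. */
        static void mydrv_capacity_changed(struct mydrv_dev *dev, sector_t sectors)
        {
                set_capacity(dev->disk, sectors);
                revalidate_disk(dev->disk);     /* resizes bdev, flushes stale buffers */
        }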
| 871 | /* | 936 | /* |
| 872 | * This routine checks whether a removable media has been changed, | 937 | * This routine checks whether a removable media has been changed, |
| 873 | * and invalidates all buffer-cache-entries in that case. This | 938 | * and invalidates all buffer-cache-entries in that case. This |
| @@ -887,13 +952,9 @@ int check_disk_change(struct block_device *bdev) | |||
| 887 | if (!bdops->media_changed(bdev->bd_disk)) | 952 | if (!bdops->media_changed(bdev->bd_disk)) |
| 888 | return 0; | 953 | return 0; |
| 889 | 954 | ||
| 890 | if (__invalidate_device(bdev)) | 955 | flush_disk(bdev); |
| 891 | printk("VFS: busy inodes on changed media.\n"); | ||
| 892 | |||
| 893 | if (bdops->revalidate_disk) | 956 | if (bdops->revalidate_disk) |
| 894 | bdops->revalidate_disk(bdev->bd_disk); | 957 | bdops->revalidate_disk(bdev->bd_disk); |
| 895 | if (bdev->bd_disk->minors > 1) | ||
| 896 | bdev->bd_invalidated = 1; | ||
| 897 | return 1; | 958 | return 1; |
| 898 | } | 959 | } |
| 899 | 960 | ||
| @@ -927,10 +988,10 @@ static int __blkdev_put(struct block_device *bdev, int for_part); | |||
| 927 | 988 | ||
| 928 | static int do_open(struct block_device *bdev, struct file *file, int for_part) | 989 | static int do_open(struct block_device *bdev, struct file *file, int for_part) |
| 929 | { | 990 | { |
| 930 | struct module *owner = NULL; | ||
| 931 | struct gendisk *disk; | 991 | struct gendisk *disk; |
| 992 | struct hd_struct *part = NULL; | ||
| 932 | int ret; | 993 | int ret; |
| 933 | int part; | 994 | int partno; |
| 934 | int perm = 0; | 995 | int perm = 0; |
| 935 | 996 | ||
| 936 | if (file->f_mode & FMODE_READ) | 997 | if (file->f_mode & FMODE_READ) |
| @@ -948,25 +1009,27 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
| 948 | 1009 | ||
| 949 | ret = -ENXIO; | 1010 | ret = -ENXIO; |
| 950 | file->f_mapping = bdev->bd_inode->i_mapping; | 1011 | file->f_mapping = bdev->bd_inode->i_mapping; |
| 1012 | |||
| 951 | lock_kernel(); | 1013 | lock_kernel(); |
| 952 | disk = get_gendisk(bdev->bd_dev, &part); | 1014 | |
| 953 | if (!disk) { | 1015 | disk = get_gendisk(bdev->bd_dev, &partno); |
| 954 | unlock_kernel(); | 1016 | if (!disk) |
| 955 | bdput(bdev); | 1017 | goto out_unlock_kernel; |
| 956 | return ret; | 1018 | part = disk_get_part(disk, partno); |
| 957 | } | 1019 | if (!part) |
| 958 | owner = disk->fops->owner; | 1020 | goto out_unlock_kernel; |
| 959 | 1021 | ||
| 960 | mutex_lock_nested(&bdev->bd_mutex, for_part); | 1022 | mutex_lock_nested(&bdev->bd_mutex, for_part); |
| 961 | if (!bdev->bd_openers) { | 1023 | if (!bdev->bd_openers) { |
| 962 | bdev->bd_disk = disk; | 1024 | bdev->bd_disk = disk; |
| 1025 | bdev->bd_part = part; | ||
| 963 | bdev->bd_contains = bdev; | 1026 | bdev->bd_contains = bdev; |
| 964 | if (!part) { | 1027 | if (!partno) { |
| 965 | struct backing_dev_info *bdi; | 1028 | struct backing_dev_info *bdi; |
| 966 | if (disk->fops->open) { | 1029 | if (disk->fops->open) { |
| 967 | ret = disk->fops->open(bdev->bd_inode, file); | 1030 | ret = disk->fops->open(bdev->bd_inode, file); |
| 968 | if (ret) | 1031 | if (ret) |
| 969 | goto out_first; | 1032 | goto out_clear; |
| 970 | } | 1033 | } |
| 971 | if (!bdev->bd_openers) { | 1034 | if (!bdev->bd_openers) { |
| 972 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); | 1035 | bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); |
| @@ -978,36 +1041,36 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
| 978 | if (bdev->bd_invalidated) | 1041 | if (bdev->bd_invalidated) |
| 979 | rescan_partitions(disk, bdev); | 1042 | rescan_partitions(disk, bdev); |
| 980 | } else { | 1043 | } else { |
| 981 | struct hd_struct *p; | ||
| 982 | struct block_device *whole; | 1044 | struct block_device *whole; |
| 983 | whole = bdget_disk(disk, 0); | 1045 | whole = bdget_disk(disk, 0); |
| 984 | ret = -ENOMEM; | 1046 | ret = -ENOMEM; |
| 985 | if (!whole) | 1047 | if (!whole) |
| 986 | goto out_first; | 1048 | goto out_clear; |
| 987 | BUG_ON(for_part); | 1049 | BUG_ON(for_part); |
| 988 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); | 1050 | ret = __blkdev_get(whole, file->f_mode, file->f_flags, 1); |
| 989 | if (ret) | 1051 | if (ret) |
| 990 | goto out_first; | 1052 | goto out_clear; |
| 991 | bdev->bd_contains = whole; | 1053 | bdev->bd_contains = whole; |
| 992 | p = disk->part[part - 1]; | ||
| 993 | bdev->bd_inode->i_data.backing_dev_info = | 1054 | bdev->bd_inode->i_data.backing_dev_info = |
| 994 | whole->bd_inode->i_data.backing_dev_info; | 1055 | whole->bd_inode->i_data.backing_dev_info; |
| 995 | if (!(disk->flags & GENHD_FL_UP) || !p || !p->nr_sects) { | 1056 | if (!(disk->flags & GENHD_FL_UP) || |
| 1057 | !part || !part->nr_sects) { | ||
| 996 | ret = -ENXIO; | 1058 | ret = -ENXIO; |
| 997 | goto out_first; | 1059 | goto out_clear; |
| 998 | } | 1060 | } |
| 999 | kobject_get(&p->dev.kobj); | 1061 | bd_set_size(bdev, (loff_t)part->nr_sects << 9); |
| 1000 | bdev->bd_part = p; | ||
| 1001 | bd_set_size(bdev, (loff_t) p->nr_sects << 9); | ||
| 1002 | } | 1062 | } |
| 1003 | } else { | 1063 | } else { |
| 1064 | disk_put_part(part); | ||
| 1004 | put_disk(disk); | 1065 | put_disk(disk); |
| 1005 | module_put(owner); | 1066 | module_put(disk->fops->owner); |
| 1067 | part = NULL; | ||
| 1068 | disk = NULL; | ||
| 1006 | if (bdev->bd_contains == bdev) { | 1069 | if (bdev->bd_contains == bdev) { |
| 1007 | if (bdev->bd_disk->fops->open) { | 1070 | if (bdev->bd_disk->fops->open) { |
| 1008 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); | 1071 | ret = bdev->bd_disk->fops->open(bdev->bd_inode, file); |
| 1009 | if (ret) | 1072 | if (ret) |
| 1010 | goto out; | 1073 | goto out_unlock_bdev; |
| 1011 | } | 1074 | } |
| 1012 | if (bdev->bd_invalidated) | 1075 | if (bdev->bd_invalidated) |
| 1013 | rescan_partitions(bdev->bd_disk, bdev); | 1076 | rescan_partitions(bdev->bd_disk, bdev); |
| @@ -1020,19 +1083,24 @@ static int do_open(struct block_device *bdev, struct file *file, int for_part) | |||
| 1020 | unlock_kernel(); | 1083 | unlock_kernel(); |
| 1021 | return 0; | 1084 | return 0; |
| 1022 | 1085 | ||
| 1023 | out_first: | 1086 | out_clear: |
| 1024 | bdev->bd_disk = NULL; | 1087 | bdev->bd_disk = NULL; |
| 1088 | bdev->bd_part = NULL; | ||
| 1025 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1089 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
| 1026 | if (bdev != bdev->bd_contains) | 1090 | if (bdev != bdev->bd_contains) |
| 1027 | __blkdev_put(bdev->bd_contains, 1); | 1091 | __blkdev_put(bdev->bd_contains, 1); |
| 1028 | bdev->bd_contains = NULL; | 1092 | bdev->bd_contains = NULL; |
| 1029 | put_disk(disk); | 1093 | out_unlock_bdev: |
| 1030 | module_put(owner); | ||
| 1031 | out: | ||
| 1032 | mutex_unlock(&bdev->bd_mutex); | 1094 | mutex_unlock(&bdev->bd_mutex); |
| 1095 | out_unlock_kernel: | ||
| 1033 | unlock_kernel(); | 1096 | unlock_kernel(); |
| 1034 | if (ret) | 1097 | |
| 1035 | bdput(bdev); | 1098 | disk_put_part(part); |
| 1099 | if (disk) | ||
| 1100 | module_put(disk->fops->owner); | ||
| 1101 | put_disk(disk); | ||
| 1102 | bdput(bdev); | ||
| 1103 | |||
| 1036 | return ret; | 1104 | return ret; |
| 1037 | } | 1105 | } |
| 1038 | 1106 | ||
| @@ -1117,11 +1185,8 @@ static int __blkdev_put(struct block_device *bdev, int for_part) | |||
| 1117 | 1185 | ||
| 1118 | put_disk(disk); | 1186 | put_disk(disk); |
| 1119 | module_put(owner); | 1187 | module_put(owner); |
| 1120 | 1188 | disk_put_part(bdev->bd_part); | |
| 1121 | if (bdev->bd_contains != bdev) { | 1189 | bdev->bd_part = NULL; |
| 1122 | kobject_put(&bdev->bd_part->dev.kobj); | ||
| 1123 | bdev->bd_part = NULL; | ||
| 1124 | } | ||
| 1125 | bdev->bd_disk = NULL; | 1190 | bdev->bd_disk = NULL; |
| 1126 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; | 1191 | bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info; |
| 1127 | if (bdev != bdev->bd_contains) | 1192 | if (bdev != bdev->bd_contains) |
| @@ -1197,10 +1262,9 @@ EXPORT_SYMBOL(ioctl_by_bdev); | |||
| 1197 | 1262 | ||
| 1198 | /** | 1263 | /** |
| 1199 | * lookup_bdev - lookup a struct block_device by name | 1264 | * lookup_bdev - lookup a struct block_device by name |
| 1200 | * | ||
| 1201 | * @path: special file representing the block device | 1265 | * @path: special file representing the block device |
| 1202 | * | 1266 | * |
| 1203 | * Get a reference to the blockdevice at @path in the current | 1267 | * Get a reference to the blockdevice at @pathname in the current |
| 1204 | * namespace if possible and return it. Return ERR_PTR(error) | 1268 | * namespace if possible and return it. Return ERR_PTR(error) |
| 1205 | * otherwise. | 1269 | * otherwise. |
| 1206 | */ | 1270 | */ |
diff --git a/fs/buffer.c b/fs/buffer.c index ac78d4c19b3b..6569fda5cfed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c | |||
| @@ -76,8 +76,7 @@ EXPORT_SYMBOL(__lock_buffer); | |||
| 76 | 76 | ||
| 77 | void unlock_buffer(struct buffer_head *bh) | 77 | void unlock_buffer(struct buffer_head *bh) |
| 78 | { | 78 | { |
| 79 | smp_mb__before_clear_bit(); | 79 | clear_bit_unlock(BH_Lock, &bh->b_state); |
| 80 | clear_buffer_locked(bh); | ||
| 81 | smp_mb__after_clear_bit(); | 80 | smp_mb__after_clear_bit(); |
| 82 | wake_up_bit(&bh->b_state, BH_Lock); | 81 | wake_up_bit(&bh->b_state, BH_Lock); |
| 83 | } | 82 | } |
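clear_bit_unlock() clears BH_Lock with release ordering, so the barrier that used to precede the clear is no longer needed; only the one before the waiter check remains. The usual lock/unlock pairing, for reference:

        #include <linux/buffer_head.h>

        /* Sketch of the usual critical section around buffer state. */
        static void update_buffer(struct buffer_head *bh)
        {
                lock_buffer(bh);                /* sleeps until BH_Lock is acquired */
                /* ... modify bh->b_data, set dirty/uptodate bits, ... */
                unlock_buffer(bh);              /* clear_bit_unlock + wake_up_bit */
        }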
diff --git a/fs/char_dev.c b/fs/char_dev.c index 3cb7cda3d780..262fa10e213d 100644 --- a/fs/char_dev.c +++ b/fs/char_dev.c | |||
| @@ -22,9 +22,6 @@ | |||
| 22 | #include <linux/mutex.h> | 22 | #include <linux/mutex.h> |
| 23 | #include <linux/backing-dev.h> | 23 | #include <linux/backing-dev.h> |
| 24 | 24 | ||
| 25 | #ifdef CONFIG_KMOD | ||
| 26 | #include <linux/kmod.h> | ||
| 27 | #endif | ||
| 28 | #include "internal.h" | 25 | #include "internal.h" |
| 29 | 26 | ||
| 30 | /* | 27 | /* |
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig new file mode 100644 index 000000000000..341a98965bd0 --- /dev/null +++ b/fs/cifs/Kconfig | |||
| @@ -0,0 +1,142 @@ | |||
| 1 | config CIFS | ||
| 2 | tristate "CIFS support (advanced network filesystem, SMBFS successor)" | ||
| 3 | depends on INET | ||
| 4 | select NLS | ||
| 5 | help | ||
| 6 | This is the client VFS module for the Common Internet File System | ||
| 7 | (CIFS) protocol which is the successor to the Server Message Block | ||
| 8 | (SMB) protocol, the native file sharing mechanism for most early | ||
| 9 | PC operating systems. The CIFS protocol is fully supported by | ||
| 10 | file servers such as Windows 2000 (including Windows 2003, NT 4 | ||
| 11 | and Windows XP) as well by Samba (which provides excellent CIFS | ||
| 12 | server support for Linux and many other operating systems). Limited | ||
| 13 | support for OS/2 and Windows ME and similar servers is provided as | ||
| 14 | well. | ||
| 15 | |||
| 16 | The cifs module provides an advanced network file system | ||
| 17 | client for mounting to CIFS compliant servers. It includes | ||
| 18 | support for DFS (hierarchical name space), secure per-user | ||
| 19 | session establishment via Kerberos or NTLM or NTLMv2, | ||
| 20 | safe distributed caching (oplock), optional packet | ||
| 21 | signing, Unicode and other internationalization improvements. | ||
| 22 | If you need to mount to Samba or Windows from this machine, say Y. | ||
| 23 | |||
| 24 | config CIFS_STATS | ||
| 25 | bool "CIFS statistics" | ||
| 26 | depends on CIFS | ||
| 27 | help | ||
| 28 | Enabling this option will cause statistics for each server share | ||
| 29 | mounted by the cifs client to be displayed in /proc/fs/cifs/Stats | ||
| 30 | |||
| 31 | config CIFS_STATS2 | ||
| 32 | bool "Extended statistics" | ||
| 33 | depends on CIFS_STATS | ||
| 34 | help | ||
| 35 | Enabling this option will allow more detailed statistics on SMB | ||
| 36 | request timing to be displayed in /proc/fs/cifs/DebugData and also | ||
| 37 | allow optional logging of slow responses to dmesg (depending on the | ||
| 38 | value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details). | ||
| 39 | These additional statistics may have a minor effect on performance | ||
| 40 | and memory utilization. | ||
| 41 | |||
| 42 | Unless you are a developer or are doing network performance analysis | ||
| 43 | or tuning, say N. | ||
| 44 | |||
| 45 | config CIFS_WEAK_PW_HASH | ||
| 46 | bool "Support legacy servers which use weaker LANMAN security" | ||
| 47 | depends on CIFS | ||
| 48 | help | ||
| 49 | Modern CIFS servers including Samba and most Windows versions | ||
| 50 | (since 1997) support stronger NTLM (and even NTLMv2 and Kerberos) | ||
| 51 | security mechanisms. These hash the password more securely | ||
| 52 | than the mechanisms used in the older LANMAN version of the | ||
| 53 | SMB protocol but LANMAN based authentication is needed to | ||
| 54 | establish sessions with some old SMB servers. | ||
| 55 | |||
| 56 | Enabling this option allows the cifs module to mount to older | ||
| 57 | LANMAN based servers such as OS/2 and Windows 95, but such | ||
| 58 | mounts may be less secure than mounts using NTLM or more recent | ||
| 59 | security mechanisms if you are on a public network. Unless you | ||
| 60 | have a need to access old SMB servers (and are on a private | ||
| 61 | network) you probably want to say N. Even if this support | ||
| 62 | is enabled in the kernel build, LANMAN authentication will not be | ||
| 63 | used automatically. At runtime LANMAN mounts are disabled but | ||
| 64 | can be set to required (or optional) either in | ||
| 65 | /proc/fs/cifs (see fs/cifs/README for more detail) or via an | ||
| 66 | option on the mount command. This support is disabled by | ||
| 67 | default in order to reduce the possibility of a downgrade | ||
| 68 | attack. | ||
| 69 | |||
| 70 | If unsure, say N. | ||
| 71 | |||
| 72 | config CIFS_UPCALL | ||
| 73 | bool "Kerberos/SPNEGO advanced session setup" | ||
| 74 | depends on CIFS && KEYS | ||
| 75 | help | ||
| 76 | Enables an upcall mechanism for CIFS which accesses | ||
| 77 | userspace helper utilities to provide SPNEGO packaged (RFC 4178) | ||
| 78 | Kerberos tickets which are needed to mount to certain secure servers | ||
| 79 | (for which more secure Kerberos authentication is required). If | ||
| 80 | unsure, say N. | ||
| 81 | |||
| 82 | config CIFS_XATTR | ||
| 83 | bool "CIFS extended attributes" | ||
| 84 | depends on CIFS | ||
| 85 | help | ||
| 86 | Extended attributes are name:value pairs associated with inodes by | ||
| 87 | the kernel or by users (see the attr(5) manual page, or visit | ||
| 88 | <http://acl.bestbits.at/> for details). CIFS maps the name of | ||
| 89 | extended attributes beginning with the user namespace prefix | ||
| 90 | to SMB/CIFS EAs. EAs are stored on Windows servers without the | ||
| 91 | user namespace prefix, but their names are seen by Linux cifs clients | ||
| 92 | prefaced by the user namespace prefix. The system namespace | ||
| 93 | (used by some filesystems to store ACLs) is not supported at | ||
| 94 | this time. | ||
| 95 | |||
| 96 | If unsure, say N. | ||
| 97 | |||
| 98 | config CIFS_POSIX | ||
| 99 | bool "CIFS POSIX Extensions" | ||
| 100 | depends on CIFS_XATTR | ||
| 101 | help | ||
| 102 | Enabling this option will cause the cifs client to attempt to | ||
| 103 | negotiate a newer dialect with servers, such as Samba 3.0.5 | ||
| 104 | or later, that optionally can handle more POSIX like (rather | ||
| 105 | than Windows like) file behavior. It also enables | ||
| 106 | support for POSIX ACLs (getfacl and setfacl) to servers | ||
| 107 | (such as Samba 3.10 and later) which can negotiate | ||
| 108 | CIFS POSIX ACL support. If unsure, say N. | ||
| 109 | |||
| 110 | config CIFS_DEBUG2 | ||
| 111 | bool "Enable additional CIFS debugging routines" | ||
| 112 | depends on CIFS | ||
| 113 | help | ||
| 114 | Enabling this option adds a few more debugging routines | ||
| 115 | to the cifs code which slightly increases the size of | ||
| 116 | the cifs module and can cause additional logging of debug | ||
| 117 | messages in some error paths, slowing performance. This | ||
| 118 | option can be turned off unless you are debugging | ||
| 119 | cifs problems. If unsure, say N. | ||
| 120 | |||
| 121 | config CIFS_EXPERIMENTAL | ||
| 122 | bool "CIFS Experimental Features (EXPERIMENTAL)" | ||
| 123 | depends on CIFS && EXPERIMENTAL | ||
| 124 | help | ||
| 125 | Enables cifs features under testing. These features are | ||
| 126 | experimental and currently include DFS support and directory | ||
| 127 | change notification ie fcntl(F_DNOTIFY), as well as the upcall | ||
| 128 | mechanism which will be used for Kerberos session negotiation | ||
| 129 | and uid remapping. Some of these features also may depend on | ||
| 130 | setting a value of 1 to the pseudo-file /proc/fs/cifs/Experimental | ||
| 131 | (which is disabled by default). See the file fs/cifs/README | ||
| 132 | for more details. If unsure, say N. | ||
| 133 | |||
| 134 | config CIFS_DFS_UPCALL | ||
| 135 | bool "DFS feature support (EXPERIMENTAL)" | ||
| 136 | depends on CIFS_EXPERIMENTAL | ||
| 137 | depends on KEYS | ||
| 138 | help | ||
| 139 | Enables an upcall mechanism for CIFS which contacts userspace | ||
| 140 | helper utilities to provide server name resolution (host names to | ||
| 141 | IP addresses) which is needed for implicit mounts of DFS junction | ||
| 142 | points. If unsure, say N. | ||
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c index 117ef4bba68e..fcee9298b620 100644 --- a/fs/cifs/cifs_spnego.c +++ b/fs/cifs/cifs_spnego.c | |||
| @@ -66,11 +66,28 @@ struct key_type cifs_spnego_key_type = { | |||
| 66 | .describe = user_describe, | 66 | .describe = user_describe, |
| 67 | }; | 67 | }; |
| 68 | 68 | ||
| 69 | #define MAX_VER_STR_LEN 8 /* length of longest version string e.g. | 69 | /* length of longest version string e.g. strlen("ver=0xFF") */ |
| 70 | strlen("ver=0xFF") */ | 70 | #define MAX_VER_STR_LEN 8 |
| 71 | #define MAX_MECH_STR_LEN 13 /* length of longest security mechanism name, eg | 71 | |
| 72 | in future could have strlen(";sec=ntlmsspi") */ | 72 | /* length of longest security mechanism name, eg in future could have |
| 73 | #define MAX_IPV6_ADDR_LEN 42 /* eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | 73 | * strlen(";sec=ntlmsspi") */ |
| 74 | #define MAX_MECH_STR_LEN 13 | ||
| 75 | |||
| 76 | /* max possible addr len eg FEDC:BA98:7654:3210:FEDC:BA98:7654:3210/60 */ | ||
| 77 | #define MAX_IPV6_ADDR_LEN 42 | ||
| 78 | |||
| 79 | /* strlen of "host=" */ | ||
| 80 | #define HOST_KEY_LEN 5 | ||
| 81 | |||
| 82 | /* strlen of ";ip4=" or ";ip6=" */ | ||
| 83 | #define IP_KEY_LEN 5 | ||
| 84 | |||
| 85 | /* strlen of ";uid=0x" */ | ||
| 86 | #define UID_KEY_LEN 7 | ||
| 87 | |||
| 88 | /* strlen of ";user=" */ | ||
| 89 | #define USER_KEY_LEN 6 | ||
| 90 | |||
| 74 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ | 91 | /* get a key struct with a SPNEGO security blob, suitable for session setup */ |
| 75 | struct key * | 92 | struct key * |
| 76 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | 93 | cifs_get_spnego_key(struct cifsSesInfo *sesInfo) |
| @@ -84,11 +101,11 @@ cifs_get_spnego_key(struct cifsSesInfo *sesInfo) | |||
| 84 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress | 101 | /* length of fields (with semicolons): ver=0xyz ip4=ipaddress |
| 85 | host=hostname sec=mechanism uid=0xFF user=username */ | 102 | host=hostname sec=mechanism uid=0xFF user=username */ |
| 86 | desc_len = MAX_VER_STR_LEN + | 103 | desc_len = MAX_VER_STR_LEN + |
| 87 | 6 /* len of "host=" */ + strlen(hostname) + | 104 | HOST_KEY_LEN + strlen(hostname) + |
| 88 | 5 /* len of ";ipv4=" */ + MAX_IPV6_ADDR_LEN + | 105 | IP_KEY_LEN + MAX_IPV6_ADDR_LEN + |
| 89 | MAX_MECH_STR_LEN + | 106 | MAX_MECH_STR_LEN + |
| 90 | 7 /* len of ";uid=0x" */ + (sizeof(uid_t) * 2) + | 107 | UID_KEY_LEN + (sizeof(uid_t) * 2) + |
| 91 | 6 /* len of ";user=" */ + strlen(sesInfo->userName) + 1; | 108 | USER_KEY_LEN + strlen(sesInfo->userName) + 1; |
| 92 | 109 | ||
| 93 | spnego_key = ERR_PTR(-ENOMEM); | 110 | spnego_key = ERR_PTR(-ENOMEM); |
| 94 | description = kzalloc(desc_len, GFP_KERNEL); | 111 | description = kzalloc(desc_len, GFP_KERNEL); |
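With the prefix lengths named, the description budget is easier to audit. As an illustrative sizing only: a hostname "fileserver" (10 bytes), user "joe" (3 bytes) and a 4-byte uid_t give 8 + (5 + 10) + (5 + 42) + 13 + (7 + 8) + (6 + 3) + 1 = 108 bytes for the kzalloc below, i.e. version + host + address + mechanism + uid-in-hex + user + trailing NUL.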
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 135c965c4137..f7b4a5cd837b 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h | |||
| @@ -41,7 +41,7 @@ extern int cifs_create(struct inode *, struct dentry *, int, | |||
| 41 | struct nameidata *); | 41 | struct nameidata *); |
| 42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, | 42 | extern struct dentry *cifs_lookup(struct inode *, struct dentry *, |
| 43 | struct nameidata *); | 43 | struct nameidata *); |
| 44 | extern int cifs_unlink(struct inode *, struct dentry *); | 44 | extern int cifs_unlink(struct inode *dir, struct dentry *dentry); |
| 45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); | 45 | extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *); |
| 46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); | 46 | extern int cifs_mknod(struct inode *, struct dentry *, int, dev_t); |
| 47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); | 47 | extern int cifs_mkdir(struct inode *, struct dentry *, int); |
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 8dfd6f24d488..0d22479d99b7 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h | |||
| @@ -309,6 +309,7 @@ struct cifs_search_info { | |||
| 309 | __u32 resume_key; | 309 | __u32 resume_key; |
| 310 | char *ntwrk_buf_start; | 310 | char *ntwrk_buf_start; |
| 311 | char *srch_entries_start; | 311 | char *srch_entries_start; |
| 312 | char *last_entry; | ||
| 312 | char *presume_name; | 313 | char *presume_name; |
| 313 | unsigned int resume_name_len; | 314 | unsigned int resume_name_len; |
| 314 | bool endOfSearch:1; | 315 | bool endOfSearch:1; |
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a729d083e6f4..0cff7fe986e8 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h | |||
| @@ -179,6 +179,8 @@ extern int CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | |||
| 179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | 179 | extern int CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, |
| 180 | const FILE_BASIC_INFO *data, __u16 fid, | 180 | const FILE_BASIC_INFO *data, __u16 fid, |
| 181 | __u32 pid_of_opener); | 181 | __u32 pid_of_opener); |
| 182 | extern int CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
| 183 | bool delete_file, __u16 fid, __u32 pid_of_opener); | ||
| 182 | #if 0 | 184 | #if 0 |
| 183 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, | 185 | extern int CIFSSMBSetAttrLegacy(int xid, struct cifsTconInfo *tcon, |
| 184 | char *fileName, __u16 dos_attributes, | 186 | char *fileName, __u16 dos_attributes, |
| @@ -229,7 +231,7 @@ extern int CIFSSMBRename(const int xid, struct cifsTconInfo *tcon, | |||
| 229 | const struct nls_table *nls_codepage, | 231 | const struct nls_table *nls_codepage, |
| 230 | int remap_special_chars); | 232 | int remap_special_chars); |
| 231 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 233 | extern int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
| 232 | int netfid, char *target_name, | 234 | int netfid, const char *target_name, |
| 233 | const struct nls_table *nls_codepage, | 235 | const struct nls_table *nls_codepage, |
| 234 | int remap_special_chars); | 236 | int remap_special_chars); |
| 235 | extern int CIFSCreateHardLink(const int xid, | 237 | extern int CIFSCreateHardLink(const int xid, |
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 994de7c90474..6f4ffe15d68d 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c | |||
| @@ -2017,7 +2017,7 @@ renameRetry: | |||
| 2017 | } | 2017 | } |
| 2018 | 2018 | ||
| 2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | 2019 | int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, |
| 2020 | int netfid, char *target_name, | 2020 | int netfid, const char *target_name, |
| 2021 | const struct nls_table *nls_codepage, int remap) | 2021 | const struct nls_table *nls_codepage, int remap) |
| 2022 | { | 2022 | { |
| 2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | 2023 | struct smb_com_transaction2_sfi_req *pSMB = NULL; |
| @@ -2071,7 +2071,7 @@ int CIFSSMBRenameOpenFile(const int xid, struct cifsTconInfo *pTcon, | |||
| 2071 | remap); | 2071 | remap); |
| 2072 | } | 2072 | } |
| 2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); | 2073 | rename_info->target_name_len = cpu_to_le32(2 * len_of_str); |
| 2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str) + 2; | 2074 | count = 12 /* sizeof(struct set_file_rename) */ + (2 * len_of_str); |
| 2075 | byte_count += count; | 2075 | byte_count += count; |
| 2076 | pSMB->DataCount = cpu_to_le16(count); | 2076 | pSMB->DataCount = cpu_to_le16(count); |
| 2077 | pSMB->TotalDataCount = pSMB->DataCount; | 2077 | pSMB->TotalDataCount = pSMB->DataCount; |
| @@ -3614,6 +3614,8 @@ findFirstRetry: | |||
| 3614 | /* BB remember to free buffer if error BB */ | 3614 | /* BB remember to free buffer if error BB */ |
| 3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3615 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
| 3616 | if (rc == 0) { | 3616 | if (rc == 0) { |
| 3617 | unsigned int lnoff; | ||
| 3618 | |||
| 3617 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3619 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
| 3618 | psrch_inf->unicode = true; | 3620 | psrch_inf->unicode = true; |
| 3619 | else | 3621 | else |
| @@ -3636,6 +3638,17 @@ findFirstRetry: | |||
| 3636 | le16_to_cpu(parms->SearchCount); | 3638 | le16_to_cpu(parms->SearchCount); |
| 3637 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + | 3639 | psrch_inf->index_of_last_entry = 2 /* skip . and .. */ + |
| 3638 | psrch_inf->entries_in_buffer; | 3640 | psrch_inf->entries_in_buffer; |
| 3641 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
| 3642 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
| 3643 | lnoff) { | ||
| 3644 | cERROR(1, ("ignoring corrupt resume name")); | ||
| 3645 | psrch_inf->last_entry = NULL; | ||
| 3646 | return rc; | ||
| 3647 | } | ||
| 3648 | |||
| 3649 | psrch_inf->last_entry = psrch_inf->srch_entries_start + | ||
| 3650 | lnoff; | ||
| 3651 | |||
| 3639 | *pnetfid = parms->SearchHandle; | 3652 | *pnetfid = parms->SearchHandle; |
| 3640 | } else { | 3653 | } else { |
| 3641 | cifs_buf_release(pSMB); | 3654 | cifs_buf_release(pSMB); |
| @@ -3725,6 +3738,8 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
| 3725 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); | 3738 | rc = validate_t2((struct smb_t2_rsp *)pSMBr); |
| 3726 | 3739 | ||
| 3727 | if (rc == 0) { | 3740 | if (rc == 0) { |
| 3741 | unsigned int lnoff; | ||
| 3742 | |||
| 3728 | /* BB fixme add lock for file (srch_info) struct here */ | 3743 | /* BB fixme add lock for file (srch_info) struct here */ |
| 3729 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) | 3744 | if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) |
| 3730 | psrch_inf->unicode = true; | 3745 | psrch_inf->unicode = true; |
| @@ -3751,6 +3766,16 @@ int CIFSFindNext(const int xid, struct cifsTconInfo *tcon, | |||
| 3751 | le16_to_cpu(parms->SearchCount); | 3766 | le16_to_cpu(parms->SearchCount); |
| 3752 | psrch_inf->index_of_last_entry += | 3767 | psrch_inf->index_of_last_entry += |
| 3753 | psrch_inf->entries_in_buffer; | 3768 | psrch_inf->entries_in_buffer; |
| 3769 | lnoff = le16_to_cpu(parms->LastNameOffset); | ||
| 3770 | if (tcon->ses->server->maxBuf - MAX_CIFS_HDR_SIZE < | ||
| 3771 | lnoff) { | ||
| 3772 | cERROR(1, ("ignoring corrupt resume name")); | ||
| 3773 | psrch_inf->last_entry = NULL; | ||
| 3774 | return rc; | ||
| 3775 | } else | ||
| 3776 | psrch_inf->last_entry = | ||
| 3777 | psrch_inf->srch_entries_start + lnoff; | ||
| 3778 | |||
| 3754 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", | 3779 | /* cFYI(1,("fnxt2 entries in buf %d index_of_last %d", |
| 3755 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ | 3780 | psrch_inf->entries_in_buffer, psrch_inf->index_of_last_entry)); */ |
| 3756 | 3781 | ||
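Both FindFirst and FindNext now bound the server-supplied LastNameOffset against the largest response that could have been negotiated (maxBuf minus the header), so a corrupt or hostile reply cannot make last_entry point outside the receive buffer. The guard, distilled into a standalone sketch (helper name is illustrative):

        /* Reject an offset that cannot lie inside the negotiated response buffer. */
        static char *resume_entry(char *entries_start, unsigned int lnoff,
                                  unsigned int max_rsp)
        {
                if (lnoff > max_rsp)
                        return NULL;            /* ignore the corrupt resume name */
                return entries_start + lnoff;
        }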
| @@ -4876,6 +4901,61 @@ CIFSSMBSetFileInfo(const int xid, struct cifsTconInfo *tcon, | |||
| 4876 | return rc; | 4901 | return rc; |
| 4877 | } | 4902 | } |
| 4878 | 4903 | ||
| 4904 | int | ||
| 4905 | CIFSSMBSetFileDisposition(const int xid, struct cifsTconInfo *tcon, | ||
| 4906 | bool delete_file, __u16 fid, __u32 pid_of_opener) | ||
| 4907 | { | ||
| 4908 | struct smb_com_transaction2_sfi_req *pSMB = NULL; | ||
| 4909 | char *data_offset; | ||
| 4910 | int rc = 0; | ||
| 4911 | __u16 params, param_offset, offset, byte_count, count; | ||
| 4912 | |||
| 4913 | cFYI(1, ("Set File Disposition (via SetFileInfo)")); | ||
| 4914 | rc = small_smb_init(SMB_COM_TRANSACTION2, 15, tcon, (void **) &pSMB); | ||
| 4915 | |||
| 4916 | if (rc) | ||
| 4917 | return rc; | ||
| 4918 | |||
| 4919 | pSMB->hdr.Pid = cpu_to_le16((__u16)pid_of_opener); | ||
| 4920 | pSMB->hdr.PidHigh = cpu_to_le16((__u16)(pid_of_opener >> 16)); | ||
| 4921 | |||
| 4922 | params = 6; | ||
| 4923 | pSMB->MaxSetupCount = 0; | ||
| 4924 | pSMB->Reserved = 0; | ||
| 4925 | pSMB->Flags = 0; | ||
| 4926 | pSMB->Timeout = 0; | ||
| 4927 | pSMB->Reserved2 = 0; | ||
| 4928 | param_offset = offsetof(struct smb_com_transaction2_sfi_req, Fid) - 4; | ||
| 4929 | offset = param_offset + params; | ||
| 4930 | |||
| 4931 | data_offset = (char *) (&pSMB->hdr.Protocol) + offset; | ||
| 4932 | |||
| 4933 | count = 1; | ||
| 4934 | pSMB->MaxParameterCount = cpu_to_le16(2); | ||
| 4935 | /* BB find max SMB PDU from sess */ | ||
| 4936 | pSMB->MaxDataCount = cpu_to_le16(1000); | ||
| 4937 | pSMB->SetupCount = 1; | ||
| 4938 | pSMB->Reserved3 = 0; | ||
| 4939 | pSMB->SubCommand = cpu_to_le16(TRANS2_SET_FILE_INFORMATION); | ||
| 4940 | byte_count = 3 /* pad */ + params + count; | ||
| 4941 | pSMB->DataCount = cpu_to_le16(count); | ||
| 4942 | pSMB->ParameterCount = cpu_to_le16(params); | ||
| 4943 | pSMB->TotalDataCount = pSMB->DataCount; | ||
| 4944 | pSMB->TotalParameterCount = pSMB->ParameterCount; | ||
| 4945 | pSMB->ParameterOffset = cpu_to_le16(param_offset); | ||
| 4946 | pSMB->DataOffset = cpu_to_le16(offset); | ||
| 4947 | pSMB->Fid = fid; | ||
| 4948 | pSMB->InformationLevel = cpu_to_le16(SMB_SET_FILE_DISPOSITION_INFO); | ||
| 4949 | pSMB->Reserved4 = 0; | ||
| 4950 | pSMB->hdr.smb_buf_length += byte_count; | ||
| 4951 | pSMB->ByteCount = cpu_to_le16(byte_count); | ||
| 4952 | *data_offset = delete_file ? 1 : 0; | ||
| 4953 | rc = SendReceiveNoRsp(xid, tcon->ses, (struct smb_hdr *) pSMB, 0); | ||
| 4954 | if (rc) | ||
| 4955 | cFYI(1, ("Send error in SetFileDisposition = %d", rc)); | ||
| 4956 | |||
| 4957 | return rc; | ||
| 4958 | } | ||
| 4879 | 4959 | ||
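CIFSSMBSetFileDisposition() sends SMB_SET_FILE_DISPOSITION_INFO on an already-open fid, setting or clearing the delete-pending flag so the server unlinks the file once the last handle closes. A hypothetical wrapper around it (real callers add fallbacks for servers that reject the request):

        #include <linux/sched.h>
        #include "cifsglob.h"
        #include "cifsproto.h"

        /* Hypothetical helper: mark an open handle delete-on-close. */
        static int mark_delete_on_close(const int xid, struct cifsTconInfo *tcon,
                                        __u16 netfid)
        {
                return CIFSSMBSetFileDisposition(xid, tcon, true, netfid,
                                                 current->tgid);
        }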
| 4880 | int | 4960 | int |
| 4881 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, | 4961 | CIFSSMBSetPathInfo(const int xid, struct cifsTconInfo *tcon, |
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index a2e0673e1b08..1e0c1bd8f2e4 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c | |||
| @@ -29,19 +29,55 @@ | |||
| 29 | #include "cifsproto.h" | 29 | #include "cifsproto.h" |
| 30 | #include "cifs_debug.h" | 30 | #include "cifs_debug.h" |
| 31 | 31 | ||
| 32 | static int dns_resolver_instantiate(struct key *key, const void *data, | 32 | /* Checks if supplied name is IP address |
| 33 | * returns: | ||
| 34 | * 1 - name is IP | ||
| 35 | * 0 - name is not IP | ||
| 36 | */ | ||
| 37 | static int | ||
| 38 | is_ip(const char *name) | ||
| 39 | { | ||
| 40 | int rc; | ||
| 41 | struct sockaddr_in sin_server; | ||
| 42 | struct sockaddr_in6 sin_server6; | ||
| 43 | |||
| 44 | rc = cifs_inet_pton(AF_INET, name, | ||
| 45 | &sin_server.sin_addr.s_addr); | ||
| 46 | |||
| 47 | if (rc <= 0) { | ||
| 48 | /* not ipv4 address, try ipv6 */ | ||
| 49 | rc = cifs_inet_pton(AF_INET6, name, | ||
| 50 | &sin_server6.sin6_addr.in6_u); | ||
| 51 | if (rc > 0) | ||
| 52 | return 1; | ||
| 53 | } else { | ||
| 54 | return 1; | ||
| 55 | } | ||
| 56 | /* we failed translating address */ | ||
| 57 | return 0; | ||
| 58 | } | ||
| 59 | |||
| 60 | static int | ||
| 61 | dns_resolver_instantiate(struct key *key, const void *data, | ||
| 33 | size_t datalen) | 62 | size_t datalen) |
| 34 | { | 63 | { |
| 35 | int rc = 0; | 64 | int rc = 0; |
| 36 | char *ip; | 65 | char *ip; |
| 37 | 66 | ||
| 38 | ip = kmalloc(datalen+1, GFP_KERNEL); | 67 | ip = kmalloc(datalen + 1, GFP_KERNEL); |
| 39 | if (!ip) | 68 | if (!ip) |
| 40 | return -ENOMEM; | 69 | return -ENOMEM; |
| 41 | 70 | ||
| 42 | memcpy(ip, data, datalen); | 71 | memcpy(ip, data, datalen); |
| 43 | ip[datalen] = '\0'; | 72 | ip[datalen] = '\0'; |
| 44 | 73 | ||
| 74 | /* make sure this looks like an address */ | ||
| 75 | if (!is_ip((const char *) ip)) { | ||
| 76 | kfree(ip); | ||
| 77 | return -EINVAL; | ||
| 78 | } | ||
| 79 | |||
| 80 | key->type_data.x[0] = datalen; | ||
| 45 | rcu_assign_pointer(key->payload.data, ip); | 81 | rcu_assign_pointer(key->payload.data, ip); |
| 46 | 82 | ||
| 47 | return rc; | 83 | return rc; |
| @@ -62,33 +98,6 @@ struct key_type key_type_dns_resolver = { | |||
| 62 | .match = user_match, | 98 | .match = user_match, |
| 63 | }; | 99 | }; |
| 64 | 100 | ||
| 65 | /* Checks if supplied name is IP address | ||
| 66 | * returns: | ||
| 67 | * 1 - name is IP | ||
| 68 | * 0 - name is not IP | ||
| 69 | */ | ||
| 70 | static int is_ip(const char *name) | ||
| 71 | { | ||
| 72 | int rc; | ||
| 73 | struct sockaddr_in sin_server; | ||
| 74 | struct sockaddr_in6 sin_server6; | ||
| 75 | |||
| 76 | rc = cifs_inet_pton(AF_INET, name, | ||
| 77 | &sin_server.sin_addr.s_addr); | ||
| 78 | |||
| 79 | if (rc <= 0) { | ||
| 80 | /* not ipv4 address, try ipv6 */ | ||
| 81 | rc = cifs_inet_pton(AF_INET6, name, | ||
| 82 | &sin_server6.sin6_addr.in6_u); | ||
| 83 | if (rc > 0) | ||
| 84 | return 1; | ||
| 85 | } else { | ||
| 86 | return 1; | ||
| 87 | } | ||
| 88 | /* we failed translating address */ | ||
| 89 | return 0; | ||
| 90 | } | ||
| 91 | |||
| 92 | /* Resolves server name to ip address. | 101 | /* Resolves server name to ip address. |
| 93 | * input: | 102 | * input: |
| 94 | * unc - server UNC | 103 | * unc - server UNC |
| @@ -140,6 +149,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
| 140 | 149 | ||
| 141 | rkey = request_key(&key_type_dns_resolver, name, ""); | 150 | rkey = request_key(&key_type_dns_resolver, name, ""); |
| 142 | if (!IS_ERR(rkey)) { | 151 | if (!IS_ERR(rkey)) { |
| 152 | len = rkey->type_data.x[0]; | ||
| 143 | data = rkey->payload.data; | 153 | data = rkey->payload.data; |
| 144 | } else { | 154 | } else { |
| 145 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); | 155 | cERROR(1, ("%s: unable to resolve: %s", __func__, name)); |
| @@ -148,11 +158,9 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr) | |||
| 148 | 158 | ||
| 149 | skip_upcall: | 159 | skip_upcall: |
| 150 | if (data) { | 160 | if (data) { |
| 151 | len = strlen(data); | 161 | *ip_addr = kmalloc(len + 1, GFP_KERNEL); |
| 152 | *ip_addr = kmalloc(len+1, GFP_KERNEL); | ||
| 153 | if (*ip_addr) { | 162 | if (*ip_addr) { |
| 154 | memcpy(*ip_addr, data, len); | 163 | memcpy(*ip_addr, data, len + 1); |
| 155 | (*ip_addr)[len] = '\0'; | ||
| 156 | if (!IS_ERR(rkey)) | 164 | if (!IS_ERR(rkey)) |
| 157 | cFYI(1, ("%s: resolved: %s to %s", __func__, | 165 | cFYI(1, ("%s: resolved: %s to %s", __func__, |
| 158 | name, | 166 | name, |
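Taken together, the dns_resolve changes stop assuming the upcall payload is a NUL-terminated string: instantiate rejects anything that is not an IPv4/IPv6 address and records the payload length in the key, and the lookup side copies exactly that many bytes instead of running strlen() on untrusted data. Condensed from the hunks above (a fragment, not standalone code):

        /* instantiate: validate, then remember how long the payload was */
        key->type_data.x[0] = datalen;

        /* resolve: size the copy from the stored length, not strlen() */
        len = rkey->type_data.x[0];
        *ip_addr = kmalloc(len + 1, GFP_KERNEL);
        if (*ip_addr)
                memcpy(*ip_addr, data, len + 1);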
diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cbefe1f1f9fe..62d8bd8f14c0 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c | |||
| @@ -107,7 +107,7 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file, | |||
| 107 | 107 | ||
| 108 | /* want handles we can use to read with first | 108 | /* want handles we can use to read with first |
| 109 | in the list so we do not have to walk the | 109 | in the list so we do not have to walk the |
| 110 | list to search for one in prepare_write */ | 110 | list to search for one in write_begin */ |
| 111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { | 111 | if ((file->f_flags & O_ACCMODE) == O_WRONLY) { |
| 112 | list_add_tail(&pCifsFile->flist, | 112 | list_add_tail(&pCifsFile->flist, |
| 113 | &pCifsInode->openFileList); | 113 | &pCifsInode->openFileList); |
| @@ -915,7 +915,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data, | |||
| 915 | } | 915 | } |
| 916 | 916 | ||
| 917 | static ssize_t cifs_write(struct file *file, const char *write_data, | 917 | static ssize_t cifs_write(struct file *file, const char *write_data, |
| 918 | size_t write_size, loff_t *poffset) | 918 | size_t write_size, loff_t *poffset) |
| 919 | { | 919 | { |
| 920 | int rc = 0; | 920 | int rc = 0; |
| 921 | unsigned int bytes_written = 0; | 921 | unsigned int bytes_written = 0; |
| @@ -1065,6 +1065,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode) | |||
| 1065 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | 1065 | struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) |
| 1066 | { | 1066 | { |
| 1067 | struct cifsFileInfo *open_file; | 1067 | struct cifsFileInfo *open_file; |
| 1068 | bool any_available = false; | ||
| 1068 | int rc; | 1069 | int rc; |
| 1069 | 1070 | ||
| 1070 | /* Having a null inode here (because mapping->host was set to zero by | 1071 | /* Having a null inode here (because mapping->host was set to zero by |
| @@ -1080,8 +1081,10 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode) | |||
| 1080 | read_lock(&GlobalSMBSeslock); | 1081 | read_lock(&GlobalSMBSeslock); |
| 1081 | refind_writable: | 1082 | refind_writable: |
| 1082 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { | 1083 | list_for_each_entry(open_file, &cifs_inode->openFileList, flist) { |
| 1083 | if (open_file->closePend) | 1084 | if (open_file->closePend || |
| 1085 | (!any_available && open_file->pid != current->tgid)) | ||
| 1084 | continue; | 1086 | continue; |
| 1087 | |||
| 1085 | if (open_file->pfile && | 1088 | if (open_file->pfile && |
| 1086 | ((open_file->pfile->f_flags & O_RDWR) || | 1089 | ((open_file->pfile->f_flags & O_RDWR) || |
| 1087 | (open_file->pfile->f_flags & O_WRONLY))) { | 1090 | (open_file->pfile->f_flags & O_WRONLY))) { |
| @@ -1131,6 +1134,11 @@ refind_writable: | |||
| 1131 | of the loop here. */ | 1134 | of the loop here. */ |
| 1132 | } | 1135 | } |
| 1133 | } | 1136 | } |
| 1137 | /* couldn't find useable FH with same pid, try any available */ | ||
| 1138 | if (!any_available) { | ||
| 1139 | any_available = true; | ||
| 1140 | goto refind_writable; | ||
| 1141 | } | ||
| 1134 | read_unlock(&GlobalSMBSeslock); | 1142 | read_unlock(&GlobalSMBSeslock); |
| 1135 | return NULL; | 1143 | return NULL; |
| 1136 | } | 1144 | } |
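find_writable_file() now makes two passes over the inode's open-file list: the first considers only handles opened by the writing process (tgid match), and only if none is usable does it fall back to any writable handle. The control flow, reduced to a generic sketch (struct handle is a stand-in for cifsFileInfo):

        #include <linux/list.h>
        #include <linux/types.h>

        struct handle {                         /* stand-in for cifsFileInfo */
                struct list_head list;
                pid_t pid;
                bool writable;
        };

        /* Prefer a handle owned by @owner; accept any owner on the second pass. */
        static struct handle *pick_writable(struct list_head *head, pid_t owner)
        {
                struct handle *h;
                bool any_owner = false;

        retry:
                list_for_each_entry(h, head, list) {
                        if (!any_owner && h->pid != owner)
                                continue;       /* first pass: same process only */
                        if (h->writable)
                                return h;
                }
                if (!any_owner) {
                        any_owner = true;       /* second pass: any opener will do */
                        goto retry;
                }
                return NULL;
        }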
| @@ -1447,49 +1455,52 @@ static int cifs_writepage(struct page *page, struct writeback_control *wbc) | |||
| 1447 | return rc; | 1455 | return rc; |
| 1448 | } | 1456 | } |
| 1449 | 1457 | ||
| 1450 | static int cifs_commit_write(struct file *file, struct page *page, | 1458 | static int cifs_write_end(struct file *file, struct address_space *mapping, |
| 1451 | unsigned offset, unsigned to) | 1459 | loff_t pos, unsigned len, unsigned copied, |
| 1460 | struct page *page, void *fsdata) | ||
| 1452 | { | 1461 | { |
| 1453 | int xid; | 1462 | int rc; |
| 1454 | int rc = 0; | 1463 | struct inode *inode = mapping->host; |
| 1455 | struct inode *inode = page->mapping->host; | ||
| 1456 | loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; | ||
| 1457 | char *page_data; | ||
| 1458 | 1464 | ||
| 1459 | xid = GetXid(); | 1465 | cFYI(1, ("write_end for page %p from pos %lld with %d bytes", |
| 1460 | cFYI(1, ("commit write for page %p up to position %lld for %d", | 1466 | page, pos, copied)); |
| 1461 | page, position, to)); | 1467 | |
| 1462 | spin_lock(&inode->i_lock); | 1468 | if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE) |
| 1463 | if (position > inode->i_size) | 1469 | SetPageUptodate(page); |
| 1464 | i_size_write(inode, position); | ||
| 1465 | 1470 | ||
| 1466 | spin_unlock(&inode->i_lock); | ||
| 1467 | if (!PageUptodate(page)) { | 1471 | if (!PageUptodate(page)) { |
| 1468 | position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset; | 1472 | char *page_data; |
| 1469 | /* can not rely on (or let) writepage write this data */ | 1473 | unsigned offset = pos & (PAGE_CACHE_SIZE - 1); |
| 1470 | if (to < offset) { | 1474 | int xid; |
| 1471 | cFYI(1, ("Illegal offsets, can not copy from %d to %d", | 1475 | |
| 1472 | offset, to)); | 1476 | xid = GetXid(); |
| 1473 | FreeXid(xid); | ||
| 1474 | return rc; | ||
| 1475 | } | ||
| 1476 | /* this is probably better than directly calling | 1477 | /* this is probably better than directly calling |
| 1477 | partialpage_write since in this function the file handle is | 1478 | partialpage_write since in this function the file handle is |
| 1478 | known which we might as well leverage */ | 1479 | known which we might as well leverage */ |
| 1479 | /* BB check if anything else missing out of ppw | 1480 | /* BB check if anything else missing out of ppw |
| 1480 | such as updating last write time */ | 1481 | such as updating last write time */ |
| 1481 | page_data = kmap(page); | 1482 | page_data = kmap(page); |
| 1482 | rc = cifs_write(file, page_data + offset, to-offset, | 1483 | rc = cifs_write(file, page_data + offset, copied, &pos); |
| 1483 | &position); | 1484 | /* if (rc < 0) should we set writebehind rc? */ |
| 1484 | if (rc > 0) | ||
| 1485 | rc = 0; | ||
| 1486 | /* else if (rc < 0) should we set writebehind rc? */ | ||
| 1487 | kunmap(page); | 1485 | kunmap(page); |
| 1486 | |||
| 1487 | FreeXid(xid); | ||
| 1488 | } else { | 1488 | } else { |
| 1489 | rc = copied; | ||
| 1490 | pos += copied; | ||
| 1489 | set_page_dirty(page); | 1491 | set_page_dirty(page); |
| 1490 | } | 1492 | } |
| 1491 | 1493 | ||
| 1492 | FreeXid(xid); | 1494 | if (rc > 0) { |
| 1495 | spin_lock(&inode->i_lock); | ||
| 1496 | if (pos > inode->i_size) | ||
| 1497 | i_size_write(inode, pos); | ||
| 1498 | spin_unlock(&inode->i_lock); | ||
| 1499 | } | ||
| 1500 | |||
| 1501 | unlock_page(page); | ||
| 1502 | page_cache_release(page); | ||
| 1503 | |||
| 1493 | return rc; | 1504 | return rc; |
| 1494 | } | 1505 | } |
| 1495 | 1506 | ||
| @@ -1780,7 +1791,7 @@ static void cifs_copy_cache_pages(struct address_space *mapping, | |||
| 1780 | SetPageUptodate(page); | 1791 | SetPageUptodate(page); |
| 1781 | unlock_page(page); | 1792 | unlock_page(page); |
| 1782 | if (!pagevec_add(plru_pvec, page)) | 1793 | if (!pagevec_add(plru_pvec, page)) |
| 1783 | __pagevec_lru_add(plru_pvec); | 1794 | __pagevec_lru_add_file(plru_pvec); |
| 1784 | data += PAGE_CACHE_SIZE; | 1795 | data += PAGE_CACHE_SIZE; |
| 1785 | } | 1796 | } |
| 1786 | return; | 1797 | return; |
| @@ -1914,7 +1925,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping, | |||
| 1914 | bytes_read = 0; | 1925 | bytes_read = 0; |
| 1915 | } | 1926 | } |
| 1916 | 1927 | ||
| 1917 | pagevec_lru_add(&lru_pvec); | 1928 | pagevec_lru_add_file(&lru_pvec); |
| 1918 | 1929 | ||
| 1919 | /* need to free smb_read_data buf before exit */ | 1930 | /* need to free smb_read_data buf before exit */ |
| 1920 | if (smb_read_data) { | 1931 | if (smb_read_data) { |
| @@ -2035,49 +2046,44 @@ bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file) | |||
| 2035 | return true; | 2046 | return true; |
| 2036 | } | 2047 | } |
| 2037 | 2048 | ||
| 2038 | static int cifs_prepare_write(struct file *file, struct page *page, | 2049 | static int cifs_write_begin(struct file *file, struct address_space *mapping, |
| 2039 | unsigned from, unsigned to) | 2050 | loff_t pos, unsigned len, unsigned flags, |
| 2051 | struct page **pagep, void **fsdata) | ||
| 2040 | { | 2052 | { |
| 2041 | int rc = 0; | 2053 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
| 2042 | loff_t i_size; | 2054 | loff_t offset = pos & (PAGE_CACHE_SIZE - 1); |
| 2043 | loff_t offset; | ||
| 2044 | 2055 | ||
| 2045 | cFYI(1, ("prepare write for page %p from %d to %d", page, from, to)); | 2056 | cFYI(1, ("write_begin from %lld len %d", (long long)pos, len)); |
| 2046 | if (PageUptodate(page)) | 2057 | |
| 2058 | *pagep = __grab_cache_page(mapping, index); | ||
| 2059 | if (!*pagep) | ||
| 2060 | return -ENOMEM; | ||
| 2061 | |||
| 2062 | if (PageUptodate(*pagep)) | ||
| 2047 | return 0; | 2063 | return 0; |
| 2048 | 2064 | ||
| 2049 | /* If we are writing a full page it will be up to date, | 2065 | /* If we are writing a full page it will be up to date, |
| 2050 | no need to read from the server */ | 2066 | no need to read from the server */ |
| 2051 | if ((to == PAGE_CACHE_SIZE) && (from == 0)) { | 2067 | if (len == PAGE_CACHE_SIZE && flags & AOP_FLAG_UNINTERRUPTIBLE) |
| 2052 | SetPageUptodate(page); | ||
| 2053 | return 0; | 2068 | return 0; |
| 2054 | } | ||
| 2055 | 2069 | ||
| 2056 | offset = (loff_t)page->index << PAGE_CACHE_SHIFT; | 2070 | if ((file->f_flags & O_ACCMODE) != O_WRONLY) { |
| 2057 | i_size = i_size_read(page->mapping->host); | 2071 | int rc; |
| 2058 | 2072 | ||
| 2059 | if ((offset >= i_size) || | ||
| 2060 | ((from == 0) && (offset + to) >= i_size)) { | ||
| 2061 | /* | ||
| 2062 | * We don't need to read data beyond the end of the file. | ||
| 2063 | * zero it, and set the page uptodate | ||
| 2064 | */ | ||
| 2065 | simple_prepare_write(file, page, from, to); | ||
| 2066 | SetPageUptodate(page); | ||
| 2067 | } else if ((file->f_flags & O_ACCMODE) != O_WRONLY) { | ||
| 2068 | /* might as well read a page, it is fast enough */ | 2073 | /* might as well read a page, it is fast enough */ |
| 2069 | rc = cifs_readpage_worker(file, page, &offset); | 2074 | rc = cifs_readpage_worker(file, *pagep, &offset); |
| 2075 | |||
| 2076 | /* we do not need to pass errors back | ||
| 2077 | e.g. if we do not have read access to the file | ||
| 2078 | because cifs_write_end will attempt synchronous writes | ||
| 2079 | -- shaggy */ | ||
| 2070 | } else { | 2080 | } else { |
| 2071 | /* we could try using another file handle if there is one - | 2081 | /* we could try using another file handle if there is one - |
| 2072 | but how would we lock it to prevent close of that handle | 2082 | but how would we lock it to prevent close of that handle |
| 2073 | racing with this read? In any case | 2083 | racing with this read? In any case |
| 2074 | this will be written out by commit_write so is fine */ | 2084 | this will be written out by write_end so is fine */ |
| 2075 | } | 2085 | } |
| 2076 | 2086 | ||
| 2077 | /* we do not need to pass errors back | ||
| 2078 | e.g. if we do not have read access to the file | ||
| 2079 | because cifs_commit_write will do the right thing. -- shaggy */ | ||
| 2080 | |||
| 2081 | return 0; | 2087 | return 0; |
| 2082 | } | 2088 | } |
| 2083 | 2089 | ||
| @@ -2086,8 +2092,8 @@ const struct address_space_operations cifs_addr_ops = { | |||
| 2086 | .readpages = cifs_readpages, | 2092 | .readpages = cifs_readpages, |
| 2087 | .writepage = cifs_writepage, | 2093 | .writepage = cifs_writepage, |
| 2088 | .writepages = cifs_writepages, | 2094 | .writepages = cifs_writepages, |
| 2089 | .prepare_write = cifs_prepare_write, | 2095 | .write_begin = cifs_write_begin, |
| 2090 | .commit_write = cifs_commit_write, | 2096 | .write_end = cifs_write_end, |
| 2091 | .set_page_dirty = __set_page_dirty_nobuffers, | 2097 | .set_page_dirty = __set_page_dirty_nobuffers, |
| 2092 | /* .sync_page = cifs_sync_page, */ | 2098 | /* .sync_page = cifs_sync_page, */ |
| 2093 | /* .direct_IO = */ | 2099 | /* .direct_IO = */ |
| @@ -2102,8 +2108,8 @@ const struct address_space_operations cifs_addr_ops_smallbuf = { | |||
| 2102 | .readpage = cifs_readpage, | 2108 | .readpage = cifs_readpage, |
| 2103 | .writepage = cifs_writepage, | 2109 | .writepage = cifs_writepage, |
| 2104 | .writepages = cifs_writepages, | 2110 | .writepages = cifs_writepages, |
| 2105 | .prepare_write = cifs_prepare_write, | 2111 | .write_begin = cifs_write_begin, |
| 2106 | .commit_write = cifs_commit_write, | 2112 | .write_end = cifs_write_end, |
| 2107 | .set_page_dirty = __set_page_dirty_nobuffers, | 2113 | .set_page_dirty = __set_page_dirty_nobuffers, |
| 2108 | /* .sync_page = cifs_sync_page, */ | 2114 | /* .sync_page = cifs_sync_page, */ |
| 2109 | /* .direct_IO = */ | 2115 | /* .direct_IO = */ |
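
A quick aside on the arithmetic that cifs_write_begin (above) does with PAGE_CACHE_SHIFT and PAGE_CACHE_SIZE: a file position is split into a page index and an in-page offset by a shift and a mask. The sketch below is a standalone userspace illustration, not kernel code, and assumes 4096-byte pages for the demo.

#include <stdio.h>
#include <stdint.h>

#define DEMO_PAGE_SHIFT 12
#define DEMO_PAGE_SIZE  (1ULL << DEMO_PAGE_SHIFT)

int main(void)
{
	uint64_t pos    = 13000;                       /* arbitrary file position */
	uint64_t index  = pos >> DEMO_PAGE_SHIFT;      /* which page of the file */
	uint64_t offset = pos & (DEMO_PAGE_SIZE - 1);  /* byte offset within that page */

	printf("pos %llu -> page %llu, offset %llu\n",
	       (unsigned long long)pos,
	       (unsigned long long)index,
	       (unsigned long long)offset);
	return 0;
}

For pos = 13000 this prints page 3, offset 712, which is exactly the (index, offset) pair the new write_begin path hands to __grab_cache_page and cifs_readpage_worker.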
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 9c548f110102..a8c833345fc9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c | |||
| @@ -665,40 +665,201 @@ struct inode *cifs_iget(struct super_block *sb, unsigned long ino) | |||
| 665 | return inode; | 665 | return inode; |
| 666 | } | 666 | } |
| 667 | 667 | ||
| 668 | int cifs_unlink(struct inode *inode, struct dentry *direntry) | 668 | static int |
| 669 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
| 670 | char *full_path, __u32 dosattr) | ||
| 671 | { | ||
| 672 | int rc; | ||
| 673 | int oplock = 0; | ||
| 674 | __u16 netfid; | ||
| 675 | __u32 netpid; | ||
| 676 | bool set_time = false; | ||
| 677 | struct cifsFileInfo *open_file; | ||
| 678 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
| 679 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
| 680 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
| 681 | FILE_BASIC_INFO info_buf; | ||
| 682 | |||
| 683 | if (attrs->ia_valid & ATTR_ATIME) { | ||
| 684 | set_time = true; | ||
| 685 | info_buf.LastAccessTime = | ||
| 686 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
| 687 | } else | ||
| 688 | info_buf.LastAccessTime = 0; | ||
| 689 | |||
| 690 | if (attrs->ia_valid & ATTR_MTIME) { | ||
| 691 | set_time = true; | ||
| 692 | info_buf.LastWriteTime = | ||
| 693 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
| 694 | } else | ||
| 695 | info_buf.LastWriteTime = 0; | ||
| 696 | |||
| 697 | /* | ||
| 698 | * Samba throws this field away, but windows may actually use it. | ||
| 699 | * Do not set ctime unless other time stamps are changed explicitly | ||
| 700 | * (i.e. by utimes()) since we would then have a mix of client and | ||
| 701 | * server times. | ||
| 702 | */ | ||
| 703 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
| 704 | cFYI(1, ("CIFS - CTIME changed")); | ||
| 705 | info_buf.ChangeTime = | ||
| 706 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
| 707 | } else | ||
| 708 | info_buf.ChangeTime = 0; | ||
| 709 | |||
| 710 | info_buf.CreationTime = 0; /* don't change */ | ||
| 711 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
| 712 | |||
| 713 | /* | ||
| 714 | * If the file is already open for write, just use that fileid | ||
| 715 | */ | ||
| 716 | open_file = find_writable_file(cifsInode); | ||
| 717 | if (open_file) { | ||
| 718 | netfid = open_file->netfid; | ||
| 719 | netpid = open_file->pid; | ||
| 720 | goto set_via_filehandle; | ||
| 721 | } | ||
| 722 | |||
| 723 | /* | ||
| 724 | * NT4 apparently returns success on this call, but it doesn't | ||
| 725 | * really work. | ||
| 726 | */ | ||
| 727 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
| 728 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
| 729 | &info_buf, cifs_sb->local_nls, | ||
| 730 | cifs_sb->mnt_cifs_flags & | ||
| 731 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 732 | if (rc == 0) { | ||
| 733 | cifsInode->cifsAttrs = dosattr; | ||
| 734 | goto out; | ||
| 735 | } else if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
| 736 | goto out; | ||
| 737 | } | ||
| 738 | |||
| 739 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
| 740 | "times not supported by this server")); | ||
| 741 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
| 742 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
| 743 | CREATE_NOT_DIR, &netfid, &oplock, | ||
| 744 | NULL, cifs_sb->local_nls, | ||
| 745 | cifs_sb->mnt_cifs_flags & | ||
| 746 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 747 | |||
| 748 | if (rc != 0) { | ||
| 749 | if (rc == -EIO) | ||
| 750 | rc = -EINVAL; | ||
| 751 | goto out; | ||
| 752 | } | ||
| 753 | |||
| 754 | netpid = current->tgid; | ||
| 755 | |||
| 756 | set_via_filehandle: | ||
| 757 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
| 758 | if (!rc) | ||
| 759 | cifsInode->cifsAttrs = dosattr; | ||
| 760 | |||
| 761 | if (open_file == NULL) | ||
| 762 | CIFSSMBClose(xid, pTcon, netfid); | ||
| 763 | else | ||
| 764 | atomic_dec(&open_file->wrtPending); | ||
| 765 | out: | ||
| 766 | return rc; | ||
| 767 | } | ||
| 768 | |||
| 769 | /* | ||
| 770 | * open the given file (if it isn't already), set the DELETE_ON_CLOSE bit | ||
| 771 | * and rename it to a random name that hopefully won't conflict with | ||
| 772 | * anything else. | ||
| 773 | */ | ||
| 774 | static int | ||
| 775 | cifs_rename_pending_delete(char *full_path, struct inode *inode, int xid) | ||
| 776 | { | ||
| 777 | int oplock = 0; | ||
| 778 | int rc; | ||
| 779 | __u16 netfid; | ||
| 780 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
| 781 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
| 782 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
| 783 | __u32 dosattr; | ||
| 784 | FILE_BASIC_INFO *info_buf; | ||
| 785 | |||
| 786 | rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN, | ||
| 787 | DELETE|FILE_WRITE_ATTRIBUTES, | ||
| 788 | CREATE_NOT_DIR|CREATE_DELETE_ON_CLOSE, | ||
| 789 | &netfid, &oplock, NULL, cifs_sb->local_nls, | ||
| 790 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 791 | if (rc != 0) | ||
| 792 | goto out; | ||
| 793 | |||
| 794 | /* set ATTR_HIDDEN and clear ATTR_READONLY */ | ||
| 795 | cifsInode = CIFS_I(inode); | ||
| 796 | dosattr = cifsInode->cifsAttrs & ~ATTR_READONLY; | ||
| 797 | if (dosattr == 0) | ||
| 798 | dosattr |= ATTR_NORMAL; | ||
| 799 | dosattr |= ATTR_HIDDEN; | ||
| 800 | |||
| 801 | info_buf = kzalloc(sizeof(*info_buf), GFP_KERNEL); | ||
| 802 | if (info_buf == NULL) { | ||
| 803 | rc = -ENOMEM; | ||
| 804 | goto out_close; | ||
| 805 | } | ||
| 806 | info_buf->Attributes = cpu_to_le32(dosattr); | ||
| 807 | rc = CIFSSMBSetFileInfo(xid, tcon, info_buf, netfid, current->tgid); | ||
| 808 | kfree(info_buf); | ||
| 809 | if (rc != 0) | ||
| 810 | goto out_close; | ||
| 811 | cifsInode->cifsAttrs = dosattr; | ||
| 812 | |||
| 813 | /* silly-rename the file */ | ||
| 814 | CIFSSMBRenameOpenFile(xid, tcon, netfid, NULL, cifs_sb->local_nls, | ||
| 815 | cifs_sb->mnt_cifs_flags & | ||
| 816 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 817 | |||
| 818 | /* set DELETE_ON_CLOSE */ | ||
| 819 | rc = CIFSSMBSetFileDisposition(xid, tcon, true, netfid, current->tgid); | ||
| 820 | |||
| 821 | /* | ||
| 822 | * some samba versions return -ENOENT when we try to set the file | ||
| 823 | * disposition here. Likely a samba bug, but work around it for now | ||
| 824 | */ | ||
| 825 | if (rc == -ENOENT) | ||
| 826 | rc = 0; | ||
| 827 | |||
| 828 | out_close: | ||
| 829 | CIFSSMBClose(xid, tcon, netfid); | ||
| 830 | out: | ||
| 831 | return rc; | ||
| 832 | } | ||
| 833 | |||
| 834 | int cifs_unlink(struct inode *dir, struct dentry *dentry) | ||
| 669 | { | 835 | { |
| 670 | int rc = 0; | 836 | int rc = 0; |
| 671 | int xid; | 837 | int xid; |
| 672 | struct cifs_sb_info *cifs_sb; | ||
| 673 | struct cifsTconInfo *pTcon; | ||
| 674 | char *full_path = NULL; | 838 | char *full_path = NULL; |
| 675 | struct cifsInodeInfo *cifsInode; | 839 | struct inode *inode = dentry->d_inode; |
| 676 | FILE_BASIC_INFO *pinfo_buf; | 840 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); |
| 841 | struct super_block *sb = dir->i_sb; | ||
| 842 | struct cifs_sb_info *cifs_sb = CIFS_SB(sb); | ||
| 843 | struct cifsTconInfo *tcon = cifs_sb->tcon; | ||
| 844 | struct iattr *attrs = NULL; | ||
| 845 | __u32 dosattr = 0, origattr = 0; | ||
| 677 | 846 | ||
| 678 | cFYI(1, ("cifs_unlink, inode = 0x%p", inode)); | 847 | cFYI(1, ("cifs_unlink, dir=0x%p, dentry=0x%p", dir, dentry)); |
| 679 | 848 | ||
| 680 | xid = GetXid(); | 849 | xid = GetXid(); |
| 681 | 850 | ||
| 682 | if (inode) | 851 | /* Unlink can be called from rename so we can not take the |
| 683 | cifs_sb = CIFS_SB(inode->i_sb); | 852 | * sb->s_vfs_rename_mutex here */ |
| 684 | else | 853 | full_path = build_path_from_dentry(dentry); |
| 685 | cifs_sb = CIFS_SB(direntry->d_sb); | ||
| 686 | pTcon = cifs_sb->tcon; | ||
| 687 | |||
| 688 | /* Unlink can be called from rename so we can not grab the sem here | ||
| 689 | since we deadlock otherwise */ | ||
| 690 | /* mutex_lock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
| 691 | full_path = build_path_from_dentry(direntry); | ||
| 692 | /* mutex_unlock(&direntry->d_sb->s_vfs_rename_mutex);*/ | ||
| 693 | if (full_path == NULL) { | 854 | if (full_path == NULL) { |
| 694 | FreeXid(xid); | 855 | FreeXid(xid); |
| 695 | return -ENOMEM; | 856 | return -ENOMEM; |
| 696 | } | 857 | } |
| 697 | 858 | ||
| 698 | if ((pTcon->ses->capabilities & CAP_UNIX) && | 859 | if ((tcon->ses->capabilities & CAP_UNIX) && |
| 699 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & | 860 | (CIFS_UNIX_POSIX_PATH_OPS_CAP & |
| 700 | le64_to_cpu(pTcon->fsUnixInfo.Capability))) { | 861 | le64_to_cpu(tcon->fsUnixInfo.Capability))) { |
| 701 | rc = CIFSPOSIXDelFile(xid, pTcon, full_path, | 862 | rc = CIFSPOSIXDelFile(xid, tcon, full_path, |
| 702 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, | 863 | SMB_POSIX_UNLINK_FILE_TARGET, cifs_sb->local_nls, |
| 703 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 864 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 704 | cFYI(1, ("posix del rc %d", rc)); | 865 | cFYI(1, ("posix del rc %d", rc)); |
| @@ -706,125 +867,60 @@ int cifs_unlink(struct inode *inode, struct dentry *direntry) | |||
| 706 | goto psx_del_no_retry; | 867 | goto psx_del_no_retry; |
| 707 | } | 868 | } |
| 708 | 869 | ||
| 709 | rc = CIFSSMBDelFile(xid, pTcon, full_path, cifs_sb->local_nls, | 870 | retry_std_delete: |
| 871 | rc = CIFSSMBDelFile(xid, tcon, full_path, cifs_sb->local_nls, | ||
| 710 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); | 872 | cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 873 | |||
| 711 | psx_del_no_retry: | 874 | psx_del_no_retry: |
| 712 | if (!rc) { | 875 | if (!rc) { |
| 713 | if (direntry->d_inode) | 876 | if (inode) |
| 714 | drop_nlink(direntry->d_inode); | 877 | drop_nlink(inode); |
| 715 | } else if (rc == -ENOENT) { | 878 | } else if (rc == -ENOENT) { |
| 716 | d_drop(direntry); | 879 | d_drop(dentry); |
| 717 | } else if (rc == -ETXTBSY) { | 880 | } else if (rc == -ETXTBSY) { |
| 718 | int oplock = 0; | 881 | rc = cifs_rename_pending_delete(full_path, inode, xid); |
| 719 | __u16 netfid; | 882 | if (rc == 0) |
| 720 | 883 | drop_nlink(inode); | |
| 721 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, DELETE, | 884 | } else if (rc == -EACCES && dosattr == 0) { |
| 722 | CREATE_NOT_DIR | CREATE_DELETE_ON_CLOSE, | 885 | attrs = kzalloc(sizeof(*attrs), GFP_KERNEL); |
| 723 | &netfid, &oplock, NULL, cifs_sb->local_nls, | 886 | if (attrs == NULL) { |
| 724 | cifs_sb->mnt_cifs_flags & | 887 | rc = -ENOMEM; |
| 725 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 888 | goto out_reval; |
| 726 | if (rc == 0) { | ||
| 727 | CIFSSMBRenameOpenFile(xid, pTcon, netfid, NULL, | ||
| 728 | cifs_sb->local_nls, | ||
| 729 | cifs_sb->mnt_cifs_flags & | ||
| 730 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 731 | CIFSSMBClose(xid, pTcon, netfid); | ||
| 732 | if (direntry->d_inode) | ||
| 733 | drop_nlink(direntry->d_inode); | ||
| 734 | } | 889 | } |
| 735 | } else if (rc == -EACCES) { | ||
| 736 | /* try only if r/o attribute set in local lookup data? */ | ||
| 737 | pinfo_buf = kzalloc(sizeof(FILE_BASIC_INFO), GFP_KERNEL); | ||
| 738 | if (pinfo_buf) { | ||
| 739 | /* ATTRS set to normal clears r/o bit */ | ||
| 740 | pinfo_buf->Attributes = cpu_to_le32(ATTR_NORMAL); | ||
| 741 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) | ||
| 742 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
| 743 | pinfo_buf, | ||
| 744 | cifs_sb->local_nls, | ||
| 745 | cifs_sb->mnt_cifs_flags & | ||
| 746 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 747 | else | ||
| 748 | rc = -EOPNOTSUPP; | ||
| 749 | 890 | ||
| 750 | if (rc == -EOPNOTSUPP) { | 891 | /* try to reset dos attributes */ |
| 751 | int oplock = 0; | 892 | origattr = cifsInode->cifsAttrs; |
| 752 | __u16 netfid; | 893 | if (origattr == 0) |
| 753 | /* rc = CIFSSMBSetAttrLegacy(xid, pTcon, | 894 | origattr |= ATTR_NORMAL; |
| 754 | full_path, | 895 | dosattr = origattr & ~ATTR_READONLY; |
| 755 | (__u16)ATTR_NORMAL, | 896 | if (dosattr == 0) |
| 756 | cifs_sb->local_nls); | 897 | dosattr |= ATTR_NORMAL; |
| 757 | For some strange reason it seems that NT4 eats the | 898 | dosattr |= ATTR_HIDDEN; |
| 758 | old setattr call without actually setting the | 899 | |
| 759 | attributes so on to the third attempted workaround | 900 | rc = cifs_set_file_info(inode, attrs, xid, full_path, dosattr); |
| 760 | */ | 901 | if (rc != 0) |
| 761 | 902 | goto out_reval; | |
| 762 | /* BB could scan to see if we already have it open | 903 | |
| 763 | and pass in pid of opener to function */ | 904 | goto retry_std_delete; |
| 764 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
| 765 | FILE_OPEN, SYNCHRONIZE | | ||
| 766 | FILE_WRITE_ATTRIBUTES, 0, | ||
| 767 | &netfid, &oplock, NULL, | ||
| 768 | cifs_sb->local_nls, | ||
| 769 | cifs_sb->mnt_cifs_flags & | ||
| 770 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 771 | if (rc == 0) { | ||
| 772 | rc = CIFSSMBSetFileInfo(xid, pTcon, | ||
| 773 | pinfo_buf, | ||
| 774 | netfid, | ||
| 775 | current->tgid); | ||
| 776 | CIFSSMBClose(xid, pTcon, netfid); | ||
| 777 | } | ||
| 778 | } | ||
| 779 | kfree(pinfo_buf); | ||
| 780 | } | ||
| 781 | if (rc == 0) { | ||
| 782 | rc = CIFSSMBDelFile(xid, pTcon, full_path, | ||
| 783 | cifs_sb->local_nls, | ||
| 784 | cifs_sb->mnt_cifs_flags & | ||
| 785 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 786 | if (!rc) { | ||
| 787 | if (direntry->d_inode) | ||
| 788 | drop_nlink(direntry->d_inode); | ||
| 789 | } else if (rc == -ETXTBSY) { | ||
| 790 | int oplock = 0; | ||
| 791 | __u16 netfid; | ||
| 792 | |||
| 793 | rc = CIFSSMBOpen(xid, pTcon, full_path, | ||
| 794 | FILE_OPEN, DELETE, | ||
| 795 | CREATE_NOT_DIR | | ||
| 796 | CREATE_DELETE_ON_CLOSE, | ||
| 797 | &netfid, &oplock, NULL, | ||
| 798 | cifs_sb->local_nls, | ||
| 799 | cifs_sb->mnt_cifs_flags & | ||
| 800 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 801 | if (rc == 0) { | ||
| 802 | CIFSSMBRenameOpenFile(xid, pTcon, | ||
| 803 | netfid, NULL, | ||
| 804 | cifs_sb->local_nls, | ||
| 805 | cifs_sb->mnt_cifs_flags & | ||
| 806 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 807 | CIFSSMBClose(xid, pTcon, netfid); | ||
| 808 | if (direntry->d_inode) | ||
| 809 | drop_nlink(direntry->d_inode); | ||
| 810 | } | ||
| 811 | /* BB if rc = -ETXTBUSY goto the rename logic BB */ | ||
| 812 | } | ||
| 813 | } | ||
| 814 | } | ||
| 815 | if (direntry->d_inode) { | ||
| 816 | cifsInode = CIFS_I(direntry->d_inode); | ||
| 817 | cifsInode->time = 0; /* will force revalidate to get info | ||
| 818 | when needed */ | ||
| 819 | direntry->d_inode->i_ctime = current_fs_time(inode->i_sb); | ||
| 820 | } | 905 | } |
| 906 | |||
| 907 | /* undo the setattr if we errored out and it's needed */ | ||
| 908 | if (rc != 0 && dosattr != 0) | ||
| 909 | cifs_set_file_info(inode, attrs, xid, full_path, origattr); | ||
| 910 | |||
| 911 | out_reval: | ||
| 821 | if (inode) { | 912 | if (inode) { |
| 822 | inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); | ||
| 823 | cifsInode = CIFS_I(inode); | 913 | cifsInode = CIFS_I(inode); |
| 824 | cifsInode->time = 0; /* force revalidate of dir as well */ | 914 | cifsInode->time = 0; /* will force revalidate to get info |
| 915 | when needed */ | ||
| 916 | inode->i_ctime = current_fs_time(sb); | ||
| 825 | } | 917 | } |
| 918 | dir->i_ctime = dir->i_mtime = current_fs_time(sb); | ||
| 919 | cifsInode = CIFS_I(dir); | ||
| 920 | CIFS_I(dir)->time = 0; /* force revalidate of dir as well */ | ||
| 826 | 921 | ||
| 827 | kfree(full_path); | 922 | kfree(full_path); |
| 923 | kfree(attrs); | ||
| 828 | FreeXid(xid); | 924 | FreeXid(xid); |
| 829 | return rc; | 925 | return rc; |
| 830 | } | 926 | } |
| @@ -869,7 +965,7 @@ static void posix_fill_in_inode(struct inode *tmp_inode, | |||
| 869 | 965 | ||
| 870 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | 966 | int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) |
| 871 | { | 967 | { |
| 872 | int rc = 0; | 968 | int rc = 0, tmprc; |
| 873 | int xid; | 969 | int xid; |
| 874 | struct cifs_sb_info *cifs_sb; | 970 | struct cifs_sb_info *cifs_sb; |
| 875 | struct cifsTconInfo *pTcon; | 971 | struct cifsTconInfo *pTcon; |
| @@ -931,6 +1027,7 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) | |||
| 931 | kfree(pInfo); | 1027 | kfree(pInfo); |
| 932 | goto mkdir_get_info; | 1028 | goto mkdir_get_info; |
| 933 | } | 1029 | } |
| 1030 | |||
| 934 | /* Is an i_ino of zero legal? */ | 1031 | /* Is an i_ino of zero legal? */ |
| 935 | /* Are there sanity checks we can use to ensure that | 1032 | /* Are there sanity checks we can use to ensure that |
| 936 | the server is really filling in that field? */ | 1033 | the server is really filling in that field? */ |
| @@ -1019,12 +1116,20 @@ mkdir_get_info: | |||
| 1019 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && | 1116 | if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) && |
| 1020 | (mode & S_IWUGO) == 0) { | 1117 | (mode & S_IWUGO) == 0) { |
| 1021 | FILE_BASIC_INFO pInfo; | 1118 | FILE_BASIC_INFO pInfo; |
| 1119 | struct cifsInodeInfo *cifsInode; | ||
| 1120 | u32 dosattrs; | ||
| 1121 | |||
| 1022 | memset(&pInfo, 0, sizeof(pInfo)); | 1122 | memset(&pInfo, 0, sizeof(pInfo)); |
| 1023 | pInfo.Attributes = cpu_to_le32(ATTR_READONLY); | 1123 | cifsInode = CIFS_I(newinode); |
| 1024 | CIFSSMBSetPathInfo(xid, pTcon, full_path, | 1124 | dosattrs = cifsInode->cifsAttrs|ATTR_READONLY; |
| 1025 | &pInfo, cifs_sb->local_nls, | 1125 | pInfo.Attributes = cpu_to_le32(dosattrs); |
| 1126 | tmprc = CIFSSMBSetPathInfo(xid, pTcon, | ||
| 1127 | full_path, &pInfo, | ||
| 1128 | cifs_sb->local_nls, | ||
| 1026 | cifs_sb->mnt_cifs_flags & | 1129 | cifs_sb->mnt_cifs_flags & |
| 1027 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1130 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 1131 | if (tmprc == 0) | ||
| 1132 | cifsInode->cifsAttrs = dosattrs; | ||
| 1028 | } | 1133 | } |
| 1029 | if (direntry->d_inode) { | 1134 | if (direntry->d_inode) { |
| 1030 | if (cifs_sb->mnt_cifs_flags & | 1135 | if (cifs_sb->mnt_cifs_flags & |
| @@ -1096,117 +1201,141 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry) | |||
| 1096 | return rc; | 1201 | return rc; |
| 1097 | } | 1202 | } |
| 1098 | 1203 | ||
| 1204 | static int | ||
| 1205 | cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath, | ||
| 1206 | struct dentry *to_dentry, const char *toPath) | ||
| 1207 | { | ||
| 1208 | struct cifs_sb_info *cifs_sb = CIFS_SB(from_dentry->d_sb); | ||
| 1209 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
| 1210 | __u16 srcfid; | ||
| 1211 | int oplock, rc; | ||
| 1212 | |||
| 1213 | /* try path-based rename first */ | ||
| 1214 | rc = CIFSSMBRename(xid, pTcon, fromPath, toPath, cifs_sb->local_nls, | ||
| 1215 | cifs_sb->mnt_cifs_flags & | ||
| 1216 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1217 | |||
| 1218 | /* | ||
| 1219 | * don't bother with rename by filehandle unless file is busy and | ||
| 1220 | * source Note that cross directory moves do not work with | ||
| 1221 | * rename by filehandle to various Windows servers. | ||
| 1222 | */ | ||
| 1223 | if (rc == 0 || rc != -ETXTBSY) | ||
| 1224 | return rc; | ||
| 1225 | |||
| 1226 | /* open the file to be renamed -- we need DELETE perms */ | ||
| 1227 | rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE, | ||
| 1228 | CREATE_NOT_DIR, &srcfid, &oplock, NULL, | ||
| 1229 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
| 1230 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1231 | |||
| 1232 | if (rc == 0) { | ||
| 1233 | rc = CIFSSMBRenameOpenFile(xid, pTcon, srcfid, | ||
| 1234 | (const char *) to_dentry->d_name.name, | ||
| 1235 | cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & | ||
| 1236 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1237 | |||
| 1238 | CIFSSMBClose(xid, pTcon, srcfid); | ||
| 1239 | } | ||
| 1240 | |||
| 1241 | return rc; | ||
| 1242 | } | ||
| 1243 | |||
| 1099 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, | 1244 | int cifs_rename(struct inode *source_inode, struct dentry *source_direntry, |
| 1100 | struct inode *target_inode, struct dentry *target_direntry) | 1245 | struct inode *target_inode, struct dentry *target_direntry) |
| 1101 | { | 1246 | { |
| 1102 | char *fromName; | 1247 | char *fromName = NULL; |
| 1103 | char *toName; | 1248 | char *toName = NULL; |
| 1104 | struct cifs_sb_info *cifs_sb_source; | 1249 | struct cifs_sb_info *cifs_sb_source; |
| 1105 | struct cifs_sb_info *cifs_sb_target; | 1250 | struct cifs_sb_info *cifs_sb_target; |
| 1106 | struct cifsTconInfo *pTcon; | 1251 | struct cifsTconInfo *pTcon; |
| 1252 | FILE_UNIX_BASIC_INFO *info_buf_source = NULL; | ||
| 1253 | FILE_UNIX_BASIC_INFO *info_buf_target; | ||
| 1107 | int xid; | 1254 | int xid; |
| 1108 | int rc = 0; | 1255 | int rc; |
| 1109 | |||
| 1110 | xid = GetXid(); | ||
| 1111 | 1256 | ||
| 1112 | cifs_sb_target = CIFS_SB(target_inode->i_sb); | 1257 | cifs_sb_target = CIFS_SB(target_inode->i_sb); |
| 1113 | cifs_sb_source = CIFS_SB(source_inode->i_sb); | 1258 | cifs_sb_source = CIFS_SB(source_inode->i_sb); |
| 1114 | pTcon = cifs_sb_source->tcon; | 1259 | pTcon = cifs_sb_source->tcon; |
| 1115 | 1260 | ||
| 1261 | xid = GetXid(); | ||
| 1262 | |||
| 1263 | /* | ||
| 1264 | * BB: this might be allowed if same server, but different share. | ||
| 1265 | * Consider adding support for this | ||
| 1266 | */ | ||
| 1116 | if (pTcon != cifs_sb_target->tcon) { | 1267 | if (pTcon != cifs_sb_target->tcon) { |
| 1117 | FreeXid(xid); | 1268 | rc = -EXDEV; |
| 1118 | return -EXDEV; /* BB actually could be allowed if same server, | 1269 | goto cifs_rename_exit; |
| 1119 | but different share. | ||
| 1120 | Might eventually add support for this */ | ||
| 1121 | } | 1270 | } |
| 1122 | 1271 | ||
| 1123 | /* we already have the rename sem so we do not need to grab it again | 1272 | /* |
| 1124 | here to protect the path integrity */ | 1273 | * we already have the rename sem so we do not need to |
| 1274 | * grab it again here to protect the path integrity | ||
| 1275 | */ | ||
| 1125 | fromName = build_path_from_dentry(source_direntry); | 1276 | fromName = build_path_from_dentry(source_direntry); |
| 1277 | if (fromName == NULL) { | ||
| 1278 | rc = -ENOMEM; | ||
| 1279 | goto cifs_rename_exit; | ||
| 1280 | } | ||
| 1281 | |||
| 1126 | toName = build_path_from_dentry(target_direntry); | 1282 | toName = build_path_from_dentry(target_direntry); |
| 1127 | if ((fromName == NULL) || (toName == NULL)) { | 1283 | if (toName == NULL) { |
| 1128 | rc = -ENOMEM; | 1284 | rc = -ENOMEM; |
| 1129 | goto cifs_rename_exit; | 1285 | goto cifs_rename_exit; |
| 1130 | } | 1286 | } |
| 1131 | 1287 | ||
| 1132 | rc = CIFSSMBRename(xid, pTcon, fromName, toName, | 1288 | rc = cifs_do_rename(xid, source_direntry, fromName, |
| 1133 | cifs_sb_source->local_nls, | 1289 | target_direntry, toName); |
| 1134 | cifs_sb_source->mnt_cifs_flags & | 1290 | |
| 1135 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1136 | if (rc == -EEXIST) { | 1291 | if (rc == -EEXIST) { |
| 1137 | /* check if they are the same file because rename of hardlinked | 1292 | if (pTcon->unix_ext) { |
| 1138 | files is a noop */ | 1293 | /* |
| 1139 | FILE_UNIX_BASIC_INFO *info_buf_source; | 1294 | * Are src and dst hardlinks of same inode? We can |
| 1140 | FILE_UNIX_BASIC_INFO *info_buf_target; | 1295 | * only tell with unix extensions enabled |
| 1141 | 1296 | */ | |
| 1142 | info_buf_source = | 1297 | info_buf_source = |
| 1143 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL); | 1298 | kmalloc(2 * sizeof(FILE_UNIX_BASIC_INFO), |
| 1144 | if (info_buf_source != NULL) { | 1299 | GFP_KERNEL); |
| 1300 | if (info_buf_source == NULL) | ||
| 1301 | goto unlink_target; | ||
| 1302 | |||
| 1145 | info_buf_target = info_buf_source + 1; | 1303 | info_buf_target = info_buf_source + 1; |
| 1146 | if (pTcon->unix_ext) | 1304 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, |
| 1147 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, fromName, | 1305 | info_buf_source, |
| 1148 | info_buf_source, | 1306 | cifs_sb_source->local_nls, |
| 1149 | cifs_sb_source->local_nls, | 1307 | cifs_sb_source->mnt_cifs_flags & |
| 1150 | cifs_sb_source->mnt_cifs_flags & | ||
| 1151 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1308 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 1152 | /* else rc is still EEXIST so will fall through to | 1309 | if (rc != 0) |
| 1153 | unlink the target and retry rename */ | 1310 | goto unlink_target; |
| 1154 | if (rc == 0) { | 1311 | |
| 1155 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, toName, | 1312 | rc = CIFSSMBUnixQPathInfo(xid, pTcon, |
| 1156 | info_buf_target, | 1313 | toName, info_buf_target, |
| 1157 | cifs_sb_target->local_nls, | 1314 | cifs_sb_target->local_nls, |
| 1158 | /* remap based on source sb */ | 1315 | /* remap based on source sb */ |
| 1159 | cifs_sb_source->mnt_cifs_flags & | 1316 | cifs_sb_source->mnt_cifs_flags & |
| 1160 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1161 | } | ||
| 1162 | if ((rc == 0) && | ||
| 1163 | (info_buf_source->UniqueId == | ||
| 1164 | info_buf_target->UniqueId)) { | ||
| 1165 | /* do not rename since the files are hardlinked which | ||
| 1166 | is a noop */ | ||
| 1167 | } else { | ||
| 1168 | /* we either can not tell the files are hardlinked | ||
| 1169 | (as with Windows servers) or files are not | ||
| 1170 | hardlinked so delete the target manually before | ||
| 1171 | renaming to follow POSIX rather than Windows | ||
| 1172 | semantics */ | ||
| 1173 | cifs_unlink(target_inode, target_direntry); | ||
| 1174 | rc = CIFSSMBRename(xid, pTcon, fromName, | ||
| 1175 | toName, | ||
| 1176 | cifs_sb_source->local_nls, | ||
| 1177 | cifs_sb_source->mnt_cifs_flags | ||
| 1178 | & CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1179 | } | ||
| 1180 | kfree(info_buf_source); | ||
| 1181 | } /* if we can not get memory just leave rc as EEXIST */ | ||
| 1182 | } | ||
| 1183 | |||
| 1184 | if (rc) | ||
| 1185 | cFYI(1, ("rename rc %d", rc)); | ||
| 1186 | |||
| 1187 | if ((rc == -EIO) || (rc == -EEXIST)) { | ||
| 1188 | int oplock = 0; | ||
| 1189 | __u16 netfid; | ||
| 1190 | |||
| 1191 | /* BB FIXME Is Generic Read correct for rename? */ | ||
| 1192 | /* if renaming directory - we should not say CREATE_NOT_DIR, | ||
| 1193 | need to test renaming open directory, also GENERIC_READ | ||
| 1194 | might not right be right access to request */ | ||
| 1195 | rc = CIFSSMBOpen(xid, pTcon, fromName, FILE_OPEN, GENERIC_READ, | ||
| 1196 | CREATE_NOT_DIR, &netfid, &oplock, NULL, | ||
| 1197 | cifs_sb_source->local_nls, | ||
| 1198 | cifs_sb_source->mnt_cifs_flags & | ||
| 1199 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1200 | if (rc == 0) { | ||
| 1201 | rc = CIFSSMBRenameOpenFile(xid, pTcon, netfid, toName, | ||
| 1202 | cifs_sb_source->local_nls, | ||
| 1203 | cifs_sb_source->mnt_cifs_flags & | ||
| 1204 | CIFS_MOUNT_MAP_SPECIAL_CHR); | 1317 | CIFS_MOUNT_MAP_SPECIAL_CHR); |
| 1205 | CIFSSMBClose(xid, pTcon, netfid); | 1318 | |
| 1206 | } | 1319 | if (rc == 0 && (info_buf_source->UniqueId == |
| 1320 | info_buf_target->UniqueId)) | ||
| 1321 | /* same file, POSIX says that this is a noop */ | ||
| 1322 | goto cifs_rename_exit; | ||
| 1323 | } /* else ... BB we could add the same check for Windows by | ||
| 1324 | checking the UniqueId via FILE_INTERNAL_INFO */ | ||
| 1325 | unlink_target: | ||
| 1326 | /* | ||
| 1327 | * we either can not tell the files are hardlinked (as with | ||
| 1328 | * Windows servers) or files are not hardlinked. Delete the | ||
| 1329 | * target manually before renaming to follow POSIX rather than | ||
| 1330 | * Windows semantics | ||
| 1331 | */ | ||
| 1332 | cifs_unlink(target_inode, target_direntry); | ||
| 1333 | rc = cifs_do_rename(xid, source_direntry, fromName, | ||
| 1334 | target_direntry, toName); | ||
| 1207 | } | 1335 | } |
| 1208 | 1336 | ||
| 1209 | cifs_rename_exit: | 1337 | cifs_rename_exit: |
| 1338 | kfree(info_buf_source); | ||
| 1210 | kfree(fromName); | 1339 | kfree(fromName); |
| 1211 | kfree(toName); | 1340 | kfree(toName); |
| 1212 | FreeXid(xid); | 1341 | FreeXid(xid); |
| @@ -1507,101 +1636,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, | |||
| 1507 | } | 1636 | } |
| 1508 | 1637 | ||
| 1509 | static int | 1638 | static int |
| 1510 | cifs_set_file_info(struct inode *inode, struct iattr *attrs, int xid, | ||
| 1511 | char *full_path, __u32 dosattr) | ||
| 1512 | { | ||
| 1513 | int rc; | ||
| 1514 | int oplock = 0; | ||
| 1515 | __u16 netfid; | ||
| 1516 | __u32 netpid; | ||
| 1517 | bool set_time = false; | ||
| 1518 | struct cifsFileInfo *open_file; | ||
| 1519 | struct cifsInodeInfo *cifsInode = CIFS_I(inode); | ||
| 1520 | struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); | ||
| 1521 | struct cifsTconInfo *pTcon = cifs_sb->tcon; | ||
| 1522 | FILE_BASIC_INFO info_buf; | ||
| 1523 | |||
| 1524 | if (attrs->ia_valid & ATTR_ATIME) { | ||
| 1525 | set_time = true; | ||
| 1526 | info_buf.LastAccessTime = | ||
| 1527 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_atime)); | ||
| 1528 | } else | ||
| 1529 | info_buf.LastAccessTime = 0; | ||
| 1530 | |||
| 1531 | if (attrs->ia_valid & ATTR_MTIME) { | ||
| 1532 | set_time = true; | ||
| 1533 | info_buf.LastWriteTime = | ||
| 1534 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_mtime)); | ||
| 1535 | } else | ||
| 1536 | info_buf.LastWriteTime = 0; | ||
| 1537 | |||
| 1538 | /* | ||
| 1539 | * Samba throws this field away, but windows may actually use it. | ||
| 1540 | * Do not set ctime unless other time stamps are changed explicitly | ||
| 1541 | * (i.e. by utimes()) since we would then have a mix of client and | ||
| 1542 | * server times. | ||
| 1543 | */ | ||
| 1544 | if (set_time && (attrs->ia_valid & ATTR_CTIME)) { | ||
| 1545 | cFYI(1, ("CIFS - CTIME changed")); | ||
| 1546 | info_buf.ChangeTime = | ||
| 1547 | cpu_to_le64(cifs_UnixTimeToNT(attrs->ia_ctime)); | ||
| 1548 | } else | ||
| 1549 | info_buf.ChangeTime = 0; | ||
| 1550 | |||
| 1551 | info_buf.CreationTime = 0; /* don't change */ | ||
| 1552 | info_buf.Attributes = cpu_to_le32(dosattr); | ||
| 1553 | |||
| 1554 | /* | ||
| 1555 | * If the file is already open for write, just use that fileid | ||
| 1556 | */ | ||
| 1557 | open_file = find_writable_file(cifsInode); | ||
| 1558 | if (open_file) { | ||
| 1559 | netfid = open_file->netfid; | ||
| 1560 | netpid = open_file->pid; | ||
| 1561 | goto set_via_filehandle; | ||
| 1562 | } | ||
| 1563 | |||
| 1564 | /* | ||
| 1565 | * NT4 apparently returns success on this call, but it doesn't | ||
| 1566 | * really work. | ||
| 1567 | */ | ||
| 1568 | if (!(pTcon->ses->flags & CIFS_SES_NT4)) { | ||
| 1569 | rc = CIFSSMBSetPathInfo(xid, pTcon, full_path, | ||
| 1570 | &info_buf, cifs_sb->local_nls, | ||
| 1571 | cifs_sb->mnt_cifs_flags & | ||
| 1572 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1573 | if (rc != -EOPNOTSUPP && rc != -EINVAL) | ||
| 1574 | goto out; | ||
| 1575 | } | ||
| 1576 | |||
| 1577 | cFYI(1, ("calling SetFileInfo since SetPathInfo for " | ||
| 1578 | "times not supported by this server")); | ||
| 1579 | rc = CIFSSMBOpen(xid, pTcon, full_path, FILE_OPEN, | ||
| 1580 | SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, | ||
| 1581 | CREATE_NOT_DIR, &netfid, &oplock, | ||
| 1582 | NULL, cifs_sb->local_nls, | ||
| 1583 | cifs_sb->mnt_cifs_flags & | ||
| 1584 | CIFS_MOUNT_MAP_SPECIAL_CHR); | ||
| 1585 | |||
| 1586 | if (rc != 0) { | ||
| 1587 | if (rc == -EIO) | ||
| 1588 | rc = -EINVAL; | ||
| 1589 | goto out; | ||
| 1590 | } | ||
| 1591 | |||
| 1592 | netpid = current->tgid; | ||
| 1593 | |||
| 1594 | set_via_filehandle: | ||
| 1595 | rc = CIFSSMBSetFileInfo(xid, pTcon, &info_buf, netfid, netpid); | ||
| 1596 | if (open_file == NULL) | ||
| 1597 | CIFSSMBClose(xid, pTcon, netfid); | ||
| 1598 | else | ||
| 1599 | atomic_dec(&open_file->wrtPending); | ||
| 1600 | out: | ||
| 1601 | return rc; | ||
| 1602 | } | ||
| 1603 | |||
| 1604 | static int | ||
| 1605 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) | 1639 | cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) |
| 1606 | { | 1640 | { |
| 1607 | int rc; | 1641 | int rc; |
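
The cifs_rename rework above uses the Unix-extension UniqueId to decide whether source and target are hardlinks of the same inode, in which case POSIX says the rename is a no-op. The same decision expressed with plain POSIX stat() looks like the standalone sketch below; it is illustrative only and is not CIFS code.

#include <stdio.h>
#include <sys/stat.h>

static int same_file(const char *a, const char *b)
{
	struct stat sa, sb;

	if (stat(a, &sa) != 0 || stat(b, &sb) != 0)
		return 0;
	/* same device and same inode number => hardlinks of one file */
	return sa.st_dev == sb.st_dev && sa.st_ino == sb.st_ino;
}

int main(int argc, char **argv)
{
	if (argc == 3)
		printf("%s and %s: %s\n", argv[1], argv[2],
		       same_file(argv[1], argv[2]) ? "same inode" : "different");
	return 0;
}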
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index 4b17f8fe3157..88786ba02d27 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c | |||
| @@ -150,8 +150,7 @@ cifs_buf_get(void) | |||
| 150 | but it may be more efficient to always alloc same size | 150 | but it may be more efficient to always alloc same size |
| 151 | albeit slightly larger than necessary and maxbuffersize | 151 | albeit slightly larger than necessary and maxbuffersize |
| 152 | defaults to this and can not be bigger */ | 152 | defaults to this and can not be bigger */ |
| 153 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_req_poolp, | 153 | ret_buf = mempool_alloc(cifs_req_poolp, GFP_NOFS); |
| 154 | GFP_KERNEL | GFP_NOFS); | ||
| 155 | 154 | ||
| 156 | /* clear the first few header bytes */ | 155 | /* clear the first few header bytes */ |
| 157 | /* for most paths, more is cleared in header_assemble */ | 156 | /* for most paths, more is cleared in header_assemble */ |
| @@ -188,8 +187,7 @@ cifs_small_buf_get(void) | |||
| 188 | but it may be more efficient to always alloc same size | 187 | but it may be more efficient to always alloc same size |
| 189 | albeit slightly larger than necessary and maxbuffersize | 188 | albeit slightly larger than necessary and maxbuffersize |
| 190 | defaults to this and can not be bigger */ | 189 | defaults to this and can not be bigger */ |
| 191 | ret_buf = (struct smb_hdr *) mempool_alloc(cifs_sm_req_poolp, | 190 | ret_buf = mempool_alloc(cifs_sm_req_poolp, GFP_NOFS); |
| 192 | GFP_KERNEL | GFP_NOFS); | ||
| 193 | if (ret_buf) { | 191 | if (ret_buf) { |
| 194 | /* No need to clear memory here, cleared in header assemble */ | 192 | /* No need to clear memory here, cleared in header assemble */ |
| 195 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ | 193 | /* memset(ret_buf, 0, sizeof(struct smb_hdr) + 27);*/ |
| @@ -313,8 +311,6 @@ header_assemble(struct smb_hdr *buffer, char smb_command /* command */ , | |||
| 313 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; | 311 | buffer->Flags2 = SMBFLG2_KNOWS_LONG_NAMES; |
| 314 | buffer->Pid = cpu_to_le16((__u16)current->tgid); | 312 | buffer->Pid = cpu_to_le16((__u16)current->tgid); |
| 315 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); | 313 | buffer->PidHigh = cpu_to_le16((__u16)(current->tgid >> 16)); |
| 316 | spin_lock(&GlobalMid_Lock); | ||
| 317 | spin_unlock(&GlobalMid_Lock); | ||
| 318 | if (treeCon) { | 314 | if (treeCon) { |
| 319 | buffer->Tid = treeCon->tid; | 315 | buffer->Tid = treeCon->tid; |
| 320 | if (treeCon->ses) { | 316 | if (treeCon->ses) { |
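
The cifs_buf_get/cifs_small_buf_get hunks above drop "GFP_KERNEL | GFP_NOFS" in favor of plain GFP_NOFS. The reason is that GFP_NOFS is GFP_KERNEL with the "may recurse into the filesystem" bit removed, so OR-ing the two adds that bit back and defeats the NOFS intent. The bit values in this standalone demo are invented for illustration, not the kernel's real __GFP_* values.

#include <stdio.h>

#define MAY_WAIT 0x1
#define MAY_IO   0x2
#define MAY_FS   0x4
#define DEMO_GFP_NOFS   (MAY_WAIT | MAY_IO)
#define DEMO_GFP_KERNEL (MAY_WAIT | MAY_IO | MAY_FS)

int main(void)
{
	/* OR-ing the masks restores MAY_FS, i.e. the result equals GFP_KERNEL */
	printf("NOFS|KERNEL == KERNEL? %s\n",
	       (DEMO_GFP_NOFS | DEMO_GFP_KERNEL) == DEMO_GFP_KERNEL ? "yes" : "no");
	return 0;
}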
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 5f40ed3473f5..765adf12d54f 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c | |||
| @@ -640,6 +640,70 @@ static int is_dir_changed(struct file *file) | |||
| 640 | 640 | ||
| 641 | } | 641 | } |
| 642 | 642 | ||
| 643 | static int cifs_save_resume_key(const char *current_entry, | ||
| 644 | struct cifsFileInfo *cifsFile) | ||
| 645 | { | ||
| 646 | int rc = 0; | ||
| 647 | unsigned int len = 0; | ||
| 648 | __u16 level; | ||
| 649 | char *filename; | ||
| 650 | |||
| 651 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
| 652 | return -EINVAL; | ||
| 653 | |||
| 654 | level = cifsFile->srch_inf.info_level; | ||
| 655 | |||
| 656 | if (level == SMB_FIND_FILE_UNIX) { | ||
| 657 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
| 658 | |||
| 659 | filename = &pFindData->FileName[0]; | ||
| 660 | if (cifsFile->srch_inf.unicode) { | ||
| 661 | len = cifs_unicode_bytelen(filename); | ||
| 662 | } else { | ||
| 663 | /* BB should we make this strnlen of PATH_MAX? */ | ||
| 664 | len = strnlen(filename, PATH_MAX); | ||
| 665 | } | ||
| 666 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
| 667 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
| 668 | FILE_DIRECTORY_INFO *pFindData = | ||
| 669 | (FILE_DIRECTORY_INFO *)current_entry; | ||
| 670 | filename = &pFindData->FileName[0]; | ||
| 671 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 672 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 673 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
| 674 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
| 675 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
| 676 | filename = &pFindData->FileName[0]; | ||
| 677 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 678 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 679 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
| 680 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
| 681 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
| 682 | filename = &pFindData->FileName[0]; | ||
| 683 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 684 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 685 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
| 686 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
| 687 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
| 688 | filename = &pFindData->FileName[0]; | ||
| 689 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 690 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 691 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
| 692 | FIND_FILE_STANDARD_INFO *pFindData = | ||
| 693 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
| 694 | filename = &pFindData->FileName[0]; | ||
| 695 | /* one byte length, no name conversion */ | ||
| 696 | len = (unsigned int)pFindData->FileNameLength; | ||
| 697 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
| 698 | } else { | ||
| 699 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
| 700 | return -EINVAL; | ||
| 701 | } | ||
| 702 | cifsFile->srch_inf.resume_name_len = len; | ||
| 703 | cifsFile->srch_inf.presume_name = filename; | ||
| 704 | return rc; | ||
| 705 | } | ||
| 706 | |||
| 643 | /* find the corresponding entry in the search */ | 707 | /* find the corresponding entry in the search */ |
| 644 | /* Note that the SMB server returns search entries for . and .. which | 708 | /* Note that the SMB server returns search entries for . and .. which |
| 645 | complicates logic here if we choose to parse for them and we do not | 709 | complicates logic here if we choose to parse for them and we do not |
| @@ -703,6 +767,7 @@ static int find_cifs_entry(const int xid, struct cifsTconInfo *pTcon, | |||
| 703 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && | 767 | while ((index_to_find >= cifsFile->srch_inf.index_of_last_entry) && |
| 704 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { | 768 | (rc == 0) && !cifsFile->srch_inf.endOfSearch) { |
| 705 | cFYI(1, ("calling findnext2")); | 769 | cFYI(1, ("calling findnext2")); |
| 770 | cifs_save_resume_key(cifsFile->srch_inf.last_entry, cifsFile); | ||
| 706 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, | 771 | rc = CIFSFindNext(xid, pTcon, cifsFile->netfid, |
| 707 | &cifsFile->srch_inf); | 772 | &cifsFile->srch_inf); |
| 708 | if (rc) | 773 | if (rc) |
| @@ -919,69 +984,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, | |||
| 919 | return rc; | 984 | return rc; |
| 920 | } | 985 | } |
| 921 | 986 | ||
| 922 | static int cifs_save_resume_key(const char *current_entry, | ||
| 923 | struct cifsFileInfo *cifsFile) | ||
| 924 | { | ||
| 925 | int rc = 0; | ||
| 926 | unsigned int len = 0; | ||
| 927 | __u16 level; | ||
| 928 | char *filename; | ||
| 929 | |||
| 930 | if ((cifsFile == NULL) || (current_entry == NULL)) | ||
| 931 | return -EINVAL; | ||
| 932 | |||
| 933 | level = cifsFile->srch_inf.info_level; | ||
| 934 | |||
| 935 | if (level == SMB_FIND_FILE_UNIX) { | ||
| 936 | FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; | ||
| 937 | |||
| 938 | filename = &pFindData->FileName[0]; | ||
| 939 | if (cifsFile->srch_inf.unicode) { | ||
| 940 | len = cifs_unicode_bytelen(filename); | ||
| 941 | } else { | ||
| 942 | /* BB should we make this strnlen of PATH_MAX? */ | ||
| 943 | len = strnlen(filename, PATH_MAX); | ||
| 944 | } | ||
| 945 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
| 946 | } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { | ||
| 947 | FILE_DIRECTORY_INFO *pFindData = | ||
| 948 | (FILE_DIRECTORY_INFO *)current_entry; | ||
| 949 | filename = &pFindData->FileName[0]; | ||
| 950 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 951 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 952 | } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { | ||
| 953 | FILE_FULL_DIRECTORY_INFO *pFindData = | ||
| 954 | (FILE_FULL_DIRECTORY_INFO *)current_entry; | ||
| 955 | filename = &pFindData->FileName[0]; | ||
| 956 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 957 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 958 | } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { | ||
| 959 | SEARCH_ID_FULL_DIR_INFO *pFindData = | ||
| 960 | (SEARCH_ID_FULL_DIR_INFO *)current_entry; | ||
| 961 | filename = &pFindData->FileName[0]; | ||
| 962 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 963 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 964 | } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { | ||
| 965 | FILE_BOTH_DIRECTORY_INFO *pFindData = | ||
| 966 | (FILE_BOTH_DIRECTORY_INFO *)current_entry; | ||
| 967 | filename = &pFindData->FileName[0]; | ||
| 968 | len = le32_to_cpu(pFindData->FileNameLength); | ||
| 969 | cifsFile->srch_inf.resume_key = pFindData->FileIndex; | ||
| 970 | } else if (level == SMB_FIND_FILE_INFO_STANDARD) { | ||
| 971 | FIND_FILE_STANDARD_INFO *pFindData = | ||
| 972 | (FIND_FILE_STANDARD_INFO *)current_entry; | ||
| 973 | filename = &pFindData->FileName[0]; | ||
| 974 | /* one byte length, no name conversion */ | ||
| 975 | len = (unsigned int)pFindData->FileNameLength; | ||
| 976 | cifsFile->srch_inf.resume_key = pFindData->ResumeKey; | ||
| 977 | } else { | ||
| 978 | cFYI(1, ("Unknown findfirst level %d", level)); | ||
| 979 | return -EINVAL; | ||
| 980 | } | ||
| 981 | cifsFile->srch_inf.resume_name_len = len; | ||
| 982 | cifsFile->srch_inf.presume_name = filename; | ||
| 983 | return rc; | ||
| 984 | } | ||
| 985 | 987 | ||
| 986 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) | 988 | int cifs_readdir(struct file *file, void *direntry, filldir_t filldir) |
| 987 | { | 989 | { |
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 252fdc0567f1..2851d5da0c8c 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c | |||
| @@ -624,8 +624,10 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses, int first_time, | |||
| 624 | ses, nls_cp); | 624 | ses, nls_cp); |
| 625 | 625 | ||
| 626 | ssetup_exit: | 626 | ssetup_exit: |
| 627 | if (spnego_key) | 627 | if (spnego_key) { |
| 628 | key_revoke(spnego_key); | ||
| 628 | key_put(spnego_key); | 629 | key_put(spnego_key); |
| 630 | } | ||
| 629 | kfree(str_area); | 631 | kfree(str_area); |
| 630 | if (resp_buf_type == CIFS_SMALL_BUFFER) { | 632 | if (resp_buf_type == CIFS_SMALL_BUFFER) { |
| 631 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); | 633 | cFYI(1, ("ssetup freeing small buf %p", iov[0].iov_base)); |
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c index e286db9f5ee2..bf0e6d8e382a 100644 --- a/fs/cifs/transport.c +++ b/fs/cifs/transport.c | |||
| @@ -50,8 +50,7 @@ AllocMidQEntry(const struct smb_hdr *smb_buffer, struct cifsSesInfo *ses) | |||
| 50 | return NULL; | 50 | return NULL; |
| 51 | } | 51 | } |
| 52 | 52 | ||
| 53 | temp = (struct mid_q_entry *) mempool_alloc(cifs_mid_poolp, | 53 | temp = mempool_alloc(cifs_mid_poolp, GFP_NOFS); |
| 54 | GFP_KERNEL | GFP_NOFS); | ||
| 55 | if (temp == NULL) | 54 | if (temp == NULL) |
| 56 | return temp; | 55 | return temp; |
| 57 | else { | 56 | else { |
diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 0d9b80ec689c..cfd29da714d1 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c | |||
| @@ -362,9 +362,8 @@ static int init_coda_psdev(void) | |||
| 362 | goto out_chrdev; | 362 | goto out_chrdev; |
| 363 | } | 363 | } |
| 364 | for (i = 0; i < MAX_CODADEVS; i++) | 364 | for (i = 0; i < MAX_CODADEVS; i++) |
| 365 | device_create_drvdata(coda_psdev_class, NULL, | 365 | device_create(coda_psdev_class, NULL, |
| 366 | MKDEV(CODA_PSDEV_MAJOR, i), | 366 | MKDEV(CODA_PSDEV_MAJOR, i), NULL, "cfs%d", i); |
| 367 | NULL, "cfs%d", i); | ||
| 368 | coda_sysctl_init(); | 367 | coda_sysctl_init(); |
| 369 | goto out; | 368 | goto out; |
| 370 | 369 | ||
diff --git a/fs/compat.c b/fs/compat.c index 075d0509970d..5f9ec449c799 100644 --- a/fs/compat.c +++ b/fs/compat.c | |||
| @@ -137,6 +137,45 @@ asmlinkage long compat_sys_utimes(char __user *filename, struct compat_timeval _ | |||
| 137 | return compat_sys_futimesat(AT_FDCWD, filename, t); | 137 | return compat_sys_futimesat(AT_FDCWD, filename, t); |
| 138 | } | 138 | } |
| 139 | 139 | ||
| 140 | static int cp_compat_stat(struct kstat *stat, struct compat_stat __user *ubuf) | ||
| 141 | { | ||
| 142 | compat_ino_t ino = stat->ino; | ||
| 143 | typeof(ubuf->st_uid) uid = 0; | ||
| 144 | typeof(ubuf->st_gid) gid = 0; | ||
| 145 | int err; | ||
| 146 | |||
| 147 | SET_UID(uid, stat->uid); | ||
| 148 | SET_GID(gid, stat->gid); | ||
| 149 | |||
| 150 | if ((u64) stat->size > MAX_NON_LFS || | ||
| 151 | !old_valid_dev(stat->dev) || | ||
| 152 | !old_valid_dev(stat->rdev)) | ||
| 153 | return -EOVERFLOW; | ||
| 154 | if (sizeof(ino) < sizeof(stat->ino) && ino != stat->ino) | ||
| 155 | return -EOVERFLOW; | ||
| 156 | |||
| 157 | if (clear_user(ubuf, sizeof(*ubuf))) | ||
| 158 | return -EFAULT; | ||
| 159 | |||
| 160 | err = __put_user(old_encode_dev(stat->dev), &ubuf->st_dev); | ||
| 161 | err |= __put_user(ino, &ubuf->st_ino); | ||
| 162 | err |= __put_user(stat->mode, &ubuf->st_mode); | ||
| 163 | err |= __put_user(stat->nlink, &ubuf->st_nlink); | ||
| 164 | err |= __put_user(uid, &ubuf->st_uid); | ||
| 165 | err |= __put_user(gid, &ubuf->st_gid); | ||
| 166 | err |= __put_user(old_encode_dev(stat->rdev), &ubuf->st_rdev); | ||
| 167 | err |= __put_user(stat->size, &ubuf->st_size); | ||
| 168 | err |= __put_user(stat->atime.tv_sec, &ubuf->st_atime); | ||
| 169 | err |= __put_user(stat->atime.tv_nsec, &ubuf->st_atime_nsec); | ||
| 170 | err |= __put_user(stat->mtime.tv_sec, &ubuf->st_mtime); | ||
| 171 | err |= __put_user(stat->mtime.tv_nsec, &ubuf->st_mtime_nsec); | ||
| 172 | err |= __put_user(stat->ctime.tv_sec, &ubuf->st_ctime); | ||
| 173 | err |= __put_user(stat->ctime.tv_nsec, &ubuf->st_ctime_nsec); | ||
| 174 | err |= __put_user(stat->blksize, &ubuf->st_blksize); | ||
| 175 | err |= __put_user(stat->blocks, &ubuf->st_blocks); | ||
| 176 | return err; | ||
| 177 | } | ||
| 178 | |||
| 140 | asmlinkage long compat_sys_newstat(char __user * filename, | 179 | asmlinkage long compat_sys_newstat(char __user * filename, |
| 141 | struct compat_stat __user *statbuf) | 180 | struct compat_stat __user *statbuf) |
| 142 | { | 181 | { |
| @@ -1239,7 +1278,7 @@ static int compat_count(compat_uptr_t __user *argv, int max) | |||
| 1239 | if (!p) | 1278 | if (!p) |
| 1240 | break; | 1279 | break; |
| 1241 | argv++; | 1280 | argv++; |
| 1242 | if(++i > max) | 1281 | if (i++ >= max) |
| 1243 | return -E2BIG; | 1282 | return -E2BIG; |
| 1244 | } | 1283 | } |
| 1245 | } | 1284 | } |
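
The new cp_compat_stat above refuses to silently truncate values that do not fit the compat ABI, returning -EOVERFLOW instead. The core of that guard is a round-trip check before narrowing a 64-bit value into a 32-bit field; the standalone sketch below shows that check with plain C integer types standing in for the compat types.

#include <stdint.h>
#include <errno.h>
#include <stdio.h>

static int narrow_ino(uint64_t ino64, uint32_t *out)
{
	uint32_t ino32 = (uint32_t)ino64;

	if ((uint64_t)ino32 != ino64)
		return -EOVERFLOW;   /* value would be truncated */
	*out = ino32;
	return 0;
}

int main(void)
{
	uint32_t out;

	printf("%d\n", narrow_ino(1ULL << 40, &out)); /* -EOVERFLOW */
	printf("%d\n", narrow_ino(12345, &out));      /* 0, out == 12345 */
	return 0;
}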
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 08e28c9bb416..3dbe2169cf36 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c | |||
| @@ -26,8 +26,7 @@ | |||
| 26 | #include <linux/debugfs.h> | 26 | #include <linux/debugfs.h> |
| 27 | #include <linux/fsnotify.h> | 27 | #include <linux/fsnotify.h> |
| 28 | #include <linux/string.h> | 28 | #include <linux/string.h> |
| 29 | 29 | #include <linux/magic.h> | |
| 30 | #define DEBUGFS_MAGIC 0x64626720 | ||
| 31 | 30 | ||
| 32 | static struct vfsmount *debugfs_mount; | 31 | static struct vfsmount *debugfs_mount; |
| 33 | static int debugfs_mount_count; | 32 | static int debugfs_mount_count; |
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 488eb424f662..4a714f6c1bed 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c | |||
| @@ -27,6 +27,7 @@ | |||
| 27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 | 27 | #define DEVPTS_SUPER_MAGIC 0x1cd1 |
| 28 | 28 | ||
| 29 | #define DEVPTS_DEFAULT_MODE 0600 | 29 | #define DEVPTS_DEFAULT_MODE 0600 |
| 30 | #define PTMX_MINOR 2 | ||
| 30 | 31 | ||
| 31 | extern int pty_limit; /* Config limit on Unix98 ptys */ | 32 | extern int pty_limit; /* Config limit on Unix98 ptys */ |
| 32 | static DEFINE_IDA(allocated_ptys); | 33 | static DEFINE_IDA(allocated_ptys); |
| @@ -48,7 +49,7 @@ enum { | |||
| 48 | Opt_err | 49 | Opt_err |
| 49 | }; | 50 | }; |
| 50 | 51 | ||
| 51 | static match_table_t tokens = { | 52 | static const match_table_t tokens = { |
| 52 | {Opt_uid, "uid=%u"}, | 53 | {Opt_uid, "uid=%u"}, |
| 53 | {Opt_gid, "gid=%u"}, | 54 | {Opt_gid, "gid=%u"}, |
| 54 | {Opt_mode, "mode=%o"}, | 55 | {Opt_mode, "mode=%o"}, |
| @@ -169,15 +170,7 @@ static struct file_system_type devpts_fs_type = { | |||
| 169 | * to the System V naming convention | 170 | * to the System V naming convention |
| 170 | */ | 171 | */ |
| 171 | 172 | ||
| 172 | static struct dentry *get_node(int num) | 173 | int devpts_new_index(struct inode *ptmx_inode) |
| 173 | { | ||
| 174 | char s[12]; | ||
| 175 | struct dentry *root = devpts_root; | ||
| 176 | mutex_lock(&root->d_inode->i_mutex); | ||
| 177 | return lookup_one_len(s, root, sprintf(s, "%d", num)); | ||
| 178 | } | ||
| 179 | |||
| 180 | int devpts_new_index(void) | ||
| 181 | { | 174 | { |
| 182 | int index; | 175 | int index; |
| 183 | int ida_ret; | 176 | int ida_ret; |
| @@ -205,20 +198,21 @@ retry: | |||
| 205 | return index; | 198 | return index; |
| 206 | } | 199 | } |
| 207 | 200 | ||
| 208 | void devpts_kill_index(int idx) | 201 | void devpts_kill_index(struct inode *ptmx_inode, int idx) |
| 209 | { | 202 | { |
| 210 | mutex_lock(&allocated_ptys_lock); | 203 | mutex_lock(&allocated_ptys_lock); |
| 211 | ida_remove(&allocated_ptys, idx); | 204 | ida_remove(&allocated_ptys, idx); |
| 212 | mutex_unlock(&allocated_ptys_lock); | 205 | mutex_unlock(&allocated_ptys_lock); |
| 213 | } | 206 | } |
| 214 | 207 | ||
| 215 | int devpts_pty_new(struct tty_struct *tty) | 208 | int devpts_pty_new(struct inode *ptmx_inode, struct tty_struct *tty) |
| 216 | { | 209 | { |
| 217 | int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ | 210 | int number = tty->index; /* tty layer puts index from devpts_new_index() in here */ |
| 218 | struct tty_driver *driver = tty->driver; | 211 | struct tty_driver *driver = tty->driver; |
| 219 | dev_t device = MKDEV(driver->major, driver->minor_start+number); | 212 | dev_t device = MKDEV(driver->major, driver->minor_start+number); |
| 220 | struct dentry *dentry; | 213 | struct dentry *dentry; |
| 221 | struct inode *inode = new_inode(devpts_mnt->mnt_sb); | 214 | struct inode *inode = new_inode(devpts_mnt->mnt_sb); |
| 215 | char s[12]; | ||
| 222 | 216 | ||
| 223 | /* We're supposed to be given the slave end of a pty */ | 217 | /* We're supposed to be given the slave end of a pty */ |
| 224 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); | 218 | BUG_ON(driver->type != TTY_DRIVER_TYPE_PTY); |
| @@ -233,10 +227,15 @@ int devpts_pty_new(struct tty_struct *tty) | |||
| 233 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; | 227 | inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; |
| 234 | init_special_inode(inode, S_IFCHR|config.mode, device); | 228 | init_special_inode(inode, S_IFCHR|config.mode, device); |
| 235 | inode->i_private = tty; | 229 | inode->i_private = tty; |
| 230 | tty->driver_data = inode; | ||
| 236 | 231 | ||
| 237 | dentry = get_node(number); | 232 | sprintf(s, "%d", number); |
| 238 | if (!IS_ERR(dentry) && !dentry->d_inode) { | 233 | |
| 239 | d_instantiate(dentry, inode); | 234 | mutex_lock(&devpts_root->d_inode->i_mutex); |
| 235 | |||
| 236 | dentry = d_alloc_name(devpts_root, s); | ||
| 237 | if (!IS_ERR(dentry)) { | ||
| 238 | d_add(dentry, inode); | ||
| 240 | fsnotify_create(devpts_root->d_inode, dentry); | 239 | fsnotify_create(devpts_root->d_inode, dentry); |
| 241 | } | 240 | } |
| 242 | 241 | ||
| @@ -245,36 +244,31 @@ int devpts_pty_new(struct tty_struct *tty) | |||
| 245 | return 0; | 244 | return 0; |
| 246 | } | 245 | } |
| 247 | 246 | ||
| 248 | struct tty_struct *devpts_get_tty(int number) | 247 | struct tty_struct *devpts_get_tty(struct inode *pts_inode, int number) |
| 249 | { | 248 | { |
| 250 | struct dentry *dentry = get_node(number); | 249 | BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
| 251 | struct tty_struct *tty; | ||
| 252 | |||
| 253 | tty = NULL; | ||
| 254 | if (!IS_ERR(dentry)) { | ||
| 255 | if (dentry->d_inode) | ||
| 256 | tty = dentry->d_inode->i_private; | ||
| 257 | dput(dentry); | ||
| 258 | } | ||
| 259 | 250 | ||
| 260 | mutex_unlock(&devpts_root->d_inode->i_mutex); | 251 | if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC) |
| 261 | 252 | return (struct tty_struct *)pts_inode->i_private; | |
| 262 | return tty; | 253 | return NULL; |
| 263 | } | 254 | } |
| 264 | 255 | ||
| 265 | void devpts_pty_kill(int number) | 256 | void devpts_pty_kill(struct tty_struct *tty) |
| 266 | { | 257 | { |
| 267 | struct dentry *dentry = get_node(number); | 258 | struct inode *inode = tty->driver_data; |
| 259 | struct dentry *dentry; | ||
| 268 | 260 | ||
| 269 | if (!IS_ERR(dentry)) { | 261 | BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR)); |
| 270 | struct inode *inode = dentry->d_inode; | 262 | |
| 271 | if (inode) { | 263 | mutex_lock(&devpts_root->d_inode->i_mutex); |
| 272 | inode->i_nlink--; | 264 | |
| 273 | d_delete(dentry); | 265 | dentry = d_find_alias(inode); |
| 274 | dput(dentry); | 266 | if (dentry && !IS_ERR(dentry)) { |
| 275 | } | 267 | inode->i_nlink--; |
| 268 | d_delete(dentry); | ||
| 276 | dput(dentry); | 269 | dput(dentry); |
| 277 | } | 270 | } |
| 271 | |||
| 278 | mutex_unlock(&devpts_root->d_inode->i_mutex); | 272 | mutex_unlock(&devpts_root->d_inode->i_mutex); |
| 279 | } | 273 | } |
| 280 | 274 | ||
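For reference, the reworked devpts_pty_new() path above boils down to allocating a named child dentry under the devpts root and binding it to the freshly initialised inode while the parent's i_mutex is held. A minimal sketch of that pattern follows; the function and parameter names (attach_named_node, parent, index) are illustrative and not part of the patch, and note that d_alloc_name() returns NULL rather than an ERR_PTR on failure, so the sketch checks for NULL.

/* Sketch only: attach an inode to a named entry under a pinned parent,
 * mirroring the d_alloc_name()/d_add() pattern used in devpts_pty_new(). */
static int attach_named_node(struct dentry *parent, struct inode *inode, int index)
{
        char name[12];
        struct dentry *dentry;

        sprintf(name, "%d", index);

        mutex_lock(&parent->d_inode->i_mutex);
        dentry = d_alloc_name(parent, name);    /* new negative dentry */
        if (!dentry) {
                mutex_unlock(&parent->d_inode->i_mutex);
                return -ENOMEM;
        }
        d_add(dentry, inode);                   /* hash it and bind the inode */
        fsnotify_create(parent->d_inode, dentry);
        mutex_unlock(&parent->d_inode->i_mutex);
        return 0;
}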
diff --git a/fs/direct-io.c b/fs/direct-io.c index 9606ee848fd8..af0558dbe8b7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -5,11 +5,11 @@ | |||
| 5 | * | 5 | * |
| 6 | * O_DIRECT | 6 | * O_DIRECT |
| 7 | * | 7 | * |
| 8 | * 04Jul2002 akpm@zip.com.au | 8 | * 04Jul2002 Andrew Morton |
| 9 | * Initial version | 9 | * Initial version |
| 10 | * 11Sep2002 janetinc@us.ibm.com | 10 | * 11Sep2002 janetinc@us.ibm.com |
| 11 | * added readv/writev support. | 11 | * added readv/writev support. |
| 12 | * 29Oct2002 akpm@zip.com.au | 12 | * 29Oct2002 Andrew Morton |
| 13 | * rewrote bio_add_page() support. | 13 | * rewrote bio_add_page() support. |
| 14 | * 30Oct2002 pbadari@us.ibm.com | 14 | * 30Oct2002 pbadari@us.ibm.com |
| 15 | * added support for non-aligned IO. | 15 | * added support for non-aligned IO. |
diff --git a/fs/dlm/config.c b/fs/dlm/config.c index 89d2fb7b991a..fd9859f92fad 100644 --- a/fs/dlm/config.c +++ b/fs/dlm/config.c | |||
| @@ -14,6 +14,9 @@ | |||
| 14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
| 15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
| 16 | #include <linux/configfs.h> | 16 | #include <linux/configfs.h> |
| 17 | #include <linux/in.h> | ||
| 18 | #include <linux/in6.h> | ||
| 19 | #include <net/ipv6.h> | ||
| 17 | #include <net/sock.h> | 20 | #include <net/sock.h> |
| 18 | 21 | ||
| 19 | #include "config.h" | 22 | #include "config.h" |
| @@ -377,24 +380,24 @@ static struct config_item_type node_type = { | |||
| 377 | .ct_owner = THIS_MODULE, | 380 | .ct_owner = THIS_MODULE, |
| 378 | }; | 381 | }; |
| 379 | 382 | ||
| 380 | static struct dlm_cluster *to_cluster(struct config_item *i) | 383 | static struct dlm_cluster *config_item_to_cluster(struct config_item *i) |
| 381 | { | 384 | { |
| 382 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : | 385 | return i ? container_of(to_config_group(i), struct dlm_cluster, group) : |
| 383 | NULL; | 386 | NULL; |
| 384 | } | 387 | } |
| 385 | 388 | ||
| 386 | static struct dlm_space *to_space(struct config_item *i) | 389 | static struct dlm_space *config_item_to_space(struct config_item *i) |
| 387 | { | 390 | { |
| 388 | return i ? container_of(to_config_group(i), struct dlm_space, group) : | 391 | return i ? container_of(to_config_group(i), struct dlm_space, group) : |
| 389 | NULL; | 392 | NULL; |
| 390 | } | 393 | } |
| 391 | 394 | ||
| 392 | static struct dlm_comm *to_comm(struct config_item *i) | 395 | static struct dlm_comm *config_item_to_comm(struct config_item *i) |
| 393 | { | 396 | { |
| 394 | return i ? container_of(i, struct dlm_comm, item) : NULL; | 397 | return i ? container_of(i, struct dlm_comm, item) : NULL; |
| 395 | } | 398 | } |
| 396 | 399 | ||
| 397 | static struct dlm_node *to_node(struct config_item *i) | 400 | static struct dlm_node *config_item_to_node(struct config_item *i) |
| 398 | { | 401 | { |
| 399 | return i ? container_of(i, struct dlm_node, item) : NULL; | 402 | return i ? container_of(i, struct dlm_node, item) : NULL; |
| 400 | } | 403 | } |
| @@ -450,7 +453,7 @@ static struct config_group *make_cluster(struct config_group *g, | |||
| 450 | 453 | ||
| 451 | static void drop_cluster(struct config_group *g, struct config_item *i) | 454 | static void drop_cluster(struct config_group *g, struct config_item *i) |
| 452 | { | 455 | { |
| 453 | struct dlm_cluster *cl = to_cluster(i); | 456 | struct dlm_cluster *cl = config_item_to_cluster(i); |
| 454 | struct config_item *tmp; | 457 | struct config_item *tmp; |
| 455 | int j; | 458 | int j; |
| 456 | 459 | ||
| @@ -468,7 +471,7 @@ static void drop_cluster(struct config_group *g, struct config_item *i) | |||
| 468 | 471 | ||
| 469 | static void release_cluster(struct config_item *i) | 472 | static void release_cluster(struct config_item *i) |
| 470 | { | 473 | { |
| 471 | struct dlm_cluster *cl = to_cluster(i); | 474 | struct dlm_cluster *cl = config_item_to_cluster(i); |
| 472 | kfree(cl->group.default_groups); | 475 | kfree(cl->group.default_groups); |
| 473 | kfree(cl); | 476 | kfree(cl); |
| 474 | } | 477 | } |
| @@ -507,7 +510,7 @@ static struct config_group *make_space(struct config_group *g, const char *name) | |||
| 507 | 510 | ||
| 508 | static void drop_space(struct config_group *g, struct config_item *i) | 511 | static void drop_space(struct config_group *g, struct config_item *i) |
| 509 | { | 512 | { |
| 510 | struct dlm_space *sp = to_space(i); | 513 | struct dlm_space *sp = config_item_to_space(i); |
| 511 | struct config_item *tmp; | 514 | struct config_item *tmp; |
| 512 | int j; | 515 | int j; |
| 513 | 516 | ||
| @@ -524,7 +527,7 @@ static void drop_space(struct config_group *g, struct config_item *i) | |||
| 524 | 527 | ||
| 525 | static void release_space(struct config_item *i) | 528 | static void release_space(struct config_item *i) |
| 526 | { | 529 | { |
| 527 | struct dlm_space *sp = to_space(i); | 530 | struct dlm_space *sp = config_item_to_space(i); |
| 528 | kfree(sp->group.default_groups); | 531 | kfree(sp->group.default_groups); |
| 529 | kfree(sp); | 532 | kfree(sp); |
| 530 | } | 533 | } |
| @@ -546,7 +549,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name) | |||
| 546 | 549 | ||
| 547 | static void drop_comm(struct config_group *g, struct config_item *i) | 550 | static void drop_comm(struct config_group *g, struct config_item *i) |
| 548 | { | 551 | { |
| 549 | struct dlm_comm *cm = to_comm(i); | 552 | struct dlm_comm *cm = config_item_to_comm(i); |
| 550 | if (local_comm == cm) | 553 | if (local_comm == cm) |
| 551 | local_comm = NULL; | 554 | local_comm = NULL; |
| 552 | dlm_lowcomms_close(cm->nodeid); | 555 | dlm_lowcomms_close(cm->nodeid); |
| @@ -557,13 +560,13 @@ static void drop_comm(struct config_group *g, struct config_item *i) | |||
| 557 | 560 | ||
| 558 | static void release_comm(struct config_item *i) | 561 | static void release_comm(struct config_item *i) |
| 559 | { | 562 | { |
| 560 | struct dlm_comm *cm = to_comm(i); | 563 | struct dlm_comm *cm = config_item_to_comm(i); |
| 561 | kfree(cm); | 564 | kfree(cm); |
| 562 | } | 565 | } |
| 563 | 566 | ||
| 564 | static struct config_item *make_node(struct config_group *g, const char *name) | 567 | static struct config_item *make_node(struct config_group *g, const char *name) |
| 565 | { | 568 | { |
| 566 | struct dlm_space *sp = to_space(g->cg_item.ci_parent); | 569 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
| 567 | struct dlm_node *nd; | 570 | struct dlm_node *nd; |
| 568 | 571 | ||
| 569 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); | 572 | nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL); |
| @@ -585,8 +588,8 @@ static struct config_item *make_node(struct config_group *g, const char *name) | |||
| 585 | 588 | ||
| 586 | static void drop_node(struct config_group *g, struct config_item *i) | 589 | static void drop_node(struct config_group *g, struct config_item *i) |
| 587 | { | 590 | { |
| 588 | struct dlm_space *sp = to_space(g->cg_item.ci_parent); | 591 | struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent); |
| 589 | struct dlm_node *nd = to_node(i); | 592 | struct dlm_node *nd = config_item_to_node(i); |
| 590 | 593 | ||
| 591 | mutex_lock(&sp->members_lock); | 594 | mutex_lock(&sp->members_lock); |
| 592 | list_del(&nd->list); | 595 | list_del(&nd->list); |
| @@ -598,7 +601,7 @@ static void drop_node(struct config_group *g, struct config_item *i) | |||
| 598 | 601 | ||
| 599 | static void release_node(struct config_item *i) | 602 | static void release_node(struct config_item *i) |
| 600 | { | 603 | { |
| 601 | struct dlm_node *nd = to_node(i); | 604 | struct dlm_node *nd = config_item_to_node(i); |
| 602 | kfree(nd); | 605 | kfree(nd); |
| 603 | } | 606 | } |
| 604 | 607 | ||
| @@ -632,7 +635,7 @@ void dlm_config_exit(void) | |||
| 632 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, | 635 | static ssize_t show_cluster(struct config_item *i, struct configfs_attribute *a, |
| 633 | char *buf) | 636 | char *buf) |
| 634 | { | 637 | { |
| 635 | struct dlm_cluster *cl = to_cluster(i); | 638 | struct dlm_cluster *cl = config_item_to_cluster(i); |
| 636 | struct cluster_attribute *cla = | 639 | struct cluster_attribute *cla = |
| 637 | container_of(a, struct cluster_attribute, attr); | 640 | container_of(a, struct cluster_attribute, attr); |
| 638 | return cla->show ? cla->show(cl, buf) : 0; | 641 | return cla->show ? cla->show(cl, buf) : 0; |
| @@ -642,7 +645,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
| 642 | struct configfs_attribute *a, | 645 | struct configfs_attribute *a, |
| 643 | const char *buf, size_t len) | 646 | const char *buf, size_t len) |
| 644 | { | 647 | { |
| 645 | struct dlm_cluster *cl = to_cluster(i); | 648 | struct dlm_cluster *cl = config_item_to_cluster(i); |
| 646 | struct cluster_attribute *cla = | 649 | struct cluster_attribute *cla = |
| 647 | container_of(a, struct cluster_attribute, attr); | 650 | container_of(a, struct cluster_attribute, attr); |
| 648 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; | 651 | return cla->store ? cla->store(cl, buf, len) : -EINVAL; |
| @@ -651,7 +654,7 @@ static ssize_t store_cluster(struct config_item *i, | |||
| 651 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | 654 | static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, |
| 652 | char *buf) | 655 | char *buf) |
| 653 | { | 656 | { |
| 654 | struct dlm_comm *cm = to_comm(i); | 657 | struct dlm_comm *cm = config_item_to_comm(i); |
| 655 | struct comm_attribute *cma = | 658 | struct comm_attribute *cma = |
| 656 | container_of(a, struct comm_attribute, attr); | 659 | container_of(a, struct comm_attribute, attr); |
| 657 | return cma->show ? cma->show(cm, buf) : 0; | 660 | return cma->show ? cma->show(cm, buf) : 0; |
| @@ -660,7 +663,7 @@ static ssize_t show_comm(struct config_item *i, struct configfs_attribute *a, | |||
| 660 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, | 663 | static ssize_t store_comm(struct config_item *i, struct configfs_attribute *a, |
| 661 | const char *buf, size_t len) | 664 | const char *buf, size_t len) |
| 662 | { | 665 | { |
| 663 | struct dlm_comm *cm = to_comm(i); | 666 | struct dlm_comm *cm = config_item_to_comm(i); |
| 664 | struct comm_attribute *cma = | 667 | struct comm_attribute *cma = |
| 665 | container_of(a, struct comm_attribute, attr); | 668 | container_of(a, struct comm_attribute, attr); |
| 666 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; | 669 | return cma->store ? cma->store(cm, buf, len) : -EINVAL; |
| @@ -714,7 +717,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) | |||
| 714 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | 717 | static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, |
| 715 | char *buf) | 718 | char *buf) |
| 716 | { | 719 | { |
| 717 | struct dlm_node *nd = to_node(i); | 720 | struct dlm_node *nd = config_item_to_node(i); |
| 718 | struct node_attribute *nda = | 721 | struct node_attribute *nda = |
| 719 | container_of(a, struct node_attribute, attr); | 722 | container_of(a, struct node_attribute, attr); |
| 720 | return nda->show ? nda->show(nd, buf) : 0; | 723 | return nda->show ? nda->show(nd, buf) : 0; |
| @@ -723,7 +726,7 @@ static ssize_t show_node(struct config_item *i, struct configfs_attribute *a, | |||
| 723 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, | 726 | static ssize_t store_node(struct config_item *i, struct configfs_attribute *a, |
| 724 | const char *buf, size_t len) | 727 | const char *buf, size_t len) |
| 725 | { | 728 | { |
| 726 | struct dlm_node *nd = to_node(i); | 729 | struct dlm_node *nd = config_item_to_node(i); |
| 727 | struct node_attribute *nda = | 730 | struct node_attribute *nda = |
| 728 | container_of(a, struct node_attribute, attr); | 731 | container_of(a, struct node_attribute, attr); |
| 729 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; | 732 | return nda->store ? nda->store(nd, buf, len) : -EINVAL; |
| @@ -768,7 +771,7 @@ static struct dlm_space *get_space(char *name) | |||
| 768 | i = config_group_find_item(space_list, name); | 771 | i = config_group_find_item(space_list, name); |
| 769 | mutex_unlock(&space_list->cg_subsys->su_mutex); | 772 | mutex_unlock(&space_list->cg_subsys->su_mutex); |
| 770 | 773 | ||
| 771 | return to_space(i); | 774 | return config_item_to_space(i); |
| 772 | } | 775 | } |
| 773 | 776 | ||
| 774 | static void put_space(struct dlm_space *sp) | 777 | static void put_space(struct dlm_space *sp) |
| @@ -776,6 +779,33 @@ static void put_space(struct dlm_space *sp) | |||
| 776 | config_item_put(&sp->group.cg_item); | 779 | config_item_put(&sp->group.cg_item); |
| 777 | } | 780 | } |
| 778 | 781 | ||
| 782 | static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) | ||
| 783 | { | ||
| 784 | switch (x->ss_family) { | ||
| 785 | case AF_INET: { | ||
| 786 | struct sockaddr_in *sinx = (struct sockaddr_in *)x; | ||
| 787 | struct sockaddr_in *siny = (struct sockaddr_in *)y; | ||
| 788 | if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) | ||
| 789 | return 0; | ||
| 790 | if (sinx->sin_port != siny->sin_port) | ||
| 791 | return 0; | ||
| 792 | break; | ||
| 793 | } | ||
| 794 | case AF_INET6: { | ||
| 795 | struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; | ||
| 796 | struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; | ||
| 797 | if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) | ||
| 798 | return 0; | ||
| 799 | if (sinx->sin6_port != siny->sin6_port) | ||
| 800 | return 0; | ||
| 801 | break; | ||
| 802 | } | ||
| 803 | default: | ||
| 804 | return 0; | ||
| 805 | } | ||
| 806 | return 1; | ||
| 807 | } | ||
| 808 | |||
| 779 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | 809 | static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) |
| 780 | { | 810 | { |
| 781 | struct config_item *i; | 811 | struct config_item *i; |
| @@ -788,7 +818,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
| 788 | mutex_lock(&clusters_root.subsys.su_mutex); | 818 | mutex_lock(&clusters_root.subsys.su_mutex); |
| 789 | 819 | ||
| 790 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { | 820 | list_for_each_entry(i, &comm_list->cg_children, ci_entry) { |
| 791 | cm = to_comm(i); | 821 | cm = config_item_to_comm(i); |
| 792 | 822 | ||
| 793 | if (nodeid) { | 823 | if (nodeid) { |
| 794 | if (cm->nodeid != nodeid) | 824 | if (cm->nodeid != nodeid) |
| @@ -797,8 +827,7 @@ static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) | |||
| 797 | config_item_get(i); | 827 | config_item_get(i); |
| 798 | break; | 828 | break; |
| 799 | } else { | 829 | } else { |
| 800 | if (!cm->addr_count || | 830 | if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) |
| 801 | memcmp(cm->addr[0], addr, sizeof(*addr))) | ||
| 802 | continue; | 831 | continue; |
| 803 | found = 1; | 832 | found = 1; |
| 804 | config_item_get(i); | 833 | config_item_get(i); |
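The new addr_compare() helper only treats two endpoints as equal when the family, the address bytes and the port all match, which avoids comparing the unused padding bytes that the old raw memcmp() over a full sockaddr_storage would have included. A hedged usage sketch, with made-up loopback addresses and ports:

/* Illustration only: two comms with the same IPv4 address but different
 * ports are not considered the same endpoint by addr_compare(). */
static void addr_compare_example(void)
{
        struct sockaddr_storage a = { 0 }, b = { 0 };
        struct sockaddr_in *sa = (struct sockaddr_in *)&a;
        struct sockaddr_in *sb = (struct sockaddr_in *)&b;

        sa->sin_family = sb->sin_family = AF_INET;
        sa->sin_addr.s_addr = sb->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
        sa->sin_port = htons(21064);
        sb->sin_port = htons(21065);

        WARN_ON(addr_compare(&a, &b) != 0);     /* different port: no match */
}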
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 5a7ac33b629c..868e4c9ef127 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* |
| 3 | ** | 3 | ** |
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 6 | ** | 6 | ** |
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
| @@ -441,8 +441,11 @@ struct dlm_ls { | |||
| 441 | uint32_t ls_global_id; /* global unique lockspace ID */ | 441 | uint32_t ls_global_id; /* global unique lockspace ID */ |
| 442 | uint32_t ls_exflags; | 442 | uint32_t ls_exflags; |
| 443 | int ls_lvblen; | 443 | int ls_lvblen; |
| 444 | int ls_count; /* reference count */ | 444 | int ls_count; /* refcount of processes in |
| 445 | the dlm using this ls */ | ||
| 446 | int ls_create_count; /* create/release refcount */ | ||
| 445 | unsigned long ls_flags; /* LSFL_ */ | 447 | unsigned long ls_flags; /* LSFL_ */ |
| 448 | unsigned long ls_scan_time; | ||
| 446 | struct kobject ls_kobj; | 449 | struct kobject ls_kobj; |
| 447 | 450 | ||
| 448 | struct dlm_rsbtable *ls_rsbtbl; | 451 | struct dlm_rsbtable *ls_rsbtbl; |
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c index 499e16759e96..d910501de6d2 100644 --- a/fs/dlm/lockspace.c +++ b/fs/dlm/lockspace.c | |||
| @@ -2,7 +2,7 @@ | |||
| 2 | ******************************************************************************* | 2 | ******************************************************************************* |
| 3 | ** | 3 | ** |
| 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. | 4 | ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. |
| 5 | ** Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. | 5 | ** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. |
| 6 | ** | 6 | ** |
| 7 | ** This copyrighted material is made available to anyone wishing to use, | 7 | ** This copyrighted material is made available to anyone wishing to use, |
| 8 | ** modify, copy, or redistribute it subject to the terms and conditions | 8 | ** modify, copy, or redistribute it subject to the terms and conditions |
| @@ -23,6 +23,7 @@ | |||
| 23 | #include "lock.h" | 23 | #include "lock.h" |
| 24 | #include "recover.h" | 24 | #include "recover.h" |
| 25 | #include "requestqueue.h" | 25 | #include "requestqueue.h" |
| 26 | #include "user.h" | ||
| 26 | 27 | ||
| 27 | static int ls_count; | 28 | static int ls_count; |
| 28 | static struct mutex ls_lock; | 29 | static struct mutex ls_lock; |
| @@ -211,19 +212,41 @@ void dlm_lockspace_exit(void) | |||
| 211 | kset_unregister(dlm_kset); | 212 | kset_unregister(dlm_kset); |
| 212 | } | 213 | } |
| 213 | 214 | ||
| 215 | static struct dlm_ls *find_ls_to_scan(void) | ||
| 216 | { | ||
| 217 | struct dlm_ls *ls; | ||
| 218 | |||
| 219 | spin_lock(&lslist_lock); | ||
| 220 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 221 | if (time_after_eq(jiffies, ls->ls_scan_time + | ||
| 222 | dlm_config.ci_scan_secs * HZ)) { | ||
| 223 | spin_unlock(&lslist_lock); | ||
| 224 | return ls; | ||
| 225 | } | ||
| 226 | } | ||
| 227 | spin_unlock(&lslist_lock); | ||
| 228 | return NULL; | ||
| 229 | } | ||
| 230 | |||
| 214 | static int dlm_scand(void *data) | 231 | static int dlm_scand(void *data) |
| 215 | { | 232 | { |
| 216 | struct dlm_ls *ls; | 233 | struct dlm_ls *ls; |
| 234 | int timeout_jiffies = dlm_config.ci_scan_secs * HZ; | ||
| 217 | 235 | ||
| 218 | while (!kthread_should_stop()) { | 236 | while (!kthread_should_stop()) { |
| 219 | list_for_each_entry(ls, &lslist, ls_list) { | 237 | ls = find_ls_to_scan(); |
| 238 | if (ls) { | ||
| 220 | if (dlm_lock_recovery_try(ls)) { | 239 | if (dlm_lock_recovery_try(ls)) { |
| 240 | ls->ls_scan_time = jiffies; | ||
| 221 | dlm_scan_rsbs(ls); | 241 | dlm_scan_rsbs(ls); |
| 222 | dlm_scan_timeout(ls); | 242 | dlm_scan_timeout(ls); |
| 223 | dlm_unlock_recovery(ls); | 243 | dlm_unlock_recovery(ls); |
| 244 | } else { | ||
| 245 | ls->ls_scan_time += HZ; | ||
| 224 | } | 246 | } |
| 247 | } else { | ||
| 248 | schedule_timeout_interruptible(timeout_jiffies); | ||
| 225 | } | 249 | } |
| 226 | schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ); | ||
| 227 | } | 250 | } |
| 228 | return 0; | 251 | return 0; |
| 229 | } | 252 | } |
| @@ -246,23 +269,6 @@ static void dlm_scand_stop(void) | |||
| 246 | kthread_stop(scand_task); | 269 | kthread_stop(scand_task); |
| 247 | } | 270 | } |
| 248 | 271 | ||
| 249 | static struct dlm_ls *dlm_find_lockspace_name(char *name, int namelen) | ||
| 250 | { | ||
| 251 | struct dlm_ls *ls; | ||
| 252 | |||
| 253 | spin_lock(&lslist_lock); | ||
| 254 | |||
| 255 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 256 | if (ls->ls_namelen == namelen && | ||
| 257 | memcmp(ls->ls_name, name, namelen) == 0) | ||
| 258 | goto out; | ||
| 259 | } | ||
| 260 | ls = NULL; | ||
| 261 | out: | ||
| 262 | spin_unlock(&lslist_lock); | ||
| 263 | return ls; | ||
| 264 | } | ||
| 265 | |||
| 266 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) | 272 | struct dlm_ls *dlm_find_lockspace_global(uint32_t id) |
| 267 | { | 273 | { |
| 268 | struct dlm_ls *ls; | 274 | struct dlm_ls *ls; |
| @@ -327,6 +333,7 @@ static void remove_lockspace(struct dlm_ls *ls) | |||
| 327 | for (;;) { | 333 | for (;;) { |
| 328 | spin_lock(&lslist_lock); | 334 | spin_lock(&lslist_lock); |
| 329 | if (ls->ls_count == 0) { | 335 | if (ls->ls_count == 0) { |
| 336 | WARN_ON(ls->ls_create_count != 0); | ||
| 330 | list_del(&ls->ls_list); | 337 | list_del(&ls->ls_list); |
| 331 | spin_unlock(&lslist_lock); | 338 | spin_unlock(&lslist_lock); |
| 332 | return; | 339 | return; |
| @@ -381,7 +388,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 381 | uint32_t flags, int lvblen) | 388 | uint32_t flags, int lvblen) |
| 382 | { | 389 | { |
| 383 | struct dlm_ls *ls; | 390 | struct dlm_ls *ls; |
| 384 | int i, size, error = -ENOMEM; | 391 | int i, size, error; |
| 385 | int do_unreg = 0; | 392 | int do_unreg = 0; |
| 386 | 393 | ||
| 387 | if (namelen > DLM_LOCKSPACE_LEN) | 394 | if (namelen > DLM_LOCKSPACE_LEN) |
| @@ -393,12 +400,37 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 393 | if (!try_module_get(THIS_MODULE)) | 400 | if (!try_module_get(THIS_MODULE)) |
| 394 | return -EINVAL; | 401 | return -EINVAL; |
| 395 | 402 | ||
| 396 | ls = dlm_find_lockspace_name(name, namelen); | 403 | if (!dlm_user_daemon_available()) { |
| 397 | if (ls) { | 404 | module_put(THIS_MODULE); |
| 398 | *lockspace = ls; | 405 | return -EUNATCH; |
| 406 | } | ||
| 407 | |||
| 408 | error = 0; | ||
| 409 | |||
| 410 | spin_lock(&lslist_lock); | ||
| 411 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 412 | WARN_ON(ls->ls_create_count <= 0); | ||
| 413 | if (ls->ls_namelen != namelen) | ||
| 414 | continue; | ||
| 415 | if (memcmp(ls->ls_name, name, namelen)) | ||
| 416 | continue; | ||
| 417 | if (flags & DLM_LSFL_NEWEXCL) { | ||
| 418 | error = -EEXIST; | ||
| 419 | break; | ||
| 420 | } | ||
| 421 | ls->ls_create_count++; | ||
| 399 | module_put(THIS_MODULE); | 422 | module_put(THIS_MODULE); |
| 400 | return -EEXIST; | 423 | error = 1; /* not an error, return 0 */ |
| 424 | break; | ||
| 401 | } | 425 | } |
| 426 | spin_unlock(&lslist_lock); | ||
| 427 | |||
| 428 | if (error < 0) | ||
| 429 | goto out; | ||
| 430 | if (error) | ||
| 431 | goto ret_zero; | ||
| 432 | |||
| 433 | error = -ENOMEM; | ||
| 402 | 434 | ||
| 403 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); | 435 | ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL); |
| 404 | if (!ls) | 436 | if (!ls) |
| @@ -408,6 +440,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 408 | ls->ls_lvblen = lvblen; | 440 | ls->ls_lvblen = lvblen; |
| 409 | ls->ls_count = 0; | 441 | ls->ls_count = 0; |
| 410 | ls->ls_flags = 0; | 442 | ls->ls_flags = 0; |
| 443 | ls->ls_scan_time = jiffies; | ||
| 411 | 444 | ||
| 412 | if (flags & DLM_LSFL_TIMEWARN) | 445 | if (flags & DLM_LSFL_TIMEWARN) |
| 413 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); | 446 | set_bit(LSFL_TIMEWARN, &ls->ls_flags); |
| @@ -418,8 +451,9 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 418 | ls->ls_allocation = GFP_KERNEL; | 451 | ls->ls_allocation = GFP_KERNEL; |
| 419 | 452 | ||
| 420 | /* ls_exflags are forced to match among nodes, and we don't | 453 | /* ls_exflags are forced to match among nodes, and we don't |
| 421 | need to require all nodes to have TIMEWARN or FS set */ | 454 | need to require all nodes to have some flags set */ |
| 422 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS)); | 455 | ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS | |
| 456 | DLM_LSFL_NEWEXCL)); | ||
| 423 | 457 | ||
| 424 | size = dlm_config.ci_rsbtbl_size; | 458 | size = dlm_config.ci_rsbtbl_size; |
| 425 | ls->ls_rsbtbl_size = size; | 459 | ls->ls_rsbtbl_size = size; |
| @@ -510,6 +544,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 510 | down_write(&ls->ls_in_recovery); | 544 | down_write(&ls->ls_in_recovery); |
| 511 | 545 | ||
| 512 | spin_lock(&lslist_lock); | 546 | spin_lock(&lslist_lock); |
| 547 | ls->ls_create_count = 1; | ||
| 513 | list_add(&ls->ls_list, &lslist); | 548 | list_add(&ls->ls_list, &lslist); |
| 514 | spin_unlock(&lslist_lock); | 549 | spin_unlock(&lslist_lock); |
| 515 | 550 | ||
| @@ -548,7 +583,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace, | |||
| 548 | dlm_create_debug_file(ls); | 583 | dlm_create_debug_file(ls); |
| 549 | 584 | ||
| 550 | log_debug(ls, "join complete"); | 585 | log_debug(ls, "join complete"); |
| 551 | 586 | ret_zero: | |
| 552 | *lockspace = ls; | 587 | *lockspace = ls; |
| 553 | return 0; | 588 | return 0; |
| 554 | 589 | ||
| @@ -635,13 +670,34 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
| 635 | struct dlm_lkb *lkb; | 670 | struct dlm_lkb *lkb; |
| 636 | struct dlm_rsb *rsb; | 671 | struct dlm_rsb *rsb; |
| 637 | struct list_head *head; | 672 | struct list_head *head; |
| 638 | int i; | 673 | int i, busy, rv; |
| 639 | int busy = lockspace_busy(ls); | 674 | |
| 675 | busy = lockspace_busy(ls); | ||
| 676 | |||
| 677 | spin_lock(&lslist_lock); | ||
| 678 | if (ls->ls_create_count == 1) { | ||
| 679 | if (busy > force) | ||
| 680 | rv = -EBUSY; | ||
| 681 | else { | ||
| 682 | /* remove_lockspace takes ls off lslist */ | ||
| 683 | ls->ls_create_count = 0; | ||
| 684 | rv = 0; | ||
| 685 | } | ||
| 686 | } else if (ls->ls_create_count > 1) { | ||
| 687 | rv = --ls->ls_create_count; | ||
| 688 | } else { | ||
| 689 | rv = -EINVAL; | ||
| 690 | } | ||
| 691 | spin_unlock(&lslist_lock); | ||
| 640 | 692 | ||
| 641 | if (busy > force) | 693 | if (rv) { |
| 642 | return -EBUSY; | 694 | log_debug(ls, "release_lockspace no remove %d", rv); |
| 695 | return rv; | ||
| 696 | } | ||
| 697 | |||
| 698 | dlm_device_deregister(ls); | ||
| 643 | 699 | ||
| 644 | if (force < 3) | 700 | if (force < 3 && dlm_user_daemon_available()) |
| 645 | do_uevent(ls, 0); | 701 | do_uevent(ls, 0); |
| 646 | 702 | ||
| 647 | dlm_recoverd_stop(ls); | 703 | dlm_recoverd_stop(ls); |
| @@ -720,15 +776,10 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
| 720 | dlm_clear_members(ls); | 776 | dlm_clear_members(ls); |
| 721 | dlm_clear_members_gone(ls); | 777 | dlm_clear_members_gone(ls); |
| 722 | kfree(ls->ls_node_array); | 778 | kfree(ls->ls_node_array); |
| 779 | log_debug(ls, "release_lockspace final free"); | ||
| 723 | kobject_put(&ls->ls_kobj); | 780 | kobject_put(&ls->ls_kobj); |
| 724 | /* The ls structure will be freed when the kobject is done with */ | 781 | /* The ls structure will be freed when the kobject is done with */ |
| 725 | 782 | ||
| 726 | mutex_lock(&ls_lock); | ||
| 727 | ls_count--; | ||
| 728 | if (!ls_count) | ||
| 729 | threads_stop(); | ||
| 730 | mutex_unlock(&ls_lock); | ||
| 731 | |||
| 732 | module_put(THIS_MODULE); | 783 | module_put(THIS_MODULE); |
| 733 | return 0; | 784 | return 0; |
| 734 | } | 785 | } |
| @@ -750,11 +801,38 @@ static int release_lockspace(struct dlm_ls *ls, int force) | |||
| 750 | int dlm_release_lockspace(void *lockspace, int force) | 801 | int dlm_release_lockspace(void *lockspace, int force) |
| 751 | { | 802 | { |
| 752 | struct dlm_ls *ls; | 803 | struct dlm_ls *ls; |
| 804 | int error; | ||
| 753 | 805 | ||
| 754 | ls = dlm_find_lockspace_local(lockspace); | 806 | ls = dlm_find_lockspace_local(lockspace); |
| 755 | if (!ls) | 807 | if (!ls) |
| 756 | return -EINVAL; | 808 | return -EINVAL; |
| 757 | dlm_put_lockspace(ls); | 809 | dlm_put_lockspace(ls); |
| 758 | return release_lockspace(ls, force); | 810 | |
| 811 | mutex_lock(&ls_lock); | ||
| 812 | error = release_lockspace(ls, force); | ||
| 813 | if (!error) | ||
| 814 | ls_count--; | ||
| 815 | else if (!ls_count) | ||
| 816 | threads_stop(); | ||
| 817 | mutex_unlock(&ls_lock); | ||
| 818 | |||
| 819 | return error; | ||
| 820 | } | ||
| 821 | |||
| 822 | void dlm_stop_lockspaces(void) | ||
| 823 | { | ||
| 824 | struct dlm_ls *ls; | ||
| 825 | |||
| 826 | restart: | ||
| 827 | spin_lock(&lslist_lock); | ||
| 828 | list_for_each_entry(ls, &lslist, ls_list) { | ||
| 829 | if (!test_bit(LSFL_RUNNING, &ls->ls_flags)) | ||
| 830 | continue; | ||
| 831 | spin_unlock(&lslist_lock); | ||
| 832 | log_error(ls, "no userland control daemon, stopping lockspace"); | ||
| 833 | dlm_ls_stop(ls); | ||
| 834 | goto restart; | ||
| 835 | } | ||
| 836 | spin_unlock(&lslist_lock); | ||
| 759 | } | 837 | } |
| 760 | 838 | ||
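The reworked dlm_scand() no longer walks lslist while holding lslist_lock for the whole scan; instead each lockspace carries its own ls_scan_time and becomes due once ci_scan_secs have elapsed. The core timing test reduces to the usual jiffies comparison, sketched below with an illustrative helper name:

/* Sketch of the "is this lockspace due for a scan?" test. */
static int lockspace_scan_due(unsigned long last_scan, int scan_secs)
{
        return time_after_eq(jiffies, last_scan + scan_secs * HZ);
}

/* If dlm_lock_recovery_try() fails, the patch nudges ls_scan_time forward
 * by HZ so the same lockspace is retried roughly a second later instead of
 * being re-picked by find_ls_to_scan() in a tight loop. */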
diff --git a/fs/dlm/lockspace.h b/fs/dlm/lockspace.h index 891eabbdd021..f879f87901f8 100644 --- a/fs/dlm/lockspace.h +++ b/fs/dlm/lockspace.h | |||
| @@ -20,6 +20,7 @@ struct dlm_ls *dlm_find_lockspace_global(uint32_t id); | |||
| 20 | struct dlm_ls *dlm_find_lockspace_local(void *id); | 20 | struct dlm_ls *dlm_find_lockspace_local(void *id); |
| 21 | struct dlm_ls *dlm_find_lockspace_device(int minor); | 21 | struct dlm_ls *dlm_find_lockspace_device(int minor); |
| 22 | void dlm_put_lockspace(struct dlm_ls *ls); | 22 | void dlm_put_lockspace(struct dlm_ls *ls); |
| 23 | void dlm_stop_lockspaces(void); | ||
| 23 | 24 | ||
| 24 | #endif /* __LOCKSPACE_DOT_H__ */ | 25 | #endif /* __LOCKSPACE_DOT_H__ */ |
| 25 | 26 | ||
diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 34f14a14fb4e..b3832c67194a 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2006-2007 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
| 3 | * | 3 | * |
| 4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -15,7 +15,6 @@ | |||
| 15 | #include <linux/poll.h> | 15 | #include <linux/poll.h> |
| 16 | #include <linux/signal.h> | 16 | #include <linux/signal.h> |
| 17 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 18 | #include <linux/smp_lock.h> | ||
| 19 | #include <linux/dlm.h> | 18 | #include <linux/dlm.h> |
| 20 | #include <linux/dlm_device.h> | 19 | #include <linux/dlm_device.h> |
| 21 | 20 | ||
| @@ -27,6 +26,8 @@ | |||
| 27 | 26 | ||
| 28 | static const char name_prefix[] = "dlm"; | 27 | static const char name_prefix[] = "dlm"; |
| 29 | static const struct file_operations device_fops; | 28 | static const struct file_operations device_fops; |
| 29 | static atomic_t dlm_monitor_opened; | ||
| 30 | static int dlm_monitor_unused = 1; | ||
| 30 | 31 | ||
| 31 | #ifdef CONFIG_COMPAT | 32 | #ifdef CONFIG_COMPAT |
| 32 | 33 | ||
| @@ -340,10 +341,15 @@ static int device_user_deadlock(struct dlm_user_proc *proc, | |||
| 340 | return error; | 341 | return error; |
| 341 | } | 342 | } |
| 342 | 343 | ||
| 343 | static int create_misc_device(struct dlm_ls *ls, char *name) | 344 | static int dlm_device_register(struct dlm_ls *ls, char *name) |
| 344 | { | 345 | { |
| 345 | int error, len; | 346 | int error, len; |
| 346 | 347 | ||
| 348 | /* The device is already registered. This happens when the | ||
| 349 | lockspace is created multiple times from userspace. */ | ||
| 350 | if (ls->ls_device.name) | ||
| 351 | return 0; | ||
| 352 | |||
| 347 | error = -ENOMEM; | 353 | error = -ENOMEM; |
| 348 | len = strlen(name) + strlen(name_prefix) + 2; | 354 | len = strlen(name) + strlen(name_prefix) + 2; |
| 349 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); | 355 | ls->ls_device.name = kzalloc(len, GFP_KERNEL); |
| @@ -363,6 +369,22 @@ fail: | |||
| 363 | return error; | 369 | return error; |
| 364 | } | 370 | } |
| 365 | 371 | ||
| 372 | int dlm_device_deregister(struct dlm_ls *ls) | ||
| 373 | { | ||
| 374 | int error; | ||
| 375 | |||
| 376 | /* The device is not registered. This happens when the lockspace | ||
| 377 | was never used from userspace, or when device_create_lockspace() | ||
| 378 | calls dlm_release_lockspace() after the register fails. */ | ||
| 379 | if (!ls->ls_device.name) | ||
| 380 | return 0; | ||
| 381 | |||
| 382 | error = misc_deregister(&ls->ls_device); | ||
| 383 | if (!error) | ||
| 384 | kfree(ls->ls_device.name); | ||
| 385 | return error; | ||
| 386 | } | ||
| 387 | |||
| 366 | static int device_user_purge(struct dlm_user_proc *proc, | 388 | static int device_user_purge(struct dlm_user_proc *proc, |
| 367 | struct dlm_purge_params *params) | 389 | struct dlm_purge_params *params) |
| 368 | { | 390 | { |
| @@ -397,7 +419,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params) | |||
| 397 | if (!ls) | 419 | if (!ls) |
| 398 | return -ENOENT; | 420 | return -ENOENT; |
| 399 | 421 | ||
| 400 | error = create_misc_device(ls, params->name); | 422 | error = dlm_device_register(ls, params->name); |
| 401 | dlm_put_lockspace(ls); | 423 | dlm_put_lockspace(ls); |
| 402 | 424 | ||
| 403 | if (error) | 425 | if (error) |
| @@ -421,31 +443,22 @@ static int device_remove_lockspace(struct dlm_lspace_params *params) | |||
| 421 | if (!ls) | 443 | if (!ls) |
| 422 | return -ENOENT; | 444 | return -ENOENT; |
| 423 | 445 | ||
| 424 | /* Deregister the misc device first, so we don't have | ||
| 425 | * a device that's not attached to a lockspace. If | ||
| 426 | * dlm_release_lockspace fails then we can recreate it | ||
| 427 | */ | ||
| 428 | error = misc_deregister(&ls->ls_device); | ||
| 429 | if (error) { | ||
| 430 | dlm_put_lockspace(ls); | ||
| 431 | goto out; | ||
| 432 | } | ||
| 433 | kfree(ls->ls_device.name); | ||
| 434 | |||
| 435 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) | 446 | if (params->flags & DLM_USER_LSFLG_FORCEFREE) |
| 436 | force = 2; | 447 | force = 2; |
| 437 | 448 | ||
| 438 | lockspace = ls->ls_local_handle; | 449 | lockspace = ls->ls_local_handle; |
| 450 | dlm_put_lockspace(ls); | ||
| 439 | 451 | ||
| 440 | /* dlm_release_lockspace waits for references to go to zero, | 452 | /* The final dlm_release_lockspace waits for references to go to |
| 441 | so all processes will need to close their device for the ls | 453 | zero, so all processes will need to close their device for the |
| 442 | before the release will procede */ | 454 | ls before the release will proceed. release also calls the |
| 455 | device_deregister above. Converting a positive return value | ||
| 456 | from release to zero means that userspace won't know when its | ||
| 457 | release was the final one, but it shouldn't need to know. */ | ||
| 443 | 458 | ||
| 444 | dlm_put_lockspace(ls); | ||
| 445 | error = dlm_release_lockspace(lockspace, force); | 459 | error = dlm_release_lockspace(lockspace, force); |
| 446 | if (error) | 460 | if (error > 0) |
| 447 | create_misc_device(ls, ls->ls_name); | 461 | error = 0; |
| 448 | out: | ||
| 449 | return error; | 462 | return error; |
| 450 | } | 463 | } |
| 451 | 464 | ||
| @@ -623,17 +636,13 @@ static int device_open(struct inode *inode, struct file *file) | |||
| 623 | struct dlm_user_proc *proc; | 636 | struct dlm_user_proc *proc; |
| 624 | struct dlm_ls *ls; | 637 | struct dlm_ls *ls; |
| 625 | 638 | ||
| 626 | lock_kernel(); | ||
| 627 | ls = dlm_find_lockspace_device(iminor(inode)); | 639 | ls = dlm_find_lockspace_device(iminor(inode)); |
| 628 | if (!ls) { | 640 | if (!ls) |
| 629 | unlock_kernel(); | ||
| 630 | return -ENOENT; | 641 | return -ENOENT; |
| 631 | } | ||
| 632 | 642 | ||
| 633 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); | 643 | proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL); |
| 634 | if (!proc) { | 644 | if (!proc) { |
| 635 | dlm_put_lockspace(ls); | 645 | dlm_put_lockspace(ls); |
| 636 | unlock_kernel(); | ||
| 637 | return -ENOMEM; | 646 | return -ENOMEM; |
| 638 | } | 647 | } |
| 639 | 648 | ||
| @@ -645,7 +654,6 @@ static int device_open(struct inode *inode, struct file *file) | |||
| 645 | spin_lock_init(&proc->locks_spin); | 654 | spin_lock_init(&proc->locks_spin); |
| 646 | init_waitqueue_head(&proc->wait); | 655 | init_waitqueue_head(&proc->wait); |
| 647 | file->private_data = proc; | 656 | file->private_data = proc; |
| 648 | unlock_kernel(); | ||
| 649 | 657 | ||
| 650 | return 0; | 658 | return 0; |
| 651 | } | 659 | } |
| @@ -878,9 +886,28 @@ static unsigned int device_poll(struct file *file, poll_table *wait) | |||
| 878 | return 0; | 886 | return 0; |
| 879 | } | 887 | } |
| 880 | 888 | ||
| 889 | int dlm_user_daemon_available(void) | ||
| 890 | { | ||
| 891 | /* dlm_controld hasn't started (or, has started, but not | ||
| 892 | properly populated configfs) */ | ||
| 893 | |||
| 894 | if (!dlm_our_nodeid()) | ||
| 895 | return 0; | ||
| 896 | |||
| 897 | /* This is to deal with versions of dlm_controld that don't | ||
| 898 | know about the monitor device. We assume that if the | ||
| 899 | dlm_controld was started (above), but the monitor device | ||
| 900 | was never opened, that it's an old version. dlm_controld | ||
| 901 | should open the monitor device before populating configfs. */ | ||
| 902 | |||
| 903 | if (dlm_monitor_unused) | ||
| 904 | return 1; | ||
| 905 | |||
| 906 | return atomic_read(&dlm_monitor_opened) ? 1 : 0; | ||
| 907 | } | ||
| 908 | |||
| 881 | static int ctl_device_open(struct inode *inode, struct file *file) | 909 | static int ctl_device_open(struct inode *inode, struct file *file) |
| 882 | { | 910 | { |
| 883 | cycle_kernel_lock(); | ||
| 884 | file->private_data = NULL; | 911 | file->private_data = NULL; |
| 885 | return 0; | 912 | return 0; |
| 886 | } | 913 | } |
| @@ -890,6 +917,20 @@ static int ctl_device_close(struct inode *inode, struct file *file) | |||
| 890 | return 0; | 917 | return 0; |
| 891 | } | 918 | } |
| 892 | 919 | ||
| 920 | static int monitor_device_open(struct inode *inode, struct file *file) | ||
| 921 | { | ||
| 922 | atomic_inc(&dlm_monitor_opened); | ||
| 923 | dlm_monitor_unused = 0; | ||
| 924 | return 0; | ||
| 925 | } | ||
| 926 | |||
| 927 | static int monitor_device_close(struct inode *inode, struct file *file) | ||
| 928 | { | ||
| 929 | if (atomic_dec_and_test(&dlm_monitor_opened)) | ||
| 930 | dlm_stop_lockspaces(); | ||
| 931 | return 0; | ||
| 932 | } | ||
| 933 | |||
| 893 | static const struct file_operations device_fops = { | 934 | static const struct file_operations device_fops = { |
| 894 | .open = device_open, | 935 | .open = device_open, |
| 895 | .release = device_close, | 936 | .release = device_close, |
| @@ -913,19 +954,42 @@ static struct miscdevice ctl_device = { | |||
| 913 | .minor = MISC_DYNAMIC_MINOR, | 954 | .minor = MISC_DYNAMIC_MINOR, |
| 914 | }; | 955 | }; |
| 915 | 956 | ||
| 957 | static const struct file_operations monitor_device_fops = { | ||
| 958 | .open = monitor_device_open, | ||
| 959 | .release = monitor_device_close, | ||
| 960 | .owner = THIS_MODULE, | ||
| 961 | }; | ||
| 962 | |||
| 963 | static struct miscdevice monitor_device = { | ||
| 964 | .name = "dlm-monitor", | ||
| 965 | .fops = &monitor_device_fops, | ||
| 966 | .minor = MISC_DYNAMIC_MINOR, | ||
| 967 | }; | ||
| 968 | |||
| 916 | int __init dlm_user_init(void) | 969 | int __init dlm_user_init(void) |
| 917 | { | 970 | { |
| 918 | int error; | 971 | int error; |
| 919 | 972 | ||
| 973 | atomic_set(&dlm_monitor_opened, 0); | ||
| 974 | |||
| 920 | error = misc_register(&ctl_device); | 975 | error = misc_register(&ctl_device); |
| 921 | if (error) | 976 | if (error) { |
| 922 | log_print("misc_register failed for control device"); | 977 | log_print("misc_register failed for control device"); |
| 978 | goto out; | ||
| 979 | } | ||
| 923 | 980 | ||
| 981 | error = misc_register(&monitor_device); | ||
| 982 | if (error) { | ||
| 983 | log_print("misc_register failed for monitor device"); | ||
| 984 | misc_deregister(&ctl_device); | ||
| 985 | } | ||
| 986 | out: | ||
| 924 | return error; | 987 | return error; |
| 925 | } | 988 | } |
| 926 | 989 | ||
| 927 | void dlm_user_exit(void) | 990 | void dlm_user_exit(void) |
| 928 | { | 991 | { |
| 929 | misc_deregister(&ctl_device); | 992 | misc_deregister(&ctl_device); |
| 993 | misc_deregister(&monitor_device); | ||
| 930 | } | 994 | } |
| 931 | 995 | ||
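From the daemon side, the expectation encoded in dlm_user_daemon_available() is that dlm_controld opens the dlm-monitor device before it populates configfs and keeps it open for its lifetime; the kernel treats the final close as the daemon going away and stops all lockspaces. A hedged userspace sketch of that handshake (the device paths are the conventional misc-device locations, not spelled out in the patch):

/* Userspace sketch: hold the monitor device open for the daemon's lifetime. */
#include <fcntl.h>
#include <unistd.h>

int open_dlm_monitor(void)
{
        /* udev typically creates /dev/misc/dlm-monitor or /dev/dlm-monitor */
        int fd = open("/dev/misc/dlm-monitor", O_RDONLY);

        if (fd < 0)
                fd = open("/dev/dlm-monitor", O_RDONLY);
        return fd;      /* keep open; the last close stops all lockspaces */
}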
diff --git a/fs/dlm/user.h b/fs/dlm/user.h index d38e9f3e4151..35eb6a13d616 100644 --- a/fs/dlm/user.h +++ b/fs/dlm/user.h | |||
| @@ -1,5 +1,5 @@ | |||
| 1 | /* | 1 | /* |
| 2 | * Copyright (C) 2006 Red Hat, Inc. All rights reserved. | 2 | * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved. |
| 3 | * | 3 | * |
| 4 | * This copyrighted material is made available to anyone wishing to use, | 4 | * This copyrighted material is made available to anyone wishing to use, |
| 5 | * modify, copy, or redistribute it subject to the terms and conditions | 5 | * modify, copy, or redistribute it subject to the terms and conditions |
| @@ -12,5 +12,7 @@ | |||
| 12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); | 12 | void dlm_user_add_ast(struct dlm_lkb *lkb, int type); |
| 13 | int dlm_user_init(void); | 13 | int dlm_user_init(void); |
| 14 | void dlm_user_exit(void); | 14 | void dlm_user_exit(void); |
| 15 | int dlm_device_deregister(struct dlm_ls *ls); | ||
| 16 | int dlm_user_daemon_available(void); | ||
| 15 | 17 | ||
| 16 | #endif | 18 | #endif |
diff --git a/fs/dquot.c b/fs/dquot.c index 8ec4d6cc7633..da30a27f2242 100644 --- a/fs/dquot.c +++ b/fs/dquot.c | |||
| @@ -9,8 +9,6 @@ | |||
| 9 | * implementation is based on one of the several variants of the LINUX | 9 | * implementation is based on one of the several variants of the LINUX |
| 10 | * inode-subsystem with added complexity of the diskquota system. | 10 | * inode-subsystem with added complexity of the diskquota system. |
| 11 | * | 11 | * |
| 12 | * Version: $Id: dquot.c,v 6.3 1996/11/17 18:35:34 mvw Exp mvw $ | ||
| 13 | * | ||
| 14 | * Author: Marco van Wieringen <mvw@planets.elm.net> | 12 | * Author: Marco van Wieringen <mvw@planets.elm.net> |
| 15 | * | 13 | * |
| 16 | * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 | 14 | * Fixes: Dmitry Gorodchanin <pgmdsg@ibi.com>, 11 Feb 96 |
| @@ -895,10 +893,9 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
| 895 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) | 893 | warntype == QUOTA_NL_BSOFTBELOW || !need_print_warning(dquot)) |
| 896 | return; | 894 | return; |
| 897 | 895 | ||
| 898 | mutex_lock(&tty_mutex); | ||
| 899 | tty = get_current_tty(); | 896 | tty = get_current_tty(); |
| 900 | if (!tty) | 897 | if (!tty) |
| 901 | goto out_lock; | 898 | return; |
| 902 | tty_write_message(tty, dquot->dq_sb->s_id); | 899 | tty_write_message(tty, dquot->dq_sb->s_id); |
| 903 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) | 900 | if (warntype == QUOTA_NL_ISOFTWARN || warntype == QUOTA_NL_BSOFTWARN) |
| 904 | tty_write_message(tty, ": warning, "); | 901 | tty_write_message(tty, ": warning, "); |
| @@ -926,8 +923,7 @@ static void print_warning(struct dquot *dquot, const int warntype) | |||
| 926 | break; | 923 | break; |
| 927 | } | 924 | } |
| 928 | tty_write_message(tty, msg); | 925 | tty_write_message(tty, msg); |
| 929 | out_lock: | 926 | tty_kref_put(tty); |
| 930 | mutex_unlock(&tty_mutex); | ||
| 931 | } | 927 | } |
| 932 | #endif | 928 | #endif |
| 933 | 929 | ||
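The quota warning path above now relies on tty reference counting instead of the global tty_mutex: get_current_tty() hands back a referenced tty (or NULL), and the caller drops that reference with tty_kref_put() when it is done writing. The resulting pattern, sketched in isolation with an illustrative helper name:

/* Sketch: emit a one-line warning on the caller's controlling tty. */
static void warn_on_tty(char *text)
{
        struct tty_struct *tty = get_current_tty();     /* takes a reference */

        if (!tty)
                return;                                 /* no controlling tty */
        tty_write_message(tty, text);
        tty_kref_put(tty);                              /* drop the reference */
}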
diff --git a/fs/ecryptfs/Makefile b/fs/ecryptfs/Makefile index b4755a85996e..2cc9ee4ad2eb 100644 --- a/fs/ecryptfs/Makefile +++ b/fs/ecryptfs/Makefile | |||
| @@ -4,4 +4,4 @@ | |||
| 4 | 4 | ||
| 5 | obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o | 5 | obj-$(CONFIG_ECRYPT_FS) += ecryptfs.o |
| 6 | 6 | ||
| 7 | ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o netlink.o miscdev.o kthread.o debug.o | 7 | ecryptfs-objs := dentry.o file.o inode.o main.o super.o mmap.o read_write.o crypto.o keystore.o messaging.o miscdev.o kthread.o debug.o |
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index b73fb752c5f8..3504cf9df358 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h | |||
| @@ -79,11 +79,6 @@ | |||
| 79 | #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 | 79 | #define ECRYPTFS_MAX_PKI_NAME_BYTES 16 |
| 80 | #define ECRYPTFS_DEFAULT_NUM_USERS 4 | 80 | #define ECRYPTFS_DEFAULT_NUM_USERS 4 |
| 81 | #define ECRYPTFS_MAX_NUM_USERS 32768 | 81 | #define ECRYPTFS_MAX_NUM_USERS 32768 |
| 82 | #define ECRYPTFS_TRANSPORT_NETLINK 0 | ||
| 83 | #define ECRYPTFS_TRANSPORT_CONNECTOR 1 | ||
| 84 | #define ECRYPTFS_TRANSPORT_RELAYFS 2 | ||
| 85 | #define ECRYPTFS_TRANSPORT_MISCDEV 3 | ||
| 86 | #define ECRYPTFS_DEFAULT_TRANSPORT ECRYPTFS_TRANSPORT_MISCDEV | ||
| 87 | #define ECRYPTFS_XATTR_NAME "user.ecryptfs" | 82 | #define ECRYPTFS_XATTR_NAME "user.ecryptfs" |
| 88 | 83 | ||
| 89 | #define RFC2440_CIPHER_DES3_EDE 0x02 | 84 | #define RFC2440_CIPHER_DES3_EDE 0x02 |
| @@ -400,8 +395,6 @@ struct ecryptfs_msg_ctx { | |||
| 400 | struct mutex mux; | 395 | struct mutex mux; |
| 401 | }; | 396 | }; |
| 402 | 397 | ||
| 403 | extern unsigned int ecryptfs_transport; | ||
| 404 | |||
| 405 | struct ecryptfs_daemon; | 398 | struct ecryptfs_daemon; |
| 406 | 399 | ||
| 407 | struct ecryptfs_daemon { | 400 | struct ecryptfs_daemon { |
| @@ -627,31 +620,20 @@ int | |||
| 627 | ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, | 620 | ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value, |
| 628 | size_t size, int flags); | 621 | size_t size, int flags); |
| 629 | int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); | 622 | int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode); |
| 630 | int ecryptfs_process_helo(unsigned int transport, uid_t euid, | 623 | int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, |
| 631 | struct user_namespace *user_ns, struct pid *pid); | 624 | struct pid *pid); |
| 632 | int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, | 625 | int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns, |
| 633 | struct pid *pid); | 626 | struct pid *pid); |
| 634 | int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, | 627 | int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid, |
| 635 | struct user_namespace *user_ns, struct pid *pid, | 628 | struct user_namespace *user_ns, struct pid *pid, |
| 636 | u32 seq); | 629 | u32 seq); |
| 637 | int ecryptfs_send_message(unsigned int transport, char *data, int data_len, | 630 | int ecryptfs_send_message(char *data, int data_len, |
| 638 | struct ecryptfs_msg_ctx **msg_ctx); | 631 | struct ecryptfs_msg_ctx **msg_ctx); |
| 639 | int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, | 632 | int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx, |
| 640 | struct ecryptfs_message **emsg); | 633 | struct ecryptfs_message **emsg); |
| 641 | int ecryptfs_init_messaging(unsigned int transport); | 634 | int ecryptfs_init_messaging(void); |
| 642 | void ecryptfs_release_messaging(unsigned int transport); | 635 | void ecryptfs_release_messaging(void); |
| 643 | 636 | ||
| 644 | int ecryptfs_send_netlink(char *data, int data_len, | ||
| 645 | struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, | ||
| 646 | u16 msg_flags, struct pid *daemon_pid); | ||
| 647 | int ecryptfs_init_netlink(void); | ||
| 648 | void ecryptfs_release_netlink(void); | ||
| 649 | |||
| 650 | int ecryptfs_send_connector(char *data, int data_len, | ||
| 651 | struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, | ||
| 652 | u16 msg_flags, struct pid *daemon_pid); | ||
| 653 | int ecryptfs_init_connector(void); | ||
| 654 | void ecryptfs_release_connector(void); | ||
| 655 | void | 637 | void |
| 656 | ecryptfs_write_header_metadata(char *virt, | 638 | ecryptfs_write_header_metadata(char *virt, |
| 657 | struct ecryptfs_crypt_stat *crypt_stat, | 639 | struct ecryptfs_crypt_stat *crypt_stat, |
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index 9244d653743e..eb3dc4c7ac06 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c | |||
| @@ -71,12 +71,11 @@ struct ecryptfs_getdents_callback { | |||
| 71 | void *dirent; | 71 | void *dirent; |
| 72 | struct dentry *dentry; | 72 | struct dentry *dentry; |
| 73 | filldir_t filldir; | 73 | filldir_t filldir; |
| 74 | int err; | ||
| 75 | int filldir_called; | 74 | int filldir_called; |
| 76 | int entries_written; | 75 | int entries_written; |
| 77 | }; | 76 | }; |
| 78 | 77 | ||
| 79 | /* Inspired by generic filldir in fs/readir.c */ | 78 | /* Inspired by generic filldir in fs/readdir.c */ |
| 80 | static int | 79 | static int |
| 81 | ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, | 80 | ecryptfs_filldir(void *dirent, const char *name, int namelen, loff_t offset, |
| 82 | u64 ino, unsigned int d_type) | 81 | u64 ino, unsigned int d_type) |
| @@ -125,18 +124,18 @@ static int ecryptfs_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 125 | buf.dirent = dirent; | 124 | buf.dirent = dirent; |
| 126 | buf.dentry = file->f_path.dentry; | 125 | buf.dentry = file->f_path.dentry; |
| 127 | buf.filldir = filldir; | 126 | buf.filldir = filldir; |
| 128 | retry: | ||
| 129 | buf.filldir_called = 0; | 127 | buf.filldir_called = 0; |
| 130 | buf.entries_written = 0; | 128 | buf.entries_written = 0; |
| 131 | buf.err = 0; | ||
| 132 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); | 129 | rc = vfs_readdir(lower_file, ecryptfs_filldir, (void *)&buf); |
| 133 | if (buf.err) | ||
| 134 | rc = buf.err; | ||
| 135 | if (buf.filldir_called && !buf.entries_written) | ||
| 136 | goto retry; | ||
| 137 | file->f_pos = lower_file->f_pos; | 130 | file->f_pos = lower_file->f_pos; |
| 131 | if (rc < 0) | ||
| 132 | goto out; | ||
| 133 | if (buf.filldir_called && !buf.entries_written) | ||
| 134 | goto out; | ||
| 138 | if (rc >= 0) | 135 | if (rc >= 0) |
| 139 | fsstack_copy_attr_atime(inode, lower_file->f_path.dentry->d_inode); | 136 | fsstack_copy_attr_atime(inode, |
| 137 | lower_file->f_path.dentry->d_inode); | ||
| 138 | out: | ||
| 140 | return rc; | 139 | return rc; |
| 141 | } | 140 | } |
| 142 | 141 | ||
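For context, ecryptfs_readdir() follows the usual stacked-readdir shape: the upper filesystem passes its own callback and context down through vfs_readdir(), counts how often the lower filesystem invoked it and how many entries it actually forwarded, and with this patch simply returns (instead of retrying) when nothing useful came back. A minimal sketch of that wrapper, with illustrative names:

/* Sketch: forward a readdir to the lower file and track what got through. */
struct stacked_getdents {
        void *dirent;           /* opaque buffer from the caller */
        filldir_t filldir;      /* the caller's callback */
        int filldir_called;
        int entries_written;
};

static int stacked_filldir(void *ctx_ptr, const char *name, int namelen,
                           loff_t offset, u64 ino, unsigned int d_type)
{
        struct stacked_getdents *ctx = ctx_ptr;
        int rc;

        ctx->filldir_called++;
        /* a real implementation would translate 'name' here */
        rc = ctx->filldir(ctx->dirent, name, namelen, offset, ino, d_type);
        if (rc >= 0)
                ctx->entries_written++;
        return rc;
}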
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index f5b76a331b9c..e22bc3961345 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c | |||
| @@ -234,8 +234,8 @@ parse_tag_65_packet(struct ecryptfs_session_key *session_key, u8 *cipher_code, | |||
| 234 | } | 234 | } |
| 235 | i += data_len; | 235 | i += data_len; |
| 236 | if (message_len < (i + m_size)) { | 236 | if (message_len < (i + m_size)) { |
| 237 | ecryptfs_printk(KERN_ERR, "The received netlink message is " | 237 | ecryptfs_printk(KERN_ERR, "The message received from ecryptfsd " |
| 238 | "shorter than expected\n"); | 238 | "is shorter than expected\n"); |
| 239 | rc = -EIO; | 239 | rc = -EIO; |
| 240 | goto out; | 240 | goto out; |
| 241 | } | 241 | } |
| @@ -438,8 +438,8 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
| 438 | struct ecryptfs_msg_ctx *msg_ctx; | 438 | struct ecryptfs_msg_ctx *msg_ctx; |
| 439 | struct ecryptfs_message *msg = NULL; | 439 | struct ecryptfs_message *msg = NULL; |
| 440 | char *auth_tok_sig; | 440 | char *auth_tok_sig; |
| 441 | char *netlink_message; | 441 | char *payload; |
| 442 | size_t netlink_message_length; | 442 | size_t payload_len; |
| 443 | int rc; | 443 | int rc; |
| 444 | 444 | ||
| 445 | rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); | 445 | rc = ecryptfs_get_auth_tok_sig(&auth_tok_sig, auth_tok); |
| @@ -449,15 +449,15 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
| 449 | goto out; | 449 | goto out; |
| 450 | } | 450 | } |
| 451 | rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), | 451 | rc = write_tag_64_packet(auth_tok_sig, &(auth_tok->session_key), |
| 452 | &netlink_message, &netlink_message_length); | 452 | &payload, &payload_len); |
| 453 | if (rc) { | 453 | if (rc) { |
| 454 | ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); | 454 | ecryptfs_printk(KERN_ERR, "Failed to write tag 64 packet\n"); |
| 455 | goto out; | 455 | goto out; |
| 456 | } | 456 | } |
| 457 | rc = ecryptfs_send_message(ecryptfs_transport, netlink_message, | 457 | rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); |
| 458 | netlink_message_length, &msg_ctx); | ||
| 459 | if (rc) { | 458 | if (rc) { |
| 460 | ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); | 459 | ecryptfs_printk(KERN_ERR, "Error sending message to " |
| 460 | "ecryptfsd\n"); | ||
| 461 | goto out; | 461 | goto out; |
| 462 | } | 462 | } |
| 463 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); | 463 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); |
| @@ -1333,23 +1333,22 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
| 1333 | struct ecryptfs_key_record *key_rec) | 1333 | struct ecryptfs_key_record *key_rec) |
| 1334 | { | 1334 | { |
| 1335 | struct ecryptfs_msg_ctx *msg_ctx = NULL; | 1335 | struct ecryptfs_msg_ctx *msg_ctx = NULL; |
| 1336 | char *netlink_payload; | 1336 | char *payload = NULL; |
| 1337 | size_t netlink_payload_length; | 1337 | size_t payload_len; |
| 1338 | struct ecryptfs_message *msg; | 1338 | struct ecryptfs_message *msg; |
| 1339 | int rc; | 1339 | int rc; |
| 1340 | 1340 | ||
| 1341 | rc = write_tag_66_packet(auth_tok->token.private_key.signature, | 1341 | rc = write_tag_66_packet(auth_tok->token.private_key.signature, |
| 1342 | ecryptfs_code_for_cipher_string(crypt_stat), | 1342 | ecryptfs_code_for_cipher_string(crypt_stat), |
| 1343 | crypt_stat, &netlink_payload, | 1343 | crypt_stat, &payload, &payload_len); |
| 1344 | &netlink_payload_length); | ||
| 1345 | if (rc) { | 1344 | if (rc) { |
| 1346 | ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); | 1345 | ecryptfs_printk(KERN_ERR, "Error generating tag 66 packet\n"); |
| 1347 | goto out; | 1346 | goto out; |
| 1348 | } | 1347 | } |
| 1349 | rc = ecryptfs_send_message(ecryptfs_transport, netlink_payload, | 1348 | rc = ecryptfs_send_message(payload, payload_len, &msg_ctx); |
| 1350 | netlink_payload_length, &msg_ctx); | ||
| 1351 | if (rc) { | 1349 | if (rc) { |
| 1352 | ecryptfs_printk(KERN_ERR, "Error sending netlink message\n"); | 1350 | ecryptfs_printk(KERN_ERR, "Error sending message to " |
| 1351 | "ecryptfsd\n"); | ||
| 1353 | goto out; | 1352 | goto out; |
| 1354 | } | 1353 | } |
| 1355 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); | 1354 | rc = ecryptfs_wait_for_response(msg_ctx, &msg); |
| @@ -1364,8 +1363,7 @@ pki_encrypt_session_key(struct ecryptfs_auth_tok *auth_tok, | |||
| 1364 | ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n"); | 1363 | ecryptfs_printk(KERN_ERR, "Error parsing tag 67 packet\n"); |
| 1365 | kfree(msg); | 1364 | kfree(msg); |
| 1366 | out: | 1365 | out: |
| 1367 | if (netlink_payload) | 1366 | kfree(payload); |
| 1368 | kfree(netlink_payload); | ||
| 1369 | return rc; | 1367 | return rc; |
| 1370 | } | 1368 | } |
| 1371 | /** | 1369 | /** |
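The two keystore.c hunks above follow the same request/response round trip against the userspace daemon once the transport argument is gone. A hedged sketch of that calling convention, using only the functions the hunks themselves call (the function and variable names below are illustrative, not part of the patch):

	/* Hedged sketch: hand a packet to ecryptfs_send_message(), block in
	 * ecryptfs_wait_for_response(), free the reply when done. */
	static int example_roundtrip(char *payload, size_t payload_len)
	{
		struct ecryptfs_msg_ctx *msg_ctx;
		struct ecryptfs_message *msg;
		int rc;

		rc = ecryptfs_send_message(payload, payload_len, &msg_ctx);
		if (rc)
			return rc;		/* e.g. no ecryptfsd registered */
		rc = ecryptfs_wait_for_response(msg_ctx, &msg);
		if (rc)
			return rc;		/* timed out waiting for the reply */
		/* ... parse msg->data / msg->data_len here ... */
		kfree(msg);
		return 0;
	}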
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c index 448dfd597b5f..046e027a4cb1 100644 --- a/fs/ecryptfs/main.c +++ b/fs/ecryptfs/main.c | |||
| @@ -30,7 +30,6 @@ | |||
| 30 | #include <linux/namei.h> | 30 | #include <linux/namei.h> |
| 31 | #include <linux/skbuff.h> | 31 | #include <linux/skbuff.h> |
| 32 | #include <linux/crypto.h> | 32 | #include <linux/crypto.h> |
| 33 | #include <linux/netlink.h> | ||
| 34 | #include <linux/mount.h> | 33 | #include <linux/mount.h> |
| 35 | #include <linux/pagemap.h> | 34 | #include <linux/pagemap.h> |
| 36 | #include <linux/key.h> | 35 | #include <linux/key.h> |
| @@ -49,8 +48,7 @@ MODULE_PARM_DESC(ecryptfs_verbosity, | |||
| 49 | "0, which is Quiet)"); | 48 | "0, which is Quiet)"); |
| 50 | 49 | ||
| 51 | /** | 50 | /** |
| 52 | * Module parameter that defines the number of netlink message buffer | 51 | * Module parameter that defines the number of message buffer elements |
| 53 | * elements | ||
| 54 | */ | 52 | */ |
| 55 | unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS; | 53 | unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS; |
| 56 | 54 | ||
| @@ -60,9 +58,9 @@ MODULE_PARM_DESC(ecryptfs_message_buf_len, | |||
| 60 | 58 | ||
| 61 | /** | 59 | /** |
| 62 | * Module parameter that defines the maximum guaranteed amount of time to wait | 60 | * Module parameter that defines the maximum guaranteed amount of time to wait |
| 63 | * for a response through netlink. The actual sleep time will be, more than | 61 | * for a response from ecryptfsd. The actual sleep time will be, more than |
| 64 | * likely, a small amount greater than this specified value, but only less if | 62 | * likely, a small amount greater than this specified value, but only less if |
| 65 | * the netlink message successfully arrives. | 63 | * the message successfully arrives. |
| 66 | */ | 64 | */ |
| 67 | signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ; | 65 | signed long ecryptfs_message_wait_timeout = ECRYPTFS_MAX_MSG_CTX_TTL / HZ; |
| 68 | 66 | ||
| @@ -83,8 +81,6 @@ module_param(ecryptfs_number_of_users, uint, 0); | |||
| 83 | MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of " | 81 | MODULE_PARM_DESC(ecryptfs_number_of_users, "An estimate of the number of " |
| 84 | "concurrent users of eCryptfs"); | 82 | "concurrent users of eCryptfs"); |
| 85 | 83 | ||
| 86 | unsigned int ecryptfs_transport = ECRYPTFS_DEFAULT_TRANSPORT; | ||
| 87 | |||
| 88 | void __ecryptfs_printk(const char *fmt, ...) | 84 | void __ecryptfs_printk(const char *fmt, ...) |
| 89 | { | 85 | { |
| 90 | va_list args; | 86 | va_list args; |
| @@ -211,7 +207,7 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig, | |||
| 211 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, | 207 | ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata, |
| 212 | ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; | 208 | ecryptfs_opt_encrypted_view, ecryptfs_opt_err }; |
| 213 | 209 | ||
| 214 | static match_table_t tokens = { | 210 | static const match_table_t tokens = { |
| 215 | {ecryptfs_opt_sig, "sig=%s"}, | 211 | {ecryptfs_opt_sig, "sig=%s"}, |
| 216 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, | 212 | {ecryptfs_opt_ecryptfs_sig, "ecryptfs_sig=%s"}, |
| 217 | {ecryptfs_opt_cipher, "cipher=%s"}, | 213 | {ecryptfs_opt_cipher, "cipher=%s"}, |
| @@ -779,10 +775,11 @@ static int __init ecryptfs_init(void) | |||
| 779 | "rc = [%d]\n", __func__, rc); | 775 | "rc = [%d]\n", __func__, rc); |
| 780 | goto out_do_sysfs_unregistration; | 776 | goto out_do_sysfs_unregistration; |
| 781 | } | 777 | } |
| 782 | rc = ecryptfs_init_messaging(ecryptfs_transport); | 778 | rc = ecryptfs_init_messaging(); |
| 783 | if (rc) { | 779 | if (rc) { |
| 784 | printk(KERN_ERR "Failure occured while attempting to " | 780 | printk(KERN_ERR "Failure occured while attempting to " |
| 785 | "initialize the eCryptfs netlink socket\n"); | 781 | "initialize the communications channel to " |
| 782 | "ecryptfsd\n"); | ||
| 786 | goto out_destroy_kthread; | 783 | goto out_destroy_kthread; |
| 787 | } | 784 | } |
| 788 | rc = ecryptfs_init_crypto(); | 785 | rc = ecryptfs_init_crypto(); |
| @@ -797,7 +794,7 @@ static int __init ecryptfs_init(void) | |||
| 797 | 794 | ||
| 798 | goto out; | 795 | goto out; |
| 799 | out_release_messaging: | 796 | out_release_messaging: |
| 800 | ecryptfs_release_messaging(ecryptfs_transport); | 797 | ecryptfs_release_messaging(); |
| 801 | out_destroy_kthread: | 798 | out_destroy_kthread: |
| 802 | ecryptfs_destroy_kthread(); | 799 | ecryptfs_destroy_kthread(); |
| 803 | out_do_sysfs_unregistration: | 800 | out_do_sysfs_unregistration: |
| @@ -818,7 +815,7 @@ static void __exit ecryptfs_exit(void) | |||
| 818 | if (rc) | 815 | if (rc) |
| 819 | printk(KERN_ERR "Failure whilst attempting to destroy crypto; " | 816 | printk(KERN_ERR "Failure whilst attempting to destroy crypto; " |
| 820 | "rc = [%d]\n", rc); | 817 | "rc = [%d]\n", rc); |
| 821 | ecryptfs_release_messaging(ecryptfs_transport); | 818 | ecryptfs_release_messaging(); |
| 822 | ecryptfs_destroy_kthread(); | 819 | ecryptfs_destroy_kthread(); |
| 823 | do_sysfs_unregistration(); | 820 | do_sysfs_unregistration(); |
| 824 | unregister_filesystem(&ecryptfs_fs_type); | 821 | unregister_filesystem(&ecryptfs_fs_type); |
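With the transport parameter dropped, module init and exit now pair the messaging calls directly. A hedged, reduced sketch of that pairing (error labels and the surrounding registration steps from the real main.c are elided):

	/* Hedged sketch: bring the channel to ecryptfsd up and down without
	 * naming a transport. */
	static int __init example_init(void)
	{
		int rc;

		rc = ecryptfs_init_messaging();
		if (rc) {
			printk(KERN_ERR "Failed to set up the channel to ecryptfsd\n");
			return rc;
		}
		/* ... register the filesystem, crypto, etc. ... */
		return 0;
	}

	static void __exit example_exit(void)
	{
		/* reverse order of init */
		ecryptfs_release_messaging();
	}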
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c index 1b5c20058acb..c6983978a31e 100644 --- a/fs/ecryptfs/messaging.c +++ b/fs/ecryptfs/messaging.c | |||
| @@ -134,12 +134,11 @@ out: | |||
| 134 | } | 134 | } |
| 135 | 135 | ||
| 136 | static int | 136 | static int |
| 137 | ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | 137 | ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, |
| 138 | u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx); | 138 | struct ecryptfs_msg_ctx **msg_ctx); |
| 139 | 139 | ||
| 140 | /** | 140 | /** |
| 141 | * ecryptfs_send_raw_message | 141 | * ecryptfs_send_raw_message |
| 142 | * @transport: Transport type | ||
| 143 | * @msg_type: Message type | 142 | * @msg_type: Message type |
| 144 | * @daemon: Daemon struct for recipient of message | 143 | * @daemon: Daemon struct for recipient of message |
| 145 | * | 144 | * |
| @@ -150,38 +149,25 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | |||
| 150 | * | 149 | * |
| 151 | * Returns zero on success; non-zero otherwise | 150 | * Returns zero on success; non-zero otherwise |
| 152 | */ | 151 | */ |
| 153 | static int ecryptfs_send_raw_message(unsigned int transport, u8 msg_type, | 152 | static int ecryptfs_send_raw_message(u8 msg_type, |
| 154 | struct ecryptfs_daemon *daemon) | 153 | struct ecryptfs_daemon *daemon) |
| 155 | { | 154 | { |
| 156 | struct ecryptfs_msg_ctx *msg_ctx; | 155 | struct ecryptfs_msg_ctx *msg_ctx; |
| 157 | int rc; | 156 | int rc; |
| 158 | 157 | ||
| 159 | switch(transport) { | 158 | rc = ecryptfs_send_message_locked(NULL, 0, msg_type, &msg_ctx); |
| 160 | case ECRYPTFS_TRANSPORT_NETLINK: | 159 | if (rc) { |
| 161 | rc = ecryptfs_send_netlink(NULL, 0, NULL, msg_type, 0, | 160 | printk(KERN_ERR "%s: Error whilst attempting to send " |
| 162 | daemon->pid); | 161 | "message to ecryptfsd; rc = [%d]\n", __func__, rc); |
| 163 | break; | 162 | goto out; |
| 164 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
| 165 | rc = ecryptfs_send_message_locked(transport, NULL, 0, msg_type, | ||
| 166 | &msg_ctx); | ||
| 167 | if (rc) { | ||
| 168 | printk(KERN_ERR "%s: Error whilst attempting to send " | ||
| 169 | "message via procfs; rc = [%d]\n", __func__, rc); | ||
| 170 | goto out; | ||
| 171 | } | ||
| 172 | /* Raw messages are logically context-free (e.g., no | ||
| 173 | * reply is expected), so we set the state of the | ||
| 174 | * ecryptfs_msg_ctx object to indicate that it should | ||
| 175 | * be freed as soon as the transport sends out the message. */ | ||
| 176 | mutex_lock(&msg_ctx->mux); | ||
| 177 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; | ||
| 178 | mutex_unlock(&msg_ctx->mux); | ||
| 179 | break; | ||
| 180 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
| 181 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
| 182 | default: | ||
| 183 | rc = -ENOSYS; | ||
| 184 | } | 163 | } |
| 164 | /* Raw messages are logically context-free (e.g., no | ||
| 165 | * reply is expected), so we set the state of the | ||
| 166 | * ecryptfs_msg_ctx object to indicate that it should | ||
| 167 | * be freed as soon as the message is sent. */ | ||
| 168 | mutex_lock(&msg_ctx->mux); | ||
| 169 | msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_NO_REPLY; | ||
| 170 | mutex_unlock(&msg_ctx->mux); | ||
| 185 | out: | 171 | out: |
| 186 | return rc; | 172 | return rc; |
| 187 | } | 173 | } |
| @@ -227,7 +213,6 @@ out: | |||
| 227 | 213 | ||
| 228 | /** | 214 | /** |
| 229 | * ecryptfs_process_helo | 215 | * ecryptfs_process_helo |
| 230 | * @transport: The underlying transport (netlink, etc.) | ||
| 231 | * @euid: The user ID owner of the message | 216 | * @euid: The user ID owner of the message |
| 232 | * @user_ns: The namespace in which @euid applies | 217 | * @user_ns: The namespace in which @euid applies |
| 233 | * @pid: The process ID for the userspace program that sent the | 218 | * @pid: The process ID for the userspace program that sent the |
| @@ -239,8 +224,8 @@ out: | |||
| 239 | * Returns zero after adding a new daemon to the hash list; | 224 | * Returns zero after adding a new daemon to the hash list; |
| 240 | * non-zero otherwise. | 225 | * non-zero otherwise. |
| 241 | */ | 226 | */ |
| 242 | int ecryptfs_process_helo(unsigned int transport, uid_t euid, | 227 | int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns, |
| 243 | struct user_namespace *user_ns, struct pid *pid) | 228 | struct pid *pid) |
| 244 | { | 229 | { |
| 245 | struct ecryptfs_daemon *new_daemon; | 230 | struct ecryptfs_daemon *new_daemon; |
| 246 | struct ecryptfs_daemon *old_daemon; | 231 | struct ecryptfs_daemon *old_daemon; |
| @@ -252,8 +237,7 @@ int ecryptfs_process_helo(unsigned int transport, uid_t euid, | |||
| 252 | printk(KERN_WARNING "Received request from user [%d] " | 237 | printk(KERN_WARNING "Received request from user [%d] " |
| 253 | "to register daemon [0x%p]; unregistering daemon " | 238 | "to register daemon [0x%p]; unregistering daemon " |
| 254 | "[0x%p]\n", euid, pid, old_daemon->pid); | 239 | "[0x%p]\n", euid, pid, old_daemon->pid); |
| 255 | rc = ecryptfs_send_raw_message(transport, ECRYPTFS_MSG_QUIT, | 240 | rc = ecryptfs_send_raw_message(ECRYPTFS_MSG_QUIT, old_daemon); |
| 256 | old_daemon); | ||
| 257 | if (rc) | 241 | if (rc) |
| 258 | printk(KERN_WARNING "Failed to send QUIT " | 242 | printk(KERN_WARNING "Failed to send QUIT " |
| 259 | "message to daemon [0x%p]; rc = [%d]\n", | 243 | "message to daemon [0x%p]; rc = [%d]\n", |
| @@ -467,8 +451,6 @@ out: | |||
| 467 | 451 | ||
| 468 | /** | 452 | /** |
| 469 | * ecryptfs_send_message_locked | 453 | * ecryptfs_send_message_locked |
| 470 | * @transport: The transport over which to send the message (i.e., | ||
| 471 | * netlink) | ||
| 472 | * @data: The data to send | 454 | * @data: The data to send |
| 473 | * @data_len: The length of data | 455 | * @data_len: The length of data |
| 474 | * @msg_ctx: The message context allocated for the send | 456 | * @msg_ctx: The message context allocated for the send |
| @@ -478,8 +460,8 @@ out: | |||
| 478 | * Returns zero on success; non-zero otherwise | 460 | * Returns zero on success; non-zero otherwise |
| 479 | */ | 461 | */ |
| 480 | static int | 462 | static int |
| 481 | ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | 463 | ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type, |
| 482 | u8 msg_type, struct ecryptfs_msg_ctx **msg_ctx) | 464 | struct ecryptfs_msg_ctx **msg_ctx) |
| 483 | { | 465 | { |
| 484 | struct ecryptfs_daemon *daemon; | 466 | struct ecryptfs_daemon *daemon; |
| 485 | int rc; | 467 | int rc; |
| @@ -503,20 +485,8 @@ ecryptfs_send_message_locked(unsigned int transport, char *data, int data_len, | |||
| 503 | ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); | 485 | ecryptfs_msg_ctx_free_to_alloc(*msg_ctx); |
| 504 | mutex_unlock(&(*msg_ctx)->mux); | 486 | mutex_unlock(&(*msg_ctx)->mux); |
| 505 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); | 487 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); |
| 506 | switch (transport) { | 488 | rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, 0, |
| 507 | case ECRYPTFS_TRANSPORT_NETLINK: | 489 | daemon); |
| 508 | rc = ecryptfs_send_netlink(data, data_len, *msg_ctx, msg_type, | ||
| 509 | 0, daemon->pid); | ||
| 510 | break; | ||
| 511 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
| 512 | rc = ecryptfs_send_miscdev(data, data_len, *msg_ctx, msg_type, | ||
| 513 | 0, daemon); | ||
| 514 | break; | ||
| 515 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
| 516 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
| 517 | default: | ||
| 518 | rc = -ENOSYS; | ||
| 519 | } | ||
| 520 | if (rc) | 490 | if (rc) |
| 521 | printk(KERN_ERR "%s: Error attempting to send message to " | 491 | printk(KERN_ERR "%s: Error attempting to send message to " |
| 522 | "userspace daemon; rc = [%d]\n", __func__, rc); | 492 | "userspace daemon; rc = [%d]\n", __func__, rc); |
| @@ -526,8 +496,6 @@ out: | |||
| 526 | 496 | ||
| 527 | /** | 497 | /** |
| 528 | * ecryptfs_send_message | 498 | * ecryptfs_send_message |
| 529 | * @transport: The transport over which to send the message (i.e., | ||
| 530 | * netlink) | ||
| 531 | * @data: The data to send | 499 | * @data: The data to send |
| 532 | * @data_len: The length of data | 500 | * @data_len: The length of data |
| 533 | * @msg_ctx: The message context allocated for the send | 501 | * @msg_ctx: The message context allocated for the send |
| @@ -536,14 +504,14 @@ out: | |||
| 536 | * | 504 | * |
| 537 | * Returns zero on success; non-zero otherwise | 505 | * Returns zero on success; non-zero otherwise |
| 538 | */ | 506 | */ |
| 539 | int ecryptfs_send_message(unsigned int transport, char *data, int data_len, | 507 | int ecryptfs_send_message(char *data, int data_len, |
| 540 | struct ecryptfs_msg_ctx **msg_ctx) | 508 | struct ecryptfs_msg_ctx **msg_ctx) |
| 541 | { | 509 | { |
| 542 | int rc; | 510 | int rc; |
| 543 | 511 | ||
| 544 | mutex_lock(&ecryptfs_daemon_hash_mux); | 512 | mutex_lock(&ecryptfs_daemon_hash_mux); |
| 545 | rc = ecryptfs_send_message_locked(transport, data, data_len, | 513 | rc = ecryptfs_send_message_locked(data, data_len, ECRYPTFS_MSG_REQUEST, |
| 546 | ECRYPTFS_MSG_REQUEST, msg_ctx); | 514 | msg_ctx); |
| 547 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 515 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
| 548 | return rc; | 516 | return rc; |
| 549 | } | 517 | } |
| @@ -586,7 +554,7 @@ sleep: | |||
| 586 | return rc; | 554 | return rc; |
| 587 | } | 555 | } |
| 588 | 556 | ||
| 589 | int ecryptfs_init_messaging(unsigned int transport) | 557 | int ecryptfs_init_messaging(void) |
| 590 | { | 558 | { |
| 591 | int i; | 559 | int i; |
| 592 | int rc = 0; | 560 | int rc = 0; |
| @@ -639,27 +607,14 @@ int ecryptfs_init_messaging(unsigned int transport) | |||
| 639 | mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); | 607 | mutex_unlock(&ecryptfs_msg_ctx_arr[i].mux); |
| 640 | } | 608 | } |
| 641 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); | 609 | mutex_unlock(&ecryptfs_msg_ctx_lists_mux); |
| 642 | switch(transport) { | 610 | rc = ecryptfs_init_ecryptfs_miscdev(); |
| 643 | case ECRYPTFS_TRANSPORT_NETLINK: | 611 | if (rc) |
| 644 | rc = ecryptfs_init_netlink(); | 612 | ecryptfs_release_messaging(); |
| 645 | if (rc) | ||
| 646 | ecryptfs_release_messaging(transport); | ||
| 647 | break; | ||
| 648 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
| 649 | rc = ecryptfs_init_ecryptfs_miscdev(); | ||
| 650 | if (rc) | ||
| 651 | ecryptfs_release_messaging(transport); | ||
| 652 | break; | ||
| 653 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
| 654 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
| 655 | default: | ||
| 656 | rc = -ENOSYS; | ||
| 657 | } | ||
| 658 | out: | 613 | out: |
| 659 | return rc; | 614 | return rc; |
| 660 | } | 615 | } |
| 661 | 616 | ||
| 662 | void ecryptfs_release_messaging(unsigned int transport) | 617 | void ecryptfs_release_messaging(void) |
| 663 | { | 618 | { |
| 664 | if (ecryptfs_msg_ctx_arr) { | 619 | if (ecryptfs_msg_ctx_arr) { |
| 665 | int i; | 620 | int i; |
| @@ -698,17 +653,6 @@ void ecryptfs_release_messaging(unsigned int transport) | |||
| 698 | kfree(ecryptfs_daemon_hash); | 653 | kfree(ecryptfs_daemon_hash); |
| 699 | mutex_unlock(&ecryptfs_daemon_hash_mux); | 654 | mutex_unlock(&ecryptfs_daemon_hash_mux); |
| 700 | } | 655 | } |
| 701 | switch(transport) { | 656 | ecryptfs_destroy_ecryptfs_miscdev(); |
| 702 | case ECRYPTFS_TRANSPORT_NETLINK: | ||
| 703 | ecryptfs_release_netlink(); | ||
| 704 | break; | ||
| 705 | case ECRYPTFS_TRANSPORT_MISCDEV: | ||
| 706 | ecryptfs_destroy_ecryptfs_miscdev(); | ||
| 707 | break; | ||
| 708 | case ECRYPTFS_TRANSPORT_CONNECTOR: | ||
| 709 | case ECRYPTFS_TRANSPORT_RELAYFS: | ||
| 710 | default: | ||
| 711 | break; | ||
| 712 | } | ||
| 713 | return; | 657 | return; |
| 714 | } | 658 | } |
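The raw-message path above is now a single call into ecryptfs_send_message_locked() followed by marking the context NO_REPLY; the miscdev transport is the only one left, so the old switch over transport types disappears from both the send and the init/release paths.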
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c index 245c2dc02d5c..04d7b3fa1ac6 100644 --- a/fs/ecryptfs/mmap.c +++ b/fs/ecryptfs/mmap.c | |||
| @@ -265,22 +265,34 @@ out: | |||
| 265 | } | 265 | } |
| 266 | 266 | ||
| 267 | /** | 267 | /** |
| 268 | * ecryptfs_prepare_write | 268 | * ecryptfs_write_begin |
| 269 | * @file: The eCryptfs file | 269 | * @file: The eCryptfs file |
| 270 | * @page: The eCryptfs page | 270 | * @mapping: The eCryptfs object |
| 271 | * @from: The start byte from which we will write | 271 | * @pos: The file offset at which to start writing |
| 272 | * @to: The end byte to which we will write | 272 | * @len: Length of the write |
| 273 | * @flags: Various flags | ||
| 274 | * @pagep: Pointer to return the page | ||
| 275 | * @fsdata: Pointer to return fs data (unused) | ||
| 273 | * | 276 | * |
| 274 | * This function must zero any hole we create | 277 | * This function must zero any hole we create |
| 275 | * | 278 | * |
| 276 | * Returns zero on success; non-zero otherwise | 279 | * Returns zero on success; non-zero otherwise |
| 277 | */ | 280 | */ |
| 278 | static int ecryptfs_prepare_write(struct file *file, struct page *page, | 281 | static int ecryptfs_write_begin(struct file *file, |
| 279 | unsigned from, unsigned to) | 282 | struct address_space *mapping, |
| 283 | loff_t pos, unsigned len, unsigned flags, | ||
| 284 | struct page **pagep, void **fsdata) | ||
| 280 | { | 285 | { |
| 286 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; | ||
| 287 | struct page *page; | ||
| 281 | loff_t prev_page_end_size; | 288 | loff_t prev_page_end_size; |
| 282 | int rc = 0; | 289 | int rc = 0; |
| 283 | 290 | ||
| 291 | page = __grab_cache_page(mapping, index); | ||
| 292 | if (!page) | ||
| 293 | return -ENOMEM; | ||
| 294 | *pagep = page; | ||
| 295 | |||
| 284 | if (!PageUptodate(page)) { | 296 | if (!PageUptodate(page)) { |
| 285 | struct ecryptfs_crypt_stat *crypt_stat = | 297 | struct ecryptfs_crypt_stat *crypt_stat = |
| 286 | &ecryptfs_inode_to_private( | 298 | &ecryptfs_inode_to_private( |
| @@ -289,8 +301,7 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
| 289 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) | 301 | if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED) |
| 290 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { | 302 | || (crypt_stat->flags & ECRYPTFS_NEW_FILE)) { |
| 291 | rc = ecryptfs_read_lower_page_segment( | 303 | rc = ecryptfs_read_lower_page_segment( |
| 292 | page, page->index, 0, PAGE_CACHE_SIZE, | 304 | page, index, 0, PAGE_CACHE_SIZE, mapping->host); |
| 293 | page->mapping->host); | ||
| 294 | if (rc) { | 305 | if (rc) { |
| 295 | printk(KERN_ERR "%s: Error attemping to read " | 306 | printk(KERN_ERR "%s: Error attemping to read " |
| 296 | "lower page segment; rc = [%d]\n", | 307 | "lower page segment; rc = [%d]\n", |
| @@ -316,8 +327,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
| 316 | SetPageUptodate(page); | 327 | SetPageUptodate(page); |
| 317 | } else { | 328 | } else { |
| 318 | rc = ecryptfs_read_lower_page_segment( | 329 | rc = ecryptfs_read_lower_page_segment( |
| 319 | page, page->index, 0, PAGE_CACHE_SIZE, | 330 | page, index, 0, PAGE_CACHE_SIZE, |
| 320 | page->mapping->host); | 331 | mapping->host); |
| 321 | if (rc) { | 332 | if (rc) { |
| 322 | printk(KERN_ERR "%s: Error reading " | 333 | printk(KERN_ERR "%s: Error reading " |
| 323 | "page; rc = [%d]\n", | 334 | "page; rc = [%d]\n", |
| @@ -339,10 +350,10 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
| 339 | SetPageUptodate(page); | 350 | SetPageUptodate(page); |
| 340 | } | 351 | } |
| 341 | } | 352 | } |
| 342 | prev_page_end_size = ((loff_t)page->index << PAGE_CACHE_SHIFT); | 353 | prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT); |
| 343 | /* If creating a page or more of holes, zero them out via truncate. | 354 | /* If creating a page or more of holes, zero them out via truncate. |
| 344 | * Note, this will increase i_size. */ | 355 | * Note, this will increase i_size. */ |
| 345 | if (page->index != 0) { | 356 | if (index != 0) { |
| 346 | if (prev_page_end_size > i_size_read(page->mapping->host)) { | 357 | if (prev_page_end_size > i_size_read(page->mapping->host)) { |
| 347 | rc = ecryptfs_truncate(file->f_path.dentry, | 358 | rc = ecryptfs_truncate(file->f_path.dentry, |
| 348 | prev_page_end_size); | 359 | prev_page_end_size); |
| @@ -357,8 +368,8 @@ static int ecryptfs_prepare_write(struct file *file, struct page *page, | |||
| 357 | } | 368 | } |
| 358 | /* Writing to a new page, and creating a small hole from start | 369 | /* Writing to a new page, and creating a small hole from start |
| 359 | * of page? Zero it out. */ | 370 | * of page? Zero it out. */ |
| 360 | if ((i_size_read(page->mapping->host) == prev_page_end_size) | 371 | if ((i_size_read(mapping->host) == prev_page_end_size) |
| 361 | && (from != 0)) | 372 | && (pos != 0)) |
| 362 | zero_user(page, 0, PAGE_CACHE_SIZE); | 373 | zero_user(page, 0, PAGE_CACHE_SIZE); |
| 363 | out: | 374 | out: |
| 364 | return rc; | 375 | return rc; |
| @@ -445,21 +456,28 @@ int ecryptfs_write_inode_size_to_metadata(struct inode *ecryptfs_inode) | |||
| 445 | } | 456 | } |
| 446 | 457 | ||
| 447 | /** | 458 | /** |
| 448 | * ecryptfs_commit_write | 459 | * ecryptfs_write_end |
| 449 | * @file: The eCryptfs file object | 460 | * @file: The eCryptfs file object |
| 461 | * @mapping: The eCryptfs object | ||
| 462 | * @pos: The file position | ||
| 463 | * @len: The length of the data (unused) | ||
| 464 | * @copied: The amount of data copied | ||
| 450 | * @page: The eCryptfs page | 465 | * @page: The eCryptfs page |
| 451 | * @from: Ignored (we rotate the page IV on each write) | 466 | * @fsdata: The fsdata (unused) |
| 452 | * @to: Ignored | ||
| 453 | * | 467 | * |
| 454 | * This is where we encrypt the data and pass the encrypted data to | 468 | * This is where we encrypt the data and pass the encrypted data to |
| 455 | * the lower filesystem. In OpenPGP-compatible mode, we operate on | 469 | * the lower filesystem. In OpenPGP-compatible mode, we operate on |
| 456 | * entire underlying packets. | 470 | * entire underlying packets. |
| 457 | */ | 471 | */ |
| 458 | static int ecryptfs_commit_write(struct file *file, struct page *page, | 472 | static int ecryptfs_write_end(struct file *file, |
| 459 | unsigned from, unsigned to) | 473 | struct address_space *mapping, |
| 474 | loff_t pos, unsigned len, unsigned copied, | ||
| 475 | struct page *page, void *fsdata) | ||
| 460 | { | 476 | { |
| 461 | loff_t pos; | 477 | pgoff_t index = pos >> PAGE_CACHE_SHIFT; |
| 462 | struct inode *ecryptfs_inode = page->mapping->host; | 478 | unsigned from = pos & (PAGE_CACHE_SIZE - 1); |
| 479 | unsigned to = from + copied; | ||
| 480 | struct inode *ecryptfs_inode = mapping->host; | ||
| 463 | struct ecryptfs_crypt_stat *crypt_stat = | 481 | struct ecryptfs_crypt_stat *crypt_stat = |
| 464 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; | 482 | &ecryptfs_inode_to_private(file->f_path.dentry->d_inode)->crypt_stat; |
| 465 | int rc; | 483 | int rc; |
| @@ -471,25 +489,22 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, | |||
| 471 | } else | 489 | } else |
| 472 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); | 490 | ecryptfs_printk(KERN_DEBUG, "Not a new file\n"); |
| 473 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" | 491 | ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page" |
| 474 | "(page w/ index = [0x%.16x], to = [%d])\n", page->index, | 492 | "(page w/ index = [0x%.16x], to = [%d])\n", index, to); |
| 475 | to); | ||
| 476 | /* Fills in zeros if 'to' goes beyond inode size */ | 493 | /* Fills in zeros if 'to' goes beyond inode size */ |
| 477 | rc = fill_zeros_to_end_of_page(page, to); | 494 | rc = fill_zeros_to_end_of_page(page, to); |
| 478 | if (rc) { | 495 | if (rc) { |
| 479 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " | 496 | ecryptfs_printk(KERN_WARNING, "Error attempting to fill " |
| 480 | "zeros in page with index = [0x%.16x]\n", | 497 | "zeros in page with index = [0x%.16x]\n", index); |
| 481 | page->index); | ||
| 482 | goto out; | 498 | goto out; |
| 483 | } | 499 | } |
| 484 | rc = ecryptfs_encrypt_page(page); | 500 | rc = ecryptfs_encrypt_page(page); |
| 485 | if (rc) { | 501 | if (rc) { |
| 486 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " | 502 | ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper " |
| 487 | "index [0x%.16x])\n", page->index); | 503 | "index [0x%.16x])\n", index); |
| 488 | goto out; | 504 | goto out; |
| 489 | } | 505 | } |
| 490 | pos = (((loff_t)page->index) << PAGE_CACHE_SHIFT) + to; | 506 | if (pos + copied > i_size_read(ecryptfs_inode)) { |
| 491 | if (pos > i_size_read(ecryptfs_inode)) { | 507 | i_size_write(ecryptfs_inode, pos + copied); |
| 492 | i_size_write(ecryptfs_inode, pos); | ||
| 493 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " | 508 | ecryptfs_printk(KERN_DEBUG, "Expanded file size to " |
| 494 | "[0x%.16x]\n", i_size_read(ecryptfs_inode)); | 509 | "[0x%.16x]\n", i_size_read(ecryptfs_inode)); |
| 495 | } | 510 | } |
| @@ -497,7 +512,11 @@ static int ecryptfs_commit_write(struct file *file, struct page *page, | |||
| 497 | if (rc) | 512 | if (rc) |
| 498 | printk(KERN_ERR "Error writing inode size to metadata; " | 513 | printk(KERN_ERR "Error writing inode size to metadata; " |
| 499 | "rc = [%d]\n", rc); | 514 | "rc = [%d]\n", rc); |
| 515 | else | ||
| 516 | rc = copied; | ||
| 500 | out: | 517 | out: |
| 518 | unlock_page(page); | ||
| 519 | page_cache_release(page); | ||
| 501 | return rc; | 520 | return rc; |
| 502 | } | 521 | } |
| 503 | 522 | ||
| @@ -518,7 +537,7 @@ static sector_t ecryptfs_bmap(struct address_space *mapping, sector_t block) | |||
| 518 | struct address_space_operations ecryptfs_aops = { | 537 | struct address_space_operations ecryptfs_aops = { |
| 519 | .writepage = ecryptfs_writepage, | 538 | .writepage = ecryptfs_writepage, |
| 520 | .readpage = ecryptfs_readpage, | 539 | .readpage = ecryptfs_readpage, |
| 521 | .prepare_write = ecryptfs_prepare_write, | 540 | .write_begin = ecryptfs_write_begin, |
| 522 | .commit_write = ecryptfs_commit_write, | 541 | .write_end = ecryptfs_write_end, |
| 523 | .bmap = ecryptfs_bmap, | 542 | .bmap = ecryptfs_bmap, |
| 524 | }; | 543 | }; |
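The mmap.c hunks above are the standard conversion from prepare_write/commit_write to write_begin/write_end. A stripped-down sketch of the shape, assuming the same 2.6.27-era helpers the hunks call; everything eCryptfs-specific (hole zeroing, encryption, metadata updates) is elided:

	/* Hedged sketch of the aops conversion pattern, not the real eCryptfs code. */
	static int example_write_begin(struct file *file, struct address_space *mapping,
				       loff_t pos, unsigned len, unsigned flags,
				       struct page **pagep, void **fsdata)
	{
		pgoff_t index = pos >> PAGE_CACHE_SHIFT;
		struct page *page;

		page = __grab_cache_page(mapping, index);	/* returns a locked page */
		if (!page)
			return -ENOMEM;
		*pagep = page;
		/* read in / zero the page here so the copy-in lands on valid data */
		return 0;
	}

	static int example_write_end(struct file *file, struct address_space *mapping,
				     loff_t pos, unsigned len, unsigned copied,
				     struct page *page, void *fsdata)
	{
		/* push the data to the lower file; grow i_size if pos + copied passed it */
		unlock_page(page);
		page_cache_release(page);
		return copied;		/* success is "bytes accepted", not zero */
	}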
diff --git a/fs/ecryptfs/netlink.c b/fs/ecryptfs/netlink.c deleted file mode 100644 index e0abad62b395..000000000000 --- a/fs/ecryptfs/netlink.c +++ /dev/null | |||
| @@ -1,249 +0,0 @@ | |||
| 1 | /** | ||
| 2 | * eCryptfs: Linux filesystem encryption layer | ||
| 3 | * | ||
| 4 | * Copyright (C) 2004-2006 International Business Machines Corp. | ||
| 5 | * Author(s): Michael A. Halcrow <mhalcrow@us.ibm.com> | ||
| 6 | * Tyler Hicks <tyhicks@ou.edu> | ||
| 7 | * | ||
| 8 | * This program is free software; you can redistribute it and/or | ||
| 9 | * modify it under the terms of the GNU General Public License version | ||
| 10 | * 2 as published by the Free Software Foundation. | ||
| 11 | * | ||
| 12 | * This program is distributed in the hope that it will be useful, but | ||
| 13 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 15 | * General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA | ||
| 20 | * 02111-1307, USA. | ||
| 21 | */ | ||
| 22 | |||
| 23 | #include <net/sock.h> | ||
| 24 | #include <linux/hash.h> | ||
| 25 | #include <linux/random.h> | ||
| 26 | #include "ecryptfs_kernel.h" | ||
| 27 | |||
| 28 | static struct sock *ecryptfs_nl_sock; | ||
| 29 | |||
| 30 | /** | ||
| 31 | * ecryptfs_send_netlink | ||
| 32 | * @data: The data to include as the payload | ||
| 33 | * @data_len: The byte count of the data | ||
| 34 | * @msg_ctx: The netlink context that will be used to handle the | ||
| 35 | * response message | ||
| 36 | * @msg_type: The type of netlink message to send | ||
| 37 | * @msg_flags: The flags to include in the netlink header | ||
| 38 | * @daemon_pid: The process id of the daemon to send the message to | ||
| 39 | * | ||
| 40 | * Sends the data to the specified daemon pid and uses the netlink | ||
| 41 | * context element to store the data needed for validation upon | ||
| 42 | * receiving the response. The data and the netlink context can be | ||
| 43 | * null if just sending a netlink header is sufficient. Returns zero | ||
| 44 | * upon sending the message; non-zero upon error. | ||
| 45 | */ | ||
| 46 | int ecryptfs_send_netlink(char *data, int data_len, | ||
| 47 | struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, | ||
| 48 | u16 msg_flags, struct pid *daemon_pid) | ||
| 49 | { | ||
| 50 | struct sk_buff *skb; | ||
| 51 | struct nlmsghdr *nlh; | ||
| 52 | struct ecryptfs_message *msg; | ||
| 53 | size_t payload_len; | ||
| 54 | int rc; | ||
| 55 | |||
| 56 | payload_len = ((data && data_len) ? (sizeof(*msg) + data_len) : 0); | ||
| 57 | skb = alloc_skb(NLMSG_SPACE(payload_len), GFP_KERNEL); | ||
| 58 | if (!skb) { | ||
| 59 | rc = -ENOMEM; | ||
| 60 | ecryptfs_printk(KERN_ERR, "Failed to allocate socket buffer\n"); | ||
| 61 | goto out; | ||
| 62 | } | ||
| 63 | nlh = NLMSG_PUT(skb, pid_nr(daemon_pid), msg_ctx ? msg_ctx->counter : 0, | ||
| 64 | msg_type, payload_len); | ||
| 65 | nlh->nlmsg_flags = msg_flags; | ||
| 66 | if (msg_ctx && payload_len) { | ||
| 67 | msg = (struct ecryptfs_message *)NLMSG_DATA(nlh); | ||
| 68 | msg->index = msg_ctx->index; | ||
| 69 | msg->data_len = data_len; | ||
| 70 | memcpy(msg->data, data, data_len); | ||
| 71 | } | ||
| 72 | rc = netlink_unicast(ecryptfs_nl_sock, skb, pid_nr(daemon_pid), 0); | ||
| 73 | if (rc < 0) { | ||
| 74 | ecryptfs_printk(KERN_ERR, "Failed to send eCryptfs netlink " | ||
| 75 | "message; rc = [%d]\n", rc); | ||
| 76 | goto out; | ||
| 77 | } | ||
| 78 | rc = 0; | ||
| 79 | goto out; | ||
| 80 | nlmsg_failure: | ||
| 81 | rc = -EMSGSIZE; | ||
| 82 | kfree_skb(skb); | ||
| 83 | out: | ||
| 84 | return rc; | ||
| 85 | } | ||
| 86 | |||
| 87 | /** | ||
| 88 | * ecryptfs_process_nl_reponse | ||
| 89 | * @skb: The socket buffer containing the netlink message of state | ||
| 90 | * RESPONSE | ||
| 91 | * | ||
| 92 | * Processes a response message after sending a operation request to | ||
| 93 | * userspace. Attempts to assign the msg to a netlink context element | ||
| 94 | * at the index specified in the msg. The sk_buff and nlmsghdr must | ||
| 95 | * be validated before this function. Returns zero upon delivery to | ||
| 96 | * desired context element; non-zero upon delivery failure or error. | ||
| 97 | */ | ||
| 98 | static int ecryptfs_process_nl_response(struct sk_buff *skb) | ||
| 99 | { | ||
| 100 | struct nlmsghdr *nlh = nlmsg_hdr(skb); | ||
| 101 | struct ecryptfs_message *msg = NLMSG_DATA(nlh); | ||
| 102 | struct pid *pid; | ||
| 103 | int rc; | ||
| 104 | |||
| 105 | if (skb->len - NLMSG_HDRLEN - sizeof(*msg) != msg->data_len) { | ||
| 106 | rc = -EINVAL; | ||
| 107 | ecryptfs_printk(KERN_ERR, "Received netlink message with " | ||
| 108 | "incorrectly specified data length\n"); | ||
| 109 | goto out; | ||
| 110 | } | ||
| 111 | pid = find_get_pid(NETLINK_CREDS(skb)->pid); | ||
| 112 | rc = ecryptfs_process_response(msg, NETLINK_CREDS(skb)->uid, NULL, | ||
| 113 | pid, nlh->nlmsg_seq); | ||
| 114 | put_pid(pid); | ||
| 115 | if (rc) | ||
| 116 | printk(KERN_ERR | ||
| 117 | "Error processing response message; rc = [%d]\n", rc); | ||
| 118 | out: | ||
| 119 | return rc; | ||
| 120 | } | ||
| 121 | |||
| 122 | /** | ||
| 123 | * ecryptfs_process_nl_helo | ||
| 124 | * @skb: The socket buffer containing the nlmsghdr in HELO state | ||
| 125 | * | ||
| 126 | * Gets uid and pid of the skb and adds the values to the daemon id | ||
| 127 | * hash. Returns zero after adding a new daemon id to the hash list; | ||
| 128 | * non-zero otherwise. | ||
| 129 | */ | ||
| 130 | static int ecryptfs_process_nl_helo(struct sk_buff *skb) | ||
| 131 | { | ||
| 132 | struct pid *pid; | ||
| 133 | int rc; | ||
| 134 | |||
| 135 | pid = find_get_pid(NETLINK_CREDS(skb)->pid); | ||
| 136 | rc = ecryptfs_process_helo(ECRYPTFS_TRANSPORT_NETLINK, | ||
| 137 | NETLINK_CREDS(skb)->uid, NULL, pid); | ||
| 138 | put_pid(pid); | ||
| 139 | if (rc) | ||
| 140 | printk(KERN_WARNING "Error processing HELO; rc = [%d]\n", rc); | ||
| 141 | return rc; | ||
| 142 | } | ||
| 143 | |||
| 144 | /** | ||
| 145 | * ecryptfs_process_nl_quit | ||
| 146 | * @skb: The socket buffer containing the nlmsghdr in QUIT state | ||
| 147 | * | ||
| 148 | * Gets uid and pid of the skb and deletes the corresponding daemon | ||
| 149 | * id, if it is the registered that is requesting the | ||
| 150 | * deletion. Returns zero after deleting the desired daemon id; | ||
| 151 | * non-zero otherwise. | ||
| 152 | */ | ||
| 153 | static int ecryptfs_process_nl_quit(struct sk_buff *skb) | ||
| 154 | { | ||
| 155 | struct pid *pid; | ||
| 156 | int rc; | ||
| 157 | |||
| 158 | pid = find_get_pid(NETLINK_CREDS(skb)->pid); | ||
| 159 | rc = ecryptfs_process_quit(NETLINK_CREDS(skb)->uid, NULL, pid); | ||
| 160 | put_pid(pid); | ||
| 161 | if (rc) | ||
| 162 | printk(KERN_WARNING | ||
| 163 | "Error processing QUIT message; rc = [%d]\n", rc); | ||
| 164 | return rc; | ||
| 165 | } | ||
| 166 | |||
| 167 | /** | ||
| 168 | * ecryptfs_receive_nl_message | ||
| 169 | * | ||
| 170 | * Callback function called by netlink system when a message arrives. | ||
| 171 | * If the message looks to be valid, then an attempt is made to assign | ||
| 172 | * it to its desired netlink context element and wake up the process | ||
| 173 | * that is waiting for a response. | ||
| 174 | */ | ||
| 175 | static void ecryptfs_receive_nl_message(struct sk_buff *skb) | ||
| 176 | { | ||
| 177 | struct nlmsghdr *nlh; | ||
| 178 | |||
| 179 | nlh = nlmsg_hdr(skb); | ||
| 180 | if (!NLMSG_OK(nlh, skb->len)) { | ||
| 181 | ecryptfs_printk(KERN_ERR, "Received corrupt netlink " | ||
| 182 | "message\n"); | ||
| 183 | goto free; | ||
| 184 | } | ||
| 185 | switch (nlh->nlmsg_type) { | ||
| 186 | case ECRYPTFS_MSG_RESPONSE: | ||
| 187 | if (ecryptfs_process_nl_response(skb)) { | ||
| 188 | ecryptfs_printk(KERN_WARNING, "Failed to " | ||
| 189 | "deliver netlink response to " | ||
| 190 | "requesting operation\n"); | ||
| 191 | } | ||
| 192 | break; | ||
| 193 | case ECRYPTFS_MSG_HELO: | ||
| 194 | if (ecryptfs_process_nl_helo(skb)) { | ||
| 195 | ecryptfs_printk(KERN_WARNING, "Failed to " | ||
| 196 | "fulfill HELO request\n"); | ||
| 197 | } | ||
| 198 | break; | ||
| 199 | case ECRYPTFS_MSG_QUIT: | ||
| 200 | if (ecryptfs_process_nl_quit(skb)) { | ||
| 201 | ecryptfs_printk(KERN_WARNING, "Failed to " | ||
| 202 | "fulfill QUIT request\n"); | ||
| 203 | } | ||
| 204 | break; | ||
| 205 | default: | ||
| 206 | ecryptfs_printk(KERN_WARNING, "Dropping netlink " | ||
| 207 | "message of unrecognized type [%d]\n", | ||
| 208 | nlh->nlmsg_type); | ||
| 209 | break; | ||
| 210 | } | ||
| 211 | free: | ||
| 212 | kfree_skb(skb); | ||
| 213 | } | ||
| 214 | |||
| 215 | /** | ||
| 216 | * ecryptfs_init_netlink | ||
| 217 | * | ||
| 218 | * Initializes the daemon id hash list, netlink context array, and | ||
| 219 | * necessary locks. Returns zero upon success; non-zero upon error. | ||
| 220 | */ | ||
| 221 | int ecryptfs_init_netlink(void) | ||
| 222 | { | ||
| 223 | int rc; | ||
| 224 | |||
| 225 | ecryptfs_nl_sock = netlink_kernel_create(&init_net, NETLINK_ECRYPTFS, 0, | ||
| 226 | ecryptfs_receive_nl_message, | ||
| 227 | NULL, THIS_MODULE); | ||
| 228 | if (!ecryptfs_nl_sock) { | ||
| 229 | rc = -EIO; | ||
| 230 | ecryptfs_printk(KERN_ERR, "Failed to create netlink socket\n"); | ||
| 231 | goto out; | ||
| 232 | } | ||
| 233 | ecryptfs_nl_sock->sk_sndtimeo = ECRYPTFS_DEFAULT_SEND_TIMEOUT; | ||
| 234 | rc = 0; | ||
| 235 | out: | ||
| 236 | return rc; | ||
| 237 | } | ||
| 238 | |||
| 239 | /** | ||
| 240 | * ecryptfs_release_netlink | ||
| 241 | * | ||
| 242 | * Frees all memory used by the netlink context array and releases the | ||
| 243 | * netlink socket. | ||
| 244 | */ | ||
| 245 | void ecryptfs_release_netlink(void) | ||
| 246 | { | ||
| 247 | netlink_kernel_release(ecryptfs_nl_sock); | ||
| 248 | ecryptfs_nl_sock = NULL; | ||
| 249 | } | ||
diff --git a/fs/efs/super.c b/fs/efs/super.c index 567b134fa1f1..73b19cfc91fc 100644 --- a/fs/efs/super.c +++ b/fs/efs/super.c | |||
| @@ -341,8 +341,6 @@ static int efs_statfs(struct dentry *dentry, struct kstatfs *buf) { | |||
| 341 | sb->inode_blocks * | 341 | sb->inode_blocks * |
| 342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); | 342 | (EFS_BLOCKSIZE / sizeof(struct efs_dinode)); |
| 343 | buf->f_ffree = sb->inode_free; /* free inodes */ | 343 | buf->f_ffree = sb->inode_free; /* free inodes */ |
| 344 | buf->f_fsid.val[0] = (sb->fs_magic >> 16) & 0xffff; /* fs ID */ | ||
| 345 | buf->f_fsid.val[1] = sb->fs_magic & 0xffff; /* fs ID */ | ||
| 346 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ | 344 | buf->f_namelen = EFS_MAXNAMELEN; /* max filename length */ |
| 347 | 345 | ||
| 348 | return 0; | 346 | return 0; |
diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 7cc0eb756b55..99368bda0261 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c | |||
| @@ -927,14 +927,11 @@ errxit: | |||
| 927 | /* | 927 | /* |
| 928 | * During the time we spent in the loop above, some other events | 928 | * During the time we spent in the loop above, some other events |
| 929 | * might have been queued by the poll callback. We re-insert them | 929 | * might have been queued by the poll callback. We re-insert them |
| 930 | * here (in case they are not already queued, or they're one-shot). | 930 | * inside the main ready-list here. |
| 931 | */ | 931 | */ |
| 932 | for (nepi = ep->ovflist; (epi = nepi) != NULL; | 932 | for (nepi = ep->ovflist; (epi = nepi) != NULL; |
| 933 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) { | 933 | nepi = epi->next, epi->next = EP_UNACTIVE_PTR) |
| 934 | if (!ep_is_linked(&epi->rdllink) && | 934 | list_add_tail(&epi->rdllink, &ep->rdllist); |
| 935 | (epi->event.events & ~EP_PRIVATE_BITS)) | ||
| 936 | list_add_tail(&epi->rdllink, &ep->rdllist); | ||
| 937 | } | ||
| 938 | /* | 935 | /* |
| 939 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after | 936 | * We need to set back ep->ovflist to EP_UNACTIVE_PTR, so that after |
| 940 | * releasing the lock, events will be queued in the normal way inside | 937 | * releasing the lock, events will be queued in the normal way inside |
diff --git a/fs/exec.c b/fs/exec.c --- a/fs/exec.c +++ b/fs/exec.c | |||
| @@ -50,15 +50,12 @@ | |||
| 50 | #include <linux/cn_proc.h> | 50 | #include <linux/cn_proc.h> |
| 51 | #include <linux/audit.h> | 51 | #include <linux/audit.h> |
| 52 | #include <linux/tracehook.h> | 52 | #include <linux/tracehook.h> |
| 53 | #include <linux/kmod.h> | ||
| 53 | 54 | ||
| 54 | #include <asm/uaccess.h> | 55 | #include <asm/uaccess.h> |
| 55 | #include <asm/mmu_context.h> | 56 | #include <asm/mmu_context.h> |
| 56 | #include <asm/tlb.h> | 57 | #include <asm/tlb.h> |
| 57 | 58 | ||
| 58 | #ifdef CONFIG_KMOD | ||
| 59 | #include <linux/kmod.h> | ||
| 60 | #endif | ||
| 61 | |||
| 62 | #ifdef __alpha__ | 59 | #ifdef __alpha__ |
| 63 | /* for /sbin/loader handling in search_binary_handler() */ | 60 | /* for /sbin/loader handling in search_binary_handler() */ |
| 64 | #include <linux/a.out.h> | 61 | #include <linux/a.out.h> |
| @@ -391,7 +388,7 @@ static int count(char __user * __user * argv, int max) | |||
| 391 | if (!p) | 388 | if (!p) |
| 392 | break; | 389 | break; |
| 393 | argv++; | 390 | argv++; |
| 394 | if(++i > max) | 391 | if (i++ >= max) |
| 395 | return -E2BIG; | 392 | return -E2BIG; |
| 396 | cond_resched(); | 393 | cond_resched(); |
| 397 | } | 394 | } |
| @@ -825,8 +822,6 @@ static int de_thread(struct task_struct *tsk) | |||
| 825 | schedule(); | 822 | schedule(); |
| 826 | } | 823 | } |
| 827 | 824 | ||
| 828 | if (unlikely(task_child_reaper(tsk) == leader)) | ||
| 829 | task_active_pid_ns(tsk)->child_reaper = tsk; | ||
| 830 | /* | 825 | /* |
| 831 | * The only record we have of the real-time age of a | 826 | * The only record we have of the real-time age of a |
| 832 | * process, regardless of execs it's done, is start_time. | 827 | * process, regardless of execs it's done, is start_time. |
| @@ -1189,7 +1184,7 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
| 1189 | return retval; | 1184 | return retval; |
| 1190 | 1185 | ||
| 1191 | /* Remember if the application is TASO. */ | 1186 | /* Remember if the application is TASO. */ |
| 1192 | bprm->sh_bang = eh->ah.entry < 0x100000000UL; | 1187 | bprm->taso = eh->ah.entry < 0x100000000UL; |
| 1193 | 1188 | ||
| 1194 | bprm->file = file; | 1189 | bprm->file = file; |
| 1195 | bprm->loader = loader; | 1190 | bprm->loader = loader; |
| @@ -1247,8 +1242,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) | |||
| 1247 | read_unlock(&binfmt_lock); | 1242 | read_unlock(&binfmt_lock); |
| 1248 | if (retval != -ENOEXEC || bprm->mm == NULL) { | 1243 | if (retval != -ENOEXEC || bprm->mm == NULL) { |
| 1249 | break; | 1244 | break; |
| 1250 | #ifdef CONFIG_KMOD | 1245 | #ifdef CONFIG_MODULES |
| 1251 | }else{ | 1246 | } else { |
| 1252 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) | 1247 | #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) |
| 1253 | if (printable(bprm->buf[0]) && | 1248 | if (printable(bprm->buf[0]) && |
| 1254 | printable(bprm->buf[1]) && | 1249 | printable(bprm->buf[1]) && |
| @@ -1391,7 +1386,7 @@ EXPORT_SYMBOL(set_binfmt); | |||
| 1391 | * name into corename, which must have space for at least | 1386 | * name into corename, which must have space for at least |
| 1392 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. | 1387 | * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. |
| 1393 | */ | 1388 | */ |
| 1394 | static int format_corename(char *corename, int nr_threads, long signr) | 1389 | static int format_corename(char *corename, long signr) |
| 1395 | { | 1390 | { |
| 1396 | const char *pat_ptr = core_pattern; | 1391 | const char *pat_ptr = core_pattern; |
| 1397 | int ispipe = (*pat_ptr == '|'); | 1392 | int ispipe = (*pat_ptr == '|'); |
| @@ -1498,8 +1493,7 @@ static int format_corename(char *corename, int nr_threads, long signr) | |||
| 1498 | * If core_pattern does not include a %p (as is the default) | 1493 | * If core_pattern does not include a %p (as is the default) |
| 1499 | * and core_uses_pid is set, then .%pid will be appended to | 1494 | * and core_uses_pid is set, then .%pid will be appended to |
| 1500 | * the filename. Do not do this for piped commands. */ | 1495 | * the filename. Do not do this for piped commands. */ |
| 1501 | if (!ispipe && !pid_in_pattern | 1496 | if (!ispipe && !pid_in_pattern && core_uses_pid) { |
| 1502 | && (core_uses_pid || nr_threads)) { | ||
| 1503 | rc = snprintf(out_ptr, out_end - out_ptr, | 1497 | rc = snprintf(out_ptr, out_end - out_ptr, |
| 1504 | ".%d", task_tgid_vnr(current)); | 1498 | ".%d", task_tgid_vnr(current)); |
| 1505 | if (rc > out_end - out_ptr) | 1499 | if (rc > out_end - out_ptr) |
| @@ -1762,7 +1756,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) | |||
| 1762 | * uses lock_kernel() | 1756 | * uses lock_kernel() |
| 1763 | */ | 1757 | */ |
| 1764 | lock_kernel(); | 1758 | lock_kernel(); |
| 1765 | ispipe = format_corename(corename, retval, signr); | 1759 | ispipe = format_corename(corename, signr); |
| 1766 | unlock_kernel(); | 1760 | unlock_kernel(); |
| 1767 | /* | 1761 | /* |
| 1768 | * Don't bother to check the RLIMIT_CORE value if core_pattern points | 1762 | * Don't bother to check the RLIMIT_CORE value if core_pattern points |
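The format_corename() change above drops the nr_threads special case, so the ".<pid>" suffix now depends only on the pattern and the core_uses_pid sysctl. A hedged restatement of that rule as a helper (the identifiers are the ones visible in the hunk; the wrapper function itself is illustrative):

	/* Hedged sketch: append ".<pid>" only when the sysctl asks for it and
	 * the pattern is neither piped nor already carrying %p. */
	static void example_append_pid_suffix(char *out_ptr, char *out_end,
					      int ispipe, int pid_in_pattern)
	{
		if (!ispipe && !pid_in_pattern && core_uses_pid)
			snprintf(out_ptr, out_end - out_ptr, ".%d",
				 task_tgid_vnr(current));
	}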
diff --git a/fs/ext2/Kconfig b/fs/ext2/Kconfig new file mode 100644 index 000000000000..14a6780fd034 --- /dev/null +++ b/fs/ext2/Kconfig | |||
| @@ -0,0 +1,55 @@ | |||
| 1 | config EXT2_FS | ||
| 2 | tristate "Second extended fs support" | ||
| 3 | help | ||
| 4 | Ext2 is a standard Linux file system for hard disks. | ||
| 5 | |||
| 6 | To compile this file system support as a module, choose M here: the | ||
| 7 | module will be called ext2. | ||
| 8 | |||
| 9 | If unsure, say Y. | ||
| 10 | |||
| 11 | config EXT2_FS_XATTR | ||
| 12 | bool "Ext2 extended attributes" | ||
| 13 | depends on EXT2_FS | ||
| 14 | help | ||
| 15 | Extended attributes are name:value pairs associated with inodes by | ||
| 16 | the kernel or by users (see the attr(5) manual page, or visit | ||
| 17 | <http://acl.bestbits.at/> for details). | ||
| 18 | |||
| 19 | If unsure, say N. | ||
| 20 | |||
| 21 | config EXT2_FS_POSIX_ACL | ||
| 22 | bool "Ext2 POSIX Access Control Lists" | ||
| 23 | depends on EXT2_FS_XATTR | ||
| 24 | select FS_POSIX_ACL | ||
| 25 | help | ||
| 26 | Posix Access Control Lists (ACLs) support permissions for users and | ||
| 27 | groups beyond the owner/group/world scheme. | ||
| 28 | |||
| 29 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
| 30 | Linux website <http://acl.bestbits.at/>. | ||
| 31 | |||
| 32 | If you don't know what Access Control Lists are, say N | ||
| 33 | |||
| 34 | config EXT2_FS_SECURITY | ||
| 35 | bool "Ext2 Security Labels" | ||
| 36 | depends on EXT2_FS_XATTR | ||
| 37 | help | ||
| 38 | Security labels support alternative access control models | ||
| 39 | implemented by security modules like SELinux. This option | ||
| 40 | enables an extended attribute handler for file security | ||
| 41 | labels in the ext2 filesystem. | ||
| 42 | |||
| 43 | If you are not using a security module that requires using | ||
| 44 | extended attributes for file security labels, say N. | ||
| 45 | |||
| 46 | config EXT2_FS_XIP | ||
| 47 | bool "Ext2 execute in place support" | ||
| 48 | depends on EXT2_FS && MMU | ||
| 49 | help | ||
| 50 | Execute in place can be used on memory-backed block devices. If you | ||
| 51 | enable this option, you can select to mount block devices which are | ||
| 52 | capable of this feature without using the page cache. | ||
| 53 | |||
| 54 | If you do not use a block device that is capable of using this, | ||
| 55 | or if unsure, say N. | ||
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 10bb02c3f25c..6dac7ba2d22d 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c | |||
| @@ -1295,6 +1295,7 @@ retry_alloc: | |||
| 1295 | * turn off reservation for this allocation | 1295 | * turn off reservation for this allocation |
| 1296 | */ | 1296 | */ |
| 1297 | if (my_rsv && (free_blocks < windowsz) | 1297 | if (my_rsv && (free_blocks < windowsz) |
| 1298 | && (free_blocks > 0) | ||
| 1298 | && (rsv_is_empty(&my_rsv->rsv_window))) | 1299 | && (rsv_is_empty(&my_rsv->rsv_window))) |
| 1299 | my_rsv = NULL; | 1300 | my_rsv = NULL; |
| 1300 | 1301 | ||
| @@ -1332,7 +1333,7 @@ retry_alloc: | |||
| 1332 | * free blocks is less than half of the reservation | 1333 | * free blocks is less than half of the reservation |
| 1333 | * window size. | 1334 | * window size. |
| 1334 | */ | 1335 | */ |
| 1335 | if (free_blocks <= (windowsz/2)) | 1336 | if (my_rsv && (free_blocks <= (windowsz/2))) |
| 1336 | continue; | 1337 | continue; |
| 1337 | 1338 | ||
| 1338 | brelse(bitmap_bh); | 1339 | brelse(bitmap_bh); |
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index a78c6b4af060..11a49ce84392 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c | |||
| @@ -103,7 +103,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len) | |||
| 103 | return err; | 103 | return err; |
| 104 | } | 104 | } |
| 105 | 105 | ||
| 106 | static void ext2_check_page(struct page *page) | 106 | static void ext2_check_page(struct page *page, int quiet) |
| 107 | { | 107 | { |
| 108 | struct inode *dir = page->mapping->host; | 108 | struct inode *dir = page->mapping->host; |
| 109 | struct super_block *sb = dir->i_sb; | 109 | struct super_block *sb = dir->i_sb; |
| @@ -146,10 +146,10 @@ out: | |||
| 146 | /* Too bad, we had an error */ | 146 | /* Too bad, we had an error */ |
| 147 | 147 | ||
| 148 | Ebadsize: | 148 | Ebadsize: |
| 149 | ext2_error(sb, "ext2_check_page", | 149 | if (!quiet) |
| 150 | "size of directory #%lu is not a multiple of chunk size", | 150 | ext2_error(sb, __func__, |
| 151 | dir->i_ino | 151 | "size of directory #%lu is not a multiple " |
| 152 | ); | 152 | "of chunk size", dir->i_ino); |
| 153 | goto fail; | 153 | goto fail; |
| 154 | Eshort: | 154 | Eshort: |
| 155 | error = "rec_len is smaller than minimal"; | 155 | error = "rec_len is smaller than minimal"; |
| @@ -166,32 +166,36 @@ Espan: | |||
| 166 | Einumber: | 166 | Einumber: |
| 167 | error = "inode out of bounds"; | 167 | error = "inode out of bounds"; |
| 168 | bad_entry: | 168 | bad_entry: |
| 169 | ext2_error (sb, "ext2_check_page", "bad entry in directory #%lu: %s - " | 169 | if (!quiet) |
| 170 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 170 | ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - " |
| 171 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, | 171 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
| 172 | (unsigned long) le32_to_cpu(p->inode), | 172 | dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs, |
| 173 | rec_len, p->name_len); | 173 | (unsigned long) le32_to_cpu(p->inode), |
| 174 | rec_len, p->name_len); | ||
| 174 | goto fail; | 175 | goto fail; |
| 175 | Eend: | 176 | Eend: |
| 176 | p = (ext2_dirent *)(kaddr + offs); | 177 | if (!quiet) { |
| 177 | ext2_error (sb, "ext2_check_page", | 178 | p = (ext2_dirent *)(kaddr + offs); |
| 178 | "entry in directory #%lu spans the page boundary" | 179 | ext2_error(sb, "ext2_check_page", |
| 179 | "offset=%lu, inode=%lu", | 180 | "entry in directory #%lu spans the page boundary" |
| 180 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, | 181 | "offset=%lu, inode=%lu", |
| 181 | (unsigned long) le32_to_cpu(p->inode)); | 182 | dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs, |
| 183 | (unsigned long) le32_to_cpu(p->inode)); | ||
| 184 | } | ||
| 182 | fail: | 185 | fail: |
| 183 | SetPageChecked(page); | 186 | SetPageChecked(page); |
| 184 | SetPageError(page); | 187 | SetPageError(page); |
| 185 | } | 188 | } |
| 186 | 189 | ||
| 187 | static struct page * ext2_get_page(struct inode *dir, unsigned long n) | 190 | static struct page * ext2_get_page(struct inode *dir, unsigned long n, |
| 191 | int quiet) | ||
| 188 | { | 192 | { |
| 189 | struct address_space *mapping = dir->i_mapping; | 193 | struct address_space *mapping = dir->i_mapping; |
| 190 | struct page *page = read_mapping_page(mapping, n, NULL); | 194 | struct page *page = read_mapping_page(mapping, n, NULL); |
| 191 | if (!IS_ERR(page)) { | 195 | if (!IS_ERR(page)) { |
| 192 | kmap(page); | 196 | kmap(page); |
| 193 | if (!PageChecked(page)) | 197 | if (!PageChecked(page)) |
| 194 | ext2_check_page(page); | 198 | ext2_check_page(page, quiet); |
| 195 | if (PageError(page)) | 199 | if (PageError(page)) |
| 196 | goto fail; | 200 | goto fail; |
| 197 | } | 201 | } |
| @@ -292,7 +296,7 @@ ext2_readdir (struct file * filp, void * dirent, filldir_t filldir) | |||
| 292 | for ( ; n < npages; n++, offset = 0) { | 296 | for ( ; n < npages; n++, offset = 0) { |
| 293 | char *kaddr, *limit; | 297 | char *kaddr, *limit; |
| 294 | ext2_dirent *de; | 298 | ext2_dirent *de; |
| 295 | struct page *page = ext2_get_page(inode, n); | 299 | struct page *page = ext2_get_page(inode, n, 0); |
| 296 | 300 | ||
| 297 | if (IS_ERR(page)) { | 301 | if (IS_ERR(page)) { |
| 298 | ext2_error(sb, __func__, | 302 | ext2_error(sb, __func__, |
| @@ -361,6 +365,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | |||
| 361 | struct page *page = NULL; | 365 | struct page *page = NULL; |
| 362 | struct ext2_inode_info *ei = EXT2_I(dir); | 366 | struct ext2_inode_info *ei = EXT2_I(dir); |
| 363 | ext2_dirent * de; | 367 | ext2_dirent * de; |
| 368 | int dir_has_error = 0; | ||
| 364 | 369 | ||
| 365 | if (npages == 0) | 370 | if (npages == 0) |
| 366 | goto out; | 371 | goto out; |
| @@ -374,7 +379,7 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | |||
| 374 | n = start; | 379 | n = start; |
| 375 | do { | 380 | do { |
| 376 | char *kaddr; | 381 | char *kaddr; |
| 377 | page = ext2_get_page(dir, n); | 382 | page = ext2_get_page(dir, n, dir_has_error); |
| 378 | if (!IS_ERR(page)) { | 383 | if (!IS_ERR(page)) { |
| 379 | kaddr = page_address(page); | 384 | kaddr = page_address(page); |
| 380 | de = (ext2_dirent *) kaddr; | 385 | de = (ext2_dirent *) kaddr; |
| @@ -391,7 +396,9 @@ struct ext2_dir_entry_2 * ext2_find_entry (struct inode * dir, | |||
| 391 | de = ext2_next_entry(de); | 396 | de = ext2_next_entry(de); |
| 392 | } | 397 | } |
| 393 | ext2_put_page(page); | 398 | ext2_put_page(page); |
| 394 | } | 399 | } else |
| 400 | dir_has_error = 1; | ||
| 401 | |||
| 395 | if (++n >= npages) | 402 | if (++n >= npages) |
| 396 | n = 0; | 403 | n = 0; |
| 397 | /* next page is past the blocks we've got */ | 404 | /* next page is past the blocks we've got */ |
| @@ -414,7 +421,7 @@ found: | |||
| 414 | 421 | ||
| 415 | struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) | 422 | struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p) |
| 416 | { | 423 | { |
| 417 | struct page *page = ext2_get_page(dir, 0); | 424 | struct page *page = ext2_get_page(dir, 0, 0); |
| 418 | ext2_dirent *de = NULL; | 425 | ext2_dirent *de = NULL; |
| 419 | 426 | ||
| 420 | if (!IS_ERR(page)) { | 427 | if (!IS_ERR(page)) { |
| @@ -487,7 +494,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode) | |||
| 487 | for (n = 0; n <= npages; n++) { | 494 | for (n = 0; n <= npages; n++) { |
| 488 | char *dir_end; | 495 | char *dir_end; |
| 489 | 496 | ||
| 490 | page = ext2_get_page(dir, n); | 497 | page = ext2_get_page(dir, n, 0); |
| 491 | err = PTR_ERR(page); | 498 | err = PTR_ERR(page); |
| 492 | if (IS_ERR(page)) | 499 | if (IS_ERR(page)) |
| 493 | goto out; | 500 | goto out; |
| @@ -655,14 +662,17 @@ int ext2_empty_dir (struct inode * inode) | |||
| 655 | { | 662 | { |
| 656 | struct page *page = NULL; | 663 | struct page *page = NULL; |
| 657 | unsigned long i, npages = dir_pages(inode); | 664 | unsigned long i, npages = dir_pages(inode); |
| 665 | int dir_has_error = 0; | ||
| 658 | 666 | ||
| 659 | for (i = 0; i < npages; i++) { | 667 | for (i = 0; i < npages; i++) { |
| 660 | char *kaddr; | 668 | char *kaddr; |
| 661 | ext2_dirent * de; | 669 | ext2_dirent * de; |
| 662 | page = ext2_get_page(inode, i); | 670 | page = ext2_get_page(inode, i, dir_has_error); |
| 663 | 671 | ||
| 664 | if (IS_ERR(page)) | 672 | if (IS_ERR(page)) { |
| 673 | dir_has_error = 1; | ||
| 665 | continue; | 674 | continue; |
| 675 | } | ||
| 666 | 676 | ||
| 667 | kaddr = page_address(page); | 677 | kaddr = page_address(page); |
| 668 | de = (ext2_dirent *)kaddr; | 678 | de = (ext2_dirent *)kaddr; |
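ext2_get_page() now takes a quiet flag so that, once one page of a corrupt directory has been reported, the rest of the same walk does not repeat the ext2_error() output. A hedged sketch of the walk pattern the hunks above adopt (the loop body is illustrative):

	/* Hedged sketch: report the first bad page, stay quiet afterwards. */
	static void example_dir_walk(struct inode *dir, unsigned long npages)
	{
		unsigned long n;
		int dir_has_error = 0;

		for (n = 0; n < npages; n++) {
			struct page *page = ext2_get_page(dir, n, dir_has_error);

			if (IS_ERR(page)) {
				dir_has_error = 1;	/* later pages pass quiet=1 */
				continue;
			}
			/* ... scan the ext2_dirent entries on this page ... */
			ext2_put_page(page);
		}
	}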
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index 47d88da2d33b..bae998c1e44e 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h | |||
| @@ -133,6 +133,8 @@ extern void ext2_truncate (struct inode *); | |||
| 133 | extern int ext2_setattr (struct dentry *, struct iattr *); | 133 | extern int ext2_setattr (struct dentry *, struct iattr *); |
| 134 | extern void ext2_set_inode_flags(struct inode *inode); | 134 | extern void ext2_set_inode_flags(struct inode *inode); |
| 135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); | 135 | extern void ext2_get_inode_flags(struct ext2_inode_info *); |
| 136 | extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 137 | u64 start, u64 len); | ||
| 136 | int __ext2_write_begin(struct file *file, struct address_space *mapping, | 138 | int __ext2_write_begin(struct file *file, struct address_space *mapping, |
| 137 | loff_t pos, unsigned len, unsigned flags, | 139 | loff_t pos, unsigned len, unsigned flags, |
| 138 | struct page **pagep, void **fsdata); | 140 | struct page **pagep, void **fsdata); |
diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 5f2fa9c36293..45ed07122182 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c | |||
| @@ -86,4 +86,5 @@ const struct inode_operations ext2_file_inode_operations = { | |||
| 86 | #endif | 86 | #endif |
| 87 | .setattr = ext2_setattr, | 87 | .setattr = ext2_setattr, |
| 88 | .permission = ext2_permission, | 88 | .permission = ext2_permission, |
| 89 | .fiemap = ext2_fiemap, | ||
| 89 | }; | 90 | }; |
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 991d6dfeb51f..7658b33e2653 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c | |||
| @@ -31,6 +31,7 @@ | |||
| 31 | #include <linux/writeback.h> | 31 | #include <linux/writeback.h> |
| 32 | #include <linux/buffer_head.h> | 32 | #include <linux/buffer_head.h> |
| 33 | #include <linux/mpage.h> | 33 | #include <linux/mpage.h> |
| 34 | #include <linux/fiemap.h> | ||
| 34 | #include "ext2.h" | 35 | #include "ext2.h" |
| 35 | #include "acl.h" | 36 | #include "acl.h" |
| 36 | #include "xip.h" | 37 | #include "xip.h" |
| @@ -704,6 +705,13 @@ int ext2_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_ | |||
| 704 | 705 | ||
| 705 | } | 706 | } |
| 706 | 707 | ||
| 708 | int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 709 | u64 start, u64 len) | ||
| 710 | { | ||
| 711 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
| 712 | ext2_get_block); | ||
| 713 | } | ||
| 714 | |||
| 707 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) | 715 | static int ext2_writepage(struct page *page, struct writeback_control *wbc) |
| 708 | { | 716 | { |
| 709 | return block_write_full_page(page, ext2_get_block, wbc); | 717 | return block_write_full_page(page, ext2_get_block, wbc); |
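With ext2_fiemap() wired to generic_block_fiemap() and hooked into ext2_file_inode_operations above, the file's extent map becomes queryable from userspace through the FIEMAP ioctl. A hedged usage sketch (the path is a placeholder and error handling is minimal):

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(void)
{
	int fd = open("/mnt/ext2/some_file", O_RDONLY);	/* placeholder path */
	unsigned int i, n = 32;				/* room for up to 32 extents */
	struct fiemap *fm = calloc(1, sizeof(*fm) + n * sizeof(struct fiemap_extent));

	if (fd < 0 || !fm)
		return 1;
	fm->fm_start = 0;
	fm->fm_length = ~0ULL;				/* map the whole file */
	fm->fm_extent_count = n;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}
	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("extent %u: logical %llu physical %llu length %llu\n", i,
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_physical,
		       (unsigned long long)fm->fm_extents[i].fe_length);
	free(fm);
	close(fd);
	return 0;
}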
diff --git a/fs/ext2/super.c b/fs/ext2/super.c index fd88c7b43e66..647cd888ac87 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c | |||
| @@ -393,7 +393,7 @@ enum { | |||
| 393 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation | 393 | Opt_usrquota, Opt_grpquota, Opt_reservation, Opt_noreservation |
| 394 | }; | 394 | }; |
| 395 | 395 | ||
| 396 | static match_table_t tokens = { | 396 | static const match_table_t tokens = { |
| 397 | {Opt_bsd_df, "bsddf"}, | 397 | {Opt_bsd_df, "bsddf"}, |
| 398 | {Opt_minix_df, "minixdf"}, | 398 | {Opt_minix_df, "minixdf"}, |
| 399 | {Opt_grpid, "grpid"}, | 399 | {Opt_grpid, "grpid"}, |
diff --git a/fs/ext3/Kconfig b/fs/ext3/Kconfig new file mode 100644 index 000000000000..8e0cfe44b0fc --- /dev/null +++ b/fs/ext3/Kconfig | |||
| @@ -0,0 +1,67 @@ | |||
| 1 | config EXT3_FS | ||
| 2 | tristate "Ext3 journalling file system support" | ||
| 3 | select JBD | ||
| 4 | help | ||
| 5 | This is the journalling version of the Second extended file system | ||
| 6 | (often called ext3), the de facto standard Linux file system | ||
| 7 | (method to organize files on a storage device) for hard disks. | ||
| 8 | |||
| 9 | The journalling code included in this driver means you do not have | ||
| 10 | to run e2fsck (file system checker) on your file systems after a | ||
| 11 | crash. The journal keeps track of any changes that were being made | ||
| 12 | at the time the system crashed, and can ensure that your file system | ||
| 13 | is consistent without the need for a lengthy check. | ||
| 14 | |||
| 15 | Other than adding the journal to the file system, the on-disk format | ||
| 16 | of ext3 is identical to ext2. It is possible to freely switch | ||
| 17 | between using the ext3 driver and the ext2 driver, as long as the | ||
| 18 | file system has been cleanly unmounted, or e2fsck is run on the file | ||
| 19 | system. | ||
| 20 | |||
| 21 | To add a journal on an existing ext2 file system or change the | ||
| 22 | behavior of ext3 file systems, you can use the tune2fs utility ("man | ||
| 23 | tune2fs"). To modify attributes of files and directories on ext3 | ||
| 24 | file systems, use chattr ("man chattr"). You need to be using | ||
| 25 | e2fsprogs version 1.20 or later in order to create ext3 journals | ||
| 26 | (available at <http://sourceforge.net/projects/e2fsprogs/>). | ||
| 27 | |||
| 28 | To compile this file system support as a module, choose M here: the | ||
| 29 | module will be called ext3. | ||
| 30 | |||
| 31 | config EXT3_FS_XATTR | ||
| 32 | bool "Ext3 extended attributes" | ||
| 33 | depends on EXT3_FS | ||
| 34 | default y | ||
| 35 | help | ||
| 36 | Extended attributes are name:value pairs associated with inodes by | ||
| 37 | the kernel or by users (see the attr(5) manual page, or visit | ||
| 38 | <http://acl.bestbits.at/> for details). | ||
| 39 | |||
| 40 | If unsure, say N. | ||
| 41 | |||
| 42 | You need this for POSIX ACL support on ext3. | ||
| 43 | |||
| 44 | config EXT3_FS_POSIX_ACL | ||
| 45 | bool "Ext3 POSIX Access Control Lists" | ||
| 46 | depends on EXT3_FS_XATTR | ||
| 47 | select FS_POSIX_ACL | ||
| 48 | help | ||
| 49 | Posix Access Control Lists (ACLs) support permissions for users and | ||
| 50 | groups beyond the owner/group/world scheme. | ||
| 51 | |||
| 52 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
| 53 | Linux website <http://acl.bestbits.at/>. | ||
| 54 | |||
| 55 | If you don't know what Access Control Lists are, say N. | ||
| 56 | |||
| 57 | config EXT3_FS_SECURITY | ||
| 58 | bool "Ext3 Security Labels" | ||
| 59 | depends on EXT3_FS_XATTR | ||
| 60 | help | ||
| 61 | Security labels support alternative access control models | ||
| 62 | implemented by security modules like SELinux. This option | ||
| 63 | enables an extended attribute handler for file security | ||
| 64 | labels in the ext3 filesystem. | ||
| 65 | |||
| 66 | If you are not using a security module that requires using | ||
| 67 | extended attributes for file security labels, say N. | ||
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index 92fd0338a6eb..f5b57a2ca35a 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c | |||
| @@ -1547,6 +1547,7 @@ retry_alloc: | |||
| 1547 | * turn off reservation for this allocation | 1547 | * turn off reservation for this allocation |
| 1548 | */ | 1548 | */ |
| 1549 | if (my_rsv && (free_blocks < windowsz) | 1549 | if (my_rsv && (free_blocks < windowsz) |
| 1550 | && (free_blocks > 0) | ||
| 1550 | && (rsv_is_empty(&my_rsv->rsv_window))) | 1551 | && (rsv_is_empty(&my_rsv->rsv_window))) |
| 1551 | my_rsv = NULL; | 1552 | my_rsv = NULL; |
| 1552 | 1553 | ||
| @@ -1585,7 +1586,7 @@ retry_alloc: | |||
| 1585 | * free blocks is less than half of the reservation | 1586 | * free blocks is less than half of the reservation |
| 1586 | * window size. | 1587 | * window size. |
| 1587 | */ | 1588 | */ |
| 1588 | if (free_blocks <= (windowsz/2)) | 1589 | if (my_rsv && (free_blocks <= (windowsz/2))) |
| 1589 | continue; | 1590 | continue; |
| 1590 | 1591 | ||
| 1591 | brelse(bitmap_bh); | 1592 | brelse(bitmap_bh); |
diff --git a/fs/ext3/dir.c b/fs/ext3/dir.c index 2eea96ec78ed..4c82531ea0a8 100644 --- a/fs/ext3/dir.c +++ b/fs/ext3/dir.c | |||
| @@ -102,6 +102,7 @@ static int ext3_readdir(struct file * filp, | |||
| 102 | int err; | 102 | int err; |
| 103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
| 104 | int ret = 0; | 104 | int ret = 0; |
| 105 | int dir_has_error = 0; | ||
| 105 | 106 | ||
| 106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
| 107 | 108 | ||
| @@ -148,9 +149,12 @@ static int ext3_readdir(struct file * filp, | |||
| 148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
| 149 | */ | 150 | */ |
| 150 | if (!bh) { | 151 | if (!bh) { |
| 151 | ext3_error (sb, "ext3_readdir", | 152 | if (!dir_has_error) { |
| 152 | "directory #%lu contains a hole at offset %lu", | 153 | ext3_error(sb, __func__, "directory #%lu " |
| 153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %lld", |
| 155 | inode->i_ino, filp->f_pos); | ||
| 156 | dir_has_error = 1; | ||
| 157 | } | ||
| 154 | /* corrupt size? Maybe no more blocks to read */ | 158 | /* corrupt size? Maybe no more blocks to read */ |
| 155 | if (filp->f_pos > inode->i_blocks << 9) | 159 | if (filp->f_pos > inode->i_blocks << 9) |
| 156 | break; | 160 | break; |
| @@ -410,7 +414,7 @@ static int call_filldir(struct file * filp, void * dirent, | |||
| 410 | get_dtype(sb, fname->file_type)); | 414 | get_dtype(sb, fname->file_type)); |
| 411 | if (error) { | 415 | if (error) { |
| 412 | filp->f_pos = curr_pos; | 416 | filp->f_pos = curr_pos; |
| 413 | info->extra_fname = fname->next; | 417 | info->extra_fname = fname; |
| 414 | return error; | 418 | return error; |
| 415 | } | 419 | } |
| 416 | fname = fname->next; | 420 | fname = fname->next; |
| @@ -449,11 +453,21 @@ static int ext3_dx_readdir(struct file * filp, | |||
| 449 | * If there are any leftover names on the hash collision | 453 | * If there are any leftover names on the hash collision |
| 450 | * chain, return them first. | 454 | * chain, return them first. |
| 451 | */ | 455 | */ |
| 452 | if (info->extra_fname && | 456 | if (info->extra_fname) { |
| 453 | call_filldir(filp, dirent, filldir, info->extra_fname)) | 457 | if (call_filldir(filp, dirent, filldir, info->extra_fname)) |
| 454 | goto finished; | 458 | goto finished; |
| 455 | 459 | ||
| 456 | if (!info->curr_node) | 460 | info->extra_fname = NULL; |
| 461 | info->curr_node = rb_next(info->curr_node); | ||
| 462 | if (!info->curr_node) { | ||
| 463 | if (info->next_hash == ~0) { | ||
| 464 | filp->f_pos = EXT3_HTREE_EOF; | ||
| 465 | goto finished; | ||
| 466 | } | ||
| 467 | info->curr_hash = info->next_hash; | ||
| 468 | info->curr_minor_hash = 0; | ||
| 469 | } | ||
| 470 | } else if (!info->curr_node) | ||
| 457 | info->curr_node = rb_first(&info->root); | 471 | info->curr_node = rb_first(&info->root); |
| 458 | 472 | ||
| 459 | while (1) { | 473 | while (1) { |
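The call_filldir()/ext3_dx_readdir() hunks above fix how a directory read resumes after filldir rejects an entry mid-chain: the saved pointer must reference the entry that was refused, not its successor, or that entry is silently skipped on the next pass. A toy illustration of the resume-pointer rule (all names hypothetical, not ext3 code):

#include <stdio.h>

struct name {
	const char *s;
	struct name *next;
};

/* Consumer that accepts at most '*budget' entries per pass, like a filldir
 * callback whose user buffer fills up. */
static int emit(const char *s, int *budget)
{
	if (*budget == 0)
		return -1;			/* "buffer full": stop for now */
	printf("%s\n", s);
	(*budget)--;
	return 0;
}

int main(void)
{
	struct name c = { "c", NULL }, b = { "b", &c }, a = { "a", &b };
	struct name *resume = &a;
	int budget;

	for (budget = 2; resume; budget = 2) {	/* each pass has room for two */
		struct name *p = resume;

		resume = NULL;
		while (p) {
			if (emit(p->s, &budget)) {
				resume = p;	/* retry *this* entry next pass */
				break;		/* saving p->next would drop it */
			}
			p = p->next;
		}
	}
	return 0;
}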
diff --git a/fs/ext3/file.c b/fs/ext3/file.c index acc4913d3019..3be1e0689c9a 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c | |||
| @@ -134,5 +134,6 @@ const struct inode_operations ext3_file_inode_operations = { | |||
| 134 | .removexattr = generic_removexattr, | 134 | .removexattr = generic_removexattr, |
| 135 | #endif | 135 | #endif |
| 136 | .permission = ext3_permission, | 136 | .permission = ext3_permission, |
| 137 | .fiemap = ext3_fiemap, | ||
| 137 | }; | 138 | }; |
| 138 | 139 | ||
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 507d8689b111..f8424ad89971 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c | |||
| @@ -36,6 +36,7 @@ | |||
| 36 | #include <linux/mpage.h> | 36 | #include <linux/mpage.h> |
| 37 | #include <linux/uio.h> | 37 | #include <linux/uio.h> |
| 38 | #include <linux/bio.h> | 38 | #include <linux/bio.h> |
| 39 | #include <linux/fiemap.h> | ||
| 39 | #include "xattr.h" | 40 | #include "xattr.h" |
| 40 | #include "acl.h" | 41 | #include "acl.h" |
| 41 | 42 | ||
| @@ -981,6 +982,13 @@ out: | |||
| 981 | return ret; | 982 | return ret; |
| 982 | } | 983 | } |
| 983 | 984 | ||
| 985 | int ext3_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 986 | u64 start, u64 len) | ||
| 987 | { | ||
| 988 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
| 989 | ext3_get_block); | ||
| 990 | } | ||
| 991 | |||
| 984 | /* | 992 | /* |
| 985 | * `handle' can be NULL if create is zero | 993 | * `handle' can be NULL if create is zero |
| 986 | */ | 994 | */ |
| @@ -1178,6 +1186,13 @@ write_begin_failed: | |||
| 1178 | ext3_journal_stop(handle); | 1186 | ext3_journal_stop(handle); |
| 1179 | unlock_page(page); | 1187 | unlock_page(page); |
| 1180 | page_cache_release(page); | 1188 | page_cache_release(page); |
| 1189 | /* | ||
| 1190 | * block_write_begin may have instantiated a few blocks | ||
| 1191 | * outside i_size. Trim these off again. Don't need | ||
| 1192 | * i_size_read because we hold i_mutex. | ||
| 1193 | */ | ||
| 1194 | if (pos + len > inode->i_size) | ||
| 1195 | vmtruncate(inode, inode->i_size); | ||
| 1181 | } | 1196 | } |
| 1182 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) | 1197 | if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) |
| 1183 | goto retry; | 1198 | goto retry; |
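The write_begin failure path above trims off blocks that block_write_begin() instantiated beyond i_size before the rest of the operation failed. The same roll-back-speculative-allocation idea can be sketched as a userspace analogy with posix_fallocate() and ftruncate(); this only illustrates the principle, not the kernel path, and the file name and simulated failure are placeholders:

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>

int main(void)
{
	const char *path = "/tmp/rollback-demo";	/* placeholder */
	int fd = open(path, O_RDWR | O_CREAT, 0644);
	struct stat st;
	off_t old_size;
	int failed = 1;					/* pretend a later step failed */

	if (fd < 0 || fstat(fd, &st) < 0)
		return 1;
	old_size = st.st_size;

	/* Speculatively extend the file, like write_begin allocating blocks. */
	if (posix_fallocate(fd, 0, old_size + 4096) != 0)
		return 1;

	if (failed) {
		/* Error path: trim the speculative extension off again. */
		if (ftruncate(fd, old_size) < 0)
			perror("ftruncate");
	}
	close(fd);
	return 0;
}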
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c index 77278e947e94..78fdf3836370 100644 --- a/fs/ext3/resize.c +++ b/fs/ext3/resize.c | |||
| @@ -790,7 +790,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) | |||
| 790 | 790 | ||
| 791 | if (reserved_gdb || gdb_off == 0) { | 791 | if (reserved_gdb || gdb_off == 0) { |
| 792 | if (!EXT3_HAS_COMPAT_FEATURE(sb, | 792 | if (!EXT3_HAS_COMPAT_FEATURE(sb, |
| 793 | EXT3_FEATURE_COMPAT_RESIZE_INODE)){ | 793 | EXT3_FEATURE_COMPAT_RESIZE_INODE) |
| 794 | || !le16_to_cpu(es->s_reserved_gdt_blocks)) { | ||
| 794 | ext3_warning(sb, __func__, | 795 | ext3_warning(sb, __func__, |
| 795 | "No reserved GDT blocks, can't resize"); | 796 | "No reserved GDT blocks, can't resize"); |
| 796 | return -EPERM; | 797 | return -EPERM; |
diff --git a/fs/ext3/super.c b/fs/ext3/super.c index f38a5afc39a1..3a260af5544d 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c | |||
| @@ -625,6 +625,9 @@ static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 625 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) | 625 | else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) |
| 626 | seq_puts(seq, ",data=writeback"); | 626 | seq_puts(seq, ",data=writeback"); |
| 627 | 627 | ||
| 628 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
| 629 | seq_puts(seq, ",data_err=abort"); | ||
| 630 | |||
| 628 | ext3_show_quota_options(seq, sb); | 631 | ext3_show_quota_options(seq, sb); |
| 629 | 632 | ||
| 630 | return 0; | 633 | return 0; |
| @@ -754,13 +757,14 @@ enum { | |||
| 754 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 757 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
| 755 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 758 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
| 756 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 759 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
| 760 | Opt_data_err_abort, Opt_data_err_ignore, | ||
| 757 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 761 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
| 758 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 762 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
| 759 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 763 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
| 760 | Opt_grpquota | 764 | Opt_grpquota |
| 761 | }; | 765 | }; |
| 762 | 766 | ||
| 763 | static match_table_t tokens = { | 767 | static const match_table_t tokens = { |
| 764 | {Opt_bsd_df, "bsddf"}, | 768 | {Opt_bsd_df, "bsddf"}, |
| 765 | {Opt_minix_df, "minixdf"}, | 769 | {Opt_minix_df, "minixdf"}, |
| 766 | {Opt_grpid, "grpid"}, | 770 | {Opt_grpid, "grpid"}, |
| @@ -796,6 +800,8 @@ static match_table_t tokens = { | |||
| 796 | {Opt_data_journal, "data=journal"}, | 800 | {Opt_data_journal, "data=journal"}, |
| 797 | {Opt_data_ordered, "data=ordered"}, | 801 | {Opt_data_ordered, "data=ordered"}, |
| 798 | {Opt_data_writeback, "data=writeback"}, | 802 | {Opt_data_writeback, "data=writeback"}, |
| 803 | {Opt_data_err_abort, "data_err=abort"}, | ||
| 804 | {Opt_data_err_ignore, "data_err=ignore"}, | ||
| 799 | {Opt_offusrjquota, "usrjquota="}, | 805 | {Opt_offusrjquota, "usrjquota="}, |
| 800 | {Opt_usrjquota, "usrjquota=%s"}, | 806 | {Opt_usrjquota, "usrjquota=%s"}, |
| 801 | {Opt_offgrpjquota, "grpjquota="}, | 807 | {Opt_offgrpjquota, "grpjquota="}, |
| @@ -1011,6 +1017,12 @@ static int parse_options (char *options, struct super_block *sb, | |||
| 1011 | sbi->s_mount_opt |= data_opt; | 1017 | sbi->s_mount_opt |= data_opt; |
| 1012 | } | 1018 | } |
| 1013 | break; | 1019 | break; |
| 1020 | case Opt_data_err_abort: | ||
| 1021 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
| 1022 | break; | ||
| 1023 | case Opt_data_err_ignore: | ||
| 1024 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
| 1025 | break; | ||
| 1014 | #ifdef CONFIG_QUOTA | 1026 | #ifdef CONFIG_QUOTA |
| 1015 | case Opt_usrjquota: | 1027 | case Opt_usrjquota: |
| 1016 | qtype = USRQUOTA; | 1028 | qtype = USRQUOTA; |
| @@ -1986,6 +1998,10 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) | |||
| 1986 | journal->j_flags |= JFS_BARRIER; | 1998 | journal->j_flags |= JFS_BARRIER; |
| 1987 | else | 1999 | else |
| 1988 | journal->j_flags &= ~JFS_BARRIER; | 2000 | journal->j_flags &= ~JFS_BARRIER; |
| 2001 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
| 2002 | journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR; | ||
| 2003 | else | ||
| 2004 | journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR; | ||
| 1989 | spin_unlock(&journal->j_state_lock); | 2005 | spin_unlock(&journal->j_state_lock); |
| 1990 | } | 2006 | } |
| 1991 | 2007 | ||
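The super.c hunks above add the data_err=abort / data_err=ignore mount option pair and map it onto the journal's JFS_ABORT_ON_SYNCDATA_ERR flag. A hedged sketch of switching an already-mounted ext3 filesystem over at runtime with mount(2); the device and mount point are placeholders and the call needs root:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
	/* Remount in place, asking the journal to abort if a file-data
	 * buffer fails to write out in ordered mode. */
	if (mount("/dev/sdb1", "/mnt/test", "ext3", MS_REMOUNT,
		  "data_err=abort") < 0) {
		perror("mount");
		return 1;
	}
	return 0;
}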
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig new file mode 100644 index 000000000000..7505482a08fa --- /dev/null +++ b/fs/ext4/Kconfig | |||
| @@ -0,0 +1,79 @@ | |||
| 1 | config EXT4_FS | ||
| 2 | tristate "The Extended 4 (ext4) filesystem" | ||
| 3 | select JBD2 | ||
| 4 | select CRC16 | ||
| 5 | help | ||
| 6 | This is the next generation of the ext3 filesystem. | ||
| 7 | |||
| 8 | Unlike the change from ext2 filesystem to ext3 filesystem, | ||
| 9 | the on-disk format of ext4 is not forwards compatible with | ||
| 10 | ext3; it is based on extent maps and it supports 48-bit | ||
| 11 | physical block numbers. The ext4 filesystem also supports delayed | ||
| 12 | allocation, persistent preallocation, high resolution time stamps, | ||
| 13 | and a number of other features to improve performance and speed | ||
| 14 | up fsck time. For more information, please see the web pages at | ||
| 15 | http://ext4.wiki.kernel.org. | ||
| 16 | |||
| 17 | The ext4 filesystem will support mounting an ext3 | ||
| 18 | filesystem; while there will be some performance gains from | ||
| 19 | the delayed allocation and inode table readahead, the best | ||
| 20 | performance gains will require enabling ext4 features in the | ||
| 21 | filesystem, or formatting a new filesystem as an ext4 | ||
| 22 | filesystem initially. | ||
| 23 | |||
| 24 | To compile this file system support as a module, choose M here. The | ||
| 25 | module will be called ext4. | ||
| 26 | |||
| 27 | If unsure, say N. | ||
| 28 | |||
| 29 | config EXT4DEV_COMPAT | ||
| 30 | bool "Enable ext4dev compatibility" | ||
| 31 | depends on EXT4_FS | ||
| 32 | help | ||
| 33 | Starting with 2.6.28, the ext4 filesystem was renamed | ||
| 34 | from ext4dev to ext4. Unfortunately, some legacy userspace | ||
| 35 | programs (such as klibc's fstype) have "ext4dev" | ||
| 36 | hardcoded. | ||
| 37 | |||
| 38 | To enable backwards compatibility for systems that still | ||
| 39 | expect to mount ext4 filesystems using the ext4dev name, | ||
| 40 | choose Y here. This feature will go away by 2.6.31, so | ||
| 41 | please arrange to get your userspace programs fixed! | ||
| 42 | |||
| 43 | config EXT4_FS_XATTR | ||
| 44 | bool "Ext4 extended attributes" | ||
| 45 | depends on EXT4_FS | ||
| 46 | default y | ||
| 47 | help | ||
| 48 | Extended attributes are name:value pairs associated with inodes by | ||
| 49 | the kernel or by users (see the attr(5) manual page, or visit | ||
| 50 | <http://acl.bestbits.at/> for details). | ||
| 51 | |||
| 52 | If unsure, say N. | ||
| 53 | |||
| 54 | You need this for POSIX ACL support on ext4. | ||
| 55 | |||
| 56 | config EXT4_FS_POSIX_ACL | ||
| 57 | bool "Ext4 POSIX Access Control Lists" | ||
| 58 | depends on EXT4_FS_XATTR | ||
| 59 | select FS_POSIX_ACL | ||
| 60 | help | ||
| 61 | POSIX Access Control Lists (ACLs) support permissions for users and | ||
| 62 | groups beyond the owner/group/world scheme. | ||
| 63 | |||
| 64 | To learn more about Access Control Lists, visit the POSIX ACLs for | ||
| 65 | Linux website <http://acl.bestbits.at/>. | ||
| 66 | |||
| 67 | If you don't know what Access Control Lists are, say N. | ||
| 68 | |||
| 69 | config EXT4_FS_SECURITY | ||
| 70 | bool "Ext4 Security Labels" | ||
| 71 | depends on EXT4_FS_XATTR | ||
| 72 | help | ||
| 73 | Security labels support alternative access control models | ||
| 74 | implemented by security modules like SELinux. This option | ||
| 75 | enables an extended attribute handler for file security | ||
| 76 | labels in the ext4 filesystem. | ||
| 77 | |||
| 78 | If you are not using a security module that requires using | ||
| 79 | extended attributes for file security labels, say N. | ||
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index ac6fa8ca0a2f..a8ff003a00f7 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile | |||
| @@ -2,12 +2,12 @@ | |||
| 2 | # Makefile for the linux ext4-filesystem routines. | 2 | # Makefile for the linux ext4-filesystem routines. |
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o | 5 | obj-$(CONFIG_EXT4_FS) += ext4.o |
| 6 | 6 | ||
| 7 | ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ | 7 | ext4-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ |
| 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ | 8 | ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \ |
| 9 | ext4_jbd2.o migrate.o mballoc.o | 9 | ext4_jbd2.o migrate.o mballoc.o |
| 10 | 10 | ||
| 11 | ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o | 11 | ext4-$(CONFIG_EXT4_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o |
| 12 | ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o | 12 | ext4-$(CONFIG_EXT4_FS_POSIX_ACL) += acl.o |
| 13 | ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o | 13 | ext4-$(CONFIG_EXT4_FS_SECURITY) += xattr_security.o |
diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index cd2b855a07d6..cb45257a246e 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h | |||
| @@ -51,18 +51,18 @@ static inline int ext4_acl_count(size_t size) | |||
| 51 | } | 51 | } |
| 52 | } | 52 | } |
| 53 | 53 | ||
| 54 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 54 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 55 | 55 | ||
| 56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl | 56 | /* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl |
| 57 | if the ACL has not been cached */ | 57 | if the ACL has not been cached */ |
| 58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) | 58 | #define EXT4_ACL_NOT_CACHED ((void *)-1) |
| 59 | 59 | ||
| 60 | /* acl.c */ | 60 | /* acl.c */ |
| 61 | extern int ext4_permission (struct inode *, int); | 61 | extern int ext4_permission(struct inode *, int); |
| 62 | extern int ext4_acl_chmod (struct inode *); | 62 | extern int ext4_acl_chmod(struct inode *); |
| 63 | extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); | 63 | extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); |
| 64 | 64 | ||
| 65 | #else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 65 | #else /* CONFIG_EXT4_FS_POSIX_ACL */ |
| 66 | #include <linux/sched.h> | 66 | #include <linux/sched.h> |
| 67 | #define ext4_permission NULL | 67 | #define ext4_permission NULL |
| 68 | 68 | ||
| @@ -77,5 +77,5 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) | |||
| 77 | { | 77 | { |
| 78 | return 0; | 78 | return 0; |
| 79 | } | 79 | } |
| 80 | #endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ | 80 | #endif /* CONFIG_EXT4_FS_POSIX_ACL */ |
| 81 | 81 | ||
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e9fa960ba6da..b9821be709bd 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c | |||
| @@ -83,6 +83,7 @@ static int ext4_group_used_meta_blocks(struct super_block *sb, | |||
| 83 | } | 83 | } |
| 84 | return used_blocks; | 84 | return used_blocks; |
| 85 | } | 85 | } |
| 86 | |||
| 86 | /* Initializes an uninitialized block bitmap if given, and returns the | 87 | /* Initializes an uninitialized block bitmap if given, and returns the |
| 87 | * number of blocks free in the group. */ | 88 | * number of blocks free in the group. */ |
| 88 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | 89 | unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, |
| @@ -132,7 +133,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
| 132 | */ | 133 | */ |
| 133 | group_blocks = ext4_blocks_count(sbi->s_es) - | 134 | group_blocks = ext4_blocks_count(sbi->s_es) - |
| 134 | le32_to_cpu(sbi->s_es->s_first_data_block) - | 135 | le32_to_cpu(sbi->s_es->s_first_data_block) - |
| 135 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count -1)); | 136 | (EXT4_BLOCKS_PER_GROUP(sb) * (sbi->s_groups_count - 1)); |
| 136 | } else { | 137 | } else { |
| 137 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); | 138 | group_blocks = EXT4_BLOCKS_PER_GROUP(sb); |
| 138 | } | 139 | } |
| @@ -200,20 +201,20 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, | |||
| 200 | * @bh: pointer to the buffer head to store the block | 201 | * @bh: pointer to the buffer head to store the block |
| 201 | * group descriptor | 202 | * group descriptor |
| 202 | */ | 203 | */ |
| 203 | struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 204 | struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb, |
| 204 | ext4_group_t block_group, | 205 | ext4_group_t block_group, |
| 205 | struct buffer_head ** bh) | 206 | struct buffer_head **bh) |
| 206 | { | 207 | { |
| 207 | unsigned long group_desc; | 208 | unsigned long group_desc; |
| 208 | unsigned long offset; | 209 | unsigned long offset; |
| 209 | struct ext4_group_desc * desc; | 210 | struct ext4_group_desc *desc; |
| 210 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 211 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 211 | 212 | ||
| 212 | if (block_group >= sbi->s_groups_count) { | 213 | if (block_group >= sbi->s_groups_count) { |
| 213 | ext4_error (sb, "ext4_get_group_desc", | 214 | ext4_error(sb, "ext4_get_group_desc", |
| 214 | "block_group >= groups_count - " | 215 | "block_group >= groups_count - " |
| 215 | "block_group = %lu, groups_count = %lu", | 216 | "block_group = %lu, groups_count = %lu", |
| 216 | block_group, sbi->s_groups_count); | 217 | block_group, sbi->s_groups_count); |
| 217 | 218 | ||
| 218 | return NULL; | 219 | return NULL; |
| 219 | } | 220 | } |
| @@ -222,10 +223,10 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | |||
| 222 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); | 223 | group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); |
| 223 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); | 224 | offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); |
| 224 | if (!sbi->s_group_desc[group_desc]) { | 225 | if (!sbi->s_group_desc[group_desc]) { |
| 225 | ext4_error (sb, "ext4_get_group_desc", | 226 | ext4_error(sb, "ext4_get_group_desc", |
| 226 | "Group descriptor not loaded - " | 227 | "Group descriptor not loaded - " |
| 227 | "block_group = %lu, group_desc = %lu, desc = %lu", | 228 | "block_group = %lu, group_desc = %lu, desc = %lu", |
| 228 | block_group, group_desc, offset); | 229 | block_group, group_desc, offset); |
| 229 | return NULL; | 230 | return NULL; |
| 230 | } | 231 | } |
| 231 | 232 | ||
| @@ -302,8 +303,8 @@ err_out: | |||
| 302 | struct buffer_head * | 303 | struct buffer_head * |
| 303 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | 304 | ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) |
| 304 | { | 305 | { |
| 305 | struct ext4_group_desc * desc; | 306 | struct ext4_group_desc *desc; |
| 306 | struct buffer_head * bh = NULL; | 307 | struct buffer_head *bh = NULL; |
| 307 | ext4_fsblk_t bitmap_blk; | 308 | ext4_fsblk_t bitmap_blk; |
| 308 | 309 | ||
| 309 | desc = ext4_get_group_desc(sb, block_group, NULL); | 310 | desc = ext4_get_group_desc(sb, block_group, NULL); |
| @@ -318,9 +319,11 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 318 | block_group, bitmap_blk); | 319 | block_group, bitmap_blk); |
| 319 | return NULL; | 320 | return NULL; |
| 320 | } | 321 | } |
| 321 | if (bh_uptodate_or_lock(bh)) | 322 | if (buffer_uptodate(bh) && |
| 323 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
| 322 | return bh; | 324 | return bh; |
| 323 | 325 | ||
| 326 | lock_buffer(bh); | ||
| 324 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 327 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
| 325 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 328 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 326 | ext4_init_block_bitmap(sb, bh, block_group, desc); | 329 | ext4_init_block_bitmap(sb, bh, block_group, desc); |
| @@ -345,301 +348,6 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 345 | */ | 348 | */ |
| 346 | return bh; | 349 | return bh; |
| 347 | } | 350 | } |
| 348 | /* | ||
| 349 | * The reservation window structure operations | ||
| 350 | * -------------------------------------------- | ||
| 351 | * Operations include: | ||
| 352 | * dump, find, add, remove, is_empty, find_next_reservable_window, etc. | ||
| 353 | * | ||
| 354 | * We use a red-black tree to represent per-filesystem reservation | ||
| 355 | * windows. | ||
| 356 | * | ||
| 357 | */ | ||
| 358 | |||
| 359 | /** | ||
| 360 | * __rsv_window_dump() -- Dump the filesystem block allocation reservation map | ||
| 361 | * @rb_root: root of per-filesystem reservation rb tree | ||
| 362 | * @verbose: verbose mode | ||
| 363 | * @fn: function which wishes to dump the reservation map | ||
| 364 | * | ||
| 365 | * If verbose is turned on, it will print the whole block reservation | ||
| 366 | * windows(start, end). Otherwise, it will only print out the "bad" windows, | ||
| 367 | * those windows that overlap with their immediate neighbors. | ||
| 368 | */ | ||
| 369 | #if 1 | ||
| 370 | static void __rsv_window_dump(struct rb_root *root, int verbose, | ||
| 371 | const char *fn) | ||
| 372 | { | ||
| 373 | struct rb_node *n; | ||
| 374 | struct ext4_reserve_window_node *rsv, *prev; | ||
| 375 | int bad; | ||
| 376 | |||
| 377 | restart: | ||
| 378 | n = rb_first(root); | ||
| 379 | bad = 0; | ||
| 380 | prev = NULL; | ||
| 381 | |||
| 382 | printk("Block Allocation Reservation Windows Map (%s):\n", fn); | ||
| 383 | while (n) { | ||
| 384 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
| 385 | if (verbose) | ||
| 386 | printk("reservation window 0x%p " | ||
| 387 | "start: %llu, end: %llu\n", | ||
| 388 | rsv, rsv->rsv_start, rsv->rsv_end); | ||
| 389 | if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { | ||
| 390 | printk("Bad reservation %p (start >= end)\n", | ||
| 391 | rsv); | ||
| 392 | bad = 1; | ||
| 393 | } | ||
| 394 | if (prev && prev->rsv_end >= rsv->rsv_start) { | ||
| 395 | printk("Bad reservation %p (prev->end >= start)\n", | ||
| 396 | rsv); | ||
| 397 | bad = 1; | ||
| 398 | } | ||
| 399 | if (bad) { | ||
| 400 | if (!verbose) { | ||
| 401 | printk("Restarting reservation walk in verbose mode\n"); | ||
| 402 | verbose = 1; | ||
| 403 | goto restart; | ||
| 404 | } | ||
| 405 | } | ||
| 406 | n = rb_next(n); | ||
| 407 | prev = rsv; | ||
| 408 | } | ||
| 409 | printk("Window map complete.\n"); | ||
| 410 | BUG_ON(bad); | ||
| 411 | } | ||
| 412 | #define rsv_window_dump(root, verbose) \ | ||
| 413 | __rsv_window_dump((root), (verbose), __func__) | ||
| 414 | #else | ||
| 415 | #define rsv_window_dump(root, verbose) do {} while (0) | ||
| 416 | #endif | ||
| 417 | |||
| 418 | /** | ||
| 419 | * goal_in_my_reservation() | ||
| 420 | * @rsv: inode's reservation window | ||
| 421 | * @grp_goal: given goal block relative to the allocation block group | ||
| 422 | * @group: the current allocation block group | ||
| 423 | * @sb: filesystem super block | ||
| 424 | * | ||
| 425 | * Test if the given goal block (group relative) is within the file's | ||
| 426 | * own block reservation window range. | ||
| 427 | * | ||
| 428 | * If the reservation window is outside the goal allocation group, return 0; | ||
| 429 | * grp_goal (given goal block) could be -1, which means no specific | ||
| 430 | * goal block. In this case, always return 1. | ||
| 431 | * If the goal block is within the reservation window, return 1; | ||
| 432 | * otherwise, return 0; | ||
| 433 | */ | ||
| 434 | static int | ||
| 435 | goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, | ||
| 436 | ext4_group_t group, struct super_block *sb) | ||
| 437 | { | ||
| 438 | ext4_fsblk_t group_first_block, group_last_block; | ||
| 439 | |||
| 440 | group_first_block = ext4_group_first_block_no(sb, group); | ||
| 441 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
| 442 | |||
| 443 | if ((rsv->_rsv_start > group_last_block) || | ||
| 444 | (rsv->_rsv_end < group_first_block)) | ||
| 445 | return 0; | ||
| 446 | if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) | ||
| 447 | || (grp_goal + group_first_block > rsv->_rsv_end))) | ||
| 448 | return 0; | ||
| 449 | return 1; | ||
| 450 | } | ||
| 451 | |||
| 452 | /** | ||
| 453 | * search_reserve_window() | ||
| 454 | * @rb_root: root of reservation tree | ||
| 455 | * @goal: target allocation block | ||
| 456 | * | ||
| 457 | * Find the reserved window which includes the goal, or the previous one | ||
| 458 | * if the goal is not in any window. | ||
| 459 | * Returns NULL if there are no windows or if all windows start after the goal. | ||
| 460 | */ | ||
| 461 | static struct ext4_reserve_window_node * | ||
| 462 | search_reserve_window(struct rb_root *root, ext4_fsblk_t goal) | ||
| 463 | { | ||
| 464 | struct rb_node *n = root->rb_node; | ||
| 465 | struct ext4_reserve_window_node *rsv; | ||
| 466 | |||
| 467 | if (!n) | ||
| 468 | return NULL; | ||
| 469 | |||
| 470 | do { | ||
| 471 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
| 472 | |||
| 473 | if (goal < rsv->rsv_start) | ||
| 474 | n = n->rb_left; | ||
| 475 | else if (goal > rsv->rsv_end) | ||
| 476 | n = n->rb_right; | ||
| 477 | else | ||
| 478 | return rsv; | ||
| 479 | } while (n); | ||
| 480 | /* | ||
| 481 | * We've fallen off the end of the tree: the goal wasn't inside | ||
| 482 | * any particular node. OK, the previous node must be to one | ||
| 483 | * side of the interval containing the goal. If it's the RHS, | ||
| 484 | * we need to back up one. | ||
| 485 | */ | ||
| 486 | if (rsv->rsv_start > goal) { | ||
| 487 | n = rb_prev(&rsv->rsv_node); | ||
| 488 | rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node); | ||
| 489 | } | ||
| 490 | return rsv; | ||
| 491 | } | ||
| 492 | |||
| 493 | /** | ||
| 494 | * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree. | ||
| 495 | * @sb: super block | ||
| 496 | * @rsv: reservation window to add | ||
| 497 | * | ||
| 498 | * Must be called with rsv_lock held. | ||
| 499 | */ | ||
| 500 | void ext4_rsv_window_add(struct super_block *sb, | ||
| 501 | struct ext4_reserve_window_node *rsv) | ||
| 502 | { | ||
| 503 | struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root; | ||
| 504 | struct rb_node *node = &rsv->rsv_node; | ||
| 505 | ext4_fsblk_t start = rsv->rsv_start; | ||
| 506 | |||
| 507 | struct rb_node ** p = &root->rb_node; | ||
| 508 | struct rb_node * parent = NULL; | ||
| 509 | struct ext4_reserve_window_node *this; | ||
| 510 | |||
| 511 | while (*p) | ||
| 512 | { | ||
| 513 | parent = *p; | ||
| 514 | this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node); | ||
| 515 | |||
| 516 | if (start < this->rsv_start) | ||
| 517 | p = &(*p)->rb_left; | ||
| 518 | else if (start > this->rsv_end) | ||
| 519 | p = &(*p)->rb_right; | ||
| 520 | else { | ||
| 521 | rsv_window_dump(root, 1); | ||
| 522 | BUG(); | ||
| 523 | } | ||
| 524 | } | ||
| 525 | |||
| 526 | rb_link_node(node, parent, p); | ||
| 527 | rb_insert_color(node, root); | ||
| 528 | } | ||
| 529 | |||
| 530 | /** | ||
| 531 | * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree | ||
| 532 | * @sb: super block | ||
| 533 | * @rsv: reservation window to remove | ||
| 534 | * | ||
| 535 | * Mark the block reservation window as not allocated, and unlink it | ||
| 536 | * from the filesystem reservation window rb tree. Must be called with | ||
| 537 | * rsv_lock held. | ||
| 538 | */ | ||
| 539 | static void rsv_window_remove(struct super_block *sb, | ||
| 540 | struct ext4_reserve_window_node *rsv) | ||
| 541 | { | ||
| 542 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 543 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 544 | rsv->rsv_alloc_hit = 0; | ||
| 545 | rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root); | ||
| 546 | } | ||
| 547 | |||
| 548 | /* | ||
| 549 | * rsv_is_empty() -- Check if the reservation window is allocated. | ||
| 550 | * @rsv: given reservation window to check | ||
| 551 | * | ||
| 552 | * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED. | ||
| 553 | */ | ||
| 554 | static inline int rsv_is_empty(struct ext4_reserve_window *rsv) | ||
| 555 | { | ||
| 556 | /* a valid reservation end block could not be 0 */ | ||
| 557 | return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 558 | } | ||
| 559 | |||
| 560 | /** | ||
| 561 | * ext4_init_block_alloc_info() | ||
| 562 | * @inode: file inode structure | ||
| 563 | * | ||
| 564 | * Allocate and initialize the reservation window structure, and | ||
| 565 | * finally link the window to the ext4 inode structure | ||
| 566 | * | ||
| 567 | * The reservation window structure is only dynamically allocated | ||
| 568 | * and linked to ext4 inode the first time the open file | ||
| 569 | * needs a new block. So, before every ext4_new_block(s) call, for | ||
| 570 | * regular files, we should check whether the reservation window | ||
| 571 | * structure exists or not. In the latter case, this function is called. | ||
| 572 | * Failure to do so will result in block reservation being turned off for that | ||
| 573 | * open file. | ||
| 574 | * | ||
| 575 | * This function is called from ext4_get_blocks_handle(), also called | ||
| 576 | * when setting the reservation window size through ioctl before the file | ||
| 577 | * is open for write (needs block allocation). | ||
| 578 | * | ||
| 579 | * Needs down_write(i_data_sem) protection prior to call this function. | ||
| 580 | */ | ||
| 581 | void ext4_init_block_alloc_info(struct inode *inode) | ||
| 582 | { | ||
| 583 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 584 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
| 585 | struct super_block *sb = inode->i_sb; | ||
| 586 | |||
| 587 | block_i = kmalloc(sizeof(*block_i), GFP_NOFS); | ||
| 588 | if (block_i) { | ||
| 589 | struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node; | ||
| 590 | |||
| 591 | rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 592 | rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 593 | |||
| 594 | /* | ||
| 595 | * if filesystem is mounted with NORESERVATION, the goal | ||
| 596 | * reservation window size is set to zero to indicate | ||
| 597 | * block reservation is off | ||
| 598 | */ | ||
| 599 | if (!test_opt(sb, RESERVATION)) | ||
| 600 | rsv->rsv_goal_size = 0; | ||
| 601 | else | ||
| 602 | rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS; | ||
| 603 | rsv->rsv_alloc_hit = 0; | ||
| 604 | block_i->last_alloc_logical_block = 0; | ||
| 605 | block_i->last_alloc_physical_block = 0; | ||
| 606 | } | ||
| 607 | ei->i_block_alloc_info = block_i; | ||
| 608 | } | ||
| 609 | |||
| 610 | /** | ||
| 611 | * ext4_discard_reservation() | ||
| 612 | * @inode: inode | ||
| 613 | * | ||
| 614 | * Discard(free) block reservation window on last file close, or truncate | ||
| 615 | * or at last iput(). | ||
| 616 | * | ||
| 617 | * It is being called in three cases: | ||
| 618 | * ext4_release_file(): last writer close the file | ||
| 619 | * ext4_clear_inode(): last iput(), when nobody link to this file. | ||
| 620 | * ext4_truncate(): when the block indirect map is about to change. | ||
| 621 | * | ||
| 622 | */ | ||
| 623 | void ext4_discard_reservation(struct inode *inode) | ||
| 624 | { | ||
| 625 | struct ext4_inode_info *ei = EXT4_I(inode); | ||
| 626 | struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info; | ||
| 627 | struct ext4_reserve_window_node *rsv; | ||
| 628 | spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock; | ||
| 629 | |||
| 630 | ext4_mb_discard_inode_preallocations(inode); | ||
| 631 | |||
| 632 | if (!block_i) | ||
| 633 | return; | ||
| 634 | |||
| 635 | rsv = &block_i->rsv_window_node; | ||
| 636 | if (!rsv_is_empty(&rsv->rsv_window)) { | ||
| 637 | spin_lock(rsv_lock); | ||
| 638 | if (!rsv_is_empty(&rsv->rsv_window)) | ||
| 639 | rsv_window_remove(inode->i_sb, rsv); | ||
| 640 | spin_unlock(rsv_lock); | ||
| 641 | } | ||
| 642 | } | ||
| 643 | 351 | ||
| 644 | /** | 352 | /** |
| 645 | * ext4_free_blocks_sb() -- Free given blocks and update quota | 353 | * ext4_free_blocks_sb() -- Free given blocks and update quota |
| @@ -648,6 +356,13 @@ void ext4_discard_reservation(struct inode *inode) | |||
| 648 | * @block: start physical block to free | 356 | * @block: start physical block to free |
| 649 | * @count: number of blocks to free | 357 | * @count: number of blocks to free |
| 650 | * @pdquot_freed_blocks: pointer to quota | 358 | * @pdquot_freed_blocks: pointer to quota |
| 359 | * | ||
| 360 | * XXX This function is only used by the on-line resizing code, which | ||
| 361 | * should probably be fixed up to call the mballoc variant. There | ||
| 362 | * this needs to be cleaned up later; in fact, I'm not convinced this | ||
| 363 | * is 100% correct in the face of the mballoc code. The online resizing | ||
| 364 | * code needs to be fixed up to more tightly (and correctly) interlock | ||
| 365 | * with the mballoc code. | ||
| 651 | */ | 366 | */ |
| 652 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | 367 | void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
| 653 | ext4_fsblk_t block, unsigned long count, | 368 | ext4_fsblk_t block, unsigned long count, |
| @@ -659,8 +374,8 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
| 659 | ext4_grpblk_t bit; | 374 | ext4_grpblk_t bit; |
| 660 | unsigned long i; | 375 | unsigned long i; |
| 661 | unsigned long overflow; | 376 | unsigned long overflow; |
| 662 | struct ext4_group_desc * desc; | 377 | struct ext4_group_desc *desc; |
| 663 | struct ext4_super_block * es; | 378 | struct ext4_super_block *es; |
| 664 | struct ext4_sb_info *sbi; | 379 | struct ext4_sb_info *sbi; |
| 665 | int err = 0, ret; | 380 | int err = 0, ret; |
| 666 | ext4_grpblk_t group_freed; | 381 | ext4_grpblk_t group_freed; |
| @@ -671,13 +386,13 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, | |||
| 671 | if (block < le32_to_cpu(es->s_first_data_block) || | 386 | if (block < le32_to_cpu(es->s_first_data_block) || |
| 672 | block + count < block || | 387 | block + count < block || |
| 673 | block + count > ext4_blocks_count(es)) { | 388 | block + count > ext4_blocks_count(es)) { |
| 674 | ext4_error (sb, "ext4_free_blocks", | 389 | ext4_error(sb, "ext4_free_blocks", |
| 675 | "Freeing blocks not in datazone - " | 390 | "Freeing blocks not in datazone - " |
| 676 | "block = %llu, count = %lu", block, count); | 391 | "block = %llu, count = %lu", block, count); |
| 677 | goto error_return; | 392 | goto error_return; |
| 678 | } | 393 | } |
| 679 | 394 | ||
| 680 | ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); | 395 | ext4_debug("freeing block(s) %llu-%llu\n", block, block + count - 1); |
| 681 | 396 | ||
| 682 | do_more: | 397 | do_more: |
| 683 | overflow = 0; | 398 | overflow = 0; |
| @@ -694,7 +409,7 @@ do_more: | |||
| 694 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); | 409 | bitmap_bh = ext4_read_block_bitmap(sb, block_group); |
| 695 | if (!bitmap_bh) | 410 | if (!bitmap_bh) |
| 696 | goto error_return; | 411 | goto error_return; |
| 697 | desc = ext4_get_group_desc (sb, block_group, &gd_bh); | 412 | desc = ext4_get_group_desc(sb, block_group, &gd_bh); |
| 698 | if (!desc) | 413 | if (!desc) |
| 699 | goto error_return; | 414 | goto error_return; |
| 700 | 415 | ||
| @@ -703,10 +418,10 @@ do_more: | |||
| 703 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || | 418 | in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || |
| 704 | in_range(block + count - 1, ext4_inode_table(sb, desc), | 419 | in_range(block + count - 1, ext4_inode_table(sb, desc), |
| 705 | sbi->s_itb_per_group)) { | 420 | sbi->s_itb_per_group)) { |
| 706 | ext4_error (sb, "ext4_free_blocks", | 421 | ext4_error(sb, "ext4_free_blocks", |
| 707 | "Freeing blocks in system zones - " | 422 | "Freeing blocks in system zones - " |
| 708 | "Block = %llu, count = %lu", | 423 | "Block = %llu, count = %lu", |
| 709 | block, count); | 424 | block, count); |
| 710 | goto error_return; | 425 | goto error_return; |
| 711 | } | 426 | } |
| 712 | 427 | ||
| @@ -848,759 +563,71 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, | |||
| 848 | ext4_fsblk_t block, unsigned long count, | 563 | ext4_fsblk_t block, unsigned long count, |
| 849 | int metadata) | 564 | int metadata) |
| 850 | { | 565 | { |
| 851 | struct super_block * sb; | 566 | struct super_block *sb; |
| 852 | unsigned long dquot_freed_blocks; | 567 | unsigned long dquot_freed_blocks; |
| 853 | 568 | ||
| 854 | /* this isn't the right place to decide whether block is metadata | 569 | /* this isn't the right place to decide whether block is metadata |
| 855 | * inode.c/extents.c knows better, but for safety ... */ | 570 | * inode.c/extents.c knows better, but for safety ... */ |
| 856 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) || | 571 | if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) |
| 857 | ext4_should_journal_data(inode)) | 572 | metadata = 1; |
| 573 | |||
| 574 | /* We need to make sure we don't reuse | ||
| 575 | * blocks released until the transaction commits. | ||
| 576 | * Writeback mode has weak data consistency, so | ||
| 577 | * don't force data as metadata when freeing blocks | ||
| 578 | * in writeback mode. | ||
| 579 | */ | ||
| 580 | if (metadata == 0 && !ext4_should_writeback_data(inode)) | ||
| 858 | metadata = 1; | 581 | metadata = 1; |
| 859 | 582 | ||
| 860 | sb = inode->i_sb; | 583 | sb = inode->i_sb; |
| 861 | 584 | ||
| 862 | if (!test_opt(sb, MBALLOC) || !EXT4_SB(sb)->s_group_info) | 585 | ext4_mb_free_blocks(handle, inode, block, count, |
| 863 | ext4_free_blocks_sb(handle, sb, block, count, | 586 | metadata, &dquot_freed_blocks); |
| 864 | &dquot_freed_blocks); | ||
| 865 | else | ||
| 866 | ext4_mb_free_blocks(handle, inode, block, count, | ||
| 867 | metadata, &dquot_freed_blocks); | ||
| 868 | if (dquot_freed_blocks) | 587 | if (dquot_freed_blocks) |
| 869 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); | 588 | DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); |
| 870 | return; | 589 | return; |
| 871 | } | 590 | } |
| 872 | 591 | ||
| 873 | /** | 592 | int ext4_claim_free_blocks(struct ext4_sb_info *sbi, |
| 874 | * ext4_test_allocatable() | 593 | s64 nblocks) |
| 875 | * @nr: given allocation block group | ||
| 876 | * @bh: bufferhead contains the bitmap of the given block group | ||
| 877 | * | ||
| 878 | * For ext4 allocations, we must not reuse any blocks which are | ||
| 879 | * allocated in the bitmap buffer's "last committed data" copy. This | ||
| 880 | * prevents deletes from freeing up the page for reuse until we have | ||
| 881 | * committed the delete transaction. | ||
| 882 | * | ||
| 883 | * If we didn't do this, then deleting something and reallocating it as | ||
| 884 | * data would allow the old block to be overwritten before the | ||
| 885 | * transaction committed (because we force data to disk before commit). | ||
| 886 | * This would lead to corruption if we crashed between overwriting the | ||
| 887 | * data and committing the delete. | ||
| 888 | * | ||
| 889 | * @@@ We may want to make this allocation behaviour conditional on | ||
| 890 | * data-writes at some point, and disable it for metadata allocations or | ||
| 891 | * sync-data inodes. | ||
| 892 | */ | ||
| 893 | static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) | ||
| 894 | { | 594 | { |
| 895 | int ret; | 595 | s64 free_blocks, dirty_blocks; |
| 896 | struct journal_head *jh = bh2jh(bh); | 596 | s64 root_blocks = 0; |
| 897 | 597 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | |
| 898 | if (ext4_test_bit(nr, bh->b_data)) | 598 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; |
| 899 | return 0; | ||
| 900 | |||
| 901 | jbd_lock_bh_state(bh); | ||
| 902 | if (!jh->b_committed_data) | ||
| 903 | ret = 1; | ||
| 904 | else | ||
| 905 | ret = !ext4_test_bit(nr, jh->b_committed_data); | ||
| 906 | jbd_unlock_bh_state(bh); | ||
| 907 | return ret; | ||
| 908 | } | ||
| 909 | 599 | ||
| 910 | /** | 600 | free_blocks = percpu_counter_read_positive(fbc); |
| 911 | * bitmap_search_next_usable_block() | 601 | dirty_blocks = percpu_counter_read_positive(dbc); |
| 912 | * @start: the starting block (group relative) of the search | ||
| 913 | * @bh: bufferhead contains the block group bitmap | ||
| 914 | * @maxblocks: the ending block (group relative) of the reservation | ||
| 915 | * | ||
| 916 | * The bitmap search --- search forward alternately through the actual | ||
| 917 | * bitmap on disk and the last-committed copy in journal, until we find a | ||
| 918 | * bit free in both bitmaps. | ||
| 919 | */ | ||
| 920 | static ext4_grpblk_t | ||
| 921 | bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
| 922 | ext4_grpblk_t maxblocks) | ||
| 923 | { | ||
| 924 | ext4_grpblk_t next; | ||
| 925 | struct journal_head *jh = bh2jh(bh); | ||
| 926 | |||
| 927 | while (start < maxblocks) { | ||
| 928 | next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); | ||
| 929 | if (next >= maxblocks) | ||
| 930 | return -1; | ||
| 931 | if (ext4_test_allocatable(next, bh)) | ||
| 932 | return next; | ||
| 933 | jbd_lock_bh_state(bh); | ||
| 934 | if (jh->b_committed_data) | ||
| 935 | start = ext4_find_next_zero_bit(jh->b_committed_data, | ||
| 936 | maxblocks, next); | ||
| 937 | jbd_unlock_bh_state(bh); | ||
| 938 | } | ||
| 939 | return -1; | ||
| 940 | } | ||
| 941 | 602 | ||
| 942 | /** | 603 | if (!capable(CAP_SYS_RESOURCE) && |
| 943 | * find_next_usable_block() | 604 | sbi->s_resuid != current->fsuid && |
| 944 | * @start: the starting block (group relative) to find next | 605 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
| 945 | * allocatable block in bitmap. | 606 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
| 946 | * @bh: bufferhead contains the block group bitmap | ||
| 947 | * @maxblocks: the ending block (group relative) for the search | ||
| 948 | * | ||
| 949 | * Find an allocatable block in a bitmap. We honor both the bitmap and | ||
| 950 | * its last-committed copy (if that exists), and perform the "most | ||
| 951 | * appropriate allocation" algorithm of looking for a free block near | ||
| 952 | * the initial goal; then for a free byte somewhere in the bitmap; then | ||
| 953 | * for any free bit in the bitmap. | ||
| 954 | */ | ||
| 955 | static ext4_grpblk_t | ||
| 956 | find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, | ||
| 957 | ext4_grpblk_t maxblocks) | ||
| 958 | { | ||
| 959 | ext4_grpblk_t here, next; | ||
| 960 | char *p, *r; | ||
| 961 | |||
| 962 | if (start > 0) { | ||
| 963 | /* | ||
| 964 | * The goal was occupied; search forward for a free | ||
| 965 | * block within the next XX blocks. | ||
| 966 | * | ||
| 967 | * end_goal is more or less random, but it has to be | ||
| 968 | * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the | ||
| 969 | * next 64-bit boundary is simple.. | ||
| 970 | */ | ||
| 971 | ext4_grpblk_t end_goal = (start + 63) & ~63; | ||
| 972 | if (end_goal > maxblocks) | ||
| 973 | end_goal = maxblocks; | ||
| 974 | here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); | ||
| 975 | if (here < end_goal && ext4_test_allocatable(here, bh)) | ||
| 976 | return here; | ||
| 977 | ext4_debug("Bit not found near goal\n"); | ||
| 978 | } | ||
| 979 | |||
| 980 | here = start; | ||
| 981 | if (here < 0) | ||
| 982 | here = 0; | ||
| 983 | |||
| 984 | p = ((char *)bh->b_data) + (here >> 3); | ||
| 985 | r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3)); | ||
| 986 | next = (r - ((char *)bh->b_data)) << 3; | ||
| 987 | |||
| 988 | if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) | ||
| 989 | return next; | ||
| 990 | |||
| 991 | /* | ||
| 992 | * The bitmap search --- search forward alternately through the actual | ||
| 993 | * bitmap and the last-committed copy until we find a bit free in | ||
| 994 | * both | ||
| 995 | */ | ||
| 996 | here = bitmap_search_next_usable_block(here, bh, maxblocks); | ||
| 997 | return here; | ||
| 998 | } | ||
| 999 | |||
| 1000 | /** | ||
| 1001 | * claim_block() | ||
| 1002 | * @block: the free block (group relative) to allocate | ||
| 1003 | * @bh: the bufferhead contains the block group bitmap | ||
| 1004 | * | ||
| 1005 | * We think we can allocate this block in this bitmap. Try to set the bit. | ||
| 1006 | * If that succeeds then check that nobody has allocated and then freed the | ||
| 1007 | * block since we saw that it was not marked in b_committed_data. If it _was_ | ||
| 1008 | * allocated and freed then clear the bit in the bitmap again and return | ||
| 1009 | * zero (failure). | ||
| 1010 | */ | ||
| 1011 | static inline int | ||
| 1012 | claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) | ||
| 1013 | { | ||
| 1014 | struct journal_head *jh = bh2jh(bh); | ||
| 1015 | int ret; | ||
| 1016 | |||
| 1017 | if (ext4_set_bit_atomic(lock, block, bh->b_data)) | ||
| 1018 | return 0; | ||
| 1019 | jbd_lock_bh_state(bh); | ||
| 1020 | if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { | ||
| 1021 | ext4_clear_bit_atomic(lock, block, bh->b_data); | ||
| 1022 | ret = 0; | ||
| 1023 | } else { | ||
| 1024 | ret = 1; | ||
| 1025 | } | ||
| 1026 | jbd_unlock_bh_state(bh); | ||
| 1027 | return ret; | ||
| 1028 | } | ||
| 1029 | |||
| 1030 | /** | ||
| 1031 | * ext4_try_to_allocate() | ||
| 1032 | * @sb: superblock | ||
| 1033 | * @handle: handle to this transaction | ||
| 1034 | * @group: given allocation block group | ||
| 1035 | * @bitmap_bh: bufferhead holds the block bitmap | ||
| 1036 | * @grp_goal: given target block within the group | ||
| 1037 | * @count: target number of blocks to allocate | ||
| 1038 | * @my_rsv: reservation window | ||
| 1039 | * | ||
| 1040 | * Attempt to allocate blocks within a given range. Set the range of allocation | ||
| 1041 | * first, then find the first free bit(s) from the bitmap (within the range), | ||
| 1042 | * and at last, allocate the blocks by claiming the found free bit as allocated. | ||
| 1043 | * | ||
| 1044 | * To set the range of this allocation: | ||
| 1045 | * if there is a reservation window, only try to allocate block(s) from the | ||
| 1046 | * file's own reservation window; | ||
| 1047 | * Otherwise, the allocation range starts from the given goal block, ends at | ||
| 1048 | * the block group's last block. | ||
| 1049 | * | ||
| 1050 | * If we failed to allocate the desired block then we may end up crossing to a | ||
| 1051 | * new bitmap. In that case we must release write access to the old one via | ||
| 1052 | * ext4_journal_release_buffer(), else we'll run out of credits. | ||
| 1053 | */ | ||
| 1054 | static ext4_grpblk_t | ||
| 1055 | ext4_try_to_allocate(struct super_block *sb, handle_t *handle, | ||
| 1056 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
| 1057 | ext4_grpblk_t grp_goal, unsigned long *count, | ||
| 1058 | struct ext4_reserve_window *my_rsv) | ||
| 1059 | { | ||
| 1060 | ext4_fsblk_t group_first_block; | ||
| 1061 | ext4_grpblk_t start, end; | ||
| 1062 | unsigned long num = 0; | ||
| 1063 | |||
| 1064 | /* we do allocation within the reservation window if we have a window */ | ||
| 1065 | if (my_rsv) { | ||
| 1066 | group_first_block = ext4_group_first_block_no(sb, group); | ||
| 1067 | if (my_rsv->_rsv_start >= group_first_block) | ||
| 1068 | start = my_rsv->_rsv_start - group_first_block; | ||
| 1069 | else | ||
| 1070 | /* reservation window crosses group boundary */ | ||
| 1071 | start = 0; | ||
| 1072 | end = my_rsv->_rsv_end - group_first_block + 1; | ||
| 1073 | if (end > EXT4_BLOCKS_PER_GROUP(sb)) | ||
| 1074 | /* reservation window crosses group boundary */ | ||
| 1075 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
| 1076 | if ((start <= grp_goal) && (grp_goal < end)) | ||
| 1077 | start = grp_goal; | ||
| 1078 | else | ||
| 1079 | grp_goal = -1; | ||
| 1080 | } else { | ||
| 1081 | if (grp_goal > 0) | ||
| 1082 | start = grp_goal; | ||
| 1083 | else | ||
| 1084 | start = 0; | ||
| 1085 | end = EXT4_BLOCKS_PER_GROUP(sb); | ||
| 1086 | } | ||
| 1087 | |||
| 1088 | BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); | ||
| 1089 | |||
| 1090 | repeat: | ||
| 1091 | if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { | ||
| 1092 | grp_goal = find_next_usable_block(start, bitmap_bh, end); | ||
| 1093 | if (grp_goal < 0) | ||
| 1094 | goto fail_access; | ||
| 1095 | if (!my_rsv) { | ||
| 1096 | int i; | ||
| 1097 | |||
| 1098 | for (i = 0; i < 7 && grp_goal > start && | ||
| 1099 | ext4_test_allocatable(grp_goal - 1, | ||
| 1100 | bitmap_bh); | ||
| 1101 | i++, grp_goal--) | ||
| 1102 | ; | ||
| 1103 | } | ||
| 1104 | } | ||
| 1105 | start = grp_goal; | ||
| 1106 | |||
| 1107 | if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
| 1108 | grp_goal, bitmap_bh)) { | ||
| 1109 | /* | ||
| 1110 | * The block was allocated by another thread, or it was | ||
| 1111 | * allocated and then freed by another thread | ||
| 1112 | */ | ||
| 1113 | start++; | ||
| 1114 | grp_goal++; | ||
| 1115 | if (start >= end) | ||
| 1116 | goto fail_access; | ||
| 1117 | goto repeat; | ||
| 1118 | } | ||
| 1119 | num++; | ||
| 1120 | grp_goal++; | ||
| 1121 | while (num < *count && grp_goal < end | ||
| 1122 | && ext4_test_allocatable(grp_goal, bitmap_bh) | ||
| 1123 | && claim_block(sb_bgl_lock(EXT4_SB(sb), group), | ||
| 1124 | grp_goal, bitmap_bh)) { | ||
| 1125 | num++; | ||
| 1126 | grp_goal++; | ||
| 1127 | } | ||
| 1128 | *count = num; | ||
| 1129 | return grp_goal - num; | ||
| 1130 | fail_access: | ||
| 1131 | *count = num; | ||
| 1132 | return -1; | ||
| 1133 | } | ||
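The range set-up at the top of ext4_try_to_allocate() clamps the file's reservation window (kept in filesystem-wide block numbers) to the group-relative range that the bitmap search may use. A hypothetical helper illustrating just that clamp (names assumed, not kernel code):

    #include <stdio.h>

    typedef unsigned long long fsblk_t;

    static void clamp_to_group(fsblk_t w_start, fsblk_t w_end,
                               fsblk_t group_first, unsigned blocks_per_group,
                               long *start, long *end)
    {
        /* window may begin in the previous group */
        *start = (w_start >= group_first) ? (long)(w_start - group_first) : 0;
        *end = (long)(w_end - group_first + 1);
        if (*end > (long)blocks_per_group)
            *end = blocks_per_group;        /* window may spill into next group */
    }

    int main(void)
    {
        long s, e;
        /* window 32760..32775 overlaps the end of group 0 (32768 blocks) */
        clamp_to_group(32760, 32775, 0, 32768, &s, &e);
        printf("search [%ld, %ld)\n", s, e);   /* [32760, 32768) */
        return 0;
    }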
| 1134 | |||
| 1135 | /** | ||
| 1136 | * find_next_reservable_window(): | ||
| 1137 | * find a reservable space within the given range. | ||
| 1138 | * It does not allocate the reservation window for now: | ||
| 1139 | * alloc_new_reservation() will do the work later. | ||
| 1140 | * | ||
| 1141 | * @search_head: the head of the searching list; | ||
| 1142 | * This is not necessarily the list head of the whole filesystem | ||
| 1143 | * | ||
| 1144 | * We have both head and start_block to assist the search | ||
| 1145 | * for the reservable space. The list starts from head, | ||
| 1146 | * but we will shift to the place where start_block is, | ||
| 1147 | * then start from there, when looking for a reservable space. | ||
| 1148 | * | ||
| 1149 | * @size: the target new reservation window size | ||
| 1150 | * | ||
| 1151 | * @group_first_block: the first block we consider to start | ||
| 1152 | * the real search from | ||
| 1153 | * | ||
| 1154 | * @last_block: | ||
| 1155 | * the maximum block number that our goal reservable space | ||
| 1156 | * could start from. This is normally the last block in this | ||
| 1157 | * group. The search ends when the start of the next possible | ||
| 1158 | * reservable space lies beyond this boundary. | ||
| 1159 | * This could handle the cross boundary reservation window | ||
| 1160 | * request. | ||
| 1161 | * | ||
| 1162 | * Basically we search the given range (start_block, last_block), | ||
| 1163 | * rather than the whole reservation double linked list, | ||
| 1164 | * to find a free region that is of the requested size and has | ||
| 1165 | * not yet been reserved. | ||
| 1166 | * | ||
| 1167 | */ | ||
| 1168 | static int find_next_reservable_window( | ||
| 1169 | struct ext4_reserve_window_node *search_head, | ||
| 1170 | struct ext4_reserve_window_node *my_rsv, | ||
| 1171 | struct super_block * sb, | ||
| 1172 | ext4_fsblk_t start_block, | ||
| 1173 | ext4_fsblk_t last_block) | ||
| 1174 | { | ||
| 1175 | struct rb_node *next; | ||
| 1176 | struct ext4_reserve_window_node *rsv, *prev; | ||
| 1177 | ext4_fsblk_t cur; | ||
| 1178 | int size = my_rsv->rsv_goal_size; | ||
| 1179 | |||
| 1180 | /* TODO: make the start of the reservation window byte-aligned */ | ||
| 1181 | /* cur = *start_block & ~7;*/ | ||
| 1182 | cur = start_block; | ||
| 1183 | rsv = search_head; | ||
| 1184 | if (!rsv) | ||
| 1185 | return -1; | ||
| 1186 | |||
| 1187 | while (1) { | ||
| 1188 | if (cur <= rsv->rsv_end) | ||
| 1189 | cur = rsv->rsv_end + 1; | ||
| 1190 | |||
| 1191 | /* TODO? | ||
| 1192 | * in the case we could not find a reservable space | ||
| 1193 | * of the expected size, then during the re-search we could | ||
| 1194 | * remember the largest reservable space we have seen | ||
| 1195 | * and return that one. | ||
| 1196 | * | ||
| 1197 | * For now it will fail if we could not find the reservable | ||
| 1198 | * space with expected-size (or more)... | ||
| 1199 | */ | ||
| 1200 | if (cur > last_block) | ||
| 1201 | return -1; /* fail */ | ||
| 1202 | |||
| 1203 | prev = rsv; | ||
| 1204 | next = rb_next(&rsv->rsv_node); | ||
| 1205 | rsv = rb_entry(next,struct ext4_reserve_window_node,rsv_node); | ||
| 1206 | 607 | ||
| 1207 | /* | 608 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
| 1208 | * Reached the last reservation, we can just append to the | 609 | EXT4_FREEBLOCKS_WATERMARK) { |
| 1209 | * previous one. | 610 | free_blocks = percpu_counter_sum(fbc); |
| 1210 | */ | 611 | dirty_blocks = percpu_counter_sum(dbc); |
| 1211 | if (!next) | 612 | if (dirty_blocks < 0) { |
| 1212 | break; | 613 | printk(KERN_CRIT "Dirty block accounting " |
| 1213 | 614 | "went wrong %lld\n", | |
| 1214 | if (cur + size <= rsv->rsv_start) { | 615 | dirty_blocks); |
| 1215 | /* | ||
| 1216 | * Found a reservable space big enough. We could | ||
| 1217 | * have a reservation across the group boundary here | ||
| 1218 | */ | ||
| 1219 | break; | ||
| 1220 | } | 616 | } |
| 1221 | } | 617 | } |
| 1222 | /* | 618 | /* Check whether we have space after |
| 1223 | * we come here either : | 619 | * accounting for current dirty blocks |
| 1224 | * when we reach the end of the whole list, | ||
| 1225 | * and there is empty reservable space after last entry in the list. | ||
| 1226 | * append it to the end of the list. | ||
| 1227 | * | ||
| 1228 | * or we found one reservable space in the middle of the list, | ||
| 1229 | * return the reservation window that we could append to. | ||
| 1230 | * succeed. | ||
| 1231 | */ | 620 | */ |
| 621 | if (free_blocks < ((root_blocks + nblocks) + dirty_blocks)) | ||
| 622 | /* we don't have free space */ | ||
| 623 | return -ENOSPC; | ||
| 1232 | 624 | ||
| 1233 | if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) | 625 | /* Add the blocks to nblocks */ |
| 1234 | rsv_window_remove(sb, my_rsv); | 626 | percpu_counter_add(dbc, nblocks); |
| 1235 | |||
| 1236 | /* | ||
| 1237 | * Let's book the whole available window for now. We will check the | ||
| 1238 | * disk bitmap later and then, if there are free blocks then we adjust | ||
| 1239 | * the window size if it's larger than requested. | ||
| 1240 | * Otherwise, we will remove this node from the tree next time | ||
| 1241 | * call find_next_reservable_window. | ||
| 1242 | */ | ||
| 1243 | my_rsv->rsv_start = cur; | ||
| 1244 | my_rsv->rsv_end = cur + size - 1; | ||
| 1245 | my_rsv->rsv_alloc_hit = 0; | ||
| 1246 | |||
| 1247 | if (prev != my_rsv) | ||
| 1248 | ext4_rsv_window_add(sb, my_rsv); | ||
| 1249 | |||
| 1250 | return 0; | 627 | return 0; |
| 1251 | } | 628 | } |
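The core of the removed find_next_reservable_window() is a walk over the existing reservation windows, in block order, looking for the first gap of at least the requested size before hitting the end of the search range. A userspace sketch of that gap search (assumed; a sorted array stands in for the kernel's red-black tree):

    #include <stdio.h>

    struct win { unsigned long start, end; };   /* inclusive range */

    static long find_gap(const struct win *w, int n,
                         unsigned long cur, unsigned long last, unsigned long size)
    {
        for (int i = 0; i < n; i++) {
            if (cur <= w[i].end)
                cur = w[i].end + 1;             /* skip past this window */
            if (cur > last)
                return -1;                      /* ran off the search range */
            if (i + 1 == n || cur + size <= w[i + 1].start)
                return (long)cur;               /* list end, or gap big enough */
        }
        return (long)cur;                       /* no windows at all */
    }

    int main(void)
    {
        struct win w[] = { { 0, 7 }, { 10, 20 }, { 40, 60 } };

        /* 8 free-ish blocks fit between the 2nd and 3rd windows, at 21 */
        printf("gap for 8 blocks: %ld\n", find_gap(w, 3, 0, 100, 8));
        return 0;
    }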
| 1252 | 629 | ||
| 1253 | /** | 630 | /** |
| 1254 | * alloc_new_reservation()--allocate a new reservation window | ||
| 1255 | * | ||
| 1256 | * To make a new reservation, we search part of the filesystem | ||
| 1257 | * reservation list (the list that inside the group). We try to | ||
| 1258 | * allocate a new reservation window near the allocation goal, | ||
| 1259 | * or the beginning of the group, if there is no goal. | ||
| 1260 | * | ||
| 1261 | * We first find a reservable space after the goal, then from | ||
| 1262 | * there, we check the bitmap for the first free block after | ||
| 1263 | * it. If there is no free block until the end of group, then the | ||
| 1264 | * whole group is full, we failed. Otherwise, check if the free | ||
| 1265 | * block is inside the expected reservable space, if so, we | ||
| 1266 | * succeed. | ||
| 1267 | * If the first free block is outside the reservable space, then | ||
| 1268 | * start from the first free block, we search for next available | ||
| 1269 | * space, and go on. | ||
| 1270 | * | ||
| 1271 | * On success, a new reservation will be found and inserted into the list. | ||
| 1272 | * It contains at least one free block, and it does not overlap with other | ||
| 1273 | * reservation windows. | ||
| 1274 | * | ||
| 1275 | * failed: we failed to find a reservation window in this group | ||
| 1276 | * | ||
| 1277 | * @rsv: the reservation | ||
| 1278 | * | ||
| 1279 | * @grp_goal: The goal (group-relative). It is where the search for a | ||
| 1280 | * free reservable space should start from. | ||
| 1281 | * if we have a grp_goal (grp_goal > 0), then start from there; | ||
| 1282 | * with no grp_goal (grp_goal = -1), we start from the first block | ||
| 1283 | * of the group. | ||
| 1284 | * | ||
| 1285 | * @sb: the super block | ||
| 1286 | * @group: the group we are trying to allocate in | ||
| 1287 | * @bitmap_bh: the block group block bitmap | ||
| 1288 | * | ||
| 1289 | */ | ||
| 1290 | static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, | ||
| 1291 | ext4_grpblk_t grp_goal, struct super_block *sb, | ||
| 1292 | ext4_group_t group, struct buffer_head *bitmap_bh) | ||
| 1293 | { | ||
| 1294 | struct ext4_reserve_window_node *search_head; | ||
| 1295 | ext4_fsblk_t group_first_block, group_end_block, start_block; | ||
| 1296 | ext4_grpblk_t first_free_block; | ||
| 1297 | struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; | ||
| 1298 | unsigned long size; | ||
| 1299 | int ret; | ||
| 1300 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
| 1301 | |||
| 1302 | group_first_block = ext4_group_first_block_no(sb, group); | ||
| 1303 | group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
| 1304 | |||
| 1305 | if (grp_goal < 0) | ||
| 1306 | start_block = group_first_block; | ||
| 1307 | else | ||
| 1308 | start_block = grp_goal + group_first_block; | ||
| 1309 | |||
| 1310 | size = my_rsv->rsv_goal_size; | ||
| 1311 | |||
| 1312 | if (!rsv_is_empty(&my_rsv->rsv_window)) { | ||
| 1313 | /* | ||
| 1314 | * if the old reservation crosses the group boundary | ||
| 1315 | * and if the goal is inside the old reservation window, | ||
| 1316 | * we will come here when we just failed to allocate from | ||
| 1317 | * the first part of the window. We still have another part | ||
| 1318 | * that belongs to the next group. In this case, there is no | ||
| 1319 | * point to discard our window and try to allocate a new one | ||
| 1320 | * in this group (which will fail). We should | ||
| 1321 | * keep the reservation window, just simply move on. | ||
| 1322 | * | ||
| 1323 | * Maybe we could shift the start block of the reservation | ||
| 1324 | * window to the first block of next group. | ||
| 1325 | */ | ||
| 1326 | |||
| 1327 | if ((my_rsv->rsv_start <= group_end_block) && | ||
| 1328 | (my_rsv->rsv_end > group_end_block) && | ||
| 1329 | (start_block >= my_rsv->rsv_start)) | ||
| 1330 | return -1; | ||
| 1331 | |||
| 1332 | if ((my_rsv->rsv_alloc_hit > | ||
| 1333 | (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { | ||
| 1334 | /* | ||
| 1335 | * if the previous allocation hit ratio is | ||
| 1336 | * greater than 1/2, then we double the size of | ||
| 1337 | * the reservation window the next time, | ||
| 1338 | * otherwise we keep the same size window | ||
| 1339 | */ | ||
| 1340 | size = size * 2; | ||
| 1341 | if (size > EXT4_MAX_RESERVE_BLOCKS) | ||
| 1342 | size = EXT4_MAX_RESERVE_BLOCKS; | ||
| 1343 | my_rsv->rsv_goal_size= size; | ||
| 1344 | } | ||
| 1345 | } | ||
| 1346 | |||
| 1347 | spin_lock(rsv_lock); | ||
| 1348 | /* | ||
| 1349 | * shift the search start to the window near the goal block | ||
| 1350 | */ | ||
| 1351 | search_head = search_reserve_window(fs_rsv_root, start_block); | ||
| 1352 | |||
| 1353 | /* | ||
| 1354 | * find_next_reservable_window() simply finds a reservable window | ||
| 1355 | * inside the given range(start_block, group_end_block). | ||
| 1356 | * | ||
| 1357 | * To make sure the reservation window has a free bit inside it, we | ||
| 1358 | * need to check the bitmap after we found a reservable window. | ||
| 1359 | */ | ||
| 1360 | retry: | ||
| 1361 | ret = find_next_reservable_window(search_head, my_rsv, sb, | ||
| 1362 | start_block, group_end_block); | ||
| 1363 | |||
| 1364 | if (ret == -1) { | ||
| 1365 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
| 1366 | rsv_window_remove(sb, my_rsv); | ||
| 1367 | spin_unlock(rsv_lock); | ||
| 1368 | return -1; | ||
| 1369 | } | ||
| 1370 | |||
| 1371 | /* | ||
| 1372 | * On success, find_next_reservable_window() returns the | ||
| 1373 | * reservation window where there is a reservable space after it. | ||
| 1374 | * Before we reserve this reservable space, we need | ||
| 1375 | * to make sure there is at least a free block inside this region. | ||
| 1376 | * | ||
| 1377 | * Search the first free bit in the block bitmap and in the copy of | ||
| 1378 | * the last committed bitmap alternately, until we find an allocatable | ||
| 1379 | * block. The search starts from the start block of the reservable | ||
| 1380 | * space we just found. | ||
| 1381 | */ | ||
| 1382 | spin_unlock(rsv_lock); | ||
| 1383 | first_free_block = bitmap_search_next_usable_block( | ||
| 1384 | my_rsv->rsv_start - group_first_block, | ||
| 1385 | bitmap_bh, group_end_block - group_first_block + 1); | ||
| 1386 | |||
| 1387 | if (first_free_block < 0) { | ||
| 1388 | /* | ||
| 1389 | * no free block left on the bitmap, no point | ||
| 1390 | * to reserve the space. return failed. | ||
| 1391 | */ | ||
| 1392 | spin_lock(rsv_lock); | ||
| 1393 | if (!rsv_is_empty(&my_rsv->rsv_window)) | ||
| 1394 | rsv_window_remove(sb, my_rsv); | ||
| 1395 | spin_unlock(rsv_lock); | ||
| 1396 | return -1; /* failed */ | ||
| 1397 | } | ||
| 1398 | |||
| 1399 | start_block = first_free_block + group_first_block; | ||
| 1400 | /* | ||
| 1401 | * check if the first free block is within the | ||
| 1402 | * free space we just reserved | ||
| 1403 | */ | ||
| 1404 | if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) | ||
| 1405 | return 0; /* success */ | ||
| 1406 | /* | ||
| 1407 | * if the first free bit we found is outside the reservable space, | ||
| 1408 | * continue the search for the next reservable space, | ||
| 1409 | * starting from where the free block is; | ||
| 1410 | * we also shift the list head to where we stopped last time | ||
| 1411 | */ | ||
| 1412 | search_head = my_rsv; | ||
| 1413 | spin_lock(rsv_lock); | ||
| 1414 | goto retry; | ||
| 1415 | } | ||
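alloc_new_reservation() above also adapts the window size: if more than half of the previous window was actually consumed, the goal size is doubled for the next window, up to a cap. A tiny illustration of just that heuristic (assumed, not kernel code; MAX_RESERVE_BLOCKS here is only a stand-in for the kernel's EXT4_MAX_RESERVE_BLOCKS):

    #include <stdio.h>

    #define MAX_RESERVE_BLOCKS 1024   /* assumed cap, stand-in value */

    static unsigned long next_window_size(unsigned long size,
                                          unsigned long alloc_hit,
                                          unsigned long old_len)
    {
        if (alloc_hit > old_len / 2) {          /* hit ratio above 1/2 */
            size *= 2;
            if (size > MAX_RESERVE_BLOCKS)
                size = MAX_RESERVE_BLOCKS;
        }
        return size;
    }

    int main(void)
    {
        /* 8-block window, 6 blocks were allocated from it -> grow to 16 */
        printf("%lu\n", next_window_size(8, 6, 8));
        return 0;
    }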
| 1416 | |||
| 1417 | /** | ||
| 1418 | * try_to_extend_reservation() | ||
| 1419 | * @my_rsv: given reservation window | ||
| 1420 | * @sb: super block | ||
| 1421 | * @size: the delta to extend | ||
| 1422 | * | ||
| 1423 | * Attempt to expand the reservation window so that it is large | ||
| 1424 | * enough to hold the required number of free blocks | ||
| 1425 | * | ||
| 1426 | * Since ext4_try_to_allocate() will always allocate blocks within | ||
| 1427 | * the reservation window range, if the window size is too small, | ||
| 1428 | * multiple blocks allocation has to stop at the end of the reservation | ||
| 1429 | * window. To make this more efficient, given the total number of | ||
| 1430 | * blocks needed and the current size of the window, we try to | ||
| 1431 | * expand the reservation window size if necessary on a best-effort | ||
| 1432 | * basis before ext4_new_blocks() tries to allocate blocks, | ||
| 1433 | */ | ||
| 1434 | static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, | ||
| 1435 | struct super_block *sb, int size) | ||
| 1436 | { | ||
| 1437 | struct ext4_reserve_window_node *next_rsv; | ||
| 1438 | struct rb_node *next; | ||
| 1439 | spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; | ||
| 1440 | |||
| 1441 | if (!spin_trylock(rsv_lock)) | ||
| 1442 | return; | ||
| 1443 | |||
| 1444 | next = rb_next(&my_rsv->rsv_node); | ||
| 1445 | |||
| 1446 | if (!next) | ||
| 1447 | my_rsv->rsv_end += size; | ||
| 1448 | else { | ||
| 1449 | next_rsv = rb_entry(next, struct ext4_reserve_window_node, rsv_node); | ||
| 1450 | |||
| 1451 | if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) | ||
| 1452 | my_rsv->rsv_end += size; | ||
| 1453 | else | ||
| 1454 | my_rsv->rsv_end = next_rsv->rsv_start - 1; | ||
| 1455 | } | ||
| 1456 | spin_unlock(rsv_lock); | ||
| 1457 | } | ||
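The extension rule in try_to_extend_reservation() is simply: grow this window's end by `size` blocks, but never into the next reservation window. An assumed sketch of that rule in isolation:

    #include <stdio.h>

    static unsigned long extend_end(unsigned long my_end, long next_start, int size)
    {
        if (next_start < 0)                       /* no window after us */
            return my_end + size;
        if ((unsigned long)next_start - my_end - 1 >= (unsigned long)size)
            return my_end + size;                 /* enough room: full extension */
        return (unsigned long)next_start - 1;     /* stop just short of neighbour */
    }

    int main(void)
    {
        printf("%lu\n", extend_end(100, 120, 8));   /* 108: room for all 8 */
        printf("%lu\n", extend_end(100, 105, 8));   /* 104: clipped at neighbour */
        printf("%lu\n", extend_end(100, -1, 8));    /* 108: nothing after us */
        return 0;
    }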
| 1458 | |||
| 1459 | /** | ||
| 1460 | * ext4_try_to_allocate_with_rsv() | ||
| 1461 | * @sb: superblock | ||
| 1462 | * @handle: handle to this transaction | ||
| 1463 | * @group: given allocation block group | ||
| 1464 | * @bitmap_bh: bufferhead holds the block bitmap | ||
| 1465 | * @grp_goal: given target block within the group | ||
| 1466 | * @count: target number of blocks to allocate | ||
| 1467 | * @my_rsv: reservation window | ||
| 1468 | * @errp: pointer to store the error code | ||
| 1469 | * | ||
| 1470 | * This is the main function used to allocate a new block and its reservation | ||
| 1471 | * window. | ||
| 1472 | * | ||
| 1473 | * Each time a new block allocation is needed, we first try to allocate from | ||
| 1474 | * the file's own reservation window. If it does not have one, then instead | ||
| 1475 | * of looking for a free bit in the bitmap first and then checking the | ||
| 1476 | * reservation list to see whether that bit lies inside somebody else's | ||
| 1477 | * window, we try to allocate a reservation window for the file starting | ||
| 1478 | * from the goal, and then do the block allocation within that window. | ||
| 1479 | * | ||
| 1480 | * This will avoid keeping on searching the reservation list again and | ||
| 1481 | * again when somebody is looking for a free block (without | ||
| 1482 | * reservation), and there are lots of free blocks, but they are all | ||
| 1483 | * being reserved. | ||
| 1484 | * | ||
| 1485 | * We use a red-black tree for the per-filesystem reservation list. | ||
| 1486 | * | ||
| 1487 | */ | ||
| 1488 | static ext4_grpblk_t | ||
| 1489 | ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, | ||
| 1490 | ext4_group_t group, struct buffer_head *bitmap_bh, | ||
| 1491 | ext4_grpblk_t grp_goal, | ||
| 1492 | struct ext4_reserve_window_node * my_rsv, | ||
| 1493 | unsigned long *count, int *errp) | ||
| 1494 | { | ||
| 1495 | ext4_fsblk_t group_first_block, group_last_block; | ||
| 1496 | ext4_grpblk_t ret = 0; | ||
| 1497 | int fatal; | ||
| 1498 | unsigned long num = *count; | ||
| 1499 | |||
| 1500 | *errp = 0; | ||
| 1501 | |||
| 1502 | /* | ||
| 1503 | * Make sure we use undo access for the bitmap, because it is critical | ||
| 1504 | * that we do the frozen_data COW on bitmap buffers in all cases even | ||
| 1505 | * if the buffer is in BJ_Forget state in the committing transaction. | ||
| 1506 | */ | ||
| 1507 | BUFFER_TRACE(bitmap_bh, "get undo access for new block"); | ||
| 1508 | fatal = ext4_journal_get_undo_access(handle, bitmap_bh); | ||
| 1509 | if (fatal) { | ||
| 1510 | *errp = fatal; | ||
| 1511 | return -1; | ||
| 1512 | } | ||
| 1513 | |||
| 1514 | /* | ||
| 1515 | * we don't deal with reservation when | ||
| 1516 | * filesystem is mounted without reservation | ||
| 1517 | * or the file is not a regular file | ||
| 1518 | * or last attempt to allocate a block with reservation turned on failed | ||
| 1519 | */ | ||
| 1520 | if (my_rsv == NULL ) { | ||
| 1521 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
| 1522 | grp_goal, count, NULL); | ||
| 1523 | goto out; | ||
| 1524 | } | ||
| 1525 | /* | ||
| 1526 | * grp_goal is a group relative block number (if there is a goal) | ||
| 1527 | * 0 <= grp_goal < EXT4_BLOCKS_PER_GROUP(sb) | ||
| 1528 | * first block is a filesystem wide block number | ||
| 1529 | * first block is the block number of the first block in this group | ||
| 1530 | */ | ||
| 1531 | group_first_block = ext4_group_first_block_no(sb, group); | ||
| 1532 | group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); | ||
| 1533 | |||
| 1534 | /* | ||
| 1535 | * Basically we will allocate a new block from inode's reservation | ||
| 1536 | * window. | ||
| 1537 | * | ||
| 1538 | * We need to allocate a new reservation window, if: | ||
| 1539 | * a) inode does not have a reservation window; or | ||
| 1540 | * b) last attempt to allocate a block from existing reservation | ||
| 1541 | * failed; or | ||
| 1542 | * c) we come here with a goal and with a reservation window | ||
| 1543 | * | ||
| 1544 | * We do not need to allocate a new reservation window if we come here | ||
| 1545 | * at the beginning with a goal and the goal is inside the window, or | ||
| 1546 | * we don't have a goal but already have a reservation window. | ||
| 1547 | * In either case we can allocate from the reservation window directly. | ||
| 1548 | */ | ||
| 1549 | while (1) { | ||
| 1550 | if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) || | ||
| 1551 | !goal_in_my_reservation(&my_rsv->rsv_window, | ||
| 1552 | grp_goal, group, sb)) { | ||
| 1553 | if (my_rsv->rsv_goal_size < *count) | ||
| 1554 | my_rsv->rsv_goal_size = *count; | ||
| 1555 | ret = alloc_new_reservation(my_rsv, grp_goal, sb, | ||
| 1556 | group, bitmap_bh); | ||
| 1557 | if (ret < 0) | ||
| 1558 | break; /* failed */ | ||
| 1559 | |||
| 1560 | if (!goal_in_my_reservation(&my_rsv->rsv_window, | ||
| 1561 | grp_goal, group, sb)) | ||
| 1562 | grp_goal = -1; | ||
| 1563 | } else if (grp_goal >= 0) { | ||
| 1564 | int curr = my_rsv->rsv_end - | ||
| 1565 | (grp_goal + group_first_block) + 1; | ||
| 1566 | |||
| 1567 | if (curr < *count) | ||
| 1568 | try_to_extend_reservation(my_rsv, sb, | ||
| 1569 | *count - curr); | ||
| 1570 | } | ||
| 1571 | |||
| 1572 | if ((my_rsv->rsv_start > group_last_block) || | ||
| 1573 | (my_rsv->rsv_end < group_first_block)) { | ||
| 1574 | rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1); | ||
| 1575 | BUG(); | ||
| 1576 | } | ||
| 1577 | ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh, | ||
| 1578 | grp_goal, &num, &my_rsv->rsv_window); | ||
| 1579 | if (ret >= 0) { | ||
| 1580 | my_rsv->rsv_alloc_hit += num; | ||
| 1581 | *count = num; | ||
| 1582 | break; /* succeed */ | ||
| 1583 | } | ||
| 1584 | num = *count; | ||
| 1585 | } | ||
| 1586 | out: | ||
| 1587 | if (ret >= 0) { | ||
| 1588 | BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for " | ||
| 1589 | "bitmap block"); | ||
| 1590 | fatal = ext4_journal_dirty_metadata(handle, bitmap_bh); | ||
| 1591 | if (fatal) { | ||
| 1592 | *errp = fatal; | ||
| 1593 | return -1; | ||
| 1594 | } | ||
| 1595 | return ret; | ||
| 1596 | } | ||
| 1597 | |||
| 1598 | BUFFER_TRACE(bitmap_bh, "journal_release_buffer"); | ||
| 1599 | ext4_journal_release_buffer(handle, bitmap_bh); | ||
| 1600 | return ret; | ||
| 1601 | } | ||
| 1602 | |||
| 1603 | /** | ||
| 1604 | * ext4_has_free_blocks() | 631 | * ext4_has_free_blocks() |
| 1605 | * @sbi: in-core super block structure. | 632 | * @sbi: in-core super block structure. |
| 1606 | * @nblocks: number of needed blocks | 633 | * @nblocks: number of needed blocks |
| @@ -1610,29 +637,34 @@ out: | |||
| 1610 | * On success, return nblocks | 637 | * On success, return nblocks |
| 1611 | */ | 638 | */ |
| 1612 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 639 | ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
| 1613 | ext4_fsblk_t nblocks) | 640 | s64 nblocks) |
| 1614 | { | 641 | { |
| 1615 | ext4_fsblk_t free_blocks; | 642 | s64 free_blocks, dirty_blocks; |
| 1616 | ext4_fsblk_t root_blocks = 0; | 643 | s64 root_blocks = 0; |
| 644 | struct percpu_counter *fbc = &sbi->s_freeblocks_counter; | ||
| 645 | struct percpu_counter *dbc = &sbi->s_dirtyblocks_counter; | ||
| 1617 | 646 | ||
| 1618 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | 647 | free_blocks = percpu_counter_read_positive(fbc); |
| 648 | dirty_blocks = percpu_counter_read_positive(dbc); | ||
| 1619 | 649 | ||
| 1620 | if (!capable(CAP_SYS_RESOURCE) && | 650 | if (!capable(CAP_SYS_RESOURCE) && |
| 1621 | sbi->s_resuid != current->fsuid && | 651 | sbi->s_resuid != current->fsuid && |
| 1622 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) | 652 | (sbi->s_resgid == 0 || !in_group_p(sbi->s_resgid))) |
| 1623 | root_blocks = ext4_r_blocks_count(sbi->s_es); | 653 | root_blocks = ext4_r_blocks_count(sbi->s_es); |
| 1624 | #ifdef CONFIG_SMP | 654 | |
| 1625 | if (free_blocks - root_blocks < FBC_BATCH) | 655 | if (free_blocks - (nblocks + root_blocks + dirty_blocks) < |
| 1626 | free_blocks = | 656 | EXT4_FREEBLOCKS_WATERMARK) { |
| 1627 | percpu_counter_sum_and_set(&sbi->s_freeblocks_counter); | 657 | free_blocks = percpu_counter_sum(fbc); |
| 1628 | #endif | 658 | dirty_blocks = percpu_counter_sum(dbc); |
| 1629 | if (free_blocks <= root_blocks) | 659 | } |
| 660 | if (free_blocks <= (root_blocks + dirty_blocks)) | ||
| 1630 | /* we don't have free space */ | 661 | /* we don't have free space */ |
| 1631 | return 0; | 662 | return 0; |
| 1632 | if (free_blocks - root_blocks < nblocks) | 663 | |
| 1633 | return free_blocks - root_blocks; | 664 | if (free_blocks - (root_blocks + dirty_blocks) < nblocks) |
| 665 | return free_blocks - (root_blocks + dirty_blocks); | ||
| 1634 | return nblocks; | 666 | return nblocks; |
| 1635 | } | 667 | } |
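The new ext4_has_free_blocks() on the right-hand side reads the cheap, possibly stale per-CPU counters first and only pays for an exact sum when the approximate answer is within a watermark of the danger zone. A userspace sketch of that approximate-then-exact pattern (assumed; the stand-in functions below play the roles of percpu_counter_read_positive() and percpu_counter_sum(), and `watermark` stands in for EXT4_FREEBLOCKS_WATERMARK):

    #include <stdio.h>

    /* stand-ins for percpu_counter_read_positive() / percpu_counter_sum() */
    static long long approx_free(void) { return 120; }
    static long long exact_free(void)  { return 97; }

    static long long has_free_blocks(long long nblocks, long long root_blocks,
                                     long long dirty_blocks, long long watermark)
    {
        long long free_blocks = approx_free();

        if (free_blocks - (nblocks + root_blocks + dirty_blocks) < watermark)
            free_blocks = exact_free();          /* near the edge: be precise */

        if (free_blocks <= root_blocks + dirty_blocks)
            return 0;                             /* no space at all */
        if (free_blocks - (root_blocks + dirty_blocks) < nblocks)
            return free_blocks - (root_blocks + dirty_blocks);  /* partial */
        return nblocks;                           /* full request fits */
    }

    int main(void)
    {
        /* ask for 50 blocks with 10 reserved for root and 40 already dirty */
        printf("%lld\n", has_free_blocks(50, 10, 40, 64));  /* prints 47 */
        return 0;
    }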
| 1636 | 668 | ||
| 1637 | 669 | ||
| 1638 | /** | 670 | /** |
| @@ -1657,303 +689,6 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries) | |||
| 1657 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); | 689 | return jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal); |
| 1658 | } | 690 | } |
| 1659 | 691 | ||
| 1660 | /** | ||
| 1661 | * ext4_old_new_blocks() -- core block bitmap based block allocation function | ||
| 1662 | * | ||
| 1663 | * @handle: handle to this transaction | ||
| 1664 | * @inode: file inode | ||
| 1665 | * @goal: given target block(filesystem wide) | ||
| 1666 | * @count: target number of blocks to allocate | ||
| 1667 | * @errp: error code | ||
| 1668 | * | ||
| 1669 | * ext4_old_new_blocks uses a goal block to assist allocation and look up | ||
| 1670 | * the block bitmap directly to do block allocation. It tries to | ||
| 1671 | * allocate block(s) from the block group contains the goal block first. If | ||
| 1672 | * that fails, it will try to allocate block(s) from other block groups | ||
| 1673 | * without any specific goal block. | ||
| 1674 | * | ||
| 1675 | * This function is called when -o nomballoc mount option is enabled | ||
| 1676 | * | ||
| 1677 | */ | ||
| 1678 | ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | ||
| 1679 | ext4_fsblk_t goal, unsigned long *count, int *errp) | ||
| 1680 | { | ||
| 1681 | struct buffer_head *bitmap_bh = NULL; | ||
| 1682 | struct buffer_head *gdp_bh; | ||
| 1683 | ext4_group_t group_no; | ||
| 1684 | ext4_group_t goal_group; | ||
| 1685 | ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ | ||
| 1686 | ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ | ||
| 1687 | ext4_fsblk_t ret_block; /* filesystem-wide allocated block */ | ||
| 1688 | ext4_group_t bgi; /* blockgroup iteration index */ | ||
| 1689 | int fatal = 0, err; | ||
| 1690 | int performed_allocation = 0; | ||
| 1691 | ext4_grpblk_t free_blocks; /* number of free blocks in a group */ | ||
| 1692 | struct super_block *sb; | ||
| 1693 | struct ext4_group_desc *gdp; | ||
| 1694 | struct ext4_super_block *es; | ||
| 1695 | struct ext4_sb_info *sbi; | ||
| 1696 | struct ext4_reserve_window_node *my_rsv = NULL; | ||
| 1697 | struct ext4_block_alloc_info *block_i; | ||
| 1698 | unsigned short windowsz = 0; | ||
| 1699 | ext4_group_t ngroups; | ||
| 1700 | unsigned long num = *count; | ||
| 1701 | |||
| 1702 | sb = inode->i_sb; | ||
| 1703 | if (!sb) { | ||
| 1704 | *errp = -ENODEV; | ||
| 1705 | printk("ext4_new_block: nonexistent device"); | ||
| 1706 | return 0; | ||
| 1707 | } | ||
| 1708 | |||
| 1709 | sbi = EXT4_SB(sb); | ||
| 1710 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) { | ||
| 1711 | /* | ||
| 1712 | * With delalloc we already reserved the blocks | ||
| 1713 | */ | ||
| 1714 | *count = ext4_has_free_blocks(sbi, *count); | ||
| 1715 | } | ||
| 1716 | if (*count == 0) { | ||
| 1717 | *errp = -ENOSPC; | ||
| 1718 | return 0; /*return with ENOSPC error */ | ||
| 1719 | } | ||
| 1720 | num = *count; | ||
| 1721 | |||
| 1722 | /* | ||
| 1723 | * Check quota for allocation of this block. | ||
| 1724 | */ | ||
| 1725 | if (DQUOT_ALLOC_BLOCK(inode, num)) { | ||
| 1726 | *errp = -EDQUOT; | ||
| 1727 | return 0; | ||
| 1728 | } | ||
| 1729 | |||
| 1730 | sbi = EXT4_SB(sb); | ||
| 1731 | es = EXT4_SB(sb)->s_es; | ||
| 1732 | ext4_debug("goal=%llu.\n", goal); | ||
| 1733 | /* | ||
| 1734 | * Allocate a block from reservation only when | ||
| 1735 | * filesystem is mounted with reservation(default,-o reservation), and | ||
| 1736 | * it's a regular file, and | ||
| 1737 | * the desired window size is greater than 0 (One could use ioctl | ||
| 1738 | * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off | ||
| 1739 | * reservation on that particular file) | ||
| 1740 | */ | ||
| 1741 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
| 1742 | if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) | ||
| 1743 | my_rsv = &block_i->rsv_window_node; | ||
| 1744 | |||
| 1745 | /* | ||
| 1746 | * First, test whether the goal block is free. | ||
| 1747 | */ | ||
| 1748 | if (goal < le32_to_cpu(es->s_first_data_block) || | ||
| 1749 | goal >= ext4_blocks_count(es)) | ||
| 1750 | goal = le32_to_cpu(es->s_first_data_block); | ||
| 1751 | ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); | ||
| 1752 | goal_group = group_no; | ||
| 1753 | retry_alloc: | ||
| 1754 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
| 1755 | if (!gdp) | ||
| 1756 | goto io_error; | ||
| 1757 | |||
| 1758 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
| 1759 | /* | ||
| 1760 | * if there are not enough free blocks to make a new reservation | ||
| 1761 | * turn off reservation for this allocation | ||
| 1762 | */ | ||
| 1763 | if (my_rsv && (free_blocks < windowsz) | ||
| 1764 | && (rsv_is_empty(&my_rsv->rsv_window))) | ||
| 1765 | my_rsv = NULL; | ||
| 1766 | |||
| 1767 | if (free_blocks > 0) { | ||
| 1768 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
| 1769 | if (!bitmap_bh) | ||
| 1770 | goto io_error; | ||
| 1771 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
| 1772 | group_no, bitmap_bh, grp_target_blk, | ||
| 1773 | my_rsv, &num, &fatal); | ||
| 1774 | if (fatal) | ||
| 1775 | goto out; | ||
| 1776 | if (grp_alloc_blk >= 0) | ||
| 1777 | goto allocated; | ||
| 1778 | } | ||
| 1779 | |||
| 1780 | ngroups = EXT4_SB(sb)->s_groups_count; | ||
| 1781 | smp_rmb(); | ||
| 1782 | |||
| 1783 | /* | ||
| 1784 | * Now search the rest of the groups. We assume that | ||
| 1785 | * group_no and gdp correctly point to the last group visited. | ||
| 1786 | */ | ||
| 1787 | for (bgi = 0; bgi < ngroups; bgi++) { | ||
| 1788 | group_no++; | ||
| 1789 | if (group_no >= ngroups) | ||
| 1790 | group_no = 0; | ||
| 1791 | gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); | ||
| 1792 | if (!gdp) | ||
| 1793 | goto io_error; | ||
| 1794 | free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); | ||
| 1795 | /* | ||
| 1796 | * skip this group if the number of | ||
| 1797 | * free blocks is less than half of the reservation | ||
| 1798 | * window size. | ||
| 1799 | */ | ||
| 1800 | if (free_blocks <= (windowsz/2)) | ||
| 1801 | continue; | ||
| 1802 | |||
| 1803 | brelse(bitmap_bh); | ||
| 1804 | bitmap_bh = ext4_read_block_bitmap(sb, group_no); | ||
| 1805 | if (!bitmap_bh) | ||
| 1806 | goto io_error; | ||
| 1807 | /* | ||
| 1808 | * try to allocate block(s) from this group, without a goal(-1). | ||
| 1809 | */ | ||
| 1810 | grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, | ||
| 1811 | group_no, bitmap_bh, -1, my_rsv, | ||
| 1812 | &num, &fatal); | ||
| 1813 | if (fatal) | ||
| 1814 | goto out; | ||
| 1815 | if (grp_alloc_blk >= 0) | ||
| 1816 | goto allocated; | ||
| 1817 | } | ||
| 1818 | /* | ||
| 1819 | * We may end up with a bogus earlier ENOSPC error because the | ||
| 1820 | * filesystem is "full" of reservations, while there may indeed | ||
| 1821 | * be free blocks available on disk. | ||
| 1822 | * In this case, we just forget about the reservations and | ||
| 1823 | * do block allocation as if without reservations. | ||
| 1824 | */ | ||
| 1825 | if (my_rsv) { | ||
| 1826 | my_rsv = NULL; | ||
| 1827 | windowsz = 0; | ||
| 1828 | group_no = goal_group; | ||
| 1829 | goto retry_alloc; | ||
| 1830 | } | ||
| 1831 | /* No space left on the device */ | ||
| 1832 | *errp = -ENOSPC; | ||
| 1833 | goto out; | ||
| 1834 | |||
| 1835 | allocated: | ||
| 1836 | |||
| 1837 | ext4_debug("using block group %lu(%d)\n", | ||
| 1838 | group_no, gdp->bg_free_blocks_count); | ||
| 1839 | |||
| 1840 | BUFFER_TRACE(gdp_bh, "get_write_access"); | ||
| 1841 | fatal = ext4_journal_get_write_access(handle, gdp_bh); | ||
| 1842 | if (fatal) | ||
| 1843 | goto out; | ||
| 1844 | |||
| 1845 | ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); | ||
| 1846 | |||
| 1847 | if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || | ||
| 1848 | in_range(ext4_inode_bitmap(sb, gdp), ret_block, num) || | ||
| 1849 | in_range(ret_block, ext4_inode_table(sb, gdp), | ||
| 1850 | EXT4_SB(sb)->s_itb_per_group) || | ||
| 1851 | in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), | ||
| 1852 | EXT4_SB(sb)->s_itb_per_group)) { | ||
| 1853 | ext4_error(sb, "ext4_new_block", | ||
| 1854 | "Allocating block in system zone - " | ||
| 1855 | "blocks from %llu, length %lu", | ||
| 1856 | ret_block, num); | ||
| 1857 | /* | ||
| 1858 | * claim_block marked the blocks we allocated | ||
| 1859 | * as in use. So we may want to selectively | ||
| 1860 | * mark some of the blocks as free | ||
| 1861 | */ | ||
| 1862 | goto retry_alloc; | ||
| 1863 | } | ||
| 1864 | |||
| 1865 | performed_allocation = 1; | ||
| 1866 | |||
| 1867 | #ifdef CONFIG_JBD2_DEBUG | ||
| 1868 | { | ||
| 1869 | struct buffer_head *debug_bh; | ||
| 1870 | |||
| 1871 | /* Record bitmap buffer state in the newly allocated block */ | ||
| 1872 | debug_bh = sb_find_get_block(sb, ret_block); | ||
| 1873 | if (debug_bh) { | ||
| 1874 | BUFFER_TRACE(debug_bh, "state when allocated"); | ||
| 1875 | BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); | ||
| 1876 | brelse(debug_bh); | ||
| 1877 | } | ||
| 1878 | } | ||
| 1879 | jbd_lock_bh_state(bitmap_bh); | ||
| 1880 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
| 1881 | if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { | ||
| 1882 | int i; | ||
| 1883 | |||
| 1884 | for (i = 0; i < num; i++) { | ||
| 1885 | if (ext4_test_bit(grp_alloc_blk+i, | ||
| 1886 | bh2jh(bitmap_bh)->b_committed_data)) { | ||
| 1887 | printk("%s: block was unexpectedly set in " | ||
| 1888 | "b_committed_data\n", __func__); | ||
| 1889 | } | ||
| 1890 | } | ||
| 1891 | } | ||
| 1892 | ext4_debug("found bit %d\n", grp_alloc_blk); | ||
| 1893 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
| 1894 | jbd_unlock_bh_state(bitmap_bh); | ||
| 1895 | #endif | ||
| 1896 | |||
| 1897 | if (ret_block + num - 1 >= ext4_blocks_count(es)) { | ||
| 1898 | ext4_error(sb, "ext4_new_block", | ||
| 1899 | "block(%llu) >= blocks count(%llu) - " | ||
| 1900 | "block_group = %lu, es == %p ", ret_block, | ||
| 1901 | ext4_blocks_count(es), group_no, es); | ||
| 1902 | goto out; | ||
| 1903 | } | ||
| 1904 | |||
| 1905 | /* | ||
| 1906 | * It is up to the caller to add the new buffer to a journal | ||
| 1907 | * list of some description. We don't know in advance whether | ||
| 1908 | * the caller wants to use it as metadata or data. | ||
| 1909 | */ | ||
| 1910 | spin_lock(sb_bgl_lock(sbi, group_no)); | ||
| 1911 | if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) | ||
| 1912 | gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); | ||
| 1913 | le16_add_cpu(&gdp->bg_free_blocks_count, -num); | ||
| 1914 | gdp->bg_checksum = ext4_group_desc_csum(sbi, group_no, gdp); | ||
| 1915 | spin_unlock(sb_bgl_lock(sbi, group_no)); | ||
| 1916 | if (!EXT4_I(inode)->i_delalloc_reserved_flag) | ||
| 1917 | percpu_counter_sub(&sbi->s_freeblocks_counter, num); | ||
| 1918 | |||
| 1919 | if (sbi->s_log_groups_per_flex) { | ||
| 1920 | ext4_group_t flex_group = ext4_flex_group(sbi, group_no); | ||
| 1921 | spin_lock(sb_bgl_lock(sbi, flex_group)); | ||
| 1922 | sbi->s_flex_groups[flex_group].free_blocks -= num; | ||
| 1923 | spin_unlock(sb_bgl_lock(sbi, flex_group)); | ||
| 1924 | } | ||
| 1925 | |||
| 1926 | BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); | ||
| 1927 | err = ext4_journal_dirty_metadata(handle, gdp_bh); | ||
| 1928 | if (!fatal) | ||
| 1929 | fatal = err; | ||
| 1930 | |||
| 1931 | sb->s_dirt = 1; | ||
| 1932 | if (fatal) | ||
| 1933 | goto out; | ||
| 1934 | |||
| 1935 | *errp = 0; | ||
| 1936 | brelse(bitmap_bh); | ||
| 1937 | DQUOT_FREE_BLOCK(inode, *count-num); | ||
| 1938 | *count = num; | ||
| 1939 | return ret_block; | ||
| 1940 | |||
| 1941 | io_error: | ||
| 1942 | *errp = -EIO; | ||
| 1943 | out: | ||
| 1944 | if (fatal) { | ||
| 1945 | *errp = fatal; | ||
| 1946 | ext4_std_error(sb, fatal); | ||
| 1947 | } | ||
| 1948 | /* | ||
| 1949 | * Undo the block allocation | ||
| 1950 | */ | ||
| 1951 | if (!performed_allocation) | ||
| 1952 | DQUOT_FREE_BLOCK(inode, *count); | ||
| 1953 | brelse(bitmap_bh); | ||
| 1954 | return 0; | ||
| 1955 | } | ||
| 1956 | |||
| 1957 | #define EXT4_META_BLOCK 0x1 | 692 | #define EXT4_META_BLOCK 0x1 |
| 1958 | 693 | ||
| 1959 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | 694 | static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, |
| @@ -1963,10 +698,6 @@ static ext4_fsblk_t do_blk_alloc(handle_t *handle, struct inode *inode, | |||
| 1963 | struct ext4_allocation_request ar; | 698 | struct ext4_allocation_request ar; |
| 1964 | ext4_fsblk_t ret; | 699 | ext4_fsblk_t ret; |
| 1965 | 700 | ||
| 1966 | if (!test_opt(inode->i_sb, MBALLOC)) { | ||
| 1967 | return ext4_old_new_blocks(handle, inode, goal, count, errp); | ||
| 1968 | } | ||
| 1969 | |||
| 1970 | memset(&ar, 0, sizeof(ar)); | 701 | memset(&ar, 0, sizeof(ar)); |
| 1971 | /* Fill with neighbour allocated blocks */ | 702 | /* Fill with neighbour allocated blocks */ |
| 1972 | 703 | ||
| @@ -2008,7 +739,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
| 2008 | /* | 739 | /* |
| 2009 | * Account for the allocated meta blocks | 740 | * Account for the allocated meta blocks |
| 2010 | */ | 741 | */ |
| 2011 | if (!(*errp)) { | 742 | if (!(*errp) && EXT4_I(inode)->i_delalloc_reserved_flag) { |
| 2012 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 743 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
| 2013 | EXT4_I(inode)->i_allocated_meta_blocks += *count; | 744 | EXT4_I(inode)->i_allocated_meta_blocks += *count; |
| 2014 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 745 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| @@ -2093,10 +824,9 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) | |||
| 2093 | bitmap_count += x; | 824 | bitmap_count += x; |
| 2094 | } | 825 | } |
| 2095 | brelse(bitmap_bh); | 826 | brelse(bitmap_bh); |
| 2096 | printk("ext4_count_free_blocks: stored = %llu" | 827 | printk(KERN_DEBUG "ext4_count_free_blocks: stored = %llu" |
| 2097 | ", computed = %llu, %llu\n", | 828 | ", computed = %llu, %llu\n", ext4_free_blocks_count(es), |
| 2098 | ext4_free_blocks_count(es), | 829 | desc_count, bitmap_count); |
| 2099 | desc_count, bitmap_count); | ||
| 2100 | return bitmap_count; | 830 | return bitmap_count; |
| 2101 | #else | 831 | #else |
| 2102 | desc_count = 0; | 832 | desc_count = 0; |
| @@ -2183,8 +913,9 @@ unsigned long ext4_bg_num_gdb(struct super_block *sb, ext4_group_t group) | |||
| 2183 | 913 | ||
| 2184 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || | 914 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || |
| 2185 | metagroup < first_meta_bg) | 915 | metagroup < first_meta_bg) |
| 2186 | return ext4_bg_num_gdb_nometa(sb,group); | 916 | return ext4_bg_num_gdb_nometa(sb, group); |
| 2187 | 917 | ||
| 2188 | return ext4_bg_num_gdb_meta(sb,group); | 918 | return ext4_bg_num_gdb_meta(sb,group); |
| 2189 | 919 | ||
| 2190 | } | 920 | } |
| 921 | |||
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index d37ea6750454..0a7a6663c190 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c | |||
| @@ -15,17 +15,17 @@ | |||
| 15 | 15 | ||
| 16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; | 16 | static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; |
| 17 | 17 | ||
| 18 | unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) | 18 | unsigned long ext4_count_free(struct buffer_head *map, unsigned int numchars) |
| 19 | { | 19 | { |
| 20 | unsigned int i; | 20 | unsigned int i; |
| 21 | unsigned long sum = 0; | 21 | unsigned long sum = 0; |
| 22 | 22 | ||
| 23 | if (!map) | 23 | if (!map) |
| 24 | return (0); | 24 | return 0; |
| 25 | for (i = 0; i < numchars; i++) | 25 | for (i = 0; i < numchars; i++) |
| 26 | sum += nibblemap[map->b_data[i] & 0xf] + | 26 | sum += nibblemap[map->b_data[i] & 0xf] + |
| 27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; | 27 | nibblemap[(map->b_data[i] >> 4) & 0xf]; |
| 28 | return (sum); | 28 | return sum; |
| 29 | } | 29 | } |
| 30 | 30 | ||
| 31 | #endif /* EXT4FS_DEBUG */ | 31 | #endif /* EXT4FS_DEBUG */ |
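ext4_count_free() above counts free (zero) bits with a 16-entry lookup table: each nibblemap entry holds the number of zero bits in that 4-bit value, so a byte costs two table lookups. A standalone illustration of the same trick (sketch in plain C, outside the kernel):

    #include <stdio.h>

    static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};

    static unsigned long count_free(const unsigned char *map, unsigned int numchars)
    {
        unsigned long sum = 0;

        for (unsigned int i = 0; i < numchars; i++)
            sum += nibblemap[map[i] & 0xf] + nibblemap[(map[i] >> 4) & 0xf];
        return sum;
    }

    int main(void)
    {
        unsigned char bitmap[] = { 0xff, 0x0f, 0x00 };  /* 0 + 4 + 8 free bits */

        printf("free bits: %lu\n", count_free(bitmap, sizeof(bitmap)));  /* 12 */
        return 0;
    }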
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index ec8e33b45219..3ca6a2b7632d 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c | |||
| @@ -33,10 +33,10 @@ static unsigned char ext4_filetype_table[] = { | |||
| 33 | }; | 33 | }; |
| 34 | 34 | ||
| 35 | static int ext4_readdir(struct file *, void *, filldir_t); | 35 | static int ext4_readdir(struct file *, void *, filldir_t); |
| 36 | static int ext4_dx_readdir(struct file * filp, | 36 | static int ext4_dx_readdir(struct file *filp, |
| 37 | void * dirent, filldir_t filldir); | 37 | void *dirent, filldir_t filldir); |
| 38 | static int ext4_release_dir (struct inode * inode, | 38 | static int ext4_release_dir(struct inode *inode, |
| 39 | struct file * filp); | 39 | struct file *filp); |
| 40 | 40 | ||
| 41 | const struct file_operations ext4_dir_operations = { | 41 | const struct file_operations ext4_dir_operations = { |
| 42 | .llseek = generic_file_llseek, | 42 | .llseek = generic_file_llseek, |
| @@ -61,12 +61,12 @@ static unsigned char get_dtype(struct super_block *sb, int filetype) | |||
| 61 | } | 61 | } |
| 62 | 62 | ||
| 63 | 63 | ||
| 64 | int ext4_check_dir_entry (const char * function, struct inode * dir, | 64 | int ext4_check_dir_entry(const char *function, struct inode *dir, |
| 65 | struct ext4_dir_entry_2 * de, | 65 | struct ext4_dir_entry_2 *de, |
| 66 | struct buffer_head * bh, | 66 | struct buffer_head *bh, |
| 67 | unsigned long offset) | 67 | unsigned long offset) |
| 68 | { | 68 | { |
| 69 | const char * error_msg = NULL; | 69 | const char *error_msg = NULL; |
| 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); | 70 | const int rlen = ext4_rec_len_from_disk(de->rec_len); |
| 71 | 71 | ||
| 72 | if (rlen < EXT4_DIR_REC_LEN(1)) | 72 | if (rlen < EXT4_DIR_REC_LEN(1)) |
| @@ -82,7 +82,7 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
| 82 | error_msg = "inode out of bounds"; | 82 | error_msg = "inode out of bounds"; |
| 83 | 83 | ||
| 84 | if (error_msg != NULL) | 84 | if (error_msg != NULL) |
| 85 | ext4_error (dir->i_sb, function, | 85 | ext4_error(dir->i_sb, function, |
| 86 | "bad entry in directory #%lu: %s - " | 86 | "bad entry in directory #%lu: %s - " |
| 87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", | 87 | "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", |
| 88 | dir->i_ino, error_msg, offset, | 88 | dir->i_ino, error_msg, offset, |
| @@ -91,8 +91,8 @@ int ext4_check_dir_entry (const char * function, struct inode * dir, | |||
| 91 | return error_msg == NULL ? 1 : 0; | 91 | return error_msg == NULL ? 1 : 0; |
| 92 | } | 92 | } |
| 93 | 93 | ||
| 94 | static int ext4_readdir(struct file * filp, | 94 | static int ext4_readdir(struct file *filp, |
| 95 | void * dirent, filldir_t filldir) | 95 | void *dirent, filldir_t filldir) |
| 96 | { | 96 | { |
| 97 | int error = 0; | 97 | int error = 0; |
| 98 | unsigned long offset; | 98 | unsigned long offset; |
| @@ -102,6 +102,7 @@ static int ext4_readdir(struct file * filp, | |||
| 102 | int err; | 102 | int err; |
| 103 | struct inode *inode = filp->f_path.dentry->d_inode; | 103 | struct inode *inode = filp->f_path.dentry->d_inode; |
| 104 | int ret = 0; | 104 | int ret = 0; |
| 105 | int dir_has_error = 0; | ||
| 105 | 106 | ||
| 106 | sb = inode->i_sb; | 107 | sb = inode->i_sb; |
| 107 | 108 | ||
| @@ -148,9 +149,13 @@ static int ext4_readdir(struct file * filp, | |||
| 148 | * of recovering data when there's a bad sector | 149 | * of recovering data when there's a bad sector |
| 149 | */ | 150 | */ |
| 150 | if (!bh) { | 151 | if (!bh) { |
| 151 | ext4_error (sb, "ext4_readdir", | 152 | if (!dir_has_error) { |
| 152 | "directory #%lu contains a hole at offset %lu", | 153 | ext4_error(sb, __func__, "directory #%lu " |
| 153 | inode->i_ino, (unsigned long)filp->f_pos); | 154 | "contains a hole at offset %Lu", |
| 155 | inode->i_ino, | ||
| 156 | (unsigned long long) filp->f_pos); | ||
| 157 | dir_has_error = 1; | ||
| 158 | } | ||
| 154 | /* corrupt size? Maybe no more blocks to read */ | 159 | /* corrupt size? Maybe no more blocks to read */ |
| 155 | if (filp->f_pos > inode->i_blocks << 9) | 160 | if (filp->f_pos > inode->i_blocks << 9) |
| 156 | break; | 161 | break; |
| @@ -187,14 +192,14 @@ revalidate: | |||
| 187 | while (!error && filp->f_pos < inode->i_size | 192 | while (!error && filp->f_pos < inode->i_size |
| 188 | && offset < sb->s_blocksize) { | 193 | && offset < sb->s_blocksize) { |
| 189 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); | 194 | de = (struct ext4_dir_entry_2 *) (bh->b_data + offset); |
| 190 | if (!ext4_check_dir_entry ("ext4_readdir", inode, de, | 195 | if (!ext4_check_dir_entry("ext4_readdir", inode, de, |
| 191 | bh, offset)) { | 196 | bh, offset)) { |
| 192 | /* | 197 | /* |
| 193 | * On error, skip the f_pos to the next block | 198 | * On error, skip the f_pos to the next block |
| 194 | */ | 199 | */ |
| 195 | filp->f_pos = (filp->f_pos | | 200 | filp->f_pos = (filp->f_pos | |
| 196 | (sb->s_blocksize - 1)) + 1; | 201 | (sb->s_blocksize - 1)) + 1; |
| 197 | brelse (bh); | 202 | brelse(bh); |
| 198 | ret = stored; | 203 | ret = stored; |
| 199 | goto out; | 204 | goto out; |
| 200 | } | 205 | } |
| @@ -218,12 +223,12 @@ revalidate: | |||
| 218 | break; | 223 | break; |
| 219 | if (version != filp->f_version) | 224 | if (version != filp->f_version) |
| 220 | goto revalidate; | 225 | goto revalidate; |
| 221 | stored ++; | 226 | stored++; |
| 222 | } | 227 | } |
| 223 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); | 228 | filp->f_pos += ext4_rec_len_from_disk(de->rec_len); |
| 224 | } | 229 | } |
| 225 | offset = 0; | 230 | offset = 0; |
| 226 | brelse (bh); | 231 | brelse(bh); |
| 227 | } | 232 | } |
| 228 | out: | 233 | out: |
| 229 | return ret; | 234 | return ret; |
| @@ -290,9 +295,9 @@ static void free_rb_tree_fname(struct rb_root *root) | |||
| 290 | parent = rb_parent(n); | 295 | parent = rb_parent(n); |
| 291 | fname = rb_entry(n, struct fname, rb_hash); | 296 | fname = rb_entry(n, struct fname, rb_hash); |
| 292 | while (fname) { | 297 | while (fname) { |
| 293 | struct fname * old = fname; | 298 | struct fname *old = fname; |
| 294 | fname = fname->next; | 299 | fname = fname->next; |
| 295 | kfree (old); | 300 | kfree(old); |
| 296 | } | 301 | } |
| 297 | if (!parent) | 302 | if (!parent) |
| 298 | root->rb_node = NULL; | 303 | root->rb_node = NULL; |
| @@ -331,7 +336,7 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
| 331 | struct ext4_dir_entry_2 *dirent) | 336 | struct ext4_dir_entry_2 *dirent) |
| 332 | { | 337 | { |
| 333 | struct rb_node **p, *parent = NULL; | 338 | struct rb_node **p, *parent = NULL; |
| 334 | struct fname * fname, *new_fn; | 339 | struct fname *fname, *new_fn; |
| 335 | struct dir_private_info *info; | 340 | struct dir_private_info *info; |
| 336 | int len; | 341 | int len; |
| 337 | 342 | ||
| @@ -388,19 +393,20 @@ int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
| 388 | * for all entres on the fname linked list. (Normally there is only | 393 | * for all entres on the fname linked list. (Normally there is only |
| 389 | * one entry on the linked list, unless there are 62 bit hash collisions.) | 394 | * one entry on the linked list, unless there are 62 bit hash collisions.) |
| 390 | */ | 395 | */ |
| 391 | static int call_filldir(struct file * filp, void * dirent, | 396 | static int call_filldir(struct file *filp, void *dirent, |
| 392 | filldir_t filldir, struct fname *fname) | 397 | filldir_t filldir, struct fname *fname) |
| 393 | { | 398 | { |
| 394 | struct dir_private_info *info = filp->private_data; | 399 | struct dir_private_info *info = filp->private_data; |
| 395 | loff_t curr_pos; | 400 | loff_t curr_pos; |
| 396 | struct inode *inode = filp->f_path.dentry->d_inode; | 401 | struct inode *inode = filp->f_path.dentry->d_inode; |
| 397 | struct super_block * sb; | 402 | struct super_block *sb; |
| 398 | int error; | 403 | int error; |
| 399 | 404 | ||
| 400 | sb = inode->i_sb; | 405 | sb = inode->i_sb; |
| 401 | 406 | ||
| 402 | if (!fname) { | 407 | if (!fname) { |
| 403 | printk("call_filldir: called with null fname?!?\n"); | 408 | printk(KERN_ERR "ext4: call_filldir: called with " |
| 409 | "null fname?!?\n"); | ||
| 404 | return 0; | 410 | return 0; |
| 405 | } | 411 | } |
| 406 | curr_pos = hash2pos(fname->hash, fname->minor_hash); | 412 | curr_pos = hash2pos(fname->hash, fname->minor_hash); |
| @@ -419,8 +425,8 @@ static int call_filldir(struct file * filp, void * dirent, | |||
| 419 | return 0; | 425 | return 0; |
| 420 | } | 426 | } |
| 421 | 427 | ||
| 422 | static int ext4_dx_readdir(struct file * filp, | 428 | static int ext4_dx_readdir(struct file *filp, |
| 423 | void * dirent, filldir_t filldir) | 429 | void *dirent, filldir_t filldir) |
| 424 | { | 430 | { |
| 425 | struct dir_private_info *info = filp->private_data; | 431 | struct dir_private_info *info = filp->private_data; |
| 426 | struct inode *inode = filp->f_path.dentry->d_inode; | 432 | struct inode *inode = filp->f_path.dentry->d_inode; |
| @@ -511,7 +517,7 @@ finished: | |||
| 511 | return 0; | 517 | return 0; |
| 512 | } | 518 | } |
| 513 | 519 | ||
| 514 | static int ext4_release_dir (struct inode * inode, struct file * filp) | 520 | static int ext4_release_dir(struct inode *inode, struct file *filp) |
| 515 | { | 521 | { |
| 516 | if (filp->private_data) | 522 | if (filp->private_data) |
| 517 | ext4_htree_free_dir_info(filp->private_data); | 523 | ext4_htree_free_dir_info(filp->private_data); |
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 295003241d3d..4880cc3e6727 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h | |||
| @@ -44,9 +44,9 @@ | |||
| 44 | #ifdef EXT4FS_DEBUG | 44 | #ifdef EXT4FS_DEBUG |
| 45 | #define ext4_debug(f, a...) \ | 45 | #define ext4_debug(f, a...) \ |
| 46 | do { \ | 46 | do { \ |
| 47 | printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ | 47 | printk(KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ |
| 48 | __FILE__, __LINE__, __func__); \ | 48 | __FILE__, __LINE__, __func__); \ |
| 49 | printk (KERN_DEBUG f, ## a); \ | 49 | printk(KERN_DEBUG f, ## a); \ |
| 50 | } while (0) | 50 | } while (0) |
| 51 | #else | 51 | #else |
| 52 | #define ext4_debug(f, a...) do {} while (0) | 52 | #define ext4_debug(f, a...) do {} while (0) |
| @@ -128,7 +128,7 @@ struct ext4_allocation_request { | |||
| 128 | #else | 128 | #else |
| 129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) | 129 | # define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) |
| 130 | #endif | 130 | #endif |
| 131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) | 131 | #define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof(__u32)) |
| 132 | #ifdef __KERNEL__ | 132 | #ifdef __KERNEL__ |
| 133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) | 133 | # define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) |
| 134 | #else | 134 | #else |
| @@ -245,7 +245,7 @@ struct flex_groups { | |||
| 245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ | 245 | #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ |
| 246 | 246 | ||
| 247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ | 247 | #define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ |
| 248 | #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ | 248 | #define EXT4_FL_USER_MODIFIABLE 0x000B80FF /* User modifiable flags */ |
| 249 | 249 | ||
| 250 | /* | 250 | /* |
| 251 | * Inode dynamic state flags | 251 | * Inode dynamic state flags |
| @@ -291,8 +291,6 @@ struct ext4_new_group_data { | |||
| 291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS | 291 | #define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS |
| 292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) | 292 | #define EXT4_IOC_GETVERSION _IOR('f', 3, long) |
| 293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) | 293 | #define EXT4_IOC_SETVERSION _IOW('f', 4, long) |
| 294 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) | ||
| 295 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) | ||
| 296 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION | 294 | #define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION |
| 297 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION | 295 | #define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION |
| 298 | #ifdef CONFIG_JBD2_DEBUG | 296 | #ifdef CONFIG_JBD2_DEBUG |
| @@ -300,7 +298,10 @@ struct ext4_new_group_data { | |||
| 300 | #endif | 298 | #endif |
| 301 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) | 299 | #define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) |
| 302 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) | 300 | #define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) |
| 303 | #define EXT4_IOC_MIGRATE _IO('f', 7) | 301 | #define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) |
| 302 | #define EXT4_IOC_GROUP_ADD _IOW('f', 8, struct ext4_new_group_input) | ||
| 303 | #define EXT4_IOC_MIGRATE _IO('f', 9) | ||
| 304 | /* note ioctl 11 reserved for filesystem-independent FIEMAP ioctl */ | ||
| 304 | 305 | ||
| 305 | /* | 306 | /* |
| 306 | * ioctl commands in 32 bit emulation | 307 | * ioctl commands in 32 bit emulation |
| @@ -510,7 +511,6 @@ do { \ | |||
| 510 | /* | 511 | /* |
| 511 | * Mount flags | 512 | * Mount flags |
| 512 | */ | 513 | */ |
| 513 | #define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ | ||
| 514 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ | 514 | #define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ |
| 515 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ | 515 | #define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ |
| 516 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ | 516 | #define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ |
| @@ -538,8 +538,9 @@ do { \ | |||
| 538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ | 538 | #define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */ |
| 539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ | 539 | #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ |
| 540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ | 540 | #define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ |
| 541 | #define EXT4_MOUNT_MBALLOC 0x4000000 /* Buddy allocation support */ | ||
| 542 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ | 541 | #define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ |
| 542 | #define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ | ||
| 543 | |||
| 543 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ | 544 | /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ |
| 544 | #ifndef _LINUX_EXT2_FS_H | 545 | #ifndef _LINUX_EXT2_FS_H |
| 545 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt | 546 | #define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt |
| @@ -667,7 +668,7 @@ struct ext4_super_block { | |||
| 667 | }; | 668 | }; |
| 668 | 669 | ||
| 669 | #ifdef __KERNEL__ | 670 | #ifdef __KERNEL__ |
| 670 | static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) | 671 | static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) |
| 671 | { | 672 | { |
| 672 | return sb->s_fs_info; | 673 | return sb->s_fs_info; |
| 673 | } | 674 | } |
| @@ -725,11 +726,11 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
| 725 | */ | 726 | */ |
| 726 | 727 | ||
| 727 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ | 728 | #define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ |
| 728 | ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) | 729 | (EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask)) |
| 729 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ | 730 | #define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ |
| 730 | ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) | 731 | (EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask)) |
| 731 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ | 732 | #define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ |
| 732 | ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) | 733 | (EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask)) |
| 733 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ | 734 | #define EXT4_SET_COMPAT_FEATURE(sb,mask) \ |
| 734 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) | 735 | EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) |
| 735 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ | 736 | #define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ |
| @@ -789,6 +790,8 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) | |||
| 789 | #define EXT4_DEF_RESUID 0 | 790 | #define EXT4_DEF_RESUID 0 |
| 790 | #define EXT4_DEF_RESGID 0 | 791 | #define EXT4_DEF_RESGID 0 |
| 791 | 792 | ||
| 793 | #define EXT4_DEF_INODE_READAHEAD_BLKS 32 | ||
| 794 | |||
| 792 | /* | 795 | /* |
| 793 | * Default mount options | 796 | * Default mount options |
| 794 | */ | 797 | */ |
| @@ -954,6 +957,24 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no) | |||
| 954 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, | 957 | void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, |
| 955 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); | 958 | unsigned long *blockgrpp, ext4_grpblk_t *offsetp); |
| 956 | 959 | ||
| 960 | extern struct proc_dir_entry *ext4_proc_root; | ||
| 961 | |||
| 962 | #ifdef CONFIG_PROC_FS | ||
| 963 | extern const struct file_operations ext4_ui_proc_fops; | ||
| 964 | |||
| 965 | #define EXT4_PROC_HANDLER(name, var) \ | ||
| 966 | do { \ | ||
| 967 | proc = proc_create_data(name, mode, sbi->s_proc, \ | ||
| 968 | &ext4_ui_proc_fops, &sbi->s_##var); \ | ||
| 969 | if (proc == NULL) { \ | ||
| 970 | printk(KERN_ERR "EXT4-fs: can't create %s\n", name); \ | ||
| 971 | goto err_out; \ | ||
| 972 | } \ | ||
| 973 | } while (0) | ||
| 974 | #else | ||
| 975 | #define EXT4_PROC_HANDLER(name, var) | ||
| 976 | #endif | ||
| 977 | |||
| 957 | /* | 978 | /* |
| 958 | * Function prototypes | 979 | * Function prototypes |
| 959 | */ | 980 | */ |
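
Aside, for illustration only (not taken from the patch): the new EXT4_PROC_HANDLER macro above relies on locals named proc, mode and sbi plus an err_out label at its call site. A mount-time registration helper in super.c might use it roughly as in the sketch below; the helper name, the "inode_readahead_blks" entry name and the cleanup path are assumptions, while s_inode_readahead_blks, ext4_ui_proc_fops and ext4_proc_root come from the patch itself.

	static int ext4_register_proc_entries(struct ext4_sb_info *sbi, const char *devname)
	{
		struct proc_dir_entry *proc;
		mode_t mode = S_IFREG | S_IRUGO | S_IWUSR;

		sbi->s_proc = proc_mkdir(devname, ext4_proc_root);
		if (sbi->s_proc == NULL)
			return -ENOMEM;

		/* expands to proc_create_data("inode_readahead_blks", mode, sbi->s_proc,
		 *                             &ext4_ui_proc_fops, &sbi->s_inode_readahead_blks)
		 * and jumps to err_out if the entry cannot be created */
		EXT4_PROC_HANDLER("inode_readahead_blks", inode_readahead_blks);
		return 0;

	err_out:
		remove_proc_entry(devname, ext4_proc_root);
		sbi->s_proc = NULL;
		return -ENOMEM;
	}
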
| @@ -981,23 +1002,20 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode, | |||
| 981 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, | 1002 | extern ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, |
| 982 | ext4_lblk_t iblock, ext4_fsblk_t goal, | 1003 | ext4_lblk_t iblock, ext4_fsblk_t goal, |
| 983 | unsigned long *count, int *errp); | 1004 | unsigned long *count, int *errp); |
| 984 | extern ext4_fsblk_t ext4_old_new_blocks(handle_t *handle, struct inode *inode, | 1005 | extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); |
| 985 | ext4_fsblk_t goal, unsigned long *count, int *errp); | ||
| 986 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, | 1006 | extern ext4_fsblk_t ext4_has_free_blocks(struct ext4_sb_info *sbi, |
| 987 | ext4_fsblk_t nblocks); | 1007 | s64 nblocks); |
| 988 | extern void ext4_free_blocks (handle_t *handle, struct inode *inode, | 1008 | extern void ext4_free_blocks(handle_t *handle, struct inode *inode, |
| 989 | ext4_fsblk_t block, unsigned long count, int metadata); | 1009 | ext4_fsblk_t block, unsigned long count, int metadata); |
| 990 | extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, | 1010 | extern void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, |
| 991 | ext4_fsblk_t block, unsigned long count, | 1011 | ext4_fsblk_t block, unsigned long count, |
| 992 | unsigned long *pdquot_freed_blocks); | 1012 | unsigned long *pdquot_freed_blocks); |
| 993 | extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); | 1013 | extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); |
| 994 | extern void ext4_check_blocks_bitmap (struct super_block *); | 1014 | extern void ext4_check_blocks_bitmap(struct super_block *); |
| 995 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, | 1015 | extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, |
| 996 | ext4_group_t block_group, | 1016 | ext4_group_t block_group, |
| 997 | struct buffer_head ** bh); | 1017 | struct buffer_head ** bh); |
| 998 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); | 1018 | extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); |
| 999 | extern void ext4_init_block_alloc_info(struct inode *); | ||
| 1000 | extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); | ||
| 1001 | 1019 | ||
| 1002 | /* dir.c */ | 1020 | /* dir.c */ |
| 1003 | extern int ext4_check_dir_entry(const char *, struct inode *, | 1021 | extern int ext4_check_dir_entry(const char *, struct inode *, |
| @@ -1009,20 +1027,20 @@ extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, | |||
| 1009 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); | 1027 | extern void ext4_htree_free_dir_info(struct dir_private_info *p); |
| 1010 | 1028 | ||
| 1011 | /* fsync.c */ | 1029 | /* fsync.c */ |
| 1012 | extern int ext4_sync_file (struct file *, struct dentry *, int); | 1030 | extern int ext4_sync_file(struct file *, struct dentry *, int); |
| 1013 | 1031 | ||
| 1014 | /* hash.c */ | 1032 | /* hash.c */ |
| 1015 | extern int ext4fs_dirhash(const char *name, int len, struct | 1033 | extern int ext4fs_dirhash(const char *name, int len, struct |
| 1016 | dx_hash_info *hinfo); | 1034 | dx_hash_info *hinfo); |
| 1017 | 1035 | ||
| 1018 | /* ialloc.c */ | 1036 | /* ialloc.c */ |
| 1019 | extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); | 1037 | extern struct inode * ext4_new_inode(handle_t *, struct inode *, int); |
| 1020 | extern void ext4_free_inode (handle_t *, struct inode *); | 1038 | extern void ext4_free_inode(handle_t *, struct inode *); |
| 1021 | extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); | 1039 | extern struct inode * ext4_orphan_get(struct super_block *, unsigned long); |
| 1022 | extern unsigned long ext4_count_free_inodes (struct super_block *); | 1040 | extern unsigned long ext4_count_free_inodes(struct super_block *); |
| 1023 | extern unsigned long ext4_count_dirs (struct super_block *); | 1041 | extern unsigned long ext4_count_dirs(struct super_block *); |
| 1024 | extern void ext4_check_inodes_bitmap (struct super_block *); | 1042 | extern void ext4_check_inodes_bitmap(struct super_block *); |
| 1025 | extern unsigned long ext4_count_free (struct buffer_head *, unsigned); | 1043 | extern unsigned long ext4_count_free(struct buffer_head *, unsigned); |
| 1026 | 1044 | ||
| 1027 | /* mballoc.c */ | 1045 | /* mballoc.c */ |
| 1028 | extern long ext4_mb_stats; | 1046 | extern long ext4_mb_stats; |
| @@ -1032,7 +1050,7 @@ extern int ext4_mb_release(struct super_block *); | |||
| 1032 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, | 1050 | extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *, |
| 1033 | struct ext4_allocation_request *, int *); | 1051 | struct ext4_allocation_request *, int *); |
| 1034 | extern int ext4_mb_reserve_blocks(struct super_block *, int); | 1052 | extern int ext4_mb_reserve_blocks(struct super_block *, int); |
| 1035 | extern void ext4_mb_discard_inode_preallocations(struct inode *); | 1053 | extern void ext4_discard_preallocations(struct inode *); |
| 1036 | extern int __init init_ext4_mballoc(void); | 1054 | extern int __init init_ext4_mballoc(void); |
| 1037 | extern void exit_ext4_mballoc(void); | 1055 | extern void exit_ext4_mballoc(void); |
| 1038 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, | 1056 | extern void ext4_mb_free_blocks(handle_t *, struct inode *, |
| @@ -1050,24 +1068,25 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, | |||
| 1050 | ext4_lblk_t, int, int *); | 1068 | ext4_lblk_t, int, int *); |
| 1051 | struct buffer_head *ext4_bread(handle_t *, struct inode *, | 1069 | struct buffer_head *ext4_bread(handle_t *, struct inode *, |
| 1052 | ext4_lblk_t, int, int *); | 1070 | ext4_lblk_t, int, int *); |
| 1071 | int ext4_get_block(struct inode *inode, sector_t iblock, | ||
| 1072 | struct buffer_head *bh_result, int create); | ||
| 1053 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | 1073 | int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, |
| 1054 | ext4_lblk_t iblock, unsigned long maxblocks, | 1074 | ext4_lblk_t iblock, unsigned long maxblocks, |
| 1055 | struct buffer_head *bh_result, | 1075 | struct buffer_head *bh_result, |
| 1056 | int create, int extend_disksize); | 1076 | int create, int extend_disksize); |
| 1057 | 1077 | ||
| 1058 | extern struct inode *ext4_iget(struct super_block *, unsigned long); | 1078 | extern struct inode *ext4_iget(struct super_block *, unsigned long); |
| 1059 | extern int ext4_write_inode (struct inode *, int); | 1079 | extern int ext4_write_inode(struct inode *, int); |
| 1060 | extern int ext4_setattr (struct dentry *, struct iattr *); | 1080 | extern int ext4_setattr(struct dentry *, struct iattr *); |
| 1061 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, | 1081 | extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, |
| 1062 | struct kstat *stat); | 1082 | struct kstat *stat); |
| 1063 | extern void ext4_delete_inode (struct inode *); | 1083 | extern void ext4_delete_inode(struct inode *); |
| 1064 | extern int ext4_sync_inode (handle_t *, struct inode *); | 1084 | extern int ext4_sync_inode(handle_t *, struct inode *); |
| 1065 | extern void ext4_discard_reservation (struct inode *); | ||
| 1066 | extern void ext4_dirty_inode(struct inode *); | 1085 | extern void ext4_dirty_inode(struct inode *); |
| 1067 | extern int ext4_change_inode_journal_flag(struct inode *, int); | 1086 | extern int ext4_change_inode_journal_flag(struct inode *, int); |
| 1068 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); | 1087 | extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); |
| 1069 | extern int ext4_can_truncate(struct inode *inode); | 1088 | extern int ext4_can_truncate(struct inode *inode); |
| 1070 | extern void ext4_truncate (struct inode *); | 1089 | extern void ext4_truncate(struct inode *); |
| 1071 | extern void ext4_set_inode_flags(struct inode *); | 1090 | extern void ext4_set_inode_flags(struct inode *); |
| 1072 | extern void ext4_get_inode_flags(struct ext4_inode_info *); | 1091 | extern void ext4_get_inode_flags(struct ext4_inode_info *); |
| 1073 | extern void ext4_set_aops(struct inode *inode); | 1092 | extern void ext4_set_aops(struct inode *inode); |
| @@ -1080,11 +1099,10 @@ extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page); | |||
| 1080 | 1099 | ||
| 1081 | /* ioctl.c */ | 1100 | /* ioctl.c */ |
| 1082 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); | 1101 | extern long ext4_ioctl(struct file *, unsigned int, unsigned long); |
| 1083 | extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); | 1102 | extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long); |
| 1084 | 1103 | ||
| 1085 | /* migrate.c */ | 1104 | /* migrate.c */ |
| 1086 | extern int ext4_ext_migrate(struct inode *, struct file *, unsigned int, | 1105 | extern int ext4_ext_migrate(struct inode *); |
| 1087 | unsigned long); | ||
| 1088 | /* namei.c */ | 1106 | /* namei.c */ |
| 1089 | extern int ext4_orphan_add(handle_t *, struct inode *); | 1107 | extern int ext4_orphan_add(handle_t *, struct inode *); |
| 1090 | extern int ext4_orphan_del(handle_t *, struct inode *); | 1108 | extern int ext4_orphan_del(handle_t *, struct inode *); |
| @@ -1099,14 +1117,14 @@ extern int ext4_group_extend(struct super_block *sb, | |||
| 1099 | ext4_fsblk_t n_blocks_count); | 1117 | ext4_fsblk_t n_blocks_count); |
| 1100 | 1118 | ||
| 1101 | /* super.c */ | 1119 | /* super.c */ |
| 1102 | extern void ext4_error (struct super_block *, const char *, const char *, ...) | 1120 | extern void ext4_error(struct super_block *, const char *, const char *, ...) |
| 1103 | __attribute__ ((format (printf, 3, 4))); | 1121 | __attribute__ ((format (printf, 3, 4))); |
| 1104 | extern void __ext4_std_error (struct super_block *, const char *, int); | 1122 | extern void __ext4_std_error(struct super_block *, const char *, int); |
| 1105 | extern void ext4_abort (struct super_block *, const char *, const char *, ...) | 1123 | extern void ext4_abort(struct super_block *, const char *, const char *, ...) |
| 1106 | __attribute__ ((format (printf, 3, 4))); | 1124 | __attribute__ ((format (printf, 3, 4))); |
| 1107 | extern void ext4_warning (struct super_block *, const char *, const char *, ...) | 1125 | extern void ext4_warning(struct super_block *, const char *, const char *, ...) |
| 1108 | __attribute__ ((format (printf, 3, 4))); | 1126 | __attribute__ ((format (printf, 3, 4))); |
| 1109 | extern void ext4_update_dynamic_rev (struct super_block *sb); | 1127 | extern void ext4_update_dynamic_rev(struct super_block *sb); |
| 1110 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, | 1128 | extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, |
| 1111 | __u32 compat); | 1129 | __u32 compat); |
| 1112 | extern int ext4_update_rocompat_feature(handle_t *handle, | 1130 | extern int ext4_update_rocompat_feature(handle_t *handle, |
| @@ -1179,7 +1197,7 @@ static inline void ext4_isize_set(struct ext4_inode *raw_inode, loff_t i_size) | |||
| 1179 | 1197 | ||
| 1180 | static inline | 1198 | static inline |
| 1181 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, | 1199 | struct ext4_group_info *ext4_get_group_info(struct super_block *sb, |
| 1182 | ext4_group_t group) | 1200 | ext4_group_t group) |
| 1183 | { | 1201 | { |
| 1184 | struct ext4_group_info ***grp_info; | 1202 | struct ext4_group_info ***grp_info; |
| 1185 | long indexv, indexh; | 1203 | long indexv, indexh; |
| @@ -1207,6 +1225,28 @@ do { \ | |||
| 1207 | __ext4_std_error((sb), __func__, (errno)); \ | 1225 | __ext4_std_error((sb), __func__, (errno)); \ |
| 1208 | } while (0) | 1226 | } while (0) |
| 1209 | 1227 | ||
| 1228 | #ifdef CONFIG_SMP | ||
| 1229 | /* Each CPU can accumulate FBC_BATCH blocks in their local | ||
| 1230 | * counters. So we need to make sure we have free blocks more | ||
| 1231 | * than FBC_BATCH * nr_cpu_ids. Also add a window of 4 times. | ||
| 1232 | */ | ||
| 1233 | #define EXT4_FREEBLOCKS_WATERMARK (4 * (FBC_BATCH * nr_cpu_ids)) | ||
| 1234 | #else | ||
| 1235 | #define EXT4_FREEBLOCKS_WATERMARK 0 | ||
| 1236 | #endif | ||
| 1237 | |||
| 1238 | static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize) | ||
| 1239 | { | ||
| 1240 | /* | ||
| 1241 | * XXX: replace with spinlock if seen contended -bzzz | ||
| 1242 | */ | ||
| 1243 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 1244 | if (newsize > EXT4_I(inode)->i_disksize) | ||
| 1245 | EXT4_I(inode)->i_disksize = newsize; | ||
| 1246 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 1247 | return ; | ||
| 1248 | } | ||
| 1249 | |||
| 1210 | /* | 1250 | /* |
| 1211 | * Inodes and files operations | 1251 | * Inodes and files operations |
| 1212 | */ | 1252 | */ |
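
Two short notes on the helpers added above, for illustration only. EXT4_FREEBLOCKS_WATERMARK is plain arithmetic over the per-CPU counter batch: on a hypothetical SMP box with FBC_BATCH of 32 and nr_cpu_ids of 8 it evaluates to 4 * 32 * 8 = 1024 blocks, and it is 0 without CONFIG_SMP, where the counters carry no per-CPU slack. ext4_update_i_disksize only ever grows i_disksize: the check and the store both happen under i_data_sem held for writing, so a concurrent update cannot slip in between them.
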
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h index d33dc56d6986..bec7ce59fc0d 100644 --- a/fs/ext4/ext4_extents.h +++ b/fs/ext4/ext4_extents.h | |||
| @@ -124,6 +124,19 @@ struct ext4_ext_path { | |||
| 124 | #define EXT4_EXT_CACHE_GAP 1 | 124 | #define EXT4_EXT_CACHE_GAP 1 |
| 125 | #define EXT4_EXT_CACHE_EXTENT 2 | 125 | #define EXT4_EXT_CACHE_EXTENT 2 |
| 126 | 126 | ||
| 127 | /* | ||
| 128 | * to be called by ext4_ext_walk_space() | ||
| 129 | * negative retcode - error | ||
| 130 | * positive retcode - signal for ext4_ext_walk_space(), see below | ||
| 131 | * callback must return valid extent (passed or newly created) | ||
| 132 | */ | ||
| 133 | typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, | ||
| 134 | struct ext4_ext_cache *, | ||
| 135 | struct ext4_extent *, void *); | ||
| 136 | |||
| 137 | #define EXT_CONTINUE 0 | ||
| 138 | #define EXT_BREAK 1 | ||
| 139 | #define EXT_REPEAT 2 | ||
| 127 | 140 | ||
| 128 | #define EXT_MAX_BLOCK 0xffffffff | 141 | #define EXT_MAX_BLOCK 0xffffffff |
| 129 | 142 | ||
| @@ -224,6 +237,8 @@ extern int ext4_ext_try_to_merge(struct inode *inode, | |||
| 224 | struct ext4_extent *); | 237 | struct ext4_extent *); |
| 225 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); | 238 | extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *); |
| 226 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); | 239 | extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); |
| 240 | extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t, | ||
| 241 | ext_prepare_callback, void *); | ||
| 227 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, | 242 | extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, |
| 228 | struct ext4_ext_path *); | 243 | struct ext4_ext_path *); |
| 229 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, | 244 | extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *, |
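
Aside, a minimal sketch of the new callback contract (not from the patch; the function name and counter are made up, the signature and return codes are the ones declared above). The in-tree user is ext4_ext_fiemap_cb further down, which also shows that callers serialize the walk via i_data_sem.

	/* count how many allocated extents the walker visits */
	static int count_extents_cb(struct inode *inode, struct ext4_ext_path *path,
				    struct ext4_ext_cache *cex, struct ext4_extent *ex,
				    void *data)
	{
		/* ex may be NULL when the walker reports a gap; rely on the cache entry */
		if (cex->ec_type == EXT4_EXT_CACHE_EXTENT)
			(*(unsigned long *)data)++;
		return EXT_CONTINUE;	/* EXT_BREAK stops the walk, EXT_REPEAT retries this block */
	}

	/* usage: unsigned long nr = 0;
	 *        ext4_ext_walk_space(inode, 0, EXT_MAX_BLOCK, count_extents_cb, &nr); */
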
diff --git a/fs/ext4/ext4_i.h b/fs/ext4/ext4_i.h index ef7409f0e7e4..5c124c0ac6d3 100644 --- a/fs/ext4/ext4_i.h +++ b/fs/ext4/ext4_i.h | |||
| @@ -33,38 +33,6 @@ typedef __u32 ext4_lblk_t; | |||
| 33 | /* data type for block group number */ | 33 | /* data type for block group number */ |
| 34 | typedef unsigned long ext4_group_t; | 34 | typedef unsigned long ext4_group_t; |
| 35 | 35 | ||
| 36 | struct ext4_reserve_window { | ||
| 37 | ext4_fsblk_t _rsv_start; /* First byte reserved */ | ||
| 38 | ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ | ||
| 39 | }; | ||
| 40 | |||
| 41 | struct ext4_reserve_window_node { | ||
| 42 | struct rb_node rsv_node; | ||
| 43 | __u32 rsv_goal_size; | ||
| 44 | __u32 rsv_alloc_hit; | ||
| 45 | struct ext4_reserve_window rsv_window; | ||
| 46 | }; | ||
| 47 | |||
| 48 | struct ext4_block_alloc_info { | ||
| 49 | /* information about reservation window */ | ||
| 50 | struct ext4_reserve_window_node rsv_window_node; | ||
| 51 | /* | ||
| 52 | * was i_next_alloc_block in ext4_inode_info | ||
| 53 | * is the logical (file-relative) number of the | ||
| 54 | * most-recently-allocated block in this file. | ||
| 55 | * We use this for detecting linearly ascending allocation requests. | ||
| 56 | */ | ||
| 57 | ext4_lblk_t last_alloc_logical_block; | ||
| 58 | /* | ||
| 59 | * Was i_next_alloc_goal in ext4_inode_info | ||
| 60 | * is the *physical* companion to i_next_alloc_block. | ||
| 61 | * it the physical block number of the block which was most-recentl | ||
| 62 | * allocated to this file. This give us the goal (target) for the next | ||
| 63 | * allocation when we detect linearly ascending requests. | ||
| 64 | */ | ||
| 65 | ext4_fsblk_t last_alloc_physical_block; | ||
| 66 | }; | ||
| 67 | |||
| 68 | #define rsv_start rsv_window._rsv_start | 36 | #define rsv_start rsv_window._rsv_start |
| 69 | #define rsv_end rsv_window._rsv_end | 37 | #define rsv_end rsv_window._rsv_end |
| 70 | 38 | ||
| @@ -97,11 +65,8 @@ struct ext4_inode_info { | |||
| 97 | ext4_group_t i_block_group; | 65 | ext4_group_t i_block_group; |
| 98 | __u32 i_state; /* Dynamic state flags for ext4 */ | 66 | __u32 i_state; /* Dynamic state flags for ext4 */ |
| 99 | 67 | ||
| 100 | /* block reservation info */ | ||
| 101 | struct ext4_block_alloc_info *i_block_alloc_info; | ||
| 102 | |||
| 103 | ext4_lblk_t i_dir_start_lookup; | 68 | ext4_lblk_t i_dir_start_lookup; |
| 104 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 69 | #ifdef CONFIG_EXT4_FS_XATTR |
| 105 | /* | 70 | /* |
| 106 | * Extended attributes can be read independently of the main file | 71 | * Extended attributes can be read independently of the main file |
| 107 | * data. Taking i_mutex even when reading would cause contention | 72 | * data. Taking i_mutex even when reading would cause contention |
| @@ -111,7 +76,7 @@ struct ext4_inode_info { | |||
| 111 | */ | 76 | */ |
| 112 | struct rw_semaphore xattr_sem; | 77 | struct rw_semaphore xattr_sem; |
| 113 | #endif | 78 | #endif |
| 114 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 79 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 115 | struct posix_acl *i_acl; | 80 | struct posix_acl *i_acl; |
| 116 | struct posix_acl *i_default_acl; | 81 | struct posix_acl *i_default_acl; |
| 117 | #endif | 82 | #endif |
diff --git a/fs/ext4/ext4_sb.h b/fs/ext4/ext4_sb.h index 6300226d5531..445fde603df8 100644 --- a/fs/ext4/ext4_sb.h +++ b/fs/ext4/ext4_sb.h | |||
| @@ -40,8 +40,8 @@ struct ext4_sb_info { | |||
| 40 | unsigned long s_blocks_last; /* Last seen block count */ | 40 | unsigned long s_blocks_last; /* Last seen block count */ |
| 41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ | 41 | loff_t s_bitmap_maxbytes; /* max bytes for bitmap files */ |
| 42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ | 42 | struct buffer_head * s_sbh; /* Buffer containing the super block */ |
| 43 | struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ | 43 | struct ext4_super_block *s_es; /* Pointer to the super block in the buffer */ |
| 44 | struct buffer_head ** s_group_desc; | 44 | struct buffer_head **s_group_desc; |
| 45 | unsigned long s_mount_opt; | 45 | unsigned long s_mount_opt; |
| 46 | ext4_fsblk_t s_sb_block; | 46 | ext4_fsblk_t s_sb_block; |
| 47 | uid_t s_resuid; | 47 | uid_t s_resuid; |
| @@ -52,6 +52,7 @@ struct ext4_sb_info { | |||
| 52 | int s_desc_per_block_bits; | 52 | int s_desc_per_block_bits; |
| 53 | int s_inode_size; | 53 | int s_inode_size; |
| 54 | int s_first_ino; | 54 | int s_first_ino; |
| 55 | unsigned int s_inode_readahead_blks; | ||
| 55 | spinlock_t s_next_gen_lock; | 56 | spinlock_t s_next_gen_lock; |
| 56 | u32 s_next_generation; | 57 | u32 s_next_generation; |
| 57 | u32 s_hash_seed[4]; | 58 | u32 s_hash_seed[4]; |
| @@ -59,16 +60,17 @@ struct ext4_sb_info { | |||
| 59 | struct percpu_counter s_freeblocks_counter; | 60 | struct percpu_counter s_freeblocks_counter; |
| 60 | struct percpu_counter s_freeinodes_counter; | 61 | struct percpu_counter s_freeinodes_counter; |
| 61 | struct percpu_counter s_dirs_counter; | 62 | struct percpu_counter s_dirs_counter; |
| 63 | struct percpu_counter s_dirtyblocks_counter; | ||
| 62 | struct blockgroup_lock s_blockgroup_lock; | 64 | struct blockgroup_lock s_blockgroup_lock; |
| 65 | struct proc_dir_entry *s_proc; | ||
| 63 | 66 | ||
| 64 | /* root of the per fs reservation window tree */ | 67 | /* root of the per fs reservation window tree */ |
| 65 | spinlock_t s_rsv_window_lock; | 68 | spinlock_t s_rsv_window_lock; |
| 66 | struct rb_root s_rsv_window_root; | 69 | struct rb_root s_rsv_window_root; |
| 67 | struct ext4_reserve_window_node s_rsv_window_head; | ||
| 68 | 70 | ||
| 69 | /* Journaling */ | 71 | /* Journaling */ |
| 70 | struct inode * s_journal_inode; | 72 | struct inode *s_journal_inode; |
| 71 | struct journal_s * s_journal; | 73 | struct journal_s *s_journal; |
| 72 | struct list_head s_orphan; | 74 | struct list_head s_orphan; |
| 73 | unsigned long s_commit_interval; | 75 | unsigned long s_commit_interval; |
| 74 | struct block_device *journal_bdev; | 76 | struct block_device *journal_bdev; |
| @@ -97,21 +99,18 @@ struct ext4_sb_info { | |||
| 97 | struct inode *s_buddy_cache; | 99 | struct inode *s_buddy_cache; |
| 98 | long s_blocks_reserved; | 100 | long s_blocks_reserved; |
| 99 | spinlock_t s_reserve_lock; | 101 | spinlock_t s_reserve_lock; |
| 100 | struct list_head s_active_transaction; | ||
| 101 | struct list_head s_closed_transaction; | ||
| 102 | struct list_head s_committed_transaction; | ||
| 103 | spinlock_t s_md_lock; | 102 | spinlock_t s_md_lock; |
| 104 | tid_t s_last_transaction; | 103 | tid_t s_last_transaction; |
| 105 | unsigned short *s_mb_offsets, *s_mb_maxs; | 104 | unsigned short *s_mb_offsets, *s_mb_maxs; |
| 106 | 105 | ||
| 107 | /* tunables */ | 106 | /* tunables */ |
| 108 | unsigned long s_stripe; | 107 | unsigned long s_stripe; |
| 109 | unsigned long s_mb_stream_request; | 108 | unsigned int s_mb_stream_request; |
| 110 | unsigned long s_mb_max_to_scan; | 109 | unsigned int s_mb_max_to_scan; |
| 111 | unsigned long s_mb_min_to_scan; | 110 | unsigned int s_mb_min_to_scan; |
| 112 | unsigned long s_mb_stats; | 111 | unsigned int s_mb_stats; |
| 113 | unsigned long s_mb_order2_reqs; | 112 | unsigned int s_mb_order2_reqs; |
| 114 | unsigned long s_mb_group_prealloc; | 113 | unsigned int s_mb_group_prealloc; |
| 115 | /* where last allocation was done - for stream allocation */ | 114 | /* where last allocation was done - for stream allocation */ |
| 116 | unsigned long s_mb_last_group; | 115 | unsigned long s_mb_last_group; |
| 117 | unsigned long s_mb_last_start; | 116 | unsigned long s_mb_last_start; |
| @@ -121,7 +120,6 @@ struct ext4_sb_info { | |||
| 121 | int s_mb_history_cur; | 120 | int s_mb_history_cur; |
| 122 | int s_mb_history_max; | 121 | int s_mb_history_max; |
| 123 | int s_mb_history_num; | 122 | int s_mb_history_num; |
| 124 | struct proc_dir_entry *s_mb_proc; | ||
| 125 | spinlock_t s_mb_history_lock; | 123 | spinlock_t s_mb_history_lock; |
| 126 | int s_mb_history_filter; | 124 | int s_mb_history_filter; |
| 127 | 125 | ||
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index b24d3c53f20c..ea2ce3c0ae66 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c | |||
| @@ -40,6 +40,7 @@ | |||
| 40 | #include <linux/slab.h> | 40 | #include <linux/slab.h> |
| 41 | #include <linux/falloc.h> | 41 | #include <linux/falloc.h> |
| 42 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
| 43 | #include <linux/fiemap.h> | ||
| 43 | #include "ext4_jbd2.h" | 44 | #include "ext4_jbd2.h" |
| 44 | #include "ext4_extents.h" | 45 | #include "ext4_extents.h" |
| 45 | 46 | ||
| @@ -383,8 +384,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) | |||
| 383 | ext_debug("\n"); | 384 | ext_debug("\n"); |
| 384 | } | 385 | } |
| 385 | #else | 386 | #else |
| 386 | #define ext4_ext_show_path(inode,path) | 387 | #define ext4_ext_show_path(inode, path) |
| 387 | #define ext4_ext_show_leaf(inode,path) | 388 | #define ext4_ext_show_leaf(inode, path) |
| 388 | #endif | 389 | #endif |
| 389 | 390 | ||
| 390 | void ext4_ext_drop_refs(struct ext4_ext_path *path) | 391 | void ext4_ext_drop_refs(struct ext4_ext_path *path) |
| @@ -440,9 +441,10 @@ ext4_ext_binsearch_idx(struct inode *inode, | |||
| 440 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { | 441 | for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { |
| 441 | if (k != 0 && | 442 | if (k != 0 && |
| 442 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { | 443 | le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { |
| 443 | printk("k=%d, ix=0x%p, first=0x%p\n", k, | 444 | printk(KERN_DEBUG "k=%d, ix=0x%p, " |
| 444 | ix, EXT_FIRST_INDEX(eh)); | 445 | "first=0x%p\n", k, |
| 445 | printk("%u <= %u\n", | 446 | ix, EXT_FIRST_INDEX(eh)); |
| 447 | printk(KERN_DEBUG "%u <= %u\n", | ||
| 446 | le32_to_cpu(ix->ei_block), | 448 | le32_to_cpu(ix->ei_block), |
| 447 | le32_to_cpu(ix[-1].ei_block)); | 449 | le32_to_cpu(ix[-1].ei_block)); |
| 448 | } | 450 | } |
| @@ -1475,7 +1477,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, | |||
| 1475 | struct ext4_ext_path *path, | 1477 | struct ext4_ext_path *path, |
| 1476 | struct ext4_extent *newext) | 1478 | struct ext4_extent *newext) |
| 1477 | { | 1479 | { |
| 1478 | struct ext4_extent_header * eh; | 1480 | struct ext4_extent_header *eh; |
| 1479 | struct ext4_extent *ex, *fex; | 1481 | struct ext4_extent *ex, *fex; |
| 1480 | struct ext4_extent *nearex; /* nearest extent */ | 1482 | struct ext4_extent *nearex; /* nearest extent */ |
| 1481 | struct ext4_ext_path *npath = NULL; | 1483 | struct ext4_ext_path *npath = NULL; |
| @@ -1625,6 +1627,113 @@ cleanup: | |||
| 1625 | return err; | 1627 | return err; |
| 1626 | } | 1628 | } |
| 1627 | 1629 | ||
| 1630 | int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block, | ||
| 1631 | ext4_lblk_t num, ext_prepare_callback func, | ||
| 1632 | void *cbdata) | ||
| 1633 | { | ||
| 1634 | struct ext4_ext_path *path = NULL; | ||
| 1635 | struct ext4_ext_cache cbex; | ||
| 1636 | struct ext4_extent *ex; | ||
| 1637 | ext4_lblk_t next, start = 0, end = 0; | ||
| 1638 | ext4_lblk_t last = block + num; | ||
| 1639 | int depth, exists, err = 0; | ||
| 1640 | |||
| 1641 | BUG_ON(func == NULL); | ||
| 1642 | BUG_ON(inode == NULL); | ||
| 1643 | |||
| 1644 | while (block < last && block != EXT_MAX_BLOCK) { | ||
| 1645 | num = last - block; | ||
| 1646 | /* find extent for this block */ | ||
| 1647 | path = ext4_ext_find_extent(inode, block, path); | ||
| 1648 | if (IS_ERR(path)) { | ||
| 1649 | err = PTR_ERR(path); | ||
| 1650 | path = NULL; | ||
| 1651 | break; | ||
| 1652 | } | ||
| 1653 | |||
| 1654 | depth = ext_depth(inode); | ||
| 1655 | BUG_ON(path[depth].p_hdr == NULL); | ||
| 1656 | ex = path[depth].p_ext; | ||
| 1657 | next = ext4_ext_next_allocated_block(path); | ||
| 1658 | |||
| 1659 | exists = 0; | ||
| 1660 | if (!ex) { | ||
| 1661 | /* there is no extent yet, so try to allocate | ||
| 1662 | * all requested space */ | ||
| 1663 | start = block; | ||
| 1664 | end = block + num; | ||
| 1665 | } else if (le32_to_cpu(ex->ee_block) > block) { | ||
| 1666 | /* need to allocate space before found extent */ | ||
| 1667 | start = block; | ||
| 1668 | end = le32_to_cpu(ex->ee_block); | ||
| 1669 | if (block + num < end) | ||
| 1670 | end = block + num; | ||
| 1671 | } else if (block >= le32_to_cpu(ex->ee_block) | ||
| 1672 | + ext4_ext_get_actual_len(ex)) { | ||
| 1673 | /* need to allocate space after found extent */ | ||
| 1674 | start = block; | ||
| 1675 | end = block + num; | ||
| 1676 | if (end >= next) | ||
| 1677 | end = next; | ||
| 1678 | } else if (block >= le32_to_cpu(ex->ee_block)) { | ||
| 1679 | /* | ||
| 1680 | * some part of requested space is covered | ||
| 1681 | * by found extent | ||
| 1682 | */ | ||
| 1683 | start = block; | ||
| 1684 | end = le32_to_cpu(ex->ee_block) | ||
| 1685 | + ext4_ext_get_actual_len(ex); | ||
| 1686 | if (block + num < end) | ||
| 1687 | end = block + num; | ||
| 1688 | exists = 1; | ||
| 1689 | } else { | ||
| 1690 | BUG(); | ||
| 1691 | } | ||
| 1692 | BUG_ON(end <= start); | ||
| 1693 | |||
| 1694 | if (!exists) { | ||
| 1695 | cbex.ec_block = start; | ||
| 1696 | cbex.ec_len = end - start; | ||
| 1697 | cbex.ec_start = 0; | ||
| 1698 | cbex.ec_type = EXT4_EXT_CACHE_GAP; | ||
| 1699 | } else { | ||
| 1700 | cbex.ec_block = le32_to_cpu(ex->ee_block); | ||
| 1701 | cbex.ec_len = ext4_ext_get_actual_len(ex); | ||
| 1702 | cbex.ec_start = ext_pblock(ex); | ||
| 1703 | cbex.ec_type = EXT4_EXT_CACHE_EXTENT; | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | BUG_ON(cbex.ec_len == 0); | ||
| 1707 | err = func(inode, path, &cbex, ex, cbdata); | ||
| 1708 | ext4_ext_drop_refs(path); | ||
| 1709 | |||
| 1710 | if (err < 0) | ||
| 1711 | break; | ||
| 1712 | |||
| 1713 | if (err == EXT_REPEAT) | ||
| 1714 | continue; | ||
| 1715 | else if (err == EXT_BREAK) { | ||
| 1716 | err = 0; | ||
| 1717 | break; | ||
| 1718 | } | ||
| 1719 | |||
| 1720 | if (ext_depth(inode) != depth) { | ||
| 1721 | /* depth was changed. we have to realloc path */ | ||
| 1722 | kfree(path); | ||
| 1723 | path = NULL; | ||
| 1724 | } | ||
| 1725 | |||
| 1726 | block = cbex.ec_block + cbex.ec_len; | ||
| 1727 | } | ||
| 1728 | |||
| 1729 | if (path) { | ||
| 1730 | ext4_ext_drop_refs(path); | ||
| 1731 | kfree(path); | ||
| 1732 | } | ||
| 1733 | |||
| 1734 | return err; | ||
| 1735 | } | ||
| 1736 | |||
| 1628 | static void | 1737 | static void |
| 1629 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, | 1738 | ext4_ext_put_in_cache(struct inode *inode, ext4_lblk_t block, |
| 1630 | __u32 len, ext4_fsblk_t start, int type) | 1739 | __u32 len, ext4_fsblk_t start, int type) |
| @@ -2142,7 +2251,7 @@ void ext4_ext_init(struct super_block *sb) | |||
| 2142 | */ | 2251 | */ |
| 2143 | 2252 | ||
| 2144 | if (test_opt(sb, EXTENTS)) { | 2253 | if (test_opt(sb, EXTENTS)) { |
| 2145 | printk("EXT4-fs: file extents enabled"); | 2254 | printk(KERN_INFO "EXT4-fs: file extents enabled"); |
| 2146 | #ifdef AGGRESSIVE_TEST | 2255 | #ifdef AGGRESSIVE_TEST |
| 2147 | printk(", aggressive tests"); | 2256 | printk(", aggressive tests"); |
| 2148 | #endif | 2257 | #endif |
| @@ -2696,11 +2805,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2696 | goto out2; | 2805 | goto out2; |
| 2697 | } | 2806 | } |
| 2698 | /* | 2807 | /* |
| 2699 | * Okay, we need to do block allocation. Lazily initialize the block | 2808 | * Okay, we need to do block allocation. |
| 2700 | * allocation info here if necessary. | ||
| 2701 | */ | 2809 | */ |
| 2702 | if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) | ||
| 2703 | ext4_init_block_alloc_info(inode); | ||
| 2704 | 2810 | ||
| 2705 | /* find neighbour allocated blocks */ | 2811 | /* find neighbour allocated blocks */ |
| 2706 | ar.lleft = iblock; | 2812 | ar.lleft = iblock; |
| @@ -2760,7 +2866,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, | |||
| 2760 | /* free data blocks we just allocated */ | 2866 | /* free data blocks we just allocated */ |
| 2761 | /* not a good idea to call discard here directly, | 2867 | /* not a good idea to call discard here directly, |
| 2762 | * but otherwise we'd need to call it every free() */ | 2868 | * but otherwise we'd need to call it every free() */ |
| 2763 | ext4_mb_discard_inode_preallocations(inode); | 2869 | ext4_discard_preallocations(inode); |
| 2764 | ext4_free_blocks(handle, inode, ext_pblock(&newex), | 2870 | ext4_free_blocks(handle, inode, ext_pblock(&newex), |
| 2765 | ext4_ext_get_actual_len(&newex), 0); | 2871 | ext4_ext_get_actual_len(&newex), 0); |
| 2766 | goto out2; | 2872 | goto out2; |
| @@ -2824,7 +2930,7 @@ void ext4_ext_truncate(struct inode *inode) | |||
| 2824 | down_write(&EXT4_I(inode)->i_data_sem); | 2930 | down_write(&EXT4_I(inode)->i_data_sem); |
| 2825 | ext4_ext_invalidate_cache(inode); | 2931 | ext4_ext_invalidate_cache(inode); |
| 2826 | 2932 | ||
| 2827 | ext4_discard_reservation(inode); | 2933 | ext4_discard_preallocations(inode); |
| 2828 | 2934 | ||
| 2829 | /* | 2935 | /* |
| 2830 | * TODO: optimization is possible here. | 2936 | * TODO: optimization is possible here. |
| @@ -2877,10 +2983,11 @@ static void ext4_falloc_update_inode(struct inode *inode, | |||
| 2877 | * Update only when preallocation was requested beyond | 2983 | * Update only when preallocation was requested beyond |
| 2878 | * the file size. | 2984 | * the file size. |
| 2879 | */ | 2985 | */ |
| 2880 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 2986 | if (!(mode & FALLOC_FL_KEEP_SIZE)) { |
| 2881 | new_size > i_size_read(inode)) { | 2987 | if (new_size > i_size_read(inode)) |
| 2882 | i_size_write(inode, new_size); | 2988 | i_size_write(inode, new_size); |
| 2883 | EXT4_I(inode)->i_disksize = new_size; | 2989 | if (new_size > EXT4_I(inode)->i_disksize) |
| 2990 | ext4_update_i_disksize(inode, new_size); | ||
| 2884 | } | 2991 | } |
| 2885 | 2992 | ||
| 2886 | } | 2993 | } |
| @@ -2972,3 +3079,143 @@ retry: | |||
| 2972 | mutex_unlock(&inode->i_mutex); | 3079 | mutex_unlock(&inode->i_mutex); |
| 2973 | return ret > 0 ? ret2 : ret; | 3080 | return ret > 0 ? ret2 : ret; |
| 2974 | } | 3081 | } |
| 3082 | |||
| 3083 | /* | ||
| 3084 | * Callback function called for each extent to gather FIEMAP information. | ||
| 3085 | */ | ||
| 3086 | int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path, | ||
| 3087 | struct ext4_ext_cache *newex, struct ext4_extent *ex, | ||
| 3088 | void *data) | ||
| 3089 | { | ||
| 3090 | struct fiemap_extent_info *fieinfo = data; | ||
| 3091 | unsigned long blksize_bits = inode->i_sb->s_blocksize_bits; | ||
| 3092 | __u64 logical; | ||
| 3093 | __u64 physical; | ||
| 3094 | __u64 length; | ||
| 3095 | __u32 flags = 0; | ||
| 3096 | int error; | ||
| 3097 | |||
| 3098 | logical = (__u64)newex->ec_block << blksize_bits; | ||
| 3099 | |||
| 3100 | if (newex->ec_type == EXT4_EXT_CACHE_GAP) { | ||
| 3101 | pgoff_t offset; | ||
| 3102 | struct page *page; | ||
| 3103 | struct buffer_head *bh = NULL; | ||
| 3104 | |||
| 3105 | offset = logical >> PAGE_SHIFT; | ||
| 3106 | page = find_get_page(inode->i_mapping, offset); | ||
| 3107 | if (!page || !page_has_buffers(page)) | ||
| 3108 | return EXT_CONTINUE; | ||
| 3109 | |||
| 3110 | bh = page_buffers(page); | ||
| 3111 | |||
| 3112 | if (!bh) | ||
| 3113 | return EXT_CONTINUE; | ||
| 3114 | |||
| 3115 | if (buffer_delay(bh)) { | ||
| 3116 | flags |= FIEMAP_EXTENT_DELALLOC; | ||
| 3117 | page_cache_release(page); | ||
| 3118 | } else { | ||
| 3119 | page_cache_release(page); | ||
| 3120 | return EXT_CONTINUE; | ||
| 3121 | } | ||
| 3122 | } | ||
| 3123 | |||
| 3124 | physical = (__u64)newex->ec_start << blksize_bits; | ||
| 3125 | length = (__u64)newex->ec_len << blksize_bits; | ||
| 3126 | |||
| 3127 | if (ex && ext4_ext_is_uninitialized(ex)) | ||
| 3128 | flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
| 3129 | |||
| 3130 | /* | ||
| 3131 | * If this extent reaches EXT_MAX_BLOCK, it must be last. | ||
| 3132 | * | ||
| 3133 | * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK, | ||
| 3134 | * this also indicates no more allocated blocks. | ||
| 3135 | * | ||
| 3136 | * XXX this might miss a single-block extent at EXT_MAX_BLOCK | ||
| 3137 | */ | ||
| 3138 | if (logical + length - 1 == EXT_MAX_BLOCK || | ||
| 3139 | ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK) | ||
| 3140 | flags |= FIEMAP_EXTENT_LAST; | ||
| 3141 | |||
| 3142 | error = fiemap_fill_next_extent(fieinfo, logical, physical, | ||
| 3143 | length, flags); | ||
| 3144 | if (error < 0) | ||
| 3145 | return error; | ||
| 3146 | if (error == 1) | ||
| 3147 | return EXT_BREAK; | ||
| 3148 | |||
| 3149 | return EXT_CONTINUE; | ||
| 3150 | } | ||
| 3151 | |||
| 3152 | /* fiemap flags we can handle specified here */ | ||
| 3153 | #define EXT4_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR) | ||
| 3154 | |||
| 3155 | int ext4_xattr_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo) | ||
| 3156 | { | ||
| 3157 | __u64 physical = 0; | ||
| 3158 | __u64 length; | ||
| 3159 | __u32 flags = FIEMAP_EXTENT_LAST; | ||
| 3160 | int blockbits = inode->i_sb->s_blocksize_bits; | ||
| 3161 | int error = 0; | ||
| 3162 | |||
| 3163 | /* in-inode? */ | ||
| 3164 | if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { | ||
| 3165 | struct ext4_iloc iloc; | ||
| 3166 | int offset; /* offset of xattr in inode */ | ||
| 3167 | |||
| 3168 | error = ext4_get_inode_loc(inode, &iloc); | ||
| 3169 | if (error) | ||
| 3170 | return error; | ||
| 3171 | physical = iloc.bh->b_blocknr << blockbits; | ||
| 3172 | offset = EXT4_GOOD_OLD_INODE_SIZE + | ||
| 3173 | EXT4_I(inode)->i_extra_isize; | ||
| 3174 | physical += offset; | ||
| 3175 | length = EXT4_SB(inode->i_sb)->s_inode_size - offset; | ||
| 3176 | flags |= FIEMAP_EXTENT_DATA_INLINE; | ||
| 3177 | } else { /* external block */ | ||
| 3178 | physical = EXT4_I(inode)->i_file_acl << blockbits; | ||
| 3179 | length = inode->i_sb->s_blocksize; | ||
| 3180 | } | ||
| 3181 | |||
| 3182 | if (physical) | ||
| 3183 | error = fiemap_fill_next_extent(fieinfo, 0, physical, | ||
| 3184 | length, flags); | ||
| 3185 | return (error < 0 ? error : 0); | ||
| 3186 | } | ||
| 3187 | |||
| 3188 | int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 3189 | __u64 start, __u64 len) | ||
| 3190 | { | ||
| 3191 | ext4_lblk_t start_blk; | ||
| 3192 | ext4_lblk_t len_blks; | ||
| 3193 | int error = 0; | ||
| 3194 | |||
| 3195 | /* fallback to generic here if not in extents fmt */ | ||
| 3196 | if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)) | ||
| 3197 | return generic_block_fiemap(inode, fieinfo, start, len, | ||
| 3198 | ext4_get_block); | ||
| 3199 | |||
| 3200 | if (fiemap_check_flags(fieinfo, EXT4_FIEMAP_FLAGS)) | ||
| 3201 | return -EBADR; | ||
| 3202 | |||
| 3203 | if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) { | ||
| 3204 | error = ext4_xattr_fiemap(inode, fieinfo); | ||
| 3205 | } else { | ||
| 3206 | start_blk = start >> inode->i_sb->s_blocksize_bits; | ||
| 3207 | len_blks = len >> inode->i_sb->s_blocksize_bits; | ||
| 3208 | |||
| 3209 | /* | ||
| 3210 | * Walk the extent tree gathering extent information. | ||
| 3211 | * ext4_ext_fiemap_cb will push extents back to user. | ||
| 3212 | */ | ||
| 3213 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 3214 | error = ext4_ext_walk_space(inode, start_blk, len_blks, | ||
| 3215 | ext4_ext_fiemap_cb, fieinfo); | ||
| 3216 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 3217 | } | ||
| 3218 | |||
| 3219 | return error; | ||
| 3220 | } | ||
| 3221 | |||
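
Aside, for illustration only: with .fiemap wired up in file.c below, the new extent-walking path can be exercised from user space through the generic FS_IOC_FIEMAP ioctl (ioctl number 11, as the reserved-number comment earlier notes). A minimal sketch, assuming a file name in argv[1]:

	#include <stdio.h>
	#include <stdlib.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/fs.h>
	#include <linux/fiemap.h>

	int main(int argc, char **argv)
	{
		struct fiemap *fm;
		unsigned int i;
		int fd;

		if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
			return 1;

		/* room for the header plus 32 extent records */
		fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
		if (!fm)
			return 1;
		fm->fm_start = 0;
		fm->fm_length = ~0ULL;			/* map the whole file */
		fm->fm_flags = FIEMAP_FLAG_SYNC;	/* one of the flags ext4 accepts */
		fm->fm_extent_count = 32;

		if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
			perror("FS_IOC_FIEMAP");
			return 1;
		}
		for (i = 0; i < fm->fm_mapped_extents; i++)
			printf("logical %llu physical %llu length %llu flags 0x%x\n",
			       (unsigned long long)fm->fm_extents[i].fe_logical,
			       (unsigned long long)fm->fm_extents[i].fe_physical,
			       (unsigned long long)fm->fm_extents[i].fe_length,
			       fm->fm_extents[i].fe_flags);
		free(fm);
		close(fd);
		return 0;
	}
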
diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 430eb7978db4..6bd11fba71f7 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c | |||
| @@ -31,14 +31,14 @@ | |||
| 31 | * from ext4_file_open: open gets called at every open, but release | 31 | * from ext4_file_open: open gets called at every open, but release |
| 32 | * gets called only when /all/ the files are closed. | 32 | * gets called only when /all/ the files are closed. |
| 33 | */ | 33 | */ |
| 34 | static int ext4_release_file (struct inode * inode, struct file * filp) | 34 | static int ext4_release_file(struct inode *inode, struct file *filp) |
| 35 | { | 35 | { |
| 36 | /* if we are the last writer on the inode, drop the block reservation */ | 36 | /* if we are the last writer on the inode, drop the block reservation */ |
| 37 | if ((filp->f_mode & FMODE_WRITE) && | 37 | if ((filp->f_mode & FMODE_WRITE) && |
| 38 | (atomic_read(&inode->i_writecount) == 1)) | 38 | (atomic_read(&inode->i_writecount) == 1)) |
| 39 | { | 39 | { |
| 40 | down_write(&EXT4_I(inode)->i_data_sem); | 40 | down_write(&EXT4_I(inode)->i_data_sem); |
| 41 | ext4_discard_reservation(inode); | 41 | ext4_discard_preallocations(inode); |
| 42 | up_write(&EXT4_I(inode)->i_data_sem); | 42 | up_write(&EXT4_I(inode)->i_data_sem); |
| 43 | } | 43 | } |
| 44 | if (is_dx(inode) && filp->private_data) | 44 | if (is_dx(inode) && filp->private_data) |
| @@ -140,6 +140,9 @@ static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 140 | return 0; | 140 | return 0; |
| 141 | } | 141 | } |
| 142 | 142 | ||
| 143 | extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 144 | __u64 start, __u64 len); | ||
| 145 | |||
| 143 | const struct file_operations ext4_file_operations = { | 146 | const struct file_operations ext4_file_operations = { |
| 144 | .llseek = generic_file_llseek, | 147 | .llseek = generic_file_llseek, |
| 145 | .read = do_sync_read, | 148 | .read = do_sync_read, |
| @@ -162,7 +165,7 @@ const struct inode_operations ext4_file_inode_operations = { | |||
| 162 | .truncate = ext4_truncate, | 165 | .truncate = ext4_truncate, |
| 163 | .setattr = ext4_setattr, | 166 | .setattr = ext4_setattr, |
| 164 | .getattr = ext4_getattr, | 167 | .getattr = ext4_getattr, |
| 165 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 168 | #ifdef CONFIG_EXT4_FS_XATTR |
| 166 | .setxattr = generic_setxattr, | 169 | .setxattr = generic_setxattr, |
| 167 | .getxattr = generic_getxattr, | 170 | .getxattr = generic_getxattr, |
| 168 | .listxattr = ext4_listxattr, | 171 | .listxattr = ext4_listxattr, |
| @@ -170,5 +173,6 @@ const struct inode_operations ext4_file_inode_operations = { | |||
| 170 | #endif | 173 | #endif |
| 171 | .permission = ext4_permission, | 174 | .permission = ext4_permission, |
| 172 | .fallocate = ext4_fallocate, | 175 | .fallocate = ext4_fallocate, |
| 176 | .fiemap = ext4_fiemap, | ||
| 173 | }; | 177 | }; |
| 174 | 178 | ||
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index a45c3737ad31..5afe4370840b 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/writeback.h> | 28 | #include <linux/writeback.h> |
| 29 | #include <linux/jbd2.h> | 29 | #include <linux/jbd2.h> |
| 30 | #include <linux/blkdev.h> | 30 | #include <linux/blkdev.h> |
| 31 | #include <linux/marker.h> | ||
| 31 | #include "ext4.h" | 32 | #include "ext4.h" |
| 32 | #include "ext4_jbd2.h" | 33 | #include "ext4_jbd2.h" |
| 33 | 34 | ||
| @@ -43,7 +44,7 @@ | |||
| 43 | * inode to disk. | 44 | * inode to disk. |
| 44 | */ | 45 | */ |
| 45 | 46 | ||
| 46 | int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | 47 | int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) |
| 47 | { | 48 | { |
| 48 | struct inode *inode = dentry->d_inode; | 49 | struct inode *inode = dentry->d_inode; |
| 49 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; | 50 | journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; |
| @@ -51,6 +52,10 @@ int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) | |||
| 51 | 52 | ||
| 52 | J_ASSERT(ext4_journal_current_handle() == NULL); | 53 | J_ASSERT(ext4_journal_current_handle() == NULL); |
| 53 | 54 | ||
| 55 | trace_mark(ext4_sync_file, "dev %s datasync %d ino %ld parent %ld", | ||
| 56 | inode->i_sb->s_id, datasync, inode->i_ino, | ||
| 57 | dentry->d_parent->d_inode->i_ino); | ||
| 58 | |||
| 54 | /* | 59 | /* |
| 55 | * data=writeback: | 60 | * data=writeback: |
| 56 | * The caller's filemap_fdatawrite()/wait will sync the data. | 61 | * The caller's filemap_fdatawrite()/wait will sync the data. |
diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index 1d6329dbe390..556ca8eba3db 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c | |||
| @@ -27,7 +27,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
| 27 | sum += DELTA; | 27 | sum += DELTA; |
| 28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); | 28 | b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); |
| 29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); | 29 | b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); |
| 30 | } while(--n); | 30 | } while (--n); |
| 31 | 31 | ||
| 32 | buf[0] += b0; | 32 | buf[0] += b0; |
| 33 | buf[1] += b1; | 33 | buf[1] += b1; |
| @@ -35,7 +35,7 @@ static void TEA_transform(__u32 buf[4], __u32 const in[]) | |||
| 35 | 35 | ||
| 36 | 36 | ||
| 37 | /* The old legacy hash */ | 37 | /* The old legacy hash */ |
| 38 | static __u32 dx_hack_hash (const char *name, int len) | 38 | static __u32 dx_hack_hash(const char *name, int len) |
| 39 | { | 39 | { |
| 40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; | 40 | __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; |
| 41 | while (len--) { | 41 | while (len--) { |
| @@ -59,7 +59,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) | |||
| 59 | val = pad; | 59 | val = pad; |
| 60 | if (len > num*4) | 60 | if (len > num*4) |
| 61 | len = num * 4; | 61 | len = num * 4; |
| 62 | for (i=0; i < len; i++) { | 62 | for (i = 0; i < len; i++) { |
| 63 | if ((i % 4) == 0) | 63 | if ((i % 4) == 0) |
| 64 | val = pad; | 64 | val = pad; |
| 65 | val = msg[i] + (val << 8); | 65 | val = msg[i] + (val << 8); |
| @@ -104,7 +104,7 @@ int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) | |||
| 104 | 104 | ||
| 105 | /* Check to see if the seed is all zero's */ | 105 | /* Check to see if the seed is all zero's */ |
| 106 | if (hinfo->seed) { | 106 | if (hinfo->seed) { |
| 107 | for (i=0; i < 4; i++) { | 107 | for (i = 0; i < 4; i++) { |
| 108 | if (hinfo->seed[i]) | 108 | if (hinfo->seed[i]) |
| 109 | break; | 109 | break; |
| 110 | } | 110 | } |
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index f344834bbf58..fe34d74cfb19 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c | |||
| @@ -115,9 +115,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 115 | block_group, bitmap_blk); | 115 | block_group, bitmap_blk); |
| 116 | return NULL; | 116 | return NULL; |
| 117 | } | 117 | } |
| 118 | if (bh_uptodate_or_lock(bh)) | 118 | if (buffer_uptodate(bh) && |
| 119 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT))) | ||
| 119 | return bh; | 120 | return bh; |
| 120 | 121 | ||
| 122 | lock_buffer(bh); | ||
| 121 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); | 123 | spin_lock(sb_bgl_lock(EXT4_SB(sb), block_group)); |
| 122 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { | 124 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { |
| 123 | ext4_init_inode_bitmap(sb, bh, block_group, desc); | 125 | ext4_init_inode_bitmap(sb, bh, block_group, desc); |
| @@ -154,39 +156,40 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) | |||
| 154 | * though), and then we'd have two inodes sharing the | 156 | * though), and then we'd have two inodes sharing the |
| 155 | * same inode number and space on the harddisk. | 157 | * same inode number and space on the harddisk. |
| 156 | */ | 158 | */ |
| 157 | void ext4_free_inode (handle_t *handle, struct inode * inode) | 159 | void ext4_free_inode(handle_t *handle, struct inode *inode) |
| 158 | { | 160 | { |
| 159 | struct super_block * sb = inode->i_sb; | 161 | struct super_block *sb = inode->i_sb; |
| 160 | int is_directory; | 162 | int is_directory; |
| 161 | unsigned long ino; | 163 | unsigned long ino; |
| 162 | struct buffer_head *bitmap_bh = NULL; | 164 | struct buffer_head *bitmap_bh = NULL; |
| 163 | struct buffer_head *bh2; | 165 | struct buffer_head *bh2; |
| 164 | ext4_group_t block_group; | 166 | ext4_group_t block_group; |
| 165 | unsigned long bit; | 167 | unsigned long bit; |
| 166 | struct ext4_group_desc * gdp; | 168 | struct ext4_group_desc *gdp; |
| 167 | struct ext4_super_block * es; | 169 | struct ext4_super_block *es; |
| 168 | struct ext4_sb_info *sbi; | 170 | struct ext4_sb_info *sbi; |
| 169 | int fatal = 0, err; | 171 | int fatal = 0, err; |
| 170 | ext4_group_t flex_group; | 172 | ext4_group_t flex_group; |
| 171 | 173 | ||
| 172 | if (atomic_read(&inode->i_count) > 1) { | 174 | if (atomic_read(&inode->i_count) > 1) { |
| 173 | printk ("ext4_free_inode: inode has count=%d\n", | 175 | printk(KERN_ERR "ext4_free_inode: inode has count=%d\n", |
| 174 | atomic_read(&inode->i_count)); | 176 | atomic_read(&inode->i_count)); |
| 175 | return; | 177 | return; |
| 176 | } | 178 | } |
| 177 | if (inode->i_nlink) { | 179 | if (inode->i_nlink) { |
| 178 | printk ("ext4_free_inode: inode has nlink=%d\n", | 180 | printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n", |
| 179 | inode->i_nlink); | 181 | inode->i_nlink); |
| 180 | return; | 182 | return; |
| 181 | } | 183 | } |
| 182 | if (!sb) { | 184 | if (!sb) { |
| 183 | printk("ext4_free_inode: inode on nonexistent device\n"); | 185 | printk(KERN_ERR "ext4_free_inode: inode on " |
| 186 | "nonexistent device\n"); | ||
| 184 | return; | 187 | return; |
| 185 | } | 188 | } |
| 186 | sbi = EXT4_SB(sb); | 189 | sbi = EXT4_SB(sb); |
| 187 | 190 | ||
| 188 | ino = inode->i_ino; | 191 | ino = inode->i_ino; |
| 189 | ext4_debug ("freeing inode %lu\n", ino); | 192 | ext4_debug("freeing inode %lu\n", ino); |
| 190 | 193 | ||
| 191 | /* | 194 | /* |
| 192 | * Note: we must free any quota before locking the superblock, | 195 | * Note: we must free any quota before locking the superblock, |
| @@ -200,12 +203,12 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
| 200 | is_directory = S_ISDIR(inode->i_mode); | 203 | is_directory = S_ISDIR(inode->i_mode); |
| 201 | 204 | ||
| 202 | /* Do this BEFORE marking the inode not in use or returning an error */ | 205 | /* Do this BEFORE marking the inode not in use or returning an error */ |
| 203 | clear_inode (inode); | 206 | clear_inode(inode); |
| 204 | 207 | ||
| 205 | es = EXT4_SB(sb)->s_es; | 208 | es = EXT4_SB(sb)->s_es; |
| 206 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { | 209 | if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { |
| 207 | ext4_error (sb, "ext4_free_inode", | 210 | ext4_error(sb, "ext4_free_inode", |
| 208 | "reserved or nonexistent inode %lu", ino); | 211 | "reserved or nonexistent inode %lu", ino); |
| 209 | goto error_return; | 212 | goto error_return; |
| 210 | } | 213 | } |
| 211 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | 214 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); |
| @@ -222,10 +225,10 @@ void ext4_free_inode (handle_t *handle, struct inode * inode) | |||
| 222 | /* Ok, now we can actually update the inode bitmaps.. */ | 225 | /* Ok, now we can actually update the inode bitmaps.. */ |
| 223 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), | 226 | if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), |
| 224 | bit, bitmap_bh->b_data)) | 227 | bit, bitmap_bh->b_data)) |
| 225 | ext4_error (sb, "ext4_free_inode", | 228 | ext4_error(sb, "ext4_free_inode", |
| 226 | "bit already cleared for inode %lu", ino); | 229 | "bit already cleared for inode %lu", ino); |
| 227 | else { | 230 | else { |
| 228 | gdp = ext4_get_group_desc (sb, block_group, &bh2); | 231 | gdp = ext4_get_group_desc(sb, block_group, &bh2); |
| 229 | 232 | ||
| 230 | BUFFER_TRACE(bh2, "get_write_access"); | 233 | BUFFER_TRACE(bh2, "get_write_access"); |
| 231 | fatal = ext4_journal_get_write_access(handle, bh2); | 234 | fatal = ext4_journal_get_write_access(handle, bh2); |
| @@ -287,7 +290,7 @@ static int find_group_dir(struct super_block *sb, struct inode *parent, | |||
| 287 | avefreei = freei / ngroups; | 290 | avefreei = freei / ngroups; |
| 288 | 291 | ||
| 289 | for (group = 0; group < ngroups; group++) { | 292 | for (group = 0; group < ngroups; group++) { |
| 290 | desc = ext4_get_group_desc (sb, group, NULL); | 293 | desc = ext4_get_group_desc(sb, group, NULL); |
| 291 | if (!desc || !desc->bg_free_inodes_count) | 294 | if (!desc || !desc->bg_free_inodes_count) |
| 292 | continue; | 295 | continue; |
| 293 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) | 296 | if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) |
| @@ -576,16 +579,16 @@ static int find_group_other(struct super_block *sb, struct inode *parent, | |||
| 576 | * For other inodes, search forward from the parent directory's block | 579 | * For other inodes, search forward from the parent directory's block |
| 577 | * group to find a free inode. | 580 | * group to find a free inode. |
| 578 | */ | 581 | */ |
| 579 | struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | 582 | struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode) |
| 580 | { | 583 | { |
| 581 | struct super_block *sb; | 584 | struct super_block *sb; |
| 582 | struct buffer_head *bitmap_bh = NULL; | 585 | struct buffer_head *bitmap_bh = NULL; |
| 583 | struct buffer_head *bh2; | 586 | struct buffer_head *bh2; |
| 584 | ext4_group_t group = 0; | 587 | ext4_group_t group = 0; |
| 585 | unsigned long ino = 0; | 588 | unsigned long ino = 0; |
| 586 | struct inode * inode; | 589 | struct inode *inode; |
| 587 | struct ext4_group_desc * gdp = NULL; | 590 | struct ext4_group_desc *gdp = NULL; |
| 588 | struct ext4_super_block * es; | 591 | struct ext4_super_block *es; |
| 589 | struct ext4_inode_info *ei; | 592 | struct ext4_inode_info *ei; |
| 590 | struct ext4_sb_info *sbi; | 593 | struct ext4_sb_info *sbi; |
| 591 | int ret2, err = 0; | 594 | int ret2, err = 0; |
| @@ -613,7 +616,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) | |||
| 613 | } | 616 | } |
| 614 | 617 | ||
| 615 | if (S_ISDIR(mode)) { | 618 | if (S_ISDIR(mode)) { |
| 616 | if (test_opt (sb, OLDALLOC)) | 619 | if (test_opt(sb, OLDALLOC)) |
| 617 | ret2 = find_group_dir(sb, dir, &group); | 620 | ret2 = find_group_dir(sb, dir, &group); |
| 618 | else | 621 | else |
| 619 | ret2 = find_group_orlov(sb, dir, &group); | 622 | ret2 = find_group_orlov(sb, dir, &group); |
| @@ -783,7 +786,7 @@ got: | |||
| 783 | } | 786 | } |
| 784 | 787 | ||
| 785 | inode->i_uid = current->fsuid; | 788 | inode->i_uid = current->fsuid; |
| 786 | if (test_opt (sb, GRPID)) | 789 | if (test_opt(sb, GRPID)) |
| 787 | inode->i_gid = dir->i_gid; | 790 | inode->i_gid = dir->i_gid; |
| 788 | else if (dir->i_mode & S_ISGID) { | 791 | else if (dir->i_mode & S_ISGID) { |
| 789 | inode->i_gid = dir->i_gid; | 792 | inode->i_gid = dir->i_gid; |
| @@ -816,7 +819,6 @@ got: | |||
| 816 | ei->i_flags &= ~EXT4_DIRSYNC_FL; | 819 | ei->i_flags &= ~EXT4_DIRSYNC_FL; |
| 817 | ei->i_file_acl = 0; | 820 | ei->i_file_acl = 0; |
| 818 | ei->i_dtime = 0; | 821 | ei->i_dtime = 0; |
| 819 | ei->i_block_alloc_info = NULL; | ||
| 820 | ei->i_block_group = group; | 822 | ei->i_block_group = group; |
| 821 | 823 | ||
| 822 | ext4_set_inode_flags(inode); | 824 | ext4_set_inode_flags(inode); |
| @@ -832,7 +834,7 @@ got: | |||
| 832 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; | 834 | ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; |
| 833 | 835 | ||
| 834 | ret = inode; | 836 | ret = inode; |
| 835 | if(DQUOT_ALLOC_INODE(inode)) { | 837 | if (DQUOT_ALLOC_INODE(inode)) { |
| 836 | err = -EDQUOT; | 838 | err = -EDQUOT; |
| 837 | goto fail_drop; | 839 | goto fail_drop; |
| 838 | } | 840 | } |
| @@ -841,7 +843,7 @@ got: | |||
| 841 | if (err) | 843 | if (err) |
| 842 | goto fail_free_drop; | 844 | goto fail_free_drop; |
| 843 | 845 | ||
| 844 | err = ext4_init_security(handle,inode, dir); | 846 | err = ext4_init_security(handle, inode, dir); |
| 845 | if (err) | 847 | if (err) |
| 846 | goto fail_free_drop; | 848 | goto fail_free_drop; |
| 847 | 849 | ||
| @@ -959,7 +961,7 @@ error: | |||
| 959 | return ERR_PTR(err); | 961 | return ERR_PTR(err); |
| 960 | } | 962 | } |
| 961 | 963 | ||
| 962 | unsigned long ext4_count_free_inodes (struct super_block * sb) | 964 | unsigned long ext4_count_free_inodes(struct super_block *sb) |
| 963 | { | 965 | { |
| 964 | unsigned long desc_count; | 966 | unsigned long desc_count; |
| 965 | struct ext4_group_desc *gdp; | 967 | struct ext4_group_desc *gdp; |
| @@ -974,7 +976,7 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
| 974 | bitmap_count = 0; | 976 | bitmap_count = 0; |
| 975 | gdp = NULL; | 977 | gdp = NULL; |
| 976 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 978 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
| 977 | gdp = ext4_get_group_desc (sb, i, NULL); | 979 | gdp = ext4_get_group_desc(sb, i, NULL); |
| 978 | if (!gdp) | 980 | if (!gdp) |
| 979 | continue; | 981 | continue; |
| 980 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 982 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
| @@ -989,13 +991,14 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
| 989 | bitmap_count += x; | 991 | bitmap_count += x; |
| 990 | } | 992 | } |
| 991 | brelse(bitmap_bh); | 993 | brelse(bitmap_bh); |
| 992 | printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", | 994 | printk(KERN_DEBUG "ext4_count_free_inodes: " |
| 993 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | 995 | "stored = %u, computed = %lu, %lu\n", |
| 996 | le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); | ||
| 994 | return desc_count; | 997 | return desc_count; |
| 995 | #else | 998 | #else |
| 996 | desc_count = 0; | 999 | desc_count = 0; |
| 997 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1000 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
| 998 | gdp = ext4_get_group_desc (sb, i, NULL); | 1001 | gdp = ext4_get_group_desc(sb, i, NULL); |
| 999 | if (!gdp) | 1002 | if (!gdp) |
| 1000 | continue; | 1003 | continue; |
| 1001 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); | 1004 | desc_count += le16_to_cpu(gdp->bg_free_inodes_count); |
| @@ -1006,13 +1009,13 @@ unsigned long ext4_count_free_inodes (struct super_block * sb) | |||
| 1006 | } | 1009 | } |
| 1007 | 1010 | ||
| 1008 | /* Called at mount-time, super-block is locked */ | 1011 | /* Called at mount-time, super-block is locked */ |
| 1009 | unsigned long ext4_count_dirs (struct super_block * sb) | 1012 | unsigned long ext4_count_dirs(struct super_block * sb) |
| 1010 | { | 1013 | { |
| 1011 | unsigned long count = 0; | 1014 | unsigned long count = 0; |
| 1012 | ext4_group_t i; | 1015 | ext4_group_t i; |
| 1013 | 1016 | ||
| 1014 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { | 1017 | for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { |
| 1015 | struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); | 1018 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
| 1016 | if (!gdp) | 1019 | if (!gdp) |
| 1017 | continue; | 1020 | continue; |
| 1018 | count += le16_to_cpu(gdp->bg_used_dirs_count); | 1021 | count += le16_to_cpu(gdp->bg_used_dirs_count); |
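Both ext4_count_free_inodes() and ext4_count_dirs() are plain reductions over the on-disk group descriptors, converting the little-endian counters before summing. A userspace sketch of the same pattern over a fabricated descriptor array (the struct is simplified and le16_to_cpu is stubbed out assuming a little-endian host):

```c
#include <stdio.h>
#include <stdint.h>

/* Simplified stand-in for struct ext4_group_desc; only the fields used here. */
struct group_desc {
	uint16_t bg_free_inodes_count;	/* stored little-endian on disk */
	uint16_t bg_used_dirs_count;
};

/* Assumes a little-endian host, so the conversion is a no-op. */
static uint16_t le16_to_cpu_stub(uint16_t v) { return v; }

int main(void)
{
	struct group_desc groups[] = {
		{ 8100, 12 }, { 7990, 3 }, { 8192, 0 },	/* made-up per-group counts */
	};
	unsigned long free_inodes = 0, dirs = 0;
	size_t i;

	/* Same shape as the mount-time loops in ialloc.c: walk every group and sum. */
	for (i = 0; i < sizeof(groups) / sizeof(groups[0]); i++) {
		free_inodes += le16_to_cpu_stub(groups[i].bg_free_inodes_count);
		dirs += le16_to_cpu_stub(groups[i].bg_used_dirs_count);
	}

	printf("free inodes = %lu, used dirs = %lu\n", free_inodes, dirs);
	return 0;
}
```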
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7e91913e325b..8dbf6953845b 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
| @@ -190,7 +190,7 @@ static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) | |||
| 190 | /* | 190 | /* |
| 191 | * Called at the last iput() if i_nlink is zero. | 191 | * Called at the last iput() if i_nlink is zero. |
| 192 | */ | 192 | */ |
| 193 | void ext4_delete_inode (struct inode * inode) | 193 | void ext4_delete_inode(struct inode *inode) |
| 194 | { | 194 | { |
| 195 | handle_t *handle; | 195 | handle_t *handle; |
| 196 | int err; | 196 | int err; |
| @@ -330,11 +330,11 @@ static int ext4_block_to_path(struct inode *inode, | |||
| 330 | int final = 0; | 330 | int final = 0; |
| 331 | 331 | ||
| 332 | if (i_block < 0) { | 332 | if (i_block < 0) { |
| 333 | ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); | 333 | ext4_warning(inode->i_sb, "ext4_block_to_path", "block < 0"); |
| 334 | } else if (i_block < direct_blocks) { | 334 | } else if (i_block < direct_blocks) { |
| 335 | offsets[n++] = i_block; | 335 | offsets[n++] = i_block; |
| 336 | final = direct_blocks; | 336 | final = direct_blocks; |
| 337 | } else if ( (i_block -= direct_blocks) < indirect_blocks) { | 337 | } else if ((i_block -= direct_blocks) < indirect_blocks) { |
| 338 | offsets[n++] = EXT4_IND_BLOCK; | 338 | offsets[n++] = EXT4_IND_BLOCK; |
| 339 | offsets[n++] = i_block; | 339 | offsets[n++] = i_block; |
| 340 | final = ptrs; | 340 | final = ptrs; |
| @@ -400,14 +400,14 @@ static Indirect *ext4_get_branch(struct inode *inode, int depth, | |||
| 400 | 400 | ||
| 401 | *err = 0; | 401 | *err = 0; |
| 402 | /* i_data is not going away, no lock needed */ | 402 | /* i_data is not going away, no lock needed */ |
| 403 | add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); | 403 | add_chain(chain, NULL, EXT4_I(inode)->i_data + *offsets); |
| 404 | if (!p->key) | 404 | if (!p->key) |
| 405 | goto no_block; | 405 | goto no_block; |
| 406 | while (--depth) { | 406 | while (--depth) { |
| 407 | bh = sb_bread(sb, le32_to_cpu(p->key)); | 407 | bh = sb_bread(sb, le32_to_cpu(p->key)); |
| 408 | if (!bh) | 408 | if (!bh) |
| 409 | goto failure; | 409 | goto failure; |
| 410 | add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); | 410 | add_chain(++p, bh, (__le32 *)bh->b_data + *++offsets); |
| 411 | /* Reader: end */ | 411 | /* Reader: end */ |
| 412 | if (!p->key) | 412 | if (!p->key) |
| 413 | goto no_block; | 413 | goto no_block; |
| @@ -443,7 +443,7 @@ no_block: | |||
| 443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | 443 | static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) |
| 444 | { | 444 | { |
| 445 | struct ext4_inode_info *ei = EXT4_I(inode); | 445 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 446 | __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; | 446 | __le32 *start = ind->bh ? (__le32 *) ind->bh->b_data : ei->i_data; |
| 447 | __le32 *p; | 447 | __le32 *p; |
| 448 | ext4_fsblk_t bg_start; | 448 | ext4_fsblk_t bg_start; |
| 449 | ext4_fsblk_t last_block; | 449 | ext4_fsblk_t last_block; |
| @@ -486,18 +486,9 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
| 486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 486 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
| 487 | Indirect *partial) | 487 | Indirect *partial) |
| 488 | { | 488 | { |
| 489 | struct ext4_block_alloc_info *block_i; | ||
| 490 | |||
| 491 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
| 492 | |||
| 493 | /* | 489 | /* |
| 494 | * try the heuristic for sequential allocation, | 490 | * XXX need to get goal block from mballoc's data structures |
| 495 | * failing that at least try to get decent locality. | ||
| 496 | */ | 491 | */ |
| 497 | if (block_i && (block == block_i->last_alloc_logical_block + 1) | ||
| 498 | && (block_i->last_alloc_physical_block != 0)) { | ||
| 499 | return block_i->last_alloc_physical_block + 1; | ||
| 500 | } | ||
| 501 | 492 | ||
| 502 | return ext4_find_near(inode, partial); | 493 | return ext4_find_near(inode, partial); |
| 503 | } | 494 | } |
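The branch deleted in this hunk was the old reservation-based goal heuristic: if the block being allocated immediately follows the last allocated logical block, reuse the physical block right after the previous one; otherwise fall back to a locality guess. A minimal sketch of that removed heuristic, with a hypothetical alloc_info struct standing in for the old ext4_block_alloc_info:

```c
#include <stdio.h>

/* Hypothetical stand-in for the removed ext4_block_alloc_info fields. */
struct alloc_info {
	unsigned long last_alloc_logical_block;
	unsigned long long last_alloc_physical_block;
};

/* Returns a goal physical block, or 0 when the caller should fall back
 * to a locality-based guess (ext4_find_near() in the real code). */
static unsigned long long find_goal(const struct alloc_info *ai,
				    unsigned long logical_block)
{
	if (ai && logical_block == ai->last_alloc_logical_block + 1 &&
	    ai->last_alloc_physical_block != 0)
		return ai->last_alloc_physical_block + 1;
	return 0;
}

int main(void)
{
	struct alloc_info ai = { .last_alloc_logical_block = 99,
				 .last_alloc_physical_block = 50000 };

	/* Sequential write: logical block 100 follows 99, so extend the run. */
	printf("goal for block 100: %llu\n", find_goal(&ai, 100));
	/* Random write: no hint, caller falls back (0 here). */
	printf("goal for block 500: %llu\n", find_goal(&ai, 500));
	return 0;
}
```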
| @@ -630,7 +621,7 @@ allocated: | |||
| 630 | *err = 0; | 621 | *err = 0; |
| 631 | return ret; | 622 | return ret; |
| 632 | failed_out: | 623 | failed_out: |
| 633 | for (i = 0; i <index; i++) | 624 | for (i = 0; i < index; i++) |
| 634 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 625 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
| 635 | return ret; | 626 | return ret; |
| 636 | } | 627 | } |
| @@ -703,7 +694,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode, | |||
| 703 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; | 694 | branch[n].p = (__le32 *) bh->b_data + offsets[n]; |
| 704 | branch[n].key = cpu_to_le32(new_blocks[n]); | 695 | branch[n].key = cpu_to_le32(new_blocks[n]); |
| 705 | *branch[n].p = branch[n].key; | 696 | *branch[n].p = branch[n].key; |
| 706 | if ( n == indirect_blks) { | 697 | if (n == indirect_blks) { |
| 707 | current_block = new_blocks[n]; | 698 | current_block = new_blocks[n]; |
| 708 | /* | 699 | /* |
| 709 | * End of chain, update the last new metablock of | 700 | * End of chain, update the last new metablock of |
| @@ -730,7 +721,7 @@ failed: | |||
| 730 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); | 721 | BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); |
| 731 | ext4_journal_forget(handle, branch[i].bh); | 722 | ext4_journal_forget(handle, branch[i].bh); |
| 732 | } | 723 | } |
| 733 | for (i = 0; i <indirect_blks; i++) | 724 | for (i = 0; i < indirect_blks; i++) |
| 734 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); | 725 | ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); |
| 735 | 726 | ||
| 736 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); | 727 | ext4_free_blocks(handle, inode, new_blocks[i], num, 0); |
| @@ -757,10 +748,8 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
| 757 | { | 748 | { |
| 758 | int i; | 749 | int i; |
| 759 | int err = 0; | 750 | int err = 0; |
| 760 | struct ext4_block_alloc_info *block_i; | ||
| 761 | ext4_fsblk_t current_block; | 751 | ext4_fsblk_t current_block; |
| 762 | 752 | ||
| 763 | block_i = EXT4_I(inode)->i_block_alloc_info; | ||
| 764 | /* | 753 | /* |
| 765 | * If we're splicing into a [td]indirect block (as opposed to the | 754 | * If we're splicing into a [td]indirect block (as opposed to the |
| 766 | * inode) then we need to get write access to the [td]indirect block | 755 | * inode) then we need to get write access to the [td]indirect block |
| @@ -783,18 +772,7 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode, | |||
| 783 | if (num == 0 && blks > 1) { | 772 | if (num == 0 && blks > 1) { |
| 784 | current_block = le32_to_cpu(where->key) + 1; | 773 | current_block = le32_to_cpu(where->key) + 1; |
| 785 | for (i = 1; i < blks; i++) | 774 | for (i = 1; i < blks; i++) |
| 786 | *(where->p + i ) = cpu_to_le32(current_block++); | 775 | *(where->p + i) = cpu_to_le32(current_block++); |
| 787 | } | ||
| 788 | |||
| 789 | /* | ||
| 790 | * update the most recently allocated logical & physical block | ||
| 791 | * in i_block_alloc_info, to assist find the proper goal block for next | ||
| 792 | * allocation | ||
| 793 | */ | ||
| 794 | if (block_i) { | ||
| 795 | block_i->last_alloc_logical_block = block + blks - 1; | ||
| 796 | block_i->last_alloc_physical_block = | ||
| 797 | le32_to_cpu(where[num].key) + blks - 1; | ||
| 798 | } | 776 | } |
| 799 | 777 | ||
| 800 | /* We are done with atomic stuff, now do the rest of housekeeping */ | 778 | /* We are done with atomic stuff, now do the rest of housekeeping */ |
| @@ -914,12 +892,8 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
| 914 | goto cleanup; | 892 | goto cleanup; |
| 915 | 893 | ||
| 916 | /* | 894 | /* |
| 917 | * Okay, we need to do block allocation. Lazily initialize the block | 895 | * Okay, we need to do block allocation. |
| 918 | * allocation info here if necessary | ||
| 919 | */ | 896 | */ |
| 920 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | ||
| 921 | ext4_init_block_alloc_info(inode); | ||
| 922 | |||
| 923 | goal = ext4_find_goal(inode, iblock, partial); | 897 | goal = ext4_find_goal(inode, iblock, partial); |
| 924 | 898 | ||
| 925 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 899 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
| @@ -1030,19 +1004,20 @@ static void ext4_da_update_reserve_space(struct inode *inode, int used) | |||
| 1030 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | 1004 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); |
| 1031 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; | 1005 | mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb; |
| 1032 | 1006 | ||
| 1033 | /* Account for allocated meta_blocks */ | 1007 | if (mdb_free) { |
| 1034 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | 1008 | /* Account for allocated meta_blocks */ |
| 1009 | mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks; | ||
| 1035 | 1010 | ||
| 1036 | /* update fs free blocks counter for truncate case */ | 1011 | /* update fs dirty blocks counter */ |
| 1037 | percpu_counter_add(&sbi->s_freeblocks_counter, mdb_free); | 1012 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free); |
| 1013 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
| 1014 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
| 1015 | } | ||
| 1038 | 1016 | ||
| 1039 | /* update per-inode reservations */ | 1017 | /* update per-inode reservations */ |
| 1040 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); | 1018 | BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks); |
| 1041 | EXT4_I(inode)->i_reserved_data_blocks -= used; | 1019 | EXT4_I(inode)->i_reserved_data_blocks -= used; |
| 1042 | 1020 | ||
| 1043 | BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1044 | EXT4_I(inode)->i_reserved_meta_blocks = mdb; | ||
| 1045 | EXT4_I(inode)->i_allocated_meta_blocks = 0; | ||
| 1046 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1021 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1047 | } | 1022 | } |
| 1048 | 1023 | ||
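This hunk changes where the surplus metadata reservation goes once delalloc blocks are actually allocated: it is charged against the new per-fs dirty-blocks counter instead of being added back to the free-blocks counter, and only when there is something to release. A userspace sketch of that bookkeeping with plain integers standing in for the percpu counters (the struct below is invented for illustration):

```c
#include <stdio.h>
#include <assert.h>

/* Toy accounting state; the real counters live in ext4_sb_info and
 * ext4_inode_info, protected by i_block_reservation_lock. */
struct da_state {
	long dirty_blocks;		/* fs-wide reserved-but-unallocated blocks */
	long reserved_data_blocks;	/* per-inode data reservation */
	long reserved_meta_blocks;	/* per-inode metadata reservation */
	long allocated_meta_blocks;	/* metadata already allocated against it */
};

/* Mirrors the updated ext4_da_update_reserve_space(): release the surplus
 * metadata reservation to the dirty counter, then charge the used data. */
static void update_reserve_space(struct da_state *s, long used, long mdb)
{
	long mdb_free;

	assert(mdb <= s->reserved_meta_blocks);
	mdb_free = s->reserved_meta_blocks - mdb;
	if (mdb_free) {
		mdb_free -= s->allocated_meta_blocks;	/* account allocated metadata */
		s->dirty_blocks -= mdb_free;		/* give back the surplus */
		s->allocated_meta_blocks = 0;
		s->reserved_meta_blocks = mdb;
	}
	assert(used <= s->reserved_data_blocks);
	s->reserved_data_blocks -= used;
}

int main(void)
{
	struct da_state s = { .dirty_blocks = 110, .reserved_data_blocks = 100,
			      .reserved_meta_blocks = 10, .allocated_meta_blocks = 2 };

	/* 40 data blocks were just allocated; only 6 meta blocks are still needed. */
	update_reserve_space(&s, 40, 6);
	printf("dirty=%ld data=%ld meta=%ld alloc_meta=%ld\n", s.dirty_blocks,
	       s.reserved_data_blocks, s.reserved_meta_blocks,
	       s.allocated_meta_blocks);
	return 0;
}
```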
| @@ -1160,8 +1135,8 @@ int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | |||
| 1160 | /* Maximum number of blocks we map for direct IO at once. */ | 1135 | /* Maximum number of blocks we map for direct IO at once. */ |
| 1161 | #define DIO_MAX_BLOCKS 4096 | 1136 | #define DIO_MAX_BLOCKS 4096 |
| 1162 | 1137 | ||
| 1163 | static int ext4_get_block(struct inode *inode, sector_t iblock, | 1138 | int ext4_get_block(struct inode *inode, sector_t iblock, |
| 1164 | struct buffer_head *bh_result, int create) | 1139 | struct buffer_head *bh_result, int create) |
| 1165 | { | 1140 | { |
| 1166 | handle_t *handle = ext4_journal_current_handle(); | 1141 | handle_t *handle = ext4_journal_current_handle(); |
| 1167 | int ret = 0, started = 0; | 1142 | int ret = 0, started = 0; |
| @@ -1241,7 +1216,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, | |||
| 1241 | BUFFER_TRACE(bh, "call get_create_access"); | 1216 | BUFFER_TRACE(bh, "call get_create_access"); |
| 1242 | fatal = ext4_journal_get_create_access(handle, bh); | 1217 | fatal = ext4_journal_get_create_access(handle, bh); |
| 1243 | if (!fatal && !buffer_uptodate(bh)) { | 1218 | if (!fatal && !buffer_uptodate(bh)) { |
| 1244 | memset(bh->b_data,0,inode->i_sb->s_blocksize); | 1219 | memset(bh->b_data, 0, inode->i_sb->s_blocksize); |
| 1245 | set_buffer_uptodate(bh); | 1220 | set_buffer_uptodate(bh); |
| 1246 | } | 1221 | } |
| 1247 | unlock_buffer(bh); | 1222 | unlock_buffer(bh); |
| @@ -1266,7 +1241,7 @@ err: | |||
| 1266 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | 1241 | struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, |
| 1267 | ext4_lblk_t block, int create, int *err) | 1242 | ext4_lblk_t block, int create, int *err) |
| 1268 | { | 1243 | { |
| 1269 | struct buffer_head * bh; | 1244 | struct buffer_head *bh; |
| 1270 | 1245 | ||
| 1271 | bh = ext4_getblk(handle, inode, block, create, err); | 1246 | bh = ext4_getblk(handle, inode, block, create, err); |
| 1272 | if (!bh) | 1247 | if (!bh) |
| @@ -1282,13 +1257,13 @@ struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, | |||
| 1282 | return NULL; | 1257 | return NULL; |
| 1283 | } | 1258 | } |
| 1284 | 1259 | ||
| 1285 | static int walk_page_buffers( handle_t *handle, | 1260 | static int walk_page_buffers(handle_t *handle, |
| 1286 | struct buffer_head *head, | 1261 | struct buffer_head *head, |
| 1287 | unsigned from, | 1262 | unsigned from, |
| 1288 | unsigned to, | 1263 | unsigned to, |
| 1289 | int *partial, | 1264 | int *partial, |
| 1290 | int (*fn)( handle_t *handle, | 1265 | int (*fn)(handle_t *handle, |
| 1291 | struct buffer_head *bh)) | 1266 | struct buffer_head *bh)) |
| 1292 | { | 1267 | { |
| 1293 | struct buffer_head *bh; | 1268 | struct buffer_head *bh; |
| 1294 | unsigned block_start, block_end; | 1269 | unsigned block_start, block_end; |
| @@ -1296,9 +1271,9 @@ static int walk_page_buffers( handle_t *handle, | |||
| 1296 | int err, ret = 0; | 1271 | int err, ret = 0; |
| 1297 | struct buffer_head *next; | 1272 | struct buffer_head *next; |
| 1298 | 1273 | ||
| 1299 | for ( bh = head, block_start = 0; | 1274 | for (bh = head, block_start = 0; |
| 1300 | ret == 0 && (bh != head || !block_start); | 1275 | ret == 0 && (bh != head || !block_start); |
| 1301 | block_start = block_end, bh = next) | 1276 | block_start = block_end, bh = next) |
| 1302 | { | 1277 | { |
| 1303 | next = bh->b_this_page; | 1278 | next = bh->b_this_page; |
| 1304 | block_end = block_start + blocksize; | 1279 | block_end = block_start + blocksize; |
| @@ -1351,23 +1326,23 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping, | |||
| 1351 | loff_t pos, unsigned len, unsigned flags, | 1326 | loff_t pos, unsigned len, unsigned flags, |
| 1352 | struct page **pagep, void **fsdata) | 1327 | struct page **pagep, void **fsdata) |
| 1353 | { | 1328 | { |
| 1354 | struct inode *inode = mapping->host; | 1329 | struct inode *inode = mapping->host; |
| 1355 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); | 1330 | int ret, needed_blocks = ext4_writepage_trans_blocks(inode); |
| 1356 | handle_t *handle; | 1331 | handle_t *handle; |
| 1357 | int retries = 0; | 1332 | int retries = 0; |
| 1358 | struct page *page; | 1333 | struct page *page; |
| 1359 | pgoff_t index; | 1334 | pgoff_t index; |
| 1360 | unsigned from, to; | 1335 | unsigned from, to; |
| 1361 | 1336 | ||
| 1362 | index = pos >> PAGE_CACHE_SHIFT; | 1337 | index = pos >> PAGE_CACHE_SHIFT; |
| 1363 | from = pos & (PAGE_CACHE_SIZE - 1); | 1338 | from = pos & (PAGE_CACHE_SIZE - 1); |
| 1364 | to = from + len; | 1339 | to = from + len; |
| 1365 | 1340 | ||
| 1366 | retry: | 1341 | retry: |
| 1367 | handle = ext4_journal_start(inode, needed_blocks); | 1342 | handle = ext4_journal_start(inode, needed_blocks); |
| 1368 | if (IS_ERR(handle)) { | 1343 | if (IS_ERR(handle)) { |
| 1369 | ret = PTR_ERR(handle); | 1344 | ret = PTR_ERR(handle); |
| 1370 | goto out; | 1345 | goto out; |
| 1371 | } | 1346 | } |
| 1372 | 1347 | ||
| 1373 | page = __grab_cache_page(mapping, index); | 1348 | page = __grab_cache_page(mapping, index); |
| @@ -1387,9 +1362,16 @@ retry: | |||
| 1387 | } | 1362 | } |
| 1388 | 1363 | ||
| 1389 | if (ret) { | 1364 | if (ret) { |
| 1390 | unlock_page(page); | 1365 | unlock_page(page); |
| 1391 | ext4_journal_stop(handle); | 1366 | ext4_journal_stop(handle); |
| 1392 | page_cache_release(page); | 1367 | page_cache_release(page); |
| 1368 | /* | ||
| 1369 | * block_write_begin may have instantiated a few blocks | ||
| 1370 | * outside i_size. Trim these off again. Don't need | ||
| 1371 | * i_size_read because we hold i_mutex. | ||
| 1372 | */ | ||
| 1373 | if (pos + len > inode->i_size) | ||
| 1374 | vmtruncate(inode, inode->i_size); | ||
| 1393 | } | 1375 | } |
| 1394 | 1376 | ||
| 1395 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 1377 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
| @@ -1426,16 +1408,18 @@ static int ext4_ordered_write_end(struct file *file, | |||
| 1426 | ret = ext4_jbd2_file_inode(handle, inode); | 1408 | ret = ext4_jbd2_file_inode(handle, inode); |
| 1427 | 1409 | ||
| 1428 | if (ret == 0) { | 1410 | if (ret == 0) { |
| 1429 | /* | ||
| 1430 | * generic_write_end() will run mark_inode_dirty() if i_size | ||
| 1431 | * changes. So let's piggyback the i_disksize mark_inode_dirty | ||
| 1432 | * into that. | ||
| 1433 | */ | ||
| 1434 | loff_t new_i_size; | 1411 | loff_t new_i_size; |
| 1435 | 1412 | ||
| 1436 | new_i_size = pos + copied; | 1413 | new_i_size = pos + copied; |
| 1437 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1414 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
| 1438 | EXT4_I(inode)->i_disksize = new_i_size; | 1415 | ext4_update_i_disksize(inode, new_i_size); |
| 1416 | /* We need to mark inode dirty even if | ||
| 1417 | * new_i_size is less than inode->i_size | ||
| 1418 | * but greater than i_disksize (hint: delalloc) | ||

| 1419 | */ | ||
| 1420 | ext4_mark_inode_dirty(handle, inode); | ||
| 1421 | } | ||
| 1422 | |||
| 1439 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1423 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 1440 | page, fsdata); | 1424 | page, fsdata); |
| 1441 | copied = ret2; | 1425 | copied = ret2; |
| @@ -1460,8 +1444,14 @@ static int ext4_writeback_write_end(struct file *file, | |||
| 1460 | loff_t new_i_size; | 1444 | loff_t new_i_size; |
| 1461 | 1445 | ||
| 1462 | new_i_size = pos + copied; | 1446 | new_i_size = pos + copied; |
| 1463 | if (new_i_size > EXT4_I(inode)->i_disksize) | 1447 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
| 1464 | EXT4_I(inode)->i_disksize = new_i_size; | 1448 | ext4_update_i_disksize(inode, new_i_size); |
| 1449 | /* We need to mark inode dirty even if | ||
| 1450 | * new_i_size is less than inode->i_size | ||
| 1451 | * but greater than i_disksize (hint: delalloc) | ||
| 1452 | */ | ||
| 1453 | ext4_mark_inode_dirty(handle, inode); | ||
| 1454 | } | ||
| 1465 | 1455 | ||
| 1466 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 1456 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
| 1467 | page, fsdata); | 1457 | page, fsdata); |
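Both write_end variants now funnel the on-disk size update through ext4_update_i_disksize() and mark the inode dirty whenever i_disksize grows, even when i_size itself did not change. The invariant is simply that the recorded on-disk size only moves forward. A small sketch of that forward-only update (the struct and helper below are invented for illustration):

```c
#include <stdio.h>

/* Toy inode carrying only the two sizes that matter here. */
struct toy_inode {
	long long i_size;	/* in-memory file size */
	long long i_disksize;	/* size the on-disk inode is allowed to claim */
	int dirty;		/* stands in for ext4_mark_inode_dirty() */
};

/* Grow i_disksize if the write extended past it; never shrink it here. */
static void write_end_update(struct toy_inode *inode, long long pos, long long copied)
{
	long long new_i_size = pos + copied;

	if (new_i_size > inode->i_disksize) {
		inode->i_disksize = new_i_size;
		inode->dirty = 1;	/* must be journalled even if i_size is unchanged */
	}
	if (new_i_size > inode->i_size)
		inode->i_size = new_i_size;
}

int main(void)
{
	struct toy_inode ino = { .i_size = 8192, .i_disksize = 4096 };

	/* Delalloc case: the write lands inside i_size but beyond i_disksize. */
	write_end_update(&ino, 4096, 1024);
	printf("i_size=%lld i_disksize=%lld dirty=%d\n",
	       ino.i_size, ino.i_disksize, ino.dirty);
	return 0;
}
```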
| @@ -1486,6 +1476,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1486 | int ret = 0, ret2; | 1476 | int ret = 0, ret2; |
| 1487 | int partial = 0; | 1477 | int partial = 0; |
| 1488 | unsigned from, to; | 1478 | unsigned from, to; |
| 1479 | loff_t new_i_size; | ||
| 1489 | 1480 | ||
| 1490 | from = pos & (PAGE_CACHE_SIZE - 1); | 1481 | from = pos & (PAGE_CACHE_SIZE - 1); |
| 1491 | to = from + len; | 1482 | to = from + len; |
| @@ -1500,11 +1491,12 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1500 | to, &partial, write_end_fn); | 1491 | to, &partial, write_end_fn); |
| 1501 | if (!partial) | 1492 | if (!partial) |
| 1502 | SetPageUptodate(page); | 1493 | SetPageUptodate(page); |
| 1503 | if (pos+copied > inode->i_size) | 1494 | new_i_size = pos + copied; |
| 1495 | if (new_i_size > inode->i_size) | ||
| 1504 | i_size_write(inode, pos+copied); | 1496 | i_size_write(inode, pos+copied); |
| 1505 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; | 1497 | EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; |
| 1506 | if (inode->i_size > EXT4_I(inode)->i_disksize) { | 1498 | if (new_i_size > EXT4_I(inode)->i_disksize) { |
| 1507 | EXT4_I(inode)->i_disksize = inode->i_size; | 1499 | ext4_update_i_disksize(inode, new_i_size); |
| 1508 | ret2 = ext4_mark_inode_dirty(handle, inode); | 1500 | ret2 = ext4_mark_inode_dirty(handle, inode); |
| 1509 | if (!ret) | 1501 | if (!ret) |
| 1510 | ret = ret2; | 1502 | ret = ret2; |
| @@ -1521,6 +1513,7 @@ static int ext4_journalled_write_end(struct file *file, | |||
| 1521 | 1513 | ||
| 1522 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | 1514 | static int ext4_da_reserve_space(struct inode *inode, int nrblocks) |
| 1523 | { | 1515 | { |
| 1516 | int retries = 0; | ||
| 1524 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | 1517 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); |
| 1525 | unsigned long md_needed, mdblocks, total = 0; | 1518 | unsigned long md_needed, mdblocks, total = 0; |
| 1526 | 1519 | ||
| @@ -1529,6 +1522,7 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
| 1529 | * in order to allocate nrblocks | 1522 | * in order to allocate nrblocks |
| 1530 | * worse case is one extent per block | 1523 | * worse case is one extent per block |
| 1531 | */ | 1524 | */ |
| 1525 | repeat: | ||
| 1532 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); | 1526 | spin_lock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1533 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; | 1527 | total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks; |
| 1534 | mdblocks = ext4_calc_metadata_amount(inode, total); | 1528 | mdblocks = ext4_calc_metadata_amount(inode, total); |
| @@ -1537,13 +1531,14 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks) | |||
| 1537 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; | 1531 | md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks; |
| 1538 | total = md_needed + nrblocks; | 1532 | total = md_needed + nrblocks; |
| 1539 | 1533 | ||
| 1540 | if (ext4_has_free_blocks(sbi, total) < total) { | 1534 | if (ext4_claim_free_blocks(sbi, total)) { |
| 1541 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); | 1535 | spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); |
| 1536 | if (ext4_should_retry_alloc(inode->i_sb, &retries)) { | ||
| 1537 | yield(); | ||
| 1538 | goto repeat; | ||
| 1539 | } | ||
| 1542 | return -ENOSPC; | 1540 | return -ENOSPC; |
| 1543 | } | 1541 | } |
| 1544 | /* reduce fs free blocks counter */ | ||
| 1545 | percpu_counter_sub(&sbi->s_freeblocks_counter, total); | ||
| 1546 | |||
| 1547 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; | 1542 | EXT4_I(inode)->i_reserved_data_blocks += nrblocks; |
| 1548 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; | 1543 | EXT4_I(inode)->i_reserved_meta_blocks = mdblocks; |
| 1549 | 1544 | ||
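The reworked ext4_da_reserve_space() no longer decrements the free-blocks counter directly; it asks ext4_claim_free_blocks() for the whole data-plus-metadata amount and, on failure, yields and retries while ext4_should_retry_alloc() says a journal commit might free space. A generic sketch of that claim-or-retry shape in plain C, with a fake claim function and a bounded retry count standing in for the journal heuristics:

```c
#include <stdio.h>
#include <errno.h>
#include <sched.h>

static long available = 5;	/* pretend pool of free blocks */

/* Fake stand-in for ext4_claim_free_blocks(): 0 on success, -1 if short. */
static int claim_blocks(long want)
{
	if (available < want)
		return -1;
	available -= want;
	return 0;
}

/* Reserve data + metadata blocks, retrying a few times as space frees up. */
static int reserve_space(long data, long meta)
{
	int retries = 0;

	while (claim_blocks(data + meta) != 0) {
		if (retries++ >= 3)	/* ext4 instead asks ext4_should_retry_alloc() */
			return -ENOSPC;
		sched_yield();		/* give writeback a chance to run */
		available += 4;		/* simulate blocks being freed meanwhile */
	}
	return 0;
}

int main(void)
{
	int ret = reserve_space(8, 2);

	printf("reserve_space returned %d, pool left %ld\n", ret, available);
	return 0;
}
```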
| @@ -1585,8 +1580,8 @@ static void ext4_da_release_space(struct inode *inode, int to_free) | |||
| 1585 | 1580 | ||
| 1586 | release = to_free + mdb_free; | 1581 | release = to_free + mdb_free; |
| 1587 | 1582 | ||
| 1588 | /* update fs free blocks counter for truncate case */ | 1583 | /* update fs dirty blocks counter for truncate case */ |
| 1589 | percpu_counter_add(&sbi->s_freeblocks_counter, release); | 1584 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, release); |
| 1590 | 1585 | ||
| 1591 | /* update per-inode reservations */ | 1586 | /* update per-inode reservations */ |
| 1592 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); | 1587 | BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks); |
| @@ -1630,6 +1625,7 @@ struct mpage_da_data { | |||
| 1630 | struct writeback_control *wbc; | 1625 | struct writeback_control *wbc; |
| 1631 | int io_done; | 1626 | int io_done; |
| 1632 | long pages_written; | 1627 | long pages_written; |
| 1628 | int retval; | ||
| 1633 | }; | 1629 | }; |
| 1634 | 1630 | ||
| 1635 | /* | 1631 | /* |
| @@ -1652,6 +1648,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
| 1652 | int ret = 0, err, nr_pages, i; | 1648 | int ret = 0, err, nr_pages, i; |
| 1653 | unsigned long index, end; | 1649 | unsigned long index, end; |
| 1654 | struct pagevec pvec; | 1650 | struct pagevec pvec; |
| 1651 | long pages_skipped; | ||
| 1655 | 1652 | ||
| 1656 | BUG_ON(mpd->next_page <= mpd->first_page); | 1653 | BUG_ON(mpd->next_page <= mpd->first_page); |
| 1657 | pagevec_init(&pvec, 0); | 1654 | pagevec_init(&pvec, 0); |
| @@ -1659,20 +1656,30 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd) | |||
| 1659 | end = mpd->next_page - 1; | 1656 | end = mpd->next_page - 1; |
| 1660 | 1657 | ||
| 1661 | while (index <= end) { | 1658 | while (index <= end) { |
| 1662 | /* XXX: optimize tail */ | 1659 | /* |
| 1663 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | 1660 | * We can use PAGECACHE_TAG_DIRTY lookup here because |
| 1661 | * even though we have cleared the dirty flag on the page | ||
| 1662 | * We still keep the page in the radix tree with tag | ||
| 1663 | * PAGECACHE_TAG_DIRTY. See clear_page_dirty_for_io. | ||
| 1664 | * The PAGECACHE_TAG_DIRTY is cleared in set_page_writeback | ||
| 1665 | * which is called via the below writepage callback. | ||
| 1666 | */ | ||
| 1667 | nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | ||
| 1668 | PAGECACHE_TAG_DIRTY, | ||
| 1669 | min(end - index, | ||
| 1670 | (pgoff_t)PAGEVEC_SIZE-1) + 1); | ||
| 1664 | if (nr_pages == 0) | 1671 | if (nr_pages == 0) |
| 1665 | break; | 1672 | break; |
| 1666 | for (i = 0; i < nr_pages; i++) { | 1673 | for (i = 0; i < nr_pages; i++) { |
| 1667 | struct page *page = pvec.pages[i]; | 1674 | struct page *page = pvec.pages[i]; |
| 1668 | 1675 | ||
| 1669 | index = page->index; | 1676 | pages_skipped = mpd->wbc->pages_skipped; |
| 1670 | if (index > end) | ||
| 1671 | break; | ||
| 1672 | index++; | ||
| 1673 | |||
| 1674 | err = mapping->a_ops->writepage(page, mpd->wbc); | 1677 | err = mapping->a_ops->writepage(page, mpd->wbc); |
| 1675 | if (!err) | 1678 | if (!err && (pages_skipped == mpd->wbc->pages_skipped)) |
| 1679 | /* | ||
| 1680 | * have successfully written the page | ||
| 1681 | * without it being skipped | ||
| 1682 | */ | ||
| 1676 | mpd->pages_written++; | 1683 | mpd->pages_written++; |
| 1677 | /* | 1684 | /* |
| 1678 | * In error case, we have to continue because | 1685 | * In error case, we have to continue because |
| @@ -1783,6 +1790,57 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
| 1783 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); | 1790 | unmap_underlying_metadata(bdev, bh->b_blocknr + i); |
| 1784 | } | 1791 | } |
| 1785 | 1792 | ||
| 1793 | static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd, | ||
| 1794 | sector_t logical, long blk_cnt) | ||
| 1795 | { | ||
| 1796 | int nr_pages, i; | ||
| 1797 | pgoff_t index, end; | ||
| 1798 | struct pagevec pvec; | ||
| 1799 | struct inode *inode = mpd->inode; | ||
| 1800 | struct address_space *mapping = inode->i_mapping; | ||
| 1801 | |||
| 1802 | index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1803 | end = (logical + blk_cnt - 1) >> | ||
| 1804 | (PAGE_CACHE_SHIFT - inode->i_blkbits); | ||
| 1805 | while (index <= end) { | ||
| 1806 | nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE); | ||
| 1807 | if (nr_pages == 0) | ||
| 1808 | break; | ||
| 1809 | for (i = 0; i < nr_pages; i++) { | ||
| 1810 | struct page *page = pvec.pages[i]; | ||
| 1811 | index = page->index; | ||
| 1812 | if (index > end) | ||
| 1813 | break; | ||
| 1814 | index++; | ||
| 1815 | |||
| 1816 | BUG_ON(!PageLocked(page)); | ||
| 1817 | BUG_ON(PageWriteback(page)); | ||
| 1818 | block_invalidatepage(page, 0); | ||
| 1819 | ClearPageUptodate(page); | ||
| 1820 | unlock_page(page); | ||
| 1821 | } | ||
| 1822 | } | ||
| 1823 | return; | ||
| 1824 | } | ||
| 1825 | |||
| 1826 | static void ext4_print_free_blocks(struct inode *inode) | ||
| 1827 | { | ||
| 1828 | struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); | ||
| 1829 | printk(KERN_EMERG "Total free blocks count %lld\n", | ||
| 1830 | ext4_count_free_blocks(inode->i_sb)); | ||
| 1831 | printk(KERN_EMERG "Free/Dirty block details\n"); | ||
| 1832 | printk(KERN_EMERG "free_blocks=%lld\n", | ||
| 1833 | percpu_counter_sum(&sbi->s_freeblocks_counter)); | ||
| 1834 | printk(KERN_EMERG "dirty_blocks=%lld\n", | ||
| 1835 | percpu_counter_sum(&sbi->s_dirtyblocks_counter)); | ||
| 1836 | printk(KERN_EMERG "Block reservation details\n"); | ||
| 1837 | printk(KERN_EMERG "i_reserved_data_blocks=%lu\n", | ||
| 1838 | EXT4_I(inode)->i_reserved_data_blocks); | ||
| 1839 | printk(KERN_EMERG "i_reserved_meta_blocks=%lu\n", | ||
| 1840 | EXT4_I(inode)->i_reserved_meta_blocks); | ||
| 1841 | return; | ||
| 1842 | } | ||
| 1843 | |||
| 1786 | /* | 1844 | /* |
| 1787 | * mpage_da_map_blocks - go through given space | 1845 | * mpage_da_map_blocks - go through given space |
| 1788 | * | 1846 | * |
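The new ext4_da_block_invalidatepages() helper converts a run of file blocks into a page-index range by shifting with (PAGE_CACHE_SHIFT - i_blkbits). The arithmetic is worth seeing on its own; here is a stand-alone sketch with 4K pages and 1K blocks, values chosen only for illustration:

```c
#include <stdio.h>

#define PAGE_SHIFT_BITS 12	/* 4096-byte pages */
#define BLKBITS         10	/* 1024-byte filesystem blocks */

int main(void)
{
	unsigned long long logical = 13;	/* first file block of the failed extent */
	long blk_cnt = 9;			/* number of blocks to invalidate */

	/* Same conversion as the helper: file blocks -> page cache indexes. */
	unsigned long index = logical >> (PAGE_SHIFT_BITS - BLKBITS);
	unsigned long end = (logical + blk_cnt - 1) >> (PAGE_SHIFT_BITS - BLKBITS);

	/* Blocks 13..21 with 4 blocks per page touch pages 3..5. */
	printf("invalidate pages %lu..%lu\n", index, end);
	return 0;
}
```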
| @@ -1792,32 +1850,69 @@ static inline void __unmap_underlying_blocks(struct inode *inode, | |||
| 1792 | * The function skips space we know is already mapped to disk blocks. | 1850 | * The function skips space we know is already mapped to disk blocks. |
| 1793 | * | 1851 | * |
| 1794 | */ | 1852 | */ |
| 1795 | static void mpage_da_map_blocks(struct mpage_da_data *mpd) | 1853 | static int mpage_da_map_blocks(struct mpage_da_data *mpd) |
| 1796 | { | 1854 | { |
| 1797 | int err = 0; | 1855 | int err = 0; |
| 1798 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1799 | sector_t next = lbh->b_blocknr; | ||
| 1800 | struct buffer_head new; | 1856 | struct buffer_head new; |
| 1857 | struct buffer_head *lbh = &mpd->lbh; | ||
| 1858 | sector_t next; | ||
| 1801 | 1859 | ||
| 1802 | /* | 1860 | /* |
| 1803 | * We consider only non-mapped and non-allocated blocks | 1861 | * We consider only non-mapped and non-allocated blocks |
| 1804 | */ | 1862 | */ |
| 1805 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) | 1863 | if (buffer_mapped(lbh) && !buffer_delay(lbh)) |
| 1806 | return; | 1864 | return 0; |
| 1807 | |||
| 1808 | new.b_state = lbh->b_state; | 1865 | new.b_state = lbh->b_state; |
| 1809 | new.b_blocknr = 0; | 1866 | new.b_blocknr = 0; |
| 1810 | new.b_size = lbh->b_size; | 1867 | new.b_size = lbh->b_size; |
| 1811 | 1868 | next = lbh->b_blocknr; | |
| 1812 | /* | 1869 | /* |
| 1813 | * If we didn't accumulate anything | 1870 | * If we didn't accumulate anything |
| 1814 | * to write simply return | 1871 | * to write simply return |
| 1815 | */ | 1872 | */ |
| 1816 | if (!new.b_size) | 1873 | if (!new.b_size) |
| 1817 | return; | 1874 | return 0; |
| 1818 | err = mpd->get_block(mpd->inode, next, &new, 1); | 1875 | err = mpd->get_block(mpd->inode, next, &new, 1); |
| 1819 | if (err) | 1876 | if (err) { |
| 1820 | return; | 1877 | |
| 1878 | /* If get block returns with error | ||
| 1879 | * we simply return. Later writepage | ||
| 1880 | * will redirty the page and writepages | ||
| 1881 | * will find the dirty page again | ||
| 1882 | */ | ||
| 1883 | if (err == -EAGAIN) | ||
| 1884 | return 0; | ||
| 1885 | |||
| 1886 | if (err == -ENOSPC && | ||
| 1887 | ext4_count_free_blocks(mpd->inode->i_sb)) { | ||
| 1888 | mpd->retval = err; | ||
| 1889 | return 0; | ||
| 1890 | } | ||
| 1891 | |||
| 1892 | /* | ||
| 1893 | * get block failure will cause us | ||
| 1894 | * to loop in writepages. Because | ||
| 1895 | * a_ops->writepage won't be able to | ||
| 1896 | * make progress. The page will be redirtied | ||
| 1897 | * by writepage and writepages will again | ||
| 1898 | * try to write the same. | ||
| 1899 | */ | ||
| 1900 | printk(KERN_EMERG "%s block allocation failed for inode %lu " | ||
| 1901 | "at logical offset %llu with max blocks " | ||
| 1902 | "%zd with error %d\n", | ||
| 1903 | __func__, mpd->inode->i_ino, | ||
| 1904 | (unsigned long long)next, | ||
| 1905 | lbh->b_size >> mpd->inode->i_blkbits, err); | ||
| 1906 | printk(KERN_EMERG "This should not happen.!! " | ||
| 1907 | "Data will be lost\n"); | ||
| 1908 | if (err == -ENOSPC) { | ||
| 1909 | ext4_print_free_blocks(mpd->inode); | ||
| 1910 | } | ||
| 1911 | /* invalidate all the pages */ | ||
| 1912 | ext4_da_block_invalidatepages(mpd, next, | ||
| 1913 | lbh->b_size >> mpd->inode->i_blkbits); | ||
| 1914 | return err; | ||
| 1915 | } | ||
| 1821 | BUG_ON(new.b_size == 0); | 1916 | BUG_ON(new.b_size == 0); |
| 1822 | 1917 | ||
| 1823 | if (buffer_new(&new)) | 1918 | if (buffer_new(&new)) |
| @@ -1830,7 +1925,7 @@ static void mpage_da_map_blocks(struct mpage_da_data *mpd) | |||
| 1830 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) | 1925 | if (buffer_delay(lbh) || buffer_unwritten(lbh)) |
| 1831 | mpage_put_bnr_to_bhs(mpd, next, &new); | 1926 | mpage_put_bnr_to_bhs(mpd, next, &new); |
| 1832 | 1927 | ||
| 1833 | return; | 1928 | return 0; |
| 1834 | } | 1929 | } |
| 1835 | 1930 | ||
| 1836 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ | 1931 | #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \ |
| @@ -1899,8 +1994,8 @@ flush_it: | |||
| 1899 | * We couldn't merge the block to our extent, so we | 1994 | * We couldn't merge the block to our extent, so we |
| 1900 | * need to flush current extent and start new one | 1995 | * need to flush current extent and start new one |
| 1901 | */ | 1996 | */ |
| 1902 | mpage_da_map_blocks(mpd); | 1997 | if (mpage_da_map_blocks(mpd) == 0) |
| 1903 | mpage_da_submit_io(mpd); | 1998 | mpage_da_submit_io(mpd); |
| 1904 | mpd->io_done = 1; | 1999 | mpd->io_done = 1; |
| 1905 | return; | 2000 | return; |
| 1906 | } | 2001 | } |
| @@ -1942,8 +2037,8 @@ static int __mpage_da_writepage(struct page *page, | |||
| 1942 | * and start IO on them using writepage() | 2037 | * and start IO on them using writepage() |
| 1943 | */ | 2038 | */ |
| 1944 | if (mpd->next_page != mpd->first_page) { | 2039 | if (mpd->next_page != mpd->first_page) { |
| 1945 | mpage_da_map_blocks(mpd); | 2040 | if (mpage_da_map_blocks(mpd) == 0) |
| 1946 | mpage_da_submit_io(mpd); | 2041 | mpage_da_submit_io(mpd); |
| 1947 | /* | 2042 | /* |
| 1948 | * skip rest of the page in the page_vec | 2043 | * skip rest of the page in the page_vec |
| 1949 | */ | 2044 | */ |
| @@ -2018,39 +2113,34 @@ static int __mpage_da_writepage(struct page *page, | |||
| 2018 | */ | 2113 | */ |
| 2019 | static int mpage_da_writepages(struct address_space *mapping, | 2114 | static int mpage_da_writepages(struct address_space *mapping, |
| 2020 | struct writeback_control *wbc, | 2115 | struct writeback_control *wbc, |
| 2021 | get_block_t get_block) | 2116 | struct mpage_da_data *mpd) |
| 2022 | { | 2117 | { |
| 2023 | struct mpage_da_data mpd; | ||
| 2024 | long to_write; | ||
| 2025 | int ret; | 2118 | int ret; |
| 2026 | 2119 | ||
| 2027 | if (!get_block) | 2120 | if (!mpd->get_block) |
| 2028 | return generic_writepages(mapping, wbc); | 2121 | return generic_writepages(mapping, wbc); |
| 2029 | 2122 | ||
| 2030 | mpd.wbc = wbc; | 2123 | mpd->lbh.b_size = 0; |
| 2031 | mpd.inode = mapping->host; | 2124 | mpd->lbh.b_state = 0; |
| 2032 | mpd.lbh.b_size = 0; | 2125 | mpd->lbh.b_blocknr = 0; |
| 2033 | mpd.lbh.b_state = 0; | 2126 | mpd->first_page = 0; |
| 2034 | mpd.lbh.b_blocknr = 0; | 2127 | mpd->next_page = 0; |
| 2035 | mpd.first_page = 0; | 2128 | mpd->io_done = 0; |
| 2036 | mpd.next_page = 0; | 2129 | mpd->pages_written = 0; |
| 2037 | mpd.get_block = get_block; | 2130 | mpd->retval = 0; |
| 2038 | mpd.io_done = 0; | ||
| 2039 | mpd.pages_written = 0; | ||
| 2040 | |||
| 2041 | to_write = wbc->nr_to_write; | ||
| 2042 | |||
| 2043 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, &mpd); | ||
| 2044 | 2131 | ||
| 2132 | ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, mpd); | ||
| 2045 | /* | 2133 | /* |
| 2046 | * Handle last extent of pages | 2134 | * Handle last extent of pages |
| 2047 | */ | 2135 | */ |
| 2048 | if (!mpd.io_done && mpd.next_page != mpd.first_page) { | 2136 | if (!mpd->io_done && mpd->next_page != mpd->first_page) { |
| 2049 | mpage_da_map_blocks(&mpd); | 2137 | if (mpage_da_map_blocks(mpd) == 0) |
| 2050 | mpage_da_submit_io(&mpd); | 2138 | mpage_da_submit_io(mpd); |
| 2051 | } | ||
| 2052 | 2139 | ||
| 2053 | wbc->nr_to_write = to_write - mpd.pages_written; | 2140 | mpd->io_done = 1; |
| 2141 | ret = MPAGE_DA_EXTENT_TAIL; | ||
| 2142 | } | ||
| 2143 | wbc->nr_to_write -= mpd->pages_written; | ||
| 2054 | return ret; | 2144 | return ret; |
| 2055 | } | 2145 | } |
| 2056 | 2146 | ||
| @@ -2103,18 +2193,24 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
| 2103 | handle_t *handle = NULL; | 2193 | handle_t *handle = NULL; |
| 2104 | 2194 | ||
| 2105 | handle = ext4_journal_current_handle(); | 2195 | handle = ext4_journal_current_handle(); |
| 2106 | if (!handle) { | 2196 | BUG_ON(!handle); |
| 2107 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | 2197 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, |
| 2108 | bh_result, 0, 0, 0); | 2198 | bh_result, create, 0, EXT4_DELALLOC_RSVED); |
| 2109 | BUG_ON(!ret); | ||
| 2110 | } else { | ||
| 2111 | ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, | ||
| 2112 | bh_result, create, 0, EXT4_DELALLOC_RSVED); | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | if (ret > 0) { | 2199 | if (ret > 0) { |
| 2200 | |||
| 2116 | bh_result->b_size = (ret << inode->i_blkbits); | 2201 | bh_result->b_size = (ret << inode->i_blkbits); |
| 2117 | 2202 | ||
| 2203 | if (ext4_should_order_data(inode)) { | ||
| 2204 | int retval; | ||
| 2205 | retval = ext4_jbd2_file_inode(handle, inode); | ||
| 2206 | if (retval) | ||
| 2207 | /* | ||
| 2208 | * Failed to add inode for ordered | ||
| 2209 | * mode. Don't update file size | ||
| 2210 | */ | ||
| 2211 | return retval; | ||
| 2212 | } | ||
| 2213 | |||
| 2118 | /* | 2214 | /* |
| 2119 | * Update on-disk size along with block allocation | 2215 | * Update on-disk size along with block allocation |
| 2120 | * we don't use 'extend_disksize' as size may change | 2216 | * we don't use 'extend_disksize' as size may change |
| @@ -2124,18 +2220,9 @@ static int ext4_da_get_block_write(struct inode *inode, sector_t iblock, | |||
| 2124 | if (disksize > i_size_read(inode)) | 2220 | if (disksize > i_size_read(inode)) |
| 2125 | disksize = i_size_read(inode); | 2221 | disksize = i_size_read(inode); |
| 2126 | if (disksize > EXT4_I(inode)->i_disksize) { | 2222 | if (disksize > EXT4_I(inode)->i_disksize) { |
| 2127 | /* | 2223 | ext4_update_i_disksize(inode, disksize); |
| 2128 | * XXX: replace with spinlock if seen contended -bzzz | 2224 | ret = ext4_mark_inode_dirty(handle, inode); |
| 2129 | */ | 2225 | return ret; |
| 2130 | down_write(&EXT4_I(inode)->i_data_sem); | ||
| 2131 | if (disksize > EXT4_I(inode)->i_disksize) | ||
| 2132 | EXT4_I(inode)->i_disksize = disksize; | ||
| 2133 | up_write(&EXT4_I(inode)->i_data_sem); | ||
| 2134 | |||
| 2135 | if (EXT4_I(inode)->i_disksize == disksize) { | ||
| 2136 | ret = ext4_mark_inode_dirty(handle, inode); | ||
| 2137 | return ret; | ||
| 2138 | } | ||
| 2139 | } | 2226 | } |
| 2140 | ret = 0; | 2227 | ret = 0; |
| 2141 | } | 2228 | } |
| @@ -2282,11 +2369,14 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode) | |||
| 2282 | static int ext4_da_writepages(struct address_space *mapping, | 2369 | static int ext4_da_writepages(struct address_space *mapping, |
| 2283 | struct writeback_control *wbc) | 2370 | struct writeback_control *wbc) |
| 2284 | { | 2371 | { |
| 2372 | pgoff_t index; | ||
| 2373 | int range_whole = 0; | ||
| 2285 | handle_t *handle = NULL; | 2374 | handle_t *handle = NULL; |
| 2286 | loff_t range_start = 0; | 2375 | struct mpage_da_data mpd; |
| 2287 | struct inode *inode = mapping->host; | 2376 | struct inode *inode = mapping->host; |
| 2377 | int no_nrwrite_index_update; | ||
| 2378 | long pages_written = 0, pages_skipped; | ||
| 2288 | int needed_blocks, ret = 0, nr_to_writebump = 0; | 2379 | int needed_blocks, ret = 0, nr_to_writebump = 0; |
| 2289 | long to_write, pages_skipped = 0; | ||
| 2290 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); | 2380 | struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb); |
| 2291 | 2381 | ||
| 2292 | /* | 2382 | /* |
| @@ -2306,20 +2396,26 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
| 2306 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; | 2396 | nr_to_writebump = sbi->s_mb_stream_request - wbc->nr_to_write; |
| 2307 | wbc->nr_to_write = sbi->s_mb_stream_request; | 2397 | wbc->nr_to_write = sbi->s_mb_stream_request; |
| 2308 | } | 2398 | } |
| 2399 | if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) | ||
| 2400 | range_whole = 1; | ||
| 2309 | 2401 | ||
| 2310 | if (!wbc->range_cyclic) | 2402 | if (wbc->range_cyclic) |
| 2311 | /* | 2403 | index = mapping->writeback_index; |
| 2312 | * If range_cyclic is not set force range_cont | 2404 | else |
| 2313 | * and save the old writeback_index | 2405 | index = wbc->range_start >> PAGE_CACHE_SHIFT; |
| 2314 | */ | 2406 | |
| 2315 | wbc->range_cont = 1; | 2407 | mpd.wbc = wbc; |
| 2408 | mpd.inode = mapping->host; | ||
| 2316 | 2409 | ||
| 2317 | range_start = wbc->range_start; | 2410 | /* |
| 2411 | * we don't want write_cache_pages to update | ||
| 2412 | * nr_to_write and writeback_index | ||
| 2413 | */ | ||
| 2414 | no_nrwrite_index_update = wbc->no_nrwrite_index_update; | ||
| 2415 | wbc->no_nrwrite_index_update = 1; | ||
| 2318 | pages_skipped = wbc->pages_skipped; | 2416 | pages_skipped = wbc->pages_skipped; |
| 2319 | 2417 | ||
| 2320 | restart_loop: | 2418 | while (!ret && wbc->nr_to_write > 0) { |
| 2321 | to_write = wbc->nr_to_write; | ||
| 2322 | while (!ret && to_write > 0) { | ||
| 2323 | 2419 | ||
| 2324 | /* | 2420 | /* |
| 2325 | * we insert one extent at a time. So we need | 2421 | * we insert one extent at a time. So we need |
| @@ -2340,57 +2436,83 @@ restart_loop: | |||
| 2340 | dump_stack(); | 2436 | dump_stack(); |
| 2341 | goto out_writepages; | 2437 | goto out_writepages; |
| 2342 | } | 2438 | } |
| 2343 | if (ext4_should_order_data(inode)) { | 2439 | mpd.get_block = ext4_da_get_block_write; |
| 2344 | /* | 2440 | ret = mpage_da_writepages(mapping, wbc, &mpd); |
| 2345 | * With ordered mode we need to add | ||
| 2346 | * the inode to the journal handl | ||
| 2347 | * when we do block allocation. | ||
| 2348 | */ | ||
| 2349 | ret = ext4_jbd2_file_inode(handle, inode); | ||
| 2350 | if (ret) { | ||
| 2351 | ext4_journal_stop(handle); | ||
| 2352 | goto out_writepages; | ||
| 2353 | } | ||
| 2354 | } | ||
| 2355 | 2441 | ||
| 2356 | to_write -= wbc->nr_to_write; | ||
| 2357 | ret = mpage_da_writepages(mapping, wbc, | ||
| 2358 | ext4_da_get_block_write); | ||
| 2359 | ext4_journal_stop(handle); | 2442 | ext4_journal_stop(handle); |
| 2360 | if (ret == MPAGE_DA_EXTENT_TAIL) { | 2443 | |
| 2444 | if (mpd.retval == -ENOSPC) { | ||
| 2445 | /* commit the transaction which would | ||
| 2446 | * free blocks released in the transaction | ||
| 2447 | * and try again | ||
| 2448 | */ | ||
| 2449 | jbd2_journal_force_commit_nested(sbi->s_journal); | ||
| 2450 | wbc->pages_skipped = pages_skipped; | ||
| 2451 | ret = 0; | ||
| 2452 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | ||
| 2361 | /* | 2453 | /* |
| 2362 | * got one extent now try with | 2454 | * got one extent now try with |
| 2363 | * rest of the pages | 2455 | * rest of the pages |
| 2364 | */ | 2456 | */ |
| 2365 | to_write += wbc->nr_to_write; | 2457 | pages_written += mpd.pages_written; |
| 2458 | wbc->pages_skipped = pages_skipped; | ||
| 2366 | ret = 0; | 2459 | ret = 0; |
| 2367 | } else if (wbc->nr_to_write) { | 2460 | } else if (wbc->nr_to_write) |
| 2368 | /* | 2461 | /* |
| 2369 | * There is no more writeout needed | 2462 | * There is no more writeout needed |
| 2370 | * or we requested for a noblocking writeout | 2463 | * or we requested for a noblocking writeout |
| 2371 | * and we found the device congested | 2464 | * and we found the device congested |
| 2372 | */ | 2465 | */ |
| 2373 | to_write += wbc->nr_to_write; | ||
| 2374 | break; | 2466 | break; |
| 2375 | } | ||
| 2376 | wbc->nr_to_write = to_write; | ||
| 2377 | } | ||
| 2378 | |||
| 2379 | if (wbc->range_cont && (pages_skipped != wbc->pages_skipped)) { | ||
| 2380 | /* We skipped pages in this loop */ | ||
| 2381 | wbc->range_start = range_start; | ||
| 2382 | wbc->nr_to_write = to_write + | ||
| 2383 | wbc->pages_skipped - pages_skipped; | ||
| 2384 | wbc->pages_skipped = pages_skipped; | ||
| 2385 | goto restart_loop; | ||
| 2386 | } | 2467 | } |
| 2468 | if (pages_skipped != wbc->pages_skipped) | ||
| 2469 | printk(KERN_EMERG "This should not happen leaving %s " | ||
| 2470 | "with nr_to_write = %ld ret = %d\n", | ||
| 2471 | __func__, wbc->nr_to_write, ret); | ||
| 2472 | |||
| 2473 | /* Update index */ | ||
| 2474 | index += pages_written; | ||
| 2475 | if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) | ||
| 2476 | /* | ||
| 2477 | * set the writeback_index so that range_cyclic | ||
| 2478 | * mode will write it back later | ||
| 2479 | */ | ||
| 2480 | mapping->writeback_index = index; | ||
| 2387 | 2481 | ||
| 2388 | out_writepages: | 2482 | out_writepages: |
| 2389 | wbc->nr_to_write = to_write - nr_to_writebump; | 2483 | if (!no_nrwrite_index_update) |
| 2390 | wbc->range_start = range_start; | 2484 | wbc->no_nrwrite_index_update = 0; |
| 2485 | wbc->nr_to_write -= nr_to_writebump; | ||
| 2391 | return ret; | 2486 | return ret; |
| 2392 | } | 2487 | } |
| 2393 | 2488 | ||
| 2489 | #define FALL_BACK_TO_NONDELALLOC 1 | ||
| 2490 | static int ext4_nonda_switch(struct super_block *sb) | ||
| 2491 | { | ||
| 2492 | s64 free_blocks, dirty_blocks; | ||
| 2493 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 2494 | |||
| 2495 | /* | ||
| 2496 | * switch to non delalloc mode if we are running low | ||
| 2497 | * on free block. The free block accounting via percpu | ||
| 2498 | * counters can get slightly wrong with FBC_BATCH getting | ||
| 2499 | * accumulated on each CPU without updating global counters | ||
| 2500 | * Delalloc needs an accurate free block accounting, so switch | ||
| 2501 | * to non delalloc when we are near the error range. | ||
| 2502 | */ | ||
| 2503 | free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); | ||
| 2504 | dirty_blocks = percpu_counter_read_positive(&sbi->s_dirtyblocks_counter); | ||
| 2505 | if (2 * free_blocks < 3 * dirty_blocks || | ||
| 2506 | free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) { | ||
| 2507 | /* | ||
| 2508 | * free block count is less than 150% of dirty blocks | ||
| 2509 | * or free blocks is less than the watermark | ||
| 2510 | */ | ||
| 2511 | return 1; | ||
| 2512 | } | ||
| 2513 | return 0; | ||
| 2514 | } | ||
| 2515 | |||
| 2394 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | 2516 | static int ext4_da_write_begin(struct file *file, struct address_space *mapping, |
| 2395 | loff_t pos, unsigned len, unsigned flags, | 2517 | loff_t pos, unsigned len, unsigned flags, |
| 2396 | struct page **pagep, void **fsdata) | 2518 | struct page **pagep, void **fsdata) |
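The new ext4_nonda_switch() is just an arithmetic guard: fall back to non-delalloc writes when free blocks drop below 150% of the delalloc-dirty blocks, or below dirty blocks plus a fixed watermark. A userspace sketch of the same check, with an arbitrary constant standing in for EXT4_FREEBLOCKS_WATERMARK:

```c
#include <stdio.h>

#define FREEBLOCKS_WATERMARK 1024	/* placeholder; the real value is fs-defined */

/* Return 1 when delayed allocation should be bypassed for new writes. */
static int nonda_switch(long long free_blocks, long long dirty_blocks)
{
	if (2 * free_blocks < 3 * dirty_blocks ||
	    free_blocks < dirty_blocks + FREEBLOCKS_WATERMARK)
		return 1;	/* too close to ENOSPC for optimistic reservations */
	return 0;
}

int main(void)
{
	printf("plenty of space:  %d\n", nonda_switch(100000, 1000));
	printf("free < 1.5*dirty: %d\n", nonda_switch(1400, 1000));
	printf("below watermark:  %d\n", nonda_switch(1500, 600));
	return 0;
}
```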
| @@ -2406,6 +2528,12 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping, | |||
| 2406 | from = pos & (PAGE_CACHE_SIZE - 1); | 2528 | from = pos & (PAGE_CACHE_SIZE - 1); |
| 2407 | to = from + len; | 2529 | to = from + len; |
| 2408 | 2530 | ||
| 2531 | if (ext4_nonda_switch(inode->i_sb)) { | ||
| 2532 | *fsdata = (void *)FALL_BACK_TO_NONDELALLOC; | ||
| 2533 | return ext4_write_begin(file, mapping, pos, | ||
| 2534 | len, flags, pagep, fsdata); | ||
| 2535 | } | ||
| 2536 | *fsdata = (void *)0; | ||
| 2409 | retry: | 2537 | retry: |
| 2410 | /* | 2538 | /* |
| 2411 | * With delayed allocation, we don't log the i_disksize update | 2539 | * With delayed allocation, we don't log the i_disksize update |
| @@ -2433,6 +2561,13 @@ retry: | |||
| 2433 | unlock_page(page); | 2561 | unlock_page(page); |
| 2434 | ext4_journal_stop(handle); | 2562 | ext4_journal_stop(handle); |
| 2435 | page_cache_release(page); | 2563 | page_cache_release(page); |
| 2564 | /* | ||
| 2565 | * block_write_begin may have instantiated a few blocks | ||
| 2566 | * outside i_size. Trim these off again. Don't need | ||
| 2567 | * i_size_read because we hold i_mutex. | ||
| 2568 | */ | ||
| 2569 | if (pos + len > inode->i_size) | ||
| 2570 | vmtruncate(inode, inode->i_size); | ||
| 2436 | } | 2571 | } |
| 2437 | 2572 | ||
| 2438 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) | 2573 | if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) |
| @@ -2456,7 +2591,7 @@ static int ext4_da_should_update_i_disksize(struct page *page, | |||
| 2456 | bh = page_buffers(page); | 2591 | bh = page_buffers(page); |
| 2457 | idx = offset >> inode->i_blkbits; | 2592 | idx = offset >> inode->i_blkbits; |
| 2458 | 2593 | ||
| 2459 | for (i=0; i < idx; i++) | 2594 | for (i = 0; i < idx; i++) |
| 2460 | bh = bh->b_this_page; | 2595 | bh = bh->b_this_page; |
| 2461 | 2596 | ||
| 2462 | if (!buffer_mapped(bh) || (buffer_delay(bh))) | 2597 | if (!buffer_mapped(bh) || (buffer_delay(bh))) |
| @@ -2474,9 +2609,22 @@ static int ext4_da_write_end(struct file *file, | |||
| 2474 | handle_t *handle = ext4_journal_current_handle(); | 2609 | handle_t *handle = ext4_journal_current_handle(); |
| 2475 | loff_t new_i_size; | 2610 | loff_t new_i_size; |
| 2476 | unsigned long start, end; | 2611 | unsigned long start, end; |
| 2612 | int write_mode = (int)(unsigned long)fsdata; | ||
| 2613 | |||
| 2614 | if (write_mode == FALL_BACK_TO_NONDELALLOC) { | ||
| 2615 | if (ext4_should_order_data(inode)) { | ||
| 2616 | return ext4_ordered_write_end(file, mapping, pos, | ||
| 2617 | len, copied, page, fsdata); | ||
| 2618 | } else if (ext4_should_writeback_data(inode)) { | ||
| 2619 | return ext4_writeback_write_end(file, mapping, pos, | ||
| 2620 | len, copied, page, fsdata); | ||
| 2621 | } else { | ||
| 2622 | BUG(); | ||
| 2623 | } | ||
| 2624 | } | ||
| 2477 | 2625 | ||
| 2478 | start = pos & (PAGE_CACHE_SIZE - 1); | 2626 | start = pos & (PAGE_CACHE_SIZE - 1); |
| 2479 | end = start + copied -1; | 2627 | end = start + copied - 1; |
| 2480 | 2628 | ||
| 2481 | /* | 2629 | /* |
| 2482 | * generic_write_end() will run mark_inode_dirty() if i_size | 2630 | * generic_write_end() will run mark_inode_dirty() if i_size |
| @@ -2500,6 +2648,11 @@ static int ext4_da_write_end(struct file *file, | |||
| 2500 | EXT4_I(inode)->i_disksize = new_i_size; | 2648 | EXT4_I(inode)->i_disksize = new_i_size; |
| 2501 | } | 2649 | } |
| 2502 | up_write(&EXT4_I(inode)->i_data_sem); | 2650 | up_write(&EXT4_I(inode)->i_data_sem); |
| 2651 | /* We need to mark inode dirty even if | ||
| 2652 | * new_i_size is less than inode->i_size | ||
| 2653 | * but greater than i_disksize (hint: delalloc) | ||
| 2654 | */ | ||
| 2655 | ext4_mark_inode_dirty(handle, inode); | ||
| 2503 | } | 2656 | } |
| 2504 | } | 2657 | } |
| 2505 | ret2 = generic_write_end(file, mapping, pos, len, copied, | 2658 | ret2 = generic_write_end(file, mapping, pos, len, copied, |
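The two hunks above cooperate through the opaque fsdata cookie: ext4_da_write_begin() stores FALL_BACK_TO_NONDELALLOC in *fsdata when ext4_nonda_switch() reports that free space is tight, and ext4_da_write_end() reads the cookie back to route the completion through the ordered or writeback path instead of the delalloc one. A minimal stand-alone sketch of that cookie-passing pattern follows; the names and values are illustrative, not the kernel's.

#include <stdio.h>

enum write_mode {
	WM_DELALLOC = 0,	/* normal delayed-allocation path        */
	WM_FALLBACK = 1,	/* stand-in for FALL_BACK_TO_NONDELALLOC */
};

/* write_begin stashes the chosen mode in the opaque fsdata cookie. */
static int demo_write_begin(int low_on_space, void **fsdata)
{
	*fsdata = (void *)(unsigned long)(low_on_space ? WM_FALLBACK
						       : WM_DELALLOC);
	return 0;
}

/* write_end recovers the mode and picks the matching completion path. */
static int demo_write_end(void *fsdata)
{
	enum write_mode mode = (enum write_mode)(unsigned long)fsdata;

	if (mode == WM_FALLBACK)
		printf("completing via the non-delalloc path\n");
	else
		printf("completing via the delalloc path\n");
	return 0;
}

int main(void)
{
	void *cookie;

	demo_write_begin(1, &cookie);
	demo_write_end(cookie);
	return 0;
}

Encoding the mode in the fsdata cookie keeps the decision local to a single write_begin/write_end pair, so no extra inode state or locking is needed.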
| @@ -2591,7 +2744,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
| 2591 | return 0; | 2744 | return 0; |
| 2592 | } | 2745 | } |
| 2593 | 2746 | ||
| 2594 | return generic_block_bmap(mapping,block,ext4_get_block); | 2747 | return generic_block_bmap(mapping, block, ext4_get_block); |
| 2595 | } | 2748 | } |
| 2596 | 2749 | ||
| 2597 | static int bget_one(handle_t *handle, struct buffer_head *bh) | 2750 | static int bget_one(handle_t *handle, struct buffer_head *bh) |
| @@ -3197,7 +3350,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
| 3197 | if (!partial->key && *partial->p) | 3350 | if (!partial->key && *partial->p) |
| 3198 | /* Writer: end */ | 3351 | /* Writer: end */ |
| 3199 | goto no_top; | 3352 | goto no_top; |
| 3200 | for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) | 3353 | for (p = partial; (p > chain) && all_zeroes((__le32 *) p->bh->b_data, p->p); p--) |
| 3201 | ; | 3354 | ; |
| 3202 | /* | 3355 | /* |
| 3203 | * OK, we've found the last block that must survive. The rest of our | 3356 | * OK, we've found the last block that must survive. The rest of our |
| @@ -3216,7 +3369,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth, | |||
| 3216 | } | 3369 | } |
| 3217 | /* Writer: end */ | 3370 | /* Writer: end */ |
| 3218 | 3371 | ||
| 3219 | while(partial > p) { | 3372 | while (partial > p) { |
| 3220 | brelse(partial->bh); | 3373 | brelse(partial->bh); |
| 3221 | partial--; | 3374 | partial--; |
| 3222 | } | 3375 | } |
| @@ -3408,9 +3561,9 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
| 3408 | /* This zaps the entire block. Bottom up. */ | 3561 | /* This zaps the entire block. Bottom up. */ |
| 3409 | BUFFER_TRACE(bh, "free child branches"); | 3562 | BUFFER_TRACE(bh, "free child branches"); |
| 3410 | ext4_free_branches(handle, inode, bh, | 3563 | ext4_free_branches(handle, inode, bh, |
| 3411 | (__le32*)bh->b_data, | 3564 | (__le32 *) bh->b_data, |
| 3412 | (__le32*)bh->b_data + addr_per_block, | 3565 | (__le32 *) bh->b_data + addr_per_block, |
| 3413 | depth); | 3566 | depth); |
| 3414 | 3567 | ||
| 3415 | /* | 3568 | /* |
| 3416 | * We've probably journalled the indirect block several | 3569 | * We've probably journalled the indirect block several |
| @@ -3578,7 +3731,7 @@ void ext4_truncate(struct inode *inode) | |||
| 3578 | */ | 3731 | */ |
| 3579 | down_write(&ei->i_data_sem); | 3732 | down_write(&ei->i_data_sem); |
| 3580 | 3733 | ||
| 3581 | ext4_discard_reservation(inode); | 3734 | ext4_discard_preallocations(inode); |
| 3582 | 3735 | ||
| 3583 | /* | 3736 | /* |
| 3584 | * The orphan list entry will now protect us from any crash which | 3737 | * The orphan list entry will now protect us from any crash which |
| @@ -3673,41 +3826,6 @@ out_stop: | |||
| 3673 | ext4_journal_stop(handle); | 3826 | ext4_journal_stop(handle); |
| 3674 | } | 3827 | } |
| 3675 | 3828 | ||
| 3676 | static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | ||
| 3677 | unsigned long ino, struct ext4_iloc *iloc) | ||
| 3678 | { | ||
| 3679 | ext4_group_t block_group; | ||
| 3680 | unsigned long offset; | ||
| 3681 | ext4_fsblk_t block; | ||
| 3682 | struct ext4_group_desc *gdp; | ||
| 3683 | |||
| 3684 | if (!ext4_valid_inum(sb, ino)) { | ||
| 3685 | /* | ||
| 3686 | * This error is already checked for in namei.c unless we are | ||
| 3687 | * looking at an NFS filehandle, in which case no error | ||
| 3688 | * report is needed | ||
| 3689 | */ | ||
| 3690 | return 0; | ||
| 3691 | } | ||
| 3692 | |||
| 3693 | block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); | ||
| 3694 | gdp = ext4_get_group_desc(sb, block_group, NULL); | ||
| 3695 | if (!gdp) | ||
| 3696 | return 0; | ||
| 3697 | |||
| 3698 | /* | ||
| 3699 | * Figure out the offset within the block group inode table | ||
| 3700 | */ | ||
| 3701 | offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * | ||
| 3702 | EXT4_INODE_SIZE(sb); | ||
| 3703 | block = ext4_inode_table(sb, gdp) + | ||
| 3704 | (offset >> EXT4_BLOCK_SIZE_BITS(sb)); | ||
| 3705 | |||
| 3706 | iloc->block_group = block_group; | ||
| 3707 | iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); | ||
| 3708 | return block; | ||
| 3709 | } | ||
| 3710 | |||
| 3711 | /* | 3829 | /* |
| 3712 | * ext4_get_inode_loc returns with an extra refcount against the inode's | 3830 | * ext4_get_inode_loc returns with an extra refcount against the inode's |
| 3713 | * underlying buffer_head on success. If 'in_mem' is true, we have all | 3831 | * underlying buffer_head on success. If 'in_mem' is true, we have all |
| @@ -3717,19 +3835,35 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, | |||
| 3717 | static int __ext4_get_inode_loc(struct inode *inode, | 3835 | static int __ext4_get_inode_loc(struct inode *inode, |
| 3718 | struct ext4_iloc *iloc, int in_mem) | 3836 | struct ext4_iloc *iloc, int in_mem) |
| 3719 | { | 3837 | { |
| 3720 | ext4_fsblk_t block; | 3838 | struct ext4_group_desc *gdp; |
| 3721 | struct buffer_head *bh; | 3839 | struct buffer_head *bh; |
| 3840 | struct super_block *sb = inode->i_sb; | ||
| 3841 | ext4_fsblk_t block; | ||
| 3842 | int inodes_per_block, inode_offset; | ||
| 3843 | |||
| 3844 | iloc->bh = 0; | ||
| 3845 | if (!ext4_valid_inum(sb, inode->i_ino)) | ||
| 3846 | return -EIO; | ||
| 3722 | 3847 | ||
| 3723 | block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); | 3848 | iloc->block_group = (inode->i_ino - 1) / EXT4_INODES_PER_GROUP(sb); |
| 3724 | if (!block) | 3849 | gdp = ext4_get_group_desc(sb, iloc->block_group, NULL); |
| 3850 | if (!gdp) | ||
| 3725 | return -EIO; | 3851 | return -EIO; |
| 3726 | 3852 | ||
| 3727 | bh = sb_getblk(inode->i_sb, block); | 3853 | /* |
| 3854 | * Figure out the offset within the block group inode table | ||
| 3855 | */ | ||
| 3856 | inodes_per_block = (EXT4_BLOCK_SIZE(sb) / EXT4_INODE_SIZE(sb)); | ||
| 3857 | inode_offset = ((inode->i_ino - 1) % | ||
| 3858 | EXT4_INODES_PER_GROUP(sb)); | ||
| 3859 | block = ext4_inode_table(sb, gdp) + (inode_offset / inodes_per_block); | ||
| 3860 | iloc->offset = (inode_offset % inodes_per_block) * EXT4_INODE_SIZE(sb); | ||
| 3861 | |||
| 3862 | bh = sb_getblk(sb, block); | ||
| 3728 | if (!bh) { | 3863 | if (!bh) { |
| 3729 | ext4_error (inode->i_sb, "ext4_get_inode_loc", | 3864 | ext4_error(sb, "ext4_get_inode_loc", "unable to read " |
| 3730 | "unable to read inode block - " | 3865 | "inode block - inode=%lu, block=%llu", |
| 3731 | "inode=%lu, block=%llu", | 3866 | inode->i_ino, block); |
| 3732 | inode->i_ino, block); | ||
| 3733 | return -EIO; | 3867 | return -EIO; |
| 3734 | } | 3868 | } |
| 3735 | if (!buffer_uptodate(bh)) { | 3869 | if (!buffer_uptodate(bh)) { |
| @@ -3757,28 +3891,12 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
| 3757 | */ | 3891 | */ |
| 3758 | if (in_mem) { | 3892 | if (in_mem) { |
| 3759 | struct buffer_head *bitmap_bh; | 3893 | struct buffer_head *bitmap_bh; |
| 3760 | struct ext4_group_desc *desc; | 3894 | int i, start; |
| 3761 | int inodes_per_buffer; | ||
| 3762 | int inode_offset, i; | ||
| 3763 | ext4_group_t block_group; | ||
| 3764 | int start; | ||
| 3765 | |||
| 3766 | block_group = (inode->i_ino - 1) / | ||
| 3767 | EXT4_INODES_PER_GROUP(inode->i_sb); | ||
| 3768 | inodes_per_buffer = bh->b_size / | ||
| 3769 | EXT4_INODE_SIZE(inode->i_sb); | ||
| 3770 | inode_offset = ((inode->i_ino - 1) % | ||
| 3771 | EXT4_INODES_PER_GROUP(inode->i_sb)); | ||
| 3772 | start = inode_offset & ~(inodes_per_buffer - 1); | ||
| 3773 | 3895 | ||
| 3774 | /* Is the inode bitmap in cache? */ | 3896 | start = inode_offset & ~(inodes_per_block - 1); |
| 3775 | desc = ext4_get_group_desc(inode->i_sb, | ||
| 3776 | block_group, NULL); | ||
| 3777 | if (!desc) | ||
| 3778 | goto make_io; | ||
| 3779 | 3897 | ||
| 3780 | bitmap_bh = sb_getblk(inode->i_sb, | 3898 | /* Is the inode bitmap in cache? */ |
| 3781 | ext4_inode_bitmap(inode->i_sb, desc)); | 3899 | bitmap_bh = sb_getblk(sb, ext4_inode_bitmap(sb, gdp)); |
| 3782 | if (!bitmap_bh) | 3900 | if (!bitmap_bh) |
| 3783 | goto make_io; | 3901 | goto make_io; |
| 3784 | 3902 | ||
| @@ -3791,14 +3909,14 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
| 3791 | brelse(bitmap_bh); | 3909 | brelse(bitmap_bh); |
| 3792 | goto make_io; | 3910 | goto make_io; |
| 3793 | } | 3911 | } |
| 3794 | for (i = start; i < start + inodes_per_buffer; i++) { | 3912 | for (i = start; i < start + inodes_per_block; i++) { |
| 3795 | if (i == inode_offset) | 3913 | if (i == inode_offset) |
| 3796 | continue; | 3914 | continue; |
| 3797 | if (ext4_test_bit(i, bitmap_bh->b_data)) | 3915 | if (ext4_test_bit(i, bitmap_bh->b_data)) |
| 3798 | break; | 3916 | break; |
| 3799 | } | 3917 | } |
| 3800 | brelse(bitmap_bh); | 3918 | brelse(bitmap_bh); |
| 3801 | if (i == start + inodes_per_buffer) { | 3919 | if (i == start + inodes_per_block) { |
| 3802 | /* all other inodes are free, so skip I/O */ | 3920 | /* all other inodes are free, so skip I/O */ |
| 3803 | memset(bh->b_data, 0, bh->b_size); | 3921 | memset(bh->b_data, 0, bh->b_size); |
| 3804 | set_buffer_uptodate(bh); | 3922 | set_buffer_uptodate(bh); |
| @@ -3809,6 +3927,36 @@ static int __ext4_get_inode_loc(struct inode *inode, | |||
| 3809 | 3927 | ||
| 3810 | make_io: | 3928 | make_io: |
| 3811 | /* | 3929 | /* |
| 3930 | * If we need to do any I/O, try to pre-readahead extra | ||
| 3931 | * blocks from the inode table. | ||
| 3932 | */ | ||
| 3933 | if (EXT4_SB(sb)->s_inode_readahead_blks) { | ||
| 3934 | ext4_fsblk_t b, end, table; | ||
| 3935 | unsigned num; | ||
| 3936 | |||
| 3937 | table = ext4_inode_table(sb, gdp); | ||
| 3938 | /* Make sure s_inode_readahead_blks is a power of 2 */ | ||
| 3939 | while (EXT4_SB(sb)->s_inode_readahead_blks & | ||
| 3940 | (EXT4_SB(sb)->s_inode_readahead_blks-1)) | ||
| 3941 | EXT4_SB(sb)->s_inode_readahead_blks = | ||
| 3942 | (EXT4_SB(sb)->s_inode_readahead_blks & | ||
| 3943 | (EXT4_SB(sb)->s_inode_readahead_blks-1)); | ||
| 3944 | b = block & ~(EXT4_SB(sb)->s_inode_readahead_blks-1); | ||
| 3945 | if (table > b) | ||
| 3946 | b = table; | ||
| 3947 | end = b + EXT4_SB(sb)->s_inode_readahead_blks; | ||
| 3948 | num = EXT4_INODES_PER_GROUP(sb); | ||
| 3949 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, | ||
| 3950 | EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) | ||
| 3951 | num -= le16_to_cpu(gdp->bg_itable_unused); | ||
| 3952 | table += num / inodes_per_block; | ||
| 3953 | if (end > table) | ||
| 3954 | end = table; | ||
| 3955 | while (b <= end) | ||
| 3956 | sb_breadahead(sb, b++); | ||
| 3957 | } | ||
| 3958 | |||
| 3959 | /* | ||
| 3812 | * There are other valid inodes in the buffer, this inode | 3960 | * There are other valid inodes in the buffer, this inode |
| 3813 | * has in-inode xattrs, or we don't have this inode in memory. | 3961 | * has in-inode xattrs, or we don't have this inode in memory. |
| 3814 | * Read the block from disk. | 3962 | * Read the block from disk. |
| @@ -3818,10 +3966,9 @@ make_io: | |||
| 3818 | submit_bh(READ_META, bh); | 3966 | submit_bh(READ_META, bh); |
| 3819 | wait_on_buffer(bh); | 3967 | wait_on_buffer(bh); |
| 3820 | if (!buffer_uptodate(bh)) { | 3968 | if (!buffer_uptodate(bh)) { |
| 3821 | ext4_error(inode->i_sb, "ext4_get_inode_loc", | 3969 | ext4_error(sb, __func__, |
| 3822 | "unable to read inode block - " | 3970 | "unable to read inode block - inode=%lu, " |
| 3823 | "inode=%lu, block=%llu", | 3971 | "block=%llu", inode->i_ino, block); |
| 3824 | inode->i_ino, block); | ||
| 3825 | brelse(bh); | 3972 | brelse(bh); |
| 3826 | return -EIO; | 3973 | return -EIO; |
| 3827 | } | 3974 | } |
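The readahead block added to __ext4_get_inode_loc() above first forces s_inode_readahead_blks down to a power of two by repeatedly clearing its lowest set bit, then aligns the block being read down to that window before issuing sb_breadahead() calls. A tiny stand-alone sketch of the same rounding and alignment, with arbitrary example numbers and none of the group-descriptor clamping, is shown below.

#include <stdio.h>

int main(void)
{
	unsigned long ra_blks = 37;		/* arbitrary, not a power of two */
	unsigned long long block = 100123;	/* block we are about to read   */

	/* Clear the lowest set bit until only one bit remains. */
	while (ra_blks & (ra_blks - 1))
		ra_blks &= ra_blks - 1;

	/* Align the target block down to the readahead window. */
	unsigned long long start = block & ~((unsigned long long)ra_blks - 1);

	printf("window %lu blocks, readahead starts at block %llu\n",
	       ra_blks, start);
	return 0;
}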
| @@ -3913,11 +4060,10 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 3913 | return inode; | 4060 | return inode; |
| 3914 | 4061 | ||
| 3915 | ei = EXT4_I(inode); | 4062 | ei = EXT4_I(inode); |
| 3916 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 4063 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 3917 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 4064 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
| 3918 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 4065 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
| 3919 | #endif | 4066 | #endif |
| 3920 | ei->i_block_alloc_info = NULL; | ||
| 3921 | 4067 | ||
| 3922 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4068 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
| 3923 | if (ret < 0) | 4069 | if (ret < 0) |
| @@ -3927,7 +4073,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 3927 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); | 4073 | inode->i_mode = le16_to_cpu(raw_inode->i_mode); |
| 3928 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); | 4074 | inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); |
| 3929 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); | 4075 | inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); |
| 3930 | if(!(test_opt (inode->i_sb, NO_UID32))) { | 4076 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
| 3931 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; | 4077 | inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; |
| 3932 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; | 4078 | inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; |
| 3933 | } | 4079 | } |
| @@ -3945,7 +4091,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 3945 | if (inode->i_mode == 0 || | 4091 | if (inode->i_mode == 0 || |
| 3946 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 4092 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
| 3947 | /* this inode is deleted */ | 4093 | /* this inode is deleted */ |
| 3948 | brelse (bh); | 4094 | brelse(bh); |
| 3949 | ret = -ESTALE; | 4095 | ret = -ESTALE; |
| 3950 | goto bad_inode; | 4096 | goto bad_inode; |
| 3951 | } | 4097 | } |
| @@ -3978,7 +4124,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 3978 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 4124 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
| 3979 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 4125 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
| 3980 | EXT4_INODE_SIZE(inode->i_sb)) { | 4126 | EXT4_INODE_SIZE(inode->i_sb)) { |
| 3981 | brelse (bh); | 4127 | brelse(bh); |
| 3982 | ret = -EIO; | 4128 | ret = -EIO; |
| 3983 | goto bad_inode; | 4129 | goto bad_inode; |
| 3984 | } | 4130 | } |
| @@ -4031,7 +4177,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
| 4031 | init_special_inode(inode, inode->i_mode, | 4177 | init_special_inode(inode, inode->i_mode, |
| 4032 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); | 4178 | new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); |
| 4033 | } | 4179 | } |
| 4034 | brelse (iloc.bh); | 4180 | brelse(iloc.bh); |
| 4035 | ext4_set_inode_flags(inode); | 4181 | ext4_set_inode_flags(inode); |
| 4036 | unlock_new_inode(inode); | 4182 | unlock_new_inode(inode); |
| 4037 | return inode; | 4183 | return inode; |
| @@ -4048,7 +4194,6 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
| 4048 | struct inode *inode = &(ei->vfs_inode); | 4194 | struct inode *inode = &(ei->vfs_inode); |
| 4049 | u64 i_blocks = inode->i_blocks; | 4195 | u64 i_blocks = inode->i_blocks; |
| 4050 | struct super_block *sb = inode->i_sb; | 4196 | struct super_block *sb = inode->i_sb; |
| 4051 | int err = 0; | ||
| 4052 | 4197 | ||
| 4053 | if (i_blocks <= ~0U) { | 4198 | if (i_blocks <= ~0U) { |
| 4054 | /* | 4199 | /* |
| @@ -4058,36 +4203,27 @@ static int ext4_inode_blocks_set(handle_t *handle, | |||
| 4058 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4203 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
| 4059 | raw_inode->i_blocks_high = 0; | 4204 | raw_inode->i_blocks_high = 0; |
| 4060 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4205 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
| 4061 | } else if (i_blocks <= 0xffffffffffffULL) { | 4206 | return 0; |
| 4207 | } | ||
| 4208 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) | ||
| 4209 | return -EFBIG; | ||
| 4210 | |||
| 4211 | if (i_blocks <= 0xffffffffffffULL) { | ||
| 4062 | /* | 4212 | /* |
| 4063 | * i_blocks can be represented in a 48 bit variable | 4213 | * i_blocks can be represented in a 48 bit variable |
| 4064 | * as multiple of 512 bytes | 4214 | * as multiple of 512 bytes |
| 4065 | */ | 4215 | */ |
| 4066 | err = ext4_update_rocompat_feature(handle, sb, | ||
| 4067 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
| 4068 | if (err) | ||
| 4069 | goto err_out; | ||
| 4070 | /* i_block is stored in the split 48 bit fields */ | ||
| 4071 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4216 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
| 4072 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4217 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
| 4073 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; | 4218 | ei->i_flags &= ~EXT4_HUGE_FILE_FL; |
| 4074 | } else { | 4219 | } else { |
| 4075 | /* | ||
| 4076 | * i_blocks should be represented in a 48 bit variable | ||
| 4077 | * as multiple of file system block size | ||
| 4078 | */ | ||
| 4079 | err = ext4_update_rocompat_feature(handle, sb, | ||
| 4080 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
| 4081 | if (err) | ||
| 4082 | goto err_out; | ||
| 4083 | ei->i_flags |= EXT4_HUGE_FILE_FL; | 4220 | ei->i_flags |= EXT4_HUGE_FILE_FL; |
| 4084 | /* i_block is stored in file system block size */ | 4221 | /* i_block is stored in file system block size */ |
| 4085 | i_blocks = i_blocks >> (inode->i_blkbits - 9); | 4222 | i_blocks = i_blocks >> (inode->i_blkbits - 9); |
| 4086 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); | 4223 | raw_inode->i_blocks_lo = cpu_to_le32(i_blocks); |
| 4087 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); | 4224 | raw_inode->i_blocks_high = cpu_to_le16(i_blocks >> 32); |
| 4088 | } | 4225 | } |
| 4089 | err_out: | 4226 | return 0; |
| 4090 | return err; | ||
| 4091 | } | 4227 | } |
| 4092 | 4228 | ||
| 4093 | /* | 4229 | /* |
| @@ -4113,14 +4249,14 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4113 | 4249 | ||
| 4114 | ext4_get_inode_flags(ei); | 4250 | ext4_get_inode_flags(ei); |
| 4115 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); | 4251 | raw_inode->i_mode = cpu_to_le16(inode->i_mode); |
| 4116 | if(!(test_opt(inode->i_sb, NO_UID32))) { | 4252 | if (!(test_opt(inode->i_sb, NO_UID32))) { |
| 4117 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); | 4253 | raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); |
| 4118 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); | 4254 | raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); |
| 4119 | /* | 4255 | /* |
| 4120 | * Fix up interoperability with old kernels. Otherwise, old inodes get | 4256 | * Fix up interoperability with old kernels. Otherwise, old inodes get |
| 4121 | * re-used with the upper 16 bits of the uid/gid intact | 4257 | * re-used with the upper 16 bits of the uid/gid intact |
| 4122 | */ | 4258 | */ |
| 4123 | if(!ei->i_dtime) { | 4259 | if (!ei->i_dtime) { |
| 4124 | raw_inode->i_uid_high = | 4260 | raw_inode->i_uid_high = |
| 4125 | cpu_to_le16(high_16_bits(inode->i_uid)); | 4261 | cpu_to_le16(high_16_bits(inode->i_uid)); |
| 4126 | raw_inode->i_gid_high = | 4262 | raw_inode->i_gid_high = |
| @@ -4208,7 +4344,7 @@ static int ext4_do_update_inode(handle_t *handle, | |||
| 4208 | ei->i_state &= ~EXT4_STATE_NEW; | 4344 | ei->i_state &= ~EXT4_STATE_NEW; |
| 4209 | 4345 | ||
| 4210 | out_brelse: | 4346 | out_brelse: |
| 4211 | brelse (bh); | 4347 | brelse(bh); |
| 4212 | ext4_std_error(inode->i_sb, err); | 4348 | ext4_std_error(inode->i_sb, err); |
| 4213 | return err; | 4349 | return err; |
| 4214 | } | 4350 | } |
| @@ -4811,6 +4947,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
| 4811 | loff_t size; | 4947 | loff_t size; |
| 4812 | unsigned long len; | 4948 | unsigned long len; |
| 4813 | int ret = -EINVAL; | 4949 | int ret = -EINVAL; |
| 4950 | void *fsdata; | ||
| 4814 | struct file *file = vma->vm_file; | 4951 | struct file *file = vma->vm_file; |
| 4815 | struct inode *inode = file->f_path.dentry->d_inode; | 4952 | struct inode *inode = file->f_path.dentry->d_inode; |
| 4816 | struct address_space *mapping = inode->i_mapping; | 4953 | struct address_space *mapping = inode->i_mapping; |
| @@ -4849,11 +4986,11 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
| 4849 | * on the same page though | 4986 | * on the same page though |
| 4850 | */ | 4987 | */ |
| 4851 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), | 4988 | ret = mapping->a_ops->write_begin(file, mapping, page_offset(page), |
| 4852 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, NULL); | 4989 | len, AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); |
| 4853 | if (ret < 0) | 4990 | if (ret < 0) |
| 4854 | goto out_unlock; | 4991 | goto out_unlock; |
| 4855 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), | 4992 | ret = mapping->a_ops->write_end(file, mapping, page_offset(page), |
| 4856 | len, len, page, NULL); | 4993 | len, len, page, fsdata); |
| 4857 | if (ret < 0) | 4994 | if (ret < 0) |
| 4858 | goto out_unlock; | 4995 | goto out_unlock; |
| 4859 | ret = 0; | 4996 | ret = 0; |
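With ext4_get_inode_block() folded into __ext4_get_inode_loc(), the inode's on-disk position is now derived from inodes_per_block rather than a raw byte offset. A small self-contained sketch of that arithmetic follows; the geometry constants and the inode table start are illustrative assumptions, not values read from a real group descriptor.

#include <stdio.h>

/* Illustrative geometry -- assumed values, not from a real superblock. */
#define BLOCK_SIZE        4096ULL
#define INODE_SIZE         256ULL
#define INODES_PER_GROUP  8192ULL

int main(void)
{
	unsigned long long ino = 12345;		/* example inode number            */
	unsigned long long itable_start = 1059;	/* assumed inode table start block */
						/* (per group in the real code)    */

	/* Same derivation as the patched __ext4_get_inode_loc(): */
	unsigned long long group            = (ino - 1) / INODES_PER_GROUP;
	unsigned long long inodes_per_block = BLOCK_SIZE / INODE_SIZE;
	unsigned long long inode_offset     = (ino - 1) % INODES_PER_GROUP;
	unsigned long long block  = itable_start + inode_offset / inodes_per_block;
	unsigned long long offset = (inode_offset % inodes_per_block) * INODE_SIZE;

	printf("group %llu, block %llu, offset-in-block %llu\n",
	       group, block, offset);
	return 0;
}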
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 7a6c2f1faba6..dc99b4776d58 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
| @@ -23,9 +23,8 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 23 | struct inode *inode = filp->f_dentry->d_inode; | 23 | struct inode *inode = filp->f_dentry->d_inode; |
| 24 | struct ext4_inode_info *ei = EXT4_I(inode); | 24 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 25 | unsigned int flags; | 25 | unsigned int flags; |
| 26 | unsigned short rsv_window_size; | ||
| 27 | 26 | ||
| 28 | ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); | 27 | ext4_debug("cmd = %u, arg = %lu\n", cmd, arg); |
| 29 | 28 | ||
| 30 | switch (cmd) { | 29 | switch (cmd) { |
| 31 | case EXT4_IOC_GETFLAGS: | 30 | case EXT4_IOC_GETFLAGS: |
| @@ -34,7 +33,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 34 | return put_user(flags, (int __user *) arg); | 33 | return put_user(flags, (int __user *) arg); |
| 35 | case EXT4_IOC_SETFLAGS: { | 34 | case EXT4_IOC_SETFLAGS: { |
| 36 | handle_t *handle = NULL; | 35 | handle_t *handle = NULL; |
| 37 | int err; | 36 | int err, migrate = 0; |
| 38 | struct ext4_iloc iloc; | 37 | struct ext4_iloc iloc; |
| 39 | unsigned int oldflags; | 38 | unsigned int oldflags; |
| 40 | unsigned int jflag; | 39 | unsigned int jflag; |
| @@ -82,6 +81,17 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
| 82 | if (!capable(CAP_SYS_RESOURCE)) | 81 | if (!capable(CAP_SYS_RESOURCE)) |
| 83 | goto flags_out; | 82 | goto flags_out; |
| 84 | } | 83 | } |
| 84 | if (oldflags & EXT4_EXTENTS_FL) { | ||
| 85 | /* We don't support clearing extent flags */ | ||
| 86 | if (!(flags & EXT4_EXTENTS_FL)) { | ||
| 87 | err = -EOPNOTSUPP; | ||
| 88 | goto flags_out; | ||
| 89 | } | ||
| 90 | } else if (flags & EXT4_EXTENTS_FL) { | ||
| 91 | /* migrate the file */ | ||
| 92 | migrate = 1; | ||
| 93 | flags &= ~EXT4_EXTENTS_FL; | ||
| 94 | } | ||
| 85 | 95 | ||
| 86 | handle = ext4_journal_start(inode, 1); | 96 | handle = ext4_journal_start(inode, 1); |
| 87 | if (IS_ERR(handle)) { | 97 | if (IS_ERR(handle)) { |
| @@ -109,6 +119,10 @@ flags_err: | |||
| 109 | 119 | ||
| 110 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) | 120 | if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) |
| 111 | err = ext4_change_inode_journal_flag(inode, jflag); | 121 | err = ext4_change_inode_journal_flag(inode, jflag); |
| 122 | if (err) | ||
| 123 | goto flags_out; | ||
| 124 | if (migrate) | ||
| 125 | err = ext4_ext_migrate(inode); | ||
| 112 | flags_out: | 126 | flags_out: |
| 113 | mutex_unlock(&inode->i_mutex); | 127 | mutex_unlock(&inode->i_mutex); |
| 114 | mnt_drop_write(filp->f_path.mnt); | 128 | mnt_drop_write(filp->f_path.mnt); |
| @@ -175,53 +189,10 @@ setversion_out: | |||
| 175 | return ret; | 189 | return ret; |
| 176 | } | 190 | } |
| 177 | #endif | 191 | #endif |
| 178 | case EXT4_IOC_GETRSVSZ: | ||
| 179 | if (test_opt(inode->i_sb, RESERVATION) | ||
| 180 | && S_ISREG(inode->i_mode) | ||
| 181 | && ei->i_block_alloc_info) { | ||
| 182 | rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; | ||
| 183 | return put_user(rsv_window_size, (int __user *)arg); | ||
| 184 | } | ||
| 185 | return -ENOTTY; | ||
| 186 | case EXT4_IOC_SETRSVSZ: { | ||
| 187 | int err; | ||
| 188 | |||
| 189 | if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) | ||
| 190 | return -ENOTTY; | ||
| 191 | |||
| 192 | if (!is_owner_or_cap(inode)) | ||
| 193 | return -EACCES; | ||
| 194 | |||
| 195 | if (get_user(rsv_window_size, (int __user *)arg)) | ||
| 196 | return -EFAULT; | ||
| 197 | |||
| 198 | err = mnt_want_write(filp->f_path.mnt); | ||
| 199 | if (err) | ||
| 200 | return err; | ||
| 201 | |||
| 202 | if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) | ||
| 203 | rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; | ||
| 204 | |||
| 205 | /* | ||
| 206 | * need to allocate reservation structure for this inode | ||
| 207 | * before set the window size | ||
| 208 | */ | ||
| 209 | down_write(&ei->i_data_sem); | ||
| 210 | if (!ei->i_block_alloc_info) | ||
| 211 | ext4_init_block_alloc_info(inode); | ||
| 212 | |||
| 213 | if (ei->i_block_alloc_info){ | ||
| 214 | struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; | ||
| 215 | rsv->rsv_goal_size = rsv_window_size; | ||
| 216 | } | ||
| 217 | up_write(&ei->i_data_sem); | ||
| 218 | mnt_drop_write(filp->f_path.mnt); | ||
| 219 | return 0; | ||
| 220 | } | ||
| 221 | case EXT4_IOC_GROUP_EXTEND: { | 192 | case EXT4_IOC_GROUP_EXTEND: { |
| 222 | ext4_fsblk_t n_blocks_count; | 193 | ext4_fsblk_t n_blocks_count; |
| 223 | struct super_block *sb = inode->i_sb; | 194 | struct super_block *sb = inode->i_sb; |
| 224 | int err; | 195 | int err, err2; |
| 225 | 196 | ||
| 226 | if (!capable(CAP_SYS_RESOURCE)) | 197 | if (!capable(CAP_SYS_RESOURCE)) |
| 227 | return -EPERM; | 198 | return -EPERM; |
| @@ -235,8 +206,10 @@ setversion_out: | |||
| 235 | 206 | ||
| 236 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); | 207 | err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); |
| 237 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 208 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
| 238 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 209 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
| 239 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 210 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
| 211 | if (err == 0) | ||
| 212 | err = err2; | ||
| 240 | mnt_drop_write(filp->f_path.mnt); | 213 | mnt_drop_write(filp->f_path.mnt); |
| 241 | 214 | ||
| 242 | return err; | 215 | return err; |
| @@ -244,7 +217,7 @@ setversion_out: | |||
| 244 | case EXT4_IOC_GROUP_ADD: { | 217 | case EXT4_IOC_GROUP_ADD: { |
| 245 | struct ext4_new_group_data input; | 218 | struct ext4_new_group_data input; |
| 246 | struct super_block *sb = inode->i_sb; | 219 | struct super_block *sb = inode->i_sb; |
| 247 | int err; | 220 | int err, err2; |
| 248 | 221 | ||
| 249 | if (!capable(CAP_SYS_RESOURCE)) | 222 | if (!capable(CAP_SYS_RESOURCE)) |
| 250 | return -EPERM; | 223 | return -EPERM; |
| @@ -259,15 +232,36 @@ setversion_out: | |||
| 259 | 232 | ||
| 260 | err = ext4_group_add(sb, &input); | 233 | err = ext4_group_add(sb, &input); |
| 261 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 234 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
| 262 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 235 | err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
| 263 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 236 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
| 237 | if (err == 0) | ||
| 238 | err = err2; | ||
| 264 | mnt_drop_write(filp->f_path.mnt); | 239 | mnt_drop_write(filp->f_path.mnt); |
| 265 | 240 | ||
| 266 | return err; | 241 | return err; |
| 267 | } | 242 | } |
| 268 | 243 | ||
| 269 | case EXT4_IOC_MIGRATE: | 244 | case EXT4_IOC_MIGRATE: |
| 270 | return ext4_ext_migrate(inode, filp, cmd, arg); | 245 | { |
| 246 | int err; | ||
| 247 | if (!is_owner_or_cap(inode)) | ||
| 248 | return -EACCES; | ||
| 249 | |||
| 250 | err = mnt_want_write(filp->f_path.mnt); | ||
| 251 | if (err) | ||
| 252 | return err; | ||
| 253 | /* | ||
| 254 | * i_mutex prevents write and truncate on the file. | ||
| 255 | * Reads still go through. We take i_data_sem in | ||
| 256 | * ext4_ext_swap_inode_data before we switch the | ||
| 257 | * inode format to prevent read. | ||
| 258 | */ | ||
| 259 | mutex_lock(&(inode->i_mutex)); | ||
| 260 | err = ext4_ext_migrate(inode); | ||
| 261 | mutex_unlock(&(inode->i_mutex)); | ||
| 262 | mnt_drop_write(filp->f_path.mnt); | ||
| 263 | return err; | ||
| 264 | } | ||
| 271 | 265 | ||
| 272 | default: | 266 | default: |
| 273 | return -ENOTTY; | 267 | return -ENOTTY; |
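The EXT4_IOC_SETFLAGS changes above encode a simple policy for the extents flag: it can never be cleared, and newly setting it schedules an on-line migration via ext4_ext_migrate() rather than being written straight into the inode. A stripped-down sketch of that decision, outside any kernel context and with a stand-in constant, could look like this.

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define EXTENTS_FL 0x00080000	/* stand-in for EXT4_EXTENTS_FL */

/*
 * Decide how to handle the extents flag when new flags are applied.
 * Returns 0 and sets *migrate when the file must be converted to
 * extent format; returns -EOPNOTSUPP if the caller tries to clear
 * the flag, which the hunk above does not support.
 */
static int check_extents_flag(unsigned int oldflags, unsigned int *newflags,
			      bool *migrate)
{
	*migrate = false;
	if (oldflags & EXTENTS_FL) {
		if (!(*newflags & EXTENTS_FL))
			return -EOPNOTSUPP;	/* cannot clear extents */
	} else if (*newflags & EXTENTS_FL) {
		*migrate = true;		/* convert after flags update */
		*newflags &= ~EXTENTS_FL;	/* migration sets it itself   */
	}
	return 0;
}

int main(void)
{
	unsigned int newflags = EXTENTS_FL;	/* user asks to set extents */
	bool migrate;
	int err = check_extents_flag(0, &newflags, &migrate);

	printf("err=%d migrate=%d newflags=%#x\n", err, migrate, newflags);
	return 0;
}

Clearing the flag from the new flags before migrating mirrors the hunk above: the migration path is expected to set the flag itself once the conversion succeeds.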
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index e0e3a5eb1ddb..dfe17a134052 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
| @@ -477,9 +477,10 @@ static void mb_cmp_bitmaps(struct ext4_buddy *e4b, void *bitmap) | |||
| 477 | b2 = (unsigned char *) bitmap; | 477 | b2 = (unsigned char *) bitmap; |
| 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { | 478 | for (i = 0; i < e4b->bd_sb->s_blocksize; i++) { |
| 479 | if (b1[i] != b2[i]) { | 479 | if (b1[i] != b2[i]) { |
| 480 | printk("corruption in group %lu at byte %u(%u):" | 480 | printk(KERN_ERR "corruption in group %lu " |
| 481 | " %x in copy != %x on disk/prealloc\n", | 481 | "at byte %u(%u): %x in copy != %x " |
| 482 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | 482 | "on disk/prealloc\n", |
| 483 | e4b->bd_group, i, i * 8, b1[i], b2[i]); | ||
| 483 | BUG(); | 484 | BUG(); |
| 484 | } | 485 | } |
| 485 | } | 486 | } |
| @@ -533,9 +534,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
| 533 | void *buddy; | 534 | void *buddy; |
| 534 | void *buddy2; | 535 | void *buddy2; |
| 535 | 536 | ||
| 536 | if (!test_opt(sb, MBALLOC)) | ||
| 537 | return 0; | ||
| 538 | |||
| 539 | { | 537 | { |
| 540 | static int mb_check_counter; | 538 | static int mb_check_counter; |
| 541 | if (mb_check_counter++ % 100 != 0) | 539 | if (mb_check_counter++ % 100 != 0) |
| @@ -784,9 +782,11 @@ static int ext4_mb_init_cache(struct page *page, char *incore) | |||
| 784 | if (bh[i] == NULL) | 782 | if (bh[i] == NULL) |
| 785 | goto out; | 783 | goto out; |
| 786 | 784 | ||
| 787 | if (bh_uptodate_or_lock(bh[i])) | 785 | if (buffer_uptodate(bh[i]) && |
| 786 | !(desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) | ||
| 788 | continue; | 787 | continue; |
| 789 | 788 | ||
| 789 | lock_buffer(bh[i]); | ||
| 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); | 790 | spin_lock(sb_bgl_lock(EXT4_SB(sb), first_group + i)); |
| 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { | 791 | if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { |
| 792 | ext4_init_block_bitmap(sb, bh[i], | 792 | ext4_init_block_bitmap(sb, bh[i], |
| @@ -2169,9 +2169,10 @@ static void ext4_mb_history_release(struct super_block *sb) | |||
| 2169 | { | 2169 | { |
| 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2170 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2171 | 2171 | ||
| 2172 | remove_proc_entry("mb_groups", sbi->s_mb_proc); | 2172 | if (sbi->s_proc != NULL) { |
| 2173 | remove_proc_entry("mb_history", sbi->s_mb_proc); | 2173 | remove_proc_entry("mb_groups", sbi->s_proc); |
| 2174 | 2174 | remove_proc_entry("mb_history", sbi->s_proc); | |
| 2175 | } | ||
| 2175 | kfree(sbi->s_mb_history); | 2176 | kfree(sbi->s_mb_history); |
| 2176 | } | 2177 | } |
| 2177 | 2178 | ||
| @@ -2180,10 +2181,10 @@ static void ext4_mb_history_init(struct super_block *sb) | |||
| 2180 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2181 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2181 | int i; | 2182 | int i; |
| 2182 | 2183 | ||
| 2183 | if (sbi->s_mb_proc != NULL) { | 2184 | if (sbi->s_proc != NULL) { |
| 2184 | proc_create_data("mb_history", S_IRUGO, sbi->s_mb_proc, | 2185 | proc_create_data("mb_history", S_IRUGO, sbi->s_proc, |
| 2185 | &ext4_mb_seq_history_fops, sb); | 2186 | &ext4_mb_seq_history_fops, sb); |
| 2186 | proc_create_data("mb_groups", S_IRUGO, sbi->s_mb_proc, | 2187 | proc_create_data("mb_groups", S_IRUGO, sbi->s_proc, |
| 2187 | &ext4_mb_seq_groups_fops, sb); | 2188 | &ext4_mb_seq_groups_fops, sb); |
| 2188 | } | 2189 | } |
| 2189 | 2190 | ||
| @@ -2299,6 +2300,7 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group, | |||
| 2299 | } | 2300 | } |
| 2300 | 2301 | ||
| 2301 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); | 2302 | INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list); |
| 2303 | meta_group_info[i]->bb_free_root.rb_node = NULL; | ||
| 2302 | 2304 | ||
| 2303 | #ifdef DOUBLE_CHECK | 2305 | #ifdef DOUBLE_CHECK |
| 2304 | { | 2306 | { |
| @@ -2485,19 +2487,14 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2485 | unsigned max; | 2487 | unsigned max; |
| 2486 | int ret; | 2488 | int ret; |
| 2487 | 2489 | ||
| 2488 | if (!test_opt(sb, MBALLOC)) | ||
| 2489 | return 0; | ||
| 2490 | |||
| 2491 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); | 2490 | i = (sb->s_blocksize_bits + 2) * sizeof(unsigned short); |
| 2492 | 2491 | ||
| 2493 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); | 2492 | sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL); |
| 2494 | if (sbi->s_mb_offsets == NULL) { | 2493 | if (sbi->s_mb_offsets == NULL) { |
| 2495 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
| 2496 | return -ENOMEM; | 2494 | return -ENOMEM; |
| 2497 | } | 2495 | } |
| 2498 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); | 2496 | sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL); |
| 2499 | if (sbi->s_mb_maxs == NULL) { | 2497 | if (sbi->s_mb_maxs == NULL) { |
| 2500 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
| 2501 | kfree(sbi->s_mb_maxs); | 2498 | kfree(sbi->s_mb_maxs); |
| 2502 | return -ENOMEM; | 2499 | return -ENOMEM; |
| 2503 | } | 2500 | } |
| @@ -2520,16 +2517,12 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2520 | /* init file for buddy data */ | 2517 | /* init file for buddy data */ |
| 2521 | ret = ext4_mb_init_backend(sb); | 2518 | ret = ext4_mb_init_backend(sb); |
| 2522 | if (ret != 0) { | 2519 | if (ret != 0) { |
| 2523 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
| 2524 | kfree(sbi->s_mb_offsets); | 2520 | kfree(sbi->s_mb_offsets); |
| 2525 | kfree(sbi->s_mb_maxs); | 2521 | kfree(sbi->s_mb_maxs); |
| 2526 | return ret; | 2522 | return ret; |
| 2527 | } | 2523 | } |
| 2528 | 2524 | ||
| 2529 | spin_lock_init(&sbi->s_md_lock); | 2525 | spin_lock_init(&sbi->s_md_lock); |
| 2530 | INIT_LIST_HEAD(&sbi->s_active_transaction); | ||
| 2531 | INIT_LIST_HEAD(&sbi->s_closed_transaction); | ||
| 2532 | INIT_LIST_HEAD(&sbi->s_committed_transaction); | ||
| 2533 | spin_lock_init(&sbi->s_bal_lock); | 2526 | spin_lock_init(&sbi->s_bal_lock); |
| 2534 | 2527 | ||
| 2535 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; | 2528 | sbi->s_mb_max_to_scan = MB_DEFAULT_MAX_TO_SCAN; |
| @@ -2540,17 +2533,15 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2540 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; | 2533 | sbi->s_mb_history_filter = EXT4_MB_HISTORY_DEFAULT; |
| 2541 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; | 2534 | sbi->s_mb_group_prealloc = MB_DEFAULT_GROUP_PREALLOC; |
| 2542 | 2535 | ||
| 2543 | i = sizeof(struct ext4_locality_group) * nr_cpu_ids; | 2536 | sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); |
| 2544 | sbi->s_locality_groups = kmalloc(i, GFP_KERNEL); | ||
| 2545 | if (sbi->s_locality_groups == NULL) { | 2537 | if (sbi->s_locality_groups == NULL) { |
| 2546 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
| 2547 | kfree(sbi->s_mb_offsets); | 2538 | kfree(sbi->s_mb_offsets); |
| 2548 | kfree(sbi->s_mb_maxs); | 2539 | kfree(sbi->s_mb_maxs); |
| 2549 | return -ENOMEM; | 2540 | return -ENOMEM; |
| 2550 | } | 2541 | } |
| 2551 | for (i = 0; i < nr_cpu_ids; i++) { | 2542 | for_each_possible_cpu(i) { |
| 2552 | struct ext4_locality_group *lg; | 2543 | struct ext4_locality_group *lg; |
| 2553 | lg = &sbi->s_locality_groups[i]; | 2544 | lg = per_cpu_ptr(sbi->s_locality_groups, i); |
| 2554 | mutex_init(&lg->lg_mutex); | 2545 | mutex_init(&lg->lg_mutex); |
| 2555 | for (j = 0; j < PREALLOC_TB_SIZE; j++) | 2546 | for (j = 0; j < PREALLOC_TB_SIZE; j++) |
| 2556 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); | 2547 | INIT_LIST_HEAD(&lg->lg_prealloc_list[j]); |
| @@ -2560,7 +2551,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery) | |||
| 2560 | ext4_mb_init_per_dev_proc(sb); | 2551 | ext4_mb_init_per_dev_proc(sb); |
| 2561 | ext4_mb_history_init(sb); | 2552 | ext4_mb_history_init(sb); |
| 2562 | 2553 | ||
| 2563 | printk("EXT4-fs: mballoc enabled\n"); | 2554 | sbi->s_journal->j_commit_callback = release_blocks_on_commit; |
| 2555 | |||
| 2556 | printk(KERN_INFO "EXT4-fs: mballoc enabled\n"); | ||
| 2564 | return 0; | 2557 | return 0; |
| 2565 | } | 2558 | } |
| 2566 | 2559 | ||
| @@ -2575,7 +2568,7 @@ static void ext4_mb_cleanup_pa(struct ext4_group_info *grp) | |||
| 2575 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); | 2568 | pa = list_entry(cur, struct ext4_prealloc_space, pa_group_list); |
| 2576 | list_del(&pa->pa_group_list); | 2569 | list_del(&pa->pa_group_list); |
| 2577 | count++; | 2570 | count++; |
| 2578 | kfree(pa); | 2571 | kmem_cache_free(ext4_pspace_cachep, pa); |
| 2579 | } | 2572 | } |
| 2580 | if (count) | 2573 | if (count) |
| 2581 | mb_debug("mballoc: %u PAs left\n", count); | 2574 | mb_debug("mballoc: %u PAs left\n", count); |
| @@ -2589,18 +2582,6 @@ int ext4_mb_release(struct super_block *sb) | |||
| 2589 | struct ext4_group_info *grinfo; | 2582 | struct ext4_group_info *grinfo; |
| 2590 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2583 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2591 | 2584 | ||
| 2592 | if (!test_opt(sb, MBALLOC)) | ||
| 2593 | return 0; | ||
| 2594 | |||
| 2595 | /* release freed, non-committed blocks */ | ||
| 2596 | spin_lock(&sbi->s_md_lock); | ||
| 2597 | list_splice_init(&sbi->s_closed_transaction, | ||
| 2598 | &sbi->s_committed_transaction); | ||
| 2599 | list_splice_init(&sbi->s_active_transaction, | ||
| 2600 | &sbi->s_committed_transaction); | ||
| 2601 | spin_unlock(&sbi->s_md_lock); | ||
| 2602 | ext4_mb_free_committed_blocks(sb); | ||
| 2603 | |||
| 2604 | if (sbi->s_group_info) { | 2585 | if (sbi->s_group_info) { |
| 2605 | for (i = 0; i < sbi->s_groups_count; i++) { | 2586 | for (i = 0; i < sbi->s_groups_count; i++) { |
| 2606 | grinfo = ext4_get_group_info(sb, i); | 2587 | grinfo = ext4_get_group_info(sb, i); |
| @@ -2647,69 +2628,64 @@ int ext4_mb_release(struct super_block *sb) | |||
| 2647 | atomic_read(&sbi->s_mb_discarded)); | 2628 | atomic_read(&sbi->s_mb_discarded)); |
| 2648 | } | 2629 | } |
| 2649 | 2630 | ||
| 2650 | kfree(sbi->s_locality_groups); | 2631 | free_percpu(sbi->s_locality_groups); |
| 2651 | |||
| 2652 | ext4_mb_history_release(sb); | 2632 | ext4_mb_history_release(sb); |
| 2653 | ext4_mb_destroy_per_dev_proc(sb); | 2633 | ext4_mb_destroy_per_dev_proc(sb); |
| 2654 | 2634 | ||
| 2655 | return 0; | 2635 | return 0; |
| 2656 | } | 2636 | } |
| 2657 | 2637 | ||
| 2658 | static noinline_for_stack void | 2638 | /* |
| 2659 | ext4_mb_free_committed_blocks(struct super_block *sb) | 2639 | * This function is called by the jbd2 layer once the commit has finished, |
| 2640 | * so we know we can free the blocks that were released with that commit. | ||
| 2641 | */ | ||
| 2642 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn) | ||
| 2660 | { | 2643 | { |
| 2661 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2644 | struct super_block *sb = journal->j_private; |
| 2662 | int err; | ||
| 2663 | int i; | ||
| 2664 | int count = 0; | ||
| 2665 | int count2 = 0; | ||
| 2666 | struct ext4_free_metadata *md; | ||
| 2667 | struct ext4_buddy e4b; | 2645 | struct ext4_buddy e4b; |
| 2646 | struct ext4_group_info *db; | ||
| 2647 | int err, count = 0, count2 = 0; | ||
| 2648 | struct ext4_free_data *entry; | ||
| 2649 | ext4_fsblk_t discard_block; | ||
| 2650 | struct list_head *l, *ltmp; | ||
| 2668 | 2651 | ||
| 2669 | if (list_empty(&sbi->s_committed_transaction)) | 2652 | list_for_each_safe(l, ltmp, &txn->t_private_list) { |
| 2670 | return; | 2653 | entry = list_entry(l, struct ext4_free_data, list); |
| 2671 | |||
| 2672 | /* there is committed blocks to be freed yet */ | ||
| 2673 | do { | ||
| 2674 | /* get next array of blocks */ | ||
| 2675 | md = NULL; | ||
| 2676 | spin_lock(&sbi->s_md_lock); | ||
| 2677 | if (!list_empty(&sbi->s_committed_transaction)) { | ||
| 2678 | md = list_entry(sbi->s_committed_transaction.next, | ||
| 2679 | struct ext4_free_metadata, list); | ||
| 2680 | list_del(&md->list); | ||
| 2681 | } | ||
| 2682 | spin_unlock(&sbi->s_md_lock); | ||
| 2683 | |||
| 2684 | if (md == NULL) | ||
| 2685 | break; | ||
| 2686 | 2654 | ||
| 2687 | mb_debug("gonna free %u blocks in group %lu (0x%p):", | 2655 | mb_debug("gonna free %u blocks in group %lu (0x%p):", |
| 2688 | md->num, md->group, md); | 2656 | entry->count, entry->group, entry); |
| 2689 | 2657 | ||
| 2690 | err = ext4_mb_load_buddy(sb, md->group, &e4b); | 2658 | err = ext4_mb_load_buddy(sb, entry->group, &e4b); |
| 2691 | /* we expect to find existing buddy because it's pinned */ | 2659 | /* we expect to find existing buddy because it's pinned */ |
| 2692 | BUG_ON(err != 0); | 2660 | BUG_ON(err != 0); |
| 2693 | 2661 | ||
| 2662 | db = e4b.bd_info; | ||
| 2694 | /* there are blocks to put in buddy to make them really free */ | 2663 | /* there are blocks to put in buddy to make them really free */ |
| 2695 | count += md->num; | 2664 | count += entry->count; |
| 2696 | count2++; | 2665 | count2++; |
| 2697 | ext4_lock_group(sb, md->group); | 2666 | ext4_lock_group(sb, entry->group); |
| 2698 | for (i = 0; i < md->num; i++) { | 2667 | /* Take it out of per group rb tree */ |
| 2699 | mb_debug(" %u", md->blocks[i]); | 2668 | rb_erase(&entry->node, &(db->bb_free_root)); |
| 2700 | mb_free_blocks(NULL, &e4b, md->blocks[i], 1); | 2669 | mb_free_blocks(NULL, &e4b, entry->start_blk, entry->count); |
| 2670 | |||
| 2671 | if (!db->bb_free_root.rb_node) { | ||
| 2672 | /* No more items in the per group rb tree | ||
| 2673 | * balance refcounts from ext4_mb_free_metadata() | ||
| 2674 | */ | ||
| 2675 | page_cache_release(e4b.bd_buddy_page); | ||
| 2676 | page_cache_release(e4b.bd_bitmap_page); | ||
| 2701 | } | 2677 | } |
| 2702 | mb_debug("\n"); | 2678 | ext4_unlock_group(sb, entry->group); |
| 2703 | ext4_unlock_group(sb, md->group); | 2679 | discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) |
| 2704 | 2680 | + entry->start_blk | |
| 2705 | /* balance refcounts from ext4_mb_free_metadata() */ | 2681 | + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); |
| 2706 | page_cache_release(e4b.bd_buddy_page); | 2682 | trace_mark(ext4_discard_blocks, "dev %s blk %llu count %u", sb->s_id, |
| 2707 | page_cache_release(e4b.bd_bitmap_page); | 2683 | (unsigned long long) discard_block, entry->count); |
| 2708 | 2684 | sb_issue_discard(sb, discard_block, entry->count); | |
| 2709 | kfree(md); | 2685 | |
| 2686 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
| 2710 | ext4_mb_release_desc(&e4b); | 2687 | ext4_mb_release_desc(&e4b); |
| 2711 | 2688 | } | |
| 2712 | } while (md); | ||
| 2713 | 2689 | ||
| 2714 | mb_debug("freed %u blocks in %u structures\n", count, count2); | 2690 | mb_debug("freed %u blocks in %u structures\n", count, count2); |
| 2715 | } | 2691 | } |
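release_blocks_on_commit() above replaces the old active/closed/committed transaction lists with per-transaction records that jbd2 hands back through its commit callback once the commit is durable. Stripped of the list_head, rb-tree and buddy details, the underlying pattern is simply "queue one record per freed extent, drain the queue when the commit callback fires"; the toy model below illustrates that, with made-up structures standing in for ext4_free_data and transaction_t.

#include <stdio.h>
#include <stdlib.h>

/* One record per extent freed inside a (mock) transaction. */
struct free_extent {
	unsigned long group;
	unsigned long start_blk;
	unsigned long count;
	struct free_extent *next;
};

struct mock_transaction {
	struct free_extent *pending;	/* analogous to t_private_list */
};

static void queue_free(struct mock_transaction *txn, unsigned long group,
		       unsigned long start, unsigned long count)
{
	struct free_extent *e = malloc(sizeof(*e));

	if (!e)
		return;
	e->group = group;
	e->start_blk = start;
	e->count = count;
	e->next = txn->pending;
	txn->pending = e;
}

/* Analogous to release_blocks_on_commit(): runs once the commit is durable. */
static void on_commit(struct mock_transaction *txn)
{
	struct free_extent *e = txn->pending;

	while (e) {
		struct free_extent *next = e->next;

		printf("freeing %lu block(s) at %lu in group %lu\n",
		       e->count, e->start_blk, e->group);
		free(e);
		e = next;
	}
	txn->pending = NULL;
}

int main(void)
{
	struct mock_transaction txn = { .pending = NULL };

	queue_free(&txn, 3, 128, 8);	/* example values */
	queue_free(&txn, 3, 512, 1);
	on_commit(&txn);
	return 0;
}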
| @@ -2721,119 +2697,52 @@ ext4_mb_free_committed_blocks(struct super_block *sb) | |||
| 2721 | #define EXT4_MB_STREAM_REQ "stream_req" | 2697 | #define EXT4_MB_STREAM_REQ "stream_req" |
| 2722 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" | 2698 | #define EXT4_MB_GROUP_PREALLOC "group_prealloc" |
| 2723 | 2699 | ||
| 2724 | |||
| 2725 | |||
| 2726 | #define MB_PROC_FOPS(name) \ | ||
| 2727 | static int ext4_mb_##name##_proc_show(struct seq_file *m, void *v) \ | ||
| 2728 | { \ | ||
| 2729 | struct ext4_sb_info *sbi = m->private; \ | ||
| 2730 | \ | ||
| 2731 | seq_printf(m, "%ld\n", sbi->s_mb_##name); \ | ||
| 2732 | return 0; \ | ||
| 2733 | } \ | ||
| 2734 | \ | ||
| 2735 | static int ext4_mb_##name##_proc_open(struct inode *inode, struct file *file)\ | ||
| 2736 | { \ | ||
| 2737 | return single_open(file, ext4_mb_##name##_proc_show, PDE(inode)->data);\ | ||
| 2738 | } \ | ||
| 2739 | \ | ||
| 2740 | static ssize_t ext4_mb_##name##_proc_write(struct file *file, \ | ||
| 2741 | const char __user *buf, size_t cnt, loff_t *ppos) \ | ||
| 2742 | { \ | ||
| 2743 | struct ext4_sb_info *sbi = PDE(file->f_path.dentry->d_inode)->data;\ | ||
| 2744 | char str[32]; \ | ||
| 2745 | long value; \ | ||
| 2746 | if (cnt >= sizeof(str)) \ | ||
| 2747 | return -EINVAL; \ | ||
| 2748 | if (copy_from_user(str, buf, cnt)) \ | ||
| 2749 | return -EFAULT; \ | ||
| 2750 | value = simple_strtol(str, NULL, 0); \ | ||
| 2751 | if (value <= 0) \ | ||
| 2752 | return -ERANGE; \ | ||
| 2753 | sbi->s_mb_##name = value; \ | ||
| 2754 | return cnt; \ | ||
| 2755 | } \ | ||
| 2756 | \ | ||
| 2757 | static const struct file_operations ext4_mb_##name##_proc_fops = { \ | ||
| 2758 | .owner = THIS_MODULE, \ | ||
| 2759 | .open = ext4_mb_##name##_proc_open, \ | ||
| 2760 | .read = seq_read, \ | ||
| 2761 | .llseek = seq_lseek, \ | ||
| 2762 | .release = single_release, \ | ||
| 2763 | .write = ext4_mb_##name##_proc_write, \ | ||
| 2764 | }; | ||
| 2765 | |||
| 2766 | MB_PROC_FOPS(stats); | ||
| 2767 | MB_PROC_FOPS(max_to_scan); | ||
| 2768 | MB_PROC_FOPS(min_to_scan); | ||
| 2769 | MB_PROC_FOPS(order2_reqs); | ||
| 2770 | MB_PROC_FOPS(stream_request); | ||
| 2771 | MB_PROC_FOPS(group_prealloc); | ||
| 2772 | |||
| 2773 | #define MB_PROC_HANDLER(name, var) \ | ||
| 2774 | do { \ | ||
| 2775 | proc = proc_create_data(name, mode, sbi->s_mb_proc, \ | ||
| 2776 | &ext4_mb_##var##_proc_fops, sbi); \ | ||
| 2777 | if (proc == NULL) { \ | ||
| 2778 | printk(KERN_ERR "EXT4-fs: can't to create %s\n", name); \ | ||
| 2779 | goto err_out; \ | ||
| 2780 | } \ | ||
| 2781 | } while (0) | ||
| 2782 | |||
| 2783 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) | 2700 | static int ext4_mb_init_per_dev_proc(struct super_block *sb) |
| 2784 | { | 2701 | { |
| 2702 | #ifdef CONFIG_PROC_FS | ||
| 2785 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; | 2703 | mode_t mode = S_IFREG | S_IRUGO | S_IWUSR; |
| 2786 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2704 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2787 | struct proc_dir_entry *proc; | 2705 | struct proc_dir_entry *proc; |
| 2788 | char devname[64]; | ||
| 2789 | 2706 | ||
| 2790 | if (proc_root_ext4 == NULL) { | 2707 | if (sbi->s_proc == NULL) |
| 2791 | sbi->s_mb_proc = NULL; | ||
| 2792 | return -EINVAL; | 2708 | return -EINVAL; |
| 2793 | } | ||
| 2794 | bdevname(sb->s_bdev, devname); | ||
| 2795 | sbi->s_mb_proc = proc_mkdir(devname, proc_root_ext4); | ||
| 2796 | |||
| 2797 | MB_PROC_HANDLER(EXT4_MB_STATS_NAME, stats); | ||
| 2798 | MB_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, max_to_scan); | ||
| 2799 | MB_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, min_to_scan); | ||
| 2800 | MB_PROC_HANDLER(EXT4_MB_ORDER2_REQ, order2_reqs); | ||
| 2801 | MB_PROC_HANDLER(EXT4_MB_STREAM_REQ, stream_request); | ||
| 2802 | MB_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, group_prealloc); | ||
| 2803 | 2709 | ||
| 2710 | EXT4_PROC_HANDLER(EXT4_MB_STATS_NAME, mb_stats); | ||
| 2711 | EXT4_PROC_HANDLER(EXT4_MB_MAX_TO_SCAN_NAME, mb_max_to_scan); | ||
| 2712 | EXT4_PROC_HANDLER(EXT4_MB_MIN_TO_SCAN_NAME, mb_min_to_scan); | ||
| 2713 | EXT4_PROC_HANDLER(EXT4_MB_ORDER2_REQ, mb_order2_reqs); | ||
| 2714 | EXT4_PROC_HANDLER(EXT4_MB_STREAM_REQ, mb_stream_request); | ||
| 2715 | EXT4_PROC_HANDLER(EXT4_MB_GROUP_PREALLOC, mb_group_prealloc); | ||
| 2804 | return 0; | 2716 | return 0; |
| 2805 | 2717 | ||
| 2806 | err_out: | 2718 | err_out: |
| 2807 | printk(KERN_ERR "EXT4-fs: Unable to create %s\n", devname); | 2719 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
| 2808 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2720 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
| 2809 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2721 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
| 2810 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2722 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
| 2811 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2723 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
| 2812 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2724 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
| 2813 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | ||
| 2814 | remove_proc_entry(devname, proc_root_ext4); | ||
| 2815 | sbi->s_mb_proc = NULL; | ||
| 2816 | |||
| 2817 | return -ENOMEM; | 2725 | return -ENOMEM; |
| 2726 | #else | ||
| 2727 | return 0; | ||
| 2728 | #endif | ||
| 2818 | } | 2729 | } |
| 2819 | 2730 | ||
| 2820 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) | 2731 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb) |
| 2821 | { | 2732 | { |
| 2733 | #ifdef CONFIG_PROC_FS | ||
| 2822 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2734 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 2823 | char devname[64]; | ||
| 2824 | 2735 | ||
| 2825 | if (sbi->s_mb_proc == NULL) | 2736 | if (sbi->s_proc == NULL) |
| 2826 | return -EINVAL; | 2737 | return -EINVAL; |
| 2827 | 2738 | ||
| 2828 | bdevname(sb->s_bdev, devname); | 2739 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_proc); |
| 2829 | remove_proc_entry(EXT4_MB_GROUP_PREALLOC, sbi->s_mb_proc); | 2740 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_proc); |
| 2830 | remove_proc_entry(EXT4_MB_STREAM_REQ, sbi->s_mb_proc); | 2741 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_proc); |
| 2831 | remove_proc_entry(EXT4_MB_ORDER2_REQ, sbi->s_mb_proc); | 2742 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_proc); |
| 2832 | remove_proc_entry(EXT4_MB_MIN_TO_SCAN_NAME, sbi->s_mb_proc); | 2743 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_proc); |
| 2833 | remove_proc_entry(EXT4_MB_MAX_TO_SCAN_NAME, sbi->s_mb_proc); | 2744 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_proc); |
| 2834 | remove_proc_entry(EXT4_MB_STATS_NAME, sbi->s_mb_proc); | 2745 | #endif |
| 2835 | remove_proc_entry(devname, proc_root_ext4); | ||
| 2836 | |||
| 2837 | return 0; | 2746 | return 0; |
| 2838 | } | 2747 | } |
| 2839 | 2748 | ||
| @@ -2854,11 +2763,16 @@ int __init init_ext4_mballoc(void) | |||
| 2854 | kmem_cache_destroy(ext4_pspace_cachep); | 2763 | kmem_cache_destroy(ext4_pspace_cachep); |
| 2855 | return -ENOMEM; | 2764 | return -ENOMEM; |
| 2856 | } | 2765 | } |
| 2857 | #ifdef CONFIG_PROC_FS | 2766 | |
| 2858 | proc_root_ext4 = proc_mkdir("fs/ext4", NULL); | 2767 | ext4_free_ext_cachep = |
| 2859 | if (proc_root_ext4 == NULL) | 2768 | kmem_cache_create("ext4_free_block_extents", |
| 2860 | printk(KERN_ERR "EXT4-fs: Unable to create fs/ext4\n"); | 2769 | sizeof(struct ext4_free_data), |
| 2861 | #endif | 2770 | 0, SLAB_RECLAIM_ACCOUNT, NULL); |
| 2771 | if (ext4_free_ext_cachep == NULL) { | ||
| 2772 | kmem_cache_destroy(ext4_pspace_cachep); | ||
| 2773 | kmem_cache_destroy(ext4_ac_cachep); | ||
| 2774 | return -ENOMEM; | ||
| 2775 | } | ||
| 2862 | return 0; | 2776 | return 0; |
| 2863 | } | 2777 | } |
| 2864 | 2778 | ||
| @@ -2867,9 +2781,7 @@ void exit_ext4_mballoc(void) | |||
| 2867 | /* XXX: synchronize_rcu(); */ | 2781 | /* XXX: synchronize_rcu(); */ |
| 2868 | kmem_cache_destroy(ext4_pspace_cachep); | 2782 | kmem_cache_destroy(ext4_pspace_cachep); |
| 2869 | kmem_cache_destroy(ext4_ac_cachep); | 2783 | kmem_cache_destroy(ext4_ac_cachep); |
| 2870 | #ifdef CONFIG_PROC_FS | 2784 | kmem_cache_destroy(ext4_free_ext_cachep); |
| 2871 | remove_proc_entry("fs/ext4", NULL); | ||
| 2872 | #endif | ||
| 2873 | } | 2785 | } |
| 2874 | 2786 | ||
| 2875 | 2787 | ||
| @@ -2879,7 +2791,7 @@ void exit_ext4_mballoc(void) | |||
| 2879 | */ | 2791 | */ |
| 2880 | static noinline_for_stack int | 2792 | static noinline_for_stack int |
| 2881 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | 2793 | ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, |
| 2882 | handle_t *handle) | 2794 | handle_t *handle, unsigned long reserv_blks) |
| 2883 | { | 2795 | { |
| 2884 | struct buffer_head *bitmap_bh = NULL; | 2796 | struct buffer_head *bitmap_bh = NULL; |
| 2885 | struct ext4_super_block *es; | 2797 | struct ext4_super_block *es; |
| @@ -2968,15 +2880,16 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, | |||
| 2968 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); | 2880 | le16_add_cpu(&gdp->bg_free_blocks_count, -ac->ac_b_ex.fe_len); |
| 2969 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); | 2881 | gdp->bg_checksum = ext4_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp); |
| 2970 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); | 2882 | spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); |
| 2971 | 2883 | percpu_counter_sub(&sbi->s_freeblocks_counter, ac->ac_b_ex.fe_len); | |
| 2972 | /* | 2884 | /* |
| 2973 | * free blocks account has already be reduced/reserved | 2885 | * Now reduce the dirty block count also. Should not go negative |
| 2974 | * at write_begin() time for delayed allocation | ||
| 2975 | * do not double accounting | ||
| 2976 | */ | 2886 | */ |
| 2977 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) | 2887 | if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED)) |
| 2978 | percpu_counter_sub(&sbi->s_freeblocks_counter, | 2888 | /* release all the reserved blocks if non delalloc */ |
| 2979 | ac->ac_b_ex.fe_len); | 2889 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks); |
| 2890 | else | ||
| 2891 | percpu_counter_sub(&sbi->s_dirtyblocks_counter, | ||
| 2892 | ac->ac_b_ex.fe_len); | ||
| 2980 | 2893 | ||
| 2981 | if (sbi->s_log_groups_per_flex) { | 2894 | if (sbi->s_log_groups_per_flex) { |
| 2982 | ext4_group_t flex_group = ext4_flex_group(sbi, | 2895 | ext4_group_t flex_group = ext4_flex_group(sbi, |
| @@ -3884,7 +3797,7 @@ out: | |||
| 3884 | * | 3797 | * |
| 3885 | * FIXME!! Make sure it is valid at all the call sites | 3798 | * FIXME!! Make sure it is valid at all the call sites |
| 3886 | */ | 3799 | */ |
| 3887 | void ext4_mb_discard_inode_preallocations(struct inode *inode) | 3800 | void ext4_discard_preallocations(struct inode *inode) |
| 3888 | { | 3801 | { |
| 3889 | struct ext4_inode_info *ei = EXT4_I(inode); | 3802 | struct ext4_inode_info *ei = EXT4_I(inode); |
| 3890 | struct super_block *sb = inode->i_sb; | 3803 | struct super_block *sb = inode->i_sb; |
| @@ -3896,7 +3809,7 @@ void ext4_mb_discard_inode_preallocations(struct inode *inode) | |||
| 3896 | struct ext4_buddy e4b; | 3809 | struct ext4_buddy e4b; |
| 3897 | int err; | 3810 | int err; |
| 3898 | 3811 | ||
| 3899 | if (!test_opt(sb, MBALLOC) || !S_ISREG(inode->i_mode)) { | 3812 | if (!S_ISREG(inode->i_mode)) { |
| 3900 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ | 3813 | /*BUG_ON(!list_empty(&ei->i_prealloc_list));*/ |
| 3901 | return; | 3814 | return; |
| 3902 | } | 3815 | } |
| @@ -4094,8 +4007,7 @@ static void ext4_mb_group_or_file(struct ext4_allocation_context *ac) | |||
| 4094 | * per cpu locality group is to reduce the contention between block | 4007 | * per cpu locality group is to reduce the contention between block |
| 4095 | * request from multiple CPUs. | 4008 | * request from multiple CPUs. |
| 4096 | */ | 4009 | */ |
| 4097 | ac->ac_lg = &sbi->s_locality_groups[get_cpu()]; | 4010 | ac->ac_lg = per_cpu_ptr(sbi->s_locality_groups, raw_smp_processor_id()); |
| 4098 | put_cpu(); | ||
| 4099 | 4011 | ||
| 4100 | /* we're going to use group allocation */ | 4012 | /* we're going to use group allocation */ |
| 4101 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; | 4013 | ac->ac_flags |= EXT4_MB_HINT_GROUP_ALLOC; |
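The locality-group hunk above switches from get_cpu()/put_cpu() to per_cpu_ptr() on raw_smp_processor_id(): the group carries its own locking, so occasionally landing on another CPU's group after a migration is harmless and no preemption pinning is needed. Below is an illustrative userspace analogue using pthreads, not kernel code; the bucket contents and the init that allocates one bucket per CPU are assumed.

    #define _GNU_SOURCE
    #include <pthread.h>
    #include <sched.h>

    struct locality_group {
        pthread_mutex_t lock;   /* each bucket is separately locked */
        /* per-group preallocation state would live here */
    };

    static struct locality_group *groups;   /* one per CPU, set up at init */
    static int ngroups;

    /* The CPU id is only a contention-spreading hint: if the thread migrates
     * between reading it and taking the lock, it simply uses another bucket,
     * which is why no get_cpu()/put_cpu() pinning is required. */
    static struct locality_group *pick_group(void)
    {
        int cpu = sched_getcpu();    /* analogous to raw_smp_processor_id() */

        if (cpu < 0)
            cpu = 0;
        return &groups[cpu % ngroups];
    }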
| @@ -4369,33 +4281,32 @@ static int ext4_mb_discard_preallocations(struct super_block *sb, int needed) | |||
| 4369 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | 4281 | ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, |
| 4370 | struct ext4_allocation_request *ar, int *errp) | 4282 | struct ext4_allocation_request *ar, int *errp) |
| 4371 | { | 4283 | { |
| 4284 | int freed; | ||
| 4372 | struct ext4_allocation_context *ac = NULL; | 4285 | struct ext4_allocation_context *ac = NULL; |
| 4373 | struct ext4_sb_info *sbi; | 4286 | struct ext4_sb_info *sbi; |
| 4374 | struct super_block *sb; | 4287 | struct super_block *sb; |
| 4375 | ext4_fsblk_t block = 0; | 4288 | ext4_fsblk_t block = 0; |
| 4376 | int freed; | 4289 | unsigned long inquota; |
| 4377 | int inquota; | 4290 | unsigned long reserv_blks = 0; |
| 4378 | 4291 | ||
| 4379 | sb = ar->inode->i_sb; | 4292 | sb = ar->inode->i_sb; |
| 4380 | sbi = EXT4_SB(sb); | 4293 | sbi = EXT4_SB(sb); |
| 4381 | 4294 | ||
| 4382 | if (!test_opt(sb, MBALLOC)) { | ||
| 4383 | block = ext4_old_new_blocks(handle, ar->inode, ar->goal, | ||
| 4384 | &(ar->len), errp); | ||
| 4385 | return block; | ||
| 4386 | } | ||
| 4387 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { | 4295 | if (!EXT4_I(ar->inode)->i_delalloc_reserved_flag) { |
| 4388 | /* | 4296 | /* |
| 4389 | * With delalloc we already reserved the blocks | 4297 | * With delalloc we already reserved the blocks |
| 4390 | */ | 4298 | */ |
| 4391 | ar->len = ext4_has_free_blocks(sbi, ar->len); | 4299 | while (ar->len && ext4_claim_free_blocks(sbi, ar->len)) { |
| 4392 | } | 4300 | /* let others to free the space */ |
| 4393 | 4301 | yield(); | |
| 4394 | if (ar->len == 0) { | 4302 | ar->len = ar->len >> 1; |
| 4395 | *errp = -ENOSPC; | 4303 | } |
| 4396 | return 0; | 4304 | if (!ar->len) { |
| 4305 | *errp = -ENOSPC; | ||
| 4306 | return 0; | ||
| 4307 | } | ||
| 4308 | reserv_blks = ar->len; | ||
| 4397 | } | 4309 | } |
| 4398 | |||
| 4399 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { | 4310 | while (ar->len && DQUOT_ALLOC_BLOCK(ar->inode, ar->len)) { |
| 4400 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; | 4311 | ar->flags |= EXT4_MB_HINT_NOPREALLOC; |
| 4401 | ar->len--; | 4312 | ar->len--; |
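The new reservation loop above claims blocks against the dirty-block counter and, on failure, yields and halves the request until it either succeeds or reaches zero (at which point the caller returns -ENOSPC). A standalone rendering of that backoff, with claim_blocks() as a hypothetical stand-in for ext4_claim_free_blocks():

    #include <sched.h>

    /* Hypothetical stand-in: returns 0 if 'n' blocks could be claimed. */
    extern int claim_blocks(unsigned long n);

    /* Returns the number of blocks actually claimed; 0 means the caller
     * should fail the allocation with -ENOSPC. */
    static unsigned long claim_with_backoff(unsigned long want)
    {
        while (want && claim_blocks(want)) {
            sched_yield();   /* let other tasks free or commit space */
            want >>= 1;      /* retry with half the request */
        }
        return want;
    }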
| @@ -4416,8 +4327,6 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle, | |||
| 4416 | goto out1; | 4327 | goto out1; |
| 4417 | } | 4328 | } |
| 4418 | 4329 | ||
| 4419 | ext4_mb_poll_new_transaction(sb, handle); | ||
| 4420 | |||
| 4421 | *errp = ext4_mb_initialize_context(ac, ar); | 4330 | *errp = ext4_mb_initialize_context(ac, ar); |
| 4422 | if (*errp) { | 4331 | if (*errp) { |
| 4423 | ar->len = 0; | 4332 | ar->len = 0; |
| @@ -4441,7 +4350,7 @@ repeat: | |||
| 4441 | } | 4350 | } |
| 4442 | 4351 | ||
| 4443 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { | 4352 | if (likely(ac->ac_status == AC_STATUS_FOUND)) { |
| 4444 | *errp = ext4_mb_mark_diskspace_used(ac, handle); | 4353 | *errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_blks); |
| 4445 | if (*errp == -EAGAIN) { | 4354 | if (*errp == -EAGAIN) { |
| 4446 | ac->ac_b_ex.fe_group = 0; | 4355 | ac->ac_b_ex.fe_group = 0; |
| 4447 | ac->ac_b_ex.fe_start = 0; | 4356 | ac->ac_b_ex.fe_start = 0; |
| @@ -4476,35 +4385,20 @@ out1: | |||
| 4476 | 4385 | ||
| 4477 | return block; | 4386 | return block; |
| 4478 | } | 4387 | } |
| 4479 | static void ext4_mb_poll_new_transaction(struct super_block *sb, | ||
| 4480 | handle_t *handle) | ||
| 4481 | { | ||
| 4482 | struct ext4_sb_info *sbi = EXT4_SB(sb); | ||
| 4483 | 4388 | ||
| 4484 | if (sbi->s_last_transaction == handle->h_transaction->t_tid) | 4389 | /* |
| 4485 | return; | 4390 | * We can merge two free data extents only if the physical blocks |
| 4486 | 4391 | * are contiguous, AND the extents were freed by the same transaction, | |
| 4487 | /* new transaction! time to close last one and free blocks for | 4392 | * AND the blocks are associated with the same group. |
| 4488 | * committed transaction. we know that only transaction can be | 4393 | */ |
| 4489 | * active, so previos transaction can be being logged and we | 4394 | static int can_merge(struct ext4_free_data *entry1, |
| 4490 | * know that transaction before previous is known to be already | 4395 | struct ext4_free_data *entry2) |
| 4491 | * logged. this means that now we may free blocks freed in all | 4396 | { |
| 4492 | * transactions before previous one. hope I'm clear enough ... */ | 4397 | if ((entry1->t_tid == entry2->t_tid) && |
| 4493 | 4398 | (entry1->group == entry2->group) && | |
| 4494 | spin_lock(&sbi->s_md_lock); | 4399 | ((entry1->start_blk + entry1->count) == entry2->start_blk)) |
| 4495 | if (sbi->s_last_transaction != handle->h_transaction->t_tid) { | 4400 | return 1; |
| 4496 | mb_debug("new transaction %lu, old %lu\n", | 4401 | return 0; |
| 4497 | (unsigned long) handle->h_transaction->t_tid, | ||
| 4498 | (unsigned long) sbi->s_last_transaction); | ||
| 4499 | list_splice_init(&sbi->s_closed_transaction, | ||
| 4500 | &sbi->s_committed_transaction); | ||
| 4501 | list_splice_init(&sbi->s_active_transaction, | ||
| 4502 | &sbi->s_closed_transaction); | ||
| 4503 | sbi->s_last_transaction = handle->h_transaction->t_tid; | ||
| 4504 | } | ||
| 4505 | spin_unlock(&sbi->s_md_lock); | ||
| 4506 | |||
| 4507 | ext4_mb_free_committed_blocks(sb); | ||
| 4508 | } | 4402 | } |
| 4509 | 4403 | ||
| 4510 | static noinline_for_stack int | 4404 | static noinline_for_stack int |
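can_merge() above replaces the old fixed-size ext4_free_metadata containers: freed extents coalesce only when they were freed by the same transaction, belong to the same block group, and are physically contiguous. A simplified userspace rendering of the same test; the struct here is a trimmed stand-in for ext4_free_data, not the kernel type.

    #include <stdbool.h>

    struct free_extent {
        unsigned int tid;     /* transaction that freed the blocks */
        unsigned int group;   /* block group the extent belongs to */
        unsigned int start;   /* first freed block within the group */
        unsigned int count;   /* number of freed blocks */
    };

    static bool extents_mergeable(const struct free_extent *left,
                                  const struct free_extent *right)
    {
        return left->tid == right->tid &&
               left->group == right->group &&
               left->start + left->count == right->start;
    }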
| @@ -4514,57 +4408,80 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b, | |||
| 4514 | struct ext4_group_info *db = e4b->bd_info; | 4408 | struct ext4_group_info *db = e4b->bd_info; |
| 4515 | struct super_block *sb = e4b->bd_sb; | 4409 | struct super_block *sb = e4b->bd_sb; |
| 4516 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 4410 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 4517 | struct ext4_free_metadata *md; | 4411 | struct ext4_free_data *entry, *new_entry; |
| 4518 | int i; | 4412 | struct rb_node **n = &db->bb_free_root.rb_node, *node; |
| 4413 | struct rb_node *parent = NULL, *new_node; | ||
| 4414 | |||
| 4519 | 4415 | ||
| 4520 | BUG_ON(e4b->bd_bitmap_page == NULL); | 4416 | BUG_ON(e4b->bd_bitmap_page == NULL); |
| 4521 | BUG_ON(e4b->bd_buddy_page == NULL); | 4417 | BUG_ON(e4b->bd_buddy_page == NULL); |
| 4522 | 4418 | ||
| 4419 | new_entry = kmem_cache_alloc(ext4_free_ext_cachep, GFP_NOFS); | ||
| 4420 | new_entry->start_blk = block; | ||
| 4421 | new_entry->group = group; | ||
| 4422 | new_entry->count = count; | ||
| 4423 | new_entry->t_tid = handle->h_transaction->t_tid; | ||
| 4424 | new_node = &new_entry->node; | ||
| 4425 | |||
| 4523 | ext4_lock_group(sb, group); | 4426 | ext4_lock_group(sb, group); |
| 4524 | for (i = 0; i < count; i++) { | 4427 | if (!*n) { |
| 4525 | md = db->bb_md_cur; | 4428 | /* first free block exent. We need to |
| 4526 | if (md && db->bb_tid != handle->h_transaction->t_tid) { | 4429 | protect buddy cache from being freed, |
| 4527 | db->bb_md_cur = NULL; | 4430 | * otherwise we'll refresh it from |
| 4528 | md = NULL; | 4431 | * on-disk bitmap and lose not-yet-available |
| 4432 | * blocks */ | ||
| 4433 | page_cache_get(e4b->bd_buddy_page); | ||
| 4434 | page_cache_get(e4b->bd_bitmap_page); | ||
| 4435 | } | ||
| 4436 | while (*n) { | ||
| 4437 | parent = *n; | ||
| 4438 | entry = rb_entry(parent, struct ext4_free_data, node); | ||
| 4439 | if (block < entry->start_blk) | ||
| 4440 | n = &(*n)->rb_left; | ||
| 4441 | else if (block >= (entry->start_blk + entry->count)) | ||
| 4442 | n = &(*n)->rb_right; | ||
| 4443 | else { | ||
| 4444 | ext4_error(sb, __func__, | ||
| 4445 | "Double free of blocks %d (%d %d)\n", | ||
| 4446 | block, entry->start_blk, entry->count); | ||
| 4447 | return 0; | ||
| 4529 | } | 4448 | } |
| 4449 | } | ||
| 4530 | 4450 | ||
| 4531 | if (md == NULL) { | 4451 | rb_link_node(new_node, parent, n); |
| 4532 | ext4_unlock_group(sb, group); | 4452 | rb_insert_color(new_node, &db->bb_free_root); |
| 4533 | md = kmalloc(sizeof(*md), GFP_NOFS); | 4453 | |
| 4534 | if (md == NULL) | 4454 | /* Now try to see the extent can be merged to left and right */ |
| 4535 | return -ENOMEM; | 4455 | node = rb_prev(new_node); |
| 4536 | md->num = 0; | 4456 | if (node) { |
| 4537 | md->group = group; | 4457 | entry = rb_entry(node, struct ext4_free_data, node); |
| 4538 | 4458 | if (can_merge(entry, new_entry)) { | |
| 4539 | ext4_lock_group(sb, group); | 4459 | new_entry->start_blk = entry->start_blk; |
| 4540 | if (db->bb_md_cur == NULL) { | 4460 | new_entry->count += entry->count; |
| 4541 | spin_lock(&sbi->s_md_lock); | 4461 | rb_erase(node, &(db->bb_free_root)); |
| 4542 | list_add(&md->list, &sbi->s_active_transaction); | 4462 | spin_lock(&sbi->s_md_lock); |
| 4543 | spin_unlock(&sbi->s_md_lock); | 4463 | list_del(&entry->list); |
| 4544 | /* protect buddy cache from being freed, | 4464 | spin_unlock(&sbi->s_md_lock); |
| 4545 | * otherwise we'll refresh it from | 4465 | kmem_cache_free(ext4_free_ext_cachep, entry); |
| 4546 | * on-disk bitmap and lose not-yet-available | ||
| 4547 | * blocks */ | ||
| 4548 | page_cache_get(e4b->bd_buddy_page); | ||
| 4549 | page_cache_get(e4b->bd_bitmap_page); | ||
| 4550 | db->bb_md_cur = md; | ||
| 4551 | db->bb_tid = handle->h_transaction->t_tid; | ||
| 4552 | mb_debug("new md 0x%p for group %lu\n", | ||
| 4553 | md, md->group); | ||
| 4554 | } else { | ||
| 4555 | kfree(md); | ||
| 4556 | md = db->bb_md_cur; | ||
| 4557 | } | ||
| 4558 | } | 4466 | } |
| 4467 | } | ||
| 4559 | 4468 | ||
| 4560 | BUG_ON(md->num >= EXT4_BB_MAX_BLOCKS); | 4469 | node = rb_next(new_node); |
| 4561 | md->blocks[md->num] = block + i; | 4470 | if (node) { |
| 4562 | md->num++; | 4471 | entry = rb_entry(node, struct ext4_free_data, node); |
| 4563 | if (md->num == EXT4_BB_MAX_BLOCKS) { | 4472 | if (can_merge(new_entry, entry)) { |
| 4564 | /* no more space, put full container on a sb's list */ | 4473 | new_entry->count += entry->count; |
| 4565 | db->bb_md_cur = NULL; | 4474 | rb_erase(node, &(db->bb_free_root)); |
| 4475 | spin_lock(&sbi->s_md_lock); | ||
| 4476 | list_del(&entry->list); | ||
| 4477 | spin_unlock(&sbi->s_md_lock); | ||
| 4478 | kmem_cache_free(ext4_free_ext_cachep, entry); | ||
| 4566 | } | 4479 | } |
| 4567 | } | 4480 | } |
| 4481 | /* Add the extent to transaction's private list */ | ||
| 4482 | spin_lock(&sbi->s_md_lock); | ||
| 4483 | list_add(&new_entry->list, &handle->h_transaction->t_private_list); | ||
| 4484 | spin_unlock(&sbi->s_md_lock); | ||
| 4568 | ext4_unlock_group(sb, group); | 4485 | ext4_unlock_group(sb, group); |
| 4569 | return 0; | 4486 | return 0; |
| 4570 | } | 4487 | } |
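ext4_mb_free_metadata() above keys freed extents by start block in a per-group rbtree, flags an overlap as a double free, and then tries to coalesce the new node with its predecessor and successor (rb_prev()/rb_next()). The sketch below models the same insert-then-merge-with-neighbours flow with a sorted singly linked list instead of an rbtree, purely for illustration; the type is the same trimmed stand-in as in the previous sketch, redeclared here so the block is self-contained.

    #include <stdbool.h>
    #include <stdlib.h>

    struct fe {
        unsigned int tid, group, start, count;
        struct fe *next;
    };

    static bool fe_mergeable(const struct fe *l, const struct fe *r)
    {
        return l->tid == r->tid && l->group == r->group &&
               l->start + l->count == r->start;
    }

    /* Insert a freed extent keeping the list sorted by start block, then try
     * to merge with the previous and the next extent, as the rbtree code
     * does with its neighbours. Returns false on overlap (double free) or
     * allocation failure. */
    static bool record_freed_extent(struct fe **head, unsigned int tid,
                                    unsigned int group, unsigned int start,
                                    unsigned int count)
    {
        struct fe **pos = head, *prev = NULL, *n;

        while (*pos && (*pos)->start + (*pos)->count <= start) {
            prev = *pos;
            pos = &(*pos)->next;
        }
        if (*pos && (*pos)->start < start + count)
            return false;              /* overlap: double free */

        n = malloc(sizeof(*n));
        if (!n)
            return false;
        n->tid = tid;
        n->group = group;
        n->start = start;
        n->count = count;
        n->next = *pos;
        *pos = n;

        if (prev && fe_mergeable(prev, n)) {        /* merge left */
            prev->count += n->count;
            prev->next = n->next;
            free(n);
            n = prev;
        }
        if (n->next && fe_mergeable(n, n->next)) {  /* merge right */
            struct fe *r = n->next;

            n->count += r->count;
            n->next = r->next;
            free(r);
        }
        return true;
    }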
| @@ -4592,8 +4509,6 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, | |||
| 4592 | 4509 | ||
| 4593 | *freed = 0; | 4510 | *freed = 0; |
| 4594 | 4511 | ||
| 4595 | ext4_mb_poll_new_transaction(sb, handle); | ||
| 4596 | |||
| 4597 | sbi = EXT4_SB(sb); | 4512 | sbi = EXT4_SB(sb); |
| 4598 | es = EXT4_SB(sb)->s_es; | 4513 | es = EXT4_SB(sb)->s_es; |
| 4599 | if (block < le32_to_cpu(es->s_first_data_block) || | 4514 | if (block < le32_to_cpu(es->s_first_data_block) || |
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index c7c9906c2a75..b5dff1fff1e5 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
| @@ -18,6 +18,8 @@ | |||
| 18 | #include <linux/pagemap.h> | 18 | #include <linux/pagemap.h> |
| 19 | #include <linux/seq_file.h> | 19 | #include <linux/seq_file.h> |
| 20 | #include <linux/version.h> | 20 | #include <linux/version.h> |
| 21 | #include <linux/blkdev.h> | ||
| 22 | #include <linux/marker.h> | ||
| 21 | #include "ext4_jbd2.h" | 23 | #include "ext4_jbd2.h" |
| 22 | #include "ext4.h" | 24 | #include "ext4.h" |
| 23 | #include "group.h" | 25 | #include "group.h" |
| @@ -98,23 +100,29 @@ | |||
| 98 | 100 | ||
| 99 | static struct kmem_cache *ext4_pspace_cachep; | 101 | static struct kmem_cache *ext4_pspace_cachep; |
| 100 | static struct kmem_cache *ext4_ac_cachep; | 102 | static struct kmem_cache *ext4_ac_cachep; |
| 103 | static struct kmem_cache *ext4_free_ext_cachep; | ||
| 101 | 104 | ||
| 102 | #ifdef EXT4_BB_MAX_BLOCKS | 105 | struct ext4_free_data { |
| 103 | #undef EXT4_BB_MAX_BLOCKS | 106 | /* this links the free block information from group_info */ |
| 104 | #endif | 107 | struct rb_node node; |
| 105 | #define EXT4_BB_MAX_BLOCKS 30 | ||
| 106 | 108 | ||
| 107 | struct ext4_free_metadata { | 109 | /* this links the free block information from ext4_sb_info */ |
| 108 | ext4_group_t group; | ||
| 109 | unsigned short num; | ||
| 110 | ext4_grpblk_t blocks[EXT4_BB_MAX_BLOCKS]; | ||
| 111 | struct list_head list; | 110 | struct list_head list; |
| 111 | |||
| 112 | /* group which free block extent belongs */ | ||
| 113 | ext4_group_t group; | ||
| 114 | |||
| 115 | /* free block extent */ | ||
| 116 | ext4_grpblk_t start_blk; | ||
| 117 | ext4_grpblk_t count; | ||
| 118 | |||
| 119 | /* transaction which freed this extent */ | ||
| 120 | tid_t t_tid; | ||
| 112 | }; | 121 | }; |
| 113 | 122 | ||
| 114 | struct ext4_group_info { | 123 | struct ext4_group_info { |
| 115 | unsigned long bb_state; | 124 | unsigned long bb_state; |
| 116 | unsigned long bb_tid; | 125 | struct rb_root bb_free_root; |
| 117 | struct ext4_free_metadata *bb_md_cur; | ||
| 118 | unsigned short bb_first_free; | 126 | unsigned short bb_first_free; |
| 119 | unsigned short bb_free; | 127 | unsigned short bb_free; |
| 120 | unsigned short bb_fragments; | 128 | unsigned short bb_fragments; |
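The new struct ext4_free_data carries two independent linkages: an rb_node that threads it into the per-group tree (bb_free_root) and a list_head that threads it into the transaction's private list, so the same object is reachable from either container via rb_entry()/list_entry(). A minimal userspace illustration of that embedding, with trimmed stand-in link types and a local container_of:

    #include <stddef.h>

    /* Minimal container_of, as used implicitly by rb_entry() and list_entry(). */
    #define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    struct tree_link { struct tree_link *left, *right; };   /* stand-in for rb_node */
    struct list_link { struct list_link *prev, *next; };    /* stand-in for list_head */

    /* Like ext4_free_data: one object, two linkages. The tree link threads it
     * into the per-group structure, the list link into the transaction's
     * private list, and either container hands back the enclosing object. */
    struct freed_extent {
        struct tree_link node;   /* keyed by start block within the group */
        struct list_link list;   /* walked when the transaction commits */
        unsigned int start, count;
    };

    static struct freed_extent *from_node(struct tree_link *n)
    {
        return container_of(n, struct freed_extent, node);
    }

    static struct freed_extent *from_list(struct list_link *l)
    {
        return container_of(l, struct freed_extent, list);
    }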
| @@ -257,13 +265,10 @@ static void ext4_mb_store_history(struct ext4_allocation_context *ac); | |||
| 257 | 265 | ||
| 258 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) | 266 | #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) |
| 259 | 267 | ||
| 260 | static struct proc_dir_entry *proc_root_ext4; | ||
| 261 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); | 268 | struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t); |
| 262 | 269 | ||
| 263 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, | 270 | static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, |
| 264 | ext4_group_t group); | 271 | ext4_group_t group); |
| 265 | static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *); | ||
| 266 | static void ext4_mb_free_committed_blocks(struct super_block *); | ||
| 267 | static void ext4_mb_return_to_preallocation(struct inode *inode, | 272 | static void ext4_mb_return_to_preallocation(struct inode *inode, |
| 268 | struct ext4_buddy *e4b, sector_t block, | 273 | struct ext4_buddy *e4b, sector_t block, |
| 269 | int count); | 274 | int count); |
| @@ -271,6 +276,7 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *, | |||
| 271 | struct super_block *, struct ext4_prealloc_space *pa); | 276 | struct super_block *, struct ext4_prealloc_space *pa); |
| 272 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); | 277 | static int ext4_mb_init_per_dev_proc(struct super_block *sb); |
| 273 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); | 278 | static int ext4_mb_destroy_per_dev_proc(struct super_block *sb); |
| 279 | static void release_blocks_on_commit(journal_t *journal, transaction_t *txn); | ||
| 274 | 280 | ||
| 275 | 281 | ||
| 276 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) | 282 | static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group) |
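This header hunk also declares release_blocks_on_commit(journal_t *, transaction_t *), which, together with the list_add() onto t_private_list seen earlier, suggests freed extents are now handed back from a journal commit callback rather than by polling for a new transaction. The body of that function is not part of this section, so the following is only an assumed-shape userspace model of "walk the transaction's private list at commit time and release each extent"; all names here are illustrative.

    struct freed_ext {
        unsigned int group, start, count;
        struct freed_ext *next;   /* stands in for the t_private_list linkage */
    };

    struct txn {
        struct freed_ext *freed;  /* extents recorded while the txn ran */
    };

    static void release_extent(const struct freed_ext *e)
    {
        (void)e;
        /* in the kernel this would make the blocks allocatable again in the
         * buddy cache and drop the page references taken when queued */
    }

    static void on_commit(struct txn *t)
    {
        struct freed_ext *e = t->freed, *next;

        for (; e; e = next) {
            next = e->next;
            release_extent(e);
            /* kmem_cache_free(ext4_free_ext_cachep, e) in the kernel */
        }
        t->freed = NULL;
    }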
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 46fc0b5b12ba..f2a9cf498ecd 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
| @@ -447,8 +447,7 @@ static int free_ext_block(handle_t *handle, struct inode *inode) | |||
| 447 | 447 | ||
| 448 | } | 448 | } |
| 449 | 449 | ||
| 450 | int ext4_ext_migrate(struct inode *inode, struct file *filp, | 450 | int ext4_ext_migrate(struct inode *inode) |
| 451 | unsigned int cmd, unsigned long arg) | ||
| 452 | { | 451 | { |
| 453 | handle_t *handle; | 452 | handle_t *handle; |
| 454 | int retval = 0, i; | 453 | int retval = 0, i; |
| @@ -516,12 +515,6 @@ int ext4_ext_migrate(struct inode *inode, struct file *filp, | |||
| 516 | * when we add extents we extent the journal | 515 | * when we add extents we extent the journal |
| 517 | */ | 516 | */ |
| 518 | /* | 517 | /* |
| 519 | * inode_mutex prevent write and truncate on the file. Read still goes | ||
| 520 | * through. We take i_data_sem in ext4_ext_swap_inode_data before we | ||
| 521 | * switch the inode format to prevent read. | ||
| 522 | */ | ||
| 523 | mutex_lock(&(inode->i_mutex)); | ||
| 524 | /* | ||
| 525 | * Even though we take i_mutex we can still cause block allocation | 518 | * Even though we take i_mutex we can still cause block allocation |
| 526 | * via mmap write to holes. If we have allocated new blocks we fail | 519 | * via mmap write to holes. If we have allocated new blocks we fail |
| 527 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. | 520 | * migrate. New block allocation will clear EXT4_EXT_MIGRATE flag. |
| @@ -623,7 +616,6 @@ err_out: | |||
| 623 | tmp_inode->i_nlink = 0; | 616 | tmp_inode->i_nlink = 0; |
| 624 | 617 | ||
| 625 | ext4_journal_stop(handle); | 618 | ext4_journal_stop(handle); |
| 626 | mutex_unlock(&(inode->i_mutex)); | ||
| 627 | 619 | ||
| 628 | if (tmp_inode) | 620 | if (tmp_inode) |
| 629 | iput(tmp_inode); | 621 | iput(tmp_inode); |
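The migrate.c hunks drop the mutex_lock(&inode->i_mutex)/mutex_unlock pair from ext4_ext_migrate() itself; since the function now takes only the inode, the natural reading is that the ioctl caller serializes against writes and truncates before calling in. That caller is not shown in this section, so the sketch below is only a generic "lock in the caller" shape with hypothetical names and a userspace mutex.

    #include <pthread.h>

    struct inode_model {
        pthread_mutex_t i_mutex;   /* stands in for inode->i_mutex */
    };

    /* Hypothetical: the migration itself no longer touches the lock. */
    extern int do_migrate(struct inode_model *inode);

    /* Hypothetical caller, e.g. the ioctl path: it owns the locking, so the
     * callee has no unlock to forget on its error paths. */
    static int migrate_ioctl(struct inode_model *inode)
    {
        int ret;

        pthread_mutex_lock(&inode->i_mutex);
        ret = do_migrate(inode);
        pthread_mutex_unlock(&inode->i_mutex);
        return ret;
    }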
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 387ad98350c3..92db9e945147 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
| @@ -151,34 +151,36 @@ struct dx_map_entry | |||
| 151 | 151 | ||
| 152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); | 152 | static inline ext4_lblk_t dx_get_block(struct dx_entry *entry); |
| 153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); | 153 | static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value); |
| 154 | static inline unsigned dx_get_hash (struct dx_entry *entry); | 154 | static inline unsigned dx_get_hash(struct dx_entry *entry); |
| 155 | static void dx_set_hash (struct dx_entry *entry, unsigned value); | 155 | static void dx_set_hash(struct dx_entry *entry, unsigned value); |
| 156 | static unsigned dx_get_count (struct dx_entry *entries); | 156 | static unsigned dx_get_count(struct dx_entry *entries); |
| 157 | static unsigned dx_get_limit (struct dx_entry *entries); | 157 | static unsigned dx_get_limit(struct dx_entry *entries); |
| 158 | static void dx_set_count (struct dx_entry *entries, unsigned value); | 158 | static void dx_set_count(struct dx_entry *entries, unsigned value); |
| 159 | static void dx_set_limit (struct dx_entry *entries, unsigned value); | 159 | static void dx_set_limit(struct dx_entry *entries, unsigned value); |
| 160 | static unsigned dx_root_limit (struct inode *dir, unsigned infosize); | 160 | static unsigned dx_root_limit(struct inode *dir, unsigned infosize); |
| 161 | static unsigned dx_node_limit (struct inode *dir); | 161 | static unsigned dx_node_limit(struct inode *dir); |
| 162 | static struct dx_frame *dx_probe(struct dentry *dentry, | 162 | static struct dx_frame *dx_probe(const struct qstr *d_name, |
| 163 | struct inode *dir, | 163 | struct inode *dir, |
| 164 | struct dx_hash_info *hinfo, | 164 | struct dx_hash_info *hinfo, |
| 165 | struct dx_frame *frame, | 165 | struct dx_frame *frame, |
| 166 | int *err); | 166 | int *err); |
| 167 | static void dx_release (struct dx_frame *frames); | 167 | static void dx_release(struct dx_frame *frames); |
| 168 | static int dx_make_map (struct ext4_dir_entry_2 *de, int size, | 168 | static int dx_make_map(struct ext4_dir_entry_2 *de, int size, |
| 169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); | 169 | struct dx_hash_info *hinfo, struct dx_map_entry map[]); |
| 170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); | 170 | static void dx_sort_map(struct dx_map_entry *map, unsigned count); |
| 171 | static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, | 171 | static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to, |
| 172 | struct dx_map_entry *offsets, int count); | 172 | struct dx_map_entry *offsets, int count); |
| 173 | static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); | 173 | static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size); |
| 174 | static void dx_insert_block(struct dx_frame *frame, | 174 | static void dx_insert_block(struct dx_frame *frame, |
| 175 | u32 hash, ext4_lblk_t block); | 175 | u32 hash, ext4_lblk_t block); |
| 176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, | 176 | static int ext4_htree_next_block(struct inode *dir, __u32 hash, |
| 177 | struct dx_frame *frame, | 177 | struct dx_frame *frame, |
| 178 | struct dx_frame *frames, | 178 | struct dx_frame *frames, |
| 179 | __u32 *start_hash); | 179 | __u32 *start_hash); |
| 180 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 180 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, |
| 181 | struct ext4_dir_entry_2 **res_dir, int *err); | 181 | const struct qstr *d_name, |
| 182 | struct ext4_dir_entry_2 **res_dir, | ||
| 183 | int *err); | ||
| 182 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | 184 | static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, |
| 183 | struct inode *inode); | 185 | struct inode *inode); |
| 184 | 186 | ||
| @@ -207,44 +209,44 @@ static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value) | |||
| 207 | entry->block = cpu_to_le32(value); | 209 | entry->block = cpu_to_le32(value); |
| 208 | } | 210 | } |
| 209 | 211 | ||
| 210 | static inline unsigned dx_get_hash (struct dx_entry *entry) | 212 | static inline unsigned dx_get_hash(struct dx_entry *entry) |
| 211 | { | 213 | { |
| 212 | return le32_to_cpu(entry->hash); | 214 | return le32_to_cpu(entry->hash); |
| 213 | } | 215 | } |
| 214 | 216 | ||
| 215 | static inline void dx_set_hash (struct dx_entry *entry, unsigned value) | 217 | static inline void dx_set_hash(struct dx_entry *entry, unsigned value) |
| 216 | { | 218 | { |
| 217 | entry->hash = cpu_to_le32(value); | 219 | entry->hash = cpu_to_le32(value); |
| 218 | } | 220 | } |
| 219 | 221 | ||
| 220 | static inline unsigned dx_get_count (struct dx_entry *entries) | 222 | static inline unsigned dx_get_count(struct dx_entry *entries) |
| 221 | { | 223 | { |
| 222 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); | 224 | return le16_to_cpu(((struct dx_countlimit *) entries)->count); |
| 223 | } | 225 | } |
| 224 | 226 | ||
| 225 | static inline unsigned dx_get_limit (struct dx_entry *entries) | 227 | static inline unsigned dx_get_limit(struct dx_entry *entries) |
| 226 | { | 228 | { |
| 227 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); | 229 | return le16_to_cpu(((struct dx_countlimit *) entries)->limit); |
| 228 | } | 230 | } |
| 229 | 231 | ||
| 230 | static inline void dx_set_count (struct dx_entry *entries, unsigned value) | 232 | static inline void dx_set_count(struct dx_entry *entries, unsigned value) |
| 231 | { | 233 | { |
| 232 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); | 234 | ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); |
| 233 | } | 235 | } |
| 234 | 236 | ||
| 235 | static inline void dx_set_limit (struct dx_entry *entries, unsigned value) | 237 | static inline void dx_set_limit(struct dx_entry *entries, unsigned value) |
| 236 | { | 238 | { |
| 237 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); | 239 | ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); |
| 238 | } | 240 | } |
| 239 | 241 | ||
| 240 | static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) | 242 | static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize) |
| 241 | { | 243 | { |
| 242 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - | 244 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - |
| 243 | EXT4_DIR_REC_LEN(2) - infosize; | 245 | EXT4_DIR_REC_LEN(2) - infosize; |
| 244 | return entry_space / sizeof(struct dx_entry); | 246 | return entry_space / sizeof(struct dx_entry); |
| 245 | } | 247 | } |
| 246 | 248 | ||
| 247 | static inline unsigned dx_node_limit (struct inode *dir) | 249 | static inline unsigned dx_node_limit(struct inode *dir) |
| 248 | { | 250 | { |
| 249 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); | 251 | unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); |
| 250 | return entry_space / sizeof(struct dx_entry); | 252 | return entry_space / sizeof(struct dx_entry); |
| @@ -254,12 +256,12 @@ static inline unsigned dx_node_limit (struct inode *dir) | |||
| 254 | * Debug | 256 | * Debug |
| 255 | */ | 257 | */ |
| 256 | #ifdef DX_DEBUG | 258 | #ifdef DX_DEBUG |
| 257 | static void dx_show_index (char * label, struct dx_entry *entries) | 259 | static void dx_show_index(char * label, struct dx_entry *entries) |
| 258 | { | 260 | { |
| 259 | int i, n = dx_get_count (entries); | 261 | int i, n = dx_get_count (entries); |
| 260 | printk("%s index ", label); | 262 | printk(KERN_DEBUG "%s index ", label); |
| 261 | for (i = 0; i < n; i++) { | 263 | for (i = 0; i < n; i++) { |
| 262 | printk("%x->%lu ", i? dx_get_hash(entries + i) : | 264 | printk("%x->%lu ", i ? dx_get_hash(entries + i) : |
| 263 | 0, (unsigned long)dx_get_block(entries + i)); | 265 | 0, (unsigned long)dx_get_block(entries + i)); |
| 264 | } | 266 | } |
| 265 | printk("\n"); | 267 | printk("\n"); |
| @@ -306,7 +308,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
| 306 | struct dx_entry *entries, int levels) | 308 | struct dx_entry *entries, int levels) |
| 307 | { | 309 | { |
| 308 | unsigned blocksize = dir->i_sb->s_blocksize; | 310 | unsigned blocksize = dir->i_sb->s_blocksize; |
| 309 | unsigned count = dx_get_count (entries), names = 0, space = 0, i; | 311 | unsigned count = dx_get_count(entries), names = 0, space = 0, i; |
| 310 | unsigned bcount = 0; | 312 | unsigned bcount = 0; |
| 311 | struct buffer_head *bh; | 313 | struct buffer_head *bh; |
| 312 | int err; | 314 | int err; |
| @@ -325,11 +327,12 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
| 325 | names += stats.names; | 327 | names += stats.names; |
| 326 | space += stats.space; | 328 | space += stats.space; |
| 327 | bcount += stats.bcount; | 329 | bcount += stats.bcount; |
| 328 | brelse (bh); | 330 | brelse(bh); |
| 329 | } | 331 | } |
| 330 | if (bcount) | 332 | if (bcount) |
| 331 | printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", | 333 | printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n", |
| 332 | names, space/bcount,(space/bcount)*100/blocksize); | 334 | levels ? "" : " ", names, space/bcount, |
| 335 | (space/bcount)*100/blocksize); | ||
| 333 | return (struct stats) { names, space, bcount}; | 336 | return (struct stats) { names, space, bcount}; |
| 334 | } | 337 | } |
| 335 | #endif /* DX_DEBUG */ | 338 | #endif /* DX_DEBUG */ |
| @@ -344,7 +347,7 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, | |||
| 344 | * back to userspace. | 347 | * back to userspace. |
| 345 | */ | 348 | */ |
| 346 | static struct dx_frame * | 349 | static struct dx_frame * |
| 347 | dx_probe(struct dentry *dentry, struct inode *dir, | 350 | dx_probe(const struct qstr *d_name, struct inode *dir, |
| 348 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) | 351 | struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) |
| 349 | { | 352 | { |
| 350 | unsigned count, indirect; | 353 | unsigned count, indirect; |
| @@ -355,8 +358,6 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
| 355 | u32 hash; | 358 | u32 hash; |
| 356 | 359 | ||
| 357 | frame->bh = NULL; | 360 | frame->bh = NULL; |
| 358 | if (dentry) | ||
| 359 | dir = dentry->d_parent->d_inode; | ||
| 360 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) | 361 | if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) |
| 361 | goto fail; | 362 | goto fail; |
| 362 | root = (struct dx_root *) bh->b_data; | 363 | root = (struct dx_root *) bh->b_data; |
| @@ -372,8 +373,8 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
| 372 | } | 373 | } |
| 373 | hinfo->hash_version = root->info.hash_version; | 374 | hinfo->hash_version = root->info.hash_version; |
| 374 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; | 375 | hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; |
| 375 | if (dentry) | 376 | if (d_name) |
| 376 | ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); | 377 | ext4fs_dirhash(d_name->name, d_name->len, hinfo); |
| 377 | hash = hinfo->hash; | 378 | hash = hinfo->hash; |
| 378 | 379 | ||
| 379 | if (root->info.unused_flags & 1) { | 380 | if (root->info.unused_flags & 1) { |
| @@ -406,7 +407,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, | |||
| 406 | goto fail; | 407 | goto fail; |
| 407 | } | 408 | } |
| 408 | 409 | ||
| 409 | dxtrace (printk("Look up %x", hash)); | 410 | dxtrace(printk("Look up %x", hash)); |
| 410 | while (1) | 411 | while (1) |
| 411 | { | 412 | { |
| 412 | count = dx_get_count(entries); | 413 | count = dx_get_count(entries); |
| @@ -555,7 +556,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash, | |||
| 555 | 0, &err))) | 556 | 0, &err))) |
| 556 | return err; /* Failure */ | 557 | return err; /* Failure */ |
| 557 | p++; | 558 | p++; |
| 558 | brelse (p->bh); | 559 | brelse(p->bh); |
| 559 | p->bh = bh; | 560 | p->bh = bh; |
| 560 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; | 561 | p->at = p->entries = ((struct dx_node *) bh->b_data)->entries; |
| 561 | } | 562 | } |
| @@ -593,7 +594,7 @@ static int htree_dirblock_to_tree(struct file *dir_file, | |||
| 593 | /* On error, skip the f_pos to the next block. */ | 594 | /* On error, skip the f_pos to the next block. */ |
| 594 | dir_file->f_pos = (dir_file->f_pos | | 595 | dir_file->f_pos = (dir_file->f_pos | |
| 595 | (dir->i_sb->s_blocksize - 1)) + 1; | 596 | (dir->i_sb->s_blocksize - 1)) + 1; |
| 596 | brelse (bh); | 597 | brelse(bh); |
| 597 | return count; | 598 | return count; |
| 598 | } | 599 | } |
| 599 | ext4fs_dirhash(de->name, de->name_len, hinfo); | 600 | ext4fs_dirhash(de->name, de->name_len, hinfo); |
| @@ -635,8 +636,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
| 635 | int ret, err; | 636 | int ret, err; |
| 636 | __u32 hashval; | 637 | __u32 hashval; |
| 637 | 638 | ||
| 638 | dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, | 639 | dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n", |
| 639 | start_minor_hash)); | 640 | start_hash, start_minor_hash)); |
| 640 | dir = dir_file->f_path.dentry->d_inode; | 641 | dir = dir_file->f_path.dentry->d_inode; |
| 641 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { | 642 | if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { |
| 642 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 643 | hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
| @@ -648,7 +649,7 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
| 648 | } | 649 | } |
| 649 | hinfo.hash = start_hash; | 650 | hinfo.hash = start_hash; |
| 650 | hinfo.minor_hash = 0; | 651 | hinfo.minor_hash = 0; |
| 651 | frame = dx_probe(NULL, dir_file->f_path.dentry->d_inode, &hinfo, frames, &err); | 652 | frame = dx_probe(NULL, dir, &hinfo, frames, &err); |
| 652 | if (!frame) | 653 | if (!frame) |
| 653 | return err; | 654 | return err; |
| 654 | 655 | ||
| @@ -694,8 +695,8 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, | |||
| 694 | break; | 695 | break; |
| 695 | } | 696 | } |
| 696 | dx_release(frames); | 697 | dx_release(frames); |
| 697 | dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", | 698 | dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, " |
| 698 | count, *next_hash)); | 699 | "next hash: %x\n", count, *next_hash)); |
| 699 | return count; | 700 | return count; |
| 700 | errout: | 701 | errout: |
| 701 | dx_release(frames); | 702 | dx_release(frames); |
| @@ -802,17 +803,17 @@ static inline int ext4_match (int len, const char * const name, | |||
| 802 | /* | 803 | /* |
| 803 | * Returns 0 if not found, -1 on failure, and 1 on success | 804 | * Returns 0 if not found, -1 on failure, and 1 on success |
| 804 | */ | 805 | */ |
| 805 | static inline int search_dirblock(struct buffer_head * bh, | 806 | static inline int search_dirblock(struct buffer_head *bh, |
| 806 | struct inode *dir, | 807 | struct inode *dir, |
| 807 | struct dentry *dentry, | 808 | const struct qstr *d_name, |
| 808 | unsigned long offset, | 809 | unsigned long offset, |
| 809 | struct ext4_dir_entry_2 ** res_dir) | 810 | struct ext4_dir_entry_2 ** res_dir) |
| 810 | { | 811 | { |
| 811 | struct ext4_dir_entry_2 * de; | 812 | struct ext4_dir_entry_2 * de; |
| 812 | char * dlimit; | 813 | char * dlimit; |
| 813 | int de_len; | 814 | int de_len; |
| 814 | const char *name = dentry->d_name.name; | 815 | const char *name = d_name->name; |
| 815 | int namelen = dentry->d_name.len; | 816 | int namelen = d_name->len; |
| 816 | 817 | ||
| 817 | de = (struct ext4_dir_entry_2 *) bh->b_data; | 818 | de = (struct ext4_dir_entry_2 *) bh->b_data; |
| 818 | dlimit = bh->b_data + dir->i_sb->s_blocksize; | 819 | dlimit = bh->b_data + dir->i_sb->s_blocksize; |
| @@ -851,12 +852,13 @@ static inline int search_dirblock(struct buffer_head * bh, | |||
| 851 | * The returned buffer_head has ->b_count elevated. The caller is expected | 852 | * The returned buffer_head has ->b_count elevated. The caller is expected |
| 852 | * to brelse() it when appropriate. | 853 | * to brelse() it when appropriate. |
| 853 | */ | 854 | */ |
| 854 | static struct buffer_head * ext4_find_entry (struct dentry *dentry, | 855 | static struct buffer_head * ext4_find_entry (struct inode *dir, |
| 856 | const struct qstr *d_name, | ||
| 855 | struct ext4_dir_entry_2 ** res_dir) | 857 | struct ext4_dir_entry_2 ** res_dir) |
| 856 | { | 858 | { |
| 857 | struct super_block * sb; | 859 | struct super_block *sb; |
| 858 | struct buffer_head * bh_use[NAMEI_RA_SIZE]; | 860 | struct buffer_head *bh_use[NAMEI_RA_SIZE]; |
| 859 | struct buffer_head * bh, *ret = NULL; | 861 | struct buffer_head *bh, *ret = NULL; |
| 860 | ext4_lblk_t start, block, b; | 862 | ext4_lblk_t start, block, b; |
| 861 | int ra_max = 0; /* Number of bh's in the readahead | 863 | int ra_max = 0; /* Number of bh's in the readahead |
| 862 | buffer, bh_use[] */ | 864 | buffer, bh_use[] */ |
| @@ -865,16 +867,15 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
| 865 | int num = 0; | 867 | int num = 0; |
| 866 | ext4_lblk_t nblocks; | 868 | ext4_lblk_t nblocks; |
| 867 | int i, err; | 869 | int i, err; |
| 868 | struct inode *dir = dentry->d_parent->d_inode; | ||
| 869 | int namelen; | 870 | int namelen; |
| 870 | 871 | ||
| 871 | *res_dir = NULL; | 872 | *res_dir = NULL; |
| 872 | sb = dir->i_sb; | 873 | sb = dir->i_sb; |
| 873 | namelen = dentry->d_name.len; | 874 | namelen = d_name->len; |
| 874 | if (namelen > EXT4_NAME_LEN) | 875 | if (namelen > EXT4_NAME_LEN) |
| 875 | return NULL; | 876 | return NULL; |
| 876 | if (is_dx(dir)) { | 877 | if (is_dx(dir)) { |
| 877 | bh = ext4_dx_find_entry(dentry, res_dir, &err); | 878 | bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); |
| 878 | /* | 879 | /* |
| 879 | * On success, or if the error was file not found, | 880 | * On success, or if the error was file not found, |
| 880 | * return. Otherwise, fall back to doing a search the | 881 | * return. Otherwise, fall back to doing a search the |
| @@ -882,7 +883,8 @@ static struct buffer_head * ext4_find_entry (struct dentry *dentry, | |||
| 882 | */ | 883 | */ |
| 883 | if (bh || (err != ERR_BAD_DX_DIR)) | 884 | if (bh || (err != ERR_BAD_DX_DIR)) |
| 884 | return bh; | 885 | return bh; |
| 885 | dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); | 886 | dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " |
| 887 | "falling back\n")); | ||
| 886 | } | 888 | } |
| 887 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); | 889 | nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); |
| 888 | start = EXT4_I(dir)->i_dir_start_lookup; | 890 | start = EXT4_I(dir)->i_dir_start_lookup; |
| @@ -926,7 +928,7 @@ restart: | |||
| 926 | brelse(bh); | 928 | brelse(bh); |
| 927 | goto next; | 929 | goto next; |
| 928 | } | 930 | } |
| 929 | i = search_dirblock(bh, dir, dentry, | 931 | i = search_dirblock(bh, dir, d_name, |
| 930 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); | 932 | block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); |
| 931 | if (i == 1) { | 933 | if (i == 1) { |
| 932 | EXT4_I(dir)->i_dir_start_lookup = block; | 934 | EXT4_I(dir)->i_dir_start_lookup = block; |
| @@ -956,11 +958,11 @@ restart: | |||
| 956 | cleanup_and_exit: | 958 | cleanup_and_exit: |
| 957 | /* Clean up the read-ahead blocks */ | 959 | /* Clean up the read-ahead blocks */ |
| 958 | for (; ra_ptr < ra_max; ra_ptr++) | 960 | for (; ra_ptr < ra_max; ra_ptr++) |
| 959 | brelse (bh_use[ra_ptr]); | 961 | brelse(bh_use[ra_ptr]); |
| 960 | return ret; | 962 | return ret; |
| 961 | } | 963 | } |
| 962 | 964 | ||
| 963 | static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | 965 | static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, |
| 964 | struct ext4_dir_entry_2 **res_dir, int *err) | 966 | struct ext4_dir_entry_2 **res_dir, int *err) |
| 965 | { | 967 | { |
| 966 | struct super_block * sb; | 968 | struct super_block * sb; |
| @@ -971,14 +973,13 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
| 971 | struct buffer_head *bh; | 973 | struct buffer_head *bh; |
| 972 | ext4_lblk_t block; | 974 | ext4_lblk_t block; |
| 973 | int retval; | 975 | int retval; |
| 974 | int namelen = dentry->d_name.len; | 976 | int namelen = d_name->len; |
| 975 | const u8 *name = dentry->d_name.name; | 977 | const u8 *name = d_name->name; |
| 976 | struct inode *dir = dentry->d_parent->d_inode; | ||
| 977 | 978 | ||
| 978 | sb = dir->i_sb; | 979 | sb = dir->i_sb; |
| 979 | /* NFS may look up ".." - look at dx_root directory block */ | 980 | /* NFS may look up ".." - look at dx_root directory block */ |
| 980 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ | 981 | if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){ |
| 981 | if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) | 982 | if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) |
| 982 | return NULL; | 983 | return NULL; |
| 983 | } else { | 984 | } else { |
| 984 | frame = frames; | 985 | frame = frames; |
| @@ -1010,7 +1011,7 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
| 1010 | return bh; | 1011 | return bh; |
| 1011 | } | 1012 | } |
| 1012 | } | 1013 | } |
| 1013 | brelse (bh); | 1014 | brelse(bh); |
| 1014 | /* Check to see if we should continue to search */ | 1015 | /* Check to see if we should continue to search */ |
| 1015 | retval = ext4_htree_next_block(dir, hash, frame, | 1016 | retval = ext4_htree_next_block(dir, hash, frame, |
| 1016 | frames, NULL); | 1017 | frames, NULL); |
| @@ -1025,25 +1026,25 @@ static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, | |||
| 1025 | 1026 | ||
| 1026 | *err = -ENOENT; | 1027 | *err = -ENOENT; |
| 1027 | errout: | 1028 | errout: |
| 1028 | dxtrace(printk("%s not found\n", name)); | 1029 | dxtrace(printk(KERN_DEBUG "%s not found\n", name)); |
| 1029 | dx_release (frames); | 1030 | dx_release (frames); |
| 1030 | return NULL; | 1031 | return NULL; |
| 1031 | } | 1032 | } |
| 1032 | 1033 | ||
| 1033 | static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) | 1034 | static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) |
| 1034 | { | 1035 | { |
| 1035 | struct inode * inode; | 1036 | struct inode *inode; |
| 1036 | struct ext4_dir_entry_2 * de; | 1037 | struct ext4_dir_entry_2 *de; |
| 1037 | struct buffer_head * bh; | 1038 | struct buffer_head *bh; |
| 1038 | 1039 | ||
| 1039 | if (dentry->d_name.len > EXT4_NAME_LEN) | 1040 | if (dentry->d_name.len > EXT4_NAME_LEN) |
| 1040 | return ERR_PTR(-ENAMETOOLONG); | 1041 | return ERR_PTR(-ENAMETOOLONG); |
| 1041 | 1042 | ||
| 1042 | bh = ext4_find_entry(dentry, &de); | 1043 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
| 1043 | inode = NULL; | 1044 | inode = NULL; |
| 1044 | if (bh) { | 1045 | if (bh) { |
| 1045 | unsigned long ino = le32_to_cpu(de->inode); | 1046 | unsigned long ino = le32_to_cpu(de->inode); |
| 1046 | brelse (bh); | 1047 | brelse(bh); |
| 1047 | if (!ext4_valid_inum(dir->i_sb, ino)) { | 1048 | if (!ext4_valid_inum(dir->i_sb, ino)) { |
| 1048 | ext4_error(dir->i_sb, "ext4_lookup", | 1049 | ext4_error(dir->i_sb, "ext4_lookup", |
| 1049 | "bad inode number: %lu", ino); | 1050 | "bad inode number: %lu", ino); |
| @@ -1062,15 +1063,14 @@ struct dentry *ext4_get_parent(struct dentry *child) | |||
| 1062 | unsigned long ino; | 1063 | unsigned long ino; |
| 1063 | struct dentry *parent; | 1064 | struct dentry *parent; |
| 1064 | struct inode *inode; | 1065 | struct inode *inode; |
| 1065 | struct dentry dotdot; | 1066 | static const struct qstr dotdot = { |
| 1067 | .name = "..", | ||
| 1068 | .len = 2, | ||
| 1069 | }; | ||
| 1066 | struct ext4_dir_entry_2 * de; | 1070 | struct ext4_dir_entry_2 * de; |
| 1067 | struct buffer_head *bh; | 1071 | struct buffer_head *bh; |
| 1068 | 1072 | ||
| 1069 | dotdot.d_name.name = ".."; | 1073 | bh = ext4_find_entry(child->d_inode, &dotdot, &de); |
| 1070 | dotdot.d_name.len = 2; | ||
| 1071 | dotdot.d_parent = child; /* confusing, isn't it! */ | ||
| 1072 | |||
| 1073 | bh = ext4_find_entry(&dotdot, &de); | ||
| 1074 | inode = NULL; | 1074 | inode = NULL; |
| 1075 | if (!bh) | 1075 | if (!bh) |
| 1076 | return ERR_PTR(-ENOENT); | 1076 | return ERR_PTR(-ENOENT); |
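ext4_get_parent() above now describes ".." with a static qstr instead of faking up a dentry with a d_parent back-pointer, and ext4_find_entry()/ext4_dx_find_entry() take the directory inode plus that (name, length) pair directly. A trimmed illustration of why that is all a lookup needs; struct qstr here is a reduced userspace stand-in and find_entry() is hypothetical.

    struct qstr {
        const char *name;
        unsigned int len;
    };

    struct inode_stub;   /* opaque directory handle for this sketch */

    /* Hypothetical lookup taking only what it actually needs. */
    extern int find_entry(struct inode_stub *dir, const struct qstr *name);

    static int lookup_parent(struct inode_stub *child_dir)
    {
        static const struct qstr dotdot = { .name = "..", .len = 2 };

        /* no throwaway dentry, no d_parent games */
        return find_entry(child_dir, &dotdot);
    }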
| @@ -1201,10 +1201,10 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
| 1201 | 1201 | ||
| 1202 | /* create map in the end of data2 block */ | 1202 | /* create map in the end of data2 block */ |
| 1203 | map = (struct dx_map_entry *) (data2 + blocksize); | 1203 | map = (struct dx_map_entry *) (data2 + blocksize); |
| 1204 | count = dx_make_map ((struct ext4_dir_entry_2 *) data1, | 1204 | count = dx_make_map((struct ext4_dir_entry_2 *) data1, |
| 1205 | blocksize, hinfo, map); | 1205 | blocksize, hinfo, map); |
| 1206 | map -= count; | 1206 | map -= count; |
| 1207 | dx_sort_map (map, count); | 1207 | dx_sort_map(map, count); |
| 1208 | /* Split the existing block in the middle, size-wise */ | 1208 | /* Split the existing block in the middle, size-wise */ |
| 1209 | size = 0; | 1209 | size = 0; |
| 1210 | move = 0; | 1210 | move = 0; |
| @@ -1225,7 +1225,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
| 1225 | 1225 | ||
| 1226 | /* Fancy dance to stay within two buffers */ | 1226 | /* Fancy dance to stay within two buffers */ |
| 1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); | 1227 | de2 = dx_move_dirents(data1, data2, map + split, count - split); |
| 1228 | de = dx_pack_dirents(data1,blocksize); | 1228 | de = dx_pack_dirents(data1, blocksize); |
| 1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); | 1229 | de->rec_len = ext4_rec_len_to_disk(data1 + blocksize - (char *) de); |
| 1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); | 1230 | de2->rec_len = ext4_rec_len_to_disk(data2 + blocksize - (char *) de2); |
| 1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); | 1231 | dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); |
| @@ -1237,15 +1237,15 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, | |||
| 1237 | swap(*bh, bh2); | 1237 | swap(*bh, bh2); |
| 1238 | de = de2; | 1238 | de = de2; |
| 1239 | } | 1239 | } |
| 1240 | dx_insert_block (frame, hash2 + continued, newblock); | 1240 | dx_insert_block(frame, hash2 + continued, newblock); |
| 1241 | err = ext4_journal_dirty_metadata (handle, bh2); | 1241 | err = ext4_journal_dirty_metadata(handle, bh2); |
| 1242 | if (err) | 1242 | if (err) |
| 1243 | goto journal_error; | 1243 | goto journal_error; |
| 1244 | err = ext4_journal_dirty_metadata (handle, frame->bh); | 1244 | err = ext4_journal_dirty_metadata(handle, frame->bh); |
| 1245 | if (err) | 1245 | if (err) |
| 1246 | goto journal_error; | 1246 | goto journal_error; |
| 1247 | brelse (bh2); | 1247 | brelse(bh2); |
| 1248 | dxtrace(dx_show_index ("frame", frame->entries)); | 1248 | dxtrace(dx_show_index("frame", frame->entries)); |
| 1249 | return de; | 1249 | return de; |
| 1250 | 1250 | ||
| 1251 | journal_error: | 1251 | journal_error: |
| @@ -1271,7 +1271,7 @@ errout: | |||
| 1271 | */ | 1271 | */ |
| 1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | 1272 | static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, |
| 1273 | struct inode *inode, struct ext4_dir_entry_2 *de, | 1273 | struct inode *inode, struct ext4_dir_entry_2 *de, |
| 1274 | struct buffer_head * bh) | 1274 | struct buffer_head *bh) |
| 1275 | { | 1275 | { |
| 1276 | struct inode *dir = dentry->d_parent->d_inode; | 1276 | struct inode *dir = dentry->d_parent->d_inode; |
| 1277 | const char *name = dentry->d_name.name; | 1277 | const char *name = dentry->d_name.name; |
| @@ -1288,11 +1288,11 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
| 1288 | while ((char *) de <= top) { | 1288 | while ((char *) de <= top) { |
| 1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, | 1289 | if (!ext4_check_dir_entry("ext4_add_entry", dir, de, |
| 1290 | bh, offset)) { | 1290 | bh, offset)) { |
| 1291 | brelse (bh); | 1291 | brelse(bh); |
| 1292 | return -EIO; | 1292 | return -EIO; |
| 1293 | } | 1293 | } |
| 1294 | if (ext4_match (namelen, name, de)) { | 1294 | if (ext4_match(namelen, name, de)) { |
| 1295 | brelse (bh); | 1295 | brelse(bh); |
| 1296 | return -EEXIST; | 1296 | return -EEXIST; |
| 1297 | } | 1297 | } |
| 1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); | 1298 | nlen = EXT4_DIR_REC_LEN(de->name_len); |
| @@ -1329,7 +1329,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, | |||
| 1329 | } else | 1329 | } else |
| 1330 | de->inode = 0; | 1330 | de->inode = 0; |
| 1331 | de->name_len = namelen; | 1331 | de->name_len = namelen; |
| 1332 | memcpy (de->name, name, namelen); | 1332 | memcpy(de->name, name, namelen); |
| 1333 | /* | 1333 | /* |
| 1334 | * XXX shouldn't update any times until successful | 1334 | * XXX shouldn't update any times until successful |
| 1335 | * completion of syscall, but too many callers depend | 1335 | * completion of syscall, but too many callers depend |
| @@ -1377,7 +1377,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1377 | struct fake_dirent *fde; | 1377 | struct fake_dirent *fde; |
| 1378 | 1378 | ||
| 1379 | blocksize = dir->i_sb->s_blocksize; | 1379 | blocksize = dir->i_sb->s_blocksize; |
| 1380 | dxtrace(printk("Creating index\n")); | 1380 | dxtrace(printk(KERN_DEBUG "Creating index\n")); |
| 1381 | retval = ext4_journal_get_write_access(handle, bh); | 1381 | retval = ext4_journal_get_write_access(handle, bh); |
| 1382 | if (retval) { | 1382 | if (retval) { |
| 1383 | ext4_std_error(dir->i_sb, retval); | 1383 | ext4_std_error(dir->i_sb, retval); |
| @@ -1386,7 +1386,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1386 | } | 1386 | } |
| 1387 | root = (struct dx_root *) bh->b_data; | 1387 | root = (struct dx_root *) bh->b_data; |
| 1388 | 1388 | ||
| 1389 | bh2 = ext4_append (handle, dir, &block, &retval); | 1389 | bh2 = ext4_append(handle, dir, &block, &retval); |
| 1390 | if (!(bh2)) { | 1390 | if (!(bh2)) { |
| 1391 | brelse(bh); | 1391 | brelse(bh); |
| 1392 | return retval; | 1392 | return retval; |
| @@ -1412,9 +1412,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1412 | root->info.info_length = sizeof(root->info); | 1412 | root->info.info_length = sizeof(root->info); |
| 1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; | 1413 | root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; |
| 1414 | entries = root->entries; | 1414 | entries = root->entries; |
| 1415 | dx_set_block (entries, 1); | 1415 | dx_set_block(entries, 1); |
| 1416 | dx_set_count (entries, 1); | 1416 | dx_set_count(entries, 1); |
| 1417 | dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); | 1417 | dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info))); |
| 1418 | 1418 | ||
| 1419 | /* Initialize as for dx_probe */ | 1419 | /* Initialize as for dx_probe */ |
| 1420 | hinfo.hash_version = root->info.hash_version; | 1420 | hinfo.hash_version = root->info.hash_version; |
| @@ -1443,14 +1443,14 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, | |||
| 1443 | * may not sleep between calling this and putting something into | 1443 | * may not sleep between calling this and putting something into |
| 1444 | * the entry, as someone else might have used it while you slept. | 1444 | * the entry, as someone else might have used it while you slept. |
| 1445 | */ | 1445 | */ |
| 1446 | static int ext4_add_entry (handle_t *handle, struct dentry *dentry, | 1446 | static int ext4_add_entry(handle_t *handle, struct dentry *dentry, |
| 1447 | struct inode *inode) | 1447 | struct inode *inode) |
| 1448 | { | 1448 | { |
| 1449 | struct inode *dir = dentry->d_parent->d_inode; | 1449 | struct inode *dir = dentry->d_parent->d_inode; |
| 1450 | unsigned long offset; | 1450 | unsigned long offset; |
| 1451 | struct buffer_head * bh; | 1451 | struct buffer_head *bh; |
| 1452 | struct ext4_dir_entry_2 *de; | 1452 | struct ext4_dir_entry_2 *de; |
| 1453 | struct super_block * sb; | 1453 | struct super_block *sb; |
| 1454 | int retval; | 1454 | int retval; |
| 1455 | int dx_fallback=0; | 1455 | int dx_fallback=0; |
| 1456 | unsigned blocksize; | 1456 | unsigned blocksize; |
| @@ -1500,13 +1500,13 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1500 | struct dx_frame frames[2], *frame; | 1500 | struct dx_frame frames[2], *frame; |
| 1501 | struct dx_entry *entries, *at; | 1501 | struct dx_entry *entries, *at; |
| 1502 | struct dx_hash_info hinfo; | 1502 | struct dx_hash_info hinfo; |
| 1503 | struct buffer_head * bh; | 1503 | struct buffer_head *bh; |
| 1504 | struct inode *dir = dentry->d_parent->d_inode; | 1504 | struct inode *dir = dentry->d_parent->d_inode; |
| 1505 | struct super_block * sb = dir->i_sb; | 1505 | struct super_block *sb = dir->i_sb; |
| 1506 | struct ext4_dir_entry_2 *de; | 1506 | struct ext4_dir_entry_2 *de; |
| 1507 | int err; | 1507 | int err; |
| 1508 | 1508 | ||
| 1509 | frame = dx_probe(dentry, NULL, &hinfo, frames, &err); | 1509 | frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); |
| 1510 | if (!frame) | 1510 | if (!frame) |
| 1511 | return err; | 1511 | return err; |
| 1512 | entries = frame->entries; | 1512 | entries = frame->entries; |
| @@ -1527,7 +1527,7 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1527 | } | 1527 | } |
| 1528 | 1528 | ||
| 1529 | /* Block full, should compress but for now just split */ | 1529 | /* Block full, should compress but for now just split */ |
| 1530 | dxtrace(printk("using %u of %u node entries\n", | 1530 | dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", |
| 1531 | dx_get_count(entries), dx_get_limit(entries))); | 1531 | dx_get_count(entries), dx_get_limit(entries))); |
| 1532 | /* Need to split index? */ | 1532 | /* Need to split index? */ |
| 1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { | 1533 | if (dx_get_count(entries) == dx_get_limit(entries)) { |
| @@ -1559,7 +1559,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1559 | if (levels) { | 1559 | if (levels) { |
| 1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; | 1560 | unsigned icount1 = icount/2, icount2 = icount - icount1; |
| 1561 | unsigned hash2 = dx_get_hash(entries + icount1); | 1561 | unsigned hash2 = dx_get_hash(entries + icount1); |
| 1562 | dxtrace(printk("Split index %i/%i\n", icount1, icount2)); | 1562 | dxtrace(printk(KERN_DEBUG "Split index %i/%i\n", |
| 1563 | icount1, icount2)); | ||
| 1563 | 1564 | ||
| 1564 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ | 1565 | BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ |
| 1565 | err = ext4_journal_get_write_access(handle, | 1566 | err = ext4_journal_get_write_access(handle, |
| @@ -1567,11 +1568,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1567 | if (err) | 1568 | if (err) |
| 1568 | goto journal_error; | 1569 | goto journal_error; |
| 1569 | 1570 | ||
| 1570 | memcpy ((char *) entries2, (char *) (entries + icount1), | 1571 | memcpy((char *) entries2, (char *) (entries + icount1), |
| 1571 | icount2 * sizeof(struct dx_entry)); | 1572 | icount2 * sizeof(struct dx_entry)); |
| 1572 | dx_set_count (entries, icount1); | 1573 | dx_set_count(entries, icount1); |
| 1573 | dx_set_count (entries2, icount2); | 1574 | dx_set_count(entries2, icount2); |
| 1574 | dx_set_limit (entries2, dx_node_limit(dir)); | 1575 | dx_set_limit(entries2, dx_node_limit(dir)); |
| 1575 | 1576 | ||
| 1576 | /* Which index block gets the new entry? */ | 1577 | /* Which index block gets the new entry? */ |
| 1577 | if (at - entries >= icount1) { | 1578 | if (at - entries >= icount1) { |
| @@ -1579,16 +1580,17 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, | |||
| 1579 | frame->entries = entries = entries2; | 1580 | frame->entries = entries = entries2; |
| 1580 | swap(frame->bh, bh2); | 1581 | swap(frame->bh, bh2); |
| 1581 | } | 1582 | } |
| 1582 | dx_insert_block (frames + 0, hash2, newblock); | 1583 | dx_insert_block(frames + 0, hash2, newblock); |
| 1583 | dxtrace(dx_show_index ("node", frames[1].entries)); | 1584 | dxtrace(dx_show_index("node", frames[1].entries)); |
| 1584 | dxtrace(dx_show_index ("node", | 1585 | dxtrace(dx_show_index("node", |
| 1585 | ((struct dx_node *) bh2->b_data)->entries)); | 1586 | ((struct dx_node *) bh2->b_data)->entries)); |
| 1586 | err = ext4_journal_dirty_metadata(handle, bh2); | 1587 | err = ext4_journal_dirty_metadata(handle, bh2); |
| 1587 | if (err) | 1588 | if (err) |
| 1588 | goto journal_error; | 1589 | goto journal_error; |
| 1589 | brelse (bh2); | 1590 | brelse (bh2); |
| 1590 | } else { | 1591 | } else { |
| 1591 | dxtrace(printk("Creating second level index...\n")); | 1592 | dxtrace(printk(KERN_DEBUG |
| 1593 | "Creating second level index...\n")); | ||
| 1592 | memcpy((char *) entries2, (char *) entries, | 1594 | memcpy((char *) entries2, (char *) entries, |
| 1593 | icount * sizeof(struct dx_entry)); | 1595 | icount * sizeof(struct dx_entry)); |
| 1594 | dx_set_limit(entries2, dx_node_limit(dir)); | 1596 | dx_set_limit(entries2, dx_node_limit(dir)); |
| @@ -1630,12 +1632,12 @@ cleanup: | |||
| 1630 | * ext4_delete_entry deletes a directory entry by merging it with the | 1632 | * ext4_delete_entry deletes a directory entry by merging it with the |
| 1631 | * previous entry | 1633 | * previous entry |
| 1632 | */ | 1634 | */ |
| 1633 | static int ext4_delete_entry (handle_t *handle, | 1635 | static int ext4_delete_entry(handle_t *handle, |
| 1634 | struct inode * dir, | 1636 | struct inode *dir, |
| 1635 | struct ext4_dir_entry_2 * de_del, | 1637 | struct ext4_dir_entry_2 *de_del, |
| 1636 | struct buffer_head * bh) | 1638 | struct buffer_head *bh) |
| 1637 | { | 1639 | { |
| 1638 | struct ext4_dir_entry_2 * de, * pde; | 1640 | struct ext4_dir_entry_2 *de, *pde; |
| 1639 | int i; | 1641 | int i; |
| 1640 | 1642 | ||
| 1641 | i = 0; | 1643 | i = 0; |
| @@ -1716,11 +1718,11 @@ static int ext4_add_nondir(handle_t *handle, | |||
| 1716 | * If the create succeeds, we fill in the inode information | 1718 | * If the create succeeds, we fill in the inode information |
| 1717 | * with d_instantiate(). | 1719 | * with d_instantiate(). |
| 1718 | */ | 1720 | */ |
| 1719 | static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, | 1721 | static int ext4_create(struct inode *dir, struct dentry *dentry, int mode, |
| 1720 | struct nameidata *nd) | 1722 | struct nameidata *nd) |
| 1721 | { | 1723 | { |
| 1722 | handle_t *handle; | 1724 | handle_t *handle; |
| 1723 | struct inode * inode; | 1725 | struct inode *inode; |
| 1724 | int err, retries = 0; | 1726 | int err, retries = 0; |
| 1725 | 1727 | ||
| 1726 | retry: | 1728 | retry: |
| @@ -1747,8 +1749,8 @@ retry: | |||
| 1747 | return err; | 1749 | return err; |
| 1748 | } | 1750 | } |
| 1749 | 1751 | ||
| 1750 | static int ext4_mknod (struct inode * dir, struct dentry *dentry, | 1752 | static int ext4_mknod(struct inode *dir, struct dentry *dentry, |
| 1751 | int mode, dev_t rdev) | 1753 | int mode, dev_t rdev) |
| 1752 | { | 1754 | { |
| 1753 | handle_t *handle; | 1755 | handle_t *handle; |
| 1754 | struct inode *inode; | 1756 | struct inode *inode; |
| @@ -1767,11 +1769,11 @@ retry: | |||
| 1767 | if (IS_DIRSYNC(dir)) | 1769 | if (IS_DIRSYNC(dir)) |
| 1768 | handle->h_sync = 1; | 1770 | handle->h_sync = 1; |
| 1769 | 1771 | ||
| 1770 | inode = ext4_new_inode (handle, dir, mode); | 1772 | inode = ext4_new_inode(handle, dir, mode); |
| 1771 | err = PTR_ERR(inode); | 1773 | err = PTR_ERR(inode); |
| 1772 | if (!IS_ERR(inode)) { | 1774 | if (!IS_ERR(inode)) { |
| 1773 | init_special_inode(inode, inode->i_mode, rdev); | 1775 | init_special_inode(inode, inode->i_mode, rdev); |
| 1774 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1776 | #ifdef CONFIG_EXT4_FS_XATTR |
| 1775 | inode->i_op = &ext4_special_inode_operations; | 1777 | inode->i_op = &ext4_special_inode_operations; |
| 1776 | #endif | 1778 | #endif |
| 1777 | err = ext4_add_nondir(handle, dentry, inode); | 1779 | err = ext4_add_nondir(handle, dentry, inode); |
| @@ -1782,12 +1784,12 @@ retry: | |||
| 1782 | return err; | 1784 | return err; |
| 1783 | } | 1785 | } |
| 1784 | 1786 | ||
| 1785 | static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) | 1787 | static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode) |
| 1786 | { | 1788 | { |
| 1787 | handle_t *handle; | 1789 | handle_t *handle; |
| 1788 | struct inode * inode; | 1790 | struct inode *inode; |
| 1789 | struct buffer_head * dir_block; | 1791 | struct buffer_head *dir_block; |
| 1790 | struct ext4_dir_entry_2 * de; | 1792 | struct ext4_dir_entry_2 *de; |
| 1791 | int err, retries = 0; | 1793 | int err, retries = 0; |
| 1792 | 1794 | ||
| 1793 | if (EXT4_DIR_LINK_MAX(dir)) | 1795 | if (EXT4_DIR_LINK_MAX(dir)) |
| @@ -1803,7 +1805,7 @@ retry: | |||
| 1803 | if (IS_DIRSYNC(dir)) | 1805 | if (IS_DIRSYNC(dir)) |
| 1804 | handle->h_sync = 1; | 1806 | handle->h_sync = 1; |
| 1805 | 1807 | ||
| 1806 | inode = ext4_new_inode (handle, dir, S_IFDIR | mode); | 1808 | inode = ext4_new_inode(handle, dir, S_IFDIR | mode); |
| 1807 | err = PTR_ERR(inode); | 1809 | err = PTR_ERR(inode); |
| 1808 | if (IS_ERR(inode)) | 1810 | if (IS_ERR(inode)) |
| 1809 | goto out_stop; | 1811 | goto out_stop; |
| @@ -1811,7 +1813,7 @@ retry: | |||
| 1811 | inode->i_op = &ext4_dir_inode_operations; | 1813 | inode->i_op = &ext4_dir_inode_operations; |
| 1812 | inode->i_fop = &ext4_dir_operations; | 1814 | inode->i_fop = &ext4_dir_operations; |
| 1813 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; | 1815 | inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; |
| 1814 | dir_block = ext4_bread (handle, inode, 0, 1, &err); | 1816 | dir_block = ext4_bread(handle, inode, 0, 1, &err); |
| 1815 | if (!dir_block) | 1817 | if (!dir_block) |
| 1816 | goto out_clear_inode; | 1818 | goto out_clear_inode; |
| 1817 | BUFFER_TRACE(dir_block, "get_write_access"); | 1819 | BUFFER_TRACE(dir_block, "get_write_access"); |
| @@ -1820,26 +1822,26 @@ retry: | |||
| 1820 | de->inode = cpu_to_le32(inode->i_ino); | 1822 | de->inode = cpu_to_le32(inode->i_ino); |
| 1821 | de->name_len = 1; | 1823 | de->name_len = 1; |
| 1822 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); | 1824 | de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len)); |
| 1823 | strcpy (de->name, "."); | 1825 | strcpy(de->name, "."); |
| 1824 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1826 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
| 1825 | de = ext4_next_entry(de); | 1827 | de = ext4_next_entry(de); |
| 1826 | de->inode = cpu_to_le32(dir->i_ino); | 1828 | de->inode = cpu_to_le32(dir->i_ino); |
| 1827 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - | 1829 | de->rec_len = ext4_rec_len_to_disk(inode->i_sb->s_blocksize - |
| 1828 | EXT4_DIR_REC_LEN(1)); | 1830 | EXT4_DIR_REC_LEN(1)); |
| 1829 | de->name_len = 2; | 1831 | de->name_len = 2; |
| 1830 | strcpy (de->name, ".."); | 1832 | strcpy(de->name, ".."); |
| 1831 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); | 1833 | ext4_set_de_type(dir->i_sb, de, S_IFDIR); |
| 1832 | inode->i_nlink = 2; | 1834 | inode->i_nlink = 2; |
| 1833 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); | 1835 | BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); |
| 1834 | ext4_journal_dirty_metadata(handle, dir_block); | 1836 | ext4_journal_dirty_metadata(handle, dir_block); |
| 1835 | brelse (dir_block); | 1837 | brelse(dir_block); |
| 1836 | ext4_mark_inode_dirty(handle, inode); | 1838 | ext4_mark_inode_dirty(handle, inode); |
| 1837 | err = ext4_add_entry (handle, dentry, inode); | 1839 | err = ext4_add_entry(handle, dentry, inode); |
| 1838 | if (err) { | 1840 | if (err) { |
| 1839 | out_clear_inode: | 1841 | out_clear_inode: |
| 1840 | clear_nlink(inode); | 1842 | clear_nlink(inode); |
| 1841 | ext4_mark_inode_dirty(handle, inode); | 1843 | ext4_mark_inode_dirty(handle, inode); |
| 1842 | iput (inode); | 1844 | iput(inode); |
| 1843 | goto out_stop; | 1845 | goto out_stop; |
| 1844 | } | 1846 | } |
| 1845 | ext4_inc_count(handle, dir); | 1847 | ext4_inc_count(handle, dir); |
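
For context, the ext4_mkdir() hunk above lays out the new directory's first block as a "." entry followed by a ".." entry whose rec_len absorbs the remainder of the block. A small stand-alone sketch of that arithmetic — EXT4_DIR_REC_LEN() reproduced from its kernel definition of this era, with a 4096-byte block size chosen only as an example:

    #include <stdio.h>

    #define EXT4_DIR_ROUND        3
    #define EXT4_DIR_REC_LEN(len) (((len) + 8 + EXT4_DIR_ROUND) & ~EXT4_DIR_ROUND)

    int main(void)
    {
            unsigned int blocksize = 4096;                  /* example block size only   */
            unsigned int dot_rec_len = EXT4_DIR_REC_LEN(1); /* "." entry                 */
            unsigned int dotdot_rec_len =
                    blocksize - EXT4_DIR_REC_LEN(1);        /* ".." fills the rest       */

            printf("\".\"  rec_len = %u\n", dot_rec_len);    /* 12   */
            printf("\"..\" rec_len = %u\n", dotdot_rec_len);  /* 4084 */
            return 0;
    }

This mirrors the two rec_len assignments in the hunk: "." gets exactly EXT4_DIR_REC_LEN(1), and ".." is sized to the end of the block so later entries split it.
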
| @@ -1856,17 +1858,17 @@ out_stop: | |||
| 1856 | /* | 1858 | /* |
| 1857 | * routine to check that the specified directory is empty (for rmdir) | 1859 | * routine to check that the specified directory is empty (for rmdir) |
| 1858 | */ | 1860 | */ |
| 1859 | static int empty_dir (struct inode * inode) | 1861 | static int empty_dir(struct inode *inode) |
| 1860 | { | 1862 | { |
| 1861 | unsigned long offset; | 1863 | unsigned long offset; |
| 1862 | struct buffer_head * bh; | 1864 | struct buffer_head *bh; |
| 1863 | struct ext4_dir_entry_2 * de, * de1; | 1865 | struct ext4_dir_entry_2 *de, *de1; |
| 1864 | struct super_block * sb; | 1866 | struct super_block *sb; |
| 1865 | int err = 0; | 1867 | int err = 0; |
| 1866 | 1868 | ||
| 1867 | sb = inode->i_sb; | 1869 | sb = inode->i_sb; |
| 1868 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || | 1870 | if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || |
| 1869 | !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { | 1871 | !(bh = ext4_bread(NULL, inode, 0, 0, &err))) { |
| 1870 | if (err) | 1872 | if (err) |
| 1871 | ext4_error(inode->i_sb, __func__, | 1873 | ext4_error(inode->i_sb, __func__, |
| 1872 | "error %d reading directory #%lu offset 0", | 1874 | "error %d reading directory #%lu offset 0", |
| @@ -1881,23 +1883,23 @@ static int empty_dir (struct inode * inode) | |||
| 1881 | de1 = ext4_next_entry(de); | 1883 | de1 = ext4_next_entry(de); |
| 1882 | if (le32_to_cpu(de->inode) != inode->i_ino || | 1884 | if (le32_to_cpu(de->inode) != inode->i_ino || |
| 1883 | !le32_to_cpu(de1->inode) || | 1885 | !le32_to_cpu(de1->inode) || |
| 1884 | strcmp (".", de->name) || | 1886 | strcmp(".", de->name) || |
| 1885 | strcmp ("..", de1->name)) { | 1887 | strcmp("..", de1->name)) { |
| 1886 | ext4_warning (inode->i_sb, "empty_dir", | 1888 | ext4_warning(inode->i_sb, "empty_dir", |
| 1887 | "bad directory (dir #%lu) - no `.' or `..'", | 1889 | "bad directory (dir #%lu) - no `.' or `..'", |
| 1888 | inode->i_ino); | 1890 | inode->i_ino); |
| 1889 | brelse (bh); | 1891 | brelse(bh); |
| 1890 | return 1; | 1892 | return 1; |
| 1891 | } | 1893 | } |
| 1892 | offset = ext4_rec_len_from_disk(de->rec_len) + | 1894 | offset = ext4_rec_len_from_disk(de->rec_len) + |
| 1893 | ext4_rec_len_from_disk(de1->rec_len); | 1895 | ext4_rec_len_from_disk(de1->rec_len); |
| 1894 | de = ext4_next_entry(de1); | 1896 | de = ext4_next_entry(de1); |
| 1895 | while (offset < inode->i_size ) { | 1897 | while (offset < inode->i_size) { |
| 1896 | if (!bh || | 1898 | if (!bh || |
| 1897 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { | 1899 | (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { |
| 1898 | err = 0; | 1900 | err = 0; |
| 1899 | brelse (bh); | 1901 | brelse(bh); |
| 1900 | bh = ext4_bread (NULL, inode, | 1902 | bh = ext4_bread(NULL, inode, |
| 1901 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); | 1903 | offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); |
| 1902 | if (!bh) { | 1904 | if (!bh) { |
| 1903 | if (err) | 1905 | if (err) |
| @@ -1917,13 +1919,13 @@ static int empty_dir (struct inode * inode) | |||
| 1917 | continue; | 1919 | continue; |
| 1918 | } | 1920 | } |
| 1919 | if (le32_to_cpu(de->inode)) { | 1921 | if (le32_to_cpu(de->inode)) { |
| 1920 | brelse (bh); | 1922 | brelse(bh); |
| 1921 | return 0; | 1923 | return 0; |
| 1922 | } | 1924 | } |
| 1923 | offset += ext4_rec_len_from_disk(de->rec_len); | 1925 | offset += ext4_rec_len_from_disk(de->rec_len); |
| 1924 | de = ext4_next_entry(de); | 1926 | de = ext4_next_entry(de); |
| 1925 | } | 1927 | } |
| 1926 | brelse (bh); | 1928 | brelse(bh); |
| 1927 | return 1; | 1929 | return 1; |
| 1928 | } | 1930 | } |
| 1929 | 1931 | ||
| @@ -1954,8 +1956,8 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
| 1954 | * ->i_nlink. For, say it, character device. Not a regular file, | 1956 | * ->i_nlink. For, say it, character device. Not a regular file, |
| 1955 | * not a directory, not a symlink and ->i_nlink > 0. | 1957 | * not a directory, not a symlink and ->i_nlink > 0. |
| 1956 | */ | 1958 | */ |
| 1957 | J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || | 1959 | J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || |
| 1958 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); | 1960 | S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); |
| 1959 | 1961 | ||
| 1960 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); | 1962 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); |
| 1961 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); | 1963 | err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); |
| @@ -2069,12 +2071,12 @@ out_brelse: | |||
| 2069 | goto out_err; | 2071 | goto out_err; |
| 2070 | } | 2072 | } |
| 2071 | 2073 | ||
| 2072 | static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | 2074 | static int ext4_rmdir(struct inode *dir, struct dentry *dentry) |
| 2073 | { | 2075 | { |
| 2074 | int retval; | 2076 | int retval; |
| 2075 | struct inode * inode; | 2077 | struct inode *inode; |
| 2076 | struct buffer_head * bh; | 2078 | struct buffer_head *bh; |
| 2077 | struct ext4_dir_entry_2 * de; | 2079 | struct ext4_dir_entry_2 *de; |
| 2078 | handle_t *handle; | 2080 | handle_t *handle; |
| 2079 | 2081 | ||
| 2080 | /* Initialize quotas before so that eventual writes go in | 2082 | /* Initialize quotas before so that eventual writes go in |
| @@ -2085,7 +2087,7 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
| 2085 | return PTR_ERR(handle); | 2087 | return PTR_ERR(handle); |
| 2086 | 2088 | ||
| 2087 | retval = -ENOENT; | 2089 | retval = -ENOENT; |
| 2088 | bh = ext4_find_entry (dentry, &de); | 2090 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
| 2089 | if (!bh) | 2091 | if (!bh) |
| 2090 | goto end_rmdir; | 2092 | goto end_rmdir; |
| 2091 | 2093 | ||
| @@ -2099,16 +2101,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
| 2099 | goto end_rmdir; | 2101 | goto end_rmdir; |
| 2100 | 2102 | ||
| 2101 | retval = -ENOTEMPTY; | 2103 | retval = -ENOTEMPTY; |
| 2102 | if (!empty_dir (inode)) | 2104 | if (!empty_dir(inode)) |
| 2103 | goto end_rmdir; | 2105 | goto end_rmdir; |
| 2104 | 2106 | ||
| 2105 | retval = ext4_delete_entry(handle, dir, de, bh); | 2107 | retval = ext4_delete_entry(handle, dir, de, bh); |
| 2106 | if (retval) | 2108 | if (retval) |
| 2107 | goto end_rmdir; | 2109 | goto end_rmdir; |
| 2108 | if (!EXT4_DIR_LINK_EMPTY(inode)) | 2110 | if (!EXT4_DIR_LINK_EMPTY(inode)) |
| 2109 | ext4_warning (inode->i_sb, "ext4_rmdir", | 2111 | ext4_warning(inode->i_sb, "ext4_rmdir", |
| 2110 | "empty directory has too many links (%d)", | 2112 | "empty directory has too many links (%d)", |
| 2111 | inode->i_nlink); | 2113 | inode->i_nlink); |
| 2112 | inode->i_version++; | 2114 | inode->i_version++; |
| 2113 | clear_nlink(inode); | 2115 | clear_nlink(inode); |
| 2114 | /* There's no need to set i_disksize: the fact that i_nlink is | 2116 | /* There's no need to set i_disksize: the fact that i_nlink is |
| @@ -2124,16 +2126,16 @@ static int ext4_rmdir (struct inode * dir, struct dentry *dentry) | |||
| 2124 | 2126 | ||
| 2125 | end_rmdir: | 2127 | end_rmdir: |
| 2126 | ext4_journal_stop(handle); | 2128 | ext4_journal_stop(handle); |
| 2127 | brelse (bh); | 2129 | brelse(bh); |
| 2128 | return retval; | 2130 | return retval; |
| 2129 | } | 2131 | } |
| 2130 | 2132 | ||
| 2131 | static int ext4_unlink(struct inode * dir, struct dentry *dentry) | 2133 | static int ext4_unlink(struct inode *dir, struct dentry *dentry) |
| 2132 | { | 2134 | { |
| 2133 | int retval; | 2135 | int retval; |
| 2134 | struct inode * inode; | 2136 | struct inode *inode; |
| 2135 | struct buffer_head * bh; | 2137 | struct buffer_head *bh; |
| 2136 | struct ext4_dir_entry_2 * de; | 2138 | struct ext4_dir_entry_2 *de; |
| 2137 | handle_t *handle; | 2139 | handle_t *handle; |
| 2138 | 2140 | ||
| 2139 | /* Initialize quotas before so that eventual writes go | 2141 | /* Initialize quotas before so that eventual writes go |
| @@ -2147,7 +2149,7 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
| 2147 | handle->h_sync = 1; | 2149 | handle->h_sync = 1; |
| 2148 | 2150 | ||
| 2149 | retval = -ENOENT; | 2151 | retval = -ENOENT; |
| 2150 | bh = ext4_find_entry (dentry, &de); | 2152 | bh = ext4_find_entry(dir, &dentry->d_name, &de); |
| 2151 | if (!bh) | 2153 | if (!bh) |
| 2152 | goto end_unlink; | 2154 | goto end_unlink; |
| 2153 | 2155 | ||
| @@ -2158,9 +2160,9 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
| 2158 | goto end_unlink; | 2160 | goto end_unlink; |
| 2159 | 2161 | ||
| 2160 | if (!inode->i_nlink) { | 2162 | if (!inode->i_nlink) { |
| 2161 | ext4_warning (inode->i_sb, "ext4_unlink", | 2163 | ext4_warning(inode->i_sb, "ext4_unlink", |
| 2162 | "Deleting nonexistent file (%lu), %d", | 2164 | "Deleting nonexistent file (%lu), %d", |
| 2163 | inode->i_ino, inode->i_nlink); | 2165 | inode->i_ino, inode->i_nlink); |
| 2164 | inode->i_nlink = 1; | 2166 | inode->i_nlink = 1; |
| 2165 | } | 2167 | } |
| 2166 | retval = ext4_delete_entry(handle, dir, de, bh); | 2168 | retval = ext4_delete_entry(handle, dir, de, bh); |
| @@ -2178,15 +2180,15 @@ static int ext4_unlink(struct inode * dir, struct dentry *dentry) | |||
| 2178 | 2180 | ||
| 2179 | end_unlink: | 2181 | end_unlink: |
| 2180 | ext4_journal_stop(handle); | 2182 | ext4_journal_stop(handle); |
| 2181 | brelse (bh); | 2183 | brelse(bh); |
| 2182 | return retval; | 2184 | return retval; |
| 2183 | } | 2185 | } |
| 2184 | 2186 | ||
| 2185 | static int ext4_symlink (struct inode * dir, | 2187 | static int ext4_symlink(struct inode *dir, |
| 2186 | struct dentry *dentry, const char * symname) | 2188 | struct dentry *dentry, const char *symname) |
| 2187 | { | 2189 | { |
| 2188 | handle_t *handle; | 2190 | handle_t *handle; |
| 2189 | struct inode * inode; | 2191 | struct inode *inode; |
| 2190 | int l, err, retries = 0; | 2192 | int l, err, retries = 0; |
| 2191 | 2193 | ||
| 2192 | l = strlen(symname)+1; | 2194 | l = strlen(symname)+1; |
| @@ -2203,12 +2205,12 @@ retry: | |||
| 2203 | if (IS_DIRSYNC(dir)) | 2205 | if (IS_DIRSYNC(dir)) |
| 2204 | handle->h_sync = 1; | 2206 | handle->h_sync = 1; |
| 2205 | 2207 | ||
| 2206 | inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); | 2208 | inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO); |
| 2207 | err = PTR_ERR(inode); | 2209 | err = PTR_ERR(inode); |
| 2208 | if (IS_ERR(inode)) | 2210 | if (IS_ERR(inode)) |
| 2209 | goto out_stop; | 2211 | goto out_stop; |
| 2210 | 2212 | ||
| 2211 | if (l > sizeof (EXT4_I(inode)->i_data)) { | 2213 | if (l > sizeof(EXT4_I(inode)->i_data)) { |
| 2212 | inode->i_op = &ext4_symlink_inode_operations; | 2214 | inode->i_op = &ext4_symlink_inode_operations; |
| 2213 | ext4_set_aops(inode); | 2215 | ext4_set_aops(inode); |
| 2214 | /* | 2216 | /* |
| @@ -2221,14 +2223,14 @@ retry: | |||
| 2221 | if (err) { | 2223 | if (err) { |
| 2222 | clear_nlink(inode); | 2224 | clear_nlink(inode); |
| 2223 | ext4_mark_inode_dirty(handle, inode); | 2225 | ext4_mark_inode_dirty(handle, inode); |
| 2224 | iput (inode); | 2226 | iput(inode); |
| 2225 | goto out_stop; | 2227 | goto out_stop; |
| 2226 | } | 2228 | } |
| 2227 | } else { | 2229 | } else { |
| 2228 | /* clear the extent format for fast symlink */ | 2230 | /* clear the extent format for fast symlink */ |
| 2229 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; | 2231 | EXT4_I(inode)->i_flags &= ~EXT4_EXTENTS_FL; |
| 2230 | inode->i_op = &ext4_fast_symlink_inode_operations; | 2232 | inode->i_op = &ext4_fast_symlink_inode_operations; |
| 2231 | memcpy((char*)&EXT4_I(inode)->i_data,symname,l); | 2233 | memcpy((char *)&EXT4_I(inode)->i_data, symname, l); |
| 2232 | inode->i_size = l-1; | 2234 | inode->i_size = l-1; |
| 2233 | } | 2235 | } |
| 2234 | EXT4_I(inode)->i_disksize = inode->i_size; | 2236 | EXT4_I(inode)->i_disksize = inode->i_size; |
| @@ -2240,8 +2242,8 @@ out_stop: | |||
| 2240 | return err; | 2242 | return err; |
| 2241 | } | 2243 | } |
| 2242 | 2244 | ||
| 2243 | static int ext4_link (struct dentry * old_dentry, | 2245 | static int ext4_link(struct dentry *old_dentry, |
| 2244 | struct inode * dir, struct dentry *dentry) | 2246 | struct inode *dir, struct dentry *dentry) |
| 2245 | { | 2247 | { |
| 2246 | handle_t *handle; | 2248 | handle_t *handle; |
| 2247 | struct inode *inode = old_dentry->d_inode; | 2249 | struct inode *inode = old_dentry->d_inode; |
| @@ -2284,13 +2286,13 @@ retry: | |||
| 2284 | * Anybody can rename anything with this: the permission checks are left to the | 2286 | * Anybody can rename anything with this: the permission checks are left to the |
| 2285 | * higher-level routines. | 2287 | * higher-level routines. |
| 2286 | */ | 2288 | */ |
| 2287 | static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | 2289 | static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, |
| 2288 | struct inode * new_dir,struct dentry *new_dentry) | 2290 | struct inode *new_dir, struct dentry *new_dentry) |
| 2289 | { | 2291 | { |
| 2290 | handle_t *handle; | 2292 | handle_t *handle; |
| 2291 | struct inode * old_inode, * new_inode; | 2293 | struct inode *old_inode, *new_inode; |
| 2292 | struct buffer_head * old_bh, * new_bh, * dir_bh; | 2294 | struct buffer_head *old_bh, *new_bh, *dir_bh; |
| 2293 | struct ext4_dir_entry_2 * old_de, * new_de; | 2295 | struct ext4_dir_entry_2 *old_de, *new_de; |
| 2294 | int retval; | 2296 | int retval; |
| 2295 | 2297 | ||
| 2296 | old_bh = new_bh = dir_bh = NULL; | 2298 | old_bh = new_bh = dir_bh = NULL; |
| @@ -2308,7 +2310,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
| 2308 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) | 2310 | if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) |
| 2309 | handle->h_sync = 1; | 2311 | handle->h_sync = 1; |
| 2310 | 2312 | ||
| 2311 | old_bh = ext4_find_entry (old_dentry, &old_de); | 2313 | old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de); |
| 2312 | /* | 2314 | /* |
| 2313 | * Check for inode number is _not_ due to possible IO errors. | 2315 | * Check for inode number is _not_ due to possible IO errors. |
| 2314 | * We might rmdir the source, keep it as pwd of some process | 2316 | * We might rmdir the source, keep it as pwd of some process |
| @@ -2321,32 +2323,32 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
| 2321 | goto end_rename; | 2323 | goto end_rename; |
| 2322 | 2324 | ||
| 2323 | new_inode = new_dentry->d_inode; | 2325 | new_inode = new_dentry->d_inode; |
| 2324 | new_bh = ext4_find_entry (new_dentry, &new_de); | 2326 | new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de); |
| 2325 | if (new_bh) { | 2327 | if (new_bh) { |
| 2326 | if (!new_inode) { | 2328 | if (!new_inode) { |
| 2327 | brelse (new_bh); | 2329 | brelse(new_bh); |
| 2328 | new_bh = NULL; | 2330 | new_bh = NULL; |
| 2329 | } | 2331 | } |
| 2330 | } | 2332 | } |
| 2331 | if (S_ISDIR(old_inode->i_mode)) { | 2333 | if (S_ISDIR(old_inode->i_mode)) { |
| 2332 | if (new_inode) { | 2334 | if (new_inode) { |
| 2333 | retval = -ENOTEMPTY; | 2335 | retval = -ENOTEMPTY; |
| 2334 | if (!empty_dir (new_inode)) | 2336 | if (!empty_dir(new_inode)) |
| 2335 | goto end_rename; | 2337 | goto end_rename; |
| 2336 | } | 2338 | } |
| 2337 | retval = -EIO; | 2339 | retval = -EIO; |
| 2338 | dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); | 2340 | dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval); |
| 2339 | if (!dir_bh) | 2341 | if (!dir_bh) |
| 2340 | goto end_rename; | 2342 | goto end_rename; |
| 2341 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) | 2343 | if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) |
| 2342 | goto end_rename; | 2344 | goto end_rename; |
| 2343 | retval = -EMLINK; | 2345 | retval = -EMLINK; |
| 2344 | if (!new_inode && new_dir!=old_dir && | 2346 | if (!new_inode && new_dir != old_dir && |
| 2345 | new_dir->i_nlink >= EXT4_LINK_MAX) | 2347 | new_dir->i_nlink >= EXT4_LINK_MAX) |
| 2346 | goto end_rename; | 2348 | goto end_rename; |
| 2347 | } | 2349 | } |
| 2348 | if (!new_bh) { | 2350 | if (!new_bh) { |
| 2349 | retval = ext4_add_entry (handle, new_dentry, old_inode); | 2351 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
| 2350 | if (retval) | 2352 | if (retval) |
| 2351 | goto end_rename; | 2353 | goto end_rename; |
| 2352 | } else { | 2354 | } else { |
| @@ -2388,7 +2390,7 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
| 2388 | struct buffer_head *old_bh2; | 2390 | struct buffer_head *old_bh2; |
| 2389 | struct ext4_dir_entry_2 *old_de2; | 2391 | struct ext4_dir_entry_2 *old_de2; |
| 2390 | 2392 | ||
| 2391 | old_bh2 = ext4_find_entry(old_dentry, &old_de2); | 2393 | old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2); |
| 2392 | if (old_bh2) { | 2394 | if (old_bh2) { |
| 2393 | retval = ext4_delete_entry(handle, old_dir, | 2395 | retval = ext4_delete_entry(handle, old_dir, |
| 2394 | old_de2, old_bh2); | 2396 | old_de2, old_bh2); |
| @@ -2433,9 +2435,9 @@ static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, | |||
| 2433 | retval = 0; | 2435 | retval = 0; |
| 2434 | 2436 | ||
| 2435 | end_rename: | 2437 | end_rename: |
| 2436 | brelse (dir_bh); | 2438 | brelse(dir_bh); |
| 2437 | brelse (old_bh); | 2439 | brelse(old_bh); |
| 2438 | brelse (new_bh); | 2440 | brelse(new_bh); |
| 2439 | ext4_journal_stop(handle); | 2441 | ext4_journal_stop(handle); |
| 2440 | return retval; | 2442 | return retval; |
| 2441 | } | 2443 | } |
| @@ -2454,7 +2456,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
| 2454 | .mknod = ext4_mknod, | 2456 | .mknod = ext4_mknod, |
| 2455 | .rename = ext4_rename, | 2457 | .rename = ext4_rename, |
| 2456 | .setattr = ext4_setattr, | 2458 | .setattr = ext4_setattr, |
| 2457 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2459 | #ifdef CONFIG_EXT4_FS_XATTR |
| 2458 | .setxattr = generic_setxattr, | 2460 | .setxattr = generic_setxattr, |
| 2459 | .getxattr = generic_getxattr, | 2461 | .getxattr = generic_getxattr, |
| 2460 | .listxattr = ext4_listxattr, | 2462 | .listxattr = ext4_listxattr, |
| @@ -2465,7 +2467,7 @@ const struct inode_operations ext4_dir_inode_operations = { | |||
| 2465 | 2467 | ||
| 2466 | const struct inode_operations ext4_special_inode_operations = { | 2468 | const struct inode_operations ext4_special_inode_operations = { |
| 2467 | .setattr = ext4_setattr, | 2469 | .setattr = ext4_setattr, |
| 2468 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 2470 | #ifdef CONFIG_EXT4_FS_XATTR |
| 2469 | .setxattr = generic_setxattr, | 2471 | .setxattr = generic_setxattr, |
| 2470 | .getxattr = generic_getxattr, | 2472 | .getxattr = generic_getxattr, |
| 2471 | .listxattr = ext4_listxattr, | 2473 | .listxattr = ext4_listxattr, |
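
The recurring change through ext4_rmdir(), ext4_unlink() and ext4_rename() above is the ext4_find_entry() calling convention: the helper now takes the parent inode and a struct qstr instead of a dentry. A minimal user-space sketch of that decoupling, using stand-in types rather than the real kernel structures (the real helper also returns the buffer_head and fills in the directory-entry pointer, omitted here):

    #include <stdio.h>

    struct qstr   { const char *name; unsigned int len; }; /* stand-in for struct qstr   */
    struct inode  { unsigned long i_ino; };                 /* stand-in for struct inode  */
    struct dentry { struct qstr d_name; struct inode *d_parent; };

    /* old shape: the helper reaches through the dentry for both pieces */
    static void find_entry_old(const struct dentry *dentry)
    {
            printf("lookup \"%s\" in dir inode %lu\n",
                   dentry->d_name.name, dentry->d_parent->i_ino);
    }

    /* new shape, as in the hunks above: parent inode and name passed explicitly */
    static void find_entry_new(const struct inode *dir, const struct qstr *name)
    {
            printf("lookup \"%s\" in dir inode %lu\n", name->name, dir->i_ino);
    }

    int main(void)
    {
            struct inode root = { 2 };
            struct dentry d = { { "lost+found", 10 }, &root };

            find_entry_old(&d);                /* ext4_find_entry(dentry, &de)               */
            find_entry_new(&root, &d.d_name);  /* ext4_find_entry(dir, &dentry->d_name, &de) */
            return 0;
    }
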
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b3d35604ea18..b6ec1843a015 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
| @@ -416,8 +416,8 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
| 416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", | 416 | "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", |
| 417 | gdb_num); | 417 | gdb_num); |
| 418 | 418 | ||
| 419 | /* | 419 | /* |
| 420 | * If we are not using the primary superblock/GDT copy don't resize, | 420 | * If we are not using the primary superblock/GDT copy don't resize, |
| 421 | * because the user tools have no way of handling this. Probably a | 421 | * because the user tools have no way of handling this. Probably a |
| 422 | * bad time to do it anyways. | 422 | * bad time to do it anyways. |
| 423 | */ | 423 | */ |
| @@ -870,11 +870,10 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 870 | * We can allocate memory for mb_alloc based on the new group | 870 | * We can allocate memory for mb_alloc based on the new group |
| 871 | * descriptor | 871 | * descriptor |
| 872 | */ | 872 | */ |
| 873 | if (test_opt(sb, MBALLOC)) { | 873 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); |
| 874 | err = ext4_mb_add_more_groupinfo(sb, input->group, gdp); | 874 | if (err) |
| 875 | if (err) | 875 | goto exit_journal; |
| 876 | goto exit_journal; | 876 | |
| 877 | } | ||
| 878 | /* | 877 | /* |
| 879 | * Make the new blocks and inodes valid next. We do this before | 878 | * Make the new blocks and inodes valid next. We do this before |
| 880 | * increasing the group count so that once the group is enabled, | 879 | * increasing the group count so that once the group is enabled, |
| @@ -929,6 +928,15 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) | |||
| 929 | percpu_counter_add(&sbi->s_freeinodes_counter, | 928 | percpu_counter_add(&sbi->s_freeinodes_counter, |
| 930 | EXT4_INODES_PER_GROUP(sb)); | 929 | EXT4_INODES_PER_GROUP(sb)); |
| 931 | 930 | ||
| 931 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) { | ||
| 932 | ext4_group_t flex_group; | ||
| 933 | flex_group = ext4_flex_group(sbi, input->group); | ||
| 934 | sbi->s_flex_groups[flex_group].free_blocks += | ||
| 935 | input->free_blocks_count; | ||
| 936 | sbi->s_flex_groups[flex_group].free_inodes += | ||
| 937 | EXT4_INODES_PER_GROUP(sb); | ||
| 938 | } | ||
| 939 | |||
| 932 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); | 940 | ext4_journal_dirty_metadata(handle, sbi->s_sbh); |
| 933 | sb->s_dirt = 1; | 941 | sb->s_dirt = 1; |
| 934 | 942 | ||
| @@ -964,7 +972,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 964 | ext4_group_t o_groups_count; | 972 | ext4_group_t o_groups_count; |
| 965 | ext4_grpblk_t last; | 973 | ext4_grpblk_t last; |
| 966 | ext4_grpblk_t add; | 974 | ext4_grpblk_t add; |
| 967 | struct buffer_head * bh; | 975 | struct buffer_head *bh; |
| 968 | handle_t *handle; | 976 | handle_t *handle; |
| 969 | int err; | 977 | int err; |
| 970 | unsigned long freed_blocks; | 978 | unsigned long freed_blocks; |
| @@ -1077,8 +1085,15 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, | |||
| 1077 | /* | 1085 | /* |
| 1078 | * Mark mballoc pages as not up to date so that they will be updated | 1086 | * Mark mballoc pages as not up to date so that they will be updated |
| 1079 | * next time they are loaded by ext4_mb_load_buddy. | 1087 | * next time they are loaded by ext4_mb_load_buddy. |
| 1088 | * | ||
| 1089 | * XXX Bad, Bad, BAD!!! We should not be overloading the | ||
| 1090 | * Uptodate flag, particularly on the bitmap bh, as a way of | ||
| 1091 | * hinting to ext4_mb_load_buddy() that it needs to be | ||
| 1092 | * reloaded. A user could take an LVM snapshot, then do an | ||
| 1093 | * on-line fsck, and clear the uptodate flag, and this would | ||
| 1094 | * not be a bug in userspace, but a bug in the kernel. FIXME!!! | ||
| 1080 | */ | 1095 | */ |
| 1081 | if (test_opt(sb, MBALLOC)) { | 1096 | { |
| 1082 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 1097 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
| 1083 | struct inode *inode = sbi->s_buddy_cache; | 1098 | struct inode *inode = sbi->s_buddy_cache; |
| 1084 | int blocks_per_page; | 1099 | int blocks_per_page; |
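
The ext4_group_add() hunk above starts crediting a newly added group's free blocks and inodes to its flex_bg counters when FLEX_BG is enabled. A self-contained sketch of that bookkeeping; the group-to-flex-group mapping used here (group >> log_groups_per_flex) is an assumption chosen to match the groups_per_flex computation visible in the super.c hunks below, and all numbers are examples:

    #include <stdio.h>

    struct flex_groups { unsigned long free_blocks; unsigned long free_inodes; };

    /* assumed mapping, consistent with groups_per_flex = 1 << s_log_groups_per_flex */
    static unsigned int flex_group_of(unsigned int group, unsigned int log_groups_per_flex)
    {
            return group >> log_groups_per_flex;
    }

    int main(void)
    {
            struct flex_groups flex[8] = { { 0, 0 } };
            unsigned int log_groups_per_flex = 4;    /* 16 groups per flex group (example) */
            unsigned int new_group = 35;             /* group just added by resize         */
            unsigned long free_blocks_count = 32254; /* sample input->free_blocks_count    */
            unsigned long inodes_per_group  = 8192;  /* sample EXT4_INODES_PER_GROUP(sb)   */

            unsigned int fg = flex_group_of(new_group, log_groups_per_flex);
            flex[fg].free_blocks += free_blocks_count;
            flex[fg].free_inodes += inodes_per_group;

            printf("group %u -> flex group %u: %lu free blocks, %lu free inodes\n",
                   new_group, fg, flex[fg].free_blocks, flex[fg].free_inodes);
            return 0;
    }
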
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 566344b926b7..9b2b2bc4ec17 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
| @@ -34,6 +34,8 @@ | |||
| 34 | #include <linux/namei.h> | 34 | #include <linux/namei.h> |
| 35 | #include <linux/quotaops.h> | 35 | #include <linux/quotaops.h> |
| 36 | #include <linux/seq_file.h> | 36 | #include <linux/seq_file.h> |
| 37 | #include <linux/proc_fs.h> | ||
| 38 | #include <linux/marker.h> | ||
| 37 | #include <linux/log2.h> | 39 | #include <linux/log2.h> |
| 38 | #include <linux/crc16.h> | 40 | #include <linux/crc16.h> |
| 39 | #include <asm/uaccess.h> | 41 | #include <asm/uaccess.h> |
| @@ -45,6 +47,8 @@ | |||
| 45 | #include "namei.h" | 47 | #include "namei.h" |
| 46 | #include "group.h" | 48 | #include "group.h" |
| 47 | 49 | ||
| 50 | struct proc_dir_entry *ext4_proc_root; | ||
| 51 | |||
| 48 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 52 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
| 49 | unsigned long journal_devnum); | 53 | unsigned long journal_devnum); |
| 50 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, | 54 | static int ext4_create_journal(struct super_block *, struct ext4_super_block *, |
| @@ -370,66 +374,6 @@ void ext4_update_dynamic_rev(struct super_block *sb) | |||
| 370 | */ | 374 | */ |
| 371 | } | 375 | } |
| 372 | 376 | ||
| 373 | int ext4_update_compat_feature(handle_t *handle, | ||
| 374 | struct super_block *sb, __u32 compat) | ||
| 375 | { | ||
| 376 | int err = 0; | ||
| 377 | if (!EXT4_HAS_COMPAT_FEATURE(sb, compat)) { | ||
| 378 | err = ext4_journal_get_write_access(handle, | ||
| 379 | EXT4_SB(sb)->s_sbh); | ||
| 380 | if (err) | ||
| 381 | return err; | ||
| 382 | EXT4_SET_COMPAT_FEATURE(sb, compat); | ||
| 383 | sb->s_dirt = 1; | ||
| 384 | handle->h_sync = 1; | ||
| 385 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
| 386 | "call ext4_journal_dirty_met adata"); | ||
| 387 | err = ext4_journal_dirty_metadata(handle, | ||
| 388 | EXT4_SB(sb)->s_sbh); | ||
| 389 | } | ||
| 390 | return err; | ||
| 391 | } | ||
| 392 | |||
| 393 | int ext4_update_rocompat_feature(handle_t *handle, | ||
| 394 | struct super_block *sb, __u32 rocompat) | ||
| 395 | { | ||
| 396 | int err = 0; | ||
| 397 | if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, rocompat)) { | ||
| 398 | err = ext4_journal_get_write_access(handle, | ||
| 399 | EXT4_SB(sb)->s_sbh); | ||
| 400 | if (err) | ||
| 401 | return err; | ||
| 402 | EXT4_SET_RO_COMPAT_FEATURE(sb, rocompat); | ||
| 403 | sb->s_dirt = 1; | ||
| 404 | handle->h_sync = 1; | ||
| 405 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
| 406 | "call ext4_journal_dirty_met adata"); | ||
| 407 | err = ext4_journal_dirty_metadata(handle, | ||
| 408 | EXT4_SB(sb)->s_sbh); | ||
| 409 | } | ||
| 410 | return err; | ||
| 411 | } | ||
| 412 | |||
| 413 | int ext4_update_incompat_feature(handle_t *handle, | ||
| 414 | struct super_block *sb, __u32 incompat) | ||
| 415 | { | ||
| 416 | int err = 0; | ||
| 417 | if (!EXT4_HAS_INCOMPAT_FEATURE(sb, incompat)) { | ||
| 418 | err = ext4_journal_get_write_access(handle, | ||
| 419 | EXT4_SB(sb)->s_sbh); | ||
| 420 | if (err) | ||
| 421 | return err; | ||
| 422 | EXT4_SET_INCOMPAT_FEATURE(sb, incompat); | ||
| 423 | sb->s_dirt = 1; | ||
| 424 | handle->h_sync = 1; | ||
| 425 | BUFFER_TRACE(EXT4_SB(sb)->s_sbh, | ||
| 426 | "call ext4_journal_dirty_met adata"); | ||
| 427 | err = ext4_journal_dirty_metadata(handle, | ||
| 428 | EXT4_SB(sb)->s_sbh); | ||
| 429 | } | ||
| 430 | return err; | ||
| 431 | } | ||
| 432 | |||
| 433 | /* | 377 | /* |
| 434 | * Open the external journal device | 378 | * Open the external journal device |
| 435 | */ | 379 | */ |
| @@ -503,15 +447,18 @@ static void ext4_put_super(struct super_block *sb) | |||
| 503 | ext4_mb_release(sb); | 447 | ext4_mb_release(sb); |
| 504 | ext4_ext_release(sb); | 448 | ext4_ext_release(sb); |
| 505 | ext4_xattr_put_super(sb); | 449 | ext4_xattr_put_super(sb); |
| 506 | jbd2_journal_destroy(sbi->s_journal); | 450 | if (jbd2_journal_destroy(sbi->s_journal) < 0) |
| 451 | ext4_abort(sb, __func__, "Couldn't clean up the journal"); | ||
| 507 | sbi->s_journal = NULL; | 452 | sbi->s_journal = NULL; |
| 508 | if (!(sb->s_flags & MS_RDONLY)) { | 453 | if (!(sb->s_flags & MS_RDONLY)) { |
| 509 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 454 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| 510 | es->s_state = cpu_to_le16(sbi->s_mount_state); | 455 | es->s_state = cpu_to_le16(sbi->s_mount_state); |
| 511 | BUFFER_TRACE(sbi->s_sbh, "marking dirty"); | ||
| 512 | mark_buffer_dirty(sbi->s_sbh); | ||
| 513 | ext4_commit_super(sb, es, 1); | 456 | ext4_commit_super(sb, es, 1); |
| 514 | } | 457 | } |
| 458 | if (sbi->s_proc) { | ||
| 459 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
| 460 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
| 461 | } | ||
| 515 | 462 | ||
| 516 | for (i = 0; i < sbi->s_gdb_count; i++) | 463 | for (i = 0; i < sbi->s_gdb_count; i++) |
| 517 | brelse(sbi->s_group_desc[i]); | 464 | brelse(sbi->s_group_desc[i]); |
| @@ -520,6 +467,7 @@ static void ext4_put_super(struct super_block *sb) | |||
| 520 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 467 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
| 521 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 468 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 522 | percpu_counter_destroy(&sbi->s_dirs_counter); | 469 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| 470 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
| 523 | brelse(sbi->s_sbh); | 471 | brelse(sbi->s_sbh); |
| 524 | #ifdef CONFIG_QUOTA | 472 | #ifdef CONFIG_QUOTA |
| 525 | for (i = 0; i < MAXQUOTAS; i++) | 473 | for (i = 0; i < MAXQUOTAS; i++) |
| @@ -562,11 +510,10 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) | |||
| 562 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); | 510 | ei = kmem_cache_alloc(ext4_inode_cachep, GFP_NOFS); |
| 563 | if (!ei) | 511 | if (!ei) |
| 564 | return NULL; | 512 | return NULL; |
| 565 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 513 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 566 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 514 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
| 567 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 515 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
| 568 | #endif | 516 | #endif |
| 569 | ei->i_block_alloc_info = NULL; | ||
| 570 | ei->vfs_inode.i_version = 1; | 517 | ei->vfs_inode.i_version = 1; |
| 571 | ei->vfs_inode.i_data.writeback_index = 0; | 518 | ei->vfs_inode.i_data.writeback_index = 0; |
| 572 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); | 519 | memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); |
| @@ -599,7 +546,7 @@ static void init_once(void *foo) | |||
| 599 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; | 546 | struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; |
| 600 | 547 | ||
| 601 | INIT_LIST_HEAD(&ei->i_orphan); | 548 | INIT_LIST_HEAD(&ei->i_orphan); |
| 602 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 549 | #ifdef CONFIG_EXT4_FS_XATTR |
| 603 | init_rwsem(&ei->xattr_sem); | 550 | init_rwsem(&ei->xattr_sem); |
| 604 | #endif | 551 | #endif |
| 605 | init_rwsem(&ei->i_data_sem); | 552 | init_rwsem(&ei->i_data_sem); |
| @@ -625,8 +572,7 @@ static void destroy_inodecache(void) | |||
| 625 | 572 | ||
| 626 | static void ext4_clear_inode(struct inode *inode) | 573 | static void ext4_clear_inode(struct inode *inode) |
| 627 | { | 574 | { |
| 628 | struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; | 575 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 629 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | ||
| 630 | if (EXT4_I(inode)->i_acl && | 576 | if (EXT4_I(inode)->i_acl && |
| 631 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { | 577 | EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { |
| 632 | posix_acl_release(EXT4_I(inode)->i_acl); | 578 | posix_acl_release(EXT4_I(inode)->i_acl); |
| @@ -638,10 +584,7 @@ static void ext4_clear_inode(struct inode *inode) | |||
| 638 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; | 584 | EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; |
| 639 | } | 585 | } |
| 640 | #endif | 586 | #endif |
| 641 | ext4_discard_reservation(inode); | 587 | ext4_discard_preallocations(inode); |
| 642 | EXT4_I(inode)->i_block_alloc_info = NULL; | ||
| 643 | if (unlikely(rsv)) | ||
| 644 | kfree(rsv); | ||
| 645 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, | 588 | jbd2_journal_release_jbd_inode(EXT4_SB(inode->i_sb)->s_journal, |
| 646 | &EXT4_I(inode)->jinode); | 589 | &EXT4_I(inode)->jinode); |
| 647 | } | 590 | } |
| @@ -654,7 +597,7 @@ static inline void ext4_show_quota_options(struct seq_file *seq, | |||
| 654 | 597 | ||
| 655 | if (sbi->s_jquota_fmt) | 598 | if (sbi->s_jquota_fmt) |
| 656 | seq_printf(seq, ",jqfmt=%s", | 599 | seq_printf(seq, ",jqfmt=%s", |
| 657 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold": "vfsv0"); | 600 | (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); |
| 658 | 601 | ||
| 659 | if (sbi->s_qf_names[USRQUOTA]) | 602 | if (sbi->s_qf_names[USRQUOTA]) |
| 660 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); | 603 | seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); |
| @@ -718,7 +661,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 718 | seq_puts(seq, ",debug"); | 661 | seq_puts(seq, ",debug"); |
| 719 | if (test_opt(sb, OLDALLOC)) | 662 | if (test_opt(sb, OLDALLOC)) |
| 720 | seq_puts(seq, ",oldalloc"); | 663 | seq_puts(seq, ",oldalloc"); |
| 721 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 664 | #ifdef CONFIG_EXT4_FS_XATTR |
| 722 | if (test_opt(sb, XATTR_USER) && | 665 | if (test_opt(sb, XATTR_USER) && |
| 723 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | 666 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) |
| 724 | seq_puts(seq, ",user_xattr"); | 667 | seq_puts(seq, ",user_xattr"); |
| @@ -727,7 +670,7 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 727 | seq_puts(seq, ",nouser_xattr"); | 670 | seq_puts(seq, ",nouser_xattr"); |
| 728 | } | 671 | } |
| 729 | #endif | 672 | #endif |
| 730 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 673 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 731 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 674 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
| 732 | seq_puts(seq, ",acl"); | 675 | seq_puts(seq, ",acl"); |
| 733 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) | 676 | if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT4_DEFM_ACL)) |
| @@ -752,8 +695,6 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 752 | seq_puts(seq, ",nobh"); | 695 | seq_puts(seq, ",nobh"); |
| 753 | if (!test_opt(sb, EXTENTS)) | 696 | if (!test_opt(sb, EXTENTS)) |
| 754 | seq_puts(seq, ",noextents"); | 697 | seq_puts(seq, ",noextents"); |
| 755 | if (!test_opt(sb, MBALLOC)) | ||
| 756 | seq_puts(seq, ",nomballoc"); | ||
| 757 | if (test_opt(sb, I_VERSION)) | 698 | if (test_opt(sb, I_VERSION)) |
| 758 | seq_puts(seq, ",i_version"); | 699 | seq_puts(seq, ",i_version"); |
| 759 | if (!test_opt(sb, DELALLOC)) | 700 | if (!test_opt(sb, DELALLOC)) |
| @@ -773,6 +714,13 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 773 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) | 714 | else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) |
| 774 | seq_puts(seq, ",data=writeback"); | 715 | seq_puts(seq, ",data=writeback"); |
| 775 | 716 | ||
| 717 | if (sbi->s_inode_readahead_blks != EXT4_DEF_INODE_READAHEAD_BLKS) | ||
| 718 | seq_printf(seq, ",inode_readahead_blks=%u", | ||
| 719 | sbi->s_inode_readahead_blks); | ||
| 720 | |||
| 721 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
| 722 | seq_puts(seq, ",data_err=abort"); | ||
| 723 | |||
| 776 | ext4_show_quota_options(seq, sb); | 724 | ext4_show_quota_options(seq, sb); |
| 777 | return 0; | 725 | return 0; |
| 778 | } | 726 | } |
| @@ -822,7 +770,7 @@ static struct dentry *ext4_fh_to_parent(struct super_block *sb, struct fid *fid, | |||
| 822 | } | 770 | } |
| 823 | 771 | ||
| 824 | #ifdef CONFIG_QUOTA | 772 | #ifdef CONFIG_QUOTA |
| 825 | #define QTYPE2NAME(t) ((t) == USRQUOTA?"user":"group") | 773 | #define QTYPE2NAME(t) ((t) == USRQUOTA ? "user" : "group") |
| 826 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) | 774 | #define QTYPE2MOPT(on, t) ((t) == USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) |
| 827 | 775 | ||
| 828 | static int ext4_dquot_initialize(struct inode *inode, int type); | 776 | static int ext4_dquot_initialize(struct inode *inode, int type); |
| @@ -896,20 +844,22 @@ static const struct export_operations ext4_export_ops = { | |||
| 896 | enum { | 844 | enum { |
| 897 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, | 845 | Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, |
| 898 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, | 846 | Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, |
| 899 | Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, | 847 | Opt_nouid32, Opt_debug, Opt_oldalloc, Opt_orlov, |
| 900 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, | 848 | Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, |
| 901 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, | 849 | Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, |
| 902 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, | 850 | Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, |
| 903 | Opt_journal_checksum, Opt_journal_async_commit, | 851 | Opt_journal_checksum, Opt_journal_async_commit, |
| 904 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, | 852 | Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, |
| 853 | Opt_data_err_abort, Opt_data_err_ignore, | ||
| 905 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, | 854 | Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, |
| 906 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, | 855 | Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, |
| 907 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, | 856 | Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, |
| 908 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, | 857 | Opt_grpquota, Opt_extents, Opt_noextents, Opt_i_version, |
| 909 | Opt_mballoc, Opt_nomballoc, Opt_stripe, Opt_delalloc, Opt_nodelalloc, | 858 | Opt_stripe, Opt_delalloc, Opt_nodelalloc, |
| 859 | Opt_inode_readahead_blks | ||
| 910 | }; | 860 | }; |
| 911 | 861 | ||
| 912 | static match_table_t tokens = { | 862 | static const match_table_t tokens = { |
| 913 | {Opt_bsd_df, "bsddf"}, | 863 | {Opt_bsd_df, "bsddf"}, |
| 914 | {Opt_minix_df, "minixdf"}, | 864 | {Opt_minix_df, "minixdf"}, |
| 915 | {Opt_grpid, "grpid"}, | 865 | {Opt_grpid, "grpid"}, |
| @@ -923,8 +873,6 @@ static match_table_t tokens = { | |||
| 923 | {Opt_err_panic, "errors=panic"}, | 873 | {Opt_err_panic, "errors=panic"}, |
| 924 | {Opt_err_ro, "errors=remount-ro"}, | 874 | {Opt_err_ro, "errors=remount-ro"}, |
| 925 | {Opt_nouid32, "nouid32"}, | 875 | {Opt_nouid32, "nouid32"}, |
| 926 | {Opt_nocheck, "nocheck"}, | ||
| 927 | {Opt_nocheck, "check=none"}, | ||
| 928 | {Opt_debug, "debug"}, | 876 | {Opt_debug, "debug"}, |
| 929 | {Opt_oldalloc, "oldalloc"}, | 877 | {Opt_oldalloc, "oldalloc"}, |
| 930 | {Opt_orlov, "orlov"}, | 878 | {Opt_orlov, "orlov"}, |
| @@ -947,6 +895,8 @@ static match_table_t tokens = { | |||
| 947 | {Opt_data_journal, "data=journal"}, | 895 | {Opt_data_journal, "data=journal"}, |
| 948 | {Opt_data_ordered, "data=ordered"}, | 896 | {Opt_data_ordered, "data=ordered"}, |
| 949 | {Opt_data_writeback, "data=writeback"}, | 897 | {Opt_data_writeback, "data=writeback"}, |
| 898 | {Opt_data_err_abort, "data_err=abort"}, | ||
| 899 | {Opt_data_err_ignore, "data_err=ignore"}, | ||
| 950 | {Opt_offusrjquota, "usrjquota="}, | 900 | {Opt_offusrjquota, "usrjquota="}, |
| 951 | {Opt_usrjquota, "usrjquota=%s"}, | 901 | {Opt_usrjquota, "usrjquota=%s"}, |
| 952 | {Opt_offgrpjquota, "grpjquota="}, | 902 | {Opt_offgrpjquota, "grpjquota="}, |
| @@ -961,12 +911,11 @@ static match_table_t tokens = { | |||
| 961 | {Opt_extents, "extents"}, | 911 | {Opt_extents, "extents"}, |
| 962 | {Opt_noextents, "noextents"}, | 912 | {Opt_noextents, "noextents"}, |
| 963 | {Opt_i_version, "i_version"}, | 913 | {Opt_i_version, "i_version"}, |
| 964 | {Opt_mballoc, "mballoc"}, | ||
| 965 | {Opt_nomballoc, "nomballoc"}, | ||
| 966 | {Opt_stripe, "stripe=%u"}, | 914 | {Opt_stripe, "stripe=%u"}, |
| 967 | {Opt_resize, "resize"}, | 915 | {Opt_resize, "resize"}, |
| 968 | {Opt_delalloc, "delalloc"}, | 916 | {Opt_delalloc, "delalloc"}, |
| 969 | {Opt_nodelalloc, "nodelalloc"}, | 917 | {Opt_nodelalloc, "nodelalloc"}, |
| 918 | {Opt_inode_readahead_blks, "inode_readahead_blks=%u"}, | ||
| 970 | {Opt_err, NULL}, | 919 | {Opt_err, NULL}, |
| 971 | }; | 920 | }; |
| 972 | 921 | ||
| @@ -981,7 +930,7 @@ static ext4_fsblk_t get_sb_block(void **data) | |||
| 981 | /*todo: use simple_strtoll with >32bit ext4 */ | 930 | /*todo: use simple_strtoll with >32bit ext4 */ |
| 982 | sb_block = simple_strtoul(options, &options, 0); | 931 | sb_block = simple_strtoul(options, &options, 0); |
| 983 | if (*options && *options != ',') { | 932 | if (*options && *options != ',') { |
| 984 | printk("EXT4-fs: Invalid sb specification: %s\n", | 933 | printk(KERN_ERR "EXT4-fs: Invalid sb specification: %s\n", |
| 985 | (char *) *data); | 934 | (char *) *data); |
| 986 | return 1; | 935 | return 1; |
| 987 | } | 936 | } |
| @@ -1060,9 +1009,6 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1060 | case Opt_nouid32: | 1009 | case Opt_nouid32: |
| 1061 | set_opt(sbi->s_mount_opt, NO_UID32); | 1010 | set_opt(sbi->s_mount_opt, NO_UID32); |
| 1062 | break; | 1011 | break; |
| 1063 | case Opt_nocheck: | ||
| 1064 | clear_opt(sbi->s_mount_opt, CHECK); | ||
| 1065 | break; | ||
| 1066 | case Opt_debug: | 1012 | case Opt_debug: |
| 1067 | set_opt(sbi->s_mount_opt, DEBUG); | 1013 | set_opt(sbi->s_mount_opt, DEBUG); |
| 1068 | break; | 1014 | break; |
| @@ -1072,7 +1018,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1072 | case Opt_orlov: | 1018 | case Opt_orlov: |
| 1073 | clear_opt(sbi->s_mount_opt, OLDALLOC); | 1019 | clear_opt(sbi->s_mount_opt, OLDALLOC); |
| 1074 | break; | 1020 | break; |
| 1075 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1021 | #ifdef CONFIG_EXT4_FS_XATTR |
| 1076 | case Opt_user_xattr: | 1022 | case Opt_user_xattr: |
| 1077 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1023 | set_opt(sbi->s_mount_opt, XATTR_USER); |
| 1078 | break; | 1024 | break; |
| @@ -1082,10 +1028,11 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1082 | #else | 1028 | #else |
| 1083 | case Opt_user_xattr: | 1029 | case Opt_user_xattr: |
| 1084 | case Opt_nouser_xattr: | 1030 | case Opt_nouser_xattr: |
| 1085 | printk("EXT4 (no)user_xattr options not supported\n"); | 1031 | printk(KERN_ERR "EXT4 (no)user_xattr options " |
| 1032 | "not supported\n"); | ||
| 1086 | break; | 1033 | break; |
| 1087 | #endif | 1034 | #endif |
| 1088 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1035 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 1089 | case Opt_acl: | 1036 | case Opt_acl: |
| 1090 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1037 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
| 1091 | break; | 1038 | break; |
| @@ -1095,7 +1042,8 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1095 | #else | 1042 | #else |
| 1096 | case Opt_acl: | 1043 | case Opt_acl: |
| 1097 | case Opt_noacl: | 1044 | case Opt_noacl: |
| 1098 | printk("EXT4 (no)acl options not supported\n"); | 1045 | printk(KERN_ERR "EXT4 (no)acl options " |
| 1046 | "not supported\n"); | ||
| 1099 | break; | 1047 | break; |
| 1100 | #endif | 1048 | #endif |
| 1101 | case Opt_reservation: | 1049 | case Opt_reservation: |
| @@ -1178,6 +1126,12 @@ static int parse_options(char *options, struct super_block *sb, | |||
| 1178 | sbi->s_mount_opt |= data_opt; | 1126 | sbi->s_mount_opt |= data_opt; |
| 1179 | } | 1127 | } |
| 1180 | break; | 1128 | break; |
| 1129 | case Opt_data_err_abort: | ||
| 1130 | set_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
| 1131 | break; | ||
| 1132 | case Opt_data_err_ignore: | ||
| 1133 | clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT); | ||
| 1134 | break; | ||
| 1181 | #ifdef CONFIG_QUOTA | 1135 | #ifdef CONFIG_QUOTA |
| 1182 | case Opt_usrjquota: | 1136 | case Opt_usrjquota: |
| 1183 | qtype = USRQUOTA; | 1137 | qtype = USRQUOTA; |
| @@ -1189,8 +1143,8 @@ set_qf_name: | |||
| 1189 | sb_any_quota_suspended(sb)) && | 1143 | sb_any_quota_suspended(sb)) && |
| 1190 | !sbi->s_qf_names[qtype]) { | 1144 | !sbi->s_qf_names[qtype]) { |
| 1191 | printk(KERN_ERR | 1145 | printk(KERN_ERR |
| 1192 | "EXT4-fs: Cannot change journaled " | 1146 | "EXT4-fs: Cannot change journaled " |
| 1193 | "quota options when quota turned on.\n"); | 1147 | "quota options when quota turned on.\n"); |
| 1194 | return 0; | 1148 | return 0; |
| 1195 | } | 1149 | } |
| 1196 | qname = match_strdup(&args[0]); | 1150 | qname = match_strdup(&args[0]); |
| @@ -1357,12 +1311,6 @@ set_qf_format: | |||
| 1357 | case Opt_nodelalloc: | 1311 | case Opt_nodelalloc: |
| 1358 | clear_opt(sbi->s_mount_opt, DELALLOC); | 1312 | clear_opt(sbi->s_mount_opt, DELALLOC); |
| 1359 | break; | 1313 | break; |
| 1360 | case Opt_mballoc: | ||
| 1361 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
| 1362 | break; | ||
| 1363 | case Opt_nomballoc: | ||
| 1364 | clear_opt(sbi->s_mount_opt, MBALLOC); | ||
| 1365 | break; | ||
| 1366 | case Opt_stripe: | 1314 | case Opt_stripe: |
| 1367 | if (match_int(&args[0], &option)) | 1315 | if (match_int(&args[0], &option)) |
| 1368 | return 0; | 1316 | return 0; |
| @@ -1373,6 +1321,13 @@ set_qf_format: | |||
| 1373 | case Opt_delalloc: | 1321 | case Opt_delalloc: |
| 1374 | set_opt(sbi->s_mount_opt, DELALLOC); | 1322 | set_opt(sbi->s_mount_opt, DELALLOC); |
| 1375 | break; | 1323 | break; |
| 1324 | case Opt_inode_readahead_blks: | ||
| 1325 | if (match_int(&args[0], &option)) | ||
| 1326 | return 0; | ||
| 1327 | if (option < 0 || option > (1 << 30)) | ||
| 1328 | return 0; | ||
| 1329 | sbi->s_inode_readahead_blks = option; | ||
| 1330 | break; | ||
| 1376 | default: | 1331 | default: |
| 1377 | printk(KERN_ERR | 1332 | printk(KERN_ERR |
| 1378 | "EXT4-fs: Unrecognized mount option \"%s\" " | 1333 | "EXT4-fs: Unrecognized mount option \"%s\" " |
| @@ -1473,15 +1428,9 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, | |||
| 1473 | EXT4_INODES_PER_GROUP(sb), | 1428 | EXT4_INODES_PER_GROUP(sb), |
| 1474 | sbi->s_mount_opt); | 1429 | sbi->s_mount_opt); |
| 1475 | 1430 | ||
| 1476 | printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); | 1431 | printk(KERN_INFO "EXT4 FS on %s, %s journal on %s\n", |
| 1477 | if (EXT4_SB(sb)->s_journal->j_inode == NULL) { | 1432 | sb->s_id, EXT4_SB(sb)->s_journal->j_inode ? "internal" : |
| 1478 | char b[BDEVNAME_SIZE]; | 1433 | "external", EXT4_SB(sb)->s_journal->j_devname); |
| 1479 | |||
| 1480 | printk("external journal on %s\n", | ||
| 1481 | bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); | ||
| 1482 | } else { | ||
| 1483 | printk("internal journal\n"); | ||
| 1484 | } | ||
| 1485 | return res; | 1434 | return res; |
| 1486 | } | 1435 | } |
| 1487 | 1436 | ||
| @@ -1504,8 +1453,11 @@ static int ext4_fill_flex_info(struct super_block *sb) | |||
| 1504 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; | 1453 | sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex; |
| 1505 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; | 1454 | groups_per_flex = 1 << sbi->s_log_groups_per_flex; |
| 1506 | 1455 | ||
| 1507 | flex_group_count = (sbi->s_groups_count + groups_per_flex - 1) / | 1456 | /* We allocate both existing and potentially added groups */ |
| 1508 | groups_per_flex; | 1457 | flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + |
| 1458 | ((sbi->s_es->s_reserved_gdt_blocks + 1) << | ||
| 1459 | EXT4_DESC_PER_BLOCK_BITS(sb))) / | ||
| 1460 | groups_per_flex; | ||
| 1509 | sbi->s_flex_groups = kzalloc(flex_group_count * | 1461 | sbi->s_flex_groups = kzalloc(flex_group_count * |
| 1510 | sizeof(struct flex_groups), GFP_KERNEL); | 1462 | sizeof(struct flex_groups), GFP_KERNEL); |
| 1511 | if (sbi->s_flex_groups == NULL) { | 1463 | if (sbi->s_flex_groups == NULL) { |
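
The ext4_fill_flex_info() hunk above enlarges the s_flex_groups allocation so it covers not only the current s_groups_count groups but also any groups that could later be added through the reserved GDT blocks. A worked example of the new flex_group_count expression, using sample values (a shift of 7 assumes 4K blocks with 32-byte group descriptors, i.e. 128 descriptors per block):

    #include <stdio.h>

    int main(void)
    {
            unsigned long groups_count = 100;       /* sample s_groups_count           */
            unsigned long reserved_gdt_blocks = 63; /* sample s_reserved_gdt_blocks    */
            unsigned int desc_per_block_bits = 7;   /* 4K block / 32-byte descriptors  */
            unsigned int log_groups_per_flex = 4;
            unsigned long groups_per_flex = 1UL << log_groups_per_flex;

            unsigned long flex_group_count =
                    ((groups_count + groups_per_flex - 1) +
                     ((reserved_gdt_blocks + 1) << desc_per_block_bits)) /
                    groups_per_flex;

            /* (115 + 8192) / 16 = 519 flex group slots allocated up front */
            printf("flex_group_count = %lu\n", flex_group_count);
            return 0;
    }
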
| @@ -1584,7 +1536,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
| 1584 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) | 1536 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) |
| 1585 | flexbg_flag = 1; | 1537 | flexbg_flag = 1; |
| 1586 | 1538 | ||
| 1587 | ext4_debug ("Checking group descriptors"); | 1539 | ext4_debug("Checking group descriptors"); |
| 1588 | 1540 | ||
| 1589 | for (i = 0; i < sbi->s_groups_count; i++) { | 1541 | for (i = 0; i < sbi->s_groups_count; i++) { |
| 1590 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); | 1542 | struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL); |
| @@ -1599,14 +1551,14 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
| 1599 | if (block_bitmap < first_block || block_bitmap > last_block) { | 1551 | if (block_bitmap < first_block || block_bitmap > last_block) { |
| 1600 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1552 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
| 1601 | "Block bitmap for group %lu not in group " | 1553 | "Block bitmap for group %lu not in group " |
| 1602 | "(block %llu)!", i, block_bitmap); | 1554 | "(block %llu)!\n", i, block_bitmap); |
| 1603 | return 0; | 1555 | return 0; |
| 1604 | } | 1556 | } |
| 1605 | inode_bitmap = ext4_inode_bitmap(sb, gdp); | 1557 | inode_bitmap = ext4_inode_bitmap(sb, gdp); |
| 1606 | if (inode_bitmap < first_block || inode_bitmap > last_block) { | 1558 | if (inode_bitmap < first_block || inode_bitmap > last_block) { |
| 1607 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1559 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
| 1608 | "Inode bitmap for group %lu not in group " | 1560 | "Inode bitmap for group %lu not in group " |
| 1609 | "(block %llu)!", i, inode_bitmap); | 1561 | "(block %llu)!\n", i, inode_bitmap); |
| 1610 | return 0; | 1562 | return 0; |
| 1611 | } | 1563 | } |
| 1612 | inode_table = ext4_inode_table(sb, gdp); | 1564 | inode_table = ext4_inode_table(sb, gdp); |
| @@ -1614,7 +1566,7 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
| 1614 | inode_table + sbi->s_itb_per_group - 1 > last_block) { | 1566 | inode_table + sbi->s_itb_per_group - 1 > last_block) { |
| 1615 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " | 1567 | printk(KERN_ERR "EXT4-fs: ext4_check_descriptors: " |
| 1616 | "Inode table for group %lu not in group " | 1568 | "Inode table for group %lu not in group " |
| 1617 | "(block %llu)!", i, inode_table); | 1569 | "(block %llu)!\n", i, inode_table); |
| 1618 | return 0; | 1570 | return 0; |
| 1619 | } | 1571 | } |
| 1620 | spin_lock(sb_bgl_lock(sbi, i)); | 1572 | spin_lock(sb_bgl_lock(sbi, i)); |
| @@ -1623,8 +1575,10 @@ static int ext4_check_descriptors(struct super_block *sb) | |||
| 1623 | "Checksum for group %lu failed (%u!=%u)\n", | 1575 | "Checksum for group %lu failed (%u!=%u)\n", |
| 1624 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, | 1576 | i, le16_to_cpu(ext4_group_desc_csum(sbi, i, |
| 1625 | gdp)), le16_to_cpu(gdp->bg_checksum)); | 1577 | gdp)), le16_to_cpu(gdp->bg_checksum)); |
| 1626 | if (!(sb->s_flags & MS_RDONLY)) | 1578 | if (!(sb->s_flags & MS_RDONLY)) { |
| 1579 | spin_unlock(sb_bgl_lock(sbi, i)); | ||
| 1627 | return 0; | 1580 | return 0; |
| 1581 | } | ||
| 1628 | } | 1582 | } |
| 1629 | spin_unlock(sb_bgl_lock(sbi, i)); | 1583 | spin_unlock(sb_bgl_lock(sbi, i)); |
| 1630 | if (!flexbg_flag) | 1584 | if (!flexbg_flag) |
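The checksum hunk above fixes a lock imbalance: the early return used to leave sb_bgl_lock held. The general shape of the fix, as a self-contained sketch (pthreads stand in for the kernel spinlock; names are illustrative):

    #include <pthread.h>
    #include <stdbool.h>
    #include <stdio.h>

    static pthread_mutex_t group_lock = PTHREAD_MUTEX_INITIALIZER;

    static int check_group(bool checksum_bad, bool read_only)
    {
            pthread_mutex_lock(&group_lock);
            if (checksum_bad && !read_only) {
                    pthread_mutex_unlock(&group_lock); /* the path the fix adds */
                    return 0;
            }
            pthread_mutex_unlock(&group_lock);
            return 1;
    }

    int main(void)
    {
            printf("%d\n", check_group(true, false));
            return 0;
    }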
| @@ -1714,9 +1668,9 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
| 1714 | DQUOT_INIT(inode); | 1668 | DQUOT_INIT(inode); |
| 1715 | if (inode->i_nlink) { | 1669 | if (inode->i_nlink) { |
| 1716 | printk(KERN_DEBUG | 1670 | printk(KERN_DEBUG |
| 1717 | "%s: truncating inode %lu to %Ld bytes\n", | 1671 | "%s: truncating inode %lu to %lld bytes\n", |
| 1718 | __func__, inode->i_ino, inode->i_size); | 1672 | __func__, inode->i_ino, inode->i_size); |
| 1719 | jbd_debug(2, "truncating inode %lu to %Ld bytes\n", | 1673 | jbd_debug(2, "truncating inode %lu to %lld bytes\n", |
| 1720 | inode->i_ino, inode->i_size); | 1674 | inode->i_ino, inode->i_size); |
| 1721 | ext4_truncate(inode); | 1675 | ext4_truncate(inode); |
| 1722 | nr_truncates++; | 1676 | nr_truncates++; |
| @@ -1757,13 +1711,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
| 1757 | * | 1711 | * |
| 1758 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. | 1712 | * Note, this does *not* consider any metadata overhead for vfs i_blocks. |
| 1759 | */ | 1713 | */ |
| 1760 | static loff_t ext4_max_size(int blkbits) | 1714 | static loff_t ext4_max_size(int blkbits, int has_huge_files) |
| 1761 | { | 1715 | { |
| 1762 | loff_t res; | 1716 | loff_t res; |
| 1763 | loff_t upper_limit = MAX_LFS_FILESIZE; | 1717 | loff_t upper_limit = MAX_LFS_FILESIZE; |
| 1764 | 1718 | ||
| 1765 | /* small i_blocks in vfs inode? */ | 1719 | /* small i_blocks in vfs inode? */ |
| 1766 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1720 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
| 1767 | /* | 1721 | /* |
| 1768 | * CONFIG_LSF is not enabled implies the inode | 1722 | * CONFIG_LSF is not enabled implies the inode |
| 1769 | * i_block represent total blocks in 512 bytes | 1723 | * i_block represent total blocks in 512 bytes |
| @@ -1793,7 +1747,7 @@ static loff_t ext4_max_size(int blkbits) | |||
| 1793 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. | 1747 | * block limit, and also a limit of (2^48 - 1) 512-byte sectors in i_blocks. |
| 1794 | * We need to be 1 filesystem block less than the 2^48 sector limit. | 1748 | * We need to be 1 filesystem block less than the 2^48 sector limit. |
| 1795 | */ | 1749 | */ |
| 1796 | static loff_t ext4_max_bitmap_size(int bits) | 1750 | static loff_t ext4_max_bitmap_size(int bits, int has_huge_files) |
| 1797 | { | 1751 | { |
| 1798 | loff_t res = EXT4_NDIR_BLOCKS; | 1752 | loff_t res = EXT4_NDIR_BLOCKS; |
| 1799 | int meta_blocks; | 1753 | int meta_blocks; |
| @@ -1806,11 +1760,11 @@ static loff_t ext4_max_bitmap_size(int bits) | |||
| 1806 | * total number of 512 bytes blocks of the file | 1760 | * total number of 512 bytes blocks of the file |
| 1807 | */ | 1761 | */ |
| 1808 | 1762 | ||
| 1809 | if (sizeof(blkcnt_t) < sizeof(u64)) { | 1763 | if (!has_huge_files || sizeof(blkcnt_t) < sizeof(u64)) { |
| 1810 | /* | 1764 | /* |
| 1811 | * CONFIG_LSF is not enabled implies the inode | 1765 | * !has_huge_files or CONFIG_LSF is not enabled |
| 1812 | * i_block represent total blocks in 512 bytes | 1766 | * implies the inode i_block represent total blocks in |
| 1813 | * 32 == size of vfs inode i_blocks * 8 | 1767 | * 512 bytes 32 == size of vfs inode i_blocks * 8 |
| 1814 | */ | 1768 | */ |
| 1815 | upper_limit = (1LL << 32) - 1; | 1769 | upper_limit = (1LL << 32) - 1; |
| 1816 | 1770 | ||
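The new has_huge_files parameter threads the HUGE_FILE feature check into both size-limit helpers: without the feature (or without a 64-bit blkcnt_t), i_blocks can only hold (2^32 - 1) 512-byte sectors. A rough sketch of just that cap, ignoring the metadata adjustments the real helpers also apply (block size is an assumed value):

    #include <stdio.h>

    int main(void)
    {
            int blkbits = 12;                        /* assume 4K filesystem blocks */
            long long upper_limit = (1LL << 32) - 1; /* max 512-byte sectors in i_blocks */

            upper_limit >>= (blkbits - 9);           /* sectors -> filesystem blocks */
            printf("cap: %lld blocks (~%lld GiB)\n",
                   upper_limit, (upper_limit << blkbits) >> 30);
            return 0;
    }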
| @@ -1914,11 +1868,12 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1914 | unsigned long journal_devnum = 0; | 1868 | unsigned long journal_devnum = 0; |
| 1915 | unsigned long def_mount_opts; | 1869 | unsigned long def_mount_opts; |
| 1916 | struct inode *root; | 1870 | struct inode *root; |
| 1871 | char *cp; | ||
| 1917 | int ret = -EINVAL; | 1872 | int ret = -EINVAL; |
| 1918 | int blocksize; | 1873 | int blocksize; |
| 1919 | int db_count; | 1874 | int db_count; |
| 1920 | int i; | 1875 | int i; |
| 1921 | int needs_recovery; | 1876 | int needs_recovery, has_huge_files; |
| 1922 | __le32 features; | 1877 | __le32 features; |
| 1923 | __u64 blocks_count; | 1878 | __u64 blocks_count; |
| 1924 | int err; | 1879 | int err; |
| @@ -1930,10 +1885,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1930 | sbi->s_mount_opt = 0; | 1885 | sbi->s_mount_opt = 0; |
| 1931 | sbi->s_resuid = EXT4_DEF_RESUID; | 1886 | sbi->s_resuid = EXT4_DEF_RESUID; |
| 1932 | sbi->s_resgid = EXT4_DEF_RESGID; | 1887 | sbi->s_resgid = EXT4_DEF_RESGID; |
| 1888 | sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; | ||
| 1933 | sbi->s_sb_block = sb_block; | 1889 | sbi->s_sb_block = sb_block; |
| 1934 | 1890 | ||
| 1935 | unlock_kernel(); | 1891 | unlock_kernel(); |
| 1936 | 1892 | ||
| 1893 | /* Cleanup superblock name */ | ||
| 1894 | for (cp = sb->s_id; (cp = strchr(cp, '/'));) | ||
| 1895 | *cp = '!'; | ||
| 1896 | |||
| 1937 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); | 1897 | blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); |
| 1938 | if (!blocksize) { | 1898 | if (!blocksize) { |
| 1939 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); | 1899 | printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); |
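The superblock-name cleanup loop added above rewrites '/' to '!' in sb->s_id, which matters because the id is later reused as a procfs directory name (see the proc_mkdir(sb->s_id, ...) call added further down) and a proc entry name cannot contain slashes. A standalone sketch of the same transformation (function name and sample device id are illustrative):

    #include <stdio.h>
    #include <string.h>

    static void sanitize_id(char *id)
    {
            char *cp;

            for (cp = id; (cp = strchr(cp, '/')); )
                    *cp = '!';
    }

    int main(void)
    {
            char id[] = "cciss/c0d0p1";

            sanitize_id(id);
            printf("%s\n", id);   /* prints cciss!c0d0p1 */
            return 0;
    }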
| @@ -1973,11 +1933,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 1973 | set_opt(sbi->s_mount_opt, GRPID); | 1933 | set_opt(sbi->s_mount_opt, GRPID); |
| 1974 | if (def_mount_opts & EXT4_DEFM_UID16) | 1934 | if (def_mount_opts & EXT4_DEFM_UID16) |
| 1975 | set_opt(sbi->s_mount_opt, NO_UID32); | 1935 | set_opt(sbi->s_mount_opt, NO_UID32); |
| 1976 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 1936 | #ifdef CONFIG_EXT4_FS_XATTR |
| 1977 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 1937 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) |
| 1978 | set_opt(sbi->s_mount_opt, XATTR_USER); | 1938 | set_opt(sbi->s_mount_opt, XATTR_USER); |
| 1979 | #endif | 1939 | #endif |
| 1980 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 1940 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 1981 | if (def_mount_opts & EXT4_DEFM_ACL) | 1941 | if (def_mount_opts & EXT4_DEFM_ACL) |
| 1982 | set_opt(sbi->s_mount_opt, POSIX_ACL); | 1942 | set_opt(sbi->s_mount_opt, POSIX_ACL); |
| 1983 | #endif | 1943 | #endif |
| @@ -2012,11 +1972,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2012 | ext4_warning(sb, __func__, | 1972 | ext4_warning(sb, __func__, |
| 2013 | "extents feature not enabled on this filesystem, " | 1973 | "extents feature not enabled on this filesystem, " |
| 2014 | "use tune2fs.\n"); | 1974 | "use tune2fs.\n"); |
| 2015 | /* | ||
| 2016 | * turn on mballoc code by default in ext4 filesystem | ||
| 2017 | * Use -o nomballoc to turn it off | ||
| 2018 | */ | ||
| 2019 | set_opt(sbi->s_mount_opt, MBALLOC); | ||
| 2020 | 1975 | ||
| 2021 | /* | 1976 | /* |
| 2022 | * enable delayed allocation by default | 1977 | * enable delayed allocation by default |
| @@ -2041,16 +1996,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2041 | "running e2fsck is recommended\n"); | 1996 | "running e2fsck is recommended\n"); |
| 2042 | 1997 | ||
| 2043 | /* | 1998 | /* |
| 2044 | * Since ext4 is still considered development code, we require | ||
| 2045 | * that the TEST_FILESYS flag in s->flags be set. | ||
| 2046 | */ | ||
| 2047 | if (!(le32_to_cpu(es->s_flags) & EXT2_FLAGS_TEST_FILESYS)) { | ||
| 2048 | printk(KERN_WARNING "EXT4-fs: %s: not marked " | ||
| 2049 | "OK to use with test code.\n", sb->s_id); | ||
| 2050 | goto failed_mount; | ||
| 2051 | } | ||
| 2052 | |||
| 2053 | /* | ||
| 2054 | * Check feature flags regardless of the revision level, since we | 1999 | * Check feature flags regardless of the revision level, since we |
| 2055 | * previously didn't change the revision level when setting the flags, | 2000 | * previously didn't change the revision level when setting the flags, |
| 2056 | * so there is a chance incompat flags are set on a rev 0 filesystem. | 2001 | * so there is a chance incompat flags are set on a rev 0 filesystem. |
| @@ -2069,7 +2014,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2069 | sb->s_id, le32_to_cpu(features)); | 2014 | sb->s_id, le32_to_cpu(features)); |
| 2070 | goto failed_mount; | 2015 | goto failed_mount; |
| 2071 | } | 2016 | } |
| 2072 | if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_HUGE_FILE)) { | 2017 | has_huge_files = EXT4_HAS_RO_COMPAT_FEATURE(sb, |
| 2018 | EXT4_FEATURE_RO_COMPAT_HUGE_FILE); | ||
| 2019 | if (has_huge_files) { | ||
| 2073 | /* | 2020 | /* |
| 2074 | * Large file size enabled file system can only be | 2021 | * Large file size enabled file system can only be |
| 2075 | * mount if kernel is build with CONFIG_LSF | 2022 | * mount if kernel is build with CONFIG_LSF |
| @@ -2119,8 +2066,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2119 | } | 2066 | } |
| 2120 | } | 2067 | } |
| 2121 | 2068 | ||
| 2122 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits); | 2069 | sbi->s_bitmap_maxbytes = ext4_max_bitmap_size(sb->s_blocksize_bits, |
| 2123 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); | 2070 | has_huge_files); |
| 2071 | sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits, has_huge_files); | ||
| 2124 | 2072 | ||
| 2125 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { | 2073 | if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { |
| 2126 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; | 2074 | sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; |
| @@ -2219,6 +2167,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2219 | goto failed_mount; | 2167 | goto failed_mount; |
| 2220 | } | 2168 | } |
| 2221 | 2169 | ||
| 2170 | #ifdef CONFIG_PROC_FS | ||
| 2171 | if (ext4_proc_root) | ||
| 2172 | sbi->s_proc = proc_mkdir(sb->s_id, ext4_proc_root); | ||
| 2173 | |||
| 2174 | if (sbi->s_proc) | ||
| 2175 | proc_create_data("inode_readahead_blks", 0644, sbi->s_proc, | ||
| 2176 | &ext4_ui_proc_fops, | ||
| 2177 | &sbi->s_inode_readahead_blks); | ||
| 2178 | #endif | ||
| 2179 | |||
| 2222 | bgl_lock_init(&sbi->s_blockgroup_lock); | 2180 | bgl_lock_init(&sbi->s_blockgroup_lock); |
| 2223 | 2181 | ||
| 2224 | for (i = 0; i < db_count; i++) { | 2182 | for (i = 0; i < db_count; i++) { |

| @@ -2257,24 +2215,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2257 | err = percpu_counter_init(&sbi->s_dirs_counter, | 2215 | err = percpu_counter_init(&sbi->s_dirs_counter, |
| 2258 | ext4_count_dirs(sb)); | 2216 | ext4_count_dirs(sb)); |
| 2259 | } | 2217 | } |
| 2218 | if (!err) { | ||
| 2219 | err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0); | ||
| 2220 | } | ||
| 2260 | if (err) { | 2221 | if (err) { |
| 2261 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); | 2222 | printk(KERN_ERR "EXT4-fs: insufficient memory\n"); |
| 2262 | goto failed_mount3; | 2223 | goto failed_mount3; |
| 2263 | } | 2224 | } |
| 2264 | 2225 | ||
| 2265 | /* per fileystem reservation list head & lock */ | ||
| 2266 | spin_lock_init(&sbi->s_rsv_window_lock); | ||
| 2267 | sbi->s_rsv_window_root = RB_ROOT; | ||
| 2268 | /* Add a single, static dummy reservation to the start of the | ||
| 2269 | * reservation window list --- it gives us a placeholder for | ||
| 2270 | * append-at-start-of-list which makes the allocation logic | ||
| 2271 | * _much_ simpler. */ | ||
| 2272 | sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 2273 | sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; | ||
| 2274 | sbi->s_rsv_window_head.rsv_alloc_hit = 0; | ||
| 2275 | sbi->s_rsv_window_head.rsv_goal_size = 0; | ||
| 2276 | ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); | ||
| 2277 | |||
| 2278 | sbi->s_stripe = ext4_get_stripe_size(sbi); | 2226 | sbi->s_stripe = ext4_get_stripe_size(sbi); |
| 2279 | 2227 | ||
| 2280 | /* | 2228 | /* |
| @@ -2444,6 +2392,21 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2444 | "available.\n"); | 2392 | "available.\n"); |
| 2445 | } | 2393 | } |
| 2446 | 2394 | ||
| 2395 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
| 2396 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
| 2397 | "requested data journaling mode\n"); | ||
| 2398 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
| 2399 | } else if (test_opt(sb, DELALLOC)) | ||
| 2400 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
| 2401 | |||
| 2402 | ext4_ext_init(sb); | ||
| 2403 | err = ext4_mb_init(sb, needs_recovery); | ||
| 2404 | if (err) { | ||
| 2405 | printk(KERN_ERR "EXT4-fs: failed to initialize mballoc (%d)\n", | ||
| 2406 | err); | ||
| 2407 | goto failed_mount4; | ||
| 2408 | } | ||
| 2409 | |||
| 2447 | /* | 2410 | /* |
| 2448 | * akpm: core read_super() calls in here with the superblock locked. | 2411 | * akpm: core read_super() calls in here with the superblock locked. |
| 2449 | * That deadlocks, because orphan cleanup needs to lock the superblock | 2412 | * That deadlocks, because orphan cleanup needs to lock the superblock |
| @@ -2463,16 +2426,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
| 2463 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": | 2426 | test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": |
| 2464 | "writeback"); | 2427 | "writeback"); |
| 2465 | 2428 | ||
| 2466 | if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { | ||
| 2467 | printk(KERN_WARNING "EXT4-fs: Ignoring delalloc option - " | ||
| 2468 | "requested data journaling mode\n"); | ||
| 2469 | clear_opt(sbi->s_mount_opt, DELALLOC); | ||
| 2470 | } else if (test_opt(sb, DELALLOC)) | ||
| 2471 | printk(KERN_INFO "EXT4-fs: delayed allocation enabled\n"); | ||
| 2472 | |||
| 2473 | ext4_ext_init(sb); | ||
| 2474 | ext4_mb_init(sb, needs_recovery); | ||
| 2475 | |||
| 2476 | lock_kernel(); | 2429 | lock_kernel(); |
| 2477 | return 0; | 2430 | return 0; |
| 2478 | 2431 | ||
| @@ -2489,11 +2442,16 @@ failed_mount3: | |||
| 2489 | percpu_counter_destroy(&sbi->s_freeblocks_counter); | 2442 | percpu_counter_destroy(&sbi->s_freeblocks_counter); |
| 2490 | percpu_counter_destroy(&sbi->s_freeinodes_counter); | 2443 | percpu_counter_destroy(&sbi->s_freeinodes_counter); |
| 2491 | percpu_counter_destroy(&sbi->s_dirs_counter); | 2444 | percpu_counter_destroy(&sbi->s_dirs_counter); |
| 2445 | percpu_counter_destroy(&sbi->s_dirtyblocks_counter); | ||
| 2492 | failed_mount2: | 2446 | failed_mount2: |
| 2493 | for (i = 0; i < db_count; i++) | 2447 | for (i = 0; i < db_count; i++) |
| 2494 | brelse(sbi->s_group_desc[i]); | 2448 | brelse(sbi->s_group_desc[i]); |
| 2495 | kfree(sbi->s_group_desc); | 2449 | kfree(sbi->s_group_desc); |
| 2496 | failed_mount: | 2450 | failed_mount: |
| 2451 | if (sbi->s_proc) { | ||
| 2452 | remove_proc_entry("inode_readahead_blks", sbi->s_proc); | ||
| 2453 | remove_proc_entry(sb->s_id, ext4_proc_root); | ||
| 2454 | } | ||
| 2497 | #ifdef CONFIG_QUOTA | 2455 | #ifdef CONFIG_QUOTA |
| 2498 | for (i = 0; i < MAXQUOTAS; i++) | 2456 | for (i = 0; i < MAXQUOTAS; i++) |
| 2499 | kfree(sbi->s_qf_names[i]); | 2457 | kfree(sbi->s_qf_names[i]); |
| @@ -2527,6 +2485,10 @@ static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) | |||
| 2527 | journal->j_flags |= JBD2_BARRIER; | 2485 | journal->j_flags |= JBD2_BARRIER; |
| 2528 | else | 2486 | else |
| 2529 | journal->j_flags &= ~JBD2_BARRIER; | 2487 | journal->j_flags &= ~JBD2_BARRIER; |
| 2488 | if (test_opt(sb, DATA_ERR_ABORT)) | ||
| 2489 | journal->j_flags |= JBD2_ABORT_ON_SYNCDATA_ERR; | ||
| 2490 | else | ||
| 2491 | journal->j_flags &= ~JBD2_ABORT_ON_SYNCDATA_ERR; | ||
| 2530 | spin_unlock(&journal->j_state_lock); | 2492 | spin_unlock(&journal->j_state_lock); |
| 2531 | } | 2493 | } |
| 2532 | 2494 | ||
| @@ -2552,7 +2514,7 @@ static journal_t *ext4_get_journal(struct super_block *sb, | |||
| 2552 | return NULL; | 2514 | return NULL; |
| 2553 | } | 2515 | } |
| 2554 | 2516 | ||
| 2555 | jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", | 2517 | jbd_debug(2, "Journal inode found at %p: %lld bytes\n", |
| 2556 | journal_inode, journal_inode->i_size); | 2518 | journal_inode, journal_inode->i_size); |
| 2557 | if (!S_ISREG(journal_inode->i_mode)) { | 2519 | if (!S_ISREG(journal_inode->i_mode)) { |
| 2558 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); | 2520 | printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); |
| @@ -2715,6 +2677,11 @@ static int ext4_load_journal(struct super_block *sb, | |||
| 2715 | return -EINVAL; | 2677 | return -EINVAL; |
| 2716 | } | 2678 | } |
| 2717 | 2679 | ||
| 2680 | if (journal->j_flags & JBD2_BARRIER) | ||
| 2681 | printk(KERN_INFO "EXT4-fs: barriers enabled\n"); | ||
| 2682 | else | ||
| 2683 | printk(KERN_INFO "EXT4-fs: barriers disabled\n"); | ||
| 2684 | |||
| 2718 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { | 2685 | if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { |
| 2719 | err = jbd2_journal_update_format(journal); | 2686 | err = jbd2_journal_update_format(journal); |
| 2720 | if (err) { | 2687 | if (err) { |
| @@ -2799,13 +2766,34 @@ static void ext4_commit_super(struct super_block *sb, | |||
| 2799 | 2766 | ||
| 2800 | if (!sbh) | 2767 | if (!sbh) |
| 2801 | return; | 2768 | return; |
| 2769 | if (buffer_write_io_error(sbh)) { | ||
| 2770 | /* | ||
| 2771 | * Oh, dear. A previous attempt to write the | ||
| 2772 | * superblock failed. This could happen because the | ||
| 2773 | * USB device was yanked out. Or it could happen to | ||
| 2774 | * be a transient write error and maybe the block will | ||
| 2775 | * be remapped. Nothing we can do but to retry the | ||
| 2776 | * write and hope for the best. | ||
| 2777 | */ | ||
| 2778 | printk(KERN_ERR "ext4: previous I/O error to " | ||
| 2779 | "superblock detected for %s.\n", sb->s_id); | ||
| 2780 | clear_buffer_write_io_error(sbh); | ||
| 2781 | set_buffer_uptodate(sbh); | ||
| 2782 | } | ||
| 2802 | es->s_wtime = cpu_to_le32(get_seconds()); | 2783 | es->s_wtime = cpu_to_le32(get_seconds()); |
| 2803 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); | 2784 | ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); |
| 2804 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); | 2785 | es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); |
| 2805 | BUFFER_TRACE(sbh, "marking dirty"); | 2786 | BUFFER_TRACE(sbh, "marking dirty"); |
| 2806 | mark_buffer_dirty(sbh); | 2787 | mark_buffer_dirty(sbh); |
| 2807 | if (sync) | 2788 | if (sync) { |
| 2808 | sync_dirty_buffer(sbh); | 2789 | sync_dirty_buffer(sbh); |
| 2790 | if (buffer_write_io_error(sbh)) { | ||
| 2791 | printk(KERN_ERR "ext4: I/O error while writing " | ||
| 2792 | "superblock for %s.\n", sb->s_id); | ||
| 2793 | clear_buffer_write_io_error(sbh); | ||
| 2794 | set_buffer_uptodate(sbh); | ||
| 2795 | } | ||
| 2796 | } | ||
| 2809 | } | 2797 | } |
| 2810 | 2798 | ||
| 2811 | 2799 | ||
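The ext4_commit_super() changes above check buffer_write_io_error() both before redirtying the superblock buffer and after a synchronous write, clearing the error bit and re-marking the buffer uptodate so a later write is retried instead of being skipped. A minimal model of that clear-and-retry idea (plain flags stand in for buffer_head state bits; everything here is illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    struct fake_buffer {
            bool write_io_error;
            bool uptodate;
    };

    static void commit_super(struct fake_buffer *bh, const char *dev)
    {
            if (bh->write_io_error) {
                    fprintf(stderr, "previous I/O error to superblock on %s\n", dev);
                    bh->write_io_error = false;   /* clear_buffer_write_io_error() */
                    bh->uptodate = true;          /* set_buffer_uptodate() */
            }
            /* ... mark dirty and write as usual ... */
    }

    int main(void)
    {
            struct fake_buffer bh = { .write_io_error = true, .uptodate = false };

            commit_super(&bh, "sdb1");
            return 0;
    }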
| @@ -2820,7 +2808,9 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
| 2820 | journal_t *journal = EXT4_SB(sb)->s_journal; | 2808 | journal_t *journal = EXT4_SB(sb)->s_journal; |
| 2821 | 2809 | ||
| 2822 | jbd2_journal_lock_updates(journal); | 2810 | jbd2_journal_lock_updates(journal); |
| 2823 | jbd2_journal_flush(journal); | 2811 | if (jbd2_journal_flush(journal) < 0) |
| 2812 | goto out; | ||
| 2813 | |||
| 2824 | lock_super(sb); | 2814 | lock_super(sb); |
| 2825 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && | 2815 | if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && |
| 2826 | sb->s_flags & MS_RDONLY) { | 2816 | sb->s_flags & MS_RDONLY) { |
| @@ -2829,6 +2819,8 @@ static void ext4_mark_recovery_complete(struct super_block *sb, | |||
| 2829 | ext4_commit_super(sb, es, 1); | 2819 | ext4_commit_super(sb, es, 1); |
| 2830 | } | 2820 | } |
| 2831 | unlock_super(sb); | 2821 | unlock_super(sb); |
| 2822 | |||
| 2823 | out: | ||
| 2832 | jbd2_journal_unlock_updates(journal); | 2824 | jbd2_journal_unlock_updates(journal); |
| 2833 | } | 2825 | } |
| 2834 | 2826 | ||
| @@ -2907,6 +2899,7 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
| 2907 | { | 2899 | { |
| 2908 | tid_t target; | 2900 | tid_t target; |
| 2909 | 2901 | ||
| 2902 | trace_mark(ext4_sync_fs, "dev %s wait %d", sb->s_id, wait); | ||
| 2910 | sb->s_dirt = 0; | 2903 | sb->s_dirt = 0; |
| 2911 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { | 2904 | if (jbd2_journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { |
| 2912 | if (wait) | 2905 | if (wait) |
| @@ -2928,7 +2921,13 @@ static void ext4_write_super_lockfs(struct super_block *sb) | |||
| 2928 | 2921 | ||
| 2929 | /* Now we set up the journal barrier. */ | 2922 | /* Now we set up the journal barrier. */ |
| 2930 | jbd2_journal_lock_updates(journal); | 2923 | jbd2_journal_lock_updates(journal); |
| 2931 | jbd2_journal_flush(journal); | 2924 | |
| 2925 | /* | ||
| 2926 | * We don't want to clear needs_recovery flag when we failed | ||
| 2927 | * to flush the journal. | ||
| 2928 | */ | ||
| 2929 | if (jbd2_journal_flush(journal) < 0) | ||
| 2930 | return; | ||
| 2932 | 2931 | ||
| 2933 | /* Journal blocked and flushed, clear needs_recovery flag. */ | 2932 | /* Journal blocked and flushed, clear needs_recovery flag. */ |
| 2934 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); | 2933 | EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); |
| @@ -3162,7 +3161,8 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 3162 | buf->f_type = EXT4_SUPER_MAGIC; | 3161 | buf->f_type = EXT4_SUPER_MAGIC; |
| 3163 | buf->f_bsize = sb->s_blocksize; | 3162 | buf->f_bsize = sb->s_blocksize; |
| 3164 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; | 3163 | buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; |
| 3165 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter); | 3164 | buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - |
| 3165 | percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); | ||
| 3166 | ext4_free_blocks_count_set(es, buf->f_bfree); | 3166 | ext4_free_blocks_count_set(es, buf->f_bfree); |
| 3167 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); | 3167 | buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); |
| 3168 | if (buf->f_bfree < ext4_r_blocks_count(es)) | 3168 | if (buf->f_bfree < ext4_r_blocks_count(es)) |
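With delayed allocation, blocks reserved for dirty pages are tracked in the new s_dirtyblocks_counter but have not yet been taken out of the free-blocks counter, so statfs now reports free minus dirty. A toy illustration of the reported numbers (all values assumed):

    #include <stdio.h>

    int main(void)
    {
            long long free_blocks  = 1000000;  /* s_freeblocks_counter (assumed) */
            long long dirty_blocks = 2500;     /* s_dirtyblocks_counter (assumed) */
            long long reserved     = 50000;    /* root-reserved blocks (assumed) */

            long long bfree  = free_blocks - dirty_blocks;
            long long bavail = bfree > reserved ? bfree - reserved : 0;

            printf("f_bfree=%lld f_bavail=%lld\n", bfree, bavail);
            return 0;
    }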
| @@ -3367,8 +3367,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
| 3367 | * otherwise be livelocked... | 3367 | * otherwise be livelocked... |
| 3368 | */ | 3368 | */ |
| 3369 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); | 3369 | jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal); |
| 3370 | jbd2_journal_flush(EXT4_SB(sb)->s_journal); | 3370 | err = jbd2_journal_flush(EXT4_SB(sb)->s_journal); |
| 3371 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); | 3371 | jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); |
| 3372 | if (err) { | ||
| 3373 | path_put(&nd.path); | ||
| 3374 | return err; | ||
| 3375 | } | ||
| 3372 | } | 3376 | } |
| 3373 | 3377 | ||
| 3374 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); | 3378 | err = vfs_quota_on_path(sb, type, format_id, &nd.path); |
| @@ -3432,7 +3436,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type, | |||
| 3432 | handle_t *handle = journal_current_handle(); | 3436 | handle_t *handle = journal_current_handle(); |
| 3433 | 3437 | ||
| 3434 | if (!handle) { | 3438 | if (!handle) { |
| 3435 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%Lu, len=%Lu)" | 3439 | printk(KERN_WARNING "EXT4-fs: Quota write (off=%llu, len=%llu)" |
| 3436 | " cancelled because transaction is not started.\n", | 3440 | " cancelled because transaction is not started.\n", |
| 3437 | (unsigned long long)off, (unsigned long long)len); | 3441 | (unsigned long long)off, (unsigned long long)len); |
| 3438 | return -EIO; | 3442 | return -EIO; |
| @@ -3493,18 +3497,82 @@ static int ext4_get_sb(struct file_system_type *fs_type, | |||
| 3493 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | 3497 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); |
| 3494 | } | 3498 | } |
| 3495 | 3499 | ||
| 3500 | #ifdef CONFIG_PROC_FS | ||
| 3501 | static int ext4_ui_proc_show(struct seq_file *m, void *v) | ||
| 3502 | { | ||
| 3503 | unsigned int *p = m->private; | ||
| 3504 | |||
| 3505 | seq_printf(m, "%u\n", *p); | ||
| 3506 | return 0; | ||
| 3507 | } | ||
| 3508 | |||
| 3509 | static int ext4_ui_proc_open(struct inode *inode, struct file *file) | ||
| 3510 | { | ||
| 3511 | return single_open(file, ext4_ui_proc_show, PDE(inode)->data); | ||
| 3512 | } | ||
| 3513 | |||
| 3514 | static ssize_t ext4_ui_proc_write(struct file *file, const char __user *buf, | ||
| 3515 | size_t cnt, loff_t *ppos) | ||
| 3516 | { | ||
| 3517 | unsigned int *p = PDE(file->f_path.dentry->d_inode)->data; | ||
| 3518 | char str[32]; | ||
| 3519 | unsigned long value; | ||
| 3520 | |||
| 3521 | if (cnt >= sizeof(str)) | ||
| 3522 | return -EINVAL; | ||
| 3523 | if (copy_from_user(str, buf, cnt)) | ||
| 3524 | return -EFAULT; | ||
| 3525 | value = simple_strtol(str, NULL, 0); | ||
| 3526 | if (value < 0) | ||
| 3527 | return -ERANGE; | ||
| 3528 | *p = value; | ||
| 3529 | return cnt; | ||
| 3530 | } | ||
| 3531 | |||
| 3532 | const struct file_operations ext4_ui_proc_fops = { | ||
| 3533 | .owner = THIS_MODULE, | ||
| 3534 | .open = ext4_ui_proc_open, | ||
| 3535 | .read = seq_read, | ||
| 3536 | .llseek = seq_lseek, | ||
| 3537 | .release = single_release, | ||
| 3538 | .write = ext4_ui_proc_write, | ||
| 3539 | }; | ||
| 3540 | #endif | ||
| 3541 | |||
| 3542 | static struct file_system_type ext4_fs_type = { | ||
| 3543 | .owner = THIS_MODULE, | ||
| 3544 | .name = "ext4", | ||
| 3545 | .get_sb = ext4_get_sb, | ||
| 3546 | .kill_sb = kill_block_super, | ||
| 3547 | .fs_flags = FS_REQUIRES_DEV, | ||
| 3548 | }; | ||
| 3549 | |||
| 3550 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
| 3551 | static int ext4dev_get_sb(struct file_system_type *fs_type, | ||
| 3552 | int flags, const char *dev_name, void *data, struct vfsmount *mnt) | ||
| 3553 | { | ||
| 3554 | printk(KERN_WARNING "EXT4-fs: Update your userspace programs " | ||
| 3555 | "to mount using ext4\n"); | ||
| 3556 | printk(KERN_WARNING "EXT4-fs: ext4dev backwards compatibility " | ||
| 3557 | "will go away by 2.6.31\n"); | ||
| 3558 | return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); | ||
| 3559 | } | ||
| 3560 | |||
| 3496 | static struct file_system_type ext4dev_fs_type = { | 3561 | static struct file_system_type ext4dev_fs_type = { |
| 3497 | .owner = THIS_MODULE, | 3562 | .owner = THIS_MODULE, |
| 3498 | .name = "ext4dev", | 3563 | .name = "ext4dev", |
| 3499 | .get_sb = ext4_get_sb, | 3564 | .get_sb = ext4dev_get_sb, |
| 3500 | .kill_sb = kill_block_super, | 3565 | .kill_sb = kill_block_super, |
| 3501 | .fs_flags = FS_REQUIRES_DEV, | 3566 | .fs_flags = FS_REQUIRES_DEV, |
| 3502 | }; | 3567 | }; |
| 3568 | MODULE_ALIAS("ext4dev"); | ||
| 3569 | #endif | ||
| 3503 | 3570 | ||
| 3504 | static int __init init_ext4_fs(void) | 3571 | static int __init init_ext4_fs(void) |
| 3505 | { | 3572 | { |
| 3506 | int err; | 3573 | int err; |
| 3507 | 3574 | ||
| 3575 | ext4_proc_root = proc_mkdir("fs/ext4", NULL); | ||
| 3508 | err = init_ext4_mballoc(); | 3576 | err = init_ext4_mballoc(); |
| 3509 | if (err) | 3577 | if (err) |
| 3510 | return err; | 3578 | return err; |
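The per-filesystem tunable created earlier ends up at /proc/fs/ext4/<dev>/inode_readahead_blks and is backed by ext4_ui_proc_fops above: reads go through seq_file's single_open(), writes parse a number straight into the unsigned int passed as PDE data, so from userspace it behaves like any other proc tunable (e.g. echo 32 into the file). A rough userspace-style analogue of the write path, with names and error handling simplified for illustration:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static int ui_proc_write(unsigned int *p, const char *buf, size_t cnt)
    {
            char str[32];

            if (cnt >= sizeof(str))
                    return -1;                /* -EINVAL in the kernel version */
            memcpy(str, buf, cnt);
            str[cnt] = '\0';
            *p = (unsigned int)strtoul(str, NULL, 0);
            return (int)cnt;
    }

    int main(void)
    {
            unsigned int inode_readahead_blks = 0;

            ui_proc_write(&inode_readahead_blks, "32\n", 3);
            printf("%u\n", inode_readahead_blks);
            return 0;
    }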
| @@ -3515,9 +3583,16 @@ static int __init init_ext4_fs(void) | |||
| 3515 | err = init_inodecache(); | 3583 | err = init_inodecache(); |
| 3516 | if (err) | 3584 | if (err) |
| 3517 | goto out1; | 3585 | goto out1; |
| 3518 | err = register_filesystem(&ext4dev_fs_type); | 3586 | err = register_filesystem(&ext4_fs_type); |
| 3519 | if (err) | 3587 | if (err) |
| 3520 | goto out; | 3588 | goto out; |
| 3589 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
| 3590 | err = register_filesystem(&ext4dev_fs_type); | ||
| 3591 | if (err) { | ||
| 3592 | unregister_filesystem(&ext4_fs_type); | ||
| 3593 | goto out; | ||
| 3594 | } | ||
| 3595 | #endif | ||
| 3521 | return 0; | 3596 | return 0; |
| 3522 | out: | 3597 | out: |
| 3523 | destroy_inodecache(); | 3598 | destroy_inodecache(); |
| @@ -3530,10 +3605,14 @@ out2: | |||
| 3530 | 3605 | ||
| 3531 | static void __exit exit_ext4_fs(void) | 3606 | static void __exit exit_ext4_fs(void) |
| 3532 | { | 3607 | { |
| 3608 | unregister_filesystem(&ext4_fs_type); | ||
| 3609 | #ifdef CONFIG_EXT4DEV_COMPAT | ||
| 3533 | unregister_filesystem(&ext4dev_fs_type); | 3610 | unregister_filesystem(&ext4dev_fs_type); |
| 3611 | #endif | ||
| 3534 | destroy_inodecache(); | 3612 | destroy_inodecache(); |
| 3535 | exit_ext4_xattr(); | 3613 | exit_ext4_xattr(); |
| 3536 | exit_ext4_mballoc(); | 3614 | exit_ext4_mballoc(); |
| 3615 | remove_proc_entry("fs/ext4", NULL); | ||
| 3537 | } | 3616 | } |
| 3538 | 3617 | ||
| 3539 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); | 3618 | MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); |
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index e9178643dc01..00740cb32be3 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c | |||
| @@ -23,10 +23,10 @@ | |||
| 23 | #include "ext4.h" | 23 | #include "ext4.h" |
| 24 | #include "xattr.h" | 24 | #include "xattr.h" |
| 25 | 25 | ||
| 26 | static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) | 26 | static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) |
| 27 | { | 27 | { |
| 28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); | 28 | struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); |
| 29 | nd_set_link(nd, (char*)ei->i_data); | 29 | nd_set_link(nd, (char *) ei->i_data); |
| 30 | return NULL; | 30 | return NULL; |
| 31 | } | 31 | } |
| 32 | 32 | ||
| @@ -34,7 +34,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
| 34 | .readlink = generic_readlink, | 34 | .readlink = generic_readlink, |
| 35 | .follow_link = page_follow_link_light, | 35 | .follow_link = page_follow_link_light, |
| 36 | .put_link = page_put_link, | 36 | .put_link = page_put_link, |
| 37 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 37 | #ifdef CONFIG_EXT4_FS_XATTR |
| 38 | .setxattr = generic_setxattr, | 38 | .setxattr = generic_setxattr, |
| 39 | .getxattr = generic_getxattr, | 39 | .getxattr = generic_getxattr, |
| 40 | .listxattr = ext4_listxattr, | 40 | .listxattr = ext4_listxattr, |
| @@ -45,7 +45,7 @@ const struct inode_operations ext4_symlink_inode_operations = { | |||
| 45 | const struct inode_operations ext4_fast_symlink_inode_operations = { | 45 | const struct inode_operations ext4_fast_symlink_inode_operations = { |
| 46 | .readlink = generic_readlink, | 46 | .readlink = generic_readlink, |
| 47 | .follow_link = ext4_follow_link, | 47 | .follow_link = ext4_follow_link, |
| 48 | #ifdef CONFIG_EXT4DEV_FS_XATTR | 48 | #ifdef CONFIG_EXT4_FS_XATTR |
| 49 | .setxattr = generic_setxattr, | 49 | .setxattr = generic_setxattr, |
| 50 | .getxattr = generic_getxattr, | 50 | .getxattr = generic_getxattr, |
| 51 | .listxattr = ext4_listxattr, | 51 | .listxattr = ext4_listxattr, |
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 8954208b4893..80626d516fee 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
| @@ -99,12 +99,12 @@ static struct mb_cache *ext4_xattr_cache; | |||
| 99 | 99 | ||
| 100 | static struct xattr_handler *ext4_xattr_handler_map[] = { | 100 | static struct xattr_handler *ext4_xattr_handler_map[] = { |
| 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, | 101 | [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, |
| 102 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, | 103 | [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, |
| 104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, | 104 | [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, |
| 105 | #endif | 105 | #endif |
| 106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, | 106 | [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, |
| 107 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 107 | #ifdef CONFIG_EXT4_FS_SECURITY |
| 108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, | 108 | [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, |
| 109 | #endif | 109 | #endif |
| 110 | }; | 110 | }; |
| @@ -112,11 +112,11 @@ static struct xattr_handler *ext4_xattr_handler_map[] = { | |||
| 112 | struct xattr_handler *ext4_xattr_handlers[] = { | 112 | struct xattr_handler *ext4_xattr_handlers[] = { |
| 113 | &ext4_xattr_user_handler, | 113 | &ext4_xattr_user_handler, |
| 114 | &ext4_xattr_trusted_handler, | 114 | &ext4_xattr_trusted_handler, |
| 115 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 115 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
| 116 | &ext4_xattr_acl_access_handler, | 116 | &ext4_xattr_acl_access_handler, |
| 117 | &ext4_xattr_acl_default_handler, | 117 | &ext4_xattr_acl_default_handler, |
| 118 | #endif | 118 | #endif |
| 119 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 119 | #ifdef CONFIG_EXT4_FS_SECURITY |
| 120 | &ext4_xattr_security_handler, | 120 | &ext4_xattr_security_handler, |
| 121 | #endif | 121 | #endif |
| 122 | NULL | 122 | NULL |
| @@ -959,6 +959,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
| 959 | struct ext4_xattr_block_find bs = { | 959 | struct ext4_xattr_block_find bs = { |
| 960 | .s = { .not_found = -ENODATA, }, | 960 | .s = { .not_found = -ENODATA, }, |
| 961 | }; | 961 | }; |
| 962 | unsigned long no_expand; | ||
| 962 | int error; | 963 | int error; |
| 963 | 964 | ||
| 964 | if (!name) | 965 | if (!name) |
| @@ -966,6 +967,9 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
| 966 | if (strlen(name) > 255) | 967 | if (strlen(name) > 255) |
| 967 | return -ERANGE; | 968 | return -ERANGE; |
| 968 | down_write(&EXT4_I(inode)->xattr_sem); | 969 | down_write(&EXT4_I(inode)->xattr_sem); |
| 970 | no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; | ||
| 971 | EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; | ||
| 972 | |||
| 969 | error = ext4_get_inode_loc(inode, &is.iloc); | 973 | error = ext4_get_inode_loc(inode, &is.iloc); |
| 970 | if (error) | 974 | if (error) |
| 971 | goto cleanup; | 975 | goto cleanup; |
| @@ -1042,6 +1046,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, | |||
| 1042 | cleanup: | 1046 | cleanup: |
| 1043 | brelse(is.iloc.bh); | 1047 | brelse(is.iloc.bh); |
| 1044 | brelse(bs.bh); | 1048 | brelse(bs.bh); |
| 1049 | if (no_expand == 0) | ||
| 1050 | EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; | ||
| 1045 | up_write(&EXT4_I(inode)->xattr_sem); | 1051 | up_write(&EXT4_I(inode)->xattr_sem); |
| 1046 | return error; | 1052 | return error; |
| 1047 | } | 1053 | } |
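The xattr hunk above remembers whether EXT4_STATE_NO_EXPAND was already set, sets it for the duration of the attribute update (so i_extra_isize expansion cannot re-enter the xattr code), and clears it on exit only if it was clear on entry. That save/set/conditionally-restore shape is a common pattern for temporarily raising a state bit; a generic sketch with an illustrative bit value:

    #include <stdio.h>

    #define STATE_NO_EXPAND 0x4   /* illustrative bit value */

    static unsigned long i_state;

    static void xattr_set(void)
    {
            unsigned long no_expand = i_state & STATE_NO_EXPAND;  /* remember entry state */

            i_state |= STATE_NO_EXPAND;   /* block expansion while we work */
            /* ... modify extended attributes ... */
            if (no_expand == 0)
                    i_state &= ~STATE_NO_EXPAND;  /* only clear it if we set it */
    }

    int main(void)
    {
            xattr_set();
            printf("i_state=%#lx\n", i_state);
            return 0;
    }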
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 5992fe979bb9..8ede88b18c29 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h | |||
| @@ -51,8 +51,8 @@ struct ext4_xattr_entry { | |||
| 51 | (((name_len) + EXT4_XATTR_ROUND + \ | 51 | (((name_len) + EXT4_XATTR_ROUND + \ |
| 52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) | 52 | sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) |
| 53 | #define EXT4_XATTR_NEXT(entry) \ | 53 | #define EXT4_XATTR_NEXT(entry) \ |
| 54 | ( (struct ext4_xattr_entry *)( \ | 54 | ((struct ext4_xattr_entry *)( \ |
| 55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) | 55 | (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len))) |
| 56 | #define EXT4_XATTR_SIZE(size) \ | 56 | #define EXT4_XATTR_SIZE(size) \ |
| 57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) | 57 | (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) |
| 58 | 58 | ||
| @@ -63,7 +63,7 @@ struct ext4_xattr_entry { | |||
| 63 | EXT4_I(inode)->i_extra_isize)) | 63 | EXT4_I(inode)->i_extra_isize)) |
| 64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) | 64 | #define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) |
| 65 | 65 | ||
| 66 | # ifdef CONFIG_EXT4DEV_FS_XATTR | 66 | # ifdef CONFIG_EXT4_FS_XATTR |
| 67 | 67 | ||
| 68 | extern struct xattr_handler ext4_xattr_user_handler; | 68 | extern struct xattr_handler ext4_xattr_user_handler; |
| 69 | extern struct xattr_handler ext4_xattr_trusted_handler; | 69 | extern struct xattr_handler ext4_xattr_trusted_handler; |
| @@ -88,7 +88,7 @@ extern void exit_ext4_xattr(void); | |||
| 88 | 88 | ||
| 89 | extern struct xattr_handler *ext4_xattr_handlers[]; | 89 | extern struct xattr_handler *ext4_xattr_handlers[]; |
| 90 | 90 | ||
| 91 | # else /* CONFIG_EXT4DEV_FS_XATTR */ | 91 | # else /* CONFIG_EXT4_FS_XATTR */ |
| 92 | 92 | ||
| 93 | static inline int | 93 | static inline int |
| 94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, | 94 | ext4_xattr_get(struct inode *inode, int name_index, const char *name, |
| @@ -141,9 +141,9 @@ ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize, | |||
| 141 | 141 | ||
| 142 | #define ext4_xattr_handlers NULL | 142 | #define ext4_xattr_handlers NULL |
| 143 | 143 | ||
| 144 | # endif /* CONFIG_EXT4DEV_FS_XATTR */ | 144 | # endif /* CONFIG_EXT4_FS_XATTR */ |
| 145 | 145 | ||
| 146 | #ifdef CONFIG_EXT4DEV_FS_SECURITY | 146 | #ifdef CONFIG_EXT4_FS_SECURITY |
| 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, | 147 | extern int ext4_init_security(handle_t *handle, struct inode *inode, |
| 148 | struct inode *dir); | 148 | struct inode *dir); |
| 149 | #else | 149 | #else |
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c index 302e95c4af7e..fb98b3d847ed 100644 --- a/fs/fat/fatent.c +++ b/fs/fat/fatent.c | |||
| @@ -6,6 +6,7 @@ | |||
| 6 | #include <linux/module.h> | 6 | #include <linux/module.h> |
| 7 | #include <linux/fs.h> | 7 | #include <linux/fs.h> |
| 8 | #include <linux/msdos_fs.h> | 8 | #include <linux/msdos_fs.h> |
| 9 | #include <linux/blkdev.h> | ||
| 9 | 10 | ||
| 10 | struct fatent_operations { | 11 | struct fatent_operations { |
| 11 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); | 12 | void (*ent_blocknr)(struct super_block *, int, int *, sector_t *); |
| @@ -535,6 +536,7 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
| 535 | struct fat_entry fatent; | 536 | struct fat_entry fatent; |
| 536 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; | 537 | struct buffer_head *bhs[MAX_BUF_PER_PAGE]; |
| 537 | int i, err, nr_bhs; | 538 | int i, err, nr_bhs; |
| 539 | int first_cl = cluster; | ||
| 538 | 540 | ||
| 539 | nr_bhs = 0; | 541 | nr_bhs = 0; |
| 540 | fatent_init(&fatent); | 542 | fatent_init(&fatent); |
| @@ -551,6 +553,18 @@ int fat_free_clusters(struct inode *inode, int cluster) | |||
| 551 | goto error; | 553 | goto error; |
| 552 | } | 554 | } |
| 553 | 555 | ||
| 556 | /* | ||
| 557 | * Issue discard for the sectors we no longer care about, | ||
| 558 | * batching contiguous clusters into one request | ||
| 559 | */ | ||
| 560 | if (cluster != fatent.entry + 1) { | ||
| 561 | int nr_clus = fatent.entry - first_cl + 1; | ||
| 562 | |||
| 563 | sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl), | ||
| 564 | nr_clus * sbi->sec_per_clus); | ||
| 565 | first_cl = cluster; | ||
| 566 | } | ||
| 567 | |||
| 554 | ops->ent_put(&fatent, FAT_ENT_FREE); | 568 | ops->ent_put(&fatent, FAT_ENT_FREE); |
| 555 | if (sbi->free_clusters != -1) { | 569 | if (sbi->free_clusters != -1) { |
| 556 | sbi->free_clusters++; | 570 | sbi->free_clusters++; |
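The FAT change above batches discards: while the next cluster in the chain is the immediate successor of the previous one, the run keeps growing, and only when contiguity breaks does one sb_issue_discard() cover the whole run starting at first_cl. A minimal sketch of that run-batching logic over a made-up cluster chain (the chain values and the -1 end marker are illustrative):

    #include <stdio.h>

    int main(void)
    {
            int chain[] = { 10, 11, 12, 40, 41, 7, -1 };
            int first = chain[0], prev = chain[0];

            for (int i = 1; chain[i - 1] != -1; i++) {
                    int next = chain[i];

                    if (next != prev + 1) {
                            /* contiguity broke: one discard for [first, prev] */
                            printf("discard clusters %d..%d\n", first, prev);
                            first = next;
                    }
                    prev = next;
            }
            return 0;
    }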
diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 80ff3381fa21..d12cdf2a0406 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c | |||
| @@ -855,7 +855,7 @@ enum { | |||
| 855 | Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, | 855 | Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_err, |
| 856 | }; | 856 | }; |
| 857 | 857 | ||
| 858 | static match_table_t fat_tokens = { | 858 | static const match_table_t fat_tokens = { |
| 859 | {Opt_check_r, "check=relaxed"}, | 859 | {Opt_check_r, "check=relaxed"}, |
| 860 | {Opt_check_s, "check=strict"}, | 860 | {Opt_check_s, "check=strict"}, |
| 861 | {Opt_check_n, "check=normal"}, | 861 | {Opt_check_n, "check=normal"}, |
| @@ -890,14 +890,14 @@ static match_table_t fat_tokens = { | |||
| 890 | {Opt_tz_utc, "tz=UTC"}, | 890 | {Opt_tz_utc, "tz=UTC"}, |
| 891 | {Opt_err, NULL}, | 891 | {Opt_err, NULL}, |
| 892 | }; | 892 | }; |
| 893 | static match_table_t msdos_tokens = { | 893 | static const match_table_t msdos_tokens = { |
| 894 | {Opt_nodots, "nodots"}, | 894 | {Opt_nodots, "nodots"}, |
| 895 | {Opt_nodots, "dotsOK=no"}, | 895 | {Opt_nodots, "dotsOK=no"}, |
| 896 | {Opt_dots, "dots"}, | 896 | {Opt_dots, "dots"}, |
| 897 | {Opt_dots, "dotsOK=yes"}, | 897 | {Opt_dots, "dotsOK=yes"}, |
| 898 | {Opt_err, NULL} | 898 | {Opt_err, NULL} |
| 899 | }; | 899 | }; |
| 900 | static match_table_t vfat_tokens = { | 900 | static const match_table_t vfat_tokens = { |
| 901 | {Opt_charset, "iocharset=%s"}, | 901 | {Opt_charset, "iocharset=%s"}, |
| 902 | {Opt_shortname_lower, "shortname=lower"}, | 902 | {Opt_shortname_lower, "shortname=lower"}, |
| 903 | {Opt_shortname_win95, "shortname=win95"}, | 903 | {Opt_shortname_win95, "shortname=win95"}, |
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 25adfc3c693a..d0ff0b8cf309 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c | |||
| @@ -8,7 +8,7 @@ | |||
| 8 | * pages against inodes. ie: data writeback. Writeout of the | 8 | * pages against inodes. ie: data writeback. Writeout of the |
| 9 | * inode itself is not handled here. | 9 | * inode itself is not handled here. |
| 10 | * | 10 | * |
| 11 | * 10Apr2002 akpm@zip.com.au | 11 | * 10Apr2002 Andrew Morton |
| 12 | * Split out of fs/inode.c | 12 | * Split out of fs/inode.c |
| 13 | * Additions for address_space-based writeback | 13 | * Additions for address_space-based writeback |
| 14 | */ | 14 | */ |
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 2bada6bbc317..34930a964b82 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c | |||
| @@ -101,6 +101,8 @@ void fuse_finish_open(struct inode *inode, struct file *file, | |||
| 101 | file->f_op = &fuse_direct_io_file_operations; | 101 | file->f_op = &fuse_direct_io_file_operations; |
| 102 | if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) | 102 | if (!(outarg->open_flags & FOPEN_KEEP_CACHE)) |
| 103 | invalidate_inode_pages2(inode->i_mapping); | 103 | invalidate_inode_pages2(inode->i_mapping); |
| 104 | if (outarg->open_flags & FOPEN_NONSEEKABLE) | ||
| 105 | nonseekable_open(inode, file); | ||
| 104 | ff->fh = outarg->fh; | 106 | ff->fh = outarg->fh; |
| 105 | file->private_data = fuse_file_get(ff); | 107 | file->private_data = fuse_file_get(ff); |
| 106 | } | 108 | } |
| @@ -1448,6 +1450,9 @@ static loff_t fuse_file_llseek(struct file *file, loff_t offset, int origin) | |||
| 1448 | mutex_lock(&inode->i_mutex); | 1450 | mutex_lock(&inode->i_mutex); |
| 1449 | switch (origin) { | 1451 | switch (origin) { |
| 1450 | case SEEK_END: | 1452 | case SEEK_END: |
| 1453 | retval = fuse_update_attributes(inode, NULL, file, NULL); | ||
| 1454 | if (retval) | ||
| 1455 | return retval; | ||
| 1451 | offset += i_size_read(inode); | 1456 | offset += i_size_read(inode); |
| 1452 | break; | 1457 | break; |
| 1453 | case SEEK_CUR: | 1458 | case SEEK_CUR: |
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 3a876076bdd1..35accfdd747f 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h | |||
| @@ -6,6 +6,9 @@ | |||
| 6 | See the file COPYING. | 6 | See the file COPYING. |
| 7 | */ | 7 | */ |
| 8 | 8 | ||
| 9 | #ifndef _FS_FUSE_I_H | ||
| 10 | #define _FS_FUSE_I_H | ||
| 11 | |||
| 9 | #include <linux/fuse.h> | 12 | #include <linux/fuse.h> |
| 10 | #include <linux/fs.h> | 13 | #include <linux/fs.h> |
| 11 | #include <linux/mount.h> | 14 | #include <linux/mount.h> |
| @@ -655,3 +658,5 @@ void fuse_set_nowrite(struct inode *inode); | |||
| 655 | void fuse_release_nowrite(struct inode *inode); | 658 | void fuse_release_nowrite(struct inode *inode); |
| 656 | 659 | ||
| 657 | u64 fuse_get_attr_version(struct fuse_conn *fc); | 660 | u64 fuse_get_attr_version(struct fuse_conn *fc); |
| 661 | |||
| 662 | #endif /* _FS_FUSE_I_H */ | ||
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index d2249f174e20..54b1f0e1ef58 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c | |||
| @@ -354,7 +354,7 @@ enum { | |||
| 354 | OPT_ERR | 354 | OPT_ERR |
| 355 | }; | 355 | }; |
| 356 | 356 | ||
| 357 | static match_table_t tokens = { | 357 | static const match_table_t tokens = { |
| 358 | {OPT_FD, "fd=%u"}, | 358 | {OPT_FD, "fd=%u"}, |
| 359 | {OPT_ROOTMODE, "rootmode=%o"}, | 359 | {OPT_ROOTMODE, "rootmode=%o"}, |
| 360 | {OPT_USER_ID, "user_id=%u"}, | 360 | {OPT_USER_ID, "user_id=%u"}, |
| @@ -865,7 +865,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
| 865 | if (is_bdev) { | 865 | if (is_bdev) { |
| 866 | fc->destroy_req = fuse_request_alloc(); | 866 | fc->destroy_req = fuse_request_alloc(); |
| 867 | if (!fc->destroy_req) | 867 | if (!fc->destroy_req) |
| 868 | goto err_put_root; | 868 | goto err_free_init_req; |
| 869 | } | 869 | } |
| 870 | 870 | ||
| 871 | mutex_lock(&fuse_mutex); | 871 | mutex_lock(&fuse_mutex); |
| @@ -895,6 +895,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) | |||
| 895 | 895 | ||
| 896 | err_unlock: | 896 | err_unlock: |
| 897 | mutex_unlock(&fuse_mutex); | 897 | mutex_unlock(&fuse_mutex); |
| 898 | err_free_init_req: | ||
| 898 | fuse_request_free(init_req); | 899 | fuse_request_free(init_req); |
| 899 | err_put_root: | 900 | err_put_root: |
| 900 | dput(root_dentry); | 901 | dput(root_dentry); |
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 13391e546616..c962283d4e7f 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c | |||
| @@ -1265,6 +1265,8 @@ static void blocking_cb(struct gfs2_sbd *sdp, struct lm_lockname *name, | |||
| 1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; | 1265 | holdtime = gl->gl_tchange + gl->gl_ops->go_min_hold_time; |
| 1266 | if (time_before(now, holdtime)) | 1266 | if (time_before(now, holdtime)) |
| 1267 | delay = holdtime - now; | 1267 | delay = holdtime - now; |
| 1268 | if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) | ||
| 1269 | delay = gl->gl_ops->go_min_hold_time; | ||
| 1268 | 1270 | ||
| 1269 | spin_lock(&gl->gl_spin); | 1271 | spin_lock(&gl->gl_spin); |
| 1270 | handle_callback(gl, state, 1, delay); | 1272 | handle_callback(gl, state, 1, delay); |
| @@ -1578,8 +1580,6 @@ static const char *hflags2str(char *buf, unsigned flags, unsigned long iflags) | |||
| 1578 | *p++ = 'a'; | 1580 | *p++ = 'a'; |
| 1579 | if (flags & GL_EXACT) | 1581 | if (flags & GL_EXACT) |
| 1580 | *p++ = 'E'; | 1582 | *p++ = 'E'; |
| 1581 | if (flags & GL_ATIME) | ||
| 1582 | *p++ = 'a'; | ||
| 1583 | if (flags & GL_NOCACHE) | 1583 | if (flags & GL_NOCACHE) |
| 1584 | *p++ = 'c'; | 1584 | *p++ = 'c'; |
| 1585 | if (test_bit(HIF_HOLDER, &iflags)) | 1585 | if (test_bit(HIF_HOLDER, &iflags)) |
| @@ -1816,15 +1816,17 @@ restart: | |||
| 1816 | if (gl) { | 1816 | if (gl) { |
| 1817 | gi->gl = hlist_entry(gl->gl_list.next, | 1817 | gi->gl = hlist_entry(gl->gl_list.next, |
| 1818 | struct gfs2_glock, gl_list); | 1818 | struct gfs2_glock, gl_list); |
| 1819 | if (gi->gl) | 1819 | } else { |
| 1820 | gfs2_glock_hold(gi->gl); | 1820 | gi->gl = hlist_entry(gl_hash_table[gi->hash].hb_list.first, |
| 1821 | struct gfs2_glock, gl_list); | ||
| 1821 | } | 1822 | } |
| 1823 | if (gi->gl) | ||
| 1824 | gfs2_glock_hold(gi->gl); | ||
| 1822 | read_unlock(gl_lock_addr(gi->hash)); | 1825 | read_unlock(gl_lock_addr(gi->hash)); |
| 1823 | if (gl) | 1826 | if (gl) |
| 1824 | gfs2_glock_put(gl); | 1827 | gfs2_glock_put(gl); |
| 1825 | if (gl && gi->gl == NULL) | ||
| 1826 | gi->hash++; | ||
| 1827 | while (gi->gl == NULL) { | 1828 | while (gi->gl == NULL) { |
| 1829 | gi->hash++; | ||
| 1828 | if (gi->hash >= GFS2_GL_HASH_SIZE) | 1830 | if (gi->hash >= GFS2_GL_HASH_SIZE) |
| 1829 | return 1; | 1831 | return 1; |
| 1830 | read_lock(gl_lock_addr(gi->hash)); | 1832 | read_lock(gl_lock_addr(gi->hash)); |
| @@ -1833,7 +1835,6 @@ restart: | |||
| 1833 | if (gi->gl) | 1835 | if (gi->gl) |
| 1834 | gfs2_glock_hold(gi->gl); | 1836 | gfs2_glock_hold(gi->gl); |
| 1835 | read_unlock(gl_lock_addr(gi->hash)); | 1837 | read_unlock(gl_lock_addr(gi->hash)); |
| 1836 | gi->hash++; | ||
| 1837 | } | 1838 | } |
| 1838 | 1839 | ||
| 1839 | if (gi->sdp != gi->gl->gl_sbd) | 1840 | if (gi->sdp != gi->gl->gl_sbd) |
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 971d92af70fc..695c6b193611 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h | |||
| @@ -24,7 +24,6 @@ | |||
| 24 | #define GL_ASYNC 0x00000040 | 24 | #define GL_ASYNC 0x00000040 |
| 25 | #define GL_EXACT 0x00000080 | 25 | #define GL_EXACT 0x00000080 |
| 26 | #define GL_SKIP 0x00000100 | 26 | #define GL_SKIP 0x00000100 |
| 27 | #define GL_ATIME 0x00000200 | ||
| 28 | #define GL_NOCACHE 0x00000400 | 27 | #define GL_NOCACHE 0x00000400 |
| 29 | 28 | ||
| 30 | #define GLR_TRYFAILED 13 | 29 | #define GLR_TRYFAILED 13 |
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 448697a5c462..f566ec1b4e8e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h | |||
| @@ -386,20 +386,21 @@ struct gfs2_statfs_change_host { | |||
| 386 | #define GFS2_DATA_ORDERED 2 | 386 | #define GFS2_DATA_ORDERED 2 |
| 387 | 387 | ||
| 388 | struct gfs2_args { | 388 | struct gfs2_args { |
| 389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ | 389 | char ar_lockproto[GFS2_LOCKNAME_LEN]; /* Name of the Lock Protocol */ |
| 390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ | 390 | char ar_locktable[GFS2_LOCKNAME_LEN]; /* Name of the Lock Table */ |
| 391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ | 391 | char ar_hostdata[GFS2_LOCKNAME_LEN]; /* Host specific data */ |
| 392 | int ar_spectator; /* Don't get a journal because we're always RO */ | 392 | unsigned int ar_spectator:1; /* Don't get a journal */ |
| 393 | int ar_ignore_local_fs; /* Don't optimize even if local_fs is 1 */ | 393 | unsigned int ar_ignore_local_fs:1; /* Ignore optimisations */ |
| 394 | int ar_localflocks; /* Let the VFS do flock|fcntl locks for us */ | 394 | unsigned int ar_localflocks:1; /* Let the VFS do flock|fcntl */ |
| 395 | int ar_localcaching; /* Local-style caching (dangerous on multihost) */ | 395 | unsigned int ar_localcaching:1; /* Local caching */ |
| 396 | int ar_debug; /* Oops on errors instead of trying to be graceful */ | 396 | unsigned int ar_debug:1; /* Oops on errors */ |
| 397 | int ar_upgrade; /* Upgrade ondisk/multihost format */ | 397 | unsigned int ar_upgrade:1; /* Upgrade ondisk format */ |
| 398 | unsigned int ar_num_glockd; /* Number of glockd threads */ | 398 | unsigned int ar_posix_acl:1; /* Enable posix acls */ |
| 399 | int ar_posix_acl; /* Enable posix acls */ | 399 | unsigned int ar_quota:2; /* off/account/on */ |
| 400 | int ar_quota; /* off/account/on */ | 400 | unsigned int ar_suiddir:1; /* suiddir support */ |
| 401 | int ar_suiddir; /* suiddir support */ | 401 | unsigned int ar_data:2; /* ordered/writeback */ |
| 402 | int ar_data; /* ordered/writeback */ | 402 | unsigned int ar_meta:1; /* mount metafs */ |
| 403 | unsigned int ar_num_glockd; /* Number of glockd threads */ | ||
| 403 | }; | 404 | }; |
| 404 | 405 | ||
| 405 | struct gfs2_tune { | 406 | struct gfs2_tune { |
| @@ -419,7 +420,6 @@ struct gfs2_tune { | |||
| 419 | unsigned int gt_quota_scale_den; /* Denominator */ | 420 | unsigned int gt_quota_scale_den; /* Denominator */ |
| 420 | unsigned int gt_quota_cache_secs; | 421 | unsigned int gt_quota_cache_secs; |
| 421 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ | 422 | unsigned int gt_quota_quantum; /* Secs between syncs to quota file */ |
| 422 | unsigned int gt_atime_quantum; /* Min secs between atime updates */ | ||
| 423 | unsigned int gt_new_files_jdata; | 423 | unsigned int gt_new_files_jdata; |
| 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ | 424 | unsigned int gt_max_readahead; /* Max bytes to read-ahead from disk */ |
| 425 | unsigned int gt_stall_secs; /* Detects trouble! */ | 425 | unsigned int gt_stall_secs; /* Detects trouble! */ |
| @@ -432,7 +432,7 @@ enum { | |||
| 432 | SDF_JOURNAL_CHECKED = 0, | 432 | SDF_JOURNAL_CHECKED = 0, |
| 433 | SDF_JOURNAL_LIVE = 1, | 433 | SDF_JOURNAL_LIVE = 1, |
| 434 | SDF_SHUTDOWN = 2, | 434 | SDF_SHUTDOWN = 2, |
| 435 | SDF_NOATIME = 3, | 435 | SDF_NOBARRIERS = 3, |
| 436 | }; | 436 | }; |
| 437 | 437 | ||
| 438 | #define GFS2_FSNAME_LEN 256 | 438 | #define GFS2_FSNAME_LEN 256 |
| @@ -461,7 +461,6 @@ struct gfs2_sb_host { | |||
| 461 | 461 | ||
| 462 | struct gfs2_sbd { | 462 | struct gfs2_sbd { |
| 463 | struct super_block *sd_vfs; | 463 | struct super_block *sd_vfs; |
| 464 | struct super_block *sd_vfs_meta; | ||
| 465 | struct kobject sd_kobj; | 464 | struct kobject sd_kobj; |
| 466 | unsigned long sd_flags; /* SDF_... */ | 465 | unsigned long sd_flags; /* SDF_... */ |
| 467 | struct gfs2_sb_host sd_sb; | 466 | struct gfs2_sb_host sd_sb; |
| @@ -499,7 +498,9 @@ struct gfs2_sbd { | |||
| 499 | 498 | ||
| 500 | /* Inode Stuff */ | 499 | /* Inode Stuff */ |
| 501 | 500 | ||
| 502 | struct inode *sd_master_dir; | 501 | struct dentry *sd_master_dir; |
| 502 | struct dentry *sd_root_dir; | ||
| 503 | |||
| 503 | struct inode *sd_jindex; | 504 | struct inode *sd_jindex; |
| 504 | struct inode *sd_inum_inode; | 505 | struct inode *sd_inum_inode; |
| 505 | struct inode *sd_statfs_inode; | 506 | struct inode *sd_statfs_inode; |
| @@ -634,7 +635,6 @@ struct gfs2_sbd { | |||
| 634 | /* Debugging crud */ | 635 | /* Debugging crud */ |
| 635 | 636 | ||
| 636 | unsigned long sd_last_warning; | 637 | unsigned long sd_last_warning; |
| 637 | struct vfsmount *sd_gfs2mnt; | ||
| 638 | struct dentry *debugfs_dir; /* debugfs directory */ | 638 | struct dentry *debugfs_dir; /* debugfs directory */ |
| 639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ | 639 | struct dentry *debugfs_dentry_glocks; /* for debugfs */ |
| 640 | }; | 640 | }; |
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 8b0806a32948..7cee695fa441 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/crc32.h> | 18 | #include <linux/crc32.h> |
| 19 | #include <linux/lm_interface.h> | 19 | #include <linux/lm_interface.h> |
| 20 | #include <linux/security.h> | 20 | #include <linux/security.h> |
| 21 | #include <linux/time.h> | ||
| 21 | 22 | ||
| 22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
| 23 | #include "incore.h" | 24 | #include "incore.h" |
| @@ -249,6 +250,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 249 | { | 250 | { |
| 250 | struct gfs2_dinode_host *di = &ip->i_di; | 251 | struct gfs2_dinode_host *di = &ip->i_di; |
| 251 | const struct gfs2_dinode *str = buf; | 252 | const struct gfs2_dinode *str = buf; |
| 253 | struct timespec atime; | ||
| 252 | u16 height, depth; | 254 | u16 height, depth; |
| 253 | 255 | ||
| 254 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) | 256 | if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr))) |
| @@ -275,8 +277,10 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf) | |||
| 275 | di->di_size = be64_to_cpu(str->di_size); | 277 | di->di_size = be64_to_cpu(str->di_size); |
| 276 | i_size_write(&ip->i_inode, di->di_size); | 278 | i_size_write(&ip->i_inode, di->di_size); |
| 277 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); | 279 | gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks)); |
| 278 | ip->i_inode.i_atime.tv_sec = be64_to_cpu(str->di_atime); | 280 | atime.tv_sec = be64_to_cpu(str->di_atime); |
| 279 | ip->i_inode.i_atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); | 281 | atime.tv_nsec = be32_to_cpu(str->di_atime_nsec); |
| 282 | if (timespec_compare(&ip->i_inode.i_atime, &atime) < 0) | ||
| 283 | ip->i_inode.i_atime = atime; | ||
| 280 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); | 284 | ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime); |
| 281 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); | 285 | ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec); |
| 282 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); | 286 | ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime); |
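The hunk above changes gfs2_dinode_in() so the on-disk atime only replaces the in-core value when it is strictly newer (timespec_compare); with atime updates now left to the VFS (see the GL_ATIME removals later in this patch), the in-core timestamp can run ahead of what was last written to disk. A minimal user-space model of the comparison, with names chosen for illustration rather than taken from the kernel:

    #include <stdio.h>
    #include <time.h>

    /* <0, 0, >0 as a is older than, equal to, or newer than b. */
    static int ts_compare(const struct timespec *a, const struct timespec *b)
    {
        if (a->tv_sec != b->tv_sec)
            return a->tv_sec < b->tv_sec ? -1 : 1;
        if (a->tv_nsec != b->tv_nsec)
            return a->tv_nsec < b->tv_nsec ? -1 : 1;
        return 0;
    }

    int main(void)
    {
        struct timespec in_core = { .tv_sec = 1000, .tv_nsec = 500 };
        struct timespec on_disk = { .tv_sec = 999,  .tv_nsec = 0 };

        /* Mirrors the new gfs2_dinode_in() rule: only move atime forward. */
        if (ts_compare(&in_core, &on_disk) < 0)
            in_core = on_disk;

        printf("atime kept at %ld.%09ld\n", (long)in_core.tv_sec, in_core.tv_nsec);
        return 0;
    }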
| @@ -1033,13 +1037,11 @@ struct inode *gfs2_createi(struct gfs2_holder *ghs, const struct qstr *name, | |||
| 1033 | 1037 | ||
| 1034 | if (bh) | 1038 | if (bh) |
| 1035 | brelse(bh); | 1039 | brelse(bh); |
| 1036 | if (!inode) | ||
| 1037 | return ERR_PTR(-ENOMEM); | ||
| 1038 | return inode; | 1040 | return inode; |
| 1039 | 1041 | ||
| 1040 | fail_gunlock2: | 1042 | fail_gunlock2: |
| 1041 | gfs2_glock_dq_uninit(ghs + 1); | 1043 | gfs2_glock_dq_uninit(ghs + 1); |
| 1042 | if (inode) | 1044 | if (inode && !IS_ERR(inode)) |
| 1043 | iput(inode); | 1045 | iput(inode); |
| 1044 | fail_gunlock: | 1046 | fail_gunlock: |
| 1045 | gfs2_glock_dq(ghs); | 1047 | gfs2_glock_dq(ghs); |
| @@ -1140,54 +1142,6 @@ int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | |||
| 1140 | return 0; | 1142 | return 0; |
| 1141 | } | 1143 | } |
| 1142 | 1144 | ||
| 1143 | /* | ||
| 1144 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
| 1145 | * @this: move this | ||
| 1146 | * @to: to here | ||
| 1147 | * | ||
| 1148 | * Follow @to back to the root and make sure we don't encounter @this | ||
| 1149 | * Assumes we already hold the rename lock. | ||
| 1150 | * | ||
| 1151 | * Returns: errno | ||
| 1152 | */ | ||
| 1153 | |||
| 1154 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
| 1155 | { | ||
| 1156 | struct inode *dir = &to->i_inode; | ||
| 1157 | struct super_block *sb = dir->i_sb; | ||
| 1158 | struct inode *tmp; | ||
| 1159 | struct qstr dotdot; | ||
| 1160 | int error = 0; | ||
| 1161 | |||
| 1162 | gfs2_str2qstr(&dotdot, ".."); | ||
| 1163 | |||
| 1164 | igrab(dir); | ||
| 1165 | |||
| 1166 | for (;;) { | ||
| 1167 | if (dir == &this->i_inode) { | ||
| 1168 | error = -EINVAL; | ||
| 1169 | break; | ||
| 1170 | } | ||
| 1171 | if (dir == sb->s_root->d_inode) { | ||
| 1172 | error = 0; | ||
| 1173 | break; | ||
| 1174 | } | ||
| 1175 | |||
| 1176 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
| 1177 | if (IS_ERR(tmp)) { | ||
| 1178 | error = PTR_ERR(tmp); | ||
| 1179 | break; | ||
| 1180 | } | ||
| 1181 | |||
| 1182 | iput(dir); | ||
| 1183 | dir = tmp; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | iput(dir); | ||
| 1187 | |||
| 1188 | return error; | ||
| 1189 | } | ||
| 1190 | |||
| 1191 | /** | 1145 | /** |
| 1192 | * gfs2_readlinki - return the contents of a symlink | 1146 | * gfs2_readlinki - return the contents of a symlink |
| 1193 | * @ip: the symlink's inode | 1147 | * @ip: the symlink's inode |
| @@ -1207,8 +1161,8 @@ int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len) | |||
| 1207 | unsigned int x; | 1161 | unsigned int x; |
| 1208 | int error; | 1162 | int error; |
| 1209 | 1163 | ||
| 1210 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 1164 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
| 1211 | error = gfs2_glock_nq_atime(&i_gh); | 1165 | error = gfs2_glock_nq(&i_gh); |
| 1212 | if (error) { | 1166 | if (error) { |
| 1213 | gfs2_holder_uninit(&i_gh); | 1167 | gfs2_holder_uninit(&i_gh); |
| 1214 | return error; | 1168 | return error; |
| @@ -1243,101 +1197,6 @@ out: | |||
| 1243 | return error; | 1197 | return error; |
| 1244 | } | 1198 | } |
| 1245 | 1199 | ||
| 1246 | /** | ||
| 1247 | * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and | ||
| 1248 | * conditionally update the inode's atime | ||
| 1249 | * @gh: the holder to acquire | ||
| 1250 | * | ||
| 1251 | * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap | ||
| 1252 | * Update if the difference between the current time and the inode's current | ||
| 1253 | * atime is greater than an interval specified at mount. | ||
| 1254 | * | ||
| 1255 | * Returns: errno | ||
| 1256 | */ | ||
| 1257 | |||
| 1258 | int gfs2_glock_nq_atime(struct gfs2_holder *gh) | ||
| 1259 | { | ||
| 1260 | struct gfs2_glock *gl = gh->gh_gl; | ||
| 1261 | struct gfs2_sbd *sdp = gl->gl_sbd; | ||
| 1262 | struct gfs2_inode *ip = gl->gl_object; | ||
| 1263 | s64 quantum = gfs2_tune_get(sdp, gt_atime_quantum); | ||
| 1264 | unsigned int state; | ||
| 1265 | int flags; | ||
| 1266 | int error; | ||
| 1267 | struct timespec tv = CURRENT_TIME; | ||
| 1268 | |||
| 1269 | if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) || | ||
| 1270 | gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) || | ||
| 1271 | gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops)) | ||
| 1272 | return -EINVAL; | ||
| 1273 | |||
| 1274 | state = gh->gh_state; | ||
| 1275 | flags = gh->gh_flags; | ||
| 1276 | |||
| 1277 | error = gfs2_glock_nq(gh); | ||
| 1278 | if (error) | ||
| 1279 | return error; | ||
| 1280 | |||
| 1281 | if (test_bit(SDF_NOATIME, &sdp->sd_flags) || | ||
| 1282 | (sdp->sd_vfs->s_flags & MS_RDONLY)) | ||
| 1283 | return 0; | ||
| 1284 | |||
| 1285 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
| 1286 | gfs2_glock_dq(gh); | ||
| 1287 | gfs2_holder_reinit(LM_ST_EXCLUSIVE, gh->gh_flags & ~LM_FLAG_ANY, | ||
| 1288 | gh); | ||
| 1289 | error = gfs2_glock_nq(gh); | ||
| 1290 | if (error) | ||
| 1291 | return error; | ||
| 1292 | |||
| 1293 | /* Verify that atime hasn't been updated while we were | ||
| 1294 | trying to get exclusive lock. */ | ||
| 1295 | |||
| 1296 | tv = CURRENT_TIME; | ||
| 1297 | if (tv.tv_sec - ip->i_inode.i_atime.tv_sec >= quantum) { | ||
| 1298 | struct buffer_head *dibh; | ||
| 1299 | struct gfs2_dinode *di; | ||
| 1300 | |||
| 1301 | error = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
| 1302 | if (error == -EROFS) | ||
| 1303 | return 0; | ||
| 1304 | if (error) | ||
| 1305 | goto fail; | ||
| 1306 | |||
| 1307 | error = gfs2_meta_inode_buffer(ip, &dibh); | ||
| 1308 | if (error) | ||
| 1309 | goto fail_end_trans; | ||
| 1310 | |||
| 1311 | ip->i_inode.i_atime = tv; | ||
| 1312 | |||
| 1313 | gfs2_trans_add_bh(ip->i_gl, dibh, 1); | ||
| 1314 | di = (struct gfs2_dinode *)dibh->b_data; | ||
| 1315 | di->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec); | ||
| 1316 | di->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec); | ||
| 1317 | brelse(dibh); | ||
| 1318 | |||
| 1319 | gfs2_trans_end(sdp); | ||
| 1320 | } | ||
| 1321 | |||
| 1322 | /* If someone else has asked for the glock, | ||
| 1323 | unlock and let them have it. Then reacquire | ||
| 1324 | in the original state. */ | ||
| 1325 | if (gfs2_glock_is_blocking(gl)) { | ||
| 1326 | gfs2_glock_dq(gh); | ||
| 1327 | gfs2_holder_reinit(state, flags, gh); | ||
| 1328 | return gfs2_glock_nq(gh); | ||
| 1329 | } | ||
| 1330 | } | ||
| 1331 | |||
| 1332 | return 0; | ||
| 1333 | |||
| 1334 | fail_end_trans: | ||
| 1335 | gfs2_trans_end(sdp); | ||
| 1336 | fail: | ||
| 1337 | gfs2_glock_dq(gh); | ||
| 1338 | return error; | ||
| 1339 | } | ||
| 1340 | |||
| 1341 | static int | 1200 | static int |
| 1342 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) | 1201 | __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr) |
| 1343 | { | 1202 | { |
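The deleted gfs2_glock_nq_atime() (together with the gt_atime_quantum tunable removed from incore.h and the GL_ATIME holder flag dropped at every call site) was GFS2's private atime policy: take the glock shared and, if the stored access time was more than a mount-tunable number of seconds old, retry in the exclusive state and write the dinode. Callers now take a plain shared glock and leave atime to the generic VFS code. For reference, the heart of the removed policy is a simple age test; a stand-alone sketch of that test (illustrative, not kernel code):

    #include <stdio.h>
    #include <time.h>

    /*
     * Model of the removed quantum check: update the stored atime only when
     * it is at least "quantum" seconds behind the current time. In the kernel
     * this decided whether the glock had to be upgraded and a transaction begun.
     */
    static int atime_needs_update(time_t stored_atime, time_t now, time_t quantum)
    {
        return (now - stored_atime) >= quantum;
    }

    int main(void)
    {
        time_t now = time(NULL);
        time_t atime = now - 10;    /* accessed 10 seconds ago */

        printf("quantum 3600: update? %d\n", atime_needs_update(atime, now, 3600));
        printf("quantum 5:    update? %d\n", atime_needs_update(atime, now, 5));
        return 0;
    }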
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index 58f9607d6a86..2d43f69610a0 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h | |||
| @@ -91,9 +91,7 @@ int gfs2_rmdiri(struct gfs2_inode *dip, const struct qstr *name, | |||
| 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, | 91 | int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name, |
| 92 | const struct gfs2_inode *ip); | 92 | const struct gfs2_inode *ip); |
| 93 | int gfs2_permission(struct inode *inode, int mask); | 93 | int gfs2_permission(struct inode *inode, int mask); |
| 94 | int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to); | ||
| 95 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); | 94 | int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len); |
| 96 | int gfs2_glock_nq_atime(struct gfs2_holder *gh); | ||
| 97 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); | 95 | int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr); |
| 98 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); | 96 | struct inode *gfs2_lookup_simple(struct inode *dip, const char *name); |
| 99 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); | 97 | void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf); |
diff --git a/fs/gfs2/locking/dlm/mount.c b/fs/gfs2/locking/dlm/mount.c index 09d78c216f48..0c4cbe6c8285 100644 --- a/fs/gfs2/locking/dlm/mount.c +++ b/fs/gfs2/locking/dlm/mount.c | |||
| @@ -144,7 +144,8 @@ static int gdlm_mount(char *table_name, char *host_data, | |||
| 144 | 144 | ||
| 145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), | 145 | error = dlm_new_lockspace(ls->fsname, strlen(ls->fsname), |
| 146 | &ls->dlm_lockspace, | 146 | &ls->dlm_lockspace, |
| 147 | DLM_LSFL_FS | (nodir ? DLM_LSFL_NODIR : 0), | 147 | DLM_LSFL_FS | DLM_LSFL_NEWEXCL | |
| 148 | (nodir ? DLM_LSFL_NODIR : 0), | ||
| 148 | GDLM_LVB_SIZE); | 149 | GDLM_LVB_SIZE); |
| 149 | if (error) { | 150 | if (error) { |
| 150 | log_error("dlm_new_lockspace error %d", error); | 151 | log_error("dlm_new_lockspace error %d", error); |
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 6c6af9f5e3ab..ad305854bdc6 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c | |||
| @@ -18,6 +18,7 @@ | |||
| 18 | #include <linux/delay.h> | 18 | #include <linux/delay.h> |
| 19 | #include <linux/kthread.h> | 19 | #include <linux/kthread.h> |
| 20 | #include <linux/freezer.h> | 20 | #include <linux/freezer.h> |
| 21 | #include <linux/bio.h> | ||
| 21 | 22 | ||
| 22 | #include "gfs2.h" | 23 | #include "gfs2.h" |
| 23 | #include "incore.h" | 24 | #include "incore.h" |
| @@ -584,7 +585,6 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
| 584 | memset(bh->b_data, 0, bh->b_size); | 585 | memset(bh->b_data, 0, bh->b_size); |
| 585 | set_buffer_uptodate(bh); | 586 | set_buffer_uptodate(bh); |
| 586 | clear_buffer_dirty(bh); | 587 | clear_buffer_dirty(bh); |
| 587 | unlock_buffer(bh); | ||
| 588 | 588 | ||
| 589 | gfs2_ail1_empty(sdp, 0); | 589 | gfs2_ail1_empty(sdp, 0); |
| 590 | tail = current_tail(sdp); | 590 | tail = current_tail(sdp); |
| @@ -601,8 +601,23 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) | |||
| 601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); | 601 | hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); |
| 602 | lh->lh_hash = cpu_to_be32(hash); | 602 | lh->lh_hash = cpu_to_be32(hash); |
| 603 | 603 | ||
| 604 | set_buffer_dirty(bh); | 604 | bh->b_end_io = end_buffer_write_sync; |
| 605 | if (sync_dirty_buffer(bh)) | 605 | if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) |
| 606 | goto skip_barrier; | ||
| 607 | get_bh(bh); | ||
| 608 | submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); | ||
| 609 | wait_on_buffer(bh); | ||
| 610 | if (buffer_eopnotsupp(bh)) { | ||
| 611 | clear_buffer_eopnotsupp(bh); | ||
| 612 | set_buffer_uptodate(bh); | ||
| 613 | set_bit(SDF_NOBARRIERS, &sdp->sd_flags); | ||
| 614 | lock_buffer(bh); | ||
| 615 | skip_barrier: | ||
| 616 | get_bh(bh); | ||
| 617 | submit_bh(WRITE_SYNC | (1 << BIO_RW_META), bh); | ||
| 618 | wait_on_buffer(bh); | ||
| 619 | } | ||
| 620 | if (!buffer_uptodate(bh)) | ||
| 606 | gfs2_io_error_bh(sdp, bh); | 621 | gfs2_io_error_bh(sdp, bh); |
| 607 | brelse(bh); | 622 | brelse(bh); |
| 608 | 623 | ||
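In log.c, log_write_header() now submits the log header as a barrier write and, if the device rejects that with -EOPNOTSUPP, sets the new SDF_NOBARRIERS flag so this and every later header falls back to a plain synchronous write. The control flow can be modelled in ordinary C with a stubbed submit function (the stub and its names are illustrative, not the kernel block-layer API):

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    static bool no_barriers;            /* models the sticky SDF_NOBARRIERS bit */

    /* Stub: pretend the block device does not support barrier writes. */
    static int submit_write(bool barrier)
    {
        return barrier ? -EOPNOTSUPP : 0;
    }

    static int write_log_header(void)
    {
        int ret;

        if (!no_barriers) {
            ret = submit_write(true);
            if (ret != -EOPNOTSUPP)
                return ret;
            /* Remember the failure and retry without a barrier. */
            no_barriers = true;
        }
        return submit_write(false);
    }

    int main(void)
    {
        printf("first header:  %d (fell back, flag now %d)\n", write_log_header(), no_barriers);
        printf("second header: %d (barrier skipped up front)\n", write_log_header());
        return 0;
    }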
diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index b941f9f9f958..f96eb90a2cfa 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c | |||
| @@ -42,10 +42,11 @@ enum { | |||
| 42 | Opt_nosuiddir, | 42 | Opt_nosuiddir, |
| 43 | Opt_data_writeback, | 43 | Opt_data_writeback, |
| 44 | Opt_data_ordered, | 44 | Opt_data_ordered, |
| 45 | Opt_meta, | ||
| 45 | Opt_err, | 46 | Opt_err, |
| 46 | }; | 47 | }; |
| 47 | 48 | ||
| 48 | static match_table_t tokens = { | 49 | static const match_table_t tokens = { |
| 49 | {Opt_lockproto, "lockproto=%s"}, | 50 | {Opt_lockproto, "lockproto=%s"}, |
| 50 | {Opt_locktable, "locktable=%s"}, | 51 | {Opt_locktable, "locktable=%s"}, |
| 51 | {Opt_hostdata, "hostdata=%s"}, | 52 | {Opt_hostdata, "hostdata=%s"}, |
| @@ -66,6 +67,7 @@ static match_table_t tokens = { | |||
| 66 | {Opt_nosuiddir, "nosuiddir"}, | 67 | {Opt_nosuiddir, "nosuiddir"}, |
| 67 | {Opt_data_writeback, "data=writeback"}, | 68 | {Opt_data_writeback, "data=writeback"}, |
| 68 | {Opt_data_ordered, "data=ordered"}, | 69 | {Opt_data_ordered, "data=ordered"}, |
| 70 | {Opt_meta, "meta"}, | ||
| 69 | {Opt_err, NULL} | 71 | {Opt_err, NULL} |
| 70 | }; | 72 | }; |
| 71 | 73 | ||
| @@ -239,6 +241,11 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) | |||
| 239 | case Opt_data_ordered: | 241 | case Opt_data_ordered: |
| 240 | args->ar_data = GFS2_DATA_ORDERED; | 242 | args->ar_data = GFS2_DATA_ORDERED; |
| 241 | break; | 243 | break; |
| 244 | case Opt_meta: | ||
| 245 | if (remount && args->ar_meta != 1) | ||
| 246 | goto cant_remount; | ||
| 247 | args->ar_meta = 1; | ||
| 248 | break; | ||
| 242 | case Opt_err: | 249 | case Opt_err: |
| 243 | default: | 250 | default: |
| 244 | fs_info(sdp, "unknown option: %s\n", o); | 251 | fs_info(sdp, "unknown option: %s\n", o); |
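mount.c gains an Opt_meta token so "-o meta" can be passed to an ordinary gfs2 mount; init_sb() later uses ar_meta to pick the master directory as the mount root, and the option cannot be turned on by a remount. The match_table_t machinery it plugs into is essentially table-driven string matching; a reduced user-space equivalent (the two-entry table below is illustrative, not the real option list, and real match_token() also parses %s/%u arguments):

    #include <stdio.h>
    #include <string.h>

    enum { OPT_META, OPT_SPECTATOR, OPT_ERR };

    struct token { int id; const char *pattern; };

    static const struct token tokens[] = {
        { OPT_META,      "meta" },
        { OPT_SPECTATOR, "spectator" },
        { OPT_ERR,       NULL },
    };

    static int match_token_simple(const char *opt)
    {
        const struct token *t;

        for (t = tokens; t->pattern; t++)
            if (strcmp(opt, t->pattern) == 0)
                return t->id;
        return OPT_ERR;
    }

    int main(void)
    {
        int ar_meta = 0;

        switch (match_token_simple("meta")) {
        case OPT_META:
            ar_meta = 1;               /* mirrors args->ar_meta = 1 */
            break;
        default:
            fprintf(stderr, "unknown option\n");
        }
        printf("ar_meta = %d\n", ar_meta);
        return 0;
    }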
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index e64a1b04117a..27563816e1c5 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c | |||
| @@ -512,8 +512,8 @@ static int gfs2_readpage(struct file *file, struct page *page) | |||
| 512 | int error; | 512 | int error; |
| 513 | 513 | ||
| 514 | unlock_page(page); | 514 | unlock_page(page); |
| 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 515 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
| 516 | error = gfs2_glock_nq_atime(&gh); | 516 | error = gfs2_glock_nq(&gh); |
| 517 | if (unlikely(error)) | 517 | if (unlikely(error)) |
| 518 | goto out; | 518 | goto out; |
| 519 | error = AOP_TRUNCATED_PAGE; | 519 | error = AOP_TRUNCATED_PAGE; |
| @@ -594,8 +594,8 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, | |||
| 594 | struct gfs2_holder gh; | 594 | struct gfs2_holder gh; |
| 595 | int ret; | 595 | int ret; |
| 596 | 596 | ||
| 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 597 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
| 598 | ret = gfs2_glock_nq_atime(&gh); | 598 | ret = gfs2_glock_nq(&gh); |
| 599 | if (unlikely(ret)) | 599 | if (unlikely(ret)) |
| 600 | goto out_uninit; | 600 | goto out_uninit; |
| 601 | if (!gfs2_is_stuffed(ip)) | 601 | if (!gfs2_is_stuffed(ip)) |
| @@ -636,8 +636,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, | |||
| 636 | unsigned to = from + len; | 636 | unsigned to = from + len; |
| 637 | struct page *page; | 637 | struct page *page; |
| 638 | 638 | ||
| 639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &ip->i_gh); | 639 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); |
| 640 | error = gfs2_glock_nq_atime(&ip->i_gh); | 640 | error = gfs2_glock_nq(&ip->i_gh); |
| 641 | if (unlikely(error)) | 641 | if (unlikely(error)) |
| 642 | goto out_uninit; | 642 | goto out_uninit; |
| 643 | 643 | ||
| @@ -975,7 +975,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) | |||
| 975 | if (gfs2_is_stuffed(ip)) | 975 | if (gfs2_is_stuffed(ip)) |
| 976 | return 0; | 976 | return 0; |
| 977 | 977 | ||
| 978 | if (offset > i_size_read(&ip->i_inode)) | 978 | if (offset >= i_size_read(&ip->i_inode)) |
| 979 | return 0; | 979 | return 0; |
| 980 | return 1; | 980 | return 1; |
| 981 | } | 981 | } |
| @@ -1000,8 +1000,8 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, | |||
| 1000 | * unfortunately have the option of only flushing a range like | 1000 | * unfortunately have the option of only flushing a range like |
| 1001 | * the VFS does. | 1001 | * the VFS does. |
| 1002 | */ | 1002 | */ |
| 1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, GL_ATIME, &gh); | 1003 | gfs2_holder_init(ip->i_gl, LM_ST_DEFERRED, 0, &gh); |
| 1004 | rv = gfs2_glock_nq_atime(&gh); | 1004 | rv = gfs2_glock_nq(&gh); |
| 1005 | if (rv) | 1005 | if (rv) |
| 1006 | return rv; | 1006 | return rv; |
| 1007 | rv = gfs2_ok_for_dio(ip, rw, offset); | 1007 | rv = gfs2_ok_for_dio(ip, rw, offset); |
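Besides dropping GL_ATIME from the address-space operations, this file tightens gfs2_ok_for_dio(): an offset exactly equal to i_size is now treated like an offset beyond EOF, so gfs2_ok_for_dio() returns 0 instead of letting direct I/O proceed at the end of the file. A small model of the predicate with the corrected comparison (user-space sketch, field names simplified):

    #include <stdio.h>

    /* 1 = direct I/O may proceed, 0 = handled by the caller as "not ok". */
    static int ok_for_dio(long long offset, long long i_size, int is_stuffed)
    {
        if (is_stuffed)
            return 0;               /* data lives inside the dinode block */
        if (offset >= i_size)       /* was "offset > i_size" before the fix */
            return 0;
        return 1;
    }

    int main(void)
    {
        long long size = 4096;

        printf("offset 0:    %d\n", ok_for_dio(0, size, 0));
        printf("offset 4096: %d\n", ok_for_dio(4096, size, 0)); /* exactly at EOF */
        printf("offset 8192: %d\n", ok_for_dio(8192, size, 0));
        return 0;
    }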
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index e9a366d4411c..3a747f8e2188 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c | |||
| @@ -89,8 +89,8 @@ static int gfs2_readdir(struct file *file, void *dirent, filldir_t filldir) | |||
| 89 | u64 offset = file->f_pos; | 89 | u64 offset = file->f_pos; |
| 90 | int error; | 90 | int error; |
| 91 | 91 | ||
| 92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, GL_ATIME, &d_gh); | 92 | gfs2_holder_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh); |
| 93 | error = gfs2_glock_nq_atime(&d_gh); | 93 | error = gfs2_glock_nq(&d_gh); |
| 94 | if (error) { | 94 | if (error) { |
| 95 | gfs2_holder_uninit(&d_gh); | 95 | gfs2_holder_uninit(&d_gh); |
| 96 | return error; | 96 | return error; |
| @@ -153,8 +153,8 @@ static int gfs2_get_flags(struct file *filp, u32 __user *ptr) | |||
| 153 | int error; | 153 | int error; |
| 154 | u32 fsflags; | 154 | u32 fsflags; |
| 155 | 155 | ||
| 156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh); | 156 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); |
| 157 | error = gfs2_glock_nq_atime(&gh); | 157 | error = gfs2_glock_nq(&gh); |
| 158 | if (error) | 158 | if (error) |
| 159 | return error; | 159 | return error; |
| 160 | 160 | ||
| @@ -351,8 +351,8 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct page *page) | |||
| 351 | struct gfs2_alloc *al; | 351 | struct gfs2_alloc *al; |
| 352 | int ret; | 352 | int ret; |
| 353 | 353 | ||
| 354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME, &gh); | 354 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); |
| 355 | ret = gfs2_glock_nq_atime(&gh); | 355 | ret = gfs2_glock_nq(&gh); |
| 356 | if (ret) | 356 | if (ret) |
| 357 | goto out; | 357 | goto out; |
| 358 | 358 | ||
| @@ -434,8 +434,8 @@ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) | |||
| 434 | struct gfs2_holder i_gh; | 434 | struct gfs2_holder i_gh; |
| 435 | int error; | 435 | int error; |
| 436 | 436 | ||
| 437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh); | 437 | gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); |
| 438 | error = gfs2_glock_nq_atime(&i_gh); | 438 | error = gfs2_glock_nq(&i_gh); |
| 439 | if (error) { | 439 | if (error) { |
| 440 | gfs2_holder_uninit(&i_gh); | 440 | gfs2_holder_uninit(&i_gh); |
| 441 | return error; | 441 | return error; |
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index b4d1d6490633..b117fcf2c4f5 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c | |||
| @@ -40,6 +40,44 @@ | |||
| 40 | #define DO 0 | 40 | #define DO 0 |
| 41 | #define UNDO 1 | 41 | #define UNDO 1 |
| 42 | 42 | ||
| 43 | static const u32 gfs2_old_fs_formats[] = { | ||
| 44 | 0 | ||
| 45 | }; | ||
| 46 | |||
| 47 | static const u32 gfs2_old_multihost_formats[] = { | ||
| 48 | 0 | ||
| 49 | }; | ||
| 50 | |||
| 51 | /** | ||
| 52 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
| 53 | * @gt: tune | ||
| 54 | * | ||
| 55 | */ | ||
| 56 | |||
| 57 | static void gfs2_tune_init(struct gfs2_tune *gt) | ||
| 58 | { | ||
| 59 | spin_lock_init(>->gt_spin); | ||
| 60 | |||
| 61 | gt->gt_demote_secs = 300; | ||
| 62 | gt->gt_incore_log_blocks = 1024; | ||
| 63 | gt->gt_log_flush_secs = 60; | ||
| 64 | gt->gt_recoverd_secs = 60; | ||
| 65 | gt->gt_logd_secs = 1; | ||
| 66 | gt->gt_quotad_secs = 5; | ||
| 67 | gt->gt_quota_simul_sync = 64; | ||
| 68 | gt->gt_quota_warn_period = 10; | ||
| 69 | gt->gt_quota_scale_num = 1; | ||
| 70 | gt->gt_quota_scale_den = 1; | ||
| 71 | gt->gt_quota_cache_secs = 300; | ||
| 72 | gt->gt_quota_quantum = 60; | ||
| 73 | gt->gt_new_files_jdata = 0; | ||
| 74 | gt->gt_max_readahead = 1 << 18; | ||
| 75 | gt->gt_stall_secs = 600; | ||
| 76 | gt->gt_complain_secs = 10; | ||
| 77 | gt->gt_statfs_quantum = 30; | ||
| 78 | gt->gt_statfs_slow = 0; | ||
| 79 | } | ||
| 80 | |||
| 43 | static struct gfs2_sbd *init_sbd(struct super_block *sb) | 81 | static struct gfs2_sbd *init_sbd(struct super_block *sb) |
| 44 | { | 82 | { |
| 45 | struct gfs2_sbd *sdp; | 83 | struct gfs2_sbd *sdp; |
| @@ -96,21 +134,271 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) | |||
| 96 | return sdp; | 134 | return sdp; |
| 97 | } | 135 | } |
| 98 | 136 | ||
| 99 | static void init_vfs(struct super_block *sb, unsigned noatime) | 137 | |
| 138 | /** | ||
| 139 | * gfs2_check_sb - Check superblock | ||
| 140 | * @sdp: the filesystem | ||
| 141 | * @sb: The superblock | ||
| 142 | * @silent: Don't print a message if the check fails | ||
| 143 | * | ||
| 144 | * Checks the version code of the FS is one that we understand how to | ||
| 145 | * read and that the sizes of the various on-disk structures have not | ||
| 146 | * changed. | ||
| 147 | */ | ||
| 148 | |||
| 149 | static int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
| 100 | { | 150 | { |
| 101 | struct gfs2_sbd *sdp = sb->s_fs_info; | 151 | unsigned int x; |
| 102 | 152 | ||
| 103 | sb->s_magic = GFS2_MAGIC; | 153 | if (sb->sb_magic != GFS2_MAGIC || |
| 104 | sb->s_op = &gfs2_super_ops; | 154 | sb->sb_type != GFS2_METATYPE_SB) { |
| 105 | sb->s_export_op = &gfs2_export_ops; | 155 | if (!silent) |
| 106 | sb->s_time_gran = 1; | 156 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); |
| 107 | sb->s_maxbytes = MAX_LFS_FILESIZE; | 157 | return -EINVAL; |
| 158 | } | ||
| 159 | |||
| 160 | /* If format numbers match exactly, we're done. */ | ||
| 161 | |||
| 162 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
| 163 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
| 164 | return 0; | ||
| 165 | |||
| 166 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
| 167 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
| 168 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
| 169 | break; | ||
| 170 | |||
| 171 | if (!gfs2_old_fs_formats[x]) { | ||
| 172 | printk(KERN_WARNING | ||
| 173 | "GFS2: code version (%u, %u) is incompatible " | ||
| 174 | "with ondisk format (%u, %u)\n", | ||
| 175 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 176 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 177 | printk(KERN_WARNING | ||
| 178 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 179 | return -EINVAL; | ||
| 180 | } | ||
| 181 | } | ||
| 182 | |||
| 183 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
| 184 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
| 185 | if (gfs2_old_multihost_formats[x] == | ||
| 186 | sb->sb_multihost_format) | ||
| 187 | break; | ||
| 188 | |||
| 189 | if (!gfs2_old_multihost_formats[x]) { | ||
| 190 | printk(KERN_WARNING | ||
| 191 | "GFS2: code version (%u, %u) is incompatible " | ||
| 192 | "with ondisk format (%u, %u)\n", | ||
| 193 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 194 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 195 | printk(KERN_WARNING | ||
| 196 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 197 | return -EINVAL; | ||
| 198 | } | ||
| 199 | } | ||
| 200 | |||
| 201 | if (!sdp->sd_args.ar_upgrade) { | ||
| 202 | printk(KERN_WARNING | ||
| 203 | "GFS2: code version (%u, %u) is incompatible " | ||
| 204 | "with ondisk format (%u, %u)\n", | ||
| 205 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 206 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 207 | printk(KERN_INFO | ||
| 208 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
| 209 | "the FS\n"); | ||
| 210 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
| 211 | return -EINVAL; | ||
| 212 | } | ||
| 213 | |||
| 214 | return 0; | ||
| 215 | } | ||
| 216 | |||
| 217 | static void end_bio_io_page(struct bio *bio, int error) | ||
| 218 | { | ||
| 219 | struct page *page = bio->bi_private; | ||
| 108 | 220 | ||
| 109 | if (sb->s_flags & (MS_NOATIME | MS_NODIRATIME)) | 221 | if (!error) |
| 110 | set_bit(noatime, &sdp->sd_flags); | 222 | SetPageUptodate(page); |
| 223 | else | ||
| 224 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
| 225 | unlock_page(page); | ||
| 226 | } | ||
| 227 | |||
| 228 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
| 229 | { | ||
| 230 | const struct gfs2_sb *str = buf; | ||
| 231 | |||
| 232 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
| 233 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
| 234 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
| 235 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
| 236 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
| 237 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
| 238 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
| 239 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
| 240 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
| 241 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
| 242 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
| 243 | |||
| 244 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
| 245 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
| 246 | } | ||
| 247 | |||
| 248 | /** | ||
| 249 | * gfs2_read_super - Read the gfs2 super block from disk | ||
| 250 | * @sdp: The GFS2 super block | ||
| 251 | * @sector: The location of the super block | ||
| 252 | * @error: The error code to return | ||
| 253 | * | ||
| 254 | * This uses the bio functions to read the super block from disk | ||
| 255 | * because we want to be 100% sure that we never read cached data. | ||
| 256 | * A super block is read twice only during each GFS2 mount and is | ||
| 257 | * never written to by the filesystem. The first time it's read, no | ||
| 258 | * locks are held, and the only details which are looked at are those | ||
| 259 | * relating to the locking protocol. Once locking is up and working, | ||
| 260 | * the sb is read again under the lock to establish the location of | ||
| 261 | * the master directory (contains pointers to journals etc) and the | ||
| 262 | * root directory. | ||
| 263 | * | ||
| 264 | * Returns: 0 on success or error | ||
| 265 | */ | ||
| 266 | |||
| 267 | static int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
| 268 | { | ||
| 269 | struct super_block *sb = sdp->sd_vfs; | ||
| 270 | struct gfs2_sb *p; | ||
| 271 | struct page *page; | ||
| 272 | struct bio *bio; | ||
| 273 | |||
| 274 | page = alloc_page(GFP_NOFS); | ||
| 275 | if (unlikely(!page)) | ||
| 276 | return -ENOBUFS; | ||
| 277 | |||
| 278 | ClearPageUptodate(page); | ||
| 279 | ClearPageDirty(page); | ||
| 280 | lock_page(page); | ||
| 281 | |||
| 282 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 283 | if (unlikely(!bio)) { | ||
| 284 | __free_page(page); | ||
| 285 | return -ENOBUFS; | ||
| 286 | } | ||
| 111 | 287 | ||
| 112 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | 288 | bio->bi_sector = sector * (sb->s_blocksize >> 9); |
| 113 | sb->s_flags |= MS_NOATIME | MS_NODIRATIME; | 289 | bio->bi_bdev = sb->s_bdev; |
| 290 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
| 291 | |||
| 292 | bio->bi_end_io = end_bio_io_page; | ||
| 293 | bio->bi_private = page; | ||
| 294 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
| 295 | wait_on_page_locked(page); | ||
| 296 | bio_put(bio); | ||
| 297 | if (!PageUptodate(page)) { | ||
| 298 | __free_page(page); | ||
| 299 | return -EIO; | ||
| 300 | } | ||
| 301 | p = kmap(page); | ||
| 302 | gfs2_sb_in(&sdp->sd_sb, p); | ||
| 303 | kunmap(page); | ||
| 304 | __free_page(page); | ||
| 305 | return 0; | ||
| 306 | } | ||
| 307 | /** | ||
| 308 | * gfs2_read_sb - Read super block | ||
| 309 | * @sdp: The GFS2 superblock | ||
| 310 | * @gl: the glock for the superblock (assumed to be held) | ||
| 311 | * @silent: Don't print message if mount fails | ||
| 312 | * | ||
| 313 | */ | ||
| 314 | |||
| 315 | static int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
| 316 | { | ||
| 317 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
| 318 | u32 tmp_blocks; | ||
| 319 | unsigned int x; | ||
| 320 | int error; | ||
| 321 | |||
| 322 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
| 323 | if (error) { | ||
| 324 | if (!silent) | ||
| 325 | fs_err(sdp, "can't read superblock\n"); | ||
| 326 | return error; | ||
| 327 | } | ||
| 328 | |||
| 329 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
| 330 | if (error) | ||
| 331 | return error; | ||
| 332 | |||
| 333 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
| 334 | GFS2_BASIC_BLOCK_SHIFT; | ||
| 335 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
| 336 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
| 337 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
| 338 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
| 339 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
| 340 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
| 341 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
| 342 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
| 343 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
| 344 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
| 345 | sizeof(struct gfs2_meta_header)) / | ||
| 346 | sizeof(struct gfs2_quota_change); | ||
| 347 | |||
| 348 | /* Compute maximum reservation required to add an entry to a directory */ | ||
| 349 | |||
| 350 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
| 351 | sdp->sd_jbsize); | ||
| 352 | |||
| 353 | ind_blocks = 0; | ||
| 354 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
| 355 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
| 356 | ind_blocks += tmp_blocks; | ||
| 357 | } | ||
| 358 | |||
| 359 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
| 360 | |||
| 361 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
| 362 | |||
| 363 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
| 364 | sizeof(struct gfs2_dinode); | ||
| 365 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
| 366 | for (x = 2;; x++) { | ||
| 367 | u64 space, d; | ||
| 368 | u32 m; | ||
| 369 | |||
| 370 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
| 371 | d = space; | ||
| 372 | m = do_div(d, sdp->sd_inptrs); | ||
| 373 | |||
| 374 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
| 375 | break; | ||
| 376 | sdp->sd_heightsize[x] = space; | ||
| 377 | } | ||
| 378 | sdp->sd_max_height = x; | ||
| 379 | sdp->sd_heightsize[x] = ~0; | ||
| 380 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
| 381 | |||
| 382 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
| 383 | sizeof(struct gfs2_dinode); | ||
| 384 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
| 385 | for (x = 2;; x++) { | ||
| 386 | u64 space, d; | ||
| 387 | u32 m; | ||
| 388 | |||
| 389 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
| 390 | d = space; | ||
| 391 | m = do_div(d, sdp->sd_inptrs); | ||
| 392 | |||
| 393 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
| 394 | break; | ||
| 395 | sdp->sd_jheightsize[x] = space; | ||
| 396 | } | ||
| 397 | sdp->sd_max_jheight = x; | ||
| 398 | sdp->sd_jheightsize[x] = ~0; | ||
| 399 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
| 400 | |||
| 401 | return 0; | ||
| 114 | } | 402 | } |
| 115 | 403 | ||
| 116 | static int init_names(struct gfs2_sbd *sdp, int silent) | 404 | static int init_names(struct gfs2_sbd *sdp, int silent) |
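The big hunk above moves gfs2_check_sb(), gfs2_read_super() and gfs2_read_sb() into ops_fstype.c. gfs2_read_super() reads the on-disk superblock through its own bio so no cached copy is ever used, and gfs2_read_sb() then derives the per-block geometry: pointers per dinode (sd_diptrs), pointers per indirect block (sd_inptrs) and the sd_heightsize[] table that bounds the metadata tree height. The arithmetic can be reproduced in isolation; the constants below (4096-byte block, 232-byte dinode, 24-byte meta header) are assumptions for illustration, not values read from a real superblock, and the overflow test is a simplified stand-in for the kernel's do_div() check:

    #include <stdint.h>
    #include <stdio.h>

    #define MAX_META_HEIGHT 10

    int main(void)
    {
        const uint32_t bsize = 4096;       /* assumed sb_bsize */
        const uint32_t dinode_hdr = 232;   /* assumed sizeof(struct gfs2_dinode) */
        const uint32_t meta_hdr = 24;      /* assumed sizeof(struct gfs2_meta_header) */

        uint32_t diptrs = (bsize - dinode_hdr) / sizeof(uint64_t);
        uint32_t inptrs = (bsize - meta_hdr) / sizeof(uint64_t);
        uint64_t heightsize[MAX_META_HEIGHT];
        unsigned int x;

        heightsize[0] = bsize - dinode_hdr;          /* stuffed data in the dinode */
        heightsize[1] = (uint64_t)bsize * diptrs;    /* one level of direct pointers */
        for (x = 2; x < MAX_META_HEIGHT; x++) {
            uint64_t space = heightsize[x - 1] * inptrs;

            /* Stop once the multiplication overflows, as the kernel loop does. */
            if (space / inptrs != heightsize[x - 1])
                break;
            heightsize[x] = space;
        }

        printf("diptrs=%u inptrs=%u max height=%u\n", diptrs, inptrs, x);
        for (unsigned int i = 0; i < x; i++)
            printf("height %u covers %llu bytes\n", i,
                   (unsigned long long)heightsize[i]);
        return 0;
    }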
| @@ -224,51 +512,59 @@ fail: | |||
| 224 | return error; | 512 | return error; |
| 225 | } | 513 | } |
| 226 | 514 | ||
| 227 | static inline struct inode *gfs2_lookup_root(struct super_block *sb, | 515 | static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr, |
| 228 | u64 no_addr) | 516 | u64 no_addr, const char *name) |
| 229 | { | 517 | { |
| 230 | return gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | 518 | struct gfs2_sbd *sdp = sb->s_fs_info; |
| 519 | struct dentry *dentry; | ||
| 520 | struct inode *inode; | ||
| 521 | |||
| 522 | inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0); | ||
| 523 | if (IS_ERR(inode)) { | ||
| 524 | fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode)); | ||
| 525 | return PTR_ERR(inode); | ||
| 526 | } | ||
| 527 | dentry = d_alloc_root(inode); | ||
| 528 | if (!dentry) { | ||
| 529 | fs_err(sdp, "can't alloc %s dentry\n", name); | ||
| 530 | iput(inode); | ||
| 531 | return -ENOMEM; | ||
| 532 | } | ||
| 533 | dentry->d_op = &gfs2_dops; | ||
| 534 | *dptr = dentry; | ||
| 535 | return 0; | ||
| 231 | } | 536 | } |
| 232 | 537 | ||
| 233 | static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | 538 | static int init_sb(struct gfs2_sbd *sdp, int silent) |
| 234 | { | 539 | { |
| 235 | struct super_block *sb = sdp->sd_vfs; | 540 | struct super_block *sb = sdp->sd_vfs; |
| 236 | struct gfs2_holder sb_gh; | 541 | struct gfs2_holder sb_gh; |
| 237 | u64 no_addr; | 542 | u64 no_addr; |
| 238 | struct inode *inode; | 543 | int ret; |
| 239 | int error = 0; | ||
| 240 | 544 | ||
| 241 | if (undo) { | 545 | ret = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, |
| 242 | if (sb->s_root) { | 546 | LM_ST_SHARED, 0, &sb_gh); |
| 243 | dput(sb->s_root); | 547 | if (ret) { |
| 244 | sb->s_root = NULL; | 548 | fs_err(sdp, "can't acquire superblock glock: %d\n", ret); |
| 245 | } | 549 | return ret; |
| 246 | return 0; | ||
| 247 | } | 550 | } |
| 248 | 551 | ||
| 249 | error = gfs2_glock_nq_num(sdp, GFS2_SB_LOCK, &gfs2_meta_glops, | 552 | ret = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); |
| 250 | LM_ST_SHARED, 0, &sb_gh); | 553 | if (ret) { |
| 251 | if (error) { | 554 | fs_err(sdp, "can't read superblock: %d\n", ret); |
| 252 | fs_err(sdp, "can't acquire superblock glock: %d\n", error); | ||
| 253 | return error; | ||
| 254 | } | ||
| 255 | |||
| 256 | error = gfs2_read_sb(sdp, sb_gh.gh_gl, silent); | ||
| 257 | if (error) { | ||
| 258 | fs_err(sdp, "can't read superblock: %d\n", error); | ||
| 259 | goto out; | 555 | goto out; |
| 260 | } | 556 | } |
| 261 | 557 | ||
| 262 | /* Set up the buffer cache and SB for real */ | 558 | /* Set up the buffer cache and SB for real */ |
| 263 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { | 559 | if (sdp->sd_sb.sb_bsize < bdev_hardsect_size(sb->s_bdev)) { |
| 264 | error = -EINVAL; | 560 | ret = -EINVAL; |
| 265 | fs_err(sdp, "FS block size (%u) is too small for device " | 561 | fs_err(sdp, "FS block size (%u) is too small for device " |
| 266 | "block size (%u)\n", | 562 | "block size (%u)\n", |
| 267 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); | 563 | sdp->sd_sb.sb_bsize, bdev_hardsect_size(sb->s_bdev)); |
| 268 | goto out; | 564 | goto out; |
| 269 | } | 565 | } |
| 270 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { | 566 | if (sdp->sd_sb.sb_bsize > PAGE_SIZE) { |
| 271 | error = -EINVAL; | 567 | ret = -EINVAL; |
| 272 | fs_err(sdp, "FS block size (%u) is too big for machine " | 568 | fs_err(sdp, "FS block size (%u) is too big for machine " |
| 273 | "page size (%u)\n", | 569 | "page size (%u)\n", |
| 274 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); | 570 | sdp->sd_sb.sb_bsize, (unsigned int)PAGE_SIZE); |
| @@ -278,26 +574,21 @@ static int init_sb(struct gfs2_sbd *sdp, int silent, int undo) | |||
| 278 | 574 | ||
| 279 | /* Get the root inode */ | 575 | /* Get the root inode */ |
| 280 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; | 576 | no_addr = sdp->sd_sb.sb_root_dir.no_addr; |
| 281 | if (sb->s_type == &gfs2meta_fs_type) | 577 | ret = gfs2_lookup_root(sb, &sdp->sd_root_dir, no_addr, "root"); |
| 282 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; | 578 | if (ret) |
| 283 | inode = gfs2_lookup_root(sb, no_addr); | ||
| 284 | if (IS_ERR(inode)) { | ||
| 285 | error = PTR_ERR(inode); | ||
| 286 | fs_err(sdp, "can't read in root inode: %d\n", error); | ||
| 287 | goto out; | 579 | goto out; |
| 288 | } | ||
| 289 | 580 | ||
| 290 | sb->s_root = d_alloc_root(inode); | 581 | /* Get the master inode */ |
| 291 | if (!sb->s_root) { | 582 | no_addr = sdp->sd_sb.sb_master_dir.no_addr; |
| 292 | fs_err(sdp, "can't get root dentry\n"); | 583 | ret = gfs2_lookup_root(sb, &sdp->sd_master_dir, no_addr, "master"); |
| 293 | error = -ENOMEM; | 584 | if (ret) { |
| 294 | iput(inode); | 585 | dput(sdp->sd_root_dir); |
| 295 | } else | 586 | goto out; |
| 296 | sb->s_root->d_op = &gfs2_dops; | 587 | } |
| 297 | 588 | sb->s_root = dget(sdp->sd_args.ar_meta ? sdp->sd_master_dir : sdp->sd_root_dir); | |
| 298 | out: | 589 | out: |
| 299 | gfs2_glock_dq_uninit(&sb_gh); | 590 | gfs2_glock_dq_uninit(&sb_gh); |
| 300 | return error; | 591 | return ret; |
| 301 | } | 592 | } |
| 302 | 593 | ||
| 303 | /** | 594 | /** |
| @@ -372,6 +663,7 @@ static void gfs2_lm_others_may_mount(struct gfs2_sbd *sdp) | |||
| 372 | 663 | ||
| 373 | static int init_journal(struct gfs2_sbd *sdp, int undo) | 664 | static int init_journal(struct gfs2_sbd *sdp, int undo) |
| 374 | { | 665 | { |
| 666 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
| 375 | struct gfs2_holder ji_gh; | 667 | struct gfs2_holder ji_gh; |
| 376 | struct task_struct *p; | 668 | struct task_struct *p; |
| 377 | struct gfs2_inode *ip; | 669 | struct gfs2_inode *ip; |
| @@ -383,7 +675,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) | |||
| 383 | goto fail_recoverd; | 675 | goto fail_recoverd; |
| 384 | } | 676 | } |
| 385 | 677 | ||
| 386 | sdp->sd_jindex = gfs2_lookup_simple(sdp->sd_master_dir, "jindex"); | 678 | sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); |
| 387 | if (IS_ERR(sdp->sd_jindex)) { | 679 | if (IS_ERR(sdp->sd_jindex)) { |
| 388 | fs_err(sdp, "can't lookup journal index: %d\n", error); | 680 | fs_err(sdp, "can't lookup journal index: %d\n", error); |
| 389 | return PTR_ERR(sdp->sd_jindex); | 681 | return PTR_ERR(sdp->sd_jindex); |
| @@ -506,25 +798,17 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
| 506 | { | 798 | { |
| 507 | int error = 0; | 799 | int error = 0; |
| 508 | struct gfs2_inode *ip; | 800 | struct gfs2_inode *ip; |
| 509 | struct inode *inode; | 801 | struct inode *master = sdp->sd_master_dir->d_inode; |
| 510 | 802 | ||
| 511 | if (undo) | 803 | if (undo) |
| 512 | goto fail_qinode; | 804 | goto fail_qinode; |
| 513 | 805 | ||
| 514 | inode = gfs2_lookup_root(sdp->sd_vfs, sdp->sd_sb.sb_master_dir.no_addr); | ||
| 515 | if (IS_ERR(inode)) { | ||
| 516 | error = PTR_ERR(inode); | ||
| 517 | fs_err(sdp, "can't read in master directory: %d\n", error); | ||
| 518 | goto fail; | ||
| 519 | } | ||
| 520 | sdp->sd_master_dir = inode; | ||
| 521 | |||
| 522 | error = init_journal(sdp, undo); | 806 | error = init_journal(sdp, undo); |
| 523 | if (error) | 807 | if (error) |
| 524 | goto fail_master; | 808 | goto fail; |
| 525 | 809 | ||
| 526 | /* Read in the master inode number inode */ | 810 | /* Read in the master inode number inode */ |
| 527 | sdp->sd_inum_inode = gfs2_lookup_simple(sdp->sd_master_dir, "inum"); | 811 | sdp->sd_inum_inode = gfs2_lookup_simple(master, "inum"); |
| 528 | if (IS_ERR(sdp->sd_inum_inode)) { | 812 | if (IS_ERR(sdp->sd_inum_inode)) { |
| 529 | error = PTR_ERR(sdp->sd_inum_inode); | 813 | error = PTR_ERR(sdp->sd_inum_inode); |
| 530 | fs_err(sdp, "can't read in inum inode: %d\n", error); | 814 | fs_err(sdp, "can't read in inum inode: %d\n", error); |
| @@ -533,7 +817,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
| 533 | 817 | ||
| 534 | 818 | ||
| 535 | /* Read in the master statfs inode */ | 819 | /* Read in the master statfs inode */ |
| 536 | sdp->sd_statfs_inode = gfs2_lookup_simple(sdp->sd_master_dir, "statfs"); | 820 | sdp->sd_statfs_inode = gfs2_lookup_simple(master, "statfs"); |
| 537 | if (IS_ERR(sdp->sd_statfs_inode)) { | 821 | if (IS_ERR(sdp->sd_statfs_inode)) { |
| 538 | error = PTR_ERR(sdp->sd_statfs_inode); | 822 | error = PTR_ERR(sdp->sd_statfs_inode); |
| 539 | fs_err(sdp, "can't read in statfs inode: %d\n", error); | 823 | fs_err(sdp, "can't read in statfs inode: %d\n", error); |
| @@ -541,7 +825,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
| 541 | } | 825 | } |
| 542 | 826 | ||
| 543 | /* Read in the resource index inode */ | 827 | /* Read in the resource index inode */ |
| 544 | sdp->sd_rindex = gfs2_lookup_simple(sdp->sd_master_dir, "rindex"); | 828 | sdp->sd_rindex = gfs2_lookup_simple(master, "rindex"); |
| 545 | if (IS_ERR(sdp->sd_rindex)) { | 829 | if (IS_ERR(sdp->sd_rindex)) { |
| 546 | error = PTR_ERR(sdp->sd_rindex); | 830 | error = PTR_ERR(sdp->sd_rindex); |
| 547 | fs_err(sdp, "can't get resource index inode: %d\n", error); | 831 | fs_err(sdp, "can't get resource index inode: %d\n", error); |
| @@ -552,7 +836,7 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) | |||
| 552 | sdp->sd_rindex_uptodate = 0; | 836 | sdp->sd_rindex_uptodate = 0; |
| 553 | 837 | ||
| 554 | /* Read in the quota inode */ | 838 | /* Read in the quota inode */ |
| 555 | sdp->sd_quota_inode = gfs2_lookup_simple(sdp->sd_master_dir, "quota"); | 839 | sdp->sd_quota_inode = gfs2_lookup_simple(master, "quota"); |
| 556 | if (IS_ERR(sdp->sd_quota_inode)) { | 840 | if (IS_ERR(sdp->sd_quota_inode)) { |
| 557 | error = PTR_ERR(sdp->sd_quota_inode); | 841 | error = PTR_ERR(sdp->sd_quota_inode); |
| 558 | fs_err(sdp, "can't get quota file inode: %d\n", error); | 842 | fs_err(sdp, "can't get quota file inode: %d\n", error); |
| @@ -571,8 +855,6 @@ fail_inum: | |||
| 571 | iput(sdp->sd_inum_inode); | 855 | iput(sdp->sd_inum_inode); |
| 572 | fail_journal: | 856 | fail_journal: |
| 573 | init_journal(sdp, UNDO); | 857 | init_journal(sdp, UNDO); |
| 574 | fail_master: | ||
| 575 | iput(sdp->sd_master_dir); | ||
| 576 | fail: | 858 | fail: |
| 577 | return error; | 859 | return error; |
| 578 | } | 860 | } |
| @@ -583,6 +865,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
| 583 | char buf[30]; | 865 | char buf[30]; |
| 584 | int error = 0; | 866 | int error = 0; |
| 585 | struct gfs2_inode *ip; | 867 | struct gfs2_inode *ip; |
| 868 | struct inode *master = sdp->sd_master_dir->d_inode; | ||
| 586 | 869 | ||
| 587 | if (sdp->sd_args.ar_spectator) | 870 | if (sdp->sd_args.ar_spectator) |
| 588 | return 0; | 871 | return 0; |
| @@ -590,7 +873,7 @@ static int init_per_node(struct gfs2_sbd *sdp, int undo) | |||
| 590 | if (undo) | 873 | if (undo) |
| 591 | goto fail_qc_gh; | 874 | goto fail_qc_gh; |
| 592 | 875 | ||
| 593 | pn = gfs2_lookup_simple(sdp->sd_master_dir, "per_node"); | 876 | pn = gfs2_lookup_simple(master, "per_node"); |
| 594 | if (IS_ERR(pn)) { | 877 | if (IS_ERR(pn)) { |
| 595 | error = PTR_ERR(pn); | 878 | error = PTR_ERR(pn); |
| 596 | fs_err(sdp, "can't find per_node directory: %d\n", error); | 879 | fs_err(sdp, "can't find per_node directory: %d\n", error); |
| @@ -800,7 +1083,11 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
| 800 | goto fail; | 1083 | goto fail; |
| 801 | } | 1084 | } |
| 802 | 1085 | ||
| 803 | init_vfs(sb, SDF_NOATIME); | 1086 | sb->s_magic = GFS2_MAGIC; |
| 1087 | sb->s_op = &gfs2_super_ops; | ||
| 1088 | sb->s_export_op = &gfs2_export_ops; | ||
| 1089 | sb->s_time_gran = 1; | ||
| 1090 | sb->s_maxbytes = MAX_LFS_FILESIZE; | ||
| 804 | 1091 | ||
| 805 | /* Set up the buffer cache and fill in some fake block size values | 1092 | /* Set up the buffer cache and fill in some fake block size values |
| 806 | to allow us to read-in the on-disk superblock. */ | 1093 | to allow us to read-in the on-disk superblock. */ |
| @@ -828,7 +1115,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) | |||
| 828 | if (error) | 1115 | if (error) |
| 829 | goto fail_lm; | 1116 | goto fail_lm; |
| 830 | 1117 | ||
| 831 | error = init_sb(sdp, silent, DO); | 1118 | error = init_sb(sdp, silent); |
| 832 | if (error) | 1119 | if (error) |
| 833 | goto fail_locking; | 1120 | goto fail_locking; |
| 834 | 1121 | ||
| @@ -869,7 +1156,11 @@ fail_per_node: | |||
| 869 | fail_inodes: | 1156 | fail_inodes: |
| 870 | init_inodes(sdp, UNDO); | 1157 | init_inodes(sdp, UNDO); |
| 871 | fail_sb: | 1158 | fail_sb: |
| 872 | init_sb(sdp, 0, UNDO); | 1159 | if (sdp->sd_root_dir) |
| 1160 | dput(sdp->sd_root_dir); | ||
| 1161 | if (sdp->sd_master_dir) | ||
| 1162 | dput(sdp->sd_master_dir); | ||
| 1163 | sb->s_root = NULL; | ||
| 873 | fail_locking: | 1164 | fail_locking: |
| 874 | init_locking(sdp, &mount_gh, UNDO); | 1165 | init_locking(sdp, &mount_gh, UNDO); |
| 875 | fail_lm: | 1166 | fail_lm: |
| @@ -887,151 +1178,63 @@ fail: | |||
| 887 | } | 1178 | } |
| 888 | 1179 | ||
| 889 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, | 1180 | static int gfs2_get_sb(struct file_system_type *fs_type, int flags, |
| 890 | const char *dev_name, void *data, struct vfsmount *mnt) | 1181 | const char *dev_name, void *data, struct vfsmount *mnt) |
| 891 | { | 1182 | { |
| 892 | struct super_block *sb; | 1183 | return get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); |
| 893 | struct gfs2_sbd *sdp; | ||
| 894 | int error = get_sb_bdev(fs_type, flags, dev_name, data, fill_super, mnt); | ||
| 895 | if (error) | ||
| 896 | goto out; | ||
| 897 | sb = mnt->mnt_sb; | ||
| 898 | sdp = sb->s_fs_info; | ||
| 899 | sdp->sd_gfs2mnt = mnt; | ||
| 900 | out: | ||
| 901 | return error; | ||
| 902 | } | 1184 | } |
| 903 | 1185 | ||
| 904 | static int fill_super_meta(struct super_block *sb, struct super_block *new, | 1186 | static struct super_block *get_gfs2_sb(const char *dev_name) |
| 905 | void *data, int silent) | ||
| 906 | { | 1187 | { |
| 907 | struct gfs2_sbd *sdp = sb->s_fs_info; | 1188 | struct super_block *sb; |
| 908 | struct inode *inode; | ||
| 909 | int error = 0; | ||
| 910 | |||
| 911 | new->s_fs_info = sdp; | ||
| 912 | sdp->sd_vfs_meta = sb; | ||
| 913 | |||
| 914 | init_vfs(new, SDF_NOATIME); | ||
| 915 | |||
| 916 | /* Get the master inode */ | ||
| 917 | inode = igrab(sdp->sd_master_dir); | ||
| 918 | |||
| 919 | new->s_root = d_alloc_root(inode); | ||
| 920 | if (!new->s_root) { | ||
| 921 | fs_err(sdp, "can't get root dentry\n"); | ||
| 922 | error = -ENOMEM; | ||
| 923 | iput(inode); | ||
| 924 | } else | ||
| 925 | new->s_root->d_op = &gfs2_dops; | ||
| 926 | |||
| 927 | return error; | ||
| 928 | } | ||
| 929 | |||
| 930 | static int set_bdev_super(struct super_block *s, void *data) | ||
| 931 | { | ||
| 932 | s->s_bdev = data; | ||
| 933 | s->s_dev = s->s_bdev->bd_dev; | ||
| 934 | return 0; | ||
| 935 | } | ||
| 936 | |||
| 937 | static int test_bdev_super(struct super_block *s, void *data) | ||
| 938 | { | ||
| 939 | return s->s_bdev == data; | ||
| 940 | } | ||
| 941 | |||
| 942 | static struct super_block* get_gfs2_sb(const char *dev_name) | ||
| 943 | { | ||
| 944 | struct kstat stat; | ||
| 945 | struct nameidata nd; | 1189 | struct nameidata nd; |
| 946 | struct super_block *sb = NULL, *s; | ||
| 947 | int error; | 1190 | int error; |
| 948 | 1191 | ||
| 949 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); | 1192 | error = path_lookup(dev_name, LOOKUP_FOLLOW, &nd); |
| 950 | if (error) { | 1193 | if (error) { |
| 951 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error\n", | 1194 | printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n", |
| 952 | dev_name); | 1195 | dev_name, error); |
| 953 | goto out; | 1196 | return NULL; |
| 954 | } | ||
| 955 | error = vfs_getattr(nd.path.mnt, nd.path.dentry, &stat); | ||
| 956 | |||
| 957 | list_for_each_entry(s, &gfs2_fs_type.fs_supers, s_instances) { | ||
| 958 | if ((S_ISBLK(stat.mode) && s->s_dev == stat.rdev) || | ||
| 959 | (S_ISDIR(stat.mode) && | ||
| 960 | s == nd.path.dentry->d_inode->i_sb)) { | ||
| 961 | sb = s; | ||
| 962 | goto free_nd; | ||
| 963 | } | ||
| 964 | } | 1197 | } |
| 965 | 1198 | sb = nd.path.dentry->d_inode->i_sb; | |
| 966 | printk(KERN_WARNING "GFS2: Unrecognized block device or " | 1199 | if (sb && (sb->s_type == &gfs2_fs_type)) |
| 967 | "mount point %s\n", dev_name); | 1200 | atomic_inc(&sb->s_active); |
| 968 | 1201 | else | |
| 969 | free_nd: | 1202 | sb = NULL; |
| 970 | path_put(&nd.path); | 1203 | path_put(&nd.path); |
| 971 | out: | ||
| 972 | return sb; | 1204 | return sb; |
| 973 | } | 1205 | } |
| 974 | 1206 | ||
| 975 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, | 1207 | static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags, |
| 976 | const char *dev_name, void *data, struct vfsmount *mnt) | 1208 | const char *dev_name, void *data, struct vfsmount *mnt) |
| 977 | { | 1209 | { |
| 978 | int error = 0; | 1210 | struct super_block *sb = NULL; |
| 979 | struct super_block *sb = NULL, *new; | ||
| 980 | struct gfs2_sbd *sdp; | 1211 | struct gfs2_sbd *sdp; |
| 981 | 1212 | ||
| 982 | sb = get_gfs2_sb(dev_name); | 1213 | sb = get_gfs2_sb(dev_name); |
| 983 | if (!sb) { | 1214 | if (!sb) { |
| 984 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); | 1215 | printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n"); |
| 985 | error = -ENOENT; | 1216 | return -ENOENT; |
| 986 | goto error; | ||
| 987 | } | 1217 | } |
| 988 | sdp = sb->s_fs_info; | 1218 | sdp = sb->s_fs_info; |
| 989 | if (sdp->sd_vfs_meta) { | 1219 | mnt->mnt_sb = sb; |
| 990 | printk(KERN_WARNING "GFS2: gfs2meta mount already exists\n"); | 1220 | mnt->mnt_root = dget(sdp->sd_master_dir); |
| 991 | error = -EBUSY; | 1221 | return 0; |
| 992 | goto error; | ||
| 993 | } | ||
| 994 | down(&sb->s_bdev->bd_mount_sem); | ||
| 995 | new = sget(fs_type, test_bdev_super, set_bdev_super, sb->s_bdev); | ||
| 996 | up(&sb->s_bdev->bd_mount_sem); | ||
| 997 | if (IS_ERR(new)) { | ||
| 998 | error = PTR_ERR(new); | ||
| 999 | goto error; | ||
| 1000 | } | ||
| 1001 | new->s_flags = flags; | ||
| 1002 | strlcpy(new->s_id, sb->s_id, sizeof(new->s_id)); | ||
| 1003 | sb_set_blocksize(new, sb->s_blocksize); | ||
| 1004 | error = fill_super_meta(sb, new, data, flags & MS_SILENT ? 1 : 0); | ||
| 1005 | if (error) { | ||
| 1006 | up_write(&new->s_umount); | ||
| 1007 | deactivate_super(new); | ||
| 1008 | goto error; | ||
| 1009 | } | ||
| 1010 | |||
| 1011 | new->s_flags |= MS_ACTIVE; | ||
| 1012 | |||
| 1013 | /* Grab a reference to the gfs2 mount point */ | ||
| 1014 | atomic_inc(&sdp->sd_gfs2mnt->mnt_count); | ||
| 1015 | return simple_set_mnt(mnt, new); | ||
| 1016 | error: | ||
| 1017 | return error; | ||
| 1018 | } | 1222 | } |
| 1019 | 1223 | ||
| 1020 | static void gfs2_kill_sb(struct super_block *sb) | 1224 | static void gfs2_kill_sb(struct super_block *sb) |
| 1021 | { | 1225 | { |
| 1022 | if (sb->s_fs_info) { | 1226 | struct gfs2_sbd *sdp = sb->s_fs_info; |
| 1023 | gfs2_delete_debugfs_file(sb->s_fs_info); | 1227 | if (sdp) { |
| 1024 | gfs2_meta_syncfs(sb->s_fs_info); | 1228 | gfs2_meta_syncfs(sdp); |
| 1229 | dput(sdp->sd_root_dir); | ||
| 1230 | dput(sdp->sd_master_dir); | ||
| 1231 | sdp->sd_root_dir = NULL; | ||
| 1232 | sdp->sd_master_dir = NULL; | ||
| 1025 | } | 1233 | } |
| 1234 | shrink_dcache_sb(sb); | ||
| 1026 | kill_block_super(sb); | 1235 | kill_block_super(sb); |
| 1027 | } | 1236 | if (sdp) |
| 1028 | 1237 | gfs2_delete_debugfs_file(sdp); | |
| 1029 | static void gfs2_kill_sb_meta(struct super_block *sb) | ||
| 1030 | { | ||
| 1031 | struct gfs2_sbd *sdp = sb->s_fs_info; | ||
| 1032 | generic_shutdown_super(sb); | ||
| 1033 | sdp->sd_vfs_meta = NULL; | ||
| 1034 | atomic_dec(&sdp->sd_gfs2mnt->mnt_count); | ||
| 1035 | } | 1238 | } |
| 1036 | 1239 | ||
| 1037 | struct file_system_type gfs2_fs_type = { | 1240 | struct file_system_type gfs2_fs_type = { |
| @@ -1046,7 +1249,6 @@ struct file_system_type gfs2meta_fs_type = { | |||
| 1046 | .name = "gfs2meta", | 1249 | .name = "gfs2meta", |
| 1047 | .fs_flags = FS_REQUIRES_DEV, | 1250 | .fs_flags = FS_REQUIRES_DEV, |
| 1048 | .get_sb = gfs2_get_sb_meta, | 1251 | .get_sb = gfs2_get_sb_meta, |
| 1049 | .kill_sb = gfs2_kill_sb_meta, | ||
| 1050 | .owner = THIS_MODULE, | 1252 | .owner = THIS_MODULE, |
| 1051 | }; | 1253 | }; |
| 1052 | 1254 | ||
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index e2c62f73a778..534e1e2c65ca 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c | |||
| @@ -159,9 +159,13 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, | |||
| 159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); | 159 | gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); |
| 160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); | 160 | gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); |
| 161 | 161 | ||
| 162 | error = gfs2_glock_nq_m(2, ghs); | 162 | error = gfs2_glock_nq(ghs); /* parent */ |
| 163 | if (error) | 163 | if (error) |
| 164 | goto out; | 164 | goto out_parent; |
| 165 | |||
| 166 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
| 167 | if (error) | ||
| 168 | goto out_child; | ||
| 165 | 169 | ||
| 166 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); | 170 | error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC); |
| 167 | if (error) | 171 | if (error) |
| @@ -245,8 +249,10 @@ out_alloc: | |||
| 245 | if (alloc_required) | 249 | if (alloc_required) |
| 246 | gfs2_alloc_put(dip); | 250 | gfs2_alloc_put(dip); |
| 247 | out_gunlock: | 251 | out_gunlock: |
| 248 | gfs2_glock_dq_m(2, ghs); | 252 | gfs2_glock_dq(ghs + 1); |
| 249 | out: | 253 | out_child: |
| 254 | gfs2_glock_dq(ghs); | ||
| 255 | out_parent: | ||
| 250 | gfs2_holder_uninit(ghs); | 256 | gfs2_holder_uninit(ghs); |
| 251 | gfs2_holder_uninit(ghs + 1); | 257 | gfs2_holder_uninit(ghs + 1); |
| 252 | if (!error) { | 258 | if (!error) { |
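In the gfs2_link() hunks above (and in gfs2_rmdir() further down, which handles three holders), the single gfs2_glock_nq_m() call is replaced by acquiring the glocks one at a time in a fixed order with a matching goto ladder, so a failure only releases what was actually taken. The same pattern in portable C with two mutexes standing in for the parent and child glocks (illustrative; not the glock API):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t parent = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t child  = PTHREAD_MUTEX_INITIALIZER;

    static int do_link(void)
    {
        int error;

        error = pthread_mutex_lock(&parent);   /* parent first */
        if (error)
            goto out_parent;

        error = pthread_mutex_lock(&child);    /* then child */
        if (error)
            goto out_child;

        /* ... the actual link work would happen here ... */

        pthread_mutex_unlock(&child);
    out_child:
        pthread_mutex_unlock(&parent);
    out_parent:
        return error;
    }

    int main(void)
    {
        printf("do_link() -> %d\n", do_link());
        return 0;
    }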
| @@ -302,7 +308,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
| 302 | 308 | ||
| 303 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 309 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
| 304 | if (error) | 310 | if (error) |
| 305 | goto out_rgrp; | 311 | goto out_gunlock; |
| 306 | 312 | ||
| 307 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); | 313 | error = gfs2_trans_begin(sdp, 2*RES_DINODE + RES_LEAF + RES_RG_BIT, 0); |
| 308 | if (error) | 314 | if (error) |
| @@ -316,6 +322,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) | |||
| 316 | 322 | ||
| 317 | out_end_trans: | 323 | out_end_trans: |
| 318 | gfs2_trans_end(sdp); | 324 | gfs2_trans_end(sdp); |
| 325 | out_gunlock: | ||
| 319 | gfs2_glock_dq(ghs + 2); | 326 | gfs2_glock_dq(ghs + 2); |
| 320 | out_rgrp: | 327 | out_rgrp: |
| 321 | gfs2_holder_uninit(ghs + 2); | 328 | gfs2_holder_uninit(ghs + 2); |
| @@ -485,7 +492,6 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 485 | struct gfs2_holder ri_gh; | 492 | struct gfs2_holder ri_gh; |
| 486 | int error; | 493 | int error; |
| 487 | 494 | ||
| 488 | |||
| 489 | error = gfs2_rindex_hold(sdp, &ri_gh); | 495 | error = gfs2_rindex_hold(sdp, &ri_gh); |
| 490 | if (error) | 496 | if (error) |
| 491 | return error; | 497 | return error; |
| @@ -495,9 +501,17 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 495 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); | 501 | rgd = gfs2_blk2rgrpd(sdp, ip->i_no_addr); |
| 496 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); | 502 | gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2); |
| 497 | 503 | ||
| 498 | error = gfs2_glock_nq_m(3, ghs); | 504 | error = gfs2_glock_nq(ghs); /* parent */ |
| 499 | if (error) | 505 | if (error) |
| 500 | goto out; | 506 | goto out_parent; |
| 507 | |||
| 508 | error = gfs2_glock_nq(ghs + 1); /* child */ | ||
| 509 | if (error) | ||
| 510 | goto out_child; | ||
| 511 | |||
| 512 | error = gfs2_glock_nq(ghs + 2); /* rgrp */ | ||
| 513 | if (error) | ||
| 514 | goto out_rgrp; | ||
| 501 | 515 | ||
| 502 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); | 516 | error = gfs2_unlink_ok(dip, &dentry->d_name, ip); |
| 503 | if (error) | 517 | if (error) |
| @@ -523,11 +537,15 @@ static int gfs2_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 523 | gfs2_trans_end(sdp); | 537 | gfs2_trans_end(sdp); |
| 524 | 538 | ||
| 525 | out_gunlock: | 539 | out_gunlock: |
| 526 | gfs2_glock_dq_m(3, ghs); | 540 | gfs2_glock_dq(ghs + 2); |
| 527 | out: | 541 | out_rgrp: |
| 528 | gfs2_holder_uninit(ghs); | ||
| 529 | gfs2_holder_uninit(ghs + 1); | ||
| 530 | gfs2_holder_uninit(ghs + 2); | 542 | gfs2_holder_uninit(ghs + 2); |
| 543 | gfs2_glock_dq(ghs + 1); | ||
| 544 | out_child: | ||
| 545 | gfs2_holder_uninit(ghs + 1); | ||
| 546 | gfs2_glock_dq(ghs); | ||
| 547 | out_parent: | ||
| 548 | gfs2_holder_uninit(ghs); | ||
| 531 | gfs2_glock_dq_uninit(&ri_gh); | 549 | gfs2_glock_dq_uninit(&ri_gh); |
| 532 | return error; | 550 | return error; |
| 533 | } | 551 | } |
| @@ -571,6 +589,54 @@ static int gfs2_mknod(struct inode *dir, struct dentry *dentry, int mode, | |||
| 571 | return 0; | 589 | return 0; |
| 572 | } | 590 | } |
| 573 | 591 | ||
| 592 | /* | ||
| 593 | * gfs2_ok_to_move - check if it's ok to move a directory to another directory | ||
| 594 | * @this: move this | ||
| 595 | * @to: to here | ||
| 596 | * | ||
| 597 | * Follow @to back to the root and make sure we don't encounter @this | ||
| 598 | * Assumes we already hold the rename lock. | ||
| 599 | * | ||
| 600 | * Returns: errno | ||
| 601 | */ | ||
| 602 | |||
| 603 | static int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to) | ||
| 604 | { | ||
| 605 | struct inode *dir = &to->i_inode; | ||
| 606 | struct super_block *sb = dir->i_sb; | ||
| 607 | struct inode *tmp; | ||
| 608 | struct qstr dotdot; | ||
| 609 | int error = 0; | ||
| 610 | |||
| 611 | gfs2_str2qstr(&dotdot, ".."); | ||
| 612 | |||
| 613 | igrab(dir); | ||
| 614 | |||
| 615 | for (;;) { | ||
| 616 | if (dir == &this->i_inode) { | ||
| 617 | error = -EINVAL; | ||
| 618 | break; | ||
| 619 | } | ||
| 620 | if (dir == sb->s_root->d_inode) { | ||
| 621 | error = 0; | ||
| 622 | break; | ||
| 623 | } | ||
| 624 | |||
| 625 | tmp = gfs2_lookupi(dir, &dotdot, 1); | ||
| 626 | if (IS_ERR(tmp)) { | ||
| 627 | error = PTR_ERR(tmp); | ||
| 628 | break; | ||
| 629 | } | ||
| 630 | |||
| 631 | iput(dir); | ||
| 632 | dir = tmp; | ||
| 633 | } | ||
| 634 | |||
| 635 | iput(dir); | ||
| 636 | |||
| 637 | return error; | ||
| 638 | } | ||
| 639 | |||
| 574 | /** | 640 | /** |
| 575 | * gfs2_rename - Rename a file | 641 | * gfs2_rename - Rename a file |
| 576 | * @odir: Parent directory of old file name | 642 | * @odir: Parent directory of old file name |
| @@ -589,7 +655,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 589 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); | 655 | struct gfs2_inode *ip = GFS2_I(odentry->d_inode); |
| 590 | struct gfs2_inode *nip = NULL; | 656 | struct gfs2_inode *nip = NULL; |
| 591 | struct gfs2_sbd *sdp = GFS2_SB(odir); | 657 | struct gfs2_sbd *sdp = GFS2_SB(odir); |
| 592 | struct gfs2_holder ghs[5], r_gh; | 658 | struct gfs2_holder ghs[5], r_gh = { .gh_gl = NULL, }; |
| 593 | struct gfs2_rgrpd *nrgd; | 659 | struct gfs2_rgrpd *nrgd; |
| 594 | unsigned int num_gh; | 660 | unsigned int num_gh; |
| 595 | int dir_rename = 0; | 661 | int dir_rename = 0; |
| @@ -603,19 +669,20 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 603 | return 0; | 669 | return 0; |
| 604 | } | 670 | } |
| 605 | 671 | ||
| 606 | /* Make sure we aren't trying to move a dirctory into it's subdir */ | ||
| 607 | |||
| 608 | if (S_ISDIR(ip->i_inode.i_mode) && odip != ndip) { | ||
| 609 | dir_rename = 1; | ||
| 610 | 672 | ||
| 611 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, | 673 | if (odip != ndip) { |
| 612 | &r_gh); | 674 | error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, |
| 675 | 0, &r_gh); | ||
| 613 | if (error) | 676 | if (error) |
| 614 | goto out; | 677 | goto out; |
| 615 | 678 | ||
| 616 | error = gfs2_ok_to_move(ip, ndip); | 679 | if (S_ISDIR(ip->i_inode.i_mode)) { |
| 617 | if (error) | 680 | dir_rename = 1; |
| 618 | goto out_gunlock_r; | 681 | /* don't move a directory into its subdir */ |
| 682 | error = gfs2_ok_to_move(ip, ndip); | ||
| 683 | if (error) | ||
| 684 | goto out_gunlock_r; | ||
| 685 | } | ||
| 619 | } | 686 | } |
| 620 | 687 | ||
| 621 | num_gh = 1; | 688 | num_gh = 1; |
| @@ -639,9 +706,11 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, | |||
| 639 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); | 706 | gfs2_holder_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + num_gh++); |
| 640 | } | 707 | } |
| 641 | 708 | ||
| 642 | error = gfs2_glock_nq_m(num_gh, ghs); | 709 | for (x = 0; x < num_gh; x++) { |
| 643 | if (error) | 710 | error = gfs2_glock_nq(ghs + x); |
| 644 | goto out_uninit; | 711 | if (error) |
| 712 | goto out_gunlock; | ||
| 713 | } | ||
| 645 | 714 | ||
| 646 | /* Check out the old directory */ | 715 | /* Check out the old directory */ |
| 647 | 716 | ||
| @@ -804,12 +873,12 @@ out_alloc: | |||
| 804 | if (alloc_required) | 873 | if (alloc_required) |
| 805 | gfs2_alloc_put(ndip); | 874 | gfs2_alloc_put(ndip); |
| 806 | out_gunlock: | 875 | out_gunlock: |
| 807 | gfs2_glock_dq_m(num_gh, ghs); | 876 | while (x--) { |
| 808 | out_uninit: | 877 | gfs2_glock_dq(ghs + x); |
| 809 | for (x = 0; x < num_gh; x++) | ||
| 810 | gfs2_holder_uninit(ghs + x); | 878 | gfs2_holder_uninit(ghs + x); |
| 879 | } | ||
| 811 | out_gunlock_r: | 880 | out_gunlock_r: |
| 812 | if (dir_rename) | 881 | if (r_gh.gh_gl) |
| 813 | gfs2_glock_dq_uninit(&r_gh); | 882 | gfs2_glock_dq_uninit(&r_gh); |
| 814 | out: | 883 | out: |
| 815 | return error; | 884 | return error; |
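
The gfs2_ok_to_move() helper introduced above prevents a directory from being moved into its own subtree: it walks ".." entries from the destination back to the filesystem root and fails with -EINVAL if it meets the source directory, relying on the cluster-wide rename glock to keep the tree stable during the walk. Purely for illustration, a userspace analogue of the same ancestry check (a hypothetical helper, not part of this patch, using openat/fstat instead of gfs2_lookupi) could look like this:

#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

/* Illustrative only: returns 1 if dir_fd lies inside the tree rooted at the
 * directory identified by (dev, ino), 0 if not, -1 on error. */
static int is_inside_tree(int dir_fd, dev_t dev, ino_t ino)
{
	struct stat st, pst;
	int fd = dup(dir_fd);		/* don't disturb the caller's fd */

	if (fd < 0)
		return -1;

	for (;;) {
		int parent;

		if (fstat(fd, &st) < 0)
			goto fail;
		if (st.st_dev == dev && st.st_ino == ino) {
			close(fd);
			return 1;	/* met the would-be ancestor */
		}
		parent = openat(fd, "..", O_RDONLY | O_DIRECTORY);
		if (parent < 0)
			goto fail;
		if (fstat(parent, &pst) < 0) {
			close(parent);
			goto fail;
		}
		if (pst.st_dev == st.st_dev && pst.st_ino == st.st_ino) {
			/* ".." no longer moves: we reached the root */
			close(parent);
			close(fd);
			return 0;
		}
		close(fd);
		fd = parent;
	}
fail:
	close(fd);
	return -1;
}

A rename tool could call is_inside_tree(dest_dirfd, src_dev, src_ino) before attempting the move. The kernel-side check in this patch differs in that it resolves ".." through gfs2_lookupi() against the on-disk directory entries and only runs when the source is a directory and the parents differ.
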
diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index f66ea0f7a356..d5355d9b5926 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/gfs2_ondisk.h> | 20 | #include <linux/gfs2_ondisk.h> |
| 21 | #include <linux/crc32.h> | 21 | #include <linux/crc32.h> |
| 22 | #include <linux/lm_interface.h> | 22 | #include <linux/lm_interface.h> |
| 23 | #include <linux/time.h> | ||
| 23 | 24 | ||
| 24 | #include "gfs2.h" | 25 | #include "gfs2.h" |
| 25 | #include "incore.h" | 26 | #include "incore.h" |
| @@ -38,6 +39,7 @@ | |||
| 38 | #include "dir.h" | 39 | #include "dir.h" |
| 39 | #include "eattr.h" | 40 | #include "eattr.h" |
| 40 | #include "bmap.h" | 41 | #include "bmap.h" |
| 42 | #include "meta_io.h" | ||
| 41 | 43 | ||
| 42 | /** | 44 | /** |
| 43 | * gfs2_write_inode - Make sure the inode is stable on the disk | 45 | * gfs2_write_inode - Make sure the inode is stable on the disk |
| @@ -50,16 +52,74 @@ | |||
| 50 | static int gfs2_write_inode(struct inode *inode, int sync) | 52 | static int gfs2_write_inode(struct inode *inode, int sync) |
| 51 | { | 53 | { |
| 52 | struct gfs2_inode *ip = GFS2_I(inode); | 54 | struct gfs2_inode *ip = GFS2_I(inode); |
| 53 | 55 | struct gfs2_sbd *sdp = GFS2_SB(inode); | |
| 54 | /* Check this is a "normal" inode */ | 56 | struct gfs2_holder gh; |
| 55 | if (test_bit(GIF_USER, &ip->i_flags)) { | 57 | struct buffer_head *bh; |
| 56 | if (current->flags & PF_MEMALLOC) | 58 | struct timespec atime; |
| 57 | return 0; | 59 | struct gfs2_dinode *di; |
| 58 | if (sync) | 60 | int ret = 0; |
| 59 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | 61 | |
| 62 | /* Check this is a "normal" inode, etc */ | ||
| 63 | if (!test_bit(GIF_USER, &ip->i_flags) || | ||
| 64 | (current->flags & PF_MEMALLOC)) | ||
| 65 | return 0; | ||
| 66 | ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh); | ||
| 67 | if (ret) | ||
| 68 | goto do_flush; | ||
| 69 | ret = gfs2_trans_begin(sdp, RES_DINODE, 0); | ||
| 70 | if (ret) | ||
| 71 | goto do_unlock; | ||
| 72 | ret = gfs2_meta_inode_buffer(ip, &bh); | ||
| 73 | if (ret == 0) { | ||
| 74 | di = (struct gfs2_dinode *)bh->b_data; | ||
| 75 | atime.tv_sec = be64_to_cpu(di->di_atime); | ||
| 76 | atime.tv_nsec = be32_to_cpu(di->di_atime_nsec); | ||
| 77 | if (timespec_compare(&inode->i_atime, &atime) > 0) { | ||
| 78 | gfs2_trans_add_bh(ip->i_gl, bh, 1); | ||
| 79 | gfs2_dinode_out(ip, bh->b_data); | ||
| 80 | } | ||
| 81 | brelse(bh); | ||
| 60 | } | 82 | } |
| 83 | gfs2_trans_end(sdp); | ||
| 84 | do_unlock: | ||
| 85 | gfs2_glock_dq_uninit(&gh); | ||
| 86 | do_flush: | ||
| 87 | if (sync != 0) | ||
| 88 | gfs2_log_flush(GFS2_SB(inode), ip->i_gl); | ||
| 89 | return ret; | ||
| 90 | } | ||
| 61 | 91 | ||
| 62 | return 0; | 92 | /** |
| 93 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
| 94 | * @sdp: the filesystem | ||
| 95 | * | ||
| 96 | * Returns: errno | ||
| 97 | */ | ||
| 98 | |||
| 99 | static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
| 100 | { | ||
| 101 | struct gfs2_holder t_gh; | ||
| 102 | int error; | ||
| 103 | |||
| 104 | gfs2_quota_sync(sdp); | ||
| 105 | gfs2_statfs_sync(sdp); | ||
| 106 | |||
| 107 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
| 108 | &t_gh); | ||
| 109 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
| 110 | return error; | ||
| 111 | |||
| 112 | gfs2_meta_syncfs(sdp); | ||
| 113 | gfs2_log_shutdown(sdp); | ||
| 114 | |||
| 115 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
| 116 | |||
| 117 | if (t_gh.gh_gl) | ||
| 118 | gfs2_glock_dq_uninit(&t_gh); | ||
| 119 | |||
| 120 | gfs2_quota_cleanup(sdp); | ||
| 121 | |||
| 122 | return error; | ||
| 63 | } | 123 | } |
| 64 | 124 | ||
| 65 | /** | 125 | /** |
| @@ -73,12 +133,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
| 73 | struct gfs2_sbd *sdp = sb->s_fs_info; | 133 | struct gfs2_sbd *sdp = sb->s_fs_info; |
| 74 | int error; | 134 | int error; |
| 75 | 135 | ||
| 76 | if (!sdp) | ||
| 77 | return; | ||
| 78 | |||
| 79 | if (!strncmp(sb->s_type->name, "gfs2meta", 8)) | ||
| 80 | return; /* Nothing to do */ | ||
| 81 | |||
| 82 | /* Unfreeze the filesystem, if we need to */ | 136 | /* Unfreeze the filesystem, if we need to */ |
| 83 | 137 | ||
| 84 | mutex_lock(&sdp->sd_freeze_lock); | 138 | mutex_lock(&sdp->sd_freeze_lock); |
| @@ -101,7 +155,6 @@ static void gfs2_put_super(struct super_block *sb) | |||
| 101 | 155 | ||
| 102 | /* Release stuff */ | 156 | /* Release stuff */ |
| 103 | 157 | ||
| 104 | iput(sdp->sd_master_dir); | ||
| 105 | iput(sdp->sd_jindex); | 158 | iput(sdp->sd_jindex); |
| 106 | iput(sdp->sd_inum_inode); | 159 | iput(sdp->sd_inum_inode); |
| 107 | iput(sdp->sd_statfs_inode); | 160 | iput(sdp->sd_statfs_inode); |
| @@ -152,6 +205,7 @@ static void gfs2_write_super(struct super_block *sb) | |||
| 152 | * | 205 | * |
| 153 | * Flushes the log to disk. | 206 | * Flushes the log to disk. |
| 154 | */ | 207 | */ |
| 208 | |||
| 155 | static int gfs2_sync_fs(struct super_block *sb, int wait) | 209 | static int gfs2_sync_fs(struct super_block *sb, int wait) |
| 156 | { | 210 | { |
| 157 | sb->s_dirt = 0; | 211 | sb->s_dirt = 0; |
| @@ -270,14 +324,6 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 270 | } | 324 | } |
| 271 | } | 325 | } |
| 272 | 326 | ||
| 273 | if (*flags & (MS_NOATIME | MS_NODIRATIME)) | ||
| 274 | set_bit(SDF_NOATIME, &sdp->sd_flags); | ||
| 275 | else | ||
| 276 | clear_bit(SDF_NOATIME, &sdp->sd_flags); | ||
| 277 | |||
| 278 | /* Don't let the VFS update atimes. GFS2 handles this itself. */ | ||
| 279 | *flags |= MS_NOATIME | MS_NODIRATIME; | ||
| 280 | |||
| 281 | return error; | 327 | return error; |
| 282 | } | 328 | } |
| 283 | 329 | ||
| @@ -295,6 +341,7 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) | |||
| 295 | * inode's blocks, or alternatively pass the baton on to another | 341 | * inode's blocks, or alternatively pass the baton on to another |
| 296 | * node for later deallocation. | 342 | * node for later deallocation. |
| 297 | */ | 343 | */ |
| 344 | |||
| 298 | static void gfs2_drop_inode(struct inode *inode) | 345 | static void gfs2_drop_inode(struct inode *inode) |
| 299 | { | 346 | { |
| 300 | struct gfs2_inode *ip = GFS2_I(inode); | 347 | struct gfs2_inode *ip = GFS2_I(inode); |
| @@ -333,6 +380,16 @@ static void gfs2_clear_inode(struct inode *inode) | |||
| 333 | } | 380 | } |
| 334 | } | 381 | } |
| 335 | 382 | ||
| 383 | static int is_ancestor(const struct dentry *d1, const struct dentry *d2) | ||
| 384 | { | ||
| 385 | do { | ||
| 386 | if (d1 == d2) | ||
| 387 | return 1; | ||
| 388 | d1 = d1->d_parent; | ||
| 389 | } while (!IS_ROOT(d1)); | ||
| 390 | return 0; | ||
| 391 | } | ||
| 392 | |||
| 336 | /** | 393 | /** |
| 337 | * gfs2_show_options - Show mount options for /proc/mounts | 394 | * gfs2_show_options - Show mount options for /proc/mounts |
| 338 | * @s: seq_file structure | 395 | * @s: seq_file structure |
| @@ -346,6 +403,8 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 346 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; | 403 | struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; |
| 347 | struct gfs2_args *args = &sdp->sd_args; | 404 | struct gfs2_args *args = &sdp->sd_args; |
| 348 | 405 | ||
| 406 | if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) | ||
| 407 | seq_printf(s, ",meta"); | ||
| 349 | if (args->ar_lockproto[0]) | 408 | if (args->ar_lockproto[0]) |
| 350 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); | 409 | seq_printf(s, ",lockproto=%s", args->ar_lockproto); |
| 351 | if (args->ar_locktable[0]) | 410 | if (args->ar_locktable[0]) |
| @@ -414,6 +473,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 414 | * conversion on the iopen lock, but we can change that later. This | 473 | * conversion on the iopen lock, but we can change that later. This |
| 415 | * is safe, just less efficient. | 474 | * is safe, just less efficient. |
| 416 | */ | 475 | */ |
| 476 | |||
| 417 | static void gfs2_delete_inode(struct inode *inode) | 477 | static void gfs2_delete_inode(struct inode *inode) |
| 418 | { | 478 | { |
| 419 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; | 479 | struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; |
| @@ -478,8 +538,6 @@ out: | |||
| 478 | clear_inode(inode); | 538 | clear_inode(inode); |
| 479 | } | 539 | } |
| 480 | 540 | ||
| 481 | |||
| 482 | |||
| 483 | static struct inode *gfs2_alloc_inode(struct super_block *sb) | 541 | static struct inode *gfs2_alloc_inode(struct super_block *sb) |
| 484 | { | 542 | { |
| 485 | struct gfs2_inode *ip; | 543 | struct gfs2_inode *ip; |
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index ca831991cbc2..c3ba3d9d0aac 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c | |||
| @@ -33,313 +33,6 @@ | |||
| 33 | #include "trans.h" | 33 | #include "trans.h" |
| 34 | #include "util.h" | 34 | #include "util.h" |
| 35 | 35 | ||
| 36 | static const u32 gfs2_old_fs_formats[] = { | ||
| 37 | 0 | ||
| 38 | }; | ||
| 39 | |||
| 40 | static const u32 gfs2_old_multihost_formats[] = { | ||
| 41 | 0 | ||
| 42 | }; | ||
| 43 | |||
| 44 | /** | ||
| 45 | * gfs2_tune_init - Fill a gfs2_tune structure with default values | ||
| 46 | * @gt: tune | ||
| 47 | * | ||
| 48 | */ | ||
| 49 | |||
| 50 | void gfs2_tune_init(struct gfs2_tune *gt) | ||
| 51 | { | ||
| 52 | spin_lock_init(&gt->gt_spin); | ||
| 53 | |||
| 54 | gt->gt_demote_secs = 300; | ||
| 55 | gt->gt_incore_log_blocks = 1024; | ||
| 56 | gt->gt_log_flush_secs = 60; | ||
| 57 | gt->gt_recoverd_secs = 60; | ||
| 58 | gt->gt_logd_secs = 1; | ||
| 59 | gt->gt_quotad_secs = 5; | ||
| 60 | gt->gt_quota_simul_sync = 64; | ||
| 61 | gt->gt_quota_warn_period = 10; | ||
| 62 | gt->gt_quota_scale_num = 1; | ||
| 63 | gt->gt_quota_scale_den = 1; | ||
| 64 | gt->gt_quota_cache_secs = 300; | ||
| 65 | gt->gt_quota_quantum = 60; | ||
| 66 | gt->gt_atime_quantum = 3600; | ||
| 67 | gt->gt_new_files_jdata = 0; | ||
| 68 | gt->gt_max_readahead = 1 << 18; | ||
| 69 | gt->gt_stall_secs = 600; | ||
| 70 | gt->gt_complain_secs = 10; | ||
| 71 | gt->gt_statfs_quantum = 30; | ||
| 72 | gt->gt_statfs_slow = 0; | ||
| 73 | } | ||
| 74 | |||
| 75 | /** | ||
| 76 | * gfs2_check_sb - Check superblock | ||
| 77 | * @sdp: the filesystem | ||
| 78 | * @sb: The superblock | ||
| 79 | * @silent: Don't print a message if the check fails | ||
| 80 | * | ||
| 81 | * Checks the version code of the FS is one that we understand how to | ||
| 82 | * read and that the sizes of the various on-disk structures have not | ||
| 83 | * changed. | ||
| 84 | */ | ||
| 85 | |||
| 86 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent) | ||
| 87 | { | ||
| 88 | unsigned int x; | ||
| 89 | |||
| 90 | if (sb->sb_magic != GFS2_MAGIC || | ||
| 91 | sb->sb_type != GFS2_METATYPE_SB) { | ||
| 92 | if (!silent) | ||
| 93 | printk(KERN_WARNING "GFS2: not a GFS2 filesystem\n"); | ||
| 94 | return -EINVAL; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* If format numbers match exactly, we're done. */ | ||
| 98 | |||
| 99 | if (sb->sb_fs_format == GFS2_FORMAT_FS && | ||
| 100 | sb->sb_multihost_format == GFS2_FORMAT_MULTI) | ||
| 101 | return 0; | ||
| 102 | |||
| 103 | if (sb->sb_fs_format != GFS2_FORMAT_FS) { | ||
| 104 | for (x = 0; gfs2_old_fs_formats[x]; x++) | ||
| 105 | if (gfs2_old_fs_formats[x] == sb->sb_fs_format) | ||
| 106 | break; | ||
| 107 | |||
| 108 | if (!gfs2_old_fs_formats[x]) { | ||
| 109 | printk(KERN_WARNING | ||
| 110 | "GFS2: code version (%u, %u) is incompatible " | ||
| 111 | "with ondisk format (%u, %u)\n", | ||
| 112 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 113 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 114 | printk(KERN_WARNING | ||
| 115 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 116 | return -EINVAL; | ||
| 117 | } | ||
| 118 | } | ||
| 119 | |||
| 120 | if (sb->sb_multihost_format != GFS2_FORMAT_MULTI) { | ||
| 121 | for (x = 0; gfs2_old_multihost_formats[x]; x++) | ||
| 122 | if (gfs2_old_multihost_formats[x] == | ||
| 123 | sb->sb_multihost_format) | ||
| 124 | break; | ||
| 125 | |||
| 126 | if (!gfs2_old_multihost_formats[x]) { | ||
| 127 | printk(KERN_WARNING | ||
| 128 | "GFS2: code version (%u, %u) is incompatible " | ||
| 129 | "with ondisk format (%u, %u)\n", | ||
| 130 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 131 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 132 | printk(KERN_WARNING | ||
| 133 | "GFS2: I don't know how to upgrade this FS\n"); | ||
| 134 | return -EINVAL; | ||
| 135 | } | ||
| 136 | } | ||
| 137 | |||
| 138 | if (!sdp->sd_args.ar_upgrade) { | ||
| 139 | printk(KERN_WARNING | ||
| 140 | "GFS2: code version (%u, %u) is incompatible " | ||
| 141 | "with ondisk format (%u, %u)\n", | ||
| 142 | GFS2_FORMAT_FS, GFS2_FORMAT_MULTI, | ||
| 143 | sb->sb_fs_format, sb->sb_multihost_format); | ||
| 144 | printk(KERN_INFO | ||
| 145 | "GFS2: Use the \"upgrade\" mount option to upgrade " | ||
| 146 | "the FS\n"); | ||
| 147 | printk(KERN_INFO "GFS2: See the manual for more details\n"); | ||
| 148 | return -EINVAL; | ||
| 149 | } | ||
| 150 | |||
| 151 | return 0; | ||
| 152 | } | ||
| 153 | |||
| 154 | |||
| 155 | static void end_bio_io_page(struct bio *bio, int error) | ||
| 156 | { | ||
| 157 | struct page *page = bio->bi_private; | ||
| 158 | |||
| 159 | if (!error) | ||
| 160 | SetPageUptodate(page); | ||
| 161 | else | ||
| 162 | printk(KERN_WARNING "gfs2: error %d reading superblock\n", error); | ||
| 163 | unlock_page(page); | ||
| 164 | } | ||
| 165 | |||
| 166 | static void gfs2_sb_in(struct gfs2_sb_host *sb, const void *buf) | ||
| 167 | { | ||
| 168 | const struct gfs2_sb *str = buf; | ||
| 169 | |||
| 170 | sb->sb_magic = be32_to_cpu(str->sb_header.mh_magic); | ||
| 171 | sb->sb_type = be32_to_cpu(str->sb_header.mh_type); | ||
| 172 | sb->sb_format = be32_to_cpu(str->sb_header.mh_format); | ||
| 173 | sb->sb_fs_format = be32_to_cpu(str->sb_fs_format); | ||
| 174 | sb->sb_multihost_format = be32_to_cpu(str->sb_multihost_format); | ||
| 175 | sb->sb_bsize = be32_to_cpu(str->sb_bsize); | ||
| 176 | sb->sb_bsize_shift = be32_to_cpu(str->sb_bsize_shift); | ||
| 177 | sb->sb_master_dir.no_addr = be64_to_cpu(str->sb_master_dir.no_addr); | ||
| 178 | sb->sb_master_dir.no_formal_ino = be64_to_cpu(str->sb_master_dir.no_formal_ino); | ||
| 179 | sb->sb_root_dir.no_addr = be64_to_cpu(str->sb_root_dir.no_addr); | ||
| 180 | sb->sb_root_dir.no_formal_ino = be64_to_cpu(str->sb_root_dir.no_formal_ino); | ||
| 181 | |||
| 182 | memcpy(sb->sb_lockproto, str->sb_lockproto, GFS2_LOCKNAME_LEN); | ||
| 183 | memcpy(sb->sb_locktable, str->sb_locktable, GFS2_LOCKNAME_LEN); | ||
| 184 | } | ||
| 185 | |||
| 186 | /** | ||
| 187 | * gfs2_read_super - Read the gfs2 super block from disk | ||
| 188 | * @sdp: The GFS2 super block | ||
| 189 | * @sector: The location of the super block | ||
| 190 | * @error: The error code to return | ||
| 191 | * | ||
| 192 | * This uses the bio functions to read the super block from disk | ||
| 193 | * because we want to be 100% sure that we never read cached data. | ||
| 194 | * A super block is read twice only during each GFS2 mount and is | ||
| 195 | * never written to by the filesystem. The first time its read no | ||
| 196 | * locks are held, and the only details which are looked at are those | ||
| 197 | * relating to the locking protocol. Once locking is up and working, | ||
| 198 | * the sb is read again under the lock to establish the location of | ||
| 199 | * the master directory (contains pointers to journals etc) and the | ||
| 200 | * root directory. | ||
| 201 | * | ||
| 202 | * Returns: 0 on success or error | ||
| 203 | */ | ||
| 204 | |||
| 205 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector) | ||
| 206 | { | ||
| 207 | struct super_block *sb = sdp->sd_vfs; | ||
| 208 | struct gfs2_sb *p; | ||
| 209 | struct page *page; | ||
| 210 | struct bio *bio; | ||
| 211 | |||
| 212 | page = alloc_page(GFP_NOFS); | ||
| 213 | if (unlikely(!page)) | ||
| 214 | return -ENOBUFS; | ||
| 215 | |||
| 216 | ClearPageUptodate(page); | ||
| 217 | ClearPageDirty(page); | ||
| 218 | lock_page(page); | ||
| 219 | |||
| 220 | bio = bio_alloc(GFP_NOFS, 1); | ||
| 221 | if (unlikely(!bio)) { | ||
| 222 | __free_page(page); | ||
| 223 | return -ENOBUFS; | ||
| 224 | } | ||
| 225 | |||
| 226 | bio->bi_sector = sector * (sb->s_blocksize >> 9); | ||
| 227 | bio->bi_bdev = sb->s_bdev; | ||
| 228 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
| 229 | |||
| 230 | bio->bi_end_io = end_bio_io_page; | ||
| 231 | bio->bi_private = page; | ||
| 232 | submit_bio(READ_SYNC | (1 << BIO_RW_META), bio); | ||
| 233 | wait_on_page_locked(page); | ||
| 234 | bio_put(bio); | ||
| 235 | if (!PageUptodate(page)) { | ||
| 236 | __free_page(page); | ||
| 237 | return -EIO; | ||
| 238 | } | ||
| 239 | p = kmap(page); | ||
| 240 | gfs2_sb_in(&sdp->sd_sb, p); | ||
| 241 | kunmap(page); | ||
| 242 | __free_page(page); | ||
| 243 | return 0; | ||
| 244 | } | ||
| 245 | |||
| 246 | /** | ||
| 247 | * gfs2_read_sb - Read super block | ||
| 248 | * @sdp: The GFS2 superblock | ||
| 249 | * @gl: the glock for the superblock (assumed to be held) | ||
| 250 | * @silent: Don't print message if mount fails | ||
| 251 | * | ||
| 252 | */ | ||
| 253 | |||
| 254 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent) | ||
| 255 | { | ||
| 256 | u32 hash_blocks, ind_blocks, leaf_blocks; | ||
| 257 | u32 tmp_blocks; | ||
| 258 | unsigned int x; | ||
| 259 | int error; | ||
| 260 | |||
| 261 | error = gfs2_read_super(sdp, GFS2_SB_ADDR >> sdp->sd_fsb2bb_shift); | ||
| 262 | if (error) { | ||
| 263 | if (!silent) | ||
| 264 | fs_err(sdp, "can't read superblock\n"); | ||
| 265 | return error; | ||
| 266 | } | ||
| 267 | |||
| 268 | error = gfs2_check_sb(sdp, &sdp->sd_sb, silent); | ||
| 269 | if (error) | ||
| 270 | return error; | ||
| 271 | |||
| 272 | sdp->sd_fsb2bb_shift = sdp->sd_sb.sb_bsize_shift - | ||
| 273 | GFS2_BASIC_BLOCK_SHIFT; | ||
| 274 | sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; | ||
| 275 | sdp->sd_diptrs = (sdp->sd_sb.sb_bsize - | ||
| 276 | sizeof(struct gfs2_dinode)) / sizeof(u64); | ||
| 277 | sdp->sd_inptrs = (sdp->sd_sb.sb_bsize - | ||
| 278 | sizeof(struct gfs2_meta_header)) / sizeof(u64); | ||
| 279 | sdp->sd_jbsize = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header); | ||
| 280 | sdp->sd_hash_bsize = sdp->sd_sb.sb_bsize / 2; | ||
| 281 | sdp->sd_hash_bsize_shift = sdp->sd_sb.sb_bsize_shift - 1; | ||
| 282 | sdp->sd_hash_ptrs = sdp->sd_hash_bsize / sizeof(u64); | ||
| 283 | sdp->sd_qc_per_block = (sdp->sd_sb.sb_bsize - | ||
| 284 | sizeof(struct gfs2_meta_header)) / | ||
| 285 | sizeof(struct gfs2_quota_change); | ||
| 286 | |||
| 287 | /* Compute maximum reservation required to add a entry to a directory */ | ||
| 288 | |||
| 289 | hash_blocks = DIV_ROUND_UP(sizeof(u64) * (1 << GFS2_DIR_MAX_DEPTH), | ||
| 290 | sdp->sd_jbsize); | ||
| 291 | |||
| 292 | ind_blocks = 0; | ||
| 293 | for (tmp_blocks = hash_blocks; tmp_blocks > sdp->sd_diptrs;) { | ||
| 294 | tmp_blocks = DIV_ROUND_UP(tmp_blocks, sdp->sd_inptrs); | ||
| 295 | ind_blocks += tmp_blocks; | ||
| 296 | } | ||
| 297 | |||
| 298 | leaf_blocks = 2 + GFS2_DIR_MAX_DEPTH; | ||
| 299 | |||
| 300 | sdp->sd_max_dirres = hash_blocks + ind_blocks + leaf_blocks; | ||
| 301 | |||
| 302 | sdp->sd_heightsize[0] = sdp->sd_sb.sb_bsize - | ||
| 303 | sizeof(struct gfs2_dinode); | ||
| 304 | sdp->sd_heightsize[1] = sdp->sd_sb.sb_bsize * sdp->sd_diptrs; | ||
| 305 | for (x = 2;; x++) { | ||
| 306 | u64 space, d; | ||
| 307 | u32 m; | ||
| 308 | |||
| 309 | space = sdp->sd_heightsize[x - 1] * sdp->sd_inptrs; | ||
| 310 | d = space; | ||
| 311 | m = do_div(d, sdp->sd_inptrs); | ||
| 312 | |||
| 313 | if (d != sdp->sd_heightsize[x - 1] || m) | ||
| 314 | break; | ||
| 315 | sdp->sd_heightsize[x] = space; | ||
| 316 | } | ||
| 317 | sdp->sd_max_height = x; | ||
| 318 | sdp->sd_heightsize[x] = ~0; | ||
| 319 | gfs2_assert(sdp, sdp->sd_max_height <= GFS2_MAX_META_HEIGHT); | ||
| 320 | |||
| 321 | sdp->sd_jheightsize[0] = sdp->sd_sb.sb_bsize - | ||
| 322 | sizeof(struct gfs2_dinode); | ||
| 323 | sdp->sd_jheightsize[1] = sdp->sd_jbsize * sdp->sd_diptrs; | ||
| 324 | for (x = 2;; x++) { | ||
| 325 | u64 space, d; | ||
| 326 | u32 m; | ||
| 327 | |||
| 328 | space = sdp->sd_jheightsize[x - 1] * sdp->sd_inptrs; | ||
| 329 | d = space; | ||
| 330 | m = do_div(d, sdp->sd_inptrs); | ||
| 331 | |||
| 332 | if (d != sdp->sd_jheightsize[x - 1] || m) | ||
| 333 | break; | ||
| 334 | sdp->sd_jheightsize[x] = space; | ||
| 335 | } | ||
| 336 | sdp->sd_max_jheight = x; | ||
| 337 | sdp->sd_jheightsize[x] = ~0; | ||
| 338 | gfs2_assert(sdp, sdp->sd_max_jheight <= GFS2_MAX_META_HEIGHT); | ||
| 339 | |||
| 340 | return 0; | ||
| 341 | } | ||
| 342 | |||
| 343 | /** | 36 | /** |
| 344 | * gfs2_jindex_hold - Grab a lock on the jindex | 37 | * gfs2_jindex_hold - Grab a lock on the jindex |
| 345 | * @sdp: The GFS2 superblock | 38 | * @sdp: The GFS2 superblock |
| @@ -581,39 +274,6 @@ fail: | |||
| 581 | return error; | 274 | return error; |
| 582 | } | 275 | } |
| 583 | 276 | ||
| 584 | /** | ||
| 585 | * gfs2_make_fs_ro - Turn a Read-Write FS into a Read-Only one | ||
| 586 | * @sdp: the filesystem | ||
| 587 | * | ||
| 588 | * Returns: errno | ||
| 589 | */ | ||
| 590 | |||
| 591 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp) | ||
| 592 | { | ||
| 593 | struct gfs2_holder t_gh; | ||
| 594 | int error; | ||
| 595 | |||
| 596 | gfs2_quota_sync(sdp); | ||
| 597 | gfs2_statfs_sync(sdp); | ||
| 598 | |||
| 599 | error = gfs2_glock_nq_init(sdp->sd_trans_gl, LM_ST_SHARED, GL_NOCACHE, | ||
| 600 | &t_gh); | ||
| 601 | if (error && !test_bit(SDF_SHUTDOWN, &sdp->sd_flags)) | ||
| 602 | return error; | ||
| 603 | |||
| 604 | gfs2_meta_syncfs(sdp); | ||
| 605 | gfs2_log_shutdown(sdp); | ||
| 606 | |||
| 607 | clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags); | ||
| 608 | |||
| 609 | if (t_gh.gh_gl) | ||
| 610 | gfs2_glock_dq_uninit(&t_gh); | ||
| 611 | |||
| 612 | gfs2_quota_cleanup(sdp); | ||
| 613 | |||
| 614 | return error; | ||
| 615 | } | ||
| 616 | |||
| 617 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) | 277 | static void gfs2_statfs_change_in(struct gfs2_statfs_change_host *sc, const void *buf) |
| 618 | { | 278 | { |
| 619 | const struct gfs2_statfs_change *str = buf; | 279 | const struct gfs2_statfs_change *str = buf; |
diff --git a/fs/gfs2/super.h b/fs/gfs2/super.h index 44361ecc44f7..50a4c9b1215e 100644 --- a/fs/gfs2/super.h +++ b/fs/gfs2/super.h | |||
| @@ -12,11 +12,6 @@ | |||
| 12 | 12 | ||
| 13 | #include "incore.h" | 13 | #include "incore.h" |
| 14 | 14 | ||
| 15 | void gfs2_tune_init(struct gfs2_tune *gt); | ||
| 16 | |||
| 17 | int gfs2_check_sb(struct gfs2_sbd *sdp, struct gfs2_sb_host *sb, int silent); | ||
| 18 | int gfs2_read_sb(struct gfs2_sbd *sdp, struct gfs2_glock *gl, int silent); | ||
| 19 | int gfs2_read_super(struct gfs2_sbd *sdp, sector_t sector); | ||
| 20 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); | 15 | void gfs2_lm_unmount(struct gfs2_sbd *sdp); |
| 21 | 16 | ||
| 22 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) | 17 | static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp) |
| @@ -40,7 +35,6 @@ int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename, | |||
| 40 | struct gfs2_inode **ipp); | 35 | struct gfs2_inode **ipp); |
| 41 | 36 | ||
| 42 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); | 37 | int gfs2_make_fs_rw(struct gfs2_sbd *sdp); |
| 43 | int gfs2_make_fs_ro(struct gfs2_sbd *sdp); | ||
| 44 | 38 | ||
| 45 | int gfs2_statfs_init(struct gfs2_sbd *sdp); | 39 | int gfs2_statfs_init(struct gfs2_sbd *sdp); |
| 46 | void gfs2_statfs_change(struct gfs2_sbd *sdp, | 40 | void gfs2_statfs_change(struct gfs2_sbd *sdp, |
diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 74846559fc3f..7e1879f1a02c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c | |||
| @@ -269,14 +269,6 @@ ARGS_ATTR(quota, "%u\n"); | |||
| 269 | ARGS_ATTR(suiddir, "%d\n"); | 269 | ARGS_ATTR(suiddir, "%d\n"); |
| 270 | ARGS_ATTR(data, "%d\n"); | 270 | ARGS_ATTR(data, "%d\n"); |
| 271 | 271 | ||
| 272 | /* one oddball doesn't fit the macro mold */ | ||
| 273 | static ssize_t noatime_show(struct gfs2_sbd *sdp, char *buf) | ||
| 274 | { | ||
| 275 | return snprintf(buf, PAGE_SIZE, "%d\n", | ||
| 276 | !!test_bit(SDF_NOATIME, &sdp->sd_flags)); | ||
| 277 | } | ||
| 278 | static struct args_attr args_attr_noatime = __ATTR_RO(noatime); | ||
| 279 | |||
| 280 | static struct attribute *args_attrs[] = { | 272 | static struct attribute *args_attrs[] = { |
| 281 | &args_attr_lockproto.attr, | 273 | &args_attr_lockproto.attr, |
| 282 | &args_attr_locktable.attr, | 274 | &args_attr_locktable.attr, |
| @@ -292,7 +284,6 @@ static struct attribute *args_attrs[] = { | |||
| 292 | &args_attr_quota.attr, | 284 | &args_attr_quota.attr, |
| 293 | &args_attr_suiddir.attr, | 285 | &args_attr_suiddir.attr, |
| 294 | &args_attr_data.attr, | 286 | &args_attr_data.attr, |
| 295 | &args_attr_noatime.attr, | ||
| 296 | NULL, | 287 | NULL, |
| 297 | }; | 288 | }; |
| 298 | 289 | ||
| @@ -407,7 +398,6 @@ TUNE_ATTR(incore_log_blocks, 0); | |||
| 407 | TUNE_ATTR(log_flush_secs, 0); | 398 | TUNE_ATTR(log_flush_secs, 0); |
| 408 | TUNE_ATTR(quota_warn_period, 0); | 399 | TUNE_ATTR(quota_warn_period, 0); |
| 409 | TUNE_ATTR(quota_quantum, 0); | 400 | TUNE_ATTR(quota_quantum, 0); |
| 410 | TUNE_ATTR(atime_quantum, 0); | ||
| 411 | TUNE_ATTR(max_readahead, 0); | 401 | TUNE_ATTR(max_readahead, 0); |
| 412 | TUNE_ATTR(complain_secs, 0); | 402 | TUNE_ATTR(complain_secs, 0); |
| 413 | TUNE_ATTR(statfs_slow, 0); | 403 | TUNE_ATTR(statfs_slow, 0); |
| @@ -427,7 +417,6 @@ static struct attribute *tune_attrs[] = { | |||
| 427 | &tune_attr_log_flush_secs.attr, | 417 | &tune_attr_log_flush_secs.attr, |
| 428 | &tune_attr_quota_warn_period.attr, | 418 | &tune_attr_quota_warn_period.attr, |
| 429 | &tune_attr_quota_quantum.attr, | 419 | &tune_attr_quota_quantum.attr, |
| 430 | &tune_attr_atime_quantum.attr, | ||
| 431 | &tune_attr_max_readahead.attr, | 420 | &tune_attr_max_readahead.attr, |
| 432 | &tune_attr_complain_secs.attr, | 421 | &tune_attr_complain_secs.attr, |
| 433 | &tune_attr_statfs_slow.attr, | 422 | &tune_attr_statfs_slow.attr, |
diff --git a/fs/hfs/catalog.c b/fs/hfs/catalog.c index ba851576ebb1..6d98f116ca03 100644 --- a/fs/hfs/catalog.c +++ b/fs/hfs/catalog.c | |||
| @@ -190,6 +190,10 @@ int hfs_cat_find_brec(struct super_block *sb, u32 cnid, | |||
| 190 | 190 | ||
| 191 | fd->search_key->cat.ParID = rec.thread.ParID; | 191 | fd->search_key->cat.ParID = rec.thread.ParID; |
| 192 | len = fd->search_key->cat.CName.len = rec.thread.CName.len; | 192 | len = fd->search_key->cat.CName.len = rec.thread.CName.len; |
| 193 | if (len > HFS_NAMELEN) { | ||
| 194 | printk(KERN_ERR "hfs: bad catalog namelength\n"); | ||
| 195 | return -EIO; | ||
| 196 | } | ||
| 193 | memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); | 197 | memcpy(fd->search_key->cat.CName.name, rec.thread.CName.name, len); |
| 194 | return hfs_brec_find(fd); | 198 | return hfs_brec_find(fd); |
| 195 | } | 199 | } |
diff --git a/fs/hfs/super.c b/fs/hfs/super.c index 4abb1047c689..3c7c7637719c 100644 --- a/fs/hfs/super.c +++ b/fs/hfs/super.c | |||
| @@ -173,7 +173,7 @@ enum { | |||
| 173 | opt_err | 173 | opt_err |
| 174 | }; | 174 | }; |
| 175 | 175 | ||
| 176 | static match_table_t tokens = { | 176 | static const match_table_t tokens = { |
| 177 | { opt_uid, "uid=%u" }, | 177 | { opt_uid, "uid=%u" }, |
| 178 | { opt_gid, "gid=%u" }, | 178 | { opt_gid, "gid=%u" }, |
| 179 | { opt_umask, "umask=%o" }, | 179 | { opt_umask, "umask=%o" }, |
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c index d128a25b74d2..ea30afc2a03c 100644 --- a/fs/hfsplus/bitmap.c +++ b/fs/hfsplus/bitmap.c | |||
| @@ -32,6 +32,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
| 32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); | 32 | mutex_lock(&HFSPLUS_SB(sb).alloc_file->i_mutex); |
| 33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; | 33 | mapping = HFSPLUS_SB(sb).alloc_file->i_mapping; |
| 34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); | 34 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, NULL); |
| 35 | if (IS_ERR(page)) { | ||
| 36 | start = size; | ||
| 37 | goto out; | ||
| 38 | } | ||
| 35 | pptr = kmap(page); | 39 | pptr = kmap(page); |
| 36 | curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; | 40 | curr = pptr + (offset & (PAGE_CACHE_BITS - 1)) / 32; |
| 37 | i = offset % 32; | 41 | i = offset % 32; |
| @@ -73,6 +77,10 @@ int hfsplus_block_allocate(struct super_block *sb, u32 size, u32 offset, u32 *ma | |||
| 73 | break; | 77 | break; |
| 74 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, | 78 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, |
| 75 | NULL); | 79 | NULL); |
| 80 | if (IS_ERR(page)) { | ||
| 81 | start = size; | ||
| 82 | goto out; | ||
| 83 | } | ||
| 76 | curr = pptr = kmap(page); | 84 | curr = pptr = kmap(page); |
| 77 | if ((size ^ offset) / PAGE_CACHE_BITS) | 85 | if ((size ^ offset) / PAGE_CACHE_BITS) |
| 78 | end = pptr + PAGE_CACHE_BITS / 32; | 86 | end = pptr + PAGE_CACHE_BITS / 32; |
| @@ -120,6 +128,10 @@ found: | |||
| 120 | offset += PAGE_CACHE_BITS; | 128 | offset += PAGE_CACHE_BITS; |
| 121 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, | 129 | page = read_mapping_page(mapping, offset / PAGE_CACHE_BITS, |
| 122 | NULL); | 130 | NULL); |
| 131 | if (IS_ERR(page)) { | ||
| 132 | start = size; | ||
| 133 | goto out; | ||
| 134 | } | ||
| 123 | pptr = kmap(page); | 135 | pptr = kmap(page); |
| 124 | curr = pptr; | 136 | curr = pptr; |
| 125 | end = pptr + PAGE_CACHE_BITS / 32; | 137 | end = pptr + PAGE_CACHE_BITS / 32; |
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index ba117c445e78..f6874acb2cf2 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c | |||
| @@ -168,6 +168,11 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, | |||
| 168 | return -EIO; | 168 | return -EIO; |
| 169 | } | 169 | } |
| 170 | 170 | ||
| 171 | if (be16_to_cpu(tmp.thread.nodeName.length) > 255) { | ||
| 172 | printk(KERN_ERR "hfs: catalog name length corrupted\n"); | ||
| 173 | return -EIO; | ||
| 174 | } | ||
| 175 | |||
| 171 | hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), | 176 | hfsplus_cat_build_key_uni(fd->search_key, be32_to_cpu(tmp.thread.parentID), |
| 172 | &tmp.thread.nodeName); | 177 | &tmp.thread.nodeName); |
| 173 | return hfs_brec_find(fd); | 178 | return hfs_brec_find(fd); |
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c index fec8f61227ff..0022eec63cda 100644 --- a/fs/hfsplus/extents.c +++ b/fs/hfsplus/extents.c | |||
| @@ -199,6 +199,9 @@ int hfsplus_get_block(struct inode *inode, sector_t iblock, | |||
| 199 | goto done; | 199 | goto done; |
| 200 | } | 200 | } |
| 201 | 201 | ||
| 202 | if (inode->i_ino == HFSPLUS_EXT_CNID) | ||
| 203 | return -EIO; | ||
| 204 | |||
| 202 | mutex_lock(&HFSPLUS_I(inode).extents_lock); | 205 | mutex_lock(&HFSPLUS_I(inode).extents_lock); |
| 203 | res = hfsplus_ext_read_extent(inode, ablock); | 206 | res = hfsplus_ext_read_extent(inode, ablock); |
| 204 | if (!res) { | 207 | if (!res) { |
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index b085d64a2b67..963be644297a 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c | |||
| @@ -254,6 +254,8 @@ static int hfsplus_file_open(struct inode *inode, struct file *file) | |||
| 254 | { | 254 | { |
| 255 | if (HFSPLUS_IS_RSRC(inode)) | 255 | if (HFSPLUS_IS_RSRC(inode)) |
| 256 | inode = HFSPLUS_I(inode).rsrc_inode; | 256 | inode = HFSPLUS_I(inode).rsrc_inode; |
| 257 | if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS) | ||
| 258 | return -EOVERFLOW; | ||
| 257 | atomic_inc(&HFSPLUS_I(inode).opencnt); | 259 | atomic_inc(&HFSPLUS_I(inode).opencnt); |
| 258 | return 0; | 260 | return 0; |
| 259 | } | 261 | } |
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c index 9997cbf8beb5..9699c56d323f 100644 --- a/fs/hfsplus/options.c +++ b/fs/hfsplus/options.c | |||
| @@ -25,7 +25,7 @@ enum { | |||
| 25 | opt_force, opt_err | 25 | opt_force, opt_err |
| 26 | }; | 26 | }; |
| 27 | 27 | ||
| 28 | static match_table_t tokens = { | 28 | static const match_table_t tokens = { |
| 29 | { opt_creator, "creator=%s" }, | 29 | { opt_creator, "creator=%s" }, |
| 30 | { opt_type, "type=%s" }, | 30 | { opt_type, "type=%s" }, |
| 31 | { opt_umask, "umask=%o" }, | 31 | { opt_umask, "umask=%o" }, |
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index e834e578c93f..eb74531a0a8e 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c | |||
| @@ -356,7 +356,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) | |||
| 356 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { | 356 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_SOFTLOCK)) { |
| 357 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); | 357 | printk(KERN_WARNING "hfs: Filesystem is marked locked, mounting read-only.\n"); |
| 358 | sb->s_flags |= MS_RDONLY; | 358 | sb->s_flags |= MS_RDONLY; |
| 359 | } else if (vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) { | 359 | } else if ((vhdr->attributes & cpu_to_be32(HFSPLUS_VOL_JOURNALED)) && !(sb->s_flags & MS_RDONLY)) { |
| 360 | printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " | 360 | printk(KERN_WARNING "hfs: write access to a journaled filesystem is not supported, " |
| 361 | "use the force option at your own risk, mounting read-only.\n"); | 361 | "use the force option at your own risk, mounting read-only.\n"); |
| 362 | sb->s_flags |= MS_RDONLY; | 362 | sb->s_flags |= MS_RDONLY; |
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index b8ae9c90ada0..29ad461d568f 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c | |||
| @@ -215,7 +215,7 @@ enum { | |||
| 215 | Opt_timeshift, Opt_err, | 215 | Opt_timeshift, Opt_err, |
| 216 | }; | 216 | }; |
| 217 | 217 | ||
| 218 | static match_table_t tokens = { | 218 | static const match_table_t tokens = { |
| 219 | {Opt_help, "help"}, | 219 | {Opt_help, "help"}, |
| 220 | {Opt_uid, "uid=%u"}, | 220 | {Opt_uid, "uid=%u"}, |
| 221 | {Opt_gid, "gid=%u"}, | 221 | {Opt_gid, "gid=%u"}, |
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 3f58923fb39b..61edc701b0e6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c | |||
| @@ -57,7 +57,7 @@ enum { | |||
| 57 | Opt_err, | 57 | Opt_err, |
| 58 | }; | 58 | }; |
| 59 | 59 | ||
| 60 | static match_table_t tokens = { | 60 | static const match_table_t tokens = { |
| 61 | {Opt_size, "size=%s"}, | 61 | {Opt_size, "size=%s"}, |
| 62 | {Opt_nr_inodes, "nr_inodes=%s"}, | 62 | {Opt_nr_inodes, "nr_inodes=%s"}, |
| 63 | {Opt_mode, "mode=%o"}, | 63 | {Opt_mode, "mode=%o"}, |
diff --git a/fs/ioctl.c b/fs/ioctl.c index 7db32b3382d3..d152856c371b 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c | |||
| @@ -13,9 +13,14 @@ | |||
| 13 | #include <linux/security.h> | 13 | #include <linux/security.h> |
| 14 | #include <linux/module.h> | 14 | #include <linux/module.h> |
| 15 | #include <linux/uaccess.h> | 15 | #include <linux/uaccess.h> |
| 16 | #include <linux/writeback.h> | ||
| 17 | #include <linux/buffer_head.h> | ||
| 16 | 18 | ||
| 17 | #include <asm/ioctls.h> | 19 | #include <asm/ioctls.h> |
| 18 | 20 | ||
| 21 | /* So that the fiemap access checks can't overflow on 32 bit machines. */ | ||
| 22 | #define FIEMAP_MAX_EXTENTS (UINT_MAX / sizeof(struct fiemap_extent)) | ||
| 23 | |||
| 19 | /** | 24 | /** |
| 20 | * vfs_ioctl - call filesystem specific ioctl methods | 25 | * vfs_ioctl - call filesystem specific ioctl methods |
| 21 | * @filp: open file to invoke ioctl method on | 26 | * @filp: open file to invoke ioctl method on |
| @@ -71,6 +76,276 @@ static int ioctl_fibmap(struct file *filp, int __user *p) | |||
| 71 | return put_user(res, p); | 76 | return put_user(res, p); |
| 72 | } | 77 | } |
| 73 | 78 | ||
| 79 | /** | ||
| 80 | * fiemap_fill_next_extent - Fiemap helper function | ||
| 81 | * @fieinfo: Fiemap context passed into ->fiemap | ||
| 82 | * @logical: Extent logical start offset, in bytes | ||
| 83 | * @phys: Extent physical start offset, in bytes | ||
| 84 | * @len: Extent length, in bytes | ||
| 85 | * @flags: FIEMAP_EXTENT flags that describe this extent | ||
| 86 | * | ||
| 87 | * Called from file system ->fiemap callback. Will populate extent | ||
| 88 | * info as passed in via arguments and copy to user memory. On | ||
| 89 | * success, extent count on fieinfo is incremented. | ||
| 90 | * | ||
| 91 | * Returns 0 on success, -errno on error, 1 if this was the last | ||
| 92 | * extent that will fit in user array. | ||
| 93 | */ | ||
| 94 | #define SET_UNKNOWN_FLAGS (FIEMAP_EXTENT_DELALLOC) | ||
| 95 | #define SET_NO_UNMOUNTED_IO_FLAGS (FIEMAP_EXTENT_DATA_ENCRYPTED) | ||
| 96 | #define SET_NOT_ALIGNED_FLAGS (FIEMAP_EXTENT_DATA_TAIL|FIEMAP_EXTENT_DATA_INLINE) | ||
| 97 | int fiemap_fill_next_extent(struct fiemap_extent_info *fieinfo, u64 logical, | ||
| 98 | u64 phys, u64 len, u32 flags) | ||
| 99 | { | ||
| 100 | struct fiemap_extent extent; | ||
| 101 | struct fiemap_extent *dest = fieinfo->fi_extents_start; | ||
| 102 | |||
| 103 | /* only count the extents */ | ||
| 104 | if (fieinfo->fi_extents_max == 0) { | ||
| 105 | fieinfo->fi_extents_mapped++; | ||
| 106 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
| 107 | } | ||
| 108 | |||
| 109 | if (fieinfo->fi_extents_mapped >= fieinfo->fi_extents_max) | ||
| 110 | return 1; | ||
| 111 | |||
| 112 | if (flags & SET_UNKNOWN_FLAGS) | ||
| 113 | flags |= FIEMAP_EXTENT_UNKNOWN; | ||
| 114 | if (flags & SET_NO_UNMOUNTED_IO_FLAGS) | ||
| 115 | flags |= FIEMAP_EXTENT_ENCODED; | ||
| 116 | if (flags & SET_NOT_ALIGNED_FLAGS) | ||
| 117 | flags |= FIEMAP_EXTENT_NOT_ALIGNED; | ||
| 118 | |||
| 119 | memset(&extent, 0, sizeof(extent)); | ||
| 120 | extent.fe_logical = logical; | ||
| 121 | extent.fe_physical = phys; | ||
| 122 | extent.fe_length = len; | ||
| 123 | extent.fe_flags = flags; | ||
| 124 | |||
| 125 | dest += fieinfo->fi_extents_mapped; | ||
| 126 | if (copy_to_user(dest, &extent, sizeof(extent))) | ||
| 127 | return -EFAULT; | ||
| 128 | |||
| 129 | fieinfo->fi_extents_mapped++; | ||
| 130 | if (fieinfo->fi_extents_mapped == fieinfo->fi_extents_max) | ||
| 131 | return 1; | ||
| 132 | return (flags & FIEMAP_EXTENT_LAST) ? 1 : 0; | ||
| 133 | } | ||
| 134 | EXPORT_SYMBOL(fiemap_fill_next_extent); | ||
| 135 | |||
| 136 | /** | ||
| 137 | * fiemap_check_flags - check validity of requested flags for fiemap | ||
| 138 | * @fieinfo: Fiemap context passed into ->fiemap | ||
| 139 | * @fs_flags: Set of fiemap flags that the file system understands | ||
| 140 | * | ||
| 141 | * Called from file system ->fiemap callback. This will compute the | ||
| 142 | * intersection of valid fiemap flags and those that the fs supports. That | ||
| 143 | * value is then compared against the user supplied flags. In case of bad user | ||
| 144 | * flags, the invalid values will be written into the fieinfo structure, and | ||
| 145 | * -EBADR is returned, which tells ioctl_fiemap() to return those values to | ||
| 146 | * userspace. For this reason, a return code of -EBADR should be preserved. | ||
| 147 | * | ||
| 148 | * Returns 0 on success, -EBADR on bad flags. | ||
| 149 | */ | ||
| 150 | int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags) | ||
| 151 | { | ||
| 152 | u32 incompat_flags; | ||
| 153 | |||
| 154 | incompat_flags = fieinfo->fi_flags & ~(FIEMAP_FLAGS_COMPAT & fs_flags); | ||
| 155 | if (incompat_flags) { | ||
| 156 | fieinfo->fi_flags = incompat_flags; | ||
| 157 | return -EBADR; | ||
| 158 | } | ||
| 159 | return 0; | ||
| 160 | } | ||
| 161 | EXPORT_SYMBOL(fiemap_check_flags); | ||
| 162 | |||
| 163 | static int fiemap_check_ranges(struct super_block *sb, | ||
| 164 | u64 start, u64 len, u64 *new_len) | ||
| 165 | { | ||
| 166 | *new_len = len; | ||
| 167 | |||
| 168 | if (len == 0) | ||
| 169 | return -EINVAL; | ||
| 170 | |||
| 171 | if (start > sb->s_maxbytes) | ||
| 172 | return -EFBIG; | ||
| 173 | |||
| 174 | /* | ||
| 175 | * Shrink request scope to what the fs can actually handle. | ||
| 176 | */ | ||
| 177 | if ((len > sb->s_maxbytes) || | ||
| 178 | (sb->s_maxbytes - len) < start) | ||
| 179 | *new_len = sb->s_maxbytes - start; | ||
| 180 | |||
| 181 | return 0; | ||
| 182 | } | ||
| 183 | |||
| 184 | static int ioctl_fiemap(struct file *filp, unsigned long arg) | ||
| 185 | { | ||
| 186 | struct fiemap fiemap; | ||
| 187 | struct fiemap_extent_info fieinfo = { 0, }; | ||
| 188 | struct inode *inode = filp->f_path.dentry->d_inode; | ||
| 189 | struct super_block *sb = inode->i_sb; | ||
| 190 | u64 len; | ||
| 191 | int error; | ||
| 192 | |||
| 193 | if (!inode->i_op->fiemap) | ||
| 194 | return -EOPNOTSUPP; | ||
| 195 | |||
| 196 | if (copy_from_user(&fiemap, (struct fiemap __user *)arg, | ||
| 197 | sizeof(struct fiemap))) | ||
| 198 | return -EFAULT; | ||
| 199 | |||
| 200 | if (fiemap.fm_extent_count > FIEMAP_MAX_EXTENTS) | ||
| 201 | return -EINVAL; | ||
| 202 | |||
| 203 | error = fiemap_check_ranges(sb, fiemap.fm_start, fiemap.fm_length, | ||
| 204 | &len); | ||
| 205 | if (error) | ||
| 206 | return error; | ||
| 207 | |||
| 208 | fieinfo.fi_flags = fiemap.fm_flags; | ||
| 209 | fieinfo.fi_extents_max = fiemap.fm_extent_count; | ||
| 210 | fieinfo.fi_extents_start = (struct fiemap_extent *)(arg + sizeof(fiemap)); | ||
| 211 | |||
| 212 | if (fiemap.fm_extent_count != 0 && | ||
| 213 | !access_ok(VERIFY_WRITE, fieinfo.fi_extents_start, | ||
| 214 | fieinfo.fi_extents_max * sizeof(struct fiemap_extent))) | ||
| 215 | return -EFAULT; | ||
| 216 | |||
| 217 | if (fieinfo.fi_flags & FIEMAP_FLAG_SYNC) | ||
| 218 | filemap_write_and_wait(inode->i_mapping); | ||
| 219 | |||
| 220 | error = inode->i_op->fiemap(inode, &fieinfo, fiemap.fm_start, len); | ||
| 221 | fiemap.fm_flags = fieinfo.fi_flags; | ||
| 222 | fiemap.fm_mapped_extents = fieinfo.fi_extents_mapped; | ||
| 223 | if (copy_to_user((char *)arg, &fiemap, sizeof(fiemap))) | ||
| 224 | error = -EFAULT; | ||
| 225 | |||
| 226 | return error; | ||
| 227 | } | ||
| 228 | |||
| 229 | #ifdef CONFIG_BLOCK | ||
| 230 | |||
| 231 | #define blk_to_logical(inode, blk) (blk << (inode)->i_blkbits) | ||
| 232 | #define logical_to_blk(inode, offset) (offset >> (inode)->i_blkbits); | ||
| 233 | |||
| 234 | /* | ||
| 235 | * @inode - the inode to map | ||
| 236 | * @arg - the pointer to userspace where we copy everything to | ||
| 237 | * @get_block - the fs's get_block function | ||
| 238 | * | ||
| 239 | * This does FIEMAP for block based inodes. Basically it will just loop | ||
| 240 | * through get_block until we hit the number of extents we want to map, or we | ||
| 241 | * go past the end of the file and hit a hole. | ||
| 242 | * | ||
| 243 | * If it is possible to have data blocks beyond a hole past @inode->i_size, then | ||
| 244 | * please do not use this function, it will stop at the first unmapped block | ||
| 245 | * beyond i_size | ||
| 246 | */ | ||
| 247 | int generic_block_fiemap(struct inode *inode, | ||
| 248 | struct fiemap_extent_info *fieinfo, u64 start, | ||
| 249 | u64 len, get_block_t *get_block) | ||
| 250 | { | ||
| 251 | struct buffer_head tmp; | ||
| 252 | unsigned int start_blk; | ||
| 253 | long long length = 0, map_len = 0; | ||
| 254 | u64 logical = 0, phys = 0, size = 0; | ||
| 255 | u32 flags = FIEMAP_EXTENT_MERGED; | ||
| 256 | int ret = 0; | ||
| 257 | |||
| 258 | if ((ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC))) | ||
| 259 | return ret; | ||
| 260 | |||
| 261 | start_blk = logical_to_blk(inode, start); | ||
| 262 | |||
| 263 | /* guard against change */ | ||
| 264 | mutex_lock(&inode->i_mutex); | ||
| 265 | |||
| 266 | length = (long long)min_t(u64, len, i_size_read(inode)); | ||
| 267 | map_len = length; | ||
| 268 | |||
| 269 | do { | ||
| 270 | /* | ||
| 271 | * we set b_size to the total size we want so it will map as | ||
| 272 | * many contiguous blocks as possible at once | ||
| 273 | */ | ||
| 274 | memset(&tmp, 0, sizeof(struct buffer_head)); | ||
| 275 | tmp.b_size = map_len; | ||
| 276 | |||
| 277 | ret = get_block(inode, start_blk, &tmp, 0); | ||
| 278 | if (ret) | ||
| 279 | break; | ||
| 280 | |||
| 281 | /* HOLE */ | ||
| 282 | if (!buffer_mapped(&tmp)) { | ||
| 283 | /* | ||
| 284 | * first hole after going past the EOF, this is our | ||
| 285 | * last extent | ||
| 286 | */ | ||
| 287 | if (length <= 0) { | ||
| 288 | flags = FIEMAP_EXTENT_MERGED|FIEMAP_EXTENT_LAST; | ||
| 289 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
| 290 | phys, size, | ||
| 291 | flags); | ||
| 292 | break; | ||
| 293 | } | ||
| 294 | |||
| 295 | length -= blk_to_logical(inode, 1); | ||
| 296 | |||
| 297 | /* if we have holes up to/past EOF then we're done */ | ||
| 298 | if (length <= 0) | ||
| 299 | break; | ||
| 300 | |||
| 301 | start_blk++; | ||
| 302 | } else { | ||
| 303 | if (length <= 0 && size) { | ||
| 304 | ret = fiemap_fill_next_extent(fieinfo, logical, | ||
| 305 | phys, size, | ||
| 306 | flags); | ||
| 307 | if (ret) | ||
| 308 | break; | ||
| 309 | } | ||
| 310 | |||
| 311 | logical = blk_to_logical(inode, start_blk); | ||
| 312 | phys = blk_to_logical(inode, tmp.b_blocknr); | ||
| 313 | size = tmp.b_size; | ||
| 314 | flags = FIEMAP_EXTENT_MERGED; | ||
| 315 | |||
| 316 | length -= tmp.b_size; | ||
| 317 | start_blk += logical_to_blk(inode, size); | ||
| 318 | |||
| 319 | /* | ||
| 320 | * if we are past the EOF we need to loop again to see | ||
| 321 | * if there is a hole so we can mark this extent as the | ||
| 322 | * last one, and if not keep mapping things until we | ||
| 323 | * find a hole, or we run out of slots in the extent | ||
| 324 | * array | ||
| 325 | */ | ||
| 326 | if (length <= 0) | ||
| 327 | continue; | ||
| 328 | |||
| 329 | ret = fiemap_fill_next_extent(fieinfo, logical, phys, | ||
| 330 | size, flags); | ||
| 331 | if (ret) | ||
| 332 | break; | ||
| 333 | } | ||
| 334 | cond_resched(); | ||
| 335 | } while (1); | ||
| 336 | |||
| 337 | mutex_unlock(&inode->i_mutex); | ||
| 338 | |||
| 339 | /* if ret is 1 then we just hit the end of the extent array */ | ||
| 340 | if (ret == 1) | ||
| 341 | ret = 0; | ||
| 342 | |||
| 343 | return ret; | ||
| 344 | } | ||
| 345 | EXPORT_SYMBOL(generic_block_fiemap); | ||
| 346 | |||
| 347 | #endif /* CONFIG_BLOCK */ | ||
| 348 | |||
| 74 | static int file_ioctl(struct file *filp, unsigned int cmd, | 349 | static int file_ioctl(struct file *filp, unsigned int cmd, |
| 75 | unsigned long arg) | 350 | unsigned long arg) |
| 76 | { | 351 | { |
| @@ -80,6 +355,8 @@ static int file_ioctl(struct file *filp, unsigned int cmd, | |||
| 80 | switch (cmd) { | 355 | switch (cmd) { |
| 81 | case FIBMAP: | 356 | case FIBMAP: |
| 82 | return ioctl_fibmap(filp, p); | 357 | return ioctl_fibmap(filp, p); |
| 358 | case FS_IOC_FIEMAP: | ||
| 359 | return ioctl_fiemap(filp, arg); | ||
| 83 | case FIGETBSZ: | 360 | case FIGETBSZ: |
| 84 | return put_user(inode->i_sb->s_blocksize, p); | 361 | return put_user(inode->i_sb->s_blocksize, p); |
| 85 | case FIONREAD: | 362 | case FIONREAD: |
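
The FS_IOC_FIEMAP case added to file_ioctl() above dispatches to ioctl_fiemap(), which expects a struct fiemap header followed immediately in user memory by fm_extent_count slots of struct fiemap_extent. A minimal userspace sketch of the usual two-pass call sequence follows; it assumes the interface definitions are available from <linux/fs.h> and <linux/fiemap.h> (the uapi headers are not part of the hunks shown here) and abbreviates error reporting:

#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(int argc, char **argv)
{
	struct fiemap probe, *fm;
	size_t i, n;
	int fd;

	if (argc != 2)
		return 1;
	fd = open(argv[1], O_RDONLY);
	if (fd < 0)
		return 1;

	/* Pass 1: fm_extent_count == 0 only counts the extents. */
	memset(&probe, 0, sizeof(probe));
	probe.fm_length = ~0ULL;		/* map the whole file */
	probe.fm_flags = FIEMAP_FLAG_SYNC;	/* flush dirty data first */
	if (ioctl(fd, FS_IOC_FIEMAP, &probe) < 0)
		return 1;

	/* Pass 2: allocate the extent array directly after the header. */
	n = probe.fm_mapped_extents;
	fm = calloc(1, sizeof(*fm) + n * sizeof(struct fiemap_extent));
	if (!fm)
		return 1;
	fm->fm_length = ~0ULL;
	fm->fm_extent_count = n;
	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0)
		return 1;

	for (i = 0; i < fm->fm_mapped_extents; i++)
		printf("logical %llu physical %llu length %llu flags 0x%x\n",
		       (unsigned long long)fm->fm_extents[i].fe_logical,
		       (unsigned long long)fm->fm_extents[i].fe_physical,
		       (unsigned long long)fm->fm_extents[i].fe_length,
		       (unsigned int)fm->fm_extents[i].fe_flags);
	free(fm);
	close(fd);
	return 0;
}

Passing fm_extent_count == 0 makes the kernel only count extents (fiemap_fill_next_extent() bumps fi_extents_mapped without copying anything out), and a return of -EBADR means fm_flags has been rewritten to the subset of requested flags the filesystem does not support, as described in the fiemap_check_flags() comment above.
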
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 26948a6033b6..3f8af0f1505b 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c | |||
| @@ -310,7 +310,7 @@ enum { | |||
| 310 | Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, | 310 | Opt_nocompress, Opt_hide, Opt_showassoc, Opt_dmode, |
| 311 | }; | 311 | }; |
| 312 | 312 | ||
| 313 | static match_table_t tokens = { | 313 | static const match_table_t tokens = { |
| 314 | {Opt_norock, "norock"}, | 314 | {Opt_norock, "norock"}, |
| 315 | {Opt_nojoliet, "nojoliet"}, | 315 | {Opt_nojoliet, "nojoliet"}, |
| 316 | {Opt_unhide, "unhide"}, | 316 | {Opt_unhide, "unhide"}, |
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig new file mode 100644 index 000000000000..4e28beeed157 --- /dev/null +++ b/fs/jbd/Kconfig | |||
| @@ -0,0 +1,30 @@ | |||
| 1 | config JBD | ||
| 2 | tristate | ||
| 3 | help | ||
| 4 | This is a generic journalling layer for block devices. It is | ||
| 5 | currently used by the ext3 file system, but it could also be | ||
| 6 | used to add journal support to other file systems or block | ||
| 7 | devices such as RAID or LVM. | ||
| 8 | |||
| 9 | If you are using the ext3 file system, you need to say Y here. | ||
| 10 | If you are not using ext3 then you will probably want to say N. | ||
| 11 | |||
| 12 | To compile this device as a module, choose M here: the module will be | ||
| 13 | called jbd. If you are compiling ext3 into the kernel, you | ||
| 14 | cannot compile this code as a module. | ||
| 15 | |||
| 16 | config JBD_DEBUG | ||
| 17 | bool "JBD (ext3) debugging support" | ||
| 18 | depends on JBD && DEBUG_FS | ||
| 19 | help | ||
| 20 | If you are using the ext3 journaled file system (or potentially any | ||
| 21 | other file system/device using JBD), this option allows you to | ||
| 22 | enable debugging output while the system is running, in order to | ||
| 23 | help track down any problems you are having. By default the | ||
| 24 | debugging output will be turned off. | ||
| 25 | |||
| 26 | If you select Y here, then you will be able to turn on debugging | ||
| 27 | with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a | ||
| 28 | number between 1 and 5, the higher the number, the more debugging | ||
| 29 | output is generated. To turn debugging off again, do | ||
| 30 | "echo 0 > /sys/kernel/debug/jbd/jbd-debug". | ||
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index ae08c057e751..25719d902c51 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c | |||
| @@ -482,6 +482,8 @@ void journal_commit_transaction(journal_t *journal) | |||
| 482 | printk(KERN_WARNING | 482 | printk(KERN_WARNING |
| 483 | "JBD: Detected IO errors while flushing file data " | 483 | "JBD: Detected IO errors while flushing file data " |
| 484 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 484 | "on %s\n", bdevname(journal->j_fs_dev, b)); |
| 485 | if (journal->j_flags & JFS_ABORT_ON_SYNCDATA_ERR) | ||
| 486 | journal_abort(journal, err); | ||
| 485 | err = 0; | 487 | err = 0; |
| 486 | } | 488 | } |
| 487 | 489 | ||
| @@ -518,9 +520,10 @@ void journal_commit_transaction(journal_t *journal) | |||
| 518 | jh = commit_transaction->t_buffers; | 520 | jh = commit_transaction->t_buffers; |
| 519 | 521 | ||
| 520 | /* If we're in abort mode, we just un-journal the buffer and | 522 | /* If we're in abort mode, we just un-journal the buffer and |
| 521 | release it for background writing. */ | 523 | release it. */ |
| 522 | 524 | ||
| 523 | if (is_journal_aborted(journal)) { | 525 | if (is_journal_aborted(journal)) { |
| 526 | clear_buffer_jbddirty(jh2bh(jh)); | ||
| 524 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 527 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
| 525 | journal_refile_buffer(journal, jh); | 528 | journal_refile_buffer(journal, jh); |
| 526 | /* If that was the last one, we need to clean up | 529 | /* If that was the last one, we need to clean up |
| @@ -762,6 +765,9 @@ wait_for_iobuf: | |||
| 762 | /* AKPM: bforget here */ | 765 | /* AKPM: bforget here */ |
| 763 | } | 766 | } |
| 764 | 767 | ||
| 768 | if (err) | ||
| 769 | journal_abort(journal, err); | ||
| 770 | |||
| 765 | jbd_debug(3, "JBD: commit phase 6\n"); | 771 | jbd_debug(3, "JBD: commit phase 6\n"); |
| 766 | 772 | ||
| 767 | if (journal_write_commit_record(journal, commit_transaction)) | 773 | if (journal_write_commit_record(journal, commit_transaction)) |
| @@ -852,6 +858,8 @@ restart_loop: | |||
| 852 | if (buffer_jbddirty(bh)) { | 858 | if (buffer_jbddirty(bh)) { |
| 853 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 859 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
| 854 | __journal_insert_checkpoint(jh, commit_transaction); | 860 | __journal_insert_checkpoint(jh, commit_transaction); |
| 861 | if (is_journal_aborted(journal)) | ||
| 862 | clear_buffer_jbddirty(bh); | ||
| 855 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 863 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
| 856 | __journal_refile_buffer(jh); | 864 | __journal_refile_buffer(jh); |
| 857 | jbd_unlock_bh_state(bh); | 865 | jbd_unlock_bh_state(bh); |
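The JFS_ABORT_ON_SYNCDATA_ERR branch added above only fires if a client filesystem opts in by setting that flag on its journal. A minimal sketch of such an opt-in follows; the helper name is illustrative and not part of this patch, and taking j_state_lock simply mirrors the usual pattern for updating j_flags.

#include <linux/jbd.h>

/* Illustrative only: ask JBD to abort the journal when flushing
 * ordered data hits a write error, instead of merely warning. */
static void example_enable_abort_on_syncdata_err(journal_t *journal)
{
	spin_lock(&journal->j_state_lock);
	journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
	spin_unlock(&journal->j_state_lock);
}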
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 0540ca27a446..d15cd6e7251e 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c | |||
| @@ -954,9 +954,10 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
| 954 | journal_t *journal = handle->h_transaction->t_journal; | 954 | journal_t *journal = handle->h_transaction->t_journal; |
| 955 | int need_brelse = 0; | 955 | int need_brelse = 0; |
| 956 | struct journal_head *jh; | 956 | struct journal_head *jh; |
| 957 | int ret = 0; | ||
| 957 | 958 | ||
| 958 | if (is_handle_aborted(handle)) | 959 | if (is_handle_aborted(handle)) |
| 959 | return 0; | 960 | return ret; |
| 960 | 961 | ||
| 961 | jh = journal_add_journal_head(bh); | 962 | jh = journal_add_journal_head(bh); |
| 962 | JBUFFER_TRACE(jh, "entry"); | 963 | JBUFFER_TRACE(jh, "entry"); |
| @@ -1067,7 +1068,16 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) | |||
| 1067 | time if it is redirtied */ | 1068 | time if it is redirtied */ |
| 1068 | } | 1069 | } |
| 1069 | 1070 | ||
| 1070 | /* journal_clean_data_list() may have got there first */ | 1071 | /* |
| 1072 | * We cannot remove the buffer with io error from the | ||
| 1073 | * committing transaction, because otherwise it would | ||
| 1074 | * miss the error and the commit would not abort. | ||
| 1075 | */ | ||
| 1076 | if (unlikely(!buffer_uptodate(bh))) { | ||
| 1077 | ret = -EIO; | ||
| 1078 | goto no_journal; | ||
| 1079 | } | ||
| 1080 | |||
| 1071 | if (jh->b_transaction != NULL) { | 1081 | if (jh->b_transaction != NULL) { |
| 1072 | JBUFFER_TRACE(jh, "unfile from commit"); | 1082 | JBUFFER_TRACE(jh, "unfile from commit"); |
| 1073 | __journal_temp_unlink_buffer(jh); | 1083 | __journal_temp_unlink_buffer(jh); |
| @@ -1108,7 +1118,7 @@ no_journal: | |||
| 1108 | } | 1118 | } |
| 1109 | JBUFFER_TRACE(jh, "exit"); | 1119 | JBUFFER_TRACE(jh, "exit"); |
| 1110 | journal_put_journal_head(jh); | 1120 | journal_put_journal_head(jh); |
| 1111 | return 0; | 1121 | return ret; |
| 1112 | } | 1122 | } |
| 1113 | 1123 | ||
| 1114 | /** | 1124 | /** |
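With journal_dirty_data() now returning -EIO for a buffer that hit a write error, callers can propagate the failure instead of losing it. The sketch below shows one possible caller; the wrapper name is invented for illustration, and aborting the journal is just one reasonable policy, not something this patch requires.

#include <linux/jbd.h>
#include <linux/buffer_head.h>

/* Illustrative wrapper: surface the new error return to the caller
 * and make the failure sticky by aborting the journal. */
static int example_dirty_data(handle_t *handle, struct buffer_head *bh)
{
	int err = journal_dirty_data(handle, bh);

	if (err)
		journal_abort(handle->h_transaction->t_journal, err);
	return err;
}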
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig new file mode 100644 index 000000000000..f32f346f4b0a --- /dev/null +++ b/fs/jbd2/Kconfig | |||
| @@ -0,0 +1,33 @@ | |||
| 1 | config JBD2 | ||
| 2 | tristate | ||
| 3 | select CRC32 | ||
| 4 | help | ||
| 5 | This is a generic journaling layer for block devices that support | ||
| 6 | both 32-bit and 64-bit block numbers. It is currently used by | ||
| 7 | the ext4 and OCFS2 filesystems, but it could also be used to add | ||
| 8 | journal support to other file systems or block devices such | ||
| 9 | as RAID or LVM. | ||
| 10 | |||
| 11 | If you are using ext4 or OCFS2, you need to say Y here. | ||
| 12 | If you are not using ext4 or OCFS2 then you will | ||
| 13 | probably want to say N. | ||
| 14 | |||
| 15 | To compile this code as a module, choose M here. The module will be | ||
| 16 | called jbd2. If you are compiling ext4 or OCFS2 into the kernel, | ||
| 17 | you cannot compile this code as a module. | ||
| 18 | |||
| 19 | config JBD2_DEBUG | ||
| 20 | bool "JBD2 (ext4) debugging support" | ||
| 21 | depends on JBD2 && DEBUG_FS | ||
| 22 | help | ||
| 23 | If you are using the ext4 journaled file system (or | ||
| 24 | potentially any other filesystem/device using JBD2), this option | ||
| 25 | allows you to enable debugging output while the system is running, | ||
| 26 | in order to help track down any problems you are having. | ||
| 27 | By default, the debugging output will be turned off. | ||
| 28 | |||
| 29 | If you select Y here, then you will be able to turn on debugging | ||
| 30 | with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a | ||
| 31 | number between 1 and 5. The higher the number, the more debugging | ||
| 32 | output is generated. To turn debugging off again, do | ||
| 33 | "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". | ||
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8a..9203c3332f17 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c | |||
| @@ -20,6 +20,7 @@ | |||
| 20 | #include <linux/time.h> | 20 | #include <linux/time.h> |
| 21 | #include <linux/fs.h> | 21 | #include <linux/fs.h> |
| 22 | #include <linux/jbd2.h> | 22 | #include <linux/jbd2.h> |
| 23 | #include <linux/marker.h> | ||
| 23 | #include <linux/errno.h> | 24 | #include <linux/errno.h> |
| 24 | #include <linux/slab.h> | 25 | #include <linux/slab.h> |
| 25 | 26 | ||
| @@ -93,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) | |||
| 93 | int ret = 0; | 94 | int ret = 0; |
| 94 | struct buffer_head *bh = jh2bh(jh); | 95 | struct buffer_head *bh = jh2bh(jh); |
| 95 | 96 | ||
| 96 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { | 97 | if (jh->b_jlist == BJ_None && !buffer_locked(bh) && |
| 98 | !buffer_dirty(bh) && !buffer_write_io_error(bh)) { | ||
| 97 | JBUFFER_TRACE(jh, "remove from checkpoint list"); | 99 | JBUFFER_TRACE(jh, "remove from checkpoint list"); |
| 98 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; | 100 | ret = __jbd2_journal_remove_checkpoint(jh) + 1; |
| 99 | jbd_unlock_bh_state(bh); | 101 | jbd_unlock_bh_state(bh); |
| @@ -126,14 +128,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) | |||
| 126 | 128 | ||
| 127 | /* | 129 | /* |
| 128 | * Test again, another process may have checkpointed while we | 130 | * Test again, another process may have checkpointed while we |
| 129 | * were waiting for the checkpoint lock | 131 | * were waiting for the checkpoint lock. If there are no |
| 132 | * outstanding transactions there is nothing to checkpoint and | ||
| 133 | * we can't make progress. Abort the journal in this case. | ||
| 130 | */ | 134 | */ |
| 131 | spin_lock(&journal->j_state_lock); | 135 | spin_lock(&journal->j_state_lock); |
| 136 | spin_lock(&journal->j_list_lock); | ||
| 132 | nblocks = jbd_space_needed(journal); | 137 | nblocks = jbd_space_needed(journal); |
| 133 | if (__jbd2_log_space_left(journal) < nblocks) { | 138 | if (__jbd2_log_space_left(journal) < nblocks) { |
| 139 | int chkpt = journal->j_checkpoint_transactions != NULL; | ||
| 140 | |||
| 141 | spin_unlock(&journal->j_list_lock); | ||
| 134 | spin_unlock(&journal->j_state_lock); | 142 | spin_unlock(&journal->j_state_lock); |
| 135 | jbd2_log_do_checkpoint(journal); | 143 | if (chkpt) { |
| 144 | jbd2_log_do_checkpoint(journal); | ||
| 145 | } else { | ||
| 146 | printk(KERN_ERR "%s: no transactions\n", | ||
| 147 | __func__); | ||
| 148 | jbd2_journal_abort(journal, 0); | ||
| 149 | } | ||
| 150 | |||
| 136 | spin_lock(&journal->j_state_lock); | 151 | spin_lock(&journal->j_state_lock); |
| 152 | } else { | ||
| 153 | spin_unlock(&journal->j_list_lock); | ||
| 137 | } | 154 | } |
| 138 | mutex_unlock(&journal->j_checkpoint_mutex); | 155 | mutex_unlock(&journal->j_checkpoint_mutex); |
| 139 | } | 156 | } |
| @@ -160,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) | |||
| 160 | * buffers. Note that we take the buffers in the opposite ordering | 177 | * buffers. Note that we take the buffers in the opposite ordering |
| 161 | * from the one in which they were submitted for IO. | 178 | * from the one in which they were submitted for IO. |
| 162 | * | 179 | * |
| 180 | * Return 0 on success, and return <0 if some buffers have failed | ||
| 181 | * to be written out. | ||
| 182 | * | ||
| 163 | * Called with j_list_lock held. | 183 | * Called with j_list_lock held. |
| 164 | */ | 184 | */ |
| 165 | static void __wait_cp_io(journal_t *journal, transaction_t *transaction) | 185 | static int __wait_cp_io(journal_t *journal, transaction_t *transaction) |
| 166 | { | 186 | { |
| 167 | struct journal_head *jh; | 187 | struct journal_head *jh; |
| 168 | struct buffer_head *bh; | 188 | struct buffer_head *bh; |
| 169 | tid_t this_tid; | 189 | tid_t this_tid; |
| 170 | int released = 0; | 190 | int released = 0; |
| 191 | int ret = 0; | ||
| 171 | 192 | ||
| 172 | this_tid = transaction->t_tid; | 193 | this_tid = transaction->t_tid; |
| 173 | restart: | 194 | restart: |
| 174 | /* Did somebody clean up the transaction in the meanwhile? */ | 195 | /* Did somebody clean up the transaction in the meanwhile? */ |
| 175 | if (journal->j_checkpoint_transactions != transaction || | 196 | if (journal->j_checkpoint_transactions != transaction || |
| 176 | transaction->t_tid != this_tid) | 197 | transaction->t_tid != this_tid) |
| 177 | return; | 198 | return ret; |
| 178 | while (!released && transaction->t_checkpoint_io_list) { | 199 | while (!released && transaction->t_checkpoint_io_list) { |
| 179 | jh = transaction->t_checkpoint_io_list; | 200 | jh = transaction->t_checkpoint_io_list; |
| 180 | bh = jh2bh(jh); | 201 | bh = jh2bh(jh); |
| @@ -194,6 +215,9 @@ restart: | |||
| 194 | spin_lock(&journal->j_list_lock); | 215 | spin_lock(&journal->j_list_lock); |
| 195 | goto restart; | 216 | goto restart; |
| 196 | } | 217 | } |
| 218 | if (unlikely(buffer_write_io_error(bh))) | ||
| 219 | ret = -EIO; | ||
| 220 | |||
| 197 | /* | 221 | /* |
| 198 | * Now in whatever state the buffer currently is, we know that | 222 | * Now in whatever state the buffer currently is, we know that |
| 199 | * it has been written out and so we can drop it from the list | 223 | * it has been written out and so we can drop it from the list |
| @@ -203,6 +227,8 @@ restart: | |||
| 203 | jbd2_journal_remove_journal_head(bh); | 227 | jbd2_journal_remove_journal_head(bh); |
| 204 | __brelse(bh); | 228 | __brelse(bh); |
| 205 | } | 229 | } |
| 230 | |||
| 231 | return ret; | ||
| 206 | } | 232 | } |
| 207 | 233 | ||
| 208 | #define NR_BATCH 64 | 234 | #define NR_BATCH 64 |
| @@ -226,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) | |||
| 226 | * Try to flush one buffer from the checkpoint list to disk. | 252 | * Try to flush one buffer from the checkpoint list to disk. |
| 227 | * | 253 | * |
| 228 | * Return 1 if something happened which requires us to abort the current | 254 | * Return 1 if something happened which requires us to abort the current |
| 229 | * scan of the checkpoint list. | 255 | * scan of the checkpoint list. Return <0 if the buffer has failed to |
| 256 | * be written out. | ||
| 230 | * | 257 | * |
| 231 | * Called with j_list_lock held and drops it if 1 is returned | 258 | * Called with j_list_lock held and drops it if 1 is returned |
| 232 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it | 259 | * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it |
| @@ -258,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 258 | jbd2_log_wait_commit(journal, tid); | 285 | jbd2_log_wait_commit(journal, tid); |
| 259 | ret = 1; | 286 | ret = 1; |
| 260 | } else if (!buffer_dirty(bh)) { | 287 | } else if (!buffer_dirty(bh)) { |
| 288 | ret = 1; | ||
| 289 | if (unlikely(buffer_write_io_error(bh))) | ||
| 290 | ret = -EIO; | ||
| 261 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); | 291 | J_ASSERT_JH(jh, !buffer_jbddirty(bh)); |
| 262 | BUFFER_TRACE(bh, "remove from checkpoint"); | 292 | BUFFER_TRACE(bh, "remove from checkpoint"); |
| 263 | __jbd2_journal_remove_checkpoint(jh); | 293 | __jbd2_journal_remove_checkpoint(jh); |
| @@ -265,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 265 | jbd_unlock_bh_state(bh); | 295 | jbd_unlock_bh_state(bh); |
| 266 | jbd2_journal_remove_journal_head(bh); | 296 | jbd2_journal_remove_journal_head(bh); |
| 267 | __brelse(bh); | 297 | __brelse(bh); |
| 268 | ret = 1; | ||
| 269 | } else { | 298 | } else { |
| 270 | /* | 299 | /* |
| 271 | * Important: we are about to write the buffer, and | 300 | * Important: we are about to write the buffer, and |
| @@ -298,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, | |||
| 298 | * to disk. We submit larger chunks of data at once. | 327 | * to disk. We submit larger chunks of data at once. |
| 299 | * | 328 | * |
| 300 | * The journal should be locked before calling this function. | 329 | * The journal should be locked before calling this function. |
| 330 | * Called with j_checkpoint_mutex held. | ||
| 301 | */ | 331 | */ |
| 302 | int jbd2_log_do_checkpoint(journal_t *journal) | 332 | int jbd2_log_do_checkpoint(journal_t *journal) |
| 303 | { | 333 | { |
| @@ -313,6 +343,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
| 313 | * journal straight away. | 343 | * journal straight away. |
| 314 | */ | 344 | */ |
| 315 | result = jbd2_cleanup_journal_tail(journal); | 345 | result = jbd2_cleanup_journal_tail(journal); |
| 346 | trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", | ||
| 347 | journal->j_devname, result); | ||
| 316 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); | 348 | jbd_debug(1, "cleanup_journal_tail returned %d\n", result); |
| 317 | if (result <= 0) | 349 | if (result <= 0) |
| 318 | return result; | 350 | return result; |
| @@ -321,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) | |||
| 321 | * OK, we need to start writing disk blocks. Take one transaction | 353 | * OK, we need to start writing disk blocks. Take one transaction |
| 322 | * and write it. | 354 | * and write it. |
| 323 | */ | 355 | */ |
| 356 | result = 0; | ||
| 324 | spin_lock(&journal->j_list_lock); | 357 | spin_lock(&journal->j_list_lock); |
| 325 | if (!journal->j_checkpoint_transactions) | 358 | if (!journal->j_checkpoint_transactions) |
| 326 | goto out; | 359 | goto out; |
| @@ -339,7 +372,7 @@ restart: | |||
| 339 | int batch_count = 0; | 372 | int batch_count = 0; |
| 340 | struct buffer_head *bhs[NR_BATCH]; | 373 | struct buffer_head *bhs[NR_BATCH]; |
| 341 | struct journal_head *jh; | 374 | struct journal_head *jh; |
| 342 | int retry = 0; | 375 | int retry = 0, err; |
| 343 | 376 | ||
| 344 | while (!retry && transaction->t_checkpoint_list) { | 377 | while (!retry && transaction->t_checkpoint_list) { |
| 345 | struct buffer_head *bh; | 378 | struct buffer_head *bh; |
| @@ -353,6 +386,8 @@ restart: | |||
| 353 | } | 386 | } |
| 354 | retry = __process_buffer(journal, jh, bhs, &batch_count, | 387 | retry = __process_buffer(journal, jh, bhs, &batch_count, |
| 355 | transaction); | 388 | transaction); |
| 389 | if (retry < 0 && !result) | ||
| 390 | result = retry; | ||
| 356 | if (!retry && (need_resched() || | 391 | if (!retry && (need_resched() || |
| 357 | spin_needbreak(&journal->j_list_lock))) { | 392 | spin_needbreak(&journal->j_list_lock))) { |
| 358 | spin_unlock(&journal->j_list_lock); | 393 | spin_unlock(&journal->j_list_lock); |
| @@ -377,14 +412,18 @@ restart: | |||
| 377 | * Now we have cleaned up the first transaction's checkpoint | 412 | * Now we have cleaned up the first transaction's checkpoint |
| 378 | * list. Let's clean up the second one | 413 | * list. Let's clean up the second one |
| 379 | */ | 414 | */ |
| 380 | __wait_cp_io(journal, transaction); | 415 | err = __wait_cp_io(journal, transaction); |
| 416 | if (!result) | ||
| 417 | result = err; | ||
| 381 | } | 418 | } |
| 382 | out: | 419 | out: |
| 383 | spin_unlock(&journal->j_list_lock); | 420 | spin_unlock(&journal->j_list_lock); |
| 384 | result = jbd2_cleanup_journal_tail(journal); | ||
| 385 | if (result < 0) | 421 | if (result < 0) |
| 386 | return result; | 422 | jbd2_journal_abort(journal, result); |
| 387 | return 0; | 423 | else |
| 424 | result = jbd2_cleanup_journal_tail(journal); | ||
| 425 | |||
| 426 | return (result < 0) ? result : 0; | ||
| 388 | } | 427 | } |
| 389 | 428 | ||
| 390 | /* | 429 | /* |
| @@ -400,8 +439,9 @@ out: | |||
| 400 | * This is the only part of the journaling code which really needs to be | 439 | * This is the only part of the journaling code which really needs to be |
| 401 | * aware of transaction aborts. Checkpointing involves writing to the | 440 | * aware of transaction aborts. Checkpointing involves writing to the |
| 402 | * main filesystem area rather than to the journal, so it can proceed | 441 | * main filesystem area rather than to the journal, so it can proceed |
| 403 | * even in abort state, but we must not update the journal superblock if | 442 | * even in abort state, but we must not update the super block if |
| 404 | * we have an abort error outstanding. | 443 | * checkpointing may have failed. Otherwise, we would lose some metadata |
| 444 | * buffers which should be written-back to the filesystem. | ||
| 405 | */ | 445 | */ |
| 406 | 446 | ||
| 407 | int jbd2_cleanup_journal_tail(journal_t *journal) | 447 | int jbd2_cleanup_journal_tail(journal_t *journal) |
| @@ -410,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) | |||
| 410 | tid_t first_tid; | 450 | tid_t first_tid; |
| 411 | unsigned long blocknr, freed; | 451 | unsigned long blocknr, freed; |
| 412 | 452 | ||
| 453 | if (is_journal_aborted(journal)) | ||
| 454 | return 1; | ||
| 455 | |||
| 413 | /* OK, work out the oldest transaction remaining in the log, and | 456 | /* OK, work out the oldest transaction remaining in the log, and |
| 414 | * the log block it starts at. | 457 | * the log block it starts at. |
| 415 | * | 458 | * |
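The checkpoint changes above all use the same idiom: note the first write error, keep scanning, and report it at the end. The fragment below restates that idiom in isolation with a hypothetical per-buffer check; it is not code from this patch.

#include <linux/buffer_head.h>
#include <linux/errno.h>

/* Hypothetical per-buffer check, mirroring the buffer_write_io_error()
 * tests added in __wait_cp_io() and __process_buffer(). */
static int example_check_one(struct buffer_head *bh)
{
	return buffer_write_io_error(bh) ? -EIO : 0;
}

static int example_check_all(struct buffer_head **bhs, int nr)
{
	int i, err, result = 0;

	for (i = 0; i < nr; i++) {
		err = example_check_one(bhs[i]);
		if (err && !result)
			result = err;	/* remember the first failure, keep scanning */
	}
	return result;
}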
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95ec..8b119e16aa36 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/time.h> | 16 | #include <linux/time.h> |
| 17 | #include <linux/fs.h> | 17 | #include <linux/fs.h> |
| 18 | #include <linux/jbd2.h> | 18 | #include <linux/jbd2.h> |
| 19 | #include <linux/marker.h> | ||
| 19 | #include <linux/errno.h> | 20 | #include <linux/errno.h> |
| 20 | #include <linux/slab.h> | 21 | #include <linux/slab.h> |
| 21 | #include <linux/mm.h> | 22 | #include <linux/mm.h> |
| @@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 126 | 127 | ||
| 127 | JBUFFER_TRACE(descriptor, "submit commit block"); | 128 | JBUFFER_TRACE(descriptor, "submit commit block"); |
| 128 | lock_buffer(bh); | 129 | lock_buffer(bh); |
| 129 | get_bh(bh); | 130 | clear_buffer_dirty(bh); |
| 130 | set_buffer_dirty(bh); | ||
| 131 | set_buffer_uptodate(bh); | 131 | set_buffer_uptodate(bh); |
| 132 | bh->b_end_io = journal_end_buffer_io_sync; | 132 | bh->b_end_io = journal_end_buffer_io_sync; |
| 133 | 133 | ||
| @@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 147 | * to remember if we sent a barrier request | 147 | * to remember if we sent a barrier request |
| 148 | */ | 148 | */ |
| 149 | if (ret == -EOPNOTSUPP && barrier_done) { | 149 | if (ret == -EOPNOTSUPP && barrier_done) { |
| 150 | char b[BDEVNAME_SIZE]; | ||
| 151 | |||
| 152 | printk(KERN_WARNING | 150 | printk(KERN_WARNING |
| 153 | "JBD: barrier-based sync failed on %s - " | 151 | "JBD: barrier-based sync failed on %s - " |
| 154 | "disabling barriers\n", | 152 | "disabling barriers\n", journal->j_devname); |
| 155 | bdevname(journal->j_dev, b)); | ||
| 156 | spin_lock(&journal->j_state_lock); | 153 | spin_lock(&journal->j_state_lock); |
| 157 | journal->j_flags &= ~JBD2_BARRIER; | 154 | journal->j_flags &= ~JBD2_BARRIER; |
| 158 | spin_unlock(&journal->j_state_lock); | 155 | spin_unlock(&journal->j_state_lock); |
| @@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, | |||
| 160 | /* And try again, without the barrier */ | 157 | /* And try again, without the barrier */ |
| 161 | lock_buffer(bh); | 158 | lock_buffer(bh); |
| 162 | set_buffer_uptodate(bh); | 159 | set_buffer_uptodate(bh); |
| 163 | set_buffer_dirty(bh); | 160 | clear_buffer_dirty(bh); |
| 164 | ret = submit_bh(WRITE, bh); | 161 | ret = submit_bh(WRITE, bh); |
| 165 | } | 162 | } |
| 166 | *cbh = bh; | 163 | *cbh = bh; |
| @@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 371 | commit_transaction = journal->j_running_transaction; | 368 | commit_transaction = journal->j_running_transaction; |
| 372 | J_ASSERT(commit_transaction->t_state == T_RUNNING); | 369 | J_ASSERT(commit_transaction->t_state == T_RUNNING); |
| 373 | 370 | ||
| 371 | trace_mark(jbd2_start_commit, "dev %s transaction %d", | ||
| 372 | journal->j_devname, commit_transaction->t_tid); | ||
| 374 | jbd_debug(1, "JBD: starting commit of transaction %d\n", | 373 | jbd_debug(1, "JBD: starting commit of transaction %d\n", |
| 375 | commit_transaction->t_tid); | 374 | commit_transaction->t_tid); |
| 376 | 375 | ||
| @@ -505,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) | |||
| 505 | jh = commit_transaction->t_buffers; | 504 | jh = commit_transaction->t_buffers; |
| 506 | 505 | ||
| 507 | /* If we're in abort mode, we just un-journal the buffer and | 506 | /* If we're in abort mode, we just un-journal the buffer and |
| 508 | release it for background writing. */ | 507 | release it. */ |
| 509 | 508 | ||
| 510 | if (is_journal_aborted(journal)) { | 509 | if (is_journal_aborted(journal)) { |
| 510 | clear_buffer_jbddirty(jh2bh(jh)); | ||
| 511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); | 511 | JBUFFER_TRACE(jh, "journal is aborting: refile"); |
| 512 | jbd2_journal_refile_buffer(journal, jh); | 512 | jbd2_journal_refile_buffer(journal, jh); |
| 513 | /* If that was the last one, we need to clean up | 513 | /* If that was the last one, we need to clean up |
| @@ -681,11 +681,11 @@ start_journal_io: | |||
| 681 | */ | 681 | */ |
| 682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); | 682 | err = journal_finish_inode_data_buffers(journal, commit_transaction); |
| 683 | if (err) { | 683 | if (err) { |
| 684 | char b[BDEVNAME_SIZE]; | ||
| 685 | |||
| 686 | printk(KERN_WARNING | 684 | printk(KERN_WARNING |
| 687 | "JBD2: Detected IO errors while flushing file data " | 685 | "JBD2: Detected IO errors while flushing file data " |
| 688 | "on %s\n", bdevname(journal->j_fs_dev, b)); | 686 | "on %s\n", journal->j_devname); |
| 687 | if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) | ||
| 688 | jbd2_journal_abort(journal, err); | ||
| 689 | err = 0; | 689 | err = 0; |
| 690 | } | 690 | } |
| 691 | 691 | ||
| @@ -786,6 +786,9 @@ wait_for_iobuf: | |||
| 786 | /* AKPM: bforget here */ | 786 | /* AKPM: bforget here */ |
| 787 | } | 787 | } |
| 788 | 788 | ||
| 789 | if (err) | ||
| 790 | jbd2_journal_abort(journal, err); | ||
| 791 | |||
| 789 | jbd_debug(3, "JBD: commit phase 5\n"); | 792 | jbd_debug(3, "JBD: commit phase 5\n"); |
| 790 | 793 | ||
| 791 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, | 794 | if (!JBD2_HAS_INCOMPAT_FEATURE(journal, |
| @@ -884,6 +887,8 @@ restart_loop: | |||
| 884 | if (buffer_jbddirty(bh)) { | 887 | if (buffer_jbddirty(bh)) { |
| 885 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); | 888 | JBUFFER_TRACE(jh, "add to new checkpointing trans"); |
| 886 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); | 889 | __jbd2_journal_insert_checkpoint(jh, commit_transaction); |
| 890 | if (is_journal_aborted(journal)) | ||
| 891 | clear_buffer_jbddirty(bh); | ||
| 887 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); | 892 | JBUFFER_TRACE(jh, "refile for checkpoint writeback"); |
| 888 | __jbd2_journal_refile_buffer(jh); | 893 | __jbd2_journal_refile_buffer(jh); |
| 889 | jbd_unlock_bh_state(bh); | 894 | jbd_unlock_bh_state(bh); |
| @@ -990,6 +995,12 @@ restart_loop: | |||
| 990 | } | 995 | } |
| 991 | spin_unlock(&journal->j_list_lock); | 996 | spin_unlock(&journal->j_list_lock); |
| 992 | 997 | ||
| 998 | if (journal->j_commit_callback) | ||
| 999 | journal->j_commit_callback(journal, commit_transaction); | ||
| 1000 | |||
| 1001 | trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", | ||
| 1002 | journal->j_devname, commit_transaction->t_tid, | ||
| 1003 | journal->j_tail_sequence); | ||
| 993 | jbd_debug(1, "JBD: commit %d complete, head %d\n", | 1004 | jbd_debug(1, "JBD: commit %d complete, head %d\n", |
| 994 | journal->j_commit_sequence, journal->j_tail_sequence); | 1005 | journal->j_commit_sequence, journal->j_tail_sequence); |
| 995 | 1006 | ||
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4edb..783de118de92 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c | |||
| @@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, | |||
| 597 | if (ret) | 597 | if (ret) |
| 598 | *retp = ret; | 598 | *retp = ret; |
| 599 | else { | 599 | else { |
| 600 | char b[BDEVNAME_SIZE]; | ||
| 601 | |||
| 602 | printk(KERN_ALERT "%s: journal block not found " | 600 | printk(KERN_ALERT "%s: journal block not found " |
| 603 | "at offset %lu on %s\n", | 601 | "at offset %lu on %s\n", |
| 604 | __func__, | 602 | __func__, blocknr, journal->j_devname); |
| 605 | blocknr, | ||
| 606 | bdevname(journal->j_dev, b)); | ||
| 607 | err = -EIO; | 603 | err = -EIO; |
| 608 | __journal_abort_soft(journal, err); | 604 | __journal_abort_soft(journal, err); |
| 609 | } | 605 | } |
| @@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; | |||
| 901 | 897 | ||
| 902 | static void jbd2_stats_proc_init(journal_t *journal) | 898 | static void jbd2_stats_proc_init(journal_t *journal) |
| 903 | { | 899 | { |
| 904 | char name[BDEVNAME_SIZE]; | 900 | journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); |
| 905 | |||
| 906 | bdevname(journal->j_dev, name); | ||
| 907 | journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); | ||
| 908 | if (journal->j_proc_entry) { | 901 | if (journal->j_proc_entry) { |
| 909 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, | 902 | proc_create_data("history", S_IRUGO, journal->j_proc_entry, |
| 910 | &jbd2_seq_history_fops, journal); | 903 | &jbd2_seq_history_fops, journal); |
| @@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) | |||
| 915 | 908 | ||
| 916 | static void jbd2_stats_proc_exit(journal_t *journal) | 909 | static void jbd2_stats_proc_exit(journal_t *journal) |
| 917 | { | 910 | { |
| 918 | char name[BDEVNAME_SIZE]; | ||
| 919 | |||
| 920 | bdevname(journal->j_dev, name); | ||
| 921 | remove_proc_entry("info", journal->j_proc_entry); | 911 | remove_proc_entry("info", journal->j_proc_entry); |
| 922 | remove_proc_entry("history", journal->j_proc_entry); | 912 | remove_proc_entry("history", journal->j_proc_entry); |
| 923 | remove_proc_entry(name, proc_jbd2_stats); | 913 | remove_proc_entry(journal->j_devname, proc_jbd2_stats); |
| 924 | } | 914 | } |
| 925 | 915 | ||
| 926 | static void journal_init_stats(journal_t *journal) | 916 | static void journal_init_stats(journal_t *journal) |
| @@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
| 1018 | { | 1008 | { |
| 1019 | journal_t *journal = journal_init_common(); | 1009 | journal_t *journal = journal_init_common(); |
| 1020 | struct buffer_head *bh; | 1010 | struct buffer_head *bh; |
| 1011 | char *p; | ||
| 1021 | int n; | 1012 | int n; |
| 1022 | 1013 | ||
| 1023 | if (!journal) | 1014 | if (!journal) |
| @@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, | |||
| 1039 | journal->j_fs_dev = fs_dev; | 1030 | journal->j_fs_dev = fs_dev; |
| 1040 | journal->j_blk_offset = start; | 1031 | journal->j_blk_offset = start; |
| 1041 | journal->j_maxlen = len; | 1032 | journal->j_maxlen = len; |
| 1033 | bdevname(journal->j_dev, journal->j_devname); | ||
| 1034 | p = journal->j_devname; | ||
| 1035 | while ((p = strchr(p, '/'))) | ||
| 1036 | *p = '!'; | ||
| 1042 | jbd2_stats_proc_init(journal); | 1037 | jbd2_stats_proc_init(journal); |
| 1043 | 1038 | ||
| 1044 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); | 1039 | bh = __getblk(journal->j_dev, start, journal->j_blocksize); |
| @@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
| 1061 | { | 1056 | { |
| 1062 | struct buffer_head *bh; | 1057 | struct buffer_head *bh; |
| 1063 | journal_t *journal = journal_init_common(); | 1058 | journal_t *journal = journal_init_common(); |
| 1059 | char *p; | ||
| 1064 | int err; | 1060 | int err; |
| 1065 | int n; | 1061 | int n; |
| 1066 | unsigned long long blocknr; | 1062 | unsigned long long blocknr; |
| @@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) | |||
| 1070 | 1066 | ||
| 1071 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; | 1067 | journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; |
| 1072 | journal->j_inode = inode; | 1068 | journal->j_inode = inode; |
| 1069 | bdevname(journal->j_dev, journal->j_devname); | ||
| 1070 | p = journal->j_devname; | ||
| 1071 | while ((p = strchr(p, '/'))) | ||
| 1072 | *p = '!'; | ||
| 1073 | p = journal->j_devname + strlen(journal->j_devname); | ||
| 1074 | sprintf(p, ":%lu", journal->j_inode->i_ino); | ||
| 1073 | jbd_debug(1, | 1075 | jbd_debug(1, |
| 1074 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", | 1076 | "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", |
| 1075 | journal, inode->i_sb->s_id, inode->i_ino, | 1077 | journal, inode->i_sb->s_id, inode->i_ino, |
| @@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
| 1253 | goto out; | 1255 | goto out; |
| 1254 | } | 1256 | } |
| 1255 | 1257 | ||
| 1258 | if (buffer_write_io_error(bh)) { | ||
| 1259 | /* | ||
| 1260 | * Oh, dear. A previous attempt to write the journal | ||
| 1261 | * superblock failed. This could happen because the | ||
| 1262 | * USB device was yanked out. Or it could happen to | ||
| 1263 | * be a transient write error and maybe the block will | ||
| 1264 | * be remapped. Nothing we can do but to retry the | ||
| 1265 | * write and hope for the best. | ||
| 1266 | */ | ||
| 1267 | printk(KERN_ERR "JBD2: previous I/O error detected " | ||
| 1268 | "for journal superblock update for %s.\n", | ||
| 1269 | journal->j_devname); | ||
| 1270 | clear_buffer_write_io_error(bh); | ||
| 1271 | set_buffer_uptodate(bh); | ||
| 1272 | } | ||
| 1273 | |||
| 1256 | spin_lock(&journal->j_state_lock); | 1274 | spin_lock(&journal->j_state_lock); |
| 1257 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", | 1275 | jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", |
| 1258 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); | 1276 | journal->j_tail, journal->j_tail_sequence, journal->j_errno); |
| @@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) | |||
| 1264 | 1282 | ||
| 1265 | BUFFER_TRACE(bh, "marking dirty"); | 1283 | BUFFER_TRACE(bh, "marking dirty"); |
| 1266 | mark_buffer_dirty(bh); | 1284 | mark_buffer_dirty(bh); |
| 1267 | if (wait) | 1285 | if (wait) { |
| 1268 | sync_dirty_buffer(bh); | 1286 | sync_dirty_buffer(bh); |
| 1269 | else | 1287 | if (buffer_write_io_error(bh)) { |
| 1288 | printk(KERN_ERR "JBD2: I/O error detected " | ||
| 1289 | "when updating journal superblock for %s.\n", | ||
| 1290 | journal->j_devname); | ||
| 1291 | clear_buffer_write_io_error(bh); | ||
| 1292 | set_buffer_uptodate(bh); | ||
| 1293 | } | ||
| 1294 | } else | ||
| 1270 | ll_rw_block(SWRITE, 1, &bh); | 1295 | ll_rw_block(SWRITE, 1, &bh); |
| 1271 | 1296 | ||
| 1272 | out: | 1297 | out: |
| @@ -1426,9 +1451,12 @@ recovery_error: | |||
| 1426 | * | 1451 | * |
| 1427 | * Release a journal_t structure once it is no longer in use by the | 1452 | * Release a journal_t structure once it is no longer in use by the |
| 1428 | * journaled object. | 1453 | * journaled object. |
| 1454 | * Return <0 if we couldn't clean up the journal. | ||
| 1429 | */ | 1455 | */ |
| 1430 | void jbd2_journal_destroy(journal_t *journal) | 1456 | int jbd2_journal_destroy(journal_t *journal) |
| 1431 | { | 1457 | { |
| 1458 | int err = 0; | ||
| 1459 | |||
| 1432 | /* Wait for the commit thread to wake up and die. */ | 1460 | /* Wait for the commit thread to wake up and die. */ |
| 1433 | journal_kill_thread(journal); | 1461 | journal_kill_thread(journal); |
| 1434 | 1462 | ||
| @@ -1451,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal) | |||
| 1451 | J_ASSERT(journal->j_checkpoint_transactions == NULL); | 1479 | J_ASSERT(journal->j_checkpoint_transactions == NULL); |
| 1452 | spin_unlock(&journal->j_list_lock); | 1480 | spin_unlock(&journal->j_list_lock); |
| 1453 | 1481 | ||
| 1454 | /* We can now mark the journal as empty. */ | ||
| 1455 | journal->j_tail = 0; | ||
| 1456 | journal->j_tail_sequence = ++journal->j_transaction_sequence; | ||
| 1457 | if (journal->j_sb_buffer) { | 1482 | if (journal->j_sb_buffer) { |
| 1458 | jbd2_journal_update_superblock(journal, 1); | 1483 | if (!is_journal_aborted(journal)) { |
| 1484 | /* We can now mark the journal as empty. */ | ||
| 1485 | journal->j_tail = 0; | ||
| 1486 | journal->j_tail_sequence = | ||
| 1487 | ++journal->j_transaction_sequence; | ||
| 1488 | jbd2_journal_update_superblock(journal, 1); | ||
| 1489 | } else { | ||
| 1490 | err = -EIO; | ||
| 1491 | } | ||
| 1459 | brelse(journal->j_sb_buffer); | 1492 | brelse(journal->j_sb_buffer); |
| 1460 | } | 1493 | } |
| 1461 | 1494 | ||
| @@ -1467,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal) | |||
| 1467 | jbd2_journal_destroy_revoke(journal); | 1500 | jbd2_journal_destroy_revoke(journal); |
| 1468 | kfree(journal->j_wbuf); | 1501 | kfree(journal->j_wbuf); |
| 1469 | kfree(journal); | 1502 | kfree(journal); |
| 1503 | |||
| 1504 | return err; | ||
| 1470 | } | 1505 | } |
| 1471 | 1506 | ||
| 1472 | 1507 | ||
| @@ -1692,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal) | |||
| 1692 | spin_lock(&journal->j_list_lock); | 1727 | spin_lock(&journal->j_list_lock); |
| 1693 | while (!err && journal->j_checkpoint_transactions != NULL) { | 1728 | while (!err && journal->j_checkpoint_transactions != NULL) { |
| 1694 | spin_unlock(&journal->j_list_lock); | 1729 | spin_unlock(&journal->j_list_lock); |
| 1730 | mutex_lock(&journal->j_checkpoint_mutex); | ||
| 1695 | err = jbd2_log_do_checkpoint(journal); | 1731 | err = jbd2_log_do_checkpoint(journal); |
| 1732 | mutex_unlock(&journal->j_checkpoint_mutex); | ||
| 1696 | spin_lock(&journal->j_list_lock); | 1733 | spin_lock(&journal->j_list_lock); |
| 1697 | } | 1734 | } |
| 1698 | spin_unlock(&journal->j_list_lock); | 1735 | spin_unlock(&journal->j_list_lock); |
| 1736 | |||
| 1737 | if (is_journal_aborted(journal)) | ||
| 1738 | return -EIO; | ||
| 1739 | |||
| 1699 | jbd2_cleanup_journal_tail(journal); | 1740 | jbd2_cleanup_journal_tail(journal); |
| 1700 | 1741 | ||
| 1701 | /* Finally, mark the journal as really needing no recovery. | 1742 | /* Finally, mark the journal as really needing no recovery. |
| @@ -1717,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal) | |||
| 1717 | J_ASSERT(journal->j_head == journal->j_tail); | 1758 | J_ASSERT(journal->j_head == journal->j_tail); |
| 1718 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); | 1759 | J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); |
| 1719 | spin_unlock(&journal->j_state_lock); | 1760 | spin_unlock(&journal->j_state_lock); |
| 1720 | return err; | 1761 | return 0; |
| 1721 | } | 1762 | } |
| 1722 | 1763 | ||
| 1723 | /** | 1764 | /** |
| @@ -1761,23 +1802,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) | |||
| 1761 | } | 1802 | } |
| 1762 | 1803 | ||
| 1763 | /* | 1804 | /* |
| 1764 | * journal_dev_name: format a character string to describe on what | ||
| 1765 | * device this journal is present. | ||
| 1766 | */ | ||
| 1767 | |||
| 1768 | static const char *journal_dev_name(journal_t *journal, char *buffer) | ||
| 1769 | { | ||
| 1770 | struct block_device *bdev; | ||
| 1771 | |||
| 1772 | if (journal->j_inode) | ||
| 1773 | bdev = journal->j_inode->i_sb->s_bdev; | ||
| 1774 | else | ||
| 1775 | bdev = journal->j_dev; | ||
| 1776 | |||
| 1777 | return bdevname(bdev, buffer); | ||
| 1778 | } | ||
| 1779 | |||
| 1780 | /* | ||
| 1781 | * Journal abort has very specific semantics, which we describe | 1805 | * Journal abort has very specific semantics, which we describe |
| 1782 | * for journal abort. | 1806 | * for journal abort. |
| 1783 | * | 1807 | * |
| @@ -1793,13 +1817,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) | |||
| 1793 | void __jbd2_journal_abort_hard(journal_t *journal) | 1817 | void __jbd2_journal_abort_hard(journal_t *journal) |
| 1794 | { | 1818 | { |
| 1795 | transaction_t *transaction; | 1819 | transaction_t *transaction; |
| 1796 | char b[BDEVNAME_SIZE]; | ||
| 1797 | 1820 | ||
| 1798 | if (journal->j_flags & JBD2_ABORT) | 1821 | if (journal->j_flags & JBD2_ABORT) |
| 1799 | return; | 1822 | return; |
| 1800 | 1823 | ||
| 1801 | printk(KERN_ERR "Aborting journal on device %s.\n", | 1824 | printk(KERN_ERR "Aborting journal on device %s.\n", |
| 1802 | journal_dev_name(journal, b)); | 1825 | journal->j_devname); |
| 1803 | 1826 | ||
| 1804 | spin_lock(&journal->j_state_lock); | 1827 | spin_lock(&journal->j_state_lock); |
| 1805 | journal->j_flags |= JBD2_ABORT; | 1828 | journal->j_flags |= JBD2_ABORT; |
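jbd2_journal_destroy() now reports whether the journal could be cleaned up, so teardown paths can at least log a failure instead of ignoring it. A hedged sketch of such a caller follows; the function name and message are purely illustrative.

#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/kernel.h>

/* Illustrative unmount-time caller honouring the new return value. */
static void example_release_journal(struct super_block *sb, journal_t *journal)
{
	int err = jbd2_journal_destroy(journal);

	if (err < 0)
		printk(KERN_ERR "JBD2: could not clean up journal on %s (%d)\n",
		       sb->s_id, err);
}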
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 058f50f65b76..73063285b13f 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c | |||
| @@ -225,7 +225,7 @@ do { \ | |||
| 225 | */ | 225 | */ |
| 226 | int jbd2_journal_recover(journal_t *journal) | 226 | int jbd2_journal_recover(journal_t *journal) |
| 227 | { | 227 | { |
| 228 | int err; | 228 | int err, err2; |
| 229 | journal_superblock_t * sb; | 229 | journal_superblock_t * sb; |
| 230 | 230 | ||
| 231 | struct recovery_info info; | 231 | struct recovery_info info; |
| @@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal) | |||
| 263 | journal->j_transaction_sequence = ++info.end_transaction; | 263 | journal->j_transaction_sequence = ++info.end_transaction; |
| 264 | 264 | ||
| 265 | jbd2_journal_clear_revoke(journal); | 265 | jbd2_journal_clear_revoke(journal); |
| 266 | sync_blockdev(journal->j_fs_dev); | 266 | err2 = sync_blockdev(journal->j_fs_dev); |
| 267 | if (!err) | ||
| 268 | err = err2; | ||
| 269 | |||
| 267 | return err; | 270 | return err; |
| 268 | } | 271 | } |
| 269 | 272 | ||
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5d540588fa9..39b7805a599a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c | |||
| @@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) | |||
| 52 | transaction->t_expires = jiffies + journal->j_commit_interval; | 52 | transaction->t_expires = jiffies + journal->j_commit_interval; |
| 53 | spin_lock_init(&transaction->t_handle_lock); | 53 | spin_lock_init(&transaction->t_handle_lock); |
| 54 | INIT_LIST_HEAD(&transaction->t_inode_list); | 54 | INIT_LIST_HEAD(&transaction->t_inode_list); |
| 55 | INIT_LIST_HEAD(&transaction->t_private_list); | ||
| 55 | 56 | ||
| 56 | /* Set up the commit timer for the new transaction. */ | 57 | /* Set up the commit timer for the new transaction. */ |
| 57 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); | 58 | journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |
diff --git a/fs/jffs2/Kconfig b/fs/jffs2/Kconfig new file mode 100644 index 000000000000..6ae169cd8faa --- /dev/null +++ b/fs/jffs2/Kconfig | |||
| @@ -0,0 +1,188 @@ | |||
| 1 | config JFFS2_FS | ||
| 2 | tristate "Journalling Flash File System v2 (JFFS2) support" | ||
| 3 | select CRC32 | ||
| 4 | depends on MTD | ||
| 5 | help | ||
| 6 | JFFS2 is the second generation of the Journalling Flash File System | ||
| 7 | for use on diskless embedded devices. It provides improved wear | ||
| 8 | levelling, compression and support for hard links. You cannot use | ||
| 9 | this on normal block devices, only on 'MTD' devices. | ||
| 10 | |||
| 11 | Further information on the design and implementation of JFFS2 is | ||
| 12 | available at <http://sources.redhat.com/jffs2/>. | ||
| 13 | |||
| 14 | config JFFS2_FS_DEBUG | ||
| 15 | int "JFFS2 debugging verbosity (0 = quiet, 2 = noisy)" | ||
| 16 | depends on JFFS2_FS | ||
| 17 | default "0" | ||
| 18 | help | ||
| 19 | This controls the amount of debugging messages produced by the JFFS2 | ||
| 20 | code. Set it to zero for use in production systems. For evaluation, | ||
| 21 | testing and debugging, it's advisable to set it to one. This will | ||
| 22 | enable a few assertions and will print debugging messages at the | ||
| 23 | KERN_DEBUG loglevel, where they won't normally be visible. Level 2 | ||
| 24 | is unlikely to be useful - it enables extra debugging in certain | ||
| 25 | areas which at one point needed debugging, but when the bugs were | ||
| 26 | located and fixed, the detailed messages were relegated to level 2. | ||
| 27 | |||
| 28 | If reporting bugs, please try to have available a full dump of the | ||
| 29 | messages at debug level 1 while the misbehaviour was occurring. | ||
| 30 | |||
| 31 | config JFFS2_FS_WRITEBUFFER | ||
| 32 | bool "JFFS2 write-buffering support" | ||
| 33 | depends on JFFS2_FS | ||
| 34 | default y | ||
| 35 | help | ||
| 36 | This enables the write-buffering support in JFFS2. | ||
| 37 | |||
| 38 | This functionality is required to support JFFS2 on the following | ||
| 39 | types of flash devices: | ||
| 40 | - NAND flash | ||
| 41 | - NOR flash with transparent ECC | ||
| 42 | - DataFlash | ||
| 43 | |||
| 44 | config JFFS2_FS_WBUF_VERIFY | ||
| 45 | bool "Verify JFFS2 write-buffer reads" | ||
| 46 | depends on JFFS2_FS_WRITEBUFFER | ||
| 47 | default n | ||
| 48 | help | ||
| 49 | This causes JFFS2 to read back every page written through the | ||
| 50 | write-buffer, and check for errors. | ||
| 51 | |||
| 52 | config JFFS2_SUMMARY | ||
| 53 | bool "JFFS2 summary support (EXPERIMENTAL)" | ||
| 54 | depends on JFFS2_FS && EXPERIMENTAL | ||
| 55 | default n | ||
| 56 | help | ||
| 57 | This feature makes it possible to use summary information | ||
| 58 | for faster filesystem mount. | ||
| 59 | |||
| 60 | The summary information can be inserted into a filesystem image | ||
| 61 | by the utility 'sumtool'. | ||
| 62 | |||
| 63 | If unsure, say 'N'. | ||
| 64 | |||
| 65 | config JFFS2_FS_XATTR | ||
| 66 | bool "JFFS2 XATTR support (EXPERIMENTAL)" | ||
| 67 | depends on JFFS2_FS && EXPERIMENTAL | ||
| 68 | default n | ||
| 69 | help | ||
| 70 | Extended attributes are name:value pairs associated with inodes by | ||
| 71 | the kernel or by users (see the attr(5) manual page, or visit | ||
| 72 | <http://acl.bestbits.at/> for details). | ||
| 73 | |||
| 74 | If unsure, say N. | ||
| 75 | |||
| 76 | config JFFS2_FS_POSIX_ACL | ||
| 77 | bool "JFFS2 POSIX Access Control Lists" | ||
| 78 | depends on JFFS2_FS_XATTR | ||
| 79 | default y | ||
| 80 | select FS_POSIX_ACL | ||
| 81 | help | ||
| 82 | Posix Access Control Lists (ACLs) support permissions for users and | ||
| 83 | groups beyond the owner/group/world scheme. | ||
| 84 | |||
| 85 | To learn more about Access Control Lists, visit the Posix ACLs for | ||
| 86 | Linux website <http://acl.bestbits.at/>. | ||
| 87 | |||
| 88 | If you don't know what Access Control Lists are, say N. | ||
| 89 | |||
| 90 | config JFFS2_FS_SECURITY | ||
| 91 | bool "JFFS2 Security Labels" | ||
| 92 | depends on JFFS2_FS_XATTR | ||
| 93 | default y | ||
| 94 | help | ||
| 95 | Security labels support alternative access control models | ||
| 96 | implemented by security modules like SELinux. This option | ||
| 97 | enables an extended attribute handler for file security | ||
| 98 | labels in the jffs2 filesystem. | ||
| 99 | |||
| 100 | If you are not using a security module that requires using | ||
| 101 | extended attributes for file security labels, say N. | ||
| 102 | |||
| 103 | config JFFS2_COMPRESSION_OPTIONS | ||
| 104 | bool "Advanced compression options for JFFS2" | ||
| 105 | depends on JFFS2_FS | ||
| 106 | default n | ||
| 107 | help | ||
| 108 | Enabling this option allows you to explicitly choose which | ||
| 109 | compression modules, if any, are enabled in JFFS2. Removing | ||
| 110 | compressors can mean you cannot read existing file systems, | ||
| 111 | and enabling experimental compressors can mean that you | ||
| 112 | write a file system which cannot be read by a standard kernel. | ||
| 113 | |||
| 114 | If unsure, you should _definitely_ say 'N'. | ||
| 115 | |||
| 116 | config JFFS2_ZLIB | ||
| 117 | bool "JFFS2 ZLIB compression support" if JFFS2_COMPRESSION_OPTIONS | ||
| 118 | select ZLIB_INFLATE | ||
| 119 | select ZLIB_DEFLATE | ||
| 120 | depends on JFFS2_FS | ||
| 121 | default y | ||
| 122 | help | ||
| 123 | Zlib is designed to be a free, general-purpose, legally unencumbered, | ||
| 124 | lossless data-compression library for use on virtually any computer | ||
| 125 | hardware and operating system. See <http://www.gzip.org/zlib/> for | ||
| 126 | further information. | ||
| 127 | |||
| 128 | Say 'Y' if unsure. | ||
| 129 | |||
| 130 | config JFFS2_LZO | ||
| 131 | bool "JFFS2 LZO compression support" if JFFS2_COMPRESSION_OPTIONS | ||
| 132 | select LZO_COMPRESS | ||
| 133 | select LZO_DECOMPRESS | ||
| 134 | depends on JFFS2_FS | ||
| 135 | default n | ||
| 136 | help | ||
| 137 | minilzo-based compression. Generally works better than Zlib. | ||
| 138 | |||
| 139 | This feature was added in July, 2007. Say 'N' if you need | ||
| 140 | compatibility with older bootloaders or kernels. | ||
| 141 | |||
| 142 | config JFFS2_RTIME | ||
| 143 | bool "JFFS2 RTIME compression support" if JFFS2_COMPRESSION_OPTIONS | ||
| 144 | depends on JFFS2_FS | ||
| 145 | default y | ||
| 146 | help | ||
| 147 | Rtime does manage to recompress already-compressed data. Say 'Y' if unsure. | ||
| 148 | |||
| 149 | config JFFS2_RUBIN | ||
| 150 | bool "JFFS2 RUBIN compression support" if JFFS2_COMPRESSION_OPTIONS | ||
| 151 | depends on JFFS2_FS | ||
| 152 | default n | ||
| 153 | help | ||
| 154 | RUBINMIPS and DYNRUBIN compressors. Say 'N' if unsure. | ||
| 155 | |||
| 156 | choice | ||
| 157 | prompt "JFFS2 default compression mode" if JFFS2_COMPRESSION_OPTIONS | ||
| 158 | default JFFS2_CMODE_PRIORITY | ||
| 159 | depends on JFFS2_FS | ||
| 160 | help | ||
| 161 | Here you can set the default compression mode of JFFS2 from | ||
| 162 | the available compression modes. Don't touch if unsure. | ||
| 163 | |||
| 164 | config JFFS2_CMODE_NONE | ||
| 165 | bool "no compression" | ||
| 166 | help | ||
| 167 | Uses no compression. | ||
| 168 | |||
| 169 | config JFFS2_CMODE_PRIORITY | ||
| 170 | bool "priority" | ||
| 171 | help | ||
| 172 | Tries the compressors in a predefined order and chooses the first | ||
| 173 | successful one. | ||
| 174 | |||
| 175 | config JFFS2_CMODE_SIZE | ||
| 176 | bool "size (EXPERIMENTAL)" | ||
| 177 | help | ||
| 178 | Tries all compressors and chooses the one which has the smallest | ||
| 179 | result. | ||
| 180 | |||
| 181 | config JFFS2_CMODE_FAVOURLZO | ||
| 182 | bool "Favour LZO" | ||
| 183 | help | ||
| 184 | Tries all compressors and chooses the one which has the smallest | ||
| 185 | result but gives some preference to LZO (which has faster | ||
| 186 | decompression) at the expense of size. | ||
| 187 | |||
| 188 | endchoice | ||
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c index 86739ee53b37..f25e70c1b51c 100644 --- a/fs/jffs2/compr.c +++ b/fs/jffs2/compr.c | |||
| @@ -53,8 +53,8 @@ static int jffs2_is_best_compression(struct jffs2_compressor *this, | |||
| 53 | } | 53 | } |
| 54 | 54 | ||
| 55 | /* jffs2_compress: | 55 | /* jffs2_compress: |
| 56 | * @data: Pointer to uncompressed data | 56 | * @data_in: Pointer to uncompressed data |
| 57 | * @cdata: Pointer to returned pointer to buffer for compressed data | 57 | * @cpage_out: Pointer to returned pointer to buffer for compressed data |
| 58 | * @datalen: On entry, holds the amount of data available for compression. | 58 | * @datalen: On entry, holds the amount of data available for compression. |
| 59 | * On exit, expected to hold the amount of data actually compressed. | 59 | * On exit, expected to hold the amount of data actually compressed. |
| 60 | * @cdatalen: On entry, holds the amount of space available for compressed | 60 | * @cdatalen: On entry, holds the amount of space available for compressed |
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index cd219ef55254..b1aaae823a52 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c | |||
| @@ -311,7 +311,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char | |||
| 311 | /* FIXME: If you care. We'd need to use frags for the target | 311 | /* FIXME: If you care. We'd need to use frags for the target |
| 312 | if it grows much more than this */ | 312 | if it grows much more than this */ |
| 313 | if (targetlen > 254) | 313 | if (targetlen > 254) |
| 314 | return -EINVAL; | 314 | return -ENAMETOOLONG; |
| 315 | 315 | ||
| 316 | ri = jffs2_alloc_raw_inode(); | 316 | ri = jffs2_alloc_raw_inode(); |
| 317 | 317 | ||
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c index dddb2a6c9e2c..259461b910af 100644 --- a/fs/jffs2/erase.c +++ b/fs/jffs2/erase.c | |||
| @@ -68,7 +68,7 @@ static void jffs2_erase_block(struct jffs2_sb_info *c, | |||
| 68 | instr->len = c->sector_size; | 68 | instr->len = c->sector_size; |
| 69 | instr->callback = jffs2_erase_callback; | 69 | instr->callback = jffs2_erase_callback; |
| 70 | instr->priv = (unsigned long)(&instr[1]); | 70 | instr->priv = (unsigned long)(&instr[1]); |
| 71 | instr->fail_addr = 0xffffffff; | 71 | instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; |
| 72 | 72 | ||
| 73 | ((struct erase_priv_struct *)instr->priv)->jeb = jeb; | 73 | ((struct erase_priv_struct *)instr->priv)->jeb = jeb; |
| 74 | ((struct erase_priv_struct *)instr->priv)->c = c; | 74 | ((struct erase_priv_struct *)instr->priv)->c = c; |
| @@ -175,7 +175,7 @@ static void jffs2_erase_failed(struct jffs2_sb_info *c, struct jffs2_eraseblock | |||
| 175 | { | 175 | { |
| 176 | /* For NAND, if the failure did not occur at the device level for a | 176 | /* For NAND, if the failure did not occur at the device level for a |
| 177 | specific physical page, don't bother updating the bad block table. */ | 177 | specific physical page, don't bother updating the bad block table. */ |
| 178 | if (jffs2_cleanmarker_oob(c) && (bad_offset != 0xffffffff)) { | 178 | if (jffs2_cleanmarker_oob(c) && (bad_offset != MTD_FAIL_ADDR_UNKNOWN)) { |
| 179 | /* We had a device-level failure to erase. Let's see if we've | 179 | /* We had a device-level failure to erase. Let's see if we've |
| 180 | failed too many times. */ | 180 | failed too many times. */ |
| 181 | if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { | 181 | if (!jffs2_write_nand_badblock(c, jeb, bad_offset)) { |
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 086c43830221..249305d65d5b 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c | |||
| @@ -207,6 +207,8 @@ int jffs2_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 207 | buf->f_files = 0; | 207 | buf->f_files = 0; |
| 208 | buf->f_ffree = 0; | 208 | buf->f_ffree = 0; |
| 209 | buf->f_namelen = JFFS2_MAX_NAME_LEN; | 209 | buf->f_namelen = JFFS2_MAX_NAME_LEN; |
| 210 | buf->f_fsid.val[0] = JFFS2_SUPER_MAGIC; | ||
| 211 | buf->f_fsid.val[1] = c->mtd->index; | ||
| 210 | 212 | ||
| 211 | spin_lock(&c->erase_completion_lock); | 213 | spin_lock(&c->erase_completion_lock); |
| 212 | avail = c->dirty_size + c->free_size; | 214 | avail = c->dirty_size + c->free_size; |
| @@ -440,14 +442,14 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i | |||
| 440 | 442 | ||
| 441 | memset(ri, 0, sizeof(*ri)); | 443 | memset(ri, 0, sizeof(*ri)); |
| 442 | /* Set OS-specific defaults for new inodes */ | 444 | /* Set OS-specific defaults for new inodes */ |
| 443 | ri->uid = cpu_to_je16(current->fsuid); | 445 | ri->uid = cpu_to_je16(current_fsuid()); |
| 444 | 446 | ||
| 445 | if (dir_i->i_mode & S_ISGID) { | 447 | if (dir_i->i_mode & S_ISGID) { |
| 446 | ri->gid = cpu_to_je16(dir_i->i_gid); | 448 | ri->gid = cpu_to_je16(dir_i->i_gid); |
| 447 | if (S_ISDIR(mode)) | 449 | if (S_ISDIR(mode)) |
| 448 | mode |= S_ISGID; | 450 | mode |= S_ISGID; |
| 449 | } else { | 451 | } else { |
| 450 | ri->gid = cpu_to_je16(current->fsgid); | 452 | ri->gid = cpu_to_je16(current_fsgid()); |
| 451 | } | 453 | } |
| 452 | 454 | ||
| 453 | /* POSIX ACLs have to be processed now, at least partly. | 455 | /* POSIX ACLs have to be processed now, at least partly. |
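Since jffs2_statfs() now fills f_fsid with the filesystem magic and the MTD index, userspace can tell JFFS2 mounts apart via statfs(2). A small userspace sketch follows; the mount point is only an example.

#include <stdio.h>
#include <sys/vfs.h>

int main(void)
{
	struct statfs st;

	if (statfs("/mnt/jffs2", &st) != 0) {	/* example mount point */
		perror("statfs");
		return 1;
	}
	/* val[0] holds JFFS2_SUPER_MAGIC (0x72b6), val[1] the MTD index. */
	printf("f_fsid = %#x:%#x\n", st.f_fsid.__val[0], st.f_fsid.__val[1]);
	return 0;
}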
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index a9bf9603c1ba..0875b60b4bf7 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c | |||
| @@ -261,6 +261,10 @@ static int jffs2_find_nextblock(struct jffs2_sb_info *c) | |||
| 261 | 261 | ||
| 262 | jffs2_sum_reset_collected(c->summary); /* reset collected summary */ | 262 | jffs2_sum_reset_collected(c->summary); /* reset collected summary */ |
| 263 | 263 | ||
| 264 | /* adjust write buffer offset, else we get a non contiguous write bug */ | ||
| 265 | if (!(c->wbuf_ofs % c->sector_size) && !c->wbuf_len) | ||
| 266 | c->wbuf_ofs = 0xffffffff; | ||
| 267 | |||
| 264 | D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); | 268 | D1(printk(KERN_DEBUG "jffs2_find_nextblock(): new nextblock = 0x%08x\n", c->nextblock->offset)); |
| 265 | 269 | ||
| 266 | return 0; | 270 | return 0; |
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c index 0e78b00035e4..d9a721e6db70 100644 --- a/fs/jffs2/wbuf.c +++ b/fs/jffs2/wbuf.c | |||
| @@ -679,10 +679,7 @@ static int __jffs2_flush_wbuf(struct jffs2_sb_info *c, int pad) | |||
| 679 | 679 | ||
| 680 | memset(c->wbuf,0xff,c->wbuf_pagesize); | 680 | memset(c->wbuf,0xff,c->wbuf_pagesize); |
| 681 | /* adjust write buffer offset, else we get a non contiguous write bug */ | 681 | /* adjust write buffer offset, else we get a non contiguous write bug */ |
| 682 | if (SECTOR_ADDR(c->wbuf_ofs) == SECTOR_ADDR(c->wbuf_ofs+c->wbuf_pagesize)) | 682 | c->wbuf_ofs += c->wbuf_pagesize; |
| 683 | c->wbuf_ofs += c->wbuf_pagesize; | ||
| 684 | else | ||
| 685 | c->wbuf_ofs = 0xffffffff; | ||
| 686 | c->wbuf_len = 0; | 683 | c->wbuf_len = 0; |
| 687 | return 0; | 684 | return 0; |
| 688 | } | 685 | } |
diff --git a/fs/jfs/super.c b/fs/jfs/super.c index 3630718be395..0dae345e481b 100644 --- a/fs/jfs/super.c +++ b/fs/jfs/super.c | |||
| @@ -199,7 +199,7 @@ enum { | |||
| 199 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask | 199 | Opt_usrquota, Opt_grpquota, Opt_uid, Opt_gid, Opt_umask |
| 200 | }; | 200 | }; |
| 201 | 201 | ||
| 202 | static match_table_t tokens = { | 202 | static const match_table_t tokens = { |
| 203 | {Opt_integrity, "integrity"}, | 203 | {Opt_integrity, "integrity"}, |
| 204 | {Opt_nointegrity, "nointegrity"}, | 204 | {Opt_nointegrity, "nointegrity"}, |
| 205 | {Opt_iocharset, "iocharset=%s"}, | 205 | {Opt_iocharset, "iocharset=%s"}, |
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile index 7725a0a9a555..97f6073ab339 100644 --- a/fs/lockd/Makefile +++ b/fs/lockd/Makefile | |||
| @@ -5,6 +5,6 @@ | |||
| 5 | obj-$(CONFIG_LOCKD) += lockd.o | 5 | obj-$(CONFIG_LOCKD) += lockd.o |
| 6 | 6 | ||
| 7 | lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ | 7 | lockd-objs-y := clntlock.o clntproc.o host.o svc.o svclock.o svcshare.o \ |
| 8 | svcproc.o svcsubs.o mon.o xdr.o | 8 | svcproc.o svcsubs.o mon.o xdr.o grace.o |
| 9 | lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o | 9 | lockd-objs-$(CONFIG_LOCKD_V4) += xdr4.o svc4proc.o |
| 10 | lockd-objs := $(lockd-objs-y) | 10 | lockd-objs := $(lockd-objs-y) |
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 0b45fd3a4bfd..8307dd64bf46 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c | |||
| @@ -54,14 +54,13 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) | |||
| 54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; | 54 | u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4; |
| 55 | int status; | 55 | int status; |
| 56 | 56 | ||
| 57 | status = lockd_up(nlm_init->protocol); | 57 | status = lockd_up(); |
| 58 | if (status < 0) | 58 | if (status < 0) |
| 59 | return ERR_PTR(status); | 59 | return ERR_PTR(status); |
| 60 | 60 | ||
| 61 | host = nlmclnt_lookup_host((struct sockaddr_in *)nlm_init->address, | 61 | host = nlmclnt_lookup_host(nlm_init->address, nlm_init->addrlen, |
| 62 | nlm_init->protocol, nlm_version, | 62 | nlm_init->protocol, nlm_version, |
| 63 | nlm_init->hostname, | 63 | nlm_init->hostname); |
| 64 | strlen(nlm_init->hostname)); | ||
| 65 | if (host == NULL) { | 64 | if (host == NULL) { |
| 66 | lockd_down(); | 65 | lockd_down(); |
| 67 | return ERR_PTR(-ENOLCK); | 66 | return ERR_PTR(-ENOLCK); |
| @@ -142,7 +141,7 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) | |||
| 142 | /* | 141 | /* |
| 143 | * The server lockd has called us back to tell us the lock was granted | 142 | * The server lockd has called us back to tell us the lock was granted |
| 144 | */ | 143 | */ |
| 145 | __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) | 144 | __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock) |
| 146 | { | 145 | { |
| 147 | const struct file_lock *fl = &lock->fl; | 146 | const struct file_lock *fl = &lock->fl; |
| 148 | const struct nfs_fh *fh = &lock->fh; | 147 | const struct nfs_fh *fh = &lock->fh; |
| @@ -166,7 +165,7 @@ __be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock | |||
| 166 | */ | 165 | */ |
| 167 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) | 166 | if (fl_blocked->fl_u.nfs_fl.owner->pid != lock->svid) |
| 168 | continue; | 167 | continue; |
| 169 | if (!nlm_cmp_addr(&block->b_host->h_addr, addr)) | 168 | if (!nlm_cmp_addr(nlm_addr(block->b_host), addr)) |
| 170 | continue; | 169 | continue; |
| 171 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) | 170 | if (nfs_compare_fh(NFS_FH(fl_blocked->fl_file->f_path.dentry->d_inode) ,fh) != 0) |
| 172 | continue; | 171 | continue; |
| @@ -216,7 +215,7 @@ reclaimer(void *ptr) | |||
| 216 | /* This one ensures that our parent doesn't terminate while the | 215 | /* This one ensures that our parent doesn't terminate while the |
| 217 | * reclaim is in progress */ | 216 | * reclaim is in progress */ |
| 218 | lock_kernel(); | 217 | lock_kernel(); |
| 219 | lockd_up(0); /* note: this cannot fail as lockd is already running */ | 218 | lockd_up(); /* note: this cannot fail as lockd is already running */ |
| 220 | 219 | ||
| 221 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); | 220 | dprintk("lockd: reclaiming locks for host %s\n", host->h_name); |
| 222 | 221 | ||
diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c new file mode 100644 index 000000000000..183cc1f0af1c --- /dev/null +++ b/fs/lockd/grace.c | |||
| @@ -0,0 +1,59 @@ | |||
| 1 | /* | ||
| 2 | * Common code for control of lockd and nfsv4 grace periods. | ||
| 3 | */ | ||
| 4 | |||
| 5 | #include <linux/module.h> | ||
| 6 | #include <linux/lockd/bind.h> | ||
| 7 | |||
| 8 | static LIST_HEAD(grace_list); | ||
| 9 | static DEFINE_SPINLOCK(grace_lock); | ||
| 10 | |||
| 11 | /** | ||
| 12 | * locks_start_grace | ||
| 13 | * @lm: who this grace period is for | ||
| 14 | * | ||
| 15 | * A grace period is a period during which locks should not be given | ||
| 16 | * out. Currently grace periods are only enforced by the two lock | ||
| 17 | * managers (lockd and nfsd), using the locks_in_grace() function to | ||
| 18 | * check when they are in a grace period. | ||
| 19 | * | ||
| 20 | * This function is called to start a grace period. | ||
| 21 | */ | ||
| 22 | void locks_start_grace(struct lock_manager *lm) | ||
| 23 | { | ||
| 24 | spin_lock(&grace_lock); | ||
| 25 | list_add(&lm->list, &grace_list); | ||
| 26 | spin_unlock(&grace_lock); | ||
| 27 | } | ||
| 28 | EXPORT_SYMBOL_GPL(locks_start_grace); | ||
| 29 | |||
| 30 | /** | ||
| 31 | * locks_end_grace | ||
| 32 | * @lm: who this grace period is for | ||
| 33 | * | ||
| 34 | * Call this function to state that the given lock manager is ready to | ||
| 35 | * resume regular locking. The grace period will not end until all lock | ||
| 36 | * managers that called locks_start_grace() also call locks_end_grace(). | ||
| 37 | * Note that callers count on it being safe to call this more than once, | ||
| 38 | * and the second call should be a no-op. | ||
| 39 | */ | ||
| 40 | void locks_end_grace(struct lock_manager *lm) | ||
| 41 | { | ||
| 42 | spin_lock(&grace_lock); | ||
| 43 | list_del_init(&lm->list); | ||
| 44 | spin_unlock(&grace_lock); | ||
| 45 | } | ||
| 46 | EXPORT_SYMBOL_GPL(locks_end_grace); | ||
| 47 | |||
| 48 | /** | ||
| 49 | * locks_in_grace | ||
| 50 | * | ||
| 51 | * Lock managers call this function to determine when it is OK for them | ||
| 52 | * to answer ordinary lock requests, and when they should accept only | ||
| 53 | * lock reclaims. | ||
| 54 | */ | ||
| 55 | int locks_in_grace(void) | ||
| 56 | { | ||
| 57 | return !list_empty(&grace_list); | ||
| 58 | } | ||
| 59 | EXPORT_SYMBOL_GPL(locks_in_grace); | ||
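The grace-period API added above is intended to be shared by lockd and nfsd. As a rough illustration of the intended usage (the my_* names and the 90-second value are assumptions for this sketch, not part of the patch), a lock manager registers itself at startup and arranges for the grace period to end later; lockd's real conversion is in fs/lockd/svc.c further down:

    /* Hypothetical lock manager: start a grace period at init time and
     * end it from delayed work, mirroring what lockd does below.  The
     * declarations of struct lock_manager and locks_*_grace() come from
     * the header changes made elsewhere in this series.
     */
    static struct lock_manager my_manager;

    static void my_grace_ender(struct work_struct *unused)
    {
            locks_end_grace(&my_manager);
    }
    static DECLARE_DELAYED_WORK(my_grace_end, my_grace_ender);

    static void my_manager_init(void)
    {
            locks_start_grace(&my_manager);
            schedule_delayed_work(&my_grace_end, 90 * HZ);  /* 90s is arbitrary */
    }

Per the kernel-doc above, calling locks_end_grace() again (for example on shutdown) is safe, since the second call is a no-op.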
diff --git a/fs/lockd/host.c b/fs/lockd/host.c index a17664c7eacc..9fd8889097b7 100644 --- a/fs/lockd/host.c +++ b/fs/lockd/host.c | |||
| @@ -11,16 +11,17 @@ | |||
| 11 | #include <linux/types.h> | 11 | #include <linux/types.h> |
| 12 | #include <linux/slab.h> | 12 | #include <linux/slab.h> |
| 13 | #include <linux/in.h> | 13 | #include <linux/in.h> |
| 14 | #include <linux/in6.h> | ||
| 14 | #include <linux/sunrpc/clnt.h> | 15 | #include <linux/sunrpc/clnt.h> |
| 15 | #include <linux/sunrpc/svc.h> | 16 | #include <linux/sunrpc/svc.h> |
| 16 | #include <linux/lockd/lockd.h> | 17 | #include <linux/lockd/lockd.h> |
| 17 | #include <linux/lockd/sm_inter.h> | 18 | #include <linux/lockd/sm_inter.h> |
| 18 | #include <linux/mutex.h> | 19 | #include <linux/mutex.h> |
| 19 | 20 | ||
| 21 | #include <net/ipv6.h> | ||
| 20 | 22 | ||
| 21 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE | 23 | #define NLMDBG_FACILITY NLMDBG_HOSTCACHE |
| 22 | #define NLM_HOST_NRHASH 32 | 24 | #define NLM_HOST_NRHASH 32 |
| 23 | #define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1)) | ||
| 24 | #define NLM_HOST_REBIND (60 * HZ) | 25 | #define NLM_HOST_REBIND (60 * HZ) |
| 25 | #define NLM_HOST_EXPIRE (300 * HZ) | 26 | #define NLM_HOST_EXPIRE (300 * HZ) |
| 26 | #define NLM_HOST_COLLECT (120 * HZ) | 27 | #define NLM_HOST_COLLECT (120 * HZ) |
| @@ -30,42 +31,115 @@ static unsigned long next_gc; | |||
| 30 | static int nrhosts; | 31 | static int nrhosts; |
| 31 | static DEFINE_MUTEX(nlm_host_mutex); | 32 | static DEFINE_MUTEX(nlm_host_mutex); |
| 32 | 33 | ||
| 33 | |||
| 34 | static void nlm_gc_hosts(void); | 34 | static void nlm_gc_hosts(void); |
| 35 | static struct nsm_handle * __nsm_find(const struct sockaddr_in *, | 35 | static struct nsm_handle *nsm_find(const struct sockaddr *sap, |
| 36 | const char *, unsigned int, int); | 36 | const size_t salen, |
| 37 | static struct nsm_handle * nsm_find(const struct sockaddr_in *sin, | 37 | const char *hostname, |
| 38 | const char *hostname, | 38 | const size_t hostname_len, |
| 39 | unsigned int hostname_len); | 39 | const int create); |
| 40 | |||
| 41 | struct nlm_lookup_host_info { | ||
| 42 | const int server; /* search for server|client */ | ||
| 43 | const struct sockaddr *sap; /* address to search for */ | ||
| 44 | const size_t salen; /* its length */ | ||
| 45 | const unsigned short protocol; /* transport to search for*/ | ||
| 46 | const u32 version; /* NLM version to search for */ | ||
| 47 | const char *hostname; /* remote's hostname */ | ||
| 48 | const size_t hostname_len; /* its length */ | ||
| 49 | const struct sockaddr *src_sap; /* our address (optional) */ | ||
| 50 | const size_t src_len; /* its length */ | ||
| 51 | }; | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Hash function must work well on big- and little-endian platforms | ||
| 55 | */ | ||
| 56 | static unsigned int __nlm_hash32(const __be32 n) | ||
| 57 | { | ||
| 58 | unsigned int hash = (__force u32)n ^ ((__force u32)n >> 16); | ||
| 59 | return hash ^ (hash >> 8); | ||
| 60 | } | ||
| 61 | |||
| 62 | static unsigned int __nlm_hash_addr4(const struct sockaddr *sap) | ||
| 63 | { | ||
| 64 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
| 65 | return __nlm_hash32(sin->sin_addr.s_addr); | ||
| 66 | } | ||
| 67 | |||
| 68 | static unsigned int __nlm_hash_addr6(const struct sockaddr *sap) | ||
| 69 | { | ||
| 70 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
| 71 | const struct in6_addr addr = sin6->sin6_addr; | ||
| 72 | return __nlm_hash32(addr.s6_addr32[0]) ^ | ||
| 73 | __nlm_hash32(addr.s6_addr32[1]) ^ | ||
| 74 | __nlm_hash32(addr.s6_addr32[2]) ^ | ||
| 75 | __nlm_hash32(addr.s6_addr32[3]); | ||
| 76 | } | ||
| 77 | |||
| 78 | static unsigned int nlm_hash_address(const struct sockaddr *sap) | ||
| 79 | { | ||
| 80 | unsigned int hash; | ||
| 81 | |||
| 82 | switch (sap->sa_family) { | ||
| 83 | case AF_INET: | ||
| 84 | hash = __nlm_hash_addr4(sap); | ||
| 85 | break; | ||
| 86 | case AF_INET6: | ||
| 87 | hash = __nlm_hash_addr6(sap); | ||
| 88 | break; | ||
| 89 | default: | ||
| 90 | hash = 0; | ||
| 91 | } | ||
| 92 | return hash & (NLM_HOST_NRHASH - 1); | ||
| 93 | } | ||
| 94 | |||
| 95 | static void nlm_clear_port(struct sockaddr *sap) | ||
| 96 | { | ||
| 97 | switch (sap->sa_family) { | ||
| 98 | case AF_INET: | ||
| 99 | ((struct sockaddr_in *)sap)->sin_port = 0; | ||
| 100 | break; | ||
| 101 | case AF_INET6: | ||
| 102 | ((struct sockaddr_in6 *)sap)->sin6_port = 0; | ||
| 103 | break; | ||
| 104 | } | ||
| 105 | } | ||
| 106 | |||
| 107 | static void nlm_display_address(const struct sockaddr *sap, | ||
| 108 | char *buf, const size_t len) | ||
| 109 | { | ||
| 110 | const struct sockaddr_in *sin = (struct sockaddr_in *)sap; | ||
| 111 | const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap; | ||
| 112 | |||
| 113 | switch (sap->sa_family) { | ||
| 114 | case AF_UNSPEC: | ||
| 115 | snprintf(buf, len, "unspecified"); | ||
| 116 | break; | ||
| 117 | case AF_INET: | ||
| 118 | snprintf(buf, len, NIPQUAD_FMT, NIPQUAD(sin->sin_addr.s_addr)); | ||
| 119 | break; | ||
| 120 | case AF_INET6: | ||
| 121 | if (ipv6_addr_v4mapped(&sin6->sin6_addr)) | ||
| 122 | snprintf(buf, len, NIPQUAD_FMT, | ||
| 123 | NIPQUAD(sin6->sin6_addr.s6_addr32[3])); | ||
| 124 | else | ||
| 125 | snprintf(buf, len, NIP6_FMT, NIP6(sin6->sin6_addr)); | ||
| 126 | break; | ||
| 127 | default: | ||
| 128 | snprintf(buf, len, "unsupported address family"); | ||
| 129 | break; | ||
| 130 | } | ||
| 131 | } | ||
| 40 | 132 | ||
| 41 | /* | 133 | /* |
| 42 | * Common host lookup routine for server & client | 134 | * Common host lookup routine for server & client |
| 43 | */ | 135 | */ |
| 44 | static struct nlm_host *nlm_lookup_host(int server, | 136 | static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni) |
| 45 | const struct sockaddr_in *sin, | ||
| 46 | int proto, u32 version, | ||
| 47 | const char *hostname, | ||
| 48 | unsigned int hostname_len, | ||
| 49 | const struct sockaddr_in *ssin) | ||
| 50 | { | 137 | { |
| 51 | struct hlist_head *chain; | 138 | struct hlist_head *chain; |
| 52 | struct hlist_node *pos; | 139 | struct hlist_node *pos; |
| 53 | struct nlm_host *host; | 140 | struct nlm_host *host; |
| 54 | struct nsm_handle *nsm = NULL; | 141 | struct nsm_handle *nsm = NULL; |
| 55 | int hash; | ||
| 56 | |||
| 57 | dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT | ||
| 58 | ", p=%d, v=%u, my role=%s, name=%.*s)\n", | ||
| 59 | NIPQUAD(ssin->sin_addr.s_addr), | ||
| 60 | NIPQUAD(sin->sin_addr.s_addr), proto, version, | ||
| 61 | server? "server" : "client", | ||
| 62 | hostname_len, | ||
| 63 | hostname? hostname : "<none>"); | ||
| 64 | 142 | ||
| 65 | |||
| 66 | hash = NLM_ADDRHASH(sin->sin_addr.s_addr); | ||
| 67 | |||
| 68 | /* Lock hash table */ | ||
| 69 | mutex_lock(&nlm_host_mutex); | 143 | mutex_lock(&nlm_host_mutex); |
| 70 | 144 | ||
| 71 | if (time_after_eq(jiffies, next_gc)) | 145 | if (time_after_eq(jiffies, next_gc)) |
| @@ -78,22 +152,22 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
| 78 | * different NLM rpc_clients into one single nlm_host object. | 152 | * different NLM rpc_clients into one single nlm_host object. |
| 79 | * This would allow us to have one nlm_host per address. | 153 | * This would allow us to have one nlm_host per address. |
| 80 | */ | 154 | */ |
| 81 | chain = &nlm_hosts[hash]; | 155 | chain = &nlm_hosts[nlm_hash_address(ni->sap)]; |
| 82 | hlist_for_each_entry(host, pos, chain, h_hash) { | 156 | hlist_for_each_entry(host, pos, chain, h_hash) { |
| 83 | if (!nlm_cmp_addr(&host->h_addr, sin)) | 157 | if (!nlm_cmp_addr(nlm_addr(host), ni->sap)) |
| 84 | continue; | 158 | continue; |
| 85 | 159 | ||
| 86 | /* See if we have an NSM handle for this client */ | 160 | /* See if we have an NSM handle for this client */ |
| 87 | if (!nsm) | 161 | if (!nsm) |
| 88 | nsm = host->h_nsmhandle; | 162 | nsm = host->h_nsmhandle; |
| 89 | 163 | ||
| 90 | if (host->h_proto != proto) | 164 | if (host->h_proto != ni->protocol) |
| 91 | continue; | 165 | continue; |
| 92 | if (host->h_version != version) | 166 | if (host->h_version != ni->version) |
| 93 | continue; | 167 | continue; |
| 94 | if (host->h_server != server) | 168 | if (host->h_server != ni->server) |
| 95 | continue; | 169 | continue; |
| 96 | if (!nlm_cmp_addr(&host->h_saddr, ssin)) | 170 | if (!nlm_cmp_addr(nlm_srcaddr(host), ni->src_sap)) |
| 97 | continue; | 171 | continue; |
| 98 | 172 | ||
| 99 | /* Move to head of hash chain. */ | 173 | /* Move to head of hash chain. */ |
| @@ -101,30 +175,41 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
| 101 | hlist_add_head(&host->h_hash, chain); | 175 | hlist_add_head(&host->h_hash, chain); |
| 102 | 176 | ||
| 103 | nlm_get_host(host); | 177 | nlm_get_host(host); |
| 178 | dprintk("lockd: nlm_lookup_host found host %s (%s)\n", | ||
| 179 | host->h_name, host->h_addrbuf); | ||
| 104 | goto out; | 180 | goto out; |
| 105 | } | 181 | } |
| 106 | if (nsm) | ||
| 107 | atomic_inc(&nsm->sm_count); | ||
| 108 | |||
| 109 | host = NULL; | ||
| 110 | 182 | ||
| 111 | /* Sadly, the host isn't in our hash table yet. See if | 183 | /* |
| 112 | * we have an NSM handle for it. If not, create one. | 184 | * The host wasn't in our hash table. If we don't |
| 185 | * have an NSM handle for it yet, create one. | ||
| 113 | */ | 186 | */ |
| 114 | if (!nsm && !(nsm = nsm_find(sin, hostname, hostname_len))) | 187 | if (nsm) |
| 115 | goto out; | 188 | atomic_inc(&nsm->sm_count); |
| 189 | else { | ||
| 190 | host = NULL; | ||
| 191 | nsm = nsm_find(ni->sap, ni->salen, | ||
| 192 | ni->hostname, ni->hostname_len, 1); | ||
| 193 | if (!nsm) { | ||
| 194 | dprintk("lockd: nlm_lookup_host failed; " | ||
| 195 | "no nsm handle\n"); | ||
| 196 | goto out; | ||
| 197 | } | ||
| 198 | } | ||
| 116 | 199 | ||
| 117 | host = kzalloc(sizeof(*host), GFP_KERNEL); | 200 | host = kzalloc(sizeof(*host), GFP_KERNEL); |
| 118 | if (!host) { | 201 | if (!host) { |
| 119 | nsm_release(nsm); | 202 | nsm_release(nsm); |
| 203 | dprintk("lockd: nlm_lookup_host failed; no memory\n"); | ||
| 120 | goto out; | 204 | goto out; |
| 121 | } | 205 | } |
| 122 | host->h_name = nsm->sm_name; | 206 | host->h_name = nsm->sm_name; |
| 123 | host->h_addr = *sin; | 207 | memcpy(nlm_addr(host), ni->sap, ni->salen); |
| 124 | host->h_addr.sin_port = 0; /* ouch! */ | 208 | host->h_addrlen = ni->salen; |
| 125 | host->h_saddr = *ssin; | 209 | nlm_clear_port(nlm_addr(host)); |
| 126 | host->h_version = version; | 210 | memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len); |
| 127 | host->h_proto = proto; | 211 | host->h_version = ni->version; |
| 212 | host->h_proto = ni->protocol; | ||
| 128 | host->h_rpcclnt = NULL; | 213 | host->h_rpcclnt = NULL; |
| 129 | mutex_init(&host->h_mutex); | 214 | mutex_init(&host->h_mutex); |
| 130 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 215 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
| @@ -135,7 +220,7 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
| 135 | host->h_state = 0; /* pseudo NSM state */ | 220 | host->h_state = 0; /* pseudo NSM state */ |
| 136 | host->h_nsmstate = 0; /* real NSM state */ | 221 | host->h_nsmstate = 0; /* real NSM state */ |
| 137 | host->h_nsmhandle = nsm; | 222 | host->h_nsmhandle = nsm; |
| 138 | host->h_server = server; | 223 | host->h_server = ni->server; |
| 139 | hlist_add_head(&host->h_hash, chain); | 224 | hlist_add_head(&host->h_hash, chain); |
| 140 | INIT_LIST_HEAD(&host->h_lockowners); | 225 | INIT_LIST_HEAD(&host->h_lockowners); |
| 141 | spin_lock_init(&host->h_lock); | 226 | spin_lock_init(&host->h_lock); |
| @@ -143,6 +228,15 @@ static struct nlm_host *nlm_lookup_host(int server, | |||
| 143 | INIT_LIST_HEAD(&host->h_reclaim); | 228 | INIT_LIST_HEAD(&host->h_reclaim); |
| 144 | 229 | ||
| 145 | nrhosts++; | 230 | nrhosts++; |
| 231 | |||
| 232 | nlm_display_address((struct sockaddr *)&host->h_addr, | ||
| 233 | host->h_addrbuf, sizeof(host->h_addrbuf)); | ||
| 234 | nlm_display_address((struct sockaddr *)&host->h_srcaddr, | ||
| 235 | host->h_srcaddrbuf, sizeof(host->h_srcaddrbuf)); | ||
| 236 | |||
| 237 | dprintk("lockd: nlm_lookup_host created host %s\n", | ||
| 238 | host->h_name); | ||
| 239 | |||
| 146 | out: | 240 | out: |
| 147 | mutex_unlock(&nlm_host_mutex); | 241 | mutex_unlock(&nlm_host_mutex); |
| 148 | return host; | 242 | return host; |
| @@ -170,33 +264,103 @@ nlm_destroy_host(struct nlm_host *host) | |||
| 170 | kfree(host); | 264 | kfree(host); |
| 171 | } | 265 | } |
| 172 | 266 | ||
| 173 | /* | 267 | /** |
| 174 | * Find an NLM server handle in the cache. If there is none, create it. | 268 | * nlmclnt_lookup_host - Find an NLM host handle matching a remote server |
| 269 | * @sap: network address of server | ||
| 270 | * @salen: length of server address | ||
| 271 | * @protocol: transport protocol to use | ||
| 272 | * @version: NLM protocol version | ||
| 273 | * @hostname: '\0'-terminated hostname of server | ||
| 274 | * | ||
| 275 | * Returns an nlm_host structure that matches the passed-in | ||
| 276 | * [server address, transport protocol, NLM version, server hostname]. | ||
| 277 | * If one doesn't already exist in the host cache, a new handle is | ||
| 278 | * created and returned. | ||
| 175 | */ | 279 | */ |
| 176 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin, | 280 | struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap, |
| 177 | int proto, u32 version, | 281 | const size_t salen, |
| 178 | const char *hostname, | 282 | const unsigned short protocol, |
| 179 | unsigned int hostname_len) | 283 | const u32 version, const char *hostname) |
| 180 | { | 284 | { |
| 181 | struct sockaddr_in ssin = {0}; | 285 | const struct sockaddr source = { |
| 182 | 286 | .sa_family = AF_UNSPEC, | |
| 183 | return nlm_lookup_host(0, sin, proto, version, | 287 | }; |
| 184 | hostname, hostname_len, &ssin); | 288 | struct nlm_lookup_host_info ni = { |
| 289 | .server = 0, | ||
| 290 | .sap = sap, | ||
| 291 | .salen = salen, | ||
| 292 | .protocol = protocol, | ||
| 293 | .version = version, | ||
| 294 | .hostname = hostname, | ||
| 295 | .hostname_len = strlen(hostname), | ||
| 296 | .src_sap = &source, | ||
| 297 | .src_len = sizeof(source), | ||
| 298 | }; | ||
| 299 | |||
| 300 | dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__, | ||
| 301 | (hostname ? hostname : "<none>"), version, | ||
| 302 | (protocol == IPPROTO_UDP ? "udp" : "tcp")); | ||
| 303 | |||
| 304 | return nlm_lookup_host(&ni); | ||
| 185 | } | 305 | } |
| 186 | 306 | ||
| 187 | /* | 307 | /** |
| 188 | * Find an NLM client handle in the cache. If there is none, create it. | 308 | * nlmsvc_lookup_host - Find an NLM host handle matching a remote client |
| 309 | * @rqstp: incoming NLM request | ||
| 310 | * @hostname: name of client host | ||
| 311 | * @hostname_len: length of client hostname | ||
| 312 | * | ||
| 313 | * Returns an nlm_host structure that matches the [client address, | ||
| 314 | * transport protocol, NLM version, client hostname] of the passed-in | ||
| 315 | * NLM request. If one doesn't already exist in the host cache, a | ||
| 316 | * new handle is created and returned. | ||
| 317 | * | ||
| 318 | * Before possibly creating a new nlm_host, construct a sockaddr | ||
| 319 | * for a specific source address in case the local system has | ||
| 320 | * multiple network addresses. The family of the address in | ||
| 321 | * rq_daddr is guaranteed to be the same as the family of the | ||
| 322 | * address in rq_addr, so it's safe to use the same family for | ||
| 323 | * the source address. | ||
| 189 | */ | 324 | */ |
| 190 | struct nlm_host * | 325 | struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp, |
| 191 | nlmsvc_lookup_host(struct svc_rqst *rqstp, | 326 | const char *hostname, |
| 192 | const char *hostname, unsigned int hostname_len) | 327 | const size_t hostname_len) |
| 193 | { | 328 | { |
| 194 | struct sockaddr_in ssin = {0}; | 329 | struct sockaddr_in sin = { |
| 330 | .sin_family = AF_INET, | ||
| 331 | }; | ||
| 332 | struct sockaddr_in6 sin6 = { | ||
| 333 | .sin6_family = AF_INET6, | ||
| 334 | }; | ||
| 335 | struct nlm_lookup_host_info ni = { | ||
| 336 | .server = 1, | ||
| 337 | .sap = svc_addr(rqstp), | ||
| 338 | .salen = rqstp->rq_addrlen, | ||
| 339 | .protocol = rqstp->rq_prot, | ||
| 340 | .version = rqstp->rq_vers, | ||
| 341 | .hostname = hostname, | ||
| 342 | .hostname_len = hostname_len, | ||
| 343 | .src_len = rqstp->rq_addrlen, | ||
| 344 | }; | ||
| 345 | |||
| 346 | dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__, | ||
| 347 | (int)hostname_len, hostname, rqstp->rq_vers, | ||
| 348 | (rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp")); | ||
| 349 | |||
| 350 | switch (ni.sap->sa_family) { | ||
| 351 | case AF_INET: | ||
| 352 | sin.sin_addr.s_addr = rqstp->rq_daddr.addr.s_addr; | ||
| 353 | ni.src_sap = (struct sockaddr *)&sin; | ||
| 354 | break; | ||
| 355 | case AF_INET6: | ||
| 356 | ipv6_addr_copy(&sin6.sin6_addr, &rqstp->rq_daddr.addr6); | ||
| 357 | ni.src_sap = (struct sockaddr *)&sin6; | ||
| 358 | break; | ||
| 359 | default: | ||
| 360 | return NULL; | ||
| 361 | } | ||
| 195 | 362 | ||
| 196 | ssin.sin_addr = rqstp->rq_daddr.addr; | 363 | return nlm_lookup_host(&ni); |
| 197 | return nlm_lookup_host(1, svc_addr_in(rqstp), | ||
| 198 | rqstp->rq_prot, rqstp->rq_vers, | ||
| 199 | hostname, hostname_len, &ssin); | ||
| 200 | } | 364 | } |
| 201 | 365 | ||
| 202 | /* | 366 | /* |
| @@ -207,9 +371,8 @@ nlm_bind_host(struct nlm_host *host) | |||
| 207 | { | 371 | { |
| 208 | struct rpc_clnt *clnt; | 372 | struct rpc_clnt *clnt; |
| 209 | 373 | ||
| 210 | dprintk("lockd: nlm_bind_host("NIPQUAD_FMT"->"NIPQUAD_FMT")\n", | 374 | dprintk("lockd: nlm_bind_host %s (%s), my addr=%s\n", |
| 211 | NIPQUAD(host->h_saddr.sin_addr), | 375 | host->h_name, host->h_addrbuf, host->h_srcaddrbuf); |
| 212 | NIPQUAD(host->h_addr.sin_addr)); | ||
| 213 | 376 | ||
| 214 | /* Lock host handle */ | 377 | /* Lock host handle */ |
| 215 | mutex_lock(&host->h_mutex); | 378 | mutex_lock(&host->h_mutex); |
| @@ -221,7 +384,7 @@ nlm_bind_host(struct nlm_host *host) | |||
| 221 | if (time_after_eq(jiffies, host->h_nextrebind)) { | 384 | if (time_after_eq(jiffies, host->h_nextrebind)) { |
| 222 | rpc_force_rebind(clnt); | 385 | rpc_force_rebind(clnt); |
| 223 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; | 386 | host->h_nextrebind = jiffies + NLM_HOST_REBIND; |
| 224 | dprintk("lockd: next rebind in %ld jiffies\n", | 387 | dprintk("lockd: next rebind in %lu jiffies\n", |
| 225 | host->h_nextrebind - jiffies); | 388 | host->h_nextrebind - jiffies); |
| 226 | } | 389 | } |
| 227 | } else { | 390 | } else { |
| @@ -234,9 +397,9 @@ nlm_bind_host(struct nlm_host *host) | |||
| 234 | }; | 397 | }; |
| 235 | struct rpc_create_args args = { | 398 | struct rpc_create_args args = { |
| 236 | .protocol = host->h_proto, | 399 | .protocol = host->h_proto, |
| 237 | .address = (struct sockaddr *)&host->h_addr, | 400 | .address = nlm_addr(host), |
| 238 | .addrsize = sizeof(host->h_addr), | 401 | .addrsize = host->h_addrlen, |
| 239 | .saddress = (struct sockaddr *)&host->h_saddr, | 402 | .saddress = nlm_srcaddr(host), |
| 240 | .timeout = &timeparms, | 403 | .timeout = &timeparms, |
| 241 | .servername = host->h_name, | 404 | .servername = host->h_name, |
| 242 | .program = &nlm_program, | 405 | .program = &nlm_program, |
| @@ -324,12 +487,16 @@ void nlm_host_rebooted(const struct sockaddr_in *sin, | |||
| 324 | struct nsm_handle *nsm; | 487 | struct nsm_handle *nsm; |
| 325 | struct nlm_host *host; | 488 | struct nlm_host *host; |
| 326 | 489 | ||
| 327 | dprintk("lockd: nlm_host_rebooted(%s, %u.%u.%u.%u)\n", | 490 | nsm = nsm_find((struct sockaddr *)sin, sizeof(*sin), |
| 328 | hostname, NIPQUAD(sin->sin_addr)); | 491 | hostname, hostname_len, 0); |
| 329 | 492 | if (nsm == NULL) { | |
| 330 | /* Find the NSM handle for this peer */ | 493 | dprintk("lockd: never saw rebooted peer '%.*s' before\n", |
| 331 | if (!(nsm = __nsm_find(sin, hostname, hostname_len, 0))) | 494 | hostname_len, hostname); |
| 332 | return; | 495 | return; |
| 496 | } | ||
| 497 | |||
| 498 | dprintk("lockd: nlm_host_rebooted(%.*s, %s)\n", | ||
| 499 | hostname_len, hostname, nsm->sm_addrbuf); | ||
| 333 | 500 | ||
| 334 | /* When reclaiming locks on this peer, make sure that | 501 | /* When reclaiming locks on this peer, make sure that |
| 335 | * we set up a new notification */ | 502 | * we set up a new notification */ |
| @@ -461,22 +628,23 @@ nlm_gc_hosts(void) | |||
| 461 | static LIST_HEAD(nsm_handles); | 628 | static LIST_HEAD(nsm_handles); |
| 462 | static DEFINE_SPINLOCK(nsm_lock); | 629 | static DEFINE_SPINLOCK(nsm_lock); |
| 463 | 630 | ||
| 464 | static struct nsm_handle * | 631 | static struct nsm_handle *nsm_find(const struct sockaddr *sap, |
| 465 | __nsm_find(const struct sockaddr_in *sin, | 632 | const size_t salen, |
| 466 | const char *hostname, unsigned int hostname_len, | 633 | const char *hostname, |
| 467 | int create) | 634 | const size_t hostname_len, |
| 635 | const int create) | ||
| 468 | { | 636 | { |
| 469 | struct nsm_handle *nsm = NULL; | 637 | struct nsm_handle *nsm = NULL; |
| 470 | struct nsm_handle *pos; | 638 | struct nsm_handle *pos; |
| 471 | 639 | ||
| 472 | if (!sin) | 640 | if (!sap) |
| 473 | return NULL; | 641 | return NULL; |
| 474 | 642 | ||
| 475 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { | 643 | if (hostname && memchr(hostname, '/', hostname_len) != NULL) { |
| 476 | if (printk_ratelimit()) { | 644 | if (printk_ratelimit()) { |
| 477 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " | 645 | printk(KERN_WARNING "Invalid hostname \"%.*s\" " |
| 478 | "in NFS lock request\n", | 646 | "in NFS lock request\n", |
| 479 | hostname_len, hostname); | 647 | (int)hostname_len, hostname); |
| 480 | } | 648 | } |
| 481 | return NULL; | 649 | return NULL; |
| 482 | } | 650 | } |
| @@ -489,7 +657,7 @@ retry: | |||
| 489 | if (strlen(pos->sm_name) != hostname_len | 657 | if (strlen(pos->sm_name) != hostname_len |
| 490 | || memcmp(pos->sm_name, hostname, hostname_len)) | 658 | || memcmp(pos->sm_name, hostname, hostname_len)) |
| 491 | continue; | 659 | continue; |
| 492 | } else if (!nlm_cmp_addr(&pos->sm_addr, sin)) | 660 | } else if (!nlm_cmp_addr(nsm_addr(pos), sap)) |
| 493 | continue; | 661 | continue; |
| 494 | atomic_inc(&pos->sm_count); | 662 | atomic_inc(&pos->sm_count); |
| 495 | kfree(nsm); | 663 | kfree(nsm); |
| @@ -509,10 +677,13 @@ retry: | |||
| 509 | if (nsm == NULL) | 677 | if (nsm == NULL) |
| 510 | return NULL; | 678 | return NULL; |
| 511 | 679 | ||
| 512 | nsm->sm_addr = *sin; | 680 | memcpy(nsm_addr(nsm), sap, salen); |
| 681 | nsm->sm_addrlen = salen; | ||
| 513 | nsm->sm_name = (char *) (nsm + 1); | 682 | nsm->sm_name = (char *) (nsm + 1); |
| 514 | memcpy(nsm->sm_name, hostname, hostname_len); | 683 | memcpy(nsm->sm_name, hostname, hostname_len); |
| 515 | nsm->sm_name[hostname_len] = '\0'; | 684 | nsm->sm_name[hostname_len] = '\0'; |
| 685 | nlm_display_address((struct sockaddr *)&nsm->sm_addr, | ||
| 686 | nsm->sm_addrbuf, sizeof(nsm->sm_addrbuf)); | ||
| 516 | atomic_set(&nsm->sm_count, 1); | 687 | atomic_set(&nsm->sm_count, 1); |
| 517 | goto retry; | 688 | goto retry; |
| 518 | 689 | ||
| @@ -521,13 +692,6 @@ found: | |||
| 521 | return nsm; | 692 | return nsm; |
| 522 | } | 693 | } |
| 523 | 694 | ||
| 524 | static struct nsm_handle * | ||
| 525 | nsm_find(const struct sockaddr_in *sin, const char *hostname, | ||
| 526 | unsigned int hostname_len) | ||
| 527 | { | ||
| 528 | return __nsm_find(sin, hostname, hostname_len, 1); | ||
| 529 | } | ||
| 530 | |||
| 531 | /* | 695 | /* |
| 532 | * Release an NSM handle | 696 | * Release an NSM handle |
| 533 | */ | 697 | */ |
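For reference, a hedged sketch of a caller using the reworked sockaddr-based nlmclnt_lookup_host() signature above; the address, hostname, and function name are illustrative only (the real caller in this series is nlmclnt_init() in fs/lockd/clntlock.c), and the usual lockd headers (<linux/in.h>, <linux/lockd/lockd.h>) are assumed to be included:

    /* Hypothetical caller: find the NLM peer for an NFSv3 (NLM v4)
     * server reached over TCP.
     */
    static int my_find_peer(void)
    {
            struct sockaddr_in sin = {
                    .sin_family             = AF_INET,
                    .sin_addr.s_addr        = htonl(0xc0a80101),    /* 192.168.1.1, example */
            };
            struct nlm_host *host;

            host = nlmclnt_lookup_host((struct sockaddr *)&sin, sizeof(sin),
                                       IPPROTO_TCP, 4, "server.example.net");
            if (host == NULL)
                    return -ENOLCK;

            /* ... use host, then drop the reference ... */
            nlm_release_host(host);
            return 0;
    }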
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e4d563543b11..4e7e958e8f67 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c | |||
| @@ -51,7 +51,7 @@ nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res) | |||
| 51 | 51 | ||
| 52 | memset(&args, 0, sizeof(args)); | 52 | memset(&args, 0, sizeof(args)); |
| 53 | args.mon_name = nsm->sm_name; | 53 | args.mon_name = nsm->sm_name; |
| 54 | args.addr = nsm->sm_addr.sin_addr.s_addr; | 54 | args.addr = nsm_addr_in(nsm)->sin_addr.s_addr; |
| 55 | args.prog = NLM_PROGRAM; | 55 | args.prog = NLM_PROGRAM; |
| 56 | args.vers = 3; | 56 | args.vers = 3; |
| 57 | args.proc = NLMPROC_NSM_NOTIFY; | 57 | args.proc = NLMPROC_NSM_NOTIFY; |
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 5bd9bf0fa9df..c631a83931ce 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c | |||
| @@ -51,7 +51,6 @@ static DEFINE_MUTEX(nlmsvc_mutex); | |||
| 51 | static unsigned int nlmsvc_users; | 51 | static unsigned int nlmsvc_users; |
| 52 | static struct task_struct *nlmsvc_task; | 52 | static struct task_struct *nlmsvc_task; |
| 53 | static struct svc_rqst *nlmsvc_rqst; | 53 | static struct svc_rqst *nlmsvc_rqst; |
| 54 | int nlmsvc_grace_period; | ||
| 55 | unsigned long nlmsvc_timeout; | 54 | unsigned long nlmsvc_timeout; |
| 56 | 55 | ||
| 57 | /* | 56 | /* |
| @@ -85,27 +84,23 @@ static unsigned long get_lockd_grace_period(void) | |||
| 85 | return nlm_timeout * 5 * HZ; | 84 | return nlm_timeout * 5 * HZ; |
| 86 | } | 85 | } |
| 87 | 86 | ||
| 88 | unsigned long get_nfs_grace_period(void) | 87 | static struct lock_manager lockd_manager = { |
| 89 | { | 88 | }; |
| 90 | unsigned long lockdgrace = get_lockd_grace_period(); | ||
| 91 | unsigned long nfsdgrace = 0; | ||
| 92 | |||
| 93 | if (nlmsvc_ops) | ||
| 94 | nfsdgrace = nlmsvc_ops->get_grace_period(); | ||
| 95 | |||
| 96 | return max(lockdgrace, nfsdgrace); | ||
| 97 | } | ||
| 98 | EXPORT_SYMBOL(get_nfs_grace_period); | ||
| 99 | 89 | ||
| 100 | static unsigned long set_grace_period(void) | 90 | static void grace_ender(struct work_struct *not_used) |
| 101 | { | 91 | { |
| 102 | nlmsvc_grace_period = 1; | 92 | locks_end_grace(&lockd_manager); |
| 103 | return get_nfs_grace_period() + jiffies; | ||
| 104 | } | 93 | } |
| 105 | 94 | ||
| 106 | static inline void clear_grace_period(void) | 95 | static DECLARE_DELAYED_WORK(grace_period_end, grace_ender); |
| 96 | |||
| 97 | static void set_grace_period(void) | ||
| 107 | { | 98 | { |
| 108 | nlmsvc_grace_period = 0; | 99 | unsigned long grace_period = get_lockd_grace_period(); |
| 100 | |||
| 101 | locks_start_grace(&lockd_manager); | ||
| 102 | cancel_delayed_work_sync(&grace_period_end); | ||
| 103 | schedule_delayed_work(&grace_period_end, grace_period); | ||
| 109 | } | 104 | } |
| 110 | 105 | ||
| 111 | /* | 106 | /* |
| @@ -116,7 +111,6 @@ lockd(void *vrqstp) | |||
| 116 | { | 111 | { |
| 117 | int err = 0, preverr = 0; | 112 | int err = 0, preverr = 0; |
| 118 | struct svc_rqst *rqstp = vrqstp; | 113 | struct svc_rqst *rqstp = vrqstp; |
| 119 | unsigned long grace_period_expire; | ||
| 120 | 114 | ||
| 121 | /* try_to_freeze() is called from svc_recv() */ | 115 | /* try_to_freeze() is called from svc_recv() */ |
| 122 | set_freezable(); | 116 | set_freezable(); |
| @@ -139,7 +133,7 @@ lockd(void *vrqstp) | |||
| 139 | nlm_timeout = LOCKD_DFLT_TIMEO; | 133 | nlm_timeout = LOCKD_DFLT_TIMEO; |
| 140 | nlmsvc_timeout = nlm_timeout * HZ; | 134 | nlmsvc_timeout = nlm_timeout * HZ; |
| 141 | 135 | ||
| 142 | grace_period_expire = set_grace_period(); | 136 | set_grace_period(); |
| 143 | 137 | ||
| 144 | /* | 138 | /* |
| 145 | * The main request loop. We don't terminate until the last | 139 | * The main request loop. We don't terminate until the last |
| @@ -153,21 +147,12 @@ lockd(void *vrqstp) | |||
| 153 | flush_signals(current); | 147 | flush_signals(current); |
| 154 | if (nlmsvc_ops) { | 148 | if (nlmsvc_ops) { |
| 155 | nlmsvc_invalidate_all(); | 149 | nlmsvc_invalidate_all(); |
| 156 | grace_period_expire = set_grace_period(); | 150 | set_grace_period(); |
| 157 | } | 151 | } |
| 158 | continue; | 152 | continue; |
| 159 | } | 153 | } |
| 160 | 154 | ||
| 161 | /* | 155 | timeout = nlmsvc_retry_blocked(); |
| 162 | * Retry any blocked locks that have been notified by | ||
| 163 | * the VFS. Don't do this during grace period. | ||
| 164 | * (Theoretically, there shouldn't even be blocked locks | ||
| 165 | * during grace period). | ||
| 166 | */ | ||
| 167 | if (!nlmsvc_grace_period) { | ||
| 168 | timeout = nlmsvc_retry_blocked(); | ||
| 169 | } else if (time_before(grace_period_expire, jiffies)) | ||
| 170 | clear_grace_period(); | ||
| 171 | 156 | ||
| 172 | /* | 157 | /* |
| 173 | * Find a socket with data available and call its | 158 | * Find a socket with data available and call its |
| @@ -195,6 +180,7 @@ lockd(void *vrqstp) | |||
| 195 | svc_process(rqstp); | 180 | svc_process(rqstp); |
| 196 | } | 181 | } |
| 197 | flush_signals(current); | 182 | flush_signals(current); |
| 183 | cancel_delayed_work_sync(&grace_period_end); | ||
| 198 | if (nlmsvc_ops) | 184 | if (nlmsvc_ops) |
| 199 | nlmsvc_invalidate_all(); | 185 | nlmsvc_invalidate_all(); |
| 200 | nlm_shutdown_hosts(); | 186 | nlm_shutdown_hosts(); |
| @@ -203,25 +189,28 @@ lockd(void *vrqstp) | |||
| 203 | } | 189 | } |
| 204 | 190 | ||
| 205 | /* | 191 | /* |
| 206 | * Make any sockets that are needed but not present. | 192 | * Ensure there are active UDP and TCP listeners for lockd. |
| 207 | * If nlm_udpport or nlm_tcpport were set as module | 193 | * |
| 208 | * options, make those sockets unconditionally | 194 | * Even if we have only TCP NFS mounts and/or TCP NFSDs, some |
| 195 | * local services (such as rpc.statd) still require UDP, and | ||
| 196 | * some NFS servers do not yet support NLM over TCP. | ||
| 197 | * | ||
| 198 | * Returns zero if all listeners are available; otherwise a | ||
| 199 | * negative errno value is returned. | ||
| 209 | */ | 200 | */ |
| 210 | static int make_socks(struct svc_serv *serv, int proto) | 201 | static int make_socks(struct svc_serv *serv) |
| 211 | { | 202 | { |
| 212 | static int warned; | 203 | static int warned; |
| 213 | struct svc_xprt *xprt; | 204 | struct svc_xprt *xprt; |
| 214 | int err = 0; | 205 | int err = 0; |
| 215 | 206 | ||
| 216 | if (proto == IPPROTO_UDP || nlm_udpport) { | 207 | xprt = svc_find_xprt(serv, "udp", 0, 0); |
| 217 | xprt = svc_find_xprt(serv, "udp", 0, 0); | 208 | if (!xprt) |
| 218 | if (!xprt) | 209 | err = svc_create_xprt(serv, "udp", nlm_udpport, |
| 219 | err = svc_create_xprt(serv, "udp", nlm_udpport, | 210 | SVC_SOCK_DEFAULTS); |
| 220 | SVC_SOCK_DEFAULTS); | 211 | else |
| 221 | else | 212 | svc_xprt_put(xprt); |
| 222 | svc_xprt_put(xprt); | 213 | if (err >= 0) { |
| 223 | } | ||
| 224 | if (err >= 0 && (proto == IPPROTO_TCP || nlm_tcpport)) { | ||
| 225 | xprt = svc_find_xprt(serv, "tcp", 0, 0); | 214 | xprt = svc_find_xprt(serv, "tcp", 0, 0); |
| 226 | if (!xprt) | 215 | if (!xprt) |
| 227 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, | 216 | err = svc_create_xprt(serv, "tcp", nlm_tcpport, |
| @@ -241,8 +230,7 @@ static int make_socks(struct svc_serv *serv, int proto) | |||
| 241 | /* | 230 | /* |
| 242 | * Bring up the lockd process if it's not already up. | 231 | * Bring up the lockd process if it's not already up. |
| 243 | */ | 232 | */ |
| 244 | int | 233 | int lockd_up(void) |
| 245 | lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | ||
| 246 | { | 234 | { |
| 247 | struct svc_serv *serv; | 235 | struct svc_serv *serv; |
| 248 | int error = 0; | 236 | int error = 0; |
| @@ -251,11 +239,8 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | |||
| 251 | /* | 239 | /* |
| 252 | * Check whether we're already up and running. | 240 | * Check whether we're already up and running. |
| 253 | */ | 241 | */ |
| 254 | if (nlmsvc_rqst) { | 242 | if (nlmsvc_rqst) |
| 255 | if (proto) | ||
| 256 | error = make_socks(nlmsvc_rqst->rq_server, proto); | ||
| 257 | goto out; | 243 | goto out; |
| 258 | } | ||
| 259 | 244 | ||
| 260 | /* | 245 | /* |
| 261 | * Sanity check: if there's no pid, | 246 | * Sanity check: if there's no pid, |
| @@ -266,13 +251,14 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */ | |||
| 266 | "lockd_up: no pid, %d users??\n", nlmsvc_users); | 251 | "lockd_up: no pid, %d users??\n", nlmsvc_users); |
| 267 | 252 | ||
| 268 | error = -ENOMEM; | 253 | error = -ENOMEM; |
| 269 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL); | 254 | serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, AF_INET, NULL); |
| 270 | if (!serv) { | 255 | if (!serv) { |
| 271 | printk(KERN_WARNING "lockd_up: create service failed\n"); | 256 | printk(KERN_WARNING "lockd_up: create service failed\n"); |
| 272 | goto out; | 257 | goto out; |
| 273 | } | 258 | } |
| 274 | 259 | ||
| 275 | if ((error = make_socks(serv, proto)) < 0) | 260 | error = make_socks(serv); |
| 261 | if (error < 0) | ||
| 276 | goto destroy_and_out; | 262 | goto destroy_and_out; |
| 277 | 263 | ||
| 278 | /* | 264 | /* |
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4a714f64515b..014f6ce48172 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c | |||
| @@ -88,12 +88,6 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 88 | dprintk("lockd: TEST4 called\n"); | 88 | dprintk("lockd: TEST4 called\n"); |
| 89 | resp->cookie = argp->cookie; | 89 | resp->cookie = argp->cookie; |
| 90 | 90 | ||
| 91 | /* Don't accept test requests during grace period */ | ||
| 92 | if (nlmsvc_grace_period) { | ||
| 93 | resp->status = nlm_lck_denied_grace_period; | ||
| 94 | return rc; | ||
| 95 | } | ||
| 96 | |||
| 97 | /* Obtain client and file */ | 91 | /* Obtain client and file */ |
| 98 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) | 92 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) |
| 99 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 93 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
| @@ -122,12 +116,6 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 122 | 116 | ||
| 123 | resp->cookie = argp->cookie; | 117 | resp->cookie = argp->cookie; |
| 124 | 118 | ||
| 125 | /* Don't accept new lock requests during grace period */ | ||
| 126 | if (nlmsvc_grace_period && !argp->reclaim) { | ||
| 127 | resp->status = nlm_lck_denied_grace_period; | ||
| 128 | return rc; | ||
| 129 | } | ||
| 130 | |||
| 131 | /* Obtain client and file */ | 119 | /* Obtain client and file */ |
| 132 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) | 120 | if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) |
| 133 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 121 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
| @@ -146,7 +134,8 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 146 | 134 | ||
| 147 | /* Now try to lock the file */ | 135 | /* Now try to lock the file */ |
| 148 | resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, | 136 | resp->status = nlmsvc_lock(rqstp, file, host, &argp->lock, |
| 149 | argp->block, &argp->cookie); | 137 | argp->block, &argp->cookie, |
| 138 | argp->reclaim); | ||
| 150 | if (resp->status == nlm_drop_reply) | 139 | if (resp->status == nlm_drop_reply) |
| 151 | rc = rpc_drop_reply; | 140 | rc = rpc_drop_reply; |
| 152 | else | 141 | else |
| @@ -169,7 +158,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 169 | resp->cookie = argp->cookie; | 158 | resp->cookie = argp->cookie; |
| 170 | 159 | ||
| 171 | /* Don't accept requests during grace period */ | 160 | /* Don't accept requests during grace period */ |
| 172 | if (nlmsvc_grace_period) { | 161 | if (locks_in_grace()) { |
| 173 | resp->status = nlm_lck_denied_grace_period; | 162 | resp->status = nlm_lck_denied_grace_period; |
| 174 | return rpc_success; | 163 | return rpc_success; |
| 175 | } | 164 | } |
| @@ -202,7 +191,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 202 | resp->cookie = argp->cookie; | 191 | resp->cookie = argp->cookie; |
| 203 | 192 | ||
| 204 | /* Don't accept new lock requests during grace period */ | 193 | /* Don't accept new lock requests during grace period */ |
| 205 | if (nlmsvc_grace_period) { | 194 | if (locks_in_grace()) { |
| 206 | resp->status = nlm_lck_denied_grace_period; | 195 | resp->status = nlm_lck_denied_grace_period; |
| 207 | return rpc_success; | 196 | return rpc_success; |
| 208 | } | 197 | } |
| @@ -231,7 +220,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 231 | resp->cookie = argp->cookie; | 220 | resp->cookie = argp->cookie; |
| 232 | 221 | ||
| 233 | dprintk("lockd: GRANTED called\n"); | 222 | dprintk("lockd: GRANTED called\n"); |
| 234 | resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); | 223 | resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); |
| 235 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); | 224 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); |
| 236 | return rpc_success; | 225 | return rpc_success; |
| 237 | } | 226 | } |
| @@ -341,7 +330,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 341 | resp->cookie = argp->cookie; | 330 | resp->cookie = argp->cookie; |
| 342 | 331 | ||
| 343 | /* Don't accept new lock requests during grace period */ | 332 | /* Don't accept new lock requests during grace period */ |
| 344 | if (nlmsvc_grace_period && !argp->reclaim) { | 333 | if (locks_in_grace() && !argp->reclaim) { |
| 345 | resp->status = nlm_lck_denied_grace_period; | 334 | resp->status = nlm_lck_denied_grace_period; |
| 346 | return rpc_success; | 335 | return rpc_success; |
| 347 | } | 336 | } |
| @@ -374,7 +363,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 374 | resp->cookie = argp->cookie; | 363 | resp->cookie = argp->cookie; |
| 375 | 364 | ||
| 376 | /* Don't accept requests during grace period */ | 365 | /* Don't accept requests during grace period */ |
| 377 | if (nlmsvc_grace_period) { | 366 | if (locks_in_grace()) { |
| 378 | resp->status = nlm_lck_denied_grace_period; | 367 | resp->status = nlm_lck_denied_grace_period; |
| 379 | return rpc_success; | 368 | return rpc_success; |
| 380 | } | 369 | } |
| @@ -432,11 +421,9 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
| 432 | { | 421 | { |
| 433 | struct sockaddr_in saddr; | 422 | struct sockaddr_in saddr; |
| 434 | 423 | ||
| 435 | memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); | ||
| 436 | |||
| 437 | dprintk("lockd: SM_NOTIFY called\n"); | 424 | dprintk("lockd: SM_NOTIFY called\n"); |
| 438 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 425 | |
| 439 | || ntohs(saddr.sin_port) >= 1024) { | 426 | if (!nlm_privileged_requester(rqstp)) { |
| 440 | char buf[RPC_MAX_ADDRBUFLEN]; | 427 | char buf[RPC_MAX_ADDRBUFLEN]; |
| 441 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", | 428 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", |
| 442 | svc_print_addr(rqstp, buf, sizeof(buf))); | 429 | svc_print_addr(rqstp, buf, sizeof(buf))); |
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index cf0d5c2c318d..6063a8e4b9f3 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c | |||
| @@ -360,7 +360,7 @@ nlmsvc_defer_lock_rqst(struct svc_rqst *rqstp, struct nlm_block *block) | |||
| 360 | __be32 | 360 | __be32 |
| 361 | nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | 361 | nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, |
| 362 | struct nlm_host *host, struct nlm_lock *lock, int wait, | 362 | struct nlm_host *host, struct nlm_lock *lock, int wait, |
| 363 | struct nlm_cookie *cookie) | 363 | struct nlm_cookie *cookie, int reclaim) |
| 364 | { | 364 | { |
| 365 | struct nlm_block *block = NULL; | 365 | struct nlm_block *block = NULL; |
| 366 | int error; | 366 | int error; |
| @@ -406,6 +406,15 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
| 406 | goto out; | 406 | goto out; |
| 407 | } | 407 | } |
| 408 | 408 | ||
| 409 | if (locks_in_grace() && !reclaim) { | ||
| 410 | ret = nlm_lck_denied_grace_period; | ||
| 411 | goto out; | ||
| 412 | } | ||
| 413 | if (reclaim && !locks_in_grace()) { | ||
| 414 | ret = nlm_lck_denied_grace_period; | ||
| 415 | goto out; | ||
| 416 | } | ||
| 417 | |||
| 409 | if (!wait) | 418 | if (!wait) |
| 410 | lock->fl.fl_flags &= ~FL_SLEEP; | 419 | lock->fl.fl_flags &= ~FL_SLEEP; |
| 411 | error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); | 420 | error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); |
| @@ -502,6 +511,10 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, | |||
| 502 | goto out; | 511 | goto out; |
| 503 | } | 512 | } |
| 504 | 513 | ||
| 514 | if (locks_in_grace()) { | ||
| 515 | ret = nlm_lck_denied_grace_period; | ||
| 516 | goto out; | ||
| 517 | } | ||
| 505 | error = vfs_test_lock(file->f_file, &lock->fl); | 518 | error = vfs_test_lock(file->f_file, &lock->fl); |
| 506 | if (error == FILE_LOCK_DEFERRED) { | 519 | if (error == FILE_LOCK_DEFERRED) { |
| 507 | ret = nlmsvc_defer_lock_rqst(rqstp, block); | 520 | ret = nlmsvc_defer_lock_rqst(rqstp, block); |
| @@ -582,6 +595,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) | |||
| 582 | (long long)lock->fl.fl_start, | 595 | (long long)lock->fl.fl_start, |
| 583 | (long long)lock->fl.fl_end); | 596 | (long long)lock->fl.fl_end); |
| 584 | 597 | ||
| 598 | if (locks_in_grace()) | ||
| 599 | return nlm_lck_denied_grace_period; | ||
| 600 | |||
| 585 | mutex_lock(&file->f_mutex); | 601 | mutex_lock(&file->f_mutex); |
| 586 | block = nlmsvc_lookup_block(file, lock); | 602 | block = nlmsvc_lookup_block(file, lock); |
| 587 | mutex_unlock(&file->f_mutex); | 603 | mutex_unlock(&file->f_mutex); |
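Taken together, the two checks added to nlmsvc_lock() above encode a simple rule: while any lock manager is in its grace period only reclaim requests are granted, and once the grace period ends late reclaims are refused. A compact way to read the pair (an illustrative helper, not code from this patch):

    /* Equivalent of the paired grace-period checks added to nlmsvc_lock(). */
    static int nlm_lock_allowed(int reclaim)
    {
            if (locks_in_grace())
                    return reclaim;         /* grace period: reclaims only */
            return !reclaim;                /* normal operation: no late reclaims */
    }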
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 76262c1986f2..548b0bb2b84d 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c | |||
| @@ -117,12 +117,6 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 117 | dprintk("lockd: TEST called\n"); | 117 | dprintk("lockd: TEST called\n"); |
| 118 | resp->cookie = argp->cookie; | 118 | resp->cookie = argp->cookie; |
| 119 | 119 | ||
| 120 | /* Don't accept test requests during grace period */ | ||
| 121 | if (nlmsvc_grace_period) { | ||
| 122 | resp->status = nlm_lck_denied_grace_period; | ||
| 123 | return rc; | ||
| 124 | } | ||
| 125 | |||
| 126 | /* Obtain client and file */ | 120 | /* Obtain client and file */ |
| 127 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) | 121 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) |
| 128 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 122 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
| @@ -152,12 +146,6 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 152 | 146 | ||
| 153 | resp->cookie = argp->cookie; | 147 | resp->cookie = argp->cookie; |
| 154 | 148 | ||
| 155 | /* Don't accept new lock requests during grace period */ | ||
| 156 | if (nlmsvc_grace_period && !argp->reclaim) { | ||
| 157 | resp->status = nlm_lck_denied_grace_period; | ||
| 158 | return rc; | ||
| 159 | } | ||
| 160 | |||
| 161 | /* Obtain client and file */ | 149 | /* Obtain client and file */ |
| 162 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) | 150 | if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) |
| 163 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; | 151 | return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; |
| @@ -176,7 +164,8 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 176 | 164 | ||
| 177 | /* Now try to lock the file */ | 165 | /* Now try to lock the file */ |
| 178 | resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, | 166 | resp->status = cast_status(nlmsvc_lock(rqstp, file, host, &argp->lock, |
| 179 | argp->block, &argp->cookie)); | 167 | argp->block, &argp->cookie, |
| 168 | argp->reclaim)); | ||
| 180 | if (resp->status == nlm_drop_reply) | 169 | if (resp->status == nlm_drop_reply) |
| 181 | rc = rpc_drop_reply; | 170 | rc = rpc_drop_reply; |
| 182 | else | 171 | else |
| @@ -199,7 +188,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 199 | resp->cookie = argp->cookie; | 188 | resp->cookie = argp->cookie; |
| 200 | 189 | ||
| 201 | /* Don't accept requests during grace period */ | 190 | /* Don't accept requests during grace period */ |
| 202 | if (nlmsvc_grace_period) { | 191 | if (locks_in_grace()) { |
| 203 | resp->status = nlm_lck_denied_grace_period; | 192 | resp->status = nlm_lck_denied_grace_period; |
| 204 | return rpc_success; | 193 | return rpc_success; |
| 205 | } | 194 | } |
| @@ -232,7 +221,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 232 | resp->cookie = argp->cookie; | 221 | resp->cookie = argp->cookie; |
| 233 | 222 | ||
| 234 | /* Don't accept new lock requests during grace period */ | 223 | /* Don't accept new lock requests during grace period */ |
| 235 | if (nlmsvc_grace_period) { | 224 | if (locks_in_grace()) { |
| 236 | resp->status = nlm_lck_denied_grace_period; | 225 | resp->status = nlm_lck_denied_grace_period; |
| 237 | return rpc_success; | 226 | return rpc_success; |
| 238 | } | 227 | } |
| @@ -261,7 +250,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 261 | resp->cookie = argp->cookie; | 250 | resp->cookie = argp->cookie; |
| 262 | 251 | ||
| 263 | dprintk("lockd: GRANTED called\n"); | 252 | dprintk("lockd: GRANTED called\n"); |
| 264 | resp->status = nlmclnt_grant(svc_addr_in(rqstp), &argp->lock); | 253 | resp->status = nlmclnt_grant(svc_addr(rqstp), &argp->lock); |
| 265 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); | 254 | dprintk("lockd: GRANTED status %d\n", ntohl(resp->status)); |
| 266 | return rpc_success; | 255 | return rpc_success; |
| 267 | } | 256 | } |
| @@ -373,7 +362,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 373 | resp->cookie = argp->cookie; | 362 | resp->cookie = argp->cookie; |
| 374 | 363 | ||
| 375 | /* Don't accept new lock requests during grace period */ | 364 | /* Don't accept new lock requests during grace period */ |
| 376 | if (nlmsvc_grace_period && !argp->reclaim) { | 365 | if (locks_in_grace() && !argp->reclaim) { |
| 377 | resp->status = nlm_lck_denied_grace_period; | 366 | resp->status = nlm_lck_denied_grace_period; |
| 378 | return rpc_success; | 367 | return rpc_success; |
| 379 | } | 368 | } |
| @@ -406,7 +395,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, | |||
| 406 | resp->cookie = argp->cookie; | 395 | resp->cookie = argp->cookie; |
| 407 | 396 | ||
| 408 | /* Don't accept requests during grace period */ | 397 | /* Don't accept requests during grace period */ |
| 409 | if (nlmsvc_grace_period) { | 398 | if (locks_in_grace()) { |
| 410 | resp->status = nlm_lck_denied_grace_period; | 399 | resp->status = nlm_lck_denied_grace_period; |
| 411 | return rpc_success; | 400 | return rpc_success; |
| 412 | } | 401 | } |
| @@ -464,11 +453,9 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, | |||
| 464 | { | 453 | { |
| 465 | struct sockaddr_in saddr; | 454 | struct sockaddr_in saddr; |
| 466 | 455 | ||
| 467 | memcpy(&saddr, svc_addr_in(rqstp), sizeof(saddr)); | ||
| 468 | |||
| 469 | dprintk("lockd: SM_NOTIFY called\n"); | 456 | dprintk("lockd: SM_NOTIFY called\n"); |
| 470 | if (saddr.sin_addr.s_addr != htonl(INADDR_LOOPBACK) | 457 | |
| 471 | || ntohs(saddr.sin_port) >= 1024) { | 458 | if (!nlm_privileged_requester(rqstp)) { |
| 472 | char buf[RPC_MAX_ADDRBUFLEN]; | 459 | char buf[RPC_MAX_ADDRBUFLEN]; |
| 473 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", | 460 | printk(KERN_WARNING "lockd: rejected NSM callback from %s\n", |
| 474 | svc_print_addr(rqstp, buf, sizeof(buf))); | 461 | svc_print_addr(rqstp, buf, sizeof(buf))); |
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 198b4e55b373..34c2766e27c7 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c | |||
| @@ -418,7 +418,7 @@ EXPORT_SYMBOL_GPL(nlmsvc_unlock_all_by_sb); | |||
| 418 | static int | 418 | static int |
| 419 | nlmsvc_match_ip(void *datap, struct nlm_host *host) | 419 | nlmsvc_match_ip(void *datap, struct nlm_host *host) |
| 420 | { | 420 | { |
| 421 | return nlm_cmp_addr(&host->h_saddr, datap); | 421 | return nlm_cmp_addr(nlm_srcaddr(host), datap); |
| 422 | } | 422 | } |
| 423 | 423 | ||
| 424 | /** | 424 | /** |
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 3e459e18cc31..1f226290c67c 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c | |||
| @@ -351,8 +351,6 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp) | |||
| 351 | argp->state = ntohl(*p++); | 351 | argp->state = ntohl(*p++); |
| 352 | /* Preserve the address in network byte order */ | 352 | /* Preserve the address in network byte order */ |
| 353 | argp->addr = *p++; | 353 | argp->addr = *p++; |
| 354 | argp->vers = *p++; | ||
| 355 | argp->proto = *p++; | ||
| 356 | return xdr_argsize_check(rqstp, p); | 354 | return xdr_argsize_check(rqstp, p); |
| 357 | } | 355 | } |
| 358 | 356 | ||
diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 43ff9397e6c6..50c493a8ad8e 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c | |||
| @@ -358,8 +358,6 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp | |||
| 358 | argp->state = ntohl(*p++); | 358 | argp->state = ntohl(*p++); |
| 359 | /* Preserve the address in network byte order */ | 359 | /* Preserve the address in network byte order */ |
| 360 | argp->addr = *p++; | 360 | argp->addr = *p++; |
| 361 | argp->vers = *p++; | ||
| 362 | argp->proto = *p++; | ||
| 363 | return xdr_argsize_check(rqstp, p); | 361 | return xdr_argsize_check(rqstp, p); |
| 364 | } | 362 | } |
| 365 | 363 | ||
diff --git a/fs/mpage.c b/fs/mpage.c index dbcc7af76a15..552b80b3facc 100644 --- a/fs/mpage.c +++ b/fs/mpage.c | |||
| @@ -6,7 +6,7 @@ | |||
| 6 | * Contains functions related to preparing and submitting BIOs which contain | 6 | * Contains functions related to preparing and submitting BIOs which contain |
| 7 | * multiple pagecache pages. | 7 | * multiple pagecache pages. |
| 8 | * | 8 | * |
| 9 | * 15May2002 akpm@zip.com.au | 9 | * 15May2002 Andrew Morton |
| 10 | * Initial version | 10 | * Initial version |
| 11 | * 27Jun2002 axboe@suse.de | 11 | * 27Jun2002 axboe@suse.de |
| 12 | * use bio_add_page() to build bio's just the right size | 12 | * use bio_add_page() to build bio's just the right size |
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index f447f4b4476c..c2e9cfd9e5a4 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c | |||
| @@ -40,6 +40,16 @@ unsigned short nfs_callback_tcpport; | |||
| 40 | static const int nfs_set_port_min = 0; | 40 | static const int nfs_set_port_min = 0; |
| 41 | static const int nfs_set_port_max = 65535; | 41 | static const int nfs_set_port_max = 65535; |
| 42 | 42 | ||
| 43 | /* | ||
| 44 | * If the kernel has IPv6 support available, always listen for | ||
| 45 | * both AF_INET and AF_INET6 requests. | ||
| 46 | */ | ||
| 47 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | ||
| 48 | static const sa_family_t nfs_callback_family = AF_INET6; | ||
| 49 | #else | ||
| 50 | static const sa_family_t nfs_callback_family = AF_INET; | ||
| 51 | #endif | ||
| 52 | |||
| 43 | static int param_set_port(const char *val, struct kernel_param *kp) | 53 | static int param_set_port(const char *val, struct kernel_param *kp) |
| 44 | { | 54 | { |
| 45 | char *endp; | 55 | char *endp; |
| @@ -105,7 +115,8 @@ int nfs_callback_up(void) | |||
| 105 | mutex_lock(&nfs_callback_mutex); | 115 | mutex_lock(&nfs_callback_mutex); |
| 106 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) | 116 | if (nfs_callback_info.users++ || nfs_callback_info.task != NULL) |
| 107 | goto out; | 117 | goto out; |
| 108 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL); | 118 | serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, |
| 119 | nfs_callback_family, NULL); | ||
| 109 | ret = -ENOMEM; | 120 | ret = -ENOMEM; |
| 110 | if (!serv) | 121 | if (!serv) |
| 111 | goto out_err; | 122 | goto out_err; |
| @@ -115,7 +126,8 @@ int nfs_callback_up(void) | |||
| 115 | if (ret <= 0) | 126 | if (ret <= 0) |
| 116 | goto out_err; | 127 | goto out_err; |
| 117 | nfs_callback_tcpport = ret; | 128 | nfs_callback_tcpport = ret; |
| 118 | dprintk("Callback port = 0x%x\n", nfs_callback_tcpport); | 129 | dprintk("NFS: Callback listener port = %u (af %u)\n", |
| 130 | nfs_callback_tcpport, nfs_callback_family); | ||
| 119 | 131 | ||
| 120 | nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); | 132 | nfs_callback_info.rqst = svc_prepare_thread(serv, &serv->sv_pools[0]); |
| 121 | if (IS_ERR(nfs_callback_info.rqst)) { | 133 | if (IS_ERR(nfs_callback_info.rqst)) { |
| @@ -148,8 +160,8 @@ out: | |||
| 148 | mutex_unlock(&nfs_callback_mutex); | 160 | mutex_unlock(&nfs_callback_mutex); |
| 149 | return ret; | 161 | return ret; |
| 150 | out_err: | 162 | out_err: |
| 151 | dprintk("Couldn't create callback socket or server thread; err = %d\n", | 163 | dprintk("NFS: Couldn't create callback socket or server thread; " |
| 152 | ret); | 164 | "err = %d\n", ret); |
| 153 | nfs_callback_info.users--; | 165 | nfs_callback_info.users--; |
| 154 | goto out; | 166 | goto out; |
| 155 | } | 167 | } |
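The comment added above explains the default: when IPv6 is compiled in, the callback service is created with AF_INET6 so a single listener serves both families, with IPv4 clients showing up as IPv4-mapped addresses. A minimal user-space sketch of that dual-stack pattern, assuming the standard socket API and IPV6_V6ONLY=0 behaviour (make_dual_stack_listener is an illustrative name, not a kernel function):

#include <arpa/inet.h>
#include <netinet/in.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* Sketch: one AF_INET6 listener that also accepts IPv4 clients as
 * IPv4-mapped addresses (::ffff:a.b.c.d) by clearing IPV6_V6ONLY. */
static int make_dual_stack_listener(unsigned short port)
{
	struct sockaddr_in6 sin6;
	int off = 0;
	int fd = socket(AF_INET6, SOCK_STREAM, 0);

	if (fd < 0)
		return -1;
	setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &off, sizeof(off));

	memset(&sin6, 0, sizeof(sin6));
	sin6.sin6_family = AF_INET6;
	sin6.sin6_addr = in6addr_any;
	sin6.sin6_port = htons(port);

	if (bind(fd, (struct sockaddr *)&sin6, sizeof(sin6)) < 0 ||
	    listen(fd, 8) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}

int main(void)
{
	int fd = make_dual_stack_listener(0);	/* port 0: pick any free port */

	if (fd < 0)
		return 1;
	close(fd);
	return 0;
}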
diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5ee23e7058b3..7547600b6174 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c | |||
| @@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server, | |||
| 675 | server->nfs_client = clp; | 675 | server->nfs_client = clp; |
| 676 | 676 | ||
| 677 | /* Initialise the client representation from the mount data */ | 677 | /* Initialise the client representation from the mount data */ |
| 678 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 678 | server->flags = data->flags; |
| 679 | 679 | ||
| 680 | if (data->rsize) | 680 | if (data->rsize) |
| 681 | server->rsize = nfs_block_size(data->rsize, NULL); | 681 | server->rsize = nfs_block_size(data->rsize, NULL); |
| @@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void) | |||
| 850 | INIT_LIST_HEAD(&server->client_link); | 850 | INIT_LIST_HEAD(&server->client_link); |
| 851 | INIT_LIST_HEAD(&server->master_link); | 851 | INIT_LIST_HEAD(&server->master_link); |
| 852 | 852 | ||
| 853 | init_waitqueue_head(&server->active_wq); | ||
| 854 | atomic_set(&server->active, 0); | 853 | atomic_set(&server->active, 0); |
| 855 | 854 | ||
| 856 | server->io_stats = nfs_alloc_iostats(); | 855 | server->io_stats = nfs_alloc_iostats(); |
| @@ -1073,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server, | |||
| 1073 | goto error; | 1072 | goto error; |
| 1074 | 1073 | ||
| 1075 | /* Initialise the client representation from the mount data */ | 1074 | /* Initialise the client representation from the mount data */ |
| 1076 | server->flags = data->flags & NFS_MOUNT_FLAGMASK; | 1075 | server->flags = data->flags; |
| 1077 | server->caps |= NFS_CAP_ATOMIC_OPEN; | 1076 | server->caps |= NFS_CAP_ATOMIC_OPEN; |
| 1078 | 1077 | ||
| 1079 | if (data->rsize) | 1078 | if (data->rsize) |
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 74f92b717f78..efdba2e802d7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c | |||
| @@ -156,6 +156,7 @@ typedef struct { | |||
| 156 | decode_dirent_t decode; | 156 | decode_dirent_t decode; |
| 157 | int plus; | 157 | int plus; |
| 158 | unsigned long timestamp; | 158 | unsigned long timestamp; |
| 159 | unsigned long gencount; | ||
| 159 | int timestamp_valid; | 160 | int timestamp_valid; |
| 160 | } nfs_readdir_descriptor_t; | 161 | } nfs_readdir_descriptor_t; |
| 161 | 162 | ||
| @@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 177 | struct file *file = desc->file; | 178 | struct file *file = desc->file; |
| 178 | struct inode *inode = file->f_path.dentry->d_inode; | 179 | struct inode *inode = file->f_path.dentry->d_inode; |
| 179 | struct rpc_cred *cred = nfs_file_cred(file); | 180 | struct rpc_cred *cred = nfs_file_cred(file); |
| 180 | unsigned long timestamp; | 181 | unsigned long timestamp, gencount; |
| 181 | int error; | 182 | int error; |
| 182 | 183 | ||
| 183 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", | 184 | dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", |
| @@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 186 | 187 | ||
| 187 | again: | 188 | again: |
| 188 | timestamp = jiffies; | 189 | timestamp = jiffies; |
| 190 | gencount = nfs_inc_attr_generation_counter(); | ||
| 189 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, | 191 | error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, |
| 190 | NFS_SERVER(inode)->dtsize, desc->plus); | 192 | NFS_SERVER(inode)->dtsize, desc->plus); |
| 191 | if (error < 0) { | 193 | if (error < 0) { |
| @@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) | |||
| 199 | goto error; | 201 | goto error; |
| 200 | } | 202 | } |
| 201 | desc->timestamp = timestamp; | 203 | desc->timestamp = timestamp; |
| 204 | desc->gencount = gencount; | ||
| 202 | desc->timestamp_valid = 1; | 205 | desc->timestamp_valid = 1; |
| 203 | SetPageUptodate(page); | 206 | SetPageUptodate(page); |
| 204 | /* Ensure consistent page alignment of the data. | 207 | /* Ensure consistent page alignment of the data. |
| @@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) | |||
| 224 | if (IS_ERR(p)) | 227 | if (IS_ERR(p)) |
| 225 | return PTR_ERR(p); | 228 | return PTR_ERR(p); |
| 226 | desc->ptr = p; | 229 | desc->ptr = p; |
| 227 | if (desc->timestamp_valid) | 230 | if (desc->timestamp_valid) { |
| 228 | desc->entry->fattr->time_start = desc->timestamp; | 231 | desc->entry->fattr->time_start = desc->timestamp; |
| 229 | else | 232 | desc->entry->fattr->gencount = desc->gencount; |
| 233 | } else | ||
| 230 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; | 234 | desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; |
| 231 | return 0; | 235 | return 0; |
| 232 | } | 236 | } |
| @@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 471 | struct rpc_cred *cred = nfs_file_cred(file); | 475 | struct rpc_cred *cred = nfs_file_cred(file); |
| 472 | struct page *page = NULL; | 476 | struct page *page = NULL; |
| 473 | int status; | 477 | int status; |
| 474 | unsigned long timestamp; | 478 | unsigned long timestamp, gencount; |
| 475 | 479 | ||
| 476 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", | 480 | dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", |
| 477 | (unsigned long long)*desc->dir_cookie); | 481 | (unsigned long long)*desc->dir_cookie); |
| @@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 482 | goto out; | 486 | goto out; |
| 483 | } | 487 | } |
| 484 | timestamp = jiffies; | 488 | timestamp = jiffies; |
| 489 | gencount = nfs_inc_attr_generation_counter(); | ||
| 485 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, | 490 | status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, |
| 486 | *desc->dir_cookie, page, | 491 | *desc->dir_cookie, page, |
| 487 | NFS_SERVER(inode)->dtsize, | 492 | NFS_SERVER(inode)->dtsize, |
| @@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, | |||
| 490 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ | 495 | desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ |
| 491 | if (status >= 0) { | 496 | if (status >= 0) { |
| 492 | desc->timestamp = timestamp; | 497 | desc->timestamp = timestamp; |
| 498 | desc->gencount = gencount; | ||
| 493 | desc->timestamp_valid = 1; | 499 | desc->timestamp_valid = 1; |
| 494 | if ((status = dir_decode(desc)) == 0) | 500 | if ((status = dir_decode(desc)) == 0) |
| 495 | desc->entry->prev_cookie = *desc->dir_cookie; | 501 | desc->entry->prev_cookie = *desc->dir_cookie; |
| @@ -655,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) | |||
| 655 | */ | 661 | */ |
| 656 | void nfs_force_lookup_revalidate(struct inode *dir) | 662 | void nfs_force_lookup_revalidate(struct inode *dir) |
| 657 | { | 663 | { |
| 658 | NFS_I(dir)->cache_change_attribute = jiffies; | 664 | NFS_I(dir)->cache_change_attribute++; |
| 659 | } | 665 | } |
| 660 | 666 | ||
| 661 | /* | 667 | /* |
| @@ -667,6 +673,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) | |||
| 667 | { | 673 | { |
| 668 | if (IS_ROOT(dentry)) | 674 | if (IS_ROOT(dentry)) |
| 669 | return 1; | 675 | return 1; |
| 676 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) | ||
| 677 | return 0; | ||
| 670 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) | 678 | if (!nfs_verify_change_attribute(dir, dentry->d_time)) |
| 671 | return 0; | 679 | return 0; |
| 672 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ | 680 | /* Revalidate nfsi->cache_change_attribute before we declare a match */ |
| @@ -750,6 +758,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, | |||
| 750 | /* Don't revalidate a negative dentry if we're creating a new file */ | 758 | /* Don't revalidate a negative dentry if we're creating a new file */ |
| 751 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) | 759 | if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) |
| 752 | return 0; | 760 | return 0; |
| 761 | if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) | ||
| 762 | return 1; | ||
| 753 | return !nfs_check_verifier(dir, dentry); | 763 | return !nfs_check_verifier(dir, dentry); |
| 754 | } | 764 | } |
| 755 | 765 | ||
| @@ -1507,7 +1517,7 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym | |||
| 1507 | if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, | 1517 | if (!add_to_page_cache(page, dentry->d_inode->i_mapping, 0, |
| 1508 | GFP_KERNEL)) { | 1518 | GFP_KERNEL)) { |
| 1509 | pagevec_add(&lru_pvec, page); | 1519 | pagevec_add(&lru_pvec, page); |
| 1510 | pagevec_lru_add(&lru_pvec); | 1520 | pagevec_lru_add_file(&lru_pvec); |
| 1511 | SetPageUptodate(page); | 1521 | SetPageUptodate(page); |
| 1512 | unlock_page(page); | 1522 | unlock_page(page); |
| 1513 | } else | 1523 | } else |
diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 78460657f5cb..d319b49f8f06 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c | |||
| @@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) | |||
| 188 | /* origin == SEEK_END => we must revalidate the cached file length */ | 188 | /* origin == SEEK_END => we must revalidate the cached file length */ |
| 189 | if (origin == SEEK_END) { | 189 | if (origin == SEEK_END) { |
| 190 | struct inode *inode = filp->f_mapping->host; | 190 | struct inode *inode = filp->f_mapping->host; |
| 191 | |||
| 191 | int retval = nfs_revalidate_file_size(inode, filp); | 192 | int retval = nfs_revalidate_file_size(inode, filp); |
| 192 | if (retval < 0) | 193 | if (retval < 0) |
| 193 | return (loff_t)retval; | 194 | return (loff_t)retval; |
| 194 | } | 195 | |
| 195 | lock_kernel(); /* BKL needed? */ | 196 | spin_lock(&inode->i_lock); |
| 196 | loff = generic_file_llseek_unlocked(filp, offset, origin); | 197 | loff = generic_file_llseek_unlocked(filp, offset, origin); |
| 197 | unlock_kernel(); | 198 | spin_unlock(&inode->i_lock); |
| 199 | } else | ||
| 200 | loff = generic_file_llseek_unlocked(filp, offset, origin); | ||
| 198 | return loff; | 201 | return loff; |
| 199 | } | 202 | } |
| 200 | 203 | ||
| @@ -699,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) | |||
| 699 | filp->f_path.dentry->d_name.name, | 702 | filp->f_path.dentry->d_name.name, |
| 700 | fl->fl_type, fl->fl_flags); | 703 | fl->fl_type, fl->fl_flags); |
| 701 | 704 | ||
| 702 | /* | ||
| 703 | * No BSD flocks over NFS allowed. | ||
| 704 | * Note: we could try to fake a POSIX lock request here by | ||
| 705 | * using ((u32) filp | 0x80000000) or some such as the pid. | ||
| 706 | * Not sure whether that would be unique, though, or whether | ||
| 707 | * that would break in other places. | ||
| 708 | */ | ||
| 709 | if (!(fl->fl_flags & FL_FLOCK)) | 705 | if (!(fl->fl_flags & FL_FLOCK)) |
| 710 | return -ENOLCK; | 706 | return -ENOLCK; |
| 711 | 707 | ||
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52daefa2f521..b9195c02a863 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c | |||
| @@ -305,8 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) | |||
| 305 | init_special_inode(inode, inode->i_mode, fattr->rdev); | 305 | init_special_inode(inode, inode->i_mode, fattr->rdev); |
| 306 | 306 | ||
| 307 | nfsi->read_cache_jiffies = fattr->time_start; | 307 | nfsi->read_cache_jiffies = fattr->time_start; |
| 308 | nfsi->last_updated = now; | 308 | nfsi->attr_gencount = fattr->gencount; |
| 309 | nfsi->cache_change_attribute = now; | ||
| 310 | inode->i_atime = fattr->atime; | 309 | inode->i_atime = fattr->atime; |
| 311 | inode->i_mtime = fattr->mtime; | 310 | inode->i_mtime = fattr->mtime; |
| 312 | inode->i_ctime = fattr->ctime; | 311 | inode->i_ctime = fattr->ctime; |
| @@ -453,6 +452,7 @@ out_big: | |||
| 453 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | 452 | void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) |
| 454 | { | 453 | { |
| 455 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { | 454 | if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { |
| 455 | spin_lock(&inode->i_lock); | ||
| 456 | if ((attr->ia_valid & ATTR_MODE) != 0) { | 456 | if ((attr->ia_valid & ATTR_MODE) != 0) { |
| 457 | int mode = attr->ia_mode & S_IALLUGO; | 457 | int mode = attr->ia_mode & S_IALLUGO; |
| 458 | mode |= inode->i_mode & ~S_IALLUGO; | 458 | mode |= inode->i_mode & ~S_IALLUGO; |
| @@ -462,7 +462,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 462 | inode->i_uid = attr->ia_uid; | 462 | inode->i_uid = attr->ia_uid; |
| 463 | if ((attr->ia_valid & ATTR_GID) != 0) | 463 | if ((attr->ia_valid & ATTR_GID) != 0) |
| 464 | inode->i_gid = attr->ia_gid; | 464 | inode->i_gid = attr->ia_gid; |
| 465 | spin_lock(&inode->i_lock); | ||
| 466 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 465 | NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 467 | spin_unlock(&inode->i_lock); | 466 | spin_unlock(&inode->i_lock); |
| 468 | } | 467 | } |
| @@ -472,37 +471,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) | |||
| 472 | } | 471 | } |
| 473 | } | 472 | } |
| 474 | 473 | ||
| 475 | static int nfs_wait_schedule(void *word) | ||
| 476 | { | ||
| 477 | if (signal_pending(current)) | ||
| 478 | return -ERESTARTSYS; | ||
| 479 | schedule(); | ||
| 480 | return 0; | ||
| 481 | } | ||
| 482 | |||
| 483 | /* | ||
| 484 | * Wait for the inode to get unlocked. | ||
| 485 | */ | ||
| 486 | static int nfs_wait_on_inode(struct inode *inode) | ||
| 487 | { | ||
| 488 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 489 | int error; | ||
| 490 | |||
| 491 | error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, | ||
| 492 | nfs_wait_schedule, TASK_KILLABLE); | ||
| 493 | |||
| 494 | return error; | ||
| 495 | } | ||
| 496 | |||
| 497 | static void nfs_wake_up_inode(struct inode *inode) | ||
| 498 | { | ||
| 499 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 500 | |||
| 501 | clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); | ||
| 502 | smp_mb__after_clear_bit(); | ||
| 503 | wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); | ||
| 504 | } | ||
| 505 | |||
| 506 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) | 474 | int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) |
| 507 | { | 475 | { |
| 508 | struct inode *inode = dentry->d_inode; | 476 | struct inode *inode = dentry->d_inode; |
| @@ -697,20 +665,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 697 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", | 665 | dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", |
| 698 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); | 666 | inode->i_sb->s_id, (long long)NFS_FILEID(inode)); |
| 699 | 667 | ||
| 700 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
| 701 | if (is_bad_inode(inode)) | 668 | if (is_bad_inode(inode)) |
| 702 | goto out_nowait; | 669 | goto out; |
| 703 | if (NFS_STALE(inode)) | 670 | if (NFS_STALE(inode)) |
| 704 | goto out_nowait; | ||
| 705 | |||
| 706 | status = nfs_wait_on_inode(inode); | ||
| 707 | if (status < 0) | ||
| 708 | goto out; | 671 | goto out; |
| 709 | 672 | ||
| 710 | status = -ESTALE; | ||
| 711 | if (NFS_STALE(inode)) | 673 | if (NFS_STALE(inode)) |
| 712 | goto out; | 674 | goto out; |
| 713 | 675 | ||
| 676 | nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); | ||
| 714 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); | 677 | status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); |
| 715 | if (status != 0) { | 678 | if (status != 0) { |
| 716 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", | 679 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", |
| @@ -724,16 +687,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 724 | goto out; | 687 | goto out; |
| 725 | } | 688 | } |
| 726 | 689 | ||
| 727 | spin_lock(&inode->i_lock); | 690 | status = nfs_refresh_inode(inode, &fattr); |
| 728 | status = nfs_update_inode(inode, &fattr); | ||
| 729 | if (status) { | 691 | if (status) { |
| 730 | spin_unlock(&inode->i_lock); | ||
| 731 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", | 692 | dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", |
| 732 | inode->i_sb->s_id, | 693 | inode->i_sb->s_id, |
| 733 | (long long)NFS_FILEID(inode), status); | 694 | (long long)NFS_FILEID(inode), status); |
| 734 | goto out; | 695 | goto out; |
| 735 | } | 696 | } |
| 736 | spin_unlock(&inode->i_lock); | ||
| 737 | 697 | ||
| 738 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) | 698 | if (nfsi->cache_validity & NFS_INO_INVALID_ACL) |
| 739 | nfs_zap_acl_cache(inode); | 699 | nfs_zap_acl_cache(inode); |
| @@ -743,9 +703,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) | |||
| 743 | (long long)NFS_FILEID(inode)); | 703 | (long long)NFS_FILEID(inode)); |
| 744 | 704 | ||
| 745 | out: | 705 | out: |
| 746 | nfs_wake_up_inode(inode); | ||
| 747 | |||
| 748 | out_nowait: | ||
| 749 | return status; | 706 | return status; |
| 750 | } | 707 | } |
| 751 | 708 | ||
| @@ -908,9 +865,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 908 | return -EIO; | 865 | return -EIO; |
| 909 | } | 866 | } |
| 910 | 867 | ||
| 911 | /* Do atomic weak cache consistency updates */ | ||
| 912 | nfs_wcc_update_inode(inode, fattr); | ||
| 913 | |||
| 914 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 868 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
| 915 | nfsi->change_attr != fattr->change_attr) | 869 | nfsi->change_attr != fattr->change_attr) |
| 916 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 870 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; |
| @@ -939,15 +893,81 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 939 | 893 | ||
| 940 | if (invalid != 0) | 894 | if (invalid != 0) |
| 941 | nfsi->cache_validity |= invalid; | 895 | nfsi->cache_validity |= invalid; |
| 942 | else | ||
| 943 | nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR | ||
| 944 | | NFS_INO_INVALID_ATIME | ||
| 945 | | NFS_INO_REVAL_PAGECACHE); | ||
| 946 | 896 | ||
| 947 | nfsi->read_cache_jiffies = fattr->time_start; | 897 | nfsi->read_cache_jiffies = fattr->time_start; |
| 948 | return 0; | 898 | return 0; |
| 949 | } | 899 | } |
| 950 | 900 | ||
| 901 | static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
| 902 | { | ||
| 903 | return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; | ||
| 904 | } | ||
| 905 | |||
| 906 | static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
| 907 | { | ||
| 908 | return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); | ||
| 909 | } | ||
| 910 | |||
| 911 | static unsigned long nfs_attr_generation_counter; | ||
| 912 | |||
| 913 | static unsigned long nfs_read_attr_generation_counter(void) | ||
| 914 | { | ||
| 915 | smp_rmb(); | ||
| 916 | return nfs_attr_generation_counter; | ||
| 917 | } | ||
| 918 | |||
| 919 | unsigned long nfs_inc_attr_generation_counter(void) | ||
| 920 | { | ||
| 921 | unsigned long ret; | ||
| 922 | smp_rmb(); | ||
| 923 | ret = ++nfs_attr_generation_counter; | ||
| 924 | smp_wmb(); | ||
| 925 | return ret; | ||
| 926 | } | ||
| 927 | |||
| 928 | void nfs_fattr_init(struct nfs_fattr *fattr) | ||
| 929 | { | ||
| 930 | fattr->valid = 0; | ||
| 931 | fattr->time_start = jiffies; | ||
| 932 | fattr->gencount = nfs_inc_attr_generation_counter(); | ||
| 933 | } | ||
| 934 | |||
| 935 | /** | ||
| 936 | * nfs_inode_attrs_need_update - check if the inode attributes need updating | ||
| 937 | * @inode - pointer to inode | ||
| 938 | * @fattr - attributes | ||
| 939 | * | ||
| 940 | * Attempt to divine whether or not an RPC call reply carrying stale | ||
| 941 | * attributes got scheduled after another call carrying updated ones. | ||
| 942 | * | ||
| 943 | * To do so, the function first assumes that a more recent ctime means | ||
| 944 | * that the attributes in fattr are newer, however it also attempts to | ||
| 945 | * catch the case where ctime either didn't change, or went backwards | ||
| 946 | * (if someone reset the clock on the server) by looking at whether | ||
| 947 | * or not this RPC call was started after the inode was last updated. | ||
| 948 | * Note also the check for wraparound of 'attr_gencount' | ||
| 949 | * | ||
| 950 | * The function returns 'true' if it thinks the attributes in 'fattr' are | ||
| 951 | * more recent than the ones cached in the inode. | ||
| 952 | * | ||
| 953 | */ | ||
| 954 | static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) | ||
| 955 | { | ||
| 956 | const struct nfs_inode *nfsi = NFS_I(inode); | ||
| 957 | |||
| 958 | return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || | ||
| 959 | nfs_ctime_need_update(inode, fattr) || | ||
| 960 | nfs_size_need_update(inode, fattr) || | ||
| 961 | ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); | ||
| 962 | } | ||
| 963 | |||
| 964 | static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
| 965 | { | ||
| 966 | if (nfs_inode_attrs_need_update(inode, fattr)) | ||
| 967 | return nfs_update_inode(inode, fattr); | ||
| 968 | return nfs_check_inode_attributes(inode, fattr); | ||
| 969 | } | ||
| 970 | |||
| 951 | /** | 971 | /** |
| 952 | * nfs_refresh_inode - try to update the inode attribute cache | 972 | * nfs_refresh_inode - try to update the inode attribute cache |
| 953 | * @inode - pointer to inode | 973 | * @inode - pointer to inode |
| @@ -960,21 +980,28 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat | |||
| 960 | */ | 980 | */ |
| 961 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | 981 | int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) |
| 962 | { | 982 | { |
| 963 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 964 | int status; | 983 | int status; |
| 965 | 984 | ||
| 966 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | 985 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) |
| 967 | return 0; | 986 | return 0; |
| 968 | spin_lock(&inode->i_lock); | 987 | spin_lock(&inode->i_lock); |
| 969 | if (time_after(fattr->time_start, nfsi->last_updated)) | 988 | status = nfs_refresh_inode_locked(inode, fattr); |
| 970 | status = nfs_update_inode(inode, fattr); | ||
| 971 | else | ||
| 972 | status = nfs_check_inode_attributes(inode, fattr); | ||
| 973 | |||
| 974 | spin_unlock(&inode->i_lock); | 989 | spin_unlock(&inode->i_lock); |
| 975 | return status; | 990 | return status; |
| 976 | } | 991 | } |
| 977 | 992 | ||
| 993 | static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) | ||
| 994 | { | ||
| 995 | struct nfs_inode *nfsi = NFS_I(inode); | ||
| 996 | |||
| 997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | ||
| 998 | if (S_ISDIR(inode->i_mode)) | ||
| 999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
| 1000 | if ((fattr->valid & NFS_ATTR_FATTR) == 0) | ||
| 1001 | return 0; | ||
| 1002 | return nfs_refresh_inode_locked(inode, fattr); | ||
| 1003 | } | ||
| 1004 | |||
| 978 | /** | 1005 | /** |
| 979 | * nfs_post_op_update_inode - try to update the inode attribute cache | 1006 | * nfs_post_op_update_inode - try to update the inode attribute cache |
| 980 | * @inode - pointer to inode | 1007 | * @inode - pointer to inode |
| @@ -991,14 +1018,12 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 991 | */ | 1018 | */ |
| 992 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | 1019 | int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) |
| 993 | { | 1020 | { |
| 994 | struct nfs_inode *nfsi = NFS_I(inode); | 1021 | int status; |
| 995 | 1022 | ||
| 996 | spin_lock(&inode->i_lock); | 1023 | spin_lock(&inode->i_lock); |
| 997 | nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; | 1024 | status = nfs_post_op_update_inode_locked(inode, fattr); |
| 998 | if (S_ISDIR(inode->i_mode)) | ||
| 999 | nfsi->cache_validity |= NFS_INO_INVALID_DATA; | ||
| 1000 | spin_unlock(&inode->i_lock); | 1025 | spin_unlock(&inode->i_lock); |
| 1001 | return nfs_refresh_inode(inode, fattr); | 1026 | return status; |
| 1002 | } | 1027 | } |
| 1003 | 1028 | ||
| 1004 | /** | 1029 | /** |
| @@ -1014,6 +1039,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1014 | */ | 1039 | */ |
| 1015 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) | 1040 | int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) |
| 1016 | { | 1041 | { |
| 1042 | int status; | ||
| 1043 | |||
| 1044 | spin_lock(&inode->i_lock); | ||
| 1045 | /* Don't do a WCC update if these attributes are already stale */ | ||
| 1046 | if ((fattr->valid & NFS_ATTR_FATTR) == 0 || | ||
| 1047 | !nfs_inode_attrs_need_update(inode, fattr)) { | ||
| 1048 | fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); | ||
| 1049 | goto out_noforce; | ||
| 1050 | } | ||
| 1017 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && | 1051 | if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && |
| 1018 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { | 1052 | (fattr->valid & NFS_ATTR_WCC_V4) == 0) { |
| 1019 | fattr->pre_change_attr = NFS_I(inode)->change_attr; | 1053 | fattr->pre_change_attr = NFS_I(inode)->change_attr; |
| @@ -1026,7 +1060,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa | |||
| 1026 | fattr->pre_size = i_size_read(inode); | 1060 | fattr->pre_size = i_size_read(inode); |
| 1027 | fattr->valid |= NFS_ATTR_WCC; | 1061 | fattr->valid |= NFS_ATTR_WCC; |
| 1028 | } | 1062 | } |
| 1029 | return nfs_post_op_update_inode(inode, fattr); | 1063 | out_noforce: |
| 1064 | status = nfs_post_op_update_inode_locked(inode, fattr); | ||
| 1065 | spin_unlock(&inode->i_lock); | ||
| 1066 | return status; | ||
| 1030 | } | 1067 | } |
| 1031 | 1068 | ||
| 1032 | /* | 1069 | /* |
| @@ -1092,7 +1129,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1092 | } | 1129 | } |
| 1093 | /* If ctime has changed we should definitely clear access+acl caches */ | 1130 | /* If ctime has changed we should definitely clear access+acl caches */ |
| 1094 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) | 1131 | if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) |
| 1095 | invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1132 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 1096 | } else if (nfsi->change_attr != fattr->change_attr) { | 1133 | } else if (nfsi->change_attr != fattr->change_attr) { |
| 1097 | dprintk("NFS: change_attr change on server for file %s/%ld\n", | 1134 | dprintk("NFS: change_attr change on server for file %s/%ld\n", |
| 1098 | inode->i_sb->s_id, inode->i_ino); | 1135 | inode->i_sb->s_id, inode->i_ino); |
| @@ -1126,6 +1163,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1126 | inode->i_gid != fattr->gid) | 1163 | inode->i_gid != fattr->gid) |
| 1127 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; | 1164 | invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; |
| 1128 | 1165 | ||
| 1166 | if (inode->i_nlink != fattr->nlink) | ||
| 1167 | invalid |= NFS_INO_INVALID_ATTR; | ||
| 1168 | |||
| 1129 | inode->i_mode = fattr->mode; | 1169 | inode->i_mode = fattr->mode; |
| 1130 | inode->i_nlink = fattr->nlink; | 1170 | inode->i_nlink = fattr->nlink; |
| 1131 | inode->i_uid = fattr->uid; | 1171 | inode->i_uid = fattr->uid; |
| @@ -1145,18 +1185,13 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) | |||
| 1145 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); | 1185 | nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); |
| 1146 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); | 1186 | nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); |
| 1147 | nfsi->attrtimeo_timestamp = now; | 1187 | nfsi->attrtimeo_timestamp = now; |
| 1148 | nfsi->last_updated = now; | 1188 | nfsi->attr_gencount = nfs_inc_attr_generation_counter(); |
| 1149 | } else { | 1189 | } else { |
| 1150 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { | 1190 | if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { |
| 1151 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) | 1191 | if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) |
| 1152 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); | 1192 | nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); |
| 1153 | nfsi->attrtimeo_timestamp = now; | 1193 | nfsi->attrtimeo_timestamp = now; |
| 1154 | } | 1194 | } |
| 1155 | /* | ||
| 1156 | * Avoid jiffy wraparound issues with nfsi->last_updated | ||
| 1157 | */ | ||
| 1158 | if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) | ||
| 1159 | nfsi->last_updated = nfsi->read_cache_jiffies; | ||
| 1160 | } | 1195 | } |
| 1161 | invalid &= ~NFS_INO_INVALID_ATTR; | 1196 | invalid &= ~NFS_INO_INVALID_ATTR; |
| 1162 | /* Don't invalidate the data if we were to blame */ | 1197 | /* Don't invalidate the data if we were to blame */ |
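The fs/nfs/inode.c changes above replace the jiffies-based last_updated heuristic with a global attribute generation counter: nfs_fattr_init() stamps every reply with nfs_inc_attr_generation_counter(), and nfs_inode_attrs_need_update() decides which of two replies is newer by comparing those stamps as a signed difference, so the ordering survives counter wraparound. A stand-alone sketch of that comparison idiom (plain C, not kernel code; gencount_after is an illustrative name):

#include <assert.h>

/* Sketch: compare two unsigned generation counters via their (wrapping)
 * difference cast to a signed type.  The result stays correct across
 * wraparound as long as the counters are less than half the range apart. */
static int gencount_after(unsigned long a, unsigned long b)
{
	return (long)(a - b) > 0;	/* "a was issued after b" */
}

int main(void)
{
	unsigned long old = (unsigned long)-2;	/* just before wraparound */
	unsigned long new = old + 3;		/* wrapped around to 1 */

	assert(gencount_after(new, old));	/* newer still sorts after */
	assert(!gencount_after(old, new));
	return 0;
}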
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24241fcbb98d..d212ee41caf2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h | |||
| @@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *); | |||
| 153 | void nfs_zap_acl_cache(struct inode *inode); | 153 | void nfs_zap_acl_cache(struct inode *inode); |
| 154 | 154 | ||
| 155 | /* super.c */ | 155 | /* super.c */ |
| 156 | void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); | ||
| 156 | extern struct file_system_type nfs_xdev_fs_type; | 157 | extern struct file_system_type nfs_xdev_fs_type; |
| 157 | #ifdef CONFIG_NFS_V4 | 158 | #ifdef CONFIG_NFS_V4 |
| 158 | extern struct file_system_type nfs4_xdev_fs_type; | 159 | extern struct file_system_type nfs4_xdev_fs_type; |
| @@ -163,8 +164,8 @@ extern struct rpc_stat nfs_rpcstat; | |||
| 163 | 164 | ||
| 164 | extern int __init register_nfs_fs(void); | 165 | extern int __init register_nfs_fs(void); |
| 165 | extern void __exit unregister_nfs_fs(void); | 166 | extern void __exit unregister_nfs_fs(void); |
| 166 | extern void nfs_sb_active(struct nfs_server *server); | 167 | extern void nfs_sb_active(struct super_block *sb); |
| 167 | extern void nfs_sb_deactive(struct nfs_server *server); | 168 | extern void nfs_sb_deactive(struct super_block *sb); |
| 168 | 169 | ||
| 169 | /* namespace.c */ | 170 | /* namespace.c */ |
| 170 | extern char *nfs_path(const char *base, | 171 | extern char *nfs_path(const char *base, |
| @@ -276,3 +277,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) | |||
| 276 | PAGE_SIZE - 1) >> PAGE_SHIFT; | 277 | PAGE_SIZE - 1) >> PAGE_SHIFT; |
| 277 | } | 278 | } |
| 278 | 279 | ||
| 280 | #define IPV6_SCOPE_DELIMITER '%' | ||
| 281 | |||
| 282 | /* | ||
| 283 | * Set the port number in an address. Be agnostic about the address | ||
| 284 | * family. | ||
| 285 | */ | ||
| 286 | static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
| 287 | { | ||
| 288 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
| 289 | struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; | ||
| 290 | |||
| 291 | switch (sap->sa_family) { | ||
| 292 | case AF_INET: | ||
| 293 | ap->sin_port = htons(port); | ||
| 294 | break; | ||
| 295 | case AF_INET6: | ||
| 296 | ap6->sin6_port = htons(port); | ||
| 297 | break; | ||
| 298 | } | ||
| 299 | } | ||
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 779d2eb649c5..086a6830d785 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c | |||
| @@ -14,6 +14,7 @@ | |||
| 14 | #include <linux/sunrpc/clnt.h> | 14 | #include <linux/sunrpc/clnt.h> |
| 15 | #include <linux/sunrpc/sched.h> | 15 | #include <linux/sunrpc/sched.h> |
| 16 | #include <linux/nfs_fs.h> | 16 | #include <linux/nfs_fs.h> |
| 17 | #include "internal.h" | ||
| 17 | 18 | ||
| 18 | #ifdef RPC_DEBUG | 19 | #ifdef RPC_DEBUG |
| 19 | # define NFSDBG_FACILITY NFSDBG_MOUNT | 20 | # define NFSDBG_FACILITY NFSDBG_MOUNT |
| @@ -98,7 +99,7 @@ out_call_err: | |||
| 98 | 99 | ||
| 99 | out_mnt_err: | 100 | out_mnt_err: |
| 100 | dprintk("NFS: MNT server returned result %d\n", result.status); | 101 | dprintk("NFS: MNT server returned result %d\n", result.status); |
| 101 | status = -EACCES; | 102 | status = nfs_stat_to_errno(result.status); |
| 102 | goto out; | 103 | goto out; |
| 103 | } | 104 | } |
| 104 | 105 | ||
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 66df08dd1caf..64a288ee046d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c | |||
| @@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) | |||
| 105 | 105 | ||
| 106 | dprintk("--> nfs_follow_mountpoint()\n"); | 106 | dprintk("--> nfs_follow_mountpoint()\n"); |
| 107 | 107 | ||
| 108 | BUG_ON(IS_ROOT(dentry)); | 108 | err = -ESTALE; |
| 109 | if (IS_ROOT(dentry)) | ||
| 110 | goto out_err; | ||
| 111 | |||
| 109 | dprintk("%s: enter\n", __func__); | 112 | dprintk("%s: enter\n", __func__); |
| 110 | dput(nd->path.dentry); | 113 | dput(nd->path.dentry); |
| 111 | nd->path.dentry = dget(dentry); | 114 | nd->path.dentry = dget(dentry); |
| @@ -189,7 +192,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, | |||
| 189 | struct nfs_clone_mount *mountdata) | 192 | struct nfs_clone_mount *mountdata) |
| 190 | { | 193 | { |
| 191 | #ifdef CONFIG_NFS_V4 | 194 | #ifdef CONFIG_NFS_V4 |
| 192 | struct vfsmount *mnt = NULL; | 195 | struct vfsmount *mnt = ERR_PTR(-EINVAL); |
| 193 | switch (server->nfs_client->rpc_ops->version) { | 196 | switch (server->nfs_client->rpc_ops->version) { |
| 194 | case 2: | 197 | case 2: |
| 195 | case 3: | 198 | case 3: |
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 423842f51ac9..cef62557c87d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c | |||
| @@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) | |||
| 229 | 229 | ||
| 230 | dprintk("NFS call getacl\n"); | 230 | dprintk("NFS call getacl\n"); |
| 231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; | 231 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; |
| 232 | nfs_fattr_init(&fattr); | ||
| 232 | status = rpc_call_sync(server->client_acl, &msg, 0); | 233 | status = rpc_call_sync(server->client_acl, &msg, 0); |
| 233 | dprintk("NFS reply getacl: %d\n", status); | 234 | dprintk("NFS reply getacl: %d\n", status); |
| 234 | 235 | ||
| @@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, | |||
| 322 | 323 | ||
| 323 | dprintk("NFS call setacl\n"); | 324 | dprintk("NFS call setacl\n"); |
| 324 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; | 325 | msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; |
| 326 | nfs_fattr_init(&fattr); | ||
| 325 | status = rpc_call_sync(server->client_acl, &msg, 0); | 327 | status = rpc_call_sync(server->client_acl, &msg, 0); |
| 326 | nfs_access_zap_cache(inode); | 328 | nfs_access_zap_cache(inode); |
| 327 | nfs_zap_acl_cache(inode); | 329 | nfs_zap_acl_cache(inode); |
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1e750e4574a9..c55be7a7679e 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c | |||
| @@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 699 | } | 699 | } |
| 700 | 700 | ||
| 701 | static int | 701 | static int |
| 702 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | 702 | do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle, |
| 703 | struct nfs_fsinfo *info) | 703 | struct nfs_fsinfo *info) |
| 704 | { | 704 | { |
| 705 | struct rpc_message msg = { | 705 | struct rpc_message msg = { |
| @@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 711 | 711 | ||
| 712 | dprintk("NFS call fsinfo\n"); | 712 | dprintk("NFS call fsinfo\n"); |
| 713 | nfs_fattr_init(info->fattr); | 713 | nfs_fattr_init(info->fattr); |
| 714 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 714 | status = rpc_call_sync(client, &msg, 0); |
| 715 | dprintk("NFS reply fsinfo: %d\n", status); | 715 | dprintk("NFS reply fsinfo: %d\n", status); |
| 716 | return status; | 716 | return status; |
| 717 | } | 717 | } |
| 718 | 718 | ||
| 719 | /* | ||
| 720 | * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via | ||
| 721 | * nfs_create_server | ||
| 722 | */ | ||
| 723 | static int | ||
| 724 | nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, | ||
| 725 | struct nfs_fsinfo *info) | ||
| 726 | { | ||
| 727 | int status; | ||
| 728 | |||
| 729 | status = do_proc_fsinfo(server->client, fhandle, info); | ||
| 730 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
| 731 | status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info); | ||
| 732 | return status; | ||
| 733 | } | ||
| 734 | |||
| 719 | static int | 735 | static int |
| 720 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, | 736 | nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, |
| 721 | struct nfs_pathconf *info) | 737 | struct nfs_pathconf *info) |
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index b112857301f7..30befc39b3c6 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c | |||
| @@ -93,21 +93,52 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, | |||
| 93 | return 0; | 93 | return 0; |
| 94 | } | 94 | } |
| 95 | 95 | ||
| 96 | /* | 96 | static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, |
| 97 | * Check if the string represents a "valid" IPv4 address | 97 | char *page, char *page2, |
| 98 | */ | 98 | const struct nfs4_fs_location *location) |
| 99 | static inline int valid_ipaddr4(const char *buf) | ||
| 100 | { | 99 | { |
| 101 | int rc, count, in[4]; | 100 | struct vfsmount *mnt = ERR_PTR(-ENOENT); |
| 102 | 101 | char *mnt_path; | |
| 103 | rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); | 102 | int page2len; |
| 104 | if (rc != 4) | 103 | unsigned int s; |
| 105 | return -EINVAL; | 104 | |
| 106 | for (count = 0; count < 4; count++) { | 105 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); |
| 107 | if (in[count] > 255) | 106 | if (IS_ERR(mnt_path)) |
| 108 | return -EINVAL; | 107 | return mnt; |
| 108 | mountdata->mnt_path = mnt_path; | ||
| 109 | page2 += strlen(mnt_path) + 1; | ||
| 110 | page2len = PAGE_SIZE - strlen(mnt_path) - 1; | ||
| 111 | |||
| 112 | for (s = 0; s < location->nservers; s++) { | ||
| 113 | const struct nfs4_string *buf = &location->servers[s]; | ||
| 114 | struct sockaddr_storage addr; | ||
| 115 | |||
| 116 | if (buf->len <= 0 || buf->len >= PAGE_SIZE) | ||
| 117 | continue; | ||
| 118 | |||
| 119 | mountdata->addr = (struct sockaddr *)&addr; | ||
| 120 | |||
| 121 | if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) | ||
| 122 | continue; | ||
| 123 | nfs_parse_ip_address(buf->data, buf->len, | ||
| 124 | mountdata->addr, &mountdata->addrlen); | ||
| 125 | if (mountdata->addr->sa_family == AF_UNSPEC) | ||
| 126 | continue; | ||
| 127 | nfs_set_port(mountdata->addr, NFS_PORT); | ||
| 128 | |||
| 129 | strncpy(page2, buf->data, page2len); | ||
| 130 | page2[page2len] = '\0'; | ||
| 131 | mountdata->hostname = page2; | ||
| 132 | |||
| 133 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
| 134 | mountdata->hostname, | ||
| 135 | mountdata->mnt_path); | ||
| 136 | |||
| 137 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); | ||
| 138 | if (!IS_ERR(mnt)) | ||
| 139 | break; | ||
| 109 | } | 140 | } |
| 110 | return 0; | 141 | return mnt; |
| 111 | } | 142 | } |
| 112 | 143 | ||
| 113 | /** | 144 | /** |
| @@ -128,7 +159,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
| 128 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, | 159 | .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, |
| 129 | }; | 160 | }; |
| 130 | char *page = NULL, *page2 = NULL; | 161 | char *page = NULL, *page2 = NULL; |
| 131 | unsigned int s; | ||
| 132 | int loc, error; | 162 | int loc, error; |
| 133 | 163 | ||
| 134 | if (locations == NULL || locations->nlocations <= 0) | 164 | if (locations == NULL || locations->nlocations <= 0) |
| @@ -152,53 +182,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, | |||
| 152 | goto out; | 182 | goto out; |
| 153 | } | 183 | } |
| 154 | 184 | ||
| 155 | loc = 0; | 185 | for (loc = 0; loc < locations->nlocations; loc++) { |
| 156 | while (loc < locations->nlocations && IS_ERR(mnt)) { | ||
| 157 | const struct nfs4_fs_location *location = &locations->locations[loc]; | 186 | const struct nfs4_fs_location *location = &locations->locations[loc]; |
| 158 | char *mnt_path; | ||
| 159 | 187 | ||
| 160 | if (location == NULL || location->nservers <= 0 || | 188 | if (location == NULL || location->nservers <= 0 || |
| 161 | location->rootpath.ncomponents == 0) { | 189 | location->rootpath.ncomponents == 0) |
| 162 | loc++; | ||
| 163 | continue; | 190 | continue; |
| 164 | } | ||
| 165 | 191 | ||
| 166 | mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); | 192 | mnt = try_location(&mountdata, page, page2, location); |
| 167 | if (IS_ERR(mnt_path)) { | 193 | if (!IS_ERR(mnt)) |
| 168 | loc++; | 194 | break; |
| 169 | continue; | ||
| 170 | } | ||
| 171 | mountdata.mnt_path = mnt_path; | ||
| 172 | |||
| 173 | s = 0; | ||
| 174 | while (s < location->nservers) { | ||
| 175 | struct sockaddr_in addr = { | ||
| 176 | .sin_family = AF_INET, | ||
| 177 | .sin_port = htons(NFS_PORT), | ||
| 178 | }; | ||
| 179 | |||
| 180 | if (location->servers[s].len <= 0 || | ||
| 181 | valid_ipaddr4(location->servers[s].data) < 0) { | ||
| 182 | s++; | ||
| 183 | continue; | ||
| 184 | } | ||
| 185 | |||
| 186 | mountdata.hostname = location->servers[s].data; | ||
| 187 | addr.sin_addr.s_addr = in_aton(mountdata.hostname), | ||
| 188 | mountdata.addr = (struct sockaddr *)&addr; | ||
| 189 | mountdata.addrlen = sizeof(addr); | ||
| 190 | |||
| 191 | snprintf(page, PAGE_SIZE, "%s:%s", | ||
| 192 | mountdata.hostname, | ||
| 193 | mountdata.mnt_path); | ||
| 194 | |||
| 195 | mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata); | ||
| 196 | if (!IS_ERR(mnt)) { | ||
| 197 | break; | ||
| 198 | } | ||
| 199 | s++; | ||
| 200 | } | ||
| 201 | loc++; | ||
| 202 | } | 195 | } |
| 203 | 196 | ||
| 204 | out: | 197 | out: |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index c910413eaeca..83e700a2b0c0 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
| @@ -1659,8 +1659,10 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, | |||
| 1659 | struct nfs_open_context *ctx; | 1659 | struct nfs_open_context *ctx; |
| 1660 | 1660 | ||
| 1661 | ctx = nfs_file_open_context(sattr->ia_file); | 1661 | ctx = nfs_file_open_context(sattr->ia_file); |
| 1662 | cred = ctx->cred; | 1662 | if (ctx) { |
| 1663 | state = ctx->state; | 1663 | cred = ctx->cred; |
| 1664 | state = ctx->state; | ||
| 1665 | } | ||
| 1664 | } | 1666 | } |
| 1665 | 1667 | ||
| 1666 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); | 1668 | status = nfs4_do_setattr(inode, cred, fattr, sattr, state); |
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 46763d1cd397..8478fc25daee 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c | |||
| @@ -127,7 +127,7 @@ enum { | |||
| 127 | Opt_err | 127 | Opt_err |
| 128 | }; | 128 | }; |
| 129 | 129 | ||
| 130 | static match_table_t __initdata tokens = { | 130 | static match_table_t __initconst tokens = { |
| 131 | {Opt_port, "port=%u"}, | 131 | {Opt_port, "port=%u"}, |
| 132 | {Opt_rsize, "rsize=%u"}, | 132 | {Opt_rsize, "rsize=%u"}, |
| 133 | {Opt_wsize, "wsize=%u"}, | 133 | {Opt_wsize, "wsize=%u"}, |
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4dbb84df1b68..193465210d7c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c | |||
| @@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, | |||
| 65 | 65 | ||
| 66 | dprintk("%s: call getattr\n", __func__); | 66 | dprintk("%s: call getattr\n", __func__); |
| 67 | nfs_fattr_init(fattr); | 67 | nfs_fattr_init(fattr); |
| 68 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 68 | status = rpc_call_sync(server->client, &msg, 0); |
| 69 | /* Retry with default authentication if different */ | ||
| 70 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
| 71 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
| 69 | dprintk("%s: reply getattr: %d\n", __func__, status); | 72 | dprintk("%s: reply getattr: %d\n", __func__, status); |
| 70 | if (status) | 73 | if (status) |
| 71 | return status; | 74 | return status; |
| 72 | dprintk("%s: call statfs\n", __func__); | 75 | dprintk("%s: call statfs\n", __func__); |
| 73 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; | 76 | msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; |
| 74 | msg.rpc_resp = &fsinfo; | 77 | msg.rpc_resp = &fsinfo; |
| 75 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | 78 | status = rpc_call_sync(server->client, &msg, 0); |
| 79 | /* Retry with default authentication if different */ | ||
| 80 | if (status && server->nfs_client->cl_rpcclient != server->client) | ||
| 81 | status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); | ||
| 76 | dprintk("%s: reply statfs: %d\n", __func__, status); | 82 | dprintk("%s: reply statfs: %d\n", __func__, status); |
| 77 | if (status) | 83 | if (status) |
| 78 | return status; | 84 | return status; |
diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e9b20173fef3..a3b0061dfd45 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c | |||
| @@ -91,6 +91,7 @@ enum { | |||
| 91 | /* Mount options that take string arguments */ | 91 | /* Mount options that take string arguments */ |
| 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, | 92 | Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, |
| 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, | 93 | Opt_addr, Opt_mountaddr, Opt_clientaddr, |
| 94 | Opt_lookupcache, | ||
| 94 | 95 | ||
| 95 | /* Special mount options */ | 96 | /* Special mount options */ |
| 96 | Opt_userspace, Opt_deprecated, Opt_sloppy, | 97 | Opt_userspace, Opt_deprecated, Opt_sloppy, |
| @@ -98,7 +99,7 @@ enum { | |||
| 98 | Opt_err | 99 | Opt_err |
| 99 | }; | 100 | }; |
| 100 | 101 | ||
| 101 | static match_table_t nfs_mount_option_tokens = { | 102 | static const match_table_t nfs_mount_option_tokens = { |
| 102 | { Opt_userspace, "bg" }, | 103 | { Opt_userspace, "bg" }, |
| 103 | { Opt_userspace, "fg" }, | 104 | { Opt_userspace, "fg" }, |
| 104 | { Opt_userspace, "retry=%s" }, | 105 | { Opt_userspace, "retry=%s" }, |
| @@ -154,6 +155,8 @@ static match_table_t nfs_mount_option_tokens = { | |||
| 154 | { Opt_mounthost, "mounthost=%s" }, | 155 | { Opt_mounthost, "mounthost=%s" }, |
| 155 | { Opt_mountaddr, "mountaddr=%s" }, | 156 | { Opt_mountaddr, "mountaddr=%s" }, |
| 156 | 157 | ||
| 158 | { Opt_lookupcache, "lookupcache=%s" }, | ||
| 159 | |||
| 157 | { Opt_err, NULL } | 160 | { Opt_err, NULL } |
| 158 | }; | 161 | }; |
| 159 | 162 | ||
| @@ -163,7 +166,7 @@ enum { | |||
| 163 | Opt_xprt_err | 166 | Opt_xprt_err |
| 164 | }; | 167 | }; |
| 165 | 168 | ||
| 166 | static match_table_t nfs_xprt_protocol_tokens = { | 169 | static const match_table_t nfs_xprt_protocol_tokens = { |
| 167 | { Opt_xprt_udp, "udp" }, | 170 | { Opt_xprt_udp, "udp" }, |
| 168 | { Opt_xprt_tcp, "tcp" }, | 171 | { Opt_xprt_tcp, "tcp" }, |
| 169 | { Opt_xprt_rdma, "rdma" }, | 172 | { Opt_xprt_rdma, "rdma" }, |
| @@ -180,7 +183,7 @@ enum { | |||
| 180 | Opt_sec_err | 183 | Opt_sec_err |
| 181 | }; | 184 | }; |
| 182 | 185 | ||
| 183 | static match_table_t nfs_secflavor_tokens = { | 186 | static const match_table_t nfs_secflavor_tokens = { |
| 184 | { Opt_sec_none, "none" }, | 187 | { Opt_sec_none, "none" }, |
| 185 | { Opt_sec_none, "null" }, | 188 | { Opt_sec_none, "null" }, |
| 186 | { Opt_sec_sys, "sys" }, | 189 | { Opt_sec_sys, "sys" }, |
| @@ -200,6 +203,22 @@ static match_table_t nfs_secflavor_tokens = { | |||
| 200 | { Opt_sec_err, NULL } | 203 | { Opt_sec_err, NULL } |
| 201 | }; | 204 | }; |
| 202 | 205 | ||
| 206 | enum { | ||
| 207 | Opt_lookupcache_all, Opt_lookupcache_positive, | ||
| 208 | Opt_lookupcache_none, | ||
| 209 | |||
| 210 | Opt_lookupcache_err | ||
| 211 | }; | ||
| 212 | |||
| 213 | static match_table_t nfs_lookupcache_tokens = { | ||
| 214 | { Opt_lookupcache_all, "all" }, | ||
| 215 | { Opt_lookupcache_positive, "pos" }, | ||
| 216 | { Opt_lookupcache_positive, "positive" }, | ||
| 217 | { Opt_lookupcache_none, "none" }, | ||
| 218 | |||
| 219 | { Opt_lookupcache_err, NULL } | ||
| 220 | }; | ||
| 221 | |||
| 203 | 222 | ||
| 204 | static void nfs_umount_begin(struct super_block *); | 223 | static void nfs_umount_begin(struct super_block *); |
| 205 | static int nfs_statfs(struct dentry *, struct kstatfs *); | 224 | static int nfs_statfs(struct dentry *, struct kstatfs *); |
| @@ -209,7 +228,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru | |||
| 209 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, | 228 | static int nfs_xdev_get_sb(struct file_system_type *fs_type, |
| 210 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); | 229 | int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); |
| 211 | static void nfs_kill_super(struct super_block *); | 230 | static void nfs_kill_super(struct super_block *); |
| 212 | static void nfs_put_super(struct super_block *); | ||
| 213 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); | 231 | static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); |
| 214 | 232 | ||
| 215 | static struct file_system_type nfs_fs_type = { | 233 | static struct file_system_type nfs_fs_type = { |
| @@ -232,7 +250,6 @@ static const struct super_operations nfs_sops = { | |||
| 232 | .alloc_inode = nfs_alloc_inode, | 250 | .alloc_inode = nfs_alloc_inode, |
| 233 | .destroy_inode = nfs_destroy_inode, | 251 | .destroy_inode = nfs_destroy_inode, |
| 234 | .write_inode = nfs_write_inode, | 252 | .write_inode = nfs_write_inode, |
| 235 | .put_super = nfs_put_super, | ||
| 236 | .statfs = nfs_statfs, | 253 | .statfs = nfs_statfs, |
| 237 | .clear_inode = nfs_clear_inode, | 254 | .clear_inode = nfs_clear_inode, |
| 238 | .umount_begin = nfs_umount_begin, | 255 | .umount_begin = nfs_umount_begin, |
| @@ -337,26 +354,20 @@ void __exit unregister_nfs_fs(void) | |||
| 337 | unregister_filesystem(&nfs_fs_type); | 354 | unregister_filesystem(&nfs_fs_type); |
| 338 | } | 355 | } |
| 339 | 356 | ||
| 340 | void nfs_sb_active(struct nfs_server *server) | 357 | void nfs_sb_active(struct super_block *sb) |
| 341 | { | 358 | { |
| 342 | atomic_inc(&server->active); | 359 | struct nfs_server *server = NFS_SB(sb); |
| 343 | } | ||
| 344 | 360 | ||
| 345 | void nfs_sb_deactive(struct nfs_server *server) | 361 | if (atomic_inc_return(&server->active) == 1) |
| 346 | { | 362 | atomic_inc(&sb->s_active); |
| 347 | if (atomic_dec_and_test(&server->active)) | ||
| 348 | wake_up(&server->active_wq); | ||
| 349 | } | 363 | } |
| 350 | 364 | ||
| 351 | static void nfs_put_super(struct super_block *sb) | 365 | void nfs_sb_deactive(struct super_block *sb) |
| 352 | { | 366 | { |
| 353 | struct nfs_server *server = NFS_SB(sb); | 367 | struct nfs_server *server = NFS_SB(sb); |
| 354 | /* | 368 | |
| 355 | * Make sure there are no outstanding ops to this server. | 369 | if (atomic_dec_and_test(&server->active)) |
| 356 | * If so, wait for them to finish before allowing the | 370 | deactivate_super(sb); |
| 357 | * unmount to continue. | ||
| 358 | */ | ||
| 359 | wait_event(server->active_wq, atomic_read(&server->active) == 0); | ||
| 360 | } | 371 | } |
| 361 | 372 | ||
| 362 | /* | 373 | /* |
| @@ -664,25 +675,6 @@ static void nfs_umount_begin(struct super_block *sb) | |||
| 664 | } | 675 | } |
| 665 | 676 | ||
| 666 | /* | 677 | /* |
| 667 | * Set the port number in an address. Be agnostic about the address family. | ||
| 668 | */ | ||
| 669 | static void nfs_set_port(struct sockaddr *sap, unsigned short port) | ||
| 670 | { | ||
| 671 | switch (sap->sa_family) { | ||
| 672 | case AF_INET: { | ||
| 673 | struct sockaddr_in *ap = (struct sockaddr_in *)sap; | ||
| 674 | ap->sin_port = htons(port); | ||
| 675 | break; | ||
| 676 | } | ||
| 677 | case AF_INET6: { | ||
| 678 | struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; | ||
| 679 | ap->sin6_port = htons(port); | ||
| 680 | break; | ||
| 681 | } | ||
| 682 | } | ||
| 683 | } | ||
| 684 | |||
| 685 | /* | ||
| 686 | * Sanity-check a server address provided by the mount command. | 678 | * Sanity-check a server address provided by the mount command. |
| 687 | * | 679 | * |
| 688 | * Address family must be initialized, and address must not be | 680 | * Address family must be initialized, and address must not be |
| @@ -724,20 +716,22 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, | |||
| 724 | *addr_len = 0; | 716 | *addr_len = 0; |
| 725 | } | 717 | } |
| 726 | 718 | ||
| 727 | #define IPV6_SCOPE_DELIMITER '%' | ||
| 728 | |||
| 729 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) | 719 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 730 | static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | 720 | static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, |
| 731 | const char *delim, | 721 | const char *delim, |
| 732 | struct sockaddr_in6 *sin6) | 722 | struct sockaddr_in6 *sin6) |
| 733 | { | 723 | { |
| 734 | char *p; | 724 | char *p; |
| 735 | size_t len; | 725 | size_t len; |
| 736 | 726 | ||
| 737 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | 727 | if ((string + str_len) == delim) |
| 738 | return ; | 728 | return 1; |
| 729 | |||
| 739 | if (*delim != IPV6_SCOPE_DELIMITER) | 730 | if (*delim != IPV6_SCOPE_DELIMITER) |
| 740 | return; | 731 | return 0; |
| 732 | |||
| 733 | if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) | ||
| 734 | return 0; | ||
| 741 | 735 | ||
| 742 | len = (string + str_len) - delim - 1; | 736 | len = (string + str_len) - delim - 1; |
| 743 | p = kstrndup(delim + 1, len, GFP_KERNEL); | 737 | p = kstrndup(delim + 1, len, GFP_KERNEL); |
| @@ -750,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, | |||
| 750 | scope_id = dev->ifindex; | 744 | scope_id = dev->ifindex; |
| 751 | dev_put(dev); | 745 | dev_put(dev); |
| 752 | } else { | 746 | } else { |
| 753 | /* scope_id is set to zero on error */ | 747 | if (strict_strtoul(p, 10, &scope_id) == 0) { |
| 754 | strict_strtoul(p, 10, &scope_id); | 748 | kfree(p); |
| 749 | return 0; | ||
| 750 | } | ||
| 755 | } | 751 | } |
| 756 | 752 | ||
| 757 | kfree(p); | 753 | kfree(p); |
| 754 | |||
| 758 | sin6->sin6_scope_id = scope_id; | 755 | sin6->sin6_scope_id = scope_id; |
| 759 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); | 756 | dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); |
| 757 | return 1; | ||
| 760 | } | 758 | } |
| 759 | |||
| 760 | return 0; | ||
| 761 | } | 761 | } |
| 762 | 762 | ||
| 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, | 763 | static void nfs_parse_ipv6_address(char *string, size_t str_len, |
| @@ -773,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
| 773 | 773 | ||
| 774 | sin6->sin6_family = AF_INET6; | 774 | sin6->sin6_family = AF_INET6; |
| 775 | *addr_len = sizeof(*sin6); | 775 | *addr_len = sizeof(*sin6); |
| 776 | if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { | 776 | if (in6_pton(string, str_len, addr, |
| 777 | nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); | 777 | IPV6_SCOPE_DELIMITER, &delim) != 0) { |
| 778 | return; | 778 | if (nfs_parse_ipv6_scope_id(string, str_len, |
| 779 | delim, sin6) != 0) | ||
| 780 | return; | ||
| 779 | } | 781 | } |
| 780 | } | 782 | } |
| 781 | 783 | ||
| @@ -798,7 +800,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, | |||
| 798 | * If there is a problem constructing the new sockaddr, set the address | 800 | * If there is a problem constructing the new sockaddr, set the address |
| 799 | * family to AF_UNSPEC. | 801 | * family to AF_UNSPEC. |
| 800 | */ | 802 | */ |
| 801 | static void nfs_parse_ip_address(char *string, size_t str_len, | 803 | void nfs_parse_ip_address(char *string, size_t str_len, |
| 802 | struct sockaddr *sap, size_t *addr_len) | 804 | struct sockaddr *sap, size_t *addr_len) |
| 803 | { | 805 | { |
| 804 | unsigned int i, colons; | 806 | unsigned int i, colons; |
| @@ -1258,6 +1260,30 @@ static int nfs_parse_mount_options(char *raw, | |||
| 1258 | &mnt->mount_server.addrlen); | 1260 | &mnt->mount_server.addrlen); |
| 1259 | kfree(string); | 1261 | kfree(string); |
| 1260 | break; | 1262 | break; |
| 1263 | case Opt_lookupcache: | ||
| 1264 | string = match_strdup(args); | ||
| 1265 | if (string == NULL) | ||
| 1266 | goto out_nomem; | ||
| 1267 | token = match_token(string, | ||
| 1268 | nfs_lookupcache_tokens, args); | ||
| 1269 | kfree(string); | ||
| 1270 | switch (token) { | ||
| 1271 | case Opt_lookupcache_all: | ||
| 1272 | mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); | ||
| 1273 | break; | ||
| 1274 | case Opt_lookupcache_positive: | ||
| 1275 | mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
| 1276 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; | ||
| 1277 | break; | ||
| 1278 | case Opt_lookupcache_none: | ||
| 1279 | mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; | ||
| 1280 | break; | ||
| 1281 | default: | ||
| 1282 | errors++; | ||
| 1283 | dfprintk(MOUNT, "NFS: invalid " | ||
| 1284 | "lookupcache argument\n"); | ||
| 1285 | }; | ||
| 1286 | break; | ||
| 1261 | 1287 | ||
| 1262 | /* | 1288 | /* |
| 1263 | * Special options | 1289 | * Special options |
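The new lookupcache= handling above folds three user-visible settings into two flag bits: cache everything, cache positive entries only, or cache nothing. A small standalone illustration of how the two bits combine is shown below, using made-up flag values rather than the kernel's NFS_MOUNT_* constants.

	#include <stdio.h>

	#define LOOKUP_CACHE_NONEG 0x1	/* don't cache negative lookups */
	#define LOOKUP_CACHE_NONE  0x2	/* don't cache any lookups */

	static const char *describe(unsigned int flags)
	{
		if (flags & LOOKUP_CACHE_NONE)
			return "none";
		if (flags & LOOKUP_CACHE_NONEG)
			return "positive only";
		return "all";
	}

	int main(void)
	{
		unsigned int flags = 0;

		/* lookupcache=positive */
		flags &= ~LOOKUP_CACHE_NONE;
		flags |= LOOKUP_CACHE_NONEG;
		printf("positive -> %s\n", describe(flags));

		/* lookupcache=none */
		flags |= LOOKUP_CACHE_NONEG | LOOKUP_CACHE_NONE;
		printf("none     -> %s\n", describe(flags));

		/* lookupcache=all */
		flags &= ~(LOOKUP_CACHE_NONEG | LOOKUP_CACHE_NONE);
		printf("all      -> %s\n", describe(flags));
		return 0;
	}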
| @@ -1558,7 +1584,7 @@ static int nfs_validate_mount_data(void *options, | |||
| 1558 | * Translate to nfs_parsed_mount_data, which nfs_fill_super | 1584 | * Translate to nfs_parsed_mount_data, which nfs_fill_super |
| 1559 | * can deal with. | 1585 | * can deal with. |
| 1560 | */ | 1586 | */ |
| 1561 | args->flags = data->flags; | 1587 | args->flags = data->flags & NFS_MOUNT_FLAGMASK; |
| 1562 | args->rsize = data->rsize; | 1588 | args->rsize = data->rsize; |
| 1563 | args->wsize = data->wsize; | 1589 | args->wsize = data->wsize; |
| 1564 | args->timeo = data->timeo; | 1590 | args->timeo = data->timeo; |
| @@ -2433,7 +2459,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags, | |||
| 2433 | compare_super = NULL; | 2459 | compare_super = NULL; |
| 2434 | 2460 | ||
| 2435 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2461 | /* Get a superblock - note that we may end up sharing one that already exists */ |
| 2436 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2462 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
| 2437 | if (IS_ERR(s)) { | 2463 | if (IS_ERR(s)) { |
| 2438 | error = PTR_ERR(s); | 2464 | error = PTR_ERR(s); |
| 2439 | goto out_err_nosb; | 2465 | goto out_err_nosb; |
| @@ -2518,7 +2544,7 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type, int flags, | |||
| 2518 | compare_super = NULL; | 2544 | compare_super = NULL; |
| 2519 | 2545 | ||
| 2520 | /* Get a superblock - note that we may end up sharing one that already exists */ | 2546 | /* Get a superblock - note that we may end up sharing one that already exists */ |
| 2521 | s = sget(&nfs_fs_type, compare_super, nfs_set_super, &sb_mntdata); | 2547 | s = sget(&nfs4_fs_type, compare_super, nfs_set_super, &sb_mntdata); |
| 2522 | if (IS_ERR(s)) { | 2548 | if (IS_ERR(s)) { |
| 2523 | error = PTR_ERR(s); | 2549 | error = PTR_ERR(s); |
| 2524 | goto out_err_nosb; | 2550 | goto out_err_nosb; |
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index f089e5839d7d..ecc295347775 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c | |||
| @@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata) | |||
| 99 | 99 | ||
| 100 | nfs_dec_sillycount(data->dir); | 100 | nfs_dec_sillycount(data->dir); |
| 101 | nfs_free_unlinkdata(data); | 101 | nfs_free_unlinkdata(data); |
| 102 | nfs_sb_deactive(NFS_SB(sb)); | 102 | nfs_sb_deactive(sb); |
| 103 | } | 103 | } |
| 104 | 104 | ||
| 105 | static const struct rpc_call_ops nfs_unlink_ops = { | 105 | static const struct rpc_call_ops nfs_unlink_ops = { |
| @@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
| 118 | .rpc_message = &msg, | 118 | .rpc_message = &msg, |
| 119 | .callback_ops = &nfs_unlink_ops, | 119 | .callback_ops = &nfs_unlink_ops, |
| 120 | .callback_data = data, | 120 | .callback_data = data, |
| 121 | .workqueue = nfsiod_workqueue, | ||
| 121 | .flags = RPC_TASK_ASYNC, | 122 | .flags = RPC_TASK_ASYNC, |
| 122 | }; | 123 | }; |
| 123 | struct rpc_task *task; | 124 | struct rpc_task *task; |
| @@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n | |||
| 149 | nfs_dec_sillycount(dir); | 150 | nfs_dec_sillycount(dir); |
| 150 | return 0; | 151 | return 0; |
| 151 | } | 152 | } |
| 152 | nfs_sb_active(NFS_SERVER(dir)); | 153 | nfs_sb_active(dir->i_sb); |
| 153 | data->args.fh = NFS_FH(dir); | 154 | data->args.fh = NFS_FH(dir); |
| 154 | nfs_fattr_init(&data->res.dir_attr); | 155 | nfs_fattr_init(&data->res.dir_attr); |
| 155 | 156 | ||
diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3229e217c773..9f9845859fc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c | |||
| @@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how) | |||
| 1427 | .bdi = mapping->backing_dev_info, | 1427 | .bdi = mapping->backing_dev_info, |
| 1428 | .sync_mode = WB_SYNC_NONE, | 1428 | .sync_mode = WB_SYNC_NONE, |
| 1429 | .nr_to_write = LONG_MAX, | 1429 | .nr_to_write = LONG_MAX, |
| 1430 | .range_start = 0, | ||
| 1431 | .range_end = LLONG_MAX, | ||
| 1430 | .for_writepages = 1, | 1432 | .for_writepages = 1, |
| 1431 | .range_cyclic = 1, | ||
| 1432 | }; | 1433 | }; |
| 1433 | int ret; | 1434 | int ret; |
| 1434 | 1435 | ||
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 15c6faeec77c..b2786a5f9afe 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c | |||
| @@ -70,7 +70,6 @@ nlm_fclose(struct file *filp) | |||
| 70 | static struct nlmsvc_binding nfsd_nlm_ops = { | 70 | static struct nlmsvc_binding nfsd_nlm_ops = { |
| 71 | .fopen = nlm_fopen, /* open file for locking */ | 71 | .fopen = nlm_fopen, /* open file for locking */ |
| 72 | .fclose = nlm_fclose, /* close file */ | 72 | .fclose = nlm_fclose, /* close file */ |
| 73 | .get_grace_period = get_nfs4_grace_period, | ||
| 74 | }; | 73 | }; |
| 75 | 74 | ||
| 76 | void | 75 | void |
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 4d617ea28cfc..9dbd2eb91281 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c | |||
| @@ -63,7 +63,8 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, | |||
| 63 | SVCFH_fmt(&argp->fh)); | 63 | SVCFH_fmt(&argp->fh)); |
| 64 | 64 | ||
| 65 | fh_copy(&resp->fh, &argp->fh); | 65 | fh_copy(&resp->fh, &argp->fh); |
| 66 | nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); | 66 | nfserr = fh_verify(rqstp, &resp->fh, 0, |
| 67 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
| 67 | if (nfserr) | 68 | if (nfserr) |
| 68 | RETURN_STATUS(nfserr); | 69 | RETURN_STATUS(nfserr); |
| 69 | 70 | ||
| @@ -530,7 +531,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
| 530 | dprintk("nfsd: FSSTAT(3) %s\n", | 531 | dprintk("nfsd: FSSTAT(3) %s\n", |
| 531 | SVCFH_fmt(&argp->fh)); | 532 | SVCFH_fmt(&argp->fh)); |
| 532 | 533 | ||
| 533 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); | 534 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, 0); |
| 534 | fh_put(&argp->fh); | 535 | fh_put(&argp->fh); |
| 535 | RETURN_STATUS(nfserr); | 536 | RETURN_STATUS(nfserr); |
| 536 | } | 537 | } |
| @@ -558,7 +559,8 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
| 558 | resp->f_maxfilesize = ~(u32) 0; | 559 | resp->f_maxfilesize = ~(u32) 0; |
| 559 | resp->f_properties = NFS3_FSF_DEFAULT; | 560 | resp->f_properties = NFS3_FSF_DEFAULT; |
| 560 | 561 | ||
| 561 | nfserr = fh_verify(rqstp, &argp->fh, 0, NFSD_MAY_NOP); | 562 | nfserr = fh_verify(rqstp, &argp->fh, 0, |
| 563 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
| 562 | 564 | ||
| 563 | /* Check special features of the file system. May request | 565 | /* Check special features of the file system. May request |
| 564 | * different read/write sizes for file systems known to have | 566 | * different read/write sizes for file systems known to have |
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 702fa577aa6e..094747a1227c 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c | |||
| @@ -225,7 +225,8 @@ encode_cb_recall(struct xdr_stream *xdr, struct nfs4_cb_recall *cb_rec) | |||
| 225 | 225 | ||
| 226 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); | 226 | RESERVE_SPACE(12+sizeof(cb_rec->cbr_stateid) + len); |
| 227 | WRITE32(OP_CB_RECALL); | 227 | WRITE32(OP_CB_RECALL); |
| 228 | WRITEMEM(&cb_rec->cbr_stateid, sizeof(stateid_t)); | 228 | WRITE32(cb_rec->cbr_stateid.si_generation); |
| 229 | WRITEMEM(&cb_rec->cbr_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 229 | WRITE32(cb_rec->cbr_trunc); | 230 | WRITE32(cb_rec->cbr_trunc); |
| 230 | WRITE32(len); | 231 | WRITE32(len); |
| 231 | WRITEMEM(cb_rec->cbr_fhval, len); | 232 | WRITEMEM(cb_rec->cbr_fhval, len); |
| @@ -379,6 +380,7 @@ static int do_probe_callback(void *data) | |||
| 379 | .addrsize = sizeof(addr), | 380 | .addrsize = sizeof(addr), |
| 380 | .timeout = &timeparms, | 381 | .timeout = &timeparms, |
| 381 | .program = &cb_program, | 382 | .program = &cb_program, |
| 383 | .prognumber = cb->cb_prog, | ||
| 382 | .version = nfs_cb_version[1]->number, | 384 | .version = nfs_cb_version[1]->number, |
| 383 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ | 385 | .authflavor = RPC_AUTH_UNIX, /* XXX: need AUTH_GSS... */ |
| 384 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), | 386 | .flags = (RPC_CLNT_CREATE_NOPING | RPC_CLNT_CREATE_QUIET), |
| @@ -396,9 +398,6 @@ static int do_probe_callback(void *data) | |||
| 396 | addr.sin_port = htons(cb->cb_port); | 398 | addr.sin_port = htons(cb->cb_port); |
| 397 | addr.sin_addr.s_addr = htonl(cb->cb_addr); | 399 | addr.sin_addr.s_addr = htonl(cb->cb_addr); |
| 398 | 400 | ||
| 399 | /* Initialize rpc_stat */ | ||
| 400 | memset(args.program->stats, 0, sizeof(struct rpc_stat)); | ||
| 401 | |||
| 402 | /* Create RPC client */ | 401 | /* Create RPC client */ |
| 403 | client = rpc_create(&args); | 402 | client = rpc_create(&args); |
| 404 | if (IS_ERR(client)) { | 403 | if (IS_ERR(client)) { |
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index e5b51ffafc6c..669461e291ae 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c | |||
| @@ -201,10 +201,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 201 | /* Openowner is now set, so sequence id will get bumped. Now we need | 201 | /* Openowner is now set, so sequence id will get bumped. Now we need |
| 202 | * these checks before we do any creates: */ | 202 | * these checks before we do any creates: */ |
| 203 | status = nfserr_grace; | 203 | status = nfserr_grace; |
| 204 | if (nfs4_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) | 204 | if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) |
| 205 | goto out; | 205 | goto out; |
| 206 | status = nfserr_no_grace; | 206 | status = nfserr_no_grace; |
| 207 | if (!nfs4_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) | 207 | if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS) |
| 208 | goto out; | 208 | goto out; |
| 209 | 209 | ||
| 210 | switch (open->op_claim_type) { | 210 | switch (open->op_claim_type) { |
| @@ -575,7 +575,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 575 | { | 575 | { |
| 576 | __be32 status; | 576 | __be32 status; |
| 577 | 577 | ||
| 578 | if (nfs4_in_grace()) | 578 | if (locks_in_grace()) |
| 579 | return nfserr_grace; | 579 | return nfserr_grace; |
| 580 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, | 580 | status = nfsd_unlink(rqstp, &cstate->current_fh, 0, |
| 581 | remove->rm_name, remove->rm_namelen); | 581 | remove->rm_name, remove->rm_namelen); |
| @@ -596,7 +596,7 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 596 | 596 | ||
| 597 | if (!cstate->save_fh.fh_dentry) | 597 | if (!cstate->save_fh.fh_dentry) |
| 598 | return status; | 598 | return status; |
| 599 | if (nfs4_in_grace() && !(cstate->save_fh.fh_export->ex_flags | 599 | if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags |
| 600 | & NFSEXP_NOSUBTREECHECK)) | 600 | & NFSEXP_NOSUBTREECHECK)) |
| 601 | return nfserr_grace; | 601 | return nfserr_grace; |
| 602 | status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, | 602 | status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname, |
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1578d7a2667e..0cc7ff5d5ab5 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c | |||
| @@ -61,7 +61,6 @@ | |||
| 61 | static time_t lease_time = 90; /* default lease time */ | 61 | static time_t lease_time = 90; /* default lease time */ |
| 62 | static time_t user_lease_time = 90; | 62 | static time_t user_lease_time = 90; |
| 63 | static time_t boot_time; | 63 | static time_t boot_time; |
| 64 | static int in_grace = 1; | ||
| 65 | static u32 current_ownerid = 1; | 64 | static u32 current_ownerid = 1; |
| 66 | static u32 current_fileid = 1; | 65 | static u32 current_fileid = 1; |
| 67 | static u32 current_delegid = 1; | 66 | static u32 current_delegid = 1; |
| @@ -1640,7 +1639,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta | |||
| 1640 | case NFS4_OPEN_CLAIM_NULL: | 1639 | case NFS4_OPEN_CLAIM_NULL: |
| 1641 | /* Let's not give out any delegations till everyone's | 1640 | /* Let's not give out any delegations till everyone's |
| 1642 | * had the chance to reclaim theirs.... */ | 1641 | * had the chance to reclaim theirs.... */ |
| 1643 | if (nfs4_in_grace()) | 1642 | if (locks_in_grace()) |
| 1644 | goto out; | 1643 | goto out; |
| 1645 | if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) | 1644 | if (!atomic_read(&cb->cb_set) || !sop->so_confirmed) |
| 1646 | goto out; | 1645 | goto out; |
| @@ -1816,12 +1815,15 @@ out: | |||
| 1816 | return status; | 1815 | return status; |
| 1817 | } | 1816 | } |
| 1818 | 1817 | ||
| 1818 | struct lock_manager nfsd4_manager = { | ||
| 1819 | }; | ||
| 1820 | |||
| 1819 | static void | 1821 | static void |
| 1820 | end_grace(void) | 1822 | nfsd4_end_grace(void) |
| 1821 | { | 1823 | { |
| 1822 | dprintk("NFSD: end of grace period\n"); | 1824 | dprintk("NFSD: end of grace period\n"); |
| 1823 | nfsd4_recdir_purge_old(); | 1825 | nfsd4_recdir_purge_old(); |
| 1824 | in_grace = 0; | 1826 | locks_end_grace(&nfsd4_manager); |
| 1825 | } | 1827 | } |
| 1826 | 1828 | ||
| 1827 | static time_t | 1829 | static time_t |
| @@ -1838,8 +1840,8 @@ nfs4_laundromat(void) | |||
| 1838 | nfs4_lock_state(); | 1840 | nfs4_lock_state(); |
| 1839 | 1841 | ||
| 1840 | dprintk("NFSD: laundromat service - starting\n"); | 1842 | dprintk("NFSD: laundromat service - starting\n"); |
| 1841 | if (in_grace) | 1843 | if (locks_in_grace()) |
| 1842 | end_grace(); | 1844 | nfsd4_end_grace(); |
| 1843 | list_for_each_safe(pos, next, &client_lru) { | 1845 | list_for_each_safe(pos, next, &client_lru) { |
| 1844 | clp = list_entry(pos, struct nfs4_client, cl_lru); | 1846 | clp = list_entry(pos, struct nfs4_client, cl_lru); |
| 1845 | if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { | 1847 | if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { |
| @@ -1974,7 +1976,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
| 1974 | return nfserr_bad_stateid; | 1976 | return nfserr_bad_stateid; |
| 1975 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) | 1977 | else if (ONE_STATEID(stateid) && (flags & RD_STATE)) |
| 1976 | return nfs_ok; | 1978 | return nfs_ok; |
| 1977 | else if (nfs4_in_grace()) { | 1979 | else if (locks_in_grace()) { |
| 1978 | /* Answer in remaining cases depends on existance of | 1980 | /* Answer in remaining cases depends on existance of |
| 1979 | * conflicting state; so we must wait out the grace period. */ | 1981 | * conflicting state; so we must wait out the grace period. */ |
| 1980 | return nfserr_grace; | 1982 | return nfserr_grace; |
| @@ -1993,7 +1995,7 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) | |||
| 1993 | static inline int | 1995 | static inline int |
| 1994 | io_during_grace_disallowed(struct inode *inode, int flags) | 1996 | io_during_grace_disallowed(struct inode *inode, int flags) |
| 1995 | { | 1997 | { |
| 1996 | return nfs4_in_grace() && (flags & (RD_STATE | WR_STATE)) | 1998 | return locks_in_grace() && (flags & (RD_STATE | WR_STATE)) |
| 1997 | && mandatory_lock(inode); | 1999 | && mandatory_lock(inode); |
| 1998 | } | 2000 | } |
| 1999 | 2001 | ||
| @@ -2693,10 +2695,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 2693 | filp = lock_stp->st_vfs_file; | 2695 | filp = lock_stp->st_vfs_file; |
| 2694 | 2696 | ||
| 2695 | status = nfserr_grace; | 2697 | status = nfserr_grace; |
| 2696 | if (nfs4_in_grace() && !lock->lk_reclaim) | 2698 | if (locks_in_grace() && !lock->lk_reclaim) |
| 2697 | goto out; | 2699 | goto out; |
| 2698 | status = nfserr_no_grace; | 2700 | status = nfserr_no_grace; |
| 2699 | if (!nfs4_in_grace() && lock->lk_reclaim) | 2701 | if (!locks_in_grace() && lock->lk_reclaim) |
| 2700 | goto out; | 2702 | goto out; |
| 2701 | 2703 | ||
| 2702 | locks_init_lock(&file_lock); | 2704 | locks_init_lock(&file_lock); |
| @@ -2779,7 +2781,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, | |||
| 2779 | int error; | 2781 | int error; |
| 2780 | __be32 status; | 2782 | __be32 status; |
| 2781 | 2783 | ||
| 2782 | if (nfs4_in_grace()) | 2784 | if (locks_in_grace()) |
| 2783 | return nfserr_grace; | 2785 | return nfserr_grace; |
| 2784 | 2786 | ||
| 2785 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) | 2787 | if (check_lock_length(lockt->lt_offset, lockt->lt_length)) |
| @@ -3192,9 +3194,9 @@ __nfs4_state_start(void) | |||
| 3192 | unsigned long grace_time; | 3194 | unsigned long grace_time; |
| 3193 | 3195 | ||
| 3194 | boot_time = get_seconds(); | 3196 | boot_time = get_seconds(); |
| 3195 | grace_time = get_nfs_grace_period(); | 3197 | grace_time = get_nfs4_grace_period(); |
| 3196 | lease_time = user_lease_time; | 3198 | lease_time = user_lease_time; |
| 3197 | in_grace = 1; | 3199 | locks_start_grace(&nfsd4_manager); |
| 3198 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", | 3200 | printk(KERN_INFO "NFSD: starting %ld-second grace period\n", |
| 3199 | grace_time/HZ); | 3201 | grace_time/HZ); |
| 3200 | laundry_wq = create_singlethread_workqueue("nfsd4"); | 3202 | laundry_wq = create_singlethread_workqueue("nfsd4"); |
| @@ -3213,12 +3215,6 @@ nfs4_state_start(void) | |||
| 3213 | return; | 3215 | return; |
| 3214 | } | 3216 | } |
| 3215 | 3217 | ||
| 3216 | int | ||
| 3217 | nfs4_in_grace(void) | ||
| 3218 | { | ||
| 3219 | return in_grace; | ||
| 3220 | } | ||
| 3221 | |||
| 3222 | time_t | 3218 | time_t |
| 3223 | nfs4_lease_time(void) | 3219 | nfs4_lease_time(void) |
| 3224 | { | 3220 | { |
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 14ba4d9b2859..afcdf4b76843 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c | |||
| @@ -413,6 +413,18 @@ out_nfserr: | |||
| 413 | } | 413 | } |
| 414 | 414 | ||
| 415 | static __be32 | 415 | static __be32 |
| 416 | nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid) | ||
| 417 | { | ||
| 418 | DECODE_HEAD; | ||
| 419 | |||
| 420 | READ_BUF(sizeof(stateid_t)); | ||
| 421 | READ32(sid->si_generation); | ||
| 422 | COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
| 423 | |||
| 424 | DECODE_TAIL; | ||
| 425 | } | ||
| 426 | |||
| 427 | static __be32 | ||
| 416 | nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) | 428 | nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) |
| 417 | { | 429 | { |
| 418 | DECODE_HEAD; | 430 | DECODE_HEAD; |
| @@ -429,10 +441,9 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) | |||
| 429 | DECODE_HEAD; | 441 | DECODE_HEAD; |
| 430 | 442 | ||
| 431 | close->cl_stateowner = NULL; | 443 | close->cl_stateowner = NULL; |
| 432 | READ_BUF(4 + sizeof(stateid_t)); | 444 | READ_BUF(4); |
| 433 | READ32(close->cl_seqid); | 445 | READ32(close->cl_seqid); |
| 434 | READ32(close->cl_stateid.si_generation); | 446 | return nfsd4_decode_stateid(argp, &close->cl_stateid); |
| 435 | COPYMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 436 | 447 | ||
| 437 | DECODE_TAIL; | 448 | DECODE_TAIL; |
| 438 | } | 449 | } |
| @@ -493,13 +504,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create | |||
| 493 | static inline __be32 | 504 | static inline __be32 |
| 494 | nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) | 505 | nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) |
| 495 | { | 506 | { |
| 496 | DECODE_HEAD; | 507 | return nfsd4_decode_stateid(argp, &dr->dr_stateid); |
| 497 | |||
| 498 | READ_BUF(sizeof(stateid_t)); | ||
| 499 | READ32(dr->dr_stateid.si_generation); | ||
| 500 | COPYMEM(&dr->dr_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 501 | |||
| 502 | DECODE_TAIL; | ||
| 503 | } | 508 | } |
| 504 | 509 | ||
| 505 | static inline __be32 | 510 | static inline __be32 |
| @@ -542,20 +547,22 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) | |||
| 542 | READ32(lock->lk_is_new); | 547 | READ32(lock->lk_is_new); |
| 543 | 548 | ||
| 544 | if (lock->lk_is_new) { | 549 | if (lock->lk_is_new) { |
| 545 | READ_BUF(36); | 550 | READ_BUF(4); |
| 546 | READ32(lock->lk_new_open_seqid); | 551 | READ32(lock->lk_new_open_seqid); |
| 547 | READ32(lock->lk_new_open_stateid.si_generation); | 552 | status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid); |
| 548 | 553 | if (status) | |
| 549 | COPYMEM(&lock->lk_new_open_stateid.si_opaque, sizeof(stateid_opaque_t)); | 554 | return status; |
| 555 | READ_BUF(8 + sizeof(clientid_t)); | ||
| 550 | READ32(lock->lk_new_lock_seqid); | 556 | READ32(lock->lk_new_lock_seqid); |
| 551 | COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); | 557 | COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t)); |
| 552 | READ32(lock->lk_new_owner.len); | 558 | READ32(lock->lk_new_owner.len); |
| 553 | READ_BUF(lock->lk_new_owner.len); | 559 | READ_BUF(lock->lk_new_owner.len); |
| 554 | READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); | 560 | READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len); |
| 555 | } else { | 561 | } else { |
| 556 | READ_BUF(20); | 562 | status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid); |
| 557 | READ32(lock->lk_old_lock_stateid.si_generation); | 563 | if (status) |
| 558 | COPYMEM(&lock->lk_old_lock_stateid.si_opaque, sizeof(stateid_opaque_t)); | 564 | return status; |
| 565 | READ_BUF(4); | ||
| 559 | READ32(lock->lk_old_lock_seqid); | 566 | READ32(lock->lk_old_lock_seqid); |
| 560 | } | 567 | } |
| 561 | 568 | ||
| @@ -587,13 +594,15 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) | |||
| 587 | DECODE_HEAD; | 594 | DECODE_HEAD; |
| 588 | 595 | ||
| 589 | locku->lu_stateowner = NULL; | 596 | locku->lu_stateowner = NULL; |
| 590 | READ_BUF(24 + sizeof(stateid_t)); | 597 | READ_BUF(8); |
| 591 | READ32(locku->lu_type); | 598 | READ32(locku->lu_type); |
| 592 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) | 599 | if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) |
| 593 | goto xdr_error; | 600 | goto xdr_error; |
| 594 | READ32(locku->lu_seqid); | 601 | READ32(locku->lu_seqid); |
| 595 | READ32(locku->lu_stateid.si_generation); | 602 | status = nfsd4_decode_stateid(argp, &locku->lu_stateid); |
| 596 | COPYMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); | 603 | if (status) |
| 604 | return status; | ||
| 605 | READ_BUF(16); | ||
| 597 | READ64(locku->lu_offset); | 606 | READ64(locku->lu_offset); |
| 598 | READ64(locku->lu_length); | 607 | READ64(locku->lu_length); |
| 599 | 608 | ||
| @@ -678,8 +687,10 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) | |||
| 678 | READ32(open->op_delegate_type); | 687 | READ32(open->op_delegate_type); |
| 679 | break; | 688 | break; |
| 680 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: | 689 | case NFS4_OPEN_CLAIM_DELEGATE_CUR: |
| 681 | READ_BUF(sizeof(stateid_t) + 4); | 690 | status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid); |
| 682 | COPYMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 691 | if (status) |
| 692 | return status; | ||
| 693 | READ_BUF(4); | ||
| 683 | READ32(open->op_fname.len); | 694 | READ32(open->op_fname.len); |
| 684 | READ_BUF(open->op_fname.len); | 695 | READ_BUF(open->op_fname.len); |
| 685 | SAVEMEM(open->op_fname.data, open->op_fname.len); | 696 | SAVEMEM(open->op_fname.data, open->op_fname.len); |
| @@ -699,9 +710,10 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con | |||
| 699 | DECODE_HEAD; | 710 | DECODE_HEAD; |
| 700 | 711 | ||
| 701 | open_conf->oc_stateowner = NULL; | 712 | open_conf->oc_stateowner = NULL; |
| 702 | READ_BUF(4 + sizeof(stateid_t)); | 713 | status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid); |
| 703 | READ32(open_conf->oc_req_stateid.si_generation); | 714 | if (status) |
| 704 | COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); | 715 | return status; |
| 716 | READ_BUF(4); | ||
| 705 | READ32(open_conf->oc_seqid); | 717 | READ32(open_conf->oc_seqid); |
| 706 | 718 | ||
| 707 | DECODE_TAIL; | 719 | DECODE_TAIL; |
| @@ -713,9 +725,10 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d | |||
| 713 | DECODE_HEAD; | 725 | DECODE_HEAD; |
| 714 | 726 | ||
| 715 | open_down->od_stateowner = NULL; | 727 | open_down->od_stateowner = NULL; |
| 716 | READ_BUF(12 + sizeof(stateid_t)); | 728 | status = nfsd4_decode_stateid(argp, &open_down->od_stateid); |
| 717 | READ32(open_down->od_stateid.si_generation); | 729 | if (status) |
| 718 | COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); | 730 | return status; |
| 731 | READ_BUF(12); | ||
| 719 | READ32(open_down->od_seqid); | 732 | READ32(open_down->od_seqid); |
| 720 | READ32(open_down->od_share_access); | 733 | READ32(open_down->od_share_access); |
| 721 | READ32(open_down->od_share_deny); | 734 | READ32(open_down->od_share_deny); |
| @@ -743,9 +756,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) | |||
| 743 | { | 756 | { |
| 744 | DECODE_HEAD; | 757 | DECODE_HEAD; |
| 745 | 758 | ||
| 746 | READ_BUF(sizeof(stateid_t) + 12); | 759 | status = nfsd4_decode_stateid(argp, &read->rd_stateid); |
| 747 | READ32(read->rd_stateid.si_generation); | 760 | if (status) |
| 748 | COPYMEM(&read->rd_stateid.si_opaque, sizeof(stateid_opaque_t)); | 761 | return status; |
| 762 | READ_BUF(12); | ||
| 749 | READ64(read->rd_offset); | 763 | READ64(read->rd_offset); |
| 750 | READ32(read->rd_length); | 764 | READ32(read->rd_length); |
| 751 | 765 | ||
| @@ -834,15 +848,13 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp, | |||
| 834 | static __be32 | 848 | static __be32 |
| 835 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) | 849 | nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) |
| 836 | { | 850 | { |
| 837 | DECODE_HEAD; | 851 | __be32 status; |
| 838 | |||
| 839 | READ_BUF(sizeof(stateid_t)); | ||
| 840 | READ32(setattr->sa_stateid.si_generation); | ||
| 841 | COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 842 | if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) | ||
| 843 | goto out; | ||
| 844 | 852 | ||
| 845 | DECODE_TAIL; | 853 | status = nfsd4_decode_stateid(argp, &setattr->sa_stateid); |
| 854 | if (status) | ||
| 855 | return status; | ||
| 856 | return nfsd4_decode_fattr(argp, setattr->sa_bmval, | ||
| 857 | &setattr->sa_iattr, &setattr->sa_acl); | ||
| 846 | } | 858 | } |
| 847 | 859 | ||
| 848 | static __be32 | 860 | static __be32 |
| @@ -927,9 +939,10 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) | |||
| 927 | int len; | 939 | int len; |
| 928 | DECODE_HEAD; | 940 | DECODE_HEAD; |
| 929 | 941 | ||
| 930 | READ_BUF(sizeof(stateid_opaque_t) + 20); | 942 | status = nfsd4_decode_stateid(argp, &write->wr_stateid); |
| 931 | READ32(write->wr_stateid.si_generation); | 943 | if (status) |
| 932 | COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); | 944 | return status; |
| 945 | READ_BUF(16); | ||
| 933 | READ64(write->wr_offset); | 946 | READ64(write->wr_offset); |
| 934 | READ32(write->wr_stable_how); | 947 | READ32(write->wr_stable_how); |
| 935 | if (write->wr_stable_how > 2) | 948 | if (write->wr_stable_how > 2) |
| @@ -1183,7 +1196,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) | |||
| 1183 | * Header routine to setup seqid operation replay cache | 1196 | * Header routine to setup seqid operation replay cache |
| 1184 | */ | 1197 | */ |
| 1185 | #define ENCODE_SEQID_OP_HEAD \ | 1198 | #define ENCODE_SEQID_OP_HEAD \ |
| 1186 | __be32 *p; \ | ||
| 1187 | __be32 *save; \ | 1199 | __be32 *save; \ |
| 1188 | \ | 1200 | \ |
| 1189 | save = resp->p; | 1201 | save = resp->p; |
| @@ -1950,6 +1962,17 @@ fail: | |||
| 1950 | return -EINVAL; | 1962 | return -EINVAL; |
| 1951 | } | 1963 | } |
| 1952 | 1964 | ||
| 1965 | static void | ||
| 1966 | nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid) | ||
| 1967 | { | ||
| 1968 | ENCODE_HEAD; | ||
| 1969 | |||
| 1970 | RESERVE_SPACE(sizeof(stateid_t)); | ||
| 1971 | WRITE32(sid->si_generation); | ||
| 1972 | WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t)); | ||
| 1973 | ADJUST_ARGS(); | ||
| 1974 | } | ||
| 1975 | |||
| 1953 | static __be32 | 1976 | static __be32 |
| 1954 | nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) | 1977 | nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) |
| 1955 | { | 1978 | { |
| @@ -1969,12 +1992,9 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c | |||
| 1969 | { | 1992 | { |
| 1970 | ENCODE_SEQID_OP_HEAD; | 1993 | ENCODE_SEQID_OP_HEAD; |
| 1971 | 1994 | ||
| 1972 | if (!nfserr) { | 1995 | if (!nfserr) |
| 1973 | RESERVE_SPACE(sizeof(stateid_t)); | 1996 | nfsd4_encode_stateid(resp, &close->cl_stateid); |
| 1974 | WRITE32(close->cl_stateid.si_generation); | 1997 | |
| 1975 | WRITEMEM(&close->cl_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 1976 | ADJUST_ARGS(); | ||
| 1977 | } | ||
| 1978 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); | 1998 | ENCODE_SEQID_OP_TAIL(close->cl_stateowner); |
| 1979 | return nfserr; | 1999 | return nfserr; |
| 1980 | } | 2000 | } |
| @@ -2074,12 +2094,9 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lo | |||
| 2074 | { | 2094 | { |
| 2075 | ENCODE_SEQID_OP_HEAD; | 2095 | ENCODE_SEQID_OP_HEAD; |
| 2076 | 2096 | ||
| 2077 | if (!nfserr) { | 2097 | if (!nfserr) |
| 2078 | RESERVE_SPACE(4 + sizeof(stateid_t)); | 2098 | nfsd4_encode_stateid(resp, &lock->lk_resp_stateid); |
| 2079 | WRITE32(lock->lk_resp_stateid.si_generation); | 2099 | else if (nfserr == nfserr_denied) |
| 2080 | WRITEMEM(&lock->lk_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 2081 | ADJUST_ARGS(); | ||
| 2082 | } else if (nfserr == nfserr_denied) | ||
| 2083 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); | 2100 | nfsd4_encode_lock_denied(resp, &lock->lk_denied); |
| 2084 | 2101 | ||
| 2085 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); | 2102 | ENCODE_SEQID_OP_TAIL(lock->lk_replay_owner); |
| @@ -2099,13 +2116,9 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l | |||
| 2099 | { | 2116 | { |
| 2100 | ENCODE_SEQID_OP_HEAD; | 2117 | ENCODE_SEQID_OP_HEAD; |
| 2101 | 2118 | ||
| 2102 | if (!nfserr) { | 2119 | if (!nfserr) |
| 2103 | RESERVE_SPACE(sizeof(stateid_t)); | 2120 | nfsd4_encode_stateid(resp, &locku->lu_stateid); |
| 2104 | WRITE32(locku->lu_stateid.si_generation); | 2121 | |
| 2105 | WRITEMEM(&locku->lu_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 2106 | ADJUST_ARGS(); | ||
| 2107 | } | ||
| 2108 | |||
| 2109 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); | 2122 | ENCODE_SEQID_OP_TAIL(locku->lu_stateowner); |
| 2110 | return nfserr; | 2123 | return nfserr; |
| 2111 | } | 2124 | } |
| @@ -2128,14 +2141,14 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li | |||
| 2128 | static __be32 | 2141 | static __be32 |
| 2129 | nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) | 2142 | nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) |
| 2130 | { | 2143 | { |
| 2144 | ENCODE_HEAD; | ||
| 2131 | ENCODE_SEQID_OP_HEAD; | 2145 | ENCODE_SEQID_OP_HEAD; |
| 2132 | 2146 | ||
| 2133 | if (nfserr) | 2147 | if (nfserr) |
| 2134 | goto out; | 2148 | goto out; |
| 2135 | 2149 | ||
| 2136 | RESERVE_SPACE(36 + sizeof(stateid_t)); | 2150 | nfsd4_encode_stateid(resp, &open->op_stateid); |
| 2137 | WRITE32(open->op_stateid.si_generation); | 2151 | RESERVE_SPACE(40); |
| 2138 | WRITEMEM(&open->op_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 2139 | WRITECINFO(open->op_cinfo); | 2152 | WRITECINFO(open->op_cinfo); |
| 2140 | WRITE32(open->op_rflags); | 2153 | WRITE32(open->op_rflags); |
| 2141 | WRITE32(2); | 2154 | WRITE32(2); |
| @@ -2148,8 +2161,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
| 2148 | case NFS4_OPEN_DELEGATE_NONE: | 2161 | case NFS4_OPEN_DELEGATE_NONE: |
| 2149 | break; | 2162 | break; |
| 2150 | case NFS4_OPEN_DELEGATE_READ: | 2163 | case NFS4_OPEN_DELEGATE_READ: |
| 2151 | RESERVE_SPACE(20 + sizeof(stateid_t)); | 2164 | nfsd4_encode_stateid(resp, &open->op_delegate_stateid); |
| 2152 | WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 2165 | RESERVE_SPACE(20); |
| 2153 | WRITE32(open->op_recall); | 2166 | WRITE32(open->op_recall); |
| 2154 | 2167 | ||
| 2155 | /* | 2168 | /* |
| @@ -2162,8 +2175,8 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op | |||
| 2162 | ADJUST_ARGS(); | 2175 | ADJUST_ARGS(); |
| 2163 | break; | 2176 | break; |
| 2164 | case NFS4_OPEN_DELEGATE_WRITE: | 2177 | case NFS4_OPEN_DELEGATE_WRITE: |
| 2165 | RESERVE_SPACE(32 + sizeof(stateid_t)); | 2178 | nfsd4_encode_stateid(resp, &open->op_delegate_stateid); |
| 2166 | WRITEMEM(&open->op_delegate_stateid, sizeof(stateid_t)); | 2179 | RESERVE_SPACE(32); |
| 2167 | WRITE32(0); | 2180 | WRITE32(0); |
| 2168 | 2181 | ||
| 2169 | /* | 2182 | /* |
| @@ -2195,13 +2208,9 @@ static __be32 | |||
| 2195 | nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) | 2208 | nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) |
| 2196 | { | 2209 | { |
| 2197 | ENCODE_SEQID_OP_HEAD; | 2210 | ENCODE_SEQID_OP_HEAD; |
| 2198 | 2211 | ||
| 2199 | if (!nfserr) { | 2212 | if (!nfserr) |
| 2200 | RESERVE_SPACE(sizeof(stateid_t)); | 2213 | nfsd4_encode_stateid(resp, &oc->oc_resp_stateid); |
| 2201 | WRITE32(oc->oc_resp_stateid.si_generation); | ||
| 2202 | WRITEMEM(&oc->oc_resp_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 2203 | ADJUST_ARGS(); | ||
| 2204 | } | ||
| 2205 | 2214 | ||
| 2206 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); | 2215 | ENCODE_SEQID_OP_TAIL(oc->oc_stateowner); |
| 2207 | return nfserr; | 2216 | return nfserr; |
| @@ -2211,13 +2220,9 @@ static __be32 | |||
| 2211 | nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) | 2220 | nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) |
| 2212 | { | 2221 | { |
| 2213 | ENCODE_SEQID_OP_HEAD; | 2222 | ENCODE_SEQID_OP_HEAD; |
| 2214 | 2223 | ||
| 2215 | if (!nfserr) { | 2224 | if (!nfserr) |
| 2216 | RESERVE_SPACE(sizeof(stateid_t)); | 2225 | nfsd4_encode_stateid(resp, &od->od_stateid); |
| 2217 | WRITE32(od->od_stateid.si_generation); | ||
| 2218 | WRITEMEM(&od->od_stateid.si_opaque, sizeof(stateid_opaque_t)); | ||
| 2219 | ADJUST_ARGS(); | ||
| 2220 | } | ||
| 2221 | 2226 | ||
| 2222 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); | 2227 | ENCODE_SEQID_OP_TAIL(od->od_stateowner); |
| 2223 | return nfserr; | 2228 | return nfserr; |
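The nfs4xdr.c hunks above replace many open-coded READ_BUF/READ32/COPYMEM sequences with single nfsd4_decode_stateid() and nfsd4_encode_stateid() helpers, so each caller only checks one status value. A minimal standalone sketch of that decode-helper pattern over a byte cursor is given below; the struct layout, sizes and names are invented for illustration and do not match the kernel's XDR macros.

	#include <stdio.h>
	#include <stdint.h>
	#include <string.h>
	#include <arpa/inet.h>

	struct cursor {
		const uint8_t *p;
		size_t left;
	};

	struct stateid {
		uint32_t generation;
		uint8_t opaque[12];
	};

	/* Fail if the buffer is short; otherwise consume one 16-byte stateid. */
	static int decode_stateid(struct cursor *c, struct stateid *sid)
	{
		if (c->left < 16)
			return -1;
		memcpy(&sid->generation, c->p, 4);
		sid->generation = ntohl(sid->generation);
		memcpy(sid->opaque, c->p + 4, 12);
		c->p += 16;
		c->left -= 16;
		return 0;
	}

	int main(void)
	{
		uint8_t wire[16] = { 0, 0, 0, 7 };	/* generation 7, opaque all zero */
		struct cursor c = { wire, sizeof(wire) };
		struct stateid sid;

		if (decode_stateid(&c, &sid) == 0)
			printf("generation %u, %zu bytes left\n",
			       sid.generation, c.left);
		return 0;
	}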
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index c53e65f8f3a2..97543df58242 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c | |||
| @@ -614,10 +614,9 @@ static ssize_t __write_ports(struct file *file, char *buf, size_t size) | |||
| 614 | return -EINVAL; | 614 | return -EINVAL; |
| 615 | err = nfsd_create_serv(); | 615 | err = nfsd_create_serv(); |
| 616 | if (!err) { | 616 | if (!err) { |
| 617 | int proto = 0; | 617 | err = svc_addsock(nfsd_serv, fd, buf); |
| 618 | err = svc_addsock(nfsd_serv, fd, buf, &proto); | ||
| 619 | if (err >= 0) { | 618 | if (err >= 0) { |
| 620 | err = lockd_up(proto); | 619 | err = lockd_up(); |
| 621 | if (err < 0) | 620 | if (err < 0) |
| 622 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); | 621 | svc_sock_names(buf+strlen(buf)+1, nfsd_serv, buf); |
| 623 | } | 622 | } |
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index ea37c96f0445..cd25d91895a1 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c | |||
| @@ -302,17 +302,27 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) | |||
| 302 | if (error) | 302 | if (error) |
| 303 | goto out; | 303 | goto out; |
| 304 | 304 | ||
| 305 | if (!(access & NFSD_MAY_LOCK)) { | 305 | /* |
| 306 | /* | 306 | * pseudoflavor restrictions are not enforced on NLM, |
| 307 | * pseudoflavor restrictions are not enforced on NLM, | 307 | * which clients virtually always use auth_sys for, |
| 308 | * which clients virtually always use auth_sys for, | 308 | * even while using RPCSEC_GSS for NFS. |
| 309 | * even while using RPCSEC_GSS for NFS. | 309 | */ |
| 310 | */ | 310 | if (access & NFSD_MAY_LOCK) |
| 311 | error = check_nfsd_access(exp, rqstp); | 311 | goto skip_pseudoflavor_check; |
| 312 | if (error) | 312 | /* |
| 313 | goto out; | 313 | * Clients may expect to be able to use auth_sys during mount, |
| 314 | } | 314 | * even if they use gss for everything else; see section 2.3.2 |
| 315 | * of rfc 2623. | ||
| 316 | */ | ||
| 317 | if (access & NFSD_MAY_BYPASS_GSS_ON_ROOT | ||
| 318 | && exp->ex_path.dentry == dentry) | ||
| 319 | goto skip_pseudoflavor_check; | ||
| 320 | |||
| 321 | error = check_nfsd_access(exp, rqstp); | ||
| 322 | if (error) | ||
| 323 | goto out; | ||
| 315 | 324 | ||
| 325 | skip_pseudoflavor_check: | ||
| 316 | /* Finally, check access permissions. */ | 326 | /* Finally, check access permissions. */ |
| 317 | error = nfsd_permission(rqstp, exp, dentry, access); | 327 | error = nfsd_permission(rqstp, exp, dentry, access); |
| 318 | 328 | ||
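fh_verify() above gains an NFSD_MAY_BYPASS_GSS_ON_ROOT escape hatch: pseudoflavor enforcement is skipped for NLM requests and, when the caller passes the new flag, for operations on the export root itself, following section 2.3.2 of RFC 2623. A compact sketch of that flag-gated ordering of checks is below; the flag values, struct and helper names are invented for illustration.

	#include <stdio.h>

	#define MAY_LOCK		0x01
	#define MAY_BYPASS_GSS_ON_ROOT	0x02

	struct export { int root_object; int requires_gss; };

	static int check_access(const struct export *exp, int object, int access,
				int request_is_gss)
	{
		/* NLM traffic is exempt from pseudoflavor enforcement */
		if (access & MAY_LOCK)
			goto skip_pseudoflavor_check;
		/* the export root may be reached with auth_sys even on gss exports */
		if ((access & MAY_BYPASS_GSS_ON_ROOT) && object == exp->root_object)
			goto skip_pseudoflavor_check;
		if (exp->requires_gss && !request_is_gss)
			return -1;

	skip_pseudoflavor_check:
		return 0;	/* ordinary permission checks would follow here */
	}

	int main(void)
	{
		struct export exp = { .root_object = 1, .requires_gss = 1 };

		printf("%d\n", check_access(&exp, 1, MAY_BYPASS_GSS_ON_ROOT, 0)); /* 0 */
		printf("%d\n", check_access(&exp, 2, 0, 0));                      /* -1 */
		return 0;
	}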
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 0766f95d236a..5cffeca7acef 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c | |||
| @@ -65,7 +65,8 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, | |||
| 65 | dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); | 65 | dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); |
| 66 | 66 | ||
| 67 | fh_copy(&resp->fh, &argp->fh); | 67 | fh_copy(&resp->fh, &argp->fh); |
| 68 | nfserr = fh_verify(rqstp, &resp->fh, 0, NFSD_MAY_NOP); | 68 | nfserr = fh_verify(rqstp, &resp->fh, 0, |
| 69 | NFSD_MAY_NOP | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
| 69 | return nfsd_return_attrs(nfserr, resp); | 70 | return nfsd_return_attrs(nfserr, resp); |
| 70 | } | 71 | } |
| 71 | 72 | ||
| @@ -521,7 +522,8 @@ nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, | |||
| 521 | 522 | ||
| 522 | dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); | 523 | dprintk("nfsd: STATFS %s\n", SVCFH_fmt(&argp->fh)); |
| 523 | 524 | ||
| 524 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats); | 525 | nfserr = nfsd_statfs(rqstp, &argp->fh, &resp->stats, |
| 526 | NFSD_MAY_BYPASS_GSS_ON_ROOT); | ||
| 525 | fh_put(&argp->fh); | 527 | fh_put(&argp->fh); |
| 526 | return nfserr; | 528 | return nfserr; |
| 527 | } | 529 | } |
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 80292ff5e924..59eeb46f82c5 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c | |||
| @@ -229,6 +229,7 @@ int nfsd_create_serv(void) | |||
| 229 | 229 | ||
| 230 | atomic_set(&nfsd_busy, 0); | 230 | atomic_set(&nfsd_busy, 0); |
| 231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, | 231 | nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, |
| 232 | AF_INET, | ||
| 232 | nfsd_last_thread, nfsd, THIS_MODULE); | 233 | nfsd_last_thread, nfsd, THIS_MODULE); |
| 233 | if (nfsd_serv == NULL) | 234 | if (nfsd_serv == NULL) |
| 234 | err = -ENOMEM; | 235 | err = -ENOMEM; |
| @@ -243,25 +244,20 @@ static int nfsd_init_socks(int port) | |||
| 243 | if (!list_empty(&nfsd_serv->sv_permsocks)) | 244 | if (!list_empty(&nfsd_serv->sv_permsocks)) |
| 244 | return 0; | 245 | return 0; |
| 245 | 246 | ||
| 246 | error = lockd_up(IPPROTO_UDP); | 247 | error = svc_create_xprt(nfsd_serv, "udp", port, |
| 247 | if (error >= 0) { | ||
| 248 | error = svc_create_xprt(nfsd_serv, "udp", port, | ||
| 249 | SVC_SOCK_DEFAULTS); | 248 | SVC_SOCK_DEFAULTS); |
| 250 | if (error < 0) | ||
| 251 | lockd_down(); | ||
| 252 | } | ||
| 253 | if (error < 0) | 249 | if (error < 0) |
| 254 | return error; | 250 | return error; |
| 255 | 251 | ||
| 256 | error = lockd_up(IPPROTO_TCP); | 252 | error = svc_create_xprt(nfsd_serv, "tcp", port, |
| 257 | if (error >= 0) { | ||
| 258 | error = svc_create_xprt(nfsd_serv, "tcp", port, | ||
| 259 | SVC_SOCK_DEFAULTS); | 253 | SVC_SOCK_DEFAULTS); |
| 260 | if (error < 0) | ||
| 261 | lockd_down(); | ||
| 262 | } | ||
| 263 | if (error < 0) | 254 | if (error < 0) |
| 264 | return error; | 255 | return error; |
| 256 | |||
| 257 | error = lockd_up(); | ||
| 258 | if (error < 0) | ||
| 259 | return error; | ||
| 260 | |||
| 265 | return 0; | 261 | return 0; |
| 266 | } | 262 | } |
| 267 | 263 | ||
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 18060bed5267..aa1d0d6489a1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c | |||
| @@ -83,7 +83,6 @@ struct raparm_hbucket { | |||
| 83 | spinlock_t pb_lock; | 83 | spinlock_t pb_lock; |
| 84 | } ____cacheline_aligned_in_smp; | 84 | } ____cacheline_aligned_in_smp; |
| 85 | 85 | ||
| 86 | static struct raparms * raparml; | ||
| 87 | #define RAPARM_HASH_BITS 4 | 86 | #define RAPARM_HASH_BITS 4 |
| 88 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) | 87 | #define RAPARM_HASH_SIZE (1<<RAPARM_HASH_BITS) |
| 89 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) | 88 | #define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1) |
| @@ -1866,9 +1865,9 @@ out: | |||
| 1866 | * N.B. After this call fhp needs an fh_put | 1865 | * N.B. After this call fhp needs an fh_put |
| 1867 | */ | 1866 | */ |
| 1868 | __be32 | 1867 | __be32 |
| 1869 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) | 1868 | nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access) |
| 1870 | { | 1869 | { |
| 1871 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP); | 1870 | __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access); |
| 1872 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) | 1871 | if (!err && vfs_statfs(fhp->fh_dentry,stat)) |
| 1873 | err = nfserr_io; | 1872 | err = nfserr_io; |
| 1874 | return err; | 1873 | return err; |
| @@ -1966,11 +1965,20 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp, | |||
| 1966 | void | 1965 | void |
| 1967 | nfsd_racache_shutdown(void) | 1966 | nfsd_racache_shutdown(void) |
| 1968 | { | 1967 | { |
| 1969 | if (!raparml) | 1968 | struct raparms *raparm, *last_raparm; |
| 1970 | return; | 1969 | unsigned int i; |
| 1970 | |||
| 1971 | dprintk("nfsd: freeing readahead buffers.\n"); | 1971 | dprintk("nfsd: freeing readahead buffers.\n"); |
| 1972 | kfree(raparml); | 1972 | |
| 1973 | raparml = NULL; | 1973 | for (i = 0; i < RAPARM_HASH_SIZE; i++) { |
| 1974 | raparm = raparm_hash[i].pb_head; | ||
| 1975 | while(raparm) { | ||
| 1976 | last_raparm = raparm; | ||
| 1977 | raparm = raparm->p_next; | ||
| 1978 | kfree(last_raparm); | ||
| 1979 | } | ||
| 1980 | raparm_hash[i].pb_head = NULL; | ||
| 1981 | } | ||
| 1974 | } | 1982 | } |
| 1975 | /* | 1983 | /* |
| 1976 | * Initialize readahead param cache | 1984 | * Initialize readahead param cache |
| @@ -1981,35 +1989,38 @@ nfsd_racache_init(int cache_size) | |||
| 1981 | int i; | 1989 | int i; |
| 1982 | int j = 0; | 1990 | int j = 0; |
| 1983 | int nperbucket; | 1991 | int nperbucket; |
| 1992 | struct raparms **raparm = NULL; | ||
| 1984 | 1993 | ||
| 1985 | 1994 | ||
| 1986 | if (raparml) | 1995 | if (raparm_hash[0].pb_head) |
| 1987 | return 0; | 1996 | return 0; |
| 1988 | if (cache_size < 2*RAPARM_HASH_SIZE) | 1997 | nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); |
| 1989 | cache_size = 2*RAPARM_HASH_SIZE; | 1998 | if (nperbucket < 2) |
| 1990 | raparml = kcalloc(cache_size, sizeof(struct raparms), GFP_KERNEL); | 1999 | nperbucket = 2; |
| 1991 | 2000 | cache_size = nperbucket * RAPARM_HASH_SIZE; | |
| 1992 | if (!raparml) { | ||
| 1993 | printk(KERN_WARNING | ||
| 1994 | "nfsd: Could not allocate memory read-ahead cache.\n"); | ||
| 1995 | return -ENOMEM; | ||
| 1996 | } | ||
| 1997 | 2001 | ||
| 1998 | dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); | 2002 | dprintk("nfsd: allocating %d readahead buffers.\n", cache_size); |
| 1999 | for (i = 0 ; i < RAPARM_HASH_SIZE ; i++) { | 2003 | |
| 2000 | raparm_hash[i].pb_head = NULL; | 2004 | for (i = 0; i < RAPARM_HASH_SIZE; i++) { |
| 2001 | spin_lock_init(&raparm_hash[i].pb_lock); | 2005 | spin_lock_init(&raparm_hash[i].pb_lock); |
| 2002 | } | 2006 | |
| 2003 | nperbucket = DIV_ROUND_UP(cache_size, RAPARM_HASH_SIZE); | 2007 | raparm = &raparm_hash[i].pb_head; |
| 2004 | for (i = 0; i < cache_size - 1; i++) { | 2008 | for (j = 0; j < nperbucket; j++) { |
| 2005 | if (i % nperbucket == 0) | 2009 | *raparm = kzalloc(sizeof(struct raparms), GFP_KERNEL); |
| 2006 | raparm_hash[j++].pb_head = raparml + i; | 2010 | if (!*raparm) |
| 2007 | if (i % nperbucket < nperbucket-1) | 2011 | goto out_nomem; |
| 2008 | raparml[i].p_next = raparml + i + 1; | 2012 | raparm = &(*raparm)->p_next; |
| 2013 | } | ||
| 2014 | *raparm = NULL; | ||
| 2009 | } | 2015 | } |
| 2010 | 2016 | ||
| 2011 | nfsdstats.ra_size = cache_size; | 2017 | nfsdstats.ra_size = cache_size; |
| 2012 | return 0; | 2018 | return 0; |
| 2019 | |||
| 2020 | out_nomem: | ||
| 2021 | dprintk("nfsd: kmalloc failed, freeing readahead buffers\n"); | ||
| 2022 | nfsd_racache_shutdown(); | ||
| 2023 | return -ENOMEM; | ||
| 2013 | } | 2024 | } |
| 2014 | 2025 | ||
| 2015 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) | 2026 | #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) |
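The nfsd/vfs.c rework above stops carving the readahead parameter cache out of one large kcalloc() array and instead builds each hash bucket as its own kzalloc()'d singly linked list, with the failure path reusing nfsd_racache_shutdown() to free whatever was already allocated. A self-contained sketch of that allocate-per-bucket-with-rollback pattern follows; the structure names are placeholders rather than the nfsd ones.

	#include <stdio.h>
	#include <stdlib.h>

	#define NBUCKETS 4

	struct entry {
		struct entry *next;
	};

	static struct entry *buckets[NBUCKETS];

	static void cache_shutdown(void)
	{
		for (int i = 0; i < NBUCKETS; i++) {
			struct entry *e = buckets[i];
			while (e) {
				struct entry *last = e;
				e = e->next;
				free(last);
			}
			buckets[i] = NULL;
		}
	}

	static int cache_init(int per_bucket)
	{
		for (int i = 0; i < NBUCKETS; i++) {
			struct entry **link = &buckets[i];
			for (int j = 0; j < per_bucket; j++) {
				*link = calloc(1, sizeof(**link));
				if (!*link)
					goto out_nomem;	/* partial allocation freed below */
				link = &(*link)->next;
			}
			*link = NULL;
		}
		return 0;

	out_nomem:
		cache_shutdown();
		return -1;
	}

	int main(void)
	{
		if (cache_init(2) == 0)
			printf("allocated %d entries\n", 2 * NBUCKETS);
		cache_shutdown();
		return 0;
	}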
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 64965e1c21c4..9b0efdad8910 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c | |||
| @@ -13,9 +13,7 @@ | |||
| 13 | #include <linux/nls.h> | 13 | #include <linux/nls.h> |
| 14 | #include <linux/kernel.h> | 14 | #include <linux/kernel.h> |
| 15 | #include <linux/errno.h> | 15 | #include <linux/errno.h> |
| 16 | #ifdef CONFIG_KMOD | ||
| 17 | #include <linux/kmod.h> | 16 | #include <linux/kmod.h> |
| 18 | #endif | ||
| 19 | #include <linux/spinlock.h> | 17 | #include <linux/spinlock.h> |
| 20 | 18 | ||
| 21 | static struct nls_table default_table; | 19 | static struct nls_table default_table; |
| @@ -215,24 +213,7 @@ static struct nls_table *find_nls(char *charset) | |||
| 215 | 213 | ||
| 216 | struct nls_table *load_nls(char *charset) | 214 | struct nls_table *load_nls(char *charset) |
| 217 | { | 215 | { |
| 218 | struct nls_table *nls; | 216 | return try_then_request_module(find_nls(charset), "nls_%s", charset); |
| 219 | #ifdef CONFIG_KMOD | ||
| 220 | int ret; | ||
| 221 | #endif | ||
| 222 | |||
| 223 | nls = find_nls(charset); | ||
| 224 | if (nls) | ||
| 225 | return nls; | ||
| 226 | |||
| 227 | #ifdef CONFIG_KMOD | ||
| 228 | ret = request_module("nls_%s", charset); | ||
| 229 | if (ret != 0) { | ||
| 230 | printk("Unable to load NLS charset %s\n", charset); | ||
| 231 | return NULL; | ||
| 232 | } | ||
| 233 | nls = find_nls(charset); | ||
| 234 | #endif | ||
| 235 | return nls; | ||
| 236 | } | 217 | } |
| 237 | 218 | ||
| 238 | void unload_nls(struct nls_table *nls) | 219 | void unload_nls(struct nls_table *nls) |
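load_nls() above collapses the old find / request_module / find-again dance into try_then_request_module(), which retries the table lookup after asking kmod to load "nls_<charset>". A simplified userspace rendering of that try-then-load-then-retry idiom is shown below; the module-loading function is a stand-in for request_module() and is purely illustrative.

	#include <stdio.h>
	#include <string.h>

	struct table { const char *name; };

	static struct table builtin = { "utf8" };
	static int extra_loaded;

	static struct table *find_table(const char *charset)
	{
		static struct table cp437 = { "cp437" };

		if (strcmp(charset, builtin.name) == 0)
			return &builtin;
		if (extra_loaded && strcmp(charset, cp437.name) == 0)
			return &cp437;
		return NULL;
	}

	/* Stand-in for request_module(): pretend the module becomes available. */
	static int load_module(const char *charset)
	{
		(void)charset;
		extra_loaded = 1;
		return 0;
	}

	/* try_then_request_module() pattern: look up, load on a miss, look up again. */
	static struct table *load_table(const char *charset)
	{
		struct table *t = find_table(charset);

		if (t == NULL && load_module(charset) == 0)
			t = find_table(charset);
		return t;
	}

	int main(void)
	{
		printf("cp437 -> %s\n", load_table("cp437") ? "found" : "missing");
		return 0;
	}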
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index d020866d4232..3140a4429af1 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c | |||
| @@ -439,7 +439,7 @@ static inline int __ntfs_grab_cache_pages(struct address_space *mapping, | |||
| 439 | pages[nr] = *cached_page; | 439 | pages[nr] = *cached_page; |
| 440 | page_cache_get(*cached_page); | 440 | page_cache_get(*cached_page); |
| 441 | if (unlikely(!pagevec_add(lru_pvec, *cached_page))) | 441 | if (unlikely(!pagevec_add(lru_pvec, *cached_page))) |
| 442 | __pagevec_lru_add(lru_pvec); | 442 | __pagevec_lru_add_file(lru_pvec); |
| 443 | *cached_page = NULL; | 443 | *cached_page = NULL; |
| 444 | } | 444 | } |
| 445 | index++; | 445 | index++; |
| @@ -2084,7 +2084,7 @@ err_out: | |||
| 2084 | OSYNC_METADATA|OSYNC_DATA); | 2084 | OSYNC_METADATA|OSYNC_DATA); |
| 2085 | } | 2085 | } |
| 2086 | } | 2086 | } |
| 2087 | pagevec_lru_add(&lru_pvec); | 2087 | pagevec_lru_add_file(&lru_pvec); |
| 2088 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", | 2088 | ntfs_debug("Done. Returning %s (written 0x%lx, status %li).", |
| 2089 | written ? "written" : "status", (unsigned long)written, | 2089 | written ? "written" : "status", (unsigned long)written, |
| 2090 | (long)status); | 2090 | (long)status); |
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile index f6956de56fdb..589dcdfdfe3c 100644 --- a/fs/ocfs2/Makefile +++ b/fs/ocfs2/Makefile | |||
| @@ -34,7 +34,8 @@ ocfs2-objs := \ | |||
| 34 | symlink.o \ | 34 | symlink.o \ |
| 35 | sysfile.o \ | 35 | sysfile.o \ |
| 36 | uptodate.o \ | 36 | uptodate.o \ |
| 37 | ver.o | 37 | ver.o \ |
| 38 | xattr.o | ||
| 38 | 39 | ||
| 39 | ocfs2_stackglue-objs := stackglue.o | 40 | ocfs2_stackglue-objs := stackglue.o |
| 40 | ocfs2_stack_o2cb-objs := stack_o2cb.o | 41 | ocfs2_stack_o2cb-objs := stack_o2cb.o |
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 10bfb466e068..0cc2deb9394c 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c | |||
| @@ -49,6 +49,340 @@ | |||
| 49 | 49 | ||
| 50 | #include "buffer_head_io.h" | 50 | #include "buffer_head_io.h" |
| 51 | 51 | ||
| 52 | |||
| 53 | /* | ||
| 54 | * Operations for a specific extent tree type. | ||
| 55 | * | ||
| 56 | * To implement an on-disk btree (extent tree) type in ocfs2, add | ||
| 57 | * an ocfs2_extent_tree_operations structure and the matching | ||
| 58 | * ocfs2_init_<thingy>_extent_tree() function. That's pretty much it | ||
| 59 | * for the allocation portion of the extent tree. | ||
| 60 | */ | ||
| 61 | struct ocfs2_extent_tree_operations { | ||
| 62 | /* | ||
| 63 | * last_eb_blk is the block number of the right most leaf extent | ||
| 64 | * block. Most on-disk structures containing an extent tree store | ||
| 65 | * this value for fast access. The ->eo_set_last_eb_blk() and | ||
| 66 | * ->eo_get_last_eb_blk() operations access this value. They are | ||
| 67 | * both required. | ||
| 68 | */ | ||
| 69 | void (*eo_set_last_eb_blk)(struct ocfs2_extent_tree *et, | ||
| 70 | u64 blkno); | ||
| 71 | u64 (*eo_get_last_eb_blk)(struct ocfs2_extent_tree *et); | ||
| 72 | |||
| 73 | /* | ||
| 74 | * The on-disk structure usually keeps track of how many total | ||
| 75 | * clusters are stored in this extent tree. This function updates | ||
| 76 | * that value. new_clusters is the delta, and must be | ||
| 77 | * added to the total. Required. | ||
| 78 | */ | ||
| 79 | void (*eo_update_clusters)(struct inode *inode, | ||
| 80 | struct ocfs2_extent_tree *et, | ||
| 81 | u32 new_clusters); | ||
| 82 | |||
| 83 | /* | ||
| 84 | * If ->eo_insert_check() exists, it is called before rec is | ||
| 85 | * inserted into the extent tree. It is optional. | ||
| 86 | */ | ||
| 87 | int (*eo_insert_check)(struct inode *inode, | ||
| 88 | struct ocfs2_extent_tree *et, | ||
| 89 | struct ocfs2_extent_rec *rec); | ||
| 90 | int (*eo_sanity_check)(struct inode *inode, struct ocfs2_extent_tree *et); | ||
| 91 | |||
| 92 | /* | ||
| 93 | * -------------------------------------------------------------- | ||
| 94 | * The remaining are internal to ocfs2_extent_tree and don't have | ||
| 95 | * accessor functions | ||
| 96 | */ | ||
| 97 | |||
| 98 | /* | ||
| 99 | * ->eo_fill_root_el() takes et->et_object and sets et->et_root_el. | ||
| 100 | * It is required. | ||
| 101 | */ | ||
| 102 | void (*eo_fill_root_el)(struct ocfs2_extent_tree *et); | ||
| 103 | |||
| 104 | /* | ||
| 105 | * ->eo_fill_max_leaf_clusters sets et->et_max_leaf_clusters if | ||
| 106 | * it exists. If it does not, et->et_max_leaf_clusters is set | ||
| 107 | * to 0 (unlimited). Optional. | ||
| 108 | */ | ||
| 109 | void (*eo_fill_max_leaf_clusters)(struct inode *inode, | ||
| 110 | struct ocfs2_extent_tree *et); | ||
| 111 | }; | ||
| 112 | |||
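This operations table is the core of the refactor in fs/ocfs2/alloc.c: every on-disk container of an extent tree (dinode, xattr value root, xattr block) supplies its own accessors, and the generic allocation code only ever goes through the table. As a loose, user-space illustration of the pattern only (made-up names, not the ocfs2 API):

    #include <stdint.h>
    #include <stdio.h>

    struct tree;                            /* generic handle, defined below */

    struct tree_operations {
            /* required accessors */
            uint64_t (*get_last_leaf)(struct tree *t);
            void     (*set_last_leaf)(struct tree *t, uint64_t blkno);
            /* optional hook; may be left NULL, like eo_sanity_check above */
            int      (*sanity_check)(struct tree *t);
    };

    struct tree {
            const struct tree_operations *ops;
            uint64_t last_leaf;             /* stand-in for the on-disk field */
    };

    /* Generic code dispatches through the table and guards optional ops. */
    static int tree_sanity_check(struct tree *t)
    {
            return t->ops->sanity_check ? t->ops->sanity_check(t) : 0;
    }

    static uint64_t demo_get(struct tree *t)             { return t->last_leaf; }
    static void demo_set(struct tree *t, uint64_t blkno) { t->last_leaf = blkno; }

    static const struct tree_operations demo_ops = {
            .get_last_leaf = demo_get,
            .set_last_leaf = demo_set,
            /* .sanity_check intentionally NULL */
    };

    int main(void)
    {
            struct tree t = { .ops = &demo_ops };

            t.ops->set_last_leaf(&t, 42);
            printf("last leaf %llu, sanity %d\n",
                   (unsigned long long)t.ops->get_last_leaf(&t),
                   tree_sanity_check(&t));
            return 0;
    }

The BUG_ON(et->et_ops != &ocfs2_dinode_et_ops) checks in the dinode methods below serve the same purpose as a type check: a method must only ever be reached through its own table.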
| 113 | |||
| 114 | /* | ||
| 115 | * Pre-declare ocfs2_dinode_et_ops so we can use it as a sanity check | ||
| 116 | * in the methods. | ||
| 117 | */ | ||
| 118 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et); | ||
| 119 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
| 120 | u64 blkno); | ||
| 121 | static void ocfs2_dinode_update_clusters(struct inode *inode, | ||
| 122 | struct ocfs2_extent_tree *et, | ||
| 123 | u32 clusters); | ||
| 124 | static int ocfs2_dinode_insert_check(struct inode *inode, | ||
| 125 | struct ocfs2_extent_tree *et, | ||
| 126 | struct ocfs2_extent_rec *rec); | ||
| 127 | static int ocfs2_dinode_sanity_check(struct inode *inode, | ||
| 128 | struct ocfs2_extent_tree *et); | ||
| 129 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et); | ||
| 130 | static struct ocfs2_extent_tree_operations ocfs2_dinode_et_ops = { | ||
| 131 | .eo_set_last_eb_blk = ocfs2_dinode_set_last_eb_blk, | ||
| 132 | .eo_get_last_eb_blk = ocfs2_dinode_get_last_eb_blk, | ||
| 133 | .eo_update_clusters = ocfs2_dinode_update_clusters, | ||
| 134 | .eo_insert_check = ocfs2_dinode_insert_check, | ||
| 135 | .eo_sanity_check = ocfs2_dinode_sanity_check, | ||
| 136 | .eo_fill_root_el = ocfs2_dinode_fill_root_el, | ||
| 137 | }; | ||
| 138 | |||
| 139 | static void ocfs2_dinode_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
| 140 | u64 blkno) | ||
| 141 | { | ||
| 142 | struct ocfs2_dinode *di = et->et_object; | ||
| 143 | |||
| 144 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
| 145 | di->i_last_eb_blk = cpu_to_le64(blkno); | ||
| 146 | } | ||
| 147 | |||
| 148 | static u64 ocfs2_dinode_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
| 149 | { | ||
| 150 | struct ocfs2_dinode *di = et->et_object; | ||
| 151 | |||
| 152 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
| 153 | return le64_to_cpu(di->i_last_eb_blk); | ||
| 154 | } | ||
| 155 | |||
| 156 | static void ocfs2_dinode_update_clusters(struct inode *inode, | ||
| 157 | struct ocfs2_extent_tree *et, | ||
| 158 | u32 clusters) | ||
| 159 | { | ||
| 160 | struct ocfs2_dinode *di = et->et_object; | ||
| 161 | |||
| 162 | le32_add_cpu(&di->i_clusters, clusters); | ||
| 163 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
| 164 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | ||
| 165 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
| 166 | } | ||
| 167 | |||
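ocfs2_dinode_update_clusters() shows the usual discipline for on-disk fields: i_clusters stays little-endian in memory and is only converted at the point of use (le32_add_cpu, le32_to_cpu), while the in-core ip_clusters copy under ip_lock holds a CPU-order value. A small user-space analogue of that read-convert-modify-write pattern, assuming a glibc-style <endian.h> (illustrative only):

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Emulates le32_add_cpu(): the field is stored little-endian, the delta
     * is a CPU-order integer, and the sum is written back little-endian. */
    static void le32_add(uint32_t *le_field, int32_t delta)
    {
            *le_field = htole32(le32toh(*le_field) + delta);
    }

    int main(void)
    {
            uint32_t i_clusters = htole32(100);   /* as it sits on disk      */
            uint32_t ip_clusters;                 /* in-core copy, CPU order */

            le32_add(&i_clusters, 8);             /* tree grew by 8 clusters */
            ip_clusters = le32toh(i_clusters);    /* mirror into the "inode" */
            printf("%u\n", ip_clusters);          /* prints 108              */
            return 0;
    }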
| 168 | static int ocfs2_dinode_insert_check(struct inode *inode, | ||
| 169 | struct ocfs2_extent_tree *et, | ||
| 170 | struct ocfs2_extent_rec *rec) | ||
| 171 | { | ||
| 172 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 173 | |||
| 174 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | ||
| 175 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | ||
| 176 | (OCFS2_I(inode)->ip_clusters != rec->e_cpos), | ||
| 177 | "Device %s, asking for sparse allocation: inode %llu, " | ||
| 178 | "cpos %u, clusters %u\n", | ||
| 179 | osb->dev_str, | ||
| 180 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 181 | rec->e_cpos, | ||
| 182 | OCFS2_I(inode)->ip_clusters); | ||
| 183 | |||
| 184 | return 0; | ||
| 185 | } | ||
| 186 | |||
| 187 | static int ocfs2_dinode_sanity_check(struct inode *inode, | ||
| 188 | struct ocfs2_extent_tree *et) | ||
| 189 | { | ||
| 190 | int ret = 0; | ||
| 191 | struct ocfs2_dinode *di; | ||
| 192 | |||
| 193 | BUG_ON(et->et_ops != &ocfs2_dinode_et_ops); | ||
| 194 | |||
| 195 | di = et->et_object; | ||
| 196 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
| 197 | ret = -EIO; | ||
| 198 | ocfs2_error(inode->i_sb, | ||
| 199 | "Inode %llu has invalid path root", | ||
| 200 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 201 | } | ||
| 202 | |||
| 203 | return ret; | ||
| 204 | } | ||
| 205 | |||
| 206 | static void ocfs2_dinode_fill_root_el(struct ocfs2_extent_tree *et) | ||
| 207 | { | ||
| 208 | struct ocfs2_dinode *di = et->et_object; | ||
| 209 | |||
| 210 | et->et_root_el = &di->id2.i_list; | ||
| 211 | } | ||
| 212 | |||
| 213 | |||
| 214 | static void ocfs2_xattr_value_fill_root_el(struct ocfs2_extent_tree *et) | ||
| 215 | { | ||
| 216 | struct ocfs2_xattr_value_root *xv = et->et_object; | ||
| 217 | |||
| 218 | et->et_root_el = &xv->xr_list; | ||
| 219 | } | ||
| 220 | |||
| 221 | static void ocfs2_xattr_value_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
| 222 | u64 blkno) | ||
| 223 | { | ||
| 224 | struct ocfs2_xattr_value_root *xv = | ||
| 225 | (struct ocfs2_xattr_value_root *)et->et_object; | ||
| 226 | |||
| 227 | xv->xr_last_eb_blk = cpu_to_le64(blkno); | ||
| 228 | } | ||
| 229 | |||
| 230 | static u64 ocfs2_xattr_value_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
| 231 | { | ||
| 232 | struct ocfs2_xattr_value_root *xv = | ||
| 233 | (struct ocfs2_xattr_value_root *) et->et_object; | ||
| 234 | |||
| 235 | return le64_to_cpu(xv->xr_last_eb_blk); | ||
| 236 | } | ||
| 237 | |||
| 238 | static void ocfs2_xattr_value_update_clusters(struct inode *inode, | ||
| 239 | struct ocfs2_extent_tree *et, | ||
| 240 | u32 clusters) | ||
| 241 | { | ||
| 242 | struct ocfs2_xattr_value_root *xv = | ||
| 243 | (struct ocfs2_xattr_value_root *)et->et_object; | ||
| 244 | |||
| 245 | le32_add_cpu(&xv->xr_clusters, clusters); | ||
| 246 | } | ||
| 247 | |||
| 248 | static struct ocfs2_extent_tree_operations ocfs2_xattr_value_et_ops = { | ||
| 249 | .eo_set_last_eb_blk = ocfs2_xattr_value_set_last_eb_blk, | ||
| 250 | .eo_get_last_eb_blk = ocfs2_xattr_value_get_last_eb_blk, | ||
| 251 | .eo_update_clusters = ocfs2_xattr_value_update_clusters, | ||
| 252 | .eo_fill_root_el = ocfs2_xattr_value_fill_root_el, | ||
| 253 | }; | ||
| 254 | |||
| 255 | static void ocfs2_xattr_tree_fill_root_el(struct ocfs2_extent_tree *et) | ||
| 256 | { | ||
| 257 | struct ocfs2_xattr_block *xb = et->et_object; | ||
| 258 | |||
| 259 | et->et_root_el = &xb->xb_attrs.xb_root.xt_list; | ||
| 260 | } | ||
| 261 | |||
| 262 | static void ocfs2_xattr_tree_fill_max_leaf_clusters(struct inode *inode, | ||
| 263 | struct ocfs2_extent_tree *et) | ||
| 264 | { | ||
| 265 | et->et_max_leaf_clusters = | ||
| 266 | ocfs2_clusters_for_bytes(inode->i_sb, | ||
| 267 | OCFS2_MAX_XATTR_TREE_LEAF_SIZE); | ||
| 268 | } | ||
| 269 | |||
| 270 | static void ocfs2_xattr_tree_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
| 271 | u64 blkno) | ||
| 272 | { | ||
| 273 | struct ocfs2_xattr_block *xb = et->et_object; | ||
| 274 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
| 275 | |||
| 276 | xt->xt_last_eb_blk = cpu_to_le64(blkno); | ||
| 277 | } | ||
| 278 | |||
| 279 | static u64 ocfs2_xattr_tree_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
| 280 | { | ||
| 281 | struct ocfs2_xattr_block *xb = et->et_object; | ||
| 282 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
| 283 | |||
| 284 | return le64_to_cpu(xt->xt_last_eb_blk); | ||
| 285 | } | ||
| 286 | |||
| 287 | static void ocfs2_xattr_tree_update_clusters(struct inode *inode, | ||
| 288 | struct ocfs2_extent_tree *et, | ||
| 289 | u32 clusters) | ||
| 290 | { | ||
| 291 | struct ocfs2_xattr_block *xb = et->et_object; | ||
| 292 | |||
| 293 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, clusters); | ||
| 294 | } | ||
| 295 | |||
| 296 | static struct ocfs2_extent_tree_operations ocfs2_xattr_tree_et_ops = { | ||
| 297 | .eo_set_last_eb_blk = ocfs2_xattr_tree_set_last_eb_blk, | ||
| 298 | .eo_get_last_eb_blk = ocfs2_xattr_tree_get_last_eb_blk, | ||
| 299 | .eo_update_clusters = ocfs2_xattr_tree_update_clusters, | ||
| 300 | .eo_fill_root_el = ocfs2_xattr_tree_fill_root_el, | ||
| 301 | .eo_fill_max_leaf_clusters = ocfs2_xattr_tree_fill_max_leaf_clusters, | ||
| 302 | }; | ||
| 303 | |||
| 304 | static void __ocfs2_init_extent_tree(struct ocfs2_extent_tree *et, | ||
| 305 | struct inode *inode, | ||
| 306 | struct buffer_head *bh, | ||
| 307 | void *obj, | ||
| 308 | struct ocfs2_extent_tree_operations *ops) | ||
| 309 | { | ||
| 310 | et->et_ops = ops; | ||
| 311 | et->et_root_bh = bh; | ||
| 312 | if (!obj) | ||
| 313 | obj = (void *)bh->b_data; | ||
| 314 | et->et_object = obj; | ||
| 315 | |||
| 316 | et->et_ops->eo_fill_root_el(et); | ||
| 317 | if (!et->et_ops->eo_fill_max_leaf_clusters) | ||
| 318 | et->et_max_leaf_clusters = 0; | ||
| 319 | else | ||
| 320 | et->et_ops->eo_fill_max_leaf_clusters(inode, et); | ||
| 321 | } | ||
| 322 | |||
| 323 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | ||
| 324 | struct inode *inode, | ||
| 325 | struct buffer_head *bh) | ||
| 326 | { | ||
| 327 | __ocfs2_init_extent_tree(et, inode, bh, NULL, &ocfs2_dinode_et_ops); | ||
| 328 | } | ||
| 329 | |||
| 330 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | ||
| 331 | struct inode *inode, | ||
| 332 | struct buffer_head *bh) | ||
| 333 | { | ||
| 334 | __ocfs2_init_extent_tree(et, inode, bh, NULL, | ||
| 335 | &ocfs2_xattr_tree_et_ops); | ||
| 336 | } | ||
| 337 | |||
| 338 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | ||
| 339 | struct inode *inode, | ||
| 340 | struct buffer_head *bh, | ||
| 341 | struct ocfs2_xattr_value_root *xv) | ||
| 342 | { | ||
| 343 | __ocfs2_init_extent_tree(et, inode, bh, xv, | ||
| 344 | &ocfs2_xattr_value_et_ops); | ||
| 345 | } | ||
| 346 | |||
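These ocfs2_init_*_extent_tree() wrappers are the non-static entry points: a caller binds a (typically stack-allocated) ocfs2_extent_tree to its root buffer once, and from then on the generic routines never need to know what kind of tree they are walking. A hedged caller-side fragment (not compilable on its own, error handling trimmed) matching the ocfs2_num_free_extents() signature that appears later in this diff:

    struct ocfs2_extent_tree et;
    int free_extents;

    /* Bind the tree handle to the inode's dinode buffer... */
    ocfs2_init_dinode_extent_tree(&et, inode, di_bh);

    /* ...then any generic helper works through the same handle. */
    free_extents = ocfs2_num_free_extents(osb, inode, &et);
    if (free_extents < 0)
            mlog_errno(free_extents);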
| 347 | static inline void ocfs2_et_set_last_eb_blk(struct ocfs2_extent_tree *et, | ||
| 348 | u64 new_last_eb_blk) | ||
| 349 | { | ||
| 350 | et->et_ops->eo_set_last_eb_blk(et, new_last_eb_blk); | ||
| 351 | } | ||
| 352 | |||
| 353 | static inline u64 ocfs2_et_get_last_eb_blk(struct ocfs2_extent_tree *et) | ||
| 354 | { | ||
| 355 | return et->et_ops->eo_get_last_eb_blk(et); | ||
| 356 | } | ||
| 357 | |||
| 358 | static inline void ocfs2_et_update_clusters(struct inode *inode, | ||
| 359 | struct ocfs2_extent_tree *et, | ||
| 360 | u32 clusters) | ||
| 361 | { | ||
| 362 | et->et_ops->eo_update_clusters(inode, et, clusters); | ||
| 363 | } | ||
| 364 | |||
| 365 | static inline int ocfs2_et_insert_check(struct inode *inode, | ||
| 366 | struct ocfs2_extent_tree *et, | ||
| 367 | struct ocfs2_extent_rec *rec) | ||
| 368 | { | ||
| 369 | int ret = 0; | ||
| 370 | |||
| 371 | if (et->et_ops->eo_insert_check) | ||
| 372 | ret = et->et_ops->eo_insert_check(inode, et, rec); | ||
| 373 | return ret; | ||
| 374 | } | ||
| 375 | |||
| 376 | static inline int ocfs2_et_sanity_check(struct inode *inode, | ||
| 377 | struct ocfs2_extent_tree *et) | ||
| 378 | { | ||
| 379 | int ret = 0; | ||
| 380 | |||
| 381 | if (et->et_ops->eo_sanity_check) | ||
| 382 | ret = et->et_ops->eo_sanity_check(inode, et); | ||
| 383 | return ret; | ||
| 384 | } | ||
| 385 | |||
| 52 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); | 386 | static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc); |
| 53 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, | 387 | static int ocfs2_cache_extent_block_free(struct ocfs2_cached_dealloc_ctxt *ctxt, |
| 54 | struct ocfs2_extent_block *eb); | 388 | struct ocfs2_extent_block *eb); |
| @@ -205,17 +539,6 @@ static struct ocfs2_path *ocfs2_new_path(struct buffer_head *root_bh, | |||
| 205 | } | 539 | } |
| 206 | 540 | ||
| 207 | /* | 541 | /* |
| 208 | * Allocate and initialize a new path based on a disk inode tree. | ||
| 209 | */ | ||
| 210 | static struct ocfs2_path *ocfs2_new_inode_path(struct buffer_head *di_bh) | ||
| 211 | { | ||
| 212 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 213 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
| 214 | |||
| 215 | return ocfs2_new_path(di_bh, el); | ||
| 216 | } | ||
| 217 | |||
| 218 | /* | ||
| 219 | * Convenience function to journal all components in a path. | 542 | * Convenience function to journal all components in a path. |
| 220 | */ | 543 | */ |
| 221 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, | 544 | static int ocfs2_journal_access_path(struct inode *inode, handle_t *handle, |
| @@ -368,39 +691,35 @@ struct ocfs2_merge_ctxt { | |||
| 368 | */ | 691 | */ |
| 369 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 692 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
| 370 | struct inode *inode, | 693 | struct inode *inode, |
| 371 | struct ocfs2_dinode *fe) | 694 | struct ocfs2_extent_tree *et) |
| 372 | { | 695 | { |
| 373 | int retval; | 696 | int retval; |
| 374 | struct ocfs2_extent_list *el; | 697 | struct ocfs2_extent_list *el = NULL; |
| 375 | struct ocfs2_extent_block *eb; | 698 | struct ocfs2_extent_block *eb; |
| 376 | struct buffer_head *eb_bh = NULL; | 699 | struct buffer_head *eb_bh = NULL; |
| 700 | u64 last_eb_blk = 0; | ||
| 377 | 701 | ||
| 378 | mlog_entry_void(); | 702 | mlog_entry_void(); |
| 379 | 703 | ||
| 380 | if (!OCFS2_IS_VALID_DINODE(fe)) { | 704 | el = et->et_root_el; |
| 381 | OCFS2_RO_ON_INVALID_DINODE(inode->i_sb, fe); | 705 | last_eb_blk = ocfs2_et_get_last_eb_blk(et); |
| 382 | retval = -EIO; | ||
| 383 | goto bail; | ||
| 384 | } | ||
| 385 | 706 | ||
| 386 | if (fe->i_last_eb_blk) { | 707 | if (last_eb_blk) { |
| 387 | retval = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 708 | retval = ocfs2_read_block(inode, last_eb_blk, |
| 388 | &eb_bh, OCFS2_BH_CACHED, inode); | 709 | &eb_bh); |
| 389 | if (retval < 0) { | 710 | if (retval < 0) { |
| 390 | mlog_errno(retval); | 711 | mlog_errno(retval); |
| 391 | goto bail; | 712 | goto bail; |
| 392 | } | 713 | } |
| 393 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 714 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
| 394 | el = &eb->h_list; | 715 | el = &eb->h_list; |
| 395 | } else | 716 | } |
| 396 | el = &fe->id2.i_list; | ||
| 397 | 717 | ||
| 398 | BUG_ON(el->l_tree_depth != 0); | 718 | BUG_ON(el->l_tree_depth != 0); |
| 399 | 719 | ||
| 400 | retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); | 720 | retval = le16_to_cpu(el->l_count) - le16_to_cpu(el->l_next_free_rec); |
| 401 | bail: | 721 | bail: |
| 402 | if (eb_bh) | 722 | brelse(eb_bh); |
| 403 | brelse(eb_bh); | ||
| 404 | 723 | ||
| 405 | mlog_exit(retval); | 724 | mlog_exit(retval); |
| 406 | return retval; | 725 | return retval; |
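The rewritten ocfs2_num_free_extents() no longer cares whether the root is a dinode: it takes the root list from the extent tree, follows last_eb_blk to the rightmost leaf if one exists, and the answer is simply slot capacity minus slots already used. The arithmetic in user-space terms (an illustrative layout, not the real ocfs2_extent_list):

    #include <endian.h>
    #include <stdint.h>

    /* The two little-endian header fields the calculation needs. */
    struct extent_list_hdr {
            uint16_t l_count;          /* total record slots in this list */
            uint16_t l_next_free_rec;  /* index of the first unused slot  */
    };

    static int num_free_extents(const struct extent_list_hdr *el)
    {
            return le16toh(el->l_count) - le16toh(el->l_next_free_rec);
    }

    int main(void)
    {
            struct extent_list_hdr el = { htole16(16), htole16(13) };

            return num_free_extents(&el) == 3 ? 0 : 1;   /* 3 slots left */
    }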
| @@ -486,8 +805,7 @@ static int ocfs2_create_new_meta_bhs(struct ocfs2_super *osb, | |||
| 486 | bail: | 805 | bail: |
| 487 | if (status < 0) { | 806 | if (status < 0) { |
| 488 | for(i = 0; i < wanted; i++) { | 807 | for(i = 0; i < wanted; i++) { |
| 489 | if (bhs[i]) | 808 | brelse(bhs[i]); |
| 490 | brelse(bhs[i]); | ||
| 491 | bhs[i] = NULL; | 809 | bhs[i] = NULL; |
| 492 | } | 810 | } |
| 493 | } | 811 | } |
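Several hunks in this file — here in ocfs2_create_new_meta_bhs(), and again in ocfs2_add_branch(), ocfs2_shift_tree_depth(), ocfs2_find_branch_target() and ocfs2_num_free_extents() — drop the "if (bh)" guard because brelse() already treats a NULL buffer head as a no-op, so error paths can release unconditionally. The same shape in plain C, with free() standing in for brelse() (free() likewise accepts NULL):

    #include <stdlib.h>

    #define WANTED 4

    static void cleanup(void *bhs[], int wanted)
    {
            int i;

            for (i = 0; i < wanted; i++) {
                    free(bhs[i]);        /* NULL slot is a no-op, no guard needed */
                    bhs[i] = NULL;
            }
    }

    int main(void)
    {
            void *bhs[WANTED] = { malloc(8), NULL, malloc(8), NULL };

            cleanup(bhs, WANTED);        /* safe even though two slots are NULL */
            return 0;
    }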
| @@ -531,7 +849,7 @@ static inline u32 ocfs2_sum_rightmost_rec(struct ocfs2_extent_list *el) | |||
| 531 | static int ocfs2_add_branch(struct ocfs2_super *osb, | 849 | static int ocfs2_add_branch(struct ocfs2_super *osb, |
| 532 | handle_t *handle, | 850 | handle_t *handle, |
| 533 | struct inode *inode, | 851 | struct inode *inode, |
| 534 | struct buffer_head *fe_bh, | 852 | struct ocfs2_extent_tree *et, |
| 535 | struct buffer_head *eb_bh, | 853 | struct buffer_head *eb_bh, |
| 536 | struct buffer_head **last_eb_bh, | 854 | struct buffer_head **last_eb_bh, |
| 537 | struct ocfs2_alloc_context *meta_ac) | 855 | struct ocfs2_alloc_context *meta_ac) |
| @@ -540,7 +858,6 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 540 | u64 next_blkno, new_last_eb_blk; | 858 | u64 next_blkno, new_last_eb_blk; |
| 541 | struct buffer_head *bh; | 859 | struct buffer_head *bh; |
| 542 | struct buffer_head **new_eb_bhs = NULL; | 860 | struct buffer_head **new_eb_bhs = NULL; |
| 543 | struct ocfs2_dinode *fe; | ||
| 544 | struct ocfs2_extent_block *eb; | 861 | struct ocfs2_extent_block *eb; |
| 545 | struct ocfs2_extent_list *eb_el; | 862 | struct ocfs2_extent_list *eb_el; |
| 546 | struct ocfs2_extent_list *el; | 863 | struct ocfs2_extent_list *el; |
| @@ -550,13 +867,11 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 550 | 867 | ||
| 551 | BUG_ON(!last_eb_bh || !*last_eb_bh); | 868 | BUG_ON(!last_eb_bh || !*last_eb_bh); |
| 552 | 869 | ||
| 553 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
| 554 | |||
| 555 | if (eb_bh) { | 870 | if (eb_bh) { |
| 556 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | 871 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; |
| 557 | el = &eb->h_list; | 872 | el = &eb->h_list; |
| 558 | } else | 873 | } else |
| 559 | el = &fe->id2.i_list; | 874 | el = et->et_root_el; |
| 560 | 875 | ||
| 561 | /* we never add a branch to a leaf. */ | 876 | /* we never add a branch to a leaf. */ |
| 562 | BUG_ON(!el->l_tree_depth); | 877 | BUG_ON(!el->l_tree_depth); |
| @@ -646,7 +961,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 646 | mlog_errno(status); | 961 | mlog_errno(status); |
| 647 | goto bail; | 962 | goto bail; |
| 648 | } | 963 | } |
| 649 | status = ocfs2_journal_access(handle, inode, fe_bh, | 964 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, |
| 650 | OCFS2_JOURNAL_ACCESS_WRITE); | 965 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 651 | if (status < 0) { | 966 | if (status < 0) { |
| 652 | mlog_errno(status); | 967 | mlog_errno(status); |
| @@ -662,7 +977,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 662 | } | 977 | } |
| 663 | 978 | ||
| 664 | /* Link the new branch into the rest of the tree (el will | 979 | /* Link the new branch into the rest of the tree (el will |
| 665 | * either be on the fe, or the extent block passed in. */ | 980 | * either be on the root_bh, or the extent block passed in. */ |
| 666 | i = le16_to_cpu(el->l_next_free_rec); | 981 | i = le16_to_cpu(el->l_next_free_rec); |
| 667 | el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); | 982 | el->l_recs[i].e_blkno = cpu_to_le64(next_blkno); |
| 668 | el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); | 983 | el->l_recs[i].e_cpos = cpu_to_le32(new_cpos); |
| @@ -671,7 +986,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 671 | 986 | ||
| 672 | /* fe needs a new last extent block pointer, as does the | 987 | /* fe needs a new last extent block pointer, as does the |
| 673 | * next_leaf on the previously last-extent-block. */ | 988 | * next_leaf on the previously last-extent-block. */ |
| 674 | fe->i_last_eb_blk = cpu_to_le64(new_last_eb_blk); | 989 | ocfs2_et_set_last_eb_blk(et, new_last_eb_blk); |
| 675 | 990 | ||
| 676 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; | 991 | eb = (struct ocfs2_extent_block *) (*last_eb_bh)->b_data; |
| 677 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); | 992 | eb->h_next_leaf_blk = cpu_to_le64(new_last_eb_blk); |
| @@ -679,7 +994,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 679 | status = ocfs2_journal_dirty(handle, *last_eb_bh); | 994 | status = ocfs2_journal_dirty(handle, *last_eb_bh); |
| 680 | if (status < 0) | 995 | if (status < 0) |
| 681 | mlog_errno(status); | 996 | mlog_errno(status); |
| 682 | status = ocfs2_journal_dirty(handle, fe_bh); | 997 | status = ocfs2_journal_dirty(handle, et->et_root_bh); |
| 683 | if (status < 0) | 998 | if (status < 0) |
| 684 | mlog_errno(status); | 999 | mlog_errno(status); |
| 685 | if (eb_bh) { | 1000 | if (eb_bh) { |
| @@ -700,8 +1015,7 @@ static int ocfs2_add_branch(struct ocfs2_super *osb, | |||
| 700 | bail: | 1015 | bail: |
| 701 | if (new_eb_bhs) { | 1016 | if (new_eb_bhs) { |
| 702 | for (i = 0; i < new_blocks; i++) | 1017 | for (i = 0; i < new_blocks; i++) |
| 703 | if (new_eb_bhs[i]) | 1018 | brelse(new_eb_bhs[i]); |
| 704 | brelse(new_eb_bhs[i]); | ||
| 705 | kfree(new_eb_bhs); | 1019 | kfree(new_eb_bhs); |
| 706 | } | 1020 | } |
| 707 | 1021 | ||
| @@ -717,16 +1031,15 @@ bail: | |||
| 717 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | 1031 | static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, |
| 718 | handle_t *handle, | 1032 | handle_t *handle, |
| 719 | struct inode *inode, | 1033 | struct inode *inode, |
| 720 | struct buffer_head *fe_bh, | 1034 | struct ocfs2_extent_tree *et, |
| 721 | struct ocfs2_alloc_context *meta_ac, | 1035 | struct ocfs2_alloc_context *meta_ac, |
| 722 | struct buffer_head **ret_new_eb_bh) | 1036 | struct buffer_head **ret_new_eb_bh) |
| 723 | { | 1037 | { |
| 724 | int status, i; | 1038 | int status, i; |
| 725 | u32 new_clusters; | 1039 | u32 new_clusters; |
| 726 | struct buffer_head *new_eb_bh = NULL; | 1040 | struct buffer_head *new_eb_bh = NULL; |
| 727 | struct ocfs2_dinode *fe; | ||
| 728 | struct ocfs2_extent_block *eb; | 1041 | struct ocfs2_extent_block *eb; |
| 729 | struct ocfs2_extent_list *fe_el; | 1042 | struct ocfs2_extent_list *root_el; |
| 730 | struct ocfs2_extent_list *eb_el; | 1043 | struct ocfs2_extent_list *eb_el; |
| 731 | 1044 | ||
| 732 | mlog_entry_void(); | 1045 | mlog_entry_void(); |
| @@ -746,8 +1059,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
| 746 | } | 1059 | } |
| 747 | 1060 | ||
| 748 | eb_el = &eb->h_list; | 1061 | eb_el = &eb->h_list; |
| 749 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1062 | root_el = et->et_root_el; |
| 750 | fe_el = &fe->id2.i_list; | ||
| 751 | 1063 | ||
| 752 | status = ocfs2_journal_access(handle, inode, new_eb_bh, | 1064 | status = ocfs2_journal_access(handle, inode, new_eb_bh, |
| 753 | OCFS2_JOURNAL_ACCESS_CREATE); | 1065 | OCFS2_JOURNAL_ACCESS_CREATE); |
| @@ -756,11 +1068,11 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
| 756 | goto bail; | 1068 | goto bail; |
| 757 | } | 1069 | } |
| 758 | 1070 | ||
| 759 | /* copy the fe data into the new extent block */ | 1071 | /* copy the root extent list data into the new extent block */ |
| 760 | eb_el->l_tree_depth = fe_el->l_tree_depth; | 1072 | eb_el->l_tree_depth = root_el->l_tree_depth; |
| 761 | eb_el->l_next_free_rec = fe_el->l_next_free_rec; | 1073 | eb_el->l_next_free_rec = root_el->l_next_free_rec; |
| 762 | for(i = 0; i < le16_to_cpu(fe_el->l_next_free_rec); i++) | 1074 | for (i = 0; i < le16_to_cpu(root_el->l_next_free_rec); i++) |
| 763 | eb_el->l_recs[i] = fe_el->l_recs[i]; | 1075 | eb_el->l_recs[i] = root_el->l_recs[i]; |
| 764 | 1076 | ||
| 765 | status = ocfs2_journal_dirty(handle, new_eb_bh); | 1077 | status = ocfs2_journal_dirty(handle, new_eb_bh); |
| 766 | if (status < 0) { | 1078 | if (status < 0) { |
| @@ -768,7 +1080,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
| 768 | goto bail; | 1080 | goto bail; |
| 769 | } | 1081 | } |
| 770 | 1082 | ||
| 771 | status = ocfs2_journal_access(handle, inode, fe_bh, | 1083 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, |
| 772 | OCFS2_JOURNAL_ACCESS_WRITE); | 1084 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 773 | if (status < 0) { | 1085 | if (status < 0) { |
| 774 | mlog_errno(status); | 1086 | mlog_errno(status); |
| @@ -777,21 +1089,21 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
| 777 | 1089 | ||
| 778 | new_clusters = ocfs2_sum_rightmost_rec(eb_el); | 1090 | new_clusters = ocfs2_sum_rightmost_rec(eb_el); |
| 779 | 1091 | ||
| 780 | /* update fe now */ | 1092 | /* update root_bh now */ |
| 781 | le16_add_cpu(&fe_el->l_tree_depth, 1); | 1093 | le16_add_cpu(&root_el->l_tree_depth, 1); |
| 782 | fe_el->l_recs[0].e_cpos = 0; | 1094 | root_el->l_recs[0].e_cpos = 0; |
| 783 | fe_el->l_recs[0].e_blkno = eb->h_blkno; | 1095 | root_el->l_recs[0].e_blkno = eb->h_blkno; |
| 784 | fe_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); | 1096 | root_el->l_recs[0].e_int_clusters = cpu_to_le32(new_clusters); |
| 785 | for(i = 1; i < le16_to_cpu(fe_el->l_next_free_rec); i++) | 1097 | for (i = 1; i < le16_to_cpu(root_el->l_next_free_rec); i++) |
| 786 | memset(&fe_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); | 1098 | memset(&root_el->l_recs[i], 0, sizeof(struct ocfs2_extent_rec)); |
| 787 | fe_el->l_next_free_rec = cpu_to_le16(1); | 1099 | root_el->l_next_free_rec = cpu_to_le16(1); |
| 788 | 1100 | ||
| 789 | /* If this is our 1st tree depth shift, then last_eb_blk | 1101 | /* If this is our 1st tree depth shift, then last_eb_blk |
| 790 | * becomes the allocated extent block */ | 1102 | * becomes the allocated extent block */ |
| 791 | if (fe_el->l_tree_depth == cpu_to_le16(1)) | 1103 | if (root_el->l_tree_depth == cpu_to_le16(1)) |
| 792 | fe->i_last_eb_blk = eb->h_blkno; | 1104 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
| 793 | 1105 | ||
| 794 | status = ocfs2_journal_dirty(handle, fe_bh); | 1106 | status = ocfs2_journal_dirty(handle, et->et_root_bh); |
| 795 | if (status < 0) { | 1107 | if (status < 0) { |
| 796 | mlog_errno(status); | 1108 | mlog_errno(status); |
| 797 | goto bail; | 1109 | goto bail; |
| @@ -801,8 +1113,7 @@ static int ocfs2_shift_tree_depth(struct ocfs2_super *osb, | |||
| 801 | new_eb_bh = NULL; | 1113 | new_eb_bh = NULL; |
| 802 | status = 0; | 1114 | status = 0; |
| 803 | bail: | 1115 | bail: |
| 804 | if (new_eb_bh) | 1116 | brelse(new_eb_bh); |
| 805 | brelse(new_eb_bh); | ||
| 806 | 1117 | ||
| 807 | mlog_exit(status); | 1118 | mlog_exit(status); |
| 808 | return status; | 1119 | return status; |
| @@ -817,22 +1128,21 @@ bail: | |||
| 817 | * 1) a lowest extent block is found, then we pass it back in | 1128 | * 1) a lowest extent block is found, then we pass it back in |
| 818 | * *lowest_eb_bh and return '0' | 1129 | * *lowest_eb_bh and return '0' |
| 819 | * | 1130 | * |
| 820 | * 2) the search fails to find anything, but the dinode has room. We | 1131 | * 2) the search fails to find anything, but the root_el has room. We |
| 821 | * pass NULL back in *lowest_eb_bh, but still return '0' | 1132 | * pass NULL back in *lowest_eb_bh, but still return '0' |
| 822 | * | 1133 | * |
| 823 | * 3) the search fails to find anything AND the dinode is full, in | 1134 | * 3) the search fails to find anything AND the root_el is full, in |
| 824 | * which case we return > 0 | 1135 | * which case we return > 0 |
| 825 | * | 1136 | * |
| 826 | * return status < 0 indicates an error. | 1137 | * return status < 0 indicates an error. |
| 827 | */ | 1138 | */ |
| 828 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, | 1139 | static int ocfs2_find_branch_target(struct ocfs2_super *osb, |
| 829 | struct inode *inode, | 1140 | struct inode *inode, |
| 830 | struct buffer_head *fe_bh, | 1141 | struct ocfs2_extent_tree *et, |
| 831 | struct buffer_head **target_bh) | 1142 | struct buffer_head **target_bh) |
| 832 | { | 1143 | { |
| 833 | int status = 0, i; | 1144 | int status = 0, i; |
| 834 | u64 blkno; | 1145 | u64 blkno; |
| 835 | struct ocfs2_dinode *fe; | ||
| 836 | struct ocfs2_extent_block *eb; | 1146 | struct ocfs2_extent_block *eb; |
| 837 | struct ocfs2_extent_list *el; | 1147 | struct ocfs2_extent_list *el; |
| 838 | struct buffer_head *bh = NULL; | 1148 | struct buffer_head *bh = NULL; |
| @@ -842,8 +1152,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
| 842 | 1152 | ||
| 843 | *target_bh = NULL; | 1153 | *target_bh = NULL; |
| 844 | 1154 | ||
| 845 | fe = (struct ocfs2_dinode *) fe_bh->b_data; | 1155 | el = et->et_root_el; |
| 846 | el = &fe->id2.i_list; | ||
| 847 | 1156 | ||
| 848 | while(le16_to_cpu(el->l_tree_depth) > 1) { | 1157 | while(le16_to_cpu(el->l_tree_depth) > 1) { |
| 849 | if (le16_to_cpu(el->l_next_free_rec) == 0) { | 1158 | if (le16_to_cpu(el->l_next_free_rec) == 0) { |
| @@ -864,13 +1173,10 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
| 864 | goto bail; | 1173 | goto bail; |
| 865 | } | 1174 | } |
| 866 | 1175 | ||
| 867 | if (bh) { | 1176 | brelse(bh); |
| 868 | brelse(bh); | 1177 | bh = NULL; |
| 869 | bh = NULL; | ||
| 870 | } | ||
| 871 | 1178 | ||
| 872 | status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, | 1179 | status = ocfs2_read_block(inode, blkno, &bh); |
| 873 | inode); | ||
| 874 | if (status < 0) { | 1180 | if (status < 0) { |
| 875 | mlog_errno(status); | 1181 | mlog_errno(status); |
| 876 | goto bail; | 1182 | goto bail; |
| @@ -886,8 +1192,7 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
| 886 | 1192 | ||
| 887 | if (le16_to_cpu(el->l_next_free_rec) < | 1193 | if (le16_to_cpu(el->l_next_free_rec) < |
| 888 | le16_to_cpu(el->l_count)) { | 1194 | le16_to_cpu(el->l_count)) { |
| 889 | if (lowest_bh) | 1195 | brelse(lowest_bh); |
| 890 | brelse(lowest_bh); | ||
| 891 | lowest_bh = bh; | 1196 | lowest_bh = bh; |
| 892 | get_bh(lowest_bh); | 1197 | get_bh(lowest_bh); |
| 893 | } | 1198 | } |
| @@ -895,14 +1200,13 @@ static int ocfs2_find_branch_target(struct ocfs2_super *osb, | |||
| 895 | 1200 | ||
| 896 | /* If we didn't find one and the fe doesn't have any room, | 1201 | /* If we didn't find one and the fe doesn't have any room, |
| 897 | * then return '1' */ | 1202 | * then return '1' */ |
| 898 | if (!lowest_bh | 1203 | el = et->et_root_el; |
| 899 | && (fe->id2.i_list.l_next_free_rec == fe->id2.i_list.l_count)) | 1204 | if (!lowest_bh && (el->l_next_free_rec == el->l_count)) |
| 900 | status = 1; | 1205 | status = 1; |
| 901 | 1206 | ||
| 902 | *target_bh = lowest_bh; | 1207 | *target_bh = lowest_bh; |
| 903 | bail: | 1208 | bail: |
| 904 | if (bh) | 1209 | brelse(bh); |
| 905 | brelse(bh); | ||
| 906 | 1210 | ||
| 907 | mlog_exit(status); | 1211 | mlog_exit(status); |
| 908 | return status; | 1212 | return status; |
| @@ -919,19 +1223,19 @@ bail: | |||
| 919 | * *last_eb_bh will be updated by ocfs2_add_branch(). | 1223 | * *last_eb_bh will be updated by ocfs2_add_branch(). |
| 920 | */ | 1224 | */ |
| 921 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | 1225 | static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, |
| 922 | struct buffer_head *di_bh, int *final_depth, | 1226 | struct ocfs2_extent_tree *et, int *final_depth, |
| 923 | struct buffer_head **last_eb_bh, | 1227 | struct buffer_head **last_eb_bh, |
| 924 | struct ocfs2_alloc_context *meta_ac) | 1228 | struct ocfs2_alloc_context *meta_ac) |
| 925 | { | 1229 | { |
| 926 | int ret, shift; | 1230 | int ret, shift; |
| 927 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1231 | struct ocfs2_extent_list *el = et->et_root_el; |
| 928 | int depth = le16_to_cpu(di->id2.i_list.l_tree_depth); | 1232 | int depth = le16_to_cpu(el->l_tree_depth); |
| 929 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 1233 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 930 | struct buffer_head *bh = NULL; | 1234 | struct buffer_head *bh = NULL; |
| 931 | 1235 | ||
| 932 | BUG_ON(meta_ac == NULL); | 1236 | BUG_ON(meta_ac == NULL); |
| 933 | 1237 | ||
| 934 | shift = ocfs2_find_branch_target(osb, inode, di_bh, &bh); | 1238 | shift = ocfs2_find_branch_target(osb, inode, et, &bh); |
| 935 | if (shift < 0) { | 1239 | if (shift < 0) { |
| 936 | ret = shift; | 1240 | ret = shift; |
| 937 | mlog_errno(ret); | 1241 | mlog_errno(ret); |
| @@ -948,7 +1252,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
| 948 | /* ocfs2_shift_tree_depth will return us a buffer with | 1252 | /* ocfs2_shift_tree_depth will return us a buffer with |
| 949 | * the new extent block (so we can pass that to | 1253 | * the new extent block (so we can pass that to |
| 950 | * ocfs2_add_branch). */ | 1254 | * ocfs2_add_branch). */ |
| 951 | ret = ocfs2_shift_tree_depth(osb, handle, inode, di_bh, | 1255 | ret = ocfs2_shift_tree_depth(osb, handle, inode, et, |
| 952 | meta_ac, &bh); | 1256 | meta_ac, &bh); |
| 953 | if (ret < 0) { | 1257 | if (ret < 0) { |
| 954 | mlog_errno(ret); | 1258 | mlog_errno(ret); |
| @@ -975,7 +1279,7 @@ static int ocfs2_grow_tree(struct inode *inode, handle_t *handle, | |||
| 975 | /* call ocfs2_add_branch to add the final part of the tree with | 1279 | /* call ocfs2_add_branch to add the final part of the tree with |
| 976 | * the new data. */ | 1280 | * the new data. */ |
| 977 | mlog(0, "add branch. bh = %p\n", bh); | 1281 | mlog(0, "add branch. bh = %p\n", bh); |
| 978 | ret = ocfs2_add_branch(osb, handle, inode, di_bh, bh, last_eb_bh, | 1282 | ret = ocfs2_add_branch(osb, handle, inode, et, bh, last_eb_bh, |
| 979 | meta_ac); | 1283 | meta_ac); |
| 980 | if (ret < 0) { | 1284 | if (ret < 0) { |
| 981 | mlog_errno(ret); | 1285 | mlog_errno(ret); |
| @@ -990,15 +1294,6 @@ out: | |||
| 990 | } | 1294 | } |
| 991 | 1295 | ||
| 992 | /* | 1296 | /* |
| 993 | * This is only valid for leaf nodes, which are the only ones that can | ||
| 994 | * have empty extents anyway. | ||
| 995 | */ | ||
| 996 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
| 997 | { | ||
| 998 | return !rec->e_leaf_clusters; | ||
| 999 | } | ||
| 1000 | |||
| 1001 | /* | ||
| 1002 | * This function will discard the rightmost extent record. | 1297 | * This function will discard the rightmost extent record. |
| 1003 | */ | 1298 | */ |
| 1004 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) | 1299 | static void ocfs2_shift_records_right(struct ocfs2_extent_list *el) |
| @@ -1245,8 +1540,7 @@ static int __ocfs2_find_path(struct inode *inode, | |||
| 1245 | 1540 | ||
| 1246 | brelse(bh); | 1541 | brelse(bh); |
| 1247 | bh = NULL; | 1542 | bh = NULL; |
| 1248 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), blkno, | 1543 | ret = ocfs2_read_block(inode, blkno, &bh); |
| 1249 | &bh, OCFS2_BH_CACHED, inode); | ||
| 1250 | if (ret) { | 1544 | if (ret) { |
| 1251 | mlog_errno(ret); | 1545 | mlog_errno(ret); |
| 1252 | goto out; | 1546 | goto out; |
| @@ -2067,11 +2361,11 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
| 2067 | struct ocfs2_path *right_path, | 2361 | struct ocfs2_path *right_path, |
| 2068 | int subtree_index, | 2362 | int subtree_index, |
| 2069 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2363 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 2070 | int *deleted) | 2364 | int *deleted, |
| 2365 | struct ocfs2_extent_tree *et) | ||
| 2071 | { | 2366 | { |
| 2072 | int ret, i, del_right_subtree = 0, right_has_empty = 0; | 2367 | int ret, i, del_right_subtree = 0, right_has_empty = 0; |
| 2073 | struct buffer_head *root_bh, *di_bh = path_root_bh(right_path); | 2368 | struct buffer_head *root_bh, *et_root_bh = path_root_bh(right_path); |
| 2074 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 2075 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; | 2369 | struct ocfs2_extent_list *right_leaf_el, *left_leaf_el; |
| 2076 | struct ocfs2_extent_block *eb; | 2370 | struct ocfs2_extent_block *eb; |
| 2077 | 2371 | ||
| @@ -2123,7 +2417,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
| 2123 | * We have to update i_last_eb_blk during the meta | 2417 | * We have to update i_last_eb_blk during the meta |
| 2124 | * data delete. | 2418 | * data delete. |
| 2125 | */ | 2419 | */ |
| 2126 | ret = ocfs2_journal_access(handle, inode, di_bh, | 2420 | ret = ocfs2_journal_access(handle, inode, et_root_bh, |
| 2127 | OCFS2_JOURNAL_ACCESS_WRITE); | 2421 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 2128 | if (ret) { | 2422 | if (ret) { |
| 2129 | mlog_errno(ret); | 2423 | mlog_errno(ret); |
| @@ -2198,7 +2492,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
| 2198 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2492 | ocfs2_update_edge_lengths(inode, handle, left_path); |
| 2199 | 2493 | ||
| 2200 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2494 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
| 2201 | di->i_last_eb_blk = eb->h_blkno; | 2495 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
| 2202 | 2496 | ||
| 2203 | /* | 2497 | /* |
| 2204 | * Removal of the extent in the left leaf was skipped | 2498 | * Removal of the extent in the left leaf was skipped |
| @@ -2208,7 +2502,7 @@ static int ocfs2_rotate_subtree_left(struct inode *inode, handle_t *handle, | |||
| 2208 | if (right_has_empty) | 2502 | if (right_has_empty) |
| 2209 | ocfs2_remove_empty_extent(left_leaf_el); | 2503 | ocfs2_remove_empty_extent(left_leaf_el); |
| 2210 | 2504 | ||
| 2211 | ret = ocfs2_journal_dirty(handle, di_bh); | 2505 | ret = ocfs2_journal_dirty(handle, et_root_bh); |
| 2212 | if (ret) | 2506 | if (ret) |
| 2213 | mlog_errno(ret); | 2507 | mlog_errno(ret); |
| 2214 | 2508 | ||
| @@ -2331,7 +2625,8 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
| 2331 | handle_t *handle, int orig_credits, | 2625 | handle_t *handle, int orig_credits, |
| 2332 | struct ocfs2_path *path, | 2626 | struct ocfs2_path *path, |
| 2333 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 2627 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 2334 | struct ocfs2_path **empty_extent_path) | 2628 | struct ocfs2_path **empty_extent_path, |
| 2629 | struct ocfs2_extent_tree *et) | ||
| 2335 | { | 2630 | { |
| 2336 | int ret, subtree_root, deleted; | 2631 | int ret, subtree_root, deleted; |
| 2337 | u32 right_cpos; | 2632 | u32 right_cpos; |
| @@ -2404,7 +2699,7 @@ static int __ocfs2_rotate_tree_left(struct inode *inode, | |||
| 2404 | 2699 | ||
| 2405 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, | 2700 | ret = ocfs2_rotate_subtree_left(inode, handle, left_path, |
| 2406 | right_path, subtree_root, | 2701 | right_path, subtree_root, |
| 2407 | dealloc, &deleted); | 2702 | dealloc, &deleted, et); |
| 2408 | if (ret == -EAGAIN) { | 2703 | if (ret == -EAGAIN) { |
| 2409 | /* | 2704 | /* |
| 2410 | * The rotation has to temporarily stop due to | 2705 | * The rotation has to temporarily stop due to |
| @@ -2447,29 +2742,20 @@ out: | |||
| 2447 | } | 2742 | } |
| 2448 | 2743 | ||
| 2449 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | 2744 | static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, |
| 2450 | struct ocfs2_path *path, | 2745 | struct ocfs2_path *path, |
| 2451 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 2746 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 2747 | struct ocfs2_extent_tree *et) | ||
| 2452 | { | 2748 | { |
| 2453 | int ret, subtree_index; | 2749 | int ret, subtree_index; |
| 2454 | u32 cpos; | 2750 | u32 cpos; |
| 2455 | struct ocfs2_path *left_path = NULL; | 2751 | struct ocfs2_path *left_path = NULL; |
| 2456 | struct ocfs2_dinode *di; | ||
| 2457 | struct ocfs2_extent_block *eb; | 2752 | struct ocfs2_extent_block *eb; |
| 2458 | struct ocfs2_extent_list *el; | 2753 | struct ocfs2_extent_list *el; |
| 2459 | 2754 | ||
| 2460 | /* | ||
| 2461 | * XXX: This code assumes that the root is an inode, which is | ||
| 2462 | * true for now but may change as tree code gets generic. | ||
| 2463 | */ | ||
| 2464 | di = (struct ocfs2_dinode *)path_root_bh(path)->b_data; | ||
| 2465 | if (!OCFS2_IS_VALID_DINODE(di)) { | ||
| 2466 | ret = -EIO; | ||
| 2467 | ocfs2_error(inode->i_sb, | ||
| 2468 | "Inode %llu has invalid path root", | ||
| 2469 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 2470 | goto out; | ||
| 2471 | } | ||
| 2472 | 2755 | ||
| 2756 | ret = ocfs2_et_sanity_check(inode, et); | ||
| 2757 | if (ret) | ||
| 2758 | goto out; | ||
| 2473 | /* | 2759 | /* |
| 2474 | * There's two ways we handle this depending on | 2760 | * There's two ways we handle this depending on |
| 2475 | * whether path is the only existing one. | 2761 | * whether path is the only existing one. |
| @@ -2526,7 +2812,7 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
| 2526 | ocfs2_update_edge_lengths(inode, handle, left_path); | 2812 | ocfs2_update_edge_lengths(inode, handle, left_path); |
| 2527 | 2813 | ||
| 2528 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; | 2814 | eb = (struct ocfs2_extent_block *)path_leaf_bh(left_path)->b_data; |
| 2529 | di->i_last_eb_blk = eb->h_blkno; | 2815 | ocfs2_et_set_last_eb_blk(et, le64_to_cpu(eb->h_blkno)); |
| 2530 | } else { | 2816 | } else { |
| 2531 | /* | 2817 | /* |
| 2532 | * 'path' is also the leftmost path which | 2818 | * 'path' is also the leftmost path which |
| @@ -2537,12 +2823,12 @@ static int ocfs2_remove_rightmost_path(struct inode *inode, handle_t *handle, | |||
| 2537 | */ | 2823 | */ |
| 2538 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); | 2824 | ocfs2_unlink_path(inode, handle, dealloc, path, 1); |
| 2539 | 2825 | ||
| 2540 | el = &di->id2.i_list; | 2826 | el = et->et_root_el; |
| 2541 | el->l_tree_depth = 0; | 2827 | el->l_tree_depth = 0; |
| 2542 | el->l_next_free_rec = 0; | 2828 | el->l_next_free_rec = 0; |
| 2543 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); | 2829 | memset(&el->l_recs[0], 0, sizeof(struct ocfs2_extent_rec)); |
| 2544 | 2830 | ||
| 2545 | di->i_last_eb_blk = 0; | 2831 | ocfs2_et_set_last_eb_blk(et, 0); |
| 2546 | } | 2832 | } |
| 2547 | 2833 | ||
| 2548 | ocfs2_journal_dirty(handle, path_root_bh(path)); | 2834 | ocfs2_journal_dirty(handle, path_root_bh(path)); |
| @@ -2570,7 +2856,8 @@ out: | |||
| 2570 | */ | 2856 | */ |
| 2571 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | 2857 | static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, |
| 2572 | struct ocfs2_path *path, | 2858 | struct ocfs2_path *path, |
| 2573 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 2859 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 2860 | struct ocfs2_extent_tree *et) | ||
| 2574 | { | 2861 | { |
| 2575 | int ret, orig_credits = handle->h_buffer_credits; | 2862 | int ret, orig_credits = handle->h_buffer_credits; |
| 2576 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; | 2863 | struct ocfs2_path *tmp_path = NULL, *restart_path = NULL; |
| @@ -2584,7 +2871,7 @@ static int ocfs2_rotate_tree_left(struct inode *inode, handle_t *handle, | |||
| 2584 | if (path->p_tree_depth == 0) { | 2871 | if (path->p_tree_depth == 0) { |
| 2585 | rightmost_no_delete: | 2872 | rightmost_no_delete: |
| 2586 | /* | 2873 | /* |
| 2587 | * In-inode extents. This is trivially handled, so do | 2874 | * Inline extents. This is trivially handled, so do |
| 2588 | * it up front. | 2875 | * it up front. |
| 2589 | */ | 2876 | */ |
| 2590 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, | 2877 | ret = ocfs2_rotate_rightmost_leaf_left(inode, handle, |
| @@ -2638,7 +2925,7 @@ rightmost_no_delete: | |||
| 2638 | */ | 2925 | */ |
| 2639 | 2926 | ||
| 2640 | ret = ocfs2_remove_rightmost_path(inode, handle, path, | 2927 | ret = ocfs2_remove_rightmost_path(inode, handle, path, |
| 2641 | dealloc); | 2928 | dealloc, et); |
| 2642 | if (ret) | 2929 | if (ret) |
| 2643 | mlog_errno(ret); | 2930 | mlog_errno(ret); |
| 2644 | goto out; | 2931 | goto out; |
| @@ -2650,7 +2937,7 @@ rightmost_no_delete: | |||
| 2650 | */ | 2937 | */ |
| 2651 | try_rotate: | 2938 | try_rotate: |
| 2652 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, | 2939 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, path, |
| 2653 | dealloc, &restart_path); | 2940 | dealloc, &restart_path, et); |
| 2654 | if (ret && ret != -EAGAIN) { | 2941 | if (ret && ret != -EAGAIN) { |
| 2655 | mlog_errno(ret); | 2942 | mlog_errno(ret); |
| 2656 | goto out; | 2943 | goto out; |
| @@ -2662,7 +2949,7 @@ try_rotate: | |||
| 2662 | 2949 | ||
| 2663 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, | 2950 | ret = __ocfs2_rotate_tree_left(inode, handle, orig_credits, |
| 2664 | tmp_path, dealloc, | 2951 | tmp_path, dealloc, |
| 2665 | &restart_path); | 2952 | &restart_path, et); |
| 2666 | if (ret && ret != -EAGAIN) { | 2953 | if (ret && ret != -EAGAIN) { |
| 2667 | mlog_errno(ret); | 2954 | mlog_errno(ret); |
| 2668 | goto out; | 2955 | goto out; |
| @@ -2948,6 +3235,7 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
| 2948 | handle_t *handle, | 3235 | handle_t *handle, |
| 2949 | struct ocfs2_extent_rec *split_rec, | 3236 | struct ocfs2_extent_rec *split_rec, |
| 2950 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3237 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 3238 | struct ocfs2_extent_tree *et, | ||
| 2951 | int index) | 3239 | int index) |
| 2952 | { | 3240 | { |
| 2953 | int ret, i, subtree_index = 0, has_empty_extent = 0; | 3241 | int ret, i, subtree_index = 0, has_empty_extent = 0; |
| @@ -3068,7 +3356,8 @@ static int ocfs2_merge_rec_left(struct inode *inode, | |||
| 3068 | le16_to_cpu(el->l_next_free_rec) == 1) { | 3356 | le16_to_cpu(el->l_next_free_rec) == 1) { |
| 3069 | 3357 | ||
| 3070 | ret = ocfs2_remove_rightmost_path(inode, handle, | 3358 | ret = ocfs2_remove_rightmost_path(inode, handle, |
| 3071 | right_path, dealloc); | 3359 | right_path, |
| 3360 | dealloc, et); | ||
| 3072 | if (ret) { | 3361 | if (ret) { |
| 3073 | mlog_errno(ret); | 3362 | mlog_errno(ret); |
| 3074 | goto out; | 3363 | goto out; |
| @@ -3095,7 +3384,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3095 | int split_index, | 3384 | int split_index, |
| 3096 | struct ocfs2_extent_rec *split_rec, | 3385 | struct ocfs2_extent_rec *split_rec, |
| 3097 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 3386 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 3098 | struct ocfs2_merge_ctxt *ctxt) | 3387 | struct ocfs2_merge_ctxt *ctxt, |
| 3388 | struct ocfs2_extent_tree *et) | ||
| 3099 | 3389 | ||
| 3100 | { | 3390 | { |
| 3101 | int ret = 0; | 3391 | int ret = 0; |
| @@ -3113,7 +3403,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3113 | * illegal. | 3403 | * illegal. |
| 3114 | */ | 3404 | */ |
| 3115 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3405 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 3116 | dealloc); | 3406 | dealloc, et); |
| 3117 | if (ret) { | 3407 | if (ret) { |
| 3118 | mlog_errno(ret); | 3408 | mlog_errno(ret); |
| 3119 | goto out; | 3409 | goto out; |
| @@ -3156,7 +3446,8 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3156 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); | 3446 | BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0])); |
| 3157 | 3447 | ||
| 3158 | /* The merge left us with an empty extent, remove it. */ | 3448 | /* The merge left us with an empty extent, remove it. */ |
| 3159 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 3449 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 3450 | dealloc, et); | ||
| 3160 | if (ret) { | 3451 | if (ret) { |
| 3161 | mlog_errno(ret); | 3452 | mlog_errno(ret); |
| 3162 | goto out; | 3453 | goto out; |
| @@ -3170,7 +3461,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3170 | */ | 3461 | */ |
| 3171 | ret = ocfs2_merge_rec_left(inode, path, | 3462 | ret = ocfs2_merge_rec_left(inode, path, |
| 3172 | handle, rec, | 3463 | handle, rec, |
| 3173 | dealloc, | 3464 | dealloc, et, |
| 3174 | split_index); | 3465 | split_index); |
| 3175 | 3466 | ||
| 3176 | if (ret) { | 3467 | if (ret) { |
| @@ -3179,7 +3470,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3179 | } | 3470 | } |
| 3180 | 3471 | ||
| 3181 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3472 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 3182 | dealloc); | 3473 | dealloc, et); |
| 3183 | /* | 3474 | /* |
| 3184 | * Error from this last rotate is not critical, so | 3475 | * Error from this last rotate is not critical, so |
| 3185 | * print but don't bubble it up. | 3476 | * print but don't bubble it up. |
| @@ -3199,7 +3490,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3199 | ret = ocfs2_merge_rec_left(inode, | 3490 | ret = ocfs2_merge_rec_left(inode, |
| 3200 | path, | 3491 | path, |
| 3201 | handle, split_rec, | 3492 | handle, split_rec, |
| 3202 | dealloc, | 3493 | dealloc, et, |
| 3203 | split_index); | 3494 | split_index); |
| 3204 | if (ret) { | 3495 | if (ret) { |
| 3205 | mlog_errno(ret); | 3496 | mlog_errno(ret); |
| @@ -3222,7 +3513,7 @@ static int ocfs2_try_to_merge_extent(struct inode *inode, | |||
| 3222 | * our leaf. Try to rotate it away. | 3513 | * our leaf. Try to rotate it away. |
| 3223 | */ | 3514 | */ |
| 3224 | ret = ocfs2_rotate_tree_left(inode, handle, path, | 3515 | ret = ocfs2_rotate_tree_left(inode, handle, path, |
| 3225 | dealloc); | 3516 | dealloc, et); |
| 3226 | if (ret) | 3517 | if (ret) |
| 3227 | mlog_errno(ret); | 3518 | mlog_errno(ret); |
| 3228 | ret = 0; | 3519 | ret = 0; |
| @@ -3356,16 +3647,6 @@ rotate: | |||
| 3356 | ocfs2_rotate_leaf(el, insert_rec); | 3647 | ocfs2_rotate_leaf(el, insert_rec); |
| 3357 | } | 3648 | } |
| 3358 | 3649 | ||
| 3359 | static inline void ocfs2_update_dinode_clusters(struct inode *inode, | ||
| 3360 | struct ocfs2_dinode *di, | ||
| 3361 | u32 clusters) | ||
| 3362 | { | ||
| 3363 | le32_add_cpu(&di->i_clusters, clusters); | ||
| 3364 | spin_lock(&OCFS2_I(inode)->ip_lock); | ||
| 3365 | OCFS2_I(inode)->ip_clusters = le32_to_cpu(di->i_clusters); | ||
| 3366 | spin_unlock(&OCFS2_I(inode)->ip_lock); | ||
| 3367 | } | ||
| 3368 | |||
| 3369 | static void ocfs2_adjust_rightmost_records(struct inode *inode, | 3650 | static void ocfs2_adjust_rightmost_records(struct inode *inode, |
| 3370 | handle_t *handle, | 3651 | handle_t *handle, |
| 3371 | struct ocfs2_path *path, | 3652 | struct ocfs2_path *path, |
| @@ -3567,8 +3848,8 @@ static void ocfs2_split_record(struct inode *inode, | |||
| 3567 | } | 3848 | } |
| 3568 | 3849 | ||
| 3569 | /* | 3850 | /* |
| 3570 | * This function only does inserts on an allocation b-tree. For dinode | 3851 | * This function only does inserts on an allocation b-tree. For tree |
| 3571 | * lists, ocfs2_insert_at_leaf() is called directly. | 3852 | * depth = 0, ocfs2_insert_at_leaf() is called directly. |
| 3572 | * | 3853 | * |
| 3573 | * right_path is the path we want to do the actual insert | 3854 | * right_path is the path we want to do the actual insert |
| 3574 | * in. left_path should only be passed in if we need to update that | 3855 | * in. left_path should only be passed in if we need to update that |
| @@ -3665,7 +3946,7 @@ out: | |||
| 3665 | 3946 | ||
| 3666 | static int ocfs2_do_insert_extent(struct inode *inode, | 3947 | static int ocfs2_do_insert_extent(struct inode *inode, |
| 3667 | handle_t *handle, | 3948 | handle_t *handle, |
| 3668 | struct buffer_head *di_bh, | 3949 | struct ocfs2_extent_tree *et, |
| 3669 | struct ocfs2_extent_rec *insert_rec, | 3950 | struct ocfs2_extent_rec *insert_rec, |
| 3670 | struct ocfs2_insert_type *type) | 3951 | struct ocfs2_insert_type *type) |
| 3671 | { | 3952 | { |
| @@ -3673,13 +3954,11 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
| 3673 | u32 cpos; | 3954 | u32 cpos; |
| 3674 | struct ocfs2_path *right_path = NULL; | 3955 | struct ocfs2_path *right_path = NULL; |
| 3675 | struct ocfs2_path *left_path = NULL; | 3956 | struct ocfs2_path *left_path = NULL; |
| 3676 | struct ocfs2_dinode *di; | ||
| 3677 | struct ocfs2_extent_list *el; | 3957 | struct ocfs2_extent_list *el; |
| 3678 | 3958 | ||
| 3679 | di = (struct ocfs2_dinode *) di_bh->b_data; | 3959 | el = et->et_root_el; |
| 3680 | el = &di->id2.i_list; | ||
| 3681 | 3960 | ||
| 3682 | ret = ocfs2_journal_access(handle, inode, di_bh, | 3961 | ret = ocfs2_journal_access(handle, inode, et->et_root_bh, |
| 3683 | OCFS2_JOURNAL_ACCESS_WRITE); | 3962 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 3684 | if (ret) { | 3963 | if (ret) { |
| 3685 | mlog_errno(ret); | 3964 | mlog_errno(ret); |
| @@ -3691,7 +3970,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
| 3691 | goto out_update_clusters; | 3970 | goto out_update_clusters; |
| 3692 | } | 3971 | } |
| 3693 | 3972 | ||
| 3694 | right_path = ocfs2_new_inode_path(di_bh); | 3973 | right_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
| 3695 | if (!right_path) { | 3974 | if (!right_path) { |
| 3696 | ret = -ENOMEM; | 3975 | ret = -ENOMEM; |
| 3697 | mlog_errno(ret); | 3976 | mlog_errno(ret); |
| @@ -3741,7 +4020,7 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
| 3741 | * ocfs2_rotate_tree_right() might have extended the | 4020 | * ocfs2_rotate_tree_right() might have extended the |
| 3742 | * transaction without re-journaling our tree root. | 4021 | * transaction without re-journaling our tree root. |
| 3743 | */ | 4022 | */ |
| 3744 | ret = ocfs2_journal_access(handle, inode, di_bh, | 4023 | ret = ocfs2_journal_access(handle, inode, et->et_root_bh, |
| 3745 | OCFS2_JOURNAL_ACCESS_WRITE); | 4024 | OCFS2_JOURNAL_ACCESS_WRITE); |
| 3746 | if (ret) { | 4025 | if (ret) { |
| 3747 | mlog_errno(ret); | 4026 | mlog_errno(ret); |
| @@ -3766,10 +4045,10 @@ static int ocfs2_do_insert_extent(struct inode *inode, | |||
| 3766 | 4045 | ||
| 3767 | out_update_clusters: | 4046 | out_update_clusters: |
| 3768 | if (type->ins_split == SPLIT_NONE) | 4047 | if (type->ins_split == SPLIT_NONE) |
| 3769 | ocfs2_update_dinode_clusters(inode, di, | 4048 | ocfs2_et_update_clusters(inode, et, |
| 3770 | le16_to_cpu(insert_rec->e_leaf_clusters)); | 4049 | le16_to_cpu(insert_rec->e_leaf_clusters)); |
| 3771 | 4050 | ||
| 3772 | ret = ocfs2_journal_dirty(handle, di_bh); | 4051 | ret = ocfs2_journal_dirty(handle, et->et_root_bh); |
| 3773 | if (ret) | 4052 | if (ret) |
| 3774 | mlog_errno(ret); | 4053 | mlog_errno(ret); |
| 3775 | 4054 | ||
| @@ -3899,7 +4178,8 @@ out: | |||
| 3899 | static void ocfs2_figure_contig_type(struct inode *inode, | 4178 | static void ocfs2_figure_contig_type(struct inode *inode, |
| 3900 | struct ocfs2_insert_type *insert, | 4179 | struct ocfs2_insert_type *insert, |
| 3901 | struct ocfs2_extent_list *el, | 4180 | struct ocfs2_extent_list *el, |
| 3902 | struct ocfs2_extent_rec *insert_rec) | 4181 | struct ocfs2_extent_rec *insert_rec, |
| 4182 | struct ocfs2_extent_tree *et) | ||
| 3903 | { | 4183 | { |
| 3904 | int i; | 4184 | int i; |
| 3905 | enum ocfs2_contig_type contig_type = CONTIG_NONE; | 4185 | enum ocfs2_contig_type contig_type = CONTIG_NONE; |
| @@ -3915,6 +4195,21 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
| 3915 | } | 4195 | } |
| 3916 | } | 4196 | } |
| 3917 | insert->ins_contig = contig_type; | 4197 | insert->ins_contig = contig_type; |
| 4198 | |||
| 4199 | if (insert->ins_contig != CONTIG_NONE) { | ||
| 4200 | struct ocfs2_extent_rec *rec = | ||
| 4201 | &el->l_recs[insert->ins_contig_index]; | ||
| 4202 | unsigned int len = le16_to_cpu(rec->e_leaf_clusters) + | ||
| 4203 | le16_to_cpu(insert_rec->e_leaf_clusters); | ||
| 4204 | |||
| 4205 | /* | ||
| 4206 | * Caller might want us to limit the size of extents, don't | ||
| 4207 | * calculate contiguousness if we might exceed that limit. | ||
| 4208 | */ | ||
| 4209 | if (et->et_max_leaf_clusters && | ||
| 4210 | (len > et->et_max_leaf_clusters)) | ||
| 4211 | insert->ins_contig = CONTIG_NONE; | ||
| 4212 | } | ||
| 3918 | } | 4213 | } |
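The block added to ocfs2_figure_contig_type() is there for trees that cap their leaves — the xattr tree sets et_max_leaf_clusters via eo_fill_max_leaf_clusters() earlier in this file — so an insert that would merge with a neighbour into an oversized record is demoted to CONTIG_NONE. The check reduces to a pair of comparisons (standalone illustrative helper, not kernel source):

    /* A merged record must not exceed the per-leaf cap; a cap of 0 means
     * "unlimited", matching an unset et_max_leaf_clusters. */
    static int merge_would_overflow(unsigned int rec_clusters,
                                    unsigned int insert_clusters,
                                    unsigned int max_leaf_clusters)
    {
            return max_leaf_clusters &&
                   rec_clusters + insert_clusters > max_leaf_clusters;
    }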
| 3919 | 4214 | ||
| 3920 | /* | 4215 | /* |
| @@ -3923,8 +4218,8 @@ static void ocfs2_figure_contig_type(struct inode *inode, | |||
| 3923 | * ocfs2_figure_appending_type() will figure out whether we'll have to | 4218 | * ocfs2_figure_appending_type() will figure out whether we'll have to |
| 3924 | * insert at the tail of the rightmost leaf. | 4219 | * insert at the tail of the rightmost leaf. |
| 3925 | * | 4220 | * |
| 3926 | * This should also work against the dinode list for trees with 0 | 4221 | * This should also work against the root extent list for trees with 0 |
| 3927 | * depth. If we consider the dinode list to be the rightmost leaf node | 4222 | * depth. If we consider the root extent list to be the rightmost leaf node |
| 3928 | * then the logic here makes sense. | 4223 | * then the logic here makes sense. |
| 3929 | */ | 4224 | */ |
| 3930 | static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, | 4225 | static void ocfs2_figure_appending_type(struct ocfs2_insert_type *insert, |
| @@ -3975,14 +4270,13 @@ set_tail_append: | |||
| 3975 | * structure. | 4270 | * structure. |
| 3976 | */ | 4271 | */ |
| 3977 | static int ocfs2_figure_insert_type(struct inode *inode, | 4272 | static int ocfs2_figure_insert_type(struct inode *inode, |
| 3978 | struct buffer_head *di_bh, | 4273 | struct ocfs2_extent_tree *et, |
| 3979 | struct buffer_head **last_eb_bh, | 4274 | struct buffer_head **last_eb_bh, |
| 3980 | struct ocfs2_extent_rec *insert_rec, | 4275 | struct ocfs2_extent_rec *insert_rec, |
| 3981 | int *free_records, | 4276 | int *free_records, |
| 3982 | struct ocfs2_insert_type *insert) | 4277 | struct ocfs2_insert_type *insert) |
| 3983 | { | 4278 | { |
| 3984 | int ret; | 4279 | int ret; |
| 3985 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 3986 | struct ocfs2_extent_block *eb; | 4280 | struct ocfs2_extent_block *eb; |
| 3987 | struct ocfs2_extent_list *el; | 4281 | struct ocfs2_extent_list *el; |
| 3988 | struct ocfs2_path *path = NULL; | 4282 | struct ocfs2_path *path = NULL; |
| @@ -3990,7 +4284,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
| 3990 | 4284 | ||
| 3991 | insert->ins_split = SPLIT_NONE; | 4285 | insert->ins_split = SPLIT_NONE; |
| 3992 | 4286 | ||
| 3993 | el = &di->id2.i_list; | 4287 | el = et->et_root_el; |
| 3994 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); | 4288 | insert->ins_tree_depth = le16_to_cpu(el->l_tree_depth); |
| 3995 | 4289 | ||
| 3996 | if (el->l_tree_depth) { | 4290 | if (el->l_tree_depth) { |
| @@ -4000,9 +4294,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
| 4000 | * ocfs2_figure_insert_type() and ocfs2_add_branch() | 4294 | * ocfs2_figure_insert_type() and ocfs2_add_branch() |
| 4001 | * may want it later. | 4295 | * may want it later. |
| 4002 | */ | 4296 | */ |
| 4003 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4297 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), &bh); |
| 4004 | le64_to_cpu(di->i_last_eb_blk), &bh, | ||
| 4005 | OCFS2_BH_CACHED, inode); | ||
| 4006 | if (ret) { | 4298 | if (ret) { |
| 4007 | mlog_exit(ret); | 4299 | mlog_exit(ret); |
| 4008 | goto out; | 4300 | goto out; |
| @@ -4023,12 +4315,12 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
| 4023 | le16_to_cpu(el->l_next_free_rec); | 4315 | le16_to_cpu(el->l_next_free_rec); |
| 4024 | 4316 | ||
| 4025 | if (!insert->ins_tree_depth) { | 4317 | if (!insert->ins_tree_depth) { |
| 4026 | ocfs2_figure_contig_type(inode, insert, el, insert_rec); | 4318 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); |
| 4027 | ocfs2_figure_appending_type(insert, el, insert_rec); | 4319 | ocfs2_figure_appending_type(insert, el, insert_rec); |
| 4028 | return 0; | 4320 | return 0; |
| 4029 | } | 4321 | } |
| 4030 | 4322 | ||
| 4031 | path = ocfs2_new_inode_path(di_bh); | 4323 | path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
| 4032 | if (!path) { | 4324 | if (!path) { |
| 4033 | ret = -ENOMEM; | 4325 | ret = -ENOMEM; |
| 4034 | mlog_errno(ret); | 4326 | mlog_errno(ret); |
| @@ -4057,7 +4349,7 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
| 4057 | * into two types of appends: simple record append, or a | 4349 | * into two types of appends: simple record append, or a |
| 4058 | * rotate inside the tail leaf. | 4350 | * rotate inside the tail leaf. |
| 4059 | */ | 4351 | */ |
| 4060 | ocfs2_figure_contig_type(inode, insert, el, insert_rec); | 4352 | ocfs2_figure_contig_type(inode, insert, el, insert_rec, et); |
| 4061 | 4353 | ||
| 4062 | /* | 4354 | /* |
| 4063 | * The insert code isn't quite ready to deal with all cases of | 4355 | * The insert code isn't quite ready to deal with all cases of |
| @@ -4078,7 +4370,8 @@ static int ocfs2_figure_insert_type(struct inode *inode, | |||
| 4078 | * the case that we're doing a tail append, so maybe we can | 4370 | * the case that we're doing a tail append, so maybe we can |
| 4079 | * take advantage of that information somehow. | 4371 | * take advantage of that information somehow. |
| 4080 | */ | 4372 | */ |
| 4081 | if (le64_to_cpu(di->i_last_eb_blk) == path_leaf_bh(path)->b_blocknr) { | 4373 | if (ocfs2_et_get_last_eb_blk(et) == |
| 4374 | path_leaf_bh(path)->b_blocknr) { | ||
| 4082 | /* | 4375 | /* |
| 4083 | * Ok, ocfs2_find_path() returned us the rightmost | 4376 | * Ok, ocfs2_find_path() returned us the rightmost |
| 4084 | * tree path. This might be an appending insert. There are | 4377 | * tree path. This might be an appending insert. There are |
| @@ -4108,7 +4401,7 @@ out: | |||
| 4108 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 4401 | int ocfs2_insert_extent(struct ocfs2_super *osb, |
| 4109 | handle_t *handle, | 4402 | handle_t *handle, |
| 4110 | struct inode *inode, | 4403 | struct inode *inode, |
| 4111 | struct buffer_head *fe_bh, | 4404 | struct ocfs2_extent_tree *et, |
| 4112 | u32 cpos, | 4405 | u32 cpos, |
| 4113 | u64 start_blk, | 4406 | u64 start_blk, |
| 4114 | u32 new_clusters, | 4407 | u32 new_clusters, |
| @@ -4121,26 +4414,21 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 4121 | struct ocfs2_insert_type insert = {0, }; | 4414 | struct ocfs2_insert_type insert = {0, }; |
| 4122 | struct ocfs2_extent_rec rec; | 4415 | struct ocfs2_extent_rec rec; |
| 4123 | 4416 | ||
| 4124 | BUG_ON(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL); | ||
| 4125 | |||
| 4126 | mlog(0, "add %u clusters at position %u to inode %llu\n", | 4417 | mlog(0, "add %u clusters at position %u to inode %llu\n", |
| 4127 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); | 4418 | new_clusters, cpos, (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 4128 | 4419 | ||
| 4129 | mlog_bug_on_msg(!ocfs2_sparse_alloc(osb) && | ||
| 4130 | (OCFS2_I(inode)->ip_clusters != cpos), | ||
| 4131 | "Device %s, asking for sparse allocation: inode %llu, " | ||
| 4132 | "cpos %u, clusters %u\n", | ||
| 4133 | osb->dev_str, | ||
| 4134 | (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos, | ||
| 4135 | OCFS2_I(inode)->ip_clusters); | ||
| 4136 | |||
| 4137 | memset(&rec, 0, sizeof(rec)); | 4420 | memset(&rec, 0, sizeof(rec)); |
| 4138 | rec.e_cpos = cpu_to_le32(cpos); | 4421 | rec.e_cpos = cpu_to_le32(cpos); |
| 4139 | rec.e_blkno = cpu_to_le64(start_blk); | 4422 | rec.e_blkno = cpu_to_le64(start_blk); |
| 4140 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); | 4423 | rec.e_leaf_clusters = cpu_to_le16(new_clusters); |
| 4141 | rec.e_flags = flags; | 4424 | rec.e_flags = flags; |
| 4425 | status = ocfs2_et_insert_check(inode, et, &rec); | ||
| 4426 | if (status) { | ||
| 4427 | mlog_errno(status); | ||
| 4428 | goto bail; | ||
| 4429 | } | ||
| 4142 | 4430 | ||
| 4143 | status = ocfs2_figure_insert_type(inode, fe_bh, &last_eb_bh, &rec, | 4431 | status = ocfs2_figure_insert_type(inode, et, &last_eb_bh, &rec, |
| 4144 | &free_records, &insert); | 4432 | &free_records, &insert); |
| 4145 | if (status < 0) { | 4433 | if (status < 0) { |
| 4146 | mlog_errno(status); | 4434 | mlog_errno(status); |
| @@ -4154,7 +4442,7 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 4154 | free_records, insert.ins_tree_depth); | 4442 | free_records, insert.ins_tree_depth); |
| 4155 | 4443 | ||
| 4156 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { | 4444 | if (insert.ins_contig == CONTIG_NONE && free_records == 0) { |
| 4157 | status = ocfs2_grow_tree(inode, handle, fe_bh, | 4445 | status = ocfs2_grow_tree(inode, handle, et, |
| 4158 | &insert.ins_tree_depth, &last_eb_bh, | 4446 | &insert.ins_tree_depth, &last_eb_bh, |
| 4159 | meta_ac); | 4447 | meta_ac); |
| 4160 | if (status) { | 4448 | if (status) { |
| @@ -4164,17 +4452,124 @@ int ocfs2_insert_extent(struct ocfs2_super *osb, | |||
| 4164 | } | 4452 | } |
| 4165 | 4453 | ||
| 4166 | /* Finally, we can add clusters. This might rotate the tree for us. */ | 4454 | /* Finally, we can add clusters. This might rotate the tree for us. */ |
| 4167 | status = ocfs2_do_insert_extent(inode, handle, fe_bh, &rec, &insert); | 4455 | status = ocfs2_do_insert_extent(inode, handle, et, &rec, &insert); |
| 4168 | if (status < 0) | 4456 | if (status < 0) |
| 4169 | mlog_errno(status); | 4457 | mlog_errno(status); |
| 4170 | else | 4458 | else if (et->et_ops == &ocfs2_dinode_et_ops) |
| 4171 | ocfs2_extent_map_insert_rec(inode, &rec); | 4459 | ocfs2_extent_map_insert_rec(inode, &rec); |
| 4172 | 4460 | ||
| 4173 | bail: | 4461 | bail: |
| 4174 | if (last_eb_bh) | 4462 | brelse(last_eb_bh); |
| 4175 | brelse(last_eb_bh); | 4463 | |
| 4464 | mlog_exit(status); | ||
| 4465 | return status; | ||
| 4466 | } | ||
| 4467 | |||
| 4468 | /* | ||
| 4469 | * Allocate and add clusters into the extent b-tree. | ||
| 4470 | * The new clusters (clusters_to_add) will be inserted at logical_offset. | ||
| 4471 | * The extent b-tree's root is specified by et, and | ||
| 4472 | * it is not limited to file data storage. Any extent tree can use this | ||
| 4473 | * function if it implements the proper ocfs2_extent_tree. | ||
| 4474 | */ | ||
| 4475 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | ||
| 4476 | struct inode *inode, | ||
| 4477 | u32 *logical_offset, | ||
| 4478 | u32 clusters_to_add, | ||
| 4479 | int mark_unwritten, | ||
| 4480 | struct ocfs2_extent_tree *et, | ||
| 4481 | handle_t *handle, | ||
| 4482 | struct ocfs2_alloc_context *data_ac, | ||
| 4483 | struct ocfs2_alloc_context *meta_ac, | ||
| 4484 | enum ocfs2_alloc_restarted *reason_ret) | ||
| 4485 | { | ||
| 4486 | int status = 0; | ||
| 4487 | int free_extents; | ||
| 4488 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | ||
| 4489 | u32 bit_off, num_bits; | ||
| 4490 | u64 block; | ||
| 4491 | u8 flags = 0; | ||
| 4492 | |||
| 4493 | BUG_ON(!clusters_to_add); | ||
| 4494 | |||
| 4495 | if (mark_unwritten) | ||
| 4496 | flags = OCFS2_EXT_UNWRITTEN; | ||
| 4497 | |||
| 4498 | free_extents = ocfs2_num_free_extents(osb, inode, et); | ||
| 4499 | if (free_extents < 0) { | ||
| 4500 | status = free_extents; | ||
| 4501 | mlog_errno(status); | ||
| 4502 | goto leave; | ||
| 4503 | } | ||
| 4504 | |||
| 4505 | /* there are two cases which could cause us to EAGAIN in the | ||
| 4506 | * we-need-more-metadata case: | ||
| 4507 | * 1) we haven't reserved *any* | ||
| 4508 | * 2) we are so fragmented, we've needed to add metadata too | ||
| 4509 | * many times. */ | ||
| 4510 | if (!free_extents && !meta_ac) { | ||
| 4511 | mlog(0, "we haven't reserved any metadata!\n"); | ||
| 4512 | status = -EAGAIN; | ||
| 4513 | reason = RESTART_META; | ||
| 4514 | goto leave; | ||
| 4515 | } else if ((!free_extents) | ||
| 4516 | && (ocfs2_alloc_context_bits_left(meta_ac) | ||
| 4517 | < ocfs2_extend_meta_needed(et->et_root_el))) { | ||
| 4518 | mlog(0, "filesystem is really fragmented...\n"); | ||
| 4519 | status = -EAGAIN; | ||
| 4520 | reason = RESTART_META; | ||
| 4521 | goto leave; | ||
| 4522 | } | ||
| 4523 | |||
| 4524 | status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
| 4525 | clusters_to_add, &bit_off, &num_bits); | ||
| 4526 | if (status < 0) { | ||
| 4527 | if (status != -ENOSPC) | ||
| 4528 | mlog_errno(status); | ||
| 4529 | goto leave; | ||
| 4530 | } | ||
| 4176 | 4531 | ||
| 4532 | BUG_ON(num_bits > clusters_to_add); | ||
| 4533 | |||
| 4534 | /* reserve our write early -- insert_extent may update the inode */ | ||
| 4535 | status = ocfs2_journal_access(handle, inode, et->et_root_bh, | ||
| 4536 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 4537 | if (status < 0) { | ||
| 4538 | mlog_errno(status); | ||
| 4539 | goto leave; | ||
| 4540 | } | ||
| 4541 | |||
| 4542 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
| 4543 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | ||
| 4544 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 4545 | status = ocfs2_insert_extent(osb, handle, inode, et, | ||
| 4546 | *logical_offset, block, | ||
| 4547 | num_bits, flags, meta_ac); | ||
| 4548 | if (status < 0) { | ||
| 4549 | mlog_errno(status); | ||
| 4550 | goto leave; | ||
| 4551 | } | ||
| 4552 | |||
| 4553 | status = ocfs2_journal_dirty(handle, et->et_root_bh); | ||
| 4554 | if (status < 0) { | ||
| 4555 | mlog_errno(status); | ||
| 4556 | goto leave; | ||
| 4557 | } | ||
| 4558 | |||
| 4559 | clusters_to_add -= num_bits; | ||
| 4560 | *logical_offset += num_bits; | ||
| 4561 | |||
| 4562 | if (clusters_to_add) { | ||
| 4563 | mlog(0, "need to alloc once more, wanted = %u\n", | ||
| 4564 | clusters_to_add); | ||
| 4565 | status = -EAGAIN; | ||
| 4566 | reason = RESTART_TRANS; | ||
| 4567 | } | ||
| 4568 | |||
| 4569 | leave: | ||
| 4177 | mlog_exit(status); | 4570 | mlog_exit(status); |
| 4571 | if (reason_ret) | ||
| 4572 | *reason_ret = reason; | ||
| 4178 | return status; | 4573 | return status; |
| 4179 | } | 4574 | } |
| 4180 | 4575 | ||
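ocfs2_add_clusters_in_btree() claims at most one contiguous run of clusters per call and tells the caller, through *reason_ret, whether to extend the running transaction (RESTART_TRANS) or reserve more metadata (RESTART_META) before calling again. The function below is a hedged sketch of that caller-side contract, not code from this patch; the surrounding handle, allocation contexts and extent tree are assumed to exist, and real callers do additional bookkeeping:

/* Sketch only: drive ocfs2_add_clusters_in_btree() until 'wanted' is gone. */
static int example_add_clusters(struct ocfs2_super *osb, struct inode *inode,
				handle_t *handle, struct ocfs2_extent_tree *et,
				struct ocfs2_alloc_context *data_ac,
				struct ocfs2_alloc_context *meta_ac,
				u32 cpos, u32 wanted)
{
	enum ocfs2_alloc_restarted why = RESTART_NONE;
	int status = 0;

	while (wanted) {
		u32 prev = cpos;

		status = ocfs2_add_clusters_in_btree(osb, inode, &cpos, wanted,
						     0 /* !unwritten */, et,
						     handle, data_ac, meta_ac,
						     &why);
		wanted -= cpos - prev;	/* *logical_offset advances by what was added */

		if (status == -EAGAIN && why == RESTART_TRANS) {
			/* Journal credits ran out: ask for enough to finish. */
			status = ocfs2_extend_trans(handle,
					ocfs2_calc_extend_credits(osb->sb,
								  et->et_root_el,
								  wanted));
			if (status < 0)
				break;
			continue;
		} else if (status == -EAGAIN && why == RESTART_META) {
			/* Metadata reservation exhausted: the caller must
			 * reserve more extent blocks, then come back. */
			break;
		} else if (status < 0) {
			break;	/* hard error, already logged by the callee */
		}
	}

	return status;
}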
| @@ -4201,7 +4596,7 @@ static void ocfs2_make_right_split_rec(struct super_block *sb, | |||
| 4201 | static int ocfs2_split_and_insert(struct inode *inode, | 4596 | static int ocfs2_split_and_insert(struct inode *inode, |
| 4202 | handle_t *handle, | 4597 | handle_t *handle, |
| 4203 | struct ocfs2_path *path, | 4598 | struct ocfs2_path *path, |
| 4204 | struct buffer_head *di_bh, | 4599 | struct ocfs2_extent_tree *et, |
| 4205 | struct buffer_head **last_eb_bh, | 4600 | struct buffer_head **last_eb_bh, |
| 4206 | int split_index, | 4601 | int split_index, |
| 4207 | struct ocfs2_extent_rec *orig_split_rec, | 4602 | struct ocfs2_extent_rec *orig_split_rec, |
| @@ -4215,7 +4610,6 @@ static int ocfs2_split_and_insert(struct inode *inode, | |||
| 4215 | struct ocfs2_extent_rec split_rec = *orig_split_rec; | 4610 | struct ocfs2_extent_rec split_rec = *orig_split_rec; |
| 4216 | struct ocfs2_insert_type insert; | 4611 | struct ocfs2_insert_type insert; |
| 4217 | struct ocfs2_extent_block *eb; | 4612 | struct ocfs2_extent_block *eb; |
| 4218 | struct ocfs2_dinode *di; | ||
| 4219 | 4613 | ||
| 4220 | leftright: | 4614 | leftright: |
| 4221 | /* | 4615 | /* |
| @@ -4224,8 +4618,7 @@ leftright: | |||
| 4224 | */ | 4618 | */ |
| 4225 | rec = path_leaf_el(path)->l_recs[split_index]; | 4619 | rec = path_leaf_el(path)->l_recs[split_index]; |
| 4226 | 4620 | ||
| 4227 | di = (struct ocfs2_dinode *)di_bh->b_data; | 4621 | rightmost_el = et->et_root_el; |
| 4228 | rightmost_el = &di->id2.i_list; | ||
| 4229 | 4622 | ||
| 4230 | depth = le16_to_cpu(rightmost_el->l_tree_depth); | 4623 | depth = le16_to_cpu(rightmost_el->l_tree_depth); |
| 4231 | if (depth) { | 4624 | if (depth) { |
| @@ -4236,8 +4629,8 @@ leftright: | |||
| 4236 | 4629 | ||
| 4237 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4630 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
| 4238 | le16_to_cpu(rightmost_el->l_count)) { | 4631 | le16_to_cpu(rightmost_el->l_count)) { |
| 4239 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, last_eb_bh, | 4632 | ret = ocfs2_grow_tree(inode, handle, et, |
| 4240 | meta_ac); | 4633 | &depth, last_eb_bh, meta_ac); |
| 4241 | if (ret) { | 4634 | if (ret) { |
| 4242 | mlog_errno(ret); | 4635 | mlog_errno(ret); |
| 4243 | goto out; | 4636 | goto out; |
| @@ -4274,8 +4667,7 @@ leftright: | |||
| 4274 | do_leftright = 1; | 4667 | do_leftright = 1; |
| 4275 | } | 4668 | } |
| 4276 | 4669 | ||
| 4277 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, | 4670 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); |
| 4278 | &insert); | ||
| 4279 | if (ret) { | 4671 | if (ret) { |
| 4280 | mlog_errno(ret); | 4672 | mlog_errno(ret); |
| 4281 | goto out; | 4673 | goto out; |
| @@ -4317,8 +4709,9 @@ out: | |||
| 4317 | * of the tree is required. All other cases will degrade into a less | 4709 | * of the tree is required. All other cases will degrade into a less |
| 4318 | * optimal tree layout. | 4710 | * optimal tree layout. |
| 4319 | * | 4711 | * |
| 4320 | * last_eb_bh should be the rightmost leaf block for any inode with a | 4712 | * last_eb_bh should be the rightmost leaf block for any extent |
| 4321 | * btree. Since a split may grow the tree or a merge might shrink it, the caller cannot trust the contents of that buffer after this call. | 4713 | * btree. Since a split may grow the tree or a merge might shrink it, |
| 4714 | * the caller cannot trust the contents of that buffer after this call. | ||
| 4322 | * | 4715 | * |
| 4323 | * This code is optimized for readability - several passes might be | 4716 | * This code is optimized for readability - several passes might be |
| 4324 | * made over certain portions of the tree. All of those blocks will | 4717 | * made over certain portions of the tree. All of those blocks will |
| @@ -4326,7 +4719,7 @@ out: | |||
| 4326 | * extra overhead is not expressed in terms of disk reads. | 4719 | * extra overhead is not expressed in terms of disk reads. |
| 4327 | */ | 4720 | */ |
| 4328 | static int __ocfs2_mark_extent_written(struct inode *inode, | 4721 | static int __ocfs2_mark_extent_written(struct inode *inode, |
| 4329 | struct buffer_head *di_bh, | 4722 | struct ocfs2_extent_tree *et, |
| 4330 | handle_t *handle, | 4723 | handle_t *handle, |
| 4331 | struct ocfs2_path *path, | 4724 | struct ocfs2_path *path, |
| 4332 | int split_index, | 4725 | int split_index, |
| @@ -4366,11 +4759,9 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
| 4366 | */ | 4759 | */ |
| 4367 | if (path->p_tree_depth) { | 4760 | if (path->p_tree_depth) { |
| 4368 | struct ocfs2_extent_block *eb; | 4761 | struct ocfs2_extent_block *eb; |
| 4369 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 4370 | 4762 | ||
| 4371 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4763 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), |
| 4372 | le64_to_cpu(di->i_last_eb_blk), | 4764 | &last_eb_bh); |
| 4373 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
| 4374 | if (ret) { | 4765 | if (ret) { |
| 4375 | mlog_exit(ret); | 4766 | mlog_exit(ret); |
| 4376 | goto out; | 4767 | goto out; |
| @@ -4403,7 +4794,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
| 4403 | if (ctxt.c_split_covers_rec) | 4794 | if (ctxt.c_split_covers_rec) |
| 4404 | el->l_recs[split_index] = *split_rec; | 4795 | el->l_recs[split_index] = *split_rec; |
| 4405 | else | 4796 | else |
| 4406 | ret = ocfs2_split_and_insert(inode, handle, path, di_bh, | 4797 | ret = ocfs2_split_and_insert(inode, handle, path, et, |
| 4407 | &last_eb_bh, split_index, | 4798 | &last_eb_bh, split_index, |
| 4408 | split_rec, meta_ac); | 4799 | split_rec, meta_ac); |
| 4409 | if (ret) | 4800 | if (ret) |
| @@ -4411,7 +4802,7 @@ static int __ocfs2_mark_extent_written(struct inode *inode, | |||
| 4411 | } else { | 4802 | } else { |
| 4412 | ret = ocfs2_try_to_merge_extent(inode, handle, path, | 4803 | ret = ocfs2_try_to_merge_extent(inode, handle, path, |
| 4413 | split_index, split_rec, | 4804 | split_index, split_rec, |
| 4414 | dealloc, &ctxt); | 4805 | dealloc, &ctxt, et); |
| 4415 | if (ret) | 4806 | if (ret) |
| 4416 | mlog_errno(ret); | 4807 | mlog_errno(ret); |
| 4417 | } | 4808 | } |
| @@ -4429,7 +4820,8 @@ out: | |||
| 4429 | * | 4820 | * |
| 4430 | * The caller is responsible for passing down meta_ac if we'll need it. | 4821 | * The caller is responsible for passing down meta_ac if we'll need it. |
| 4431 | */ | 4822 | */ |
| 4432 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | 4823 | int ocfs2_mark_extent_written(struct inode *inode, |
| 4824 | struct ocfs2_extent_tree *et, | ||
| 4433 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 4825 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
| 4434 | struct ocfs2_alloc_context *meta_ac, | 4826 | struct ocfs2_alloc_context *meta_ac, |
| 4435 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 4827 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
| @@ -4455,10 +4847,14 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
| 4455 | /* | 4847 | /* |
| 4456 | * XXX: This should be fixed up so that we just re-insert the | 4848 | * XXX: This should be fixed up so that we just re-insert the |
| 4457 | * next extent records. | 4849 | * next extent records. |
| 4850 | * | ||
| 4851 | * XXX: This is a hack on the extent tree, maybe it should be | ||
| 4852 | * an op? | ||
| 4458 | */ | 4853 | */ |
| 4459 | ocfs2_extent_map_trunc(inode, 0); | 4854 | if (et->et_ops == &ocfs2_dinode_et_ops) |
| 4855 | ocfs2_extent_map_trunc(inode, 0); | ||
| 4460 | 4856 | ||
| 4461 | left_path = ocfs2_new_inode_path(di_bh); | 4857 | left_path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
| 4462 | if (!left_path) { | 4858 | if (!left_path) { |
| 4463 | ret = -ENOMEM; | 4859 | ret = -ENOMEM; |
| 4464 | mlog_errno(ret); | 4860 | mlog_errno(ret); |
| @@ -4489,8 +4885,9 @@ int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | |||
| 4489 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; | 4885 | split_rec.e_flags = path_leaf_el(left_path)->l_recs[index].e_flags; |
| 4490 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; | 4886 | split_rec.e_flags &= ~OCFS2_EXT_UNWRITTEN; |
| 4491 | 4887 | ||
| 4492 | ret = __ocfs2_mark_extent_written(inode, di_bh, handle, left_path, | 4888 | ret = __ocfs2_mark_extent_written(inode, et, handle, left_path, |
| 4493 | index, &split_rec, meta_ac, dealloc); | 4889 | index, &split_rec, meta_ac, |
| 4890 | dealloc); | ||
| 4494 | if (ret) | 4891 | if (ret) |
| 4495 | mlog_errno(ret); | 4892 | mlog_errno(ret); |
| 4496 | 4893 | ||
| @@ -4499,13 +4896,12 @@ out: | |||
| 4499 | return ret; | 4896 | return ret; |
| 4500 | } | 4897 | } |
| 4501 | 4898 | ||
| 4502 | static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | 4899 | static int ocfs2_split_tree(struct inode *inode, struct ocfs2_extent_tree *et, |
| 4503 | handle_t *handle, struct ocfs2_path *path, | 4900 | handle_t *handle, struct ocfs2_path *path, |
| 4504 | int index, u32 new_range, | 4901 | int index, u32 new_range, |
| 4505 | struct ocfs2_alloc_context *meta_ac) | 4902 | struct ocfs2_alloc_context *meta_ac) |
| 4506 | { | 4903 | { |
| 4507 | int ret, depth, credits = handle->h_buffer_credits; | 4904 | int ret, depth, credits = handle->h_buffer_credits; |
| 4508 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 4509 | struct buffer_head *last_eb_bh = NULL; | 4905 | struct buffer_head *last_eb_bh = NULL; |
| 4510 | struct ocfs2_extent_block *eb; | 4906 | struct ocfs2_extent_block *eb; |
| 4511 | struct ocfs2_extent_list *rightmost_el, *el; | 4907 | struct ocfs2_extent_list *rightmost_el, *el; |
| @@ -4522,9 +4918,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
| 4522 | 4918 | ||
| 4523 | depth = path->p_tree_depth; | 4919 | depth = path->p_tree_depth; |
| 4524 | if (depth > 0) { | 4920 | if (depth > 0) { |
| 4525 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 4921 | ret = ocfs2_read_block(inode, ocfs2_et_get_last_eb_blk(et), |
| 4526 | le64_to_cpu(di->i_last_eb_blk), | 4922 | &last_eb_bh); |
| 4527 | &last_eb_bh, OCFS2_BH_CACHED, inode); | ||
| 4528 | if (ret < 0) { | 4923 | if (ret < 0) { |
| 4529 | mlog_errno(ret); | 4924 | mlog_errno(ret); |
| 4530 | goto out; | 4925 | goto out; |
| @@ -4535,7 +4930,8 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
| 4535 | } else | 4930 | } else |
| 4536 | rightmost_el = path_leaf_el(path); | 4931 | rightmost_el = path_leaf_el(path); |
| 4537 | 4932 | ||
| 4538 | credits += path->p_tree_depth + ocfs2_extend_meta_needed(di); | 4933 | credits += path->p_tree_depth + |
| 4934 | ocfs2_extend_meta_needed(et->et_root_el); | ||
| 4539 | ret = ocfs2_extend_trans(handle, credits); | 4935 | ret = ocfs2_extend_trans(handle, credits); |
| 4540 | if (ret) { | 4936 | if (ret) { |
| 4541 | mlog_errno(ret); | 4937 | mlog_errno(ret); |
| @@ -4544,7 +4940,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
| 4544 | 4940 | ||
| 4545 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == | 4941 | if (le16_to_cpu(rightmost_el->l_next_free_rec) == |
| 4546 | le16_to_cpu(rightmost_el->l_count)) { | 4942 | le16_to_cpu(rightmost_el->l_count)) { |
| 4547 | ret = ocfs2_grow_tree(inode, handle, di_bh, &depth, &last_eb_bh, | 4943 | ret = ocfs2_grow_tree(inode, handle, et, &depth, &last_eb_bh, |
| 4548 | meta_ac); | 4944 | meta_ac); |
| 4549 | if (ret) { | 4945 | if (ret) { |
| 4550 | mlog_errno(ret); | 4946 | mlog_errno(ret); |
| @@ -4558,7 +4954,7 @@ static int ocfs2_split_tree(struct inode *inode, struct buffer_head *di_bh, | |||
| 4558 | insert.ins_split = SPLIT_RIGHT; | 4954 | insert.ins_split = SPLIT_RIGHT; |
| 4559 | insert.ins_tree_depth = depth; | 4955 | insert.ins_tree_depth = depth; |
| 4560 | 4956 | ||
| 4561 | ret = ocfs2_do_insert_extent(inode, handle, di_bh, &split_rec, &insert); | 4957 | ret = ocfs2_do_insert_extent(inode, handle, et, &split_rec, &insert); |
| 4562 | if (ret) | 4958 | if (ret) |
| 4563 | mlog_errno(ret); | 4959 | mlog_errno(ret); |
| 4564 | 4960 | ||
| @@ -4570,7 +4966,8 @@ out: | |||
| 4570 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | 4966 | static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, |
| 4571 | struct ocfs2_path *path, int index, | 4967 | struct ocfs2_path *path, int index, |
| 4572 | struct ocfs2_cached_dealloc_ctxt *dealloc, | 4968 | struct ocfs2_cached_dealloc_ctxt *dealloc, |
| 4573 | u32 cpos, u32 len) | 4969 | u32 cpos, u32 len, |
| 4970 | struct ocfs2_extent_tree *et) | ||
| 4574 | { | 4971 | { |
| 4575 | int ret; | 4972 | int ret; |
| 4576 | u32 left_cpos, rec_range, trunc_range; | 4973 | u32 left_cpos, rec_range, trunc_range; |
| @@ -4582,7 +4979,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
| 4582 | struct ocfs2_extent_block *eb; | 4979 | struct ocfs2_extent_block *eb; |
| 4583 | 4980 | ||
| 4584 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { | 4981 | if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) { |
| 4585 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 4982 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); |
| 4586 | if (ret) { | 4983 | if (ret) { |
| 4587 | mlog_errno(ret); | 4984 | mlog_errno(ret); |
| 4588 | goto out; | 4985 | goto out; |
| @@ -4713,7 +5110,7 @@ static int ocfs2_truncate_rec(struct inode *inode, handle_t *handle, | |||
| 4713 | 5110 | ||
| 4714 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); | 5111 | ocfs2_journal_dirty(handle, path_leaf_bh(path)); |
| 4715 | 5112 | ||
| 4716 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc); | 5113 | ret = ocfs2_rotate_tree_left(inode, handle, path, dealloc, et); |
| 4717 | if (ret) { | 5114 | if (ret) { |
| 4718 | mlog_errno(ret); | 5115 | mlog_errno(ret); |
| 4719 | goto out; | 5116 | goto out; |
| @@ -4724,7 +5121,8 @@ out: | |||
| 4724 | return ret; | 5121 | return ret; |
| 4725 | } | 5122 | } |
| 4726 | 5123 | ||
| 4727 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 5124 | int ocfs2_remove_extent(struct inode *inode, |
| 5125 | struct ocfs2_extent_tree *et, | ||
| 4728 | u32 cpos, u32 len, handle_t *handle, | 5126 | u32 cpos, u32 len, handle_t *handle, |
| 4729 | struct ocfs2_alloc_context *meta_ac, | 5127 | struct ocfs2_alloc_context *meta_ac, |
| 4730 | struct ocfs2_cached_dealloc_ctxt *dealloc) | 5128 | struct ocfs2_cached_dealloc_ctxt *dealloc) |
| @@ -4733,11 +5131,11 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
| 4733 | u32 rec_range, trunc_range; | 5131 | u32 rec_range, trunc_range; |
| 4734 | struct ocfs2_extent_rec *rec; | 5132 | struct ocfs2_extent_rec *rec; |
| 4735 | struct ocfs2_extent_list *el; | 5133 | struct ocfs2_extent_list *el; |
| 4736 | struct ocfs2_path *path; | 5134 | struct ocfs2_path *path = NULL; |
| 4737 | 5135 | ||
| 4738 | ocfs2_extent_map_trunc(inode, 0); | 5136 | ocfs2_extent_map_trunc(inode, 0); |
| 4739 | 5137 | ||
| 4740 | path = ocfs2_new_inode_path(di_bh); | 5138 | path = ocfs2_new_path(et->et_root_bh, et->et_root_el); |
| 4741 | if (!path) { | 5139 | if (!path) { |
| 4742 | ret = -ENOMEM; | 5140 | ret = -ENOMEM; |
| 4743 | mlog_errno(ret); | 5141 | mlog_errno(ret); |
| @@ -4790,13 +5188,13 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
| 4790 | 5188 | ||
| 4791 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { | 5189 | if (le32_to_cpu(rec->e_cpos) == cpos || rec_range == trunc_range) { |
| 4792 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5190 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, |
| 4793 | cpos, len); | 5191 | cpos, len, et); |
| 4794 | if (ret) { | 5192 | if (ret) { |
| 4795 | mlog_errno(ret); | 5193 | mlog_errno(ret); |
| 4796 | goto out; | 5194 | goto out; |
| 4797 | } | 5195 | } |
| 4798 | } else { | 5196 | } else { |
| 4799 | ret = ocfs2_split_tree(inode, di_bh, handle, path, index, | 5197 | ret = ocfs2_split_tree(inode, et, handle, path, index, |
| 4800 | trunc_range, meta_ac); | 5198 | trunc_range, meta_ac); |
| 4801 | if (ret) { | 5199 | if (ret) { |
| 4802 | mlog_errno(ret); | 5200 | mlog_errno(ret); |
| @@ -4845,7 +5243,7 @@ int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | |||
| 4845 | } | 5243 | } |
| 4846 | 5244 | ||
| 4847 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, | 5245 | ret = ocfs2_truncate_rec(inode, handle, path, index, dealloc, |
| 4848 | cpos, len); | 5246 | cpos, len, et); |
| 4849 | if (ret) { | 5247 | if (ret) { |
| 4850 | mlog_errno(ret); | 5248 | mlog_errno(ret); |
| 4851 | goto out; | 5249 | goto out; |
| @@ -5188,8 +5586,7 @@ static int ocfs2_get_truncate_log_info(struct ocfs2_super *osb, | |||
| 5188 | goto bail; | 5586 | goto bail; |
| 5189 | } | 5587 | } |
| 5190 | 5588 | ||
| 5191 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 5589 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
| 5192 | OCFS2_BH_CACHED, inode); | ||
| 5193 | if (status < 0) { | 5590 | if (status < 0) { |
| 5194 | iput(inode); | 5591 | iput(inode); |
| 5195 | mlog_errno(status); | 5592 | mlog_errno(status); |
| @@ -5264,8 +5661,7 @@ int ocfs2_begin_truncate_log_recovery(struct ocfs2_super *osb, | |||
| 5264 | bail: | 5661 | bail: |
| 5265 | if (tl_inode) | 5662 | if (tl_inode) |
| 5266 | iput(tl_inode); | 5663 | iput(tl_inode); |
| 5267 | if (tl_bh) | 5664 | brelse(tl_bh); |
| 5268 | brelse(tl_bh); | ||
| 5269 | 5665 | ||
| 5270 | if (status < 0 && (*tl_copy)) { | 5666 | if (status < 0 && (*tl_copy)) { |
| 5271 | kfree(*tl_copy); | 5667 | kfree(*tl_copy); |
| @@ -6008,20 +6404,13 @@ bail: | |||
| 6008 | return status; | 6404 | return status; |
| 6009 | } | 6405 | } |
| 6010 | 6406 | ||
| 6011 | static int ocfs2_writeback_zero_func(handle_t *handle, struct buffer_head *bh) | 6407 | static int ocfs2_zero_func(handle_t *handle, struct buffer_head *bh) |
| 6012 | { | 6408 | { |
| 6013 | set_buffer_uptodate(bh); | 6409 | set_buffer_uptodate(bh); |
| 6014 | mark_buffer_dirty(bh); | 6410 | mark_buffer_dirty(bh); |
| 6015 | return 0; | 6411 | return 0; |
| 6016 | } | 6412 | } |
| 6017 | 6413 | ||
| 6018 | static int ocfs2_ordered_zero_func(handle_t *handle, struct buffer_head *bh) | ||
| 6019 | { | ||
| 6020 | set_buffer_uptodate(bh); | ||
| 6021 | mark_buffer_dirty(bh); | ||
| 6022 | return ocfs2_journal_dirty_data(handle, bh); | ||
| 6023 | } | ||
| 6024 | |||
| 6025 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | 6414 | static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, |
| 6026 | unsigned int from, unsigned int to, | 6415 | unsigned int from, unsigned int to, |
| 6027 | struct page *page, int zero, u64 *phys) | 6416 | struct page *page, int zero, u64 *phys) |
| @@ -6040,17 +6429,18 @@ static void ocfs2_map_and_dirty_page(struct inode *inode, handle_t *handle, | |||
| 6040 | * here if they aren't - ocfs2_map_page_blocks() | 6429 | * here if they aren't - ocfs2_map_page_blocks() |
| 6041 | * might've skipped some | 6430 | * might've skipped some |
| 6042 | */ | 6431 | */ |
| 6043 | if (ocfs2_should_order_data(inode)) { | 6432 | ret = walk_page_buffers(handle, page_buffers(page), |
| 6044 | ret = walk_page_buffers(handle, | 6433 | from, to, &partial, |
| 6045 | page_buffers(page), | 6434 | ocfs2_zero_func); |
| 6046 | from, to, &partial, | 6435 | if (ret < 0) |
| 6047 | ocfs2_ordered_zero_func); | 6436 | mlog_errno(ret); |
| 6048 | if (ret < 0) | 6437 | else if (ocfs2_should_order_data(inode)) { |
| 6049 | mlog_errno(ret); | 6438 | ret = ocfs2_jbd2_file_inode(handle, inode); |
| 6050 | } else { | 6439 | #ifdef CONFIG_OCFS2_COMPAT_JBD |
| 6051 | ret = walk_page_buffers(handle, page_buffers(page), | 6440 | ret = walk_page_buffers(handle, page_buffers(page), |
| 6052 | from, to, &partial, | 6441 | from, to, &partial, |
| 6053 | ocfs2_writeback_zero_func); | 6442 | ocfs2_journal_dirty_data); |
| 6443 | #endif | ||
| 6054 | if (ret < 0) | 6444 | if (ret < 0) |
| 6055 | mlog_errno(ret); | 6445 | mlog_errno(ret); |
| 6056 | } | 6446 | } |
| @@ -6215,20 +6605,29 @@ out: | |||
| 6215 | return ret; | 6605 | return ret; |
| 6216 | } | 6606 | } |
| 6217 | 6607 | ||
| 6218 | static void ocfs2_zero_dinode_id2(struct inode *inode, struct ocfs2_dinode *di) | 6608 | static void ocfs2_zero_dinode_id2_with_xattr(struct inode *inode, |
| 6609 | struct ocfs2_dinode *di) | ||
| 6219 | { | 6610 | { |
| 6220 | unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; | 6611 | unsigned int blocksize = 1 << inode->i_sb->s_blocksize_bits; |
| 6612 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
| 6221 | 6613 | ||
| 6222 | memset(&di->id2, 0, blocksize - offsetof(struct ocfs2_dinode, id2)); | 6614 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) |
| 6615 | memset(&di->id2, 0, blocksize - | ||
| 6616 | offsetof(struct ocfs2_dinode, id2) - | ||
| 6617 | xattrsize); | ||
| 6618 | else | ||
| 6619 | memset(&di->id2, 0, blocksize - | ||
| 6620 | offsetof(struct ocfs2_dinode, id2)); | ||
| 6223 | } | 6621 | } |
| 6224 | 6622 | ||
| 6225 | void ocfs2_dinode_new_extent_list(struct inode *inode, | 6623 | void ocfs2_dinode_new_extent_list(struct inode *inode, |
| 6226 | struct ocfs2_dinode *di) | 6624 | struct ocfs2_dinode *di) |
| 6227 | { | 6625 | { |
| 6228 | ocfs2_zero_dinode_id2(inode, di); | 6626 | ocfs2_zero_dinode_id2_with_xattr(inode, di); |
| 6229 | di->id2.i_list.l_tree_depth = 0; | 6627 | di->id2.i_list.l_tree_depth = 0; |
| 6230 | di->id2.i_list.l_next_free_rec = 0; | 6628 | di->id2.i_list.l_next_free_rec = 0; |
| 6231 | di->id2.i_list.l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(inode->i_sb)); | 6629 | di->id2.i_list.l_count = cpu_to_le16( |
| 6630 | ocfs2_extent_recs_per_inode_with_xattr(inode->i_sb, di)); | ||
| 6232 | } | 6631 | } |
| 6233 | 6632 | ||
| 6234 | void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) | 6633 | void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) |
| @@ -6245,9 +6644,10 @@ void ocfs2_set_inode_data_inline(struct inode *inode, struct ocfs2_dinode *di) | |||
| 6245 | * We clear the entire i_data structure here so that all | 6644 | * We clear the entire i_data structure here so that all |
| 6246 | * fields can be properly initialized. | 6645 | * fields can be properly initialized. |
| 6247 | */ | 6646 | */ |
| 6248 | ocfs2_zero_dinode_id2(inode, di); | 6647 | ocfs2_zero_dinode_id2_with_xattr(inode, di); |
| 6249 | 6648 | ||
| 6250 | idata->id_count = cpu_to_le16(ocfs2_max_inline_data(inode->i_sb)); | 6649 | idata->id_count = cpu_to_le16( |
| 6650 | ocfs2_max_inline_data_with_xattr(inode->i_sb, di)); | ||
| 6251 | } | 6651 | } |
| 6252 | 6652 | ||
| 6253 | int ocfs2_convert_inline_data_to_extents(struct inode *inode, | 6653 | int ocfs2_convert_inline_data_to_extents(struct inode *inode, |
| @@ -6262,6 +6662,7 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6262 | struct ocfs2_alloc_context *data_ac = NULL; | 6662 | struct ocfs2_alloc_context *data_ac = NULL; |
| 6263 | struct page **pages = NULL; | 6663 | struct page **pages = NULL; |
| 6264 | loff_t end = osb->s_clustersize; | 6664 | loff_t end = osb->s_clustersize; |
| 6665 | struct ocfs2_extent_tree et; | ||
| 6265 | 6666 | ||
| 6266 | has_data = i_size_read(inode) ? 1 : 0; | 6667 | has_data = i_size_read(inode) ? 1 : 0; |
| 6267 | 6668 | ||
| @@ -6361,7 +6762,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode, | |||
| 6361 | * this proves to be false, we could always re-build | 6762 | * this proves to be false, we could always re-build |
| 6362 | * the in-inode data from our pages. | 6763 | * the in-inode data from our pages. |
| 6363 | */ | 6764 | */ |
| 6364 | ret = ocfs2_insert_extent(osb, handle, inode, di_bh, | 6765 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); |
| 6766 | ret = ocfs2_insert_extent(osb, handle, inode, &et, | ||
| 6365 | 0, block, 1, 0, NULL); | 6767 | 0, block, 1, 0, NULL); |
| 6366 | if (ret) { | 6768 | if (ret) { |
| 6367 | mlog_errno(ret); | 6769 | mlog_errno(ret); |
| @@ -6404,13 +6806,14 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, | |||
| 6404 | handle_t *handle = NULL; | 6806 | handle_t *handle = NULL; |
| 6405 | struct inode *tl_inode = osb->osb_tl_inode; | 6807 | struct inode *tl_inode = osb->osb_tl_inode; |
| 6406 | struct ocfs2_path *path = NULL; | 6808 | struct ocfs2_path *path = NULL; |
| 6809 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data; | ||
| 6407 | 6810 | ||
| 6408 | mlog_entry_void(); | 6811 | mlog_entry_void(); |
| 6409 | 6812 | ||
| 6410 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, | 6813 | new_highest_cpos = ocfs2_clusters_for_bytes(osb->sb, |
| 6411 | i_size_read(inode)); | 6814 | i_size_read(inode)); |
| 6412 | 6815 | ||
| 6413 | path = ocfs2_new_inode_path(fe_bh); | 6816 | path = ocfs2_new_path(fe_bh, &di->id2.i_list); |
| 6414 | if (!path) { | 6817 | if (!path) { |
| 6415 | status = -ENOMEM; | 6818 | status = -ENOMEM; |
| 6416 | mlog_errno(status); | 6819 | mlog_errno(status); |
| @@ -6581,8 +6984,8 @@ int ocfs2_prepare_truncate(struct ocfs2_super *osb, | |||
| 6581 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); | 6984 | ocfs2_init_dealloc_ctxt(&(*tc)->tc_dealloc); |
| 6582 | 6985 | ||
| 6583 | if (fe->id2.i_list.l_tree_depth) { | 6986 | if (fe->id2.i_list.l_tree_depth) { |
| 6584 | status = ocfs2_read_block(osb, le64_to_cpu(fe->i_last_eb_blk), | 6987 | status = ocfs2_read_block(inode, le64_to_cpu(fe->i_last_eb_blk), |
| 6585 | &last_eb_bh, OCFS2_BH_CACHED, inode); | 6988 | &last_eb_bh); |
| 6586 | if (status < 0) { | 6989 | if (status < 0) { |
| 6587 | mlog_errno(status); | 6990 | mlog_errno(status); |
| 6588 | goto bail; | 6991 | goto bail; |
| @@ -6695,8 +7098,7 @@ static void ocfs2_free_truncate_context(struct ocfs2_truncate_context *tc) | |||
| 6695 | mlog(ML_NOTICE, | 7098 | mlog(ML_NOTICE, |
| 6696 | "Truncate completion has non-empty dealloc context\n"); | 7099 | "Truncate completion has non-empty dealloc context\n"); |
| 6697 | 7100 | ||
| 6698 | if (tc->tc_last_eb_bh) | 7101 | brelse(tc->tc_last_eb_bh); |
| 6699 | brelse(tc->tc_last_eb_bh); | ||
| 6700 | 7102 | ||
| 6701 | kfree(tc); | 7103 | kfree(tc); |
| 6702 | } | 7104 | } |
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index 42ff94bd8011..70257c84cfbe 100644 --- a/fs/ocfs2/alloc.h +++ b/fs/ocfs2/alloc.h | |||
| @@ -26,30 +26,102 @@ | |||
| 26 | #ifndef OCFS2_ALLOC_H | 26 | #ifndef OCFS2_ALLOC_H |
| 27 | #define OCFS2_ALLOC_H | 27 | #define OCFS2_ALLOC_H |
| 28 | 28 | ||
| 29 | |||
| 30 | /* | ||
| 31 | * For an xattr tree leaf, we limit the leaf byte size to 64K. | ||
| 32 | */ | ||
| 33 | #define OCFS2_MAX_XATTR_TREE_LEAF_SIZE 65536 | ||
| 34 | |||
| 35 | /* | ||
| 36 | * ocfs2_extent_tree and ocfs2_extent_tree_operations are used to abstract | ||
| 37 | * the b-tree operations in ocfs2. The b-tree operations are no longer | ||
| 38 | * limited to ocfs2_dinode only. Any structure that needs to allocate clusters | ||
| 39 | * can use a b-tree; it only needs to implement its own ocfs2_extent_tree | ||
| 40 | * and operations. | ||
| 41 | * | ||
| 42 | * ocfs2_extent_tree becomes the first-class object for extent tree | ||
| 43 | * manipulation. Callers of the alloc.c code need to fill it via one of | ||
| 44 | * the ocfs2_init_*_extent_tree() operations below. | ||
| 45 | * | ||
| 46 | * ocfs2_extent_tree contains info for the root of the b-tree; it must have a | ||
| 47 | * root ocfs2_extent_list and a root_bh so that they can be used in the b-tree | ||
| 48 | * functions. | ||
| 49 | * ocfs2_extent_tree_operations abstract the normal operations we do for | ||
| 50 | * the root of extent b-tree. | ||
| 51 | */ | ||
| 52 | struct ocfs2_extent_tree_operations; | ||
| 53 | struct ocfs2_extent_tree { | ||
| 54 | struct ocfs2_extent_tree_operations *et_ops; | ||
| 55 | struct buffer_head *et_root_bh; | ||
| 56 | struct ocfs2_extent_list *et_root_el; | ||
| 57 | void *et_object; | ||
| 58 | unsigned int et_max_leaf_clusters; | ||
| 59 | }; | ||
| 60 | |||
| 61 | /* | ||
| 62 | * ocfs2_init_*_extent_tree() will fill an ocfs2_extent_tree from the | ||
| 63 | * specified object buffer. | ||
| 64 | */ | ||
| 65 | void ocfs2_init_dinode_extent_tree(struct ocfs2_extent_tree *et, | ||
| 66 | struct inode *inode, | ||
| 67 | struct buffer_head *bh); | ||
| 68 | void ocfs2_init_xattr_tree_extent_tree(struct ocfs2_extent_tree *et, | ||
| 69 | struct inode *inode, | ||
| 70 | struct buffer_head *bh); | ||
| 71 | void ocfs2_init_xattr_value_extent_tree(struct ocfs2_extent_tree *et, | ||
| 72 | struct inode *inode, | ||
| 73 | struct buffer_head *bh, | ||
| 74 | struct ocfs2_xattr_value_root *xv); | ||
| 75 | |||
| 29 | struct ocfs2_alloc_context; | 76 | struct ocfs2_alloc_context; |
| 30 | int ocfs2_insert_extent(struct ocfs2_super *osb, | 77 | int ocfs2_insert_extent(struct ocfs2_super *osb, |
| 31 | handle_t *handle, | 78 | handle_t *handle, |
| 32 | struct inode *inode, | 79 | struct inode *inode, |
| 33 | struct buffer_head *fe_bh, | 80 | struct ocfs2_extent_tree *et, |
| 34 | u32 cpos, | 81 | u32 cpos, |
| 35 | u64 start_blk, | 82 | u64 start_blk, |
| 36 | u32 new_clusters, | 83 | u32 new_clusters, |
| 37 | u8 flags, | 84 | u8 flags, |
| 38 | struct ocfs2_alloc_context *meta_ac); | 85 | struct ocfs2_alloc_context *meta_ac); |
| 86 | |||
| 87 | enum ocfs2_alloc_restarted { | ||
| 88 | RESTART_NONE = 0, | ||
| 89 | RESTART_TRANS, | ||
| 90 | RESTART_META | ||
| 91 | }; | ||
| 92 | int ocfs2_add_clusters_in_btree(struct ocfs2_super *osb, | ||
| 93 | struct inode *inode, | ||
| 94 | u32 *logical_offset, | ||
| 95 | u32 clusters_to_add, | ||
| 96 | int mark_unwritten, | ||
| 97 | struct ocfs2_extent_tree *et, | ||
| 98 | handle_t *handle, | ||
| 99 | struct ocfs2_alloc_context *data_ac, | ||
| 100 | struct ocfs2_alloc_context *meta_ac, | ||
| 101 | enum ocfs2_alloc_restarted *reason_ret); | ||
| 39 | struct ocfs2_cached_dealloc_ctxt; | 102 | struct ocfs2_cached_dealloc_ctxt; |
| 40 | int ocfs2_mark_extent_written(struct inode *inode, struct buffer_head *di_bh, | 103 | int ocfs2_mark_extent_written(struct inode *inode, |
| 104 | struct ocfs2_extent_tree *et, | ||
| 41 | handle_t *handle, u32 cpos, u32 len, u32 phys, | 105 | handle_t *handle, u32 cpos, u32 len, u32 phys, |
| 42 | struct ocfs2_alloc_context *meta_ac, | 106 | struct ocfs2_alloc_context *meta_ac, |
| 43 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 107 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
| 44 | int ocfs2_remove_extent(struct inode *inode, struct buffer_head *di_bh, | 108 | int ocfs2_remove_extent(struct inode *inode, |
| 109 | struct ocfs2_extent_tree *et, | ||
| 45 | u32 cpos, u32 len, handle_t *handle, | 110 | u32 cpos, u32 len, handle_t *handle, |
| 46 | struct ocfs2_alloc_context *meta_ac, | 111 | struct ocfs2_alloc_context *meta_ac, |
| 47 | struct ocfs2_cached_dealloc_ctxt *dealloc); | 112 | struct ocfs2_cached_dealloc_ctxt *dealloc); |
| 48 | int ocfs2_num_free_extents(struct ocfs2_super *osb, | 113 | int ocfs2_num_free_extents(struct ocfs2_super *osb, |
| 49 | struct inode *inode, | 114 | struct inode *inode, |
| 50 | struct ocfs2_dinode *fe); | 115 | struct ocfs2_extent_tree *et); |
| 51 | /* how many new metadata chunks would an allocation need at maximum? */ | 116 | |
| 52 | static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) | 117 | /* |
| 118 | * how many new metadata chunks would an allocation need at maximum? | ||
| 119 | * | ||
| 120 | * Please note that the caller must make sure that root_el is the root | ||
| 121 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
| 122 | * the result may be wrong. | ||
| 123 | */ | ||
| 124 | static inline int ocfs2_extend_meta_needed(struct ocfs2_extent_list *root_el) | ||
| 53 | { | 125 | { |
| 54 | /* | 126 | /* |
| 55 | * Rather than do all the work of determining how much we need | 127 | * Rather than do all the work of determining how much we need |
| @@ -59,7 +131,7 @@ static inline int ocfs2_extend_meta_needed(struct ocfs2_dinode *fe) | |||
| 59 | * new tree_depth==0 extent_block, and one block at the new | 131 | * new tree_depth==0 extent_block, and one block at the new |
| 60 | * top-of-the tree. | 132 | * top-of-the tree. |
| 61 | */ | 133 | */ |
| 62 | return le16_to_cpu(fe->id2.i_list.l_tree_depth) + 2; | 134 | return le16_to_cpu(root_el->l_tree_depth) + 2; |
| 63 | } | 135 | } |
| 64 | 136 | ||
| 65 | void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); | 137 | void ocfs2_dinode_new_extent_list(struct inode *inode, struct ocfs2_dinode *di); |
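Read concretely, the estimate above allows one new extent block per existing level of the tree plus one new leaf and one new root, i.e. l_tree_depth + 2. A hypothetical use when sizing a metadata reservation and a transaction (variable names assumed, not taken from the patch) might be:

	struct ocfs2_extent_list *root_el = et->et_root_el;	/* must be the tree root, per the comment */
	int max_meta = ocfs2_extend_meta_needed(root_el);	/* depth 0 -> 2 blocks, depth 2 -> 4 blocks */
	int credits  = ocfs2_calc_extend_credits(sb, root_el, clusters_to_add);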
| @@ -146,4 +218,13 @@ static inline unsigned int ocfs2_rec_clusters(struct ocfs2_extent_list *el, | |||
| 146 | return le16_to_cpu(rec->e_leaf_clusters); | 218 | return le16_to_cpu(rec->e_leaf_clusters); |
| 147 | } | 219 | } |
| 148 | 220 | ||
| 221 | /* | ||
| 222 | * This is only valid for leaf nodes, which are the only ones that can | ||
| 223 | * have empty extents anyway. | ||
| 224 | */ | ||
| 225 | static inline int ocfs2_is_empty_extent(struct ocfs2_extent_rec *rec) | ||
| 226 | { | ||
| 227 | return !rec->e_leaf_clusters; | ||
| 228 | } | ||
| 229 | |||
| 149 | #endif /* OCFS2_ALLOC_H */ | 230 | #endif /* OCFS2_ALLOC_H */ |
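Taken together, the header now defines a small first-class object: callers build an ocfs2_extent_tree for whatever structure owns the root extent list and pass it to the generic routines. The fragment below is a hedged sketch of the dinode case, mirroring how fs/ocfs2/aops.c and alloc.c use it later in this diff; the surrounding osb, handle, allocation context and dealloc context are assumed:

	struct ocfs2_extent_tree et;

	ocfs2_init_dinode_extent_tree(&et, inode, di_bh);

	/* insert one new cluster at logical cluster 'cpos' */
	ret = ocfs2_insert_extent(osb, handle, inode, &et,
				  cpos, start_blk, 1, 0, meta_ac);
	if (ret)
		mlog_errno(ret);

	/* the same tree object drives removal of a range */
	ret = ocfs2_remove_extent(inode, &et, cpos, len, handle,
				  meta_ac, &dealloc);

The xattr variants differ only in how the tree is initialized (ocfs2_init_xattr_tree_extent_tree() and ocfs2_init_xattr_value_extent_tree()); the calls into alloc.c are the same.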
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index a53da1466277..c22543b33420 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c | |||
| @@ -68,9 +68,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | |||
| 68 | goto bail; | 68 | goto bail; |
| 69 | } | 69 | } |
| 70 | 70 | ||
| 71 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 71 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
| 72 | OCFS2_I(inode)->ip_blkno, | ||
| 73 | &bh, OCFS2_BH_CACHED, inode); | ||
| 74 | if (status < 0) { | 72 | if (status < 0) { |
| 75 | mlog_errno(status); | 73 | mlog_errno(status); |
| 76 | goto bail; | 74 | goto bail; |
| @@ -128,8 +126,7 @@ static int ocfs2_symlink_get_block(struct inode *inode, sector_t iblock, | |||
| 128 | err = 0; | 126 | err = 0; |
| 129 | 127 | ||
| 130 | bail: | 128 | bail: |
| 131 | if (bh) | 129 | brelse(bh); |
| 132 | brelse(bh); | ||
| 133 | 130 | ||
| 134 | mlog_exit(err); | 131 | mlog_exit(err); |
| 135 | return err; | 132 | return err; |
| @@ -261,13 +258,11 @@ static int ocfs2_readpage_inline(struct inode *inode, struct page *page) | |||
| 261 | { | 258 | { |
| 262 | int ret; | 259 | int ret; |
| 263 | struct buffer_head *di_bh = NULL; | 260 | struct buffer_head *di_bh = NULL; |
| 264 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 265 | 261 | ||
| 266 | BUG_ON(!PageLocked(page)); | 262 | BUG_ON(!PageLocked(page)); |
| 267 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); | 263 | BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)); |
| 268 | 264 | ||
| 269 | ret = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &di_bh, | 265 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); |
| 270 | OCFS2_BH_CACHED, inode); | ||
| 271 | if (ret) { | 266 | if (ret) { |
| 272 | mlog_errno(ret); | 267 | mlog_errno(ret); |
| 273 | goto out; | 268 | goto out; |
| @@ -485,11 +480,14 @@ handle_t *ocfs2_start_walk_page_trans(struct inode *inode, | |||
| 485 | } | 480 | } |
| 486 | 481 | ||
| 487 | if (ocfs2_should_order_data(inode)) { | 482 | if (ocfs2_should_order_data(inode)) { |
| 483 | ret = ocfs2_jbd2_file_inode(handle, inode); | ||
| 484 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
| 488 | ret = walk_page_buffers(handle, | 485 | ret = walk_page_buffers(handle, |
| 489 | page_buffers(page), | 486 | page_buffers(page), |
| 490 | from, to, NULL, | 487 | from, to, NULL, |
| 491 | ocfs2_journal_dirty_data); | 488 | ocfs2_journal_dirty_data); |
| 492 | if (ret < 0) | 489 | #endif |
| 490 | if (ret < 0) | ||
| 493 | mlog_errno(ret); | 491 | mlog_errno(ret); |
| 494 | } | 492 | } |
| 495 | out: | 493 | out: |
| @@ -669,7 +667,7 @@ static void ocfs2_invalidatepage(struct page *page, unsigned long offset) | |||
| 669 | { | 667 | { |
| 670 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; | 668 | journal_t *journal = OCFS2_SB(page->mapping->host->i_sb)->journal->j_journal; |
| 671 | 669 | ||
| 672 | journal_invalidatepage(journal, page, offset); | 670 | jbd2_journal_invalidatepage(journal, page, offset); |
| 673 | } | 671 | } |
| 674 | 672 | ||
| 675 | static int ocfs2_releasepage(struct page *page, gfp_t wait) | 673 | static int ocfs2_releasepage(struct page *page, gfp_t wait) |
| @@ -678,7 +676,7 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) | |||
| 678 | 676 | ||
| 679 | if (!page_has_buffers(page)) | 677 | if (!page_has_buffers(page)) |
| 680 | return 0; | 678 | return 0; |
| 681 | return journal_try_to_free_buffers(journal, page, wait); | 679 | return jbd2_journal_try_to_free_buffers(journal, page, wait); |
| 682 | } | 680 | } |
| 683 | 681 | ||
| 684 | static ssize_t ocfs2_direct_IO(int rw, | 682 | static ssize_t ocfs2_direct_IO(int rw, |
| @@ -1074,11 +1072,15 @@ static void ocfs2_write_failure(struct inode *inode, | |||
| 1074 | tmppage = wc->w_pages[i]; | 1072 | tmppage = wc->w_pages[i]; |
| 1075 | 1073 | ||
| 1076 | if (page_has_buffers(tmppage)) { | 1074 | if (page_has_buffers(tmppage)) { |
| 1077 | if (ocfs2_should_order_data(inode)) | 1075 | if (ocfs2_should_order_data(inode)) { |
| 1076 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | ||
| 1077 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
| 1078 | walk_page_buffers(wc->w_handle, | 1078 | walk_page_buffers(wc->w_handle, |
| 1079 | page_buffers(tmppage), | 1079 | page_buffers(tmppage), |
| 1080 | from, to, NULL, | 1080 | from, to, NULL, |
| 1081 | ocfs2_journal_dirty_data); | 1081 | ocfs2_journal_dirty_data); |
| 1082 | #endif | ||
| 1083 | } | ||
| 1082 | 1084 | ||
| 1083 | block_commit_write(tmppage, from, to); | 1085 | block_commit_write(tmppage, from, to); |
| 1084 | } | 1086 | } |
| @@ -1242,6 +1244,7 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
| 1242 | int ret, i, new, should_zero = 0; | 1244 | int ret, i, new, should_zero = 0; |
| 1243 | u64 v_blkno, p_blkno; | 1245 | u64 v_blkno, p_blkno; |
| 1244 | struct inode *inode = mapping->host; | 1246 | struct inode *inode = mapping->host; |
| 1247 | struct ocfs2_extent_tree et; | ||
| 1245 | 1248 | ||
| 1246 | new = phys == 0 ? 1 : 0; | 1249 | new = phys == 0 ? 1 : 0; |
| 1247 | if (new || unwritten) | 1250 | if (new || unwritten) |
| @@ -1255,10 +1258,10 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
| 1255 | * any additional semaphores or cluster locks. | 1258 | * any additional semaphores or cluster locks. |
| 1256 | */ | 1259 | */ |
| 1257 | tmp_pos = cpos; | 1260 | tmp_pos = cpos; |
| 1258 | ret = ocfs2_do_extend_allocation(OCFS2_SB(inode->i_sb), inode, | 1261 | ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode, |
| 1259 | &tmp_pos, 1, 0, wc->w_di_bh, | 1262 | &tmp_pos, 1, 0, wc->w_di_bh, |
| 1260 | wc->w_handle, data_ac, | 1263 | wc->w_handle, data_ac, |
| 1261 | meta_ac, NULL); | 1264 | meta_ac, NULL); |
| 1262 | /* | 1265 | /* |
| 1263 | * This shouldn't happen because we must have already | 1266 | * This shouldn't happen because we must have already |
| 1264 | * calculated the correct meta data allocation required. The | 1267 | * calculated the correct meta data allocation required. The |
| @@ -1276,7 +1279,8 @@ static int ocfs2_write_cluster(struct address_space *mapping, | |||
| 1276 | goto out; | 1279 | goto out; |
| 1277 | } | 1280 | } |
| 1278 | } else if (unwritten) { | 1281 | } else if (unwritten) { |
| 1279 | ret = ocfs2_mark_extent_written(inode, wc->w_di_bh, | 1282 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); |
| 1283 | ret = ocfs2_mark_extent_written(inode, &et, | ||
| 1280 | wc->w_handle, cpos, 1, phys, | 1284 | wc->w_handle, cpos, 1, phys, |
| 1281 | meta_ac, &wc->w_dealloc); | 1285 | meta_ac, &wc->w_dealloc); |
| 1282 | if (ret < 0) { | 1286 | if (ret < 0) { |
| @@ -1665,6 +1669,7 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
| 1665 | struct ocfs2_alloc_context *data_ac = NULL; | 1669 | struct ocfs2_alloc_context *data_ac = NULL; |
| 1666 | struct ocfs2_alloc_context *meta_ac = NULL; | 1670 | struct ocfs2_alloc_context *meta_ac = NULL; |
| 1667 | handle_t *handle; | 1671 | handle_t *handle; |
| 1672 | struct ocfs2_extent_tree et; | ||
| 1668 | 1673 | ||
| 1669 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); | 1674 | ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh); |
| 1670 | if (ret) { | 1675 | if (ret) { |
| @@ -1712,14 +1717,23 @@ int ocfs2_write_begin_nolock(struct address_space *mapping, | |||
| 1712 | * ocfs2_lock_allocators(). It greatly over-estimates | 1717 | * ocfs2_lock_allocators(). It greatly over-estimates |
| 1713 | * the work to be done. | 1718 | * the work to be done. |
| 1714 | */ | 1719 | */ |
| 1715 | ret = ocfs2_lock_allocators(inode, di, clusters_to_alloc, | 1720 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u," |
| 1716 | extents_to_split, &data_ac, &meta_ac); | 1721 | " clusters_to_add = %u, extents_to_split = %u\n", |
| 1722 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 1723 | (long long)i_size_read(inode), le32_to_cpu(di->i_clusters), | ||
| 1724 | clusters_to_alloc, extents_to_split); | ||
| 1725 | |||
| 1726 | ocfs2_init_dinode_extent_tree(&et, inode, wc->w_di_bh); | ||
| 1727 | ret = ocfs2_lock_allocators(inode, &et, | ||
| 1728 | clusters_to_alloc, extents_to_split, | ||
| 1729 | &data_ac, &meta_ac); | ||
| 1717 | if (ret) { | 1730 | if (ret) { |
| 1718 | mlog_errno(ret); | 1731 | mlog_errno(ret); |
| 1719 | goto out; | 1732 | goto out; |
| 1720 | } | 1733 | } |
| 1721 | 1734 | ||
| 1722 | credits = ocfs2_calc_extend_credits(inode->i_sb, di, | 1735 | credits = ocfs2_calc_extend_credits(inode->i_sb, |
| 1736 | &di->id2.i_list, | ||
| 1723 | clusters_to_alloc); | 1737 | clusters_to_alloc); |
| 1724 | 1738 | ||
| 1725 | } | 1739 | } |
| @@ -1905,11 +1919,15 @@ int ocfs2_write_end_nolock(struct address_space *mapping, | |||
| 1905 | } | 1919 | } |
| 1906 | 1920 | ||
| 1907 | if (page_has_buffers(tmppage)) { | 1921 | if (page_has_buffers(tmppage)) { |
| 1908 | if (ocfs2_should_order_data(inode)) | 1922 | if (ocfs2_should_order_data(inode)) { |
| 1923 | ocfs2_jbd2_file_inode(wc->w_handle, inode); | ||
| 1924 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
| 1909 | walk_page_buffers(wc->w_handle, | 1925 | walk_page_buffers(wc->w_handle, |
| 1910 | page_buffers(tmppage), | 1926 | page_buffers(tmppage), |
| 1911 | from, to, NULL, | 1927 | from, to, NULL, |
| 1912 | ocfs2_journal_dirty_data); | 1928 | ocfs2_journal_dirty_data); |
| 1929 | #endif | ||
| 1930 | } | ||
| 1913 | block_commit_write(tmppage, from, to); | 1931 | block_commit_write(tmppage, from, to); |
| 1914 | } | 1932 | } |
| 1915 | } | 1933 | } |
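Several hunks above, in alloc.c and aops.c, make the same substitution: under JBD2, ordered-mode data is attached to the transaction by filing the inode against the handle instead of walking every buffer with ocfs2_journal_dirty_data(), which survives only behind CONFIG_OCFS2_COMPAT_JBD. Condensed into one hedged sketch, the new idiom is:

if (ocfs2_should_order_data(inode)) {
	ret = ocfs2_jbd2_file_inode(handle, inode);
#ifdef CONFIG_OCFS2_COMPAT_JBD
	ret = walk_page_buffers(handle, page_buffers(page),
				from, to, NULL,
				ocfs2_journal_dirty_data);
#endif
	if (ret < 0)
		mlog_errno(ret);
}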
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index f136639f5b41..7e947c672469 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c | |||
| @@ -66,7 +66,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, | |||
| 66 | /* remove from dirty list before I/O. */ | 66 | /* remove from dirty list before I/O. */ |
| 67 | clear_buffer_dirty(bh); | 67 | clear_buffer_dirty(bh); |
| 68 | 68 | ||
| 69 | get_bh(bh); /* for end_buffer_write_sync() */ | 69 | get_bh(bh); /* for end_buffer_write_sync() */ |
| 70 | bh->b_end_io = end_buffer_write_sync; | 70 | bh->b_end_io = end_buffer_write_sync; |
| 71 | submit_bh(WRITE, bh); | 71 | submit_bh(WRITE, bh); |
| 72 | 72 | ||
| @@ -88,22 +88,103 @@ out: | |||
| 88 | return ret; | 88 | return ret; |
| 89 | } | 89 | } |
| 90 | 90 | ||
| 91 | int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | 91 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
| 92 | struct buffer_head *bhs[], int flags, | 92 | unsigned int nr, struct buffer_head *bhs[]) |
| 93 | struct inode *inode) | 93 | { |
| 94 | int status = 0; | ||
| 95 | unsigned int i; | ||
| 96 | struct buffer_head *bh; | ||
| 97 | |||
| 98 | if (!nr) { | ||
| 99 | mlog(ML_BH_IO, "No buffers will be read!\n"); | ||
| 100 | goto bail; | ||
| 101 | } | ||
| 102 | |||
| 103 | for (i = 0 ; i < nr ; i++) { | ||
| 104 | if (bhs[i] == NULL) { | ||
| 105 | bhs[i] = sb_getblk(osb->sb, block++); | ||
| 106 | if (bhs[i] == NULL) { | ||
| 107 | status = -EIO; | ||
| 108 | mlog_errno(status); | ||
| 109 | goto bail; | ||
| 110 | } | ||
| 111 | } | ||
| 112 | bh = bhs[i]; | ||
| 113 | |||
| 114 | if (buffer_jbd(bh)) { | ||
| 115 | mlog(ML_ERROR, | ||
| 116 | "trying to sync read a jbd " | ||
| 117 | "managed bh (blocknr = %llu), skipping\n", | ||
| 118 | (unsigned long long)bh->b_blocknr); | ||
| 119 | continue; | ||
| 120 | } | ||
| 121 | |||
| 122 | if (buffer_dirty(bh)) { | ||
| 123 | /* This should probably be a BUG, or | ||
| 124 | * at least return an error. */ | ||
| 125 | mlog(ML_ERROR, | ||
| 126 | "trying to sync read a dirty " | ||
| 127 | "buffer! (blocknr = %llu), skipping\n", | ||
| 128 | (unsigned long long)bh->b_blocknr); | ||
| 129 | continue; | ||
| 130 | } | ||
| 131 | |||
| 132 | lock_buffer(bh); | ||
| 133 | if (buffer_jbd(bh)) { | ||
| 134 | mlog(ML_ERROR, | ||
| 135 | "block %llu had the JBD bit set " | ||
| 136 | "while I was in lock_buffer!", | ||
| 137 | (unsigned long long)bh->b_blocknr); | ||
| 138 | BUG(); | ||
| 139 | } | ||
| 140 | |||
| 141 | clear_buffer_uptodate(bh); | ||
| 142 | get_bh(bh); /* for end_buffer_read_sync() */ | ||
| 143 | bh->b_end_io = end_buffer_read_sync; | ||
| 144 | submit_bh(READ, bh); | ||
| 145 | } | ||
| 146 | |||
| 147 | for (i = nr; i > 0; i--) { | ||
| 148 | bh = bhs[i - 1]; | ||
| 149 | |||
| 150 | if (buffer_jbd(bh)) { | ||
| 151 | mlog(ML_ERROR, | ||
| 152 | "the journal got the buffer while it was " | ||
| 153 | "locked for io! (blocknr = %llu)\n", | ||
| 154 | (unsigned long long)bh->b_blocknr); | ||
| 155 | BUG(); | ||
| 156 | } | ||
| 157 | |||
| 158 | wait_on_buffer(bh); | ||
| 159 | if (!buffer_uptodate(bh)) { | ||
| 160 | /* Status won't be cleared from here on out, | ||
| 161 | * so we can safely record this and loop back | ||
| 162 | * to cleanup the other buffers. */ | ||
| 163 | status = -EIO; | ||
| 164 | put_bh(bh); | ||
| 165 | bhs[i - 1] = NULL; | ||
| 166 | } | ||
| 167 | } | ||
| 168 | |||
| 169 | bail: | ||
| 170 | return status; | ||
| 171 | } | ||
| 172 | |||
| 173 | int ocfs2_read_blocks(struct inode *inode, u64 block, int nr, | ||
| 174 | struct buffer_head *bhs[], int flags) | ||
| 94 | { | 175 | { |
| 95 | int status = 0; | 176 | int status = 0; |
| 96 | struct super_block *sb; | ||
| 97 | int i, ignore_cache = 0; | 177 | int i, ignore_cache = 0; |
| 98 | struct buffer_head *bh; | 178 | struct buffer_head *bh; |
| 99 | 179 | ||
| 100 | mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", | 180 | mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n", |
| 101 | (unsigned long long)block, nr, flags, inode); | 181 | inode, (unsigned long long)block, nr, flags); |
| 102 | 182 | ||
| 183 | BUG_ON(!inode); | ||
| 103 | BUG_ON((flags & OCFS2_BH_READAHEAD) && | 184 | BUG_ON((flags & OCFS2_BH_READAHEAD) && |
| 104 | (!inode || !(flags & OCFS2_BH_CACHED))); | 185 | (flags & OCFS2_BH_IGNORE_CACHE)); |
| 105 | 186 | ||
| 106 | if (osb == NULL || osb->sb == NULL || bhs == NULL) { | 187 | if (bhs == NULL) { |
| 107 | status = -EINVAL; | 188 | status = -EINVAL; |
| 108 | mlog_errno(status); | 189 | mlog_errno(status); |
| 109 | goto bail; | 190 | goto bail; |
| @@ -122,26 +203,19 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
| 122 | goto bail; | 203 | goto bail; |
| 123 | } | 204 | } |
| 124 | 205 | ||
| 125 | sb = osb->sb; | 206 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); |
| 126 | |||
| 127 | if (flags & OCFS2_BH_CACHED && !inode) | ||
| 128 | flags &= ~OCFS2_BH_CACHED; | ||
| 129 | |||
| 130 | if (inode) | ||
| 131 | mutex_lock(&OCFS2_I(inode)->ip_io_mutex); | ||
| 132 | for (i = 0 ; i < nr ; i++) { | 207 | for (i = 0 ; i < nr ; i++) { |
| 133 | if (bhs[i] == NULL) { | 208 | if (bhs[i] == NULL) { |
| 134 | bhs[i] = sb_getblk(sb, block++); | 209 | bhs[i] = sb_getblk(inode->i_sb, block++); |
| 135 | if (bhs[i] == NULL) { | 210 | if (bhs[i] == NULL) { |
| 136 | if (inode) | 211 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
| 137 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | ||
| 138 | status = -EIO; | 212 | status = -EIO; |
| 139 | mlog_errno(status); | 213 | mlog_errno(status); |
| 140 | goto bail; | 214 | goto bail; |
| 141 | } | 215 | } |
| 142 | } | 216 | } |
| 143 | bh = bhs[i]; | 217 | bh = bhs[i]; |
| 144 | ignore_cache = 0; | 218 | ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE); |
| 145 | 219 | ||
| 146 | /* There are three read-ahead cases here which we need to | 220 | /* There are three read-ahead cases here which we need to |
| 147 | * be concerned with. All three assume a buffer has | 221 | * be concerned with. All three assume a buffer has |
| @@ -167,26 +241,27 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
| 167 | * before our is-it-in-flight check. | 241 | * before our is-it-in-flight check. |
| 168 | */ | 242 | */ |
| 169 | 243 | ||
| 170 | if (flags & OCFS2_BH_CACHED && | 244 | if (!ignore_cache && !ocfs2_buffer_uptodate(inode, bh)) { |
| 171 | !ocfs2_buffer_uptodate(inode, bh)) { | ||
| 172 | mlog(ML_UPTODATE, | 245 | mlog(ML_UPTODATE, |
| 173 | "bh (%llu), inode %llu not uptodate\n", | 246 | "bh (%llu), inode %llu not uptodate\n", |
| 174 | (unsigned long long)bh->b_blocknr, | 247 | (unsigned long long)bh->b_blocknr, |
| 175 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 248 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| 249 | /* We're using ignore_cache here to say | ||
| 250 | * "go to disk" */ | ||
| 176 | ignore_cache = 1; | 251 | ignore_cache = 1; |
| 177 | } | 252 | } |
| 178 | 253 | ||
| 179 | /* XXX: Can we ever get this and *not* have the cached | 254 | /* XXX: Can we ever get this and *not* have the cached |
| 180 | * flag set? */ | 255 | * flag set? */ |
| 181 | if (buffer_jbd(bh)) { | 256 | if (buffer_jbd(bh)) { |
| 182 | if (!(flags & OCFS2_BH_CACHED) || ignore_cache) | 257 | if (ignore_cache) |
| 183 | mlog(ML_BH_IO, "trying to sync read a jbd " | 258 | mlog(ML_BH_IO, "trying to sync read a jbd " |
| 184 | "managed bh (blocknr = %llu)\n", | 259 | "managed bh (blocknr = %llu)\n", |
| 185 | (unsigned long long)bh->b_blocknr); | 260 | (unsigned long long)bh->b_blocknr); |
| 186 | continue; | 261 | continue; |
| 187 | } | 262 | } |
| 188 | 263 | ||
| 189 | if (!(flags & OCFS2_BH_CACHED) || ignore_cache) { | 264 | if (ignore_cache) { |
| 190 | if (buffer_dirty(bh)) { | 265 | if (buffer_dirty(bh)) { |
| 191 | /* This should probably be a BUG, or | 266 | /* This should probably be a BUG, or |
| 192 | * at least return an error. */ | 267 | * at least return an error. */ |
| @@ -221,7 +296,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
| 221 | * previously read-ahead buffer may have | 296 | * previously read-ahead buffer may have |
| 222 | * completed I/O while we were waiting for the | 297 | * completed I/O while we were waiting for the |
| 223 | * buffer lock. */ | 298 | * buffer lock. */ |
| 224 | if ((flags & OCFS2_BH_CACHED) | 299 | if (!(flags & OCFS2_BH_IGNORE_CACHE) |
| 225 | && !(flags & OCFS2_BH_READAHEAD) | 300 | && !(flags & OCFS2_BH_READAHEAD) |
| 226 | && ocfs2_buffer_uptodate(inode, bh)) { | 301 | && ocfs2_buffer_uptodate(inode, bh)) { |
| 227 | unlock_buffer(bh); | 302 | unlock_buffer(bh); |
| @@ -265,15 +340,14 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, | |||
| 265 | /* Always set the buffer in the cache, even if it was | 340 | /* Always set the buffer in the cache, even if it was |
| 266 | * a forced read, or read-ahead which hasn't yet | 341 | * a forced read, or read-ahead which hasn't yet |
| 267 | * completed. */ | 342 | * completed. */ |
| 268 | if (inode) | 343 | ocfs2_set_buffer_uptodate(inode, bh); |
| 269 | ocfs2_set_buffer_uptodate(inode, bh); | ||
| 270 | } | 344 | } |
| 271 | if (inode) | 345 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); |
| 272 | mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); | ||
| 273 | 346 | ||
| 274 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", | 347 | mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", |
| 275 | (unsigned long long)block, nr, | 348 | (unsigned long long)block, nr, |
| 276 | (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); | 349 | ((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes", |
| 350 | flags); | ||
| 277 | 351 | ||
| 278 | bail: | 352 | bail: |
| 279 | 353 | ||
diff --git a/fs/ocfs2/buffer_head_io.h b/fs/ocfs2/buffer_head_io.h index c2e78614c3e5..75e1dcb1ade7 100644 --- a/fs/ocfs2/buffer_head_io.h +++ b/fs/ocfs2/buffer_head_io.h | |||
| @@ -31,31 +31,29 @@ | |||
| 31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, | 31 | void ocfs2_end_buffer_io_sync(struct buffer_head *bh, |
| 32 | int uptodate); | 32 | int uptodate); |
| 33 | 33 | ||
| 34 | static inline int ocfs2_read_block(struct ocfs2_super *osb, | 34 | static inline int ocfs2_read_block(struct inode *inode, |
| 35 | u64 off, | 35 | u64 off, |
| 36 | struct buffer_head **bh, | 36 | struct buffer_head **bh); |
| 37 | int flags, | ||
| 38 | struct inode *inode); | ||
| 39 | 37 | ||
| 40 | int ocfs2_write_block(struct ocfs2_super *osb, | 38 | int ocfs2_write_block(struct ocfs2_super *osb, |
| 41 | struct buffer_head *bh, | 39 | struct buffer_head *bh, |
| 42 | struct inode *inode); | 40 | struct inode *inode); |
| 43 | int ocfs2_read_blocks(struct ocfs2_super *osb, | 41 | int ocfs2_read_blocks(struct inode *inode, |
| 44 | u64 block, | 42 | u64 block, |
| 45 | int nr, | 43 | int nr, |
| 46 | struct buffer_head *bhs[], | 44 | struct buffer_head *bhs[], |
| 47 | int flags, | 45 | int flags); |
| 48 | struct inode *inode); | 46 | int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, |
| 47 | unsigned int nr, struct buffer_head *bhs[]); | ||
| 49 | 48 | ||
| 50 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, | 49 | int ocfs2_write_super_or_backup(struct ocfs2_super *osb, |
| 51 | struct buffer_head *bh); | 50 | struct buffer_head *bh); |
| 52 | 51 | ||
| 53 | #define OCFS2_BH_CACHED 1 | 52 | #define OCFS2_BH_IGNORE_CACHE 1 |
| 54 | #define OCFS2_BH_READAHEAD 8 | 53 | #define OCFS2_BH_READAHEAD 8 |
| 55 | 54 | ||
| 56 | static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, | 55 | static inline int ocfs2_read_block(struct inode *inode, u64 off, |
| 57 | struct buffer_head **bh, int flags, | 56 | struct buffer_head **bh) |
| 58 | struct inode *inode) | ||
| 59 | { | 57 | { |
| 60 | int status = 0; | 58 | int status = 0; |
| 61 | 59 | ||
| @@ -65,8 +63,7 @@ static inline int ocfs2_read_block(struct ocfs2_super * osb, u64 off, | |||
| 65 | goto bail; | 63 | goto bail; |
| 66 | } | 64 | } |
| 67 | 65 | ||
| 68 | status = ocfs2_read_blocks(osb, off, 1, bh, | 66 | status = ocfs2_read_blocks(inode, off, 1, bh, 0); |
| 69 | flags, inode); | ||
| 70 | 67 | ||
| 71 | bail: | 68 | bail: |
| 72 | return status; | 69 | return status; |
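The net effect of the buffer_head_io changes is a simpler read API: callers pass the inode (which implies both the superblock and the uptodate cache), OCFS2_BH_CACHED disappears in favour of OCFS2_BH_IGNORE_CACHE, and uncached reads move to an explicit _sync variant. Roughly, a typical call site changes like this (a sketch assembled from the call sites in this diff; the surrounding variables are assumed):

	/* before: caller supplied the osb and cache flags explicitly */
	status = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno,
				  &bh, OCFS2_BH_CACHED, inode);

	/* after: cached, inode-based read */
	status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh);

	/* after: uncached read, e.g. before the uptodate cache is usable */
	status = ocfs2_read_blocks_sync(osb, blkno, 1, &bh);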
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c index 23c732f27529..d8a0cb92cef6 100644 --- a/fs/ocfs2/cluster/masklog.c +++ b/fs/ocfs2/cluster/masklog.c | |||
| @@ -109,6 +109,7 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = { | |||
| 109 | define_mask(CONN), | 109 | define_mask(CONN), |
| 110 | define_mask(QUORUM), | 110 | define_mask(QUORUM), |
| 111 | define_mask(EXPORT), | 111 | define_mask(EXPORT), |
| 112 | define_mask(XATTR), | ||
| 112 | define_mask(ERROR), | 113 | define_mask(ERROR), |
| 113 | define_mask(NOTICE), | 114 | define_mask(NOTICE), |
| 114 | define_mask(KTHREAD), | 115 | define_mask(KTHREAD), |
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 597e064bb94f..57670c680471 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h | |||
| @@ -112,6 +112,7 @@ | |||
| 112 | #define ML_CONN 0x0000000004000000ULL /* net connection management */ | 112 | #define ML_CONN 0x0000000004000000ULL /* net connection management */ |
| 113 | #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ | 113 | #define ML_QUORUM 0x0000000008000000ULL /* net connection quorum */ |
| 114 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ | 114 | #define ML_EXPORT 0x0000000010000000ULL /* ocfs2 export operations */ |
| 115 | #define ML_XATTR 0x0000000020000000ULL /* ocfs2 extended attributes */ | ||
| 115 | /* bits that are infrequently given and frequently matched in the high word */ | 116 | /* bits that are infrequently given and frequently matched in the high word */ |
| 116 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ | 117 | #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ |
| 117 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ | 118 | #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ |
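ML_XATTR is consumed like any other mask bit: a file sets its mlog prefix before including masklog.h and then logs through mlog(). A minimal sketch (the placement in xattr.c is an assumption based on this series):

	#define MLOG_MASK_PREFIX ML_XATTR
	#include <cluster/masklog.h>

	mlog(0, "setting xattr on inode %llu\n",
	     (unsigned long long)OCFS2_I(inode)->ip_blkno);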
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 9cce563fd627..026e6eb85187 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c | |||
| @@ -82,6 +82,49 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
| 82 | struct ocfs2_alloc_context *meta_ac, | 82 | struct ocfs2_alloc_context *meta_ac, |
| 83 | struct buffer_head **new_bh); | 83 | struct buffer_head **new_bh); |
| 84 | 84 | ||
| 85 | static struct buffer_head *ocfs2_bread(struct inode *inode, | ||
| 86 | int block, int *err, int reada) | ||
| 87 | { | ||
| 88 | struct buffer_head *bh = NULL; | ||
| 89 | int tmperr; | ||
| 90 | u64 p_blkno; | ||
| 91 | int readflags = 0; | ||
| 92 | |||
| 93 | if (reada) | ||
| 94 | readflags |= OCFS2_BH_READAHEAD; | ||
| 95 | |||
| 96 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | ||
| 97 | i_size_read(inode)) { | ||
| 98 | BUG_ON(!reada); | ||
| 99 | return NULL; | ||
| 100 | } | ||
| 101 | |||
| 102 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 103 | tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, | ||
| 104 | NULL); | ||
| 105 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 106 | if (tmperr < 0) { | ||
| 107 | mlog_errno(tmperr); | ||
| 108 | goto fail; | ||
| 109 | } | ||
| 110 | |||
| 111 | tmperr = ocfs2_read_blocks(inode, p_blkno, 1, &bh, readflags); | ||
| 112 | if (tmperr < 0) | ||
| 113 | goto fail; | ||
| 114 | |||
| 115 | tmperr = 0; | ||
| 116 | |||
| 117 | *err = 0; | ||
| 118 | return bh; | ||
| 119 | |||
| 120 | fail: | ||
| 121 | brelse(bh); | ||
| 122 | bh = NULL; | ||
| 123 | |||
| 124 | *err = -EIO; | ||
| 125 | return NULL; | ||
| 126 | } | ||
| 127 | |||
| 85 | /* | 128 | /* |
| 86 | * bh passed here can be an inode block or a dir data block, depending | 129 | * bh passed here can be an inode block or a dir data block, depending |
| 87 | * on the inode inline data flag. | 130 | * on the inode inline data flag. |
| @@ -188,8 +231,7 @@ static struct buffer_head *ocfs2_find_entry_id(const char *name, | |||
| 188 | struct ocfs2_dinode *di; | 231 | struct ocfs2_dinode *di; |
| 189 | struct ocfs2_inline_data *data; | 232 | struct ocfs2_inline_data *data; |
| 190 | 233 | ||
| 191 | ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, | 234 | ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); |
| 192 | &di_bh, OCFS2_BH_CACHED, dir); | ||
| 193 | if (ret) { | 235 | if (ret) { |
| 194 | mlog_errno(ret); | 236 | mlog_errno(ret); |
| 195 | goto out; | 237 | goto out; |
| @@ -260,14 +302,13 @@ restart: | |||
| 260 | } | 302 | } |
| 261 | if ((bh = bh_use[ra_ptr++]) == NULL) | 303 | if ((bh = bh_use[ra_ptr++]) == NULL) |
| 262 | goto next; | 304 | goto next; |
| 263 | wait_on_buffer(bh); | 305 | if (ocfs2_read_block(dir, block, &bh)) { |
| 264 | if (!buffer_uptodate(bh)) { | 306 | /* read error, skip block & hope for the best. |
| 265 | /* read error, skip block & hope for the best */ | 307 | * ocfs2_read_block() has released the bh. */ |
| 266 | ocfs2_error(dir->i_sb, "reading directory %llu, " | 308 | ocfs2_error(dir->i_sb, "reading directory %llu, " |
| 267 | "offset %lu\n", | 309 | "offset %lu\n", |
| 268 | (unsigned long long)OCFS2_I(dir)->ip_blkno, | 310 | (unsigned long long)OCFS2_I(dir)->ip_blkno, |
| 269 | block); | 311 | block); |
| 270 | brelse(bh); | ||
| 271 | goto next; | 312 | goto next; |
| 272 | } | 313 | } |
| 273 | i = ocfs2_search_dirblock(bh, dir, name, namelen, | 314 | i = ocfs2_search_dirblock(bh, dir, name, namelen, |
| @@ -417,8 +458,7 @@ static inline int ocfs2_delete_entry_id(handle_t *handle, | |||
| 417 | struct ocfs2_dinode *di; | 458 | struct ocfs2_dinode *di; |
| 418 | struct ocfs2_inline_data *data; | 459 | struct ocfs2_inline_data *data; |
| 419 | 460 | ||
| 420 | ret = ocfs2_read_block(OCFS2_SB(dir->i_sb), OCFS2_I(dir)->ip_blkno, | 461 | ret = ocfs2_read_block(dir, OCFS2_I(dir)->ip_blkno, &di_bh); |
| 421 | &di_bh, OCFS2_BH_CACHED, dir); | ||
| 422 | if (ret) { | 462 | if (ret) { |
| 423 | mlog_errno(ret); | 463 | mlog_errno(ret); |
| 424 | goto out; | 464 | goto out; |
| @@ -596,8 +636,7 @@ static int ocfs2_dir_foreach_blk_id(struct inode *inode, | |||
| 596 | struct ocfs2_inline_data *data; | 636 | struct ocfs2_inline_data *data; |
| 597 | struct ocfs2_dir_entry *de; | 637 | struct ocfs2_dir_entry *de; |
| 598 | 638 | ||
| 599 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 639 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); |
| 600 | &di_bh, OCFS2_BH_CACHED, inode); | ||
| 601 | if (ret) { | 640 | if (ret) { |
| 602 | mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", | 641 | mlog(ML_ERROR, "Unable to read inode block for dir %llu\n", |
| 603 | (unsigned long long)OCFS2_I(inode)->ip_blkno); | 642 | (unsigned long long)OCFS2_I(inode)->ip_blkno); |
| @@ -716,8 +755,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode, | |||
| 716 | for (i = ra_sectors >> (sb->s_blocksize_bits - 9); | 755 | for (i = ra_sectors >> (sb->s_blocksize_bits - 9); |
| 717 | i > 0; i--) { | 756 | i > 0; i--) { |
| 718 | tmp = ocfs2_bread(inode, ++blk, &err, 1); | 757 | tmp = ocfs2_bread(inode, ++blk, &err, 1); |
| 719 | if (tmp) | 758 | brelse(tmp); |
| 720 | brelse(tmp); | ||
| 721 | } | 759 | } |
| 722 | last_ra_blk = blk; | 760 | last_ra_blk = blk; |
| 723 | ra_sectors = 8; | 761 | ra_sectors = 8; |
| @@ -899,10 +937,8 @@ int ocfs2_find_files_on_disk(const char *name, | |||
| 899 | leave: | 937 | leave: |
| 900 | if (status < 0) { | 938 | if (status < 0) { |
| 901 | *dirent = NULL; | 939 | *dirent = NULL; |
| 902 | if (*dirent_bh) { | 940 | brelse(*dirent_bh); |
| 903 | brelse(*dirent_bh); | 941 | *dirent_bh = NULL; |
| 904 | *dirent_bh = NULL; | ||
| 905 | } | ||
| 906 | } | 942 | } |
| 907 | 943 | ||
| 908 | mlog_exit(status); | 944 | mlog_exit(status); |
| @@ -951,8 +987,7 @@ int ocfs2_check_dir_for_entry(struct inode *dir, | |||
| 951 | 987 | ||
| 952 | ret = 0; | 988 | ret = 0; |
| 953 | bail: | 989 | bail: |
| 954 | if (dirent_bh) | 990 | brelse(dirent_bh); |
| 955 | brelse(dirent_bh); | ||
| 956 | 991 | ||
| 957 | mlog_exit(ret); | 992 | mlog_exit(ret); |
| 958 | return ret; | 993 | return ret; |
| @@ -1127,8 +1162,7 @@ static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb, | |||
| 1127 | 1162 | ||
| 1128 | status = 0; | 1163 | status = 0; |
| 1129 | bail: | 1164 | bail: |
| 1130 | if (new_bh) | 1165 | brelse(new_bh); |
| 1131 | brelse(new_bh); | ||
| 1132 | 1166 | ||
| 1133 | mlog_exit(status); | 1167 | mlog_exit(status); |
| 1134 | return status; | 1168 | return status; |
| @@ -1192,6 +1226,9 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 1192 | struct buffer_head *dirdata_bh = NULL; | 1226 | struct buffer_head *dirdata_bh = NULL; |
| 1193 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1227 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
| 1194 | handle_t *handle; | 1228 | handle_t *handle; |
| 1229 | struct ocfs2_extent_tree et; | ||
| 1230 | |||
| 1231 | ocfs2_init_dinode_extent_tree(&et, dir, di_bh); | ||
| 1195 | 1232 | ||
| 1196 | alloc = ocfs2_clusters_for_bytes(sb, bytes); | 1233 | alloc = ocfs2_clusters_for_bytes(sb, bytes); |
| 1197 | 1234 | ||
| @@ -1305,8 +1342,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 1305 | * This should never fail as our extent list is empty and all | 1342 | * This should never fail as our extent list is empty and all |
| 1306 | * related blocks have been journaled already. | 1343 | * related blocks have been journaled already. |
| 1307 | */ | 1344 | */ |
| 1308 | ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 0, blkno, len, 0, | 1345 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 0, blkno, len, |
| 1309 | NULL); | 1346 | 0, NULL); |
| 1310 | if (ret) { | 1347 | if (ret) { |
| 1311 | mlog_errno(ret); | 1348 | mlog_errno(ret); |
| 1312 | goto out_commit; | 1349 | goto out_commit; |
| @@ -1337,8 +1374,8 @@ static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh, | |||
| 1337 | } | 1374 | } |
| 1338 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); | 1375 | blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off); |
| 1339 | 1376 | ||
| 1340 | ret = ocfs2_insert_extent(osb, handle, dir, di_bh, 1, blkno, | 1377 | ret = ocfs2_insert_extent(osb, handle, dir, &et, 1, |
| 1341 | len, 0, NULL); | 1378 | blkno, len, 0, NULL); |
| 1342 | if (ret) { | 1379 | if (ret) { |
| 1343 | mlog_errno(ret); | 1380 | mlog_errno(ret); |
| 1344 | goto out_commit; | 1381 | goto out_commit; |
| @@ -1383,9 +1420,9 @@ static int ocfs2_do_extend_dir(struct super_block *sb, | |||
| 1383 | if (extend) { | 1420 | if (extend) { |
| 1384 | u32 offset = OCFS2_I(dir)->ip_clusters; | 1421 | u32 offset = OCFS2_I(dir)->ip_clusters; |
| 1385 | 1422 | ||
| 1386 | status = ocfs2_do_extend_allocation(OCFS2_SB(sb), dir, &offset, | 1423 | status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset, |
| 1387 | 1, 0, parent_fe_bh, handle, | 1424 | 1, 0, parent_fe_bh, handle, |
| 1388 | data_ac, meta_ac, NULL); | 1425 | data_ac, meta_ac, NULL); |
| 1389 | BUG_ON(status == -EAGAIN); | 1426 | BUG_ON(status == -EAGAIN); |
| 1390 | if (status < 0) { | 1427 | if (status < 0) { |
| 1391 | mlog_errno(status); | 1428 | mlog_errno(status); |
| @@ -1430,12 +1467,14 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
| 1430 | int credits, num_free_extents, drop_alloc_sem = 0; | 1467 | int credits, num_free_extents, drop_alloc_sem = 0; |
| 1431 | loff_t dir_i_size; | 1468 | loff_t dir_i_size; |
| 1432 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; | 1469 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data; |
| 1470 | struct ocfs2_extent_list *el = &fe->id2.i_list; | ||
| 1433 | struct ocfs2_alloc_context *data_ac = NULL; | 1471 | struct ocfs2_alloc_context *data_ac = NULL; |
| 1434 | struct ocfs2_alloc_context *meta_ac = NULL; | 1472 | struct ocfs2_alloc_context *meta_ac = NULL; |
| 1435 | handle_t *handle = NULL; | 1473 | handle_t *handle = NULL; |
| 1436 | struct buffer_head *new_bh = NULL; | 1474 | struct buffer_head *new_bh = NULL; |
| 1437 | struct ocfs2_dir_entry * de; | 1475 | struct ocfs2_dir_entry * de; |
| 1438 | struct super_block *sb = osb->sb; | 1476 | struct super_block *sb = osb->sb; |
| 1477 | struct ocfs2_extent_tree et; | ||
| 1439 | 1478 | ||
| 1440 | mlog_entry_void(); | 1479 | mlog_entry_void(); |
| 1441 | 1480 | ||
| @@ -1479,7 +1518,8 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
| 1479 | spin_lock(&OCFS2_I(dir)->ip_lock); | 1518 | spin_lock(&OCFS2_I(dir)->ip_lock); |
| 1480 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { | 1519 | if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) { |
| 1481 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 1520 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
| 1482 | num_free_extents = ocfs2_num_free_extents(osb, dir, fe); | 1521 | ocfs2_init_dinode_extent_tree(&et, dir, parent_fe_bh); |
| 1522 | num_free_extents = ocfs2_num_free_extents(osb, dir, &et); | ||
| 1483 | if (num_free_extents < 0) { | 1523 | if (num_free_extents < 0) { |
| 1484 | status = num_free_extents; | 1524 | status = num_free_extents; |
| 1485 | mlog_errno(status); | 1525 | mlog_errno(status); |
| @@ -1487,7 +1527,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
| 1487 | } | 1527 | } |
| 1488 | 1528 | ||
| 1489 | if (!num_free_extents) { | 1529 | if (!num_free_extents) { |
| 1490 | status = ocfs2_reserve_new_metadata(osb, fe, &meta_ac); | 1530 | status = ocfs2_reserve_new_metadata(osb, el, &meta_ac); |
| 1491 | if (status < 0) { | 1531 | if (status < 0) { |
| 1492 | if (status != -ENOSPC) | 1532 | if (status != -ENOSPC) |
| 1493 | mlog_errno(status); | 1533 | mlog_errno(status); |
| @@ -1502,7 +1542,7 @@ static int ocfs2_extend_dir(struct ocfs2_super *osb, | |||
| 1502 | goto bail; | 1542 | goto bail; |
| 1503 | } | 1543 | } |
| 1504 | 1544 | ||
| 1505 | credits = ocfs2_calc_extend_credits(sb, fe, 1); | 1545 | credits = ocfs2_calc_extend_credits(sb, el, 1); |
| 1506 | } else { | 1546 | } else { |
| 1507 | spin_unlock(&OCFS2_I(dir)->ip_lock); | 1547 | spin_unlock(&OCFS2_I(dir)->ip_lock); |
| 1508 | credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; | 1548 | credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS; |
| @@ -1568,8 +1608,7 @@ bail: | |||
| 1568 | if (meta_ac) | 1608 | if (meta_ac) |
| 1569 | ocfs2_free_alloc_context(meta_ac); | 1609 | ocfs2_free_alloc_context(meta_ac); |
| 1570 | 1610 | ||
| 1571 | if (new_bh) | 1611 | brelse(new_bh); |
| 1572 | brelse(new_bh); | ||
| 1573 | 1612 | ||
| 1574 | mlog_exit(status); | 1613 | mlog_exit(status); |
| 1575 | return status; | 1614 | return status; |
| @@ -1696,8 +1735,7 @@ static int ocfs2_find_dir_space_el(struct inode *dir, const char *name, | |||
| 1696 | 1735 | ||
| 1697 | status = 0; | 1736 | status = 0; |
| 1698 | bail: | 1737 | bail: |
| 1699 | if (bh) | 1738 | brelse(bh); |
| 1700 | brelse(bh); | ||
| 1701 | 1739 | ||
| 1702 | mlog_exit(status); | 1740 | mlog_exit(status); |
| 1703 | return status; | 1741 | return status; |
| @@ -1756,7 +1794,6 @@ int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb, | |||
| 1756 | *ret_de_bh = bh; | 1794 | *ret_de_bh = bh; |
| 1757 | bh = NULL; | 1795 | bh = NULL; |
| 1758 | out: | 1796 | out: |
| 1759 | if (bh) | 1797 | brelse(bh); |
| 1760 | brelse(bh); | ||
| 1761 | return ret; | 1798 | return ret; |
| 1762 | } | 1799 | } |
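Most of the dir.c churn above is one mechanical cleanup: the "if (bh) brelse(bh)" guards go away because brelse() already tolerates a NULL pointer. The core-kernel helper is essentially the following (paraphrased from include/linux/buffer_head.h):

	static inline void brelse(struct buffer_head *bh)
	{
		if (bh)
			__brelse(bh);
	}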
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index eae3d643a5e4..ec684426034b 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c | |||
| @@ -2024,8 +2024,7 @@ static int ocfs2_inode_lock_update(struct inode *inode, | |||
| 2024 | } else { | 2024 | } else { |
| 2025 | /* Boo, we have to go to disk. */ | 2025 | /* Boo, we have to go to disk. */ |
| 2026 | /* read bh, cast, ocfs2_refresh_inode */ | 2026 | /* read bh, cast, ocfs2_refresh_inode */ |
| 2027 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), oi->ip_blkno, | 2027 | status = ocfs2_read_block(inode, oi->ip_blkno, bh); |
| 2028 | bh, OCFS2_BH_CACHED, inode); | ||
| 2029 | if (status < 0) { | 2028 | if (status < 0) { |
| 2030 | mlog_errno(status); | 2029 | mlog_errno(status); |
| 2031 | goto bail_refresh; | 2030 | goto bail_refresh; |
| @@ -2086,11 +2085,7 @@ static int ocfs2_assign_bh(struct inode *inode, | |||
| 2086 | return 0; | 2085 | return 0; |
| 2087 | } | 2086 | } |
| 2088 | 2087 | ||
| 2089 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 2088 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, ret_bh); |
| 2090 | OCFS2_I(inode)->ip_blkno, | ||
| 2091 | ret_bh, | ||
| 2092 | OCFS2_BH_CACHED, | ||
| 2093 | inode); | ||
| 2094 | if (status < 0) | 2089 | if (status < 0) |
| 2095 | mlog_errno(status); | 2090 | mlog_errno(status); |
| 2096 | 2091 | ||
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index c58668a326fe..2baedac58234 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c | |||
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/fs.h> | 25 | #include <linux/fs.h> |
| 26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
| 27 | #include <linux/types.h> | 27 | #include <linux/types.h> |
| 28 | #include <linux/fiemap.h> | ||
| 28 | 29 | ||
| 29 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP | 30 | #define MLOG_MASK_PREFIX ML_EXTENT_MAP |
| 30 | #include <cluster/masklog.h> | 31 | #include <cluster/masklog.h> |
| @@ -32,6 +33,7 @@ | |||
| 32 | #include "ocfs2.h" | 33 | #include "ocfs2.h" |
| 33 | 34 | ||
| 34 | #include "alloc.h" | 35 | #include "alloc.h" |
| 36 | #include "dlmglue.h" | ||
| 35 | #include "extent_map.h" | 37 | #include "extent_map.h" |
| 36 | #include "inode.h" | 38 | #include "inode.h" |
| 37 | #include "super.h" | 39 | #include "super.h" |
| @@ -282,6 +284,50 @@ out: | |||
| 282 | kfree(new_emi); | 284 | kfree(new_emi); |
| 283 | } | 285 | } |
| 284 | 286 | ||
| 287 | static int ocfs2_last_eb_is_empty(struct inode *inode, | ||
| 288 | struct ocfs2_dinode *di) | ||
| 289 | { | ||
| 290 | int ret, next_free; | ||
| 291 | u64 last_eb_blk = le64_to_cpu(di->i_last_eb_blk); | ||
| 292 | struct buffer_head *eb_bh = NULL; | ||
| 293 | struct ocfs2_extent_block *eb; | ||
| 294 | struct ocfs2_extent_list *el; | ||
| 295 | |||
| 296 | ret = ocfs2_read_block(inode, last_eb_blk, &eb_bh); | ||
| 297 | if (ret) { | ||
| 298 | mlog_errno(ret); | ||
| 299 | goto out; | ||
| 300 | } | ||
| 301 | |||
| 302 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
| 303 | el = &eb->h_list; | ||
| 304 | |||
| 305 | if (!OCFS2_IS_VALID_EXTENT_BLOCK(eb)) { | ||
| 306 | ret = -EROFS; | ||
| 307 | OCFS2_RO_ON_INVALID_EXTENT_BLOCK(inode->i_sb, eb); | ||
| 308 | goto out; | ||
| 309 | } | ||
| 310 | |||
| 311 | if (el->l_tree_depth) { | ||
| 312 | ocfs2_error(inode->i_sb, | ||
| 313 | "Inode %lu has non zero tree depth in " | ||
| 314 | "leaf block %llu\n", inode->i_ino, | ||
| 315 | (unsigned long long)eb_bh->b_blocknr); | ||
| 316 | ret = -EROFS; | ||
| 317 | goto out; | ||
| 318 | } | ||
| 319 | |||
| 320 | next_free = le16_to_cpu(el->l_next_free_rec); | ||
| 321 | |||
| 322 | if (next_free == 0 || | ||
| 323 | (next_free == 1 && ocfs2_is_empty_extent(&el->l_recs[0]))) | ||
| 324 | ret = 1; | ||
| 325 | |||
| 326 | out: | ||
| 327 | brelse(eb_bh); | ||
| 328 | return ret; | ||
| 329 | } | ||
| 330 | |||
| 285 | /* | 331 | /* |
| 286 | * Return the 1st index within el which contains an extent start | 332 | * Return the 1st index within el which contains an extent start |
| 287 | * larger than v_cluster. | 333 | * larger than v_cluster. |
| @@ -335,9 +381,9 @@ static int ocfs2_figure_hole_clusters(struct inode *inode, | |||
| 335 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) | 381 | if (le64_to_cpu(eb->h_next_leaf_blk) == 0ULL) |
| 336 | goto no_more_extents; | 382 | goto no_more_extents; |
| 337 | 383 | ||
| 338 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 384 | ret = ocfs2_read_block(inode, |
| 339 | le64_to_cpu(eb->h_next_leaf_blk), | 385 | le64_to_cpu(eb->h_next_leaf_blk), |
| 340 | &next_eb_bh, OCFS2_BH_CACHED, inode); | 386 | &next_eb_bh); |
| 341 | if (ret) { | 387 | if (ret) { |
| 342 | mlog_errno(ret); | 388 | mlog_errno(ret); |
| 343 | goto out; | 389 | goto out; |
| @@ -373,42 +419,28 @@ out: | |||
| 373 | return ret; | 419 | return ret; |
| 374 | } | 420 | } |
| 375 | 421 | ||
| 376 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | 422 | static int ocfs2_get_clusters_nocache(struct inode *inode, |
| 377 | u32 *p_cluster, u32 *num_clusters, | 423 | struct buffer_head *di_bh, |
| 378 | unsigned int *extent_flags) | 424 | u32 v_cluster, unsigned int *hole_len, |
| 425 | struct ocfs2_extent_rec *ret_rec, | ||
| 426 | unsigned int *is_last) | ||
| 379 | { | 427 | { |
| 380 | int ret, i; | 428 | int i, ret, tree_height, len; |
| 381 | unsigned int flags = 0; | ||
| 382 | struct buffer_head *di_bh = NULL; | ||
| 383 | struct buffer_head *eb_bh = NULL; | ||
| 384 | struct ocfs2_dinode *di; | 429 | struct ocfs2_dinode *di; |
| 385 | struct ocfs2_extent_block *eb; | 430 | struct ocfs2_extent_block *uninitialized_var(eb); |
| 386 | struct ocfs2_extent_list *el; | 431 | struct ocfs2_extent_list *el; |
| 387 | struct ocfs2_extent_rec *rec; | 432 | struct ocfs2_extent_rec *rec; |
| 388 | u32 coff; | 433 | struct buffer_head *eb_bh = NULL; |
| 389 | |||
| 390 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
| 391 | ret = -ERANGE; | ||
| 392 | mlog_errno(ret); | ||
| 393 | goto out; | ||
| 394 | } | ||
| 395 | |||
| 396 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
| 397 | num_clusters, extent_flags); | ||
| 398 | if (ret == 0) | ||
| 399 | goto out; | ||
| 400 | 434 | ||
| 401 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), OCFS2_I(inode)->ip_blkno, | 435 | memset(ret_rec, 0, sizeof(*ret_rec)); |
| 402 | &di_bh, OCFS2_BH_CACHED, inode); | 436 | if (is_last) |
| 403 | if (ret) { | 437 | *is_last = 0; |
| 404 | mlog_errno(ret); | ||
| 405 | goto out; | ||
| 406 | } | ||
| 407 | 438 | ||
| 408 | di = (struct ocfs2_dinode *) di_bh->b_data; | 439 | di = (struct ocfs2_dinode *) di_bh->b_data; |
| 409 | el = &di->id2.i_list; | 440 | el = &di->id2.i_list; |
| 441 | tree_height = le16_to_cpu(el->l_tree_depth); | ||
| 410 | 442 | ||
| 411 | if (el->l_tree_depth) { | 443 | if (tree_height > 0) { |
| 412 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | 444 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); |
| 413 | if (ret) { | 445 | if (ret) { |
| 414 | mlog_errno(ret); | 446 | mlog_errno(ret); |
| @@ -431,46 +463,202 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
| 431 | i = ocfs2_search_extent_list(el, v_cluster); | 463 | i = ocfs2_search_extent_list(el, v_cluster); |
| 432 | if (i == -1) { | 464 | if (i == -1) { |
| 433 | /* | 465 | /* |
| 434 | * A hole was found. Return some canned values that | 466 | * Holes can be larger than the maximum size of an |
| 435 | * callers can key on. If asked for, num_clusters will | 467 | * extent, so we return their lengths in a seperate |
| 436 | * be populated with the size of the hole. | 468 | * field. |
| 437 | */ | 469 | */ |
| 438 | *p_cluster = 0; | 470 | if (hole_len) { |
| 439 | if (num_clusters) { | ||
| 440 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, | 471 | ret = ocfs2_figure_hole_clusters(inode, el, eb_bh, |
| 441 | v_cluster, | 472 | v_cluster, &len); |
| 442 | num_clusters); | ||
| 443 | if (ret) { | 473 | if (ret) { |
| 444 | mlog_errno(ret); | 474 | mlog_errno(ret); |
| 445 | goto out; | 475 | goto out; |
| 446 | } | 476 | } |
| 477 | |||
| 478 | *hole_len = len; | ||
| 479 | } | ||
| 480 | goto out_hole; | ||
| 481 | } | ||
| 482 | |||
| 483 | rec = &el->l_recs[i]; | ||
| 484 | |||
| 485 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | ||
| 486 | |||
| 487 | if (!rec->e_blkno) { | ||
| 488 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
| 489 | "record (%u, %u, 0)", inode->i_ino, | ||
| 490 | le32_to_cpu(rec->e_cpos), | ||
| 491 | ocfs2_rec_clusters(el, rec)); | ||
| 492 | ret = -EROFS; | ||
| 493 | goto out; | ||
| 494 | } | ||
| 495 | |||
| 496 | *ret_rec = *rec; | ||
| 497 | |||
| 498 | /* | ||
| 499 | * Checking for last extent is potentially expensive - we | ||
| 500 | * might have to look at the next leaf over to see if it's | ||
| 501 | * empty. | ||
| 502 | * | ||
| 503 | * The first two checks are to see whether the caller even | ||
| 504 | * cares for this information, and if the extent is at least | ||
| 505 | * the last in its list. | ||
| 506 | * | ||
| 507 | * If those hold true, then the extent is last if any of the | ||
| 508 | * additional conditions hold true: | ||
| 509 | * - Extent list is in-inode | ||
| 510 | * - Extent list is right-most | ||
| 511 | * - Extent list is 2nd to rightmost, with empty right-most | ||
| 512 | */ | ||
| 513 | if (is_last) { | ||
| 514 | if (i == (le16_to_cpu(el->l_next_free_rec) - 1)) { | ||
| 515 | if (tree_height == 0) | ||
| 516 | *is_last = 1; | ||
| 517 | else if (eb->h_blkno == di->i_last_eb_blk) | ||
| 518 | *is_last = 1; | ||
| 519 | else if (eb->h_next_leaf_blk == di->i_last_eb_blk) { | ||
| 520 | ret = ocfs2_last_eb_is_empty(inode, di); | ||
| 521 | if (ret < 0) { | ||
| 522 | mlog_errno(ret); | ||
| 523 | goto out; | ||
| 524 | } | ||
| 525 | if (ret == 1) | ||
| 526 | *is_last = 1; | ||
| 527 | } | ||
| 528 | } | ||
| 529 | } | ||
| 530 | |||
| 531 | out_hole: | ||
| 532 | ret = 0; | ||
| 533 | out: | ||
| 534 | brelse(eb_bh); | ||
| 535 | return ret; | ||
| 536 | } | ||
| 537 | |||
| 538 | static void ocfs2_relative_extent_offsets(struct super_block *sb, | ||
| 539 | u32 v_cluster, | ||
| 540 | struct ocfs2_extent_rec *rec, | ||
| 541 | u32 *p_cluster, u32 *num_clusters) | ||
| 542 | |||
| 543 | { | ||
| 544 | u32 coff = v_cluster - le32_to_cpu(rec->e_cpos); | ||
| 545 | |||
| 546 | *p_cluster = ocfs2_blocks_to_clusters(sb, le64_to_cpu(rec->e_blkno)); | ||
| 547 | *p_cluster = *p_cluster + coff; | ||
| 548 | |||
| 549 | if (num_clusters) | ||
| 550 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters) - coff; | ||
| 551 | } | ||
| 552 | |||
| 553 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | ||
| 554 | u32 *p_cluster, u32 *num_clusters, | ||
| 555 | struct ocfs2_extent_list *el) | ||
| 556 | { | ||
| 557 | int ret = 0, i; | ||
| 558 | struct buffer_head *eb_bh = NULL; | ||
| 559 | struct ocfs2_extent_block *eb; | ||
| 560 | struct ocfs2_extent_rec *rec; | ||
| 561 | u32 coff; | ||
| 562 | |||
| 563 | if (el->l_tree_depth) { | ||
| 564 | ret = ocfs2_find_leaf(inode, el, v_cluster, &eb_bh); | ||
| 565 | if (ret) { | ||
| 566 | mlog_errno(ret); | ||
| 567 | goto out; | ||
| 447 | } | 568 | } |
| 569 | |||
| 570 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
| 571 | el = &eb->h_list; | ||
| 572 | |||
| 573 | if (el->l_tree_depth) { | ||
| 574 | ocfs2_error(inode->i_sb, | ||
| 575 | "Inode %lu has non zero tree depth in " | ||
| 576 | "xattr leaf block %llu\n", inode->i_ino, | ||
| 577 | (unsigned long long)eb_bh->b_blocknr); | ||
| 578 | ret = -EROFS; | ||
| 579 | goto out; | ||
| 580 | } | ||
| 581 | } | ||
| 582 | |||
| 583 | i = ocfs2_search_extent_list(el, v_cluster); | ||
| 584 | if (i == -1) { | ||
| 585 | ret = -EROFS; | ||
| 586 | mlog_errno(ret); | ||
| 587 | goto out; | ||
| 448 | } else { | 588 | } else { |
| 449 | rec = &el->l_recs[i]; | 589 | rec = &el->l_recs[i]; |
| 450 | |||
| 451 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); | 590 | BUG_ON(v_cluster < le32_to_cpu(rec->e_cpos)); |
| 452 | 591 | ||
| 453 | if (!rec->e_blkno) { | 592 | if (!rec->e_blkno) { |
| 454 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | 593 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " |
| 455 | "record (%u, %u, 0)", inode->i_ino, | 594 | "record (%u, %u, 0) in xattr", inode->i_ino, |
| 456 | le32_to_cpu(rec->e_cpos), | 595 | le32_to_cpu(rec->e_cpos), |
| 457 | ocfs2_rec_clusters(el, rec)); | 596 | ocfs2_rec_clusters(el, rec)); |
| 458 | ret = -EROFS; | 597 | ret = -EROFS; |
| 459 | goto out; | 598 | goto out; |
| 460 | } | 599 | } |
| 461 | |||
| 462 | coff = v_cluster - le32_to_cpu(rec->e_cpos); | 600 | coff = v_cluster - le32_to_cpu(rec->e_cpos); |
| 463 | |||
| 464 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, | 601 | *p_cluster = ocfs2_blocks_to_clusters(inode->i_sb, |
| 465 | le64_to_cpu(rec->e_blkno)); | 602 | le64_to_cpu(rec->e_blkno)); |
| 466 | *p_cluster = *p_cluster + coff; | 603 | *p_cluster = *p_cluster + coff; |
| 467 | |||
| 468 | if (num_clusters) | 604 | if (num_clusters) |
| 469 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; | 605 | *num_clusters = ocfs2_rec_clusters(el, rec) - coff; |
| 606 | } | ||
| 607 | out: | ||
| 608 | if (eb_bh) | ||
| 609 | brelse(eb_bh); | ||
| 610 | return ret; | ||
| 611 | } | ||
| 612 | |||
| 613 | int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | ||
| 614 | u32 *p_cluster, u32 *num_clusters, | ||
| 615 | unsigned int *extent_flags) | ||
| 616 | { | ||
| 617 | int ret; | ||
| 618 | unsigned int uninitialized_var(hole_len), flags = 0; | ||
| 619 | struct buffer_head *di_bh = NULL; | ||
| 620 | struct ocfs2_extent_rec rec; | ||
| 621 | |||
| 622 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
| 623 | ret = -ERANGE; | ||
| 624 | mlog_errno(ret); | ||
| 625 | goto out; | ||
| 626 | } | ||
| 627 | |||
| 628 | ret = ocfs2_extent_map_lookup(inode, v_cluster, p_cluster, | ||
| 629 | num_clusters, extent_flags); | ||
| 630 | if (ret == 0) | ||
| 631 | goto out; | ||
| 632 | |||
| 633 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &di_bh); | ||
| 634 | if (ret) { | ||
| 635 | mlog_errno(ret); | ||
| 636 | goto out; | ||
| 637 | } | ||
| 470 | 638 | ||
| 471 | flags = rec->e_flags; | 639 | ret = ocfs2_get_clusters_nocache(inode, di_bh, v_cluster, &hole_len, |
| 640 | &rec, NULL); | ||
| 641 | if (ret) { | ||
| 642 | mlog_errno(ret); | ||
| 643 | goto out; | ||
| 644 | } | ||
| 645 | |||
| 646 | if (rec.e_blkno == 0ULL) { | ||
| 647 | /* | ||
| 648 | * A hole was found. Return some canned values that | ||
| 649 | * callers can key on. If asked for, num_clusters will | ||
| 650 | * be populated with the size of the hole. | ||
| 651 | */ | ||
| 652 | *p_cluster = 0; | ||
| 653 | if (num_clusters) { | ||
| 654 | *num_clusters = hole_len; | ||
| 655 | } | ||
| 656 | } else { | ||
| 657 | ocfs2_relative_extent_offsets(inode->i_sb, v_cluster, &rec, | ||
| 658 | p_cluster, num_clusters); | ||
| 659 | flags = rec.e_flags; | ||
| 472 | 660 | ||
| 473 | ocfs2_extent_map_insert_rec(inode, rec); | 661 | ocfs2_extent_map_insert_rec(inode, &rec); |
| 474 | } | 662 | } |
| 475 | 663 | ||
| 476 | if (extent_flags) | 664 | if (extent_flags) |
| @@ -478,7 +666,6 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, | |||
| 478 | 666 | ||
| 479 | out: | 667 | out: |
| 480 | brelse(di_bh); | 668 | brelse(di_bh); |
| 481 | brelse(eb_bh); | ||
| 482 | return ret; | 669 | return ret; |
| 483 | } | 670 | } |
| 484 | 671 | ||
| @@ -521,3 +708,114 @@ int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | |||
| 521 | out: | 708 | out: |
| 522 | return ret; | 709 | return ret; |
| 523 | } | 710 | } |
| 711 | |||
| 712 | static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh, | ||
| 713 | struct fiemap_extent_info *fieinfo, | ||
| 714 | u64 map_start) | ||
| 715 | { | ||
| 716 | int ret; | ||
| 717 | unsigned int id_count; | ||
| 718 | struct ocfs2_dinode *di; | ||
| 719 | u64 phys; | ||
| 720 | u32 flags = FIEMAP_EXTENT_DATA_INLINE|FIEMAP_EXTENT_LAST; | ||
| 721 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 722 | |||
| 723 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 724 | id_count = le16_to_cpu(di->id2.i_data.id_count); | ||
| 725 | |||
| 726 | if (map_start < id_count) { | ||
| 727 | phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits; | ||
| 728 | phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
| 729 | |||
| 730 | ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count, | ||
| 731 | flags); | ||
| 732 | if (ret < 0) | ||
| 733 | return ret; | ||
| 734 | } | ||
| 735 | |||
| 736 | return 0; | ||
| 737 | } | ||
| 738 | |||
| 739 | #define OCFS2_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC) | ||
| 740 | |||
| 741 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 742 | u64 map_start, u64 map_len) | ||
| 743 | { | ||
| 744 | int ret, is_last; | ||
| 745 | u32 mapping_end, cpos; | ||
| 746 | unsigned int hole_size; | ||
| 747 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 748 | u64 len_bytes, phys_bytes, virt_bytes; | ||
| 749 | struct buffer_head *di_bh = NULL; | ||
| 750 | struct ocfs2_extent_rec rec; | ||
| 751 | |||
| 752 | ret = fiemap_check_flags(fieinfo, OCFS2_FIEMAP_FLAGS); | ||
| 753 | if (ret) | ||
| 754 | return ret; | ||
| 755 | |||
| 756 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
| 757 | if (ret) { | ||
| 758 | mlog_errno(ret); | ||
| 759 | goto out; | ||
| 760 | } | ||
| 761 | |||
| 762 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 763 | |||
| 764 | /* | ||
| 765 | * Handle inline-data separately. | ||
| 766 | */ | ||
| 767 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
| 768 | ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start); | ||
| 769 | goto out_unlock; | ||
| 770 | } | ||
| 771 | |||
| 772 | cpos = map_start >> osb->s_clustersize_bits; | ||
| 773 | mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, | ||
| 774 | map_start + map_len); | ||
| 775 | mapping_end -= cpos; | ||
| 776 | is_last = 0; | ||
| 777 | while (cpos < mapping_end && !is_last) { | ||
| 778 | u32 fe_flags; | ||
| 779 | |||
| 780 | ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, | ||
| 781 | &hole_size, &rec, &is_last); | ||
| 782 | if (ret) { | ||
| 783 | mlog_errno(ret); | ||
| 784 | goto out; | ||
| 785 | } | ||
| 786 | |||
| 787 | if (rec.e_blkno == 0ULL) { | ||
| 788 | cpos += hole_size; | ||
| 789 | continue; | ||
| 790 | } | ||
| 791 | |||
| 792 | fe_flags = 0; | ||
| 793 | if (rec.e_flags & OCFS2_EXT_UNWRITTEN) | ||
| 794 | fe_flags |= FIEMAP_EXTENT_UNWRITTEN; | ||
| 795 | if (is_last) | ||
| 796 | fe_flags |= FIEMAP_EXTENT_LAST; | ||
| 797 | len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits; | ||
| 798 | phys_bytes = le64_to_cpu(rec.e_blkno) << osb->sb->s_blocksize_bits; | ||
| 799 | virt_bytes = (u64)le32_to_cpu(rec.e_cpos) << osb->s_clustersize_bits; | ||
| 800 | |||
| 801 | ret = fiemap_fill_next_extent(fieinfo, virt_bytes, phys_bytes, | ||
| 802 | len_bytes, fe_flags); | ||
| 803 | if (ret) | ||
| 804 | break; | ||
| 805 | |||
| 806 | cpos = le32_to_cpu(rec.e_cpos)+ le16_to_cpu(rec.e_leaf_clusters); | ||
| 807 | } | ||
| 808 | |||
| 809 | if (ret > 0) | ||
| 810 | ret = 0; | ||
| 811 | |||
| 812 | out_unlock: | ||
| 813 | brelse(di_bh); | ||
| 814 | |||
| 815 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 816 | |||
| 817 | ocfs2_inode_unlock(inode, 0); | ||
| 818 | out: | ||
| 819 | |||
| 820 | return ret; | ||
| 821 | } | ||
diff --git a/fs/ocfs2/extent_map.h b/fs/ocfs2/extent_map.h index de91e3e41a22..1c4aa8b06f34 100644 --- a/fs/ocfs2/extent_map.h +++ b/fs/ocfs2/extent_map.h | |||
| @@ -50,4 +50,11 @@ int ocfs2_get_clusters(struct inode *inode, u32 v_cluster, u32 *p_cluster, | |||
| 50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, | 50 | int ocfs2_extent_map_get_blocks(struct inode *inode, u64 v_blkno, u64 *p_blkno, |
| 51 | u64 *ret_count, unsigned int *extent_flags); | 51 | u64 *ret_count, unsigned int *extent_flags); |
| 52 | 52 | ||
| 53 | int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | ||
| 54 | u64 map_start, u64 map_len); | ||
| 55 | |||
| 56 | int ocfs2_xattr_get_clusters(struct inode *inode, u32 v_cluster, | ||
| 57 | u32 *p_cluster, u32 *num_clusters, | ||
| 58 | struct ocfs2_extent_list *el); | ||
| 59 | |||
| 53 | #endif /* _EXTENT_MAP_H */ | 60 | #endif /* _EXTENT_MAP_H */ |
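ocfs2_fiemap() only walks the mapping; to be reachable from the FS_IOC_FIEMAP ioctl it still has to be wired into the inode operations, which this series presumably does elsewhere in file.c. A sketch of that hookup (the other fields shown are illustrative, not quoted from this diff):

	const struct inode_operations ocfs2_file_iops = {
		.setattr	= ocfs2_setattr,
		.getattr	= ocfs2_getattr,
		.permission	= ocfs2_permission,
		.fiemap		= ocfs2_fiemap,
	};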
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index ec2ed15c3daa..8d3225a78073 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c | |||
| @@ -55,6 +55,7 @@ | |||
| 55 | #include "mmap.h" | 55 | #include "mmap.h" |
| 56 | #include "suballoc.h" | 56 | #include "suballoc.h" |
| 57 | #include "super.h" | 57 | #include "super.h" |
| 58 | #include "xattr.h" | ||
| 58 | 59 | ||
| 59 | #include "buffer_head_io.h" | 60 | #include "buffer_head_io.h" |
| 60 | 61 | ||
| @@ -184,7 +185,7 @@ static int ocfs2_sync_file(struct file *file, | |||
| 184 | goto bail; | 185 | goto bail; |
| 185 | 186 | ||
| 186 | journal = osb->journal->j_journal; | 187 | journal = osb->journal->j_journal; |
| 187 | err = journal_force_commit(journal); | 188 | err = jbd2_journal_force_commit(journal); |
| 188 | 189 | ||
| 189 | bail: | 190 | bail: |
| 190 | mlog_exit(err); | 191 | mlog_exit(err); |
| @@ -488,7 +489,7 @@ bail: | |||
| 488 | } | 489 | } |
| 489 | 490 | ||
| 490 | /* | 491 | /* |
| 491 | * extend allocation only here. | 492 | * extend file allocation only here. |
| 492 | * we'll update all the disk stuff, and oip->alloc_size | 493 | * we'll update all the disk stuff, and oip->alloc_size |
| 493 | * | 494 | * |
| 494 | * expect stuff to be locked, a transaction started and enough data / | 495 | * expect stuff to be locked, a transaction started and enough data / |
| @@ -497,189 +498,25 @@ bail: | |||
| 497 | * Will return -EAGAIN, and a reason if a restart is needed. | 498 | * Will return -EAGAIN, and a reason if a restart is needed. |
| 498 | * If passed in, *reason will always be set, even in error. | 499 | * If passed in, *reason will always be set, even in error. |
| 499 | */ | 500 | */ |
| 500 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 501 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
| 501 | struct inode *inode, | 502 | struct inode *inode, |
| 502 | u32 *logical_offset, | 503 | u32 *logical_offset, |
| 503 | u32 clusters_to_add, | 504 | u32 clusters_to_add, |
| 504 | int mark_unwritten, | 505 | int mark_unwritten, |
| 505 | struct buffer_head *fe_bh, | 506 | struct buffer_head *fe_bh, |
| 506 | handle_t *handle, | 507 | handle_t *handle, |
| 507 | struct ocfs2_alloc_context *data_ac, | 508 | struct ocfs2_alloc_context *data_ac, |
| 508 | struct ocfs2_alloc_context *meta_ac, | 509 | struct ocfs2_alloc_context *meta_ac, |
| 509 | enum ocfs2_alloc_restarted *reason_ret) | 510 | enum ocfs2_alloc_restarted *reason_ret) |
| 510 | { | 511 | { |
| 511 | int status = 0; | 512 | int ret; |
| 512 | int free_extents; | 513 | struct ocfs2_extent_tree et; |
| 513 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data; | ||
| 514 | enum ocfs2_alloc_restarted reason = RESTART_NONE; | ||
| 515 | u32 bit_off, num_bits; | ||
| 516 | u64 block; | ||
| 517 | u8 flags = 0; | ||
| 518 | |||
| 519 | BUG_ON(!clusters_to_add); | ||
| 520 | |||
| 521 | if (mark_unwritten) | ||
| 522 | flags = OCFS2_EXT_UNWRITTEN; | ||
| 523 | |||
| 524 | free_extents = ocfs2_num_free_extents(osb, inode, fe); | ||
| 525 | if (free_extents < 0) { | ||
| 526 | status = free_extents; | ||
| 527 | mlog_errno(status); | ||
| 528 | goto leave; | ||
| 529 | } | ||
| 530 | |||
| 531 | /* there are two cases which could cause us to EAGAIN in the | ||
| 532 | * we-need-more-metadata case: | ||
| 533 | * 1) we haven't reserved *any* | ||
| 534 | * 2) we are so fragmented, we've needed to add metadata too | ||
| 535 | * many times. */ | ||
| 536 | if (!free_extents && !meta_ac) { | ||
| 537 | mlog(0, "we haven't reserved any metadata!\n"); | ||
| 538 | status = -EAGAIN; | ||
| 539 | reason = RESTART_META; | ||
| 540 | goto leave; | ||
| 541 | } else if ((!free_extents) | ||
| 542 | && (ocfs2_alloc_context_bits_left(meta_ac) | ||
| 543 | < ocfs2_extend_meta_needed(fe))) { | ||
| 544 | mlog(0, "filesystem is really fragmented...\n"); | ||
| 545 | status = -EAGAIN; | ||
| 546 | reason = RESTART_META; | ||
| 547 | goto leave; | ||
| 548 | } | ||
| 549 | |||
| 550 | status = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
| 551 | clusters_to_add, &bit_off, &num_bits); | ||
| 552 | if (status < 0) { | ||
| 553 | if (status != -ENOSPC) | ||
| 554 | mlog_errno(status); | ||
| 555 | goto leave; | ||
| 556 | } | ||
| 557 | |||
| 558 | BUG_ON(num_bits > clusters_to_add); | ||
| 559 | |||
| 560 | /* reserve our write early -- insert_extent may update the inode */ | ||
| 561 | status = ocfs2_journal_access(handle, inode, fe_bh, | ||
| 562 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 563 | if (status < 0) { | ||
| 564 | mlog_errno(status); | ||
| 565 | goto leave; | ||
| 566 | } | ||
| 567 | |||
| 568 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
| 569 | mlog(0, "Allocating %u clusters at block %u for inode %llu\n", | ||
| 570 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 571 | status = ocfs2_insert_extent(osb, handle, inode, fe_bh, | ||
| 572 | *logical_offset, block, num_bits, | ||
| 573 | flags, meta_ac); | ||
| 574 | if (status < 0) { | ||
| 575 | mlog_errno(status); | ||
| 576 | goto leave; | ||
| 577 | } | ||
| 578 | |||
| 579 | status = ocfs2_journal_dirty(handle, fe_bh); | ||
| 580 | if (status < 0) { | ||
| 581 | mlog_errno(status); | ||
| 582 | goto leave; | ||
| 583 | } | ||
| 584 | |||
| 585 | clusters_to_add -= num_bits; | ||
| 586 | *logical_offset += num_bits; | ||
| 587 | |||
| 588 | if (clusters_to_add) { | ||
| 589 | mlog(0, "need to alloc once more, clusters = %u, wanted = " | ||
| 590 | "%u\n", fe->i_clusters, clusters_to_add); | ||
| 591 | status = -EAGAIN; | ||
| 592 | reason = RESTART_TRANS; | ||
| 593 | } | ||
| 594 | |||
| 595 | leave: | ||
| 596 | mlog_exit(status); | ||
| 597 | if (reason_ret) | ||
| 598 | *reason_ret = reason; | ||
| 599 | return status; | ||
| 600 | } | ||
| 601 | |||
| 602 | /* | ||
| 603 | * For a given allocation, determine which allocators will need to be | ||
| 604 | * accessed, and lock them, reserving the appropriate number of bits. | ||
| 605 | * | ||
| 606 | * Sparse file systems call this from ocfs2_write_begin_nolock() | ||
| 607 | * and ocfs2_allocate_unwritten_extents(). | ||
| 608 | * | ||
| 609 | * File systems which don't support holes call this from | ||
| 610 | * ocfs2_extend_allocation(). | ||
| 611 | */ | ||
| 612 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||
| 613 | u32 clusters_to_add, u32 extents_to_split, | ||
| 614 | struct ocfs2_alloc_context **data_ac, | ||
| 615 | struct ocfs2_alloc_context **meta_ac) | ||
| 616 | { | ||
| 617 | int ret = 0, num_free_extents; | ||
| 618 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
| 619 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 620 | |||
| 621 | *meta_ac = NULL; | ||
| 622 | if (data_ac) | ||
| 623 | *data_ac = NULL; | ||
| 624 | |||
| 625 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
| 626 | |||
| 627 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " | ||
| 628 | "clusters_to_add = %u, extents_to_split = %u\n", | ||
| 629 | (unsigned long long)OCFS2_I(inode)->ip_blkno, (long long)i_size_read(inode), | ||
| 630 | le32_to_cpu(di->i_clusters), clusters_to_add, extents_to_split); | ||
| 631 | |||
| 632 | num_free_extents = ocfs2_num_free_extents(osb, inode, di); | ||
| 633 | if (num_free_extents < 0) { | ||
| 634 | ret = num_free_extents; | ||
| 635 | mlog_errno(ret); | ||
| 636 | goto out; | ||
| 637 | } | ||
| 638 | |||
| 639 | /* | ||
| 640 | * Sparse allocation file systems need to be more conservative | ||
| 641 | * with reserving room for expansion - the actual allocation | ||
| 642 | * happens while we've got a journal handle open so re-taking | ||
| 643 | * a cluster lock (because we ran out of room for another | ||
| 644 | * extent) will violate ordering rules. | ||
| 645 | * | ||
| 646 | * Most of the time we'll only be seeing this 1 cluster at a time | ||
| 647 | * anyway. | ||
| 648 | * | ||
| 649 | * Always lock for any unwritten extents - we might want to | ||
| 650 | * add blocks during a split. | ||
| 651 | */ | ||
| 652 | if (!num_free_extents || | ||
| 653 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { | ||
| 654 | ret = ocfs2_reserve_new_metadata(osb, di, meta_ac); | ||
| 655 | if (ret < 0) { | ||
| 656 | if (ret != -ENOSPC) | ||
| 657 | mlog_errno(ret); | ||
| 658 | goto out; | ||
| 659 | } | ||
| 660 | } | ||
| 661 | |||
| 662 | if (clusters_to_add == 0) | ||
| 663 | goto out; | ||
| 664 | |||
| 665 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||
| 666 | if (ret < 0) { | ||
| 667 | if (ret != -ENOSPC) | ||
| 668 | mlog_errno(ret); | ||
| 669 | goto out; | ||
| 670 | } | ||
| 671 | |||
| 672 | out: | ||
| 673 | if (ret) { | ||
| 674 | if (*meta_ac) { | ||
| 675 | ocfs2_free_alloc_context(*meta_ac); | ||
| 676 | *meta_ac = NULL; | ||
| 677 | } | ||
| 678 | 514 | ||
| 679 | /* | 515 | ocfs2_init_dinode_extent_tree(&et, inode, fe_bh); |
| 680 | * We cannot have an error and a non null *data_ac. | 516 | ret = ocfs2_add_clusters_in_btree(osb, inode, logical_offset, |
| 681 | */ | 517 | clusters_to_add, mark_unwritten, |
| 682 | } | 518 | &et, handle, |
| 519 | data_ac, meta_ac, reason_ret); | ||
| 683 | 520 | ||
| 684 | return ret; | 521 | return ret; |
| 685 | } | 522 | } |
| @@ -698,6 +535,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
| 698 | struct ocfs2_alloc_context *meta_ac = NULL; | 535 | struct ocfs2_alloc_context *meta_ac = NULL; |
| 699 | enum ocfs2_alloc_restarted why; | 536 | enum ocfs2_alloc_restarted why; |
| 700 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | 537 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); |
| 538 | struct ocfs2_extent_tree et; | ||
| 701 | 539 | ||
| 702 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); | 540 | mlog_entry("(clusters_to_add = %u)\n", clusters_to_add); |
| 703 | 541 | ||
| @@ -707,8 +545,7 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
| 707 | */ | 545 | */ |
| 708 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); | 546 | BUG_ON(mark_unwritten && !ocfs2_sparse_alloc(osb)); |
| 709 | 547 | ||
| 710 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, &bh, | 548 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, &bh); |
| 711 | OCFS2_BH_CACHED, inode); | ||
| 712 | if (status < 0) { | 549 | if (status < 0) { |
| 713 | mlog_errno(status); | 550 | mlog_errno(status); |
| 714 | goto leave; | 551 | goto leave; |
| @@ -724,14 +561,21 @@ static int __ocfs2_extend_allocation(struct inode *inode, u32 logical_start, | |||
| 724 | restart_all: | 561 | restart_all: |
| 725 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); | 562 | BUG_ON(le32_to_cpu(fe->i_clusters) != OCFS2_I(inode)->ip_clusters); |
| 726 | 563 | ||
| 727 | status = ocfs2_lock_allocators(inode, fe, clusters_to_add, 0, &data_ac, | 564 | mlog(0, "extend inode %llu, i_size = %lld, di->i_clusters = %u, " |
| 728 | &meta_ac); | 565 | "clusters_to_add = %u\n", |
| 566 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 567 | (long long)i_size_read(inode), le32_to_cpu(fe->i_clusters), | ||
| 568 | clusters_to_add); | ||
| 569 | ocfs2_init_dinode_extent_tree(&et, inode, bh); | ||
| 570 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
| 571 | &data_ac, &meta_ac); | ||
| 729 | if (status) { | 572 | if (status) { |
| 730 | mlog_errno(status); | 573 | mlog_errno(status); |
| 731 | goto leave; | 574 | goto leave; |
| 732 | } | 575 | } |
| 733 | 576 | ||
| 734 | credits = ocfs2_calc_extend_credits(osb->sb, fe, clusters_to_add); | 577 | credits = ocfs2_calc_extend_credits(osb->sb, &fe->id2.i_list, |
| 578 | clusters_to_add); | ||
| 735 | handle = ocfs2_start_trans(osb, credits); | 579 | handle = ocfs2_start_trans(osb, credits); |
| 736 | if (IS_ERR(handle)) { | 580 | if (IS_ERR(handle)) { |
| 737 | status = PTR_ERR(handle); | 581 | status = PTR_ERR(handle); |
| @@ -753,16 +597,16 @@ restarted_transaction: | |||
| 753 | 597 | ||
| 754 | prev_clusters = OCFS2_I(inode)->ip_clusters; | 598 | prev_clusters = OCFS2_I(inode)->ip_clusters; |
| 755 | 599 | ||
| 756 | status = ocfs2_do_extend_allocation(osb, | 600 | status = ocfs2_add_inode_data(osb, |
| 757 | inode, | 601 | inode, |
| 758 | &logical_start, | 602 | &logical_start, |
| 759 | clusters_to_add, | 603 | clusters_to_add, |
| 760 | mark_unwritten, | 604 | mark_unwritten, |
| 761 | bh, | 605 | bh, |
| 762 | handle, | 606 | handle, |
| 763 | data_ac, | 607 | data_ac, |
| 764 | meta_ac, | 608 | meta_ac, |
| 765 | &why); | 609 | &why); |
| 766 | if ((status < 0) && (status != -EAGAIN)) { | 610 | if ((status < 0) && (status != -EAGAIN)) { |
| 767 | if (status != -ENOSPC) | 611 | if (status != -ENOSPC) |
| 768 | mlog_errno(status); | 612 | mlog_errno(status); |
| @@ -789,7 +633,7 @@ restarted_transaction: | |||
| 789 | mlog(0, "restarting transaction.\n"); | 633 | mlog(0, "restarting transaction.\n"); |
| 790 | /* TODO: This can be more intelligent. */ | 634 | /* TODO: This can be more intelligent. */ |
| 791 | credits = ocfs2_calc_extend_credits(osb->sb, | 635 | credits = ocfs2_calc_extend_credits(osb->sb, |
| 792 | fe, | 636 | &fe->id2.i_list, |
| 793 | clusters_to_add); | 637 | clusters_to_add); |
| 794 | status = ocfs2_extend_trans(handle, credits); | 638 | status = ocfs2_extend_trans(handle, credits); |
| 795 | if (status < 0) { | 639 | if (status < 0) { |
| @@ -826,10 +670,8 @@ leave: | |||
| 826 | restart_func = 0; | 670 | restart_func = 0; |
| 827 | goto restart_all; | 671 | goto restart_all; |
| 828 | } | 672 | } |
| 829 | if (bh) { | 673 | brelse(bh); |
| 830 | brelse(bh); | 674 | bh = NULL; |
| 831 | bh = NULL; | ||
| 832 | } | ||
| 833 | 675 | ||
| 834 | mlog_exit(status); | 676 | mlog_exit(status); |
| 835 | return status; | 677 | return status; |
| @@ -1096,9 +938,15 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr) | |||
| 1096 | goto bail_unlock; | 938 | goto bail_unlock; |
| 1097 | } | 939 | } |
| 1098 | 940 | ||
| 1099 | if (i_size_read(inode) > attr->ia_size) | 941 | if (i_size_read(inode) > attr->ia_size) { |
| 942 | if (ocfs2_should_order_data(inode)) { | ||
| 943 | status = ocfs2_begin_ordered_truncate(inode, | ||
| 944 | attr->ia_size); | ||
| 945 | if (status) | ||
| 946 | goto bail_unlock; | ||
| 947 | } | ||
| 1100 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); | 948 | status = ocfs2_truncate_file(inode, bh, attr->ia_size); |
| 1101 | else | 949 | } else |
| 1102 | status = ocfs2_extend_file(inode, bh, attr->ia_size); | 950 | status = ocfs2_extend_file(inode, bh, attr->ia_size); |
| 1103 | if (status < 0) { | 951 | if (status < 0) { |
| 1104 | if (status != -ENOSPC) | 952 | if (status != -ENOSPC) |
| @@ -1140,8 +988,7 @@ bail_unlock_rw: | |||
| 1140 | if (size_change) | 988 | if (size_change) |
| 1141 | ocfs2_rw_unlock(inode, 1); | 989 | ocfs2_rw_unlock(inode, 1); |
| 1142 | bail: | 990 | bail: |
| 1143 | if (bh) | 991 | brelse(bh); |
| 1144 | brelse(bh); | ||
| 1145 | 992 | ||
| 1146 | mlog_exit(status); | 993 | mlog_exit(status); |
| 1147 | return status; | 994 | return status; |
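[Editorial note] In the reworked ocfs2_setattr() hunk above, a size-reducing truncate on an ordered-data inode now calls the jbd2 ordered-truncate hook before the file is actually shrunk, so dirty pages past the new size are dealt with before the size change commits. The sketch below models only that control flow with stub helpers; it is an illustration, not the real ocfs2/jbd2 implementation.

#include <stdio.h>

static long long cur_size = 4096;     /* pretend current i_size */
static int data_ordered = 1;          /* data=ordered style mount */

static int begin_ordered_truncate(long long new_size)
{
        printf("jbd2: register ordered truncate to %lld\n", new_size);
        return 0;
}
static int do_truncate(long long new_size) { cur_size = new_size; return 0; }
static int do_extend(long long new_size)   { cur_size = new_size; return 0; }

/* Mirrors the truncate-vs-extend branch added to ocfs2_setattr(). */
static int setattr_size(long long new_size)
{
        int status = 0;

        if (cur_size > new_size) {
                if (data_ordered) {
                        status = begin_ordered_truncate(new_size);
                        if (status)
                                return status;   /* bail before truncating */
                }
                status = do_truncate(new_size);
        } else {
                status = do_extend(new_size);
        }
        return status;
}

int main(void) { return setattr_size(1024); }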
| @@ -1284,8 +1131,7 @@ static int ocfs2_write_remove_suid(struct inode *inode) | |||
| 1284 | struct buffer_head *bh = NULL; | 1131 | struct buffer_head *bh = NULL; |
| 1285 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 1132 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
| 1286 | 1133 | ||
| 1287 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 1134 | ret = ocfs2_read_block(inode, oi->ip_blkno, &bh); |
| 1288 | oi->ip_blkno, &bh, OCFS2_BH_CACHED, inode); | ||
| 1289 | if (ret < 0) { | 1135 | if (ret < 0) { |
| 1290 | mlog_errno(ret); | 1136 | mlog_errno(ret); |
| 1291 | goto out; | 1137 | goto out; |
| @@ -1311,9 +1157,8 @@ static int ocfs2_allocate_unwritten_extents(struct inode *inode, | |||
| 1311 | struct buffer_head *di_bh = NULL; | 1157 | struct buffer_head *di_bh = NULL; |
| 1312 | 1158 | ||
| 1313 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | 1159 | if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) { |
| 1314 | ret = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 1160 | ret = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, |
| 1315 | OCFS2_I(inode)->ip_blkno, &di_bh, | 1161 | &di_bh); |
| 1316 | OCFS2_BH_CACHED, inode); | ||
| 1317 | if (ret) { | 1162 | if (ret) { |
| 1318 | mlog_errno(ret); | 1163 | mlog_errno(ret); |
| 1319 | goto out; | 1164 | goto out; |
| @@ -1394,8 +1239,11 @@ static int __ocfs2_remove_inode_range(struct inode *inode, | |||
| 1394 | handle_t *handle; | 1239 | handle_t *handle; |
| 1395 | struct ocfs2_alloc_context *meta_ac = NULL; | 1240 | struct ocfs2_alloc_context *meta_ac = NULL; |
| 1396 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | 1241 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; |
| 1242 | struct ocfs2_extent_tree et; | ||
| 1397 | 1243 | ||
| 1398 | ret = ocfs2_lock_allocators(inode, di, 0, 1, NULL, &meta_ac); | 1244 | ocfs2_init_dinode_extent_tree(&et, inode, di_bh); |
| 1245 | |||
| 1246 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
| 1399 | if (ret) { | 1247 | if (ret) { |
| 1400 | mlog_errno(ret); | 1248 | mlog_errno(ret); |
| 1401 | return ret; | 1249 | return ret; |
| @@ -1425,7 +1273,7 @@ static int __ocfs2_remove_inode_range(struct inode *inode, | |||
| 1425 | goto out; | 1273 | goto out; |
| 1426 | } | 1274 | } |
| 1427 | 1275 | ||
| 1428 | ret = ocfs2_remove_extent(inode, di_bh, cpos, len, handle, meta_ac, | 1276 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, |
| 1429 | dealloc); | 1277 | dealloc); |
| 1430 | if (ret) { | 1278 | if (ret) { |
| 1431 | mlog_errno(ret); | 1279 | mlog_errno(ret); |
| @@ -2040,7 +1888,7 @@ out_dio: | |||
| 2040 | */ | 1888 | */ |
| 2041 | if (old_size != i_size_read(inode) || | 1889 | if (old_size != i_size_read(inode) || |
| 2042 | old_clusters != OCFS2_I(inode)->ip_clusters) { | 1890 | old_clusters != OCFS2_I(inode)->ip_clusters) { |
| 2043 | ret = journal_force_commit(osb->journal->j_journal); | 1891 | ret = jbd2_journal_force_commit(osb->journal->j_journal); |
| 2044 | if (ret < 0) | 1892 | if (ret < 0) |
| 2045 | written = ret; | 1893 | written = ret; |
| 2046 | } | 1894 | } |
| @@ -2227,7 +2075,12 @@ const struct inode_operations ocfs2_file_iops = { | |||
| 2227 | .setattr = ocfs2_setattr, | 2075 | .setattr = ocfs2_setattr, |
| 2228 | .getattr = ocfs2_getattr, | 2076 | .getattr = ocfs2_getattr, |
| 2229 | .permission = ocfs2_permission, | 2077 | .permission = ocfs2_permission, |
| 2078 | .setxattr = generic_setxattr, | ||
| 2079 | .getxattr = generic_getxattr, | ||
| 2080 | .listxattr = ocfs2_listxattr, | ||
| 2081 | .removexattr = generic_removexattr, | ||
| 2230 | .fallocate = ocfs2_fallocate, | 2082 | .fallocate = ocfs2_fallocate, |
| 2083 | .fiemap = ocfs2_fiemap, | ||
| 2231 | }; | 2084 | }; |
| 2232 | 2085 | ||
| 2233 | const struct inode_operations ocfs2_special_file_iops = { | 2086 | const struct inode_operations ocfs2_special_file_iops = { |
| @@ -2236,6 +2089,10 @@ const struct inode_operations ocfs2_special_file_iops = { | |||
| 2236 | .permission = ocfs2_permission, | 2089 | .permission = ocfs2_permission, |
| 2237 | }; | 2090 | }; |
| 2238 | 2091 | ||
| 2092 | /* | ||
| 2093 | * Other than ->lock, keep ocfs2_fops and ocfs2_dops in sync with | ||
| 2094 | * ocfs2_fops_no_plocks and ocfs2_dops_no_plocks! | ||
| 2095 | */ | ||
| 2239 | const struct file_operations ocfs2_fops = { | 2096 | const struct file_operations ocfs2_fops = { |
| 2240 | .llseek = generic_file_llseek, | 2097 | .llseek = generic_file_llseek, |
| 2241 | .read = do_sync_read, | 2098 | .read = do_sync_read, |
| @@ -2250,6 +2107,7 @@ const struct file_operations ocfs2_fops = { | |||
| 2250 | #ifdef CONFIG_COMPAT | 2107 | #ifdef CONFIG_COMPAT |
| 2251 | .compat_ioctl = ocfs2_compat_ioctl, | 2108 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2252 | #endif | 2109 | #endif |
| 2110 | .lock = ocfs2_lock, | ||
| 2253 | .flock = ocfs2_flock, | 2111 | .flock = ocfs2_flock, |
| 2254 | .splice_read = ocfs2_file_splice_read, | 2112 | .splice_read = ocfs2_file_splice_read, |
| 2255 | .splice_write = ocfs2_file_splice_write, | 2113 | .splice_write = ocfs2_file_splice_write, |
| @@ -2266,5 +2124,51 @@ const struct file_operations ocfs2_dops = { | |||
| 2266 | #ifdef CONFIG_COMPAT | 2124 | #ifdef CONFIG_COMPAT |
| 2267 | .compat_ioctl = ocfs2_compat_ioctl, | 2125 | .compat_ioctl = ocfs2_compat_ioctl, |
| 2268 | #endif | 2126 | #endif |
| 2127 | .lock = ocfs2_lock, | ||
| 2128 | .flock = ocfs2_flock, | ||
| 2129 | }; | ||
| 2130 | |||
| 2131 | /* | ||
| 2132 | * POSIX-lockless variants of our file_operations. | ||
| 2133 | * | ||
| 2134 | * These will be used if the underlying cluster stack does not support | ||
| 2135 | * posix file locking, if the user passes the "localflocks" mount | ||
| 2136 | * option, or if we have a local-only fs. | ||
| 2137 | * | ||
| 2138 | * ocfs2_flock is in here because all stacks handle UNIX file locks, | ||
| 2139 | * so we still want it in the case of no stack support for | ||
| 2140 | * plocks. Internally, it will do the right thing when asked to ignore | ||
| 2141 | * the cluster. | ||
| 2142 | */ | ||
| 2143 | const struct file_operations ocfs2_fops_no_plocks = { | ||
| 2144 | .llseek = generic_file_llseek, | ||
| 2145 | .read = do_sync_read, | ||
| 2146 | .write = do_sync_write, | ||
| 2147 | .mmap = ocfs2_mmap, | ||
| 2148 | .fsync = ocfs2_sync_file, | ||
| 2149 | .release = ocfs2_file_release, | ||
| 2150 | .open = ocfs2_file_open, | ||
| 2151 | .aio_read = ocfs2_file_aio_read, | ||
| 2152 | .aio_write = ocfs2_file_aio_write, | ||
| 2153 | .unlocked_ioctl = ocfs2_ioctl, | ||
| 2154 | #ifdef CONFIG_COMPAT | ||
| 2155 | .compat_ioctl = ocfs2_compat_ioctl, | ||
| 2156 | #endif | ||
| 2157 | .flock = ocfs2_flock, | ||
| 2158 | .splice_read = ocfs2_file_splice_read, | ||
| 2159 | .splice_write = ocfs2_file_splice_write, | ||
| 2160 | }; | ||
| 2161 | |||
| 2162 | const struct file_operations ocfs2_dops_no_plocks = { | ||
| 2163 | .llseek = generic_file_llseek, | ||
| 2164 | .read = generic_read_dir, | ||
| 2165 | .readdir = ocfs2_readdir, | ||
| 2166 | .fsync = ocfs2_sync_file, | ||
| 2167 | .release = ocfs2_dir_release, | ||
| 2168 | .open = ocfs2_dir_open, | ||
| 2169 | .unlocked_ioctl = ocfs2_ioctl, | ||
| 2170 | #ifdef CONFIG_COMPAT | ||
| 2171 | .compat_ioctl = ocfs2_compat_ioctl, | ||
| 2172 | #endif | ||
| 2269 | .flock = ocfs2_flock, | 2173 | .flock = ocfs2_flock, |
| 2270 | }; | 2174 | }; |
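[Editorial note] The comment ahead of the new file_operations tables warns that the _no_plocks variants must stay in sync with ocfs2_fops/ocfs2_dops except for .lock. The actual selection happens in ocfs2_populate_inode() (see the inode.c hunk later in this diff). Below is a stand-alone model of that choice, with the three conditions from the diff reduced to flags; it is illustrative only.

#include <stdbool.h>

struct fops_tag { const char *name; };
static const struct fops_tag fops_plocks    = { "ocfs2_fops" };
static const struct fops_tag fops_no_plocks = { "ocfs2_fops_no_plocks" };

/* Mirrors the use_plocks decision added to ocfs2_populate_inode(): POSIX
 * file locks are only wired up when the cluster stack supports plocks,
 * the fs is not a local-only mount, and "localflocks" was not requested. */
static const struct fops_tag *pick_fops(bool mount_localflocks,
                                        bool mount_local,
                                        bool stack_supports_plocks)
{
        if (mount_localflocks || mount_local || !stack_supports_plocks)
                return &fops_no_plocks;
        return &fops_plocks;
}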
diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 1e27b4d017ea..e92382cbca5f 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h | |||
| @@ -28,9 +28,12 @@ | |||
| 28 | 28 | ||
| 29 | extern const struct file_operations ocfs2_fops; | 29 | extern const struct file_operations ocfs2_fops; |
| 30 | extern const struct file_operations ocfs2_dops; | 30 | extern const struct file_operations ocfs2_dops; |
| 31 | extern const struct file_operations ocfs2_fops_no_plocks; | ||
| 32 | extern const struct file_operations ocfs2_dops_no_plocks; | ||
| 31 | extern const struct inode_operations ocfs2_file_iops; | 33 | extern const struct inode_operations ocfs2_file_iops; |
| 32 | extern const struct inode_operations ocfs2_special_file_iops; | 34 | extern const struct inode_operations ocfs2_special_file_iops; |
| 33 | struct ocfs2_alloc_context; | 35 | struct ocfs2_alloc_context; |
| 36 | enum ocfs2_alloc_restarted; | ||
| 34 | 37 | ||
| 35 | struct ocfs2_file_private { | 38 | struct ocfs2_file_private { |
| 36 | struct file *fp_file; | 39 | struct file *fp_file; |
| @@ -38,27 +41,18 @@ struct ocfs2_file_private { | |||
| 38 | struct ocfs2_lock_res fp_flock; | 41 | struct ocfs2_lock_res fp_flock; |
| 39 | }; | 42 | }; |
| 40 | 43 | ||
| 41 | enum ocfs2_alloc_restarted { | 44 | int ocfs2_add_inode_data(struct ocfs2_super *osb, |
| 42 | RESTART_NONE = 0, | 45 | struct inode *inode, |
| 43 | RESTART_TRANS, | 46 | u32 *logical_offset, |
| 44 | RESTART_META | 47 | u32 clusters_to_add, |
| 45 | }; | 48 | int mark_unwritten, |
| 46 | int ocfs2_do_extend_allocation(struct ocfs2_super *osb, | 49 | struct buffer_head *fe_bh, |
| 47 | struct inode *inode, | 50 | handle_t *handle, |
| 48 | u32 *logical_offset, | 51 | struct ocfs2_alloc_context *data_ac, |
| 49 | u32 clusters_to_add, | 52 | struct ocfs2_alloc_context *meta_ac, |
| 50 | int mark_unwritten, | 53 | enum ocfs2_alloc_restarted *reason_ret); |
| 51 | struct buffer_head *fe_bh, | ||
| 52 | handle_t *handle, | ||
| 53 | struct ocfs2_alloc_context *data_ac, | ||
| 54 | struct ocfs2_alloc_context *meta_ac, | ||
| 55 | enum ocfs2_alloc_restarted *reason_ret); | ||
| 56 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, | 54 | int ocfs2_extend_no_holes(struct inode *inode, u64 new_i_size, |
| 57 | u64 zero_to); | 55 | u64 zero_to); |
| 58 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_dinode *di, | ||
| 59 | u32 clusters_to_add, u32 extents_to_split, | ||
| 60 | struct ocfs2_alloc_context **data_ac, | ||
| 61 | struct ocfs2_alloc_context **meta_ac); | ||
| 62 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); | 56 | int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); |
| 63 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, | 57 | int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, |
| 64 | struct kstat *stat); | 58 | struct kstat *stat); |
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c index 7e9e4c79aec7..4903688f72a9 100644 --- a/fs/ocfs2/inode.c +++ b/fs/ocfs2/inode.c | |||
| @@ -49,6 +49,7 @@ | |||
| 49 | #include "symlink.h" | 49 | #include "symlink.h" |
| 50 | #include "sysfile.h" | 50 | #include "sysfile.h" |
| 51 | #include "uptodate.h" | 51 | #include "uptodate.h" |
| 52 | #include "xattr.h" | ||
| 52 | 53 | ||
| 53 | #include "buffer_head_io.h" | 54 | #include "buffer_head_io.h" |
| 54 | 55 | ||
| @@ -219,6 +220,7 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 219 | struct super_block *sb; | 220 | struct super_block *sb; |
| 220 | struct ocfs2_super *osb; | 221 | struct ocfs2_super *osb; |
| 221 | int status = -EINVAL; | 222 | int status = -EINVAL; |
| 223 | int use_plocks = 1; | ||
| 222 | 224 | ||
| 223 | mlog_entry("(0x%p, size:%llu)\n", inode, | 225 | mlog_entry("(0x%p, size:%llu)\n", inode, |
| 224 | (unsigned long long)le64_to_cpu(fe->i_size)); | 226 | (unsigned long long)le64_to_cpu(fe->i_size)); |
| @@ -226,6 +228,10 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 226 | sb = inode->i_sb; | 228 | sb = inode->i_sb; |
| 227 | osb = OCFS2_SB(sb); | 229 | osb = OCFS2_SB(sb); |
| 228 | 230 | ||
| 231 | if ((osb->s_mount_opt & OCFS2_MOUNT_LOCALFLOCKS) || | ||
| 232 | ocfs2_mount_local(osb) || !ocfs2_stack_supports_plocks()) | ||
| 233 | use_plocks = 0; | ||
| 234 | |||
| 229 | /* this means that read_inode cannot create a superblock inode | 235 | /* this means that read_inode cannot create a superblock inode |
| 230 | * today. change if needed. */ | 236 | * today. change if needed. */ |
| 231 | if (!OCFS2_IS_VALID_DINODE(fe) || | 237 | if (!OCFS2_IS_VALID_DINODE(fe) || |
| @@ -295,13 +301,19 @@ int ocfs2_populate_inode(struct inode *inode, struct ocfs2_dinode *fe, | |||
| 295 | 301 | ||
| 296 | switch (inode->i_mode & S_IFMT) { | 302 | switch (inode->i_mode & S_IFMT) { |
| 297 | case S_IFREG: | 303 | case S_IFREG: |
| 298 | inode->i_fop = &ocfs2_fops; | 304 | if (use_plocks) |
| 305 | inode->i_fop = &ocfs2_fops; | ||
| 306 | else | ||
| 307 | inode->i_fop = &ocfs2_fops_no_plocks; | ||
| 299 | inode->i_op = &ocfs2_file_iops; | 308 | inode->i_op = &ocfs2_file_iops; |
| 300 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 309 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
| 301 | break; | 310 | break; |
| 302 | case S_IFDIR: | 311 | case S_IFDIR: |
| 303 | inode->i_op = &ocfs2_dir_iops; | 312 | inode->i_op = &ocfs2_dir_iops; |
| 304 | inode->i_fop = &ocfs2_dops; | 313 | if (use_plocks) |
| 314 | inode->i_fop = &ocfs2_dops; | ||
| 315 | else | ||
| 316 | inode->i_fop = &ocfs2_dops_no_plocks; | ||
| 305 | i_size_write(inode, le64_to_cpu(fe->i_size)); | 317 | i_size_write(inode, le64_to_cpu(fe->i_size)); |
| 306 | break; | 318 | break; |
| 307 | case S_IFLNK: | 319 | case S_IFLNK: |
| @@ -448,8 +460,11 @@ static int ocfs2_read_locked_inode(struct inode *inode, | |||
| 448 | } | 460 | } |
| 449 | } | 461 | } |
| 450 | 462 | ||
| 451 | status = ocfs2_read_block(osb, args->fi_blkno, &bh, 0, | 463 | if (can_lock) |
| 452 | can_lock ? inode : NULL); | 464 | status = ocfs2_read_blocks(inode, args->fi_blkno, 1, &bh, |
| 465 | OCFS2_BH_IGNORE_CACHE); | ||
| 466 | else | ||
| 467 | status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh); | ||
| 453 | if (status < 0) { | 468 | if (status < 0) { |
| 454 | mlog_errno(status); | 469 | mlog_errno(status); |
| 455 | goto bail; | 470 | goto bail; |
| @@ -522,6 +537,9 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb, | |||
| 522 | * data and fast symlinks. | 537 | * data and fast symlinks. |
| 523 | */ | 538 | */ |
| 524 | if (fe->i_clusters) { | 539 | if (fe->i_clusters) { |
| 540 | if (ocfs2_should_order_data(inode)) | ||
| 541 | ocfs2_begin_ordered_truncate(inode, 0); | ||
| 542 | |||
| 525 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); | 543 | handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); |
| 526 | if (IS_ERR(handle)) { | 544 | if (IS_ERR(handle)) { |
| 527 | status = PTR_ERR(handle); | 545 | status = PTR_ERR(handle); |
| @@ -730,6 +748,13 @@ static int ocfs2_wipe_inode(struct inode *inode, | |||
| 730 | goto bail_unlock_dir; | 748 | goto bail_unlock_dir; |
| 731 | } | 749 | } |
| 732 | 750 | ||
| 751 | /*Free extended attribute resources associated with this inode.*/ | ||
| 752 | status = ocfs2_xattr_remove(inode, di_bh); | ||
| 753 | if (status < 0) { | ||
| 754 | mlog_errno(status); | ||
| 755 | goto bail_unlock_dir; | ||
| 756 | } | ||
| 757 | |||
| 733 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, | 758 | status = ocfs2_remove_inode(inode, di_bh, orphan_dir_inode, |
| 734 | orphan_dir_bh); | 759 | orphan_dir_bh); |
| 735 | if (status < 0) | 760 | if (status < 0) |
| @@ -1081,6 +1106,8 @@ void ocfs2_clear_inode(struct inode *inode) | |||
| 1081 | oi->ip_last_trans = 0; | 1106 | oi->ip_last_trans = 0; |
| 1082 | oi->ip_dir_start_lookup = 0; | 1107 | oi->ip_dir_start_lookup = 0; |
| 1083 | oi->ip_blkno = 0ULL; | 1108 | oi->ip_blkno = 0ULL; |
| 1109 | jbd2_journal_release_jbd_inode(OCFS2_SB(inode->i_sb)->journal->j_journal, | ||
| 1110 | &oi->ip_jinode); | ||
| 1084 | 1111 | ||
| 1085 | bail: | 1112 | bail: |
| 1086 | mlog_exit_void(); | 1113 | mlog_exit_void(); |
| @@ -1107,58 +1134,6 @@ void ocfs2_drop_inode(struct inode *inode) | |||
| 1107 | } | 1134 | } |
| 1108 | 1135 | ||
| 1109 | /* | 1136 | /* |
| 1110 | * TODO: this should probably be merged into ocfs2_get_block | ||
| 1111 | * | ||
| 1112 | * However, you now need to pay attention to the cont_prepare_write() | ||
| 1113 | * stuff in ocfs2_get_block (that is, ocfs2_get_block pretty much | ||
| 1114 | * expects never to extend). | ||
| 1115 | */ | ||
| 1116 | struct buffer_head *ocfs2_bread(struct inode *inode, | ||
| 1117 | int block, int *err, int reada) | ||
| 1118 | { | ||
| 1119 | struct buffer_head *bh = NULL; | ||
| 1120 | int tmperr; | ||
| 1121 | u64 p_blkno; | ||
| 1122 | int readflags = OCFS2_BH_CACHED; | ||
| 1123 | |||
| 1124 | if (reada) | ||
| 1125 | readflags |= OCFS2_BH_READAHEAD; | ||
| 1126 | |||
| 1127 | if (((u64)block << inode->i_sb->s_blocksize_bits) >= | ||
| 1128 | i_size_read(inode)) { | ||
| 1129 | BUG_ON(!reada); | ||
| 1130 | return NULL; | ||
| 1131 | } | ||
| 1132 | |||
| 1133 | down_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1134 | tmperr = ocfs2_extent_map_get_blocks(inode, block, &p_blkno, NULL, | ||
| 1135 | NULL); | ||
| 1136 | up_read(&OCFS2_I(inode)->ip_alloc_sem); | ||
| 1137 | if (tmperr < 0) { | ||
| 1138 | mlog_errno(tmperr); | ||
| 1139 | goto fail; | ||
| 1140 | } | ||
| 1141 | |||
| 1142 | tmperr = ocfs2_read_block(OCFS2_SB(inode->i_sb), p_blkno, &bh, | ||
| 1143 | readflags, inode); | ||
| 1144 | if (tmperr < 0) | ||
| 1145 | goto fail; | ||
| 1146 | |||
| 1147 | tmperr = 0; | ||
| 1148 | |||
| 1149 | *err = 0; | ||
| 1150 | return bh; | ||
| 1151 | |||
| 1152 | fail: | ||
| 1153 | if (bh) { | ||
| 1154 | brelse(bh); | ||
| 1155 | bh = NULL; | ||
| 1156 | } | ||
| 1157 | *err = -EIO; | ||
| 1158 | return NULL; | ||
| 1159 | } | ||
| 1160 | |||
| 1161 | /* | ||
| 1162 | * This is called from our getattr. | 1137 | * This is called from our getattr. |
| 1163 | */ | 1138 | */ |
| 1164 | int ocfs2_inode_revalidate(struct dentry *dentry) | 1139 | int ocfs2_inode_revalidate(struct dentry *dentry) |
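[Editorial note] Across these hunks the buffer-read helpers change calling convention: cached reads now key off the inode instead of the superblock, uncached reads use a separate sync helper, and the readahead-oriented ocfs2_bread() is removed. The commented fragment below only summarizes the patterns visible in this diff; it is not compilable kernel code and the semantics notes are inferred from the call sites shown here.

/* Old pattern, as removed above:
 *     status = ocfs2_read_block(osb, blkno, &bh, OCFS2_BH_CACHED, inode);
 *
 * New patterns, as added above:
 *     status = ocfs2_read_block(inode, blkno, &bh);             cached read
 *     status = ocfs2_read_blocks(inode, blkno, 1, &bh,
 *                                OCFS2_BH_IGNORE_CACHE);        bypass uptodate cache
 *     status = ocfs2_read_blocks_sync(osb, blkno, nr, bhs);     no inode context
 *
 * Error paths also get simpler: brelse(NULL) is a no-op, so the
 * "if (bh) brelse(bh);" guards are dropped throughout this diff.
 */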
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index 390a85596aa0..2f37af9bcc4a 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h | |||
| @@ -40,6 +40,9 @@ struct ocfs2_inode_info | |||
| 40 | /* protects allocation changes on this inode. */ | 40 | /* protects allocation changes on this inode. */ |
| 41 | struct rw_semaphore ip_alloc_sem; | 41 | struct rw_semaphore ip_alloc_sem; |
| 42 | 42 | ||
| 43 | /* protects extended attribute changes on this inode */ | ||
| 44 | struct rw_semaphore ip_xattr_sem; | ||
| 45 | |||
| 43 | /* These fields are protected by ip_lock */ | 46 | /* These fields are protected by ip_lock */ |
| 44 | spinlock_t ip_lock; | 47 | spinlock_t ip_lock; |
| 45 | u32 ip_open_count; | 48 | u32 ip_open_count; |
| @@ -68,6 +71,7 @@ struct ocfs2_inode_info | |||
| 68 | struct ocfs2_extent_map ip_extent_map; | 71 | struct ocfs2_extent_map ip_extent_map; |
| 69 | 72 | ||
| 70 | struct inode vfs_inode; | 73 | struct inode vfs_inode; |
| 74 | struct jbd2_inode ip_jinode; | ||
| 71 | }; | 75 | }; |
| 72 | 76 | ||
| 73 | /* | 77 | /* |
| @@ -113,8 +117,6 @@ extern struct kmem_cache *ocfs2_inode_cache; | |||
| 113 | 117 | ||
| 114 | extern const struct address_space_operations ocfs2_aops; | 118 | extern const struct address_space_operations ocfs2_aops; |
| 115 | 119 | ||
| 116 | struct buffer_head *ocfs2_bread(struct inode *inode, int block, | ||
| 117 | int *err, int reada); | ||
| 118 | void ocfs2_clear_inode(struct inode *inode); | 120 | void ocfs2_clear_inode(struct inode *inode); |
| 119 | void ocfs2_delete_inode(struct inode *inode); | 121 | void ocfs2_delete_inode(struct inode *inode); |
| 120 | void ocfs2_drop_inode(struct inode *inode); | 122 | void ocfs2_drop_inode(struct inode *inode); |
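[Editorial note] The new ip_jinode field embeds the per-inode state jbd2 needs for ordered data mode. Its teardown is visible in the ocfs2_clear_inode() hunk above, and the journal.h hunk later in this diff adds the ocfs2_jbd2_file_inode() and ocfs2_begin_ordered_truncate() wrappers. The matching initialization is not part of this section, so the init call named below is an assumption about where it would live, not something shown here.

/* Illustrative lifecycle of the embedded jbd2_inode (not kernel code):
 *
 *   alloc inode:        jbd2_journal_init_jbd_inode(&oi->ip_jinode, inode);
 *                       // ASSUMED: init site is outside this section
 *
 *   ordered write path: ocfs2_jbd2_file_inode(handle, inode);
 *                       // ties the inode's dirty data to the running handle
 *
 *   shrinking truncate: ocfs2_begin_ordered_truncate(inode, new_size);
 *
 *   clear inode:        jbd2_journal_release_jbd_inode(journal,
 *                                                      &oi->ip_jinode);
 *                       // shown in the ocfs2_clear_inode() hunk above
 */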
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c index 7b142f0ce995..9fcd36dcc9a0 100644 --- a/fs/ocfs2/ioctl.c +++ b/fs/ocfs2/ioctl.c | |||
| @@ -102,8 +102,7 @@ bail_unlock: | |||
| 102 | bail: | 102 | bail: |
| 103 | mutex_unlock(&inode->i_mutex); | 103 | mutex_unlock(&inode->i_mutex); |
| 104 | 104 | ||
| 105 | if (bh) | 105 | brelse(bh); |
| 106 | brelse(bh); | ||
| 107 | 106 | ||
| 108 | mlog_exit(status); | 107 | mlog_exit(status); |
| 109 | return status; | 108 | return status; |
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index c47bc2a809c2..81e40677eecb 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c | |||
| @@ -215,9 +215,9 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) | |||
| 215 | goto finally; | 215 | goto finally; |
| 216 | } | 216 | } |
| 217 | 217 | ||
| 218 | journal_lock_updates(journal->j_journal); | 218 | jbd2_journal_lock_updates(journal->j_journal); |
| 219 | status = journal_flush(journal->j_journal); | 219 | status = jbd2_journal_flush(journal->j_journal); |
| 220 | journal_unlock_updates(journal->j_journal); | 220 | jbd2_journal_unlock_updates(journal->j_journal); |
| 221 | if (status < 0) { | 221 | if (status < 0) { |
| 222 | up_write(&journal->j_trans_barrier); | 222 | up_write(&journal->j_trans_barrier); |
| 223 | mlog_errno(status); | 223 | mlog_errno(status); |
| @@ -264,7 +264,7 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) | |||
| 264 | 264 | ||
| 265 | down_read(&osb->journal->j_trans_barrier); | 265 | down_read(&osb->journal->j_trans_barrier); |
| 266 | 266 | ||
| 267 | handle = journal_start(journal, max_buffs); | 267 | handle = jbd2_journal_start(journal, max_buffs); |
| 268 | if (IS_ERR(handle)) { | 268 | if (IS_ERR(handle)) { |
| 269 | up_read(&osb->journal->j_trans_barrier); | 269 | up_read(&osb->journal->j_trans_barrier); |
| 270 | 270 | ||
| @@ -290,7 +290,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
| 290 | 290 | ||
| 291 | BUG_ON(!handle); | 291 | BUG_ON(!handle); |
| 292 | 292 | ||
| 293 | ret = journal_stop(handle); | 293 | ret = jbd2_journal_stop(handle); |
| 294 | if (ret < 0) | 294 | if (ret < 0) |
| 295 | mlog_errno(ret); | 295 | mlog_errno(ret); |
| 296 | 296 | ||
| @@ -304,7 +304,7 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, | |||
| 304 | * transaction. extend_trans will either extend the current handle by | 304 | * transaction. extend_trans will either extend the current handle by |
| 305 | * nblocks, or commit it and start a new one with nblocks credits. | 305 | * nblocks, or commit it and start a new one with nblocks credits. |
| 306 | * | 306 | * |
| 307 | * This might call journal_restart() which will commit dirty buffers | 307 | * This might call jbd2_journal_restart() which will commit dirty buffers |
| 308 | * and then restart the transaction. Before calling | 308 | * and then restart the transaction. Before calling |
| 309 | * ocfs2_extend_trans(), any changed blocks should have been | 309 | * ocfs2_extend_trans(), any changed blocks should have been |
| 310 | * dirtied. After calling it, all blocks which need to be changed must | 310 | * dirtied. After calling it, all blocks which need to be changed must |
| @@ -332,7 +332,7 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
| 332 | #ifdef CONFIG_OCFS2_DEBUG_FS | 332 | #ifdef CONFIG_OCFS2_DEBUG_FS |
| 333 | status = 1; | 333 | status = 1; |
| 334 | #else | 334 | #else |
| 335 | status = journal_extend(handle, nblocks); | 335 | status = jbd2_journal_extend(handle, nblocks); |
| 336 | if (status < 0) { | 336 | if (status < 0) { |
| 337 | mlog_errno(status); | 337 | mlog_errno(status); |
| 338 | goto bail; | 338 | goto bail; |
| @@ -340,8 +340,10 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) | |||
| 340 | #endif | 340 | #endif |
| 341 | 341 | ||
| 342 | if (status > 0) { | 342 | if (status > 0) { |
| 343 | mlog(0, "journal_extend failed, trying journal_restart\n"); | 343 | mlog(0, |
| 344 | status = journal_restart(handle, nblocks); | 344 | "jbd2_journal_extend failed, trying " |
| 345 | "jbd2_journal_restart\n"); | ||
| 346 | status = jbd2_journal_restart(handle, nblocks); | ||
| 345 | if (status < 0) { | 347 | if (status < 0) { |
| 346 | mlog_errno(status); | 348 | mlog_errno(status); |
| 347 | goto bail; | 349 | goto bail; |
| @@ -393,11 +395,11 @@ int ocfs2_journal_access(handle_t *handle, | |||
| 393 | switch (type) { | 395 | switch (type) { |
| 394 | case OCFS2_JOURNAL_ACCESS_CREATE: | 396 | case OCFS2_JOURNAL_ACCESS_CREATE: |
| 395 | case OCFS2_JOURNAL_ACCESS_WRITE: | 397 | case OCFS2_JOURNAL_ACCESS_WRITE: |
| 396 | status = journal_get_write_access(handle, bh); | 398 | status = jbd2_journal_get_write_access(handle, bh); |
| 397 | break; | 399 | break; |
| 398 | 400 | ||
| 399 | case OCFS2_JOURNAL_ACCESS_UNDO: | 401 | case OCFS2_JOURNAL_ACCESS_UNDO: |
| 400 | status = journal_get_undo_access(handle, bh); | 402 | status = jbd2_journal_get_undo_access(handle, bh); |
| 401 | break; | 403 | break; |
| 402 | 404 | ||
| 403 | default: | 405 | default: |
| @@ -422,7 +424,7 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
| 422 | mlog_entry("(bh->b_blocknr=%llu)\n", | 424 | mlog_entry("(bh->b_blocknr=%llu)\n", |
| 423 | (unsigned long long)bh->b_blocknr); | 425 | (unsigned long long)bh->b_blocknr); |
| 424 | 426 | ||
| 425 | status = journal_dirty_metadata(handle, bh); | 427 | status = jbd2_journal_dirty_metadata(handle, bh); |
| 426 | if (status < 0) | 428 | if (status < 0) |
| 427 | mlog(ML_ERROR, "Could not dirty metadata buffer. " | 429 | mlog(ML_ERROR, "Could not dirty metadata buffer. " |
| 428 | "(bh->b_blocknr=%llu)\n", | 430 | "(bh->b_blocknr=%llu)\n", |
| @@ -432,6 +434,7 @@ int ocfs2_journal_dirty(handle_t *handle, | |||
| 432 | return status; | 434 | return status; |
| 433 | } | 435 | } |
| 434 | 436 | ||
| 437 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
| 435 | int ocfs2_journal_dirty_data(handle_t *handle, | 438 | int ocfs2_journal_dirty_data(handle_t *handle, |
| 436 | struct buffer_head *bh) | 439 | struct buffer_head *bh) |
| 437 | { | 440 | { |
| @@ -443,8 +446,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 443 | 446 | ||
| 444 | return err; | 447 | return err; |
| 445 | } | 448 | } |
| 449 | #endif | ||
| 446 | 450 | ||
| 447 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD_DEFAULT_MAX_COMMIT_AGE) | 451 | #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) |
| 448 | 452 | ||
| 449 | void ocfs2_set_journal_params(struct ocfs2_super *osb) | 453 | void ocfs2_set_journal_params(struct ocfs2_super *osb) |
| 450 | { | 454 | { |
| @@ -457,9 +461,9 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) | |||
| 457 | spin_lock(&journal->j_state_lock); | 461 | spin_lock(&journal->j_state_lock); |
| 458 | journal->j_commit_interval = commit_interval; | 462 | journal->j_commit_interval = commit_interval; |
| 459 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) | 463 | if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) |
| 460 | journal->j_flags |= JFS_BARRIER; | 464 | journal->j_flags |= JBD2_BARRIER; |
| 461 | else | 465 | else |
| 462 | journal->j_flags &= ~JFS_BARRIER; | 466 | journal->j_flags &= ~JBD2_BARRIER; |
| 463 | spin_unlock(&journal->j_state_lock); | 467 | spin_unlock(&journal->j_state_lock); |
| 464 | } | 468 | } |
| 465 | 469 | ||
| @@ -524,14 +528,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) | |||
| 524 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); | 528 | mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); |
| 525 | 529 | ||
| 526 | /* call the kernels journal init function now */ | 530 | /* call the kernels journal init function now */ |
| 527 | j_journal = journal_init_inode(inode); | 531 | j_journal = jbd2_journal_init_inode(inode); |
| 528 | if (j_journal == NULL) { | 532 | if (j_journal == NULL) { |
| 529 | mlog(ML_ERROR, "Linux journal layer error\n"); | 533 | mlog(ML_ERROR, "Linux journal layer error\n"); |
| 530 | status = -EINVAL; | 534 | status = -EINVAL; |
| 531 | goto done; | 535 | goto done; |
| 532 | } | 536 | } |
| 533 | 537 | ||
| 534 | mlog(0, "Returned from journal_init_inode\n"); | 538 | mlog(0, "Returned from jbd2_journal_init_inode\n"); |
| 535 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); | 539 | mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); |
| 536 | 540 | ||
| 537 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & | 541 | *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & |
| @@ -550,8 +554,7 @@ done: | |||
| 550 | if (status < 0) { | 554 | if (status < 0) { |
| 551 | if (inode_lock) | 555 | if (inode_lock) |
| 552 | ocfs2_inode_unlock(inode, 1); | 556 | ocfs2_inode_unlock(inode, 1); |
| 553 | if (bh != NULL) | 557 | brelse(bh); |
| 554 | brelse(bh); | ||
| 555 | if (inode) { | 558 | if (inode) { |
| 556 | OCFS2_I(inode)->ip_open_count--; | 559 | OCFS2_I(inode)->ip_open_count--; |
| 557 | iput(inode); | 560 | iput(inode); |
| @@ -639,7 +642,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
| 639 | if (journal->j_state != OCFS2_JOURNAL_LOADED) | 642 | if (journal->j_state != OCFS2_JOURNAL_LOADED) |
| 640 | goto done; | 643 | goto done; |
| 641 | 644 | ||
| 642 | /* need to inc inode use count as journal_destroy will iput. */ | 645 | /* need to inc inode use count - jbd2_journal_destroy will iput. */ |
| 643 | if (!igrab(inode)) | 646 | if (!igrab(inode)) |
| 644 | BUG(); | 647 | BUG(); |
| 645 | 648 | ||
| @@ -668,9 +671,9 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
| 668 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); | 671 | BUG_ON(atomic_read(&(osb->journal->j_num_trans)) != 0); |
| 669 | 672 | ||
| 670 | if (ocfs2_mount_local(osb)) { | 673 | if (ocfs2_mount_local(osb)) { |
| 671 | journal_lock_updates(journal->j_journal); | 674 | jbd2_journal_lock_updates(journal->j_journal); |
| 672 | status = journal_flush(journal->j_journal); | 675 | status = jbd2_journal_flush(journal->j_journal); |
| 673 | journal_unlock_updates(journal->j_journal); | 676 | jbd2_journal_unlock_updates(journal->j_journal); |
| 674 | if (status < 0) | 677 | if (status < 0) |
| 675 | mlog_errno(status); | 678 | mlog_errno(status); |
| 676 | } | 679 | } |
| @@ -686,7 +689,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) | |||
| 686 | } | 689 | } |
| 687 | 690 | ||
| 688 | /* Shutdown the kernel journal system */ | 691 | /* Shutdown the kernel journal system */ |
| 689 | journal_destroy(journal->j_journal); | 692 | jbd2_journal_destroy(journal->j_journal); |
| 690 | 693 | ||
| 691 | OCFS2_I(inode)->ip_open_count--; | 694 | OCFS2_I(inode)->ip_open_count--; |
| 692 | 695 | ||
| @@ -711,15 +714,15 @@ static void ocfs2_clear_journal_error(struct super_block *sb, | |||
| 711 | { | 714 | { |
| 712 | int olderr; | 715 | int olderr; |
| 713 | 716 | ||
| 714 | olderr = journal_errno(journal); | 717 | olderr = jbd2_journal_errno(journal); |
| 715 | if (olderr) { | 718 | if (olderr) { |
| 716 | mlog(ML_ERROR, "File system error %d recorded in " | 719 | mlog(ML_ERROR, "File system error %d recorded in " |
| 717 | "journal %u.\n", olderr, slot); | 720 | "journal %u.\n", olderr, slot); |
| 718 | mlog(ML_ERROR, "File system on device %s needs checking.\n", | 721 | mlog(ML_ERROR, "File system on device %s needs checking.\n", |
| 719 | sb->s_id); | 722 | sb->s_id); |
| 720 | 723 | ||
| 721 | journal_ack_err(journal); | 724 | jbd2_journal_ack_err(journal); |
| 722 | journal_clear_err(journal); | 725 | jbd2_journal_clear_err(journal); |
| 723 | } | 726 | } |
| 724 | } | 727 | } |
| 725 | 728 | ||
| @@ -734,7 +737,7 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) | |||
| 734 | 737 | ||
| 735 | osb = journal->j_osb; | 738 | osb = journal->j_osb; |
| 736 | 739 | ||
| 737 | status = journal_load(journal->j_journal); | 740 | status = jbd2_journal_load(journal->j_journal); |
| 738 | if (status < 0) { | 741 | if (status < 0) { |
| 739 | mlog(ML_ERROR, "Failed to load journal!\n"); | 742 | mlog(ML_ERROR, "Failed to load journal!\n"); |
| 740 | goto done; | 743 | goto done; |
| @@ -778,7 +781,7 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) | |||
| 778 | 781 | ||
| 779 | BUG_ON(!journal); | 782 | BUG_ON(!journal); |
| 780 | 783 | ||
| 781 | status = journal_wipe(journal->j_journal, full); | 784 | status = jbd2_journal_wipe(journal->j_journal, full); |
| 782 | if (status < 0) { | 785 | if (status < 0) { |
| 783 | mlog_errno(status); | 786 | mlog_errno(status); |
| 784 | goto bail; | 787 | goto bail; |
| @@ -847,9 +850,8 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
| 847 | 850 | ||
| 848 | /* We are reading journal data which should not | 851 | /* We are reading journal data which should not |
| 849 | * be put in the uptodate cache */ | 852 | * be put in the uptodate cache */ |
| 850 | status = ocfs2_read_blocks(OCFS2_SB(inode->i_sb), | 853 | status = ocfs2_read_blocks_sync(OCFS2_SB(inode->i_sb), |
| 851 | p_blkno, p_blocks, bhs, 0, | 854 | p_blkno, p_blocks, bhs); |
| 852 | NULL); | ||
| 853 | if (status < 0) { | 855 | if (status < 0) { |
| 854 | mlog_errno(status); | 856 | mlog_errno(status); |
| 855 | goto bail; | 857 | goto bail; |
| @@ -865,8 +867,7 @@ static int ocfs2_force_read_journal(struct inode *inode) | |||
| 865 | 867 | ||
| 866 | bail: | 868 | bail: |
| 867 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) | 869 | for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) |
| 868 | if (bhs[i]) | 870 | brelse(bhs[i]); |
| 869 | brelse(bhs[i]); | ||
| 870 | mlog_exit(status); | 871 | mlog_exit(status); |
| 871 | return status; | 872 | return status; |
| 872 | } | 873 | } |
| @@ -1133,7 +1134,8 @@ static int ocfs2_read_journal_inode(struct ocfs2_super *osb, | |||
| 1133 | } | 1134 | } |
| 1134 | SET_INODE_JOURNAL(inode); | 1135 | SET_INODE_JOURNAL(inode); |
| 1135 | 1136 | ||
| 1136 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, bh, 0, inode); | 1137 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, bh, |
| 1138 | OCFS2_BH_IGNORE_CACHE); | ||
| 1137 | if (status < 0) { | 1139 | if (status < 0) { |
| 1138 | mlog_errno(status); | 1140 | mlog_errno(status); |
| 1139 | goto bail; | 1141 | goto bail; |
| @@ -1229,19 +1231,19 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 1229 | } | 1231 | } |
| 1230 | 1232 | ||
| 1231 | mlog(0, "calling journal_init_inode\n"); | 1233 | mlog(0, "calling journal_init_inode\n"); |
| 1232 | journal = journal_init_inode(inode); | 1234 | journal = jbd2_journal_init_inode(inode); |
| 1233 | if (journal == NULL) { | 1235 | if (journal == NULL) { |
| 1234 | mlog(ML_ERROR, "Linux journal layer error\n"); | 1236 | mlog(ML_ERROR, "Linux journal layer error\n"); |
| 1235 | status = -EIO; | 1237 | status = -EIO; |
| 1236 | goto done; | 1238 | goto done; |
| 1237 | } | 1239 | } |
| 1238 | 1240 | ||
| 1239 | status = journal_load(journal); | 1241 | status = jbd2_journal_load(journal); |
| 1240 | if (status < 0) { | 1242 | if (status < 0) { |
| 1241 | mlog_errno(status); | 1243 | mlog_errno(status); |
| 1242 | if (!igrab(inode)) | 1244 | if (!igrab(inode)) |
| 1243 | BUG(); | 1245 | BUG(); |
| 1244 | journal_destroy(journal); | 1246 | jbd2_journal_destroy(journal); |
| 1245 | goto done; | 1247 | goto done; |
| 1246 | } | 1248 | } |
| 1247 | 1249 | ||
| @@ -1249,9 +1251,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 1249 | 1251 | ||
| 1250 | /* wipe the journal */ | 1252 | /* wipe the journal */ |
| 1251 | mlog(0, "flushing the journal.\n"); | 1253 | mlog(0, "flushing the journal.\n"); |
| 1252 | journal_lock_updates(journal); | 1254 | jbd2_journal_lock_updates(journal); |
| 1253 | status = journal_flush(journal); | 1255 | status = jbd2_journal_flush(journal); |
| 1254 | journal_unlock_updates(journal); | 1256 | jbd2_journal_unlock_updates(journal); |
| 1255 | if (status < 0) | 1257 | if (status < 0) |
| 1256 | mlog_errno(status); | 1258 | mlog_errno(status); |
| 1257 | 1259 | ||
| @@ -1272,7 +1274,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, | |||
| 1272 | if (!igrab(inode)) | 1274 | if (!igrab(inode)) |
| 1273 | BUG(); | 1275 | BUG(); |
| 1274 | 1276 | ||
| 1275 | journal_destroy(journal); | 1277 | jbd2_journal_destroy(journal); |
| 1276 | 1278 | ||
| 1277 | done: | 1279 | done: |
| 1278 | /* drop the lock on this nodes journal */ | 1280 | /* drop the lock on this nodes journal */ |
| @@ -1282,8 +1284,7 @@ done: | |||
| 1282 | if (inode) | 1284 | if (inode) |
| 1283 | iput(inode); | 1285 | iput(inode); |
| 1284 | 1286 | ||
| 1285 | if (bh) | 1287 | brelse(bh); |
| 1286 | brelse(bh); | ||
| 1287 | 1288 | ||
| 1288 | mlog_exit(status); | 1289 | mlog_exit(status); |
| 1289 | return status; | 1290 | return status; |
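[Editorial note] The ocfs2_extend_trans() hunk above keeps the same strategy under jbd2: try to extend the running handle in place, and if the journal cannot grant more credits (extend returns a positive value), fall back to restarting the handle, which commits the buffers dirtied so far. The stand-alone model below stubs out the journal calls to show only that decision.

#include <stdio.h>

static int available = 8;     /* stub journal: credits it can still grant */

/* Returns 0 if extended in place, >0 if the caller must restart. */
static int stub_extend(int nblocks)
{
        if (nblocks <= available) { available -= nblocks; return 0; }
        return 1;
}
static int stub_restart(int nblocks)
{
        printf("commit dirty buffers, fresh handle with %d credits\n", nblocks);
        available = 64;
        return 0;
}

/* Mirrors ocfs2_extend_trans(): extend if possible, otherwise restart. */
static int extend_trans(int nblocks)
{
        int status = stub_extend(nblocks);

        if (status > 0)        /* could not extend in place */
                status = stub_restart(nblocks);
        return status;
}

int main(void) { return extend_trans(32); }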
diff --git a/fs/ocfs2/journal.h b/fs/ocfs2/journal.h index 2178ebffa05f..d4d14e9a3cea 100644 --- a/fs/ocfs2/journal.h +++ b/fs/ocfs2/journal.h | |||
| @@ -27,7 +27,12 @@ | |||
| 27 | #define OCFS2_JOURNAL_H | 27 | #define OCFS2_JOURNAL_H |
| 28 | 28 | ||
| 29 | #include <linux/fs.h> | 29 | #include <linux/fs.h> |
| 30 | #include <linux/jbd.h> | 30 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
| 31 | # include <linux/jbd2.h> | ||
| 32 | #else | ||
| 33 | # include <linux/jbd.h> | ||
| 34 | # include "ocfs2_jbd_compat.h" | ||
| 35 | #endif | ||
| 31 | 36 | ||
| 32 | enum ocfs2_journal_state { | 37 | enum ocfs2_journal_state { |
| 33 | OCFS2_JOURNAL_FREE = 0, | 38 | OCFS2_JOURNAL_FREE = 0, |
| @@ -215,8 +220,8 @@ static inline void ocfs2_checkpoint_inode(struct inode *inode) | |||
| 215 | * buffer. Will have to call ocfs2_journal_dirty once | 220 | * buffer. Will have to call ocfs2_journal_dirty once |
| 216 | * we've actually dirtied it. Type is one of . or . | 221 | * we've actually dirtied it. Type is one of . or . |
| 217 | * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. | 222 | * ocfs2_journal_dirty - Mark a journalled buffer as having dirty data. |
| 218 | * ocfs2_journal_dirty_data - Indicate that a data buffer should go out before | 223 | * ocfs2_jbd2_file_inode - Mark an inode so that its data goes out before |
| 219 | * the current handle commits. | 224 | * the current handle commits. |
| 220 | */ | 225 | */ |
| 221 | 226 | ||
| 222 | /* You must always start_trans with a number of buffs > 0, but it's | 227 | /* You must always start_trans with a number of buffs > 0, but it's |
| @@ -268,8 +273,10 @@ int ocfs2_journal_access(handle_t *handle, | |||
| 268 | */ | 273 | */ |
| 269 | int ocfs2_journal_dirty(handle_t *handle, | 274 | int ocfs2_journal_dirty(handle_t *handle, |
| 270 | struct buffer_head *bh); | 275 | struct buffer_head *bh); |
| 276 | #ifdef CONFIG_OCFS2_COMPAT_JBD | ||
| 271 | int ocfs2_journal_dirty_data(handle_t *handle, | 277 | int ocfs2_journal_dirty_data(handle_t *handle, |
| 272 | struct buffer_head *bh); | 278 | struct buffer_head *bh); |
| 279 | #endif | ||
| 273 | 280 | ||
| 274 | /* | 281 | /* |
| 275 | * Credit Macros: | 282 | * Credit Macros: |
| @@ -283,6 +290,9 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 283 | /* simple file updates like chmod, etc. */ | 290 | /* simple file updates like chmod, etc. */ |
| 284 | #define OCFS2_INODE_UPDATE_CREDITS 1 | 291 | #define OCFS2_INODE_UPDATE_CREDITS 1 |
| 285 | 292 | ||
| 293 | /* extended attribute block update */ | ||
| 294 | #define OCFS2_XATTR_BLOCK_UPDATE_CREDITS 1 | ||
| 295 | |||
| 286 | /* group extend. inode update and last group update. */ | 296 | /* group extend. inode update and last group update. */ |
| 287 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) | 297 | #define OCFS2_GROUP_EXTEND_CREDITS (OCFS2_INODE_UPDATE_CREDITS + 1) |
| 288 | 298 | ||
| @@ -340,11 +350,23 @@ int ocfs2_journal_dirty_data(handle_t *handle, | |||
| 340 | #define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ | 350 | #define OCFS2_RENAME_CREDITS (3 * OCFS2_INODE_UPDATE_CREDITS + 3 \ |
| 341 | + OCFS2_UNLINK_CREDITS) | 351 | + OCFS2_UNLINK_CREDITS) |
| 342 | 352 | ||
| 353 | /* global bitmap dinode, group desc., relinked group, | ||
| 354 | * suballocator dinode, group desc., relinked group, | ||
| 355 | * dinode, xattr block */ | ||
| 356 | #define OCFS2_XATTR_BLOCK_CREATE_CREDITS (OCFS2_SUBALLOC_ALLOC * 2 + \ | ||
| 357 | + OCFS2_INODE_UPDATE_CREDITS \ | ||
| 358 | + OCFS2_XATTR_BLOCK_UPDATE_CREDITS) | ||
| 359 | |||
| 360 | /* | ||
| 361 | * Please note that the caller must make sure that root_el is the root | ||
| 362 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
| 363 | * the result may be wrong. | ||
| 364 | */ | ||
| 343 | static inline int ocfs2_calc_extend_credits(struct super_block *sb, | 365 | static inline int ocfs2_calc_extend_credits(struct super_block *sb, |
| 344 | struct ocfs2_dinode *fe, | 366 | struct ocfs2_extent_list *root_el, |
| 345 | u32 bits_wanted) | 367 | u32 bits_wanted) |
| 346 | { | 368 | { |
| 347 | int bitmap_blocks, sysfile_bitmap_blocks, dinode_blocks; | 369 | int bitmap_blocks, sysfile_bitmap_blocks, extent_blocks; |
| 348 | 370 | ||
| 349 | /* bitmap dinode, group desc. + relinked group. */ | 371 | /* bitmap dinode, group desc. + relinked group. */ |
| 350 | bitmap_blocks = OCFS2_SUBALLOC_ALLOC; | 372 | bitmap_blocks = OCFS2_SUBALLOC_ALLOC; |
| @@ -355,16 +377,16 @@ static inline int ocfs2_calc_extend_credits(struct super_block *sb, | |||
| 355 | * however many metadata chunks needed * a remaining suballoc | 377 | * however many metadata chunks needed * a remaining suballoc |
| 356 | * alloc. */ | 378 | * alloc. */ |
| 357 | sysfile_bitmap_blocks = 1 + | 379 | sysfile_bitmap_blocks = 1 + |
| 358 | (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(fe); | 380 | (OCFS2_SUBALLOC_ALLOC - 1) * ocfs2_extend_meta_needed(root_el); |
| 359 | 381 | ||
| 360 | /* this does not include *new* metadata blocks, which are | 382 | /* this does not include *new* metadata blocks, which are |
| 361 | * accounted for in sysfile_bitmap_blocks. fe + | 383 | * accounted for in sysfile_bitmap_blocks. root_el + |
| 362 | * prev. last_eb_blk + blocks along edge of tree. | 384 | * prev. last_eb_blk + blocks along edge of tree. |
| 363 | * calc_symlink_credits passes because we just need 1 | 385 | * calc_symlink_credits passes because we just need 1 |
| 364 | * credit for the dinode there. */ | 386 | * credit for the dinode there. */ |
| 365 | dinode_blocks = 1 + 1 + le16_to_cpu(fe->id2.i_list.l_tree_depth); | 387 | extent_blocks = 1 + 1 + le16_to_cpu(root_el->l_tree_depth); |
| 366 | 388 | ||
| 367 | return bitmap_blocks + sysfile_bitmap_blocks + dinode_blocks; | 389 | return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks; |
| 368 | } | 390 | } |
| 369 | 391 | ||
| 370 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) | 392 | static inline int ocfs2_calc_symlink_credits(struct super_block *sb) |
| @@ -415,4 +437,16 @@ static inline int ocfs2_calc_tree_trunc_credits(struct super_block *sb, | |||
| 415 | return credits; | 437 | return credits; |
| 416 | } | 438 | } |
| 417 | 439 | ||
| 440 | static inline int ocfs2_jbd2_file_inode(handle_t *handle, struct inode *inode) | ||
| 441 | { | ||
| 442 | return jbd2_journal_file_inode(handle, &OCFS2_I(inode)->ip_jinode); | ||
| 443 | } | ||
| 444 | |||
| 445 | static inline int ocfs2_begin_ordered_truncate(struct inode *inode, | ||
| 446 | loff_t new_size) | ||
| 447 | { | ||
| 448 | return jbd2_journal_begin_ordered_truncate(&OCFS2_I(inode)->ip_jinode, | ||
| 449 | new_size); | ||
| 450 | } | ||
| 451 | |||
| 418 | #endif /* OCFS2_JOURNAL_H */ | 452 | #endif /* OCFS2_JOURNAL_H */ |
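[Editorial note] ocfs2_calc_extend_credits() above is pure arithmetic over the extent tree root, so a worked example is easy to sanity-check. The sketch below re-implements the same formula stand-alone; the concrete numbers plugged into main() (standing in for OCFS2_SUBALLOC_ALLOC and the result of ocfs2_extend_meta_needed()) are assumptions for illustration, not values taken from this diff.

#include <stdio.h>

/* Same shape as ocfs2_calc_extend_credits() in the hunk above, with the
 * helper results passed in as plain integers. */
static int calc_extend_credits(int suballoc_alloc, int meta_needed,
                               int tree_depth)
{
        int bitmap_blocks = suballoc_alloc;          /* global bitmap path */
        int sysfile_bitmap_blocks = 1 + (suballoc_alloc - 1) * meta_needed;
        int extent_blocks = 1 + 1 + tree_depth;      /* root_el + last_eb_blk
                                                        + blocks along edge */
        return bitmap_blocks + sysfile_bitmap_blocks + extent_blocks;
}

int main(void)
{
        /* Example only: 3 and 4 are assumed stand-ins for OCFS2_SUBALLOC_ALLOC
         * and ocfs2_extend_meta_needed(root_el) at tree depth 2. */
        printf("%d\n", calc_extend_credits(3, 4, 2));   /* 3 + 9 + 4 = 16 */
        return 0;
}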
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index 28e492e4ec88..687b28713c32 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include <linux/slab.h> | 28 | #include <linux/slab.h> |
| 29 | #include <linux/highmem.h> | 29 | #include <linux/highmem.h> |
| 30 | #include <linux/bitops.h> | 30 | #include <linux/bitops.h> |
| 31 | #include <linux/debugfs.h> | ||
| 31 | 32 | ||
| 32 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC | 33 | #define MLOG_MASK_PREFIX ML_DISK_ALLOC |
| 33 | #include <cluster/masklog.h> | 34 | #include <cluster/masklog.h> |
| @@ -47,8 +48,6 @@ | |||
| 47 | 48 | ||
| 48 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) | 49 | #define OCFS2_LOCAL_ALLOC(dinode) (&((dinode)->id2.i_lab)) |
| 49 | 50 | ||
| 50 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb); | ||
| 51 | |||
| 52 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); | 51 | static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc); |
| 53 | 52 | ||
| 54 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, | 53 | static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, |
| @@ -75,24 +74,129 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 75 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | 74 | static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, |
| 76 | struct inode *local_alloc_inode); | 75 | struct inode *local_alloc_inode); |
| 77 | 76 | ||
| 78 | static inline int ocfs2_local_alloc_window_bits(struct ocfs2_super *osb) | 77 | #ifdef CONFIG_OCFS2_FS_STATS |
| 78 | |||
| 79 | static int ocfs2_la_debug_open(struct inode *inode, struct file *file) | ||
| 80 | { | ||
| 81 | file->private_data = inode->i_private; | ||
| 82 | return 0; | ||
| 83 | } | ||
| 84 | |||
| 85 | #define LA_DEBUG_BUF_SZ PAGE_CACHE_SIZE | ||
| 86 | #define LA_DEBUG_VER 1 | ||
| 87 | static ssize_t ocfs2_la_debug_read(struct file *file, char __user *userbuf, | ||
| 88 | size_t count, loff_t *ppos) | ||
| 89 | { | ||
| 90 | static DEFINE_MUTEX(la_debug_mutex); | ||
| 91 | struct ocfs2_super *osb = file->private_data; | ||
| 92 | int written, ret; | ||
| 93 | char *buf = osb->local_alloc_debug_buf; | ||
| 94 | |||
| 95 | mutex_lock(&la_debug_mutex); | ||
| 96 | memset(buf, 0, LA_DEBUG_BUF_SZ); | ||
| 97 | |||
| 98 | written = snprintf(buf, LA_DEBUG_BUF_SZ, | ||
| 99 | "0x%x\t0x%llx\t%u\t%u\t0x%x\n", | ||
| 100 | LA_DEBUG_VER, | ||
| 101 | (unsigned long long)osb->la_last_gd, | ||
| 102 | osb->local_alloc_default_bits, | ||
| 103 | osb->local_alloc_bits, osb->local_alloc_state); | ||
| 104 | |||
| 105 | ret = simple_read_from_buffer(userbuf, count, ppos, buf, written); | ||
| 106 | |||
| 107 | mutex_unlock(&la_debug_mutex); | ||
| 108 | return ret; | ||
| 109 | } | ||
| 110 | |||
| 111 | static const struct file_operations ocfs2_la_debug_fops = { | ||
| 112 | .open = ocfs2_la_debug_open, | ||
| 113 | .read = ocfs2_la_debug_read, | ||
| 114 | }; | ||
| 115 | |||
| 116 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
| 117 | { | ||
| 118 | osb->local_alloc_debug_buf = kmalloc(LA_DEBUG_BUF_SZ, GFP_NOFS); | ||
| 119 | if (!osb->local_alloc_debug_buf) | ||
| 120 | return; | ||
| 121 | |||
| 122 | osb->local_alloc_debug = debugfs_create_file("local_alloc_stats", | ||
| 123 | S_IFREG|S_IRUSR, | ||
| 124 | osb->osb_debug_root, | ||
| 125 | osb, | ||
| 126 | &ocfs2_la_debug_fops); | ||
| 127 | if (!osb->local_alloc_debug) { | ||
| 128 | kfree(osb->local_alloc_debug_buf); | ||
| 129 | osb->local_alloc_debug_buf = NULL; | ||
| 130 | } | ||
| 131 | } | ||
| 132 | |||
| 133 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
| 134 | { | ||
| 135 | if (osb->local_alloc_debug) | ||
| 136 | debugfs_remove(osb->local_alloc_debug); | ||
| 137 | |||
| 138 | if (osb->local_alloc_debug_buf) | ||
| 139 | kfree(osb->local_alloc_debug_buf); | ||
| 140 | |||
| 141 | osb->local_alloc_debug_buf = NULL; | ||
| 142 | osb->local_alloc_debug = NULL; | ||
| 143 | } | ||
| 144 | #else /* CONFIG_OCFS2_FS_STATS */ | ||
| 145 | static void ocfs2_init_la_debug(struct ocfs2_super *osb) | ||
| 146 | { | ||
| 147 | return; | ||
| 148 | } | ||
| 149 | static void ocfs2_shutdown_la_debug(struct ocfs2_super *osb) | ||
| 150 | { | ||
| 151 | return; | ||
| 152 | } | ||
| 153 | #endif | ||
| 154 | |||
| 155 | static inline int ocfs2_la_state_enabled(struct ocfs2_super *osb) | ||
| 79 | { | 156 | { |
| 80 | BUG_ON(osb->s_clustersize_bits > 20); | 157 | return (osb->local_alloc_state == OCFS2_LA_THROTTLED || |
| 158 | osb->local_alloc_state == OCFS2_LA_ENABLED); | ||
| 159 | } | ||
| 81 | 160 | ||
| 82 | /* Size local alloc windows by the megabyte */ | 161 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, |
| 83 | return osb->local_alloc_size << (20 - osb->s_clustersize_bits); | 162 | unsigned int num_clusters) |
| 163 | { | ||
| 164 | spin_lock(&osb->osb_lock); | ||
| 165 | if (osb->local_alloc_state == OCFS2_LA_DISABLED || | ||
| 166 | osb->local_alloc_state == OCFS2_LA_THROTTLED) | ||
| 167 | if (num_clusters >= osb->local_alloc_default_bits) { | ||
| 168 | cancel_delayed_work(&osb->la_enable_wq); | ||
| 169 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
| 170 | } | ||
| 171 | spin_unlock(&osb->osb_lock); | ||
| 172 | } | ||
| 173 | |||
| 174 | void ocfs2_la_enable_worker(struct work_struct *work) | ||
| 175 | { | ||
| 176 | struct ocfs2_super *osb = | ||
| 177 | container_of(work, struct ocfs2_super, | ||
| 178 | la_enable_wq.work); | ||
| 179 | spin_lock(&osb->osb_lock); | ||
| 180 | osb->local_alloc_state = OCFS2_LA_ENABLED; | ||
| 181 | spin_unlock(&osb->osb_lock); | ||
| 84 | } | 182 | } |
| 85 | 183 | ||
| 86 | /* | 184 | /* |
| 87 | * Tell us whether a given allocation should use the local alloc | 185 | * Tell us whether a given allocation should use the local alloc |
| 88 | * file. Otherwise, it has to go to the main bitmap. | 186 | * file. Otherwise, it has to go to the main bitmap. |
| 187 | * | ||
| 188 | * This function does semi-dirty reads of local alloc size and state! | ||
| 189 | * This is ok however, as the values are re-checked once under mutex. | ||
| 89 | */ | 190 | */ |
| 90 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | 191 | int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) |
| 91 | { | 192 | { |
| 92 | int la_bits = ocfs2_local_alloc_window_bits(osb); | ||
| 93 | int ret = 0; | 193 | int ret = 0; |
| 194 | int la_bits; | ||
| 195 | |||
| 196 | spin_lock(&osb->osb_lock); | ||
| 197 | la_bits = osb->local_alloc_bits; | ||
| 94 | 198 | ||
| 95 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) | 199 | if (!ocfs2_la_state_enabled(osb)) |
| 96 | goto bail; | 200 | goto bail; |
| 97 | 201 | ||
| 98 | /* la_bits should be at least twice the size (in clusters) of | 202 | /* la_bits should be at least twice the size (in clusters) of |
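[Editorial note] The new local_alloc_stats debugfs file emits one tab-separated line in the format built by ocfs2_la_debug_read() above: version, last group descriptor, default window bits, current window bits, and state. A small userspace reader could look like the sketch below; the exact path under /sys/kernel/debug is an assumption, since the diff only creates the file under osb->osb_debug_root, which is set up elsewhere.

#include <stdio.h>

int main(void)
{
        /* Path is a guess; substitute the real per-mount debugfs directory. */
        const char *path =
            "/sys/kernel/debug/ocfs2/<fs-uuid>/local_alloc_stats";
        unsigned int ver, def_bits, cur_bits, state;
        unsigned long long last_gd;
        FILE *f = fopen(path, "r");

        if (!f)
                return 1;
        /* Matches the snprintf format string in ocfs2_la_debug_read();
         * %x / %llx accept the printed "0x" prefixes. */
        if (fscanf(f, "%x\t%llx\t%u\t%u\t%x",
                   &ver, &last_gd, &def_bits, &cur_bits, &state) == 5)
                printf("la ver %u: window %u/%u bits, state 0x%x\n",
                       ver, cur_bits, def_bits, state);
        fclose(f);
        return 0;
}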
| @@ -106,6 +210,7 @@ int ocfs2_alloc_should_use_local(struct ocfs2_super *osb, u64 bits) | |||
| 106 | bail: | 210 | bail: |
| 107 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", | 211 | mlog(0, "state=%d, bits=%llu, la_bits=%d, ret=%d\n", |
| 108 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); | 212 | osb->local_alloc_state, (unsigned long long)bits, la_bits, ret); |
| 213 | spin_unlock(&osb->osb_lock); | ||
| 109 | return ret; | 214 | return ret; |
| 110 | } | 215 | } |
| 111 | 216 | ||
| @@ -120,14 +225,18 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 120 | 225 | ||
| 121 | mlog_entry_void(); | 226 | mlog_entry_void(); |
| 122 | 227 | ||
| 123 | if (osb->local_alloc_size == 0) | 228 | ocfs2_init_la_debug(osb); |
| 229 | |||
| 230 | if (osb->local_alloc_bits == 0) | ||
| 124 | goto bail; | 231 | goto bail; |
| 125 | 232 | ||
| 126 | if (ocfs2_local_alloc_window_bits(osb) >= osb->bitmap_cpg) { | 233 | if (osb->local_alloc_bits >= osb->bitmap_cpg) { |
| 127 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " | 234 | mlog(ML_NOTICE, "Requested local alloc window %d is larger " |
| 128 | "than max possible %u. Using defaults.\n", | 235 | "than max possible %u. Using defaults.\n", |
| 129 | ocfs2_local_alloc_window_bits(osb), (osb->bitmap_cpg - 1)); | 236 | osb->local_alloc_bits, (osb->bitmap_cpg - 1)); |
| 130 | osb->local_alloc_size = OCFS2_DEFAULT_LOCAL_ALLOC_SIZE; | 237 | osb->local_alloc_bits = |
| 238 | ocfs2_megabytes_to_clusters(osb->sb, | ||
| 239 | OCFS2_DEFAULT_LOCAL_ALLOC_SIZE); | ||
| 131 | } | 240 | } |
| 132 | 241 | ||
| 133 | /* read the alloc off disk */ | 242 | /* read the alloc off disk */ |
| @@ -139,8 +248,8 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 139 | goto bail; | 248 | goto bail; |
| 140 | } | 249 | } |
| 141 | 250 | ||
| 142 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 251 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
| 143 | &alloc_bh, 0, inode); | 252 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
| 144 | if (status < 0) { | 253 | if (status < 0) { |
| 145 | mlog_errno(status); | 254 | mlog_errno(status); |
| 146 | goto bail; | 255 | goto bail; |
| @@ -185,13 +294,14 @@ int ocfs2_load_local_alloc(struct ocfs2_super *osb) | |||
| 185 | 294 | ||
| 186 | bail: | 295 | bail: |
| 187 | if (status < 0) | 296 | if (status < 0) |
| 188 | if (alloc_bh) | 297 | brelse(alloc_bh); |
| 189 | brelse(alloc_bh); | ||
| 190 | if (inode) | 298 | if (inode) |
| 191 | iput(inode); | 299 | iput(inode); |
| 192 | 300 | ||
| 193 | mlog(0, "Local alloc window bits = %d\n", | 301 | if (status < 0) |
| 194 | ocfs2_local_alloc_window_bits(osb)); | 302 | ocfs2_shutdown_la_debug(osb); |
| 303 | |||
| 304 | mlog(0, "Local alloc window bits = %d\n", osb->local_alloc_bits); | ||
| 195 | 305 | ||
| 196 | mlog_exit(status); | 306 | mlog_exit(status); |
| 197 | return status; | 307 | return status; |
| @@ -217,6 +327,11 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb) | |||
| 217 | 327 | ||
| 218 | mlog_entry_void(); | 328 | mlog_entry_void(); |
| 219 | 329 | ||
| 330 | cancel_delayed_work(&osb->la_enable_wq); | ||
| 331 | flush_workqueue(ocfs2_wq); | ||
| 332 | |||
| 333 | ocfs2_shutdown_la_debug(osb); | ||
| 334 | |||
| 220 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) | 335 | if (osb->local_alloc_state == OCFS2_LA_UNUSED) |
| 221 | goto out; | 336 | goto out; |
| 222 | 337 | ||
| @@ -295,8 +410,7 @@ out_commit: | |||
| 295 | ocfs2_commit_trans(osb, handle); | 410 | ocfs2_commit_trans(osb, handle); |
| 296 | 411 | ||
| 297 | out_unlock: | 412 | out_unlock: |
| 298 | if (main_bm_bh) | 413 | brelse(main_bm_bh); |
| 299 | brelse(main_bm_bh); | ||
| 300 | 414 | ||
| 301 | ocfs2_inode_unlock(main_bm_inode, 1); | 415 | ocfs2_inode_unlock(main_bm_inode, 1); |
| 302 | 416 | ||
| @@ -345,8 +459,8 @@ int ocfs2_begin_local_alloc_recovery(struct ocfs2_super *osb, | |||
| 345 | 459 | ||
| 346 | mutex_lock(&inode->i_mutex); | 460 | mutex_lock(&inode->i_mutex); |
| 347 | 461 | ||
| 348 | status = ocfs2_read_block(osb, OCFS2_I(inode)->ip_blkno, | 462 | status = ocfs2_read_blocks(inode, OCFS2_I(inode)->ip_blkno, 1, |
| 349 | &alloc_bh, 0, inode); | 463 | &alloc_bh, OCFS2_BH_IGNORE_CACHE); |
| 350 | if (status < 0) { | 464 | if (status < 0) { |
| 351 | mlog_errno(status); | 465 | mlog_errno(status); |
| 352 | goto bail; | 466 | goto bail; |
| @@ -372,8 +486,7 @@ bail: | |||
| 372 | *alloc_copy = NULL; | 486 | *alloc_copy = NULL; |
| 373 | } | 487 | } |
| 374 | 488 | ||
| 375 | if (alloc_bh) | 489 | brelse(alloc_bh); |
| 376 | brelse(alloc_bh); | ||
| 377 | 490 | ||
| 378 | if (inode) { | 491 | if (inode) { |
| 379 | mutex_unlock(&inode->i_mutex); | 492 | mutex_unlock(&inode->i_mutex); |
| @@ -441,8 +554,7 @@ out_unlock: | |||
| 441 | out_mutex: | 554 | out_mutex: |
| 442 | mutex_unlock(&main_bm_inode->i_mutex); | 555 | mutex_unlock(&main_bm_inode->i_mutex); |
| 443 | 556 | ||
| 444 | if (main_bm_bh) | 557 | brelse(main_bm_bh); |
| 445 | brelse(main_bm_bh); | ||
| 446 | 558 | ||
| 447 | iput(main_bm_inode); | 559 | iput(main_bm_inode); |
| 448 | 560 | ||
| @@ -453,8 +565,48 @@ out: | |||
| 453 | return status; | 565 | return status; |
| 454 | } | 566 | } |
| 455 | 567 | ||
| 568 | /* Check to see if the local alloc window is within ac->ac_max_block */ | ||
| 569 | static int ocfs2_local_alloc_in_range(struct inode *inode, | ||
| 570 | struct ocfs2_alloc_context *ac, | ||
| 571 | u32 bits_wanted) | ||
| 572 | { | ||
| 573 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 574 | struct ocfs2_dinode *alloc; | ||
| 575 | struct ocfs2_local_alloc *la; | ||
| 576 | int start; | ||
| 577 | u64 block_off; | ||
| 578 | |||
| 579 | if (!ac->ac_max_block) | ||
| 580 | return 1; | ||
| 581 | |||
| 582 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | ||
| 583 | la = OCFS2_LOCAL_ALLOC(alloc); | ||
| 584 | |||
| 585 | start = ocfs2_local_alloc_find_clear_bits(osb, alloc, bits_wanted); | ||
| 586 | if (start == -1) { | ||
| 587 | mlog_errno(-ENOSPC); | ||
| 588 | return 0; | ||
| 589 | } | ||
| 590 | |||
| 591 | /* | ||
| 592 | * Converting (bm_off + start + bits_wanted) to blocks gives us | ||
| 593 | * the blkno just past our actual allocation. This is perfect | ||
| 594 | * to compare with ac_max_block. | ||
| 595 | */ | ||
| 596 | block_off = ocfs2_clusters_to_blocks(inode->i_sb, | ||
| 597 | le32_to_cpu(la->la_bm_off) + | ||
| 598 | start + bits_wanted); | ||
| 599 | mlog(0, "Checking %llu against %llu\n", | ||
| 600 | (unsigned long long)block_off, | ||
| 601 | (unsigned long long)ac->ac_max_block); | ||
| 602 | if (block_off > ac->ac_max_block) | ||
| 603 | return 0; | ||
| 604 | |||
| 605 | return 1; | ||
| 606 | } | ||
| 607 | |||
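The range check above converts a cluster offset to a block number and compares it with ac_max_block. A hedged, user-space illustration of that arithmetic is below; the 4 KB block / 32 KB cluster geometry and all numbers are assumptions chosen for the example, and it only assumes that clusters-to-blocks conversion is a shift by the bit-size difference.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        /* Example geometry (assumed, not taken from the patch). */
        unsigned int blocksize_bits   = 12;     /* 4 KB blocks    */
        unsigned int clustersize_bits = 15;     /* 32 KB clusters */

        uint32_t la_bm_off    = 1000;           /* window start, in clusters */
        int      start        = 16;             /* first clear bit found     */
        uint32_t bits_wanted  = 8;              /* clusters being asked for  */
        uint64_t ac_max_block = 10000000ULL;

        /* clusters -> blocks is a shift by the size difference */
        uint64_t block_off = (uint64_t)(la_bm_off + start + bits_wanted)
                             << (clustersize_bits - blocksize_bits);

        printf("block just past the allocation: %llu (limit %llu) -> %s\n",
               (unsigned long long)block_off,
               (unsigned long long)ac_max_block,
               block_off > ac_max_block ? "use main bitmap" : "local alloc ok");
        return 0;
}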
| 456 | /* | 608 | /* |
| 457 | * make sure we've got at least bitswanted contiguous bits in the | 609 | * make sure we've got at least bits_wanted contiguous bits in the |
| 458 | * local alloc. You lose them when you drop i_mutex. | 610 | * local alloc. You lose them when you drop i_mutex. |
| 459 | * | 611 | * |
| 460 | * We will add ourselves to the transaction passed in, but may start | 612 | * We will add ourselves to the transaction passed in, but may start |
| @@ -485,16 +637,18 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 485 | 637 | ||
| 486 | mutex_lock(&local_alloc_inode->i_mutex); | 638 | mutex_lock(&local_alloc_inode->i_mutex); |
| 487 | 639 | ||
| 488 | if (osb->local_alloc_state != OCFS2_LA_ENABLED) { | 640 | /* |
| 489 | status = -ENOSPC; | 641 | * We must double check state and allocator bits because |
| 490 | goto bail; | 642 | * another process may have changed them while holding i_mutex. |
| 491 | } | 643 | */ |
| 492 | 644 | spin_lock(&osb->osb_lock); | |
| 493 | if (bits_wanted > ocfs2_local_alloc_window_bits(osb)) { | 645 | if (!ocfs2_la_state_enabled(osb) || |
| 494 | mlog(0, "Asking for more than my max window size!\n"); | 646 | (bits_wanted > osb->local_alloc_bits)) { |
| 647 | spin_unlock(&osb->osb_lock); | ||
| 495 | status = -ENOSPC; | 648 | status = -ENOSPC; |
| 496 | goto bail; | 649 | goto bail; |
| 497 | } | 650 | } |
| 651 | spin_unlock(&osb->osb_lock); | ||
| 498 | 652 | ||
| 499 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; | 653 | alloc = (struct ocfs2_dinode *) osb->local_alloc_bh->b_data; |
| 500 | 654 | ||
| @@ -522,6 +676,36 @@ int ocfs2_reserve_local_alloc_bits(struct ocfs2_super *osb, | |||
| 522 | mlog_errno(status); | 676 | mlog_errno(status); |
| 523 | goto bail; | 677 | goto bail; |
| 524 | } | 678 | } |
| 679 | |||
| 680 | /* | ||
| 681 | * Under certain conditions, the window slide code | ||
| 682 | * might have reduced the number of bits available or | ||
| 683 | * disabled the local alloc entirely. Re-check | ||
| 684 | * here and return -ENOSPC if necessary. | ||
| 685 | */ | ||
| 686 | status = -ENOSPC; | ||
| 687 | if (!ocfs2_la_state_enabled(osb)) | ||
| 688 | goto bail; | ||
| 689 | |||
| 690 | free_bits = le32_to_cpu(alloc->id1.bitmap1.i_total) - | ||
| 691 | le32_to_cpu(alloc->id1.bitmap1.i_used); | ||
| 692 | if (bits_wanted > free_bits) | ||
| 693 | goto bail; | ||
| 694 | } | ||
| 695 | |||
| 696 | if (ac->ac_max_block) | ||
| 697 | mlog(0, "Calling in_range for max block %llu\n", | ||
| 698 | (unsigned long long)ac->ac_max_block); | ||
| 699 | |||
| 700 | if (!ocfs2_local_alloc_in_range(local_alloc_inode, ac, | ||
| 701 | bits_wanted)) { | ||
| 702 | /* | ||
| 703 | * The window is outside ac->ac_max_block. | ||
| 704 | * This errno tells the caller to keep localalloc enabled | ||
| 705 | * but to get the allocation from the main bitmap. | ||
| 706 | */ | ||
| 707 | status = -EFBIG; | ||
| 708 | goto bail; | ||
| 525 | } | 709 | } |
| 526 | 710 | ||
| 527 | ac->ac_inode = local_alloc_inode; | 711 | ac->ac_inode = local_alloc_inode; |
| @@ -789,6 +973,85 @@ bail: | |||
| 789 | return status; | 973 | return status; |
| 790 | } | 974 | } |
| 791 | 975 | ||
| 976 | enum ocfs2_la_event { | ||
| 977 | OCFS2_LA_EVENT_SLIDE, /* Normal window slide. */ | ||
| 978 | OCFS2_LA_EVENT_FRAGMENTED, /* The global bitmap has | ||
| 979 | * enough bits theoretically | ||
| 980 | * free, but a contiguous | ||
| 981 | * allocation could not be | ||
| 982 | * found. */ | ||
| 983 | OCFS2_LA_EVENT_ENOSPC, /* Global bitmap doesn't have | ||
| 984 | * enough bits free to satisfy | ||
| 985 | * our request. */ | ||
| 986 | }; | ||
| 987 | #define OCFS2_LA_ENABLE_INTERVAL (30 * HZ) | ||
| 988 | /* | ||
| 989 | * Given an event, calculate the size of our next local alloc window. | ||
| 990 | * | ||
| 991 | * This should always be called under i_mutex of the local alloc inode | ||
| 992 | * so that local alloc disabling doesn't race with processes trying to | ||
| 993 | * use the allocator. | ||
| 994 | * | ||
| 995 | * Returns the state which the local alloc was left in. This value can | ||
| 996 | * be ignored by some paths. | ||
| 997 | */ | ||
| 998 | static int ocfs2_recalc_la_window(struct ocfs2_super *osb, | ||
| 999 | enum ocfs2_la_event event) | ||
| 1000 | { | ||
| 1001 | unsigned int bits; | ||
| 1002 | int state; | ||
| 1003 | |||
| 1004 | spin_lock(&osb->osb_lock); | ||
| 1005 | if (osb->local_alloc_state == OCFS2_LA_DISABLED) { | ||
| 1006 | WARN_ON_ONCE(osb->local_alloc_state == OCFS2_LA_DISABLED); | ||
| 1007 | goto out_unlock; | ||
| 1008 | } | ||
| 1009 | |||
| 1010 | /* | ||
| 1011 | * ENOSPC and fragmentation are treated similarly for now. | ||
| 1012 | */ | ||
| 1013 | if (event == OCFS2_LA_EVENT_ENOSPC || | ||
| 1014 | event == OCFS2_LA_EVENT_FRAGMENTED) { | ||
| 1015 | /* | ||
| 1016 | * We ran out of contiguous space in the primary | ||
| 1017 | * bitmap. Drastically reduce the number of bits used | ||
| 1018 | * by local alloc until we have to disable it. | ||
| 1019 | */ | ||
| 1020 | bits = osb->local_alloc_bits >> 1; | ||
| 1021 | if (bits > ocfs2_megabytes_to_clusters(osb->sb, 1)) { | ||
| 1022 | /* | ||
| 1023 | * By setting state to THROTTLED, we'll keep | ||
| 1024 | * the number of local alloc bits used down | ||
| 1025 | * until an event occurs which would give us | ||
| 1026 | * reason to assume the bitmap situation might | ||
| 1027 | * have changed. | ||
| 1028 | */ | ||
| 1029 | osb->local_alloc_state = OCFS2_LA_THROTTLED; | ||
| 1030 | osb->local_alloc_bits = bits; | ||
| 1031 | } else { | ||
| 1032 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
| 1033 | } | ||
| 1034 | queue_delayed_work(ocfs2_wq, &osb->la_enable_wq, | ||
| 1035 | OCFS2_LA_ENABLE_INTERVAL); | ||
| 1036 | goto out_unlock; | ||
| 1037 | } | ||
| 1038 | |||
| 1039 | /* | ||
| 1040 | * Don't increase the size of the local alloc window until we | ||
| 1041 | * know we might be able to fulfill the request. Otherwise, we | ||
| 1042 | * risk bouncing around the global bitmap during periods of | ||
| 1043 | * low space. | ||
| 1044 | */ | ||
| 1045 | if (osb->local_alloc_state != OCFS2_LA_THROTTLED) | ||
| 1046 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
| 1047 | |||
| 1048 | out_unlock: | ||
| 1049 | state = osb->local_alloc_state; | ||
| 1050 | spin_unlock(&osb->osb_lock); | ||
| 1051 | |||
| 1052 | return state; | ||
| 1053 | } | ||
| 1054 | |||
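To make the shrink policy above concrete, here is a hedged, user-space walk-through of the halving sequence. The 4 KB cluster size and 8 MB default window are example values, not taken from the patch.

#include <stdio.h>

int main(void)
{
        unsigned int clustersize_bits = 12;               /* 4 KB clusters (assumed) */
        unsigned int one_mb = 1U << (20 - clustersize_bits);
        unsigned int bits = 8 * one_mb;                   /* 8 MB default window     */

        /* Each ENOSPC/fragmentation event halves the window; once the
         * halved window would be <= 1 MB, local alloc is disabled and a
         * delayed re-enable is queued (OCFS2_LA_ENABLE_INTERVAL). */
        while (bits > one_mb) {
                bits >>= 1;
                if (bits > one_mb)
                        printf("THROTTLED at %u clusters\n", bits);
                else
                        printf("window would shrink to %u clusters -> DISABLED\n",
                               bits);
        }
        return 0;
}

With these example values the window goes 2048 -> 1024 -> 512 clusters while throttled, and is disabled when it would drop to 256 clusters (1 MB).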
| 792 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | 1055 | static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, |
| 793 | struct ocfs2_alloc_context **ac, | 1056 | struct ocfs2_alloc_context **ac, |
| 794 | struct inode **bitmap_inode, | 1057 | struct inode **bitmap_inode, |
| @@ -803,12 +1066,21 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb, | |||
| 803 | goto bail; | 1066 | goto bail; |
| 804 | } | 1067 | } |
| 805 | 1068 | ||
| 806 | (*ac)->ac_bits_wanted = ocfs2_local_alloc_window_bits(osb); | 1069 | retry_enospc: |
| 1070 | (*ac)->ac_bits_wanted = osb->local_alloc_bits; | ||
| 807 | 1071 | ||
| 808 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); | 1072 | status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac); |
| 1073 | if (status == -ENOSPC) { | ||
| 1074 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) == | ||
| 1075 | OCFS2_LA_DISABLED) | ||
| 1076 | goto bail; | ||
| 1077 | |||
| 1078 | ocfs2_free_ac_resource(*ac); | ||
| 1079 | memset(*ac, 0, sizeof(struct ocfs2_alloc_context)); | ||
| 1080 | goto retry_enospc; | ||
| 1081 | } | ||
| 809 | if (status < 0) { | 1082 | if (status < 0) { |
| 810 | if (status != -ENOSPC) | 1083 | mlog_errno(status); |
| 811 | mlog_errno(status); | ||
| 812 | goto bail; | 1084 | goto bail; |
| 813 | } | 1085 | } |
| 814 | 1086 | ||
| @@ -849,7 +1121,7 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 849 | "one\n"); | 1121 | "one\n"); |
| 850 | 1122 | ||
| 851 | mlog(0, "Allocating %u clusters for a new window.\n", | 1123 | mlog(0, "Allocating %u clusters for a new window.\n", |
| 852 | ocfs2_local_alloc_window_bits(osb)); | 1124 | osb->local_alloc_bits); |
| 853 | 1125 | ||
| 854 | /* Instruct the allocation code to try the most recently used | 1126 | /* Instruct the allocation code to try the most recently used |
| 855 | * cluster group. We'll re-record the group used this pass | 1127 | * cluster group. We'll re-record the group used this pass |
| @@ -859,9 +1131,36 @@ static int ocfs2_local_alloc_new_window(struct ocfs2_super *osb, | |||
| 859 | /* we used the generic suballoc reserve function, but we set | 1131 | /* we used the generic suballoc reserve function, but we set |
| 860 | * everything up nicely, so there's no reason why we can't use | 1132 | * everything up nicely, so there's no reason why we can't use |
| 861 | * the more specific cluster api to claim bits. */ | 1133 | * the more specific cluster api to claim bits. */ |
| 862 | status = ocfs2_claim_clusters(osb, handle, ac, | 1134 | status = ocfs2_claim_clusters(osb, handle, ac, osb->local_alloc_bits, |
| 863 | ocfs2_local_alloc_window_bits(osb), | ||
| 864 | &cluster_off, &cluster_count); | 1135 | &cluster_off, &cluster_count); |
| 1136 | if (status == -ENOSPC) { | ||
| 1137 | retry_enospc: | ||
| 1138 | /* | ||
| 1139 | * Note: We could also try syncing the journal here to | ||
| 1140 | * allow use of any free bits which the current | ||
| 1141 | * transaction can't give us access to. --Mark | ||
| 1142 | */ | ||
| 1143 | if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_FRAGMENTED) == | ||
| 1144 | OCFS2_LA_DISABLED) | ||
| 1145 | goto bail; | ||
| 1146 | |||
| 1147 | status = ocfs2_claim_clusters(osb, handle, ac, | ||
| 1148 | osb->local_alloc_bits, | ||
| 1149 | &cluster_off, | ||
| 1150 | &cluster_count); | ||
| 1151 | if (status == -ENOSPC) | ||
| 1152 | goto retry_enospc; | ||
| 1153 | /* | ||
| 1154 | * We only shrunk the *minimum* number of bits in our | ||
| 1155 | * request - it's entirely possible that the allocator | ||
| 1156 | * might give us more than we asked for. | ||
| 1157 | */ | ||
| 1158 | if (status == 0) { | ||
| 1159 | spin_lock(&osb->osb_lock); | ||
| 1160 | osb->local_alloc_bits = cluster_count; | ||
| 1161 | spin_unlock(&osb->osb_lock); | ||
| 1162 | } | ||
| 1163 | } | ||
| 865 | if (status < 0) { | 1164 | if (status < 0) { |
| 866 | if (status != -ENOSPC) | 1165 | if (status != -ENOSPC) |
| 867 | mlog_errno(status); | 1166 | mlog_errno(status); |
| @@ -905,6 +1204,8 @@ static int ocfs2_local_alloc_slide_window(struct ocfs2_super *osb, | |||
| 905 | 1204 | ||
| 906 | mlog_entry_void(); | 1205 | mlog_entry_void(); |
| 907 | 1206 | ||
| 1207 | ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_SLIDE); | ||
| 1208 | |||
| 908 | /* This will lock the main bitmap for us. */ | 1209 | /* This will lock the main bitmap for us. */ |
| 909 | status = ocfs2_local_alloc_reserve_for_window(osb, | 1210 | status = ocfs2_local_alloc_reserve_for_window(osb, |
| 910 | &ac, | 1211 | &ac, |
| @@ -976,8 +1277,7 @@ bail: | |||
| 976 | if (handle) | 1277 | if (handle) |
| 977 | ocfs2_commit_trans(osb, handle); | 1278 | ocfs2_commit_trans(osb, handle); |
| 978 | 1279 | ||
| 979 | if (main_bm_bh) | 1280 | brelse(main_bm_bh); |
| 980 | brelse(main_bm_bh); | ||
| 981 | 1281 | ||
| 982 | if (main_bm_inode) | 1282 | if (main_bm_inode) |
| 983 | iput(main_bm_inode); | 1283 | iput(main_bm_inode); |
diff --git a/fs/ocfs2/localalloc.h b/fs/ocfs2/localalloc.h index 3f76631e110c..ac5ea9f86653 100644 --- a/fs/ocfs2/localalloc.h +++ b/fs/ocfs2/localalloc.h | |||
| @@ -52,4 +52,8 @@ int ocfs2_claim_local_alloc_bits(struct ocfs2_super *osb, | |||
| 52 | u32 *bit_off, | 52 | u32 *bit_off, |
| 53 | u32 *num_bits); | 53 | u32 *num_bits); |
| 54 | 54 | ||
| 55 | void ocfs2_local_alloc_seen_free_bits(struct ocfs2_super *osb, | ||
| 56 | unsigned int num_clusters); | ||
| 57 | void ocfs2_la_enable_worker(struct work_struct *work); | ||
| 58 | |||
| 55 | #endif /* OCFS2_LOCALALLOC_H */ | 59 | #endif /* OCFS2_LOCALALLOC_H */ |
diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 203f87143877..544ac6245175 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c | |||
| @@ -24,6 +24,7 @@ | |||
| 24 | */ | 24 | */ |
| 25 | 25 | ||
| 26 | #include <linux/fs.h> | 26 | #include <linux/fs.h> |
| 27 | #include <linux/fcntl.h> | ||
| 27 | 28 | ||
| 28 | #define MLOG_MASK_PREFIX ML_INODE | 29 | #define MLOG_MASK_PREFIX ML_INODE |
| 29 | #include <cluster/masklog.h> | 30 | #include <cluster/masklog.h> |
| @@ -32,6 +33,7 @@ | |||
| 32 | 33 | ||
| 33 | #include "dlmglue.h" | 34 | #include "dlmglue.h" |
| 34 | #include "file.h" | 35 | #include "file.h" |
| 36 | #include "inode.h" | ||
| 35 | #include "locks.h" | 37 | #include "locks.h" |
| 36 | 38 | ||
| 37 | static int ocfs2_do_flock(struct file *file, struct inode *inode, | 39 | static int ocfs2_do_flock(struct file *file, struct inode *inode, |
| @@ -123,3 +125,16 @@ int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl) | |||
| 123 | else | 125 | else |
| 124 | return ocfs2_do_flock(file, inode, cmd, fl); | 126 | return ocfs2_do_flock(file, inode, cmd, fl); |
| 125 | } | 127 | } |
| 128 | |||
| 129 | int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl) | ||
| 130 | { | ||
| 131 | struct inode *inode = file->f_mapping->host; | ||
| 132 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 133 | |||
| 134 | if (!(fl->fl_flags & FL_POSIX)) | ||
| 135 | return -ENOLCK; | ||
| 136 | if (__mandatory_lock(inode)) | ||
| 137 | return -ENOLCK; | ||
| 138 | |||
| 139 | return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl); | ||
| 140 | } | ||
diff --git a/fs/ocfs2/locks.h b/fs/ocfs2/locks.h index 9743ef2324ec..496d488b271f 100644 --- a/fs/ocfs2/locks.h +++ b/fs/ocfs2/locks.h | |||
| @@ -27,5 +27,6 @@ | |||
| 27 | #define OCFS2_LOCKS_H | 27 | #define OCFS2_LOCKS_H |
| 28 | 28 | ||
| 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); | 29 | int ocfs2_flock(struct file *file, int cmd, struct file_lock *fl); |
| 30 | int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl); | ||
| 30 | 31 | ||
| 31 | #endif /* OCFS2_LOCKS_H */ | 32 | #endif /* OCFS2_LOCKS_H */ |
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index d5d808fe0140..485a6aa0ad39 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c | |||
| @@ -60,6 +60,7 @@ | |||
| 60 | #include "symlink.h" | 60 | #include "symlink.h" |
| 61 | #include "sysfile.h" | 61 | #include "sysfile.h" |
| 62 | #include "uptodate.h" | 62 | #include "uptodate.h" |
| 63 | #include "xattr.h" | ||
| 63 | 64 | ||
| 64 | #include "buffer_head_io.h" | 65 | #include "buffer_head_io.h" |
| 65 | 66 | ||
| @@ -327,14 +328,9 @@ leave: | |||
| 327 | if (status == -ENOSPC) | 328 | if (status == -ENOSPC) |
| 328 | mlog(0, "Disk is full\n"); | 329 | mlog(0, "Disk is full\n"); |
| 329 | 330 | ||
| 330 | if (new_fe_bh) | 331 | brelse(new_fe_bh); |
| 331 | brelse(new_fe_bh); | 332 | brelse(de_bh); |
| 332 | 333 | brelse(parent_fe_bh); | |
| 333 | if (de_bh) | ||
| 334 | brelse(de_bh); | ||
| 335 | |||
| 336 | if (parent_fe_bh) | ||
| 337 | brelse(parent_fe_bh); | ||
| 338 | 334 | ||
| 339 | if ((status < 0) && inode) | 335 | if ((status < 0) && inode) |
| 340 | iput(inode); | 336 | iput(inode); |
| @@ -647,12 +643,9 @@ out_unlock_inode: | |||
| 647 | out: | 643 | out: |
| 648 | ocfs2_inode_unlock(dir, 1); | 644 | ocfs2_inode_unlock(dir, 1); |
| 649 | 645 | ||
| 650 | if (de_bh) | 646 | brelse(de_bh); |
| 651 | brelse(de_bh); | 647 | brelse(fe_bh); |
| 652 | if (fe_bh) | 648 | brelse(parent_fe_bh); |
| 653 | brelse(fe_bh); | ||
| 654 | if (parent_fe_bh) | ||
| 655 | brelse(parent_fe_bh); | ||
| 656 | 649 | ||
| 657 | mlog_exit(err); | 650 | mlog_exit(err); |
| 658 | 651 | ||
| @@ -851,17 +844,10 @@ leave: | |||
| 851 | iput(orphan_dir); | 844 | iput(orphan_dir); |
| 852 | } | 845 | } |
| 853 | 846 | ||
| 854 | if (fe_bh) | 847 | brelse(fe_bh); |
| 855 | brelse(fe_bh); | 848 | brelse(dirent_bh); |
| 856 | 849 | brelse(parent_node_bh); | |
| 857 | if (dirent_bh) | 850 | brelse(orphan_entry_bh); |
| 858 | brelse(dirent_bh); | ||
| 859 | |||
| 860 | if (parent_node_bh) | ||
| 861 | brelse(parent_node_bh); | ||
| 862 | |||
| 863 | if (orphan_entry_bh) | ||
| 864 | brelse(orphan_entry_bh); | ||
| 865 | 851 | ||
| 866 | mlog_exit(status); | 852 | mlog_exit(status); |
| 867 | 853 | ||
| @@ -1372,24 +1358,15 @@ bail: | |||
| 1372 | 1358 | ||
| 1373 | if (new_inode) | 1359 | if (new_inode) |
| 1374 | iput(new_inode); | 1360 | iput(new_inode); |
| 1375 | if (newfe_bh) | 1361 | brelse(newfe_bh); |
| 1376 | brelse(newfe_bh); | 1362 | brelse(old_inode_bh); |
| 1377 | if (old_inode_bh) | 1363 | brelse(old_dir_bh); |
| 1378 | brelse(old_inode_bh); | 1364 | brelse(new_dir_bh); |
| 1379 | if (old_dir_bh) | 1365 | brelse(new_de_bh); |
| 1380 | brelse(old_dir_bh); | 1366 | brelse(old_de_bh); |
| 1381 | if (new_dir_bh) | 1367 | brelse(old_inode_de_bh); |
| 1382 | brelse(new_dir_bh); | 1368 | brelse(orphan_entry_bh); |
| 1383 | if (new_de_bh) | 1369 | brelse(insert_entry_bh); |
| 1384 | brelse(new_de_bh); | ||
| 1385 | if (old_de_bh) | ||
| 1386 | brelse(old_de_bh); | ||
| 1387 | if (old_inode_de_bh) | ||
| 1388 | brelse(old_inode_de_bh); | ||
| 1389 | if (orphan_entry_bh) | ||
| 1390 | brelse(orphan_entry_bh); | ||
| 1391 | if (insert_entry_bh) | ||
| 1392 | brelse(insert_entry_bh); | ||
| 1393 | 1370 | ||
| 1394 | mlog_exit(status); | 1371 | mlog_exit(status); |
| 1395 | 1372 | ||
| @@ -1492,8 +1469,7 @@ bail: | |||
| 1492 | 1469 | ||
| 1493 | if (bhs) { | 1470 | if (bhs) { |
| 1494 | for(i = 0; i < blocks; i++) | 1471 | for(i = 0; i < blocks; i++) |
| 1495 | if (bhs[i]) | 1472 | brelse(bhs[i]); |
| 1496 | brelse(bhs[i]); | ||
| 1497 | kfree(bhs); | 1473 | kfree(bhs); |
| 1498 | } | 1474 | } |
| 1499 | 1475 | ||
| @@ -1598,10 +1574,10 @@ static int ocfs2_symlink(struct inode *dir, | |||
| 1598 | u32 offset = 0; | 1574 | u32 offset = 0; |
| 1599 | 1575 | ||
| 1600 | inode->i_op = &ocfs2_symlink_inode_operations; | 1576 | inode->i_op = &ocfs2_symlink_inode_operations; |
| 1601 | status = ocfs2_do_extend_allocation(osb, inode, &offset, 1, 0, | 1577 | status = ocfs2_add_inode_data(osb, inode, &offset, 1, 0, |
| 1602 | new_fe_bh, | 1578 | new_fe_bh, |
| 1603 | handle, data_ac, NULL, | 1579 | handle, data_ac, NULL, |
| 1604 | NULL); | 1580 | NULL); |
| 1605 | if (status < 0) { | 1581 | if (status < 0) { |
| 1606 | if (status != -ENOSPC && status != -EINTR) { | 1582 | if (status != -ENOSPC && status != -EINTR) { |
| 1607 | mlog(ML_ERROR, | 1583 | mlog(ML_ERROR, |
| @@ -1659,12 +1635,9 @@ bail: | |||
| 1659 | 1635 | ||
| 1660 | ocfs2_inode_unlock(dir, 1); | 1636 | ocfs2_inode_unlock(dir, 1); |
| 1661 | 1637 | ||
| 1662 | if (new_fe_bh) | 1638 | brelse(new_fe_bh); |
| 1663 | brelse(new_fe_bh); | 1639 | brelse(parent_fe_bh); |
| 1664 | if (parent_fe_bh) | 1640 | brelse(de_bh); |
| 1665 | brelse(parent_fe_bh); | ||
| 1666 | if (de_bh) | ||
| 1667 | brelse(de_bh); | ||
| 1668 | if (inode_ac) | 1641 | if (inode_ac) |
| 1669 | ocfs2_free_alloc_context(inode_ac); | 1642 | ocfs2_free_alloc_context(inode_ac); |
| 1670 | if (data_ac) | 1643 | if (data_ac) |
| @@ -1759,8 +1732,7 @@ leave: | |||
| 1759 | iput(orphan_dir_inode); | 1732 | iput(orphan_dir_inode); |
| 1760 | } | 1733 | } |
| 1761 | 1734 | ||
| 1762 | if (orphan_dir_bh) | 1735 | brelse(orphan_dir_bh); |
| 1763 | brelse(orphan_dir_bh); | ||
| 1764 | 1736 | ||
| 1765 | mlog_exit(status); | 1737 | mlog_exit(status); |
| 1766 | return status; | 1738 | return status; |
| @@ -1780,10 +1752,9 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
| 1780 | 1752 | ||
| 1781 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); | 1753 | mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino); |
| 1782 | 1754 | ||
| 1783 | status = ocfs2_read_block(osb, | 1755 | status = ocfs2_read_block(orphan_dir_inode, |
| 1784 | OCFS2_I(orphan_dir_inode)->ip_blkno, | 1756 | OCFS2_I(orphan_dir_inode)->ip_blkno, |
| 1785 | &orphan_dir_bh, OCFS2_BH_CACHED, | 1757 | &orphan_dir_bh); |
| 1786 | orphan_dir_inode); | ||
| 1787 | if (status < 0) { | 1758 | if (status < 0) { |
| 1788 | mlog_errno(status); | 1759 | mlog_errno(status); |
| 1789 | goto leave; | 1760 | goto leave; |
| @@ -1829,8 +1800,7 @@ static int ocfs2_orphan_add(struct ocfs2_super *osb, | |||
| 1829 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); | 1800 | (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num); |
| 1830 | 1801 | ||
| 1831 | leave: | 1802 | leave: |
| 1832 | if (orphan_dir_bh) | 1803 | brelse(orphan_dir_bh); |
| 1833 | brelse(orphan_dir_bh); | ||
| 1834 | 1804 | ||
| 1835 | mlog_exit(status); | 1805 | mlog_exit(status); |
| 1836 | return status; | 1806 | return status; |
| @@ -1898,8 +1868,7 @@ int ocfs2_orphan_del(struct ocfs2_super *osb, | |||
| 1898 | } | 1868 | } |
| 1899 | 1869 | ||
| 1900 | leave: | 1870 | leave: |
| 1901 | if (target_de_bh) | 1871 | brelse(target_de_bh); |
| 1902 | brelse(target_de_bh); | ||
| 1903 | 1872 | ||
| 1904 | mlog_exit(status); | 1873 | mlog_exit(status); |
| 1905 | return status; | 1874 | return status; |
| @@ -1918,4 +1887,8 @@ const struct inode_operations ocfs2_dir_iops = { | |||
| 1918 | .setattr = ocfs2_setattr, | 1887 | .setattr = ocfs2_setattr, |
| 1919 | .getattr = ocfs2_getattr, | 1888 | .getattr = ocfs2_getattr, |
| 1920 | .permission = ocfs2_permission, | 1889 | .permission = ocfs2_permission, |
| 1890 | .setxattr = generic_setxattr, | ||
| 1891 | .getxattr = generic_getxattr, | ||
| 1892 | .listxattr = ocfs2_listxattr, | ||
| 1893 | .removexattr = generic_removexattr, | ||
| 1921 | }; | 1894 | }; |
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 7f625f2b1117..a21a465490c4 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h | |||
| @@ -34,7 +34,12 @@ | |||
| 34 | #include <linux/workqueue.h> | 34 | #include <linux/workqueue.h> |
| 35 | #include <linux/kref.h> | 35 | #include <linux/kref.h> |
| 36 | #include <linux/mutex.h> | 36 | #include <linux/mutex.h> |
| 37 | #include <linux/jbd.h> | 37 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
| 38 | # include <linux/jbd2.h> | ||
| 39 | #else | ||
| 40 | # include <linux/jbd.h> | ||
| 41 | # include "ocfs2_jbd_compat.h" | ||
| 42 | #endif | ||
| 38 | 43 | ||
| 39 | /* For union ocfs2_dlm_lksb */ | 44 | /* For union ocfs2_dlm_lksb */ |
| 40 | #include "stackglue.h" | 45 | #include "stackglue.h" |
| @@ -171,9 +176,13 @@ struct ocfs2_alloc_stats | |||
| 171 | 176 | ||
| 172 | enum ocfs2_local_alloc_state | 177 | enum ocfs2_local_alloc_state |
| 173 | { | 178 | { |
| 174 | OCFS2_LA_UNUSED = 0, | 179 | OCFS2_LA_UNUSED = 0, /* Local alloc will never be used for |
| 175 | OCFS2_LA_ENABLED, | 180 | * this mountpoint. */ |
| 176 | OCFS2_LA_DISABLED | 181 | OCFS2_LA_ENABLED, /* Local alloc is in use. */ |
| 182 | OCFS2_LA_THROTTLED, /* Local alloc is in use, but number | ||
| 183 | * of bits has been reduced. */ | ||
| 184 | OCFS2_LA_DISABLED /* Local alloc has temporarily been | ||
| 185 | * disabled. */ | ||
| 177 | }; | 186 | }; |
| 178 | 187 | ||
| 179 | enum ocfs2_mount_options | 188 | enum ocfs2_mount_options |
| @@ -184,6 +193,8 @@ enum ocfs2_mount_options | |||
| 184 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ | 193 | OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ |
| 185 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ | 194 | OCFS2_MOUNT_DATA_WRITEBACK = 1 << 4, /* No data ordering */ |
| 186 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ | 195 | OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */ |
| 196 | OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */ | ||
| 197 | OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */ | ||
| 187 | }; | 198 | }; |
| 188 | 199 | ||
| 189 | #define OCFS2_OSB_SOFT_RO 0x0001 | 200 | #define OCFS2_OSB_SOFT_RO 0x0001 |
| @@ -214,6 +225,7 @@ struct ocfs2_super | |||
| 214 | u32 bitmap_cpg; | 225 | u32 bitmap_cpg; |
| 215 | u8 *uuid; | 226 | u8 *uuid; |
| 216 | char *uuid_str; | 227 | char *uuid_str; |
| 228 | u32 uuid_hash; | ||
| 217 | u8 *vol_label; | 229 | u8 *vol_label; |
| 218 | u64 first_cluster_group_blkno; | 230 | u64 first_cluster_group_blkno; |
| 219 | u32 fs_generation; | 231 | u32 fs_generation; |
| @@ -241,6 +253,7 @@ struct ocfs2_super | |||
| 241 | int s_sectsize_bits; | 253 | int s_sectsize_bits; |
| 242 | int s_clustersize; | 254 | int s_clustersize; |
| 243 | int s_clustersize_bits; | 255 | int s_clustersize_bits; |
| 256 | unsigned int s_xattr_inline_size; | ||
| 244 | 257 | ||
| 245 | atomic_t vol_state; | 258 | atomic_t vol_state; |
| 246 | struct mutex recovery_lock; | 259 | struct mutex recovery_lock; |
| @@ -252,11 +265,27 @@ struct ocfs2_super | |||
| 252 | struct ocfs2_journal *journal; | 265 | struct ocfs2_journal *journal; |
| 253 | unsigned long osb_commit_interval; | 266 | unsigned long osb_commit_interval; |
| 254 | 267 | ||
| 255 | int local_alloc_size; | 268 | struct delayed_work la_enable_wq; |
| 256 | enum ocfs2_local_alloc_state local_alloc_state; | 269 | |
| 270 | /* | ||
| 271 | * Must hold local alloc i_mutex and osb->osb_lock to change | ||
| 272 | * local_alloc_bits. Reads can be done under either lock. | ||
| 273 | */ | ||
| 274 | unsigned int local_alloc_bits; | ||
| 275 | unsigned int local_alloc_default_bits; | ||
| 276 | |||
| 277 | enum ocfs2_local_alloc_state local_alloc_state; /* protected | ||
| 278 | * by osb_lock */ | ||
| 279 | |||
| 257 | struct buffer_head *local_alloc_bh; | 280 | struct buffer_head *local_alloc_bh; |
| 281 | |||
| 258 | u64 la_last_gd; | 282 | u64 la_last_gd; |
| 259 | 283 | ||
| 284 | #ifdef CONFIG_OCFS2_FS_STATS | ||
| 285 | struct dentry *local_alloc_debug; | ||
| 286 | char *local_alloc_debug_buf; | ||
| 287 | #endif | ||
| 288 | |||
| 260 | /* Next two fields are for local node slot recovery during | 289 | /* Next two fields are for local node slot recovery during |
| 261 | * mount. */ | 290 | * mount. */ |
| 262 | int dirty; | 291 | int dirty; |
| @@ -340,6 +369,13 @@ static inline int ocfs2_supports_inline_data(struct ocfs2_super *osb) | |||
| 340 | return 0; | 369 | return 0; |
| 341 | } | 370 | } |
| 342 | 371 | ||
| 372 | static inline int ocfs2_supports_xattr(struct ocfs2_super *osb) | ||
| 373 | { | ||
| 374 | if (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR) | ||
| 375 | return 1; | ||
| 376 | return 0; | ||
| 377 | } | ||
| 378 | |||
| 343 | /* set / clear functions because cluster events can make these happen | 379 | /* set / clear functions because cluster events can make these happen |
| 344 | * in parallel so we want the transitions to be atomic. this also | 380 | * in parallel so we want the transitions to be atomic. this also |
| 345 | * means that any future flags osb_flags must be protected by spinlock | 381 | * means that any future flags osb_flags must be protected by spinlock |
| @@ -554,6 +590,14 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb) | |||
| 554 | return pages_per_cluster; | 590 | return pages_per_cluster; |
| 555 | } | 591 | } |
| 556 | 592 | ||
| 593 | static inline unsigned int ocfs2_megabytes_to_clusters(struct super_block *sb, | ||
| 594 | unsigned int megs) | ||
| 595 | { | ||
| 596 | BUILD_BUG_ON(OCFS2_MAX_CLUSTERSIZE > 1048576); | ||
| 597 | |||
| 598 | return megs << (20 - OCFS2_SB(sb)->s_clustersize_bits); | ||
| 599 | } | ||
| 600 | |||
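A quick worked example of the conversion above: one megabyte is 1 << 20 bytes, so the helper shifts by (20 - s_clustersize_bits). The sketch below is a hedged, user-space copy for illustration only; the 4 KB cluster size is an assumption.

#include <stdio.h>

/* Local copy of the conversion, for illustration only. */
static unsigned int megabytes_to_clusters(unsigned int megs,
                                          unsigned int clustersize_bits)
{
        return megs << (20 - clustersize_bits);
}

int main(void)
{
        /* 4 KB clusters => clustersize_bits == 12, so 8 MB -> 2048 clusters */
        printf("%u\n", megabytes_to_clusters(8, 12));
        return 0;
}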
| 557 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) | 601 | static inline void ocfs2_init_inode_steal_slot(struct ocfs2_super *osb) |
| 558 | { | 602 | { |
| 559 | spin_lock(&osb->osb_lock); | 603 | spin_lock(&osb->osb_lock); |
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 4f619850ccf7..f24ce3d3f956 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h | |||
| @@ -64,6 +64,7 @@ | |||
| 64 | #define OCFS2_INODE_SIGNATURE "INODE01" | 64 | #define OCFS2_INODE_SIGNATURE "INODE01" |
| 65 | #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" | 65 | #define OCFS2_EXTENT_BLOCK_SIGNATURE "EXBLK01" |
| 66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" | 66 | #define OCFS2_GROUP_DESC_SIGNATURE "GROUP01" |
| 67 | #define OCFS2_XATTR_BLOCK_SIGNATURE "XATTR01" | ||
| 67 | 68 | ||
| 68 | /* Compatibility flags */ | 69 | /* Compatibility flags */ |
| 69 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ | 70 | #define OCFS2_HAS_COMPAT_FEATURE(sb,mask) \ |
| @@ -90,7 +91,8 @@ | |||
| 90 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ | 91 | | OCFS2_FEATURE_INCOMPAT_SPARSE_ALLOC \ |
| 91 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ | 92 | | OCFS2_FEATURE_INCOMPAT_INLINE_DATA \ |
| 92 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ | 93 | | OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP \ |
| 93 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK) | 94 | | OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK \ |
| 95 | | OCFS2_FEATURE_INCOMPAT_XATTR) | ||
| 94 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN | 96 | #define OCFS2_FEATURE_RO_COMPAT_SUPP OCFS2_FEATURE_RO_COMPAT_UNWRITTEN |
| 95 | 97 | ||
| 96 | /* | 98 | /* |
| @@ -127,10 +129,6 @@ | |||
| 127 | /* Support for data packed into inode blocks */ | 129 | /* Support for data packed into inode blocks */ |
| 128 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 | 130 | #define OCFS2_FEATURE_INCOMPAT_INLINE_DATA 0x0040 |
| 129 | 131 | ||
| 130 | /* Support for the extended slot map */ | ||
| 131 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
| 132 | |||
| 133 | |||
| 134 | /* | 132 | /* |
| 135 | * Support for alternate, userspace cluster stacks. If set, the superblock | 133 | * Support for alternate, userspace cluster stacks. If set, the superblock |
| 136 | * field s_cluster_info contains a tag for the alternate stack in use as | 134 | * field s_cluster_info contains a tag for the alternate stack in use as |
| @@ -142,6 +140,12 @@ | |||
| 142 | */ | 140 | */ |
| 143 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 | 141 | #define OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK 0x0080 |
| 144 | 142 | ||
| 143 | /* Support for the extended slot map */ | ||
| 144 | #define OCFS2_FEATURE_INCOMPAT_EXTENDED_SLOT_MAP 0x100 | ||
| 145 | |||
| 146 | /* Support for extended attributes */ | ||
| 147 | #define OCFS2_FEATURE_INCOMPAT_XATTR 0x0200 | ||
| 148 | |||
| 145 | /* | 149 | /* |
| 146 | * backup superblock flag is used to indicate that this volume | 150 | * backup superblock flag is used to indicate that this volume |
| 147 | * has backup superblocks. | 151 | * has backup superblocks. |
| @@ -299,6 +303,12 @@ struct ocfs2_new_group_input { | |||
| 299 | */ | 303 | */ |
| 300 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 | 304 | #define OCFS2_DEFAULT_LOCAL_ALLOC_SIZE 8 |
| 301 | 305 | ||
| 306 | /* | ||
| 307 | * Inline extended attribute size (in bytes) | ||
| 308 | * The value chosen should be aligned to 16 byte boundaries. | ||
| 309 | */ | ||
| 310 | #define OCFS2_MIN_XATTR_INLINE_SIZE 256 | ||
| 311 | |||
| 302 | struct ocfs2_system_inode_info { | 312 | struct ocfs2_system_inode_info { |
| 303 | char *si_name; | 313 | char *si_name; |
| 304 | int si_iflags; | 314 | int si_iflags; |
| @@ -563,7 +573,7 @@ struct ocfs2_super_block { | |||
| 563 | /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts | 573 | /*40*/ __le16 s_max_slots; /* Max number of simultaneous mounts |
| 564 | before tunefs required */ | 574 | before tunefs required */ |
| 565 | __le16 s_tunefs_flag; | 575 | __le16 s_tunefs_flag; |
| 566 | __le32 s_reserved1; | 576 | __le32 s_uuid_hash; /* hash value of uuid */ |
| 567 | __le64 s_first_cluster_group; /* Block offset of 1st cluster | 577 | __le64 s_first_cluster_group; /* Block offset of 1st cluster |
| 568 | * group header */ | 578 | * group header */ |
| 569 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ | 579 | /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ |
| @@ -571,7 +581,11 @@ struct ocfs2_super_block { | |||
| 571 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace | 581 | /*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace |
| 572 | stack. Only valid | 582 | stack. Only valid |
| 573 | with INCOMPAT flag. */ | 583 | with INCOMPAT flag. */ |
| 574 | /*B8*/ __le64 s_reserved2[17]; /* Fill out superblock */ | 584 | /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size |
| 585 | for this fs*/ | ||
| 586 | __le16 s_reserved0; | ||
| 587 | __le32 s_reserved1; | ||
| 588 | /*C0*/ __le64 s_reserved2[16]; /* Fill out superblock */ | ||
| 575 | /*140*/ | 589 | /*140*/ |
| 576 | 590 | ||
| 577 | /* | 591 | /* |
| @@ -621,7 +635,8 @@ struct ocfs2_dinode { | |||
| 621 | belongs to */ | 635 | belongs to */ |
| 622 | __le16 i_suballoc_bit; /* Bit offset in suballocator | 636 | __le16 i_suballoc_bit; /* Bit offset in suballocator |
| 623 | block group */ | 637 | block group */ |
| 624 | /*10*/ __le32 i_reserved0; | 638 | /*10*/ __le16 i_reserved0; |
| 639 | __le16 i_xattr_inline_size; | ||
| 625 | __le32 i_clusters; /* Cluster count */ | 640 | __le32 i_clusters; /* Cluster count */ |
| 626 | __le32 i_uid; /* Owner UID */ | 641 | __le32 i_uid; /* Owner UID */ |
| 627 | __le32 i_gid; /* Owning GID */ | 642 | __le32 i_gid; /* Owning GID */ |
| @@ -640,11 +655,12 @@ struct ocfs2_dinode { | |||
| 640 | __le32 i_atime_nsec; | 655 | __le32 i_atime_nsec; |
| 641 | __le32 i_ctime_nsec; | 656 | __le32 i_ctime_nsec; |
| 642 | __le32 i_mtime_nsec; | 657 | __le32 i_mtime_nsec; |
| 643 | __le32 i_attr; | 658 | /*70*/ __le32 i_attr; |
| 644 | __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL | 659 | __le16 i_orphaned_slot; /* Only valid when OCFS2_ORPHANED_FL |
| 645 | was set in i_flags */ | 660 | was set in i_flags */ |
| 646 | __le16 i_dyn_features; | 661 | __le16 i_dyn_features; |
| 647 | /*70*/ __le64 i_reserved2[8]; | 662 | __le64 i_xattr_loc; |
| 663 | /*80*/ __le64 i_reserved2[7]; | ||
| 648 | /*B8*/ union { | 664 | /*B8*/ union { |
| 649 | __le64 i_pad1; /* Generic way to refer to this | 665 | __le64 i_pad1; /* Generic way to refer to this |
| 650 | 64bit union */ | 666 | 64bit union */ |
| @@ -715,6 +731,136 @@ struct ocfs2_group_desc | |||
| 715 | /*40*/ __u8 bg_bitmap[0]; | 731 | /*40*/ __u8 bg_bitmap[0]; |
| 716 | }; | 732 | }; |
| 717 | 733 | ||
| 734 | /* | ||
| 735 | * On disk extended attribute structure for OCFS2. | ||
| 736 | */ | ||
| 737 | |||
| 738 | /* | ||
| 739 | * ocfs2_xattr_entry describes one extended attribute. | ||
| 740 | * | ||
| 741 | * Note that it can be stored in the inode, one block or one xattr bucket. | ||
| 742 | */ | ||
| 743 | struct ocfs2_xattr_entry { | ||
| 744 | __le32 xe_name_hash; /* hash value of xattr prefix+suffix. */ | ||
| 745 | __le16 xe_name_offset; /* byte offset from the 1st entry in the | ||
| 746 | local xattr storage (inode, xattr block or | ||
| 747 | xattr bucket). */ | ||
| 748 | __u8 xe_name_len; /* xattr name len, doesn't include prefix. */ | ||
| 749 | __u8 xe_type; /* the low 7 bits indicate the name prefix's | ||
| 750 | * type and the highest bit indicates whether | ||
| 751 | * the EA is stored in the local storage. */ | ||
| 752 | __le64 xe_value_size; /* real xattr value length. */ | ||
| 753 | }; | ||
| 754 | |||
| 755 | /* | ||
| 756 | * On disk structure for xattr header. | ||
| 757 | * | ||
| 758 | * One ocfs2_xattr_header describes how many ocfs2_xattr_entry records are in | ||
| 759 | * the local xattr storage. | ||
| 760 | */ | ||
| 761 | struct ocfs2_xattr_header { | ||
| 762 | __le16 xh_count; /* contains the count of how | ||
| 763 | many records are in the | ||
| 764 | local xattr storage. */ | ||
| 765 | __le16 xh_free_start; /* current offset for storing | ||
| 766 | xattr. */ | ||
| 767 | __le16 xh_name_value_len; /* total length of name/value | ||
| 768 | length in this bucket. */ | ||
| 769 | __le16 xh_num_buckets; /* bucket nums in one extent | ||
| 770 | record, only valid in the | ||
| 771 | first bucket. */ | ||
| 772 | __le64 xh_csum; | ||
| 773 | struct ocfs2_xattr_entry xh_entries[0]; /* xattr entry list. */ | ||
| 774 | }; | ||
| 775 | |||
| 776 | /* | ||
| 777 | * On disk structure for xattr value root. | ||
| 778 | * | ||
| 779 | * It is used when one extended attribute's value is too large, and we save it | ||
| 780 | * in an outside cluster. It is stored in a b-tree like file content. | ||
| 781 | */ | ||
| 782 | struct ocfs2_xattr_value_root { | ||
| 783 | /*00*/ __le32 xr_clusters; /* clusters covered by xattr value. */ | ||
| 784 | __le32 xr_reserved0; | ||
| 785 | __le64 xr_last_eb_blk; /* Pointer to last extent block */ | ||
| 786 | /*10*/ struct ocfs2_extent_list xr_list; /* Extent record list */ | ||
| 787 | }; | ||
| 788 | |||
| 789 | /* | ||
| 790 | * On disk structure for xattr tree root. | ||
| 791 | * | ||
| 792 | * It is used when there are too many extended attributes for one file. These | ||
| 793 | * attributes will be organized and stored in an indexed-btree. | ||
| 794 | */ | ||
| 795 | struct ocfs2_xattr_tree_root { | ||
| 796 | /*00*/ __le32 xt_clusters; /* clusters covered by xattr. */ | ||
| 797 | __le32 xt_reserved0; | ||
| 798 | __le64 xt_last_eb_blk; /* Pointer to last extent block */ | ||
| 799 | /*10*/ struct ocfs2_extent_list xt_list; /* Extent record list */ | ||
| 800 | }; | ||
| 801 | |||
| 802 | #define OCFS2_XATTR_INDEXED 0x1 | ||
| 803 | #define OCFS2_HASH_SHIFT 5 | ||
| 804 | #define OCFS2_XATTR_ROUND 3 | ||
| 805 | #define OCFS2_XATTR_SIZE(size) (((size) + OCFS2_XATTR_ROUND) & \ | ||
| 806 | ~(OCFS2_XATTR_ROUND)) | ||
| 807 | |||
| 808 | #define OCFS2_XATTR_BUCKET_SIZE 4096 | ||
| 809 | #define OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET (OCFS2_XATTR_BUCKET_SIZE \ | ||
| 810 | / OCFS2_MIN_BLOCKSIZE) | ||
| 811 | |||
| 812 | /* | ||
| 813 | * On disk structure for xattr block. | ||
| 814 | */ | ||
| 815 | struct ocfs2_xattr_block { | ||
| 816 | /*00*/ __u8 xb_signature[8]; /* Signature for verification */ | ||
| 817 | __le16 xb_suballoc_slot; /* Slot suballocator this | ||
| 818 | block belongs to. */ | ||
| 819 | __le16 xb_suballoc_bit; /* Bit offset in suballocator | ||
| 820 | block group */ | ||
| 821 | __le32 xb_fs_generation; /* Must match super block */ | ||
| 822 | /*10*/ __le64 xb_blkno; /* Offset on disk, in blocks */ | ||
| 823 | __le64 xb_csum; | ||
| 824 | /*20*/ __le16 xb_flags; /* Indicates whether this block contains | ||
| 825 | real xattr or a xattr tree. */ | ||
| 826 | __le16 xb_reserved0; | ||
| 827 | __le32 xb_reserved1; | ||
| 828 | __le64 xb_reserved2; | ||
| 829 | /*30*/ union { | ||
| 830 | struct ocfs2_xattr_header xb_header; /* xattr header if this | ||
| 831 | block contains xattr */ | ||
| 832 | struct ocfs2_xattr_tree_root xb_root;/* xattr tree root if this | ||
| 833 | block contains xattr | ||
| 834 | tree. */ | ||
| 835 | } xb_attrs; | ||
| 836 | }; | ||
| 837 | |||
| 838 | #define OCFS2_XATTR_ENTRY_LOCAL 0x80 | ||
| 839 | #define OCFS2_XATTR_TYPE_MASK 0x7F | ||
| 840 | static inline void ocfs2_xattr_set_local(struct ocfs2_xattr_entry *xe, | ||
| 841 | int local) | ||
| 842 | { | ||
| 843 | if (local) | ||
| 844 | xe->xe_type |= OCFS2_XATTR_ENTRY_LOCAL; | ||
| 845 | else | ||
| 846 | xe->xe_type &= ~OCFS2_XATTR_ENTRY_LOCAL; | ||
| 847 | } | ||
| 848 | |||
| 849 | static inline int ocfs2_xattr_is_local(struct ocfs2_xattr_entry *xe) | ||
| 850 | { | ||
| 851 | return xe->xe_type & OCFS2_XATTR_ENTRY_LOCAL; | ||
| 852 | } | ||
| 853 | |||
| 854 | static inline void ocfs2_xattr_set_type(struct ocfs2_xattr_entry *xe, int type) | ||
| 855 | { | ||
| 856 | xe->xe_type |= type & OCFS2_XATTR_TYPE_MASK; | ||
| 857 | } | ||
| 858 | |||
| 859 | static inline int ocfs2_xattr_get_type(struct ocfs2_xattr_entry *xe) | ||
| 860 | { | ||
| 861 | return xe->xe_type & OCFS2_XATTR_TYPE_MASK; | ||
| 862 | } | ||
| 863 | |||
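The helpers above pack two facts into the single xe_type byte: the high bit says whether the value is stored locally (inline), and the low seven bits carry the name-prefix type; OCFS2_XATTR_SIZE rounds name/value lengths up to a 4-byte boundary. Below is a hedged, user-space sketch with local copies of those constants, for illustration only.

#include <stdint.h>
#include <stdio.h>

#define XATTR_ENTRY_LOCAL 0x80   /* value stored inline ("local")  */
#define XATTR_TYPE_MASK   0x7F   /* low 7 bits: name-prefix index  */
#define XATTR_ROUND       3
#define XATTR_SIZE(size)  (((size) + XATTR_ROUND) & ~(XATTR_ROUND))

int main(void)
{
        uint8_t xe_type = 0;

        xe_type |= 2 & XATTR_TYPE_MASK;   /* e.g. prefix index 2          */
        xe_type |= XATTR_ENTRY_LOCAL;     /* value small enough to inline */

        printf("type=%u local=%d\n", xe_type & XATTR_TYPE_MASK,
               !!(xe_type & XATTR_ENTRY_LOCAL));

        /* 5-byte value rounds up to 8 bytes (next 4-byte boundary). */
        printf("XATTR_SIZE(5)=%lu\n", (unsigned long)XATTR_SIZE(5));
        return 0;
}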
| 718 | #ifdef __KERNEL__ | 864 | #ifdef __KERNEL__ |
| 719 | static inline int ocfs2_fast_symlink_chars(struct super_block *sb) | 865 | static inline int ocfs2_fast_symlink_chars(struct super_block *sb) |
| 720 | { | 866 | { |
| @@ -728,6 +874,20 @@ static inline int ocfs2_max_inline_data(struct super_block *sb) | |||
| 728 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | 874 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); |
| 729 | } | 875 | } |
| 730 | 876 | ||
| 877 | static inline int ocfs2_max_inline_data_with_xattr(struct super_block *sb, | ||
| 878 | struct ocfs2_dinode *di) | ||
| 879 | { | ||
| 880 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
| 881 | |||
| 882 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) | ||
| 883 | return sb->s_blocksize - | ||
| 884 | offsetof(struct ocfs2_dinode, id2.i_data.id_data) - | ||
| 885 | xattrsize; | ||
| 886 | else | ||
| 887 | return sb->s_blocksize - | ||
| 888 | offsetof(struct ocfs2_dinode, id2.i_data.id_data); | ||
| 889 | } | ||
| 890 | |||
| 731 | static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) | 891 | static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) |
| 732 | { | 892 | { |
| 733 | int size; | 893 | int size; |
| @@ -738,6 +898,24 @@ static inline int ocfs2_extent_recs_per_inode(struct super_block *sb) | |||
| 738 | return size / sizeof(struct ocfs2_extent_rec); | 898 | return size / sizeof(struct ocfs2_extent_rec); |
| 739 | } | 899 | } |
| 740 | 900 | ||
| 901 | static inline int ocfs2_extent_recs_per_inode_with_xattr( | ||
| 902 | struct super_block *sb, | ||
| 903 | struct ocfs2_dinode *di) | ||
| 904 | { | ||
| 905 | int size; | ||
| 906 | unsigned int xattrsize = le16_to_cpu(di->i_xattr_inline_size); | ||
| 907 | |||
| 908 | if (le16_to_cpu(di->i_dyn_features) & OCFS2_INLINE_XATTR_FL) | ||
| 909 | size = sb->s_blocksize - | ||
| 910 | offsetof(struct ocfs2_dinode, id2.i_list.l_recs) - | ||
| 911 | xattrsize; | ||
| 912 | else | ||
| 913 | size = sb->s_blocksize - | ||
| 914 | offsetof(struct ocfs2_dinode, id2.i_list.l_recs); | ||
| 915 | |||
| 916 | return size / sizeof(struct ocfs2_extent_rec); | ||
| 917 | } | ||
| 918 | |||
| 741 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) | 919 | static inline int ocfs2_chain_recs_per_inode(struct super_block *sb) |
| 742 | { | 920 | { |
| 743 | int size; | 921 | int size; |
| @@ -801,6 +979,17 @@ static inline u64 ocfs2_backup_super_blkno(struct super_block *sb, int index) | |||
| 801 | return 0; | 979 | return 0; |
| 802 | 980 | ||
| 803 | } | 981 | } |
| 982 | |||
| 983 | static inline u16 ocfs2_xattr_recs_per_xb(struct super_block *sb) | ||
| 984 | { | ||
| 985 | int size; | ||
| 986 | |||
| 987 | size = sb->s_blocksize - | ||
| 988 | offsetof(struct ocfs2_xattr_block, | ||
| 989 | xb_attrs.xb_root.xt_list.l_recs); | ||
| 990 | |||
| 991 | return size / sizeof(struct ocfs2_extent_rec); | ||
| 992 | } | ||
| 804 | #else | 993 | #else |
| 805 | static inline int ocfs2_fast_symlink_chars(int blocksize) | 994 | static inline int ocfs2_fast_symlink_chars(int blocksize) |
| 806 | { | 995 | { |
| @@ -884,6 +1073,17 @@ static inline uint64_t ocfs2_backup_super_blkno(int blocksize, int index) | |||
| 884 | 1073 | ||
| 885 | return 0; | 1074 | return 0; |
| 886 | } | 1075 | } |
| 1076 | |||
| 1077 | static inline int ocfs2_xattr_recs_per_xb(int blocksize) | ||
| 1078 | { | ||
| 1079 | int size; | ||
| 1080 | |||
| 1081 | size = blocksize - | ||
| 1082 | offsetof(struct ocfs2_xattr_block, | ||
| 1083 | xb_attrs.xb_root.xt_list.l_recs); | ||
| 1084 | |||
| 1085 | return size / sizeof(struct ocfs2_extent_rec); | ||
| 1086 | } | ||
| 887 | #endif /* __KERNEL__ */ | 1087 | #endif /* __KERNEL__ */ |
| 888 | 1088 | ||
| 889 | 1089 | ||
diff --git a/fs/ocfs2/ocfs2_jbd_compat.h b/fs/ocfs2/ocfs2_jbd_compat.h new file mode 100644 index 000000000000..b91c78f8f558 --- /dev/null +++ b/fs/ocfs2/ocfs2_jbd_compat.h | |||
| @@ -0,0 +1,82 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * ocfs2_jbd_compat.h | ||
| 5 | * | ||
| 6 | * Compatibility defines for JBD. | ||
| 7 | * | ||
| 8 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License version 2 as published by the Free Software Foundation. | ||
| 13 | * | ||
| 14 | * This program is distributed in the hope that it will be useful, | ||
| 15 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 16 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 17 | * General Public License for more details. | ||
| 18 | */ | ||
| 19 | |||
| 20 | #ifndef OCFS2_JBD_COMPAT_H | ||
| 21 | #define OCFS2_JBD_COMPAT_H | ||
| 22 | |||
| 23 | #ifndef CONFIG_OCFS2_COMPAT_JBD | ||
| 24 | # error Should not have been included | ||
| 25 | #endif | ||
| 26 | |||
| 27 | struct jbd2_inode { | ||
| 28 | unsigned int dummy; | ||
| 29 | }; | ||
| 30 | |||
| 31 | #define JBD2_BARRIER JFS_BARRIER | ||
| 32 | #define JBD2_DEFAULT_MAX_COMMIT_AGE JBD_DEFAULT_MAX_COMMIT_AGE | ||
| 33 | |||
| 34 | #define jbd2_journal_ack_err journal_ack_err | ||
| 35 | #define jbd2_journal_clear_err journal_clear_err | ||
| 36 | #define jbd2_journal_destroy journal_destroy | ||
| 37 | #define jbd2_journal_dirty_metadata journal_dirty_metadata | ||
| 38 | #define jbd2_journal_errno journal_errno | ||
| 39 | #define jbd2_journal_extend journal_extend | ||
| 40 | #define jbd2_journal_flush journal_flush | ||
| 41 | #define jbd2_journal_force_commit journal_force_commit | ||
| 42 | #define jbd2_journal_get_write_access journal_get_write_access | ||
| 43 | #define jbd2_journal_get_undo_access journal_get_undo_access | ||
| 44 | #define jbd2_journal_init_inode journal_init_inode | ||
| 45 | #define jbd2_journal_invalidatepage journal_invalidatepage | ||
| 46 | #define jbd2_journal_load journal_load | ||
| 47 | #define jbd2_journal_lock_updates journal_lock_updates | ||
| 48 | #define jbd2_journal_restart journal_restart | ||
| 49 | #define jbd2_journal_start journal_start | ||
| 50 | #define jbd2_journal_start_commit journal_start_commit | ||
| 51 | #define jbd2_journal_stop journal_stop | ||
| 52 | #define jbd2_journal_try_to_free_buffers journal_try_to_free_buffers | ||
| 53 | #define jbd2_journal_unlock_updates journal_unlock_updates | ||
| 54 | #define jbd2_journal_wipe journal_wipe | ||
| 55 | #define jbd2_log_wait_commit log_wait_commit | ||
| 56 | |||
| 57 | static inline int jbd2_journal_file_inode(handle_t *handle, | ||
| 58 | struct jbd2_inode *inode) | ||
| 59 | { | ||
| 60 | return 0; | ||
| 61 | } | ||
| 62 | |||
| 63 | static inline int jbd2_journal_begin_ordered_truncate(struct jbd2_inode *inode, | ||
| 64 | loff_t new_size) | ||
| 65 | { | ||
| 66 | return 0; | ||
| 67 | } | ||
| 68 | |||
| 69 | static inline void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, | ||
| 70 | struct inode *inode) | ||
| 71 | { | ||
| 72 | return; | ||
| 73 | } | ||
| 74 | |||
| 75 | static inline void jbd2_journal_release_jbd_inode(journal_t *journal, | ||
| 76 | struct jbd2_inode *jinode) | ||
| 77 | { | ||
| 78 | return; | ||
| 79 | } | ||
| 80 | |||
| 81 | |||
| 82 | #endif /* OCFS2_JBD_COMPAT_H */ | ||
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index 8166968e9015..ffd48db229a7 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c | |||
| @@ -200,7 +200,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data) | |||
| 200 | if (cluster > clusters) | 200 | if (cluster > clusters) |
| 201 | break; | 201 | break; |
| 202 | 202 | ||
| 203 | ret = ocfs2_read_block(osb, blkno, &backup, 0, NULL); | 203 | ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup); |
| 204 | if (ret < 0) { | 204 | if (ret < 0) { |
| 205 | mlog_errno(ret); | 205 | mlog_errno(ret); |
| 206 | break; | 206 | break; |
| @@ -236,8 +236,8 @@ static void ocfs2_update_super_and_backups(struct inode *inode, | |||
| 236 | * update the superblock last. | 236 | * update the superblock last. |
| 237 | * It doesn't matter if the write failed. | 237 | * It doesn't matter if the write failed. |
| 238 | */ | 238 | */ |
| 239 | ret = ocfs2_read_block(osb, OCFS2_SUPER_BLOCK_BLKNO, | 239 | ret = ocfs2_read_blocks_sync(osb, OCFS2_SUPER_BLOCK_BLKNO, 1, |
| 240 | &super_bh, 0, NULL); | 240 | &super_bh); |
| 241 | if (ret < 0) { | 241 | if (ret < 0) { |
| 242 | mlog_errno(ret); | 242 | mlog_errno(ret); |
| 243 | goto out; | 243 | goto out; |
| @@ -332,8 +332,7 @@ int ocfs2_group_extend(struct inode * inode, int new_clusters) | |||
| 332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, | 332 | lgd_blkno = ocfs2_which_cluster_group(main_bm_inode, |
| 333 | first_new_cluster - 1); | 333 | first_new_cluster - 1); |
| 334 | 334 | ||
| 335 | ret = ocfs2_read_block(osb, lgd_blkno, &group_bh, OCFS2_BH_CACHED, | 335 | ret = ocfs2_read_block(main_bm_inode, lgd_blkno, &group_bh); |
| 336 | main_bm_inode); | ||
| 337 | if (ret < 0) { | 336 | if (ret < 0) { |
| 338 | mlog_errno(ret); | 337 | mlog_errno(ret); |
| 339 | goto out_unlock; | 338 | goto out_unlock; |
| @@ -540,7 +539,7 @@ int ocfs2_group_add(struct inode *inode, struct ocfs2_new_group_input *input) | |||
| 540 | goto out_unlock; | 539 | goto out_unlock; |
| 541 | } | 540 | } |
| 542 | 541 | ||
| 543 | ret = ocfs2_read_block(osb, input->group, &group_bh, 0, NULL); | 542 | ret = ocfs2_read_blocks_sync(osb, input->group, 1, &group_bh); |
| 544 | if (ret < 0) { | 543 | if (ret < 0) { |
| 545 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " | 544 | mlog(ML_ERROR, "Can't read the group descriptor # %llu " |
| 546 | "from the device.", (unsigned long long)input->group); | 545 | "from the device.", (unsigned long long)input->group); |
diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index bb5ff8939bf1..bdda2d8f8508 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c | |||
| @@ -150,8 +150,8 @@ int ocfs2_refresh_slot_info(struct ocfs2_super *osb) | |||
| 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If | 150 | * be !NULL. Thus, ocfs2_read_blocks() will ignore blocknr. If |
| 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. | 151 | * this is not true, the read of -1 (UINT64_MAX) will fail. |
| 152 | */ | 152 | */ |
| 153 | ret = ocfs2_read_blocks(osb, -1, si->si_blocks, si->si_bh, 0, | 153 | ret = ocfs2_read_blocks(si->si_inode, -1, si->si_blocks, si->si_bh, |
| 154 | si->si_inode); | 154 | OCFS2_BH_IGNORE_CACHE); |
| 155 | if (ret == 0) { | 155 | if (ret == 0) { |
| 156 | spin_lock(&osb->osb_lock); | 156 | spin_lock(&osb->osb_lock); |
| 157 | ocfs2_update_slot_info(si); | 157 | ocfs2_update_slot_info(si); |
| @@ -404,7 +404,8 @@ static int ocfs2_map_slot_buffers(struct ocfs2_super *osb, | |||
| 404 | (unsigned long long)blkno); | 404 | (unsigned long long)blkno); |
| 405 | 405 | ||
| 406 | bh = NULL; /* Acquire a fresh bh */ | 406 | bh = NULL; /* Acquire a fresh bh */ |
| 407 | status = ocfs2_read_block(osb, blkno, &bh, 0, si->si_inode); | 407 | status = ocfs2_read_blocks(si->si_inode, blkno, 1, &bh, |
| 408 | OCFS2_BH_IGNORE_CACHE); | ||
| 408 | if (status < 0) { | 409 | if (status < 0) { |
| 409 | mlog_errno(status); | 410 | mlog_errno(status); |
| 410 | goto bail; | 411 | goto bail; |
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c index 353fc35c6748..faec2d879357 100644 --- a/fs/ocfs2/stack_user.c +++ b/fs/ocfs2/stack_user.c | |||
| @@ -28,6 +28,7 @@ | |||
| 28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ | 28 | #include "ocfs2.h" /* For struct ocfs2_lock_res */ |
| 29 | #include "stackglue.h" | 29 | #include "stackglue.h" |
| 30 | 30 | ||
| 31 | #include <linux/dlm_plock.h> | ||
| 31 | 32 | ||
| 32 | /* | 33 | /* |
| 33 | * The control protocol starts with a handshake. Until the handshake | 34 | * The control protocol starts with a handshake. Until the handshake |
| @@ -746,6 +747,37 @@ static void user_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | |||
| 746 | { | 747 | { |
| 747 | } | 748 | } |
| 748 | 749 | ||
| 750 | static int user_plock(struct ocfs2_cluster_connection *conn, | ||
| 751 | u64 ino, | ||
| 752 | struct file *file, | ||
| 753 | int cmd, | ||
| 754 | struct file_lock *fl) | ||
| 755 | { | ||
| 756 | /* | ||
| 757 | * This more or less just demuxes the plock request into any | ||
| 758 | * one of three dlm calls. | ||
| 759 | * | ||
| 760 | * Internally, fs/dlm will pass these to a misc device, which | ||
| 761 | * a userspace daemon will read and write to. | ||
| 762 | * | ||
| 763 | * For now, cancel requests (which happen internally only), | ||
| 764 | * are turned into unlocks. Most of this function taken from | ||
| 765 | * gfs2_lock. | ||
| 766 | */ | ||
| 767 | |||
| 768 | if (cmd == F_CANCELLK) { | ||
| 769 | cmd = F_SETLK; | ||
| 770 | fl->fl_type = F_UNLCK; | ||
| 771 | } | ||
| 772 | |||
| 773 | if (IS_GETLK(cmd)) | ||
| 774 | return dlm_posix_get(conn->cc_lockspace, ino, file, fl); | ||
| 775 | else if (fl->fl_type == F_UNLCK) | ||
| 776 | return dlm_posix_unlock(conn->cc_lockspace, ino, file, fl); | ||
| 777 | else | ||
| 778 | return dlm_posix_lock(conn->cc_lockspace, ino, file, cmd, fl); | ||
| 779 | } | ||
| 780 | |||
| 749 | /* | 781 | /* |
| 750 | * Compare a requested locking protocol version against the current one. | 782 | * Compare a requested locking protocol version against the current one. |
| 751 | * | 783 | * |
| @@ -839,6 +871,7 @@ static struct ocfs2_stack_operations ocfs2_user_plugin_ops = { | |||
| 839 | .dlm_unlock = user_dlm_unlock, | 871 | .dlm_unlock = user_dlm_unlock, |
| 840 | .lock_status = user_dlm_lock_status, | 872 | .lock_status = user_dlm_lock_status, |
| 841 | .lock_lvb = user_dlm_lvb, | 873 | .lock_lvb = user_dlm_lvb, |
| 874 | .plock = user_plock, | ||
| 842 | .dump_lksb = user_dlm_dump_lksb, | 875 | .dump_lksb = user_dlm_dump_lksb, |
| 843 | }; | 876 | }; |
| 844 | 877 | ||
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c index 07f348b8d721..68b668b0e60a 100644 --- a/fs/ocfs2/stackglue.c +++ b/fs/ocfs2/stackglue.c | |||
| @@ -288,6 +288,26 @@ void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb) | |||
| 288 | } | 288 | } |
| 289 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); | 289 | EXPORT_SYMBOL_GPL(ocfs2_dlm_dump_lksb); |
| 290 | 290 | ||
| 291 | int ocfs2_stack_supports_plocks(void) | ||
| 292 | { | ||
| 293 | return active_stack && active_stack->sp_ops->plock; | ||
| 294 | } | ||
| 295 | EXPORT_SYMBOL_GPL(ocfs2_stack_supports_plocks); | ||
| 296 | |||
| 297 | /* | ||
| 298 | * ocfs2_plock() can only be safely called if | ||
| 299 | * ocfs2_stack_supports_plocks() returned true | ||
| 300 | */ | ||
| 301 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | ||
| 302 | struct file *file, int cmd, struct file_lock *fl) | ||
| 303 | { | ||
| 304 | WARN_ON_ONCE(active_stack->sp_ops->plock == NULL); | ||
| 305 | if (active_stack->sp_ops->plock) | ||
| 306 | return active_stack->sp_ops->plock(conn, ino, file, cmd, fl); | ||
| 307 | return -EOPNOTSUPP; | ||
| 308 | } | ||
| 309 | EXPORT_SYMBOL_GPL(ocfs2_plock); | ||
| 310 | |||
| 291 | int ocfs2_cluster_connect(const char *stack_name, | 311 | int ocfs2_cluster_connect(const char *stack_name, |
| 292 | const char *group, | 312 | const char *group, |
| 293 | int grouplen, | 313 | int grouplen, |
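The two exports above let filesystem code ask whether the active stack can service cluster-aware POSIX locks before routing a request to it. A sketch of such a caller, assuming the superblock's cluster connection is reachable as osb->cconn; the ocfs2_file_plock() name and the -EINVAL fallback are illustrative, and the real fs/ocfs2 call site is not part of this hunk:

	/* Sketch only: routing a posix lock request through the stack glue. */
	static int ocfs2_file_plock(struct file *file, int cmd, struct file_lock *fl)
	{
		struct inode *inode = file->f_mapping->host;
		struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);

		if (!ocfs2_stack_supports_plocks())
			return -EINVAL;	/* active stack has no plock op */

		return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno,
				   file, cmd, fl);
	}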
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h index db56281dd1be..c571af375ef8 100644 --- a/fs/ocfs2/stackglue.h +++ b/fs/ocfs2/stackglue.h | |||
| @@ -28,6 +28,10 @@ | |||
| 28 | #include "dlm/dlmapi.h" | 28 | #include "dlm/dlmapi.h" |
| 29 | #include <linux/dlm.h> | 29 | #include <linux/dlm.h> |
| 30 | 30 | ||
| 31 | /* Needed for plock-related prototypes */ | ||
| 32 | struct file; | ||
| 33 | struct file_lock; | ||
| 34 | |||
| 31 | /* | 35 | /* |
| 32 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it | 36 | * dlmconstants.h does not have a LOCAL flag. We hope to remove it |
| 33 | * some day, but right now we need it. Let's fake it. This value is larger | 37 | * some day, but right now we need it. Let's fake it. This value is larger |
| @@ -187,6 +191,17 @@ struct ocfs2_stack_operations { | |||
| 187 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); | 191 | void *(*lock_lvb)(union ocfs2_dlm_lksb *lksb); |
| 188 | 192 | ||
| 189 | /* | 193 | /* |
| 194 | * Cluster-aware posix locks | ||
| 195 | * | ||
| 196 | * This is NULL for stacks which do not support posix locks. | ||
| 197 | */ | ||
| 198 | int (*plock)(struct ocfs2_cluster_connection *conn, | ||
| 199 | u64 ino, | ||
| 200 | struct file *file, | ||
| 201 | int cmd, | ||
| 202 | struct file_lock *fl); | ||
| 203 | |||
| 204 | /* | ||
| 190 | * This is an optoinal debugging hook. If provided, the | 205 | * This is an optoinal debugging hook. If provided, the |
| 191 | * stack can dump debugging information about this lock. | 206 | * stack can dump debugging information about this lock. |
| 192 | */ | 207 | */ |
| @@ -240,6 +255,10 @@ int ocfs2_dlm_lock_status(union ocfs2_dlm_lksb *lksb); | |||
| 240 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); | 255 | void *ocfs2_dlm_lvb(union ocfs2_dlm_lksb *lksb); |
| 241 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); | 256 | void ocfs2_dlm_dump_lksb(union ocfs2_dlm_lksb *lksb); |
| 242 | 257 | ||
| 258 | int ocfs2_stack_supports_plocks(void); | ||
| 259 | int ocfs2_plock(struct ocfs2_cluster_connection *conn, u64 ino, | ||
| 260 | struct file *file, int cmd, struct file_lock *fl); | ||
| 261 | |||
| 243 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); | 262 | void ocfs2_stack_glue_set_locking_protocol(struct ocfs2_locking_protocol *proto); |
| 244 | 263 | ||
| 245 | 264 | ||
diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index d2d278fb9819..c5ff18b46b57 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c | |||
| @@ -62,15 +62,18 @@ static int ocfs2_block_group_fill(handle_t *handle, | |||
| 62 | struct ocfs2_chain_list *cl); | 62 | struct ocfs2_chain_list *cl); |
| 63 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 63 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
| 64 | struct inode *alloc_inode, | 64 | struct inode *alloc_inode, |
| 65 | struct buffer_head *bh); | 65 | struct buffer_head *bh, |
| 66 | u64 max_block); | ||
| 66 | 67 | ||
| 67 | static int ocfs2_cluster_group_search(struct inode *inode, | 68 | static int ocfs2_cluster_group_search(struct inode *inode, |
| 68 | struct buffer_head *group_bh, | 69 | struct buffer_head *group_bh, |
| 69 | u32 bits_wanted, u32 min_bits, | 70 | u32 bits_wanted, u32 min_bits, |
| 71 | u64 max_block, | ||
| 70 | u16 *bit_off, u16 *bits_found); | 72 | u16 *bit_off, u16 *bits_found); |
| 71 | static int ocfs2_block_group_search(struct inode *inode, | 73 | static int ocfs2_block_group_search(struct inode *inode, |
| 72 | struct buffer_head *group_bh, | 74 | struct buffer_head *group_bh, |
| 73 | u32 bits_wanted, u32 min_bits, | 75 | u32 bits_wanted, u32 min_bits, |
| 76 | u64 max_block, | ||
| 74 | u16 *bit_off, u16 *bits_found); | 77 | u16 *bit_off, u16 *bits_found); |
| 75 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, | 78 | static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb, |
| 76 | struct ocfs2_alloc_context *ac, | 79 | struct ocfs2_alloc_context *ac, |
| @@ -110,8 +113,11 @@ static inline void ocfs2_block_to_cluster_group(struct inode *inode, | |||
| 110 | u64 data_blkno, | 113 | u64 data_blkno, |
| 111 | u64 *bg_blkno, | 114 | u64 *bg_blkno, |
| 112 | u16 *bg_bit_off); | 115 | u16 *bg_bit_off); |
| 116 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, | ||
| 117 | u32 bits_wanted, u64 max_block, | ||
| 118 | struct ocfs2_alloc_context **ac); | ||
| 113 | 119 | ||
| 114 | static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | 120 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) |
| 115 | { | 121 | { |
| 116 | struct inode *inode = ac->ac_inode; | 122 | struct inode *inode = ac->ac_inode; |
| 117 | 123 | ||
| @@ -124,10 +130,8 @@ static void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac) | |||
| 124 | iput(inode); | 130 | iput(inode); |
| 125 | ac->ac_inode = NULL; | 131 | ac->ac_inode = NULL; |
| 126 | } | 132 | } |
| 127 | if (ac->ac_bh) { | 133 | brelse(ac->ac_bh); |
| 128 | brelse(ac->ac_bh); | 134 | ac->ac_bh = NULL; |
| 129 | ac->ac_bh = NULL; | ||
| 130 | } | ||
| 131 | } | 135 | } |
| 132 | 136 | ||
| 133 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) | 137 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac) |
| @@ -276,7 +280,8 @@ static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl) | |||
| 276 | */ | 280 | */ |
| 277 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | 281 | static int ocfs2_block_group_alloc(struct ocfs2_super *osb, |
| 278 | struct inode *alloc_inode, | 282 | struct inode *alloc_inode, |
| 279 | struct buffer_head *bh) | 283 | struct buffer_head *bh, |
| 284 | u64 max_block) | ||
| 280 | { | 285 | { |
| 281 | int status, credits; | 286 | int status, credits; |
| 282 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; | 287 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data; |
| @@ -294,9 +299,9 @@ static int ocfs2_block_group_alloc(struct ocfs2_super *osb, | |||
| 294 | mlog_entry_void(); | 299 | mlog_entry_void(); |
| 295 | 300 | ||
| 296 | cl = &fe->id2.i_chain; | 301 | cl = &fe->id2.i_chain; |
| 297 | status = ocfs2_reserve_clusters(osb, | 302 | status = ocfs2_reserve_clusters_with_limit(osb, |
| 298 | le16_to_cpu(cl->cl_cpg), | 303 | le16_to_cpu(cl->cl_cpg), |
| 299 | &ac); | 304 | max_block, &ac); |
| 300 | if (status < 0) { | 305 | if (status < 0) { |
| 301 | if (status != -ENOSPC) | 306 | if (status != -ENOSPC) |
| 302 | mlog_errno(status); | 307 | mlog_errno(status); |
| @@ -394,8 +399,7 @@ bail: | |||
| 394 | if (ac) | 399 | if (ac) |
| 395 | ocfs2_free_alloc_context(ac); | 400 | ocfs2_free_alloc_context(ac); |
| 396 | 401 | ||
| 397 | if (bg_bh) | 402 | brelse(bg_bh); |
| 398 | brelse(bg_bh); | ||
| 399 | 403 | ||
| 400 | mlog_exit(status); | 404 | mlog_exit(status); |
| 401 | return status; | 405 | return status; |
| @@ -469,7 +473,8 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 469 | goto bail; | 473 | goto bail; |
| 470 | } | 474 | } |
| 471 | 475 | ||
| 472 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh); | 476 | status = ocfs2_block_group_alloc(osb, alloc_inode, bh, |
| 477 | ac->ac_max_block); | ||
| 473 | if (status < 0) { | 478 | if (status < 0) { |
| 474 | if (status != -ENOSPC) | 479 | if (status != -ENOSPC) |
| 475 | mlog_errno(status); | 480 | mlog_errno(status); |
| @@ -486,16 +491,15 @@ static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb, | |||
| 486 | get_bh(bh); | 491 | get_bh(bh); |
| 487 | ac->ac_bh = bh; | 492 | ac->ac_bh = bh; |
| 488 | bail: | 493 | bail: |
| 489 | if (bh) | 494 | brelse(bh); |
| 490 | brelse(bh); | ||
| 491 | 495 | ||
| 492 | mlog_exit(status); | 496 | mlog_exit(status); |
| 493 | return status; | 497 | return status; |
| 494 | } | 498 | } |
| 495 | 499 | ||
| 496 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | 500 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, |
| 497 | struct ocfs2_dinode *fe, | 501 | int blocks, |
| 498 | struct ocfs2_alloc_context **ac) | 502 | struct ocfs2_alloc_context **ac) |
| 499 | { | 503 | { |
| 500 | int status; | 504 | int status; |
| 501 | u32 slot; | 505 | u32 slot; |
| @@ -507,7 +511,7 @@ int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | |||
| 507 | goto bail; | 511 | goto bail; |
| 508 | } | 512 | } |
| 509 | 513 | ||
| 510 | (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe); | 514 | (*ac)->ac_bits_wanted = blocks; |
| 511 | (*ac)->ac_which = OCFS2_AC_USE_META; | 515 | (*ac)->ac_which = OCFS2_AC_USE_META; |
| 512 | slot = osb->slot_num; | 516 | slot = osb->slot_num; |
| 513 | (*ac)->ac_group_search = ocfs2_block_group_search; | 517 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| @@ -532,6 +536,15 @@ bail: | |||
| 532 | return status; | 536 | return status; |
| 533 | } | 537 | } |
| 534 | 538 | ||
| 539 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | ||
| 540 | struct ocfs2_extent_list *root_el, | ||
| 541 | struct ocfs2_alloc_context **ac) | ||
| 542 | { | ||
| 543 | return ocfs2_reserve_new_metadata_blocks(osb, | ||
| 544 | ocfs2_extend_meta_needed(root_el), | ||
| 545 | ac); | ||
| 546 | } | ||
| 547 | |||
| 535 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, | 548 | static int ocfs2_steal_inode_from_other_nodes(struct ocfs2_super *osb, |
| 536 | struct ocfs2_alloc_context *ac) | 549 | struct ocfs2_alloc_context *ac) |
| 537 | { | 550 | { |
| @@ -582,6 +595,14 @@ int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | |||
| 582 | (*ac)->ac_group_search = ocfs2_block_group_search; | 595 | (*ac)->ac_group_search = ocfs2_block_group_search; |
| 583 | 596 | ||
| 584 | /* | 597 | /* |
| 598 | * stat(2) can't handle i_ino > 32bits, so we tell the | ||
| 599 | * lower levels not to allocate us a block group past that | ||
| 600 | * limit. The 'inode64' mount option avoids this behavior. | ||
| 601 | */ | ||
| 602 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64)) | ||
| 603 | (*ac)->ac_max_block = (u32)~0U; | ||
| 604 | |||
| 605 | /* | ||
| 585 | * slot is set when we successfully steal inode from other nodes. | 606 | * slot is set when we successfully steal inode from other nodes. |
| 586 | * It is reset in 3 places: | 607 | * It is reset in 3 places: |
| 587 | * 1. when we flush the truncate log | 608 | * 1. when we flush the truncate log |
| @@ -661,9 +682,9 @@ bail: | |||
| 661 | /* Callers don't need to care which bitmap (local alloc or main) to | 682 | /* Callers don't need to care which bitmap (local alloc or main) to |
| 662 | * use so we figure it out for them, but unfortunately this clutters | 683 | * use so we figure it out for them, but unfortunately this clutters |
| 663 | * things a bit. */ | 684 | * things a bit. */ |
| 664 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | 685 | static int ocfs2_reserve_clusters_with_limit(struct ocfs2_super *osb, |
| 665 | u32 bits_wanted, | 686 | u32 bits_wanted, u64 max_block, |
| 666 | struct ocfs2_alloc_context **ac) | 687 | struct ocfs2_alloc_context **ac) |
| 667 | { | 688 | { |
| 668 | int status; | 689 | int status; |
| 669 | 690 | ||
| @@ -677,24 +698,20 @@ int ocfs2_reserve_clusters(struct ocfs2_super *osb, | |||
| 677 | } | 698 | } |
| 678 | 699 | ||
| 679 | (*ac)->ac_bits_wanted = bits_wanted; | 700 | (*ac)->ac_bits_wanted = bits_wanted; |
| 701 | (*ac)->ac_max_block = max_block; | ||
| 680 | 702 | ||
| 681 | status = -ENOSPC; | 703 | status = -ENOSPC; |
| 682 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { | 704 | if (ocfs2_alloc_should_use_local(osb, bits_wanted)) { |
| 683 | status = ocfs2_reserve_local_alloc_bits(osb, | 705 | status = ocfs2_reserve_local_alloc_bits(osb, |
| 684 | bits_wanted, | 706 | bits_wanted, |
| 685 | *ac); | 707 | *ac); |
| 686 | if ((status < 0) && (status != -ENOSPC)) { | 708 | if (status == -EFBIG) { |
| 709 | /* The local alloc window is outside ac_max_block. | ||
| 710 | * use the main bitmap. */ | ||
| 711 | status = -ENOSPC; | ||
| 712 | } else if ((status < 0) && (status != -ENOSPC)) { | ||
| 687 | mlog_errno(status); | 713 | mlog_errno(status); |
| 688 | goto bail; | 714 | goto bail; |
| 689 | } else if (status == -ENOSPC) { | ||
| 690 | /* reserve_local_bits will return enospc with | ||
| 691 | * the local alloc inode still locked, so we | ||
| 692 | * can change this safely here. */ | ||
| 693 | mlog(0, "Disabling local alloc\n"); | ||
| 694 | /* We set to OCFS2_LA_DISABLED so that umount | ||
| 695 | * can clean up what's left of the local | ||
| 696 | * allocation */ | ||
| 697 | osb->local_alloc_state = OCFS2_LA_DISABLED; | ||
| 698 | } | 715 | } |
| 699 | } | 716 | } |
| 700 | 717 | ||
| @@ -718,6 +735,13 @@ bail: | |||
| 718 | return status; | 735 | return status; |
| 719 | } | 736 | } |
| 720 | 737 | ||
| 738 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | ||
| 739 | u32 bits_wanted, | ||
| 740 | struct ocfs2_alloc_context **ac) | ||
| 741 | { | ||
| 742 | return ocfs2_reserve_clusters_with_limit(osb, bits_wanted, 0, ac); | ||
| 743 | } | ||
| 744 | |||
| 721 | /* | 745 | /* |
| 722 | * More or less lifted from ext3. I'll leave their description below: | 746 | * More or less lifted from ext3. I'll leave their description below: |
| 723 | * | 747 | * |
| @@ -1000,11 +1024,14 @@ static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg | |||
| 1000 | static int ocfs2_cluster_group_search(struct inode *inode, | 1024 | static int ocfs2_cluster_group_search(struct inode *inode, |
| 1001 | struct buffer_head *group_bh, | 1025 | struct buffer_head *group_bh, |
| 1002 | u32 bits_wanted, u32 min_bits, | 1026 | u32 bits_wanted, u32 min_bits, |
| 1027 | u64 max_block, | ||
| 1003 | u16 *bit_off, u16 *bits_found) | 1028 | u16 *bit_off, u16 *bits_found) |
| 1004 | { | 1029 | { |
| 1005 | int search = -ENOSPC; | 1030 | int search = -ENOSPC; |
| 1006 | int ret; | 1031 | int ret; |
| 1032 | u64 blkoff; | ||
| 1007 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; | 1033 | struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data; |
| 1034 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1008 | u16 tmp_off, tmp_found; | 1035 | u16 tmp_off, tmp_found; |
| 1009 | unsigned int max_bits, gd_cluster_off; | 1036 | unsigned int max_bits, gd_cluster_off; |
| 1010 | 1037 | ||
| @@ -1037,6 +1064,17 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
| 1037 | if (ret) | 1064 | if (ret) |
| 1038 | return ret; | 1065 | return ret; |
| 1039 | 1066 | ||
| 1067 | if (max_block) { | ||
| 1068 | blkoff = ocfs2_clusters_to_blocks(inode->i_sb, | ||
| 1069 | gd_cluster_off + | ||
| 1070 | tmp_off + tmp_found); | ||
| 1071 | mlog(0, "Checking %llu against %llu\n", | ||
| 1072 | (unsigned long long)blkoff, | ||
| 1073 | (unsigned long long)max_block); | ||
| 1074 | if (blkoff > max_block) | ||
| 1075 | return -ENOSPC; | ||
| 1076 | } | ||
| 1077 | |||
| 1040 | /* ocfs2_block_group_find_clear_bits() might | 1078 | /* ocfs2_block_group_find_clear_bits() might |
| 1041 | * return success, but we still want to return | 1079 | * return success, but we still want to return |
| 1042 | * -ENOSPC unless it found the minimum number | 1080 | * -ENOSPC unless it found the minimum number |
| @@ -1045,6 +1083,12 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
| 1045 | *bit_off = tmp_off; | 1083 | *bit_off = tmp_off; |
| 1046 | *bits_found = tmp_found; | 1084 | *bits_found = tmp_found; |
| 1047 | search = 0; /* success */ | 1085 | search = 0; /* success */ |
| 1086 | } else if (tmp_found) { | ||
| 1087 | /* | ||
| 1088 | * Don't show bits which we'll be returning | ||
| 1089 | * for allocation to the local alloc bitmap. | ||
| 1090 | */ | ||
| 1091 | ocfs2_local_alloc_seen_free_bits(osb, tmp_found); | ||
| 1048 | } | 1092 | } |
| 1049 | } | 1093 | } |
| 1050 | 1094 | ||
| @@ -1054,19 +1098,31 @@ static int ocfs2_cluster_group_search(struct inode *inode, | |||
| 1054 | static int ocfs2_block_group_search(struct inode *inode, | 1098 | static int ocfs2_block_group_search(struct inode *inode, |
| 1055 | struct buffer_head *group_bh, | 1099 | struct buffer_head *group_bh, |
| 1056 | u32 bits_wanted, u32 min_bits, | 1100 | u32 bits_wanted, u32 min_bits, |
| 1101 | u64 max_block, | ||
| 1057 | u16 *bit_off, u16 *bits_found) | 1102 | u16 *bit_off, u16 *bits_found) |
| 1058 | { | 1103 | { |
| 1059 | int ret = -ENOSPC; | 1104 | int ret = -ENOSPC; |
| 1105 | u64 blkoff; | ||
| 1060 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; | 1106 | struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data; |
| 1061 | 1107 | ||
| 1062 | BUG_ON(min_bits != 1); | 1108 | BUG_ON(min_bits != 1); |
| 1063 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); | 1109 | BUG_ON(ocfs2_is_cluster_bitmap(inode)); |
| 1064 | 1110 | ||
| 1065 | if (bg->bg_free_bits_count) | 1111 | if (bg->bg_free_bits_count) { |
| 1066 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), | 1112 | ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb), |
| 1067 | group_bh, bits_wanted, | 1113 | group_bh, bits_wanted, |
| 1068 | le16_to_cpu(bg->bg_bits), | 1114 | le16_to_cpu(bg->bg_bits), |
| 1069 | bit_off, bits_found); | 1115 | bit_off, bits_found); |
| 1116 | if (!ret && max_block) { | ||
| 1117 | blkoff = le64_to_cpu(bg->bg_blkno) + *bit_off + | ||
| 1118 | *bits_found; | ||
| 1119 | mlog(0, "Checking %llu against %llu\n", | ||
| 1120 | (unsigned long long)blkoff, | ||
| 1121 | (unsigned long long)max_block); | ||
| 1122 | if (blkoff > max_block) | ||
| 1123 | ret = -ENOSPC; | ||
| 1124 | } | ||
| 1125 | } | ||
| 1070 | 1126 | ||
| 1071 | return ret; | 1127 | return ret; |
| 1072 | } | 1128 | } |
| @@ -1116,8 +1172,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
| 1116 | struct ocfs2_group_desc *gd; | 1172 | struct ocfs2_group_desc *gd; |
| 1117 | struct inode *alloc_inode = ac->ac_inode; | 1173 | struct inode *alloc_inode = ac->ac_inode; |
| 1118 | 1174 | ||
| 1119 | ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno, | 1175 | ret = ocfs2_read_block(alloc_inode, gd_blkno, &group_bh); |
| 1120 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | ||
| 1121 | if (ret < 0) { | 1176 | if (ret < 0) { |
| 1122 | mlog_errno(ret); | 1177 | mlog_errno(ret); |
| 1123 | return ret; | 1178 | return ret; |
| @@ -1131,7 +1186,7 @@ static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac, | |||
| 1131 | } | 1186 | } |
| 1132 | 1187 | ||
| 1133 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, | 1188 | ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits, |
| 1134 | bit_off, &found); | 1189 | ac->ac_max_block, bit_off, &found); |
| 1135 | if (ret < 0) { | 1190 | if (ret < 0) { |
| 1136 | if (ret != -ENOSPC) | 1191 | if (ret != -ENOSPC) |
| 1137 | mlog_errno(ret); | 1192 | mlog_errno(ret); |
| @@ -1186,9 +1241,9 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
| 1186 | bits_wanted, chain, | 1241 | bits_wanted, chain, |
| 1187 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); | 1242 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno); |
| 1188 | 1243 | ||
| 1189 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | 1244 | status = ocfs2_read_block(alloc_inode, |
| 1190 | le64_to_cpu(cl->cl_recs[chain].c_blkno), | 1245 | le64_to_cpu(cl->cl_recs[chain].c_blkno), |
| 1191 | &group_bh, OCFS2_BH_CACHED, alloc_inode); | 1246 | &group_bh); |
| 1192 | if (status < 0) { | 1247 | if (status < 0) { |
| 1193 | mlog_errno(status); | 1248 | mlog_errno(status); |
| 1194 | goto bail; | 1249 | goto bail; |
| @@ -1204,21 +1259,20 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
| 1204 | /* for now, the chain search is a bit simplistic. We just use | 1259 | /* for now, the chain search is a bit simplistic. We just use |
| 1205 | * the 1st group with any empty bits. */ | 1260 | * the 1st group with any empty bits. */ |
| 1206 | while ((status = ac->ac_group_search(alloc_inode, group_bh, | 1261 | while ((status = ac->ac_group_search(alloc_inode, group_bh, |
| 1207 | bits_wanted, min_bits, bit_off, | 1262 | bits_wanted, min_bits, |
| 1263 | ac->ac_max_block, bit_off, | ||
| 1208 | &tmp_bits)) == -ENOSPC) { | 1264 | &tmp_bits)) == -ENOSPC) { |
| 1209 | if (!bg->bg_next_group) | 1265 | if (!bg->bg_next_group) |
| 1210 | break; | 1266 | break; |
| 1211 | 1267 | ||
| 1212 | if (prev_group_bh) { | 1268 | brelse(prev_group_bh); |
| 1213 | brelse(prev_group_bh); | 1269 | prev_group_bh = NULL; |
| 1214 | prev_group_bh = NULL; | 1270 | |
| 1215 | } | ||
| 1216 | next_group = le64_to_cpu(bg->bg_next_group); | 1271 | next_group = le64_to_cpu(bg->bg_next_group); |
| 1217 | prev_group_bh = group_bh; | 1272 | prev_group_bh = group_bh; |
| 1218 | group_bh = NULL; | 1273 | group_bh = NULL; |
| 1219 | status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), | 1274 | status = ocfs2_read_block(alloc_inode, |
| 1220 | next_group, &group_bh, | 1275 | next_group, &group_bh); |
| 1221 | OCFS2_BH_CACHED, alloc_inode); | ||
| 1222 | if (status < 0) { | 1276 | if (status < 0) { |
| 1223 | mlog_errno(status); | 1277 | mlog_errno(status); |
| 1224 | goto bail; | 1278 | goto bail; |
| @@ -1307,10 +1361,8 @@ static int ocfs2_search_chain(struct ocfs2_alloc_context *ac, | |||
| 1307 | *bg_blkno = le64_to_cpu(bg->bg_blkno); | 1361 | *bg_blkno = le64_to_cpu(bg->bg_blkno); |
| 1308 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); | 1362 | *bits_left = le16_to_cpu(bg->bg_free_bits_count); |
| 1309 | bail: | 1363 | bail: |
| 1310 | if (group_bh) | 1364 | brelse(group_bh); |
| 1311 | brelse(group_bh); | 1365 | brelse(prev_group_bh); |
| 1312 | if (prev_group_bh) | ||
| 1313 | brelse(prev_group_bh); | ||
| 1314 | 1366 | ||
| 1315 | mlog_exit(status); | 1367 | mlog_exit(status); |
| 1316 | return status; | 1368 | return status; |
| @@ -1723,7 +1775,6 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
| 1723 | { | 1775 | { |
| 1724 | int status = 0; | 1776 | int status = 0; |
| 1725 | u32 tmp_used; | 1777 | u32 tmp_used; |
| 1726 | struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb); | ||
| 1727 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; | 1778 | struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data; |
| 1728 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; | 1779 | struct ocfs2_chain_list *cl = &fe->id2.i_chain; |
| 1729 | struct buffer_head *group_bh = NULL; | 1780 | struct buffer_head *group_bh = NULL; |
| @@ -1742,8 +1793,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
| 1742 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, | 1793 | (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count, |
| 1743 | (unsigned long long)bg_blkno, start_bit); | 1794 | (unsigned long long)bg_blkno, start_bit); |
| 1744 | 1795 | ||
| 1745 | status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED, | 1796 | status = ocfs2_read_block(alloc_inode, bg_blkno, &group_bh); |
| 1746 | alloc_inode); | ||
| 1747 | if (status < 0) { | 1797 | if (status < 0) { |
| 1748 | mlog_errno(status); | 1798 | mlog_errno(status); |
| 1749 | goto bail; | 1799 | goto bail; |
| @@ -1784,8 +1834,7 @@ int ocfs2_free_suballoc_bits(handle_t *handle, | |||
| 1784 | } | 1834 | } |
| 1785 | 1835 | ||
| 1786 | bail: | 1836 | bail: |
| 1787 | if (group_bh) | 1837 | brelse(group_bh); |
| 1788 | brelse(group_bh); | ||
| 1789 | 1838 | ||
| 1790 | mlog_exit(status); | 1839 | mlog_exit(status); |
| 1791 | return status; | 1840 | return status; |
| @@ -1838,9 +1887,15 @@ int ocfs2_free_clusters(handle_t *handle, | |||
| 1838 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, | 1887 | status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh, |
| 1839 | bg_start_bit, bg_blkno, | 1888 | bg_start_bit, bg_blkno, |
| 1840 | num_clusters); | 1889 | num_clusters); |
| 1841 | if (status < 0) | 1890 | if (status < 0) { |
| 1842 | mlog_errno(status); | 1891 | mlog_errno(status); |
| 1892 | goto out; | ||
| 1893 | } | ||
| 1843 | 1894 | ||
| 1895 | ocfs2_local_alloc_seen_free_bits(OCFS2_SB(bitmap_inode->i_sb), | ||
| 1896 | num_clusters); | ||
| 1897 | |||
| 1898 | out: | ||
| 1844 | mlog_exit(status); | 1899 | mlog_exit(status); |
| 1845 | return status; | 1900 | return status; |
| 1846 | } | 1901 | } |
| @@ -1891,3 +1946,84 @@ static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe) | |||
| 1891 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); | 1946 | (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno); |
| 1892 | } | 1947 | } |
| 1893 | } | 1948 | } |
| 1949 | |||
| 1950 | /* | ||
| 1951 | * For a given allocation, determine which allocators will need to be | ||
| 1952 | * accessed, and lock them, reserving the appropriate number of bits. | ||
| 1953 | * | ||
| 1954 | * Sparse file systems call this from ocfs2_write_begin_nolock() | ||
| 1955 | * and ocfs2_allocate_unwritten_extents(). | ||
| 1956 | * | ||
| 1957 | * File systems which don't support holes call this from | ||
| 1958 | * ocfs2_extend_allocation(). | ||
| 1959 | */ | ||
| 1960 | int ocfs2_lock_allocators(struct inode *inode, | ||
| 1961 | struct ocfs2_extent_tree *et, | ||
| 1962 | u32 clusters_to_add, u32 extents_to_split, | ||
| 1963 | struct ocfs2_alloc_context **data_ac, | ||
| 1964 | struct ocfs2_alloc_context **meta_ac) | ||
| 1965 | { | ||
| 1966 | int ret = 0, num_free_extents; | ||
| 1967 | unsigned int max_recs_needed = clusters_to_add + 2 * extents_to_split; | ||
| 1968 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1969 | |||
| 1970 | *meta_ac = NULL; | ||
| 1971 | if (data_ac) | ||
| 1972 | *data_ac = NULL; | ||
| 1973 | |||
| 1974 | BUG_ON(clusters_to_add != 0 && data_ac == NULL); | ||
| 1975 | |||
| 1976 | num_free_extents = ocfs2_num_free_extents(osb, inode, et); | ||
| 1977 | if (num_free_extents < 0) { | ||
| 1978 | ret = num_free_extents; | ||
| 1979 | mlog_errno(ret); | ||
| 1980 | goto out; | ||
| 1981 | } | ||
| 1982 | |||
| 1983 | /* | ||
| 1984 | * Sparse allocation file systems need to be more conservative | ||
| 1985 | * with reserving room for expansion - the actual allocation | ||
| 1986 | * happens while we've got a journal handle open so re-taking | ||
| 1987 | * a cluster lock (because we ran out of room for another | ||
| 1988 | * extent) will violate ordering rules. | ||
| 1989 | * | ||
| 1990 | * Most of the time we'll only be seeing this 1 cluster at a time | ||
| 1991 | * anyway. | ||
| 1992 | * | ||
| 1993 | * Always lock for any unwritten extents - we might want to | ||
| 1994 | * add blocks during a split. | ||
| 1995 | */ | ||
| 1996 | if (!num_free_extents || | ||
| 1997 | (ocfs2_sparse_alloc(osb) && num_free_extents < max_recs_needed)) { | ||
| 1998 | ret = ocfs2_reserve_new_metadata(osb, et->et_root_el, meta_ac); | ||
| 1999 | if (ret < 0) { | ||
| 2000 | if (ret != -ENOSPC) | ||
| 2001 | mlog_errno(ret); | ||
| 2002 | goto out; | ||
| 2003 | } | ||
| 2004 | } | ||
| 2005 | |||
| 2006 | if (clusters_to_add == 0) | ||
| 2007 | goto out; | ||
| 2008 | |||
| 2009 | ret = ocfs2_reserve_clusters(osb, clusters_to_add, data_ac); | ||
| 2010 | if (ret < 0) { | ||
| 2011 | if (ret != -ENOSPC) | ||
| 2012 | mlog_errno(ret); | ||
| 2013 | goto out; | ||
| 2014 | } | ||
| 2015 | |||
| 2016 | out: | ||
| 2017 | if (ret) { | ||
| 2018 | if (*meta_ac) { | ||
| 2019 | ocfs2_free_alloc_context(*meta_ac); | ||
| 2020 | *meta_ac = NULL; | ||
| 2021 | } | ||
| 2022 | |||
| 2023 | /* | ||
| 2024 | * We cannot have an error and a non null *data_ac. | ||
| 2025 | */ | ||
| 2026 | } | ||
| 2027 | |||
| 2028 | return ret; | ||
| 2029 | } | ||
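ocfs2_lock_allocators() packages the "reserve metadata and/or data before opening a transaction" pattern its comment describes. A schematic caller, assuming et is an ocfs2_extent_tree already set up over the inode's extent list and leaving the journal handle and extent-insert steps as a placeholder:

	/* Sketch only: typical ocfs2_lock_allocators() usage. */
	struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL;
	int ret;

	ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0,
				    &data_ac, &meta_ac);
	if (ret) {
		if (ret != -ENOSPC)
			mlog_errno(ret);
		goto out;
	}

	/* ... start a handle and add the clusters, passing data_ac and
	 *     meta_ac down to the allocation paths ... */

	if (data_ac)
		ocfs2_free_alloc_context(data_ac);
	if (meta_ac)
		ocfs2_free_alloc_context(meta_ac);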
diff --git a/fs/ocfs2/suballoc.h b/fs/ocfs2/suballoc.h index 544c600662bd..4df159d8f450 100644 --- a/fs/ocfs2/suballoc.h +++ b/fs/ocfs2/suballoc.h | |||
| @@ -28,10 +28,11 @@ | |||
| 28 | 28 | ||
| 29 | typedef int (group_search_t)(struct inode *, | 29 | typedef int (group_search_t)(struct inode *, |
| 30 | struct buffer_head *, | 30 | struct buffer_head *, |
| 31 | u32, | 31 | u32, /* bits_wanted */ |
| 32 | u32, | 32 | u32, /* min_bits */ |
| 33 | u16 *, | 33 | u64, /* max_block */ |
| 34 | u16 *); | 34 | u16 *, /* *bit_off */ |
| 35 | u16 *); /* *bits_found */ | ||
| 35 | 36 | ||
| 36 | struct ocfs2_alloc_context { | 37 | struct ocfs2_alloc_context { |
| 37 | struct inode *ac_inode; /* which bitmap are we allocating from? */ | 38 | struct inode *ac_inode; /* which bitmap are we allocating from? */ |
| @@ -51,6 +52,8 @@ struct ocfs2_alloc_context { | |||
| 51 | group_search_t *ac_group_search; | 52 | group_search_t *ac_group_search; |
| 52 | 53 | ||
| 53 | u64 ac_last_group; | 54 | u64 ac_last_group; |
| 55 | u64 ac_max_block; /* Highest block number to allocate. 0 is | ||
| 56 | is the same as ~0 - unlimited */ | ||
| 54 | }; | 57 | }; |
| 55 | 58 | ||
| 56 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); | 59 | void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac); |
| @@ -59,9 +62,17 @@ static inline int ocfs2_alloc_context_bits_left(struct ocfs2_alloc_context *ac) | |||
| 59 | return ac->ac_bits_wanted - ac->ac_bits_given; | 62 | return ac->ac_bits_wanted - ac->ac_bits_given; |
| 60 | } | 63 | } |
| 61 | 64 | ||
| 65 | /* | ||
| 66 | * Please note that the caller must make sure that root_el is the root | ||
| 67 | * of extent tree. So for an inode, it should be &fe->id2.i_list. Otherwise | ||
| 68 | * the result may be wrong. | ||
| 69 | */ | ||
| 62 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, | 70 | int ocfs2_reserve_new_metadata(struct ocfs2_super *osb, |
| 63 | struct ocfs2_dinode *fe, | 71 | struct ocfs2_extent_list *root_el, |
| 64 | struct ocfs2_alloc_context **ac); | 72 | struct ocfs2_alloc_context **ac); |
| 73 | int ocfs2_reserve_new_metadata_blocks(struct ocfs2_super *osb, | ||
| 74 | int blocks, | ||
| 75 | struct ocfs2_alloc_context **ac); | ||
| 65 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, | 76 | int ocfs2_reserve_new_inode(struct ocfs2_super *osb, |
| 66 | struct ocfs2_alloc_context **ac); | 77 | struct ocfs2_alloc_context **ac); |
| 67 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, | 78 | int ocfs2_reserve_clusters(struct ocfs2_super *osb, |
| @@ -147,6 +158,7 @@ static inline int ocfs2_is_cluster_bitmap(struct inode *inode) | |||
| 147 | * apis above. */ | 158 | * apis above. */ |
| 148 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, | 159 | int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb, |
| 149 | struct ocfs2_alloc_context *ac); | 160 | struct ocfs2_alloc_context *ac); |
| 161 | void ocfs2_free_ac_resource(struct ocfs2_alloc_context *ac); | ||
| 150 | 162 | ||
| 151 | /* given a cluster offset, calculate which block group it belongs to | 163 | /* given a cluster offset, calculate which block group it belongs to |
| 152 | * and return that block offset. */ | 164 | * and return that block offset. */ |
| @@ -156,4 +168,8 @@ u64 ocfs2_which_cluster_group(struct inode *inode, u32 cluster); | |||
| 156 | int ocfs2_check_group_descriptor(struct super_block *sb, | 168 | int ocfs2_check_group_descriptor(struct super_block *sb, |
| 157 | struct ocfs2_dinode *di, | 169 | struct ocfs2_dinode *di, |
| 158 | struct ocfs2_group_desc *gd); | 170 | struct ocfs2_group_desc *gd); |
| 171 | int ocfs2_lock_allocators(struct inode *inode, struct ocfs2_extent_tree *et, | ||
| 172 | u32 clusters_to_add, u32 extents_to_split, | ||
| 173 | struct ocfs2_alloc_context **data_ac, | ||
| 174 | struct ocfs2_alloc_context **meta_ac); | ||
| 159 | #endif /* _CHAINALLOC_H_ */ | 175 | #endif /* _CHAINALLOC_H_ */ |
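The new ac_max_block field, and the max_block argument now threaded through group_search_t, put an upper bound on where an allocation may land; 0 means no bound. As a rough worked example of the cap ocfs2_reserve_new_inode() applies when inode64 is not given: with 4 KB blocks (an illustrative size), ac_max_block = (u32)~0U restricts inode allocation to the first 2^32 blocks, i.e. 2^32 * 2^12 bytes = 16 TiB of the volume, keeping the block-derived inode numbers within the range a 32-bit stat(2) interface can report.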
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 88255d3f52b4..304b63ac78cf 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c | |||
| @@ -64,6 +64,7 @@ | |||
| 64 | #include "sysfile.h" | 64 | #include "sysfile.h" |
| 65 | #include "uptodate.h" | 65 | #include "uptodate.h" |
| 66 | #include "ver.h" | 66 | #include "ver.h" |
| 67 | #include "xattr.h" | ||
| 67 | 68 | ||
| 68 | #include "buffer_head_io.h" | 69 | #include "buffer_head_io.h" |
| 69 | 70 | ||
| @@ -154,10 +155,13 @@ enum { | |||
| 154 | Opt_localalloc, | 155 | Opt_localalloc, |
| 155 | Opt_localflocks, | 156 | Opt_localflocks, |
| 156 | Opt_stack, | 157 | Opt_stack, |
| 158 | Opt_user_xattr, | ||
| 159 | Opt_nouser_xattr, | ||
| 160 | Opt_inode64, | ||
| 157 | Opt_err, | 161 | Opt_err, |
| 158 | }; | 162 | }; |
| 159 | 163 | ||
| 160 | static match_table_t tokens = { | 164 | static const match_table_t tokens = { |
| 161 | {Opt_barrier, "barrier=%u"}, | 165 | {Opt_barrier, "barrier=%u"}, |
| 162 | {Opt_err_panic, "errors=panic"}, | 166 | {Opt_err_panic, "errors=panic"}, |
| 163 | {Opt_err_ro, "errors=remount-ro"}, | 167 | {Opt_err_ro, "errors=remount-ro"}, |
| @@ -173,6 +177,9 @@ static match_table_t tokens = { | |||
| 173 | {Opt_localalloc, "localalloc=%d"}, | 177 | {Opt_localalloc, "localalloc=%d"}, |
| 174 | {Opt_localflocks, "localflocks"}, | 178 | {Opt_localflocks, "localflocks"}, |
| 175 | {Opt_stack, "cluster_stack=%s"}, | 179 | {Opt_stack, "cluster_stack=%s"}, |
| 180 | {Opt_user_xattr, "user_xattr"}, | ||
| 181 | {Opt_nouser_xattr, "nouser_xattr"}, | ||
| 182 | {Opt_inode64, "inode64"}, | ||
| 176 | {Opt_err, NULL} | 183 | {Opt_err, NULL} |
| 177 | }; | 184 | }; |
| 178 | 185 | ||
| @@ -205,10 +212,11 @@ static int ocfs2_sync_fs(struct super_block *sb, int wait) | |||
| 205 | ocfs2_schedule_truncate_log_flush(osb, 0); | 212 | ocfs2_schedule_truncate_log_flush(osb, 0); |
| 206 | } | 213 | } |
| 207 | 214 | ||
| 208 | if (journal_start_commit(OCFS2_SB(sb)->journal->j_journal, &target)) { | 215 | if (jbd2_journal_start_commit(OCFS2_SB(sb)->journal->j_journal, |
| 216 | &target)) { | ||
| 209 | if (wait) | 217 | if (wait) |
| 210 | log_wait_commit(OCFS2_SB(sb)->journal->j_journal, | 218 | jbd2_log_wait_commit(OCFS2_SB(sb)->journal->j_journal, |
| 211 | target); | 219 | target); |
| 212 | } | 220 | } |
| 213 | return 0; | 221 | return 0; |
| 214 | } | 222 | } |
| @@ -325,6 +333,7 @@ static struct inode *ocfs2_alloc_inode(struct super_block *sb) | |||
| 325 | if (!oi) | 333 | if (!oi) |
| 326 | return NULL; | 334 | return NULL; |
| 327 | 335 | ||
| 336 | jbd2_journal_init_jbd_inode(&oi->ip_jinode, &oi->vfs_inode); | ||
| 328 | return &oi->vfs_inode; | 337 | return &oi->vfs_inode; |
| 329 | } | 338 | } |
| 330 | 339 | ||
| @@ -406,6 +415,15 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) | |||
| 406 | goto out; | 415 | goto out; |
| 407 | } | 416 | } |
| 408 | 417 | ||
| 418 | /* Probably don't want this on remount; it might | ||
| 419 | * mess with other nodes */ | ||
| 420 | if (!(osb->s_mount_opt & OCFS2_MOUNT_INODE64) && | ||
| 421 | (parsed_options.mount_opt & OCFS2_MOUNT_INODE64)) { | ||
| 422 | ret = -EINVAL; | ||
| 423 | mlog(ML_ERROR, "Cannot enable inode64 on remount\n"); | ||
| 424 | goto out; | ||
| 425 | } | ||
| 426 | |||
| 409 | /* We're going to/from readonly mode. */ | 427 | /* We're going to/from readonly mode. */ |
| 410 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { | 428 | if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY)) { |
| 411 | /* Lock here so the check of HARD_RO and the potential | 429 | /* Lock here so the check of HARD_RO and the potential |
| @@ -637,7 +655,8 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 637 | osb->s_atime_quantum = parsed_options.atime_quantum; | 655 | osb->s_atime_quantum = parsed_options.atime_quantum; |
| 638 | osb->preferred_slot = parsed_options.slot; | 656 | osb->preferred_slot = parsed_options.slot; |
| 639 | osb->osb_commit_interval = parsed_options.commit_interval; | 657 | osb->osb_commit_interval = parsed_options.commit_interval; |
| 640 | osb->local_alloc_size = parsed_options.localalloc_opt; | 658 | osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt); |
| 659 | osb->local_alloc_bits = osb->local_alloc_default_bits; | ||
| 641 | 660 | ||
| 642 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); | 661 | status = ocfs2_verify_userspace_stack(osb, &parsed_options); |
| 643 | if (status) | 662 | if (status) |
| @@ -743,8 +762,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) | |||
| 743 | return status; | 762 | return status; |
| 744 | 763 | ||
| 745 | read_super_error: | 764 | read_super_error: |
| 746 | if (bh != NULL) | 765 | brelse(bh); |
| 747 | brelse(bh); | ||
| 748 | 766 | ||
| 749 | if (inode) | 767 | if (inode) |
| 750 | iput(inode); | 768 | iput(inode); |
| @@ -847,6 +865,12 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 847 | case Opt_data_writeback: | 865 | case Opt_data_writeback: |
| 848 | mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; | 866 | mopt->mount_opt |= OCFS2_MOUNT_DATA_WRITEBACK; |
| 849 | break; | 867 | break; |
| 868 | case Opt_user_xattr: | ||
| 869 | mopt->mount_opt &= ~OCFS2_MOUNT_NOUSERXATTR; | ||
| 870 | break; | ||
| 871 | case Opt_nouser_xattr: | ||
| 872 | mopt->mount_opt |= OCFS2_MOUNT_NOUSERXATTR; | ||
| 873 | break; | ||
| 850 | case Opt_atime_quantum: | 874 | case Opt_atime_quantum: |
| 851 | if (match_int(&args[0], &option)) { | 875 | if (match_int(&args[0], &option)) { |
| 852 | status = 0; | 876 | status = 0; |
| @@ -873,7 +897,7 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 873 | if (option < 0) | 897 | if (option < 0) |
| 874 | return 0; | 898 | return 0; |
| 875 | if (option == 0) | 899 | if (option == 0) |
| 876 | option = JBD_DEFAULT_MAX_COMMIT_AGE; | 900 | option = JBD2_DEFAULT_MAX_COMMIT_AGE; |
| 877 | mopt->commit_interval = HZ * option; | 901 | mopt->commit_interval = HZ * option; |
| 878 | break; | 902 | break; |
| 879 | case Opt_localalloc: | 903 | case Opt_localalloc: |
| @@ -918,6 +942,9 @@ static int ocfs2_parse_options(struct super_block *sb, | |||
| 918 | OCFS2_STACK_LABEL_LEN); | 942 | OCFS2_STACK_LABEL_LEN); |
| 919 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; | 943 | mopt->cluster_stack[OCFS2_STACK_LABEL_LEN] = '\0'; |
| 920 | break; | 944 | break; |
| 945 | case Opt_inode64: | ||
| 946 | mopt->mount_opt |= OCFS2_MOUNT_INODE64; | ||
| 947 | break; | ||
| 921 | default: | 948 | default: |
| 922 | mlog(ML_ERROR, | 949 | mlog(ML_ERROR, |
| 923 | "Unrecognized mount option \"%s\" " | 950 | "Unrecognized mount option \"%s\" " |
| @@ -938,6 +965,7 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 938 | { | 965 | { |
| 939 | struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); | 966 | struct ocfs2_super *osb = OCFS2_SB(mnt->mnt_sb); |
| 940 | unsigned long opts = osb->s_mount_opt; | 967 | unsigned long opts = osb->s_mount_opt; |
| 968 | unsigned int local_alloc_megs; | ||
| 941 | 969 | ||
| 942 | if (opts & OCFS2_MOUNT_HB_LOCAL) | 970 | if (opts & OCFS2_MOUNT_HB_LOCAL) |
| 943 | seq_printf(s, ",_netdev,heartbeat=local"); | 971 | seq_printf(s, ",_netdev,heartbeat=local"); |
| @@ -970,8 +998,9 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 970 | seq_printf(s, ",commit=%u", | 998 | seq_printf(s, ",commit=%u", |
| 971 | (unsigned) (osb->osb_commit_interval / HZ)); | 999 | (unsigned) (osb->osb_commit_interval / HZ)); |
| 972 | 1000 | ||
| 973 | if (osb->local_alloc_size != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) | 1001 | local_alloc_megs = osb->local_alloc_bits >> (20 - osb->s_clustersize_bits); |
| 974 | seq_printf(s, ",localalloc=%d", osb->local_alloc_size); | 1002 | if (local_alloc_megs != OCFS2_DEFAULT_LOCAL_ALLOC_SIZE) |
| 1003 | seq_printf(s, ",localalloc=%d", local_alloc_megs); | ||
| 975 | 1004 | ||
| 976 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) | 1005 | if (opts & OCFS2_MOUNT_LOCALFLOCKS) |
| 977 | seq_printf(s, ",localflocks,"); | 1006 | seq_printf(s, ",localflocks,"); |
| @@ -980,6 +1009,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 980 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, | 1009 | seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN, |
| 981 | osb->osb_cluster_stack); | 1010 | osb->osb_cluster_stack); |
| 982 | 1011 | ||
| 1012 | if (opts & OCFS2_MOUNT_NOUSERXATTR) | ||
| 1013 | seq_printf(s, ",nouser_xattr"); | ||
| 1014 | else | ||
| 1015 | seq_printf(s, ",user_xattr"); | ||
| 1016 | |||
| 1017 | if (opts & OCFS2_MOUNT_INODE64) | ||
| 1018 | seq_printf(s, ",inode64"); | ||
| 1019 | |||
| 983 | return 0; | 1020 | return 0; |
| 984 | } | 1021 | } |
| 985 | 1022 | ||
| @@ -1132,6 +1169,7 @@ static void ocfs2_inode_init_once(void *data) | |||
| 1132 | oi->ip_dir_start_lookup = 0; | 1169 | oi->ip_dir_start_lookup = 0; |
| 1133 | 1170 | ||
| 1134 | init_rwsem(&oi->ip_alloc_sem); | 1171 | init_rwsem(&oi->ip_alloc_sem); |
| 1172 | init_rwsem(&oi->ip_xattr_sem); | ||
| 1135 | mutex_init(&oi->ip_io_mutex); | 1173 | mutex_init(&oi->ip_io_mutex); |
| 1136 | 1174 | ||
| 1137 | oi->ip_blkno = 0ULL; | 1175 | oi->ip_blkno = 0ULL; |
| @@ -1375,6 +1413,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1375 | sb->s_fs_info = osb; | 1413 | sb->s_fs_info = osb; |
| 1376 | sb->s_op = &ocfs2_sops; | 1414 | sb->s_op = &ocfs2_sops; |
| 1377 | sb->s_export_op = &ocfs2_export_ops; | 1415 | sb->s_export_op = &ocfs2_export_ops; |
| 1416 | sb->s_xattr = ocfs2_xattr_handlers; | ||
| 1378 | sb->s_time_gran = 1; | 1417 | sb->s_time_gran = 1; |
| 1379 | sb->s_flags |= MS_NOATIME; | 1418 | sb->s_flags |= MS_NOATIME; |
| 1380 | /* this is needed to support O_LARGEFILE */ | 1419 | /* this is needed to support O_LARGEFILE */ |
| @@ -1421,8 +1460,12 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1421 | 1460 | ||
| 1422 | osb->slot_num = OCFS2_INVALID_SLOT; | 1461 | osb->slot_num = OCFS2_INVALID_SLOT; |
| 1423 | 1462 | ||
| 1463 | osb->s_xattr_inline_size = le16_to_cpu( | ||
| 1464 | di->id2.i_super.s_xattr_inline_size); | ||
| 1465 | |||
| 1424 | osb->local_alloc_state = OCFS2_LA_UNUSED; | 1466 | osb->local_alloc_state = OCFS2_LA_UNUSED; |
| 1425 | osb->local_alloc_bh = NULL; | 1467 | osb->local_alloc_bh = NULL; |
| 1468 | INIT_DELAYED_WORK(&osb->la_enable_wq, ocfs2_la_enable_worker); | ||
| 1426 | 1469 | ||
| 1427 | init_waitqueue_head(&osb->osb_mount_event); | 1470 | init_waitqueue_head(&osb->osb_mount_event); |
| 1428 | 1471 | ||
| @@ -1568,6 +1611,7 @@ static int ocfs2_initialize_super(struct super_block *sb, | |||
| 1568 | osb->first_cluster_group_blkno = | 1611 | osb->first_cluster_group_blkno = |
| 1569 | le64_to_cpu(di->id2.i_super.s_first_cluster_group); | 1612 | le64_to_cpu(di->id2.i_super.s_first_cluster_group); |
| 1570 | osb->fs_generation = le32_to_cpu(di->i_fs_generation); | 1613 | osb->fs_generation = le32_to_cpu(di->i_fs_generation); |
| 1614 | osb->uuid_hash = le32_to_cpu(di->id2.i_super.s_uuid_hash); | ||
| 1571 | mlog(0, "vol_label: %s\n", osb->vol_label); | 1615 | mlog(0, "vol_label: %s\n", osb->vol_label); |
| 1572 | mlog(0, "uuid: %s\n", osb->uuid_str); | 1616 | mlog(0, "uuid: %s\n", osb->uuid_str); |
| 1573 | mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", | 1617 | mlog(0, "root_blkno=%llu, system_dir_blkno=%llu\n", |
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index ba9dbb51d25b..cbd03dfdc7b9 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c | |||
| @@ -50,6 +50,7 @@ | |||
| 50 | #include "inode.h" | 50 | #include "inode.h" |
| 51 | #include "journal.h" | 51 | #include "journal.h" |
| 52 | #include "symlink.h" | 52 | #include "symlink.h" |
| 53 | #include "xattr.h" | ||
| 53 | 54 | ||
| 54 | #include "buffer_head_io.h" | 55 | #include "buffer_head_io.h" |
| 55 | 56 | ||
| @@ -83,11 +84,7 @@ static char *ocfs2_fast_symlink_getlink(struct inode *inode, | |||
| 83 | 84 | ||
| 84 | mlog_entry_void(); | 85 | mlog_entry_void(); |
| 85 | 86 | ||
| 86 | status = ocfs2_read_block(OCFS2_SB(inode->i_sb), | 87 | status = ocfs2_read_block(inode, OCFS2_I(inode)->ip_blkno, bh); |
| 87 | OCFS2_I(inode)->ip_blkno, | ||
| 88 | bh, | ||
| 89 | OCFS2_BH_CACHED, | ||
| 90 | inode); | ||
| 91 | if (status < 0) { | 88 | if (status < 0) { |
| 92 | mlog_errno(status); | 89 | mlog_errno(status); |
| 93 | link = ERR_PTR(status); | 90 | link = ERR_PTR(status); |
| @@ -157,8 +154,7 @@ bail: | |||
| 157 | kunmap(page); | 154 | kunmap(page); |
| 158 | page_cache_release(page); | 155 | page_cache_release(page); |
| 159 | } | 156 | } |
| 160 | if (bh) | 157 | brelse(bh); |
| 161 | brelse(bh); | ||
| 162 | 158 | ||
| 163 | return ERR_PTR(status); | 159 | return ERR_PTR(status); |
| 164 | } | 160 | } |
| @@ -168,10 +164,18 @@ const struct inode_operations ocfs2_symlink_inode_operations = { | |||
| 168 | .follow_link = ocfs2_follow_link, | 164 | .follow_link = ocfs2_follow_link, |
| 169 | .getattr = ocfs2_getattr, | 165 | .getattr = ocfs2_getattr, |
| 170 | .setattr = ocfs2_setattr, | 166 | .setattr = ocfs2_setattr, |
| 167 | .setxattr = generic_setxattr, | ||
| 168 | .getxattr = generic_getxattr, | ||
| 169 | .listxattr = ocfs2_listxattr, | ||
| 170 | .removexattr = generic_removexattr, | ||
| 171 | }; | 171 | }; |
| 172 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { | 172 | const struct inode_operations ocfs2_fast_symlink_inode_operations = { |
| 173 | .readlink = ocfs2_readlink, | 173 | .readlink = ocfs2_readlink, |
| 174 | .follow_link = ocfs2_follow_link, | 174 | .follow_link = ocfs2_follow_link, |
| 175 | .getattr = ocfs2_getattr, | 175 | .getattr = ocfs2_getattr, |
| 176 | .setattr = ocfs2_setattr, | 176 | .setattr = ocfs2_setattr, |
| 177 | .setxattr = generic_setxattr, | ||
| 178 | .getxattr = generic_getxattr, | ||
| 179 | .listxattr = ocfs2_listxattr, | ||
| 180 | .removexattr = generic_removexattr, | ||
| 177 | }; | 181 | }; |
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c index 4da8851f2b23..187b99ff0368 100644 --- a/fs/ocfs2/uptodate.c +++ b/fs/ocfs2/uptodate.c | |||
| @@ -53,7 +53,11 @@ | |||
| 53 | #include <linux/highmem.h> | 53 | #include <linux/highmem.h> |
| 54 | #include <linux/buffer_head.h> | 54 | #include <linux/buffer_head.h> |
| 55 | #include <linux/rbtree.h> | 55 | #include <linux/rbtree.h> |
| 56 | #include <linux/jbd.h> | 56 | #ifndef CONFIG_OCFS2_COMPAT_JBD |
| 57 | # include <linux/jbd2.h> | ||
| 58 | #else | ||
| 59 | # include <linux/jbd.h> | ||
| 60 | #endif | ||
| 57 | 61 | ||
| 58 | #define MLOG_MASK_PREFIX ML_UPTODATE | 62 | #define MLOG_MASK_PREFIX ML_UPTODATE |
| 59 | 63 | ||
| @@ -511,14 +515,10 @@ static void ocfs2_remove_metadata_tree(struct ocfs2_caching_info *ci, | |||
| 511 | ci->ci_num_cached--; | 515 | ci->ci_num_cached--; |
| 512 | } | 516 | } |
| 513 | 517 | ||
| 514 | /* Called when we remove a chunk of metadata from an inode. We don't | 518 | static void ocfs2_remove_block_from_cache(struct inode *inode, |
| 515 | * bother reverting things to an inlined array in the case of a remove | 519 | sector_t block) |
| 516 | * which moves us back under the limit. */ | ||
| 517 | void ocfs2_remove_from_cache(struct inode *inode, | ||
| 518 | struct buffer_head *bh) | ||
| 519 | { | 520 | { |
| 520 | int index; | 521 | int index; |
| 521 | sector_t block = bh->b_blocknr; | ||
| 522 | struct ocfs2_meta_cache_item *item = NULL; | 522 | struct ocfs2_meta_cache_item *item = NULL; |
| 523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | 523 | struct ocfs2_inode_info *oi = OCFS2_I(inode); |
| 524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; | 524 | struct ocfs2_caching_info *ci = &oi->ip_metadata_cache; |
| @@ -544,6 +544,30 @@ void ocfs2_remove_from_cache(struct inode *inode, | |||
| 544 | kmem_cache_free(ocfs2_uptodate_cachep, item); | 544 | kmem_cache_free(ocfs2_uptodate_cachep, item); |
| 545 | } | 545 | } |
| 546 | 546 | ||
| 547 | /* | ||
| 548 | * Called when we remove a chunk of metadata from an inode. We don't | ||
| 549 | * bother reverting things to an inlined array in the case of a remove | ||
| 550 | * which moves us back under the limit. | ||
| 551 | */ | ||
| 552 | void ocfs2_remove_from_cache(struct inode *inode, | ||
| 553 | struct buffer_head *bh) | ||
| 554 | { | ||
| 555 | sector_t block = bh->b_blocknr; | ||
| 556 | |||
| 557 | ocfs2_remove_block_from_cache(inode, block); | ||
| 558 | } | ||
| 559 | |||
| 560 | /* Called when we remove xattr clusters from an inode. */ | ||
| 561 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | ||
| 562 | sector_t block, | ||
| 563 | u32 c_len) | ||
| 564 | { | ||
| 565 | unsigned int i, b_len = ocfs2_clusters_to_blocks(inode->i_sb, 1) * c_len; | ||
| 566 | |||
| 567 | for (i = 0; i < b_len; i++, block++) | ||
| 568 | ocfs2_remove_block_from_cache(inode, block); | ||
| 569 | } | ||
| 570 | |||
| 547 | int __init init_ocfs2_uptodate_cache(void) | 571 | int __init init_ocfs2_uptodate_cache(void) |
| 548 | { | 572 | { |
| 549 | ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", | 573 | ocfs2_uptodate_cachep = kmem_cache_create("ocfs2_uptodate", |
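The new ocfs2_remove_xattr_clusters_from_cache() expands a cluster run into blocks and drops each one from the inode's metadata cache. A sketch of the call a path freeing xattr clusters would make, where p_cluster and num_clusters are illustrative names for the physical start cluster and length of the run:

	/* Sketch only: invalidate cached metadata for a freed xattr run. */
	u64 start_blk = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);

	ocfs2_remove_xattr_clusters_from_cache(inode, start_blk, num_clusters);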
diff --git a/fs/ocfs2/uptodate.h b/fs/ocfs2/uptodate.h index 2e73206059a8..531b4b3a0c47 100644 --- a/fs/ocfs2/uptodate.h +++ b/fs/ocfs2/uptodate.h | |||
| @@ -40,6 +40,9 @@ void ocfs2_set_new_buffer_uptodate(struct inode *inode, | |||
| 40 | struct buffer_head *bh); | 40 | struct buffer_head *bh); |
| 41 | void ocfs2_remove_from_cache(struct inode *inode, | 41 | void ocfs2_remove_from_cache(struct inode *inode, |
| 42 | struct buffer_head *bh); | 42 | struct buffer_head *bh); |
| 43 | void ocfs2_remove_xattr_clusters_from_cache(struct inode *inode, | ||
| 44 | sector_t block, | ||
| 45 | u32 c_len); | ||
| 43 | int ocfs2_buffer_read_ahead(struct inode *inode, | 46 | int ocfs2_buffer_read_ahead(struct inode *inode, |
| 44 | struct buffer_head *bh); | 47 | struct buffer_head *bh); |
| 45 | 48 | ||
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c new file mode 100644 index 000000000000..802c41492214 --- /dev/null +++ b/fs/ocfs2/xattr.c | |||
| @@ -0,0 +1,4832 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * xattr.c | ||
| 5 | * | ||
| 6 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 7 | * | ||
| 8 | * CREDITS: | ||
| 9 | * Lots of code in this file is taken from ext3. | ||
| 10 | * | ||
| 11 | * This program is free software; you can redistribute it and/or | ||
| 12 | * modify it under the terms of the GNU General Public | ||
| 13 | * License as published by the Free Software Foundation; either | ||
| 14 | * version 2 of the License, or (at your option) any later version. | ||
| 15 | * | ||
| 16 | * This program is distributed in the hope that it will be useful, | ||
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 19 | * General Public License for more details. | ||
| 20 | * | ||
| 21 | * You should have received a copy of the GNU General Public | ||
| 22 | * License along with this program; if not, write to the | ||
| 23 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 24 | * Boston, MA 021110-1307, USA. | ||
| 25 | */ | ||
| 26 | |||
| 27 | #include <linux/capability.h> | ||
| 28 | #include <linux/fs.h> | ||
| 29 | #include <linux/types.h> | ||
| 30 | #include <linux/slab.h> | ||
| 31 | #include <linux/highmem.h> | ||
| 32 | #include <linux/pagemap.h> | ||
| 33 | #include <linux/uio.h> | ||
| 34 | #include <linux/sched.h> | ||
| 35 | #include <linux/splice.h> | ||
| 36 | #include <linux/mount.h> | ||
| 37 | #include <linux/writeback.h> | ||
| 38 | #include <linux/falloc.h> | ||
| 39 | #include <linux/sort.h> | ||
| 40 | #include <linux/init.h> | ||
| 41 | #include <linux/module.h> | ||
| 42 | #include <linux/string.h> | ||
| 43 | |||
| 44 | #define MLOG_MASK_PREFIX ML_XATTR | ||
| 45 | #include <cluster/masklog.h> | ||
| 46 | |||
| 47 | #include "ocfs2.h" | ||
| 48 | #include "alloc.h" | ||
| 49 | #include "dlmglue.h" | ||
| 50 | #include "file.h" | ||
| 51 | #include "symlink.h" | ||
| 52 | #include "sysfile.h" | ||
| 53 | #include "inode.h" | ||
| 54 | #include "journal.h" | ||
| 55 | #include "ocfs2_fs.h" | ||
| 56 | #include "suballoc.h" | ||
| 57 | #include "uptodate.h" | ||
| 58 | #include "buffer_head_io.h" | ||
| 59 | #include "super.h" | ||
| 60 | #include "xattr.h" | ||
| 61 | |||
| 62 | |||
| 63 | struct ocfs2_xattr_def_value_root { | ||
| 64 | struct ocfs2_xattr_value_root xv; | ||
| 65 | struct ocfs2_extent_rec er; | ||
| 66 | }; | ||
| 67 | |||
| 68 | struct ocfs2_xattr_bucket { | ||
| 69 | struct buffer_head *bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET]; | ||
| 70 | struct ocfs2_xattr_header *xh; | ||
| 71 | }; | ||
| 72 | |||
| 73 | #define OCFS2_XATTR_ROOT_SIZE (sizeof(struct ocfs2_xattr_def_value_root)) | ||
| 74 | #define OCFS2_XATTR_INLINE_SIZE 80 | ||
| 75 | |||
| 76 | static struct ocfs2_xattr_def_value_root def_xv = { | ||
| 77 | .xv.xr_list.l_count = cpu_to_le16(1), | ||
| 78 | }; | ||
| 79 | |||
| 80 | struct xattr_handler *ocfs2_xattr_handlers[] = { | ||
| 81 | &ocfs2_xattr_user_handler, | ||
| 82 | &ocfs2_xattr_trusted_handler, | ||
| 83 | NULL | ||
| 84 | }; | ||
| 85 | |||
| 86 | static struct xattr_handler *ocfs2_xattr_handler_map[] = { | ||
| 87 | [OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler, | ||
| 88 | [OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler, | ||
| 89 | }; | ||
| 90 | |||
| 91 | struct ocfs2_xattr_info { | ||
| 92 | int name_index; | ||
| 93 | const char *name; | ||
| 94 | const void *value; | ||
| 95 | size_t value_len; | ||
| 96 | }; | ||
| 97 | |||
| 98 | struct ocfs2_xattr_search { | ||
| 99 | struct buffer_head *inode_bh; | ||
| 100 | /* | ||
| 101 | * xattr_bh points to the buffer head that holds the extended attributes; | ||
| 102 | * when the extended attributes live in the inode, xattr_bh is equal to inode_bh. | ||
| 103 | */ | ||
| 104 | struct buffer_head *xattr_bh; | ||
| 105 | struct ocfs2_xattr_header *header; | ||
| 106 | struct ocfs2_xattr_bucket bucket; | ||
| 107 | void *base; | ||
| 108 | void *end; | ||
| 109 | struct ocfs2_xattr_entry *here; | ||
| 110 | int not_found; | ||
| 111 | }; | ||
| 112 | |||
| 113 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
| 114 | struct ocfs2_xattr_header *xh, | ||
| 115 | int index, | ||
| 116 | int *block_off, | ||
| 117 | int *new_offset); | ||
| 118 | |||
| 119 | static int ocfs2_xattr_index_block_find(struct inode *inode, | ||
| 120 | struct buffer_head *root_bh, | ||
| 121 | int name_index, | ||
| 122 | const char *name, | ||
| 123 | struct ocfs2_xattr_search *xs); | ||
| 124 | |||
| 125 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
| 126 | struct ocfs2_xattr_tree_root *xt, | ||
| 127 | char *buffer, | ||
| 128 | size_t buffer_size); | ||
| 129 | |||
| 130 | static int ocfs2_xattr_create_index_block(struct inode *inode, | ||
| 131 | struct ocfs2_xattr_search *xs); | ||
| 132 | |||
| 133 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | ||
| 134 | struct ocfs2_xattr_info *xi, | ||
| 135 | struct ocfs2_xattr_search *xs); | ||
| 136 | |||
| 137 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | ||
| 138 | struct buffer_head *xb_bh); | ||
| 139 | |||
| 140 | static inline const char *ocfs2_xattr_prefix(int name_index) | ||
| 141 | { | ||
| 142 | struct xattr_handler *handler = NULL; | ||
| 143 | |||
| 144 | if (name_index > 0 && name_index < OCFS2_XATTR_MAX) | ||
| 145 | handler = ocfs2_xattr_handler_map[name_index]; | ||
| 146 | |||
| 147 | return handler ? handler->prefix : NULL; | ||
| 148 | } | ||
| 149 | |||
| 150 | static u32 ocfs2_xattr_name_hash(struct inode *inode, | ||
| 151 | const char *name, | ||
| 152 | int name_len) | ||
| 153 | { | ||
| 154 | /* Get hash value of uuid from super block */ | ||
| 155 | u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash; | ||
| 156 | int i; | ||
| 157 | |||
| 158 | /* hash extended attribute name */ | ||
| 159 | for (i = 0; i < name_len; i++) { | ||
| 160 | hash = (hash << OCFS2_HASH_SHIFT) ^ | ||
| 161 | (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^ | ||
| 162 | *name++; | ||
| 163 | } | ||
| 164 | |||
| 165 | return hash; | ||
| 166 | } | ||
| 167 | |||
| 168 | /* | ||
| 169 | * ocfs2_xattr_hash_entry() | ||
| 170 | * | ||
| 171 | * Compute the hash of an extended attribute. | ||
| 172 | */ | ||
| 173 | static void ocfs2_xattr_hash_entry(struct inode *inode, | ||
| 174 | struct ocfs2_xattr_header *header, | ||
| 175 | struct ocfs2_xattr_entry *entry) | ||
| 176 | { | ||
| 177 | u32 hash = 0; | ||
| 178 | char *name = (char *)header + le16_to_cpu(entry->xe_name_offset); | ||
| 179 | |||
| 180 | hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len); | ||
| 181 | entry->xe_name_hash = cpu_to_le32(hash); | ||
| 182 | |||
| 183 | return; | ||
| 184 | } | ||
| 185 | |||
| 186 | static int ocfs2_xattr_extend_allocation(struct inode *inode, | ||
| 187 | u32 clusters_to_add, | ||
| 188 | struct buffer_head *xattr_bh, | ||
| 189 | struct ocfs2_xattr_value_root *xv) | ||
| 190 | { | ||
| 191 | int status = 0; | ||
| 192 | int restart_func = 0; | ||
| 193 | int credits = 0; | ||
| 194 | handle_t *handle = NULL; | ||
| 195 | struct ocfs2_alloc_context *data_ac = NULL; | ||
| 196 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 197 | enum ocfs2_alloc_restarted why; | ||
| 198 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 199 | u32 prev_clusters, logical_start = le32_to_cpu(xv->xr_clusters); | ||
| 200 | struct ocfs2_extent_tree et; | ||
| 201 | |||
| 202 | mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add); | ||
| 203 | |||
| 204 | ocfs2_init_xattr_value_extent_tree(&et, inode, xattr_bh, xv); | ||
| 205 | |||
| 206 | restart_all: | ||
| 207 | |||
| 208 | status = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
| 209 | &data_ac, &meta_ac); | ||
| 210 | if (status) { | ||
| 211 | mlog_errno(status); | ||
| 212 | goto leave; | ||
| 213 | } | ||
| 214 | |||
| 215 | credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, | ||
| 216 | clusters_to_add); | ||
| 217 | handle = ocfs2_start_trans(osb, credits); | ||
| 218 | if (IS_ERR(handle)) { | ||
| 219 | status = PTR_ERR(handle); | ||
| 220 | handle = NULL; | ||
| 221 | mlog_errno(status); | ||
| 222 | goto leave; | ||
| 223 | } | ||
| 224 | |||
| 225 | restarted_transaction: | ||
| 226 | status = ocfs2_journal_access(handle, inode, xattr_bh, | ||
| 227 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 228 | if (status < 0) { | ||
| 229 | mlog_errno(status); | ||
| 230 | goto leave; | ||
| 231 | } | ||
| 232 | |||
| 233 | prev_clusters = le32_to_cpu(xv->xr_clusters); | ||
| 234 | status = ocfs2_add_clusters_in_btree(osb, | ||
| 235 | inode, | ||
| 236 | &logical_start, | ||
| 237 | clusters_to_add, | ||
| 238 | 0, | ||
| 239 | &et, | ||
| 240 | handle, | ||
| 241 | data_ac, | ||
| 242 | meta_ac, | ||
| 243 | &why); | ||
| 244 | if ((status < 0) && (status != -EAGAIN)) { | ||
| 245 | if (status != -ENOSPC) | ||
| 246 | mlog_errno(status); | ||
| 247 | goto leave; | ||
| 248 | } | ||
| 249 | |||
| 250 | status = ocfs2_journal_dirty(handle, xattr_bh); | ||
| 251 | if (status < 0) { | ||
| 252 | mlog_errno(status); | ||
| 253 | goto leave; | ||
| 254 | } | ||
| 255 | |||
| 256 | clusters_to_add -= le32_to_cpu(xv->xr_clusters) - prev_clusters; | ||
| 257 | |||
| 258 | if (why != RESTART_NONE && clusters_to_add) { | ||
| 259 | if (why == RESTART_META) { | ||
| 260 | mlog(0, "restarting function.\n"); | ||
| 261 | restart_func = 1; | ||
| 262 | } else { | ||
| 263 | BUG_ON(why != RESTART_TRANS); | ||
| 264 | |||
| 265 | mlog(0, "restarting transaction.\n"); | ||
| 266 | /* TODO: This can be more intelligent. */ | ||
| 267 | credits = ocfs2_calc_extend_credits(osb->sb, | ||
| 268 | et.et_root_el, | ||
| 269 | clusters_to_add); | ||
| 270 | status = ocfs2_extend_trans(handle, credits); | ||
| 271 | if (status < 0) { | ||
| 272 | /* handle still has to be committed at | ||
| 273 | * this point. */ | ||
| 274 | status = -ENOMEM; | ||
| 275 | mlog_errno(status); | ||
| 276 | goto leave; | ||
| 277 | } | ||
| 278 | goto restarted_transaction; | ||
| 279 | } | ||
| 280 | } | ||
| 281 | |||
| 282 | leave: | ||
| 283 | if (handle) { | ||
| 284 | ocfs2_commit_trans(osb, handle); | ||
| 285 | handle = NULL; | ||
| 286 | } | ||
| 287 | if (data_ac) { | ||
| 288 | ocfs2_free_alloc_context(data_ac); | ||
| 289 | data_ac = NULL; | ||
| 290 | } | ||
| 291 | if (meta_ac) { | ||
| 292 | ocfs2_free_alloc_context(meta_ac); | ||
| 293 | meta_ac = NULL; | ||
| 294 | } | ||
| 295 | if ((!status) && restart_func) { | ||
| 296 | restart_func = 0; | ||
| 297 | goto restart_all; | ||
| 298 | } | ||
| 299 | |||
| 300 | return status; | ||
| 301 | } | ||
| 302 | |||
| 303 | static int __ocfs2_remove_xattr_range(struct inode *inode, | ||
| 304 | struct buffer_head *root_bh, | ||
| 305 | struct ocfs2_xattr_value_root *xv, | ||
| 306 | u32 cpos, u32 phys_cpos, u32 len, | ||
| 307 | struct ocfs2_cached_dealloc_ctxt *dealloc) | ||
| 308 | { | ||
| 309 | int ret; | ||
| 310 | u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 311 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 312 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 313 | handle_t *handle; | ||
| 314 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 315 | struct ocfs2_extent_tree et; | ||
| 316 | |||
| 317 | ocfs2_init_xattr_value_extent_tree(&et, inode, root_bh, xv); | ||
| 318 | |||
| 319 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
| 320 | if (ret) { | ||
| 321 | mlog_errno(ret); | ||
| 322 | return ret; | ||
| 323 | } | ||
| 324 | |||
| 325 | mutex_lock(&tl_inode->i_mutex); | ||
| 326 | |||
| 327 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
| 328 | ret = __ocfs2_flush_truncate_log(osb); | ||
| 329 | if (ret < 0) { | ||
| 330 | mlog_errno(ret); | ||
| 331 | goto out; | ||
| 332 | } | ||
| 333 | } | ||
| 334 | |||
| 335 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
| 336 | if (IS_ERR(handle)) { | ||
| 337 | ret = PTR_ERR(handle); | ||
| 338 | mlog_errno(ret); | ||
| 339 | goto out; | ||
| 340 | } | ||
| 341 | |||
| 342 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 343 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 344 | if (ret) { | ||
| 345 | mlog_errno(ret); | ||
| 346 | goto out_commit; | ||
| 347 | } | ||
| 348 | |||
| 349 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | ||
| 350 | dealloc); | ||
| 351 | if (ret) { | ||
| 352 | mlog_errno(ret); | ||
| 353 | goto out_commit; | ||
| 354 | } | ||
| 355 | |||
| 356 | le32_add_cpu(&xv->xr_clusters, -len); | ||
| 357 | |||
| 358 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
| 359 | if (ret) { | ||
| 360 | mlog_errno(ret); | ||
| 361 | goto out_commit; | ||
| 362 | } | ||
| 363 | |||
| 364 | ret = ocfs2_truncate_log_append(osb, handle, phys_blkno, len); | ||
| 365 | if (ret) | ||
| 366 | mlog_errno(ret); | ||
| 367 | |||
| 368 | out_commit: | ||
| 369 | ocfs2_commit_trans(osb, handle); | ||
| 370 | out: | ||
| 371 | mutex_unlock(&tl_inode->i_mutex); | ||
| 372 | |||
| 373 | if (meta_ac) | ||
| 374 | ocfs2_free_alloc_context(meta_ac); | ||
| 375 | |||
| 376 | return ret; | ||
| 377 | } | ||
| 378 | |||
| 379 | static int ocfs2_xattr_shrink_size(struct inode *inode, | ||
| 380 | u32 old_clusters, | ||
| 381 | u32 new_clusters, | ||
| 382 | struct buffer_head *root_bh, | ||
| 383 | struct ocfs2_xattr_value_root *xv) | ||
| 384 | { | ||
| 385 | int ret = 0; | ||
| 386 | u32 trunc_len, cpos, phys_cpos, alloc_size; | ||
| 387 | u64 block; | ||
| 388 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 389 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
| 390 | |||
| 391 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
| 392 | |||
| 393 | if (old_clusters <= new_clusters) | ||
| 394 | return 0; | ||
| 395 | |||
| 396 | cpos = new_clusters; | ||
| 397 | trunc_len = old_clusters - new_clusters; | ||
| 398 | while (trunc_len) { | ||
| 399 | ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos, | ||
| 400 | &alloc_size, &xv->xr_list); | ||
| 401 | if (ret) { | ||
| 402 | mlog_errno(ret); | ||
| 403 | goto out; | ||
| 404 | } | ||
| 405 | |||
| 406 | if (alloc_size > trunc_len) | ||
| 407 | alloc_size = trunc_len; | ||
| 408 | |||
| 409 | ret = __ocfs2_remove_xattr_range(inode, root_bh, xv, cpos, | ||
| 410 | phys_cpos, alloc_size, | ||
| 411 | &dealloc); | ||
| 412 | if (ret) { | ||
| 413 | mlog_errno(ret); | ||
| 414 | goto out; | ||
| 415 | } | ||
| 416 | |||
| 417 | block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); | ||
| 418 | ocfs2_remove_xattr_clusters_from_cache(inode, block, | ||
| 419 | alloc_size); | ||
| 420 | cpos += alloc_size; | ||
| 421 | trunc_len -= alloc_size; | ||
| 422 | } | ||
| 423 | |||
| 424 | out: | ||
| 425 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 426 | ocfs2_run_deallocs(osb, &dealloc); | ||
| 427 | |||
| 428 | return ret; | ||
| 429 | } | ||
| 430 | |||
| 431 | static int ocfs2_xattr_value_truncate(struct inode *inode, | ||
| 432 | struct buffer_head *root_bh, | ||
| 433 | struct ocfs2_xattr_value_root *xv, | ||
| 434 | int len) | ||
| 435 | { | ||
| 436 | int ret; | ||
| 437 | u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len); | ||
| 438 | u32 old_clusters = le32_to_cpu(xv->xr_clusters); | ||
| 439 | |||
| 440 | if (new_clusters == old_clusters) | ||
| 441 | return 0; | ||
| 442 | |||
| 443 | if (new_clusters > old_clusters) | ||
| 444 | ret = ocfs2_xattr_extend_allocation(inode, | ||
| 445 | new_clusters - old_clusters, | ||
| 446 | root_bh, xv); | ||
| 447 | else | ||
| 448 | ret = ocfs2_xattr_shrink_size(inode, | ||
| 449 | old_clusters, new_clusters, | ||
| 450 | root_bh, xv); | ||
| 451 | |||
| 452 | return ret; | ||
| 453 | } | ||
| 454 | |||
| 455 | static int ocfs2_xattr_list_entry(char *buffer, size_t size, | ||
| 456 | size_t *result, const char *prefix, | ||
| 457 | const char *name, int name_len) | ||
| 458 | { | ||
| 459 | char *p = buffer + *result; | ||
| 460 | int prefix_len = strlen(prefix); | ||
| 461 | int total_len = prefix_len + name_len + 1; | ||
| 462 | |||
| 463 | *result += total_len; | ||
| 464 | |||
| 465 | /* we are just looking for how big our buffer needs to be */ | ||
| 466 | if (!size) | ||
| 467 | return 0; | ||
| 468 | |||
| 469 | if (*result > size) | ||
| 470 | return -ERANGE; | ||
| 471 | |||
| 472 | memcpy(p, prefix, prefix_len); | ||
| 473 | memcpy(p + prefix_len, name, name_len); | ||
| 474 | p[prefix_len + name_len] = '\0'; | ||
| 475 | |||
| 476 | return 0; | ||
| 477 | } | ||
| 478 | |||
| 479 | static int ocfs2_xattr_list_entries(struct inode *inode, | ||
| 480 | struct ocfs2_xattr_header *header, | ||
| 481 | char *buffer, size_t buffer_size) | ||
| 482 | { | ||
| 483 | size_t result = 0; | ||
| 484 | int i, type, ret; | ||
| 485 | const char *prefix, *name; | ||
| 486 | |||
| 487 | for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) { | ||
| 488 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | ||
| 489 | type = ocfs2_xattr_get_type(entry); | ||
| 490 | prefix = ocfs2_xattr_prefix(type); | ||
| 491 | |||
| 492 | if (prefix) { | ||
| 493 | name = (const char *)header + | ||
| 494 | le16_to_cpu(entry->xe_name_offset); | ||
| 495 | |||
| 496 | ret = ocfs2_xattr_list_entry(buffer, buffer_size, | ||
| 497 | &result, prefix, name, | ||
| 498 | entry->xe_name_len); | ||
| 499 | if (ret) | ||
| 500 | return ret; | ||
| 501 | } | ||
| 502 | } | ||
| 503 | |||
| 504 | return result; | ||
| 505 | } | ||
| 506 | |||
| 507 | static int ocfs2_xattr_ibody_list(struct inode *inode, | ||
| 508 | struct ocfs2_dinode *di, | ||
| 509 | char *buffer, | ||
| 510 | size_t buffer_size) | ||
| 511 | { | ||
| 512 | struct ocfs2_xattr_header *header = NULL; | ||
| 513 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 514 | int ret = 0; | ||
| 515 | |||
| 516 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) | ||
| 517 | return ret; | ||
| 518 | |||
| 519 | header = (struct ocfs2_xattr_header *) | ||
| 520 | ((void *)di + inode->i_sb->s_blocksize - | ||
| 521 | le16_to_cpu(di->i_xattr_inline_size)); | ||
| 522 | |||
| 523 | ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size); | ||
| 524 | |||
| 525 | return ret; | ||
| 526 | } | ||
| 527 | |||
| 528 | static int ocfs2_xattr_block_list(struct inode *inode, | ||
| 529 | struct ocfs2_dinode *di, | ||
| 530 | char *buffer, | ||
| 531 | size_t buffer_size) | ||
| 532 | { | ||
| 533 | struct buffer_head *blk_bh = NULL; | ||
| 534 | struct ocfs2_xattr_block *xb; | ||
| 535 | int ret = 0; | ||
| 536 | |||
| 537 | if (!di->i_xattr_loc) | ||
| 538 | return ret; | ||
| 539 | |||
| 540 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
| 541 | if (ret < 0) { | ||
| 542 | mlog_errno(ret); | ||
| 543 | return ret; | ||
| 544 | } | ||
| 545 | /* Verify the signature of the xattr block */ | ||
| 546 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
| 547 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
| 548 | ret = -EFAULT; | ||
| 549 | goto cleanup; | ||
| 550 | } | ||
| 551 | |||
| 552 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
| 553 | |||
| 554 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
| 555 | struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header; | ||
| 556 | ret = ocfs2_xattr_list_entries(inode, header, | ||
| 557 | buffer, buffer_size); | ||
| 558 | } else { | ||
| 559 | struct ocfs2_xattr_tree_root *xt = &xb->xb_attrs.xb_root; | ||
| 560 | ret = ocfs2_xattr_tree_list_index_block(inode, xt, | ||
| 561 | buffer, buffer_size); | ||
| 562 | } | ||
| 563 | cleanup: | ||
| 564 | brelse(blk_bh); | ||
| 565 | |||
| 566 | return ret; | ||
| 567 | } | ||
| 568 | |||
| 569 | ssize_t ocfs2_listxattr(struct dentry *dentry, | ||
| 570 | char *buffer, | ||
| 571 | size_t size) | ||
| 572 | { | ||
| 573 | int ret = 0, i_ret = 0, b_ret = 0; | ||
| 574 | struct buffer_head *di_bh = NULL; | ||
| 575 | struct ocfs2_dinode *di = NULL; | ||
| 576 | struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode); | ||
| 577 | |||
| 578 | if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb))) | ||
| 579 | return -EOPNOTSUPP; | ||
| 580 | |||
| 581 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
| 582 | return ret; | ||
| 583 | |||
| 584 | ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0); | ||
| 585 | if (ret < 0) { | ||
| 586 | mlog_errno(ret); | ||
| 587 | return ret; | ||
| 588 | } | ||
| 589 | |||
| 590 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 591 | |||
| 592 | down_read(&oi->ip_xattr_sem); | ||
| 593 | i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size); | ||
| 594 | if (i_ret < 0) | ||
| 595 | b_ret = 0; | ||
| 596 | else { | ||
| 597 | if (buffer) { | ||
| 598 | buffer += i_ret; | ||
| 599 | size -= i_ret; | ||
| 600 | } | ||
| 601 | b_ret = ocfs2_xattr_block_list(dentry->d_inode, di, | ||
| 602 | buffer, size); | ||
| 603 | if (b_ret < 0) | ||
| 604 | i_ret = 0; | ||
| 605 | } | ||
| 606 | up_read(&oi->ip_xattr_sem); | ||
| 607 | ocfs2_inode_unlock(dentry->d_inode, 0); | ||
| 608 | |||
| 609 | brelse(di_bh); | ||
| 610 | |||
| 611 | return i_ret + b_ret; | ||
| 612 | } | ||
| 613 | |||
| 614 | static int ocfs2_xattr_find_entry(int name_index, | ||
| 615 | const char *name, | ||
| 616 | struct ocfs2_xattr_search *xs) | ||
| 617 | { | ||
| 618 | struct ocfs2_xattr_entry *entry; | ||
| 619 | size_t name_len; | ||
| 620 | int i, cmp = 1; | ||
| 621 | |||
| 622 | if (name == NULL) | ||
| 623 | return -EINVAL; | ||
| 624 | |||
| 625 | name_len = strlen(name); | ||
| 626 | entry = xs->here; | ||
| 627 | for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
| 628 | cmp = name_index - ocfs2_xattr_get_type(entry); | ||
| 629 | if (!cmp) | ||
| 630 | cmp = name_len - entry->xe_name_len; | ||
| 631 | if (!cmp) | ||
| 632 | cmp = memcmp(name, (xs->base + | ||
| 633 | le16_to_cpu(entry->xe_name_offset)), | ||
| 634 | name_len); | ||
| 635 | if (cmp == 0) | ||
| 636 | break; | ||
| 637 | entry += 1; | ||
| 638 | } | ||
| 639 | xs->here = entry; | ||
| 640 | |||
| 641 | return cmp ? -ENODATA : 0; | ||
| 642 | } | ||
| 643 | |||
| 644 | static int ocfs2_xattr_get_value_outside(struct inode *inode, | ||
| 645 | struct ocfs2_xattr_value_root *xv, | ||
| 646 | void *buffer, | ||
| 647 | size_t len) | ||
| 648 | { | ||
| 649 | u32 cpos, p_cluster, num_clusters, bpc, clusters; | ||
| 650 | u64 blkno; | ||
| 651 | int i, ret = 0; | ||
| 652 | size_t cplen, blocksize; | ||
| 653 | struct buffer_head *bh = NULL; | ||
| 654 | struct ocfs2_extent_list *el; | ||
| 655 | |||
| 656 | el = &xv->xr_list; | ||
| 657 | clusters = le32_to_cpu(xv->xr_clusters); | ||
| 658 | bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
| 659 | blocksize = inode->i_sb->s_blocksize; | ||
| 660 | |||
| 661 | cpos = 0; | ||
| 662 | while (cpos < clusters) { | ||
| 663 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
| 664 | &num_clusters, el); | ||
| 665 | if (ret) { | ||
| 666 | mlog_errno(ret); | ||
| 667 | goto out; | ||
| 668 | } | ||
| 669 | |||
| 670 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | ||
| 671 | /* Copy ocfs2_xattr_value */ | ||
| 672 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | ||
| 673 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
| 674 | if (ret) { | ||
| 675 | mlog_errno(ret); | ||
| 676 | goto out; | ||
| 677 | } | ||
| 678 | |||
| 679 | cplen = len >= blocksize ? blocksize : len; | ||
| 680 | memcpy(buffer, bh->b_data, cplen); | ||
| 681 | len -= cplen; | ||
| 682 | buffer += cplen; | ||
| 683 | |||
| 684 | brelse(bh); | ||
| 685 | bh = NULL; | ||
| 686 | if (len == 0) | ||
| 687 | break; | ||
| 688 | } | ||
| 689 | cpos += num_clusters; | ||
| 690 | } | ||
| 691 | out: | ||
| 692 | return ret; | ||
| 693 | } | ||
| 694 | |||
| 695 | static int ocfs2_xattr_ibody_get(struct inode *inode, | ||
| 696 | int name_index, | ||
| 697 | const char *name, | ||
| 698 | void *buffer, | ||
| 699 | size_t buffer_size, | ||
| 700 | struct ocfs2_xattr_search *xs) | ||
| 701 | { | ||
| 702 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 703 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 704 | struct ocfs2_xattr_value_root *xv; | ||
| 705 | size_t size; | ||
| 706 | int ret = 0; | ||
| 707 | |||
| 708 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) | ||
| 709 | return -ENODATA; | ||
| 710 | |||
| 711 | xs->end = (void *)di + inode->i_sb->s_blocksize; | ||
| 712 | xs->header = (struct ocfs2_xattr_header *) | ||
| 713 | (xs->end - le16_to_cpu(di->i_xattr_inline_size)); | ||
| 714 | xs->base = (void *)xs->header; | ||
| 715 | xs->here = xs->header->xh_entries; | ||
| 716 | |||
| 717 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
| 718 | if (ret) | ||
| 719 | return ret; | ||
| 720 | size = le64_to_cpu(xs->here->xe_value_size); | ||
| 721 | if (buffer) { | ||
| 722 | if (size > buffer_size) | ||
| 723 | return -ERANGE; | ||
| 724 | if (ocfs2_xattr_is_local(xs->here)) { | ||
| 725 | memcpy(buffer, (void *)xs->base + | ||
| 726 | le16_to_cpu(xs->here->xe_name_offset) + | ||
| 727 | OCFS2_XATTR_SIZE(xs->here->xe_name_len), size); | ||
| 728 | } else { | ||
| 729 | xv = (struct ocfs2_xattr_value_root *) | ||
| 730 | (xs->base + le16_to_cpu( | ||
| 731 | xs->here->xe_name_offset) + | ||
| 732 | OCFS2_XATTR_SIZE(xs->here->xe_name_len)); | ||
| 733 | ret = ocfs2_xattr_get_value_outside(inode, xv, | ||
| 734 | buffer, size); | ||
| 735 | if (ret < 0) { | ||
| 736 | mlog_errno(ret); | ||
| 737 | return ret; | ||
| 738 | } | ||
| 739 | } | ||
| 740 | } | ||
| 741 | |||
| 742 | return size; | ||
| 743 | } | ||
| 744 | |||
| 745 | static int ocfs2_xattr_block_get(struct inode *inode, | ||
| 746 | int name_index, | ||
| 747 | const char *name, | ||
| 748 | void *buffer, | ||
| 749 | size_t buffer_size, | ||
| 750 | struct ocfs2_xattr_search *xs) | ||
| 751 | { | ||
| 752 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 753 | struct buffer_head *blk_bh = NULL; | ||
| 754 | struct ocfs2_xattr_block *xb; | ||
| 755 | struct ocfs2_xattr_value_root *xv; | ||
| 756 | size_t size; | ||
| 757 | int ret = -ENODATA, name_offset, name_len, block_off, i; | ||
| 758 | |||
| 759 | if (!di->i_xattr_loc) | ||
| 760 | return ret; | ||
| 761 | |||
| 762 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
| 763 | |||
| 764 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
| 765 | if (ret < 0) { | ||
| 766 | mlog_errno(ret); | ||
| 767 | return ret; | ||
| 768 | } | ||
| 769 | /* Verify the signature of the xattr block */ | ||
| 770 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
| 771 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
| 772 | ret = -EFAULT; | ||
| 773 | goto cleanup; | ||
| 774 | } | ||
| 775 | |||
| 776 | xs->xattr_bh = blk_bh; | ||
| 777 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
| 778 | |||
| 779 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
| 780 | xs->header = &xb->xb_attrs.xb_header; | ||
| 781 | xs->base = (void *)xs->header; | ||
| 782 | xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; | ||
| 783 | xs->here = xs->header->xh_entries; | ||
| 784 | |||
| 785 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
| 786 | } else | ||
| 787 | ret = ocfs2_xattr_index_block_find(inode, blk_bh, | ||
| 788 | name_index, | ||
| 789 | name, xs); | ||
| 790 | |||
| 791 | if (ret) | ||
| 792 | goto cleanup; | ||
| 793 | size = le64_to_cpu(xs->here->xe_value_size); | ||
| 794 | if (buffer) { | ||
| 795 | ret = -ERANGE; | ||
| 796 | if (size > buffer_size) | ||
| 797 | goto cleanup; | ||
| 798 | |||
| 799 | name_offset = le16_to_cpu(xs->here->xe_name_offset); | ||
| 800 | name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len); | ||
| 801 | i = xs->here - xs->header->xh_entries; | ||
| 802 | |||
| 803 | if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) { | ||
| 804 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
| 805 | xs->bucket.xh, | ||
| 806 | i, | ||
| 807 | &block_off, | ||
| 808 | &name_offset); | ||
| 809 | xs->base = xs->bucket.bhs[block_off]->b_data; | ||
| 810 | } | ||
| 811 | if (ocfs2_xattr_is_local(xs->here)) { | ||
| 812 | memcpy(buffer, (void *)xs->base + | ||
| 813 | name_offset + name_len, size); | ||
| 814 | } else { | ||
| 815 | xv = (struct ocfs2_xattr_value_root *) | ||
| 816 | (xs->base + name_offset + name_len); | ||
| 817 | ret = ocfs2_xattr_get_value_outside(inode, xv, | ||
| 818 | buffer, size); | ||
| 819 | if (ret < 0) { | ||
| 820 | mlog_errno(ret); | ||
| 821 | goto cleanup; | ||
| 822 | } | ||
| 823 | } | ||
| 824 | } | ||
| 825 | ret = size; | ||
| 826 | cleanup: | ||
| 827 | for (i = 0; i < OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET; i++) | ||
| 828 | brelse(xs->bucket.bhs[i]); | ||
| 829 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
| 830 | |||
| 831 | brelse(blk_bh); | ||
| 832 | return ret; | ||
| 833 | } | ||
| 834 | |||
| 835 | /* ocfs2_xattr_get() | ||
| 836 | * | ||
| 837 | * Copy an extended attribute into the buffer provided. | ||
| 838 | * If buffer is NULL, only compute the size of the buffer required. | ||
| 839 | */ | ||
| 840 | int ocfs2_xattr_get(struct inode *inode, | ||
| 841 | int name_index, | ||
| 842 | const char *name, | ||
| 843 | void *buffer, | ||
| 844 | size_t buffer_size) | ||
| 845 | { | ||
| 846 | int ret; | ||
| 847 | struct ocfs2_dinode *di = NULL; | ||
| 848 | struct buffer_head *di_bh = NULL; | ||
| 849 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 850 | struct ocfs2_xattr_search xis = { | ||
| 851 | .not_found = -ENODATA, | ||
| 852 | }; | ||
| 853 | struct ocfs2_xattr_search xbs = { | ||
| 854 | .not_found = -ENODATA, | ||
| 855 | }; | ||
| 856 | |||
| 857 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
| 858 | return -EOPNOTSUPP; | ||
| 859 | |||
| 860 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
| 861 | return -ENODATA; | ||
| 862 | |||
| 863 | ret = ocfs2_inode_lock(inode, &di_bh, 0); | ||
| 864 | if (ret < 0) { | ||
| 865 | mlog_errno(ret); | ||
| 866 | return ret; | ||
| 867 | } | ||
| 868 | xis.inode_bh = xbs.inode_bh = di_bh; | ||
| 869 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 870 | |||
| 871 | down_read(&oi->ip_xattr_sem); | ||
| 872 | ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, | ||
| 873 | buffer_size, &xis); | ||
| 874 | if (ret == -ENODATA) | ||
| 875 | ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, | ||
| 876 | buffer_size, &xbs); | ||
| 877 | up_read(&oi->ip_xattr_sem); | ||
| 878 | ocfs2_inode_unlock(inode, 0); | ||
| 879 | |||
| 880 | brelse(di_bh); | ||
| 881 | |||
| 882 | return ret; | ||
| 883 | } | ||
| 884 | |||
| 885 | static int __ocfs2_xattr_set_value_outside(struct inode *inode, | ||
| 886 | struct ocfs2_xattr_value_root *xv, | ||
| 887 | const void *value, | ||
| 888 | int value_len) | ||
| 889 | { | ||
| 890 | int ret = 0, i, cp_len, credits; | ||
| 891 | u16 blocksize = inode->i_sb->s_blocksize; | ||
| 892 | u32 p_cluster, num_clusters; | ||
| 893 | u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
| 894 | u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len); | ||
| 895 | u64 blkno; | ||
| 896 | struct buffer_head *bh = NULL; | ||
| 897 | handle_t *handle; | ||
| 898 | |||
| 899 | BUG_ON(clusters > le32_to_cpu(xv->xr_clusters)); | ||
| 900 | |||
| 901 | credits = clusters * bpc; | ||
| 902 | handle = ocfs2_start_trans(OCFS2_SB(inode->i_sb), credits); | ||
| 903 | if (IS_ERR(handle)) { | ||
| 904 | ret = PTR_ERR(handle); | ||
| 905 | mlog_errno(ret); | ||
| 906 | goto out; | ||
| 907 | } | ||
| 908 | |||
| 909 | while (cpos < clusters) { | ||
| 910 | ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster, | ||
| 911 | &num_clusters, &xv->xr_list); | ||
| 912 | if (ret) { | ||
| 913 | mlog_errno(ret); | ||
| 914 | goto out_commit; | ||
| 915 | } | ||
| 916 | |||
| 917 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster); | ||
| 918 | |||
| 919 | for (i = 0; i < num_clusters * bpc; i++, blkno++) { | ||
| 920 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
| 921 | if (ret) { | ||
| 922 | mlog_errno(ret); | ||
| 923 | goto out_commit; | ||
| 924 | } | ||
| 925 | |||
| 926 | ret = ocfs2_journal_access(handle, | ||
| 927 | inode, | ||
| 928 | bh, | ||
| 929 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 930 | if (ret < 0) { | ||
| 931 | mlog_errno(ret); | ||
| 932 | goto out_commit; | ||
| 933 | } | ||
| 934 | |||
| 935 | cp_len = value_len > blocksize ? blocksize : value_len; | ||
| 936 | memcpy(bh->b_data, value, cp_len); | ||
| 937 | value_len -= cp_len; | ||
| 938 | value += cp_len; | ||
| 939 | if (cp_len < blocksize) | ||
| 940 | memset(bh->b_data + cp_len, 0, | ||
| 941 | blocksize - cp_len); | ||
| 942 | |||
| 943 | ret = ocfs2_journal_dirty(handle, bh); | ||
| 944 | if (ret < 0) { | ||
| 945 | mlog_errno(ret); | ||
| 946 | goto out_commit; | ||
| 947 | } | ||
| 948 | brelse(bh); | ||
| 949 | bh = NULL; | ||
| 950 | |||
| 951 | /* | ||
| 952 | * XXX: do we need to empty all the following | ||
| 953 | * blocks in this cluster? | ||
| 954 | */ | ||
| 955 | if (!value_len) | ||
| 956 | break; | ||
| 957 | } | ||
| 958 | cpos += num_clusters; | ||
| 959 | } | ||
| 960 | out_commit: | ||
| 961 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 962 | out: | ||
| 963 | brelse(bh); | ||
| 964 | |||
| 965 | return ret; | ||
| 966 | } | ||
| 967 | |||
| 968 | static int ocfs2_xattr_cleanup(struct inode *inode, | ||
| 969 | struct ocfs2_xattr_info *xi, | ||
| 970 | struct ocfs2_xattr_search *xs, | ||
| 971 | size_t offs) | ||
| 972 | { | ||
| 973 | handle_t *handle = NULL; | ||
| 974 | int ret = 0; | ||
| 975 | size_t name_len = strlen(xi->name); | ||
| 976 | void *val = xs->base + offs; | ||
| 977 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
| 978 | |||
| 979 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
| 980 | OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
| 981 | if (IS_ERR(handle)) { | ||
| 982 | ret = PTR_ERR(handle); | ||
| 983 | mlog_errno(ret); | ||
| 984 | goto out; | ||
| 985 | } | ||
| 986 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
| 987 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 988 | if (ret) { | ||
| 989 | mlog_errno(ret); | ||
| 990 | goto out_commit; | ||
| 991 | } | ||
| 992 | /* Decrease xattr count */ | ||
| 993 | le16_add_cpu(&xs->header->xh_count, -1); | ||
| 994 | /* Remove the xattr entry and tree root which have already been set */ | ||
| 995 | memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry)); | ||
| 996 | memset(val, 0, size); | ||
| 997 | |||
| 998 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
| 999 | if (ret < 0) | ||
| 1000 | mlog_errno(ret); | ||
| 1001 | out_commit: | ||
| 1002 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 1003 | out: | ||
| 1004 | return ret; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | static int ocfs2_xattr_update_entry(struct inode *inode, | ||
| 1008 | struct ocfs2_xattr_info *xi, | ||
| 1009 | struct ocfs2_xattr_search *xs, | ||
| 1010 | size_t offs) | ||
| 1011 | { | ||
| 1012 | handle_t *handle = NULL; | ||
| 1013 | int ret = 0; | ||
| 1014 | |||
| 1015 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
| 1016 | OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
| 1017 | if (IS_ERR(handle)) { | ||
| 1018 | ret = PTR_ERR(handle); | ||
| 1019 | mlog_errno(ret); | ||
| 1020 | goto out; | ||
| 1021 | } | ||
| 1022 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
| 1023 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1024 | if (ret) { | ||
| 1025 | mlog_errno(ret); | ||
| 1026 | goto out_commit; | ||
| 1027 | } | ||
| 1028 | |||
| 1029 | xs->here->xe_name_offset = cpu_to_le16(offs); | ||
| 1030 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
| 1031 | if (xi->value_len <= OCFS2_XATTR_INLINE_SIZE) | ||
| 1032 | ocfs2_xattr_set_local(xs->here, 1); | ||
| 1033 | else | ||
| 1034 | ocfs2_xattr_set_local(xs->here, 0); | ||
| 1035 | ocfs2_xattr_hash_entry(inode, xs->header, xs->here); | ||
| 1036 | |||
| 1037 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
| 1038 | if (ret < 0) | ||
| 1039 | mlog_errno(ret); | ||
| 1040 | out_commit: | ||
| 1041 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 1042 | out: | ||
| 1043 | return ret; | ||
| 1044 | } | ||
| 1045 | |||
| 1046 | /* | ||
| 1047 | * ocfs2_xattr_set_value_outside() | ||
| 1048 | * | ||
| 1049 | * Set a large value in the B-tree. | ||
| 1050 | */ | ||
| 1051 | static int ocfs2_xattr_set_value_outside(struct inode *inode, | ||
| 1052 | struct ocfs2_xattr_info *xi, | ||
| 1053 | struct ocfs2_xattr_search *xs, | ||
| 1054 | size_t offs) | ||
| 1055 | { | ||
| 1056 | size_t name_len = strlen(xi->name); | ||
| 1057 | void *val = xs->base + offs; | ||
| 1058 | struct ocfs2_xattr_value_root *xv = NULL; | ||
| 1059 | size_t size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
| 1060 | int ret = 0; | ||
| 1061 | |||
| 1062 | memset(val, 0, size); | ||
| 1063 | memcpy(val, xi->name, name_len); | ||
| 1064 | xv = (struct ocfs2_xattr_value_root *) | ||
| 1065 | (val + OCFS2_XATTR_SIZE(name_len)); | ||
| 1066 | xv->xr_clusters = 0; | ||
| 1067 | xv->xr_last_eb_blk = 0; | ||
| 1068 | xv->xr_list.l_tree_depth = 0; | ||
| 1069 | xv->xr_list.l_count = cpu_to_le16(1); | ||
| 1070 | xv->xr_list.l_next_free_rec = 0; | ||
| 1071 | |||
| 1072 | ret = ocfs2_xattr_value_truncate(inode, xs->xattr_bh, xv, | ||
| 1073 | xi->value_len); | ||
| 1074 | if (ret < 0) { | ||
| 1075 | mlog_errno(ret); | ||
| 1076 | return ret; | ||
| 1077 | } | ||
| 1078 | ret = __ocfs2_xattr_set_value_outside(inode, xv, xi->value, | ||
| 1079 | xi->value_len); | ||
| 1080 | if (ret < 0) { | ||
| 1081 | mlog_errno(ret); | ||
| 1082 | return ret; | ||
| 1083 | } | ||
| 1084 | ret = ocfs2_xattr_update_entry(inode, xi, xs, offs); | ||
| 1085 | if (ret < 0) | ||
| 1086 | mlog_errno(ret); | ||
| 1087 | |||
| 1088 | return ret; | ||
| 1089 | } | ||
| 1090 | |||
| 1091 | /* | ||
| 1092 | * ocfs2_xattr_set_entry_local() | ||
| 1093 | * | ||
| 1094 | * Set, replace or remove an extended attribute stored locally (inline). | ||
| 1095 | */ | ||
| 1096 | static void ocfs2_xattr_set_entry_local(struct inode *inode, | ||
| 1097 | struct ocfs2_xattr_info *xi, | ||
| 1098 | struct ocfs2_xattr_search *xs, | ||
| 1099 | struct ocfs2_xattr_entry *last, | ||
| 1100 | size_t min_offs) | ||
| 1101 | { | ||
| 1102 | size_t name_len = strlen(xi->name); | ||
| 1103 | int i; | ||
| 1104 | |||
| 1105 | if (xi->value && xs->not_found) { | ||
| 1106 | /* Insert the new xattr entry. */ | ||
| 1107 | le16_add_cpu(&xs->header->xh_count, 1); | ||
| 1108 | ocfs2_xattr_set_type(last, xi->name_index); | ||
| 1109 | ocfs2_xattr_set_local(last, 1); | ||
| 1110 | last->xe_name_len = name_len; | ||
| 1111 | } else { | ||
| 1112 | void *first_val; | ||
| 1113 | void *val; | ||
| 1114 | size_t offs, size; | ||
| 1115 | |||
| 1116 | first_val = xs->base + min_offs; | ||
| 1117 | offs = le16_to_cpu(xs->here->xe_name_offset); | ||
| 1118 | val = xs->base + offs; | ||
| 1119 | |||
| 1120 | if (le64_to_cpu(xs->here->xe_value_size) > | ||
| 1121 | OCFS2_XATTR_INLINE_SIZE) | ||
| 1122 | size = OCFS2_XATTR_SIZE(name_len) + | ||
| 1123 | OCFS2_XATTR_ROOT_SIZE; | ||
| 1124 | else | ||
| 1125 | size = OCFS2_XATTR_SIZE(name_len) + | ||
| 1126 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
| 1127 | |||
| 1128 | if (xi->value && size == OCFS2_XATTR_SIZE(name_len) + | ||
| 1129 | OCFS2_XATTR_SIZE(xi->value_len)) { | ||
| 1130 | /* The old and the new value have the | ||
| 1131 | same size. Just replace the value. */ | ||
| 1132 | ocfs2_xattr_set_local(xs->here, 1); | ||
| 1133 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
| 1134 | /* Clear value bytes. */ | ||
| 1135 | memset(val + OCFS2_XATTR_SIZE(name_len), | ||
| 1136 | 0, | ||
| 1137 | OCFS2_XATTR_SIZE(xi->value_len)); | ||
| 1138 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
| 1139 | xi->value, | ||
| 1140 | xi->value_len); | ||
| 1141 | return; | ||
| 1142 | } | ||
| 1143 | /* Remove the old name+value. */ | ||
| 1144 | memmove(first_val + size, first_val, val - first_val); | ||
| 1145 | memset(first_val, 0, size); | ||
| 1146 | xs->here->xe_name_hash = 0; | ||
| 1147 | xs->here->xe_name_offset = 0; | ||
| 1148 | ocfs2_xattr_set_local(xs->here, 1); | ||
| 1149 | xs->here->xe_value_size = 0; | ||
| 1150 | |||
| 1151 | min_offs += size; | ||
| 1152 | |||
| 1153 | /* Adjust all value offsets. */ | ||
| 1154 | last = xs->header->xh_entries; | ||
| 1155 | for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
| 1156 | size_t o = le16_to_cpu(last->xe_name_offset); | ||
| 1157 | |||
| 1158 | if (o < offs) | ||
| 1159 | last->xe_name_offset = cpu_to_le16(o + size); | ||
| 1160 | last += 1; | ||
| 1161 | } | ||
| 1162 | |||
| 1163 | if (!xi->value) { | ||
| 1164 | /* Remove the old entry. */ | ||
| 1165 | last -= 1; | ||
| 1166 | memmove(xs->here, xs->here + 1, | ||
| 1167 | (void *)last - (void *)xs->here); | ||
| 1168 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | ||
| 1169 | le16_add_cpu(&xs->header->xh_count, -1); | ||
| 1170 | } | ||
| 1171 | } | ||
| 1172 | if (xi->value) { | ||
| 1173 | /* Insert the new name+value. */ | ||
| 1174 | size_t size = OCFS2_XATTR_SIZE(name_len) + | ||
| 1175 | OCFS2_XATTR_SIZE(xi->value_len); | ||
| 1176 | void *val = xs->base + min_offs - size; | ||
| 1177 | |||
| 1178 | xs->here->xe_name_offset = cpu_to_le16(min_offs - size); | ||
| 1179 | memset(val, 0, size); | ||
| 1180 | memcpy(val, xi->name, name_len); | ||
| 1181 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
| 1182 | xi->value, | ||
| 1183 | xi->value_len); | ||
| 1184 | xs->here->xe_value_size = cpu_to_le64(xi->value_len); | ||
| 1185 | ocfs2_xattr_set_local(xs->here, 1); | ||
| 1186 | ocfs2_xattr_hash_entry(inode, xs->header, xs->here); | ||
| 1187 | } | ||
| 1188 | |||
| 1189 | return; | ||
| 1190 | } | ||
| 1191 | |||
| 1192 | /* | ||
| 1193 | * ocfs2_xattr_set_entry() | ||
| 1194 | * | ||
| 1195 | * Set an extended attribute entry in the inode or an external block. | ||
| 1196 | * | ||
| 1197 | * If the extended attribute value size > OCFS2_XATTR_INLINE_SIZE, | ||
| 1198 | * we first insert the tree root (ocfs2_xattr_value_root) with set_entry_local(), | ||
| 1199 | * then set the value in the B-tree with set_value_outside(). | ||
| 1200 | */ | ||
| 1201 | static int ocfs2_xattr_set_entry(struct inode *inode, | ||
| 1202 | struct ocfs2_xattr_info *xi, | ||
| 1203 | struct ocfs2_xattr_search *xs, | ||
| 1204 | int flag) | ||
| 1205 | { | ||
| 1206 | struct ocfs2_xattr_entry *last; | ||
| 1207 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1208 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 1209 | size_t min_offs = xs->end - xs->base, name_len = strlen(xi->name); | ||
| 1210 | size_t size_l = 0; | ||
| 1211 | handle_t *handle = NULL; | ||
| 1212 | int free, i, ret; | ||
| 1213 | struct ocfs2_xattr_info xi_l = { | ||
| 1214 | .name_index = xi->name_index, | ||
| 1215 | .name = xi->name, | ||
| 1216 | .value = xi->value, | ||
| 1217 | .value_len = xi->value_len, | ||
| 1218 | }; | ||
| 1219 | |||
| 1220 | /* Compute min_offs, last and free space. */ | ||
| 1221 | last = xs->header->xh_entries; | ||
| 1222 | |||
| 1223 | for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) { | ||
| 1224 | size_t offs = le16_to_cpu(last->xe_name_offset); | ||
| 1225 | if (offs < min_offs) | ||
| 1226 | min_offs = offs; | ||
| 1227 | last += 1; | ||
| 1228 | } | ||
| 1229 | |||
| 1230 | free = min_offs - ((void *)last - xs->base) - sizeof(__u32); | ||
| 1231 | if (free < 0) | ||
| 1232 | return -EFAULT; | ||
| 1233 | |||
| 1234 | if (!xs->not_found) { | ||
| 1235 | size_t size = 0; | ||
| 1236 | if (ocfs2_xattr_is_local(xs->here)) | ||
| 1237 | size = OCFS2_XATTR_SIZE(name_len) + | ||
| 1238 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
| 1239 | else | ||
| 1240 | size = OCFS2_XATTR_SIZE(name_len) + | ||
| 1241 | OCFS2_XATTR_ROOT_SIZE; | ||
| 1242 | free += (size + sizeof(struct ocfs2_xattr_entry)); | ||
| 1243 | } | ||
| 1244 | /* Check free space in inode or block */ | ||
| 1245 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
| 1246 | if (free < sizeof(struct ocfs2_xattr_entry) + | ||
| 1247 | OCFS2_XATTR_SIZE(name_len) + | ||
| 1248 | OCFS2_XATTR_ROOT_SIZE) { | ||
| 1249 | ret = -ENOSPC; | ||
| 1250 | goto out; | ||
| 1251 | } | ||
| 1252 | size_l = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE; | ||
| 1253 | xi_l.value = (void *)&def_xv; | ||
| 1254 | xi_l.value_len = OCFS2_XATTR_ROOT_SIZE; | ||
| 1255 | } else if (xi->value) { | ||
| 1256 | if (free < sizeof(struct ocfs2_xattr_entry) + | ||
| 1257 | OCFS2_XATTR_SIZE(name_len) + | ||
| 1258 | OCFS2_XATTR_SIZE(xi->value_len)) { | ||
| 1259 | ret = -ENOSPC; | ||
| 1260 | goto out; | ||
| 1261 | } | ||
| 1262 | } | ||
| 1263 | |||
| 1264 | if (!xs->not_found) { | ||
| 1265 | /* For existing extended attribute */ | ||
| 1266 | size_t size = OCFS2_XATTR_SIZE(name_len) + | ||
| 1267 | OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size)); | ||
| 1268 | size_t offs = le16_to_cpu(xs->here->xe_name_offset); | ||
| 1269 | void *val = xs->base + offs; | ||
| 1270 | |||
| 1271 | if (ocfs2_xattr_is_local(xs->here) && size == size_l) { | ||
| 1272 | /* Replace existing local xattr with tree root */ | ||
| 1273 | ret = ocfs2_xattr_set_value_outside(inode, xi, xs, | ||
| 1274 | offs); | ||
| 1275 | if (ret < 0) | ||
| 1276 | mlog_errno(ret); | ||
| 1277 | goto out; | ||
| 1278 | } else if (!ocfs2_xattr_is_local(xs->here)) { | ||
| 1279 | /* For existing xattr which has value outside */ | ||
| 1280 | struct ocfs2_xattr_value_root *xv = NULL; | ||
| 1281 | xv = (struct ocfs2_xattr_value_root *)(val + | ||
| 1282 | OCFS2_XATTR_SIZE(name_len)); | ||
| 1283 | |||
| 1284 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
| 1285 | /* | ||
| 1286 | * If the new value also needs to be stored outside, | ||
| 1287 | * first truncate the old value to the new size, | ||
| 1288 | * then set the new value with set_value_outside(). | ||
| 1289 | */ | ||
| 1290 | ret = ocfs2_xattr_value_truncate(inode, | ||
| 1291 | xs->xattr_bh, | ||
| 1292 | xv, | ||
| 1293 | xi->value_len); | ||
| 1294 | if (ret < 0) { | ||
| 1295 | mlog_errno(ret); | ||
| 1296 | goto out; | ||
| 1297 | } | ||
| 1298 | |||
| 1299 | ret = __ocfs2_xattr_set_value_outside(inode, | ||
| 1300 | xv, | ||
| 1301 | xi->value, | ||
| 1302 | xi->value_len); | ||
| 1303 | if (ret < 0) { | ||
| 1304 | mlog_errno(ret); | ||
| 1305 | goto out; | ||
| 1306 | } | ||
| 1307 | |||
| 1308 | ret = ocfs2_xattr_update_entry(inode, | ||
| 1309 | xi, | ||
| 1310 | xs, | ||
| 1311 | offs); | ||
| 1312 | if (ret < 0) | ||
| 1313 | mlog_errno(ret); | ||
| 1314 | goto out; | ||
| 1315 | } else { | ||
| 1316 | /* | ||
| 1317 | * If the new value needs to be stored locally, | ||
| 1318 | * just truncate the old value to zero. | ||
| 1319 | */ | ||
| 1320 | ret = ocfs2_xattr_value_truncate(inode, | ||
| 1321 | xs->xattr_bh, | ||
| 1322 | xv, | ||
| 1323 | 0); | ||
| 1324 | if (ret < 0) | ||
| 1325 | mlog_errno(ret); | ||
| 1326 | } | ||
| 1327 | } | ||
| 1328 | } | ||
| 1329 | |||
| 1330 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
| 1331 | OCFS2_INODE_UPDATE_CREDITS); | ||
| 1332 | if (IS_ERR(handle)) { | ||
| 1333 | ret = PTR_ERR(handle); | ||
| 1334 | mlog_errno(ret); | ||
| 1335 | goto out; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | ret = ocfs2_journal_access(handle, inode, xs->inode_bh, | ||
| 1339 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1340 | if (ret) { | ||
| 1341 | mlog_errno(ret); | ||
| 1342 | goto out_commit; | ||
| 1343 | } | ||
| 1344 | |||
| 1345 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | ||
| 1346 | /* set extended attribute in external block. */ | ||
| 1347 | ret = ocfs2_extend_trans(handle, | ||
| 1348 | OCFS2_INODE_UPDATE_CREDITS + | ||
| 1349 | OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
| 1350 | if (ret) { | ||
| 1351 | mlog_errno(ret); | ||
| 1352 | goto out_commit; | ||
| 1353 | } | ||
| 1354 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
| 1355 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1356 | if (ret) { | ||
| 1357 | mlog_errno(ret); | ||
| 1358 | goto out_commit; | ||
| 1359 | } | ||
| 1360 | } | ||
| 1361 | |||
| 1362 | /* | ||
| 1363 | * Set the value locally, including setting the tree root locally. | ||
| 1364 | * This is the first step when the value size > INLINE_SIZE. | ||
| 1365 | */ | ||
| 1366 | ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs); | ||
| 1367 | |||
| 1368 | if (!(flag & OCFS2_INLINE_XATTR_FL)) { | ||
| 1369 | ret = ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
| 1370 | if (ret < 0) { | ||
| 1371 | mlog_errno(ret); | ||
| 1372 | goto out_commit; | ||
| 1373 | } | ||
| 1374 | } | ||
| 1375 | |||
| 1376 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) && | ||
| 1377 | (flag & OCFS2_INLINE_XATTR_FL)) { | ||
| 1378 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1379 | unsigned int xattrsize = osb->s_xattr_inline_size; | ||
| 1380 | |||
| 1381 | /* | ||
| 1382 | * Adjust extent record count or inline data size | ||
| 1383 | * to reserve space for extended attribute. | ||
| 1384 | */ | ||
| 1385 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
| 1386 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
| 1387 | le16_add_cpu(&idata->id_count, -xattrsize); | ||
| 1388 | } else if (!(ocfs2_inode_is_fast_symlink(inode))) { | ||
| 1389 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
| 1390 | le16_add_cpu(&el->l_count, -(xattrsize / | ||
| 1391 | sizeof(struct ocfs2_extent_rec))); | ||
| 1392 | } | ||
| 1393 | di->i_xattr_inline_size = cpu_to_le16(xattrsize); | ||
| 1394 | } | ||
| 1395 | /* Update xattr flag */ | ||
| 1396 | spin_lock(&oi->ip_lock); | ||
| 1397 | oi->ip_dyn_features |= flag; | ||
| 1398 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
| 1399 | spin_unlock(&oi->ip_lock); | ||
| 1400 | /* Update inode ctime */ | ||
| 1401 | inode->i_ctime = CURRENT_TIME; | ||
| 1402 | di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); | ||
| 1403 | di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); | ||
| 1404 | |||
| 1405 | ret = ocfs2_journal_dirty(handle, xs->inode_bh); | ||
| 1406 | if (ret < 0) | ||
| 1407 | mlog_errno(ret); | ||
| 1408 | |||
| 1409 | out_commit: | ||
| 1410 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 1411 | |||
| 1412 | if (!ret && xi->value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
| 1413 | /* | ||
| 1414 | * Set the value outside in the B-tree. | ||
| 1415 | * This is the second step when the value size > INLINE_SIZE. | ||
| 1416 | */ | ||
| 1417 | size_t offs = le16_to_cpu(xs->here->xe_name_offset); | ||
| 1418 | ret = ocfs2_xattr_set_value_outside(inode, xi, xs, offs); | ||
| 1419 | if (ret < 0) { | ||
| 1420 | int ret2; | ||
| 1421 | |||
| 1422 | mlog_errno(ret); | ||
| 1423 | /* | ||
| 1424 | * If setting the value outside failed, we have to clean up | ||
| 1425 | * the junk tree root we have already set locally. | ||
| 1426 | */ | ||
| 1427 | ret2 = ocfs2_xattr_cleanup(inode, xi, xs, offs); | ||
| 1428 | if (ret2 < 0) | ||
| 1429 | mlog_errno(ret2); | ||
| 1430 | } | ||
| 1431 | } | ||
| 1432 | out: | ||
| 1433 | return ret; | ||
| 1434 | |||
| 1435 | } | ||
| 1436 | |||
| 1437 | static int ocfs2_remove_value_outside(struct inode *inode, | ||
| 1438 | struct buffer_head *bh, | ||
| 1439 | struct ocfs2_xattr_header *header) | ||
| 1440 | { | ||
| 1441 | int ret = 0, i; | ||
| 1442 | |||
| 1443 | for (i = 0; i < le16_to_cpu(header->xh_count); i++) { | ||
| 1444 | struct ocfs2_xattr_entry *entry = &header->xh_entries[i]; | ||
| 1445 | |||
| 1446 | if (!ocfs2_xattr_is_local(entry)) { | ||
| 1447 | struct ocfs2_xattr_value_root *xv; | ||
| 1448 | void *val; | ||
| 1449 | |||
| 1450 | val = (void *)header + | ||
| 1451 | le16_to_cpu(entry->xe_name_offset); | ||
| 1452 | xv = (struct ocfs2_xattr_value_root *) | ||
| 1453 | (val + OCFS2_XATTR_SIZE(entry->xe_name_len)); | ||
| 1454 | ret = ocfs2_xattr_value_truncate(inode, bh, xv, 0); | ||
| 1455 | if (ret < 0) { | ||
| 1456 | mlog_errno(ret); | ||
| 1457 | return ret; | ||
| 1458 | } | ||
| 1459 | } | ||
| 1460 | } | ||
| 1461 | |||
| 1462 | return ret; | ||
| 1463 | } | ||
| 1464 | |||
| 1465 | static int ocfs2_xattr_ibody_remove(struct inode *inode, | ||
| 1466 | struct buffer_head *di_bh) | ||
| 1467 | { | ||
| 1468 | |||
| 1469 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1470 | struct ocfs2_xattr_header *header; | ||
| 1471 | int ret; | ||
| 1472 | |||
| 1473 | header = (struct ocfs2_xattr_header *) | ||
| 1474 | ((void *)di + inode->i_sb->s_blocksize - | ||
| 1475 | le16_to_cpu(di->i_xattr_inline_size)); | ||
| 1476 | |||
| 1477 | ret = ocfs2_remove_value_outside(inode, di_bh, header); | ||
| 1478 | |||
| 1479 | return ret; | ||
| 1480 | } | ||
| 1481 | |||
| 1482 | static int ocfs2_xattr_block_remove(struct inode *inode, | ||
| 1483 | struct buffer_head *blk_bh) | ||
| 1484 | { | ||
| 1485 | struct ocfs2_xattr_block *xb; | ||
| 1486 | int ret = 0; | ||
| 1487 | |||
| 1488 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
| 1489 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
| 1490 | struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header); | ||
| 1491 | ret = ocfs2_remove_value_outside(inode, blk_bh, header); | ||
| 1492 | } else | ||
| 1493 | ret = ocfs2_delete_xattr_index_block(inode, blk_bh); | ||
| 1494 | |||
| 1495 | return ret; | ||
| 1496 | } | ||
| 1497 | |||
| 1498 | static int ocfs2_xattr_free_block(struct inode *inode, | ||
| 1499 | u64 block) | ||
| 1500 | { | ||
| 1501 | struct inode *xb_alloc_inode; | ||
| 1502 | struct buffer_head *xb_alloc_bh = NULL; | ||
| 1503 | struct buffer_head *blk_bh = NULL; | ||
| 1504 | struct ocfs2_xattr_block *xb; | ||
| 1505 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1506 | handle_t *handle; | ||
| 1507 | int ret = 0; | ||
| 1508 | u64 blk, bg_blkno; | ||
| 1509 | u16 bit; | ||
| 1510 | |||
| 1511 | ret = ocfs2_read_block(inode, block, &blk_bh); | ||
| 1512 | if (ret < 0) { | ||
| 1513 | mlog_errno(ret); | ||
| 1514 | goto out; | ||
| 1515 | } | ||
| 1516 | |||
| 1517 | /* Verify the signature of the xattr block */ | ||
| 1518 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
| 1519 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
| 1520 | ret = -EFAULT; | ||
| 1521 | goto out; | ||
| 1522 | } | ||
| 1523 | |||
| 1524 | ret = ocfs2_xattr_block_remove(inode, blk_bh); | ||
| 1525 | if (ret < 0) { | ||
| 1526 | mlog_errno(ret); | ||
| 1527 | goto out; | ||
| 1528 | } | ||
| 1529 | |||
| 1530 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
| 1531 | blk = le64_to_cpu(xb->xb_blkno); | ||
| 1532 | bit = le16_to_cpu(xb->xb_suballoc_bit); | ||
| 1533 | bg_blkno = ocfs2_which_suballoc_group(blk, bit); | ||
| 1534 | |||
| 1535 | xb_alloc_inode = ocfs2_get_system_file_inode(osb, | ||
| 1536 | EXTENT_ALLOC_SYSTEM_INODE, | ||
| 1537 | le16_to_cpu(xb->xb_suballoc_slot)); | ||
| 1538 | if (!xb_alloc_inode) { | ||
| 1539 | ret = -ENOMEM; | ||
| 1540 | mlog_errno(ret); | ||
| 1541 | goto out; | ||
| 1542 | } | ||
| 1543 | mutex_lock(&xb_alloc_inode->i_mutex); | ||
| 1544 | |||
| 1545 | ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1); | ||
| 1546 | if (ret < 0) { | ||
| 1547 | mlog_errno(ret); | ||
| 1548 | goto out_mutex; | ||
| 1549 | } | ||
| 1550 | |||
| 1551 | handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE); | ||
| 1552 | if (IS_ERR(handle)) { | ||
| 1553 | ret = PTR_ERR(handle); | ||
| 1554 | mlog_errno(ret); | ||
| 1555 | goto out_unlock; | ||
| 1556 | } | ||
| 1557 | |||
| 1558 | ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh, | ||
| 1559 | bit, bg_blkno, 1); | ||
| 1560 | if (ret < 0) | ||
| 1561 | mlog_errno(ret); | ||
| 1562 | |||
| 1563 | ocfs2_commit_trans(osb, handle); | ||
| 1564 | out_unlock: | ||
| 1565 | ocfs2_inode_unlock(xb_alloc_inode, 1); | ||
| 1566 | brelse(xb_alloc_bh); | ||
| 1567 | out_mutex: | ||
| 1568 | mutex_unlock(&xb_alloc_inode->i_mutex); | ||
| 1569 | iput(xb_alloc_inode); | ||
| 1570 | out: | ||
| 1571 | brelse(blk_bh); | ||
| 1572 | return ret; | ||
| 1573 | } | ||
| 1574 | |||
| 1575 | /* | ||
| 1576 | * ocfs2_xattr_remove() | ||
| 1577 | * | ||
| 1578 | * Free extended attribute resources associated with this inode. | ||
| 1579 | */ | ||
| 1580 | int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh) | ||
| 1581 | { | ||
| 1582 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1583 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 1584 | handle_t *handle; | ||
| 1585 | int ret; | ||
| 1586 | |||
| 1587 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
| 1588 | return 0; | ||
| 1589 | |||
| 1590 | if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) | ||
| 1591 | return 0; | ||
| 1592 | |||
| 1593 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | ||
| 1594 | ret = ocfs2_xattr_ibody_remove(inode, di_bh); | ||
| 1595 | if (ret < 0) { | ||
| 1596 | mlog_errno(ret); | ||
| 1597 | goto out; | ||
| 1598 | } | ||
| 1599 | } | ||
| 1600 | |||
| 1601 | if (di->i_xattr_loc) { | ||
| 1602 | ret = ocfs2_xattr_free_block(inode, | ||
| 1603 | le64_to_cpu(di->i_xattr_loc)); | ||
| 1604 | if (ret < 0) { | ||
| 1605 | mlog_errno(ret); | ||
| 1606 | goto out; | ||
| 1607 | } | ||
| 1608 | } | ||
| 1609 | |||
| 1610 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), | ||
| 1611 | OCFS2_INODE_UPDATE_CREDITS); | ||
| 1612 | if (IS_ERR(handle)) { | ||
| 1613 | ret = PTR_ERR(handle); | ||
| 1614 | mlog_errno(ret); | ||
| 1615 | goto out; | ||
| 1616 | } | ||
| 1617 | ret = ocfs2_journal_access(handle, inode, di_bh, | ||
| 1618 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1619 | if (ret) { | ||
| 1620 | mlog_errno(ret); | ||
| 1621 | goto out_commit; | ||
| 1622 | } | ||
| 1623 | |||
| 1624 | di->i_xattr_loc = 0; | ||
| 1625 | |||
| 1626 | spin_lock(&oi->ip_lock); | ||
| 1627 | oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL); | ||
| 1628 | di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features); | ||
| 1629 | spin_unlock(&oi->ip_lock); | ||
| 1630 | |||
| 1631 | ret = ocfs2_journal_dirty(handle, di_bh); | ||
| 1632 | if (ret < 0) | ||
| 1633 | mlog_errno(ret); | ||
| 1634 | out_commit: | ||
| 1635 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 1636 | out: | ||
| 1637 | return ret; | ||
| 1638 | } | ||
| 1639 | |||
| 1640 | static int ocfs2_xattr_has_space_inline(struct inode *inode, | ||
| 1641 | struct ocfs2_dinode *di) | ||
| 1642 | { | ||
| 1643 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1644 | unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size; | ||
| 1645 | int free; | ||
| 1646 | |||
| 1647 | if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE) | ||
| 1648 | return 0; | ||
| 1649 | |||
| 1650 | if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) { | ||
| 1651 | struct ocfs2_inline_data *idata = &di->id2.i_data; | ||
| 1652 | free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size); | ||
| 1653 | } else if (ocfs2_inode_is_fast_symlink(inode)) { | ||
| 1654 | free = ocfs2_fast_symlink_chars(inode->i_sb) - | ||
| 1655 | le64_to_cpu(di->i_size); | ||
| 1656 | } else { | ||
| 1657 | struct ocfs2_extent_list *el = &di->id2.i_list; | ||
| 1658 | free = (le16_to_cpu(el->l_count) - | ||
| 1659 | le16_to_cpu(el->l_next_free_rec)) * | ||
| 1660 | sizeof(struct ocfs2_extent_rec); | ||
| 1661 | } | ||
| 1662 | if (free >= xattrsize) | ||
| 1663 | return 1; | ||
| 1664 | |||
| 1665 | return 0; | ||
| 1666 | } | ||
| 1667 | |||
| 1668 | /* | ||
| 1669 | * ocfs2_xattr_ibody_find() | ||
| 1670 | * | ||
| 1671 | * Find the extended attribute in the inode block and | ||
| 1672 | * fill the search info into struct ocfs2_xattr_search. | ||
| 1673 | */ | ||
| 1674 | static int ocfs2_xattr_ibody_find(struct inode *inode, | ||
| 1675 | int name_index, | ||
| 1676 | const char *name, | ||
| 1677 | struct ocfs2_xattr_search *xs) | ||
| 1678 | { | ||
| 1679 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1680 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 1681 | int ret; | ||
| 1682 | int has_space = 0; | ||
| 1683 | |||
| 1684 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | ||
| 1685 | return 0; | ||
| 1686 | |||
| 1687 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
| 1688 | down_read(&oi->ip_alloc_sem); | ||
| 1689 | has_space = ocfs2_xattr_has_space_inline(inode, di); | ||
| 1690 | up_read(&oi->ip_alloc_sem); | ||
| 1691 | if (!has_space) | ||
| 1692 | return 0; | ||
| 1693 | } | ||
| 1694 | |||
| 1695 | xs->xattr_bh = xs->inode_bh; | ||
| 1696 | xs->end = (void *)di + inode->i_sb->s_blocksize; | ||
| 1697 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) | ||
| 1698 | xs->header = (struct ocfs2_xattr_header *) | ||
| 1699 | (xs->end - le16_to_cpu(di->i_xattr_inline_size)); | ||
| 1700 | else | ||
| 1701 | xs->header = (struct ocfs2_xattr_header *) | ||
| 1702 | (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size); | ||
| 1703 | xs->base = (void *)xs->header; | ||
| 1704 | xs->here = xs->header->xh_entries; | ||
| 1705 | |||
| 1706 | /* Find the named attribute. */ | ||
| 1707 | if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) { | ||
| 1708 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
| 1709 | if (ret && ret != -ENODATA) | ||
| 1710 | return ret; | ||
| 1711 | xs->not_found = ret; | ||
| 1712 | } | ||
| 1713 | |||
| 1714 | return 0; | ||
| 1715 | } | ||
| 1716 | |||
| 1717 | /* | ||
| 1718 | * ocfs2_xattr_ibody_set() | ||
| 1719 | * | ||
| 1720 | * Set, replace or remove an extended attribute in the inode block. | ||
| 1721 | * | ||
| 1722 | */ | ||
| 1723 | static int ocfs2_xattr_ibody_set(struct inode *inode, | ||
| 1724 | struct ocfs2_xattr_info *xi, | ||
| 1725 | struct ocfs2_xattr_search *xs) | ||
| 1726 | { | ||
| 1727 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 1728 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 1729 | int ret; | ||
| 1730 | |||
| 1731 | if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) | ||
| 1732 | return -ENOSPC; | ||
| 1733 | |||
| 1734 | down_write(&oi->ip_alloc_sem); | ||
| 1735 | if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) { | ||
| 1736 | if (!ocfs2_xattr_has_space_inline(inode, di)) { | ||
| 1737 | ret = -ENOSPC; | ||
| 1738 | goto out; | ||
| 1739 | } | ||
| 1740 | } | ||
| 1741 | |||
| 1742 | ret = ocfs2_xattr_set_entry(inode, xi, xs, | ||
| 1743 | (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL)); | ||
| 1744 | out: | ||
| 1745 | up_write(&oi->ip_alloc_sem); | ||
| 1746 | |||
| 1747 | return ret; | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | /* | ||
| 1751 | * ocfs2_xattr_block_find() | ||
| 1752 | * | ||
| 1753 | * Find an extended attribute in the external block and | ||
| 1754 | * fill the search info into struct ocfs2_xattr_search. | ||
| 1755 | */ | ||
| 1756 | static int ocfs2_xattr_block_find(struct inode *inode, | ||
| 1757 | int name_index, | ||
| 1758 | const char *name, | ||
| 1759 | struct ocfs2_xattr_search *xs) | ||
| 1760 | { | ||
| 1761 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 1762 | struct buffer_head *blk_bh = NULL; | ||
| 1763 | struct ocfs2_xattr_block *xb; | ||
| 1764 | int ret = 0; | ||
| 1765 | |||
| 1766 | if (!di->i_xattr_loc) | ||
| 1767 | return ret; | ||
| 1768 | |||
| 1769 | ret = ocfs2_read_block(inode, le64_to_cpu(di->i_xattr_loc), &blk_bh); | ||
| 1770 | if (ret < 0) { | ||
| 1771 | mlog_errno(ret); | ||
| 1772 | return ret; | ||
| 1773 | } | ||
| 1774 | /* Verify the signature of the xattr block. */ | ||
| 1775 | if (memcmp((void *)blk_bh->b_data, OCFS2_XATTR_BLOCK_SIGNATURE, | ||
| 1776 | strlen(OCFS2_XATTR_BLOCK_SIGNATURE))) { | ||
| 1777 | ret = -EFAULT; | ||
| 1778 | goto cleanup; | ||
| 1779 | } | ||
| 1780 | |||
| 1781 | xs->xattr_bh = blk_bh; | ||
| 1782 | xb = (struct ocfs2_xattr_block *)blk_bh->b_data; | ||
| 1783 | |||
| 1784 | if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
| 1785 | xs->header = &xb->xb_attrs.xb_header; | ||
| 1786 | xs->base = (void *)xs->header; | ||
| 1787 | xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size; | ||
| 1788 | xs->here = xs->header->xh_entries; | ||
| 1789 | |||
| 1790 | ret = ocfs2_xattr_find_entry(name_index, name, xs); | ||
| 1791 | } else | ||
| 1792 | ret = ocfs2_xattr_index_block_find(inode, blk_bh, | ||
| 1793 | name_index, | ||
| 1794 | name, xs); | ||
| 1795 | |||
| 1796 | if (ret && ret != -ENODATA) { | ||
| 1797 | xs->xattr_bh = NULL; | ||
| 1798 | goto cleanup; | ||
| 1799 | } | ||
| 1800 | xs->not_found = ret; | ||
| 1801 | return 0; | ||
| 1802 | cleanup: | ||
| 1803 | brelse(blk_bh); | ||
| 1804 | |||
| 1805 | return ret; | ||
| 1806 | } | ||
| 1807 | |||
| 1808 | /* | ||
| 1809 | * When all the xattrs are deleted from the index btree, the ocfs2_xattr_tree | ||
| 1810 | * will be erased and the ocfs2_xattr_block will have its ocfs2_xattr_header | ||
| 1811 | * re-initialized. | ||
| 1812 | */ | ||
| 1813 | static int ocfs2_restore_xattr_block(struct inode *inode, | ||
| 1814 | struct ocfs2_xattr_search *xs) | ||
| 1815 | { | ||
| 1816 | int ret; | ||
| 1817 | handle_t *handle; | ||
| 1818 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1819 | struct ocfs2_xattr_block *xb = | ||
| 1820 | (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | ||
| 1821 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
| 1822 | u16 xb_flags = le16_to_cpu(xb->xb_flags); | ||
| 1823 | |||
| 1824 | BUG_ON(!(xb_flags & OCFS2_XATTR_INDEXED) || | ||
| 1825 | le16_to_cpu(el->l_next_free_rec) != 0); | ||
| 1826 | |||
| 1827 | handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_UPDATE_CREDITS); | ||
| 1828 | if (IS_ERR(handle)) { | ||
| 1829 | ret = PTR_ERR(handle); | ||
| 1830 | handle = NULL; | ||
| 1831 | goto out; | ||
| 1832 | } | ||
| 1833 | |||
| 1834 | ret = ocfs2_journal_access(handle, inode, xs->xattr_bh, | ||
| 1835 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 1836 | if (ret < 0) { | ||
| 1837 | mlog_errno(ret); | ||
| 1838 | goto out_commit; | ||
| 1839 | } | ||
| 1840 | |||
| 1841 | memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - | ||
| 1842 | offsetof(struct ocfs2_xattr_block, xb_attrs)); | ||
| 1843 | |||
| 1844 | xb->xb_flags = cpu_to_le16(xb_flags & ~OCFS2_XATTR_INDEXED); | ||
| 1845 | |||
| 1846 | ocfs2_journal_dirty(handle, xs->xattr_bh); | ||
| 1847 | |||
| 1848 | out_commit: | ||
| 1849 | ocfs2_commit_trans(osb, handle); | ||
| 1850 | out: | ||
| 1851 | return ret; | ||
| 1852 | } | ||
| 1853 | |||
| 1854 | /* | ||
| 1855 | * ocfs2_xattr_block_set() | ||
| 1856 | * | ||
| 1857 | * Set, replace or remove an extended attribute in the external block. | ||
| 1858 | * | ||
| 1859 | */ | ||
| 1860 | static int ocfs2_xattr_block_set(struct inode *inode, | ||
| 1861 | struct ocfs2_xattr_info *xi, | ||
| 1862 | struct ocfs2_xattr_search *xs) | ||
| 1863 | { | ||
| 1864 | struct buffer_head *new_bh = NULL; | ||
| 1865 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 1866 | struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data; | ||
| 1867 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 1868 | handle_t *handle = NULL; | ||
| 1869 | struct ocfs2_xattr_block *xblk = NULL; | ||
| 1870 | u16 suballoc_bit_start; | ||
| 1871 | u32 num_got; | ||
| 1872 | u64 first_blkno; | ||
| 1873 | int ret; | ||
| 1874 | |||
| 1875 | if (!xs->xattr_bh) { | ||
| 1876 | /* | ||
| 1877 | * Allocate one external block for extended attributes | ||
| 1878 | * outside of the inode. | ||
| 1879 | */ | ||
| 1880 | ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac); | ||
| 1881 | if (ret < 0) { | ||
| 1882 | mlog_errno(ret); | ||
| 1883 | goto out; | ||
| 1884 | } | ||
| 1885 | handle = ocfs2_start_trans(osb, | ||
| 1886 | OCFS2_XATTR_BLOCK_CREATE_CREDITS); | ||
| 1887 | if (IS_ERR(handle)) { | ||
| 1888 | ret = PTR_ERR(handle); | ||
| 1889 | mlog_errno(ret); | ||
| 1890 | goto out; | ||
| 1891 | } | ||
| 1892 | ret = ocfs2_journal_access(handle, inode, xs->inode_bh, | ||
| 1893 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
| 1894 | if (ret < 0) { | ||
| 1895 | mlog_errno(ret); | ||
| 1896 | goto out_commit; | ||
| 1897 | } | ||
| 1898 | |||
| 1899 | ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1, | ||
| 1900 | &suballoc_bit_start, &num_got, | ||
| 1901 | &first_blkno); | ||
| 1902 | if (ret < 0) { | ||
| 1903 | mlog_errno(ret); | ||
| 1904 | goto out_commit; | ||
| 1905 | } | ||
| 1906 | |||
| 1907 | new_bh = sb_getblk(inode->i_sb, first_blkno); | ||
| 1908 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | ||
| 1909 | |||
| 1910 | ret = ocfs2_journal_access(handle, inode, new_bh, | ||
| 1911 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
| 1912 | if (ret < 0) { | ||
| 1913 | mlog_errno(ret); | ||
| 1914 | goto out_commit; | ||
| 1915 | } | ||
| 1916 | |||
| 1917 | /* Initialize ocfs2_xattr_block */ | ||
| 1918 | xs->xattr_bh = new_bh; | ||
| 1919 | xblk = (struct ocfs2_xattr_block *)new_bh->b_data; | ||
| 1920 | memset(xblk, 0, inode->i_sb->s_blocksize); | ||
| 1921 | strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE); | ||
| 1922 | xblk->xb_suballoc_slot = cpu_to_le16(osb->slot_num); | ||
| 1923 | xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start); | ||
| 1924 | xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation); | ||
| 1925 | xblk->xb_blkno = cpu_to_le64(first_blkno); | ||
| 1926 | |||
| 1927 | xs->header = &xblk->xb_attrs.xb_header; | ||
| 1928 | xs->base = (void *)xs->header; | ||
| 1929 | xs->end = (void *)xblk + inode->i_sb->s_blocksize; | ||
| 1930 | xs->here = xs->header->xh_entries; | ||
| 1931 | |||
| 1932 | |||
| 1933 | ret = ocfs2_journal_dirty(handle, new_bh); | ||
| 1934 | if (ret < 0) { | ||
| 1935 | mlog_errno(ret); | ||
| 1936 | goto out_commit; | ||
| 1937 | } | ||
| 1938 | di->i_xattr_loc = cpu_to_le64(first_blkno); | ||
| 1939 | ret = ocfs2_journal_dirty(handle, xs->inode_bh); | ||
| 1940 | if (ret < 0) | ||
| 1941 | mlog_errno(ret); | ||
| 1942 | out_commit: | ||
| 1943 | ocfs2_commit_trans(osb, handle); | ||
| 1944 | out: | ||
| 1945 | if (meta_ac) | ||
| 1946 | ocfs2_free_alloc_context(meta_ac); | ||
| 1947 | if (ret < 0) | ||
| 1948 | return ret; | ||
| 1949 | } else | ||
| 1950 | xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data; | ||
| 1951 | |||
| 1952 | if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) { | ||
| 1953 | /* Set the extended attribute in the external block. */ | ||
| 1954 | ret = ocfs2_xattr_set_entry(inode, xi, xs, OCFS2_HAS_XATTR_FL); | ||
| 1955 | if (!ret || ret != -ENOSPC) | ||
| 1956 | goto end; | ||
| 1957 | |||
| 1958 | ret = ocfs2_xattr_create_index_block(inode, xs); | ||
| 1959 | if (ret) | ||
| 1960 | goto end; | ||
| 1961 | } | ||
| 1962 | |||
| 1963 | ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs); | ||
| 1964 | if (!ret && xblk->xb_attrs.xb_root.xt_list.l_next_free_rec == 0) | ||
| 1965 | ret = ocfs2_restore_xattr_block(inode, xs); | ||
| 1966 | |||
| 1967 | end: | ||
| 1968 | |||
| 1969 | return ret; | ||
| 1970 | } | ||
| 1971 | |||
| 1972 | /* | ||
| 1973 | * ocfs2_xattr_set() | ||
| 1974 | * | ||
| 1975 | * Set, replace or remove an extended attribute for this inode. | ||
| 1976 | * value is NULL to remove an existing extended attribute; otherwise either | ||
| 1977 | * create or replace an extended attribute. | ||
| 1978 | */ | ||
| 1979 | int ocfs2_xattr_set(struct inode *inode, | ||
| 1980 | int name_index, | ||
| 1981 | const char *name, | ||
| 1982 | const void *value, | ||
| 1983 | size_t value_len, | ||
| 1984 | int flags) | ||
| 1985 | { | ||
| 1986 | struct buffer_head *di_bh = NULL; | ||
| 1987 | struct ocfs2_dinode *di; | ||
| 1988 | int ret; | ||
| 1989 | u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 1990 | |||
| 1991 | struct ocfs2_xattr_info xi = { | ||
| 1992 | .name_index = name_index, | ||
| 1993 | .name = name, | ||
| 1994 | .value = value, | ||
| 1995 | .value_len = value_len, | ||
| 1996 | }; | ||
| 1997 | |||
| 1998 | struct ocfs2_xattr_search xis = { | ||
| 1999 | .not_found = -ENODATA, | ||
| 2000 | }; | ||
| 2001 | |||
| 2002 | struct ocfs2_xattr_search xbs = { | ||
| 2003 | .not_found = -ENODATA, | ||
| 2004 | }; | ||
| 2005 | |||
| 2006 | if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb))) | ||
| 2007 | return -EOPNOTSUPP; | ||
| 2008 | |||
| 2009 | ret = ocfs2_inode_lock(inode, &di_bh, 1); | ||
| 2010 | if (ret < 0) { | ||
| 2011 | mlog_errno(ret); | ||
| 2012 | return ret; | ||
| 2013 | } | ||
| 2014 | xis.inode_bh = xbs.inode_bh = di_bh; | ||
| 2015 | di = (struct ocfs2_dinode *)di_bh->b_data; | ||
| 2016 | |||
| 2017 | down_write(&OCFS2_I(inode)->ip_xattr_sem); | ||
| 2018 | /* | ||
| 2019 | * Scan the inode and the external block for an extended | ||
| 2020 | * attribute with the same name and collect search information. | ||
| 2021 | */ | ||
| 2022 | ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis); | ||
| 2023 | if (ret) | ||
| 2024 | goto cleanup; | ||
| 2025 | if (xis.not_found) { | ||
| 2026 | ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs); | ||
| 2027 | if (ret) | ||
| 2028 | goto cleanup; | ||
| 2029 | } | ||
| 2030 | |||
| 2031 | if (xis.not_found && xbs.not_found) { | ||
| 2032 | ret = -ENODATA; | ||
| 2033 | if (flags & XATTR_REPLACE) | ||
| 2034 | goto cleanup; | ||
| 2035 | ret = 0; | ||
| 2036 | if (!value) | ||
| 2037 | goto cleanup; | ||
| 2038 | } else { | ||
| 2039 | ret = -EEXIST; | ||
| 2040 | if (flags & XATTR_CREATE) | ||
| 2041 | goto cleanup; | ||
| 2042 | } | ||
| 2043 | |||
| 2044 | if (!value) { | ||
| 2045 | /* Remove existing extended attribute */ | ||
| 2046 | if (!xis.not_found) | ||
| 2047 | ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); | ||
| 2048 | else if (!xbs.not_found) | ||
| 2049 | ret = ocfs2_xattr_block_set(inode, &xi, &xbs); | ||
| 2050 | } else { | ||
| 2051 | /* We always try to set the extended attribute in the inode first. */ | ||
| 2052 | ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); | ||
| 2053 | if (!ret && !xbs.not_found) { | ||
| 2054 | /* | ||
| 2055 | * If that succeeds and the extended attribute also exists in the | ||
| 2056 | * external block, then we will remove it from the block. | ||
| 2057 | */ | ||
| 2058 | xi.value = NULL; | ||
| 2059 | xi.value_len = 0; | ||
| 2060 | ret = ocfs2_xattr_block_set(inode, &xi, &xbs); | ||
| 2061 | } else if (ret == -ENOSPC) { | ||
| 2062 | if (di->i_xattr_loc && !xbs.xattr_bh) { | ||
| 2063 | ret = ocfs2_xattr_block_find(inode, name_index, | ||
| 2064 | name, &xbs); | ||
| 2065 | if (ret) | ||
| 2066 | goto cleanup; | ||
| 2067 | } | ||
| 2068 | /* | ||
| 2069 | * If there is no space in the inode, we will set the extended | ||
| 2070 | * attribute in the external block instead. | ||
| 2071 | */ | ||
| 2072 | ret = ocfs2_xattr_block_set(inode, &xi, &xbs); | ||
| 2073 | if (ret) | ||
| 2074 | goto cleanup; | ||
| 2075 | if (!xis.not_found) { | ||
| 2076 | /* | ||
| 2077 | * If that succeeds and the extended attribute | ||
| 2078 | * also exists in the inode, we will remove it from the inode. | ||
| 2079 | */ | ||
| 2080 | xi.value = NULL; | ||
| 2081 | xi.value_len = 0; | ||
| 2082 | ret = ocfs2_xattr_ibody_set(inode, &xi, &xis); | ||
| 2083 | } | ||
| 2084 | } | ||
| 2085 | } | ||
| 2086 | cleanup: | ||
| 2087 | up_write(&OCFS2_I(inode)->ip_xattr_sem); | ||
| 2088 | ocfs2_inode_unlock(inode, 1); | ||
| 2089 | brelse(di_bh); | ||
| 2090 | brelse(xbs.xattr_bh); | ||
| 2091 | for (i = 0; i < blk_per_bucket; i++) | ||
| 2092 | brelse(xbs.bucket.bhs[i]); | ||
| 2093 | |||
| 2094 | return ret; | ||
| 2095 | } | ||
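Aside: a hypothetical caller of ocfs2_xattr_set() might look like the sketch below. XATTR_CREATE and XATTR_REPLACE are the standard Linux xattr flags; the OCFS2_XATTR_INDEX_USER constant, the attribute name, and the trimmed error handling are assumptions made for illustration and are not taken from this patch.

/* Hypothetical example, not part of the patch. */
static int example_user_xattr(struct inode *inode)
{
	const char value[] = "demo-value";
	int ret;

	/* Create the attribute; -EEXIST is returned if it already exists. */
	ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, "demo",
			      value, sizeof(value) - 1, XATTR_CREATE);
	if (ret)
		return ret;

	/* Passing a NULL value removes the attribute again. */
	return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, "demo",
			       NULL, 0, 0);
}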
| 2096 | |||
| 2097 | /* | ||
| 2098 | * Find the xattr extent rec which may contain name_hash. | ||
| 2099 | * e_cpos will be the first name hash of the xattr rec. | ||
| 2100 | * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list. | ||
| 2101 | */ | ||
| 2102 | static int ocfs2_xattr_get_rec(struct inode *inode, | ||
| 2103 | u32 name_hash, | ||
| 2104 | u64 *p_blkno, | ||
| 2105 | u32 *e_cpos, | ||
| 2106 | u32 *num_clusters, | ||
| 2107 | struct ocfs2_extent_list *el) | ||
| 2108 | { | ||
| 2109 | int ret = 0, i; | ||
| 2110 | struct buffer_head *eb_bh = NULL; | ||
| 2111 | struct ocfs2_extent_block *eb; | ||
| 2112 | struct ocfs2_extent_rec *rec = NULL; | ||
| 2113 | u64 e_blkno = 0; | ||
| 2114 | |||
| 2115 | if (el->l_tree_depth) { | ||
| 2116 | ret = ocfs2_find_leaf(inode, el, name_hash, &eb_bh); | ||
| 2117 | if (ret) { | ||
| 2118 | mlog_errno(ret); | ||
| 2119 | goto out; | ||
| 2120 | } | ||
| 2121 | |||
| 2122 | eb = (struct ocfs2_extent_block *) eb_bh->b_data; | ||
| 2123 | el = &eb->h_list; | ||
| 2124 | |||
| 2125 | if (el->l_tree_depth) { | ||
| 2126 | ocfs2_error(inode->i_sb, | ||
| 2127 | "Inode %lu has non zero tree depth in " | ||
| 2128 | "xattr tree block %llu\n", inode->i_ino, | ||
| 2129 | (unsigned long long)eb_bh->b_blocknr); | ||
| 2130 | ret = -EROFS; | ||
| 2131 | goto out; | ||
| 2132 | } | ||
| 2133 | } | ||
| 2134 | |||
| 2135 | for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) { | ||
| 2136 | rec = &el->l_recs[i]; | ||
| 2137 | |||
| 2138 | if (le32_to_cpu(rec->e_cpos) <= name_hash) { | ||
| 2139 | e_blkno = le64_to_cpu(rec->e_blkno); | ||
| 2140 | break; | ||
| 2141 | } | ||
| 2142 | } | ||
| 2143 | |||
| 2144 | if (!e_blkno) { | ||
| 2145 | ocfs2_error(inode->i_sb, "Inode %lu has bad extent " | ||
| 2146 | "record (%u, %u, 0) in xattr", inode->i_ino, | ||
| 2147 | le32_to_cpu(rec->e_cpos), | ||
| 2148 | ocfs2_rec_clusters(el, rec)); | ||
| 2149 | ret = -EROFS; | ||
| 2150 | goto out; | ||
| 2151 | } | ||
| 2152 | |||
| 2153 | *p_blkno = le64_to_cpu(rec->e_blkno); | ||
| 2154 | *num_clusters = le16_to_cpu(rec->e_leaf_clusters); | ||
| 2155 | if (e_cpos) | ||
| 2156 | *e_cpos = le32_to_cpu(rec->e_cpos); | ||
| 2157 | out: | ||
| 2158 | brelse(eb_bh); | ||
| 2159 | return ret; | ||
| 2160 | } | ||
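Aside: the record lookup above boils down to "take the last extent record whose e_cpos does not exceed the name hash". The self-contained user-space sketch below models only that idea, with invented record values; it is not code from the patch.

#include <stdint.h>
#include <stdio.h>

struct rec { uint32_t cpos; uint64_t blkno; };	/* first hash covered, start block */

static const struct rec *find_rec(const struct rec *recs, int n, uint32_t hash)
{
	for (int i = n - 1; i >= 0; i--)
		if (recs[i].cpos <= hash)
			return &recs[i];
	return NULL;	/* no covering record: the tree would be corrupt */
}

int main(void)
{
	const struct rec recs[] = { { 0, 100 }, { 0x4000, 200 }, { 0x9000, 300 } };
	const struct rec *r = find_rec(recs, 3, 0x5abc);

	printf("hash 0x5abc -> blkno %llu\n",
	       (unsigned long long)(r ? r->blkno : 0));	/* record at 0x4000 wins */
	return 0;
}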
| 2161 | |||
| 2162 | typedef int (xattr_bucket_func)(struct inode *inode, | ||
| 2163 | struct ocfs2_xattr_bucket *bucket, | ||
| 2164 | void *para); | ||
| 2165 | |||
| 2166 | static int ocfs2_find_xe_in_bucket(struct inode *inode, | ||
| 2167 | struct buffer_head *header_bh, | ||
| 2168 | int name_index, | ||
| 2169 | const char *name, | ||
| 2170 | u32 name_hash, | ||
| 2171 | u16 *xe_index, | ||
| 2172 | int *found) | ||
| 2173 | { | ||
| 2174 | int i, ret = 0, cmp = 1, block_off, new_offset; | ||
| 2175 | struct ocfs2_xattr_header *xh = | ||
| 2176 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
| 2177 | size_t name_len = strlen(name); | ||
| 2178 | struct ocfs2_xattr_entry *xe = NULL; | ||
| 2179 | struct buffer_head *name_bh = NULL; | ||
| 2180 | char *xe_name; | ||
| 2181 | |||
| 2182 | /* | ||
| 2183 | * We don't use binary search in the bucket because there | ||
| 2184 | * may be multiple entries with the same name hash. | ||
| 2185 | */ | ||
| 2186 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
| 2187 | xe = &xh->xh_entries[i]; | ||
| 2188 | |||
| 2189 | if (name_hash > le32_to_cpu(xe->xe_name_hash)) | ||
| 2190 | continue; | ||
| 2191 | else if (name_hash < le32_to_cpu(xe->xe_name_hash)) | ||
| 2192 | break; | ||
| 2193 | |||
| 2194 | cmp = name_index - ocfs2_xattr_get_type(xe); | ||
| 2195 | if (!cmp) | ||
| 2196 | cmp = name_len - xe->xe_name_len; | ||
| 2197 | if (cmp) | ||
| 2198 | continue; | ||
| 2199 | |||
| 2200 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
| 2201 | xh, | ||
| 2202 | i, | ||
| 2203 | &block_off, | ||
| 2204 | &new_offset); | ||
| 2205 | if (ret) { | ||
| 2206 | mlog_errno(ret); | ||
| 2207 | break; | ||
| 2208 | } | ||
| 2209 | |||
| 2210 | ret = ocfs2_read_block(inode, header_bh->b_blocknr + block_off, | ||
| 2211 | &name_bh); | ||
| 2212 | if (ret) { | ||
| 2213 | mlog_errno(ret); | ||
| 2214 | break; | ||
| 2215 | } | ||
| 2216 | xe_name = name_bh->b_data + new_offset; | ||
| 2217 | |||
| 2218 | cmp = memcmp(name, xe_name, name_len); | ||
| 2219 | brelse(name_bh); | ||
| 2220 | name_bh = NULL; | ||
| 2221 | |||
| 2222 | if (cmp == 0) { | ||
| 2223 | *xe_index = i; | ||
| 2224 | *found = 1; | ||
| 2225 | ret = 0; | ||
| 2226 | break; | ||
| 2227 | } | ||
| 2228 | } | ||
| 2229 | |||
| 2230 | return ret; | ||
| 2231 | } | ||
| 2232 | |||
| 2233 | /* | ||
| 2234 | * Find the specified xattr entry in a series of buckets. | ||
| 2235 | * The series starts from p_blkno and lasts for num_clusters. | ||
| 2236 | * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains | ||
| 2237 | * the number of valid buckets. | ||
| 2238 | * | ||
| 2239 | * Return the buffer_head this xattr should reside in. If the xattr's | ||
| 2240 | * hash falls in the gap between two buckets, return the lower bucket. | ||
| 2241 | */ | ||
| 2242 | static int ocfs2_xattr_bucket_find(struct inode *inode, | ||
| 2243 | int name_index, | ||
| 2244 | const char *name, | ||
| 2245 | u32 name_hash, | ||
| 2246 | u64 p_blkno, | ||
| 2247 | u32 first_hash, | ||
| 2248 | u32 num_clusters, | ||
| 2249 | struct ocfs2_xattr_search *xs) | ||
| 2250 | { | ||
| 2251 | int ret, found = 0; | ||
| 2252 | struct buffer_head *bh = NULL; | ||
| 2253 | struct buffer_head *lower_bh = NULL; | ||
| 2254 | struct ocfs2_xattr_header *xh = NULL; | ||
| 2255 | struct ocfs2_xattr_entry *xe = NULL; | ||
| 2256 | u16 index = 0; | ||
| 2257 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 2258 | int low_bucket = 0, bucket, high_bucket; | ||
| 2259 | u32 last_hash; | ||
| 2260 | u64 blkno; | ||
| 2261 | |||
| 2262 | ret = ocfs2_read_block(inode, p_blkno, &bh); | ||
| 2263 | if (ret) { | ||
| 2264 | mlog_errno(ret); | ||
| 2265 | goto out; | ||
| 2266 | } | ||
| 2267 | |||
| 2268 | xh = (struct ocfs2_xattr_header *)bh->b_data; | ||
| 2269 | high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1; | ||
| 2270 | |||
| 2271 | while (low_bucket <= high_bucket) { | ||
| 2272 | brelse(bh); | ||
| 2273 | bh = NULL; | ||
| 2274 | bucket = (low_bucket + high_bucket) / 2; | ||
| 2275 | |||
| 2276 | blkno = p_blkno + bucket * blk_per_bucket; | ||
| 2277 | |||
| 2278 | ret = ocfs2_read_block(inode, blkno, &bh); | ||
| 2279 | if (ret) { | ||
| 2280 | mlog_errno(ret); | ||
| 2281 | goto out; | ||
| 2282 | } | ||
| 2283 | |||
| 2284 | xh = (struct ocfs2_xattr_header *)bh->b_data; | ||
| 2285 | xe = &xh->xh_entries[0]; | ||
| 2286 | if (name_hash < le32_to_cpu(xe->xe_name_hash)) { | ||
| 2287 | high_bucket = bucket - 1; | ||
| 2288 | continue; | ||
| 2289 | } | ||
| 2290 | |||
| 2291 | /* | ||
| 2292 | * Check whether the hash of the last entry in our | ||
| 2293 | * bucket is larger than the one we are searching for. For | ||
| 2294 | * an empty bucket, the last entry is also the first one. | ||
| 2295 | */ | ||
| 2296 | if (xh->xh_count) | ||
| 2297 | xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; | ||
| 2298 | |||
| 2299 | last_hash = le32_to_cpu(xe->xe_name_hash); | ||
| 2300 | |||
| 2301 | /* Record lower_bh, which may be the insert place. */ | ||
| 2302 | brelse(lower_bh); | ||
| 2303 | lower_bh = bh; | ||
| 2304 | bh = NULL; | ||
| 2305 | |||
| 2306 | if (name_hash > le32_to_cpu(xe->xe_name_hash)) { | ||
| 2307 | low_bucket = bucket + 1; | ||
| 2308 | continue; | ||
| 2309 | } | ||
| 2310 | |||
| 2311 | /* The searched xattr should reside in this bucket, if it exists. */ | ||
| 2312 | ret = ocfs2_find_xe_in_bucket(inode, lower_bh, | ||
| 2313 | name_index, name, name_hash, | ||
| 2314 | &index, &found); | ||
| 2315 | if (ret) { | ||
| 2316 | mlog_errno(ret); | ||
| 2317 | goto out; | ||
| 2318 | } | ||
| 2319 | break; | ||
| 2320 | } | ||
| 2321 | |||
| 2322 | /* | ||
| 2323 | * Record the bucket we have found. | ||
| 2324 | * When the xattr's hash value falls in the gap between two buckets, we | ||
| 2325 | * always set it to the previous (lower) bucket. | ||
| 2326 | */ | ||
| 2327 | if (!lower_bh) { | ||
| 2328 | /* | ||
| 2329 | * We couldn't find any bucket whose first name_hash is less | ||
| 2330 | * than the name_hash we are searching for. | ||
| 2331 | */ | ||
| 2332 | BUG_ON(bh->b_blocknr != p_blkno); | ||
| 2333 | lower_bh = bh; | ||
| 2334 | bh = NULL; | ||
| 2335 | } | ||
| 2336 | xs->bucket.bhs[0] = lower_bh; | ||
| 2337 | xs->bucket.xh = (struct ocfs2_xattr_header *) | ||
| 2338 | xs->bucket.bhs[0]->b_data; | ||
| 2339 | lower_bh = NULL; | ||
| 2340 | |||
| 2341 | xs->header = xs->bucket.xh; | ||
| 2342 | xs->base = xs->bucket.bhs[0]->b_data; | ||
| 2343 | xs->end = xs->base + inode->i_sb->s_blocksize; | ||
| 2344 | |||
| 2345 | if (found) { | ||
| 2346 | /* | ||
| 2347 | * If we have found the xattr entry, read all the blocks in | ||
| 2348 | * this bucket. | ||
| 2349 | */ | ||
| 2350 | ret = ocfs2_read_blocks(inode, xs->bucket.bhs[0]->b_blocknr + 1, | ||
| 2351 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
| 2352 | 0); | ||
| 2353 | if (ret) { | ||
| 2354 | mlog_errno(ret); | ||
| 2355 | goto out; | ||
| 2356 | } | ||
| 2357 | |||
| 2358 | xs->here = &xs->header->xh_entries[index]; | ||
| 2359 | mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name, | ||
| 2360 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, index); | ||
| 2361 | } else | ||
| 2362 | ret = -ENODATA; | ||
| 2363 | |||
| 2364 | out: | ||
| 2365 | brelse(bh); | ||
| 2366 | brelse(lower_bh); | ||
| 2367 | return ret; | ||
| 2368 | } | ||
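Aside: the bucket search above is a binary search keyed on each bucket's first name hash, with the twist that a hash falling between two buckets resolves to the lower one (tracked via lower_bh). A simplified user-space model of that behaviour, using invented hashes and not taken from the patch, might look like this:

#include <stdint.h>
#include <stdio.h>

/* Return the index of the last bucket whose first hash is <= hash. */
static int find_bucket(const uint32_t *first_hash, int nr, uint32_t hash)
{
	int low = 0, high = nr - 1, lower = 0;

	while (low <= high) {
		int mid = (low + high) / 2;

		if (hash < first_hash[mid]) {
			high = mid - 1;
		} else {
			lower = mid;	/* best candidate so far */
			low = mid + 1;
		}
	}
	return lower;
}

int main(void)
{
	const uint32_t first_hash[] = { 0x100, 0x4000, 0x9000, 0xc000 };

	/* 0x5abc lies between buckets 1 and 2, so bucket 1 is returned. */
	printf("bucket %d\n", find_bucket(first_hash, 4, 0x5abc));
	return 0;
}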
| 2369 | |||
| 2370 | static int ocfs2_xattr_index_block_find(struct inode *inode, | ||
| 2371 | struct buffer_head *root_bh, | ||
| 2372 | int name_index, | ||
| 2373 | const char *name, | ||
| 2374 | struct ocfs2_xattr_search *xs) | ||
| 2375 | { | ||
| 2376 | int ret; | ||
| 2377 | struct ocfs2_xattr_block *xb = | ||
| 2378 | (struct ocfs2_xattr_block *)root_bh->b_data; | ||
| 2379 | struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; | ||
| 2380 | struct ocfs2_extent_list *el = &xb_root->xt_list; | ||
| 2381 | u64 p_blkno = 0; | ||
| 2382 | u32 first_hash, num_clusters = 0; | ||
| 2383 | u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name)); | ||
| 2384 | |||
| 2385 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
| 2386 | return -ENODATA; | ||
| 2387 | |||
| 2388 | mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n", | ||
| 2389 | name, name_hash, name_index); | ||
| 2390 | |||
| 2391 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash, | ||
| 2392 | &num_clusters, el); | ||
| 2393 | if (ret) { | ||
| 2394 | mlog_errno(ret); | ||
| 2395 | goto out; | ||
| 2396 | } | ||
| 2397 | |||
| 2398 | BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash); | ||
| 2399 | |||
| 2400 | mlog(0, "find xattr extent rec %u clusters from %llu, the first hash " | ||
| 2401 | "in the rec is %u\n", num_clusters, p_blkno, first_hash); | ||
| 2402 | |||
| 2403 | ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash, | ||
| 2404 | p_blkno, first_hash, num_clusters, xs); | ||
| 2405 | |||
| 2406 | out: | ||
| 2407 | return ret; | ||
| 2408 | } | ||
| 2409 | |||
| 2410 | static int ocfs2_iterate_xattr_buckets(struct inode *inode, | ||
| 2411 | u64 blkno, | ||
| 2412 | u32 clusters, | ||
| 2413 | xattr_bucket_func *func, | ||
| 2414 | void *para) | ||
| 2415 | { | ||
| 2416 | int i, j, ret = 0; | ||
| 2417 | int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 2418 | u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)); | ||
| 2419 | u32 num_buckets = clusters * bpc; | ||
| 2420 | struct ocfs2_xattr_bucket bucket; | ||
| 2421 | |||
| 2422 | memset(&bucket, 0, sizeof(bucket)); | ||
| 2423 | |||
| 2424 | mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n", | ||
| 2425 | clusters, blkno); | ||
| 2426 | |||
| 2427 | for (i = 0; i < num_buckets; i++, blkno += blk_per_bucket) { | ||
| 2428 | ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, | ||
| 2429 | bucket.bhs, 0); | ||
| 2430 | if (ret) { | ||
| 2431 | mlog_errno(ret); | ||
| 2432 | goto out; | ||
| 2433 | } | ||
| 2434 | |||
| 2435 | bucket.xh = (struct ocfs2_xattr_header *)bucket.bhs[0]->b_data; | ||
| 2436 | /* | ||
| 2437 | * The real bucket number in this series of blocks is stored | ||
| 2438 | * in the 1st bucket. | ||
| 2439 | */ | ||
| 2440 | if (i == 0) | ||
| 2441 | num_buckets = le16_to_cpu(bucket.xh->xh_num_buckets); | ||
| 2442 | |||
| 2443 | mlog(0, "iterating xattr bucket %llu, first hash %u\n", blkno, | ||
| 2444 | le32_to_cpu(bucket.xh->xh_entries[0].xe_name_hash)); | ||
| 2445 | if (func) { | ||
| 2446 | ret = func(inode, &bucket, para); | ||
| 2447 | if (ret) { | ||
| 2448 | mlog_errno(ret); | ||
| 2449 | break; | ||
| 2450 | } | ||
| 2451 | } | ||
| 2452 | |||
| 2453 | for (j = 0; j < blk_per_bucket; j++) | ||
| 2454 | brelse(bucket.bhs[j]); | ||
| 2455 | memset(&bucket, 0, sizeof(bucket)); | ||
| 2456 | } | ||
| 2457 | |||
| 2458 | out: | ||
| 2459 | for (j = 0; j < blk_per_bucket; j++) | ||
| 2460 | brelse(bucket.bhs[j]); | ||
| 2461 | |||
| 2462 | return ret; | ||
| 2463 | } | ||
| 2464 | |||
| 2465 | struct ocfs2_xattr_tree_list { | ||
| 2466 | char *buffer; | ||
| 2467 | size_t buffer_size; | ||
| 2468 | size_t result; | ||
| 2469 | }; | ||
| 2470 | |||
| 2471 | static int ocfs2_xattr_bucket_get_name_value(struct inode *inode, | ||
| 2472 | struct ocfs2_xattr_header *xh, | ||
| 2473 | int index, | ||
| 2474 | int *block_off, | ||
| 2475 | int *new_offset) | ||
| 2476 | { | ||
| 2477 | u16 name_offset; | ||
| 2478 | |||
| 2479 | if (index < 0 || index >= le16_to_cpu(xh->xh_count)) | ||
| 2480 | return -EINVAL; | ||
| 2481 | |||
| 2482 | name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset); | ||
| 2483 | |||
| 2484 | *block_off = name_offset >> inode->i_sb->s_blocksize_bits; | ||
| 2485 | *new_offset = name_offset % inode->i_sb->s_blocksize; | ||
| 2486 | |||
| 2487 | return 0; | ||
| 2488 | } | ||
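Aside: the helper above splits a bucket-relative name offset into a block index and an offset within that block. With a hypothetical 4 KB block size (s_blocksize_bits == 12), the arithmetic works out as in this small sketch, which is illustrative only:

#include <stdio.h>

int main(void)
{
	unsigned int name_offset = 5000;		/* offset inside the bucket */
	unsigned int block_off  = name_offset >> 12;	/* 5000 / 4096 = 1 */
	unsigned int new_offset = name_offset % 4096;	/* 5000 - 4096 = 904 */

	printf("block_off=%u new_offset=%u\n", block_off, new_offset);
	return 0;
}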
| 2489 | |||
| 2490 | static int ocfs2_list_xattr_bucket(struct inode *inode, | ||
| 2491 | struct ocfs2_xattr_bucket *bucket, | ||
| 2492 | void *para) | ||
| 2493 | { | ||
| 2494 | int ret = 0, type; | ||
| 2495 | struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para; | ||
| 2496 | int i, block_off, new_offset; | ||
| 2497 | const char *prefix, *name; | ||
| 2498 | |||
| 2499 | for (i = 0 ; i < le16_to_cpu(bucket->xh->xh_count); i++) { | ||
| 2500 | struct ocfs2_xattr_entry *entry = &bucket->xh->xh_entries[i]; | ||
| 2501 | type = ocfs2_xattr_get_type(entry); | ||
| 2502 | prefix = ocfs2_xattr_prefix(type); | ||
| 2503 | |||
| 2504 | if (prefix) { | ||
| 2505 | ret = ocfs2_xattr_bucket_get_name_value(inode, | ||
| 2506 | bucket->xh, | ||
| 2507 | i, | ||
| 2508 | &block_off, | ||
| 2509 | &new_offset); | ||
| 2510 | if (ret) | ||
| 2511 | break; | ||
| 2512 | |||
| 2513 | name = (const char *)bucket->bhs[block_off]->b_data + | ||
| 2514 | new_offset; | ||
| 2515 | ret = ocfs2_xattr_list_entry(xl->buffer, | ||
| 2516 | xl->buffer_size, | ||
| 2517 | &xl->result, | ||
| 2518 | prefix, name, | ||
| 2519 | entry->xe_name_len); | ||
| 2520 | if (ret) | ||
| 2521 | break; | ||
| 2522 | } | ||
| 2523 | } | ||
| 2524 | |||
| 2525 | return ret; | ||
| 2526 | } | ||
| 2527 | |||
| 2528 | static int ocfs2_xattr_tree_list_index_block(struct inode *inode, | ||
| 2529 | struct ocfs2_xattr_tree_root *xt, | ||
| 2530 | char *buffer, | ||
| 2531 | size_t buffer_size) | ||
| 2532 | { | ||
| 2533 | struct ocfs2_extent_list *el = &xt->xt_list; | ||
| 2534 | int ret = 0; | ||
| 2535 | u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0; | ||
| 2536 | u64 p_blkno = 0; | ||
| 2537 | struct ocfs2_xattr_tree_list xl = { | ||
| 2538 | .buffer = buffer, | ||
| 2539 | .buffer_size = buffer_size, | ||
| 2540 | .result = 0, | ||
| 2541 | }; | ||
| 2542 | |||
| 2543 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
| 2544 | return 0; | ||
| 2545 | |||
| 2546 | while (name_hash > 0) { | ||
| 2547 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
| 2548 | &e_cpos, &num_clusters, el); | ||
| 2549 | if (ret) { | ||
| 2550 | mlog_errno(ret); | ||
| 2551 | goto out; | ||
| 2552 | } | ||
| 2553 | |||
| 2554 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
| 2555 | ocfs2_list_xattr_bucket, | ||
| 2556 | &xl); | ||
| 2557 | if (ret) { | ||
| 2558 | mlog_errno(ret); | ||
| 2559 | goto out; | ||
| 2560 | } | ||
| 2561 | |||
| 2562 | if (e_cpos == 0) | ||
| 2563 | break; | ||
| 2564 | |||
| 2565 | name_hash = e_cpos - 1; | ||
| 2566 | } | ||
| 2567 | |||
| 2568 | ret = xl.result; | ||
| 2569 | out: | ||
| 2570 | return ret; | ||
| 2571 | } | ||
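Aside: the listing loop above walks the xattr tree from the highest possible hash downwards; each pass visits the record covering name_hash and then continues just below that record's first hash (e_cpos - 1) until the record starting at 0 is reached. The user-space sketch below models that traversal with made-up e_cpos values and is not part of the patch.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* First hash covered by each hypothetical extent record, sorted. */
	const uint32_t e_cpos[] = { 0, 0x4000, 0x9000 };
	const int nr = 3;
	uint32_t hash = UINT32_MAX;

	while (hash > 0) {
		int i = nr - 1;

		while (e_cpos[i] > hash)	/* find the covering record */
			i--;
		printf("visit record %d (first hash 0x%x)\n", i, (unsigned)e_cpos[i]);
		if (e_cpos[i] == 0)
			break;			/* lowest record reached */
		hash = e_cpos[i] - 1;		/* restart just below it */
	}
	return 0;
}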
| 2572 | |||
| 2573 | static int cmp_xe(const void *a, const void *b) | ||
| 2574 | { | ||
| 2575 | const struct ocfs2_xattr_entry *l = a, *r = b; | ||
| 2576 | u32 l_hash = le32_to_cpu(l->xe_name_hash); | ||
| 2577 | u32 r_hash = le32_to_cpu(r->xe_name_hash); | ||
| 2578 | |||
| 2579 | if (l_hash > r_hash) | ||
| 2580 | return 1; | ||
| 2581 | if (l_hash < r_hash) | ||
| 2582 | return -1; | ||
| 2583 | return 0; | ||
| 2584 | } | ||
| 2585 | |||
| 2586 | static void swap_xe(void *a, void *b, int size) | ||
| 2587 | { | ||
| 2588 | struct ocfs2_xattr_entry *l = a, *r = b, tmp; | ||
| 2589 | |||
| 2590 | tmp = *l; | ||
| 2591 | memcpy(l, r, sizeof(struct ocfs2_xattr_entry)); | ||
| 2592 | memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry)); | ||
| 2593 | } | ||
| 2594 | |||
| 2595 | /* | ||
| 2596 | * When the ocfs2_xattr_block is filled up, a new bucket will be created | ||
| 2597 | * and all the xattr entries will be moved to the new bucket. | ||
| 2598 | * Note: we need to sort the entries since they are not saved in order | ||
| 2599 | * in the ocfs2_xattr_block. | ||
| 2600 | */ | ||
| 2601 | static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode, | ||
| 2602 | struct buffer_head *xb_bh, | ||
| 2603 | struct buffer_head *xh_bh, | ||
| 2604 | struct buffer_head *data_bh) | ||
| 2605 | { | ||
| 2606 | int i, blocksize = inode->i_sb->s_blocksize; | ||
| 2607 | u16 offset, size, off_change; | ||
| 2608 | struct ocfs2_xattr_entry *xe; | ||
| 2609 | struct ocfs2_xattr_block *xb = | ||
| 2610 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
| 2611 | struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header; | ||
| 2612 | struct ocfs2_xattr_header *xh = | ||
| 2613 | (struct ocfs2_xattr_header *)xh_bh->b_data; | ||
| 2614 | u16 count = le16_to_cpu(xb_xh->xh_count); | ||
| 2615 | char *target = xh_bh->b_data, *src = xb_bh->b_data; | ||
| 2616 | |||
| 2617 | mlog(0, "cp xattr from block %llu to bucket %llu\n", | ||
| 2618 | (unsigned long long)xb_bh->b_blocknr, | ||
| 2619 | (unsigned long long)xh_bh->b_blocknr); | ||
| 2620 | |||
| 2621 | memset(xh_bh->b_data, 0, blocksize); | ||
| 2622 | if (data_bh) | ||
| 2623 | memset(data_bh->b_data, 0, blocksize); | ||
| 2624 | /* | ||
| 2625 | * Since the xe_name_offset is based on ocfs2_xattr_header, | ||
| 2626 | * there is an offset change corresponding to the change of | ||
| 2627 | * ocfs2_xattr_header's position. | ||
| 2628 | */ | ||
| 2629 | off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); | ||
| 2630 | xe = &xb_xh->xh_entries[count - 1]; | ||
| 2631 | offset = le16_to_cpu(xe->xe_name_offset) + off_change; | ||
| 2632 | size = blocksize - offset; | ||
| 2633 | |||
| 2634 | /* copy all the names and values. */ | ||
| 2635 | if (data_bh) | ||
| 2636 | target = data_bh->b_data; | ||
| 2637 | memcpy(target + offset, src + offset, size); | ||
| 2638 | |||
| 2639 | /* Init new header now. */ | ||
| 2640 | xh->xh_count = xb_xh->xh_count; | ||
| 2641 | xh->xh_num_buckets = cpu_to_le16(1); | ||
| 2642 | xh->xh_name_value_len = cpu_to_le16(size); | ||
| 2643 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size); | ||
| 2644 | |||
| 2645 | /* copy all the entries. */ | ||
| 2646 | target = xh_bh->b_data; | ||
| 2647 | offset = offsetof(struct ocfs2_xattr_header, xh_entries); | ||
| 2648 | size = count * sizeof(struct ocfs2_xattr_entry); | ||
| 2649 | memcpy(target + offset, (char *)xb_xh + offset, size); | ||
| 2650 | |||
| 2651 | /* Change the xe offset for all the xe because of the move. */ | ||
| 2652 | off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize + | ||
| 2653 | offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header); | ||
| 2654 | for (i = 0; i < count; i++) | ||
| 2655 | le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change); | ||
| 2656 | |||
| 2657 | mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n", | ||
| 2658 | offset, size, off_change); | ||
| 2659 | |||
| 2660 | sort(target + offset, count, sizeof(struct ocfs2_xattr_entry), | ||
| 2661 | cmp_xe, swap_xe); | ||
| 2662 | } | ||
| 2663 | |||
| 2664 | /* | ||
| 2665 | * After we move the xattrs from the block to the index btree, we have to | ||
| 2666 | * update ocfs2_xattr_search to the new xe and base. | ||
| 2667 | * | ||
| 2668 | * When the entry is in the xattr block, xattr_bh indicates the storage place, | ||
| 2669 | * while if the entry is in the index b-tree, "bucket" indicates the | ||
| 2670 | * real place of the xattr. | ||
| 2671 | */ | ||
| 2672 | static int ocfs2_xattr_update_xattr_search(struct inode *inode, | ||
| 2673 | struct ocfs2_xattr_search *xs, | ||
| 2674 | struct buffer_head *old_bh, | ||
| 2675 | struct buffer_head *new_bh) | ||
| 2676 | { | ||
| 2677 | int ret = 0; | ||
| 2678 | char *buf = old_bh->b_data; | ||
| 2679 | struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf; | ||
| 2680 | struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header; | ||
| 2681 | int i, blocksize = inode->i_sb->s_blocksize; | ||
| 2682 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 2683 | |||
| 2684 | xs->bucket.bhs[0] = new_bh; | ||
| 2685 | get_bh(new_bh); | ||
| 2686 | xs->bucket.xh = (struct ocfs2_xattr_header *)xs->bucket.bhs[0]->b_data; | ||
| 2687 | xs->header = xs->bucket.xh; | ||
| 2688 | |||
| 2689 | xs->base = new_bh->b_data; | ||
| 2690 | xs->end = xs->base + inode->i_sb->s_blocksize; | ||
| 2691 | |||
| 2692 | if (!xs->not_found) { | ||
| 2693 | if (OCFS2_XATTR_BUCKET_SIZE != blocksize) { | ||
| 2694 | ret = ocfs2_read_blocks(inode, | ||
| 2695 | xs->bucket.bhs[0]->b_blocknr + 1, | ||
| 2696 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
| 2697 | 0); | ||
| 2698 | if (ret) { | ||
| 2699 | mlog_errno(ret); | ||
| 2700 | return ret; | ||
| 2701 | } | ||
| 2702 | |||
| 2703 | i = xs->here - old_xh->xh_entries; | ||
| 2704 | xs->here = &xs->header->xh_entries[i]; | ||
| 2705 | } | ||
| 2706 | } | ||
| 2707 | |||
| 2708 | return ret; | ||
| 2709 | } | ||
| 2710 | |||
| 2711 | static int ocfs2_xattr_create_index_block(struct inode *inode, | ||
| 2712 | struct ocfs2_xattr_search *xs) | ||
| 2713 | { | ||
| 2714 | int ret, credits = OCFS2_SUBALLOC_ALLOC; | ||
| 2715 | u32 bit_off, len; | ||
| 2716 | u64 blkno; | ||
| 2717 | handle_t *handle; | ||
| 2718 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 2719 | struct ocfs2_inode_info *oi = OCFS2_I(inode); | ||
| 2720 | struct ocfs2_alloc_context *data_ac; | ||
| 2721 | struct buffer_head *xh_bh = NULL, *data_bh = NULL; | ||
| 2722 | struct buffer_head *xb_bh = xs->xattr_bh; | ||
| 2723 | struct ocfs2_xattr_block *xb = | ||
| 2724 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
| 2725 | struct ocfs2_xattr_tree_root *xr; | ||
| 2726 | u16 xb_flags = le16_to_cpu(xb->xb_flags); | ||
| 2727 | u16 bpb = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 2728 | |||
| 2729 | mlog(0, "create xattr index block for %llu\n", | ||
| 2730 | (unsigned long long)xb_bh->b_blocknr); | ||
| 2731 | |||
| 2732 | BUG_ON(xb_flags & OCFS2_XATTR_INDEXED); | ||
| 2733 | |||
| 2734 | ret = ocfs2_reserve_clusters(osb, 1, &data_ac); | ||
| 2735 | if (ret) { | ||
| 2736 | mlog_errno(ret); | ||
| 2737 | goto out; | ||
| 2738 | } | ||
| 2739 | |||
| 2740 | /* | ||
| 2741 | * XXX: | ||
| 2742 | * We can use this lock for now, and maybe move to a dedicated mutex | ||
| 2743 | * if performance becomes a problem later. | ||
| 2744 | */ | ||
| 2745 | down_write(&oi->ip_alloc_sem); | ||
| 2746 | |||
| 2747 | /* | ||
| 2748 | * 3 more credits, one for xattr block update, one for the 1st block | ||
| 2749 | * of the new xattr bucket and one for the value/data. | ||
| 2750 | */ | ||
| 2751 | credits += 3; | ||
| 2752 | handle = ocfs2_start_trans(osb, credits); | ||
| 2753 | if (IS_ERR(handle)) { | ||
| 2754 | ret = PTR_ERR(handle); | ||
| 2755 | mlog_errno(ret); | ||
| 2756 | goto out_sem; | ||
| 2757 | } | ||
| 2758 | |||
| 2759 | ret = ocfs2_journal_access(handle, inode, xb_bh, | ||
| 2760 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2761 | if (ret) { | ||
| 2762 | mlog_errno(ret); | ||
| 2763 | goto out_commit; | ||
| 2764 | } | ||
| 2765 | |||
| 2766 | ret = ocfs2_claim_clusters(osb, handle, data_ac, 1, &bit_off, &len); | ||
| 2767 | if (ret) { | ||
| 2768 | mlog_errno(ret); | ||
| 2769 | goto out_commit; | ||
| 2770 | } | ||
| 2771 | |||
| 2772 | /* | ||
| 2773 | * The bucket may span many blocks, and | ||
| 2774 | * we will only touch the 1st block and the last block | ||
| 2775 | * in the whole bucket (one for the entries and one for the data). | ||
| 2776 | */ | ||
| 2777 | blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off); | ||
| 2778 | |||
| 2779 | mlog(0, "allocate 1 cluster from %llu to xattr block\n", blkno); | ||
| 2780 | |||
| 2781 | xh_bh = sb_getblk(inode->i_sb, blkno); | ||
| 2782 | if (!xh_bh) { | ||
| 2783 | ret = -EIO; | ||
| 2784 | mlog_errno(ret); | ||
| 2785 | goto out_commit; | ||
| 2786 | } | ||
| 2787 | |||
| 2788 | ocfs2_set_new_buffer_uptodate(inode, xh_bh); | ||
| 2789 | |||
| 2790 | ret = ocfs2_journal_access(handle, inode, xh_bh, | ||
| 2791 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
| 2792 | if (ret) { | ||
| 2793 | mlog_errno(ret); | ||
| 2794 | goto out_commit; | ||
| 2795 | } | ||
| 2796 | |||
| 2797 | if (bpb > 1) { | ||
| 2798 | data_bh = sb_getblk(inode->i_sb, blkno + bpb - 1); | ||
| 2799 | if (!data_bh) { | ||
| 2800 | ret = -EIO; | ||
| 2801 | mlog_errno(ret); | ||
| 2802 | goto out_commit; | ||
| 2803 | } | ||
| 2804 | |||
| 2805 | ocfs2_set_new_buffer_uptodate(inode, data_bh); | ||
| 2806 | |||
| 2807 | ret = ocfs2_journal_access(handle, inode, data_bh, | ||
| 2808 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
| 2809 | if (ret) { | ||
| 2810 | mlog_errno(ret); | ||
| 2811 | goto out_commit; | ||
| 2812 | } | ||
| 2813 | } | ||
| 2814 | |||
| 2815 | ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xh_bh, data_bh); | ||
| 2816 | |||
| 2817 | ocfs2_journal_dirty(handle, xh_bh); | ||
| 2818 | if (data_bh) | ||
| 2819 | ocfs2_journal_dirty(handle, data_bh); | ||
| 2820 | |||
| 2821 | ocfs2_xattr_update_xattr_search(inode, xs, xb_bh, xh_bh); | ||
| 2822 | |||
| 2823 | /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */ | ||
| 2824 | memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize - | ||
| 2825 | offsetof(struct ocfs2_xattr_block, xb_attrs)); | ||
| 2826 | |||
| 2827 | xr = &xb->xb_attrs.xb_root; | ||
| 2828 | xr->xt_clusters = cpu_to_le32(1); | ||
| 2829 | xr->xt_last_eb_blk = 0; | ||
| 2830 | xr->xt_list.l_tree_depth = 0; | ||
| 2831 | xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb)); | ||
| 2832 | xr->xt_list.l_next_free_rec = cpu_to_le16(1); | ||
| 2833 | |||
| 2834 | xr->xt_list.l_recs[0].e_cpos = 0; | ||
| 2835 | xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno); | ||
| 2836 | xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1); | ||
| 2837 | |||
| 2838 | xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED); | ||
| 2839 | |||
| 2840 | ret = ocfs2_journal_dirty(handle, xb_bh); | ||
| 2841 | if (ret) { | ||
| 2842 | mlog_errno(ret); | ||
| 2843 | goto out_commit; | ||
| 2844 | } | ||
| 2845 | |||
| 2846 | out_commit: | ||
| 2847 | ocfs2_commit_trans(osb, handle); | ||
| 2848 | |||
| 2849 | out_sem: | ||
| 2850 | up_write(&oi->ip_alloc_sem); | ||
| 2851 | |||
| 2852 | out: | ||
| 2853 | if (data_ac) | ||
| 2854 | ocfs2_free_alloc_context(data_ac); | ||
| 2855 | |||
| 2856 | brelse(xh_bh); | ||
| 2857 | brelse(data_bh); | ||
| 2858 | |||
| 2859 | return ret; | ||
| 2860 | } | ||
| 2861 | |||
| 2862 | static int cmp_xe_offset(const void *a, const void *b) | ||
| 2863 | { | ||
| 2864 | const struct ocfs2_xattr_entry *l = a, *r = b; | ||
| 2865 | u32 l_name_offset = le16_to_cpu(l->xe_name_offset); | ||
| 2866 | u32 r_name_offset = le16_to_cpu(r->xe_name_offset); | ||
| 2867 | |||
| 2868 | if (l_name_offset < r_name_offset) | ||
| 2869 | return 1; | ||
| 2870 | if (l_name_offset > r_name_offset) | ||
| 2871 | return -1; | ||
| 2872 | return 0; | ||
| 2873 | } | ||
| 2874 | |||
| 2875 | /* | ||
| 2876 | * Defragment an xattr bucket if we find that the bucket has some | ||
| 2877 | * holes between name/value pairs. | ||
| 2878 | * We will move all the name/value pairs to the end of the bucket | ||
| 2879 | * so that we can free up some space for insertion. | ||
| 2880 | */ | ||
| 2881 | static int ocfs2_defrag_xattr_bucket(struct inode *inode, | ||
| 2882 | struct ocfs2_xattr_bucket *bucket) | ||
| 2883 | { | ||
| 2884 | int ret, i; | ||
| 2885 | size_t end, offset, len, value_len; | ||
| 2886 | struct ocfs2_xattr_header *xh; | ||
| 2887 | char *entries, *buf, *bucket_buf = NULL; | ||
| 2888 | u64 blkno = bucket->bhs[0]->b_blocknr; | ||
| 2889 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 2890 | u16 xh_free_start; | ||
| 2891 | size_t blocksize = inode->i_sb->s_blocksize; | ||
| 2892 | handle_t *handle; | ||
| 2893 | struct buffer_head **bhs; | ||
| 2894 | struct ocfs2_xattr_entry *xe; | ||
| 2895 | |||
| 2896 | bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
| 2897 | GFP_NOFS); | ||
| 2898 | if (!bhs) | ||
| 2899 | return -ENOMEM; | ||
| 2900 | |||
| 2901 | ret = ocfs2_read_blocks(inode, blkno, blk_per_bucket, bhs, 0); | ||
| 2902 | if (ret) | ||
| 2903 | goto out; | ||
| 2904 | |||
| 2905 | /* | ||
| 2906 | * In order to make the operation more efficient and generic, | ||
| 2907 | * we copy all the blocks into a contiguous buffer and do the | ||
| 2908 | * defragmentation there, so that if anything goes wrong we will not | ||
| 2909 | * touch the real blocks. | ||
| 2910 | */ | ||
| 2911 | bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS); | ||
| 2912 | if (!bucket_buf) { | ||
| 2913 | ret = -EIO; | ||
| 2914 | goto out; | ||
| 2915 | } | ||
| 2916 | |||
| 2917 | buf = bucket_buf; | ||
| 2918 | for (i = 0; i < blk_per_bucket; i++, buf += blocksize) | ||
| 2919 | memcpy(buf, bhs[i]->b_data, blocksize); | ||
| 2920 | |||
| 2921 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), blk_per_bucket); | ||
| 2922 | if (IS_ERR(handle)) { | ||
| 2923 | ret = PTR_ERR(handle); | ||
| 2924 | handle = NULL; | ||
| 2925 | mlog_errno(ret); | ||
| 2926 | goto out; | ||
| 2927 | } | ||
| 2928 | |||
| 2929 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 2930 | ret = ocfs2_journal_access(handle, inode, bhs[i], | ||
| 2931 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 2932 | if (ret < 0) { | ||
| 2933 | mlog_errno(ret); | ||
| 2934 | goto commit; | ||
| 2935 | } | ||
| 2936 | } | ||
| 2937 | |||
| 2938 | xh = (struct ocfs2_xattr_header *)bucket_buf; | ||
| 2939 | entries = (char *)xh->xh_entries; | ||
| 2940 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
| 2941 | |||
| 2942 | mlog(0, "adjust xattr bucket in %llu, count = %u, " | ||
| 2943 | "xh_free_start = %u, xh_name_value_len = %u.\n", | ||
| 2944 | blkno, le16_to_cpu(xh->xh_count), xh_free_start, | ||
| 2945 | le16_to_cpu(xh->xh_name_value_len)); | ||
| 2946 | |||
| 2947 | /* | ||
| 2948 | * Sort all the entries by their offset. | ||
| 2949 | * The largest offset comes first, so that we can | ||
| 2950 | * move them to the end one by one. | ||
| 2951 | */ | ||
| 2952 | sort(entries, le16_to_cpu(xh->xh_count), | ||
| 2953 | sizeof(struct ocfs2_xattr_entry), | ||
| 2954 | cmp_xe_offset, swap_xe); | ||
| 2955 | |||
| 2956 | /* Move all name/values to the end of the bucket. */ | ||
| 2957 | xe = xh->xh_entries; | ||
| 2958 | end = OCFS2_XATTR_BUCKET_SIZE; | ||
| 2959 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) { | ||
| 2960 | offset = le16_to_cpu(xe->xe_name_offset); | ||
| 2961 | if (ocfs2_xattr_is_local(xe)) | ||
| 2962 | value_len = OCFS2_XATTR_SIZE( | ||
| 2963 | le64_to_cpu(xe->xe_value_size)); | ||
| 2964 | else | ||
| 2965 | value_len = OCFS2_XATTR_ROOT_SIZE; | ||
| 2966 | len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len; | ||
| 2967 | |||
| 2968 | /* | ||
| 2969 | * We must make sure that the name/value pair | ||
| 2970 | * exists within a single block. So adjust end to | ||
| 2971 | * the previous block boundary if needed. | ||
| 2972 | */ | ||
| 2973 | if (((end - len) / blocksize != | ||
| 2974 | (end - 1) / blocksize)) | ||
| 2975 | end = end - end % blocksize; | ||
| 2976 | |||
| 2977 | if (end > offset + len) { | ||
| 2978 | memmove(bucket_buf + end - len, | ||
| 2979 | bucket_buf + offset, len); | ||
| 2980 | xe->xe_name_offset = cpu_to_le16(end - len); | ||
| 2981 | } | ||
| 2982 | |||
| 2983 | mlog_bug_on_msg(end < offset + len, "Defrag check failed for " | ||
| 2984 | "bucket %llu\n", (unsigned long long)blkno); | ||
| 2985 | |||
| 2986 | end -= len; | ||
| 2987 | } | ||
| 2988 | |||
| 2989 | mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for " | ||
| 2990 | "bucket %llu\n", (unsigned long long)blkno); | ||
| 2991 | |||
| 2992 | if (xh_free_start == end) | ||
| 2993 | goto commit; | ||
| 2994 | |||
| 2995 | memset(bucket_buf + xh_free_start, 0, end - xh_free_start); | ||
| 2996 | xh->xh_free_start = cpu_to_le16(end); | ||
| 2997 | |||
| 2998 | /* sort the entries by their name_hash. */ | ||
| 2999 | sort(entries, le16_to_cpu(xh->xh_count), | ||
| 3000 | sizeof(struct ocfs2_xattr_entry), | ||
| 3001 | cmp_xe, swap_xe); | ||
| 3002 | |||
| 3003 | buf = bucket_buf; | ||
| 3004 | for (i = 0; i < blk_per_bucket; i++, buf += blocksize) { | ||
| 3005 | memcpy(bhs[i]->b_data, buf, blocksize); | ||
| 3006 | ocfs2_journal_dirty(handle, bhs[i]); | ||
| 3007 | } | ||
| 3008 | |||
| 3009 | commit: | ||
| 3010 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 3011 | out: | ||
| 3012 | |||
| 3013 | if (bhs) { | ||
| 3014 | for (i = 0; i < blk_per_bucket; i++) | ||
| 3015 | brelse(bhs[i]); | ||
| 3016 | } | ||
| 3017 | kfree(bhs); | ||
| 3018 | |||
| 3019 | kfree(bucket_buf); | ||
| 3020 | return ret; | ||
| 3021 | } | ||
| 3022 | |||
| 3023 | /* | ||
| 3024 | * Move half of the xattr buckets in the previous cluster to this new | ||
| 3025 | * cluster. We only touch the last cluster of the previous extent record. | ||
| 3026 | * | ||
| 3027 | * first_bh is the first buffer_head of a series of buckets in the same | ||
| 3028 | * extent rec and header_bh is the header of one bucket in this cluster. | ||
| 3029 | * They will be updated if we move the data header_bh contains to the new | ||
| 3030 | * cluster. first_hash will be set to the 1st xe's name_hash of the new cluster. | ||
| 3031 | */ | ||
| 3032 | static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode, | ||
| 3033 | handle_t *handle, | ||
| 3034 | struct buffer_head **first_bh, | ||
| 3035 | struct buffer_head **header_bh, | ||
| 3036 | u64 new_blkno, | ||
| 3037 | u64 prev_blkno, | ||
| 3038 | u32 num_clusters, | ||
| 3039 | u32 *first_hash) | ||
| 3040 | { | ||
| 3041 | int i, ret, credits; | ||
| 3042 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 3043 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
| 3044 | int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); | ||
| 3045 | int blocksize = inode->i_sb->s_blocksize; | ||
| 3046 | struct buffer_head *old_bh, *new_bh, *prev_bh, *new_first_bh = NULL; | ||
| 3047 | struct ocfs2_xattr_header *new_xh; | ||
| 3048 | struct ocfs2_xattr_header *xh = | ||
| 3049 | (struct ocfs2_xattr_header *)((*first_bh)->b_data); | ||
| 3050 | |||
| 3051 | BUG_ON(le16_to_cpu(xh->xh_num_buckets) < num_buckets); | ||
| 3052 | BUG_ON(OCFS2_XATTR_BUCKET_SIZE == osb->s_clustersize); | ||
| 3053 | |||
| 3054 | prev_bh = *first_bh; | ||
| 3055 | get_bh(prev_bh); | ||
| 3056 | xh = (struct ocfs2_xattr_header *)prev_bh->b_data; | ||
| 3057 | |||
| 3058 | prev_blkno += (num_clusters - 1) * bpc + bpc / 2; | ||
| 3059 | |||
| 3060 | mlog(0, "move half of xattrs in cluster %llu to %llu\n", | ||
| 3061 | prev_blkno, new_blkno); | ||
| 3062 | |||
| 3063 | /* | ||
| 3064 | * We need credits to update the 1st half of the new cluster, plus | ||
| 3065 | * 1 more for the update of the 1st bucket of the previous | ||
| 3066 | * extent record. | ||
| 3067 | */ | ||
| 3068 | credits = bpc / 2 + 1; | ||
| 3069 | ret = ocfs2_extend_trans(handle, credits); | ||
| 3070 | if (ret) { | ||
| 3071 | mlog_errno(ret); | ||
| 3072 | goto out; | ||
| 3073 | } | ||
| 3074 | |||
| 3075 | ret = ocfs2_journal_access(handle, inode, prev_bh, | ||
| 3076 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3077 | if (ret) { | ||
| 3078 | mlog_errno(ret); | ||
| 3079 | goto out; | ||
| 3080 | } | ||
| 3081 | |||
| 3082 | for (i = 0; i < bpc / 2; i++, prev_blkno++, new_blkno++) { | ||
| 3083 | old_bh = new_bh = NULL; | ||
| 3084 | new_bh = sb_getblk(inode->i_sb, new_blkno); | ||
| 3085 | if (!new_bh) { | ||
| 3086 | ret = -EIO; | ||
| 3087 | mlog_errno(ret); | ||
| 3088 | goto out; | ||
| 3089 | } | ||
| 3090 | |||
| 3091 | ocfs2_set_new_buffer_uptodate(inode, new_bh); | ||
| 3092 | |||
| 3093 | ret = ocfs2_journal_access(handle, inode, new_bh, | ||
| 3094 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
| 3095 | if (ret < 0) { | ||
| 3096 | mlog_errno(ret); | ||
| 3097 | brelse(new_bh); | ||
| 3098 | goto out; | ||
| 3099 | } | ||
| 3100 | |||
| 3101 | ret = ocfs2_read_block(inode, prev_blkno, &old_bh); | ||
| 3102 | if (ret < 0) { | ||
| 3103 | mlog_errno(ret); | ||
| 3104 | brelse(new_bh); | ||
| 3105 | goto out; | ||
| 3106 | } | ||
| 3107 | |||
| 3108 | memcpy(new_bh->b_data, old_bh->b_data, blocksize); | ||
| 3109 | |||
| 3110 | if (i == 0) { | ||
| 3111 | new_xh = (struct ocfs2_xattr_header *)new_bh->b_data; | ||
| 3112 | new_xh->xh_num_buckets = cpu_to_le16(num_buckets / 2); | ||
| 3113 | |||
| 3114 | if (first_hash) | ||
| 3115 | *first_hash = le32_to_cpu( | ||
| 3116 | new_xh->xh_entries[0].xe_name_hash); | ||
| 3117 | new_first_bh = new_bh; | ||
| 3118 | get_bh(new_first_bh); | ||
| 3119 | } | ||
| 3120 | |||
| 3121 | ocfs2_journal_dirty(handle, new_bh); | ||
| 3122 | |||
| 3123 | if (*header_bh == old_bh) { | ||
| 3124 | brelse(*header_bh); | ||
| 3125 | *header_bh = new_bh; | ||
| 3126 | get_bh(*header_bh); | ||
| 3127 | |||
| 3128 | brelse(*first_bh); | ||
| 3129 | *first_bh = new_first_bh; | ||
| 3130 | get_bh(*first_bh); | ||
| 3131 | } | ||
| 3132 | brelse(new_bh); | ||
| 3133 | brelse(old_bh); | ||
| 3134 | } | ||
| 3135 | |||
| 3136 | le16_add_cpu(&xh->xh_num_buckets, -(num_buckets / 2)); | ||
| 3137 | |||
| 3138 | ocfs2_journal_dirty(handle, prev_bh); | ||
| 3139 | out: | ||
| 3140 | brelse(prev_bh); | ||
| 3141 | brelse(new_first_bh); | ||
| 3142 | return ret; | ||
| 3143 | } | ||
| 3144 | |||
| 3145 | static int ocfs2_read_xattr_bucket(struct inode *inode, | ||
| 3146 | u64 blkno, | ||
| 3147 | struct buffer_head **bhs, | ||
| 3148 | int new) | ||
| 3149 | { | ||
| 3150 | int ret = 0; | ||
| 3151 | u16 i, blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3152 | |||
| 3153 | if (!new) | ||
| 3154 | return ocfs2_read_blocks(inode, blkno, | ||
| 3155 | blk_per_bucket, bhs, 0); | ||
| 3156 | |||
| 3157 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 3158 | bhs[i] = sb_getblk(inode->i_sb, blkno + i); | ||
| 3159 | if (bhs[i] == NULL) { | ||
| 3160 | ret = -EIO; | ||
| 3161 | mlog_errno(ret); | ||
| 3162 | break; | ||
| 3163 | } | ||
| 3164 | ocfs2_set_new_buffer_uptodate(inode, bhs[i]); | ||
| 3165 | } | ||
| 3166 | |||
| 3167 | return ret; | ||
| 3168 | } | ||
| 3169 | |||
| 3170 | /* | ||
| 3171 | * Move half of the xattrs in the old bucket (blk) to the new bucket (new_blk). | ||
| 3172 | * first_hash will record the 1st hash of the new bucket. | ||
| 3173 | */ | ||
| 3174 | static int ocfs2_half_xattr_bucket(struct inode *inode, | ||
| 3175 | handle_t *handle, | ||
| 3176 | u64 blk, | ||
| 3177 | u64 new_blk, | ||
| 3178 | u32 *first_hash, | ||
| 3179 | int new_bucket_head) | ||
| 3180 | { | ||
| 3181 | int ret, i; | ||
| 3182 | u16 count, start, len, name_value_len, xe_len, name_offset; | ||
| 3183 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3184 | struct buffer_head **s_bhs, **t_bhs = NULL; | ||
| 3185 | struct ocfs2_xattr_header *xh; | ||
| 3186 | struct ocfs2_xattr_entry *xe; | ||
| 3187 | int blocksize = inode->i_sb->s_blocksize; | ||
| 3188 | |||
| 3189 | mlog(0, "move half of xattrs from bucket %llu to %llu\n", | ||
| 3190 | blk, new_blk); | ||
| 3191 | |||
| 3192 | s_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | ||
| 3193 | if (!s_bhs) | ||
| 3194 | return -ENOMEM; | ||
| 3195 | |||
| 3196 | ret = ocfs2_read_xattr_bucket(inode, blk, s_bhs, 0); | ||
| 3197 | if (ret) { | ||
| 3198 | mlog_errno(ret); | ||
| 3199 | goto out; | ||
| 3200 | } | ||
| 3201 | |||
| 3202 | ret = ocfs2_journal_access(handle, inode, s_bhs[0], | ||
| 3203 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3204 | if (ret) { | ||
| 3205 | mlog_errno(ret); | ||
| 3206 | goto out; | ||
| 3207 | } | ||
| 3208 | |||
| 3209 | t_bhs = kcalloc(blk_per_bucket, sizeof(struct buffer_head *), GFP_NOFS); | ||
| 3210 | if (!t_bhs) { | ||
| 3211 | ret = -ENOMEM; | ||
| 3212 | goto out; | ||
| 3213 | } | ||
| 3214 | |||
| 3215 | ret = ocfs2_read_xattr_bucket(inode, new_blk, t_bhs, new_bucket_head); | ||
| 3216 | if (ret) { | ||
| 3217 | mlog_errno(ret); | ||
| 3218 | goto out; | ||
| 3219 | } | ||
| 3220 | |||
| 3221 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 3222 | ret = ocfs2_journal_access(handle, inode, t_bhs[i], | ||
| 3223 | OCFS2_JOURNAL_ACCESS_CREATE); | ||
| 3224 | if (ret) { | ||
| 3225 | mlog_errno(ret); | ||
| 3226 | goto out; | ||
| 3227 | } | ||
| 3228 | } | ||
| 3229 | |||
| 3230 | /* Copy the whole bucket to the new one first. */ | ||
| 3231 | for (i = 0; i < blk_per_bucket; i++) | ||
| 3232 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | ||
| 3233 | |||
| 3234 | /* update the new bucket. */ | ||
| 3235 | xh = (struct ocfs2_xattr_header *)t_bhs[0]->b_data; | ||
| 3236 | count = le16_to_cpu(xh->xh_count); | ||
| 3237 | start = count / 2; | ||
| 3238 | |||
| 3239 | /* | ||
| 3240 | * Calculate the total name/value len and xh_free_start for | ||
| 3241 | * the old bucket first. | ||
| 3242 | */ | ||
| 3243 | name_offset = OCFS2_XATTR_BUCKET_SIZE; | ||
| 3244 | name_value_len = 0; | ||
| 3245 | for (i = 0; i < start; i++) { | ||
| 3246 | xe = &xh->xh_entries[i]; | ||
| 3247 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
| 3248 | if (ocfs2_xattr_is_local(xe)) | ||
| 3249 | xe_len += | ||
| 3250 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
| 3251 | else | ||
| 3252 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
| 3253 | name_value_len += xe_len; | ||
| 3254 | if (le16_to_cpu(xe->xe_name_offset) < name_offset) | ||
| 3255 | name_offset = le16_to_cpu(xe->xe_name_offset); | ||
| 3256 | } | ||
| 3257 | |||
| 3258 | /* | ||
| 3259 | * Now begin the modification to the new bucket. | ||
| 3260 | * | ||
| 3261 | * In the new bucket, we just move the xattr entries to the beginning | ||
| 3262 | * and don't touch the name/value. So there will be some holes in the | ||
| 3263 | * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is | ||
| 3264 | * called. | ||
| 3265 | */ | ||
| 3266 | xe = &xh->xh_entries[start]; | ||
| 3267 | len = sizeof(struct ocfs2_xattr_entry) * (count - start); | ||
| 3268 | mlog(0, "mv xattr entry len %d from %d to %d\n", len, | ||
| 3269 | (int)((char *)xe - (char *)xh), | ||
| 3270 | (int)((char *)xh->xh_entries - (char *)xh)); | ||
| 3271 | memmove((char *)xh->xh_entries, (char *)xe, len); | ||
| 3272 | xe = &xh->xh_entries[count - start]; | ||
| 3273 | len = sizeof(struct ocfs2_xattr_entry) * start; | ||
| 3274 | memset((char *)xe, 0, len); | ||
| 3275 | |||
| 3276 | le16_add_cpu(&xh->xh_count, -start); | ||
| 3277 | le16_add_cpu(&xh->xh_name_value_len, -name_value_len); | ||
| 3278 | |||
| 3279 | /* Calculate xh_free_start for the new bucket. */ | ||
| 3280 | xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | ||
| 3281 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
| 3282 | xe = &xh->xh_entries[i]; | ||
| 3283 | xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
| 3284 | if (ocfs2_xattr_is_local(xe)) | ||
| 3285 | xe_len += | ||
| 3286 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
| 3287 | else | ||
| 3288 | xe_len += OCFS2_XATTR_ROOT_SIZE; | ||
| 3289 | if (le16_to_cpu(xe->xe_name_offset) < | ||
| 3290 | le16_to_cpu(xh->xh_free_start)) | ||
| 3291 | xh->xh_free_start = xe->xe_name_offset; | ||
| 3292 | } | ||
| 3293 | |||
| 3294 | /* set xh->xh_num_buckets for the new xh. */ | ||
| 3295 | if (new_bucket_head) | ||
| 3296 | xh->xh_num_buckets = cpu_to_le16(1); | ||
| 3297 | else | ||
| 3298 | xh->xh_num_buckets = 0; | ||
| 3299 | |||
| 3300 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 3301 | ret = ocfs2_journal_dirty(handle, t_bhs[i]); | ||
| 3302 | if (ret) | ||
| 3303 | mlog_errno(ret); | ||
| 3304 | } | ||
| 3305 | |||
| 3306 | /* store the first_hash of the new bucket. */ | ||
| 3307 | if (first_hash) | ||
| 3308 | *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
| 3309 | |||
| 3310 | /* | ||
| 3311 | * Now only update the 1st block of the old bucket. | ||
| 3312 | * Please note that the entries have already been sorted above. | ||
| 3313 | */ | ||
| 3314 | xh = (struct ocfs2_xattr_header *)s_bhs[0]->b_data; | ||
| 3315 | memset(&xh->xh_entries[start], 0, | ||
| 3316 | sizeof(struct ocfs2_xattr_entry) * (count - start)); | ||
| 3317 | xh->xh_count = cpu_to_le16(start); | ||
| 3318 | xh->xh_free_start = cpu_to_le16(name_offset); | ||
| 3319 | xh->xh_name_value_len = cpu_to_le16(name_value_len); | ||
| 3320 | |||
| 3321 | ret = ocfs2_journal_dirty(handle, s_bhs[0]); | ||
| 3322 | if (ret) | ||
| 3323 | mlog_errno(ret); | ||
| 3324 | |||
| 3325 | out: | ||
| 3326 | if (s_bhs) { | ||
| 3327 | for (i = 0; i < blk_per_bucket; i++) | ||
| 3328 | brelse(s_bhs[i]); | ||
| 3329 | } | ||
| 3330 | kfree(s_bhs); | ||
| 3331 | |||
| 3332 | if (t_bhs) { | ||
| 3333 | for (i = 0; i < blk_per_bucket; i++) | ||
| 3334 | brelse(t_bhs[i]); | ||
| 3335 | } | ||
| 3336 | kfree(t_bhs); | ||
| 3337 | |||
| 3338 | return ret; | ||
| 3339 | } | ||
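As a side note on the split above: a minimal user-space sketch of the entry-split arithmetic in ocfs2_half_xattr_bucket(), using a simplified stand-in entry type (the real struct ocfs2_xattr_entry carries name offset, length and flags, and the split also recomputes xh_free_start and xh_name_value_len):

/*
 * Illustrative only: user-space model of the entry split done by
 * ocfs2_half_xattr_bucket().  Entries [count/2, count) move to the
 * head of the new bucket; the old bucket keeps [0, count/2).
 * "struct entry" is a simplified stand-in for ocfs2_xattr_entry.
 */
#include <stdio.h>
#include <string.h>

struct entry { unsigned int hash; };

static void split_entries(struct entry *old_e, int count, struct entry *new_e)
{
	int start = count / 2;

	/* new bucket: the tail half becomes its beginning */
	memmove(new_e, &old_e[start], (count - start) * sizeof(*old_e));
	/* old bucket: zero the moved tail, keep the first half */
	memset(&old_e[start], 0, (count - start) * sizeof(*old_e));
}

int main(void)
{
	struct entry old_e[6] = { {1}, {2}, {3}, {4}, {5}, {6} };
	struct entry new_e[6] = { {0} };
	int i;

	split_entries(old_e, 6, new_e);
	for (i = 0; i < 3; i++)
		printf("old[%d]=%u new[%d]=%u\n", i, old_e[i].hash, i, new_e[i].hash);
	return 0;
}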
| 3340 | |||
| 3341 | /* | ||
| 3342 | * Copy xattr from one bucket to another bucket. | ||
| 3343 | * | ||
| 3344 | * The caller must make sure that the journal transaction | ||
| 3345 | * has enough space for journaling. | ||
| 3346 | */ | ||
| 3347 | static int ocfs2_cp_xattr_bucket(struct inode *inode, | ||
| 3348 | handle_t *handle, | ||
| 3349 | u64 s_blkno, | ||
| 3350 | u64 t_blkno, | ||
| 3351 | int t_is_new) | ||
| 3352 | { | ||
| 3353 | int ret, i; | ||
| 3354 | int blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3355 | int blocksize = inode->i_sb->s_blocksize; | ||
| 3356 | struct buffer_head **s_bhs, **t_bhs = NULL; | ||
| 3357 | |||
| 3358 | BUG_ON(s_blkno == t_blkno); | ||
| 3359 | |||
| 3360 | mlog(0, "cp bucket %llu to %llu, target is %d\n", | ||
| 3361 | s_blkno, t_blkno, t_is_new); | ||
| 3362 | |||
| 3363 | s_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
| 3364 | GFP_NOFS); | ||
| 3365 | if (!s_bhs) | ||
| 3366 | return -ENOMEM; | ||
| 3367 | |||
| 3368 | ret = ocfs2_read_xattr_bucket(inode, s_blkno, s_bhs, 0); | ||
| 3369 | if (ret) | ||
| 3370 | goto out; | ||
| 3371 | |||
| 3372 | t_bhs = kzalloc(sizeof(struct buffer_head *) * blk_per_bucket, | ||
| 3373 | GFP_NOFS); | ||
| 3374 | if (!t_bhs) { | ||
| 3375 | ret = -ENOMEM; | ||
| 3376 | goto out; | ||
| 3377 | } | ||
| 3378 | |||
| 3379 | ret = ocfs2_read_xattr_bucket(inode, t_blkno, t_bhs, t_is_new); | ||
| 3380 | if (ret) | ||
| 3381 | goto out; | ||
| 3382 | |||
| 3383 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 3384 | ret = ocfs2_journal_access(handle, inode, t_bhs[i], | ||
| 3385 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3386 | if (ret) | ||
| 3387 | goto out; | ||
| 3388 | } | ||
| 3389 | |||
| 3390 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 3391 | memcpy(t_bhs[i]->b_data, s_bhs[i]->b_data, blocksize); | ||
| 3392 | ocfs2_journal_dirty(handle, t_bhs[i]); | ||
| 3393 | } | ||
| 3394 | |||
| 3395 | out: | ||
| 3396 | if (s_bhs) { | ||
| 3397 | for (i = 0; i < blk_per_bucket; i++) | ||
| 3398 | brelse(s_bhs[i]); | ||
| 3399 | } | ||
| 3400 | kfree(s_bhs); | ||
| 3401 | |||
| 3402 | if (t_bhs) { | ||
| 3403 | for (i = 0; i < blk_per_bucket; i++) | ||
| 3404 | brelse(t_bhs[i]); | ||
| 3405 | } | ||
| 3406 | kfree(t_bhs); | ||
| 3407 | |||
| 3408 | return ret; | ||
| 3409 | } | ||
| 3410 | |||
| 3411 | /* | ||
| 3412 | * Copy one xattr cluster from src_blk to to_blk. | ||
| 3413 | * The to_blk will become the first bucket header of the cluster, so its | ||
| 3414 | * xh_num_buckets will be initialized to the number of buckets in the cluster. | ||
| 3415 | */ | ||
| 3416 | static int ocfs2_cp_xattr_cluster(struct inode *inode, | ||
| 3417 | handle_t *handle, | ||
| 3418 | struct buffer_head *first_bh, | ||
| 3419 | u64 src_blk, | ||
| 3420 | u64 to_blk, | ||
| 3421 | u32 *first_hash) | ||
| 3422 | { | ||
| 3423 | int i, ret, credits; | ||
| 3424 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 3425 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
| 3426 | int num_buckets = ocfs2_xattr_buckets_per_cluster(osb); | ||
| 3427 | struct buffer_head *bh = NULL; | ||
| 3428 | struct ocfs2_xattr_header *xh; | ||
| 3429 | u64 to_blk_start = to_blk; | ||
| 3430 | |||
| 3431 | mlog(0, "cp xattrs from cluster %llu to %llu\n", src_blk, to_blk); | ||
| 3432 | |||
| 3433 | /* | ||
| 3434 | * We need to update the new cluster and 1 more for the update of | ||
| 3435 | * the 1st bucket of the previous extent rec. | ||
| 3436 | */ | ||
| 3437 | credits = bpc + 1; | ||
| 3438 | ret = ocfs2_extend_trans(handle, credits); | ||
| 3439 | if (ret) { | ||
| 3440 | mlog_errno(ret); | ||
| 3441 | goto out; | ||
| 3442 | } | ||
| 3443 | |||
| 3444 | ret = ocfs2_journal_access(handle, inode, first_bh, | ||
| 3445 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3446 | if (ret) { | ||
| 3447 | mlog_errno(ret); | ||
| 3448 | goto out; | ||
| 3449 | } | ||
| 3450 | |||
| 3451 | for (i = 0; i < num_buckets; i++) { | ||
| 3452 | ret = ocfs2_cp_xattr_bucket(inode, handle, | ||
| 3453 | src_blk, to_blk, 1); | ||
| 3454 | if (ret) { | ||
| 3455 | mlog_errno(ret); | ||
| 3456 | goto out; | ||
| 3457 | } | ||
| 3458 | |||
| 3459 | src_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3460 | to_blk += ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3461 | } | ||
| 3462 | |||
| 3463 | /* update the old bucket header. */ | ||
| 3464 | xh = (struct ocfs2_xattr_header *)first_bh->b_data; | ||
| 3465 | le16_add_cpu(&xh->xh_num_buckets, -num_buckets); | ||
| 3466 | |||
| 3467 | ocfs2_journal_dirty(handle, first_bh); | ||
| 3468 | |||
| 3469 | /* update the new bucket header. */ | ||
| 3470 | ret = ocfs2_read_block(inode, to_blk_start, &bh); | ||
| 3471 | if (ret < 0) { | ||
| 3472 | mlog_errno(ret); | ||
| 3473 | goto out; | ||
| 3474 | } | ||
| 3475 | |||
| 3476 | ret = ocfs2_journal_access(handle, inode, bh, | ||
| 3477 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3478 | if (ret) { | ||
| 3479 | mlog_errno(ret); | ||
| 3480 | goto out; | ||
| 3481 | } | ||
| 3482 | |||
| 3483 | xh = (struct ocfs2_xattr_header *)bh->b_data; | ||
| 3484 | xh->xh_num_buckets = cpu_to_le16(num_buckets); | ||
| 3485 | |||
| 3486 | ocfs2_journal_dirty(handle, bh); | ||
| 3487 | |||
| 3488 | if (first_hash) | ||
| 3489 | *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
| 3490 | out: | ||
| 3491 | brelse(bh); | ||
| 3492 | return ret; | ||
| 3493 | } | ||
| 3494 | |||
| 3495 | /* | ||
| 3496 | * Move half of the xattrs in this cluster to the new cluster. | ||
| 3497 | * This function should only be called when bucket size == cluster size. | ||
| 3498 | * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead. | ||
| 3499 | */ | ||
| 3500 | static int ocfs2_half_xattr_cluster(struct inode *inode, | ||
| 3501 | handle_t *handle, | ||
| 3502 | u64 prev_blk, | ||
| 3503 | u64 new_blk, | ||
| 3504 | u32 *first_hash) | ||
| 3505 | { | ||
| 3506 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3507 | int ret, credits = 2 * blk_per_bucket; | ||
| 3508 | |||
| 3509 | BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize); | ||
| 3510 | |||
| 3511 | ret = ocfs2_extend_trans(handle, credits); | ||
| 3512 | if (ret) { | ||
| 3513 | mlog_errno(ret); | ||
| 3514 | return ret; | ||
| 3515 | } | ||
| 3516 | |||
| 3517 | /* Move half of the xattr in start_blk to the next bucket. */ | ||
| 3518 | return ocfs2_half_xattr_bucket(inode, handle, prev_blk, | ||
| 3519 | new_blk, first_hash, 1); | ||
| 3520 | } | ||
| 3521 | |||
| 3522 | /* | ||
| 3523 | * Move some xattrs from the old cluster to the new one since they are not | ||
| 3524 | * contiguous in ocfs2 xattr tree. | ||
| 3525 | * | ||
| 3526 | * new_blk starts a new separate cluster, and we will move some xattrs from | ||
| 3527 | * prev_blk to it. v_start will be set as the first name hash value in this | ||
| 3528 | * new cluster so that it can be used as e_cpos during tree insertion and | ||
| 3529 | * won't collide with our original b-tree operations. first_bh and header_bh | ||
| 3530 | * will also be updated since they will be used in ocfs2_extend_xattr_bucket | ||
| 3531 | * to extend the insert bucket. | ||
| 3532 | * | ||
| 3533 | * The question is: how many xattrs should we move to the new cluster, and when | ||
| 3534 | * should we update first_bh and header_bh? | ||
| 3535 | * 1. If cluster size > bucket size, that means the previous cluster has more | ||
| 3536 | * than 1 bucket, so just move half of its buckets into the new cluster and | ||
| 3537 | * update the first_bh and header_bh if the insert bucket has been moved | ||
| 3538 | * to the new cluster. | ||
| 3539 | * 2. If cluster_size == bucket_size: | ||
| 3540 | * a) If the previous extent rec has more than one cluster and the insert | ||
| 3541 | * place isn't in the last cluster, copy the entire last cluster to the | ||
| 3542 | * new one. This time, we don't need to update the first_bh and header_bh | ||
| 3543 | * since they will not be moved into the new cluster. | ||
| 3544 | * b) Otherwise, move the bottom half of the xattrs in the last cluster into | ||
| 3545 | * the new one. And we set the extend flag to zero if the insert place is | ||
| 3546 | * moved into the newly allocated cluster since no extend is needed. | ||
| 3547 | */ | ||
| 3548 | static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode, | ||
| 3549 | handle_t *handle, | ||
| 3550 | struct buffer_head **first_bh, | ||
| 3551 | struct buffer_head **header_bh, | ||
| 3552 | u64 new_blk, | ||
| 3553 | u64 prev_blk, | ||
| 3554 | u32 prev_clusters, | ||
| 3555 | u32 *v_start, | ||
| 3556 | int *extend) | ||
| 3557 | { | ||
| 3558 | int ret = 0; | ||
| 3559 | int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
| 3560 | |||
| 3561 | mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n", | ||
| 3562 | prev_blk, prev_clusters, new_blk); | ||
| 3563 | |||
| 3564 | if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) | ||
| 3565 | ret = ocfs2_mv_xattr_bucket_cross_cluster(inode, | ||
| 3566 | handle, | ||
| 3567 | first_bh, | ||
| 3568 | header_bh, | ||
| 3569 | new_blk, | ||
| 3570 | prev_blk, | ||
| 3571 | prev_clusters, | ||
| 3572 | v_start); | ||
| 3573 | else { | ||
| 3574 | u64 last_blk = prev_blk + bpc * (prev_clusters - 1); | ||
| 3575 | |||
| 3576 | if (prev_clusters > 1 && (*header_bh)->b_blocknr != last_blk) | ||
| 3577 | ret = ocfs2_cp_xattr_cluster(inode, handle, *first_bh, | ||
| 3578 | last_blk, new_blk, | ||
| 3579 | v_start); | ||
| 3580 | else { | ||
| 3581 | ret = ocfs2_half_xattr_cluster(inode, handle, | ||
| 3582 | last_blk, new_blk, | ||
| 3583 | v_start); | ||
| 3584 | |||
| 3585 | if ((*header_bh)->b_blocknr == last_blk && extend) | ||
| 3586 | *extend = 0; | ||
| 3587 | } | ||
| 3588 | } | ||
| 3589 | |||
| 3590 | return ret; | ||
| 3591 | } | ||
| 3592 | |||
| 3593 | /* | ||
| 3594 | * Add a new cluster for xattr storage. | ||
| 3595 | * | ||
| 3596 | * If the new cluster is contiguous with the previous one, it will be | ||
| 3597 | * appended to the same extent record, and num_clusters will be updated. | ||
| 3598 | * If not, we will insert a new extent for it and move some xattrs in | ||
| 3599 | * the last cluster into the new allocated one. | ||
| 3600 | * We also need to limit the maximum size of a btree leaf, otherwise we'll | ||
| 3601 | * lose the benefits of hashing because we'll have to search large leaves. | ||
| 3602 | * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE (or clustersize, | ||
| 3603 | * if it's bigger). | ||
| 3604 | * | ||
| 3605 | * first_bh is the first block of the previous extent rec and header_bh | ||
| 3606 | * indicates the bucket we will insert the new xattrs. They will be updated | ||
| 3607 | * when the header_bh is moved into the new cluster. | ||
| 3608 | */ | ||
| 3609 | static int ocfs2_add_new_xattr_cluster(struct inode *inode, | ||
| 3610 | struct buffer_head *root_bh, | ||
| 3611 | struct buffer_head **first_bh, | ||
| 3612 | struct buffer_head **header_bh, | ||
| 3613 | u32 *num_clusters, | ||
| 3614 | u32 prev_cpos, | ||
| 3615 | u64 prev_blkno, | ||
| 3616 | int *extend) | ||
| 3617 | { | ||
| 3618 | int ret, credits; | ||
| 3619 | u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1); | ||
| 3620 | u32 prev_clusters = *num_clusters; | ||
| 3621 | u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0; | ||
| 3622 | u64 block; | ||
| 3623 | handle_t *handle = NULL; | ||
| 3624 | struct ocfs2_alloc_context *data_ac = NULL; | ||
| 3625 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 3626 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 3627 | struct ocfs2_extent_tree et; | ||
| 3628 | |||
| 3629 | mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, " | ||
| 3630 | "previous xattr blkno = %llu\n", | ||
| 3631 | (unsigned long long)OCFS2_I(inode)->ip_blkno, | ||
| 3632 | prev_cpos, prev_blkno); | ||
| 3633 | |||
| 3634 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | ||
| 3635 | |||
| 3636 | ret = ocfs2_lock_allocators(inode, &et, clusters_to_add, 0, | ||
| 3637 | &data_ac, &meta_ac); | ||
| 3638 | if (ret) { | ||
| 3639 | mlog_errno(ret); | ||
| 3640 | goto leave; | ||
| 3641 | } | ||
| 3642 | |||
| 3643 | credits = ocfs2_calc_extend_credits(osb->sb, et.et_root_el, | ||
| 3644 | clusters_to_add); | ||
| 3645 | handle = ocfs2_start_trans(osb, credits); | ||
| 3646 | if (IS_ERR(handle)) { | ||
| 3647 | ret = PTR_ERR(handle); | ||
| 3648 | handle = NULL; | ||
| 3649 | mlog_errno(ret); | ||
| 3650 | goto leave; | ||
| 3651 | } | ||
| 3652 | |||
| 3653 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 3654 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3655 | if (ret < 0) { | ||
| 3656 | mlog_errno(ret); | ||
| 3657 | goto leave; | ||
| 3658 | } | ||
| 3659 | |||
| 3660 | ret = __ocfs2_claim_clusters(osb, handle, data_ac, 1, | ||
| 3661 | clusters_to_add, &bit_off, &num_bits); | ||
| 3662 | if (ret < 0) { | ||
| 3663 | if (ret != -ENOSPC) | ||
| 3664 | mlog_errno(ret); | ||
| 3665 | goto leave; | ||
| 3666 | } | ||
| 3667 | |||
| 3668 | BUG_ON(num_bits > clusters_to_add); | ||
| 3669 | |||
| 3670 | block = ocfs2_clusters_to_blocks(osb->sb, bit_off); | ||
| 3671 | mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n", | ||
| 3672 | num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno); | ||
| 3673 | |||
| 3674 | if (prev_blkno + prev_clusters * bpc == block && | ||
| 3675 | (prev_clusters + num_bits) << osb->s_clustersize_bits <= | ||
| 3676 | OCFS2_MAX_XATTR_TREE_LEAF_SIZE) { | ||
| 3677 | /* | ||
| 3678 | * If this cluster is contiguous with the old one and | ||
| 3679 | * adding this new cluster doesn't surpass the limit of | ||
| 3680 | * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be | ||
| 3681 | * initialized and used like other buckets in the previous | ||
| 3682 | * cluster. | ||
| 3683 | * So add it as a contiguous one. The caller will handle | ||
| 3684 | * its init process. | ||
| 3685 | */ | ||
| 3686 | v_start = prev_cpos + prev_clusters; | ||
| 3687 | *num_clusters = prev_clusters + num_bits; | ||
| 3688 | mlog(0, "Add contiguous %u clusters to previous extent rec.\n", | ||
| 3689 | num_bits); | ||
| 3690 | } else { | ||
| 3691 | ret = ocfs2_adjust_xattr_cross_cluster(inode, | ||
| 3692 | handle, | ||
| 3693 | first_bh, | ||
| 3694 | header_bh, | ||
| 3695 | block, | ||
| 3696 | prev_blkno, | ||
| 3697 | prev_clusters, | ||
| 3698 | &v_start, | ||
| 3699 | extend); | ||
| 3700 | if (ret) { | ||
| 3701 | mlog_errno(ret); | ||
| 3702 | goto leave; | ||
| 3703 | } | ||
| 3704 | } | ||
| 3705 | |||
| 3706 | if (handle->h_buffer_credits < credits) { | ||
| 3707 | /* | ||
| 3708 | * The journal has been restarted before and doesn't | ||
| 3709 | * have enough space for the insertion, so extend it | ||
| 3710 | * here. | ||
| 3711 | */ | ||
| 3712 | ret = ocfs2_extend_trans(handle, credits); | ||
| 3713 | if (ret) { | ||
| 3714 | mlog_errno(ret); | ||
| 3715 | goto leave; | ||
| 3716 | } | ||
| 3717 | } | ||
| 3718 | mlog(0, "Insert %u clusters at block %llu for xattr at %u\n", | ||
| 3719 | num_bits, block, v_start); | ||
| 3720 | ret = ocfs2_insert_extent(osb, handle, inode, &et, v_start, block, | ||
| 3721 | num_bits, 0, meta_ac); | ||
| 3722 | if (ret < 0) { | ||
| 3723 | mlog_errno(ret); | ||
| 3724 | goto leave; | ||
| 3725 | } | ||
| 3726 | |||
| 3727 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
| 3728 | if (ret < 0) { | ||
| 3729 | mlog_errno(ret); | ||
| 3730 | goto leave; | ||
| 3731 | } | ||
| 3732 | |||
| 3733 | leave: | ||
| 3734 | if (handle) | ||
| 3735 | ocfs2_commit_trans(osb, handle); | ||
| 3736 | if (data_ac) | ||
| 3737 | ocfs2_free_alloc_context(data_ac); | ||
| 3738 | if (meta_ac) | ||
| 3739 | ocfs2_free_alloc_context(meta_ac); | ||
| 3740 | |||
| 3741 | return ret; | ||
| 3742 | } | ||
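For illustration only, a small stand-alone sketch of the contiguity test above that decides whether the freshly claimed cluster is appended to the previous extent record or inserted as a new extent; the block numbers, cluster size and leaf cap below are made-up values, not read from a real superblock:

/*
 * Illustrative only: the contiguity check used before appending the
 * claimed cluster to the previous extent record.  All numbers are
 * invented for the example (8 blocks per cluster, 4K clusters, 64K
 * leaf cap); the kernel reads them from the superblock.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long prev_blkno = 1000;	/* first block of old extent */
	unsigned long long new_block = 1024;	/* start of claimed cluster */
	unsigned int prev_clusters = 3, num_bits = 1;
	unsigned int bpc = 8;			/* blocks per cluster */
	unsigned int clustersize_bits = 12;	/* 4K clusters */
	unsigned long long leaf_cap = 64 * 1024;

	int contiguous =
		prev_blkno + (unsigned long long)prev_clusters * bpc == new_block &&
		((unsigned long long)(prev_clusters + num_bits) << clustersize_bits) <= leaf_cap;

	printf("append to previous extent rec: %s\n",
	       contiguous ? "yes" : "no, insert a new extent");
	return 0;
}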
| 3743 | |||
| 3744 | /* | ||
| 3745 | * Extend a new xattr bucket and move xattrs to the end one by one until | ||
| 3746 | * we reach start_bh. Only half of the xattrs in start_bh are moved to the bucket after it. | ||
| 3747 | */ | ||
| 3748 | static int ocfs2_extend_xattr_bucket(struct inode *inode, | ||
| 3749 | struct buffer_head *first_bh, | ||
| 3750 | struct buffer_head *start_bh, | ||
| 3751 | u32 num_clusters) | ||
| 3752 | { | ||
| 3753 | int ret, credits; | ||
| 3754 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 3755 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 3756 | u64 start_blk = start_bh->b_blocknr, end_blk; | ||
| 3757 | u32 num_buckets = num_clusters * ocfs2_xattr_buckets_per_cluster(osb); | ||
| 3758 | handle_t *handle; | ||
| 3759 | struct ocfs2_xattr_header *first_xh = | ||
| 3760 | (struct ocfs2_xattr_header *)first_bh->b_data; | ||
| 3761 | u16 bucket = le16_to_cpu(first_xh->xh_num_buckets); | ||
| 3762 | |||
| 3763 | mlog(0, "extend xattr bucket in %llu, xattr extend rec starting " | ||
| 3764 | "from %llu, len = %u\n", start_blk, | ||
| 3765 | (unsigned long long)first_bh->b_blocknr, num_clusters); | ||
| 3766 | |||
| 3767 | BUG_ON(bucket >= num_buckets); | ||
| 3768 | |||
| 3769 | end_blk = first_bh->b_blocknr + (bucket - 1) * blk_per_bucket; | ||
| 3770 | |||
| 3771 | /* | ||
| 3772 | * We will touch all the buckets after start_bh (including it). | ||
| 3773 | * Add one more bucket and modify the first_bh. | ||
| 3774 | */ | ||
| 3775 | credits = end_blk - start_blk + 2 * blk_per_bucket + 1; | ||
| 3776 | handle = ocfs2_start_trans(osb, credits); | ||
| 3777 | if (IS_ERR(handle)) { | ||
| 3778 | ret = PTR_ERR(handle); | ||
| 3779 | handle = NULL; | ||
| 3780 | mlog_errno(ret); | ||
| 3781 | goto out; | ||
| 3782 | } | ||
| 3783 | |||
| 3784 | ret = ocfs2_journal_access(handle, inode, first_bh, | ||
| 3785 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 3786 | if (ret) { | ||
| 3787 | mlog_errno(ret); | ||
| 3788 | goto commit; | ||
| 3789 | } | ||
| 3790 | |||
| 3791 | while (end_blk != start_blk) { | ||
| 3792 | ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk, | ||
| 3793 | end_blk + blk_per_bucket, 0); | ||
| 3794 | if (ret) | ||
| 3795 | goto commit; | ||
| 3796 | end_blk -= blk_per_bucket; | ||
| 3797 | } | ||
| 3798 | |||
| 3799 | /* Move half of the xattr in start_blk to the next bucket. */ | ||
| 3800 | ret = ocfs2_half_xattr_bucket(inode, handle, start_blk, | ||
| 3801 | start_blk + blk_per_bucket, NULL, 0); | ||
| 3802 | |||
| 3803 | le16_add_cpu(&first_xh->xh_num_buckets, 1); | ||
| 3804 | ocfs2_journal_dirty(handle, first_bh); | ||
| 3805 | |||
| 3806 | commit: | ||
| 3807 | ocfs2_commit_trans(osb, handle); | ||
| 3808 | out: | ||
| 3809 | return ret; | ||
| 3810 | } | ||
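A rough analogy for the bucket shifting performed by ocfs2_extend_xattr_bucket(): buckets behind the insert point are copied one slot towards the end, back to front, to open a gap for the half that will be split off. Plain ints stand in for whole buckets in this sketch, which is illustrative only:

/*
 * Illustrative only: the back-to-front shift that opens a gap next to
 * the bucket being split.  Plain ints stand in for whole buckets.
 */
#include <stdio.h>

int main(void)
{
	int bucket[6] = { 1, 2, 3, 4, 5, 0 };	/* slot 5 newly allocated */
	int insert = 2;				/* bucket we are splitting */
	int last = 4;				/* last bucket currently in use */
	int i;

	for (i = last; i > insert; i--)		/* copy from the end backwards */
		bucket[i + 1] = bucket[i];
	bucket[insert + 1] = 0;			/* gap for the split-off half */

	for (i = 0; i < 6; i++)
		printf("%d ", bucket[i]);
	printf("\n");				/* prints: 1 2 3 0 4 5 */
	return 0;
}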
| 3811 | |||
| 3812 | /* | ||
| 3813 | * Add new xattr bucket in an extent record and adjust the buckets accordingly. | ||
| 3814 | * xb_bh is the ocfs2_xattr_block. | ||
| 3815 | * We will move all the buckets starting from header_bh to the next place. As | ||
| 3816 | * for this one, half of its xattrs will be moved to the next one. | ||
| 3817 | * | ||
| 3818 | * We will allocate a new cluster if current cluster is full and adjust | ||
| 3819 | * header_bh and first_bh if the insert place is moved to the new cluster. | ||
| 3820 | */ | ||
| 3821 | static int ocfs2_add_new_xattr_bucket(struct inode *inode, | ||
| 3822 | struct buffer_head *xb_bh, | ||
| 3823 | struct buffer_head *header_bh) | ||
| 3824 | { | ||
| 3825 | struct ocfs2_xattr_header *first_xh = NULL; | ||
| 3826 | struct buffer_head *first_bh = NULL; | ||
| 3827 | struct ocfs2_xattr_block *xb = | ||
| 3828 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
| 3829 | struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root; | ||
| 3830 | struct ocfs2_extent_list *el = &xb_root->xt_list; | ||
| 3831 | struct ocfs2_xattr_header *xh = | ||
| 3832 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
| 3833 | u32 name_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash); | ||
| 3834 | struct super_block *sb = inode->i_sb; | ||
| 3835 | struct ocfs2_super *osb = OCFS2_SB(sb); | ||
| 3836 | int ret, num_buckets, extend = 1; | ||
| 3837 | u64 p_blkno; | ||
| 3838 | u32 e_cpos, num_clusters; | ||
| 3839 | |||
| 3840 | mlog(0, "Add new xattr bucket starting form %llu\n", | ||
| 3841 | (unsigned long long)header_bh->b_blocknr); | ||
| 3842 | |||
| 3843 | /* | ||
| 3844 | * Add a reference for header_bh here because it may be | ||
| 3845 | * changed in ocfs2_add_new_xattr_cluster and we need | ||
| 3846 | * to free it in the end. | ||
| 3847 | */ | ||
| 3848 | get_bh(header_bh); | ||
| 3849 | |||
| 3850 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos, | ||
| 3851 | &num_clusters, el); | ||
| 3852 | if (ret) { | ||
| 3853 | mlog_errno(ret); | ||
| 3854 | goto out; | ||
| 3855 | } | ||
| 3856 | |||
| 3857 | ret = ocfs2_read_block(inode, p_blkno, &first_bh); | ||
| 3858 | if (ret) { | ||
| 3859 | mlog_errno(ret); | ||
| 3860 | goto out; | ||
| 3861 | } | ||
| 3862 | |||
| 3863 | num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters; | ||
| 3864 | first_xh = (struct ocfs2_xattr_header *)first_bh->b_data; | ||
| 3865 | |||
| 3866 | if (num_buckets == le16_to_cpu(first_xh->xh_num_buckets)) { | ||
| 3867 | ret = ocfs2_add_new_xattr_cluster(inode, | ||
| 3868 | xb_bh, | ||
| 3869 | &first_bh, | ||
| 3870 | &header_bh, | ||
| 3871 | &num_clusters, | ||
| 3872 | e_cpos, | ||
| 3873 | p_blkno, | ||
| 3874 | &extend); | ||
| 3875 | if (ret) { | ||
| 3876 | mlog_errno(ret); | ||
| 3877 | goto out; | ||
| 3878 | } | ||
| 3879 | } | ||
| 3880 | |||
| 3881 | if (extend) | ||
| 3882 | ret = ocfs2_extend_xattr_bucket(inode, | ||
| 3883 | first_bh, | ||
| 3884 | header_bh, | ||
| 3885 | num_clusters); | ||
| 3886 | if (ret) | ||
| 3887 | mlog_errno(ret); | ||
| 3888 | out: | ||
| 3889 | brelse(first_bh); | ||
| 3890 | brelse(header_bh); | ||
| 3891 | return ret; | ||
| 3892 | } | ||
| 3893 | |||
| 3894 | static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode, | ||
| 3895 | struct ocfs2_xattr_bucket *bucket, | ||
| 3896 | int offs) | ||
| 3897 | { | ||
| 3898 | int block_off = offs >> inode->i_sb->s_blocksize_bits; | ||
| 3899 | |||
| 3900 | offs = offs % inode->i_sb->s_blocksize; | ||
| 3901 | return bucket->bhs[block_off]->b_data + offs; | ||
| 3902 | } | ||
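A quick stand-alone illustration of the offset arithmetic in ocfs2_xattr_bucket_get_val(): a byte offset within the bucket is split into a block index inside the bucket plus an offset inside that block. The 1K block size used here is an arbitrary assumption for the example:

/*
 * Illustrative only: splitting a bucket-relative offset into a block
 * index within the bucket plus an in-block offset.  The 1K block size
 * is assumed for the example.
 */
#include <stdio.h>

int main(void)
{
	unsigned int blocksize_bits = 10;		/* 1K blocks (assumed) */
	unsigned int blocksize = 1u << blocksize_bits;
	unsigned int offs = 2600;			/* offset inside the bucket */

	unsigned int block_off = offs >> blocksize_bits;
	unsigned int in_block = offs % blocksize;

	printf("bucket offset %u -> block %u, offset %u\n", offs, block_off, in_block);
	return 0;
}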
| 3903 | |||
| 3904 | /* | ||
| 3905 | * Handle the normal xattr set, including replace, delete and new. | ||
| 3906 | * | ||
| 3907 | * Note: "local" indicates the real data's locality. So we can't | ||
| 3908 | * just its bucket locality by its length. | ||
| 3909 | */ | ||
| 3910 | static void ocfs2_xattr_set_entry_normal(struct inode *inode, | ||
| 3911 | struct ocfs2_xattr_info *xi, | ||
| 3912 | struct ocfs2_xattr_search *xs, | ||
| 3913 | u32 name_hash, | ||
| 3914 | int local) | ||
| 3915 | { | ||
| 3916 | struct ocfs2_xattr_entry *last, *xe; | ||
| 3917 | int name_len = strlen(xi->name); | ||
| 3918 | struct ocfs2_xattr_header *xh = xs->header; | ||
| 3919 | u16 count = le16_to_cpu(xh->xh_count), start; | ||
| 3920 | size_t blocksize = inode->i_sb->s_blocksize; | ||
| 3921 | char *val; | ||
| 3922 | size_t offs, size, new_size; | ||
| 3923 | |||
| 3924 | last = &xh->xh_entries[count]; | ||
| 3925 | if (!xs->not_found) { | ||
| 3926 | xe = xs->here; | ||
| 3927 | offs = le16_to_cpu(xe->xe_name_offset); | ||
| 3928 | if (ocfs2_xattr_is_local(xe)) | ||
| 3929 | size = OCFS2_XATTR_SIZE(name_len) + | ||
| 3930 | OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
| 3931 | else | ||
| 3932 | size = OCFS2_XATTR_SIZE(name_len) + | ||
| 3933 | OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
| 3934 | |||
| 3935 | /* | ||
| 3936 | * If the new value will be stored outside, xi->value has been | ||
| 3937 | * initialized as an empty ocfs2_xattr_value_root, and the same | ||
| 3938 | * goes with xi->value_len, so we can set new_size safely here. | ||
| 3939 | * See ocfs2_xattr_set_in_bucket. | ||
| 3940 | */ | ||
| 3941 | new_size = OCFS2_XATTR_SIZE(name_len) + | ||
| 3942 | OCFS2_XATTR_SIZE(xi->value_len); | ||
| 3943 | |||
| 3944 | le16_add_cpu(&xh->xh_name_value_len, -size); | ||
| 3945 | if (xi->value) { | ||
| 3946 | if (new_size > size) | ||
| 3947 | goto set_new_name_value; | ||
| 3948 | |||
| 3949 | /* Now replace the old value with new one. */ | ||
| 3950 | if (local) | ||
| 3951 | xe->xe_value_size = cpu_to_le64(xi->value_len); | ||
| 3952 | else | ||
| 3953 | xe->xe_value_size = 0; | ||
| 3954 | |||
| 3955 | val = ocfs2_xattr_bucket_get_val(inode, | ||
| 3956 | &xs->bucket, offs); | ||
| 3957 | memset(val + OCFS2_XATTR_SIZE(name_len), 0, | ||
| 3958 | size - OCFS2_XATTR_SIZE(name_len)); | ||
| 3959 | if (OCFS2_XATTR_SIZE(xi->value_len) > 0) | ||
| 3960 | memcpy(val + OCFS2_XATTR_SIZE(name_len), | ||
| 3961 | xi->value, xi->value_len); | ||
| 3962 | |||
| 3963 | le16_add_cpu(&xh->xh_name_value_len, new_size); | ||
| 3964 | ocfs2_xattr_set_local(xe, local); | ||
| 3965 | return; | ||
| 3966 | } else { | ||
| 3967 | /* | ||
| 3968 | * Remove the old entry if there is more than one. | ||
| 3969 | * We don't remove the last entry so that we can | ||
| 3970 | * use it to indicate the hash value of the empty | ||
| 3971 | * bucket. | ||
| 3972 | */ | ||
| 3973 | last -= 1; | ||
| 3974 | le16_add_cpu(&xh->xh_count, -1); | ||
| 3975 | if (xh->xh_count) { | ||
| 3976 | memmove(xe, xe + 1, | ||
| 3977 | (void *)last - (void *)xe); | ||
| 3978 | memset(last, 0, | ||
| 3979 | sizeof(struct ocfs2_xattr_entry)); | ||
| 3980 | } else | ||
| 3981 | xh->xh_free_start = | ||
| 3982 | cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE); | ||
| 3983 | |||
| 3984 | return; | ||
| 3985 | } | ||
| 3986 | } else { | ||
| 3987 | /* find a new entry for insert. */ | ||
| 3988 | int low = 0, high = count - 1, tmp; | ||
| 3989 | struct ocfs2_xattr_entry *tmp_xe; | ||
| 3990 | |||
| 3991 | while (low <= high && count) { | ||
| 3992 | tmp = (low + high) / 2; | ||
| 3993 | tmp_xe = &xh->xh_entries[tmp]; | ||
| 3994 | |||
| 3995 | if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash)) | ||
| 3996 | low = tmp + 1; | ||
| 3997 | else if (name_hash < | ||
| 3998 | le32_to_cpu(tmp_xe->xe_name_hash)) | ||
| 3999 | high = tmp - 1; | ||
| 4000 | else { | ||
| 4001 | low = tmp; | ||
| 4002 | break; | ||
| 4003 | } | ||
| 4004 | } | ||
| 4005 | |||
| 4006 | xe = &xh->xh_entries[low]; | ||
| 4007 | if (low != count) | ||
| 4008 | memmove(xe + 1, xe, (void *)last - (void *)xe); | ||
| 4009 | |||
| 4010 | le16_add_cpu(&xh->xh_count, 1); | ||
| 4011 | memset(xe, 0, sizeof(struct ocfs2_xattr_entry)); | ||
| 4012 | xe->xe_name_hash = cpu_to_le32(name_hash); | ||
| 4013 | xe->xe_name_len = name_len; | ||
| 4014 | ocfs2_xattr_set_type(xe, xi->name_index); | ||
| 4015 | } | ||
| 4016 | |||
| 4017 | set_new_name_value: | ||
| 4018 | /* Insert the new name+value. */ | ||
| 4019 | size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(xi->value_len); | ||
| 4020 | |||
| 4021 | /* | ||
| 4022 | * We must make sure that the name/value pair | ||
| 4023 | * fits within a single block. | ||
| 4024 | */ | ||
| 4025 | offs = le16_to_cpu(xh->xh_free_start); | ||
| 4026 | start = offs - size; | ||
| 4027 | |||
| 4028 | if (start >> inode->i_sb->s_blocksize_bits != | ||
| 4029 | (offs - 1) >> inode->i_sb->s_blocksize_bits) { | ||
| 4030 | offs = offs - offs % blocksize; | ||
| 4031 | xh->xh_free_start = cpu_to_le16(offs); | ||
| 4032 | } | ||
| 4033 | |||
| 4034 | val = ocfs2_xattr_bucket_get_val(inode, | ||
| 4035 | &xs->bucket, offs - size); | ||
| 4036 | xe->xe_name_offset = cpu_to_le16(offs - size); | ||
| 4037 | |||
| 4038 | memset(val, 0, size); | ||
| 4039 | memcpy(val, xi->name, name_len); | ||
| 4040 | memcpy(val + OCFS2_XATTR_SIZE(name_len), xi->value, xi->value_len); | ||
| 4041 | |||
| 4042 | xe->xe_value_size = cpu_to_le64(xi->value_len); | ||
| 4043 | ocfs2_xattr_set_local(xe, local); | ||
| 4044 | xs->here = xe; | ||
| 4045 | le16_add_cpu(&xh->xh_free_start, -size); | ||
| 4046 | le16_add_cpu(&xh->xh_name_value_len, size); | ||
| 4047 | |||
| 4048 | return; | ||
| 4049 | } | ||
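The insertion path above locates the slot for a new entry with a binary search over the hash-ordered entry array. A stand-alone sketch of that search, operating on a bare array of hashes instead of full ocfs2_xattr_entry records:

/*
 * Illustrative only: the hash-ordered insertion-slot search, over a
 * bare array of hashes instead of full xattr entries.
 */
#include <stdio.h>

static int find_slot(const unsigned int *hash, int count, unsigned int name_hash)
{
	int low = 0, high = count - 1, tmp;

	while (low <= high && count) {
		tmp = (low + high) / 2;
		if (name_hash > hash[tmp])
			low = tmp + 1;
		else if (name_hash < hash[tmp])
			high = tmp - 1;
		else {
			low = tmp;
			break;
		}
	}
	return low;	/* the new entry is inserted at this index */
}

int main(void)
{
	unsigned int hash[] = { 10, 20, 30, 40 };

	printf("insert hash 25 at index %d\n", find_slot(hash, 4, 25));	/* 2 */
	printf("insert hash 50 at index %d\n", find_slot(hash, 4, 50));	/* 4 */
	return 0;
}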
| 4050 | |||
| 4051 | static int ocfs2_xattr_bucket_handle_journal(struct inode *inode, | ||
| 4052 | handle_t *handle, | ||
| 4053 | struct ocfs2_xattr_search *xs, | ||
| 4054 | struct buffer_head **bhs, | ||
| 4055 | u16 bh_num) | ||
| 4056 | { | ||
| 4057 | int ret = 0, off, block_off; | ||
| 4058 | struct ocfs2_xattr_entry *xe = xs->here; | ||
| 4059 | |||
| 4060 | /* | ||
| 4061 | * First calculate all the blocks we should journal_access | ||
| 4062 | * and journal_dirty. The first block should always be touched. | ||
| 4063 | */ | ||
| 4064 | ret = ocfs2_journal_dirty(handle, bhs[0]); | ||
| 4065 | if (ret) | ||
| 4066 | mlog_errno(ret); | ||
| 4067 | |||
| 4068 | /* calc the data. */ | ||
| 4069 | off = le16_to_cpu(xe->xe_name_offset); | ||
| 4070 | block_off = off >> inode->i_sb->s_blocksize_bits; | ||
| 4071 | ret = ocfs2_journal_dirty(handle, bhs[block_off]); | ||
| 4072 | if (ret) | ||
| 4073 | mlog_errno(ret); | ||
| 4074 | |||
| 4075 | return ret; | ||
| 4076 | } | ||
| 4077 | |||
| 4078 | /* | ||
| 4079 | * Set the xattr entry in the specified bucket. | ||
| 4080 | * The bucket is indicated by xs->bucket and it should have enough | ||
| 4081 | * space for the xattr insertion. | ||
| 4082 | */ | ||
| 4083 | static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode, | ||
| 4084 | struct ocfs2_xattr_info *xi, | ||
| 4085 | struct ocfs2_xattr_search *xs, | ||
| 4086 | u32 name_hash, | ||
| 4087 | int local) | ||
| 4088 | { | ||
| 4089 | int i, ret; | ||
| 4090 | handle_t *handle = NULL; | ||
| 4091 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 4092 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 4093 | |||
| 4094 | mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n", | ||
| 4095 | (unsigned long)xi->value_len, xi->name_index, | ||
| 4096 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr); | ||
| 4097 | |||
| 4098 | if (!xs->bucket.bhs[1]) { | ||
| 4099 | ret = ocfs2_read_blocks(inode, | ||
| 4100 | xs->bucket.bhs[0]->b_blocknr + 1, | ||
| 4101 | blk_per_bucket - 1, &xs->bucket.bhs[1], | ||
| 4102 | 0); | ||
| 4103 | if (ret) { | ||
| 4104 | mlog_errno(ret); | ||
| 4105 | goto out; | ||
| 4106 | } | ||
| 4107 | } | ||
| 4108 | |||
| 4109 | handle = ocfs2_start_trans(osb, blk_per_bucket); | ||
| 4110 | if (IS_ERR(handle)) { | ||
| 4111 | ret = PTR_ERR(handle); | ||
| 4112 | handle = NULL; | ||
| 4113 | mlog_errno(ret); | ||
| 4114 | goto out; | ||
| 4115 | } | ||
| 4116 | |||
| 4117 | for (i = 0; i < blk_per_bucket; i++) { | ||
| 4118 | ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[i], | ||
| 4119 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 4120 | if (ret < 0) { | ||
| 4121 | mlog_errno(ret); | ||
| 4122 | goto out; | ||
| 4123 | } | ||
| 4124 | } | ||
| 4125 | |||
| 4126 | ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local); | ||
| 4127 | |||
| 4128 | /* Only dirty the blocks we have touched in set xattr. */ | ||
| 4129 | ret = ocfs2_xattr_bucket_handle_journal(inode, handle, xs, | ||
| 4130 | xs->bucket.bhs, blk_per_bucket); | ||
| 4131 | if (ret) | ||
| 4132 | mlog_errno(ret); | ||
| 4133 | out: | ||
| 4134 | ocfs2_commit_trans(osb, handle); | ||
| 4135 | |||
| 4136 | return ret; | ||
| 4137 | } | ||
| 4138 | |||
| 4139 | static int ocfs2_xattr_value_update_size(struct inode *inode, | ||
| 4140 | struct buffer_head *xe_bh, | ||
| 4141 | struct ocfs2_xattr_entry *xe, | ||
| 4142 | u64 new_size) | ||
| 4143 | { | ||
| 4144 | int ret; | ||
| 4145 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 4146 | handle_t *handle = NULL; | ||
| 4147 | |||
| 4148 | handle = ocfs2_start_trans(osb, 1); | ||
| 4149 | if (IS_ERR(handle)) { | ||
| 4150 | ret = PTR_ERR(handle); | ||
| 4151 | mlog_errno(ret); | ||
| 4152 | goto out; | ||
| 4153 | } | ||
| 4154 | |||
| 4155 | ret = ocfs2_journal_access(handle, inode, xe_bh, | ||
| 4156 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 4157 | if (ret < 0) { | ||
| 4158 | mlog_errno(ret); | ||
| 4159 | goto out_commit; | ||
| 4160 | } | ||
| 4161 | |||
| 4162 | xe->xe_value_size = cpu_to_le64(new_size); | ||
| 4163 | |||
| 4164 | ret = ocfs2_journal_dirty(handle, xe_bh); | ||
| 4165 | if (ret < 0) | ||
| 4166 | mlog_errno(ret); | ||
| 4167 | |||
| 4168 | out_commit: | ||
| 4169 | ocfs2_commit_trans(osb, handle); | ||
| 4170 | out: | ||
| 4171 | return ret; | ||
| 4172 | } | ||
| 4173 | |||
| 4174 | /* | ||
| 4175 | * Truncate the specified xe_off entry in xattr bucket. | ||
| 4176 | * bucket is indicated by header_bh and len is the new length. | ||
| 4177 | * Both the ocfs2_xattr_value_root and the entry will be updated here. | ||
| 4178 | * | ||
| 4179 | * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed. | ||
| 4180 | */ | ||
| 4181 | static int ocfs2_xattr_bucket_value_truncate(struct inode *inode, | ||
| 4182 | struct buffer_head *header_bh, | ||
| 4183 | int xe_off, | ||
| 4184 | int len) | ||
| 4185 | { | ||
| 4186 | int ret, offset; | ||
| 4187 | u64 value_blk; | ||
| 4188 | struct buffer_head *value_bh = NULL; | ||
| 4189 | struct ocfs2_xattr_value_root *xv; | ||
| 4190 | struct ocfs2_xattr_entry *xe; | ||
| 4191 | struct ocfs2_xattr_header *xh = | ||
| 4192 | (struct ocfs2_xattr_header *)header_bh->b_data; | ||
| 4193 | size_t blocksize = inode->i_sb->s_blocksize; | ||
| 4194 | |||
| 4195 | xe = &xh->xh_entries[xe_off]; | ||
| 4196 | |||
| 4197 | BUG_ON(!xe || ocfs2_xattr_is_local(xe)); | ||
| 4198 | |||
| 4199 | offset = le16_to_cpu(xe->xe_name_offset) + | ||
| 4200 | OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
| 4201 | |||
| 4202 | value_blk = offset / blocksize; | ||
| 4203 | |||
| 4204 | /* We don't allow an ocfs2_xattr_value_root to span different blocks. */ | ||
| 4205 | BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize); | ||
| 4206 | value_blk += header_bh->b_blocknr; | ||
| 4207 | |||
| 4208 | ret = ocfs2_read_block(inode, value_blk, &value_bh); | ||
| 4209 | if (ret) { | ||
| 4210 | mlog_errno(ret); | ||
| 4211 | goto out; | ||
| 4212 | } | ||
| 4213 | |||
| 4214 | xv = (struct ocfs2_xattr_value_root *) | ||
| 4215 | (value_bh->b_data + offset % blocksize); | ||
| 4216 | |||
| 4217 | mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n", | ||
| 4218 | xe_off, (unsigned long long)header_bh->b_blocknr, len); | ||
| 4219 | ret = ocfs2_xattr_value_truncate(inode, value_bh, xv, len); | ||
| 4220 | if (ret) { | ||
| 4221 | mlog_errno(ret); | ||
| 4222 | goto out; | ||
| 4223 | } | ||
| 4224 | |||
| 4225 | ret = ocfs2_xattr_value_update_size(inode, header_bh, xe, len); | ||
| 4226 | if (ret) { | ||
| 4227 | mlog_errno(ret); | ||
| 4228 | goto out; | ||
| 4229 | } | ||
| 4230 | |||
| 4231 | out: | ||
| 4232 | brelse(value_bh); | ||
| 4233 | return ret; | ||
| 4234 | } | ||
| 4235 | |||
| 4236 | static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode, | ||
| 4237 | struct ocfs2_xattr_search *xs, | ||
| 4238 | int len) | ||
| 4239 | { | ||
| 4240 | int ret, offset; | ||
| 4241 | struct ocfs2_xattr_entry *xe = xs->here; | ||
| 4242 | struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base; | ||
| 4243 | |||
| 4244 | BUG_ON(!xs->bucket.bhs[0] || !xe || ocfs2_xattr_is_local(xe)); | ||
| 4245 | |||
| 4246 | offset = xe - xh->xh_entries; | ||
| 4247 | ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket.bhs[0], | ||
| 4248 | offset, len); | ||
| 4249 | if (ret) | ||
| 4250 | mlog_errno(ret); | ||
| 4251 | |||
| 4252 | return ret; | ||
| 4253 | } | ||
| 4254 | |||
| 4255 | static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode, | ||
| 4256 | struct ocfs2_xattr_search *xs, | ||
| 4257 | char *val, | ||
| 4258 | int value_len) | ||
| 4259 | { | ||
| 4260 | int offset; | ||
| 4261 | struct ocfs2_xattr_value_root *xv; | ||
| 4262 | struct ocfs2_xattr_entry *xe = xs->here; | ||
| 4263 | |||
| 4264 | BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe)); | ||
| 4265 | |||
| 4266 | offset = le16_to_cpu(xe->xe_name_offset) + | ||
| 4267 | OCFS2_XATTR_SIZE(xe->xe_name_len); | ||
| 4268 | |||
| 4269 | xv = (struct ocfs2_xattr_value_root *)(xs->base + offset); | ||
| 4270 | |||
| 4271 | return __ocfs2_xattr_set_value_outside(inode, xv, val, value_len); | ||
| 4272 | } | ||
| 4273 | |||
| 4274 | static int ocfs2_rm_xattr_cluster(struct inode *inode, | ||
| 4275 | struct buffer_head *root_bh, | ||
| 4276 | u64 blkno, | ||
| 4277 | u32 cpos, | ||
| 4278 | u32 len) | ||
| 4279 | { | ||
| 4280 | int ret; | ||
| 4281 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 4282 | struct inode *tl_inode = osb->osb_tl_inode; | ||
| 4283 | handle_t *handle; | ||
| 4284 | struct ocfs2_xattr_block *xb = | ||
| 4285 | (struct ocfs2_xattr_block *)root_bh->b_data; | ||
| 4286 | struct ocfs2_alloc_context *meta_ac = NULL; | ||
| 4287 | struct ocfs2_cached_dealloc_ctxt dealloc; | ||
| 4288 | struct ocfs2_extent_tree et; | ||
| 4289 | |||
| 4290 | ocfs2_init_xattr_tree_extent_tree(&et, inode, root_bh); | ||
| 4291 | |||
| 4292 | ocfs2_init_dealloc_ctxt(&dealloc); | ||
| 4293 | |||
| 4294 | mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n", | ||
| 4295 | cpos, len, (unsigned long long)blkno); | ||
| 4296 | |||
| 4297 | ocfs2_remove_xattr_clusters_from_cache(inode, blkno, len); | ||
| 4298 | |||
| 4299 | ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac); | ||
| 4300 | if (ret) { | ||
| 4301 | mlog_errno(ret); | ||
| 4302 | return ret; | ||
| 4303 | } | ||
| 4304 | |||
| 4305 | mutex_lock(&tl_inode->i_mutex); | ||
| 4306 | |||
| 4307 | if (ocfs2_truncate_log_needs_flush(osb)) { | ||
| 4308 | ret = __ocfs2_flush_truncate_log(osb); | ||
| 4309 | if (ret < 0) { | ||
| 4310 | mlog_errno(ret); | ||
| 4311 | goto out; | ||
| 4312 | } | ||
| 4313 | } | ||
| 4314 | |||
| 4315 | handle = ocfs2_start_trans(osb, OCFS2_REMOVE_EXTENT_CREDITS); | ||
| 4316 | if (IS_ERR(handle)) { | ||
| 4317 | ret = PTR_ERR(handle); | ||
| 4318 | mlog_errno(ret); | ||
| 4319 | goto out; | ||
| 4320 | } | ||
| 4321 | |||
| 4322 | ret = ocfs2_journal_access(handle, inode, root_bh, | ||
| 4323 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 4324 | if (ret) { | ||
| 4325 | mlog_errno(ret); | ||
| 4326 | goto out_commit; | ||
| 4327 | } | ||
| 4328 | |||
| 4329 | ret = ocfs2_remove_extent(inode, &et, cpos, len, handle, meta_ac, | ||
| 4330 | &dealloc); | ||
| 4331 | if (ret) { | ||
| 4332 | mlog_errno(ret); | ||
| 4333 | goto out_commit; | ||
| 4334 | } | ||
| 4335 | |||
| 4336 | le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len); | ||
| 4337 | |||
| 4338 | ret = ocfs2_journal_dirty(handle, root_bh); | ||
| 4339 | if (ret) { | ||
| 4340 | mlog_errno(ret); | ||
| 4341 | goto out_commit; | ||
| 4342 | } | ||
| 4343 | |||
| 4344 | ret = ocfs2_truncate_log_append(osb, handle, blkno, len); | ||
| 4345 | if (ret) | ||
| 4346 | mlog_errno(ret); | ||
| 4347 | |||
| 4348 | out_commit: | ||
| 4349 | ocfs2_commit_trans(osb, handle); | ||
| 4350 | out: | ||
| 4351 | ocfs2_schedule_truncate_log_flush(osb, 1); | ||
| 4352 | |||
| 4353 | mutex_unlock(&tl_inode->i_mutex); | ||
| 4354 | |||
| 4355 | if (meta_ac) | ||
| 4356 | ocfs2_free_alloc_context(meta_ac); | ||
| 4357 | |||
| 4358 | ocfs2_run_deallocs(osb, &dealloc); | ||
| 4359 | |||
| 4360 | return ret; | ||
| 4361 | } | ||
| 4362 | |||
| 4363 | static void ocfs2_xattr_bucket_remove_xs(struct inode *inode, | ||
| 4364 | struct ocfs2_xattr_search *xs) | ||
| 4365 | { | ||
| 4366 | handle_t *handle = NULL; | ||
| 4367 | struct ocfs2_xattr_header *xh = xs->bucket.xh; | ||
| 4368 | struct ocfs2_xattr_entry *last = &xh->xh_entries[ | ||
| 4369 | le16_to_cpu(xh->xh_count) - 1]; | ||
| 4370 | int ret = 0; | ||
| 4371 | |||
| 4372 | handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)), 1); | ||
| 4373 | if (IS_ERR(handle)) { | ||
| 4374 | ret = PTR_ERR(handle); | ||
| 4375 | mlog_errno(ret); | ||
| 4376 | return; | ||
| 4377 | } | ||
| 4378 | |||
| 4379 | ret = ocfs2_journal_access(handle, inode, xs->bucket.bhs[0], | ||
| 4380 | OCFS2_JOURNAL_ACCESS_WRITE); | ||
| 4381 | if (ret) { | ||
| 4382 | mlog_errno(ret); | ||
| 4383 | goto out_commit; | ||
| 4384 | } | ||
| 4385 | |||
| 4386 | /* Remove the old entry. */ | ||
| 4387 | memmove(xs->here, xs->here + 1, | ||
| 4388 | (void *)last - (void *)xs->here); | ||
| 4389 | memset(last, 0, sizeof(struct ocfs2_xattr_entry)); | ||
| 4390 | le16_add_cpu(&xh->xh_count, -1); | ||
| 4391 | |||
| 4392 | ret = ocfs2_journal_dirty(handle, xs->bucket.bhs[0]); | ||
| 4393 | if (ret < 0) | ||
| 4394 | mlog_errno(ret); | ||
| 4395 | out_commit: | ||
| 4396 | ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle); | ||
| 4397 | } | ||
| 4398 | |||
| 4399 | /* | ||
| 4400 | * Set the xattr name/value in the bucket specified in xs. | ||
| 4401 | * | ||
| 4402 | * As the new value in xi may be stored in the bucket or in an outside cluster, | ||
| 4403 | * we divide the whole process into the following steps: | ||
| 4404 | * 1. Insert the name/value in the bucket (ocfs2_xattr_set_entry_in_bucket). | ||
| 4405 | * 2. Truncate the outside cluster (ocfs2_xattr_bucket_value_truncate_xs). | ||
| 4406 | * 3. Set the value in the outside cluster (ocfs2_xattr_bucket_set_value_outside). | ||
| 4407 | * 4. If the clusters for the new outside value can't be allocated, free the | ||
| 4408 | * xattr entry we inserted in step 1. | ||
| 4409 | */ | ||
| 4410 | static int ocfs2_xattr_set_in_bucket(struct inode *inode, | ||
| 4411 | struct ocfs2_xattr_info *xi, | ||
| 4412 | struct ocfs2_xattr_search *xs) | ||
| 4413 | { | ||
| 4414 | int ret, local = 1; | ||
| 4415 | size_t value_len; | ||
| 4416 | char *val = (char *)xi->value; | ||
| 4417 | struct ocfs2_xattr_entry *xe = xs->here; | ||
| 4418 | u32 name_hash = ocfs2_xattr_name_hash(inode, xi->name, | ||
| 4419 | strlen(xi->name)); | ||
| 4420 | |||
| 4421 | if (!xs->not_found && !ocfs2_xattr_is_local(xe)) { | ||
| 4422 | /* | ||
| 4423 | * We need to truncate the xattr storage first. | ||
| 4424 | * | ||
| 4425 | * If both the old and new value are stored to | ||
| 4426 | * outside block, we only need to truncate | ||
| 4427 | * the storage and then set the value outside. | ||
| 4428 | * | ||
| 4429 | * If the new value should be stored within the block, | ||
| 4430 | * we should free all the outside blocks first; the | ||
| 4431 | * modification to the xattr block will be done | ||
| 4432 | * by the following steps. | ||
| 4433 | */ | ||
| 4434 | if (xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
| 4435 | value_len = xi->value_len; | ||
| 4436 | else | ||
| 4437 | value_len = 0; | ||
| 4438 | |||
| 4439 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
| 4440 | value_len); | ||
| 4441 | if (ret) | ||
| 4442 | goto out; | ||
| 4443 | |||
| 4444 | if (value_len) | ||
| 4445 | goto set_value_outside; | ||
| 4446 | } | ||
| 4447 | |||
| 4448 | value_len = xi->value_len; | ||
| 4449 | /* So we have to handle the inside block change now. */ | ||
| 4450 | if (value_len > OCFS2_XATTR_INLINE_SIZE) { | ||
| 4451 | /* | ||
| 4452 | * If the new value will be stored outside of block, | ||
| 4453 | * initialize a new empty value root and insert it first. | ||
| 4454 | */ | ||
| 4455 | local = 0; | ||
| 4456 | xi->value = &def_xv; | ||
| 4457 | xi->value_len = OCFS2_XATTR_ROOT_SIZE; | ||
| 4458 | } | ||
| 4459 | |||
| 4460 | ret = ocfs2_xattr_set_entry_in_bucket(inode, xi, xs, name_hash, local); | ||
| 4461 | if (ret) { | ||
| 4462 | mlog_errno(ret); | ||
| 4463 | goto out; | ||
| 4464 | } | ||
| 4465 | |||
| 4466 | if (value_len <= OCFS2_XATTR_INLINE_SIZE) | ||
| 4467 | goto out; | ||
| 4468 | |||
| 4469 | /* allocate the space now for the outside block storage. */ | ||
| 4470 | ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs, | ||
| 4471 | value_len); | ||
| 4472 | if (ret) { | ||
| 4473 | mlog_errno(ret); | ||
| 4474 | |||
| 4475 | if (xs->not_found) { | ||
| 4476 | /* | ||
| 4477 | * We can't allocate enough clusters for outside | ||
| 4478 | * storage and we have allocated xattr already, | ||
| 4479 | * so we need to remove it. | ||
| 4480 | */ | ||
| 4481 | ocfs2_xattr_bucket_remove_xs(inode, xs); | ||
| 4482 | } | ||
| 4483 | goto out; | ||
| 4484 | } | ||
| 4485 | |||
| 4486 | set_value_outside: | ||
| 4487 | ret = ocfs2_xattr_bucket_set_value_outside(inode, xs, val, value_len); | ||
| 4488 | out: | ||
| 4489 | return ret; | ||
| 4490 | } | ||
| 4491 | |||
| 4492 | /* Check whether the xattr bucket is filled with entries that all share the same hash. */ | ||
| 4493 | static int ocfs2_check_xattr_bucket_collision(struct inode *inode, | ||
| 4494 | struct ocfs2_xattr_bucket *bucket) | ||
| 4495 | { | ||
| 4496 | struct ocfs2_xattr_header *xh = bucket->xh; | ||
| 4497 | |||
| 4498 | if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash == | ||
| 4499 | xh->xh_entries[0].xe_name_hash) { | ||
| 4500 | mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, " | ||
| 4501 | "hash = %u\n", | ||
| 4502 | (unsigned long long)bucket->bhs[0]->b_blocknr, | ||
| 4503 | le32_to_cpu(xh->xh_entries[0].xe_name_hash)); | ||
| 4504 | return -ENOSPC; | ||
| 4505 | } | ||
| 4506 | |||
| 4507 | return 0; | ||
| 4508 | } | ||
| 4509 | |||
| 4510 | static int ocfs2_xattr_set_entry_index_block(struct inode *inode, | ||
| 4511 | struct ocfs2_xattr_info *xi, | ||
| 4512 | struct ocfs2_xattr_search *xs) | ||
| 4513 | { | ||
| 4514 | struct ocfs2_xattr_header *xh; | ||
| 4515 | struct ocfs2_xattr_entry *xe; | ||
| 4516 | u16 count, header_size, xh_free_start; | ||
| 4517 | int i, free, max_free, need, old; | ||
| 4518 | size_t value_size = 0, name_len = strlen(xi->name); | ||
| 4519 | size_t blocksize = inode->i_sb->s_blocksize; | ||
| 4520 | int ret, allocation = 0; | ||
| 4521 | u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); | ||
| 4522 | |||
| 4523 | mlog_entry("Set xattr %s in xattr index block\n", xi->name); | ||
| 4524 | |||
| 4525 | try_again: | ||
| 4526 | xh = xs->header; | ||
| 4527 | count = le16_to_cpu(xh->xh_count); | ||
| 4528 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
| 4529 | header_size = sizeof(struct ocfs2_xattr_header) + | ||
| 4530 | count * sizeof(struct ocfs2_xattr_entry); | ||
| 4531 | max_free = OCFS2_XATTR_BUCKET_SIZE - | ||
| 4532 | le16_to_cpu(xh->xh_name_value_len) - header_size; | ||
| 4533 | |||
| 4534 | mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size " | ||
| 4535 | "of %u which exceed block size\n", | ||
| 4536 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, | ||
| 4537 | header_size); | ||
| 4538 | |||
| 4539 | if (xi->value && xi->value_len > OCFS2_XATTR_INLINE_SIZE) | ||
| 4540 | value_size = OCFS2_XATTR_ROOT_SIZE; | ||
| 4541 | else if (xi->value) | ||
| 4542 | value_size = OCFS2_XATTR_SIZE(xi->value_len); | ||
| 4543 | |||
| 4544 | if (xs->not_found) | ||
| 4545 | need = sizeof(struct ocfs2_xattr_entry) + | ||
| 4546 | OCFS2_XATTR_SIZE(name_len) + value_size; | ||
| 4547 | else { | ||
| 4548 | need = value_size + OCFS2_XATTR_SIZE(name_len); | ||
| 4549 | |||
| 4550 | /* | ||
| 4551 | * We only replace the old value in place if the new length is no | ||
| 4552 | * larger than the old one. Otherwise we will allocate new space in the | ||
| 4553 | * bucket to store it. | ||
| 4554 | */ | ||
| 4555 | xe = xs->here; | ||
| 4556 | if (ocfs2_xattr_is_local(xe)) | ||
| 4557 | old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size)); | ||
| 4558 | else | ||
| 4559 | old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE); | ||
| 4560 | |||
| 4561 | if (old >= value_size) | ||
| 4562 | need = 0; | ||
| 4563 | } | ||
| 4564 | |||
| 4565 | free = xh_free_start - header_size; | ||
| 4566 | /* | ||
| 4567 | * We need to make sure the new name/value pair | ||
| 4568 | * fits within a single block. | ||
| 4569 | */ | ||
| 4570 | if (xh_free_start % blocksize < need) | ||
| 4571 | free -= xh_free_start % blocksize; | ||
| 4572 | |||
| 4573 | mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, " | ||
| 4574 | "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len =" | ||
| 4575 | " %u\n", xs->not_found, | ||
| 4576 | (unsigned long long)xs->bucket.bhs[0]->b_blocknr, | ||
| 4577 | free, need, max_free, le16_to_cpu(xh->xh_free_start), | ||
| 4578 | le16_to_cpu(xh->xh_name_value_len)); | ||
| 4579 | |||
| 4580 | if (free < need || count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
| 4581 | if (need <= max_free && | ||
| 4582 | count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) { | ||
| 4583 | /* | ||
| 4584 | * We can create the space by defragmentation. Since only the | ||
| 4585 | * name/value will be moved, the xe shouldn't be changed | ||
| 4586 | * in xs. | ||
| 4587 | */ | ||
| 4588 | ret = ocfs2_defrag_xattr_bucket(inode, &xs->bucket); | ||
| 4589 | if (ret) { | ||
| 4590 | mlog_errno(ret); | ||
| 4591 | goto out; | ||
| 4592 | } | ||
| 4593 | |||
| 4594 | xh_free_start = le16_to_cpu(xh->xh_free_start); | ||
| 4595 | free = xh_free_start - header_size; | ||
| 4596 | if (xh_free_start % blocksize < need) | ||
| 4597 | free -= xh_free_start % blocksize; | ||
| 4598 | |||
| 4599 | if (free >= need) | ||
| 4600 | goto xattr_set; | ||
| 4601 | |||
| 4602 | mlog(0, "Can't get enough space for xattr insert by " | ||
| 4603 | "defragment. Need %u bytes, but we have %d, so " | ||
| 4604 | "allocate new bucket for it.\n", need, free); | ||
| 4605 | } | ||
| 4606 | |||
| 4607 | /* | ||
| 4608 | * We have to add new buckets or clusters and one | ||
| 4609 | * allocation should leave us enough space for insert. | ||
| 4610 | */ | ||
| 4611 | BUG_ON(allocation); | ||
| 4612 | |||
| 4613 | /* | ||
| 4614 | * We do not allow for overlapping ranges between buckets. And | ||
| 4615 | * the maximum number of collisions we will allow for then is | ||
| 4616 | * one bucket's worth, so check here whether we need to | ||
| 4617 | * add a new bucket for the insert. | ||
| 4618 | */ | ||
| 4619 | ret = ocfs2_check_xattr_bucket_collision(inode, &xs->bucket); | ||
| 4620 | if (ret) { | ||
| 4621 | mlog_errno(ret); | ||
| 4622 | goto out; | ||
| 4623 | } | ||
| 4624 | |||
| 4625 | ret = ocfs2_add_new_xattr_bucket(inode, | ||
| 4626 | xs->xattr_bh, | ||
| 4627 | xs->bucket.bhs[0]); | ||
| 4628 | if (ret) { | ||
| 4629 | mlog_errno(ret); | ||
| 4630 | goto out; | ||
| 4631 | } | ||
| 4632 | |||
| 4633 | for (i = 0; i < blk_per_bucket; i++) | ||
| 4634 | brelse(xs->bucket.bhs[i]); | ||
| 4635 | |||
| 4636 | memset(&xs->bucket, 0, sizeof(xs->bucket)); | ||
| 4637 | |||
| 4638 | ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh, | ||
| 4639 | xi->name_index, | ||
| 4640 | xi->name, xs); | ||
| 4641 | if (ret && ret != -ENODATA) | ||
| 4642 | goto out; | ||
| 4643 | xs->not_found = ret; | ||
| 4644 | allocation = 1; | ||
| 4645 | goto try_again; | ||
| 4646 | } | ||
| 4647 | |||
| 4648 | xattr_set: | ||
| 4649 | ret = ocfs2_xattr_set_in_bucket(inode, xi, xs); | ||
| 4650 | out: | ||
| 4651 | mlog_exit(ret); | ||
| 4652 | return ret; | ||
| 4653 | } | ||
| 4654 | |||
| 4655 | static int ocfs2_delete_xattr_in_bucket(struct inode *inode, | ||
| 4656 | struct ocfs2_xattr_bucket *bucket, | ||
| 4657 | void *para) | ||
| 4658 | { | ||
| 4659 | int ret = 0; | ||
| 4660 | struct ocfs2_xattr_header *xh = bucket->xh; | ||
| 4661 | u16 i; | ||
| 4662 | struct ocfs2_xattr_entry *xe; | ||
| 4663 | |||
| 4664 | for (i = 0; i < le16_to_cpu(xh->xh_count); i++) { | ||
| 4665 | xe = &xh->xh_entries[i]; | ||
| 4666 | if (ocfs2_xattr_is_local(xe)) | ||
| 4667 | continue; | ||
| 4668 | |||
| 4669 | ret = ocfs2_xattr_bucket_value_truncate(inode, | ||
| 4670 | bucket->bhs[0], | ||
| 4671 | i, 0); | ||
| 4672 | if (ret) { | ||
| 4673 | mlog_errno(ret); | ||
| 4674 | break; | ||
| 4675 | } | ||
| 4676 | } | ||
| 4677 | |||
| 4678 | return ret; | ||
| 4679 | } | ||
| 4680 | |||
| 4681 | static int ocfs2_delete_xattr_index_block(struct inode *inode, | ||
| 4682 | struct buffer_head *xb_bh) | ||
| 4683 | { | ||
| 4684 | struct ocfs2_xattr_block *xb = | ||
| 4685 | (struct ocfs2_xattr_block *)xb_bh->b_data; | ||
| 4686 | struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list; | ||
| 4687 | int ret = 0; | ||
| 4688 | u32 name_hash = UINT_MAX, e_cpos, num_clusters; | ||
| 4689 | u64 p_blkno; | ||
| 4690 | |||
| 4691 | if (le16_to_cpu(el->l_next_free_rec) == 0) | ||
| 4692 | return 0; | ||
| 4693 | |||
| 4694 | while (name_hash > 0) { | ||
| 4695 | ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, | ||
| 4696 | &e_cpos, &num_clusters, el); | ||
| 4697 | if (ret) { | ||
| 4698 | mlog_errno(ret); | ||
| 4699 | goto out; | ||
| 4700 | } | ||
| 4701 | |||
| 4702 | ret = ocfs2_iterate_xattr_buckets(inode, p_blkno, num_clusters, | ||
| 4703 | ocfs2_delete_xattr_in_bucket, | ||
| 4704 | NULL); | ||
| 4705 | if (ret) { | ||
| 4706 | mlog_errno(ret); | ||
| 4707 | goto out; | ||
| 4708 | } | ||
| 4709 | |||
| 4710 | ret = ocfs2_rm_xattr_cluster(inode, xb_bh, | ||
| 4711 | p_blkno, e_cpos, num_clusters); | ||
| 4712 | if (ret) { | ||
| 4713 | mlog_errno(ret); | ||
| 4714 | break; | ||
| 4715 | } | ||
| 4716 | |||
| 4717 | if (e_cpos == 0) | ||
| 4718 | break; | ||
| 4719 | |||
| 4720 | name_hash = e_cpos - 1; | ||
| 4721 | } | ||
| 4722 | |||
| 4723 | out: | ||
| 4724 | return ret; | ||
| 4725 | } | ||
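
The index-block teardown above walks the extent records from the highest possible name hash downward, restarting each iteration just below the cpos of the record it has just removed. A small standalone sketch of the same backwards walk over a sorted list of ranges (hypothetical ranges, not ocfs2 extent records):

#include <stdio.h>

struct rec { unsigned int cpos; unsigned int clusters; };

/* ranges sorted by cpos; each covers [cpos, cpos + clusters) */
static struct rec recs[] = { { 0, 4 }, { 4, 8 }, { 12, 4 } };

static const struct rec *find_rec(unsigned int hash)
{
	int i;

	for (i = (int)(sizeof(recs) / sizeof(recs[0])) - 1; i >= 0; i--)
		if (recs[i].cpos <= hash)
			return &recs[i];
	return NULL;
}

int main(void)
{
	unsigned int hash = ~0u;	/* start from the highest possible hash */

	while (hash > 0) {
		const struct rec *r = find_rec(hash);

		if (!r)
			break;
		printf("remove range [%u, %u)\n", r->cpos, r->cpos + r->clusters);
		if (r->cpos == 0)
			break;		/* reached the first record */
		hash = r->cpos - 1;	/* continue just below this record */
	}
	return 0;
}
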
| 4726 | |||
| 4727 | /* | ||
| 4728 | * 'trusted' attributes support | ||
| 4729 | */ | ||
| 4730 | |||
| 4731 | #define XATTR_TRUSTED_PREFIX "trusted." | ||
| 4732 | |||
| 4733 | static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list, | ||
| 4734 | size_t list_size, const char *name, | ||
| 4735 | size_t name_len) | ||
| 4736 | { | ||
| 4737 | const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX) - 1; | ||
| 4738 | const size_t total_len = prefix_len + name_len + 1; | ||
| 4739 | |||
| 4740 | if (list && total_len <= list_size) { | ||
| 4741 | memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); | ||
| 4742 | memcpy(list + prefix_len, name, name_len); | ||
| 4743 | list[prefix_len + name_len] = '\0'; | ||
| 4744 | } | ||
| 4745 | return total_len; | ||
| 4746 | } | ||
| 4747 | |||
| 4748 | static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name, | ||
| 4749 | void *buffer, size_t size) | ||
| 4750 | { | ||
| 4751 | if (strcmp(name, "") == 0) | ||
| 4752 | return -EINVAL; | ||
| 4753 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name, | ||
| 4754 | buffer, size); | ||
| 4755 | } | ||
| 4756 | |||
| 4757 | static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name, | ||
| 4758 | const void *value, size_t size, int flags) | ||
| 4759 | { | ||
| 4760 | if (strcmp(name, "") == 0) | ||
| 4761 | return -EINVAL; | ||
| 4762 | |||
| 4763 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value, | ||
| 4764 | size, flags); | ||
| 4765 | } | ||
| 4766 | |||
| 4767 | struct xattr_handler ocfs2_xattr_trusted_handler = { | ||
| 4768 | .prefix = XATTR_TRUSTED_PREFIX, | ||
| 4769 | .list = ocfs2_xattr_trusted_list, | ||
| 4770 | .get = ocfs2_xattr_trusted_get, | ||
| 4771 | .set = ocfs2_xattr_trusted_set, | ||
| 4772 | }; | ||
| 4773 | |||
| 4774 | |||
| 4775 | /* | ||
| 4776 | * 'user' attributes support | ||
| 4777 | */ | ||
| 4778 | |||
| 4779 | #define XATTR_USER_PREFIX "user." | ||
| 4780 | |||
| 4781 | static size_t ocfs2_xattr_user_list(struct inode *inode, char *list, | ||
| 4782 | size_t list_size, const char *name, | ||
| 4783 | size_t name_len) | ||
| 4784 | { | ||
| 4785 | const size_t prefix_len = sizeof(XATTR_USER_PREFIX) - 1; | ||
| 4786 | const size_t total_len = prefix_len + name_len + 1; | ||
| 4787 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 4788 | |||
| 4789 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
| 4790 | return 0; | ||
| 4791 | |||
| 4792 | if (list && total_len <= list_size) { | ||
| 4793 | memcpy(list, XATTR_USER_PREFIX, prefix_len); | ||
| 4794 | memcpy(list + prefix_len, name, name_len); | ||
| 4795 | list[prefix_len + name_len] = '\0'; | ||
| 4796 | } | ||
| 4797 | return total_len; | ||
| 4798 | } | ||
| 4799 | |||
| 4800 | static int ocfs2_xattr_user_get(struct inode *inode, const char *name, | ||
| 4801 | void *buffer, size_t size) | ||
| 4802 | { | ||
| 4803 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 4804 | |||
| 4805 | if (strcmp(name, "") == 0) | ||
| 4806 | return -EINVAL; | ||
| 4807 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
| 4808 | return -EOPNOTSUPP; | ||
| 4809 | return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name, | ||
| 4810 | buffer, size); | ||
| 4811 | } | ||
| 4812 | |||
| 4813 | static int ocfs2_xattr_user_set(struct inode *inode, const char *name, | ||
| 4814 | const void *value, size_t size, int flags) | ||
| 4815 | { | ||
| 4816 | struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); | ||
| 4817 | |||
| 4818 | if (strcmp(name, "") == 0) | ||
| 4819 | return -EINVAL; | ||
| 4820 | if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR) | ||
| 4821 | return -EOPNOTSUPP; | ||
| 4822 | |||
| 4823 | return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value, | ||
| 4824 | size, flags); | ||
| 4825 | } | ||
| 4826 | |||
| 4827 | struct xattr_handler ocfs2_xattr_user_handler = { | ||
| 4828 | .prefix = XATTR_USER_PREFIX, | ||
| 4829 | .list = ocfs2_xattr_user_list, | ||
| 4830 | .get = ocfs2_xattr_user_get, | ||
| 4831 | .set = ocfs2_xattr_user_set, | ||
| 4832 | }; | ||
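
Note that the ->list callbacks above return the full length a name needs (prefix + name + terminating NUL) even when the destination buffer is absent or too small, which is what lets callers size their buffer with a first probing pass. A hedged userspace sketch of that two-pass pattern using the standard listxattr(2) call (not an ocfs2-specific API):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/xattr.h>

/* Probe-then-read: ask for the required size, then fetch the list. */
int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : ".";
	ssize_t len = listxattr(path, NULL, 0);	/* size probe */
	char *buf, *p;

	if (len <= 0)
		return 1;
	buf = malloc(len);
	if (!buf)
		return 1;
	len = listxattr(path, buf, len);	/* actual read */
	/* names are NUL-separated, e.g. "user.foo\0trusted.bar\0" */
	for (p = buf; p < buf + len; p += strlen(p) + 1)
		printf("%s\n", p);
	free(buf);
	return 0;
}
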
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h new file mode 100644 index 000000000000..c25c7c62a059 --- /dev/null +++ b/fs/ocfs2/xattr.h | |||
| @@ -0,0 +1,68 @@ | |||
| 1 | /* -*- mode: c; c-basic-offset: 8; -*- | ||
| 2 | * vim: noexpandtab sw=8 ts=8 sts=0: | ||
| 3 | * | ||
| 4 | * xattr.h | ||
| 5 | * | ||
| 6 | * Function prototypes | ||
| 7 | * | ||
| 8 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
| 9 | * | ||
| 10 | * This program is free software; you can redistribute it and/or | ||
| 11 | * modify it under the terms of the GNU General Public | ||
| 12 | * License as published by the Free Software Foundation; either | ||
| 13 | * version 2 of the License, or (at your option) any later version. | ||
| 14 | * | ||
| 15 | * This program is distributed in the hope that it will be useful, | ||
| 16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
| 18 | * General Public License for more details. | ||
| 19 | * | ||
| 20 | * You should have received a copy of the GNU General Public | ||
| 21 | * License along with this program; if not, write to the | ||
| 22 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
| 23 | * Boston, MA 02111-1307, USA. | ||
| 24 | */ | ||
| 25 | |||
| 26 | #ifndef OCFS2_XATTR_H | ||
| 27 | #define OCFS2_XATTR_H | ||
| 28 | |||
| 29 | #include <linux/init.h> | ||
| 30 | #include <linux/xattr.h> | ||
| 31 | |||
| 32 | enum ocfs2_xattr_type { | ||
| 33 | OCFS2_XATTR_INDEX_USER = 1, | ||
| 34 | OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS, | ||
| 35 | OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT, | ||
| 36 | OCFS2_XATTR_INDEX_TRUSTED, | ||
| 37 | OCFS2_XATTR_INDEX_SECURITY, | ||
| 38 | OCFS2_XATTR_MAX | ||
| 39 | }; | ||
| 40 | |||
| 41 | extern struct xattr_handler ocfs2_xattr_user_handler; | ||
| 42 | extern struct xattr_handler ocfs2_xattr_trusted_handler; | ||
| 43 | |||
| 44 | extern ssize_t ocfs2_listxattr(struct dentry *, char *, size_t); | ||
| 45 | extern int ocfs2_xattr_get(struct inode *, int, const char *, void *, size_t); | ||
| 46 | extern int ocfs2_xattr_set(struct inode *, int, const char *, const void *, | ||
| 47 | size_t, int); | ||
| 48 | extern int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh); | ||
| 49 | extern struct xattr_handler *ocfs2_xattr_handlers[]; | ||
| 50 | |||
| 51 | static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb) | ||
| 52 | { | ||
| 53 | return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE; | ||
| 54 | } | ||
| 55 | |||
| 56 | static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb) | ||
| 57 | { | ||
| 58 | return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits); | ||
| 59 | } | ||
| 60 | |||
| 61 | static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb) | ||
| 62 | { | ||
| 63 | u16 len = sb->s_blocksize - | ||
| 64 | offsetof(struct ocfs2_xattr_header, xh_entries); | ||
| 65 | |||
| 66 | return len / sizeof(struct ocfs2_xattr_entry); | ||
| 67 | } | ||
| 68 | #endif /* OCFS2_XATTR_H */ | ||
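
The inline helpers above derive the xattr bucket geometry purely from superblock fields. A standalone sketch of the same arithmetic with assumed sizes (a 4 KB bucket, 4 KB blocks, 64 KB clusters, and guessed header/entry sizes; the real constants live in ocfs2_fs.h):

#include <stdio.h>

/* Assumed values for illustration only; the real ones come from ocfs2_fs.h. */
#define XATTR_BUCKET_SIZE	4096
#define XATTR_HEADER_SIZE	64	/* stands in for offsetof(ocfs2_xattr_header, xh_entries) */
#define XATTR_ENTRY_SIZE	16	/* stands in for sizeof(struct ocfs2_xattr_entry) */

int main(void)
{
	unsigned int clustersize = 64 * 1024;
	unsigned int blocksize = 4096;

	printf("buckets per cluster: %u\n", clustersize / XATTR_BUCKET_SIZE);
	printf("blocks per bucket:   %u\n", XATTR_BUCKET_SIZE / blocksize);
	printf("max entries/bucket:  %u\n",
	       (blocksize - XATTR_HEADER_SIZE) / XATTR_ENTRY_SIZE);
	return 0;
}
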
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index d29047b1b9b0..cbf047a847c5 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c | |||
| @@ -346,7 +346,7 @@ enum { | |||
| 346 | Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask | 346 | Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask |
| 347 | }; | 347 | }; |
| 348 | 348 | ||
| 349 | static match_table_t tokens = { | 349 | static const match_table_t tokens = { |
| 350 | {Opt_uid, "uid=%u"}, | 350 | {Opt_uid, "uid=%u"}, |
| 351 | {Opt_gid, "gid=%u"}, | 351 | {Opt_gid, "gid=%u"}, |
| 352 | {Opt_umask, "umask=%o"}, | 352 | {Opt_umask, "umask=%o"}, |
| @@ -1141,8 +1141,7 @@ EXPORT_SYMBOL(sys_close); | |||
| 1141 | asmlinkage long sys_vhangup(void) | 1141 | asmlinkage long sys_vhangup(void) |
| 1142 | { | 1142 | { |
| 1143 | if (capable(CAP_SYS_TTY_CONFIG)) { | 1143 | if (capable(CAP_SYS_TTY_CONFIG)) { |
| 1144 | /* XXX: this needs locking */ | 1144 | tty_vhangup_self(); |
| 1145 | tty_vhangup(current->signal->tty); | ||
| 1146 | return 0; | 1145 | return 0; |
| 1147 | } | 1146 | } |
| 1148 | return -EPERM; | 1147 | return -EPERM; |
diff --git a/fs/partitions/acorn.c b/fs/partitions/acorn.c index 3d3e16631472..a97b477ac0fc 100644 --- a/fs/partitions/acorn.c +++ b/fs/partitions/acorn.c | |||
| @@ -275,16 +275,6 @@ adfspart_check_ADFS(struct parsed_partitions *state, struct block_device *bdev) | |||
| 275 | id = data[0x1fc] & 15; | 275 | id = data[0x1fc] & 15; |
| 276 | put_dev_sector(sect); | 276 | put_dev_sector(sect); |
| 277 | 277 | ||
| 278 | #ifdef CONFIG_BLK_DEV_MFM | ||
| 279 | if (MAJOR(bdev->bd_dev) == MFM_ACORN_MAJOR) { | ||
| 280 | extern void xd_set_geometry(struct block_device *, | ||
| 281 | unsigned char, unsigned char, unsigned int); | ||
| 282 | xd_set_geometry(bdev, dr->secspertrack, heads, 1); | ||
| 283 | invalidate_bh_lrus(); | ||
| 284 | truncate_inode_pages(bdev->bd_inode->i_mapping, 0); | ||
| 285 | } | ||
| 286 | #endif | ||
| 287 | |||
| 288 | /* | 278 | /* |
| 289 | * Work out start of non-adfs partition. | 279 | * Work out start of non-adfs partition. |
| 290 | */ | 280 | */ |
diff --git a/fs/partitions/check.c b/fs/partitions/check.c index ecc3330972e5..cfb0c80690aa 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c | |||
| @@ -120,22 +120,21 @@ static int (*check_part[])(struct parsed_partitions *, struct block_device *) = | |||
| 120 | * a pointer to that same buffer (for convenience). | 120 | * a pointer to that same buffer (for convenience). |
| 121 | */ | 121 | */ |
| 122 | 122 | ||
| 123 | char *disk_name(struct gendisk *hd, int part, char *buf) | 123 | char *disk_name(struct gendisk *hd, int partno, char *buf) |
| 124 | { | 124 | { |
| 125 | if (!part) | 125 | if (!partno) |
| 126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); | 126 | snprintf(buf, BDEVNAME_SIZE, "%s", hd->disk_name); |
| 127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) | 127 | else if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) |
| 128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, part); | 128 | snprintf(buf, BDEVNAME_SIZE, "%sp%d", hd->disk_name, partno); |
| 129 | else | 129 | else |
| 130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, part); | 130 | snprintf(buf, BDEVNAME_SIZE, "%s%d", hd->disk_name, partno); |
| 131 | 131 | ||
| 132 | return buf; | 132 | return buf; |
| 133 | } | 133 | } |
| 134 | 134 | ||
| 135 | const char *bdevname(struct block_device *bdev, char *buf) | 135 | const char *bdevname(struct block_device *bdev, char *buf) |
| 136 | { | 136 | { |
| 137 | int part = MINOR(bdev->bd_dev) - bdev->bd_disk->first_minor; | 137 | return disk_name(bdev->bd_disk, bdev->bd_part->partno, buf); |
| 138 | return disk_name(bdev->bd_disk, part, buf); | ||
| 139 | } | 138 | } |
| 140 | 139 | ||
| 141 | EXPORT_SYMBOL(bdevname); | 140 | EXPORT_SYMBOL(bdevname); |
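
disk_name() only inserts a 'p' before the partition number when the disk name already ends in a digit, so "sda" becomes "sda1" while "mmcblk0" becomes "mmcblk0p1". A small standalone sketch of that rule (illustrative, not the kernel helper itself):

#include <ctype.h>
#include <stdio.h>
#include <string.h>

/* Mirrors the naming rule above for illustration only. */
static void part_name(const char *disk, int partno, char *buf, size_t len)
{
	if (!partno)
		snprintf(buf, len, "%s", disk);
	else if (isdigit((unsigned char)disk[strlen(disk) - 1]))
		snprintf(buf, len, "%sp%d", disk, partno);
	else
		snprintf(buf, len, "%s%d", disk, partno);
}

int main(void)
{
	char buf[32];

	part_name("sda", 1, buf, sizeof(buf));
	printf("%s\n", buf);		/* sda1 */
	part_name("mmcblk0", 1, buf, sizeof(buf));
	printf("%s\n", buf);		/* mmcblk0p1 */
	return 0;
}
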
| @@ -169,7 +168,7 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
| 169 | if (isdigit(state->name[strlen(state->name)-1])) | 168 | if (isdigit(state->name[strlen(state->name)-1])) |
| 170 | sprintf(state->name, "p"); | 169 | sprintf(state->name, "p"); |
| 171 | 170 | ||
| 172 | state->limit = hd->minors; | 171 | state->limit = disk_max_parts(hd); |
| 173 | i = res = err = 0; | 172 | i = res = err = 0; |
| 174 | while (!res && check_part[i]) { | 173 | while (!res && check_part[i]) { |
| 175 | memset(&state->parts, 0, sizeof(state->parts)); | 174 | memset(&state->parts, 0, sizeof(state->parts)); |
| @@ -196,6 +195,14 @@ check_partition(struct gendisk *hd, struct block_device *bdev) | |||
| 196 | return ERR_PTR(res); | 195 | return ERR_PTR(res); |
| 197 | } | 196 | } |
| 198 | 197 | ||
| 198 | static ssize_t part_partition_show(struct device *dev, | ||
| 199 | struct device_attribute *attr, char *buf) | ||
| 200 | { | ||
| 201 | struct hd_struct *p = dev_to_part(dev); | ||
| 202 | |||
| 203 | return sprintf(buf, "%d\n", p->partno); | ||
| 204 | } | ||
| 205 | |||
| 199 | static ssize_t part_start_show(struct device *dev, | 206 | static ssize_t part_start_show(struct device *dev, |
| 200 | struct device_attribute *attr, char *buf) | 207 | struct device_attribute *attr, char *buf) |
| 201 | { | 208 | { |
| @@ -204,21 +211,22 @@ static ssize_t part_start_show(struct device *dev, | |||
| 204 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); | 211 | return sprintf(buf, "%llu\n",(unsigned long long)p->start_sect); |
| 205 | } | 212 | } |
| 206 | 213 | ||
| 207 | static ssize_t part_size_show(struct device *dev, | 214 | ssize_t part_size_show(struct device *dev, |
| 208 | struct device_attribute *attr, char *buf) | 215 | struct device_attribute *attr, char *buf) |
| 209 | { | 216 | { |
| 210 | struct hd_struct *p = dev_to_part(dev); | 217 | struct hd_struct *p = dev_to_part(dev); |
| 211 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); | 218 | return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); |
| 212 | } | 219 | } |
| 213 | 220 | ||
| 214 | static ssize_t part_stat_show(struct device *dev, | 221 | ssize_t part_stat_show(struct device *dev, |
| 215 | struct device_attribute *attr, char *buf) | 222 | struct device_attribute *attr, char *buf) |
| 216 | { | 223 | { |
| 217 | struct hd_struct *p = dev_to_part(dev); | 224 | struct hd_struct *p = dev_to_part(dev); |
| 225 | int cpu; | ||
| 218 | 226 | ||
| 219 | preempt_disable(); | 227 | cpu = part_stat_lock(); |
| 220 | part_round_stats(p); | 228 | part_round_stats(cpu, p); |
| 221 | preempt_enable(); | 229 | part_stat_unlock(); |
| 222 | return sprintf(buf, | 230 | return sprintf(buf, |
| 223 | "%8lu %8lu %8llu %8u " | 231 | "%8lu %8lu %8llu %8u " |
| 224 | "%8lu %8lu %8llu %8u " | 232 | "%8lu %8lu %8llu %8u " |
| @@ -238,17 +246,17 @@ static ssize_t part_stat_show(struct device *dev, | |||
| 238 | } | 246 | } |
| 239 | 247 | ||
| 240 | #ifdef CONFIG_FAIL_MAKE_REQUEST | 248 | #ifdef CONFIG_FAIL_MAKE_REQUEST |
| 241 | static ssize_t part_fail_show(struct device *dev, | 249 | ssize_t part_fail_show(struct device *dev, |
| 242 | struct device_attribute *attr, char *buf) | 250 | struct device_attribute *attr, char *buf) |
| 243 | { | 251 | { |
| 244 | struct hd_struct *p = dev_to_part(dev); | 252 | struct hd_struct *p = dev_to_part(dev); |
| 245 | 253 | ||
| 246 | return sprintf(buf, "%d\n", p->make_it_fail); | 254 | return sprintf(buf, "%d\n", p->make_it_fail); |
| 247 | } | 255 | } |
| 248 | 256 | ||
| 249 | static ssize_t part_fail_store(struct device *dev, | 257 | ssize_t part_fail_store(struct device *dev, |
| 250 | struct device_attribute *attr, | 258 | struct device_attribute *attr, |
| 251 | const char *buf, size_t count) | 259 | const char *buf, size_t count) |
| 252 | { | 260 | { |
| 253 | struct hd_struct *p = dev_to_part(dev); | 261 | struct hd_struct *p = dev_to_part(dev); |
| 254 | int i; | 262 | int i; |
| @@ -260,6 +268,7 @@ static ssize_t part_fail_store(struct device *dev, | |||
| 260 | } | 268 | } |
| 261 | #endif | 269 | #endif |
| 262 | 270 | ||
| 271 | static DEVICE_ATTR(partition, S_IRUGO, part_partition_show, NULL); | ||
| 263 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); | 272 | static DEVICE_ATTR(start, S_IRUGO, part_start_show, NULL); |
| 264 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); | 273 | static DEVICE_ATTR(size, S_IRUGO, part_size_show, NULL); |
| 265 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); | 274 | static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL); |
| @@ -269,6 +278,7 @@ static struct device_attribute dev_attr_fail = | |||
| 269 | #endif | 278 | #endif |
| 270 | 279 | ||
| 271 | static struct attribute *part_attrs[] = { | 280 | static struct attribute *part_attrs[] = { |
| 281 | &dev_attr_partition.attr, | ||
| 272 | &dev_attr_start.attr, | 282 | &dev_attr_start.attr, |
| 273 | &dev_attr_size.attr, | 283 | &dev_attr_size.attr, |
| 274 | &dev_attr_stat.attr, | 284 | &dev_attr_stat.attr, |
| @@ -300,40 +310,34 @@ struct device_type part_type = { | |||
| 300 | .release = part_release, | 310 | .release = part_release, |
| 301 | }; | 311 | }; |
| 302 | 312 | ||
| 303 | static inline void partition_sysfs_add_subdir(struct hd_struct *p) | 313 | static void delete_partition_rcu_cb(struct rcu_head *head) |
| 304 | { | 314 | { |
| 305 | struct kobject *k; | 315 | struct hd_struct *part = container_of(head, struct hd_struct, rcu_head); |
| 306 | 316 | ||
| 307 | k = kobject_get(&p->dev.kobj); | 317 | part->start_sect = 0; |
| 308 | p->holder_dir = kobject_create_and_add("holders", k); | 318 | part->nr_sects = 0; |
| 309 | kobject_put(k); | 319 | part_stat_set_all(part, 0); |
| 320 | put_device(part_to_dev(part)); | ||
| 310 | } | 321 | } |
| 311 | 322 | ||
| 312 | static inline void disk_sysfs_add_subdirs(struct gendisk *disk) | 323 | void delete_partition(struct gendisk *disk, int partno) |
| 313 | { | 324 | { |
| 314 | struct kobject *k; | 325 | struct disk_part_tbl *ptbl = disk->part_tbl; |
| 326 | struct hd_struct *part; | ||
| 315 | 327 | ||
| 316 | k = kobject_get(&disk->dev.kobj); | 328 | if (partno >= ptbl->len) |
| 317 | disk->holder_dir = kobject_create_and_add("holders", k); | ||
| 318 | disk->slave_dir = kobject_create_and_add("slaves", k); | ||
| 319 | kobject_put(k); | ||
| 320 | } | ||
| 321 | |||
| 322 | void delete_partition(struct gendisk *disk, int part) | ||
| 323 | { | ||
| 324 | struct hd_struct *p = disk->part[part-1]; | ||
| 325 | |||
| 326 | if (!p) | ||
| 327 | return; | 329 | return; |
| 328 | if (!p->nr_sects) | 330 | |
| 331 | part = ptbl->part[partno]; | ||
| 332 | if (!part) | ||
| 329 | return; | 333 | return; |
| 330 | disk->part[part-1] = NULL; | 334 | |
| 331 | p->start_sect = 0; | 335 | blk_free_devt(part_devt(part)); |
| 332 | p->nr_sects = 0; | 336 | rcu_assign_pointer(ptbl->part[partno], NULL); |
| 333 | part_stat_set_all(p, 0); | 337 | kobject_put(part->holder_dir); |
| 334 | kobject_put(p->holder_dir); | 338 | device_del(part_to_dev(part)); |
| 335 | device_del(&p->dev); | 339 | |
| 336 | put_device(&p->dev); | 340 | call_rcu(&part->rcu_head, delete_partition_rcu_cb); |
| 337 | } | 341 | } |
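
delete_partition() now unpublishes the slot with rcu_assign_pointer() and defers the final put_device() to an RCU callback, so lookups that dereference the partition table under rcu_read_lock() cannot see a freed hd_struct. A kernel-style fragment of the same publish/retire idiom (a sketch only, not a buildable module; the names "item" and "slot" are made up):

#include <linux/kernel.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct item {
	int		data;
	struct rcu_head	rcu_head;
};

static struct item *slot;	/* read under rcu_read_lock(), updated under a lock */

static void item_free_rcu(struct rcu_head *head)
{
	/* runs only after every pre-existing RCU reader has finished */
	kfree(container_of(head, struct item, rcu_head));
}

static int item_read(void)
{
	struct item *p;
	int val = -1;

	rcu_read_lock();
	p = rcu_dereference(slot);
	if (p)
		val = p->data;
	rcu_read_unlock();
	return val;
}

static void item_retire(void)
{
	struct item *old = slot;	/* update side: caller holds the lock */

	if (!old)
		return;
	rcu_assign_pointer(slot, NULL);			/* unpublish first ...		   */
	call_rcu(&old->rcu_head, item_free_rcu);	/* ... reclaim after a grace period */
}
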
| 338 | 342 | ||
| 339 | static ssize_t whole_disk_show(struct device *dev, | 343 | static ssize_t whole_disk_show(struct device *dev, |
| @@ -344,102 +348,132 @@ static ssize_t whole_disk_show(struct device *dev, | |||
| 344 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, | 348 | static DEVICE_ATTR(whole_disk, S_IRUSR | S_IRGRP | S_IROTH, |
| 345 | whole_disk_show, NULL); | 349 | whole_disk_show, NULL); |
| 346 | 350 | ||
| 347 | int add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, int flags) | 351 | int add_partition(struct gendisk *disk, int partno, |
| 352 | sector_t start, sector_t len, int flags) | ||
| 348 | { | 353 | { |
| 349 | struct hd_struct *p; | 354 | struct hd_struct *p; |
| 355 | dev_t devt = MKDEV(0, 0); | ||
| 356 | struct device *ddev = disk_to_dev(disk); | ||
| 357 | struct device *pdev; | ||
| 358 | struct disk_part_tbl *ptbl; | ||
| 359 | const char *dname; | ||
| 350 | int err; | 360 | int err; |
| 351 | 361 | ||
| 362 | err = disk_expand_part_tbl(disk, partno); | ||
| 363 | if (err) | ||
| 364 | return err; | ||
| 365 | ptbl = disk->part_tbl; | ||
| 366 | |||
| 367 | if (ptbl->part[partno]) | ||
| 368 | return -EBUSY; | ||
| 369 | |||
| 352 | p = kzalloc(sizeof(*p), GFP_KERNEL); | 370 | p = kzalloc(sizeof(*p), GFP_KERNEL); |
| 353 | if (!p) | 371 | if (!p) |
| 354 | return -ENOMEM; | 372 | return -ENOMEM; |
| 355 | 373 | ||
| 356 | if (!init_part_stats(p)) { | 374 | if (!init_part_stats(p)) { |
| 357 | err = -ENOMEM; | 375 | err = -ENOMEM; |
| 358 | goto out0; | 376 | goto out_free; |
| 359 | } | 377 | } |
| 378 | pdev = part_to_dev(p); | ||
| 379 | |||
| 360 | p->start_sect = start; | 380 | p->start_sect = start; |
| 361 | p->nr_sects = len; | 381 | p->nr_sects = len; |
| 362 | p->partno = part; | 382 | p->partno = partno; |
| 363 | p->policy = disk->policy; | 383 | p->policy = get_disk_ro(disk); |
| 364 | 384 | ||
| 365 | if (isdigit(disk->dev.bus_id[strlen(disk->dev.bus_id)-1])) | 385 | dname = dev_name(ddev); |
| 366 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 386 | if (isdigit(dname[strlen(dname) - 1])) |
| 367 | "%sp%d", disk->dev.bus_id, part); | 387 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%sp%d", dname, partno); |
| 368 | else | 388 | else |
| 369 | snprintf(p->dev.bus_id, BUS_ID_SIZE, | 389 | snprintf(pdev->bus_id, BUS_ID_SIZE, "%s%d", dname, partno); |
| 370 | "%s%d", disk->dev.bus_id, part); | 390 | |
| 391 | device_initialize(pdev); | ||
| 392 | pdev->class = &block_class; | ||
| 393 | pdev->type = &part_type; | ||
| 394 | pdev->parent = ddev; | ||
| 371 | 395 | ||
| 372 | device_initialize(&p->dev); | 396 | err = blk_alloc_devt(p, &devt); |
| 373 | p->dev.devt = MKDEV(disk->major, disk->first_minor + part); | 397 | if (err) |
| 374 | p->dev.class = &block_class; | 398 | goto out_free; |
| 375 | p->dev.type = &part_type; | 399 | pdev->devt = devt; |
| 376 | p->dev.parent = &disk->dev; | ||
| 377 | disk->part[part-1] = p; | ||
| 378 | 400 | ||
| 379 | /* delay uevent until 'holders' subdir is created */ | 401 | /* delay uevent until 'holders' subdir is created */ |
| 380 | p->dev.uevent_suppress = 1; | 402 | pdev->uevent_suppress = 1; |
| 381 | err = device_add(&p->dev); | 403 | err = device_add(pdev); |
| 382 | if (err) | 404 | if (err) |
| 383 | goto out1; | 405 | goto out_put; |
| 384 | partition_sysfs_add_subdir(p); | 406 | |
| 385 | p->dev.uevent_suppress = 0; | 407 | err = -ENOMEM; |
| 408 | p->holder_dir = kobject_create_and_add("holders", &pdev->kobj); | ||
| 409 | if (!p->holder_dir) | ||
| 410 | goto out_del; | ||
| 411 | |||
| 412 | pdev->uevent_suppress = 0; | ||
| 386 | if (flags & ADDPART_FLAG_WHOLEDISK) { | 413 | if (flags & ADDPART_FLAG_WHOLEDISK) { |
| 387 | err = device_create_file(&p->dev, &dev_attr_whole_disk); | 414 | err = device_create_file(pdev, &dev_attr_whole_disk); |
| 388 | if (err) | 415 | if (err) |
| 389 | goto out2; | 416 | goto out_del; |
| 390 | } | 417 | } |
| 391 | 418 | ||
| 419 | /* everything is up and running, commence */ | ||
| 420 | INIT_RCU_HEAD(&p->rcu_head); | ||
| 421 | rcu_assign_pointer(ptbl->part[partno], p); | ||
| 422 | |||
| 392 | /* suppress uevent if the disk suppresses it */ | 423 |
| 393 | if (!disk->dev.uevent_suppress) | 424 | if (!ddev->uevent_suppress) |
| 394 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | 425 | kobject_uevent(&pdev->kobj, KOBJ_ADD); |
| 395 | 426 | ||
| 396 | return 0; | 427 | return 0; |
| 397 | 428 | ||
| 398 | out2: | 429 | out_free: |
| 399 | device_del(&p->dev); | ||
| 400 | out1: | ||
| 401 | put_device(&p->dev); | ||
| 402 | free_part_stats(p); | ||
| 403 | out0: | ||
| 404 | kfree(p); | 430 | kfree(p); |
| 405 | return err; | 431 | return err; |
| 432 | out_del: | ||
| 433 | kobject_put(p->holder_dir); | ||
| 434 | device_del(pdev); | ||
| 435 | out_put: | ||
| 436 | put_device(pdev); | ||
| 437 | blk_free_devt(devt); | ||
| 438 | return err; | ||
| 406 | } | 439 | } |
| 407 | 440 | ||
| 408 | /* Not exported, helper to add_disk(). */ | 441 | /* Not exported, helper to add_disk(). */ |
| 409 | void register_disk(struct gendisk *disk) | 442 | void register_disk(struct gendisk *disk) |
| 410 | { | 443 | { |
| 444 | struct device *ddev = disk_to_dev(disk); | ||
| 411 | struct block_device *bdev; | 445 | struct block_device *bdev; |
| 446 | struct disk_part_iter piter; | ||
| 447 | struct hd_struct *part; | ||
| 412 | char *s; | 448 | char *s; |
| 413 | int i; | ||
| 414 | struct hd_struct *p; | ||
| 415 | int err; | 449 | int err; |
| 416 | 450 | ||
| 417 | disk->dev.parent = disk->driverfs_dev; | 451 | ddev->parent = disk->driverfs_dev; |
| 418 | disk->dev.devt = MKDEV(disk->major, disk->first_minor); | ||
| 419 | 452 | ||
| 420 | strlcpy(disk->dev.bus_id, disk->disk_name, BUS_ID_SIZE); | 453 | strlcpy(ddev->bus_id, disk->disk_name, BUS_ID_SIZE); |
| 421 | /* ewww... some of these buggers have / in the name... */ | 454 | /* ewww... some of these buggers have / in the name... */ |
| 422 | s = strchr(disk->dev.bus_id, '/'); | 455 | s = strchr(ddev->bus_id, '/'); |
| 423 | if (s) | 456 | if (s) |
| 424 | *s = '!'; | 457 | *s = '!'; |
| 425 | 458 | ||
| 426 | /* delay uevents, until we scanned partition table */ | 459 | /* delay uevents, until we scanned partition table */ |
| 427 | disk->dev.uevent_suppress = 1; | 460 | ddev->uevent_suppress = 1; |
| 428 | 461 | ||
| 429 | if (device_add(&disk->dev)) | 462 | if (device_add(ddev)) |
| 430 | return; | 463 | return; |
| 431 | #ifndef CONFIG_SYSFS_DEPRECATED | 464 | #ifndef CONFIG_SYSFS_DEPRECATED |
| 432 | err = sysfs_create_link(block_depr, &disk->dev.kobj, | 465 | err = sysfs_create_link(block_depr, &ddev->kobj, |
| 433 | kobject_name(&disk->dev.kobj)); | 466 | kobject_name(&ddev->kobj)); |
| 434 | if (err) { | 467 | if (err) { |
| 435 | device_del(&disk->dev); | 468 | device_del(ddev); |
| 436 | return; | 469 | return; |
| 437 | } | 470 | } |
| 438 | #endif | 471 | #endif |
| 439 | disk_sysfs_add_subdirs(disk); | 472 | disk->part0.holder_dir = kobject_create_and_add("holders", &ddev->kobj); |
| 473 | disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj); | ||
| 440 | 474 | ||
| 441 | /* No minors to use for partitions */ | 475 | /* No minors to use for partitions */ |
| 442 | if (disk->minors == 1) | 476 | if (!disk_partitionable(disk)) |
| 443 | goto exit; | 477 | goto exit; |
| 444 | 478 | ||
| 445 | /* No such device (e.g., media were just removed) */ | 479 | /* No such device (e.g., media were just removed) */ |
| @@ -458,50 +492,79 @@ void register_disk(struct gendisk *disk) | |||
| 458 | 492 | ||
| 459 | exit: | 493 | exit: |
| 460 | /* announce disk after possible partitions are created */ | 494 | /* announce disk after possible partitions are created */ |
| 461 | disk->dev.uevent_suppress = 0; | 495 | ddev->uevent_suppress = 0; |
| 462 | kobject_uevent(&disk->dev.kobj, KOBJ_ADD); | 496 | kobject_uevent(&ddev->kobj, KOBJ_ADD); |
| 463 | 497 | ||
| 464 | /* announce possible partitions */ | 498 | /* announce possible partitions */ |
| 465 | for (i = 1; i < disk->minors; i++) { | 499 | disk_part_iter_init(&piter, disk, 0); |
| 466 | p = disk->part[i-1]; | 500 | while ((part = disk_part_iter_next(&piter))) |
| 467 | if (!p || !p->nr_sects) | 501 | kobject_uevent(&part_to_dev(part)->kobj, KOBJ_ADD); |
| 468 | continue; | 502 | disk_part_iter_exit(&piter); |
| 469 | kobject_uevent(&p->dev.kobj, KOBJ_ADD); | ||
| 470 | } | ||
| 471 | } | 503 | } |
| 472 | 504 | ||
| 473 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) | 505 | int rescan_partitions(struct gendisk *disk, struct block_device *bdev) |
| 474 | { | 506 | { |
| 507 | struct disk_part_iter piter; | ||
| 508 | struct hd_struct *part; | ||
| 475 | struct parsed_partitions *state; | 509 | struct parsed_partitions *state; |
| 476 | int p, res; | 510 | int p, highest, res; |
| 477 | 511 | ||
| 478 | if (bdev->bd_part_count) | 512 | if (bdev->bd_part_count) |
| 479 | return -EBUSY; | 513 | return -EBUSY; |
| 480 | res = invalidate_partition(disk, 0); | 514 | res = invalidate_partition(disk, 0); |
| 481 | if (res) | 515 | if (res) |
| 482 | return res; | 516 | return res; |
| 483 | bdev->bd_invalidated = 0; | 517 | |
| 484 | for (p = 1; p < disk->minors; p++) | 518 | disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY); |
| 485 | delete_partition(disk, p); | 519 | while ((part = disk_part_iter_next(&piter))) |
| 520 | delete_partition(disk, part->partno); | ||
| 521 | disk_part_iter_exit(&piter); | ||
| 522 | |||
| 486 | if (disk->fops->revalidate_disk) | 523 | if (disk->fops->revalidate_disk) |
| 487 | disk->fops->revalidate_disk(disk); | 524 | disk->fops->revalidate_disk(disk); |
| 525 | check_disk_size_change(disk, bdev); | ||
| 526 | bdev->bd_invalidated = 0; | ||
| 488 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) | 527 | if (!get_capacity(disk) || !(state = check_partition(disk, bdev))) |
| 489 | return 0; | 528 | return 0; |
| 490 | if (IS_ERR(state)) /* I/O error reading the partition table */ | 529 | if (IS_ERR(state)) /* I/O error reading the partition table */ |
| 491 | return -EIO; | 530 | return -EIO; |
| 492 | 531 | ||
| 493 | /* tell userspace that the media / partition table may have changed */ | 532 | /* tell userspace that the media / partition table may have changed */ |
| 494 | kobject_uevent(&disk->dev.kobj, KOBJ_CHANGE); | 533 | kobject_uevent(&disk_to_dev(disk)->kobj, KOBJ_CHANGE); |
| 534 | |||
| 535 | /* Detect the highest partition number and preallocate | ||
| 536 | * disk->part_tbl. This is an optimization and not strictly | ||
| 537 | * necessary. | ||
| 538 | */ | ||
| 539 | for (p = 1, highest = 0; p < state->limit; p++) | ||
| 540 | if (state->parts[p].size) | ||
| 541 | highest = p; | ||
| 495 | 542 | ||
| 543 | disk_expand_part_tbl(disk, highest); | ||
| 544 | |||
| 545 | /* add partitions */ | ||
| 496 | for (p = 1; p < state->limit; p++) { | 546 | for (p = 1; p < state->limit; p++) { |
| 497 | sector_t size = state->parts[p].size; | 547 | sector_t size = state->parts[p].size; |
| 498 | sector_t from = state->parts[p].from; | 548 | sector_t from = state->parts[p].from; |
| 499 | if (!size) | 549 | if (!size) |
| 500 | continue; | 550 | continue; |
| 551 | if (from >= get_capacity(disk)) { | ||
| 552 | printk(KERN_WARNING | ||
| 553 | "%s: p%d ignored, start %llu is beyond the end of the disk\n", | ||
| 554 | disk->disk_name, p, (unsigned long long) from); | ||
| 555 | continue; | ||
| 556 | } | ||
| 501 | if (from + size > get_capacity(disk)) { | 557 | if (from + size > get_capacity(disk)) { |
| 558 | /* | ||
| 559 | * we cannot ignore partitions of broken tables | ||
| 560 | * created by, for example, camera firmware, but we | ||
| 561 | * limit them to the end of the disk to avoid | ||
| 562 | * creating invalid block devices | ||
| 563 | */ | ||
| 502 | printk(KERN_WARNING | 564 | printk(KERN_WARNING |
| 503 | "%s: p%d exceeds device capacity\n", | 565 | "%s: p%d size %llu limited to end of disk\n", |
| 504 | disk->disk_name, p); | 566 | disk->disk_name, p, (unsigned long long) size); |
| 567 | size = get_capacity(disk) - from; | ||
| 505 | } | 568 | } |
| 506 | res = add_partition(disk, p, from, size, state->parts[p].flags); | 569 | res = add_partition(disk, p, from, size, state->parts[p].flags); |
| 507 | if (res) { | 570 | if (res) { |
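
The rewritten scan above skips partitions that start past the device and merely clamps those that run over the end, instead of rejecting whole tables written by broken firmware. The clamp is just size = capacity - from; a tiny standalone sketch:

#include <stdio.h>

typedef unsigned long long sector_t;

/* Returns 0 to skip the partition, otherwise writes back the (possibly
 * clamped) size.  Purely illustrative; sizes are in 512-byte sectors. */
static int clamp_part(sector_t capacity, sector_t from, sector_t *size)
{
	if (!*size || from >= capacity)
		return 0;			/* ignore empty / out-of-range entries */
	if (from + *size > capacity)
		*size = capacity - from;	/* limit to end of disk */
	return 1;
}

int main(void)
{
	sector_t size = 500000;

	if (clamp_part(1000000, 800000, &size))
		printf("size clamped to %llu sectors\n", size);	/* 200000 */
	return 0;
}
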
| @@ -541,25 +604,31 @@ EXPORT_SYMBOL(read_dev_sector); | |||
| 541 | 604 | ||
| 542 | void del_gendisk(struct gendisk *disk) | 605 | void del_gendisk(struct gendisk *disk) |
| 543 | { | 606 | { |
| 544 | int p; | 607 | struct disk_part_iter piter; |
| 608 | struct hd_struct *part; | ||
| 545 | 609 | ||
| 546 | /* invalidate stuff */ | 610 | /* invalidate stuff */ |
| 547 | for (p = disk->minors - 1; p > 0; p--) { | 611 | disk_part_iter_init(&piter, disk, |
| 548 | invalidate_partition(disk, p); | 612 | DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE); |
| 549 | delete_partition(disk, p); | 613 | while ((part = disk_part_iter_next(&piter))) { |
| 614 | invalidate_partition(disk, part->partno); | ||
| 615 | delete_partition(disk, part->partno); | ||
| 550 | } | 616 | } |
| 617 | disk_part_iter_exit(&piter); | ||
| 618 | |||
| 551 | invalidate_partition(disk, 0); | 619 | invalidate_partition(disk, 0); |
| 552 | disk->capacity = 0; | 620 | blk_free_devt(disk_to_dev(disk)->devt); |
| 621 | set_capacity(disk, 0); | ||
| 553 | disk->flags &= ~GENHD_FL_UP; | 622 | disk->flags &= ~GENHD_FL_UP; |
| 554 | unlink_gendisk(disk); | 623 | unlink_gendisk(disk); |
| 555 | disk_stat_set_all(disk, 0); | 624 | part_stat_set_all(&disk->part0, 0); |
| 556 | disk->stamp = 0; | 625 | disk->part0.stamp = 0; |
| 557 | 626 | ||
| 558 | kobject_put(disk->holder_dir); | 627 | kobject_put(disk->part0.holder_dir); |
| 559 | kobject_put(disk->slave_dir); | 628 | kobject_put(disk->slave_dir); |
| 560 | disk->driverfs_dev = NULL; | 629 | disk->driverfs_dev = NULL; |
| 561 | #ifndef CONFIG_SYSFS_DEPRECATED | 630 | #ifndef CONFIG_SYSFS_DEPRECATED |
| 562 | sysfs_remove_link(block_depr, disk->dev.bus_id); | 631 | sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); |
| 563 | #endif | 632 | #endif |
| 564 | device_del(&disk->dev); | 633 | device_del(disk_to_dev(disk)); |
| 565 | } | 634 | } |
diff --git a/fs/partitions/check.h b/fs/partitions/check.h index 17ae8ecd9e8b..98dbe1a84528 100644 --- a/fs/partitions/check.h +++ b/fs/partitions/check.h | |||
| @@ -5,15 +5,13 @@ | |||
| 5 | * add_gd_partition adds a partitions details to the devices partition | 5 | * add_gd_partition adds a partitions details to the devices partition |
| 6 | * description. | 6 | * description. |
| 7 | */ | 7 | */ |
| 8 | enum { MAX_PART = 256 }; | ||
| 9 | |||
| 10 | struct parsed_partitions { | 8 | struct parsed_partitions { |
| 11 | char name[BDEVNAME_SIZE]; | 9 | char name[BDEVNAME_SIZE]; |
| 12 | struct { | 10 | struct { |
| 13 | sector_t from; | 11 | sector_t from; |
| 14 | sector_t size; | 12 | sector_t size; |
| 15 | int flags; | 13 | int flags; |
| 16 | } parts[MAX_PART]; | 14 | } parts[DISK_MAX_PARTS]; |
| 17 | int next; | 15 | int next; |
| 18 | int limit; | 16 | int limit; |
| 19 | }; | 17 | }; |
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig index 73cd7a418f06..50f8f0600f06 100644 --- a/fs/proc/Kconfig +++ b/fs/proc/Kconfig | |||
| @@ -57,3 +57,13 @@ config PROC_SYSCTL | |||
| 57 | As it is generally a good thing, you should say Y here unless | 57 | As it is generally a good thing, you should say Y here unless |
| 58 | building a kernel for install/rescue disks or your system is very | 58 | building a kernel for install/rescue disks or your system is very |
| 59 | limited in memory. | 59 | limited in memory. |
| 60 | |||
| 61 | config PROC_PAGE_MONITOR | ||
| 62 | default y | ||
| 63 | depends on PROC_FS && MMU | ||
| 64 | bool "Enable /proc page monitoring" if EMBEDDED | ||
| 65 | help | ||
| 66 | Various /proc files exist to monitor process memory utilization: | ||
| 67 | /proc/pid/smaps, /proc/pid/clear_refs, /proc/pid/pagemap, | ||
| 68 | /proc/kpagecount, and /proc/kpageflags. Disabling these | ||
| 69 | interfaces will reduce the size of the kernel by approximately 4kb. | ||
diff --git a/fs/proc/array.c b/fs/proc/array.c index 71c9be59c9c2..bb9f4b05703d 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c | |||
| @@ -86,11 +86,6 @@ | |||
| 86 | #include <asm/processor.h> | 86 | #include <asm/processor.h> |
| 87 | #include "internal.h" | 87 | #include "internal.h" |
| 88 | 88 | ||
| 89 | /* Gcc optimizes away "strlen(x)" for constant x */ | ||
| 90 | #define ADDBUF(buffer, string) \ | ||
| 91 | do { memcpy(buffer, string, strlen(string)); \ | ||
| 92 | buffer += strlen(string); } while (0) | ||
| 93 | |||
| 94 | static inline void task_name(struct seq_file *m, struct task_struct *p) | 89 | static inline void task_name(struct seq_file *m, struct task_struct *p) |
| 95 | { | 90 | { |
| 96 | int i; | 91 | int i; |
| @@ -261,7 +256,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
| 261 | sigemptyset(&ignored); | 256 | sigemptyset(&ignored); |
| 262 | sigemptyset(&caught); | 257 | sigemptyset(&caught); |
| 263 | 258 | ||
| 264 | rcu_read_lock(); | ||
| 265 | if (lock_task_sighand(p, &flags)) { | 259 | if (lock_task_sighand(p, &flags)) { |
| 266 | pending = p->pending.signal; | 260 | pending = p->pending.signal; |
| 267 | shpending = p->signal->shared_pending.signal; | 261 | shpending = p->signal->shared_pending.signal; |
| @@ -272,7 +266,6 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) | |||
| 272 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; | 266 | qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; |
| 273 | unlock_task_sighand(p, &flags); | 267 | unlock_task_sighand(p, &flags); |
| 274 | } | 268 | } |
| 275 | rcu_read_unlock(); | ||
| 276 | 269 | ||
| 277 | seq_printf(m, "Threads:\t%d\n", num_threads); | 270 | seq_printf(m, "Threads:\t%d\n", num_threads); |
| 278 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); | 271 | seq_printf(m, "SigQ:\t%lu/%lu\n", qsize, qlim); |
| @@ -395,20 +388,20 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, | |||
| 395 | 388 | ||
| 396 | /* add up live thread stats at the group level */ | 389 | /* add up live thread stats at the group level */ |
| 397 | if (whole) { | 390 | if (whole) { |
| 391 | struct task_cputime cputime; | ||
| 398 | struct task_struct *t = task; | 392 | struct task_struct *t = task; |
| 399 | do { | 393 | do { |
| 400 | min_flt += t->min_flt; | 394 | min_flt += t->min_flt; |
| 401 | maj_flt += t->maj_flt; | 395 | maj_flt += t->maj_flt; |
| 402 | utime = cputime_add(utime, task_utime(t)); | ||
| 403 | stime = cputime_add(stime, task_stime(t)); | ||
| 404 | gtime = cputime_add(gtime, task_gtime(t)); | 396 | gtime = cputime_add(gtime, task_gtime(t)); |
| 405 | t = next_thread(t); | 397 | t = next_thread(t); |
| 406 | } while (t != task); | 398 | } while (t != task); |
| 407 | 399 | ||
| 408 | min_flt += sig->min_flt; | 400 | min_flt += sig->min_flt; |
| 409 | maj_flt += sig->maj_flt; | 401 | maj_flt += sig->maj_flt; |
| 410 | utime = cputime_add(utime, sig->utime); | 402 | thread_group_cputime(task, &cputime); |
| 411 | stime = cputime_add(stime, sig->stime); | 403 | utime = cputime.utime; |
| 404 | stime = cputime.stime; | ||
| 412 | gtime = cputime_add(gtime, sig->gtime); | 405 | gtime = cputime_add(gtime, sig->gtime); |
| 413 | } | 406 | } |
| 414 | 407 | ||
diff --git a/fs/proc/base.c b/fs/proc/base.c index a28840b11b89..b5918ae8ca79 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c | |||
| @@ -148,9 +148,6 @@ static unsigned int pid_entry_count_dirs(const struct pid_entry *entries, | |||
| 148 | return count; | 148 | return count; |
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | int maps_protect; | ||
| 152 | EXPORT_SYMBOL(maps_protect); | ||
| 153 | |||
| 154 | static struct fs_struct *get_fs_struct(struct task_struct *task) | 151 | static struct fs_struct *get_fs_struct(struct task_struct *task) |
| 155 | { | 152 | { |
| 156 | struct fs_struct *fs; | 153 | struct fs_struct *fs; |
| @@ -164,7 +161,6 @@ static struct fs_struct *get_fs_struct(struct task_struct *task) | |||
| 164 | 161 | ||
| 165 | static int get_nr_threads(struct task_struct *tsk) | 162 | static int get_nr_threads(struct task_struct *tsk) |
| 166 | { | 163 | { |
| 167 | /* Must be called with the rcu_read_lock held */ | ||
| 168 | unsigned long flags; | 164 | unsigned long flags; |
| 169 | int count = 0; | 165 | int count = 0; |
| 170 | 166 | ||
| @@ -471,14 +467,10 @@ static int proc_pid_limits(struct task_struct *task, char *buffer) | |||
| 471 | 467 | ||
| 472 | struct rlimit rlim[RLIM_NLIMITS]; | 468 | struct rlimit rlim[RLIM_NLIMITS]; |
| 473 | 469 | ||
| 474 | rcu_read_lock(); | 470 | if (!lock_task_sighand(task, &flags)) |
| 475 | if (!lock_task_sighand(task,&flags)) { | ||
| 476 | rcu_read_unlock(); | ||
| 477 | return 0; | 471 | return 0; |
| 478 | } | ||
| 479 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); | 472 | memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); |
| 480 | unlock_task_sighand(task, &flags); | 473 | unlock_task_sighand(task, &flags); |
| 481 | rcu_read_unlock(); | ||
| 482 | 474 | ||
| 483 | /* | 475 | /* |
| 484 | * print the file header | 476 | * print the file header |
| @@ -2443,6 +2435,13 @@ static int proc_tgid_io_accounting(struct task_struct *task, char *buffer) | |||
| 2443 | } | 2435 | } |
| 2444 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ | 2436 | #endif /* CONFIG_TASK_IO_ACCOUNTING */ |
| 2445 | 2437 | ||
| 2438 | static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns, | ||
| 2439 | struct pid *pid, struct task_struct *task) | ||
| 2440 | { | ||
| 2441 | seq_printf(m, "%08x\n", task->personality); | ||
| 2442 | return 0; | ||
| 2443 | } | ||
| 2444 | |||
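
The new per-task "personality" file prints the raw personality word as eight hex digits. A hedged userspace sketch that reads it back (assuming the file exists as added above and that the caller may read it, since the entry is S_IRUSR):

#include <stdio.h>

int main(void)
{
	unsigned int persona;
	FILE *f = fopen("/proc/self/personality", "r");

	if (!f)
		return 1;
	if (fscanf(f, "%x", &persona) == 1)
		printf("personality: 0x%08x\n", persona);
	fclose(f);
	return 0;
}
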
| 2446 | /* | 2445 | /* |
| 2447 | * Thread groups | 2446 | * Thread groups |
| 2448 | */ | 2447 | */ |
| @@ -2459,6 +2458,7 @@ static const struct pid_entry tgid_base_stuff[] = { | |||
| 2459 | REG("environ", S_IRUSR, environ), | 2458 | REG("environ", S_IRUSR, environ), |
| 2460 | INF("auxv", S_IRUSR, pid_auxv), | 2459 | INF("auxv", S_IRUSR, pid_auxv), |
| 2461 | ONE("status", S_IRUGO, pid_status), | 2460 | ONE("status", S_IRUGO, pid_status), |
| 2461 | ONE("personality", S_IRUSR, pid_personality), | ||
| 2462 | INF("limits", S_IRUSR, pid_limits), | 2462 | INF("limits", S_IRUSR, pid_limits), |
| 2463 | #ifdef CONFIG_SCHED_DEBUG | 2463 | #ifdef CONFIG_SCHED_DEBUG |
| 2464 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | 2464 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), |
| @@ -2794,6 +2794,7 @@ static const struct pid_entry tid_base_stuff[] = { | |||
| 2794 | REG("environ", S_IRUSR, environ), | 2794 | REG("environ", S_IRUSR, environ), |
| 2795 | INF("auxv", S_IRUSR, pid_auxv), | 2795 | INF("auxv", S_IRUSR, pid_auxv), |
| 2796 | ONE("status", S_IRUGO, pid_status), | 2796 | ONE("status", S_IRUGO, pid_status), |
| 2797 | ONE("personality", S_IRUSR, pid_personality), | ||
| 2797 | INF("limits", S_IRUSR, pid_limits), | 2798 | INF("limits", S_IRUSR, pid_limits), |
| 2798 | #ifdef CONFIG_SCHED_DEBUG | 2799 | #ifdef CONFIG_SCHED_DEBUG |
| 2799 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), | 2800 | REG("sched", S_IRUGO|S_IWUSR, pid_sched), |
| @@ -3088,9 +3089,7 @@ static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct | |||
| 3088 | generic_fillattr(inode, stat); | 3089 | generic_fillattr(inode, stat); |
| 3089 | 3090 | ||
| 3090 | if (p) { | 3091 | if (p) { |
| 3091 | rcu_read_lock(); | ||
| 3092 | stat->nlink += get_nr_threads(p); | 3092 | stat->nlink += get_nr_threads(p); |
| 3093 | rcu_read_unlock(); | ||
| 3094 | put_task_struct(p); | 3093 | put_task_struct(p); |
| 3095 | } | 3094 | } |
| 3096 | 3095 | ||
diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8bb03f056c28..c6b4fa7e3b49 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c | |||
| @@ -342,7 +342,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) | |||
| 342 | if (!pde->proc_fops) { | 342 | if (!pde->proc_fops) { |
| 343 | spin_unlock(&pde->pde_unload_lock); | 343 | spin_unlock(&pde->pde_unload_lock); |
| 344 | kfree(pdeo); | 344 | kfree(pdeo); |
| 345 | return rv; | 345 | return -EINVAL; |
| 346 | } | 346 | } |
| 347 | pde->pde_users++; | 347 | pde->pde_users++; |
| 348 | open = pde->proc_fops->open; | 348 | open = pde->proc_fops->open; |
diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 442202314d53..3bfb7b8747b3 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h | |||
| @@ -45,8 +45,6 @@ do { \ | |||
| 45 | extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); | 45 | extern int nommu_vma_show(struct seq_file *, struct vm_area_struct *); |
| 46 | #endif | 46 | #endif |
| 47 | 47 | ||
| 48 | extern int maps_protect; | ||
| 49 | |||
| 50 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, | 48 | extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns, |
| 51 | struct pid *pid, struct task_struct *task); | 49 | struct pid *pid, struct task_struct *task); |
| 52 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, | 50 | extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns, |
diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index 29e20c6b1f7f..7ea52c79b2da 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c | |||
| @@ -30,6 +30,7 @@ | |||
| 30 | #include <linux/mm.h> | 30 | #include <linux/mm.h> |
| 31 | #include <linux/mmzone.h> | 31 | #include <linux/mmzone.h> |
| 32 | #include <linux/pagemap.h> | 32 | #include <linux/pagemap.h> |
| 33 | #include <linux/irq.h> | ||
| 33 | #include <linux/interrupt.h> | 34 | #include <linux/interrupt.h> |
| 34 | #include <linux/swap.h> | 35 | #include <linux/swap.h> |
| 35 | #include <linux/slab.h> | 36 | #include <linux/slab.h> |
| @@ -45,7 +46,6 @@ | |||
| 45 | #include <linux/blkdev.h> | 46 | #include <linux/blkdev.h> |
| 46 | #include <linux/hugetlb.h> | 47 | #include <linux/hugetlb.h> |
| 47 | #include <linux/jiffies.h> | 48 | #include <linux/jiffies.h> |
| 48 | #include <linux/sysrq.h> | ||
| 49 | #include <linux/vmalloc.h> | 49 | #include <linux/vmalloc.h> |
| 50 | #include <linux/crash_dump.h> | 50 | #include <linux/crash_dump.h> |
| 51 | #include <linux/pid_namespace.h> | 51 | #include <linux/pid_namespace.h> |
| @@ -68,7 +68,6 @@ | |||
| 68 | extern int get_hardware_list(char *); | 68 | extern int get_hardware_list(char *); |
| 69 | extern int get_stram_list(char *); | 69 | extern int get_stram_list(char *); |
| 70 | extern int get_exec_domain_list(char *); | 70 | extern int get_exec_domain_list(char *); |
| 71 | extern int get_dma_list(char *); | ||
| 72 | 71 | ||
| 73 | static int proc_calc_metrics(char *page, char **start, off_t off, | 72 | static int proc_calc_metrics(char *page, char **start, off_t off, |
| 74 | int count, int *eof, int len) | 73 | int count, int *eof, int len) |
| @@ -138,6 +137,8 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 138 | unsigned long allowed; | 137 | unsigned long allowed; |
| 139 | struct vmalloc_info vmi; | 138 | struct vmalloc_info vmi; |
| 140 | long cached; | 139 | long cached; |
| 140 | unsigned long pages[NR_LRU_LISTS]; | ||
| 141 | int lru; | ||
| 141 | 142 | ||
| 142 | /* | 143 | /* |
| 143 | * display in kilobytes. | 144 | * display in kilobytes. |
| @@ -156,51 +157,70 @@ static int meminfo_read_proc(char *page, char **start, off_t off, | |||
| 156 | 157 | ||
| 157 | get_vmalloc_info(&vmi); | 158 | get_vmalloc_info(&vmi); |
| 158 | 159 | ||
| 160 | for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++) | ||
| 161 | pages[lru] = global_page_state(NR_LRU_BASE + lru); | ||
| 162 | |||
| 159 | /* | 163 | /* |
| 160 | * Tagged format, for easy grepping and expansion. | 164 | * Tagged format, for easy grepping and expansion. |
| 161 | */ | 165 | */ |
| 162 | len = sprintf(page, | 166 | len = sprintf(page, |
| 163 | "MemTotal: %8lu kB\n" | 167 | "MemTotal: %8lu kB\n" |
| 164 | "MemFree: %8lu kB\n" | 168 | "MemFree: %8lu kB\n" |
| 165 | "Buffers: %8lu kB\n" | 169 | "Buffers: %8lu kB\n" |
| 166 | "Cached: %8lu kB\n" | 170 | "Cached: %8lu kB\n" |
| 167 | "SwapCached: %8lu kB\n" | 171 | "SwapCached: %8lu kB\n" |
| 168 | "Active: %8lu kB\n" | 172 | "Active: %8lu kB\n" |
| 169 | "Inactive: %8lu kB\n" | 173 | "Inactive: %8lu kB\n" |
| 174 | "Active(anon): %8lu kB\n" | ||
| 175 | "Inactive(anon): %8lu kB\n" | ||
| 176 | "Active(file): %8lu kB\n" | ||
| 177 | "Inactive(file): %8lu kB\n" | ||
| 178 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
| 179 | "Unevictable: %8lu kB\n" | ||
| 180 | "Mlocked: %8lu kB\n" | ||
| 181 | #endif | ||
| 170 | #ifdef CONFIG_HIGHMEM | 182 | #ifdef CONFIG_HIGHMEM |
| 171 | "HighTotal: %8lu kB\n" | 183 | "HighTotal: %8lu kB\n" |
| 172 | "HighFree: %8lu kB\n" | 184 | "HighFree: %8lu kB\n" |
| 173 | "LowTotal: %8lu kB\n" | 185 | "LowTotal: %8lu kB\n" |
| 174 | "LowFree: %8lu kB\n" | 186 | "LowFree: %8lu kB\n" |
| 175 | #endif | 187 | #endif |
| 176 | "SwapTotal: %8lu kB\n" | 188 | "SwapTotal: %8lu kB\n" |
| 177 | "SwapFree: %8lu kB\n" | 189 | "SwapFree: %8lu kB\n" |
| 178 | "Dirty: %8lu kB\n" | 190 | "Dirty: %8lu kB\n" |
| 179 | "Writeback: %8lu kB\n" | 191 | "Writeback: %8lu kB\n" |
| 180 | "AnonPages: %8lu kB\n" | 192 | "AnonPages: %8lu kB\n" |
| 181 | "Mapped: %8lu kB\n" | 193 | "Mapped: %8lu kB\n" |
| 182 | "Slab: %8lu kB\n" | 194 | "Slab: %8lu kB\n" |
| 183 | "SReclaimable: %8lu kB\n" | 195 | "SReclaimable: %8lu kB\n" |
| 184 | "SUnreclaim: %8lu kB\n" | 196 | "SUnreclaim: %8lu kB\n" |
| 185 | "PageTables: %8lu kB\n" | 197 | "PageTables: %8lu kB\n" |
| 186 | #ifdef CONFIG_QUICKLIST | 198 | #ifdef CONFIG_QUICKLIST |
| 187 | "Quicklists: %8lu kB\n" | 199 | "Quicklists: %8lu kB\n" |
| 188 | #endif | 200 | #endif |
| 189 | "NFS_Unstable: %8lu kB\n" | 201 | "NFS_Unstable: %8lu kB\n" |
| 190 | "Bounce: %8lu kB\n" | 202 | "Bounce: %8lu kB\n" |
| 191 | "WritebackTmp: %8lu kB\n" | 203 | "WritebackTmp: %8lu kB\n" |
| 192 | "CommitLimit: %8lu kB\n" | 204 | "CommitLimit: %8lu kB\n" |
| 193 | "Committed_AS: %8lu kB\n" | 205 | "Committed_AS: %8lu kB\n" |
| 194 | "VmallocTotal: %8lu kB\n" | 206 | "VmallocTotal: %8lu kB\n" |
| 195 | "VmallocUsed: %8lu kB\n" | 207 | "VmallocUsed: %8lu kB\n" |
| 196 | "VmallocChunk: %8lu kB\n", | 208 | "VmallocChunk: %8lu kB\n", |
| 197 | K(i.totalram), | 209 | K(i.totalram), |
| 198 | K(i.freeram), | 210 | K(i.freeram), |
| 199 | K(i.bufferram), | 211 | K(i.bufferram), |
| 200 | K(cached), | 212 | K(cached), |
| 201 | K(total_swapcache_pages), | 213 | K(total_swapcache_pages), |
| 202 | K(global_page_state(NR_ACTIVE)), | 214 | K(pages[LRU_ACTIVE_ANON] + pages[LRU_ACTIVE_FILE]), |
| 203 | K(global_page_state(NR_INACTIVE)), | 215 | K(pages[LRU_INACTIVE_ANON] + pages[LRU_INACTIVE_FILE]), |
| 216 | K(pages[LRU_ACTIVE_ANON]), | ||
| 217 | K(pages[LRU_INACTIVE_ANON]), | ||
| 218 | K(pages[LRU_ACTIVE_FILE]), | ||
| 219 | K(pages[LRU_INACTIVE_FILE]), | ||
| 220 | #ifdef CONFIG_UNEVICTABLE_LRU | ||
| 221 | K(pages[LRU_UNEVICTABLE]), | ||
| 222 | K(global_page_state(NR_MLOCK)), | ||
| 223 | #endif | ||
| 204 | #ifdef CONFIG_HIGHMEM | 224 | #ifdef CONFIG_HIGHMEM |
| 205 | K(i.totalhigh), | 225 | K(i.totalhigh), |
| 206 | K(i.freehigh), | 226 | K(i.freehigh), |
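
With the split LRU lists, Active and Inactive are now reported as the sum of the anon and file lists, and the per-list counts are exported on their own lines. A standalone sketch of that aggregation with made-up page counts (assuming 4 KB pages):

#include <stdio.h>

enum lru { INACTIVE_ANON, ACTIVE_ANON, INACTIVE_FILE, ACTIVE_FILE, NR_LRU };

int main(void)
{
	/* hypothetical per-list page counts */
	unsigned long pages[NR_LRU] = { 1000, 2000, 3000, 4000 };
	unsigned long kb_per_page = 4;	/* assuming 4 KB pages */

	printf("Active:   %8lu kB\n",
	       (pages[ACTIVE_ANON] + pages[ACTIVE_FILE]) * kb_per_page);
	printf("Inactive: %8lu kB\n",
	       (pages[INACTIVE_ANON] + pages[INACTIVE_FILE]) * kb_per_page);
	return 0;
}
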
| @@ -502,17 +522,13 @@ static const struct file_operations proc_vmalloc_operations = { | |||
| 502 | 522 | ||
| 503 | static int show_stat(struct seq_file *p, void *v) | 523 | static int show_stat(struct seq_file *p, void *v) |
| 504 | { | 524 | { |
| 505 | int i; | 525 | int i, j; |
| 506 | unsigned long jif; | 526 | unsigned long jif; |
| 507 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; | 527 | cputime64_t user, nice, system, idle, iowait, irq, softirq, steal; |
| 508 | cputime64_t guest; | 528 | cputime64_t guest; |
| 509 | u64 sum = 0; | 529 | u64 sum = 0; |
| 510 | struct timespec boottime; | 530 | struct timespec boottime; |
| 511 | unsigned int *per_irq_sum; | 531 | unsigned int per_irq_sum; |
| 512 | |||
| 513 | per_irq_sum = kzalloc(sizeof(unsigned int)*NR_IRQS, GFP_KERNEL); | ||
| 514 | if (!per_irq_sum) | ||
| 515 | return -ENOMEM; | ||
| 516 | 532 | ||
| 517 | user = nice = system = idle = iowait = | 533 | user = nice = system = idle = iowait = |
| 518 | irq = softirq = steal = cputime64_zero; | 534 | irq = softirq = steal = cputime64_zero; |
| @@ -521,8 +537,6 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 521 | jif = boottime.tv_sec; | 537 | jif = boottime.tv_sec; |
| 522 | 538 | ||
| 523 | for_each_possible_cpu(i) { | 539 | for_each_possible_cpu(i) { |
| 524 | int j; | ||
| 525 | |||
| 526 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); | 540 | user = cputime64_add(user, kstat_cpu(i).cpustat.user); |
| 527 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); | 541 | nice = cputime64_add(nice, kstat_cpu(i).cpustat.nice); |
| 528 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); | 542 | system = cputime64_add(system, kstat_cpu(i).cpustat.system); |
| @@ -532,11 +546,10 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 532 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); | 546 | softirq = cputime64_add(softirq, kstat_cpu(i).cpustat.softirq); |
| 533 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); | 547 | steal = cputime64_add(steal, kstat_cpu(i).cpustat.steal); |
| 534 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); | 548 | guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest); |
| 535 | for (j = 0; j < NR_IRQS; j++) { | 549 | |
| 536 | unsigned int temp = kstat_cpu(i).irqs[j]; | 550 | for_each_irq_nr(j) |
| 537 | sum += temp; | 551 | sum += kstat_irqs_cpu(j, i); |
| 538 | per_irq_sum[j] += temp; | 552 | |
| 539 | } | ||
| 540 | sum += arch_irq_stat_cpu(i); | 553 | sum += arch_irq_stat_cpu(i); |
| 541 | } | 554 | } |
| 542 | sum += arch_irq_stat(); | 555 | sum += arch_irq_stat(); |
| @@ -578,8 +591,15 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 578 | } | 591 | } |
| 579 | seq_printf(p, "intr %llu", (unsigned long long)sum); | 592 | seq_printf(p, "intr %llu", (unsigned long long)sum); |
| 580 | 593 | ||
| 581 | for (i = 0; i < NR_IRQS; i++) | 594 | /* sum again? the counters may have changed since the pass above */
| 582 | seq_printf(p, " %u", per_irq_sum[i]); | 595 | for_each_irq_nr(j) { |
| 596 | per_irq_sum = 0; | ||
| 597 | |||
| 598 | for_each_possible_cpu(i) | ||
| 599 | per_irq_sum += kstat_irqs_cpu(j, i); | ||
| 600 | |||
| 601 | seq_printf(p, " %u", per_irq_sum); | ||
| 602 | } | ||
| 583 | 603 | ||
| 584 | seq_printf(p, | 604 | seq_printf(p, |
| 585 | "\nctxt %llu\n" | 605 | "\nctxt %llu\n" |
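
show_stat() now recomputes each interrupt's total by summing the per-CPU counters inline rather than keeping an NR_IRQS-sized scratch array. A standalone sketch of that double loop over hypothetical counters:

#include <stdio.h>

#define NR_CPUS	4
#define NR_IRQS	3

/* hypothetical per-CPU interrupt counts: irq_count[cpu][irq] */
static unsigned int irq_count[NR_CPUS][NR_IRQS] = {
	{ 10, 0, 5 }, { 12, 1, 4 }, { 9, 0, 6 }, { 11, 2, 3 },
};

int main(void)
{
	unsigned long long total = 0;
	unsigned int per_irq_sum;
	int cpu, irq;

	/* grand total, analogous to the "intr" field */
	for (cpu = 0; cpu < NR_CPUS; cpu++)
		for (irq = 0; irq < NR_IRQS; irq++)
			total += irq_count[cpu][irq];
	printf("intr %llu", total);

	/* per-interrupt totals, recomputed on the fly */
	for (irq = 0; irq < NR_IRQS; irq++) {
		per_irq_sum = 0;
		for (cpu = 0; cpu < NR_CPUS; cpu++)
			per_irq_sum += irq_count[cpu][irq];
		printf(" %u", per_irq_sum);
	}
	printf("\n");
	return 0;
}
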
| @@ -593,7 +613,6 @@ static int show_stat(struct seq_file *p, void *v) | |||
| 593 | nr_running(), | 613 | nr_running(), |
| 594 | nr_iowait()); | 614 | nr_iowait()); |
| 595 | 615 | ||
| 596 | kfree(per_irq_sum); | ||
| 597 | return 0; | 616 | return 0; |
| 598 | } | 617 | } |
| 599 | 618 | ||
| @@ -632,15 +651,14 @@ static const struct file_operations proc_stat_operations = { | |||
| 632 | */ | 651 | */ |
| 633 | static void *int_seq_start(struct seq_file *f, loff_t *pos) | 652 | static void *int_seq_start(struct seq_file *f, loff_t *pos) |
| 634 | { | 653 | { |
| 635 | return (*pos <= NR_IRQS) ? pos : NULL; | 654 | return (*pos <= nr_irqs) ? pos : NULL; |
| 636 | } | 655 | } |
| 637 | 656 | ||
| 657 | |||
| 638 | static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) | 658 | static void *int_seq_next(struct seq_file *f, void *v, loff_t *pos) |
| 639 | { | 659 | { |
| 640 | (*pos)++; | 660 | (*pos)++; |
| 641 | if (*pos > NR_IRQS) | 661 | return (*pos <= nr_irqs) ? pos : NULL; |
| 642 | return NULL; | ||
| 643 | return pos; | ||
| 644 | } | 662 | } |
| 645 | 663 | ||
| 646 | static void int_seq_stop(struct seq_file *f, void *v) | 664 | static void int_seq_stop(struct seq_file *f, void *v) |
| @@ -648,7 +666,6 @@ static void int_seq_stop(struct seq_file *f, void *v) | |||
| 648 | /* Nothing to do */ | 666 | /* Nothing to do */ |
| 649 | } | 667 | } |
| 650 | 668 | ||
| 651 | |||
| 652 | static const struct seq_operations int_seq_ops = { | 669 | static const struct seq_operations int_seq_ops = { |
| 653 | .start = int_seq_start, | 670 | .start = int_seq_start, |
| 654 | .next = int_seq_next, | 671 | .next = int_seq_next, |
| @@ -684,6 +701,7 @@ static int cmdline_read_proc(char *page, char **start, off_t off, | |||
| 684 | return proc_calc_metrics(page, start, off, count, eof, len); | 701 | return proc_calc_metrics(page, start, off, count, eof, len); |
| 685 | } | 702 | } |
| 686 | 703 | ||
| 704 | #ifdef CONFIG_FILE_LOCKING | ||
| 687 | static int locks_open(struct inode *inode, struct file *filp) | 705 | static int locks_open(struct inode *inode, struct file *filp) |
| 688 | { | 706 | { |
| 689 | return seq_open(filp, &locks_seq_operations); | 707 | return seq_open(filp, &locks_seq_operations); |
| @@ -695,6 +713,7 @@ static const struct file_operations proc_locks_operations = { | |||
| 695 | .llseek = seq_lseek, | 713 | .llseek = seq_lseek, |
| 696 | .release = seq_release, | 714 | .release = seq_release, |
| 697 | }; | 715 | }; |
| 716 | #endif /* CONFIG_FILE_LOCKING */ | ||
| 698 | 717 | ||
| 699 | static int execdomains_read_proc(char *page, char **start, off_t off, | 718 | static int execdomains_read_proc(char *page, char **start, off_t off, |
| 700 | int count, int *eof, void *data) | 719 | int count, int *eof, void *data) |
| @@ -703,28 +722,6 @@ static int execdomains_read_proc(char *page, char **start, off_t off, | |||
| 703 | return proc_calc_metrics(page, start, off, count, eof, len); | 722 | return proc_calc_metrics(page, start, off, count, eof, len); |
| 704 | } | 723 | } |
| 705 | 724 | ||
| 706 | #ifdef CONFIG_MAGIC_SYSRQ | ||
| 707 | /* | ||
| 708 | * writing 'C' to /proc/sysrq-trigger is like sysrq-C | ||
| 709 | */ | ||
| 710 | static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf, | ||
| 711 | size_t count, loff_t *ppos) | ||
| 712 | { | ||
| 713 | if (count) { | ||
| 714 | char c; | ||
| 715 | |||
| 716 | if (get_user(c, buf)) | ||
| 717 | return -EFAULT; | ||
| 718 | __handle_sysrq(c, NULL, 0); | ||
| 719 | } | ||
| 720 | return count; | ||
| 721 | } | ||
| 722 | |||
| 723 | static const struct file_operations proc_sysrq_trigger_operations = { | ||
| 724 | .write = write_sysrq_trigger, | ||
| 725 | }; | ||
| 726 | #endif | ||
| 727 | |||
| 728 | #ifdef CONFIG_PROC_PAGE_MONITOR | 725 | #ifdef CONFIG_PROC_PAGE_MONITOR |
| 729 | #define KPMSIZE sizeof(u64) | 726 | #define KPMSIZE sizeof(u64) |
| 730 | #define KPMMASK (KPMSIZE - 1) | 727 | #define KPMMASK (KPMSIZE - 1) |
| @@ -888,7 +885,9 @@ void __init proc_misc_init(void) | |||
| 888 | #ifdef CONFIG_PRINTK | 885 | #ifdef CONFIG_PRINTK |
| 889 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); | 886 | proc_create("kmsg", S_IRUSR, NULL, &proc_kmsg_operations); |
| 890 | #endif | 887 | #endif |
| 888 | #ifdef CONFIG_FILE_LOCKING | ||
| 891 | proc_create("locks", 0, NULL, &proc_locks_operations); | 889 | proc_create("locks", 0, NULL, &proc_locks_operations); |
| 890 | #endif | ||
| 892 | proc_create("devices", 0, NULL, &proc_devinfo_operations); | 891 | proc_create("devices", 0, NULL, &proc_devinfo_operations); |
| 893 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); | 892 | proc_create("cpuinfo", 0, NULL, &proc_cpuinfo_operations); |
| 894 | #ifdef CONFIG_BLOCK | 893 | #ifdef CONFIG_BLOCK |
| @@ -931,7 +930,4 @@ void __init proc_misc_init(void) | |||
| 931 | #ifdef CONFIG_PROC_VMCORE | 930 | #ifdef CONFIG_PROC_VMCORE |
| 932 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); | 931 | proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations); |
| 933 | #endif | 932 | #endif |
| 934 | #ifdef CONFIG_MAGIC_SYSRQ | ||
| 935 | proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations); | ||
| 936 | #endif | ||
| 937 | } | 933 | } |
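Note on the show_stat() hunk above: it drops the kmalloc'ed per_irq_sum[] array and instead recomputes each interrupt's total at print time, walking every possible CPU for every interrupt number. A minimal sketch of that summation pattern, assuming for_each_irq_nr(), for_each_possible_cpu() and kstat_irqs_cpu() behave as in the patched tree:

	/* sketch only: per-IRQ totals computed on the fly, no temporary array */
	unsigned int per_irq_sum;
	int i, j;

	for_each_irq_nr(j) {
		per_irq_sum = 0;
		for_each_possible_cpu(i)
			per_irq_sum += kstat_irqs_cpu(j, i);
		seq_printf(p, " %u", per_irq_sum);
	}

This trades a little extra work in the loop for not having to allocate a buffer sized by the number of interrupts, which is why the kfree() disappears further down.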
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index f9a8b892718f..945a81043ba2 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c | |||
| @@ -66,7 +66,7 @@ static struct ctl_table *find_in_table(struct ctl_table *p, struct qstr *name) | |||
| 66 | return NULL; | 66 | return NULL; |
| 67 | } | 67 | } |
| 68 | 68 | ||
| 69 | struct ctl_table_header *grab_header(struct inode *inode) | 69 | static struct ctl_table_header *grab_header(struct inode *inode) |
| 70 | { | 70 | { |
| 71 | if (PROC_I(inode)->sysctl) | 71 | if (PROC_I(inode)->sysctl) |
| 72 | return sysctl_head_grab(PROC_I(inode)->sysctl); | 72 | return sysctl_head_grab(PROC_I(inode)->sysctl); |
| @@ -395,10 +395,10 @@ static struct dentry_operations proc_sys_dentry_operations = { | |||
| 395 | .d_compare = proc_sys_compare, | 395 | .d_compare = proc_sys_compare, |
| 396 | }; | 396 | }; |
| 397 | 397 | ||
| 398 | static struct proc_dir_entry *proc_sys_root; | ||
| 399 | |||
| 400 | int proc_sys_init(void) | 398 | int proc_sys_init(void) |
| 401 | { | 399 | { |
| 400 | struct proc_dir_entry *proc_sys_root; | ||
| 401 | |||
| 402 | proc_sys_root = proc_mkdir("sys", NULL); | 402 | proc_sys_root = proc_mkdir("sys", NULL); |
| 403 | proc_sys_root->proc_iops = &proc_sys_dir_operations; | 403 | proc_sys_root->proc_iops = &proc_sys_dir_operations; |
| 404 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; | 404 | proc_sys_root->proc_fops = &proc_sys_dir_file_operations; |
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 73d1891ee625..4806830ea2a1 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c | |||
| @@ -210,9 +210,6 @@ static int show_map(struct seq_file *m, void *v) | |||
| 210 | dev_t dev = 0; | 210 | dev_t dev = 0; |
| 211 | int len; | 211 | int len; |
| 212 | 212 | ||
| 213 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
| 214 | return -EACCES; | ||
| 215 | |||
| 216 | if (file) { | 213 | if (file) { |
| 217 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; | 214 | struct inode *inode = vma->vm_file->f_path.dentry->d_inode; |
| 218 | dev = inode->i_sb->s_dev; | 215 | dev = inode->i_sb->s_dev; |
| @@ -742,22 +739,11 @@ const struct file_operations proc_pagemap_operations = { | |||
| 742 | #ifdef CONFIG_NUMA | 739 | #ifdef CONFIG_NUMA |
| 743 | extern int show_numa_map(struct seq_file *m, void *v); | 740 | extern int show_numa_map(struct seq_file *m, void *v); |
| 744 | 741 | ||
| 745 | static int show_numa_map_checked(struct seq_file *m, void *v) | ||
| 746 | { | ||
| 747 | struct proc_maps_private *priv = m->private; | ||
| 748 | struct task_struct *task = priv->task; | ||
| 749 | |||
| 750 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
| 751 | return -EACCES; | ||
| 752 | |||
| 753 | return show_numa_map(m, v); | ||
| 754 | } | ||
| 755 | |||
| 756 | static const struct seq_operations proc_pid_numa_maps_op = { | 742 | static const struct seq_operations proc_pid_numa_maps_op = { |
| 757 | .start = m_start, | 743 | .start = m_start, |
| 758 | .next = m_next, | 744 | .next = m_next, |
| 759 | .stop = m_stop, | 745 | .stop = m_stop, |
| 760 | .show = show_numa_map_checked | 746 | .show = show_numa_map, |
| 761 | }; | 747 | }; |
| 762 | 748 | ||
| 763 | static int numa_maps_open(struct inode *inode, struct file *file) | 749 | static int numa_maps_open(struct inode *inode, struct file *file) |
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 5d84e7121df8..219bd79ea894 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c | |||
| @@ -110,11 +110,6 @@ int task_statm(struct mm_struct *mm, int *shared, int *text, | |||
| 110 | static int show_map(struct seq_file *m, void *_vml) | 110 | static int show_map(struct seq_file *m, void *_vml) |
| 111 | { | 111 | { |
| 112 | struct vm_list_struct *vml = _vml; | 112 | struct vm_list_struct *vml = _vml; |
| 113 | struct proc_maps_private *priv = m->private; | ||
| 114 | struct task_struct *task = priv->task; | ||
| 115 | |||
| 116 | if (maps_protect && !ptrace_may_access(task, PTRACE_MODE_READ)) | ||
| 117 | return -EACCES; | ||
| 118 | 113 | ||
| 119 | return nommu_vma_show(m, vml->vma); | 114 | return nommu_vma_show(m, vml->vma); |
| 120 | } | 115 | } |
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 9ac0f5e064e0..cd9ca67f841b 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c | |||
| @@ -32,9 +32,6 @@ static size_t elfcorebuf_sz; | |||
| 32 | /* Total size of vmcore file. */ | 32 | /* Total size of vmcore file. */ |
| 33 | static u64 vmcore_size; | 33 | static u64 vmcore_size; |
| 34 | 34 | ||
| 35 | /* Stores the physical address of elf header of crash image. */ | ||
| 36 | unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; | ||
| 37 | |||
| 38 | struct proc_dir_entry *proc_vmcore = NULL; | 35 | struct proc_dir_entry *proc_vmcore = NULL; |
| 39 | 36 | ||
| 40 | /* Reads a page from the oldmem device from given offset. */ | 37 | /* Reads a page from the oldmem device from given offset. */ |
| @@ -165,14 +162,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer, | |||
| 165 | return acc; | 162 | return acc; |
| 166 | } | 163 | } |
| 167 | 164 | ||
| 168 | static int open_vmcore(struct inode *inode, struct file *filp) | ||
| 169 | { | ||
| 170 | return 0; | ||
| 171 | } | ||
| 172 | |||
| 173 | const struct file_operations proc_vmcore_operations = { | 165 | const struct file_operations proc_vmcore_operations = { |
| 174 | .read = read_vmcore, | 166 | .read = read_vmcore, |
| 175 | .open = open_vmcore, | ||
| 176 | }; | 167 | }; |
| 177 | 168 | ||
| 178 | static struct vmcore* __init get_new_element(void) | 169 | static struct vmcore* __init get_new_element(void) |
| @@ -653,7 +644,7 @@ static int __init vmcore_init(void) | |||
| 653 | int rc = 0; | 644 | int rc = 0; |
| 654 | 645 | ||
| 655 | /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ | 646 | /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/ |
| 656 | if (!(elfcorehdr_addr < ELFCORE_ADDR_MAX)) | 647 | if (!(is_vmcore_usable())) |
| 657 | return rc; | 648 | return rc; |
| 658 | rc = parse_crash_elf_headers(); | 649 | rc = parse_crash_elf_headers(); |
| 659 | if (rc) { | 650 | if (rc) { |
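The vmcore_init() hunk replaces the open-coded `!(elfcorehdr_addr < ELFCORE_ADDR_MAX)` test with is_vmcore_usable(), and the elfcorehdr_addr definition itself moves out of this file. The helper presumably lives in <linux/crash_dump.h> and looks roughly like the sketch below; the exact sentinel values are an assumption here, not part of this patch:

	/* rough sketch of the helpers assumed by the call above */
	static inline int is_kdump_kernel(void)
	{
		return elfcorehdr_addr != ELFCORE_ADDR_MAX;
	}

	static inline int is_vmcore_usable(void)
	{
		/* a usable dump needs a valid elfcorehdr= address from the crash kernel */
		return is_kdump_kernel() && elfcorehdr_addr != ELFCORE_ADDR_ERR;
	}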
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 5145cb9125af..76acdbc34611 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c | |||
| @@ -112,12 +112,12 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) | |||
| 112 | goto add_error; | 112 | goto add_error; |
| 113 | 113 | ||
| 114 | if (!pagevec_add(&lru_pvec, page)) | 114 | if (!pagevec_add(&lru_pvec, page)) |
| 115 | __pagevec_lru_add(&lru_pvec); | 115 | __pagevec_lru_add_file(&lru_pvec); |
| 116 | 116 | ||
| 117 | unlock_page(page); | 117 | unlock_page(page); |
| 118 | } | 118 | } |
| 119 | 119 | ||
| 120 | pagevec_lru_add(&lru_pvec); | 120 | pagevec_lru_add_file(&lru_pvec); |
| 121 | return 0; | 121 | return 0; |
| 122 | 122 | ||
| 123 | fsize_exceeded: | 123 | fsize_exceeded: |
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index b13123424e49..f031d1c925f0 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c | |||
| @@ -61,6 +61,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev) | |||
| 61 | inode->i_mapping->a_ops = &ramfs_aops; | 61 | inode->i_mapping->a_ops = &ramfs_aops; |
| 62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; | 62 | inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; |
| 63 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); | 63 | mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER); |
| 64 | mapping_set_unevictable(inode->i_mapping); | ||
| 64 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; | 65 | inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; |
| 65 | switch (mode & S_IFMT) { | 66 | switch (mode & S_IFMT) { |
| 66 | default: | 67 | default: |
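Marking the whole ramfs mapping unevictable keeps its pages off the reclaim LRU lists, which makes sense since ramfs has no backing store to write them to. As a rough sketch of what the helper is assumed to do (a single flag bit on the address_space, here taken to be AS_UNEVICTABLE from <linux/pagemap.h>):

	/* assumed shape of the helper; not literally from this patch */
	static inline void mapping_set_unevictable(struct address_space *mapping)
	{
		set_bit(AS_UNEVICTABLE, &mapping->flags);
	}

Page reclaim can then test the same bit and skip such pages instead of scanning them over and over.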
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index b9dbeeca7049..37173fa07d15 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c | |||
| @@ -8,8 +8,6 @@ | |||
| 8 | 8 | ||
| 9 | /* proc info support a la one created by Sizif@Botik.RU for PGC */ | 9 | /* proc info support a la one created by Sizif@Botik.RU for PGC */ |
| 10 | 10 | ||
| 11 | /* $Id: procfs.c,v 1.1.8.2 2001/07/15 17:08:42 god Exp $ */ | ||
| 12 | |||
| 13 | #include <linux/module.h> | 11 | #include <linux/module.h> |
| 14 | #include <linux/time.h> | 12 | #include <linux/time.h> |
| 15 | #include <linux/seq_file.h> | 13 | #include <linux/seq_file.h> |
| @@ -621,7 +619,6 @@ int reiserfs_global_version_in_proc(char *buffer, char **start, | |||
| 621 | #endif | 619 | #endif |
| 622 | 620 | ||
| 623 | /* | 621 | /* |
| 624 | * $Log: procfs.c,v $ | ||
| 625 | * Revision 1.1.8.2 2001/07/15 17:08:42 god | 622 | * Revision 1.1.8.2 2001/07/15 17:08:42 god |
| 626 | * . use get_super() in procfs.c | 623 | * . use get_super() in procfs.c |
| 627 | * . remove remove_save_link() from reiserfs_do_truncate() | 624 | * . remove remove_save_link() from reiserfs_do_truncate() |
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index bb3cb5b7cdb2..ad92461cbfc3 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c | |||
| @@ -155,7 +155,7 @@ static struct dentry *get_xa_file_dentry(const struct inode *inode, | |||
| 155 | xadir = open_xa_dir(inode, flags); | 155 | xadir = open_xa_dir(inode, flags); |
| 156 | if (IS_ERR(xadir)) { | 156 | if (IS_ERR(xadir)) { |
| 157 | return ERR_CAST(xadir); | 157 | return ERR_CAST(xadir); |
| 158 | } else if (xadir && !xadir->d_inode) { | 158 | } else if (!xadir->d_inode) { |
| 159 | dput(xadir); | 159 | dput(xadir); |
| 160 | return ERR_PTR(-ENODATA); | 160 | return ERR_PTR(-ENODATA); |
| 161 | } | 161 | } |
diff --git a/fs/seq_file.c b/fs/seq_file.c index bd20f7f5a933..eba2eabcd2b8 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c | |||
| @@ -452,17 +452,34 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, char *esc) | |||
| 452 | 452 | ||
| 453 | int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) | 453 | int seq_bitmap(struct seq_file *m, unsigned long *bits, unsigned int nr_bits) |
| 454 | { | 454 | { |
| 455 | size_t len = bitmap_scnprintf_len(nr_bits); | 455 | if (m->count < m->size) { |
| 456 | int len = bitmap_scnprintf(m->buf + m->count, | ||
| 457 | m->size - m->count, bits, nr_bits); | ||
| 458 | if (m->count + len < m->size) { | ||
| 459 | m->count += len; | ||
| 460 | return 0; | ||
| 461 | } | ||
| 462 | } | ||
| 463 | m->count = m->size; | ||
| 464 | return -1; | ||
| 465 | } | ||
| 466 | EXPORT_SYMBOL(seq_bitmap); | ||
| 456 | 467 | ||
| 457 | if (m->count + len < m->size) { | 468 | int seq_bitmap_list(struct seq_file *m, unsigned long *bits, |
| 458 | bitmap_scnprintf(m->buf + m->count, m->size - m->count, | 469 | unsigned int nr_bits) |
| 459 | bits, nr_bits); | 470 | { |
| 460 | m->count += len; | 471 | if (m->count < m->size) { |
| 461 | return 0; | 472 | int len = bitmap_scnlistprintf(m->buf + m->count, |
| 473 | m->size - m->count, bits, nr_bits); | ||
| 474 | if (m->count + len < m->size) { | ||
| 475 | m->count += len; | ||
| 476 | return 0; | ||
| 477 | } | ||
| 462 | } | 478 | } |
| 463 | m->count = m->size; | 479 | m->count = m->size; |
| 464 | return -1; | 480 | return -1; |
| 465 | } | 481 | } |
| 482 | EXPORT_SYMBOL(seq_bitmap_list); | ||
| 466 | 483 | ||
| 467 | static void *single_start(struct seq_file *p, loff_t *pos) | 484 | static void *single_start(struct seq_file *p, loff_t *pos) |
| 468 | { | 485 | { |
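With the rework above, seq_bitmap() prints straight into the seq_file buffer via bitmap_scnprintf(), and seq_bitmap_list() is added for the "0-3,5" list form; both return -1 with m->count set to m->size when the buffer is full, so the seq_file core retries with a larger buffer. A hypothetical show() callback using them (the cpumask names are only an example, not from the patch):

	/* illustrative caller */
	static int example_show(struct seq_file *m, void *v)
	{
		seq_puts(m, "online mask: ");
		seq_bitmap(m, cpus_addr(cpu_online_map), nr_cpu_ids);
		seq_puts(m, "\nonline list: ");
		seq_bitmap_list(m, cpus_addr(cpu_online_map), nr_cpu_ids);
		seq_putc(m, '\n');
		return 0;
	}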
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c index 006fc64227dd..66f6e58a7e4b 100644 --- a/fs/sysfs/bin.c +++ b/fs/sysfs/bin.c | |||
| @@ -61,6 +61,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | |||
| 61 | int size = dentry->d_inode->i_size; | 61 | int size = dentry->d_inode->i_size; |
| 62 | loff_t offs = *off; | 62 | loff_t offs = *off; |
| 63 | int count = min_t(size_t, bytes, PAGE_SIZE); | 63 | int count = min_t(size_t, bytes, PAGE_SIZE); |
| 64 | char *temp; | ||
| 64 | 65 | ||
| 65 | if (size) { | 66 | if (size) { |
| 66 | if (offs > size) | 67 | if (offs > size) |
| @@ -69,23 +70,33 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) | |||
| 69 | count = size - offs; | 70 | count = size - offs; |
| 70 | } | 71 | } |
| 71 | 72 | ||
| 73 | temp = kmalloc(count, GFP_KERNEL); | ||
| 74 | if (!temp) | ||
| 75 | return -ENOMEM; | ||
| 76 | |||
| 72 | mutex_lock(&bb->mutex); | 77 | mutex_lock(&bb->mutex); |
| 73 | 78 | ||
| 74 | count = fill_read(dentry, bb->buffer, offs, count); | 79 | count = fill_read(dentry, bb->buffer, offs, count); |
| 75 | if (count < 0) | 80 | if (count < 0) { |
| 76 | goto out_unlock; | 81 | mutex_unlock(&bb->mutex); |
| 82 | goto out_free; | ||
| 83 | } | ||
| 77 | 84 | ||
| 78 | if (copy_to_user(userbuf, bb->buffer, count)) { | 85 | memcpy(temp, bb->buffer, count); |
| 86 | |||
| 87 | mutex_unlock(&bb->mutex); | ||
| 88 | |||
| 89 | if (copy_to_user(userbuf, temp, count)) { | ||
| 79 | count = -EFAULT; | 90 | count = -EFAULT; |
| 80 | goto out_unlock; | 91 | goto out_free; |
| 81 | } | 92 | } |
| 82 | 93 | ||
| 83 | pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); | 94 | pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); |
| 84 | 95 | ||
| 85 | *off = offs + count; | 96 | *off = offs + count; |
| 86 | 97 | ||
| 87 | out_unlock: | 98 | out_free: |
| 88 | mutex_unlock(&bb->mutex); | 99 | kfree(temp); |
| 89 | return count; | 100 | return count; |
| 90 | } | 101 | } |
| 91 | 102 | ||
| @@ -118,6 +129,7 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
| 118 | int size = dentry->d_inode->i_size; | 129 | int size = dentry->d_inode->i_size; |
| 119 | loff_t offs = *off; | 130 | loff_t offs = *off; |
| 120 | int count = min_t(size_t, bytes, PAGE_SIZE); | 131 | int count = min_t(size_t, bytes, PAGE_SIZE); |
| 132 | char *temp; | ||
| 121 | 133 | ||
| 122 | if (size) { | 134 | if (size) { |
| 123 | if (offs > size) | 135 | if (offs > size) |
| @@ -126,19 +138,27 @@ static ssize_t write(struct file *file, const char __user *userbuf, | |||
| 126 | count = size - offs; | 138 | count = size - offs; |
| 127 | } | 139 | } |
| 128 | 140 | ||
| 129 | mutex_lock(&bb->mutex); | 141 | temp = kmalloc(count, GFP_KERNEL); |
| 142 | if (!temp) | ||
| 143 | return -ENOMEM; | ||
| 130 | 144 | ||
| 131 | if (copy_from_user(bb->buffer, userbuf, count)) { | 145 | if (copy_from_user(temp, userbuf, count)) { |
| 132 | count = -EFAULT; | 146 | count = -EFAULT; |
| 133 | goto out_unlock; | 147 | goto out_free; |
| 134 | } | 148 | } |
| 135 | 149 | ||
| 150 | mutex_lock(&bb->mutex); | ||
| 151 | |||
| 152 | memcpy(bb->buffer, temp, count); | ||
| 153 | |||
| 136 | count = flush_write(dentry, bb->buffer, offs, count); | 154 | count = flush_write(dentry, bb->buffer, offs, count); |
| 155 | mutex_unlock(&bb->mutex); | ||
| 156 | |||
| 137 | if (count > 0) | 157 | if (count > 0) |
| 138 | *off = offs + count; | 158 | *off = offs + count; |
| 139 | 159 | ||
| 140 | out_unlock: | 160 | out_free: |
| 141 | mutex_unlock(&bb->mutex); | 161 | kfree(temp); |
| 142 | return count; | 162 | return count; |
| 143 | } | 163 | } |
| 144 | 164 | ||
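Both the read() and write() paths above now bounce user data through a temporary kmalloc() buffer so that copy_to_user()/copy_from_user() never run while bb->mutex is held; the user copy can fault, so doing it outside the mutex presumably avoids an awkward lock ordering. The pattern, condensed (a restatement of the write path, not additional kernel code):

	char *temp = kmalloc(count, GFP_KERNEL);
	if (!temp)
		return -ENOMEM;

	if (copy_from_user(temp, userbuf, count)) {	/* may fault; no locks held */
		kfree(temp);
		return -EFAULT;
	}

	mutex_lock(&bb->mutex);
	memcpy(bb->buffer, temp, count);		/* bb->buffer only touched under the lock */
	count = flush_write(dentry, bb->buffer, offs, count);
	mutex_unlock(&bb->mutex);

	kfree(temp);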
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index aedaeba82ae5..3a05a596e3b4 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c | |||
| @@ -370,17 +370,17 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, | |||
| 370 | memset(acxt, 0, sizeof(*acxt)); | 370 | memset(acxt, 0, sizeof(*acxt)); |
| 371 | acxt->parent_sd = parent_sd; | 371 | acxt->parent_sd = parent_sd; |
| 372 | 372 | ||
| 373 | /* Lookup parent inode. inode initialization and I_NEW | 373 | /* Lookup parent inode. inode initialization is protected by |
| 374 | * clearing are protected by sysfs_mutex. By grabbing it and | 374 | * sysfs_mutex, so inode existence can be determined by |
| 375 | * looking up with _nowait variant, inode state can be | 375 | * looking up inode while holding sysfs_mutex. |
| 376 | * determined reliably. | ||
| 377 | */ | 376 | */ |
| 378 | mutex_lock(&sysfs_mutex); | 377 | mutex_lock(&sysfs_mutex); |
| 379 | 378 | ||
| 380 | inode = ilookup5_nowait(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, | 379 | inode = ilookup5(sysfs_sb, parent_sd->s_ino, sysfs_ilookup_test, |
| 381 | parent_sd); | 380 | parent_sd); |
| 381 | if (inode) { | ||
| 382 | WARN_ON(inode->i_state & I_NEW); | ||
| 382 | 383 | ||
| 383 | if (inode && !(inode->i_state & I_NEW)) { | ||
| 384 | /* parent inode available */ | 384 | /* parent inode available */ |
| 385 | acxt->parent_inode = inode; | 385 | acxt->parent_inode = inode; |
| 386 | 386 | ||
| @@ -393,8 +393,7 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, | |||
| 393 | mutex_lock(&inode->i_mutex); | 393 | mutex_lock(&inode->i_mutex); |
| 394 | mutex_lock(&sysfs_mutex); | 394 | mutex_lock(&sysfs_mutex); |
| 395 | } | 395 | } |
| 396 | } else | 396 | } |
| 397 | iput(inode); | ||
| 398 | } | 397 | } |
| 399 | 398 | ||
| 400 | /** | 399 | /** |
| @@ -636,6 +635,7 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, | |||
| 636 | 635 | ||
| 637 | return sd; | 636 | return sd; |
| 638 | } | 637 | } |
| 638 | EXPORT_SYMBOL_GPL(sysfs_get_dirent); | ||
| 639 | 639 | ||
| 640 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, | 640 | static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, |
| 641 | const char *name, struct sysfs_dirent **p_sd) | 641 | const char *name, struct sysfs_dirent **p_sd) |
| @@ -829,16 +829,12 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) | |||
| 829 | if (!new_dentry) | 829 | if (!new_dentry) |
| 830 | goto out_unlock; | 830 | goto out_unlock; |
| 831 | 831 | ||
| 832 | /* rename kobject and sysfs_dirent */ | 832 | /* rename sysfs_dirent */ |
| 833 | error = -ENOMEM; | 833 | error = -ENOMEM; |
| 834 | new_name = dup_name = kstrdup(new_name, GFP_KERNEL); | 834 | new_name = dup_name = kstrdup(new_name, GFP_KERNEL); |
| 835 | if (!new_name) | 835 | if (!new_name) |
| 836 | goto out_unlock; | 836 | goto out_unlock; |
| 837 | 837 | ||
| 838 | error = kobject_set_name(kobj, "%s", new_name); | ||
| 839 | if (error) | ||
| 840 | goto out_unlock; | ||
| 841 | |||
| 842 | dup_name = sd->s_name; | 838 | dup_name = sd->s_name; |
| 843 | sd->s_name = new_name; | 839 | sd->s_name = new_name; |
| 844 | 840 | ||
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index c9e4e5091da1..1f4a3f877262 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c | |||
| @@ -19,10 +19,18 @@ | |||
| 19 | #include <linux/poll.h> | 19 | #include <linux/poll.h> |
| 20 | #include <linux/list.h> | 20 | #include <linux/list.h> |
| 21 | #include <linux/mutex.h> | 21 | #include <linux/mutex.h> |
| 22 | #include <linux/limits.h> | ||
| 22 | #include <asm/uaccess.h> | 23 | #include <asm/uaccess.h> |
| 23 | 24 | ||
| 24 | #include "sysfs.h" | 25 | #include "sysfs.h" |
| 25 | 26 | ||
| 27 | /* used in crash dumps to help with debugging */ | ||
| 28 | static char last_sysfs_file[PATH_MAX]; | ||
| 29 | void sysfs_printk_last_file(void) | ||
| 30 | { | ||
| 31 | printk(KERN_EMERG "last sysfs file: %s\n", last_sysfs_file); | ||
| 32 | } | ||
| 33 | |||
| 26 | /* | 34 | /* |
| 27 | * There's one sysfs_buffer for each open file and one | 35 | * There's one sysfs_buffer for each open file and one |
| 28 | * sysfs_open_dirent for each sysfs_dirent with one or more open | 36 | * sysfs_open_dirent for each sysfs_dirent with one or more open |
| @@ -328,6 +336,11 @@ static int sysfs_open_file(struct inode *inode, struct file *file) | |||
| 328 | struct sysfs_buffer *buffer; | 336 | struct sysfs_buffer *buffer; |
| 329 | struct sysfs_ops *ops; | 337 | struct sysfs_ops *ops; |
| 330 | int error = -EACCES; | 338 | int error = -EACCES; |
| 339 | char *p; | ||
| 340 | |||
| 341 | p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file)); | ||
| 342 | if (p) | ||
| 343 | memmove(last_sysfs_file, p, strlen(p) + 1); | ||
| 331 | 344 | ||
| 332 | /* need attr_sd for attr and ops, its parent for kobj */ | 345 | /* need attr_sd for attr and ops, its parent for kobj */ |
| 333 | if (!sysfs_get_active_two(attr_sd)) | 346 | if (!sysfs_get_active_two(attr_sd)) |
| @@ -440,7 +453,23 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) | |||
| 440 | return POLLERR|POLLPRI; | 453 | return POLLERR|POLLPRI; |
| 441 | } | 454 | } |
| 442 | 455 | ||
| 443 | void sysfs_notify(struct kobject *k, char *dir, char *attr) | 456 | void sysfs_notify_dirent(struct sysfs_dirent *sd) |
| 457 | { | ||
| 458 | struct sysfs_open_dirent *od; | ||
| 459 | |||
| 460 | spin_lock(&sysfs_open_dirent_lock); | ||
| 461 | |||
| 462 | od = sd->s_attr.open; | ||
| 463 | if (od) { | ||
| 464 | atomic_inc(&od->event); | ||
| 465 | wake_up_interruptible(&od->poll); | ||
| 466 | } | ||
| 467 | |||
| 468 | spin_unlock(&sysfs_open_dirent_lock); | ||
| 469 | } | ||
| 470 | EXPORT_SYMBOL_GPL(sysfs_notify_dirent); | ||
| 471 | |||
| 472 | void sysfs_notify(struct kobject *k, const char *dir, const char *attr) | ||
| 444 | { | 473 | { |
| 445 | struct sysfs_dirent *sd = k->sd; | 474 | struct sysfs_dirent *sd = k->sd; |
| 446 | 475 | ||
| @@ -450,19 +479,8 @@ void sysfs_notify(struct kobject *k, char *dir, char *attr) | |||
| 450 | sd = sysfs_find_dirent(sd, dir); | 479 | sd = sysfs_find_dirent(sd, dir); |
| 451 | if (sd && attr) | 480 | if (sd && attr) |
| 452 | sd = sysfs_find_dirent(sd, attr); | 481 | sd = sysfs_find_dirent(sd, attr); |
| 453 | if (sd) { | 482 | if (sd) |
| 454 | struct sysfs_open_dirent *od; | 483 | sysfs_notify_dirent(sd); |
| 455 | |||
| 456 | spin_lock(&sysfs_open_dirent_lock); | ||
| 457 | |||
| 458 | od = sd->s_attr.open; | ||
| 459 | if (od) { | ||
| 460 | atomic_inc(&od->event); | ||
| 461 | wake_up_interruptible(&od->poll); | ||
| 462 | } | ||
| 463 | |||
| 464 | spin_unlock(&sysfs_open_dirent_lock); | ||
| 465 | } | ||
| 466 | 484 | ||
| 467 | mutex_unlock(&sysfs_mutex); | 485 | mutex_unlock(&sysfs_mutex); |
| 468 | } | 486 | } |
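With sysfs_notify_dirent() split out (and sysfs_get_dirent() exported a few hunks earlier), a caller that has cached the dirent can wake pollers of an attribute without looking it up by name or taking sysfs_mutex; only sysfs_open_dirent_lock, a spinlock, is taken. A hypothetical user, with invented names:

	/* illustrative driver snippet; all names here are made up */
	static struct sysfs_dirent *state_sd;

	static int example_setup(struct kobject *kobj)
	{
		state_sd = sysfs_get_dirent(kobj->sd, "state");
		return state_sd ? 0 : -ENODEV;
	}

	static void example_state_changed(void)
	{
		if (state_sd)
			sysfs_notify_dirent(state_sd);	/* wakes poll/select waiters */
	}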
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 14f0023984d7..ab343e371d64 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c | |||
| @@ -16,6 +16,7 @@ | |||
| 16 | #include <linux/mount.h> | 16 | #include <linux/mount.h> |
| 17 | #include <linux/pagemap.h> | 17 | #include <linux/pagemap.h> |
| 18 | #include <linux/init.h> | 18 | #include <linux/init.h> |
| 19 | #include <linux/module.h> | ||
| 19 | 20 | ||
| 20 | #include "sysfs.h" | 21 | #include "sysfs.h" |
| 21 | 22 | ||
| @@ -115,3 +116,17 @@ out_err: | |||
| 115 | sysfs_dir_cachep = NULL; | 116 | sysfs_dir_cachep = NULL; |
| 116 | goto out; | 117 | goto out; |
| 117 | } | 118 | } |
| 119 | |||
| 120 | #undef sysfs_get | ||
| 121 | struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | ||
| 122 | { | ||
| 123 | return __sysfs_get(sd); | ||
| 124 | } | ||
| 125 | EXPORT_SYMBOL_GPL(sysfs_get); | ||
| 126 | |||
| 127 | #undef sysfs_put | ||
| 128 | void sysfs_put(struct sysfs_dirent *sd) | ||
| 129 | { | ||
| 130 | __sysfs_put(sd); | ||
| 131 | } | ||
| 132 | EXPORT_SYMBOL_GPL(sysfs_put); | ||
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index a5db496f71c7..93c6d6b27c4d 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h | |||
| @@ -124,7 +124,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, | |||
| 124 | struct sysfs_dirent **p_sd); | 124 | struct sysfs_dirent **p_sd); |
| 125 | void sysfs_remove_subdir(struct sysfs_dirent *sd); | 125 | void sysfs_remove_subdir(struct sysfs_dirent *sd); |
| 126 | 126 | ||
| 127 | static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | 127 | static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) |
| 128 | { | 128 | { |
| 129 | if (sd) { | 129 | if (sd) { |
| 130 | WARN_ON(!atomic_read(&sd->s_count)); | 130 | WARN_ON(!atomic_read(&sd->s_count)); |
| @@ -132,12 +132,14 @@ static inline struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd) | |||
| 132 | } | 132 | } |
| 133 | return sd; | 133 | return sd; |
| 134 | } | 134 | } |
| 135 | #define sysfs_get(sd) __sysfs_get(sd) | ||
| 135 | 136 | ||
| 136 | static inline void sysfs_put(struct sysfs_dirent *sd) | 137 | static inline void __sysfs_put(struct sysfs_dirent *sd) |
| 137 | { | 138 | { |
| 138 | if (sd && atomic_dec_and_test(&sd->s_count)) | 139 | if (sd && atomic_dec_and_test(&sd->s_count)) |
| 139 | release_sysfs_dirent(sd); | 140 | release_sysfs_dirent(sd); |
| 140 | } | 141 | } |
| 142 | #define sysfs_put(sd) __sysfs_put(sd) | ||
| 141 | 143 | ||
| 142 | /* | 144 | /* |
| 143 | * inode.c | 145 | * inode.c |
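The sysfs.h and mount.c hunks work as a pair: in-tree sysfs code keeps the cheap inline __sysfs_get()/__sysfs_put() behind macro aliases, while mount.c #undefs the aliases and provides real, exported out-of-line symbols for modular users. Reduced to one function with generic names (not the kernel's), the shape is:

	/* header: inline fast path plus a macro alias */
	static inline struct foo *__foo_get(struct foo *f)
	{
		if (f)
			atomic_inc(&f->count);
		return f;
	}
	#define foo_get(f) __foo_get(f)

	/* exactly one .c file: drop the alias and emit an exported copy */
	#undef foo_get
	struct foo *foo_get(struct foo *f)
	{
		return __foo_get(f);
	}
	EXPORT_SYMBOL_GPL(foo_get);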
diff --git a/fs/ubifs/budget.c b/fs/ubifs/budget.c index 73db464cd08b..1a4973e10664 100644 --- a/fs/ubifs/budget.c +++ b/fs/ubifs/budget.c | |||
| @@ -414,19 +414,21 @@ static int do_budget_space(struct ubifs_info *c) | |||
| 414 | * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - | 414 | * @c->lst.empty_lebs + @c->freeable_cnt + @c->idx_gc_cnt - |
| 415 | * @c->lst.taken_empty_lebs | 415 | * @c->lst.taken_empty_lebs |
| 416 | * | 416 | * |
| 417 | * @empty_lebs are available because they are empty. @freeable_cnt are | 417 | * @c->lst.empty_lebs are available because they are empty. |
| 418 | * available because they contain only free and dirty space and the | 418 | * @c->freeable_cnt are available because they contain only free and |
| 419 | * index allocation always occurs after wbufs are synch'ed. | 419 | * dirty space, @c->idx_gc_cnt are available because they are index |
| 420 | * @idx_gc_cnt are available because they are index LEBs that have been | 420 | * LEBs that have been garbage collected and are awaiting the commit |
| 421 | * garbage collected (including trivial GC) and are awaiting the commit | 421 | * before they can be used. And the in-the-gaps method will grab these |
| 422 | * before they can be unmapped - note that the in-the-gaps method will | 422 | * if it needs them. @c->lst.taken_empty_lebs are empty LEBs that have |
| 423 | * grab these if it needs them. @taken_empty_lebs are empty_lebs that | 423 | * already been allocated for some purpose. |
| 424 | * have already been allocated for some purpose (also includes those | ||
| 425 | * LEBs on the @idx_gc list). | ||
| 426 | * | 424 | * |
| 427 | * Note, @taken_empty_lebs may temporarily be higher by one because of | 425 | * Note, @c->idx_gc_cnt is included to both @c->lst.empty_lebs (because |
| 428 | * the way we serialize LEB allocations and budgeting. See a comment in | 426 | * these LEBs are empty) and to @c->lst.taken_empty_lebs (because they |
| 429 | * 'ubifs_find_free_space()'. | 427 | * are taken until after the commit). |
| 428 | * | ||
| 429 | * Note, @c->lst.taken_empty_lebs may temporarily be higher by one | ||
| 430 | * because of the way we serialize LEB allocations and budgeting. See a | ||
| 431 | * comment in 'ubifs_find_free_space()'. | ||
| 430 | */ | 432 | */ |
| 431 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - | 433 | lebs = c->lst.empty_lebs + c->freeable_cnt + c->idx_gc_cnt - |
| 432 | c->lst.taken_empty_lebs; | 434 | c->lst.taken_empty_lebs; |
diff --git a/fs/ubifs/compress.c b/fs/ubifs/compress.c index 5bb51dac3c16..a0ada596b17c 100644 --- a/fs/ubifs/compress.c +++ b/fs/ubifs/compress.c | |||
| @@ -91,8 +91,6 @@ struct ubifs_compressor *ubifs_compressors[UBIFS_COMPR_TYPES_CNT]; | |||
| 91 | * | 91 | * |
| 92 | * Note, if the input buffer was not compressed, it is copied to the output | 92 | * Note, if the input buffer was not compressed, it is copied to the output |
| 93 | * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. | 93 | * buffer and %UBIFS_COMPR_NONE is returned in @compr_type. |
| 94 | * | ||
| 95 | * This functions returns %0 on success or a negative error code on failure. | ||
| 96 | */ | 94 | */ |
| 97 | void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, | 95 | void ubifs_compress(const void *in_buf, int in_len, void *out_buf, int *out_len, |
| 98 | int *compr_type) | 96 | int *compr_type) |
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index d7f7645779f2..7186400750e7 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c | |||
| @@ -222,30 +222,38 @@ void dbg_dump_inode(const struct ubifs_info *c, const struct inode *inode) | |||
| 222 | { | 222 | { |
| 223 | const struct ubifs_inode *ui = ubifs_inode(inode); | 223 | const struct ubifs_inode *ui = ubifs_inode(inode); |
| 224 | 224 | ||
| 225 | printk(KERN_DEBUG "inode %lu\n", inode->i_ino); | 225 | printk(KERN_DEBUG "Dump in-memory inode:"); |
| 226 | printk(KERN_DEBUG "size %llu\n", | 226 | printk(KERN_DEBUG "\tinode %lu\n", inode->i_ino); |
| 227 | printk(KERN_DEBUG "\tsize %llu\n", | ||
| 227 | (unsigned long long)i_size_read(inode)); | 228 | (unsigned long long)i_size_read(inode)); |
| 228 | printk(KERN_DEBUG "nlink %u\n", inode->i_nlink); | 229 | printk(KERN_DEBUG "\tnlink %u\n", inode->i_nlink); |
| 229 | printk(KERN_DEBUG "uid %u\n", (unsigned int)inode->i_uid); | 230 | printk(KERN_DEBUG "\tuid %u\n", (unsigned int)inode->i_uid); |
| 230 | printk(KERN_DEBUG "gid %u\n", (unsigned int)inode->i_gid); | 231 | printk(KERN_DEBUG "\tgid %u\n", (unsigned int)inode->i_gid); |
| 231 | printk(KERN_DEBUG "atime %u.%u\n", | 232 | printk(KERN_DEBUG "\tatime %u.%u\n", |
| 232 | (unsigned int)inode->i_atime.tv_sec, | 233 | (unsigned int)inode->i_atime.tv_sec, |
| 233 | (unsigned int)inode->i_atime.tv_nsec); | 234 | (unsigned int)inode->i_atime.tv_nsec); |
| 234 | printk(KERN_DEBUG "mtime %u.%u\n", | 235 | printk(KERN_DEBUG "\tmtime %u.%u\n", |
| 235 | (unsigned int)inode->i_mtime.tv_sec, | 236 | (unsigned int)inode->i_mtime.tv_sec, |
| 236 | (unsigned int)inode->i_mtime.tv_nsec); | 237 | (unsigned int)inode->i_mtime.tv_nsec); |
| 237 | printk(KERN_DEBUG "ctime %u.%u\n", | 238 | printk(KERN_DEBUG "\tctime %u.%u\n", |
| 238 | (unsigned int)inode->i_ctime.tv_sec, | 239 | (unsigned int)inode->i_ctime.tv_sec, |
| 239 | (unsigned int)inode->i_ctime.tv_nsec); | 240 | (unsigned int)inode->i_ctime.tv_nsec); |
| 240 | printk(KERN_DEBUG "creat_sqnum %llu\n", ui->creat_sqnum); | 241 | printk(KERN_DEBUG "\tcreat_sqnum %llu\n", ui->creat_sqnum); |
| 241 | printk(KERN_DEBUG "xattr_size %u\n", ui->xattr_size); | 242 | printk(KERN_DEBUG "\txattr_size %u\n", ui->xattr_size); |
| 242 | printk(KERN_DEBUG "xattr_cnt %u\n", ui->xattr_cnt); | 243 | printk(KERN_DEBUG "\txattr_cnt %u\n", ui->xattr_cnt); |
| 243 | printk(KERN_DEBUG "xattr_names %u\n", ui->xattr_names); | 244 | printk(KERN_DEBUG "\txattr_names %u\n", ui->xattr_names); |
| 244 | printk(KERN_DEBUG "dirty %u\n", ui->dirty); | 245 | printk(KERN_DEBUG "\tdirty %u\n", ui->dirty); |
| 245 | printk(KERN_DEBUG "xattr %u\n", ui->xattr); | 246 | printk(KERN_DEBUG "\txattr %u\n", ui->xattr); |
| 246 | printk(KERN_DEBUG "flags %d\n", ui->flags); | 247 | printk(KERN_DEBUG "\tbulk_read %u\n", ui->xattr); |
| 247 | printk(KERN_DEBUG "compr_type %d\n", ui->compr_type); | 248 | printk(KERN_DEBUG "\tsynced_i_size %llu\n", |
| 248 | printk(KERN_DEBUG "data_len %d\n", ui->data_len); | 249 | (unsigned long long)ui->synced_i_size); |
| 250 | printk(KERN_DEBUG "\tui_size %llu\n", | ||
| 251 | (unsigned long long)ui->ui_size); | ||
| 252 | printk(KERN_DEBUG "\tflags %d\n", ui->flags); | ||
| 253 | printk(KERN_DEBUG "\tcompr_type %d\n", ui->compr_type); | ||
| 254 | printk(KERN_DEBUG "\tlast_page_read %lu\n", ui->last_page_read); | ||
| 255 | printk(KERN_DEBUG "\tread_in_a_row %lu\n", ui->read_in_a_row); | ||
| 256 | printk(KERN_DEBUG "\tdata_len %d\n", ui->data_len); | ||
| 249 | } | 257 | } |
| 250 | 258 | ||
| 251 | void dbg_dump_node(const struct ubifs_info *c, const void *node) | 259 | void dbg_dump_node(const struct ubifs_info *c, const void *node) |
| @@ -647,6 +655,43 @@ void dbg_dump_lprops(struct ubifs_info *c) | |||
| 647 | } | 655 | } |
| 648 | } | 656 | } |
| 649 | 657 | ||
| 658 | void dbg_dump_lpt_info(struct ubifs_info *c) | ||
| 659 | { | ||
| 660 | int i; | ||
| 661 | |||
| 662 | spin_lock(&dbg_lock); | ||
| 663 | printk(KERN_DEBUG "\tlpt_sz: %lld\n", c->lpt_sz); | ||
| 664 | printk(KERN_DEBUG "\tpnode_sz: %d\n", c->pnode_sz); | ||
| 665 | printk(KERN_DEBUG "\tnnode_sz: %d\n", c->nnode_sz); | ||
| 666 | printk(KERN_DEBUG "\tltab_sz: %d\n", c->ltab_sz); | ||
| 667 | printk(KERN_DEBUG "\tlsave_sz: %d\n", c->lsave_sz); | ||
| 668 | printk(KERN_DEBUG "\tbig_lpt: %d\n", c->big_lpt); | ||
| 669 | printk(KERN_DEBUG "\tlpt_hght: %d\n", c->lpt_hght); | ||
| 670 | printk(KERN_DEBUG "\tpnode_cnt: %d\n", c->pnode_cnt); | ||
| 671 | printk(KERN_DEBUG "\tnnode_cnt: %d\n", c->nnode_cnt); | ||
| 672 | printk(KERN_DEBUG "\tdirty_pn_cnt: %d\n", c->dirty_pn_cnt); | ||
| 673 | printk(KERN_DEBUG "\tdirty_nn_cnt: %d\n", c->dirty_nn_cnt); | ||
| 674 | printk(KERN_DEBUG "\tlsave_cnt: %d\n", c->lsave_cnt); | ||
| 675 | printk(KERN_DEBUG "\tspace_bits: %d\n", c->space_bits); | ||
| 676 | printk(KERN_DEBUG "\tlpt_lnum_bits: %d\n", c->lpt_lnum_bits); | ||
| 677 | printk(KERN_DEBUG "\tlpt_offs_bits: %d\n", c->lpt_offs_bits); | ||
| 678 | printk(KERN_DEBUG "\tlpt_spc_bits: %d\n", c->lpt_spc_bits); | ||
| 679 | printk(KERN_DEBUG "\tpcnt_bits: %d\n", c->pcnt_bits); | ||
| 680 | printk(KERN_DEBUG "\tlnum_bits: %d\n", c->lnum_bits); | ||
| 681 | printk(KERN_DEBUG "\tLPT root is at %d:%d\n", c->lpt_lnum, c->lpt_offs); | ||
| 682 | printk(KERN_DEBUG "\tLPT head is at %d:%d\n", | ||
| 683 | c->nhead_lnum, c->nhead_offs); | ||
| 684 | printk(KERN_DEBUG "\tLPT ltab is at %d:%d\n", c->ltab_lnum, c->ltab_offs); | ||
| 685 | if (c->big_lpt) | ||
| 686 | printk(KERN_DEBUG "\tLPT lsave is at %d:%d\n", | ||
| 687 | c->lsave_lnum, c->lsave_offs); | ||
| 688 | for (i = 0; i < c->lpt_lebs; i++) | ||
| 689 | printk(KERN_DEBUG "\tLPT LEB %d free %d dirty %d tgc %d " | ||
| 690 | "cmt %d\n", i + c->lpt_first, c->ltab[i].free, | ||
| 691 | c->ltab[i].dirty, c->ltab[i].tgc, c->ltab[i].cmt); | ||
| 692 | spin_unlock(&dbg_lock); | ||
| 693 | } | ||
| 694 | |||
| 650 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) | 695 | void dbg_dump_leb(const struct ubifs_info *c, int lnum) |
| 651 | { | 696 | { |
| 652 | struct ubifs_scan_leb *sleb; | 697 | struct ubifs_scan_leb *sleb; |
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 50315fc57185..33d6b95071e4 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h | |||
| @@ -224,6 +224,7 @@ void dbg_dump_lstats(const struct ubifs_lp_stats *lst); | |||
| 224 | void dbg_dump_budg(struct ubifs_info *c); | 224 | void dbg_dump_budg(struct ubifs_info *c); |
| 225 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); | 225 | void dbg_dump_lprop(const struct ubifs_info *c, const struct ubifs_lprops *lp); |
| 226 | void dbg_dump_lprops(struct ubifs_info *c); | 226 | void dbg_dump_lprops(struct ubifs_info *c); |
| 227 | void dbg_dump_lpt_info(struct ubifs_info *c); | ||
| 227 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); | 228 | void dbg_dump_leb(const struct ubifs_info *c, int lnum); |
| 228 | void dbg_dump_znode(const struct ubifs_info *c, | 229 | void dbg_dump_znode(const struct ubifs_info *c, |
| 229 | const struct ubifs_znode *znode); | 230 | const struct ubifs_znode *znode); |
| @@ -249,6 +250,8 @@ int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot); | |||
| 249 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); | 250 | int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot); |
| 250 | int dbg_check_cats(struct ubifs_info *c); | 251 | int dbg_check_cats(struct ubifs_info *c); |
| 251 | int dbg_check_ltab(struct ubifs_info *c); | 252 | int dbg_check_ltab(struct ubifs_info *c); |
| 253 | int dbg_chk_lpt_free_spc(struct ubifs_info *c); | ||
| 254 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len); | ||
| 252 | int dbg_check_synced_i_size(struct inode *inode); | 255 | int dbg_check_synced_i_size(struct inode *inode); |
| 253 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); | 256 | int dbg_check_dir_size(struct ubifs_info *c, const struct inode *dir); |
| 254 | int dbg_check_tnc(struct ubifs_info *c, int extra); | 257 | int dbg_check_tnc(struct ubifs_info *c, int extra); |
| @@ -367,6 +370,7 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, | |||
| 367 | #define dbg_dump_budg(c) ({}) | 370 | #define dbg_dump_budg(c) ({}) |
| 368 | #define dbg_dump_lprop(c, lp) ({}) | 371 | #define dbg_dump_lprop(c, lp) ({}) |
| 369 | #define dbg_dump_lprops(c) ({}) | 372 | #define dbg_dump_lprops(c) ({}) |
| 373 | #define dbg_dump_lpt_info(c) ({}) | ||
| 370 | #define dbg_dump_leb(c, lnum) ({}) | 374 | #define dbg_dump_leb(c, lnum) ({}) |
| 371 | #define dbg_dump_znode(c, znode) ({}) | 375 | #define dbg_dump_znode(c, znode) ({}) |
| 372 | #define dbg_dump_heap(c, heap, cat) ({}) | 376 | #define dbg_dump_heap(c, heap, cat) ({}) |
| @@ -379,6 +383,8 @@ static inline int dbg_change(struct ubi_volume_desc *desc, int lnum, | |||
| 379 | #define dbg_check_old_index(c, zroot) 0 | 383 | #define dbg_check_old_index(c, zroot) 0 |
| 380 | #define dbg_check_cats(c) 0 | 384 | #define dbg_check_cats(c) 0 |
| 381 | #define dbg_check_ltab(c) 0 | 385 | #define dbg_check_ltab(c) 0 |
| 386 | #define dbg_chk_lpt_free_spc(c) 0 | ||
| 387 | #define dbg_chk_lpt_sz(c, action, len) 0 | ||
| 382 | #define dbg_check_synced_i_size(inode) 0 | 388 | #define dbg_check_synced_i_size(inode) 0 |
| 383 | #define dbg_check_dir_size(c, dir) 0 | 389 | #define dbg_check_dir_size(c, dir) 0 |
| 384 | #define dbg_check_tnc(c, x) 0 | 390 | #define dbg_check_tnc(c, x) 0 |
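When debugging support is compiled out, the new dump helpers collapse to `({})` (a GCC statement expression that expands to nothing) and the check helpers to a constant 0, so call sites such as `err = dbg_chk_lpt_sz(c, 0, 0);` compile unchanged either way. A minimal illustration of the idiom, with invented names:

	#ifdef CONFIG_EXAMPLE_DEBUG
	int  dbg_check_thing(struct ctx *c);
	void dbg_dump_thing(struct ctx *c);
	#else
	#define dbg_check_thing(c)	0	/* still usable as an expression */
	#define dbg_dump_thing(c)	({})	/* still usable as a statement */
	#endif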
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 3d698e2022b1..51cf511d44d9 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c | |||
| @@ -147,6 +147,12 @@ static int do_readpage(struct page *page) | |||
| 147 | err = ret; | 147 | err = ret; |
| 148 | if (err != -ENOENT) | 148 | if (err != -ENOENT) |
| 149 | break; | 149 | break; |
| 150 | } else if (block + 1 == beyond) { | ||
| 151 | int dlen = le32_to_cpu(dn->size); | ||
| 152 | int ilen = i_size & (UBIFS_BLOCK_SIZE - 1); | ||
| 153 | |||
| 154 | if (ilen && ilen < dlen) | ||
| 155 | memset(addr + ilen, 0, dlen - ilen); | ||
| 150 | } | 156 | } |
| 151 | } | 157 | } |
| 152 | if (++i >= UBIFS_BLOCKS_PER_PAGE) | 158 | if (++i >= UBIFS_BLOCKS_PER_PAGE) |
| @@ -577,8 +583,262 @@ out: | |||
| 577 | return copied; | 583 | return copied; |
| 578 | } | 584 | } |
| 579 | 585 | ||
| 586 | /** | ||
| 587 | * populate_page - copy data nodes into a page for bulk-read. | ||
| 588 | * @c: UBIFS file-system description object | ||
| 589 | * @page: page | ||
| 590 | * @bu: bulk-read information | ||
| 591 | * @n: next zbranch slot | ||
| 592 | * | ||
| 593 | * This function returns %0 on success and a negative error code on failure. | ||
| 594 | */ | ||
| 595 | static int populate_page(struct ubifs_info *c, struct page *page, | ||
| 596 | struct bu_info *bu, int *n) | ||
| 597 | { | ||
| 598 | int i = 0, nn = *n, offs = bu->zbranch[0].offs, hole = 0, read = 0; | ||
| 599 | struct inode *inode = page->mapping->host; | ||
| 600 | loff_t i_size = i_size_read(inode); | ||
| 601 | unsigned int page_block; | ||
| 602 | void *addr, *zaddr; | ||
| 603 | pgoff_t end_index; | ||
| 604 | |||
| 605 | dbg_gen("ino %lu, pg %lu, i_size %lld, flags %#lx", | ||
| 606 | inode->i_ino, page->index, i_size, page->flags); | ||
| 607 | |||
| 608 | addr = zaddr = kmap(page); | ||
| 609 | |||
| 610 | end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; | ||
| 611 | if (!i_size || page->index > end_index) { | ||
| 612 | hole = 1; | ||
| 613 | memset(addr, 0, PAGE_CACHE_SIZE); | ||
| 614 | goto out_hole; | ||
| 615 | } | ||
| 616 | |||
| 617 | page_block = page->index << UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
| 618 | while (1) { | ||
| 619 | int err, len, out_len, dlen; | ||
| 620 | |||
| 621 | if (nn >= bu->cnt) { | ||
| 622 | hole = 1; | ||
| 623 | memset(addr, 0, UBIFS_BLOCK_SIZE); | ||
| 624 | } else if (key_block(c, &bu->zbranch[nn].key) == page_block) { | ||
| 625 | struct ubifs_data_node *dn; | ||
| 626 | |||
| 627 | dn = bu->buf + (bu->zbranch[nn].offs - offs); | ||
| 628 | |||
| 629 | ubifs_assert(dn->ch.sqnum > | ||
| 630 | ubifs_inode(inode)->creat_sqnum); | ||
| 631 | |||
| 632 | len = le32_to_cpu(dn->size); | ||
| 633 | if (len <= 0 || len > UBIFS_BLOCK_SIZE) | ||
| 634 | goto out_err; | ||
| 635 | |||
| 636 | dlen = le32_to_cpu(dn->ch.len) - UBIFS_DATA_NODE_SZ; | ||
| 637 | out_len = UBIFS_BLOCK_SIZE; | ||
| 638 | err = ubifs_decompress(&dn->data, dlen, addr, &out_len, | ||
| 639 | le16_to_cpu(dn->compr_type)); | ||
| 640 | if (err || len != out_len) | ||
| 641 | goto out_err; | ||
| 642 | |||
| 643 | if (len < UBIFS_BLOCK_SIZE) | ||
| 644 | memset(addr + len, 0, UBIFS_BLOCK_SIZE - len); | ||
| 645 | |||
| 646 | nn += 1; | ||
| 647 | read = (i << UBIFS_BLOCK_SHIFT) + len; | ||
| 648 | } else if (key_block(c, &bu->zbranch[nn].key) < page_block) { | ||
| 649 | nn += 1; | ||
| 650 | continue; | ||
| 651 | } else { | ||
| 652 | hole = 1; | ||
| 653 | memset(addr, 0, UBIFS_BLOCK_SIZE); | ||
| 654 | } | ||
| 655 | if (++i >= UBIFS_BLOCKS_PER_PAGE) | ||
| 656 | break; | ||
| 657 | addr += UBIFS_BLOCK_SIZE; | ||
| 658 | page_block += 1; | ||
| 659 | } | ||
| 660 | |||
| 661 | if (end_index == page->index) { | ||
| 662 | int len = i_size & (PAGE_CACHE_SIZE - 1); | ||
| 663 | |||
| 664 | if (len && len < read) | ||
| 665 | memset(zaddr + len, 0, read - len); | ||
| 666 | } | ||
| 667 | |||
| 668 | out_hole: | ||
| 669 | if (hole) { | ||
| 670 | SetPageChecked(page); | ||
| 671 | dbg_gen("hole"); | ||
| 672 | } | ||
| 673 | |||
| 674 | SetPageUptodate(page); | ||
| 675 | ClearPageError(page); | ||
| 676 | flush_dcache_page(page); | ||
| 677 | kunmap(page); | ||
| 678 | *n = nn; | ||
| 679 | return 0; | ||
| 680 | |||
| 681 | out_err: | ||
| 682 | ClearPageUptodate(page); | ||
| 683 | SetPageError(page); | ||
| 684 | flush_dcache_page(page); | ||
| 685 | kunmap(page); | ||
| 686 | ubifs_err("bad data node (block %u, inode %lu)", | ||
| 687 | page_block, inode->i_ino); | ||
| 688 | return -EINVAL; | ||
| 689 | } | ||
| 690 | |||
| 691 | /** | ||
| 692 | * ubifs_do_bulk_read - do bulk-read. | ||
| 693 | * @c: UBIFS file-system description object | ||
| 694 | * @page1: first page | ||
| 695 | * | ||
| 696 | * This function returns %1 if the bulk-read is done, otherwise %0 is returned. | ||
| 697 | */ | ||
| 698 | static int ubifs_do_bulk_read(struct ubifs_info *c, struct page *page1) | ||
| 699 | { | ||
| 700 | pgoff_t offset = page1->index, end_index; | ||
| 701 | struct address_space *mapping = page1->mapping; | ||
| 702 | struct inode *inode = mapping->host; | ||
| 703 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 704 | struct bu_info *bu; | ||
| 705 | int err, page_idx, page_cnt, ret = 0, n = 0; | ||
| 706 | loff_t isize; | ||
| 707 | |||
| 708 | bu = kmalloc(sizeof(struct bu_info), GFP_NOFS); | ||
| 709 | if (!bu) | ||
| 710 | return 0; | ||
| 711 | |||
| 712 | bu->buf_len = c->bulk_read_buf_size; | ||
| 713 | bu->buf = kmalloc(bu->buf_len, GFP_NOFS); | ||
| 714 | if (!bu->buf) | ||
| 715 | goto out_free; | ||
| 716 | |||
| 717 | data_key_init(c, &bu->key, inode->i_ino, | ||
| 718 | offset << UBIFS_BLOCKS_PER_PAGE_SHIFT); | ||
| 719 | |||
| 720 | err = ubifs_tnc_get_bu_keys(c, bu); | ||
| 721 | if (err) | ||
| 722 | goto out_warn; | ||
| 723 | |||
| 724 | if (bu->eof) { | ||
| 725 | /* Turn off bulk-read at the end of the file */ | ||
| 726 | ui->read_in_a_row = 1; | ||
| 727 | ui->bulk_read = 0; | ||
| 728 | } | ||
| 729 | |||
| 730 | page_cnt = bu->blk_cnt >> UBIFS_BLOCKS_PER_PAGE_SHIFT; | ||
| 731 | if (!page_cnt) { | ||
| 732 | /* | ||
| 733 | * This happens when there are multiple blocks per page and the | ||
| 734 | * blocks for the first page we are looking for, are not | ||
| 735 | * together. If all the pages were like this, bulk-read would | ||
| 736 | * reduce performance, so we turn it off for a while. | ||
| 737 | */ | ||
| 738 | ui->read_in_a_row = 0; | ||
| 739 | ui->bulk_read = 0; | ||
| 740 | goto out_free; | ||
| 741 | } | ||
| 742 | |||
| 743 | if (bu->cnt) { | ||
| 744 | err = ubifs_tnc_bulk_read(c, bu); | ||
| 745 | if (err) | ||
| 746 | goto out_warn; | ||
| 747 | } | ||
| 748 | |||
| 749 | err = populate_page(c, page1, bu, &n); | ||
| 750 | if (err) | ||
| 751 | goto out_warn; | ||
| 752 | |||
| 753 | unlock_page(page1); | ||
| 754 | ret = 1; | ||
| 755 | |||
| 756 | isize = i_size_read(inode); | ||
| 757 | if (isize == 0) | ||
| 758 | goto out_free; | ||
| 759 | end_index = ((isize - 1) >> PAGE_CACHE_SHIFT); | ||
| 760 | |||
| 761 | for (page_idx = 1; page_idx < page_cnt; page_idx++) { | ||
| 762 | pgoff_t page_offset = offset + page_idx; | ||
| 763 | struct page *page; | ||
| 764 | |||
| 765 | if (page_offset > end_index) | ||
| 766 | break; | ||
| 767 | page = find_or_create_page(mapping, page_offset, | ||
| 768 | GFP_NOFS | __GFP_COLD); | ||
| 769 | if (!page) | ||
| 770 | break; | ||
| 771 | if (!PageUptodate(page)) | ||
| 772 | err = populate_page(c, page, bu, &n); | ||
| 773 | unlock_page(page); | ||
| 774 | page_cache_release(page); | ||
| 775 | if (err) | ||
| 776 | break; | ||
| 777 | } | ||
| 778 | |||
| 779 | ui->last_page_read = offset + page_idx - 1; | ||
| 780 | |||
| 781 | out_free: | ||
| 782 | kfree(bu->buf); | ||
| 783 | kfree(bu); | ||
| 784 | return ret; | ||
| 785 | |||
| 786 | out_warn: | ||
| 787 | ubifs_warn("ignoring error %d and skipping bulk-read", err); | ||
| 788 | goto out_free; | ||
| 789 | } | ||
| 790 | |||
| 791 | /** | ||
| 792 | * ubifs_bulk_read - determine whether to bulk-read and, if so, do it. | ||
| 793 | * @page: page from which to start bulk-read. | ||
| 794 | * | ||
| 795 | * Some flash media are capable of reading sequentially at faster rates. UBIFS | ||
| 796 | * bulk-read facility is designed to take advantage of that, by reading in one | ||
| 797 | * go consecutive data nodes that are also located consecutively in the same | ||
| 798 | * LEB. This function returns %1 if a bulk-read is done and %0 otherwise. | ||
| 799 | */ | ||
| 800 | static int ubifs_bulk_read(struct page *page) | ||
| 801 | { | ||
| 802 | struct inode *inode = page->mapping->host; | ||
| 803 | struct ubifs_info *c = inode->i_sb->s_fs_info; | ||
| 804 | struct ubifs_inode *ui = ubifs_inode(inode); | ||
| 805 | pgoff_t index = page->index, last_page_read = ui->last_page_read; | ||
| 806 | int ret = 0; | ||
| 807 | |||
| 808 | ui->last_page_read = index; | ||
| 809 | |||
| 810 | if (!c->bulk_read) | ||
| 811 | return 0; | ||
| 812 | /* | ||
| 813 | * Bulk-read is protected by ui_mutex, but it is an optimization, so | ||
| 814 | * don't bother if we cannot lock the mutex. | ||
| 815 | */ | ||
| 816 | if (!mutex_trylock(&ui->ui_mutex)) | ||
| 817 | return 0; | ||
| 818 | if (index != last_page_read + 1) { | ||
| 819 | /* Turn off bulk-read if we stop reading sequentially */ | ||
| 820 | ui->read_in_a_row = 1; | ||
| 821 | if (ui->bulk_read) | ||
| 822 | ui->bulk_read = 0; | ||
| 823 | goto out_unlock; | ||
| 824 | } | ||
| 825 | if (!ui->bulk_read) { | ||
| 826 | ui->read_in_a_row += 1; | ||
| 827 | if (ui->read_in_a_row < 3) | ||
| 828 | goto out_unlock; | ||
| 829 | /* Three reads in a row, so switch on bulk-read */ | ||
| 830 | ui->bulk_read = 1; | ||
| 831 | } | ||
| 832 | ret = ubifs_do_bulk_read(c, page); | ||
| 833 | out_unlock: | ||
| 834 | mutex_unlock(&ui->ui_mutex); | ||
| 835 | return ret; | ||
| 836 | } | ||
| 837 | |||
| 580 | static int ubifs_readpage(struct file *file, struct page *page) | 838 | static int ubifs_readpage(struct file *file, struct page *page) |
| 581 | { | 839 | { |
| 840 | if (ubifs_bulk_read(page)) | ||
| 841 | return 0; | ||
| 582 | do_readpage(page); | 842 | do_readpage(page); |
| 583 | unlock_page(page); | 843 | unlock_page(page); |
| 584 | return 0; | 844 | return 0; |
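The bulk-read trigger added above is a small per-inode state machine guarded by ui->ui_mutex and only attempted with mutex_trylock(), since it is purely an optimization. Restated compactly (this paraphrases ubifs_bulk_read(); it is not extra code from the patch):

	if (index != ui->last_page_read + 1) {
		ui->read_in_a_row = 1;		/* sequence broken: start counting again */
		ui->bulk_read = 0;		/* fall back to single-page reads */
	} else if (!ui->bulk_read) {
		ui->read_in_a_row += 1;
		if (ui->read_in_a_row >= 3)
			ui->bulk_read = 1;	/* three sequential reads: enable bulk-read */
	}

Once enabled, ubifs_do_bulk_read() fetches the keys and data for a run of consecutive blocks in one pass and populates the following pages of the mapping directly.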
diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 47814cde2407..717d79c97c5e 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c | |||
| @@ -901,11 +901,11 @@ static int get_idx_gc_leb(struct ubifs_info *c) | |||
| 901 | * it is needed now for this commit. | 901 | * it is needed now for this commit. |
| 902 | */ | 902 | */ |
| 903 | lp = ubifs_lpt_lookup_dirty(c, lnum); | 903 | lp = ubifs_lpt_lookup_dirty(c, lnum); |
| 904 | if (unlikely(IS_ERR(lp))) | 904 | if (IS_ERR(lp)) |
| 905 | return PTR_ERR(lp); | 905 | return PTR_ERR(lp); |
| 906 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, | 906 | lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, |
| 907 | lp->flags | LPROPS_INDEX, -1); | 907 | lp->flags | LPROPS_INDEX, -1); |
| 908 | if (unlikely(IS_ERR(lp))) | 908 | if (IS_ERR(lp)) |
| 909 | return PTR_ERR(lp); | 909 | return PTR_ERR(lp); |
| 910 | dbg_find("LEB %d, dirty %d and free %d flags %#x", | 910 | dbg_find("LEB %d, dirty %d and free %d flags %#x", |
| 911 | lp->lnum, lp->dirty, lp->free, lp->flags); | 911 | lp->lnum, lp->dirty, lp->free, lp->flags); |
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 02aba36fe3d4..0bef6501d58a 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c | |||
| @@ -96,6 +96,48 @@ static int switch_gc_head(struct ubifs_info *c) | |||
| 96 | } | 96 | } |
| 97 | 97 | ||
| 98 | /** | 98 | /** |
| 99 | * joinup - bring data nodes for an inode together. | ||
| 100 | * @c: UBIFS file-system description object | ||
| 101 | * @sleb: describes scanned LEB | ||
| 102 | * @inum: inode number | ||
| 103 | * @blk: block number | ||
| 104 | * @data: list to which to add data nodes | ||
| 105 | * | ||
| 106 | * This function looks at the first few nodes in the scanned LEB @sleb and adds | ||
| 107 | * them to @data if they are data nodes from @inum and have a larger block | ||
| 108 | * number than @blk. This function returns %0 on success and a negative error | ||
| 109 | * code on failure. | ||
| 110 | */ | ||
| 111 | static int joinup(struct ubifs_info *c, struct ubifs_scan_leb *sleb, ino_t inum, | ||
| 112 | unsigned int blk, struct list_head *data) | ||
| 113 | { | ||
| 114 | int err, cnt = 6, lnum = sleb->lnum, offs; | ||
| 115 | struct ubifs_scan_node *snod, *tmp; | ||
| 116 | union ubifs_key *key; | ||
| 117 | |||
| 118 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | ||
| 119 | key = &snod->key; | ||
| 120 | if (key_inum(c, key) == inum && | ||
| 121 | key_type(c, key) == UBIFS_DATA_KEY && | ||
| 122 | key_block(c, key) > blk) { | ||
| 123 | offs = snod->offs; | ||
| 124 | err = ubifs_tnc_has_node(c, key, 0, lnum, offs, 0); | ||
| 125 | if (err < 0) | ||
| 126 | return err; | ||
| 127 | list_del(&snod->list); | ||
| 128 | if (err) { | ||
| 129 | list_add_tail(&snod->list, data); | ||
| 130 | blk = key_block(c, key); | ||
| 131 | } else | ||
| 132 | kfree(snod); | ||
| 133 | cnt = 6; | ||
| 134 | } else if (--cnt == 0) | ||
| 135 | break; | ||
| 136 | } | ||
| 137 | return 0; | ||
| 138 | } | ||
| 139 | |||
| 140 | /** | ||
| 99 | * move_nodes - move nodes. | 141 | * move_nodes - move nodes. |
| 100 | * @c: UBIFS file-system description object | 142 | * @c: UBIFS file-system description object |
| 101 | * @sleb: describes nodes to move | 143 | * @sleb: describes nodes to move |
| @@ -116,16 +158,21 @@ static int switch_gc_head(struct ubifs_info *c) | |||
| 116 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | 158 | static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) |
| 117 | { | 159 | { |
| 118 | struct ubifs_scan_node *snod, *tmp; | 160 | struct ubifs_scan_node *snod, *tmp; |
| 119 | struct list_head large, medium, small; | 161 | struct list_head data, large, medium, small; |
| 120 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; | 162 | struct ubifs_wbuf *wbuf = &c->jheads[GCHD].wbuf; |
| 121 | int avail, err, min = INT_MAX; | 163 | int avail, err, min = INT_MAX; |
| 164 | unsigned int blk = 0; | ||
| 165 | ino_t inum = 0; | ||
| 122 | 166 | ||
| 167 | INIT_LIST_HEAD(&data); | ||
| 123 | INIT_LIST_HEAD(&large); | 168 | INIT_LIST_HEAD(&large); |
| 124 | INIT_LIST_HEAD(&medium); | 169 | INIT_LIST_HEAD(&medium); |
| 125 | INIT_LIST_HEAD(&small); | 170 | INIT_LIST_HEAD(&small); |
| 126 | 171 | ||
| 127 | list_for_each_entry_safe(snod, tmp, &sleb->nodes, list) { | 172 | while (!list_empty(&sleb->nodes)) { |
| 128 | struct list_head *lst; | 173 | struct list_head *lst = sleb->nodes.next; |
| 174 | |||
| 175 | snod = list_entry(lst, struct ubifs_scan_node, list); | ||
| 129 | 176 | ||
| 130 | ubifs_assert(snod->type != UBIFS_IDX_NODE); | 177 | ubifs_assert(snod->type != UBIFS_IDX_NODE); |
| 131 | ubifs_assert(snod->type != UBIFS_REF_NODE); | 178 | ubifs_assert(snod->type != UBIFS_REF_NODE); |
| @@ -136,7 +183,6 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
| 136 | if (err < 0) | 183 | if (err < 0) |
| 137 | goto out; | 184 | goto out; |
| 138 | 185 | ||
| 139 | lst = &snod->list; | ||
| 140 | list_del(lst); | 186 | list_del(lst); |
| 141 | if (!err) { | 187 | if (!err) { |
| 142 | /* The node is obsolete, remove it from the list */ | 188 | /* The node is obsolete, remove it from the list */ |
| @@ -145,15 +191,30 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
| 145 | } | 191 | } |
| 146 | 192 | ||
| 147 | /* | 193 | /* |
| 148 | * Sort the list of nodes so that large nodes go first, and | 194 | * Sort the list of nodes so that data nodes go first, large |
| 149 | * small nodes go last. | 195 | * nodes go second, and small nodes go last. |
| 150 | */ | 196 | */ |
| 151 | if (snod->len > MEDIUM_NODE_WM) | 197 | if (key_type(c, &snod->key) == UBIFS_DATA_KEY) { |
| 152 | list_add(lst, &large); | 198 | if (inum != key_inum(c, &snod->key)) { |
| 199 | if (inum) { | ||
| 200 | /* | ||
| 201 | * Try to move data nodes from the same | ||
| 202 | * inode together. | ||
| 203 | */ | ||
| 204 | err = joinup(c, sleb, inum, blk, &data); | ||
| 205 | if (err) | ||
| 206 | goto out; | ||
| 207 | } | ||
| 208 | inum = key_inum(c, &snod->key); | ||
| 209 | blk = key_block(c, &snod->key); | ||
| 210 | } | ||
| 211 | list_add_tail(lst, &data); | ||
| 212 | } else if (snod->len > MEDIUM_NODE_WM) | ||
| 213 | list_add_tail(lst, &large); | ||
| 153 | else if (snod->len > SMALL_NODE_WM) | 214 | else if (snod->len > SMALL_NODE_WM) |
| 154 | list_add(lst, &medium); | 215 | list_add_tail(lst, &medium); |
| 155 | else | 216 | else |
| 156 | list_add(lst, &small); | 217 | list_add_tail(lst, &small); |
| 157 | 218 | ||
| 158 | /* And find the smallest node */ | 219 | /* And find the smallest node */ |
| 159 | if (snod->len < min) | 220 | if (snod->len < min) |
| @@ -164,6 +225,7 @@ static int move_nodes(struct ubifs_info *c, struct ubifs_scan_leb *sleb) | |||
| 164 | * Join the tree lists so that we'd have one roughly sorted list | 225 | * Join the tree lists so that we'd have one roughly sorted list |
| 165 | * ('large' will be the head of the joined list). | 226 | * ('large' will be the head of the joined list). |
| 166 | */ | 227 | */ |
| 228 | list_splice(&data, &large); | ||
| 167 | list_splice(&medium, large.prev); | 229 | list_splice(&medium, large.prev); |
| 168 | list_splice(&small, large.prev); | 230 | list_splice(&small, large.prev); |
| 169 | 231 | ||
| @@ -653,7 +715,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
| 653 | */ | 715 | */ |
| 654 | while (1) { | 716 | while (1) { |
| 655 | lp = ubifs_fast_find_freeable(c); | 717 | lp = ubifs_fast_find_freeable(c); |
| 656 | if (unlikely(IS_ERR(lp))) { | 718 | if (IS_ERR(lp)) { |
| 657 | err = PTR_ERR(lp); | 719 | err = PTR_ERR(lp); |
| 658 | goto out; | 720 | goto out; |
| 659 | } | 721 | } |
| @@ -665,7 +727,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
| 665 | if (err) | 727 | if (err) |
| 666 | goto out; | 728 | goto out; |
| 667 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); | 729 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, lp->flags, 0); |
| 668 | if (unlikely(IS_ERR(lp))) { | 730 | if (IS_ERR(lp)) { |
| 669 | err = PTR_ERR(lp); | 731 | err = PTR_ERR(lp); |
| 670 | goto out; | 732 | goto out; |
| 671 | } | 733 | } |
| @@ -680,7 +742,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
| 680 | /* Record index freeable LEBs for unmapping after commit */ | 742 | /* Record index freeable LEBs for unmapping after commit */ |
| 681 | while (1) { | 743 | while (1) { |
| 682 | lp = ubifs_fast_find_frdi_idx(c); | 744 | lp = ubifs_fast_find_frdi_idx(c); |
| 683 | if (unlikely(IS_ERR(lp))) { | 745 | if (IS_ERR(lp)) { |
| 684 | err = PTR_ERR(lp); | 746 | err = PTR_ERR(lp); |
| 685 | goto out; | 747 | goto out; |
| 686 | } | 748 | } |
| @@ -696,7 +758,7 @@ int ubifs_gc_start_commit(struct ubifs_info *c) | |||
| 696 | /* Don't release the LEB until after the next commit */ | 758 | /* Don't release the LEB until after the next commit */ |
| 697 | flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; | 759 | flags = (lp->flags | LPROPS_TAKEN) ^ LPROPS_INDEX; |
| 698 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); | 760 | lp = ubifs_change_lp(c, lp, c->leb_size, 0, flags, 1); |
| 699 | if (unlikely(IS_ERR(lp))) { | 761 | if (IS_ERR(lp)) { |
| 700 | err = PTR_ERR(lp); | 762 | err = PTR_ERR(lp); |
| 701 | kfree(idx_gc); | 763 | kfree(idx_gc); |
| 702 | goto out; | 764 | goto out; |
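Note on the gc.c hunks above: move_nodes() now keeps data nodes in their own list, grouped per inode by joinup(), so that garbage collection preserves the locality that bulk-read later exploits; all other nodes are still bucketed by size. The helper below is a hedged restatement of that ordering rule, not kernel code; its parameters stand in for the MEDIUM_NODE_WM and SMALL_NODE_WM watermarks the kernel uses directly.

enum gc_bucket { GC_DATA, GC_LARGE, GC_MEDIUM, GC_SMALL };

/* Illustrative restatement of the bucketing decision in move_nodes(). */
static enum gc_bucket pick_bucket(int is_data_key, int len,
                                  int medium_wm, int small_wm)
{
        if (is_data_key)
                return GC_DATA;     /* kept together per inode for bulk-read */
        if (len > medium_wm)
                return GC_LARGE;    /* large nodes are written first */
        if (len > small_wm)
                return GC_MEDIUM;
        return GC_SMALL;            /* small nodes go last */
}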
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 054363f2b207..01682713af69 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c | |||
| @@ -62,6 +62,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) | |||
| 62 | { | 62 | { |
| 63 | if (!c->ro_media) { | 63 | if (!c->ro_media) { |
| 64 | c->ro_media = 1; | 64 | c->ro_media = 1; |
| 65 | c->no_chk_data_crc = 0; | ||
| 65 | ubifs_warn("switched to read-only mode, error %d", err); | 66 | ubifs_warn("switched to read-only mode, error %d", err); |
| 66 | dbg_dump_stack(); | 67 | dbg_dump_stack(); |
| 67 | } | 68 | } |
| @@ -74,6 +75,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) | |||
| 74 | * @lnum: logical eraseblock number | 75 | * @lnum: logical eraseblock number |
| 75 | * @offs: offset within the logical eraseblock | 76 | * @offs: offset within the logical eraseblock |
| 76 | * @quiet: print no messages | 77 | * @quiet: print no messages |
| 78 | * @chk_crc: indicates whether to always check the CRC | ||
| 77 | * | 79 | * |
| 78 | * This function checks node magic number and CRC checksum. This function also | 80 | * This function checks node magic number and CRC checksum. This function also |
| 79 | * validates node length to prevent UBIFS from becoming crazy when an attacker | 81 | * validates node length to prevent UBIFS from becoming crazy when an attacker |
| @@ -85,7 +87,7 @@ void ubifs_ro_mode(struct ubifs_info *c, int err) | |||
| 85 | * or magic. | 87 | * or magic. |
| 86 | */ | 88 | */ |
| 87 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | 89 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, |
| 88 | int offs, int quiet) | 90 | int offs, int quiet, int chk_crc) |
| 89 | { | 91 | { |
| 90 | int err = -EINVAL, type, node_len; | 92 | int err = -EINVAL, type, node_len; |
| 91 | uint32_t crc, node_crc, magic; | 93 | uint32_t crc, node_crc, magic; |
| @@ -121,6 +123,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | |||
| 121 | node_len > c->ranges[type].max_len) | 123 | node_len > c->ranges[type].max_len) |
| 122 | goto out_len; | 124 | goto out_len; |
| 123 | 125 | ||
| 126 | if (!chk_crc && type == UBIFS_DATA_NODE && !c->always_chk_crc) | ||
| 127 | if (c->no_chk_data_crc) | ||
| 128 | return 0; | ||
| 129 | |||
| 124 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); | 130 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); |
| 125 | node_crc = le32_to_cpu(ch->crc); | 131 | node_crc = le32_to_cpu(ch->crc); |
| 126 | if (crc != node_crc) { | 132 | if (crc != node_crc) { |
| @@ -722,7 +728,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, | |||
| 722 | goto out; | 728 | goto out; |
| 723 | } | 729 | } |
| 724 | 730 | ||
| 725 | err = ubifs_check_node(c, buf, lnum, offs, 0); | 731 | err = ubifs_check_node(c, buf, lnum, offs, 0, 0); |
| 726 | if (err) { | 732 | if (err) { |
| 727 | ubifs_err("expected node type %d", type); | 733 | ubifs_err("expected node type %d", type); |
| 728 | return err; | 734 | return err; |
| @@ -781,7 +787,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, | |||
| 781 | goto out; | 787 | goto out; |
| 782 | } | 788 | } |
| 783 | 789 | ||
| 784 | err = ubifs_check_node(c, buf, lnum, offs, 0); | 790 | err = ubifs_check_node(c, buf, lnum, offs, 0, 0); |
| 785 | if (err) { | 791 | if (err) { |
| 786 | ubifs_err("expected node type %d", type); | 792 | ubifs_err("expected node type %d", type); |
| 787 | return err; | 793 | return err; |
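The io.c hunks introduce the no_chk_data_crc and always_chk_crc flags, which let UBIFS skip CRC verification of data nodes whose position the index already vouches for. Below is a standalone restatement of the decision ubifs_check_node() now makes; the helper and the stand-in constant are assumptions made for a self-contained sketch, only the flag semantics come from the patch.

/* Stand-in for the real UBIFS_DATA_NODE constant, to keep the sketch self-contained. */
#define SK_UBIFS_DATA_NODE 1

static int crc_check_needed(int chk_crc, int node_type,
                            int no_chk_data_crc, int always_chk_crc)
{
        if (chk_crc)                          /* caller forces the check, e.g. the scanner */
                return 1;
        if (node_type != SK_UBIFS_DATA_NODE)  /* only data-node CRCs may be skipped */
                return 1;
        if (always_chk_crc)                   /* set while mounting / remounting R/W */
                return 1;
        return !no_chk_data_crc;              /* otherwise honour the mount option */
}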
diff --git a/fs/ubifs/key.h b/fs/ubifs/key.h index 8f7476007549..9ee65086f627 100644 --- a/fs/ubifs/key.h +++ b/fs/ubifs/key.h | |||
| @@ -484,7 +484,7 @@ static inline void key_copy(const struct ubifs_info *c, | |||
| 484 | * @key2: the second key to compare | 484 | * @key2: the second key to compare |
| 485 | * | 485 | * |
| 486 | * This function compares 2 keys and returns %-1 if @key1 is less than | 486 | * This function compares 2 keys and returns %-1 if @key1 is less than |
| 487 | * @key2, 0 if the keys are equivalent and %1 if @key1 is greater than @key2. | 487 | * @key2, %0 if the keys are equivalent and %1 if @key1 is greater than @key2. |
| 488 | */ | 488 | */ |
| 489 | static inline int keys_cmp(const struct ubifs_info *c, | 489 | static inline int keys_cmp(const struct ubifs_info *c, |
| 490 | const union ubifs_key *key1, | 490 | const union ubifs_key *key1, |
| @@ -503,6 +503,26 @@ static inline int keys_cmp(const struct ubifs_info *c, | |||
| 503 | } | 503 | } |
| 504 | 504 | ||
| 505 | /** | 505 | /** |
| 506 | * keys_eq - determine if keys are equivalent. | ||
| 507 | * @c: UBIFS file-system description object | ||
| 508 | * @key1: the first key to compare | ||
| 509 | * @key2: the second key to compare | ||
| 510 | * | ||
| 511 | * This function compares 2 keys and returns %1 if @key1 is equal to @key2 and | ||
| 512 | * %0 if not. | ||
| 513 | */ | ||
| 514 | static inline int keys_eq(const struct ubifs_info *c, | ||
| 515 | const union ubifs_key *key1, | ||
| 516 | const union ubifs_key *key2) | ||
| 517 | { | ||
| 518 | if (key1->u32[0] != key2->u32[0]) | ||
| 519 | return 0; | ||
| 520 | if (key1->u32[1] != key2->u32[1]) | ||
| 521 | return 0; | ||
| 522 | return 1; | ||
| 523 | } | ||
| 524 | |||
| 525 | /** | ||
| 506 | * is_hash_key - is a key vulnerable to hash collisions. | 526 | * is_hash_key - is a key vulnerable to hash collisions. |
| 507 | * @c: UBIFS file-system description object | 527 | * @c: UBIFS file-system description object |
| 508 | * @key: key | 528 | * @key: key |
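The new keys_eq() is a pure equality test over the two packed 32-bit key words; unlike keys_cmp() it never has to establish an ordering, which is all the bulk-read code needs when validating a looked-up node. A standalone restatement of the same logic, assuming a key that packs into two 32-bit words as in the simple key format:

#include <stdint.h>

/* Illustrative stand-alone version of the new keys_eq() check. */
static int keys_eq_sketch(const uint32_t a[2], const uint32_t b[2])
{
        return a[0] == b[0] && a[1] == b[1];
}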
diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 2ba93da71b65..f27176e9b70d 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c | |||
| @@ -125,6 +125,7 @@ static void adjust_lpt_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, | |||
| 125 | } | 125 | } |
| 126 | } | 126 | } |
| 127 | } | 127 | } |
| 128 | |||
| 128 | /* Not greater than parent, so compare to children */ | 129 | /* Not greater than parent, so compare to children */ |
| 129 | while (1) { | 130 | while (1) { |
| 130 | /* Compare to left child */ | 131 | /* Compare to left child */ |
| @@ -460,18 +461,6 @@ static void change_category(struct ubifs_info *c, struct ubifs_lprops *lprops) | |||
| 460 | } | 461 | } |
| 461 | 462 | ||
| 462 | /** | 463 | /** |
| 463 | * ubifs_get_lprops - get reference to LEB properties. | ||
| 464 | * @c: the UBIFS file-system description object | ||
| 465 | * | ||
| 466 | * This function locks lprops. Lprops have to be unlocked by | ||
| 467 | * 'ubifs_release_lprops()'. | ||
| 468 | */ | ||
| 469 | void ubifs_get_lprops(struct ubifs_info *c) | ||
| 470 | { | ||
| 471 | mutex_lock(&c->lp_mutex); | ||
| 472 | } | ||
| 473 | |||
| 474 | /** | ||
| 475 | * calc_dark - calculate LEB dark space size. | 464 | * calc_dark - calculate LEB dark space size. |
| 476 | * @c: the UBIFS file-system description object | 465 | * @c: the UBIFS file-system description object |
| 477 | * @spc: amount of free and dirty space in the LEB | 466 | * @spc: amount of free and dirty space in the LEB |
| @@ -576,7 +565,6 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
| 576 | ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); | 565 | ubifs_assert(!(lprops->free & 7) && !(lprops->dirty & 7)); |
| 577 | 566 | ||
| 578 | spin_lock(&c->space_lock); | 567 | spin_lock(&c->space_lock); |
| 579 | |||
| 580 | if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) | 568 | if ((lprops->flags & LPROPS_TAKEN) && lprops->free == c->leb_size) |
| 581 | c->lst.taken_empty_lebs -= 1; | 569 | c->lst.taken_empty_lebs -= 1; |
| 582 | 570 | ||
| @@ -637,31 +625,12 @@ const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | |||
| 637 | c->lst.taken_empty_lebs += 1; | 625 | c->lst.taken_empty_lebs += 1; |
| 638 | 626 | ||
| 639 | change_category(c, lprops); | 627 | change_category(c, lprops); |
| 640 | |||
| 641 | c->idx_gc_cnt += idx_gc_cnt; | 628 | c->idx_gc_cnt += idx_gc_cnt; |
| 642 | |||
| 643 | spin_unlock(&c->space_lock); | 629 | spin_unlock(&c->space_lock); |
| 644 | |||
| 645 | return lprops; | 630 | return lprops; |
| 646 | } | 631 | } |
| 647 | 632 | ||
| 648 | /** | 633 | /** |
| 649 | * ubifs_release_lprops - release lprops lock. | ||
| 650 | * @c: the UBIFS file-system description object | ||
| 651 | * | ||
| 652 | * This function has to be called after each 'ubifs_get_lprops()' call to | ||
| 653 | * unlock lprops. | ||
| 654 | */ | ||
| 655 | void ubifs_release_lprops(struct ubifs_info *c) | ||
| 656 | { | ||
| 657 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 658 | ubifs_assert(c->lst.empty_lebs >= 0 && | ||
| 659 | c->lst.empty_lebs <= c->main_lebs); | ||
| 660 | |||
| 661 | mutex_unlock(&c->lp_mutex); | ||
| 662 | } | ||
| 663 | |||
| 664 | /** | ||
| 665 | * ubifs_get_lp_stats - get lprops statistics. | 634 | * ubifs_get_lp_stats - get lprops statistics. |
| 666 | * @c: UBIFS file-system description object | 635 | * @c: UBIFS file-system description object |
| 667 | * @st: return statistics | 636 | * @st: return statistics |
| @@ -1262,7 +1231,6 @@ static int scan_check_cb(struct ubifs_info *c, | |||
| 1262 | } | 1231 | } |
| 1263 | 1232 | ||
| 1264 | ubifs_scan_destroy(sleb); | 1233 | ubifs_scan_destroy(sleb); |
| 1265 | |||
| 1266 | return LPT_SCAN_CONTINUE; | 1234 | return LPT_SCAN_CONTINUE; |
| 1267 | 1235 | ||
| 1268 | out_print: | 1236 | out_print: |
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 9ff2463177e5..db8bd0e518b2 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c | |||
| @@ -109,7 +109,8 @@ static void do_calc_lpt_geom(struct ubifs_info *c) | |||
| 109 | c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; | 109 | c->lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; |
| 110 | c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; | 110 | c->lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; |
| 111 | c->lpt_sz += c->ltab_sz; | 111 | c->lpt_sz += c->ltab_sz; |
| 112 | c->lpt_sz += c->lsave_sz; | 112 | if (c->big_lpt) |
| 113 | c->lpt_sz += c->lsave_sz; | ||
| 113 | 114 | ||
| 114 | /* Add wastage */ | 115 | /* Add wastage */ |
| 115 | sz = c->lpt_sz; | 116 | sz = c->lpt_sz; |
| @@ -287,25 +288,56 @@ uint32_t ubifs_unpack_bits(uint8_t **addr, int *pos, int nrbits) | |||
| 287 | const int k = 32 - nrbits; | 288 | const int k = 32 - nrbits; |
| 288 | uint8_t *p = *addr; | 289 | uint8_t *p = *addr; |
| 289 | int b = *pos; | 290 | int b = *pos; |
| 290 | uint32_t val; | 291 | uint32_t uninitialized_var(val); |
| 292 | const int bytes = (nrbits + b + 7) >> 3; | ||
| 291 | 293 | ||
| 292 | ubifs_assert(nrbits > 0); | 294 | ubifs_assert(nrbits > 0); |
| 293 | ubifs_assert(nrbits <= 32); | 295 | ubifs_assert(nrbits <= 32); |
| 294 | ubifs_assert(*pos >= 0); | 296 | ubifs_assert(*pos >= 0); |
| 295 | ubifs_assert(*pos < 8); | 297 | ubifs_assert(*pos < 8); |
| 296 | if (b) { | 298 | if (b) { |
| 297 | val = p[1] | ((uint32_t)p[2] << 8) | ((uint32_t)p[3] << 16) | | 299 | switch (bytes) { |
| 298 | ((uint32_t)p[4] << 24); | 300 | case 2: |
| 301 | val = p[1]; | ||
| 302 | break; | ||
| 303 | case 3: | ||
| 304 | val = p[1] | ((uint32_t)p[2] << 8); | ||
| 305 | break; | ||
| 306 | case 4: | ||
| 307 | val = p[1] | ((uint32_t)p[2] << 8) | | ||
| 308 | ((uint32_t)p[3] << 16); | ||
| 309 | break; | ||
| 310 | case 5: | ||
| 311 | val = p[1] | ((uint32_t)p[2] << 8) | | ||
| 312 | ((uint32_t)p[3] << 16) | | ||
| 313 | ((uint32_t)p[4] << 24); | ||
| 314 | } | ||
| 299 | val <<= (8 - b); | 315 | val <<= (8 - b); |
| 300 | val |= *p >> b; | 316 | val |= *p >> b; |
| 301 | nrbits += b; | 317 | nrbits += b; |
| 302 | } else | 318 | } else { |
| 303 | val = p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | | 319 | switch (bytes) { |
| 304 | ((uint32_t)p[3] << 24); | 320 | case 1: |
| 321 | val = p[0]; | ||
| 322 | break; | ||
| 323 | case 2: | ||
| 324 | val = p[0] | ((uint32_t)p[1] << 8); | ||
| 325 | break; | ||
| 326 | case 3: | ||
| 327 | val = p[0] | ((uint32_t)p[1] << 8) | | ||
| 328 | ((uint32_t)p[2] << 16); | ||
| 329 | break; | ||
| 330 | case 4: | ||
| 331 | val = p[0] | ((uint32_t)p[1] << 8) | | ||
| 332 | ((uint32_t)p[2] << 16) | | ||
| 333 | ((uint32_t)p[3] << 24); | ||
| 334 | break; | ||
| 335 | } | ||
| 336 | } | ||
| 305 | val <<= k; | 337 | val <<= k; |
| 306 | val >>= k; | 338 | val >>= k; |
| 307 | b = nrbits & 7; | 339 | b = nrbits & 7; |
| 308 | p += nrbits / 8; | 340 | p += nrbits >> 3; |
| 309 | *addr = p; | 341 | *addr = p; |
| 310 | *pos = b; | 342 | *pos = b; |
| 311 | ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); | 343 | ubifs_assert((val >> nrbits) == 0 || nrbits - b == 32); |
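The reworked ubifs_unpack_bits() computes how many bytes the requested bit field actually spans and reads only those, so it can no longer run past the end of a packed LPT buffer; the companion change to is_a_node() likewise rejects buffers shorter than a node header. Below is a simplified model of the same idea, assuming the little-endian bit packing of the original and using a loop where the kernel unrolls a switch; it is a sketch, not the kernel function.

#include <stdint.h>

/* Simplified model of the bounded-read unpacking; illustrative only. */
static uint32_t unpack_bits_sketch(const uint8_t **addr, int *pos, int nrbits)
{
        const uint8_t *p = *addr;
        int b = *pos;                           /* bit offset inside *p, 0..7 */
        int bytes = (nrbits + b + 7) >> 3;      /* bytes the field occupies   */
        uint64_t val = 0;
        int i;

        for (i = 0; i < bytes; i++)             /* read no more than 'bytes'  */
                val |= (uint64_t)p[i] << (8 * i);
        val >>= b;                              /* drop bits before the field */
        if (nrbits < 32)
                val &= (1ULL << nrbits) - 1;    /* mask to the requested width */

        *addr = p + ((nrbits + b) >> 3);        /* advance past consumed bytes */
        *pos = (nrbits + b) & 7;
        return (uint32_t)val;
}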
diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index 5f0b83e20af6..eed5a0025d63 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c | |||
| @@ -177,8 +177,6 @@ static int alloc_lpt_leb(struct ubifs_info *c, int *lnum) | |||
| 177 | return 0; | 177 | return 0; |
| 178 | } | 178 | } |
| 179 | } | 179 | } |
| 180 | dbg_err("last LEB %d", *lnum); | ||
| 181 | dump_stack(); | ||
| 182 | return -ENOSPC; | 180 | return -ENOSPC; |
| 183 | } | 181 | } |
| 184 | 182 | ||
| @@ -193,6 +191,9 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 193 | int lnum, offs, len, alen, done_lsave, done_ltab, err; | 191 | int lnum, offs, len, alen, done_lsave, done_ltab, err; |
| 194 | struct ubifs_cnode *cnode; | 192 | struct ubifs_cnode *cnode; |
| 195 | 193 | ||
| 194 | err = dbg_chk_lpt_sz(c, 0, 0); | ||
| 195 | if (err) | ||
| 196 | return err; | ||
| 196 | cnode = c->lpt_cnext; | 197 | cnode = c->lpt_cnext; |
| 197 | if (!cnode) | 198 | if (!cnode) |
| 198 | return 0; | 199 | return 0; |
| @@ -206,6 +207,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 206 | c->lsave_lnum = lnum; | 207 | c->lsave_lnum = lnum; |
| 207 | c->lsave_offs = offs; | 208 | c->lsave_offs = offs; |
| 208 | offs += c->lsave_sz; | 209 | offs += c->lsave_sz; |
| 210 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
| 209 | } | 211 | } |
| 210 | 212 | ||
| 211 | if (offs + c->ltab_sz <= c->leb_size) { | 213 | if (offs + c->ltab_sz <= c->leb_size) { |
| @@ -213,6 +215,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 213 | c->ltab_lnum = lnum; | 215 | c->ltab_lnum = lnum; |
| 214 | c->ltab_offs = offs; | 216 | c->ltab_offs = offs; |
| 215 | offs += c->ltab_sz; | 217 | offs += c->ltab_sz; |
| 218 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
| 216 | } | 219 | } |
| 217 | 220 | ||
| 218 | do { | 221 | do { |
| @@ -226,9 +229,10 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 226 | while (offs + len > c->leb_size) { | 229 | while (offs + len > c->leb_size) { |
| 227 | alen = ALIGN(offs, c->min_io_size); | 230 | alen = ALIGN(offs, c->min_io_size); |
| 228 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 231 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
| 232 | dbg_chk_lpt_sz(c, 2, alen - offs); | ||
| 229 | err = alloc_lpt_leb(c, &lnum); | 233 | err = alloc_lpt_leb(c, &lnum); |
| 230 | if (err) | 234 | if (err) |
| 231 | return err; | 235 | goto no_space; |
| 232 | offs = 0; | 236 | offs = 0; |
| 233 | ubifs_assert(lnum >= c->lpt_first && | 237 | ubifs_assert(lnum >= c->lpt_first && |
| 234 | lnum <= c->lpt_last); | 238 | lnum <= c->lpt_last); |
| @@ -238,6 +242,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 238 | c->lsave_lnum = lnum; | 242 | c->lsave_lnum = lnum; |
| 239 | c->lsave_offs = offs; | 243 | c->lsave_offs = offs; |
| 240 | offs += c->lsave_sz; | 244 | offs += c->lsave_sz; |
| 245 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
| 241 | continue; | 246 | continue; |
| 242 | } | 247 | } |
| 243 | if (!done_ltab) { | 248 | if (!done_ltab) { |
| @@ -245,6 +250,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 245 | c->ltab_lnum = lnum; | 250 | c->ltab_lnum = lnum; |
| 246 | c->ltab_offs = offs; | 251 | c->ltab_offs = offs; |
| 247 | offs += c->ltab_sz; | 252 | offs += c->ltab_sz; |
| 253 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
| 248 | continue; | 254 | continue; |
| 249 | } | 255 | } |
| 250 | break; | 256 | break; |
| @@ -257,6 +263,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 257 | c->lpt_offs = offs; | 263 | c->lpt_offs = offs; |
| 258 | } | 264 | } |
| 259 | offs += len; | 265 | offs += len; |
| 266 | dbg_chk_lpt_sz(c, 1, len); | ||
| 260 | cnode = cnode->cnext; | 267 | cnode = cnode->cnext; |
| 261 | } while (cnode && cnode != c->lpt_cnext); | 268 | } while (cnode && cnode != c->lpt_cnext); |
| 262 | 269 | ||
| @@ -265,9 +272,10 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 265 | if (offs + c->lsave_sz > c->leb_size) { | 272 | if (offs + c->lsave_sz > c->leb_size) { |
| 266 | alen = ALIGN(offs, c->min_io_size); | 273 | alen = ALIGN(offs, c->min_io_size); |
| 267 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 274 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
| 275 | dbg_chk_lpt_sz(c, 2, alen - offs); | ||
| 268 | err = alloc_lpt_leb(c, &lnum); | 276 | err = alloc_lpt_leb(c, &lnum); |
| 269 | if (err) | 277 | if (err) |
| 270 | return err; | 278 | goto no_space; |
| 271 | offs = 0; | 279 | offs = 0; |
| 272 | ubifs_assert(lnum >= c->lpt_first && | 280 | ubifs_assert(lnum >= c->lpt_first && |
| 273 | lnum <= c->lpt_last); | 281 | lnum <= c->lpt_last); |
| @@ -276,6 +284,7 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 276 | c->lsave_lnum = lnum; | 284 | c->lsave_lnum = lnum; |
| 277 | c->lsave_offs = offs; | 285 | c->lsave_offs = offs; |
| 278 | offs += c->lsave_sz; | 286 | offs += c->lsave_sz; |
| 287 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
| 279 | } | 288 | } |
| 280 | 289 | ||
| 281 | /* Make sure to place LPT's own lprops table */ | 290 | /* Make sure to place LPT's own lprops table */ |
| @@ -283,9 +292,10 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 283 | if (offs + c->ltab_sz > c->leb_size) { | 292 | if (offs + c->ltab_sz > c->leb_size) { |
| 284 | alen = ALIGN(offs, c->min_io_size); | 293 | alen = ALIGN(offs, c->min_io_size); |
| 285 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 294 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
| 295 | dbg_chk_lpt_sz(c, 2, alen - offs); | ||
| 286 | err = alloc_lpt_leb(c, &lnum); | 296 | err = alloc_lpt_leb(c, &lnum); |
| 287 | if (err) | 297 | if (err) |
| 288 | return err; | 298 | goto no_space; |
| 289 | offs = 0; | 299 | offs = 0; |
| 290 | ubifs_assert(lnum >= c->lpt_first && | 300 | ubifs_assert(lnum >= c->lpt_first && |
| 291 | lnum <= c->lpt_last); | 301 | lnum <= c->lpt_last); |
| @@ -294,11 +304,23 @@ static int layout_cnodes(struct ubifs_info *c) | |||
| 294 | c->ltab_lnum = lnum; | 304 | c->ltab_lnum = lnum; |
| 295 | c->ltab_offs = offs; | 305 | c->ltab_offs = offs; |
| 296 | offs += c->ltab_sz; | 306 | offs += c->ltab_sz; |
| 307 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
| 297 | } | 308 | } |
| 298 | 309 | ||
| 299 | alen = ALIGN(offs, c->min_io_size); | 310 | alen = ALIGN(offs, c->min_io_size); |
| 300 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); | 311 | upd_ltab(c, lnum, c->leb_size - alen, alen - offs); |
| 312 | dbg_chk_lpt_sz(c, 4, alen - offs); | ||
| 313 | err = dbg_chk_lpt_sz(c, 3, alen); | ||
| 314 | if (err) | ||
| 315 | return err; | ||
| 301 | return 0; | 316 | return 0; |
| 317 | |||
| 318 | no_space: | ||
| 319 | ubifs_err("LPT out of space"); | ||
| 320 | dbg_err("LPT out of space at LEB %d:%d needing %d, done_ltab %d, " | ||
| 321 | "done_lsave %d", lnum, offs, len, done_ltab, done_lsave); | ||
| 322 | dbg_dump_lpt_info(c); | ||
| 323 | return err; | ||
| 302 | } | 324 | } |
| 303 | 325 | ||
| 304 | /** | 326 | /** |
| @@ -333,8 +355,6 @@ static int realloc_lpt_leb(struct ubifs_info *c, int *lnum) | |||
| 333 | *lnum = i + c->lpt_first; | 355 | *lnum = i + c->lpt_first; |
| 334 | return 0; | 356 | return 0; |
| 335 | } | 357 | } |
| 336 | dbg_err("last LEB %d", *lnum); | ||
| 337 | dump_stack(); | ||
| 338 | return -ENOSPC; | 358 | return -ENOSPC; |
| 339 | } | 359 | } |
| 340 | 360 | ||
| @@ -369,12 +389,14 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 369 | done_lsave = 1; | 389 | done_lsave = 1; |
| 370 | ubifs_pack_lsave(c, buf + offs, c->lsave); | 390 | ubifs_pack_lsave(c, buf + offs, c->lsave); |
| 371 | offs += c->lsave_sz; | 391 | offs += c->lsave_sz; |
| 392 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
| 372 | } | 393 | } |
| 373 | 394 | ||
| 374 | if (offs + c->ltab_sz <= c->leb_size) { | 395 | if (offs + c->ltab_sz <= c->leb_size) { |
| 375 | done_ltab = 1; | 396 | done_ltab = 1; |
| 376 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); | 397 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); |
| 377 | offs += c->ltab_sz; | 398 | offs += c->ltab_sz; |
| 399 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
| 378 | } | 400 | } |
| 379 | 401 | ||
| 380 | /* Loop for each cnode */ | 402 | /* Loop for each cnode */ |
| @@ -392,10 +414,12 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 392 | alen, UBI_SHORTTERM); | 414 | alen, UBI_SHORTTERM); |
| 393 | if (err) | 415 | if (err) |
| 394 | return err; | 416 | return err; |
| 417 | dbg_chk_lpt_sz(c, 4, alen - wlen); | ||
| 395 | } | 418 | } |
| 419 | dbg_chk_lpt_sz(c, 2, 0); | ||
| 396 | err = realloc_lpt_leb(c, &lnum); | 420 | err = realloc_lpt_leb(c, &lnum); |
| 397 | if (err) | 421 | if (err) |
| 398 | return err; | 422 | goto no_space; |
| 399 | offs = 0; | 423 | offs = 0; |
| 400 | from = 0; | 424 | from = 0; |
| 401 | ubifs_assert(lnum >= c->lpt_first && | 425 | ubifs_assert(lnum >= c->lpt_first && |
| @@ -408,12 +432,14 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 408 | done_lsave = 1; | 432 | done_lsave = 1; |
| 409 | ubifs_pack_lsave(c, buf + offs, c->lsave); | 433 | ubifs_pack_lsave(c, buf + offs, c->lsave); |
| 410 | offs += c->lsave_sz; | 434 | offs += c->lsave_sz; |
| 435 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
| 411 | continue; | 436 | continue; |
| 412 | } | 437 | } |
| 413 | if (!done_ltab) { | 438 | if (!done_ltab) { |
| 414 | done_ltab = 1; | 439 | done_ltab = 1; |
| 415 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); | 440 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); |
| 416 | offs += c->ltab_sz; | 441 | offs += c->ltab_sz; |
| 442 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
| 417 | continue; | 443 | continue; |
| 418 | } | 444 | } |
| 419 | break; | 445 | break; |
| @@ -435,6 +461,7 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 435 | clear_bit(COW_ZNODE, &cnode->flags); | 461 | clear_bit(COW_ZNODE, &cnode->flags); |
| 436 | smp_mb__after_clear_bit(); | 462 | smp_mb__after_clear_bit(); |
| 437 | offs += len; | 463 | offs += len; |
| 464 | dbg_chk_lpt_sz(c, 1, len); | ||
| 438 | cnode = cnode->cnext; | 465 | cnode = cnode->cnext; |
| 439 | } while (cnode && cnode != c->lpt_cnext); | 466 | } while (cnode && cnode != c->lpt_cnext); |
| 440 | 467 | ||
| @@ -448,9 +475,10 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 448 | UBI_SHORTTERM); | 475 | UBI_SHORTTERM); |
| 449 | if (err) | 476 | if (err) |
| 450 | return err; | 477 | return err; |
| 478 | dbg_chk_lpt_sz(c, 2, alen - wlen); | ||
| 451 | err = realloc_lpt_leb(c, &lnum); | 479 | err = realloc_lpt_leb(c, &lnum); |
| 452 | if (err) | 480 | if (err) |
| 453 | return err; | 481 | goto no_space; |
| 454 | offs = 0; | 482 | offs = 0; |
| 455 | ubifs_assert(lnum >= c->lpt_first && | 483 | ubifs_assert(lnum >= c->lpt_first && |
| 456 | lnum <= c->lpt_last); | 484 | lnum <= c->lpt_last); |
| @@ -461,6 +489,7 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 461 | done_lsave = 1; | 489 | done_lsave = 1; |
| 462 | ubifs_pack_lsave(c, buf + offs, c->lsave); | 490 | ubifs_pack_lsave(c, buf + offs, c->lsave); |
| 463 | offs += c->lsave_sz; | 491 | offs += c->lsave_sz; |
| 492 | dbg_chk_lpt_sz(c, 1, c->lsave_sz); | ||
| 464 | } | 493 | } |
| 465 | 494 | ||
| 466 | /* Make sure to place LPT's own lprops table */ | 495 | /* Make sure to place LPT's own lprops table */ |
| @@ -473,9 +502,10 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 473 | UBI_SHORTTERM); | 502 | UBI_SHORTTERM); |
| 474 | if (err) | 503 | if (err) |
| 475 | return err; | 504 | return err; |
| 505 | dbg_chk_lpt_sz(c, 2, alen - wlen); | ||
| 476 | err = realloc_lpt_leb(c, &lnum); | 506 | err = realloc_lpt_leb(c, &lnum); |
| 477 | if (err) | 507 | if (err) |
| 478 | return err; | 508 | goto no_space; |
| 479 | offs = 0; | 509 | offs = 0; |
| 480 | ubifs_assert(lnum >= c->lpt_first && | 510 | ubifs_assert(lnum >= c->lpt_first && |
| 481 | lnum <= c->lpt_last); | 511 | lnum <= c->lpt_last); |
| @@ -486,6 +516,7 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 486 | done_ltab = 1; | 516 | done_ltab = 1; |
| 487 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); | 517 | ubifs_pack_ltab(c, buf + offs, c->ltab_cmt); |
| 488 | offs += c->ltab_sz; | 518 | offs += c->ltab_sz; |
| 519 | dbg_chk_lpt_sz(c, 1, c->ltab_sz); | ||
| 489 | } | 520 | } |
| 490 | 521 | ||
| 491 | /* Write remaining data in buffer */ | 522 | /* Write remaining data in buffer */ |
| @@ -495,6 +526,12 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 495 | err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); | 526 | err = ubifs_leb_write(c, lnum, buf + from, from, alen, UBI_SHORTTERM); |
| 496 | if (err) | 527 | if (err) |
| 497 | return err; | 528 | return err; |
| 529 | |||
| 530 | dbg_chk_lpt_sz(c, 4, alen - wlen); | ||
| 531 | err = dbg_chk_lpt_sz(c, 3, ALIGN(offs, c->min_io_size)); | ||
| 532 | if (err) | ||
| 533 | return err; | ||
| 534 | |||
| 498 | c->nhead_lnum = lnum; | 535 | c->nhead_lnum = lnum; |
| 499 | c->nhead_offs = ALIGN(offs, c->min_io_size); | 536 | c->nhead_offs = ALIGN(offs, c->min_io_size); |
| 500 | 537 | ||
| @@ -503,7 +540,15 @@ static int write_cnodes(struct ubifs_info *c) | |||
| 503 | dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); | 540 | dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); |
| 504 | if (c->big_lpt) | 541 | if (c->big_lpt) |
| 505 | dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); | 542 | dbg_lp("LPT lsave is at %d:%d", c->lsave_lnum, c->lsave_offs); |
| 543 | |||
| 506 | return 0; | 544 | return 0; |
| 545 | |||
| 546 | no_space: | ||
| 547 | ubifs_err("LPT out of space mismatch"); | ||
| 548 | dbg_err("LPT out of space mismatch at LEB %d:%d needing %d, done_ltab " | ||
| 549 | "%d, done_lsave %d", lnum, offs, len, done_ltab, done_lsave); | ||
| 550 | dbg_dump_lpt_info(c); | ||
| 551 | return err; | ||
| 507 | } | 552 | } |
| 508 | 553 | ||
| 509 | /** | 554 | /** |
| @@ -1044,6 +1089,8 @@ static int is_a_node(struct ubifs_info *c, uint8_t *buf, int len) | |||
| 1044 | int pos = 0, node_type, node_len; | 1089 | int pos = 0, node_type, node_len; |
| 1045 | uint16_t crc, calc_crc; | 1090 | uint16_t crc, calc_crc; |
| 1046 | 1091 | ||
| 1092 | if (len < UBIFS_LPT_CRC_BYTES + (UBIFS_LPT_TYPE_BITS + 7) / 8) | ||
| 1093 | return 0; | ||
| 1047 | node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); | 1094 | node_type = ubifs_unpack_bits(&addr, &pos, UBIFS_LPT_TYPE_BITS); |
| 1048 | if (node_type == UBIFS_LPT_NOT_A_NODE) | 1095 | if (node_type == UBIFS_LPT_NOT_A_NODE) |
| 1049 | return 0; | 1096 | return 0; |
| @@ -1156,6 +1203,9 @@ int ubifs_lpt_start_commit(struct ubifs_info *c) | |||
| 1156 | dbg_lp(""); | 1203 | dbg_lp(""); |
| 1157 | 1204 | ||
| 1158 | mutex_lock(&c->lp_mutex); | 1205 | mutex_lock(&c->lp_mutex); |
| 1206 | err = dbg_chk_lpt_free_spc(c); | ||
| 1207 | if (err) | ||
| 1208 | goto out; | ||
| 1159 | err = dbg_check_ltab(c); | 1209 | err = dbg_check_ltab(c); |
| 1160 | if (err) | 1210 | if (err) |
| 1161 | goto out; | 1211 | goto out; |
| @@ -1645,4 +1695,121 @@ int dbg_check_ltab(struct ubifs_info *c) | |||
| 1645 | return 0; | 1695 | return 0; |
| 1646 | } | 1696 | } |
| 1647 | 1697 | ||
| 1698 | /** | ||
| 1699 | * dbg_chk_lpt_free_spc - check LPT free space is enough to write entire LPT. | ||
| 1700 | * @c: the UBIFS file-system description object | ||
| 1701 | * | ||
| 1702 | * This function returns %0 on success and a negative error code on failure. | ||
| 1703 | */ | ||
| 1704 | int dbg_chk_lpt_free_spc(struct ubifs_info *c) | ||
| 1705 | { | ||
| 1706 | long long free = 0; | ||
| 1707 | int i; | ||
| 1708 | |||
| 1709 | for (i = 0; i < c->lpt_lebs; i++) { | ||
| 1710 | if (c->ltab[i].tgc || c->ltab[i].cmt) | ||
| 1711 | continue; | ||
| 1712 | if (i + c->lpt_first == c->nhead_lnum) | ||
| 1713 | free += c->leb_size - c->nhead_offs; | ||
| 1714 | else if (c->ltab[i].free == c->leb_size) | ||
| 1715 | free += c->leb_size; | ||
| 1716 | } | ||
| 1717 | if (free < c->lpt_sz) { | ||
| 1718 | dbg_err("LPT space error: free %lld lpt_sz %lld", | ||
| 1719 | free, c->lpt_sz); | ||
| 1720 | dbg_dump_lpt_info(c); | ||
| 1721 | return -EINVAL; | ||
| 1722 | } | ||
| 1723 | return 0; | ||
| 1724 | } | ||
| 1725 | |||
| 1726 | /** | ||
| 1727 | * dbg_chk_lpt_sz - check that the LPT does not write more than the LPT size. | ||
| 1728 | * @c: the UBIFS file-system description object | ||
| 1729 | * @action: what to do: %0 - reset counters, %1 - account @len bytes written, %2 - account end-of-LEB wastage, %3 - verify the totals, %4 - account trailing wastage | ||
| 1730 | * @len: length written | ||
| 1731 | * | ||
| 1732 | * This function returns %0 on success and a negative error code on failure. | ||
| 1733 | */ | ||
| 1734 | int dbg_chk_lpt_sz(struct ubifs_info *c, int action, int len) | ||
| 1735 | { | ||
| 1736 | long long chk_lpt_sz, lpt_sz; | ||
| 1737 | int err = 0; | ||
| 1738 | |||
| 1739 | switch (action) { | ||
| 1740 | case 0: | ||
| 1741 | c->chk_lpt_sz = 0; | ||
| 1742 | c->chk_lpt_sz2 = 0; | ||
| 1743 | c->chk_lpt_lebs = 0; | ||
| 1744 | c->chk_lpt_wastage = 0; | ||
| 1745 | if (c->dirty_pn_cnt > c->pnode_cnt) { | ||
| 1746 | dbg_err("dirty pnodes %d exceed max %d", | ||
| 1747 | c->dirty_pn_cnt, c->pnode_cnt); | ||
| 1748 | err = -EINVAL; | ||
| 1749 | } | ||
| 1750 | if (c->dirty_nn_cnt > c->nnode_cnt) { | ||
| 1751 | dbg_err("dirty nnodes %d exceed max %d", | ||
| 1752 | c->dirty_nn_cnt, c->nnode_cnt); | ||
| 1753 | err = -EINVAL; | ||
| 1754 | } | ||
| 1755 | return err; | ||
| 1756 | case 1: | ||
| 1757 | c->chk_lpt_sz += len; | ||
| 1758 | return 0; | ||
| 1759 | case 2: | ||
| 1760 | c->chk_lpt_sz += len; | ||
| 1761 | c->chk_lpt_wastage += len; | ||
| 1762 | c->chk_lpt_lebs += 1; | ||
| 1763 | return 0; | ||
| 1764 | case 3: | ||
| 1765 | chk_lpt_sz = c->leb_size; | ||
| 1766 | chk_lpt_sz *= c->chk_lpt_lebs; | ||
| 1767 | chk_lpt_sz += len - c->nhead_offs; | ||
| 1768 | if (c->chk_lpt_sz != chk_lpt_sz) { | ||
| 1769 | dbg_err("LPT wrote %lld but space used was %lld", | ||
| 1770 | c->chk_lpt_sz, chk_lpt_sz); | ||
| 1771 | err = -EINVAL; | ||
| 1772 | } | ||
| 1773 | if (c->chk_lpt_sz > c->lpt_sz) { | ||
| 1774 | dbg_err("LPT wrote %lld but lpt_sz is %lld", | ||
| 1775 | c->chk_lpt_sz, c->lpt_sz); | ||
| 1776 | err = -EINVAL; | ||
| 1777 | } | ||
| 1778 | if (c->chk_lpt_sz2 && c->chk_lpt_sz != c->chk_lpt_sz2) { | ||
| 1779 | dbg_err("LPT layout size %lld but wrote %lld", | ||
| 1780 | c->chk_lpt_sz, c->chk_lpt_sz2); | ||
| 1781 | err = -EINVAL; | ||
| 1782 | } | ||
| 1783 | if (c->chk_lpt_sz2 && c->new_nhead_offs != len) { | ||
| 1784 | dbg_err("LPT new nhead offs: expected %d was %d", | ||
| 1785 | c->new_nhead_offs, len); | ||
| 1786 | err = -EINVAL; | ||
| 1787 | } | ||
| 1788 | lpt_sz = (long long)c->pnode_cnt * c->pnode_sz; | ||
| 1789 | lpt_sz += (long long)c->nnode_cnt * c->nnode_sz; | ||
| 1790 | lpt_sz += c->ltab_sz; | ||
| 1791 | if (c->big_lpt) | ||
| 1792 | lpt_sz += c->lsave_sz; | ||
| 1793 | if (c->chk_lpt_sz - c->chk_lpt_wastage > lpt_sz) { | ||
| 1794 | dbg_err("LPT chk_lpt_sz %lld + waste %lld exceeds %lld", | ||
| 1795 | c->chk_lpt_sz, c->chk_lpt_wastage, lpt_sz); | ||
| 1796 | err = -EINVAL; | ||
| 1797 | } | ||
| 1798 | if (err) | ||
| 1799 | dbg_dump_lpt_info(c); | ||
| 1800 | c->chk_lpt_sz2 = c->chk_lpt_sz; | ||
| 1801 | c->chk_lpt_sz = 0; | ||
| 1802 | c->chk_lpt_wastage = 0; | ||
| 1803 | c->chk_lpt_lebs = 0; | ||
| 1804 | c->new_nhead_offs = len; | ||
| 1805 | return err; | ||
| 1806 | case 4: | ||
| 1807 | c->chk_lpt_sz += len; | ||
| 1808 | c->chk_lpt_wastage += len; | ||
| 1809 | return 0; | ||
| 1810 | default: | ||
| 1811 | return -EINVAL; | ||
| 1812 | } | ||
| 1813 | } | ||
| 1814 | |||
| 1648 | #endif /* CONFIG_UBIFS_FS_DEBUG */ | 1815 | #endif /* CONFIG_UBIFS_FS_DEBUG */ |
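dbg_chk_lpt_sz() is driven by a small action code from layout_cnodes() and write_cnodes(): 0 resets the counters, 1 accounts bytes laid out or written, 2 accounts end-of-LEB wastage and a LEB switch, 4 accounts trailing padding, and 3 finally verifies the totals. The toy model below mirrors that accounting; its field and function names are illustrative, not the kernel's.

struct lpt_acct {
        long long sz;           /* bytes accounted by actions 1, 2 and 4 */
        long long waste;        /* wastage accounted by actions 2 and 4  */
        int lebs;               /* LEB switches counted by action 2      */
};

/* Mirrors the 'case 3' check: bytes accounted must equal whole LEBs
 * plus the change in the LPT head offset. */
static int lpt_acct_verify(const struct lpt_acct *a, int leb_size,
                           int old_nhead_offs, int new_nhead_offs)
{
        long long used = (long long)a->lebs * leb_size
                         + new_nhead_offs - old_nhead_offs;

        return a->sz == used ? 0 : -1;
}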
diff --git a/fs/ubifs/misc.h b/fs/ubifs/misc.h index 4c12a9215d7f..4fa81d867e41 100644 --- a/fs/ubifs/misc.h +++ b/fs/ubifs/misc.h | |||
| @@ -310,4 +310,31 @@ static inline int ubifs_tnc_lookup(struct ubifs_info *c, | |||
| 310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); | 310 | return ubifs_tnc_locate(c, key, node, NULL, NULL); |
| 311 | } | 311 | } |
| 312 | 312 | ||
| 313 | /** | ||
| 314 | * ubifs_get_lprops - get reference to LEB properties. | ||
| 315 | * @c: the UBIFS file-system description object | ||
| 316 | * | ||
| 317 | * This function locks lprops. Lprops have to be unlocked by | ||
| 318 | * 'ubifs_release_lprops()'. | ||
| 319 | */ | ||
| 320 | static inline void ubifs_get_lprops(struct ubifs_info *c) | ||
| 321 | { | ||
| 322 | mutex_lock(&c->lp_mutex); | ||
| 323 | } | ||
| 324 | |||
| 325 | /** | ||
| 326 | * ubifs_release_lprops - release lprops lock. | ||
| 327 | * @c: the UBIFS file-system description object | ||
| 328 | * | ||
| 329 | * This function has to be called after each 'ubifs_get_lprops()' call to | ||
| 330 | * unlock lprops. | ||
| 331 | */ | ||
| 332 | static inline void ubifs_release_lprops(struct ubifs_info *c) | ||
| 333 | { | ||
| 334 | ubifs_assert(mutex_is_locked(&c->lp_mutex)); | ||
| 335 | ubifs_assert(c->lst.empty_lebs >= 0 && | ||
| 336 | c->lst.empty_lebs <= c->main_lebs); | ||
| 337 | mutex_unlock(&c->lp_mutex); | ||
| 338 | } | ||
| 339 | |||
| 313 | #endif /* __UBIFS_MISC_H__ */ | 340 | #endif /* __UBIFS_MISC_H__ */ |
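The lock/unlock pair moves into misc.h as inlines. The fragment below sketches the intended usage around ubifs_change_lp(); the wrapper function is hypothetical and assumes the UBIFS internal headers, only the calls inside it appear in this patch.

/* Hypothetical wrapper, for illustration of the locking discipline only. */
static int mark_leb_taken(struct ubifs_info *c, const struct ubifs_lprops *lp)
{
        int err = 0;

        ubifs_get_lprops(c);                    /* takes c->lp_mutex          */
        lp = ubifs_change_lp(c, lp, c->leb_size, 0,
                             lp->flags | LPROPS_TAKEN, 0);
        if (IS_ERR(lp))
                err = PTR_ERR(lp);
        ubifs_release_lprops(c);                /* asserts and drops lp_mutex */
        return err;
}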
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index acf5c5fffc60..0ed82479b44b 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c | |||
| @@ -87,7 +87,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, | |||
| 87 | 87 | ||
| 88 | dbg_scan("scanning %s", dbg_ntype(ch->node_type)); | 88 | dbg_scan("scanning %s", dbg_ntype(ch->node_type)); |
| 89 | 89 | ||
| 90 | if (ubifs_check_node(c, buf, lnum, offs, quiet)) | 90 | if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) |
| 91 | return SCANNED_A_CORRUPT_NODE; | 91 | return SCANNED_A_CORRUPT_NODE; |
| 92 | 92 | ||
| 93 | if (ch->node_type == UBIFS_PAD_NODE) { | 93 | if (ch->node_type == UBIFS_PAD_NODE) { |
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 3f4902060c7a..8780efbf40ac 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c | |||
| @@ -401,6 +401,16 @@ static int ubifs_show_options(struct seq_file *s, struct vfsmount *mnt) | |||
| 401 | else if (c->mount_opts.unmount_mode == 1) | 401 | else if (c->mount_opts.unmount_mode == 1) |
| 402 | seq_printf(s, ",norm_unmount"); | 402 | seq_printf(s, ",norm_unmount"); |
| 403 | 403 | ||
| 404 | if (c->mount_opts.bulk_read == 2) | ||
| 405 | seq_printf(s, ",bulk_read"); | ||
| 406 | else if (c->mount_opts.bulk_read == 1) | ||
| 407 | seq_printf(s, ",no_bulk_read"); | ||
| 408 | |||
| 409 | if (c->mount_opts.chk_data_crc == 2) | ||
| 410 | seq_printf(s, ",chk_data_crc"); | ||
| 411 | else if (c->mount_opts.chk_data_crc == 1) | ||
| 412 | seq_printf(s, ",no_chk_data_crc"); | ||
| 413 | |||
| 404 | return 0; | 414 | return 0; |
| 405 | } | 415 | } |
| 406 | 416 | ||
| @@ -408,13 +418,26 @@ static int ubifs_sync_fs(struct super_block *sb, int wait) | |||
| 408 | { | 418 | { |
| 409 | struct ubifs_info *c = sb->s_fs_info; | 419 | struct ubifs_info *c = sb->s_fs_info; |
| 410 | int i, ret = 0, err; | 420 | int i, ret = 0, err; |
| 421 | long long bud_bytes; | ||
| 411 | 422 | ||
| 412 | if (c->jheads) | 423 | if (c->jheads) { |
| 413 | for (i = 0; i < c->jhead_cnt; i++) { | 424 | for (i = 0; i < c->jhead_cnt; i++) { |
| 414 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); | 425 | err = ubifs_wbuf_sync(&c->jheads[i].wbuf); |
| 415 | if (err && !ret) | 426 | if (err && !ret) |
| 416 | ret = err; | 427 | ret = err; |
| 417 | } | 428 | } |
| 429 | |||
| 430 | /* Commit the journal unless it has too little data */ | ||
| 431 | spin_lock(&c->buds_lock); | ||
| 432 | bud_bytes = c->bud_bytes; | ||
| 433 | spin_unlock(&c->buds_lock); | ||
| 434 | if (bud_bytes > c->leb_size) { | ||
| 435 | err = ubifs_run_commit(c); | ||
| 436 | if (err) | ||
| 437 | return err; | ||
| 438 | } | ||
| 439 | } | ||
| 440 | |||
| 418 | /* | 441 | /* |
| 419 | * We ought to call sync for c->ubi but it does not have one. If it had | 442 | * We ought to call sync for c->ubi but it does not have one. If it had |
| 420 | * it would in turn call mtd->sync, however mtd operations are | 443 | * it would in turn call mtd->sync, however mtd operations are |
| @@ -538,6 +561,18 @@ static int init_constants_early(struct ubifs_info *c) | |||
| 538 | * calculations when reporting free space. | 561 | * calculations when reporting free space. |
| 539 | */ | 562 | */ |
| 540 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; | 563 | c->leb_overhead = c->leb_size % UBIFS_MAX_DATA_NODE_SZ; |
| 564 | /* Buffer size for bulk-reads */ | ||
| 565 | c->bulk_read_buf_size = UBIFS_MAX_BULK_READ * UBIFS_MAX_DATA_NODE_SZ; | ||
| 566 | if (c->bulk_read_buf_size > c->leb_size) | ||
| 567 | c->bulk_read_buf_size = c->leb_size; | ||
| 568 | if (c->bulk_read_buf_size > 128 * 1024) { | ||
| 569 | /* Check if we can kmalloc more than 128KiB */ | ||
| 570 | void *try = kmalloc(c->bulk_read_buf_size, GFP_KERNEL); | ||
| 571 | |||
| 572 | kfree(try); | ||
| 573 | if (!try) | ||
| 574 | c->bulk_read_buf_size = 128 * 1024; | ||
| 575 | } | ||
| 541 | return 0; | 576 | return 0; |
| 542 | } | 577 | } |
| 543 | 578 | ||
| @@ -840,17 +875,29 @@ static int check_volume_empty(struct ubifs_info *c) | |||
| 840 | * | 875 | * |
| 841 | * Opt_fast_unmount: do not run a journal commit before un-mounting | 876 | * Opt_fast_unmount: do not run a journal commit before un-mounting |
| 842 | * Opt_norm_unmount: run a journal commit before un-mounting | 877 | * Opt_norm_unmount: run a journal commit before un-mounting |
| 878 | * Opt_bulk_read: enable bulk-reads | ||
| 879 | * Opt_no_bulk_read: disable bulk-reads | ||
| 880 | * Opt_chk_data_crc: check CRCs when reading data nodes | ||
| 881 | * Opt_no_chk_data_crc: do not check CRCs when reading data nodes | ||
| 843 | * Opt_err: just end of array marker | 882 | * Opt_err: just end of array marker |
| 844 | */ | 883 | */ |
| 845 | enum { | 884 | enum { |
| 846 | Opt_fast_unmount, | 885 | Opt_fast_unmount, |
| 847 | Opt_norm_unmount, | 886 | Opt_norm_unmount, |
| 887 | Opt_bulk_read, | ||
| 888 | Opt_no_bulk_read, | ||
| 889 | Opt_chk_data_crc, | ||
| 890 | Opt_no_chk_data_crc, | ||
| 848 | Opt_err, | 891 | Opt_err, |
| 849 | }; | 892 | }; |
| 850 | 893 | ||
| 851 | static match_table_t tokens = { | 894 | static const match_table_t tokens = { |
| 852 | {Opt_fast_unmount, "fast_unmount"}, | 895 | {Opt_fast_unmount, "fast_unmount"}, |
| 853 | {Opt_norm_unmount, "norm_unmount"}, | 896 | {Opt_norm_unmount, "norm_unmount"}, |
| 897 | {Opt_bulk_read, "bulk_read"}, | ||
| 898 | {Opt_no_bulk_read, "no_bulk_read"}, | ||
| 899 | {Opt_chk_data_crc, "chk_data_crc"}, | ||
| 900 | {Opt_no_chk_data_crc, "no_chk_data_crc"}, | ||
| 854 | {Opt_err, NULL}, | 901 | {Opt_err, NULL}, |
| 855 | }; | 902 | }; |
| 856 | 903 | ||
| @@ -888,6 +935,22 @@ static int ubifs_parse_options(struct ubifs_info *c, char *options, | |||
| 888 | c->mount_opts.unmount_mode = 1; | 935 | c->mount_opts.unmount_mode = 1; |
| 889 | c->fast_unmount = 0; | 936 | c->fast_unmount = 0; |
| 890 | break; | 937 | break; |
| 938 | case Opt_bulk_read: | ||
| 939 | c->mount_opts.bulk_read = 2; | ||
| 940 | c->bulk_read = 1; | ||
| 941 | break; | ||
| 942 | case Opt_no_bulk_read: | ||
| 943 | c->mount_opts.bulk_read = 1; | ||
| 944 | c->bulk_read = 0; | ||
| 945 | break; | ||
| 946 | case Opt_chk_data_crc: | ||
| 947 | c->mount_opts.chk_data_crc = 2; | ||
| 948 | c->no_chk_data_crc = 0; | ||
| 949 | break; | ||
| 950 | case Opt_no_chk_data_crc: | ||
| 951 | c->mount_opts.chk_data_crc = 1; | ||
| 952 | c->no_chk_data_crc = 1; | ||
| 953 | break; | ||
| 891 | default: | 954 | default: |
| 892 | ubifs_err("unrecognized mount option \"%s\" " | 955 | ubifs_err("unrecognized mount option \"%s\" " |
| 893 | "or missing value", p); | 956 | "or missing value", p); |
| @@ -996,6 +1059,8 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 996 | goto out_free; | 1059 | goto out_free; |
| 997 | } | 1060 | } |
| 998 | 1061 | ||
| 1062 | c->always_chk_crc = 1; | ||
| 1063 | |||
| 999 | err = ubifs_read_superblock(c); | 1064 | err = ubifs_read_superblock(c); |
| 1000 | if (err) | 1065 | if (err) |
| 1001 | goto out_free; | 1066 | goto out_free; |
| @@ -1032,8 +1097,6 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1032 | 1097 | ||
| 1033 | /* Create background thread */ | 1098 | /* Create background thread */ |
| 1034 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1099 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
| 1035 | if (!c->bgt) | ||
| 1036 | c->bgt = ERR_PTR(-EINVAL); | ||
| 1037 | if (IS_ERR(c->bgt)) { | 1100 | if (IS_ERR(c->bgt)) { |
| 1038 | err = PTR_ERR(c->bgt); | 1101 | err = PTR_ERR(c->bgt); |
| 1039 | c->bgt = NULL; | 1102 | c->bgt = NULL; |
| @@ -1139,24 +1202,28 @@ static int mount_ubifs(struct ubifs_info *c) | |||
| 1139 | if (err) | 1202 | if (err) |
| 1140 | goto out_infos; | 1203 | goto out_infos; |
| 1141 | 1204 | ||
| 1205 | c->always_chk_crc = 0; | ||
| 1206 | |||
| 1142 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", | 1207 | ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"", |
| 1143 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); | 1208 | c->vi.ubi_num, c->vi.vol_id, c->vi.name); |
| 1144 | if (mounted_read_only) | 1209 | if (mounted_read_only) |
| 1145 | ubifs_msg("mounted read-only"); | 1210 | ubifs_msg("mounted read-only"); |
| 1146 | x = (long long)c->main_lebs * c->leb_size; | 1211 | x = (long long)c->main_lebs * c->leb_size; |
| 1147 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", | 1212 | ubifs_msg("file system size: %lld bytes (%lld KiB, %lld MiB, %d " |
| 1148 | x, x >> 10, x >> 20, c->main_lebs); | 1213 | "LEBs)", x, x >> 10, x >> 20, c->main_lebs); |
| 1149 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; | 1214 | x = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; |
| 1150 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d LEBs)", | 1215 | ubifs_msg("journal size: %lld bytes (%lld KiB, %lld MiB, %d " |
| 1151 | x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); | 1216 | "LEBs)", x, x >> 10, x >> 20, c->log_lebs + c->max_bud_cnt); |
| 1152 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | 1217 | ubifs_msg("media format: %d (latest is %d)", |
| 1153 | ubifs_msg("media format %d, latest format %d", | ||
| 1154 | c->fmt_version, UBIFS_FORMAT_VERSION); | 1218 | c->fmt_version, UBIFS_FORMAT_VERSION); |
| 1219 | ubifs_msg("default compressor: %s", ubifs_compr_name(c->default_compr)); | ||
| 1220 | ubifs_msg("reserved for root: %llu bytes (%llu KiB)", | ||
| 1221 | c->report_rp_size, c->report_rp_size >> 10); | ||
| 1155 | 1222 | ||
| 1156 | dbg_msg("compiled on: " __DATE__ " at " __TIME__); | 1223 | dbg_msg("compiled on: " __DATE__ " at " __TIME__); |
| 1157 | dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); | 1224 | dbg_msg("min. I/O unit size: %d bytes", c->min_io_size); |
| 1158 | dbg_msg("LEB size: %d bytes (%d KiB)", | 1225 | dbg_msg("LEB size: %d bytes (%d KiB)", |
| 1159 | c->leb_size, c->leb_size / 1024); | 1226 | c->leb_size, c->leb_size >> 10); |
| 1160 | dbg_msg("data journal heads: %d", | 1227 | dbg_msg("data journal heads: %d", |
| 1161 | c->jhead_cnt - NONDATA_JHEADS_CNT); | 1228 | c->jhead_cnt - NONDATA_JHEADS_CNT); |
| 1162 | dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" | 1229 | dbg_msg("UUID: %02X%02X%02X%02X-%02X%02X" |
| @@ -1282,6 +1349,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1282 | 1349 | ||
| 1283 | mutex_lock(&c->umount_mutex); | 1350 | mutex_lock(&c->umount_mutex); |
| 1284 | c->remounting_rw = 1; | 1351 | c->remounting_rw = 1; |
| 1352 | c->always_chk_crc = 1; | ||
| 1285 | 1353 | ||
| 1286 | /* Check for enough free space */ | 1354 | /* Check for enough free space */ |
| 1287 | if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { | 1355 | if (ubifs_calc_available(c, c->min_idx_lebs) <= 0) { |
| @@ -1345,20 +1413,20 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1345 | 1413 | ||
| 1346 | /* Create background thread */ | 1414 | /* Create background thread */ |
| 1347 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); | 1415 | c->bgt = kthread_create(ubifs_bg_thread, c, c->bgt_name); |
| 1348 | if (!c->bgt) | ||
| 1349 | c->bgt = ERR_PTR(-EINVAL); | ||
| 1350 | if (IS_ERR(c->bgt)) { | 1416 | if (IS_ERR(c->bgt)) { |
| 1351 | err = PTR_ERR(c->bgt); | 1417 | err = PTR_ERR(c->bgt); |
| 1352 | c->bgt = NULL; | 1418 | c->bgt = NULL; |
| 1353 | ubifs_err("cannot spawn \"%s\", error %d", | 1419 | ubifs_err("cannot spawn \"%s\", error %d", |
| 1354 | c->bgt_name, err); | 1420 | c->bgt_name, err); |
| 1355 | return err; | 1421 | goto out; |
| 1356 | } | 1422 | } |
| 1357 | wake_up_process(c->bgt); | 1423 | wake_up_process(c->bgt); |
| 1358 | 1424 | ||
| 1359 | c->orph_buf = vmalloc(c->leb_size); | 1425 | c->orph_buf = vmalloc(c->leb_size); |
| 1360 | if (!c->orph_buf) | 1426 | if (!c->orph_buf) { |
| 1361 | return -ENOMEM; | 1427 | err = -ENOMEM; |
| 1428 | goto out; | ||
| 1429 | } | ||
| 1362 | 1430 | ||
| 1363 | /* Check for enough log space */ | 1431 | /* Check for enough log space */ |
| 1364 | lnum = c->lhead_lnum + 1; | 1432 | lnum = c->lhead_lnum + 1; |
| @@ -1385,6 +1453,7 @@ static int ubifs_remount_rw(struct ubifs_info *c) | |||
| 1385 | dbg_gen("re-mounted read-write"); | 1453 | dbg_gen("re-mounted read-write"); |
| 1386 | c->vfs_sb->s_flags &= ~MS_RDONLY; | 1454 | c->vfs_sb->s_flags &= ~MS_RDONLY; |
| 1387 | c->remounting_rw = 0; | 1455 | c->remounting_rw = 0; |
| 1456 | c->always_chk_crc = 0; | ||
| 1388 | mutex_unlock(&c->umount_mutex); | 1457 | mutex_unlock(&c->umount_mutex); |
| 1389 | return 0; | 1458 | return 0; |
| 1390 | 1459 | ||
| @@ -1400,6 +1469,7 @@ out: | |||
| 1400 | c->ileb_buf = NULL; | 1469 | c->ileb_buf = NULL; |
| 1401 | ubifs_lpt_free(c, 1); | 1470 | ubifs_lpt_free(c, 1); |
| 1402 | c->remounting_rw = 0; | 1471 | c->remounting_rw = 0; |
| 1472 | c->always_chk_crc = 0; | ||
| 1403 | mutex_unlock(&c->umount_mutex); | 1473 | mutex_unlock(&c->umount_mutex); |
| 1404 | return err; | 1474 | return err; |
| 1405 | } | 1475 | } |
| @@ -1408,12 +1478,9 @@ out: | |||
| 1408 | * commit_on_unmount - commit the journal when un-mounting. | 1478 | * commit_on_unmount - commit the journal when un-mounting. |
| 1409 | * @c: UBIFS file-system description object | 1479 | * @c: UBIFS file-system description object |
| 1410 | * | 1480 | * |
| 1411 | * This function is called during un-mounting and it commits the journal unless | 1481 | * This function is called during un-mounting and re-mounting, and it commits |
| 1412 | * the "fast unmount" mode is enabled. It also avoids committing the journal if | 1482 | * the journal unless the "fast unmount" mode is enabled. It also avoids |
| 1413 | * it contains too few data. | 1483 | * committing the journal if it contains too little data. |
| 1414 | * | ||
| 1415 | * Sometimes recovery requires the journal to be committed at least once, and | ||
| 1416 | * this function takes care about this. | ||
| 1417 | */ | 1484 | */ |
| 1418 | static void commit_on_unmount(struct ubifs_info *c) | 1485 | static void commit_on_unmount(struct ubifs_info *c) |
| 1419 | { | 1486 | { |
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 7634c5970887..d27fd918b9c9 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c | |||
| @@ -284,7 +284,7 @@ static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c, | |||
| 284 | } | 284 | } |
| 285 | 285 | ||
| 286 | zn = copy_znode(c, znode); | 286 | zn = copy_znode(c, znode); |
| 287 | if (unlikely(IS_ERR(zn))) | 287 | if (IS_ERR(zn)) |
| 288 | return zn; | 288 | return zn; |
| 289 | 289 | ||
| 290 | if (zbr->len) { | 290 | if (zbr->len) { |
| @@ -470,6 +470,10 @@ static int try_read_node(const struct ubifs_info *c, void *buf, int type, | |||
| 470 | if (node_len != len) | 470 | if (node_len != len) |
| 471 | return 0; | 471 | return 0; |
| 472 | 472 | ||
| 473 | if (type == UBIFS_DATA_NODE && !c->always_chk_crc) | ||
| 474 | if (c->no_chk_data_crc) | ||
| 475 | return 0; | ||
| 476 | |||
| 473 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); | 477 | crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8); |
| 474 | node_crc = le32_to_cpu(ch->crc); | 478 | node_crc = le32_to_cpu(ch->crc); |
| 475 | if (crc != node_crc) | 479 | if (crc != node_crc) |
| @@ -1128,7 +1132,7 @@ static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c, | |||
| 1128 | ubifs_assert(znode == c->zroot.znode); | 1132 | ubifs_assert(znode == c->zroot.znode); |
| 1129 | znode = dirty_cow_znode(c, &c->zroot); | 1133 | znode = dirty_cow_znode(c, &c->zroot); |
| 1130 | } | 1134 | } |
| 1131 | if (unlikely(IS_ERR(znode)) || !p) | 1135 | if (IS_ERR(znode) || !p) |
| 1132 | break; | 1136 | break; |
| 1133 | ubifs_assert(path[p - 1] >= 0); | 1137 | ubifs_assert(path[p - 1] >= 0); |
| 1134 | ubifs_assert(path[p - 1] < znode->child_cnt); | 1138 | ubifs_assert(path[p - 1] < znode->child_cnt); |
| @@ -1492,6 +1496,289 @@ out: | |||
| 1492 | } | 1496 | } |
| 1493 | 1497 | ||
| 1494 | /** | 1498 | /** |
| 1499 | * ubifs_tnc_get_bu_keys - lookup keys for bulk-read. | ||
| 1500 | * @c: UBIFS file-system description object | ||
| 1501 | * @bu: bulk-read parameters and results | ||
| 1502 | * | ||
| 1503 | * Look up consecutive data node keys for the same inode that reside in | ||
| 1504 | * consecutive positions in the same LEB. | ||
| 1505 | */ | ||
| 1506 | int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu) | ||
| 1507 | { | ||
| 1508 | int n, err = 0, lnum = -1, uninitialized_var(offs); | ||
| 1509 | int uninitialized_var(len); | ||
| 1510 | unsigned int block = key_block(c, &bu->key); | ||
| 1511 | struct ubifs_znode *znode; | ||
| 1512 | |||
| 1513 | bu->cnt = 0; | ||
| 1514 | bu->blk_cnt = 0; | ||
| 1515 | bu->eof = 0; | ||
| 1516 | |||
| 1517 | mutex_lock(&c->tnc_mutex); | ||
| 1518 | /* Find first key */ | ||
| 1519 | err = ubifs_lookup_level0(c, &bu->key, &znode, &n); | ||
| 1520 | if (err < 0) | ||
| 1521 | goto out; | ||
| 1522 | if (err) { | ||
| 1523 | /* Key found */ | ||
| 1524 | len = znode->zbranch[n].len; | ||
| 1525 | /* The buffer must be big enough for at least 1 node */ | ||
| 1526 | if (len > bu->buf_len) { | ||
| 1527 | err = -EINVAL; | ||
| 1528 | goto out; | ||
| 1529 | } | ||
| 1530 | /* Add this key */ | ||
| 1531 | bu->zbranch[bu->cnt++] = znode->zbranch[n]; | ||
| 1532 | bu->blk_cnt += 1; | ||
| 1533 | lnum = znode->zbranch[n].lnum; | ||
| 1534 | offs = ALIGN(znode->zbranch[n].offs + len, 8); | ||
| 1535 | } | ||
| 1536 | while (1) { | ||
| 1537 | struct ubifs_zbranch *zbr; | ||
| 1538 | union ubifs_key *key; | ||
| 1539 | unsigned int next_block; | ||
| 1540 | |||
| 1541 | /* Find next key */ | ||
| 1542 | err = tnc_next(c, &znode, &n); | ||
| 1543 | if (err) | ||
| 1544 | goto out; | ||
| 1545 | zbr = &znode->zbranch[n]; | ||
| 1546 | key = &zbr->key; | ||
| 1547 | /* See if there is another data key for this file */ | ||
| 1548 | if (key_inum(c, key) != key_inum(c, &bu->key) || | ||
| 1549 | key_type(c, key) != UBIFS_DATA_KEY) { | ||
| 1550 | err = -ENOENT; | ||
| 1551 | goto out; | ||
| 1552 | } | ||
| 1553 | if (lnum < 0) { | ||
| 1554 | /* First key found */ | ||
| 1555 | lnum = zbr->lnum; | ||
| 1556 | offs = ALIGN(zbr->offs + zbr->len, 8); | ||
| 1557 | len = zbr->len; | ||
| 1558 | if (len > bu->buf_len) { | ||
| 1559 | err = -EINVAL; | ||
| 1560 | goto out; | ||
| 1561 | } | ||
| 1562 | } else { | ||
| 1563 | /* | ||
| 1564 | * The data nodes must be in consecutive positions in | ||
| 1565 | * the same LEB. | ||
| 1566 | */ | ||
| 1567 | if (zbr->lnum != lnum || zbr->offs != offs) | ||
| 1568 | goto out; | ||
| 1569 | offs += ALIGN(zbr->len, 8); | ||
| 1570 | len = ALIGN(len, 8) + zbr->len; | ||
| 1571 | /* Must not exceed buffer length */ | ||
| 1572 | if (len > bu->buf_len) | ||
| 1573 | goto out; | ||
| 1574 | } | ||
| 1575 | /* Allow for holes */ | ||
| 1576 | next_block = key_block(c, key); | ||
| 1577 | bu->blk_cnt += (next_block - block - 1); | ||
| 1578 | if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) | ||
| 1579 | goto out; | ||
| 1580 | block = next_block; | ||
| 1581 | /* Add this key */ | ||
| 1582 | bu->zbranch[bu->cnt++] = *zbr; | ||
| 1583 | bu->blk_cnt += 1; | ||
| 1584 | /* See if we have room for more */ | ||
| 1585 | if (bu->cnt >= UBIFS_MAX_BULK_READ) | ||
| 1586 | goto out; | ||
| 1587 | if (bu->blk_cnt >= UBIFS_MAX_BULK_READ) | ||
| 1588 | goto out; | ||
| 1589 | } | ||
| 1590 | out: | ||
| 1591 | if (err == -ENOENT) { | ||
| 1592 | bu->eof = 1; | ||
| 1593 | err = 0; | ||
| 1594 | } | ||
| 1595 | bu->gc_seq = c->gc_seq; | ||
| 1596 | mutex_unlock(&c->tnc_mutex); | ||
| 1597 | if (err) | ||
| 1598 | return err; | ||
| 1599 | /* | ||
| 1600 | * An enormous hole could cause bulk-read to encompass too many | ||
| 1601 | * page cache pages, so limit the number here. | ||
| 1602 | */ | ||
| 1603 | if (bu->blk_cnt > UBIFS_MAX_BULK_READ) | ||
| 1604 | bu->blk_cnt = UBIFS_MAX_BULK_READ; | ||
| 1605 | /* | ||
| 1606 | * Ensure that bulk-read covers a whole number of page cache | ||
| 1607 | * pages. | ||
| 1608 | */ | ||
| 1609 | if (UBIFS_BLOCKS_PER_PAGE == 1 || | ||
| 1610 | !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1))) | ||
| 1611 | return 0; | ||
| 1612 | if (bu->eof) { | ||
| 1613 | /* At the end of file we can round up */ | ||
| 1614 | bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1; | ||
| 1615 | return 0; | ||
| 1616 | } | ||
| 1617 | /* Exclude data nodes that do not make up a whole page cache page */ | ||
| 1618 | block = key_block(c, &bu->key) + bu->blk_cnt; | ||
| 1619 | block &= ~(UBIFS_BLOCKS_PER_PAGE - 1); | ||
| 1620 | while (bu->cnt) { | ||
| 1621 | if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block) | ||
| 1622 | break; | ||
| 1623 | bu->cnt -= 1; | ||
| 1624 | } | ||
| 1625 | return 0; | ||
| 1626 | } | ||
| 1627 | |||
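A worked example of the page-cache rounding done at the end of ubifs_tnc_get_bu_keys() above, with assumed numbers (4 KiB UBIFS blocks and a hypothetical 16 KiB page size, so UBIFS_BLOCKS_PER_PAGE would be 4):

	/*
	 * Assume the bulk-read starts at block 0 of the inode and the loop
	 * above collected blk_cnt = 10 blocks without reaching EOF.  With
	 * 4 blocks per page that is 2 whole pages plus 2 leftover blocks:
	 *
	 *	block = key_block(c, &bu->key) + 10;	->  10
	 *	block &= ~(4 - 1);			->  8
	 *
	 * Trailing zbranches whose block number is >= 8 are dropped, so the
	 * read covers exactly blocks 0..7, i.e. 2 complete page cache pages.
	 * Had EOF been reached instead, blk_cnt would simply be rounded up,
	 * because the missing blocks at the end of the file are a hole and
	 * read back as zeroes.
	 */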
| 1628 | /** | ||
| 1629 | * read_wbuf - bulk-read from a LEB with a wbuf. | ||
| 1630 | * @wbuf: wbuf that may overlap the read | ||
| 1631 | * @buf: buffer into which to read | ||
| 1632 | * @len: read length | ||
| 1633 | * @lnum: LEB number from which to read | ||
| 1634 | * @offs: offset from which to read | ||
| 1635 | * | ||
| 1636 | * This function returns %0 on success or a negative error code on failure. | ||
| 1637 | */ | ||
| 1638 | static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum, | ||
| 1639 | int offs) | ||
| 1640 | { | ||
| 1641 | const struct ubifs_info *c = wbuf->c; | ||
| 1642 | int rlen, overlap; | ||
| 1643 | |||
| 1644 | dbg_io("LEB %d:%d, length %d", lnum, offs, len); | ||
| 1645 | ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0); | ||
| 1646 | ubifs_assert(!(offs & 7) && offs < c->leb_size); | ||
| 1647 | ubifs_assert(offs + len <= c->leb_size); | ||
| 1648 | |||
| 1649 | spin_lock(&wbuf->lock); | ||
| 1650 | overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs); | ||
| 1651 | if (!overlap) { | ||
| 1652 | /* We may safely unlock the write-buffer and read the data */ | ||
| 1653 | spin_unlock(&wbuf->lock); | ||
| 1654 | return ubi_read(c->ubi, lnum, buf, offs, len); | ||
| 1655 | } | ||
| 1656 | |||
| 1657 | /* Don't read under wbuf */ | ||
| 1658 | rlen = wbuf->offs - offs; | ||
| 1659 | if (rlen < 0) | ||
| 1660 | rlen = 0; | ||
| 1661 | |||
| 1662 | /* Copy the rest from the write-buffer */ | ||
| 1663 | memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen); | ||
| 1664 | spin_unlock(&wbuf->lock); | ||
| 1665 | |||
| 1666 | if (rlen > 0) | ||
| 1667 | /* Read everything that goes before write-buffer */ | ||
| 1668 | return ubi_read(c->ubi, lnum, buf, offs, rlen); | ||
| 1669 | |||
| 1670 | return 0; | ||
| 1671 | } | ||
| 1672 | |||
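The split read performed by read_wbuf() is clearer with example offsets (the numbers below are illustrative only):

	/*
	 * Suppose the write-buffer currently caches LEB 7 from offset 2048
	 * onwards, and the bulk-read asks for LEB 7, offs = 1024, len = 2048:
	 *
	 *	overlap = (7 == wbuf->lnum && 1024 + 2048 > 2048)  ->  true
	 *	rlen    = wbuf->offs - offs = 2048 - 1024 = 1024
	 *
	 * The first 1024 bytes come from flash via ubi_read(); the remaining
	 * 1024 bytes are copied out of wbuf->buf (source offset
	 * offs + rlen - wbuf->offs = 0).  If the requested range ends at or
	 * before wbuf->offs there is no overlap and the whole range is read
	 * from flash directly.
	 */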
| 1673 | /** | ||
| 1674 | * validate_data_node - validate data nodes for bulk-read. | ||
| 1675 | * @c: UBIFS file-system description object | ||
| 1676 | * @buf: buffer containing data node to validate | ||
| 1677 | * @zbr: zbranch of data node to validate | ||
| 1678 | * | ||
| 1679 | * This function returns %0 on success or a negative error code on failure. | ||
| 1680 | */ | ||
| 1681 | static int validate_data_node(struct ubifs_info *c, void *buf, | ||
| 1682 | struct ubifs_zbranch *zbr) | ||
| 1683 | { | ||
| 1684 | union ubifs_key key1; | ||
| 1685 | struct ubifs_ch *ch = buf; | ||
| 1686 | int err, len; | ||
| 1687 | |||
| 1688 | if (ch->node_type != UBIFS_DATA_NODE) { | ||
| 1689 | ubifs_err("bad node type (%d but expected %d)", | ||
| 1690 | ch->node_type, UBIFS_DATA_NODE); | ||
| 1691 | goto out_err; | ||
| 1692 | } | ||
| 1693 | |||
| 1694 | err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); | ||
| 1695 | if (err) { | ||
| 1696 | ubifs_err("expected node type %d", UBIFS_DATA_NODE); | ||
| 1697 | goto out; | ||
| 1698 | } | ||
| 1699 | |||
| 1700 | len = le32_to_cpu(ch->len); | ||
| 1701 | if (len != zbr->len) { | ||
| 1702 | ubifs_err("bad node length %d, expected %d", len, zbr->len); | ||
| 1703 | goto out_err; | ||
| 1704 | } | ||
| 1705 | |||
| 1706 | /* Make sure the key of the read node is correct */ | ||
| 1707 | key_read(c, buf + UBIFS_KEY_OFFSET, &key1); | ||
| 1708 | if (!keys_eq(c, &zbr->key, &key1)) { | ||
| 1709 | ubifs_err("bad key in node at LEB %d:%d", | ||
| 1710 | zbr->lnum, zbr->offs); | ||
| 1711 | dbg_tnc("looked for key %s found node's key %s", | ||
| 1712 | DBGKEY(&zbr->key), DBGKEY1(&key1)); | ||
| 1713 | goto out_err; | ||
| 1714 | } | ||
| 1715 | |||
| 1716 | return 0; | ||
| 1717 | |||
| 1718 | out_err: | ||
| 1719 | err = -EINVAL; | ||
| 1720 | out: | ||
| 1721 | ubifs_err("bad node at LEB %d:%d", zbr->lnum, zbr->offs); | ||
| 1722 | dbg_dump_node(c, buf); | ||
| 1723 | dbg_dump_stack(); | ||
| 1724 | return err; | ||
| 1725 | } | ||
| 1726 | |||
| 1727 | /** | ||
| 1728 | * ubifs_tnc_bulk_read - read a number of data nodes in one go. | ||
| 1729 | * @c: UBIFS file-system description object | ||
| 1730 | * @bu: bulk-read parameters and results | ||
| 1731 | * | ||
| 1732 | * This function reads and validates the data nodes that were identified by the | ||
| 1733 | * 'ubifs_tnc_get_bu_keys()' function. This function returns %0 on success, | ||
| 1734 | * -EAGAIN to indicate a race with GC, or another negative error code on | ||
| 1735 | * failure. | ||
| 1736 | */ | ||
| 1737 | int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu) | ||
| 1738 | { | ||
| 1739 | int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i; | ||
| 1740 | struct ubifs_wbuf *wbuf; | ||
| 1741 | void *buf; | ||
| 1742 | |||
| 1743 | len = bu->zbranch[bu->cnt - 1].offs; | ||
| 1744 | len += bu->zbranch[bu->cnt - 1].len - offs; | ||
| 1745 | if (len > bu->buf_len) { | ||
| 1746 | ubifs_err("buffer too small %d vs %d", bu->buf_len, len); | ||
| 1747 | return -EINVAL; | ||
| 1748 | } | ||
| 1749 | |||
| 1750 | /* Do the read */ | ||
| 1751 | wbuf = ubifs_get_wbuf(c, lnum); | ||
| 1752 | if (wbuf) | ||
| 1753 | err = read_wbuf(wbuf, bu->buf, len, lnum, offs); | ||
| 1754 | else | ||
| 1755 | err = ubi_read(c->ubi, lnum, bu->buf, offs, len); | ||
| 1756 | |||
| 1757 | /* Check for a race with GC */ | ||
| 1758 | if (maybe_leb_gced(c, lnum, bu->gc_seq)) | ||
| 1759 | return -EAGAIN; | ||
| 1760 | |||
| 1761 | if (err && err != -EBADMSG) { | ||
| 1762 | ubifs_err("failed to read from LEB %d:%d, error %d", | ||
| 1763 | lnum, offs, err); | ||
| 1764 | dbg_dump_stack(); | ||
| 1765 | dbg_tnc("key %s", DBGKEY(&bu->key)); | ||
| 1766 | return err; | ||
| 1767 | } | ||
| 1768 | |||
| 1769 | /* Validate the nodes read */ | ||
| 1770 | buf = bu->buf; | ||
| 1771 | for (i = 0; i < bu->cnt; i++) { | ||
| 1772 | err = validate_data_node(c, buf, &bu->zbranch[i]); | ||
| 1773 | if (err) | ||
| 1774 | return err; | ||
| 1775 | buf = buf + ALIGN(bu->zbranch[i].len, 8); | ||
| 1776 | } | ||
| 1777 | |||
| 1778 | return 0; | ||
| 1779 | } | ||
| 1780 | |||
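A minimal sketch of how a caller might drive the two bulk-read entry points; the function name and the fallback policy here are assumptions for illustration, since the real caller lives in fs/ubifs/file.c and is not part of this hunk:

	static int bulk_read_sketch(struct ubifs_info *c, struct bu_info *bu)
	{
		int err;

		/*
		 * Assumes bu->key already holds the first data key of the
		 * range and bu->buf points to a buffer of bu->buf_len bytes
		 * (e.g. c->bulk_read_buf_size).  The counters (cnt, blk_cnt,
		 * eof) are reset by ubifs_tnc_get_bu_keys() itself, as seen
		 * at the top of the function above.
		 */
		err = ubifs_tnc_get_bu_keys(c, bu);
		if (err)
			return err;
		if (!bu->cnt)
			return 0;	/* nothing to bulk-read */

		err = ubifs_tnc_bulk_read(c, bu);
		if (err == -EAGAIN)
			return 0;	/* raced with GC - fall back to single-node reads */
		return err;
	}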
| 1781 | /** | ||
| 1495 | * do_lookup_nm - look up a "hashed" node. | 1782 | * do_lookup_nm - look up a "hashed" node. |
| 1496 | * @c: UBIFS file-system description object | 1783 | * @c: UBIFS file-system description object |
| 1497 | * @key: node key to lookup | 1784 | * @key: node key to lookup |
| @@ -1675,7 +1962,7 @@ static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode, | |||
| 1675 | { | 1962 | { |
| 1676 | struct ubifs_znode *zn, *zi, *zp; | 1963 | struct ubifs_znode *zn, *zi, *zp; |
| 1677 | int i, keep, move, appending = 0; | 1964 | int i, keep, move, appending = 0; |
| 1678 | union ubifs_key *key = &zbr->key; | 1965 | union ubifs_key *key = &zbr->key, *key1; |
| 1679 | 1966 | ||
| 1680 | ubifs_assert(n >= 0 && n <= c->fanout); | 1967 | ubifs_assert(n >= 0 && n <= c->fanout); |
| 1681 | 1968 | ||
| @@ -1716,20 +2003,33 @@ again: | |||
| 1716 | zn->level = znode->level; | 2003 | zn->level = znode->level; |
| 1717 | 2004 | ||
| 1718 | /* Decide where to split */ | 2005 | /* Decide where to split */ |
| 1719 | if (znode->level == 0 && n == c->fanout && | 2006 | if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) { |
| 1720 | key_type(c, key) == UBIFS_DATA_KEY) { | 2007 | /* Try not to split consecutive data keys */ |
| 1721 | union ubifs_key *key1; | 2008 | if (n == c->fanout) { |
| 1722 | 2009 | key1 = &znode->zbranch[n - 1].key; | |
| 1723 | /* | 2010 | if (key_inum(c, key1) == key_inum(c, key) && |
| 1724 | * If this is an inode which is being appended - do not split | 2011 | key_type(c, key1) == UBIFS_DATA_KEY) |
| 1725 | * it because no other zbranches can be inserted between | 2012 | appending = 1; |
| 1726 | * zbranches of consecutive data nodes anyway. | 2013 | } else |
| 1727 | */ | 2014 | goto check_split; |
| 1728 | key1 = &znode->zbranch[n - 1].key; | 2015 | } else if (appending && n != c->fanout) { |
| 1729 | if (key_inum(c, key1) == key_inum(c, key) && | 2016 | /* Try not to split consecutive data keys */ |
| 1730 | key_type(c, key1) == UBIFS_DATA_KEY && | 2017 | appending = 0; |
| 1731 | key_block(c, key1) == key_block(c, key) - 1) | 2018 | check_split: |
| 1732 | appending = 1; | 2019 | if (n >= (c->fanout + 1) / 2) { |
| 2020 | key1 = &znode->zbranch[0].key; | ||
| 2021 | if (key_inum(c, key1) == key_inum(c, key) && | ||
| 2022 | key_type(c, key1) == UBIFS_DATA_KEY) { | ||
| 2023 | key1 = &znode->zbranch[n].key; | ||
| 2024 | if (key_inum(c, key1) != key_inum(c, key) || | ||
| 2025 | key_type(c, key1) != UBIFS_DATA_KEY) { | ||
| 2026 | keep = n; | ||
| 2027 | move = c->fanout - keep; | ||
| 2028 | zi = znode; | ||
| 2029 | goto do_split; | ||
| 2030 | } | ||
| 2031 | } | ||
| 2032 | } | ||
| 1733 | } | 2033 | } |
| 1734 | 2034 | ||
| 1735 | if (appending) { | 2035 | if (appending) { |
| @@ -1759,6 +2059,8 @@ again: | |||
| 1759 | zbr->znode->parent = zn; | 2059 | zbr->znode->parent = zn; |
| 1760 | } | 2060 | } |
| 1761 | 2061 | ||
| 2062 | do_split: | ||
| 2063 | |||
| 1762 | __set_bit(DIRTY_ZNODE, &zn->flags); | 2064 | __set_bit(DIRTY_ZNODE, &zn->flags); |
| 1763 | atomic_long_inc(&c->dirty_zn_cnt); | 2065 | atomic_long_inc(&c->dirty_zn_cnt); |
| 1764 | 2066 | ||
| @@ -1785,14 +2087,11 @@ again: | |||
| 1785 | 2087 | ||
| 1786 | /* Insert new znode (produced by splitting) into the parent */ | 2088 | /* Insert new znode (produced by splitting) into the parent */ |
| 1787 | if (zp) { | 2089 | if (zp) { |
| 1788 | i = n; | 2090 | if (n == 0 && zi == znode && znode->iip == 0) |
| 2091 | correct_parent_keys(c, znode); | ||
| 2092 | |||
| 1789 | /* Locate insertion point */ | 2093 | /* Locate insertion point */ |
| 1790 | n = znode->iip + 1; | 2094 | n = znode->iip + 1; |
| 1791 | if (appending && n != c->fanout) | ||
| 1792 | appending = 0; | ||
| 1793 | |||
| 1794 | if (i == 0 && zi == znode && znode->iip == 0) | ||
| 1795 | correct_parent_keys(c, znode); | ||
| 1796 | 2095 | ||
| 1797 | /* Tail recursion */ | 2096 | /* Tail recursion */ |
| 1798 | zbr->key = zn->zbranch[0].key; | 2097 | zbr->key = zn->zbranch[0].key; |
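The intent of the rewritten split heuristic is easier to see with a concrete case; the note below is an illustration based on the surrounding tnc_insert() code, parts of which fall outside this hunk:

	/*
	 * Example: a file is being appended, so each new data key sorts
	 * after every key already in the level-0 znode.  When the znode is
	 * full (n == c->fanout) and the last existing key is a data key of
	 * the same inode, 'appending' is set: instead of splitting the znode
	 * down the middle, the existing zbranches stay where they are and
	 * only the new key goes into the freshly allocated znode, keeping
	 * index nodes of sequentially written files densely packed.  The new
	 * 'check_split' path extends this idea to inserts elsewhere: if the
	 * new key continues the run of data keys that starts the znode while
	 * the zbranch at the insertion point does not belong to that run,
	 * the split is placed at the insertion point (keep = n) rather than
	 * at the halfway mark, so the run is not broken up.
	 */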
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index a25c1cc1f8d9..b48db999903e 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c | |||
| @@ -480,8 +480,8 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, | |||
| 480 | } | 480 | } |
| 481 | 481 | ||
| 482 | /* Make sure the key of the read node is correct */ | 482 | /* Make sure the key of the read node is correct */ |
| 483 | key_read(c, key, &key1); | 483 | key_read(c, node + UBIFS_KEY_OFFSET, &key1); |
| 484 | if (memcmp(node + UBIFS_KEY_OFFSET, &key1, c->key_len)) { | 484 | if (!keys_eq(c, key, &key1)) { |
| 485 | ubifs_err("bad key in node at LEB %d:%d", | 485 | ubifs_err("bad key in node at LEB %d:%d", |
| 486 | zbr->lnum, zbr->offs); | 486 | zbr->lnum, zbr->offs); |
| 487 | dbg_tnc("looked for key %s found node's key %s", | 487 | dbg_tnc("looked for key %s found node's key %s", |
diff --git a/fs/ubifs/ubifs-media.h b/fs/ubifs/ubifs-media.h index a9ecbd9af20d..0b378042a3a2 100644 --- a/fs/ubifs/ubifs-media.h +++ b/fs/ubifs/ubifs-media.h | |||
| @@ -75,7 +75,6 @@ | |||
| 75 | */ | 75 | */ |
| 76 | #define UBIFS_BLOCK_SIZE 4096 | 76 | #define UBIFS_BLOCK_SIZE 4096 |
| 77 | #define UBIFS_BLOCK_SHIFT 12 | 77 | #define UBIFS_BLOCK_SHIFT 12 |
| 78 | #define UBIFS_BLOCK_MASK 0x00000FFF | ||
| 79 | 78 | ||
| 80 | /* UBIFS padding byte pattern (must not be first or last byte of node magic) */ | 79 | /* UBIFS padding byte pattern (must not be first or last byte of node magic) */ |
| 81 | #define UBIFS_PADDING_BYTE 0xCE | 80 | #define UBIFS_PADDING_BYTE 0xCE |
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 17c620b93eec..a7bd32fa15b9 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h | |||
| @@ -142,6 +142,9 @@ | |||
| 142 | /* Maximum expected tree height for use by bottom_up_buf */ | 142 | /* Maximum expected tree height for use by bottom_up_buf */ |
| 143 | #define BOTTOM_UP_HEIGHT 64 | 143 | #define BOTTOM_UP_HEIGHT 64 |
| 144 | 144 | ||
| 145 | /* Maximum number of data nodes to bulk-read */ | ||
| 146 | #define UBIFS_MAX_BULK_READ 32 | ||
| 147 | |||
| 145 | /* | 148 | /* |
| 146 | * Lockdep classes for UBIFS inode @ui_mutex. | 149 | * Lockdep classes for UBIFS inode @ui_mutex. |
| 147 | */ | 150 | */ |
| @@ -328,9 +331,10 @@ struct ubifs_gced_idx_leb { | |||
| 328 | * this inode | 331 | * this inode |
| 329 | * @dirty: non-zero if the inode is dirty | 332 | * @dirty: non-zero if the inode is dirty |
| 330 | * @xattr: non-zero if this is an extended attribute inode | 333 | * @xattr: non-zero if this is an extended attribute inode |
| 334 | * @bulk_read: non-zero if bulk-read should be used | ||
| 331 | * @ui_mutex: serializes inode write-back with the rest of VFS operations, | 335 | * @ui_mutex: serializes inode write-back with the rest of VFS operations, |
| 332 | * serializes "clean <-> dirty" state changes, protects @dirty, | 336 | * serializes "clean <-> dirty" state changes, serializes bulk-read, |
| 333 | * @ui_size, and @xattr_size | 337 | * protects @dirty, @bulk_read, @ui_size, and @xattr_size |
| 334 | * @ui_lock: protects @synced_i_size | 338 | * @ui_lock: protects @synced_i_size |
| 335 | * @synced_i_size: synchronized size of inode, i.e. the value of inode size | 339 | * @synced_i_size: synchronized size of inode, i.e. the value of inode size |
| 336 | * currently stored on the flash; used only for regular file | 340 | * currently stored on the flash; used only for regular file |
| @@ -338,6 +342,8 @@ struct ubifs_gced_idx_leb { | |||
| 338 | * @ui_size: inode size used by UBIFS when writing to flash | 342 | * @ui_size: inode size used by UBIFS when writing to flash |
| 339 | * @flags: inode flags (@UBIFS_COMPR_FL, etc) | 343 | * @flags: inode flags (@UBIFS_COMPR_FL, etc) |
| 340 | * @compr_type: default compression type used for this inode | 344 | * @compr_type: default compression type used for this inode |
| 345 | * @last_page_read: page number of last page read (for bulk read) | ||
| 346 | * @read_in_a_row: number of consecutive pages read in a row (for bulk read) | ||
| 341 | * @data_len: length of the data attached to the inode | 347 | * @data_len: length of the data attached to the inode |
| 342 | * @data: inode's data | 348 | * @data: inode's data |
| 343 | * | 349 | * |
| @@ -379,12 +385,15 @@ struct ubifs_inode { | |||
| 379 | unsigned int xattr_names; | 385 | unsigned int xattr_names; |
| 380 | unsigned int dirty:1; | 386 | unsigned int dirty:1; |
| 381 | unsigned int xattr:1; | 387 | unsigned int xattr:1; |
| 388 | unsigned int bulk_read:1; | ||
| 382 | struct mutex ui_mutex; | 389 | struct mutex ui_mutex; |
| 383 | spinlock_t ui_lock; | 390 | spinlock_t ui_lock; |
| 384 | loff_t synced_i_size; | 391 | loff_t synced_i_size; |
| 385 | loff_t ui_size; | 392 | loff_t ui_size; |
| 386 | int flags; | 393 | int flags; |
| 387 | int compr_type; | 394 | int compr_type; |
| 395 | pgoff_t last_page_read; | ||
| 396 | pgoff_t read_in_a_row; | ||
| 388 | int data_len; | 397 | int data_len; |
| 389 | void *data; | 398 | void *data; |
| 390 | }; | 399 | }; |
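A hedged sketch of the sequential-read detection that @last_page_read and @read_in_a_row are meant to support; the actual heuristic lives in fs/ubifs/file.c, which is not part of this hunk, and the threshold below is purely illustrative:

	static void bulk_read_hint_sketch(struct ubifs_inode *ui, pgoff_t index)
	{
		/* caller is assumed to hold ui->ui_mutex, which the kernel-doc
		 * above says serializes bulk-read */
		if (ui->last_page_read + 1 == index)
			ui->read_in_a_row += 1;	/* still reading sequentially */
		else
			ui->read_in_a_row = 1;	/* seek: restart the streak */
		ui->last_page_read = index;

		if (ui->read_in_a_row >= 4)	/* assumed threshold */
			ui->bulk_read = 1;	/* switch this inode to bulk-read */
	}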
| @@ -698,8 +707,8 @@ struct ubifs_jhead { | |||
| 698 | * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. | 707 | * struct ubifs_zbranch - key/coordinate/length branch stored in znodes. |
| 699 | * @key: key | 708 | * @key: key |
| 700 | * @znode: znode address in memory | 709 | * @znode: znode address in memory |
| 701 | * @lnum: LEB number of the indexing node | 710 | * @lnum: LEB number of the target node (indexing node or data node) |
| 702 | * @offs: offset of the indexing node within @lnum | 711 | * @offs: target node offset within @lnum |
| 703 | * @len: target node length | 712 | * @len: target node length |
| 704 | */ | 713 | */ |
| 705 | struct ubifs_zbranch { | 714 | struct ubifs_zbranch { |
| @@ -744,6 +753,28 @@ struct ubifs_znode { | |||
| 744 | }; | 753 | }; |
| 745 | 754 | ||
| 746 | /** | 755 | /** |
| 756 | * struct bu_info - bulk-read information | ||
| 757 | * @key: first data node key | ||
| 758 | * @zbranch: zbranches of data nodes to bulk read | ||
| 759 | * @buf: buffer to read into | ||
| 760 | * @buf_len: buffer length | ||
| 761 | * @gc_seq: GC sequence number to detect races with GC | ||
| 762 | * @cnt: number of data nodes for bulk read | ||
| 763 | * @blk_cnt: number of data blocks including holes | ||
| 764 | * @eof: end of file reached | ||
| 765 | */ | ||
| 766 | struct bu_info { | ||
| 767 | union ubifs_key key; | ||
| 768 | struct ubifs_zbranch zbranch[UBIFS_MAX_BULK_READ]; | ||
| 769 | void *buf; | ||
| 770 | int buf_len; | ||
| 771 | int gc_seq; | ||
| 772 | int cnt; | ||
| 773 | int blk_cnt; | ||
| 774 | int eof; | ||
| 775 | }; | ||
| 776 | |||
| 777 | /** | ||
| 747 | * struct ubifs_node_range - node length range description data structure. | 778 | * struct ubifs_node_range - node length range description data structure. |
| 748 | * @len: fixed node length | 779 | * @len: fixed node length |
| 749 | * @min_len: minimum possible node length | 780 | * @min_len: minimum possible node length |
| @@ -862,9 +893,13 @@ struct ubifs_orphan { | |||
| 862 | /** | 893 | /** |
| 863 | * struct ubifs_mount_opts - UBIFS-specific mount options information. | 894 | * struct ubifs_mount_opts - UBIFS-specific mount options information. |
| 864 | * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) | 895 | * @unmount_mode: selected unmount mode (%0 default, %1 normal, %2 fast) |
| 896 | * @bulk_read: enable bulk-reads | ||
| 897 | * @chk_data_crc: check CRCs when reading data nodes | ||
| 865 | */ | 898 | */ |
| 866 | struct ubifs_mount_opts { | 899 | struct ubifs_mount_opts { |
| 867 | unsigned int unmount_mode:2; | 900 | unsigned int unmount_mode:2; |
| 901 | unsigned int bulk_read:2; | ||
| 902 | unsigned int chk_data_crc:2; | ||
| 868 | }; | 903 | }; |
| 869 | 904 | ||
| 870 | /** | 905 | /** |
| @@ -905,13 +940,12 @@ struct ubifs_mount_opts { | |||
| 905 | * @cmt_state: commit state | 940 | * @cmt_state: commit state |
| 906 | * @cs_lock: commit state lock | 941 | * @cs_lock: commit state lock |
| 907 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running | 942 | * @cmt_wq: wait queue to sleep on if the log is full and a commit is running |
| 943 | * | ||
| 908 | * @fast_unmount: do not run journal commit before un-mounting | 944 | * @fast_unmount: do not run journal commit before un-mounting |
| 909 | * @big_lpt: flag that LPT is too big to write whole during commit | 945 | * @big_lpt: flag that LPT is too big to write whole during commit |
| 910 | * @check_lpt_free: flag that indicates LPT GC may be needed | 946 | * @no_chk_data_crc: do not check CRCs when reading data nodes (except during |
| 911 | * @nospace: non-zero if the file-system does not have flash space (used as | 947 | * recovery) |
| 912 | * optimization) | 948 | * @bulk_read: enable bulk-reads |
| 913 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
| 914 | * pool is full | ||
| 915 | * | 949 | * |
| 916 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and | 950 | * @tnc_mutex: protects the Tree Node Cache (TNC), @zroot, @cnext, @enext, and |
| 917 | * @calc_idx_sz | 951 | * @calc_idx_sz |
| @@ -935,6 +969,7 @@ struct ubifs_mount_opts { | |||
| 935 | * @mst_node: master node | 969 | * @mst_node: master node |
| 936 | * @mst_offs: offset of valid master node | 970 | * @mst_offs: offset of valid master node |
| 937 | * @mst_mutex: protects the master node area, @mst_node, and @mst_offs | 971 | * @mst_mutex: protects the master node area, @mst_node, and @mst_offs |
| 972 | * @bulk_read_buf_size: buffer size for bulk-reads | ||
| 938 | * | 973 | * |
| 939 | * @log_lebs: number of logical eraseblocks in the log | 974 | * @log_lebs: number of logical eraseblocks in the log |
| 940 | * @log_bytes: log size in bytes | 975 | * @log_bytes: log size in bytes |
| @@ -977,12 +1012,17 @@ struct ubifs_mount_opts { | |||
| 977 | * but which still have to be taken into account because | 1012 | * but which still have to be taken into account because |
| 978 | * the index has not been committed so far | 1013 | * the index has not been committed so far |
| 979 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, | 1014 | * @space_lock: protects @budg_idx_growth, @budg_data_growth, @budg_dd_growth, |
| 980 | * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, and @lst; | 1015 | * @budg_uncommited_idx, @min_idx_lebs, @old_idx_sz, @lst, |
| 1016 | * @nospace, and @nospace_rp; | ||
| 981 | * @min_idx_lebs: minimum number of LEBs required for the index | 1017 | * @min_idx_lebs: minimum number of LEBs required for the index |
| 982 | * @old_idx_sz: size of index on flash | 1018 | * @old_idx_sz: size of index on flash |
| 983 | * @calc_idx_sz: temporary variable which is used to calculate new index size | 1019 | * @calc_idx_sz: temporary variable which is used to calculate new index size |
| 984 | * (contains accurate new index size at end of TNC commit start) | 1020 | * (contains accurate new index size at end of TNC commit start) |
| 985 | * @lst: lprops statistics | 1021 | * @lst: lprops statistics |
| 1022 | * @nospace: non-zero if the file-system does not have flash space (used as | ||
| 1023 | * optimization) | ||
| 1024 | * @nospace_rp: the same as @nospace, but additionally means that even reserved | ||
| 1025 | * pool is full | ||
| 986 | * | 1026 | * |
| 987 | * @page_budget: budget for a page | 1027 | * @page_budget: budget for a page |
| 988 | * @inode_budget: budget for an inode | 1028 | * @inode_budget: budget for an inode |
| @@ -1061,6 +1101,7 @@ struct ubifs_mount_opts { | |||
| 1061 | * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab | 1101 | * @lpt_drty_flgs: dirty flags for LPT special nodes e.g. ltab |
| 1062 | * @dirty_nn_cnt: number of dirty nnodes | 1102 | * @dirty_nn_cnt: number of dirty nnodes |
| 1063 | * @dirty_pn_cnt: number of dirty pnodes | 1103 | * @dirty_pn_cnt: number of dirty pnodes |
| 1104 | * @check_lpt_free: flag that indicates LPT GC may be needed | ||
| 1064 | * @lpt_sz: LPT size | 1105 | * @lpt_sz: LPT size |
| 1065 | * @lpt_nod_buf: buffer for an on-flash nnode or pnode | 1106 | * @lpt_nod_buf: buffer for an on-flash nnode or pnode |
| 1066 | * @lpt_buf: buffer of LEB size used by LPT | 1107 | * @lpt_buf: buffer of LEB size used by LPT |
| @@ -1102,6 +1143,7 @@ struct ubifs_mount_opts { | |||
| 1102 | * @rcvrd_mst_node: recovered master node to write when mounting ro to rw | 1143 | * @rcvrd_mst_node: recovered master node to write when mounting ro to rw |
| 1103 | * @size_tree: inode size information for recovery | 1144 | * @size_tree: inode size information for recovery |
| 1104 | * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) | 1145 | * @remounting_rw: set while remounting from ro to rw (sb flags have MS_RDONLY) |
| 1146 | * @always_chk_crc: always check CRCs (while mounting and remounting rw) | ||
| 1105 | * @mount_opts: UBIFS-specific mount options | 1147 | * @mount_opts: UBIFS-specific mount options |
| 1106 | * | 1148 | * |
| 1107 | * @dbg_buf: a buffer of LEB size used for debugging purposes | 1149 | * @dbg_buf: a buffer of LEB size used for debugging purposes |
| @@ -1146,11 +1188,11 @@ struct ubifs_info { | |||
| 1146 | int cmt_state; | 1188 | int cmt_state; |
| 1147 | spinlock_t cs_lock; | 1189 | spinlock_t cs_lock; |
| 1148 | wait_queue_head_t cmt_wq; | 1190 | wait_queue_head_t cmt_wq; |
| 1191 | |||
| 1149 | unsigned int fast_unmount:1; | 1192 | unsigned int fast_unmount:1; |
| 1150 | unsigned int big_lpt:1; | 1193 | unsigned int big_lpt:1; |
| 1151 | unsigned int check_lpt_free:1; | 1194 | unsigned int no_chk_data_crc:1; |
| 1152 | unsigned int nospace:1; | 1195 | unsigned int bulk_read:1; |
| 1153 | unsigned int nospace_rp:1; | ||
| 1154 | 1196 | ||
| 1155 | struct mutex tnc_mutex; | 1197 | struct mutex tnc_mutex; |
| 1156 | struct ubifs_zbranch zroot; | 1198 | struct ubifs_zbranch zroot; |
| @@ -1175,6 +1217,7 @@ struct ubifs_info { | |||
| 1175 | struct ubifs_mst_node *mst_node; | 1217 | struct ubifs_mst_node *mst_node; |
| 1176 | int mst_offs; | 1218 | int mst_offs; |
| 1177 | struct mutex mst_mutex; | 1219 | struct mutex mst_mutex; |
| 1220 | int bulk_read_buf_size; | ||
| 1178 | 1221 | ||
| 1179 | int log_lebs; | 1222 | int log_lebs; |
| 1180 | long long log_bytes; | 1223 | long long log_bytes; |
| @@ -1218,6 +1261,8 @@ struct ubifs_info { | |||
| 1218 | unsigned long long old_idx_sz; | 1261 | unsigned long long old_idx_sz; |
| 1219 | unsigned long long calc_idx_sz; | 1262 | unsigned long long calc_idx_sz; |
| 1220 | struct ubifs_lp_stats lst; | 1263 | struct ubifs_lp_stats lst; |
| 1264 | unsigned int nospace:1; | ||
| 1265 | unsigned int nospace_rp:1; | ||
| 1221 | 1266 | ||
| 1222 | int page_budget; | 1267 | int page_budget; |
| 1223 | int inode_budget; | 1268 | int inode_budget; |
| @@ -1294,6 +1339,7 @@ struct ubifs_info { | |||
| 1294 | int lpt_drty_flgs; | 1339 | int lpt_drty_flgs; |
| 1295 | int dirty_nn_cnt; | 1340 | int dirty_nn_cnt; |
| 1296 | int dirty_pn_cnt; | 1341 | int dirty_pn_cnt; |
| 1342 | int check_lpt_free; | ||
| 1297 | long long lpt_sz; | 1343 | long long lpt_sz; |
| 1298 | void *lpt_nod_buf; | 1344 | void *lpt_nod_buf; |
| 1299 | void *lpt_buf; | 1345 | void *lpt_buf; |
| @@ -1335,6 +1381,7 @@ struct ubifs_info { | |||
| 1335 | struct ubifs_mst_node *rcvrd_mst_node; | 1381 | struct ubifs_mst_node *rcvrd_mst_node; |
| 1336 | struct rb_root size_tree; | 1382 | struct rb_root size_tree; |
| 1337 | int remounting_rw; | 1383 | int remounting_rw; |
| 1384 | int always_chk_crc; | ||
| 1338 | struct ubifs_mount_opts mount_opts; | 1385 | struct ubifs_mount_opts mount_opts; |
| 1339 | 1386 | ||
| 1340 | #ifdef CONFIG_UBIFS_FS_DEBUG | 1387 | #ifdef CONFIG_UBIFS_FS_DEBUG |
| @@ -1347,6 +1394,12 @@ struct ubifs_info { | |||
| 1347 | unsigned long fail_timeout; | 1394 | unsigned long fail_timeout; |
| 1348 | unsigned int fail_cnt; | 1395 | unsigned int fail_cnt; |
| 1349 | unsigned int fail_cnt_max; | 1396 | unsigned int fail_cnt_max; |
| 1397 | long long chk_lpt_sz; | ||
| 1398 | long long chk_lpt_sz2; | ||
| 1399 | long long chk_lpt_wastage; | ||
| 1400 | int chk_lpt_lebs; | ||
| 1401 | int new_nhead_lnum; | ||
| 1402 | int new_nhead_offs; | ||
| 1350 | #endif | 1403 | #endif |
| 1351 | }; | 1404 | }; |
| 1352 | 1405 | ||
| @@ -1377,7 +1430,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, | |||
| 1377 | int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, | 1430 | int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, |
| 1378 | int offs, int dtype); | 1431 | int offs, int dtype); |
| 1379 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, | 1432 | int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, |
| 1380 | int offs, int quiet); | 1433 | int offs, int quiet, int chk_crc); |
| 1381 | void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); | 1434 | void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); |
| 1382 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); | 1435 | void ubifs_prep_grp_node(struct ubifs_info *c, void *node, int len, int last); |
| 1383 | int ubifs_io_init(struct ubifs_info *c); | 1436 | int ubifs_io_init(struct ubifs_info *c); |
| @@ -1490,6 +1543,8 @@ void destroy_old_idx(struct ubifs_info *c); | |||
| 1490 | int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, | 1543 | int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level, |
| 1491 | int lnum, int offs); | 1544 | int lnum, int offs); |
| 1492 | int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); | 1545 | int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode); |
| 1546 | int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu); | ||
| 1547 | int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu); | ||
| 1493 | 1548 | ||
| 1494 | /* tnc_misc.c */ | 1549 | /* tnc_misc.c */ |
| 1495 | struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, | 1550 | struct ubifs_znode *ubifs_tnc_levelorder_next(struct ubifs_znode *zr, |
| @@ -1586,12 +1641,10 @@ int ubifs_lpt_post_commit(struct ubifs_info *c); | |||
| 1586 | void ubifs_lpt_free(struct ubifs_info *c, int wr_only); | 1641 | void ubifs_lpt_free(struct ubifs_info *c, int wr_only); |
| 1587 | 1642 | ||
| 1588 | /* lprops.c */ | 1643 | /* lprops.c */ |
| 1589 | void ubifs_get_lprops(struct ubifs_info *c); | ||
| 1590 | const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, | 1644 | const struct ubifs_lprops *ubifs_change_lp(struct ubifs_info *c, |
| 1591 | const struct ubifs_lprops *lp, | 1645 | const struct ubifs_lprops *lp, |
| 1592 | int free, int dirty, int flags, | 1646 | int free, int dirty, int flags, |
| 1593 | int idx_gc_cnt); | 1647 | int idx_gc_cnt); |
| 1594 | void ubifs_release_lprops(struct ubifs_info *c); | ||
| 1595 | void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); | 1648 | void ubifs_get_lp_stats(struct ubifs_info *c, struct ubifs_lp_stats *stats); |
| 1596 | void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, | 1649 | void ubifs_add_to_cat(struct ubifs_info *c, struct ubifs_lprops *lprops, |
| 1597 | int cat); | 1650 | int cat); |
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c index 649bec78b645..cfd31e229c89 100644 --- a/fs/ubifs/xattr.c +++ b/fs/ubifs/xattr.c | |||
| @@ -446,7 +446,7 @@ ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size) | |||
| 446 | int type; | 446 | int type; |
| 447 | 447 | ||
| 448 | xent = ubifs_tnc_next_ent(c, &key, &nm); | 448 | xent = ubifs_tnc_next_ent(c, &key, &nm); |
| 449 | if (unlikely(IS_ERR(xent))) { | 449 | if (IS_ERR(xent)) { |
| 450 | err = PTR_ERR(xent); | 450 | err = PTR_ERR(xent); |
| 451 | break; | 451 | break; |
| 452 | } | 452 | } |
diff --git a/fs/udf/super.c b/fs/udf/super.c index 5698bbf83bbf..e25e7010627b 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c | |||
| @@ -369,7 +369,7 @@ enum { | |||
| 369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore | 369 | Opt_err, Opt_uforget, Opt_uignore, Opt_gforget, Opt_gignore |
| 370 | }; | 370 | }; |
| 371 | 371 | ||
| 372 | static match_table_t tokens = { | 372 | static const match_table_t tokens = { |
| 373 | {Opt_novrs, "novrs"}, | 373 | {Opt_novrs, "novrs"}, |
| 374 | {Opt_nostrict, "nostrict"}, | 374 | {Opt_nostrict, "nostrict"}, |
| 375 | {Opt_bs, "bs=%u"}, | 375 | {Opt_bs, "bs=%u"}, |
diff --git a/fs/ufs/super.c b/fs/ufs/super.c index 3141969b456d..e65212dfb60e 100644 --- a/fs/ufs/super.c +++ b/fs/ufs/super.c | |||
| @@ -309,7 +309,7 @@ enum { | |||
| 309 | Opt_err | 309 | Opt_err |
| 310 | }; | 310 | }; |
| 311 | 311 | ||
| 312 | static match_table_t tokens = { | 312 | static const match_table_t tokens = { |
| 313 | {Opt_type_old, "ufstype=old"}, | 313 | {Opt_type_old, "ufstype=old"}, |
| 314 | {Opt_type_sunx86, "ufstype=sunx86"}, | 314 | {Opt_type_sunx86, "ufstype=sunx86"}, |
| 315 | {Opt_type_sun, "ufstype=sun"}, | 315 | {Opt_type_sun, "ufstype=sun"}, |
| @@ -1233,7 +1233,7 @@ static int ufs_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
| 1233 | { | 1233 | { |
| 1234 | struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); | 1234 | struct ufs_sb_info *sbi = UFS_SB(vfs->mnt_sb); |
| 1235 | unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; | 1235 | unsigned mval = sbi->s_mount_opt & UFS_MOUNT_UFSTYPE; |
| 1236 | struct match_token *tp = tokens; | 1236 | const struct match_token *tp = tokens; |
| 1237 | 1237 | ||
| 1238 | while (tp->token != Opt_onerror_panic && tp->token != mval) | 1238 | while (tp->token != Opt_onerror_panic && tp->token != mval) |
| 1239 | ++tp; | 1239 | ++tp; |
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 986061ae1b9b..36d5fcd3f593 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c | |||
| @@ -1001,12 +1001,13 @@ xfs_buf_iodone_work( | |||
| 1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the | 1001 | * We can get an EOPNOTSUPP to ordered writes. Here we clear the |
| 1002 | * ordered flag and reissue them. Because we can't tell the higher | 1002 | * ordered flag and reissue them. Because we can't tell the higher |
| 1003 | * layers directly that they should not issue ordered I/O anymore, they | 1003 | * layers directly that they should not issue ordered I/O anymore, they |
| 1004 | * need to check if the ordered flag was cleared during I/O completion. | 1004 | * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion. |
| 1005 | */ | 1005 | */ |
| 1006 | if ((bp->b_error == EOPNOTSUPP) && | 1006 | if ((bp->b_error == EOPNOTSUPP) && |
| 1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { | 1007 | (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) { |
| 1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); | 1008 | XB_TRACE(bp, "ordered_retry", bp->b_iodone); |
| 1009 | bp->b_flags &= ~XBF_ORDERED; | 1009 | bp->b_flags &= ~XBF_ORDERED; |
| 1010 | bp->b_flags |= _XFS_BARRIER_FAILED; | ||
| 1010 | xfs_buf_iorequest(bp); | 1011 | xfs_buf_iorequest(bp); |
| 1011 | } else if (bp->b_iodone) | 1012 | } else if (bp->b_iodone) |
| 1012 | (*(bp->b_iodone))(bp); | 1013 | (*(bp->b_iodone))(bp); |
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h index fe0109956656..456519a088c7 100644 --- a/fs/xfs/linux-2.6/xfs_buf.h +++ b/fs/xfs/linux-2.6/xfs_buf.h | |||
| @@ -85,6 +85,14 @@ typedef enum { | |||
| 85 | * modifications being lost. | 85 | * modifications being lost. |
| 86 | */ | 86 | */ |
| 87 | _XBF_PAGE_LOCKED = (1 << 22), | 87 | _XBF_PAGE_LOCKED = (1 << 22), |
| 88 | |||
| 89 | /* | ||
| 90 | * If we try a barrier write, but it fails we have to communicate | ||
| 91 | * this to the upper layers. Unfortunately b_error gets overwritten | ||
| 92 | * when the buffer is re-issued so we have to add another flag to | ||
| 93 | * keep this information. | ||
| 94 | */ | ||
| 95 | _XFS_BARRIER_FAILED = (1 << 23), | ||
| 88 | } xfs_buf_flags_t; | 96 | } xfs_buf_flags_t; |
| 89 | 97 | ||
| 90 | typedef enum { | 98 | typedef enum { |
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index 18d3c8487835..e39013619b26 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c | |||
| @@ -158,7 +158,7 @@ enum { | |||
| 158 | Opt_barrier, Opt_nobarrier, Opt_err | 158 | Opt_barrier, Opt_nobarrier, Opt_err |
| 159 | }; | 159 | }; |
| 160 | 160 | ||
| 161 | static match_table_t tokens = { | 161 | static const match_table_t tokens = { |
| 162 | {Opt_barrier, "barrier"}, | 162 | {Opt_barrier, "barrier"}, |
| 163 | {Opt_nobarrier, "nobarrier"}, | 163 | {Opt_nobarrier, "nobarrier"}, |
| 164 | {Opt_err, NULL} | 164 | {Opt_err, NULL} |
| @@ -1323,7 +1323,7 @@ xfs_fs_remount( | |||
| 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); | 1323 | "XFS: mount option \"%s\" not supported for remount\n", p); |
| 1324 | return -EINVAL; | 1324 | return -EINVAL; |
| 1325 | #else | 1325 | #else |
| 1326 | return 0; | 1326 | break; |
| 1327 | #endif | 1327 | #endif |
| 1328 | } | 1328 | } |
| 1329 | } | 1329 | } |
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 503ea89e8b9a..0b02c6443551 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c | |||
| @@ -1033,11 +1033,12 @@ xlog_iodone(xfs_buf_t *bp) | |||
| 1033 | l = iclog->ic_log; | 1033 | l = iclog->ic_log; |
| 1034 | 1034 | ||
| 1035 | /* | 1035 | /* |
| 1036 | * If the ordered flag has been removed by a lower | 1036 | * If the _XFS_BARRIER_FAILED flag was set by a lower |
| 1037 | * layer, it means the underlyin device no longer supports | 1037 | * layer, it means the underlying device no longer supports |
| 1038 | * barrier I/O. Warn loudly and turn off barriers. | 1038 | * barrier I/O. Warn loudly and turn off barriers. |
| 1039 | */ | 1039 | */ |
| 1040 | if ((l->l_mp->m_flags & XFS_MOUNT_BARRIER) && !XFS_BUF_ISORDERED(bp)) { | 1040 | if (bp->b_flags & _XFS_BARRIER_FAILED) { |
| 1041 | bp->b_flags &= ~_XFS_BARRIER_FAILED; | ||
| 1041 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; | 1042 | l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER; |
| 1042 | xfs_fs_cmn_err(CE_WARN, l->l_mp, | 1043 | xfs_fs_cmn_err(CE_WARN, l->l_mp, |
| 1043 | "xlog_iodone: Barriers are no longer supported" | 1044 | "xlog_iodone: Barriers are no longer supported" |
